- add-inline.c Select all
#include <stdio.h>
// compile with command line
// clang add-inline.c -o add-inline_x86_64 -arch x86_64
// clang add-inline.c -o add-inline_arm64 -arch arm64
// disassemble with
// otool -otV add-inline_x86_64
// otool -otV add-inline_arm64
/*
 * Adds two integers with a single inline-assembly instruction and prints
 * the result. One variant is compiled for x86_64 and one for arm64; on any
 * other target no assembly is emitted and ans keeps its initial value 0.
 */
int main(int argc, const char * argv[]) {
    int a = 10;
    int b = 25;
    int ans = 0;
#ifdef __x86_64__
    /*
     * AT&T syntax: the source operand comes first, the destination last.
     * x86 "add" is a two-operand instruction that overwrites its
     * destination, so the destination must be declared read-write ("+r").
     * Writing to an operand that is declared input-only is not allowed:
     * the compiler assumes input registers are unchanged after the asm.
     */
    ans = a;
    __asm__( "addl %1, %0"      // %0 += %1, i.e. ans = a + b
             : "+r"(ans)
             : "r"(b)
             : "cc" );          // add modifies the flags register
#endif
#ifdef __arm64__
    /*
     * A64 "add" takes three operands and writes only the destination, so
     * a plain output ("=r") plus two inputs is sufficient. The %w modifier
     * selects the 32-bit view (wN) of each register.
     */
    __asm__( "add %w0, %w1, %w2"    // %w0 = %w1 + %w2
             : "=r"(ans)
             : "r"(a), "r"(b) );
#endif
    printf("The answer is %d\n", ans);
    return 0;
}
(2) Demo the assembly source code as a separate function for x86_64 and arm64
- add-main.c Select all
#include <stdio.h>
// compile with command line
// clang add-main.c add.s -o add-main_x86_64 -arch x86_64
// clang add-main.c add.s -o add-main_arm64 -arch arm64
/* Implemented in assembly in add.s, which is compiled together with this file. */
int add(int x, int y);

/* Calls the assembly add() routine with two constants and prints the result. */
int main(int argc, const char * argv[]) {
    int ans = add(15, 40);
    printf("The answer is %d\n", ans);
    return 0;
}
- add.s Select all
// -----------------------------------------------------------------------
// int add(int x, int y)
//
// Returns x + y. One source file serves both x86_64 and arm64; the
// C preprocessor selects the instruction sequence for the target.
// -----------------------------------------------------------------------
        .text
        .globl  _add
        .align  2
_add:
#ifdef __x86_64__
        // x86_64 calling convention (AT&T syntax: source first, destination last)
        //   integer arguments: rdi, rsi, rdx, rcx, r8, r9
        //   32-bit views:      edi, esi, edx, ecx, r8d, r9d
        //   16-bit views:      di, si, dx, cx, r8w, r9w
        //   return value:      rax (eax for a 32-bit int)
        // For system calls (not used here) the syscall number goes in rax,
        // see https://sigsegv.pl/osx-bsd-syscalls/
        movl    %edi, %eax              // eax = x
        addl    %esi, %eax              // eax = x + y
#endif
#ifdef __arm64__
        // arm64 calling convention (destination operand comes first)
        //   integer arguments: x0-x7
        //   32-bit views:      w0-w7
        //   return value:      x0 (w0 for a 32-bit int)
        // System calls are not used here. The table linked below is for Linux,
        // where the syscall number goes in x8,
        // see https://wiki.cdot.senecacollege.ca/wiki/Syscalls
        add     w0, w1, w0              // w0 = y + x
#endif
        ret
- entitlements Select all
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<!-- Entitlements property list handed to codesign when signing the demo binaries before debugging them with lldb. -->
<plist version="1.0">
<dict>
<!-- Single boolean entitlement, set to true. -->
<key>com.apple.security.get-task-allow</key>
<true/>
</dict>
</plist>
(3) Compile with
clang add-inline.c -o add-inline_x86_64 -arch x86_64
clang add-inline.c -o add-inline_arm64 -arch arm64
clang add-main.c add.s -o add-main_x86_64 -arch x86_64
clang add-main.c add.s -o add-main_arm64 -arch arm64
(4) Disassemble with (e.g.)
otool -otV add-inline_arm64
(5) Install Rosetta 2 on M1 to run the x86_64 version (e.g.)
./add-main_x86_64
(6) codesign and debug with lldb (e.g.)
clang -g -o add-inline_x86_64 add-inline.c -arch x86_64
codesign --entitlements entitlements --force -s - add-inline_x86_64
lldb add-inline_x86_64
(lldb) breakpoint set --file add-inline.c --line 7
(7) floating point example for x86_64 and arm64
compile with
clang sum.s callsum.c -o callsum_x86_64 -arch x86_64
clang sum.s callsum.c -o callsum_arm64 -arch arm64
Codesign and debug in the same way as in the example above.
- callsum.c Select all
/*
* callsum.c
*
* Illustrates how to call the sum function written in assembly language (sum.s).
*/
// clang sum.s callsum.c -o callsum_x86_64 -arch x86_64
// clang sum.s callsum.c -o callsum_arm64 -arch arm64
#include <stdio.h>
/* Assembly routine from sum.s: adds up the first `unsigned` elements of the array. */
double sum(double[], unsigned);

/* Prints the sum of several leading slices of a fixed test array. */
int main() {
    double test[] = { 40.5, 26.7, 21.9, 1.5, -40.5, -23.4 };
    /* Element counts to sum, in the order the results are printed. */
    const unsigned lengths[] = { 6, 2, 0, 3 };
    const unsigned cases = sizeof lengths / sizeof lengths[0];

    for (unsigned i = 0; i < cases; ++i) {
        printf("%20.7f\n", sum(test, lengths[i]));
    }
    return 0;
}
- sum.s Select all
// -----------------------------------------------------------------------
// double sum(double array[], unsigned length)
//
// Returns the sum of the first `length` elements of `array`
// (0.0 when length is 0). Builds for x86_64 and arm64; the C
// preprocessor selects the instruction sequence for the target.
//
// x86_64: rdi = array, esi = length, result in xmm0
// arm64:  x0  = array, w1  = length, result in d0; d16 is scratch
//
// `length` is a 32-bit value. The upper 32 bits of rsi / x1 are not
// guaranteed to be zero, so only the 32-bit views (esi / w1) are tested
// and decremented. Using the 64-bit register could miss length == 0 or
// run the loop far past the end of the array.
// -----------------------------------------------------------------------
        .text
        .globl  _sum
        .align  2
_sum:
#ifdef __x86_64__
        xorpd   %xmm0, %xmm0            // sum = 0.0 (also the return register)
        testl   %esi, %esi              // length == 0 ?
        jz      Lsum_done               // yes: return 0.0
Lsum_next:
        addsd   (%rdi), %xmm0           // sum += *array
        addq    $8, %rdi                // advance to the next double
        decl    %esi                    // one element fewer to go
        jnz     Lsum_next               // loop until the count reaches 0
#endif
#ifdef __arm64__
        movi    d0, #0                  // sum = 0.0 (also the return register)
        cbz     w1, Lsum_done           // length == 0: return 0.0
Lsum_next:
        ldr     d16, [x0], #8           // d16 = *array, then advance to the next double
        fadd    d0, d0, d16             // sum += d16
        subs    w1, w1, #1              // one element fewer to go
        b.ne    Lsum_next               // loop until the count reaches 0
#endif
Lsum_done:
        ret
No comments:
Post a Comment