Saturday, April 3, 2021

How to use inline assembly language for M1 Mac for x86_64 and arm64

(1) Demo the inline assembly code for x86_64 and arm64
add-inline.c  Select all
#include <stdio.h> // compile with command line // clang add-inline.c -o add-inline_x86_64 -arch x86_64 // clang add-inline.c -o add-inline_arm64 -arch arm64 // disassemble with // otool -otV add-inline_x86_64 // otool -otV add-inline_arm64 int main(int argc, const char * argv[]) { int a = 10; int b = 25; int ans = 0; #ifdef __x86_64__ __asm__( "add %2,%1;\n" // %1 += %2 add source to destination "mov %1,%0;\n" // move data from %1 to %0 : "=r"(ans) : "r"(a), "r"(b) : ); #endif #ifdef __arm64__ __asm__( "add %w0,%w1,%w2;\n" // load %w0 = %w1 + %w2 : "=r"(ans) : "r"(a), "r"(b) : ); #endif printf("The answer is %d\n",ans); return 0; }


(2) Demo the assembly source code as a separate function for x86_64 and arm64
add-main.c  Select all
#include <stdio.h> // compile with command line // clang add-main.c add.s -o add-main_x86_64 -arch x86_64 // clang add-main.c add.s -o add-main_arm64 -arch arm64 #include <stdio.h> int add(int x, int y); int main(int argc, const char * argv[]) { int ans = add(15,40); printf("The answer is %d\n",ans); return 0; }


add.s  Select all
.text .globl _add .align 2 _add: #ifdef __x86_64__ add %esi,%edi // %edi += %esi, source is the first mov %edi,%eax // move data from %edi to %eax // x86_64 calling convention // rdi, rsi, rdx, rcx, r8, r9 // The 32-bit general purpose registers are edi, esi, edx, ecx, r8d, r9d instead. // The 16-bit general purpose registers are di, si, dx, cx, r8w, r9w instead. // The syscall number is placed in rax // see https://sigsegv.pl/osx-bsd-syscalls/ // Return value is in rax #endif #ifdef __arm64__ add w0,w0,w1 // load w0 with w0+w1, destination is the first // calling convention for arm64 // x0,x1,x2,x3,x4,x5,x6,x7 or r0 to r7 // The 32-bit general purpose registers are w0-w7 instead. // The syscall number is placed in r8 // Return value is in x0 // see https://wiki.cdot.senecacollege.ca/wiki/Syscalls #endif ret




entitlements  Select all
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> <plist version="1.0"> <dict> <key>com.apple.security.get-task-allow</key> <true/> </dict> </plist>


(3) Compile with
clang add-inline.c -o add-inline_x86_64 -arch x86_64
clang add-inline.c -o add-inline_arm64 -arch arm64
clang add-main.c add.s -o add-main_x86_64 -arch x86_64
clang add-main.c add.s -o add-main_arm64 -arch arm64


(4) Disassemble with (e.g.)
otool -otV add-inline_arm64

(5) Install Rosetta 2 on M1 to run the x86_64 version (e.g.)
./add-main_x86_64

(6) codesign and debug with lldb (e.g.)
clang -g -o add-inline_x86_64 add-inline.c -arch x86_64
codesign --entitlement entitlements --force -s - add-inline_x86_64
lldb add-inline_x86_64
(lldb) breakpoint set --file add-inline.c --line 7


(7) floating point example for x86_64 and arm64
compile with
clang sum.s callsum.c -o callsum_x86_64 -arch x86_64
clang sum.s callsum.c -o callsum_arm64 -arch arm64
Debug and codesign similar to above example
callsum.c  Select all
/* * callsum.c * * Illustrates how to call the sum.s function wrote in assembly language. */ // clang sum.s callsum.c -o callsum_x86_64 -arch x86_64 // clang sum.s callsum.c -o callsum_arm64 -arch arm64 #include <stdio.h> double sum(double[], unsigned); int main() { double test[] = { 40.5, 26.7, 21.9, 1.5, -40.5, -23.4 }; printf("%20.7f\n", sum(test, 6)); printf("%20.7f\n", sum(test, 2)); printf("%20.7f\n", sum(test, 0)); printf("%20.7f\n", sum(test, 3)); return 0; }


sum.s  Select all
# ----------------------------------------------------------------------- # A 64-bit function that returns the sum of the elements in a # floating-point array for x86_64 and arm64. The function has prototype: # # double sum(double[] array, unsigned length) # ----------------------------------------------------------------------- .global _sum .text .align 2 _sum: #ifdef __x86_64__ xorpd %xmm0, %xmm0 // initialize the sum to 0 // floats are passed in xmm0 cmp $0, %rsi // special case for length = 0 je done #endif #ifdef __arm64__ movi d0, #0 // initialize the sum to 0 // floats are passed in s0-7 and doubles in the d0-7 registers. cmp x1, #0 // special case for length = 0 b.eq done #endif next: #ifdef __x86_64__ addsd (%rdi), %xmm0 // add in the current array element, return floating point value in xmm0 add $8, %rdi // move to next array element dec %rsi // count down jnz next // if not done counting, continue #endif #ifdef __arm64__ ldr d16, [x0] // load the float into d16 // floats in s0-7 and doubles in the d0-7 registers. fadd d0, d0, d16 // add in the current array element, return floating point value in d0 add x0, x0, #8 // move to next array element subs x1, x1, #1 // count down cbnz w1, next // if not done counting, continue #endif done: ret


No comments: