changeset 480:cc78e34958e5

- arm64 doc additions w/ respect to aggregates, as well as fbsd and win disas examples
author Tassilo Philipp
date Tue, 01 Mar 2022 21:02:10 +0100
parents a55506bf924e
children 0fc22b5feac7
files doc/disas_examples/arm64.aapcs.disas doc/manual/callconvs/callconv_arm64.tex
diffstat 2 files changed, 1218 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/doc/disas_examples/arm64.aapcs.disas	Tue Mar 01 09:29:21 2022 +0100
+++ b/doc/disas_examples/arm64.aapcs.disas	Tue Mar 01 21:02:10 2022 +0100
@@ -1,16 +1,16 @@
 ; #include <stdlib.h>
-; 
+;
 ; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
 ; {
 ; }
-; 
+;
 ; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
 ; {
 ; 	/* use some local data */
 ; 	*(char*)alloca(220) = 'L';
 ; 	leaf_call(b, c, d, e, f, g, h);
 ; }
-; 
+;
 ; int main()
 ; {
 ; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
@@ -87,18 +87,18 @@
 ; ---------- same with more args so stack is also used ---------->
 
 ; #include <stdlib.h>
-; 
+;
 ; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
 ; {
 ; }
-; 
+;
 ; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j)
 ; {
 ;     /* use some local data */
 ;     *(char*)alloca(220) = 'L';
 ;     leaf_call(b, c, d, e, f, g, h, i, j);
 ; }
-; 
+;
 ; int main()
 ; {
 ;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -177,7 +177,7 @@
       f0:       bf c3 1f b8     stur    wzr, [x29, #-4]      ; unsure... store a zero in local area@@@
       f4:       e0 03 08 2a     mov     w0, w8               ; arg 0 (= 0 set in w8, above)
       f8:       eb 03 00 91     mov     x11, sp              ; use sp in x11 (why?), to ...
-      fc:       69 01 00 b9     str     w9, [x11]            ; ... place arg 8 on top of stack 
+      fc:       69 01 00 b9     str     w9, [x11]            ; ... place arg 8 on top of stack
      100:       eb 03 00 91     mov     x11, sp              ; use sp in x11 (why?), to ... (set again, pointlessly)
      104:       6a 09 00 b9     str     w10, [x11, #8]       ; ... place arg 9 on stack (next to arg 8)
      108:       a8 83 1f b8     stur    w8, [x29, #-8]       ; temp storing 0 in local area @@@ why?
@@ -193,11 +193,11 @@
 
 ; #include <stdlib.h>
 ; #include <stdarg.h>
-; 
+;
 ; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
 ; {
 ; }
-; 
+;
 ; void nonleaf_call(int a, ...)
 ; {
 ;     int b,c,d,e,f,g,h,i,j;
@@ -212,12 +212,12 @@
 ;     h = va_arg(ap, int);
 ;     i = va_arg(ap, int);
 ;     j = va_arg(ap, int);
-; 
+;
 ;     /* use some local data */
 ;     *(char*)alloca(220) = 'L';
 ;     leaf_call(b, c, d, e, f, g, h, i, j);
 ; }
-; 
+;
 ; int main()
 ; {
 ;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -595,11 +595,11 @@
 
 ; #include <stdlib.h>
 ; #include <stdarg.h>
-; 
+;
 ; void leaf_call(int b, int c, int d, int e, float f, int g, int h, int i, int j)
 ; {
 ; }
-; 
+;
 ; void nonleaf_call(int a, ...)
 ; {
 ;     int b,c,d,e,g,h,i,j;
@@ -615,12 +615,12 @@
 ;     h = va_arg(ap, int);
 ;     i = va_arg(ap, int);
 ;     j = va_arg(ap, int);
-; 
+;
 ;     /* use some local data */
 ;     *(char*)alloca(220) = 'L';
 ;     leaf_call(b, c, d, e, f, g, h, i, j);
 ; }
-; 
+;
 ; int main()
 ; {
 ;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6, 7, 8, 9);
@@ -986,5 +986,1186 @@
      56c:       ff 83 00 91     add     sp, sp, #32
      570:       c0 03 5f d6     ret
 
+
+
+; ---------- structs by value ---------->
+;
+; struct A { int i, j; long long l; };
+;
+; void leaf_call(int b, int c, int d, int e, struct A f, int g, int h)
+; {
+; }
+;
+; void nonleaf_call(int a, int b, int c, int d, int e, struct A f, int g, int h)
+; {
+;     /* use some local data */
+;     char l[100] ={ 'L'};
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+;
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, (struct A){5, 6, 7ll}, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:
+       0:       ff c3 00 d1     sub     sp, sp, #48          ; prolog (local area only, fp/lr not saved)
+       4:       e4 13 00 f9     str     x4, [sp, #32]        ; |
+       8:       e5 17 00 f9     str     x5, [sp, #40]        ; | in arg 4 (struct A f, came in regs as 2 dwords) -> local area
+       c:       e0 1f 00 b9     str     w0, [sp, #28]        ; in arg 0 (b) -> local area
+      10:       e1 1b 00 b9     str     w1, [sp, #24]        ; in arg 1 (c) -> local area
+      14:       e2 17 00 b9     str     w2, [sp, #20]        ; in arg 2 (d) -> local area
+      18:       e3 13 00 b9     str     w3, [sp, #16]        ; in arg 3 (e) -> local area
+      1c:       e6 0f 00 b9     str     w6, [sp, #12]        ; in arg 5 (g) -> local area
+      20:       e7 0b 00 b9     str     w7, [sp, #8]         ; in arg 6 (h) -> local area
+      24:       ff c3 00 91     add     sp, sp, #48          ; |
+      28:       c0 03 5f d6     ret                          ; | epilog
+
+000000000000002c nonleaf_call:
+      2c:       ff c3 02 d1     sub     sp, sp, #176         ; |
+      30:       fd 7b 0a a9     stp     x29, x30, [sp, #160] ; | prolog
+      34:       fd 83 02 91     add     x29, sp, #160        ; |
+      38:       a8 13 40 b9     ldr     w8, [x29, #16]       ; in arg 7 (h), fetched from caller's stack (right above saved fp/lr)
+      3c:       09 00 80 52     mov     w9, #0               ; fill value 0, for initializing l
+      40:       8a 0c 80 d2     mov     x10, #100            ; 100 = sizeof(l)
+      44:       8b 09 80 52     mov     w11, #76             ; 'L'
+      48:       ec 43 00 91     add     x12, sp, #16         ; addr of local array l
+      4c:       a5 03 1f f8     stur    x5, [x29, #-16]      ; |
+      50:       a6 83 1f f8     stur    x6, [x29, #-8]       ; | in arg 5 (struct A f, came in regs as 2 dwords) -> local area
+      54:       a0 c3 1e b8     stur    w0, [x29, #-20]      ; in arg 0 (a) -> local area
+      58:       a1 83 1e b8     stur    w1, [x29, #-24]      ; in arg 1 (b) -> local area
+      5c:       a2 43 1e b8     stur    w2, [x29, #-28]      ; in arg 2 (c) -> local area
+      60:       a3 03 1e b8     stur    w3, [x29, #-32]      ; in arg 3 (d) -> local area
+      64:       a4 c3 1d b8     stur    w4, [x29, #-36]      ; in arg 4 (e) -> local area
+      68:       a7 83 1d b8     stur    w7, [x29, #-40]      ; in arg 6 (g) -> local area
+      6c:       a8 43 1d b8     stur    w8, [x29, #-44]      ; in arg 7 (h, fetched from stack above) -> local area
+      70:       e0 03 0c aa     mov     x0, x12              ; |
+      74:       e1 03 09 2a     mov     w1, w9               ; | addr of l, fill value 0 and size 100 as args for call below
+      78:       e2 03 0a aa     mov     x2, x10              ; |
+      7c:       eb 0f 00 b9     str     w11, [sp, #12]       ; temp store 'L' (needed after call)
+      80:       ec 03 00 f9     str     x12, [sp]            ; temp store addr of l (needed after call)
+      84:       00 00 00 94     bl      #0 <nonleaf_call+0x58> ; call w/ unresolved target (presumably memset), zeroing l
+      88:       e8 0f 40 b9     ldr     w8, [sp, #12]        ; |
+      8c:       ea 03 40 f9     ldr     x10, [sp]            ; | l[0] = 'L'
+      90:       48 01 00 39     strb    w8, [x10]            ; |
+      94:       a0 83 5e b8     ldur    w0, [x29, #-24]      ; arg 0 (b)
+      98:       a1 43 5e b8     ldur    w1, [x29, #-28]      ; arg 1 (c)
+      9c:       a2 03 5e b8     ldur    w2, [x29, #-32]      ; arg 2 (d)
+      a0:       a3 c3 5d b8     ldur    w3, [x29, #-36]      ; arg 3 (e)
+      a4:       a6 83 5d b8     ldur    w6, [x29, #-40]      ; arg 5 (g)
+      a8:       a7 43 5d b8     ldur    w7, [x29, #-44]      ; arg 6 (h)
+      ac:       ac 03 5f f8     ldur    x12, [x29, #-16]     ; |
+      b0:       a5 83 5f f8     ldur    x5, [x29, #-8]       ; | arg 4 (struct A f), passed in regs as 2 dwords (x4 and x5)
+      b4:       e4 03 0c aa     mov     x4, x12              ; |
+      b8:       d2 ff ff 97     bl      #-184 <leaf_call>    ; return address -> r30/lr, and call
+      bc:       fd 7b 4a a9     ldp     x29, x30, [sp, #160] ; |
+      c0:       ff c3 02 91     add     sp, sp, #176         ; | epilog
+      c4:       c0 03 5f d6     ret                          ; |
+
+00000000000000c8 main:
+      c8:       ff 03 01 d1     sub     sp, sp, #64          ; |
+      cc:       fd 7b 03 a9     stp     x29, x30, [sp, #48]  ; | prolog
+      d0:       fd c3 00 91     add     x29, sp, #48         ; |
+      d4:       08 00 80 52     mov     w8, #0               ; prep arg 0
+      d8:       a9 00 80 52     mov     w9, #5               ; |                              i
+      dc:       ea 07 1f 32     orr     w10, wzr, #0x6       ; | prep local struct A's data   j
+      e0:       eb 0b 40 b2     orr     x11, xzr, #0x7       ; |                              l
+      e4:       e1 03 00 32     orr     w1, wzr, #0x1        ; arg 1
+      e8:       e2 03 1f 32     orr     w2, wzr, #0x2        ; arg 2
+      ec:       e3 07 00 32     orr     w3, wzr, #0x3        ; arg 3
+      f0:       e4 03 1e 32     orr     w4, wzr, #0x4        ; arg 4
+      f4:       e7 03 1d 32     orr     w7, wzr, #0x8        ; arg 6
+      f8:       2c 01 80 52     mov     w12, #9              ; prep arg 7
+      fc:       ed 63 00 91     add     x13, sp, #24         ; used for indirection below, a bit pointless
+     100:       bf c3 1f b8     stur    wzr, [x29, #-4]      ; zero 4 bytes in local area @@@ unsure why (presumably unoptimized clang initializing main's implicit return value slot)
+     104:       a9 01 00 b9     str     w9, [x13]            ; |
+     108:       ea 1f 00 b9     str     w10, [sp, #28]       ; | write struct A to local area
+     10c:       eb 13 00 f9     str     x11, [sp, #32]       ; |
+     110:       eb 0f 40 f9     ldr     x11, [sp, #24]       ; 1st dword of struct A -> x11
+     114:       ed 13 40 f9     ldr     x13, [sp, #32]       ; 2nd dword of struct A -> x13
+     118:       e0 03 08 2a     mov     w0, w8               ; arg 0
+     11c:       e5 03 0b aa     mov     x5, x11              ; |
+     120:       e6 03 0d aa     mov     x6, x13              ; / arg 5 (struct A), passed in regs as 2 dwords
+     124:       eb 03 00 91     mov     x11, sp              ; \
+     128:       6c 01 00 b9     str     w12, [x11]           ; | arg 7, pushed onto stack
+     12c:       e8 17 00 b9     str     w8, [sp, #20]        ; prep return value, temp store on stack local area
+     130:       bf ff ff 97     bl      #-260 <nonleaf_call> ; return address -> r30/lr, and call
+     134:       e0 17 40 b9     ldr     w0, [sp, #20]        ; return value (unsure why not just using immediate @@@)
+     138:       fd 7b 43 a9     ldp     x29, x30, [sp, #48]  ; |
+     13c:       ff 03 01 91     add     sp, sp, #64          ; | epilog
+     140:       c0 03 5f d6     ret                          ; |
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.14
+
+|leaf_call| PROC
+|$LN3|
+        sub         sp,sp,#0x30            ; prolog (local area only, fp/lr not saved)
+        str         w0,[sp]                ; in arg 0 (b) -> local area
+        str         w1,[sp,#4]             ; in arg 1 (c) -> local area
+        str         w2,[sp,#8]             ; in arg 2 (d) -> local area
+        str         w3,[sp,#0xC]           ; in arg 3 (e) -> local area
+        str         x4,[sp,#0x18]          ; |
+        str         x5,[sp,#0x20]          ; | in arg 4 (struct A f, came in regs as 2 dwords) -> local area
+        str         w6,[sp,#0x10]          ; in arg 5 (g) -> local area
+        str         w7,[sp,#0x14]          ; in arg 6 (h) -> local area
+        add         sp,sp,#0x30            ; |
+        ret                                ; | epilog
+
+        ENDP  ; |leaf_call|
+
+|nonleaf_call| PROC
+|$LN3|
+        stp         fp,lr,[sp,#-0x10]!     ; |
+        mov         fp,sp                  ; |
+        bl          __security_push_cookie ; | prolog
+        sub         sp,sp,#0x90            ; /
+        str         w0,[sp,#0x14]          ; in arg 0 (a) -> local area
+        str         w1,[sp,#0x10]          ; in arg 1 (b) -> local area
+        str         w2,[sp,#0xC]           ; in arg 2 (c) -> local area
+        str         w3,[sp,#8]             ; in arg 3 (d) -> local area
+        str         w4,[sp,#4]             ; in arg 4 (e) -> local area
+        str         x5,[sp,#0x18]          ; |
+        str         x6,[sp,#0x20]          ; | in arg 5 (struct A f, came in regs as 2 dwords) -> local area
+        str         w7,[sp]                ; in arg 6 (g) -> local area
+        mov         w8,#0x4C               ; 'L'
+        strb        w8,[sp,#0x28]          ; l[0] = 'L'
+        add         x9,sp,#0x29            ; addr of l[1]
+        mov         x8,#0                  ; \
+        stp         x8,x8,[x9]             ; |
+        stp         x8,x8,[x9,#0x10]       ; |
+        stp         x8,x8,[x9,#0x20]       ; |
+        stp         x8,x8,[x9,#0x30]       ; | zero remaining 99 bytes of l, inline (6*16 + 2 + 1 bytes)
+        stp         x8,x8,[x9,#0x40]       ; |
+        stp         x8,x8,[x9,#0x50]       ; |
+        strh        w8,[x9,#0x60]          ; |
+        strb        w8,[x9,#0x62]          ; /
+        ldr         w7,[sp,#0xB0]          ; arg 6 (h), fetched from caller's stack (was passed as in arg 7)
+        ldr         w6,[sp]                ; arg 5 (g)
+        ldr         x5,[sp,#0x20]          ; |
+        ldr         x4,[sp,#0x18]          ; | arg 4 (struct A f), passed in regs as 2 dwords
+        ldr         w3,[sp,#4]             ; arg 3 (e)
+        ldr         w2,[sp,#8]             ; arg 2 (d)
+        ldr         w1,[sp,#0xC]           ; arg 1 (c)
+        ldr         w0,[sp,#0x10]          ; arg 0 (b)
+        bl          leaf_call              ; return address -> r30/lr, and call
+        add         sp,sp,#0x90            ; |
+        bl          __security_pop_cookie  ; | epilog
+        ldp         fp,lr,[sp],#0x10       ; |
+        ret                                ; |
+
+        ENDP  ; |nonleaf_call|
+
+|main|  PROC
+|$LN3|
+        stp         fp,lr,[sp,#-0x10]!     ; |
+        mov         fp,sp                  ; |
+        bl          __security_push_cookie ; | prolog
+        sub         sp,sp,#0x20            ; /
+        mov         w8,#5                  ; \
+        str         w8,[sp,#0x10]          ; |                               i
+        mov         w8,#6                  ; |
+        str         w8,[sp,#0x14]          ; | write struct A to local area  j
+        mov         x8,#7                  ; |
+        str         x8,[sp,#0x18]          ; /                               l
+        mov         w8,#9                  ; \
+        str         w8,[sp]                ; | arg 7, pushed onto stack
+        mov         w7,#8                  ; arg 6
+        ldr         x6,[sp,#0x18]          ; |                                                  l
+        ldr         x5,[sp,#0x10]          ; | arg 5 (struct A), passed in regs as 2 dwords     i, j
+        mov         w4,#4                  ; arg 4
+        mov         w3,#3                  ; arg 3
+        mov         w2,#2                  ; arg 2
+        mov         w1,#1                  ; arg 1
+        mov         w0,#0                  ; arg 0
+        bl          nonleaf_call           ; return address -> r30/lr, and call
+        mov         w0,#0                  ; return value (set via immediate)
+        add         sp,sp,#0x20            ; |
+        bl          __security_pop_cookie  ; | epilog
+        ldp         fp,lr,[sp],#0x10       ; |
+        ret                                ; |
+
+        ENDP  ; |main|
+
+
+
+; ---------- structs by value, complex example (multiple structs) ---------->
+;
+; struct A { int i, j; float f; };
+; struct B { double d; long long l; };
+;
+; void leaf_call(int b, struct A c, struct B d, int e, int f, struct A g, struct B h, int i, int j)
+; {
+; }
+;
+; void nonleaf_call(int a, int b, struct A c, struct B d, int e, int f, struct A g, struct B h, int i, int j)
+; {
+;     /* use some local data */
+;     char l[100] ={ 'L'};
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+;
+; int main()
+; {
+;     nonleaf_call(0, 1, (struct A){2, 3, 4.f}, (struct B){5., 6ll}, 7, 8, (struct A){9, 10, 11.f}, (struct B){12., 13ll}, 14, 15);
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:
+       0:       ff 83 03 d1     sub     sp, sp, #224         ; |
+       4:       fe 6b 00 f9     str     x30, [sp, #208]      ; | prolog (lr is saved, as calls are made below)
+       8:       e8 73 40 f9     ldr     x8, [sp, #224]       ; |
+       c:       e9 77 40 f9     ldr     x9, [sp, #232]       ; | in arg 5 (struct A g), fetched from caller's stack as 2 dwords
+      10:       ea 7b 40 f9     ldr     x10, [sp, #240]      ; |
+      14:       eb 7f 40 f9     ldr     x11, [sp, #248]      ; | in arg 6 (struct B h), fetched from caller's stack as 2 dwords
+      18:       ec 03 41 b9     ldr     w12, [sp, #256]      ; in arg 7 (i), fetched from caller's stack
+      1c:       ed 0b 41 b9     ldr     w13, [sp, #264]      ; in arg 8 (j), fetched from caller's stack
+      20:       ee 07 7e b2     orr     x14, xzr, #0xc       ; 12 = sizeof(struct A), used as size for copy calls below
+      24:       ef 13 03 91     add     x15, sp, #196        ; addr of local copy of c (copy destination)
+      28:       f0 c3 02 91     add     x16, sp, #176        ; addr c's 2 dwords get spilled to (copy source)
+      2c:       f1 53 02 91     add     x17, sp, #148        ; addr of local copy of g (copy destination)
+      30:       f2 03 02 91     add     x18, sp, #128        ; addr g's 2 dwords get spilled to (copy source)
+      34:       e1 5b 00 f9     str     x1, [sp, #176]       ; |
+      38:       e2 5f 00 f9     str     x2, [sp, #184]       ; | in arg 1 (struct A c, came in regs as 2 dwords) -> local area
+      3c:       e0 5b 00 b9     str     w0, [sp, #88]        ; in arg 0 (b) -> temp
+      40:       e0 03 0f aa     mov     x0, x15              ; |
+      44:       e1 03 10 aa     mov     x1, x16              ; | dst, src and size as args for 1st copy call below
+      48:       e2 03 0e aa     mov     x2, x14              ; |
+      4c:       e5 57 00 b9     str     w5, [sp, #84]        ; in arg 3 (e) -> temp
+      50:       e6 53 00 b9     str     w6, [sp, #80]        ; in arg 4 (f) -> temp
+      54:       e8 27 00 f9     str     x8, [sp, #72]        ; |
+      58:       e9 23 00 f9     str     x9, [sp, #64]        ; | g's 2 dwords -> temp
+      5c:       ea 1f 00 f9     str     x10, [sp, #56]       ; |
+      60:       eb 1b 00 f9     str     x11, [sp, #48]       ; | h's 2 dwords -> temp
+      64:       ec 2f 00 b9     str     w12, [sp, #44]       ; i -> temp
+      68:       ed 2b 00 b9     str     w13, [sp, #40]       ; j -> temp
+      6c:       e3 13 00 f9     str     x3, [sp, #32]        ; |
+      70:       e4 0f 00 f9     str     x4, [sp, #24]        ; | in arg 2 (struct B d, came in regs as 2 dwords) -> temp
+      74:       ee 0b 00 f9     str     x14, [sp, #16]       ; |
+      78:       f1 07 00 f9     str     x17, [sp, #8]        ; | temp store size, dst and src for 2nd copy call
+      7c:       f2 03 00 f9     str     x18, [sp]            ; |
+      80:       00 00 00 94     bl      #0 <leaf_call+0x80>  ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of c
+      84:       e8 13 40 f9     ldr     x8, [sp, #32]        ; |
+      88:       e8 53 00 f9     str     x8, [sp, #160]       ; |
+      8c:       e9 0f 40 f9     ldr     x9, [sp, #24]        ; | d's 2 dwords -> local copy of d
+      90:       e9 57 00 f9     str     x9, [sp, #168]       ; |
+      94:       ea 27 40 f9     ldr     x10, [sp, #72]       ; |
+      98:       ea 43 00 f9     str     x10, [sp, #128]      ; |
+      9c:       eb 23 40 f9     ldr     x11, [sp, #64]       ; | g's 2 dwords -> spill area (copy source)
+      a0:       eb 47 00 f9     str     x11, [sp, #136]      ; |
+      a4:       e0 07 40 f9     ldr     x0, [sp, #8]         ; |
+      a8:       e1 03 40 f9     ldr     x1, [sp]             ; | dst, src and size as args for 2nd copy call below
+      ac:       e2 0b 40 f9     ldr     x2, [sp, #16]        ; |
+      b0:       00 00 00 94     bl      #0 <leaf_call+0xb0>  ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of g
+      b4:       e8 1f 40 f9     ldr     x8, [sp, #56]        ; |
+      b8:       e8 3b 00 f9     str     x8, [sp, #112]       ; |
+      bc:       e9 1b 40 f9     ldr     x9, [sp, #48]        ; | h's 2 dwords -> local copy of h
+      c0:       e9 3f 00 f9     str     x9, [sp, #120]       ; |
+      c4:       ec 5b 40 b9     ldr     w12, [sp, #88]       ; |
+      c8:       ec 6f 00 b9     str     w12, [sp, #108]      ; | b -> local copy
+      cc:       ed 57 40 b9     ldr     w13, [sp, #84]       ; |
+      d0:       ed 6b 00 b9     str     w13, [sp, #104]      ; | e -> local copy
+      d4:       e5 53 40 b9     ldr     w5, [sp, #80]        ; |
+      d8:       e5 67 00 b9     str     w5, [sp, #100]       ; | f -> local copy
+      dc:       e6 2f 40 b9     ldr     w6, [sp, #44]        ; |
+      e0:       e6 63 00 b9     str     w6, [sp, #96]        ; | i -> local copy
+      e4:       e7 2b 40 b9     ldr     w7, [sp, #40]        ; |
+      e8:       e7 5f 00 b9     str     w7, [sp, #92]        ; | j -> local copy
+      ec:       fe 6b 40 f9     ldr     x30, [sp, #208]      ; |
+      f0:       ff 83 03 91     add     sp, sp, #224         ; | epilog
+      f4:       c0 03 5f d6     ret                          ; |
+
+00000000000000f8 nonleaf_call:
+      f8:       fc 67 bb a9     stp     x28, x25, [sp, #-80]! ; |
+      fc:       f8 5f 01 a9     stp     x24, x23, [sp, #16]  ; |
+     100:       f6 57 02 a9     stp     x22, x21, [sp, #32]  ; |
+     104:       f4 4f 03 a9     stp     x20, x19, [sp, #48]  ; | prolog (incl. saving of x19-x25, which are used below)
+     108:       fd 7b 04 a9     stp     x29, x30, [sp, #64]  ; |
+     10c:       fd 03 01 91     add     x29, sp, #64         ; |
+     110:       ff 43 08 d1     sub     sp, sp, #528         ; |
+     114:       a8 0b 40 f9     ldr     x8, [x29, #16]       ; |
+     118:       a9 0f 40 f9     ldr     x9, [x29, #24]       ; | in arg 6 (struct A g), fetched from caller's stack as 2 dwords
+     11c:       aa 13 40 f9     ldr     x10, [x29, #32]      ; |
+     120:       ab 17 40 f9     ldr     x11, [x29, #40]      ; | in arg 7 (struct B h), fetched from caller's stack as 2 dwords
+     124:       ac 33 40 b9     ldr     w12, [x29, #48]      ; in arg 8 (i), fetched from caller's stack
+     128:       ad 3b 40 b9     ldr     w13, [x29, #56]      ; in arg 9 (j), fetched from caller's stack
+     12c:       ee 07 7e b2     orr     x14, xzr, #0xc       ; 12 = sizeof(struct A), used as size for copy calls below
+     130:       0f 00 80 52     mov     w15, #0              ; fill value 0, for initializing l
+     134:       90 0c 80 d2     mov     x16, #100            ; 100 = sizeof(l)
+     138:       91 09 80 52     mov     w17, #76             ; 'L'
+     13c:       b2 33 01 d1     sub     x18, x29, #76        ; addr of local copy of c
+     140:       b3 83 01 d1     sub     x19, x29, #96        ; addr c's 2 dwords get spilled to
+     144:       b4 f3 01 d1     sub     x20, x29, #124       ; addr of local copy of g
+     148:       b5 43 02 d1     sub     x21, x29, #144       ; addr g's 2 dwords get spilled to
+     14c:       f6 d3 04 91     add     x22, sp, #308        ; addr of local array l
+     150:       f7 83 04 91     add     x23, sp, #288        ; addr of temp copy of c (used when passing it on)
+     154:       f8 43 04 91     add     x24, sp, #272        ; addr of temp copy of g (used when passing it on)
+     158:       a2 03 1a f8     stur    x2, [x29, #-96]      ; |
+     15c:       a3 83 1a f8     stur    x3, [x29, #-88]      ; | in arg 2 (struct A c, came in regs as 2 dwords) -> local area
+     160:       e0 0f 01 b9     str     w0, [sp, #268]       ; in arg 0 (a) -> temp
+     164:       e0 03 12 aa     mov     x0, x18              ; copy call's arg 0: dst (local copy of c)
+     168:       e1 0b 01 b9     str     w1, [sp, #264]       ; in arg 1 (b) -> temp
+     16c:       e1 03 13 aa     mov     x1, x19              ; copy call's arg 1: src (spilled c)
+     170:       e2 03 0e aa     mov     x2, x14              ; copy call's arg 2: size
+     174:       e6 07 01 b9     str     w6, [sp, #260]       ; in arg 4 (e) -> temp
+     178:       e7 03 01 b9     str     w7, [sp, #256]       ; in arg 5 (f) -> temp
+     17c:       e8 7f 00 f9     str     x8, [sp, #248]       ; |
+     180:       e9 7b 00 f9     str     x9, [sp, #240]       ; | g's 2 dwords -> temp
+     184:       ea 77 00 f9     str     x10, [sp, #232]      ; |
+     188:       eb 73 00 f9     str     x11, [sp, #224]      ; | h's 2 dwords -> temp
+     18c:       ec df 00 b9     str     w12, [sp, #220]      ; i -> temp
+     190:       ed db 00 b9     str     w13, [sp, #216]      ; j -> temp
+     194:       e4 6b 00 f9     str     x4, [sp, #208]       ; |
+     198:       e5 67 00 f9     str     x5, [sp, #200]       ; | in arg 3 (struct B d, came in regs as 2 dwords) -> temp
+     19c:       ee 63 00 f9     str     x14, [sp, #192]      ; |
+     1a0:       ef bf 00 b9     str     w15, [sp, #188]      ; |
+     1a4:       f0 5b 00 f9     str     x16, [sp, #176]      ; |
+     1a8:       f1 af 00 b9     str     w17, [sp, #172]      ; |
+     1ac:       f2 53 00 f9     str     x18, [sp, #160]      ; | temp store constants and addresses prepared above,
+     1b0:       f4 4f 00 f9     str     x20, [sp, #152]      ; | to be reloaded after the call below
+     1b4:       f5 4b 00 f9     str     x21, [sp, #144]      ; |
+     1b8:       f6 47 00 f9     str     x22, [sp, #136]      ; |
+     1bc:       f7 43 00 f9     str     x23, [sp, #128]      ; |
+     1c0:       f8 3f 00 f9     str     x24, [sp, #120]      ; |
+     1c4:       00 00 00 94     bl      #0 <nonleaf_call+0xcc> ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of c
+     1c8:       e8 6b 40 f9     ldr     x8, [sp, #208]       ; |
+     1cc:       a8 03 19 f8     stur    x8, [x29, #-112]     ; |
+     1d0:       e9 67 40 f9     ldr     x9, [sp, #200]       ; | d's 2 dwords -> local copy of d
+     1d4:       a9 83 19 f8     stur    x9, [x29, #-104]     ; |
+     1d8:       ea 7f 40 f9     ldr     x10, [sp, #248]      ; |
+     1dc:       aa 03 17 f8     stur    x10, [x29, #-144]    ; |
+     1e0:       eb 7b 40 f9     ldr     x11, [sp, #240]      ; | g's 2 dwords -> spill area (copy source)
+     1e4:       ab 83 17 f8     stur    x11, [x29, #-136]    ; |
+     1e8:       e0 4f 40 f9     ldr     x0, [sp, #152]       ; |
+     1ec:       e1 4b 40 f9     ldr     x1, [sp, #144]       ; | dst, src and size as args for copy call below
+     1f0:       e2 63 40 f9     ldr     x2, [sp, #192]       ; |
+     1f4:       00 00 00 94     bl      #0 <nonleaf_call+0xfc> ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of g
+     1f8:       e8 77 40 f9     ldr     x8, [sp, #232]       ; |
+     1fc:       a8 03 16 f8     stur    x8, [x29, #-160]     ; |
+     200:       e9 73 40 f9     ldr     x9, [sp, #224]       ; | h's 2 dwords -> local copy of h
+     204:       a9 83 16 f8     stur    x9, [x29, #-152]     ; |
+     208:       ec 0f 41 b9     ldr     w12, [sp, #268]      ; |
+     20c:       ac c3 15 b8     stur    w12, [x29, #-164]    ; | a -> local copy
+     210:       ed 0b 41 b9     ldr     w13, [sp, #264]      ; |
+     214:       ad 83 15 b8     stur    w13, [x29, #-168]    ; | b -> local copy
+     218:       ef 07 41 b9     ldr     w15, [sp, #260]      ; |
+     21c:       af 43 15 b8     stur    w15, [x29, #-172]    ; | e -> local copy
+     220:       f1 03 41 b9     ldr     w17, [sp, #256]      ; |
+     224:       b1 03 15 b8     stur    w17, [x29, #-176]    ; | f -> local copy
+     228:       e6 df 40 b9     ldr     w6, [sp, #220]       ; |
+     22c:       a6 c3 14 b8     stur    w6, [x29, #-180]     ; | i -> local copy
+     230:       e7 db 40 b9     ldr     w7, [sp, #216]       ; |
+     234:       a7 83 14 b8     stur    w7, [x29, #-184]     ; | j -> local copy
+     238:       e0 47 40 f9     ldr     x0, [sp, #136]       ; |
+     23c:       f9 bf 40 b9     ldr     w25, [sp, #188]      ; |
+     240:       e1 03 19 2a     mov     w1, w25              ; | addr of l, fill value 0 and size 100 as args for call below
+     244:       e2 5b 40 f9     ldr     x2, [sp, #176]       ; |
+     248:       00 00 00 94     bl      #0 <nonleaf_call+0x150> ; call w/ unresolved target (presumably memset), zeroing l
+     24c:       ec af 40 b9     ldr     w12, [sp, #172]      ; |
+     250:       e8 47 40 f9     ldr     x8, [sp, #136]       ; | l[0] = 'L'
+     254:       0c 01 00 39     strb    w12, [x8]            ; |
+     258:       a0 83 55 b8     ldur    w0, [x29, #-168]     ; b
+     25c:       a5 43 55 b8     ldur    w5, [x29, #-172]     ; e
+     260:       a6 03 55 b8     ldur    w6, [x29, #-176]     ; f
+     264:       ad c3 54 b8     ldur    w13, [x29, #-180]    ; i
+     268:       af 83 54 b8     ldur    w15, [x29, #-184]    ; j
+     26c:       e9 43 40 f9     ldr     x9, [sp, #128]       ; addr of temp copy of c
+     270:       e0 77 00 b9     str     w0, [sp, #116]       ; b -> temp (x0 is needed for call below)
+     274:       e0 03 09 aa     mov     x0, x9               ; |
+     278:       e1 53 40 f9     ldr     x1, [sp, #160]       ; | dst (temp copy of c), src (local copy of c) and size as args for copy call below
+     27c:       e2 63 40 f9     ldr     x2, [sp, #192]       ; |
+     280:       e5 73 00 b9     str     w5, [sp, #112]       ; e -> temp
+     284:       e6 6f 00 b9     str     w6, [sp, #108]       ; f -> temp
+     288:       ed 6b 00 b9     str     w13, [sp, #104]      ; i -> temp
+     28c:       ef 67 00 b9     str     w15, [sp, #100]      ; j -> temp
+     290:       00 00 00 94     bl      #0 <nonleaf_call+0x198> ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of c
+     294:       e8 93 40 f9     ldr     x8, [sp, #288]       ; |
+     298:       e9 97 40 f9     ldr     x9, [sp, #296]       ; | temp copy of c, as 2 dwords
+     29c:       aa 03 59 f8     ldur    x10, [x29, #-112]    ; |
+     2a0:       ab 83 59 f8     ldur    x11, [x29, #-104]    ; | local copy of d, as 2 dwords
+     2a4:       e0 3f 40 f9     ldr     x0, [sp, #120]       ; |
+     2a8:       e1 4f 40 f9     ldr     x1, [sp, #152]       ; | dst (temp copy of g), src (local copy of g) and size as args for copy call below
+     2ac:       e2 63 40 f9     ldr     x2, [sp, #192]       ; |
+     2b0:       e8 2f 00 f9     str     x8, [sp, #88]        ; |
+     2b4:       e9 2b 00 f9     str     x9, [sp, #80]        ; | c's 2 dwords -> temp
+     2b8:       ea 27 00 f9     str     x10, [sp, #72]       ; |
+     2bc:       eb 23 00 f9     str     x11, [sp, #64]       ; | d's 2 dwords -> temp
+     2c0:       00 00 00 94     bl      #0 <nonleaf_call+0x1c8> ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of g
+     2c4:       e8 8b 40 f9     ldr     x8, [sp, #272]       ; |
+     2c8:       e9 8f 40 f9     ldr     x9, [sp, #280]       ; | temp copy of g, as 2 dwords
+     2cc:       aa 03 56 f8     ldur    x10, [x29, #-160]    ; |
+     2d0:       ab 83 56 f8     ldur    x11, [x29, #-152]    ; | local copy of h, as 2 dwords
+     2d4:       ee 2f 40 f9     ldr     x14, [sp, #88]       ; |
+     2d8:       f0 2b 40 f9     ldr     x16, [sp, #80]       ; | reload c's 2 dwords
+     2dc:       f2 27 40 f9     ldr     x18, [sp, #72]       ; |
+     2e0:       e0 23 40 f9     ldr     x0, [sp, #64]        ; | reload d's 2 dwords
+     2e4:       ec 77 40 b9     ldr     w12, [sp, #116]      ; reload b
+     2e8:       e0 1f 00 f9     str     x0, [sp, #56]        ; d's 2nd dword -> temp (x0 is needed for arg 0)
+     2ec:       e0 03 0c 2a     mov     w0, w12              ; arg 0 (b)
+     2f0:       e1 03 0e aa     mov     x1, x14              ; |
+     2f4:       e2 03 10 aa     mov     x2, x16              ; | arg 1 (struct A c), passed in regs as 2 dwords
+     2f8:       e3 03 12 aa     mov     x3, x18              ; |
+     2fc:       e4 1f 40 f9     ldr     x4, [sp, #56]        ; | arg 2 (struct B d), passed in regs as 2 dwords
+     300:       e5 73 40 b9     ldr     w5, [sp, #112]       ; arg 3 (e)
+     304:       e6 6f 40 b9     ldr     w6, [sp, #108]       ; arg 4 (f)
+     308:       ee 03 00 91     mov     x14, sp              ; |
+     30c:       c8 01 00 f9     str     x8, [x14]            ; |
+     310:       e8 03 00 91     mov     x8, sp               ; | arg 5 (struct A g), pushed onto stack as 2 dwords (x7 isn't used for it)
+     314:       09 05 00 f9     str     x9, [x8, #8]         ; |
+     318:       e8 03 00 91     mov     x8, sp               ; |
+     31c:       0a 09 00 f9     str     x10, [x8, #16]       ; |
+     320:       e8 03 00 91     mov     x8, sp               ; | arg 6 (struct B h), pushed onto stack as 2 dwords
+     324:       0b 0d 00 f9     str     x11, [x8, #24]       ; |
+     328:       e8 03 00 91     mov     x8, sp               ; |
+     32c:       ed 6b 40 b9     ldr     w13, [sp, #104]      ; | arg 7 (i), pushed onto stack
+     330:       0d 21 00 b9     str     w13, [x8, #32]       ; |
+     334:       e8 03 00 91     mov     x8, sp               ; |
+     338:       ef 67 40 b9     ldr     w15, [sp, #100]      ; | arg 8 (j), pushed onto stack
+     33c:       0f 29 00 b9     str     w15, [x8, #40]       ; |
+     340:       30 ff ff 97     bl      #-832 <leaf_call>    ; return address -> r30/lr, and call
+     344:       ff 43 08 91     add     sp, sp, #528         ; |
+     348:       fd 7b 44 a9     ldp     x29, x30, [sp, #64]  ; |
+     34c:       f4 4f 43 a9     ldp     x20, x19, [sp, #48]  ; |
+     350:       f6 57 42 a9     ldp     x22, x21, [sp, #32]  ; | epilog
+     354:       f8 5f 41 a9     ldp     x24, x23, [sp, #16]  ; |
+     358:       fc 67 c5 a8     ldp     x28, x25, [sp], #80  ; |
+     35c:       c0 03 5f d6     ret                          ; |
+
+0000000000000360 main:
+     360:       ff c3 04 d1     sub     sp, sp, #304         ; |
+     364:       fc 4f 11 a9     stp     x28, x19, [sp, #272] ; |
+     368:       fd 7b 12 a9     stp     x29, x30, [sp, #288] ; | prolog
+     36c:       fd 83 04 91     add     x29, sp, #288        ; |
+     370:       08 00 80 52     mov     w8, #0               ; prep arg 0 (also used as return value, below)
+     374:       e9 03 1f 32     orr     w9, wzr, #0x2        ; prep 1st struct A's i
+     378:       ea 07 00 32     orr     w10, wzr, #0x3       ; prep 1st struct A's j
+     37c:       0b 10 a8 52     mov     w11, #1082130432     ; |
+     380:       60 01 27 1e     fmov    s0, w11              ; | prep 1st struct A's f (4.f, via its bit pattern 0x40800000)
+     384:       8c 02 e8 d2     mov     x12, #4617315517961601024 ; |
+     388:       81 01 67 9e     fmov    d1, x12              ; | prep 1st struct B's d (5., via its bit pattern 0x4014000000000000)
+     38c:       ec 07 7f b2     orr     x12, xzr, #0x6       ; prep 1st struct B's l
+     390:       2b 01 80 52     mov     w11, #9              ; prep 2nd struct A's i
+     394:       4d 01 80 52     mov     w13, #10             ; prep 2nd struct A's j
+     398:       0e 26 a8 52     mov     w14, #1093664768     ; |
+     39c:       c2 01 27 1e     fmov    s2, w14              ; | prep 2nd struct A's f (11.f, via its bit pattern 0x41300000)
+     3a0:       0f 05 e8 d2     mov     x15, #4622945017495814144 ; |
+     3a4:       e3 01 67 9e     fmov    d3, x15              ; | prep 2nd struct B's d (12., via its bit pattern 0x4028000000000000)
+     3a8:       af 01 80 d2     mov     x15, #13             ; prep 2nd struct B's l
+     3ac:       f0 07 7e b2     orr     x16, xzr, #0xc       ; 12 = sizeof(struct A), used as size for copy calls below
+     3b0:       e1 03 00 32     orr     w1, wzr, #0x1        ; arg 1
+     3b4:       e6 0b 00 32     orr     w6, wzr, #0x7        ; arg 4
+     3b8:       e7 03 1d 32     orr     w7, wzr, #0x8        ; arg 5
+     3bc:       ee 0b 1f 32     orr     w14, wzr, #0xe       ; prep arg 8
+     3c0:       f1 0f 00 32     orr     w17, wzr, #0xf       ; prep arg 9
+     3c4:       b2 83 00 d1     sub     x18, x29, #32        ; addr of 1st struct A in local area
+     3c8:       a0 c3 00 d1     sub     x0, x29, #48         ; addr of 1st struct B in local area
+     3cc:       a2 f3 00 d1     sub     x2, x29, #60         ; addr of 2nd struct A in local area
+     3d0:       a3 43 01 d1     sub     x3, x29, #80         ; addr of 2nd struct B in local area
+     3d4:       a4 83 01 d1     sub     x4, x29, #96         ; addr of temp copy of 1st struct A (used when passing it)
+     3d8:       a5 c3 01 d1     sub     x5, x29, #112        ; addr of temp copy of 2nd struct A (used when passing it)
+     3dc:       bf c3 1e b8     stur    wzr, [x29, #-20]     ; zero 4 bytes in local area (not part of any of the structs written below)
+     3e0:       49 02 00 b9     str     w9, [x18]            ; |
+     3e4:       aa 43 1e b8     stur    w10, [x29, #-28]     ; | write 1st struct A to local area
+     3e8:       a0 83 1e bc     stur    s0, [x29, #-24]      ; |
+     3ec:       01 00 00 fd     str     d1, [x0]             ; |
+     3f0:       ac 83 1d f8     stur    x12, [x29, #-40]     ; | write 1st struct B to local area
+     3f4:       4b 00 00 b9     str     w11, [x2]            ; |
+     3f8:       ad 83 1c b8     stur    w13, [x29, #-56]     ; | write 2nd struct A to local area
+     3fc:       a2 c3 1c bc     stur    s2, [x29, #-52]      ; |
+     400:       63 00 00 fd     str     d3, [x3]             ; |
+     404:       af 83 1b f8     stur    x15, [x29, #-72]     ; | write 2nd struct B to local area
+     408:       e0 03 04 aa     mov     x0, x4               ; copy call's arg 0: dst (temp copy of 1st struct A)
+     40c:       a1 c3 18 b8     stur    w1, [x29, #-116]     ; arg 1 -> temp (x1 is needed for call below)
+     410:       e1 03 12 aa     mov     x1, x18              ; copy call's arg 1: src (1st struct A)
+     414:       a2 03 18 f8     stur    x2, [x29, #-128]     ; addr of 2nd struct A -> temp (x2 is needed for call below)
+     418:       e2 03 10 aa     mov     x2, x16              ; copy call's arg 2: size
+     41c:       a8 c3 17 b8     stur    w8, [x29, #-132]     ; arg 0 (and return value) -> temp
+     420:       f0 4b 00 f9     str     x16, [sp, #144]      ; |
+     424:       e6 8f 00 b9     str     w6, [sp, #140]       ; |
+     428:       e7 8b 00 b9     str     w7, [sp, #136]       ; | temp store values prepared above,
+     42c:       ee 87 00 b9     str     w14, [sp, #132]      ; | to be reloaded after the call below
+     430:       f1 83 00 b9     str     w17, [sp, #128]      ; |
+     434:       e5 3f 00 f9     str     x5, [sp, #120]       ; |
+     438:       00 00 00 94     bl      #0 <main+0xd8>       ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of 1st struct A
+     43c:       ac 03 5a f8     ldur    x12, [x29, #-96]     ; |
+     440:       af 83 5a f8     ldur    x15, [x29, #-88]     ; | temp copy of 1st struct A, as 2 dwords
+     444:       b0 03 5d f8     ldur    x16, [x29, #-48]     ; |
+     448:       b2 83 5d f8     ldur    x18, [x29, #-40]     ; | 1st struct B, as 2 dwords
+     44c:       e0 3f 40 f9     ldr     x0, [sp, #120]       ; |
+     450:       a1 03 58 f8     ldur    x1, [x29, #-128]     ; | dst (temp copy of 2nd struct A), src (2nd struct A) and size as args for copy call below
+     454:       e2 4b 40 f9     ldr     x2, [sp, #144]       ; |
+     458:       ec 3b 00 f9     str     x12, [sp, #112]      ; |
+     45c:       ef 37 00 f9     str     x15, [sp, #104]      ; | 1st struct A's 2 dwords -> temp
+     460:       f0 33 00 f9     str     x16, [sp, #96]       ; |
+     464:       f2 2f 00 f9     str     x18, [sp, #88]       ; | 1st struct B's 2 dwords -> temp
+     468:       00 00 00 94     bl      #0 <main+0x108>      ; call w/ unresolved target (presumably memcpy), copying the 12 bytes of 2nd struct A
+     46c:       ac 03 59 f8     ldur    x12, [x29, #-112]    ; |
+     470:       af 83 59 f8     ldur    x15, [x29, #-104]    ; | temp copy of 2nd struct A, as 2 dwords
+     474:       b0 03 5b f8     ldur    x16, [x29, #-80]     ; |
+     478:       b2 83 5b f8     ldur    x18, [x29, #-72]     ; | 2nd struct B, as 2 dwords
+     47c:       e0 3b 40 f9     ldr     x0, [sp, #112]       ; |
+     480:       e1 37 40 f9     ldr     x1, [sp, #104]       ; | reload 1st struct A's 2 dwords
+     484:       e2 33 40 f9     ldr     x2, [sp, #96]        ; |
+     488:       e3 2f 40 f9     ldr     x3, [sp, #88]        ; | reload 1st struct B's 2 dwords
+     48c:       a8 c3 57 b8     ldur    w8, [x29, #-132]     ; reload arg 0's value
+     490:       e0 2b 00 f9     str     x0, [sp, #80]        ; 1st struct A's 1st dword -> temp again (x0 is needed for arg 0)
+     494:       e0 03 08 2a     mov     w0, w8               ; arg 0
+     498:       a9 c3 58 b8     ldur    w9, [x29, #-116]     ; reload arg 1's value
+     49c:       e1 27 00 f9     str     x1, [sp, #72]        ; 1st struct A's 2nd dword -> temp again (x1 is needed for arg 1)
+     4a0:       e1 03 09 2a     mov     w1, w9               ; arg 1
+     4a4:       e4 2b 40 f9     ldr     x4, [sp, #80]        ; reload 1st struct A's 1st dword
+     4a8:       e2 23 00 f9     str     x2, [sp, #64]        ; 1st struct B's 1st dword -> temp again
+     4ac:       e2 03 04 aa     mov     x2, x4               ; arg 2 (1st struct A), 1st dword, passed in reg
+     4b0:       e5 27 40 f9     ldr     x5, [sp, #72]        ; reload 1st struct A's 2nd dword
+     4b4:       e3 1f 00 f9     str     x3, [sp, #56]        ; 1st struct B's 2nd dword -> temp again
+     4b8:       e3 03 05 aa     mov     x3, x5               ; arg 2 (1st struct A), 2nd dword, passed in reg
+     4bc:       e4 23 40 f9     ldr     x4, [sp, #64]        ; |
+     4c0:       e5 1f 40 f9     ldr     x5, [sp, #56]        ; | arg 3 (1st struct B), passed in regs as 2 dwords
+     4c4:       e6 8f 40 b9     ldr     w6, [sp, #140]       ; arg 4
+     4c8:       e7 8b 40 b9     ldr     w7, [sp, #136]       ; arg 5
+     4cc:       f3 03 00 91     mov     x19, sp              ; |
+     4d0:       6c 02 00 f9     str     x12, [x19]           ; |
+     4d4:       ec 03 00 91     mov     x12, sp              ; | arg 6 (2nd struct A), pushed onto stack as 2 dwords
+     4d8:       8f 05 00 f9     str     x15, [x12, #8]       ; |
+     4dc:       ec 03 00 91     mov     x12, sp              ; |
+     4e0:       90 09 00 f9     str     x16, [x12, #16]      ; |
+     4e4:       ec 03 00 91     mov     x12, sp              ; | arg 7 (2nd struct B), pushed onto stack as 2 dwords
+     4e8:       92 0d 00 f9     str     x18, [x12, #24]      ; |
+     4ec:       ec 03 00 91     mov     x12, sp              ; |
+     4f0:       ea 87 40 b9     ldr     w10, [sp, #132]      ; | arg 8, pushed onto stack
+     4f4:       8a 21 00 b9     str     w10, [x12, #32]      ; |
+     4f8:       ec 03 00 91     mov     x12, sp              ; |
+     4fc:       eb 83 40 b9     ldr     w11, [sp, #128]      ; | arg 9, pushed onto stack
+     500:       8b 29 00 b9     str     w11, [x12, #40]      ; |
+     504:       fd fe ff 97     bl      #-1036 <nonleaf_call> ; return address -> r30/lr, and call
+     508:       a0 c3 57 b8     ldur    w0, [x29, #-132]     ; return value (reloaded from temp store)
+     50c:       fd 7b 52 a9     ldp     x29, x30, [sp, #288] ; |
+     510:       fc 4f 51 a9     ldp     x28, x19, [sp, #272] ; | epilog
+     514:       ff c3 04 91     add     sp, sp, #304         ; |
+     518:       c0 03 5f d6     ret                          ; |
+
+
+
+; ---------- returning long long ---------->
+;
+; long long f()
+; {
+;     return 7171LL;
+; }
+;
+; int main()
+; {
+;     return (int)f();
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 f:
+       0:       60 80 83 d2     mov     x0, #7171            ; return value (long long) via x0
+       4:       c0 03 5f d6     ret                          ; return (no prolog/epilog needed)
+
+0000000000000008 main:
+       8:       ff 83 00 d1     sub     sp, sp, #32
+       c:       fd 7b 01 a9     stp     x29, x30, [sp, #16]
+      10:       fd 43 00 91     add     x29, sp, #16
+      14:       bf c3 1f b8     stur    wzr, [x29, #-4]
+      18:       fa ff ff 97     bl      #-24 <f>
+      1c:       e8 03 00 2a     mov     w8, w0
+      20:       e0 03 08 2a     mov     w0, w8
+      24:       fd 7b 41 a9     ldp     x29, x30, [sp, #16]
+      28:       ff 83 00 91     add     sp, sp, #32
+      2c:       c0 03 5f d6     ret
+
+
+
+; ---------- passing structs with only fp parts ---------->
+;
+; struct A { float a; };
+; struct B { float a, b; };
+; struct C { float a, b, c; };
+; struct D { double a; };
+; struct E { double a, b; };
+; struct F { double a, b, c; };
+;
+; void leaf_call(struct A a, struct B b, struct C c, struct D d, struct E e, struct F f)
+; {
+; }
+;
+; int main()
+; {
+;     leaf_call((struct A){1.f}, (struct B){2.f,3.f}, (struct C){4.f,5.f,6.f}, (struct D){1.}, (struct E){2.,3.}, (struct F){4.,5.,6.});
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:
+       0:       ff 43 01 d1     sub     sp, sp, #80
+       4:       e7 33 40 fd     ldr     d7, [sp, #96]
+       8:       f0 37 40 fd     ldr     d16, [sp, #104]
+       c:       f1 3b 40 fd     ldr     d17, [sp, #112]
+      10:       f2 2b 40 fd     ldr     d18, [sp, #80]
+      14:       f3 2f 40 fd     ldr     d19, [sp, #88]
+      18:       e0 4b 00 bd     str     s0, [sp, #72]
+      1c:       e2 47 00 bd     str     s2, [sp, #68]
+      20:       e1 43 00 bd     str     s1, [sp, #64]
+      24:       e5 3b 00 bd     str     s5, [sp, #56]
+      28:       e4 37 00 bd     str     s4, [sp, #52]
+      2c:       e3 33 00 bd     str     s3, [sp, #48]
+      30:       e6 17 00 fd     str     d6, [sp, #40]
+      34:       f3 13 00 fd     str     d19, [sp, #32]
+      38:       f2 0f 00 fd     str     d18, [sp, #24]
+      3c:       f1 0b 00 fd     str     d17, [sp, #16]
+      40:       f0 07 00 fd     str     d16, [sp, #8]
+      44:       e7 03 00 fd     str     d7, [sp]
+      48:       ff 43 01 91     add     sp, sp, #80
+      4c:       c0 03 5f d6     ret
+
+0000000000000050 main:
+      50:       ff 43 02 d1     sub     sp, sp, #144                  ; |
+      54:       fd 7b 08 a9     stp     x29, x30, [sp, #128]          ; | prolog
+      58:       fd 03 02 91     add     x29, sp, #128                 ; /
+      5c:       e8 03 1f 2a     mov     w8, wzr                       ; \                 |  |
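+      60:       a8 c3 1f b8     stur    w8, [x29, #-4]                ; |                 |  | zero 4 bytes in local area @@@ probably not struct padding (main() of the struct-less examples does the same), presumably main()'s return value slot being zero-init'd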
+      64:       e9 1b 09 32     orr     w9, wzr, #0x3f800000          ; | struct A   1.f  |
+      68:       a9 83 1f b8     stur    w9, [x29, #-8]                ; /                 |
+      6c:       e9 03 02 32     orr     w9, wzr, #0x40000000          ; \            2.f  |
+      70:       a9 03 1f b8     stur    w9, [x29, #-16]               ; |                 |
+      74:       09 08 a8 52     mov     w9, #1077936128               ; | struct B   3.f  |
+      78:       a9 43 1f b8     stur    w9, [x29, #-12]               ; /                 |
+      7c:       09 10 a8 52     mov     w9, #1082130432               ; \            4.f  |
+      80:       a9 03 1e b8     stur    w9, [x29, #-32]               ; |                 |
+      84:       09 14 a8 52     mov     w9, #1084227584               ; |            5.f  |
+      88:       a9 43 1e b8     stur    w9, [x29, #-28]               ; | struct C        | write local struct data
+      8c:       09 18 a8 52     mov     w9, #1086324736               ; |            6.f  |
+      90:       a9 83 1e b8     stur    w9, [x29, #-24]               ; /                 |
+      94:       ea 27 4c b2     orr     x10, xzr, #0x3ff0000000000000 ; \            1.0  |
+      98:       aa 83 1d f8     stur    x10, [x29, #-40]              ; / struct D        |
+      9c:       ea 03 42 b2     orr     x10, xzr, #0x4000000000000000 ; \            2.0  |
+      a0:       aa 83 1c f8     stur    x10, [x29, #-56]              ; |                 |
+      a4:       0a 01 e8 d2     mov     x10, #4613937818241073152     ; | struct E   3.0  |
+      a8:       aa 03 1d f8     stur    x10, [x29, #-48]              ; /                 |
+      ac:       0a 02 e8 d2     mov     x10, #4616189618054758400     ; \            4.0  |
+      b0:       ea 1b 00 f9     str     x10, [sp, #48]                ; |                 |
+      b4:       8a 02 e8 d2     mov     x10, #4617315517961601024     ; | struct F   5.0  |
+      b8:       ea 1f 00 f9     str     x10, [sp, #56]                ; |                 |
+      bc:       0a 03 e8 d2     mov     x10, #4618441417868443648     ; |            6.0  |
+      c0:       ea 23 00 f9     str     x10, [sp, #64]                ; /                 |
+      c4:       a0 83 5f bc     ldur    s0, [x29, #-8]                ; arg 0 (struct A), via reg
+      c8:       a2 43 5f bc     ldur    s2, [x29, #-12]               ; |
+      cc:       a1 03 5f bc     ldur    s1, [x29, #-16]               ; / arg 1 (struct B), via reg
+      d0:       a5 83 5e bc     ldur    s5, [x29, #-24]               ; \
+      d4:       a4 43 5e bc     ldur    s4, [x29, #-28]               ; | arg 2 (struct C), via reg
+      d8:       a3 03 5e bc     ldur    s3, [x29, #-32]               ; /
+      dc:       a6 83 5d fc     ldur    d6, [x29, #-40]               ; arg 3 (struct D)
+      e0:       a7 83 5c fc     ldur    d7, [x29, #-56]               ; |
+      e4:       b0 03 5d fc     ldur    d16, [x29, #-48]              ; / prep arg 4 (struct E), note d7 is *not* used to pass, but just a scratch reg
+      e8:       f1 1b 40 fd     ldr     d17, [sp, #48]                ; \
+      ec:       f2 1f 40 fd     ldr     d18, [sp, #56]                ; | prep arg 5 (struct F)
+      f0:       f3 23 40 fd     ldr     d19, [sp, #64]                ; /
+      f4:       ea 03 00 91     mov     x10, sp                       ; sp in x10, for below indirections (a bit pointless)
+      f8:       53 11 00 fd     str     d19, [x10, #32]               ; \
+      fc:       52 0d 00 fd     str     d18, [x10, #24]               ; | arg 5 (struct F), via stack
+     100:       51 09 00 fd     str     d17, [x10, #16]               ; /
+     104:       50 05 00 fd     str     d16, [x10, #8]                ; \ arg 4 (struct E), via stack (not split across regs and stack)
+     108:       47 01 00 fd     str     d7, [x10]                     ; |
+     10c:       e8 2f 00 b9     str     w8, [sp, #44]                 ; prep return value, temp store on stack local area
+     110:       bc ff ff 97     bl      #-272 <leaf_call>             ; return address -> r30/lr, and call
+     114:       e0 2f 40 b9     ldr     w0, [sp, #44]                 ; return value (unsure why not just using immediate @@@)
+     118:       fd 7b 48 a9     ldp     x29, x30, [sp, #128]          ; |
+     11c:       ff 43 02 91     add     sp, sp, #144                  ; | epilog
+     120:       c0 03 5f d6     ret                                   ; |
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.14
+
+|leaf_call| PROC
+|$LN3|
+        sub         sp,sp,#0x30
+        str         s0,[sp]
+        str         s1,[sp,#8]
+        str         s2,[sp,#0xC]
+        str         s3,[sp,#0x18]
+        str         s4,[sp,#0x1C]
+        str         s5,[sp,#0x20]
+        str         d6,[sp,#0x10]
+        add         sp,sp,#0x30
+        ret
+
+        ENDP  ; |leaf_call|
+
+|main|  PROC
+|$LN3|
+        stp         fp,lr,[sp,#-0x60]! ; |
+        mov         fp,sp              ; | prolog
+        sub         sp,sp,#0x30        ; /
+        fmov        s16,#1             ; \ struct A   1.f  |
+        str         s16,[sp,#0x40]     ; /                 |
+        fmov        s16,#2             ; \            2.f  |
+        str         s16,[sp,#0x48]     ; |                 |
+        fmov        s16,#3             ; | struct B   3.f  |
+        str         s16,[sp,#0x4C]     ; /                 |
+        fmov        s16,#4             ; \            4.f  |
+        str         s16,[sp,#0x58]     ; |                 |
+        fmov        s16,#5             ; |            5.f  |
+        str         s16,[sp,#0x5C]     ; | struct C        |
+        fmov        s16,#6             ; |            6.f  |
+        str         s16,[sp,#0x60]     ; /                 | write local struct data
+        fmov        d16,#1             ; \            1.0  |
+        str         d16,[sp,#0x50]     ; / struct D        |
+        fmov        d16,#2             ; \            2.0  |
+        str         d16,[sp,#0x68]     ; |                 |
+        fmov        d16,#3             ; | struct E   3.0  |
+        str         d16,[sp,#0x70]     ; /                 |
+        fmov        d16,#4             ; \            4.0  |
+        str         d16,[sp,#0x78]     ; |                 |
+        fmov        d16,#5             ; | struct F   5.0  |
+        str         d16,[sp,#0x80]     ; |                 |
+        fmov        d16,#6             ; |            6.0  |
+        str         d16,[sp,#0x88]     ; /                 |
+        mov         x10,sp             ; write ptr (top of stack)
+        add         x8,sp,#0x68        ; read ptr (beg of struct E)
+        ldp         x9,x8,[x8]         ; \
+        stp         x9,x8,[x10]        ; / arg 4 (struct E), via stack (not split across regs and stack)
+        add         x11,sp,#0x10       ; write ptr (top of stack + 16)
+        add         x10,sp,#0x78       ; read ptr (beg of struct F)
+        ldp         x9,x8,[x10]        ; \
+        stp         x9,x8,[x11]        ; | arg 5 (struct F), via stack
+        ldr         x8,[x10,#0x10]     ; |
+        str         x8,[x11,#0x10]     ; /
+        ldr         d6,[sp,#0x50]      ; arg 3 (struct D), via reg
+        ldr         s5,[sp,#0x60]      ; \
+        ldr         s4,[sp,#0x5C]      ; | arg 2 (struct C), via reg
+        ldr         s3,[sp,#0x58]      ; /
+        ldr         s2,[sp,#0x4C]      ; \
+        ldr         s1,[sp,#0x48]      ; / arg 1 (struct B), via reg
+        ldr         s0,[sp,#0x40]      ; arg 0 (struct A), via reg
+        bl          leaf_call          ; return address -> r30/lr, and call
+        mov         w0,#0              ; return value
+        add         sp,sp,#0x30        ; |
+        ldp         fp,lr,[sp],#0x60   ; | epilog
+        ret                            ; |
+
+        ENDP  ; |main|
+
+
+
+; ---------- returning structs by value ---------->
+;
+; struct Small { char x; };
+; struct Big { long long i,j,k,l; long m; }; /* bigger than 16 bytes */
+;
+; struct Small f0()
+; {
+;     struct Small s = { 132 };
+;     return s;
+; }
+;
+; struct Big f1()
+; {
+;     struct Big b = { 7171LL, 99LL, -99LL, -3102LL, 32 };
+;     return b;
+; }
+;
+; int main()
+; {
+;     struct Small s = f0();
+;     struct Big b = f1();
+;     return b.j + b.k + b.m + s.x;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 f0:
+       0:       ff 83 00 d1     sub     sp, sp, #32         ; |
+       4:       fe 0b 00 f9     str     x30, [sp, #16]      ; | prolog
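+       8:       08 00 00 90     adrp    x8, #0              ; page addr of some static data -> x8 (#0 presumably b/c of unresolved relocation; @@@ probably const initializer holding #132)
+       c:       08 01 00 91     add     x8, x8, #0          ; + offset within page (presumably also #0 only b/c unresolved, so not pointless once linked)
+      10:       e2 03 40 b2     orr     x2, xzr, #0x1       ; arg 2 for call below: #1 (= sizeof(struct Small))
+      14:       e9 3f 00 91     add     x9, sp, #15         ; addr of local struct s (sp+15) -> x9
+      18:       e0 03 09 aa     mov     x0, x9              ; arg 0 for call below: addr of local struct s
+      1c:       e1 03 08 aa     mov     x1, x8              ; arg 1 for call below: addr of static data
+      20:       e9 03 00 f9     str     x9, [sp]            ; temp copy of addr of local struct s on top of stack (reloaded after call)
+      24:       00 00 00 94     bl      #0 <f0+0x24>        ; call to unresolved symbol (bl #0 = unrelocated), @@@ presumably memcpy(&s, <static data>, 1)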
+      28:       e8 03 40 f9     ldr     x8, [sp]            ; *sp -> x8
+      2c:       0a 01 40 39     ldrb    w10, [x8]           ; *x8 -> w10
+      30:       e9 03 0a 2a     mov     w9, w10             ; w10 -> w9
+      34:       20 1d 40 d3     ubfx    x0, x9, #0, #8      ; return value ("Unsigned Bit Field Extract", copy 8 bits from x9's LSBits starting at bit 0 to x0, and zero extend; = struct in reg)
+      38:       fe 0b 40 f9     ldr     x30, [sp, #16]      ; |
+      3c:       ff 83 00 91     add     sp, sp, #32         ; | epilog
+      40:       c0 03 5f d6     ret                         ; |
+
+0000000000000044 f1:
+      44:       ff 03 01 d1     sub     sp, sp, #64         ; |
+      48:       fe 1b 00 f9     str     x30, [sp, #48]      ; | prolog
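+      4c:       09 00 80 52     mov     w9, #0              ; #0 -> w9, passed below in w1 as arg 1 of call (@@@ presumably memset's fill byte)
+      50:       02 05 80 d2     mov     x2, #40             ; arg 2 for call below: #40 = sizeof(struct Big) (@@@ presumably memset's size)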
+      54:       6a 80 83 d2     mov     x10, #7171          ; |                                i
+      58:       6b 0c 80 d2     mov     x11, #99            ; |                                j
+      5c:       4c 0c 80 92     mov     x12, #-99           ; | prep local struct Big's data   k
+      60:       ad 83 81 92     mov     x13, #-3102         ; |                                l
+      64:       ee 03 7b b2     orr     x14, xzr, #0x20     ; |                                m
+      68:       e0 03 08 aa     mov     x0, x8              ; arg 0 for call below: ptr to retval (passed in via x8) -> x0
+      6c:       e1 03 09 2a     mov     w1, w9              ; arg 1 for call below: #0 (from w9)
+      70:       e8 17 00 f9     str     x8, [sp, #40]       ; ptr to retval -> local area as temp copy
+      74:       ea 13 00 f9     str     x10, [sp, #32]      ; |                                         i
+      78:       eb 0f 00 f9     str     x11, [sp, #24]      ; |                                         j
+      7c:       ec 0b 00 f9     str     x12, [sp, #16]      ; | write struct Big's data to local area  k   (as temp copies across call below, each reloaded after it, so slot order doesn't matter)
+      80:       ed 07 00 f9     str     x13, [sp, #8]       ; |                                         l
+      84:       ee 03 00 f9     str     x14, [sp]           ; |                                         m
+      88:       00 00 00 94     bl      #0 <f1+0x44>        ; call to unresolved symbol (bl #0 = unrelocated), @@@ presumably memset(<ptr to retval>, 0, 40)
+      8c:       e8 13 40 f9     ldr     x8, [sp, #32]       ; |                       i -> x8
+      90:       ea 17 40 f9     ldr     x10, [sp, #40]      ; |                       ptr to retval -> x10, for below indirections
+      94:       48 01 00 f9     str     x8, [x10]           ; |                       i
+      98:       eb 0f 40 f9     ldr     x11, [sp, #24]      ; |                       | j
+      9c:       4b 05 00 f9     str     x11, [x10, #8]      ; |                       /
+      a0:       ec 0b 40 f9     ldr     x12, [sp, #16]      ; | write retval data     \ k
+      a4:       4c 09 00 f9     str     x12, [x10, #16]     ; |                       /
+      a8:       ed 07 40 f9     ldr     x13, [sp, #8]       ; |                       \ l
+      ac:       4d 0d 00 f9     str     x13, [x10, #24]     ; |                       /
+      b0:       ee 03 40 f9     ldr     x14, [sp]           ; |                       \ m
+      b4:       4e 11 00 f9     str     x14, [x10, #32]     ; /                       /
+      b8:       fe 1b 40 f9     ldr     x30, [sp, #48]      ; \
+      bc:       ff 03 01 91     add     sp, sp, #64         ; | epilog
+      c0:       c0 03 5f d6     ret                         ; |
+
+00000000000000c4 main:
+      c4:       ff 43 01 d1     sub     sp, sp, #80         ;
+      c8:       fd 7b 04 a9     stp     x29, x30, [sp, #64] ;
+      cc:       fd 03 01 91     add     x29, sp, #64        ;
+      d0:       a8 17 00 d1     sub     x8, x29, #5         ;
+      d4:       e9 43 00 91     add     x9, sp, #16         ;
+      d8:       bf c3 1f b8     stur    wzr, [x29, #-4]     ;
+      dc:       e8 07 00 f9     str     x8, [sp, #8]        ;
+      e0:       e9 03 00 f9     str     x9, [sp]            ;
+      e4:       c7 ff ff 97     bl      #-228 <f0>          ; return address -> r30/lr, and call
+      e8:       ea 03 00 2a     mov     w10, w0             ;
+      ec:       e8 07 40 f9     ldr     x8, [sp, #8]        ;
+      f0:       0a 01 00 39     strb    w10, [x8]           ;
+      f4:       e8 03 40 f9     ldr     x8, [sp]            ; ptr to retval struct, as sizeof(struct Big) > 16
+      f8:       d3 ff ff 97     bl      #-180 <f1>          ; return address -> r30/lr, and call
+      fc:       e8 0f 40 f9     ldr     x8, [sp, #24]       ;
+     100:       e9 13 40 f9     ldr     x9, [sp, #32]       ;
+     104:       08 01 09 8b     add     x8, x8, x9          ;
+     108:       e9 1b 40 f9     ldr     x9, [sp, #48]       ;
+     10c:       08 01 09 8b     add     x8, x8, x9          ;
+     110:       e9 07 40 f9     ldr     x9, [sp, #8]        ;
+     114:       2a 01 40 39     ldrb    w10, [x9]           ;
+     118:       e0 03 0a 2a     mov     w0, w10             ;
+     11c:       00 1c 40 d3     ubfx    x0, x0, #0, #8      ;
+     120:       08 01 00 8b     add     x8, x8, x0          ;
+     124:       ea 03 08 2a     mov     w10, w8             ;
+     128:       e0 03 0a 2a     mov     w0, w10             ;
+     12c:       fd 7b 44 a9     ldp     x29, x30, [sp, #64] ;
+     130:       ff 43 01 91     add     sp, sp, #80         ;
+     134:       c0 03 5f d6     ret                         ;
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.14
+
+|f0|    PROC
+|$LN3|
+        sub         sp,sp,#0x10            ;
+        mov         w8,#-0x7C              ; (signed) #132
+        strb        w8,[sp]                ; stored in local area
+        ldrb        w0,[sp]                ; return value (entire struct in reg)
+        add         sp,sp,#0x10            ;
+        ret                                ;
+
+        ENDP  ; |f0|
+
+|f1|    PROC
+|$LN3|
+        stp         fp,lr,[sp,#-0x10]!     ;
+        mov         fp,sp                  ;
+        bl          __security_push_cookie ;
+        sub         sp,sp,#0x30            ;
+        str         x8,[sp]                ; ptr to retval -> local area as temp copy
+        mov         x9,#0x1C03             ;
+        str         x9,[sp,#8]             ;
+        mov         x8,#0x63               ;
+        str         x8,[sp,#0x10]          ;
+        mov         x8,#-0x63              ;
+        str         x8,[sp,#0x18]          ;
+        mov         x8,#-0xC1E             ;
+        str         x8,[sp,#0x20]          ;
+        mov         w8,#0x20               ;
+        str         w8,[sp,#0x28]          ;
+        ldr         x11,[sp]               ; ptr to retval -> x11, for below indirections
+        add         x10,sp,#8              ; read ptr
+        ldp         x9,x8,[x10]            ; |                           | i, j
+        stp         x9,x8,[x11]            ; |                           /
+        ldp         x9,x8,[x10,#0x10]      ; | cp struct data to retval  \ k, l
+        stp         x9,x8,[x11,#0x10]      ; |                           /
+        ldr         x8,[x10,#0x20]         ; |                           \
+        str         x8,[x11,#0x20]         ; |                           | m
+        add         sp,sp,#0x30            ;
+        bl          __security_pop_cookie  ;
+        ldp         fp,lr,[sp],#0x10       ;
+        ret                                ;
+
+        ENDP  ; |f1|
+
+|main|  PROC
+|$LN3|
+        stp         fp,lr,[sp,#-0x10]!     ;
+        mov         fp,sp                  ;
+        bl          __security_push_cookie ;
+        sub         sp,sp,#0x80            ;
+        bl          f0                     ; return address -> r30/lr, and call
+        strb        w0,[sp]                ;
+        add         x9,sp,#1               ;
+        mov         x8,sp                  ;
+        ldrsb       w8,[x8]                ;
+        strb        w8,[x9]                ;
+        add         x8,sp,#0x10            ; ptr to retval struct, as sizeof(struct Big) > 16
+        bl          f1                     ; return address -> r30/lr, and call
+        add         x8,sp,#0x10            ;
+        str         x8,[sp,#8]             ;
+        add         x11,sp,#0x38           ;
+        ldr         x10,[sp,#8]            ;
+        ldp         x9,x8,[x10]            ;
+        stp         x9,x8,[x11]            ;
+        ldp         x9,x8,[x10,#0x10]      ;
+        stp         x9,x8,[x11,#0x10]      ;
+        ldr         x8,[x10,#0x20]         ;
+        str         x8,[x11,#0x20]         ;
+        add         x11,sp,#0x60           ;
+        add         x10,sp,#0x38           ;
+        ldp         x9,x8,[x10]            ;
+        stp         x9,x8,[x11]            ;
+        ldp         x9,x8,[x10,#0x10]      ;
+        stp         x9,x8,[x11,#0x10]      ;
+        ldr         x8,[x10,#0x20]         ;
+        str         x8,[x11,#0x20]         ;
+        ldr         x9,[sp,#0x68]          ;
+        ldr         x8,[sp,#0x70]          ;
+        add         x9,x9,x8               ;
+        ldr         w8,[sp,#0x80]          ;
+        sxtw        x8,w8                  ;
+        add         x9,x9,x8               ;
+        ldrsb       w8,[sp,#1]             ;
+        mov         w8,w8                  ;
+        sxtw        x8,w8                  ;
+        add         x8,x9,x8               ;
+        mov         w0,w8                  ;
+        mov         w0,w0                  ;
+        add         sp,sp,#0x80            ;
+        bl          __security_pop_cookie  ;
+        ldp         fp,lr,[sp],#0x10       ;
+        ret                                ;
+
+        ENDP  ; |main|
+
+
+; ---------- single-field structs by value (and small array fields) ---------->
+;
+; struct C { char c; };
+; struct S { short s; };
+; struct I { int i; };
+; struct F { float f; };
+; struct D { double d; };
+;
+; struct C2 { char c[2]; };
+; struct C3 { char c[3]; };
+;
+; void leaf_call(struct C2 a, struct C b, struct S c, struct I d, struct F e, struct D f, struct C3 g)
+; {
+; }
+;
+; int main()
+; {
+;     leaf_call((struct C2){{0,1}}, (struct C){2}, (struct S){3}, (struct I){4}, (struct F){5.f}, (struct D){6.}, (struct C3){{7,8,9}});
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:
+       0:       ff 03 03 d1     sub     sp, sp, #192
+       4:       fe 5b 00 f9     str     x30, [sp, #176]
+       8:       e8 03 7f b2     orr     x8, xzr, #0x2
+       c:       e9 07 40 b2     orr     x9, xzr, #0x3
+      10:       ea bb 02 91     add     x10, sp, #174
+      14:       eb 83 02 91     add     x11, sp, #160
+      18:       ec 7f 02 91     add     x12, sp, #159
+      1c:       ed 73 02 91     add     x13, sp, #156
+      20:       ee 63 02 91     add     x14, sp, #152
+      24:       ef 53 02 91     add     x15, sp, #148
+      28:       f0 23 02 91     add     x16, sp, #136
+      2c:       f1 17 02 91     add     x17, sp, #133
+      30:       f2 e3 01 91     add     x18, sp, #120
+      34:       e0 53 00 f9     str     x0, [sp, #160]
+      38:       e0 03 0a aa     mov     x0, x10
+      3c:       e1 3b 00 f9     str     x1, [sp, #112]
+      40:       e1 03 0b aa     mov     x1, x11
+      44:       e2 37 00 f9     str     x2, [sp, #104]
+      48:       e2 03 08 aa     mov     x2, x8
+      4c:       e3 33 00 f9     str     x3, [sp, #96]
+      50:       e0 5f 00 bd     str     s0, [sp, #92]
+      54:       e1 2b 00 fd     str     d1, [sp, #80]
+      58:       e4 27 00 f9     str     x4, [sp, #72]
+      5c:       e9 23 00 f9     str     x9, [sp, #64]
+      60:       ec 1f 00 f9     str     x12, [sp, #56]
+      64:       ed 1b 00 f9     str     x13, [sp, #48]
+      68:       ee 17 00 f9     str     x14, [sp, #40]
+      6c:       ef 13 00 f9     str     x15, [sp, #32]
+      70:       f0 0f 00 f9     str     x16, [sp, #24]
+      74:       f1 0b 00 f9     str     x17, [sp, #16]
+      78:       f2 07 00 f9     str     x18, [sp, #8]
+      7c:       00 00 00 94     bl      #0 <leaf_call+0x7c>
+      80:       e8 3b 40 f9     ldr     x8, [sp, #112]
+      84:       e5 03 08 2a     mov     w5, w8
+      88:       e9 1f 40 f9     ldr     x9, [sp, #56]
+      8c:       25 01 00 39     strb    w5, [x9]
+      90:       ea 37 40 f9     ldr     x10, [sp, #104]
+      94:       e5 03 0a 2a     mov     w5, w10
+      98:       eb 1b 40 f9     ldr     x11, [sp, #48]
+      9c:       65 01 00 79     strh    w5, [x11]
+      a0:       ec 33 40 f9     ldr     x12, [sp, #96]
+      a4:       e5 03 0c 2a     mov     w5, w12
+      a8:       ed 17 40 f9     ldr     x13, [sp, #40]
+      ac:       a5 01 00 b9     str     w5, [x13]
+      b0:       e0 5f 40 bd     ldr     s0, [sp, #92]
+      b4:       ee 13 40 f9     ldr     x14, [sp, #32]
+      b8:       c0 01 00 bd     str     s0, [x14]
+      bc:       e1 2b 40 fd     ldr     d1, [sp, #80]
+      c0:       ef 0f 40 f9     ldr     x15, [sp, #24]
+      c4:       e1 01 00 fd     str     d1, [x15]
+      c8:       f0 0b 40 f9     ldr     x16, [sp, #16]
+      cc:       f1 27 40 f9     ldr     x17, [sp, #72]
+      d0:       f1 3f 00 f9     str     x17, [sp, #120]
+      d4:       e0 03 10 aa     mov     x0, x16
+      d8:       e1 07 40 f9     ldr     x1, [sp, #8]
+      dc:       e2 23 40 f9     ldr     x2, [sp, #64]
+      e0:       00 00 00 94     bl      #0 <leaf_call+0xe0>
+      e4:       fe 5b 40 f9     ldr     x30, [sp, #176]
+      e8:       ff 03 03 91     add     sp, sp, #192
+      ec:       c0 03 5f d6     ret
+
+00000000000000f0 main:
+      f0:       ff 83 03 d1     sub     sp, sp, #224
+      f4:       f5 5b 00 f9     str     x21, [sp, #176]
+      f8:       f4 4f 0c a9     stp     x20, x19, [sp, #192]
+      fc:       fd 7b 0d a9     stp     x29, x30, [sp, #208]
+     100:       fd 43 03 91     add     x29, sp, #208
+     104:       00 00 80 52     mov     w0, #0
+     108:       08 00 80 52     mov     w8, #0
+     10c:       e9 03 40 b2     orr     x9, xzr, #0x1
+     110:       ea 03 00 32     orr     w10, wzr, #0x1
+     114:       eb 03 1f 32     orr     w11, wzr, #0x2
+     118:       ec 07 00 32     orr     w12, wzr, #0x3
+     11c:       ed 03 1e 32     orr     w13, wzr, #0x4
+     120:       0e 14 a8 52     mov     w14, #1084227584
+     124:       c0 01 27 1e     fmov    s0, w14
+     128:       0f 03 e8 d2     mov     x15, #4618441417868443648
+     12c:       e1 01 67 9e     fmov    d1, x15
+     130:       ee 0b 00 32     orr     w14, wzr, #0x7
+     134:       f0 03 1d 32     orr     w16, wzr, #0x8
+     138:       31 01 80 52     mov     w17, #9
+     13c:       e2 03 7f b2     orr     x2, xzr, #0x2
+     140:       ef 07 40 b2     orr     x15, xzr, #0x3
+     144:       b2 9b 00 d1     sub     x18, x29, #38
+     148:       a1 9f 00 d1     sub     x1, x29, #39
+     14c:       a3 ab 00 d1     sub     x3, x29, #42
+     150:       a4 c3 00 d1     sub     x4, x29, #48
+     154:       a5 d3 00 d1     sub     x5, x29, #52
+     158:       a6 03 01 d1     sub     x6, x29, #64
+     15c:       a7 0f 01 d1     sub     x7, x29, #67
+     160:       b3 43 01 d1     sub     x19, x29, #80
+     164:       b4 63 01 d1     sub     x20, x29, #88
+     168:       bf c3 1d b8     stur    wzr, [x29, #-36]
+     16c:       f5 03 12 aa     mov     x21, x18
+     170:       a8 02 00 39     strb    w8, [x21]
+     174:       aa 06 00 39     strb    w10, [x21, #1]
+     178:       2b 00 00 39     strb    w11, [x1]
+     17c:       6c 00 00 79     strh    w12, [x3]
+     180:       8d 00 00 b9     str     w13, [x4]
+     184:       a0 00 00 bd     str     s0, [x5]
+     188:       c1 00 00 fd     str     d1, [x6]
+     18c:       f5 03 07 aa     mov     x21, x7
+     190:       ae 02 00 39     strb    w14, [x21]
+     194:       a9 02 09 8b     add     x9, x21, x9
+     198:       b0 06 00 39     strb    w16, [x21, #1]
+     19c:       31 05 00 39     strb    w17, [x9, #1]
+     1a0:       a0 43 1a b8     stur    w0, [x29, #-92]
+     1a4:       e0 03 13 aa     mov     x0, x19
+     1a8:       e1 37 00 f9     str     x1, [sp, #104]
+     1ac:       e1 03 12 aa     mov     x1, x18
+     1b0:       ef 33 00 f9     str     x15, [sp, #96]
+     1b4:       e3 2f 00 f9     str     x3, [sp, #88]
+     1b8:       e4 2b 00 f9     str     x4, [sp, #80]
+     1bc:       e5 27 00 f9     str     x5, [sp, #72]
+     1c0:       e6 23 00 f9     str     x6, [sp, #64]
+     1c4:       e7 1f 00 f9     str     x7, [sp, #56]
+     1c8:       f4 1b 00 f9     str     x20, [sp, #48]
+     1cc:       00 00 00 94     bl      #0 <main+0xdc>
+     1d0:       a0 03 5b f8     ldur    x0, [x29, #-80]
+     1d4:       e9 37 40 f9     ldr     x9, [sp, #104]
+     1d8:       28 01 40 39     ldrb    w8, [x9]
+     1dc:       ef 03 08 2a     mov     w15, w8
+     1e0:       e1 1d 40 d3     ubfx    x1, x15, #0, #8
+     1e4:       ef 2f 40 f9     ldr     x15, [sp, #88]
+     1e8:       e8 01 40 79     ldrh    w8, [x15]
+     1ec:       f2 03 08 2a     mov     w18, w8
+     1f0:       42 3e 40 d3     ubfx    x2, x18, #0, #16
+     1f4:       f2 2b 40 f9     ldr     x18, [sp, #80]
+     1f8:       48 02 40 b9     ldr     w8, [x18]
+     1fc:       e3 03 08 2a     mov     w3, w8
+     200:       63 7c 40 d3     ubfx    x3, x3, #0, #32
+     204:       e4 27 40 f9     ldr     x4, [sp, #72]
+     208:       80 00 40 bd     ldr     s0, [x4]
+     20c:       e5 23 40 f9     ldr     x5, [sp, #64]
+     210:       a1 00 40 fd     ldr     d1, [x5]
+     214:       e6 1f 40 f9     ldr     x6, [sp, #56]
+     218:       e7 1b 40 f9     ldr     x7, [sp, #48]
+     21c:       e0 17 00 f9     str     x0, [sp, #40]
+     220:       e0 03 07 aa     mov     x0, x7
+     224:       e1 13 00 f9     str     x1, [sp, #32]
+     228:       e1 03 06 aa     mov     x1, x6
+     22c:       e6 33 40 f9     ldr     x6, [sp, #96]
+     230:       e2 0f 00 f9     str     x2, [sp, #24]
+     234:       e2 03 06 aa     mov     x2, x6
+     238:       e3 0b 00 f9     str     x3, [sp, #16]
+     23c:       e0 0f 00 bd     str     s0, [sp, #12]
+     240:       e1 03 00 fd     str     d1, [sp]
+     244:       00 00 00 94     bl      #0 <main+0x154>
+     248:       a4 83 5a f8     ldur    x4, [x29, #-88]
+     24c:       e0 17 40 f9     ldr     x0, [sp, #40]
+     250:       e1 13 40 f9     ldr     x1, [sp, #32]
+     254:       e2 0f 40 f9     ldr     x2, [sp, #24]
+     258:       e3 0b 40 f9     ldr     x3, [sp, #16]
+     25c:       e0 0f 40 bd     ldr     s0, [sp, #12]
+     260:       e1 03 40 fd     ldr     d1, [sp]
+     264:       67 ff ff 97     bl      #-612 <leaf_call>
+     268:       a0 43 5a b8     ldur    w0, [x29, #-92]
+     26c:       fd 7b 4d a9     ldp     x29, x30, [sp, #208]
+     270:       f4 4f 4c a9     ldp     x20, x19, [sp, #192]
+     274:       f5 5b 40 f9     ldr     x21, [sp, #176]
+     278:       ff 83 03 91     add     sp, sp, #224
+     27c:       c0 03 5f d6     ret
+
+
+
 ; vim: ft=asm68k
 
--- a/doc/manual/callconvs/callconv_arm64.tex	Tue Mar 01 09:29:21 2022 +0100
+++ b/doc/manual/callconvs/callconv_arm64.tex	Tue Mar 01 21:02:10 2022 +0100
@@ -1,6 +1,6 @@
 %//////////////////////////////////////////////////////////////////////////////
 %
-% Copyright (c) 2014-2020 Daniel Adler <dadler@uni-goettingen.de>, 
+% Copyright (c) 2014-2022 Daniel Adler <dadler@uni-goettingen.de>, 
 %                         Tassilo Philipp <tphilipp@potion-studios.com>
 %
 % Permission to use, copy, modify, and distribute this software for any
@@ -17,9 +17,6 @@
 %
 %//////////////////////////////////////////////////////////////////////////////
 
-% ==================================================
-% ARM64
-% ==================================================
 \subsection{ARM64 Calling Conventions}
 
 \paragraph{Overview}
@@ -62,10 +59,10 @@
 {\bf x30}     & permanent, link register\\
 {\bf sp}      & permanent, stack pointer\\
 {\bf pc}      & program counter\\
-{\bf v0}      & scratch, first float parameter, float return value\\
-{\bf v1-v7}   & scratch, float parameters\\
+{\bf v0-v7}   & scratch, float parameters, return value\\
 {\bf v8-v15}  & lower 64 bits are permanent, scratch\\
 {\bf v16-v31} & scratch\\
+{\bf xzr}     & zero register, always zero\\
 \end{tabular*}
 \caption{Register usage on arm64}
 \end{table}
@@ -78,18 +75,24 @@
 \item first 8 integer arguments are passed using x0-x7
 \item first 8 floating point arguments are passed using d0-d7
 \item subsequent parameters are pushed onto the stack
-\item if the callee takes the address of one of the parameters and uses it to address other parameters (e.g. varargs) it has to copy - in its prolog - the first 8 integer and 8 floating-point registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though)
-\item structures and unions up to 16 bytes in size are passed by value (after rounding up the size to the nearest multiple of 8), as a sequence of dwords
-\item for a structure or union larger than 16 bytes in size, a pointer to it is passed
-\item if return value is a structure, a pointer pointing to the return value's space is passed in r0, the first parameter in r1, etc... (see {\bf return values})
-\item stack is required to be throughout eight-byte aligned
+\item if the callee takes the address of one of the parameters and uses it to address other parameters (e.g. varargs) it has to copy - in its prolog - the first 8 integer
+and 8 floating-point registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though)
+\item aggregates (struct, union) with 1 to 4 identical floating-point members (either float or double) are passed field-by-field (8-byte aligned if passed via stack), except if passed as a vararg
+\item other aggregates (struct, union) \textgreater\ 16 bytes in size are passed indirectly, as a pointer to a copy (if needed)
+\item all other aggregates (struct, union), after rounding up the size to the nearest multiple of 8, are passed as a sequence of dwords, like integers
+\item aggregates are never split across registers and stack, so if not enough registers are available an aggregate is passed via the stack (for aggregates that
+would've been passed as floating point values, any still unused float registers will be skipped for any subsequent arg)
+\item stack is required throughout to be eight-byte aligned
 \end{itemize}
 
 \paragraph{Return values}
+
 \begin{itemize}
 \item integer return values use x0
 \item floating-point return values use d0
-\item otherwise, the caller allocates space, passes pointer to it to the callee through x8, and callee writes return value to this space
+\item aggregates (struct, union) that would be passed via registers if passed as a first param, are returned via those registers
+\item otherwise (e.g. if regs exhausted, or \textgreater\ 16 bytes, ...), the caller allocates space, passes pointer to it to the callee through
+x8, and callee writes return value to this space (note that this is not a hidden first param, as x8 is not used for passing params); the ptr to the aggregate is returned in x0
 \end{itemize}
 
 \paragraph{Stack layout}
@@ -116,7 +119,7 @@
                                          & \ldots                 &                                      &                              \\
                                          & q0                     &                                      &                              \\
 \hhline{~-~~}                                                                             
-register save area (with return address) &                        &                                      &                              \\ % fp will point here (to 1st arg) @@@ verify
+register save area (with return address) &                        &                                      &                              \\ % fp will point here (to 1st arg)
 \hhline{~-~~}                                                                             
 local data                               &                        &                                      &                              \\
 \hhline{~-~~}                                                                             
@@ -138,6 +141,11 @@
 \begin{itemize}
 \item arguments passed via stack use only the space they need, but are subject to type alignment requirements (which is 1 byte for char and bool, 2 for short, 4 for int and 8 for every other type)
 \item caller is required to sign and zero-extend arguments smaller than 32bits
+\item empty aggregates (allowed in C++, but non-standard in C, however compiler extensions exist) as parameters:
+\begin{itemize}
+\item allowed to be ignored in C
+\item allowed to be ignored in C++, if the aggregate is trivial; otherwise it's treated as an aggregate with a single one-byte field
+\end{itemize}
 \end{itemize}
 
 
@@ -150,5 +158,6 @@
 
 \begin{itemize}
 \item variadic function calls do not use any SIMD or floating point registers (for fixed and variable args), meaning first 8 params are passed via x0-x7, the rest via the stack
+\item a function that returns an aggregate indirectly via a pointer passed to it via x8 does not seem to be required to put that address in x0 on return (but it should be safe to do so)
 \end{itemize}