changeset 327:c0390dc85a07

- doc: added disassembly examples for many platforms and calling conventions, for reference
author Tassilo Philipp
date Fri, 22 Nov 2019 23:08:59 +0100
parents 09aaa2e774cd
children 276eb8c87aa0
files doc/disas_examples/arm.armhf.disas doc/disas_examples/arm.atpcs_arm.disas doc/disas_examples/arm.darwin_arm.disas doc/disas_examples/arm.darwin_thumb.disas doc/disas_examples/arm64.aapcs.disas doc/disas_examples/mips.eabi.disas doc/disas_examples/mips.o32.disas doc/disas_examples/mips64.n64.disas doc/disas_examples/ppc.darwin.disas doc/disas_examples/ppc.sysv.disas doc/disas_examples/ppc64.elfabi.disas doc/disas_examples/sparc.sparc.disas doc/disas_examples/sparc64.sparc64.disas doc/disas_examples/x64.sysv.disas doc/disas_examples/x64.win.disas doc/disas_examples/x86.cdecl.disas doc/disas_examples/x86.fastcall_borland.disas doc/disas_examples/x86.plan9call.disas
diffstat 18 files changed, 6108 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/arm.armhf.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,91 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from raspbian-wheezy_20120715-raspberrypi w/ gcc 4.6.3
+; note: this is arm mode, -mthumb says "sorry, unimplemented: Thumb-1 hard-float VFP ABI" (however, -msoft-float -mthumb is supported)
+;       not sure about thumb-2 as there is no flag for it, but I don't think this exists if using the VFP
+
+00000000 <leaf_call>:
+   0:   e52db004        push    {fp} ; (str fp, [sp, #-4]!) | save caller's fp
+   4:   e28db000        add     fp, sp, #0        ; | prolog: fp = sp
+   8:   e24dd014        sub     sp, sp, #20       ; |         reserve 20 bytes of local area
+   c:   e50b0008        str     r0, [fp, #-8]     ; in arg 0 -> temp space in local area
+  10:   e50b100c        str     r1, [fp, #-12]    ; in arg 1 -> temp space in local area
+  14:   e50b2010        str     r2, [fp, #-16]    ; in arg 2 -> temp space in local area
+  18:   e50b3014        str     r3, [fp, #-20]    ; in arg 3 -> temp space in local area (stack-passed in args 4,5,6 are never read here)
+  1c:   e28bd000        add     sp, fp, #0        ; | epilog: sp = fp, dropping the local area
+  20:   e8bd0800        pop     {fp}              ; |         restore caller's fp
+  24:   e12fff1e        bx      lr                ; |         return to address in lr
+
+00000028 <nonleaf_call>:
+;spill here, if needed: push    {r0, r1, r2, r3}  ; |         just for ref, if present this would change below offsets
+  28:   e92d4800        push    {fp, lr}          ; |
+  2c:   e28db004        add     fp, sp, #4        ; | prolog (fp -> saved lr slot, so entry sp = fp + 4)
+  30:   e24dd020        sub     sp, sp, #32       ; |
+  34:   e50b0008        str     r0, [fp, #-8]     ; in arg 0 -> temp space in local area
+  38:   e50b100c        str     r1, [fp, #-12]    ; in arg 1 -> temp space in local area
+  3c:   e50b2010        str     r2, [fp, #-16]    ; in arg 2 -> temp space in local area
+  40:   e50b3014        str     r3, [fp, #-20]    ; in arg 3 -> temp space in local area
+  44:   e24dd0e8        sub     sp, sp, #232      ; alloca(220) - with padding to guarantee alignment
+  48:   e28d3010        add     r3, sp, #16       ; |
+  4c:   e2833007        add     r3, r3, #7        ; |
+  50:   e1a031a3        lsr     r3, r3, #3        ; | start of (aligned) alloca()'d memory -> r3, leaving room at top of stack for param area
+  54:   e1a03183        lsl     r3, r3, #3        ; |        (+7, >>3, <<3 rounds sp+16 up to a multiple of 8)
+  58:   e3a0204c        mov     r2, #76           ; 'L' -> r2, and ...
+  5c:   e5c32000        strb    r2, [r3]          ; ... store in local area (of alloca()'d space)
+  60:   e59b3008        ldr     r3, [fp, #8]      ; arg 4 (= in arg 5, fetched from prev frame's param area), and ...
+  64:   e58d3000        str     r3, [sp]          ; ... "pushed" onto stack
+  68:   e59b300c        ldr     r3, [fp, #12]     ; arg 5 (= in arg 6, fetched from prev frame's param area), and ...
+  6c:   e58d3004        str     r3, [sp, #4]      ; ... "pushed" onto stack
+  70:   e59b3010        ldr     r3, [fp, #16]     ; arg 6 (= in arg 7, fetched from prev frame's param area), and ...
+  74:   e58d3008        str     r3, [sp, #8]      ; ... "pushed" onto stack
+  78:   e51b000c        ldr     r0, [fp, #-12]    ; arg 0 (= in arg 1, from temp space in local area)
+  7c:   e51b1010        ldr     r1, [fp, #-16]    ; arg 1 (= in arg 2, from temp space in local area)
+  80:   e51b2014        ldr     r2, [fp, #-20]    ; arg 2 (= in arg 3, from temp space in local area)
+  84:   e59b3004        ldr     r3, [fp, #4]      ; arg 3 (= in arg 4, fetched from prev frame's param area)
+  88:   ebfffffe        bl      0 <leaf_call>     ; return address -> r14/lr, and call
+  8c:   e24bd004        sub     sp, fp, #4        ; | sp -> saved fp slot (this also releases the alloca()'d space)
+  90:   e8bd8800        pop     {fp, pc}          ; | epilog (restore fp, and return by popping saved lr into pc)
+
+00000094 <main>:
+  94:   e92d4800        push    {fp, lr}          ; |
+  98:   e28db004        add     fp, sp, #4        ; | prolog
+  9c:   e24dd010        sub     sp, sp, #16       ; |        (16 bytes: param area for the stack-passed args 4-7)
+  a0:   e3a03004        mov     r3, #4            ; arg 4, and ...
+  a4:   e58d3000        str     r3, [sp]          ; ... "pushed" onto stack
+  a8:   e3a03005        mov     r3, #5            ; arg 5, and ...
+  ac:   e58d3004        str     r3, [sp, #4]      ; ... "pushed" onto stack
+  b0:   e3a03006        mov     r3, #6            ; arg 6, and ...
+  b4:   e58d3008        str     r3, [sp, #8]      ; ... "pushed" onto stack
+  b8:   e3a03007        mov     r3, #7            ; arg 7, and ...
+  bc:   e58d300c        str     r3, [sp, #12]     ; ... "pushed" onto stack
+  c0:   e3a00000        mov     r0, #0            ; arg 0
+  c4:   e3a01001        mov     r1, #1            ; arg 1
+  c8:   e3a02002        mov     r2, #2            ; arg 2
+  cc:   e3a03003        mov     r3, #3            ; arg 3
+  d0:   ebfffffe        bl      28 <nonleaf_call> ; return address -> r14/lr, and call
+  d4:   e3a03000        mov     r3, #0            ; return value (0) via r3 ... (a bit suboptimal)
+  d8:   e1a00003        mov     r0, r3            ; ... to r0
+  dc:   e24bd004        sub     sp, fp, #4        ; | sp -> saved fp slot
+  e0:   e8bd8800        pop     {fp, pc}          ; | epilog (restore fp, and return by popping saved lr into pc)
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/arm.atpcs_arm.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,191 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from netbsd-4.0.1-cats w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   e1a0c00d        mov     ip, sp                 ; | entry sp -> ip
+   4:   e92dd800        stmdb   sp!, {fp, ip, lr, pc}  ; | save fp, entry sp (in ip), lr and pc below entry sp
+   8:   e24cb004        sub     fp, ip, #4      ; 0x4  ; | prolog: fp = entry sp - 4 (-> saved pc slot)
+   c:   e24dd010        sub     sp, sp, #16     ; 0x10 ; |         16 bytes of local area
+  10:   e50b0010        str     r0, [fp, #-16]         ; in arg 0 -> temp space in local area
+  14:   e50b1014        str     r1, [fp, #-20]         ; in arg 1 -> temp space in local area
+  18:   e50b2018        str     r2, [fp, #-24]         ; in arg 2 -> temp space in local area
+  1c:   e50b301c        str     r3, [fp, #-28]         ; in arg 3 -> temp space in local area (stack-passed in args 4,5,6 are never read here)
+  20:   e24bd00c        sub     sp, fp, #12     ; 0xc  ; | sp -> saved fp slot
+  24:   e89da800        ldmia   sp, {fp, sp, pc}       ; | epilog: restore fp, sp (from saved ip), and return (saved lr -> pc)
+
+00000028 <nonleaf_call>:
+  28:   e1a0c00d        mov     ip, sp                 ; |
+;spill here, if needed: stmdb   sp!, {r0, r1, r2, r3}  ; |         just for ref, if present this would change below offsets
+  2c:   e92dd800        stmdb   sp!, {fp, ip, lr, pc}  ; |
+  30:   e24cb004        sub     fp, ip, #4             ; | prolog (fp = entry sp - 4)
+  34:   e24dd020        sub     sp, sp, #32            ; |
+  38:   e50b0010        str     r0, [fp, #-16]         ; in arg 0 -> temp space in local area
+  3c:   e50b1014        str     r1, [fp, #-20]         ; in arg 1 -> temp space in local area
+  40:   e50b2018        str     r2, [fp, #-24]         ; in arg 2 -> temp space in local area
+  44:   e50b301c        str     r3, [fp, #-28]         ; in arg 3 -> temp space in local area
+  48:   e24dd0e0        sub     sp, sp, #224           ; alloca(220) - with padding to guarantee alignment
+  4c:   e28d200c        add     r2, sp, #12            ; |
+  50:   e50b2020        str     r2, [fp, #-32]         ; |        @@@ pointless store of r2 to local area to load it back ...
+  54:   e51b2020        ldr     r2, [fp, #-32]         ; |        @@@ ... into r2
+  58:   e2823003        add     r3, r2, #3             ; | start of (aligned) alloca()'d memory -> r3, leaving room at top of stack for param area
+  5c:   e1a03123        mov     r3, r3, lsr #2         ; |        (+3, >>2, <<2 rounds sp+12 up to a multiple of 4)
+  60:   e1a03103        mov     r3, r3, lsl #2         ; /
+  64:   e50b3020        str     r3, [fp, #-32]         ; \
+  68:   e51b2020        ldr     r2, [fp, #-32]         ; | r3 -> r2 (via local area), to free r3 (kinda pointless as followup code could use r2 and r3 the other way round)
+  6c:   e3a0304c        mov     r3, #76                ; 'L' -> r3, and ...
+  70:   e5c23000        strb    r3, [r2]               ; ... store in local area (of alloca()'d space)
+  74:   e59b3008        ldr     r3, [fp, #8]           ; arg 4 (= in arg 5, fetched from prev frame's param area), and ...
+  78:   e58d3000        str     r3, [sp]               ; ... "pushed" onto stack
+  7c:   e59b300c        ldr     r3, [fp, #12]          ; arg 5 (= in arg 6, fetched from prev frame's param area), and ...
+  80:   e58d3004        str     r3, [sp, #4]           ; ... "pushed" onto stack
+  84:   e59b3010        ldr     r3, [fp, #16]          ; arg 6 (= in arg 7, fetched from prev frame's param area), and ...
+  88:   e58d3008        str     r3, [sp, #8]           ; ... "pushed" onto stack
+  8c:   e51b0014        ldr     r0, [fp, #-20]         ; arg 0 (= in arg 1, from temp space in local area)
+  90:   e51b1018        ldr     r1, [fp, #-24]         ; arg 1 (= in arg 2, from temp space in local area)
+  94:   e51b201c        ldr     r2, [fp, #-28]         ; arg 2 (= in arg 3, from temp space in local area)
+  98:   e59b3004        ldr     r3, [fp, #4]           ; arg 3 (= in arg 4, fetched from prev frame's param area)
+  9c:   ebfffffe        bl      9c <nonleaf_call+0x74> ; return address -> r14/lr, and call (target shown is the bl itself, so apparently unrelocated; presumably leaf_call)
+  a0:   e24bd00c        sub     sp, fp, #12            ; | sp -> saved fp slot (this also releases the alloca()'d space)
+  a4:   e89da800        ldmia   sp, {fp, sp, pc}       ; | epilog (restore fp, sp from saved ip, and return via saved lr -> pc)
+
+000000a8 <main>:
+  a8:   e1a0c00d        mov     ip, sp                 ; |
+  ac:   e92dd800        stmdb   sp!, {fp, ip, lr, pc}  ; |
+  b0:   e24cb004        sub     fp, ip, #4             ; | prolog (fp = entry sp - 4)
+  b4:   e24dd010        sub     sp, sp, #16            ; |        (16 bytes: param area for the stack-passed args 4-7)
+  b8:   e3a03004        mov     r3, #4                 ; arg 4, and ...
+  bc:   e58d3000        str     r3, [sp]               ; ... "pushed" onto stack
+  c0:   e3a03005        mov     r3, #5                 ; arg 5, and ...
+  c4:   e58d3004        str     r3, [sp, #4]           ; ... "pushed" onto stack
+  c8:   e3a03006        mov     r3, #6                 ; arg 6, and ...
+  cc:   e58d3008        str     r3, [sp, #8]           ; ... "pushed" onto stack
+  d0:   e3a03007        mov     r3, #7                 ; arg 7, and ...
+  d4:   e58d300c        str     r3, [sp, #12]          ; ... "pushed" onto stack
+  d8:   e3a00000        mov     r0, #0                 ; arg 0
+  dc:   e3a01001        mov     r1, #1                 ; arg 1
+  e0:   e3a02002        mov     r2, #2                 ; arg 2
+  e4:   e3a03003        mov     r3, #3                 ; arg 3
+  e8:   ebfffffe        bl      e8 <main+0x40>         ; return address -> r14/lr, and call (target shown is the bl itself, so apparently unrelocated; presumably nonleaf_call)
+  ec:   e3a03000        mov     r3, #0                 ; return value (0) via r3, ... (a bit suboptimal)
+  f0:   e1a00003        mov     r0, r3                 ; ... to r0
+  f4:   e24bd00c        sub     sp, fp, #12            ; | sp -> saved fp slot
+  f8:   e89da800        ldmia   sp, {fp, sp, pc}       ; | epilog (restore fp, sp from saved ip, and return via saved lr -> pc)
+
+
+
+; output from freebsd-11.0_r260099-raspberrypi w/ clang 3.3
+
+00000000 <leaf_call>:
+   0:   e24dd030        sub     sp, sp, #48     ; 0x30 - prolog: 48 bytes of local area (no frame pointer set up, nothing saved)
+   4:   e58d002c        str     r0, [sp, #44]   ; in arg 0 -> local area
+   8:   e58d1028        str     r1, [sp, #40]   ; in arg 1 -> local area
+   c:   e58d2024        str     r2, [sp, #36]   ; in arg 2 -> local area
+  10:   e58d3020        str     r3, [sp, #32]   ; in arg 3 -> local area
+  14:   e59dc030        ldr     ip, [sp, #48]   ; in arg 4 (from prev frame's param area; sp+48 = entry sp), and ...
+  18:   e58dc01c        str     ip, [sp, #28]   ; ... -> local area
+  1c:   e59dc034        ldr     ip, [sp, #52]   ; in arg 5 (from prev frame's param area), and ...
+  20:   e58dc018        str     ip, [sp, #24]   ; ... -> local area
+  24:   e59dc038        ldr     ip, [sp, #56]   ; in arg 6 (from prev frame's param area), and ...
+  28:   e58dc014        str     ip, [sp, #20]   ; ... -> local area
+  2c:   e58d1010        str     r1, [sp, #16]   ; |
+  30:   e58d300c        str     r3, [sp, #12]   ; | r1, r3, r2, r0 (still in args 1,3,2,0) stored to local area a second time
+  34:   e58d2008        str     r2, [sp, #8]    ; |
+  38:   e58d0004        str     r0, [sp, #4]    ; |
+  3c:   e28dd030        add     sp, sp, #48     ; 0x30 - epilog: release local area
+  40:   e12fff1e        bx      lr              ; return to address in lr
+
+00000044 <nonleaf_call>:
+  44:   e92d4bf0        push    {r4, r5, r6, r7, r8, r9, fp, lr} ; | save r4-r9, fp and lr (32 bytes)
+  48:   e28db018        add     fp, sp, #24     ; 0x18 - | prolog: fp -> saved fp slot, so entry sp = fp + 8
+  4c:   e24ddf4a        sub     sp, sp, #296    ; 0x128 - |        296 bytes of local area (no separate dynamic alloca() adjustment of sp follows)
+  50:   e50b001c        str     r0, [fp, #-28]  ; in arg 0 -> local area
+  54:   e50b1020        str     r1, [fp, #-32]  ; in arg 1 -> local area
+  58:   e50b2024        str     r2, [fp, #-36]  ; in arg 2 -> local area
+  5c:   e50b3028        str     r3, [fp, #-40]  ; in arg 3 -> local area
+  60:   e59bc008        ldr     ip, [fp, #8]    ; in arg 4 (from prev frame's param area), and ...
+  64:   e50bc02c        str     ip, [fp, #-44]  ; ... -> local area
+  68:   e59bc00c        ldr     ip, [fp, #12]   ; in arg 5 (from prev frame's param area), and ...
+  6c:   e50bc030        str     ip, [fp, #-48]  ; ... -> local area
+  70:   e59bc010        ldr     ip, [fp, #16]   ; in arg 6 (from prev frame's param area), and ...
+  74:   e50bc034        str     ip, [fp, #-52]  ; ... -> local area
+  78:   e59bc014        ldr     ip, [fp, #20]   ; in arg 7 (from prev frame's param area), and ...
+  7c:   e50bc038        str     ip, [fp, #-56]  ; ... -> local area
+  80:   e3a0c04c        mov     ip, #76 ; 0x4c - 'L' -> ip, and ...
+  84:   e5cdc02c        strb    ip, [sp, #44]   ; ... store in local area (presumably the alloca(220) space, folded into the fixed frame)
+  88:   e51bc020        ldr     ip, [fp, #-32]  ; |
+  8c:   e51be024        ldr     lr, [fp, #-36]  ; |
+  90:   e51b4028        ldr     r4, [fp, #-40]  ; |
+  94:   e51b502c        ldr     r5, [fp, #-44]  ; | in args 1,2,3,4,5,6,7 from local area -> ip, lr, r4, r5, r6, r7, r8
+  98:   e51b6030        ldr     r6, [fp, #-48]  ; |
+  9c:   e51b7034        ldr     r7, [fp, #-52]  ; |
+  a0:   e51b8038        ldr     r8, [fp, #-56]  ; |
+  a4:   e1a0900d        mov     r9, sp          ; r9 = sp, used as base of outgoing param area
+  a8:   e5898008        str     r8, [r9, #8]    ; arg 6 (= in arg 7) "pushed" onto stack
+  ac:   e5897004        str     r7, [r9, #4]    ; arg 5 (= in arg 6) "pushed" onto stack
+  b0:   e5896000        str     r6, [r9]        ; arg 4 (= in arg 5) "pushed" onto stack
+  b4:   e58d0028        str     r0, [sp, #40]   ; r0 (still in arg 0) saved to local area, before being overwritten by ...
+  b8:   e1a0000c        mov     r0, ip          ; ... arg 0 (= in arg 1)
+  bc:   e58d1024        str     r1, [sp, #36]   ; r1 saved to local area, before being overwritten by ...
+  c0:   e1a0100e        mov     r1, lr          ; ... arg 1 (= in arg 2)
+  c4:   e58d2020        str     r2, [sp, #32]   ; r2 saved to local area, before being overwritten by ...
+  c8:   e1a02004        mov     r2, r4          ; ... arg 2 (= in arg 3)
+  cc:   e58d301c        str     r3, [sp, #28]   ; r3 saved to local area, before being overwritten by ...
+  d0:   e1a03005        mov     r3, r5          ; ... arg 3 (= in arg 4)
+  d4:   ebfffffe        bl      0 <leaf_call>   ; return address -> r14/lr, and call
+  d8:   e59d001c        ldr     r0, [sp, #28]   ; |
+  dc:   e59d1020        ldr     r1, [sp, #32]   ; | values saved before the call reloaded (in reverse register order: r0 gets former r3, etc.) ...
+  e0:   e59d2024        ldr     r2, [sp, #36]   ; |
+  e4:   e59d3028        ldr     r3, [sp, #40]   ; /
+  e8:   e58d0018        str     r0, [sp, #24]   ; \
+  ec:   e58d1014        str     r1, [sp, #20]   ; | ... and stored to other local area slots, which are not read again in this listing
+  f0:   e58d2010        str     r2, [sp, #16]   ; |
+  f4:   e58d300c        str     r3, [sp, #12]   ; |
+  f8:   e24bd018        sub     sp, fp, #24     ; 0x18 - | sp -> saved r4 slot
+  fc:   e8bd8bf0        pop     {r4, r5, r6, r7, r8, r9, fp, pc} ; | epilog (restore r4-r9 and fp, and return by popping saved lr into pc)
+
+00000100 <main>:
+ 100:   e92d4800        push    {fp, lr}        ; |
+ 104:   e1a0b00d        mov     fp, sp          ; | prolog (fp -> saved fp slot)
+ 108:   e24dd018        sub     sp, sp, #24     ; 0x18 - | 24 bytes: local area, and param area for args 4-7
+ 10c:   e3a00000        mov     r0, #0  ; 0x0 - arg 0
+ 110:   e50b0004        str     r0, [fp, #-4]   ; 0 -> local area (presumably main's implicit return value slot - to be confirmed)
+ 114:   e3a01007        mov     r1, #7  ; 0x7 - arg 7, and ...
+ 118:   e1a0200d        mov     r2, sp          ; (r2 = sp, used as base of outgoing param area)
+ 11c:   e582100c        str     r1, [r2, #12]   ; ... "pushed" onto stack
+ 120:   e3a01006        mov     r1, #6  ; 0x6 - arg 6, and ...
+ 124:   e5821008        str     r1, [r2, #8]    ; ... "pushed" onto stack
+ 128:   e3a01005        mov     r1, #5  ; 0x5 - arg 5, and ...
+ 12c:   e5821004        str     r1, [r2, #4]    ; ... "pushed" onto stack
+ 130:   e3a01004        mov     r1, #4  ; 0x4 - arg 4, and ...
+ 134:   e5821000        str     r1, [r2]        ; ... "pushed" onto stack
+ 138:   e3a01001        mov     r1, #1  ; 0x1 - arg 1
+ 13c:   e3a02002        mov     r2, #2  ; 0x2 - arg 2
+ 140:   e3a03003        mov     r3, #3  ; 0x3 - arg 3
+ 144:   e50b0008        str     r0, [fp, #-8]   ; 0 (in r0) saved to local area, to be reloaded after the call
+ 148:   ebfffffe        bl      44 <nonleaf_call> ; return address -> r14/lr, and call
+ 14c:   e51b0008        ldr     r0, [fp, #-8]   ; return value (0), reloaded from local area
+ 150:   e1a0d00b        mov     sp, fp          ; | sp -> saved fp slot
+ 154:   e8bd8800        pop     {fp, pc}        ; | epilog (restore fp, and return by popping saved lr into pc)
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/arm.darwin_arm.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,235 @@
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	char x = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from iOS 6 w/ clang 3.7.1, dumped with llvm-objdump -triple armv7-unknown-unknown-macho ...
+
+_leaf_call:
+       0:       80 40 2d e9     push    {r7, lr}              ; |
+       4:       0d 70 a0 e1     mov     r7, sp                ; | prolog (r7 used as frame pointer, entry sp = r7 + 8)
+       8:       1c d0 4d e2     sub     sp, sp, #28           ; |        (28 bytes of local area)
+       c:       10 90 97 e5     ldr     r9, [r7, #16]         ; |
+      10:       0c c0 97 e5     ldr     r12, [r7, #12]        ; | in args 6,5,4 from prev frame's param area -> r9, r12, lr
+      14:       08 e0 97 e5     ldr     lr, [r7, #8]          ; |
+      18:       04 00 07 e5     str     r0, [r7, #-4]         ; in arg 0 -> local area
+      1c:       08 10 07 e5     str     r1, [r7, #-8]         ; in arg 1 -> local area
+      20:       0c 20 07 e5     str     r2, [r7, #-12]        ; in arg 2 -> local area
+      24:       0c 30 8d e5     str     r3, [sp, #12]         ; in arg 3 -> local area
+      28:       08 e0 8d e5     str     lr, [sp, #8]          ; in arg 4 -> local area
+      2c:       04 c0 8d e5     str     r12, [sp, #4]         ; in arg 5 -> local area
+      30:       00 90 8d e5     str     r9, [sp]              ; in arg 6 -> local area
+      34:       07 d0 a0 e1     mov     sp, r7                ; | sp -> saved r7 slot, dropping the local area
+      38:       80 80 bd e8     pop     {r7, pc}              ; | epilog (restore r7, and return by popping saved lr into pc)
+
+_nonleaf_call:
+      3c:       b0 40 2d e9     push    {r4, r5, r7, lr}      ; |
+      40:       08 70 8d e2     add     r7, sp, #8            ; | prolog (r7 -> saved r7 slot, so entry sp = r7 + 8)
+      44:       30 d0 4d e2     sub     sp, sp, #48           ; /
+      48:       14 90 97 e5     ldr     r9, [r7, #20]         ; \
+      4c:       10 c0 97 e5     ldr     r12, [r7, #16]        ; |
+      50:       0c e0 97 e5     ldr     lr, [r7, #12]         ; | in args 4,5,6,7 from prev frame's param area -> regs (r4, lr, r12, r9) ...
+      54:       08 40 97 e5     ldr     r4, [r7, #8]          ; |
+      58:       4c 50 00 e3     movw    r5, #76               ; 'L' -> r5
+      5c:       0c 00 07 e5     str     r0, [r7, #-12]        ; in arg 0 -> temp space in local area
+      60:       10 10 07 e5     str     r1, [r7, #-16]        ; in arg 1 -> temp space in local area
+      64:       14 20 07 e5     str     r2, [r7, #-20]        ; in arg 2 -> temp space in local area
+      68:       18 30 07 e5     str     r3, [r7, #-24]        ; in arg 3 -> temp space in local area
+      6c:       1c 40 8d e5     str     r4, [sp, #28]         ; |
+      70:       18 e0 8d e5     str     lr, [sp, #24]         ; |
+      74:       14 c0 8d e5     str     r12, [sp, #20]        ; | ... in args 4,5,6,7 in regs -> local area ...
+      78:       10 90 8d e5     str     r9, [sp, #16]         ; |
+      7c:       0f 50 cd e5     strb    r5, [sp, #15]         ; 'L' (in r5) -> local area
+      80:       10 00 17 e5     ldr     r0, [r7, #-16]        ; arg 0 (= in arg 1, from temp space in local area)
+      84:       14 10 17 e5     ldr     r1, [r7, #-20]        ; arg 1 (= in arg 2, from temp space in local area)
+      88:       18 20 17 e5     ldr     r2, [r7, #-24]        ; arg 2 (= in arg 3, from temp space in local area)
+      8c:       1c 30 9d e5     ldr     r3, [sp, #28]         ; arg 3 (= in arg 4, fetched from local area previously copied to)
+      90:       18 90 9d e5     ldr     r9, [sp, #24]         ; |
+      94:       14 c0 9d e5     ldr     r12, [sp, #20]        ; | args 4,5,6 (= in args 5,6,7, fetched from local area previously copied to) -> regs, and ...
+      98:       10 e0 9d e5     ldr     lr, [sp, #16]         ; /
+      9c:       00 90 8d e5     str     r9, [sp]              ; \
+      a0:       04 c0 8d e5     str     r12, [sp, #4]         ; | ... "pushed" onto stack
+      a4:       08 e0 8d e5     str     lr, [sp, #8]          ; |
+      a8:       d4 ff ff eb     bl      #-176 <_leaf_call>    ; return address -> r14/lr, and call
+      ac:       08 d0 47 e2     sub     sp, r7, #8            ; | sp -> saved r4 slot
+      b0:       b0 80 bd e8     pop     {r4, r5, r7, pc}      ; | epilog (restore r4, r5, r7, and return by popping saved lr into pc)
+
+_main:
+      b4:       90 40 2d e9     push    {r4, r7, lr}          ; |
+      b8:       04 70 8d e2     add     r7, sp, #4            ; | prolog (r7 -> saved r7 slot)
+      bc:       14 d0 4d e2     sub     sp, sp, #20           ; |
+      c0:       00 00 00 e3     movw    r0, #0                ; arg 0
+      c4:       01 10 00 e3     movw    r1, #1                ; arg 1
+      c8:       02 20 00 e3     movw    r2, #2                ; arg 2
+      cc:       03 30 00 e3     movw    r3, #3                ; arg 3
+      d0:       04 90 00 e3     movw    r9, #4                ; |
+      d4:       05 c0 00 e3     movw    r12, #5               ; |
+      d8:       06 e0 00 e3     movw    lr, #6                ; | args 4,5,6,7 in regs ...
+      dc:       07 40 00 e3     movw    r4, #7                ; |
+      e0:       08 00 07 e5     str     r0, [r7, #-8]         ; unsure... why place arg 0 in local area? presumably this is just a 0 for a local (e.g. main's implicit return value slot), not arg 0 as such - to be confirmed @@@
+      e4:       00 90 8d e5     str     r9, [sp]              ; |
+      e8:       04 c0 8d e5     str     r12, [sp, #4]         ; |
+      ec:       08 e0 8d e5     str     lr, [sp, #8]          ; | ... "push" args 4,5,6,7 onto stack
+      f0:       0c 40 8d e5     str     r4, [sp, #12]         ; |
+      f4:       d0 ff ff eb     bl      #-192 <_nonleaf_call> ; return address -> r14/lr, and call
+      f8:       00 00 00 e3     movw    r0, #0                ; return value
+      fc:       04 d0 47 e2     sub     sp, r7, #4            ; | sp -> saved r4 slot
+     100:       90 80 bd e8     pop     {r4, r7, pc}          ; | epilog (restore r4, r7, and return by popping saved lr into pc)
+
+
+
+; ----------------- with spilling ------------->
+
+; #include <stdarg.h>
+;
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+;
+; void nonleaf_call(int a, ...)
+; {
+; 	int b,c,d,e,f,g,h;
+; 	va_list ap;
+; 	va_start(ap,a);
+; 	b = va_arg(ap,int);
+; 	c = va_arg(ap,int);
+; 	d = va_arg(ap,int);
+; 	e = va_arg(ap,int);
+; 	f = va_arg(ap,int);
+; 	g = va_arg(ap,int);
+; 	h = va_arg(ap,int);
+; 	/* use some local data */
+; 	char x = 'L';
+; 	leaf_call(b,c,d,e,f,g,h);
+; }
+;
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from iOS 6 w/ clang 3.7.1, dumped with llvm-objdump -triple armv7-unknown-unknown-macho ...
+
+_leaf_call:
+       0:       80 40 2d e9     push    {r7, lr}              ; |
+       4:       0d 70 a0 e1     mov     r7, sp                ; | prolog (r7 used as frame pointer, entry sp = r7 + 8)
+       8:       1c d0 4d e2     sub     sp, sp, #28           ; |        (28 bytes of local area)
+       c:       10 90 97 e5     ldr     r9, [r7, #16]         ; |
+      10:       0c c0 97 e5     ldr     r12, [r7, #12]        ; | in args 6,5,4 from prev frame's param area -> r9, r12, lr
+      14:       08 e0 97 e5     ldr     lr, [r7, #8]          ; |
+      18:       04 00 07 e5     str     r0, [r7, #-4]         ; in arg 0 -> local area
+      1c:       08 10 07 e5     str     r1, [r7, #-8]         ; in arg 1 -> local area
+      20:       0c 20 07 e5     str     r2, [r7, #-12]        ; in arg 2 -> local area
+      24:       0c 30 8d e5     str     r3, [sp, #12]         ; in arg 3 -> local area
+      28:       08 e0 8d e5     str     lr, [sp, #8]          ; in arg 4 -> local area
+      2c:       04 c0 8d e5     str     r12, [sp, #4]         ; in arg 5 -> local area
+      30:       00 90 8d e5     str     r9, [sp]              ; in arg 6 -> local area
+      34:       07 d0 a0 e1     mov     sp, r7                ; | sp -> saved r7 slot, dropping the local area
+      38:       80 80 bd e8     pop     {r7, pc}              ; | epilog (restore r7, and return by popping saved lr into pc)
+
+_nonleaf_call:
+      3c:       0c d0 4d e2     sub     sp, sp, #12           ; |        space for spill area (12 bytes, directly below prev frame's param area)
+      40:       80 40 2d e9     push    {r7, lr}              ; |
+      44:       0d 70 a0 e1     mov     r7, sp                ; | prolog (entry sp = r7 + 20)
+      48:       34 d0 4d e2     sub     sp, sp, #52           ; |
+      4c:       10 30 87 e5     str     r3, [r7, #16]         ; |        |
+      50:       0c 20 87 e5     str     r2, [r7, #12]         ; |        | spill r1,r2,r3 (in args 1,2,3), making them contiguous with the stack-passed in args 4-7
+      54:       08 10 87 e5     str     r1, [r7, #8]          ; |        |
+      58:       4c 10 00 e3     movw    r1, #76               ; 'L' -> r1
+      5c:       08 20 47 e2     sub     r2, r7, #8            ; address of local at r7-8 -> r2 (this local is used as 'ap' below)
+      60:       04 00 07 e5     str     r0, [r7, #-4]         ; in arg 0 -> local area
+      64:       08 00 87 e2     add     r0, r7, #8            ; address of spilled in arg 1 -> r0, and ...
+      68:       00 00 82 e5     str     r0, [r2]              ; ... -> ap (va_start)
+      6c:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      70:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      74:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      78:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 1) ...
+      7c:       0c 00 07 e5     str     r0, [r7, #-12]        ; | ... -> b in local area
+      80:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      84:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      88:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      8c:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 2) ...
+      90:       10 00 07 e5     str     r0, [r7, #-16]        ; | ... -> c in local area
+      94:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      98:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      9c:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      a0:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 3) ...
+      a4:       14 00 07 e5     str     r0, [r7, #-20]        ; | ... -> d in local area
+      a8:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      ac:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      b0:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      b4:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 4, now in prev frame's param area) ...
+      b8:       18 00 07 e5     str     r0, [r7, #-24]        ; | ... -> e in local area
+      bc:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      c0:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      c4:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      c8:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 5) ...
+      cc:       18 00 8d e5     str     r0, [sp, #24]         ; | ... -> f in local area
+      d0:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      d4:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      d8:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      dc:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 6) ...
+      e0:       14 00 8d e5     str     r0, [sp, #20]         ; | ... -> g in local area
+      e4:       08 00 17 e5     ldr     r0, [r7, #-8]         ; |
+      e8:       04 20 80 e2     add     r2, r0, #4            ; | va_arg: ap += 4, and ...
+      ec:       08 20 07 e5     str     r2, [r7, #-8]         ; |
+      f0:       00 00 90 e5     ldr     r0, [r0]              ; | ... value at previous ap (in arg 7) ...
+      f4:       10 00 8d e5     str     r0, [sp, #16]         ; | ... -> h in local area
+      f8:       0f 10 cd e5     strb    r1, [sp, #15]         ; 'L' (in r1) -> local area
+      fc:       0c 00 17 e5     ldr     r0, [r7, #-12]        ; arg 0 (b)
+     100:       10 10 17 e5     ldr     r1, [r7, #-16]        ; arg 1 (c)
+     104:       14 20 17 e5     ldr     r2, [r7, #-20]        ; arg 2 (d)
+     108:       18 30 17 e5     ldr     r3, [r7, #-24]        ; arg 3 (e)
+     10c:       18 90 9d e5     ldr     r9, [sp, #24]         ; |
+     110:       14 c0 9d e5     ldr     r12, [sp, #20]        ; | args 4,5,6 (f,g,h, fetched from local area) -> regs, and ...
+     114:       10 e0 9d e5     ldr     lr, [sp, #16]         ; /
+     118:       00 90 8d e5     str     r9, [sp]              ; \
+     11c:       04 c0 8d e5     str     r12, [sp, #4]         ; | ... "pushed" onto stack
+     120:       08 e0 8d e5     str     lr, [sp, #8]          ; |
+     124:       b5 ff ff eb     bl      #-300 <_leaf_call>    ; return address -> r14/lr, and call
+     128:       07 d0 a0 e1     mov     sp, r7                ; | sp -> saved r7 slot
+     12c:       80 40 bd e8     pop     {r7, lr}              ; | restore r7 and lr (not popping into pc, as spill area still needs to be released)
+     130:       0c d0 8d e2     add     sp, sp, #12           ; | epilog: release spill area
+     134:       1e ff 2f e1     bx      lr                    ; | return to address in lr
+
+_main:
+     138:       90 40 2d e9     push    {r4, r7, lr}          ; |
+     13c:       04 70 8d e2     add     r7, sp, #4            ; | prolog (r7 -> saved r7 slot)
+     140:       14 d0 4d e2     sub     sp, sp, #20           ; |
+     144:       00 00 00 e3     movw    r0, #0                ; arg 0
+     148:       01 10 00 e3     movw    r1, #1                ; arg 1
+     14c:       02 20 00 e3     movw    r2, #2                ; arg 2
+     150:       03 30 00 e3     movw    r3, #3                ; arg 3
+     154:       04 90 00 e3     movw    r9, #4                ; |
+     158:       05 c0 00 e3     movw    r12, #5               ; |
+     15c:       06 e0 00 e3     movw    lr, #6                ; | args 4,5,6,7 in regs ...
+     160:       07 40 00 e3     movw    r4, #7                ; |
+     164:       08 00 07 e5     str     r0, [r7, #-8]         ; 0 (in r0) -> local area (purpose not evident from this listing)
+     168:       00 90 8d e5     str     r9, [sp]              ; |
+     16c:       04 c0 8d e5     str     r12, [sp, #4]         ; |
+     170:       08 e0 8d e5     str     lr, [sp, #8]          ; | ... "push" args 4,5,6,7 onto stack
+     174:       0c 40 8d e5     str     r4, [sp, #12]         ; |
+     178:       af ff ff eb     bl      #-324 <_nonleaf_call> ; return address -> r14/lr, and call (args 1-3 passed in regs, even though callee is variadic)
+     17c:       00 00 00 e3     movw    r0, #0                ; return value
+     180:       04 d0 47 e2     sub     sp, r7, #4            ; | sp -> saved r4 slot
+     184:       90 80 bd e8     pop     {r4, r7, pc}          ; | epilog (restore r4, r7, and return by popping saved lr into pc)
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/arm.darwin_thumb.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,235 @@
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	char x = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from iOS 6 w/ clang 3.7.1, dumped with llvm-objdump -triple thumbv7-unknown-unknown-macho ...
+
+_leaf_call:
+       0:       80 b5           push    {r7, lr}              ; |
+       2:       6f 46           mov     r7, sp                ; | prolog (r7 used as frame pointer, entry sp = r7 + 8)
+       4:       87 b0           sub     sp, #28               ; |        (28 bytes of local area)
+       6:       d7 f8 10 90     ldr.w   r9, [r7, #16]         ; |
+       a:       d7 f8 0c c0     ldr.w   r12, [r7, #12]        ; | in args 6,5,4 from prev frame's param area -> r9, r12, lr
+       e:       d7 f8 08 e0     ldr.w   lr, [r7, #8]          ; |
+      12:       06 90           str     r0, [sp, #24]         ; in arg 0 -> local area
+      14:       05 91           str     r1, [sp, #20]         ; in arg 1 -> local area
+      16:       04 92           str     r2, [sp, #16]         ; in arg 2 -> local area
+      18:       03 93           str     r3, [sp, #12]         ; in arg 3 -> local area
+      1a:       cd f8 08 e0     str.w   lr, [sp, #8]          ; in arg 4 -> local area
+      1e:       cd f8 04 c0     str.w   r12, [sp, #4]         ; in arg 5 -> local area
+      22:       cd f8 00 90     str.w   r9, [sp]              ; in arg 6 -> local area
+      26:       07 b0           add     sp, #28               ; | release local area
+      28:       80 bd           pop     {r7, pc}              ; | epilog (restore r7, and return by popping saved lr into pc)
+
+_nonleaf_call:
+      2a:       b0 b5           push    {r4, r5, r7, lr}      ; |
+      2c:       02 af           add     r7, sp, #8            ; | prolog (r7 -> saved r7 slot, so entry sp = r7 + 8)
+      2e:       8c b0           sub     sp, #48               ; /
+      30:       d7 f8 14 90     ldr.w   r9, [r7, #20]         ; \
+      34:       d7 f8 10 c0     ldr.w   r12, [r7, #16]        ; |
+      38:       d7 f8 0c e0     ldr.w   lr, [r7, #12]         ; | in args 4,5,6,7 from prev frame's param area -> regs (r4, lr, r12, r9) ...
+      3c:       bc 68           ldr     r4, [r7, #8]          ; |
+      3e:       4c 25           movs    r5, #76               ; 'L' -> r5
+      40:       0b 90           str     r0, [sp, #44]         ; in arg 0 -> temp space in local area
+      42:       0a 91           str     r1, [sp, #40]         ; in arg 1 -> temp space in local area
+      44:       09 92           str     r2, [sp, #36]         ; in arg 2 -> temp space in local area
+      46:       08 93           str     r3, [sp, #32]         ; in arg 3 -> temp space in local area
+      48:       07 94           str     r4, [sp, #28]         ; |
+      4a:       cd f8 18 e0     str.w   lr, [sp, #24]         ; |
+      4e:       cd f8 14 c0     str.w   r12, [sp, #20]        ; | ... in args 4,5,6,7 in regs -> local area ...
+      52:       cd f8 10 90     str.w   r9, [sp, #16]         ; |
+      56:       07 f8 29 5c     strb    r5, [r7, #-41]        ; 'L' (in r5) -> local area (r7-41 = sp+15)
+      5a:       0a 98           ldr     r0, [sp, #40]         ; arg 0 (= in arg 1, from temp space in local area)
+      5c:       09 99           ldr     r1, [sp, #36]         ; arg 1 (= in arg 2, from temp space in local area)
+      5e:       08 9a           ldr     r2, [sp, #32]         ; arg 2 (= in arg 3, from temp space in local area)
+      60:       07 9b           ldr     r3, [sp, #28]         ; arg 3 (= in arg 4, fetched from local area previously copied to)
+      62:       dd f8 18 90     ldr.w   r9, [sp, #24]         ; |
+      66:       dd f8 14 c0     ldr.w   r12, [sp, #20]        ; | args 4,5,6 (= in args 5,6,7, fetched from local area previously copied to) -> regs, and ...
+      6a:       dd f8 10 e0     ldr.w   lr, [sp, #16]         ; /
+      6e:       cd f8 00 90     str.w   r9, [sp]              ; \
+      72:       cd f8 04 c0     str.w   r12, [sp, #4]         ; | ... "pushed" onto stack
+      76:       cd f8 08 e0     str.w   lr, [sp, #8]          ; |
+      7a:       ff f7 c1 ff     bl      #-126                 ; return address -> r14/lr, and call (0x7a + 4 - 126 = 0, i.e. _leaf_call)
+      7e:       0c b0           add     sp, #48               ; | release local area
+      80:       b0 bd           pop     {r4, r5, r7, pc}      ; | epilog (restore r4, r5, r7, and return by popping saved lr into pc)
+
+_main:
+      82:       90 b5           push    {r4, r7, lr}          ; |
+      84:       01 af           add     r7, sp, #4            ; | prolog (r7 -> saved r7 slot)
+      86:       85 b0           sub     sp, #20               ; |
+      88:       00 20           movs    r0, #0                ; arg 0
+      8a:       01 21           movs    r1, #1                ; arg 1
+      8c:       02 22           movs    r2, #2                ; arg 2
+      8e:       03 23           movs    r3, #3                ; arg 3
+      90:       40 f2 04 09     movw    r9, #4                ; |
+      94:       40 f2 05 0c     movw    r12, #5               ; |
+      98:       40 f2 06 0e     movw    lr, #6                ; | args 4,5,6,7 in regs ...
+      9c:       07 24           movs    r4, #7                ; |
+      9e:       04 90           str     r0, [sp, #16]         ; 0 (in r0) -> local area (purpose not evident from this listing)
+      a0:       cd f8 00 90     str.w   r9, [sp]              ; |
+      a4:       cd f8 04 c0     str.w   r12, [sp, #4]         ; |
+      a8:       cd f8 08 e0     str.w   lr, [sp, #8]          ; | ... "push" args 4,5,6,7 onto stack
+      ac:       03 94           str     r4, [sp, #12]         ; |
+      ae:       ff f7 bc ff     bl      #-136                 ; return address -> r14/lr, and call (0xae + 4 - 136 = 0x2a, i.e. _nonleaf_call)
+      b2:       00 20           movs    r0, #0                ; return value
+      b4:       05 b0           add     sp, #20               ; | release local area
+      b6:       90 bd           pop     {r4, r7, pc}          ; | epilog (restore r4, r7, and return by popping saved lr into pc)
+
+
+
+; ----------------- with spilling ------------->
+
+; #include <stdarg.h>
+;
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+;
+; void nonleaf_call(int a, ...)
+; {
+; 	int b,c,d,e,f,g,h;
+; 	va_list ap;
+; 	va_start(ap,a);
+; 	b = va_arg(ap,int);
+; 	c = va_arg(ap,int);
+; 	d = va_arg(ap,int);
+; 	e = va_arg(ap,int);
+; 	f = va_arg(ap,int);
+; 	g = va_arg(ap,int);
+; 	h = va_arg(ap,int);
+; 	/* use some local data */
+; 	char x = 'L';
+; 	leaf_call(b,c,d,e,f,g,h);
+; }
+;
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from iOS 6 w/ clang 3.7.1, dumped with llvm-objdump -triple thumbv7-unknown-unknown-macho ...
+
+_leaf_call:
+       0:       80 b5           push    {r7, lr}              ; |
+       2:       6f 46           mov     r7, sp                ; | prolog (r7 used as frame pointer, entry sp = r7 + 8)
+       4:       87 b0           sub     sp, #28               ; |        (28 bytes of local area)
+       6:       d7 f8 10 90     ldr.w   r9, [r7, #16]         ; |
+       a:       d7 f8 0c c0     ldr.w   r12, [r7, #12]        ; | in args 6,5,4 from prev frame's param area -> r9, r12, lr
+       e:       d7 f8 08 e0     ldr.w   lr, [r7, #8]          ; |
+      12:       06 90           str     r0, [sp, #24]         ; in arg 0 -> local area
+      14:       05 91           str     r1, [sp, #20]         ; in arg 1 -> local area
+      16:       04 92           str     r2, [sp, #16]         ; in arg 2 -> local area
+      18:       03 93           str     r3, [sp, #12]         ; in arg 3 -> local area
+      1a:       cd f8 08 e0     str.w   lr, [sp, #8]          ; in arg 4 -> local area
+      1e:       cd f8 04 c0     str.w   r12, [sp, #4]         ; in arg 5 -> local area
+      22:       cd f8 00 90     str.w   r9, [sp]              ; in arg 6 -> local area
+      26:       07 b0           add     sp, #28               ; | release local area
+      28:       80 bd           pop     {r7, pc}              ; | epilog (restore r7, and return by popping saved lr into pc)
+
+_nonleaf_call:                                         ; nonleaf_call(a, ...): spills r1-r3, reads 7 ints via ap, sets x = 'L', calls leaf_call
+      2a:       83 b0           sub     sp, #12        ; |        space for spill area (b/c pushing was apparently too easy)
+      2c:       80 b5           push    {r7, lr}       ; |
+      2e:       6f 46           mov     r7, sp         ; |
+      30:       8d b0           sub     sp, #52        ; | prolog
+      32:       3b 61           str     r3, [r7, #16]  ; |        |
+      34:       fa 60           str     r2, [r7, #12]  ; |        | spill (before reg save area), i.e. into the 12 bytes reserved first, right above saved r7 and lr
+      36:       b9 60           str     r1, [r7, #8]   ; |        |
+      38:       4c 21           movs    r1, #76        ; 'L' -> r1 (stored into x further below)
+      3a:       0b aa           add     r2, sp, #44    ; r2 = address of ap (local at sp+44)
+      3c:       0c 90           str     r0, [sp, #48]  ; in arg 0 (a) -> local area
+      3e:       07 f1 08 00     add.w   r0, r7, #8     ; r0 = address of the spilled r1, i.e. of the first vararg
+      42:       10 60           str     r0, [r2]       ; va_start: ap = address of first vararg
+      44:       0b 98           ldr     r0, [sp, #44]  ; \ b = va_arg(ap, int):  r0 = ap
+      46:       02 1d           adds    r2, r0, #4     ; |                       r2 = ap + 4
+      48:       0b 92           str     r2, [sp, #44]  ; |                       ap = r2
+      4a:       00 68           ldr     r0, [r0]       ; |                       fetch int the old ap pointed to
+      4c:       0a 90           str     r0, [sp, #40]  ; /                       -> b (local at sp+40)
+      4e:       0b 98           ldr     r0, [sp, #44]  ; \
+      50:       02 1d           adds    r2, r0, #4     ; |
+      52:       0b 92           str     r2, [sp, #44]  ; | c = va_arg(ap, int), same sequence as for b
+      54:       00 68           ldr     r0, [r0]       ; |
+      56:       09 90           str     r0, [sp, #36]  ; /                       -> c (local at sp+36)
+      58:       0b 98           ldr     r0, [sp, #44]  ; \
+      5a:       02 1d           adds    r2, r0, #4     ; |
+      5c:       0b 92           str     r2, [sp, #44]  ; | d = va_arg(ap, int)
+      5e:       00 68           ldr     r0, [r0]       ; |
+      60:       08 90           str     r0, [sp, #32]  ; /                       -> d (local at sp+32)
+      62:       0b 98           ldr     r0, [sp, #44]  ; \
+      64:       02 1d           adds    r2, r0, #4     ; |
+      66:       0b 92           str     r2, [sp, #44]  ; | e = va_arg(ap, int)
+      68:       00 68           ldr     r0, [r0]       ; |
+      6a:       07 90           str     r0, [sp, #28]  ; /                       -> e (local at sp+28)
+      6c:       0b 98           ldr     r0, [sp, #44]  ; \
+      6e:       02 1d           adds    r2, r0, #4     ; |
+      70:       0b 92           str     r2, [sp, #44]  ; | f = va_arg(ap, int)
+      72:       00 68           ldr     r0, [r0]       ; |
+      74:       06 90           str     r0, [sp, #24]  ; /                       -> f (local at sp+24)
+      76:       0b 98           ldr     r0, [sp, #44]  ; \
+      78:       02 1d           adds    r2, r0, #4     ; |
+      7a:       0b 92           str     r2, [sp, #44]  ; | g = va_arg(ap, int)
+      7c:       00 68           ldr     r0, [r0]       ; |
+      7e:       05 90           str     r0, [sp, #20]  ; /                       -> g (local at sp+20)
+      80:       0b 98           ldr     r0, [sp, #44]  ; \
+      82:       02 1d           adds    r2, r0, #4     ; |
+      84:       0b 92           str     r2, [sp, #44]  ; | h = va_arg(ap, int)
+      86:       00 68           ldr     r0, [r0]       ; |
+      88:       04 90           str     r0, [sp, #16]  ; /                       -> h (local at sp+16)
+      8a:       07 f8 25 1c     strb    r1, [r7, #-37] ; x = 'L' (local byte at r7-37, which is sp+15)
+      8e:       0a 98           ldr     r0, [sp, #40]  ; arg 0 (b)
+      90:       09 99           ldr     r1, [sp, #36]  ; arg 1 (c)
+      92:       08 9a           ldr     r2, [sp, #32]  ; arg 2 (d)
+      94:       07 9b           ldr     r3, [sp, #28]  ; arg 3 (e)
+      96:       dd f8 18 90     ldr.w   r9, [sp, #24]  ; f -> r9 (temp)
+      9a:       dd f8 14 c0     ldr.w   r12, [sp, #20] ; g -> r12 (temp)
+      9e:       dd f8 10 e0     ldr.w   lr, [sp, #16]  ; h -> lr (temp; lr was saved in prolog)
+      a2:       cd f8 00 90     str.w   r9, [sp]       ; arg 4 (f) -> top of stack
+      a6:       cd f8 04 c0     str.w   r12, [sp, #4]  ; arg 5 (g) -> stack
+      aa:       cd f8 08 e0     str.w   lr, [sp, #8]   ; arg 6 (h) -> stack
+      ae:       ff f7 a7 ff     bl      #-178          ; call _leaf_call (offset is relative to pc = 0xae + 4, so target is 0x0)
+      b2:       0d b0           add     sp, #52        ; |
+      b4:       bd e8 80 40     pop.w   {r7, lr}       ; |        restore frame pointer and return address
+      b8:       03 b0           add     sp, #12        ; | epilog remove spill area
+      ba:       70 47           bx      lr             ; |        return
+
+_main:                                                 ; main(): calls nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7), returns 0
+      bc:       90 b5           push    {r4, r7, lr}   ; |        save r4 (used as temp below), frame pointer r7 and return address
+      be:       01 af           add     r7, sp, #4     ; | prolog r7 -> stack slot holding the saved r7
+      c0:       85 b0           sub     sp, #20        ; |        16 bytes for the 4 outgoing stack args + 4 bytes local
+      c2:       00 20           movs    r0, #0         ; arg 0
+      c4:       01 21           movs    r1, #1         ; arg 1
+      c6:       02 22           movs    r2, #2         ; arg 2
+      c8:       03 23           movs    r3, #3         ; arg 3
+      ca:       40 f2 04 09     movw    r9, #4         ; arg 4 -> r9 (temp)
+      ce:       40 f2 05 0c     movw    r12, #5        ; arg 5 -> r12 (temp)
+      d2:       40 f2 06 0e     movw    lr, #6         ; arg 6 -> lr (temp; lr was saved in prolog)
+      d6:       07 24           movs    r4, #7         ; arg 7 -> r4 (temp; r4 was saved in prolog)
+      d8:       04 90           str     r0, [sp, #16]  ; 0 -> local slot at sp+16 (never read back here; presumably main's return value slot)
+      da:       cd f8 00 90     str.w   r9, [sp]       ; arg 4 -> top of stack
+      de:       cd f8 04 c0     str.w   r12, [sp, #4]  ; arg 5 -> stack
+      e2:       cd f8 08 e0     str.w   lr, [sp, #8]   ; arg 6 -> stack
+      e6:       03 94           str     r4, [sp, #12]  ; arg 7 -> stack
+      e8:       ff f7 9f ff     bl      #-194          ; call _nonleaf_call (offset is relative to pc = 0xe8 + 4, so target is 0x2a)
+      ec:       00 20           movs    r0, #0         ; return value 0
+      ee:       05 b0           add     sp, #20        ; |
+      f0:       90 bd           pop     {r4, r7, pc}   ; | epilog, returns by popping the saved lr into pc
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/arm64.aapcs.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,990 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+leaf_call:                                           ; leaf_call(b..h): copies its 7 register in args to the local area, returns
+   0:   ff 83 00 d1     sub     sp, sp, #32          ; prolog: 32 bytes of local space
+   4:   e0 1f 00 b9     str     w0, [sp, #28]        ; in arg 0 -> local area
+   8:   e1 1b 00 b9     str     w1, [sp, #24]        ; in arg 1 -> local area
+   c:   e2 17 00 b9     str     w2, [sp, #20]        ; in arg 2 -> local area
+  10:   e3 13 00 b9     str     w3, [sp, #16]        ; in arg 3 -> local area
+  14:   e4 0f 00 b9     str     w4, [sp, #12]        ; in arg 4 -> local area
+  18:   e5 0b 00 b9     str     w5, [sp, #8]         ; in arg 5 -> local area
+  1c:   e6 07 00 b9     str     w6, [sp, #4]         ; in arg 6 -> local area
+  20:   ff 83 00 91     add     sp, sp, #32          ; |
+  24:   c0 03 5f d6     ret                          ; | epilog
+
+nonleaf_call:                                        ; nonleaf_call(a..h): saves its 8 register in args, writes 'L' at [sp], calls leaf_call(b..h)
+  28:   ff 83 04 d1     sub     sp, sp, #288         ; |        includes alloca()'d static space, already
+  2c:   fc 83 00 f9     str     x28, [sp, #256]      ; |        (unsure why r28 is preserved @@@)
+  30:   fd 7b 11 a9     stp     x29, x30, [sp, #272] ; | prolog
+  34:   fd 43 04 91     add     x29, sp, #272        ; |        adjust/set frame pointer (since sp was modified first)
+  38:   88 09 80 52     mov     w8, #76              ; 'L' -> w8
+  3c:   a0 c3 1e b8     stur    w0, [x29, #-20]      ; in arg 0 -> local area (as temp store)
+  40:   a1 83 1e b8     stur    w1, [x29, #-24]      ; in arg 1 -> local area (as temp store)
+  44:   a2 43 1e b8     stur    w2, [x29, #-28]      ; in arg 2 -> local area (as temp store)
+  48:   a3 03 1e b8     stur    w3, [x29, #-32]      ; in arg 3 -> local area (as temp store)
+  4c:   a4 c3 1d b8     stur    w4, [x29, #-36]      ; in arg 4 -> local area (as temp store)
+  50:   a5 83 1d b8     stur    w5, [x29, #-40]      ; in arg 5 -> local area (as temp store)
+  54:   a6 43 1d b8     stur    w6, [x29, #-44]      ; in arg 6 -> local area (as temp store)
+  58:   a7 03 1d b8     stur    w7, [x29, #-48]      ; in arg 7 -> local area (as temp store)
+  5c:   e8 03 00 39     strb    w8, [sp]             ; place 'L' on top of stack (at alloca()'d addr)
+  60:   a0 83 5e b8     ldur    w0, [x29, #-24]      ; arg 0 (= in arg 1, reloaded from temp store)
+  64:   a1 43 5e b8     ldur    w1, [x29, #-28]      ; arg 1 (= in arg 2)
+  68:   a2 03 5e b8     ldur    w2, [x29, #-32]      ; arg 2 (= in arg 3)
+  6c:   a3 c3 5d b8     ldur    w3, [x29, #-36]      ; arg 3 (= in arg 4)
+  70:   a4 83 5d b8     ldur    w4, [x29, #-40]      ; arg 4 (= in arg 5)
+  74:   a5 43 5d b8     ldur    w5, [x29, #-44]      ; arg 5 (= in arg 6)
+  78:   a6 03 5d b8     ldur    w6, [x29, #-48]      ; arg 6 (= in arg 7)
+  7c:   e1 ff ff 97     bl      #-124                ; return address -> r30/lr, and call (0x7c - 124 = 0x0, leaf_call)
+  80:   fd 7b 51 a9     ldp     x29, x30, [sp, #272] ; |
+  84:   fc 83 40 f9     ldr     x28, [sp, #256]      ; |
+  88:   ff 83 04 91     add     sp, sp, #288         ; | epilog
+  8c:   c0 03 5f d6     ret                          ; |
+
+main:                                                ; main(): calls nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7), returns 0
+  90:   ff 83 00 d1     sub     sp, sp, #32          ; |
+  94:   fd 7b 01 a9     stp     x29, x30, [sp, #16]  ; | prolog
+  98:   fd 43 00 91     add     x29, sp, #16         ; |
+  9c:   08 00 80 52     mov     w8, #0               ; 0 -> w8, used only as temp here (copied to w0 as arg 0 and saved as return value, below)
+  a0:   e1 03 00 32     orr     w1, wzr, #0x1        ; arg 1
+  a4:   e2 03 1f 32     orr     w2, wzr, #0x2        ; arg 2
+  a8:   e3 07 00 32     orr     w3, wzr, #0x3        ; arg 3
+  ac:   e4 03 1e 32     orr     w4, wzr, #0x4        ; arg 4
+  b0:   a5 00 80 52     mov     w5, #5               ; arg 5
+  b4:   e6 07 1f 32     orr     w6, wzr, #0x6        ; arg 6
+  b8:   e7 0b 00 32     orr     w7, wzr, #0x7        ; arg 7
+  bc:   bf c3 1f b8     stur    wzr, [x29, #-4]      ; unsure... store a zero in local area@@@ (never read back here; presumably main's return value slot)
+  c0:   e0 03 08 2a     mov     w0, w8               ; arg 0 (= 0 set in w8, above)
+  c4:   e8 0b 00 b9     str     w8, [sp, #8]         ; temp storing 0 in local area (reloaded as return value after the call)
+  c8:   d8 ff ff 97     bl      #-160                ; return address -> r30/lr, and call (0xc8 - 160 = 0x28, nonleaf_call)
+  cc:   e0 0b 40 b9     ldr     w0, [sp, #8]         ; return value (unsure why not just using immediate @@@)
+  d0:   fd 7b 41 a9     ldp     x29, x30, [sp, #16]  ; |
+  d4:   ff 83 00 91     add     sp, sp, #32          ; | epilog
+  d8:   c0 03 5f d6     ret                          ; |
+
+
+
+; ---------- same with more args so stack is also used ---------->
+
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:                                  ; leaf_call(b..j): copies its 9 in args (8 in regs, 1 on stack) to the local area, returns
+       0:       ff c3 00 d1     sub     sp, sp, #48          ; prolog: 48 bytes of local space
+       4:       e8 33 40 b9     ldr     w8, [sp, #48]        ; in arg 8 (on stack, at sp as it was on entry) -> w8
+       8:       e0 2f 00 b9     str     w0, [sp, #44]        ; in arg 0 -> local area
+       c:       e1 2b 00 b9     str     w1, [sp, #40]        ; in arg 1 -> local area
+      10:       e2 27 00 b9     str     w2, [sp, #36]        ; in arg 2 -> local area
+      14:       e3 23 00 b9     str     w3, [sp, #32]        ; in arg 3 -> local area
+      18:       e4 1f 00 b9     str     w4, [sp, #28]        ; in arg 4 -> local area
+      1c:       e5 1b 00 b9     str     w5, [sp, #24]        ; in arg 5 -> local area
+      20:       e6 17 00 b9     str     w6, [sp, #20]        ; in arg 6 -> local area
+      24:       e7 13 00 b9     str     w7, [sp, #16]        ; in arg 7 -> local area
+      28:       e8 0f 00 b9     str     w8, [sp, #12]        ; in arg 8 -> local area
+      2c:       ff c3 00 91     add     sp, sp, #48          ; |
+      30:       c0 03 5f d6     ret                          ; | epilog
+
+0000000000000034 nonleaf_call:                               ; nonleaf_call(a..j): saves its 10 in args, writes 'L' to alloca()'d space, calls leaf_call(b..j)
+      34:       ff 03 05 d1     sub     sp, sp, #320         ; |        includes alloca()'d static space, already
+      38:       fc 93 00 f9     str     x28, [sp, #288]      ; |        (unsure why r28 is preserved @@@)
+      3c:       fd 7b 13 a9     stp     x29, x30, [sp, #304] ; | prolog
+      40:       fd c3 04 91     add     x29, sp, #304        ; /        adjust/set frame pointer (since sp was modified first)
+      44:       a8 13 40 b9     ldr     w8, [x29, #16]       ; \
+      48:       a9 1b 40 b9     ldr     w9, [x29, #24]       ; | in args 8,9 from prev frame's param area -> regs, now we have args 0-9 all in r0-r9
+      4c:       8a 09 80 52     mov     w10, #76             ; 'L' -> w10
+      50:       a0 c3 1e b8     stur    w0, [x29, #-20]      ; |
+      54:       a1 83 1e b8     stur    w1, [x29, #-24]      ; |
+      58:       a2 43 1e b8     stur    w2, [x29, #-28]      ; |
+      5c:       a3 03 1e b8     stur    w3, [x29, #-32]      ; |
+      60:       a4 c3 1d b8     stur    w4, [x29, #-36]      ; | ... in args 0,1,2,3,4,5,6,7,8,9 -> temp space in local area ...
+      64:       a5 83 1d b8     stur    w5, [x29, #-40]      ; |
+      68:       a6 43 1d b8     stur    w6, [x29, #-44]      ; |
+      6c:       a7 03 1d b8     stur    w7, [x29, #-48]      ; |
+      70:       a8 c3 1c b8     stur    w8, [x29, #-52]      ; |
+      74:       a9 83 1c b8     stur    w9, [x29, #-56]      ; |
+      78:       ea 43 00 39     strb    w10, [sp, #16]       ; 'L' -> local area (alloca()'d memory), above the slot at [sp] used for the outgoing stack arg
+      7c:       a0 83 5e b8     ldur    w0, [x29, #-24]      ; arg 0 (= in arg 1, reloaded from temp space)
+      80:       a1 43 5e b8     ldur    w1, [x29, #-28]      ; arg 1 (= in arg 2)
+      84:       a2 03 5e b8     ldur    w2, [x29, #-32]      ; arg 2 (= in arg 3)
+      88:       a3 c3 5d b8     ldur    w3, [x29, #-36]      ; arg 3 (= in arg 4)
+      8c:       a4 83 5d b8     ldur    w4, [x29, #-40]      ; arg 4 (= in arg 5)
+      90:       a5 43 5d b8     ldur    w5, [x29, #-44]      ; arg 5 (= in arg 6)
+      94:       a6 03 5d b8     ldur    w6, [x29, #-48]      ; arg 6 (= in arg 7)
+      98:       a7 c3 5c b8     ldur    w7, [x29, #-52]      ; arg 7 (= in arg 8)
+      9c:       a8 83 5c b8     ldur    w8, [x29, #-56]      ; arg 8 (= in arg 9) -> w8, and ...
+      a0:       eb 03 00 91     mov     x11, sp              ; ... with help of x11 (why?) ...
+      a4:       68 01 00 b9     str     w8, [x11]            ; ... "pushed" onto top of stack
+      a8:       d6 ff ff 97     bl      #-168 <leaf_call>    ; return address -> r30/lr, and call
+      ac:       fd 7b 53 a9     ldp     x29, x30, [sp, #304] ; |
+      b0:       fc 93 40 f9     ldr     x28, [sp, #288]      ; |
+      b4:       ff 03 05 91     add     sp, sp, #320         ; | epilog
+      b8:       c0 03 5f d6     ret                          ; |
+
+00000000000000bc main:                                       ; main(): calls nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), returns 0
+      bc:       ff c3 00 d1     sub     sp, sp, #48          ; |
+      c0:       fd 7b 02 a9     stp     x29, x30, [sp, #32]  ; | prolog
+      c4:       fd 83 00 91     add     x29, sp, #32         ; |
+      c8:       08 00 80 52     mov     w8, #0               ; 0 -> w8, used only as temp here (copied to w0 as arg 0 and saved as return value, below)
+      cc:       e1 03 00 32     orr     w1, wzr, #0x1        ; arg 1
+      d0:       e2 03 1f 32     orr     w2, wzr, #0x2        ; arg 2
+      d4:       e3 07 00 32     orr     w3, wzr, #0x3        ; arg 3
+      d8:       e4 03 1e 32     orr     w4, wzr, #0x4        ; arg 4
+      dc:       a5 00 80 52     mov     w5, #5               ; arg 5
+      e0:       e6 07 1f 32     orr     w6, wzr, #0x6        ; arg 6
+      e4:       e7 0b 00 32     orr     w7, wzr, #0x7        ; arg 7
+      e8:       e9 03 1d 32     orr     w9, wzr, #0x8        ; arg 8 -> r9
+      ec:       2a 01 80 52     mov     w10, #9              ; arg 9 -> r10
+      f0:       bf c3 1f b8     stur    wzr, [x29, #-4]      ; unsure... store a zero in local area@@@ (never read back here; presumably main's return value slot)
+      f4:       e0 03 08 2a     mov     w0, w8               ; arg 0 (= 0 set in w8, above)
+      f8:       eb 03 00 91     mov     x11, sp              ; use sp in x11 (why?), to ...
+      fc:       69 01 00 b9     str     w9, [x11]            ; ... place arg 8 on top of stack
+     100:       eb 03 00 91     mov     x11, sp              ; use sp in x11 (why?), to ... (set again, pointlessly)
+     104:       6a 09 00 b9     str     w10, [x11, #8]       ; ... place arg 9 on stack (next to arg 8, 8 bytes above it)
+     108:       a8 83 1f b8     stur    w8, [x29, #-8]       ; temp storing 0 in local area @@@ why? (it is reloaded as the return value after the call)
+     10c:       ca ff ff 97     bl      #-216 <nonleaf_call> ; return address -> r30/lr, and call
+     110:       a0 83 5f b8     ldur    w0, [x29, #-8]       ; |        return value (the 0 stored before the call)
+     114:       fd 7b 42 a9     ldp     x29, x30, [sp, #32]  ; |
+     118:       ff c3 00 91     add     sp, sp, #48          ; | epilog
+     11c:       c0 03 5f d6     ret                          ; |
+
+
+
+; ---------- for spilling ---------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b,c,d,e,f,g,h,i,j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = va_arg(ap, int);
+;     g = va_arg(ap, int);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = va_arg(ap, int);
+; 
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:                                  ; leaf_call(b..j): copies its 9 in args (8 in regs, 1 on stack) to the local area, returns
+       0:       ff c3 00 d1     sub     sp, sp, #48          ; prolog: 48 bytes of local space
+       4:       e8 33 40 b9     ldr     w8, [sp, #48]        ; in arg 8 (on stack, at sp as it was on entry) -> w8
+       8:       e0 2f 00 b9     str     w0, [sp, #44]        ; in arg 0 -> local area
+       c:       e1 2b 00 b9     str     w1, [sp, #40]        ; in arg 1 -> local area
+      10:       e2 27 00 b9     str     w2, [sp, #36]        ; in arg 2 -> local area
+      14:       e3 23 00 b9     str     w3, [sp, #32]        ; in arg 3 -> local area
+      18:       e4 1f 00 b9     str     w4, [sp, #28]        ; in arg 4 -> local area
+      1c:       e5 1b 00 b9     str     w5, [sp, #24]        ; in arg 5 -> local area
+      20:       e6 17 00 b9     str     w6, [sp, #20]        ; in arg 6 -> local area
+      24:       e7 13 00 b9     str     w7, [sp, #16]        ; in arg 7 -> local area
+      28:       e8 0f 00 b9     str     w8, [sp, #12]        ; in arg 8 -> local area
+      2c:       ff c3 00 91     add     sp, sp, #48          ; |
+      30:       c0 03 5f d6     ret                          ; | epilog
+
+0000000000000034 nonleaf_call:
+      34:       fc 4f be a9     stp     x28, x19, [sp, #-32]! ; |
+      38:       fd 7b 01 a9     stp     x29, x30, [sp, #16]   ; |
+      3c:       fd 43 00 91     add     x29, sp, #16          ; | prolog
+      40:       ff 83 07 d1     sub     sp, sp, #480          ; |
+      44:       f3 03 00 91     mov     x19, sp
+      48:       67 56 80 3d     str     q7, [x19, #336]       ; |
+      4c:       66 52 80 3d     str     q6, [x19, #320]       ; |
+      50:       65 4e 80 3d     str     q5, [x19, #304]       ; |
+      54:       64 4a 80 3d     str     q4, [x19, #288]       ; |
+      58:       63 46 80 3d     str     q3, [x19, #272]       ; | spill all float regs (on top of spilled integers, below)
+      5c:       62 42 80 3d     str     q2, [x19, #256]       ; |
+      60:       61 3e 80 3d     str     q1, [x19, #240]       ; |
+      64:       60 3a 80 3d     str     q0, [x19, #224]       ; /
+      68:       a7 03 1a f8     stur    x7, [x29, #-96]       ; \
+      6c:       a6 83 19 f8     stur    x6, [x29, #-104]      ; |
+      70:       a5 03 19 f8     stur    x5, [x29, #-112]      ; |
+      74:       a4 83 18 f8     stur    x4, [x29, #-120]      ; | spill integer args (adjacent to prev frame's param area)
+      78:       a3 03 18 f8     stur    x3, [x29, #-128]      ; | note: only needed ones are spilled
+      7c:       a2 83 17 f8     stur    x2, [x29, #-136]      ; |
+      80:       a1 03 17 f8     stur    x1, [x29, #-144]      ; |
+      84:       a0 c3 1e b8     stur    w0, [x29, #-20]
+      88:       e0 63 19 32     orr     w0, wzr, #0xffffff80
+      8c:       a0 43 1c b8     stur    w0, [x29, #-60]
+      90:       e0 06 80 12     mov     w0, #-56
+      94:       a0 03 1c b8     stur    w0, [x29, #-64]
+      98:       61 82 03 91     add     x1, x19, #224
+      9c:       21 00 02 91     add     x1, x1, #128
+      a0:       a1 83 1b f8     stur    x1, [x29, #-72]
+      a4:       a1 43 02 d1     sub     x1, x29, #144
+      a8:       21 e0 00 91     add     x1, x1, #56
+      ac:       a1 03 1b f8     stur    x1, [x29, #-80]
+      b0:       a1 43 00 91     add     x1, x29, #16
+      b4:       a1 83 1a f8     stur    x1, [x29, #-88]
+      b8:       a1 63 01 d1     sub     x1, x29, #88
+      bc:       21 60 00 91     add     x1, x1, #24
+      c0:       a0 03 5c b8     ldur    w0, [x29, #-64]
+      c4:       e8 03 00 2a     mov     w8, w0
+      c8:       61 6e 00 f9     str     x1, [x19, #216]
+      cc:       68 d6 00 b9     str     w8, [x19, #212]
+      d0:       e0 01 f8 36     tbz     w0, #31, #60 <nonleaf_call+0xd8>
+      d4:       01 00 00 14     b       #4 <nonleaf_call+0xa4>
+      d8:       68 d6 40 b9     ldr     w8, [x19, #212]
+      dc:       09 21 00 11     add     w9, w8, #8
+      e0:       6a 6e 40 f9     ldr     x10, [x19, #216]
+      e4:       49 01 00 b9     str     w9, [x10]
+      e8:       29 01 00 71     subs    w9, w9, #0
+      ec:       69 d2 00 b9     str     w9, [x19, #208]
+      f0:       ec 00 00 54     b.gt    #28 <nonleaf_call+0xd8>
+      f4:       01 00 00 14     b       #4 <nonleaf_call+0xc4>
+      f8:       a8 03 5b f8     ldur    x8, [x29, #-80]
+      fc:       69 d6 40 b9     ldr     w9, [x19, #212]
+     100:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     104:       68 66 00 f9     str     x8, [x19, #200]
+     108:       06 00 00 14     b       #24 <nonleaf_call+0xec>
+     10c:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     110:       09 21 00 91     add     x9, x8, #8
+     114:       a9 83 1a f8     stur    x9, [x29, #-88]
+     118:       68 66 00 f9     str     x8, [x19, #200]
+     11c:       01 00 00 14     b       #4 <nonleaf_call+0xec>
+     120:       68 66 40 f9     ldr     x8, [x19, #200]
+     124:       09 01 40 b9     ldr     w9, [x8]
+     128:       a9 83 1e b8     stur    w9, [x29, #-24]
+     12c:       a8 63 01 d1     sub     x8, x29, #88
+     130:       08 61 00 91     add     x8, x8, #24
+     134:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     138:       ea 03 09 2a     mov     w10, w9
+     13c:       68 62 00 f9     str     x8, [x19, #192]
+     140:       6a be 00 b9     str     w10, [x19, #188]
+     144:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x14c>
+     148:       01 00 00 14     b       #4 <nonleaf_call+0x118>
+     14c:       68 be 40 b9     ldr     w8, [x19, #188]
+     150:       09 21 00 11     add     w9, w8, #8
+     154:       6a 62 40 f9     ldr     x10, [x19, #192]
+     158:       49 01 00 b9     str     w9, [x10]
+     15c:       29 01 00 71     subs    w9, w9, #0
+     160:       69 ba 00 b9     str     w9, [x19, #184]
+     164:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x14c>
+     168:       01 00 00 14     b       #4 <nonleaf_call+0x138>
+     16c:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     170:       69 be 40 b9     ldr     w9, [x19, #188]
+     174:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     178:       68 5a 00 f9     str     x8, [x19, #176]
+     17c:       06 00 00 14     b       #24 <nonleaf_call+0x160>
+     180:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     184:       09 21 00 91     add     x9, x8, #8
+     188:       a9 83 1a f8     stur    x9, [x29, #-88]
+     18c:       68 5a 00 f9     str     x8, [x19, #176]
+     190:       01 00 00 14     b       #4 <nonleaf_call+0x160>
+     194:       68 5a 40 f9     ldr     x8, [x19, #176]
+     198:       09 01 40 b9     ldr     w9, [x8]
+     19c:       a9 43 1e b8     stur    w9, [x29, #-28]
+     1a0:       a8 63 01 d1     sub     x8, x29, #88
+     1a4:       08 61 00 91     add     x8, x8, #24
+     1a8:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     1ac:       ea 03 09 2a     mov     w10, w9
+     1b0:       68 56 00 f9     str     x8, [x19, #168]
+     1b4:       6a a6 00 b9     str     w10, [x19, #164]
+     1b8:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x1c0>
+     1bc:       01 00 00 14     b       #4 <nonleaf_call+0x18c>
+     1c0:       68 a6 40 b9     ldr     w8, [x19, #164]
+     1c4:       09 21 00 11     add     w9, w8, #8
+     1c8:       6a 56 40 f9     ldr     x10, [x19, #168]
+     1cc:       49 01 00 b9     str     w9, [x10]
+     1d0:       29 01 00 71     subs    w9, w9, #0
+     1d4:       69 a2 00 b9     str     w9, [x19, #160]
+     1d8:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x1c0>
+     1dc:       01 00 00 14     b       #4 <nonleaf_call+0x1ac>
+     1e0:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     1e4:       69 a6 40 b9     ldr     w9, [x19, #164]
+     1e8:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     1ec:       68 4e 00 f9     str     x8, [x19, #152]
+     1f0:       06 00 00 14     b       #24 <nonleaf_call+0x1d4>
+     1f4:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     1f8:       09 21 00 91     add     x9, x8, #8
+     1fc:       a9 83 1a f8     stur    x9, [x29, #-88]
+     200:       68 4e 00 f9     str     x8, [x19, #152]
+     204:       01 00 00 14     b       #4 <nonleaf_call+0x1d4>
+     208:       68 4e 40 f9     ldr     x8, [x19, #152]
+     20c:       09 01 40 b9     ldr     w9, [x8]
+     210:       a9 03 1e b8     stur    w9, [x29, #-32]
+     214:       a8 63 01 d1     sub     x8, x29, #88
+     218:       08 61 00 91     add     x8, x8, #24
+     21c:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     220:       ea 03 09 2a     mov     w10, w9
+     224:       68 4a 00 f9     str     x8, [x19, #144]
+     228:       6a 8e 00 b9     str     w10, [x19, #140]
+     22c:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x234>
+     230:       01 00 00 14     b       #4 <nonleaf_call+0x200>
+     234:       68 8e 40 b9     ldr     w8, [x19, #140]
+     238:       09 21 00 11     add     w9, w8, #8
+     23c:       6a 4a 40 f9     ldr     x10, [x19, #144]
+     240:       49 01 00 b9     str     w9, [x10]
+     244:       29 01 00 71     subs    w9, w9, #0
+     248:       69 8a 00 b9     str     w9, [x19, #136]
+     24c:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x234>
+     250:       01 00 00 14     b       #4 <nonleaf_call+0x220>
+     254:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     258:       69 8e 40 b9     ldr     w9, [x19, #140]
+     25c:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     260:       68 42 00 f9     str     x8, [x19, #128]
+     264:       06 00 00 14     b       #24 <nonleaf_call+0x248>
+     268:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     26c:       09 21 00 91     add     x9, x8, #8
+     270:       a9 83 1a f8     stur    x9, [x29, #-88]
+     274:       68 42 00 f9     str     x8, [x19, #128]
+     278:       01 00 00 14     b       #4 <nonleaf_call+0x248>
+     27c:       68 42 40 f9     ldr     x8, [x19, #128]
+     280:       09 01 40 b9     ldr     w9, [x8]
+     284:       a9 c3 1d b8     stur    w9, [x29, #-36]
+     288:       a8 63 01 d1     sub     x8, x29, #88
+     28c:       08 61 00 91     add     x8, x8, #24
+     290:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     294:       ea 03 09 2a     mov     w10, w9
+     298:       68 3e 00 f9     str     x8, [x19, #120]
+     29c:       6a 76 00 b9     str     w10, [x19, #116]
+     2a0:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x2a8>
+     2a4:       01 00 00 14     b       #4 <nonleaf_call+0x274>
+     2a8:       68 76 40 b9     ldr     w8, [x19, #116]
+     2ac:       09 21 00 11     add     w9, w8, #8
+     2b0:       6a 3e 40 f9     ldr     x10, [x19, #120]
+     2b4:       49 01 00 b9     str     w9, [x10]
+     2b8:       29 01 00 71     subs    w9, w9, #0
+     2bc:       69 72 00 b9     str     w9, [x19, #112]
+     2c0:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x2a8>
+     2c4:       01 00 00 14     b       #4 <nonleaf_call+0x294>
+     2c8:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     2cc:       69 76 40 b9     ldr     w9, [x19, #116]
+     2d0:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     2d4:       68 36 00 f9     str     x8, [x19, #104]
+     2d8:       06 00 00 14     b       #24 <nonleaf_call+0x2bc>
+     2dc:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     2e0:       09 21 00 91     add     x9, x8, #8
+     2e4:       a9 83 1a f8     stur    x9, [x29, #-88]
+     2e8:       68 36 00 f9     str     x8, [x19, #104]
+     2ec:       01 00 00 14     b       #4 <nonleaf_call+0x2bc>
+     2f0:       68 36 40 f9     ldr     x8, [x19, #104]
+     2f4:       09 01 40 b9     ldr     w9, [x8]
+     2f8:       a9 83 1d b8     stur    w9, [x29, #-40]
+     2fc:       a8 63 01 d1     sub     x8, x29, #88
+     300:       08 61 00 91     add     x8, x8, #24
+     304:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     308:       ea 03 09 2a     mov     w10, w9
+     30c:       68 32 00 f9     str     x8, [x19, #96]
+     310:       6a 5e 00 b9     str     w10, [x19, #92]
+     314:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x31c>
+     318:       01 00 00 14     b       #4 <nonleaf_call+0x2e8>
+     31c:       68 5e 40 b9     ldr     w8, [x19, #92]
+     320:       09 21 00 11     add     w9, w8, #8
+     324:       6a 32 40 f9     ldr     x10, [x19, #96]
+     328:       49 01 00 b9     str     w9, [x10]
+     32c:       29 01 00 71     subs    w9, w9, #0
+     330:       69 5a 00 b9     str     w9, [x19, #88]
+     334:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x31c>
+     338:       01 00 00 14     b       #4 <nonleaf_call+0x308>
+     33c:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     340:       69 5e 40 b9     ldr     w9, [x19, #92]
+     344:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     348:       68 2a 00 f9     str     x8, [x19, #80]
+     34c:       06 00 00 14     b       #24 <nonleaf_call+0x330>
+     350:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     354:       09 21 00 91     add     x9, x8, #8
+     358:       a9 83 1a f8     stur    x9, [x29, #-88]
+     35c:       68 2a 00 f9     str     x8, [x19, #80]
+     360:       01 00 00 14     b       #4 <nonleaf_call+0x330>
+     364:       68 2a 40 f9     ldr     x8, [x19, #80]
+     368:       09 01 40 b9     ldr     w9, [x8]
+     36c:       a9 43 1d b8     stur    w9, [x29, #-44]
+     370:       a8 63 01 d1     sub     x8, x29, #88
+     374:       08 61 00 91     add     x8, x8, #24
+     378:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     37c:       ea 03 09 2a     mov     w10, w9
+     380:       68 26 00 f9     str     x8, [x19, #72]
+     384:       6a 46 00 b9     str     w10, [x19, #68]
+     388:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x390>
+     38c:       01 00 00 14     b       #4 <nonleaf_call+0x35c>
+     390:       68 46 40 b9     ldr     w8, [x19, #68]
+     394:       09 21 00 11     add     w9, w8, #8
+     398:       6a 26 40 f9     ldr     x10, [x19, #72]
+     39c:       49 01 00 b9     str     w9, [x10]
+     3a0:       29 01 00 71     subs    w9, w9, #0
+     3a4:       69 42 00 b9     str     w9, [x19, #64]
+     3a8:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x390>
+     3ac:       01 00 00 14     b       #4 <nonleaf_call+0x37c>
+     3b0:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     3b4:       69 46 40 b9     ldr     w9, [x19, #68]
+     3b8:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     3bc:       68 1e 00 f9     str     x8, [x19, #56]
+     3c0:       06 00 00 14     b       #24 <nonleaf_call+0x3a4>
+     3c4:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     3c8:       09 21 00 91     add     x9, x8, #8
+     3cc:       a9 83 1a f8     stur    x9, [x29, #-88]
+     3d0:       68 1e 00 f9     str     x8, [x19, #56]
+     3d4:       01 00 00 14     b       #4 <nonleaf_call+0x3a4>
+     3d8:       68 1e 40 f9     ldr     x8, [x19, #56]
+     3dc:       09 01 40 b9     ldr     w9, [x8]
+     3e0:       a9 03 1d b8     stur    w9, [x29, #-48]
+     3e4:       a8 63 01 d1     sub     x8, x29, #88
+     3e8:       08 61 00 91     add     x8, x8, #24
+     3ec:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     3f0:       ea 03 09 2a     mov     w10, w9
+     3f4:       68 1a 00 f9     str     x8, [x19, #48]
+     3f8:       6a 2e 00 b9     str     w10, [x19, #44]
+     3fc:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x404>
+     400:       01 00 00 14     b       #4 <nonleaf_call+0x3d0>
+     404:       68 2e 40 b9     ldr     w8, [x19, #44]
+     408:       09 21 00 11     add     w9, w8, #8
+     40c:       6a 1a 40 f9     ldr     x10, [x19, #48]
+     410:       49 01 00 b9     str     w9, [x10]
+     414:       29 01 00 71     subs    w9, w9, #0
+     418:       69 2a 00 b9     str     w9, [x19, #40]
+     41c:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x404>
+     420:       01 00 00 14     b       #4 <nonleaf_call+0x3f0>
+     424:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     428:       69 2e 40 b9     ldr     w9, [x19, #44]
+     42c:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     430:       68 12 00 f9     str     x8, [x19, #32]
+     434:       06 00 00 14     b       #24 <nonleaf_call+0x418>
+     438:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     43c:       09 21 00 91     add     x9, x8, #8
+     440:       a9 83 1a f8     stur    x9, [x29, #-88]
+     444:       68 12 00 f9     str     x8, [x19, #32]
+     448:       01 00 00 14     b       #4 <nonleaf_call+0x418>
+     44c:       68 12 40 f9     ldr     x8, [x19, #32]
+     450:       09 01 40 b9     ldr     w9, [x8]
+     454:       a9 c3 1c b8     stur    w9, [x29, #-52]
+     458:       a8 63 01 d1     sub     x8, x29, #88
+     45c:       08 61 00 91     add     x8, x8, #24
+     460:       a9 03 5c b8     ldur    w9, [x29, #-64]
+     464:       ea 03 09 2a     mov     w10, w9
+     468:       68 0e 00 f9     str     x8, [x19, #24]
+     46c:       6a 16 00 b9     str     w10, [x19, #20]
+     470:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x478>
+     474:       01 00 00 14     b       #4 <nonleaf_call+0x444>
+     478:       68 16 40 b9     ldr     w8, [x19, #20]
+     47c:       09 21 00 11     add     w9, w8, #8
+     480:       6a 0e 40 f9     ldr     x10, [x19, #24]
+     484:       49 01 00 b9     str     w9, [x10]
+     488:       29 01 00 71     subs    w9, w9, #0
+     48c:       69 12 00 b9     str     w9, [x19, #16]
+     490:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x478>
+     494:       01 00 00 14     b       #4 <nonleaf_call+0x464>
+     498:       a8 03 5b f8     ldur    x8, [x29, #-80]
+     49c:       69 16 40 b9     ldr     w9, [x19, #20]
+     4a0:       08 c1 29 8b     add     x8, x8, w9, sxtw
+     4a4:       68 06 00 f9     str     x8, [x19, #8]
+     4a8:       06 00 00 14     b       #24 <nonleaf_call+0x48c>
+     4ac:       a8 83 5a f8     ldur    x8, [x29, #-88]
+     4b0:       09 21 00 91     add     x9, x8, #8
+     4b4:       a9 83 1a f8     stur    x9, [x29, #-88]
+     4b8:       68 06 00 f9     str     x8, [x19, #8]
+     4bc:       01 00 00 14     b       #4 <nonleaf_call+0x48c>
+     4c0:       68 06 40 f9     ldr     x8, [x19, #8]
+     4c4:       09 01 40 b9     ldr     w9, [x8]
+     4c8:       a9 83 1c b8     stur    w9, [x29, #-56]
+     4cc:       e8 03 00 91     mov     x8, sp
+     4d0:       0a 81 03 f1     subs    x10, x8, #224
+     4d4:       5f 01 00 91     mov     sp, x10
+     4d8:       89 09 80 52     mov     w9, #76
+     4dc:       09 01 12 38     sturb   w9, [x8, #-224]
+     4e0:       a0 83 5e b8     ldur    w0, [x29, #-24]
+     4e4:       a1 43 5e b8     ldur    w1, [x29, #-28]
+     4e8:       a2 03 5e b8     ldur    w2, [x29, #-32]
+     4ec:       a3 c3 5d b8     ldur    w3, [x29, #-36]
+     4f0:       a4 83 5d b8     ldur    w4, [x29, #-40]
+     4f4:       a5 43 5d b8     ldur    w5, [x29, #-44]
+     4f8:       a6 03 5d b8     ldur    w6, [x29, #-48]
+     4fc:       a7 c3 5c b8     ldur    w7, [x29, #-52]
+     500:       a9 83 5c b8     ldur    w9, [x29, #-56]
+     504:       ff 43 00 d1     sub     sp, sp, #16
+     508:       e8 03 00 91     mov     x8, sp
+     50c:       09 01 00 b9     str     w9, [x8]
+     510:       bc fe ff 97     bl      #-1296 <leaf_call>
+     514:       ff 43 00 91     add     sp, sp, #16
+     518:       bf 43 00 d1     sub     sp, x29, #16
+     51c:       fd 7b 41 a9     ldp     x29, x30, [sp, #16]
+     520:       fc 4f c2 a8     ldp     x28, x19, [sp], #32
+     524:       c0 03 5f d6     ret
+
+0000000000000528 main:                                                        ; caller: sets up 8 register args + 2 stack values, calls nonleaf_call, returns 0
+     528:       ff c3 00 d1     sub     sp, sp, #48                           ; |
+     52c:       fd 7b 02 a9     stp     x29, x30, [sp, #32]                   ; | prolog: 48-byte frame, x29/x30 saved at sp+32
+     530:       fd 83 00 91     add     x29, sp, #32                          ; |         x29 -> saved x29/x30 pair
+     534:       08 00 80 52     mov     w8, #0                                ; 0 -> w8 (copied to w0 below, and spilled for use as return value)
+     538:       e1 03 00 32     orr     w1, wzr, #0x1                         ; arg 1
+     53c:       e2 03 1f 32     orr     w2, wzr, #0x2                         ; arg 2
+     540:       e3 07 00 32     orr     w3, wzr, #0x3                         ; arg 3
+     544:       e4 03 1e 32     orr     w4, wzr, #0x4                         ; arg 4
+     548:       a5 00 80 52     mov     w5, #5                                ; arg 5
+     54c:       e6 07 1f 32     orr     w6, wzr, #0x6                         ; arg 6
+     550:       e7 0b 00 32     orr     w7, wzr, #0x7                         ; arg 7
+     554:       e9 03 1d 32     orr     w9, wzr, #0x8                         ; 8 -> w9 (goes to the stack below)
+     558:       2a 01 80 52     mov     w10, #9                               ; 9 -> w10 (goes to the stack below)
+     55c:       bf c3 1f b8     stur    wzr, [x29, #-4]                       ; zero the local at x29-4 (not read again in this function)
+     560:       e0 03 08 2a     mov     w0, w8                                ; arg 0
+     564:       eb 03 00 91     mov     x11, sp                               ; |
+     568:       69 01 00 b9     str     w9, [x11]                             ; | 8 -> [sp]   (presumably arg 8, passed via stack)
+     56c:       eb 03 00 91     mov     x11, sp                               ; |
+     570:       6a 09 00 b9     str     w10, [x11, #8]                        ; | 9 -> [sp+8] (presumably arg 9; note the 8-byte slot spacing)
+     574:       a8 83 1f b8     stur    w8, [x29, #-8]                        ; spill the 0 in w8 to x29-8 across the call
+     578:       af fe ff 97     bl      #-1348 <nonleaf_call>                 ; call (0x578 - 1348 = 0x34) and ret addr -> x30
+     57c:       a0 83 5f b8     ldur    w0, [x29, #-8]                        ; return value: the 0 spilled above
+     580:       fd 7b 42 a9     ldp     x29, x30, [sp, #32]                   ; |
+     584:       ff c3 00 91     add     sp, sp, #48                           ; | epilog
+     588:       c0 03 5f d6     ret                                           ; |
+
+
+
+; ---------- for spilling with a float ---------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, float f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b,c,d,e,g,h,i,j;
+;     float f;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = va_arg(ap, float);
+;     g = va_arg(ap, int);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = va_arg(ap, int);
+; 
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6, 7, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000000000 leaf_call:                                                   ; empty leaf function: only spills its 9 in args to locals
+       0:       ff c3 00 d1     sub     sp, sp, #48                           ; prolog: 48 bytes of local area (no frame record saved)
+       4:       e0 2f 00 b9     str     w0, [sp, #44]                         ; \
+       8:       e1 2b 00 b9     str     w1, [sp, #40]                         ; |
+       c:       e2 27 00 b9     str     w2, [sp, #36]                         ; | in args 0,1,2,3 (b,c,d,e) -> local area
+      10:       e3 23 00 b9     str     w3, [sp, #32]                         ; /
+      14:       e0 1f 00 bd     str     s0, [sp, #28]                         ; in arg 4 (float f) arrives in s0, not in w4 -> local area
+      18:       e4 1b 00 b9     str     w4, [sp, #24]                         ; \
+      1c:       e5 17 00 b9     str     w5, [sp, #20]                         ; |
+      20:       e6 13 00 b9     str     w6, [sp, #16]                         ; | in args 5,6,7,8 (g,h,i,j) use w4-w7 -> local area
+      24:       e7 0f 00 b9     str     w7, [sp, #12]                         ; /
+      28:       ff c3 00 91     add     sp, sp, #48                           ; epilog
+      2c:       c0 03 5f d6     ret                                           ; return to address in x30
+
+0000000000000030 nonleaf_call:                                                ; variadic nonleaf_call(int a, ...): dumps arg regs, builds va_list at x29-88, pulls 9 va_args, alloca(), calls leaf_call
+      30:       fc 4f be a9     stp     x28, x19, [sp, #-32]!                 ; |
+      34:       fd 7b 01 a9     stp     x29, x30, [sp, #16]                   ; | prolog: save x28/x19 and x29/x30
+      38:       fd 43 00 91     add     x29, sp, #16                          ; |         x29 = frame pointer (-> saved x29/x30)
+      3c:       ff 83 07 d1     sub     sp, sp, #480                          ; |         480 bytes of local area
+      40:       f3 03 00 91     mov     x19, sp                               ; /         x19 = base of local area (sp itself moves again for alloca() below)
+      44:       67 56 80 3d     str     q7, [x19, #336]                       ; \
+      48:       66 52 80 3d     str     q6, [x19, #320]                       ; |
+      4c:       65 4e 80 3d     str     q5, [x19, #304]                       ; |
+      50:       64 4a 80 3d     str     q4, [x19, #288]                       ; | q0-q7 -> 128-byte save area at x19+224 .. x19+352
+      54:       63 46 80 3d     str     q3, [x19, #272]                       ; |
+      58:       62 42 80 3d     str     q2, [x19, #256]                       ; |
+      5c:       61 3e 80 3d     str     q1, [x19, #240]                       ; |
+      60:       60 3a 80 3d     str     q0, [x19, #224]                       ; /
+      64:       a7 03 1a f8     stur    x7, [x29, #-96]                       ; \
+      68:       a6 83 19 f8     stur    x6, [x29, #-104]                      ; |
+      6c:       a5 03 19 f8     stur    x5, [x29, #-112]                      ; |
+      70:       a4 83 18 f8     stur    x4, [x29, #-120]                      ; | x1-x7 -> 56-byte save area at x29-144 .. x29-88
+      74:       a3 03 18 f8     stur    x3, [x29, #-128]                      ; |
+      78:       a2 83 17 f8     stur    x2, [x29, #-136]                      ; |
+      7c:       a1 03 17 f8     stur    x1, [x29, #-144]                      ; /
+      80:       a0 c3 1e b8     stur    w0, [x29, #-20]                       ; in arg 0 (a, the only named param) -> local
+      84:       e0 63 19 32     orr     w0, wzr, #0xffffff80                  ; va_start: -128 (= -(8 regs * 16 bytes)) ...
+      88:       a0 43 1c b8     stur    w0, [x29, #-60]                       ; ... -> ap.__vr_offs (field names as in the AAPCS64 va_list; layout inferred from these stores)
+      8c:       e0 06 80 12     mov     w0, #-56                              ; -56 (= -(7 regs * 8 bytes)) ...
+      90:       a0 03 1c b8     stur    w0, [x29, #-64]                       ; ... -> ap.__gr_offs
+      94:       61 82 03 91     add     x1, x19, #224                         ; |
+      98:       21 00 02 91     add     x1, x1, #128                          ; | end of q0-q7 save area (x19+352) ...
+      9c:       a1 83 1b f8     stur    x1, [x29, #-72]                       ; | ... -> ap.__vr_top
+      a0:       a1 43 02 d1     sub     x1, x29, #144                         ; |
+      a4:       21 e0 00 91     add     x1, x1, #56                           ; | end of x1-x7 save area (x29-88) ...
+      a8:       a1 03 1b f8     stur    x1, [x29, #-80]                       ; | ... -> ap.__gr_top
+      ac:       a1 43 00 91     add     x1, x29, #16                          ; | x29+16 (= sp at function entry, start of caller's stack args) ...
+      b0:       a1 83 1a f8     stur    x1, [x29, #-88]                       ; | ... -> ap.__stack
+      b4:       a1 63 01 d1     sub     x1, x29, #88                          ; va_arg #1 (b, int): x1 = &ap ...
+      b8:       21 60 00 91     add     x1, x1, #24                           ; ... + 24 = &ap.__gr_offs
+      bc:       a0 03 5c b8     ldur    w0, [x29, #-64]                       ; w0 = ap.__gr_offs
+      c0:       e8 03 00 2a     mov     w8, w0
+      c4:       61 6e 00 f9     str     x1, [x19, #216]                       ; spill &ap.__gr_offs
+      c8:       68 d6 00 b9     str     w8, [x19, #212]                       ; spill old __gr_offs
+      cc:       e0 01 f8 36     tbz     w0, #31, #60 <nonleaf_call+0xd8>      ; bit 31 clear (offs >= 0, reg save area used up) -> stack path at 0x108
+      d0:       01 00 00 14     b       #4 <nonleaf_call+0xa4>                ; else fall into register path
+      d4:       68 d6 40 b9     ldr     w8, [x19, #212]                       ; register path: old offs
+      d8:       09 21 00 11     add     w9, w8, #8                            ; + 8 (one general purpose register slot)
+      dc:       6a 6e 40 f9     ldr     x10, [x19, #216]
+      e0:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs = old offs + 8
+      e4:       29 01 00 71     subs    w9, w9, #0                            ; set flags for new offs
+      e8:       69 d2 00 b9     str     w9, [x19, #208]
+      ec:       ec 00 00 54     b.gt    #28 <nonleaf_call+0xd8>               ; new offs > 0 (arg would run past the save area) -> stack path at 0x108
+      f0:       01 00 00 14     b       #4 <nonleaf_call+0xc4>
+      f4:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; x8 = ap.__gr_top
+      f8:       69 d6 40 b9     ldr     w9, [x19, #212]
+      fc:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; arg address = __gr_top + old (negative) offs
+     100:       68 66 00 f9     str     x8, [x19, #200]
+     104:       06 00 00 14     b       #24 <nonleaf_call+0xec>               ; -> join at 0x11c
+     108:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: arg address = ap.__stack
+     10c:       09 21 00 91     add     x9, x8, #8
+     110:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ap.__stack += 8
+     114:       68 66 00 f9     str     x8, [x19, #200]
+     118:       01 00 00 14     b       #4 <nonleaf_call+0xec>
+     11c:       68 66 40 f9     ldr     x8, [x19, #200]                       ; join: x8 = arg address
+     120:       09 01 40 b9     ldr     w9, [x8]                              ; fetch int
+     124:       a9 83 1e b8     stur    w9, [x29, #-24]                       ; -> local b
+     128:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #2 (c, int): same sequence as above
+     12c:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     130:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     134:       ea 03 09 2a     mov     w10, w9
+     138:       68 62 00 f9     str     x8, [x19, #192]
+     13c:       6a be 00 b9     str     w10, [x19, #188]
+     140:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x14c>     ; offs >= 0 -> stack path at 0x17c
+     144:       01 00 00 14     b       #4 <nonleaf_call+0x118>
+     148:       68 be 40 b9     ldr     w8, [x19, #188]
+     14c:       09 21 00 11     add     w9, w8, #8
+     150:       6a 62 40 f9     ldr     x10, [x19, #192]
+     154:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     158:       29 01 00 71     subs    w9, w9, #0
+     15c:       69 ba 00 b9     str     w9, [x19, #184]
+     160:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x14c>              ; new offs > 0 -> stack path at 0x17c
+     164:       01 00 00 14     b       #4 <nonleaf_call+0x138>
+     168:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     16c:       69 be 40 b9     ldr     w9, [x19, #188]
+     170:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     174:       68 5a 00 f9     str     x8, [x19, #176]
+     178:       06 00 00 14     b       #24 <nonleaf_call+0x160>
+     17c:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     180:       09 21 00 91     add     x9, x8, #8
+     184:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     188:       68 5a 00 f9     str     x8, [x19, #176]
+     18c:       01 00 00 14     b       #4 <nonleaf_call+0x160>
+     190:       68 5a 40 f9     ldr     x8, [x19, #176]                       ; join: arg address
+     194:       09 01 40 b9     ldr     w9, [x8]
+     198:       a9 43 1e b8     stur    w9, [x29, #-28]                       ; -> local c
+     19c:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #3 (d, int): same sequence as above
+     1a0:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     1a4:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     1a8:       ea 03 09 2a     mov     w10, w9
+     1ac:       68 56 00 f9     str     x8, [x19, #168]
+     1b0:       6a a6 00 b9     str     w10, [x19, #164]
+     1b4:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x1c0>     ; offs >= 0 -> stack path at 0x1f0
+     1b8:       01 00 00 14     b       #4 <nonleaf_call+0x18c>
+     1bc:       68 a6 40 b9     ldr     w8, [x19, #164]
+     1c0:       09 21 00 11     add     w9, w8, #8
+     1c4:       6a 56 40 f9     ldr     x10, [x19, #168]
+     1c8:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     1cc:       29 01 00 71     subs    w9, w9, #0
+     1d0:       69 a2 00 b9     str     w9, [x19, #160]
+     1d4:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x1c0>              ; new offs > 0 -> stack path at 0x1f0
+     1d8:       01 00 00 14     b       #4 <nonleaf_call+0x1ac>
+     1dc:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     1e0:       69 a6 40 b9     ldr     w9, [x19, #164]
+     1e4:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     1e8:       68 4e 00 f9     str     x8, [x19, #152]
+     1ec:       06 00 00 14     b       #24 <nonleaf_call+0x1d4>
+     1f0:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     1f4:       09 21 00 91     add     x9, x8, #8
+     1f8:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     1fc:       68 4e 00 f9     str     x8, [x19, #152]
+     200:       01 00 00 14     b       #4 <nonleaf_call+0x1d4>
+     204:       68 4e 40 f9     ldr     x8, [x19, #152]                       ; join: arg address
+     208:       09 01 40 b9     ldr     w9, [x8]
+     20c:       a9 03 1e b8     stur    w9, [x29, #-32]                       ; -> local d
+     210:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #4 (e, int): same sequence as above
+     214:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     218:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     21c:       ea 03 09 2a     mov     w10, w9
+     220:       68 4a 00 f9     str     x8, [x19, #144]
+     224:       6a 8e 00 b9     str     w10, [x19, #140]
+     228:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x234>     ; offs >= 0 -> stack path at 0x264
+     22c:       01 00 00 14     b       #4 <nonleaf_call+0x200>
+     230:       68 8e 40 b9     ldr     w8, [x19, #140]
+     234:       09 21 00 11     add     w9, w8, #8
+     238:       6a 4a 40 f9     ldr     x10, [x19, #144]
+     23c:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     240:       29 01 00 71     subs    w9, w9, #0
+     244:       69 8a 00 b9     str     w9, [x19, #136]
+     248:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x234>              ; new offs > 0 -> stack path at 0x264
+     24c:       01 00 00 14     b       #4 <nonleaf_call+0x220>
+     250:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     254:       69 8e 40 b9     ldr     w9, [x19, #140]
+     258:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     25c:       68 42 00 f9     str     x8, [x19, #128]
+     260:       06 00 00 14     b       #24 <nonleaf_call+0x248>
+     264:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     268:       09 21 00 91     add     x9, x8, #8
+     26c:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     270:       68 42 00 f9     str     x8, [x19, #128]
+     274:       01 00 00 14     b       #4 <nonleaf_call+0x248>
+     278:       68 42 40 f9     ldr     x8, [x19, #128]                       ; join: arg address
+     27c:       09 01 40 b9     ldr     w9, [x8]
+     280:       a9 c3 1d b8     stur    w9, [x29, #-36]                       ; -> local e
+     284:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #5 (f, float): uses the vector register save area instead
+     288:       08 71 00 91     add     x8, x8, #28                           ; &ap.__vr_offs (offset 28, not 24)
+     28c:       a9 43 5c b8     ldur    w9, [x29, #-60]                       ; ap.__vr_offs
+     290:       ea 03 09 2a     mov     w10, w9
+     294:       68 3e 00 f9     str     x8, [x19, #120]
+     298:       6a 76 00 b9     str     w10, [x19, #116]
+     29c:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x2a8>     ; offs >= 0 -> stack path at 0x2d8
+     2a0:       01 00 00 14     b       #4 <nonleaf_call+0x274>
+     2a4:       68 76 40 b9     ldr     w8, [x19, #116]
+     2a8:       09 41 00 11     add     w9, w8, #16                           ; + 16 (one q register slot)
+     2ac:       6a 3e 40 f9     ldr     x10, [x19, #120]
+     2b0:       49 01 00 b9     str     w9, [x10]                             ; ap.__vr_offs += 16
+     2b4:       29 01 00 71     subs    w9, w9, #0
+     2b8:       69 72 00 b9     str     w9, [x19, #112]
+     2bc:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x2a8>              ; new offs > 0 -> stack path at 0x2d8
+     2c0:       01 00 00 14     b       #4 <nonleaf_call+0x294>
+     2c4:       a8 83 5b f8     ldur    x8, [x29, #-72]                       ; register path: ap.__vr_top ...
+     2c8:       69 76 40 b9     ldr     w9, [x19, #116]
+     2cc:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     2d0:       68 36 00 f9     str     x8, [x19, #104]
+     2d4:       06 00 00 14     b       #24 <nonleaf_call+0x2bc>
+     2d8:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     2dc:       09 21 00 91     add     x9, x8, #8
+     2e0:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     2e4:       68 36 00 f9     str     x8, [x19, #104]
+     2e8:       01 00 00 14     b       #4 <nonleaf_call+0x2bc>
+     2ec:       68 36 40 f9     ldr     x8, [x19, #104]                       ; join: arg address
+     2f0:       00 01 40 bd     ldr     s0, [x8]                              ; fetch 32 bits as float; NOTE(review): main passes this arg as a double in d0, so va_arg(ap, float) in the C source does not match the promoted type
+     2f4:       a0 83 1c bc     stur    s0, [x29, #-56]                       ; -> local f
+     2f8:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #6 (g, int): back to the general purpose register sequence
+     2fc:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     300:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     304:       ea 03 09 2a     mov     w10, w9
+     308:       68 32 00 f9     str     x8, [x19, #96]
+     30c:       6a 5e 00 b9     str     w10, [x19, #92]
+     310:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x31c>     ; offs >= 0 -> stack path at 0x34c
+     314:       01 00 00 14     b       #4 <nonleaf_call+0x2e8>
+     318:       68 5e 40 b9     ldr     w8, [x19, #92]
+     31c:       09 21 00 11     add     w9, w8, #8
+     320:       6a 32 40 f9     ldr     x10, [x19, #96]
+     324:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     328:       29 01 00 71     subs    w9, w9, #0
+     32c:       69 5a 00 b9     str     w9, [x19, #88]
+     330:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x31c>              ; new offs > 0 -> stack path at 0x34c
+     334:       01 00 00 14     b       #4 <nonleaf_call+0x308>
+     338:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     33c:       69 5e 40 b9     ldr     w9, [x19, #92]
+     340:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     344:       68 2a 00 f9     str     x8, [x19, #80]
+     348:       06 00 00 14     b       #24 <nonleaf_call+0x330>
+     34c:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     350:       09 21 00 91     add     x9, x8, #8
+     354:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     358:       68 2a 00 f9     str     x8, [x19, #80]
+     35c:       01 00 00 14     b       #4 <nonleaf_call+0x330>
+     360:       68 2a 40 f9     ldr     x8, [x19, #80]                        ; join: arg address
+     364:       09 01 40 b9     ldr     w9, [x8]
+     368:       a9 83 1d b8     stur    w9, [x29, #-40]                       ; -> local g
+     36c:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #7 (h, int): same sequence as above
+     370:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     374:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     378:       ea 03 09 2a     mov     w10, w9
+     37c:       68 26 00 f9     str     x8, [x19, #72]
+     380:       6a 46 00 b9     str     w10, [x19, #68]
+     384:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x390>     ; offs >= 0 -> stack path at 0x3c0
+     388:       01 00 00 14     b       #4 <nonleaf_call+0x35c>
+     38c:       68 46 40 b9     ldr     w8, [x19, #68]
+     390:       09 21 00 11     add     w9, w8, #8
+     394:       6a 26 40 f9     ldr     x10, [x19, #72]
+     398:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     39c:       29 01 00 71     subs    w9, w9, #0
+     3a0:       69 42 00 b9     str     w9, [x19, #64]
+     3a4:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x390>              ; new offs > 0 -> stack path at 0x3c0
+     3a8:       01 00 00 14     b       #4 <nonleaf_call+0x37c>
+     3ac:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     3b0:       69 46 40 b9     ldr     w9, [x19, #68]
+     3b4:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     3b8:       68 1e 00 f9     str     x8, [x19, #56]
+     3bc:       06 00 00 14     b       #24 <nonleaf_call+0x3a4>
+     3c0:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     3c4:       09 21 00 91     add     x9, x8, #8
+     3c8:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     3cc:       68 1e 00 f9     str     x8, [x19, #56]
+     3d0:       01 00 00 14     b       #4 <nonleaf_call+0x3a4>
+     3d4:       68 1e 40 f9     ldr     x8, [x19, #56]                        ; join: arg address
+     3d8:       09 01 40 b9     ldr     w9, [x8]
+     3dc:       a9 43 1d b8     stur    w9, [x29, #-44]                       ; -> local h
+     3e0:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #8 (i, int): same sequence as above
+     3e4:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     3e8:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs
+     3ec:       ea 03 09 2a     mov     w10, w9
+     3f0:       68 1a 00 f9     str     x8, [x19, #48]
+     3f4:       6a 2e 00 b9     str     w10, [x19, #44]
+     3f8:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x404>     ; offs >= 0 -> stack path at 0x434
+     3fc:       01 00 00 14     b       #4 <nonleaf_call+0x3d0>
+     400:       68 2e 40 b9     ldr     w8, [x19, #44]
+     404:       09 21 00 11     add     w9, w8, #8
+     408:       6a 1a 40 f9     ldr     x10, [x19, #48]
+     40c:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     410:       29 01 00 71     subs    w9, w9, #0
+     414:       69 2a 00 b9     str     w9, [x19, #40]
+     418:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x404>              ; new offs > 0 -> stack path at 0x434
+     41c:       01 00 00 14     b       #4 <nonleaf_call+0x3f0>
+     420:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     424:       69 2e 40 b9     ldr     w9, [x19, #44]
+     428:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     42c:       68 12 00 f9     str     x8, [x19, #32]
+     430:       06 00 00 14     b       #24 <nonleaf_call+0x418>
+     434:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     438:       09 21 00 91     add     x9, x8, #8
+     43c:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     440:       68 12 00 f9     str     x8, [x19, #32]
+     444:       01 00 00 14     b       #4 <nonleaf_call+0x418>
+     448:       68 12 40 f9     ldr     x8, [x19, #32]                        ; join: arg address
+     44c:       09 01 40 b9     ldr     w9, [x8]
+     450:       a9 03 1d b8     stur    w9, [x29, #-48]                       ; -> local i
+     454:       a8 63 01 d1     sub     x8, x29, #88                          ; va_arg #9 (j, int): same sequence; 8th int pulled, but only 7 int regs (x1-x7) were saved ...
+     458:       08 61 00 91     add     x8, x8, #24                           ; &ap.__gr_offs
+     45c:       a9 03 5c b8     ldur    w9, [x29, #-64]                       ; ap.__gr_offs (-56 + 7*8 = 0 by now, if all previous ints came from registers)
+     460:       ea 03 09 2a     mov     w10, w9
+     464:       68 0e 00 f9     str     x8, [x19, #24]
+     468:       6a 16 00 b9     str     w10, [x19, #20]
+     46c:       e9 01 f8 36     tbz     w9, #31, #60 <nonleaf_call+0x478>     ; offs >= 0 -> stack path at 0x4a8 (... so this one is expected to come from the stack)
+     470:       01 00 00 14     b       #4 <nonleaf_call+0x444>
+     474:       68 16 40 b9     ldr     w8, [x19, #20]
+     478:       09 21 00 11     add     w9, w8, #8
+     47c:       6a 0e 40 f9     ldr     x10, [x19, #24]
+     480:       49 01 00 b9     str     w9, [x10]                             ; ap.__gr_offs += 8
+     484:       29 01 00 71     subs    w9, w9, #0
+     488:       69 12 00 b9     str     w9, [x19, #16]
+     48c:       ec 00 00 54     b.gt    #28 <nonleaf_call+0x478>              ; new offs > 0 -> stack path at 0x4a8
+     490:       01 00 00 14     b       #4 <nonleaf_call+0x464>
+     494:       a8 03 5b f8     ldur    x8, [x29, #-80]                       ; register path: ap.__gr_top ...
+     498:       69 16 40 b9     ldr     w9, [x19, #20]
+     49c:       08 c1 29 8b     add     x8, x8, w9, sxtw                      ; ... + old offs
+     4a0:       68 06 00 f9     str     x8, [x19, #8]
+     4a4:       06 00 00 14     b       #24 <nonleaf_call+0x48c>
+     4a8:       a8 83 5a f8     ldur    x8, [x29, #-88]                       ; stack path: ap.__stack ...
+     4ac:       09 21 00 91     add     x9, x8, #8
+     4b0:       a9 83 1a f8     stur    x9, [x29, #-88]                       ; ... post-incremented by 8
+     4b4:       68 06 00 f9     str     x8, [x19, #8]
+     4b8:       01 00 00 14     b       #4 <nonleaf_call+0x48c>
+     4bc:       68 06 40 f9     ldr     x8, [x19, #8]                         ; join: arg address
+     4c0:       09 01 40 b9     ldr     w9, [x8]
+     4c4:       a9 c3 1c b8     stur    w9, [x29, #-52]                       ; -> local j
+     4c8:       e8 03 00 91     mov     x8, sp                                ; |
+     4cc:       0a 81 03 f1     subs    x10, x8, #224                         ; | alloca(220): sp -= 224 (220 rounded up to a multiple of 16)
+     4d0:       5f 01 00 91     mov     sp, x10                               ; |
+     4d4:       89 09 80 52     mov     w9, #76                               ; 'L' -> w9, and ...
+     4d8:       09 01 12 38     sturb   w9, [x8, #-224]                       ; ... store at old sp - 224 (= new sp, start of alloca()'d space)
+     4dc:       a0 83 5e b8     ldur    w0, [x29, #-24]                       ; arg 0 (b)
+     4e0:       a1 43 5e b8     ldur    w1, [x29, #-28]                       ; arg 1 (c)
+     4e4:       a2 03 5e b8     ldur    w2, [x29, #-32]                       ; arg 2 (d)
+     4e8:       a3 c3 5d b8     ldur    w3, [x29, #-36]                       ; arg 3 (e)
+     4ec:       a0 83 5c bc     ldur    s0, [x29, #-56]                       ; arg 4 (f): float goes to s0, not consuming an integer register
+     4f0:       a4 83 5d b8     ldur    w4, [x29, #-40]                       ; arg 5 (g)
+     4f4:       a5 43 5d b8     ldur    w5, [x29, #-44]                       ; arg 6 (h)
+     4f8:       a6 03 5d b8     ldur    w6, [x29, #-48]                       ; arg 7 (i)
+     4fc:       a7 c3 5c b8     ldur    w7, [x29, #-52]                       ; arg 8 (j): still fits in a register, so nothing is pushed
+     500:       c0 fe ff 97     bl      #-1280 <leaf_call>                    ; call (0x500 - 1280 = 0x0) and ret addr -> x30
+     504:       bf 43 00 d1     sub     sp, x29, #16                          ; | epilog: sp = x29-16, dropping local area and alloca()'d space
+     508:       fd 7b 41 a9     ldp     x29, x30, [sp, #16]                   ; |         restore x29/x30
+     50c:       fc 4f c2 a8     ldp     x28, x19, [sp], #32                   ; |         restore x28/x19 and pop the 32 bytes pushed on entry
+     510:       c0 03 5f d6     ret                                           ; |
+
+0000000000000514 main:                                                        ; caller of variadic nonleaf_call(0, 1, 2, 3, 4, 5.f, 6, 7, 8, 9); returns 0
+     514:       ff 83 00 d1     sub     sp, sp, #32                           ; |
+     518:       fd 7b 01 a9     stp     x29, x30, [sp, #16]                   ; | prolog: 32-byte frame, x29/x30 saved at sp+16
+     51c:       fd 43 00 91     add     x29, sp, #16                          ; |         x29 -> saved x29/x30 pair
+     520:       08 00 80 52     mov     w8, #0                                ; 0 -> w8 (copied to w0 below, and spilled for use as return value)
+     524:       e1 03 00 32     orr     w1, wzr, #0x1                         ; arg 1
+     528:       e2 03 1f 32     orr     w2, wzr, #0x2                         ; arg 2
+     52c:       e3 07 00 32     orr     w3, wzr, #0x3                         ; arg 3
+     530:       e4 03 1e 32     orr     w4, wzr, #0x4                         ; arg 4
+     534:       89 02 e8 d2     mov     x9, #4617315517961601024              ; 0x4014000000000000 = bit pattern of 5.0 as a double, and ...
+     538:       20 01 67 9e     fmov    d0, x9                                ; ... arg 5 -> d0 (the 5.f is passed as double to the variadic callee)
+     53c:       e5 07 1f 32     orr     w5, wzr, #0x6                         ; arg 6 (w5, as the float did not consume an integer register)
+     540:       e6 0b 00 32     orr     w6, wzr, #0x7                         ; arg 7
+     544:       e7 03 1d 32     orr     w7, wzr, #0x8                         ; arg 8
+     548:       2a 01 80 52     mov     w10, #9                               ; 9 -> w10 (goes to the stack below)
+     54c:       bf c3 1f b8     stur    wzr, [x29, #-4]                       ; zero the local at x29-4 (not read again in this function)
+     550:       e0 03 08 2a     mov     w0, w8                                ; arg 0
+     554:       e9 03 00 91     mov     x9, sp                                ; |
+     558:       2a 01 00 b9     str     w10, [x9]                             ; | arg 9 -> [sp], the only arg passed via stack
+     55c:       e8 0b 00 b9     str     w8, [sp, #8]                          ; spill the 0 in w8 to sp+8 across the call
+     560:       b4 fe ff 97     bl      #-1328 <nonleaf_call>                 ; call (0x560 - 1328 = 0x30) and ret addr -> x30
+     564:       e0 0b 40 b9     ldr     w0, [sp, #8]                          ; return value: the 0 spilled above
+     568:       fd 7b 41 a9     ldp     x29, x30, [sp, #16]                   ; |
+     56c:       ff 83 00 91     add     sp, sp, #32                           ; | epilog
+     570:       c0 03 5f d6     ret                                           ; |
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/mips.eabi.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,613 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+;     return 0;
+; }
+
+
+
+; output from psptoolchain-20111215-psp w/ gcc 4.9.3
+
+00000000 <leaf_call>:                         ; empty leaf function: only spills its 7 in args to the local area
+   0:   27bdffd8        addiu   sp,sp,-40     ; |
+   4:   afbe0024        sw      s8,36(sp)     ; | prolog (leaf: ra is not saved)
+   8:   03a0f021        move    s8,sp         ; /         frame pointer = sp (bottom of the 40-byte frame)
+   c:   afc40000        sw      a0,0(s8)      ; \
+  10:   afc50004        sw      a1,4(s8)      ; |
+  14:   afc60008        sw      a2,8(s8)      ; |
+  18:   afc7000c        sw      a3,12(s8)     ; | in args 0,1,2,3,4,5,6 -> temp space in local area
+  1c:   afc80010        sw      t0,16(s8)     ; |       (t0 = a4)
+  20:   afc90014        sw      t1,20(s8)     ; |       (t1 = a5)
+  24:   afca0018        sw      t2,24(s8)     ; |       (t2 = a6)
+  28:   03c0e821        move    sp,s8         ; \
+  2c:   8fbe0024        lw      s8,36(sp)     ; |
+  30:   27bd0028        addiu   sp,sp,40      ; | epilog
+  34:   03e00008        jr      ra            ; |
+  38:   00000000        nop                   ; |       branch delay slot
+
+0000003c <nonleaf_call>:                      ; spills its 8 in args, alloca()s, then calls leaf_call with in args 1-7
+  3c:   27bdffd8        addiu   sp,sp,-40     ; |
+  40:   afbf0024        sw      ra,36(sp)     ; |
+  44:   afbe0020        sw      s8,32(sp)     ; | prolog
+  48:   03a0f021        move    s8,sp         ; /         frame pointer (note: with offset to frame start, but static compared to sp)
+  4c:   afc40000        sw      a0,0(s8)      ; \
+  50:   afc50004        sw      a1,4(s8)      ; |
+  54:   afc60008        sw      a2,8(s8)      ; |
+  58:   afc7000c        sw      a3,12(s8)     ; |
+  5c:   afc80010        sw      t0,16(s8)     ; | in args 0,1,2,3,4,5,6,7 -> temp space in local area
+  60:   afc90014        sw      t1,20(s8)     ; |       (t0-t3 = a4-a7)
+  64:   afca0018        sw      t2,24(s8)     ; |
+  68:   afcb001c        sw      t3,28(s8)     ; |
+  6c:   27bdff18        addiu   sp,sp,-232    ; alloca(220) - with padding to guarantee alignment
+  70:   03a01021        move    v0,sp         ; |
+  74:   24420007        addiu   v0,v0,7       ; |
+  78:   000210c2        srl     v0,v0,0x3     ; | start of alloca()'d memory -> v1, by ...
+  7c:   000210c0        sll     v0,v0,0x3     ; | ... using v0 as helper to round up to a multiple of 8 bytes
+  80:   00401821        move    v1,v0         ; |
+  84:   2402004c        li      v0,76         ; 'L' -> v0, and...
+  88:   a0620000        sb      v0,0(v1)      ; ... store in local area (of alloca()'d space)
+  8c:   8fc40004        lw      a0,4(s8)      ; |
+  90:   8fc50008        lw      a1,8(s8)      ; |
+  94:   8fc6000c        lw      a2,12(s8)     ; |
+  98:   8fc70010        lw      a3,16(s8)     ; | arg 0,1,2,3,4,5,6 (= in args 1-7, fetched from local area stored to above)
+  9c:   8fc80014        lw      t0,20(s8)     ; |       (t0 = a4)
+  a0:   8fc90018        lw      t1,24(s8)     ; |       (t1 = a5)
+  a4:   8fca001c        lw      t2,28(s8)     ; |       (t2 = a6)
+  a8:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra
+  ac:   00000000        nop                   ; branch delay slot
+  b0:   03c0e821        move    sp,s8         ; |       (also frees the alloca()'d space)
+  b4:   8fbf0024        lw      ra,36(sp)     ; |
+  b8:   8fbe0020        lw      s8,32(sp)     ; |
+  bc:   27bd0028        addiu   sp,sp,40      ; | epilog
+  c0:   03e00008        jr      ra            ; |
+  c4:   00000000        nop                   ; |       branch delay slot
+
+000000c8 <main>:                              ; passes all 8 args in registers (a0-a3, t0-t3) and returns 0
+  c8:   27bdfff8        addiu   sp,sp,-8      ; |
+  cc:   afbf0004        sw      ra,4(sp)      ; |
+  d0:   afbe0000        sw      s8,0(sp)      ; | prolog
+  d4:   03a0f021        move    s8,sp         ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+  d8:   00002021        move    a0,zero       ; arg 0
+  dc:   24050001        li      a1,1          ; arg 1
+  e0:   24060002        li      a2,2          ; arg 2
+  e4:   24070003        li      a3,3          ; arg 3
+  e8:   24080004        li      t0,4          ; arg 4 (t0 = a4)
+  ec:   24090005        li      t1,5          ; arg 5 (t1 = a5)
+  f0:   240a0006        li      t2,6          ; arg 6 (t2 = a6)
+  f4:   240b0007        li      t3,7          ; arg 7 (t3 = a7)
+  f8:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra (C source calls nonleaf_call; the target prints as 0 <leaf_call>, presumably b/c this is an unrelocated object file)
+  fc:   00000000        nop                   ; branch delay slot
+ 100:   00001021        move    v0,zero       ; return value
+ 104:   03c0e821        move    sp,s8         ; |
+ 108:   8fbf0004        lw      ra,4(sp)      ; |
+ 10c:   8fbe0000        lw      s8,0(sp)      ; |
+ 110:   27bd0008        addiu   sp,sp,8       ; | epilog
+ 114:   03e00008        jr      ra            ; |
+ 118:   00000000        nop                   ; |       branch delay slot
+
+
+
+; ------------- as above but more args to use stack params ----------->
+
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from psptoolchain-20111215-psp w/ gcc 4.9.3
+
+00000000 <leaf_call>:                         ; empty leaf function with 9 params: spills the 8 register args, never touches the 9th
+   0:   27bdffd8        addiu   sp,sp,-40     ; |
+   4:   afbe0024        sw      s8,36(sp)     ; | prolog (leaf: ra is not saved)
+   8:   03a0f021        move    s8,sp         ; /         frame pointer = sp (bottom of the 40-byte frame)
+   c:   afc40000        sw      a0,0(s8)      ; \
+  10:   afc50004        sw      a1,4(s8)      ; |
+  14:   afc60008        sw      a2,8(s8)      ; |
+  18:   afc7000c        sw      a3,12(s8)     ; |
+  1c:   afc80010        sw      t0,16(s8)     ; | in args 0,1,2,3,4,5,6,7 -> temp space in local area
+  20:   afc90014        sw      t1,20(s8)     ; |       (t0-t3 = a4-a7)
+  24:   afca0018        sw      t2,24(s8)     ; |
+  28:   afcb001c        sw      t3,28(s8)     ; /       (in arg 8 is not in a register and is not accessed here)
+  2c:   03c0e821        move    sp,s8         ; \
+  30:   8fbe0024        lw      s8,36(sp)     ; |
+  34:   27bd0028        addiu   sp,sp,40      ; | epilog
+  38:   03e00008        jr      ra            ; |
+  3c:   00000000        nop                   ; |       branch delay slot
+
+00000040 <nonleaf_call>:
+  40:   27bdffd0        addiu   sp,sp,-48     ; |
+  44:   afbf002c        sw      ra,44(sp)     ; |
+  48:   afbe0028        sw      s8,40(sp)     ; | prolog
+  4c:   03a0f021        move    s8,sp         ; /         frame pointer (note: with offset to frame start, but static compared to sp)
+  50:   afc40008        sw      a0,8(s8)      ; \
+  54:   afc5000c        sw      a1,12(s8)     ; |
+  58:   afc60010        sw      a2,16(s8)     ; |
+  5c:   afc70014        sw      a3,20(s8)     ; |
+  60:   afc80018        sw      t0,24(s8)     ; | in args 0,1,2,3,4,5,6,7 -> temp space in local area
+  64:   afc9001c        sw      t1,28(s8)     ; |
+  68:   afca0020        sw      t2,32(s8)     ; |
+  6c:   afcb0024        sw      t3,36(s8)     ; |
+  70:   27bdff18        addiu   sp,sp,-232    ; alloca(220) - with padding to guarantee alignment
+  74:   27a20008        addiu   v0,sp,8       ; |
+  78:   24420007        addiu   v0,v0,7       ; |
+  7c:   000210c2        srl     v0,v0,0x3     ; | start of alloca()'d memory -> v1, by ...
+  80:   000210c0        sll     v0,v0,0x3     ; | ... using v0 as helper to align to 8b
+  84:   00401821        move    v1,v0         ; |
+  88:   2402004c        li      v0,76         ; 'L' -> v0, and...
+  8c:   a0620000        sb      v0,0(v1)      ; ... store in local area (of alloca()'d space)
+  90:   8fc20034        lw      v0,52(s8)     ; arg 8 (fetched from prev frame's param area), and ...
+  94:   afa20000        sw      v0,0(sp)      ; ... "pushed" onto stack
+  98:   8fc4000c        lw      a0,12(s8)     ; |
+  9c:   8fc50010        lw      a1,16(s8)     ; |
+  a0:   8fc60014        lw      a2,20(s8)     ; |
+  a4:   8fc70018        lw      a3,24(s8)     ; | arg 0,1,2,3,4,5,6 (fetched from local area stored to above)
+  a8:   8fc8001c        lw      t0,28(s8)     ; |       (t0 = a4)
+  ac:   8fc90020        lw      t1,32(s8)     ; |       (t1 = a5)
+  b0:   8fca0024        lw      t2,36(s8)     ; |       (t2 = a6)
+  b4:   8fcb0030        lw      t3,48(s8)     ; arg 7 (fetched from prev frame's param area, t3 = a7)
+  b8:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra
+  bc:   00000000        nop                   ; branch delay slot
+  c0:   03c0e821        move    sp,s8         ; |
+  c4:   8fbf002c        lw      ra,44(sp)     ; |
+  c8:   8fbe0028        lw      s8,40(sp)     ; |
+  cc:   27bd0030        addiu   sp,sp,48      ; | epilog
+  d0:   03e00008        jr      ra            ; |
+  d4:   00000000        nop                   ; |       branch delay slot
+
+000000d8 <main>:
+  d8:   27bdfff0        addiu   sp,sp,-16     ; |
+  dc:   afbf000c        sw      ra,12(sp)     ; |
+  e0:   afbe0008        sw      s8,8(sp)      ; | prolog
+  e4:   03a0f021        move    s8,sp         ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+  e8:   24020008        li      v0,8          ; arg 8
+  ec:   afa20000        sw      v0,0(sp)      ; ... "pushed" onto stack
+  f0:   24020009        li      v0,9          ; arg 9
+  f4:   afa20004        sw      v0,4(sp)      ; ... "pushed" onto stack
+  f8:   00002021        move    a0,zero       ; arg 0
+  fc:   24050001        li      a1,1          ; arg 1
+ 100:   24060002        li      a2,2          ; arg 2
+ 104:   24070003        li      a3,3          ; arg 3
+ 108:   24080004        li      t0,4          ; arg 4 (t0 = a4)
+ 10c:   24090005        li      t1,5          ; arg 5 (t1 = a5)
+ 110:   240a0006        li      t2,6          ; arg 6 (t2 = a6)
+ 114:   240b0007        li      t3,7          ; arg 7 (t3 = a7)
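+ 118:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra (NOTE: calls nonleaf_call() as per C code; symbol shown presumably b/c objdump was done on unlinked .o file)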
+ 11c:   00000000        nop                   ; branch delay slot
+ 120:   00001021        move    v0,zero       ; return value
+ 124:   03c0e821        move    sp,s8         ; |
+ 128:   8fbf000c        lw      ra,12(sp)     ; |
+ 12c:   8fbe0008        lw      s8,8(sp)      ; |
+ 130:   27bd0010        addiu   sp,sp,16      ; | epilog
+ 134:   03e00008        jr      ra            ; |
+ 138:   00000000        nop                   ; |       branch delay slot
+
+
+
+; ------------- with var args to see spilling ----------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b, c, d, e, f, g, h, i, j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = va_arg(ap, int);
+;     g = va_arg(ap, int);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = va_arg(ap, int);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from psptoolchain-20111215-psp w/ gcc 4.9.3
+
+00000000 <leaf_call>:
+   0:   27bdffd8        addiu   sp,sp,-40
+   4:   afbe0024        sw      s8,36(sp)
+   8:   03a0f021        move    s8,sp
+   c:   afc40000        sw      a0,0(s8)
+  10:   afc50004        sw      a1,4(s8)
+  14:   afc60008        sw      a2,8(s8)
+  18:   afc7000c        sw      a3,12(s8)
+  1c:   afc80010        sw      t0,16(s8)
+  20:   afc90014        sw      t1,20(s8)
+  24:   afca0018        sw      t2,24(s8)
+  28:   afcb001c        sw      t3,28(s8)
+  2c:   03c0e821        move    sp,s8
+  30:   8fbe0024        lw      s8,36(sp)
+  34:   27bd0028        addiu   sp,sp,40
+  38:   03e00008        jr      ra
+  3c:   00000000        nop
+
+00000040 <nonleaf_call>:
+  40:   27bdffa0        addiu   sp,sp,-96     ; |         leaving 32b extra space adjacent to prev frame's param area for spilling
+  44:   afbf003c        sw      ra,60(sp)     ; |
+  48:   afbe0038        sw      s8,56(sp)     ; | prolog
+  4c:   03a0f021        move    s8,sp         ; /         frame pointer (note: with offset to frame start, but static compared to sp)
+  50:   afc50044        sw      a1,68(s8)     ; \
+  54:   afc60048        sw      a2,72(s8)     ; |
+  58:   afc7004c        sw      a3,76(s8)     ; |
+  5c:   afc80050        sw      t0,80(s8)     ; | in args 1,2,3,4,5,6,7 -> spill area in current frame (adjacent to prev frame's param area)
+  60:   afc90054        sw      t1,84(s8)     ; |
+  64:   afca0058        sw      t2,88(s8)     ; |
+  68:   afcb005c        sw      t3,92(s8)     ; |
+  6c:   afc40030        sw      a0,48(s8)     ; in arg 0 -> temp space in local area
+  70:   27c20060        addiu   v0,s8,96      ; v0 initialized to point ...
+  74:   2442ffe4        addiu   v0,v0,-28     ; ... to start of spill area (96 - 28 = 68, where we spilled a1)
+  78:   afc2002c        sw      v0,44(s8)     ; store read ptr in local area
+  7c:   8fc2002c        lw      v0,44(s8)     ; |            get read pointer in v0 (pointless here, but part of following pattern) ...
+  80:   24430004        addiu   v1,v0,4       ; |            ... advance ...
+  84:   afc3002c        sw      v1,44(s8)     ; | in arg 1   ... store again ...
+  88:   8c420000        lw      v0,0(v0)      ; |            ... arg -> v0 (using pre-inc read location), then ...
+  8c:   afc20008        sw      v0,8(s8)      ; /            ... write to local area on stack for later
+  90:   8fc2002c        lw      v0,44(s8)     ; \
+  94:   24430004        addiu   v1,v0,4       ; |
+  98:   afc3002c        sw      v1,44(s8)     ; | in arg 2
+  9c:   8c420000        lw      v0,0(v0)      ; |
+  a0:   afc2000c        sw      v0,12(s8)     ; /
+  a4:   8fc2002c        lw      v0,44(s8)     ; \
+  a8:   24430004        addiu   v1,v0,4       ; |
+  ac:   afc3002c        sw      v1,44(s8)     ; | in arg 3
+  b0:   8c420000        lw      v0,0(v0)      ; |
+  b4:   afc20010        sw      v0,16(s8)     ; /
+  b8:   8fc2002c        lw      v0,44(s8)     ; \
+  bc:   24430004        addiu   v1,v0,4       ; |
+  c0:   afc3002c        sw      v1,44(s8)     ; | in arg 4
+  c4:   8c420000        lw      v0,0(v0)      ; |
+  c8:   afc20014        sw      v0,20(s8)     ; /
+  cc:   8fc2002c        lw      v0,44(s8)     ; \
+  d0:   24430004        addiu   v1,v0,4       ; |
+  d4:   afc3002c        sw      v1,44(s8)     ; | in arg 5
+  d8:   8c420000        lw      v0,0(v0)      ; |
+  dc:   afc20018        sw      v0,24(s8)     ; /
+  e0:   8fc2002c        lw      v0,44(s8)     ; \
+  e4:   24430004        addiu   v1,v0,4       ; |
+  e8:   afc3002c        sw      v1,44(s8)     ; | in arg 6
+  ec:   8c420000        lw      v0,0(v0)      ; |
+  f0:   afc2001c        sw      v0,28(s8)     ; /
+  f4:   8fc2002c        lw      v0,44(s8)     ; \
+  f8:   24430004        addiu   v1,v0,4       ; |
+  fc:   afc3002c        sw      v1,44(s8)     ; | in arg 7
+ 100:   8c420000        lw      v0,0(v0)      ; |
+ 104:   afc20020        sw      v0,32(s8)     ; /
+ 108:   8fc2002c        lw      v0,44(s8)     ; \
+ 10c:   24430004        addiu   v1,v0,4       ; |
+ 110:   afc3002c        sw      v1,44(s8)     ; | in arg 8
+ 114:   8c420000        lw      v0,0(v0)      ; |
+ 118:   afc20024        sw      v0,36(s8)     ; /
+ 11c:   8fc2002c        lw      v0,44(s8)     ; \
+ 120:   24430004        addiu   v1,v0,4       ; |
+ 124:   afc3002c        sw      v1,44(s8)     ; | in arg 9
+ 128:   8c420000        lw      v0,0(v0)      ; |
+ 12c:   afc20028        sw      v0,40(s8)     ; /
+ 130:   27bdff18        addiu   sp,sp,-232    ; alloca(220) - with padding to guarantee alignment
+ 134:   27a20008        addiu   v0,sp,8       ; |
+ 138:   24420007        addiu   v0,v0,7       ; |
+ 13c:   000210c2        srl     v0,v0,0x3     ; | start of alloca()'d memory -> v1, by ...
+ 140:   000210c0        sll     v0,v0,0x3     ; | ... using v0 as helper to align to 8b
+ 144:   00401821        move    v1,v0         ; |
+ 148:   2402004c        li      v0,76         ; 'L' -> v0, and...
+ 14c:   a0620000        sb      v0,0(v1)      ; ... store in local area (of alloca()'d space)
+ 150:   8fc20028        lw      v0,40(s8)     ; arg 8 (fetched from local area stored to above) and ...
+ 154:   afa20000        sw      v0,0(sp)      ; ... "pushed" onto stack
+ 158:   8fc40008        lw      a0,8(s8)      ; |
+ 15c:   8fc5000c        lw      a1,12(s8)     ; |
+ 160:   8fc60010        lw      a2,16(s8)     ; |
+ 164:   8fc70014        lw      a3,20(s8)     ; | arg 0,1,2,3,4,5,6,7 (fetched from local area stored to above)
+ 168:   8fc80018        lw      t0,24(s8)     ; |       (t0 = a4)
+ 16c:   8fc9001c        lw      t1,28(s8)     ; |       (t1 = a5)
+ 170:   8fca0020        lw      t2,32(s8)     ; |       (t2 = a6)
+ 174:   8fcb0024        lw      t3,36(s8)     ; |       (t3 = a7)
+ 178:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra
+ 17c:   00000000        nop                   ; branch delay slot
+ 180:   03c0e821        move    sp,s8         ; |
+ 184:   8fbf003c        lw      ra,60(sp)     ; |
+ 188:   8fbe0038        lw      s8,56(sp)     ; |
+ 18c:   27bd0060        addiu   sp,sp,96      ; | epilog
+ 190:   03e00008        jr      ra            ; |
+ 194:   00000000        nop                   ; |       branch delay slot
+
+00000198 <main>:
+ 198:   27bdfff0        addiu   sp,sp,-16     ; |
+ 19c:   afbf000c        sw      ra,12(sp)     ; |
+ 1a0:   afbe0008        sw      s8,8(sp)      ; | prolog
+ 1a4:   03a0f021        move    s8,sp         ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+ 1a8:   24020008        li      v0,8          ; arg 8
+ 1ac:   afa20000        sw      v0,0(sp)      ; ... "pushed" onto stack
+ 1b0:   24020009        li      v0,9          ; arg 9
+ 1b4:   afa20004        sw      v0,4(sp)      ; ... "pushed" onto stack
+ 1b8:   00002021        move    a0,zero       ; arg 0
+ 1bc:   24050001        li      a1,1          ; arg 1
+ 1c0:   24060002        li      a2,2          ; arg 2
+ 1c4:   24070003        li      a3,3          ; arg 3
+ 1c8:   24080004        li      t0,4          ; arg 4 (t0 = a4)
+ 1cc:   24090005        li      t1,5          ; arg 5 (t1 = a5)
+ 1d0:   240a0006        li      t2,6          ; arg 6 (t2 = a6)
+ 1d4:   240b0007        li      t3,7          ; arg 7 (t3 = a7)
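+ 1d8:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra (NOTE: calls nonleaf_call() as per C code; symbol shown presumably b/c objdump was done on unlinked .o file)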
+ 1dc:   00000000        nop                   ; branch delay slot
+ 1e0:   00001021        move    v0,zero       ; return value
+ 1e4:   03c0e821        move    sp,s8         ; |
+ 1e8:   8fbf000c        lw      ra,12(sp)     ; |
+ 1ec:   8fbe0008        lw      s8,8(sp)      ; |
+ 1f0:   27bd0010        addiu   sp,sp,16      ; | epilog
+ 1f4:   03e00008        jr      ra            ; |
+ 1f8:   00000000        nop                   ; |       branch delay slot
+
+
+
+; ------------- var args with ints and floats to see spilling (which still only involves the integer regs a?-a7, no float regs), b/c doubles passed through (...) are passed via integer regs, and floats are promoted to doubles in (...) ----------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, float f, float g, int h, int i, float j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b, c, d, e, h, i;
+;     float f, g, j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = (float)va_arg(ap, double);
+;     g = (float)va_arg(ap, double);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = (float)va_arg(ap, double);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6.f, 7, 8, 9.f);
+;     return 0;
+; }
+
+
+
+; output from psptoolchain-20111215-psp w/ gcc 4.9.3
+
+00000000 <leaf_call>:
+   0:   27bdffd0        addiu   sp,sp,-48
+   4:   afbe002c        sw      s8,44(sp)
+   8:   03a0f021        move    s8,sp
+   c:   afc40000        sw      a0,0(s8)
+  10:   afc50004        sw      a1,4(s8)
+  14:   afc60008        sw      a2,8(s8)
+  18:   afc7000c        sw      a3,12(s8)
+  1c:   e7cc0010        swc1    $f12,16(s8)
+  20:   e7cd0014        swc1    $f13,20(s8)
+  24:   afc80018        sw      t0,24(s8)
+  28:   afc9001c        sw      t1,28(s8)
+  2c:   e7ce0020        swc1    $f14,32(s8)
+  30:   03c0e821        move    sp,s8
+  34:   8fbe002c        lw      s8,44(sp)
+  38:   27bd0030        addiu   sp,sp,48
+  3c:   03e00008        jr      ra
+  40:   00000000        nop
+
+00000044 <nonleaf_call>:
+  44:   27bdffa8        addiu   sp,sp,-88     ; |         leaving 32b extra space adjacent to prev frame's param area for spilling
+  48:   afbf0034        sw      ra,52(sp)     ; |
+  4c:   afbe0030        sw      s8,48(sp)     ; | prolog
+  50:   03a0f021        move    s8,sp         ; /         frame pointer (note: with offset to frame start, but static compared to sp)
+  54:   afc5003c        sw      a1,60(s8)     ; \
+  58:   afc60040        sw      a2,64(s8)     ; |
+  5c:   afc70044        sw      a3,68(s8)     ; |
+  60:   afc80048        sw      t0,72(s8)     ; | in args 1,2,3,4,5 (spread out over 7 param regs) -> spill area in current frame (adjacent to prev frame's param area)
+  64:   afc9004c        sw      t1,76(s8)     ; |            this one is just padding
+  68:   afca0050        sw      t2,80(s8)     ; |            |
+  6c:   afcb0054        sw      t3,84(s8)     ; |            | this is arg 5, passed as a double, spilled like integers
+  70:   afc40028        sw      a0,40(s8)     ; in arg 0 -> temp space in local area
+  74:   27c20058        addiu   v0,s8,88      ; v0 initialized to point ...
+  78:   2442ffe4        addiu   v0,v0,-28     ; ... to start of spill area (88 - 28 = 60, where we spilled a1)
+  7c:   afc20024        sw      v0,36(s8)     ; store read ptr in local area
+  80:   8fc20024        lw      v0,36(s8)     ; |            get read pointer in v0 (pointless here, but part of following pattern) ...
+  84:   24430004        addiu   v1,v0,4       ; |            ... advance ...
+  88:   afc30024        sw      v1,36(s8)     ; | in arg 1   ... store again ...
+  8c:   8c420000        lw      v0,0(v0)      ; |            ... arg -> v0 (using pre-inc read location), then ...
+  90:   afc20000        sw      v0,0(s8)      ; /            ... write to local area on stack for later
+  94:   8fc20024        lw      v0,36(s8)     ; \
+  98:   24430004        addiu   v1,v0,4       ; |
+  9c:   afc30024        sw      v1,36(s8)     ; | in arg 2
+  a0:   8c420000        lw      v0,0(v0)      ; |
+  a4:   afc20004        sw      v0,4(s8)      ; /
+  a8:   8fc20024        lw      v0,36(s8)     ; \
+  ac:   24430004        addiu   v1,v0,4       ; |
+  b0:   afc30024        sw      v1,36(s8)     ; | in arg 3
+  b4:   8c420000        lw      v0,0(v0)      ; |
+  b8:   afc20008        sw      v0,8(s8)      ; /
+  bc:   8fc20024        lw      v0,36(s8)     ; \
+  c0:   24430004        addiu   v1,v0,4       ; |
+  c4:   afc30024        sw      v1,36(s8)     ; | in arg 4
+  c8:   8c420000        lw      v0,0(v0)      ; |
+  cc:   afc2000c        sw      v0,12(s8)     ; /
+  d0:   8fc20024        lw      v0,36(s8)     ; \            get read ptr in v0
+  d4:   24430007        addiu   v1,v0,7       ; |            |
+  d8:   2402fff8        li      v0,-8         ; |            | align
+  dc:   00621024        and     v0,v1,v0      ; |            |
+  e0:   24430008        addiu   v1,v0,8       ; |            advance read ptr past double (v0 keeps pointing to it)
+  e4:   afc30024        sw      v1,36(s8)     ; |            store read ptr again
+  e8:   8c430004        lw      v1,4(v0)      ; | in arg 5   |
+  ec:   8c420000        lw      v0,0(v0)      ; |            | load both parts of double ...
+  f0:   00402021        move    a0,v0         ; |            | ... and store in a{0,1} pair (used to pass doubles, used in next call)
+  f4:   00602821        move    a1,v1         ; |            /
+  f8:   0c000000        jal     0 <leaf_call> ; |            \ call to cast double to float, returned in f0
+  fc:   00000000        nop                   ; |            | NOTE: not a call to leaf_call() (objdump done from .o file, not finally linked executable)
+ 100:   e7c00010        swc1    $f0,16(s8)    ; /            write float to local area on stack for later
+ 104:   8fc20024        lw      v0,36(s8)     ; \
+ 108:   24430007        addiu   v1,v0,7       ; |
+ 10c:   2402fff8        li      v0,-8         ; |
+ 110:   00621024        and     v0,v1,v0      ; |
+ 114:   24430008        addiu   v1,v0,8       ; |
+ 118:   afc30024        sw      v1,36(s8)     ; |
+ 11c:   8c430004        lw      v1,4(v0)      ; | in arg 6
+ 120:   8c420000        lw      v0,0(v0)      ; |
+ 124:   00402021        move    a0,v0         ; |
+ 128:   00602821        move    a1,v1         ; |
+ 12c:   0c000000        jal     0 <leaf_call> ; |
+ 130:   00000000        nop                   ; |
+ 134:   e7c00014        swc1    $f0,20(s8)    ; /
+ 138:   8fc20024        lw      v0,36(s8)     ; \
+ 13c:   24430004        addiu   v1,v0,4       ; |
+ 140:   afc30024        sw      v1,36(s8)     ; | in arg 7
+ 144:   8c420000        lw      v0,0(v0)      ; |
+ 148:   afc20018        sw      v0,24(s8)     ; /
+ 14c:   8fc20024        lw      v0,36(s8)     ; \
+ 150:   24430004        addiu   v1,v0,4       ; |
+ 154:   afc30024        sw      v1,36(s8)     ; | in arg 8
+ 158:   8c420000        lw      v0,0(v0)      ; |
+ 15c:   afc2001c        sw      v0,28(s8)     ; /
+ 160:   8fc20024        lw      v0,36(s8)     ; \
+ 164:   24430007        addiu   v1,v0,7       ; |
+ 168:   2402fff8        li      v0,-8         ; |
+ 16c:   00621024        and     v0,v1,v0      ; |
+ 170:   24430008        addiu   v1,v0,8       ; |
+ 174:   afc30024        sw      v1,36(s8)     ; |
+ 178:   8c430004        lw      v1,4(v0)      ; | in arg 9
+ 17c:   8c420000        lw      v0,0(v0)      ; |
+ 180:   00402021        move    a0,v0         ; |
+ 184:   00602821        move    a1,v1         ; |
+ 188:   0c000000        jal     0 <leaf_call> ; |
+ 18c:   00000000        nop                   ; |
+ 190:   e7c00020        swc1    $f0,32(s8)    ; /
+ 194:   27bdff18        addiu   sp,sp,-232    ; alloca(220) - with padding to guarantee alignment
+ 198:   03a01021        move    v0,sp         ; |
+ 19c:   24420007        addiu   v0,v0,7       ; |
+ 1a0:   000210c2        srl     v0,v0,0x3     ; | start of alloca()'d memory -> v1, by ...
+ 1a4:   000210c0        sll     v0,v0,0x3     ; | ... using v0 as helper to align to 8b
+ 1a8:   00401821        move    v1,v0         ; |
+ 1ac:   2402004c        li      v0,76         ; 'L' -> v0, and...
+ 1b0:   a0620000        sb      v0,0(v1)      ; ... store in local area (of alloca()'d space)
+ 1b4:   8fc40000        lw      a0,0(s8)      ; |
+ 1b8:   8fc50004        lw      a1,4(s8)      ; |
+ 1bc:   8fc60008        lw      a2,8(s8)      ; | arg 0,1,2,3 (int args, fetched from local area stored to above)
+ 1c0:   8fc7000c        lw      a3,12(s8)     ; |
+ 1c4:   c7cc0010        lwc1    $f12,16(s8)   ; arg 4 (float, fetched from local area stored to above)
+ 1c8:   c7cd0014        lwc1    $f13,20(s8)   ; arg 5 (float, fetched from local area stored to above)
+ 1cc:   8fc80018        lw      t0,24(s8)     ; arg 6 (int, fetched from local area stored to above, t0 = a4)
+ 1d0:   8fc9001c        lw      t1,28(s8)     ; arg 7 (int, fetched from local area stored to above, t1 = a5)
+ 1d4:   c7ce0020        lwc1    $f14,32(s8)   ; arg 8 (float, fetched from local area stored to above)
+ 1d8:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra
+ 1dc:   00000000        nop                   ; branch delay slot
+ 1e0:   03c0e821        move    sp,s8         ; |
+ 1e4:   8fbf0034        lw      ra,52(sp)     ; |
+ 1e8:   8fbe0030        lw      s8,48(sp)     ; |
+ 1ec:   27bd0058        addiu   sp,sp,88      ; | epilog
+ 1f0:   03e00008        jr      ra            ; |
+ 1f4:   00000000        nop                   ; |       branch delay slot
+
+000001f8 <main>:
+ 1f8:   27bdffe0        addiu   sp,sp,-32     ; |
+ 1fc:   afbf001c        sw      ra,28(sp)     ; |
+ 200:   afbe0018        sw      s8,24(sp)     ; | prolog
+ 204:   03a0f021        move    s8,sp         ; /         frame pointer (note: with offset to frame start, but static compared to sp)
+ 208:   8f8a0000        lw      t2,0(gp)      ; \
+ 20c:   8f8b0004        lw      t3,4(gp)      ; / arg 5 (t2,t3 = a6,a7), as double b/c of vararg, effectively skipping t1 (=a5)
+ 210:   8f820008        lw      v0,8(gp)      ; \
+ 214:   8f83000c        lw      v1,12(gp)     ; | arg 6, as double b/c of vararg, via v0 and v1 ...
+ 218:   afa20000        sw      v0,0(sp)      ; |
+ 21c:   afa30004        sw      v1,4(sp)      ; | ... "pushed" onto stack
+ 220:   24020007        li      v0,7          ; arg 7
+ 224:   afa20008        sw      v0,8(sp)      ; ... "pushed" onto stack
+ 228:   24020008        li      v0,8          ; arg 8
+ 22c:   afa2000c        sw      v0,12(sp)     ; ... "pushed" onto stack
+ 230:   8f820010        lw      v0,16(gp)     ; |
+ 234:   8f830014        lw      v1,20(gp)     ; | arg 9, as double b/c of vararg ...
+ 238:   afa20010        sw      v0,16(sp)     ; |
+ 23c:   afa30014        sw      v1,20(sp)     ; | ... "pushed" onto stack
+ 240:   00002021        move    a0,zero       ; arg 0
+ 244:   24050001        li      a1,1          ; arg 1
+ 248:   24060002        li      a2,2          ; arg 2
+ 24c:   24070003        li      a3,3          ; arg 3
+ 250:   24080004        li      t0,4          ; arg 4 (t0 = a4)
+ 254:   0c000000        jal     0 <leaf_call> ; call and ret addr -> ra (NOTE: calls nonleaf_call() as per C code; symbol shown b/c objdump was done on unlinked .o file)
+ 258:   00000000        nop                   ; branch delay slot
+ 25c:   00001021        move    v0,zero       ; return value
+ 260:   03c0e821        move    sp,s8         ; |
+ 264:   8fbf001c        lw      ra,28(sp)     ; |
+ 268:   8fbe0018        lw      s8,24(sp)     ; |
+ 26c:   27bd0020        addiu   sp,sp,32      ; | epilog
+ 270:   03e00008        jr      ra            ; |
+ 274:   00000000        nop                   ; |       branch delay slot
+
+
+
+; --------------------- further notes ------------------->
+
+; when passing fewer arguments than would require stack params, involving an ellipsis, the spill area still
+; receives all param registers, excluding the named ones, e.g.:
+;
+; void c(int a, ...) { ... }
+; c(0, 1, 2, 3, 4);
+;
+; contains the same kind of spilling code as above:
+
+  84:   afc5002c        sw      a1,44(s8)
+  88:   afc60030        sw      a2,48(s8)
+  8c:   afc70034        sw      a3,52(s8)
+  90:   afc80038        sw      t0,56(s8)
+  94:   afc9003c        sw      t1,60(s8)
+  98:   afca0040        sw      t2,64(s8)
+  9c:   afcb0044        sw      t3,68(s8)
+
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/mips.o32.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,221 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from freebsd-12.0_r333647-malta_mipselhf w/ gcc 4.2.1 ----->
+
+00000000 <leaf_call>:
+   0:   3c1c0000        lui     gp,0x0
+   4:   279c0000        addiu   gp,gp,0
+   8:   0399e021        addu    gp,gp,t9
+   c:   27bdfff8        addiu   sp,sp,-8
+  10:   afbe0000        sw      s8,0(sp)
+  14:   03a0f021        move    s8,sp
+  18:   afc40008        sw      a0,8(s8)
+  1c:   afc5000c        sw      a1,12(s8)
+  20:   afc60010        sw      a2,16(s8)
+  24:   afc70014        sw      a3,20(s8)
+  28:   03c0e821        move    sp,s8
+  2c:   8fbe0000        lw      s8,0(sp)
+  30:   03e00008        jr      ra
+  34:   27bd0008        addiu   sp,sp,8
+
+00000038 <nonleaf_call>:
+  38:   3c1c0000        lui     gp,0x0     ; |
+  3c:   279c0000        addiu   gp,gp,0    ; |
+  40:   0399e021        addu    gp,gp,t9   ; |
+  44:   27bdffc8        addiu   sp,sp,-56  ; | prolog
+  48:   afbf0034        sw      ra,52(sp)  ; |
+  4c:   afbe0030        sw      s8,48(sp)  ; |
+  50:   03a0f021        move    s8,sp      ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+  54:   afbc0020        sw      gp,32(sp)  ; /
+  58:   afc40038        sw      a0,56(s8)  ; \
+  5c:   afc5003c        sw      a1,60(s8)  ; |
+  60:   afc60040        sw      a2,64(s8)  ; | spill first 4 args into prev frame's reserved spill space in param area (no actual need to spill here, this is just a temp copy, but the space is reserved for them anyways)
+  64:   afc70044        sw      a3,68(s8)  ; |
+  68:   27bdff18        addiu   sp,sp,-232 ; alloca(220) - with padding to guarantee alignment
+  6c:   27a20020        addiu   v0,sp,32   ; |
+  70:   afc20028        sw      v0,40(s8)  ; |
+  74:   8fc30028        lw      v1,40(s8)  ; | start of alloca()'d memory -> v1, by ...
+  78:   24620007        addiu   v0,v1,7    ; | ... using v0 as helper to align to 8b
+  7c:   000210c2        srl     v0,v0,0x3  ; |    @@@ unsure about use of helper space at 40(s8) (note: lies within this frame's 56b, not in prev frame)..?
+  80:   000210c0        sll     v0,v0,0x3  ; |
+  84:   afc20028        sw      v0,40(s8)  ; |
+  88:   8fc30028        lw      v1,40(s8)  ; |
+  8c:   2402004c        li      v0,76      ; 'L' -> v0, and...
+  90:   a0620000        sb      v0,0(v1)   ; ... store in local area (of alloca()'d space)
+  94:   8fc2004c        lw      v0,76(s8)  ; arg 4 (fetched from prev frame's param area), and ...
+  98:   afa20010        sw      v0,16(sp)  ; ... "pushed" onto stack
+  9c:   8fc20050        lw      v0,80(s8)  ; arg 5 (fetched from prev frame's param area), and ...
+  a0:   afa20014        sw      v0,20(sp)  ; ... "pushed" onto stack
+  a4:   8fc20054        lw      v0,84(s8)  ; arg 6 (fetched from prev frame's param area), and ...
+  a8:   afa20018        sw      v0,24(sp)  ; ... "pushed" onto stack
+  ac:   8fc4003c        lw      a0,60(s8)  ; arg 0 (fetched from spill area of prev frame)
+  b0:   8fc50040        lw      a1,64(s8)  ; arg 1 (fetched from spill area of prev frame)
+  b4:   8fc60044        lw      a2,68(s8)  ; arg 2 (fetched from spill area of prev frame)
+  b8:   8fc70048        lw      a3,72(s8)  ; arg 3 (fetched from prev frame's param area)
+  bc:   8f990000        lw      t9,0(gp)   ; func to call -> t9
+  c0:   0320f809        jalr    t9         ; call and ret addr -> ra
+  c4:   00000000        nop                ; branch delay slot
+  c8:   8fdc0020        lw      gp,32(s8)  ; |
+  cc:   03c0e821        move    sp,s8      ; |
+  d0:   8fbf0034        lw      ra,52(sp)  ; |
+  d4:   8fbe0030        lw      s8,48(sp)  ; | epilog
+  d8:   03e00008        jr      ra         ; |
+  dc:   27bd0038        addiu   sp,sp,56   ; |
+
+000000e0 <main>:
+  e0:   3c1c0000        lui     gp,0x0     ; |
+  e4:   279c0000        addiu   gp,gp,0    ; |
+  e8:   0399e021        addu    gp,gp,t9   ; |
+  ec:   27bdffd0        addiu   sp,sp,-48  ; | prolog
+  f0:   afbf002c        sw      ra,44(sp)  ; |
+  f4:   afbe0028        sw      s8,40(sp)  ; |
+  f8:   03a0f021        move    s8,sp      ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+  fc:   afbc0020        sw      gp,32(sp)  ; |
+ 100:   24020004        li      v0,4       ; arg 4, and ...
+ 104:   afa20010        sw      v0,16(sp)  ; ... "pushed" onto stack
+ 108:   24020005        li      v0,5       ; arg 5, and ...
+ 10c:   afa20014        sw      v0,20(sp)  ; ... "pushed" onto stack
+ 110:   24020006        li      v0,6       ; arg 6, and ...
+ 114:   afa20018        sw      v0,24(sp)  ; ... "pushed" onto stack
+ 118:   24020007        li      v0,7       ; arg 7, and ...
+ 11c:   afa2001c        sw      v0,28(sp)  ; ... "pushed" onto stack
+ 120:   00002021        move    a0,zero    ; arg 0
+ 124:   24050001        li      a1,1       ; arg 1
+ 128:   24060002        li      a2,2       ; arg 2
+ 12c:   24070003        li      a3,3       ; arg 3
+ 130:   8f990000        lw      t9,0(gp)   ; func to call -> t9
+ 134:   0320f809        jalr    t9         ; call and ret addr -> ra
+ 138:   00000000        nop                ; branch delay slot
+ 13c:   8fdc0020        lw      gp,32(s8)  ; |
+ 140:   00001021        move    v0,zero    ; :        return value: not part of epilog, but unordered (branch delay slot style)
+ 144:   03c0e821        move    sp,s8      ; |
+ 148:   8fbf002c        lw      ra,44(sp)  ; | epilog
+ 14c:   8fbe0028        lw      s8,40(sp)  ; |
+ 150:   03e00008        jr      ra         ; |
+ 154:   27bd0030        addiu   sp,sp,48   ; |
+
+
+
+; output from netbsd-5.0.2-pmax_mipsel_o32 w/ gcc 4.1.3 ----->
+; nearly the same, equivalent to above except for non-optimal use of branch delay slots (filled with nops), extra nops after some loads, and no $gp setup in leaf_call
+
+00000000 <leaf_call>:
+   0:   27bdfff8        addiu   sp,sp,-8
+   4:   afbe0000        sw      s8,0(sp)
+   8:   03a0f021        move    s8,sp
+   c:   afc40008        sw      a0,8(s8)
+  10:   afc5000c        sw      a1,12(s8)
+  14:   afc60010        sw      a2,16(s8)
+  18:   afc70014        sw      a3,20(s8)
+  1c:   03c0e821        move    sp,s8
+  20:   8fbe0000        lw      s8,0(sp)
+  24:   27bd0008        addiu   sp,sp,8
+  28:   03e00008        jr      ra
+  2c:   00000000        nop
+
+00000030 <nonleaf_call>:
+  30:   3c1c0000        lui     gp,0x0
+  34:   279c0000        addiu   gp,gp,0
+  38:   0399e021        addu    gp,gp,t9
+  3c:   27bdffc8        addiu   sp,sp,-56
+  40:   afbf0034        sw      ra,52(sp)
+  44:   afbe0030        sw      s8,48(sp)
+  48:   03a0f021        move    s8,sp
+  4c:   afbc0020        sw      gp,32(sp)
+  50:   afc40038        sw      a0,56(s8)
+  54:   afc5003c        sw      a1,60(s8)
+  58:   afc60040        sw      a2,64(s8)
+  5c:   afc70044        sw      a3,68(s8)
+  60:   27bdff18        addiu   sp,sp,-232
+  64:   27a20020        addiu   v0,sp,32
+  68:   afc20028        sw      v0,40(s8)
+  6c:   8fc30028        lw      v1,40(s8)
+  70:   00000000        nop
+
+  74:   24620007        addiu   v0,v1,7
+  78:   000210c2        srl     v0,v0,0x3
+  7c:   000210c0        sll     v0,v0,0x3
+  80:   afc20028        sw      v0,40(s8)
+  84:   8fc30028        lw      v1,40(s8)
+  88:   2402004c        li      v0,76
+  8c:   a0620000        sb      v0,0(v1)
+  90:   8fc2004c        lw      v0,76(s8)
+  94:   00000000        nop
+  98:   afa20010        sw      v0,16(sp)
+  9c:   8fc20050        lw      v0,80(s8)
+  a0:   00000000        nop
+  a4:   afa20014        sw      v0,20(sp)
+  a8:   8fc20054        lw      v0,84(s8)
+  ac:   00000000        nop
+  b0:   afa20018        sw      v0,24(sp)
+  b4:   8fc4003c        lw      a0,60(s8)
+  b8:   8fc50040        lw      a1,64(s8)
+  bc:   8fc60044        lw      a2,68(s8)
+  c0:   8fc70048        lw      a3,72(s8)
+  c4:   8f990000        lw      t9,0(gp)
+  c8:   00000000        nop
+  cc:   0320f809        jalr    t9
+  d0:   00000000        nop
+  d4:   8fdc0020        lw      gp,32(s8)
+  d8:   03c0e821        move    sp,s8
+  dc:   8fbf0034        lw      ra,52(sp)
+  e0:   8fbe0030        lw      s8,48(sp)
+  e4:   27bd0038        addiu   sp,sp,56
+  e8:   03e00008        jr      ra
+  ec:   00000000        nop
+
+000000f0 <main>:
+  f0:   3c1c0000        lui     gp,0x0
+  f4:   279c0000        addiu   gp,gp,0
+  f8:   0399e021        addu    gp,gp,t9
+  fc:   27bdffd0        addiu   sp,sp,-48
+ 100:   afbf002c        sw      ra,44(sp)
+ 104:   afbe0028        sw      s8,40(sp)
+ 108:   03a0f021        move    s8,sp
+ 10c:   afbc0020        sw      gp,32(sp)
+ 110:   24020004        li      v0,4
+ 114:   afa20010        sw      v0,16(sp)
+ 118:   24020005        li      v0,5
+ 11c:   afa20014        sw      v0,20(sp)
+ 120:   24020006        li      v0,6
+ 124:   afa20018        sw      v0,24(sp)
+ 128:   24020007        li      v0,7
+ 12c:   afa2001c        sw      v0,28(sp)
+ 130:   00002021        move    a0,zero
+ 134:   24050001        li      a1,1
+ 138:   24060002        li      a2,2
+ 13c:   24070003        li      a3,3
+ 140:   8f990000        lw      t9,0(gp)
+ 144:   00000000        nop
+ 148:   0320f809        jalr    t9
+ 14c:   00000000        nop
+ 150:   8fdc0020        lw      gp,32(s8)
+ 154:   00001021        move    v0,zero
+ 158:   03c0e821        move    sp,s8
+ 15c:   8fbf002c        lw      ra,44(sp)
+ 160:   8fbe0028        lw      s8,40(sp)
+ 164:   27bd0030        addiu   sp,sp,48
+ 168:   03e00008        jr      ra
+ 16c:   00000000        nop
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/mips64.n64.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,690 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from freebsd-12.0_r333647-malta_mips64elhf w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   67bdffd0        daddiu  sp,sp,-48  ; |
+   4:   ffbe0028        sd      s8,40(sp)  ; |
+   8:   ffbc0020        sd      gp,32(sp)  ; | prolog
+   c:   03a0f02d        move    s8,sp      ; |
+  10:   3c1c0000        lui     gp,0x0     ; |
+  14:   0399e02d        daddu   gp,gp,t9   ; | gp = t9 + imm (both imm parts are 0 here); presumably PIC gp setup w/ relocs unresolved b/c objdump is from .o
+  18:   679c0000        daddiu  gp,gp,0    ; |
+  1c:   0080102d        move    v0,a0      ; |
+  20:   00a0182d        move    v1,a1      ; |
+  24:   00c0202d        move    a0,a2      ; |
+  28:   00e0282d        move    a1,a3      ; | moving a{0,1} to v{0,1} respectively, and a2-a6 two regs down,
+  2c:   0100302d        move    a2,a4      ; | only to store them all to the stack right below
+  30:   0120382d        move    a3,a5      ; |
+  34:   0140402d        move    a4,a6      ; /
+  38:   00021000        sll     v0,v0,0x0  ; \
+  3c:   afc20000        sw      v0,0(s8)   ; |
+  40:   00031000        sll     v0,v1,0x0  ; |
+  44:   afc20004        sw      v0,4(s8)   ; |
+  48:   00041000        sll     v0,a0,0x0  ; |
+  4c:   afc20008        sw      v0,8(s8)   ; |
+  50:   00051000        sll     v0,a1,0x0  ; | storing all 7 register in args as 32bit words in local area on stack
+  54:   afc2000c        sw      v0,12(s8)  ; | (sll by 0 presumably only to sign-extend the lower 32 bits of each int arg)
+  58:   00061000        sll     v0,a2,0x0  ; |
+  5c:   afc20010        sw      v0,16(s8)  ; |
+  60:   00071000        sll     v0,a3,0x0  ; |
+  64:   afc20014        sw      v0,20(s8)  ; |
+  68:   00081000        sll     v0,a4,0x0  ; |
+  6c:   afc20018        sw      v0,24(s8)  ; |
+  70:   03c0e82d        move    sp,s8      ; |
+  74:   dfbe0028        ld      s8,40(sp)  ; |
+  78:   dfbc0020        ld      gp,32(sp)  ; | epilog
+  7c:   03e00008        jr      ra         ; |
+  80:   67bd0030        daddiu  sp,sp,48   ; |         branch delay slot style
+  84:   00000000        nop                ; presumably padding, so that next function starts 8b aligned (at 0x88)
+
+0000000000000088 <nonleaf_call>:
+  88:   67bdff90        daddiu  sp,sp,-112 ; |
+  8c:   ffbf0060        sd      ra,96(sp)  ; |
+  90:   ffbe0058        sd      s8,88(sp)  ; | prolog
+  94:   ffbc0050        sd      gp,80(sp)  ; |
+  98:   03a0f02d        move    s8,sp      ; |
+  9c:   3c1c0000        lui     gp,0x0     ; @@@ unsure
+  a0:   0399e02d        daddu   gp,gp,t9   ; @@@ unsure
+  a4:   679c0000        daddiu  gp,gp,0    ; @@@ unsure
+  a8:   0080102d        move    v0,a0      ; |
+  ac:   00a0182d        move    v1,a1      ; |
+  b0:   00c0202d        move    a0,a2      ; | pointlessly (?) moving a{0,1} to v{0,1} respectively,
+  b4:   00e0282d        move    a1,a3      ; | and all last 6 a? registers two regs down, freeing up
+  b8:   0100302d        move    a2,a4      ; | a{6,7}, which aren't used for anything though
+  bc:   0120382d        move    a3,a5      ; |
+  c0:   0140402d        move    a4,a6      ; |
+  c4:   0160482d        move    a5,a7      ; /
+  c8:   00021000        sll     v0,v0,0x0  ; \
+  cc:   afc20000        sw      v0,0(s8)   ; |
+  d0:   00031000        sll     v0,v1,0x0  ; |
+  d4:   afc20004        sw      v0,4(s8)   ; |
+  d8:   00041000        sll     v0,a0,0x0  ; |
+  dc:   afc20008        sw      v0,8(s8)   ; |
+  e0:   00051000        sll     v0,a1,0x0  ; |
+  e4:   afc2000c        sw      v0,12(s8)  ; | storing all register in args in local area on stack
+  e8:   00061000        sll     v0,a2,0x0  ; | (using the set of pointlessly moved-to regs, above)
+  ec:   afc20010        sw      v0,16(s8)  ; |
+  f0:   00071000        sll     v0,a3,0x0  ; |
+  f4:   afc20014        sw      v0,20(s8)  ; |
+  f8:   00081000        sll     v0,a4,0x0  ; |
+  fc:   afc20018        sw      v0,24(s8)  ; |
+ 100:   00091000        sll     v0,a5,0x0  ; |
+ 104:   afc2001c        sw      v0,28(s8)  ; |
+ 108:   67bdff10        daddiu  sp,sp,-240 ; alloca(220) - with padding to guarantee alignment
+ 10c:   ffdd0020        sd      sp,32(s8)  ; |
+ 110:   dfc30020        ld      v1,32(s8)  ; | start of alloca()'d memory -> v1, by ...
+ 114:   64620007        daddiu  v0,v1,7    ; | ... using v0 as helper to align to 8b, and some unnecessary stores/reloads instead of a move
+ 118:   000210fa        dsrl    v0,v0,0x3  ; |
+ 11c:   000210f8        dsll    v0,v0,0x3  ; |
+ 120:   ffc20020        sd      v0,32(s8)  ; move addr in v0 via local area ...
+ 124:   dfc30020        ld      v1,32(s8)  ; ... to v1
+ 128:   2402004c        li      v0,76      ; 'L' -> v0, and ...
+ 12c:   a0620000        sb      v0,0(v1)   ; ... store in local area (of alloca()'d space)
+ 130:   8fc20004        lw      v0,4(s8)   ; prep arg 0 (pointlessly) to move to a0 below
+ 134:   8fc30008        lw      v1,8(s8)   ; prep arg 1 (pointlessly) to move to a1 below
+ 138:   8fc6000c        lw      a2,12(s8)  ; arg 2
+ 13c:   8fc70010        lw      a3,16(s8)  ; arg 3
+ 140:   8fc80014        lw      a4,20(s8)  ; arg 4
+ 144:   8fc90018        lw      a5,24(s8)  ; arg 5
+ 148:   8fca001c        lw      a6,28(s8)  ; arg 6
+ 14c:   0040202d        move    a0,v0      ; arg 0
+ 150:   0060282d        move    a1,v1      ; arg 1
+ 154:   df990000        ld      t9,0(gp)   ; addr of callee -> t9
+ 158:   0320f809        jalr    t9         ; return address -> ra, and call
+ 15c:   00000000        nop                ; branch delay slot
+ 160:   03c0e82d        move    sp,s8      ; |
+ 164:   dfbf0060        ld      ra,96(sp)  ; |
+ 168:   dfbe0058        ld      s8,88(sp)  ; |
+ 16c:   dfbc0050        ld      gp,80(sp)  ; | epilog
+ 170:   03e00008        jr      ra         ; |
+ 174:   67bd0070        daddiu  sp,sp,112  ; |         branch delay slot style
+
+0000000000000178 <main>:
+ 178:   67bdffe0        daddiu  sp,sp,-32  ; |
+ 17c:   ffbf0010        sd      ra,16(sp)  ; |
+ 180:   ffbe0008        sd      s8,8(sp)   ; | prolog
+ 184:   ffbc0000        sd      gp,0(sp)   ; |
+ 188:   03a0f02d        move    s8,sp      ; |
+ 18c:   3c1c0000        lui     gp,0x0     ; @@@ unsure
+ 190:   0399e02d        daddu   gp,gp,t9   ; @@@ unsure
+ 194:   679c0000        daddiu  gp,gp,0    ; @@@ unsure
+ 198:   0000202d        move    a0,zero    ; arg 0
+ 19c:   24050001        li      a1,1       ; arg 1
+ 1a0:   24060002        li      a2,2       ; arg 2
+ 1a4:   24070003        li      a3,3       ; arg 3
+ 1a8:   24080004        li      a4,4       ; arg 4
+ 1ac:   24090005        li      a5,5       ; arg 5
+ 1b0:   240a0006        li      a6,6       ; arg 6
+ 1b4:   240b0007        li      a7,7       ; arg 7
+ 1b8:   df990000        ld      t9,0(gp)   ; address of callee -> t9
+ 1bc:   0320f809        jalr    t9         ; return address -> ra, and call
+ 1c0:   00000000        nop                ; branch delay slot
+ 1c4:   0000102d        move    v0,zero    ; return value
+ 1c8:   03c0e82d        move    sp,s8      ; |
+ 1cc:   dfbf0010        ld      ra,16(sp)  ; |
+ 1d0:   dfbe0008        ld      s8,8(sp)   ; |
+ 1d4:   dfbc0000        ld      gp,0(sp)   ; | epilog
+ 1d8:   03e00008        jr      ra         ; |
+ 1dc:   67bd0020        daddiu  sp,sp,32   ; |         branch delay slot style
+
+
+
+; output from debian-sid_20150616-malta_mips64el_n64 w/ gcc 4.9.2
+
+0000000000000000 <leaf_call>:
+   0:   67bdffd0        daddiu  sp,sp,-48
+   4:   ffbe0028        sd      s8,40(sp)
+   8:   03a0f02d        move    s8,sp
+   c:   0080602d        move    t0,a0
+  10:   00a0582d        move    a7,a1
+  14:   00e0282d        move    a1,a3
+  18:   0100202d        move    a0,a4
+  1c:   0120182d        move    v1,a5
+  20:   0140102d        move    v0,a6
+  24:   000c3800        sll     a3,t0,0x0
+  28:   afc70000        sw      a3,0(s8)
+  2c:   000b3800        sll     a3,a7,0x0
+  30:   afc70004        sw      a3,4(s8)
+  34:   00063000        sll     a2,a2,0x0
+  38:   afc60008        sw      a2,8(s8)
+  3c:   00052800        sll     a1,a1,0x0
+  40:   afc5000c        sw      a1,12(s8)
+  44:   00042000        sll     a0,a0,0x0
+  48:   afc40010        sw      a0,16(s8)
+  4c:   00031800        sll     v1,v1,0x0
+  50:   afc30014        sw      v1,20(s8)
+  54:   00021000        sll     v0,v0,0x0
+  58:   afc20018        sw      v0,24(s8)
+  5c:   03c0e82d        move    sp,s8
+  60:   dfbe0028        ld      s8,40(sp)
+  64:   67bd0030        daddiu  sp,sp,48
+  68:   03e00008        jr      ra
+  6c:   00200825        move    at,at
+
+0000000000000070 <nonleaf_call>:
+  70:   67bdffc0        daddiu  sp,sp,-64  ; |
+  74:   ffbf0038        sd      ra,56(sp)  ; |
+  78:   ffbe0030        sd      s8,48(sp)  ; | prolog
+  7c:   ffbc0028        sd      gp,40(sp)  ; |
+  80:   03a0f02d        move    s8,sp      ; |
+  84:   3c1c0000        lui     gp,0x0     ; |
+  88:   0399e02d        daddu   gp,gp,t9   ; | gp = t9 + imm (both imm parts are 0 here); presumably PIC gp setup w/ relocs unresolved b/c objdump is from .o (@@@ unverified)
+  8c:   679c0000        daddiu  gp,gp,0    ; |
+  90:   0080702d        move    t2,a0      ; |
+  94:   00a0682d        move    t1,a1      ; |
+  98:   00c0602d        move    t0,a2      ; | pointlessly (?) moving regs around, freeing effectively
+  9c:   00e0302d        move    a2,a3      ; | some registers for use below, still unnecessary though
+  a0:   0100282d        move    a1,a4      ; | with different code below
+  a4:   0120202d        move    a0,a5      ; |
+  a8:   0140182d        move    v1,a6      ; |
+  ac:   0160102d        move    v0,a7      ; /
+  b0:   000e3800        sll     a3,t2,0x0  ; \
+  b4:   afc70000        sw      a3,0(s8)   ; |
+  b8:   000d3800        sll     a3,t1,0x0  ; |
+  bc:   afc70004        sw      a3,4(s8)   ; |
+  c0:   000c3800        sll     a3,t0,0x0  ; |
+  c4:   afc70008        sw      a3,8(s8)   ; |
+  c8:   00063000        sll     a2,a2,0x0  ; |
+  cc:   afc6000c        sw      a2,12(s8)  ; | storing all register in args in local area on stack
+  d0:   00052800        sll     a1,a1,0x0  ; | (using the set of pointlessly moved-to regs, above)
+  d4:   afc50010        sw      a1,16(s8)  ; |
+  d8:   00042000        sll     a0,a0,0x0  ; |
+  dc:   afc40014        sw      a0,20(s8)  ; |
+  e0:   00031800        sll     v1,v1,0x0  ; |
+  e4:   afc30018        sw      v1,24(s8)  ; |
+  e8:   00021000        sll     v0,v0,0x0  ; |
+  ec:   afc2001c        sw      v0,28(s8)  ; |
+  f0:   67bdff10        daddiu  sp,sp,-240 ; alloca(220) - with padding to guarantee alignment
+  f4:   03a0102d        move    v0,sp      ; |
+  f8:   6442000f        daddiu  v0,v0,15   ; | start of alloca()'d memory -> v1, by ...
+  fc:   0002113a        dsrl    v0,v0,0x4  ; | ... using v0 as helper to align to 16b (+15, then >>4 and <<4)
+ 100:   00021138        dsll    v0,v0,0x4  ; |
+ 104:   0040182d        move    v1,v0      ; |
+ 108:   2402004c        li      v0,76      ; 'L' -> v0, and ...
+ 10c:   a0620000        sb      v0,0(v1)   ; ... store in local area (of alloca()'d space)
+ 110:   8fc40004        lw      a0,4(s8)   ; arg 0
+ 114:   8fc50008        lw      a1,8(s8)   ; arg 1
+ 118:   8fc6000c        lw      a2,12(s8)  ; arg 2
+ 11c:   8fc70010        lw      a3,16(s8)  ; arg 3
+ 120:   8fc80014        lw      a4,20(s8)  ; arg 4
+ 124:   8fc30018        lw      v1,24(s8)  ; prep arg 5 (pointlessly) to move to a5 below
+ 128:   8fc2001c        lw      v0,28(s8)  ; prep arg 6 (pointlessly) to move to a6 below
+ 12c:   0060482d        move    a5,v1      ; arg 5
+ 130:   0040502d        move    a6,v0      ; arg 6
+ 134:   df820000        ld      v0,0(gp)   ; addr of callee ...
+ 138:   0040c82d        move    t9,v0      ; ... -> t9
+ 13c:   0320f809        jalr    t9         ; return address -> ra, and call
+ 140:   00200825        move    at,at      ; branch delay slot (effectively nop)
+ 144:   03c0e82d        move    sp,s8      ; |
+ 148:   dfbf0038        ld      ra,56(sp)  ; |
+ 14c:   dfbe0030        ld      s8,48(sp)  ; |
+ 150:   dfbc0028        ld      gp,40(sp)  ; | epilog
+ 154:   67bd0040        daddiu  sp,sp,64   ; |
+ 158:   03e00008        jr      ra         ; |
+ 15c:   00200825        move    at,at      ; |         branch delay slot (effectively nop)
+
+0000000000000160 <main>:
+ 160:   67bdffe0        daddiu  sp,sp,-32  ; |
+ 164:   ffbf0018        sd      ra,24(sp)  ; |
+ 168:   ffbe0010        sd      s8,16(sp)  ; | prolog
+ 16c:   ffbc0008        sd      gp,8(sp)   ; |
+ 170:   03a0f02d        move    s8,sp      ; |
+ 174:   3c1c0000        lui     gp,0x0     ; @@@ unsure
+ 178:   0399e02d        daddu   gp,gp,t9   ; @@@ unsure
+ 17c:   679c0000        daddiu  gp,gp,0    ; @@@ unsure
+ 180:   0000202d        move    a0,zero    ; arg 0
+ 184:   24050001        li      a1,1       ; arg 1
+ 188:   24060002        li      a2,2       ; arg 2
+ 18c:   24070003        li      a3,3       ; arg 3
+ 190:   24080004        li      a4,4       ; arg 4
+ 194:   24090005        li      a5,5       ; arg 5
+ 198:   240a0006        li      a6,6       ; arg 6
+ 19c:   240b0007        li      a7,7       ; arg 7
+ 1a0:   df820000        ld      v0,0(gp)   ; address of callee, to ...
+ 1a4:   0040c82d        move    t9,v0      ; ... t9
+ 1a8:   0320f809        jalr    t9         ; return address -> ra, and call
+ 1ac:   00200825        move    at,at      ; branch delay slot (effectively nop)
+ 1b0:   0000102d        move    v0,zero    ; return value
+ 1b4:   03c0e82d        move    sp,s8      ; |
+ 1b8:   dfbf0018        ld      ra,24(sp)  ; |
+ 1bc:   dfbe0010        ld      s8,16(sp)  ; |
+ 1c0:   dfbc0008        ld      gp,8(sp)   ; | epilog
+ 1c4:   67bd0020        daddiu  sp,sp,32   ; |
+ 1c8:   03e00008        jr      ra         ; |
+ 1cc:   00200825        move    at,at      ; |         branch delay slot (effectively nop)
+
+
+
+; ------------- var args with ints and floats to see spilling (which still involves only the a?-a7 regs, b/c doubles are passed via them and floats are promoted to doubles in (...)) ----------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, float f, float g, int h, int i, float j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b, c, d, e, h, i;
+;     float f, g, j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = (float)va_arg(ap, double);
+;     g = (float)va_arg(ap, double);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = (float)va_arg(ap, double);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6.f, 7, 8, 9.f);
+;     return 0;
+; }
+
+
+
+; output from freebsd-12.0_r333647-malta_mips64elhf w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   67bdffd0        daddiu  sp,sp,-48  ; |
+   4:   ffbe0028        sd      s8,40(sp)  ; |
+   8:   ffbc0020        sd      gp,32(sp)  ; | prolog
+   c:   03a0f02d        move    s8,sp      ; |
+  10:   3c1c0000        lui     gp,0x0     ; |
+  14:   0399e02d        daddu   gp,gp,t9   ; | gp = t9 + imm (both imm parts are 0 here); presumably PIC gp setup w/ relocs unresolved b/c objdump is from .o
+  18:   679c0000        daddiu  gp,gp,0    ; |
+  1c:   0080102d        move    v0,a0      ; in arg 0 -> v0
+  20:   00a0182d        move    v1,a1      ; in arg 1 -> v1
+  24:   00c0202d        move    a0,a2      ; in arg 2 -> a0
+  28:   00e0302d        move    a2,a3      ; in arg 3 -> a2
+  2c:   afc80010        sw      a4,16(s8)  ; in arg 4 (float f in C source), arrives in integer reg a4 here, stored directly to local area
+  30:   afc90014        sw      a5,20(s8)  ; in arg 5 (float g in C source), arrives in integer reg a5 here, stored directly to local area
+  34:   0140282d        move    a1,a6      ; in arg 6 -> a1
+  38:   0160382d        move    a3,a7      ; in arg 7 -> a3
+  3c:   00021000        sll     v0,v0,0x0  ; \
+  40:   afc20000        sw      v0,0(s8)   ; |
+  44:   00031000        sll     v0,v1,0x0  ; |
+  48:   afc20004        sw      v0,4(s8)   ; |
+  4c:   00041000        sll     v0,a0,0x0  ; |
+  50:   afc20008        sw      v0,8(s8)   ; | storing the 6 int in args (0,1,2,3,6,7) as 32bit words in local area on stack,
+  54:   00061000        sll     v0,a2,0x0  ; | via the moved-to regs above (sll by 0 presumably only to sign-extend the lower 32 bits);
+  58:   afc2000c        sw      v0,12(s8)  ; | in arg 8 (passed via stack, see caller) isn't touched
+  5c:   00051000        sll     v0,a1,0x0  ; |
+  60:   afc20018        sw      v0,24(s8)  ; |
+  64:   00071000        sll     v0,a3,0x0  ; |
+  68:   afc2001c        sw      v0,28(s8)  ; /
+  6c:   03c0e82d        move    sp,s8      ; \
+  70:   dfbe0028        ld      s8,40(sp)  ; |
+  74:   dfbc0020        ld      gp,32(sp)  ; | epilog
+  78:   03e00008        jr      ra         ; |
+  7c:   67bd0030        daddiu  sp,sp,48   ; |         branch delay slot style
+
+0000000000000080 <nonleaf_call>:
+  80:   67bdff50        daddiu  sp,sp,-176  ; |
+  84:   ffbf0060        sd      ra,96(sp)   ; |
+  88:   ffbe0058        sd      s8,88(sp)   ; | prolog
+  8c:   ffbc0050        sd      gp,80(sp)   ; |
+  90:   03a0f02d        move    s8,sp       ; |
+  94:   3c1c0000        lui     gp,0x0      ; |
+  98:   0399e02d        daddu   gp,gp,t9    ; | gp = t9 + imm (both imm parts are 0 here); presumably PIC gp setup w/ relocs unresolved b/c objdump is from .o
+  9c:   679c0000        daddiu  gp,gp,0     ; |
+  a0:   ffc50078        sd      a1,120(s8)  ; |
+  a4:   ffc60080        sd      a2,128(s8)  ; |
+  a8:   ffc70088        sd      a3,136(s8)  ; |
+  ac:   ffc80090        sd      a4,144(s8)  ; | in args 1,2,3,4,5,6,7 -> spill area at top of current frame (120-175, frame size is 176)
+  b0:   ffc90098        sd      a5,152(s8)  ; |
+  b4:   ffca00a0        sd      a6,160(s8)  ; |
+  b8:   ffcb00a8        sd      a7,168(s8)  ; /
+  bc:   0080102d        move    v0,a0       ; \
+  c0:   00021000        sll     v0,v0,0x0   ; | in arg 0 (the only named param) -> local area
+  c4:   afc20040        sw      v0,64(s8)   ; /
+  c8:   67c200b0        daddiu  v0,s8,176   ; \
+  cc:   6442ffc8        daddiu  v0,v0,-56   ; | s8+176-56 = s8+120 = addr of spilled in arg 1, stored at 56(s8); presumably this is the va_list
+  d0:   ffc20038        sd      v0,56(s8)   ; /
+  d4:   dfc30038        ld      v1,56(s8)   ; \
+  d8:   64620008        daddiu  v0,v1,8     ; | pattern repeated for every vararg below: reload pointer, store it back
+  dc:   ffc20038        sd      v0,56(s8)   ; | advanced by 8, and fetch the value at its previous position
+  e0:   0060102d        move    v0,v1       ; |
+  e4:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+  e8:   afc20030        sw      v0,48(s8)   ; / ... store in local area at 48(s8)
+  ec:   dfc30038        ld      v1,56(s8)   ; \
+  f0:   64620008        daddiu  v0,v1,8     ; |
+  f4:   ffc20038        sd      v0,56(s8)   ; |
+  f8:   0060102d        move    v0,v1       ; |
+  fc:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+ 100:   afc2002c        sw      v0,44(s8)   ; / ... store in local area at 44(s8)
+ 104:   dfc30038        ld      v1,56(s8)   ; \
+ 108:   64620008        daddiu  v0,v1,8     ; |
+ 10c:   ffc20038        sd      v0,56(s8)   ; |
+ 110:   0060102d        move    v0,v1       ; |
+ 114:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+ 118:   afc20028        sw      v0,40(s8)   ; / ... store in local area at 40(s8)
+ 11c:   dfc30038        ld      v1,56(s8)   ; \
+ 120:   64620008        daddiu  v0,v1,8     ; |
+ 124:   ffc20038        sd      v0,56(s8)   ; |
+ 128:   0060102d        move    v0,v1       ; |
+ 12c:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+ 130:   afc20024        sw      v0,36(s8)   ; / ... store in local area at 36(s8)
+ 134:   dfc30038        ld      v1,56(s8)   ; \
+ 138:   64620008        daddiu  v0,v1,8     ; |
+ 13c:   ffc20038        sd      v0,56(s8)   ; |
+ 140:   0060102d        move    v0,v1       ; |
+ 144:   dc420000        ld      v0,0(v0)    ; | 64bit fetch (double), ...
+ 148:   0040202d        move    a0,v0       ; | ... passed in a0 to ...
+ 14c:   df990000        ld      t9,0(gp)    ; | ... a helper (addr unresolved b/c .o), presumably doing the
+ 150:   0320f809        jalr    t9          ; | double -> float conversion of the C source's (float) cast
+ 154:   00000000        nop                 ; | branch delay slot
+ 158:   afc20018        sw      v0,24(s8)   ; / 32bit result -> local area at 24(s8)
+ 15c:   dfc30038        ld      v1,56(s8)   ; \
+ 160:   64620008        daddiu  v0,v1,8     ; |
+ 164:   ffc20038        sd      v0,56(s8)   ; |
+ 168:   0060102d        move    v0,v1       ; |
+ 16c:   dc420000        ld      v0,0(v0)    ; | 64bit fetch (double), ...
+ 170:   0040202d        move    a0,v0       ; | ... passed in a0 to ...
+ 174:   df990000        ld      t9,0(gp)    ; | ... helper (see above)
+ 178:   0320f809        jalr    t9          ; |
+ 17c:   00000000        nop                 ; | branch delay slot
+ 180:   afc20014        sw      v0,20(s8)   ; / 32bit result -> local area at 20(s8)
+ 184:   dfc30038        ld      v1,56(s8)   ; \
+ 188:   64620008        daddiu  v0,v1,8     ; |
+ 18c:   ffc20038        sd      v0,56(s8)   ; |
+ 190:   0060102d        move    v0,v1       ; |
+ 194:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+ 198:   afc20020        sw      v0,32(s8)   ; / ... store in local area at 32(s8)
+ 19c:   dfc30038        ld      v1,56(s8)   ; \
+ 1a0:   64620008        daddiu  v0,v1,8     ; |
+ 1a4:   ffc20038        sd      v0,56(s8)   ; |
+ 1a8:   0060102d        move    v0,v1       ; |
+ 1ac:   8c420000        lw      v0,0(v0)    ; | 32bit fetch, and ...
+ 1b0:   afc2001c        sw      v0,28(s8)   ; / ... store in local area at 28(s8)
+ 1b4:   dfc30038        ld      v1,56(s8)   ; \
+ 1b8:   64620008        daddiu  v0,v1,8     ; |
+ 1bc:   ffc20038        sd      v0,56(s8)   ; |
+ 1c0:   0060102d        move    v0,v1       ; |
+ 1c4:   dc420000        ld      v0,0(v0)    ; | 64bit fetch (double), ...
+ 1c8:   0040202d        move    a0,v0       ; | ... passed in a0 to ...
+ 1cc:   df990000        ld      t9,0(gp)    ; | ... helper (see above)
+ 1d0:   0320f809        jalr    t9          ; |
+ 1d4:   00000000        nop                 ; | branch delay slot
+ 1d8:   afc20010        sw      v0,16(s8)   ; / 32bit result -> local area at 16(s8)
+ 1dc:   67bdff10        daddiu  sp,sp,-240  ; alloca(220) - with padding to guarantee alignment
+ 1e0:   67a20010        daddiu  v0,sp,16    ; | sp+16 -> v0, leaving 0-15(sp) free (0(sp) is used for outgoing stack arg, below)
+ 1e4:   ffc20048        sd      v0,72(s8)   ; |
+ 1e8:   dfc30048        ld      v1,72(s8)   ; | start of alloca()'d memory -> v1, by ...
+ 1ec:   64620007        daddiu  v0,v1,7     ; | ... using v0 as helper to align to 8b, and some unnecessary stores/reloads instead of a move
+ 1f0:   000210fa        dsrl    v0,v0,0x3   ; |
+ 1f4:   000210f8        dsll    v0,v0,0x3   ; |
+ 1f8:   ffc20048        sd      v0,72(s8)   ; move addr in v0 via local area ...
+ 1fc:   dfc30048        ld      v1,72(s8)   ; ... to v1
+ 200:   2402004c        li      v0,76       ; 'L' -> v0, and ...
+ 204:   a0620000        sb      v0,0(v1)    ; ... store in local area (of alloca()'d space)
+ 208:   8fc30030        lw      v1,48(s8)   ; prep arg 0 (pointlessly) to move to a0 below
+ 20c:   8fc5002c        lw      a1,44(s8)   ; arg 1
+ 210:   8fc60028        lw      a2,40(s8)   ; arg 2
+ 214:   8fc70024        lw      a3,36(s8)   ; arg 3
+ 218:   8fca0020        lw      a6,32(s8)   ; arg 6
+ 21c:   8fcb001c        lw      a7,28(s8)   ; arg 7
+ 220:   8fc20010        lw      v0,16(s8)   ; arg 8 (third helper result, above), and ...
+ 224:   afa20000        sw      v0,0(sp)    ; ... "pushed" onto stack
+ 228:   0060202d        move    a0,v1       ; arg 0
+ 22c:   8fc80018        lw      a4,24(s8)   ; arg 4 (first helper result, above), passed in integer reg a4 here
+ 230:   8fc90014        lw      a5,20(s8)   ; arg 5 (second helper result, above), passed in integer reg a5 here
+ 234:   df990000        ld      t9,0(gp)    ; addr of callee -> t9
+ 238:   0320f809        jalr    t9          ; return address -> ra, and call
+ 23c:   00000000        nop                 ; branch delay slot
+ 240:   03c0e82d        move    sp,s8       ; |
+ 244:   dfbf0060        ld      ra,96(sp)   ; |
+ 248:   dfbe0058        ld      s8,88(sp)   ; |
+ 24c:   dfbc0050        ld      gp,80(sp)   ; | epilog
+ 250:   03e00008        jr      ra          ; |
+ 254:   67bd00b0        daddiu  sp,sp,176   ; |         branch delay slot style
+
+0000000000000258 <main>:
+ 258:   67bdffd0        daddiu  sp,sp,-48   ; |
+ 25c:   ffbf0020        sd      ra,32(sp)   ; |
+ 260:   ffbe0018        sd      s8,24(sp)   ; | prolog
+ 264:   ffbc0010        sd      gp,16(sp)   ; |
+ 268:   03a0f02d        move    s8,sp       ; |
+ 26c:   3c1c0000        lui     gp,0x0      ; |
+ 270:   0399e02d        daddu   gp,gp,t9    ; | gp = t9 + imm (both imm parts are 0 here); presumably PIC gp setup w/ relocs unresolved b/c objdump is from .o
+ 274:   679c0000        daddiu  gp,gp,0     ; |
+ 278:   df830000        ld      v1,0(gp)    ; prep arg 5 (float promoted to double), via pointer to static data (offsets 0 b/c objdump is from .o), ...
+ 27c:   dc630000        ld      v1,0(v1)    ; ... as 64bit value to v1 (integer reg), moved to a5 below
+ 280:   df8a0000        ld      a6,0(gp)    ; arg 6 (float promoted to double), via pointer to static data, ...
+ 284:   dd4a0000        ld      a6,0(a6)    ; ... as 64bit value directly to a6
+ 288:   24020008        li      v0,8        ; arg 8, ...
+ 28c:   ffa20000        sd      v0,0(sp)    ; ... "pushed" onto stack
+ 290:   df820000        ld      v0,0(gp)    ; arg 9 (float promoted to double), via pointer to static data, ...
+ 294:   dc420000        ld      v0,0(v0)    ; ... via v0 ...
+ 298:   ffa20008        sd      v0,8(sp)    ; ... "pushed" onto stack
+ 29c:   0000202d        move    a0,zero     ; arg 0
+ 2a0:   24050001        li      a1,1        ; arg 1
+ 2a4:   24060002        li      a2,2        ; arg 2
+ 2a8:   24070003        li      a3,3        ; arg 3
+ 2ac:   24080004        li      a4,4        ; arg 4
+ 2b0:   0060482d        move    a5,v1       ; arg 5 (from v1, prepared above)
+ 2b4:   240b0007        li      a7,7        ; arg 7
+ 2b8:   df990000        ld      t9,0(gp)    ; address of callee -> t9
+ 2bc:   0320f809        jalr    t9          ; return address -> ra, and call
+ 2c0:   00000000        nop                 ; branch delay slot
+ 2c4:   03c0e82d        move    sp,s8       ; |
+ 2c8:   dfbf0020        ld      ra,32(sp)   ; |
+ 2cc:   dfbe0018        ld      s8,24(sp)   ; |
+ 2d0:   dfbc0010        ld      gp,16(sp)   ; | epilog
+ 2d4:   03e00008        jr      ra          ; |
+ 2d8:   67bd0030        daddiu  sp,sp,48    ; |         branch delay slot style
+ 2dc:   00000000        nop                 ; presumably padding (listing then ends at 0x2e0, a 16b boundary)
+
+
+
+; output from debian-sid_20150616-malta_mips64el_n64 w/ gcc 4.9.2
+
+0000000000000000 <leaf_call>:
+   0:   67bdffd0        daddiu  sp,sp,-48
+   4:   ffbe0028        sd      s8,40(sp)
+   8:   03a0f02d        move    s8,sp
+   c:   0080482d        move    a5,a0
+  10:   00a0402d        move    a4,a1
+  14:   00c0282d        move    a1,a2
+  18:   00e0202d        move    a0,a3
+  1c:   e7d00010        swc1    $f16,16(s8)
+  20:   e7d10014        swc1    $f17,20(s8)
+  24:   0140182d        move    v1,a6
+  28:   0160102d        move    v0,a7
+  2c:   00093000        sll     a2,a5,0x0
+  30:   afc60000        sw      a2,0(s8)
+  34:   00083000        sll     a2,a4,0x0
+  38:   afc60004        sw      a2,4(s8)
+  3c:   00052800        sll     a1,a1,0x0
+  40:   afc50008        sw      a1,8(s8)
+  44:   00042000        sll     a0,a0,0x0
+  48:   afc4000c        sw      a0,12(s8)
+  4c:   00031800        sll     v1,v1,0x0
+  50:   afc30018        sw      v1,24(s8)
+  54:   00021000        sll     v0,v0,0x0
+  58:   afc2001c        sw      v0,28(s8)
+  5c:   03c0e82d        move    sp,s8
+  60:   dfbe0028        ld      s8,40(sp)
+  64:   67bd0030        daddiu  sp,sp,48
+  68:   03e00008        jr      ra
+  6c:   00200825        move    at,at
+
+0000000000000070 <nonleaf_call>:
+  70:   67bdff50        daddiu  sp,sp,-176  ; |         leaving 64b extra space adjacent to prev frame's param area for spilling
+  74:   ffbf0068        sd      ra,104(sp)  ; |
+  78:   ffbe0060        sd      s8,96(sp)   ; | prolog
+  7c:   ffbc0058        sd      gp,88(sp)   ; |
+  80:   03a0f02d        move    s8,sp       ; |
+  84:   3c1c0000        lui     gp,0x0      ; @@@ unsure
+  88:   0399e02d        daddu   gp,gp,t9    ; @@@ unsure
+  8c:   679c0000        daddiu  gp,gp,0     ; @@@ unsure
+  90:   ffc50078        sd      a1,120(s8)  ; |
+  94:   ffc60080        sd      a2,128(s8)  ; |
+  98:   ffc70088        sd      a3,136(s8)  ; |
+  9c:   ffc80090        sd      a4,144(s8)  ; | in args 1,2,3,4,5,6,7 -> spill area in current frame (adjacent to prev frame's param area)
+  a0:   ffc90098        sd      a5,152(s8)  ; |
+  a4:   ffca00a0        sd      a6,160(s8)  ; |
+  a8:   ffcb00a8        sd      a7,168(s8)  ; /
+  ac:   0080102d        move    v0,a0       ; \
+  b0:   00021000        sll     v0,v0,0x0   ; |
+  b4:   afc20040        sw      v0,64(s8)   ; |
+  b8:   67c200b0        daddiu  v0,s8,176   ; |
+  bc:   6442ffc8        daddiu  v0,v0,-56   ; |
+  c0:   ffc20038        sd      v0,56(s8)   ; |
+  c4:   dfc20038        ld      v0,56(s8)   ; |
+  c8:   64430008        daddiu  v1,v0,8     ; |
+  cc:   ffc30038        sd      v1,56(s8)   ; |
+  d0:   8c420000        lw      v0,0(v0)    ; |
+  d4:   afc20010        sw      v0,16(s8)   ; |
+  d8:   dfc20038        ld      v0,56(s8)   ; |
+  dc:   64430008        daddiu  v1,v0,8     ; |
+  e0:   ffc30038        sd      v1,56(s8)   ; |
+  e4:   8c420000        lw      v0,0(v0)    ; |
+  e8:   afc20014        sw      v0,20(s8)   ; |
+  ec:   dfc20038        ld      v0,56(s8)   ; |
+  f0:   64430008        daddiu  v1,v0,8     ; |
+  f4:   ffc30038        sd      v1,56(s8)   ; |
+  f8:   8c420000        lw      v0,0(v0)    ; |
+  fc:   afc20018        sw      v0,24(s8)   ; |
+ 100:   dfc20038        ld      v0,56(s8)   ; |
+ 104:   64430008        daddiu  v1,v0,8     ; |
+ 108:   ffc30038        sd      v1,56(s8)   ; |
+ 10c:   8c420000        lw      v0,0(v0)    ; |
+ 110:   afc2001c        sw      v0,28(s8)   ; |
+ 114:   dfc20038        ld      v0,56(s8)   ; |
+ 118:   64430008        daddiu  v1,v0,8     ; |
+ 11c:   ffc30038        sd      v1,56(s8)   ; |
+ 120:   dc420000        ld      v0,0(v0)    ; |
+ 124:   44a20000        dmtc1   v0,$f0      ; | vararg stuff: pointer to beginning of spill area (constantly
+ 128:   46200020        cvt.s.d $f0,$f0     ; | stored and reloaded) to iterate over all params which are stored 
+ 12c:   44020000        mfc1    v0,$f0      ; | to a local space on stack, which they are refetched from, below
+ 130:   afc20020        sw      v0,32(s8)   ; | (see similar mips32 examples for detailed analysis)
+ 134:   dfc20038        ld      v0,56(s8)   ; |
+ 138:   64430008        daddiu  v1,v0,8     ; |
+ 13c:   ffc30038        sd      v1,56(s8)   ; |
+ 140:   dc420000        ld      v0,0(v0)    ; |
+ 144:   44a20000        dmtc1   v0,$f0      ; |
+ 148:   46200020        cvt.s.d $f0,$f0     ; |
+ 14c:   44020000        mfc1    v0,$f0      ; |
+ 150:   afc20024        sw      v0,36(s8)   ; |
+ 154:   dfc20038        ld      v0,56(s8)   ; |
+ 158:   64430008        daddiu  v1,v0,8     ; |
+ 15c:   ffc30038        sd      v1,56(s8)   ; |
+ 160:   8c420000        lw      v0,0(v0)    ; |
+ 164:   afc20028        sw      v0,40(s8)   ; |
+ 168:   dfc20038        ld      v0,56(s8)   ; |
+ 16c:   64430008        daddiu  v1,v0,8     ; |
+ 170:   ffc30038        sd      v1,56(s8)   ; |
+ 174:   8c420000        lw      v0,0(v0)    ; |
+ 178:   afc2002c        sw      v0,44(s8)   ; |
+ 17c:   dfc20038        ld      v0,56(s8)   ; |
+ 180:   64430008        daddiu  v1,v0,8     ; |
+ 184:   ffc30038        sd      v1,56(s8)   ; |
+ 188:   dc420000        ld      v0,0(v0)    ; |
+ 18c:   44a20000        dmtc1   v0,$f0      ; |
+ 190:   46200020        cvt.s.d $f0,$f0     ; |
+ 194:   44020000        mfc1    v0,$f0      ; |
+ 198:   afc20030        sw      v0,48(s8)   ; |
+ 19c:   67bdff10        daddiu  sp,sp,-240  ; alloca(220) - with padding to guarantee alignment
+ 1a0:   67a20010        daddiu  v0,sp,16    ; |
+ 1a4:   6442000f        daddiu  v0,v0,15    ; |
+ 1a8:   0002113a        dsrl    v0,v0,0x4   ; | start of alloca()'d memory -> v1, by ...
+ 1ac:   00021138        dsll    v0,v0,0x4   ; | ... using v0 as helper to align to 16b
+ 1b0:   0040182d        move    v1,v0       ; |
+ 1b4:   2402004c        li      v0,76       ; 'L' -> v0, and ...
+ 1b8:   a0620000        sb      v0,0(v1)    ; ... store in local area (of alloca()'d space)
+ 1bc:   8fc40010        lw      a0,16(s8)   ; arg 0
+ 1c0:   8fc50014        lw      a1,20(s8)   ; arg 1
+ 1c4:   8fc60018        lw      a2,24(s8)   ; arg 2
+ 1c8:   8fc7001c        lw      a3,28(s8)   ; arg 3
+ 1cc:   8fc80028        lw      a4,40(s8)   ; prepare arg 6 (using a4 only as temp reg), to move to a6 below
+ 1d0:   8fc3002c        lw      v1,44(s8)   ; prepare arg 7 (pointlessly) to move to a7 below
+ 1d4:   8fc20030        lw      v0,48(s8)   ; arg 8, and ...
+ 1d8:   afa20000        sw      v0,0(sp)    ; ... "pushed" onto stack
+ 1dc:   c7d00020        lwc1    $f16,32(s8) ; arg 4 (so skipping f12-f15)
+ 1e0:   c7d10024        lwc1    $f17,36(s8) ; arg 5 (so skipping f12-f15)
+ 1e4:   0100502d        move    a6,a4       ; arg 6 (from a4 used as temp reg, pointlessly)
+ 1e8:   0060582d        move    a7,v1       ; arg 7
+ 1ec:   df820000        ld      v0,0(gp)    ; address of callee, to ...
+ 1f0:   0040c82d        move    t9,v0       ; ... t9
+ 1f4:   0320f809        jalr    t9          ; return address -> ra, and call
+ 1f8:   00200825        move    at,at       ; branch delay slot (effectively nop)
+ 1fc:   03c0e82d        move    sp,s8       ; |
+ 200:   dfbf0068        ld      ra,104(sp)  ; |
+ 204:   dfbe0060        ld      s8,96(sp)   ; |
+ 208:   dfbc0058        ld      gp,88(sp)   ; | epilog
+ 20c:   67bd00b0        daddiu  sp,sp,176   ; |
+ 210:   03e00008        jr      ra          ; |
+ 214:   00200825        move    at,at       ; |         branch delay slot (effectively nop)
+
+0000000000000218 <main>:
+ 218:   67bdffd0        daddiu  sp,sp,-48   ; |
+ 21c:   ffbf0028        sd      ra,40(sp)   ; |
+ 220:   ffbe0020        sd      s8,32(sp)   ; | prolog
+ 224:   ffbc0018        sd      gp,24(sp)   ; |
+ 228:   03a0f02d        move    s8,sp       ; |
+ 22c:   3c1c0000        lui     gp,0x0      ; @@@ unsure
+ 230:   0399e02d        daddu   gp,gp,t9    ; @@@ unsure
+ 234:   679c0000        daddiu  gp,gp,0     ; @@@ unsure
+ 238:   df820000        ld      v0,0(gp)    ; arg 5 (float promoted to double), from static data (0 b/c objdmp is from .o, not final linked exec), ...
+ 23c:   d4410000        ldc1    $f1,0(v0)   ; ... to f1 (moved to a5, below)
+ 240:   df820000        ld      v0,0(gp)    ; arg 6 (float promoted to double), from static data (0 b/c objdmp is from .o, not final linked exec), ...
+ 244:   d4400000        ldc1    $f0,0(v0)   ; ... to f0 (moved to a6, below)
+ 248:   24020008        li      v0,8        ; arg 8, ...
+ 24c:   ffa20000        sd      v0,0(sp)    ; ... "pushed" onto stack
+ 250:   df820000        ld      v0,0(gp)    ; arg 9 (float promoted to double), from static data (0 b/c objdmp is from .o, not final linked exec), ...
+ 254:   dc420000        ld      v0,0(v0)    ; ... via v0 ...
+ 258:   ffa20008        sd      v0,8(sp)    ; ... "pushed" onto stack
+ 25c:   0000202d        move    a0,zero     ; arg 0
+ 260:   24050001        li      a1,1        ; arg 1
+ 264:   24060002        li      a2,2        ; arg 2
+ 268:   24070003        li      a3,3        ; arg 3
+ 26c:   24080004        li      a4,4        ; arg 4
+ 270:   44290800        dmfc1   a5,$f1      ; arg 5 (note: passed in a5 b/c vararg)
+ 274:   442a0000        dmfc1   a6,$f0      ; arg 6 (note: passed in a6 b/c vararg)
+ 278:   240b0007        li      a7,7        ; arg 7
+ 27c:   df820000        ld      v0,0(gp)    ; address of callee, to ...
+ 280:   0040c82d        move    t9,v0       ; ... t9
+ 284:   0320f809        jalr    t9          ; return address -> ra, and call
+ 288:   00200825        move    at,at       ; branch delay slot (effectively nop)
+ 28c:   03c0e82d        move    sp,s8       ; |
+ 290:   dfbf0028        ld      ra,40(sp)   ; |
+ 294:   dfbe0020        ld      s8,32(sp)   ; |
+ 298:   dfbc0018        ld      gp,24(sp)   ; |
+ 29c:   67bd0030        daddiu  sp,sp,48    ; | epilog
+ 2a0:   03e00008        jr      ra          ; |
+ 2a4:   00200825        move    at,at       ; |         branch delay slot (effectively nop)
+ 2a8:   00200825        move    at,at       ; ? @@@ after the return's delay slot, so never executed; presumably padding (effectively nops) ...
+ 2ac:   00200825        move    at,at       ; ? @@@ ... up to 0x2b0, a 16b boundary
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/ppc.darwin.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,409 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from darwin-8.0.1-ppc w/ gcc 3.3
+
+_leaf_call:
+       0:       bf c1 ff f8     stmw 30, -8(1)
+       4:       94 21 ff d0     stwu 1, -48(1)
+       8:       7c 3e 0b 78     mr 30, 1
+       c:       90 7e 00 48     stw 3, 72(30)
+      10:       90 9e 00 4c     stw 4, 76(30)
+      14:       90 be 00 50     stw 5, 80(30)
+      18:       90 de 00 54     stw 6, 84(30)
+      1c:       90 fe 00 58     stw 7, 88(30)
+      20:       91 1e 00 5c     stw 8, 92(30)
+      24:       91 3e 00 60     stw 9, 96(30)
+      28:       80 21 00 00     lwz 1, 0(1)
+      2c:       bb c1 ff f8     lmw 30, -8(1)
+      30:       4e 80 00 20     blr
+
+_nonleaf_call:
+      34:       7c 08 02 a6     mflr 0          ; |         lr -> gpr0
+      38:       bf c1 ff f8     stmw 30, -8(1)  ; |         store gpr{30,31}
+      3c:       90 01 00 08     stw 0, 8(1)     ; | prolog  store lr
+      40:       94 21 ff b0     stwu 1, -80(1)  ; |         open frame and store sp at top of stack
+      44:       7c 3e 0b 78     mr 30, 1        ; /         sp -> gpr30, latter used for some fixed addressing below
+      48:       90 7e 00 68     stw 3, 104(30)  ; \
+      4c:       90 9e 00 6c     stw 4, 108(30)  ; |
+      50:       90 be 00 70     stw 5, 112(30)  ; |
+      54:       90 de 00 74     stw 6, 116(30)  ; |
+      58:       90 fe 00 78     stw 7, 120(30)  ; | all in args -> spill area in prev frame
+      5c:       91 1e 00 7c     stw 8, 124(30)  ; |
+      60:       91 3e 00 80     stw 9, 128(30)  ; |
+      64:       91 5e 00 84     stw 10, 132(30) ; |
+      68:       80 01 00 00     lwz 0, 0(1)     ; fetch sp saved on top of stack by prolog -> gpr0, and ...
+      6c:       94 01 ff 10     stwu 0, -240(1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment
+      70:       38 41 00 40     addi 2, 1, 64   ; |
+      74:       38 02 00 0f     addi 0, 2, 15   ; | start of alloca()'d memory -> gpr2, by ...
+      78:       54 00 e1 3e     srwi 0, 0, 4    ; | ... using gpr0 as helper to align to 16b, leaving at least 64b at top of stack
+      7c:       54 02 20 36     slwi 2, 0, 4    ; |
+      80:       38 00 00 4c     li 0, 76        ; 'L' -> gpr0, and ...
+      84:       98 02 00 00     stb 0, 0(2)     ; ... store in local area (of alloca()'d space)
+      88:       80 7e 00 6c     lwz 3, 108(30)  ; |
+      8c:       80 9e 00 70     lwz 4, 112(30)  ; |
+      90:       80 be 00 74     lwz 5, 116(30)  ; |
+      94:       80 de 00 78     lwz 6, 120(30)  ; | arg 0,1,2,3,4,5,6 (fetched from spill area from prev frame)
+      98:       80 fe 00 7c     lwz 7, 124(30)  ; |
+      9c:       81 1e 00 80     lwz 8, 128(30)  ; |
+      a0:       81 3e 00 84     lwz 9, 132(30)  ; |
+      a4:       4b ff ff 5d     bl .+67108700   ; call and put return address -> lr
+      a8:       80 21 00 00     lwz 1, 0(1)     ; |
+      ac:       80 01 00 08     lwz 0, 8(1)     ; |
+      b0:       7c 08 03 a6     mtlr 0          ; | epilog
+      b4:       bb c1 ff f8     lmw 30, -8(1)   ; |
+      b8:       4e 80 00 20     blr             ; |
+
+_main:
+      bc:       7c 08 02 a6     mflr 0          ; |
+      c0:       bf c1 ff f8     stmw 30, -8(1)  ; |
+      c4:       90 01 00 08     stw 0, 8(1)     ; | prolog
+      c8:       94 21 ff b0     stwu 1, -80(1)  ; |
+      cc:       7c 3e 0b 78     mr 30, 1        ; |
+      d0:       38 60 00 00     li 3, 0         ; arg 0
+      d4:       38 80 00 01     li 4, 1         ; arg 1
+      d8:       38 a0 00 02     li 5, 2         ; arg 2
+      dc:       38 c0 00 03     li 6, 3         ; arg 3
+      e0:       38 e0 00 04     li 7, 4         ; arg 4
+      e4:       39 00 00 05     li 8, 5         ; arg 5
+      e8:       39 20 00 06     li 9, 6         ; arg 6
+      ec:       39 40 00 07     li 10, 7        ; arg 7
+      f0:       4b ff ff 45     bl .+67108676   ; call and put return address -> lr
+      f4:       38 00 00 00     li 0, 0         ; return value (pointlessly) via gpr0 ...
+      f8:       7c 03 03 78     mr 3, 0         ; ... to gpr3
+      fc:       80 21 00 00     lwz 1, 0(1)     ; |
+     100:       80 01 00 08     lwz 0, 8(1)     ; |
+     104:       7c 08 03 a6     mtlr 0          ; | epilog
+     108:       bb c1 ff f8     lmw 30, -8(1)   ; |
+     10c:       4e 80 00 20     blr             ; |
+
+
+
+; ------------- more than 8 int args ----------->
+
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+; 	return 0;
+; }
+
+
+
+; output from darwin-8.0.1-ppc w/ gcc 3.3
+
+_leaf_call:
+       0:       bf c1 ff f8     stmw 30, -8(1)
+       4:       94 21 ff d0     stwu 1, -48(1)
+       8:       7c 3e 0b 78     mr 30, 1
+       c:       90 7e 00 48     stw 3, 72(30)
+      10:       90 9e 00 4c     stw 4, 76(30)
+      14:       90 be 00 50     stw 5, 80(30)
+      18:       90 de 00 54     stw 6, 84(30)
+      1c:       90 fe 00 58     stw 7, 88(30)
+      20:       91 1e 00 5c     stw 8, 92(30)
+      24:       91 3e 00 60     stw 9, 96(30)
+      28:       91 5e 00 64     stw 10, 100(30)
+      2c:       80 21 00 00     lwz 1, 0(1)
+      30:       bb c1 ff f8     lmw 30, -8(1)
+      34:       4e 80 00 20     blr
+
+_nonleaf_call:
+      38:       7c 08 02 a6     mflr 0          ; |
+      3c:       bf c1 ff f8     stmw 30, -8(1)  ; |
+      40:       90 01 00 08     stw 0, 8(1)     ; | prolog
+      44:       94 21 ff a0     stwu 1, -96(1)  ; |
+      48:       7c 3e 0b 78     mr 30, 1        ; /
+      4c:       90 7e 00 78     stw 3, 120(30)  ; \
+      50:       90 9e 00 7c     stw 4, 124(30)  ; |
+      54:       90 be 00 80     stw 5, 128(30)  ; |
+      58:       90 de 00 84     stw 6, 132(30)  ; |
+      5c:       90 fe 00 88     stw 7, 136(30)  ; | in args 0,1,2,3,4,5,6,7 -> spill area in prev frame
+      60:       91 1e 00 8c     stw 8, 140(30)  ; |
+      64:       91 3e 00 90     stw 9, 144(30)  ; |
+      68:       91 5e 00 94     stw 10, 148(30) ; |
+      6c:       80 01 00 00     lwz 0, 0(1)     ; fetch sp saved on top of stack by prolog -> gpr0, and ...
+      70:       94 01 ff 10     stwu 0, -240(1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment
+      74:       38 41 00 50     addi 2, 1, 80   ; |
+      78:       38 02 00 0f     addi 0, 2, 15   ; | start of alloca()'d memory -> gpr2, by ...
+      7c:       54 00 e1 3e     srwi 0, 0, 4    ; | ... using gpr0 as helper to align to 16b, leaving at least 80b at top of stack
+      80:       54 02 20 36     slwi 2, 0, 4    ; |
+      84:       38 00 00 4c     li 0, 76        ; 'L' -> gpr0, and ...
+      88:       98 02 00 00     stb 0, 0(2)     ; ... store in local area (of alloca()'d space)
+      8c:       80 1e 00 9c     lwz 0, 156(30)  ; arg 8 (fetched from stack param area from prev frame), and ...
+      90:       90 01 00 38     stw 0, 56(1)    ; ... "pushed" onto stack
+      94:       80 7e 00 7c     lwz 3, 124(30)  ; |
+      98:       80 9e 00 80     lwz 4, 128(30)  ; |
+      9c:       80 be 00 84     lwz 5, 132(30)  ; |
+      a0:       80 de 00 88     lwz 6, 136(30)  ; | arg 0,1,2,3,4,5,6 (fetched from spill area from prev frame)
+      a4:       80 fe 00 8c     lwz 7, 140(30)  ; |
+      a8:       81 1e 00 90     lwz 8, 144(30)  ; |
+      ac:       81 3e 00 94     lwz 9, 148(30)  ; |
+      b0:       81 5e 00 98     lwz 10, 152(30) ; arg 7 (fetched from stack param area from prev frame)
+      b4:       4b ff ff 4d     bl .+67108684   ; call and put return address -> lr
+      b8:       80 21 00 00     lwz 1, 0(1)     ; |
+      bc:       80 01 00 08     lwz 0, 8(1)     ; |
+      c0:       7c 08 03 a6     mtlr 0          ; | epilog
+      c4:       bb c1 ff f8     lmw 30, -8(1)   ; |
+      c8:       4e 80 00 20     blr             ; |
+
+_main:
+      cc:       7c 08 02 a6     mflr 0          ; |
+      d0:       bf c1 ff f8     stmw 30, -8(1)  ; |
+      d4:       90 01 00 08     stw 0, 8(1)     ; | prolog
+      d8:       94 21 ff a0     stwu 1, -96(1)  ; |
+      dc:       7c 3e 0b 78     mr 30, 1        ; |
+      e0:       38 00 00 08     li 0, 8         ; arg 8, ...
+      e4:       90 01 00 38     stw 0, 56(1)    ; ... "pushed" onto stack
+      e8:       38 00 00 09     li 0, 9         ; arg 9, ...
+      ec:       90 01 00 3c     stw 0, 60(1)    ; ... "pushed" onto stack
+      f0:       38 60 00 00     li 3, 0         ; arg 0
+      f4:       38 80 00 01     li 4, 1         ; arg 1
+      f8:       38 a0 00 02     li 5, 2         ; arg 2
+      fc:       38 c0 00 03     li 6, 3         ; arg 3
+     100:       38 e0 00 04     li 7, 4         ; arg 4
+     104:       39 00 00 05     li 8, 5         ; arg 5
+     108:       39 20 00 06     li 9, 6         ; arg 6
+     10c:       39 40 00 07     li 10, 7        ; arg 7
+     110:       4b ff ff 29     bl .+67108648   ; call and put return address -> lr
+     114:       38 00 00 00     li 0, 0         ; return value (pointlessly) via gpr0 ...
+     118:       7c 03 03 78     mr 3, 0         ; ... to gpr3
+     11c:       80 21 00 00     lwz 1, 0(1)     ; |
+     120:       80 01 00 08     lwz 0, 8(1)     ; |
+     124:       7c 08 03 a6     mtlr 0          ; | epilog
+     128:       bb c1 ff f8     lmw 30, -8(1)   ; |
+     12c:       4e 80 00 20     blr             ; |
+
+
+
+; ------------- var args with ints and floats to see spilling (which remains only int regs), b/c doubles are passed via them and floats are promoted to doubles in (...) ----------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, float f, float g, int h, int i, float j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b, c, d, e, h, i;
+;     float f, g, j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = (float)va_arg(ap, double);
+;     g = (float)va_arg(ap, double);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = (float)va_arg(ap, double);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6.f, 7, 8, 9.f);
+;     return 0;
+; }
+
+
+
+; output from darwin-8.0.1-ppc w/ gcc 3.3
+
+_leaf_call:
+       0:       bf c1 ff f8     stmw 30, -8(1)
+       4:       94 21 ff d0     stwu 1, -48(1)
+       8:       7c 3e 0b 78     mr 30, 1
+       c:       90 7e 00 48     stw 3, 72(30)
+      10:       90 9e 00 4c     stw 4, 76(30)
+      14:       90 be 00 50     stw 5, 80(30)
+      18:       90 de 00 54     stw 6, 84(30)
+      1c:       d0 3e 00 58     stfs 1, 88(30)
+      20:       d0 5e 00 5c     stfs 2, 92(30)
+      24:       91 3e 00 60     stw 9, 96(30)
+      28:       91 5e 00 64     stw 10, 100(30)
+      2c:       d0 7e 00 68     stfs 3, 104(30)
+      30:       80 21 00 00     lwz 1, 0(1)
+      34:       bb c1 ff f8     lmw 30, -8(1)
+      38:       4e 80 00 20     blr
+
+_nonleaf_call:
+      3c:       7c 08 02 a6     mflr 0          ; |
+      40:       bf c1 ff f8     stmw 30, -8(1)  ; |
+      44:       90 01 00 08     stw 0, 8(1)     ; | prolog
+      48:       94 21 ff 70     stwu 1, -144(1) ; |
+      4c:       7c 3e 0b 78     mr 30, 1        ; /
+      50:       90 9e 00 ac     stw 4, 172(30)  ; \
+      54:       90 be 00 b0     stw 5, 176(30)  ; |
+      58:       90 de 00 b4     stw 6, 180(30)  ; |
+      5c:       90 fe 00 b8     stw 7, 184(30)  ; |
+      60:       91 1e 00 bc     stw 8, 188(30)  ; | in args ,1,2,3,4,5,6,7 -> spill area in prev frame
+      64:       91 3e 00 c0     stw 9, 192(30)  ; |
+      68:       91 5e 00 c4     stw 10, 196(30) ; |
+      6c:       90 7e 00 a8     stw 3, 168(30)  ; |                  <- this is in arg 0, the only named arg
+      70:       38 1e 00 ac     addi 0, 30, 172 ; get pointer to first unnamed arg in gpr0 for vararg iteration, ...
+      74:       90 1e 00 74     stw 0, 116(30)  ; ... and store read ptr in local area
+      78:       81 3e 00 74     lwz 9, 116(30)  ; \              read ptr -> gpr9
+      7c:       80 5e 00 74     lwz 2, 116(30)  ; |              use gpr2 as helper ...
+      80:       38 02 00 04     addi 0, 2, 4    ; |              ... to increment read ptr ...
+      84:       90 1e 00 74     stw 0, 116(30)  ; | in arg 1     ... and restore
+      88:       80 09 00 00     lwz 0, 0(9)     ; |              load in arg 1, and ...
+      8c:       90 1e 00 50     stw 0, 80(30)   ; /              ... store in temp space in local area
+      90:       81 3e 00 74     lwz 9, 116(30)  ; \
+      94:       80 5e 00 74     lwz 2, 116(30)  ; |
+      98:       38 02 00 04     addi 0, 2, 4    ; |
+      9c:       90 1e 00 74     stw 0, 116(30)  ; | in arg 2
+      a0:       80 09 00 00     lwz 0, 0(9)     ; |
+      a4:       90 1e 00 54     stw 0, 84(30)   ; /
+      a8:       81 3e 00 74     lwz 9, 116(30)  ; \
+      ac:       80 5e 00 74     lwz 2, 116(30)  ; |
+      b0:       38 02 00 04     addi 0, 2, 4    ; |
+      b4:       90 1e 00 74     stw 0, 116(30)  ; | in arg 3
+      b8:       80 09 00 00     lwz 0, 0(9)     ; |
+      bc:       90 1e 00 58     stw 0, 88(30)   ; /
+      c0:       81 3e 00 74     lwz 9, 116(30)  ; \
+      c4:       80 5e 00 74     lwz 2, 116(30)  ; |
+      c8:       38 02 00 04     addi 0, 2, 4    ; |
+      cc:       90 1e 00 74     stw 0, 116(30)  ; | in arg 4
+      d0:       80 09 00 00     lwz 0, 0(9)     ; |
+      d4:       90 1e 00 5c     stw 0, 92(30)   ; /
+      d8:       81 3e 00 74     lwz 9, 116(30)  ; \
+      dc:       80 5e 00 74     lwz 2, 116(30)  ; |
+      e0:       38 02 00 08     addi 0, 2, 8    ; |
+      e4:       90 1e 00 74     stw 0, 116(30)  ; | in arg 5 (float, promoted to double)
+      e8:       c8 09 00 00     lfd 0, 0(9)     ; |
+      ec:       fc 00 00 18     frsp 0, 0       ; |
+      f0:       d0 1e 00 68     stfs 0, 104(30) ; /
+      f4:       81 3e 00 74     lwz 9, 116(30)  ; \
+      f8:       80 5e 00 74     lwz 2, 116(30)  ; |
+      fc:       38 02 00 08     addi 0, 2, 8    ; |
+     100:       90 1e 00 74     stw 0, 116(30)  ; | in arg 6 (float, promoted to double)
+     104:       c8 09 00 00     lfd 0, 0(9)     ; |
+     108:       fc 00 00 18     frsp 0, 0       ; |
+     10c:       d0 1e 00 6c     stfs 0, 108(30) ; /
+     110:       81 3e 00 74     lwz 9, 116(30)  ; \
+     114:       80 5e 00 74     lwz 2, 116(30)  ; |
+     118:       38 02 00 04     addi 0, 2, 4    ; |
+     11c:       90 1e 00 74     stw 0, 116(30)  ; | in arg 7
+     120:       80 09 00 00     lwz 0, 0(9)     ; |
+     124:       90 1e 00 60     stw 0, 96(30)   ; /
+     128:       81 3e 00 74     lwz 9, 116(30)  ; \
+     12c:       80 5e 00 74     lwz 2, 116(30)  ; |
+     130:       38 02 00 04     addi 0, 2, 4    ; |
+     134:       90 1e 00 74     stw 0, 116(30)  ; | in arg 8
+     138:       80 09 00 00     lwz 0, 0(9)     ; |
+     13c:       90 1e 00 64     stw 0, 100(30)  ; /
+     140:       81 3e 00 74     lwz 9, 116(30)  ; \
+     144:       80 5e 00 74     lwz 2, 116(30)  ; |
+     148:       38 02 00 08     addi 0, 2, 8    ; |
+     14c:       90 1e 00 74     stw 0, 116(30)  ; | in arg 9 (float, promoted to double)
+     150:       c8 09 00 00     lfd 0, 0(9)     ; |
+     154:       fc 00 00 18     frsp 0, 0       ; |
+     158:       d0 1e 00 70     stfs 0, 112(30) ; /
+     15c:       80 01 00 00     lwz 0, 0(1)     ; fetch sp saved on top of stack by prolog -> gpr0, and ...
+     160:       94 01 ff 10     stwu 0, -240(1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment
+     164:       38 41 00 50     addi 2, 1, 80   ; |
+     168:       38 02 00 0f     addi 0, 2, 15   ; | start of alloca()'d memory -> gpr2, by ...
+     16c:       54 00 e1 3e     srwi 0, 0, 4    ; | ... using gpr0 as helper to align to 16b, leaving at least 80b at top of stack
+     170:       54 02 20 36     slwi 2, 0, 4    ; |
+     174:       38 00 00 4c     li 0, 76        ; 'L' -> gpr0, and ...
+     178:       98 02 00 00     stb 0, 0(2)     ; ... store in local area (of alloca()'d space)
+     17c:       80 7e 00 50     lwz 3, 80(30)   ; arg 0
+     180:       80 9e 00 54     lwz 4, 84(30)   ; arg 1
+     184:       80 be 00 58     lwz 5, 88(30)   ; arg 2
+     188:       80 de 00 5c     lwz 6, 92(30)   ; arg 3
+     18c:       c0 3e 00 68     lfs 1, 104(30)  ; arg 4 (float)
+     190:       c0 5e 00 6c     lfs 2, 108(30)  ; arg 5 (float)
+     194:       81 3e 00 60     lwz 9, 96(30)   ; arg 6
+     198:       81 5e 00 64     lwz 10, 100(30) ; arg 7
+     19c:       c0 7e 00 70     lfs 3, 112(30)  ; arg 8 (float)
+     1a0:       4b ff fe 61     bl .+67108448   ; call and put return address -> lr
+     1a4:       80 21 00 00     lwz 1, 0(1)     ; |
+     1a8:       80 01 00 08     lwz 0, 8(1)     ; |
+     1ac:       7c 08 03 a6     mtlr 0          ; | epilog
+     1b0:       bb c1 ff f8     lmw 30, -8(1)   ; |
+     1b4:       4e 80 00 20     blr             ; |
+
+_main:
+     1b8:       7c 08 02 a6     mflr 0          ; |
+     1bc:       bf c1 ff f8     stmw 30, -8(1)  ; |
+     1c0:       90 01 00 08     stw 0, 8(1)     ; | prolog
+     1c4:       94 21 ff 90     stwu 1, -112(1) ; |
+     1c8:       7c 3e 0b 78     mr 30, 1        ; |
+     1cc:       42 9f 00 05     bcl 20, 31, .+4 ; ppc way to get PC in ...
+     1d0:       7f e8 02 a6     mflr 31         ; ... gpr31
+     1d4:       38 00 00 07     li 0, 7         ; arg 7, ...
+     1d8:       90 01 00 3c     stw 0, 60(1)    ; ... "pushed" onto stack
+     1dc:       38 00 00 08     li 0, 8         ; arg 8, ...
+     1e0:       90 01 00 40     stw 0, 64(1)    ; ... "pushed" onto stack
+     1e4:       3c 40 40 22     lis 2, 16418    ; | arg 9, top-half (double b/c of vararg), and ...
+     1e8:       38 60 00 00     li 3, 0         ; | ... bottom-half ...
+     1ec:       90 41 00 44     stw 2, 68(1)    ; | ... "pushed" into stack
+     1f0:       90 61 00 48     stw 3, 72(1)    ; |         "
+     1f4:       38 00 00 00     li 0, 0         ; arg 6, bottom-half, ...
+     1f8:       90 01 00 38     stw 0, 56(1)    ; ... "pushed" onto stack (first word in param area, top-half passed via gpr10, see below)
+     1fc:       38 60 00 00     li 3, 0         ; arg 0
+     200:       38 80 00 01     li 4, 1         ; arg 1
+     204:       38 a0 00 02     li 5, 2         ; arg 2
+     208:       38 c0 00 03     li 6, 3         ; arg 3
+     20c:       38 e0 00 04     li 7, 4         ; arg 4
+     210:       3d 20 40 14     lis 9, 16404    ; | prep arg 5 (double b/c of vararg) for move to arg reg later: top-half -> gpr9
+     214:       39 40 00 00     li 10, 0        ; |                                                           bottom-half -> gpr10
+     218:       3c 5f 00 00     addis 2, 31, 0  ; PC -> gpr2, to ...
+     21c:       c8 02 00 98     lfd 0, 152(2)   ; ... load some static data (arg 5, the float) stored right after this function -> fpr0
+     220:       7d 28 4b 78     mr 8, 9         ; arg 5, top-half
+     224:       7d 49 53 78     mr 9, 10        ; arg 5, bottom-half
+     228:       fc 20 00 90     fmr 1, 0        ; arg 5 in 1st fp reg
+     22c:       3d 60 40 18     lis 11, 16408   ; | prep arg 6 (double b/c of vararg) for move to arg reg later: top-half -> gpr11
+     230:       39 80 00 00     li 12, 0        ; |                                                           bottom-half -> gpr12 (this one is pointless, unused, bottom-half already placed on stack)
+     234:       3c 5f 00 00     addis 2, 31, 0  ; PC -> gpr2, to ...
+     238:       c8 02 00 a0     lfd 0, 160(2)   ; ... load some static data (arg 6, the 2nd float) stored right after this function -> fpr0
+     23c:       7d 6a 5b 78     mr 10, 11       ; arg 6, top-half
+     240:       fc 40 00 90     fmr 2, 0        ; arg 6 in 2nd fp reg
+     244:       c8 01 00 44     lfd 0, 68(1)    ; arg 9, ...
+     248:       fc 60 00 90     fmr 3, 0        ; ... -> 3rd fp reg
+     24c:       4b ff fd f1     bl .+67108336   ; call and put return address -> lr
+     250:       7c 03 03 78     mr 3, 0         ; return value @@@unsure why gpr0 is guaranteed to be 0 here
+     254:       80 21 00 00     lwz 1, 0(1)     ; |
+     258:       80 01 00 08     lwz 0, 8(1)     ; |
+     25c:       7c 08 03 a6     mtlr 0          ; | epilog
+     260:       bb c1 ff f8     lmw 30, -8(1)   ; |
+     264:       4e 80 00 20     blr             ; |
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/ppc.sysv.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,614 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from debian-4.1.1-21-ppc w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   94 21 ff d0     stwu    r1,-48(r1)
+   4:   93 e1 00 2c     stw     r31,44(r1)
+   8:   7c 3f 0b 78     mr      r31,r1
+   c:   90 7f 00 08     stw     r3,8(r31)
+  10:   90 9f 00 0c     stw     r4,12(r31)
+  14:   90 bf 00 10     stw     r5,16(r31)
+  18:   90 df 00 14     stw     r6,20(r31)
+  1c:   90 ff 00 18     stw     r7,24(r31)
+  20:   91 1f 00 1c     stw     r8,28(r31)
+  24:   91 3f 00 20     stw     r9,32(r31)
+  28:   81 61 00 00     lwz     r11,0(r1)
+  2c:   83 eb ff fc     lwz     r31,-4(r11)
+  30:   7d 61 5b 78     mr      r1,r11
+  34:   4e 80 00 20     blr
+
+00000038 <nonleaf_call>:
+  38:   94 21 ff c0     stwu    r1,-64(r1)             ; |           open frame and store sp at top of stack
+  3c:   7c 08 02 a6     mflr    r0                     ; |           lr -> gpr0
+  40:   93 e1 00 3c     stw     r31,60(r1)             ; | prolog    store gpr31
+  44:   90 01 00 44     stw     r0,68(r1)              ; |           store lr
+  48:   7c 3f 0b 78     mr      r31,r1                 ; /           sp -> gpr31, latter used for some fixed addressing below
+  4c:   90 7f 00 08     stw     r3,8(r31)              ; \
+  50:   90 9f 00 0c     stw     r4,12(r31)             ; |
+  54:   90 bf 00 10     stw     r5,16(r31)             ; |
+  58:   90 df 00 14     stw     r6,20(r31)             ; |
+  5c:   90 ff 00 18     stw     r7,24(r31)             ; | all in args -> temp space in local area
+  60:   91 1f 00 1c     stw     r8,28(r31)             ; |
+  64:   91 3f 00 20     stw     r9,32(r31)             ; |
+  68:   91 5f 00 24     stw     r10,36(r31)            ; |
+  6c:   80 01 00 00     lwz     r0,0(r1)               ; fetch sp saved on top of stack by prolog -> gpr0, and ...
+  70:   94 01 ff 10     stwu    r0,-240(r1)            ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment
+  74:   39 21 00 08     addi    r9,r1,8                ; |
+  78:   91 3f 00 28     stw     r9,40(r31)             ; |
+  7c:   81 3f 00 28     lwz     r9,40(r31)             ; |
+  80:   38 09 00 0f     addi    r0,r9,15               ; | start of alloca()'d memory -> gpr9, by ...
+  84:   54 00 e1 3e     rlwinm  r0,r0,28,4,31          ; | ... using gpr0 as helper to align to 16b, leaving at least 8b at top of stack
+  88:   54 00 20 36     rlwinm  r0,r0,4,0,27           ; |
+  8c:   90 1f 00 28     stw     r0,40(r31)             ; |
+  90:   81 3f 00 28     lwz     r9,40(r31)             ; |
+  94:   38 00 00 4c     li      r0,76                  ; 'L' -> gpr0, and ...
+  98:   98 09 00 00     stb     r0,0(r9)               ; ... store in local area (of alloca()'d space)
+  9c:   80 7f 00 0c     lwz     r3,12(r31)             ; arg 0
+  a0:   80 9f 00 10     lwz     r4,16(r31)             ; arg 1
+  a4:   80 bf 00 14     lwz     r5,20(r31)             ; arg 2
+  a8:   80 df 00 18     lwz     r6,24(r31)             ; arg 3
+  ac:   80 ff 00 1c     lwz     r7,28(r31)             ; arg 4
+  b0:   81 1f 00 20     lwz     r8,32(r31)             ; arg 5
+  b4:   81 3f 00 24     lwz     r9,36(r31)             ; arg 6
+  b8:   48 00 00 01     bl      b8 <nonleaf_call+0x80> ; call and put return address -> lr
+  bc:   81 61 00 00     lwz     r11,0(r1)              ; |
+  c0:   80 0b 00 04     lwz     r0,4(r11)              ; |
+  c4:   7c 08 03 a6     mtlr    r0                     ; |
+  c8:   83 eb ff fc     lwz     r31,-4(r11)            ; | epilog
+  cc:   7d 61 5b 78     mr      r1,r11                 ; |
+  d0:   4e 80 00 20     blr                            ; |
+
+000000d4 <main>:
+  d4:   94 21 ff f0     stwu    r1,-16(r1)             ; |
+  d8:   7c 08 02 a6     mflr    r0                     ; |
+  dc:   93 e1 00 0c     stw     r31,12(r1)             ; | prolog
+  e0:   90 01 00 14     stw     r0,20(r1)              ; |
+  e4:   7c 3f 0b 78     mr      r31,r1                 ; |
+  e8:   38 60 00 00     li      r3,0                   ; arg 0
+  ec:   38 80 00 01     li      r4,1                   ; arg 1
+  f0:   38 a0 00 02     li      r5,2                   ; arg 2
+  f4:   38 c0 00 03     li      r6,3                   ; arg 3
+  f8:   38 e0 00 04     li      r7,4                   ; arg 4
+  fc:   39 00 00 05     li      r8,5                   ; arg 5
+ 100:   39 20 00 06     li      r9,6                   ; arg 6
+ 104:   39 40 00 07     li      r10,7                  ; arg 7
+ 108:   48 00 00 01     bl      108 <main+0x34>        ; call and put return address -> lr
+ 10c:   38 00 00 00     li      r0,0                   ; return value (pointlessly) via gpr0 ...
+ 110:   7c 03 03 78     mr      r3,r0                  ; ... to gpr3
+ 114:   81 61 00 00     lwz     r11,0(r1)              ; |
+ 118:   80 0b 00 04     lwz     r0,4(r11)              ; |
+ 11c:   7c 08 03 a6     mtlr    r0                     ; |
+ 120:   83 eb ff fc     lwz     r31,-4(r11)            ; | epilog
+ 124:   7d 61 5b 78     mr      r1,r11                 ; |
+ 128:   4e 80 00 20     blr                            ; |
+
+
+
+; output from netbsd-4.0.1-macppc w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   94 21 ff c0     stwu    r1,-64(r1)
+   4:   93 e1 00 3c     stw     r31,60(r1)
+   8:   7c 3f 0b 78     mr      r31,r1
+   c:   90 7f 00 08     stw     r3,8(r31)
+  10:   90 9f 00 0c     stw     r4,12(r31)
+  14:   90 bf 00 10     stw     r5,16(r31)
+  18:   90 df 00 14     stw     r6,20(r31)
+  1c:   90 ff 00 18     stw     r7,24(r31)
+  20:   91 1f 00 1c     stw     r8,28(r31)
+  24:   91 3f 00 20     stw     r9,32(r31)
+  28:   81 61 00 00     lwz     r11,0(r1)
+  2c:   83 eb ff fc     lwz     r31,-4(r11)
+  30:   7d 61 5b 78     mr      r1,r11
+  34:   4e 80 00 20     blr
+
+00000038 <nonleaf_call>:
+  38:   94 21 ff c0     stwu    r1,-64(r1)
+  3c:   7c 08 02 a6     mflr    r0
+  40:   93 e1 00 3c     stw     r31,60(r1)
+  44:   90 01 00 44     stw     r0,68(r1)
+  48:   7c 3f 0b 78     mr      r31,r1
+  4c:   90 7f 00 08     stw     r3,8(r31)
+  50:   90 9f 00 0c     stw     r4,12(r31)
+  54:   90 bf 00 10     stw     r5,16(r31)
+  58:   90 df 00 14     stw     r6,20(r31)
+  5c:   90 ff 00 18     stw     r7,24(r31)
+  60:   91 1f 00 1c     stw     r8,28(r31)
+  64:   91 3f 00 20     stw     r9,32(r31)
+  68:   91 5f 00 24     stw     r10,36(r31)
+  6c:   80 01 00 00     lwz     r0,0(r1)
+  70:   94 01 ff 10     stwu    r0,-240(r1)
+  74:   39 21 00 08     addi    r9,r1,8
+  78:   91 3f 00 28     stw     r9,40(r31)
+  7c:   81 3f 00 28     lwz     r9,40(r31)
+  80:   38 09 00 0f     addi    r0,r9,15
+  84:   54 00 e1 3e     rlwinm  r0,r0,28,4,31
+  88:   54 00 20 36     rlwinm  r0,r0,4,0,27
+  8c:   90 1f 00 28     stw     r0,40(r31)
+  90:   81 3f 00 28     lwz     r9,40(r31)
+  94:   38 00 00 4c     li      r0,76
+  98:   98 09 00 00     stb     r0,0(r9)
+  9c:   80 7f 00 0c     lwz     r3,12(r31)
+  a0:   80 9f 00 10     lwz     r4,16(r31)
+  a4:   80 bf 00 14     lwz     r5,20(r31)
+  a8:   80 df 00 18     lwz     r6,24(r31)
+  ac:   80 ff 00 1c     lwz     r7,28(r31)
+  b0:   81 1f 00 20     lwz     r8,32(r31)
+  b4:   81 3f 00 24     lwz     r9,36(r31)
+  b8:   48 00 00 01     bl      b8 <nonleaf_call+0x80>
+  bc:   81 61 00 00     lwz     r11,0(r1)
+  c0:   80 0b 00 04     lwz     r0,4(r11)
+  c4:   7c 08 03 a6     mtlr    r0
+  c8:   83 eb ff fc     lwz     r31,-4(r11)
+  cc:   7d 61 5b 78     mr      r1,r11
+  d0:   4e 80 00 20     blr
+
+000000d4 <main>:
+  d4:   94 21 ff e0     stwu    r1,-32(r1)
+  d8:   7c 08 02 a6     mflr    r0
+  dc:   93 e1 00 1c     stw     r31,28(r1)
+  e0:   90 01 00 24     stw     r0,36(r1)
+  e4:   7c 3f 0b 78     mr      r31,r1
+  e8:   38 60 00 00     li      r3,0
+  ec:   38 80 00 01     li      r4,1
+  f0:   38 a0 00 02     li      r5,2
+  f4:   38 c0 00 03     li      r6,3
+  f8:   38 e0 00 04     li      r7,4
+  fc:   39 00 00 05     li      r8,5
+ 100:   39 20 00 06     li      r9,6
+ 104:   39 40 00 07     li      r10,7
+ 108:   48 00 00 01     bl      108 <main+0x34>
+ 10c:   38 00 00 00     li      r0,0
+ 110:   7c 03 03 78     mr      r3,r0
+ 114:   81 61 00 00     lwz     r11,0(r1)
+ 118:   80 0b 00 04     lwz     r0,4(r11)
+ 11c:   7c 08 03 a6     mtlr    r0
+ 120:   83 eb ff fc     lwz     r31,-4(r11)
+ 124:   7d 61 5b 78     mr      r1,r11
+ 128:   4e 80 00 20     blr
+
+
+
+; ------------- check register skipping for 64bit args ----------->
+
+; void call(int a, long long b, int c, int d, long long e, long long f)
+; {
+; }
+; 
+; int main()
+; {
+; 	call(0,1,2,3,4,5);
+; 	return 1;
+; }
+
+
+
+; output from debian-4.1.1-21-ppc w/ gcc 4.1.2
+
+00000000 <call>:
+   0:   94 21 ff d0     stwu    r1,-48(r1)
+   4:   93 e1 00 2c     stw     r31,44(r1)
+   8:   7c 3f 0b 78     mr      r31,r1
+   c:   90 7f 00 08     stw     r3,8(r31)
+  10:   90 bf 00 10     stw     r5,16(r31)
+  14:   90 df 00 14     stw     r6,20(r31)
+  18:   90 ff 00 18     stw     r7,24(r31)
+  1c:   91 1f 00 1c     stw     r8,28(r31)
+  20:   91 3f 00 20     stw     r9,32(r31)
+  24:   91 5f 00 24     stw     r10,36(r31)
+  28:   81 61 00 00     lwz     r11,0(r1)
+  2c:   83 eb ff fc     lwz     r31,-4(r11)
+  30:   7d 61 5b 78     mr      r1,r11
+  34:   4e 80 00 20     blr
+
+00000038 <main>:
+  38:   94 21 ff e0     stwu    r1,-32(r1)     ; |
+  3c:   7c 08 02 a6     mflr    r0             ; |
+  40:   93 e1 00 1c     stw     r31,28(r1)     ; | prolog
+  44:   90 01 00 24     stw     r0,36(r1)      ; |
+  48:   7c 3f 0b 78     mr      r31,r1         ; /
+  4c:   39 20 00 00     li      r9,0           ; \
+  50:   39 40 00 05     li      r10,5          ; |
+  54:   91 21 00 08     stw     r9,8(r1)       ; | arg 5 via stack
+  58:   91 41 00 0c     stw     r10,12(r1)     ; |
+  5c:   38 60 00 00     li      r3,0           ; arg 0
+  60:   38 a0 00 00     li      r5,0           ; |
+  64:   38 c0 00 01     li      r6,1           ; | arg 1 (note that r4 is skipped)
+  68:   38 e0 00 02     li      r7,2           ; arg 2
+  6c:   39 00 00 03     li      r8,3           ; arg 3
+  70:   39 20 00 00     li      r9,0           ; |
+  74:   39 40 00 04     li      r10,4          ; | arg 4
+  78:   48 00 00 01     bl      78 <main+0x40> ; call and put return address -> lr
+  7c:   38 00 00 01     li      r0,1           ; return value (pointlessly) via gpr0 ...
+  80:   7c 03 03 78     mr      r3,r0          ; ... to gpr3
+  84:   81 61 00 00     lwz     r11,0(r1)      ; |
+  88:   80 0b 00 04     lwz     r0,4(r11)      ; |
+  8c:   7c 08 03 a6     mtlr    r0             ; |
+  90:   83 eb ff fc     lwz     r31,-4(r11)    ; | epilog
+  94:   7d 61 5b 78     mr      r1,r11         ; |
+  98:   4e 80 00 20     blr                    ; |
+
+
+
+; ------------- var args with ints and floats to see spilling (of the int regs, and - if caller set CR bit 6 - also of the float regs), note that floats are promoted to doubles in (...) but still passed via float regs ----------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+; 
+; void leaf_call(int b, int c, int d, int e, float f, float g, int h, int i, float j)
+; {
+; }
+; 
+; void nonleaf_call(int a, ...)
+; {
+;     int b, c, d, e, h, i;
+;     float f, g, j;
+;     va_list ap;
+;     va_start(ap, a);
+;     b = va_arg(ap, int);
+;     c = va_arg(ap, int);
+;     d = va_arg(ap, int);
+;     e = va_arg(ap, int);
+;     f = (float)va_arg(ap, double);
+;     g = (float)va_arg(ap, double);
+;     h = va_arg(ap, int);
+;     i = va_arg(ap, int);
+;     j = (float)va_arg(ap, double);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5.f, 6.f, 7, 8, 9.f);
+;     return 0;
+; }
+
+
+
+; output from debian-4.1.1-21-ppc w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   94 21 ff c0     stwu    r1,-64(r1)
+   4:   93 e1 00 3c     stw     r31,60(r1)
+   8:   7c 3f 0b 78     mr      r31,r1
+   c:   90 7f 00 08     stw     r3,8(r31)
+  10:   90 9f 00 0c     stw     r4,12(r31)
+  14:   90 bf 00 10     stw     r5,16(r31)
+  18:   90 df 00 14     stw     r6,20(r31)
+  1c:   d0 3f 00 18     stfs    f1,24(r31)
+  20:   d0 5f 00 1c     stfs    f2,28(r31)
+  24:   90 ff 00 20     stw     r7,32(r31)
+  28:   91 1f 00 24     stw     r8,36(r31)
+  2c:   d0 7f 00 28     stfs    f3,40(r31)
+  30:   81 61 00 00     lwz     r11,0(r1)
+  34:   83 eb ff fc     lwz     r31,-4(r11)
+  38:   7d 61 5b 78     mr      r1,r11
+  3c:   4e 80 00 20     blr
+
+00000040 <nonleaf_call>:
+  40:   94 21 ff 30     stwu    r1,-208(r1)
+  44:   7c 08 02 a6     mflr    r0
+  48:   93 e1 00 cc     stw     r31,204(r1)
+  4c:   90 01 00 d4     stw     r0,212(r1)
+  50:   7c 3f 0b 78     mr      r31,r1
+  54:   90 9f 00 3c     stw     r4,60(r31)
+  58:   90 bf 00 40     stw     r5,64(r31)
+  5c:   90 df 00 44     stw     r6,68(r31)
+  60:   90 ff 00 48     stw     r7,72(r31)
+  64:   91 1f 00 4c     stw     r8,76(r31)
+  68:   91 3f 00 50     stw     r9,80(r31)
+  6c:   91 5f 00 54     stw     r10,84(r31)
+  70:   40 86 00 24     bne-    cr1,94 <nonleaf_call+0x54>  ; skip spilling of f1-f8 if caller didn't set CR bit 6 (cr1's eq bit)
+  74:   d8 3f 00 58     stfd    f1,88(r31)
+  78:   d8 5f 00 60     stfd    f2,96(r31)
+  7c:   d8 7f 00 68     stfd    f3,104(r31)
+  80:   d8 9f 00 70     stfd    f4,112(r31)
+  84:   d8 bf 00 78     stfd    f5,120(r31)
+  88:   d8 df 00 80     stfd    f6,128(r31)
+  8c:   d8 ff 00 88     stfd    f7,136(r31)
+  90:   d9 1f 00 90     stfd    f8,144(r31)
+  94:   90 7f 00 98     stw     r3,152(r31)
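+  98:   38 00 00 01     li      r0,1        ; |
+  9c:   98 1f 00 2c     stb     r0,44(r31)  ; | counter of used int arg regs = 1 (r3 holds the named arg a)
+  a0:   38 00 00 00     li      r0,0        ; |
+  a4:   98 1f 00 2d     stb     r0,45(r31)  ; | counter of used float arg regs = 0
+  a8:   38 1f 00 d8     addi    r0,r31,216  ; make r0 point to start of prev frame's stack param area (this frame's size of 208 + 8 to skip back chain and lr save word), ...
+  ac:   90 1f 00 30     stw     r0,48(r31)  ; ... and keep it as ptr to next stack arg (used once a counter reaches 8, see bge- branches below)
+  b0:   38 1f 00 38     addi    r0,r31,56   ; make r0 point to reg save area in *this* frame (r4-r10 were spilled to +4..+28 and f1-f8 to +32..+88, above), ...
+  b4:   90 1f 00 34     stw     r0,52(r31)  ; ... and keep it as ptr to spilled reg args (indexed below via counter*4 for int regs and 32+counter*8 for float regs)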
+  b8:   88 1f 00 2c     lbz     r0,44(r31)
+  bc:   54 00 06 3e     clrlwi  r0,r0,24
+  c0:   2b 80 00 08     cmplwi  cr7,r0,8
+  c4:   40 9c 00 30     bge-    cr7,f4 <nonleaf_call+0xb4>
+  c8:   81 7f 00 34     lwz     r11,52(r31)
+  cc:   88 1f 00 2c     lbz     r0,44(r31)
+  d0:   54 09 06 3e     clrlwi  r9,r0,24
+  d4:   7d 20 4b 78     mr      r0,r9
+  d8:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+  dc:   7d 6b 02 14     add     r11,r11,r0
+  e0:   91 7f 00 c0     stw     r11,192(r31)
+  e4:   38 09 00 01     addi    r0,r9,1
+  e8:   54 00 06 3e     clrlwi  r0,r0,24
+  ec:   98 1f 00 2c     stb     r0,44(r31)
+  f0:   48 00 00 14     b       104 <nonleaf_call+0xc4>
+  f4:   81 3f 00 30     lwz     r9,48(r31)
+  f8:   91 3f 00 c0     stw     r9,192(r31)
+  fc:   38 09 00 04     addi    r0,r9,4
+ 100:   90 1f 00 30     stw     r0,48(r31)
+ 104:   81 3f 00 c0     lwz     r9,192(r31)
+ 108:   80 09 00 00     lwz     r0,0(r9)
+ 10c:   90 1f 00 28     stw     r0,40(r31)
+ 110:   88 1f 00 2c     lbz     r0,44(r31)
+ 114:   54 00 06 3e     clrlwi  r0,r0,24
+ 118:   2b 80 00 08     cmplwi  cr7,r0,8
+ 11c:   40 9c 00 30     bge-    cr7,14c <nonleaf_call+0x10c>
+ 120:   81 7f 00 34     lwz     r11,52(r31)
+ 124:   88 1f 00 2c     lbz     r0,44(r31)
+ 128:   54 09 06 3e     clrlwi  r9,r0,24
+ 12c:   7d 20 4b 78     mr      r0,r9
+ 130:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+ 134:   7d 6b 02 14     add     r11,r11,r0
+ 138:   91 7f 00 bc     stw     r11,188(r31)
+ 13c:   38 09 00 01     addi    r0,r9,1
+ 140:   54 00 06 3e     clrlwi  r0,r0,24
+ 144:   98 1f 00 2c     stb     r0,44(r31)
+ 148:   48 00 00 14     b       15c <nonleaf_call+0x11c>
+ 14c:   81 3f 00 30     lwz     r9,48(r31)
+ 150:   91 3f 00 bc     stw     r9,188(r31)
+ 154:   38 09 00 04     addi    r0,r9,4
+ 158:   90 1f 00 30     stw     r0,48(r31)
+ 15c:   81 3f 00 bc     lwz     r9,188(r31)
+ 160:   80 09 00 00     lwz     r0,0(r9)
+ 164:   90 1f 00 24     stw     r0,36(r31)
+ 168:   88 1f 00 2c     lbz     r0,44(r31)
+ 16c:   54 00 06 3e     clrlwi  r0,r0,24
+ 170:   2b 80 00 08     cmplwi  cr7,r0,8
+ 174:   40 9c 00 30     bge-    cr7,1a4 <nonleaf_call+0x164>
+ 178:   81 7f 00 34     lwz     r11,52(r31)
+ 17c:   88 1f 00 2c     lbz     r0,44(r31)
+ 180:   54 09 06 3e     clrlwi  r9,r0,24
+ 184:   7d 20 4b 78     mr      r0,r9
+ 188:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+ 18c:   7d 6b 02 14     add     r11,r11,r0
+ 190:   91 7f 00 b8     stw     r11,184(r31)
+ 194:   38 09 00 01     addi    r0,r9,1
+ 198:   54 00 06 3e     clrlwi  r0,r0,24
+ 19c:   98 1f 00 2c     stb     r0,44(r31)
+ 1a0:   48 00 00 14     b       1b4 <nonleaf_call+0x174>
+ 1a4:   81 3f 00 30     lwz     r9,48(r31)
+ 1a8:   91 3f 00 b8     stw     r9,184(r31)
+ 1ac:   38 09 00 04     addi    r0,r9,4
+ 1b0:   90 1f 00 30     stw     r0,48(r31)
+ 1b4:   81 3f 00 b8     lwz     r9,184(r31)
+ 1b8:   80 09 00 00     lwz     r0,0(r9)
+ 1bc:   90 1f 00 20     stw     r0,32(r31)
+ 1c0:   88 1f 00 2c     lbz     r0,44(r31)
+ 1c4:   54 00 06 3e     clrlwi  r0,r0,24
+ 1c8:   2b 80 00 08     cmplwi  cr7,r0,8
+ 1cc:   40 9c 00 30     bge-    cr7,1fc <nonleaf_call+0x1bc>
+ 1d0:   81 7f 00 34     lwz     r11,52(r31)
+ 1d4:   88 1f 00 2c     lbz     r0,44(r31)
+ 1d8:   54 09 06 3e     clrlwi  r9,r0,24
+ 1dc:   7d 20 4b 78     mr      r0,r9
+ 1e0:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+ 1e4:   7d 6b 02 14     add     r11,r11,r0
+ 1e8:   91 7f 00 b4     stw     r11,180(r31)
+ 1ec:   38 09 00 01     addi    r0,r9,1
+ 1f0:   54 00 06 3e     clrlwi  r0,r0,24
+ 1f4:   98 1f 00 2c     stb     r0,44(r31)
+ 1f8:   48 00 00 14     b       20c <nonleaf_call+0x1cc>
+ 1fc:   81 3f 00 30     lwz     r9,48(r31)
+ 200:   91 3f 00 b4     stw     r9,180(r31)
+ 204:   38 09 00 04     addi    r0,r9,4
+ 208:   90 1f 00 30     stw     r0,48(r31)
+ 20c:   81 3f 00 b4     lwz     r9,180(r31)
+ 210:   80 09 00 00     lwz     r0,0(r9)
+ 214:   90 1f 00 1c     stw     r0,28(r31)
+ 218:   88 1f 00 2d     lbz     r0,45(r31)
+ 21c:   54 00 06 3e     clrlwi  r0,r0,24
+ 220:   2b 80 00 08     cmplwi  cr7,r0,8
+ 224:   40 9c 00 34     bge-    cr7,258 <nonleaf_call+0x218>
+ 228:   81 3f 00 34     lwz     r9,52(r31)
+ 22c:   39 69 00 20     addi    r11,r9,32
+ 230:   88 1f 00 2d     lbz     r0,45(r31)
+ 234:   54 09 06 3e     clrlwi  r9,r0,24
+ 238:   7d 20 4b 78     mr      r0,r9
+ 23c:   54 00 18 38     rlwinm  r0,r0,3,0,28
+ 240:   7d 6b 02 14     add     r11,r11,r0
+ 244:   91 7f 00 b0     stw     r11,176(r31)
+ 248:   38 09 00 01     addi    r0,r9,1
+ 24c:   54 00 06 3e     clrlwi  r0,r0,24
+ 250:   98 1f 00 2d     stb     r0,45(r31)
+ 254:   48 00 00 1c     b       270 <nonleaf_call+0x230>
+ 258:   81 3f 00 30     lwz     r9,48(r31)
+ 25c:   38 09 00 07     addi    r0,r9,7
+ 260:   54 09 00 38     rlwinm  r9,r0,0,0,28
+ 264:   91 3f 00 b0     stw     r9,176(r31)
+ 268:   38 09 00 08     addi    r0,r9,8
+ 26c:   90 1f 00 30     stw     r0,48(r31)
+ 270:   81 3f 00 b0     lwz     r9,176(r31)
+ 274:   c8 09 00 00     lfd     f0,0(r9)
+ 278:   fc 00 00 18     frsp    f0,f0
+ 27c:   d0 1f 00 10     stfs    f0,16(r31)
+ 280:   88 1f 00 2d     lbz     r0,45(r31)
+ 284:   54 00 06 3e     clrlwi  r0,r0,24
+ 288:   2b 80 00 08     cmplwi  cr7,r0,8
+ 28c:   40 9c 00 34     bge-    cr7,2c0 <nonleaf_call+0x280>
+ 290:   81 3f 00 34     lwz     r9,52(r31)
+ 294:   39 69 00 20     addi    r11,r9,32
+ 298:   88 1f 00 2d     lbz     r0,45(r31)
+ 29c:   54 09 06 3e     clrlwi  r9,r0,24
+ 2a0:   7d 20 4b 78     mr      r0,r9
+ 2a4:   54 00 18 38     rlwinm  r0,r0,3,0,28
+ 2a8:   7d 6b 02 14     add     r11,r11,r0
+ 2ac:   91 7f 00 ac     stw     r11,172(r31)
+ 2b0:   38 09 00 01     addi    r0,r9,1
+ 2b4:   54 00 06 3e     clrlwi  r0,r0,24
+ 2b8:   98 1f 00 2d     stb     r0,45(r31)
+ 2bc:   48 00 00 1c     b       2d8 <nonleaf_call+0x298>
+ 2c0:   81 3f 00 30     lwz     r9,48(r31)
+ 2c4:   38 09 00 07     addi    r0,r9,7
+ 2c8:   54 09 00 38     rlwinm  r9,r0,0,0,28
+ 2cc:   91 3f 00 ac     stw     r9,172(r31)
+ 2d0:   38 09 00 08     addi    r0,r9,8
+ 2d4:   90 1f 00 30     stw     r0,48(r31)
+ 2d8:   81 3f 00 ac     lwz     r9,172(r31)
+ 2dc:   c8 09 00 00     lfd     f0,0(r9)
+ 2e0:   fc 00 00 18     frsp    f0,f0
+ 2e4:   d0 1f 00 0c     stfs    f0,12(r31)
+ 2e8:   88 1f 00 2c     lbz     r0,44(r31)
+ 2ec:   54 00 06 3e     clrlwi  r0,r0,24
+ 2f0:   2b 80 00 08     cmplwi  cr7,r0,8
+ 2f4:   40 9c 00 30     bge-    cr7,324 <nonleaf_call+0x2e4>
+ 2f8:   81 7f 00 34     lwz     r11,52(r31)
+ 2fc:   88 1f 00 2c     lbz     r0,44(r31)
+ 300:   54 09 06 3e     clrlwi  r9,r0,24
+ 304:   7d 20 4b 78     mr      r0,r9
+ 308:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+ 30c:   7d 6b 02 14     add     r11,r11,r0
+ 310:   91 7f 00 a8     stw     r11,168(r31)
+ 314:   38 09 00 01     addi    r0,r9,1
+ 318:   54 00 06 3e     clrlwi  r0,r0,24
+ 31c:   98 1f 00 2c     stb     r0,44(r31)
+ 320:   48 00 00 14     b       334 <nonleaf_call+0x2f4>
+ 324:   81 3f 00 30     lwz     r9,48(r31)
+ 328:   91 3f 00 a8     stw     r9,168(r31)
+ 32c:   38 09 00 04     addi    r0,r9,4
+ 330:   90 1f 00 30     stw     r0,48(r31)
+ 334:   81 3f 00 a8     lwz     r9,168(r31)
+ 338:   80 09 00 00     lwz     r0,0(r9)
+ 33c:   90 1f 00 18     stw     r0,24(r31)
+ 340:   88 1f 00 2c     lbz     r0,44(r31)
+ 344:   54 00 06 3e     clrlwi  r0,r0,24
+ 348:   2b 80 00 08     cmplwi  cr7,r0,8
+ 34c:   40 9c 00 30     bge-    cr7,37c <nonleaf_call+0x33c>
+ 350:   81 7f 00 34     lwz     r11,52(r31)
+ 354:   88 1f 00 2c     lbz     r0,44(r31)
+ 358:   54 09 06 3e     clrlwi  r9,r0,24
+ 35c:   7d 20 4b 78     mr      r0,r9
+ 360:   54 00 10 3a     rlwinm  r0,r0,2,0,29
+ 364:   7d 6b 02 14     add     r11,r11,r0
+ 368:   91 7f 00 a4     stw     r11,164(r31)
+ 36c:   38 09 00 01     addi    r0,r9,1
+ 370:   54 00 06 3e     clrlwi  r0,r0,24
+ 374:   98 1f 00 2c     stb     r0,44(r31)
+ 378:   48 00 00 14     b       38c <nonleaf_call+0x34c>
+ 37c:   81 3f 00 30     lwz     r9,48(r31)
+ 380:   91 3f 00 a4     stw     r9,164(r31)
+ 384:   38 09 00 04     addi    r0,r9,4
+ 388:   90 1f 00 30     stw     r0,48(r31)
+ 38c:   81 3f 00 a4     lwz     r9,164(r31)
+ 390:   80 09 00 00     lwz     r0,0(r9)
+ 394:   90 1f 00 14     stw     r0,20(r31)
+ 398:   88 1f 00 2d     lbz     r0,45(r31)
+ 39c:   54 00 06 3e     clrlwi  r0,r0,24
+ 3a0:   2b 80 00 08     cmplwi  cr7,r0,8
+ 3a4:   40 9c 00 34     bge-    cr7,3d8 <nonleaf_call+0x398>
+ 3a8:   81 3f 00 34     lwz     r9,52(r31)
+ 3ac:   39 69 00 20     addi    r11,r9,32
+ 3b0:   88 1f 00 2d     lbz     r0,45(r31)
+ 3b4:   54 09 06 3e     clrlwi  r9,r0,24
+ 3b8:   7d 20 4b 78     mr      r0,r9
+ 3bc:   54 00 18 38     rlwinm  r0,r0,3,0,28
+ 3c0:   7d 6b 02 14     add     r11,r11,r0
+ 3c4:   91 7f 00 a0     stw     r11,160(r31)
+ 3c8:   38 09 00 01     addi    r0,r9,1
+ 3cc:   54 00 06 3e     clrlwi  r0,r0,24
+ 3d0:   98 1f 00 2d     stb     r0,45(r31)
+ 3d4:   48 00 00 1c     b       3f0 <nonleaf_call+0x3b0>
+ 3d8:   81 3f 00 30     lwz     r9,48(r31)
+ 3dc:   38 09 00 07     addi    r0,r9,7
+ 3e0:   54 09 00 38     rlwinm  r9,r0,0,0,28
+ 3e4:   91 3f 00 a0     stw     r9,160(r31)
+ 3e8:   38 09 00 08     addi    r0,r9,8
+ 3ec:   90 1f 00 30     stw     r0,48(r31)
+ 3f0:   81 3f 00 a0     lwz     r9,160(r31)
+ 3f4:   c8 09 00 00     lfd     f0,0(r9)
+ 3f8:   fc 00 00 18     frsp    f0,f0
+ 3fc:   d0 1f 00 08     stfs    f0,8(r31)
+ 400:   80 01 00 00     lwz     r0,0(r1)
+ 404:   94 01 ff 10     stwu    r0,-240(r1)
+ 408:   39 21 00 08     addi    r9,r1,8
+ 40c:   91 3f 00 9c     stw     r9,156(r31)
+ 410:   81 3f 00 9c     lwz     r9,156(r31)
+ 414:   38 09 00 0f     addi    r0,r9,15
+ 418:   54 00 e1 3e     rlwinm  r0,r0,28,4,31
+ 41c:   54 00 20 36     rlwinm  r0,r0,4,0,27
+ 420:   90 1f 00 9c     stw     r0,156(r31)
+ 424:   81 3f 00 9c     lwz     r9,156(r31)
+ 428:   38 00 00 4c     li      r0,76
+ 42c:   98 09 00 00     stb     r0,0(r9)
+ 430:   80 7f 00 28     lwz     r3,40(r31)
+ 434:   80 9f 00 24     lwz     r4,36(r31)
+ 438:   80 bf 00 20     lwz     r5,32(r31)
+ 43c:   80 df 00 1c     lwz     r6,28(r31)
+ 440:   c0 3f 00 10     lfs     f1,16(r31)
+ 444:   c0 5f 00 0c     lfs     f2,12(r31)
+ 448:   80 ff 00 18     lwz     r7,24(r31)
+ 44c:   81 1f 00 14     lwz     r8,20(r31)
+ 450:   c0 7f 00 08     lfs     f3,8(r31)
+ 454:   48 00 00 01     bl      454 <nonleaf_call+0x414>
+ 458:   81 61 00 00     lwz     r11,0(r1)
+ 45c:   80 0b 00 04     lwz     r0,4(r11)
+ 460:   7c 08 03 a6     mtlr    r0
+ 464:   83 eb ff fc     lwz     r31,-4(r11)
+ 468:   7d 61 5b 78     mr      r1,r11
+ 46c:   4e 80 00 20     blr
+
+00000470 <main>:
+ 470:   94 21 ff f0     stwu    r1,-16(r1)      ; |
+ 474:   7c 08 02 a6     mflr    r0              ; |
+ 478:   93 e1 00 0c     stw     r31,12(r1)      ; | prolog
+ 47c:   90 01 00 14     stw     r0,20(r1)       ; |
+ 480:   7c 3f 0b 78     mr      r31,r1          ; /
+ 484:   3d 20 00 00     lis     r9,0            ; \ prep arg 5 (float, but double in reg & ellipsis), load from 0 b/c objdump is from .o, not finally linked exec
+ 488:   c8 09 00 00     lfd     f0,0(r9)        ; /
+ 48c:   3d 20 00 00     lis     r9,0            ; \ prep arg 6 (float, but double in reg & ellipsis), load from 0 b/c objdump is from .o, not finally linked exec
+ 490:   c9 a9 00 08     lfd     f13,8(r9)       ; /
+ 494:   3d 20 00 00     lis     r9,0            ; \ prep arg 9 (float, but double in reg & ellipsis), load from 0 b/c objdump is from .o, not finally linked exec
+ 498:   c9 89 00 10     lfd     f12,16(r9)      ; /
+ 49c:   38 60 00 00     li      r3,0            ; arg 0
+ 4a0:   38 80 00 01     li      r4,1            ; arg 1
+ 4a4:   38 a0 00 02     li      r5,2            ; arg 2
+ 4a8:   38 c0 00 03     li      r6,3            ; arg 3
+ 4ac:   38 e0 00 04     li      r7,4            ; arg 4
+ 4b0:   fc 20 00 90     fmr     f1,f0           ; arg 5
+ 4b4:   fc 40 68 90     fmr     f2,f13          ; arg 6
+ 4b8:   39 00 00 07     li      r8,7            ; arg 7
+ 4bc:   39 20 00 08     li      r9,8            ; arg 8
+ 4c0:   fc 60 60 90     fmr     f3,f12          ; arg 9
+ 4c4:   4c c6 32 42     crset   4*cr1+eq        ; set CR bit 6 (cr1's eq bit) for ellipsis call, as float regs are used to pass args
+ 4c8:   48 00 00 01     bl      4c8 <main+0x58> ; call and put return address -> lr
+ 4cc:   38 00 00 00     li      r0,0            ; return value (pointlessly) via gpr0 ...
+ 4d0:   7c 03 03 78     mr      r3,r0           ; ... to gpr3
+ 4d4:   81 61 00 00     lwz     r11,0(r1)       ; |
+ 4d8:   80 0b 00 04     lwz     r0,4(r11)       ; |
+ 4dc:   7c 08 03 a6     mtlr    r0              ; |
+ 4e0:   83 eb ff fc     lwz     r31,-4(r11)     ; | epilog
+ 4e4:   7d 61 5b 78     mr      r1,r11          ; |
+ 4e8:   4e 80 00 20     blr                     ; |
+
+; vim: ft=asm68k
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/ppc64.elfabi.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,130 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from freebsd-11.0-ppc64 w/ gcc 4.2.1
+
+0000000000000000 <.leaf_call>:
+   0:	fb e1 ff f8 	std     r31,-8(r1)
+   4:	f8 21 ff c1 	stdu    r1,-64(r1)
+   8:	7c 3f 0b 78 	mr      r31,r1
+   c:	7c 60 1b 78 	mr      r0,r3
+  10:	7c 8b 23 78 	mr      r11,r4
+  14:	7c aa 2b 78 	mr      r10,r5
+  18:	90 1f 00 70 	stw     r0,112(r31)
+  1c:	91 7f 00 78 	stw     r11,120(r31)
+  20:	91 5f 00 80 	stw     r10,128(r31)
+  24:	90 df 00 88 	stw     r6,136(r31)
+  28:	90 ff 00 90 	stw     r7,144(r31)
+  2c:	91 1f 00 98 	stw     r8,152(r31)
+  30:	91 3f 00 a0 	stw     r9,160(r31)
+  34:	e8 21 00 00 	ld      r1,0(r1)
+  38:	eb e1 ff f8 	ld      r31,-8(r1)
+  3c:	4e 80 00 20 	blr
+	...
+  48:	80 01 00 01 	lwz     r0,1(r1)
+
+000000000000004c <.nonleaf_call>:
+  4c:	7c 08 02 a6 	mflr    r0
+  50:	fb e1 ff f8 	std     r31,-8(r1)
+  54:	f8 01 00 10 	std     r0,16(r1)
+  58:	f8 21 ff 71 	stdu    r1,-144(r1)
+  5c:	7c 3f 0b 78 	mr      r31,r1
+  60:	7c 60 1b 78 	mr      r0,r3
+  64:	7c 8b 23 78 	mr      r11,r4
+  68:	90 1f 00 c0 	stw     r0,192(r31)
+  6c:	91 7f 00 c8 	stw     r11,200(r31)
+  70:	90 bf 00 d0 	stw     r5,208(r31)
+  74:	90 df 00 d8 	stw     r6,216(r31)
+  78:	90 ff 00 e0 	stw     r7,224(r31)
+  7c:	91 1f 00 e8 	stw     r8,232(r31)
+  80:	91 3f 00 f0 	stw     r9,240(r31)
+  84:	91 5f 00 f8 	stw     r10,248(r31)
+  88:	e8 01 00 00 	ld      r0,0(r1)
+  8c:	f8 01 ff 11 	stdu    r0,-240(r1)
+  90:	39 21 00 70 	addi    r9,r1,112
+  94:	f9 3f 00 70 	std     r9,112(r31)
+  98:	e9 3f 00 70 	ld      r9,112(r31)
+  9c:	38 09 00 0f 	addi    r0,r9,15
+  a0:	78 00 e1 02 	rldicl  r0,r0,60,4
+  a4:	78 00 26 e4 	rldicr  r0,r0,4,59
+  a8:	f8 1f 00 70 	std     r0,112(r31)
+  ac:	e9 3f 00 70 	ld      r9,112(r31)
+  b0:	38 00 00 4c 	li      r0,76
+  b4:	98 09 00 00 	stb     r0,0(r9)
+  b8:	80 1f 00 c8 	lwz     r0,200(r31)
+  bc:	7c 08 07 b4 	extsw   r8,r0
+  c0:	80 1f 00 d0 	lwz     r0,208(r31)
+  c4:	7c 07 07 b4 	extsw   r7,r0
+  c8:	80 1f 00 d8 	lwz     r0,216(r31)
+  cc:	7c 06 07 b4 	extsw   r6,r0
+  d0:	80 1f 00 e0 	lwz     r0,224(r31)
+  d4:	7c 09 07 b4 	extsw   r9,r0
+  d8:	80 1f 00 e8 	lwz     r0,232(r31)
+  dc:	7c 0b 07 b4 	extsw   r11,r0
+  e0:	80 1f 00 f0 	lwz     r0,240(r31)
+  e4:	7c 0a 07 b4 	extsw   r10,r0
+  e8:	80 1f 00 f8 	lwz     r0,248(r31)
+  ec:	7c 00 07 b4 	extsw   r0,r0
+  f0:	7d 03 43 78 	mr      r3,r8
+  f4:	7c e4 3b 78 	mr      r4,r7
+  f8:	7c c5 33 78 	mr      r5,r6
+  fc:	7d 26 4b 78 	mr      r6,r9
+ 100:	7d 67 5b 78 	mr      r7,r11
+ 104:	7d 48 53 78 	mr      r8,r10
+ 108:	7c 09 03 78 	mr      r9,r0
+ 10c:	48 00 00 01 	bl      10c <.nonleaf_call+0xc0>
+ 110:	e8 21 00 00 	ld      r1,0(r1)
+ 114:	e8 01 00 10 	ld      r0,16(r1)
+ 118:	7c 08 03 a6 	mtlr    r0
+ 11c:	eb e1 ff f8 	ld      r31,-8(r1)
+ 120:	4e 80 00 20 	blr
+ 124:	00 00 00 00 	.long 0x0
+ 128:	00 00 00 01 	.long 0x1
+ 12c:	80 01 00 01 	lwz     r0,1(r1)
+
+0000000000000130 <.main>:
+ 130:	7c 08 02 a6 	mflr    r0
+ 134:	fb e1 ff f8 	std     r31,-8(r1)
+ 138:	f8 01 00 10 	std     r0,16(r1)
+ 13c:	f8 21 ff 81 	stdu    r1,-128(r1)
+ 140:	7c 3f 0b 78 	mr      r31,r1
+ 144:	38 60 00 00 	li      r3,0
+ 148:	38 80 00 01 	li      r4,1
+ 14c:	38 a0 00 02 	li      r5,2
+ 150:	38 c0 00 03 	li      r6,3
+ 154:	38 e0 00 04 	li      r7,4
+ 158:	39 00 00 05 	li      r8,5
+ 15c:	39 20 00 06 	li      r9,6
+ 160:	39 40 00 07 	li      r10,7
+ 164:	48 00 00 01 	bl      164 <.main+0x34>
+ 168:	38 00 00 00 	li      r0,0
+ 16c:	7c 03 03 78 	mr      r3,r0
+ 170:	e8 21 00 00 	ld      r1,0(r1)
+ 174:	e8 01 00 10 	ld      r0,16(r1)
+ 178:	7c 08 03 a6 	mtlr    r0
+ 17c:	eb e1 ff f8 	ld      r31,-8(r1)
+ 180:	4e 80 00 20 	blr
+ 184:	00 00 00 00 	.long 0x0
+ 188:	00 00 00 01 	.long 0x1
+ 18c:	80 01 00 01 	lwz     r0,1(r1)
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/sparc.sparc.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,374 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+;     return 0;
+; }
+
+
+
+; output from debian-4.0_r3-sparc w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   9d e3 bf 98     save  %sp, -104, %sp
+   4:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]
+   8:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]
+   c:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]
+  10:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]
+  14:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]
+  18:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]
+  1c:   81 e8 00 00     restore
+  20:   81 c3 e0 08     retl
+  24:   01 00 00 00     nop
+
+00000028 <nonleaf_call>:
+  28:   9d e3 bf 88     save  %sp, -120, %sp         ; prolog
+  2c:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]      ; |
+  30:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]      ; |
+  34:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]      ; | write input to prev frame's spill area
+  38:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]      ; |   (e.g. offset = 68 for i0, jumping over i*/l* save area and aggregate return pointer)
+  3c:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]      ; |
+  40:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]      ; |
+  44:   9c 03 bf 20     add  %sp, -224, %sp          ; alloca(220) - with 4b padding (multiple of 8), and ...
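+  48:   82 03 a0 64     add  %sp, 0x64, %g1          ; ... start of alloca()'d space is 100b above new sp (leaving room for the areas at top of stack, e.g. outgoing stack params at sp+0x5c), via ...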
+  4c:   c2 27 bf f4     st  %g1, [ %fp + -12 ]       ; ... local space (pointlessly) ...
+  50:   c4 07 bf f4     ld  [ %fp + -12 ], %g2       ; ... to g2
+  54:   82 00 a0 07     add  %g2, 7, %g1             ; |
+  58:   83 30 60 03     srl  %g1, 3, %g1             ; | 8b alignment of alloca()'d space pointed to by g1
+  5c:   83 28 60 03     sll  %g1, 3, %g1             ; |
+  60:   c2 27 bf f4     st  %g1, [ %fp + -12 ]       ; free g1 again by copy via temp space, ...
+  64:   c4 07 bf f4     ld  [ %fp + -12 ], %g2       ; ... to g2
+  68:   82 10 20 4c     mov  0x4c, %g1               ; 'L' -> g1, and ...
+  6c:   c2 28 80 00     stb  %g1, [ %g2 ]            ; ... store in aligned alloca()'d space
+  70:   c2 07 a0 60     ld  [ %fp + 0x60 ], %g1      ; arg 6 (fetched from prev frame's stack param area), and ...
+  74:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]      ; ... "pushed" onto stack
+  78:   d0 07 a0 48     ld  [ %fp + 0x48 ], %o0      ; |
+  7c:   d2 07 a0 4c     ld  [ %fp + 0x4c ], %o1      ; |
+  80:   d4 07 a0 50     ld  [ %fp + 0x50 ], %o2      ; |
+  84:   d6 07 a0 54     ld  [ %fp + 0x54 ], %o3      ; | arg 0,1,2,3,4 (fetched from prev frame's spill area)
+  88:   d8 07 a0 58     ld  [ %fp + 0x58 ], %o4      ; |
+  8c:   da 07 a0 5c     ld  [ %fp + 0x5c ], %o5      ; arg 5 (fetched from prev frame's stack param area)
+  90:   40 00 00 00     call  90 <nonleaf_call+0x68> ; call leaf_call (objdump not from final link but .o)
+  94:   01 00 00 00     nop                          ; branch delay slot
+  98:   81 e8 00 00     restore                      ; |
+  9c:   81 c3 e0 08     retl                         ; | epilog
+  a0:   01 00 00 00     nop                          ; |            branch delay slot
+
+000000a4 <main>:
+  a4:   9d e3 bf 90     save  %sp, -112, %sp         ; prolog
+  a8:   82 10 20 06     mov  6, %g1                  ; arg 6, ...
+  ac:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]      ; ... "pushed" onto stack
+  b0:   82 10 20 07     mov  7, %g1                  ; arg 7, ...
+  b4:   c2 23 a0 60     st  %g1, [ %sp + 0x60 ]      ; ... "pushed" onto stack
+  b8:   90 10 20 00     clr  %o0                     ; arg 0
+  bc:   92 10 20 01     mov  1, %o1                  ; arg 1
+  c0:   94 10 20 02     mov  2, %o2                  ; arg 2
+  c4:   96 10 20 03     mov  3, %o3                  ; arg 3
+  c8:   98 10 20 04     mov  4, %o4                  ; arg 4
+  cc:   9a 10 20 05     mov  5, %o5                  ; arg 5
+  d0:   40 00 00 00     call  d0 <main+0x2c>         ; call nonleaf_call (objdump not from final link but .o)
+  d4:   01 00 00 00     nop                          ; branch delay slot
+  d8:   82 10 20 00     clr  %g1     ! 0 <leaf_call> ; |
+  dc:   b0 10 00 01     mov  %g1, %i0                ; / return value
+  e0:   81 e8 00 00     restore                      ; \
+  e4:   81 c3 e0 08     retl                         ; | epilog
+  e8:   01 00 00 00     nop                          ; |            branch delay slot
+
+
+
+; output from netbsd-6.0-sparc w/ gcc 4.5.3
+
+00000000 <leaf_call>:
+   0:   9d e3 bf a0     save  %sp, -96, %sp
+   4:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]
+   8:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]
+   c:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]
+  10:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]
+  14:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]
+  18:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]
+  1c:   81 e8 00 00     restore
+  20:   81 c3 e0 08     retl
+  24:   01 00 00 00     nop
+
+00000028 <nonleaf_call>:
+  28:   9d e3 bf 98     save  %sp, -104, %sp
+  2c:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]
+  30:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]
+  34:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]
+  38:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]
+  3c:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]
+  40:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]
+  44:   9c 03 bf 20     add  %sp, -224, %sp
+  48:   82 03 a0 64     add  %sp, 0x64, %g1
+  4c:   82 00 60 07     add  %g1, 7, %g1
+  50:   83 30 60 03     srl  %g1, 3, %g1
+  54:   83 28 60 03     sll  %g1, 3, %g1
+  58:   84 10 20 4c     mov  0x4c, %g2
+  5c:   c4 28 40 00     stb  %g2, [ %g1 ]
+  60:   c2 07 a0 60     ld  [ %fp + 0x60 ], %g1
+  64:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]
+  68:   d0 07 a0 48     ld  [ %fp + 0x48 ], %o0
+  6c:   d2 07 a0 4c     ld  [ %fp + 0x4c ], %o1
+  70:   d4 07 a0 50     ld  [ %fp + 0x50 ], %o2
+  74:   d6 07 a0 54     ld  [ %fp + 0x54 ], %o3
+  78:   d8 07 a0 58     ld  [ %fp + 0x58 ], %o4
+  7c:   da 07 a0 5c     ld  [ %fp + 0x5c ], %o5
+  80:   40 00 00 00     call  80 <nonleaf_call+0x58>
+  84:   01 00 00 00     nop
+  88:   81 e8 00 00     restore
+  8c:   81 c3 e0 08     retl
+  90:   01 00 00 00     nop
+
+00000094 <main>:
+  94:   9d e3 bf 98     save  %sp, -104, %sp
+  98:   82 10 20 06     mov  6, %g1
+  9c:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]
+  a0:   82 10 20 07     mov  7, %g1
+  a4:   c2 23 a0 60     st  %g1, [ %sp + 0x60 ]
+  a8:   90 10 20 00     clr  %o0
+  ac:   92 10 20 01     mov  1, %o1
+  b0:   94 10 20 02     mov  2, %o2
+  b4:   96 10 20 03     mov  3, %o3
+  b8:   98 10 20 04     mov  4, %o4
+  bc:   9a 10 20 05     mov  5, %o5
+  c0:   40 00 00 00     call  c0 <main+0x2c>
+  c4:   01 00 00 00     nop
+  c8:   82 10 20 00     clr  %g1     ! 0 <leaf_call>
+  cc:   b0 10 00 01     mov  %g1, %i0
+  d0:   81 e8 00 00     restore
+  d4:   81 c3 e0 08     retl
+  d8:   01 00 00 00     nop
+
+
+
+; output from openbsd-5.8-sparc w/ gcc 4.2.1
+
+00000000 <leaf_call>:
+   0:   9d e3 bf 90     save  %sp, -112, %sp
+   4:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]
+   8:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]
+   c:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]
+  10:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]
+  14:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]
+  18:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]
+  1c:   81 e8 00 00     restore
+  20:   81 c3 e0 08     retl
+  24:   01 00 00 00     nop
+  28:   ae 03 c0 17     add  %o7, %l7, %l7
+  2c:   81 c3 e0 08     retl
+  30:   01 00 00 00     nop
+
+00000034 <nonleaf_call>:
+  34:   9d e3 bf 80     save  %sp, -128, %sp
+  38:   2f 00 00 00     sethi  %hi(0), %l7
+  3c:   ae 05 e0 00     add  %l7, 0, %l7     ! 0 <leaf_call>
+  40:   7f ff ff fa     call  28 <leaf_call+0x28>
+  44:   01 00 00 00     nop
+  48:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]
+  4c:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]
+  50:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]
+  54:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]
+  58:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]
+  5c:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]
+  60:   03 00 00 00     sethi  %hi(0), %g1
+  64:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>
+  68:   c2 05 c0 01     ld  [ %l7 + %g1 ], %g1
+  6c:   c4 00 40 00     ld  [ %g1 ], %g2
+  70:   c4 27 bf f4     st  %g2, [ %fp + -12 ]
+  74:   84 10 20 00     clr  %g2
+  78:   9c 03 bf 20     add  %sp, -224, %sp
+  7c:   86 03 a0 64     add  %sp, 0x64, %g3
+  80:   c6 27 bf ec     st  %g3, [ %fp + -20 ]
+  84:   c4 07 bf ec     ld  [ %fp + -20 ], %g2
+  88:   82 00 a0 07     add  %g2, 7, %g1
+  8c:   83 30 60 03     srl  %g1, 3, %g1
+  90:   83 28 60 03     sll  %g1, 3, %g1
+  94:   c2 27 bf ec     st  %g1, [ %fp + -20 ]
+  98:   c4 07 bf ec     ld  [ %fp + -20 ], %g2
+  9c:   82 10 20 4c     mov  0x4c, %g1
+  a0:   c2 28 80 00     stb  %g1, [ %g2 ]
+  a4:   c2 07 a0 60     ld  [ %fp + 0x60 ], %g1
+  a8:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]
+  ac:   d0 07 a0 48     ld  [ %fp + 0x48 ], %o0
+  b0:   d2 07 a0 4c     ld  [ %fp + 0x4c ], %o1
+  b4:   d4 07 a0 50     ld  [ %fp + 0x50 ], %o2
+  b8:   d6 07 a0 54     ld  [ %fp + 0x54 ], %o3
+  bc:   d8 07 a0 58     ld  [ %fp + 0x58 ], %o4
+  c0:   da 07 a0 5c     ld  [ %fp + 0x5c ], %o5
+  c4:   40 00 00 00     call  c4 <nonleaf_call+0x90>
+  c8:   01 00 00 00     nop
+  cc:   03 00 00 00     sethi  %hi(0), %g1
+  d0:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>
+  d4:   c2 05 c0 01     ld  [ %l7 + %g1 ], %g1
+  d8:   c6 07 bf f4     ld  [ %fp + -12 ], %g3
+  dc:   c4 00 40 00     ld  [ %g1 ], %g2
+  e0:   86 98 c0 02     xorcc  %g3, %g2, %g3
+  e4:   84 10 20 00     clr  %g2
+  e8:   02 80 00 08     be  108 <nonleaf_call+0xd4>
+  ec:   01 00 00 00     nop
+  f0:   03 00 00 00     sethi  %hi(0), %g1
+  f4:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>
+  f8:   c2 05 c0 01     ld  [ %l7 + %g1 ], %g1
+  fc:   90 10 00 01     mov  %g1, %o0
+ 100:   40 00 00 00     call  100 <nonleaf_call+0xcc>
+ 104:   01 00 00 00     nop
+ 108:   81 e8 00 00     restore
+ 10c:   81 c3 e0 08     retl
+ 110:   01 00 00 00     nop
+
+00000114 <main>:
+ 114:   9d e3 bf 88     save  %sp, -120, %sp
+ 118:   82 10 20 06     mov  6, %g1
+ 11c:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]
+ 120:   82 10 20 07     mov  7, %g1
+ 124:   c2 23 a0 60     st  %g1, [ %sp + 0x60 ]
+ 128:   90 10 20 00     clr  %o0
+ 12c:   92 10 20 01     mov  1, %o1
+ 130:   94 10 20 02     mov  2, %o2
+ 134:   96 10 20 03     mov  3, %o3
+ 138:   98 10 20 04     mov  4, %o4
+ 13c:   9a 10 20 05     mov  5, %o5
+ 140:   40 00 00 00     call  140 <main+0x2c>
+ 144:   01 00 00 00     nop
+ 148:   82 10 20 00     clr  %g1     ! 0 <leaf_call>
+ 14c:   b0 10 00 01     mov  %g1, %i0
+ 150:   81 e8 00 00     restore
+ 154:   81 c3 e0 08     retl
+ 158:   01 00 00 00     nop
+
+
+
+; --------------------- with float params and aggregate return value ------------------->
+
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, float c, int d, float e, int f, int g, int h)
+; {
+; }
+; 
+; struct aggr { int x; int y; int z; };
+; 
+; struct aggr nonleaf_call(int a, int b, float c, int d, float e, int f, int g, int h)
+; {
+;     struct aggr st = { b, d, f };
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
+; 
+;     return st;
+; }
+; 
+; int main()
+; {
+;     struct aggr st = nonleaf_call(0, 1, 2.f, 3, 4.f, 5, 6, 7);
+;     return 0;
+; }
+
+
+
+; output from debian-4.0_r3-sparc w/ gcc 4.1.2
+
+00000000 <leaf_call>:
+   0:   9d e3 bf 98     save  %sp, -104, %sp            ; prolog
+   4:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]         ; |
+   8:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]         ; |
+   c:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]         ; | write input (arg 0-5) to prev frame's spill area; note that the float ...
+  10:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]         ; | ... args (1 and 3) are handled via integer regs like all others, no f* regs used
+  14:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]         ; |
+  18:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]         ; / (arg 6, passed on stack, is not touched)
+  1c:   81 e8 00 00     restore                         ; \
+  20:   81 c3 e0 08     retl                            ; | epilog
+  24:   01 00 00 00     nop                             ; |            branch delay slot
+
+00000028 <nonleaf_call>:
+  28:   9d e3 bf 78     save  %sp, -136, %sp            ; prolog
+  2c:   e0 07 a0 40     ld  [ %fp + 0x40 ], %l0         ; pointer to struct to return (hidden arg, stored by caller at its [sp + 0x40]) -> l0
+  30:   f0 27 a0 44     st  %i0, [ %fp + 0x44 ]         ; |
+  34:   f2 27 a0 48     st  %i1, [ %fp + 0x48 ]         ; |
+  38:   f4 27 a0 4c     st  %i2, [ %fp + 0x4c ]         ; |
+  3c:   f6 27 a0 50     st  %i3, [ %fp + 0x50 ]         ; | write input to prev frame's spill area
+  40:   f8 27 a0 54     st  %i4, [ %fp + 0x54 ]         ; |
+  44:   fa 27 a0 58     st  %i5, [ %fp + 0x58 ]         ; /
+  48:   c2 07 a0 48     ld  [ %fp + 0x48 ], %g1         ; \
+  4c:   c2 27 bf ec     st  %g1, [ %fp + -20 ]          ; |
+  50:   c2 07 a0 50     ld  [ %fp + 0x50 ], %g1         ; | in arg 1,3,5 (the ints to be returned in struct), ...
+  54:   c2 27 bf f0     st  %g1, [ %fp + -16 ]          ; | ... copied to temp space in local area for later use
+  58:   c2 07 a0 58     ld  [ %fp + 0x58 ], %g1         ; |
+  5c:   c2 27 bf f4     st  %g1, [ %fp + -12 ]          ; |
+  60:   9c 03 bf 20     add  %sp, -224, %sp             ; alloca(220) - with 4b padding (multiple of 8), and ...
+  64:   82 03 a0 64     add  %sp, 0x64, %g1             ; ... at least 100b at top of stack, via ...
+  68:   c2 27 bf e4     st  %g1, [ %fp + -28 ]          ; ... local space (pointlessly) ...
+  6c:   c4 07 bf e4     ld  [ %fp + -28 ], %g2          ; ... to g2
+  70:   82 00 a0 07     add  %g2, 7, %g1                ; |
+  74:   83 30 60 03     srl  %g1, 3, %g1                ; | 8b alignment of alloca()'d space pointed to by g1
+  78:   83 28 60 03     sll  %g1, 3, %g1                ; |
+  7c:   c2 27 bf e4     st  %g1, [ %fp + -28 ]          ; free g1 again by copy via temp space, ...
+  80:   c4 07 bf e4     ld  [ %fp + -28 ], %g2          ; ... to g2
+  84:   82 10 20 4c     mov  0x4c, %g1                  ; 'L' -> g1, and ...
+  88:   c2 28 80 00     stb  %g1, [ %g2 ]               ; ... store in aligned alloca()'d space
+  8c:   c2 07 a0 60     ld  [ %fp + 0x60 ], %g1         ; arg 6 (fetched from prev frame's stack param area), and ...
+  90:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]         ; ... "pushed" onto stack
+  94:   d0 07 a0 48     ld  [ %fp + 0x48 ], %o0         ; |
+  98:   d2 07 a0 4c     ld  [ %fp + 0x4c ], %o1         ; |
+  9c:   d4 07 a0 50     ld  [ %fp + 0x50 ], %o2         ; | arg 0,1,2,3,4 (fetched from prev frame's spill area)
+  a0:   d6 07 a0 54     ld  [ %fp + 0x54 ], %o3         ; |
+  a4:   d8 07 a0 58     ld  [ %fp + 0x58 ], %o4         ; |
+  a8:   da 07 a0 5c     ld  [ %fp + 0x5c ], %o5         ; arg 5 (fetched from prev frame's stack param area)
+  ac:   40 00 00 00     call  ac <nonleaf_call+0x84>    ; call leaf_call (objdump not from final link but .o)
+  b0:   01 00 00 00     nop                             ; branch delay slot
+  b4:   c2 07 bf ec     ld  [ %fp + -20 ], %g1          ; |
+  b8:   c2 24 00 00     st  %g1, [ %l0 ]                ; |
+  bc:   c2 07 bf f0     ld  [ %fp + -16 ], %g1          ; |
+  c0:   c2 24 20 04     st  %g1, [ %l0 + 4 ]            ; | store struct elements
+  c4:   c2 07 bf f4     ld  [ %fp + -12 ], %g1          ; |
+  c8:   c2 24 20 08     st  %g1, [ %l0 + 8 ]            ; |
+  cc:   b0 10 00 10     mov  %l0, %i0                   ; return value (pointer to struct)
+  d0:   81 e8 00 00     restore                         ; |
+  d4:   81 c3 e0 0c     jmp  %o7 + 0xc                  ; | epilog; returns to o7+12 instead of the usual o7+8 (retl), skipping the unimp word the caller places right after the call's delay slot
+  d8:   01 00 00 00     nop                             ; |            branch delay slot
+
+000000dc <main>:
+  dc:   9d e3 bf 80     save  %sp, -128, %sp            ; prolog
+  e0:   03 00 00 00     sethi  %hi(0), %g1              ; |
+  e4:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | prep arg 2, load from static data into f8 (addr = 0 b/c objdumped .o, not final linked)
+  e8:   d1 00 40 00     ld  [ %g1 ], %f8                ; /
+  ec:   03 00 00 00     sethi  %hi(0), %g1              ; \
+  f0:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | prep arg 4, load from static data into f9 (addr = 0 b/c objdumped .o, not final linked)
+  f4:   d3 00 40 00     ld  [ %g1 ], %f9                ; |
+  f8:   82 10 20 06     mov  6, %g1                     ; arg 6, ...
+  fc:   c2 23 a0 5c     st  %g1, [ %sp + 0x5c ]         ; ... "pushed" onto stack
+ 100:   82 10 20 07     mov  7, %g1                     ; arg 7, ...
+ 104:   c2 23 a0 60     st  %g1, [ %sp + 0x60 ]         ; ... "pushed" onto stack
+ 108:   82 07 bf ec     add  %fp, -20, %g1              ; store pointer to some frame local data between ...
+ 10c:   c2 23 a0 40     st  %g1, [ %sp + 0x40 ]         ; ... spill and i*/l* save area to be used for struct return value
+ 110:   90 10 20 00     clr  %o0                        ; arg 0
+ 114:   92 10 20 01     mov  1, %o1                     ; arg 1
+ 118:   d1 27 bf f8     st  %f8, [ %fp + -8 ]           ; | arg 2, from f8 via temp space ...
+ 11c:   d4 07 bf f8     ld  [ %fp + -8 ], %o2           ; | ... to o2
+ 120:   96 10 20 03     mov  3, %o3                     ; arg 3
+ 124:   d3 27 bf f8     st  %f9, [ %fp + -8 ]           ; | arg 4, from f9 via temp space ...
+ 128:   d8 07 bf f8     ld  [ %fp + -8 ], %o4           ; | ... to o4
+ 12c:   9a 10 20 05     mov  5, %o5                     ; arg 5
+ 130:   40 00 00 00     call  130 <main+0x54>           ; call nonleaf_call (objdump not from final link but .o)
+ 134:   01 00 00 00     nop                             ; branch delay slot
+ 138:   00 00 00 0c     unimp  0xc                      ; sparc convention for returned aggregates: use unimp with field (here 0xc = 12) specifying size of returned struct (see sparc manual explanation); never executed, as callee returns to o7+12
+ 13c:   82 10 20 00     clr  %g1                        ; |
+ 140:   b0 10 00 01     mov  %g1, %i0                   ; / return value
+ 144:   81 e8 00 00     restore                         ; \
+ 148:   81 c3 e0 08     retl                            ; | epilog
+ 14c:   01 00 00 00     nop                             ; |            branch delay slot
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/sparc64.sparc64.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,558 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+;     return 0;
+; }
+
+
+
+; output from debian-9.0-sparc64 w/ gcc 6.1.1
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 50     save  %sp, -176, %sp         ; prolog
+   4:   8a 10 00 19     mov  %i1, %g5                ; |
+   8:   88 10 00 1a     mov  %i2, %g4                ; |
+   c:   86 10 00 1b     mov  %i3, %g3                ; | in arg 1-5 copied to g5-g1 first (pointlessly)
+  10:   84 10 00 1c     mov  %i4, %g2                ; |
+  14:   82 10 00 1d     mov  %i5, %g1                ; /
+  18:   f0 27 a8 7f     st  %i0, [ %fp + 0x87f ]     ; \
+  1c:   ca 27 a8 87     st  %g5, [ %fp + 0x887 ]     ; |
+  20:   c8 27 a8 8f     st  %g4, [ %fp + 0x88f ]     ; | write in arg 0-5 to prev frame's spill area (0x87f - stack bias 2047 = 128 for arg 0, then one 8b slot per arg)
+  24:   c6 27 a8 97     st  %g3, [ %fp + 0x897 ]     ; | (arg 6, passed on stack, is not touched)
+  28:   c4 27 a8 9f     st  %g2, [ %fp + 0x89f ]     ; |
+  2c:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]     ; |
+  30:   01 00 00 00     nop
+  34:   81 cf e0 08     rett  %i7 + 8                ; | epilog; no trap involved: on sparc v9 the encoding 81 cf e0 08 is 'return %i7 + 8' (apparently shown here under its v8 decoding 'rett')
+  38:   01 00 00 00     nop                          ; |            branch delay slot
+
+000000000000003c <nonleaf_call>:
+  3c:   9d e3 bf 40     save  %sp, -192, %sp         ; prolog
+  40:   8a 10 00 19     mov  %i1, %g5                ; |
+  44:   88 10 00 1a     mov  %i2, %g4                ; |
+  48:   86 10 00 1b     mov  %i3, %g3                ; |
+  4c:   84 10 00 1c     mov  %i4, %g2                ; |
+  50:   82 10 00 1d     mov  %i5, %g1                ; |
+  54:   f0 27 a8 7f     st  %i0, [ %fp + 0x87f ]     ; | write input to prev frame's spill area (e.g. offset = 128 for i0, jumping over i*/l* save area)
+  58:   ca 27 a8 87     st  %g5, [ %fp + 0x887 ]     ; | (pointlessly using an extra reg copy to g* for most)
+  5c:   c8 27 a8 8f     st  %g4, [ %fp + 0x88f ]     ; |
+  60:   c6 27 a8 97     st  %g3, [ %fp + 0x897 ]     ; |
+  64:   c4 27 a8 9f     st  %g2, [ %fp + 0x89f ]     ; |
+  68:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]     ; |
+  6c:   9c 03 bf 10     add  %sp, -240, %sp          ; alloca(220) - with padding, and ...
+  70:   82 03 a8 bf     add  %sp, 0x8bf, %g1         ; ... at least 192b at top of stack (0x8bf - bias = 192)
+  74:   82 00 60 0f     add  %g1, 0xf, %g1           ; |
+  78:   83 30 70 04     srlx  %g1, 4, %g1            ; |
+  7c:   83 28 70 04     sllx  %g1, 4, %g1            ; | 16b alignment of alloca()'d space pointed to by g2
+  80:   84 10 00 01     mov  %g1, %g2                ; |
+  84:   82 10 20 4c     mov  0x4c, %g1               ; 'L' -> g1, and ...
+  88:   c2 28 80 00     stb  %g1, [ %g2 ]            ; ... store in aligned alloca()'d space
+  8c:   c2 07 a8 b3     ld  [ %fp + 0x8b3 ], %g1     ; arg 5 (fetched from prev frame's stack param area), ...
+  90:   b9 38 60 00     sra  %g1, 0, %i4             ; ... -> i4 (sra by 0 sign-extends the 32b int to 64b, same for all below)
+  94:   c2 07 a8 a7     ld  [ %fp + 0x8a7 ], %g1     ; arg 4 (fetched from prev frame's spill area), ...
+  98:   bb 38 60 00     sra  %g1, 0, %i5             ; ... -> i5
+  9c:   c2 07 a8 9f     ld  [ %fp + 0x89f ], %g1     ; arg 3 (fetched from prev frame's spill area), ...
+  a0:   8b 38 60 00     sra  %g1, 0, %g5             ; ... -> g5
+  a4:   c2 07 a8 97     ld  [ %fp + 0x897 ], %g1     ; arg 2 (fetched from prev frame's spill area), ...
+  a8:   89 38 60 00     sra  %g1, 0, %g4             ; ... -> g4
+  ac:   c2 07 a8 8f     ld  [ %fp + 0x88f ], %g1     ; arg 1 (fetched from prev frame's spill area), ...
+  b0:   87 38 60 00     sra  %g1, 0, %g3             ; ... -> g3
+  b4:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1     ; arg 0 (fetched from prev frame's spill area), ...
+  b8:   85 38 60 00     sra  %g1, 0, %g2             ; ... -> g2
+  bc:   c2 07 a8 bb     ld  [ %fp + 0x8bb ], %g1     ; arg 6 (fetched from prev frame's stack param area), ...
+  c0:   83 38 60 00     sra  %g1, 0, %g1             ; ... -> g1, and ...
+  c4:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack
+  c8:   9a 10 00 1c     mov  %i4, %o5                ; arg 5 (the one fetched from prev frame's stack param area above)
+  cc:   98 10 00 1d     mov  %i5, %o4                ; |
+  d0:   96 10 00 05     mov  %g5, %o3                ; |
+  d4:   94 10 00 04     mov  %g4, %o2                ; | arg 4,3,2,1,0 (the ones fetched from prev frame's spill area above)
+  d8:   92 10 00 03     mov  %g3, %o1                ; |
+  dc:   90 10 00 02     mov  %g2, %o0                ; |
+  e0:   40 00 00 00     call  e0 <nonleaf_call+0xa4> ; call leaf_call (objdump not from final link but .o)
+  e4:   01 00 00 00     nop                          ; branch delay slot
+  e8:   01 00 00 00     nop                          ;
+  ec:   81 cf e0 08     rett  %i7 + 8                ; | epilog
+  f0:   01 00 00 00     nop                          ; |            branch delay slot
+
+00000000000000f4 <main>:
+  f4:   9d e3 bf 40     save  %sp, -192, %sp         ; prolog
+  f8:   82 10 20 07     mov  7, %g1                  ; arg 7, ...
+  fc:   c2 73 a8 b7     stx  %g1, [ %sp + 0x8b7 ]    ; ... "pushed" onto stack (0x8b7 - bias = 184 = 128 + 7*8, so 8b slot number 7)
+ 100:   82 10 20 06     mov  6, %g1                  ; arg 6, ...
+ 104:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack (0x8af - bias = 176 = 128 + 6*8, so 8b slot number 6)
+ 108:   9a 10 20 05     mov  5, %o5                  ; arg 5
+ 10c:   98 10 20 04     mov  4, %o4                  ; arg 4
+ 110:   96 10 20 03     mov  3, %o3                  ; arg 3
+ 114:   94 10 20 02     mov  2, %o2                  ; arg 2
+ 118:   92 10 20 01     mov  1, %o1                  ; arg 1
+ 11c:   90 10 20 00     clr  %o0                     ; arg 0
+ 120:   40 00 00 00     call  120 <main+0x2c>        ; call nonleaf_call (objdump not from final link but .o)
+ 124:   01 00 00 00     nop                          ; branch delay slot
+ 128:   82 10 20 00     clr  %g1     ! 0 <leaf_call> ; |
+ 12c:   83 38 60 00     sra  %g1, 0, %g1             ; | return value (0; sra by 0 sign-extends the 32b int to 64b)
+ 130:   b0 10 00 01     mov  %g1, %i0                ; /
+ 134:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+ 138:   01 00 00 00     nop                          ; |            branch delay slot
+
+
+
+; output from freebsd-11.0-sparc64 w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 40     save  %sp, -192, %sp         ; prolog
+   4:   82 10 00 18     mov  %i0, %g1                ; |
+   8:   84 10 00 19     mov  %i1, %g2                ; |
+   c:   86 10 00 1a     mov  %i2, %g3                ; | in arg 0-4 copied to g1-g5 first (pointlessly)
+  10:   88 10 00 1b     mov  %i3, %g4                ; |
+  14:   8a 10 00 1c     mov  %i4, %g5                ; /
+  18:   c2 27 a8 7f     st  %g1, [ %fp + 0x87f ]     ; \
+  1c:   c4 27 a8 87     st  %g2, [ %fp + 0x887 ]     ; |
+  20:   c6 27 a8 8f     st  %g3, [ %fp + 0x88f ]     ; | write in arg 0-5 to prev frame's spill area (0x87f - stack bias 2047 = 128 for arg 0, then one 8b slot per arg)
+  24:   c8 27 a8 97     st  %g4, [ %fp + 0x897 ]     ; | (arg 6, passed on stack, is not touched)
+  28:   ca 27 a8 9f     st  %g5, [ %fp + 0x89f ]     ; |
+  2c:   fa 27 a8 a7     st  %i5, [ %fp + 0x8a7 ]     ; /
+  30:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+  34:   01 00 00 00     nop                          ; |            branch delay slot
+  38:   01 00 00 00     nop                          ; \ never reached; presumably padding so that ...
+  3c:   01 00 00 00     nop                          ; / ... next function starts at 0x40
+
+0000000000000040 <nonleaf_call>:
+  40:   9d e3 bf 20     save  %sp, -224, %sp         ; prolog
+  44:   82 10 00 18     mov  %i0, %g1                ; |
+  48:   84 10 00 19     mov  %i1, %g2                ; |
+  4c:   86 10 00 1a     mov  %i2, %g3                ; | in arg 0-5 copied to other regs first (pointlessly)
+  50:   88 10 00 1b     mov  %i3, %g4                ; |
+  54:   8a 10 00 1c     mov  %i4, %g5                ; |
+  58:   9a 10 00 1d     mov  %i5, %o5                ; /
+  5c:   c2 27 a8 7f     st  %g1, [ %fp + 0x87f ]     ; \
+  60:   c4 27 a8 87     st  %g2, [ %fp + 0x887 ]     ; |
+  64:   c6 27 a8 8f     st  %g3, [ %fp + 0x88f ]     ; | write input to prev frame's spill area (0x87f - stack bias 2047 = 128 for in arg 0, then one 8b slot per arg)
+  68:   c8 27 a8 97     st  %g4, [ %fp + 0x897 ]     ; |
+  6c:   ca 27 a8 9f     st  %g5, [ %fp + 0x89f ]     ; |
+  70:   da 27 a8 a7     st  %o5, [ %fp + 0x8a7 ]     ; /
+  74:   9c 03 bf 20     add  %sp, -224, %sp          ; alloca(220) - with padding, and ...
+  78:   82 03 a8 bf     add  %sp, 0x8bf, %g1         ; ... address 192b above new top of stack (0x8bf - bias = 192), via ...
+  7c:   c2 77 a7 e7     stx  %g1, [ %fp + 0x7e7 ]    ; ... local space (0x7e7 - bias = -24) ...
+  80:   c4 5f a7 e7     ldx  [ %fp + 0x7e7 ], %g2    ; ... to g2
+  84:   82 00 a0 0f     add  %g2, 0xf, %g1           ; |
+  88:   83 30 70 04     srlx  %g1, 4, %g1            ; | 16b alignment of alloca()'d space pointer, result in g1
+  8c:   83 28 70 04     sllx  %g1, 4, %g1            ; |
+  90:   c2 77 a7 e7     stx  %g1, [ %fp + 0x7e7 ]    ; copy aligned pointer via local space, ...
+  94:   c4 5f a7 e7     ldx  [ %fp + 0x7e7 ], %g2    ; ... to g2
+  98:   82 10 20 4c     mov  0x4c, %g1               ; 'L' -> g1, and ...
+  9c:   c2 28 80 00     stb  %g1, [ %g2 ]            ; ... store in aligned alloca()'d space
+  a0:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1     ; in arg 1 (fetched from prev frame's spill area), ...
+  a4:   89 38 60 00     sra  %g1, 0, %g4             ; ... sign-extended -> g4
+  a8:   c2 07 a8 8f     ld  [ %fp + 0x88f ], %g1     ; in arg 2 (fetched from prev frame's spill area), ...
+  ac:   8b 38 60 00     sra  %g1, 0, %g5             ; ... sign-extended -> g5
+  b0:   c2 07 a8 97     ld  [ %fp + 0x897 ], %g1     ; in arg 3 (fetched from prev frame's spill area), ...
+  b4:   9b 38 60 00     sra  %g1, 0, %o5             ; ... sign-extended -> o5 (temporarily)
+  b8:   c2 07 a8 9f     ld  [ %fp + 0x89f ], %g1     ; in arg 4 (fetched from prev frame's spill area), ...
+  bc:   99 38 60 00     sra  %g1, 0, %o4             ; ... sign-extended -> o4 (temporarily)
+  c0:   c2 07 a8 a7     ld  [ %fp + 0x8a7 ], %g1     ; in arg 5 (fetched from prev frame's spill area), ...
+  c4:   85 38 60 00     sra  %g1, 0, %g2             ; ... sign-extended -> g2
+  c8:   c2 07 a8 b3     ld  [ %fp + 0x8b3 ], %g1     ; in arg 6 (fetched from prev frame's stack param area; 0x8b3 - bias = 180 = 128 + 6*8 + 4, the low 32b of the big-endian 8b slot), ...
+  cc:   87 38 60 00     sra  %g1, 0, %g3             ; ... sign-extended -> g3
+  d0:   c2 07 a8 bb     ld  [ %fp + 0x8bb ], %g1     ; in arg 7 (fetched from prev frame's stack param area), ...
+  d4:   83 38 60 00     sra  %g1, 0, %g1             ; ... sign-extended -> g1, and ...
+  d8:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack as out arg 6
+  dc:   90 10 00 04     mov  %g4, %o0                ; out arg 0 (= in arg 1)
+  e0:   92 10 00 05     mov  %g5, %o1                ; out arg 1 (= in arg 2)
+  e4:   94 10 00 0d     mov  %o5, %o2                ; out arg 2 (= in arg 3)
+  e8:   96 10 00 0c     mov  %o4, %o3                ; out arg 3 (= in arg 4)
+  ec:   98 10 00 02     mov  %g2, %o4                ; out arg 4 (= in arg 5)
+  f0:   9a 10 00 03     mov  %g3, %o5                ; out arg 5 (= in arg 6)
+  f4:   40 00 00 00     call  f4 <nonleaf_call+0xb4> ; call leaf_call (objdump not from final link but .o)
+  f8:   01 00 00 00     nop                          ; branch delay slot
+  fc:   81 cf e0 08     rett  %i7 + 8                ; | epilog
+ 100:   01 00 00 00     nop                          ; |            branch delay slot
+ 104:   30 68 00 07     b,a   %xcc, 120 <main>       ; \
+ 108:   01 00 00 00     nop                          ; |
+ 10c:   01 00 00 00     nop                          ; |
+ 110:   01 00 00 00     nop                          ; | never reached from the code above; presumably alignment filler (branch over the nops to the next function at 0x120)
+ 114:   01 00 00 00     nop                          ; |
+ 118:   01 00 00 00     nop                          ; |
+ 11c:   01 00 00 00     nop                          ; /
+
+0000000000000120 <main>:
+ 120:   9d e3 bf 30     save  %sp, -208, %sp         ; prolog
+ 124:   82 10 20 06     mov  6, %g1                  ; arg 6, ...
+ 128:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack
+ 12c:   82 10 20 07     mov  7, %g1                  ; arg 7, ...
+ 130:   c2 73 a8 b7     stx  %g1, [ %sp + 0x8b7 ]    ; ... "pushed" onto stack
+ 134:   90 10 20 00     clr  %o0                     ; arg 0
+ 138:   92 10 20 01     mov  1, %o1                  ; arg 1
+ 13c:   94 10 20 02     mov  2, %o2                  ; arg 2
+ 140:   96 10 20 03     mov  3, %o3                  ; arg 3
+ 144:   98 10 20 04     mov  4, %o4                  ; arg 4
+ 148:   9a 10 20 05     mov  5, %o5                  ; arg 5
+ 14c:   40 00 00 00     call  14c <main+0x2c>        ; call nonleaf_call (objdump not from final link but .o)
+ 150:   01 00 00 00     nop                          ; branch delay slot
+ 154:   82 10 20 00     clr  %g1     ! 0 <leaf_call> ; |
+ 158:   83 38 60 00     sra  %g1, 0, %g1             ; | return value
+ 15c:   b0 10 00 01     mov  %g1, %i0                ; /
+ 160:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+ 164:   01 00 00 00     nop                          ; |            branch delay slot
+ 168:   30 68 00 06     b,a   %xcc, 180 <main+0x60>  ; \
+ 16c:   01 00 00 00     nop                          ; |
+ 170:   01 00 00 00     nop                          ; | never reached from the code above; presumably alignment filler (branch over the nops to 0x180)
+ 174:   01 00 00 00     nop                          ; |
+ 178:   01 00 00 00     nop                          ; |
+ 17c:   01 00 00 00     nop                          ; /
+
+
+
+; output from netbsd-7.1-sparc64 w/ gcc 4.8.5
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 50     save  %sp, -176, %sp         ; prolog
+   4:   8a 10 00 19     mov  %i1, %g5                ; |
+   8:   88 10 00 1a     mov  %i2, %g4                ; |
+   c:   86 10 00 1b     mov  %i3, %g3                ; | in arg 1-5 copied to g5-g1 first (pointlessly)
+  10:   84 10 00 1c     mov  %i4, %g2                ; |
+  14:   82 10 00 1d     mov  %i5, %g1                ; /
+  18:   f0 27 a8 7f     st  %i0, [ %fp + 0x87f ]     ; \
+  1c:   ca 27 a8 87     st  %g5, [ %fp + 0x887 ]     ; |
+  20:   c8 27 a8 8f     st  %g4, [ %fp + 0x88f ]     ; | write in arg 0-5 to prev frame's spill area (0x87f - stack bias 2047 = 128 for arg 0, then one 8b slot per arg)
+  24:   c6 27 a8 97     st  %g3, [ %fp + 0x897 ]     ; | (arg 6, passed on stack, is not touched)
+  28:   c4 27 a8 9f     st  %g2, [ %fp + 0x89f ]     ; |
+  2c:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]     ; /
+  30:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+  34:   01 00 00 00     nop                          ; |            branch delay slot
+
+0000000000000038 <nonleaf_call>:
+  38:   9d e3 bf 40     save  %sp, -192, %sp         ; prolog
+  3c:   8a 10 00 19     mov  %i1, %g5                ; |
+  40:   88 10 00 1a     mov  %i2, %g4                ; |
+  44:   86 10 00 1b     mov  %i3, %g3                ; | in arg 1-5 copied to g5-g1 first (pointlessly)
+  48:   84 10 00 1c     mov  %i4, %g2                ; |
+  4c:   82 10 00 1d     mov  %i5, %g1                ; /
+  50:   f0 27 a8 7f     st  %i0, [ %fp + 0x87f ]     ; \
+  54:   ca 27 a8 87     st  %g5, [ %fp + 0x887 ]     ; |
+  58:   c8 27 a8 8f     st  %g4, [ %fp + 0x88f ]     ; | write input to prev frame's spill area (0x87f - stack bias 2047 = 128 for in arg 0, then one 8b slot per arg)
+  5c:   c6 27 a8 97     st  %g3, [ %fp + 0x897 ]     ; |
+  60:   c4 27 a8 9f     st  %g2, [ %fp + 0x89f ]     ; |
+  64:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]     ; /
+  68:   9c 03 bf 10     add  %sp, -240, %sp          ; alloca(220) - with padding, and ...
+  6c:   82 03 a8 bf     add  %sp, 0x8bf, %g1         ; ... address 192b above new top of stack (0x8bf - bias = 192) -> g1
+  70:   82 00 60 0f     add  %g1, 0xf, %g1           ; |
+  74:   83 30 70 04     srlx  %g1, 4, %g1            ; | 16b alignment of alloca()'d space pointed to by g1
+  78:   83 28 70 04     sllx  %g1, 4, %g1            ; |
+  7c:   84 10 20 4c     mov  0x4c, %g2               ; 'L' -> g2, and ...
+  80:   c4 28 40 00     stb  %g2, [ %g1 ]            ; ... store in aligned alloca()'d space
+  84:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1     ; in arg 1 (fetched from prev frame's spill area), ...
+  88:   bb 38 60 00     sra  %g1, 0, %i5             ; ... sign-extended -> i5
+  8c:   c2 07 a8 8f     ld  [ %fp + 0x88f ], %g1     ; in arg 2 (fetched from prev frame's spill area), ...
+  90:   8b 38 60 00     sra  %g1, 0, %g5             ; ... sign-extended -> g5
+  94:   c2 07 a8 97     ld  [ %fp + 0x897 ], %g1     ; in arg 3 (fetched from prev frame's spill area), ...
+  98:   89 38 60 00     sra  %g1, 0, %g4             ; ... sign-extended -> g4
+  9c:   c2 07 a8 9f     ld  [ %fp + 0x89f ], %g1     ; in arg 4 (fetched from prev frame's spill area), ...
+  a0:   87 38 60 00     sra  %g1, 0, %g3             ; ... sign-extended -> g3
+  a4:   c2 07 a8 a7     ld  [ %fp + 0x8a7 ], %g1     ; in arg 5 (fetched from prev frame's spill area), ...
+  a8:   85 38 60 00     sra  %g1, 0, %g2             ; ... sign-extended -> g2
+  ac:   c2 07 a8 b3     ld  [ %fp + 0x8b3 ], %g1     ; in arg 6 (fetched from prev frame's stack param area; 0x8b3 - bias = 180 = 128 + 6*8 + 4, the low 32b of the big-endian 8b slot), ...
+  b0:   83 38 60 00     sra  %g1, 0, %g1             ; ... sign-extended -> g1
+  b4:   f8 07 a8 bb     ld  [ %fp + 0x8bb ], %i4     ; in arg 7 (fetched from prev frame's stack param area), ...
+  b8:   b9 3f 20 00     sra  %i4, 0, %i4             ; ... sign-extended -> i4, and ...
+  bc:   f8 73 a8 af     stx  %i4, [ %sp + 0x8af ]    ; ... "pushed" onto stack as out arg 6
+  c0:   90 10 00 1d     mov  %i5, %o0                ; out arg 0 (= in arg 1)
+  c4:   92 10 00 05     mov  %g5, %o1                ; out arg 1 (= in arg 2)
+  c8:   94 10 00 04     mov  %g4, %o2                ; out arg 2 (= in arg 3)
+  cc:   96 10 00 03     mov  %g3, %o3                ; out arg 3 (= in arg 4)
+  d0:   98 10 00 02     mov  %g2, %o4                ; out arg 4 (= in arg 5)
+  d4:   9a 10 00 01     mov  %g1, %o5                ; out arg 5 (= in arg 6)
+  d8:   40 00 00 00     call  d8 <nonleaf_call+0xa0> ; call leaf_call (objdump not from final link but .o)
+  dc:   01 00 00 00     nop                          ; branch delay slot
+  e0:   81 cf e0 08     rett  %i7 + 8                ; | epilog
+  e4:   01 00 00 00     nop                          ; |            branch delay slot
+
+00000000000000e8 <main>:
+  e8:   9d e3 bf 40     save  %sp, -192, %sp         ; prolog
+  ec:   82 10 20 06     mov  6, %g1                  ; arg 6, ...
+  f0:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack
+  f4:   82 10 20 07     mov  7, %g1                  ; arg 7, ...
+  f8:   c2 73 a8 b7     stx  %g1, [ %sp + 0x8b7 ]    ; ... "pushed" onto stack
+  fc:   90 10 20 00     clr  %o0                     ; arg 0
+ 100:   92 10 20 01     mov  1, %o1                  ; arg 1
+ 104:   94 10 20 02     mov  2, %o2                  ; arg 2
+ 108:   96 10 20 03     mov  3, %o3                  ; arg 3
+ 10c:   98 10 20 04     mov  4, %o4                  ; arg 4
+ 110:   9a 10 20 05     mov  5, %o5                  ; arg 5
+ 114:   40 00 00 00     call  114 <main+0x2c>        ; call nonleaf_call (objdump not from final link but .o)
+ 118:   01 00 00 00     nop                          ; branch delay slot
+ 11c:   82 10 20 00     clr  %g1     ! 0 <leaf_call> ; |
+ 120:   83 38 60 00     sra  %g1, 0, %g1             ; | return value
+ 124:   b0 10 00 01     mov  %g1, %i0                ; /
+ 128:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+ 12c:   01 00 00 00     nop                          ; |            branch delay slot
+
+
+
+; output from openbsd-7.1-sparc64 w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 30     save  %sp, -208, %sp         ; prolog
+   4:   82 10 00 18     mov  %i0, %g1                ; |
+   8:   84 10 00 19     mov  %i1, %g2                ; |
+   c:   86 10 00 1a     mov  %i2, %g3                ; | in arg 0-4 copied to g1-g5 first (pointlessly)
+  10:   88 10 00 1b     mov  %i3, %g4                ; |
+  14:   8a 10 00 1c     mov  %i4, %g5                ; /
+  18:   c2 27 a8 7f     st  %g1, [ %fp + 0x87f ]     ; \
+  1c:   c4 27 a8 87     st  %g2, [ %fp + 0x887 ]     ; |
+  20:   c6 27 a8 8f     st  %g3, [ %fp + 0x88f ]     ; | write in arg 0-5 to prev frame's spill area (0x87f - stack bias 2047 = 128 for arg 0, then one 8b slot per arg)
+  24:   c8 27 a8 97     st  %g4, [ %fp + 0x897 ]     ; | (arg 6, passed on stack, is not touched)
+  28:   ca 27 a8 9f     st  %g5, [ %fp + 0x89f ]     ; |
+  2c:   fa 27 a8 a7     st  %i5, [ %fp + 0x8a7 ]     ; /
+  30:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+  34:   01 00 00 00     nop                          ; |            branch delay slot
+  38:   ae 03 c0 17     add  %o7, %l7, %l7           ; \ not part of leaf_call's flow (listed under its label only): small helper called from nonleaf_call's ...
+  3c:   81 c3 e0 08     retl                         ; | ... prolog, adding its return address register (o7) to l7 and returning; presumably PIC base setup
+  40:   01 00 00 00     nop                          ; /            branch delay slot
+
+0000000000000044 <nonleaf_call>:
+  44:   9d e3 bf 10     save  %sp, -240, %sp         ; prolog
+  48:   2f 00 00 00     sethi  %hi(0), %l7           ; \
+  4c:   ae 05 e0 00     add  %l7, 0, %l7     ! 0 <leaf_call> ; | (unrelocated) constant -> l7, then helper at 0x38 adds address of the call below (o7) to it; ...
+  50:   7f ff ff fa     call  38 <leaf_call+0x38>    ; | ... l7 is used below as base for the [ %l7 + %g1 ] loads, so presumably PIC/GOT base setup
+  54:   01 00 00 00     nop                          ; /            branch delay slot
+  58:   82 10 00 18     mov  %i0, %g1                ; \
+  5c:   84 10 00 19     mov  %i1, %g2                ; |
+  60:   86 10 00 1a     mov  %i2, %g3                ; | in arg 0-5 copied to other regs first (pointlessly)
+  64:   88 10 00 1b     mov  %i3, %g4                ; |
+  68:   8a 10 00 1c     mov  %i4, %g5                ; |
+  6c:   9a 10 00 1d     mov  %i5, %o5                ; /
+  70:   c2 27 a8 7f     st  %g1, [ %fp + 0x87f ]     ; \
+  74:   c4 27 a8 87     st  %g2, [ %fp + 0x887 ]     ; |
+  78:   c6 27 a8 8f     st  %g3, [ %fp + 0x88f ]     ; | write input to prev frame's spill area (0x87f - stack bias 2047 = 128 for in arg 0, then one 8b slot per arg)
+  7c:   c8 27 a8 97     st  %g4, [ %fp + 0x897 ]     ; |
+  80:   ca 27 a8 9f     st  %g5, [ %fp + 0x89f ]     ; |
+  84:   da 27 a8 a7     st  %o5, [ %fp + 0x8a7 ]     ; /
+  88:   03 00 00 00     sethi  %hi(0), %g1           ; \
+  8c:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | load an address via l7-relative table entry (symbol unknown, objdumped .o, not final linked), ...
+  90:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1      ; |
+  94:   c4 58 40 00     ldx  [ %g1 ], %g2            ; | ... read the 8b value stored there, and ...
+  98:   c4 77 a7 e7     stx  %g2, [ %fp + 0x7e7 ]    ; | ... keep a copy in local space (0x7e7 - bias = -24); looks like a stack protector canary (checked again at 0x128-0x148)
+  9c:   84 10 20 00     clr  %g2                     ; /
+  a0:   9c 03 bf 20     add  %sp, -224, %sp          ; alloca(220) - with padding, and ...
+  a4:   86 03 a8 bf     add  %sp, 0x8bf, %g3         ; ... address 192b above new top of stack (0x8bf - bias = 192), via ...
+  a8:   c6 77 a7 d7     stx  %g3, [ %fp + 0x7d7 ]    ; ... local space (0x7d7 - bias = -40) ...
+  ac:   c4 5f a7 d7     ldx  [ %fp + 0x7d7 ], %g2    ; ... to g2
+  b0:   82 00 a0 0f     add  %g2, 0xf, %g1           ; |
+  b4:   83 30 70 04     srlx  %g1, 4, %g1            ; | 16b alignment of alloca()'d space pointer, result in g1
+  b8:   83 28 70 04     sllx  %g1, 4, %g1            ; |
+  bc:   c2 77 a7 d7     stx  %g1, [ %fp + 0x7d7 ]    ; copy aligned pointer via local space, ...
+  c0:   c4 5f a7 d7     ldx  [ %fp + 0x7d7 ], %g2    ; ... to g2
+  c4:   82 10 20 4c     mov  0x4c, %g1               ; 'L' -> g1, and ...
+  c8:   c2 28 80 00     stb  %g1, [ %g2 ]            ; ... store in aligned alloca()'d space
+  cc:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1     ; in arg 1 (fetched from prev frame's spill area), ...
+  d0:   89 38 60 00     sra  %g1, 0, %g4             ; ... sign-extended -> g4
+  d4:   c2 07 a8 8f     ld  [ %fp + 0x88f ], %g1     ; in arg 2 (fetched from prev frame's spill area), ...
+  d8:   8b 38 60 00     sra  %g1, 0, %g5             ; ... sign-extended -> g5
+  dc:   c2 07 a8 97     ld  [ %fp + 0x897 ], %g1     ; in arg 3 (fetched from prev frame's spill area), ...
+  e0:   9b 38 60 00     sra  %g1, 0, %o5             ; ... sign-extended -> o5 (temporarily)
+  e4:   c2 07 a8 9f     ld  [ %fp + 0x89f ], %g1     ; in arg 4 (fetched from prev frame's spill area), ...
+  e8:   99 38 60 00     sra  %g1, 0, %o4             ; ... sign-extended -> o4 (temporarily)
+  ec:   c2 07 a8 a7     ld  [ %fp + 0x8a7 ], %g1     ; in arg 5 (fetched from prev frame's spill area), ...
+  f0:   85 38 60 00     sra  %g1, 0, %g2             ; ... sign-extended -> g2
+  f4:   c2 07 a8 b3     ld  [ %fp + 0x8b3 ], %g1     ; in arg 6 (fetched from prev frame's stack param area; 0x8b3 - bias = 180 = 128 + 6*8 + 4, the low 32b of the big-endian 8b slot), ...
+  f8:   87 38 60 00     sra  %g1, 0, %g3             ; ... sign-extended -> g3
+  fc:   c2 07 a8 bb     ld  [ %fp + 0x8bb ], %g1     ; in arg 7 (fetched from prev frame's stack param area), ...
+ 100:   83 38 60 00     sra  %g1, 0, %g1             ; ... sign-extended -> g1, and ...
+ 104:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack as out arg 6
+ 108:   90 10 00 04     mov  %g4, %o0                ; out arg 0 (= in arg 1)
+ 10c:   92 10 00 05     mov  %g5, %o1                ; out arg 1 (= in arg 2)
+ 110:   94 10 00 0d     mov  %o5, %o2                ; out arg 2 (= in arg 3)
+ 114:   96 10 00 0c     mov  %o4, %o3                ; out arg 3 (= in arg 4)
+ 118:   98 10 00 02     mov  %g2, %o4                ; out arg 4 (= in arg 5)
+ 11c:   9a 10 00 03     mov  %g3, %o5                ; out arg 5 (= in arg 6)
+ 120:   40 00 00 00     call  120 <nonleaf_call+0xdc> ; call leaf_call (objdump not from final link but .o)
+ 124:   01 00 00 00     nop                          ; branch delay slot
+ 128:   03 00 00 00     sethi  %hi(0), %g1           ; \
+ 12c:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | load an address via l7-relative table entry again (presumably same symbol as at 0x88-0x90)
+ 130:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1      ; /
+ 134:   c6 5f a7 e7     ldx  [ %fp + 0x7e7 ], %g3    ; copy saved in local space at function entry -> g3
+ 138:   c4 58 40 00     ldx  [ %g1 ], %g2            ; current value -> g2
+ 13c:   86 18 c0 02     xor  %g3, %g2, %g3           ; g3 = 0 if both are still equal
+ 140:   84 10 20 00     clr  %g2                     ;
+ 144:   82 10 00 03     mov  %g3, %g1                ;
+ 148:   02 c8 40 08     brz  %g1, 168 <nonleaf_call+0x124> ; if equal, skip the call below and go straight to epilog
+ 14c:   01 00 00 00     nop                          ; branch delay slot
+ 150:   03 00 00 00     sethi  %hi(0), %g1           ; \
+ 154:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | value loaded via l7-relative table entry (symbol unknown, not final linked) ...
+ 158:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1      ; |
+ 15c:   90 10 00 01     mov  %g1, %o0                ; / ... as arg 0 for ...
+ 160:   40 00 00 00     call  160 <nonleaf_call+0x11c> ; ... this call (target unknown, not relocated; presumably the stack protector's failure handler)
+ 164:   01 00 00 00     nop                          ; branch delay slot
+ 168:   81 cf e0 08     rett  %i7 + 8                ; | epilog
+ 16c:   01 00 00 00     nop                          ; |            branch delay slot
+
+0000000000000170 <main>:
+ 170:   9d e3 bf 20     save  %sp, -224, %sp         ; prolog
+ 174:   82 10 20 06     mov  6, %g1                  ; arg 6, ...
+ 178:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]    ; ... "pushed" onto stack
+ 17c:   82 10 20 07     mov  7, %g1                  ; arg 7, ...
+ 180:   c2 73 a8 b7     stx  %g1, [ %sp + 0x8b7 ]    ; ... "pushed" onto stack
+ 184:   90 10 20 00     clr  %o0                     ; arg 0
+ 188:   92 10 20 01     mov  1, %o1                  ; arg 1
+ 18c:   94 10 20 02     mov  2, %o2                  ; arg 2
+ 190:   96 10 20 03     mov  3, %o3                  ; arg 3
+ 194:   98 10 20 04     mov  4, %o4                  ; arg 4
+ 198:   9a 10 20 05     mov  5, %o5                  ; arg 5
+ 19c:   40 00 00 00     call  19c <main+0x2c>        ; call nonleaf_call (objdump not from final link but .o)
+ 1a0:   01 00 00 00     nop                          ; branch delay slot
+ 1a4:   82 10 20 00     clr  %g1     ! 0 <leaf_call> ; |
+ 1a8:   83 38 60 00     sra  %g1, 0, %g1             ; | return value
+ 1ac:   b0 10 00 01     mov  %g1, %i0                ; /
+ 1b0:   81 cf e0 08     rett  %i7 + 8                ; \ epilog
+ 1b4:   01 00 00 00     nop                          ; |            branch delay slot
+
+
+
+; --------------------- with float params, aggregate return value (<=32b, passed in regs) and ellipsis with float ------------------->
+
+; #include <stdlib.h>
+; #include <stdarg.h>
+;
+; void leaf_call(int b, float c, int d, float e, int f, int g, float h)
+; {
+; }
+;
+; struct aggr { int x; int y; int z; };
+;
+; struct aggr nonleaf_call(int a, int b, float c, int d, float e, int f, ...)
+; {
+;     va_list v;
+;     int g;
+;     float h;
+;     struct aggr st = { b, d, f };
+;     va_start(v, f);
+;     g = va_arg(v, int);
+;     h = va_arg(v, float);
+;     /* use some local data */
+;     *(char*)alloca(220) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
+;
+;     return st;
+; }
+;
+; int main()
+; {
+;     struct aggr st = nonleaf_call(0, 1, 2.f, 3, 4.f, 5, 6, 7.f);
+;     return 0;
+; }
+
+
+
+; output from netbsd-7.1-sparc64 w/ gcc 4.8.5
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 50     save  %sp, -176, %sp            ; prolog
+   4:   88 10 00 18     mov  %i0, %g4                   ; |
+   8:   c7 27 a8 87     st  %f3, [ %fp + 0x887 ]        ; | (float arg 1 arrives in f3, spilled to slot 1)
+   c:   86 10 00 1a     mov  %i2, %g3                   ; |
+  10:   cf 27 a8 97     st  %f7, [ %fp + 0x897 ]        ; | (float arg 3 arrives in f7, spilled to slot 3)
+  14:   84 10 00 1c     mov  %i4, %g2                   ; | write input to prev frame's spill area (e.g. offset = 128 for i0, jumping over i*/l* save area)
+  18:   82 10 00 1d     mov  %i5, %g1                   ; | (pointlessly using an extra reg copy to g* for most)
+  1c:   db 27 a8 af     st  %f13, [ %fp + 0x8af ]       ; | note: float args are spilled as are all others (float arg 6 arrives in f13, spilled to slot 6)
+  20:   c8 27 a8 7f     st  %g4, [ %fp + 0x87f ]        ; |
+  24:   c6 27 a8 8f     st  %g3, [ %fp + 0x88f ]        ; |
+  28:   c4 27 a8 9f     st  %g2, [ %fp + 0x89f ]        ; |
+  2c:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]        ; /
+  30:   81 cf e0 08     rett  %i7 + 8                   ; \ epilog; no trap involved: on sparc v9 the encoding 81 cf e0 08 is 'return %i7 + 8' (apparently shown here under its v8 decoding 'rett')
+  34:   01 00 00 00     nop                             ; |              branch delay slot
+
+0000000000000038 <nonleaf_call>:
+  38:   9d e3 bf 20     save  %sp, -224, %sp            ; prolog
+  3c:   88 10 00 18     mov  %i0, %g4                   ; |
+  40:   86 10 00 19     mov  %i1, %g3                   ; |
+  44:   cb 27 a8 8f     st  %f5, [ %fp + 0x88f ]        ; | (float args 2 and 4 arrive in f5 and f9, respectively)
+  48:   84 10 00 1b     mov  %i3, %g2                   ; |
+  4c:   d3 27 a8 9f     st  %f9, [ %fp + 0x89f ]        ; | write input to prev frame's spill area (e.g. offset = 128 for i0, jumping over i*/l* save area)
+  50:   82 10 00 1d     mov  %i5, %g1                   ; | (pointlessly using an extra reg copy to g* for most)
+  54:   c8 27 a8 7f     st  %g4, [ %fp + 0x87f ]        ; | note: float args are spilled as are all others
+  58:   c6 27 a8 87     st  %g3, [ %fp + 0x887 ]        ; |
+  5c:   c4 27 a8 97     st  %g2, [ %fp + 0x897 ]        ; |
+  60:   c2 27 a8 a7     st  %g1, [ %fp + 0x8a7 ]        ; |
+  64:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1        ; in arg 1 (int b, fetched from prev frame's spill area), ...
+  68:   c2 27 a7 db     st  %g1, [ %fp + 0x7db ]        ; ... copied to local space (0x7db - bias = -36)
+  6c:   c2 07 a8 97     ld  [ %fp + 0x897 ], %g1        ; in arg 3 (int d, fetched from prev frame's spill area), ...
+  70:   c2 27 a7 df     st  %g1, [ %fp + 0x7df ]        ; ... copied to local space (0x7df - bias = -32)
+  74:   c2 07 a8 a7     ld  [ %fp + 0x8a7 ], %g1        ; in arg 5 (int f, fetched from prev frame's spill area), ...
+  78:   c2 27 a7 e3     st  %g1, [ %fp + 0x7e3 ]        ; ... copied to local space (0x7e3 - bias = -28)
+  7c:   82 07 a8 af     add  %fp, 0x8af, %g1            ; va_list: pointer to slot of arg 6, the first unnamed param (0x8af - bias = 176 = 128 + 6*8) -> g1 ...
+  80:   c2 77 a7 e7     stx  %g1, [ %fp + 0x7e7 ]       ; ... store to local space (0x7e7 - bias = -24)
+  84:   c2 5f a7 e7     ldx  [ %fp + 0x7e7 ], %g1       ; reread to start iteration (pointlessly)
+  88:   84 00 60 04     add  %g1, 4, %g2                ; point read ptr in g2 to first unnamed param (int; +4 b/c the 32b value sits in the low half of its big-endian 8b slot)
+  8c:   c4 00 80 00     ld  [ %g2 ], %g2                ; in arg 6 (fetched from prev frame's stack param area), ...
+  90:   c4 27 a7 fb     st  %g2, [ %fp + 0x7fb ]        ; ... copied to local space (0x7fb - bias = -4) helper var (probably int g)
+  94:   82 00 60 08     add  %g1, 8, %g1                ; point read ptr in g1 to second unnamed param (float, promoted to double), ...
+  98:   c2 77 a7 e7     stx  %g1, [ %fp + 0x7e7 ]       ; ... store in local space (0x7e7 - bias = -24)
+  9c:   91 d0 20 05     ta  5                           ; trap - not sure what else is involved (objdump was made from .o, not finally linked exec)
+
+00000000000000a0 <main>:
+  a0:   9d e3 bf 30     save  %sp, -208, %sp            ; prolog
+  a4:   03 00 00 00     sethi  %hi(0), %g1              ; |
+  a8:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; |
+  ac:   83 28 70 0c     sllx  %g1, 0xc, %g1             ; | prep arg 2, load from static data into f11 (addr = 0 b/c objdumped .o, not final linked)
+  b0:   82 10 60 00     mov  %g1, %g1                   ; |
+  b4:   d7 00 40 00     ld  [ %g1 ], %f11               ; /
+  b8:   03 00 00 00     sethi  %hi(0), %g1              ; \
+  bc:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; | prep arg 4, load from static data into f10 (addr = 0 b/c objdumped .o, not final linked)
+  c0:   83 28 70 0c     sllx  %g1, 0xc, %g1             ; |
+  c4:   82 10 60 00     mov  %g1, %g1                   ; |
+  c8:   d5 00 40 00     ld  [ %g1 ], %f10               ; |
+  cc:   82 10 20 06     mov  6, %g1                     ; arg 6, ...
+  d0:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]       ; ... "pushed" onto stack (0x8af - bias = 176 = 128 + 6*8, so 8b slot number 6)
+  d4:   03 00 00 00     sethi  %hi(0), %g1              ; |
+  d8:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call> ; |
+  dc:   83 28 70 0c     sllx  %g1, 0xc, %g1             ; | prep arg 7, load from static data as double (b/c of vararg promotion) into d8 (addr = 0 b/c objdumped .o, not final linked)
+  e0:   82 10 60 00     mov  %g1, %g1                   ; |
+  e4:   d1 18 40 00     ldd  [ %g1 ], %f8               ; |
+  e8:   d1 3b a8 b7     std  %f8, [ %sp + 0x8b7 ]       ; arg 7 "pushed" onto stack as double (0x8b7 - bias = 184 = 128 + 7*8, so 8b slot number 7)
+  ec:   90 10 20 00     clr  %o0                        ; arg 0 (note, this is not the pointer to the aggregate return value, b/c latter <= 32b)
+  f0:   92 10 20 01     mov  1, %o1                     ; arg 1
+  f4:   8b a0 00 2b     fmovs  %f11, %f5                ; arg 2 (float, passed in f5)
+  f8:   96 10 20 03     mov  3, %o3                     ; arg 3
+  fc:   93 a0 00 2a     fmovs  %f10, %f9                ; arg 4 (float, passed in f9)
+ 100:   9a 10 20 05     mov  5, %o5                     ; arg 5
+ 104:   40 00 00 00     call  104 <main+0x64>           ; call nonleaf_call (objdump not from final link but .o)
+ 108:   01 00 00 00     nop                             ; branch delay slot
+ 10c:   84 10 00 08     mov  %o0, %g2                   ; |
+ 110:   82 10 00 09     mov  %o1, %g1                   ; / get return value (12b aggregate) out of 2 regs (16b)
+ 114:   87 30 b0 20     srlx  %g2, 0x20, %g3            ; \
+ 118:   c8 07 a7 f3     ld  [ %fp + 0x7f3 ], %g4        ; |
+ 11c:   88 09 20 00     and  %g4, 0, %g4                ; | store 1st struct field (int) by g2 >> 32 (and some other operations unnecessary here)
+ 120:   86 11 00 03     or  %g4, %g3, %g3               ; |
+ 124:   c6 27 a7 f3     st  %g3, [ %fp + 0x7f3 ]        ; / (0x7f3 - bias = -12)
+ 128:   86 10 3f ff     mov  -1, %g3                    ; \
+ 12c:   87 30 f0 20     srlx  %g3, 0x20, %g3            ; |
+ 130:   84 08 80 03     and  %g2, %g3, %g2              ; |
+ 134:   c6 07 a7 f7     ld  [ %fp + 0x7f7 ], %g3        ; | store 2nd struct field (int) by (-1 >> 32) & g2 (and then some other operations unnecessary here)
+ 138:   86 08 e0 00     and  %g3, 0, %g3                ; |
+ 13c:   84 10 c0 02     or  %g3, %g2, %g2               ; |
+ 140:   c4 27 a7 f7     st  %g2, [ %fp + 0x7f7 ]        ; / (0x7f7 - bias = -8)
+ 144:   83 38 70 20     srax  %g1, 0x20, %g1            ; \
+ 148:   c4 07 a7 fb     ld  [ %fp + 0x7fb ], %g2        ; |
+ 14c:   84 08 a0 00     and  %g2, 0, %g2                ; | store 3rd struct field (int) by g1 >> 32 (and then some other operations unnecessary here)
+ 150:   82 10 80 01     or  %g2, %g1, %g1               ; |
+ 154:   c2 27 a7 fb     st  %g1, [ %fp + 0x7fb ]        ; / (0x7fb - bias = -4)
+ 158:   82 10 20 00     clr  %g1                        ; \
+ 15c:   83 38 60 00     sra  %g1, 0, %g1                ; / return value
+ 160:   b0 10 00 01     mov  %g1, %i0                   ; \
+ 164:   81 cf e0 08     rett  %i7 + 8                   ; | epilog
+ 168:   01 00 00 00     nop                             ; |            branch delay slot
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/x64.sysv.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,161 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(10) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from freebsd-12.0-x64 w/ clang 6.0.1
+
+0000000000000000 <leaf_call>:
+   0:   55                      push   %rbp
+   1:   48 89 e5                mov    %rsp,%rbp
+   4:   8b 45 10                mov    0x10(%rbp),%eax
+   7:   89 7d fc                mov    %edi,-0x4(%rbp)
+   a:   89 75 f8                mov    %esi,-0x8(%rbp)
+   d:   89 55 f4                mov    %edx,-0xc(%rbp)
+  10:   89 4d f0                mov    %ecx,-0x10(%rbp)
+  13:   44 89 45 ec             mov    %r8d,-0x14(%rbp)
+  17:   44 89 4d e8             mov    %r9d,-0x18(%rbp)
+  1b:   89 45 e4                mov    %eax,-0x1c(%rbp)
+  1e:   5d                      pop    %rbp
+  1f:   c3                      retq
+
+0000000000000020 <nonleaf_call>:
+  20:   55                      push   %rbp                 ; |
+  21:   48 89 e5                mov    %rsp,%rbp            ; | prolog
+  24:   48 83 ec 40             sub    $0x40,%rsp           ; |           open frame *with* static alloca() size included
+  28:   8b 45 18                mov    0x18(%rbp),%eax      ; unsure... get stack param from prev frame into some scratch reg... but why? see below @@@
+  2b:   44 8b 55 10             mov    0x10(%rbp),%r10d     ; unsure... get stack param from prev frame into some scratch reg... but why? see below @@@
+  2f:   89 7d fc                mov    %edi,-0x4(%rbp)      ; in arg 0 -> local area (as temp store)
+  32:   89 75 f8                mov    %esi,-0x8(%rbp)      ; in arg 1 -> local area (as temp store)
+  35:   89 55 f4                mov    %edx,-0xc(%rbp)      ; in arg 2 -> local area (as temp store)
+  38:   89 4d f0                mov    %ecx,-0x10(%rbp)     ; in arg 3 -> local area (as temp store)
+  3b:   44 89 45 ec             mov    %r8d,-0x14(%rbp)     ; in arg 4 -> local area (as temp store)
+  3f:   44 89 4d e8             mov    %r9d,-0x18(%rbp)     ; in arg 5 -> local area (as temp store)
+  43:   c6 45 d0 4c             movb   $0x4c,-0x30(%rbp)    ; 'L' -> local area (of alloca()'d space)
+  47:   8b 7d f8                mov    -0x8(%rbp),%edi      ; arg 0
+  4a:   8b 75 f4                mov    -0xc(%rbp),%esi      ; arg 1
+  4d:   8b 55 f0                mov    -0x10(%rbp),%edx     ; arg 2
+  50:   8b 4d ec                mov    -0x14(%rbp),%ecx     ; arg 3
+  53:   44 8b 45 e8             mov    -0x18(%rbp),%r8d     ; arg 4
+  57:   44 8b 4d 10             mov    0x10(%rbp),%r9d      ; arg 5 (fetched from prev frame's param area - behind return addr on 16b aligned stack)
+  5b:   44 8b 5d 18             mov    0x18(%rbp),%r11d     ; arg 6 (fetched from prev frame's param area), and ...
+  5f:   44 89 1c 24             mov    %r11d,(%rsp)         ; ... "pushed" onto stack
+  63:   44 89 55 cc             mov    %r10d,-0x34(%rbp)    ; unsure... write something to local area @@@?
+  67:   89 45 c8                mov    %eax,-0x38(%rbp)     ; unsure... write something to local area @@@?
+  6a:   e8 91 ff ff ff          callq  0 <leaf_call>        ; push return addr and call
+  6f:   48 83 c4 40             add    $0x40,%rsp           ; |
+  73:   5d                      pop    %rbp                 ; | epilog
+  74:   c3                      retq                        ; |
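+  75:   66 66 2e 0f 1f 84 00    nopw   %cs:0x0(%rax,%rax,1) ; not garbage: a single 11-byte nop used as padding, so that main starts 16b aligned (at 0x80)
+  7c:   00 00 00 00                                         ; remaining bytes of the nop above (objdump wraps the byte dump)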
+
+0000000000000080 <main>:
+  80:   55                      push   %rbp                 ; |
+  81:   48 89 e5                mov    %rsp,%rbp            ; | prolog
+  84:   48 83 ec 20             sub    $0x20,%rsp           ; |
+  88:   31 ff                   xor    %edi,%edi            ; arg 0
+  8a:   be 01 00 00 00          mov    $0x1,%esi            ; arg 1
+  8f:   ba 02 00 00 00          mov    $0x2,%edx            ; arg 2
+  94:   b9 03 00 00 00          mov    $0x3,%ecx            ; arg 3
+  99:   41 b8 04 00 00 00       mov    $0x4,%r8d            ; arg 4
+  9f:   41 b9 05 00 00 00       mov    $0x5,%r9d            ; arg 5
+  a5:   b8 06 00 00 00          mov    $0x6,%eax            ; unsure... see below @@@?
+  aa:   41 ba 07 00 00 00       mov    $0x7,%r10d           ; unsure... see below @@@?
+  b0:   c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)      ; unsure... write 0 to local area, probably unoptimized clang initializing its slot for main's return value @@@?
+  b7:   c7 04 24 06 00 00 00    movl   $0x6,(%rsp)          ; "push" arg 6 onto stack
+  be:   c7 44 24 08 07 00 00 00 movl   $0x7,0x8(%rsp)       ; "push" arg 7 onto stack
+  c6:   44 89 55 f8             mov    %r10d,-0x8(%rbp)     ; unsure... write something to local area @@@?
+  ca:   89 45 f4                mov    %eax,-0xc(%rbp)      ; unsure... write something to local area @@@?
+  cd:   e8 4e ff ff ff          callq  20 <nonleaf_call>    ; push return addr and call
+  d2:   31 c0                   xor    %eax,%eax            ; return value
+  d4:   48 83 c4 20             add    $0x20,%rsp           ; |
+  d8:   5d                      pop    %rbp                 ; | epilog
+  d9:   c3                      retq                        ; |
+
+
+
+; output from arch_linux-2011.08.19-x64 w/ gcc 4.6.1 (w/ alloca(220) instead of 10)
+
+0000000000000000 <leaf_call>:
+   0:   55                      push   %rbp
+   1:   48 89 e5                mov    %rsp,%rbp
+   4:   89 7d fc                mov    %edi,-0x4(%rbp)
+   7:   89 75 f8                mov    %esi,-0x8(%rbp)
+   a:   89 55 f4                mov    %edx,-0xc(%rbp)
+   d:   89 4d f0                mov    %ecx,-0x10(%rbp)
+  10:   44 89 45 ec             mov    %r8d,-0x14(%rbp)
+  14:   44 89 4d e8             mov    %r9d,-0x18(%rbp)
+  18:   5d                      pop    %rbp
+  19:   c3                      retq
+
+000000000000001a <nonleaf_call>:
+  1a:   55                      push   %rbp
+  1b:   48 89 e5                mov    %rsp,%rbp
+  1e:   48 83 ec 30             sub    $0x30,%rsp
+  22:   89 7d fc                mov    %edi,-0x4(%rbp)
+  25:   89 75 f8                mov    %esi,-0x8(%rbp)
+  28:   89 55 f4                mov    %edx,-0xc(%rbp)
+  2b:   89 4d f0                mov    %ecx,-0x10(%rbp)
+  2e:   44 89 45 ec             mov    %r8d,-0x14(%rbp)
+  32:   44 89 4d e8             mov    %r9d,-0x18(%rbp)
+  36:   b8 10 00 00 00          mov    $0x10,%eax
+  3b:   48 83 e8 01             sub    $0x1,%rax
+  3f:   48 05 eb 00 00 00       add    $0xeb,%rax
+  45:   48 c7 45 e0 10 00 00 00 movq   $0x10,-0x20(%rbp)
+  4d:   ba 00 00 00 00          mov    $0x0,%edx
+  52:   48 f7 75 e0             divq   -0x20(%rbp)
+  56:   48 6b c0 10             imul   $0x10,%rax,%rax
+  5a:   48 29 c4                sub    %rax,%rsp
+  5d:   48 8d 44 24 08          lea    0x8(%rsp),%rax
+  62:   48 83 c0 0f             add    $0xf,%rax
+  66:   48 c1 e8 04             shr    $0x4,%rax
+  6a:   48 c1 e0 04             shl    $0x4,%rax
+  6e:   c6 00 4c                movb   $0x4c,(%rax)
+  71:   44 8b 45 e8             mov    -0x18(%rbp),%r8d
+  75:   8b 4d ec                mov    -0x14(%rbp),%ecx
+  78:   8b 55 f0                mov    -0x10(%rbp),%edx
+  7b:   8b 75 f4                mov    -0xc(%rbp),%esi
+  7e:   8b 45 f8                mov    -0x8(%rbp),%eax
+  81:   8b 7d 18                mov    0x18(%rbp),%edi
+  84:   89 3c 24                mov    %edi,(%rsp)
+  87:   44 8b 4d 10             mov    0x10(%rbp),%r9d
+  8b:   89 c7                   mov    %eax,%edi
+  8d:   e8 00 00 00 00          callq  92 <nonleaf_call+0x78>
+  92:   c9                      leaveq
+  93:   c3                      retq
+
+0000000000000094 <main>:
+  94:   55                      push   %rbp
+  95:   48 89 e5                mov    %rsp,%rbp
+  98:   48 83 ec 10             sub    $0x10,%rsp
+  9c:   c7 44 24 08 07 00 00 00 movl   $0x7,0x8(%rsp)
+  a4:   c7 04 24 06 00 00 00    movl   $0x6,(%rsp)
+  ab:   41 b9 05 00 00 00       mov    $0x5,%r9d
+  b1:   41 b8 04 00 00 00       mov    $0x4,%r8d
+  b7:   b9 03 00 00 00          mov    $0x3,%ecx
+  bc:   ba 02 00 00 00          mov    $0x2,%edx
+  c1:   be 01 00 00 00          mov    $0x1,%esi
+  c6:   bf 00 00 00 00          mov    $0x0,%edi
+  cb:   e8 00 00 00 00          callq  d0 <main+0x3c>
+  d0:   b8 00 00 00 00          mov    $0x0,%eax
+  d5:   c9                      leaveq
+  d6:   c3                      retq
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/x64.win.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,1 @@
+; @@@ missing
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/x86.cdecl.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,490 @@
+; #include <stdlib.h>
+; 
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+; 
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+; 	/* use some local data */
+; 	*(char*)alloca(220) = 'L';
+; 	leaf_call(b, c, d, e, f, g, h);
+; }
+; 
+; int main()
+; {
+; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+; 	return 0;
+; }
+
+
+
+; output from arch_linux-2011.08.19-x86 w/ gcc 4.6.1
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   5d                      pop    %ebp
+   4:   c3                      ret
+
+00000005 <nonleaf_call>:
+   5:   55                      push   %ebp                   ; |
+   6:   89 e5                   mov    %esp,%ebp              ; | prolog
+   8:   83 ec 38                sub    $0x38,%esp             ; /
+   b:   b8 10 00 00 00          mov    $0x10,%eax             ; \                     |
+  10:   83 e8 01                sub    $0x1,%eax              ; |                     | creative way to move 250 to eax
+  13:   05 eb 00 00 00          add    $0xeb,%eax             ; |                     /
+  18:   c7 45 f4 10 00 00 00    movl   $0x10,-0xc(%ebp)       ; | size comp wtf?      \
+  1f:   ba 00 00 00 00          mov    $0x0,%edx              ; |                     |
+  24:   f7 75 f4                divl   -0xc(%ebp)             ; |                     | obviously fastest way to round to multiple of 16
+  27:   6b c0 10                imul   $0x10,%eax,%eax        ; |                     |
+  2a:   29 c4                   sub    %eax,%esp              ; alloca(220) with size containing some padding computed above
+  2c:   8d 44 24 1c             lea    0x1c(%esp),%eax        ; |
+  30:   83 c0 0f                add    $0xf,%eax              ; | start of alloca()'d memory -> eax, by ...
+  33:   c1 e8 04                shr    $0x4,%eax              ; | ... skipping the outgoing param area (0x1c) and rounding up to a multiple of 16 (add 15, clear low 4 bits)
+  36:   c1 e0 04                shl    $0x4,%eax              ; |
+  39:   c6 00 4c                movb   $0x4c,(%eax)           ; 'L' -> alloca()'d space
+  3c:   8b 45 24                mov    0x24(%ebp),%eax        ; |
+  3f:   89 44 24 18             mov    %eax,0x18(%esp)        ; |
+  43:   8b 45 20                mov    0x20(%ebp),%eax        ; |
+  46:   89 44 24 14             mov    %eax,0x14(%esp)        ; |
+  4a:   8b 45 1c                mov    0x1c(%ebp),%eax        ; |
+  4d:   89 44 24 10             mov    %eax,0x10(%esp)        ; |
+  51:   8b 45 18                mov    0x18(%ebp),%eax        ; | read in args 1-7 from prev frame's param area, and ...
+  54:   89 44 24 0c             mov    %eax,0xc(%esp)         ; | ... "push" onto stack as arg 0-6
+  58:   8b 45 14                mov    0x14(%ebp),%eax        ; |
+  5b:   89 44 24 08             mov    %eax,0x8(%esp)         ; |
+  5f:   8b 45 10                mov    0x10(%ebp),%eax        ; |
+  62:   89 44 24 04             mov    %eax,0x4(%esp)         ; |
+  66:   8b 45 0c                mov    0xc(%ebp),%eax         ; |
+  69:   89 04 24                mov    %eax,(%esp)            ; |
+  6c:   e8 fc ff ff ff          call   6d <nonleaf_call+0x68> ; push return address and call leaf_call (objdump not from final link but .o)
+  71:   c9                      leave                         ; |
+  72:   c3                      ret                           ; | epilog
+
+00000073 <main>:
+  73:   55                      push   %ebp                   ; |
+  74:   89 e5                   mov    %esp,%ebp              ; |
+  76:   83 e4 f0                and    $0xfffffff0,%esp       ; | prolog
+  79:   83 ec 20                sub    $0x20,%esp             ; |
+  7c:   c7 44 24 1c 07 00 00 00 movl   $0x7,0x1c(%esp)        ; arg 7 -> stack
+  84:   c7 44 24 18 06 00 00 00 movl   $0x6,0x18(%esp)        ; arg 6 -> stack
+  8c:   c7 44 24 14 05 00 00 00 movl   $0x5,0x14(%esp)        ; arg 5 -> stack
+  94:   c7 44 24 10 04 00 00 00 movl   $0x4,0x10(%esp)        ; arg 4 -> stack
+  9c:   c7 44 24 0c 03 00 00 00 movl   $0x3,0xc(%esp)         ; arg 3 -> stack
+  a4:   c7 44 24 08 02 00 00 00 movl   $0x2,0x8(%esp)         ; arg 2 -> stack
+  ac:   c7 44 24 04 01 00 00 00 movl   $0x1,0x4(%esp)         ; arg 1 -> stack
+  b4:   c7 04 24 00 00 00 00    movl   $0x0,(%esp)            ; arg 0 -> stack
+  bb:   e8 fc ff ff ff          call   bc <main+0x49>         ; push return address and call nonleaf_call (objdump not from final link but .o)
+  c0:   b8 00 00 00 00          mov    $0x0,%eax              ; return value
+  c5:   c9                      leave                         ; |
+  c6:   c3                      ret                           ; | epilog
+
+
+
+; output from darwin-8.0.1-x86 w/ gcc 3.3
+
+_leaf_call:
+   0:   55                      pushl  %ebp
+   1:   89 e5                   movl   %esp, %ebp
+   3:   83 ec 08                subl   $8, %esp
+   6:   c9                      leave
+   7:   c3                      retl
+
+_nonleaf_call:
+   8:   55                      pushl  %ebp                 ; |
+   9:   89 e5                   movl   %esp, %ebp           ; | prolog
+   b:   83 ec 28                subl   $40, %esp            ; |
+   e:   81 ec e0 00 00 00       subl   $224, %esp           ; alloca(220) - with 4b padding
+  14:   8d 44 24 20             leal   32(%esp), %eax       ; |
+  18:   c6 00 4c                movb   $76, (%eax)          ; / 'L' -> alloca()'d space
+  1b:   8b 45 24                movl   36(%ebp), %eax       ; \
+  1e:   89 44 24 18             movl   %eax, 24(%esp)       ; |
+  22:   8b 45 20                movl   32(%ebp), %eax       ; |
+  25:   89 44 24 14             movl   %eax, 20(%esp)       ; |
+  29:   8b 45 1c                movl   28(%ebp), %eax       ; |
+  2c:   89 44 24 10             movl   %eax, 16(%esp)       ; |
+  30:   8b 45 18                movl   24(%ebp), %eax       ; | read in args 1-7 from prev frame's param area, and ...
+  33:   89 44 24 0c             movl   %eax, 12(%esp)       ; | ... "push" onto stack as arg 0-6
+  37:   8b 45 14                movl   20(%ebp), %eax       ; |
+  3a:   89 44 24 08             movl   %eax, 8(%esp)        ; |
+  3e:   8b 45 10                movl   16(%ebp), %eax       ; |
+  41:   89 44 24 04             movl   %eax, 4(%esp)        ; |
+  45:   8b 45 0c                movl   12(%ebp), %eax       ; |
+  48:   89 04 24                movl   %eax, (%esp)         ; |
+  4b:   e8 b0 ff ff ff          calll  -80 <_leaf_call>     ; push return address and call
+  50:   c9                      leave                       ; |
+  51:   c3                      retl                        ; | epilog
+  52:   90                      nop                         ;
+  53:   90                      nop                         ;
+
+_main:
+  54:   55                      pushl  %ebp                 ; |
+  55:   89 e5                   movl   %esp, %ebp           ; | prolog
+  57:   83 ec 28                subl   $40, %esp            ; |
+  5a:   c7 44 24 1c 07 00 00 00 movl   $7, 28(%esp)         ; arg 7 -> stack
+  62:   c7 44 24 18 06 00 00 00 movl   $6, 24(%esp)         ; arg 6 -> stack
+  6a:   c7 44 24 14 05 00 00 00 movl   $5, 20(%esp)         ; arg 5 -> stack
+  72:   c7 44 24 10 04 00 00 00 movl   $4, 16(%esp)         ; arg 4 -> stack
+  7a:   c7 44 24 0c 03 00 00 00 movl   $3, 12(%esp)         ; arg 3 -> stack
+  82:   c7 44 24 08 02 00 00 00 movl   $2, 8(%esp)          ; arg 2 -> stack
+  8a:   c7 44 24 04 01 00 00 00 movl   $1, 4(%esp)          ; arg 1 -> stack
+  92:   c7 04 24 00 00 00 00    movl   $0, (%esp)           ; arg 0 -> stack
+  99:   e8 6a ff ff ff          calll  -150 <_nonleaf_call> ; push return address and call
+  9e:   b8 00 00 00 00          movl   $0, %eax             ; return value
+  a3:   c9                      leave                       ; |
+  a4:   c3                      retl                        ; | epilog
+
+
+
+; output from freebsd-9.3-x86 w/ gcc 4.2.1
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   5d                      pop    %ebp
+   4:   c3                      ret
+   5:   8d 74 26 00             lea    0x0(%esi),%esi
+   9:   8d bc 27 00 00 00 00    lea    0x0(%edi),%edi
+
+00000010 <nonleaf_call>:
+  10:   55                      push   %ebp                   ; |
+  11:   89 e5                   mov    %esp,%ebp              ; | prolog
+  13:   83 ec 28                sub    $0x28,%esp             ; |
+  16:   81 ec f0 00 00 00       sub    $0xf0,%esp             ; alloca(220) - with padding for 16b alignment
+  1c:   8d 44 24 1c             lea    0x1c(%esp),%eax        ; |
+  20:   89 45 fc                mov    %eax,-0x4(%ebp)        ; |
+  23:   8b 45 fc                mov    -0x4(%ebp),%eax        ; |
+  26:   83 c0 0f                add    $0xf,%eax              ; | start of alloca()'d memory -> eax, by ...
+  29:   c1 e8 04                shr    $0x4,%eax              ; | ... using eax and 2 pointless store/reads in local space as helper to align to 16b
+  2c:   c1 e0 04                shl    $0x4,%eax              ; |
+  2f:   89 45 fc                mov    %eax,-0x4(%ebp)        ; |
+  32:   8b 45 fc                mov    -0x4(%ebp),%eax        ; |
+  35:   c6 00 4c                movb   $0x4c,(%eax)           ; 'L' -> alloca()'d space
+  38:   8b 45 24                mov    0x24(%ebp),%eax        ; |
+  3b:   89 44 24 18             mov    %eax,0x18(%esp)        ; |
+  3f:   8b 45 20                mov    0x20(%ebp),%eax        ; |
+  42:   89 44 24 14             mov    %eax,0x14(%esp)        ; |
+  46:   8b 45 1c                mov    0x1c(%ebp),%eax        ; |
+  49:   89 44 24 10             mov    %eax,0x10(%esp)        ; |
+  4d:   8b 45 18                mov    0x18(%ebp),%eax        ; | read in args 1-7 from prev frame's param area, and ...
+  50:   89 44 24 0c             mov    %eax,0xc(%esp)         ; | ... "push" onto stack as arg 0-6
+  54:   8b 45 14                mov    0x14(%ebp),%eax        ; |
+  57:   89 44 24 08             mov    %eax,0x8(%esp)         ; |
+  5b:   8b 45 10                mov    0x10(%ebp),%eax        ; |
+  5e:   89 44 24 04             mov    %eax,0x4(%esp)         ; |
+  62:   8b 45 0c                mov    0xc(%ebp),%eax         ; |
+  65:   89 04 24                mov    %eax,(%esp)            ; |
+  68:   e8 fc ff ff ff          call   69 <nonleaf_call+0x59> ; push return address and call leaf_call (objdump not from final link but .o)
+  6d:   c9                      leave                         ; |
+  6e:   c3                      ret                           ; | epilog
+  6f:   90                      nop                           ;
+
+00000070 <main>:
+  70:   8d 4c 24 04             lea    0x4(%esp),%ecx         ; |
+  74:   83 e4 f0                and    $0xfffffff0,%esp       ; |
+  77:   ff 71 fc                pushl  -0x4(%ecx)             ; |
+  7a:   55                      push   %ebp                   ; | prolog
+  7b:   89 e5                   mov    %esp,%ebp              ; |
+  7d:   51                      push   %ecx                   ; |
+  7e:   83 ec 24                sub    $0x24,%esp             ; |
+  81:   c7 44 24 1c 07 00 00 00 movl   $0x7,0x1c(%esp)        ; arg 7 -> stack
+  89:   c7 44 24 18 06 00 00 00 movl   $0x6,0x18(%esp)        ; arg 6 -> stack
+  91:   c7 44 24 14 05 00 00 00 movl   $0x5,0x14(%esp)        ; arg 5 -> stack
+  99:   c7 44 24 10 04 00 00 00 movl   $0x4,0x10(%esp)        ; arg 4 -> stack
+  a1:   c7 44 24 0c 03 00 00 00 movl   $0x3,0xc(%esp)         ; arg 3 -> stack
+  a9:   c7 44 24 08 02 00 00 00 movl   $0x2,0x8(%esp)         ; arg 2 -> stack
+  b1:   c7 44 24 04 01 00 00 00 movl   $0x1,0x4(%esp)         ; arg 1 -> stack
+  b9:   c7 04 24 00 00 00 00    movl   $0x0,(%esp)            ; arg 0 -> stack
+  c0:   e8 fc ff ff ff          call   c1 <main+0x51>         ; push return address and call nonleaf_call (objdump not from final link but .o)
+  c5:   b8 00 00 00 00          mov    $0x0,%eax              ; return value
+  ca:   83 c4 24                add    $0x24,%esp             ; |
+  cd:   59                      pop    %ecx                   ; |
+  ce:   5d                      pop    %ebp                   ; | epilog
+  cf:   8d 61 fc                lea    -0x4(%ecx),%esp        ; |
+  d2:   c3                      ret                           ; |
+
+
+
+; output from gentoo_linux-20191029-x86 w/ gcc 8.3.0
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   e8 fc ff ff ff          call   4 <leaf_call+0x4>
+   8:   05 01 00 00 00          add    $0x1,%eax
+   d:   90                      nop
+   e:   5d                      pop    %ebp
+   f:   c3                      ret
+
+00000010 <nonleaf_call>:
+  10:   55                      push   %ebp
+  11:   89 e5                   mov    %esp,%ebp
+  13:   83 ec 18                sub    $0x18,%esp
+  16:   e8 fc ff ff ff          call   17 <nonleaf_call+0x7>
+  1b:   05 01 00 00 00          add    $0x1,%eax
+  20:   65 a1 14 00 00 00       mov    %gs:0x14,%eax
+  26:   89 45 f4                mov    %eax,-0xc(%ebp)
+  29:   31 c0                   xor    %eax,%eax
+  2b:   b8 10 00 00 00          mov    $0x10,%eax
+  30:   48                      dec    %eax
+  31:   05 e8 00 00 00          add    $0xe8,%eax
+  36:   b9 10 00 00 00          mov    $0x10,%ecx
+  3b:   ba 00 00 00 00          mov    $0x0,%edx
+  40:   f7 f1                   div    %ecx
+  42:   6b c0 10                imul   $0x10,%eax,%eax
+  45:   29 c4                   sub    %eax,%esp
+  47:   89 e0                   mov    %esp,%eax
+  49:   83 c0 0f                add    $0xf,%eax
+  4c:   c1 e8 04                shr    $0x4,%eax
+  4f:   c1 e0 04                shl    $0x4,%eax
+  52:   c6 00 4c                movb   $0x4c,(%eax)
+  55:   83 ec 04                sub    $0x4,%esp
+  58:   ff 75 24                pushl  0x24(%ebp)
+  5b:   ff 75 20                pushl  0x20(%ebp)
+  5e:   ff 75 1c                pushl  0x1c(%ebp)
+  61:   ff 75 18                pushl  0x18(%ebp)
+  64:   ff 75 14                pushl  0x14(%ebp)
+  67:   ff 75 10                pushl  0x10(%ebp)
+  6a:   ff 75 0c                pushl  0xc(%ebp)
+  6d:   e8 fc ff ff ff          call   6e <nonleaf_call+0x5e>
+  72:   83 c4 20                add    $0x20,%esp
+  75:   90                      nop
+  76:   8b 45 f4                mov    -0xc(%ebp),%eax
+  79:   65 33 05 14 00 00 00    xor    %gs:0x14,%eax
+  80:   74 05                   je     87 <nonleaf_call+0x77>
+  82:   e8 fc ff ff ff          call   83 <nonleaf_call+0x73>
+  87:   c9                      leave
+  88:   c3                      ret
+
+00000089 <main>:
+  89:   8d 4c 24 04             lea    0x4(%esp),%ecx    ; |
+  8d:   83 e4 f0                and    $0xfffffff0,%esp  ; |
+  90:   ff 71 fc                pushl  -0x4(%ecx)        ; |
+  93:   55                      push   %ebp              ; |
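+  94:   89 e5                   mov    %esp,%ebp         ; | prolog (with what is most likely position-independent-code setup, not a stack protection check)
+  96:   51                      push   %ecx              ; |
+  97:   83 ec 04                sub    $0x4,%esp         ; |
+  9a:   e8 fc ff ff ff          call   9b <main+0x12>    ; |       unsure@@@ probably not stackguard stuff, but PIC helper (__x86.get_pc_thunk.ax) returning its return address in eax (objdump not from final link but .o)
+  9f:   05 01 00 00 00          add    $0x1,%eax         ; |       probably eax += offset to GOT (_GLOBAL_OFFSET_TABLE_), the 1 being the addend of the unresolved relocation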
+  a4:   6a 07                   push   $0x7              ; arg 7 -> stack
+  a6:   6a 06                   push   $0x6              ; arg 6 -> stack
+  a8:   6a 05                   push   $0x5              ; arg 5 -> stack
+  aa:   6a 04                   push   $0x4              ; arg 4 -> stack
+  ac:   6a 03                   push   $0x3              ; arg 3 -> stack
+  ae:   6a 02                   push   $0x2              ; arg 2 -> stack
+  b0:   6a 01                   push   $0x1              ; arg 1 -> stack
+  b2:   6a 00                   push   $0x0              ; arg 0 -> stack
+  b4:   e8 fc ff ff ff          call   b5 <main+0x2c>    ; push return address and call nonleaf_call (objdump not from final link but .o)
+  b9:   83 c4 20                add    $0x20,%esp        ; caller cleans up the 8 args pushed above (8 * 4b = 0x20)
+  bc:   b8 00 00 00 00          mov    $0x0,%eax         ; return value
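+  c1:   8b 4d fc                mov    -0x4(%ebp),%ecx   ; |           restore ecx saved in prolog (= esp + 4 as it was on entry, before aligning)
+  c4:   c9                      leave                    ; |
+  c5:   8d 61 fc                lea    -0x4(%ecx),%esp   ; | epilog    back to esp as on entry (undoing the 16b alignment), so ret finds the return address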
+  c8:   c3                      ret                      ; |
+
+
+
+; output from haiku w/ gcc 4.4.4
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   5d                      pop    %ebp
+   4:   c3                      ret
+
+00000005 <nonleaf_call>:
+   5:   55                      push   %ebp
+   6:   89 e5                   mov    %esp,%ebp
+   8:   53                      push   %ebx
+   9:   83 ec 04                sub    $0x4,%esp
+   c:   e8 00 00 00 00          call   11 <nonleaf_call+0xc>
+  11:   5b                      pop    %ebx
+  12:   81 c3 03 00 00 00       add    $0x3,%ebx
+  18:   81 ec f0 00 00 00       sub    $0xf0,%esp
+  1e:   89 e0                   mov    %esp,%eax
+  20:   83 c0 0f                add    $0xf,%eax
+  23:   c1 e8 04                shr    $0x4,%eax
+  26:   c1 e0 04                shl    $0x4,%eax
+  29:   c6 00 4c                movb   $0x4c,(%eax)
+  2c:   83 ec 04                sub    $0x4,%esp
+  2f:   ff 75 24                pushl  0x24(%ebp)
+  32:   ff 75 20                pushl  0x20(%ebp)
+  35:   ff 75 1c                pushl  0x1c(%ebp)
+  38:   ff 75 18                pushl  0x18(%ebp)
+  3b:   ff 75 14                pushl  0x14(%ebp)
+  3e:   ff 75 10                pushl  0x10(%ebp)
+  41:   ff 75 0c                pushl  0xc(%ebp)
+  44:   e8 fc ff ff ff          call   45 <nonleaf_call+0x40>
+  49:   83 c4 20                add    $0x20,%esp
+  4c:   8b 5d fc                mov    -0x4(%ebp),%ebx
+  4f:   c9                      leave
+  50:   c3                      ret
+
+00000051 <main>:
+  51:   8d 4c 24 04             lea    0x4(%esp),%ecx
+  55:   83 e4 f0                and    $0xfffffff0,%esp
+  58:   ff 71 fc                pushl  -0x4(%ecx)
+  5b:   55                      push   %ebp
+  5c:   89 e5                   mov    %esp,%ebp
+  5e:   53                      push   %ebx
+  5f:   51                      push   %ecx
+  60:   e8 00 00 00 00          call   65 <main+0x14>
+  65:   5b                      pop    %ebx
+  66:   81 c3 03 00 00 00       add    $0x3,%ebx
+  6c:   6a 07                   push   $0x7
+  6e:   6a 06                   push   $0x6
+  70:   6a 05                   push   $0x5
+  72:   6a 04                   push   $0x4
+  74:   6a 03                   push   $0x3
+  76:   6a 02                   push   $0x2
+  78:   6a 01                   push   $0x1
+  7a:   6a 00                   push   $0x0
+  7c:   e8 fc ff ff ff          call   7d <main+0x2c>
+  81:   83 c4 20                add    $0x20,%esp
+  84:   b8 00 00 00 00          mov    $0x0,%eax
+  89:   8d 65 f8                lea    -0x8(%ebp),%esp
+  8c:   83 c4 00                add    $0x0,%esp
+  8f:   59                      pop    %ecx
+  90:   5b                      pop    %ebx
+  91:   5d                      pop    %ebp
+  92:   8d 61 fc                lea    -0x4(%ecx),%esp
+  95:   c3                      ret
+
+
+
+; output from nexenta-1.0.1-b85-x86 w/ gcc 4.0.3
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   c9                      leave
+   4:   c3                      ret
+
+00000005 <nonleaf_call>:
+   5:   55                      push   %ebp
+   6:   89 e5                   mov    %esp,%ebp
+   8:   83 ec 08                sub    $0x8,%esp
+   b:   81 ec f0 00 00 00       sub    $0xf0,%esp
+  11:   89 65 fc                mov    %esp,0xfffffffc(%ebp)
+  14:   8b 45 fc                mov    0xfffffffc(%ebp),%eax
+  17:   83 c0 0f                add    $0xf,%eax
+  1a:   c1 e8 04                shr    $0x4,%eax
+  1d:   c1 e0 04                shl    $0x4,%eax
+  20:   89 45 fc                mov    %eax,0xfffffffc(%ebp)
+  23:   8b 45 fc                mov    0xfffffffc(%ebp),%eax
+  26:   c6 00 4c                movb   $0x4c,(%eax)
+  29:   ff 75 24                pushl  0x24(%ebp)
+  2c:   ff 75 20                pushl  0x20(%ebp)
+  2f:   ff 75 1c                pushl  0x1c(%ebp)
+  32:   ff 75 18                pushl  0x18(%ebp)
+  35:   ff 75 14                pushl  0x14(%ebp)
+  38:   ff 75 10                pushl  0x10(%ebp)
+  3b:   ff 75 0c                pushl  0xc(%ebp)
+  3e:   e8 fc ff ff ff          call   3f <nonleaf_call+0x3a>
+  43:   83 c4 1c                add    $0x1c,%esp
+  46:   c9                      leave
+  47:   c3                      ret
+
+00000048 <main>:
+  48:   55                      push   %ebp
+  49:   89 e5                   mov    %esp,%ebp
+  4b:   83 ec 08                sub    $0x8,%esp
+  4e:   83 e4 f0                and    $0xfffffff0,%esp
+  51:   b8 00 00 00 00          mov    $0x0,%eax
+  56:   83 c0 0f                add    $0xf,%eax
+  59:   83 c0 0f                add    $0xf,%eax
+  5c:   c1 e8 04                shr    $0x4,%eax
+  5f:   c1 e0 04                shl    $0x4,%eax
+  62:   29 c4                   sub    %eax,%esp
+  64:   6a 07                   push   $0x7
+  66:   6a 06                   push   $0x6
+  68:   6a 05                   push   $0x5
+  6a:   6a 04                   push   $0x4
+  6c:   6a 03                   push   $0x3
+  6e:   6a 02                   push   $0x2
+  70:   6a 01                   push   $0x1
+  72:   6a 00                   push   $0x0
+  74:   e8 fc ff ff ff          call   75 <main+0x2d>
+  79:   83 c4 20                add    $0x20,%esp
+  7c:   b8 00 00 00 00          mov    $0x0,%eax
+  81:   c9                      leave
+  82:   c3                      ret
+
+
+
+; output from openbsd-4.0-x86 w/ gcc 3.3.5 (propolice)
+
+00000000 <leaf_call>:
+   0:   55                      push   %ebp
+   1:   89 e5                   mov    %esp,%ebp
+   3:   c9                      leave
+   4:   c3                      ret
+
+00000005 <nonleaf_call>:
+   5:   55                      push   %ebp
+   6:   89 e5                   mov    %esp,%ebp
+   8:   83 ec 18                sub    $0x18,%esp
+   b:   a1 00 00 00 00          mov    0x0,%eax
+  10:   89 45 e8                mov    %eax,0xffffffe8(%ebp)
+  13:   81 ec e0 00 00 00       sub    $0xe0,%esp
+  19:   89 e0                   mov    %esp,%eax
+  1b:   c6 00 4c                movb   $0x4c,(%eax)
+  1e:   83 ec 04                sub    $0x4,%esp
+  21:   ff 75 24                pushl  0x24(%ebp)
+  24:   ff 75 20                pushl  0x20(%ebp)
+  27:   ff 75 1c                pushl  0x1c(%ebp)
+  2a:   ff 75 18                pushl  0x18(%ebp)
+  2d:   ff 75 14                pushl  0x14(%ebp)
+  30:   ff 75 10                pushl  0x10(%ebp)
+  33:   ff 75 0c                pushl  0xc(%ebp)
+  36:   e8 fc ff ff ff          call   37 <nonleaf_call+0x32>
+  3b:   83 c4 20                add    $0x20,%esp
+  3e:   8b 45 e8                mov    0xffffffe8(%ebp),%eax
+  41:   3b 05 00 00 00 00       cmp    0x0,%eax
+  47:   74 13                   je     5c <nonleaf_call+0x57>
+  49:   83 ec 08                sub    $0x8,%esp
+  4c:   ff 75 e8                pushl  0xffffffe8(%ebp)
+  4f:   68 00 00 00 00          push   $0x0
+  54:   e8 fc ff ff ff          call   55 <nonleaf_call+0x50>
+  59:   83 c4 10                add    $0x10,%esp
+  5c:   c9                      leave
+  5d:   c3                      ret
+
+0000005e <main>:
+  5e:   55                      push   %ebp
+  5f:   89 e5                   mov    %esp,%ebp
+  61:   83 ec 18                sub    $0x18,%esp
+  64:   83 e4 f0                and    $0xfffffff0,%esp
+  67:   b8 00 00 00 00          mov    $0x0,%eax
+  6c:   29 c4                   sub    %eax,%esp
+  6e:   a1 00 00 00 00          mov    0x0,%eax
+  73:   89 45 e8                mov    %eax,0xffffffe8(%ebp)
+  76:   6a 07                   push   $0x7
+  78:   6a 06                   push   $0x6
+  7a:   6a 05                   push   $0x5
+  7c:   6a 04                   push   $0x4
+  7e:   6a 03                   push   $0x3
+  80:   6a 02                   push   $0x2
+  82:   6a 01                   push   $0x1
+  84:   6a 00                   push   $0x0
+  86:   e8 fc ff ff ff          call   87 <main+0x29>
+  8b:   83 c4 20                add    $0x20,%esp
+  8e:   b8 00 00 00 00          mov    $0x0,%eax
+  93:   8b 55 e8                mov    0xffffffe8(%ebp),%edx
+  96:   3b 15 00 00 00 00       cmp    0x0,%edx
+  9c:   74 13                   je     b1 <main+0x53>
+  9e:   83 ec 08                sub    $0x8,%esp
+  a1:   ff 75 e8                pushl  0xffffffe8(%ebp)
+  a4:   68 0d 00 00 00          push   $0xd
+  a9:   e8 fc ff ff ff          call   aa <main+0x4c>
+  ae:   83 c4 10                add    $0x10,%esp
+  b1:   c9                      leave
+  b2:   c3                      ret
+
+
+
+; @@@ windows missing
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/x86.fastcall_borland.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,41 @@
+; #pragma pack(push, 1)
+; struct TTest
+; {
+;     __int32 i1;
+;     __int32 i2;
+; };
+; #pragma pack(pop)
+; 
+; TTest __fastcall DoTest()
+; {
+;     TTest t;
+;     t.i1 = 1;
+;     t.i2 = 2;
+;     return t;
+; }
+; 
+; ...
+; TTest t = DoTest();
+
+; from http://codeverge.com/embarcadero.cppbuilder.cpp/does-fastcall-have-any-bearing-on/1043767
+
+
+DoTest():
+  push ebp                     ; |
+  mov ebp,esp                  ; | prolog
+  add esp,-0x0c                ; |         (reserves 12 bytes of local area)
+  mov [ebp-0x04],eax           ; pointer to hidden param (where the returned struct gets written) in eax -> local area
+  mov [ebp-0x0c],0x00000001    ; val 1 -> local area (t.i1)
+  mov [ebp-0x08],0x00000002    ; val 2 -> local area (t.i2)
+  mov eax,[ebp-0x04]           ; refetch eax (pointlessly, it still holds the same pointer)
+  mov edx,[ebp-0x0c]           ; get val 1 in edx and ...
+  mov [eax],edx                ; ... store at *eax
+  mov edx,[ebp-0x08]           ; get val 2 in edx and ...
+  mov [eax+0x04],edx           ; ... store at *(eax + 4)
+  mov eax, [ebp-0x04]          ; return value: the hidden param's pointer, reloaded into eax
+  mov esp,ebp                  ; |
+  pop ebp                      ; | epilog
+  ret                          ; |
+
+; vim: ft=asm
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/disas_examples/x86.plan9call.disas	Fri Nov 22 23:08:59 2019 +0100
@@ -0,0 +1,64 @@
+; void leaf_call(int a, int b, int c, int d, int e, int f)
+; {
+; }
+; 
+; int nonleaf_call(int a, int b, int c, int d, int e, int f, int g)
+; {
+;   leaf_call(b,c,d,e,f,g);
+;   return 'x';
+; }
+; 
+; int main()
+; {
+;   nonleaf_call(0,1,2,3,4,5,6);
+;   return 0;
+; }
+
+
+
+; output from plan9-4th_edition-x86 w/ 8c x.c && 8l -a x.8
+
+001020                        (1) TEXT leaf_call+0(SB),$0
+001020    c3                  (3)       RET                         ,                  ; empty function: returns right away, SP is never adjusted (TEXT line lists $0)
+
+001021                        (5) TEXT nonleaf_call+0(SB),$28
+001021    83ec1c              (5)       SUBL                        $28,SP             ; prolog: reserve 28 bytes (note, there is no register save area at all)
+001024    8b442424            (7)       MOVL                        b+36(FP),AX        ; |
+001028    890424              (7)       MOVL                        AX,(SP)            ; |
+00102b    8b442428            (7)       MOVL                        c+40(FP),AX        ; |
+00102f    89442404            (7)       MOVL                        AX,4(SP)           ; |
+001033    8b44242c            (7)       MOVL                        d+44(FP),AX        ; |
+001037    89442408            (7)       MOVL                        AX,8(SP)           ; | fetch in args from prev frame's param area ...
+00103b    8b442430            (7)       MOVL                        e+48(FP),AX        ; | ... and "push" onto stack
+00103f    8944240c            (7)       MOVL                        AX,12(SP)          ; |
+001043    8b442434            (7)       MOVL                        f+52(FP),AX        ; |
+001047    89442410            (7)       MOVL                        AX,16(SP)          ; |
+00104b    8b442438            (7)       MOVL                        g+56(FP),AX        ; |
+00104f    89442414            (7)       MOVL                        AX,20(SP)          ; |
+001053    e8c8ffffff          (7)       CALL                        ,1020+leaf_call    ; push return address and call
+001058    b878000000          (8)       MOVL                        $120,AX            ; return value: 'x' -> eax
+00105d    83c41c              (8)       ADDL                        $28,SP             ; | release the 28 bytes reserved in prolog
+001060    c3                  (8)       RET                         ,                  ; | epilog
+
+001061                        (11) TEXT main+0(SB),$32
+001061    83ec20              (11)      SUBL                        $32,SP             ; prolog: reserve 32 bytes (note, there is no register save area at all)
+001064    c7042400000000      (13)      MOVL                        $0,(SP)            ; arg 0 -> "push" onto stack
+00106b    b801000000          (13)      MOVL                        $1,AX              ; arg 1 -> eax, then ...
+001070    89442404            (13)      MOVL                        AX,4(SP)           ; ... "pushed" onto stack
+001074    b802000000          (13)      MOVL                        $2,AX              ; arg 2 -> eax, then ...
+001079    89442408            (13)      MOVL                        AX,8(SP)           ; ... "pushed" onto stack
+00107d    b803000000          (13)      MOVL                        $3,AX              ;    .
+001082    8944240c            (13)      MOVL                        AX,12(SP)          ;    .
+001086    b804000000          (13)      MOVL                        $4,AX              ;    .
+00108b    89442410            (13)      MOVL                        AX,16(SP)          ;    .
+00108f    b805000000          (13)      MOVL                        $5,AX              ;    .
+001094    89442414            (13)      MOVL                        AX,20(SP)          ;    .
+001098    b806000000          (13)      MOVL                        $6,AX              ; arg 6 -> eax, then ...
+00109d    89442418            (13)      MOVL                        AX,24(SP)          ; ... "pushed" onto stack
+0010a1    e87bffffff          (13)      CALL                        ,1021+nonleaf_call ; push return address and call
+0010a6    31c0                (14)      MOVL                        $0,AX              ; return value: 0 -> eax (opcode bytes 31c0 are xor eax,eax)
+0010a8    83c420              (14)      ADDL                        $32,SP             ; | release the 32 bytes reserved in prolog
+0010ab    c3                  (14)      RET                         ,                  ; | epilog
+
+; vim: ft=asm
+