# HG changeset patch
# User Tassilo Philipp
# Date 1649080252 -7200
# Node ID fc614cb865c6796a7eae94cae1c904fe56b79e21
# Parent  fd9ba3a6d348fc85ddfcc1406cca566310ebafd6
- doc and disasexample additions specific to non-trivial C++ aggregates as return values (incl. fixes to doc and additional LSB specific PPC32 section)

diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/arm.armhf.disas
--- a/doc/disas_examples/arm.armhf.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/arm.armhf.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -696,5 +696,79 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from raspbian-11-armelhf w/ gcc 10.2.1
+
+000103d0 <f1>:
+   103d0:       e52db004        push    {fp}
+   103d4:       e28db000        add     fp, sp, #0
+   103d8:       e3a03000        mov     r3, #0
+   103dc:       e1a00003        mov     r0, r3
+   103e0:       e28bd000        add     sp, fp, #0
+   103e4:       e49db004        pop     {fp}
+   103e8:       e12fff1e        bx      lr
+
+000103ec <_Z2f2v>:
+   103ec:       e92d4800        push    {fp, lr}
+   103f0:       e28db004        add     fp, sp, #4
+   103f4:       e24dd008        sub     sp, sp, #8
+   103f8:       e50b0008        str     r0, [fp, #-8]
+   103fc:       e51b0008        ldr     r0, [fp, #-8]
+   10400:       eb00001f        bl      10484 <_ZN10NonTrivialC1Ev>
+   10404:       e51b0008        ldr     r0, [fp, #-8]  ; ptr to retval space -> r0
+   10408:       e24bd004        sub     sp, fp, #4
+   1040c:       e8bd8800        pop     {fp, pc}
+
+00010410 <f>:
+   10410:       e92d4800        push    {fp, lr}       ; |
+   10414:       e28db004        add     fp, sp, #4     ; | prolog
+   10418:       e24dd010        sub     sp, sp, #16    ; /
+   1041c:       e3a03001        mov     r3, #1         ; \ a = 1
+   10420:       e50b3008        str     r3, [fp, #-8]  ; /
+   10424:       e51b3008        ldr     r3, [fp, #-8]  ; \
+   10428:       e283307b        add     r3, r3, #123   ; | a += 123
+   1042c:       e50b3008        str     r3, [fp, #-8]  ; |
+   10430:       ebffffe6        bl      103d0 <f1>     ; call f1()
+   10434:       e1a03000        mov     r3, r0         ; | retval via r0, as small struct
+   10438:       e50b300c        str     r3, [fp, #-12] ; /
+   1043c:       e51b3008        ldr     r3, [fp, #-8]  ; \
+   10440:       e243307b        sub     r3, r3, #123   ; | a -= 123
+   10444:       e50b3008        str     r3, [fp, #-8]  ; |
+   10448:       e24b3010        sub     r3, fp, #16    ; space to hold non-triv retval -> r3 ...
+   1044c:       e1a00003        mov     r0, r3         ; ... as hidden first arg (r0)
+   10450:       ebffffe5        bl      103ec <_Z2f2v> ; call f2()
+   10454:       e51b3008        ldr     r3, [fp, #-8]  ; |
+   10458:       e243300c        sub     r3, r3, #12    ; | a -= 12
+   1045c:       e50b3008        str     r3, [fp, #-8]  ; /
+   10460:       e1a00000        nop                    ; \
+   10464:       e24bd004        sub     sp, fp, #4     ; | epilog
+   10468:       e8bd8800        pop     {fp, pc}       ; |
+
+
+
 ; vim: ft=asm68k
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/arm.atpcs_arm.disas
--- a/doc/disas_examples/arm.atpcs_arm.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/arm.atpcs_arm.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -830,5 +830,81 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from debian-6.0.8-armel w/ gcc 4.4.5
+
+00000000 <f1>:
+   0:   e52db004        push    {fp}
+   4:   e28db000        add     fp, sp, #0
+   8:   e3a03000        mov     r3, #0
+   c:   e1a00003        mov     r0, r3
+  10:   e28bd000        add     sp, fp, #0
+  14:   e8bd0800        pop     {fp}
+  18:   e12fff1e        bx      lr
+
+0000001c <_Z2f2v>:
+  1c:   e92d4830        push    {r4, r5, fp, lr}
+  20:   e28db00c        add     fp, sp, #12
+  24:   e1a04000        mov     r4, r0
+  28:   e1a03004        mov     r3, r4
+  2c:   e1a00003        mov     r0, r3
+  30:   ebfffffe        bl      0              ; ctor
+  34:   e1a00004        mov     r0, r4         ; ptr to retval space -> r0
+  38:   e1a00004        mov     r0, r4
+  3c:   e24bd00c        sub     sp, fp, #12
+  40:   e8bd4830        pop     {r4, r5, fp, lr}
+  44:   e12fff1e        bx      lr
+
+00000048 <f>:
+  48:   e92d4800        push    {fp, lr}       ;
+  4c:   e28db004        add     fp, sp, #4     ;
+  50:   e24dd010        sub     sp, sp, #16    ;
+  54:   e3a03001        mov     r3, #1         ;
+  58:   e50b3008        str     r3, [fp, #-8]  ;
+  5c:   e51b3008        ldr     r3, [fp, #-8]  ;
+  60:   e283307b        add     r3, r3, #123   ;
+  64:   e50b3008        str     r3, [fp, #-8]  ;
+  68:   ebfffffe        bl      0 <f1>         ; call f1()
+  6c:   e1a03000        mov     r3, r0         ; retval via r0, as small struct
+  70:   e50b300c        str     r3, [fp, #-12] ;
+  74:   e51b3008        ldr     r3, [fp, #-8]  ;
+  78:   e243307b        sub     r3, r3, #123   ;
+  7c:   e50b3008        str     r3, [fp, #-8]  ;
+  80:   e24b3010        sub     r3, fp, #16    ; space to hold non-triv retval -> r3 ...
+  84:   e1a00003        mov     r0, r3         ; ... as hidden first arg (r0)
+  88:   ebfffffe        bl      1c <_Z2f2v>    ; call f2()
+  8c:   e51b3008        ldr     r3, [fp, #-8]  ;
+  90:   e243300c        sub     r3, r3, #12    ;
+  94:   e50b3008        str     r3, [fp, #-8]  ;
+  98:   e24bd004        sub     sp, fp, #4     ;
+  9c:   e8bd4800        pop     {fp, lr}       ;
+  a0:   e12fff1e        bx      lr             ;
+
+
+
 ; vim: ft=asm68k
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/arm64.aapcs.disas
--- a/doc/disas_examples/arm64.aapcs.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/arm64.aapcs.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1856,7 +1856,7 @@
       dc:       e8 07 00 f9     str     x8, [sp, #8]        ;
       e0:       e9 03 00 f9     str     x9, [sp]            ;
       e4:       c7 ff ff 97     bl      #-228 <f0>          ; return address -> r30/lr, and call
-      e8:       ea 03 00 2a     mov     w10, w0             ;
+      e8:       ea 03 00 2a     mov     w10, w0             ; retval via w0, as small struct
       ec:       e8 07 40 f9     ldr     x8, [sp, #8]        ;
       f0:       0a 01 00 39     strb    w10, [x8]           ;
       f4:       e8 03 40 f9     ldr     x8, [sp]            ; ptr to retval struct, as sizeof(struct Big) > 16
@@ -2257,5 +2257,89 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-13.0_r348764-arm64 w/ clang 8.0.0
+
+0000000000210250 f1:
+  210250:       ff 83 00 d1     sub     sp, sp, #32
+  210254:       fe 0b 00 f9     str     x30, [sp, #16]
+  210258:       08 00 80 52     mov     w8, #0
+  21025c:       e2 03 7e b2     orr     x2, xzr, #0x4
+  210260:       e9 33 00 91     add     x9, sp, #12
+  210264:       e0 03 09 aa     mov     x0, x9
+  210268:       e1 03 08 2a     mov     w1, w8
+  21026c:       e9 03 00 f9     str     x9, [sp]
+  210270:       64 00 00 94     bl      #400 <memset@plt>
+  210274:       e9 03 40 f9     ldr     x9, [sp]
+  210278:       28 01 40 b9     ldr     w8, [x9]
+  21027c:       e0 03 08 2a     mov     w0, w8
+  210280:       00 7c 40 d3     ubfx    x0, x0, #0, #32
+  210284:       fe 0b 40 f9     ldr     x30, [sp, #16]
+  210288:       ff 83 00 91     add     sp, sp, #32
+  21028c:       c0 03 5f d6     ret
+
+0000000000210290 _Z2f2v:
+  210290:       fd 7b bf a9     stp     x29, x30, [sp, #-16]!
+  210294:       fd 03 00 91     mov     x29, sp
+  210298:       e0 03 08 aa     mov     x0, x8              ; ptr to retval space -> x0
+  21029c:       29 00 00 94     bl      #164 <_ZN10NonTrivialC2Ev>
+  2102a0:       fd 7b c1 a8     ldp     x29, x30, [sp], #16
+  2102a4:       c0 03 5f d6     ret
+
+00000000002102a8 f:
+  2102a8:       ff c3 00 d1     sub     sp, sp, #48         ;
+  2102ac:       fd 7b 02 a9     stp     x29, x30, [sp, #32] ;
+  2102b0:       fd 83 00 91     add     x29, sp, #32        ;
+  2102b4:       e8 03 00 32     orr     w8, wzr, #0x1       ;
+  2102b8:       a9 23 00 d1     sub     x9, x29, #8         ;
+  2102bc:       aa 33 00 d1     sub     x10, x29, #12       ;
+  2102c0:       a8 c3 1f b8     stur    w8, [x29, #-4]      ;
+  2102c4:       a8 c3 5f b8     ldur    w8, [x29, #-4]      ;
+  2102c8:       08 ed 01 11     add     w8, w8, #123        ;
+  2102cc:       a8 c3 1f b8     stur    w8, [x29, #-4]      ;
+  2102d0:       e9 07 00 f9     str     x9, [sp, #8]        ;
+  2102d4:       ea 03 00 f9     str     x10, [sp]           ;
+  2102d8:       de ff ff 97     bl      #-136 <f1>          ; call f1()
+  2102dc:       e8 03 00 2a     mov     w8, w0              ; retval via w0, as small struct
+  2102e0:       e9 07 40 f9     ldr     x9, [sp, #8]        ;
+  2102e4:       28 01 00 b9     str     w8, [x9]            ;
+  2102e8:       a8 c3 5f b8     ldur    w8, [x29, #-4]      ;
+  2102ec:       08 ed 01 71     subs    w8, w8, #123        ;
+  2102f0:       a8 c3 1f b8     stur    w8, [x29, #-4]      ;
+  2102f4:       e8 03 40 f9     ldr     x8, [sp]            ; ptr to retval struct via dedicated reg x8, as non-trivial aggr
+  2102f8:       e6 ff ff 97     bl      #-104 <_Z2f2v>      ; call f2()
+  2102fc:       ab c3 5f b8     ldur    w11, [x29, #-4]     ;
+  210300:       6b 31 00 71     subs    w11, w11, #12       ;
+  210304:       ab c3 1f b8     stur    w11, [x29, #-4]     ;
+  210308:       fd 7b 42 a9     ldp     x29, x30, [sp, #32] ;
+  21030c:       ff c3 00 91     add     sp, sp, #48         ;
+  210310:       c0 03 5f d6     ret                         ;
+
+
+
 ; vim: ft=asm68k
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/mips.o32.disas
--- a/doc/disas_examples/mips.o32.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/mips.o32.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1862,5 +1862,111 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-12.0_r333647-malta_mipsebhf w/ gcc 4.2.1
+
+004008b0 <f1>:
+  4008b0:       3c1c0002        lui     gp,0x2
+  4008b4:       279c8310        addiu   gp,gp,-31984
+  4008b8:       0399e021        addu    gp,gp,t9
+  4008bc:       27bdfff8        addiu   sp,sp,-8
+  4008c0:       afbe0000        sw      s8,0(sp)
+  4008c4:       03a0f021        move    s8,sp
+  4008c8:       00801021        move    v0,a0         ; ptr to retval space -> v0
+  4008cc:       ac400000        sw      zero,0(v0)    ; return val
+  4008d0:       03c0e821        move    sp,s8
+  4008d4:       8fbe0000        lw      s8,0(sp)
+  4008d8:       03e00008        jr      ra
+  4008dc:       27bd0008        addiu   sp,sp,8
+
+004008e0 <_Z2f2v>:
+  4008e0:       3c1c0002        lui     gp,0x2        ; |
+  4008e4:       279c82e0        addiu   gp,gp,-32032  ; |
+  4008e8:       0399e021        addu    gp,gp,t9      ; |
+  4008ec:       27bdffd8        addiu   sp,sp,-40     ; | prolog
+  4008f0:       afbf0020        sw      ra,32(sp)     ; |
+  4008f4:       afbe001c        sw      s8,28(sp)     ; |
+  4008f8:       afb00018        sw      s0,24(sp)     ; |
+  4008fc:       03a0f021        move    s8,sp         ; |         frame pointer (note: with offset to frame start, but static compared to sp)
+  400900:       afbc0010        sw      gp,16(sp)     ;
+  400904:       00808021        move    s0,a0         ;
+  400908:       02001021        move    v0,s0         ;
+  40090c:       00402021        move    a0,v0         ;
+  400910:       8f998060        lw      t9,-32672(gp) ;
+  400914:       0320f809        jalr    t9            ;
+  400918:       00000000        nop                   ;
+  40091c:       8fdc0010        lw      gp,16(s8)     ;
+  400920:       02001021        move    v0,s0         ; ptr to retval space -> v0
+  400924:       03c0e821        move    sp,s8         ; |
+  400928:       8fbf0020        lw      ra,32(sp)     ; |
+  40092c:       8fbe001c        lw      s8,28(sp)     ; |
+  400930:       8fb00018        lw      s0,24(sp)     ; | epilog
+  400934:       03e00008        jr      ra            ; |
+  400938:       27bd0028        addiu   sp,sp,40      ; |
+
+0040093c <f>:
+  40093c:       3c1c0002        lui     gp,0x2        ;
+  400940:       279c8284        addiu   gp,gp,-32124  ;
+  400944:       0399e021        addu    gp,gp,t9      ;
+  400948:       27bdffd0        addiu   sp,sp,-48     ;
+  40094c:       afbf002c        sw      ra,44(sp)     ;
+  400950:       afbe0028        sw      s8,40(sp)     ;
+  400954:       03a0f021        move    s8,sp         ;
+  400958:       afbc0010        sw      gp,16(sp)     ;
+  40095c:       24020001        li      v0,1          ;
+  400960:       afc20018        sw      v0,24(s8)     ;
+  400964:       8fc20018        lw      v0,24(s8)     ;
+  400968:       2442007b        addiu   v0,v0,123     ;
+  40096c:       afc20018        sw      v0,24(s8)     ;
+  400970:       27c2001c        addiu   v0,s8,28      ;
+  400974:       00402021        move    a0,v0         ; hidden first arg (ptr to space for ret val)
+  400978:       8f99805c        lw      t9,-32676(gp) ; |
+  40097c:       0320f809        jalr    t9            ; | call f1()
+  400980:       00000000        nop                   ;
+  400984:       8fdc0010        lw      gp,16(s8)     ;
+  400988:       8fc20018        lw      v0,24(s8)     ;
+  40098c:       2442ff85        addiu   v0,v0,-123    ;
+  400990:       afc20018        sw      v0,24(s8)     ;
+  400994:       27c20020        addiu   v0,s8,32      ;
+  400998:       00402021        move    a0,v0         ; hidden first arg (ptr to space for ret val)
+  40099c:       8f998064        lw      t9,-32668(gp) ; |
+  4009a0:       0320f809        jalr    t9            ; | call f2()
+  4009a4:       00000000        nop                   ;
+  4009a8:       8fdc0010        lw      gp,16(s8)     ;
+  4009ac:       8fc20018        lw      v0,24(s8)     ;
+  4009b0:       2442fff4        addiu   v0,v0,-12     ;
+  4009b4:       afc20018        sw      v0,24(s8)     ;
+  4009b8:       03c0e821        move    sp,s8         ;
+  4009bc:       8fbf002c        lw      ra,44(sp)     ;
+  4009c0:       8fbe0028        lw      s8,40(sp)     ;
+  4009c4:       03e00008        jr      ra            ;
+  4009c8:       27bd0030        addiu   sp,sp,48      ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/mips64.n64.disas
--- a/doc/disas_examples/mips64.n64.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/mips64.n64.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -3346,5 +3346,114 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-12.0_r333647-malta_mips64ebhf w/ gcc 4.2.1
+
+0000000120000b60 <f1>:
+   120000b60:   67bdfff0        daddiu  sp,sp,-16
+   120000b64:   ffbe0008        sd      s8,8(sp)
+   120000b68:   ffbc0000        sd      gp,0(sp)
+   120000b6c:   03a0f02d        move    s8,sp
+   120000b70:   3c1c0002        lui     gp,0x2
+   120000b74:   0399e02d        daddu   gp,gp,t9
+   120000b78:   679c8320        daddiu  gp,gp,-31968
+   120000b7c:   0000102d        move    v0,zero
+   120000b80:   0002103c        dsll32  v0,v0,0x0
+   120000b84:   03c0e82d        move    sp,s8
+   120000b88:   dfbe0008        ld      s8,8(sp)
+   120000b8c:   dfbc0000        ld      gp,0(sp)
+   120000b90:   03e00008        jr      ra
+   120000b94:   67bd0010        daddiu  sp,sp,16
+
+0000000120000b98 <_Z2f2v>:
+   120000b98:   67bdffe0        daddiu  sp,sp,-32
+   120000b9c:   ffbf0018        sd      ra,24(sp)
+   120000ba0:   ffbe0010        sd      s8,16(sp)
+   120000ba4:   ffbc0008        sd      gp,8(sp)
+   120000ba8:   ffb00000        sd      s0,0(sp)
+   120000bac:   03a0f02d        move    s8,sp
+   120000bb0:   3c1c0002        lui     gp,0x2
+   120000bb4:   0399e02d        daddu   gp,gp,t9
+   120000bb8:   679c82e8        daddiu  gp,gp,-32024
+   120000bbc:   0080802d        move    s0,a0
+   120000bc0:   0200102d        move    v0,s0
+   120000bc4:   0040202d        move    a0,v0
+   120000bc8:   df9980e0        ld      t9,-32544(gp)
+   120000bcc:   0320f809        jalr    t9
+   120000bd0:   00000000        nop
+   120000bd4:   0200102d        move    v0,s0         ; ptr to retval space -> v0
+   120000bd8:   03c0e82d        move    sp,s8
+   120000bdc:   dfbf0018        ld      ra,24(sp)
+   120000be0:   dfbe0010        ld      s8,16(sp)
+   120000be4:   dfbc0008        ld      gp,8(sp)
+   120000be8:   dfb00000        ld      s0,0(sp)
+   120000bec:   03e00008        jr      ra
+   120000bf0:   67bd0020        daddiu  sp,sp,32
+   120000bf4:   00000000        nop
+
+0000000120000bf8 <f>:
+   120000bf8:   67bdffd0        daddiu  sp,sp,-48     ;
+   120000bfc:   ffbf0020        sd      ra,32(sp)     ;
+   120000c00:   ffbe0018        sd      s8,24(sp)     ;
+   120000c04:   ffbc0010        sd      gp,16(sp)     ;
+   120000c08:   03a0f02d        move    s8,sp         ;
+   120000c0c:   3c1c0002        lui     gp,0x2        ;
+   120000c10:   0399e02d        daddu   gp,gp,t9      ;
+   120000c14:   679c8288        daddiu  gp,gp,-32120  ;
+   120000c18:   24020001        li      v0,1          ;
+   120000c1c:   afc20000        sw      v0,0(s8)      ;
+   120000c20:   8fc20000        lw      v0,0(s8)      ;
+   120000c24:   2442007b        addiu   v0,v0,123     ;
+   120000c28:   afc20000        sw      v0,0(s8)      ;
+   120000c2c:   df9980d8        ld      t9,-32552(gp) ; | call f1()
+   120000c30:   0320f809        jalr    t9            ; |
+   120000c34:   00000000        nop                   ;
+   120000c38:   0002103f        dsra32  v0,v0,0x0     ; | returned via reg v0, as small and trivial
+   120000c3c:   afc20004        sw      v0,4(s8)      ; |
+   120000c40:   8fc20000        lw      v0,0(s8)      ;
+   120000c44:   2442ff85        addiu   v0,v0,-123    ;
+   120000c48:   afc20000        sw      v0,0(s8)      ;
+   120000c4c:   67c20008        daddiu  v0,s8,8       ;
+   120000c50:   0040202d        move    a0,v0         ; hidden first arg (ptr to space for ret val)
+   120000c54:   df9980e8        ld      t9,-32536(gp) ; | call f2()
+   120000c58:   0320f809        jalr    t9            ; |
+   120000c5c:   00000000        nop                   ;
+   120000c60:   8fc20000        lw      v0,0(s8)      ;
+   120000c64:   2442fff4        addiu   v0,v0,-12     ;
+   120000c68:   afc20000        sw      v0,0(s8)      ;
+   120000c6c:   03c0e82d        move    sp,s8         ;
+   120000c70:   dfbf0020        ld      ra,32(sp)     ;
+   120000c74:   dfbe0018        ld      s8,24(sp)     ;
+   120000c78:   dfbc0010        ld      gp,16(sp)     ;
+   120000c7c:   03e00008        jr      ra            ;
+   120000c80:   67bd0030        daddiu  sp,sp,48      ;
+   120000c84:   00000000        nop                   ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/ppc.darwin.disas
--- a/doc/disas_examples/ppc.darwin.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/ppc.darwin.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1035,5 +1035,89 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from darwin-8.0.1-ppc w/ gcc 3.3
+
+_f1:
+0000211c	stmw	r30,0xfff8(r1)
+00002120	stwu	r1,0xffd0(r1)
+00002124	or	r30,r1,r1
+00002128	or	r2,r3,r3
+0000212c	li	r0,0x0
+00002130	stw	r0,0x0(r2)
+00002134	or	r3,r2,r2
+00002138	lwz	r1,0x0(r1)
+0000213c	lmw	r30,0xfff8(r1)
+00002140	blr
+__Z2f2v:
+00002144	mfspr	r0,lr
+00002148	stmw	r29,0xfff4(r1)
+0000214c	stw	r0,0x8(r1)
+00002150	stwu	r1,0xffb0(r1)
+00002154	or	r30,r1,r1
+00002158	or	r29,r3,r3
+0000215c	or	r3,r29,r29
+00002160	bl	0x83cc
+00002164	or	r3,r29,r29
+00002168	lwz	r1,0x0(r1)
+0000216c	lwz	r0,0x8(r1)
+00002170	mtspr	lr,r0
+00002174	lmw	r29,0xfff4(r1)
+00002178	blr
+_f:
+0000217c	mfspr	r0,lr          ;
+00002180	stmw	r30,0xfff8(r1) ;
+00002184	stw	r0,0x8(r1)         ;
+00002188	stwu	r1,0xff90(r1)  ;
+0000218c	or	r30,r1,r1          ;
+00002190	li	r0,0x1             ;
+00002194	stw	r0,0x40(r30)       ;
+00002198	lwz	r2,0x40(r30)       ;
+0000219c	addi	r0,r2,0x7b     ;
+000021a0	stw	r0,0x40(r30)       ;
+000021a4	addi	r0,r30,0x44    ; |
+000021a8	or	r3,r0,r0           ; | ptr to space for retval is passed as hidden first arg
+000021ac	bl	0x211c             ; call f1()
+000021b0	lwz	r2,0x40(r30)       ;
+000021b4	addi	r0,r2,0xff85   ;
+000021b8	stw	r0,0x40(r30)       ;
+000021bc	addi	r0,r30,0x50    ; |
+000021c0	or	r3,r0,r0           ; | ptr to space for retval is passed as hidden first arg
+000021c4	bl	0x2144             ; call f2()
+000021c8	lwz	r2,0x40(r30)       ;
+000021cc	addi	r0,r2,0xfff4   ;
+000021d0	stw	r0,0x40(r30)       ;
+000021d4	lwz	r1,0x0(r1)         ;
+000021d8	lwz	r0,0x8(r1)         ;
+000021dc	mtspr	lr,r0          ;
+000021e0	lmw	r30,0xfff8(r1)     ;
+000021e4	blr                    ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/ppc.sysv.disas
--- a/doc/disas_examples/ppc.sysv.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/ppc.sysv.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1038,79 +1038,151 @@
 
 
 
-; output from debian-4.1.1-21-ppc w/ gcc 4.1.2
+; output from debian-4.1.1-21-ppc w/ gcc 4.1.2 (demonstrates LSB specifics about aggr return values)
 
 00000000 <f0>:
-   0:   94 21 ff e0     stwu    r1,-32(r1)
-   4:   93 e1 00 1c     stw     r31,28(r1)
-   8:   7c 3f 0b 78     mr      r31,r1
-   c:   7c 69 1b 78     mr      r9,r3
-  10:   38 00 ff 84     li      r0,-124
-  14:   98 1f 00 08     stb     r0,8(r31)
-  18:   88 1f 00 08     lbz     r0,8(r31)
-  1c:   98 09 00 00     stb     r0,0(r9)
-  20:   7d 23 4b 78     mr      r3,r9
-  24:   81 61 00 00     lwz     r11,0(r1)
-  28:   83 eb ff fc     lwz     r31,-4(r11)
-  2c:   7d 61 5b 78     mr      r1,r11
-  30:   4e 80 00 20     blr
+   0:   94 21 ff e0     stwu    r1,-32(r1)     ;
+   4:   93 e1 00 1c     stw     r31,28(r1)     ;
+   8:   7c 3f 0b 78     mr      r31,r1         ;
+   c:   7c 69 1b 78     mr      r9,r3          ;
+  10:   38 00 ff 84     li      r0,-124        ;
+  14:   98 1f 00 08     stb     r0,8(r31)      ;
+  18:   88 1f 00 08     lbz     r0,8(r31)      ;
+  1c:   98 09 00 00     stb     r0,0(r9)       ;
+  20:   7d 23 4b 78     mr      r3,r9          ;
+  24:   81 61 00 00     lwz     r11,0(r1)      ;
+  28:   83 eb ff fc     lwz     r31,-4(r11)    ;
+  2c:   7d 61 5b 78     mr      r1,r11         ;
+  30:   4e 80 00 20     blr                    ;
 
 00000034 <f1>:
-  34:   94 21 ff e0     stwu    r1,-32(r1)
-  38:   93 e1 00 1c     stw     r31,28(r1)
-  3c:   7c 3f 0b 78     mr      r31,r1
-  40:   7c 6b 1b 78     mr      r11,r3
-  44:   3d 20 00 00     lis     r9,0
-  48:   39 29 00 00     addi    r9,r9,0
-  4c:   80 09 00 00     lwz     r0,0(r9)
-  50:   81 49 00 04     lwz     r10,4(r9)
-  54:   81 09 00 08     lwz     r8,8(r9)
-  58:   81 29 00 0c     lwz     r9,12(r9)
-  5c:   90 1f 00 08     stw     r0,8(r31)
-  60:   91 5f 00 0c     stw     r10,12(r31)
-  64:   91 1f 00 10     stw     r8,16(r31)
-  68:   91 3f 00 14     stw     r9,20(r31)
-  6c:   80 1f 00 08     lwz     r0,8(r31)
-  70:   81 3f 00 0c     lwz     r9,12(r31)
-  74:   81 5f 00 10     lwz     r10,16(r31)
-  78:   81 1f 00 14     lwz     r8,20(r31)
-  7c:   90 0b 00 00     stw     r0,0(r11)
-  80:   91 2b 00 04     stw     r9,4(r11)
-  84:   91 4b 00 08     stw     r10,8(r11)
-  88:   91 0b 00 0c     stw     r8,12(r11)
-  8c:   7d 63 5b 78     mr      r3,r11
-  90:   81 61 00 00     lwz     r11,0(r1)
-  94:   83 eb ff fc     lwz     r31,-4(r11)
-  98:   7d 61 5b 78     mr      r1,r11
-  9c:   4e 80 00 20     blr
+  34:   94 21 ff e0     stwu    r1,-32(r1)     ;
+  38:   93 e1 00 1c     stw     r31,28(r1)     ;
+  3c:   7c 3f 0b 78     mr      r31,r1         ;
+  40:   7c 6b 1b 78     mr      r11,r3         ;
+  44:   3d 20 00 00     lis     r9,0           ;
+  48:   39 29 00 00     addi    r9,r9,0        ;
+  4c:   80 09 00 00     lwz     r0,0(r9)       ;
+  50:   81 49 00 04     lwz     r10,4(r9)      ;
+  54:   81 09 00 08     lwz     r8,8(r9)       ;
+  58:   81 29 00 0c     lwz     r9,12(r9)      ;
+  5c:   90 1f 00 08     stw     r0,8(r31)      ;
+  60:   91 5f 00 0c     stw     r10,12(r31)    ;
+  64:   91 1f 00 10     stw     r8,16(r31)     ;
+  68:   91 3f 00 14     stw     r9,20(r31)     ;
+  6c:   80 1f 00 08     lwz     r0,8(r31)      ;
+  70:   81 3f 00 0c     lwz     r9,12(r31)     ;
+  74:   81 5f 00 10     lwz     r10,16(r31)    ;
+  78:   81 1f 00 14     lwz     r8,20(r31)     ;
+  7c:   90 0b 00 00     stw     r0,0(r11)      ;
+  80:   91 2b 00 04     stw     r9,4(r11)      ;
+  84:   91 4b 00 08     stw     r10,8(r11)     ;
+  88:   91 0b 00 0c     stw     r8,12(r11)     ;
+  8c:   7d 63 5b 78     mr      r3,r11         ;
+  90:   81 61 00 00     lwz     r11,0(r1)      ;
+  94:   83 eb ff fc     lwz     r31,-4(r11)    ;
+  98:   7d 61 5b 78     mr      r1,r11         ;
+  9c:   4e 80 00 20     blr                    ;
 
 000000a0 <main>:
-  a0:   94 21 ff c0     stwu    r1,-64(r1)
-  a4:   7c 08 02 a6     mflr    r0
-  a8:   93 e1 00 3c     stw     r31,60(r1)
-  ac:   90 01 00 44     stw     r0,68(r1)
-  b0:   7c 3f 0b 78     mr      r31,r1
-  b4:   38 1f 00 28     addi    r0,r31,40
-  b8:   7c 03 03 78     mr      r3,r0
-  bc:   4c c6 31 82     crclr   4*cr1+eq
-  c0:   48 00 00 01     bl      c0 <main+0x20>
-  c4:   88 1f 00 28     lbz     r0,40(r31)
-  c8:   98 1f 00 08     stb     r0,8(r31)
-  cc:   38 1f 00 10     addi    r0,r31,16
-  d0:   7c 03 03 78     mr      r3,r0
-  d4:   4c c6 31 82     crclr   4*cr1+eq
-  d8:   48 00 00 01     bl      d8 <main+0x38>
-  dc:   81 3f 00 18     lwz     r9,24(r31)
-  e0:   88 1f 00 08     lbz     r0,8(r31)
-  e4:   54 00 06 3e     clrlwi  r0,r0,24
-  e8:   7c 09 02 14     add     r0,r9,r0
-  ec:   7c 03 03 78     mr      r3,r0
-  f0:   81 61 00 00     lwz     r11,0(r1)
-  f4:   80 0b 00 04     lwz     r0,4(r11)
-  f8:   7c 08 03 a6     mtlr    r0
-  fc:   83 eb ff fc     lwz     r31,-4(r11)
- 100:   7d 61 5b 78     mr      r1,r11
- 104:   4e 80 00 20     blr
+  a0:   94 21 ff c0     stwu    r1,-64(r1)     ; |           open frame and store sp at top of stack
+  a4:   7c 08 02 a6     mflr    r0             ; |           lr -> gpr0
+  a8:   93 e1 00 3c     stw     r31,60(r1)     ; | prolog    store gpr31
+  ac:   90 01 00 44     stw     r0,68(r1)      ; |           store lr
+  b0:   7c 3f 0b 78     mr      r31,r1         ; /           sp -> gpr31, latter used for some fixed addressing below
+  b4:   38 1f 00 28     addi    r0,r31,40      ; \
+  b8:   7c 03 03 78     mr      r3,r0          ; | ptr to space for retval -> gpr3 (hidden arg)
+  bc:   4c c6 31 82     crclr   4*cr1+eq       ; :
+  c0:   48 00 00 01     bl      c0 <main+0x20> ; call f0()
+  c4:   88 1f 00 28     lbz     r0,40(r31)     ;
+  c8:   98 1f 00 08     stb     r0,8(r31)      ;
+  cc:   38 1f 00 10     addi    r0,r31,16      ; |
+  d0:   7c 03 03 78     mr      r3,r0          ; | ptr to space for retval -> gpr3 (hidden arg)
+  d4:   4c c6 31 82     crclr   4*cr1+eq       ; :
+  d8:   48 00 00 01     bl      d8 <main+0x38> ; call f1()
+  dc:   81 3f 00 18     lwz     r9,24(r31)     ;
+  e0:   88 1f 00 08     lbz     r0,8(r31)      ;
+  e4:   54 00 06 3e     clrlwi  r0,r0,24       ;
+  e8:   7c 09 02 14     add     r0,r9,r0       ;
+  ec:   7c 03 03 78     mr      r3,r0          ;
+  f0:   81 61 00 00     lwz     r11,0(r1)      ; |
+  f4:   80 0b 00 04     lwz     r0,4(r11)      ; |
+  f8:   7c 08 03 a6     mtlr    r0             ; |
+  fc:   83 eb ff fc     lwz     r31,-4(r11)    ; | epilog
+ 100:   7d 61 5b 78     mr      r1,r11         ; |
+ 104:   4e 80 00 20     blr                    ; |
+
+
+
+; output from netbsd-4.0.1-macppc w/ gcc 4.1.2 (demonstrates default (non-LSB) aggr return values)
+
+018007a0 <f0>:
+ 18007a0:       94 21 ff d0     stwu    r1,-48(r1)
+ 18007a4:       93 e1 00 2c     stw     r31,44(r1)
+ 18007a8:       7c 3f 0b 78     mr      r31,r1
+ 18007ac:       38 00 ff 84     li      r0,-124
+ 18007b0:       98 1f 00 08     stb     r0,8(r31)
+ 18007b4:       88 1f 00 08     lbz     r0,8(r31)
+ 18007b8:       7c 03 03 78     mr      r3,r0
+ 18007bc:       81 61 00 00     lwz     r11,0(r1)
+ 18007c0:       83 eb ff fc     lwz     r31,-4(r11)
+ 18007c4:       7d 61 5b 78     mr      r1,r11
+ 18007c8:       4e 80 00 20     blr
+
+018007cc <f1>:
+ 18007cc:       94 21 ff d0     stwu    r1,-48(r1)
+ 18007d0:       93 e1 00 2c     stw     r31,44(r1)
+ 18007d4:       7c 3f 0b 78     mr      r31,r1
+ 18007d8:       7c 6b 1b 78     mr      r11,r3
+ 18007dc:       3d 20 01 80     lis     r9,384
+ 18007e0:       39 29 09 80     addi    r9,r9,2432
+ 18007e4:       80 09 00 00     lwz     r0,0(r9)
+ 18007e8:       81 49 00 04     lwz     r10,4(r9)
+ 18007ec:       81 09 00 08     lwz     r8,8(r9)
+ 18007f0:       81 29 00 0c     lwz     r9,12(r9)
+ 18007f4:       90 1f 00 08     stw     r0,8(r31)
+ 18007f8:       91 5f 00 0c     stw     r10,12(r31)
+ 18007fc:       91 1f 00 10     stw     r8,16(r31)
+ 1800800:       91 3f 00 14     stw     r9,20(r31)
+ 1800804:       80 1f 00 08     lwz     r0,8(r31)
+ 1800808:       81 3f 00 0c     lwz     r9,12(r31)
+ 180080c:       81 5f 00 10     lwz     r10,16(r31)
+ 1800810:       81 1f 00 14     lwz     r8,20(r31)
+ 1800814:       90 0b 00 00     stw     r0,0(r11)
+ 1800818:       91 2b 00 04     stw     r9,4(r11)
+ 180081c:       91 4b 00 08     stw     r10,8(r11)
+ 1800820:       91 0b 00 0c     stw     r8,12(r11)
+ 1800824:       7d 63 5b 78     mr      r3,r11
+ 1800828:       81 61 00 00     lwz     r11,0(r1)
+ 180082c:       83 eb ff fc     lwz     r31,-4(r11)
+ 1800830:       7d 61 5b 78     mr      r1,r11
+ 1800834:       4e 80 00 20     blr
+
+01800838 <main>:
+ 1800838:       94 21 ff c0     stwu    r1,-64(r1)   ; |           open frame and store sp at top of stack
+ 180083c:       7c 08 02 a6     mflr    r0           ; |           lr -> gpr0
+ 1800840:       93 e1 00 3c     stw     r31,60(r1)   ; | prolog    store gpr31
+ 1800844:       90 01 00 44     stw     r0,68(r1)    ; |           store lr
+ 1800848:       7c 3f 0b 78     mr      r31,r1       ; |           sp -> gpr31, latter used for some fixed addressing below
+ 180084c:       4b ff ff 55     bl      18007a0 <f0> ; call f0()
+ 1800850:       7c 60 1b 78     mr      r0,r3        ; |           NOTE: ret val is returned via gpr3
+ 1800854:       98 1f 00 08     stb     r0,8(r31)    ; / ret val
+ 1800858:       38 1f 00 10     addi    r0,r31,16    ; \
+ 180085c:       7c 03 03 78     mr      r3,r0        ; | ptr to space for retval -> gpr3 (hidden arg)
+ 1800860:       4c c6 31 82     crclr   4*cr1+eq     ; :
+ 1800864:       4b ff ff 69     bl      18007cc <f1> ; call f1()
+ 1800868:       81 3f 00 18     lwz     r9,24(r31)   ;
+ 180086c:       88 1f 00 08     lbz     r0,8(r31)    ;
+ 1800870:       54 00 06 3e     clrlwi  r0,r0,24     ;
+ 1800874:       7c 09 02 14     add     r0,r9,r0     ;
+ 1800878:       7c 03 03 78     mr      r3,r0        ;
+ 180087c:       81 61 00 00     lwz     r11,0(r1)    ;
+ 1800880:       80 0b 00 04     lwz     r0,4(r11)    ;
+ 1800884:       7c 08 03 a6     mtlr    r0           ;
+ 1800888:       83 eb ff fc     lwz     r31,-4(r11)  ;
+ 180088c:       7d 61 5b 78     mr      r1,r11       ;
+ 1800890:       4e 80 00 20     blr                  ;
+ 1800894:       00 01 81 48     .long 0x18148        ;
 
 
 
@@ -1204,5 +1276,164 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from debian-4.1.1-21-ppc w/ gcc 4.1.2 (demonstrates LSB specifics about aggr return values)
+
+100003e0 <f1>:
+100003e0:       94 21 ff f0     stwu    r1,-16(r1)
+100003e4:       93 e1 00 0c     stw     r31,12(r1)
+100003e8:       7c 3f 0b 78     mr      r31,r1
+100003ec:       7c 69 1b 78     mr      r9,r3
+100003f0:       38 00 00 00     li      r0,0
+100003f4:       90 09 00 00     stw     r0,0(r9)
+100003f8:       7d 23 4b 78     mr      r3,r9
+100003fc:       81 61 00 00     lwz     r11,0(r1)
+10000400:       83 eb ff fc     lwz     r31,-4(r11)
+10000404:       7d 61 5b 78     mr      r1,r11
+10000408:       4e 80 00 20     blr
+
+1000040c <_Z2f2v>:
+1000040c:       94 21 ff e0     stwu    r1,-32(r1)
+10000410:       7c 08 02 a6     mflr    r0
+10000414:       93 a1 00 14     stw     r29,20(r1)
+10000418:       93 e1 00 1c     stw     r31,28(r1)
+1000041c:       90 01 00 24     stw     r0,36(r1)
+10000420:       7c 3f 0b 78     mr      r31,r1
+10000424:       7c 7d 1b 78     mr      r29,r3
+10000428:       7f a0 eb 78     mr      r0,r29
+1000042c:       7c 03 03 78     mr      r3,r0
+10000430:       48 00 00 d5     bl      10000504 <_ZN10NonTrivialC1Ev>
+10000434:       7f a3 eb 78     mr      r3,r29
+10000438:       81 61 00 00     lwz     r11,0(r1)
+1000043c:       80 0b 00 04     lwz     r0,4(r11)
+10000440:       7c 08 03 a6     mtlr    r0
+10000444:       83 ab ff f4     lwz     r29,-12(r11)
+10000448:       83 eb ff fc     lwz     r31,-4(r11)
+1000044c:       7d 61 5b 78     mr      r1,r11
+10000450:       4e 80 00 20     blr
+
+10000454 <f>:
+10000454:       94 21 ff d0     stwu    r1,-48(r1)        ; |           open frame and store sp at top of stack
+10000458:       7c 08 02 a6     mflr    r0                ; |           lr -> gpr0
+1000045c:       93 e1 00 2c     stw     r31,44(r1)        ; | prolog    store gpr31
+10000460:       90 01 00 34     stw     r0,52(r1)         ; |           store lr
+10000464:       7c 3f 0b 78     mr      r31,r1            ; /           sp -> gpr31, latter used for some fixed addressing below
+10000468:       38 00 00 01     li      r0,1              ; \ a = 1
+1000046c:       90 1f 00 08     stw     r0,8(r31)         ; /
+10000470:       81 3f 00 08     lwz     r9,8(r31)         ; \
+10000474:       38 09 00 7b     addi    r0,r9,123         ; | a += 123
+10000478:       90 1f 00 08     stw     r0,8(r31)         ; /
+1000047c:       38 1f 00 18     addi    r0,r31,24         ; \
+10000480:       7c 03 03 78     mr      r3,r0             ; | ptr to space for retval -> gpr3 (hidden arg); NOTE: this follows the LSB definition
+10000484:       4b ff ff 5d     bl      100003e0 <f1>     ; call f1()
+10000488:       80 1f 00 18     lwz     r0,24(r31)        ;
+1000048c:       90 1f 00 0c     stw     r0,12(r31)        ;
+10000490:       81 3f 00 08     lwz     r9,8(r31)         ;
+10000494:       38 09 ff 85     addi    r0,r9,-123        ;
+10000498:       90 1f 00 08     stw     r0,8(r31)         ;
+1000049c:       38 1f 00 10     addi    r0,r31,16         ; |
+100004a0:       7c 03 03 78     mr      r3,r0             ; | hidden first arg (ptr to space to hold NonTrivial retval), via ptr as non-trivial
+100004a4:       4b ff ff 69     bl      1000040c <_Z2f2v> ; call f2()
+100004a8:       81 3f 00 08     lwz     r9,8(r31)         ;
+100004ac:       38 09 ff f4     addi    r0,r9,-12         ;
+100004b0:       90 1f 00 08     stw     r0,8(r31)         ;
+100004b4:       81 61 00 00     lwz     r11,0(r1)         ;
+100004b8:       80 0b 00 04     lwz     r0,4(r11)         ;
+100004bc:       7c 08 03 a6     mtlr    r0                ;
+100004c0:       83 eb ff fc     lwz     r31,-4(r11)       ;
+100004c4:       7d 61 5b 78     mr      r1,r11            ;
+100004c8:       4e 80 00 20     blr                       ;
+
+
+
+; output from netbsd-4.0.1-macppc w/ gcc 4.1.2 (demonstrates default (non-LSB) aggr return values)
+
+01800840 <f1>:
+ 1800840:       94 21 ff e0     stwu    r1,-32(r1)
+ 1800844:       93 e1 00 1c     stw     r31,28(r1)
+ 1800848:       7c 3f 0b 78     mr      r31,r1
+ 180084c:       38 00 00 00     li      r0,0
+ 1800850:       7c 03 03 78     mr      r3,r0
+ 1800854:       81 61 00 00     lwz     r11,0(r1)
+ 1800858:       83 eb ff fc     lwz     r31,-4(r11)
+ 180085c:       7d 61 5b 78     mr      r1,r11
+ 1800860:       4e 80 00 20     blr
+
+01800864 <_Z2f2v>:
+ 1800864:       94 21 ff e0     stwu    r1,-32(r1)
+ 1800868:       7c 08 02 a6     mflr    r0
+ 180086c:       93 a1 00 14     stw     r29,20(r1)
+ 1800870:       93 e1 00 1c     stw     r31,28(r1)
+ 1800874:       90 01 00 24     stw     r0,36(r1)
+ 1800878:       7c 3f 0b 78     mr      r31,r1
+ 180087c:       7c 7d 1b 78     mr      r29,r3
+ 1800880:       7f a0 eb 78     mr      r0,r29
+ 1800884:       7c 03 03 78     mr      r3,r0
+ 1800888:       48 00 00 cd     bl      1800954 <_ZN10NonTrivialC1Ev>
+ 180088c:       7f a3 eb 78     mr      r3,r29
+ 1800890:       81 61 00 00     lwz     r11,0(r1)
+ 1800894:       80 0b 00 04     lwz     r0,4(r11)
+ 1800898:       7c 08 03 a6     mtlr    r0
+ 180089c:       83 ab ff f4     lwz     r29,-12(r11)
+ 18008a0:       83 eb ff fc     lwz     r31,-4(r11)
+ 18008a4:       7d 61 5b 78     mr      r1,r11
+ 18008a8:       4e 80 00 20     blr
+
+018008ac <f>:
+ 18008ac:       94 21 ff d0     stwu    r1,-48(r1)       ;
+ 18008b0:       7c 08 02 a6     mflr    r0               ;
+ 18008b4:       93 e1 00 2c     stw     r31,44(r1)       ;
+ 18008b8:       90 01 00 34     stw     r0,52(r1)        ;
+ 18008bc:       7c 3f 0b 78     mr      r31,r1           ;
+ 18008c0:       38 00 00 01     li      r0,1             ;
+ 18008c4:       90 1f 00 08     stw     r0,8(r31)        ;
+ 18008c8:       81 3f 00 08     lwz     r9,8(r31)        ;
+ 18008cc:       38 09 00 7b     addi    r0,r9,123        ;
+ 18008d0:       90 1f 00 08     stw     r0,8(r31)        ; NOTE: no hidden param in contrast to LSB version of callconv
+ 18008d4:       4b ff ff 6d     bl      1800840 <f1>     ; call f1()
+ 18008d8:       7c 60 1b 78     mr      r0,r3            ; |           NOTE: ret val is returned via gpr3
+ 18008dc:       90 1f 00 0c     stw     r0,12(r31)       ; / ret val
+ 18008e0:       81 3f 00 08     lwz     r9,8(r31)        ;
+ 18008e4:       38 09 ff 85     addi    r0,r9,-123       ;
+ 18008e8:       90 1f 00 08     stw     r0,8(r31)        ;
+ 18008ec:       38 1f 00 10     addi    r0,r31,16        ; |
+ 18008f0:       7c 03 03 78     mr      r3,r0            ; | hidden first arg (ptr to space to hold NonTrivial retval), via ptr as non-trivial
+ 18008f4:       4b ff ff 71     bl      1800864 <_Z2f2v> ; call f2()
+ 18008f8:       81 3f 00 08     lwz     r9,8(r31)        ;
+ 18008fc:       38 09 ff f4     addi    r0,r9,-12        ;
+ 1800900:       90 1f 00 08     stw     r0,8(r31)        ;
+ 1800904:       81 61 00 00     lwz     r11,0(r1)        ;
+ 1800908:       80 0b 00 04     lwz     r0,4(r11)        ;
+ 180090c:       7c 08 03 a6     mtlr    r0               ;
+ 1800910:       83 eb ff fc     lwz     r31,-4(r11)      ;
+ 1800914:       7d 61 5b 78     mr      r1,r11           ;
+ 1800918:       4e 80 00 20     blr                      ;
+
+
+
 ; vim: ft=asm68k
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/ppc64.elfabi.disas
--- a/doc/disas_examples/ppc64.elfabi.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/ppc64.elfabi.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -830,5 +830,99 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-11.0-ppc64 w/ gcc 4.2.1
+
+0000000010000828 <.f1>:
+    10000828:   fb e1 ff f8     std     r31,-8(r1)                         ; |
+    1000082c:   f8 21 ff c1     stdu    r1,-64(r1)                         ; | prolog
+    10000830:   7c 3f 0b 78     mr      r31,r1                             ; use gpr31 as sort of frame pointer, below
+    10000834:   90 7f 00 70     stw     r3,112(r31)                        ;
+    10000838:   e8 21 00 00     ld      r1,0(r1)                           ; |
+    1000083c:   eb e1 ff f8     ld      r31,-8(r1)                         ; | epilog
+    10000840:   4e 80 00 20     blr                                        ; |
+    10000844:   00 00 00 00     .long 0x0                                  ; data
+    10000848:   00 09 00 00     .long 0x90000                              ; data
+    1000084c:   80 01 00 01     lwz     r0,1(r1)                           ; unsure@@@. data?
+
+0000000010000850 <.f2>:
+    10000850:   fb e1 ff f8     std     r31,-8(r1)                         ; |
+    10000854:   f8 21 ff c1     stdu    r1,-64(r1)                         ; | prolog
+    10000858:   7c 3f 0b 78     mr      r31,r1                             ; use gpr31 as sort of frame pointer, below
+    1000085c:   f8 7f 00 70     std     r3,112(r31)                        ;
+    10000860:   e8 21 00 00     ld      r1,0(r1)                           ; |
+    10000864:   eb e1 ff f8     ld      r31,-8(r1)                         ; | epilog
+    10000868:   4e 80 00 20     blr                                        ; |
+    1000086c:   00 00 00 00     .long 0x0                                  ; data
+    10000870:   00 09 00 00     .long 0x90000                              ; data
+    10000874:   80 01 00 01     lwz     r0,1(r1)                           ; unsure@@@. data?
+
+0000000010000878 <.f>:
+    10000878:   7c 08 02 a6     mflr    r0                                 ;
+    1000087c:   fb e1 ff f8     std     r31,-8(r1)                         ;
+    10000880:   f8 01 00 10     std     r0,16(r1)                          ;
+    10000884:   f8 21 ff 71     stdu    r1,-144(r1)                        ;
+    10000888:   7c 3f 0b 78     mr      r31,r1                             ;
+    1000088c:   38 1f 00 7c     addi    r0,r31,124                         ;
+    10000890:   7c 03 03 78     mr      r3,r0                              ;
+    10000894:   48 00 00 ad     bl      10000940 <._ZN10NonTrivialC1Ev>    ; ctor
+    10000898:   4f ff fb 82     crmove  4*cr7+so,4*cr7+so                  ;
+    1000089c:   38 00 00 01     li      r0,1                               ; | a = 1
+    100008a0:   90 1f 00 70     stw     r0,112(r31)                        ; /
+    100008a4:   81 3f 00 70     lwz     r9,112(r31)                        ; \
+    100008a8:   38 09 00 7b     addi    r0,r9,123                          ; | a += 123
+    100008ac:   90 1f 00 70     stw     r0,112(r31)                        ; |
+    100008b0:   80 7f 00 78     lwz     r3,120(r31)                        ; hidden first arg (ptr to space to hold Trivial retval) @@@ fetches addr from 120(r31)?
+    100008b4:   4b ff ff 75     bl      10000828 <.f1>                     ; call f1()
+    100008b8:   81 3f 00 70     lwz     r9,112(r31)                        ; |
+    100008bc:   38 09 ff 85     addi    r0,r9,-123                         ; | a -= 123
+    100008c0:   90 1f 00 70     stw     r0,112(r31)                        ; |
+    100008c4:   38 1f 00 74     addi    r0,r31,116                         ;
+    100008c8:   39 3f 00 7c     addi    r9,r31,124                         ;
+    100008cc:   7c 03 03 78     mr      r3,r0                              ;
+    100008d0:   7d 24 4b 78     mr      r4,r9                              ;
+    100008d4:   48 00 00 a1     bl      10000974 <._ZN10NonTrivialC1ERKS_> ; copy ctor
+    100008d8:   4f ff fb 82     crmove  4*cr7+so,4*cr7+so                  ;
+    100008dc:   38 1f 00 74     addi    r0,r31,116                         ;
+    100008e0:   7c 03 03 78     mr      r3,r0                              ; hidden first arg (ptr to space to hold NonTrivial retval)
+    100008e4:   4b ff ff 6d     bl      10000850 <.f2>                     ; call f2()
+    100008e8:   81 3f 00 70     lwz     r9,112(r31)                        ; |
+    100008ec:   38 09 ff f4     addi    r0,r9,-12                          ; | a -= 12
+    100008f0:   90 1f 00 70     stw     r0,112(r31)                        ; |
+    100008f4:   e8 21 00 00     ld      r1,0(r1)                           ;
+    100008f8:   e8 01 00 10     ld      r0,16(r1)                          ;
+    100008fc:   7c 08 03 a6     mtlr    r0                                 ;
+    10000900:   eb e1 ff f8     ld      r31,-8(r1)                         ;
+    10000904:   4e 80 00 20     blr                                        ;
+    10000908:   00 00 00 00     .long 0x0                                  ;
+    1000090c:   00 09 00 01     .long 0x90001                              ;
+    10000910:   80 01 00 01     lwz     r0,1(r1)                           ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/sparc.sparc.disas
--- a/doc/disas_examples/sparc.sparc.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/sparc.sparc.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1081,5 +1081,84 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from debian-4.0_r3-sparc w/ gcc 4.1.2
+
+00010504 <f1>:
+   10504:       9d e3 bf 98     save  %sp, -104, %sp
+   10508:       c2 07 a0 40     ld  [ %fp + 0x40 ], %g1
+   1050c:       c0 20 40 00     clr  [ %g1 ]
+   10510:       b0 10 00 01     mov  %g1, %i0
+   10514:       81 e8 00 00     restore
+   10518:       81 c3 e0 0c     jmp  %o7 + 0xc
+   1051c:       01 00 00 00     nop
+
+00010520 <_Z2f2v>:
+   10520:       9d e3 bf 98     save  %sp, -104, %sp
+   10524:       e0 07 a0 40     ld  [ %fp + 0x40 ], %l0
+   10528:       82 10 00 10     mov  %l0, %g1
+   1052c:       90 10 00 01     mov  %g1, %o0
+   10530:       40 00 00 29     call  105d4 <_ZN10NonTrivialC1Ev>
+   10534:       01 00 00 00     nop
+   10538:       b0 10 00 10     mov  %l0, %i0
+   1053c:       81 e8 00 00     restore
+   10540:       81 c3 e0 0c     jmp  %o7 + 0xc
+   10544:       01 00 00 00     nop
+
+00010548 <f>:
+   10548:       9d e3 bf 80     save  %sp, -128, %sp    ;
+   1054c:       82 10 20 01     mov  1, %g1             ;
+   10550:       c2 27 bf f4     st  %g1, [ %fp + -12 ]  ;
+   10554:       c2 07 bf f4     ld  [ %fp + -12 ], %g1  ;
+   10558:       82 00 60 7b     add  %g1, 0x7b, %g1     ;
+   1055c:       c2 27 bf f4     st  %g1, [ %fp + -12 ]  ;
+   10560:       82 07 bf e4     add  %fp, -28, %g1      ;
+   10564:       c2 23 a0 40     st  %g1, [ %sp + 0x40 ] ; ptr to retval space as hidden stack param (sp+64)
+   10568:       7f ff ff e7     call  10504 <f1>        ; call f1()
+   1056c:       01 00 00 00     nop                     ;
+   10570:       00 00 00 04     unimp  0x4              ;
+   10574:       c2 07 bf e4     ld  [ %fp + -28 ], %g1  ;
+   10578:       c2 27 bf f0     st  %g1, [ %fp + -16 ]  ;
+   1057c:       c2 07 bf f4     ld  [ %fp + -12 ], %g1  ;
+   10580:       82 00 7f 85     add  %g1, -123, %g1     ;
+   10584:       c2 27 bf f4     st  %g1, [ %fp + -12 ]  ;
+   10588:       82 07 bf ec     add  %fp, -20, %g1      ;
+   1058c:       c2 23 a0 40     st  %g1, [ %sp + 0x40 ] ; ptr to retval space as hidden stack param (sp+64)
+   10590:       7f ff ff e4     call  10520 <_Z2f2v>    ; call f2()
+   10594:       01 00 00 00     nop                     ;
+   10598:       00 00 00 04     unimp  0x4              ;
+   1059c:       c2 07 bf f4     ld  [ %fp + -12 ], %g1  ;
+   105a0:       82 00 7f f4     add  %g1, -12, %g1      ;
+   105a4:       c2 27 bf f4     st  %g1, [ %fp + -12 ]  ;
+   105a8:       81 e8 00 00     restore                 ;
+   105ac:       81 c3 e0 08     retl                    ;
+   105b0:       01 00 00 00     nop                     ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/sparc64.sparc64.disas
--- a/doc/disas_examples/sparc64.sparc64.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/sparc64.sparc64.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -1722,5 +1722,114 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from openbsd-6.0-sparc64 w/ gcc 4.2.1
+
+0000000000000d60 <f1>:
+ d60:   9d e3 bf 30     save  %sp, -208, %sp
+ d64:   82 10 20 00     clr  %g1
+ d68:   83 30 60 00     srl  %g1, 0, %g1
+ d6c:   83 28 70 20     sllx  %g1, 0x20, %g1
+ d70:   b0 10 00 01     mov  %g1, %i0
+ d74:   81 cf e0 08     rett  %i7 + 8
+ d78:   01 00 00 00     nop
+
+0000000000000d7c <_Z2f2v>:
+ d7c:   9d e3 bf 30     save  %sp, -208, %sp
+ d80:   a0 10 00 18     mov  %i0, %l0
+ d84:   82 10 00 10     mov  %l0, %g1
+ d88:   90 10 00 01     mov  %g1, %o0
+ d8c:   40 14 01 1d     call  501200 <_ZN10NonTrivialC1Ev@plt>
+ d90:   01 00 00 00     nop
+ d94:   b0 10 00 10     mov  %l0, %i0
+ d98:   81 cf e0 08     rett  %i7 + 8
+ d9c:   01 00 00 00     nop
+ da0:   ae 03 c0 17     add  %o7, %l7, %l7
+ da4:   81 c3 e0 08     retl
+ da8:   01 00 00 00     nop
+
+0000000000000dac <f>:
+ dac:   9d e3 bf 10     save  %sp, -240, %sp                     ;
+ db0:   2f 00 0c 00     sethi  %hi(0x300000), %l7                ;
+ db4:   ae 05 e2 a0     add  %l7, 0x2a0, %l7                     ;
+ db8:   7f ff ff fa     call  da0 <_Z2f2v+0x24>                  ;
+ dbc:   01 00 00 00     nop                                      ;
+ dc0:   03 00 00 00     sethi  %hi(0), %g1                       ;
+ dc4:   82 10 60 48     or  %g1, 0x48, %g1                       ;
+ dc8:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                  ;
+ dcc:   c4 58 40 00     ldx  [ %g1 ], %g2                        ;
+ dd0:   c4 77 a7 e7     stx  %g2, [ %fp + 0x7e7 ]                ;
+ dd4:   84 10 20 00     clr  %g2                                 ;
+ dd8:   82 10 20 01     mov  1, %g1                              ;
+ ddc:   c2 27 a7 e3     st  %g1, [ %fp + 0x7e3 ]                 ;
+ de0:   c2 07 a7 e3     ld  [ %fp + 0x7e3 ], %g1                 ;
+ de4:   82 00 60 7b     add  %g1, 0x7b, %g1                      ;
+ de8:   c2 27 a7 e3     st  %g1, [ %fp + 0x7e3 ]                 ;
+ dec:   40 14 01 25     call  501280 <f1@plt>                    ; call f1()
+ df0:   01 00 00 00     nop                                      ;
+ df4:   82 10 00 08     mov  %o0, %g1                            ;
+ df8:   c0 27 a7 cb     clr  [ %fp + 0x7cb ]                     ;
+ dfc:   83 38 70 20     srax  %g1, 0x20, %g1                     ;
+ e00:   c6 07 a7 cb     ld  [ %fp + 0x7cb ], %g3                 ;
+ e04:   84 08 e0 00     and  %g3, 0, %g2                         ;
+ e08:   82 10 80 01     or  %g2, %g1, %g1                        ;
+ e0c:   c2 27 a7 cb     st  %g1, [ %fp + 0x7cb ]                 ;
+ e10:   c2 07 a7 cb     ld  [ %fp + 0x7cb ], %g1                 ;
+ e14:   c2 27 a7 df     st  %g1, [ %fp + 0x7df ]                 ;
+ e18:   c2 07 a7 e3     ld  [ %fp + 0x7e3 ], %g1                 ;
+ e1c:   82 00 7f 85     add  %g1, -123, %g1                      ;
+ e20:   c2 27 a7 e3     st  %g1, [ %fp + 0x7e3 ]                 ;
+ e24:   82 07 a7 db     add  %fp, 0x7db, %g1                     ;
+ e28:   90 10 00 01     mov  %g1, %o0                            ; hidden first arg (ptr to space to hold NonTrivial retval), via ptr as non-trivial
+ e2c:   40 14 00 e5     call  5011c0 <_Z2f2v@plt>                ; call f2()
+ e30:   01 00 00 00     nop                                      ;
+ e34:   c2 07 a7 e3     ld  [ %fp + 0x7e3 ], %g1                 ;
+ e38:   82 00 7f f4     add  %g1, -12, %g1                       ;
+ e3c:   c2 27 a7 e3     st  %g1, [ %fp + 0x7e3 ]                 ;
+ e40:   03 00 00 00     sethi  %hi(0), %g1                       ;
+ e44:   82 10 60 48     or  %g1, 0x48, %g1                       ;
+ e48:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                  ;
+ e4c:   c4 5f a7 e7     ldx  [ %fp + 0x7e7 ], %g2                ;
+ e50:   c6 58 40 00     ldx  [ %g1 ], %g3                        ;
+ e54:   84 18 80 03     xor  %g2, %g3, %g2                       ;
+ e58:   86 10 20 00     clr  %g3                                 ;
+ e5c:   82 10 00 02     mov  %g2, %g1                            ;
+ e60:   02 c8 40 08     brz  %g1, e80 <f+0xd4>                   ;
+ e64:   01 00 00 00     nop                                      ;
+ e68:   03 00 00 00     sethi  %hi(0), %g1                       ;
+ e6c:   82 10 60 40     or  %g1, 0x40, %g1                       ;
+ e70:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                  ;
+ e74:   90 10 00 01     mov  %g1, %o0                            ;
+ e78:   40 14 00 ca     call  5011a0 <__stack_smash_handler@plt> ;
+ e7c:   01 00 00 00     nop                                      ;
+ e80:   81 cf e0 08     rett  %i7 + 8                            ;
+ e84:   01 00 00 00     nop                                      ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/x64.sysv.disas
--- a/doc/disas_examples/x64.sysv.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/x64.sysv.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -588,5 +588,84 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-12.2-x64 w/ clang 10.0.1
+
+00000000002019c0 <f1>:
+  2019c0:       55                      push   %rbp
+  2019c1:       48 89 e5                mov    %rsp,%rbp
+  2019c4:       48 83 ec 10             sub    $0x10,%rsp
+  2019c8:       31 f6                   xor    %esi,%esi
+  2019ca:       48 8d 45 f8             lea    -0x8(%rbp),%rax
+  2019ce:       48 89 c7                mov    %rax,%rdi
+  2019d1:       ba 04 00 00 00          mov    $0x4,%edx
+  2019d6:       e8 75 01 00 00          callq  201b50 <memset@plt>
+  2019db:       8b 45 f8                mov    -0x8(%rbp),%eax
+  2019de:       48 83 c4 10             add    $0x10,%rsp
+  2019e2:       5d                      pop    %rbp
+  2019e3:       c3                      retq
+
+00000000002019f0 <_Z2f2v>:
+  2019f0:       55                      push   %rbp
+  2019f1:       48 89 e5                mov    %rsp,%rbp
+  2019f4:       48 83 ec 10             sub    $0x10,%rsp
+  2019f8:       48 89 f8                mov    %rdi,%rax
+  2019fb:       48 89 f9                mov    %rdi,%rcx
+  2019fe:       48 89 4d f8             mov    %rcx,-0x8(%rbp)
+  201a02:       48 89 45 f0             mov    %rax,-0x10(%rbp)
+  201a06:       e8 85 00 00 00          callq  201a90 <_ZN10NonTrivialC2Ev>
+  201a0b:       48 8b 45 f0             mov    -0x10(%rbp),%rax
+  201a0f:       48 83 c4 10             add    $0x10,%rsp
+  201a13:       5d                      pop    %rbp
+  201a14:       c3                      retq
+
+0000000000201a20 <f>:
+  201a20:       55                      push   %rbp             ; |
+  201a21:       48 89 e5                mov    %rsp,%rbp        ; | prolog
+  201a24:       48 83 ec 10             sub    $0x10,%rsp       ; |
+  201a28:       c7 45 fc 01 00 00 00    movl   $0x1,-0x4(%rbp)  ; a = 1
+  201a2f:       8b 45 fc                mov    -0x4(%rbp),%eax  ; |
+  201a32:       83 c0 7b                add    $0x7b,%eax       ; | a += 123
+  201a35:       89 45 fc                mov    %eax,-0x4(%rbp)  ; |
+  201a38:       e8 83 ff ff ff          callq  2019c0 <f1>      ; call f1()
+  201a3d:       89 45 f8                mov    %eax,-0x8(%rbp)  ; retval via reg, as small struct
+  201a40:       8b 45 fc                mov    -0x4(%rbp),%eax  ; |
+  201a43:       83 e8 7b                sub    $0x7b,%eax       ; | a -= 123
+  201a46:       89 45 fc                mov    %eax,-0x4(%rbp)  ; |
+  201a49:       48 8d 7d f0             lea    -0x10(%rbp),%rdi ; ptr to space to hold non-triv retval
+  201a4d:       e8 9e ff ff ff          callq  2019f0 <_Z2f2v>  ; call f2()
+  201a52:       8b 45 fc                mov    -0x4(%rbp),%eax  ; |
+  201a55:       83 e8 0c                sub    $0xc,%eax        ; | a -= 12
+  201a58:       89 45 fc                mov    %eax,-0x4(%rbp)  ; /
+  201a5b:       48 83 c4 10             add    $0x10,%rsp       ; \
+  201a5f:       5d                      pop    %rbp             ; | epilog
+  201a60:       c3                      retq                    ; |
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/x64.win.disas
--- a/doc/disas_examples/x64.win.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/x64.win.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -498,5 +498,91 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+        ; ... snip, removed code of ctor and copy ctor ...
+
+$T1 = 0
+f1      PROC
+$LN3:
+        push    rdi
+        sub     rsp, 16
+        lea     rax, QWORD PTR $T1[rsp]
+        mov     rdi, rax
+        xor     eax, eax
+        mov     ecx, 4
+        rep stosb
+        mov     eax, DWORD PTR $T1[rsp]
+        add     rsp, 16
+        pop     rdi
+        ret     0
+f1      ENDP
+
+__$ReturnUdt$ = 48
+NonTrivial f2(void) PROC
+$LN3:
+        mov     QWORD PTR [rsp+8], rcx
+        sub     rsp, 40
+        mov     rcx, QWORD PTR __$ReturnUdt$[rsp]
+        call    NonTrivial::NonTrivial(void)
+        mov     rax, QWORD PTR __$ReturnUdt$[rsp]
+        add     rsp, 40
+        ret     0
+NonTrivial f2(void) ENDP
+
+a$ = 32
+$T1 = 36
+t$ = 40
+n$ = 44
+f       PROC
+$LN3:
+        sub     rsp, 56                 ; prolog
+        mov     DWORD PTR a$[rsp], 1    ; a = 1
+        mov     eax, DWORD PTR a$[rsp]  ; |
+        add     eax, 123                ; | a += 123
+        mov     DWORD PTR a$[rsp], eax  ; |
+        call    f1                      ; call f1()
+        mov     DWORD PTR $T1[rsp], eax ;
+        mov     eax, DWORD PTR $T1[rsp] ;
+        mov     DWORD PTR t$[rsp], eax  ;
+        mov     eax, DWORD PTR a$[rsp]  ; |
+        sub     eax, 123                ; | a -= 123
+        mov     DWORD PTR a$[rsp], eax  ; |
+        lea     rcx, QWORD PTR n$[rsp]  ; ptr to space to hold non-triv retval
+        call    NonTrivial f2(void)     ; call f2()
+        mov     eax, DWORD PTR a$[rsp]  ; |
+        sub     eax, 12                 ; | a -= 12
+        mov     DWORD PTR a$[rsp], eax  ; /
+        add     rsp, 56                 ; \ epilog
+        ret     0                       ; |
+f       ENDP                             
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/x86.cdecl.disas
--- a/doc/disas_examples/x86.cdecl.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/x86.cdecl.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -715,5 +715,80 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+; 
+; extern "C" {
+;     struct Trivial    f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from openbsd-4.0-x86 w/ gcc 3.3.5 (propolice)
+
+1c000920 <f1>:
+1c000920:       55                      push   %ebp
+1c000921:       89 e5                   mov    %esp,%ebp
+1c000923:       83 ec 04                sub    $0x4,%esp
+1c000926:       c7 45 fc 00 00 00 00    movl   $0x0,0xfffffffc(%ebp)
+1c00092d:       8b 45 fc                mov    0xfffffffc(%ebp),%eax
+1c000930:       c9                      leave
+1c000931:       c3                      ret
+
+1c000932 <_Z2f2v>:
+1c000932:       55                      push   %ebp
+1c000933:       89 e5                   mov    %esp,%ebp
+1c000935:       53                      push   %ebx
+1c000936:       83 ec 04                sub    $0x4,%esp
+1c000939:       8b 5d 08                mov    0x8(%ebp),%ebx
+1c00093c:       83 ec 0c                sub    $0xc,%esp
+1c00093f:       53                      push   %ebx
+1c000940:       e8 87 00 00 00          call   1c0009cc <_ZN10NonTrivialC1Ev>
+1c000945:       83 c4 10                add    $0x10,%esp
+1c000948:       89 d8                   mov    %ebx,%eax
+1c00094a:       8b 5d fc                mov    0xfffffffc(%ebp),%ebx
+1c00094d:       c9                      leave
+1c00094e:       c2 04 00                ret    $0x4
+1c000951:       90                      nop
+
+1c000952 <f>:
+1c000952:       55                      push   %ebp                  ;
+1c000953:       89 e5                   mov    %esp,%ebp             ;
+1c000955:       83 ec 28                sub    $0x28,%esp            ;
+1c000958:       c7 45 f4 01 00 00 00    movl   $0x1,0xfffffff4(%ebp) ; a = 1
+1c00095f:       8d 45 f4                lea    0xfffffff4(%ebp),%eax ; |
+1c000962:       83 00 7b                addl   $0x7b,(%eax)          ; | a += 123
+1c000965:       e8 b6 ff ff ff          call   1c000920 <f1>         ; call f1()
+1c00096a:       89 45 f0                mov    %eax,0xfffffff0(%ebp) ;
+1c00096d:       8d 45 f4                lea    0xfffffff4(%ebp),%eax ; |
+1c000970:       83 28 7b                subl   $0x7b,(%eax)          ; | a -= 123
+1c000973:       8d 45 d8                lea    0xffffffd8(%ebp),%eax ; space (at top of stack) to hold non-triv retval -> eax
+1c000976:       83 ec 0c                sub    $0xc,%esp             ; grow stack by 12
+1c000979:       50                      push   %eax                  ; hidden first arg: ptr to space for retval
+1c00097a:       e8 b3 ff ff ff          call   1c000932 <_Z2f2v>     ; call f2()
+1c00097f:       83 c4 0c                add    $0xc,%esp             ; shrink stack back by 12
+1c000982:       8d 45 f4                lea    0xfffffff4(%ebp),%eax ; |
+1c000985:       83 28 0c                subl   $0xc,(%eax)           ; | a -= 12
+1c000988:       c9                      leave                        ;
+1c000989:       c3                      ret                          ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/x86.fastcall_gnu.disas
--- a/doc/disas_examples/x86.fastcall_gnu.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/x86.fastcall_gnu.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -369,5 +369,92 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial {
+;         int a;
+;         __attribute__((fastcall)) NonTrivial() : a(0) {}
+;         __attribute__((fastcall)) NonTrivial(const NonTrivial& rhs) : a(rhs.a) { }
+; };
+; 
+; extern "C" {
+;     struct Trivial    __attribute__((fastcall)) f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial __attribute__((fastcall)) f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void __attribute__((fastcall)) f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from alpine_linux-3.11.3-x86 w/ gcc 9.2.0
+
+00001215 <f1>:
+    1215:       55                      push   %ebp
+    1216:       89 e5                   mov    %esp,%ebp
+    1218:       83 ec 04                sub    $0x4,%esp
+    121b:       e8 ed ff ff ff          call   120d <__x86.get_pc_thunk.ax>
+    1220:       05 ac 2d 00 00          add    $0x2dac,%eax
+    1225:       89 4d fc                mov    %ecx,-0x4(%ebp)
+    1228:       8b 45 fc                mov    -0x4(%ebp),%eax
+    122b:       c7 00 00 00 00 00       movl   $0x0,(%eax)
+    1231:       8b 45 fc                mov    -0x4(%ebp),%eax
+    1234:       c9                      leave
+    1235:       c3                      ret
+
+00001236 <_Z2f2v>:
+    1236:       55                      push   %ebp
+    1237:       89 e5                   mov    %esp,%ebp
+    1239:       83 ec 18                sub    $0x18,%esp
+    123c:       e8 cc ff ff ff          call   120d <__x86.get_pc_thunk.ax>
+    1241:       05 8b 2d 00 00          add    $0x2d8b,%eax
+    1246:       89 4d f4                mov    %ecx,-0xc(%ebp)
+    1249:       8b 45 f4                mov    -0xc(%ebp),%eax
+    124c:       89 c1                   mov    %eax,%ecx
+    124e:       e8 6f 00 00 00          call   12c2 <_ZN10NonTrivialC1Ev>
+    1253:       8b 45 f4                mov    -0xc(%ebp),%eax
+    1256:       c9                      leave
+    1257:       c3                      ret
+
+00001258 <f>:
+    1258:       55                      push   %ebp                          ;
+    1259:       89 e5                   mov    %esp,%ebp                     ;
+    125b:       83 ec 18                sub    $0x18,%esp                    ;
+    125e:       e8 aa ff ff ff          call   120d <__x86.get_pc_thunk.ax>  ;
+    1263:       05 69 2d 00 00          add    $0x2d69,%eax                  ;
+    1268:       65 a1 14 00 00 00       mov    %gs:0x14,%eax                 ;
+    126e:       89 45 f4                mov    %eax,-0xc(%ebp)               ;
+    1271:       31 c0                   xor    %eax,%eax                     ;
+    1273:       c7 45 f0 01 00 00 00    movl   $0x1,-0x10(%ebp)              ;
+    127a:       83 45 f0 7b             addl   $0x7b,-0x10(%ebp)             ;
+    127e:       8d 45 e8                lea    -0x18(%ebp),%eax              ; ptr to space to hold aggregate retval -> eax ...
+    1281:       89 c1                   mov    %eax,%ecx                     ; ... as hidden first arg (ecx)
+    1283:       e8 8d ff ff ff          call   1215 <f1>                     ; call f1()
+    1288:       83 6d f0 7b             subl   $0x7b,-0x10(%ebp)             ;
+    128c:       8d 45 ec                lea    -0x14(%ebp),%eax              ; ptr to space to hold aggregate retval -> eax ...
+    128f:       89 c1                   mov    %eax,%ecx                     ; ... as hidden first arg (ecx)
+    1291:       e8 a0 ff ff ff          call   1236 <_Z2f2v>                 ; call f2()
+    1296:       83 6d f0 0c             subl   $0xc,-0x10(%ebp)              ;
+    129a:       90                      nop                                  ;
+    129b:       8b 45 f4                mov    -0xc(%ebp),%eax               ;
+    129e:       65 33 05 14 00 00 00    xor    %gs:0x14,%eax                 ;
+    12a5:       74 05                   je     12ac <f+0x54>                 ;
+    12a7:       e8 35 00 00 00          call   12e1 <__stack_chk_fail_local> ;
+    12ac:       c9                      leave                                ;
+    12ad:       c3                      ret                                  ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/disas_examples/x86.stdcall.disas
--- a/doc/disas_examples/x86.stdcall.disas	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/disas_examples/x86.stdcall.disas	Mon Apr 04 15:50:52 2022 +0200
@@ -261,5 +261,85 @@
 
 
 
+; ---------- C++ trivial and non-trivial aggrs as return values ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial {
+;         int a;
+;         __attribute__((stdcall)) NonTrivial() : a(0) {}
+;         __attribute__((stdcall)) NonTrivial(const NonTrivial& rhs) : a(rhs.a) { }
+; };
+; 
+; extern "C" {
+;     struct Trivial    __attribute__((stdcall)) f1() { return Trivial(); }
+; }
+; 
+;     struct NonTrivial __attribute__((stdcall)) f2() { return NonTrivial(); }
+; 
+; extern "C" {
+;     void __attribute__((stdcall)) f()
+;     {
+;         int a=1;
+;         a += 123;
+;         struct Trivial t = f1();
+;         a -= 123;
+;         struct NonTrivial n = f2();
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from alpine_linux-3.11.3-x86 w/ gcc 9.2.0 (w/ -O0 -fno-stack-protector for simplicity)
+
+00001205 <f1>:
+    1205:       55                      push   %ebp                         ; | prolog
+    1206:       89 e5                   mov    %esp,%ebp                    ; |
+    1208:       e8 f0 ff ff ff          call   11fd <__x86.get_pc_thunk.ax> ;
+    120d:       05 c3 2d 00 00          add    $0x2dc3,%eax                 ;
+    1212:       8b 45 08                mov    0x8(%ebp),%eax               ; fetch ptr to retval space -> eax
+    1215:       c7 00 00 00 00 00       movl   $0x0,(%eax)                  ; write retval
+    121b:       8b 45 08                mov    0x8(%ebp),%eax               ; re-fetch ptr to retval space -> eax, to return it in eax (a bit pointless)
+    121e:       5d                      pop    %ebp                         ; |
+    121f:       c2 04 00                ret    $0x4                         ; | epilog
+
+00001222 <_Z2f2v>:
+    1222:       55                      push   %ebp
+    1223:       89 e5                   mov    %esp,%ebp
+    1225:       83 ec 08                sub    $0x8,%esp
+    1228:       e8 d0 ff ff ff          call   11fd <__x86.get_pc_thunk.ax>
+    122d:       05 a3 2d 00 00          add    $0x2da3,%eax
+    1232:       83 ec 0c                sub    $0xc,%esp
+    1235:       ff 75 08                pushl  0x8(%ebp)
+    1238:       e8 65 00 00 00          call   12a2 <_ZN10NonTrivialC1Ev>
+    123d:       83 c4 0c                add    $0xc,%esp
+    1240:       8b 45 08                mov    0x8(%ebp),%eax
+    1243:       c9                      leave
+    1244:       c2 04 00                ret    $0x4
+
+00001247 <f>:
+    1247:       55                      push   %ebp                         ;
+    1248:       89 e5                   mov    %esp,%ebp                    ;
+    124a:       83 ec 18                sub    $0x18,%esp                   ;
+    124d:       e8 ab ff ff ff          call   11fd <__x86.get_pc_thunk.ax> ;
+    1252:       05 7e 2d 00 00          add    $0x2d7e,%eax                 ;
+    1257:       c7 45 f4 01 00 00 00    movl   $0x1,-0xc(%ebp)              ; a = 1
+    125e:       83 45 f4 7b             addl   $0x7b,-0xc(%ebp)             ; a += 123
+    1262:       8d 45 f0                lea    -0x10(%ebp),%eax             ; ptr to space to hold aggr retval -> eax ...
+    1265:       50                      push   %eax                         ; ... as hidden first arg
+    1266:       e8 9a ff ff ff          call   1205 <f1>                    ; call f1()
+    126b:       83 6d f4 7b             subl   $0x7b,-0xc(%ebp)             ; a -= 123
+    126f:       8d 45 ec                lea    -0x14(%ebp),%eax             ; ptr to space to hold aggr retval -> eax ...
+    1272:       83 ec 0c                sub    $0xc,%esp                    ; grow stack by 12
+    1275:       50                      push   %eax                         ; ... as hidden first arg
+    1276:       e8 a7 ff ff ff          call   1222 <_Z2f2v>                ; call f2()
+    127b:       83 c4 0c                add    $0xc,%esp                    ; shrink stack back by 12
+    127e:       83 6d f4 0c             subl   $0xc,-0xc(%ebp)              ; a -= 12
+    1282:       90                      nop                                 ;
+    1283:       c9                      leave                               ;
+    1284:       c3                      ret                                 ;
+
+
+
 ; vim: ft=asm
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_arm32.tex
--- a/doc/manual/callconvs/callconv_arm32.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_arm32.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -89,6 +89,7 @@
 \item parameters \textless=\ 32 bits are passed as 32 bit words
 \item 64 bit parameters are passed as two 32 bit parts (even partly via the register and partly via the stack, although this doesn't seem to be specified in the ATPCS)
 \item aggregates (struct, union) are passed by value (after rounding up the size to the nearest multiple of 4), as a sequence of words (splitting across registers and stack is allowed)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item keeping the stack eight-byte aligned can improve memory access performance and is required by LDRD and STRD on ARMv5TE processors which are part of the ARM32 family, so, in order to avoid problems one should always align the stack (tests have shown, that GCC does care about the alignment when using the ellipsis)
 \end{itemize}
 
@@ -97,6 +98,8 @@
 \begin{itemize}
 \item return values \textless=\ 32 bits use r0
 \item 64 bit return values use r0 and r1
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
 \item aggregates (struct, union) \textless=\ 32 bits are returned like an integer (in r0)
 \item aggregates (struct, union) \textgreater\ 32 bits the caller allocates space for the return value on the stack in its frame and passes a pointer to it in r0
 \item for all other aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param (meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
@@ -175,6 +178,7 @@
 \item parameters \textless=\ 32 bits are passed as 32 bit words
 \item 64 bit parameters are passed as two 32 bit parts (even partly via the register and partly via the stack, although this doesn't seem to be specified in the ATPCS)
 \item aggregates (struct, union) are passed by value (after rounding up the size to the nearest multiple of 4), as a sequence of words (splitting across registers and stack is allowed)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item keeping the stack eight-byte aligned can improve memory access performance and is required by LDRD and STRD on ARMv5TE processors which are part of the ARM32 family, so, in order to avoid problems one should always align the stack (tests have shown, that GCC does care about the alignment when using the ellipsis)
 \end{itemize}
 
@@ -183,6 +187,8 @@
 \begin{itemize}
 \item return values \textless=\ 32 bits use r0
 \item 64 bit return values use r0 and r1
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
 \item aggregates (struct, union) \textless=\ 32 bits are returned like an integer (in r0)
 \item aggregates (struct, union) \textgreater\ 32 bits the caller allocates space for the return value on the stack in its frame and passes a pointer to it in r0
 \item for all other aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param (meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
@@ -369,6 +375,7 @@
 \item aggregates (struct, union) with 1 to 4 identical floating-point members (either float or double) are passed field-by-field, except if passed as a vararg
 \item aggregates that could be passed via floating point register are never split across those and the stack, so if not enough registers are available an aggregate is
 passed entirely via the stack (implying above rule that any still unused float registers will be skipped for any subsequent arg)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item all other aggregates (struct, union), after rounding up the size to the nearest multiple of 4, are passed as a sequence of dwords, like integers (splitting across registers and stack is allowed)
 \item callee spills, caller reserves spill area space, though
 \end{itemize}
@@ -379,10 +386,12 @@
 \item non floating point return values \textless=\ 32 bits use r0
 \item non floating point 64-bit return values use r0 and r1
 \item floating point return value uses s0 (for float) or d0 (for double), respectively
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
 \item aggregates (struct, union) with 1 to 4 identical floating-point members are returned in s0-s3 (for float) or d0-d3 (for double), respectively
 \item all other aggregates \textless=\ 32 bits are returned via r0
 \item for all other aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
-(meanin in r0), and callee writes return value to this space; the ptr to the aggregate is returned in x0
+(meaning in r0), and callee writes return value to this space; the ptr to the aggregate is returned in r0
 \end{itemize}
 
 \paragraph{Stack layout}
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_arm64.tex
--- a/doc/manual/callconvs/callconv_arm64.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_arm64.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -79,6 +79,7 @@
 and 8 floating-point registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though)
 \item aggregates (struct, union) with 1 to 4 identical floating-point members (either float or double) are passed field-by-field (8-byte aligned if passed via stack), except if passed as a vararg
 \item other aggregates (struct, union) \textgreater\ 16 bytes in size are passed indirectly, as a pointer to a copy (if needed)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item all other aggregates (struct, union), after rounding up the size to the nearest multiple of 8, are passed as a sequence of dwords, like integers
 \item aggregates are never split across registers and stack, so if not enough registers are available an aggregated is passed via the stack (for aggregates that
 would've been passed as floating point values, any still unused float registers will be skipped for any subsequent arg)
@@ -90,6 +91,7 @@
 \begin{itemize}
 \item integer return values use x0
 \item floating-point return values use d0
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee via x8, and callee writes return value to this space; the ptr to the aggregate is returned in x0
 \item aggregates (struct, union) that would be passed via registers if passed as a first param, are returned via those registers
 \item for aggregates not returnable via registers (e.g. if regs exhausted, or \textgreater\ 16b, ...), the caller allocates space, passes pointer to it to the callee through
 x8, and callee writes return value to this space (note that this is not a hidden first param, as x8 is not used for passing params); the ptr to the aggregate is returned in x0
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_mips32.tex
--- a/doc/manual/callconvs/callconv_mips32.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_mips32.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -89,6 +89,7 @@
 \item if the callee takes the address of one of the parameters and uses it to address other unnamed parameters (e.g. varargs) it has to copy - in its prolog - the the argument registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though) % @@@ seems to *ONLY* spill with varargs, never for any other reason
 \item float registers don't seem to ever need to be saved that way, because floats passed to an ellipsis function are promoted to doubles, which in turn are passed in a? register pairs, so only \$a0-\$a7 are need to be spilled
 \item aggregates (struct, union) \textless=\ 32bit are passed like an integer
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item all other aggregates (struct, union) are passed indirectly, as a pointer to a copy (if needed, and for vararg arguments required to be copied by the caller) of the struct
 \end{itemize}
 
@@ -96,9 +97,11 @@
 
 \begin{itemize}
 \item results are returned in \$v0 (32-bit), \$v0 and \$v1 (64-bit), \$f0 or \$f0 and \$f2 (2 $\times$ 32 bit float e.g. complex)
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in \$a0), and callee writes return value to this space; the ptr to the aggregate is returned in \$v0
 \item aggregates (struct, union) \textless=\ 64bit are returned like an integer (aligned within the register according to endianness)
 \item all other aggregates (struct, union) are returned in a space allocated by the caller, with a pointer to it
-passed as first parameter to the function called (meaning in \%a0)
+passed as first parameter to the function called (meaning in \$a0); the ptr to the aggregate is returned in \$v0
 \end{itemize}
 
 \paragraph{Stack layout}
@@ -180,6 +183,7 @@
 \item only on hard-float targets: note that if the first argument is not a float, but the second, it'll get passed via the \$a? registers
 \item single precision float parameters (32 bit) are right-justified in their 8-byte slot on the stack on big endian targets, as they aren't promoted % @@@ verify
 \item aggregates (struct, union) are passed as a sequence of words like integers, no matter the fields or if hard-float target (splitting across registers and stack is allowed)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 \paragraph{Return values}
@@ -187,8 +191,8 @@
 \begin{itemize}
 \item results are returned in \$v0 and \$v1, with \$v0 for all values \textless\ 64bit (only integer on hard-float targets)
 \item only on hard-float targets: floating point results are returned in \$f0 (32-bit float), or \$f0 and \$f3 (64bit float)
-\item aggregates (struct, union) of any size are returned in a space allocated by the caller, with a pointer to it
-passed as first parameter to the function called (meaning in \%a0)
+\item aggregates (struct, union) are returned in a space allocated by the caller, with a pointer to it
+passed as first parameter to the function called (meaning in \$a0); the ptr to the aggregate is returned in \$v0
 \end{itemize}
 
 \paragraph{Stack layout}
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_mips64.tex
--- a/doc/manual/callconvs/callconv_mips64.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_mips64.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -84,6 +84,7 @@
 \item integer parameters \textless\ 64 bit are right-justified (meaning occupy higher-address bytes) in their 8-byte slot on the stack, requiring extra-care for big-endian targets
 \item single precision float parameters (32 bit) are left-justified in their 8-byte slot on the stack, but are right justified in fp-registers on big endian targets, as they aren't promoted (actually, official docs says "undecided", but real world implementations seem to use what is described here)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item aggregates (struct, union) are passed as a sequence of dwords in (integer registers and the stack), with the following particularities:
 \begin{itemize}
 \item if a dword happens to be a double precision floating point struct field, it is passed in a floating point register
 \item array and union fields are always passed like integers (even if their type is float or double)
@@ -123,9 +124,11 @@
 \item results are returned in \$v0, and for a second one \$v1 is used
 \item only on hard-float targets: floating point results are returned in \$f0 (and \$f2 if needed)
 \item only on hard-float targets: structs with only one or two floating point fields are returned in \$f0 (and \$f2 if necessary), field-by-field
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in \$a0), and callee writes return value to this space; the ptr to the aggregate is returned in \$v0
 \item any other aggregates (struct, union) \textless= 16 bytes are returned via registers \$v0 (and \$v1 if necessary), dword-by-dword
 \item all other aggregates (struct, union) \textgreater 16 bytes are returned in a space allocated by the caller, with a pointer to it
-passed as first parameter to the function called (meaning in \%a0)
+passed as first parameter to the function called (meaning in \$a0); the ptr to the aggregate is returned in \$v0
 %spec;
 %Composite results (struct, union, or array) are returned in
 %$2/$f0 and $3/$f2 according to the following rules:
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_ppc32.tex
--- a/doc/manual/callconvs/callconv_ppc32.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_ppc32.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -96,6 +96,7 @@
 \item composite parameters 3 bytes or larger in size occupy high-order bytes
 \item integer parameters \textless\ 32 bit are right-justified (meaning occupy higher-address bytes) in their 4-byte slot on the stack, requiring extra-care for big-endian targets
 \item aggregates (struct, union) with only one (non-aggregate / non-array) field are passed as if the field itself would be passed
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item all other aggregates are passed as a sequence of words (like integer parameters)
 \end{itemize}
 
@@ -106,8 +107,8 @@
 \item return values of integer \textless=\ 32bit or pointer type use gpr3
 \item 64 bit integers use gpr3 and gpr4 (hiword in gpr3, loword in gpr4)
 \item floating point values are returned via fpr1
-\item aggregates (struct, union) \textless=\ 64 bits use gpr3 and gpr4
-\item for all other aggregates and types \textgreater\ 64 bits, a secret first parameter with an address to a caller allocated space is passed to the function (in gpr3), which is written to by the callee
+\item for all aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in gpr3), and callee writes return value to this space; the ptr to the aggregate is returned in gpr3
 \end{itemize}
 
 
@@ -209,6 +210,7 @@
 \item integer parameters \textless\ 32 bit are right-justified (meaning occupy high-order bytes) in their 4-byte area, requiring extra-care for big-endian targets
 \item no spill area is used on stack, iterating over varargs requires a specific va\_list implementation
 \item aggregates (struct, union) and types \textgreater\ 64 bits are passed indirectly, as a pointer to the data (or a copy of it, if necessary to avoid modification)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 \paragraph{Return values}
@@ -216,6 +218,8 @@
 \begin{itemize}
 \item 32-bit integers use register r3, 64-bit use registers r3 and r4 (hiword in r3, loword in r4)
 \item floating-point values are returned using register f1
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in gpr3), and callee writes return value to this space; the ptr to the aggregate is returned in gpr3
 \item aggregates (struct, union) \textless=\ 64 bits use gpr3 and gpr4
 \item for all other aggregates and types \textgreater\ 64 bits, a secret first parameter with an address to a caller allocated space is passed to the function (in gpr3), which is written to by the callee
 \end{itemize}
@@ -254,6 +258,17 @@
 \end{figure}
 
 
+\subsubsection{System V PPC 32-bit / Linux Standard Base version}
+
+This is in essence the same as the System V PPC 32-bit calling convention, but differs for aggregate return values:
+% @@@STRUCT make this more obvious
+
+\begin{itemize}
+\item for all aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in gpr3), and callee writes return value to this space; the ptr to the aggregate is returned in gpr3
+\end{itemize}
+
+
 \clearpage
 
 \subsubsection{System V syscalls}
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_ppc64.tex
--- a/doc/manual/callconvs/callconv_ppc64.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_ppc64.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -105,6 +105,7 @@
 \item vector parameters are aligned on 16-byte boundaries
 \item integer parameters \textless\ 64 bit are right-justified (meaning occupy higher-address bytes) in their 8-byte slot on the stack, requiring extra-care for big-endian targets
 \item aggregates (struct, union) are passed as a sequence of doublewords (following above rules for doublewords)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 
@@ -114,8 +115,8 @@
 \item return values of integer \textless=\ 32bit or pointer type use gpr3 and are zero or sign extended depending on their type
 \item 64 bit integers use gpr3
 \item floating point values are returned via fpr1
-\item for aggregates (struct, union) of any size, a secret first parameter with an address
-to a caller allocated space is passed to the function (in gpr3), which is written to by the callee
+\item for any aggregate (struct, union), the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in gpr3), and callee writes return value to this space; the ptr to the aggregate is returned in gpr3
 \end{itemize}
 
 
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_sparc32.tex
--- a/doc/manual/callconvs/callconv_sparc32.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_sparc32.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -68,6 +68,7 @@
 \item all arguments \textless=\ 32 bit are passed as 32 bit values
 \item 64 bit arguments are passed like two consecutive \textless=\ 32 bit values (which allows for an argument to be split between the stack and \%i5)
 \item aggregates (struct, union) of any size, as well as quad precision values are passed indirectly as a pointer to a {\bf copy} of the aggregate (like: struct s2 = s; callee(\&s2);)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item minimum stack size is 64 bytes, b/c stack pointer must always point at enough space to store all \%i* and \%l* registers, used when running out of register windows
 \item if needed, register spill area is adjacent to parameters
 \end{itemize}
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_sparc64.tex
--- a/doc/manual/callconvs/callconv_sparc64.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_sparc64.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -84,6 +84,7 @@
 \end{itemize}
 \item unions \textless=\ 16 bytes passed by-value are passed like integers in left-justified 8-byte slots (either via \%o* registers or the stack)
 \item aggregates (struct, union) and types \textgreater\ 16 bytes are passed indirectly, as a pointer to a correctly aligned copy of the data (that copy can be avoided under certain conditions)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 % from spec:
 %Structure or union types up to eight bytes in size are assigned to one parameter array word, and align to eight-byte
 %boundaries.
@@ -112,6 +113,8 @@
 \begin{itemize}
 \item results are expected by caller to be returned in \%o0-\%o3 (after reg window restore, meaning callee writes to \%i0-\%i3) for integers
 \item \%d0,\%d2,\%d4,\%d6 are used for floating point values
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes pointer to it to the callee as a hidden first param
+(meaning in \%o0), and callee writes return value to this space; the ptr to the aggregate is returned in the same register (after reg window restore)
 \item the fields of aggregates (struct, union) \textless= 32 bytes are returned via registers mentioned above (which are
 assigned following the same logic as when passing the aggregate as a first argument to a function)
 \item aggregates (struct, union) \textgreater 32 bytes are returned in a space allocated by the caller, with a pointer to it
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_x64.tex
--- a/doc/manual/callconvs/callconv_x64.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_x64.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -84,7 +84,7 @@
 \item caller cleans up the stack
 \item first 4 integer/pointer parameters are passed via rcx, rdx, r8, r9 (from left to right), others are pushed on stack (there is a
 spill area for the first 4)
-\item {\it non-trivial} C++ aggregates (as defined by the language), are passed indirectly via a pointer to a copy of the aggregate, no matter the size
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size, are passed indirectly via a pointer to a copy of the aggregate
 \item aggregates (structs and unions) \textless\ 64 bits are passed like equal-sized integers
 \item float and double parameters are passed via xmm0l-xmm3l
 \item first 4 parameters are passed via the correct register depending on the parameter type - with mixed float and int parameters,
@@ -197,7 +197,7 @@
 exact but an upper bound on the number of used xmm registers)
 \item aggregates (structs, unions (and arrays within those)) follow a more complicated logic (the following {\bf only considers field types supported by dyncall}):
 \begin{itemize}
-  \item {\it non-trivial} C++ aggregates (as defined by the language), are passed indirectly via a pointer to a copy of the aggregate, no matter the size
+  \item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
   \item aggregates \textgreater\ 16 bytes are always passed entirely via the stack
   \item all other aggregates are classified per qword, by looking at all fields occupying all or part of that qword, recursively
   \begin{itemize}
diff -r fd9ba3a6d348 -r fc614cb865c6 doc/manual/callconvs/callconv_x86.tex
--- a/doc/manual/callconvs/callconv_x86.tex	Wed Mar 23 15:33:09 2022 +0100
+++ b/doc/manual/callconvs/callconv_x86.tex	Mon Apr 04 15:50:52 2022 +0200
@@ -98,11 +98,14 @@
 \item all arguments are pushed onto the stack (as dwords)
 \item arguments \textgreater\ 64 bits are pushed as a sequence of dwords
 \item aggregates (structs, unions) are pushed as a sequence of dwords
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 \paragraph{Return values}
 
 \begin{itemize}
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes a pointer to it to the callee as a hidden first param
+(meaning via the stack), and the callee writes the return value to this space; the ptr to the aggregate is returned in eax
 \item return values of pointer or integral type (\textless=\ 32 bits) are returned via the eax register
 \item integers and aggregates (structs, unions) \textgreater\ 32 and \textless=\ 64 bits are returned via the eax and edx registers
 \item return values \textgreater\ 64 bits (e.g. aggregates) are returned by the caller allocating the space and
@@ -177,6 +180,7 @@
 \item arguments \textgreater\ 64 bits are pushed as a sequence of dwords
 \item aggregates (structs, unions) are pushed as a sequence of dwords, but are never split between registers and stack (if registers are still available and
 aggregate doesn't fit entirely into ecx and edx, it is passed via the stack and remaining registers are free for subsequent arguments)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 \clearpage
@@ -185,6 +189,8 @@
 
 \begin{itemize}
 \item return values of pointer or integral type (\textless=\ 32 bits) are returned via the eax register
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes a pointer to it to the callee as a hidden first param
+(meaning via ecx), and the callee writes the return value to this space; the ptr to the aggregate is returned in eax
 \item integers and aggregates (structs, unions) \textgreater\ 32 and \textless=\ 64 bits are returned via the eax and edx registers
 \item return values \textgreater\ 64 bits (e.g. aggregates) are returned by the caller allocating the space and
 passing a pointer to the callee as a new, implicit first parameter (always via the stack, never via a register)
@@ -258,6 +264,7 @@
 \item arguments \textgreater\ 32 bits are pushed onto the stack as a sequence of dwords (never passed via registers, any respective register is skipped and not used for subsequent args)
 \item all other parameters are pushed onto the stack (as dwords)
 \item aggregates (structs, unions) are pushed as a sequence of dwords, and never passed via registers (no matter their size, any respective register is skipped and not used for subsequent args)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \item varargs are always passed via the stack
 \end{itemize}
 
@@ -338,6 +345,7 @@
 \item first three integers/pointers (with exception of method pointers) (\textless=\ 32bit) are passed via eax, ecx and edx (preceding or interleaved arguments that are not passed via registers are pushed onto the stack)
 \item arguments \textgreater\ 32 bits are passed as a pointer to the value
 \item aggregates (structs, unions) are pushed as a sequence of dwords, and never passed via registers (no matter their size)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \item varargs are always passed via the stack
 \item all other parameters are pushed onto the stack
 \item the direction flag is clear on entry and must be returned clear % mention it first, above @@@
@@ -348,6 +356,8 @@
 
 \begin{itemize}
 \item return values of pointer or integral type (\textless=\ 32 bits) are returned via the eax register
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes a pointer to it to the callee as a hidden first param
+(meaning via ecx), and the callee writes the return value to this space; the ptr to the aggregate is returned in eax
 \item integers and aggregates (structs, unions) \textgreater\ 32 and \textless=\ 64 bits are returned via the eax and edx registers
 \item floating point types are returned via the st0 register
 \item return values \textgreater\ 32 bits (e.g. aggregates, long long, ...) are returned by the caller allocating the space and
@@ -418,7 +428,8 @@
 \item called function cleans up the stack
 \item first four integers/pointers (\textless=\ 32bit) are passed via eax, edx, ebx and ecx (even if preceded by other arguments)
 \item arguments \textgreater\ 32 bits, as well as all subsequent arguments, are passed via the stack
-\item aggregates (structs, unions) are passed as a pointer to the aggregate
+\item aggregates (structs, unions) are passed as a pointer to the aggregate (a copy, if needed, to guarantee by-value semantics)
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \item all other parameters are pushed onto the stack
 \end{itemize}
 
@@ -428,6 +439,8 @@
 \begin{itemize}
 \item return values of pointer or integral type (\textless=\ 32 bits) are returned via the eax register
 \item integers \textgreater\ 32 bits and \textless=\ 64 bits are returned via the eax and edx registers
+\item for {\it non-trivial} C++ aggregates, the caller allocates space, passes a pointer to it to the callee via esi, and the callee writes the return value to
+this space; the ptr to the aggregate is returned in eax
 \item aggregates (structs, unions) \textless=\ 32 bits are returned in eax
 \item aggregates (structs, unions) \textgreater\ 32 bits are returned by the caller allocating the space and
 passing a pointer to the callee via esi, that same pointer is returned in eax
@@ -496,6 +509,7 @@
 \item all parameters are pushed onto the stack (as dwords)
 \item arguments \textgreater\ 64 bits are pushed as a sequence of dwords
 \item aggregates (structs, unions) are pushed as a sequence of dwords
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \item stack is usually 4 byte aligned (GCC \textgreater=\ 3.x seems to use a 16byte alignement)
 \item the direction flag is clear on entry and must be returned clear % mention it first, above @@@
 \end{itemize}
@@ -506,9 +520,9 @@
 
 \begin{itemize}
 \item return values of pointer or integral type (\textless=\ 32 bits) are returned via the eax register
-\item integers and aggregates (structs, unions) \textgreater\ 32 and \textless=\ 64 bits are returned via the eax and edx registers
-\item return values \textgreater\ 64 bits (e.g. aggregates) are returned by the caller allocating the space and
-passing a pointer to the callee as a new, implicit first parameter (this means, on the stack)
+\item integers \textgreater\ 32 and \textless=\ 64 bits are returned via the eax and edx registers
+\item for aggregates and integer return values \textgreater\ 64 bits, the caller allocates space, passes a pointer to it to the callee as a hidden first param
+(meaning via the stack), and the callee writes the return value to this space; the ptr to this space is returned in eax
 \item floating point types are returned via the st0 register
 \end{itemize}
 
@@ -577,6 +591,7 @@
 \item all other parameters are pushed onto the stack
 \item arguments \textgreater\ 64 bits are pushed as a sequence of dwords
 \item aggregates (structs, unions) are pushed as a sequence of dwords
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}
 
 % introduce mangling section? \item Function name is decorated by prepending a '@' character and appending a '@' character and the number of bytes (decimal) of stack space required
@@ -736,6 +751,7 @@
 \item all parameters are pushed onto the stack (as dwords)
 \item arguments \textgreater\ 64 bits are pushed as a sequence of dwords
 \item aggregates (structs, unions) are pushed as a sequence of dwords
+\item {\it non-trivial} C++ aggregates (as defined by the language) of any size are passed indirectly via a pointer to a copy of the aggregate
 \end{itemize}