# HG changeset patch
# User Tassilo Philipp
# Date 1647870398 -3600
# Node ID 75cb8f79d7258d2c011d1afacb254fe00e7a5d8d
# Parent  29d09d10ecd9ca2e846d7a4fe43e0d617c4a6082
- doc and disas examples update about C++ non-trivial aggregates

diff -r 29d09d10ecd9 -r 75cb8f79d725 doc/disas_examples/x64.sysv.disas
--- a/doc/disas_examples/x64.sysv.disas	Mon Mar 21 10:04:10 2022 +0100
+++ b/doc/disas_examples/x64.sysv.disas	Mon Mar 21 14:46:38 2022 +0100
@@ -514,5 +514,79 @@
   4007cf:       c3                      retq                       ; | epilog
 
 
+
+; ---------- C++ trivial and non-trivial aggrs passed to C funcs ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+;
+; extern "C" {
+; 
+;     void f1(struct Trivial s)    { }
+;     void f2(struct NonTrivial s) { }
+;
+;     void f()
+;     {
+;         struct Trivial t;
+;         struct NonTrivial n;
+;         int a=1;
+;         a += 123;
+;         f1(t);
+;         a -= 123;
+;         f2(n);
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from freebsd-12.2-x64 w/ clang 10.0.1
+
+0000000000000000 <f1>:
+   0:   55                      push   %rbp             ; | prolog
+   1:   48 89 e5                mov    %rsp,%rbp        ; |
+   4:   89 7d f8                mov    %edi,-0x8(%rbp)  ; local copy of trivial struct
+   7:   5d                      pop    %rbp             ; |
+   8:   c3                      retq                    ; | epilog
+
+0000000000000010 <f2>:
+  10:   55                      push   %rbp             ; | prolog
+  11:   48 89 e5                mov    %rsp,%rbp        ; /
+  14:   5d                      pop    %rbp             ; \         note: no local copy as non-trivial
+  15:   c3                      retq                    ; | epilog
+
+0000000000000020 <f>:
+  20:   55                      push   %rbp             ; |
+  21:   48 89 e5                mov    %rsp,%rbp        ; | prolog
+  24:   48 83 ec 20             sub    $0x20,%rsp       ; /
+  28:   48 8d 7d f0             lea    -0x10(%rbp),%rdi ; \ this ptr (ptr to n)
+  2c:   e8 00 00 00 00          callq  31 <f+0x11>      ; | NonTrivial::NonTrivial() / ctor
+  31:   c7 45 ec 01 00 00 00    movl   $0x1,-0x14(%rbp) ; a = 1
+  38:   8b 45 ec                mov    -0x14(%rbp),%eax ; |
+  3b:   83 c0 7b                add    $0x7b,%eax       ; | a += 123
+  3e:   89 45 ec                mov    %eax,-0x14(%rbp) ; /
+  41:   8b 45 f8                mov    -0x8(%rbp),%eax  ; \
+  44:   89 45 e8                mov    %eax,-0x18(%rbp) ; / local copy of t (struct Trivial)
+  47:   8b 7d e8                mov    -0x18(%rbp),%edi ; f1 arg 0 (struct Trivial), via reg as small struct
+  4a:   e8 00 00 00 00          callq  4f <f+0x2f>      ; call f1(struct Trivial)
+  4f:   8b 45 ec                mov    -0x14(%rbp),%eax ; |
+  52:   83 e8 7b                sub    $0x7b,%eax       ; | a -= 123
+  55:   89 45 ec                mov    %eax,-0x14(%rbp) ; /
+  58:   48 8d 7d e0             lea    -0x20(%rbp),%rdi ; \               ptr to dest of copy of n
+  5c:   48 8d 75 f0             lea    -0x10(%rbp),%rsi ; | copy n        ptr to n
+  60:   e8 00 00 00 00          callq  65 <f+0x45>      ; /               NonTrivial::NonTrivial(const NonTrivial&) / copy ctor
+  65:   48 8d 7d e0             lea    -0x20(%rbp),%rdi ; f2 arg 0 (ptr to copy of struct NonTrivial), via ptr as non-trivial
+  69:   e8 00 00 00 00          callq  6e <f+0x4e>      ; call f2(struct NonTrivial)
+  6e:   8b 45 ec                mov    -0x14(%rbp),%eax ; |
+  71:   83 e8 0c                sub    $0xc,%eax        ; | a -= 12
+  74:   89 45 ec                mov    %eax,-0x14(%rbp) ; /
+  77:   48 83 c4 20             add    $0x20,%rsp       ; \
+  7b:   5d                      pop    %rbp             ; | epilog
+  7c:   c3                      retq                    ; |
+
+  ; ... snip, removed code of ctor and copy ctor ...
+
+
+
 ; vim: ft=asm
 
diff -r 29d09d10ecd9 -r 75cb8f79d725 doc/disas_examples/x64.win.disas
--- a/doc/disas_examples/x64.win.disas	Mon Mar 21 10:04:10 2022 +0100
+++ b/doc/disas_examples/x64.win.disas	Mon Mar 21 14:46:38 2022 +0100
@@ -423,5 +423,80 @@
 main    ENDP
 
 
+
+; ---------- C++ trivial and non-trivial aggrs passed to C funcs ---------->
+;
+; struct Trivial { int a; };
+; struct NonTrivial { int a; NonTrivial() : a(0) {} NonTrivial(const NonTrivial& rhs) : a(rhs.a) { } };
+;
+; extern "C" {
+; 
+;     void f1(struct Trivial s)    { }
+;     void f2(struct NonTrivial s) { }
+;
+;     void f()
+;     {
+;         struct Trivial t;
+;         struct NonTrivial n;
+;         int a=1;
+;         a += 123;
+;         f1(t);
+;         a -= 123;
+;         f2(n);
+;         a -= 12;
+;     }
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+        ; ... snip, removed code of ctor and copy ctor ...
+
+f1      PROC
+        mov     DWORD PTR [rsp+8], ecx
+        ret     0
+f1      ENDP
+
+f2      PROC
+        mov     QWORD PTR [rsp+8], rcx
+        ret     0
+f2      ENDP
+
+a$ = 32
+n$ = 36
+t$ = 40
+$T1 = 44
+$T2 = 48
+f       PROC
+$LN3:
+        sub     rsp, 72                                    ; prolog
+        lea     rcx, QWORD PTR n$[rsp]                     ; \ this ptr (ptr to n)
+        call    NonTrivial::NonTrivial(void)               ; | NonTrivial::NonTrivial() / ctor
+        mov     DWORD PTR a$[rsp], 1                       ; a = 1
+        mov     eax, DWORD PTR a$[rsp]                     ; |
+        add     eax, 123                                   ; | a += 123
+        mov     DWORD PTR a$[rsp], eax                     ; /
+        mov     ecx, DWORD PTR t$[rsp]                     ; f1 arg 0 (struct Trivial), via reg as small struct
+        call    f1                                         ; call f1(struct Trivial)
+        mov     eax, DWORD PTR a$[rsp]                     ; |
+        sub     eax, 123                                   ; | a -= 123
+        mov     DWORD PTR a$[rsp], eax                     ; /
+        lea     rax, QWORD PTR $T1[rsp]                    ; \ addr of temp $T1 (space for copy of n) ...
+        mov     QWORD PTR $T2[rsp], rax                    ; / ... saved in temp $T2
+        lea     rdx, QWORD PTR n$[rsp]                     ; \               ptr to n
+        mov     rcx, QWORD PTR $T2[rsp]                    ; | copy n        ptr to dest of copy of n
+        call    NonTrivial::NonTrivial(NonTrivial const &) ; /               NonTrivial::NonTrivial(const NonTrivial&) / copy ctor
+        mov     rcx, rax                                   ; f2 arg 0 (ptr to copy of struct NonTrivial), via ptr as non-trivial
+        call    f2                                         ; call f2(struct NonTrivial)
+        mov     eax, DWORD PTR a$[rsp]                     ; |
+        sub     eax, 12                                    ; | a -= 12
+        mov     DWORD PTR a$[rsp], eax                     ; /
+        add     rsp, 72                                    ; \
+        ret     0                                          ; | epilog
+f       ENDP
+
+
+
 ; vim: ft=asm
 
diff -r 29d09d10ecd9 -r 75cb8f79d725 doc/manual/callconvs/callconv_arm32.tex
--- a/doc/manual/callconvs/callconv_arm32.tex	Mon Mar 21 10:04:10 2022 +0100
+++ b/doc/manual/callconvs/callconv_arm32.tex	Mon Mar 21 14:46:38 2022 +0100
@@ -233,14 +233,6 @@
 have to be skipped for parameters passed via registers, or 4-bytes on the stack
 for parameters passed via the stack. Refer to the Debian ARM EABI port wiki
 for more information \cite{armeabi}.\\
-\\
-\paragraph{Status}
-
-\begin{itemize}
-\item The EABI THUMB mode is tested and works fine (contrary to the ATPCS).
-\item Ellipsis calls do not work.
-\item C++ this calls do not work.
-\end{itemize}
 
 
 \clearpage
diff -r 29d09d10ecd9 -r 75cb8f79d725 doc/manual/callconvs/callconv_x64.tex
--- a/doc/manual/callconvs/callconv_x64.tex	Mon Mar 21 10:04:10 2022 +0100
+++ b/doc/manual/callconvs/callconv_x64.tex	Mon Mar 21 14:46:38 2022 +0100
@@ -84,6 +84,7 @@
 \item caller cleans up the stack
 \item first 4 integer/pointer parameters are passed via rcx, rdx, r8, r9 (from left to right), others are pushed on stack (there is a
 spill area for the first 4)
+\item {\it non-trivial} C++ aggregates (as defined by the language) are passed indirectly via a pointer to a copy of the aggregate, no matter the size
 \item aggregates (structs and unions) \textless\ 64 bits are passed like equal-sized integers
 \item float and double parameters are passed via xmm0l-xmm3l
 \item first 4 parameters are passed via the correct register depending on the parameter type - with mixed float and int parameters,
@@ -91,7 +92,7 @@
 \item parameters in registers are right justified
 \item parameters \textless\ 64bits are not zero extended - zero the upper bits contiaining garbage if needed (but they are always
 passed as a qword)
-\item parameters \textgreater\ 64 bits are passed by reference (for aggregate types, that caller-allocated memory must be 16-byte aligned)
+\item parameters \textgreater\ 64 bits are passed via a pointer to a copy (for aggregate types, that caller-allocated memory must be 16-byte aligned)
 \item if callee takes address of a parameter, first 4 parameters must be dumped (to the reserved space on the stack) - for
 floating point parameters, value must be stored in integer AND floating point register
 \item caller cleans up the stack, not the callee (like cdecl)
@@ -108,7 +109,8 @@
 \begin{itemize}
 \item return values of pointer, integral or aggregate (structs and unions) type (\textless=\ 64 bits) are returned via the rax register
 \item floating point types are returned via the xmm0 register
-\item for any other type \textgreater\ 64 bits, a hidden first parameter, with an address to the return value is passed (for C++ thiscalls it is passed as {\bf second} parameter, after the this pointer)
+\item for any other type \textgreater\ 64 bits (or for {\it non-trivial} C++ aggregates of any size), a hidden first parameter, with an address to the
+return value is passed (for C++ thiscalls it is passed as {\bf second} parameter, after the this pointer)
 \end{itemize}
 
 
@@ -195,8 +197,8 @@
 exact but an upper bound on the number of used xmm registers)
 \item aggregates (structs, unions (and arrays within those)) follow a more complicated logic (the following {\bf only considers field types supported by dyncall}):
 \begin{itemize}
+  \item {\it non-trivial} C++ aggregates (as defined by the language) are passed indirectly via a pointer to a copy of the aggregate, no matter the size
   \item aggregates \textgreater\ 16 bytes are always passed entirely via the stack
-  \item for {\it non-trivial} (as defined by the language) C++ aggregates, a pointer to the aggregate is passed, instead
   \item all other aggregates are classified per qword, by looking at all fields occupying all or part of that qword, recursively
   \begin{itemize}
     \item if any field would be passed via the stack, the entire qword will
@@ -224,11 +226,10 @@
 \item floating point types are returned via the xmm0 register (and xmm1 if needed)
 \item aggregates are first classified in the same way as when passing them by value, then:
 \begin{itemize}
-  \item for aggregates that would be passed via the stack, a hidden pointer to a non-shared, caller provided space is {\bf passed} as hidden, first argument; this pointer will be returned via rax
+  \item for aggregates that would be passed via the stack (or for {\it non-trivial} C++ aggregates of any size), a hidden pointer to a non-shared,
+  caller provided space is {\bf passed} as hidden, first argument; this pointer will be returned via rax
   \item otherwise, qword by qword is passed, using rax and rdx for integer/pointer qwords, and xmm0 and xmm1 for floating point ones
 \end{itemize}
-\item for aggregates \textgreater\ 128 bits, a secret first parameter with an address to the return value is
-passed (via rdi) - this passed in address will be returned in rax
 \item floating point values \textgreater\ 64 bits are returned via st0 and st1
 \end{itemize}