changeset 468:79b95db3d68f

- added (only partly annotated) x64/win64 disas examples
author Tassilo Philipp
date Sun, 06 Feb 2022 17:22:59 +0100
parents b47168dacba6
children 984e6652b975
files doc/disas_examples/x64.sysv.disas doc/disas_examples/x64.win.disas
diffstat 2 files changed, 435 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/doc/disas_examples/x64.sysv.disas	Fri Feb 04 23:54:42 2022 +0100
+++ b/doc/disas_examples/x64.sysv.disas	Sun Feb 06 17:22:59 2022 +0100
@@ -6,15 +6,15 @@
 ;
 ; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
 ; {
-; 	/* use some local data */
-; 	*(char*)alloca(10) = 'L';
-; 	leaf_call(b, c, d, e, f, g, h);
+;     /* use some local data */
+;     *(char*)alloca(10) = 'L';
+;     leaf_call(b, c, d, e, f, g, h);
 ; }
 ;
 ; int main()
 ; {
-; 	nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
-; 	return 0;
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+;     return 0;
 ; }
 
 
@@ -450,13 +450,13 @@
 ;
 ; struct A call(unsigned char c)
 ; {
-; 	return (struct A){c};
+;     return (struct A){c};
 ; }
 ;
 ; int main()
 ; {
-; 	struct A a = call(123);
-; 	return 0;
+;     struct A a = call(123);
+;     return 0;
 ; }
 
 
--- a/doc/disas_examples/x64.win.disas	Fri Feb 04 23:54:42 2022 +0100
+++ b/doc/disas_examples/x64.win.disas	Sun Feb 06 17:22:59 2022 +0100
@@ -1,1 +1,427 @@
-; @@@ missing
+; void leaf_call(int b, int c, int d, int e, int f, int g, int h)
+; {
+; }
+;
+; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h)
+; {
+;     /* use some local data */
+;     char x[10] = { 'L' };
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+;
+; int main()
+; {
+;     nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7);
+;     return 0;
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+leaf_call PROC                            ; empty leaf function (example 1): only spills its reg args, then returns
+        mov     DWORD PTR [rsp+32], r9d   ; | in arg 3 (e)
+        mov     DWORD PTR [rsp+24], r8d   ; | in arg 2 (d)   spill reg args to the 32 bytes right above the return addr
+        mov     DWORD PTR [rsp+16], edx   ; | in arg 1 (c)
+        mov     DWORD PTR [rsp+8], ecx    ; | in arg 0 (b)
+        ret     0                         ; return (no prolog/epilog, rsp never adjusted; stack args f, g, h never read)
+leaf_call ENDP
+
+x$ = 64                                   ; local array x[10] (all offsets here are relative to rsp after prolog)
+b$ = 104                                  ; in arg 1 (b), in spill area
+c$ = 112                                  ; in arg 2 (c), in spill area
+d$ = 120                                  ; in arg 3 (d), in spill area
+e$ = 128                                  ; in arg 4 (e), passed via stack
+f$ = 136                                  ; in arg 5 (f), passed via stack
+g$ = 144                                  ; in arg 6 (g), passed via stack
+h$ = 152                                  ; in arg 7 (h), passed via stack
+nonleaf_call PROC                         ; non-leaf function: inits local array x, then forwards in args 1..7 to leaf_call
+        mov     DWORD PTR [rsp+32], r9d   ; | |
+        mov     DWORD PTR [rsp+24], r8d   ; | |
+        mov     DWORD PTR [rsp+16], edx   ; | | spill (in args 3..0, from regs to the 32 bytes right above the return addr)
+        mov     DWORD PTR [rsp+8], ecx    ; | |
+        push    rdi                       ; | save rdi (used as rep stosb destination below)
+        sub     rsp, 80                   ; | prolog
+        mov     BYTE PTR x$[rsp], 76      ; 'L' -> x[0] in local area (of local array's space)
+        lea     rax, QWORD PTR x$[rsp+1]  ; |
+        mov     rdi, rax                  ; |
+        xor     eax, eax                  ; | zero-init rest of local array's space (remaining 9 bytes, starting at x[1])
+        mov     ecx, 9                    ; |
+        rep stosb                         ; |
+        mov     eax, DWORD PTR h$[rsp]    ; arg 6 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+48], eax   ; ... "pushed" onto stack
+        mov     eax, DWORD PTR g$[rsp]    ; arg 5 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+40], eax   ; ... "pushed" onto stack
+        mov     eax, DWORD PTR f$[rsp]    ; arg 4 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+32], eax   ; ... "pushed" onto stack
+        mov     r9d, DWORD PTR e$[rsp]    ; arg 3 (in arg 4, fetched from caller's frame param area)
+        mov     r8d, DWORD PTR d$[rsp]    ; arg 2 (in arg 3, refetched from spill area)
+        mov     edx, DWORD PTR c$[rsp]    ; arg 1 (in arg 2, refetched from spill area)
+        mov     ecx, DWORD PTR b$[rsp]    ; arg 0 (in arg 1, refetched from spill area)
+        call    leaf_call                 ; push return addr and call
+        add     rsp, 80                   ; |
+        pop     rdi                       ; | epilog (restores saved rdi)
+        ret     0                         ; |
+nonleaf_call ENDP
+
+main    PROC                              ; calls nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7) and returns 0
+        sub     rsp, 72                   ; prolog (72 = 32 bytes callee spill area + 4 * 8 bytes stack args + 8 unused bytes)
+        mov     DWORD PTR [rsp+56], 7     ; "push" arg 7 onto stack
+        mov     DWORD PTR [rsp+48], 6     ; "push" arg 6 onto stack
+        mov     DWORD PTR [rsp+40], 5     ; "push" arg 5 onto stack
+        mov     DWORD PTR [rsp+32], 4     ; "push" arg 4 onto stack (first stack arg, right above the 32-byte spill area)
+        mov     r9d, 3                    ; arg 3
+        mov     r8d, 2                    ; arg 2
+        mov     edx, 1                    ; arg 1
+        xor     ecx, ecx                  ; arg 0 (= 0)
+        call    nonleaf_call              ; push return addr and call
+        xor     eax, eax                  ; return value (= 0)
+        add     rsp, 72                   ; |
+        ret     0                         ; | epilog
+main    ENDP
+
+
+
+; ---------- structs by value, struct in first call on reg arg boundary ---------->
+;
+; struct A { int i, j; long long l; };
+;
+; void leaf_call(int b, int c, int d, int e, struct A f, int g, int h)
+; {
+; }
+;
+; void nonleaf_call(int a, int b, int c, int d, int e, struct A f, int g, int h)
+; {
+;     /* use some local data */
+;     char x[220] = { 'L' };
+;     leaf_call(b, c, d, e, f, g, h);
+; }
+;
+; int main()
+; {
+;     struct A a = {5, 6, 7ll};
+;     nonleaf_call(0, 1, 2, 3, 4, a, 8, 9);
+;     return 0;
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+leaf_call PROC                            ; empty leaf function (example 2): only spills its reg args, then returns
+        mov     DWORD PTR [rsp+32], r9d   ; | in arg 3 (e)
+        mov     DWORD PTR [rsp+24], r8d   ; | in arg 2 (d)   spill reg args to the 32 bytes right above the return addr
+        mov     DWORD PTR [rsp+16], edx   ; | in arg 1 (c)
+        mov     DWORD PTR [rsp+8], ecx    ; | in arg 0 (b)
+        ret     0                         ; return (stack args f, g, h never read)
+leaf_call ENDP
+
+$T1 = 64                                  ; local temp copy of struct f (16 bytes); offsets here are relative to rsp after prolog
+x$ = 80                                   ; local array x[220]
+b$ = 344                                  ; in arg 1 (b), in spill area
+c$ = 352                                  ; in arg 2 (c), in spill area
+d$ = 360                                  ; in arg 3 (d), in spill area
+e$ = 368                                  ; in arg 4 (e), passed via stack
+f$ = 376                                  ; in arg 5 (f), passed via stack as a pointer to the struct
+g$ = 384                                  ; in arg 6 (g), passed via stack
+h$ = 392                                  ; in arg 7 (h), passed via stack
+nonleaf_call PROC                         ; non-leaf function: inits local array x, copies struct f, forwards in args 1..7 to leaf_call
+        mov     DWORD PTR [rsp+32], r9d   ; | |
+        mov     DWORD PTR [rsp+24], r8d   ; | |
+        mov     DWORD PTR [rsp+16], edx   ; | | spill (in args 3..0, from regs to the 32 bytes right above the return addr)
+        mov     DWORD PTR [rsp+8], ecx    ; | |
+        push    rsi                       ; | save rsi (used as rep movsb source below)
+        push    rdi                       ; | save rdi (used as rep stosb/movsb destination below)
+        sub     rsp, 312                      ; 00000138H | prolog
+        mov     BYTE PTR x$[rsp], 76                  ; 0000004cH 'L' -> x[0] in local area
+        lea     rax, QWORD PTR x$[rsp+1]  ; |
+        mov     rdi, rax                  ; |
+        xor     eax, eax                  ; | zero-init rest of local array's space (remaining 219 bytes, starting at x[1])
+        mov     ecx, 219                      ; 000000dbH |
+        rep stosb                         ; |
+        lea     rax, QWORD PTR $T1[rsp]   ; |
+        mov     rdi, rax                  ; | destination: local temp $T1
+        mov     rsi, QWORD PTR f$[rsp]    ; | source: struct pointed to by in arg 5
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct A)
+        rep movsb                         ; |
+        mov     eax, DWORD PTR h$[rsp]    ; arg 6 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+48], eax   ; ... "pushed" onto stack
+        mov     eax, DWORD PTR g$[rsp]    ; arg 5 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+40], eax   ; ... "pushed" onto stack
+        lea     rax, QWORD PTR $T1[rsp]   ; arg 4 (pointer to local temp copy of struct f), and ...
+        mov     QWORD PTR [rsp+32], rax   ; ... "pushed" onto stack
+        mov     r9d, DWORD PTR e$[rsp]    ; arg 3 (in arg 4, fetched from caller's frame param area)
+        mov     r8d, DWORD PTR d$[rsp]    ; arg 2 (in arg 3, refetched from spill area)
+        mov     edx, DWORD PTR c$[rsp]    ; arg 1 (in arg 2, refetched from spill area)
+        mov     ecx, DWORD PTR b$[rsp]    ; arg 0 (in arg 1, refetched from spill area)
+        call    leaf_call                 ; push return addr and call
+        add     rsp, 312                      ; 00000138H |
+        pop     rdi                       ; |
+        pop     rsi                       ; | epilog (restores saved rdi and rsi)
+        ret     0                         ; |
+nonleaf_call ENDP
+
+a$ = 64                                   ; local struct a (16 bytes); offsets here are relative to rsp after prolog
+$T1 = 80                                  ; local temp copy of a, whose address is passed as arg 5
+main    PROC                              ; inits struct a, calls nonleaf_call(0, 1, 2, 3, 4, a, 8, 9) and returns 0
+        push    rsi                       ; | save rsi (used as rep movsb source below)
+        push    rdi                       ; | save rdi (used as rep movsb destination below)
+        sub     rsp, 104                      ; 00000068H | prolog
+        mov     DWORD PTR a$[rsp], 5      ; a.i = 5
+        mov     DWORD PTR a$[rsp+4], 6    ; a.j = 6
+        mov     QWORD PTR a$[rsp+8], 7    ; a.l = 7
+        lea     rax, QWORD PTR $T1[rsp]   ; |
+        lea     rcx, QWORD PTR a$[rsp]    ; |
+        mov     rdi, rax                  ; | destination: local temp $T1
+        mov     rsi, rcx                  ; | source: local struct a
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct A)
+        rep movsb                         ; |
+        mov     DWORD PTR [rsp+56], 9     ; "push" arg 7 onto stack
+        mov     DWORD PTR [rsp+48], 8     ; "push" arg 6 onto stack
+        lea     rax, QWORD PTR $T1[rsp]   ; arg 5 (pointer to temp copy of struct a), and ...
+        mov     QWORD PTR [rsp+40], rax   ; ... "pushed" onto stack
+        mov     DWORD PTR [rsp+32], 4     ; "push" arg 4 onto stack
+        mov     r9d, 3                    ; arg 3
+        mov     r8d, 2                    ; arg 2
+        mov     edx, 1                    ; arg 1
+        xor     ecx, ecx                  ; arg 0 (= 0)
+        call    nonleaf_call              ; push return addr and call
+        xor     eax, eax                  ; return value (= 0)
+        add     rsp, 104                      ; 00000068H |
+        pop     rdi                       ; |
+        pop     rsi                       ; | epilog (restores saved rdi and rsi)
+        ret     0                         ; |
+main    ENDP
+
+
+
+; ---------- structs by value, complex example (multiple structs, partly passed via regs) ---------->
+;
+; struct A { int i, j; float f; };
+; struct B { double d; long long l; };
+;
+; void leaf_call(int b, struct A c, struct B d, int e, int f, struct A g, struct B h, int i, int j)
+; {
+; }
+;
+; void nonleaf_call(int a, int b, struct A c, struct B d, int e, int f, struct A g, struct B h, int i, int j)
+; {
+;     /* use some local data */
+;     char x[220] = { 'L' };
+;     leaf_call(b, c, d, e, f, g, h, i, j);
+; }
+;
+; int main()
+; {
+;     struct A a = {2, 3, 4.f};
+;     struct B b = {5., 6ll};
+;     struct A c = {9, 10, 11.f};
+;     struct B d = {12., 13ll};
+;     nonleaf_call(0, 1, a, b, 7, 8, c, d, 14, 15);
+;     return 0;
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+__real@41300000 DD 041300000r                   ; | 11.f (float)
+__real@40800000 DD 040800000r                   ; | 4.f  (float)
+__real@4028000000000000 DQ 04028000000000000r   ; | fp data: 12. (double)
+__real@4014000000000000 DQ 04014000000000000r   ; | 5.   (double)
+
+leaf_call PROC                            ; empty leaf function (example 3): only spills its reg args, then returns
+        mov     DWORD PTR [rsp+32], r9d   ; | in arg 3 (e)
+        mov     QWORD PTR [rsp+24], r8    ; | in arg 2 (d, full 64-bit reg as it holds a pointer to the struct)
+        mov     QWORD PTR [rsp+16], rdx   ; | in arg 1 (c, full 64-bit reg as it holds a pointer to the struct)
+        mov     DWORD PTR [rsp+8], ecx    ; | in arg 0 (b)   spill reg args to the 32 bytes right above the return addr
+        ret     0                         ; return (stack args f, g, h, i, j never read)
+leaf_call ENDP
+
+$T1 = 80                                  ; local temp copy of struct g (12 bytes); offsets here are relative to rsp after prolog
+$T2 = 96                                  ; local temp copy of struct c (12 bytes)
+$T3 = 112                                 ; local temp copy of struct h (16 bytes)
+$T4 = 128                                 ; local temp copy of struct d (16 bytes)
+x$ = 144                                  ; local array x[220]
+b$ = 408                                  ; in arg 1 (b), in spill area
+c$ = 416                                  ; in arg 2 (c), in spill area, as a pointer to the struct
+d$ = 424                                  ; in arg 3 (d), in spill area, as a pointer to the struct
+e$ = 432                                  ; in arg 4 (e), passed via stack
+f$ = 440                                  ; in arg 5 (f), passed via stack
+g$ = 448                                  ; in arg 6 (g), passed via stack as a pointer to the struct
+h$ = 456                                  ; in arg 7 (h), passed via stack as a pointer to the struct
+i$ = 464                                  ; in arg 8 (i), passed via stack
+j$ = 472                                  ; in arg 9 (j), passed via stack
+nonleaf_call PROC                         ; non-leaf function: inits local array x, copies the 4 structs, forwards in args 1..9 to leaf_call
+        mov     QWORD PTR [rsp+32], r9    ; | |
+        mov     QWORD PTR [rsp+24], r8    ; | |
+        mov     DWORD PTR [rsp+16], edx   ; | | spill (in args 3..0, from regs to the 32 bytes right above the return addr)
+        mov     DWORD PTR [rsp+8], ecx    ; | |
+        push    rsi                       ; | save rsi (used as rep movsb source below)
+        push    rdi                       ; | save rdi (used as rep stosb/movsb destination below)
+        sub     rsp, 376                  ; | prolog
+        mov     BYTE PTR x$[rsp], 76      ; 'L' -> x[0] in local area
+        lea     rax, QWORD PTR x$[rsp+1]  ; |
+        mov     rdi, rax                  ; |
+        xor     eax, eax                  ; | zero-init rest of local array's space (remaining 219 bytes, starting at x[1])
+        mov     ecx, 219                  ; |
+        rep stosb                         ; |
+        lea     rax, QWORD PTR $T3[rsp]   ; |
+        mov     rdi, rax                  ; | destination: local temp $T3
+        mov     rsi, QWORD PTR h$[rsp]    ; | source: struct pointed to by in arg 7 (h)
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct B)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T1[rsp]   ; |
+        mov     rdi, rax                  ; | destination: local temp $T1
+        mov     rsi, QWORD PTR g$[rsp]    ; | source: struct pointed to by in arg 6 (g)
+        mov     ecx, 12                   ; | copy 12 bytes (the whole struct A)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T4[rsp]   ; |
+        mov     rdi, rax                  ; | destination: local temp $T4
+        mov     rsi, QWORD PTR d$[rsp]    ; | source: struct pointed to by in arg 3 (d)
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct B)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T2[rsp]   ; |
+        mov     rdi, rax                  ; | destination: local temp $T2
+        mov     rsi, QWORD PTR c$[rsp]    ; | source: struct pointed to by in arg 2 (c)
+        mov     ecx, 12                   ; | copy 12 bytes (the whole struct A)
+        rep movsb                         ; |
+        mov     eax, DWORD PTR j$[rsp]    ; arg 8 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+64], eax   ; ... "pushed" onto stack
+        mov     eax, DWORD PTR i$[rsp]    ; arg 7 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+56], eax   ; ... "pushed" onto stack
+        lea     rax, QWORD PTR $T3[rsp]   ; arg 6 (pointer to local temp copy of struct h), and ...
+        mov     QWORD PTR [rsp+48], rax   ; ... "pushed" onto stack
+        lea     rax, QWORD PTR $T1[rsp]   ; arg 5 (pointer to local temp copy of struct g), and ...
+        mov     QWORD PTR [rsp+40], rax   ; ... "pushed" onto stack
+        mov     eax, DWORD PTR f$[rsp]    ; arg 4 (fetched from caller's frame param area), and ...
+        mov     DWORD PTR [rsp+32], eax   ; ... "pushed" onto stack
+        mov     r9d, DWORD PTR e$[rsp]    ; arg 3 (in arg 4, fetched from caller's frame param area)
+        lea     r8, QWORD PTR $T4[rsp]    ; arg 2 (pointer to local temp copy of struct d)
+        lea     rdx, QWORD PTR $T2[rsp]   ; arg 1 (pointer to local temp copy of struct c)
+        mov     ecx, DWORD PTR b$[rsp]    ; arg 0 (in arg 1, refetched from spill area)
+        call    leaf_call                 ; push return addr and call
+        add     rsp, 376                  ; |
+        pop     rdi                       ; |
+        pop     rsi                       ; | epilog (restores saved rdi and rsi)
+        ret     0                         ; |
+nonleaf_call ENDP
+
+c$ = 80                                   ; local struct A c (12 bytes); offsets here are relative to rsp after prolog
+a$ = 96                                   ; local struct A a (12 bytes)
+$T1 = 112                                 ; local temp copy of c, whose address is passed as arg 6
+$T2 = 128                                 ; local temp copy of a, whose address is passed as arg 2
+d$ = 144                                  ; local struct B d (16 bytes)
+b$ = 160                                  ; local struct B b (16 bytes)
+$T3 = 176                                 ; local temp copy of d, whose address is passed as arg 7
+$T4 = 192                                 ; local temp copy of b, whose address is passed as arg 3
+main    PROC                              ; inits structs a, b, c, d, calls nonleaf_call(0, 1, a, b, 7, 8, c, d, 14, 15) and returns 0
+        push    rsi                       ; | save rsi (used as rep movsb source below)
+        push    rdi                       ; | save rdi (used as rep movsb destination below)
+        sub     rsp, 216                  ; | prolog
+        mov     DWORD PTR a$[rsp], 2      ; a.i = 2
+        mov     DWORD PTR a$[rsp+4], 3    ; a.j = 3
+        movss   xmm0, DWORD PTR __real@40800000  ; |
+        movss   DWORD PTR a$[rsp+8], xmm0 ; | a.f = 4.f (loaded from fp data via xmm0)
+        movsd   xmm0, QWORD PTR __real@4014000000000000  ; |
+        movsd   QWORD PTR b$[rsp], xmm0   ; | b.d = 5. (loaded from fp data via xmm0)
+        mov     QWORD PTR b$[rsp+8], 6    ; b.l = 6
+        mov     DWORD PTR c$[rsp], 9      ; c.i = 9
+        mov     DWORD PTR c$[rsp+4], 10   ; c.j = 10
+        movss   xmm0, DWORD PTR __real@41300000  ; |
+        movss   DWORD PTR c$[rsp+8], xmm0 ; | c.f = 11.f (loaded from fp data via xmm0)
+        movsd   xmm0, QWORD PTR __real@4028000000000000  ; |
+        movsd   QWORD PTR d$[rsp], xmm0   ; | d.d = 12. (loaded from fp data via xmm0)
+        mov     QWORD PTR d$[rsp+8], 13   ; d.l = 13
+        lea     rax, QWORD PTR $T3[rsp]   ; |
+        lea     rcx, QWORD PTR d$[rsp]    ; |
+        mov     rdi, rax                  ; | destination: local temp $T3
+        mov     rsi, rcx                  ; | source: local struct d
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct B)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T1[rsp]   ; |
+        lea     rcx, QWORD PTR c$[rsp]    ; |
+        mov     rdi, rax                  ; | destination: local temp $T1
+        mov     rsi, rcx                  ; | source: local struct c
+        mov     ecx, 12                   ; | copy 12 bytes (the whole struct A)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T4[rsp]   ; |
+        lea     rcx, QWORD PTR b$[rsp]    ; |
+        mov     rdi, rax                  ; | destination: local temp $T4
+        mov     rsi, rcx                  ; | source: local struct b
+        mov     ecx, 16                   ; | copy 16 bytes (the whole struct B)
+        rep movsb                         ; |
+        lea     rax, QWORD PTR $T2[rsp]   ; |
+        lea     rcx, QWORD PTR a$[rsp]    ; |
+        mov     rdi, rax                  ; | destination: local temp $T2
+        mov     rsi, rcx                  ; | source: local struct a
+        mov     ecx, 12                   ; | copy 12 bytes (the whole struct A)
+        rep movsb                         ; |
+        mov     DWORD PTR [rsp+72], 15    ; "push" arg 9 onto stack
+        mov     DWORD PTR [rsp+64], 14    ; "push" arg 8 onto stack
+        lea     rax, QWORD PTR $T3[rsp]   ; arg 7 (pointer to temp copy of struct d), and ...
+        mov     QWORD PTR [rsp+56], rax   ; ... "pushed" onto stack
+        lea     rax, QWORD PTR $T1[rsp]   ; arg 6 (pointer to temp copy of struct c), and ...
+        mov     QWORD PTR [rsp+48], rax   ; ... "pushed" onto stack
+        mov     DWORD PTR [rsp+40], 8     ; "push" arg 5 onto stack
+        mov     DWORD PTR [rsp+32], 7     ; "push" arg 4 onto stack
+        lea     r9, QWORD PTR $T4[rsp]    ; arg 3 (pointer to temp copy of struct b)
+        lea     r8, QWORD PTR $T2[rsp]    ; arg 2 (pointer to temp copy of struct a)
+        mov     edx, 1                    ; arg 1
+        xor     ecx, ecx                  ; arg 0 (= 0)
+        call    nonleaf_call              ; push return addr and call
+        xor     eax, eax                  ; return value (= 0)
+        add     rsp, 216                  ; |
+        pop     rdi                       ; |
+        pop     rsi                       ; | epilog (restores saved rdi and rsi)
+        ret     0                         ; |
+main    ENDP
+
+
+
+; ---------- returning tiny struct by value (passes via regs) ---------->
+;
+; struct A { unsigned char a; };
+;
+; struct A call(unsigned char c)
+; {
+;     struct A r = {c};
+;     return r;
+; }
+;
+; int main()
+; {
+;     struct A a = call(123);
+;     return 0;
+; }
+
+
+
+; output from godbolt compiler explorer w/ msvc 19.0
+
+r$ = 0                                 ; local struct r (1 byte); offsets here are relative to rsp after prolog
+c$ = 32                                ; in arg 0 (c), in spill area
+call    PROC                           ; builds struct A {c} in local area and returns it by value in al
+        mov     BYTE PTR [rsp+8], cl   ; |         spill
+        sub     rsp, 24                ; | prolog
+        movzx   eax, BYTE PTR c$[rsp]  ; in arg 0 (fetched from spill area) -> eax, then ...
+        mov     BYTE PTR r$[rsp], al   ; ... -> struct in local area (top of stack, as leaf call and thus no reserved spill area)
+        movzx   eax, BYTE PTR r$[rsp]  ; refetch same value into eax to return (small) struct via reg (redundant, eax already holds it)
+        add     rsp, 24                ; |
+        ret     0                      ; | epilog
+call    ENDP
+
+a$ = 32                                ; local struct a (1 byte); offset is relative to rsp after prolog
+main    PROC                           ; calls call(123), stores the returned struct in local a and returns 0
+        sub     rsp, 56                ; prolog
+        mov     cl, 123                ; arg 0
+        call    call                   ; push return addr and call
+        mov     BYTE PTR a$[rsp], al   ; write returned struct data (123, returned in al) to local area
+        xor     eax, eax               ; return value (= 0)
+        add     rsp, 56                ; |
+        ret     0                      ; | epilog
+main    ENDP
+
+
+; vim: ft=asm
+