changeset 475:5be9f5ccdd35

- doc: ppc64 clarifications
author Tassilo Philipp
date Sat, 19 Feb 2022 19:27:22 +0100
parents c9e19249ecd3
children c73c59c8b553
files doc/disas_examples/sparc64.sparc64.disas doc/manual/callconvs/callconv_sparc64.tex
diffstat 2 files changed, 269 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/doc/disas_examples/sparc64.sparc64.disas	Wed Feb 16 19:26:21 2022 +0100
+++ b/doc/disas_examples/sparc64.sparc64.disas	Sat Feb 19 19:27:22 2022 +0100
@@ -1000,7 +1000,6 @@
   60:   ae 05 e0 00     add  %l7, 0, %l7        ! 0 <leaf_call> ; | @@@ unsure, call to some code stub adding o7 to l7
   64:   7f ff ff fa     call  4c <leaf_call+0x4c>               ; |
   68:   01 00 00 00     nop                                     ; /
-;     leaf_call((struct A){0,1.f}, (struct B){2.f,3}, (struct C){4.f,5,6}, (struct D){7,8.f});
   6c:   c0 27 a7 df     clr  [ %fp + 0x7df ]                    ; \
   70:   03 00 00 00     sethi  %hi(0), %g1                      ; |
   74:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
@@ -1056,6 +1055,270 @@
 
 
 
+; ---------- passing structs with only fp parts ---------->
+;
+; struct A { float a; };
+; struct B { float a, b; };
+; struct C { float a, b, c; };
+; struct D { double a; };
+; struct E { double a, b; };
+; struct F { double a, b, c; };
+;
+; void leaf_call(struct A a, struct B b, struct C c, struct D d, struct E e, struct F f)
+; {
+; }
+;
+; int main()
+; {
+;     leaf_call((struct A){1.f}, (struct B){2.f,3.f}, (struct C){4.f,5.f,6.f}, (struct D){1.}, (struct E){2.,3.}, (struct F){4.,5.,6.});
+;     return 0;
+; }
+
+
+
+; output from openbsd-6.0-sparc64 w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 30     save  %sp, -208, %sp
+   4:   c1 27 a8 7f     st  %f0, [ %fp + 0x87f ]
+   8:   9d a0 00 22     fmovs  %f2, %f14
+   c:   9f a0 00 23     fmovs  %f3, %f15
+  10:   a1 a0 00 24     fmovs  %f4, %f16
+  14:   a3 a0 00 25     fmovs  %f5, %f17
+  18:   a5 a0 00 26     fmovs  %f6, %f18
+  1c:   d1 3f a8 9f     std  %f8, [ %fp + 0x89f ]
+  20:   91 a0 00 4a     fmovd  %f10, %f8
+  24:   95 a0 00 4c     fmovd  %f12, %f10
+  28:   d1 3f a8 a7     std  %f8, [ %fp + 0x8a7 ]
+  2c:   d5 3f a8 af     std  %f10, [ %fp + 0x8af ]
+  30:   dd 27 a8 87     st  %f14, [ %fp + 0x887 ]
+  34:   df 27 a8 8b     st  %f15, [ %fp + 0x88b ]
+  38:   e1 27 a8 8f     st  %f16, [ %fp + 0x88f ]
+  3c:   e3 27 a8 93     st  %f17, [ %fp + 0x893 ]
+  40:   e5 27 a8 97     st  %f18, [ %fp + 0x897 ]
+  44:   81 cf e0 08     rett  %i7 + 8
+  48:   01 00 00 00     nop
+  4c:   ae 03 c0 17     add  %o7, %l7, %l7
+  50:   81 c3 e0 08     retl
+  54:   01 00 00 00     nop
+
+0000000000000058 <main>:
+  58:   9d e3 be b0     save  %sp, -336, %sp                    ; prolog
+  5c:   2f 00 00 00     sethi  %hi(0), %l7                      ; |
+  60:   ae 05 e0 00     add  %l7, 0, %l7        ! 0 <leaf_call> ; | @@@ unsure, call to some code stub adding o7 to l7
+  64:   7f ff ff fa     call  4c <leaf_call+0x4c>               ; |
+  68:   01 00 00 00     nop                                     ; /
+  6c:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  70:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  74:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local struct A
+  78:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  7c:   d1 27 a7 e3     st  %f8, [ %fp + 0x7e3 ]                ; /
+  80:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  84:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  88:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+  8c:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  90:   d1 27 a7 db     st  %f8, [ %fp + 0x7db ]                ; |
+  94:   03 00 00 00     sethi  %hi(0), %g1                      ; | put together local struct B
+  98:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  9c:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+  a0:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  a4:   d1 27 a7 df     st  %f8, [ %fp + 0x7df ]                ; /
+  a8:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  ac:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  b0:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+  b4:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  b8:   d1 27 a7 c3     st  %f8, [ %fp + 0x7c3 ]                ; |
+  bc:   03 00 00 00     sethi  %hi(0), %g1                      ; |
+  c0:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  c4:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+  c8:   d1 00 40 00     ld  [ %g1 ], %f8                        ; | put together local struct C
+  cc:   d1 27 a7 c7     st  %f8, [ %fp + 0x7c7 ]                ; |
+  d0:   03 00 00 00     sethi  %hi(0), %g1                      ; |
+  d4:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  d8:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+  dc:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  e0:   d1 27 a7 cb     st  %f8, [ %fp + 0x7cb ]                ; /
+  e4:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  e8:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  ec:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local struct D
+  f0:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+  f4:   d1 3f a7 cf     std  %f8, [ %fp + 0x7cf ]               ; /
+  f8:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  fc:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+ 100:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+ 104:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+ 108:   d1 3f a7 af     std  %f8, [ %fp + 0x7af ]               ; |
+ 10c:   03 00 00 00     sethi  %hi(0), %g1                      ; | put together local struct E
+ 110:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+ 114:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+ 118:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+ 11c:   d1 3f a7 b7     std  %f8, [ %fp + 0x7b7 ]               ; /
+ 120:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+ 124:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+ 128:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+ 12c:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+ 130:   d1 3f a7 97     std  %f8, [ %fp + 0x797 ]               ; |
+ 134:   03 00 00 00     sethi  %hi(0), %g1                      ; |
+ 138:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+ 13c:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+ 140:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; | put together local struct F
+ 144:   d1 3f a7 9f     std  %f8, [ %fp + 0x79f ]               ; |
+ 148:   03 00 00 00     sethi  %hi(0), %g1                      ; |
+ 14c:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+ 150:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; |
+ 154:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+ 158:   d1 3f a7 a7     std  %f8, [ %fp + 0x7a7 ]               ; /
+ 15c:   c2 5f a7 97     ldx  [ %fp + 0x797 ], %g1               ; \
+ 160:   c2 77 a7 6f     stx  %g1, [ %fp + 0x76f ]               ; |
+ 164:   c2 5f a7 9f     ldx  [ %fp + 0x79f ], %g1               ; | prep arg 5 (struct F, copied entirely onto stack, as > 16 bytes, to pass indirectly)
+ 168:   c2 77 a7 77     stx  %g1, [ %fp + 0x777 ]               ; |
+ 16c:   c2 5f a7 a7     ldx  [ %fp + 0x7a7 ], %g1               ; |
+ 170:   c2 77 a7 7f     stx  %g1, [ %fp + 0x77f ]               ; /
+ 174:   d1 07 a7 e3     ld  [ %fp + 0x7e3 ], %f8                ; prep arg 0    (struct A)
+ 178:   d3 07 a7 db     ld  [ %fp + 0x7db ], %f9                ; \
+ 17c:   dd 07 a7 df     ld  [ %fp + 0x7df ], %f14               ; / prep arg 1  (struct B)
+ 180:   df 07 a7 c3     ld  [ %fp + 0x7c3 ], %f15               ; \                        
+ 184:   e1 07 a7 c7     ld  [ %fp + 0x7c7 ], %f16               ; | prep arg 2  (struct C)
+ 188:   e3 07 a7 cb     ld  [ %fp + 0x7cb ], %f17               ; /                        
+ 18c:   e5 1f a7 cf     ldd  [ %fp + 0x7cf ], %f18              ; prep arg 3    (struct D)
+ 190:   82 07 a7 6f     add  %fp, 0x76f, %g1                    ; \
+ 194:   c2 73 a8 b7     stx  %g1, [ %sp + 0x8b7 ]               ; / arg 5 (struct F, passed indirectly as ptr to copy; via stack as all %o* regs already skipped)
+ 198:   c2 5f a7 b7     ldx  [ %fp + 0x7b7 ], %g1               ; @@@ unsure ...
+ 19c:   c2 73 a8 af     stx  %g1, [ %sp + 0x8af ]               ; @@@ ... last double of struct E pushed onto stack (maybe register save area? maybe for some iteration facilitation?)
+ 1a0:   d5 1f a7 af     ldd  [ %fp + 0x7af ], %f10              ; \ arg 4 (struct E)   first double
+ 1a4:   d9 1f a7 b7     ldd  [ %fp + 0x7b7 ], %f12              ; /                    second double
+ 1a8:   81 a0 00 28     fmovs  %f8, %f0                         ; arg 0 (entire struct A, takes full %d0 slot despite not being 64bit, as only field of struct)
+ 1ac:   85 a0 00 29     fmovs  %f9, %f2                         ; \ arg 1 (struct B)   first float
+ 1b0:   87 a0 00 2e     fmovs  %f14, %f3                        ; /                    second float
+ 1b4:   89 a0 00 2f     fmovs  %f15, %f4                        ; \                    first float
+ 1b8:   8b a0 00 30     fmovs  %f16, %f5                        ; | arg 2 (struct C)   second float
+ 1bc:   8d a0 00 31     fmovs  %f17, %f6                        ; /                    third float
+ 1c0:   91 a0 00 52     fmovd  %f18, %f8                        ; arg 3  (entire struct D, single field double)
+ 1c4:   40 00 00 00     call  1c4 <main+0x16c>                  ; call leaf_call (objdump not from final link but .o)
+ 1c8:   01 00 00 00     nop                                     ; branch delay slot
+ 1cc:   82 10 20 00     clr  %g1        ! 0 <leaf_call>         ; \
+ 1d0:   83 38 60 00     sra  %g1, 0, %g1                        ; / return value
+ 1d4:   b0 10 00 01     mov  %g1, %i0                           ; \
+ 1d8:   81 cf e0 08     rett  %i7 + 8                           ; | epilog
+ 1dc:   01 00 00 00     nop                                     ; |            branch delay slot
+
+
+
+; ---------- passing only unions with only fp parts ---------->
+;
+; union A { float a; };
+; union B { float a, b; };
+; union C { float a, b, c; };
+; union D { double a; };
+; union E { double a, b; };
+; union F { double a, b, c; };
+;
+; void leaf_call(union A a, union B b, union C c, union D d, union E e, union F f)
+; {
+; }
+;
+; int main()
+; {
+;     leaf_call((union A){1.f}, (union B){2.f,3.f}, (union C){4.f,5.f,6.f}, (union D){1.}, (union E){2.,3.}, (union F){4.,5.,6.});
+;     return 0;
+; }
+
+
+
+; output from openbsd-6.0-sparc64 w/ gcc 4.2.1
+
+0000000000000000 <leaf_call>:
+   0:   9d e3 bf 30     save  %sp, -208, %sp
+   4:   84 10 00 18     mov  %i0, %g2
+   8:   86 10 00 19     mov  %i1, %g3
+   c:   88 10 00 1a     mov  %i2, %g4
+  10:   f6 77 a8 97     stx  %i3, [ %fp + 0x897 ]
+  14:   f8 77 a8 9f     stx  %i4, [ %fp + 0x89f ]
+  18:   fa 77 a8 a7     stx  %i5, [ %fp + 0x8a7 ]
+  1c:   85 38 b0 20     srax  %g2, 0x20, %g2
+  20:   c2 07 a8 7f     ld  [ %fp + 0x87f ], %g1
+  24:   82 08 60 00     and  %g1, 0, %g1
+  28:   82 10 40 02     or  %g1, %g2, %g1
+  2c:   c2 27 a8 7f     st  %g1, [ %fp + 0x87f ]
+  30:   87 38 f0 20     srax  %g3, 0x20, %g3
+  34:   c2 07 a8 87     ld  [ %fp + 0x887 ], %g1
+  38:   82 08 60 00     and  %g1, 0, %g1
+  3c:   82 10 40 03     or  %g1, %g3, %g1
+  40:   c2 27 a8 87     st  %g1, [ %fp + 0x887 ]
+  44:   89 39 30 20     srax  %g4, 0x20, %g4
+  48:   c2 07 a8 8f     ld  [ %fp + 0x88f ], %g1
+  4c:   82 08 60 00     and  %g1, 0, %g1
+  50:   82 10 40 04     or  %g1, %g4, %g1
+  54:   c2 27 a8 8f     st  %g1, [ %fp + 0x88f ]
+  58:   81 cf e0 08     rett  %i7 + 8
+  5c:   01 00 00 00     nop
+  60:   ae 03 c0 17     add  %o7, %l7, %l7
+  64:   81 c3 e0 08     retl
+  68:   01 00 00 00     nop
+
+000000000000006c <main>:
+  6c:   9d e3 bf 10     save  %sp, -240, %sp                    ; prolog
+  70:   2f 00 00 00     sethi  %hi(0), %l7                      ; |
+  74:   ae 05 e0 00     add  %l7, 0, %l7        ! 0 <leaf_call> ; | @@@ unsure, call to some code stub adding o7 to l7
+  78:   7f ff ff fa     call  60 <leaf_call+0x60>               ; |
+  7c:   01 00 00 00     nop                                     ; /
+  80:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  84:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  88:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union A
+  8c:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  90:   d1 27 a7 e3     st  %f8, [ %fp + 0x7e3 ]                ; /
+  94:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  98:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  9c:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union B (only writes one val)
+  a0:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  a4:   d1 27 a7 df     st  %f8, [ %fp + 0x7df ]                ; /
+  a8:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  ac:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  b0:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union C (only writes one val)
+  b4:   d1 00 40 00     ld  [ %g1 ], %f8                        ; |
+  b8:   d1 27 a7 db     st  %f8, [ %fp + 0x7db ]                ; /
+  bc:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  c0:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  c4:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union D
+  c8:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+  cc:   d1 3f a7 cf     std  %f8, [ %fp + 0x7cf ]               ; /
+  d0:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  d4:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  d8:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union E (only writes one val)
+  dc:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+  e0:   d1 3f a7 c7     std  %f8, [ %fp + 0x7c7 ]               ; /
+  e4:   03 00 00 00     sethi  %hi(0), %g1                      ; \
+  e8:   82 10 60 00     mov  %g1, %g1   ! 0 <leaf_call>         ; |
+  ec:   c2 5d c0 01     ldx  [ %l7 + %g1 ], %g1                 ; | put together local union F (only writes one val)
+  f0:   d1 18 40 00     ldd  [ %g1 ], %f8                       ; |
+  f4:   d1 3f a7 bf     std  %f8, [ %fp + 0x7bf ]               ; /
+  f8:   c2 07 a7 e3     ld  [ %fp + 0x7e3 ], %g1                ; \
+  fc:   87 30 60 00     srl  %g1, 0, %g3                        ; | prep arg 0
+ 100:   87 28 f0 20     sllx  %g3, 0x20, %g3                    ; /               left-justify
+ 104:   c2 5f a7 df     ldx  [ %fp + 0x7df ], %g1               ; \
+ 108:   85 30 70 20     srlx  %g1, 0x20, %g2                    ; | prep arg 1
+ 10c:   85 28 b0 20     sllx  %g2, 0x20, %g2                    ; /
+ 110:   c2 07 a7 db     ld  [ %fp + 0x7db ], %g1                ; \
+ 114:   83 30 60 00     srl  %g1, 0, %g1                        ; | prep arg 2
+ 118:   83 28 70 20     sllx  %g1, 0x20, %g1                    ; /
+ 11c:   c8 5f a7 cf     ldx  [ %fp + 0x7cf ], %g4               ; prep arg 3    | a bit pointless, could be written
+ 120:   ca 5f a7 c7     ldx  [ %fp + 0x7c7 ], %g5               ; prep arg 4    | directly to %o3 and %o4
+ 124:   da 5f a7 bf     ldx  [ %fp + 0x7bf ], %o5               ; arg 5
+ 128:   90 10 00 03     mov  %g3, %o0                           ; arg 0      |
+ 12c:   92 10 00 02     mov  %g2, %o1                           ; arg 1      | note: all left-justified
+ 130:   94 10 00 01     mov  %g1, %o2                           ; arg 2      |
+ 134:   96 10 00 04     mov  %g4, %o3                           ; arg 3
+ 138:   98 10 00 05     mov  %g5, %o4                           ; arg 4
+ 13c:   40 00 00 00     call  13c <main+0xd0>                   ; call leaf_call (objdump not from final link but .o)
+ 140:   01 00 00 00     nop                                     ; branch delay slot
+ 144:   82 10 20 00     clr  %g1        ! 0 <leaf_call>         ; \
+ 148:   83 38 60 00     sra  %g1, 0, %g1                        ; / return value
+ 14c:   b0 10 00 01     mov  %g1, %i0                           ; \
+ 150:   81 cf e0 08     rett  %i7 + 8                           ; | epilog
+ 154:   01 00 00 00     nop                                     ; |            branch delay slot
+
+
+
 ; ---------- returning structs by value ---------->
 ;
 ; struct Small { char x; };
--- a/doc/manual/callconvs/callconv_sparc64.tex	Wed Feb 16 19:26:21 2022 +0100
+++ b/doc/manual/callconvs/callconv_sparc64.tex	Sat Feb 19 19:27:22 2022 +0100
@@ -73,13 +73,16 @@
 \item all arguments \textless=\ 64 bit are passed as 64 bit values
 \item minimum stack size is 128 bytes, b/c stack pointer must always point at enough space to store all \%i* and \%l* registers, used when running out of register windows
 \item if needed, register spill area (both, integer and float arguments are spilled in order) is adjacent to parameters
-\item aggregates (struct, union) \textless=\ 16 bytes are passed field-by-field, {\bf however} evaluated as a sequence of 8-byte parameter slots
+\item structs with only one field are passed as if the param were the field itself
+\item structs \textless=\ 16 bytes (which have more than one field) are passed field-by-field, {\bf however} evaluated as a sequence of 8-byte parameter slots
 \begin{itemize}
+\item note that due to aggregate alignment rules, any floating point value is either the entire slot (for double precision) or exactly one half
 \item fields are left justified in register or stack slots
 \item integers in a slot are passed as such (either via \%o* registers or the stack)
 \item single precision floats (using half of the slot) use even numbered \%f* registers when they occupy the left half, odd numbered ones otherwise (no register skipping logic applied within a slot)
-\item splitting aggregates between registers and stack is allowed
+\item splitting struct fields between registers and stack is allowed
 \end{itemize}
+\item unions \textless=\ 16 bytes passed by-value are passed like integers in left-justified 8-byte slots (either via \%o* registers or the stack)
 \item aggregates (struct, union) and types \textgreater\ 16 bytes are passed indirectly, as a pointer to a correctly aligned copy of the data (that copy can be avoided under certain conditions)
 % from spec:
 %Structure or union types up to eight bytes in size are assigned to one parameter array word, and align to eight-byte