# HG changeset patch # User Tassilo Philipp # Date 1645295242 -3600 # Node ID 5be9f5ccdd354f8f524e6ba8b59a4e629432deee # Parent c9e19249ecd35e7d58a57126aa1c94f321fb06c3 - doc: ppc64 clarifications diff -r c9e19249ecd3 -r 5be9f5ccdd35 doc/disas_examples/sparc64.sparc64.disas --- a/doc/disas_examples/sparc64.sparc64.disas Wed Feb 16 19:26:21 2022 +0100 +++ b/doc/disas_examples/sparc64.sparc64.disas Sat Feb 19 19:27:22 2022 +0100 @@ -1000,7 +1000,6 @@ 60: ae 05 e0 00 add %l7, 0, %l7 ! 0 ; | @@@ unsure, call to some code stub adding o7 to l7 64: 7f ff ff fa call 4c ; | 68: 01 00 00 00 nop ; / -; leaf_call((struct A){0,1.f}, (struct B){2.f,3}, (struct C){4.f,5,6}, (struct D){7,8.f}); 6c: c0 27 a7 df clr [ %fp + 0x7df ] ; \ 70: 03 00 00 00 sethi %hi(0), %g1 ; | 74: 82 10 60 00 mov %g1, %g1 ! 0 ; | @@ -1056,6 +1055,270 @@ +; ---------- passing structs with only fp parts ----------> +; +; struct A { float a; }; +; struct B { float a, b; }; +; struct C { float a, b, c; }; +; struct D { double a; }; +; struct E { double a, b; }; +; struct F { double a, b, c; }; +; +; void leaf_call(struct A a, struct B b, struct C c, struct D d, struct E e, struct F f) +; { +; } +; +; int main() +; { +; leaf_call((struct A){1.f}, (struct B){2.f,3.f}, (struct C){4.f,5.f,6.f}, (struct D){1.}, (struct E){2.,3.}, (struct F){4.,5.,6.}); +; return 0; +; } + + + +; output from openbsd-6.0-sparc64 w/ gcc 4.2.1 + +0000000000000000 : + 0: 9d e3 bf 30 save %sp, -208, %sp + 4: c1 27 a8 7f st %f0, [ %fp + 0x87f ] + 8: 9d a0 00 22 fmovs %f2, %f14 + c: 9f a0 00 23 fmovs %f3, %f15 + 10: a1 a0 00 24 fmovs %f4, %f16 + 14: a3 a0 00 25 fmovs %f5, %f17 + 18: a5 a0 00 26 fmovs %f6, %f18 + 1c: d1 3f a8 9f std %f8, [ %fp + 0x89f ] + 20: 91 a0 00 4a fmovd %f10, %f8 + 24: 95 a0 00 4c fmovd %f12, %f10 + 28: d1 3f a8 a7 std %f8, [ %fp + 0x8a7 ] + 2c: d5 3f a8 af std %f10, [ %fp + 0x8af ] + 30: dd 27 a8 87 st %f14, [ %fp + 0x887 ] + 34: df 27 a8 8b st %f15, [ %fp + 0x88b ] + 38: e1 27 a8 8f st %f16, [ %fp + 0x88f ] + 
3c: e3 27 a8 93 st %f17, [ %fp + 0x893 ] + 40: e5 27 a8 97 st %f18, [ %fp + 0x897 ] + 44: 81 cf e0 08 rett %i7 + 8 + 48: 01 00 00 00 nop + 4c: ae 03 c0 17 add %o7, %l7, %l7 + 50: 81 c3 e0 08 retl + 54: 01 00 00 00 nop + +0000000000000058
: + 58: 9d e3 be b0 save %sp, -336, %sp ; prolog + 5c: 2f 00 00 00 sethi %hi(0), %l7 ; | + 60: ae 05 e0 00 add %l7, 0, %l7 ! 0 ; | @@@ unsure, call to some code stub adding o7 to l7 + 64: 7f ff ff fa call 4c ; | + 68: 01 00 00 00 nop ; / + 6c: 03 00 00 00 sethi %hi(0), %g1 ; \ + 70: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 74: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local struct A + 78: d1 00 40 00 ld [ %g1 ], %f8 ; | + 7c: d1 27 a7 e3 st %f8, [ %fp + 0x7e3 ] ; / + 80: 03 00 00 00 sethi %hi(0), %g1 ; \ + 84: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 88: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 8c: d1 00 40 00 ld [ %g1 ], %f8 ; | + 90: d1 27 a7 db st %f8, [ %fp + 0x7db ] ; | + 94: 03 00 00 00 sethi %hi(0), %g1 ; | put together local struct B + 98: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 9c: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + a0: d1 00 40 00 ld [ %g1 ], %f8 ; | + a4: d1 27 a7 df st %f8, [ %fp + 0x7df ] ; / + a8: 03 00 00 00 sethi %hi(0), %g1 ; \ + ac: 82 10 60 00 mov %g1, %g1 ! 0 ; | + b0: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + b4: d1 00 40 00 ld [ %g1 ], %f8 ; | + b8: d1 27 a7 c3 st %f8, [ %fp + 0x7c3 ] ; | + bc: 03 00 00 00 sethi %hi(0), %g1 ; | + c0: 82 10 60 00 mov %g1, %g1 ! 0 ; | + c4: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + c8: d1 00 40 00 ld [ %g1 ], %f8 ; | put together local struct C + cc: d1 27 a7 c7 st %f8, [ %fp + 0x7c7 ] ; | + d0: 03 00 00 00 sethi %hi(0), %g1 ; | + d4: 82 10 60 00 mov %g1, %g1 ! 0 ; | + d8: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + dc: d1 00 40 00 ld [ %g1 ], %f8 ; | + e0: d1 27 a7 cb st %f8, [ %fp + 0x7cb ] ; / + e4: 03 00 00 00 sethi %hi(0), %g1 ; \ + e8: 82 10 60 00 mov %g1, %g1 ! 0 ; | + ec: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local struct D + f0: d1 18 40 00 ldd [ %g1 ], %f8 ; | + f4: d1 3f a7 cf std %f8, [ %fp + 0x7cf ] ; / + f8: 03 00 00 00 sethi %hi(0), %g1 ; \ + fc: 82 10 60 00 mov %g1, %g1 ! 
0 ; | + 100: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 104: d1 18 40 00 ldd [ %g1 ], %f8 ; | + 108: d1 3f a7 af std %f8, [ %fp + 0x7af ] ; | + 10c: 03 00 00 00 sethi %hi(0), %g1 ; | put together local struct E + 110: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 114: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 118: d1 18 40 00 ldd [ %g1 ], %f8 ; | + 11c: d1 3f a7 b7 std %f8, [ %fp + 0x7b7 ] ; / + 120: 03 00 00 00 sethi %hi(0), %g1 ; \ + 124: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 128: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 12c: d1 18 40 00 ldd [ %g1 ], %f8 ; | + 130: d1 3f a7 97 std %f8, [ %fp + 0x797 ] ; | + 134: 03 00 00 00 sethi %hi(0), %g1 ; | + 138: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 13c: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 140: d1 18 40 00 ldd [ %g1 ], %f8 ; | put together local struct F + 144: d1 3f a7 9f std %f8, [ %fp + 0x79f ] ; | + 148: 03 00 00 00 sethi %hi(0), %g1 ; | + 14c: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 150: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | + 154: d1 18 40 00 ldd [ %g1 ], %f8 ; | + 158: d1 3f a7 a7 std %f8, [ %fp + 0x7a7 ] ; / + 15c: c2 5f a7 97 ldx [ %fp + 0x797 ], %g1 ; \ + 160: c2 77 a7 6f stx %g1, [ %fp + 0x76f ] ; | + 164: c2 5f a7 9f ldx [ %fp + 0x79f ], %g1 ; | prep arg 5 (struct F, copied entirely onto stack, as > 16b, to pass indirectly) + 168: c2 77 a7 77 stx %g1, [ %fp + 0x777 ] ; | + 16c: c2 5f a7 a7 ldx [ %fp + 0x7a7 ], %g1 ; | + 170: c2 77 a7 7f stx %g1, [ %fp + 0x77f ] ; / + 174: d1 07 a7 e3 ld [ %fp + 0x7e3 ], %f8 ; prep arg 0 (struct A) + 178: d3 07 a7 db ld [ %fp + 0x7db ], %f9 ; \ + 17c: dd 07 a7 df ld [ %fp + 0x7df ], %f14 ; / prep arg 1 (struct B) + 180: df 07 a7 c3 ld [ %fp + 0x7c3 ], %f15 ; \ + 184: e1 07 a7 c7 ld [ %fp + 0x7c7 ], %f16 ; | prep arg 2 (struct C) + 188: e3 07 a7 cb ld [ %fp + 0x7cb ], %f17 ; / + 18c: e5 1f a7 cf ldd [ %fp + 0x7cf ], %f18 ; prep arg 3 (struct D) + 190: 82 07 a7 6f add %fp, 0x76f, %g1 ; \ + 194: c2 73 a8 b7 stx %g1, [ %sp + 0x8b7 ] ; / arg 5 (struct F, passed indirectly as ptr to copy; via stack as 
all %o* regs already skipped) + 198: c2 5f a7 b7 ldx [ %fp + 0x7b7 ], %g1 ; @@@ unsure ... + 19c: c2 73 a8 af stx %g1, [ %sp + 0x8af ] ; @@@ ... last double of struct E pushed onto stack (maybe register save area? maybe for some iteration facilitation?) + 1a0: d5 1f a7 af ldd [ %fp + 0x7af ], %f10 ; \ arg 4 (struct E) first double + 1a4: d9 1f a7 b7 ldd [ %fp + 0x7b7 ], %f12 ; / second double + 1a8: 81 a0 00 28 fmovs %f8, %f0 ; arg 0 (entire struct A, takes full %d0 slot despite not being 64bit, as only field of struct) + 1ac: 85 a0 00 29 fmovs %f9, %f2 ; \ arg 1 (struct B) first float + 1b0: 87 a0 00 2e fmovs %f14, %f3 ; / second float + 1b4: 89 a0 00 2f fmovs %f15, %f4 ; \ first float + 1b8: 8b a0 00 30 fmovs %f16, %f5 ; | arg 2 (struct C) second float + 1bc: 8d a0 00 31 fmovs %f17, %f6 ; / third float + 1c0: 91 a0 00 52 fmovd %f18, %f8 ; arg 3 (entire struct D, single field double) + 1c4: 40 00 00 00 call 1c4 ; call leaf_call (objdump not from final link but .o) + 1c8: 01 00 00 00 nop ; branch delay slot + 1cc: 82 10 20 00 clr %g1 ! 
0 ; \ + 1d0: 83 38 60 00 sra %g1, 0, %g1 ; / return value + 1d4: b0 10 00 01 mov %g1, %i0 ; \ + 1d8: 81 cf e0 08 rett %i7 + 8 ; | epilog + 1dc: 01 00 00 00 nop ; | branch delay slot + + + +; ---------- passing only unions with only fp parts ----------> +; +; union A { float a; }; +; union B { float a, b; }; +; union C { float a, b, c; }; +; union D { double a; }; +; union E { double a, b; }; +; union F { double a, b, c; }; +; +; void leaf_call(union A a, union B b, union C c, union D d, union E e, union F f) +; { +; } +; +; int main() +; { +; leaf_call((union A){1.f}, (union B){2.f,3.f}, (union C){4.f,5.f,6.f}, (union D){1.}, (union E){2.,3.}, (union F){4.,5.,6.}); +; return 0; +; } + + + +; output from openbsd-6.0-sparc64 w/ gcc 4.2.1 + +0000000000000000 : + 0: 9d e3 bf 30 save %sp, -208, %sp + 4: 84 10 00 18 mov %i0, %g2 + 8: 86 10 00 19 mov %i1, %g3 + c: 88 10 00 1a mov %i2, %g4 + 10: f6 77 a8 97 stx %i3, [ %fp + 0x897 ] + 14: f8 77 a8 9f stx %i4, [ %fp + 0x89f ] + 18: fa 77 a8 a7 stx %i5, [ %fp + 0x8a7 ] + 1c: 85 38 b0 20 srax %g2, 0x20, %g2 + 20: c2 07 a8 7f ld [ %fp + 0x87f ], %g1 + 24: 82 08 60 00 and %g1, 0, %g1 + 28: 82 10 40 02 or %g1, %g2, %g1 + 2c: c2 27 a8 7f st %g1, [ %fp + 0x87f ] + 30: 87 38 f0 20 srax %g3, 0x20, %g3 + 34: c2 07 a8 87 ld [ %fp + 0x887 ], %g1 + 38: 82 08 60 00 and %g1, 0, %g1 + 3c: 82 10 40 03 or %g1, %g3, %g1 + 40: c2 27 a8 87 st %g1, [ %fp + 0x887 ] + 44: 89 39 30 20 srax %g4, 0x20, %g4 + 48: c2 07 a8 8f ld [ %fp + 0x88f ], %g1 + 4c: 82 08 60 00 and %g1, 0, %g1 + 50: 82 10 40 04 or %g1, %g4, %g1 + 54: c2 27 a8 8f st %g1, [ %fp + 0x88f ] + 58: 81 cf e0 08 rett %i7 + 8 + 5c: 01 00 00 00 nop + 60: ae 03 c0 17 add %o7, %l7, %l7 + 64: 81 c3 e0 08 retl + 68: 01 00 00 00 nop + +000000000000006c
: + 6c: 9d e3 bf 10 save %sp, -240, %sp ; prolog + 70: 2f 00 00 00 sethi %hi(0), %l7 ; | + 74: ae 05 e0 00 add %l7, 0, %l7 ! 0 ; | @@@ unsure, call to some code stub adding o7 to l7 + 78: 7f ff ff fa call 60 ; | + 7c: 01 00 00 00 nop ; / + 80: 03 00 00 00 sethi %hi(0), %g1 ; \ + 84: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 88: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union A + 8c: d1 00 40 00 ld [ %g1 ], %f8 ; | + 90: d1 27 a7 e3 st %f8, [ %fp + 0x7e3 ] ; / + 94: 03 00 00 00 sethi %hi(0), %g1 ; \ + 98: 82 10 60 00 mov %g1, %g1 ! 0 ; | + 9c: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union B (only writes one val) + a0: d1 00 40 00 ld [ %g1 ], %f8 ; | + a4: d1 27 a7 df st %f8, [ %fp + 0x7df ] ; / + a8: 03 00 00 00 sethi %hi(0), %g1 ; \ + ac: 82 10 60 00 mov %g1, %g1 ! 0 ; | + b0: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union C (only writes one val) + b4: d1 00 40 00 ld [ %g1 ], %f8 ; | + b8: d1 27 a7 db st %f8, [ %fp + 0x7db ] ; / + bc: 03 00 00 00 sethi %hi(0), %g1 ; \ + c0: 82 10 60 00 mov %g1, %g1 ! 0 ; | + c4: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union D + c8: d1 18 40 00 ldd [ %g1 ], %f8 ; | + cc: d1 3f a7 cf std %f8, [ %fp + 0x7cf ] ; / + d0: 03 00 00 00 sethi %hi(0), %g1 ; \ + d4: 82 10 60 00 mov %g1, %g1 ! 0 ; | + d8: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union E (only writes one val) + dc: d1 18 40 00 ldd [ %g1 ], %f8 ; | + e0: d1 3f a7 c7 std %f8, [ %fp + 0x7c7 ] ; / + e4: 03 00 00 00 sethi %hi(0), %g1 ; \ + e8: 82 10 60 00 mov %g1, %g1 ! 
0 ; | + ec: c2 5d c0 01 ldx [ %l7 + %g1 ], %g1 ; | put together local union F (only writes one val) + f0: d1 18 40 00 ldd [ %g1 ], %f8 ; | + f4: d1 3f a7 bf std %f8, [ %fp + 0x7bf ] ; / + f8: c2 07 a7 e3 ld [ %fp + 0x7e3 ], %g1 ; \ + fc: 87 30 60 00 srl %g1, 0, %g3 ; | prep arg 0 + 100: 87 28 f0 20 sllx %g3, 0x20, %g3 ; / left-justify + 104: c2 5f a7 df ldx [ %fp + 0x7df ], %g1 ; \ + 108: 85 30 70 20 srlx %g1, 0x20, %g2 ; | prep arg 1 + 10c: 85 28 b0 20 sllx %g2, 0x20, %g2 ; / + 110: c2 07 a7 db ld [ %fp + 0x7db ], %g1 ; \ + 114: 83 30 60 00 srl %g1, 0, %g1 ; | prep arg 2 + 118: 83 28 70 20 sllx %g1, 0x20, %g1 ; / + 11c: c8 5f a7 cf ldx [ %fp + 0x7cf ], %g4 ; prep arg 3 | a bit pointless, could be written + 120: ca 5f a7 c7 ldx [ %fp + 0x7c7 ], %g5 ; prep arg 4 | directly to %o3 and %o4 + 124: da 5f a7 bf ldx [ %fp + 0x7bf ], %o5 ; arg 5 + 128: 90 10 00 03 mov %g3, %o0 ; arg 0 | + 12c: 92 10 00 02 mov %g2, %o1 ; arg 1 | note: all left-justified + 130: 94 10 00 01 mov %g1, %o2 ; arg 2 | + 134: 96 10 00 04 mov %g4, %o3 ; arg 3 + 138: 98 10 00 05 mov %g5, %o4 ; arg 4 + 13c: 40 00 00 00 call 13c ; call leaf_call (objdump not from final link but .o) + 140: 01 00 00 00 nop ; branch delay slot + 144: 82 10 20 00 clr %g1 ! 
0 ; \ + 148: 83 38 60 00 sra %g1, 0, %g1 ; / return value + 14c: b0 10 00 01 mov %g1, %i0 ; \ + 150: 81 cf e0 08 rett %i7 + 8 ; | epilog + 154: 01 00 00 00 nop ; | branch delay slot + + + ; ---------- returning structs by value ----------> ; ; struct Small { char x; }; diff -r c9e19249ecd3 -r 5be9f5ccdd35 doc/manual/callconvs/callconv_sparc64.tex --- a/doc/manual/callconvs/callconv_sparc64.tex Wed Feb 16 19:26:21 2022 +0100 +++ b/doc/manual/callconvs/callconv_sparc64.tex Sat Feb 19 19:27:22 2022 +0100 @@ -73,13 +73,16 @@ \item all arguments \textless=\ 64 bit are passed as 64 bit values \item minimum stack size is 128 bytes, b/c stack pointer must always point at enough space to store all \%i* and \%l* registers, used when running out of register windows \item if needed, register spill area (both, integer and float arguments are spilled in order) is adjacent to parameters -\item aggregates (struct, union) \textless=\ 16 bytes are passed field-by-field, {\bf however} evaluated as a sequence of 8-byte parameter slots +\item structs with only one field are passed as if the parameter were the field itself +\item structs \textless=\ 16 bytes (which have more than one field) are passed field-by-field, {\bf however} evaluated as a sequence of 8-byte parameter slots \begin{itemize} +\item note that due to aggregate alignment rules, any floating point value occupies either the entire slot (for double precision) or exactly one half of it \item fields are left justified in register or stack slots \item integers in a slot are passed as such (either via \%o* registers or the stack) \item single precision floats (using half of the slot) use even numbered \%f* registers when they occupy the left half, odd numbered ones otherwise (no register skipping logic applied within a slot) -\item splitting aggregates between registers and stack is allowed +\item splitting struct fields between registers and stack is allowed \end{itemize} +\item unions \textless=\ 16 bytes passed by-value are passed 
like integers in left-justified 8-byte slots (either via \%o* registers or the stack) \item aggregates (struct, union) and types \textgreater\ 16 bytes are passed indirectly, as a pointer to a correctly aligned copy of the data (that copy can be avoided under certain conditions) % from spec: %Structure or union types up to eight bytes in size are assigned to one parameter array word, and align to eight-byte