# HG changeset patch # User Tassilo Philipp # Date 1574513495 -3600 # Node ID 74c056b597b73d4df21c93cfd4a6fb8541db2e07 # Parent 4e6f63b7020e1c02c82bbeb2c55200e10b885954 - disassembly example annotations - callconv appendix in doc: * ppc64 chapter * some cleanups for consistency diff -r 4e6f63b7020e -r 74c056b597b7 doc/disas_examples/ppc.darwin.disas --- a/doc/disas_examples/ppc.darwin.disas Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/disas_examples/ppc.darwin.disas Sat Nov 23 13:51:35 2019 +0100 @@ -50,7 +50,7 @@ 5c: 91 1e 00 7c stw 8, 124(30) ; | 60: 91 3e 00 80 stw 9, 128(30) ; | 64: 91 5e 00 84 stw 10, 132(30) ; | - 68: 80 01 00 00 lwz 0, 0(1) ; fetch sp saved on stack of top by prolog -> gpr0, and ... + 68: 80 01 00 00 lwz 0, 0(1) ; fetch back-chain ptr (parent frame's sp) from stack of top by prolog -> gpr0, and ... 6c: 94 01 ff 10 stwu 0, -240(1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment 70: 38 41 00 40 addi 2, 1, 64 ; | 74: 38 02 00 0f addi 0, 2, 15 ; | start of alloca()'d memory -> gpr2, by ... @@ -152,7 +152,7 @@ 60: 91 1e 00 8c stw 8, 140(30) ; | 64: 91 3e 00 90 stw 9, 144(30) ; | 68: 91 5e 00 94 stw 10, 148(30) ; | - 6c: 80 01 00 00 lwz 0, 0(1) ; fetch sp saved on stack of top by prolog -> gpr0, and ... + 6c: 80 01 00 00 lwz 0, 0(1) ; fetch back-chain ptr (parent frame's sp) from stack of top by prolog -> gpr0, and ... 70: 94 01 ff 10 stwu 0, -240(1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment 74: 38 41 00 50 addi 2, 1, 80 ; | 78: 38 02 00 0f addi 0, 2, 15 ; | start of alloca()'d memory -> gpr2, by ... @@ -335,7 +335,7 @@ 150: c8 09 00 00 lfd 0, 0(9) ; | 154: fc 00 00 18 frsp 0, 0 ; | 158: d0 1e 00 70 stfs 0, 112(30) ; / - 15c: 80 01 00 00 lwz 0, 0(1) ; fetch sp saved on stack of top by prolog -> g + 15c: 80 01 00 00 lwz 0, 0(1) ; fetch back-chain ptr (parent frame's sp) from stack of top by prolog -> gpr0, and ... 160: 94 01 ff 10 stwu 0, -240(1) ; ... 
update it further up the stack for alloca 164: 38 41 00 50 addi 2, 1, 80 ; | 168: 38 02 00 0f addi 0, 2, 15 ; | start of alloca()'d memory -> gpr2, by ... diff -r 4e6f63b7020e -r 74c056b597b7 doc/disas_examples/ppc64.elfabi.disas --- a/doc/disas_examples/ppc64.elfabi.disas Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/disas_examples/ppc64.elfabi.disas Sat Nov 23 13:51:35 2019 +0100 @@ -42,89 +42,232 @@ 48: 80 01 00 01 lwz r0,1(r1) 000000000000004c <.nonleaf_call>: - 4c: 7c 08 02 a6 mflr r0 - 50: fb e1 ff f8 std r31,-8(r1) - 54: f8 01 00 10 std r0,16(r1) - 58: f8 21 ff 71 stdu r1,-144(r1) - 5c: 7c 3f 0b 78 mr r31,r1 - 60: 7c 60 1b 78 mr r0,r3 - 64: 7c 8b 23 78 mr r11,r4 - 68: 90 1f 00 c0 stw r0,192(r31) - 6c: 91 7f 00 c8 stw r11,200(r31) - 70: 90 bf 00 d0 stw r5,208(r31) - 74: 90 df 00 d8 stw r6,216(r31) - 78: 90 ff 00 e0 stw r7,224(r31) - 7c: 91 1f 00 e8 stw r8,232(r31) - 80: 91 3f 00 f0 stw r9,240(r31) - 84: 91 5f 00 f8 stw r10,248(r31) - 88: e8 01 00 00 ld r0,0(r1) - 8c: f8 01 ff 11 stdu r0,-240(r1) - 90: 39 21 00 70 addi r9,r1,112 - 94: f9 3f 00 70 std r9,112(r31) - 98: e9 3f 00 70 ld r9,112(r31) - 9c: 38 09 00 0f addi r0,r9,15 - a0: 78 00 e1 02 rldicl r0,r0,60,4 - a4: 78 00 26 e4 rldicr r0,r0,4,59 - a8: f8 1f 00 70 std r0,112(r31) - ac: e9 3f 00 70 ld r9,112(r31) - b0: 38 00 00 4c li r0,76 - b4: 98 09 00 00 stb r0,0(r9) - b8: 80 1f 00 c8 lwz r0,200(r31) - bc: 7c 08 07 b4 extsw r8,r0 - c0: 80 1f 00 d0 lwz r0,208(r31) - c4: 7c 07 07 b4 extsw r7,r0 - c8: 80 1f 00 d8 lwz r0,216(r31) - cc: 7c 06 07 b4 extsw r6,r0 - d0: 80 1f 00 e0 lwz r0,224(r31) - d4: 7c 09 07 b4 extsw r9,r0 - d8: 80 1f 00 e8 lwz r0,232(r31) - dc: 7c 0b 07 b4 extsw r11,r0 - e0: 80 1f 00 f0 lwz r0,240(r31) - e4: 7c 0a 07 b4 extsw r10,r0 - e8: 80 1f 00 f8 lwz r0,248(r31) - ec: 7c 00 07 b4 extsw r0,r0 - f0: 7d 03 43 78 mr r3,r8 - f4: 7c e4 3b 78 mr r4,r7 - f8: 7c c5 33 78 mr r5,r6 - fc: 7d 26 4b 78 mr r6,r9 - 100: 7d 67 5b 78 mr r7,r11 - 104: 7d 48 53 78 mr r8,r10 - 108: 7c 09 03 78 mr r9,r0 - 10c: 48 00 
00 01 bl 10c <.nonleaf_call+0xc0> - 110: e8 21 00 00 ld r1,0(r1) - 114: e8 01 00 10 ld r0,16(r1) - 118: 7c 08 03 a6 mtlr r0 - 11c: eb e1 ff f8 ld r31,-8(r1) - 120: 4e 80 00 20 blr - 124: 00 00 00 00 .long 0x0 - 128: 00 00 00 01 .long 0x1 - 12c: 80 01 00 01 lwz r0,1(r1) + 4c: 7c 08 02 a6 mflr r0 ; | + 50: fb e1 ff f8 std r31,-8(r1) ; | + 54: f8 01 00 10 std r0,16(r1) ; | prolog + 58: f8 21 ff 71 stdu r1,-144(r1) ; | + 5c: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below + 60: 7c 60 1b 78 mr r0,r3 ; in arg 0 -> gpr0 + 64: 7c 8b 23 78 mr r11,r4 ; in arg 1 -> gpr11 + 68: 90 1f 00 c0 stw r0,192(r31) ; | + 6c: 91 7f 00 c8 stw r11,200(r31) ; | + 70: 90 bf 00 d0 stw r5,208(r31) ; | + 74: 90 df 00 d8 stw r6,216(r31) ; | + 78: 90 ff 00 e0 stw r7,224(r31) ; | all in args -> spill area in prev frame (jump over own frame (144) + linkage area of prev frame (48) = 192) + 7c: 91 1f 00 e8 stw r8,232(r31) ; | + 80: 91 3f 00 f0 stw r9,240(r31) ; | + 84: 91 5f 00 f8 stw r10,248(r31) ; | + 88: e8 01 00 00 ld r0,0(r1) ; fetch back-chain ptr (parent frame's sp) from stack of top by prolog -> gpr0, and ... + 8c: f8 01 ff 11 stdu r0,-240(r1) ; ... update it further up the stack for alloca(220) - with padding to guarantee alignment + 90: 39 21 00 70 addi r9,r1,112 ; | + 94: f9 3f 00 70 std r9,112(r31) ; | + 98: e9 3f 00 70 ld r9,112(r31) ; | + 9c: 38 09 00 0f addi r0,r9,15 ; | start of alloca()'d memory -> gpr9, by ... + a0: 78 00 e1 02 rldicl r0,r0,60,4 ; | ... using gpr0 as helper to align to 16b, leaving at least 112b at top of stack + a4: 78 00 26 e4 rldicr r0,r0,4,59 ; | + a8: f8 1f 00 70 std r0,112(r31) ; | + ac: e9 3f 00 70 ld r9,112(r31) ; | + b0: 38 00 00 4c li r0,76 ; 'L' -> gpr0, and ... + b4: 98 09 00 00 stb r0,0(r9) ; ... store in local area (of alloca()'d space) + b8: 80 1f 00 c8 lwz r0,200(r31) ; prep arg 0 (from prev frame's spill area), ... + bc: 7c 08 07 b4 extsw r8,r0 ; ... 
-> gpr8 (w/ sign extension b/c int param in 64bit reg) + c0: 80 1f 00 d0 lwz r0,208(r31) ; prep arg 1 (from prev frame's spill area), ... + c4: 7c 07 07 b4 extsw r7,r0 ; ... -> gpr7 + c8: 80 1f 00 d8 lwz r0,216(r31) ; prep arg 2 (from prev frame's spill area), ... + cc: 7c 06 07 b4 extsw r6,r0 ; ... -> gpr6 + d0: 80 1f 00 e0 lwz r0,224(r31) ; prep arg 3 (from prev frame's spill area), ... + d4: 7c 09 07 b4 extsw r9,r0 ; ... -> gpr9 + d8: 80 1f 00 e8 lwz r0,232(r31) ; prep arg 4 (from prev frame's spill area), ... + dc: 7c 0b 07 b4 extsw r11,r0 ; ... -> gpr11 + e0: 80 1f 00 f0 lwz r0,240(r31) ; prep arg 5 (from prev frame's spill area), ... + e4: 7c 0a 07 b4 extsw r10,r0 ; ... -> gpr10 + e8: 80 1f 00 f8 lwz r0,248(r31) ; prep arg 6 (from prev frame's spill area), ... + ec: 7c 00 07 b4 extsw r0,r0 ; ... -> gpr0 + f0: 7d 03 43 78 mr r3,r8 ; arg 0 + f4: 7c e4 3b 78 mr r4,r7 ; arg 1 + f8: 7c c5 33 78 mr r5,r6 ; arg 2 + fc: 7d 26 4b 78 mr r6,r9 ; arg 3 + 100: 7d 67 5b 78 mr r7,r11 ; arg 4 + 104: 7d 48 53 78 mr r8,r10 ; arg 5 + 108: 7c 09 03 78 mr r9,r0 ; arg 6 + 10c: 48 00 00 01 bl 10c <.nonleaf_call+0xc0> ; call and put return address -> lr + 110: e8 21 00 00 ld r1,0(r1) ; | + 114: e8 01 00 10 ld r0,16(r1) ; | + 118: 7c 08 03 a6 mtlr r0 ; | epilog + 11c: eb e1 ff f8 ld r31,-8(r1) ; | + 120: 4e 80 00 20 blr ; | + 124: 00 00 00 00 .long 0x0 ; data + 128: 00 00 00 01 .long 0x1 ; data + 12c: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data? 
0000000000000130 <.main>: - 130: 7c 08 02 a6 mflr r0 - 134: fb e1 ff f8 std r31,-8(r1) - 138: f8 01 00 10 std r0,16(r1) - 13c: f8 21 ff 81 stdu r1,-128(r1) - 140: 7c 3f 0b 78 mr r31,r1 - 144: 38 60 00 00 li r3,0 - 148: 38 80 00 01 li r4,1 - 14c: 38 a0 00 02 li r5,2 - 150: 38 c0 00 03 li r6,3 - 154: 38 e0 00 04 li r7,4 - 158: 39 00 00 05 li r8,5 - 15c: 39 20 00 06 li r9,6 - 160: 39 40 00 07 li r10,7 - 164: 48 00 00 01 bl 164 <.main+0x34> - 168: 38 00 00 00 li r0,0 - 16c: 7c 03 03 78 mr r3,r0 - 170: e8 21 00 00 ld r1,0(r1) - 174: e8 01 00 10 ld r0,16(r1) - 178: 7c 08 03 a6 mtlr r0 - 17c: eb e1 ff f8 ld r31,-8(r1) - 180: 4e 80 00 20 blr - 184: 00 00 00 00 .long 0x0 - 188: 00 00 00 01 .long 0x1 - 18c: 80 01 00 01 lwz r0,1(r1) + 130: 7c 08 02 a6 mflr r0 ; | lr -> gpr0 + 134: fb e1 ff f8 std r31,-8(r1) ; | preserve gpr31 (as used in func as helper addr) + 138: f8 01 00 10 std r0,16(r1) ; | prolog store lr + 13c: f8 21 ff 81 stdu r1,-128(r1) ; | open frame + 140: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below + 144: 38 60 00 00 li r3,0 ; arg 0 + 148: 38 80 00 01 li r4,1 ; arg 1 + 14c: 38 a0 00 02 li r5,2 ; arg 2 + 150: 38 c0 00 03 li r6,3 ; arg 3 + 154: 38 e0 00 04 li r7,4 ; arg 4 + 158: 39 00 00 05 li r8,5 ; arg 5 + 15c: 39 20 00 06 li r9,6 ; arg 6 + 160: 39 40 00 07 li r10,7 ; arg 7 + 164: 48 00 00 01 bl 164 <.main+0x34> ; call and put return address -> lr + 168: 38 00 00 00 li r0,0 ; return value ... + 16c: 7c 03 03 78 mr r3,r0 ; ... in gpr3 + 170: e8 21 00 00 ld r1,0(r1) ; | + 174: e8 01 00 10 ld r0,16(r1) ; | + 178: 7c 08 03 a6 mtlr r0 ; | epilog + 17c: eb e1 ff f8 ld r31,-8(r1) ; | + 180: 4e 80 00 20 blr ; | + 184: 00 00 00 00 .long 0x0 ; data + 188: 00 00 00 01 .long 0x1 ; data + 18c: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data?
+ + + +; ------------- ints and floats, var args, struct return value (meaning implicit first param), more than 8 params (11, with implicit return value ptr) -----------> + +; #include <stdlib.h> +; #include <stdarg.h> +; +; void leaf_call(int b, float c, int d, float e, int f, float g, float h, int i, float j) +; { +; } +; +; struct aggr { int x; int y; int z; }; +; +; struct aggr nonleaf_call(int a, int b, float c, int d, float e, int f, ...) +; { +; va_list v; +; int i; +; float g, h, j; +; struct aggr st = { b, d, f }; +; va_start(v, f); +; g = va_arg(v, float); +; h = va_arg(v, float); +; i = va_arg(v, int); +; j = va_arg(v, float); +; /* use some local data */ +; *(char*)alloca(220) = 'L'; +; leaf_call(b, c, d, e, f, g, h, i, j); +; +; return st; +; } +; +; int main() +; { +; struct aggr st = nonleaf_call(0, 1, 2.f, 3, 4.f, 5, 6.f, 7.f, 8, 9.f); +; return 0; +; } + + + +; output from freebsd-11.0-ppc64 w/ gcc 4.2.1 + +0000000000000000 <.leaf_call>: + 0: fb e1 ff f8 std r31,-8(r1) + 4: f8 21 ff c1 stdu r1,-64(r1) + 8: 7c 3f 0b 78 mr r31,r1 + c: 7c 69 1b 78 mr r9,r3 + 10: d0 3f 00 78 stfs f1,120(r31) + 14: 7c ab 2b 78 mr r11,r5 + 18: d0 5f 00 88 stfs f2,136(r31) + 1c: 7c e8 3b 78 mr r8,r7 + 20: d0 7f 00 98 stfs f3,152(r31) + 24: d0 9f 00 a0 stfs f4,160(r31) + 28: 7d 40 53 78 mr r0,r10 + 2c: d0 bf 00 b0 stfs f5,176(r31) + 30: 91 3f 00 70 stw r9,112(r31) + 34: 91 7f 00 80 stw r11,128(r31) + 38: 91 1f 00 90 stw r8,144(r31) + 3c: 90 1f 00 a8 stw r0,168(r31) + 40: e8 21 00 00 ld r1,0(r1) + 44: eb e1 ff f8 ld r31,-8(r1) + 48: 4e 80 00 20 blr + ...
+ 54: 80 01 00 01 lwz r0,1(r1) + +0000000000000058 <.nonleaf_call>: + 58: fb e1 ff f8 std r31,-8(r1) ; | + 5c: f8 21 ff 91 stdu r1,-112(r1) ; | prolog + 60: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below + 64: 7c 8b 23 78 mr r11,r4 ; in arg 1 (first explicit arg, b/c of struct return value ptr being arg0) -> r11 + 68: 7c a8 2b 78 mr r8,r5 ; in arg 2 -> r8 (free reg, was skipped for float param) + 6c: d0 3f 00 b8 stfs f1,184(r31) ; | in arg 3 (float) -> prev frame's spill area: 184 = 112 (frame) + 48 (prev frame's linkage area) + 8 (arg 0 = return value ptr) + 16 (first two explicit args) + 70: d0 5f 00 c8 stfs f2,200(r31) ; | in arg 5 (float) -> prev frame's spill area + 74: f9 5f 00 d8 std r10,216(r31) ; | in arg 7 (float, also held in gpr reg b/c vararg) -> prev frame's spill area + 78: 7d 20 4b 78 mr r0,r9 ; | spilling in arg 6 in gpr0 (spilled below) + 7c: 91 7f 00 a8 stw r11,168(r31) ; | in arg 1 (int) -> prev frame's spill area + 80: 91 1f 00 b0 stw r8,176(r31) ; | in arg 2 (int) -> prev frame's spill area + 84: 90 ff 00 c0 stw r7,192(r31) ; | in arg 4 (int) -> prev frame's spill area + 88: 90 1f 00 d0 stw r0,208(r31) ; / in arg 6 (int) -> prev frame's spill area + 8c: 80 1f 00 b0 lwz r0,176(r31) ; \ + 90: 90 1f 00 48 stw r0,72(r31) ; | + 94: 80 1f 00 c0 lwz r0,192(r31) ; | + 98: 90 1f 00 4c stw r0,76(r31) ; | filling struct with 3 int input args + 9c: 80 1f 00 d0 lwz r0,208(r31) ; | + a0: 90 1f 00 50 stw r0,80(r31) ; | + a4: 38 1f 00 d8 addi r0,r31,216 ; + a8: f8 1f 00 40 std r0,64(r31) ; . + ac: 7f e0 00 08 trap ; . + ... ; . 
+ b8: 80 01 00 01 lwz r0,1(r1) ; +00000000000000bc <.main>: + bc: 7c 08 02 a6 mflr r0 ; | lr -> gpr0 + c0: fb e1 ff f8 std r31,-8(r1) ; | preserve gpr31 (as used in func as helper addr) + c4: f8 01 00 10 std r0,16(r1) ; | prolog store lr + c8: f8 21 ff 41 stdu r1,-192(r1) ; | open frame + cc: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below + d0: 39 61 00 30 addi r11,r1,48 ; ptr to param area -> r11 + d4: e9 22 00 00 ld r9,0(r2) ; prep arg 3 (=explicit arg 2, b/c of implicit return value pointer), ... + d8: c1 a9 00 00 lfs f13,0(r9) ; ... load from static data -> f13 + dc: e9 22 00 08 ld r9,8(r2) ; prep arg 5, ... + e0: c1 89 00 00 lfs f12,0(r9) ; ... load from static data -> f12 + e4: e9 22 00 10 ld r9,16(r2) ; prep arg 7, ... + e8: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> f0 + ec: d8 1f 00 a0 stfd f0,160(r31) ; | + f0: e8 1f 00 a0 ld r0,160(r31) ; | + f4: 7c 09 03 78 mr r9,r0 ; | also hold it in f11 (temporarily, before copying to fpr3 below) + f8: 7d 2a 4b 78 mr r10,r9 ; | and gpr10 (instead of skipping that int reg, for straightforward spilling) + fc: f8 1f 00 a0 std r0,160(r31) ; | (uses temp space to copy between fpr and gpr regs) + 100: c8 1f 00 a0 lfd f0,160(r31) ; | + 104: fd 60 00 90 fmr f11,f0 ; | + 108: e9 22 00 18 ld r9,24(r2) ; prep arg 8, ... + 10c: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> fpr0, and ... + 110: d8 0b 00 40 stfd f0,64(r11) ; ... "pushed" onto stack (in param area past spill area) and ... + 114: c9 4b 00 40 lfd f10,64(r11) ; ... also held in f10 (prep, see where it's used below) + 118: 38 00 00 08 li r0,8 ; arg 9, ... + 11c: f8 0b 00 48 std r0,72(r11) ; ... "pushed" onto stack + 120: e9 22 00 20 ld r9,32(r2) ; arg 10 (float, promoted to double), ... + 124: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> fpr0, and ... + 128: d8 0b 00 50 stfd f0,80(r11) ; ... "pushed" onto stack + 12c: c8 0b 00 50 lfd f0,80(r11) ; ...
also held in f0 (prep, see where it's used below), in theory pointless reload of arg10 -> fpr0 + 130: 38 1f 00 90 addi r0,r31,144 ; ptr to return value struct in local space -> gpr0 + 134: 7c 03 03 78 mr r3,r0 ; arg 0 (this is the pointer to the struct return value) + 138: 38 80 00 00 li r4,0 ; arg 1 + 13c: 38 a0 00 01 li r5,1 ; arg 2 + 140: fc 20 68 90 fmr f1,f13 ; arg 3 (float, in 1st double reg) + 144: 38 e0 00 03 li r7,3 ; arg 4 (skipping gpr6 b/c of float arg) + 148: fc 40 60 90 fmr f2,f12 ; arg 5 (float, in 2nd double reg) + 14c: 39 20 00 05 li r9,5 ; arg 6 (skipping gpr8 b/c of float arg, vararg) + 150: fc 60 58 90 fmr f3,f11 ; arg 7 (float, in 3rd double reg, promoted to double anyways b/c vararg) + 154: fc 80 50 90 fmr f4,f10 ; arg 8 (float, in 4th double reg, promoted to double anyways b/c vararg) + 158: fc a0 00 90 fmr f5,f0 ; arg 10 (float, in 5th double reg, promoted to double anyways b/c vararg) + 15c: 48 00 00 01 bl 15c <.main+0xa0> ; call and put return address -> lr + 160: 38 00 00 00 li r0,0 ; return value ... + 164: 7c 03 03 78 mr r3,r0 ; ... in gpr3 + 168: e8 21 00 00 ld r1,0(r1) ; | + 16c: e8 01 00 10 ld r0,16(r1) ; | + 170: 7c 08 03 a6 mtlr r0 ; | epilog + 174: eb e1 ff f8 ld r31,-8(r1) ; | + 178: 4e 80 00 20 blr ; | + 17c: 00 00 00 00 .long 0x0 ; data + 180: 00 00 00 01 .long 0x1 ; data + 184: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data? ; vim: ft=asm diff -r 4e6f63b7020e -r 74c056b597b7 doc/disas_examples/sparc64.sparc64.disas --- a/doc/disas_examples/sparc64.sparc64.disas Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/disas_examples/sparc64.sparc64.disas Sat Nov 23 13:51:35 2019 +0100 @@ -499,7 +499,7 @@ 90: c4 27 a7 fb st %g2, [ %fp + 0x7fb ] ; ... copied to local space (0x7fb - bias = -4) helper var (probably int g) 94: 82 00 60 08 add %g1, 8, %g1 ; point read ptr in g1 to second unnamed param (float, promoted to double), ... 98: c2 77 a7 e7 stx %g1, [ %fp + 0x7e7 ] ; ... 
store in local space (0x7fb - bias = -24) - 9c: 91 d0 20 05 ta 5 ; trap - not sure what else is involved (objdump was made from .o, not finally linked exec) + 9c: 91 d0 20 05 ta 5 ; trap - not sure what else is involved (objdump was made from .o, not finally linked exec) - maybe just b/c objdump skipped this for the output? 00000000000000a0
: a0: 9d e3 bf 30 save %sp, -208, %sp ; prolog diff -r 4e6f63b7020e -r 74c056b597b7 doc/manual/callconvs/callconv_ppc32.tex --- a/doc/manual/callconvs/callconv_ppc32.tex Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/manual/callconvs/callconv_ppc32.tex Sat Nov 23 13:51:35 2019 +0100 @@ -28,9 +28,10 @@ \item Word size is 32 bits \item Big endian (MSB) and litte endian (LSB) operating modes. \item Processor operates on floats in double precision floating point arithmetc (IEEE-754) values directly (single precision is converted on the fly) -\item Apple Mac OS X/Darwin PPC is specified in "Mac OS X ABI Function Call Guide"\cite{ppcMacOSX}. It uses Big Endian (MSB). -\item Linux PPC 32-bit ABI is specified in "LSB for PPC"\cite{ppc32LSB} which is based on "System V ABI". It uses Big Endian (MSB). -\item PowerPC EABI is defined in the "PowerPC Embedded Application Binary Interface 32-Bit Implementation". +\item Apple macos/Mac OS X/Darwin PPC is specified in "Mac OS X ABI Function Call Guide"\cite{ppcMacOSX}. It uses Big Endian (MSB) +\item Linux PPC 32-bit ABI is specified in "LSB for PPC"\cite{ppc32LSB} which is based on "System V ABI". 
It uses Big Endian (MSB) +\item PowerPC EABI is defined in the "PowerPC Embedded Application Binary Interface 32-Bit Implementation"\cite{ppceabi} +\item There is also the "PowerOpen ABI"\cite{poabi}, a nearly identical version of it is used in AIX % more info: http://www.ingallegri.com/public/ppc.html \end{itemize} \paragraph{\product{dyncall} support} @@ -76,19 +77,19 @@ \item stack parameter order: right-to-left \item caller cleans up the stack \item the first 8 integer parameters are passed in registers gpr3-gpr10 -\item the first 12 floating point parameters are passed in registers fpr1-fpr13 +\item the first 13 floating point parameters are passed in registers fpr1-fpr13 \item 64 bit arguments are passed as if they were two 32 bit arguments, without skipping registers for alignment (this means passing half via a register and half via the stack is allowed) \item if a float parameter is passed via a register, gpr registers are skipped for subsequent integer parameters (based on the size of the float - 1 register for single precision and 2 for double precision floating point values) \item the caller pushes subsequent parameters onto the stack \item for every parameter passed via a register, space is reserved in the stack parameter area (in order to spill the parameters if needed - e.g. varargs) -\item ellipsis calls take floating point values in int and float registers (single precision floats are promoted to double precision -as defined for ellipsis calls) +\item ellipsis calls take floating point values in int and float registers (single precision floats are promoted to double precision as +required by ellipsis calls) \item all nonvector parameters are aligned on 4-byte boundaries \item vector parameters are aligned on 16-byte boundaries \item composite parameters with size of 1 or 2 bytes occupy low-order bytes of their 4-byte area. INCONSISTENT with other 32-bit PPC -binary interfaces. 
In AIX and OS 9, padding bytes always follow the data structure +binary interfaces. In AIX and mac OS 9, padding bytes always follow the data structure \item composite parameters 3 bytes or larger in size occupy high-order bytes \item integer parameters \textless\ 32 bit are right-justified (meaning occupy higher-address bytes) in their 4-byte slot on the stack, requiring extra-care for big-endian targets \end{itemize} diff -r 4e6f63b7020e -r 74c056b597b7 doc/manual/callconvs/callconv_ppc64.tex --- a/doc/manual/callconvs/callconv_ppc64.tex Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/manual/callconvs/callconv_ppc64.tex Sat Nov 23 13:51:35 2019 +0100 @@ -25,7 +25,7 @@ \paragraph{Overview} \begin{itemize} -\item Word size is 64 bits +\item Word size is 32 bits for historical reasons \item Big endian (MSB) and litte endian (LSB) operating modes. \item Apple Mac OS X/Darwin PPC is specified in "Mac OS X ABI Function Call Guide"\cite{ppcMacOSX}. It uses Big Endian (MSB). \item Linux PPC 64-bit ABI is specified in "64-bit PowerPC ELF Application Binary Interface Supplement"\cite{ppcelf64abi} which is based on "System V ABI". 
@@ -40,23 +40,115 @@ \paragraph{Registers and register usage} -@@@ +\begin{table}[h] +\begin{tabular*}{0.95\textwidth}{3 B} +Name & Brief description\\ +\hline +{\bf gpr0} & scratch\\ +{\bf gpr1} & stack pointer\\ +{\bf gpr2} & TOC base ptr (offset table and data for position independent code), scratch\\ +{\bf gpr3} & return value, parameter 0 for integer or pointer, scratch\\ +{\bf gpr4-gpr10} & parameter 1-7 for integer or pointer parameters, scratch\\ +{\bf gpr11} & env pointer if needed, scratch\\ +{\bf gpr12} & used for exception handling and glink code, scratch\\ +{\bf gpr13} & used for system thread ID, preserve\\ +{\bf gpr14-31} & preserve\\ +{\bf fpr0} & scratch\\ +{\bf fpr1-fpr4} & floating point return value, floating point parameter 0-3 (always double precision)\\ +{\bf fpr5-fpr13} & floating point parameters 4-12 (always double precision)\\ +{\bf fpr14-fpr31} & preserve\\ +{\bf v0-v1} & scratch\\ +{\bf v2-v13} & vector parameters\\ +{\bf v14-v19} & scratch\\ +{\bf v20-v31} & preserve\\ +{\bf lr} & link-register, scratch\\ +{\bf ctr} & count-register, scratch\\ +{\bf xer} & fixed point exception register, scratch\\ +{\bf fpscr} & floating point status and control register, scratch\\ +{\bf cr0-cr7} & condition register fields, each 4-bit wide (cr0-cr1 and cr5-cr7 are scratch)\\ +\end{tabular*} +\caption{Register usage on PowerPC 64-Bit ELF ABI} +\end{table} \paragraph{Parameter passing} -@@@ \begin{itemize} +\item stack grows down +\item stack parameter order: right-to-left +\item caller cleans up the stack +\item stack is always 16 byte aligned +\item the stack pointer must be atomically updated (to avoid any timing window in which an interrupt can occur with a partially updated stack), usually with the stdu (store doubleword with update) instruction +\item the first 8 integer parameters are passed in registers gpr3-gpr10 +\item the first 13 floating point parameters are passed in registers fpr1-fpr13 +\item preserved registers are saved using a
defined order (from high to low addresses): + fpr* (64bit aligned), + gpr*, + VRSAVE save word (32 bits), + padding for alignment (4 or 12 bytes), + v* (128bit aligned) +\item if a floating point parameter is passed via a register, a gpr register is skipped for subsequent integer parameters +\item the caller pushes subsequent parameters onto the stack +\item single precision floating point values use the second word in a doubleword +\item a quad precision floating point argument is passed as two consecutive double precision ones +\item integer types \textless\ 64 bit are sign or zero extended and use a doubleword +\item ellipsis calls take floating point values in int and float registers (single precision floats are promoted to double precision as +required by ellipsis calls) +\item space for all potential gpr* register passed arguments is reserved in the stack parameter area (in order to spill the parameters if +needed - e.g. varargs), meaning a minimum of 64 bytes to hold gpr3-gpr10 +\item all nonvector parameters are aligned on 8-byte boundaries +\item vector parameters are aligned on 16-byte boundaries \item integer parameters \textless\ 64 bit are right-justified (meaning occupy higher-address bytes) in their 8-byte slot on the stack, requiring extra-care for big-endian targets \end{itemize} \paragraph{Return values} -@@@ +\begin{itemize} +\item return values of integer \textless=\ 32bit or pointer type use gpr3 and are zero or sign extended depending on their type +\item 64 bit integers use gpr3 +\item floating point values are returned via fpr1 +\item character arrays \textless=\ 8 bytes use gpr3, and are right justified +\item for all structs/unions (regardless of size) or character arrays \textgreater\ 8 bytes, a secret first parameter with an address to a caller allocated space is passed as first argument to the function (meaning in gpr3), which is written to by the callee +\end{itemize} \paragraph{Stack layout} -@@@ +Stack frame is always 16-byte
aligned. +% verified/amended: TP nov 2019 (see also doc/disas_examples/ppc64.elfabi.disas) +Stack directly after function prolog:\\ +\begin{figure}[h] +\begin{tabular}{5|3|1 1} + & \vdots & & \\ +\hhline{~=~~} +register save area & \hspace{4cm} & & \mrrbrace{14}{caller's frame} \\ +\hhline{~-~~} +local data & & & \\ +\hhline{~-~~} +\mrlbrace{6}{parameter area} & last arg & \mrrbrace{3}{stack parameters} & \\ + & \ldots & & \\ + & arg 8 & & \\ + & gpr10 & \mrrbrace{3}{spill area (as needed)} & \\ + & \ldots & & \\ + & gpr3 & & \\ +\hhline{~-~~} +\mrlbrace{6}{linkage area} & TOC ptr reg & & \\ + & reserved & & \\ + & reserved & & \\ + & return address (callee saved) & & \\ + & condition reg (callee saved) & & \\ + & parent stack frame pointer & & \\ +\hhline{~=~~} +register save area & & & \mrrbrace{4}{current frame} \\ +\hhline{~-~~} +local data & & & \\ +\hhline{~-~~} +parameter area & & & \\ +\hhline{~-~~} +linkage area & \vdots & & \\ +\end{tabular} +\caption{Stack layout on ppc64 ELF ABI} +\end{figure} + diff -r 4e6f63b7020e -r 74c056b597b7 doc/manual/callconvs/callconv_x64.tex --- a/doc/manual/callconvs/callconv_x64.tex Fri Nov 22 23:28:17 2019 +0100 +++ b/doc/manual/callconvs/callconv_x64.tex Sat Nov 23 13:51:35 2019 +0100 @@ -97,8 +97,8 @@ \item caller cleans up the stack, not the callee (like cdecl) \item stack is always 16byte aligned - since return address is 64 bits in size, stacks with an odd number of parameters are already aligned -\item ellipsis calls take floating point values in int and float registers (single precision floats are promoted to double precision -as defined for ellipsis calls) +\item ellipsis calls take floating point values in int and float registers (single precision floats are promoted to double precision as +required by ellipsis calls) \item if size of parameters \textgreater\ 1 page of memory (usually between 4k and 64k), chkstk must be called \end{itemize}