Mercurial > pub > dyncall > dyncall
view doc/disas_examples/ppc64.elfabi.disas @ 357:d982a00c2177
- PPC64 asm syntax fix, specifying explicitly comparison mode for cmpi (newer toolchains complain, older ones took optional field of instruction which happened to be same value)
author | Tassilo Philipp |
---|---|
date | Tue, 25 Feb 2020 18:16:13 +0100 |
parents | 74c056b597b7 |
children | ead041d93e36 |
line wrap: on
line source
; #include <stdlib.h> ; ; void leaf_call(int b, int c, int d, int e, int f, int g, int h) ; { ; } ; ; void nonleaf_call(int a, int b, int c, int d, int e, int f, int g, int h) ; { ; /* use some local data */ ; *(char*)alloca(220) = 'L'; ; leaf_call(b, c, d, e, f, g, h); ; } ; ; int main() ; { ; nonleaf_call(0, 1, 2, 3, 4, 5, 6, 7); ; return 0; ; } ; output from freebsd-11.0-ppc64 w/ gcc 4.2.1 0000000000000000 <.leaf_call>: 0: fb e1 ff f8 std r31,-8(r1) 4: f8 21 ff c1 stdu r1,-64(r1) 8: 7c 3f 0b 78 mr r31,r1 c: 7c 60 1b 78 mr r0,r3 10: 7c 8b 23 78 mr r11,r4 14: 7c aa 2b 78 mr r10,r5 18: 90 1f 00 70 stw r0,112(r31) 1c: 91 7f 00 78 stw r11,120(r31) 20: 91 5f 00 80 stw r10,128(r31) 24: 90 df 00 88 stw r6,136(r31) 28: 90 ff 00 90 stw r7,144(r31) 2c: 91 1f 00 98 stw r8,152(r31) 30: 91 3f 00 a0 stw r9,160(r31) 34: e8 21 00 00 ld r1,0(r1) 38: eb e1 ff f8 ld r31,-8(r1) 3c: 4e 80 00 20 blr ... 48: 80 01 00 01 lwz r0,1(r1) 000000000000004c <.nonleaf_call>: 4c: 7c 08 02 a6 mflr r0 ; | 50: fb e1 ff f8 std r31,-8(r1) ; | 54: f8 01 00 10 std r0,16(r1) ; | prolog 58: f8 21 ff 71 stdu r1,-144(r1) ; | 5c: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below 60: 7c 60 1b 78 mr r0,r3 ; in arg 0 -> gpr0 64: 7c 8b 23 78 mr r11,r4 ; in arg 1 -> gpr11 68: 90 1f 00 c0 stw r0,192(r31) ; | 6c: 91 7f 00 c8 stw r11,200(r31) ; | 70: 90 bf 00 d0 stw r5,208(r31) ; | 74: 90 df 00 d8 stw r6,216(r31) ; | 78: 90 ff 00 e0 stw r7,224(r31) ; | all in args -> spill area in prev frame (jump over own frame (144) + linkage area of prev frame (48) = 192) 7c: 91 1f 00 e8 stw r8,232(r31) ; | 80: 91 3f 00 f0 stw r9,240(r31) ; | 84: 91 5f 00 f8 stw r10,248(r31) ; | 88: e8 01 00 00 ld r0,0(r1) ; fetch back-chain ptr (parent frame's sp) from stack of top by prolog -> gpr0, and ... 8c: f8 01 ff 11 stdu r0,-240(r1) ; ... 
update it further up the stack for alloca(220) - with padding to guarantee alignment 90: 39 21 00 70 addi r9,r1,112 ; | 94: f9 3f 00 70 std r9,112(r31) ; | 98: e9 3f 00 70 ld r9,112(r31) ; | 9c: 38 09 00 0f addi r0,r9,15 ; | start of alloca()'d memory -> gpr9, by ... a0: 78 00 e1 02 rldicl r0,r0,60,4 ; | ... using gpr0 as helper to align to 16b, leaving at least 112b at top of stack a4: 78 00 26 e4 rldicr r0,r0,4,59 ; | a8: f8 1f 00 70 std r0,112(r31) ; | ac: e9 3f 00 70 ld r9,112(r31) ; | b0: 38 00 00 4c li r0,76 ; 'L' -> gpr0, and ... b4: 98 09 00 00 stb r0,0(r9) ; ... store in local area (of alloca()'d space) b8: 80 1f 00 c8 lwz r0,200(r31) ; prep arg 0 (from prev frame's spill area), ... bc: 7c 08 07 b4 extsw r8,r0 ; ... -> gpr8 (w/ sign extension b/c int param in 64bit reg) c0: 80 1f 00 d0 lwz r0,208(r31) ; prep arg 1 (from prev frame's spill area), ... c4: 7c 07 07 b4 extsw r7,r0 ; ... -> gpr7 c8: 80 1f 00 d8 lwz r0,216(r31) ; prep arg 2 (from prev frame's spill area), ... cc: 7c 06 07 b4 extsw r6,r0 ; ... -> gpr6 d0: 80 1f 00 e0 lwz r0,224(r31) ; prep arg 3 (from prev frame's spill area), ... d4: 7c 09 07 b4 extsw r9,r0 ; ... -> gpr9 d8: 80 1f 00 e8 lwz r0,232(r31) ; prep arg 4 (from prev frame's spill area), ... dc: 7c 0b 07 b4 extsw r11,r0 ; ... -> gpr11 e0: 80 1f 00 f0 lwz r0,240(r31) ; prep arg 5 (from prev frame's spill area), ... e4: 7c 0a 07 b4 extsw r10,r0 ; ... -> gpr10 e8: 80 1f 00 f8 lwz r0,248(r31) ; prep arg 6 (from prev frame's spill area), ... ec: 7c 00 07 b4 extsw r0,r0 ; ... 
-> gpr0 f0: 7d 03 43 78 mr r3,r8 ; arg 0 f4: 7c e4 3b 78 mr r4,r7 ; arg 1 f8: 7c c5 33 78 mr r5,r6 ; arg 2 fc: 7d 26 4b 78 mr r6,r9 ; arg 3 100: 7d 67 5b 78 mr r7,r11 ; arg 4 104: 7d 48 53 78 mr r8,r10 ; arg 5 108: 7c 09 03 78 mr r9,r0 ; arg 6 10c: 48 00 00 01 bl 10c <.nonleaf_call+0xc0> ; call and put return address -> lr 110: e8 21 00 00 ld r1,0(r1) ; | 114: e8 01 00 10 ld r0,16(r1) ; | 118: 7c 08 03 a6 mtlr r0 ; | epilog 11c: eb e1 ff f8 ld r31,-8(r1) ; | 120: 4e 80 00 20 blr ; | 124: 00 00 00 00 .long 0x0 ; data 128: 00 00 00 01 .long 0x1 ; data 12c: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data? 0000000000000130 <.main>: 130: 7c 08 02 a6 mflr r0 ; | lr -> gpr0 134: fb e1 ff f8 std r31,-8(r1) ; | preserve gpr31 (as used in func as helper addr) 138: f8 01 00 10 std r0,16(r1) ; | prolog store lr 13c: f8 21 ff 81 stdu r1,-128(r1) ; | open frame 140: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below 144: 38 60 00 00 li r3,0 ; arg 0 148: 38 80 00 01 li r4,1 ; arg 1 14c: 38 a0 00 02 li r5,2 ; arg 2 150: 38 c0 00 03 li r6,3 ; arg 3 154: 38 e0 00 04 li r7,4 ; arg 4 158: 39 00 00 05 li r8,5 ; arg 5 15c: 39 20 00 06 li r9,6 ; arg 6 160: 39 40 00 07 li r10,7 ; arg 7 164: 48 00 00 01 bl 164 <.main+0x34> ; call and put return address -> lr 168: 38 00 00 00 li r0,0 ; return value ... 16c: 7c 03 03 78 mr r3,r0 ; ... in gpr3 170: e8 21 00 00 ld r1,0(r1) ; | 174: e8 01 00 10 ld r0,16(r1) ; | 178: 7c 08 03 a6 mtlr r0 ; | epilog 17c: eb e1 ff f8 ld r31,-8(r1) ; | 180: 4e 80 00 20 blr ; | 184: 00 00 00 00 .long 0x0 ; data 188: 00 00 00 01 .long 0x1 ; data 18c: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data? 
; ------------- ints and floats, var args, struct return value (meaning implicit first param), more than 8 params (11, with implicit return value ptr) -----------> ; #include <stdlib.h> ; #include <stdarg.h> ; ; void leaf_call(int b, float c, int d, float e, int f, float g, float h, int i, float j) ; { ; } ; ; struct aggr { int x; int y; int z; }; ; ; struct aggr nonleaf_call(int a, int b, float c, int d, float e, int f, ...) ; { ; va_list v; ; int i; ; float g, h, j; ; struct aggr st = { b, d, f }; ; va_start(v, f); ; g = va_arg(v, float); ; h = va_arg(v, float); ; i = va_arg(v, int); ; h = va_arg(v, float); ; /* use some local data */ ; *(char*)alloca(220) = 'L'; ; leaf_call(b, c, d, e, f, g, h, i, j); ; ; return st; ; } ; ; int main() ; { ; struct aggr st = nonleaf_call(0, 1, 2.f, 3, 4.f, 5, 6.f, 7.f, 8, 9.f); ; return 0; ; } ; output from freebsd-11.0-ppc64 w/ gcc 4.2.1 0000000000000000 <.leaf_call>: 0: fb e1 ff f8 std r31,-8(r1) 4: f8 21 ff c1 stdu r1,-64(r1) 8: 7c 3f 0b 78 mr r31,r1 c: 7c 69 1b 78 mr r9,r3 10: d0 3f 00 78 stfs f1,120(r31) 14: 7c ab 2b 78 mr r11,r5 18: d0 5f 00 88 stfs f2,136(r31) 1c: 7c e8 3b 78 mr r8,r7 20: d0 7f 00 98 stfs f3,152(r31) 24: d0 9f 00 a0 stfs f4,160(r31) 28: 7d 40 53 78 mr r0,r10 2c: d0 bf 00 b0 stfs f5,176(r31) 30: 91 3f 00 70 stw r9,112(r31) 34: 91 7f 00 80 stw r11,128(r31) 38: 91 1f 00 90 stw r8,144(r31) 3c: 90 1f 00 a8 stw r0,168(r31) 40: e8 21 00 00 ld r1,0(r1) 44: eb e1 ff f8 ld r31,-8(r1) 48: 4e 80 00 20 blr ... 
54: 80 01 00 01 lwz r0,1(r1) 0000000000000058 <.nonleaf_call>: 58: fb e1 ff f8 std r31,-8(r1) ; | 5c: f8 21 ff 91 stdu r1,-112(r1) ; | prolog 60: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below 64: 7c 8b 23 78 mr r11,r4 ; in arg 1 (first explicit arg, b/c of struct return value ptr being arg0) -> r11 68: 7c a8 2b 78 mr r8,r5 ; in arg 2 -> r8 (free reg, was skipped for float param) 6c: d0 3f 00 b8 stfs f1,184(r31) ; | in arg 3 (float) -> prev frame's spill area: 184 = 112 (frame) + 48 (prev frame's linkage area) + 8 (arg 0 = return value ptr) + 16 (first two explicit args) 70: d0 5f 00 c8 stfs f2,200(r31) ; | in arg 5 (float) -> prev frame's spill area 74: f9 5f 00 d8 std r10,216(r31) ; | in arg 7 (float, also held in gpr reg b/c vararg) -> prev frame's spill area 78: 7d 20 4b 78 mr r0,r9 ; | spilling in arg 6 in gpr0 (spilled below) 7c: 91 7f 00 a8 stw r11,168(r31) ; | in arg 1 (int) -> prev frame's spill area 80: 91 1f 00 b0 stw r8,176(r31) ; | in arg 2 (int) -> prev frame's spill area 84: 90 ff 00 c0 stw r7,192(r31) ; | in arg 4 (int) -> prev frame's spill area 88: 90 1f 00 d0 stw r0,208(r31) ; / in arg 6 (int) -> prev frame's spill area 8c: 80 1f 00 b0 lwz r0,176(r31) ; \ 90: 90 1f 00 48 stw r0,72(r31) ; | 94: 80 1f 00 c0 lwz r0,192(r31) ; | 98: 90 1f 00 4c stw r0,76(r31) ; | filling struct with 3 int input args 9c: 80 1f 00 d0 lwz r0,208(r31) ; | a0: 90 1f 00 50 stw r0,80(r31) ; | a4: 38 1f 00 d8 addi r0,r31,216 ; a8: f8 1f 00 40 std r0,64(r31) ; . ac: 7f e0 00 08 trap ; . ... ; . 
b8: 80 01 00 01 lwz r0,1(r1) ; 00000000000000bc <.main>: bc: 7c 08 02 a6 mflr r0 ; | lr -> gpr0 c0: fb e1 ff f8 std r31,-8(r1) ; | preserve gpr31 (as used in func as helper addr) c4: f8 01 00 10 std r0,16(r1) ; | prolog store lr c8: f8 21 ff 41 stdu r1,-192(r1) ; | open frame cc: 7c 3f 0b 78 mr r31,r1 ; use gpr31 as sort of frame pointer, below d0: 39 61 00 30 addi r11,r1,48 ; ptr to param area -> r11 d4: e9 22 00 00 ld r9,0(r2) ; prep arg 3 (=explicit arg 2, b/c of implicit return value pointer), ... d8: c1 a9 00 00 lfs f13,0(r9) ; ... load from static data -> f13 dc: e9 22 00 08 ld r9,8(r2) ; prep arg 5, ... e0: c1 89 00 00 lfs f12,0(r9) ; ... load from static data -> f12 e4: e9 22 00 10 ld r9,16(r2) ; prep arg 7, ... e8: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> f0 ec: d8 1f 00 a0 stfd f0,160(r31) ; | f0: e8 1f 00 a0 ld r0,160(r31) ; | f4: 7c 09 03 78 mr r9,r0 ; | also hold it in f11 (temporarily, before copying to fpr3 below) f8: 7d 2a 4b 78 mr r10,r9 ; | and gpr10 (instead of skipping that int reg, for straightforward spilling) fc: f8 1f 00 a0 std r0,160(r31) ; | (uses temp space to copy between fpr and gpr regs) 100: c8 1f 00 a0 lfd f0,160(r31) ; | 104: fd 60 00 90 fmr f11,f0 ; | 108: e9 22 00 18 ld r9,24(r2) ; prep arg 8, ... 10c: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> fpr0, and ... 110: d8 0b 00 40 stfd f0,64(r11) ; ... "pushed" onto stack (in param area past spill area) and ... 114: c9 4b 00 40 lfd f10,64(r11) ; ... also held in f10 (prep, see where it's used below) 118: 38 00 00 08 li r0,8 ; arg 9, ... 11c: f8 0b 00 48 std r0,72(r11) ; ... "pushed" onto stack 120: e9 22 00 20 ld r9,32(r2) ; arg 10 (float, promoted to double), ... 124: c8 09 00 00 lfd f0,0(r9) ; ... load from static data -> fpr0, and ... 128: d8 0b 00 50 stfd f0,80(r11) ; ... "pushed" onto stack 12c: c8 0b 00 50 lfd f0,80(r11) ; ... 
also held in f0 (prep, see where it's used below), in theory pointless reload of arg10 -> fpr0 130: 38 1f 00 90 addi r0,r31,144 ; ptr to return value struct in local space -> gpr0 134: 7c 03 03 78 mr r3,r0 ; arg 0 (this is the pointer to the struct return value) 138: 38 80 00 00 li r4,0 ; arg 1 13c: 38 a0 00 01 li r5,1 ; arg 2 140: fc 20 68 90 fmr f1,f13 ; arg 3 (float, in 1st double reg) 144: 38 e0 00 03 li r7,3 ; arg 4 (skipping gpr6 b/c of float arg) 148: fc 40 60 90 fmr f2,f12 ; arg 5 (float, in 2nd double reg) 14c: 39 20 00 05 li r9,5 ; arg 6 (skipping gpr8 b/c of float arg, vararg) 150: fc 60 58 90 fmr f3,f11 ; arg 7 (float, in 3rd double reg, promoted to double anyways b/c vararg) 154: fc 80 50 90 fmr f4,f10 ; arg 8 (float, in 4th double reg, promoted to double anyways b/c vararg) 158: fc a0 00 90 fmr f5,f0 ; arg 10 (float, in 5th double reg, promoted to double anyways b/c vararg) 15c: 48 00 00 01 bl 15c <.main+0xa0> ; call and put return address -> lr 160: 38 00 00 00 li r0,0 ; return value ... 164: 7c 03 03 78 mr r3,r0 ; ... in gpr3 168: e8 21 00 00 ld r1,0(r1) ; | 16c: e8 01 00 10 ld r0,16(r1) ; | 170: 7c 08 03 a6 mtlr r0 ; | epilog 174: eb e1 ff f8 ld r31,-8(r1) ; | 178: 4e 80 00 20 blr ; | 17c: 00 00 00 00 .long 0x0 ; data 180: 00 00 00 01 .long 0x1 ; data 184: 80 01 00 01 lwz r0,1(r1) ; unsure@@@. data? ; vim: ft=asm