Mercurial > pub > dyncall > dyncall
changeset 148:12729fd52ab7
- performance improvement and more correct handling of arm32 armhf calls: bogus data is no longer copied just to reserve spill area space
- added armhf callconv stack layout diagram to doc
author | cslag |
---|---|
date | Sun, 11 Sep 2016 01:19:27 +0200 |
parents | 8ce75240a0f1 |
children | c12120a1fbc0 |
files | doc/manual/callconvs/callconv_arm32.tex dyncall/dyncall_call_arm32_arm_armhf.S |
diffstat | 2 files changed, 54 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/manual/callconvs/callconv_arm32.tex Fri Sep 09 15:23:16 2016 +0200 +++ b/doc/manual/callconvs/callconv_arm32.tex Sun Sep 11 01:19:27 2016 +0200 @@ -333,6 +333,7 @@ \item parameters \textless=\ 32 bits are passed as 32 bit words \item structures and unions are passed by value, with the first four words of the parameters in r0-r3 @@@?check doc \item if return value is a structure, a pointer pointing to the return value's space is passed in r0, the first parameter in r1, etc. (see {\bf return values}) +\item callee spills, caller reserves spill area space, though \end{itemize} \paragraph{Return values} @@ -344,7 +345,37 @@ \item if return value is a structure, the caller allocates space for the return value on the stack in its frame and passes a pointer to it in r0 \end{itemize} +\paragraph{Stack layout} +Stack directly after function prolog:\\ + +\begin{figure}[h] +\begin{tabular}{5|3|1 1} +\hhline{~-~~} + & \vdots & & \\ +\hhline{~=~~} +register save area & \hspace{4cm} & & \mrrbrace{6}{caller's frame} \\ +\hhline{~-~~} +local data & & & \\ +\hhline{~-~~} +\mrlbrace{7}{parameter area} & r0-r3 & \mrrbrace{1}{spill area (if needed)} & \\ +\hhline{~-~~} + & \ldots & \mrrbrace{3}{stack parameters} & \\ + & \ldots & & \\ + & \ldots & & \\ +\hhline{~=~~} +register save area (with return address) & & & \mrrbrace{3}{current frame} \\ +\hhline{~-~~} +local data & & & \\ +\hhline{~-~~} +parameter area & \vdots & & \\ +\hhline{~-~~} +\end{tabular} +\caption{Stack layout on arm32 armhf} +\end{figure} + + +\newpage \subsubsection{Architectures}
--- a/dyncall/dyncall_call_arm32_arm_armhf.S Fri Sep 09 15:23:16 2016 +0200 +++ b/dyncall/dyncall_call_arm32_arm_armhf.S Sun Sep 11 01:19:27 2016 +0200 @@ -51,38 +51,36 @@ ENTRY_C(dcCall_arm32_armhf) /* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */ - mov r12 , r13 /* Stack ptr (r13) -> temporary (r12). */ - stmdb r13!, {r4-r5, r11, r12, r14} /* Permanent registers and stack pointer (now in r12), etc... -> save area on stack (except counter). */ + mov r12 , r13 /* Stack ptr (r13) -> temporary (r12). */ + stmdb r13!, {r4-r5, r11, r12, r14} /* Permanent registers and stack pointer (now in r12), etc... -> save area on stack (except counter). */ - mov r11 , r12 /* Set frame ptr. */ - mov r4 , r0 /* r4 = 'fptr' (1st argument is passed in r0). */ - mov r5 , r1 /* r5 = 'args' (2nd argument is passed in r1). */ - + mov r11, r12 /* Set frame ptr. */ + mov r4, r0 /* r4 = fptr */ + add r5, r1, #16 /* r5 = stack args (after intreg ones) */ + /* Load 16 single-precision registers (= 8 double-precision registers). */ - fldmiad r3, {d0-d7} + fldmiad r3, {d0-d7} - /* nope, or varargs will not work: sub r2 , r2 , #16 /* skip spill area */ - cmp r2, #0 - ble armhf_call - - sub r13, r13, r2 - and r13, r13, #-8 /* align 8-byte. */ + /* prep stack parameter area (includes room for spill area, callee spills if needed) */ + sub r13, r13, r2 + and r13, r13, #-8 /* align 8-byte. */ - mov r3, #0 /* Init byte counter. */ - add r1 , r1 , #16 + sub r2, r2, #16 /* loop counters for stack params to copy */ + mov r3, #0 armhf_pushArgs: - ldr r0, [r1, +r3] /* Load word into r0. */ - str r0, [r13, +r3] /* Push word onto stack. */ - add r3, r3, #4 /* Increment byte counter. */ - cmp r2, r3 - bne armhf_pushArgs + cmp r2, r3 + ble armhf_call + ldr r0, [r5, +r3] /* load word into r0 ... */ + str r0, [r13, +r3] /* ... 
then push onto stack */ + add r3, r3, #4 + b armhf_pushArgs armhf_call: - ldmia r5, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ - /* 'blx %r4' workaround for ARMv4t: */ - mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ - bx r4 /* Call (ARM/THUMB), available for ARMv4t. */ + ldmia r1, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ + /* 'blx %r4' workaround for ARMv4t: */ + mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ + bx r4 /* Call (ARM/THUMB), available for ARMv4t. */ /* Epilog. */ - ldmdb r11, {r4-r5, r11, r13, r15} /* Restore permanent registers (ignore temporary (r12), restore stack ptr and program counter).@@@db not needed since we rewrite r13? */ + ldmdb r11, {r4-r5, r11, r13, r15} /* Restore permanent registers (ignore temporary (r12), restore stack ptr and program counter).@@@db not needed since we rewrite r13? */