0
|
1 /*
|
|
2
|
|
3 Package: dyncall
|
|
4 Library: dyncall
|
|
5 File: dyncall/dyncall_call_arm32_thumb_armhf.S
|
|
6 Description: Call Kernel for ARM 32-bit ARM Architecture - Hard Float in Thumb code
|
|
7 License:
|
|
8
|
281
|
9 Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
|
0
|
10 Tassilo Philipp <tphilipp@potion-studios.com>
|
|
11
|
|
12 Permission to use, copy, modify, and distribute this software for any
|
|
13 purpose with or without fee is hereby granted, provided that the above
|
|
14 copyright notice and this permission notice appear in all copies.
|
|
15
|
|
16 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
17 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
18 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
19 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
21 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
22 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
23
|
|
24 */
|
|
25
|
|
26
|
|
27
|
|
28 #include "../portasm/portasm-arm.S"
|
|
29
|
|
30 /* ============================================================================
|
|
31 DynCall Call Kernel for ARM 32-bit ARM Architecture Hard-Float
|
|
32 ----------------------------------------------------------------------------
|
|
33 C Interface:
|
|
34 dcCall_arm32_armhf (DCpointer target, DCpointer argv, DCsize size, DCfloat* regdata);
|
|
35
|
|
36 This Call Kernel was tested on Raspberry Pi/Raspbian (Debian)
|
|
37 */
|
|
38
|
|
39 .text
|
|
40 .thumb
|
|
41 #ifndef __thumb2__
|
|
42 .code 16
|
|
43 #endif
|
|
44 // .arch armv6
|
|
45 // .fpu vfp
|
|
46
|
36
|
47 /*
|
|
48 1st arg / r0 = funptr
|
|
49 2nd arg / r1 = ptr to int args
|
|
50 3rd arg / r2 = size
|
|
51 4th arg / r3 = ptr to float args
|
|
52 */
|
0
|
/*
   dcCall_arm32_armhf - call kernel, Thumb code, hard-float.

   In:  r0 = target function pointer
        r1 = argument buffer; its first 16 bytes are loaded into r0-r3 for the
             call, everything beyond that is copied onto the stack
        r2 = size of the argument buffer in bytes
        r3 = pointer to 8 doubles (= 16 floats) that are loaded into d0-d7
   Out: the target's return registers are left untouched after the call.

   Register roles inside the kernel:
        r4 = target, r5 = argument buffer, r6 = scratch / new stack pointer,
        r7 = frame pointer (value of sp right after the prolog push).
*/
GLOBAL_C(dcCall_arm32_armhf)
.thumb_func
ENTRY_C(dcCall_arm32_armhf)

	/* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */
	push	{r4-r7, r14}
	mov	r7 , r13            /* r7 = frame ptr; the epilog restores sp from it. */

	mov	r4 , r0             /* r4 = 'fptr' (1st argument is passed in r0). */
	mov	r5 , r1             /* r5 = 'args' (2nd argument is passed in r1). */

	/* Load 16 single-precision registers (= 8 double-precision registers). */
	fldmiad	r3, {d0-d7}

	/* r2 = number of bytes that have to go onto the stack (the first 16 bytes travel in r0-r3). */
	sub	r2 , #16

	/* Compute the stack pointer for the call. The prolog pushed 5 words (20 bytes), so sp
	   has to be re-aligned to 8 bytes even when there are no stack arguments; otherwise the
	   target would be entered with a misaligned stack. */
	mov	r6 , r13
	cmp	r2 , #0
	ble	armhf_alignStack    /* Nothing to reserve, only align. */
	sub	r6 , r2             /* Reserve room for the stack arguments. */
armhf_alignStack:
	lsr	r6 , #3             /* Round down to a multiple of 8 ... */
	lsl	r6 , #3             /* ... by clearing the low 3 bits. */
	mov	r13, r6

	cmp	r2 , #0             /* Flags were clobbered by the shifts, so test again. */
	ble	armhf_call          /* No stack arguments to copy. */

	mov	r3 , #0             /* Init byte counter. */
	add	r1 , #16            /* Skip the 16 bytes that are passed in r0-r3. */
	/* Copy loop; it only stops when r3 == r2, so r2 is presumably always a multiple of 4 - verify against the caller. */
armhf_pushArgs:
	ldr	r0 , [r1, +r3]      /* Load word into r0. */
	str	r0 , [r6, +r3]      /* Store word into the reserved stack area (r6 == sp). */
	add	r3 , #4             /* Increment byte counter. */
	cmp	r2 , r3
	bne	armhf_pushArgs

armhf_call:
	ldmia	r5!, {r0-r3}        /* Load first 4 arguments for new call into r0-r3. */
	/* 'blx r4' workaround for ARMv4t: build the return address by hand. Reading r15 in Thumb
	   state yields the address of the reading instruction + 4; the epilog starts another 4 bytes
	   further (behind 'mov r14' and 'bx'), and bit 0 must be set to return in Thumb state: 4 + 1 = 5.
	   This relies on the next three instructions being 16-bit encodings. */
	mov	r6 , r15
	add	r6 , #5
	mov	r14, r6
	bx	r4                  /* Call (ARM/THUMB), available for ARMv4t. */

	/* Epilog. */
	mov	r13, r7             /* Drop stack arguments and alignment padding. */
	pop	{r4-r7, r15}        /* Restore permanent registers and return. */
|
|
113
|