# HG changeset patch # User Tassilo Philipp # Date 1527068494 -7200 # Node ID b104c5beec8b7c20b6a422c91b507f40b2fdd9e6 # Parent 6869d834a847e3ab9384762bd836597a72bdf7ad - softfloat support for mips64/n64 abi diff -r 6869d834a847 -r b104c5beec8b ChangeLog --- a/ChangeLog Wed May 23 11:39:40 2018 +0200 +++ b/ChangeLog Wed May 23 11:41:34 2018 +0200 @@ -2,9 +2,9 @@ Version 1.1 (upcoming) dyncall: - o support for soft-float MIPS o32 (big- and little-endian) + o support for soft-float MIPS o32 & n64 (big- and little-endian, each) dyncallback: - o support for soft-float MIPS o32 (big- and little-endian) + o support for soft-float MIPS o32 & n64 (big- and little-endian, each) doc: o more detail in support matrix for bi-endian platforms buildsys: diff -r 6869d834a847 -r b104c5beec8b doc/manual/callconvs/callconv_mips32.tex --- a/doc/manual/callconvs/callconv_mips32.tex Wed May 23 11:39:40 2018 +0200 +++ b/doc/manual/callconvs/callconv_mips32.tex Wed May 23 11:41:34 2018 +0200 @@ -150,7 +150,7 @@ \item Stack grows down \item Stack parameter order: right-to-left \item Caller cleans up the stack -\item Caller is required to always leave a 16-byte spill area for \$a0-\$a3 at the and of {\bf its} frame, to be used and spilled to by the callee, if needed +\item Caller is required to always leave a 16-byte spill area for \$a0-\$a3 at the end of {\bf its} frame, to be used and spilled to by the callee, if needed \item The different stack areas (local data, register save area, parameter area) are each aligned to 8 bytes \item generally, first four 32bit arguments are passed in registers \$a0-\$a3, respectively (only on hard-float targets: see below for exceptions if first arg is a float) \item subsequent parameters are passed vie the stack diff -r 6869d834a847 -r b104c5beec8b doc/manual/callconvs/callconv_mips64.tex --- a/doc/manual/callconvs/callconv_mips64.tex Wed May 23 11:39:40 2018 +0200 +++ b/doc/manual/callconvs/callconv_mips64.tex Wed May 23 11:41:34 2018 +0200 @@ -29,7 
+29,7 @@ \paragraph{\product{dyncall} support} -For MIPS 64-bit machines, dyncall supports the N64 calling conventions for calls and callbacks (for big- and little-endian targets). +For MIPS 64-bit machines, dyncall supports the N64 calling conventions for calls and callbacks (for all four combinations of big/little-endian, and soft/hard-float targets). The N32 calling convention might work - it used to, but hasn't been tested, recently. \subsubsection{MIPS N64 Calling Convention} @@ -40,23 +40,23 @@ \begin{tabular*}{0.95\textwidth}{lll} Name & Alias & Brief description\\ \hline -{\bf \$0} & {\bf \$zero} & Hardware zero \\ -{\bf \$1} & {\bf \$at} & Assembler temporary \\ -{\bf \$2-\$3} & {\bf \$v0-\$v1} & Integer results \\ -{\bf \$4-\$11} & {\bf \$a0-\$a7} & Integer arguments, or double precision float arguments\\ -{\bf \$12-\$15,\$24} & {\bf \$t4-\$t7,\$t8} & Integer temporaries \\ -{\bf \$25} & {\bf \$t9} & Integer temporary, hold the address of the called function for all PIC calls (by convention) \\ -{\bf \$16-\$23} & {\bf \$s0-\$s7} & Preserved \\ -{\bf \$26,\$27} & {\bf \$kt0,\$kt1} & Reserved for kernel \\ -{\bf \$28} & {\bf \$gp} & Global pointer, preserve \\ -{\bf \$29} & {\bf \$sp} & Stack pointer, preserve \\ -{\bf \$30} & {\bf \$s8} & Frame pointer, preserve \\ -{\bf \$31} & {\bf \$ra} & Return address, preserve \\ -{\bf hi, lo} & & Multiply/divide special registers \\ -{\bf \$f0,\$f2} & & Float results \\ -{\bf \$f1,\$f3,\$f4-\$f11,\$f20-\$f23} & & Float temporaries \\ -{\bf \$f12-\$f19} & & Float arguments \\ -{\bf \$f24-\$f31} & & Preserved \\%@@@on N32, this changes +{\bf \$0} & {\bf \$zero} & hardware zero \\ +{\bf \$1} & {\bf \$at} & assembler temporary \\ +{\bf \$2-\$3} & {\bf \$v0-\$v1} & return value (only integer on hard-float targets) \\ +{\bf \$4-\$11} & {\bf \$a0-\$a7} & first arguments (only integer on hard-float targets) \\ +{\bf \$12-\$15,\$24} & {\bf \$t4-\$t7,\$t8} & temporaries, scratch \\ +{\bf \$25} & {\bf \$t9} & temporary, holds 
the address of the called function for all PIC calls (by convention) \\ +{\bf \$16-\$23} & {\bf \$s0-\$s7} & preserved \\ +{\bf \$26,\$27} & {\bf \$kt0,\$kt1} & reserved for kernel \\ +{\bf \$28} & {\bf \$gp} & global pointer, preserve \\ +{\bf \$29} & {\bf \$sp} & stack pointer, preserve \\ +{\bf \$30} & {\bf \$s8} & frame pointer, preserve \\ +{\bf \$31} & {\bf \$ra} & return address, preserve \\ +{\bf hi, lo} & & multiply/divide special registers \\ +{\bf \$f0,\$f2} & & only on hard-float targets: float results \\ +{\bf \$f1,\$f3,\$f4-\$f11,\$f20-\$f23} & & only on hard-float targets: float temporaries \\ +{\bf \$f12-\$f19} & & only on hard-float targets: float arguments \\ +{\bf \$f24-\$f31} & & only on hard-float targets: preserved \\%@@@on N32, this changes \end{tabular*} \caption{Register usage on MIPS N64 calling convention} \end{table} @@ -67,12 +67,14 @@ \item Stack grows down \item Stack parameter order: right-to-left \item Caller cleans up the stack -\item first 8 params \textgreater=\ 64-bit are passed in registers \$a0-\$a7 for integers and \$f12-\$f19 for floats - with mixed float and int parameters, -some registers are left out (e.g. first parameter ends up in \$a0 or \$f12, second in \$a1 or \$f13, etc.) +\item generally, first 8 params \textgreater=\ 64-bit are passed via registers +\item for hard-float targets: register arguments are passed via \$a0-\$a7 for integers and \$f12-\$f19 for floats - with mixed float and int parameters, some registers are left out (e.g. first parameter ends up in \$a0 or \$f12, second in \$a1 or \$f13, etc.) 
+\item for soft-float targets: register arguments are passed via \$a0-\$a7 \item subsequent arguments are pushed onto the stack \item all stack entries are 64-bit aligned \item all stack regions are 16-byte aligned \item results are returned in \$v0, and for a second one \$v1 is used +\item only on hard-float targets: floating point results are returned in \$f0 \item float arguments passed in the variable part of a vararg call are passed like integers \item quad precision float arguments are passed in even-odd register pairs, skipping one register if needed \item integer parameters \textless\ 64 bit are right-justified (meaning occupy higher-address bytes) in their 8-byte slot on the stack, requiring extra-care for big-endian targets @@ -82,7 +84,7 @@ \paragraph{Stack layout} Stack directly after function prolog:\\ -@@@ WIP, might be wrong +@@@ might be wrong \begin{figure}[h] \begin{tabular}{5|3|1 1} diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_call_mips_n64.S --- a/dyncall/dyncall_call_mips_n64.S Wed May 23 11:39:40 2018 +0200 +++ b/dyncall/dyncall_call_mips_n64.S Wed May 23 11:41:34 2018 +0200 @@ -86,11 +86,9 @@ /* locals: */ /* $13 = register data */ - /* $14 = useDouble flags */ move $13, $5 - ld $14, 128($13) - /* load integer parameter registers */ + /* load parameter registers (only ints on hardfloat) */ ld $4 , 0($13) ld $5 , 8($13) @@ -101,6 +99,11 @@ ld $10,48($13) ld $11,56($13) +#if defined(DC__ABI_HARDFLOAT) + + /* $14 = useDouble flags */ + ld $14, 128($13) + /* load float-or-double floating pointer parameter registers a 64-bit bitmask given at byte offset 128 of regdata indicates if loading a float (bit cleared) or double (bit set), starting @@ -172,6 +175,8 @@ .d7: l.d $f19,120($13) +#endif /* DC__ABI_HARDFLOAT */ + .fregend: /* jump-and-link to register $25 */ diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_call_mips_n64.h --- a/dyncall/dyncall_call_mips_n64.h Wed May 23 11:39:40 2018 +0200 +++ b/dyncall/dyncall_call_mips_n64.h Wed 
May 23 11:41:34 2018 +0200 @@ -53,8 +53,10 @@ struct DCRegData_mips_n64 { DClonglong mIntData[8]; +#if defined(DC__ABI_HARDFLOAT) union { DCfloat f; DCdouble d; } mFloatData[8]; DClonglong mUseDouble; /* bitmask: lower 8 bits specify to use float or double from union array. */ +#endif /* DC__ABI_HARDFLOAT */ }; void dcCall_mips_n64(DCpointer target, struct DCRegData_mips_n64* regdata, DCsize stksize, DCpointer stkdata); diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_callvm_mips_n64.c --- a/dyncall/dyncall_callvm_mips_n64.c Wed May 23 11:39:40 2018 +0200 +++ b/dyncall/dyncall_callvm_mips_n64.c Wed May 23 11:41:34 2018 +0200 @@ -58,7 +58,9 @@ DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self; dcVecReset(&self->mVecHead); self->mRegCount = 0; +#if defined(DC__ABI_HARDFLOAT) self->mRegData.mUseDouble = 0LL; +#endif /* DC__ABI_HARDFLOAT */ } @@ -75,24 +77,24 @@ /* arg int -- fillup 64-bit integer register file OR push on stack */ -static void dc_callvm_argLongLong_mips_n64(DCCallVM* in_self, DClonglong Lv) +static void dc_callvm_argLongLong_mips_n64(DCCallVM* in_self, DClonglong x) { DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self; /* fillup integer register file */ if (self->mRegCount < 8) - self->mRegData.mIntData[self->mRegCount++] = Lv; + self->mRegData.mIntData[self->mRegCount++] = x; else - dcVecAppend(&self->mVecHead, &Lv, sizeof(DClonglong)); + dcVecAppend(&self->mVecHead, &x, sizeof(DClonglong)); } -static void dc_callvm_argInt_mips_n64(DCCallVM* in_self, DCint i) +static void dc_callvm_argInt_mips_n64(DCCallVM* in_self, DCint x) { - dc_callvm_argLongLong_mips_n64(in_self, (DClonglong) i ); + dc_callvm_argLongLong_mips_n64(in_self, (DClonglong)x); } static void dc_callvm_argPointer_mips_n64(DCCallVM* in_self, DCpointer x) { - dc_callvm_argLongLong_mips_n64(in_self, * (DClonglong*) &x ); + dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&x); } static void dc_callvm_argBool_mips_n64(DCCallVM* in_self, DCbool x) @@ -117,25 +119,41 @@ static 
void dc_callvm_argDouble_mips_n64(DCCallVM* in_self, DCdouble x) { +#if defined(DC__ABI_HARDFLOAT) DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self; if (self->mRegCount < 8) { self->mRegData.mUseDouble |= 1<<( self->mRegCount ); self->mRegData.mFloatData[self->mRegCount++].d = x; } else { - dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble) ); + dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble)); } +#else + dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&x); +#endif /* DC__ABI_HARDFLOAT */ } static void dc_callvm_argFloat_mips_n64(DCCallVM* in_self, DCfloat x) { +#if defined(DC__ABI_HARDFLOAT) DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self; if (self->mRegCount < 8) { /*self->mRegData.mFloatData[self->mRegCount++].d = (DCdouble) x;*/ self->mRegData.mFloatData[self->mRegCount++].f = x; } else { - dcVecAppend(&self->mVecHead, &x, sizeof(DCfloat) ); - dcVecSkip(&self->mVecHead, sizeof(DCfloat) ); + dcVecAppend(&self->mVecHead, &x, sizeof(DCfloat)); + dcVecSkip(&self->mVecHead, sizeof(DCfloat)); } +#else + DCfloat f[] = {x,0.f}; +# if defined(DC__Endian_BIG) + // floats in regs always right justified + if (((DCCallVM_mips_n64*)in_self)->mRegCount < 8) { + f[1] = f[0]; + f[0] = 0.f; + } +# endif /* DC__Endian_BIG */ + dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&f); +#endif /* DC__ABI_HARDFLOAT */ } diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_callvm_mips_o32.c --- a/dyncall/dyncall_callvm_mips_o32.c Wed May 23 11:39:40 2018 +0200 +++ b/dyncall/dyncall_callvm_mips_o32.c Wed May 23 11:41:34 2018 +0200 @@ -118,10 +118,11 @@ #if defined(DC__ABI_HARDFLOAT) if (self->mArgCount < 2) { + /* @@@ unsure if we should zero init, here; seems to work as-is */ # if defined(DC__Endian_LITTLE) self->mRegData.u[self->mArgCount].f[0] = x; # else - self->mRegData.u[self->mArgCount].f[1] = x; + self->mRegData.u[self->mArgCount].f[1] = x; // floats in regs always right justified # endif # if 0 self->mRegData.u[self->mArgCount].f[1] = x; diff -r 
6869d834a847 -r b104c5beec8b dyncallback/dyncall_args_mips.h --- a/dyncallback/dyncall_args_mips.h Wed May 23 11:39:40 2018 +0200 +++ b/dyncallback/dyncall_args_mips.h Wed May 23 11:41:34 2018 +0200 @@ -33,19 +33,37 @@ { /* Don't change order or types, laid out for asm code to fill in! */ #if defined(DC__Arch_MIPS) && defined(DC__ABI_MIPS_O32) + DCint freg_count; /* unused on soft-float targets, but keep as 4b-padding */ + #else + +# if defined(DC__Arch_MIPS) + # define DCARGS_MIPS_NUM_IREGS 8 # define DCARGS_MIPS_NUM_FREGS 8 -# if defined(DC__Arch_MIPS) DCint ireg_data[DCARGS_MIPS_NUM_IREGS]; DCfloat freg_data[DCARGS_MIPS_NUM_FREGS]; struct { DCshort i; DCshort f; } reg_count; -# elif defined(DC__Arch_MIPS64) - DClonglong ireg_data[DCARGS_MIPS_NUM_IREGS]; - DCdouble freg_data[DCARGS_MIPS_NUM_FREGS]; + +# elif defined(DC__Arch_MIPS64) + + /* single counter for both, int & float: mips64 uses 8 max, total, either */ + /* skipping over other/type's reg, or only using int regs on soft-float */ +# define DCARGS_MIPS_NUM_REGS 8 +# if defined(DC__ABI_SOFTFLOAT) + union +# else + struct +# endif + { + DClonglong ireg_data[DCARGS_MIPS_NUM_REGS]; + DCdouble freg_data[DCARGS_MIPS_NUM_REGS]; + }; DClonglong reg_count; -# endif + +# endif + #endif DCuchar* stackptr; }; diff -r 6869d834a847 -r b104c5beec8b dyncallback/dyncall_args_mips64.c --- a/dyncallback/dyncall_args_mips64.c Wed May 23 11:39:40 2018 +0200 +++ b/dyncallback/dyncall_args_mips64.c Wed May 23 11:41:34 2018 +0200 @@ -30,7 +30,7 @@ DClonglong dcbArgLongLong(DCArgs* p) { DClonglong value; - if(p->reg_count < DCARGS_MIPS_NUM_IREGS) + if(p->reg_count < DCARGS_MIPS_NUM_REGS) value = p->ireg_data[p->reg_count++]; else { value = *((DClonglong*)p->stackptr); @@ -54,7 +54,7 @@ DCdouble dcbArgDouble(DCArgs* p) { DCdouble result; - if(p->reg_count < DCARGS_MIPS_NUM_FREGS) + if(p->reg_count < DCARGS_MIPS_NUM_REGS) result = p->freg_data[p->reg_count++]; else { result = *((DCdouble*)p->stackptr); @@ -65,7 +65,7 @@ DCfloat 
dcbArgFloat(DCArgs* p) { DCfloat result; - if(p->reg_count < DCARGS_MIPS_NUM_FREGS) { + if(p->reg_count < DCARGS_MIPS_NUM_REGS) { result = ((DCfloat*)&p->freg_data[p->reg_count++]) #if defined(DC__Endian_LITTLE) [0]; diff -r 6869d834a847 -r b104c5beec8b dyncallback/dyncall_callback_mips_n64.S --- a/dyncallback/dyncall_callback_mips_n64.S Wed May 23 11:39:40 2018 +0200 +++ b/dyncallback/dyncall_callback_mips_n64.S Wed May 23 11:41:34 2018 +0200 @@ -6,7 +6,7 @@ Description: Callback Thunk - Implementation for mips64 n64 License: - Copyright (c) 2016 Tassilo Philipp + Copyright (c) 2016-2018 Tassilo Philipp Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -28,6 +28,18 @@ $t8+64 -> userdata */ +/* Frame size is 160b for hard- and 96b for soft-float, as follows: */ +/* DCargs(fregs:64? + iregs:64 + regcnts:8 + sp:8) + rval:8 + ra:8 */ +#if defined(DC__ABI_HARDFLOAT) +SP_SP = 160 +#else +SP_SP = 96 +#endif +SP_LR = SP_SP-8 +SP_RVAL = SP_SP-16 +SP_ARG_SP = SP_SP-24 +SP_ARG_RC = SP_SP-32 + .section .mdebug.abi64 .previous .abicalls @@ -39,17 +51,15 @@ .set noreorder /* Prolog. */ - /* Frame size of 160b comes from following: */ - /* DCargs(fregs:64 + iregs:64 + regcounts:8 + stackptr:8) + retval:8 + ra:8 */ - daddiu $sp, $sp, -160 /* open frame */ - sd $ra, 152($sp) /* save link register */ + daddiu $sp, $sp, -SP_SP /* open frame */ + sd $ra, SP_LR($sp) /* save link register */ - .frame $fp,160,$31 /* specify our frame: fp,size,lr; creates virt $fp */ + .frame $fp,SP_SP,$31 /* specify our frame: fp,size,lr; creates virt $fp */ /* code below doesn't use $fp though, as n/a with -O1 */ /* Init return value */ - sd $zero, 144($sp) + sd $zero, SP_RVAL($sp) - /* Store float and int args where our DCargs member arrays are, in local area. 
*/ + /* Store reg args where our DCargs member arrays are, in local stack area */ sd $4, 0($sp) sd $5, 8($sp) sd $6, 16($sp) @@ -58,6 +68,7 @@ sd $9, 40($sp) sd $10, 48($sp) sd $11, 56($sp) +#if defined(DC__ABI_HARDFLOAT) s.d $f12, 64($sp) s.d $f13, 72($sp) s.d $f14, 80($sp) @@ -66,31 +77,34 @@ s.d $f17, 104($sp) s.d $f18, 112($sp) s.d $f19, 120($sp) +#endif /* Init DCarg's reg_counts and stackptr. */ - sd $zero, 128($sp) /* reg_count */ - daddiu $4, $sp, 160 - sd $4, 136($sp) /* stackptr */ + sd $zero, SP_ARG_RC($sp) /* reg_count */ + daddiu $4, $sp, SP_SP + sd $4, SP_ARG_SP($sp) /* stackptr */ /* Prepare callback handler call. */ - move $4, $24 /* Param 0 = DCCallback*, $24 ($t8) holds pointer to thunk */ - move $5, $sp /* Param 1 = DCArgs*, pointer to where pointer to args is stored */ - daddiu $6, $sp, 144 /* Param 2 = results pointer to 8b of local data on stack */ - ld $7, 64($24) /* Param 3 = userdata pointer */ + move $4, $24 /* Param 0 = DCCallback*, $24/$t8 holds DCThunk* */ + move $5, $sp /* Param 1 = ptr to where DCArgs* is stored */ + daddiu $6, $sp, SP_RVAL /* Param 2 = results ptr to 8b of local stack data */ + ld $7, 64($24) /* Param 3 = userdata pointer */ - ld $25, 56($24) /* store handler entry in $25 ($t9), required for PIC */ - jalr $25 /* jump */ - nop /* branch delay nop */ + ld $25, 56($24) /* store handler entry in $25/$t9, required for PIC */ + jalr $25 /* jump */ + nop /* branch delay nop */ /* Copy result in corresponding registers $2-$3 ($v0-$v1) and $f0 */ - ld $2, 144($sp) /* note: ignoring second possible retval in $3, here */ - l.d $f0, 144($sp) + ld $2, SP_RVAL($sp) /* note: ignoring 2nd possible retval in $3, here */ +#if defined(DC__ABI_HARDFLOAT) + l.d $f0, SP_RVAL($sp) +#endif /* Epilog. Tear down frame and return. 
*/ - ld $ra, 152($sp) /* restore return address */ - daddiu $sp, $sp, 160 /* close frame */ - j $ra /* return */ - nop /* branch delay nop */ + ld $ra, SP_LR($sp) /* restore return address */ + daddiu $sp, $sp, SP_SP /* close frame */ + j $ra /* return */ + nop /* branch delay nop */ .set reorder .end dcCallbackThunkEntry