# HG changeset patch
# User Tassilo Philipp
# Date 1527068494 -7200
# Node ID b104c5beec8b7c20b6a422c91b507f40b2fdd9e6
# Parent  6869d834a847e3ab9384762bd836597a72bdf7ad
- softfloat support for mips64/n64 abi

diff -r 6869d834a847 -r b104c5beec8b ChangeLog
--- a/ChangeLog	Wed May 23 11:39:40 2018 +0200
+++ b/ChangeLog	Wed May 23 11:41:34 2018 +0200
@@ -2,9 +2,9 @@
 
 Version 1.1 (upcoming)
 dyncall:
-  o support for soft-float MIPS o32 (big- and little-endian)
+  o support for soft-float MIPS o32 & n64 (big- and little-endian, each)
 dyncallback:
-  o support for soft-float MIPS o32 (big- and little-endian)
+  o support for soft-float MIPS o32 & n64 (big- and little-endian, each)
 doc:
   o more detail in support matrix for bi-endian platforms
 buildsys:
diff -r 6869d834a847 -r b104c5beec8b doc/manual/callconvs/callconv_mips32.tex
--- a/doc/manual/callconvs/callconv_mips32.tex	Wed May 23 11:39:40 2018 +0200
+++ b/doc/manual/callconvs/callconv_mips32.tex	Wed May 23 11:41:34 2018 +0200
@@ -150,7 +150,7 @@
 \item Stack grows down
 \item Stack parameter order: right-to-left
 \item Caller cleans up the stack
-\item Caller is required to always leave a 16-byte spill area for \$a0-\$a3 at the and of {\bf its} frame, to be used and spilled to by the callee, if needed
+\item Caller is required to always leave a 16-byte spill area for \$a0-\$a3 at the end of {\bf its} frame, to be used and spilled to by the callee, if needed
 \item The different stack areas (local data, register save area, parameter area) are each aligned to 8 bytes
 \item generally, first four 32bit arguments are passed in registers \$a0-\$a3, respectively (only on hard-float targets: see below for exceptions if first arg is a float)
 \item subsequent parameters are passed vie the stack
diff -r 6869d834a847 -r b104c5beec8b doc/manual/callconvs/callconv_mips64.tex
--- a/doc/manual/callconvs/callconv_mips64.tex	Wed May 23 11:39:40 2018 +0200
+++ b/doc/manual/callconvs/callconv_mips64.tex	Wed May 23 11:41:34 2018 +0200
@@ -29,7 +29,7 @@
 
 \paragraph{\product{dyncall} support}
 
-For MIPS 64-bit machines, dyncall supports the N64 calling conventions for calls and callbacks (for big- and little-endian targets).
+For MIPS 64-bit machines, dyncall supports the N64 calling conventions for calls and callbacks (for all four combinations of big/little-endian, and soft/hard-float targets).
 The N32 calling convention might work - it used to, but hasn't been tested, recently.
 
 \subsubsection{MIPS N64 Calling Convention}
@@ -40,23 +40,23 @@
 \begin{tabular*}{0.95\textwidth}{lll}
 Name                                   & Alias                & Brief description\\
 \hline
-{\bf \$0}                              & {\bf \$zero}         & Hardware zero \\
-{\bf \$1}                              & {\bf \$at}           & Assembler temporary \\
-{\bf \$2-\$3}                          & {\bf \$v0-\$v1}      & Integer results \\
-{\bf \$4-\$11}                         & {\bf \$a0-\$a7}      & Integer arguments, or double precision float arguments\\
-{\bf \$12-\$15,\$24}                   & {\bf \$t4-\$t7,\$t8} & Integer temporaries \\
-{\bf \$25}                             & {\bf \$t9}           & Integer temporary, hold the address of the called function for all PIC calls (by convention) \\
-{\bf \$16-\$23}                        & {\bf \$s0-\$s7}      & Preserved \\
-{\bf \$26,\$27}                        & {\bf \$kt0,\$kt1}    & Reserved for kernel \\
-{\bf \$28}                             & {\bf \$gp}           & Global pointer, preserve \\
-{\bf \$29}                             & {\bf \$sp}           & Stack pointer, preserve \\
-{\bf \$30}                             & {\bf \$s8}           & Frame pointer, preserve \\
-{\bf \$31}                             & {\bf \$ra}           & Return address, preserve \\
-{\bf hi, lo}                           &                      & Multiply/divide special registers \\
-{\bf \$f0,\$f2}                        &                      & Float results \\
-{\bf \$f1,\$f3,\$f4-\$f11,\$f20-\$f23} &                      & Float temporaries \\
-{\bf \$f12-\$f19}                      &                      & Float arguments \\
-{\bf \$f24-\$f31}                      &                      & Preserved \\%@@@on N32, this changes
+{\bf \$0}                              & {\bf \$zero}         & hardware zero \\
+{\bf \$1}                              & {\bf \$at}           & assembler temporary \\
+{\bf \$2-\$3}                          & {\bf \$v0-\$v1}      & return value (only integer on hard-float targets) \\
+{\bf \$4-\$11}                         & {\bf \$a0-\$a7}      & first arguments (only integer on hard-float targets) \\
+{\bf \$12-\$15,\$24}                   & {\bf \$t4-\$t7,\$t8} & temporaries, scratch \\
+{\bf \$25}                             & {\bf \$t9}           & temporary, holds the address of the called function for all PIC calls (by convention) \\
+{\bf \$16-\$23}                        & {\bf \$s0-\$s7}      & preserved \\
+{\bf \$26,\$27}                        & {\bf \$kt0,\$kt1}    & reserved for kernel \\
+{\bf \$28}                             & {\bf \$gp}           & global pointer, preserve \\
+{\bf \$29}                             & {\bf \$sp}           & stack pointer, preserve \\
+{\bf \$30}                             & {\bf \$s8}           & frame pointer, preserve \\
+{\bf \$31}                             & {\bf \$ra}           & return address, preserve \\
+{\bf hi, lo}                           &                      & multiply/divide special registers \\
+{\bf \$f0,\$f2}                        &                      & only on hard-float targets: float results \\
+{\bf \$f1,\$f3,\$f4-\$f11,\$f20-\$f23} &                      & only on hard-float targets: float temporaries \\
+{\bf \$f12-\$f19}                      &                      & only on hard-float targets: float arguments \\
+{\bf \$f24-\$f31}                      &                      & only on hard-float targets: preserved \\%@@@on N32, this changes
 \end{tabular*}
 \caption{Register usage on MIPS N64 calling convention}
 \end{table}
@@ -67,12 +67,14 @@
 \item Stack grows down
 \item Stack parameter order: right-to-left
 \item Caller cleans up the stack
-\item first 8 params \textgreater=\ 64-bit are passed in registers \$a0-\$a7 for integers and \$f12-\$f19 for floats - with mixed float and int parameters,
-some registers are left out (e.g. first parameter ends up in \$a0 or \$f12, second in \$a1 or \$f13, etc.)
+\item generally, first 8 params \textgreater=\ 64-bit are passed via registers
+\item for hard-float targets: register arguments are passed via \$a0-\$a7 for integers and \$f12-\$f19 for floats - with mixed float and int parameters, some registers are left out (e.g. first parameter ends up in \$a0 or \$f12, second in \$a1 or \$f13, etc.)
+\item for soft-float targets: register arguments are passed via \$a0-\$a7
 \item subsequent arguments are pushed onto the stack
 \item all stack entries are 64-bit aligned
 \item all stack regions are 16-byte aligned
 \item results are returned in \$v0, and for a second one \$v1 is used
+\item only on hard-float targets: floating point results are returned in \$f0
 \item float arguments passed in the variable part of a vararg call are passed like integers
 \item quad precision float arguments are passed in even-odd register pairs, skipping one register if needed
 \item integer parameters \textless\ 64 bit are right-justified (meaning occupy higher-address bytes) in their 8-byte slot on the stack, requiring extra-care for big-endian targets
@@ -82,7 +84,7 @@
 \paragraph{Stack layout}
 
 Stack directly after function prolog:\\
-@@@ WIP, might be wrong
+@@@ might be wrong
 
 \begin{figure}[h]
 \begin{tabular}{5|3|1 1}
diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_call_mips_n64.S
--- a/dyncall/dyncall_call_mips_n64.S	Wed May 23 11:39:40 2018 +0200
+++ b/dyncall/dyncall_call_mips_n64.S	Wed May 23 11:41:34 2018 +0200
@@ -86,11 +86,9 @@
 
 	/* locals: */
 	/* $13 = register data */
-	/* $14 = useDouble flags */
 	move    $13, $5
-	ld      $14, 128($13)
 	
-	/* load integer parameter registers */
+	/* load parameter registers (only ints on hardfloat) */
 
 	ld      $4 , 0($13)
 	ld      $5 , 8($13)
@@ -101,6 +99,11 @@
 	ld      $10,48($13)
 	ld      $11,56($13)
 
+#if defined(DC__ABI_HARDFLOAT)
+
+	/* $14 = useDouble flags */
+	ld      $14, 128($13)
+
 	/* load float-or-double floating pointer parameter registers 
 	   a 64-bit bitmask given at byte offset 128 of regdata indicates
 	   if loading a float (bit cleared) or double (bit set), starting
@@ -172,6 +175,8 @@
 .d7:
 	l.d     $f19,120($13)
 
+#endif /* DC__ABI_HARDFLOAT */
+
 .fregend:
 
 	/* jump-and-link to register $25 */
diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_call_mips_n64.h
--- a/dyncall/dyncall_call_mips_n64.h	Wed May 23 11:39:40 2018 +0200
+++ b/dyncall/dyncall_call_mips_n64.h	Wed May 23 11:41:34 2018 +0200
@@ -53,8 +53,10 @@
 struct DCRegData_mips_n64
 {
   DClonglong                       mIntData[8];
+#if defined(DC__ABI_HARDFLOAT)
   union { DCfloat f; DCdouble d; } mFloatData[8];
   DClonglong                       mUseDouble; /* bitmask: lower 8 bits specify to use float or double from union array. */
+#endif /* DC__ABI_HARDFLOAT */
 };
 
 void dcCall_mips_n64(DCpointer target, struct DCRegData_mips_n64* regdata, DCsize stksize, DCpointer stkdata);
diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_callvm_mips_n64.c
--- a/dyncall/dyncall_callvm_mips_n64.c	Wed May 23 11:39:40 2018 +0200
+++ b/dyncall/dyncall_callvm_mips_n64.c	Wed May 23 11:41:34 2018 +0200
@@ -58,7 +58,9 @@
   DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self;
   dcVecReset(&self->mVecHead);
   self->mRegCount = 0;
+#if defined(DC__ABI_HARDFLOAT)
   self->mRegData.mUseDouble = 0LL;
+#endif /* DC__ABI_HARDFLOAT */
 }
 
 
@@ -75,24 +77,24 @@
 
 /* arg int -- fillup 64-bit integer register file OR push on stack */
 
-static void dc_callvm_argLongLong_mips_n64(DCCallVM* in_self, DClonglong Lv)
+static void dc_callvm_argLongLong_mips_n64(DCCallVM* in_self, DClonglong x)
 {
   DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self;
   /* fillup integer register file */
   if (self->mRegCount < 8)
-    self->mRegData.mIntData[self->mRegCount++] = Lv;
+    self->mRegData.mIntData[self->mRegCount++] = x;
   else
-    dcVecAppend(&self->mVecHead, &Lv, sizeof(DClonglong));
+    dcVecAppend(&self->mVecHead, &x, sizeof(DClonglong));
 }
 
-static void dc_callvm_argInt_mips_n64(DCCallVM* in_self, DCint i)
+static void dc_callvm_argInt_mips_n64(DCCallVM* in_self, DCint x)
 {
-  dc_callvm_argLongLong_mips_n64(in_self, (DClonglong) i );
+  dc_callvm_argLongLong_mips_n64(in_self, (DClonglong)x);
 }
 
 static void dc_callvm_argPointer_mips_n64(DCCallVM* in_self, DCpointer x)
 {
-  dc_callvm_argLongLong_mips_n64(in_self, * (DClonglong*) &x );
+  dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&x);
 }
 
 static void dc_callvm_argBool_mips_n64(DCCallVM* in_self, DCbool x)
@@ -117,25 +119,41 @@
 
 static void dc_callvm_argDouble_mips_n64(DCCallVM* in_self, DCdouble x)
 {
+#if defined(DC__ABI_HARDFLOAT)
   DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self;
   if (self->mRegCount < 8) {
     self->mRegData.mUseDouble |= 1<<( self->mRegCount );
     self->mRegData.mFloatData[self->mRegCount++].d = x;
   } else {
-    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble) );
+    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble));
   }
+#else
+  dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&x);
+#endif /* DC__ABI_HARDFLOAT */
 }
 
 static void dc_callvm_argFloat_mips_n64(DCCallVM* in_self, DCfloat x)
 {
+#if defined(DC__ABI_HARDFLOAT)
   DCCallVM_mips_n64* self = (DCCallVM_mips_n64*)in_self;
   if (self->mRegCount < 8) {
     /*self->mRegData.mFloatData[self->mRegCount++].d = (DCdouble) x;*/
     self->mRegData.mFloatData[self->mRegCount++].f = x;
   } else {
-    dcVecAppend(&self->mVecHead, &x, sizeof(DCfloat) );
-    dcVecSkip(&self->mVecHead, sizeof(DCfloat) );
+    dcVecAppend(&self->mVecHead, &x, sizeof(DCfloat));
+    dcVecSkip(&self->mVecHead, sizeof(DCfloat));
   }
+#else
+  DCfloat f[] = {x,0.f};
+# if defined(DC__Endian_BIG)
+  /* floats in regs are always right-justified */
+  if (((DCCallVM_mips_n64*)in_self)->mRegCount < 8) {
+    f[1] = f[0];
+    f[0] = 0.f;
+  }
+# endif /* DC__Endian_BIG */
+  dc_callvm_argLongLong_mips_n64(in_self, *(DClonglong*)&f);
+#endif /* DC__ABI_HARDFLOAT */
 }
 
 
diff -r 6869d834a847 -r b104c5beec8b dyncall/dyncall_callvm_mips_o32.c
--- a/dyncall/dyncall_callvm_mips_o32.c	Wed May 23 11:39:40 2018 +0200
+++ b/dyncall/dyncall_callvm_mips_o32.c	Wed May 23 11:41:34 2018 +0200
@@ -118,10 +118,11 @@
 
 #if defined(DC__ABI_HARDFLOAT)
   if (self->mArgCount < 2) {
+    /* @@@ unsure if we should zero init, here; seems to work as-is */
 # if defined(DC__Endian_LITTLE)
     self->mRegData.u[self->mArgCount].f[0] = x;
 # else
-    self->mRegData.u[self->mArgCount].f[1] = x;
+    self->mRegData.u[self->mArgCount].f[1] = x; // floats in regs always right justified
 # endif
 # if 0
     self->mRegData.u[self->mArgCount].f[1] = x;
diff -r 6869d834a847 -r b104c5beec8b dyncallback/dyncall_args_mips.h
--- a/dyncallback/dyncall_args_mips.h	Wed May 23 11:39:40 2018 +0200
+++ b/dyncallback/dyncall_args_mips.h	Wed May 23 11:41:34 2018 +0200
@@ -33,19 +33,37 @@
 {
 	/* Don't change order or types, laid out for asm code to fill in! */
 #if defined(DC__Arch_MIPS) && defined(DC__ABI_MIPS_O32)
+
 	DCint freg_count; /* unused on soft-float targets, but keep as 4b-padding */
+
 #else
+
+# if defined(DC__Arch_MIPS)
+
 #  define DCARGS_MIPS_NUM_IREGS 8
 #  define DCARGS_MIPS_NUM_FREGS 8
-#  if defined(DC__Arch_MIPS)
 	DCint   ireg_data[DCARGS_MIPS_NUM_IREGS];
 	DCfloat freg_data[DCARGS_MIPS_NUM_FREGS];
 	struct { DCshort i; DCshort f; } reg_count;
-#  elif defined(DC__Arch_MIPS64)
-	DClonglong ireg_data[DCARGS_MIPS_NUM_IREGS];
-	DCdouble   freg_data[DCARGS_MIPS_NUM_FREGS];
+
+# elif defined(DC__Arch_MIPS64)
+
+   /* one counter for both int & float: n64 uses 8 arg regs max, in total,   */
+   /* either skipping the other type's reg, or using only int regs (soft-fp) */
+#  define DCARGS_MIPS_NUM_REGS 8
+#  if defined(DC__ABI_SOFTFLOAT)
+	union
+#  else
+	struct
+#  endif
+	{
+		DClonglong ireg_data[DCARGS_MIPS_NUM_REGS];
+		DCdouble   freg_data[DCARGS_MIPS_NUM_REGS];
+	};
 	DClonglong reg_count;
-#  endif
+
+# endif
+
 #endif
 	DCuchar* stackptr;
 };
diff -r 6869d834a847 -r b104c5beec8b dyncallback/dyncall_args_mips64.c
--- a/dyncallback/dyncall_args_mips64.c	Wed May 23 11:39:40 2018 +0200
+++ b/dyncallback/dyncall_args_mips64.c	Wed May 23 11:41:34 2018 +0200
@@ -30,7 +30,7 @@
 DClonglong dcbArgLongLong(DCArgs* p)
 {
   DClonglong value;
-  if(p->reg_count < DCARGS_MIPS_NUM_IREGS)
+  if(p->reg_count < DCARGS_MIPS_NUM_REGS)
     value = p->ireg_data[p->reg_count++];
   else {
     value = *((DClonglong*)p->stackptr);
@@ -54,7 +54,7 @@
 DCdouble dcbArgDouble(DCArgs* p)
 {
   DCdouble result;
-  if(p->reg_count < DCARGS_MIPS_NUM_FREGS)
+  if(p->reg_count < DCARGS_MIPS_NUM_REGS)
     result = p->freg_data[p->reg_count++];
   else {
     result = *((DCdouble*)p->stackptr);
@@ -65,7 +65,7 @@
 DCfloat dcbArgFloat(DCArgs* p)
 {
   DCfloat result;
-  if(p->reg_count < DCARGS_MIPS_NUM_FREGS) {
+  if(p->reg_count < DCARGS_MIPS_NUM_REGS) {
     result = ((DCfloat*)&p->freg_data[p->reg_count++])
 #if defined(DC__Endian_LITTLE)
       [0];
diff -r 6869d834a847 -r b104c5beec8b dyncallback/dyncall_callback_mips_n64.S
--- a/dyncallback/dyncall_callback_mips_n64.S	Wed May 23 11:39:40 2018 +0200
+++ b/dyncallback/dyncall_callback_mips_n64.S	Wed May 23 11:41:34 2018 +0200
@@ -6,7 +6,7 @@
  Description: Callback Thunk - Implementation for mips64 n64
  License:
 
-   Copyright (c) 2016 Tassilo Philipp <tphilipp@potion-studios.com>
+   Copyright (c) 2016-2018 Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -28,6 +28,18 @@
 		$t8+64 -> userdata
 	*/
 
+/* Frame size is 160b for hard- and 96b for soft-float, as follows: */
+/*   DCargs(fregs:64? + iregs:64 + regcnts:8 + sp:8) + rval:8 + ra:8 */
+#if defined(DC__ABI_HARDFLOAT)
+SP_SP = 160
+#else
+SP_SP = 96
+#endif
+SP_LR     = SP_SP-8
+SP_RVAL   = SP_SP-16
+SP_ARG_SP = SP_SP-24
+SP_ARG_RC = SP_SP-32
+
 	.section .mdebug.abi64
 	.previous
 	.abicalls
@@ -39,17 +51,15 @@
 	.set    noreorder
 
 	/* Prolog. */
-	/* Frame size of 160b comes from following: */
-	/*   DCargs(fregs:64 + iregs:64 + regcounts:8 + stackptr:8) + retval:8 + ra:8 */
-	daddiu $sp, $sp, -160 /* open frame */
-	sd     $ra, 152($sp)  /* save link register */
+	daddiu $sp, $sp, -SP_SP   /* open frame */
+	sd     $ra, SP_LR($sp)    /* save link register */
 
-	.frame $fp,160,$31    /* specify our frame: fp,size,lr; creates virt $fp */
+	.frame $fp,SP_SP,$31  /* specify our frame: fp,size,lr; creates virt $fp */
 	                      /* code below doesn't use $fp though, as n/a with -O1 */
 	/* Init return value */
-	sd $zero, 144($sp)
+	sd $zero, SP_RVAL($sp)
 
-	/* Store float and int args where our DCargs member arrays are, in local area. */
+	/* Store reg args where our DCargs member arrays are, in local stack area */
 	sd  $4,     0($sp)
 	sd  $5,     8($sp)
 	sd  $6,    16($sp)
@@ -58,6 +68,7 @@
 	sd  $9,    40($sp)
 	sd $10,    48($sp)
 	sd $11,    56($sp)
+#if defined(DC__ABI_HARDFLOAT)
 	s.d $f12,  64($sp)
 	s.d $f13,  72($sp)
 	s.d $f14,  80($sp)
@@ -66,31 +77,34 @@
 	s.d $f17, 104($sp)
 	s.d $f18, 112($sp)
 	s.d $f19, 120($sp)
+#endif
 
 	/* Init DCarg's reg_counts and stackptr. */
-	sd $zero, 128($sp)    /* reg_count */
-	daddiu $4, $sp, 160
-	sd     $4, 136($sp)   /* stackptr */
+	sd  $zero, SP_ARG_RC($sp)   /* reg_count */
+	daddiu $4, $sp, SP_SP
+	sd     $4, SP_ARG_SP($sp)   /* stackptr */
 
 	/* Prepare callback handler call. */
-	move   $4, $24        /* Param 0 = DCCallback*, $24 ($t8) holds pointer to thunk */
-	move   $5, $sp        /* Param 1 = DCArgs*, pointer to where pointer to args is stored */
-	daddiu $6, $sp, 144   /* Param 2 = results pointer to 8b of local data on stack */
-	ld     $7, 64($24)    /* Param 3 = userdata pointer */
+	move   $4, $24          /* Param 0 = DCCallback*, $24/$t8 holds DCThunk* */
+	move   $5, $sp          /* Param 1 = DCArgs*, struct sits at bottom of frame */
+	daddiu $6, $sp, SP_RVAL /* Param 2 = results ptr to 8b of local stack data */
+	ld     $7, 64($24)      /* Param 3 = userdata pointer */
 
-	ld     $25, 56($24)   /* store handler entry in $25 ($t9), required for PIC */
-	jalr   $25            /* jump */
-	nop                   /* branch delay nop */
+	ld     $25, 56($24)     /* store handler entry in $25/$t9, required for PIC */
+	jalr   $25              /* jump */
+	nop                     /* branch delay nop */
 
 	/* Copy result in corresponding registers $2-$3 ($v0-$v1) and $f0 */
-	ld     $2, 144($sp)   /* note: ignoring second possible retval in $3, here */
-	l.d   $f0, 144($sp)
+	ld     $2, SP_RVAL($sp)   /* note: ignoring 2nd possible retval in $3, here */
+#if defined(DC__ABI_HARDFLOAT)
+	l.d    $f0, SP_RVAL($sp)
+#endif
 
 	/* Epilog. Tear down frame and return. */
-	ld    $ra, 152($sp)   /* restore return address */
-	daddiu $sp, $sp, 160  /* close frame */
-	j     $ra             /* return */
-	nop                   /* branch delay nop */
+	ld     $ra, SP_LR($sp)  /* restore return address */
+	daddiu $sp, $sp, SP_SP  /* close frame */
+	j      $ra              /* return */
+	nop                     /* branch delay nop */
 
 	.set    reorder
 	.end    dcCallbackThunkEntry