diff dyncall/dyncall_callvm_x64.c @ 533:71c884e610f0

- integration of patches from Raphael Luba, Thekla, Inc.: * integration of aggregate-by-value (struct, union) support patch for x64 (win and sysv) * windows/x64 asm additions to specify how stack unwinds (help for debuggers, exception handling, etc.) * see Changelog for details - new calling convention modes for thiscalls (platform agnostic, was specific before) * new signature character for platform agnostic thiscalls ('*' / DC_SIGCHAR_CC_THISCALL) - dcCallF(), dcVCallF(), dcArgF() and dcVArgF(): * added support for aggregates-by-value (wasn't part of patch) * change that those functions don't implicitly call dcReset() anymore, which was inflexible (breaking change) - added macros to feature test implementation for aggregate-by-value and syscall support - changed libdyncall_s.lib and libdyncallback_s.lib order in callback test makefiles, as some toolchains are picky about order - doc: * man page updates to describe aggregate interface * manual overview changes to highlight platforms with aggregate-by-value support - test/plain: replaced tests w/ old/stale struct interface with new aggregate one
author Tassilo Philipp
date Thu, 21 Apr 2022 13:35:47 +0200
parents ddfb9577a00e
children ba70fb631bea
line wrap: on
line diff
--- a/dyncall/dyncall_callvm_x64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_x64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: 
  License:
 
-   Copyright (c) 2007-2020 Daniel Adler <dadler@uni-goettingen.de>, 
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>, 
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -31,7 +31,11 @@
 
 #include "dyncall_callvm_x64.h"
 #include "dyncall_alloc.h"
-#include "dyncall_struct.h"
+#include "dyncall_aggregate.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
 
 
 /* 
@@ -41,9 +45,16 @@
 **
 */
 
-void dcCall_x64_sysv(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target);
-void dcCall_x64_win64(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target);
-void dcCall_x64_syscall_sysv(DCpointer argdata, DCpointer target);
+#if defined(DC_UNIX)
+extern void dcCall_x64_sysv(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target);
+extern void dcCall_x64_sysv_aggr(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target, DCpointer ret_regs);
+#else
+extern void dcCall_x64_win64(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target);
+extern void dcCall_x64_win64_aggr(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target, DCpointer aggr_mem);
+#endif
+extern void dcCall_x64_syscall_sysv(DCpointer argdata, DCpointer target);
+
+
 
 
 static void dc_callvm_free_x64(DCCallVM* in_self)
@@ -57,13 +68,22 @@
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
   dcVecReset(&self->mVecHead);
   self->mRegCount.i = self->mRegCount.f = 0;
+  self->mAggrReturnReg = -1;
+#if defined(DC_WINDOWS)
+  self->mpAggrVecCopies = ((DCchar*)dcVecData(&self->mVecHead)) + self->mVecHead.mTotal;
+#endif
 }
 
 
+
+
 static void dc_callvm_argLongLong_x64(DCCallVM* in_self, DClonglong x)
 {
   /* A long long always has 64 bits on the supported x64 platforms (lp64 on unix and llp64 on windows). */
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+  self->mRegCount.i += (self->mRegCount.i == self->mAggrReturnReg);
+
   if(self->mRegCount.i < numIntRegs)
     self->mRegData.i[self->mRegCount.i++] = x;
   else
@@ -101,10 +121,23 @@
 }
 
 
-static void dc_callvm_argFloat_x64(DCCallVM* in_self, DCfloat x)
+static void dc_callvm_argDouble_x64(DCCallVM* in_self, DCdouble x)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
 
+#if defined(DC_WINDOWS) /* windows only: float reg index also steps over the slot kept for the hidden aggr-return pointer */
+  self->mRegCount.f += (self->mRegCount.f == self->mAggrReturnReg);
+#endif
+  /* use the next free float register; once all numFloatRegs are taken, append to the stack vector */
+  if(self->mRegCount.f < numFloatRegs)
+    self->mRegData.f[self->mRegCount.f++] = x;
+  else
+    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble));
+}
+
+
+static void dc_callvm_argFloat_x64(DCCallVM* in_self, DCfloat x)
+{
   /* Although not promoted to doubles, floats are stored with 64bits in this API.*/
   union {
     DCdouble d;
@@ -112,46 +145,89 @@
   } f;
   f.f = x;
 
-  if(self->mRegCount.f < numFloatRegs)
-    *(DCfloat*)&self->mRegData.f[self->mRegCount.f++] = x;
-  else
-    dcVecAppend(&self->mVecHead, &f.f, sizeof(DCdouble));
-}
-
-
-static void dc_callvm_argDouble_x64(DCCallVM* in_self, DCdouble x)
-{
-  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
-  if(self->mRegCount.f < numFloatRegs)
-    self->mRegData.f[self->mRegCount.f++] = x;
-  else
-    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble));
+  dc_callvm_argDouble_x64(in_self, f.d);
 }
 
 
 static void dc_callvm_argPointer_x64(DCCallVM* in_self, DCpointer x)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+  /* step over the int register kept for the hidden aggr-return pointer, if it is the next one to be used */
+  self->mRegCount.i += (self->mRegCount.i == self->mAggrReturnReg);
+
   if(self->mRegCount.i < numIntRegs)
     *(DCpointer*)&self->mRegData.i[self->mRegCount.i++] = x;
   else
     dcVecAppend(&self->mVecHead, &x, sizeof(DCpointer));
 }
 
-static void dc_callvm_argStruct_x64(DCCallVM* in_self, DCstruct* s, DCpointer x)
+/* Push aggregate 'x' (described by 'ag') as by-value argument; ag == NULL means x is passed as a plain pointer. */
+static void dc_callvm_argAggr_x64(DCCallVM* in_self, const DCaggr* ag, const void* x)
 {
+  int i;
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
-  dcVecAppend(&self->mVecHead, x, s->size);
-  /*printf("dc_callvm_argStruct_x64 size = %d\n", (int)s->size);@@@*/
-  if (s->size <= 64)
-  	  dcArgStructUnroll(in_self, s, x);
-  /*else@@@*/
-  /*	  dcVecAppend(&self->mVecHead, &x, sizeof(DCpointer));@@@*/
+
+  if (!ag) {
+    /* non-trivial aggrs (C++) are passed via pointer (win and sysv callconv),
+     * copy has to be provided by user, as dyncall cannot do such copies */
+    dc_callvm_argPointer_x64(in_self, (DCpointer)x);
+    return;
+  }
+
+#if defined(DC_UNIX)
+  DCRegCount_x64 n_regs = { self->mRegCount.i, self->mRegCount.f };
+
+  if(ag->sysv_classes[0] != SYSVC_MEMORY) {
+    /* reclassify aggr w/ respect to remaining regs, might need to pass it all via the stack */
+    for(i=0; i<DC_SYSV_MAX_NUM_CLASSES && ag->sysv_classes[i]; ++i) { /* bound is tested before indexing */
+      DCuchar clz = ag->sysv_classes[i];
+      n_regs.i += (clz == SYSVC_INTEGER);
+      n_regs.f += (clz == SYSVC_SSE);
+      /* @@@AGGR implement when implementing x87 types */
+    }
+  }
+
+  if(ag->sysv_classes[0] == SYSVC_MEMORY || (n_regs.i > numIntRegs) || (n_regs.f > numFloatRegs))
+  {
+     dcVecAppend(&self->mVecHead, x, ag->size);
+     dcVecSkip(&self->mVecHead, ((ag->size + (sizeof(DClonglong)-1)) & -sizeof(DClonglong)) - ag->size); /* realign to qword */
+     return;
+  }
+
+  for(i=0; i<DC_SYSV_MAX_NUM_CLASSES && ag->sysv_classes[i]; ++i) /* one qword of x per class entry */
+  {
+    switch (ag->sysv_classes[i]) {
+      case SYSVC_INTEGER: dc_callvm_argLongLong_x64(in_self, ((DClonglong*)x)[i]); break;
+      case SYSVC_SSE:     dc_callvm_argDouble_x64  (in_self, ((DCdouble  *)x)[i]); break;
+      default: break;     /* @@@AGGR implement when implementing x87 types */
+    }
+  }
+
+#else
+
+  switch (ag->size) {
+    case 1:  dc_callvm_argChar_x64    (in_self, *(DCchar    *)x); break;
+    case 2:  dc_callvm_argShort_x64   (in_self, *(DCshort   *)x); break;
+    case 4:  dc_callvm_argLong_x64    (in_self, *(DClong    *)x); break;
+    case 8:  dc_callvm_argLongLong_x64(in_self, *(DClonglong*)x); break;
+    default:
+      /* pass the aggr indirectly via hidden pointer; requires caller-made copy
+       * to mimic pass-by-value semantics (or a call that modifies the param
+       * would corrupt the source aggr)
+       * place those copies at the end of the param vector (aligned to 16b for
+       * this calling convention); it's a bit of a hack, but should be safe: in
+       * any case the vector has to be big enough to hold all params */
+      self->mpAggrVecCopies = (void*)((intptr_t)((DCchar*)self->mpAggrVecCopies - ag->size) & -16);
+      x = memcpy(self->mpAggrVecCopies, x, ag->size);
+      dc_callvm_argPointer_x64(in_self, (DCpointer)x);
+      break;
+  }
+#endif
 }
 
 
 /* Call. */
-void dc_callvm_call_x64(DCCallVM* in_self, DCpointer target)
+static void dc_callvm_call_x64(DCCallVM* in_self, DCpointer target)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
 #if defined(DC_UNIX)
@@ -170,6 +246,115 @@
 }
 
 
+static void dc_callvm_begin_aggr_x64(DCCallVM* in_self, const DCaggr *ag)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+  /* decides if the aggregate return value needs a hidden pointer argument; no arg registers may be in use yet */
+  assert(self->mRegCount.i == 0 && self->mRegCount.f == 0 && "dc_callvm_begin_aggr_x64 should be called before any function arguments are declared");
+#if defined(DC_UNIX)
+  if (!ag || (ag->sysv_classes[0] == SYSVC_MEMORY)) { /* no aggr description given, or first class is MEMORY */
+#else
+  if (!ag || ag->size > 8) { /* no aggr description given, or bigger than one qword */
+#endif 
+    /* pass pointer to aggregate as hidden first argument */
+    self->mAggrReturnReg = 0;
+  }
+}
+
+
+#if defined(DC_WINDOWS)
+static void dc_callvm_begin_aggr_x64_win64_this(DCCallVM* in_self, const DCaggr *ag)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+  /* same check as dc_callvm_begin_aggr_x64's non-unix branch, but reserves register index 1 instead of 0 */
+  assert(self->mRegCount.i == 0 && self->mRegCount.f == 0 && "dc_callvm_begin_aggr_x64_win64_this should be called before any function arguments are declared");
+
+  if (!ag || ag->size > 8) {
+    /* thiscall: this-ptr comes first, then pointer to aggregate as hidden (second) argument */
+    self->mAggrReturnReg = 1;
+  }
+}
+#endif
+
+/* Call 'target' for a function returning an aggregate (described by 'ag') by value; the result is written to 'ret'. */
+static void dc_callvm_call_x64_aggr(DCCallVM* in_self, DCpointer target, const DCaggr *ag, DCpointer ret)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+  /* mAggrReturnReg != -1: a register was reserved for a hidden pointer to 'ret'; otherwise result comes back in regs */
+#if defined(DC_UNIX)
+
+  if (self->mAggrReturnReg != -1) {
+    /* call regular dcCall_x64_sysv here, w/ pointer to the aggr in the reserved int register */
+    self->mRegData.i[self->mAggrReturnReg] = (int64)ret;
+
+    dcCall_x64_sysv(
+      dcVecSize(&self->mVecHead),  /* rdi: Size of stack data.                           */
+      dcVecData(&self->mVecHead),  /* rsi: Pointer to stack arguments.                   */
+      self->mRegData.i,            /* rdx: Pointer to register arguments (ints on SysV). */
+      self->mRegData.f,            /* rcx: Pointer to floating point register arguments. */
+      target                       /* r8 */
+    );
+  } else {
+    int i;
+    DCchar ret_regs[32];           /* 4 qwords: 2 for ints, 2 for floats */
+    DCchar *ret_regs_i = ret_regs+0;
+    DCchar *ret_regs_f = ret_regs+16;
+    DCsize st_size = ag->size;
+    DCchar* dst = (char*)ret;
+    dcCall_x64_sysv_aggr(
+      dcVecSize(&self->mVecHead),  /* rdi: Size of stack data.                           */
+      dcVecData(&self->mVecHead),  /* rsi: Pointer to stack arguments.                   */
+      self->mRegData.i,            /* rdx: Pointer to register arguments (ints on SysV). */
+      self->mRegData.f,            /* rcx: Pointer to floating point register arguments. */
+      target,                      /* r8 */
+      ret_regs                     /* r9 */
+    );
+    /* reassemble aggr to be returned from reg data, up to 8 bytes per class and never more than ag->size in total */
+    for(i=0; i<DC_SYSV_MAX_NUM_CLASSES && ag->sysv_classes[i]; ++i) { /* bound is tested before indexing */
+      DCchar** src;
+      int ll = 8;
+      switch(ag->sysv_classes[i]) {
+        case SYSVC_INTEGER:  src = &ret_regs_i; break;
+        case SYSVC_SSE:      src = &ret_regs_f; break;
+        default: return;     /* @@@AGGR implement when implementing x87 types; stop instead of copying through an unset src */
+      }
+      while(ll-- && st_size--)
+        *dst++ = *(*src)++;
+    }
+  }
+
+#else
+
+  if (self->mAggrReturnReg != -1) {
+    /* call regular dcCall_x64_win64 here, w/ pointer to the aggr in the reserved register (index 0, or 1 for thiscalls) */
+    self->mRegData.i[self->mAggrReturnReg] = (int64)ret;
+
+    dcCall_x64_win64(
+      dcVecSize(&self->mVecHead),  /* rcx: Size of stack data.           */
+      dcVecData(&self->mVecHead),  /* rdx: Pointer to stack arguments.   */
+      self->mRegData.i,            /* r8:  Pointer to register arguments */
+      target                       /* r9 */
+    );
+  } else {
+    DCchar ret_reg[8];             /* 1 qword */
+    DCsize st_size = ag->size;     /* guaranteed to be <= 8 */
+    DCchar* dst = (char*)ret;
+    DCchar* src = ret_reg;
+    dcCall_x64_win64_aggr(
+      dcVecSize(&self->mVecHead),  /* rcx: Size of stack data.           */
+      dcVecData(&self->mVecHead),  /* rdx: Pointer to stack arguments.   */
+      self->mRegData.i,            /* r8:  Pointer to register arguments */
+      target,                      /* r9 */
+      ret_reg                      /* stack */
+    );
+    while(st_size--)               /* copy only the aggregate's own bytes out of the qword */
+      *dst++ = *src++;
+  }
+
+#endif
+}
+
+
 static void dc_callvm_mode_x64(DCCallVM* in_self, DCint mode);
 
 DCCallVM_vt gVT_x64 =
@@ -186,24 +371,59 @@
 , &dc_callvm_argFloat_x64
 , &dc_callvm_argDouble_x64
 , &dc_callvm_argPointer_x64
-, &dc_callvm_argStruct_x64
-, (DCvoidvmfunc*)       &dc_callvm_call_x64
-, (DCboolvmfunc*)       &dc_callvm_call_x64
-, (DCcharvmfunc*)       &dc_callvm_call_x64
-, (DCshortvmfunc*)      &dc_callvm_call_x64
-, (DCintvmfunc*)        &dc_callvm_call_x64
-, (DClongvmfunc*)       &dc_callvm_call_x64
-, (DClonglongvmfunc*)   &dc_callvm_call_x64
-, (DCfloatvmfunc*)      &dc_callvm_call_x64
-, (DCdoublevmfunc*)     &dc_callvm_call_x64
-, (DCpointervmfunc*)    &dc_callvm_call_x64
-, NULL /* callStruct */
+, &dc_callvm_argAggr_x64
+, (DCvoidvmfunc*)     &dc_callvm_call_x64
+, (DCboolvmfunc*)     &dc_callvm_call_x64
+, (DCcharvmfunc*)     &dc_callvm_call_x64
+, (DCshortvmfunc*)    &dc_callvm_call_x64
+, (DCintvmfunc*)      &dc_callvm_call_x64
+, (DClongvmfunc*)     &dc_callvm_call_x64
+, (DClonglongvmfunc*) &dc_callvm_call_x64
+, (DCfloatvmfunc*)    &dc_callvm_call_x64
+, (DCdoublevmfunc*)   &dc_callvm_call_x64
+, (DCpointervmfunc*)  &dc_callvm_call_x64
+, (DCaggrvmfunc*)     &dc_callvm_call_x64_aggr
+, (DCbeginaggrvmfunc*)&dc_callvm_begin_aggr_x64
 };
 
 
+#if defined(DC_WINDOWS)
+/* --- win64 thiscalls ------------------------------------------------------------- */
+
+DCCallVM_vt gVT_x64_win64_this = /* selected by dc_callvm_mode_x64 for DC_CALL_C_DEFAULT_THIS and DC_CALL_C_X64_WIN64_THIS on windows */
+{
+  &dc_callvm_free_x64
+, &dc_callvm_reset_x64
+, &dc_callvm_mode_x64
+, &dc_callvm_argBool_x64
+, &dc_callvm_argChar_x64
+, &dc_callvm_argShort_x64
+, &dc_callvm_argInt_x64
+, &dc_callvm_argLong_x64
+, &dc_callvm_argLongLong_x64
+, &dc_callvm_argFloat_x64
+, &dc_callvm_argDouble_x64
+, &dc_callvm_argPointer_x64
+, &dc_callvm_argAggr_x64
+, (DCvoidvmfunc*)     &dc_callvm_call_x64
+, (DCboolvmfunc*)     &dc_callvm_call_x64
+, (DCcharvmfunc*)     &dc_callvm_call_x64
+, (DCshortvmfunc*)    &dc_callvm_call_x64
+, (DCintvmfunc*)      &dc_callvm_call_x64
+, (DClongvmfunc*)     &dc_callvm_call_x64
+, (DClonglongvmfunc*) &dc_callvm_call_x64
+, (DCfloatvmfunc*)    &dc_callvm_call_x64
+, (DCdoublevmfunc*)   &dc_callvm_call_x64
+, (DCpointervmfunc*)  &dc_callvm_call_x64
+, (DCaggrvmfunc*)     &dc_callvm_call_x64_aggr
+, (DCbeginaggrvmfunc*)&dc_callvm_begin_aggr_x64_win64_this /* thiscall variant: hidden aggr-return pointer goes second, after the this-ptr */
+};
+
+#endif
+
 /* --- syscall ------------------------------------------------------------- */
 
-#include <assert.h>
+#if defined(DC_UNIX)
 void dc_callvm_call_x64_syscall_sysv(DCCallVM* in_self, DCpointer target)
 {
   DCCallVM_x64* self;
@@ -230,7 +450,7 @@
 , &dc_callvm_argFloat_x64
 , &dc_callvm_argDouble_x64
 , &dc_callvm_argPointer_x64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x64_syscall_sysv
 , (DCboolvmfunc*)       &dc_callvm_call_x64_syscall_sysv
 , (DCcharvmfunc*)       &dc_callvm_call_x64_syscall_sysv
@@ -241,8 +461,10 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x64_syscall_sysv
 , (DCdoublevmfunc*)     &dc_callvm_call_x64_syscall_sysv
 , (DCpointervmfunc*)    &dc_callvm_call_x64_syscall_sysv
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
+#endif
 
 
 
@@ -255,9 +477,9 @@
 
   switch(mode) {
     case DC_CALL_C_DEFAULT:
-	case DC_CALL_C_DEFAULT_THIS:
 #if defined(DC_UNIX)
-    case DC_CALL_C_X64_SYSV:
+    case DC_CALL_C_DEFAULT_THIS:
+    case DC_CALL_C_X64_SYSV: /* = DC_CALL_C_X64_SYSV_THIS */
 #else
     case DC_CALL_C_X64_WIN64:
 #endif
@@ -265,6 +487,12 @@
     case DC_CALL_C_ELLIPSIS_VARARGS:
       vt = &gVT_x64;
       break;
+#if defined(DC_WINDOWS)
+    case DC_CALL_C_DEFAULT_THIS:
+    case DC_CALL_C_X64_WIN64_THIS:
+      vt = &gVT_x64_win64_this;
+      break;
+#endif
     case DC_CALL_SYS_DEFAULT:
 #if defined(DC_UNIX)
     case DC_CALL_SYS_X64_SYSCALL_SYSV: