diff dyncallback/dyncall_callback_x64.S @ 533:71c884e610f0

- integration of patches from Raphael Luba, Thekla, Inc.: * integration of aggregate-by-value (struct, union) support patch for x64 (win and sysv) * windows/x64 asm additions to specify how stack unwinds (help for debuggers, exception handling, etc.) * see Changelog for details - new calling convention modes for thiscalls (platform agnostic, was specific before) * new signature character for platform agnostic thiscalls ('*' / DC_SIGCHAR_CC_THISCALL) - dcCallF(), dcVCallF(), dcArgF() and dcVArgF(): * added support for aggregates-by-value (wasn't part of patch) * change that those functions don't implicitly call dcReset() anymore, which was inflexible (breaking change) - added macros to feature test implementation for aggregate-by-value and syscall support - changed libdyncall_s.lib and libdyncallback_s.lib order in callback test makefiles, as some toolchains are picky about order - doc: * man page updates to describe aggregate interface * manual overview changes to highlight platforms with aggregate-by-value support - test/plain: replaced tests w/ old/stale struct interface with new aggregate one
author Tassilo Philipp
date Thu, 21 Apr 2022 13:35:47 +0200
parents 41e2a01cad32
children 951cbfb5020a
line wrap: on
line diff
--- a/dyncallback/dyncall_callback_x64.S	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_x64.S	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,8 @@
  Description: Callback Thunk entry for x64 (portasm version)
  License:
 
-   Copyright (c) 2011-2018 Daniel Adler <dadler@uni-goettingen.de>
+   Copyright (c) 2011-2022 Daniel Adler <dadler@uni-goettingen.de>,
+                           Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -29,27 +30,27 @@
 /* structure sizes */
 
 SET(DCThunk_size,24)
-SET(DCArgs_size_win64,80)
-SET(DCArgs_size_sysv,128)
+SET(DCArgs_size_win64,96) /* in layout order: 8 (stack_ptr) + 4 (reg_count) + 4 (pad_w) + 4 (aggr_return_register) + 4 (pad) + 8 (aggrs) + 8*4 (int regs) + 8*4 (float regs) */
+SET(DCArgs_size_sysv,144) /* in layout order: 8 (stack_ptr) + 4 (icount) + 4 (fcount) + 4 (aggr_return_register) + 4 (pad) + 8 (aggrs) + 8*6 (int regs) + 8*8 (float regs) */
 SET(DCValue_size,8)
+SET(DCRetRegs_SystemV_size,32) /* 4 qwords; the sysv thunk reserves this much stack for the handler's return value buffer */
 
-/* frame local variable offsets relative to %rbp*/
+/* frame local variable offsets relative to %rbp */
 
 SET(FRAME_arg0_win64,48)
 SET(FRAME_arg0_sysv,16)
 SET(FRAME_return,8)
 SET(FRAME_parent,0)
-SET(FRAME_DCArgs_sysv,-128)
-SET(FRAME_DCValue_sysv,-136)
-SET(FRAME_DCArgs_win64,-80)
-SET(FRAME_DCValue_win64,-80)
 
-/* struct DCCallback */
+/* struct DCCallback layout: byte offsets relative to the ptr passed to the thunk entries below via RAX */
 
 SET(CTX_thunk,0)
 SET(CTX_handler,24)
 SET(CTX_userdata,32)
-SET(DCCallback_size,40)
+SET(CTX_aggr_ret_reg,40) /* read as a 32-bit value by the thunks and copied into DCArgs */
+SET(CTX_pad,44) /* 4 bytes between aggr_ret_reg and the qword that follows */
+SET(CTX_aggrs_pp,48) /* read as a qword by the thunks and copied into DCArgs (offset 24: **aggrs) */
+SET(DCCallback_size,56) /* 40 (fields up to and including userdata) + 4 + 4 + 8 for the three fields above */
 
 
 
@@ -61,54 +62,71 @@
 
 	/* initialize DCArgs */
 
-	/* float parameters (8 registers spill to DCArgs) */
-
+	/* float parameters (8 registers spill to DCArgs.reg_data); DCArgs is built top-down on the stack, highest offsets first */
 	SUB(LIT(8*8),RSP)
-
-	MOVSD(XMM7, QWORD(RSP,8*7))		/* struct offset 120: float parameter 7 */
-	MOVSD(XMM6, QWORD(RSP,8*6))		/* struct offset 112: float parameter 6 */
-	MOVSD(XMM5, QWORD(RSP,8*5))		/* struct offset 104: float parameter 5 */
-	MOVSD(XMM4, QWORD(RSP,8*4))		/* struct offset  96: float parameter 4 */
-	MOVSD(XMM3, QWORD(RSP,8*3))		/* struct offset  88: float parameter 3 */
-	MOVSD(XMM2, QWORD(RSP,8*2))		/* struct offset  80: float parameter 2 */
-	MOVSD(XMM1, QWORD(RSP,8*1))		/* struct offset  72: float parameter 1 */
-	MOVSD(XMM0, QWORD(RSP,8*0))		/* struct offset  64: float parameter 0 */
+	MOVSD(XMM7, QWORD(RSP,8*7))		/* DCArgs offset 136: float parameter 7 */
+	MOVSD(XMM6, QWORD(RSP,8*6))		/* DCArgs offset 128: float parameter 6 */
+	MOVSD(XMM5, QWORD(RSP,8*5))		/* DCArgs offset 120: float parameter 5 */
+	MOVSD(XMM4, QWORD(RSP,8*4))		/* DCArgs offset 112: float parameter 4 */
+	MOVSD(XMM3, QWORD(RSP,8*3))		/* DCArgs offset 104: float parameter 3 */
+	MOVSD(XMM2, QWORD(RSP,8*2))		/* DCArgs offset  96: float parameter 2 */
+	MOVSD(XMM1, QWORD(RSP,8*1))		/* DCArgs offset  88: float parameter 1 */
+	MOVSD(XMM0, QWORD(RSP,8*0))		/* DCArgs offset  80: float parameter 0 */
 
-	/* integer parameters (6 registers spill to DCArgs) */
+	/* integer parameters (6 registers spill to DCArgs.reg_data), pushed last-to-first so parameter 0 ends up at the lowest offset */
+	PUSH(R9)				/* DCArgs offset 72: parameter 5 */
+	PUSH(R8)				/* DCArgs offset 64: parameter 4 */
+	PUSH(RCX)				/* DCArgs offset 56: parameter 3 */
+	PUSH(RDX)				/* DCArgs offset 48: parameter 2 */
+	PUSH(RSI)				/* DCArgs offset 40: parameter 1 */
+	PUSH(RDI)				/* DCArgs offset 32: parameter 0 */
 
-	PUSH(R9)				/* struct offset 56: parameter 5 */
-	PUSH(R8)				/* struct offset 48: parameter 4 */
-	PUSH(RCX)				/* struct offset 40: parameter 3 */
-	PUSH(RDX)				/* struct offset 32: parameter 2 */
-	PUSH(RSI)				/* struct offset 24: parameter 1 */
-	PUSH(RDI)				/* struct offset 16: parameter 0 */
-	
+	MOV(QWORD(RAX, CTX_aggrs_pp), R8)	/* R8 is free to use as scratch here, its incoming value was already saved above */
+	PUSH(R8)				/* DCArgs offset 24: **aggrs */
+
+	/* get val of aggr_return_register from DCCallback struct into r8's LSBs, clear */
+	/* MSBs, write DCArgs' pad and aggr_return_register at once */
+	MOVL(DWORD(RAX, CTX_aggr_ret_reg), R8D)	/* implicitly zeroes the high bits of R8 */
+	PUSH(R8)				/* DCArgs offset 16: pad=0, aggr_return_register=DCCallback.aggr_return_register */
 	/* register counts for integer/pointer and float regs */
+	PUSH(LIT(0))				/* DCArgs offset 12: fcount (both counts zeroed by this single 8-byte push) */
+						/* DCArgs offset  8: icount */
 
-	PUSH(LIT(0))				/* struct offset 12: fcount */
-						/* struct offset  8: icount */
-
-	LEA(QWORD(RBP,FRAME_arg0_sysv),RDX)		/* struct offset  0: stack pointer */
+	LEA(QWORD(RBP,FRAME_arg0_sysv),RDX)	/* DCArgs offset  0: *stack_ptr (address of first stack-passed argument) */
 	PUSH(RDX)
 
 	MOV(RSP,RSI)				/* arg 1 RSI : DCArgs* */
 
-	/* initialize DCValue */
+	/* stack space for DCValue or DCRetRegs_SysV (passed to handler as DCValue*) and padding; not zero-initialized */
+	SUB(LIT(4*8),RSP)			/* 4 qwords for DCRetRegs_SysV */
 
-	PUSH(LIT(0))				/* struct offset 0: return value (max long long) */
-
-	/* call handler( *ctx, *args, *value, *userdata) */
-
+	/* call handler(*ctx, *args, *value, *userdata) - stack must be 16-byte aligned here */
 	MOV(RAX,RDI)				/* arg 0 RDI : DCCallback* (RAX) */
 	MOV(QWORD(RDI,CTX_userdata),RCX)	/* arg 3 RCX : userdata* */
 	MOV(RSP,RDX)				/* arg 2 RDX : DCValue*  */
-	PUSH(LIT(0))				/* align to 16 bytes */
+
 	CALL_REG(QWORD(RAX,CTX_handler))
 
-	/* pass return type via registers, handle ints and floats */
-	
-	MOV(QWORD(RBP,FRAME_DCValue_sysv),RAX)
-	MOVD(RAX,XMM0)
+	/* read back aggr_return_register from DCArgs, use it to select how to load the reg-based retval */
+	CMPL(LIT(-2/*see C*/), DWORD(RSP, 48)) /* rsp+48 = 32 (retval space reserved above) + 16 (DCArgs offset where r8 / aggr_return_register was pushed) */
+
+	/* -2: take the scalar path below; any other value falls through (retval is small aggregate via regs) */
+	JE(scalar_retval)
+
+		MOV(QWORD(RSP,0),RAX)
+		MOV(QWORD(RSP,8),RDX)
+		MOVSD(QWORD(RSP,16),XMM0) /* @@@AGGR needed to be put in xmm in this case? @@@ also not what doc/appendix says, actually */
+		MOVSD(QWORD(RSP,24),XMM1) /* @@@AGGR needed to be put in xmm in this case? @@@ also not what doc/appendix says, actually */
+
+	/* else (retval is int, float, or ptr to aggregate) */
+	JMP(epilog)
+CSYM(scalar_retval): /* NOTE(review): defined via CSYM() but referenced bare in JE() above - confirm both expand to the same symbol on all targets */
+
+		/* pass return value via registers: RAX for ints/ptrs, same bits copied to XMM0 for floats */
+		MOV(QWORD(RSP,0),RAX)
+		MOVD(RAX,XMM0)
+
+CSYM(epilog): /* NOTE(review): same CSYM() vs. bare reference question as for scalar_retval (see JMP above) */
 
 	MOV(RBP,RSP)
 	POP(RBP)
@@ -116,55 +134,63 @@
 
 END_PROC(dcCallback_x64_sysv)
 
-GLOBAL(dcCallback_x64_win64)
-BEGIN_PROC(dcCallback_x64_win64)
+GLOBAL_FRAME(dcCallback_x64_win64)
+FRAME_BEGIN_PROC(dcCallback_x64_win64)
 
 	PUSH(RBP)
+	FRAME_PUSH_REG(RBP) /* presumably stack unwind info for the push above (macro defined elsewhere - confirm) */
 	MOV(RSP,RBP)
+	FRAME_SET(0, RBP) /* presumably unwind info: RBP used as frame register at offset 0 (confirm against macro) */
+	FRAME_ENDPROLOG() /* presumably marks the end of the prolog for the unwinder (confirm against macro) */
 
 	/* initialize DCArgs */
 
-	/* float parameters (4 registers spill to DCArgs) */
-
+	/* float parameters (4 registers spill to DCArgs.reg_data); DCArgs is built top-down on the stack, highest offsets first */
 	SUB(LIT(4*8),RSP)
+	MOVSD(XMM3, QWORD(RSP,8*3))		/* DCArgs offset  88: float parameter 3 */
+	MOVSD(XMM2, QWORD(RSP,8*2))		/* DCArgs offset  80: float parameter 2 */
+	MOVSD(XMM1, QWORD(RSP,8*1))		/* DCArgs offset  72: float parameter 1 */
+	MOVSD(XMM0, QWORD(RSP,8*0))		/* DCArgs offset  64: float parameter 0 */
 
-	MOVSD(XMM3, QWORD(RSP,8*3))		/* struct offset  72: float parameter 3 */
-	MOVSD(XMM2, QWORD(RSP,8*2))		/* struct offset  64: float parameter 2 */
-	MOVSD(XMM1, QWORD(RSP,8*1))		/* struct offset  56: float parameter 1 */
-	MOVSD(XMM0, QWORD(RSP,8*0))		/* struct offset  48: float parameter 0 */
-
-	/* integer parameters (4 registers spill to DCArgs) */
+	/* integer parameters (4 registers spill to DCArgs.reg_data), pushed last-to-first so parameter 0 ends up at the lowest offset */
+	PUSH(R9)				/* DCArgs offset 56: parameter 3 */
+	PUSH(R8)				/* DCArgs offset 48: parameter 2 */
+	PUSH(RDX)				/* DCArgs offset 40: parameter 1 */
+	PUSH(RCX)				/* DCArgs offset 32: parameter 0 */
 
-	PUSH(R9)				/* struct offset 40: parameter 3 */
-	PUSH(R8)				/* struct offset 32: parameter 2 */
-	PUSH(RDX)				/* struct offset 24: parameter 1 */
-	PUSH(RCX)				/* struct offset 16: parameter 0 */
-	
+	MOV(QWORD(RAX, CTX_aggrs_pp), R8)	/* R8 is free to use as scratch here, its incoming value was already saved above */
+	PUSH(R8)				/* DCArgs offset 24: **aggrs */
+
+	/* get val of aggr_return_register from DCCallback struct into r8's LSBs, clear */
+	/* MSBs, write DCArgs' pad and aggr_return_register at once */
+	MOVL(DWORD(RAX, CTX_aggr_ret_reg), R8D)	/* implicitly zeroes the high bits of R8 */
+	PUSH(R8)				/* DCArgs offset 16: pad=0, aggr_return_register=DCCallback.aggr_return_register */
+
 	/* register counts for integer/pointer and float regs */
+	PUSH(LIT(0))				/* DCArgs offset 12: pad_w (zeroed together with reg_count by this single 8-byte push) */
+						/* DCArgs offset  8: reg_count */
 
-	PUSH(LIT(0))				/* struct offset 12: fcount */
-						/* struct offset  8: icount */
-
-	LEA(QWORD(RBP,FRAME_arg0_win64),RDX)		/* struct offset  0: stack pointer */
+	LEA(QWORD(RBP,FRAME_arg0_win64),RDX)	/* DCArgs offset  0: *stack_ptr (address of first stack-passed argument) */
 	PUSH(RDX)
 
 	MOV(RSP,RDX)				/* arg 1 RDX : DCArgs* */
-	
-	/* initialize DCValue */
 
-	// PUSHQ(LIT(0))				/* struct offset 0: return value (max long long) */
+	/* space for retval: 2 qwords reserved although DCValue_size is 8, so the stack stays 16b aligned; not zero-initialized */
+	SUB(LIT(2*8),RSP)
 
-	/* call handler( *ctx, *args, *value, *userdata) */
-
+	/* call handler(*ctx, *args, *value, *userdata) - stack must be 16-byte aligned here */
 	MOV(RAX,RCX)				/* arg 0 RCX : DCCallback* (RAX) */
-	MOV(QWORD(RAX,CTX_userdata),R9)	/* arg 3 R9  : userdata* */
+	MOV(QWORD(RAX,CTX_userdata),R9)		/* arg 3 R9  : userdata* */
 	MOV(RSP,R8)				/* arg 2 R8  : DCValue*  */
-	SUB(LIT(4*8),RSP)                      /* make room for spill area and call */
+
+	/* spill area the caller must reserve below the arguments before the call */
+	SUB(LIT(4*8),RSP)			/* 4 qwords for spill area */
+
 	CALL_REG(QWORD(RAX,CTX_handler))
 
 	/* pass return type via registers, handle ints and floats */
-	
-	MOV(QWORD(RBP,FRAME_DCValue_win64),RAX)
+	MOV(QWORD(RSP,4*8),RAX)			/* retval buffer sits right above the 4-qword spill area; same bits go to XMM0 below */
 	MOVD(RAX,XMM0)
 
 	MOV(RBP,RSP)
@@ -173,5 +199,8 @@
 
 END_PROC(dcCallback_x64_win64)
 
+
 END_ASM
 
+/* vim: set ts=8: */
+