diff dyncall/dyncall_call_sparc64.S @ 0:3e629dc19168

initial from svn dyncall-1745
author Daniel Adler
date Thu, 19 Mar 2015 22:24:28 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_call_sparc64.S	Thu Mar 19 22:24:28 2015 +0100
@@ -0,0 +1,362 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_call_sparc64.S
+ Description: Call kernel for sparc64 v9 ABI.
+ License:
+
+   Copyright (c) 2011-2015 Daniel Adler <dadler@uni-goettingen.de>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+
+
+/* --------------------------------------------------------------------------- 
+
+call kernel for sparc64 v9 abi
+tested on sparc64/linux/debian [gcc54.fsffrance.org - thanx to the farm!] 
+
+new C Interface:
+  void dcCall_sparc64 (DCCallVM* callvm, DCpointer target);
+                       %o0               %o1   (on entry, before 'save')
+
+see dyncall_call_sparc.S for details.
+
+old C Interface:
+  void dcCall_sparc (DCpointer target, DCsize size, DCpointer data);
+		     %i0             , %i1        , %i2
+Input:
+  i0   target
+  i1   size
+  i2   data
+
+*/
+
+#define REGSIZE 8 /* bytes per 64-bit slot (stride of the ldx/ldd/stx accesses) */
+
+#define BIAS 2047 /* added to %sp whenever the stack is addressed */
+
+#define ALIGN 16 /* the frame size is rounded up to a multiple of this */
+
+#define IREGS 6 /* integer slots in the data buffer, loaded into %o0..%o5 */
+#define FREGS 16 /* double slots, loaded into %f0,%f2,..,%f30 */
+#define SREGS 16 /* single slots (4 bytes each), loaded into %f1,%f3,..,%f31 */
+#define IBASE  0 /* data-buffer offset of the integer slots */
+#define FBASE  (IREGS*8) /* data-buffer offset of the double slots */
+
+#define SHEAD ((16+6)*8) /* stack bytes below the copied arguments: 16+6 extended words */
+#define DHEAD  ((IREGS+FREGS)*8)+SREGS*4 /* data-buffer bytes that hold register contents */
+
+CALLVM_singleUseFlags = 24 /* DCCallVM byte offset of the 32-bit single-precision flags */
+CALLVM_size    = 40 /* DCCallVM byte offset of the 64-bit data size */
+CALLVM_dataoff = 48 /* DCCallVM byte offset where the data buffer starts */
+
+
+.global dcCall_sparc64
+dcCall_sparc64:
+	/* void dcCall_sparc64(DCCallVM* callvm, DCpointer target);
+	 * Entry (leaf context, before 'save'): %o0 = callvm, %o1 = target.
+	 * Loads %o0-%o5 and %f0-%f31 from the call VM's data buffer, copies
+	 * whatever follows the register header to the stack argument area,
+	 * calls 'target' and passes the callee's %o0 back to our caller. */
+
+	or   %o0, %g0, %o3	       /* %o3: callvm */
+	or   %o1, %g0, %o0	       /* %o0: target */
+	ldx [%o3+CALLVM_size], %o1     /* %o1: size */
+	add %o3, CALLVM_dataoff, %o2  /* %o2: data */
+	ld  [%o3+CALLVM_singleUseFlags], %o4 /* %o4: flags */
+	/* Still a leaf here: only the six output registers are touched. */
+
+	/* Register state before 'save': */
+	/* %o0 = ptr to target. */
+	/* %o1 = size of data (64-bit, includes the DHEAD register header). */
+	/* %o2 = data pointer. */
+	/* %o3 = scratch (frame size, computed below). */
+	/* %o4 = single-precision use flags (one bit per odd %f register). */
+	/* %o5 = unused. */
+
+	/* Frame size (generous): %o3 = -align(%o1 + SHEAD, ALIGN). */
+
+	add     %o1, SHEAD+ALIGN-1, %o3
+	and     %o3,   -ALIGN, %o3
+	neg     %o3
+
+	/* Prolog: open a new register window and raise the frame. */
+	save	%sp, %o3, %sp
+
+	/* Register state after 'save' (outs became ins): */
+	/* %i0 = ptr to target. */
+	/* %i1 = size of data. */
+	/* %i2 = data pointer. */
+	/* %i3 = negated frame size (unused), %i4 = single-precision flags. */
+
+	/* Load integer argument registers. */
+
+	ldx	[%i2+IBASE+REGSIZE*0 ],%o0
+	ldx	[%i2+IBASE+REGSIZE*1 ],%o1
+	ldx	[%i2+IBASE+REGSIZE*2 ],%o2
+	ldx	[%i2+IBASE+REGSIZE*3 ],%o3
+	ldx	[%i2+IBASE+REGSIZE*4 ],%o4
+	ldx	[%i2+IBASE+REGSIZE*5 ],%o5
+
+	/* Load double-precision float registers (even/odd pairs %f0..%f31). */
+
+	ldd     [%i2+FBASE+REGSIZE*0 ],%f0
+	ldd     [%i2+FBASE+REGSIZE*1 ],%f2
+	ldd     [%i2+FBASE+REGSIZE*2 ],%f4
+	ldd     [%i2+FBASE+REGSIZE*3 ],%f6
+	ldd     [%i2+FBASE+REGSIZE*4 ],%f8
+	ldd     [%i2+FBASE+REGSIZE*5 ],%f10
+	ldd     [%i2+FBASE+REGSIZE*6 ],%f12
+	ldd     [%i2+FBASE+REGSIZE*7 ],%f14
+	ldd     [%i2+FBASE+REGSIZE*8 ],%f16
+	ldd     [%i2+FBASE+REGSIZE*9 ],%f18
+	ldd     [%i2+FBASE+REGSIZE*10],%f20
+	ldd     [%i2+FBASE+REGSIZE*11],%f22
+	ldd     [%i2+FBASE+REGSIZE*12],%f24
+	ldd     [%i2+FBASE+REGSIZE*13],%f26
+	ldd     [%i2+FBASE+REGSIZE*14],%f28
+	ldd     [%i2+FBASE+REGSIZE*15],%f30
+
+	/* Load single-precision float registers: if bit n of the flags (%i4) */
+	/* is set, odd register %f(2n+1) is reloaded from the n-th single slot. */
+	or	%g0, 1, %l0			/* l0 = flag mask, starts at bit 0. */
+.f0:
+	andcc   %i4, %l0, %g0			/* Flag bit set? */
+	beq	.f1				/* No: keep what the ldd put there. */
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*0 ], %f1
+.f1:
+	sll     %l0, 1, %l0			/* Advance mask to the next flag bit. */
+	andcc   %i4, %l0, %g0
+	beq	.f2
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*1 ], %f3
+.f2:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f3
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*2 ], %f5
+.f3:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f4
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*3 ], %f7
+.f4:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f5
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*4 ], %f9
+.f5:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f6
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*5 ], %f11
+.f6:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f7
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*6 ], %f13
+.f7:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f8
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*7 ], %f15
+.f8:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f9
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*8 ], %f17
+.f9:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f10
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*9 ], %f19
+.f10:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f11
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*10], %f21
+.f11:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f12
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*11], %f23
+.f12:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f13
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*12], %f25
+.f13:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f14
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*13], %f27
+.f14:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f15
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*14], %f29
+.f15:
+	sll     %l0, 1, %l0
+	andcc   %i4, %l0, %g0
+	beq	.f_end
+	nop
+	ld	[%i2+FBASE+REGSIZE*16+4*15], %f31
+.f_end:
+	/* Skip register data; is anything left that must go on the stack? */
+	sub	%i1,  DHEAD, %i1   		/* i1 = bytes following the data header. */
+	cmp     %i1, 0
+	ble	%xcc, .do_call			/* size is 64-bit: test %xcc, not %icc. */
+	nop
+
+	/* Copy loop: one extended word per iteration. */
+	add     %i2,  DHEAD, %i2		/* i2 = skip data header. */
+	or	%g0, %g0, %l0			/* l0 = offset initialized to 0. */
+	add     %sp,  BIAS+SHEAD, %l2		/* l2 = stack argument area, just past the (16+6)-xword header. */
+
+.next:
+	ldx     [%i2+%l0],%l1			/* Read from arg buffer(%i2) to %l1. */
+	stx     %l1, [%l2+%l0]			/* Write %l1 to stack space(%l2). */
+	add     %l0, REGSIZE, %l0		/* Increment offset. */
+	sub     %i1, REGSIZE, %i1		/* Decrement copy size. */
+	cmp     %i1, 0
+	bgt     %xcc, .next			/* 64-bit compare, see above. */
+	nop
+.do_call:
+	call	%i0				/* Call target. */
+	nop					/* (delay slot) */
+	or	%o0, %g0, %i0			/* Callee's %o0 becomes our caller's %o0. */
+	jmpl	%i7 + 8, %g0			/* Return past the caller's call + delay slot. */
+	restore					/* (delay slot) drop our register window. */
+/*
+	return  %i7 + 8
+	jmpl %i7 + 8, %g0
+        nop
+
+	jmpl %i7 + 8, %g0
+    nop
+    restore
+	ret
+*/
+
+/* 
+    or     %o0, %g0, %i0
+    or     %o1, %g0, %i1
+    or     %o2, %g0, %i2
+    or     %o3, %g0, %i3
+	return  %i7 + 8
+    nop
+
+Changes from v8:
+- fundamental data types
+	- (un)signed int: 8,16,32,64
+	- float: 32,64,128
+- float: IEEE 754 compliant
+	32 32-bit  float registers f0,f1,..,f31
+	32 64-bit  float registers f0,f2,..,f62
+	16 128-bit float registers f0,f4,..,f60
+
+Description:
+We need to raise up a dynamic stack frame.
+Therefore we need to compute the stack size. We do this first, 
+in the context of the caller as a leaf function (using o3 as scratch for addition).
+Then we raise the frame, ending up in o0-o3 is then i0-i3.
+
+
+Stack Layout:
+     BIAS = 2047
+
+   BIAS+XX: should be 16 byte aligned.
+                 ...
+  BIAS+176: argument overflow area (arguments beyond the six register slots)
+  BIAS+128:  6 extended words reserved for the arguments passed in %o0..%o5
+   BIAS+ 0: 16 extended words for registers (in/local) save area [register window]
+
+
+Function Argument Passing:
+- integer %o0..%o5 (caller view).
+- floating-point %f0 .. %f31 (doubles in the even/odd pairs %f0,%f2,..,%f30; singles in the odd registers %f1,%f3,..,%f31)
+- continuous memory starting at %sp+BIAS+176 (caller view).
+
+Register Usage:
+%fp0..%fp31  : floating-point arguments.
+%sp  or  %o6 : stack pointer; %sp+BIAS is kept 16-byte aligned.
+%fp  or  %i6 : frame pointer.
+%i0  and %o0 : integer and pointer return values.
+%i7  and %o7 : return address. (caller puts return address to %o7, callee uses %i7)
+%fp0 and %fp1: return value (float).
+%i0..%i5     : input argument registers 
+%o0..%o5     : output argument registers
+%g0          : always zero, writes to it have no effect.
+
+Register Mappings:
+r0-7    -> globals
+r8-15   -> outs
+r16-r23 -> locals
+r24-r31 -> ins
+
+Integer Register Overview Table:
+ID      Class   Name    Description
+------------------------------------------------------------------------------
+0	globals	g0	always zero, writes to it have no effect
+1		g1
+2		g2
+3		g3
+4		g4
+5		g5
+6		g6
+7		g7
+8	out	o0	[int/ptr] arg 0 and return
+9		o1	          arg 1
+10		o2	          arg 2
+11		o3	          arg 3
+12              o4                arg 4
+13              o5                arg 5
+14              o6	stack pointer
+15		o7
+16	local	l0	scratch
+17		l1
+18		l2
+19		l3
+20		l4
+21		l5
+22		l6
+23		l7
+24	in	i0	[int/ptr] arg 0 and return
+25		i1
+26		i2
+27		i3
+28		i4
+29		i5
+30		i6	frame pointer
+31		i7
+*/
+