changeset 192:cf8134a20759

- b/c no preproc needed, changed some files from .S -> .s - some format/whitespace cleanup for readability, some declutter
author Tassilo Philipp
date Tue, 14 Mar 2017 14:14:47 +0100
parents 2f7a7f3472cb
children 600bd90afdb7
files dyncall/dyncall_call.S dyncall/dyncall_call_sparc.S dyncall/dyncall_call_sparc.s dyncall/dyncall_call_sparc64.S dyncall/dyncall_call_sparc64.s
diffstat 5 files changed, 374 insertions(+), 417 deletions(-) [+]
line wrap: on
line diff
--- a/dyncall/dyncall_call.S	Fri Mar 17 03:27:36 2017 +0100
+++ b/dyncall/dyncall_call.S	Tue Mar 14 14:14:47 2017 +0100
@@ -69,9 +69,9 @@
 #  elif defined(DC__Arch_MIPS) || defined(DC__Arch_MIPS64)
 #    include "dyncall_call_mips_gas.S"
 #  elif defined(DC__Arch_Sparc64)
-#    include "dyncall_call_sparc64.S"
+#    include "dyncall_call_sparc64.s"
 #  elif defined(DC__Arch_Sparc)
-#    include "dyncall_call_sparc.S"
+#    include "dyncall_call_sparc.s"
 #  else
 #    error Unsupported Architecture.
 #  endif
--- a/dyncall/dyncall_call_sparc.S	Fri Mar 17 03:27:36 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-/*
-
- Package: dyncall
- Library: dyncall
- File: dyncall/dyncall_call_sparc.S
- Description: Call kernel for sparc processor architecture.
- License:
-
-   Copyright (c) 2011-2015 Daniel Adler <dadler@uni-goettingen.de>
-
-   Permission to use, copy, modify, and distribute this software for any
-   purpose with or without fee is hereby granted, provided that the above
-   copyright notice and this permission notice appear in all copies.
-
-   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-*/
-
-
-
-
-/* --------------------------------------------------------------------------- 
-
-call kernel for sparc 32-bit
-----------------------------
-tested on linux/debian [gcc54.fsffrance.org - thanx to the farm!] 
-
-new C Interface:
-  void dcCall_sparc (DCCallVM* callvm, DCpointer target);
-                     %i0               %1 
-
-we need to do that, due to the special property of sparc, its 'register windows'
-that propagate input registers..
-otherwise, we would have a 'void' return-value layer which results in failure
-to propagate back return values.
-instead of implementing 'dummy'-C return-values, we call directly.
-
-in sparc, this is simply a leaf-function layer using %o3.
-
-old C Interface:
-  void dcCall_sparc (DCpointer target, DCsize size, DCpointer data);
-		     %i0             , %i1        , %i2
-
-
-Input:
-  i0   callvm
-  i1   target
-
-old Input:
-  i0   target
-  i1   size
-  i2   data
-
-Description:
-We need to raise a dynamic stack frame.
-Therefore we need to compute the stack size in the context of the caller as a leaf note (using o3 in addition).
-Then we raise the frame.
-
-sparc:
-- big endian
-
-sparc V8:
-- integer/pointer: 32 32-bit integers.
-- float: 8 quad precision, 16 double precision, 32 single precision.
-
-sparc V9:
-- integer/pointer: 32 64-bit integers.
-
-plan9:
-- completely different scheme - similar to mips/plan9.
-- registers are named r0 .. r31
-  r1 stack pointer
-  r2 static base register
-  .. to be continued..
-
-Stack Layout 32-Bit Model:
-- sp+92 seventh argument
-- sp+68 first argument
-- sp+64 
-- 16 registers save area (in/local).
-
-	XX: should be 8 byte aligned (min stack frame size is 96).
-	            ...
-	92: on stack argument 6
-	88: input argument 5 spill
-	            ...
-	68: input argument 0 spill
-	64: struct/union pointer return value
-	 0: 16 registers save area
-
-Stack Layout 64-Bit Model:
-        XX: should be 16 byte aligned (min stack frame size is 172).
-       168: on stack argument 6
-       136: input argument 0 spill
-       128: struct/union poiner return value
-	 0: 16 registers save area
-
-
-
-Register Usage:
-%sp or  %o6: stack pointer, always 8 (or 16?)-byte aligned.
-%fp or  %i6: frame pointer.
-%i0 and %o0: integer and pointer return values.
-%i7 and %o7: return address. (caller puts return address to %o7, callee uses %i7)
-%f0 and %f1: return value (float).
-%i0..%i5:    input argument registers 
-%o0..%o5:    output argument registers
-%g0:         always zero, writes to it have no effect.
-
-Register Mappings:
-r0-7    -> globals
-r8-15   -> outs
-r16-r23 -> locals
-r24-r31 -> ins
-
-*/
-
-#if defined(DC__Arch_Sparc64) /* @@@ this entire block can even be removed, b/c this file is about 32 bit sparc */
-#define REGSIZE 8
-#error invalid arch
-#else
-#define REGSIZE 4
-#endif
-
-#define ALIGN   16
-CALLVM_size    = 12
-CALLVM_dataoff = 16
-.global dcCall_sparc
-dcCall_sparc:
-
-/* Basic Prolog: supports up to 6 arguments. */
-
-	/* new C interface */
-	/* o0-1: callvm,target */
-	
-	or   %o0, %g0, %o3	       /* %o3: callvm */
-	or   %o1, %g0, %o0	       /* %o0: target */
-	ld  [%o3+CALLVM_size], %o1     /* %o1: size */
-	add  %o3, CALLVM_dataoff, %o2  /* %o2: data */
-	/*o0-2:target,size,data*/
-
-	/*leaf functions: may use the first six output registers.*/
-	/*o3-5:free to use */
-
-	/* Compute a matching stack size (approximate): o3 = align(92+o1,16) */
-
-	add     %o1, (16+1+6)*REGSIZE+ALIGN-1, %o3
-	and     %o3,   -ALIGN, %o3
-	neg     %o3
-	
-	/* Prolog. */
-	save	%sp, %o3, %sp	/* min stack size (16+1+6)*sizeof(ptr)=92 paddded to 8-byte alignment => min frame size of 96 bytes. */
-	
-	/* Load output registers. */
-
-	ld	[%i2           ],%o0
-	ld	[%i2+REGSIZE*1 ],%o1
-	ld	[%i2+REGSIZE*2 ],%o2
-	ld	[%i2+REGSIZE*3 ],%o3
-	ld	[%i2+REGSIZE*4 ],%o4
-	ld	[%i2+REGSIZE*5 ],%o5
-
-	/* Copy on stack? */
-	sub	%i1, REGSIZE*6, %i1   		/* i1 = decrement copy size by 6 regs (=6 regs x 4 bytes = 24 bytes total). */
-	cmp %i1, 0
-    ble .do_call
-	nop
-
-	/* Copy loop: */
-	add     %i2, REGSIZE*6, %i2		/* i2 = address of 7th word of args buffer. */
-	or      %g0, %g0, %l0			/* l0 = offset initialized to 0. */
-	add     %sp, (16+1+6)*REGSIZE, %l2	/* l2 = argument area on stack space (7th word). (64+4+6*4 = byte offset 92). */
-.next:
-	ld      [%i2+%l0],%l1			/* Read from arg buffer(%i2) to %l1. */
-	st      %l1, [%l2+%l0]			/* Write %l1 to stack space(%l2). */
-	add     %l0, REGSIZE, %l0		/* Increment offset. */
-	sub     %i1, REGSIZE, %i1		/* Decrement copy size. */
-	cmp     %i1, 0
-	bgt     .next
-	nop
-.do_call:
-	call    %i0						/* Call target. */
-	nop
-	or      %o0, %g0, %i0
-	or      %o1, %g0, %i1
-	jmpl	%i7 + 8, %g0
-	restore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_call_sparc.s	Tue Mar 14 14:14:47 2017 +0100
@@ -0,0 +1,188 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_call_sparc.s
+ Description: Call kernel for sparc processor architecture.
+ License:
+
+   Copyright (c) 2011-2015 Daniel Adler <dadler@uni-goettingen.de>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+
+
+/* --------------------------------------------------------------------------- 
+
+@@@ this should all go in manual
+
+call kernel for sparc 32-bit
+----------------------------
+tested on linux/debian [gcc54.fsffrance.org - thanx to the farm!] 
+
+new C Interface:
+  void dcCall_sparc (DCCallVM* callvm, DCpointer target);
+                     %i0               %i1
+
+we need to do that, due to the special property of sparc, its 'register windows'
+that propagate input registers..
+otherwise, we would have a 'void' return-value layer which results in failure
+to propagate back return values.
+instead of implementing 'dummy'-C return-values, we call directly.
+
+in sparc, this is simply a leaf-function layer using %o3.
+
+old C Interface:
+  void dcCall_sparc (DCpointer target, DCsize size, DCpointer data);
+                     %i0             , %i1        , %i2
+
+
+Input:
+  i0   callvm
+  i1   target
+
+old Input:
+  i0   target
+  i1   size
+  i2   data
+
+Description:
+We need to raise a dynamic stack frame.
+Therefore we need to compute the stack size in the context of the caller as a leaf function (using o3 in addition).
+Then we raise the frame.
+
+sparc:
+- big endian
+
+sparc V8:
+- integer/pointer: 32 32-bit integers.
+- float: 8 quad precision, 16 double precision, 32 single precision.
+
+sparc V9:
+- integer/pointer: 32 64-bit integers.
+
+plan9:
+- completely different scheme - similar to mips/plan9.
+- registers are named r0 .. r31
+  r1 stack pointer
+  r2 static base register
+  .. to be continued..
+
+Stack Layout 32-Bit Model:
+- sp+92 seventh argument
+- sp+68 first argument
+- sp+64 
+- 16 registers save area (in/local).
+
+	XX: should be 8 byte aligned (min stack frame size is 96).
+	            ...
+	92: on stack argument 6
+	88: input argument 5 spill
+	            ...
+	68: input argument 0 spill
+	64: struct/union pointer return value
+	 0: 16 registers save area
+
+Stack Layout 64-Bit Model:
+	 XX: should be 16 byte aligned (min stack frame size is 172).
+	168: on stack argument 6
+	136: input argument 0 spill
+	128: struct/union pointer return value
+	  0: 16 registers save area
+
+
+
+Register Usage:
+%sp or  %o6: stack pointer, always 8 (or 16?)-byte aligned.
+%fp or  %i6: frame pointer.
+%i0 and %o0: integer and pointer return values.
+%i7 and %o7: return address. (caller puts return address to %o7, callee uses %i7)
+%f0 and %f1: return value (float).
+%i0..%i5:    input argument registers 
+%o0..%o5:    output argument registers
+%g0:         always zero, writes to it have no effect.
+
+Register Mappings:
+r0-7    -> globals
+r8-15   -> outs
+r16-r23 -> locals
+r24-r31 -> ins
+
+*/
+
+.set REGSIZE,         4   /* bytes per argument word / integer register */
+.set ALIGN,          16   /* frame size is rounded up to a multiple of this */
+.set CALLVM_size,    12   /* byte offset of the argument size field inside the callvm */
+.set CALLVM_dataoff, 16   /* byte offset of the argument data buffer inside the callvm */
+
+.text
+.global dcCall_sparc
+dcCall_sparc:
+/* void dcCall_sparc(DCCallVM* callvm, DCpointer target) -- entered with callvm in %o0 and target in %o1. */
+/* Basic prolog: runs before 'save', so it only touches out registers; the first 6 argument words travel in %o0-%o5. */
+
+	/* o0-1: callvm,target */
+	or   %o0, %g0, %o3             /* %o3: callvm */
+	or   %o1, %g0, %o0             /* %o0: target */
+	ld  [%o3+CALLVM_size], %o1     /* %o1: size (bytes of argument data) */
+	add  %o3, CALLVM_dataoff, %o2  /* %o2: data (address of argument buffer) */
+	/* o0-2: target,size,data -- these become i0-2 once 'save' switches the register window */
+
+	/* leaf functions: may use the first six output registers. */
+	/* o3-5: free to use */
+
+	/* Compute a matching stack size (approximate): o3 = -align(92+o1,16), negated because 'save' adds it to %sp */
+	add     %o1, (16+1+6)*REGSIZE+ALIGN-1, %o3
+	and     %o3,   -ALIGN, %o3
+	neg     %o3
+
+	/* Prolog: raise the frame and switch register window. */
+	save    %sp, %o3, %sp    /* min stack size (16+1+6)*sizeof(ptr)=92, rounded up to ALIGN (16) => min frame size of 96 bytes. */
+
+	/* Load output registers with the first six argument words. */
+	ld      [%i2           ],%o0
+	ld      [%i2+REGSIZE*1 ],%o1
+	ld      [%i2+REGSIZE*2 ],%o2
+	ld      [%i2+REGSIZE*3 ],%o3
+	ld      [%i2+REGSIZE*4 ],%o4
+	ld      [%i2+REGSIZE*5 ],%o5
+
+	/* Copy on stack? Only needed when more than six words were pushed. */
+	sub     %i1, REGSIZE*6, %i1   /* i1 = decrement copy size by 6 regs (=6 regs x 4 bytes = 24 bytes total). */
+	cmp     %i1, 0
+	ble     .do_call              /* nothing left beyond the register arguments */
+	nop                           /* branch delay slot */
+
+	/* Copy loop: moves the remaining words, one per iteration, into the outgoing argument area. */
+	add     %i2, REGSIZE*6, %i2         /* i2 = address of 7th word of args buffer. */
+	or      %g0, %g0, %l0               /* l0 = offset initialized to 0. */
+	add     %sp, (16+1+6)*REGSIZE, %l2  /* l2 = argument area on stack space (7th word). (64+4+6*4 = byte offset 92). */
+.next:
+	ld      [%i2+%l0],%l1      /* Read from arg buffer(%i2) to %l1. */
+	st      %l1, [%l2+%l0]     /* Write %l1 to stack space(%l2). */
+	add     %l0, REGSIZE, %l0  /* Increment offset. */
+	sub     %i1, REGSIZE, %i1  /* Decrement copy size. */
+	cmp     %i1, 0
+	bgt     .next              /* loop while bytes remain */
+	nop                        /* branch delay slot */
+.do_call:
+	call    %i0   /* Call target. */
+	nop           /* call delay slot */
+	or      %o0, %g0, %i0      /* pass the target's %o0 result on: %i0 is our caller's %o0 after 'restore' */
+	or      %o1, %g0, %i1      /* likewise for %o1 -> %i1 */
+	jmpl    %i7 + 8, %g0       /* optimized restore;retl;nop */
+	restore                    /* executes in the jmpl delay slot: pops the frame/window while returning */
+
--- a/dyncall/dyncall_call_sparc64.S	Fri Mar 17 03:27:36 2017 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,222 +0,0 @@
-/*
-
- Package: dyncall
- Library: dyncall
- File: dyncall/dyncall_call_sparc64.S
- Description: Call kernel for sparc64 (v9) ABI.
- License:
-
-   Copyright (c) 2011-2015 Daniel Adler <dadler@uni-goettingen.de>
-
-   Permission to use, copy, modify, and distribute this software for any
-   purpose with or without fee is hereby granted, provided that the above
-   copyright notice and this permission notice appear in all copies.
-
-   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-*/
-
-
-#define BIAS 2047
-.global dcCall_v9
-	/* dcCall_sparc64( DCCallVM* , void * target ) */ 
-        /*                 o0          o1              */
-dcCall_v9:
-	or   %o0, %g0, %o3  /* o3: callvm   */
-	or   %o1, %g0, %o0  /* o0: target   */
-	ldx  [%o3+24], %o1  /* o1: mVecSize */
-	add   %o3, 32, %o2  /* o2: stack    */
-	/* Compute a matching stack size (approximate): o3 = align(o1+136,16) */
-
-	add  %o1, (16+1+6)*8+15, %o3
-	and  %o3, -16, %o3
-	neg  %o3            /* o3: -stacksize */
-	save %sp, %o3, %sp	
-
-	ldd     [%i2+8*0 ],%f0		/* Load double-precision float registers. */
-	ldd     [%i2+8*1 ],%f2
-	ldd     [%i2+8*2 ],%f4
-	ldd     [%i2+8*3 ],%f6
-	ldd     [%i2+8*4 ],%f8
-	ldd     [%i2+8*5 ],%f10
-	ldd     [%i2+8*6 ],%f12
-	ldd     [%i2+8*7 ],%f14
-	ldd     [%i2+8*8 ],%f16
-        ldd     [%i2+8*9 ],%f18
-	ldd     [%i2+8*10],%f20
-	ldd     [%i2+8*11],%f22
-	ldd     [%i2+8*12],%f24
-	ldd     [%i2+8*13],%f26
-	ldd     [%i2+8*14],%f28
-	ldd     [%i2+8*15],%f30
-	ldx	[%i2+8*0],%o0		/* Load output registers. */
-	ldx	[%i2+8*1],%o1
-	ldx	[%i2+8*2],%o2
-	ldx	[%i2+8*3],%o3
-	ldx	[%i2+8*4],%o4
-	ldx	[%i2+8*5],%o5
-	sub     %i1, 48, %i1
-	cmp     %i1, 0
-    	ble	.do_call
-    	nop
-	/* Copy loop: */
-	add     %i2, 48, %i2	/* skip homing area */
-	or	%g0, %g0, %l0			/* l0 = offset initialized to 0. */
-	add     %sp, BIAS+((16+6)*8), %l2	/* l2 = argument area on stack space (7th word). (64+4+6*4 = byte offset 92). */
-.next:
-	ldx     [%i2+%l0],%l1	/* Read from arg buffer(%i2) to %l1. */
-	stx     %l1, [%l2+%l0]	/* Write %l1 to stack space(%l2). */
-	add     %l0, 8, %l0	/* Increment offset. */
-	sub     %i1, 8, %i1	/* Decrement copy size. */
-	cmp     %i1, 0
-	bgt     .next
-	nop
-.do_call:
-	call	%i0				/* Call target. */
-    	nop
-	or	%o0, %g0, %i0
-	jmpl	%i7 + 8, %g0
-    	restore
-
-/* 
-    or     %o0, %g0, %i0
-    or     %o1, %g0, %i1
-    or     %o2, %g0, %i2
-    or     %o3, %g0, %i3
-	return  %i7 + 8
-    nop
-
-Changes from v8:
-- fundamental data types
-	- (un)signed int: 8,16,32,64
-	- float: 32,64,128
-- float: IEEE 754 compilant
-	32 32-bit  float registers f0,f1,..,f31
-	32 64-bit  float registers f0,f2,..,f62
-	16 128-bit float registers f0,f4,..,f60
-
-Description:
-We need to raise up a dynamic stack frame.
-Therefore we need to compute the stack size. We do this first, 
-in the context of the caller as a leaf function (using o3 as scratch for addition).
-Then we raise the frame, ending up in o0-o3 is then i0-i3.
-
-
-Stack Layout:
-     BIAS = 2047
-
-   BIAS+XX: should be 16 byte aligned.
-                 ...
-       136: argument overflow area
-       128:  1 extended word  for struct/union poiner return value
-   BIAS+ 0: 16 extended words for registers (in/local) save area [register window]
-
-
-Function Argument Passing:
-- integer %o0..%o5 (caller view).
-- floating-point %f0 .. %f15
-- continuous memory starting at %sp+BIAS+136 (caller view).
-
-Register Usage:
-%fp0..%fp31  : floating-point arguments.
-%sp  or  %o6 : stack pointer, always 8 (or 16?)-byte aligned.
-%fp  or  %i6 : frame pointer.
-%i0  and %o0 : integer and pointer return values.
-%i7  and %o7 : return address. (caller puts return address to %o7, callee uses %i7)
-%fp0 and %fp1: return value (float).
-%i0..%i5     : input argument registers 
-%o0..%o5     : output argument registers
-%g0          : always zero, writes to it have no effect.
-
-Register Mappings:
-r0-7    -> globals
-r8-15   -> outs
-r16-r23 -> locals
-r24-r31 -> ins
-
-Integer Register Overview Table:
-ID      Class   Name    Description
-------------------------------------------------------------------------------
-0	globals	g0	always zero, writes to it have no effect
-1		g1
-2		g2
-3		g3
-4		g4
-5		g5
-6		g6
-7		g7
-8	out	o0	[int/ptr] arg 0 and return
-9		o1	          arg 1
-10		o2	          arg 2
-11		o3	          arg 3
-12              o4                arg 4
-13              o5                arg 5
-14              o6	stack pointer
-15		o7
-16	local	l0	scratch
-17		l1
-18		l2
-19		l3
-20		l4
-21		l5
-22		l6
-23		l7
-24	in	i0	[int/pt] arg 0 and return
-25		i1
-26		i2
-27		i3
-28		i4
-29		i5
-30		i6	frame pointer
-31		i7
-*/
-
-/* --------------------------------------------------------------------------- 
-
-call kernel for sparc64 v9 abi
-tested on sparc64/linux/debian [gcc54.fsffrance.org - thanx to the farm!] 
-
-new C Interface:
-  void dcCall_sparc (DCCallVM* callvm, DCpointer target);
-                     %i0               %1 
-
-see dyncall_call_sparc.S for details.
-
-old C Interface:
-  void dcCall_sparc (DCpointer target, DCsize size, DCpointer data);
-		     %i0             , %i1        , %i2
-Input:
-  i0   target
-  i1   size
-  i2   data
-
-*/
-
-#if 0
-
-
-#define REGSIZE 8
-
-
-
-#define SHEAD ((16+6)*8)
-#define ALIGN 16 
-#define IREGS 6
-#define FREGS 16
-#define SREGS 16
-#define IBASE  0
-#define FBASE  (IREGS*8)
-
-// #define DHEAD  ((IREGS+FREGS)*8)+SREGS*4
-
-CALLVM_regdata = 72
-CALLVM_size    = 208
-CALLVM_buffer  = 216
-
-#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_call_sparc64.s	Tue Mar 14 14:14:47 2017 +0100
@@ -0,0 +1,184 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_call_sparc64.s
+ Description: Call kernel for sparc64 (v9) ABI.
+ License:
+
+   Copyright (c) 2011-2015 Daniel Adler <dadler@uni-goettingen.de>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+/* NOTE: %sp/%fp for v9 are offset, so addressing stack memory through them needs a "BIAS" of 2047 added */
+.set BIAS, 2047
+
+.text
+.global dcCall_v9
+	/* dcCall_v9( DCCallVM* callvm, void* target ) */
+        /*            o0                o1             */
+dcCall_v9:
+	or      %o0, %g0, %o3  /* o3: callvm   */
+	or      %o1, %g0, %o0  /* o0: target   */
+	ldx     [%o3+24], %o1  /* o1: mVecSize (bytes of argument data) */
+	add     %o3, 32,  %o2  /* o2: argument data buffer (callvm+32) */
+	/* Compute a matching stack size (approximate): o3 = align(o1+184,16), i.e. (16+1+6)*8 bytes plus the argument data */
+
+	add     %o1, (16+1+6)*8+15, %o3
+	and     %o3, -16, %o3
+	neg     %o3            /* o3: -stacksize */
+	save    %sp, %o3, %sp  /* raise frame; o0-2 (target,size,data) are i0-2 from here on */
+
+	ldd     [%i2+8*0 ], %f0  /* Load double-precision float registers from the first 16 buffer slots. */
+	ldd     [%i2+8*1 ], %f2
+	ldd     [%i2+8*2 ], %f4
+	ldd     [%i2+8*3 ], %f6
+	ldd     [%i2+8*4 ], %f8
+	ldd     [%i2+8*5 ], %f10
+	ldd     [%i2+8*6 ], %f12
+	ldd     [%i2+8*7 ], %f14
+	ldd     [%i2+8*8 ], %f16
+	ldd     [%i2+8*9 ], %f18
+	ldd     [%i2+8*10], %f20
+	ldd     [%i2+8*11], %f22
+	ldd     [%i2+8*12], %f24
+	ldd     [%i2+8*13], %f26
+	ldd     [%i2+8*14], %f28
+	ldd     [%i2+8*15], %f30
+	ldx     [%i2+8*0 ], %o0  /* Load output registers from the same first six slots. */
+	ldx     [%i2+8*1 ], %o1
+	ldx     [%i2+8*2 ], %o2
+	ldx     [%i2+8*3 ], %o3
+	ldx     [%i2+8*4 ], %o4
+	ldx     [%i2+8*5 ], %o5
+	sub     %i1, 48, %i1     /* i1 = bytes left after the six register slots (6 x 8). */
+	cmp     %i1, 0
+	ble     .do_call         /* NOTE(review): ble/bgt here test the 32-bit icc flags, not %xcc -- fine for small sizes; confirm */
+	nop                      /* branch delay slot */
+	/* Copy loop: */
+	add     %i2, 48, %i2   /* skip homing area */
+	or      %g0, %g0, %l0  /* l0 = offset initialized to 0. */
+	add     %sp, BIAS+((16+6)*8), %l2  /* l2 = argument area on stack space (7th extended word). (BIAS + 128+6*8 = BIAS + 176). */
+.next:
+	ldx     [%i2+%l0],%l1   /* Read from arg buffer(%i2) to %l1. */
+	stx     %l1, [%l2+%l0]  /* Write %l1 to stack space(%l2). */
+	add     %l0, 8, %l0     /* Increment offset. */
+	sub     %i1, 8, %i1     /* Decrement copy size. */
+	cmp     %i1, 0
+	bgt     .next           /* loop while bytes remain */
+	nop                     /* branch delay slot */
+.do_call:
+	call    %i0  /* Call target. */
+	nop          /* call delay slot */
+	or      %o0, %g0, %i0   /* pass the target's %o0 result on: %i0 is our caller's %o0 after 'restore' */
+	jmpl    %i7 + 8, %g0    /* optimized restore;retl;nop */
+	restore                 /* executes in the jmpl delay slot: pops the frame/window while returning */
+
+/* 
+@@@ all of this needs to go in manual
+
+	or     %o0, %g0, %i0
+	or     %o1, %g0, %i1
+	or     %o2, %g0, %i2
+	or     %o3, %g0, %i3
+	return  %i7 + 8
+	nop
+
+Changes from v8:
+- fundamental data types
+	- (un)signed int: 8,16,32,64
+	- float: 32,64,128
+- float: IEEE 754 compliant
+	32 32-bit  float registers f0,f1,..,f31
+	32 64-bit  float registers f0,f2,..,f62
+	16 128-bit float registers f0,f4,..,f60
+
+Description:
+We need to raise up a dynamic stack frame.
+Therefore we need to compute the stack size. We do this first, 
+in the context of the caller as a leaf function (using o3 as scratch for addition).
+Then we raise the frame, ending up in o0-o3 is then i0-i3.
+
+
+Stack Layout:
+   BIAS = 2047
+
+   BIAS+XX: should be 16 byte aligned.
+                 ...
+       136: argument overflow area
+       128:  1 extended word  for struct/union pointer return value
+   BIAS+ 0: 16 extended words for registers (in/local) save area [register window]
+
+
+Function Argument Passing:
+- integer %o0..%o5 (caller view).
+- floating-point %f0 .. %f15
+- continuous memory starting at %sp+BIAS+136 (caller view).
+
+Register Usage:
+%fp0..%fp31  : floating-point arguments.
+%sp  or  %o6 : stack pointer, always 8 (or 16?)-byte aligned.
+%fp  or  %i6 : frame pointer.
+%i0  and %o0 : integer and pointer return values.
+%i7  and %o7 : return address. (caller puts return address to %o7, callee uses %i7)
+%fp0 and %fp1: return value (float).
+%i0..%i5     : input argument registers 
+%o0..%o5     : output argument registers
+%g0          : always zero, writes to it have no effect.
+
+Register Mappings:
+r0-7    -> globals
+r8-15   -> outs
+r16-r23 -> locals
+r24-r31 -> ins
+
+Integer Register Overview Table:
+ID   Class   Name   Description
+------------------------------------------------------------------------------
+0   globals   g0    always zero, writes to it have no effect
+1             g1
+2             g2
+3             g3
+4             g4
+5             g5
+6             g6
+7             g7
+8   out       o0    [int/ptr] arg 0 and return
+9             o1              arg 1
+10            o2              arg 2
+11            o3              arg 3
+12            o4              arg 4
+13            o5              arg 5
+14            o6    stack pointer
+15            o7
+16  local     l0    scratch
+17            l1
+18            l2
+19            l3
+20            l4
+21            l5
+22            l6
+23            l7
+24  in        i0    [int/ptr] arg 0 and return
+25            i1
+26            i2
+27            i3
+28            i4
+29            i5
+30            i6    frame pointer
+31            i7
+*/
+