diff doc/manual/callconvs/callconv_arm64.tex @ 480:cc78e34958e5

- arm64 doc additions w/ respect to aggregates, as well as fbsd and win disas examples
author Tassilo Philipp
date Tue, 01 Mar 2022 21:02:10 +0100
parents b47168dacba6
children 0fc22b5feac7
line wrap: on
line diff
--- a/doc/manual/callconvs/callconv_arm64.tex	Tue Mar 01 09:29:21 2022 +0100
+++ b/doc/manual/callconvs/callconv_arm64.tex	Tue Mar 01 21:02:10 2022 +0100
@@ -1,6 +1,6 @@
 %//////////////////////////////////////////////////////////////////////////////
 %
-% Copyright (c) 2014-2020 Daniel Adler <dadler@uni-goettingen.de>, 
+% Copyright (c) 2014-2022 Daniel Adler <dadler@uni-goettingen.de>, 
 %                         Tassilo Philipp <tphilipp@potion-studios.com>
 %
 % Permission to use, copy, modify, and distribute this software for any
@@ -17,9 +17,6 @@
 %
 %//////////////////////////////////////////////////////////////////////////////
 
-% ==================================================
-% ARM64
-% ==================================================
 \subsection{ARM64 Calling Conventions}
 
 \paragraph{Overview}
@@ -62,10 +59,10 @@
 {\bf x30}     & permanent, link register\\
 {\bf sp}      & permanent, stack pointer\\
 {\bf pc}      & program counter\\
-{\bf v0}      & scratch, first float parameter, float return value\\
-{\bf v1-v7}   & scratch, float parameters\\
+{\bf v0-v7}   & scratch, float parameters, return value\\
 {\bf v8-v15}  & lower 64 bits are permanent, scratch\\
 {\bf v16-v31} & scratch\\
+{\bf xzr}     & zero register, always zero\\
 \end{tabular*}
 \caption{Register usage on arm64}
 \end{table}
@@ -78,18 +75,24 @@
 \item first 8 integer arguments are passed using x0-x7
 \item first 8 floating point arguments are passed using d0-d7
 \item subsequent parameters are pushed onto the stack
-\item if the callee takes the address of one of the parameters and uses it to address other parameters (e.g. varargs) it has to copy - in its prolog - the first 8 integer and 8 floating-point registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though)
-\item structures and unions up to 16 bytes in size are passed by value (after rounding up the size to the nearest multiple of 8), as a sequence of dwords
-\item for a structure or union larger than 16 bytes in size, a pointer to it is passed
-\item if return value is a structure, a pointer pointing to the return value's space is passed in r0, the first parameter in r1, etc... (see {\bf return values})
-\item stack is required to be throughout eight-byte aligned
+\item if the callee takes the address of one of the parameters and uses it to address other parameters (e.g. varargs) it has to copy - in its prolog - the first 8 integer
+and 8 floating-point registers to a reserved stack area adjacent to the other parameters on the stack (only the unnamed integer parameters require saving, though)
+\item aggregates (struct, union) with 1 to 4 identical floating-point members (either float or double) are passed field-by-field (8-byte aligned if passed via stack), except if passed as a vararg
+\item other aggregates (struct, union) \textgreater\ 16 bytes in size are passed indirectly, as a pointer to a copy (if needed)
+\item all other aggregates (struct, union), after rounding up the size to the nearest multiple of 8, are passed as a sequence of dwords, like integers
+\item aggregates are never split across registers and stack, so if not enough registers are available an aggregate is passed via the stack (for aggregates that
would've been passed as floating point values, any still unused float registers will be skipped for any subsequent arg)
+\item stack is required throughout to be eight-byte aligned
 \end{itemize}
 
 \paragraph{Return values}
+
 \begin{itemize}
 \item integer return values use x0
 \item floating-point return values use d0
-\item otherwise, the caller allocates space, passes pointer to it to the callee through x8, and callee writes return value to this space
+\item aggregates (struct, union) that would be passed via registers if passed as a first param, are returned via those registers
+\item otherwise (e.g. if registers are exhausted, or the aggregate is \textgreater\ 16 bytes in size, \ldots), the caller allocates space, passes a pointer to it to the callee through
+x8, and callee writes return value to this space (note that this is not a hidden first param, as x8 is not used for passing params); the ptr to the aggregate is returned in x0
 \end{itemize}
 
 \paragraph{Stack layout}
@@ -116,7 +119,7 @@
                                          & \ldots                 &                                      &                              \\
                                          & q0                     &                                      &                              \\
 \hhline{~-~~}                                                                             
-register save area (with return address) &                        &                                      &                              \\ % fp will point here (to 1st arg) @@@ verify
+register save area (with return address) &                        &                                      &                              \\ % fp will point here (to 1st arg)
 \hhline{~-~~}                                                                             
 local data                               &                        &                                      &                              \\
 \hhline{~-~~}                                                                             
@@ -138,6 +141,11 @@
 \begin{itemize}
 \item arguments passed via stack use only the space they need, but are subject to type alignment requirements (which is 1 byte for char and bool, 2 for short, 4 for int and 8 for every other type)
 \item caller is required to sign and zero-extend arguments smaller than 32bits
+\item empty aggregates (allowed in C++, but non-standard in C, however compiler extensions exist) as parameters:
+\begin{itemize}
+\item allowed to be ignored in C
+\item allowed to be ignored in C++, if the aggregate is trivial, otherwise it's treated as an aggregate with a single one-byte field
+\end{itemize}
 \end{itemize}
 
 
@@ -150,5 +158,6 @@
 
 \begin{itemize}
 \item variadic function calls do not use any SIMD or floating point registers (for fixed and variable args), meaning first 8 params are passed via x0-x7, the rest via the stack
+\item a function that returns an aggregate indirectly via a pointer passed to it via x8 does not seem to be required to put that address in x0 on return (but it should be safe to do so)
 \end{itemize}