changeset 533:71c884e610f0

- integration of patches from Raphael Luba, Thekla, Inc.: * integration of aggregate-by-value (struct, union) support patch for x64 (win and sysv) * windows/x64 asm additions to specify how stack unwinds (help for debuggers, exception handling, etc.) * see Changelog for details - new calling convention modes for thiscalls (platform agnostic, was specific before) * new signature character for platform agnostic thiscalls ('*' / DC_SIGCHAR_CC_THISCALL) - dcCallF(), dcVCallF(), dcArgF() and dcVArgF(): * added support for aggregates-by-value (wasn't part of patch) * change that those functions don't implicitly call dcReset() anymore, which was unflexible (breaking change) - added macros to feature test implementation for aggregate-by-value and syscall support - changed libdyncall_s.lib and libdyncallback_s.lib order in callback test makefiles, as some toolchains are picky about order - doc: * man page updates to describe aggregate interface * manual overview changes to highlight platforms with aggregate-by-value support - test/plain: replaced tests w/ old/stale sctruct interface with new aggregate one
author Tassilo Philipp
date Thu, 21 Apr 2022 13:35:47 +0200
parents d4bf63ab9164
children 48eede2fa034
files ChangeLog doc/manual/manual_dyncall_api.tex doc/manual/manual_dyncallback_api.tex doc/manual/manual_overview.tex dyncall/CMakeLists.txt dyncall/Makefile.embedded dyncall/Makefile.generic dyncall/Nmakefile dyncall/dyncall.3 dyncall/dyncall.h dyncall/dyncall_aggregate.c dyncall/dyncall_aggregate.h dyncall/dyncall_aggregate_x64.c dyncall/dyncall_api.c dyncall/dyncall_call_x64.S dyncall/dyncall_call_x64_generic_masm.asm dyncall/dyncall_callf.c dyncall/dyncall_callvm.h dyncall/dyncall_callvm_arm32_arm.c dyncall/dyncall_callvm_arm32_arm_armhf.c dyncall/dyncall_callvm_arm32_thumb.c dyncall/dyncall_callvm_arm64.c dyncall/dyncall_callvm_arm64_apple.c dyncall/dyncall_callvm_mips_eabi.c dyncall/dyncall_callvm_mips_n32.c dyncall/dyncall_callvm_mips_n64.c dyncall/dyncall_callvm_mips_o32.c dyncall/dyncall_callvm_ppc32.c dyncall/dyncall_callvm_ppc64.c dyncall/dyncall_callvm_sparc.c dyncall/dyncall_callvm_sparc64.c dyncall/dyncall_callvm_x64.c dyncall/dyncall_callvm_x64.h dyncall/dyncall_callvm_x86.c dyncall/dyncall_signature.h dyncall/dyncall_struct.c dyncall/dyncall_struct.h dyncall/mkfile dyncallback/dyncall_args.h dyncallback/dyncall_args_arm32.c dyncallback/dyncall_args_arm64.c dyncallback/dyncall_args_arm64_apple.c dyncallback/dyncall_args_mips.c dyncallback/dyncall_args_mips64.c dyncallback/dyncall_args_mips_o32.c dyncallback/dyncall_args_ppc32.c dyncallback/dyncall_args_ppc32_sysv.c dyncallback/dyncall_args_ppc64.c dyncallback/dyncall_args_sparc32.c dyncallback/dyncall_args_sparc64.c dyncallback/dyncall_args_x64.c dyncallback/dyncall_args_x64.h dyncallback/dyncall_args_x86.c dyncallback/dyncall_callback.c dyncallback/dyncall_callback.h dyncallback/dyncall_callback_arm32.c dyncallback/dyncall_callback_arm64.c dyncallback/dyncall_callback_mips.c dyncallback/dyncall_callback_ppc32.c dyncallback/dyncall_callback_ppc64.c dyncallback/dyncall_callback_sparc32.c dyncallback/dyncall_callback_sparc64.c dyncallback/dyncall_callback_x64.S dyncallback/dyncall_callback_x64.c dyncallback/dyncall_callback_x64_masm.asm dyncallback/dyncall_callback_x86.c dyncallback/dyncallback.3 portasm/portasm-x64.S test/callback_plain/CMakeLists.txt test/callback_plain/Makefile.embedded test/callback_plain/Makefile.generic test/callback_plain/callback_plain.c test/callback_plain/mkfile test/callback_suite/CMakeLists.txt test/callback_suite/Makefile.embedded test/callback_suite/Makefile.generic test/callback_suite/main.c test/callback_suite_aggrs/CMakeLists.txt test/callback_suite_aggrs/Makefile.embedded test/callback_suite_aggrs/Makefile.generic test/callback_suite_aggrs/main.c test/callback_suite_aggrs/mkfile test/callf/main.c test/malloc_wx/Nmakefile test/plain/CMakeLists.txt test/plain/Makefile.embedded test/plain/Makefile.generic test/plain/Nmakefile test/plain/mkfile test/plain/test_aggrs.c test/plain/test_main.c test/plain/test_structs.c test/plain_c++/test_main.cc test/thunk/Nmakefile
diffstat 94 files changed, 3128 insertions(+), 1350 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sat Apr 16 15:00:58 2022 +0200
+++ b/ChangeLog	Thu Apr 21 13:35:47 2022 +0200
@@ -3,8 +3,37 @@
 
 Version 1.4 (upcoming)
 
+dyncall:
+  o aggregate-by-value (struct, union) support for x64 (win and sysv; thanks Raphael Luba!)
+  o new aggregate arg interface DCaggr replacing stale DCstruct (latter had shortcomings, was
+    mostly unused as it lacked stable implementations, didn't handle unions, ...)
+  o new calling convention modes for thiscalls (platform agnostic DC_CALL_C_DEFAULT_THIS, as well as
+    DC_CALL_C_X64_{WIN64,SYSV}_THIS (needed to handle C++ ABI rules for returning aggregates by value)
+  o new signature character for platform agnostic thiscalls ('*' / DC_SIGCHAR_CC_THISCALL)
+  o dcCallF(), dcVCallF(), dcArgF() and dcVArgF() don't implicitly call dcReset()
+    anymore, which was unflexible (breaking change)
+  o added macros to feature test implementation for aggregate-by-value and syscall support
+dyncallback:
+  o aggregate-by-value (struct, union) support for x64 (win and sysv; thanks Raphael Luba!)
+  o new aggregate arg interface DCaggr
 dynload:
   o build fix for ReactOS with RosBE/cmake/mingw-make environment
+tests:
+  o call_suite: simplified and extended to test also unsigned types, and _Bool
+  o callback_suite: refactored to be much more like call_suite test
+  o call_suite_aggrs: new test suite for calls with aggregate arguments and
+    return types (structs, unions and arrays as aggregate members)
+  o callback_suite_aggrs: new test suite, like call_suite_aggrs but for callbacks
+  o simplifications and refactoring to share more code (especially signature
+    and case generator) across test suites
+doc:
+  o callconv section additions about aggregate-by-value passing for all platforms
+  o new Linux-Std-Base-specific PPC32/SysV section, as aggregate retval handling differs from
+    default SysV for such systems
+  o man page changes/additions to document new aggregate interfaces
+  o replaced API description in manual with references to the manpages, to
+    avoid duplication and keeping things in sync
+  o general clarifications and cleanup
 
 
 Version 1.3 (2021/12/17)
@@ -327,7 +356,7 @@
     target "config" modified, phony without dependencies to other builds
   o bugfix: GNU fastcall calling convention for float and double arguments was wrong (no skip of register)
   o update: x86win32* suite tests are built on cygwin now, added total result output
-  o signature types change:
+  o signature char (breaking) changes:
     C Strings: 'S' -> 'Z'
     long: 'l' -> 'j'
     long long: 'L' -> 'l'
--- a/doc/manual/manual_dyncall_api.tex	Sat Apr 16 15:00:58 2022 +0200
+++ b/doc/manual/manual_dyncall_api.tex	Thu Apr 21 13:35:47 2022 +0200
@@ -1,7 +1,7 @@
 %//////////////////////////////////////////////////////////////////////////////
 %
-% Copyright (c) 2007,2010 Daniel Adler <dadler@uni-goettingen.de>, 
-%                         Tassilo Philipp <tphilipp@potion-studios.com>
+% Copyright (c) 2007,2010-2022 Daniel Adler <dadler@uni-goettingen.de>, 
+%                              Tassilo Philipp <tphilipp@potion-studios.com>
 %
 % Permission to use, copy, modify, and distribute this software for any
 % purpose with or without fee is hereby granted, provided that the above
--- a/doc/manual/manual_dyncallback_api.tex	Sat Apr 16 15:00:58 2022 +0200
+++ b/doc/manual/manual_dyncallback_api.tex	Thu Apr 21 13:35:47 2022 +0200
@@ -1,7 +1,7 @@
 %//////////////////////////////////////////////////////////////////////////////
 %
-% Copyright (c) 2007,2013 Daniel Adler <dadler@uni-goettingen.de>, 
-%                         Tassilo Philipp <tphilipp@potion-studios.com>
+% Copyright (c) 2007,2013-2022 Daniel Adler <dadler@uni-goettingen.de>, 
+%                              Tassilo Philipp <tphilipp@potion-studios.com>
 %
 % Permission to use, copy, modify, and distribute this software for any
 % purpose with or without fee is hereby granted, provided that the above
--- a/doc/manual/manual_overview.tex	Sat Apr 16 15:00:58 2022 +0200
+++ b/doc/manual/manual_overview.tex	Thu Apr 21 13:35:47 2022 +0200
@@ -148,13 +148,17 @@
 
 The feature matrix below gives a brief overview of the currently supported
 platforms. Different colors are used, where a green cell indicates a supported
-platform, yellow a platform that might work (but is untested) and red a platform
-that is currently unsupported. Gray cells are combinations that don't exist
-at the time of writing, or that are not taken into account.\\
-Light green cells mark complete feature support, as in dyncall and dyncallback. Dark green means basic support but lacking features (e.g. dyncall support, but not dyncallback).
-Please note that a green cell (even a light-green one) doesn't imply that all existing calling conventions/features/build tools are supported for that platform (but the most
-important). % @@@ The rightmost column gives a brief info about calling convention support or other notes.
-For detailed info about a platform's support consult the calling convention appendix.
+platform, with both, call and callback support, but lacking aggregate (struct,
+union) support, yellow a platform that might work (but is untested) and red a
+platform that is currently unsupported. Gray cells are combinations that don't
+exist at the time of writing, or that are not taken into account.\\
+Light green cells mark complete feature support, including passing aggregates
+(struct, union) by value. Dark green means basic support but lacking features
+(e.g. dyncall support, but not dyncallback).  Please note that a green cell
+(even a light-green one) doesn't imply that all existing calling
+conventions/features/build tools are supported for that platform (but the most
+important).
+% @@@ The rightmost column gives a brief info about calling convention support or other notes.
 
 
 % colors used to signalize undefined, not used, supported, etc. states.
@@ -165,19 +169,19 @@
 \newcommand{\markunkn}{\cellcolor{yellow}}
 \newcommand{\marknotx}{\cellcolor{gray1}}
 
-\newcommand{\OSwinfam} {\ninety{Windows family}      }
-\newcommand{\OSlinux}  {\ninety{Linux}               }
-\newcommand{\OSdarwin} {\ninety{macOS / iOS / Darwin}}
-\newcommand{\OSfreebsd}{\ninety{FreeBSD}             }
-\newcommand{\OSnetbsd} {\ninety{NetBSD}              }
-\newcommand{\OSopenbsd}{\ninety{OpenBSD}             }
-\newcommand{\OSdflybsd}{\ninety{DragonFlyBSD}        }
-\newcommand{\OSsunos}  {\ninety{Solaris / SunOS}     }
-\newcommand{\OSplanN}  {\ninety{Plan 9 / 9front}     }
-\newcommand{\OSbeos}   {\ninety{Haiku / BeOS}        }
-\newcommand{\OSminix}  {\ninety{Minix}               }
-\newcommand{\OSpsp}    {\ninety{Playstation Portable}}
-\newcommand{\OSnds}    {\ninety{Nintendo DS}         }
+\newcommand{\OSwinfam} {\ninety{Windows family}             }
+\newcommand{\OSlinux}  {\ninety{Linux}                      }
+\newcommand{\OSdarwin} {\ninety{macOS / iOS / Darwin}       }
+\newcommand{\OSfreebsd}{\ninety{FreeBSD}                    }
+\newcommand{\OSnetbsd} {\ninety{NetBSD}                     }
+\newcommand{\OSopenbsd}{\ninety{OpenBSD}                    }
+\newcommand{\OSdflybsd}{\ninety{DragonFlyBSD}               }
+\newcommand{\OSsunos}  {\ninety{Solaris / SunOS}            }
+\newcommand{\OSplanN}  {\ninety{Plan 9 / 9front}            }
+\newcommand{\OSbeos}   {\ninety{Haiku / BeOS}               }
+\newcommand{\OSminix}  {\ninety{Minix}                      }
+\newcommand{\OSpsp}    {\ninety{Playstation Portable (EABI)}}
+\newcommand{\OSnds}    {\ninety{Nintendo DS}                }
 
 % also include hardfloat/softfloat @@@
 \begin{table}[h]
@@ -186,45 +190,45 @@
 \arrayrulecolor{gray3}%            
 \multirow{2}{*}{ARM}       & EB & \marknotx  & \markunkn  & \marknotx  & \markunkn  & \markunkn  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{atpcs (arm \& thumb), eabi (arm \& thumb), armhf} \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- find@@@   ^- checked   ^- find@@@   ^- find@@@   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- new@@@   ^- checked    ^- checked   ^- checked
-                           & EL & \markunkn  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \markunkn  & \marknotx  & \markcmpl \\%& \\
+                           & EL & \markunkn  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \markunkn  & \marknotx  & \markimpl \\%& \\
 \hline%                           ^- find@@@   ^- dynOS     ^- real      ^- dynOS     ^- dynOS     ^- real/bbb  ^- checked   ^- checked   ^- checked   ^- new@@@   ^- new@@@     ^- checked   ^- checked
 \multirow{2}{*}{ARM64}     & EB & \marknotx  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{aapcs64, apple} \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-                           & EL & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markunkn  & \markcmpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
+                           & EL & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markunkn  & \markimpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- find@@@   ^- checked   ^- checked   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-\multirow{2}{*}{MIPS}      & EB & \marknotx  & \markcmpl  & \marknotx  & \markcmpl  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{o32 (hf \& sf), eabi (hf only)} \\
+\multirow{2}{*}{MIPS}      & EB & \marknotx  & \markimpl  & \marknotx  & \markimpl  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{o32 (hf \& sf), eabi (hf only)} \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- dynOS     ^- checked   ^- checked   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-                           & EL & \marknotx  & \markunkn  & \marknotx  & \markcmpl  & \markcmpl  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \markcmpl  & \marknotx \\%& \\
+                           & EL & \marknotx  & \markunkn  & \marknotx  & \markimpl  & \markimpl  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \markimpl  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- find@@@   ^- checked   ^- checked   ^- dynOS     ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-\multirow{2}{*}{MIPS64}    & EB & \marknotx  & \markunkn  & \marknotx  & \markcmpl  & \markunkn  & \markcmpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{n64 (hf only), n32 (hf only)} \\
+\multirow{2}{*}{MIPS64}    & EB & \marknotx  & \markunkn  & \marknotx  & \markimpl  & \markunkn  & \markimpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{n64 (hf only), n32 (hf only)} \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- find@@@   ^- checked   ^- checked   ^- find@@@   ^- real/ERlt ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-                           & EL & \marknotx  & \markcmpl  & \marknotx  & \markcmpl  & \markunkn  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
+                           & EL & \marknotx  & \markimpl  & \marknotx  & \markimpl  & \markunkn  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- dynOS     ^- checked   ^- checked   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
 \multirow{2}{*}{SuperH}    & EB & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
                            & EL & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknimp  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-\multirow{2}{*}{PowerPC}   & EB & \marknotx  & \markcmpl  & \markcmpl  & \markimpl  & \markcmpl  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{apple, sysv}\\
+\multirow{2}{*}{PowerPC}   & EB & \marknotx  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{apple, sysv}\\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- dynOS     ^-dynOS      ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
                            & EL & \marknotx  & \markunkn  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- find@@@   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-\multirow{2}{*}{PowerPC64} & EB & \marknotx  & \markcmpl  & \markunkn  & \markcmpl  & \marknotx  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{elf} \\
+\multirow{2}{*}{PowerPC64} & EB & \marknotx  & \markimpl  & \markunkn  & \markimpl  & \marknotx  & \markunkn  & \marknotx  & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \multirow{2}{*}{elf} \\
 \hhline{*{1}{|~}*{14}{|-}}%       ^- checked   ^- masanori@ ^- checked   ^- dynOS     ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
-                           & EL & \marknotx  & \markcmpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
+                           & EL & \marknotx  & \markimpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline%                           ^- checked   ^- masanori@ ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked   ^- checked
 m68k                       &    & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknimp  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline
 m88k                       &    & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknimp  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline
-x86                        &    & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \marknotx  & \marknotx \\%& cdecl, stdcall, fastcall (MS \& GNU), thiscall (MS \& GNU), plan9 \\
+x86                        &    & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \markimpl  & \marknotx  & \marknotx \\%& cdecl, stdcall, fastcall (MS \& GNU), thiscall (MS \& GNU), plan9 \\
 \hline
 x64                        &    & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \markcmpl  & \marknimp  & \markcmpl  & \marknotx  & \marknotx  & \marknotx \\%& ms, sysv\\
-\hline
+\hline%                                                                                                                      ^- @@@ check              ^- @@@ check
 Itanium                    &    & \marknimp  & \marknimp  & \marknotx  & \marknimp  & \marknimp  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline
-SPARC                      &    & \marknotx  & \markcmpl  & \marknotx  & \marknotx  & \markcmpl  & \markcmpl  & \marknotx  & \markcmpl  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& v7, v8\\
+SPARC                      &    & \marknotx  & \markimpl  & \marknotx  & \marknotx  & \markimpl  & \markimpl  & \marknotx  & \markimpl  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& v7, v8\\
 \hline
-SPARC64                    &    & \marknotx  & \markcmpl  & \marknotx  & \markcmpl  & \markcmpl  & \markcmpl  & \marknotx  & \markimpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& v9 \\
+SPARC64                    &    & \marknotx  & \markimpl  & \marknotx  & \markimpl  & \markimpl  & \markimpl  & \marknotx  & \markimpl  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& v9 \\
 \hline
 RISC-V                     &    & \marknotx  & \marknimp  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx  & \marknotx \\%& \\
 \hline
--- a/dyncall/CMakeLists.txt	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/CMakeLists.txt	Thu Apr 21 13:35:47 2022 +0200
@@ -34,7 +34,7 @@
 
 add_library(dyncall_s STATIC ${ASM_SRC} 
   dyncall_vector.c 
-  dyncall_struct.c 
+  dyncall_aggregate.c 
   dyncall_api.c 
   dyncall_callvm.c 
   dyncall_callvm_base.c
--- a/dyncall/Makefile.embedded	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/Makefile.embedded	Thu Apr 21 13:35:47 2022 +0200
@@ -1,7 +1,7 @@
 MAKEFILE = Makefile.embedded
 MAKE_CMD = ${MAKE} -f ${MAKEFILE} 
 TARGET	= libdyncall_s.a
-OBJS	= dyncall_vector.o dyncall_api.o dyncall_callvm.o dyncall_callvm_base.o dyncall_call.o dyncall_callf.o dyncall_struct.o
+OBJS	= dyncall_vector.o dyncall_api.o dyncall_callvm.o dyncall_callvm_base.o dyncall_call.o dyncall_callf.o dyncall_aggregate.o
 HEADERS	= dyncall_version.h dyncall_macros.h dyncall_config.h dyncall_types.h dyncall.h dyncall_signature.h dyncall_value.h dyncall_callf.h dyncall_alloc.h
 all: ${TARGET}
 libdyncall_s.a: ${OBJS}
--- a/dyncall/Makefile.generic	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/Makefile.generic	Thu Apr 21 13:35:47 2022 +0200
@@ -1,5 +1,5 @@
 LIBNAME = dyncall
-OBJS = dyncall_vector.o dyncall_api.o dyncall_callvm.o dyncall_callvm_base.o dyncall_call.o dyncall_callf.o dyncall_struct.o
+OBJS = dyncall_vector.o dyncall_api.o dyncall_callvm.o dyncall_callvm_base.o dyncall_call.o dyncall_callf.o dyncall_aggregate.o
 HEADERS = ${VPATH}/dyncall_version.h ${VPATH}/dyncall_macros.h ${VPATH}/dyncall_config.h ${VPATH}/dyncall_types.h ${VPATH}/dyncall.h ${VPATH}/dyncall_signature.h ${VPATH}/dyncall_value.h ${VPATH}/dyncall_callf.h ${VPATH}/dyncall_alloc.h
 LIB = lib${LIBNAME}_s.a
 .PHONY: all clean install
--- a/dyncall/Nmakefile	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/Nmakefile	Thu Apr 21 13:35:47 2022 +0200
@@ -34,7 +34,7 @@
 
 TARGETS = dyncall_s.lib #dyncall.lib dyncall.dll
 
-OBJS = dyncall_call_$(BUILD_ARCH)_generic_masm.obj dyncall_vector.obj dyncall_vector.obj dyncall_callvm.obj dyncall_callvm_base.obj dyncall_api.obj dyncall_callf.obj dyncall_struct.obj
+OBJS = dyncall_call_$(BUILD_ARCH)_generic_masm.obj dyncall_vector.obj dyncall_vector.obj dyncall_callvm.obj dyncall_callvm_base.obj dyncall_api.obj dyncall_callf.obj dyncall_aggregate.obj
 
 dyncall_s.lib: $(OBJS)
 	echo Creating library $@ ...
--- a/dyncall/dyncall.3	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall.3	Thu Apr 21 13:35:47 2022 +0200
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2007-2020 Daniel Adler <dadler AT uni-goettingen DOT de>, 
+.\" Copyright (c) 2007-2022 Daniel Adler <dadler AT uni-goettingen DOT de>, 
 .\"                         Tassilo Philipp <tphilipp AT potion-studios DOT com>
 .\" 
 .\" Permission to use, copy, modify, and distribute this software for any
@@ -47,6 +47,8 @@
 .Fn dcArgDouble "DCCallVM * vm" "DCdouble arg"
 .Ft void
 .Fn dcArgPointer "DCCallVM * vm" "DCpointer arg"
+.Ft void
+.Fn dcArgAggr "DCCallVM * vm" "const DCaggr * ag" "const void * value"
 .Ft DCvoid
 .Fn dcCallVoid "DCCallVM * vm" "DCpointer funcptr"
 .Ft DCbool
@@ -67,6 +69,10 @@
 .Fn dcCallDouble "DCCallVM * vm" "DCpointer funcptr"
 .Ft DCpointer
 .Fn dcCallPointer "DCCallVM * vm" "DCpointer funcptr"
+.Ft DCpointer
+.Fn dcCallAggr "DCCallVM * vm" "DCpointer funcptr" "const DCaggr * ag" "DCpointer ret"
+.Ft void
+.Fn dcBeginCallAggr "DCCallVM * vm" "const DCaggr * ag"
 .Ft void
 .Fn dcArgF "DCCallVM * vm" "const DCsigchar * signature" "..."
 .Ft void
@@ -75,6 +81,14 @@
 .Fn dcCallF "DCCallVM * vm" "DCValue * result" "DCpointer funcptr" "const DCsigchar * signature" "..."
 .Ft void
 .Fn dcVCallF "DCCallVM * vm" "DCValue * result" "DCpointer funcptr" "const DCsigchar * signature" "va_list args"
+.Ft DCaggr*
+.Fn dcNewAggr "DCsize maxFieldCount" "DCsize size"
+.Ft void
+.Fn dcAggrField "DCaggr* ag" "DCsigchar type" "DCint offset" "DCsize array_len" "..."
+.Ft void
+.Fn dcCloseAggr "DCaggr* ag"
+.Ft void
+.Fn dcFreeAggr "DCaggr* ag"
 .Sh DESCRIPTION
 The
 .Nm
@@ -95,7 +109,7 @@
 that nearly every platform comes with one or more distinct calling conventions, the
 .Nm
 library project intends to be a portable and open-source approach to the variety of
-compiler-specific binary interfaces, platform specific subtleties, and so on...
+compiler/toolchain/platform-specific binary interfaces subtleties, and so on...
 .Pp
 .Fn dcNewCallVM
 creates a new CallVM object, where
@@ -113,13 +127,13 @@
 .Pp
 .Fn dcReset
 resets the internal stack of arguments and prepares it for a new call. This
-function should be called after setting the call mode (using dcMode), but prior
-to binding arguments to the CallVM (except for when setting mode
-DC_SIGCHAR_CC_ELLIPSIS_VARARGS, which is used prior to binding varargs of
-variadic functions). Use it also when reusing a CallVM, as arguments don't get
-flushed automatically after a function call invocation.  Note: you should also
-call this function after initial creation of the a CallVM object, as
-dcNewCallVM doesn't do this, implicitly.
+function should be called after setting the initial/main call mode (using
+dcMode()), but prior to binding arguments to the CallVM (sometimes dcMode()
+calls are needed after pushing some args, e.g.  DC_SIGCHAR_CC_ELLIPSIS_VARARGS,
+which is used prior to binding varargs of variadic functions). Use it also when
+reusing a CallVM, as arguments don't get flushed automatically after a function
+call invocation. Note: you should also call this function after initial
+creation of the a CallVM object, as dcNewCallVM doesn't do this, implicitly.
 .Pp
 .Fn dcArgBool ,
 .Fn dcArgChar ,
@@ -128,9 +142,10 @@
 .Fn dcArgLong ,
 .Fn dcArgLongLong ,
 .Fn dcArgFloat ,
-.Fn dcArgDouble
+.Fn dcArgDouble ,
+.Fn dcArgPointer
 and
-.Fn dcArgPointer
+.Fn dcArgAggr
 are used to bind arguments of the named types to the CallVM object. Arguments should
 be bound in
 .Em "left to right"
@@ -144,16 +159,61 @@
 .Fn dcCallLong ,
 .Fn dcCallLongLong ,
 .Fn dcCallFloat ,
-.Fn dcCallDouble
+.Fn dcCallDouble ,
+.Fn dcCallPointer
 and
-.Fn dcCallPointer
-call the function with the bound arguments and returning the named type, where
+.Fn dcCallAggr
+call the function with the previously bound arguments and return the named
+type, where
 .Ar funcptr
 is a pointer to the function to call. After the invocation of the function
 call, the argument values are still bound to the CallVM and a second call
 using the same arguments can be issued. Call
-.Fn reset
-to clear the internal argument stack.
+.Fn dcReset
+(as described above) to clear the internal argument stack.
+.Pp
+The interfaces for passing and/or returning aggregates (struct, union) by value
+need to be explained as they are a bit more complex. Every such argument or
+return type needs some extra info describing its layout via a
+.Ft DCaggr
+structure (except for non-trivial C++ aggregates, see AGGREGATE DESCRIPTION for
+more information, below). Passing such arguments is then done by using
+.Fn dcArgAggr ,
+where
+.Ar ag
+is a pointer to the description and
+.Ar value
+is a pointer to the aggregate in question. Calling a function that returns an
+aggregate by value is done via two functions,
+.Fn dcBeginCallAggr ,
+which handles special cases to facilitate the implementation and
+.Sy must
+be called
+.Sy before
+pushing any arguments, and finally
+.Fn dcCallAggr
+where
+.Ar ag
+is a pointer to the description (for both calls) and
+.Ar ret
+points to memory large enough to hold the to be returned aggregate.
+.Fn dcCallAggr
+returns a pointer to
+.Ar ret .
+.Pp
+.Sy NOTE:
+C++ non-trivial aggregates (check with the std::is_trivial type trait) need
+some special handling. First of all, no aggregate description is needed and
+NULL must be passed wherever a
+.Ft DCaggr*
+argument is needed. Also, as
+.Nm
+is oblivious to how to do any custom/non-trivial construction or copy, and thus
+cannot do the copy of the aggregate, passed by-value, itself, the user has to
+provide such copies, where needed. E.g. when passing such an aggregate as an
+argument by-value, using
+.Fn dcArgAggr ,
+in order to preserver the call's by-value semantics.
 .Pp
 .Fn dcArgF ,
 .Fn dcVArgF ,
@@ -161,7 +221,7 @@
 and
 .Fn dcVCallF
 can be used to bind arguments in a printf-style call, using a signature
-string encoding the argument types and return type. The former 2 only bind
+string encoding the argument and return types. The former 2 only bind
 the arguments to the
 .Ar vm
 object (and ignore return types specified in the
@@ -169,9 +229,60 @@
 afterwards. The return value will be stored in
 .Ar result .
 The signature string also features calling convention mode selection.
-For information about the signature format, refer to the
+For information about the signature format, refer to dyncall_signature.h or the
 .Nm
-manual in PDF format.
+manual.
+.Pp
+For passing aggregates using
+.Fn dc*F
+functions, pass two varargs for each aggregate, first a pointer to DCaggr, then
+a pointer to the aggregate in question. For returning aggregates using those
+functions, pass
+.Sy two final extra
+arguments, first a pointer to DCaggr describing the return value, then a
+pointer to memory large enough to hold it. An explicit call do
+.Fn dcBeginCallAggr
+is not needed in those cases, and a pointer to the to be returned aggregate is
+returned via
+.Ar result .
+.Sh AGGREGATE DESCRIPTION
+In order to describe an aggregate (except for C++ non-trivial aggregates, as
+mentioned above), create a DCaggr object using
+.Fn dcNewAggr ,
+where
+.Ar maxFieldCount
+is greater or equal to the number of fields the aggregate has (a nested
+aggregate or an array is counted as one field), and
+.Ar size
+is the size of the aggregate (e.g. as determined by sizeof()).
+.Pp
+.Fn dcFreeAggr
+destroys the DCaggr object.
+.Pp
+.Fn dcAggrField
+is used to describe the aggregate, field-by-field (in order), with
+.Ar type
+being a DC_SIGCHAR_* (see dyncall_signature.h),
+.Ar offset
+being the offset of the field from the beginning of the aggregate (use C's
+offsetof(3)), and
+.Ar array_len
+being the number of array elements,
+.Sy iff
+the field is an array, otherwise use 1. For nested aggregates (when using
+DC_SIGCHAR_AGGREGATE as
+.Ft type ) ,
+one needs to pass the pointer to the nested aggregate's DCaggr object as last
+argument (in
+.Ar ... ) .
+.Pp
+Call
+.Fn dcCloseAggr
+after having described all fields of an aggregate.
+.Pp
+Note that c99 flexible array members do not count as a field, and must be
+omitted, as passing aggregates with a flexible array member by value in C would
+also omit it.
 .Sh EXAMPLE
 Let's say, we want to make a call to the function:
 .Bd -literal -offset indent
--- a/dyncall/dyncall.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: public header for library dyncall
  License:
 
-   Copyright (c) 2007-2020 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -35,14 +35,16 @@
 #endif
 
 typedef struct DCCallVM_    DCCallVM;
-typedef struct DCstruct_    DCstruct;
+typedef struct DCaggr_      DCaggr;
 
 /* Supported Calling Convention Modes */
 
-#define DC_CALL_C_DEFAULT               0
-#define DC_CALL_C_DEFAULT_THIS         99
-#define DC_CALL_C_ELLIPSIS            100
-#define DC_CALL_C_ELLIPSIS_VARARGS    101
+/* default */
+#define DC_CALL_C_DEFAULT               0   /* C default (platform native) */
+#define DC_CALL_C_DEFAULT_THIS         99   /* for C++ calls where first param is hidden this ptr (platform native) */
+#define DC_CALL_C_ELLIPSIS            100   /* to be set for vararg calls' non-hidden (e.g. C++ this ptr), named arguments */
+#define DC_CALL_C_ELLIPSIS_VARARGS    101   /* to be set for vararg calls' non-hidden (e.g. C++ this ptr), variable arguments (in ... part) */
+/* platform specific */
 #define DC_CALL_C_X86_CDECL             1
 #define DC_CALL_C_X86_WIN32_STD         2
 #define DC_CALL_C_X86_WIN32_FAST_MS     3
@@ -50,7 +52,7 @@
 #define DC_CALL_C_X86_WIN32_THIS_MS     5
 #define DC_CALL_C_X86_WIN32_THIS_GNU    DC_CALL_C_X86_CDECL /* alias - identical to cdecl (w/ this-ptr as 1st arg) */
 #define DC_CALL_C_X64_WIN64             7
-#define DC_CALL_C_X64_WIN64_THIS       70   /* only needed when using aggregate by value as return type    @@@STRUCT implement */
+#define DC_CALL_C_X64_WIN64_THIS       70   /* only needed when using aggregate by value as return type */
 #define DC_CALL_C_X64_SYSV              8
 #define DC_CALL_C_X64_SYSV_THIS         DC_CALL_C_X64_SYSV  /* alias */
 #define DC_CALL_C_PPC32_DARWIN          9
@@ -73,7 +75,9 @@
 #define DC_CALL_C_ARM64                22
 #define DC_CALL_C_PPC64                23
 #define DC_CALL_C_PPC64_LINUX          DC_CALL_C_PPC64 /* alias */
+/* syscalls, default */
 #define DC_CALL_SYS_DEFAULT           200
+/* syscalls, platform specific */
 #define DC_CALL_SYS_X86_INT80H_LINUX  201
 #define DC_CALL_SYS_X86_INT80H_BSD    202
 #define DC_CALL_SYS_X64_SYSCALL_SYSV  204
@@ -85,48 +89,44 @@
 #define DC_ERROR_NONE                0
 #define DC_ERROR_UNSUPPORTED_MODE   -1
 
-DC_API DCCallVM*  dcNewCallVM       (DCsize size);
-DC_API void       dcFree            (DCCallVM* vm);
-DC_API void       dcReset           (DCCallVM* vm);
+DC_API DCCallVM*  dcNewCallVM     (DCsize size);
+DC_API void       dcFree          (DCCallVM* vm);
+DC_API void       dcReset         (DCCallVM* vm);
 
-DC_API void       dcMode            (DCCallVM* vm, DCint mode);
+DC_API void       dcMode          (DCCallVM* vm, DCint mode);
+
+DC_API void       dcBeginCallAggr (DCCallVM* vm, const DCaggr* ag);
 
-DC_API void       dcArgBool         (DCCallVM* vm, DCbool     value);
-DC_API void       dcArgChar         (DCCallVM* vm, DCchar     value);
-DC_API void       dcArgShort        (DCCallVM* vm, DCshort    value);
-DC_API void       dcArgInt          (DCCallVM* vm, DCint      value);
-DC_API void       dcArgLong         (DCCallVM* vm, DClong     value);
-DC_API void       dcArgLongLong     (DCCallVM* vm, DClonglong value);
-DC_API void       dcArgFloat        (DCCallVM* vm, DCfloat    value);
-DC_API void       dcArgDouble       (DCCallVM* vm, DCdouble   value);
-DC_API void       dcArgPointer      (DCCallVM* vm, DCpointer  value);
-DC_API void       dcArgStruct     (DCCallVM* vm, DCstruct* s, DCpointer value);
+DC_API void       dcArgBool       (DCCallVM* vm, DCbool     value);
+DC_API void       dcArgChar       (DCCallVM* vm, DCchar     value);
+DC_API void       dcArgShort      (DCCallVM* vm, DCshort    value);
+DC_API void       dcArgInt        (DCCallVM* vm, DCint      value);
+DC_API void       dcArgLong       (DCCallVM* vm, DClong     value);
+DC_API void       dcArgLongLong   (DCCallVM* vm, DClonglong value);
+DC_API void       dcArgFloat      (DCCallVM* vm, DCfloat    value);
+DC_API void       dcArgDouble     (DCCallVM* vm, DCdouble   value);
+DC_API void       dcArgPointer    (DCCallVM* vm, DCpointer  value);
+DC_API void       dcArgAggr       (DCCallVM* vm, const DCaggr* ag, const void* value);
 
-DC_API void       dcCallVoid        (DCCallVM* vm, DCpointer funcptr);
-DC_API DCbool     dcCallBool        (DCCallVM* vm, DCpointer funcptr);
-DC_API DCchar     dcCallChar        (DCCallVM* vm, DCpointer funcptr);
-DC_API DCshort    dcCallShort       (DCCallVM* vm, DCpointer funcptr);
-DC_API DCint      dcCallInt         (DCCallVM* vm, DCpointer funcptr);
-DC_API DClong     dcCallLong        (DCCallVM* vm, DCpointer funcptr);
-DC_API DClonglong dcCallLongLong    (DCCallVM* vm, DCpointer funcptr);
-DC_API DCfloat    dcCallFloat       (DCCallVM* vm, DCpointer funcptr);
-DC_API DCdouble   dcCallDouble      (DCCallVM* vm, DCpointer funcptr);
-DC_API DCpointer  dcCallPointer     (DCCallVM* vm, DCpointer funcptr);
-DC_API void       dcCallStruct    (DCCallVM* vm, DCpointer funcptr, DCstruct* s, DCpointer returnValue);
+DC_API void       dcCallVoid      (DCCallVM* vm, DCpointer funcptr);
+DC_API DCbool     dcCallBool      (DCCallVM* vm, DCpointer funcptr);
+DC_API DCchar     dcCallChar      (DCCallVM* vm, DCpointer funcptr);
+DC_API DCshort    dcCallShort     (DCCallVM* vm, DCpointer funcptr);
+DC_API DCint      dcCallInt       (DCCallVM* vm, DCpointer funcptr);
+DC_API DClong     dcCallLong      (DCCallVM* vm, DCpointer funcptr);
+DC_API DClonglong dcCallLongLong  (DCCallVM* vm, DCpointer funcptr);
+DC_API DCfloat    dcCallFloat     (DCCallVM* vm, DCpointer funcptr);
+DC_API DCdouble   dcCallDouble    (DCCallVM* vm, DCpointer funcptr);
+DC_API DCpointer  dcCallPointer   (DCCallVM* vm, DCpointer funcptr);
+DC_API DCpointer  dcCallAggr      (DCCallVM* vm, DCpointer funcptr, const DCaggr* ag, DCpointer ret); /* retval is written to *ret, returns ret */
 
-DC_API DCint      dcGetError        (DCCallVM* vm);
+DC_API DCint      dcGetError      (DCCallVM* vm);
 
-#define DEFAULT_ALIGNMENT 0
-DC_API DCstruct*  dcNewStruct      (DCsize fieldCount, DCint alignment);
-DC_API void       dcStructField    (DCstruct* s, DCint type, DCint alignment, DCsize arrayLength);
-DC_API void       dcSubStruct      (DCstruct* s, DCsize fieldCount, DCint alignment, DCsize arrayLength);
-/* Each dcNewStruct or dcSubStruct call must be paired with a dcCloseStruct. */
-DC_API void       dcCloseStruct    (DCstruct* s);
-DC_API DCsize     dcStructSize     (DCstruct* s);
-DC_API DCsize     dcStructAlignment(DCstruct* s);
-DC_API void       dcFreeStruct     (DCstruct* s);
-
-DC_API DCstruct*  dcDefineStruct  (const char* signature);
+DC_API DCaggr*    dcNewAggr       (DCsize maxFieldCount, DCsize size);
+DC_API void       dcFreeAggr      (DCaggr* ag);
+/* if type == DC_SIGCHAR_AGGREGATE, pass DCaggr* of nested struct/union in ...  */
+DC_API void       dcAggrField     (DCaggr* ag, DCsigchar type, DCint offset, DCsize array_len, ...);
+DC_API void       dcCloseAggr     (DCaggr* ag);   /* to indicate end of struct definition, required */
 
 
 /* helpers */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_aggregate.c	Thu Apr 21 13:35:47 2022 +0200
@@ -0,0 +1,104 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_aggregate.c
+ Description: C interface to compute struct size
+ License:
+
+   Copyright (c) 2021-2022 Tassilo Philipp <tphilipp@potion-studios.com>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+
+#include "dyncall.h"
+#include "dyncall_signature.h"
+#include "dyncall_aggregate.h"
+#include "dyncall_alloc.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdarg.h>
+
+
+
+#if defined(DC__Arch_AMD64) && defined(DC_UNIX)
+#  include "dyncall_aggregate_x64.c"
+#else
+static void dcFinishAggr(DCaggr *ag)
+{
+}
+#endif
+
+
+DCaggr* dcNewAggr(DCsize maxFieldCount, DCsize size)
+{
+	DCaggr* ag = (DCaggr*)dcAllocMem(sizeof(DCaggr) + maxFieldCount * sizeof(DCfield));
+	ag->n_fields = 0;
+	ag->size = size;
+	return ag;
+}
+
+
+void dcAggrField(DCaggr* ag, DCsigchar type, DCint offset, DCsize array_len, ...)
+{
+	DCfield *f = ag->fields + (ag->n_fields++);
+	f->type = type;
+	f->offset = offset;
+	f->array_len = array_len;
+	f->sub_aggr = NULL;
+	switch(type) {
+		case DC_SIGCHAR_BOOL:       f->size = sizeof(DCbool);     break;
+		case DC_SIGCHAR_CHAR:       
+		case DC_SIGCHAR_UCHAR:      f->size = sizeof(DCchar);     break;
+		case DC_SIGCHAR_SHORT:      
+		case DC_SIGCHAR_USHORT:     f->size = sizeof(DCshort);    break;
+		case DC_SIGCHAR_INT:        
+		case DC_SIGCHAR_UINT:       f->size = sizeof(DCint);      break;
+		case DC_SIGCHAR_LONG:       
+		case DC_SIGCHAR_ULONG:      f->size = sizeof(DClong);     break;
+		case DC_SIGCHAR_LONGLONG:   
+		case DC_SIGCHAR_ULONGLONG:  f->size = sizeof(DClonglong); break;
+		case DC_SIGCHAR_FLOAT:      f->size = sizeof(DCfloat);    break;
+		case DC_SIGCHAR_DOUBLE:     f->size = sizeof(DCdouble);   break;
+		case DC_SIGCHAR_POINTER:
+		case DC_SIGCHAR_STRING:     f->size = sizeof(DCpointer);  break;
+		case DC_SIGCHAR_AGGREGATE:
+		{
+			va_list ap;
+			va_start(ap, array_len);
+			f->sub_aggr = va_arg(ap, const DCaggr*);
+			va_end(ap);
+
+			f->size = f->sub_aggr->size;
+			break;
+		}
+		default:
+			assert(0);
+	}
+}
+
+
+void dcCloseAggr(DCaggr* ag)
+{
+	dcFinishAggr(ag);
+}
+
+
+void dcFreeAggr(DCaggr* ag)
+{
+	dcFreeMem(ag);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_aggregate.h	Thu Apr 21 13:35:47 2022 +0200
@@ -0,0 +1,72 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_aggregate.h
+ Description: C interface to compute struct size
+ License:
+
+   Copyright (c) 2021-2022 Tassilo Philipp <tphilipp@potion-studios.com>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+#ifndef DYNCALL_STRUCT_H
+#define DYNCALL_STRUCT_H
+
+#include "dyncall.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif 
+
+#if defined(DC_UNIX) && defined(DC__Arch_AMD64)
+
+/* x64 param classification - only used for aggregates, so comments might be aggregate specific */
+#  define DC_SYSV_MAX_NUM_CLASSES 8  /* max num of aggregate qwords to be classified; constant defined by call conv */
+#  define SYSVC_NONE             0  /* end of classification !code relies on this being 0! */
+#  define SYSVC_INTEGER      (1<<0) /* (signed and unsigned) _Bool/bool, char, short, int, long, long long and pointers (also __int128, but treated as two longs) */
+#  define SYSVC_SSE          (1<<1) /* float, double and __m64, as well as least significant half of __float128 and __m128 (also complex float/double, but treated as two float/double) */
+#  define SYSVC_SSEUP        (1<<2) /* @@@AGGR currently unsupported/unused: most significant half of __float128 and __m128, most significant parts of __m256 and __m512 */
+#  define SYSVC_X87          (1<<3) /* @@@AGGR currently unsupported/unused: 64bit mantissa of type long double (80bit x87 extended precision format) */
+#  define SYSVC_X87UP        (1<<4) /* @@@AGGR currently unsupported/unused: 16bit exponent plus 6 bytes of padding of type long double (80bit x87 extended precision format) */
+#  define SYSVC_COMPLEX_X87  (1<<5) /* @@@AGGR currently unsupported/unused: complex long double */
+#  define SYSVC_MEMORY       (1<<6) /* for everything not fitting or allowed in regs given call conv (if class[0] == SYSVC_MEMORY, shortcut to pass entire aggregate via memory) */
+
+#endif
+
+
+typedef struct DCfield_ {
+	DCsize offset, size, array_len;
+	DCsigchar type;
+	const DCaggr* sub_aggr;
+} DCfield;
+
+struct DCaggr_ {
+	DCsize size, n_fields;
+#if defined(DC_UNIX) && defined(DC__Arch_AMD64)
+	DCuchar sysv_classes[DC_SYSV_MAX_NUM_CLASSES]; /* !code relies on this to be 64 bits! */
+#endif
+	DCfield fields[];
+};
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* DYNCALL_H */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dyncall/dyncall_aggregate_x64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -0,0 +1,195 @@
+/*
+
+ Package: dyncall
+ Library: dyncall
+ File: dyncall/dyncall_aggregate_x64.c
+ Description:
+ License:
+
+   Copyright (c) 2021-2022 Tassilo Philipp <tphilipp@potion-studios.com>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+#if defined(DC_UNIX)
+
+#define DC_ONE_8BYTE      8
+#define DC_TWO_8BYTES   2*8
+#define DC_EIGHT_8BYTES 8*8
+
+/* helper - long long mask with each byte being X */
+#define LLBYTE(X) ((X)&0xFFULL)
+#define SYSVC_CHECK_ALL_CLASSES(X) ((LLBYTE(X)<<56)|(LLBYTE(X)<<48)|(LLBYTE(X)<<40)|(LLBYTE(X)<<32)|(LLBYTE(X)<<24)|(LLBYTE(X)<<16)|(LLBYTE(X)<<8)|LLBYTE(X))
+
+
+static DCuchar dc_get_sysv_class_for_8byte(const DCaggr *ag, int index, int base_offset)
+{
+  int qword_offset = index * DC_ONE_8BYTE, i;
+  DCuchar clz = SYSVC_NONE;
+
+  for(i = 0; i < ag->n_fields; i++) {
+    const DCfield *f = ag->fields + i;
+    DCsize offset = base_offset + f->offset;
+
+    /* field outside of qword at index? */
+    if(offset >= (qword_offset + DC_ONE_8BYTE) || (offset + f->size * f->array_len) <= qword_offset)
+      continue;
+
+    DCuchar new_class = SYSVC_NONE;
+
+    switch (f->type) {
+      case DC_SIGCHAR_BOOL:
+      case DC_SIGCHAR_CHAR:
+      case DC_SIGCHAR_UCHAR:
+      case DC_SIGCHAR_SHORT:
+      case DC_SIGCHAR_USHORT:
+      case DC_SIGCHAR_INT:
+      case DC_SIGCHAR_UINT:
+      case DC_SIGCHAR_LONG:
+      case DC_SIGCHAR_ULONG:
+      case DC_SIGCHAR_LONGLONG:
+      case DC_SIGCHAR_ULONGLONG:
+      case DC_SIGCHAR_STRING:
+      case DC_SIGCHAR_POINTER:
+        new_class = SYSVC_INTEGER;
+        break;
+      case DC_SIGCHAR_FLOAT:
+      case DC_SIGCHAR_DOUBLE:
+        new_class = SYSVC_SSE;
+        break;
+      case DC_SIGCHAR_AGGREGATE:
+        new_class = dc_get_sysv_class_for_8byte(f->sub_aggr, index, offset);
+        break;
+      /*case DClongdouble, DCcomplexfloat DCcomplexdouble DCcomplexlongdouble etc... -> x87/x87up/complexx87 classes @@@AGGR implement */
+    }
+
+    if (clz == new_class)
+      continue;
+
+    if (clz == SYSVC_NONE)
+      clz = new_class;
+    else if (new_class == SYSVC_NONE)
+      continue;
+    else if (clz == SYSVC_MEMORY || new_class == SYSVC_MEMORY)
+      clz = SYSVC_MEMORY;
+    else if (clz == SYSVC_INTEGER || new_class == SYSVC_INTEGER)
+      clz = SYSVC_INTEGER;
+    /* @@@AGGR implement when implementing x87 types
+    else if ((clz & (SYSVC_X87|SYSVC_X87UP|SYSVC_COMPLEX_X87)) || (new_class & (SYSVC_X87|SYSVC_X87UP|SYSVC_COMPLEX_X87)))
+      clz = SYSVC_MEMORY;*/
+    else
+      clz = SYSVC_SSE;
+  }
+
+  return clz;
+}
+
+
+static void dc_get_sysv_classes_for_aggr(const DCaggr *ag, DCuchar *classes)
+{
+  int i;
+
+#if 1 /* this is the optimized version that only respects types supported by dyncall */
+
+  if(ag->size > DC_TWO_8BYTES) { /* @@@AGGR not checking if a field is unaligned */
+    classes[0] = SYSVC_MEMORY;
+    return;
+  }
+
+  /* abi doc: "If one of the classes is MEMORY, the whole argument is passed in memory." */
+  classes[0] = dc_get_sysv_class_for_8byte(ag, 0, 0);
+  if(classes[0] != SYSVC_MEMORY) {
+    classes[1] = dc_get_sysv_class_for_8byte(ag, 1, 0);
+    if(classes[1] == SYSVC_MEMORY)
+      classes[0] = SYSVC_MEMORY;
+    else
+      classes[2] = SYSVC_NONE;
+  }
+  /* @@@AGGR what would happen with alignment-enforced padding >= 8? Then no field would cover the eightbyte @@@test */
+
+#else /* this would be the version following the ABI more closely, to be implemented fully or partly when those types get supported by dyncall */
+
+  /* abi doc: "If the size of an object is larger than eight qwords, or it
+   *           contains unaligned fields, it has class MEMORY."
+   * note:     ABI specs <v1.0 (2018) specify "four qwords", instead (b/c _m512 was added, later) */ /* @@@AGGR not checking if a field is unaligned */
+  if(ag->size > DC_EIGHT_8BYTES) {
+    classes[0] = SYSVC_MEMORY;
+    return;
+  }
+
+  /* classify fields according to each of max 8 qwords */
+  for(i = 0; i < DC_SYSV_MAX_NUM_CLASSES; ++i) {
+    classes[i] = dc_get_sysv_class_for_8byte(ag, i, 0);
+
+    /* abi doc: "If one of the classes is MEMORY, the whole argument is passed in memory." */
+    if(classes[i] == SYSVC_MEMORY) {
+      classes[0] = SYSVC_MEMORY;
+      return;
+    }
+
+    /* stop eightbyte classification on first SYSVC_NONE returned */
+    /* @@@AGGR what would happen with alignment-enforced padding >= 8? Then no field would cover the eightbyte @@@test */
+    if(classes[i] == SYSVC_NONE)
+      break;
+  }
+
+  /* Do post merger cleanup */
+
+  /* abi doc: "If X87UP is not preceded by X87, the whole argument is passed in memory." */
+  for(i = 1; i < DC_SYSV_MAX_NUM_CLASSES; ++i) {
+    if (classes[i-1] == SYSVC_X87 && classes[i] != SYSVC_X87UP) {
+      classes[0] = SYSVC_MEMORY;
+      return;
+    }
+  }
+
+  /* abi doc: "If the size of the aggregate exceeds two qwords and the first eightbyte isn't
+   *           SSE or any other eightbyte isn't SSEUP, the whole argument is passed in memory." */
+  if(ag->size > DC_TWO_8BYTES) {
+    DClonglong mask = SYSVC_CHECK_ALL_CLASSES(SYSVC_SSEUP|SYSVC_NONE) ^ (LLBYTE(SYSVC_SSE|SYSVC_SSEUP|SYSVC_NONE)<<56);
+    if((*(DClonglong*)ag->sysv_classes & mask) != *(DClonglong*)ag->sysv_classes) {
+      classes[0] = SYSVC_MEMORY;
+      return;
+    }
+  }
+
+  /* abi doc: "If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE." */
+  for(i = 1; i < DC_SYSV_MAX_NUM_CLASSES; ++i) {
+    DCuchar clz = classes[i];
+    if(classes[i] == SYSVC_SSEUP && !(classes[i-1] & (SYSVC_SSE|SYSVC_SSEUP)))
+      classes[i] = SYSVC_SSE;
+  }
+
+#endif
+}
+
+
+void dcFinishAggr(DCaggr *ag)
+{
+  dc_get_sysv_classes_for_aggr(ag, ag->sysv_classes);
+
+  /* @@@AGGR implement when implementing x87 types
+  for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i)
+    assert((ag->sysv_classes[i] & (SYSVC_MEMORY|SYSVC_INTEGER|SYSVC_SSE)) && "Unsupported System V class detected in struct");*/
+}
+
+#else
+
+void dcFinishAggr(DCaggr *ag)
+{
+}
+
+#endif
+
--- a/dyncall/dyncall_api.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_api.c	Thu Apr 21 13:35:47 2022 +0200
@@ -88,9 +88,9 @@
   vm->mVTpointer->argPointer(vm, x);
 }
 
-void dcArgStruct(DCCallVM* vm, DCstruct* s, DCpointer x)
+void dcArgAggr(DCCallVM* vm, const DCaggr* ag, const void* x)
 {
-  vm->mVTpointer->argStruct(vm, s, x);
+  vm->mVTpointer->argAggr(vm, ag, x);
 }
 
 
@@ -144,9 +144,17 @@
   return vm->mVTpointer->callPointer(vm, funcptr);
 }
 
-void dcCallStruct(DCCallVM* vm, DCpointer funcptr, DCstruct* s, DCpointer x)
+void dcBeginCallAggr(DCCallVM* vm, const DCaggr* ag)
 {
-  vm->mVTpointer->callStruct(vm, funcptr, s, x);
+  if (vm->mVTpointer->beginAggr) {
+    vm->mVTpointer->beginAggr(vm, ag);
+  }
+}
+
+DCpointer dcCallAggr(DCCallVM* vm, DCpointer funcptr, const DCaggr* ag, DCpointer ret)
+{
+  vm->mVTpointer->callAggr(vm, funcptr, ag, ret);
+  return ret;
 }
 
 DCint dcGetError(DCCallVM *vm)
--- a/dyncall/dyncall_call_x64.S	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_call_x64.S	Thu Apr 21 13:35:47 2022 +0200
@@ -84,6 +84,22 @@
 	RET()
 END_PROC(dcCall_x64_sysv)
 
+/* wrapper for dcCall_x64_sysv to grab 4 regs used to return (small) aggregate by value */
+GLOBAL(dcCall_x64_sysv_aggr)
+BEGIN_PROC(dcCall_x64_sysv_aggr)
+	PUSH(R9)			/* preserve ptr to copy retval regs to (also realigns stack) */
+	CALL(CSYM(dcCall_x64_sysv))	/* params (in regs) passed-through to next call, as-is */
+	POP(R9)				/* get ptr to retval regs back */
+
+	/* copy regs holding aggregate data to provided space (pointed to by r12) */
+	MOV(RAX, QWORD(R9,0))
+	MOV(RDX, QWORD(R9,8))
+	MOVSD(XMM0, QWORD(R9,16))
+	MOVSD(XMM1, QWORD(R9,24))
+
+	RET()
+END_PROC(dcCall_x64_sysv_aggr)
+
 /*---------------------------------------------------------------------------
 
   Call Kernel for x64 Win64
@@ -96,14 +112,19 @@
 
 */
 
-GLOBAL(dcCall_x64_win64)
-BEGIN_PROC(dcCall_x64_win64)
+GLOBAL_FRAME(dcCall_x64_win64)
+FRAME_BEGIN_PROC(dcCall_x64_win64)
 
 	PUSH(RBP)			/* Pseudo-prolog - preserve RBP. */
+	FRAME_PUSH_REG(RBP)
 	PUSH(RSI)			/* Preserve RSI and RDI. */
+	FRAME_PUSH_REG(RSI)
 	PUSH(RDI)
+	FRAME_PUSH_REG(RDI)
 
 	MOV(RSP,RBP)			/* Store stack pointer in RBP. */
+	FRAME_SET(0, RBP)
+	FRAME_ENDPROLOG()
 
 	ADD(LIT(15),RCX)		/* Align stack size to 16 bytes. */
 	AND(LIT(-16),RCX)
@@ -143,6 +164,18 @@
 
 END_PROC(dcCall_x64_win64)
 
+GLOBAL(dcCall_x64_win64_aggr)
+BEGIN_PROC(dcCall_x64_win64_aggr)
+	SUB(LIT(8), RSP)		/* Re-align the stack */
+	CALL(CSYM(dcCall_x64_win64))	/* params (in regs) passed-through to next call, as-is */
+	ADD(LIT(8), RSP)		/* Restore the stack pointer */
+
+	MOV(QWORD(RSP, 40), R8)		/* ptr to aggregate mem -> R8 (passed as only stack arg, 0x40 to skip ret addr and spill area */
+	MOV(RAX, QWORD(R8, 0))		/* Copy aggregate value to memory */
+
+	RET()
+END_PROC(dcCall_x64_win64_aggr)
+
 /*---------------------------------------------------------------------------
 
   Call Kernel for x64 System V syscalls
@@ -170,3 +203,5 @@
 
 END_ASM
 
+/* vim: set ts=8: */
+
--- a/dyncall/dyncall_call_x64_generic_masm.asm	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_call_x64_generic_masm.asm	Thu Apr 21 13:35:47 2022 +0200
@@ -34,12 +34,28 @@
  pop RBP
  ret
 dcCall_x64_sysv ENDP
-dcCall_x64_win64 PROC
+dcCall_x64_sysv_aggr PROC
 OPTION PROLOGUE:NONE, EPILOGUE:NONE
+ push R9
+ call dcCall_x64_sysv
+ pop R9
+ mov qword ptr [R9+0],RAX
+ mov qword ptr [R9+8],RDX
+ movsd qword ptr [R9+16],XMM0
+ movsd qword ptr [R9+24],XMM1
+ ret
+dcCall_x64_sysv_aggr ENDP
+dcCall_x64_win64 PROC FRAME
+OPTION EPILOGUE:NONE
  push RBP
+ .pushreg RBP
  push RSI
+ .pushreg RSI
  push RDI
+ .pushreg RDI
  mov RBP,RSP
+ .setframe RBP, 0
+ .endprolog
  add RCX,15
  and RCX,-16
  sub RSP,RCX
@@ -66,6 +82,15 @@
  pop RBP
  ret
 dcCall_x64_win64 ENDP
+dcCall_x64_win64_aggr PROC
+OPTION PROLOGUE:NONE, EPILOGUE:NONE
+ sub RSP,8
+ call dcCall_x64_win64
+ add RSP,8
+ mov R8,qword ptr [RSP+40]
+ mov qword ptr [R8+0],RAX
+ ret
+dcCall_x64_win64_aggr ENDP
 dcCall_x64_syscall_sysv PROC
 OPTION PROLOGUE:NONE, EPILOGUE:NONE
  mov RAX,RSI
--- a/dyncall/dyncall_callf.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callf.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: formatted call C interface (extension module)
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>, 
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>, 
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,13 +28,24 @@
 #include "dyncall_callf.h"
 
 
+static void handle_mode(DCCallVM* vm, const DCsigchar** sigptr)
+{
+  if(*((*sigptr)+1) != '\0') {
+    DCint mode = dcGetModeFromCCSigChar(*(*sigptr)++);
+    if(mode != DC_ERROR_UNSUPPORTED_MODE)
+      dcMode(vm, mode);
+  }
+}
+
+
 /* Shareable implementation for argument binding used in ArgF and CallF below. */
 static void dcArgF_impl(DCCallVM* vm, const DCsigchar** sigptr, va_list args)
 {
   DCsigchar ch;
-  dcReset(vm);
   while((ch=*(*sigptr)++) != '\0' && ch != DC_SIGCHAR_ENDARG) {
     switch(ch) {
+      /* calling convention modes */
+      case DC_SIGCHAR_CC_PREFIX: handle_mode(vm, sigptr); break;
       /* types */
       case DC_SIGCHAR_BOOL:      dcArgBool    (vm, (DCbool)           va_arg(args, DCint     )); break;
       case DC_SIGCHAR_CHAR:      dcArgChar    (vm, (DCchar)           va_arg(args, DCint     )); break;
@@ -51,14 +62,12 @@
       case DC_SIGCHAR_DOUBLE:    dcArgDouble  (vm, (DCdouble)         va_arg(args, DCdouble  )); break;
       case DC_SIGCHAR_POINTER:   dcArgPointer (vm, (DCpointer)        va_arg(args, DCpointer )); break;
       case DC_SIGCHAR_STRING:    dcArgPointer (vm, (DCpointer)        va_arg(args, DCpointer )); break;
-      /* calling convention modes */
-      case DC_SIGCHAR_CC_PREFIX:
-        if(*((*sigptr)+1) != '\0') {
-          DCint mode = dcGetModeFromCCSigChar(*(*sigptr)++);
-          if(mode != DC_ERROR_UNSUPPORTED_MODE)
-            dcMode(vm, mode);
-        }
+      case DC_SIGCHAR_AGGREGATE: {
+        /* aggregates expect 2 va args, a DCaggr*, then a ptr to the aggregate */
+        DCaggr* ag = va_arg(args, DCaggr*);
+        dcArgAggr(vm, ag, va_arg(args, DCpointer));
         break;
+      }
     }
   }
 }
@@ -76,11 +85,66 @@
   va_end(va);
 }
 
+
+/* msvc introduced C99'w va_copy() late (in 2013 w/ msvc 18.00); plan9 APE does
+ * not have it either; luckily given their va_list being only a ptr in both
+ * cases, work around the issue for older versions */
+#if (defined(DC__C_MSVC) || defined(DC__OS_Plan9)) && !defined(va_copy)
+	#define va_copy(dst, src) ((dst)=(src))
+#endif
+
+
 void dcVCallF(DCCallVM* vm, DCValue* result, DCpointer funcptr, const DCsigchar* signature, va_list args)
 {
+  DCaggr* ret_ag = NULL; /* only needed for when func returns an aggregate      */
   const DCsigchar* ptr = signature;
+
+  /* need preparatory call if return type is an aggregate, so check end of sig */
+  /* @@@ugly */
+  while(*ptr && ptr[1]) ++ptr;
+  if(*ptr == DC_SIGCHAR_AGGREGATE) {
+    va_list args_;
+    va_copy(args_, args);
+
+    /* iterate va_list to get return type related args*/
+    ptr = signature;
+    while(*ptr) {
+      switch(*ptr++) {
+        /* calling convention modes */
+        case DC_SIGCHAR_CC_PREFIX: handle_mode(vm, &ptr); break; /* needs handling before dcBeginCallAggr */
+        /* types */
+        case DC_SIGCHAR_BOOL:
+        case DC_SIGCHAR_CHAR:
+        case DC_SIGCHAR_UCHAR:
+        case DC_SIGCHAR_SHORT:
+        case DC_SIGCHAR_USHORT:
+        case DC_SIGCHAR_INT:
+        case DC_SIGCHAR_UINT:      va_arg(args_, DCint     ); break;
+        case DC_SIGCHAR_LONG:
+        case DC_SIGCHAR_ULONG:     va_arg(args_, DClong    ); break;
+        case DC_SIGCHAR_LONGLONG:
+        case DC_SIGCHAR_ULONGLONG: va_arg(args_, DClonglong); break;
+        case DC_SIGCHAR_FLOAT:
+        case DC_SIGCHAR_DOUBLE:    va_arg(args_, DCdouble  ); break;
+        case DC_SIGCHAR_POINTER:
+        case DC_SIGCHAR_STRING:    va_arg(args_, DCpointer ); break;
+        case DC_SIGCHAR_AGGREGATE:
+          /* aggregate as retval expects 2 more va args, a DCaggr*, then a ptr to the aggregate */
+          ret_ag = va_arg(args_, DCaggr*);
+          result->p = va_arg(args_, DCpointer);
+          break;
+      }
+    }
+    dcBeginCallAggr(vm, ret_ag);
+
+	va_end(args_);
+  }
+
+  /* push args */
+  ptr = signature;
   dcArgF_impl(vm, &ptr, args);
 
+  /* call */
   switch(*ptr) {
     case DC_SIGCHAR_VOID:                   dcCallVoid             (vm,funcptr); break;
     case DC_SIGCHAR_BOOL:       result->B = dcCallBool             (vm,funcptr); break;
@@ -98,6 +162,10 @@
     case DC_SIGCHAR_DOUBLE:     result->d = dcCallDouble           (vm,funcptr); break;
     case DC_SIGCHAR_POINTER:    result->p = dcCallPointer          (vm,funcptr); break;
     case DC_SIGCHAR_STRING:     result->Z = (DCstring)dcCallPointer(vm,funcptr); break;
+    case DC_SIGCHAR_AGGREGATE: {
+      result->p = dcCallAggr(vm, funcptr, ret_ag, result->p);
+      break;
+    }
   }
 }
 
--- a/dyncall/dyncall_callvm.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Common call vm binary interface.
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>, 
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>, 
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -46,40 +46,43 @@
 {
   void         (*free)           (DCCallVM* vm);
   void         (*reset)          (DCCallVM* vm);
-  void         (*mode)           (DCCallVM* vm,DCint        mode);
-  void         (*argBool)        (DCCallVM* vm,DCbool          B);
-  void         (*argChar)        (DCCallVM* vm,DCchar          c);
-  void         (*argShort)       (DCCallVM* vm,DCshort         s);
-  void         (*argInt)         (DCCallVM* vm,DCint           i);
-  void         (*argLong)        (DCCallVM* vm,DClong          l);
-  void         (*argLongLong)    (DCCallVM* vm,DClonglong      L);
-  void         (*argFloat)       (DCCallVM* vm,DCfloat         f);
-  void         (*argDouble)      (DCCallVM* vm,DCdouble        d);
-  void         (*argPointer)     (DCCallVM* vm,DCpointer       p);
-  void         (*argStruct)      (DCCallVM* vm,DCstruct* s, DCpointer p);
-  void         (*callVoid)       (DCCallVM* vm,DCpointer funcptr);
-  DCbool       (*callBool)       (DCCallVM* vm,DCpointer funcptr);
-  DCchar       (*callChar)       (DCCallVM* vm,DCpointer funcptr);
-  DCshort      (*callShort)      (DCCallVM* vm,DCpointer funcptr);
-  DCint        (*callInt)        (DCCallVM* vm,DCpointer funcptr);
-  DClong       (*callLong)       (DCCallVM* vm,DCpointer funcptr);
-  DClonglong   (*callLongLong)   (DCCallVM* vm,DCpointer funcptr);
-  DCfloat      (*callFloat)      (DCCallVM* vm,DCpointer funcptr);
-  DCdouble     (*callDouble)     (DCCallVM* vm,DCpointer funcptr);
-  DCpointer    (*callPointer)    (DCCallVM* vm,DCpointer funcptr);
-  void         (*callStruct)     (DCCallVM* vm,DCpointer funcptr,DCstruct* s, DCpointer returnValue);
+  void         (*mode)           (DCCallVM* vm, DCint mode);
+  void         (*argBool)        (DCCallVM* vm, DCbool     B);
+  void         (*argChar)        (DCCallVM* vm, DCchar     c);
+  void         (*argShort)       (DCCallVM* vm, DCshort    s);
+  void         (*argInt)         (DCCallVM* vm, DCint      i);
+  void         (*argLong)        (DCCallVM* vm, DClong     l);
+  void         (*argLongLong)    (DCCallVM* vm, DClonglong L);
+  void         (*argFloat)       (DCCallVM* vm, DCfloat    f);
+  void         (*argDouble)      (DCCallVM* vm, DCdouble   d);
+  void         (*argPointer)     (DCCallVM* vm, DCpointer  p);
+  void         (*argAggr)        (DCCallVM* vm, const DCaggr* ag, const void* p);
+  void         (*callVoid)       (DCCallVM* vm, DCpointer funcptr);
+  DCbool       (*callBool)       (DCCallVM* vm, DCpointer funcptr);
+  DCchar       (*callChar)       (DCCallVM* vm, DCpointer funcptr);
+  DCshort      (*callShort)      (DCCallVM* vm, DCpointer funcptr);
+  DCint        (*callInt)        (DCCallVM* vm, DCpointer funcptr);
+  DClong       (*callLong)       (DCCallVM* vm, DCpointer funcptr);
+  DClonglong   (*callLongLong)   (DCCallVM* vm, DCpointer funcptr);
+  DCfloat      (*callFloat)      (DCCallVM* vm, DCpointer funcptr);
+  DCdouble     (*callDouble)     (DCCallVM* vm, DCpointer funcptr);
+  DCpointer    (*callPointer)    (DCCallVM* vm, DCpointer funcptr);
+  void         (*callAggr)       (DCCallVM* vm, DCpointer funcptr, const DCaggr* ag, DCpointer returnValue);
+  void         (*beginAggr)      (DCCallVM* vm, const DCaggr* ag);
 };
 
-typedef DCvoid       (DCvoidvmfunc)      (DCCallVM* vm,DCpointer funcptr); 
-typedef DCbool       (DCboolvmfunc)      (DCCallVM* vn,DCpointer funcptr);
-typedef DCchar       (DCcharvmfunc)      (DCCallVM* vm,DCpointer funcptr);
-typedef DCshort      (DCshortvmfunc)     (DCCallVM* vm,DCpointer funcptr);
-typedef DCint        (DCintvmfunc)       (DCCallVM* vm,DCpointer funcptr);
-typedef DClong       (DClongvmfunc)      (DCCallVM* vm,DCpointer funcptr);
-typedef DClonglong   (DClonglongvmfunc)  (DCCallVM* vm,DCpointer funcptr);
-typedef DCfloat      (DCfloatvmfunc)     (DCCallVM* vm,DCpointer funcptr);
-typedef DCdouble     (DCdoublevmfunc)    (DCCallVM* vm,DCpointer funcptr);
-typedef DCpointer    (DCpointervmfunc)   (DCCallVM* vm,DCpointer funcptr);
+typedef DCvoid       (DCvoidvmfunc)     (DCCallVM* vm, DCpointer funcptr); 
+typedef DCbool       (DCboolvmfunc)     (DCCallVM* vm, DCpointer funcptr);
+typedef DCchar       (DCcharvmfunc)     (DCCallVM* vm, DCpointer funcptr);
+typedef DCshort      (DCshortvmfunc)    (DCCallVM* vm, DCpointer funcptr);
+typedef DCint        (DCintvmfunc)      (DCCallVM* vm, DCpointer funcptr);
+typedef DClong       (DClongvmfunc)     (DCCallVM* vm, DCpointer funcptr);
+typedef DClonglong   (DClonglongvmfunc) (DCCallVM* vm, DCpointer funcptr);
+typedef DCfloat      (DCfloatvmfunc)    (DCCallVM* vm, DCpointer funcptr);
+typedef DCdouble     (DCdoublevmfunc)   (DCCallVM* vm, DCpointer funcptr);
+typedef DCpointer    (DCpointervmfunc)  (DCCallVM* vm, DCpointer funcptr);
+typedef DCvoid       (DCaggrvmfunc)     (DCCallVM* vm, DCpointer funcptr, const DCaggr* ag, DCpointer returnValue);
+typedef DCvoid       (DCbeginaggrvmfunc)(DCCallVM* vm, const DCaggr* ag);
 
 /* Common base functions for CallVM implementations. */
 
--- a/dyncall/dyncall_callvm_arm32_arm.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_arm32_arm.c	Thu Apr 21 13:35:47 2022 +0200
@@ -172,7 +172,7 @@
 , &dc_callvm_argFloat_arm32_arm
 , &dc_callvm_argDouble_arm32_arm
 , &dc_callvm_argPointer_arm32_arm
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_arm32_arm
 , (DCboolvmfunc*)       &dc_callvm_call_arm32_arm
 , (DCcharvmfunc*)       &dc_callvm_call_arm32_arm
@@ -183,7 +183,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_arm32_arm
 , (DCdoublevmfunc*)     &dc_callvm_call_arm32_arm
 , (DCpointervmfunc*)    &dc_callvm_call_arm32_arm
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_arm32_arm_eabi =
@@ -200,7 +201,7 @@
 , &dc_callvm_argFloat_arm32_arm
 , &dc_callvm_argDouble_arm32_arm_eabi
 , &dc_callvm_argPointer_arm32_arm
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_arm32_arm
 , (DCboolvmfunc*)       &dc_callvm_call_arm32_arm
 , (DCcharvmfunc*)       &dc_callvm_call_arm32_arm
@@ -211,7 +212,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_arm32_arm
 , (DCdoublevmfunc*)     &dc_callvm_call_arm32_arm
 , (DCpointervmfunc*)    &dc_callvm_call_arm32_arm
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void dc_callvm_mode_arm32_arm(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_arm32_arm_armhf.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_arm32_arm_armhf.c	Thu Apr 21 13:35:47 2022 +0200
@@ -162,7 +162,7 @@
 , &a_float
 , &a_double
 , &a_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -173,7 +173,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt vt_armhf_ellipsis =
@@ -190,7 +191,7 @@
 , &a_float_ellipsis
 , &a_double_ellipsis
 , &a_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -201,7 +202,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void mode(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_arm32_thumb.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_arm32_thumb.c	Thu Apr 21 13:35:47 2022 +0200
@@ -171,7 +171,7 @@
 , &dc_callvm_argFloat_arm32_thumb
 , &dc_callvm_argDouble_arm32_thumb
 , &dc_callvm_argPointer_arm32_thumb
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_arm32_thumb
 , (DCboolvmfunc*)       &dc_callvm_call_arm32_thumb
 , (DCcharvmfunc*)       &dc_callvm_call_arm32_thumb
@@ -182,7 +182,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_arm32_thumb
 , (DCdoublevmfunc*)     &dc_callvm_call_arm32_thumb
 , (DCpointervmfunc*)    &dc_callvm_call_arm32_thumb
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_arm32_thumb_eabi =
@@ -199,7 +200,7 @@
 , &dc_callvm_argFloat_arm32_thumb
 , &dc_callvm_argDouble_arm32_thumb_eabi
 , &dc_callvm_argPointer_arm32_thumb
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_arm32_thumb
 , (DCboolvmfunc*)       &dc_callvm_call_arm32_thumb
 , (DCcharvmfunc*)       &dc_callvm_call_arm32_thumb
@@ -210,7 +211,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_arm32_thumb
 , (DCdoublevmfunc*)     &dc_callvm_call_arm32_thumb
 , (DCpointervmfunc*)    &dc_callvm_call_arm32_thumb
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void dc_callvm_mode_arm32_thumb(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_arm64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_arm64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -117,7 +117,7 @@
 , &a_float
 , &a_double
 , &a_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -128,7 +128,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 #ifdef DC__OS_Win64
@@ -153,7 +154,7 @@
 , &var_float
 , &var_double
 , &a_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -164,7 +165,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 #endif
 
--- a/dyncall/dyncall_callvm_arm64_apple.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_arm64_apple.c	Thu Apr 21 13:35:47 2022 +0200
@@ -190,7 +190,7 @@
 , &a_float
 , &a_double
 , &a_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -201,7 +201,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt vt_arm64_variadic =
@@ -218,7 +219,7 @@
 , &var_float
 , &var_double
 , &var_pointer
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &call
 , (DCboolvmfunc*)       &call
 , (DCcharvmfunc*)       &call
@@ -229,7 +230,8 @@
 , (DCfloatvmfunc*)      &call
 , (DCdoublevmfunc*)     &call
 , (DCpointervmfunc*)    &call
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void mode(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_mips_eabi.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_mips_eabi.c	Thu Apr 21 13:35:47 2022 +0200
@@ -139,7 +139,7 @@
 , &dc_callvm_argFloat_mips_eabi
 , &dc_callvm_argDouble_mips_eabi
 , &dc_callvm_argPointer_mips_eabi
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_eabi
 , (DCboolvmfunc*)       &dc_callvm_call_mips_eabi
 , (DCcharvmfunc*)       &dc_callvm_call_mips_eabi
@@ -150,7 +150,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_eabi
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_eabi
 , (DCpointervmfunc*)    &dc_callvm_call_mips_eabi
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 /* mode: only a single mode available currently. */
--- a/dyncall/dyncall_callvm_mips_n32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_mips_n32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -189,7 +189,7 @@
 , &dc_callvm_argFloat_mips_n32
 , &dc_callvm_argDouble_mips_n32
 , &dc_callvm_argPointer_mips_n32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_n32
 , (DCboolvmfunc*)       &dc_callvm_call_mips_n32
 , (DCcharvmfunc*)       &dc_callvm_call_mips_n32
@@ -200,7 +200,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_n32
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_n32
 , (DCpointervmfunc*)    &dc_callvm_call_mips_n32
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_mips_n32_ellipsis =
@@ -217,7 +218,7 @@
 , &dc_callvm_argFloat_mips_n32_ellipsis
 , &dc_callvm_argDouble_mips_n32_ellipsis
 , &dc_callvm_argPointer_mips_n32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_n32
 , (DCboolvmfunc*)       &dc_callvm_call_mips_n32
 , (DCcharvmfunc*)       &dc_callvm_call_mips_n32
@@ -228,7 +229,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_n32
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_n32
 , (DCpointervmfunc*)    &dc_callvm_call_mips_n32
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void dc_callvm_mode_mips_n32(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_mips_n64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_mips_n64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -206,7 +206,7 @@
 , &dc_callvm_argFloat_mips_n64
 , &dc_callvm_argDouble_mips_n64
 , &dc_callvm_argPointer_mips_n64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_n64
 , (DCboolvmfunc*)       &dc_callvm_call_mips_n64
 , (DCcharvmfunc*)       &dc_callvm_call_mips_n64
@@ -217,7 +217,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_n64
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_n64
 , (DCpointervmfunc*)    &dc_callvm_call_mips_n64
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_mips_n64_ellipsis =
@@ -234,7 +235,7 @@
 , &dc_callvm_argFloat_mips_n64_ellipsis
 , &dc_callvm_argDouble_mips_n64_ellipsis
 , &dc_callvm_argPointer_mips_n64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_n64
 , (DCboolvmfunc*)       &dc_callvm_call_mips_n64
 , (DCcharvmfunc*)       &dc_callvm_call_mips_n64
@@ -245,7 +246,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_n64
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_n64
 , (DCpointervmfunc*)    &dc_callvm_call_mips_n64
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 static void dc_callvm_mode_mips_n64(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_mips_o32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_mips_o32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -199,7 +199,7 @@
 , &dc_callvm_argFloat_mips_o32
 , &dc_callvm_argDouble_mips_o32
 , &dc_callvm_argPointer_mips_o32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_mips_o32
 , (DCboolvmfunc*)       &dc_callvm_call_mips_o32
 , (DCcharvmfunc*)       &dc_callvm_call_mips_o32
@@ -210,7 +210,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_mips_o32
 , (DCdoublevmfunc*)     &dc_callvm_call_mips_o32
 , (DCpointervmfunc*)    &dc_callvm_call_mips_o32
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 /* mode: only a single mode available currently. */
--- a/dyncall/dyncall_callvm_ppc32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_ppc32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -309,7 +309,7 @@
 , &dc_callvm_argFloat_ppc32_darwin
 , &dc_callvm_argDouble_ppc32_darwin
 , &dc_callvm_argPointer_ppc32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc32_darwin
 , (DCboolvmfunc*)       &dc_callvm_call_ppc32_darwin
 , (DCcharvmfunc*)       &dc_callvm_call_ppc32_darwin
@@ -320,7 +320,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc32_darwin
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc32_darwin
 , (DCpointervmfunc*)    &dc_callvm_call_ppc32_darwin
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_ppc32_sysv =
@@ -337,7 +338,7 @@
 , &dc_callvm_argFloat_ppc32_sysv
 , &dc_callvm_argDouble_ppc32_sysv
 , &dc_callvm_argPointer_ppc32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc32_sysv
 , (DCboolvmfunc*)       &dc_callvm_call_ppc32_sysv
 , (DCcharvmfunc*)       &dc_callvm_call_ppc32_sysv
@@ -348,7 +349,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc32_sysv
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc32_sysv
 , (DCpointervmfunc*)    &dc_callvm_call_ppc32_sysv
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_ppc32_syscall =
@@ -365,7 +367,7 @@
 , &dc_callvm_argFloat_ppc32_sysv
 , &dc_callvm_argDouble_ppc32_sysv
 , &dc_callvm_argPointer_ppc32
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc32_syscall
 , (DCboolvmfunc*)       &dc_callvm_call_ppc32_syscall
 , (DCcharvmfunc*)       &dc_callvm_call_ppc32_syscall
@@ -376,7 +378,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc32_syscall
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc32_syscall
 , (DCpointervmfunc*)    &dc_callvm_call_ppc32_syscall
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
--- a/dyncall/dyncall_callvm_ppc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_ppc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -259,7 +259,7 @@
   if (size < 64) {
 	dcVecSkip(&self->mVecHead, 64-size);
   }
-
+ 
   dcCall_ppc64( target, &self->mRegData, dcVecSize(&self->mVecHead) , dcVecData(&self->mVecHead));
 }
 
@@ -285,7 +285,7 @@
 , &dc_callvm_argFloat_ppc64
 , &dc_callvm_argDouble_ppc64
 , &dc_callvm_argPointer_ppc64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc64
 , (DCboolvmfunc*)       &dc_callvm_call_ppc64
 , (DCcharvmfunc*)       &dc_callvm_call_ppc64
@@ -296,7 +296,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc64
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc64
 , (DCpointervmfunc*)    &dc_callvm_call_ppc64
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 #if DC__ABI_PPC64_ELF_V == 2
@@ -314,7 +315,7 @@
 , &dc_callvm_argFloat_ppc64_ellipsis
 , &dc_callvm_argDouble_ppc64_ellipsis
 , &dc_callvm_argPointer_ppc64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc64
 , (DCboolvmfunc*)       &dc_callvm_call_ppc64
 , (DCcharvmfunc*)       &dc_callvm_call_ppc64
@@ -325,7 +326,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc64
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc64
 , (DCpointervmfunc*)    &dc_callvm_call_ppc64
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 #endif
 
@@ -343,7 +345,7 @@
 , &dc_callvm_argFloat_ppc64
 , &dc_callvm_argDouble_ppc64
 , &dc_callvm_argPointer_ppc64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_ppc64_syscall
 , (DCboolvmfunc*)       &dc_callvm_call_ppc64_syscall
 , (DCcharvmfunc*)       &dc_callvm_call_ppc64_syscall
@@ -354,7 +356,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_ppc64_syscall
 , (DCdoublevmfunc*)     &dc_callvm_call_ppc64_syscall
 , (DCpointervmfunc*)    &dc_callvm_call_ppc64_syscall
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 void dc_callvm_mode_ppc64(DCCallVM* in_self, DCint mode)
--- a/dyncall/dyncall_callvm_sparc.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_sparc.c	Thu Apr 21 13:35:47 2022 +0200
@@ -115,7 +115,7 @@
   &dc_callvm_argFloat_sparc,
   &dc_callvm_argDouble_sparc,
   &dc_callvm_argPointer_sparc,
-  NULL /* argStruct */,
+  NULL /* argAggr */,
   (DCvoidvmfunc*)       &dcCall_sparc,
   (DCboolvmfunc*)       &dcCall_sparc,
   (DCcharvmfunc*)       &dcCall_sparc,
@@ -126,7 +126,8 @@
   (DCfloatvmfunc*)      &dcCall_sparc,
   (DCdoublevmfunc*)     &dcCall_sparc,
   (DCpointervmfunc*)    &dcCall_sparc,
-  NULL /* callStruct */
+  NULL /* callAggr */,
+  NULL /* beginAggr */
 };
 
 /* mode: only a single mode available currently. */
--- a/dyncall/dyncall_callvm_sparc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_sparc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -106,7 +106,7 @@
   &dc_callvm_argFloat_v9_ellipsis,
   &dc_callvm_argDouble_v9_ellipsis,
   &dc_callvm_argPointer_v9,
-  NULL /* argStruct */,
+  NULL /* argAggr */,
   (DCvoidvmfunc*)       &dcCall_v9,
   (DCboolvmfunc*)       &dcCall_v9,
   (DCcharvmfunc*)       &dcCall_v9,
@@ -117,7 +117,8 @@
   (DCfloatvmfunc*)      &dcCall_v9,
   (DCdoublevmfunc*)     &dcCall_v9,
   (DCpointervmfunc*)    &dcCall_v9,
-  NULL /* callStruct */
+  NULL /* callAggr */,
+  NULL /* beginAggr */
 };
 
 /* CallVM virtual table. */
@@ -135,7 +136,7 @@
   &dc_callvm_argFloat_v9,
   &dc_callvm_argDouble_v9,
   &dc_callvm_argPointer_v9,
-  NULL /* argStruct */,
+  NULL /* argAggr */,
   (DCvoidvmfunc*)       &dcCall_v9,
   (DCboolvmfunc*)       &dcCall_v9,
   (DCcharvmfunc*)       &dcCall_v9,
@@ -146,7 +147,8 @@
   (DCfloatvmfunc*)      &dcCall_v9,
   (DCdoublevmfunc*)     &dcCall_v9,
   (DCpointervmfunc*)    &dcCall_v9,
-  NULL /* callStruct */
+  NULL /* callAggr */,
+  NULL /* beginAggr */
 };
 
 /* mode: only a single mode available currently. */
--- a/dyncall/dyncall_callvm_x64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_x64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: 
  License:
 
-   Copyright (c) 2007-2020 Daniel Adler <dadler@uni-goettingen.de>, 
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>, 
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -31,7 +31,11 @@
 
 #include "dyncall_callvm_x64.h"
 #include "dyncall_alloc.h"
-#include "dyncall_struct.h"
+#include "dyncall_aggregate.h"
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
 
 
 /* 
@@ -41,9 +45,16 @@
 **
 */
 
-void dcCall_x64_sysv(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target);
-void dcCall_x64_win64(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target);
-void dcCall_x64_syscall_sysv(DCpointer argdata, DCpointer target);
+#if defined(DC_UNIX)
+extern void dcCall_x64_sysv(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target);
+extern void dcCall_x64_sysv_aggr(DCsize stacksize, DCpointer stackdata, DCpointer regdata_i, DCpointer regdata_f, DCpointer target, DCpointer ret_regs);
+#else
+extern void dcCall_x64_win64(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target);
+extern void dcCall_x64_win64_aggr(DCsize stacksize, DCpointer stackdata, DCpointer regdata, DCpointer target, DCpointer aggr_mem);
+#endif
+extern void dcCall_x64_syscall_sysv(DCpointer argdata, DCpointer target);
+
+
 
 
 static void dc_callvm_free_x64(DCCallVM* in_self)
@@ -57,13 +68,22 @@
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
   dcVecReset(&self->mVecHead);
   self->mRegCount.i = self->mRegCount.f = 0;
+  self->mAggrReturnReg = -1;
+#if defined(DC_WINDOWS)
+  self->mpAggrVecCopies = ((DCchar*)dcVecData(&self->mVecHead)) + self->mVecHead.mTotal;
+#endif
 }
 
 
+
+
 static void dc_callvm_argLongLong_x64(DCCallVM* in_self, DClonglong x)
 {
   /* A long long always has 64 bits on the supported x64 platforms (lp64 on unix and llp64 on windows). */
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+  self->mRegCount.i += (self->mRegCount.i == self->mAggrReturnReg);
+
   if(self->mRegCount.i < numIntRegs)
     self->mRegData.i[self->mRegCount.i++] = x;
   else
@@ -101,10 +121,23 @@
 }
 
 
-static void dc_callvm_argFloat_x64(DCCallVM* in_self, DCfloat x)
+static void dc_callvm_argDouble_x64(DCCallVM* in_self, DCdouble x)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
 
+#if defined(DC_WINDOWS) 
+  self->mRegCount.f += (self->mRegCount.f == self->mAggrReturnReg);
+#endif
+
+  if(self->mRegCount.f < numFloatRegs)
+    self->mRegData.f[self->mRegCount.f++] = x;
+  else
+    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble));
+}
+
+
+static void dc_callvm_argFloat_x64(DCCallVM* in_self, DCfloat x)
+{
   /* Although not promoted to doubles, floats are stored with 64bits in this API.*/
   union {
     DCdouble d;
@@ -112,46 +145,89 @@
   } f;
   f.f = x;
 
-  if(self->mRegCount.f < numFloatRegs)
-    *(DCfloat*)&self->mRegData.f[self->mRegCount.f++] = x;
-  else
-    dcVecAppend(&self->mVecHead, &f.f, sizeof(DCdouble));
-}
-
-
-static void dc_callvm_argDouble_x64(DCCallVM* in_self, DCdouble x)
-{
-  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
-  if(self->mRegCount.f < numFloatRegs)
-    self->mRegData.f[self->mRegCount.f++] = x;
-  else
-    dcVecAppend(&self->mVecHead, &x, sizeof(DCdouble));
+  dc_callvm_argDouble_x64(in_self, f.d);
 }
 
 
 static void dc_callvm_argPointer_x64(DCCallVM* in_self, DCpointer x)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+  self->mRegCount.i += (self->mRegCount.i == self->mAggrReturnReg);
+
   if(self->mRegCount.i < numIntRegs)
     *(DCpointer*)&self->mRegData.i[self->mRegCount.i++] = x;
   else
     dcVecAppend(&self->mVecHead, &x, sizeof(DCpointer));
 }
 
-static void dc_callvm_argStruct_x64(DCCallVM* in_self, DCstruct* s, DCpointer x)
+
+static void dc_callvm_argAggr_x64(DCCallVM* in_self, const DCaggr* ag, const void* x)
 {
+  int i;
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
-  dcVecAppend(&self->mVecHead, x, s->size);
-  /*printf("dc_callvm_argStruct_x64 size = %d\n", (int)s->size);@@@*/
-  if (s->size <= 64)
-  	  dcArgStructUnroll(in_self, s, x);
-  /*else@@@*/
-  /*	  dcVecAppend(&self->mVecHead, &x, sizeof(DCpointer));@@@*/
+
+  if (!ag) {
+	/* non-trivial aggrs (C++) are passed via pointer (win and sysv callconv),
+	 * copy has to be provided by user, as dyncall cannot do such copies*/
+    dc_callvm_argPointer_x64(in_self, (DCpointer)x);
+    return;
+  }
+
+#if defined(DC_UNIX)
+  DCRegCount_x64 n_regs = { self->mRegCount.i, self->mRegCount.f };
+
+  if(ag->sysv_classes[0] != SYSVC_MEMORY) {
+    /* reclassify aggr w/ respect to remaining regs, might need to pass it all via the stack */
+    for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i) {
+      DCuchar clz = ag->sysv_classes[i];
+      n_regs.i += (clz == SYSVC_INTEGER);
+      n_regs.f += (clz == SYSVC_SSE);
+      /* @@@AGGR implement when implementing x87 types */
+    }
+  }
+
+  if(ag->sysv_classes[0] == SYSVC_MEMORY || (n_regs.i > numIntRegs) || (n_regs.f > numFloatRegs))
+  {
+     dcVecAppend(&self->mVecHead, x, ag->size);
+     dcVecSkip(&self->mVecHead, (ag->size + (sizeof(DClonglong)-1) & -sizeof(DClonglong)) - ag->size); /* realign to qword */
+     return;
+  }
+
+  for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i)
+  {
+    switch (ag->sysv_classes[i]) {
+      case SYSVC_INTEGER: dc_callvm_argLongLong_x64(in_self, ((DClonglong*)x)[i]); break;
+      case SYSVC_SSE:     dc_callvm_argDouble_x64  (in_self, ((DCdouble  *)x)[i]); break;
+      /* @@@AGGR implement when implementing x87 types */
+    }
+  }
+
+#else
+
+  switch (ag->size) {
+    case 1:  dc_callvm_argChar_x64    (in_self, *(DCchar    *)x); break;
+    case 2:  dc_callvm_argShort_x64   (in_self, *(DCshort   *)x); break;
+    case 4:  dc_callvm_argLong_x64    (in_self, *(DClong    *)x); break;
+    case 8:  dc_callvm_argLongLong_x64(in_self, *(DClonglong*)x); break;
+    default:
+      /* pass the aggr indirectly via hidden pointer; requires caller-made copy
+       * to mimic pass-by-value semantics (or a call that modifies the param
+       * would corrupt the source aggr)
+       * place those copies at the end of the param vector (aligned to 16b for
+       * this calling convention); it's a bit of a hack, but should be safe: in
+       * any case the vector has to be big enough to hold all params */
+      self->mpAggrVecCopies = (void*)((intptr_t)((DCchar*)self->mpAggrVecCopies - ag->size) & -16);
+      x = memcpy(self->mpAggrVecCopies, x, ag->size);
+      dc_callvm_argPointer_x64(in_self, (DCpointer)x);
+      break;
+  }
+#endif
 }
 
 
 /* Call. */
-void dc_callvm_call_x64(DCCallVM* in_self, DCpointer target)
+static void dc_callvm_call_x64(DCCallVM* in_self, DCpointer target)
 {
   DCCallVM_x64* self = (DCCallVM_x64*)in_self;
 #if defined(DC_UNIX)
@@ -170,6 +246,115 @@
 }
 
 
+static void dc_callvm_begin_aggr_x64(DCCallVM* in_self, const DCaggr *ag)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+  assert(self->mRegCount.i == 0 && self->mRegCount.f == 0 && "dc_callvm_begin_aggr_x64 should be called before any function arguments are declared");
+#if defined(DC_UNIX)
+  if (!ag || (ag->sysv_classes[0] == SYSVC_MEMORY)) {
+#else
+  if (!ag || ag->size > 8) {
+#endif 
+    /* pass pointer to aggregate as hidden first argument */
+    self->mAggrReturnReg = 0;
+  }
+}
+
+
+#if defined(DC_WINDOWS)
+static void dc_callvm_begin_aggr_x64_win64_this(DCCallVM* in_self, const DCaggr *ag)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+  assert(self->mRegCount.i == 0 && self->mRegCount.f == 0 && "dc_callvm_begin_aggr_x64_win64_this should be called before any function arguments are declared");
+
+  if (!ag || ag->size > 8) {
+    /* thiscall: this-ptr comes first, then pointer to aggregate as hidden (second) argument */
+    self->mAggrReturnReg = 1;
+  }
+}
+#endif
+
+
+static void dc_callvm_call_x64_aggr(DCCallVM* in_self, DCpointer target, const DCaggr *ag, DCpointer ret)
+{
+  DCCallVM_x64* self = (DCCallVM_x64*)in_self;
+
+#if defined(DC_UNIX)
+
+  if (self->mAggrReturnReg != -1) {
+    /* call regular dcCall_x64_sysv here, w/ pointer to the aggr in first arg */
+    self->mRegData.i[self->mAggrReturnReg] = (int64)ret;
+
+    dcCall_x64_sysv(
+      dcVecSize(&self->mVecHead),  /* rdi: Size of stack data.                           */
+      dcVecData(&self->mVecHead),  /* rsi: Pointer to stack arguments.                   */
+      self->mRegData.i,            /* rdx: Pointer to register arguments (ints on SysV). */
+      self->mRegData.f,            /* rcx: Pointer to floating point register arguments. */
+      target                       /* r8 */
+    );
+  } else {
+    int i;
+    DCchar ret_regs[32];           /* 4 qwords: 2 for ints, 2 for floats */
+    DCchar *ret_regs_i = ret_regs+0;
+    DCchar *ret_regs_f = ret_regs+16;
+    DCsize st_size = ag->size;
+    DCchar* dst = (char*)ret;
+    dcCall_x64_sysv_aggr(
+      dcVecSize(&self->mVecHead),  /* rdi: Size of stack data.                           */
+      dcVecData(&self->mVecHead),  /* rsi: Pointer to stack arguments.                   */
+      self->mRegData.i,            /* rdx: Pointer to register arguments (ints on SysV). */
+      self->mRegData.f,            /* rcx: Pointer to floating point register arguments. */
+      target,                      /* r8 */
+      ret_regs                     /* r9 */
+    );
+    /* reassemble aggr to be returned from reg data */
+    for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i) {
+      DCchar** src;
+      int ll = 8;
+      switch(ag->sysv_classes[i]) {
+        case SYSVC_INTEGER:  src = &ret_regs_i; break;
+        case SYSVC_SSE:      src = &ret_regs_f; break;
+        /* @@@AGGR implement when implementing x87 types */
+      }
+      while(ll-- && st_size--)
+        *dst++ = *(*src)++;
+    }
+  }
+
+#else
+
+  if (self->mAggrReturnReg != -1) {
+    /* call regular dcCall_x64_sysv here, w/ pointer to the aggr in first arg */
+    self->mRegData.i[self->mAggrReturnReg] = (int64)ret;
+
+    dcCall_x64_win64(
+      dcVecSize(&self->mVecHead),  /* rcx: Size of stack data.           */
+      dcVecData(&self->mVecHead),  /* rdx: Pointer to stack arguments.   */
+      self->mRegData.i,            /* r8:  Pointer to register arguments */
+      target                       /* r9 */
+    );
+  } else {
+    DCchar ret_reg[8];             /* 1 qword */
+    DCsize st_size = ag->size;     /* guaranteed to be <= 8 */
+    DCchar* dst = (char*)ret;
+    DCchar* src = ret_reg;
+    dcCall_x64_win64_aggr(
+      dcVecSize(&self->mVecHead),  /* rcx: Size of stack data.           */
+      dcVecData(&self->mVecHead),  /* rdx: Pointer to stack arguments.   */
+      self->mRegData.i,            /* r8:  Pointer to register arguments */
+      target,                      /* r9 */
+      ret_reg                      /* stack */
+    );
+    while(st_size--)
+      *dst++ = *src++;
+  }
+
+#endif
+}
+
+
 static void dc_callvm_mode_x64(DCCallVM* in_self, DCint mode);
 
 DCCallVM_vt gVT_x64 =
@@ -186,24 +371,59 @@
 , &dc_callvm_argFloat_x64
 , &dc_callvm_argDouble_x64
 , &dc_callvm_argPointer_x64
-, &dc_callvm_argStruct_x64
-, (DCvoidvmfunc*)       &dc_callvm_call_x64
-, (DCboolvmfunc*)       &dc_callvm_call_x64
-, (DCcharvmfunc*)       &dc_callvm_call_x64
-, (DCshortvmfunc*)      &dc_callvm_call_x64
-, (DCintvmfunc*)        &dc_callvm_call_x64
-, (DClongvmfunc*)       &dc_callvm_call_x64
-, (DClonglongvmfunc*)   &dc_callvm_call_x64
-, (DCfloatvmfunc*)      &dc_callvm_call_x64
-, (DCdoublevmfunc*)     &dc_callvm_call_x64
-, (DCpointervmfunc*)    &dc_callvm_call_x64
-, NULL /* callStruct */
+, &dc_callvm_argAggr_x64
+, (DCvoidvmfunc*)     &dc_callvm_call_x64
+, (DCboolvmfunc*)     &dc_callvm_call_x64
+, (DCcharvmfunc*)     &dc_callvm_call_x64
+, (DCshortvmfunc*)    &dc_callvm_call_x64
+, (DCintvmfunc*)      &dc_callvm_call_x64
+, (DClongvmfunc*)     &dc_callvm_call_x64
+, (DClonglongvmfunc*) &dc_callvm_call_x64
+, (DCfloatvmfunc*)    &dc_callvm_call_x64
+, (DCdoublevmfunc*)   &dc_callvm_call_x64
+, (DCpointervmfunc*)  &dc_callvm_call_x64
+, (DCaggrvmfunc*)     &dc_callvm_call_x64_aggr
+, (DCbeginaggrvmfunc*)&dc_callvm_begin_aggr_x64
 };
 
 
+#if defined(DC_WINDOWS)
+/* --- win64 thiscalls ------------------------------------------------------------- */
+
+DCCallVM_vt gVT_x64_win64_this =
+{
+  &dc_callvm_free_x64
+, &dc_callvm_reset_x64
+, &dc_callvm_mode_x64
+, &dc_callvm_argBool_x64
+, &dc_callvm_argChar_x64
+, &dc_callvm_argShort_x64
+, &dc_callvm_argInt_x64
+, &dc_callvm_argLong_x64
+, &dc_callvm_argLongLong_x64
+, &dc_callvm_argFloat_x64
+, &dc_callvm_argDouble_x64
+, &dc_callvm_argPointer_x64
+, &dc_callvm_argAggr_x64
+, (DCvoidvmfunc*)     &dc_callvm_call_x64
+, (DCboolvmfunc*)     &dc_callvm_call_x64
+, (DCcharvmfunc*)     &dc_callvm_call_x64
+, (DCshortvmfunc*)    &dc_callvm_call_x64
+, (DCintvmfunc*)      &dc_callvm_call_x64
+, (DClongvmfunc*)     &dc_callvm_call_x64
+, (DClonglongvmfunc*) &dc_callvm_call_x64
+, (DCfloatvmfunc*)    &dc_callvm_call_x64
+, (DCdoublevmfunc*)   &dc_callvm_call_x64
+, (DCpointervmfunc*)  &dc_callvm_call_x64
+, (DCaggrvmfunc*)     &dc_callvm_call_x64_aggr
+, (DCbeginaggrvmfunc*)&dc_callvm_begin_aggr_x64_win64_this
+};
+
+#endif
+
 /* --- syscall ------------------------------------------------------------- */
 
-#include <assert.h>
+#if defined(DC_UNIX)
 void dc_callvm_call_x64_syscall_sysv(DCCallVM* in_self, DCpointer target)
 {
   DCCallVM_x64* self;
@@ -230,7 +450,7 @@
 , &dc_callvm_argFloat_x64
 , &dc_callvm_argDouble_x64
 , &dc_callvm_argPointer_x64
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x64_syscall_sysv
 , (DCboolvmfunc*)       &dc_callvm_call_x64_syscall_sysv
 , (DCcharvmfunc*)       &dc_callvm_call_x64_syscall_sysv
@@ -241,8 +461,10 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x64_syscall_sysv
 , (DCdoublevmfunc*)     &dc_callvm_call_x64_syscall_sysv
 , (DCpointervmfunc*)    &dc_callvm_call_x64_syscall_sysv
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
+#endif
 
 
 
@@ -255,9 +477,9 @@
 
   switch(mode) {
     case DC_CALL_C_DEFAULT:
-	case DC_CALL_C_DEFAULT_THIS:
 #if defined(DC_UNIX)
-    case DC_CALL_C_X64_SYSV:
+    case DC_CALL_C_DEFAULT_THIS:
+    case DC_CALL_C_X64_SYSV: /* = DC_CALL_C_X64_SYSV_THIS */
 #else
     case DC_CALL_C_X64_WIN64:
 #endif
@@ -265,6 +487,12 @@
     case DC_CALL_C_ELLIPSIS_VARARGS:
       vt = &gVT_x64;
       break;
+#if defined(DC_WINDOWS)
+    case DC_CALL_C_DEFAULT_THIS:
+    case DC_CALL_C_X64_WIN64_THIS:
+      vt = &gVT_x64_win64_this;
+      break;
+#endif
     case DC_CALL_SYS_DEFAULT:
 #if defined(DC_UNIX)
     case DC_CALL_SYS_X64_SYSCALL_SYSV:
--- a/dyncall/dyncall_callvm_x64.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_x64.h	Thu Apr 21 13:35:47 2022 +0200
@@ -97,11 +97,15 @@
 
 typedef struct
 {
-  DCCallVM       mInterface;  /* This CallVM interface.                                  */
-  DCpointer      mpCallFunc;  /* Function to call.                                       */
-  DCRegCount_x64 mRegCount;   /* Number of int/sse registers used for parameter passing. */
-  DCRegData_x64  mRegData;    /* Parameters to be passed via registers.                  */
-  DCVecHead      mVecHead;    /* Parameters to be pushed onto stack.                     */
+  DCCallVM       mInterface;       /* this CallVM interface                                        */
+  DCpointer      mpCallFunc;       /* function to call                                             */
+  DCint          mAggrReturnReg;   /* reg index for aggregate ret value (if hidden argument)       */
+#if defined(DC_WINDOWS)
+  DCpointer      mpAggrVecCopies;  /* ptr to copies of aggrs passed via hidden ptr (end of vector) */
+#endif
+  DCRegCount_x64 mRegCount;        /* number of int/sse registers used for parameter passing       */
+  DCRegData_x64  mRegData;         /* parameters to be passed via registers                        */
+  DCVecHead      mVecHead;         /* parameters to be pushed onto stack                           */
 } DCCallVM_x64;
 
 #endif /* DYNCALL_CALLVM_X64_H */
--- a/dyncall/dyncall_callvm_x86.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_callvm_x86.c	Thu Apr 21 13:35:47 2022 +0200
@@ -178,7 +178,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_plan9
 , (DCboolvmfunc*)       &dc_callvm_call_x86_plan9
 , (DCcharvmfunc*)       &dc_callvm_call_x86_plan9
@@ -189,7 +189,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_plan9
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_plan9
 , (DCpointervmfunc*)    &dc_callvm_call_x86_plan9
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
@@ -218,7 +219,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_cdecl
 , (DCboolvmfunc*)       &dc_callvm_call_x86_cdecl
 , (DCcharvmfunc*)       &dc_callvm_call_x86_cdecl
@@ -229,7 +230,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_cdecl
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_cdecl
 , (DCpointervmfunc*)    &dc_callvm_call_x86_cdecl
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
@@ -260,7 +262,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_win32_std
 , (DCboolvmfunc*)       &dc_callvm_call_x86_win32_std
 , (DCcharvmfunc*)       &dc_callvm_call_x86_win32_std
@@ -271,7 +273,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_win32_std
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_win32_std
 , (DCpointervmfunc*)    &dc_callvm_call_x86_win32_std
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
@@ -365,7 +368,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86_win32_fast_ms
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_win32_fast
 , (DCboolvmfunc*)       &dc_callvm_call_x86_win32_fast
 , (DCcharvmfunc*)       &dc_callvm_call_x86_win32_fast
@@ -376,7 +379,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_win32_fast
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_win32_fast
 , (DCpointervmfunc*)    &dc_callvm_call_x86_win32_fast
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
@@ -459,7 +463,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86_win32_fast_gnu
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_win32_fast
 , (DCboolvmfunc*)       &dc_callvm_call_x86_win32_fast
 , (DCcharvmfunc*)       &dc_callvm_call_x86_win32_fast
@@ -470,7 +474,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_win32_fast
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_win32_fast
 , (DCpointervmfunc*)    &dc_callvm_call_x86_win32_fast
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
@@ -500,7 +505,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_win32_this_ms
 , (DCboolvmfunc*)       &dc_callvm_call_x86_win32_this_ms
 , (DCcharvmfunc*)       &dc_callvm_call_x86_win32_this_ms
@@ -511,7 +516,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_win32_this_ms
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_win32_this_ms
 , (DCpointervmfunc*)    &dc_callvm_call_x86_win32_this_ms
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 /* --- syscall ------------------------------------------------------------- */
@@ -544,7 +550,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_syscall_int80h_linux
 , (DCboolvmfunc*)       &dc_callvm_call_x86_syscall_int80h_linux
 , (DCcharvmfunc*)       &dc_callvm_call_x86_syscall_int80h_linux
@@ -555,7 +561,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_syscall_int80h_linux
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_syscall_int80h_linux
 , (DCpointervmfunc*)    &dc_callvm_call_x86_syscall_int80h_linux
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 DCCallVM_vt gVT_x86_syscall_int80h_bsd =
@@ -572,7 +579,7 @@
 , &dc_callvm_argFloat_x86
 , &dc_callvm_argDouble_x86
 , &dc_callvm_argPointer_x86
-, NULL /* argStruct */
+, NULL /* argAggr */
 , (DCvoidvmfunc*)       &dc_callvm_call_x86_syscall_int80h_bsd
 , (DCboolvmfunc*)       &dc_callvm_call_x86_syscall_int80h_bsd
 , (DCcharvmfunc*)       &dc_callvm_call_x86_syscall_int80h_bsd
@@ -583,7 +590,8 @@
 , (DCfloatvmfunc*)      &dc_callvm_call_x86_syscall_int80h_bsd
 , (DCdoublevmfunc*)     &dc_callvm_call_x86_syscall_int80h_bsd
 , (DCpointervmfunc*)    &dc_callvm_call_x86_syscall_int80h_bsd
-, NULL /* callStruct */
+, NULL /* callAggr */
+, NULL /* beginAggr */
 };
 
 
--- a/dyncall/dyncall_signature.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/dyncall_signature.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Type and calling-convention signature character defines
  License:
 
-   Copyright (c) 2007-2020 Daniel Adler <dadler@uni-goettingen.de>, 
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>, 
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -24,17 +24,6 @@
 */
 
 
-
-/*
-
-  dyncall signature characters
-
-  REVISION
-  2007/12/11 initial
-  
-*/
-
-
 #ifndef DYNCALL_SIGNATURE_H
 #define DYNCALL_SIGNATURE_H
 
@@ -54,14 +43,14 @@
 #define DC_SIGCHAR_ULONGLONG    'L'
 #define DC_SIGCHAR_FLOAT        'f'
 #define DC_SIGCHAR_DOUBLE       'd'
-#define DC_SIGCHAR_POINTER      'p'
+#define DC_SIGCHAR_POINTER      'p' /* also used for arrays, as such args decay to ptrs */
 #define DC_SIGCHAR_STRING       'Z' /* in theory same as 'p', but convenient to disambiguate */
-#define DC_SIGCHAR_STRUCT       'T'
-#define DC_SIGCHAR_ENDARG       ')' /* also works for end struct */
+#define DC_SIGCHAR_AGGREGATE    'A' /* aggregate (struct/union described out-of-band via DCaggr) */
+#define DC_SIGCHAR_ENDARG       ')'
 
 /* calling convention / mode signatures */
 
-#define DC_SIGCHAR_CC_PREFIX           '_'
+#define DC_SIGCHAR_CC_PREFIX           '_' /* announces next char to be one of the below calling convention mode chars */
 #define DC_SIGCHAR_CC_DEFAULT          ':' /* default calling conv (platform native) */
 #define DC_SIGCHAR_CC_THISCALL         '*' /* C++ this calls (platform native) */
 #define DC_SIGCHAR_CC_ELLIPSIS         'e'
--- a/dyncall/dyncall_struct.c	Sat Apr 16 15:00:58 2022 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,262 +0,0 @@
-/*
-
- Package: dyncall
- Library: dyncall
- File: dyncall/dyncall_struct.c
- Description: C interface to compute struct size
- License:
-
-   Copyright (c) 2010-2015 Olivier Chafik <olivier.chafik@centraliens.net>
-
-   Permission to use, copy, modify, and distribute this software for any
-   purpose with or without fee is hereby granted, provided that the above
-   copyright notice and this permission notice appear in all copies.
-
-   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-*/
-
-
-
-#include "dyncall.h"
-#include "dyncall_signature.h"
-#include "dyncall_struct.h"
-#include "dyncall_alloc.h"
-#include <stdio.h>
-#include <assert.h>
-
-
-DCstruct* dcNewStruct(DCsize fieldCount, DCint alignment)
-{
-	DCstruct* s = (DCstruct*)dcAllocMem(sizeof(DCstruct));
-	s->pCurrentStruct = s;
-	s->pLastStruct = NULL;
-	s->nextField = 0;
-	s->fieldCount = fieldCount;
-	s->alignment = alignment;
-	s->size = 0;
-	s->pFields = (DCfield*)dcAllocMem(fieldCount * sizeof(DCfield));
-	return s;
-}
-
-
-void dcStructField(DCstruct* s, DCint type, DCint alignment, DCsize arrayLength)
-{
-	DCfield *f;
-	if (type == DC_SIGCHAR_STRING) {
-		assert(!"Use dcSubStruct instead !!!");
-		return;
-	}
-	assert(s && s->pCurrentStruct);
-	assert(s->pCurrentStruct->nextField < (DCint)s->pCurrentStruct->fieldCount);
-	f = s->pCurrentStruct->pFields + (s->pCurrentStruct->nextField++);
-	f->type = type;
-	f->alignment = alignment;
-	f->arrayLength = arrayLength;
-	f->pSubStruct = NULL;
-	switch (type) {
-    	case DC_SIGCHAR_BOOL:       f->size = sizeof(DCbool);     break;
-    	case DC_SIGCHAR_CHAR:       
-    	case DC_SIGCHAR_UCHAR:      f->size = sizeof(DCchar);     break;
-    	case DC_SIGCHAR_SHORT:      
-    	case DC_SIGCHAR_USHORT:     f->size = sizeof(DCshort);    break;
-    	case DC_SIGCHAR_INT:        
-    	case DC_SIGCHAR_UINT:       f->size = sizeof(DCint);      break;
-    	case DC_SIGCHAR_LONG:       
-    	case DC_SIGCHAR_ULONG:      f->size = sizeof(DClong);     break;
-    	case DC_SIGCHAR_LONGLONG:   
-    	case DC_SIGCHAR_ULONGLONG:  f->size = sizeof(DClonglong); break;
-    	case DC_SIGCHAR_FLOAT:      f->size = sizeof(DCfloat);    break;
-    	case DC_SIGCHAR_DOUBLE:     f->size = sizeof(DCdouble);   break;
-    	case DC_SIGCHAR_POINTER:
-		case DC_SIGCHAR_STRING:     f->size = sizeof(DCpointer);  break;
-		default:                    assert(0);
-	}
-}
-
-
-void dcSubStruct(DCstruct* s, DCsize fieldCount, DCint alignment, DCsize arrayLength)
-{
-	DCfield *f = s->pCurrentStruct->pFields + (s->pCurrentStruct->nextField++);
-	f->type = DC_SIGCHAR_STRUCT;
-	f->arrayLength = arrayLength;
-	f->alignment = alignment;
-	f->pSubStruct = dcNewStruct(fieldCount, alignment);
-	f->pSubStruct->pLastStruct = s->pCurrentStruct;
-	s->pCurrentStruct = f->pSubStruct;
-}  	
-
-
-static void dcAlign(DCsize *size, DCsize alignment)
-{
-	DCsize mod = (*size) % alignment;
-	if (mod) {
-		DCsize rest = alignment - mod;
-		(*size) += rest;
-	}
-}
-
-
-static void dcComputeStructSize(DCstruct* s)
-{
-	DCsize i;
-	assert(s);
-
-	/* compute field sizes and alignments, recurse if needed */
-	for (i = 0; i < s->fieldCount; i++) {
-		DCfield *f = s->pFields + i;
-		DCsize fieldAlignment;
-		if (f->type == DC_SIGCHAR_STRUCT) {
-			dcComputeStructSize(f->pSubStruct);
-			f->size = f->pSubStruct->size;
-			fieldAlignment = f->pSubStruct->alignment;
-		} else
-			fieldAlignment = f->size;
-
-		if (!f->alignment)
-			f->alignment = fieldAlignment;
-
-		/* if field alignment > struct alignment, choose former */
-		if (f->alignment > s->alignment)
-			s->alignment = f->alignment;
-
-		/* if array, it's x times the size */
-		f->size *= f->arrayLength;
-
-		/*printf("FIELD %d, size = %d, alignment = %d\n", (int)i, (int)f->size, (int)f->alignment);@@@*/
-	}
-
-	/* compute overall struct size */
-	for (i = 0; i < s->fieldCount; i++) {
-		DCfield *f = s->pFields + i;
-		dcAlign(&s->size, f->alignment);
-		s->size += f->size;
-	}
-	dcAlign(&s->size, s->alignment);
-	
-	/*printf("STRUCT size = %d, alignment = %d\n", (int)s->size, (int)s->alignment);@@@*/
-}
-
-
-void dcCloseStruct(DCstruct* s)
-{
-	assert(s);
-	assert(s->pCurrentStruct);
-	assert(s->pCurrentStruct->nextField == s->pCurrentStruct->fieldCount);
-	if (!s->pCurrentStruct->pLastStruct) {
-		dcComputeStructSize(s->pCurrentStruct);
-	}
-	s->pCurrentStruct = s->pCurrentStruct->pLastStruct;
-}
-
-
-void dcFreeStruct(DCstruct* s)
-{
-	DCsize i;
-	assert(s);
-	for (i = 0; i < s->fieldCount; i++) {
-		DCfield *f = s->pFields + i;
-		if (f->type == DC_SIGCHAR_STRUCT)
-			dcFreeStruct(f->pSubStruct);
-	}
-	free(s->pFields);
-	free(s);
-}
-
-
-DCsize dcStructSize(DCstruct* s)
-{
-	assert(!s->pCurrentStruct && "Struct was not closed");
-	return s->size;
-}
-
-DCsize dcStructAlignment(DCstruct* s)
-{
-	assert(!s->pCurrentStruct && "Struct was not closed");
-	return s->alignment;
-}
-
-
-void dcArgStructUnroll(DCCallVM* vm, DCstruct* s, DCpointer  value)
-{
-	DCsize i;
-	/*printf("UNROLLING STRUCT !\n");@@@*/
-	assert(s && value);
-	for (i = 0; i < s->fieldCount; i++) {
-		DCfield *f = s->pFields + i;
-		DCpointer p = (char*)value + f->offset;
-		switch(f->type) {
-		  case DC_SIGCHAR_STRUCT:
-		  	dcArgStruct(vm, f->pSubStruct, p);
-		  	break;
-		  case DC_SIGCHAR_BOOL: 
-			dcArgBool      (vm, *(DCbool*)p); 
-			break;
-		  case DC_SIGCHAR_CHAR:
-		  case DC_SIGCHAR_UCHAR:
-			dcArgChar      (vm, *(DCchar*)p);
-			break;
-		  case DC_SIGCHAR_SHORT:
-		  case DC_SIGCHAR_USHORT:
-			dcArgShort     (vm, *(DCshort*)p);
-			break;
-		  case DC_SIGCHAR_INT:
-		  case DC_SIGCHAR_UINT:
-			dcArgInt       (vm, *(DCint*)p);
-			break;
-		  case DC_SIGCHAR_LONG:
-		  case DC_SIGCHAR_ULONG:
-			dcArgLong      (vm, *(DCulong*)p);
-			break;
-		  case DC_SIGCHAR_LONGLONG:
-		  case DC_SIGCHAR_ULONGLONG:
-			dcArgLongLong  (vm, *(DCulonglong*)p);
-			break;
-		  case DC_SIGCHAR_FLOAT:
-			dcArgFloat     (vm, *(DCfloat*)p);
-			break;
-		  case DC_SIGCHAR_DOUBLE:
-			dcArgDouble    (vm, *(DCdouble*)p);
-			break;
-		  case DC_SIGCHAR_POINTER:
-		  case DC_SIGCHAR_STRING:
-			dcArgPointer   (vm, *(DCpointer**)p);
-			break;
-	       default:
-	       	assert(0);
-		}	
-	}
-}
-
-
-static DCint readInt(const char** ptr)
-{
-	return strtol(*ptr, (char**)ptr, 10);/*@@@ enough*/
-}
-
-
-DCstruct* dcDefineStruct(const char* signature)
-{
-	DCstruct* s;
-	const char* ptr = signature;
-	DCint fieldCount = readInt(&ptr);
-	s = dcNewStruct(fieldCount, DEFAULT_ALIGNMENT);
-	
-	while (*ptr) {
-		char type = *(ptr++);
-		if (type == DC_SIGCHAR_STRUCT) {
-			/*dcSubStruct(	@@@*/
-		} else {
-			dcStructField(s, type, DEFAULT_ALIGNMENT, readInt(&ptr));
-		}
-	}
-	dcCloseStruct(s);
-	return s;
-}
-
--- a/dyncall/dyncall_struct.h	Sat Apr 16 15:00:58 2022 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-/*
-
- Package: dyncall
- Library: dyncall
- File: dyncall/dyncall_struct.h
- Description: C interface to compute struct size
- License:
-
-   Copyright (c) 2010-2015 Olivier Chafik <olivier.chafik@centraliens.net>
-
-   Permission to use, copy, modify, and distribute this software for any
-   purpose with or without fee is hereby granted, provided that the above
-   copyright notice and this permission notice appear in all copies.
-
-   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-*/
-
-
-
-/*
-
-  dyncall struct metadata structures
-
-  REVISION
-  2007/12/11 initial
-  
-*/
-
-#ifndef DYNCALL_STRUCT_H
-#define DYNCALL_STRUCT_H
-
-#include "dyncall.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif 
-
-typedef struct DCfield_ {
-	DCsize offset, size, alignment, arrayLength;
-	DCint type;
-	DCstruct* pSubStruct;
-} DCfield;
-
-struct DCstruct_ {
-	DCfield *pFields;
-	DCsize size, alignment, fieldCount;
-	
-	/* struct building uses a state machine. */
-	DCint nextField;                        /* == -1 if struct is closed */
-	DCstruct *pCurrentStruct, *pLastStruct; /* == this, unless we're in a sub struct */
-};
-
-
-/* Helper. */
-void dcArgStructUnroll(DCCallVM* vm, DCstruct* s, DCpointer  value);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* DYNCALL_H */
-
--- a/dyncall/mkfile	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncall/mkfile	Thu Apr 21 13:35:47 2022 +0200
@@ -23,7 +23,7 @@
 
 
 LIBRARY		= dyncall
-UNITS = $UNITS dyncall_vector dyncall_api dyncall_callf dyncall_call dyncall_callvm dyncall_callvm_base dyncall_struct
+UNITS = $UNITS dyncall_vector dyncall_api dyncall_callf dyncall_call dyncall_callvm dyncall_callvm_base dyncall_aggregate
 
 
 <$TOP/buildsys/mk/epilog.mk
--- a/dyncallback/dyncall_args.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Interface
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -37,6 +37,8 @@
 
 #include "dyncall.h"
 
+#include "dyncall_value.h"
+
 
 #ifdef __cplusplus
 extern "C" {
@@ -44,23 +46,30 @@
 
 typedef struct DCArgs DCArgs;
 
-DC_API DCbool      dcbArgBool     (DCArgs*);
-DC_API DCchar      dcbArgChar     (DCArgs*);
-DC_API DCshort     dcbArgShort    (DCArgs*);
-DC_API DCint       dcbArgInt      (DCArgs*);
-DC_API DClong      dcbArgLong     (DCArgs*);
-DC_API DClonglong  dcbArgLongLong (DCArgs*);
-DC_API DCuchar     dcbArgUChar    (DCArgs*);
-DC_API DCushort    dcbArgUShort   (DCArgs*);
-DC_API DCuint      dcbArgUInt     (DCArgs*);
-DC_API DCulong     dcbArgULong    (DCArgs*);
-DC_API DCulonglong dcbArgULongLong(DCArgs*);
-DC_API DCfloat     dcbArgFloat    (DCArgs*);
-DC_API DCdouble    dcbArgDouble   (DCArgs*);
-DC_API DCpointer   dcbArgPointer  (DCArgs*);
+/* functions to retrieve callback's params in callback handler */
+DC_API DCbool      dcbArgBool     (DCArgs* p);
+DC_API DCchar      dcbArgChar     (DCArgs* p);
+DC_API DCshort     dcbArgShort    (DCArgs* p);
+DC_API DCint       dcbArgInt      (DCArgs* p);
+DC_API DClong      dcbArgLong     (DCArgs* p);
+DC_API DClonglong  dcbArgLongLong (DCArgs* p);
+DC_API DCuchar     dcbArgUChar    (DCArgs* p);
+DC_API DCushort    dcbArgUShort   (DCArgs* p);
+DC_API DCuint      dcbArgUInt     (DCArgs* p);
+DC_API DCulong     dcbArgULong    (DCArgs* p);
+DC_API DCulonglong dcbArgULongLong(DCArgs* p);
+DC_API DCfloat     dcbArgFloat    (DCArgs* p);
+DC_API DCdouble    dcbArgDouble   (DCArgs* p);
+DC_API DCpointer   dcbArgPointer  (DCArgs* p);
+DC_API void        dcbArgAggr     (DCArgs* p, DCpointer target);
+
+/* function helper to put a to be returned struct-by-value into the 'result'
+   param of the callback handler, in order to return it */
+DC_API void dcbReturnAggr(DCArgs *args, DCValue *result, DCpointer ret);
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* DYNCALL_ARGS_H */
+
--- a/dyncallback/dyncall_args_arm32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_arm32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for ARM32 (ARM and THUMB mode)
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -115,13 +115,6 @@
 }
 
 
-
-// ----------------------------------------------------------------------------
-// C API implementation:
-
-
-// base operations:
-
 DClonglong  dcbArgLongLong (DCArgs* p) { return arm_longlong(p); }
 DClong      dcbArgLong     (DCArgs* p) { return *(DClong*)arm_word(p); }
 DCint       dcbArgInt      (DCArgs* p) { return (DCint)   dcbArgLong(p); }
@@ -141,3 +134,6 @@
 DCdouble    dcbArgDouble   (DCArgs* p) { return arm_double(p); }
 DCfloat     dcbArgFloat    (DCArgs* p) { return arm_float(p); }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_arm64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_arm64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -75,3 +75,6 @@
 DCulong     dcbArgULong    (DCArgs* p) { return (DCulong)     dcbArgLong(p);     }
 DCulonglong dcbArgULongLong(DCArgs* p) { return (DCulonglong) dcbArgLongLong(p); }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_arm64_apple.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_arm64_apple.c	Thu Apr 21 13:35:47 2022 +0200
@@ -158,3 +158,6 @@
 DCulong     dcbArgULong    (DCArgs* p) { return (DCulong)     dcbArgLong(p);     }
 DCulonglong dcbArgULongLong(DCArgs* p) { return (DCulonglong) dcbArgLongLong(p); }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_mips.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_mips.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for non-o32 MIPS
  License:
 
-   Copyright (c) 2013-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2013-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -85,3 +85,6 @@
   return d.result;
 }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_mips64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_mips64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for MIPS64 n32&n64
  License:
 
-   Copyright (c) 2016-2018 Tassilo Philipp <tphilipp@potion-studios.com>
+   Copyright (c) 2016-2022 Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -79,3 +79,7 @@
   return result;
 }
 
+
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_mips_o32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_mips_o32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for MIPS o32
  License:
 
-   Copyright (c) 2013-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2013-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -115,3 +115,6 @@
   return d.result;
 }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_ppc32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_ppc32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for ppc32
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -87,3 +87,6 @@
   return result; 
 }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_ppc32_sysv.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_ppc32_sysv.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Args Implementation for PowerPC 32-bit System V ABI
  License:
 
-   Copyright (c) 2015-2018 Daniel Adler <dadler@uni-goettingen.de>
+   Copyright (c) 2015-2022 Daniel Adler <dadler@uni-goettingen.de>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -93,3 +93,6 @@
   return result; 
 }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_ppc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_ppc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,8 @@
  Description: Callback's Arguments VM - Implementation for ppc64
  License:
 
-   Copyright (c) 2014-2015 Masanori Mitsugi <mitsugi@linux.vnet.ibm.com>
+   Copyright (c) 2014-2015 Masanori Mitsugi <mitsugi@linux.vnet.ibm.com>,
+                      2022 Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -90,3 +91,7 @@
   p->stackptr += sizeof(double);
   return result;
 }
+
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_sparc32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_sparc32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for sparc32 - not yet
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -66,3 +66,6 @@
 
 DCfloat     dcbArgFloat    (DCArgs* p) { return *(DCfloat*) sparc_word(p); }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_sparc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_sparc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for sparc64 - not yet
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -53,3 +53,6 @@
 		: *((DCfloat*)(p->arg_ptr   + p->i++)+1); /* right aligned in 64bit slot   */
 }
 
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
--- a/dyncallback/dyncall_args_x64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_x64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for x64
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -27,9 +27,14 @@
 
 #include "dyncall_args_x64.h"
 
+#include <assert.h>
+#include <string.h>
+
 
 static int64* arg_i64(DCArgs* args)
 {
+  args->reg_count.i += (args->reg_count.i == args->aggr_return_register);
+
   if (args->reg_count.i < numIntRegs)
     return &args->reg_data.i[args->reg_count.i++];
   else
@@ -39,27 +44,23 @@
 
 static double* arg_f64(DCArgs* args)
 {
+#if defined(DC_WINDOWS)
+  args->reg_count.f += (args->reg_count.f == args->aggr_return_register);
+#endif
   if (args->reg_count.f < numFloatRegs)
     return &args->reg_data.f[args->reg_count.f++];
   else
-  {
     return (double*)args->stack_ptr++;
-  }
 }
 
 
-// ----------------------------------------------------------------------------
-// C API implementation:
-
-
-// base operations:
 
 DClonglong  dcbArgLongLong (DCArgs* p) { return *arg_i64(p); }
 DCint       dcbArgInt      (DCArgs* p) { return (int)   dcbArgLongLong(p); }
 DClong      dcbArgLong     (DCArgs* p) { return (long)  dcbArgLongLong(p); }
 DCchar      dcbArgChar     (DCArgs* p) { return (char)  dcbArgLongLong(p); }
 DCshort     dcbArgShort    (DCArgs* p) { return (short) dcbArgLongLong(p); }
-DCbool      dcbArgBool     (DCArgs* p) { return (dcbArgInt(p) == 0) ? 0 : 1; }
+DCbool      dcbArgBool     (DCArgs* p) { return dcbArgInt(p) != 0; }
 
 DCuint      dcbArgUInt     (DCArgs* p) { return (DCuint)      dcbArgInt(p);      }
 DCuchar     dcbArgUChar    (DCArgs* p) { return (DCuchar)     dcbArgChar(p);     }
@@ -72,3 +73,99 @@
 
 DCdouble    dcbArgDouble   (DCArgs* p) { return *arg_f64(p); }
 DCfloat     dcbArgFloat    (DCArgs* p) { return *(float*)arg_f64(p); }
+
+void        dcbArgAggr     (DCArgs* p, DCpointer target)
+{
+  int i;
+  DCaggr *ag = *(p->aggrs++);
+
+  if(!ag) {
+    /* non-trivial aggr: pass as ptr, user was supposed to make copy */
+    dcbArgPointer(target);
+    return;
+  }
+
+#if defined(DC_UNIX)
+  DCRegCount_x64 n_regs = { p->reg_count.i, p->reg_count.f };
+
+  if(ag->sysv_classes[0] != SYSVC_MEMORY) {
+    /* reclassify aggr w/ respect to remaining regs, might have been passed entirely via the stack */
+    for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i) {
+      DCuchar clz = ag->sysv_classes[i];
+      n_regs.i += (clz == SYSVC_INTEGER);
+      n_regs.f += (clz == SYSVC_SSE);
+      /* @@@AGGR implement when implementing x87 types */
+    }
+  }
+
+  if(ag->sysv_classes[0] == SYSVC_MEMORY || (n_regs.i > numIntRegs) || (n_regs.f > numFloatRegs))
+  {
+     memcpy(target, p->stack_ptr, ag->size);
+     p->stack_ptr = p->stack_ptr + ((ag->size + (sizeof(DClonglong)-1)) >> 3); // advance to next full stack slot
+     return;
+  }
+
+
+  for(i=0; ag->sysv_classes[i] && i<DC_SYSV_MAX_NUM_CLASSES; ++i)
+  {
+    switch (ag->sysv_classes[i])
+    {
+      case SYSVC_INTEGER: ((DClonglong*)target)[i] = dcbArgLongLong(p); break;
+      case SYSVC_SSE:     ((DCdouble  *)target)[i] = dcbArgDouble  (p); break;
+      /* @@@AGGR implement when implementing x87 types */
+      default:
+          assert(DC_FALSE && "Should never be reached because we check for unupported classes earlier");
+    }
+  }
+
+#else
+
+  switch (ag->size) {
+    case 1: *(DCchar    *)target = dcbArgChar    (p); break;
+    case 2: *(DCshort   *)target = dcbArgShort   (p); break;
+    case 4: *(DClong    *)target = dcbArgLong    (p); break;
+    case 8: *(DClonglong*)target = dcbArgLongLong(p); break;
+    default: memcpy(target, dcbArgPointer(p), ag->size); break;
+  }
+#endif
+}
+
+
+/* A 16 byte struct would be sufficient for System V (because at most two of the four registers can be full). */
+/* But then it's much more complicated to copy the result to the correct registers in assembly. */
+typedef struct {
+  DClonglong r[2]; /* rax, rdx   */
+  DCdouble   x[2]; /* xmm0, xmm1 */
+} DCRetRegs_SysV;
+
+void dcbReturnAggr(DCArgs *args, DCValue *result, DCpointer ret)
+{
+  int i;
+  DCaggr *ag = *(args->aggrs++);
+
+  if (args->aggr_return_register >= 0) {
+    DCpointer dest = (DCpointer) args->reg_data.i[args->aggr_return_register];
+    memcpy(dest, ret, ag->size);
+    result->p = dest;
+  } else {
+#if defined(DC_UNIX)
+    /* a max of 2 regs are used in this case, out of rax, rdx, xmm0 and xmm1 */
+    /* space for 4 qwords is pointed to by (DCRetRegs_SysV*)result */
+    DClonglong *intRegs = ((DCRetRegs_SysV*)result)->r;
+    DCdouble   *sseRegs = ((DCRetRegs_SysV*)result)->x;
+    for(i=0; ag->sysv_classes[i] && i<2/*guaranteed*/; ++i) {
+      switch (ag->sysv_classes[i]) {
+        case SYSVC_INTEGER: *(intRegs++) = ((DClonglong*)ret)[i]; break;
+        case SYSVC_SSE:     *(sseRegs++) = ((DCdouble  *)ret)[i]; break;
+        /* @@@AGGR implement when implementing x87 types, might lead to more than 2 regs (e.g. _m512) */
+        default: assert(DC_FALSE && "Should never be reached because we check for unupported classes earlier");
+      }
+    }
+#else
+    /* copy aggregate (guaranteed to be <= 8b by call conv, as no hidden ptr) into result */
+    assert(ag->size <= 8 && "aggregate info mismatch for return type");
+    memcpy(result, ret, ag->size);
+#endif
+  }
+}
+
--- a/dyncallback/dyncall_args_x64.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_x64.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Header for x64
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,17 +28,25 @@
 #define DYNCALLBACK_ARGS_X64_H
 
 #include "dyncall_args.h"
-#include "dyncall_callvm_x64.h"  /* reuse structures */
+#include "dyncall_callvm_x64.h" /* reuse DCRegCount_x64 and DCRegData_x64_s */
+#include "dyncall_aggregate.h"
+
 
 
 struct DCArgs
 {
-	/* state */
-	int64*          stack_ptr;
-	DCRegCount_x64  reg_count;	/* @@@ win64 version should maybe force alignment to 8 in order to be secure */
+  /* state */
+  int64*          stack_ptr;              /* offset 0 */
+  DCRegCount_x64  reg_count;              /* offset 8, size:win 4, size:*nix 8 */
+#if defined(DC_WINDOWS)
+  int             pad_w;                  /* alignment helper for win/x64 */
+#endif
+  int             aggr_return_register;   /* offset 16 */
+  int             pad;                    /* offset 20 */
+  DCaggr**        aggrs;                  /* offset 24 */
 
-	/* reg data */
-	DCRegData_x64_s reg_data;
+  /* reg data */
+  DCRegData_x64_s reg_data;               /* offset 32 */
 };
 
 #endif /* DYNCALLBACK_ARGS_X64_H */
--- a/dyncallback/dyncall_args_x86.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_args_x86.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback's Arguments VM - Implementation for x86
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -26,10 +26,7 @@
 
 #include "dyncall_args_x86.h"
 
-/* ---------------------------------------------------------------------------- */
-/* C API implementation: */
 
-/* base operations */
 
 DCint      dcbArgInt     (DCArgs* p) { return p->vt->i32(p); }
 DClonglong dcbArgLongLong(DCArgs* p) { return p->vt->i64(p); }
@@ -52,8 +49,10 @@
 DCulong     dcbArgULong    (DCArgs* p) { return (DCulong)     dcbArgLong(p); }
 DCulonglong dcbArgULongLong(DCArgs* p) { return (DCulonglong) dcbArgLongLong(p); }
 
-/* ---------------------------------------------------------------------------- */
-/* virtual tables: */
+void        dcbArgAggr     (DCArgs* p, DCpointer target)                   { /* @@@AGGR not impl */ }
+void        dcbReturnAggr  (DCArgs *args, DCValue *result, DCpointer ret)  { /* @@@AGGR not impl */ }
+
+
 
 /* cdecl calling convention */
 
@@ -120,3 +119,4 @@
 }
 
 DCArgsVT dcArgsVT_fast_gnu = { fast_i32, fast_gnu_i64, default_f32, default_f64 };
+
--- a/dyncallback/dyncall_callback.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation back-end selector
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -25,6 +25,8 @@
 
 
 #include "../dyncall/dyncall_macros.h"
+#include "../dyncall/dyncall_aggregate.h"
+
 
 #if defined(DC__Arch_Intel_x86)
 #include "dyncall_callback_x86.c"
@@ -44,5 +46,31 @@
 #include "dyncall_callback_sparc64.c"
 #elif defined(DC__Arch_ARM64)
 #include "dyncall_callback_arm64.c"
+#else
+#error unsupported platform
 #endif
 
+
+void dcbInitCallback(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata)
+{
+  dcbInitCallback2(pcb, signature, handler, userdata, NULL);
+}
+
+
+DCCallback* dcbNewCallback(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata)
+{
+  return dcbNewCallback2(signature, handler, userdata, NULL);
+}
+
+
+void dcbFreeCallback(DCCallback* pcb)
+{
+  dcFreeWX(pcb, sizeof(DCCallback));
+}
+
+
+void* dcbGetUserData(DCCallback* pcb)
+{
+  return pcb->userdata;
+}
+
--- a/dyncallback/dyncall_callback.h	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback.h	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Interface
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -32,20 +32,27 @@
 
 typedef struct DCCallback DCCallback;
 
-/* return value is the type encoded as a type-specifying signature char (see dyncall_signature.h) */
-typedef char (DCCallbackHandler)(DCCallback* pcb, DCArgs* args, DCValue* result, void* userdata);
+/* callback handler:
+   - handler's return value signature char (see dyncall_signature.h) of callback's return value type
+   - callback return value is written to the corresponding type's field of result
+   - if callback return value is an aggregate (by value), use dcbReturnAggr() as a helper to write to result
+*/
+typedef DCsigchar (DCCallbackHandler)(DCCallback* pcb, DCArgs* args, DCValue* result, void* userdata);
 
 #ifdef __cplusplus
 extern "C" {
 #endif 
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* funcptr, void* userdata);
-void        dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata);
-void        dcbFreeCallback(DCCallback* pcb);
-void*       dcbGetUserData (DCCallback* pcb);
+DCCallback* dcbNewCallback  (const DCsigchar* signature, DCCallbackHandler* funcptr, void* userdata);
+DCCallback* dcbNewCallback2 (const DCsigchar* signature, DCCallbackHandler* funcptr, void* userdata, DCaggr *const * aggrs);
+void        dcbInitCallback (DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata);
+void        dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs);
+void        dcbFreeCallback (DCCallback* pcb);
+void*       dcbGetUserData  (DCCallback* pcb);
 
 #ifdef __cplusplus
 }
 #endif 
 
 #endif /* DYNCALL_CALLBACK_H */
+
--- a/dyncallback/dyncall_callback_arm32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_arm32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for ARM32 (ARM and THUMB mode)
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -39,23 +40,22 @@
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  int err;
   DCCallback* pcb;
-  err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -66,14 +66,3 @@
   return pcb;
 }
 
-
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
-
--- a/dyncallback/dyncall_callback_arm64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_arm64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for ARM64 / ARMv8 / AAPCS64
  License:
 
-   Copyright (c) 2015-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2015-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -39,22 +40,22 @@
 };                              /* total   48 */ 
                                 /* aligned 48 */ 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   pcb->handler = handler;
   pcb->userdata = userdata;
 }
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  int err;
   DCCallback* pcb;
-  err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -65,13 +66,3 @@
   return pcb;
 }
 
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
-
--- a/dyncallback/dyncall_callback_mips.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_mips.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation Header for MIPS
  License:
 
-   Copyright (c) 2013-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2013-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -40,23 +41,22 @@
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  int err;
   DCCallback* pcb;
-  err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -67,13 +67,3 @@
   return pcb;
 }
 
-
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
--- a/dyncallback/dyncall_callback_ppc32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_ppc32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation Header for ppc32
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -40,24 +41,22 @@
 };                                  /*     total size 36 */                                  
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  const char* ptr;
-  char ch;
-
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   DCCallback* pcb;
-  int err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -68,12 +67,3 @@
   return pcb;
 }
 
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
--- a/dyncallback/dyncall_callback_ppc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_ppc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,8 @@
  Description: Callback - Implementation Header for ppc64
  License:
 
-   Copyright (c) 2014-2016 Masanori Mitsugi <mitsugi@linux.vnet.ibm.com>
+   Copyright (c) 2014-2016 Masanori Mitsugi <mitsugi@linux.vnet.ibm.com>,
+                      2022 Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -27,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -39,24 +41,22 @@
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  const char* ptr;
-  char ch;
-
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   DCCallback* pcb;
-  int err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -67,12 +67,3 @@
   return pcb;
 }
 
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
--- a/dyncallback/dyncall_callback_sparc32.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_sparc32.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for sparc32
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -39,21 +40,22 @@
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   DCCallback* pcb;
-  int err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**)&pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -64,13 +66,3 @@
   return pcb;
 }
 
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
-
--- a/dyncallback/dyncall_callback_sparc64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_sparc64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for sparc64
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,6 +28,7 @@
 #include "dyncall_alloc_wx.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -39,21 +40,22 @@
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   pcb->handler  = handler;
   pcb->userdata = userdata;
 }
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   DCCallback* pcb;
   int err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -64,13 +66,3 @@
   return pcb;
 }
 
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
-
--- a/dyncallback/dyncall_callback_x64.S	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_x64.S	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,8 @@
  Description: Callback Thunk entry for x64 (portasm version)
  License:
 
-   Copyright (c) 2011-2018 Daniel Adler <dadler@uni-goettingen.de>
+   Copyright (c) 2011-2022 Daniel Adler <dadler@uni-goettingen.de>,
+                           Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -29,27 +30,27 @@
 /* structure sizes */
 
 SET(DCThunk_size,24)
-SET(DCArgs_size_win64,80)
-SET(DCArgs_size_sysv,128)
+SET(DCArgs_size_win64,96) /* 8 (stack_ptr) + 4 (reg_count) + 4 (pad_w) + 4 (aggr_return_register) + 4 (pad) + 8*4 (int regs) + 8 (aggrs) + 8*4 (float regs) */
+SET(DCArgs_size_sysv,144) /* 8 (stack_ptr) + 8 (reg_count) +             4 (aggr_return_register) + 4 (pad) + 8*6 (int regs) + 8 (aggrs) + 8*8 (float regs) */
 SET(DCValue_size,8)
+SET(DCRetRegs_SystemV_size,32)
 
-/* frame local variable offsets relative to %rbp*/
+/* frame local variable offsets relative to %rbp */
 
 SET(FRAME_arg0_win64,48)
 SET(FRAME_arg0_sysv,16)
 SET(FRAME_return,8)
 SET(FRAME_parent,0)
-SET(FRAME_DCArgs_sysv,-128)
-SET(FRAME_DCValue_sysv,-136)
-SET(FRAME_DCArgs_win64,-80)
-SET(FRAME_DCValue_win64,-80)
 
-/* struct DCCallback */
+/* struct DCCallback layout, relative to ptr passed to functions below via RAX */
 
 SET(CTX_thunk,0)
 SET(CTX_handler,24)
 SET(CTX_userdata,32)
-SET(DCCallback_size,40)
+SET(CTX_aggr_ret_reg,40)
+SET(CTX_pad,44)
+SET(CTX_aggrs_pp,48)
+SET(DCCallback_size,56)
 
 
 
@@ -61,54 +62,71 @@
 
 	/* initialize DCArgs */
 
-	/* float parameters (8 registers spill to DCArgs) */
-
+	/* float parameters (8 registers spill to DCArgs.reg_data) */
 	SUB(LIT(8*8),RSP)
-
-	MOVSD(XMM7, QWORD(RSP,8*7))		/* struct offset 120: float parameter 7 */
-	MOVSD(XMM6, QWORD(RSP,8*6))		/* struct offset 112: float parameter 6 */
-	MOVSD(XMM5, QWORD(RSP,8*5))		/* struct offset 104: float parameter 5 */
-	MOVSD(XMM4, QWORD(RSP,8*4))		/* struct offset  96: float parameter 4 */
-	MOVSD(XMM3, QWORD(RSP,8*3))		/* struct offset  88: float parameter 3 */
-	MOVSD(XMM2, QWORD(RSP,8*2))		/* struct offset  80: float parameter 2 */
-	MOVSD(XMM1, QWORD(RSP,8*1))		/* struct offset  72: float parameter 1 */
-	MOVSD(XMM0, QWORD(RSP,8*0))		/* struct offset  64: float parameter 0 */
+	MOVSD(XMM7, QWORD(RSP,8*7))		/* DCArgs offset 136: float parameter 7 */
+	MOVSD(XMM6, QWORD(RSP,8*6))		/* DCArgs offset 128: float parameter 6 */
+	MOVSD(XMM5, QWORD(RSP,8*5))		/* DCArgs offset 120: float parameter 5 */
+	MOVSD(XMM4, QWORD(RSP,8*4))		/* DCArgs offset 112: float parameter 4 */
+	MOVSD(XMM3, QWORD(RSP,8*3))		/* DCArgs offset 104: float parameter 3 */
+	MOVSD(XMM2, QWORD(RSP,8*2))		/* DCArgs offset  96: float parameter 2 */
+	MOVSD(XMM1, QWORD(RSP,8*1))		/* DCArgs offset  88: float parameter 1 */
+	MOVSD(XMM0, QWORD(RSP,8*0))		/* DCArgs offset  80: float parameter 0 */
 
-	/* integer parameters (6 registers spill to DCArgs) */
+	/* integer parameters (6 registers spill to DCArgs.reg_data) */
+	PUSH(R9)				/* DCArgs offset 72: parameter 5 */
+	PUSH(R8)				/* DCArgs offset 64: parameter 4 */
+	PUSH(RCX)				/* DCArgs offset 56: parameter 3 */
+	PUSH(RDX)				/* DCArgs offset 48: parameter 2 */
+	PUSH(RSI)				/* DCArgs offset 40: parameter 1 */
+	PUSH(RDI)				/* DCArgs offset 32: parameter 0 */
 
-	PUSH(R9)				/* struct offset 56: parameter 5 */
-	PUSH(R8)				/* struct offset 48: parameter 4 */
-	PUSH(RCX)				/* struct offset 40: parameter 3 */
-	PUSH(RDX)				/* struct offset 32: parameter 2 */
-	PUSH(RSI)				/* struct offset 24: parameter 1 */
-	PUSH(RDI)				/* struct offset 16: parameter 0 */
-	
+	MOV(QWORD(RAX, CTX_aggrs_pp), R8)
+	PUSH(R8)				/* DCArgs offset 24: **aggrs */
+
+	/* get val of aggr_return_register from DCCallback struct into r8's LSBs, clear */
+	/* MSBs, write DCarg's pad and  aggr_return_register at once */
+	MOVL(DWORD(RAX, CTX_aggr_ret_reg), R8D)	/* implicitly zeroes the high bits of R8 */
+	PUSH(R8)				/* DCArgs offset 16: pad=0, aggr_return_register=DCCallback.aggr_return_register */
 	/* register counts for integer/pointer and float regs */
+	PUSH(LIT(0))				/* DCArgs offset 12: fcount */
+						/* DCArgs offset  8: icount */
 
-	PUSH(LIT(0))				/* struct offset 12: fcount */
-						/* struct offset  8: icount */
-
-	LEA(QWORD(RBP,FRAME_arg0_sysv),RDX)		/* struct offset  0: stack pointer */
+	LEA(QWORD(RBP,FRAME_arg0_sysv),RDX)	/* DCArgs offset  0: *stack_ptr */
 	PUSH(RDX)
 
 	MOV(RSP,RSI)				/* arg 1 RSI : DCArgs* */
 
-	/* initialize DCValue */
+	/* stack space for DCValue or DCRetRegs_SysV (passed to handler as DCValue*) and padding */
+	SUB(LIT(4*8),RSP)			/* 4 qwords for DCRetRegs_SysV */
 
-	PUSH(LIT(0))				/* struct offset 0: return value (max long long) */
-
-	/* call handler( *ctx, *args, *value, *userdata) */
-
+	/* call handler(*ctx, *args, *value, *userdata) - stack must be 16b aligned, here */
 	MOV(RAX,RDI)				/* arg 0 RDI : DCCallback* (RAX) */
 	MOV(QWORD(RDI,CTX_userdata),RCX)	/* arg 3 RCX : userdata* */
 	MOV(RSP,RDX)				/* arg 2 RDX : DCValue*  */
-	PUSH(LIT(0))				/* align to 16 bytes */
+
 	CALL_REG(QWORD(RAX,CTX_handler))
 
-	/* pass return type via registers, handle ints and floats */
-	
-	MOV(QWORD(RBP,FRAME_DCValue_sysv),RAX)
-	MOVD(RAX,XMM0)
+	/* get info about return type, use to select how to store reg-based retval */
+	CMPL(LIT(-2/*see C*/), DWORD(RSP, 48)) /* rsp+48 = where r8 (aggr_return_register) was pushed */
+
+	/* if retval is small aggregate via regs */
+	JE(scalar_retval)
+
+		MOV(QWORD(RSP,0),RAX)
+		MOV(QWORD(RSP,8),RDX)
+		MOVSD(QWORD(RSP,16),XMM0) /* @@@AGGR needed to be put in xmm in this case? @@@ also not what doc/appendix says, actually */
+		MOVSD(QWORD(RSP,24),XMM1) /* @@@AGGR needed to be put in xmm in this case? @@@ also not what doc/appendix says, actually */
+
+	/* else (retval is int, float, or ptr to aggregate) */
+	JMP(epilog)
+CSYM(scalar_retval):
+
+		/* pass return type via registers, handle ints and floats */
+		MOV(QWORD(RSP,0),RAX)
+		MOVD(RAX,XMM0)
+
+CSYM(epilog):
 
 	MOV(RBP,RSP)
 	POP(RBP)
@@ -116,55 +134,63 @@
 
 END_PROC(dcCallback_x64_sysv)
 
-GLOBAL(dcCallback_x64_win64)
-BEGIN_PROC(dcCallback_x64_win64)
+
+GLOBAL_FRAME(dcCallback_x64_win64)
+FRAME_BEGIN_PROC(dcCallback_x64_win64)
 
 	PUSH(RBP)
+	FRAME_PUSH_REG(RBP)
 	MOV(RSP,RBP)
+	FRAME_SET(0, RBP)
+	FRAME_ENDPROLOG()
 
 	/* initialize DCArgs */
 
-	/* float parameters (4 registers spill to DCArgs) */
-
+	/* float parameters (4 registers spill to DCArgs.reg_data) */
 	SUB(LIT(4*8),RSP)
+	MOVSD(XMM3, QWORD(RSP,8*3))		/* DCArgs offset  88: float parameter 3 */
+	MOVSD(XMM2, QWORD(RSP,8*2))		/* DCArgs offset  80: float parameter 2 */
+	MOVSD(XMM1, QWORD(RSP,8*1))		/* DCArgs offset  72: float parameter 1 */
+	MOVSD(XMM0, QWORD(RSP,8*0))		/* DCArgs offset  64: float parameter 0 */
 
-	MOVSD(XMM3, QWORD(RSP,8*3))		/* struct offset  72: float parameter 3 */
-	MOVSD(XMM2, QWORD(RSP,8*2))		/* struct offset  64: float parameter 2 */
-	MOVSD(XMM1, QWORD(RSP,8*1))		/* struct offset  56: float parameter 1 */
-	MOVSD(XMM0, QWORD(RSP,8*0))		/* struct offset  48: float parameter 0 */
-
-	/* integer parameters (4 registers spill to DCArgs) */
+	/* integer parameters (4 registers spill to DCArgs.reg_data) */
+	PUSH(R9)				/* DCArgs offset 56: parameter 3 */
+	PUSH(R8)				/* DCArgs offset 48: parameter 2 */
+	PUSH(RDX)				/* DCArgs offset 40: parameter 1 */
+	PUSH(RCX)				/* DCArgs offset 32: parameter 0 */
 
-	PUSH(R9)				/* struct offset 40: parameter 3 */
-	PUSH(R8)				/* struct offset 32: parameter 2 */
-	PUSH(RDX)				/* struct offset 24: parameter 1 */
-	PUSH(RCX)				/* struct offset 16: parameter 0 */
-	
+	MOV(QWORD(RAX, CTX_aggrs_pp), R8)
+	PUSH(R8)				/* DCArgs offset 24: **aggrs */
+
+	/* get val of aggr_return_register from DCCallback struct into r8's LSBs, clear */
+	/* MSBs, write DCarg's pad and  aggr_return_register at once */
+	MOVL(DWORD(RAX, CTX_aggr_ret_reg), R8D)	/* implicitly zeroes the high bits of R8 */
+	PUSH(R8)				/* DCArgs offset 16: pad=0, aggr_return_register=DCCallback.aggr_return_register */
+
 	/* register counts for integer/pointer and float regs */
+	PUSH(LIT(0))				/* DCArgs offset 12: pad_w */
+						/* DCArgs offset  8: reg_count */
 
-	PUSH(LIT(0))				/* struct offset 12: fcount */
-						/* struct offset  8: icount */
-
-	LEA(QWORD(RBP,FRAME_arg0_win64),RDX)		/* struct offset  0: stack pointer */
+	LEA(QWORD(RBP,FRAME_arg0_win64),RDX)	/* DCArgs offset  0: *stack_ptr */
 	PUSH(RDX)
 
 	MOV(RSP,RDX)				/* arg 1 RDX : DCArgs* */
-	
-	/* initialize DCValue */
 
-	// PUSHQ(LIT(0))				/* struct offset 0: return value (max long long) */
+	/* space for retval (also aligns stack to 16b) */
+	SUB(LIT(2*8),RSP)
 
-	/* call handler( *ctx, *args, *value, *userdata) */
-
+	/* call handler(*ctx, *args, *value, *userdata) - stack must be 16b aligned, here */
 	MOV(RAX,RCX)				/* arg 0 RCX : DCCallback* (RAX) */
-	MOV(QWORD(RAX,CTX_userdata),R9)	/* arg 3 R9  : userdata* */
+	MOV(QWORD(RAX,CTX_userdata),R9)		/* arg 3 R9  : userdata* */
 	MOV(RSP,R8)				/* arg 2 R8  : DCValue*  */
-	SUB(LIT(4*8),RSP)                      /* make room for spill area and call */
+
+	/* spill area */
+	SUB(LIT(4*8),RSP)			/* 4 qwords for spill area */
+
 	CALL_REG(QWORD(RAX,CTX_handler))
 
 	/* pass return type via registers, handle ints and floats */
-	
-	MOV(QWORD(RBP,FRAME_DCValue_win64),RAX)
+	MOV(QWORD(RSP,4*8),RAX)
 	MOVD(RAX,XMM0)
 
 	MOV(RBP,RSP)
@@ -173,5 +199,8 @@
 
 END_PROC(dcCallback_x64_win64)
 
+
 END_ASM
 
+/* vim: set ts=8: */
+
--- a/dyncallback/dyncall_callback_x64.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_x64.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for x64
  License:
 
-   Copyright (c) 2007-2018 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -26,28 +26,61 @@
 
 #include "dyncall_callback.h"
 #include "dyncall_alloc_wx.h"
+#include "dyncall_aggregate.h"
 #include "dyncall_thunk.h"
 
+
 /* Callback symbol. */
 extern void dcCallback_x64_sysv();
 extern void dcCallback_x64_win64();
 
 struct DCCallback
 {
-  DCThunk  	         thunk;    // offset 0,  size 24
-  DCCallbackHandler* handler;  // offset 24
-  void*              userdata; // offset 32
+  DCThunk            thunk;                /* offset 0,  size 24 */
+  DCCallbackHandler* handler;              /* offset 24 */
+  void*              userdata;             /* offset 32 */
+  DCint              aggr_return_register; /* offset 40 */
+  DCint              pad;                  /* offset 44 */
+  DCaggr *const *    aggrs;                /* offset 48 */
 };
 
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  pcb->handler  = handler;
-  pcb->userdata = userdata;
+  const DCsigchar *ch = signature;
+  DCint num_aggrs = 0;
+
+  pcb->handler              = handler;
+  pcb->userdata             = userdata;
+  pcb->aggrs                = NULL;
+  pcb->aggr_return_register = -2; /* default, = no aggr as ret value */
+
+  while(*ch)
+    num_aggrs += (*(ch++) == DC_SIGCHAR_AGGREGATE);
+
+  if(num_aggrs)
+  {
+    pcb->aggrs = aggrs;
+  
+    if (ch != signature && *(ch - 1) == DC_SIGCHAR_AGGREGATE) {
+      const DCaggr *ag = pcb->aggrs[num_aggrs - 1];
+  
+#if defined(DC_UNIX)
+      if (!ag || (ag->sysv_classes[0] == SYSVC_MEMORY)) {
+#else
+      if (!ag || ag->size > 8) {
+#endif 
+        /* we need to "return" this aggr as a hidden pointer (first arg) */
+        pcb->aggr_return_register = 0;
+      } else {
+        pcb->aggr_return_register = -1; /* small aggr, returned in register */
+      }
+    }
+  }
 }
 
 
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
   int err;
   DCCallback* pcb;
@@ -55,12 +88,13 @@
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
+
 #if defined (DC__OS_Win64)
   dcbInitThunk(&pcb->thunk, dcCallback_x64_win64); 
 #else
   dcbInitThunk(&pcb->thunk, dcCallback_x64_sysv); 
 #endif
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -71,13 +105,3 @@
   return pcb;
 }
 
-
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
--- a/dyncallback/dyncall_callback_x64_masm.asm	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_x64_masm.asm	Thu Apr 21 13:35:47 2022 +0200
@@ -1,21 +1,21 @@
 ; auto-generated by gen-masm.sh
 .CODE
 DCThunk_size = 24
-DCArgs_size_win64 = 80
-DCArgs_size_sysv = 128
+DCArgs_size_win64 = 96
+DCArgs_size_sysv = 144
 DCValue_size = 8
+DCRetRegs_SystemV_size = 32
 FRAME_arg0_win64 = 48
 FRAME_arg0_sysv = 16
 FRAME_return = 8
 FRAME_parent = 0
-FRAME_DCArgs_sysv = -128
-FRAME_DCValue_sysv = -136
-FRAME_DCArgs_win64 = -80
-FRAME_DCValue_win64 = -80
 CTX_thunk = 0
 CTX_handler = 24
 CTX_userdata = 32
-DCCallback_size = 40
+CTX_aggr_ret_reg = 40
+CTX_pad = 44
+CTX_aggrs_pp = 48
+DCCallback_size = 56
 dcCallback_x64_sysv PROC
 OPTION PROLOGUE:NONE, EPILOGUE:NONE
  push RBP
@@ -35,26 +35,41 @@
  push RDX
  push RSI
  push RDI
+ mov R8,qword ptr [RAX+CTX_aggrs_pp]
+ push R8
+ mov R8D,dword ptr [RAX+CTX_aggr_ret_reg]
+ push R8
  push 0
  lea RDX,qword ptr [RBP+FRAME_arg0_sysv]
  push RDX
  mov RSI,RSP
- push 0
+ sub RSP,4*8
  mov RDI,RAX
  mov RCX,qword ptr [RDI+CTX_userdata]
  mov RDX,RSP
- push 0
  call qword ptr [RAX+CTX_handler]
- mov RAX,qword ptr [RBP+FRAME_DCValue_sysv]
- movd XMM0,RAX
+ cmp dword ptr [RSP+48],-2
+ je scalar_retval
+  mov RAX,qword ptr [RSP+0]
+  mov RDX,qword ptr [RSP+8]
+  movsd XMM0,qword ptr [RSP+16]
+  movsd XMM1,qword ptr [RSP+24]
+ jmp epilog
+scalar_retval:
+  mov RAX,qword ptr [RSP+0]
+  movd XMM0,RAX
+epilog:
  mov RSP,RBP
  pop RBP
  ret
 dcCallback_x64_sysv ENDP
-dcCallback_x64_win64 PROC
-OPTION PROLOGUE:NONE, EPILOGUE:NONE
+dcCallback_x64_win64 PROC FRAME
+OPTION EPILOGUE:NONE
  push RBP
+ .pushreg RBP
  mov RBP,RSP
+ .setframe RBP, 0
+ .endprolog
  sub RSP,4*8
  movsd qword ptr [RSP+8*3],XMM3
  movsd qword ptr [RSP+8*2],XMM2
@@ -64,16 +79,21 @@
  push R8
  push RDX
  push RCX
+ mov R8,qword ptr [RAX+CTX_aggrs_pp]
+ push R8
+ mov R8D,dword ptr [RAX+CTX_aggr_ret_reg]
+ push R8
  push 0
  lea RDX,qword ptr [RBP+FRAME_arg0_win64]
  push RDX
  mov RDX,RSP
+ sub RSP,2*8
  mov RCX,RAX
  mov R9,qword ptr [RAX+CTX_userdata]
  mov R8,RSP
  sub RSP,4*8
  call qword ptr [RAX+CTX_handler]
- mov RAX,qword ptr [RBP+FRAME_DCValue_win64]
+ mov RAX,qword ptr [RSP+4*8]
  movd XMM0,RAX
  mov RSP,RBP
  pop RBP
--- a/dyncallback/dyncall_callback_x86.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncall_callback_x86.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: Callback - Implementation for x86
  License:
 
-   Copyright (c) 2007-2020 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -29,6 +29,7 @@
 #include "dyncall_thunk.h"
 #include "dyncall_args_x86.h"
 
+
 /* Callback symbol. */
 extern void dcCallbackThunkEntry();
 
@@ -47,16 +48,16 @@
  * cdecl,stdcall,thiscall_ms,fastcall_ms,fastcall_gnu
  */
 
-static int dcbCleanupSize_x86_cdecl(const char* signature)
+static int dcbCleanupSize_x86_cdecl(const DCsigchar* signature)
 {
   return 0;
 }
 
-static int dcbCleanupSize_x86_std(const char* signature)
+static int dcbCleanupSize_x86_std(const DCsigchar* signature)
 {
-  const char* ptr = signature;
+  const DCsigchar* ptr = signature;
   int size = 0;
-  char ch;
+  DCsigchar ch;
   while( (ch = *ptr++) != DC_SIGCHAR_ENDARG ) {
     switch(ch) {
       case DC_SIGCHAR_BOOL:
@@ -83,11 +84,11 @@
   return size;
 }
 
-static int dcbCleanupSize_x86_this_ms(const char* signature)
+static int dcbCleanupSize_x86_this_ms(const DCsigchar* signature)
 {
-  const char* ptr = signature;
+  const DCsigchar* ptr = signature;
   int size = 0;
-  char ch;
+  DCsigchar ch;
   while( (ch = *ptr++) != DC_SIGCHAR_ENDARG )
   {
     switch(ch)
@@ -116,12 +117,12 @@
   return size;
 }
 
-static int dcbCleanupSize_x86_fast_ms(const char* signature)
+static int dcbCleanupSize_x86_fast_ms(const DCsigchar* signature)
 {
-  const char* ptr = signature;
+  const DCsigchar* ptr = signature;
   int size = 0;
   int regs = 0;
-  char ch;
+  DCsigchar ch;
   while( (ch = *ptr++) != DC_SIGCHAR_ENDARG )
   {
     switch(ch)
@@ -155,10 +156,10 @@
   return size;
 }
 
-static int dcbCleanupSize_x86_fast_gnu(const char* signature)
+static int dcbCleanupSize_x86_fast_gnu(const DCsigchar* signature)
 {
-  const char* ptr = signature;
-  char ch;
+  const DCsigchar* ptr = signature;
+  DCsigchar ch;
   int size = 0;
   int regs = 0;
   while( (ch = *ptr++) != DC_SIGCHAR_ENDARG ) {
@@ -183,9 +184,10 @@
   return size;
 }
 
-void dcbInitCallback(DCCallback* pcb, const char* signature, DCCallbackHandler* handler, void* userdata)
+
+void dcbInitCallback2(DCCallback* pcb, const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  const char* ptr;
+  const DCsigchar* ptr;
   int mode;
   pcb->handler = handler;
   pcb->userdata = userdata;
@@ -221,6 +223,9 @@
       pcb->args_vt = &dcArgsVT_fast_gnu;
       pcb->stack_cleanup = dcbCleanupSize_x86_fast_gnu(ptr);
       break;
+#if defined(DC_WINDOWS) && defined(DC__C_MSVC)
+	case DC_CALL_C_DEFAULT_THIS:
+#endif
     case DC_CALL_C_X86_WIN32_THIS_MS:
       pcb->args_vt = &dcArgsVT_this_ms;
       pcb->stack_cleanup = dcbCleanupSize_x86_this_ms(ptr);
@@ -248,19 +253,16 @@
 #endif
 }
 
-/*
- * callback constructor
- */
-DCCallback* dcbNewCallback(const char* signature, DCCallbackHandler* handler, void* userdata)
+
+DCCallback* dcbNewCallback2(const DCsigchar* signature, DCCallbackHandler* handler, void* userdata, DCaggr *const * aggrs)
 {
-  int err;
   DCCallback* pcb;
-  err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
+  int err = dcAllocWX(sizeof(DCCallback), (void**) &pcb);
   if(err)
     return NULL;
 
+  dcbInitCallback2(pcb, signature, handler, userdata, aggrs);
   dcbInitThunk(&pcb->thunk, dcCallbackThunkEntry);
-  dcbInitCallback(pcb, signature, handler, userdata);
 
   err = dcInitExecWX(pcb, sizeof(DCCallback));
   if(err) {
@@ -271,16 +273,3 @@
   return pcb;
 }
 
-/*
- * free
- */
-
-void dcbFreeCallback(DCCallback* pcb)
-{
-  dcFreeWX(pcb, sizeof(DCCallback));
-}
-
-void* dcbGetUserData(DCCallback* pcb)
-{
-  return pcb->userdata;
-}
--- a/dyncallback/dyncallback.3	Sat Apr 16 15:00:58 2022 +0200
+++ b/dyncallback/dyncallback.3	Thu Apr 21 13:35:47 2022 +0200
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2007-2014 Daniel Adler <dadler AT uni-goettingen DOT de>, 
+.\" Copyright (c) 2007-2022 Daniel Adler <dadler AT uni-goettingen DOT de>, 
 .\"                         Tassilo Philipp <tphilipp AT potion-studios DOT com>
 .\" 
 .\" Permission to use, copy, modify, and distribute this software for any
@@ -21,70 +21,150 @@
 .Sh SYNOPSIS
 .In dyncall_callback.h
 .Ft DCCallback *
-.Fn dcbNewCallback "const char * signature" "DCCallbackHandler * funcptr" "void * userdata"
+.Fn dcbNewCallback "const DCsigchar * signature" "DCCallbackHandler * funcptr" "void * userdata"
+.Ft DCCallback *
+.Fn dcbNewCallback2 "const DCsigchar * signature" "DCCallbackHandler * funcptr" "void * userdata" "DCaggr *const * aggrs"
 .Ft void
-.Fn dcbInitCallback "DCCallback * pcb" "const char * signature" "DCCallbackHandler * funcptr" "void * userdata"
+.Fn dcbInitCallback "DCCallback * pcb" "const DCsigchar * signature" "DCCallbackHandler * funcptr" "void * userdata"
+.Ft void
+.Fn dcbInitCallback2 "DCCallback * pcb" "const DCsigchar * signature" "DCCallbackHandler * funcptr" "void * userdata" "DCaggr *const * aggrs"
 .Ft void
 .Fn dcbFreeCallback "DCCallback * pcb"
 .Ft void
 .Fn dcbGetUserData "DCCallback * pcb"
+.Ft DCbool
+.Fn dcbArgBool "DCArgs * p"
+.Ft DCchar
+.Fn dcbArgChar "DCArgs * p"
+.Ft DCshort
+.Fn dcbArgShort "DCArgs * p"
+.Ft DCint
+.Fn dcbArgInt "DCArgs * p"
+.Ft DClong
+.Fn dcbArgLong "DCArgs * p"
+.Ft DClonglong
+.Fn dcbArgLongLong "DCArgs * p"
+.Ft DCuchar
+.Fn dcbArgUChar "DCArgs * p"
+.Ft DCushort
+.Fn dcbArgUShort "DCArgs * p"
+.Ft DCuint
+.Fn dcbArgUInt "DCArgs * p"
+.Ft DCulong
+.Fn dcbArgULong "DCArgs * p"
+.Ft DCulonglong
+.Fn dcbArgULongLong "DCArgs * p"
+.Ft DCfloat
+.Fn dcbArgFloat "DCArgs * p"
+.Ft DCdouble
+.Fn dcbArgDouble "DCArgs * p"
+.Ft DCpointer
+.Fn dcbArgPointer "DCArgs * p"
+.Ft void
+.Fn dcbArgAggr "DCArgs * p" "DCpointer target"
+.Ft void
+.Fn dcbReturnAggr "DCArgs * args" "DCValue * result" "DCpointer ret"
 .Sh DESCRIPTION
 The
 .Nm
 dyncall library has an interface to create callback objects, that can be passed
-to functions as callback arguments. In other words, a pointer to the callback
-object can be "called", directly. The callback handler then allows iterating
-dynamically over the arguments once called back.
+to functions as callback function pointers. In other words, a pointer to the
+callback object can be "called", directly. A generic callback handler invoked
+by this object then allows iterating dynamically over the arguments once called
+back.
 .Pp
-.Fn dcbNewCallback
+.Fn dcbNewCallback2
 creates a new callback object, where
 .Ar signature
 is a signature string describing the function to be called back (see manual or
-dyncall_signature.h for format). This is needed for
-.Nm
-dyncallback to correctly prepare the arguments passed in by the function that
-calls the callback handler. Note that the handler doesn't return the value
-specified in the signature, directly, but a signature character, specifying the
-return value's type.
-The return value itself is stored where the handler's
-3rd parameter points to (see below).
+dyncall_signature.h for format), and
 .Ar funcptr
-is a pointer to the
-.Nm
-dyncallback callback handler (see below), and
+is a pointer to a generic callback handler (see below). The signature is needed
+in the generic callback handler to correctly retrieve the arguments provided by
+the caller of the callback. Note that the generic handler's function
+type/declaration is always the same for any callback.
 .Ar userdata
-a pointer to arbitrary user data you want to use in the callback handler.
-Use the returned pointer as callback argument in functions requiring a callback
-function pointer.
+is a pointer to arbitrary user data to be available in the generic callback
+handler. If the callback expects aggregates (struct, union) to be passed or
+returned by value, a pointer to an array of DCaggr* descriptions must be
+provided (exactly one per aggregate, in the same order as in the signature) via
+the
+.Ar aggrs
+parameter, otherwise pass NULL. This pointer must point to valid data during
+callback.
+.Pp
+.Fn dcbNewCallback
+is the same as
+.Fn dcbNewCallback2 ,
+with an implicit NULL passed via the
+.Ar aggrs
+parameter, meaning it can only be used for callbacks that do not use any
+aggregate by value.
+.Pp
+.Sy NOTE:
+C++ non-trivial aggregates (check with the std::is_trivial type trait) do not
+use any aggregate descriptions, so the respective pointers in the provided
+array must be NULL. See
+.Xr dyncall 3
+for more information on C++ non-trivial aggregates.
+.Pp
+Use the pointer returned by
+.Fn dcbNewCallback*
+as argument in functions requiring a callback function pointer.
 .Pp
 .Fn dcbInitCallback
-(re)initialize the callback object.
+and
+.Fn dcbInitCallback2
+(re)initializes the callback object. For a description of its parameters, see
+.Fn dcbNewCallback* .
 .Pp
 .Fn dcbFreeCallback
 destroys and frees the callback handler.
 .Pp
 .Fn dcbGetUserData
 returns a pointer to the userdata passed to the callback object on creation or
-initialization.
+(re)initialization.
 .Pp
-Declaration of a dyncallback handler (following function pointer definition in
-dyncallback/dyncall_callback.h):
+Declaration of a dyncallback handler (following function pointer declaration in
+dyncall_callback.h):
 .Bd -literal -offset indent
-char cbHandler(DCCallback* cb,
-               DCArgs*     args,
-               DCValue*    result,
-               void*       userdata);
+DCsigchar cbHandler(DCCallback* cb,
+                    DCArgs*     args,
+                    DCValue*    result,
+                    void*       userdata);
 .Ed
 .Pp
-.Ar cb is a pointer to the DCCallback object in use
-.Nm
-result is a pointer to a DCValue object in order to store the callback's
-return value (output, to be set by handler). Finally,
-.Ar userdata is a pointer to some user defined data that can be
-set when creating the callback object.
-The handler itself returns a signature character (see manual for format)
-specifying the data type used for
+.Ar cb
+is a pointer to the DCCallback object in use,
+.Ar args
+is to be used with the
+.Fn dcbArg*
+functions to iterate over the arguments passed to the callback, and
+.Ar result
+is a pointer to an object used to store the callback's return value (output, to
+be set by the handler). Finally,
+.Ar userdata
+is a pointer to some user defined data that can be set when creating or
+(re)initializing the callback object.
+The handler itself must return a signature character (see manual or
+dyncall_signature.h for format) specifying the data type of
 .Ar result .
+.Pp
+Retrieving aggregates from the generic handler's
+.Ar args
+argument can be done via
+.Fn dcbArgAggr ,
+where
+.Ar target
+must point to memory large enough for the aggregate to be copied to.
+.Pp
+To return an aggregate by value, a helper function
+.Fn dcbReturnAggr
+needs to be used in order to correctly place the aggregate pointed to by
+.Ar ret
+into
+.Ar result ,
+then let the generic handler return DC_SIGCHAR_AGGREGATE.
 .Sh EXAMPLE
 Let's say, we want to create a callback object and call it. For simplicity, this
 example will omit passing it as a function pointer to a function (e.g. compar
@@ -92,10 +172,10 @@
 our callback handler - the following handler illustrates how to access the passed-
 in arguments:
 .Bd -literal -offset indent
-char cbHandler(DCCallback* cb,
-               DCArgs*     args,
-               DCValue*    result,
-               void*       userdata)
+DCsigchar cbHandler(DCCallback* cb,
+                    DCArgs*     args,
+                    DCValue*    result,
+                    void*       userdata)
 {
   int* ud = (int*)userdata;
   int       arg1 = dcbArgInt     (args);
@@ -119,8 +199,11 @@
   short result = 0;
   int userdata = 1337;
   cb = dcbNewCallback("ifsdl)s", &cbHandler, &userdata);
+
+  /* call the callback object */
   result = ((short(*)(int, float, short, double, long long))cb)
     (123, 23.f, 3, 1.82, 9909ll);
+
   dcbFreeCallback(cb);
 .Ed
 .Sh CONFORMING TO
--- a/portasm/portasm-x64.S	Sat Apr 16 15:00:58 2022 +0200
+++ b/portasm/portasm-x64.S	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,8 @@
  Description: Portable Assembler Macros for X64
  License:
 
-   Copyright (c) 2011-2018 Daniel Adler <dadler@uni-goettingen.de>
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
+                           Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
@@ -31,24 +32,32 @@
 .CODE
 #  define BEGIN_ASM
 #  define END_ASM END
-#  define GLOBAL(X) X PROC
-#  define BEGIN_PROC(X) 	OPTION PROLOGUE:NONE, EPILOGUE:NONE
+#  define CSYM(X) X
+#  define GLOBAL(X)       X PROC
+#  define GLOBAL_FRAME(X) X PROC FRAME
+#  define BEGIN_PROC(X)       OPTION PROLOGUE:NONE, EPILOGUE:NONE
+#  define FRAME_BEGIN_PROC(X) OPTION EPILOGUE:NONE
+#  define FRAME_SET(OFFSET, X)  .setframe X, OFFSET
+#  define FRAME_PUSH_REG(X)   .pushreg X
+#  define FRAME_ENDPROLOG()   .endprolog
 #  define END_PROC(X)   X ENDP
 #  define PUSH(R) push R
 #  define POP(R) pop R
 #  define MOV(S,D)  mov D,S
 #  define MOVB(S,D) mov D,S
 #  define MOVL(S,D) mov D,S
+#  define MOVZXQ(S,D) movzx D,S
 #  define ADDL(S,D) add D,S
 #  define ANDL(S,D) and D,S
 #  define SUBL(S,D) sub D,S
 #  define SHRL(S,D) shr D,S
-#  define MOVQ(S,D) movq D,S
+#  define SHLQ(S,D) shl D,S
 #  define ADD(S,D)  add D,S
 #  define AND(S,D)  and D,S
 #  define SUB(S,D)  sub D,S
 #  define SHR(S,D)  shr D,S
 #  define MOVD(S,D) movd D,S
+#  define XOR(S,D) xor D,S
 #  define RET() ret
 #  define CALL_DWORD(R,OFF) call DWORD(R,OFF)
 #  define REP(X) rep X
@@ -56,6 +65,7 @@
 #  define MOVSW movsw
 #  define MOVSD(S,D)  movsd D,S
 #  define MOVSDX(S,D) movsdx D,S
+#  define BYTE(R) byte ptr [R]
 #  define DWORD(R,OFF) dword ptr [R+OFF]
 #  define QWORD(R,OFF) qword ptr [R+OFF]
 #  define LIT(X) X
@@ -67,7 +77,8 @@
 #  define SET(K,V) K = V
 #  define JE(X) je X
 #  define JNE(X) jne X
-#  define CMP(A,B) cmp B,A
+#  define JMP(X) jmp X
+#  define CMPL(A,B) cmp B,A
 #  define LOCAL(X) X
 /* @@@ check if masm support wanted/needed */
 #  define SECTION_NOTE_NXSTACK
@@ -91,6 +102,7 @@
 #  define RBP %rbp
 #  define RSP %rsp
 #  define R8   %r8
+#  define R8D  %r8d
 #  define R9   %r9
 #  define R10  %r10
 #  define R11  %r11
@@ -114,30 +126,38 @@
 #  define CH   %ch
 #  define DL   %dl
 #  define DH   %dh
-#  define GLOBAL(X) .globl CSYM(X)
-#  define BEGIN_PROC(X)  CSYM(X):
+#  define GLOBAL(X)       .globl CSYM(X)
+#  define GLOBAL_FRAME(X) .globl CSYM(X)
+#  define BEGIN_PROC(X)       CSYM(X):
+#  define FRAME_BEGIN_PROC(X) CSYM(X):
+#  define FRAME_SET(OFFSET, X)
+#  define FRAME_PUSH_REG(X)
+#  define FRAME_ENDPROLOG()
 #  define END_PROC(X)
 #  define PUSH(R) pushq R
 #  define POP(R) popq R
 #  define MOV(S,D)  movq S,D
 #  define MOVB(S,D) movb S,D
 #  define MOVL(S,D) movl S,D
+#  define MOVZXQ(S,D) movzbq S,D
 #  define ADDL(S,D) addl S,D
 #  define ANDL(S,D) andl S,D
 #  define SUBL(S,D) subl S,D
 #  define SHRL(S,D) shrl S,D
-#  define MOVQ(S,D) movq S,D
+#  define SHLQ(S,D) shlq S,D
 #  define ADD(S,D) addq S,D
 #  define AND(S,D) andq S,D
 #  define SUB(S,D) subq S,D
 #  define SHR(S,D) shrq S,D
 #  define MOVD(S,D) movd S,D
+#  define XOR(S,D) xorq S,D
 #  define RET() ret
 #  define CALL_DWORD(R,OFF) call *DWORD(R,OFF)
 #  define REP(X) rep X
 #  define MOVSB movsb
 #  define MOVSW movsw
 #  define MOVSD(S,D) movsd S,D
+#  define BYTE(R)  (R)
 #  define DWORD(R,OFF) OFF(R)
 #  define QWORD(R,OFF) OFF(R)
 #  define LIT(X) $X
@@ -146,10 +166,10 @@
 #  define CALL(X) call X
 #  define CALL_REG(X) call *X
 #  define LEA(A,B) lea A,B
-#  define CMP(A,B) cmp A,B
-#  define CMPB(A,B) cmpb A,B
+#  define CMPL(A,B) cmpl A,B
 #  define JE(X)    je X
 #  define JNE(X)    jne X
+#  define JMP(X)    jmp X
 #  define FLDS(X)  flds X
 #  define FLDL(X)  fldl X
 /* not available on some platforms, e.g. old solaris, so use K=V syntax #  define SET(K,V) .set K,V */
--- a/test/callback_plain/CMakeLists.txt	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_plain/CMakeLists.txt	Thu Apr 21 13:35:47 2022 +0200
@@ -1,4 +1,4 @@
 #
 add_executable(callback_plain callback_plain.c)
-target_link_libraries(callback_plain dyncallback_s)
+target_link_libraries(callback_plain dyncallback_s dyncall_s)
 
--- a/test/callback_plain/Makefile.embedded	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_plain/Makefile.embedded	Thu Apr 21 13:35:47 2022 +0200
@@ -5,7 +5,7 @@
 BLDTOP   = ${SRCTOP}
 CFLAGS  += -I${SRCTOP}/dyncall
 LDFLAGS += -L${BLDDIR}/dyncallback
-LDLIBS  += -ldyncallback_s
+LDLIBS  += -ldyncallback_s -ldyncall_s
 
 all: ${APP}
 
--- a/test/callback_plain/Makefile.generic	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_plain/Makefile.generic	Thu Apr 21 13:35:47 2022 +0200
@@ -3,7 +3,7 @@
 SRCTOP  = ${VPATH}/../..
 BLDTOP  = ../..
 CFLAGS += -I${SRCTOP}/dyncall
-LDLIBS  += -L${BLDTOP}/dyncall -L${BLDTOP}/dyncallback -ldyncallback_s
+LDLIBS  += -L${BLDTOP}/dyncallback -ldyncallback_s -L${BLDTOP}/dyncall -ldyncall_s
 .PHONY: all clean install
 all: ${APP}
 ${APP}: ${OBJS} 
--- a/test/callback_plain/callback_plain.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_plain/callback_plain.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description: 
  License:
 
-   Copyright (c) 2011-2021 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2011-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -28,22 +28,23 @@
 #include "../common/platformInit.c" /* Impl. for functions only used in this translation unit */
 
 
-char cbHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+char cbSimpleHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
 {
   int* ud = (int*)userdata;
-  int       arg1 = dcbArgInt     (args);
-  float     arg2 = dcbArgFloat   (args);
-  short     arg3 = dcbArgShort   (args);
-  double    arg4 = dcbArgDouble  (args);
-  long long arg5 = dcbArgLongLong(args);
+  int       arg1;
+  float     arg2;
+  short     arg3;
+  double    arg4;
+  long long arg5;
 
   printf("reached callback\n");
   printf("userdata (should be 1337): %d\n", *ud);
-  printf("1st argument (should be  123): %d\n", arg1);
-  printf("2nd argument (should be 23.f): %f\n", arg2);
-  printf("3rd argument (should be    3): %d\n", arg3);
-  printf("4th argument (should be 1.82): %f\n", arg4);
-  printf("5th argument (should be 9909): %lld\n", arg5);
+
+  arg1 = dcbArgInt     (args);  printf("1st argument (should be  123): %d\n",   arg1);
+  arg2 = dcbArgFloat   (args);  printf("2nd argument (should be 23.f): %f\n",   arg2);
+  arg3 = dcbArgShort   (args);  printf("3rd argument (should be    3): %d\n",   arg3);
+  arg4 = dcbArgDouble  (args);  printf("4th argument (should be 1.82): %f\n",   arg4);
+  arg5 = dcbArgLongLong(args);  printf("5th argument (should be 9909): %lld\n", arg5);
 
   if(*ud == 1337) *ud = 1;
   if(arg1 ==  123) ++*ud;
@@ -56,17 +57,13 @@
   return 's';
 }
 
-
-int main()
+int testSimpleCallback()
 {
   DCCallback* cb;
   short result = 0;
   int userdata = 1337;
 
-  dcTest_initPlatform();
-
-  cb = dcbNewCallback("ifsdl)s", &cbHandler, &userdata);
-  printf("about to callback (trampoline for 0x%tx at 0x%tx)...\n", &cbHandler, cb);
+  cb = dcbNewCallback("ifsdl)s", &cbSimpleHandler, &userdata);
 
   result = ((short(*)(int, float, short, double, long long))cb)(123, 23.f, 3, 1.82, 9909ull);
   dcbFreeCallback(cb);
@@ -74,7 +71,437 @@
   printf("successfully returned from callback\n");
   printf("return value (should be 1234): %d\n", result);
 
-  result = (userdata == 6) && (result == 1234);
+  return (userdata == 6) && (result == 1234);
+}
+
+
+#if defined(DC__Feature_AggrByVal)
+typedef struct {
+  float a;
+  float b;
+} Float_Float;
+
+typedef struct {
+  unsigned char a;
+  double b;
+} U8_Double;
+
+typedef struct {
+  unsigned long long a;
+  unsigned long long b;
+} U64_U64;
+
+typedef struct {
+  double a;
+  double b;
+} Double_Double;
+
+typedef struct {
+  unsigned long long a;
+  unsigned long long b;
+  unsigned long long c;
+} Three_U64;
+
+typedef struct {
+  double a;
+  double b;
+  double c;
+} Three_Double;
+
+
+char cbAggrArgHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  int           arg1;
+  Float_Float   arg2;
+  U8_Double     arg3;
+  Three_Double  arg4;
+  double        arg5;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgInt(args);                printf("1st argument: %d\n", arg1);
+  dcbArgAggr(args, (DCpointer)&arg2);    printf("2nd argument: %f %f\n", arg2.a, arg2.b);
+  dcbArgAggr(args, (DCpointer)&arg3);    printf("3nd argument: %d %f\n", arg3.a, arg3.b);
+  dcbArgAggr(args, (DCpointer)&arg4);    printf("4rd argument: %f %f %f\n", arg4.a, arg4.b, arg4.c);
+  arg5 = dcbArgDouble(args);             printf("5th argument: %f\n", arg5);
+
+  result->d = *ud + arg1 + arg2.a + arg2.b + arg3.a + arg3.b + arg4.a + arg4.b + arg4.c + arg5;
+  return 'd';
+}
+
+
+int testAggrArgsCallback()
+{
+  DCCallback* cb;
+  DCaggr *float_float_aggr, *u8_double_aggr, *three_double_aggr, *aggrs[3];
+  Float_Float ff;
+  U8_Double u8d;
+  Three_Double threed;
+
+  int ret = 1;
+  double result = 0;
+  int userdata = 1337;
+
+  ff.a = 1.5;
+  ff.b = 5.5;
+  float_float_aggr = dcNewAggr(2, sizeof(ff));
+  dcAggrField(float_float_aggr, DC_SIGCHAR_FLOAT, offsetof(Float_Float, a), 1);
+  dcAggrField(float_float_aggr, DC_SIGCHAR_FLOAT, offsetof(Float_Float, b), 1);
+  dcCloseAggr(float_float_aggr);
+
+  u8d.a = 5;
+  u8d.b = 5.5;
+  u8_double_aggr = dcNewAggr(2, sizeof(u8d));
+  dcAggrField(u8_double_aggr, DC_SIGCHAR_UCHAR,  offsetof(U8_Double, a), 1);
+  dcAggrField(u8_double_aggr, DC_SIGCHAR_DOUBLE, offsetof(U8_Double, b), 1);
+  dcCloseAggr(u8_double_aggr);
+
+  threed.a = 1.5;
+  threed.b = 2.5;
+  threed.c = 3.5;
+  three_double_aggr = dcNewAggr(3, sizeof(threed));
+  dcAggrField(three_double_aggr, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, a), 1);
+  dcAggrField(three_double_aggr, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, b), 1);
+  dcAggrField(three_double_aggr, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, c), 1);
+  dcCloseAggr(three_double_aggr);
+
+  aggrs[0] = float_float_aggr;
+  aggrs[1] = u8_double_aggr;
+  aggrs[2] = three_double_aggr;
+
+  cb = dcbNewCallback2("iAAAd)d", &cbAggrArgHandler, &userdata, aggrs);
+
+  result = ((double(*)(int, Float_Float, U8_Double, Three_Double, double))cb)(123, ff, u8d, threed, 4.5);
+  dcbFreeCallback(cb);
+  dcFreeAggr(float_float_aggr);
+  dcFreeAggr(u8_double_aggr);
+  dcFreeAggr(three_double_aggr);
+
+  printf("successfully returned from callback\n");
+  printf("return value (should be 1489.5): %f\n", result);
+
+  ret = result == 1489.5 && ret;
+
+  return ret;
+}
+
+char cbFloatFloatReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  float arg1, arg2;
+  Float_Float ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgFloat(args);  printf("1st argument: %f\n", arg1);
+  arg2 = dcbArgFloat(args);  printf("2th argument: %f\n", arg2);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+char cbU8DoubleReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  int arg1;
+  double arg2;
+  U8_Double ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgInt   (args);  printf("1st argument: %d\n", arg1);
+  arg2 = dcbArgDouble(args);  printf("2th argument: %f\n", arg2);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+char cbU64U64ReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  unsigned long long arg1, arg2;
+  U64_U64 ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgULongLong(args);  printf("1st argument: %lld\n", arg1);
+  arg2 = dcbArgULongLong(args);  printf("2th argument: %lld\n", arg2);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+char cbDoubleDoubleReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  double arg1, arg2;
+  Double_Double ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgDouble(args);  printf("1st argument: %f\n", arg1);
+  arg2 = dcbArgDouble(args);  printf("2th argument: %f\n", arg2);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+char cbThreeU64ReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  unsigned long long arg1, arg2, arg3;
+  Three_U64 ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgULongLong(args);  printf("1st argument: %lld\n", arg1);
+  arg2 = dcbArgULongLong(args);  printf("2th argument: %lld\n", arg2);
+  arg3 = dcbArgULongLong(args);  printf("3th argument: %lld\n", arg3);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+  ret.c = arg3;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+char cbThreeDoubleReturnHandler(DCCallback* cb, DCArgs* args, DCValue* result, void* userdata)
+{
+  int* ud = (int*)userdata;
+  double arg1, arg2, arg3;
+  Three_Double ret;
+
+  printf("reached callback\n");
+  printf("userdata: %d\n", *ud);
+
+  arg1 = dcbArgDouble(args);   printf("1st argument: %f\n", arg1);
+  arg2 = dcbArgDouble(args);   printf("2th argument: %f\n", arg2);
+  arg3 = dcbArgDouble(args);   printf("3th argument: %f\n", arg3);
+
+  ret.a = *ud + arg1;
+  ret.b = arg2;
+  ret.c = arg3;
+
+  dcbReturnAggr(args, result, (DCpointer)&ret);
+
+  return 't';
+}
+
+int testAggrReturnCallback()
+{
+  int ret = 1;
+
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    Float_Float expected, result;
+
+    expected.a = 11.5;
+    expected.b = 2.5;
+
+    s = dcNewAggr(2, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_FLOAT, offsetof(Float_Float, a), 1);
+    dcAggrField(s, DC_SIGCHAR_FLOAT, offsetof(Float_Float, b), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("ff)A", &cbFloatFloatReturnHandler, &userdata, aggrs);
+
+    result = ((Float_Float(*)(float, float))cb)(1.5, 2.5);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %f %f): %f %f\n", expected.a, expected.b, result.a, result.b);
+
+    ret = result.a == expected.a && result.b == expected.b && ret;
+  }
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    U8_Double expected, result;
+
+    expected.a = 15;
+    expected.b = 5.5;
+
+    s = dcNewAggr(2, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_UCHAR,  offsetof(U8_Double, a), 1);
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(U8_Double, b), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("id)A", &cbU8DoubleReturnHandler, &userdata, aggrs);
+
+    result = ((U8_Double(*)(int, double))cb)(5, 5.5);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %d %f): %d %f\n", (int)expected.a, expected.b, (int)result.a, result.b);
+
+    ret = result.a == expected.a && result.b == expected.b && ret;
+  }
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    U64_U64 expected, result;
+
+    expected.a = 35;
+    expected.b = 26;
+    s = dcNewAggr(2, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_ULONGLONG, offsetof(U64_U64, a), 1);
+    dcAggrField(s, DC_SIGCHAR_ULONGLONG, offsetof(U64_U64, b), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("LL)A", &cbU64U64ReturnHandler, &userdata, aggrs);
+
+    result = ((U64_U64(*)(unsigned long long, unsigned long long))cb)(25, 26);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %lld %lld): %lld %lld\n", expected.a, expected.b, result.a, result.b);
+
+    ret = result.a == expected.a && result.b == expected.b && ret;
+  }
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    Double_Double expected, result;
+
+    expected.a = 11.5;
+    expected.b = 2.5;
+    s = dcNewAggr(2, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Double_Double, a), 1);
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Double_Double, b), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("dd)A", &cbDoubleDoubleReturnHandler, &userdata, aggrs);
+
+    result = ((Double_Double(*)(double, double))cb)(1.5, 2.5);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %f %f): %f %f\n", expected.a, expected.b, result.a, result.b);
+
+    ret = result.a == expected.a && result.b == expected.b && ret;
+  }
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    Three_U64 expected, result;
+
+    expected.a = 11;
+    expected.b = 2;
+    expected.c = 3;
+    s = dcNewAggr(3, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_ULONGLONG, offsetof(Three_U64, a), 1);
+    dcAggrField(s, DC_SIGCHAR_ULONGLONG, offsetof(Three_U64, b), 1);
+    dcAggrField(s, DC_SIGCHAR_ULONGLONG, offsetof(Three_U64, c), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("LLL)A", &cbThreeU64ReturnHandler, &userdata, aggrs);
+
+    result = ((Three_U64(*)(unsigned long long, unsigned long long, unsigned long long))cb)(1, 2, 3);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %lld %lld %lld): %lld %lld %lld\n", expected.a, expected.b, expected.c, result.a, result.b, result.c);
+
+    ret = result.a == expected.a && result.b == expected.b && result.c == expected.c && ret;
+  }
+  {
+    DCCallback* cb;
+    DCaggr *s;
+    DCaggr *aggrs[1];
+    int userdata = 10;
+    Three_Double expected, result;
+
+    expected.a = 11.5;
+    expected.b = 2.5;
+    expected.c = 3.5;
+    s = dcNewAggr(3, sizeof(expected));
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, a), 1);
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, b), 1);
+    dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, c), 1);
+    dcCloseAggr(s);
+
+    aggrs[0] = s;
+
+    cb = dcbNewCallback2("ddd)A", &cbThreeDoubleReturnHandler, &userdata, aggrs);
+
+    result = ((Three_Double(*)(double, double, double))cb)(1.5, 2.5, 3.5);
+    dcbFreeCallback(cb);
+    dcFreeAggr(s);
+
+    printf("successfully returned from callback\n");
+    printf("return value (should be %f %f %f): %f %f %f\n", expected.a, expected.b, expected.c, result.a, result.b, result.c);
+
+    ret = result.a == expected.a && result.b == expected.b && result.c == expected.c && ret;
+  }
+
+  return ret;
+}
+#endif
+
+
+int main()
+{
+  int result = 1;
+
+  dcTest_initPlatform();
+
+  result = testSimpleCallback() && result;
+#if defined(DC__Feature_AggrByVal)
+  result = testAggrArgsCallback() && result;
+  result = testAggrReturnCallback() && result;
+#endif
+
   printf("result: callback_plain: %d\n", result);
 
   dcTest_deInitPlatform();
--- a/test/callback_plain/mkfile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_plain/mkfile	Thu Apr 21 13:35:47 2022 +0200
@@ -24,7 +24,7 @@
 
 UNITS       = callback_plain
 APPLICATION = callback_plain
-LIBS        = $TOP/dyncallback/libdyncallback_s.a$O
+LIBS        = $TOP/dyncallback/libdyncallback_s.a$O $TOP/dyncall/libdyncall_s.a$O
 
 
 <$TOP/buildsys/mk/epilog.mk
--- a/test/callback_suite/CMakeLists.txt	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite/CMakeLists.txt	Thu Apr 21 13:35:47 2022 +0200
@@ -1,3 +1,3 @@
 add_executable(callback_suite globals.c cases.c main.c)
-target_link_libraries(callback_suite dyncall_s dyncallback_s)
+target_link_libraries(callback_suite dyncallback_s dyncall_s)
 
--- a/test/callback_suite/Makefile.embedded	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite/Makefile.embedded	Thu Apr 21 13:35:47 2022 +0200
@@ -5,7 +5,7 @@
 BLDTOP   = ${SRCTOP}
 CFLAGS  += -I${SRCTOP}/dyncall -I${SRCTOP}/dyncallback
 LDFLAGS += -L${BLDTOP}/dyncall -L${BLDTOP}/dyncallback
-LDLIBS  += -ldyncall_s -ldyncallback_s
+LDLIBS  += -ldyncallback_s -ldyncall_s
 
 all: ${APP}
 
--- a/test/callback_suite/Makefile.generic	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite/Makefile.generic	Thu Apr 21 13:35:47 2022 +0200
@@ -3,7 +3,7 @@
 SRCTOP   = ${VPATH}/../..
 BLDTOP   = ../..
 CFLAGS  += -I${SRCTOP}/dyncall -I${SRCTOP}/dyncallback
-LDLIBS  += -L${BLDTOP}/dyncall -ldyncall_s -L${BLDTOP}/dyncallback -ldyncallback_s
+LDLIBS  += -L${BLDTOP}/dyncallback -ldyncallback_s -L${BLDTOP}/dyncall -ldyncall_s
 LUA      = lua
 
 .PHONY: all clean install config
--- a/test/callback_suite/main.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite/main.c	Thu Apr 21 13:35:47 2022 +0200
@@ -156,7 +156,7 @@
   signature = G_sigtab[id];
   printf("%d:%s", id, signature);
 
-  pcb = dcbNewCallback(signature, handler, (void*)signature, NULL);
+  pcb = dcbNewCallback(signature, handler, (void*)signature);
   assert(pcb != NULL);
 
   clear_V();
--- a/test/callback_suite_aggrs/CMakeLists.txt	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite_aggrs/CMakeLists.txt	Thu Apr 21 13:35:47 2022 +0200
@@ -1,3 +1,3 @@
 add_executable(callback_suite_aggrs globals.c cases.c main.c)
-target_link_libraries(callback_suite_aggrs dyncall_s dyncallback_s)
+target_link_libraries(callback_suite_aggrs dyncallback_s dyncall_s)
 
--- a/test/callback_suite_aggrs/Makefile.embedded	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite_aggrs/Makefile.embedded	Thu Apr 21 13:35:47 2022 +0200
@@ -5,7 +5,7 @@
 BLDTOP   = ${SRCTOP}
 CFLAGS  += -I${SRCTOP}/dyncall -I${SRCTOP}/dyncallback
 LDFLAGS += -L${BLDTOP}/dyncall -L${BLDTOP}/dyncallback
-LDLIBS  += -ldyncall_s -ldyncallback_s
+LDLIBS  += -ldyncallback_s -ldyncall_s
 
 all: ${APP}
 
--- a/test/callback_suite_aggrs/Makefile.generic	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite_aggrs/Makefile.generic	Thu Apr 21 13:35:47 2022 +0200
@@ -3,7 +3,7 @@
 SRCTOP   = ${VPATH}/../..
 BLDTOP   = ../..
 CFLAGS  += -I${SRCTOP}/dyncall -I${SRCTOP}/dyncallback
-LDLIBS  += -L${BLDTOP}/dyncall -ldyncall_s -L${BLDTOP}/dyncallback -ldyncallback_s
+LDLIBS  += -L${BLDTOP}/dyncallback -ldyncallback_s -L${BLDTOP}/dyncall -ldyncall_s
 LUA      = lua
 
 .PHONY: all clean install config
--- a/test/callback_suite_aggrs/main.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite_aggrs/main.c	Thu Apr 21 13:35:47 2022 +0200
@@ -226,7 +226,7 @@
   dc_sig[len_sig] = '\0';
 
 
-  pcb = dcbNewCallback(dc_sig, handler, (void*)dc_sig, dc_aggrs);
+  pcb = dcbNewCallback2(dc_sig, handler, (void*)dc_sig, dc_aggrs);
   assert(pcb != NULL);
 
   clear_V();
--- a/test/callback_suite_aggrs/mkfile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callback_suite_aggrs/mkfile	Thu Apr 21 13:35:47 2022 +0200
@@ -23,7 +23,7 @@
 
 UNITS       = globals cases main
 APPLICATION = callback_suite_aggrs
-LIBS        = $TOP/dyncallback/libdyncallback_s.a$O
+LIBS        = $TOP/dyncallback/libdyncallback_s.a$O $TOP/dyncall/libdyncall_s.a$O
 
 
 <$TOP/buildsys/mk/epilog.mk
--- a/test/callf/main.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/callf/main.c	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description:
  License:
 
-   Copyright (c) 2007-2021 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -39,21 +39,21 @@
 
 /* sample void function */
 
-int vf_iii(int x,int y,int z)
+int i_iii(int x,int y,int z)
 {
   int r = (x == 1 && y == 2 && z == 3);
   printf("%d %d %d: %d", x, y, z, r);
   return r;
 }
 
-int vf_ffiffiffi(float a, float b, int c, float d, float e, int f, float g, float h, int i)
+int i_ffiffiffi(float a, float b, int c, float d, float e, int f, float g, float h, int i)
 {
   int r = (a == 1.f && b == 2.f && c == 3 && d == 4.f && e == 5.f && f == 6 && g == 7.f && h == 8.f && i == 9);
   printf("%f %f %d %f %f %d %f %f %d: %d", a, b, c, d, e, f, g, h, i, r);
   return r;
 }
 
-int vf_ffiV(float a, float b, int c, ...)
+int i_ffiV(float a, float b, int c, ...)
 {
   va_list ap;
   double d, e, g, h;
@@ -74,6 +74,32 @@
   return r;
 }
 
+
+#if defined(DC__Feature_AggrByVal)
+struct A { int i; char x[7]; long long dummy_too_big_for_regs[50]; }; /* returned via hidden ptr arg on x64/sysv */
+struct A A_cc(char a, char b)
+{
+  int i;
+  struct A r = { (int)a-(int)b, { 3, a|b } };
+  for(i=2; i<7; ++i)
+    r.x[i] = r.x[i-2]+r.x[i-1];
+  printf("%d %d: ", a, b);
+  return r;
+}
+
+struct B { int i; unsigned char x[7]; }; /* returned via regs on x64/sysv */
+struct B A_CC(unsigned char a, unsigned char b)
+{
+  int i;
+  struct B r = { (int)a-(int)b, { 3, a|b } };
+  for(i=2; i<7; ++i)
+    r.x[i] = r.x[i-2]+r.x[i-1];
+  printf("%d %d: ", a, b);
+  return r;
+}
+#endif
+
+
 /* main */
 
 int main(int argc, char* argv[])
@@ -91,24 +117,24 @@
   /* calls using 'formatted' API */
   dcReset(vm);
   printf("callf iii)i:       ");
-  dcCallF(vm, &ret, (void*)&vf_iii, "iii)i", 1, 2, 3);
+  dcCallF(vm, &ret, (void*)&i_iii, "iii)i", 1, 2, 3);
   r = ret.i && r;
 
   dcReset(vm);
   printf("\ncallf ffiffiffi)i: ");
-  dcCallF(vm, &ret, (void*)&vf_ffiffiffi, "ffiffiffi)i", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
+  dcCallF(vm, &ret, (void*)&i_ffiffiffi, "ffiffiffi)i", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
   r = ret.i && r;
 
   /* same but with calling convention prefix */
   dcReset(vm);
   printf("\ncallf _:ffiffiffi)i: ");
-  dcCallF(vm, &ret, (void*)&vf_ffiffiffi, "_:ffiffiffi)i", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
+  dcCallF(vm, &ret, (void*)&i_ffiffiffi, "_:ffiffiffi)i", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
   r = ret.i && r;
 
   /* vararg call */
   dcReset(vm);
   printf("\ncallf _effi_.ddiddi)i: ");
-  dcCallF(vm, &ret, (void*)&vf_ffiV, "_effi_.ddiddi)i", 1.f, 2.f, 3, 4., 5., 6, 7., 8., 9);
+  dcCallF(vm, &ret, (void*)&i_ffiV, "_effi_.ddiddi)i", 1.f, 2.f, 3, 4., 5., 6, 7., 8., 9);
   r = ret.i && r;
 
   /* arg binding then call using 'formatted' API */
@@ -117,30 +143,75 @@
   dcMode(vm, DC_CALL_C_DEFAULT);
   printf("\nargf iii)i       then call: ");
   dcArgF(vm, "iii)i", 1, 2, 3);
-  r = r && dcCallInt(vm, (void*)&vf_iii);
+  r = r && dcCallInt(vm, (void*)&i_iii);
 
   dcReset(vm);
   printf("\nargf iii         then call: ");
   dcArgF(vm, "iii", 1, 2, 3);
-  r = r && dcCallInt(vm, (void*)&vf_iii);
+  r = r && dcCallInt(vm, (void*)&i_iii);
 
   dcReset(vm);
   printf("\nargf ffiffiffi)i then call: ");
   dcArgF(vm, "ffiffiffi)i", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
-  r = r && dcCallInt(vm, (void*)&vf_ffiffiffi);
+  r = r && dcCallInt(vm, (void*)&i_ffiffiffi);
 
   dcReset(vm);
   printf("\nargf ffiffiffi   then call: ");
   dcArgF(vm, "ffiffiffi", 1.f, 2.f, 3, 4.f, 5.f, 6, 7.f, 8.f, 9);
-  r = r && dcCallInt(vm, (void*)&vf_ffiffiffi);
+  r = r && dcCallInt(vm, (void*)&i_ffiffiffi);
 
-#if defined(DC_UNIX) && !defined(DC__OS_MacOSX) && !defined(DC__OS_SunOS) && !defined(DC__OS_BeOS)
+#if defined(DC__Feature_Syscall)
+#  if defined(DC_UNIX)
   /* testing syscall using calling convention prefix - not available on all platforms */
   dcReset(vm);
   printf("\ncallf _$iZi)i");
   fflush(NULL); /* needed before syscall write as it's immediate, or order might be incorrect */
   dcCallF(vm, &ret, (DCpointer)(ptrdiff_t)SYS_write, "_$iZi)i", 1/*stdout*/, " = syscall: 1", 13);
   r = ret.i == 13 && r;
+#  else
+/*@@@*/
+#  endif
+#endif
+
+#if defined(DC__Feature_AggrByVal)
+  /* aggregate return value test */
+  {
+    int r_;
+    struct A a;
+    DCaggr *s = dcNewAggr(1, sizeof(struct A));
+    dcAggrField(s, DC_SIGCHAR_INT,  offsetof(struct A, i), 1);
+    dcAggrField(s, DC_SIGCHAR_CHAR, offsetof(struct A, x), 7);
+    dcCloseAggr(s);
+  
+    dcReset(vm);
+    printf("\ncallf _:cc)A (A={ic[7]l[50]}): ");
+    dcCallF(vm, &ret, (void*)&A_cc, "_:cc)A", 3, 16, s, &a);
+    r_ = ret.p == &a && a.i == -13 && a.x[0] == 3 && a.x[1] == 19 && a.x[2] == 22 && a.x[3] == 41 && a.x[4] == 63 && a.x[5] == 104 && a.x[6] == -89;
+    printf("%d %d %d %d %d %d %d %d: %d", a.i, a.x[0], a.x[1], a.x[2], a.x[3], a.x[4], a.x[5], a.x[6], r_);
+
+    dcFreeAggr(s);
+
+    r = r_ && r;
+  }
+  /* aggregate return value test */
+  {
+    int r_;
+    struct B b;
+    DCaggr *s = dcNewAggr(1, sizeof(struct B));
+    dcAggrField(s, DC_SIGCHAR_INT,   offsetof(struct B, i), 1);
+    dcAggrField(s, DC_SIGCHAR_UCHAR, offsetof(struct B, x), 7);
+    dcCloseAggr(s);
+  
+    dcReset(vm);
+    printf("\ncallf _:cc)A (A={iC[7]}): ");
+    dcCallF(vm, &ret, (void*)&A_CC, "_:CC)A", 3, 16, s, &b);
+    r_ = ret.p == &b && b.i == -13 && b.x[0] == 3 && b.x[1] == 19 && b.x[2] == 22 && b.x[3] == 41 && b.x[4] == 63 && b.x[5] == 104 && b.x[6] == 167;
+    printf("%d %d %d %d %d %d %d %d: %d", b.i, b.x[0], b.x[1], b.x[2], b.x[3], b.x[4], b.x[5], b.x[6], r_);
+
+    dcFreeAggr(s);
+
+    r = r_ && r;
+  }
 #endif
 
   /* free vm */
--- a/test/malloc_wx/Nmakefile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/malloc_wx/Nmakefile	Thu Apr 21 13:35:47 2022 +0200
@@ -37,7 +37,7 @@
 
 $(TARGETS): $(OBJS)
 	echo Linking $@ ...
-	$(LD) /OUT:"$@" $(LDFLAGS) $(OBJS) $(TOP)\dyncall\dyncall_s.lib $(TOP)\dyncallback\dyncallback_s.lib
+	$(LD) /OUT:"$@" $(LDFLAGS) $(OBJS) $(TOP)\dyncallback\dyncallback_s.lib
 
 
 !ELSE IF "$(BUILD_OS)" == "nds"
@@ -47,7 +47,7 @@
 
 $(TARGETS): $(OBJS)
 	echo Linking $@ ...
-	$(LD) $(LDFLAGS) $(OBJS) $(DEVKITPRO_PATH)\libnds\lib\libnds9.a $(TOP)/dyncall/libdyncall_s.a $(TOP)\dyncallback\libdyncallback_s.a -o "$(@B).elf"
+	$(LD) $(LDFLAGS) $(OBJS) $(DEVKITPRO_PATH)\libnds\lib\libnds9.a $(TOP)\dyncallback\libdyncallback_s.a -o "$(@B).elf"
 	$(OCP) -O binary "$(@B).elf" "$(@B).arm9"
 	ndstool -c "$@" -9 "$(@B).arm9"
 	del "$(@B).elf" "$(@B).arm9"
--- a/test/plain/CMakeLists.txt	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/CMakeLists.txt	Thu Apr 21 13:35:47 2022 +0200
@@ -1,3 +1,3 @@
-add_executable(plain test_main.c test_structs.c)
+add_executable(plain test_main.c test_aggrs.c)
 target_link_libraries(plain dyncall_s)
 
--- a/test/plain/Makefile.embedded	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/Makefile.embedded	Thu Apr 21 13:35:47 2022 +0200
@@ -1,5 +1,5 @@
 APP = plain
-OBJS = test_main.o test_structs.o
+OBJS = test_main.o test_aggrs.o
 
 TOP      = ../..
 CFLAGS  += -I${TOP}/dyncall
--- a/test/plain/Makefile.generic	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/Makefile.generic	Thu Apr 21 13:35:47 2022 +0200
@@ -1,5 +1,5 @@
 APP     = plain
-OBJS    = test_main.o test_structs.o
+OBJS    = test_main.o test_aggrs.o
 SRCTOP  = ${VPATH}/../..
 BLDTOP  = ../..
 CFLAGS += -I${SRCTOP}/dyncall
--- a/test/plain/Nmakefile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/Nmakefile	Thu Apr 21 13:35:47 2022 +0200
@@ -1,7 +1,7 @@
 #//////////////////////////////////////////////////////////////////////////////
 #
-# Copyright (c) 2007,2009 Daniel Adler <dadler@uni-goettingen.de>, 
-#                         Tassilo Philipp <tphilipp@potion-studios.com>
+# Copyright (c) 2007,2009-2022 Daniel Adler <dadler@uni-goettingen.de>, 
+#                              Tassilo Philipp <tphilipp@potion-studios.com>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
@@ -33,7 +33,7 @@
 !IF "$(BUILD_OS)" == "windows"
 
 TARGETS = plain.exe
-OBJS = test_main.obj test_structs.obj
+OBJS = test_main.obj test_aggrs.obj
 
 $(TARGETS): $(OBJS)
 	echo Linking $@ ...
@@ -43,10 +43,10 @@
 !ELSE IF "$(BUILD_OS)" == "nds"
 
 TARGETS = plain.nds
-OBJS = test_main.o test_structs.o
+OBJS = test_main.o test_aggrs.o
 
 $(TARGETS):# $(OBJS)
-	echo Not building: There is no struct support on this platform @@@ or not taken into account.
+	echo Not building: There is no struct support on this platform @@@AGGRS or not taken into account.
 #	echo Linking $@ ...
 #	$(LD) $(LDFLAGS) $(OBJS) $(DEVKITPRO_PATH)\libnds\lib\libnds9.a $(TOP)/dyncall/libdyncall_s.a -o "$(@B).elf"
 #	$(OCP) -O binary "$(@B).elf" "$(@B).arm9"
--- a/test/plain/mkfile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/mkfile	Thu Apr 21 13:35:47 2022 +0200
@@ -1,7 +1,7 @@
 #//////////////////////////////////////////////////////////////////////////////
 #
-# Copyright (c) 2010 Daniel Adler <dadler@uni-goettingen.de>, 
-#                    Tassilo Philipp <tphilipp@potion-studios.com>
+# Copyright (c) 2010-2022 Daniel Adler <dadler@uni-goettingen.de>, 
+#                         Tassilo Philipp <tphilipp@potion-studios.com>
 #
 # Permission to use, copy, modify, and distribute this software for any
 # purpose with or without fee is hereby granted, provided that the above
@@ -22,7 +22,7 @@
 <$TOP/buildsys/mk/prolog.mk
 
 
-UNITS       =   test_main test_structs
+UNITS       =   test_main test_aggrs
 APPLICATION =   plain
 LIBS        =   $TOP/dyncall/libdyncall_s.a$O
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/plain/test_aggrs.c	Thu Apr 21 13:35:47 2022 +0200
@@ -0,0 +1,516 @@
+/*
+
+ Package: dyncall
+ Library: test
+ File: test/plain/test_aggrs.c
+ Description:
+ License:
+
+   Copyright (c) 2022 Tassilo Philipp <tphilipp@potion-studios.com>
+
+   Permission to use, copy, modify, and distribute this software for any
+   purpose with or without fee is hereby granted, provided that the above
+   copyright notice and this permission notice appear in all copies.
+
+   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+*/
+
+
+
+
+#include "../../dyncall/dyncall.h"
+#include "../../dyncall/dyncall_signature.h"
+#include "../../dyncall/dyncall_aggregate.h"
+#include <stdio.h>
+
+
+#if defined(DC__Feature_AggrByVal)
+
+#if !defined(DC__OS_Win32)
+#  define __cdecl
+#endif
+
+typedef struct {
+	unsigned char a;
+} U8;
+
+typedef struct {
+	unsigned char a;
+	double b;
+} U8_Double;
+
+typedef struct {
+	float a;
+	float b;
+} Float_Float;
+
+typedef struct {
+	double a;
+	unsigned char b;
+} Double_U8;
+
+typedef struct {
+	float f;
+} NestedFloat;
+
+typedef struct {
+	int a;
+	NestedFloat b;
+} Int_NestedFloat;
+
+typedef struct {
+	double f;
+} NestedDouble;
+
+typedef struct {
+	int a;
+	NestedDouble b;
+} Int_NestedDouble;
+
+typedef struct {
+	double a;
+	double b;
+	double c;
+} Three_Double;
+
+typedef struct {
+	int       a;
+	long long b;
+} Int_LongLong;
+
+/* large struct: more than 8 int/ptr and 8 fp args, more than are passed by reg for both win and sysv for example */
+typedef struct {
+	double       a;
+	double       b;
+	double       c;
+	long long    d;
+	char         e;
+	char         f;
+	double       g;
+	double       h;
+	double       i;
+	float        j;
+	int          k;
+	float        l;
+	double       m;
+	short        n;
+	long         o;
+	int          p;
+	unsigned int q;
+	long long    r;
+} More_Than_Regs;
+
+
+static U8               __cdecl fun_return_u8(unsigned char a)                        { U8               r;  r.a = a;                       return r; }
+static U8_Double        __cdecl fun_return_u8_double(unsigned char a, double b)       { U8_Double        r;  r.a = a;  r.b   = b;           return r; }
+static Double_U8        __cdecl fun_return_double_u8(double a, unsigned char b)       { Double_U8        r;  r.a = a;  r.b   = b;           return r; }
+static Int_NestedFloat  __cdecl fun_return_int_nested_float(int a, float b)           { Int_NestedFloat  r;  r.a = a;  r.b.f = b;           return r; }
+static Int_NestedDouble __cdecl fun_return_int_nested_double(int a, double b)         { Int_NestedDouble r;  r.a = a;  r.b.f = b;           return r; }
+static Three_Double     __cdecl fun_return_three_double(double a, double b, double c) { Three_Double     r;  r.a = a;  r.b   = b;  r.c = c; return r; }
+
+
+int testAggrReturns()
+{
+	int ret = 1;
+
+	DCCallVM* vm = dcNewCallVM(4096);
+	dcMode(vm,DC_CALL_C_DEFAULT);
+	{
+		U8 expected = fun_return_u8(5), returned = { 124 };
+
+		DCaggr *s = dcNewAggr(1, sizeof(expected));
+
+		dcAggrField(s, DC_SIGCHAR_UCHAR, offsetof(U8, a), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgChar(vm, expected.a);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_u8, s, &returned);
+
+		dcFreeAggr(s);
+
+		printf("r:{C}  (cdecl): %d\n", (returned.a == expected.a));
+		ret = returned.a == expected.a && ret;
+	}
+	{
+		U8_Double expected = fun_return_u8_double(5, 5.5), returned = { 6, 7.8 };
+
+		DCaggr *s = dcNewAggr(2, sizeof(expected));
+
+		dcAggrField(s, DC_SIGCHAR_UCHAR,  offsetof(U8_Double, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(U8_Double, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgChar(vm, expected.a);
+		dcArgDouble(vm, expected.b);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_u8_double, s, &returned);
+
+		dcFreeAggr(s);
+
+		printf("r:{Cd}  (cdecl): %d\n", (returned.a == expected.a && returned.b == expected.b));
+		ret = returned.a == expected.a && returned.b == expected.b && ret;
+	}
+	{
+		Double_U8 expected = fun_return_double_u8(5.5, 42), returned = { 6.7, 8 };
+
+		DCaggr *s = dcNewAggr(2, sizeof(expected));
+
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Double_U8, a), 1);
+		dcAggrField(s, DC_SIGCHAR_UCHAR,  offsetof(Double_U8, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgDouble(vm, expected.a);
+		dcArgChar(vm, expected.b);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_double_u8, s, &returned);
+
+		dcFreeAggr(s);
+
+		printf("r:{dC}  (cdecl): %d\n", (returned.a == expected.a && returned.b == expected.b));
+		ret = returned.a == expected.a && returned.b == expected.b && ret;
+	}
+	{
+		Int_NestedFloat expected = fun_return_int_nested_float(24, 2.5f), returned = { 25, { 3.5f } };
+		DCaggr *s, *s_;
+
+		s_ = dcNewAggr(1, sizeof(NestedFloat));
+		dcAggrField(s_, DC_SIGCHAR_FLOAT, offsetof(NestedFloat, f), 1);
+		dcCloseAggr(s_);
+
+		s = dcNewAggr(2, sizeof(expected));
+		dcAggrField(s, DC_SIGCHAR_INT, offsetof(Int_NestedFloat, a), 1);
+		dcAggrField(s, DC_SIGCHAR_AGGREGATE, offsetof(Int_NestedFloat, b), 1, s_);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgInt(vm, expected.a);
+		dcArgFloat(vm, expected.b.f);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_int_nested_float, s, &returned);
+
+		dcFreeAggr(s_);
+		dcFreeAggr(s);
+
+		printf("r:{i{f}}  (cdecl): %d\n", (returned.a == expected.a && returned.b.f == expected.b.f));
+		ret = returned.a == expected.a && returned.b.f == expected.b.f && ret;
+	}
+	{
+		Int_NestedDouble expected = fun_return_int_nested_double(24, 2.5), returned = { 25, { 3.5f } };
+		DCaggr *s, *s_;
+
+		s_ = dcNewAggr(1, sizeof(NestedDouble));
+		dcAggrField(s_, DC_SIGCHAR_DOUBLE, offsetof(NestedDouble, f), 1);
+		dcCloseAggr(s_);
+
+		s = dcNewAggr(2, sizeof(expected));
+		dcAggrField(s, DC_SIGCHAR_INT, offsetof(Int_NestedDouble, a), 1);
+		dcAggrField(s, DC_SIGCHAR_AGGREGATE, offsetof(Int_NestedDouble, b), 1, s_);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgInt(vm, expected.a);
+		dcArgDouble(vm, expected.b.f);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_int_nested_double, s, &returned);
+
+		dcFreeAggr(s_);
+		dcFreeAggr(s);
+
+		printf("r:{i{d}}  (cdecl): %d\n", (returned.a == expected.a && returned.b.f == expected.b.f));
+		ret = returned.a == expected.a && returned.b.f == expected.b.f && ret;
+	}
+	{
+		Three_Double expected = fun_return_three_double(1.5, 2.5, 3.5), returned = { 2.5, 3.5, 4.5 };
+
+		DCaggr *s = dcNewAggr(3, sizeof(expected));
+
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, b), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, c), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcBeginCallAggr(vm, s);
+		dcArgDouble(vm, expected.a);
+		dcArgDouble(vm, expected.b);
+		dcArgDouble(vm, expected.c);
+
+		dcCallAggr(vm, (DCpointer) &fun_return_three_double, s, &returned);
+
+		dcFreeAggr(s);
+
+		printf("r:{ddd}  (cdecl): %d\n", (returned.a == expected.a && returned.b == expected.b && returned.c == expected.c));
+		ret = returned.a == expected.a && returned.b == expected.b && returned.c == expected.c && ret;
+	}
+
+	dcFree(vm);
+
+	return ret;
+}
+
+static double __cdecl fun_take_u8(U8 s)                                                                                { return s.a; }
+static double __cdecl fun_take_u8_double(U8_Double s)                                                                  { return s.a + s.b; }
+static double __cdecl fun_take_float_float(Float_Float s)                                                              { return s.a + s.b; }
+static double __cdecl fun_take_double_u8(Double_U8 s)                                                                  { return s.a + s.b; }
+static double __cdecl fun_take_int_nested_float(Int_NestedFloat s)                                                     { return s.a + s.b.f; }
+static double __cdecl fun_take_int_nested_double(Int_NestedDouble s)                                                   { return s.a + s.b.f; }
+static double __cdecl fun_take_three_double(Three_Double s)                                                            { return s.a + s.b + s.c; }
+static double __cdecl fun_take_mixed_fp(double a, float b, float c, int d, float e, double f, float g, Three_Double s) { return a + 2.*b + 3.*c + 4.*d + 5.*e + 6.*f + 7.*g + 8.*s.a + 9.*s.b + 10.*s.c; }
+static int    __cdecl fun_take_iiiii_il(int a, int b, int c, int d, int e, Int_LongLong f)                             { return a + b + c + d + e + f.a + (int)f.b; }
+static double __cdecl fun_take_more_than_regs(More_Than_Regs s)                                                        { return s.a + s.b + s.c + s.d + s.e + s.f + s.g + s.h + s.i + s.j + s.k + s.l + s.m + s.n + s.o + s.p + s.q + s.r; }
+
+
+int testAggrParameters()
+{
+	int ret = 1;
+
+	DCCallVM* vm = dcNewCallVM(4096);
+	dcMode(vm,DC_CALL_C_DEFAULT);
+	{
+		U8 t = { 5 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(1, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_UCHAR, offsetof(U8, a), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_u8);
+
+		dcFreeAggr(s);
+
+		printf("{C}  (cdecl): %d\n", returned == t.a);
+		ret = returned == t.a && ret;
+	}
+	{
+		U8_Double t = { 5, 5.5 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_UCHAR,  offsetof(U8_Double, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(U8_Double, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_u8_double);
+
+		dcFreeAggr(s);
+
+		printf("{Cd}  (cdecl): %d\n", returned == t.a + t.b);
+		ret = returned == t.a + t.b && ret;
+	}
+	{
+		Float_Float t = { 1.5, 5.5 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_FLOAT, offsetof(Float_Float, a), 1);
+		dcAggrField(s, DC_SIGCHAR_FLOAT, offsetof(Float_Float, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_float_float);
+
+		dcFreeAggr(s);
+
+		printf("{ff}  (cdecl): %d\n", returned == t.a + t.b);
+		ret = returned == t.a + t.b && ret;
+	}
+	{
+		Double_U8 t = { 5.5, 42 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Double_U8, a), 1);
+		dcAggrField(s, DC_SIGCHAR_UCHAR,  offsetof(Double_U8, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_double_u8);
+
+		dcFreeAggr(s);
+
+		printf("{dC}  (cdecl): %d\n", returned == t.a + t.b);
+		ret = returned == t.a + t.b && ret;
+	}
+	{
+		Int_NestedFloat t = { 24, { 2.5f } };
+		double returned;
+		DCaggr *s, *s_;
+
+		s_ = dcNewAggr(1, sizeof(NestedFloat));
+		dcAggrField(s_, DC_SIGCHAR_FLOAT, offsetof(NestedFloat, f), 1);
+		dcCloseAggr(s_);
+
+		s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_INT, offsetof(Int_NestedFloat, a), 1);
+		dcAggrField(s, DC_SIGCHAR_AGGREGATE, offsetof(Int_NestedFloat, b), 1, s_);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_int_nested_float);
+
+		dcFreeAggr(s_);
+		dcFreeAggr(s);
+
+		printf("{i{f}}  (cdecl): %d\n", returned == t.a + t.b.f);
+		ret = returned == t.a + t.b.f && ret;
+	}
+	{
+		Int_NestedDouble t = { 24, { 2.5} };
+		double returned;
+		DCaggr *s, *s_;
+
+		s_ = dcNewAggr(1, sizeof(NestedDouble));
+		dcAggrField(s_, DC_SIGCHAR_DOUBLE, offsetof(NestedDouble, f), 1);
+		dcCloseAggr(s_);
+
+		s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_INT, offsetof(Int_NestedDouble, a), 1);
+		dcAggrField(s, DC_SIGCHAR_AGGREGATE, offsetof(Int_NestedDouble, b), 1, s_);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_int_nested_double);
+
+		dcFreeAggr(s_);
+		dcFreeAggr(s);
+
+		printf("{i{d}}  (cdecl): %d\n", returned == t.a + t.b.f);
+		ret = returned == t.a + t.b.f && ret;
+	}
+	{
+		Three_Double t = { 1.5, 2.5, 3.5 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(3, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, b), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, c), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_three_double);
+
+		dcFreeAggr(s);
+
+		printf("{fff}  (cdecl): %d\n", returned == t.a + t.b + t.c);
+		ret = returned == t.a + t.b + t.c && ret;
+	}
+	{
+		/* w/ some prev params, so not fitting into float regs anymore (on win and sysv) */
+		Three_Double t = { 1.5, 2.5, 3.5 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(3, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, b), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE, offsetof(Three_Double, c), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgDouble(vm, 234.4);
+		dcArgFloat(vm, 34.4f);
+		dcArgFloat(vm, 4.0f);
+		dcArgInt(vm, -12);
+		dcArgFloat(vm, -83.9f);
+		dcArgDouble(vm, -.9);
+		dcArgFloat(vm, .6f);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_mixed_fp) + 84.;
+		if(returned < 0.)
+			returned = -returned;
+
+		dcFreeAggr(s);
+
+		printf("dffifdf{fff}  (cdecl): %d\n", returned < .00001);
+		ret = returned < .00001 && ret;
+	}
+	{
+		Int_LongLong t = { -17, 822LL };
+		int returned;
+
+		DCaggr *s = dcNewAggr(2, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_INT,      offsetof(Int_LongLong, a), 1);
+		dcAggrField(s, DC_SIGCHAR_LONGLONG, offsetof(Int_LongLong, b), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgInt(vm, 23);
+		dcArgInt(vm, -211);
+		dcArgInt(vm, 111);
+		dcArgInt(vm, 34);
+		dcArgInt(vm, -19290);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallInt(vm, (DCpointer) &fun_take_iiiii_il);
+
+		dcFreeAggr(s);
+
+		printf("iiiii{il}  (cdecl): %d\n", returned == -18528);
+		ret = returned == -18528 && ret;
+	}
+	{
+		More_Than_Regs t = { 1., 2., 3., 4, 5, 6, 7., 8., 9., 10.f, 11, 12.f, 13., 14, 15, 16, 17, 18 };
+		double returned;
+
+		DCaggr *s = dcNewAggr(18, sizeof(t));
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, a), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, b), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, c), 1);
+		dcAggrField(s, DC_SIGCHAR_LONGLONG, offsetof(More_Than_Regs, d), 1);
+		dcAggrField(s, DC_SIGCHAR_CHAR,     offsetof(More_Than_Regs, e), 1);
+		dcAggrField(s, DC_SIGCHAR_CHAR,     offsetof(More_Than_Regs, f), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, g), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, h), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, i), 1);
+		dcAggrField(s, DC_SIGCHAR_FLOAT,    offsetof(More_Than_Regs, j), 1);
+		dcAggrField(s, DC_SIGCHAR_INT,      offsetof(More_Than_Regs, k), 1);
+		dcAggrField(s, DC_SIGCHAR_FLOAT,    offsetof(More_Than_Regs, l), 1);
+		dcAggrField(s, DC_SIGCHAR_DOUBLE,   offsetof(More_Than_Regs, m), 1);
+		dcAggrField(s, DC_SIGCHAR_SHORT,    offsetof(More_Than_Regs, n), 1);
+		dcAggrField(s, DC_SIGCHAR_LONG,     offsetof(More_Than_Regs, o), 1);
+		dcAggrField(s, DC_SIGCHAR_INT,      offsetof(More_Than_Regs, p), 1);
+		dcAggrField(s, DC_SIGCHAR_UINT,     offsetof(More_Than_Regs, q), 1);
+		dcAggrField(s, DC_SIGCHAR_LONGLONG, offsetof(More_Than_Regs, r), 1);
+		dcCloseAggr(s);
+
+		dcReset(vm);
+		dcArgAggr(vm, s, &t);
+		returned = dcCallDouble(vm, (DCpointer) &fun_take_more_than_regs);
+
+		dcFreeAggr(s);
+
+		printf("{dddlccdddfifdsjiIl}  (cdecl): %d\n", returned == 171.);
+		ret = returned == 171. && ret;
+	}
+
+	dcFree(vm);
+
+	return ret;
+}
+
+#endif
+
--- a/test/plain/test_main.c	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain/test_main.c	Thu Apr 21 13:35:47 2022 +0200
@@ -335,8 +335,10 @@
 #endif
 
 
-int testCallStructs();
-int testStructSizes();
+#if defined(DC__Feature_AggrByVal)
+int testAggrReturns();
+int testAggrParameters();
+#endif
 
 int main(int argc, char* argv[])
 {
@@ -344,8 +346,10 @@
   dcTest_initPlatform();
 
   r = testCallC() && r;
-  r = testStructSizes() && r;
-  /*r = testCallStructs() && r;*/
+#if defined(DC__Feature_AggrByVal)
+  r = testAggrReturns() && r;
+  r = testAggrParameters() && r;
+#endif
 #if defined(DC__OS_Win32)
   r = testCallStd() && r;
   r = testCallFast() && r;
--- a/test/plain/test_structs.c	Sat Apr 16 15:00:58 2022 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,271 +0,0 @@
-/*
-
- Package: dyncall
- Library: test
- File: test/plain/test_structs.c
- Description:
- License:
-
-   Copyright (c) 2010-2015 Olivier Chafik <olivier.chafik@gmail.com>
-                      2019 Tassilo Philipp <tphilipp@potion-studios.com>
-
-   Permission to use, copy, modify, and distribute this software for any
-   purpose with or without fee is hereby granted, provided that the above
-   copyright notice and this permission notice appear in all copies.
-
-   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-*/
-
-
-
-
-#include "../../dyncall/dyncall.h"
-#include "../../dyncall/dyncall_signature.h"
-#include "../../dyncall/dyncall_struct.h"
-#include "../common/platformInit.h"
-
-#define DC_TEST_STRUCT_SIZE(sig, type, s) { \
-	DCsize expected = sizeof(type), computed = dcStructSize(s);\
-	printf("struct_%s size: expected = %d, computed = %d: %d\n", sig, (int)expected, (int)computed, (expected == computed)); \
-	ret = (expected == computed) && ret; \
-}
-
-/* @@@ incomplete and should be makde generally available in dyncall once struct support will make it in */
-#if defined(DC__OS_Plan9)
-#  define DEFAULT_STRUCT_ALIGNMENT 4
-#else
-#  define DEFAULT_STRUCT_ALIGNMENT DEFAULT_ALIGNMENT
-#endif
-
-int testStructSizes()
-{
-	int ret = 1;
-
-	{
-		typedef struct {
-			char a, b;
-		} S;
-
-		size_t size;
-		DCstruct* s = dcNewStruct(2, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("cc", S, s);
-		dcFreeStruct(s);
-	}
-	{
-		typedef struct {
-			char a, b, c;
-		} S;
-
-		size_t size;
-		DCstruct* s = dcNewStruct(3, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("ccc", S, s);
-		dcFreeStruct(s);
-	}
-	{
-		typedef struct {
-			char a;
-			short b;
-		} S;
-
-		size_t size;
-		DCstruct* s = dcNewStruct(2, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_SHORT, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("cs", S, s);
-		dcFreeStruct(s);
-	}
-	{
-		typedef struct {
-			double a, b, c, d;
-		} S;
-
-		size_t size;
-		DCstruct* s = dcNewStruct(4, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_DOUBLE, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_DOUBLE, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_DOUBLE, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_DOUBLE, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("dddd", S, s);
-		dcFreeStruct(s);
-	}
-	{
-		typedef struct {
-			char a, b;
-			void* p[3];
-		} S;
-
-		size_t size;
-		DCstruct* s = dcNewStruct(3, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_POINTER, DEFAULT_ALIGNMENT, 3);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("cc[ppp]", S, s);
-		dcFreeStruct(s);
-	}
-	{
-		typedef struct {
-			short a;
-			struct {
-			char a, b;
-			void* p[3];
-			} sub;
-			short b;
-		} S;
-	 	
-		size_t size;
-		DCstruct* s = dcNewStruct(3, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_SHORT, DEFAULT_ALIGNMENT, 1);
-		dcSubStruct(s, 3, DEFAULT_STRUCT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_POINTER, DEFAULT_ALIGNMENT, 3);
-		dcCloseStruct(s);
-		dcStructField(s, DC_SIGCHAR_SHORT, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("s{cc[ppp]}s", S, s);
-		dcFreeStruct(s);
-	}
-
-#define TEST_MONO_STRUCT(sig, type, sigchar) \
-	{ \
-		typedef struct { \
-			type v; \
-		} S; \
-		 \
-		DCstruct* s = dcNewStruct(1, DEFAULT_STRUCT_ALIGNMENT); \
-		dcStructField(s, sigchar, DEFAULT_ALIGNMENT, 1); \
-		dcCloseStruct(s); \
-		 \
-		DC_TEST_STRUCT_SIZE(sig, S, s); \
-		dcFreeStruct(s); \
-	}
-
-	TEST_MONO_STRUCT("c", char,               DC_SIGCHAR_CHAR);      // 4 on plan 9 |
-	TEST_MONO_STRUCT("C", unsigned char,      DC_SIGCHAR_UCHAR);     // 4 on plan 9 |
-	TEST_MONO_STRUCT("s", short,              DC_SIGCHAR_SHORT);     // 4 on plan 9 |  minimal size of a struct, period?
-	TEST_MONO_STRUCT("S", unsigned short,     DC_SIGCHAR_USHORT);    // 4 on plan 9 |
-	TEST_MONO_STRUCT("i", int,                DC_SIGCHAR_INT);
-	TEST_MONO_STRUCT("I", unsigned int,       DC_SIGCHAR_UINT);
-	TEST_MONO_STRUCT("j", long,               DC_SIGCHAR_LONG);
-	TEST_MONO_STRUCT("J", unsigned long,      DC_SIGCHAR_ULONG);
-	TEST_MONO_STRUCT("l", long long,          DC_SIGCHAR_LONGLONG);
-	TEST_MONO_STRUCT("L", unsigned long long, DC_SIGCHAR_ULONGLONG);
-	TEST_MONO_STRUCT("p", void*,              DC_SIGCHAR_POINTER);
-	TEST_MONO_STRUCT("f", float,              DC_SIGCHAR_FLOAT);
-	TEST_MONO_STRUCT("d", double,             DC_SIGCHAR_DOUBLE);
-
-	return ret;
-}
-
-
-
-typedef struct
-{
-	char a, b, c;
-} FewValues;
-
-double sum_FewValues(FewValues values)
-{
-	printf("sum_FewValues(a = %d, b = %d, c = %d)\n", (int)values.a, (int)values.b, (int)values.c);
-	return ((double)values.a) + ((double)values.b) + ((double)values.c);
-}
-
-
-typedef struct
-{
-	char a, b;
-	double p[10];
-} SomeValues;
-
-double sum_SomeValues(SomeValues values)
-{
-	return ((double)values.a) + ((double)values.b) + values.p[0] + values.p[1] + values.p[2];
-}
-
-
-/*int testCallStructs()
-{
-	int ret = 1;
-
-	DCCallVM* pc = dcNewCallVM(4096);
-	{
-		FewValues values;
-		double calledSum, expectedSum;
-		DCstruct* s = dcNewStruct(3, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("ccc", FewValues, s);
-
-		values.a = 1;
-		values.b = 2;
-		values.c = 3;
-
-		dcMode(pc, DC_CALL_C_DEFAULT);
-		dcReset(pc);
-		printf("BEFORE dcArgStruct\n");
-		dcArgStruct(pc, s, &values);
-		printf("AFTER dcArgStruct\n");
-		calledSum = dcCallDouble(pc, (DCpointer)&sum_FewValues);
-		expectedSum = sum_FewValues(values);
-
-		DC_TEST_INT_EQUAL(expectedSum, calledSum);
-		dcFreeStruct(s);
-	}
-	{
-		SomeValues values;
-		double calledSum, expectedSum;
-		DCstruct* s = dcNewStruct(3, DEFAULT_STRUCT_ALIGNMENT);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_CHAR, DEFAULT_ALIGNMENT, 1);
-		dcStructField(s, DC_SIGCHAR_DOUBLE, DEFAULT_ALIGNMENT, 10);
-		dcCloseStruct(s);
-
-		DC_TEST_STRUCT_SIZE("ccd", SomeValues, s);
-
-		values.a = 1;
-		values.b = 2;
-		values.p[0] = 10;
-		values.p[1] = 11;
-		values.p[2] = 12;
-
-		dcMode(pc, DC_CALL_C_DEFAULT);
-		dcReset(pc);
-		dcArgStruct(pc, s, &values);
-		calledSum = dcCallDouble(pc, (DCpointer) &sum_SomeValues);
-		expectedSum = sum_SomeValues(values);
-
-		DC_TEST_INT_EQUAL(expectedSum, calledSum);
-		dcFreeStruct(s);
-	}
-
-	dcFree(pc);
-
-	return ret;
-}*/
-
--- a/test/plain_c++/test_main.cc	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/plain_c++/test_main.cc	Thu Apr 21 13:35:47 2022 +0200
@@ -6,7 +6,7 @@
  Description:
  License:
 
-   Copyright (c) 2007-2019 Daniel Adler <dadler@uni-goettingen.de>,
+   Copyright (c) 2007-2022 Daniel Adler <dadler@uni-goettingen.de>,
                            Tassilo Philipp <tphilipp@potion-studios.com>
 
    Permission to use, copy, modify, and distribute this software for any
@@ -30,9 +30,11 @@
 #include "../common/platformInit.h"
 #include "../common/platformInit.c" /* Impl. for functions only used in this translation unit */
 
+#include "../../dyncall/dyncall_aggregate.h"
 
 #include <signal.h>
 #include <setjmp.h>
+#include <stdarg.h>
 
 jmp_buf jbuf;
 
@@ -110,6 +112,7 @@
 #define VTBI_GET_DOUBLE VTBI_BASE+11
 #define VTBI_SET_POINTER VTBI_BASE+12
 #define VTBI_GET_POINTER VTBI_BASE+13
+#define VTBI_SUM_3_INTS VTBI_BASE+14
 
 class Value
 {
@@ -130,31 +133,10 @@
   virtual DCdouble   __cdecl getDouble()               { return mValue.d; }
   virtual void       __cdecl setPtr(DCpointer x)       { mValue.p = x; }
   virtual DCpointer  __cdecl getPtr()                  { return mValue.p; }
-private:
-  ValueUnion mValue;
-};
-
-/* C++ class using (on win32: microsoft) this call */
-
-class ValueMS
-{
-public:
-  virtual ~ValueMS()    {}
 
-  virtual void       setBool(DCbool x)         { mValue.B = x; }
-  virtual DCbool     getBool()                 { return mValue.B; }
-  virtual void       setInt(DCint x)           { mValue.i = x; }
-  virtual DCint      getInt()                  { return mValue.i; }
-  virtual void       setLong(DClong x)         { mValue.j = x; }
-  virtual DClong     getLong()                 { return mValue.j; }
-  virtual void       setLongLong(DClonglong x) { mValue.l = x; }
-  virtual DClonglong getLongLong()             { return mValue.l; }
-  virtual void       setFloat(DCfloat x)       { mValue.f = x; }
-  virtual DCfloat    getFloat()                { return mValue.f; }
-  virtual void       setDouble(DCdouble x)     { mValue.d = x; }
-  virtual DCdouble   getDouble()               { return mValue.d; }
-  virtual void       setPtr(DCpointer x)       { mValue.p = x; }
-  virtual DCpointer  getPtr()                  { return mValue.p; }
+  /* ellipsis test w/ this ptr */
+  virtual int        __cdecl sum3Ints(DCint x, ...)    { va_list va; va_start(va,x); x += va_arg(va,int) + va_arg(va,int); va_end(va); return x; }
+
 private:
   ValueUnion mValue;
 };
@@ -263,20 +245,59 @@
   printf("p  (%s): %d\n", name, b);
   r = r && b;
 
+  /* ellipsis test w/ this pointer */
+
+  dcReset(pc);
+  dcMode(pc, DC_CALL_C_ELLIPSIS);
+  dcArgPointer(pc, pThis);
+  dcArgInt(pc, 23);
+  dcMode(pc, DC_CALL_C_ELLIPSIS_VARARGS);
+  dcArgInt(pc, -223);
+  dcArgInt(pc, 888);
+  int r_ = dcCallInt(pc, vtbl[VTBI_SUM_3_INTS]);
+  b = (r_ == 688);
+  printf("...  (%s): %d\n", name, b);
+  r = r && b;
+
   return r;
 }
 
 
-#if defined(DC__OS_Win32)
+#if defined(DC__OS_Win32) && defined(DC__C_MSVC)
+
+/* C++ class using (on win32: microsoft) this call */
+
+class ValueMS
+{
+public:
+  virtual ~ValueMS()    {}
 
-int testCallThisMS()
+  virtual void       setBool(DCbool x)         { mValue.B = x; }
+  virtual DCbool     getBool()                 { return mValue.B; }
+  virtual void       setInt(DCint x)           { mValue.i = x; }
+  virtual DCint      getInt()                  { return mValue.i; }
+  virtual void       setLong(DClong x)         { mValue.j = x; }
+  virtual DClong     getLong()                 { return mValue.j; }
+  virtual void       setLongLong(DClonglong x) { mValue.l = x; }
+  virtual DClonglong getLongLong()             { return mValue.l; }
+  virtual void       setFloat(DCfloat x)       { mValue.f = x; }
+  virtual DCfloat    getFloat()                { return mValue.f; }
+  virtual void       setDouble(DCdouble x)     { mValue.d = x; }
+  virtual DCdouble   getDouble()               { return mValue.d; }
+  virtual void       setPtr(DCpointer x)       { mValue.p = x; }
+  virtual DCpointer  getPtr()                  { return mValue.p; }
+private:
+  ValueUnion mValue;
+};
+
+static bool testCallThisMS()
 {
   bool r = false;
   DCCallVM* pc = dcNewCallVM(4096);
   dcMode(pc, DC_CALL_C_X86_WIN32_THIS_MS);
   dcReset(pc);
   if(setjmp(jbuf) != 0)
-    printf("sigsegv\n");
+    printf("sigsegv\n"), r=false;
   else
     r = testCallValue<ValueMS>(pc, "MS");
   dcFree(pc);
@@ -286,14 +307,14 @@
 #endif
 
 
-int testCallThisC()
+static bool testCallThisC()
 {
   bool r = false;
   DCCallVM* pc = dcNewCallVM(4096);
   dcMode(pc, DC_CALL_C_DEFAULT_THIS);
   dcReset(pc);
   if(setjmp(jbuf) != 0)
-    printf("sigsegv\n");
+    printf("sigsegv\n"), r=false;
   else
     r = testCallValue<Value>(pc, "c");
   dcFree(pc);
@@ -301,6 +322,159 @@
 }
 
 
+#if defined(DC__Feature_AggrByVal)
+
+class ValueAggr
+{
+public:
+  struct S { int i, j, k, l, m; };
+
+  virtual ~ValueAggr()    {}
+
+  virtual void  __cdecl setAggr(S x)  { mS.i = x.i; mS.j = x.j; mS.k = x.k; mS.l = x.l; mS.m = x.m; }
+  virtual S     __cdecl getAggr()     { return mS; }
+
+  /* ellipsis test w/ this ptr and big (!) aggregate return */
+  struct Big { int sum; long long dummy[50]; /*dummy to make it not fit in any regs*/ };
+  virtual struct Big  __cdecl sum3RetAggr(DCint x, ...) { va_list va; va_start(va,x); struct Big r = { x + va_arg(va,int) + va_arg(va,int) }; va_end(va); return r; }
+
+  /* non-trivial aggregate */
+  struct NonTriv {
+    int i, j;
+    NonTriv(int a, int b) : i(a),j(b) { }
+    NonTriv(const NonTriv& rhs) { static int a=13, b=37; i = a++; j = b++; }
+  };
+  /* by value, so on first invocation a = 13,37, b = 14,38 and retval = 13*14,37*38, no matter the contents of the instances as copy ctor is called */
+  /* NOTE: copy of return value is subject to C++ "copy elision", so it is *not* calling the copy ctor for the return value */
+  virtual struct NonTriv  __cdecl squareFields(NonTriv a, NonTriv b) { return NonTriv(a.i*b.i, a.j*b.j); }
+
+private:
+  struct S mS;
+};
+
+#if (__cplusplus >= 201103L)
+#  include <type_traits>
+#endif
+
+/* special case w/ e.g. MS x64 C++ calling cconf: struct return ptr is passed as *2nd* arg */
+static bool testCallThisAggr()
+{
+  bool r = false;
+  DCCallVM* pc = dcNewCallVM(4096);
+  dcMode(pc, DC_CALL_C_DEFAULT_THIS);
+
+  if(setjmp(jbuf) != 0)
+    printf("sigsegv\n"), r=false;
+  else
+  {
+    ValueAggr o;
+
+    DCpointer* vtbl =  *( (DCpointer**) &o ); /* vtbl is located at beginning of class */
+    ValueAggr::S st = { 124, -12, 434, 20202, -99999 }, returned;
+
+#if (__cplusplus >= 201103L)
+    bool istriv = std::is_trivial<ValueAggr::S>::value;
+#else
+    bool istriv = true; /* own deduction as no type trait */
+#endif
+    DCaggr *s = dcNewAggr(5, sizeof(ValueAggr::S));
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(ValueAggr::S, i), 1);
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(ValueAggr::S, j), 1);
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(ValueAggr::S, k), 1);
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(ValueAggr::S, l), 1);
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(ValueAggr::S, m), 1);
+    dcCloseAggr(s);
+
+    // set S::mS
+    dcReset(pc);
+    dcArgPointer(pc, &o); // this ptr
+    dcArgAggr(pc, s, &st);
+    dcCallVoid(pc, vtbl[VTBI_BASE+0]);
+
+    // get it back
+    dcReset(pc);
+    dcBeginCallAggr(pc, s);
+    dcArgPointer(pc, &o); // this ptr
+    dcCallAggr(pc, vtbl[VTBI_BASE+1], s, &returned);
+
+    dcFreeAggr(s);
+
+    r = returned.i == st.i && returned.j == st.j && returned.k == st.k && returned.l == st.l && returned.m == st.m && istriv;
+    printf("r:{iiiii}  (this/trivial): %d\n", r);
+
+
+
+    /* ellipsis test w/ this pointer returning big aggregate (quite an edge
+     * case) by value (won't fit in regs, so hidden pointer is is used to write
+     * return values to), showing the need to use the DC_CALL_C_DEFAULT_THIS
+     * mode first, for the this ptr alone, then DC_CALL_C_ELLIPSIS, then
+     * DC_CALL_C_ELLIPSIS_VARARGS (test is useful on win64 where thisptr is
+     * passed *after* return aggregate's hidden ptr) */
+#if (__cplusplus >= 201103L)
+    istriv = std::is_trivial<ValueAggr::Big>::value;
+#else
+    istriv = true; /* own deduction as no type trait */
+#endif
+    s = dcNewAggr(2, sizeof(struct ValueAggr::Big));
+    dcAggrField(s, DC_SIGCHAR_INT, offsetof(struct ValueAggr::Big, sum), 1);
+    dcAggrField(s, DC_SIGCHAR_LONGLONG, offsetof(struct ValueAggr::Big, dummy), 50);
+    dcCloseAggr(s);
+    dcReset(pc);
+    dcMode(pc, DC_CALL_C_DEFAULT_THIS); /* <-- needed on x64/win64 */
+
+    dcBeginCallAggr(pc, s);
+    dcMode(pc, DC_CALL_C_ELLIPSIS);
+    dcArgPointer(pc, &o);
+    dcArgInt(pc, 89);
+    dcMode(pc, DC_CALL_C_ELLIPSIS_VARARGS);
+    dcArgInt(pc, -157);
+    dcArgInt(pc, 888);
+    struct ValueAggr::Big big;
+    dcCallAggr(pc, vtbl[VTBI_BASE+2], s, &big);
+
+    dcFreeAggr(s);
+
+    bool b = (big.sum == 820) && istriv;
+    r = r && b;
+    printf("r:{il[50]}  (this/trivial/ellipsis): %d\n", b);
+
+
+
+    /* non-trivial test ----------------------------------------------------------- */
+
+#if (__cplusplus >= 201103L)
+    istriv = std::is_trivial<ValueAggr::NonTriv>::value;
+#else
+    istriv = false; /* own deduction as no type trait */
+#endif
+    dcReset(pc);
+    dcMode(pc, DC_CALL_C_DEFAULT_THIS);
+
+    /* non trivial aggregates: pass NULL for DCaggr* and do copy on our own (see doc) */
+    dcBeginCallAggr(pc, NULL);
+
+    ValueAggr::NonTriv nt0(5, 6), nt1(7, 8), ntr(0, 0);
+    dcArgAggr(pc, NULL, &o); // this ptr
+    /* make *own* copies, as dyncall cannot know how to call copy ctor */ //@@@ put into doc
+    ValueAggr::NonTriv nt0_ = nt0, nt1_ = nt1;
+    dcArgAggr(pc, NULL, &nt0_); /* use *own* copy */
+    dcArgAggr(pc, NULL, &nt1_); /* use *own* copy */
+
+    dcCallAggr(pc, vtbl[VTBI_BASE+3], NULL, &ntr); /* note: "copy elision", so retval might *not* call copy ctor */
+
+
+    b = ntr.i == 13*14 && ntr.j == 37*38 && !istriv;
+    r = r && b;
+    printf("r:{ii}  (this/nontrivial/retval_copy_elision): %d\n", b);
+  }
+
+  dcFree(pc);
+  return r;
+}
+
+#endif
+
+
 extern "C" {
 
 int main(int argc, char* argv[])
@@ -312,9 +486,12 @@
   bool r = true;
 
   r = testCallThisC() && r;
-#if defined(DC__OS_Win32)
+#if defined(DC__OS_Win32) && defined(DC__C_MSVC)
   r = testCallThisMS() && r;
 #endif
+#if defined(DC__Feature_AggrByVal)
+  r = testCallThisAggr() && r;
+#endif
 
   printf("result: plain_cpp: %d\n", r);
 
--- a/test/thunk/Nmakefile	Sat Apr 16 15:00:58 2022 +0200
+++ b/test/thunk/Nmakefile	Thu Apr 21 13:35:47 2022 +0200
@@ -37,7 +37,7 @@
 
 $(TARGETS): $(OBJS)
 	echo Linking $@ ...
-	$(LD) /OUT:"$@" $(LDFLAGS) $(OBJS) $(TOP)\dyncall\dyncall_s.lib $(TOP)\dyncallback\dyncallback_s.lib
+	$(LD) /OUT:"$@" $(LDFLAGS) $(OBJS) $(TOP)\dyncallback\dyncallback_s.lib
 
 
 !ELSE IF "$(BUILD_OS)" == "nds"
@@ -47,7 +47,7 @@
 
 $(TARGETS): $(OBJS)
 	echo Linking $@ ...
-	$(LD) $(LDFLAGS) $(OBJS) $(DEVKITPRO_PATH)\libnds\lib\libnds9.a $(TOP)/dyncall/libdyncall_s.a $(TOP)\dyncallback\libdyncallback_s.a -o "$(@B).elf"
+	$(LD) $(LDFLAGS) $(OBJS) $(DEVKITPRO_PATH)\libnds\lib\libnds9.a $(TOP)\dyncallback\libdyncallback_s.a -o "$(@B).elf"
 	$(OCP) -O binary "$(@B).elf" "$(@B).arm9"
 	ndstool -c "$@" -9 "$(@B).arm9"
 	del "$(@B).elf" "$(@B).arm9"