changeset 308:7c6f19d42b31

dynload UTF-8 support for library paths: - added missing support to windows - added test code for all platforms to dynload_plain - doc update
author Tassilo Philipp
date Thu, 24 Oct 2019 23:19:20 +0200
parents d2c8ea3ef2ed
children b51401bc4c37
files dynload/dynload.3 dynload/dynload_windows.c test/dynload_plain/CMakeLists.txt test/dynload_plain/Makefile.embedded test/dynload_plain/Makefile.generic test/dynload_plain/Nmakefile test/dynload_plain/dynload_plain.c
diffstat 7 files changed, 184 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/dynload/dynload.3	Mon Apr 29 11:51:30 2019 +0200
+++ b/dynload/dynload.3	Thu Oct 24 23:19:20 2019 +0200
@@ -54,7 +54,9 @@
 .Fn dlFindSymbol
 calls. Passing a null pointer for the
 .Ar libpath
-argument is valid, and returns a handle to the main executable of the calling code. Also, searching libraries in library paths (e.g. by just passing the library's leaf name) should work, however, they are OS specific. Returns a null pointer on error.
+argument is valid, and returns a handle to the main executable of the calling code. Also, searching libraries in library paths (e.g. by just passing the library's leaf name) should work, however, they are OS specific. The
+.Ar libpath
+argument is expected to be UTF-8 encoded. Returns a null pointer on error.
 .Pp
 .Fn dlFreeLibrary 
 frees the loaded library with handle
@@ -74,9 +76,9 @@
 .Ar sOut
 is a pointer to a buffer of size
 .Ar bufSize
-(in bytes), to hold the output string. The return value is the size of the buffer (in bytes) needed to hold the null-terminated string, or 0 if it can't be looked up. If
+(in bytes), to hold the output string (UTF-8 encoded). The return value is the size of the buffer (in bytes) needed to hold the null-terminated string, or 0 if it can't be looked up. If
 .Ar bufSize
->= return value > 1, a null-terminted string with the path to the library should be in
+>= return value >= 1, a null-terminated string with the path to the library should be in
 .Ar sOut .
 If it returns 0, the library name wasn't able to be found. Please note that this might happen in some rare cases, so make sure to always check.
 .Pp
--- a/dynload/dynload_windows.c	Mon Apr 29 11:51:30 2019 +0200
+++ b/dynload/dynload_windows.c	Thu Oct 24 23:19:20 2019 +0200
@@ -35,13 +35,37 @@
 
 
 #include "dynload.h"
+#include "dynload_alloc.h"
 
 #include <windows.h>
 
 
 DLLib* dlLoadLibrary(const char* libPath)
 {
-  return (DLLib*)(libPath != NULL ? LoadLibraryA(libPath) : GetModuleHandle(NULL));
+  if(libPath == NULL) /* no path given: return what GetModuleHandle(NULL) yields (per dynload.3 the handle to the main executable) */
+    return (DLLib*)GetModuleHandle(NULL);
+  else {
+    /* libPath is expected to be UTF-8 encoded; convert it to wide chars for LoadLibraryW(), so count required size (a count of 0 is treated as failure)... */
+    DLLib* pLib;
+    wchar_t* ws;
+    int r = MultiByteToWideChar(CP_UTF8, 0, libPath, -1, NULL, 0);
+    if(!r) {
+      return NULL;
+    }
+
+    /* ... reserve temp space for r wide chars, ... */
+    ws = (wchar_t*)dlAllocMem(r * sizeof(wchar_t));
+    if(!ws) /* allocation failed; nothing to clean up, yet */
+      return NULL;
+
+    /* ... convert (and use r as success flag: 1 only if as many wide chars were written as counted above), ... */
+    r = (MultiByteToWideChar(CP_UTF8, 0, libPath, -1, ws, r) == r);
+    pLib = (DLLib*)(r ? LoadLibraryW(ws) : NULL);/* load only if the conversion succeeded, NULL otherwise */
+
+    /* ... free temp space and return handle (NULL on any failure) */
+    dlFreeMem(ws);
+    return pLib;
+  }
 }
 
 
@@ -59,6 +83,48 @@
 
 int dlGetLibraryPath(DLLib* pLib, char* sOut, int bufSize)
 {
-  return GetModuleFileNameA((HMODULE)pLib, sOut, bufSize)+1; /* strlen + '\0' */
+  /* get the path name of pLib as wide chars, then convert to UTF-8; we need some trial and error to figure out needed wide char string length */
+  /* returns 0 on error, otherwise the UTF-8 size as reported by WideCharToMultiByte(); sOut is only written to if that size is <= bufSize    */
+
+  wchar_t* ws;
+  int r;
+
+  /* start size and upper limit of the temp buffer (both counted in wide chars); both */
+  /* must be powers of 2, so that doubling in the loop reaches the limit exactly, which */
+  /* allows trying up to 32768 chars (including \0), the extended path ("\\?\...") maximum */
+  static const int MAX_EXT_PATH = 1<<15; /* max extended path length (32768) */
+  int nc = 1<<6;                         /* guessed start buffer size (64), doubled as needed */
+
+  while(nc <= MAX_EXT_PATH)/*@@@ testcode*/
+  {
+    ws = (wchar_t*)dlAllocMem(nc * sizeof(wchar_t));
+    if(!ws) /* allocation failed: leave loop and report failure (0) */
+      break;
+
+    r = GetModuleFileNameW((HMODULE)pLib, ws, nc);
+
+    /* r == nc if string was truncated, double temp buffer size and retry */
+    if(r == nc) {
+      nc <<= 1;/*@@@ testcode*/
+      dlFreeMem(ws);
+      continue;
+    }
+    /* error if r is 0 */
+    else if(!r) {
+      dlFreeMem(ws);
+      break;
+    }
+
+    /* check if output buffer is big enough: first only query the needed size, then convert for real if it fits into bufSize */
+    r = WideCharToMultiByte(CP_UTF8, 0, ws, -1, NULL, 0, NULL, NULL);
+    if(r <= bufSize)
+      r = WideCharToMultiByte(CP_UTF8, 0, ws, -1, sOut, bufSize, NULL, NULL);
+
+    /* cleanup and return either size of copied bytes or needed buffer size */
+    dlFreeMem(ws);
+    return r;
+  }
+
+  return 0; /* allocation failed, lookup failed, or path didn't fit into MAX_EXT_PATH wide chars */
 }
 
--- a/test/dynload_plain/CMakeLists.txt	Mon Apr 29 11:51:30 2019 +0200
+++ b/test/dynload_plain/CMakeLists.txt	Thu Oct 24 23:19:20 2019 +0200
@@ -1,7 +1,12 @@
 add_executable(dynload_plain dynload_plain.c)
 target_link_libraries(dynload_plain dynload_s ${CMAKE_DL_LIBS})
+file(WRITE x.c "int dynload_plain_testfunc() { return 5; }")
+add_library(x SHARED x.c)
+set_target_properties(x PROPERTIES OUTPUT_NAME "dynload_plain_ß_test")
+set_target_properties(x PROPERTIES PREFIX "")
+set_target_properties(x PROPERTIES SUFFIX "")
 if(${UNIX})
-exec_program("((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1" OUTPUT_VARIABLE DEF_C_DYLIB)
+exec_program("((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | grep -v '\\.a\$' | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1" OUTPUT_VARIABLE DEF_C_DYLIB)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDEF_C_DYLIB=\\\"${DEF_C_DYLIB}\\\"")
 endif()
 
--- a/test/dynload_plain/Makefile.embedded	Mon Apr 29 11:51:30 2019 +0200
+++ b/test/dynload_plain/Makefile.embedded	Thu Oct 24 23:19:20 2019 +0200
@@ -1,20 +1,30 @@
+# path to default libc.so file, easier to do via shell than in code (see main() in dynload_plain.c)
+# for compat: first gmake style, then assignment op which will use ! as part of var name on gmake<4
+#             and thus not override previously set var
+DEF_C_DYLIB=$(shell ((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | grep -v '\.a$$' | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1)
+DEF_C_DYLIB!=((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | grep -v '\.a$$' | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1
+
 APP = dynload_plain
 OBJS = dynload_plain.o
+TEST_U8_SO = dynload_plain_ß_test # @@@ unsure if every platform handles ß, here (ANSI, UTF-8, ...)
 
 TOP = ../..
-CFLAGS  += -I${TOP}/dynload
+CFLAGS  += -I${TOP}/dynload -DDEF_C_DYLIB=\"${DEF_C_DYLIB}\"
 LDFLAGS += -L${TOP}/dynload
 LDLIBS  += -ldynload_s
 # Works on: Darwin, NetBSD.
 # Linux: add '-ldl'
 
-all: ${APP}
+all: ${APP} ${TEST_U8_SO}
 
 .PHONY: all clean
 
 ${APP}: ${OBJS}
 	${CC} ${OBJS} ${LDFLAGS} ${LDLIBS} -o $@
 
+${TEST_U8_SO}:
+	echo 'int dynload_plain_testfunc() { return 5; }' | ${CC} -shared -x c - -o ${TEST_U8_SO}
+
 clean:
-	rm -f ${APP} ${OBJS}
+	rm -f ${APP} ${OBJS} ${TEST_U8_SO}
 
--- a/test/dynload_plain/Makefile.generic	Mon Apr 29 11:51:30 2019 +0200
+++ b/test/dynload_plain/Makefile.generic	Thu Oct 24 23:19:20 2019 +0200
@@ -4,22 +4,25 @@
 DEF_C_DYLIB=$(shell ((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | grep -v '\.a$$' | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1)
 DEF_C_DYLIB!=((ls /lib*/libc.so* || ls /usr/lib/libc.so*) | grep -v '\.a$$' | (sort -V -r || sort -t . -n -k 2)) 2>/dev/null | head -1
 
-APP       = dynload_plain
-OBJS      = dynload_plain.o
-SRCTOP    = ${VPATH}/../..
-BLDTOP    = ../..
-CFLAGS   += -I${SRCTOP}/dynload -DDEF_C_DYLIB=\"${DEF_C_DYLIB}\"
-LDLIBS_D += -L${BLDTOP}/dynload -ldynload_s
+APP        = dynload_plain
+OBJS       = dynload_plain.o
+TEST_U8_SO = dynload_plain_ß_test # @@@ unsure if every platform handles ß, here (ANSI, UTF-8, ...)
+SRCTOP     = ${VPATH}/../..
+BLDTOP     = ../..
+CFLAGS    += -I${SRCTOP}/dynload -DDEF_C_DYLIB=\"${DEF_C_DYLIB}\"
+LDLIBS_D  += -L${BLDTOP}/dynload -ldynload_s
 
 # Works on: Darwin, NetBSD.
 # Linux: add '-ldl'
 .PHONY: all clean install
-all: ${APP}
+all: ${APP} ${TEST_U8_SO}
 ${APP}: ${OBJS}
 	${CC} ${LDFLAGS} ${OBJS} ${LDLIBS_D} ${LDLIBS} -o ${APP}
+${TEST_U8_SO}:
+	echo 'int dynload_plain_testfunc() { return 5; }' | ${CC} -shared -x c - -o ${TEST_U8_SO}
 clean:
-	rm -f ${APP} ${OBJS}
+	rm -f ${APP} ${OBJS} ${TEST_U8_SO}
 install:
 	mkdir -p ${PREFIX}/test
-	cp ${APP} ${PREFIX}/test
+	cp ${APP} ${TEST_U8_SO} ${PREFIX}/test
 
--- a/test/dynload_plain/Nmakefile	Mon Apr 29 11:51:30 2019 +0200
+++ b/test/dynload_plain/Nmakefile	Thu Oct 24 23:19:20 2019 +0200
@@ -1,4 +1,4 @@
-#//////////////////////////////////////////////////////////////////////////////
+#//////////////////////////////////////////////////////////////////////////////
 #
 # Copyright (c) 2017 Tassilo Philipp <tphilipp@potion-studios.com>
 #
@@ -31,13 +31,21 @@
 
 !IF "$(BUILD_OS)" == "windows"
 
-TARGETS = dynload_plain.exe
+TEST_U8_SO = dynload_plain_ß_test # this non-ASCII character seems to work in Nmakefiles *iff* the utf-8/BOM mark is present
+TARGETS = dynload_plain.exe $(TEST_U8_SO)
 OBJS = dynload_plain.obj
 
-$(TARGETS): $(OBJS)
+dynload_plain.exe: $(OBJS)
 	echo Linking $@ ...
 	$(LD) /OUT:"$@" $(LDFLAGS) $(OBJS) $(TOP)\dynload\libdynload_s.lib $(TOP)\dyncall\libdyncall_s.lib
 
+$(TEST_U8_SO):
+	echo Building helper lib with UTF-8 path $(TEST_U8_SO) ...
+	echo.int dynload_plain_testfunc^(^) { return 5; } > x.c
+	type x.c
+	$(CC) x.c /link /DLL /OUT:$(TEST_U8_SO)
+	del x.c x.obj
+
 
 !ELSE IF "$(BUILD_OS)" == "nds"
 
--- a/test/dynload_plain/dynload_plain.c	Mon Apr 29 11:51:30 2019 +0200
+++ b/test/dynload_plain/dynload_plain.c	Thu Oct 24 23:19:20 2019 +0200
@@ -36,6 +36,15 @@
 #endif
 
 
+int strlen_utf8(const char *s) /* returns number of UTF-8 encoded chars (not bytes) in \0-terminated string s */
+{
+  int i=0, j=0; /* i: byte index into s, j: number of chars counted so far */
+  while(s[i])
+    j += ((s[i++] & 0xc0) != 0x80); /* count every byte whose top two bits aren't 10, i.e. skip UTF-8 continuation bytes */
+  return j;
+}
+
+
 int main(int argc, char* argv[])
 {
   int r = 0, i;
@@ -47,7 +56,7 @@
   /* can be specified in Makefile; this avoids trying to write portable directory traversal stuff */
   const char* clibs[] = {
 #if defined(DEF_C_DYLIB)
-	DEF_C_DYLIB,
+    DEF_C_DYLIB,
 #endif
     "/lib/libc.so",
     "/lib32/libc.so",
@@ -90,8 +99,8 @@
 
       bs = dlGetLibraryPath(pLib, queriedPath, 200);
       if(bs && bs <= 200) {
-	    struct stat st0, st1; /* to check if same file */
-        int b;
+        struct stat st0, st1; /* to check if same file */
+        int b, bs_;
         printf("path of lib looked up via handle: %s\n", queriedPath);
         b = (stat(path, &st0) != -1) && (stat(queriedPath, &st1) != -1);
         printf("lib (inode:%d) and looked up lib (inode:%d) are same: %d\n", b?st0.st_ino:-1, b?st1.st_ino:-1, b && (st0.st_ino == st1.st_ino)); //@@@ on windows, inode numbers returned here are always 0
@@ -100,13 +109,59 @@
 
         /* check correct bufsize retval */
         b = (bs == strlen(queriedPath) + 1);
-        printf("looked up path's needed buffer size (%d) computed correctly: %d\n", bs, b);
+        printf("looked up path's needed buffer size (%d) computed correctly 1/2: %d\n", bs, b);
+        r += b;
+
+        /* check perfect fitting bufsize */
+        queriedPath[0] = 0;
+        bs_ = dlGetLibraryPath(pLib, queriedPath, bs);
+        b = (bs == bs_ && bs_ == strlen(queriedPath) + 1);
+        printf("looked up path's needed buffer size (%d) computed correctly 2/2: %d\n", bs_, b);
+        r += b;
+
+        /* check if dlGetLibraryPath returns size required if bufsize too small */
+        queriedPath[0] = 0;
+        bs_ = dlGetLibraryPath(pLib, queriedPath, 1);  /* tiny max buffer size */
+        b = (bs == bs_ && strlen(queriedPath) == 0);   /* nothing copied */
+        printf("path lookup size requirement (%d) correctly returned: %d\n", bs_, b);
         r += b;
       }
       else
         printf("failed to query lib path using lib's handle\n");
 
       dlFreeLibrary(pLib);
+
+      /* check if dlGetLibraryPath returns 0 when trying to lookup dummy */
+      bs = dlGetLibraryPath((DLLib*)1234, queriedPath, 200);
+      printf("path lookup failed as expected with bad lib handle: %d\n", bs == 0);
+      r += (bs == 0);
+
+      /* test UTF-8 path through dummy library that's created by this test's build */
+      {
+        static const char* pathU8 = "./dynload_plain_\xc3\x9f_test"; /* \xc3\x9f is the UTF-8 encoded 'ß' */
+        int nu8c, b;
+
+        /* check if we can load a lib with a UTF-8 path; score the handle just loaded (pLib), not an unrelated pointer */
+        pLib = dlLoadLibrary(pathU8);
+        printf("pLib (loaded w/ UTF-8 path %s) handle: %p\n", pathU8, pLib);
+        r += (pLib != NULL);
+
+        if(pLib) {
+          /* get UTF-8 path back */
+          bs = dlGetLibraryPath((DLLib*)pLib, queriedPath, 200);
+          if(bs && bs <= 200) {
+            nu8c = strlen_utf8(queriedPath); /* num of UTF-8 chars is as big as ... */
+            b = (bs > 0) && (nu8c == bs-2);   /* ... buffer size minus 2 (b/c of one 2-byte UTF-8 char and "\0") */
+            printf("UTF-8 path of lib looked up via handle: %s\n", queriedPath);
+            printf("looked up UTF-8 path's needed buffer size (%d) for %d UTF-8 char string computed correctly: %d\n", bs, nu8c, b);
+            r += b;
+          }
+          else
+            printf("failed to query UTF-8 lib path using lib's handle\n");
+
+          dlFreeLibrary(pLib); /* always release the lib, also when the path lookup failed */
+        }
+      }
     }
     else
       printf("unable to open library %s\n", path);
@@ -145,15 +200,15 @@
         name = dlSymsNameFromValue(pSyms, p);
         printf("printf symbol name by its own address (%p): %s\n", p, name?name:"");
         if(name) {
-			if(strcmp(name, "printf") == 0)
-      			++r;
-			else {
-				/* Symbol name returned might be an "alias". In that case, check address again (full lookup to be sure). */
-				void* p0 = dlFindSymbol(pLib, name);
-        		printf("lookup by address returned different name (%s), which is alias of printf: %d\n", name, (p==p0));
-        		r += (p == p0);
-			}
-		}
+          if(strcmp(name, "printf") == 0)
+            ++r;
+          else {
+            /* Symbol name returned might be an "alias". In that case, check address again (full lookup to be sure). */
+            void* p0 = dlFindSymbol(pLib, name);
+            printf("lookup by address returned different name (%s), which is alias of printf: %d\n", name, (p==p0));
+            r += (p == p0);
+          }
+        }
         dlFreeLibrary(pLib);
       }
 
@@ -164,7 +219,7 @@
   }
 
   /* Check final score of right ones to see if all worked */
-  r = (r == 10);
+  r = (r == 15);
   printf("result: dynload_plain: %d\n", r);
   return !r;
 }