| Anonymous | Login | Signup for a new account | 2013-06-20 10:31 CEST | ![]() |
| Main | My View | View Issues | Change Log | Roadmap |
| View Issue Details [ Jump to Notes ] | [ Issue History ] [ Print ] | ||||||||||
| ID | Project | Category | View Status | Date Submitted | Last Update | ||||||
| 0003771 | OCaml | OCaml windows | public | 2005-08-26 00:07 | 2012-09-11 09:55 | ||||||
| Reporter | administrator | ||||||||||
| Assigned To | |||||||||||
| Priority | normal | Severity | minor | Reproducibility | always | ||||||
| Status | acknowledged | Resolution | open | ||||||||
| Platform | OS | OS Version | |||||||||
| Product Version | |||||||||||
| Target Version | 4.00.2+dev | Fixed in Version | |||||||||
| Summary | 0003771: Reading Unicode filenames fails on Windows | ||||||||||
| Description | Full_Name: spiralvoice Version: 3.08.4 OS: Windows/MinGW Submission from: p5481eb87.dip.t-dialin.net (84.129.235.135) Hi, in otherlibs\win32unix\windir.c the functions win_findfirst and win_findnext use WIN32_FIND_DATA which is not Unicode aware: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/fileio/fs/win32_find_data_str.asp [^] | ||||||||||
| Tags | No tags attached. | ||||||||||
| Attached Files | diff -uwNr ocaml-3.10.0-src/asmrun/Makefile.nt ocaml-3.10.0-unicode-src/asmrun/Makefile.nt
--- ocaml-3.10.0-src/asmrun/Makefile.nt 2007-02-23 11:29:45.000000000 +0200
+++ ocaml-3.10.0-unicode-src/asmrun/Makefile.nt 2008-04-11 13:56:33.861955100 +0300
@@ -24,13 +24,13 @@
intern.$(O) hash.$(O) sys.$(O) parsing.$(O) gc_ctrl.$(O) terminfo.$(O) \
md5.$(O) obj.$(O) lexing.$(O) win32.$(O) printexc.$(O) callback.$(O) \
weak.$(O) compact.$(O) finalise.$(O) custom.$(O) globroots.$(O) \
- backtrace.$(O)
+ backtrace.$(O) u8tou16.$(O)
LINKEDFILES=misc.c freelist.c major_gc.c minor_gc.c memory.c alloc.c array.c \
compare.c ints.c floats.c str.c io.c extern.c intern.c hash.c sys.c \
parsing.c gc_ctrl.c terminfo.c md5.c obj.c lexing.c printexc.c callback.c \
weak.c compact.c meta.c finalise.c custom.c main.c globroots.c \
- dynlink.c signals.c
+ dynlink.c signals.c u8tou16.c
ifeq ($(TOOLCHAIN),mingw)
ASMOBJS=$(ARCH).o
diff -uwNr ocaml-3.10.0-src/byterun/Makefile.nt ocaml-3.10.0-unicode-src/byterun/Makefile.nt
--- ocaml-3.10.0-src/byterun/Makefile.nt 2007-02-23 11:29:45.000000000 +0200
+++ ocaml-3.10.0-unicode-src/byterun/Makefile.nt 2008-04-11 13:56:33.893283100 +0300
@@ -24,7 +24,7 @@
str.o array.o io.o extern.o intern.o hash.o sys.o \
meta.o parsing.o gc_ctrl.o terminfo.o md5.o obj.o lexing.o \
win32.o printexc.o callback.o debugger.o weak.o compact.o \
- finalise.o custom.o backtrace.o globroots.o dynlink.o
+ finalise.o custom.o backtrace.o globroots.o dynlink.o u8tou16.o
DOBJS=$(COMMONOBJS:.o=.$(DO)) prims.$(DO)
SOBJS=$(COMMONOBJS:.o=.$(SO)) main.$(SO)
@@ -34,7 +34,7 @@
PRIMS=alloc.c array.c compare.c extern.c floats.c gc_ctrl.c hash.c \
intern.c interp.c ints.c io.c lexing.c md5.c meta.c obj.c parsing.c \
signals.c str.c sys.c terminfo.c callback.c weak.c finalise.c stacks.c \
- dynlink.c
+ dynlink.c u8tou16.c
PUBLIC_INCLUDES=alloc.h callback.h config.h custom.h fail.h intext.h \
memory.h misc.h mlvalues.h printexc.h signals.h compatibility.h
diff -uwNr ocaml-3.10.0-src/byterun/sys.c ocaml-3.10.0-unicode-src/byterun/sys.c
--- ocaml-3.10.0-src/byterun/sys.c 2007-03-01 15:37:39.000000000 +0200
+++ ocaml-3.10.0-unicode-src/byterun/sys.c 2008-04-14 18:57:16.318592300 +0300
@@ -46,6 +46,7 @@
#include "fail.h"
#include "instruct.h"
#include "mlvalues.h"
+#include "memory.h"
#include "osdeps.h"
#include "signals.h"
#include "stacks.h"
@@ -54,6 +55,9 @@
#ifndef _WIN32
extern int errno;
#endif
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
static char * error_message(void)
{
@@ -156,13 +160,38 @@
CAMLprim value caml_sys_file_exists(value name)
{
struct stat st;
+#ifdef UTF16
+ char * temp=String_val(name);
+ WCHAR * wtemp;
+ int retcode;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ retcode=_wstat(wtemp, &st);
+ free(wtemp);
+ return Val_bool((retcode==0));
+#else
return Val_bool(stat(String_val(name), &st) == 0);
+#endif
}
CAMLprim value caml_sys_is_directory(value name)
{
struct stat st;
+#ifdef UTF16
+ char * temp=String_val(name);
+ WCHAR * wtemp;
+ int retcode;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ retcode=_wstat(wtemp, &st);
+ free(wtemp);
+#else
if (stat(String_val(name), &st) == -1) caml_sys_error(name);
+#endif /* UTF16 */
#ifdef S_ISDIR
return Val_bool(S_ISDIR(st.st_mode));
#else
@@ -173,26 +202,82 @@
CAMLprim value caml_sys_remove(value name)
{
int ret;
+#ifdef UTF16
+ char * temp=String_val(name);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ ret = _wunlink(wtemp);
+ free(wtemp);
+#else
ret = unlink(String_val(name));
+#endif
if (ret != 0) caml_sys_error(name);
return Val_unit;
}
CAMLprim value caml_sys_rename(value oldname, value newname)
{
+#ifdef UTF16
+ char * temp1=String_val(oldname);
+ char * temp2=String_val(newname);
+ WCHAR * wtemp1, * wtemp2;
+ if(is_valid_utf8(temp1))
+ wtemp1 = utf8_to_utf16(temp1);
+ else
+ wtemp1 = ansi_to_utf16(temp1);
+ if(is_valid_utf8(temp2))
+ wtemp2 = utf8_to_utf16(temp2);
+ else
+ wtemp2 = ansi_to_utf16(temp2);
+ if (_wrename(wtemp1, wtemp2) != 0)
+#else
if (rename(String_val(oldname), String_val(newname)) != 0)
+#endif
caml_sys_error(NO_ARG);
+#ifdef UTF16
+ free(wtemp1);
+ free(wtemp2);
+#endif
return Val_unit;
}
CAMLprim value caml_sys_chdir(value dirname)
{
+#ifdef UTF16
+ char * temp=String_val(dirname);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ if (_wchdir(wtemp) != 0) caml_sys_error(dirname);
+ free(wtemp);
+#else
if (chdir(String_val(dirname)) != 0) caml_sys_error(dirname);
+#endif
return Val_unit;
}
CAMLprim value caml_sys_getcwd(value unit)
{
+#ifdef UTF16
+ CAMLparam0 ();
+ CAMLlocal1 (v);
+ WCHAR buff[4096*2];
+ unsigned char * temp;
+#ifdef HAS_GETCWD
+ if (_wgetcwd(buff, sizeof(buff)/sizeof(WCHAR)) == 0) caml_sys_error(NO_ARG);
+#else
+ if (getwd(buff) == 0) caml_sys_error(NO_ARG);
+#endif /* HAS_GETCWD */
+ temp=utf16_to_utf8(buff);
+ v=caml_copy_string(temp);
+ free(temp);
+ CAMLreturn (v);
+#else
char buff[4096];
#ifdef HAS_GETCWD
if (getcwd(buff, sizeof(buff)) == 0) caml_sys_error(NO_ARG);
@@ -200,6 +285,7 @@
if (getwd(buff) == 0) caml_sys_error(NO_ARG);
#endif /* HAS_GETCWD */
return caml_copy_string(buff);
+#endif
}
CAMLprim value caml_sys_getenv(value var)
diff -uwNr ocaml-3.10.0-src/byterun/u8tou16.c ocaml-3.10.0-unicode-src/byterun/u8tou16.c
--- ocaml-3.10.0-src/byterun/u8tou16.c 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/byterun/u8tou16.c 2008-04-15 12:02:31.098641300 +0300
@@ -0,0 +1,245 @@
+#include "u8tou16.h"
+
+/* Copyright 2005 b8_bavard, INRIA, CML */
+/*
+ This file is part of mldonkey.
+
+ mldonkey is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ mldonkey is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with mldonkey; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* Stub code to interface with libiconv
+ *
+ * taken and modified from
+ * mldonkey/src/utils/lib/charsetstubs.c
+ *
+ */
+
+/*
+ 20080415 ygrek
+ Modified to use WideCharToMultiByte windows api
+*/
+
+#define STRICT /* Strict typing, please */
+#include <windows.h>
+#undef STRICT
+
+#define FALSE 0
+#define TRUE 1
+
+#define UTF8_COMPUTE(Char, Mask, Len) \
+ if (Char < 128) \
+ { \
+ Len = 1; \
+ Mask = 0x7f; \
+ } \
+ else if ((Char & 0xe0) == 0xc0) \
+ { \
+ Len = 2; \
+ Mask = 0x1f; \
+ } \
+ else if ((Char & 0xf0) == 0xe0) \
+ { \
+ Len = 3; \
+ Mask = 0x0f; \
+ } \
+ else if ((Char & 0xf8) == 0xf0) \
+ { \
+ Len = 4; \
+ Mask = 0x07; \
+ } \
+ else if ((Char & 0xfc) == 0xf8) \
+ { \
+ Len = 5; \
+ Mask = 0x03; \
+ } \
+ else if ((Char & 0xfe) == 0xfc) \
+ { \
+ Len = 6; \
+ Mask = 0x01; \
+ } \
+ else \
+ Len = -1;
+
+#define UTF8_LENGTH(Char) \
+ ((Char) < 0x80 ? 1 : \
+ ((Char) < 0x800 ? 2 : \
+ ((Char) < 0x10000 ? 3 : \
+ ((Char) < 0x200000 ? 4 : \
+ ((Char) < 0x4000000 ? 5 : 6)))))
+
+
+#define UTF8_GET(Result, Chars, Count, Mask, Len) \
+ (Result) = (Chars)[0] & (Mask); \
+ for ((Count) = 1; (Count) < (Len); ++(Count)) \
+ { \
+ if (((Chars)[(Count)] & 0xc0) != 0x80) \
+ { \
+ (Result) = -1; \
+ break; \
+ } \
+ (Result) <<= 6; \
+ (Result) |= ((Chars)[(Count)] & 0x3f); \
+ }
+
+#define UNICODE_VALID(Char) \
+ ((Char) < 0x110000 && \
+ (((Char) & 0xFFFFF800) != 0xD800) && \
+ ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
+ ((Char) & 0xFFFE) != 0xFFFE)
+
+int
+ocaml_utf8_validate (const char *str,
+ size_t max_len,
+ const char **end)
+{
+
+ const char *p;
+
+ if (str == NULL)
+ return FALSE;
+
+ if (end)
+ *end = str;
+
+ p = str;
+
+ while ((max_len < 0 || (p - str) < max_len) && *p)
+ {
+ int i, mask = 0, len;
+ unsigned int result;
+ unsigned char c = (unsigned char) *p;
+
+ UTF8_COMPUTE (c, mask, len);
+
+ if (len == -1)
+ break;
+
+ /* check that the expected number of bytes exists in str */
+ if (max_len >= 0 &&
+ ((max_len - (p - str)) < len))
+ break;
+
+ UTF8_GET (result, p, i, mask, len);
+
+ if (UTF8_LENGTH (result) != len) /* Check for overlong UTF-8 */
+ break;
+
+ if (result == (unsigned int)-1)
+ break;
+
+ if (!UNICODE_VALID (result))
+ break;
+
+ p += len;
+ }
+
+ if (end)
+ *end = p;
+
+ /* See that we covered the entire length if a length was
+ * passed in, or that we ended on a nul if not
+ */
+ if (max_len >= 0 &&
+ p != (str + max_len))
+ return FALSE;
+ else if (max_len < 0 &&
+ *p != '\0')
+ return FALSE;
+ else
+ return TRUE;
+}
+
+unsigned char* convert_to_utf16(const char* str,
+ size_t len,
+ UINT codepage)
+{
+ unsigned char* outp;
+ size_t outbuf_size;
+ int chars;
+
+ outbuf_size = len*2 + 8;
+
+ outp = malloc(outbuf_size + 2);
+ memset(outp,0,outbuf_size + 2);
+
+ if (0 == len)
+ {
+ return outp;
+ }
+
+ chars = MultiByteToWideChar(codepage, 0, str, len, outp, outbuf_size);
+ if (0 == chars)
+ {
+ free(outp);
+ return NULL;
+ }
+
+ *(WCHAR*)(outp + sizeof(WCHAR)*chars) = (WCHAR)0;
+ return outp;
+}
+
+
+unsigned char* convert_from_utf16(const char* str,
+ size_t len,
+ UINT codepage)
+{
+ unsigned char* outp;
+ size_t outbuf_size;
+ int chars;
+
+ outbuf_size = len*2 + 8;
+
+ outp = malloc(outbuf_size + 2);
+ memset(outp,0,outbuf_size + 2);
+
+ if (0 == len)
+ {
+ return outp;
+ }
+
+ chars = WideCharToMultiByte(codepage, 0, str, len, outp, outbuf_size, NULL, NULL);
+ if (0 == chars)
+ {
+ free(outp);
+ return NULL;
+ }
+
+ *(outp + chars) = '\0';
+ return outp;
+}
+
+// -----------------------------------------------------------------------------
+
+int is_valid_utf8(const char *s)
+{
+ return ocaml_utf8_validate(s,strlen(s),NULL);
+}
+
+unsigned char * ansi_to_utf16(const char * str)
+{
+ return convert_to_utf16(str,strlen(str),CP_ACP);
+}
+
+unsigned char * utf8_to_utf16(const char * str)
+{
+ return convert_to_utf16(str,strlen(str),CP_UTF8);
+}
+
+unsigned char* utf16_to_utf8(const unsigned char * str)
+{
+ return convert_from_utf16(str,wcslen(str)*sizeof(WCHAR),CP_UTF8);
+}
+
+// -----------------------------------------------------------------------------
diff -uwNr ocaml-3.10.0-src/byterun/u8tou16.h ocaml-3.10.0-unicode-src/byterun/u8tou16.h
--- ocaml-3.10.0-src/byterun/u8tou16.h 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/byterun/u8tou16.h 2008-04-11 13:15:24.181731100 +0300
@@ -0,0 +1,8 @@
+#ifndef WCHAR
+typedef unsigned short WCHAR;
+#endif
+
+int is_valid_utf8(const char *s);
+unsigned char * ansi_to_utf16(const char * str);
+unsigned char * utf8_to_utf16(const char * str);
+unsigned char * utf16_to_utf8(const unsigned char * str);
diff -uwNr ocaml-3.10.0-src/byterun/win32.c ocaml-3.10.0-unicode-src/byterun/win32.c
--- ocaml-3.10.0-src/byterun/win32.c 2007-03-01 15:37:39.000000000 +0200
+++ ocaml-3.10.0-unicode-src/byterun/win32.c 2008-04-14 19:58:22.546202000 +0300
@@ -31,6 +31,9 @@
#include "misc.h"
#include "osdeps.h"
#include "signals.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
#ifndef S_ISREG
#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
@@ -245,10 +248,28 @@
static void expand_pattern(char * pat)
{
int handle;
+#ifdef UTF16
+ struct _wfinddata_t ffblk;
+ WCHAR* wname = NULL;
+#else
struct _finddata_t ffblk;
+#endif
int preflen;
+#ifdef UTF16
+ if (is_valid_utf8(pat))
+ {
+ wname = utf8_to_utf16(pat);
+ }
+ else
+ {
+ wname = ansi_to_utf16(pat);
+ }
+ handle = _wfindfirst(wname, &ffblk);
+ free(wname);
+#else
handle = _findfirst(pat, &ffblk);
+#endif
if (handle == -1) {
store_argument(pat); /* a la Bourne shell */
return;
@@ -258,12 +279,25 @@
if (c == '\\' || c == '/' || c == ':') break;
}
do {
- char * name = malloc(preflen + strlen(ffblk.name) + 1);
+ #ifdef UTF16
+ char * aname = utf16_to_utf8(ffblk.name);
+ #else
+ char * aname = ffblk.name;
+ #endif
+ char * name = malloc(preflen + strlen(aname) + 1);
if (name == NULL) out_of_memory();
memcpy(name, pat, preflen);
- strcpy(name + preflen, ffblk.name);
+ strcpy(name + preflen, aname);
+#ifdef UTF16
+ free(aname);
+#endif
store_argument(name);
- } while (_findnext(handle, &ffblk) != -1);
+ }
+#ifdef UTF16
+ while (_wfindnext(handle, &ffblk) != -1);
+#else
+ while (_findnext(handle, &ffblk) != -1);
+#endif
_findclose(handle);
}
@@ -345,12 +379,22 @@
{
int dirnamelen;
char * template;
+#ifdef UTF16
+ WCHAR * wtemplate;
+ WCHAR * name;
+#else
+ char * name;
+#endif
#if _MSC_VER <= 1200
int h;
#else
intptr_t h;
#endif
+#ifdef UTF16
+ struct _wfinddata_t fileinfo;
+#else
struct _finddata_t fileinfo;
+#endif
char * p;
dirnamelen = strlen(dirname);
@@ -362,16 +406,40 @@
default:
strcat(template, "\\*.*");
}
+#ifdef UTF16
+ if (is_valid_utf8(template))
+ wtemplate = utf8_to_utf16(template);
+ else
+ wtemplate = ansi_to_utf16(template);
+ h = _wfindfirst(wtemplate, &fileinfo);
+ free(wtemplate);
+#else
h = _findfirst(template, &fileinfo);
+#endif
caml_stat_free(template);
if (h == -1) return errno == ENOENT ? 0 : -1;
do {
- if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0) {
- p = caml_stat_alloc(strlen(fileinfo.name) + 1);
- strcpy(p, fileinfo.name);
+#ifdef UTF16
+ name = utf16_to_utf8(fileinfo.name);
+ if (NULL == name)
+ {
+ /*printf("debug - very strange findfirst\n");*/
+ continue;
+ }
+#else
+ name = fileinfo.name;
+#endif
+ if (strcmp(name, ".") != 0 && strcmp(name, "..") != 0) {
+ p = caml_stat_alloc(strlen(name) + 1);
+ strcpy(p, name);
caml_ext_table_add(contents, p);
}
- } while (_findnext(h, &fileinfo) == 0);
+ }
+#ifdef UTF16
+ while (_wfindnext(h, &fileinfo) == 0);
+#else
+ while (_findnext(h, &fileinfo) == 0);
+#endif
_findclose(h);
return 0;
}
diff -uwNr ocaml-3.10.0-src/config/Makefile.mingw ocaml-3.10.0-unicode-src/config/Makefile.mingw
--- ocaml-3.10.0-src/config/Makefile.mingw 2007-03-01 16:48:53.000000000 +0200
+++ ocaml-3.10.0-unicode-src/config/Makefile.mingw 2008-04-11 13:56:34.034259100 +0300
@@ -75,20 +75,20 @@
########## Configuration for the bytecode compiler
### Which C compiler to use for the bytecode interpreter.
-BYTECC=gcc -mno-cygwin
+BYTECC=gcc -mno-cygwin -DUTF16
### Additional compile-time options for $(BYTECC). (For static linking.)
-BYTECCCOMPOPTS=-O -mms-bitfields -Wall -Wno-unused
+BYTECCCOMPOPTS=-O -mms-bitfields -Wall -Wno-unused -DUTF16
### Additional link-time options for $(BYTECC). (For static linking.)
-BYTECCLINKOPTS=
+BYTECCLINKOPTS=-DUTF16
### Additional compile-time options for $(BYTECC). (For building a DLL.)
-DLLCCCOMPOPTS=-O -mms-bitfields -Wall -Wno-unused -DCAML_DLL
+DLLCCCOMPOPTS=-O -mms-bitfields -Wall -Wno-unused -DCAML_DLL -DUTF16
### Libraries needed
-BYTECCLIBS=
-NATIVECCLIBS=
+BYTECCLIBS=-liconv
+NATIVECCLIBS=-liconv
### How to invoke the C preprocessor
CPP=$(BYTECC) -E
diff -uwNr ocaml-3.10.0-src/config/Makefile.msvc ocaml-3.10.0-unicode-src/config/Makefile.msvc
--- ocaml-3.10.0-src/config/Makefile.msvc 2007-05-12 16:50:20.000000000 +0300
+++ ocaml-3.10.0-unicode-src/config/Makefile.msvc 2008-04-15 09:12:21.709845200 +0300
@@ -74,7 +74,7 @@
########## Configuration for the bytecode compiler
### Which C compiler to use for the bytecode interpreter.
-BYTECC=cl /nologo -D_CRT_SECURE_NO_DEPRECATE
+BYTECC=cl /nologo -D_CRT_SECURE_NO_DEPRECATE -DUTF16
### Additional compile-time options for $(BYTECC). (For static linking.)
BYTECCCOMPOPTS=/Ox /MT
@@ -133,7 +133,7 @@
SYSTEM=win32
### Which C compiler to use for the native-code compiler.
-NATIVECC=cl /nologo -D_CRT_SECURE_NO_DEPRECATE
+NATIVECC=cl /nologo -D_CRT_SECURE_NO_DEPRECATE -DUTF16
### Additional compile-time options for $(NATIVECC).
NATIVECCCOMPOPTS=/Ox /MT
diff -uwNr ocaml-3.10.0-src/ocamlcomp.sh ocaml-3.10.0-unicode-src/ocamlcomp.sh
--- ocaml-3.10.0-src/ocamlcomp.sh 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/ocamlcomp.sh 2008-04-15 09:44:57.765824700 +0300
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+topdir=`dirname $0`
+
+exec $topdir/boot/ocamlrun $topdir/ocamlc -nostdlib -I $topdir/stdlib "$@"
diff -uwNr ocaml-3.10.0-src/ocamlcompopt.sh ocaml-3.10.0-unicode-src/ocamlcompopt.sh
--- ocaml-3.10.0-src/ocamlcompopt.sh 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/ocamlcompopt.sh 2008-04-15 09:54:20.477380500 +0300
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+topdir=`dirname $0`
+
+exec $topdir/boot/ocamlrun $topdir/ocamlopt -nostdlib -I $topdir/stdlib "$@"
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/chdir.c ocaml-3.10.0-unicode-src/otherlibs/unix/chdir.c
--- ocaml-3.10.0-src/otherlibs/unix/chdir.c 2001-12-07 15:40:24.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/chdir.c 2008-04-11 13:56:34.049923100 +0300
@@ -15,11 +15,25 @@
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_chdir(value path)
{
int ret;
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ ret = _wchdir(wtemp);
+ free(wtemp);
+#else
ret = chdir(String_val(path));
+#endif
if (ret == -1) uerror("chdir", path);
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/chmod.c ocaml-3.10.0-unicode-src/otherlibs/unix/chmod.c
--- ocaml-3.10.0-src/otherlibs/unix/chmod.c 2001-12-07 15:40:26.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/chmod.c 2008-04-11 13:56:34.065587100 +0300
@@ -17,11 +17,25 @@
#include <sys/stat.h>
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_chmod(value path, value perm)
{
int ret;
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ ret = _wchmod(wtemp, Int_val(perm));
+ free(wtemp);
+#else
ret = chmod(String_val(path), Int_val(perm));
+#endif
if (ret == -1) uerror("chmod", path);
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/getcwd.c ocaml-3.10.0-unicode-src/otherlibs/unix/getcwd.c
--- ocaml-3.10.0-src/otherlibs/unix/getcwd.c 2005-03-24 19:20:53.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/getcwd.c 2008-04-11 14:20:12.707075100 +0300
@@ -16,6 +16,7 @@
#include <mlvalues.h>
#include <alloc.h>
#include <fail.h>
+#include <memory.h>
#include "unixsupport.h"
#if !defined (_WIN32) && !macintosh
@@ -31,12 +32,27 @@
#endif
#ifdef HAS_GETCWD
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_getcwd(value unit)
{
+#ifdef UTF16
+ CAMLparam0 ();
+ CAMLlocal1 (v);
+ WCHAR buff[PATH_MAX*2];
+ unsigned char* temp;
+ if (_wgetcwd(buff, sizeof(buff)/sizeof(WCHAR)) == 0) uerror("getcwd", Nothing);
+ temp=utf16_to_utf8(buff);
+ v=copy_string(temp);
+ free(temp);
+ CAMLreturn (v);
+#else
char buff[PATH_MAX];
if (getcwd(buff, sizeof(buff)) == 0) uerror("getcwd", Nothing);
return copy_string(buff);
+#endif
}
#else
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/rmdir.c ocaml-3.10.0-unicode-src/otherlibs/unix/rmdir.c
--- ocaml-3.10.0-src/otherlibs/unix/rmdir.c 2001-12-07 15:40:33.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/rmdir.c 2008-04-11 13:56:34.112579100 +0300
@@ -15,9 +15,23 @@
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_rmdir(value path)
{
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ if (_wrmdir(wtemp) == -1) uerror("rmdir", path);
+ free(wtemp);
+#else
if (rmdir(String_val(path)) == -1) uerror("rmdir", path);
+#endif
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/unlink.c ocaml-3.10.0-unicode-src/otherlibs/unix/unlink.c
--- ocaml-3.10.0-src/otherlibs/unix/unlink.c 2001-12-07 15:40:39.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/unlink.c 2008-04-11 13:56:34.112579100 +0300
@@ -15,9 +15,23 @@
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_unlink(value path)
{
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ if (_wunlink(wtemp) == -1) uerror("unlink", path);
+ free(wtemp);
+#else
if (unlink(String_val(path)) == -1) uerror("unlink", path);
+#endif
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/unix/utimes.c ocaml-3.10.0-unicode-src/otherlibs/unix/utimes.c
--- ocaml-3.10.0-src/otherlibs/unix/utimes.c 2005-03-24 19:20:53.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/unix/utimes.c 2008-04-15 09:35:27.882262500 +0300
@@ -25,9 +25,16 @@
#else
#include <sys/utime.h>
#endif
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_utimes(value path, value atime, value mtime)
{
+#ifdef UTF16
+ char * temp;
+ WCHAR * wtemp;
+#endif
struct utimbuf times, * t;
times.actime = Double_val(atime);
times.modtime = Double_val(mtime);
@@ -35,7 +42,17 @@
t = &times;
else
t = (struct utimbuf *) NULL;
+#ifdef UTF16
+ temp=String_val(path);
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ if (_wutime(wtemp, t) == -1) uerror("utimes", path);
+ free(wtemp);
+#else
if (utime(String_val(path), t) == -1) uerror("utimes", path);
+#endif
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/Makefile.nt ocaml-3.10.0-unicode-src/otherlibs/win32unix/Makefile.nt
--- ocaml-3.10.0-src/otherlibs/win32unix/Makefile.nt 2007-02-07 17:49:11.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/Makefile.nt 2008-04-15 09:22:48.873646200 +0300
@@ -30,7 +30,8 @@
mkdir.c open.c pipe.c read.c rename.c \
select.c sendrecv.c \
shutdown.c sleep.c socket.c sockopt.c startup.c stat.c \
- system.c unixsupport.c windir.c winwait.c write.c
+ system.c unixsupport.c windir.c winwait.c write.c \
+ u8tou16.c
# Files from the ../unix directory
UNIX_FILES = access.c addrofstr.c chdir.c chmod.c cst2constr.c \
@@ -66,11 +67,11 @@
unix.cma: $(CAML_OBJS)
$(CAMLC) -a -linkall -o unix.cma $(CAML_OBJS) \
- -dllib -lunix -cclib -lunix -cclib $(LIBS)
+ -dllib -lunix -cclib -lunix -cclib "$(LIBS)"
unix.cmxa: $(CAMLOPT_OBJS)
$(CAMLOPT) -a -linkall -o unix.cmxa $(CAMLOPT_OBJS) \
- -cclib -lunix -cclib $(LIBS)
+ -cclib -lunix -cclib "$(LIBS)"
partialclean:
rm -f *.cm*
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/link.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/link.c
--- ocaml-3.10.0-src/otherlibs/win32unix/link.c 2001-12-07 15:40:44.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/link.c 2008-04-11 13:56:34.175235100 +0300
@@ -17,26 +17,67 @@
#include <mlvalues.h>
#include <fail.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#endif
+#ifdef UTF16
+typedef
+BOOL (WINAPI *tCreateHardLink)(
+ LPCWSTR lpFileName,
+ LPCWSTR lpExistingFileName,
+ LPSECURITY_ATTRIBUTES lpSecurityAttributes
+);
+#else
typedef
BOOL (WINAPI *tCreateHardLink)(
LPCTSTR lpFileName,
LPCTSTR lpExistingFileName,
LPSECURITY_ATTRIBUTES lpSecurityAttributes
);
+#endif
CAMLprim value unix_link(value path1, value path2)
{
HMODULE hModKernel32;
tCreateHardLink pCreateHardLink;
+#ifdef UTF16
+ char * temp1=String_val(path1);
+ char * temp2=String_val(path2);
+ WCHAR * wtemp1, * wtemp2;
+ if(is_valid_utf8(temp1))
+ wtemp1 = utf8_to_utf16(temp1);
+ else
+ wtemp1 = ansi_to_utf16(temp1);
+ if(is_valid_utf8(temp2))
+ wtemp2 = utf8_to_utf16(temp2);
+ else
+ wtemp2 = ansi_to_utf16(temp2);
+#endif
hModKernel32 = GetModuleHandle("KERNEL32.DLL");
+#ifdef UTF16
+ pCreateHardLink =
+ (tCreateHardLink) GetProcAddress(hModKernel32, "CreateHardLinkW");
+#else
pCreateHardLink =
(tCreateHardLink) GetProcAddress(hModKernel32, "CreateHardLinkA");
+#endif
if (pCreateHardLink == NULL)
invalid_argument("Unix.link not implemented");
+#ifdef UTF16
+ if (! pCreateHardLink(wtemp2, wtemp1, NULL)) {
+#else
if (! pCreateHardLink(String_val(path2), String_val(path1), NULL)) {
+#endif
win32_maperr(GetLastError());
uerror("link", path2);
}
+#ifdef UTF16
+ free(wtemp1);
+ free(wtemp2);
+#endif
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/mkdir.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/mkdir.c
--- ocaml-3.10.0-src/otherlibs/win32unix/mkdir.c 2001-12-07 15:40:45.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/mkdir.c 2008-04-11 13:56:34.190899100 +0300
@@ -15,10 +15,24 @@
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_mkdir(path, perm)
value path, perm;
{
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ if (_wmkdir(wtemp) == -1) uerror("mkdir", path);
+ free(wtemp);
+#else
if (_mkdir(String_val(path)) == -1) uerror("mkdir", path);
+#endif
return Val_unit;
}
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/open.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/open.c
--- ocaml-3.10.0-src/otherlibs/win32unix/open.c 2001-12-07 15:40:45.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/open.c 2008-04-11 13:56:34.222227100 +0300
@@ -17,6 +17,9 @@
#include <alloc.h>
#include "unixsupport.h"
#include <fcntl.h>
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
static int open_access_flags[8] = {
GENERIC_READ, GENERIC_WRITE, GENERIC_READ|GENERIC_WRITE, 0, 0, 0, 0, 0,
@@ -31,6 +34,14 @@
int fileaccess, createflags, fileattrib, filecreate;
SECURITY_ATTRIBUTES attr;
HANDLE h;
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+#endif
fileaccess = convert_flag_list(flags, open_access_flags);
@@ -55,9 +66,16 @@
attr.lpSecurityDescriptor = NULL;
attr.bInheritHandle = TRUE;
+#ifdef UTF16
+ h = CreateFileW(wtemp, fileaccess,
+ FILE_SHARE_READ | FILE_SHARE_WRITE, &attr,
+ filecreate, fileattrib, NULL);
+ free(wtemp);
+#else
h = CreateFile(String_val(path), fileaccess,
FILE_SHARE_READ | FILE_SHARE_WRITE, &attr,
filecreate, fileattrib, NULL);
+#endif
if (h == INVALID_HANDLE_VALUE) {
win32_maperr(GetLastError());
uerror("open", path);
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/rename.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/rename.c
--- ocaml-3.10.0-src/otherlibs/win32unix/rename.c 2004-07-13 15:25:15.000000000 +0300
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/rename.c 2008-04-11 13:56:34.237891100 +0300
@@ -16,12 +16,28 @@
#include <stdio.h>
#include <mlvalues.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value unix_rename(value path1, value path2)
{
static int supports_MoveFileEx = -1; /* don't know yet */
BOOL ok;
+#ifdef UTF16
+ char * temp1=String_val(path1);
+ char * temp2=String_val(path2);
+ WCHAR * wtemp1, * wtemp2;
+ if(is_valid_utf8(temp1))
+ wtemp1 = utf8_to_utf16(temp1);
+ else
+ wtemp1 = ansi_to_utf16(temp1);
+ if(is_valid_utf8(temp2))
+ wtemp2 = utf8_to_utf16(temp2);
+ else
+ wtemp2 = ansi_to_utf16(temp2);
+#endif
if (supports_MoveFileEx < 0) {
OSVERSIONINFO VersionInfo;
VersionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
@@ -30,11 +46,25 @@
&& (VersionInfo.dwPlatformId == VER_PLATFORM_WIN32_NT);
}
if (supports_MoveFileEx > 0)
+#ifdef UTF16
+ ok = MoveFileExW(wtemp1, wtemp2,
+ MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH |
+ MOVEFILE_COPY_ALLOWED);
+#else
ok = MoveFileEx(String_val(path1), String_val(path2),
MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH |
MOVEFILE_COPY_ALLOWED);
+#endif
else
+#ifdef UTF16
+ ok = MoveFileW(wtemp1, wtemp2);
+#else
ok = MoveFile(String_val(path1), String_val(path2));
+#endif
+#ifdef UTF16
+ free(wtemp1);
+ free(wtemp2);
+#endif
if (! ok) {
win32_maperr(GetLastError());
uerror("rename", path1);
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/stat.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/stat.c
--- ocaml-3.10.0-src/otherlibs/win32unix/stat.c 2006-09-21 16:57:34.000000000 +0300
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/stat.c 2008-04-11 13:56:34.253555100 +0300
@@ -22,6 +22,9 @@
#define _INTEGRAL_MAX_BITS 64
#include <sys/types.h>
#include <sys/stat.h>
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
#ifndef S_IFLNK
#define S_IFLNK 0
@@ -67,8 +70,18 @@
{
int ret;
struct _stati64 buf;
-
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ ret = _wstati64(wtemp, &buf);
+ free(wtemp);
+#else
ret = _stati64(String_val(path), &buf);
+#endif
if (ret == -1) uerror("stat", path);
if (buf.st_size > Max_long) {
win32_maperr(ERROR_ARITHMETIC_OVERFLOW);
@@ -81,7 +94,18 @@
{
int ret;
struct _stati64 buf;
+#ifdef UTF16
+ char * temp=String_val(path);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+ ret = _wstati64(wtemp, &buf);
+ free(wtemp);
+#else
ret = _stati64(String_val(path), &buf);
+#endif
if (ret == -1) uerror("stat", path);
return stat_aux(1, &buf);
}
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/system.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/system.c
--- ocaml-3.10.0-src/otherlibs/win32unix/system.c 2006-09-21 11:03:56.000000000 +0300
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/system.c 2008-04-11 14:00:34.570643100 +0300
@@ -20,6 +20,9 @@
#include "unixsupport.h"
#include <process.h>
#include <stdio.h>
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value win_system(cmd)
value cmd;
@@ -28,13 +31,27 @@
value st;
char *buf;
intnat len;
+#ifdef UTF16
+ char * temp;
+ WCHAR * wtemp;
+ temp=String_val(cmd);
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+#endif
len = caml_string_length (cmd);
buf = caml_stat_alloc (len + 1);
memmove (buf, String_val (cmd), len + 1);
enter_blocking_section();
_flushall();
+#ifdef UTF16
+ ret = _wsystem(wtemp);;
+ free(wtemp);
+#else
ret = system(buf);
+#endif
leave_blocking_section();
caml_stat_free(buf);
if (ret == -1) uerror("system", Nothing);
@@ -42,6 +59,3 @@
Field(st, 0) = Val_int(ret);
return st;
}
-
-
-
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/u8tou16.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/u8tou16.c
--- ocaml-3.10.0-src/otherlibs/win32unix/u8tou16.c 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/u8tou16.c 2008-04-15 12:03:14.146620900 +0300
@@ -0,0 +1,245 @@
+#include "u8tou16.h"
+
+/* Copyright 2005 b8_bavard, INRIA, CML */
+/*
+ This file is part of mldonkey.
+
+ mldonkey is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ mldonkey is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with mldonkey; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* Stub code to interface with libiconv
+ *
+ * taken and modified from
+ * mldonkey/src/utils/lib/charsetstubs.c
+ *
+ */
+
+/*
+ 20080415 ygrek
+ Modified to use WideCharToMultiByte windows api
+*/
+
+#define STRICT /* Strict typing, please */
+#include <windows.h>
+#undef STRICT
+
+#define FALSE 0
+#define TRUE 1
+/* From lead byte Char: set Len (1..6) and the lead-byte payload Mask; Len = -1 if Char is no lead byte. */
+#define UTF8_COMPUTE(Char, Mask, Len) \
+ if (Char < 128) \
+ { \
+ Len = 1; \
+ Mask = 0x7f; \
+ } \
+ else if ((Char & 0xe0) == 0xc0) \
+ { \
+ Len = 2; \
+ Mask = 0x1f; \
+ } \
+ else if ((Char & 0xf0) == 0xe0) \
+ { \
+ Len = 3; \
+ Mask = 0x0f; \
+ } \
+ else if ((Char & 0xf8) == 0xf0) \
+ { \
+ Len = 4; \
+ Mask = 0x07; \
+ } \
+ else if ((Char & 0xfc) == 0xf8) \
+ { \
+ Len = 5; \
+ Mask = 0x03; \
+ } \
+ else if ((Char & 0xfe) == 0xfc) \
+ { \
+ Len = 6; \
+ Mask = 0x01; \
+ } \
+ else \
+ Len = -1;
+/* Number of bytes (1..6) the shortest encoding of code point Char occupies. */
+#define UTF8_LENGTH(Char) \
+ ((Char) < 0x80 ? 1 : \
+ ((Char) < 0x800 ? 2 : \
+ ((Char) < 0x10000 ? 3 : \
+ ((Char) < 0x200000 ? 4 : \
+ ((Char) < 0x4000000 ? 5 : 6)))))
+/* Decode the Len-byte sequence at Chars into Result, using Count as the loop index.
+   Result becomes -1 (and decoding stops) if a trailing byte is not of the form 10xxxxxx. */
+#define UTF8_GET(Result, Chars, Count, Mask, Len) \
+ (Result) = (Chars)[0] & (Mask); \
+ for ((Count) = 1; (Count) < (Len); ++(Count)) \
+ { \
+ if (((Chars)[(Count)] & 0xc0) != 0x80) \
+ { \
+ (Result) = -1; \
+ break; \
+ } \
+ (Result) <<= 6; \
+ (Result) |= ((Chars)[(Count)] & 0x3f); \
+ }
+/* True if Char < 0x110000, is not in D800..DFFF or FDD0..FDEF, and its low 16 bits are not FFFE/FFFF. */
+#define UNICODE_VALID(Char) \
+ ((Char) < 0x110000 && \
+ (((Char) & 0xFFFFF800) != 0xD800) && \
+ ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
+ ((Char) & 0xFFFE) != 0xFFFE)
+
+/* Check that the first max_len bytes of str are well-formed UTF-8.
+ * str     : bytes to check; NULL yields FALSE.
+ * max_len : number of bytes to examine; a NUL byte before max_len stops
+ *           the scan and so makes the result FALSE.
+ * end     : if non-NULL, receives the first byte that was not validated
+ *           (str + max_len when everything is valid).
+ * Returns TRUE only if exactly max_len bytes were consumed as valid
+ * sequences.  max_len is unsigned, so the "negative length" comparisons
+ * this code used to carry were always-false/always-true and are removed;
+ * results are unchanged.
+ */
+int
+ocaml_utf8_validate (const char *str,
+                     size_t max_len,
+                     const char **end)
+{
+  const char *p;
+
+  if (str == NULL)
+    return FALSE;
+
+  if (end)
+    *end = str;
+
+  p = str;
+
+  while ((size_t)(p - str) < max_len && *p)
+    {
+      int i, mask = 0, len;
+      unsigned int result;
+      unsigned char c = (unsigned char) *p;
+
+      UTF8_COMPUTE (c, mask, len);
+
+      if (len == -1)                    /* not a valid lead byte */
+        break;
+
+      /* check that the expected number of bytes exists in str */
+      if (max_len - (size_t)(p - str) < (size_t)len)
+        break;
+
+      UTF8_GET (result, p, i, mask, len);
+
+      if (result == (unsigned int)-1)   /* malformed continuation byte */
+        break;
+
+      if (UTF8_LENGTH (result) != len)  /* Check for overlong UTF-8 */
+        break;
+
+      if (!UNICODE_VALID (result))
+        break;
+
+      p += len;
+    }
+
+  if (end)
+    *end = p;
+
+  /* Valid only if the scan consumed every one of the max_len bytes. */
+  return (p == str + max_len) ? TRUE : FALSE;
+}
+
+/* Convert len bytes of str, encoded in codepage, to UTF-16.  Returns a
+   malloc'ed, NUL-terminated buffer (caller frees) or NULL on failure. */
+unsigned char* convert_to_utf16(const char* str,
+                                size_t len,
+                                UINT codepage)
+{
+  unsigned char* outp;
+  size_t outbuf_size;
+  int chars;
+
+  outbuf_size = len*2 + 8;
+  outp = malloc(outbuf_size + 2);
+  if (NULL == outp)
+    return NULL;               /* out of memory: never memset(NULL) */
+  memset(outp,0,outbuf_size + 2);
+
+  if (0 == len)
+    return outp;               /* empty input -> empty string */
+  /* The destination size is counted in WCHARs, not in bytes. */
+  chars = MultiByteToWideChar(codepage, 0, str, (int)len,
+                              (LPWSTR)outp, (int)(outbuf_size / sizeof(WCHAR)));
+  if (0 == chars) {
+    free(outp);
+    return NULL;
+  }
+  *(WCHAR*)(outp + sizeof(WCHAR)*chars) = (WCHAR)0;
+  return outp;
+}
+
+
+/* Convert UTF-16 text to the multibyte encoding codepage.  len is the
+   size of str in BYTES (utf16_to_utf8 passes wcslen * sizeof(WCHAR)).
+   Returns a malloc'ed, NUL-terminated buffer (caller frees) or NULL. */
+unsigned char* convert_from_utf16(const char* str,
+                                  size_t len,
+                                  UINT codepage)
+{
+  unsigned char* outp;
+  size_t outbuf_size;
+  int chars;
+
+  outbuf_size = len*2 + 8;     /* 4 bytes per UTF-16 unit, plus slack */
+  outp = malloc(outbuf_size + 2);
+  if (NULL == outp)
+    return NULL;
+  memset(outp,0,outbuf_size + 2);
+  if (0 == len)
+    return outp;
+
+  /* WideCharToMultiByte wants the source length in WCHARs; passing the
+     byte count made it read twice as far as the string extends. */
+  chars = WideCharToMultiByte(codepage, 0, (LPCWSTR)str,
+                              (int)(len / sizeof(WCHAR)),
+                              (LPSTR)outp, (int)outbuf_size, NULL, NULL);
+  if (0 == chars) { free(outp); return NULL; }
+  *(outp + chars) = '\0';
+  return outp;
+}
+
+// -----------------------------------------------------------------------------
+/* Nonzero iff the NUL-terminated string s is entirely valid UTF-8. */
+int is_valid_utf8(const char *s) {
+  size_t nbytes = strlen(s);
+  return ocaml_utf8_validate(s, nbytes, NULL);
+}
+/* Convert a NUL-terminated string in the ANSI code page (CP_ACP) to UTF-16. */
+unsigned char * ansi_to_utf16(const char * str) {
+  size_t nbytes = strlen(str);
+  return convert_to_utf16(str, nbytes, CP_ACP);
+}
+/* Convert a NUL-terminated UTF-8 string (CP_UTF8) to UTF-16. */
+unsigned char * utf8_to_utf16(const char * str) {
+  size_t nbytes = strlen(str);
+  return convert_to_utf16(str, nbytes, CP_UTF8);
+}
+/* NUL-terminated UTF-16 -> malloc'ed UTF-8.  str holds WCHARs, hence the casts. */
+unsigned char* utf16_to_utf8(const unsigned char * str) {
+  size_t nbytes = wcslen((const wchar_t *)str) * sizeof(WCHAR);
+  return convert_from_utf16((const char *)str, nbytes, CP_UTF8);
+}
+
+// -----------------------------------------------------------------------------
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/u8tou16.h ocaml-3.10.0-unicode-src/otherlibs/win32unix/u8tou16.h
--- ocaml-3.10.0-src/otherlibs/win32unix/u8tou16.h 1970-01-01 02:00:00.000000000 +0200
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/u8tou16.h 2008-04-11 13:15:24.181731100 +0300
@@ -0,0 +1,8 @@
+#ifndef WCHAR /* NOTE(review): #ifndef only sees macros; confirm WCHAR is not a typedef in <windows.h> */
+typedef unsigned short WCHAR;
+#endif
+/* UTF-8/ANSI <-> UTF-16 helpers; the converters return malloc'ed buffers. */
+int is_valid_utf8(const char *s); /* nonzero if s is valid UTF-8 */
+unsigned char * ansi_to_utf16(const char * str); /* CP_ACP -> UTF-16 */
+unsigned char * utf8_to_utf16(const char * str); /* CP_UTF8 -> UTF-16 */
+unsigned char * utf16_to_utf8(const unsigned char * str); /* UTF-16 -> CP_UTF8 */
diff -uwNr ocaml-3.10.0-src/otherlibs/win32unix/windir.c ocaml-3.10.0-unicode-src/otherlibs/win32unix/windir.c
--- ocaml-3.10.0-src/otherlibs/win32unix/windir.c 2002-07-23 17:12:01.000000000 +0300
+++ ocaml-3.10.0-unicode-src/otherlibs/win32unix/windir.c 2008-04-11 13:56:34.347539100 +0300
@@ -19,18 +19,35 @@
#include <alloc.h>
#include <fail.h>
#include "unixsupport.h"
+#ifdef UTF16
+#include "u8tou16.h"
+#endif
CAMLprim value win_findfirst(name)
value name;
{
HANDLE h;
value v;
- WIN32_FIND_DATA fileinfo;
value valname = Val_unit;
value valh = Val_unit;
+#ifdef UTF16
+ WIN32_FIND_DATAW fileinfo;
+ char * tempo, *temp=String_val(name);
+ WCHAR * wtemp;
+ if(is_valid_utf8(temp))
+ wtemp = utf8_to_utf16(temp);
+ else
+ wtemp = ansi_to_utf16(temp);
+#else
+ WIN32_FIND_DATA fileinfo;
+#endif
Begin_roots2 (valname,valh);
+#ifdef UTF16
+ h = FindFirstFileW(wtemp,&fileinfo);
+#else
h = FindFirstFile(String_val(name),&fileinfo);
+#endif
if (h == INVALID_HANDLE_VALUE) {
DWORD err = GetLastError();
if (err == ERROR_NO_MORE_FILES)
@@ -40,22 +57,42 @@
uerror("opendir", Nothing);
}
}
+#ifdef UTF16
+ tempo = utf16_to_utf8(fileinfo.cFileName);
+ valname = copy_string(tempo);
+ free(tempo);
+#else
valname = copy_string(fileinfo.cFileName);
+#endif
valh = win_alloc_handle(h);
v = alloc_small(2, 0);
Field(v,0) = valname;
Field(v,1) = valh;
End_roots();
+#ifdef UTF16
+ free(wtemp);
+#endif
return v;
}
CAMLprim value win_findnext(valh)
value valh;
{
+#ifdef UTF16
+ CAMLparam0 ();
+ CAMLlocal1 (v);
+ WIN32_FIND_DATAW fileinfo;
+ char * temp;
+#else
WIN32_FIND_DATA fileinfo;
+#endif
BOOL retcode;
+#ifdef UTF16
+ retcode = FindNextFileW(Handle_val(valh), &fileinfo);
+#else
retcode = FindNextFile(Handle_val(valh), &fileinfo);
+#endif
if (!retcode) {
DWORD err = GetLastError();
if (err == ERROR_NO_MORE_FILES)
@@ -65,7 +102,14 @@
uerror("readdir", Nothing);
}
}
+#ifdef UTF16
+ temp = utf16_to_utf8(fileinfo.cFileName);
+ v=copy_string(temp);
+ free(temp);
+ CAMLreturn (v);
+#else
return copy_string(fileinfo.cFileName);
+#endif
}
CAMLprim value win_findclose(valh)
diff -uwNr ocaml-3.10.0-src/utils/ccomp.ml ocaml-3.10.0-unicode-src/utils/ccomp.ml
--- ocaml-3.10.0-src/utils/ccomp.ml 2007-02-25 16:58:21.000000000 +0200
+++ ocaml-3.10.0-unicode-src/utils/ccomp.ml 2008-04-11 13:56:34.363203100 +0300
@@ -42,9 +42,9 @@
let s =
String.concat " "
(List.map (fun f -> if f = "" then f else Filename.quote f) lst) in
- if Sys.os_type = "Win32" && String.length s >= 256
+ (*if Sys.os_type = "Win32" && String.length s >= 256
then build_diversion lst
- else s
+ else*) s
let compile_file name =
command
| ||||||||||
Relationships |
||||||||||||||||
|
||||||||||||||||
Notes |
|
|
(0007832) ygrek (reporter) 2012-07-30 12:07 |
Here is an old update to mldonkey's patch that does not rely on iconv and works (or at least worked) with MSVC: http://ygrek.org.ua/p/ocaml_unicode.html [^] It worked fine in 2009, but YMMV today. |
|
(0008058) doligez (manager) 2012-09-11 09:55 |
Uploaded the patch referenced by ygrek. |
Issue History |
|||
| Date Modified | Username | Field | Change |
| 2005-11-18 10:14 | administrator | New Issue | |
| 2008-01-22 12:08 | doligez | Relationship added | parent of 0003786 |
| 2012-07-11 17:01 | doligez | Relationship added | parent of 0003789 |
| 2012-07-11 17:04 | doligez | Target Version | => 4.01.0+dev |
| 2012-07-11 17:04 | doligez | Description Updated | View Revisions |
| 2012-07-29 18:02 | frisch | Category | OCaml general => OCaml windows |
| 2012-07-30 12:07 | ygrek | Note Added: 0007832 | |
| 2012-07-31 13:37 | doligez | Target Version | 4.01.0+dev => 4.00.1+dev |
| 2012-09-11 09:55 | doligez | File Added: ocaml_unicode_mod_20080421.patch | |
| 2012-09-11 09:55 | doligez | Note Added: 0008058 | |
| 2012-09-11 09:55 | doligez | Target Version | 4.00.1+dev => 4.00.2+dev |
| Copyright © 2000 - 2011 MantisBT Group |



