Lhogho  0.0.028
 All Data Structures Files Functions Variables Typedefs Macros Pages
unicode.c
Go to the documentation of this file.
1 
2 //
3 // Project: Lhogho
4 // File: unicode.c
5 //
6 // Copyright (C) 2007 P.Boytchev
7 //
8 // Revision history:
9 // 2006-10-09 - file created
10 // 2006-10-10 - fixed memory leaks
11 // 2006-10-11 - added UNICODE macro
12 // added ASCII_to_ASCII()
13 // 2007-02-27 - module renamed to UNICODE
14 // 2007-05-17 - added license info
15 // 2007-05-22 - added doxygen-friendly documentation
16 // 2007-06-07 - definitions spread to where they belong
17 // 2007-06-13 - fixed bug #1736021 "Alloc/dealloc statistics"
18 // 2010-02-07 - Fixed bug #3174945 OS alloc balance is not 0
19 // - Fixed bug #3174840 Invalid options cause a crash
20 // 2011-03-02 - Fixed bug #3188211 LOAD crashes if file size is >1MB
21 // 2012-10-14 - Fixed bug #314 -x compile on Windows needs lhogho.exe next to .lgo source file
22 // 2013-07-04 - If rt_libload() fails, it tries in lib subfolder of the compiler
23 //
24 //
25 // This program is free software; you can redistribute it and/or modify
26 // it under the terms of the GNU General Public License as published by
27 // the Free Software Foundation; either version 2 of the License, or
28 // (at your option) any later version.
29 //
30 // This program is distributed in the hope that it will be useful,
31 // but WITHOUT ANY WARRANTY; without even the implied warranty of
32 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 // GNU General Public License for more details.
34 //
35 // You should have received a copy of the GNU General Public License
36 // along with this program; if not, write to the Free Software
37 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 //
39 
40 
41 #include "globals.h"
42 #ifdef UNICODE_CHARS
43  #include <wchar.h> // UNICODE support: wcslen()
44 #endif //UNICODE_CHARS
45 
46 #include <stdio.h> // fopen(), fread(), fclose()
47 #include <stdlib.h> // malloc()
48 #include <string.h> // memcpy
49 #include <errno.h> // errno
50 #include <assert.h> // assert()
51 #include <sys/stat.h> // stat()
52 #include <limits.h> // PATH_MAX
53 #include <libgen.h> // dirname()
54  #if defined(WINDOWS) || defined(LINUX)
55  #include <malloc.h> // alloca()
56 #endif
57 #include "atoms.h"
58 #include "unicode.h"
59 #include "options.h"
60 
61 
62 
63 
64 //===================================================
70 // UNICODE_CHARS symbol is defined
74 //===================================================
75 #ifdef UNICODE_CHARS
77 {
78  size_t len = wcslen( ws );
79  char* buffer = alloca( 4*(len+1) ); // assume one utf16 can expand to 4 bytes max
80  char* buf = buffer;
81  mbstate_t state;
82  size_t nbytes;
83 
84  memset (&state, '\0', sizeof (state));
85  while (len>0)
86  {
87  nbytes = wcrtomb (buf, *ws, &state);
88  buf += nbytes;
89  len -= 1;
90  ws += 1;
91  }
92  *buf = '\0';
93 
94  len = buf-buffer+1;
95  buf = ALLOC( len );
96  memcpy( buf, buffer, len );
97  return buf;
98 }
99 #endif //UNICODE_CHARS
100 
101 
102 
103 
//===================================================
// Converts a multibyte string (in the encoding of the
// current C locale) into a newly allocated wide-character
// string.
//
// s       - NUL-terminated multibyte string
// returns - wide string obtained with ALLOC(); the caller
//           owns it and releases it with DEALLOC()
//
// Bytes that mbrtowc() rejects (invalid or truncated
// sequences) are copied through one by one as individual
// characters, so the conversion never reads outside s.
//
// Function is available only if UNICODE_CHARS symbol is defined
//===================================================
#ifdef UNICODE_CHARS
chars_t ASCII_to_UTF16 (const char *s)
{
  size_t len = strlen(s);

  // Every mbrtowc() call consumes at least one byte and stores at
  // most one wide character, so len+1 elements always suffice.
  // NOTE(review): assumes CHAR_SIZE equals sizeof(wchar_t) - confirm.
  wchar_t *buffer = ALLOC( CHAR_SIZE*(len+1) );
  wchar_t *buf = buffer;
  mbstate_t state;
  size_t nbytes;

  memset (&state, '\0', sizeof (state));
  while (len>0)
  {
    nbytes = mbrtowc (buf, s, len, &state);
    if( nbytes==(size_t)-1 || nbytes==(size_t)-2 )
    {
      // Invalid (-1) or incomplete (-2) sequence: the return value
      // is not a byte count. Pass the offending byte through and
      // restart from the initial shift state.
      *buf = (wchar_t)(unsigned char)*s;
      memset (&state, '\0', sizeof (state));
      nbytes = 1;
    }
    else if( nbytes==0 )
    {
      // An embedded NUL cannot occur while len comes from strlen();
      // guard anyway so the loop always makes progress.
      nbytes = 1;
    }
    buf++;
    len -= nbytes;
    s += nbytes;
  }
  *buf = L'\0';

  // Copy into a block of the exact size and release the
  // worst-case sized working buffer.
  len = CHAR_SIZE*(buf-buffer+1);
  buf = ALLOC( len );
  memcpy( buf, buffer, len );
  DEALLOC( buffer );
  return buf;
}
#endif //UNICODE_CHARS
142 
143 
144 
145 
146 //===================================================
158 //===================================================
159 chars_t ASCII_to_ASCII (const char *s)
160 {
161 #ifndef UNICODE_CHARS
162  size_t len = STRLEN(s)+1;
163  char* buf = ALLOC( len );
164  memcpy( buf, s, len );
165  return buf;
166 #else
167  return (chars_t)0;
168 #endif //UNICODE_CHARS
169 }
170 
171 
172 
173 
174 //===================================================
184 //===================================================
185 #ifdef UNICODE_CHARS
187 {
188  int len = STRLEN( source );
189  byte_t *buffer = alloca( len+1 );
190  byte_t *buf = buffer;
191 
192  while( len>0 )
193  {
194  char_t wc = *source;
195  //printf("\nCODE=%4x|",wc);
196  if( wc < 0x0080 )
197  { // 0000-007F
198  // from: [0xxxxxxx]
199  // to: [0xxxxxxx]
200  *buf++ = (byte_t)wc;
201  }
202  else if( wc < 0x0800 )
203  { // 0080-07FF
204  // from: [00000yyy yyxxxxxx]
205  // to: [110yyyyy] [10xxxxxx]
206  *buf++ = 0xC0 | (byte_t)(wc >> 6);
207  *buf++ = 0x80 | (byte_t)(wc & 0x3F);
208  }
209  else
210  { // 0800-FFFF
211  // from: [zzzzyyyy yyxxxxxx]
212  // to: [1110zzzz] [10yyyyyy] [10xxxxxx]
213  *buf++ = 0xE0 | (byte_t)(wc >> 12);
214  *buf++ = 0x80 | (byte_t)((wc >> 6) & 0x3F);
215  *buf++ = 0x80 | (byte_t)(wc & 0x3F);
216  }
217  len--;
218  source++;
219  }
220 
221  *buf = '\0';
222 
223  len = buf-buffer+1;
224  buf = ALLOC( len );
225  memcpy( buf, buffer, len );
226 
227  return buf;
228 }
229 
231 {
232  int len = strlen ((char*)source);
233  wchar_t *buffer = alloca( CHAR_SIZE*(len+1) );
234  wchar_t *buf = buffer;
235  unsigned long wc;
236 
237  while( len>0 )
238  {
239  if( (*source & 0x80)==0x00 )
240  { // 00-7F [0zzz-zzzz]
241  wc = (byte_t)*source++;
242  len -= 1;
243  }
244  else if( (*source & 0xE0)==0xC0 )
245  { // 080-7FF [110y-yyyy] [10zz-zzzz]
246  wc = *source & 0x1F;
247  source++;
248  wc = (wc<<6) + (*source & 0x3F);
249  source++;
250  len -= 2;
251  }
252  else if( ((byte_t)*source & 0xF0)==0xE0 )
253  { // 0800-FFFF [1110-xxxx] [10yy-yyyy] [10zz-zzzz]
254  wc = *source & 0x1F;
255  source++;
256  wc = (wc<<6) + (*source & 0x3F);
257  source++;
258  wc = (wc<<6) + (*source & 0x3F);
259  source++;
260  len -= 3;
261  }
262  else
263  { // 01000-10FFFF [1111-wwww] [10xx-xxxx] [10yy-yyyy] [10zz-zzzz]
264  wc = *source & 0x1F;
265  source++;
266  wc = (wc<<6) + (*source & 0x3F);
267  source++;
268  wc = (wc<<6) + (*source & 0x3F);
269  source++;
270  wc = (wc<<6) + (*source & 0x3F);
271  source++;
272  len -= 4;
273  }
274  *buf = (unsigned short)wc;
275  buf++;
276  }
277 
278  *buf = L'\0';
279 
280  len = CHAR_SIZE*(buf-buffer+1);
281  buf = ALLOC( len );
282  memcpy( buf, buffer, len );
283 
284  return buf;
285 }
286 #endif //UNICODE_CHARS
287 
288 
289 
290 
291 //===================================================
302 //===================================================
303 void* load_file( chars_t wfilename, int* filesize )
304 {
305  FILE* file; // file stream
306  void* buffer; // file buffer
307  struct stat st_info; // file attributes
308 
309  // convert filename to multibyte
310  char* filename = FILENAME(wfilename);
311 
312  errno = 0;
313 
314  // open file
315  file = fopen( filename, "rb" );
316  if( errno )
317  {
318  // searching failed, try again looking in subfolder
319  // lib of the folder where the compiler is
320  char buf[PATH_MAX+1];
322  char* path = dirname(buf);
323  int pathlen = strlen(path);
324  strncpy(buf,path,pathlen);
325 
326  int filelen = strlen(filename);
327  if( pathlen+filelen+6 > PATH_MAX ) filelen=0;
328 #ifdef WINDOWS
329  strncpy(buf+pathlen,"\\lib\\",5);
330 #else
331  strncpy(buf+pathlen,"/lib/",5);
332 #endif
333  strncpy(buf+5+pathlen,filename,filelen);
334  buf[pathlen+5+filelen] = '\0';
335 
336  //printf("failed %s, try '%s' while compiler is %s\n",filename,buf,option_compiler_filename);
337  errno = 0;
338  file = fopen( buf, "rb" );
339  if( errno )
340  {
341  //printf("failed again\n");
342  return NULL;
343  }
344  }
345 
346  // get file size
347  fstat( fileno(file), &st_info );
348  *filesize = st_info.st_size;
349 
350  // file name is not needed any more
351  #ifdef UNICODE_CHARS
352  DEALLOC( filename );
353  #endif
354 
355  // allocate buffer
356  buffer = ALLOC(*filesize+1);
357  #ifdef SAFEMODE
358  if( !buffer )
359  {
360  fclose( file );
361  errno = ENOMEM;
362  return NULL;
363  }
364  #endif //SAFEMODE
365 
366  // read file into the buffer
367  if( *filesize && !fread(buffer,1,*filesize,file ) )
368  {
369  #ifdef SAFEMODE
370  DEALLOC( buffer );
371  return NULL;
372  #endif //SAFEMODE
373  }
374 
375  fclose( file );
376  #ifdef SAFEMODE
377  if( errno )
378  {
379  free( buffer );
380  return NULL;
381  }
382  #endif //SAFEMODE
383 
384  return buffer;
385 }
386 
387 
388 
389 
390 
391 

[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Wed Jul 10 2013