Lhogho  0.0.028
 All Data Structures Files Functions Variables Typedefs Macros Pages
words.c
Go to the documentation of this file.
1 //
2 // Project: Lhogho
3 // File: words.c
4 //
5 // Copyright (C) 2007 P.Boytchev
6 //
7 // Revision history:
8 // 2007-06-09 - file created from atoms.c
9 // 2007-06-12 - fixed bug #1735808 (Linux executables crash)
10 // 2007-06-13 - fixed bug #1736021 Alloc/dealloc statistics
11 // 2007-06-17 - fixed bug #1738571 Invalid pointer in Ubuntu binary
12 // 2007-09-01 - FULLPRINTP
13 // - atom_to_boolean()
14 // 2007-09-14 - atom_to_string()
15 // 2007-10-06 - create_word()
16 // 2007-10-21 - atom_to_string() fixed trailing zeros removed
17 // 2007-11-06 - atom_to_word()
18 // 2007-12-02 - fixed dump_word for barred characters
19 // 2008-04-18 - atom_to_boolean() is always case-insensitive
20 // 2008-04-28 - fixed atom_to_word() to use FORMAT_FP
21 // fixed bug #1953207 In Fedora 4 tests fail
22 // 2012-01-02 - Command-line options are translatable
23 //
24 //
25 // This program is free software; you can redistribute it and/or modify
26 // it under the terms of the GNU General Public License as published by
27 // the Free Software Foundation; either version 2 of the License, or
28 // (at your option) any later version.
29 //
30 // This program is distributed in the hope that it will be useful,
31 // but WITHOUT ANY WARRANTY; without even the implied warranty of
32 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 // GNU General Public License for more details.
34 //
35 // You should have received a copy of the GNU General Public License
36 // along with this program; if not, write to the Free Software
37 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
38 //
39 
40 
41 
42 #undef __STRICT_ANSI__
43 
44 
45 #include <stdio.h> // fopen(), fread(), fclose()
46 #include <errno.h> // errno, ENOMEM
47 #include <assert.h>
48 #include <stdlib.h> // malloc()
49 #include <string.h>
50 #include <math.h> // isinf()
51 #include "globals.h"
52 #ifdef UNICODE_CHARS
53  #include <wctype.h>
54  #include <wchar.h> // UNICODE support: wcslen()
55 #endif //UNICODE_CHARS
56 
57 #include "atoms.h"
58 #include "pools.h"
59 #include "words.h"
60 #include "lists.h"
61 #include "errors.h"
62 #include "options.h"
63 #include "numbers.h"
64 #include "unicode.h"
65 #include "vars.h"
66 #include "compiler.h"
67 #include "parser.h"
68 
69 
70 int full_print_p = 0;
72 
73 
74 //===================================================
81 //===================================================
83 {
84  DEALLOC( STRING(a) );
86 }
87 
88 
89 
90 
91 //===================================================
98 //===================================================
100 {
101  DEUSE( WORD(a) );
102  return_to_pool( &data_pool, a );
103 }
104 
105 
106 
107 
108 //===================================================
123 //===================================================
124 atom_t new_subword( atom_t word, chars_t string, uint_t length)
125 {
127 
128  #ifdef SAFEMODE
129  assert( word );
130  assert( length<WORD_SIZE );
131  #endif //SAFEMODE
132 
133  REF(a) = 1;
134  IDLENGTH(a) = SUBWORD_ID | (length << 8);
135  if( IS_SUBWORD(word) ) word = WORD(word);
136  WORD(a) = USE(word);
137  STRING(a) = string;
138 
139  #ifdef DEBUG_ATOM
140  printf("<ATOM> [%08x] subword=",(int)a);
141  char_t x;
142  x = *(string+length);
143  *(string+length) = NULL_CHAR;
144  printf("{"STR"}\n",string);
145  *(string+length) = x;
146  #endif //DEBUG_ATOM
147 
148  #ifdef ADVANCED
149  stats[ID(a)].allocs++;
150  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
151  stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
152  stats_free--;
153  #endif //ADVANCED
154 
155  #ifdef DEBUG_RUNTIME_ATOMS
157  {
158  outter( TEXT("<RUNTIME> new "), -1 );
159  dump_atom_address( a );
160  dump_atom( a, 1 );
161  outter( TEXT("\n"), -1 );
162  }
163  #endif
164  #ifdef DEBUG_COMPILETIME_ATOMS
165  if( compiling_code )
166  {
167  outter( TEXT("<COMPILETIME> new "), -1 );
168  dump_atom_address( a );
169  dump_atom( a, 1 );
170  outter( TEXT("\n"), -1 );
171  }
172  #endif
173 
174  return a;
175 }
176 
177 
178 
179 
180 //===================================================
190 //===================================================
191 atom_t new_word( chars_t string, uint_t length)
192 {
194 
195  if( length==UNKNOWN ) length = STRLEN(string);
196  REF(a) = 1;
197  IDLENGTH(a) = WORD_ID | (length<<8);
198  STRING(a) = (chars_t)ALLOC( (size_t)CHAR_SIZE*(length+1) );
199 
200  #ifdef SAFEMODE
201  assert( length<WORD_SIZE );
202  assert( STRING(a) );
203  #endif //SAFEMODE
204 
205  STRNCPY (STRING(a),string,length);
206  *(STRING(a)+length) = NULL_CHAR;
207 
208  #ifdef DEBUG_ATOM
209  printf("<ATOM> [%08x] word={"STR"}\n",(int)a,string);
210  #endif //DEBUG_ATOM
211 
212  #ifdef ADVANCED
213  stats[ID(a)].allocs++;
214  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
215  stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
216  stats_free--;
217  #endif //ADVANCED
218 
219  #ifdef DEBUG_RUNTIME_ATOMS
221  {
222  outter( TEXT("<RUNTIME> new "), -1 );
223  dump_atom_address( a );
224  dump_atom( a, 1 );
225  outter( TEXT("\n"), -1 );
226  }
227  #endif
228  #ifdef DEBUG_COMPILETIME_ATOMS
229  if( compiling_code )
230  {
231  outter( TEXT("<COMPILETIME> new "), -1 );
232  dump_atom_address( a );
233  dump_atom( a, 1 );
234  outter( TEXT("\n"), -1 );
235  }
236  #endif
237 
238  return a;
239 }
240 
241 
242 
243 
244 //===================================================
252 //===================================================
253 void dump_word( atom_t a, int level )
254 {
255  // test for full print
256  if( full_print_p )
257  {
258  // count the number of dangerous characters
259  int length = LENGTH(a);
260  int dirty = (length == 0);
261 
262  chars_t chs;
263  for( chs=STRING(a); length && !dirty ; chs++,length-- )
264  {
265  char_t ch = DEBAR(*chs);
266  if( ch==TEXT('[') || ch==TEXT(']') ||
267  ch==TEXT('(') || ch==TEXT(')') ||
268  ch==TEXT('+') || ch==TEXT('-') ||
269  ch==TEXT('*') || ch==TEXT('\\') ||
270  ch==TEXT('<') || ch==TEXT('>') ||
271  ch==TEXT('=') || ch==TEXT(';') ||
272  ch==TEXT('|') || ch==TEXT('~') ||
273  ch<=TEXT(' ') )
274  dirty = 1;
275  }
276 
277  if( dirty )
278  {
279  length = LENGTH(a);
280  outter( TEXT("|"), 1 );
281  outter_size += 1;
282  for( chs=STRING(a); length; chs++,length-- )
283  {
284  char_t ch = DEBAR(*chs);
285  if( ch==TEXT('|') || ch==TEXT('\\') )
286  {
287  outter( TEXT("\\"), 1 );
288  outter_size += 1;
289  }
290  outter( chs, 1 );
291  outter_size += 1;
292  }
293  outter( TEXT("|"), 1 );
294  outter_size += 1;
295  }
296  else
297  {
298  outter( STRING(a), LENGTH(a) );
299  outter_size += LENGTH(a);
300  }
301  return;
302  }
303 
304 
305  // no full print
306  int limit = print_width_limit;
307 
308  if( 0<=limit && limit<10 ) limit=10;
309 
310  if( 0<=limit && limit<LENGTH(a) )
311  {
312  outter( STRING(a), limit );
313  outter( TEXT("..."), 3 );
314  outter_size += limit+3;
315  }
316  else
317  {
318  outter( STRING(a), LENGTH(a) );
319  outter_size += LENGTH(a);
320  }
321 }
322 
323 
324 
325 
326 //===================================================
338 //===================================================
339 void write_word( atom_t word, chars_t wfilename )
340 {
341  FILE* file;
342  char* filename = FILENAME(wfilename);
343 
344  // open file
345  errno = 0;
346  file = fopen( filename, "wb" );
347 
348  // read file into the buffer
349  #ifdef UNICODE_CHARS
350  unsigned char buffer[2] = {0xFF,0xFE};
351  if( !errno ) fwrite( buffer, 2, 1, file );
352  #endif //UNICODE_CHARS
353  if( !errno ) fwrite( STRING(word), LENGTH(word), CHAR_SIZE, file );
354  if( !errno ) fclose( file );
355 
356  DEALLOC( filename );
357 }
358 
359 
360 
361 
362 //===================================================
381 //===================================================
382 atom_t decode_word( unsigned char* buffer, int size, int dealc )
383 {
384  #ifdef SAFEMODE
385  assert( buffer );
386  #endif //SAFEMODE
388  REF(a) = 1;
389 
390  #ifdef ADVANCED
391  stats[WORD_ID].allocs++;
392  stats_free--;
393  #endif //ADVANCED
394 
395  #ifdef UNICODE_CHARS
396  // UTF-16LE
397  if( buffer[0]==0xFF && buffer[1]==0xFE )
398  { // reusing the buffer from load_file
399  memcpy( &buffer[0], &buffer[2], size-2 );
400  buffer[size-2] = 0;
401  buffer[size-1] = 0;
402  IDLENGTH(a) = WORD_ID | (((size-2)>>1)<<8);
403  STRING(a) = (chars_t)buffer;
404  return a;
405  }
406  // UTF-8
407  if( buffer[0]==0xEF && buffer[1]==0xBB && buffer[2]==0xBF )
408  { // UTF-8 (header EF BB BF)
409  memcpy( &buffer[0], &buffer[3], size-3 );
410  buffer[size-3] = 0;
411  STRING(a) = UTF8_to_UTF16( buffer );
412  IDLENGTH(a) = WORD_ID | (wcslen(STRING(a)))<<8;
413  if( dealc ) DEALLOC( buffer );
414  return a;
415  }
416  // ASCII
417  buffer[size] = 0;
418  STRING(a) = ASCII_to_UTF16( (char*)buffer );
419  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
420  if( dealc ) DEALLOC( buffer );
421 
422  #else //UNICODE_CHARS
423 
424  // ASCII
425  buffer[size] = 0;
426  STRING(a) = ASCII_to_ASCII( (char*)buffer );
427  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
428  if( dealc ) DEALLOC( buffer );
429  #endif //UNICODE_CHARS
430 
431  return a;
432 }
433 
434 
435 
436 
437 //===================================================
449 //===================================================
451 {
452  int filesize;
453  unsigned char* buffer = (unsigned char*)load_file( filename, &filesize );
454 
455  if( errno ) return new_os_error( filename );
456 
457  return decode_word( buffer, filesize, 1 );
458 }
459 
460 
461 
462 
463 
464 //===========================================================
474 //===========================================================
475 int same_strings( int ci, chars_t sa, chars_t sb, int n )
476 {
477  if( ci )
478  // case sensitive A!=a
479  while( n-- )
480  {
481  if( TOLOWER(DEBAR(*sa)) != TOLOWER(DEBAR(*sb)) ) return 0;
482  sa++;
483  sb++;
484  }
485  else
486  // case sensitive A!=a
487  while( n-- )
488  {
489  if( DEBAR(*sa) != DEBAR(*sb) ) return 0;
490  sa++;
491  sb++;
492  }
493  return 1;
494 }
495 
496 
497 
498 
499 //===========================================================
508 //===========================================================
510 {
511 #ifdef SAFEMODE
512  assert( IS_WORD(a) || IS_SUBWORD(a) );
513  assert( IS_WORD(b) || IS_SUBWORD(b) );
514 #endif
515 
516  unsigned int n = LENGTH(a);
517  if( n!=LENGTH(b) ) return 0; // different sizes
518 
519  return same_strings( OPTION_CASE_INSENSITIVE, STRING(a), STRING(b), n );
520 }
521 
522 
523 
524 //===================================================
537 //===================================================
538 int atom_to_boolean( atom_t a, int* np )
539 {
540  *np = 0;
541 
542  // check for FALSE atom
543  if( a == false_true[0] )
544  {
545  *np = 0;
546  return 1;
547  }
548 
549  // check for TRUE atom
550  if( a == false_true[1] )
551  {
552  *np = 1;
553  return 1;
554  }
555 
556  // eliminate non-words
557  if( !IS_ANY_WORD(a) )
558  {
559  return 0;
560  }
561 
562  // check for FALSE word
563  if( LENGTH(a)==LENGTH(false_true[0]) &&
564  same_strings( 1, STRING(a), STRING(false_true[0]), LENGTH(a)) )
565  {
566  *np = 0;
567  return 1;
568  }
569 
570  // check for TRUE word
571  if( LENGTH(a)==LENGTH(false_true[1]) &&
572  same_strings( 1, STRING(a), STRING(false_true[1]), LENGTH(a)) )
573  {
574  *np = 1;
575  return 1;
576  }
577 
578  return 0;
579 }
580 
581 //===================================================
591 //===================================================
593 {
594  if( IS_ERROR(data) ) return data;
595  if( IS_ANY_WORD(data) ) return USE(data);
596  if( IS_UNBOUND(data) ) return new_error( ERROR_NOT_A_WORD, data );
597  if( IS_INTEGER(data) || IS_FLOAT(data) )
598  {
599  #define BUFFER_SIZE 128
600  char_t ch[BUFFER_SIZE];
601  int len = BUFFER_SIZE;
602  if( atom_to_string(data,ch,&len) ) return new_word( ch, len );
603  #undef BUFFER_SIZE
604  }
605  return new_error( ERROR_NOT_A_WORD, data );
606 }
607 
608 
609 
610 //===================================================
621 //===================================================
623 {
624  atom_t subword;
625  atom_t word = atom_to_word( data );
626  if( IS_SUBWORD(word) )
627  {
628  subword = word;
629  word = new_word( STRING(subword), LENGTH(subword) );
630  DEUSE( subword );
631  }
632  return word;
633 }
634 
635 
636 
637 //===================================================
648 //===================================================
649 int atom_to_string(atom_t a, chars_t buff, int * buff_len)
650 {
651  // outter(TEXT("a2s atom=|"),-1);dump(a);outter(TEXT("|\n"),-1);
652 
653  if (IS_ANY_WORD(a))
654  {
655  int min = LENGTH(a) < *buff_len ? LENGTH(a) : *buff_len;
656  int i;
657  for(i = 0; i < min; ++ i)
658  {
659  buff[i] = STRING(a)[i];
660  }
661  *buff_len = min;
662  return 1;
663  }else
664  {
665  float64_t x;
666  if (!atom_to_float (a, &x))
667  {
668  return 0;
669  }
670  // In Window infinities are printed as "1.#INF" instead
671  // of just "inf", and "-1.#IND" instead of "nan". So let's fix this.
672  #ifdef WINDOWS
673  if(isinf(x)!=0 && x > 0)
674  {
675  STRNCPY(buff, TEXT("inf"), *buff_len-1);
676  if (*buff_len > 4) *buff_len = 3;
677  return 1;
678  }
679  if( isinf(x)!=0 && x < 0)
680  {
681  STRNCPY(buff, TEXT("-inf"), *buff_len-1);
682  if (*buff_len > 5) *buff_len = 4;
683  return 1;
684  }
685  if( isnan(x)!=0 )
686  {
687  STRNCPY(buff, TEXT("nan"), *buff_len-1);
688  if (*buff_len > 4) *buff_len = 3;
689  return 1;
690  }
691  #endif //WINDOWS
692 
693  if( (x>=1e23) || (x<=-1e23) || ((x>=-1e-23) && (x<=1e-23)) )
694  {
695  SPRINTF( buff, *buff_len, FORMAT_EXTRA_FP, x );
696  *buff_len = STRLEN(buff);
697  }
698  else
699  {
700  SPRINTF(buff, *buff_len, FORMAT_FP, x);
701  *buff_len = STRLEN(buff);
702  //outter(TEXT("a2s=|"),-1); outter(buff,*buff_len); outter(TEXT("|\n"),-1);
703  //atom_t nn=new_integer(*buff_len);
704  //outter(TEXT("len="),-1); dumpln(nn);
705  while (buff[*buff_len-1] == '0') --*buff_len;
706  if (buff[*buff_len-1] == '.') --*buff_len;
707  buff[*buff_len] = '\0';
708  }
709  return 1;
710  }
711 }
712 
713 //===================================================
719 //===================================================
720 atom_t create_word(int buff_len)
721 {
722  atom_t word;
723 
724 #ifdef SAFEMODE
725  assert (buff_len < WORD_SIZE);
726 #endif //SAFEMODE
727 
728  word = (atom_t)take_from_pool(&data_pool);
729 
730  REF(word) = 1;
731  IDLENGTH(word) = WORD_ID | (buff_len << 8);
732  STRING(word) = (chars_t)ALLOC((size_t)CHAR_SIZE * (buff_len+1));
733 
734  #ifdef SAFEMODE
735  assert (STRING(word));
736  #endif //SAFEMODE
737 
738  #ifdef DEBUG_ATOM
739  printf("<ATOM> [%08x] word with len %d \n", (int)word, buff_len);
740  #endif //DEBUG_ATOM
741 
742  #ifdef ADVANCED
743  stats[ID(word)].allocs++;
744  stats_free--;
745  #endif //ADVANCED
746 
747  return word;
748 }

[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Wed Jul 10 2013