Lhogho  0.0.028
 All Data Structures Files Functions Variables Typedefs Macros Pages
words.h File Reference

Go to the source code of this file.

Macros

#define STRING(x)   ((x)->unode.c[2])
 
#define WORD(x)   ((x)->unode.a[3])
 
#define LENGTH(x)   ((x)->unode.u[1] >> 8)
 
#define IDLENGTH(x)   ((x)->unode.u[1])
 
#define IS_WORD(x)   (ID(x)==WORD_ID)
 
#define IS_SUBWORD(x)   (ID(x)==SUBWORD_ID)
 
#define IS_ANY_WORD(x)   ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID))
 
#define WORD_SIZE   256*256*256
 max word length (in chars) More...
 

Functions

atom_t new_word (chars_t string, uint_t length)
 creates a new word atom More...
 
atom_t new_subword (atom_t word, chars_t string, uint_t length)
 creates a new subword atom More...
 
atom_t create_word (int buff_len)
 Creates word with len buff_len and empty data. More...
 
void delete_word (atom_t a)
 deletes word atom More...
 
void delete_subword (atom_t a)
 deletes subword atom More...
 
void dump_word (atom_t a, int level)
 dumps (sub)word atom More...
 
atom_t read_word (chars_t filename)
 reads a word from ASCII, UTF-8 or UTF16-LE encoded file More...
 
void write_word (atom_t word, chars_t filename)
 writes a word to ASCII or UTF-16LE encoded file More...
 
int same_words (atom_t a, atom_t b)
 compares two words More...
 
atom_t decode_word (unsigned char *buffer, int size, int dealc)
 decodes ASCII, UTF-8 or UTF16-LE encoded text More...
 
int atom_to_boolean (atom_t a, int *np)
 convert atom to a boolean More...
 
int atom_to_string (atom_t a, chars_t buff, int *buff_len)
 convert atom to string More...
 
atom_t atom_to_word (atom_t data)
 convert atom to word or subword atom More...
 
atom_t atom_to_real_word (atom_t data)
 convert atom to word More...
 
int same_strings (int ci, chars_t sa, chars_t sb, int n)
 compares two strings More...
 

Variables

int full_print_p
 integer of FULLPRINTP More...
 
atom_t false_true [2]
 array holding words with FALSE and TRUE More...
 

Macro Definition Documentation

#define STRING (   x)    ((x)->unode.c[2])

Definition at line 35 of file words.h.

#define WORD (   x)    ((x)->unode.a[3])

Definition at line 36 of file words.h.

#define LENGTH (   x)    ((x)->unode.u[1] >> 8)

Definition at line 37 of file words.h.

#define IDLENGTH (   x)    ((x)->unode.u[1])

Definition at line 38 of file words.h.

#define IS_WORD (   x)    (ID(x)==WORD_ID)

Definition at line 39 of file words.h.

#define IS_SUBWORD (   x)    (ID(x)==SUBWORD_ID)

Definition at line 40 of file words.h.

#define IS_ANY_WORD (   x)    ((ID(x)==WORD_ID)||(ID(x)==SUBWORD_ID))

Definition at line 41 of file words.h.

#define WORD_SIZE   256*256*256

Definition at line 43 of file words.h.

Function Documentation

atom_t new_word ( chars_t  string,
uint_t  length 
)
Parameters
stringpointer to the first character of the word
lengthlength of the word
Returns
word atom

Creates a word atom with reference count 1. A NULL_CHAR is automatically appended to the end of the word.

Definition at line 191 of file words.c.

192 {
194 
195  if( length==UNKNOWN ) length = STRLEN(string);
196  REF(a) = 1;
197  IDLENGTH(a) = WORD_ID | (length<<8);
198  STRING(a) = (chars_t)ALLOC( (size_t)CHAR_SIZE*(length+1) );
199 
200  #ifdef SAFEMODE
201  assert( length<WORD_SIZE );
202  assert( STRING(a) );
203  #endif //SAFEMODE
204 
205  STRNCPY (STRING(a),string,length);
206  *(STRING(a)+length) = NULL_CHAR;
207 
208  #ifdef DEBUG_ATOM
209  printf("<ATOM> [%08x] word={"STR"}\n",(int)a,string);
210  #endif //DEBUG_ATOM
211 
212  #ifdef ADVANCED
213  stats[ID(a)].allocs++;
214  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
215  stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
216  stats_free--;
217  #endif //ADVANCED
218 
219  #ifdef DEBUG_RUNTIME_ATOMS
221  {
222  outter( TEXT("<RUNTIME> new "), -1 );
223  dump_atom_address( a );
224  dump_atom( a, 1 );
225  outter( TEXT("\n"), -1 );
226  }
227  #endif
228  #ifdef DEBUG_COMPILETIME_ATOMS
229  if( compiling_code )
230  {
231  outter( TEXT("<COMPILETIME> new "), -1 );
232  dump_atom_address( a );
233  dump_atom( a, 1 );
234  outter( TEXT("\n"), -1 );
235  }
236  #endif
237 
238  return a;
239 }
atom_t new_subword ( atom_t  word,
chars_t  string,
uint_t  length 
)
Parameters
wordword atom containing the host word of the word
stringpointer to the first character of the word
lengthlength of the word
Returns
subword atom

Creates a subword atom containing a substring of the string in a word atom. The reference count of the subword is 1, the reference count of the host word atom is incremented.

If word parameter is subword, then it is replaced by its hosting word.

Definition at line 124 of file words.c.

125 {
127 
128  #ifdef SAFEMODE
129  assert( word );
130  assert( length<WORD_SIZE );
131  #endif //SAFEMODE
132 
133  REF(a) = 1;
134  IDLENGTH(a) = SUBWORD_ID | (length << 8);
135  if( IS_SUBWORD(word) ) word = WORD(word);
136  WORD(a) = USE(word);
137  STRING(a) = string;
138 
139  #ifdef DEBUG_ATOM
140  printf("<ATOM> [%08x] subword=",(int)a);
141  char_t x;
142  x = *(string+length);
143  *(string+length) = NULL_CHAR;
144  printf("{"STR"}\n",string);
145  *(string+length) = x;
146  #endif //DEBUG_ATOM
147 
148  #ifdef ADVANCED
149  stats[ID(a)].allocs++;
150  if( stats[ID(a)].max<(stats[ID(a)].allocs-stats[ID(a)].deallocs) )
151  stats[ID(a)].max=(stats[ID(a)].allocs-stats[ID(a)].deallocs);
152  stats_free--;
153  #endif //ADVANCED
154 
155  #ifdef DEBUG_RUNTIME_ATOMS
157  {
158  outter( TEXT("<RUNTIME> new "), -1 );
159  dump_atom_address( a );
160  dump_atom( a, 1 );
161  outter( TEXT("\n"), -1 );
162  }
163  #endif
164  #ifdef DEBUG_COMPILETIME_ATOMS
165  if( compiling_code )
166  {
167  outter( TEXT("<COMPILETIME> new "), -1 );
168  dump_atom_address( a );
169  dump_atom( a, 1 );
170  outter( TEXT("\n"), -1 );
171  }
172  #endif
173 
174  return a;
175 }
atom_t create_word ( int  buff_len)
Returns
new word

Creates new word with given length and no data set.

Definition at line 720 of file words.c.

721 {
722  atom_t word;
723 
724 #ifdef SAFEMODE
725  assert (buff_len < WORD_SIZE);
726 #endif //SAFEMODE
727 
728  word = (atom_t)take_from_pool(&data_pool);
729 
730  REF(word) = 1;
731  IDLENGTH(word) = WORD_ID | (buff_len << 8);
732  STRING(word) = (chars_t)ALLOC((size_t)CHAR_SIZE * (buff_len+1));
733 
734  #ifdef SAFEMODE
735  assert (STRING(word));
736  #endif //SAFEMODE
737 
738  #ifdef DEBUG_ATOM
739  printf("<ATOM> [%08x] word with len %d \n", (int)word, buff_len);
740  #endif //DEBUG_ATOM
741 
742  #ifdef ADVANCED
743  stats[ID(word)].allocs++;
744  stats_free--;
745  #endif //ADVANCED
746 
747  return word;
748 }
void delete_word ( atom_t  a)
Parameters
aatom to delete

Deletes the word atom and deallocates memory used for the word's characters.

Definition at line 82 of file words.c.

83 {
84  DEALLOC( STRING(a) );
86 }
void delete_subword ( atom_t  a)
Parameters
aatom to delete

Deletes the subword atom and unlinks it from the host word.

Definition at line 99 of file words.c.

100 {
101  DEUSE( WORD(a) );
102  return_to_pool( &data_pool, a );
103 }
void dump_word ( atom_t  a,
int  level 
)
Parameters
aatom to dump
leveldump level

Dumps word or subword atom through the current outter function.

Definition at line 253 of file words.c.

254 {
255  // test for full print
256  if( full_print_p )
257  {
258  // count the number of dangerous characters
259  int length = LENGTH(a);
260  int dirty = (length == 0);
261 
262  chars_t chs;
263  for( chs=STRING(a); length && !dirty ; chs++,length-- )
264  {
265  char_t ch = DEBAR(*chs);
266  if( ch==TEXT('[') || ch==TEXT(']') ||
267  ch==TEXT('(') || ch==TEXT(')') ||
268  ch==TEXT('+') || ch==TEXT('-') ||
269  ch==TEXT('*') || ch==TEXT('\\') ||
270  ch==TEXT('<') || ch==TEXT('>') ||
271  ch==TEXT('=') || ch==TEXT(';') ||
272  ch==TEXT('|') || ch==TEXT('~') ||
273  ch<=TEXT(' ') )
274  dirty = 1;
275  }
276 
277  if( dirty )
278  {
279  length = LENGTH(a);
280  outter( TEXT("|"), 1 );
281  outter_size += 1;
282  for( chs=STRING(a); length; chs++,length-- )
283  {
284  char_t ch = DEBAR(*chs);
285  if( ch==TEXT('|') || ch==TEXT('\\') )
286  {
287  outter( TEXT("\\"), 1 );
288  outter_size += 1;
289  }
290  outter( chs, 1 );
291  outter_size += 1;
292  }
293  outter( TEXT("|"), 1 );
294  outter_size += 1;
295  }
296  else
297  {
298  outter( STRING(a), LENGTH(a) );
299  outter_size += LENGTH(a);
300  }
301  return;
302  }
303 
304 
305  // no full print
306  int limit = print_width_limit;
307 
308  if( 0<=limit && limit<10 ) limit=10;
309 
310  if( 0<=limit && limit<LENGTH(a) )
311  {
312  outter( STRING(a), limit );
313  outter( TEXT("..."), 3 );
314  outter_size += limit+3;
315  }
316  else
317  {
318  outter( STRING(a), LENGTH(a) );
319  outter_size += LENGTH(a);
320  }
321 }
atom_t read_word ( chars_t  filename)
Parameters
filenamefile name
Returns
word atom containing text from the file

Reads the contents of text file into a word atom. If Unicode mode is switched off, the file is assumed to be in ASCII, otherwise it is UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark). In case of error its code is stored in global variable errno.

Definition at line 450 of file words.c.

451 {
452  int filesize;
453  unsigned char* buffer = (unsigned char*)load_file( filename, &filesize );
454 
455  if( errno ) return new_os_error( filename );
456 
457  return decode_word( buffer, filesize, 1 );
458 }
void write_word ( atom_t  word,
chars_t  wfilename 
)
Parameters
wordword to be written
wfilenamefile name

Writes the contents of a word atom to a file determined by its name. If Unicode mode is switched of, the encoding is ASCII, otherwise it is UTF-16LE (little endian) and the BOM (byte order mark) is included. In case of error its code is stored in global variable errno.

Definition at line 339 of file words.c.

340 {
341  FILE* file;
342  char* filename = FILENAME(wfilename);
343 
344  // open file
345  errno = 0;
346  file = fopen( filename, "wb" );
347 
348  // read file into the buffer
349  #ifdef UNICODE_CHARS
350  unsigned char buffer[2] = {0xFF,0xFE};
351  if( !errno ) fwrite( buffer, 2, 1, file );
352  #endif //UNICODE_CHARS
353  if( !errno ) fwrite( STRING(word), LENGTH(word), CHAR_SIZE, file );
354  if( !errno ) fclose( file );
355 
356  DEALLOC( filename );
357 }
int same_words ( atom_t  a,
atom_t  b 
)
Parameters
afirst word
bsecond word
Returns
0 if words are different, non zero - otherwise

Compares two word atoms case sensitive or insensitive depending on option_case_insensitive global flag.

Definition at line 509 of file words.c.

510 {
511 #ifdef SAFEMODE
512  assert( IS_WORD(a) || IS_SUBWORD(a) );
513  assert( IS_WORD(b) || IS_SUBWORD(b) );
514 #endif
515 
516  unsigned int n = LENGTH(a);
517  if( n!=LENGTH(b) ) return 0; // different sizes
518 
519  return same_strings( OPTION_CASE_INSENSITIVE, STRING(a), STRING(b), n );
520 }
atom_t decode_word ( unsigned char *  buffer,
int  size,
int  dealc 
)
Parameters
bufferbuffer containing raw data
sizesize of buffer
dealcdeallocate (=1) or not (=0) the input buffer
Returns
word atom containing decoded text

If Unicode mode is switched off, the raw data are assumed to be in ASCII, otherwise they are UTF-8 or UTF-16LE (little endian) depending on the BOM (byte order mark).

BOM signatures are:

  • UTF-16LE: FF FE
  • UTF-16BE: FE FF
  • UTF-8: EF BB BF

The input buffer is deallocated if dealc!=0

Definition at line 382 of file words.c.

383 {
384  #ifdef SAFEMODE
385  assert( buffer );
386  #endif //SAFEMODE
388  REF(a) = 1;
389 
390  #ifdef ADVANCED
391  stats[WORD_ID].allocs++;
392  stats_free--;
393  #endif //ADVANCED
394 
395  #ifdef UNICODE_CHARS
396  // UTF-16LE
397  if( buffer[0]==0xFF && buffer[1]==0xFE )
398  { // reusing the buffer from load_file
399  memcpy( &buffer[0], &buffer[2], size-2 );
400  buffer[size-2] = 0;
401  buffer[size-1] = 0;
402  IDLENGTH(a) = WORD_ID | (((size-2)>>1)<<8);
403  STRING(a) = (chars_t)buffer;
404  return a;
405  }
406  // UTF-8
407  if( buffer[0]==0xEF && buffer[1]==0xBB && buffer[2]==0xBF )
408  { // UTF-8 (header EF BB BF)
409  memcpy( &buffer[0], &buffer[3], size-3 );
410  buffer[size-3] = 0;
411  STRING(a) = UTF8_to_UTF16( buffer );
412  IDLENGTH(a) = WORD_ID | (wcslen(STRING(a)))<<8;
413  if( dealc ) DEALLOC( buffer );
414  return a;
415  }
416  // ASCII
417  buffer[size] = 0;
418  STRING(a) = ASCII_to_UTF16( (char*)buffer );
419  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
420  if( dealc ) DEALLOC( buffer );
421 
422  #else //UNICODE_CHARS
423 
424  // ASCII
425  buffer[size] = 0;
426  STRING(a) = ASCII_to_ASCII( (char*)buffer );
427  IDLENGTH(a) = WORD_ID | (STRLEN(STRING(a)))<<8;
428  if( dealc ) DEALLOC( buffer );
429  #endif //UNICODE_CHARS
430 
431  return a;
432 }
int atom_to_boolean ( atom_t  a,
int *  np 
)
Parameters
aatom to convert
nppointer where to put the result
Returns
returns 1 if conversion was successful.

Converts an atom to a boolean integer number, i.e. 0 indicates FALSE and 1 (or non-zero) indicates TRUE. If conversion is possible, then *np will contain the result and the return value will be 1. Otherwise *np will be set to 0 and the return value will be set to 0.

Definition at line 538 of file words.c.

539 {
540  *np = 0;
541 
542  // check for FALSE atom
543  if( a == false_true[0] )
544  {
545  *np = 0;
546  return 1;
547  }
548 
549  // check for TRUE atom
550  if( a == false_true[1] )
551  {
552  *np = 1;
553  return 1;
554  }
555 
556  // eliminate non-words
557  if( !IS_ANY_WORD(a) )
558  {
559  return 0;
560  }
561 
562  // check for FALSE word
563  if( LENGTH(a)==LENGTH(false_true[0]) &&
564  same_strings( 1, STRING(a), STRING(false_true[0]), LENGTH(a)) )
565  {
566  *np = 0;
567  return 1;
568  }
569 
570  // check for TRUE word
571  if( LENGTH(a)==LENGTH(false_true[1]) &&
572  same_strings( 1, STRING(a), STRING(false_true[1]), LENGTH(a)) )
573  {
574  *np = 1;
575  return 1;
576  }
577 
578  return 0;
579 }
int atom_to_string ( atom_t  a,
chars_t  buff,
int *  buff_len 
)
Parameters
aatom to convert
buffbuffer where string will be written
buff_lenlength of the buffer
Returns
returns 1 if conversion was successful.

Converts an atom to a string. If buffer is too small string is truncated. Conversion is successfull if atom is word, subword or number. Else return 0

Definition at line 649 of file words.c.

650 {
651  // outter(TEXT("a2s atom=|"),-1);dump(a);outter(TEXT("|\n"),-1);
652 
653  if (IS_ANY_WORD(a))
654  {
655  int min = LENGTH(a) < *buff_len ? LENGTH(a) : *buff_len;
656  int i;
657  for(i = 0; i < min; ++ i)
658  {
659  buff[i] = STRING(a)[i];
660  }
661  *buff_len = min;
662  return 1;
663  }else
664  {
665  float64_t x;
666  if (!atom_to_float (a, &x))
667  {
668  return 0;
669  }
670  // In Window infinities are printed as "1.#INF" instead
671  // of just "inf", and "-1.#IND" instead of "nan". So let's fix this.
672  #ifdef WINDOWS
673  if(isinf(x)!=0 && x > 0)
674  {
675  STRNCPY(buff, TEXT("inf"), *buff_len-1);
676  if (*buff_len > 4) *buff_len = 3;
677  return 1;
678  }
679  if( isinf(x)!=0 && x < 0)
680  {
681  STRNCPY(buff, TEXT("-inf"), *buff_len-1);
682  if (*buff_len > 5) *buff_len = 4;
683  return 1;
684  }
685  if( isnan(x)!=0 )
686  {
687  STRNCPY(buff, TEXT("nan"), *buff_len-1);
688  if (*buff_len > 4) *buff_len = 3;
689  return 1;
690  }
691  #endif //WINDOWS
692 
693  if( (x>=1e23) || (x<=-1e23) || ((x>=-1e-23) && (x<=1e-23)) )
694  {
695  SPRINTF( buff, *buff_len, FORMAT_EXTRA_FP, x );
696  *buff_len = STRLEN(buff);
697  }
698  else
699  {
700  SPRINTF(buff, *buff_len, FORMAT_FP, x);
701  *buff_len = STRLEN(buff);
702  //outter(TEXT("a2s=|"),-1); outter(buff,*buff_len); outter(TEXT("|\n"),-1);
703  //atom_t nn=new_integer(*buff_len);
704  //outter(TEXT("len="),-1); dumpln(nn);
705  while (buff[*buff_len-1] == '0') --*buff_len;
706  if (buff[*buff_len-1] == '.') --*buff_len;
707  buff[*buff_len] = '\0';
708  }
709  return 1;
710  }
711 }
atom_t atom_to_word ( atom_t  data)
Parameters
dataatom to convert
Returns
returns word, subword or error atom

Converts an atom to a word or a subword atom. If the input is a (sub)word, then it is returned directly. If it is a number, then a new word is created. Otherwise an error atom is returned.

Definition at line 592 of file words.c.

593 {
594  if( IS_ERROR(data) ) return data;
595  if( IS_ANY_WORD(data) ) return USE(data);
596  if( IS_UNBOUND(data) ) return new_error( ERROR_NOT_A_WORD, data );
597  if( IS_INTEGER(data) || IS_FLOAT(data) )
598  {
599  #define BUFFER_SIZE 128
600  char_t ch[BUFFER_SIZE];
601  int len = BUFFER_SIZE;
602  if( atom_to_string(data,ch,&len) ) return new_word( ch, len );
603  #undef BUFFER_SIZE
604  }
605  return new_error( ERROR_NOT_A_WORD, data );
606 }
atom_t atom_to_real_word ( atom_t  data)
Parameters
dataatom to convert
Returns
returns word or error atom

Converts an atom to a word. This guarantees that the string in the word atom is zero-terminated. If the input is a word then it is reused. Otherwise a new word is created. Note that a new word is created even if the input is a subword.

Definition at line 622 of file words.c.

623 {
624  atom_t subword;
625  atom_t word = atom_to_word( data );
626  if( IS_SUBWORD(word) )
627  {
628  subword = word;
629  word = new_word( STRING(subword), LENGTH(subword) );
630  DEUSE( subword );
631  }
632  return word;
633 }
int same_strings ( int  ci,
chars_t  sa,
chars_t  sb,
int  n 
)
Parameters
ci1=case insensitive, 0=case sensitive
safirst string
sbsecond string
nstring length
Returns
0 if strings are different, non zero - otherwise

Compares two strings case sensitive or insensitive.

Definition at line 475 of file words.c.

476 {
477  if( ci )
478  // case sensitive A!=a
479  while( n-- )
480  {
481  if( TOLOWER(DEBAR(*sa)) != TOLOWER(DEBAR(*sb)) ) return 0;
482  sa++;
483  sb++;
484  }
485  else
486  // case sensitive A!=a
487  while( n-- )
488  {
489  if( DEBAR(*sa) != DEBAR(*sb) ) return 0;
490  sa++;
491  sb++;
492  }
493  return 1;
494 }

Variable Documentation

int full_print_p

Definition at line 70 of file words.c.

atom_t false_true[2]

Definition at line 71 of file words.c.


[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Wed Jul 10 2013