Lhogho  0.0.028
 All Data Structures Files Functions Variables Typedefs Macros Pages
parser.c File Reference

Go to the source code of this file.

Macros

#define GET_CHAR   ch=*source
 get current character More...
 
#define VAR   vars[count]
 shortens access to vars array More...
 
#define LINK
 
#define RELINK
 

Tokentypes

#define TOKEN_END   0
 no more tokens available More...
 
#define TOKEN_WORD   1
 normal word token More...
 
#define TOKEN_DIRTY_WORD   2
 word token with unhandled backslashs or bars More...
 
#define TOKEN_OPEN   3
 open square bracket token More...
 
#define TOKEN_CLOSE   4
 close square bracket token More...
 
#define TOKEN_LINEEND   5
 end of line token More...
 
#define TOKEN_SPACE   6
 whitespace token More...
 
#define TOKEN_ERROR   7
 error occured More...
 

Parse modes

#define MODE_ENTRY   0
 entry mode More...
 
#define MODE_SPACE   1
 space mode More...
 
#define MODE_WORD   2
 word mode More...
 
#define MODE_BARRED   3
 barred mode More...
 
#define MODE_BACKSLASHED   4
 backslashed mode More...
 
#define MODE_TILDE   5
 tilde mode More...
 
#define MODE_SEMITILDE   6
 semitilde mode More...
 
#define MODE_SEMICOLON   7
 semicolon mode More...
 
#define MODE_TILDESPACE   8
 tildespace mode More...
 
#define MODE_LESS   9
 less mode More...
 
#define MODE_GREATER   10
 greater mode More...
 
#define MAX_MODE   11
 greatest mode + 1 More...
 

Microprogram defines (basic)

#define PAT_TILDE   0x000001
 push tilde More...
 
#define PAT_PUSH   0x000002
 push last character More...
 
#define PAT_NEXT   0x000004
 goto next character More...
 
#define PAT_DIRTY   0x000008
 set dirty flag More...
 
#define PAT_MUTATED   0x000010
 set mutated flag More...
 
#define PAT_RETURN_TO_WORD   0x000020
 next return will return to word mode More...
 
#define PAT_RETURN_TO_SELF   0x000040
 next return will return to current mode More...
 
#define PAT_GOTO   0x000080
 follows 4bit go-to-mode More...
 
#define PAT_TOKEN   0x000100
 follows 4bit token_id More...
 
#define PAT_RETURN   0x000200
 return to stack-to-mode More...
 
#define PAT_ERROR_POS   0x000400
 remember current position as error position More...
 
#define PAT_ERROR   0x000800
 generate error More...
 
#define PAT_RETURN_TO_CALLER   0x001000
 next return will return to the caller of this mode More...
 
#define PAT_NEXT2   0x002000
 goto next next character More...
 

Microprogram defines (compound)

#define PAT_SHIFT   16
 shift factor More...
 
#define PAT_TOKEN_END   PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)
 microprogram for end token More...
 
#define PAT_TOKEN_SPACE   PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)
 microprogram for space token More...
 
#define PAT_TOKEN_WORD   PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)
 microprogram for word token More...
 
#define PAT_TOKEN_LINE   PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)
 microprogram for line end token More...
 
#define PAT_TOKEN_OPEN   PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)
 microprogram for open token More...
 
#define PAT_TOKEN_CLOSE   PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)
 microprogram for close token More...
 
#define PAT_GOTO_SPACE   PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)
 microprogram for goto space mode More...
 
#define PAT_GOTO_WORD   PAT_GOTO+(MODE_WORD<<PAT_SHIFT)
 microprogram for goto word mode More...
 
#define PAT_GOTO_BACKSLASHED   PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)
 microprogram for goto backslashed mode More...
 
#define PAT_GOTO_BARRED   PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)
 microprogram for goto barred mode More...
 
#define PAT_GOTO_SEMICOLON   PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)
 microprogram for goto semicolon mode More...
 
#define PAT_GOTO_TILDE   PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)
 microprogram for goto tilde mode More...
 
#define PAT_GOTO_TILDESPACE   PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)
 microprogram for goto tildespace mode More...
 
#define PAT_GOTO_SEMITILDE   PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)
 microprogram for goto semitilde mode More...
 
#define PAT_GOTO_LESS   PAT_GOTO+(MODE_LESS<<PAT_SHIFT)
 microprogram for goto less mode More...
 
#define PAT_GOTO_GREATER   PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)
 microprogram for goto greater mode More...
 

Special characters

When a chracter is barrable and is actually barred then its code is changed. For example codes for space and barred space are different for Lhogho, but should appear the same for users.

Barrable characters are ()+-

#define MAX_ELEMS   128
 maximal number of elements in a statement More...
 
char_t debar [32]
 table for a->|a| conversions More...
 
atom_t build_syntax_subtree (atom_t function, atom_t source)
 parses sublist of commands More...
 
void init_parser ()
 initializes parser More...
 
atom_t tokenize (atom_t input, int method)
 tokenizes into a list More...
 
atom_t trim_shell_comment (atom_t word)
 trims shell comment (if any) More...
 
atom_t purify (atom_t word)
 purifies a word More...
 
atom_t parentheses (atom_t input)
 processes parentheses More...
 
atom_t define_user_function (atom_t source, atom_t input, atom_t parent)
 creates var atom for a user-defined function More...
 
atom_t preparse (atom_t input, atom_t parent, int level)
 extracts all TO ... END definitions More...
 
atom_t parse (atom_t input, atom_t parent, int top_level)
 parses a list into a tree More...
 
atom_t build_syntax_tree (atom_t func)
 parses body of user-defined function More...
 

Macro Definition Documentation

#define GET_CHAR   ch=*source

Definition at line 112 of file parser.c.

#define VAR   vars[count]

Definition at line 113 of file parser.c.

#define TOKEN_END   0

Definition at line 122 of file parser.c.

#define TOKEN_WORD   1

Definition at line 123 of file parser.c.

#define TOKEN_DIRTY_WORD   2

Definition at line 124 of file parser.c.

#define TOKEN_OPEN   3

Definition at line 125 of file parser.c.

#define TOKEN_CLOSE   4

Definition at line 126 of file parser.c.

#define TOKEN_LINEEND   5

Definition at line 127 of file parser.c.

#define TOKEN_SPACE   6

Definition at line 128 of file parser.c.

#define TOKEN_ERROR   7

Definition at line 129 of file parser.c.

#define MODE_ENTRY   0

Definition at line 137 of file parser.c.

#define MODE_SPACE   1

Definition at line 138 of file parser.c.

#define MODE_WORD   2

Definition at line 139 of file parser.c.

#define MODE_BARRED   3

Definition at line 140 of file parser.c.

#define MODE_BACKSLASHED   4

Definition at line 141 of file parser.c.

#define MODE_TILDE   5

Definition at line 142 of file parser.c.

#define MODE_SEMITILDE   6

Definition at line 143 of file parser.c.

#define MODE_SEMICOLON   7

Definition at line 144 of file parser.c.

#define MODE_TILDESPACE   8

Definition at line 145 of file parser.c.

#define MODE_LESS   9

Definition at line 146 of file parser.c.

#define MODE_GREATER   10

Definition at line 147 of file parser.c.

#define MAX_MODE   11

Definition at line 148 of file parser.c.

#define PAT_TILDE   0x000001

Definition at line 156 of file parser.c.

#define PAT_PUSH   0x000002

Definition at line 157 of file parser.c.

#define PAT_NEXT   0x000004

Definition at line 158 of file parser.c.

#define PAT_DIRTY   0x000008

Definition at line 159 of file parser.c.

#define PAT_MUTATED   0x000010

Definition at line 160 of file parser.c.

#define PAT_RETURN_TO_WORD   0x000020

Definition at line 161 of file parser.c.

#define PAT_RETURN_TO_SELF   0x000040

Definition at line 162 of file parser.c.

#define PAT_GOTO   0x000080

Definition at line 163 of file parser.c.

#define PAT_TOKEN   0x000100

Definition at line 164 of file parser.c.

#define PAT_RETURN   0x000200

Definition at line 165 of file parser.c.

#define PAT_ERROR_POS   0x000400

Definition at line 166 of file parser.c.

#define PAT_ERROR   0x000800

Definition at line 167 of file parser.c.

#define PAT_RETURN_TO_CALLER   0x001000

Definition at line 168 of file parser.c.

#define PAT_NEXT2   0x002000

Definition at line 169 of file parser.c.

#define PAT_SHIFT   16

Definition at line 178 of file parser.c.

#define PAT_TOKEN_END   PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)

Definition at line 180 of file parser.c.

#define PAT_TOKEN_SPACE   PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)

Definition at line 181 of file parser.c.

#define PAT_TOKEN_WORD   PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)

Definition at line 182 of file parser.c.

#define PAT_TOKEN_LINE   PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)

Definition at line 183 of file parser.c.

#define PAT_TOKEN_OPEN   PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)

Definition at line 184 of file parser.c.

#define PAT_TOKEN_CLOSE   PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)

Definition at line 185 of file parser.c.

#define PAT_GOTO_SPACE   PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)

Definition at line 187 of file parser.c.

#define PAT_GOTO_WORD   PAT_GOTO+(MODE_WORD<<PAT_SHIFT)

Definition at line 188 of file parser.c.

#define PAT_GOTO_BACKSLASHED   PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)

Definition at line 189 of file parser.c.

#define PAT_GOTO_BARRED   PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)

Definition at line 190 of file parser.c.

#define PAT_GOTO_SEMICOLON   PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)

Definition at line 191 of file parser.c.

#define PAT_GOTO_TILDE   PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)

Definition at line 192 of file parser.c.

#define PAT_GOTO_TILDESPACE   PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)

Definition at line 193 of file parser.c.

#define PAT_GOTO_SEMITILDE   PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)

Definition at line 194 of file parser.c.

#define PAT_GOTO_LESS   PAT_GOTO+(MODE_LESS<<PAT_SHIFT)

Definition at line 195 of file parser.c.

#define PAT_GOTO_GREATER   PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)

Definition at line 196 of file parser.c.

#define MAX_ELEMS   128

Definition at line 1631 of file parser.c.

#define LINK
Value:
{ \
usedby[j] = i; \
args[i]--; \
}
#define RELINK
Value:
{ \
if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
LINK; \
}

Function Documentation

atom_t build_syntax_subtree ( atom_t  function,
atom_t  source 
)
Parameters
functionvar atom for the parse context
sourceword or list containing the source
Returns
abstract syntax tree or an error atom

Parses a list of commands (like these in IF or REPEAT) and build an abstract syntax tree:

  • tokenization of body as commands
  • extracting all TO ... ENDs and create them as subfunctions
  • parsing the func's body into abstract syntax tree
  • recursively build trees of subfunctions

Definition at line 2356 of file parser.c.

2357 {
2358  //printf("BUILD_SYNTAX_SUBTREE(");
2359  //dump(NAME(function));
2360  //printf(",");
2361  //dump(source);
2362  //printf(")\n\n");
2363 
2364  // tokenize
2365  atom_t tokens1 = tokenize( source, TOKENIZE_DATA );
2366  if( IS_ERROR(tokens1) ) return tokens1;
2367  //printf("tokens1="); dumpln(tokens1);
2368 
2369  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2370  DEUSE( tokens1 );
2371  if( IS_ERROR(tokens2) ) return tokens2;
2372  //printf("tokens2="); dumpln(tokens2);
2373 
2374  // extract TO..END's
2375  atom_t body = preparse( tokens2, function, LEVEL(function) );
2376  if( IS_ERROR(body) ) return body;
2377  //printf("body="); dumpln(body);
2378 
2379  // parse function body
2380  atom_t tree = parse( body, function, 1 );
2381  if( IS_ERROR(tree) ) return tree;
2382  //printf("tree="); dumpln(tree);
2383 
2384  DEUSE( tokens2 );
2385  //DEUSE( body );
2386  return tree;
2387 }
void init_parser ( )

Initializes tables enbar[] and debar[] which are used to enbar and debar a character.

Definition at line 224 of file parser.c.

225 {
226  int i;
227 
228  // NOTE! if the string of enbarrable characters is
229  // changed, also change dump_word()
230  chars_t s = TEXT("______________()+-*/=<>|?_______");
231  //locked-> x........xx..x..................
232 
233  for( i=0; i<32; i++ ) debar[i]=i; // identity matrix
234  for( i=0; i<128; i++ ) enbar[i]=i; // identity matrix
235 
236  for( i=0; *s; i++,s++ )
237  {
238  if( *s==TEXT('_') ) continue;
239  debar[i] = *s;
240  enbar[(unsigned char)*s] = i;
241  }
242 }
atom_t tokenize ( atom_t  input,
int  method 
)
Parameters
inputword, subword or list to be tokenized
methodmethod of tokenization
Returns
tokenized list

Tokenizes a word, a subword or a list into a list. If possible makes all words as subwords. Backslashes and bars in words are preserved (i.e. words are not purified). Comments and line continuations are ignored.

If the method is TOKENIZE_DATA then the input is tokenized as if it contains Logo data. If the method is TOKENIZE_COMMANDS then the input is tokenized as if it contains Logo commands. If the method is TOKENIZE_READWORD then the input is tokenized as expected by READWORD function.

If the input is a list then all its elements are tokenized one-by-one.

Return value is the tokenized list. In there is an error, returns an error atom which error code is ERROR_INCOMPLETE_PAIR, error position points the position in the word (0-based) and the error source is the word itself.

Definition at line 275 of file parser.c.

276 {
277  if( IS_LIST(input) )
278  {
279  if( method==TOKENIZE_DATA &&
281  return USE(input);
282  if( method==TOKENIZE_COMMANDS &&
284  return USE(input);
285  }
286 
287  if( IS_FLOAT(input) )
288  {
289  return USE(input);
290  }
291 
292  //printf("\n");
293  //if(method==TOKENIZE_DATA)
294  // printf("ENTER TOKENIZE_DATA(");
295  //else
296  // printf("ENTER TOKENIZE_COMMANDS(");
297  //dump_atom(input,1);
298  //printf(")\n");
299 
300 
301  #ifdef SAFEMODE
302  assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) );
303  #endif
304 
305  // First check whether the input is a list.
306  // If it is then tokenize recursively all its elements.
307  if( IS_LIST(input) )
308  {
309  atom_t result = empty_list;
310  atom_t last = empty_list;
311  atom_t x;
312  atom_t y;
313  for( x=input; IS_NOT_EMPTY(x); x=CDR(x) )
314  {
315  //printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n");
316 
317  int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method;
318  atom_t element = tokenize( CAR(x), submethod );
319  //printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n");
320 
321  if( IS_ERROR(element) )
322  {
323  DEUSE( result );
324  //DEUSE( last );
325  result = element;
326  break;
327  }
328 
329  int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE );
330  int final_flags = GET_FLAGS( x, FLAG_BEFORE_SPACE );
331 
332  //printf(">>>CAR(x) = "); dump_atom(CAR(x),1); printf("\n");
333  //printf(">>>element = "); dump_atom(element,1); printf("\n");
334  if( IS_FLOAT(element) )
335  {
336  append( USE(element), &result, &last );
337  SET_FLAGS( last, initial_flags|final_flags );
338  }
339  else if( IS_LIST(CAR(x)) )
340  {
341  if( IS_EXTENDED(x) )
342  {
343  append_ex( USE(element), &result, &last );
344  DEUSE( POS(last) );
345  POS( last ) = USE( POS(x) );
346  }
347  else
348  append( USE(element), &result, &last );
349  SET_FLAGS( last, initial_flags|final_flags );
350  }
351  else
352  {
353  for( y=element; IS_NOT_EMPTY(y); y=CDR(y) )
354  {
355  //printf("APPEND SUBELEMENT |");
356  //dump_atom(CAR(y),1);
357  //printf("|\n");
358 
359  if( IS_EXTENDED(y) )
360  {
361  append_ex( USE(CAR(y)), &result, &last );
362  DEUSE( POS(last) );
363  POS( last ) = USE( POS(y) );
364  }
365  else
366  append( USE(CAR(y)), &result, &last );
367  SET_FLAGS( last, FLAGS(y) );
368  if( y==element ) SET_FLAGS( last, initial_flags );
369  }
370  #ifdef SAFE_MODE
371  assert( IS_NOT_EMPTY(last) );
372  #endif
373  SET_FLAGS( last, final_flags);
374  }
375 
376  DEUSE( element );
377  }
378 
379  //printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n");
380  return result;
381  }
382 
383  // The input is a word or a subword
384  chars_t source = STRING(input);
385  int len = LENGTH(input);
386  int origlen = len;
387 
388  chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word
389  //chars_t bp = buffer;
390  char_t ch;
391 
392  int_t errpos = -1;
393  //char_t errchar = NULL_CHAR;
394 
395  int last_token = TOKEN_LINEEND;
396  //int crlf = 0;
397 
398  // Gets the next token. Return:
399  // TOKEN_END if there are no more tokens
400  // TOKEN_WORD if the token is a word
401  // TOKEN_DIRTY_WORD if the token is a word with \ or |
402  // TOKEN_OPEN if the token is [
403  // TOKEN_CLOSE if the token is ]
404  // TOKEN_LINEEND if the token is <nl>
405  // TOKEN_SPACE if at least one whitespace is met
406 
407 
408  //int co=0;
409  int get_token( atom_t *token, int method )
410  {
411  //co++;
412  //if (0 == co%1024)
413  //{
414  //printf("%d ",co);
415  //}
416  // return 1 if buffer contains number
417  int is_number(chars_t bp)
418  { // "E" {digit}* "." {digit}+
419  // 1 2 3 4
420  chars_t cp = bp;
421  int num_mode = 1;
422  char_t ch;
423 
424  cp = bp;
425 
426  if( bp==buffer ) return 0;
427 
428  while( cp>buffer )
429  {
430  cp--;
431  ch = *cp;
432  //printf("num_mode=%d ch=%C\n",num_mode,ch);
433  switch( num_mode )
434  {
435  case 1:
436  if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0;
437  num_mode = 2;
438  break;
439  case 2: ;
440  if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
441  num_mode = 3;
442  break;
443  case 3: ;
444  if( ch!=TEXT('.') ) return 0;
445  num_mode = 4;
446  break;
447  case 4: ;
448  if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
449  break;
450  }
451  //printf("num_mode=%d\n",num_mode);
452  }
453  return 1;
454  }
455 
456  if( !len ) return TOKEN_END;
457 
458  int dirty = 0;
459  int mutated = 0;
460  chars_t bp = buffer; *bp=NULL_CHAR;
461  chars_t sp = source;
462 
463  int mode = MODE_ENTRY; // current mode
464  int code; // action code
465 
466 
467  int stack[MAX_MODE]; // return-to-mode for each mode
468  static int mode_eof[MAX_MODE] =
469  {
470  /* entry */ PAT_TOKEN_END,
471  /* whitespace */ PAT_TOKEN_SPACE,
472  /* word */ PAT_TOKEN_WORD,
473  /* barred */ PAT_ERROR,
474  /* backslashed */ PAT_ERROR,
475  /* tilde */ PAT_ERROR,
476  /* semitilde */ PAT_ERROR,
477  /* semicolon */ PAT_RETURN,
478  /* tildespace */ PAT_ERROR,
479  /* less */ PAT_TOKEN_WORD,
480  /* greater */ PAT_TOKEN_WORD
481  };
482  static int mode_eol[MAX_MODE] =
483  {
484  /* entry */ PAT_NEXT+PAT_TOKEN_LINE,
485  /* whitespace */ PAT_TOKEN_SPACE,
486  /* word */ PAT_TOKEN_WORD,
487  /* barred */ PAT_PUSH+PAT_NEXT,
488  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
489  /* tilde */ PAT_NEXT+PAT_RETURN,
490  /* semitilde */ PAT_NEXT+PAT_RETURN,
491  /* semicolon */ PAT_RETURN,
492  /* tildespace */ PAT_NEXT+PAT_RETURN,
493  /* less */ PAT_TOKEN_WORD,
494  /* greater */ PAT_TOKEN_WORD
495  };
496  static int mode_space[MAX_MODE] =
497  {
498  /* entry */ PAT_NEXT+PAT_GOTO_SPACE,
499  /* whitespace */ PAT_NEXT,
500  /* word */ PAT_TOKEN_WORD,
501  /* barred */ PAT_PUSH+PAT_NEXT,
502  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
503  /* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE,
504  /* semitilde */ PAT_NEXT,
505  /* semicolon */ PAT_NEXT,
506  /* tildespace */ PAT_NEXT,
507  /* less */ PAT_TOKEN_WORD,
508  /* greater */ PAT_TOKEN_WORD
509  };
510  static int mode_open[MAX_MODE] =
511  {
512  /* entry */ PAT_NEXT+PAT_TOKEN_OPEN,
513  /* whitespace */ PAT_TOKEN_SPACE,
514  /* word */ PAT_TOKEN_WORD,
515  /* barred */ PAT_PUSH+PAT_NEXT,
516  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
517  /* tilde */ PAT_TILDE+PAT_RETURN,
518  /* semitilde */ PAT_RETURN,
519  /* semicolon */ PAT_NEXT,
520  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
521  /* less */ PAT_TOKEN_WORD,
522  /* greater */ PAT_TOKEN_WORD
523  };
524  static int mode_close[MAX_MODE] =
525  {
526  /* entry */ PAT_NEXT+PAT_TOKEN_CLOSE,
527  /* whitespace */ PAT_TOKEN_SPACE,
528  /* word */ PAT_TOKEN_WORD,
529  /* barred */ PAT_PUSH+PAT_NEXT,
530  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
531  /* tilde */ PAT_TILDE+PAT_RETURN,
532  /* semitilde */ PAT_RETURN,
533  /* semicolon */ PAT_NEXT,
534  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
535  /* less */ PAT_TOKEN_WORD,
536  /* greater */ PAT_TOKEN_WORD
537  };
538  static int mode_bar[MAX_MODE] =
539  {
541  /* whitespace */ PAT_TOKEN_SPACE,
543  /* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
544  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
545  /* tilde */ PAT_TILDE+PAT_RETURN,
546  /* semitilde */ PAT_RETURN,
548  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
549  /* less */ PAT_TOKEN_WORD,
550  /* greater */ PAT_TOKEN_WORD
551  };
552  static int mode_backslash[MAX_MODE] =
553  {
555  /* whitespace */ PAT_TOKEN_SPACE,
558  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
559  /* tilde */ PAT_TILDE+PAT_RETURN,
560  /* semitilde */ PAT_RETURN,
561  /* semicolon */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
562  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
563  /* less */ PAT_TOKEN_WORD,
564  /* greater */ PAT_TOKEN_WORD
565  };
566  static int mode_tilde[MAX_MODE] =
567  {
569  /* whitespace */ PAT_TOKEN_SPACE,
571  /* barred */ PAT_PUSH+PAT_NEXT,
572  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
573  /* tilde */ PAT_TILDE+PAT_RETURN,
574  /* semitilde */ PAT_RETURN,
576  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
577  /* less */ PAT_TOKEN_WORD,
578  /* greater */ PAT_TOKEN_WORD
579  };
580  static int mode_semicolon[MAX_MODE] =
581  {
583  /* whitespace */ PAT_TOKEN_SPACE,
585  /* barred */ PAT_PUSH+PAT_NEXT,
586  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
587  /* tilde */ PAT_TILDE+PAT_RETURN,
588  /* semitilde */ PAT_RETURN,
589  /* semicolon */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE,
590  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
591  /* less */ PAT_TOKEN_WORD,
592  /* greater */ PAT_TOKEN_WORD
593  };
594  static int mode_else[MAX_MODE] =
595  {
596  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD,
597  /* whitespace */ PAT_TOKEN_SPACE,
598  /* word */ PAT_PUSH+PAT_NEXT,
599  /* barred */ PAT_PUSH+PAT_NEXT,
600  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
601  /* tilde */ PAT_TILDE+PAT_RETURN,
602  /* semitilde */ PAT_RETURN,
603  /* semicolon */ PAT_NEXT,
604  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
605  /* less */ PAT_TOKEN_WORD,
606  /* greater */ PAT_TOKEN_WORD
607  };
608  static int mode_parens[MAX_MODE] =
609  {
610  /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
611  /* whitespace */ PAT_TOKEN_SPACE,
612  /* word */ PAT_TOKEN_WORD,
613  /* barred */ PAT_PUSH+PAT_NEXT,
614  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
615  /* tilde */ PAT_TILDE+PAT_RETURN,
616  /* semitilde */ PAT_RETURN,
617  /* semicolon */ PAT_NEXT,
618  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
619  /* less */ PAT_TOKEN_WORD,
620  /* greater */ PAT_TOKEN_WORD
621  };
622  static int mode_equal[MAX_MODE] =
623  {
624  /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
625  /* whitespace */ PAT_TOKEN_SPACE,
626  /* word */ PAT_TOKEN_WORD,
627  /* barred */ PAT_PUSH+PAT_NEXT,
628  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
629  /* tilde */ PAT_TILDE+PAT_RETURN,
630  /* semitilde */ PAT_RETURN,
631  /* semicolon */ PAT_NEXT,
632  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
633  /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
634  /* greater */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD
635  };
636  static int mode_less[MAX_MODE] =
637  {
638  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS,
639  /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza
640  /* word */ PAT_TOKEN_WORD,
641  /* barred */ PAT_PUSH+PAT_NEXT,
642  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
643  /* tilde */ PAT_TILDE+PAT_RETURN,
644  /* semitilde */ PAT_RETURN,
645  /* semicolon */ PAT_NEXT,
646  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
647  /* less */ PAT_TOKEN_WORD,
648  /* greater */ PAT_TOKEN_WORD
649  };
650  static int mode_greater[MAX_MODE] =
651  {
652  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER,
653  /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza
654  /* word */ PAT_TOKEN_WORD,
655  /* barred */ PAT_PUSH+PAT_NEXT,
656  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
657  /* tilde */ PAT_TILDE+PAT_RETURN,
658  /* semitilde */ PAT_RETURN,
659  /* semicolon */ PAT_NEXT,
660  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
661  /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
662  /* greater */ PAT_TOKEN_WORD
663  };
664 
665 
666 again:
667  // get action code
668  GET_CHAR;
669 
670 
671 #ifdef DEBUG_TOKENS
672  printf("<TOKENS> length=%d\n",len);
673  if(ch<TEXT(' '))
674  printf("<TOKENS> get(#%d)\t",ch);
675  else
676  printf("<TOKENS> get('%c')\t",ch);
677  switch( mode )
678  {
679  case MODE_ENTRY: printf(" ENTRY -> "); break;
680  case MODE_SPACE: printf(" SPACE -> "); break;
681  case MODE_WORD: printf(" WORD -> "); break;
682  case MODE_BARRED: printf(" BARRED -> "); break;
683  case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break;
684  case MODE_TILDE: printf(" TILDE -> "); break;
685  case MODE_SEMITILDE: printf(" SEMITILDE -> "); break;
686  case MODE_SEMICOLON: printf(" SEMICOLON -> "); break;
687  case MODE_TILDESPACE: printf("TILDESPACE -> "); break;
688  }
689 #endif
690  code = mode_else[mode];
691  if( !len )
692  code = mode_eof[mode];
693  else
694  {
695  if( method==TOKENIZE_READWORD )
696  { // tokenize as expected by READWORD
697  if( ch==TEXT('|') ) code = mode_bar[mode];
698  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
699  }
700  else
701  if( method==TOKENIZE_READLIST )
702  { // tokenize as data
703  if( ch==TEXT('\n') ) code = mode_eol[mode];
704  else if( ch==TEXT('\r') ) code = mode_eol[mode];
705  else if( ch<=TEXT(' ') ) code = mode_space[mode];
706  else if( ch==TEXT('[') ) code = mode_open[mode];
707  else if( ch==TEXT(']') ) code = mode_close[mode];
708  else if( ch==TEXT('|') ) code = mode_bar[mode];
709  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
710  else if( ch==TEXT('~') ) code = mode_tilde[mode];
711  }
712  else
713  if( method==TOKENIZE_DATA )
714  { // tokenize as data
715  if( ch==TEXT('\n') ) code = mode_eol[mode];
716  else if( ch==TEXT('\r') ) code = mode_eol[mode];
717  else if( ch<=TEXT(' ') ) code = mode_space[mode];
718  else if( ch==TEXT('[') ) code = mode_open[mode];
719  else if( ch==TEXT(']') ) code = mode_close[mode];
720  else if( ch==TEXT('|') ) code = mode_bar[mode];
721  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
722  else if( ch==TEXT('~') ) code = mode_tilde[mode];
723  else if( ch==TEXT(';') ) code = mode_semicolon[mode];
724  }
725  else
726  { // tokenize as commands
727  //printf("ch=%c *buf=%c\n",ch,*buffer);
728  if( ch==TEXT('(') ||
729  ch==TEXT(')') ) code = mode_parens[mode];
730  else if( *buffer!=TEXT('"') )
731  {
732  if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { }
733  else if( ch==TEXT('+') ||
734  ch==TEXT('-') ||
735  ch==TEXT('*') ||
736  ch==TEXT('/')) code = mode_parens[mode];
737  else if( ch==TEXT('=') ) code = mode_equal[mode];
738  else if( ch==TEXT('<') ) code = mode_less[mode];
739  else if( ch==TEXT('>') ) code = mode_greater[mode];
740  }
741  }
742  }
743  // process action code
744  int _stack = -1;
745 
746  int newmode = 0;
747  if( code & PAT_GOTO )
748  {
749  // this is pred-processing of GOTO
750  // if old mode was space, and new is not,
751  // then update initial position of next token
752  newmode = (code>>PAT_SHIFT) & 0xF;
753  if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; }
754  }
755 
756 
757  if( code & PAT_TILDE )
758  {
759  *bp++ = TEXT('~');
760  #ifdef DEBUG_TOKENS
761  printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~'));
762  #endif //DEBUG_TOKENS
763  }
764  if( code & PAT_PUSH )
765  {
766  // push a character only if:
767  // - currently not in bars |..?..|
768  // - currently in bars, but not in semicolon ;..|..?..|
769  if( mode!=MODE_BARRED ||
770  (stack[mode]!=MODE_SEMITILDE &&
771  stack[mode]!=MODE_SEMICOLON) )
772  {
773  //if( mode==MODE_BARRED || mode==MODE_BACKSLASHED )
774  //*bp++ = ENBAR(ch);
775  //else
776  *bp++ = ch;
777  #ifdef DEBUG_TOKENS
778  printf("\n<TOKENS> put('%c'/%d)\n",ch,ch);
779  #endif //DEBUG_TOKENS
780  //if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases
781  //{
782  //*bp++ = '\n';
783  //#ifdef DEBUG_TOKENS
784  // printf("\n<TOKENS> put('%d')\n",'\n');
785  // #endif //DEBUG_TOKENS
786  //}
787  }
788  }
789  if( code & PAT_NEXT2 )
790  {
791  source++;
792  len--;
793  }
794  if( code & (PAT_NEXT|PAT_NEXT2) )
795  {
796  if( *source=='\r' )
797  {
798  //crlf = 0;
799  if( *(source+1)=='\n' )
800  {
801  //crlf = 1;
802  source++;
803  len--;
804  }
805  }
806  source++;
807  len--;
808  }
809  if( code & PAT_DIRTY ) dirty = 1;
810  if( code & PAT_MUTATED ) mutated = 1;
811  if( code & PAT_ERROR_POS ) errpos = origlen-len-1;
812  if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD;
813  if( code & PAT_RETURN_TO_SELF ) _stack = mode;
814  if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode];
815  if( code & PAT_GOTO )
816  {
817  // this is post-processing of GOTO
818  mode = newmode;
819  stack[mode] = _stack;
820  }
821  if( code & PAT_RETURN ) mode = stack[mode];
822  if( code & PAT_TOKEN )
823  {
824  int _token = (code>>PAT_SHIFT) & 0xF;
825  if( _token!=TOKEN_WORD ) return _token;
826  if( mutated )
827  {
828  *bp = NULL_CHAR;
829  *token = new_word( buffer, UNKNOWN );
830  #ifdef DEBUG_TOKENS
831  printf("MUTATED TOKEN "); dumpln(*token);
832  printf("\n\n");
833  #endif
834  }
835  else
836  {
837  *token = new_subword( input, sp, source-sp /*bp-buffer*/ );
838  #ifdef DEBUG_TOKENS
839  printf("NORMAL TOKEN **"); dump(*token);
840  printf("** (len=%d)\n\n\n",source-sp);
841  #endif
842  }
843  return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD;
844  }
845  if( code & PAT_ERROR )
846  {
847  #ifdef DEBUG_TOKENS
848  printf("ERROR\n");
849  #endif
850  return TOKEN_ERROR;
851  }
852 
853 #ifdef DEBUG_TOKENS
854  switch( mode )
855  {
856  case MODE_ENTRY: printf("ENTRY\n"); break;
857  case MODE_SPACE: printf("SPACE\n"); break;
858  case MODE_WORD: printf("WORD\n"); break;
859  case MODE_BARRED: printf("BARRED\n"); break;
860  case MODE_BACKSLASHED: printf("BACKSLASH\n"); break;
861  case MODE_TILDE: printf("TILDE\n"); break;
862  case MODE_SEMITILDE: printf("SEMITILDE\n"); break;
863  case MODE_SEMICOLON: printf("SEMICOLON\n"); break;
864  case MODE_TILDESPACE: printf("TILDESPACE\n"); break;
865  }
866 #endif
867  goto again;
868  } // get_token()
869 
870 
871  atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2//
872  {
873  atom_t result = empty_list;
874  atom_t last = empty_list;
875  if( pos ) *pos = NULL; //ex2//
876 
877  atom_t token = NULL;
878  atom_t sublist_pos = NULL; //ex2//
879  int flags;
880  int bracketlen = len; // LEN of last opening bar
881 
882  int pos_from = source-STRING(input); //ex2//
883  if( pos_from ) pos_from--; //ex2//
884 
885  flags = 0;//FLAG_NEWLINE;
886  while( (last_token=get_token(&token,full_parse)) )
887  {
888  sublist_pos = NULL;
889 
890  #ifdef DEBUG_TOKENIZATION
891  switch(last_token)
892  {
893  case TOKEN_END: printf("TOKEN_END\n"); break;
894  case TOKEN_SPACE: printf("TOKEN_SPACE\n"); break;
895  case TOKEN_WORD: printf("TOKEN_WORD @"); dump(token); printf("@\n"); break;
896  case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD| @"); dump(token); printf("@\n"); break;
897  case TOKEN_OPEN: printf("TOKEN_OPEN [\n"); break;
898  case TOKEN_CLOSE: printf("TOKEN_CLOSE ]\n"); break;
899  case TOKEN_LINEEND: printf("TOKEN_LINEEND\n"); break;
900  case TOKEN_ERROR: printf("TOKEN_ERROR\n"); break;
901  }
902  #endif
903 
904  if( last_token==TOKEN_ERROR ) return result;
905  if( last_token==TOKEN_SPACE )
906  {
907  if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE );
908  flags |= FLAG_AFTER_SPACE;
909  continue;
910  }
911  if( last_token==TOKEN_DIRTY_WORD )
912  {
913  //printf("###BEFORE="); dumpln(token);
914  atom_t x = purify( token );
915  DEUSE( token );
916  token = x;
917  //printf("###AFTER="); dumpln(token);
918  }
919  if( last_token==TOKEN_CLOSE ) break;
920  if( last_token==TOKEN_LINEEND )
921  {
922  flags |= FLAG_NEWLINE;
923  continue;
924  }
925  if( last_token==TOKEN_OPEN)
926  {
927  token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2//
928 
929  if( last_token==TOKEN_ERROR )
930  {
931  DEUSE( token );
932  if( sublist_pos ) DEUSE(sublist_pos);
933  sublist_pos = NULL;
934  break;
935  }
936  }
937 
938  if( method==TOKENIZE_COMMANDS )
939  flags |= FLAG_TOKENIZED_COMMANDS;
940 
941  if( method==TOKENIZE_DATA )
942  flags |= FLAG_TOKENIZED_DATA;
943 
944  if( method==TOKENIZE_COMMANDS &&
945  last_token==TOKEN_WORD &&
946  LENGTH(token)>1 &&
947  *STRING(token)==TEXT('?') &&
948  *(STRING(token)+1)>=TEXT('0') &&
949  *(STRING(token)+1)<=TEXT('9') )
950  {
951  //printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD);
952  // process template ?nn->(? nn) for command tokenization
953  //printf("append token **"); dump(token); printf("**\n");
954  atom_t new_qoken = new_subword( token, STRING(token), 1 );
955  atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 );
956  DEUSE( token );
957 
958  append( new_word(TEXT("("),-1), &result, &last ); // (
959  append( new_qoken, &result, &last ); // ?
960  append( new_token, &result, &last ); // nn
961  append( new_word(TEXT(")"),-1), &result, &last ); // )
962  }
963  else
964  {
965  // normal token, no more processing needed
966  if( sublist_pos ) //ex//
967  {
968  append_ex( token, &result, &last );
969  POS( last ) = sublist_pos;
970 
971  //printf("\n\nSET EXTENDED POSITION ");
972  //dump_atom(sublist_pos,1); printf("\n");
973  //printf("CURRENT RESULT ");
974  //dump_atom(result,1); printf("\n\n");
975 
976  sublist_pos = NULL;
977  }
978  else
979  {
980  append( token, &result, &last );
981  }
982  SET_FLAGS( last, flags );
983  }
984 
985  flags = 0;
986  if( last_token==TOKEN_ERROR ) break;
987  }
988 
989  // test for unmatching square brackets
990  // i.e. ...[... or ...]...
991  if( level )
992  {
993  if( last_token==TOKEN_END )
994  {
995  errpos = origlen-bracketlen-1;
996  last_token = TOKEN_ERROR;
997  }
998  }
999  else
1000  {
1001  if( last_token==TOKEN_CLOSE )
1002  {
1003  errpos = origlen-len-1;
1004  last_token = TOKEN_ERROR;
1005  }
1006  }
1007 
1008  int pos_to = source-STRING(input);
1009  if( pos_to ) pos_to--;
1010 
1011  if( pos && method == TOKENIZE_DATA ) //ex2//
1012  {
1013  *pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 );
1014  }
1015 
1016  return result;
1017  } // get_sublist()
1018 
1019  atom_t result = get_sublist( 0, method, NULL ); //ex//
1020 
1021  // in case of error return empty list
1022  if( last_token==TOKEN_ERROR )
1023  {
1024  //printf("ERROR RESULT=");dumpln(result);
1025  DEUSE( result );
1026  result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input );
1027  }
1028 
1029 
1030  DEALLOC( buffer );
1031  //printf("#########");
1032  //dump_atom(result,1);
1033  //printf("######\n");
1034 
1035  return result;
1036 }
atom_t trim_shell_comment ( atom_t  word)
Parameters
wordword containing source text
Returns
atom with the source text with trimmed shell comment

Trims a shell comment from the beginning of the word. Shell comment can be only the first line if its first two characters are #!. If a shell comment is trimmed, then the result is a subword from the first character on the second line, otherwise the input word is returned as is but with increased reference count.

Definition at line 1055 of file parser.c.

1056 {
1057  #ifdef SAFEMODE
1058  assert( IS_WORD(word) || IS_SUBWORD(word) );
1059  #endif
1060 
1061  chars_t source = STRING(word);
1062  int_t len = LENGTH(word);
1063 
1064  // if there are no enough characters just exit
1065  if( LENGTH(word)<2 ) return USE(word);
1066 
1067  // if the first two characters are not #! then exit
1068  if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word);
1069 
1070  // skip the line
1071  while( len && *source!=TEXT('\n') )
1072  {
1073  source++;
1074  len--;
1075  }
1076 
1077  // return a subword. Pay attention to always reference
1078  // the main host word because the input could be a word
1079  // or a subword.
1080  if( IS_WORD(word) )
1081  return new_subword( word, source, len );
1082  else
1083  return new_subword( WORD(word), source, len );
1084 }
atom_t purify ( atom_t  word)
Parameters
wordword to be purified
Returns
purified word

Purifies a word by processing all backslashes and bars. Returns a new word if needed. Assumes that the input needs purification.

Definition at line 1099 of file parser.c.

1100 {
1101  //return USE(word);
1102  #ifdef SAFEMODE
1103  assert( IS_WORD(word) || IS_SUBWORD(word) );
1104  #endif
1105 
1106  chars_t source = STRING(word);
1107  int_t len = LENGTH(word);
1108 
1109  chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word
1110  chars_t bp = buffer;
1111 
1112  int need_enbar = 0;
1113  int is_mutated = 0; // set to 1 if the word is mutated
1114  int in_backslash = 0;
1115  int in_bars = 0;
1116  for( ; len; len--,source++ )
1117  {
1118  need_enbar = in_bars || in_backslash;
1119  if( in_backslash )
1120  {
1121  in_backslash = 0;
1122  }
1123  else if( *source==TEXT('\\') )
1124  {
1125  is_mutated = 1;
1126  in_backslash = 1;
1127  continue;
1128  }
1129  else if( *source==TEXT('|') )
1130  {
1131  is_mutated = 1;
1132  in_bars = !in_bars;
1133  continue;
1134  }
1135  if( need_enbar )
1136  *bp++ = ENBAR(*source);
1137  else
1138  *bp++ = *source;
1139 
1140  //if( need_enbar )
1141  //printf(" PURIFY %d %d\n",*source,ENBAR(*source));
1142  //else
1143  //printf(" PURIFY %d \n",*source );
1144  }
1145 
1146  *bp = NULL_CHAR;
1147 
1148  if( is_mutated )
1149  return new_word( buffer, bp-buffer );
1150  else
1151  return USE(word);
1152 }
atom_t parentheses ( atom_t  input)
Parameters
inputflat list
Returns
list with nested sublists

Processes all (...) of a flat list by making them as sublists. Returns ERROR_INCOMPLETE_PAIR if the parentheses are not paired well.

Definition at line 1167 of file parser.c.

1168 {
1169  atom_t result = empty_list;
1170 
1171  atom_t _paren( int level, atom_t openparen, int* last_flags )
1172  {
1173 #ifdef DEBUG_PARENTHESES
1174  printf("<PAREN> ENTER LEVEL %d INPUT=", level);
1175  dumpln(input);
1176 #endif //DEBUG_PARENTHESES
1177 
1178  *last_flags = 0;
1179 
1180  atom_t result = empty_list;
1181  atom_t last = empty_list;
1182  while( IS_NOT_EMPTY(input) )
1183  {
1184  // get current element
1185  atom_t elem = USE(CAR(input));
1186  int flags = FLAGS(input);
1187 
1188 #ifdef DEBUG_PARENTHESES
1189  printf("<PAREN> READ ELEMENT ");
1190  dumpln(elem);
1191 #endif //DEBUG_PARENTHESES
1192 
1193  // if it is ( then call _parse() recursively
1194  // if it is ) then exit current _parse()
1195  if( IS_WORD(elem)||IS_SUBWORD(elem) )
1196  if( LENGTH(elem)==1 )
1197  {
1198  if( *STRING(elem)==TEXT('(') ) // process (
1199  {
1200  int f;
1201  DEUSE( elem ); //compensate USE() when elem was retrieved
1202  input = CDR(input);
1203  elem = _paren( level+1, elem, &f );
1204  flags |= f;
1205  if( IS_ERROR(elem) )
1206  {
1207  DEUSE( result );
1208  return elem;
1209  }
1210  }
1211  else if( *STRING(elem)==TEXT(')') )// process )
1212  {
1213  DEUSE( elem ); //compensate USE() when elem was retrieved
1214  *last_flags = FLAGS(input);
1215  break;
1216  }
1217  }
1218 
1219 #ifdef DEBUG_PARENTESES
1220  printf("<PAREN> APPEND ");
1221  dumpln(elem);
1222 #endif //DEBUG_PARENTHESES
1223 
1224  // append current element
1225  if( IS_EXTENDED(input) )
1226  {
1227  append_ex( elem, &result, &last );
1228  DEUSE( POS(last) );
1229  POS( last ) = USE( POS( input ) );
1230  }
1231  else
1232  append( elem, &result, &last );
1233  FLAGS(last) |= flags;
1234 
1235  // empty list
1236  if( IS_EMPTY(input) ) break;
1237 
1238  // move to next element
1239  input = CDR(input);
1240  }
1241 
1242  if( level && IS_EMPTY(input) )
1243  {
1244  DEUSE( result );
1245  return new_error( ERROR_INCOMPLETE_PAIR, openparen );
1246  }
1247  if( !level && !IS_EMPTY(input) )
1248  {
1249  DEUSE( result );
1250  return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) );
1251  }
1252  if( level && IS_EMPTY(result) )
1253  {
1254  return new_error( ERROR_EMPTY_EXPRESSION, openparen );
1255  }
1256 
1257 #ifdef DEBUG_PARENTHESES
1258  printf("<PAREN> EXIT LEVEL %d RESULT=", level);
1259  dumpln(result);
1260 #endif //DEBUG_PARENTHESES
1261  if( level ) SET_FLAGS( result, FLAG_EXPRESSION );
1262  return result;
1263  } // _paren()
1264 
1265 
1266  int f;
1267  result = _paren( 0, empty_list, &f );
1268 
1269  return result;
1270 }
atom_t define_user_function ( atom_t  source,
atom_t  input,
atom_t  parent 
)
Parameters
sourceword containing the source
inputcontents between TO and END
parentcontext var where creation is done
Returns
var atom or error atom

Defines new user-defined command or function. The input contains the contents of the function – these are all tokens between TO and END. Var creation is done in several steps:

  • a new var atom is created with some temporary name
  • the left inputs are determined and created as local variables to the var atom. Left inputs are all consequitive words which start with colon : and are on a single line
  • the first token which cannot be a left input is considered as a function name. This token must be a word which does not start with colon : or double quotes ".
  • the right inputs are all tokens after the name which start with colon : and are on the same line.
  • a "..." after the last right input (or after the procedure name if there are no right inputs) determines that the procedure has unlimited number of accpeted inputs.
  • the body of the function is everything after the last right input (or funtion name) till the end. The only compulsory element of a function is its name. The left and right inputs could be missing. The body could be empty.

Definition at line 1310 of file parser.c.

1311 {
1312  #ifdef DEBUG_TO_END
1313  printf("<TO-END> DEFINING=");
1314  dumpln( input);
1315  #endif
1316 
1317  //atom_t last;
1318  atom_t a = input;
1319  atom_t name;
1320  atom_t lhi; // last_header_item
1321 
1322  // create the function
1323  atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to);
1324  need_descr2( function );
1325  SET_FLAGS( function, FLAG_FUNCTION );
1326  LARGS( function ) = 0;
1327  RARGS( function ) = 0;
1328 
1329  // collecting left parameters
1330  while( IS_NOT_EMPTY(a) &&
1331  !GET_FLAGS(a,FLAG_NEWLINE) &&
1332  IS_ANY_WORD(CAR(a)) &&
1333  (*STRING(CAR(a))==TEXT(':')) )
1334  {
1335  lhi = CAR(a);
1336  atom_t var = new_local_var( CAR(a), function, 1 );
1337  if( IS_ERROR(var) )
1338  {
1339  DEUSE( input );
1340  //DEUSE( function );
1341  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1342  return var;
1343  }
1344 
1345  SET_FLAGS( var, FLAG_VARIABLE );
1346  LARGS( function )++;
1347 #ifdef SAFE_MODE
1348  assert( LARGS(function)<255 );
1349 #endif
1350  a = CDR(a);
1351  }
1352 
1353  // collecting name
1354  if( IS_NOT_EMPTY(a) &&
1355  !GET_FLAGS(a,FLAG_NEWLINE) &&
1356  IS_ANY_WORD(CAR(a)) &&
1357  (*STRING(CAR(a))!=TEXT('"')) )
1358  {
1359  lhi = CAR(a);
1360  name = CAR(a);
1361  a = CDR(a);
1362  }
1363  else
1364  {
1365  DEUSE( input );
1366  return new_error( ERROR_MISSING_NAME, a );
1367  }
1368 
1369  //printf("<TO-END> DEFINING="); dumpln( name );
1370 
1371  // collecting right parameters
1372  while( IS_NOT_EMPTY(a) &&
1373  !GET_FLAGS(a,FLAG_NEWLINE) &&
1374  IS_ANY_WORD(CAR(a)) &&
1375  (*STRING(CAR(a))==TEXT(':')) )
1376  {
1377  lhi = CAR(a);
1378  atom_t var = new_local_var( CAR(a), function, 1 );
1379  if( IS_ERROR(var) )
1380  {
1381  DEUSE( input );
1382  //DEUSE( function );
1383  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1384  return var;
1385  }
1386  SET_FLAGS( var, FLAG_VARIABLE );
1387  RARGS( function )++;
1388 #ifdef SAFE_MODE
1389  assert( RARGS(function)<255 );
1390 #endif
1391  a = CDR(a);
1392  }
1393 
1394  // collect "..."
1395  if( IS_NOT_EMPTY(a) &&
1396  !GET_FLAGS(a,FLAG_NEWLINE) &&
1397  IS_ANY_WORD(CAR(a)) &&
1398  (LENGTH(CAR(a))==3) &&
1399  (*(STRING(CAR(a))+0)==TEXT('.')) &&
1400  (*(STRING(CAR(a))+1)==TEXT('.')) &&
1401  (*(STRING(CAR(a))+2)==TEXT('.')) )
1402  {
1403  SET_FLAGS( function, FLAG_INFINITE_ARGS );
1404  a = CDR(a);
1405  }
1406 
1407 
1408  // set offset of parameters
1409  int offset = BASE_OFFSET_PARAMS; // this is the start offset
1410  atom_t x;
1411  for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) )
1412  {
1413  //printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset);
1414  OFFSET(CAR(x)) = offset;
1415  offset += sizeof( atom_t );
1416  }
1417 
1418 
1419  // check whether the function is already defined
1420  atom_t var = find_local_var( name, parent );
1421  if( var )
1422  {
1423  if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) )
1424  {
1425  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1427  DEUSE( input );
1428  return result;
1429  }
1430 
1431  // forget data of the old function and
1432  // reuse data of the new function
1433  DEUSE( FULLSOURCE(var) );
1434  DEUSE( SOURCE(var) );
1435  DEUSE( LOCALS(var) );
1436  DEUSE( BODY(var) );
1437  DEUSE( TREE(var) );
1438  DEUSE( BINARY(var) );
1439 
1440  TREE(var) = empty_list;
1441  BINARY(var) = empty_list;
1442 
1443  BODY(var) = USE(a); //USE(BODY(function));
1444  LOCALS(var) = USE(LOCALS(function));
1445 
1446  LOCALS(parent) = behead(LOCALS(parent));
1447  //DEUSE(function);
1448  function = var;
1449  }
1450  else
1451  {
1452  DEUSE( NAME(function) );
1453  NAME( function ) = USE(name);
1454  BODY( function ) = USE(a);
1455  }
1456 
1457  //LEVEL( function ) = level;
1458  ADDRESS( function ) = 0;
1459  PRIORITY( function ) = PRIORITY_FUN;
1460 
1461  // cut header and 'END' from source
1462  chars_t new_src = STRING(lhi)+LENGTH(lhi);
1463  int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end);
1464  SOURCE( function ) = new_subword( source, new_src, new_len );
1465  FULLSOURCE( function ) = USE( source );
1466 
1467  //DEUSE( input );
1468  #ifdef DEBUG_TO_END
1469  printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function));
1470  printf("<TO-END> SOURCE "); dumpln(SOURCE(function));
1471  printf("<TO-END> BODY "); dumpln(BODY(function));
1472  printf("<TO-END> TREE "); dumpln(TREE(function));
1473  #endif
1474 
1475  //printf("@@@@@@DEFINE "); dump_atom(NAME(function),1);
1476  //printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1);
1477  //printf("@@@@@@\n");
1478 
1479 
1480  return function;
1481 }
atom_t preparse ( atom_t  input,
atom_t  parent,
int  level 
)
Parameters
inputlist with tokenized source code
parentcontext var where preparsing is done
levelcurrent level of nesting return source without TO ... END definitions

Pre-parses a flat tokenized list by extracting all local TO ... END definitions and creating corresponding local functions. Returns the remaining source code.

Definition at line 1499 of file parser.c.

1500 {
1501  #ifdef DEBUG_TO_END
1502  printf("<TO-END> ENTER1=");
1503  dumpln( input);
1504  #endif
1505 
1506  atom_t a = input;
1507  atom_t before_a = NULL;
1508  atom_t before_to = NULL;
1509  atom_t first_to = NULL;
1510  int to_end_depth = 0;
1511  while( IS_NOT_EMPTY(a) )
1512  {
1513  if( IS_ANY_WORD(CAR(a)) )
1514  {
1515  // found TO or its synonym
1516  if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) )
1517  {
1518  if( to_end_depth==0 )
1519  {
1520  before_to = before_a;
1521  first_to = a;
1522  }
1523  to_end_depth++;
1524  }
1525 
1526  // found END
1527  if( same_words( CAR(a), word_end ))
1528  {
1529  // END without to ?
1530  if( to_end_depth==0 )
1531  {
1532  atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) );
1533  DEUSE( input );
1534  return result;
1535  }
1536 
1537  to_end_depth--;
1538  if( to_end_depth==0 )
1539  {
1540  // found END corresponding to TO.
1541  // extract the whole TO..END
1542  if( first_to==before_a )
1543  {
1544  DEUSE( input );
1545  return new_error( ERROR_EMPTY_TO_END, first_to );
1546  }
1547 
1548  atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end)));
1549  //atom_t word = empty_list;
1550 
1551  first_to = behead(first_to);
1552  //first_to = CDR(first_to);
1553  CDR(before_a) = empty_list;
1554  before_a = before_to;
1555  a = behead(a);
1556  if( before_to )
1557  CDR(before_a) = a; // TO is not first item
1558  else
1559  input = a; // TO is first item
1560 
1561  #ifdef DEBUG_TO_END
1562  printf("<TO-END> EXTRACT:");
1563  dumpln( first_to );
1564  printf("<TO-END> CURRENT INPUT:");
1565  dumpln( input );
1566  printf("<TO-END> DEFINE IN PARENT:");
1567  dumpln( parent );
1568  #endif
1569 
1570  // create new command/function
1571  atom_t var = define_user_function( word, (first_to), parent );
1572  DEUSE( word );
1573  if( IS_ERROR(var) )
1574  {
1575  DEUSE( input );
1576  return var;
1577  }
1578  DEUSE( first_to );
1579 
1580  // process recursively the function
1581  // for nested to-end definitions
1582  BODY(var) = preparse( BODY(var), var, level+1 );
1583 
1584  // next item is set explicitely,
1585  SET_FLAGS( a, FLAG_NEWLINE );
1586  first_to = NULL;
1587  before_to = NULL;
1588  continue;
1589  }
1590  }
1591  }
1592 
1593  // next item
1594  before_a = a;
1595  a = CDR(a);
1596  }
1597 
1598  // TO without END ?
1599  if( first_to )
1600  {
1601  atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) );
1602  DEUSE(input);
1603  return result;
1604  }
1605 
1606  #ifdef DEBUG_TO_END
1607  printf("<TO-END> LEFTOVERS=");
1608  dumpln( input);
1609  printf("\n");
1610  #endif
1611 
1612  return input;
1613 }
parse ( atom_t  input,
atom_t  parent,
int  top_level 
)
Parameters
inputunparsed list
parentparent var in which parsing is done
top_levelflag whether parsing is at its top-level
Returns
parsed abstract syntax tree

Parses a list into a tree (aka LISP notation). The list is supposed to be tokenized as commands. The parent variable provides a context of variables which can be referenced from the parsed input.

running_compiled_code && bug #3442773

Definition at line 1632 of file parser.c.

1633 {
1635  atom_t data[MAX_ELEMS];
1636  atom_t poses[MAX_ELEMS]; // source position (if available)
1637  int pris[MAX_ELEMS];
1638  int usedby[MAX_ELEMS];
1639  int largs[MAX_ELEMS]; // number of unlinked left arguments
1640  int rargs[MAX_ELEMS]; // number of unlinked right arguments
1641  int count; // count of elements in above arrays
1642 
1643  atom_t aux = empty_list; // additional lists to delete
1644 
1645  //printf("parse==");dumpln(input);
1646 
1647  #ifdef DEBUG_PARSE
1648  void dump_arrays( )
1649  {
1650  int i;
1651  for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN )
1652  {
1653  printf("<PARSE> %2d.[used by %2d; free=%d:%d] v%c0 pr=%d =",
1654  i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]);
1655  dump_atom(data[i],1);
1656  if( !IS_UNBOUND(poses[i]) )
1657  {
1658  printf(" @ ");
1659  dump_atom(poses[i],1);
1660  }
1661  printf("\n");
1662  }
1663  printf("\n");
1664  }
1665  #endif
1666 
1667 
1668  // Finds the first line of input. Return a pointer
1669  // to the first node after the line. Sets arrays
1670  // vars[], data[] and prios[].
1671  atom_t get_line( atom_t input )
1672  {
1673  int has_no_infinite = 1;
1674  count = 0;
1675  while( IS_NOT_EMPTY(input) )
1676  {
1677  atom_t elem = USE(CAR(input));
1678  //printf("testing element ");
1679  //dumpln(elem);
1680 
1681  #ifdef SAFEMODE
1682  assert( count<MAX_ELEMS );
1683  #endif
1684  data[count] = elem;
1685  vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL;
1686  if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) )
1687  {
1688  // If we have variable called 4 (e.g. MAKE 4 "BOZA)
1689  // then VAR!=NULL, but this is allowed only for
1690  // functions and commands, so set VAR=NULL!
1691  VAR=NULL;
1692  }
1693  pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX);
1694  usedby[count] = -1;
1695  largs[count] = VAR ? LARGS(VAR) : 0;
1696  rargs[count] = VAR ? RARGS(VAR) : 0;
1697  poses[count] = IS_EXTENDED(input)?POS(input):unbound;
1698 
1699  if( VAR && IS_VARIABLE(VAR) )
1700  {
1701  printf("found variable as function: ");
1702  dump_atom(NAME(VAR),1);
1703  printf("\n");
1704  }
1705 
1706  // if element is a word not starting with ":" and
1707  // cannot be transfered into a number and does not
1708  // exist as a variable, then this is unknown function
1709  float64_t x;
1710  if( IS_ANY_WORD(elem) &&
1711  !VAR &&
1712  *STRING(elem)!=TEXT(':') &&
1713  *STRING(elem)!=TEXT('"') &&
1714  !atom_to_float(elem,&x) )
1715  {
1716  //DEUSE( input );
1717  DEUSE( elem );
1718  return new_error( ERROR_DO_NOT_KNOW, elem );
1719  }
1720 
1721  if( !top_level && VAR && has_no_infinite )
1722  {
1724  {
1725  rargs[count]=-1;
1726  has_no_infinite = 0;
1727  }
1728  }
1729 
1730  // test for unary
1731  if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 )
1732  {
1733  // case 1: <spc> op <spc> -> binary
1734  // case 2: <spc> op -> unary
1735  // case 3: op <spc> -> binary
1736  // case 4: op -> binary
1738  == FLAG_AFTER_SPACE )
1739  largs[count]=0;
1740  }
1741 
1742  count++;
1743  //next:
1744  input = CDR(input);
1745 
1746  // if the next item is on a new line AND if it is
1747  // not constant-list AND we are not in a sublist
1748  // then break the line here
1749  if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) &&
1750  (!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) )
1751  break;
1752  } // line while
1753 
1754  return input;
1755  } //get_line()
1756 
1757  #ifdef DEBUG_PARSE
1758  printf("<PARSE> ENTER=");
1759  dumpln( input);
1760  #endif
1761 
1762 
1763  //-----------------------------------------
1764  // STEP 1: Convert (..) into subexpressions
1765  //-----------------------------------------
1766  if( IS_EMPTY(input) ) return input;
1767  if( IS_ERROR(input) ) return input;
1768  atom_t orig_input = input = parentheses( input );
1769  if( IS_ERROR(input) ) return input;
1770 
1771  #ifdef DEBUG_PARSE
1772  printf("<PARSE> PARENTHESISEZ=");
1773  dumpln( input);
1774  #endif
1775 
1776  // if there is no parent consider the root as parent
1777  if( !parent ) parent = root;
1778 
1779 
1780  //-----------------------------------------
1781  // STEP 2-7: Convert list into lisp line-by-line
1782  //-----------------------------------------
1783  atom_t result = empty_list;
1784  atom_t last = empty_list;
1785 
1786  atom_t err = NULL;
1787 
1788  int i,j,finished;
1789  int* args;
1790 
1791 #define LINK \
1792  { \
1793  usedby[j] = i; \
1794  args[i]--; \
1795  }
1796  //printf("%d is used by %d\n",j,i);
1797 
1798 #define RELINK \
1799  { \
1800  if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
1801  if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
1802  LINK; \
1803  }
1804 
1805  while( IS_NOT_EMPTY(input) ) // main while
1806  {
1807  next_line:
1808  #ifdef DEBUG_PARSE
1809  printf("\n\n");
1810  printf("<PARSE>------------------\n");
1811  printf("<PARSE> START A NEW LINE \n");
1812  printf("<PARSE>------------------\n");
1813  #endif
1814 
1815  //-----------------------------------
1816  // STEP 2: Get a line from the input
1817  //-----------------------------------
1818  input = get_line( input );
1819  if( IS_ERROR(input) )
1820  {
1821  err = input;
1822  break;
1823  }
1824  #ifdef DEBUG_PARSE
1825  printf("<PARSE> ORIGINAL PARSE_LIST:\n");
1826  dump_arrays( );
1827  #endif
1828  //printf("<PARSE> LEFTOVER:"); dumpln(input);
1829  //printf("------------------------------------\n");
1830 
1831 
1832  //-----------------------------------
1833  // STEP 3: Analyze and set dependencies between items
1834  //-----------------------------------
1835  finished = 0;
1836  while( !finished )
1837  {
1838  finished = 1;
1839  for( i=0; i<count; i++ )
1840  {
1841 
1842  // now process right parameters
1843  args = rargs;
1844  for( j=i+1; args[i] && j<count; j++ )
1845  {
1846  if( vars[j] && !IS_FUNCTION(vars[j]) )
1847  break;
1848  if( usedby[j]==-1 )
1849  LINK
1850  else
1851  if( (usedby[j]>j || usedby[j]<i) &&
1852  (pris[usedby[j]]<=pris[i]) )
1853  RELINK;
1854  } // for largs
1855 
1856  // now process left parameters
1857  args = largs;
1858  for( j=i-1; args[i] && j>=0; j-- )
1859  {
1860  //if(vars[j])
1861  //{
1862  //printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j]));
1863  //dumpln(vars[j]);
1864  //}
1865  if( vars[j] && !IS_FUNCTION(vars[j]) )
1866  break;
1867  if( usedby[i]==j ) break;
1868  if( usedby[j]==-1 )
1869  {
1870  /*if( usedby[i]!=j )*/ LINK; // skip direct circular references
1871  }
1872  else
1873  if( (usedby[j]<j || usedby[j]>i) &&
1874  (pris[usedby[j]]<pris[i]) )
1875  {
1876  /*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference
1877  }
1878  } // for largs
1879  } // for each element
1880  } // while not finished
1881 
1882  #ifdef DEBUG_PARSE
1883  printf("<PARSE> FINAL PARSE_LIST:\n");
1884  dump_arrays( );
1885  #endif
1886 
1887 
1888  //-----------------------------------
1889  // STEP 4: Check for extra or missing arguments
1890  //-----------------------------------
1891  #ifdef DEBUG_PARSE
1892  printf("<PARSE> START STEP 4:\n");
1893  dump_arrays( );
1894  #endif
1895 
1896  args = rargs;
1897  int top_level_pos = -1;
1898  for( j=0; j<count; j++ )
1899  {
1900  // check whether there are two or more free items
1901  // only one is allowed for non-top levels
1902  if( usedby[j]==-1 )
1903  {
1904  if( !top_level && top_level_pos>=0 )
1905  {
1906  // we are ready to announce that there are extra values
1907  // but before doing this check whether there is function
1908  // that would be so polite to take any of them
1909  //printf(">>> top_level_pos=%d\n",top_level_pos);
1910  if( rargs[top_level_pos]==0 &&
1911  vars[top_level_pos] &&
1912  GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) )
1913  {
1914  i = top_level_pos;
1915  //printf(">>> attach %d to %d\n",j,i);
1916  //printf(">>> rargs
1917  LINK;
1918  //printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos]));
1919  }
1920  else
1921  {
1922  err = new_error( ERROR_CROWDED_EXPRESSION, data[j] );
1923  break;
1924  }
1925  }
1926  if( top_level_pos<0 ) top_level_pos = j;
1927  }
1928 
1929  // left paramers can be skipped only for unary functions
1930  if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) )
1931  {
1932  err = new_error( ERROR_MISSING_LEFTS, data[j] );
1933  break;
1934  }
1935 
1936  // some functions may ignore 1 missing right parameter
1937  if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) )
1938  continue;
1939 
1940  // when arguments can be infinite, this also mean any number
1941  if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) )
1942  {
1943  //printf("rargs=%d\n",rargs[j]);
1944  err = new_error( ERROR_MISSING_RIGHTS, data[j] );
1945  break;
1946  }
1947  }
1948 
1949  if( err ) break;
1950 
1951 
1952  //-----------------------------------
1953  // STEP 5: Recursivelly process nested expressions
1954  // and arguments which must be lisp'ed too
1955  //-----------------------------------
1956  #ifdef DEBUG_PARSE
1957  printf("<PARSE> START STEP 5:\n");
1958  dump_arrays( );
1959  #endif
1960  for( i=0; i<count; i++ ) if( IS_LIST(data[i]) )
1961  {
1962  if( GET_FLAGS(data[i],FLAG_EXPRESSION) )
1963  { // nested expression
1964  atom_t expr = parse( data[i], parent, 0 );
1965 
1966  if( IS_ERROR(expr) )
1967  {
1968  err = expr;
1969  }
1970  else
1971  {
1972  DEUSE( data[i] );
1973  data[i] = expr;
1974  SET_FLAGS( data[i], FLAG_EXPRESSION );
1975  }
1976  }
1977  else
1978  { // possibly argument to be reparsed
1979  j=usedby[i];
1980  if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) )
1981  {
1982  atom_t expr = build_syntax_subtree( parent, data[i] );
1983  if( IS_ERROR(expr) )
1984  {
1985  err = expr;
1986  }
1987  else
1988  {
1989  DEUSE( data[i] );
1990  data[i] = expr;
1992  }
1993  }
1994  }
1995  }
1996  if( err ) break;
1997 
1998 
1999  //-----------------------------------
2000  // STEP 6: Group items of subexpressions, process numbers
2001  //-----------------------------------
2002  #ifdef DEBUG_PARSE
2003  printf("<PARSE> START STEP 6:\n");
2004  dump_arrays( );
2005  #endif
2006  for( i=0; i<count; i++ )
2007  {
2008  /*
2009  // convert all numeric data into numbers
2010  float64_t n;
2011  if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
2012  {
2013  DEUSE( data[i] );
2014  data[i] = new_float( n );
2015  }
2016 
2017  // create a list node for each element
2018  // whatever the element is, this list node will be used
2019  data[i] = new_list( data[i], empty_list );
2020  SET_FLAGS( data[i], FLAG_EXPRESSION );
2021  */
2022 
2023  // create a list node for each element
2024  // whatever the element is, this list node will be used
2025  float64_t n;
2026  if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
2027  {
2028  atom_t pos = data[i]; // old source of the number
2029  data[i] = new_list_ex( new_float(n), empty_list );
2030  POS( data[i] ) = pos;
2031  }
2032  else
2033  {
2034  data[i] = new_list( data[i], empty_list );
2035  }
2036  SET_FLAGS( data[i], FLAG_EXPRESSION );
2037  }
2038 
2039  for( i=0; i<count; i++ )
2040  {
2041  if( usedby[i]>=0 )
2042  {
2043  atom_t a = data[usedby[i]];
2044  atom_t name = CAR(data[i]);
2045  atom_t function = vars[usedby[i]];
2046  //printf("name(%d)=",i);dumpln(name);
2047  //printf("function(%d)=",usedby[i]);dumpln(function);
2048  //printf("a(%d)=",usedby[i]);dumpln(a);
2049 
2050  // if the item is word constant and is used by
2051  // a function which creates variables, then do
2052  // create the variables now.
2053  if( GET_FLAGS(function,FLAG_SET_ALL_VARS)
2054  && IS_ANY_WORD(name)
2055  && LENGTH(name)>1
2056  && *STRING(name)==TEXT('"') )
2057  {
2058  //printf("create local "); dumpln(name);
2059  //printf(" in "); dumpln(NAME(parent));
2060  atom_t var = new_local_var( name, parent, 1 );
2061  if( IS_ERROR(var) ) err = var;
2062  SET_FLAGS( var, FLAG_VARIABLE );
2063  }
2064 
2065  // if the item is word constant and is used by
2066  // a function which creates a single variable
2067  // (like MAKE), then do create the variable as
2068  // global if it does not exist
2069  // 2010.06.26: create the variable only if the
2070  // parsing has not been activated at run-time.
2071  atom_t real_name = NULL;
2073  && GET_FLAGS(function,FLAG_SET_ONE_VAR)
2074  && (i==usedby[i]+1)
2075  && (IS_INTEGER(name) || IS_FLOAT(name)) )
2076  {
2077  real_name = atom_to_word( name );
2078  goto use_numeric_name;
2079  }
2080 
2081  if(
2082  GET_FLAGS(function,FLAG_SET_ONE_VAR)
2083  && (i==usedby[i]+1)
2084  && IS_ANY_WORD(name)
2085  && LENGTH(name)>1
2086  && *STRING(name)==TEXT('"') )
2087  {
2088  real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 );
2089  use_numeric_name:
2090  if( !find_var( real_name, parent ) )
2091  {
2092  atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!");
2093  if( IS_ERROR(var) ) err = var;
2094  SET_FLAGS( var, FLAG_VARIABLE );
2095  }
2096  DEUSE( real_name );
2097 
2098  // patch for FOR command. If the control
2099  // variable is ABC then create ABC and
2100  // ^ABC
2101  if( ADDRESS(function)==(int)rt_for )
2102  {
2103  assert( IS_ANY_WORD(name) );
2104  atom_t real_name = new_word( STRING(name), LENGTH(name) );
2105  *STRING(real_name) = L'^';
2106  if( !find_var( real_name, parent ) )
2107  {
2108  atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!");
2109  if( IS_ERROR(var) ) err = var;
2110  SET_FLAGS( var, FLAG_VARIABLE );
2111  }
2112  DEUSE( real_name );
2113  }
2114 
2115  }
2116 
2117  // if the item is word constant and is used by
2118  // a function which load libraries, then do
2119  // load the library now.
2120  if( ADDRESS(function)==(int)rt_load
2121  && IS_ANY_WORD(name)
2122  && LENGTH(name)>1
2123  && *STRING(name)==TEXT('"') )
2124  {
2125  atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 );
2126  atom_t word = read_word( STRING(real_name) );
2127  DEUSE( real_name );
2128 
2129  if( IS_ERROR(word) ) { err = USE(word); break; }
2130  atom_t trimmed = trim_shell_comment( word );
2131  DEUSE( word );
2132 
2133  atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA );
2134  DEUSE( trimmed );
2135  if( IS_ERROR(tokens1) )
2136  {
2137  err = tokens1;
2138  break;
2139  }
2140 
2141  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2142  DEUSE( tokens1 );
2143  if( IS_ERROR(tokens2) ) return tokens2;
2144 
2145  tokens2 = preparse( tokens2, root, LEVEL(root) );
2146  atom_t body = parentheses( tokens2 );
2147  DEUSE( tokens2 );
2148 
2149  //printf("BODY=");dumpln(body);
2150  //printf("input=");dumpln(input);
2151 
2152  if( IS_NOT_EMPTY(body) )
2153  {
2154  atom_t a;
2155  for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) );
2156  CDR(a) = USE(input);
2157  input = body;
2158  }
2159 
2160  //printf("old aux="); dumpln(aux);
2161  aux = new_list( body, aux );
2162  //printf("new aux="); dumpln(aux);
2163  for( i=0; i<count; i++ ) DEUSE( data[i] );
2164  goto next_line;
2165  }
2166 
2167  SET_FLAGS( a, FLAG_EXPRESSION );
2168  while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a);
2169  CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i];
2170  }
2171  }
2172  if( err ) break;
2173 
2174 
2175  //-----------------------------------
2176  // STEP 7: Group top-most items into result list
2177  //-----------------------------------
2178  #ifdef DEBUG_PARSE
2179  printf("<PARSE> START STEP 7:\n");
2180  dump_arrays( );
2181  #endif
2182  //printf("data[0]=");dumpln(data[0]);
2183  //printf("vars[0]=");dumpln(vars[0]);
2184  for( i=0; i<count; i++ )
2185  if( usedby[i]==-1 )
2186  {
2187  //printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i])));
2188  //boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i])))
2189  //boza {
2190  //printf("i=%d\n",i);
2191  //boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] );
2192  //boza break;
2193  //boza }
2194  if( IS_UNBOUND(poses[i]) )
2195  {
2196  append( data[i], &result, &last );
2197  }
2198  else
2199  {
2200  append_ex( data[i], &result, &last );
2201  DEUSE( POS(last) );
2202  POS( last ) = USE( poses[i] );
2203  }
2204  }
2205 
2206  if( err ) break;
2207  } // main line
2208 
2209 
2210  //-----------------------------------------
2211  // STEP pre-8: Test for error and release unused resources
2212  //-----------------------------------------
2213  DEUSE( orig_input );
2214  DEUSE( aux );
2215  if( err )
2216  {
2217  //printf("ERRRRRRROR\n");
2218  for( i=0; i<count; i++ ) DEUSE( data[i] );
2219  DEUSE( result );
2220  return err;
2221  }
2222 
2223  //-----------------------------------
2224  // STEP 8: Remove unnecessary parentheses
2225  //-----------------------------------
2226  //printf("before=");dumpln(result);
2227  //
2228  // step 8 removed, because in some cases it removes more
2229  // parentheses than needed - e.g. when function body has
2230  // only one command
2231  // while( IS_EMPTY(CDR(result)) &&
2232  // IS_LIST(CAR(result)) &&
2233  // GET_FLAGS(CAR(result),FLAG_EXPRESSION) )
2234  // {
2235  // atom_t a = CAR(result);
2236  // CAR(result) = empty_list;
2237  // DEUSE( result );
2238  // result = a;
2239  // }
2240  //
2241  //printf(" after=");dumpln(result);
2242 
2243  if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION );
2244 
2245  #ifdef DEBUG_PARSE
2246  printf("<PARSE> EXIT=");
2247  dumpln(result);
2248  #endif
2249 
2250  if( OPTION_DUMP_AST )
2251  {
2252  outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN );
2253  dumpln( result );
2254  outter( TEXT("\n\0"), UNKNOWN );
2255  }
2256 
2257  return result;
2258 }
atom_t build_syntax_tree ( atom_t  func)
Parameters
funcvar atom for the parse context
Returns
empty_list or an error atom

Parses completely a function. Its source is stored in its body as word, subword, data-tokenized list or command-tokenized list. Building algorithm:

  • tokenization of body as commands
  • extracting all TO ... ENDs and create them as subfunctions
  • parsing the func's body into abstract syntax tree
  • recursively build trees of subfunctions

Definition at line 2278 of file parser.c.

2279 {
2280  // exit is function is already treefied
2281  if( IS_NOT_EMPTY(TREE(func)) ) return empty_list;
2282 
2283  //printf("BUILD_SYNTAX_TREE(FUNC=");
2284  //dump(NAME(func));
2285  //printf(",SOURCE=");
2286  //dump(SOURCE(func));
2287  //printf(",BODY=");
2288  //dump(BODY(func));
2289  //printf(",LOCALS=");
2290  //dump(LOCALS(func));
2291  //printf(")\n\n");
2292 
2293  if( IS_EMPTY(BODY(func)) )
2294  {
2295  // Step 1. Tokenize
2296  //printf("SOURCE="); dumpln(SOURCE(func));
2297  atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA );
2298  if( IS_ERROR(tokens1) ) return tokens1;
2299  //printf("TOKENS1="); dumpln(tokens1);
2300 
2301  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2302  DEUSE( tokens1 );
2303  if( IS_ERROR(tokens2) ) return tokens2;
2304  //printf("TOKENS2="); dumpln(tokens2);
2305 
2306  // Step 2. Extract TO..END's
2307  atom_t body = preparse( tokens2, func, LEVEL(func) );
2308  if( IS_ERROR(body) ) return body;
2309  DEUSE( BODY(func) );
2310  BODY(func) = body;
2311  //printf("BODY="); dumpln(BODY(func));
2312  }
2313 
2314 
2315  // Step 3. Parse function body
2316  //printf("BODY="); dumpln(BODY(func));
2317  atom_t tree = parse( BODY(func), func, 1 );
2318  if( IS_ERROR(tree) ) return tree;
2319  DEUSE(TREE(func));
2320  TREE(func) = tree;
2321  //printf("TREE="); dumpln(TREE(func));
2322 
2323  // because the might be some new TO..ENDs
2324  // scan all locals and build those which
2325  // have no trees
2326  atom_t local;
2327  atom_t locals;
2328  for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) )
2329  {
2330  local = CAR(locals);
2331  if( !DESCR2(local) ) continue;
2332  atom_t x = build_syntax_tree( local );
2333  if( IS_ERROR(x) ) return x;
2334  }
2335 
2336  return empty_list;
2337 }

Variable Documentation

char_t debar[32]
Initial value:
=<>|? */

table for |a|->a conversions

Definition at line 211 of file parser.c.


[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Wed Jul 10 2013