Lhogho  0.0.028
 All Data Structures Files Functions Variables Typedefs Macros Pages
parser.c
Go to the documentation of this file.
1 //
2 // Project: Lhogho
3 // File: parser.c
4 //
5 // Copyright (C) 2007 P.Boytchev
6 //
7 // Revision history:
8 // 2006-10-13 - file created
9 // 2006-10-18 - added error checking in tokenizer
10 // 2006-10-26 - tokenizer returns error atoms
11 // - added trim_shell_comment()
12 // 2007-02-28 - tokenization uses microprogrammable
13 // cell automata
14 // 2007-03-13 - parse()
15 // 2007-03-19 - fixed parentheses() to preserve flags
16 // 2007-05-13 - fixed infinite arg for more than 1 funcs in (...)
17 // 2007-05-17 - license info
18 // 2007-05-22 - doxygen-friendly documentation
19 // 2007-05-27 - parsing TO...END
20 // 2007-05-31 - support for --dump-ast option
21 // 2007-06-03 - errors renamed
22 // 2007-06-05 - definitions spread to where they belong
23 // 2007-06-08 - fixed bugs #1733243, #1733248
24 // 2007-06-09 - fixed bug #1734106 Allows duplicate names of inputs
25 // 2007-06-10 - function_syntax_tree() renamed to build_syntax_tree()
26 // - fixed bug #1734521 Tokenization crashes
27 // 2007-06-19 - removed step 8 from parse()
28 // 2007-06-24 - fixed bug #1742055 Token11 test cases fail under Windows
29 // - fixed bug #1742052 Token09-CRLF.lgo crashes under Windows
30 // - fixed bug #1739110 LOCAL :P creates local variable P
31 // 2007-07-30 - fixed bug #1763506 Memory leak after duplicated locals
32 // 2007-07-31 - fixed bug #1764709 Wrong error if double-nested
33 // 2007-08-07 - fixed bug #1769531 Wrong stack offsets of parameters
34 // 2007-08-11 - fixed bug #1771146 No error for print 1/*sin 45
35 // 2007-08-14 - fixed bug #1773638 Error parsing make "printwidthlimit -1
36 // 2007-09-03 - Fixed bug #1787023 ARCTAN and RADARCTAN
37 // 2007-09-06 - Fixed bug #1787968 Parsing MAKE "Z 1+1 fails
38 // 2007-09-09 - Fixed bug #1790939 Wrong handling of prenthesised unknown functions
39 // - Fixed bug #1790970 Memory leak and unreported error
40 // 2007-09-10 - Fixed bug #1791439 (print 1 2+3) not parsed correctly
41 // 2007-09-14 - Fixed bug #1794499 Problem with < > <> <= >=
42 // 2007-09-21 - Fixed bug #1799380 Redefined functions not compiled correctly
43 // 2007-09-23 - Support for IF
44 // - Added tokenization of templates
45 // 2007-10-11 - Fixed bug #1810683 Failed assertion when redefining a function
46 // - Fixed bug #1810685 Wrong error message in function redefinition
47 // 2007-12-02 - Fixed bugs #1838919, #1838911, #1838910, #1838909,
48 // #1838908, #1838907, #1838906 and #1831028
49 // 2007-12-04 - Fixed bug #1843897 Crash in multiline parentheses (...)
50 // 2007-12-14 - Support for FULLTEXT
51 // 2008-01-16 - Fixed bug #1872272 Assertion failed for PRINT "[
52 // 2008-01-23 - Fixed bug #1836433 Example with IFTRUE fails
53 // 2008-04-14 - Fixed bug #1902856 Bars in comment do not work
54 // 2008-04-28 - Fixed bug #1953268 Wrong error position in bug1872138.lgo
55 // 2008-05-03 - Fixed bug #1941373 Fails testcase goto-tag-04
56 // 2008-08-14 - Fixed bug #2051144 IF consumes extra inputs
57 // 2009-08-02 - Fixed bug #2794985 CALHC fails with wrong input
58 // 2009-08-29 - Fixed bug #2845099 Memory leak in THROW
59 // - Fixed bug #2845579 DEBUG_MEMORY_LEAKS finds two bad tests
60 // - Fixed bug #2846281 Errors not (de)used correctly
61 // 2009-08-30 - Fixed bug #2847199 Crash in PARSE
62 // 2009-08-31 - Fixed bug #2847770 Problem loading executeless libraries
63 // 2011-01-30 - Fixed bug #3167459 Inconsistency in numeric variable names
64 // 2011-03-02 - Fixed bug #3188211 LOAD crashes if file size is >1MB
65 // - Fixed bug #3197401 Warnings and errors compiling the developer's documentation
66 // 2011-10-13 - Fixed Doxygen warnings
67 // 2011-12-01 - Fixed bug #3442803 END without TO causes assertion fail
68 // 2011-12-03 - Fixed bug #3442773 CLI fails on "for"
69 // 2012-01-02 - Command-line options are translatable
70 // 2012-01-05 - More synonyms for primitives
71 //
72 //
73 //
74 // This program is free software; you can redistribute it and/or modify
75 // it under the terms of the GNU General Public License as published by
76 // the Free Software Foundation; either version 2 of the License, or
77 // (at your option) any later version.
78 //
79 // This program is distributed in the hope that it will be useful,
80 // but WITHOUT ANY WARRANTY; without even the implied warranty of
81 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
82 // GNU General Public License for more details.
83 //
84 // You should have received a copy of the GNU General Public License
85 // along with this program; if not, write to the Free Software
86 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
87 //
88 
89 
90 #include <stdlib.h>
91 #include <stdio.h>
92 #include <assert.h> // assert()
93 #if defined(WINDOWS) || defined(LINUX)
94 #include <malloc.h> // alloca()
95 #endif
96 #include "globals.h"
97 #include "atoms.h"
98 #include "words.h"
99 #include "lists.h"
100 #include "unicode.h"
101 #include "parser.h"
102 #include "vars.h"
103 #include "options.h"
104 #include "numbers.h"
105 #include "errors.h"
106 #include "runtime.h"
107 #include "compiler.h"
108 
109 
110 
111 
112 #define GET_CHAR ch=*source
113 #define VAR vars[count]
114 
115 
116 atom_t build_syntax_subtree( atom_t function, atom_t source ); // forward def
117 
118 
119 //===================================================
121 //===================================================
122 #define TOKEN_END 0
123 #define TOKEN_WORD 1
124 #define TOKEN_DIRTY_WORD 2
125 #define TOKEN_OPEN 3
126 #define TOKEN_CLOSE 4
127 #define TOKEN_LINEEND 5
128 #define TOKEN_SPACE 6
129 #define TOKEN_ERROR 7
130 
131 
132 
133 
134 //===================================================
136 //===================================================
137 #define MODE_ENTRY 0
138 #define MODE_SPACE 1
139 #define MODE_WORD 2
140 #define MODE_BARRED 3
141 #define MODE_BACKSLASHED 4
142 #define MODE_TILDE 5
143 #define MODE_SEMITILDE 6
144 #define MODE_SEMICOLON 7
145 #define MODE_TILDESPACE 8
146 #define MODE_LESS 9
147 #define MODE_GREATER 10
148 #define MAX_MODE 11
149 
150 
151 
152 
153 //===================================================
155 //===================================================
156 #define PAT_TILDE 0x000001
157 #define PAT_PUSH 0x000002
158 #define PAT_NEXT 0x000004
159 #define PAT_DIRTY 0x000008
160 #define PAT_MUTATED 0x000010
161 #define PAT_RETURN_TO_WORD 0x000020
162 #define PAT_RETURN_TO_SELF 0x000040
163 #define PAT_GOTO 0x000080
164 #define PAT_TOKEN 0x000100
165 #define PAT_RETURN 0x000200
166 #define PAT_ERROR_POS 0x000400
167 #define PAT_ERROR 0x000800
168 #define PAT_RETURN_TO_CALLER 0x001000
169 #define PAT_NEXT2 0x002000
170 
171 
172 
173 
174 //===================================================
176 //===================================================
177 // @{
178 #define PAT_SHIFT 16
179 
180 #define PAT_TOKEN_END PAT_TOKEN+(TOKEN_END<<PAT_SHIFT)
181 #define PAT_TOKEN_SPACE PAT_TOKEN+(TOKEN_SPACE<<PAT_SHIFT)
182 #define PAT_TOKEN_WORD PAT_TOKEN+(TOKEN_WORD<<PAT_SHIFT)
183 #define PAT_TOKEN_LINE PAT_TOKEN+(TOKEN_LINEEND<<PAT_SHIFT)
184 #define PAT_TOKEN_OPEN PAT_TOKEN+(TOKEN_OPEN<<PAT_SHIFT)
185 #define PAT_TOKEN_CLOSE PAT_TOKEN+(TOKEN_CLOSE<<PAT_SHIFT)
186 
187 #define PAT_GOTO_SPACE PAT_GOTO+(MODE_SPACE<<PAT_SHIFT)
188 #define PAT_GOTO_WORD PAT_GOTO+(MODE_WORD<<PAT_SHIFT)
189 #define PAT_GOTO_BACKSLASHED PAT_GOTO+(MODE_BACKSLASHED<<PAT_SHIFT)
190 #define PAT_GOTO_BARRED PAT_GOTO+(MODE_BARRED<<PAT_SHIFT)
191 #define PAT_GOTO_SEMICOLON PAT_GOTO+(MODE_SEMICOLON<<PAT_SHIFT)
192 #define PAT_GOTO_TILDE PAT_GOTO+(MODE_TILDE<<PAT_SHIFT)
193 #define PAT_GOTO_TILDESPACE PAT_GOTO+(MODE_TILDESPACE<<PAT_SHIFT)
194 #define PAT_GOTO_SEMITILDE PAT_GOTO+(MODE_SEMITILDE<<PAT_SHIFT)
195 #define PAT_GOTO_LESS PAT_GOTO+(MODE_LESS<<PAT_SHIFT)
196 #define PAT_GOTO_GREATER PAT_GOTO+(MODE_GREATER<<PAT_SHIFT)
197 // @}
198 
199 
200 
201 
202 
203 //===================================================
212 //===================================================
213 char_t enbar[128];
214 char_t debar[32];
215 
216 
217 //===================================================
223 //===================================================
224 void init_parser( )
225 {
226  int i;
227 
228  // NOTE! if the string of enbarrable characters is
229  // changed, also change dump_word()
230  chars_t s = TEXT("______________()+-*/=<>|?_______");
231  //locked-> x........xx..x..................
232 
233  for( i=0; i<32; i++ ) debar[i]=i; // identity matrix
234  for( i=0; i<128; i++ ) enbar[i]=i; // identity matrix
235 
236  for( i=0; *s; i++,s++ )
237  {
238  if( *s==TEXT('_') ) continue;
239  debar[i] = *s;
240  enbar[(unsigned char)*s] = i;
241  }
242 }
243 
244 
245 
246 //===================================================
274 //===================================================
275 atom_t tokenize( atom_t input, int method )
276 {
277  if( IS_LIST(input) )
278  {
279  if( method==TOKENIZE_DATA &&
281  return USE(input);
282  if( method==TOKENIZE_COMMANDS &&
284  return USE(input);
285  }
286 
287  if( IS_FLOAT(input) )
288  {
289  return USE(input);
290  }
291 
292  //printf("\n");
293  //if(method==TOKENIZE_DATA)
294  // printf("ENTER TOKENIZE_DATA(");
295  //else
296  // printf("ENTER TOKENIZE_COMMANDS(");
297  //dump_atom(input,1);
298  //printf(")\n");
299 
300 
301  #ifdef SAFEMODE
302  assert( IS_WORD(input) || IS_SUBWORD(input) || IS_LIST(input) );
303  #endif
304 
305  // First check whether the input is a list.
306  // If it is then tokenize recursively all its elements.
307  if( IS_LIST(input) )
308  {
309  atom_t result = empty_list;
310  atom_t last = empty_list;
311  atom_t x;
312  atom_t y;
313  for( x=input; IS_NOT_EMPTY(x); x=CDR(x) )
314  {
315  //printf("@@@@@@@@PROCESSING=|"); dump_atom(CAR(x),1); printf("|\n");
316 
317  int submethod = IS_LIST(CAR(x))?TOKENIZE_DATA:method;
318  atom_t element = tokenize( CAR(x), submethod );
319  //printf("@@@@@@@@TOKENIZED_INTO=|"); dump_atom(element,1); printf("|\n");
320 
321  if( IS_ERROR(element) )
322  {
323  DEUSE( result );
324  //DEUSE( last );
325  result = element;
326  break;
327  }
328 
329  int initial_flags = GET_FLAGS( x, FLAG_NEWLINE|FLAG_AFTER_SPACE );
330  int final_flags = GET_FLAGS( x, FLAG_BEFORE_SPACE );
331 
332  //printf(">>>CAR(x) = "); dump_atom(CAR(x),1); printf("\n");
333  //printf(">>>element = "); dump_atom(element,1); printf("\n");
334  if( IS_FLOAT(element) )
335  {
336  append( USE(element), &result, &last );
337  SET_FLAGS( last, initial_flags|final_flags );
338  }
339  else if( IS_LIST(CAR(x)) )
340  {
341  if( IS_EXTENDED(x) )
342  {
343  append_ex( USE(element), &result, &last );
344  DEUSE( POS(last) );
345  POS( last ) = USE( POS(x) );
346  }
347  else
348  append( USE(element), &result, &last );
349  SET_FLAGS( last, initial_flags|final_flags );
350  }
351  else
352  {
353  for( y=element; IS_NOT_EMPTY(y); y=CDR(y) )
354  {
355  //printf("APPEND SUBELEMENT |");
356  //dump_atom(CAR(y),1);
357  //printf("|\n");
358 
359  if( IS_EXTENDED(y) )
360  {
361  append_ex( USE(CAR(y)), &result, &last );
362  DEUSE( POS(last) );
363  POS( last ) = USE( POS(y) );
364  }
365  else
366  append( USE(CAR(y)), &result, &last );
367  SET_FLAGS( last, FLAGS(y) );
368  if( y==element ) SET_FLAGS( last, initial_flags );
369  }
370  #ifdef SAFE_MODE
371  assert( IS_NOT_EMPTY(last) );
372  #endif
373  SET_FLAGS( last, final_flags);
374  }
375 
376  DEUSE( element );
377  }
378 
379  //printf("FINAL RESULT IS |"); dump_atom(result,1); printf("|\n");
380  return result;
381  }
382 
383  // The input is a word or a subword
384  chars_t source = STRING(input);
385  int len = LENGTH(input);
386  int origlen = len;
387 
388  chars_t buffer = ALLOC( CHAR_SIZE*len ); // buffer for the longest word
389  //chars_t bp = buffer;
390  char_t ch;
391 
392  int_t errpos = -1;
393  //char_t errchar = NULL_CHAR;
394 
395  int last_token = TOKEN_LINEEND;
396  //int crlf = 0;
397 
398  // Gets the next token. Return:
399  // TOKEN_END if there are no more tokens
400  // TOKEN_WORD if the token is a word
401  // TOKEN_DIRTY_WORD if the token is a word with \ or |
402  // TOKEN_OPEN if the token is [
403  // TOKEN_CLOSE if the token is ]
404  // TOKEN_LINEEND if the token is <nl>
405  // TOKEN_SPACE if at least one whitespace is met
406 
407 
408  //int co=0;
409  int get_token( atom_t *token, int method )
410  {
411  //co++;
412  //if (0 == co%1024)
413  //{
414  //printf("%d ",co);
415  //}
416  // return 1 if buffer contains number
417  int is_number(chars_t bp)
418  { // "E" {digit}* "." {digit}+
419  // 1 2 3 4
420  chars_t cp = bp;
421  int num_mode = 1;
422  char_t ch;
423 
424  cp = bp;
425 
426  if( bp==buffer ) return 0;
427 
428  while( cp>buffer )
429  {
430  cp--;
431  ch = *cp;
432  //printf("num_mode=%d ch=%C\n",num_mode,ch);
433  switch( num_mode )
434  {
435  case 1:
436  if( ch!=TEXT('E') && ch!=TEXT('e') ) return 0;
437  num_mode = 2;
438  break;
439  case 2: ;
440  if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
441  num_mode = 3;
442  break;
443  case 3: ;
444  if( ch!=TEXT('.') ) return 0;
445  num_mode = 4;
446  break;
447  case 4: ;
448  if( ch<TEXT('0') || ch>TEXT('9') ) return 0;
449  break;
450  }
451  //printf("num_mode=%d\n",num_mode);
452  }
453  return 1;
454  }
455 
456  if( !len ) return TOKEN_END;
457 
458  int dirty = 0;
459  int mutated = 0;
460  chars_t bp = buffer; *bp=NULL_CHAR;
461  chars_t sp = source;
462 
463  int mode = MODE_ENTRY; // current mode
464  int code; // action code
465 
466 
467  int stack[MAX_MODE]; // return-to-mode for each mode
468  static int mode_eof[MAX_MODE] =
469  {
470  /* entry */ PAT_TOKEN_END,
471  /* whitespace */ PAT_TOKEN_SPACE,
472  /* word */ PAT_TOKEN_WORD,
473  /* barred */ PAT_ERROR,
474  /* backslashed */ PAT_ERROR,
475  /* tilde */ PAT_ERROR,
476  /* semitilde */ PAT_ERROR,
477  /* semicolon */ PAT_RETURN,
478  /* tildespace */ PAT_ERROR,
479  /* less */ PAT_TOKEN_WORD,
480  /* greater */ PAT_TOKEN_WORD
481  };
482  static int mode_eol[MAX_MODE] =
483  {
484  /* entry */ PAT_NEXT+PAT_TOKEN_LINE,
485  /* whitespace */ PAT_TOKEN_SPACE,
486  /* word */ PAT_TOKEN_WORD,
487  /* barred */ PAT_PUSH+PAT_NEXT,
488  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
489  /* tilde */ PAT_NEXT+PAT_RETURN,
490  /* semitilde */ PAT_NEXT+PAT_RETURN,
491  /* semicolon */ PAT_RETURN,
492  /* tildespace */ PAT_NEXT+PAT_RETURN,
493  /* less */ PAT_TOKEN_WORD,
494  /* greater */ PAT_TOKEN_WORD
495  };
496  static int mode_space[MAX_MODE] =
497  {
498  /* entry */ PAT_NEXT+PAT_GOTO_SPACE,
499  /* whitespace */ PAT_NEXT,
500  /* word */ PAT_TOKEN_WORD,
501  /* barred */ PAT_PUSH+PAT_NEXT,
502  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
503  /* tilde */ PAT_NEXT+PAT_GOTO_TILDESPACE,
504  /* semitilde */ PAT_NEXT,
505  /* semicolon */ PAT_NEXT,
506  /* tildespace */ PAT_NEXT,
507  /* less */ PAT_TOKEN_WORD,
508  /* greater */ PAT_TOKEN_WORD
509  };
510  static int mode_open[MAX_MODE] =
511  {
512  /* entry */ PAT_NEXT+PAT_TOKEN_OPEN,
513  /* whitespace */ PAT_TOKEN_SPACE,
514  /* word */ PAT_TOKEN_WORD,
515  /* barred */ PAT_PUSH+PAT_NEXT,
516  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
517  /* tilde */ PAT_TILDE+PAT_RETURN,
518  /* semitilde */ PAT_RETURN,
519  /* semicolon */ PAT_NEXT,
520  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
521  /* less */ PAT_TOKEN_WORD,
522  /* greater */ PAT_TOKEN_WORD
523  };
524  static int mode_close[MAX_MODE] =
525  {
526  /* entry */ PAT_NEXT+PAT_TOKEN_CLOSE,
527  /* whitespace */ PAT_TOKEN_SPACE,
528  /* word */ PAT_TOKEN_WORD,
529  /* barred */ PAT_PUSH+PAT_NEXT,
530  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
531  /* tilde */ PAT_TILDE+PAT_RETURN,
532  /* semitilde */ PAT_RETURN,
533  /* semicolon */ PAT_NEXT,
534  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
535  /* less */ PAT_TOKEN_WORD,
536  /* greater */ PAT_TOKEN_WORD
537  };
538  static int mode_bar[MAX_MODE] =
539  {
541  /* whitespace */ PAT_TOKEN_SPACE,
543  /* barred */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
544  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
545  /* tilde */ PAT_TILDE+PAT_RETURN,
546  /* semitilde */ PAT_RETURN,
548  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
549  /* less */ PAT_TOKEN_WORD,
550  /* greater */ PAT_TOKEN_WORD
551  };
552  static int mode_backslash[MAX_MODE] =
553  {
555  /* whitespace */ PAT_TOKEN_SPACE,
558  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
559  /* tilde */ PAT_TILDE+PAT_RETURN,
560  /* semitilde */ PAT_RETURN,
561  /* semicolon */ PAT_NEXT2, //+PAT_RETURN_TO_SELF+PAT_GOTO_BACKSLASHED,
562  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
563  /* less */ PAT_TOKEN_WORD,
564  /* greater */ PAT_TOKEN_WORD
565  };
566  static int mode_tilde[MAX_MODE] =
567  {
569  /* whitespace */ PAT_TOKEN_SPACE,
571  /* barred */ PAT_PUSH+PAT_NEXT,
572  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
573  /* tilde */ PAT_TILDE+PAT_RETURN,
574  /* semitilde */ PAT_RETURN,
576  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
577  /* less */ PAT_TOKEN_WORD,
578  /* greater */ PAT_TOKEN_WORD
579  };
580  static int mode_semicolon[MAX_MODE] =
581  {
583  /* whitespace */ PAT_TOKEN_SPACE,
585  /* barred */ PAT_PUSH+PAT_NEXT,
586  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
587  /* tilde */ PAT_TILDE+PAT_RETURN,
588  /* semitilde */ PAT_RETURN,
589  /* semicolon */ PAT_NEXT+PAT_RETURN_TO_SELF+PAT_GOTO_SEMITILDE,
590  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
591  /* less */ PAT_TOKEN_WORD,
592  /* greater */ PAT_TOKEN_WORD
593  };
594  static int mode_else[MAX_MODE] =
595  {
596  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_WORD,
597  /* whitespace */ PAT_TOKEN_SPACE,
598  /* word */ PAT_PUSH+PAT_NEXT,
599  /* barred */ PAT_PUSH+PAT_NEXT,
600  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
601  /* tilde */ PAT_TILDE+PAT_RETURN,
602  /* semitilde */ PAT_RETURN,
603  /* semicolon */ PAT_NEXT,
604  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
605  /* less */ PAT_TOKEN_WORD,
606  /* greater */ PAT_TOKEN_WORD
607  };
608  static int mode_parens[MAX_MODE] =
609  {
610  /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
611  /* whitespace */ PAT_TOKEN_SPACE,
612  /* word */ PAT_TOKEN_WORD,
613  /* barred */ PAT_PUSH+PAT_NEXT,
614  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
615  /* tilde */ PAT_TILDE+PAT_RETURN,
616  /* semitilde */ PAT_RETURN,
617  /* semicolon */ PAT_NEXT,
618  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
619  /* less */ PAT_TOKEN_WORD,
620  /* greater */ PAT_TOKEN_WORD
621  };
622  static int mode_equal[MAX_MODE] =
623  {
624  /* entry */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
625  /* whitespace */ PAT_TOKEN_SPACE,
626  /* word */ PAT_TOKEN_WORD,
627  /* barred */ PAT_PUSH+PAT_NEXT,
628  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
629  /* tilde */ PAT_TILDE+PAT_RETURN,
630  /* semitilde */ PAT_RETURN,
631  /* semicolon */ PAT_NEXT,
632  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
633  /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
634  /* greater */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD
635  };
636  static int mode_less[MAX_MODE] =
637  {
638  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS,
639  /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_LESS, //PAT_TOKEN_WORD, @boza
640  /* word */ PAT_TOKEN_WORD,
641  /* barred */ PAT_PUSH+PAT_NEXT,
642  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
643  /* tilde */ PAT_TILDE+PAT_RETURN,
644  /* semitilde */ PAT_RETURN,
645  /* semicolon */ PAT_NEXT,
646  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
647  /* less */ PAT_TOKEN_WORD,
648  /* greater */ PAT_TOKEN_WORD
649  };
650  static int mode_greater[MAX_MODE] =
651  {
652  /* entry */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER,
653  /* whitespace */ PAT_PUSH+PAT_NEXT+PAT_GOTO_GREATER, //PAT_TOKEN_WORD, @boza
654  /* word */ PAT_TOKEN_WORD,
655  /* barred */ PAT_PUSH+PAT_NEXT,
656  /* backslashed */ PAT_PUSH+PAT_NEXT+PAT_RETURN,
657  /* tilde */ PAT_TILDE+PAT_RETURN,
658  /* semitilde */ PAT_RETURN,
659  /* semicolon */ PAT_NEXT,
660  /* tildespace */ PAT_TILDE+PAT_TOKEN_WORD,
661  /* less */ PAT_PUSH+PAT_NEXT+PAT_TOKEN_WORD,
662  /* greater */ PAT_TOKEN_WORD
663  };
664 
665 
666 again:
667  // get action code
668  GET_CHAR;
669 
670 
671 #ifdef DEBUG_TOKENS
672  printf("<TOKENS> length=%d\n",len);
673  if(ch<TEXT(' '))
674  printf("<TOKENS> get(#%d)\t",ch);
675  else
676  printf("<TOKENS> get('%c')\t",ch);
677  switch( mode )
678  {
679  case MODE_ENTRY: printf(" ENTRY -> "); break;
680  case MODE_SPACE: printf(" SPACE -> "); break;
681  case MODE_WORD: printf(" WORD -> "); break;
682  case MODE_BARRED: printf(" BARRED -> "); break;
683  case MODE_BACKSLASHED: printf(" BACKSLASH -> "); break;
684  case MODE_TILDE: printf(" TILDE -> "); break;
685  case MODE_SEMITILDE: printf(" SEMITILDE -> "); break;
686  case MODE_SEMICOLON: printf(" SEMICOLON -> "); break;
687  case MODE_TILDESPACE: printf("TILDESPACE -> "); break;
688  }
689 #endif
690  code = mode_else[mode];
691  if( !len )
692  code = mode_eof[mode];
693  else
694  {
695  if( method==TOKENIZE_READWORD )
696  { // tokenize as expected by READWORD
697  if( ch==TEXT('|') ) code = mode_bar[mode];
698  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
699  }
700  else
701  if( method==TOKENIZE_READLIST )
702  { // tokenize as data
703  if( ch==TEXT('\n') ) code = mode_eol[mode];
704  else if( ch==TEXT('\r') ) code = mode_eol[mode];
705  else if( ch<=TEXT(' ') ) code = mode_space[mode];
706  else if( ch==TEXT('[') ) code = mode_open[mode];
707  else if( ch==TEXT(']') ) code = mode_close[mode];
708  else if( ch==TEXT('|') ) code = mode_bar[mode];
709  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
710  else if( ch==TEXT('~') ) code = mode_tilde[mode];
711  }
712  else
713  if( method==TOKENIZE_DATA )
714  { // tokenize as data
715  if( ch==TEXT('\n') ) code = mode_eol[mode];
716  else if( ch==TEXT('\r') ) code = mode_eol[mode];
717  else if( ch<=TEXT(' ') ) code = mode_space[mode];
718  else if( ch==TEXT('[') ) code = mode_open[mode];
719  else if( ch==TEXT(']') ) code = mode_close[mode];
720  else if( ch==TEXT('|') ) code = mode_bar[mode];
721  else if( ch==TEXT('\\') ) code = mode_backslash[mode];
722  else if( ch==TEXT('~') ) code = mode_tilde[mode];
723  else if( ch==TEXT(';') ) code = mode_semicolon[mode];
724  }
725  else
726  { // tokenize as commands
727  //printf("ch=%c *buf=%c\n",ch,*buffer);
728  if( ch==TEXT('(') ||
729  ch==TEXT(')') ) code = mode_parens[mode];
730  else if( *buffer!=TEXT('"') )
731  {
732  if( (ch==TEXT('+') || ch==TEXT('-')) && is_number(bp) ) { }
733  else if( ch==TEXT('+') ||
734  ch==TEXT('-') ||
735  ch==TEXT('*') ||
736  ch==TEXT('/')) code = mode_parens[mode];
737  else if( ch==TEXT('=') ) code = mode_equal[mode];
738  else if( ch==TEXT('<') ) code = mode_less[mode];
739  else if( ch==TEXT('>') ) code = mode_greater[mode];
740  }
741  }
742  }
743  // process action code
744  int _stack = -1;
745 
746  int newmode = 0;
747  if( code & PAT_GOTO )
748  {
749  // this is pred-processing of GOTO
750  // if old mode was space, and new is not,
751  // then update initial position of next token
752  newmode = (code>>PAT_SHIFT) & 0xF;
753  if( mode==MODE_SPACE && newmode!=MODE_SPACE ) { sp = source; }
754  }
755 
756 
757  if( code & PAT_TILDE )
758  {
759  *bp++ = TEXT('~');
760  #ifdef DEBUG_TOKENS
761  printf("\n<TOKENS> put('%c'/%d)\n",TEXT('~'),TEXT('~'));
762  #endif //DEBUG_TOKENS
763  }
764  if( code & PAT_PUSH )
765  {
766  // push a character only if:
767  // - currently not in bars |..?..|
768  // - currently in bars, but not in semicolon ;..|..?..|
769  if( mode!=MODE_BARRED ||
770  (stack[mode]!=MODE_SEMITILDE &&
771  stack[mode]!=MODE_SEMICOLON) )
772  {
773  //if( mode==MODE_BARRED || mode==MODE_BACKSLASHED )
774  //*bp++ = ENBAR(ch);
775  //else
776  *bp++ = ch;
777  #ifdef DEBUG_TOKENS
778  printf("\n<TOKENS> put('%c'/%d)\n",ch,ch);
779  #endif //DEBUG_TOKENS
780  //if( ch=='\r' && *(source+1)=='\n' ) // handle CRLF cases
781  //{
782  //*bp++ = '\n';
783  //#ifdef DEBUG_TOKENS
784  // printf("\n<TOKENS> put('%d')\n",'\n');
785  // #endif //DEBUG_TOKENS
786  //}
787  }
788  }
789  if( code & PAT_NEXT2 )
790  {
791  source++;
792  len--;
793  }
794  if( code & (PAT_NEXT|PAT_NEXT2) )
795  {
796  if( *source=='\r' )
797  {
798  //crlf = 0;
799  if( *(source+1)=='\n' )
800  {
801  //crlf = 1;
802  source++;
803  len--;
804  }
805  }
806  source++;
807  len--;
808  }
809  if( code & PAT_DIRTY ) dirty = 1;
810  if( code & PAT_MUTATED ) mutated = 1;
811  if( code & PAT_ERROR_POS ) errpos = origlen-len-1;
812  if( code & PAT_RETURN_TO_WORD ) _stack = MODE_WORD;
813  if( code & PAT_RETURN_TO_SELF ) _stack = mode;
814  if( code & PAT_RETURN_TO_CALLER ) _stack = stack[mode];
815  if( code & PAT_GOTO )
816  {
817  // this is post-processing of GOTO
818  mode = newmode;
819  stack[mode] = _stack;
820  }
821  if( code & PAT_RETURN ) mode = stack[mode];
822  if( code & PAT_TOKEN )
823  {
824  int _token = (code>>PAT_SHIFT) & 0xF;
825  if( _token!=TOKEN_WORD ) return _token;
826  if( mutated )
827  {
828  *bp = NULL_CHAR;
829  *token = new_word( buffer, UNKNOWN );
830  #ifdef DEBUG_TOKENS
831  printf("MUTATED TOKEN "); dumpln(*token);
832  printf("\n\n");
833  #endif
834  }
835  else
836  {
837  *token = new_subword( input, sp, source-sp /*bp-buffer*/ );
838  #ifdef DEBUG_TOKENS
839  printf("NORMAL TOKEN **"); dump(*token);
840  printf("** (len=%d)\n\n\n",source-sp);
841  #endif
842  }
843  return dirty?TOKEN_DIRTY_WORD:TOKEN_WORD;
844  }
845  if( code & PAT_ERROR )
846  {
847  #ifdef DEBUG_TOKENS
848  printf("ERROR\n");
849  #endif
850  return TOKEN_ERROR;
851  }
852 
853 #ifdef DEBUG_TOKENS
854  switch( mode )
855  {
856  case MODE_ENTRY: printf("ENTRY\n"); break;
857  case MODE_SPACE: printf("SPACE\n"); break;
858  case MODE_WORD: printf("WORD\n"); break;
859  case MODE_BARRED: printf("BARRED\n"); break;
860  case MODE_BACKSLASHED: printf("BACKSLASH\n"); break;
861  case MODE_TILDE: printf("TILDE\n"); break;
862  case MODE_SEMITILDE: printf("SEMITILDE\n"); break;
863  case MODE_SEMICOLON: printf("SEMICOLON\n"); break;
864  case MODE_TILDESPACE: printf("TILDESPACE\n"); break;
865  }
866 #endif
867  goto again;
868  } // get_token()
869 
870 
871  atom_t get_sublist( int level, int full_parse, atom_t* pos ) //ex2//
872  {
873  atom_t result = empty_list;
874  atom_t last = empty_list;
875  if( pos ) *pos = NULL; //ex2//
876 
877  atom_t token = NULL;
878  atom_t sublist_pos = NULL; //ex2//
879  int flags;
880  int bracketlen = len; // LEN of last opening bar
881 
882  int pos_from = source-STRING(input); //ex2//
883  if( pos_from ) pos_from--; //ex2//
884 
885  flags = 0;//FLAG_NEWLINE;
886  while( (last_token=get_token(&token,full_parse)) )
887  {
888  sublist_pos = NULL;
889 
890  #ifdef DEBUG_TOKENIZATION
891  switch(last_token)
892  {
893  case TOKEN_END: printf("TOKEN_END\n"); break;
894  case TOKEN_SPACE: printf("TOKEN_SPACE\n"); break;
895  case TOKEN_WORD: printf("TOKEN_WORD @"); dump(token); printf("@\n"); break;
896  case TOKEN_DIRTY_WORD: printf("TOKEN_|WORD| @"); dump(token); printf("@\n"); break;
897  case TOKEN_OPEN: printf("TOKEN_OPEN [\n"); break;
898  case TOKEN_CLOSE: printf("TOKEN_CLOSE ]\n"); break;
899  case TOKEN_LINEEND: printf("TOKEN_LINEEND\n"); break;
900  case TOKEN_ERROR: printf("TOKEN_ERROR\n"); break;
901  }
902  #endif
903 
904  if( last_token==TOKEN_ERROR ) return result;
905  if( last_token==TOKEN_SPACE )
906  {
907  if( IS_NOT_EMPTY(last) ) SET_FLAGS( last, FLAG_BEFORE_SPACE );
908  flags |= FLAG_AFTER_SPACE;
909  continue;
910  }
911  if( last_token==TOKEN_DIRTY_WORD )
912  {
913  //printf("###BEFORE="); dumpln(token);
914  atom_t x = purify( token );
915  DEUSE( token );
916  token = x;
917  //printf("###AFTER="); dumpln(token);
918  }
919  if( last_token==TOKEN_CLOSE ) break;
920  if( last_token==TOKEN_LINEEND )
921  {
922  flags |= FLAG_NEWLINE;
923  continue;
924  }
925  if( last_token==TOKEN_OPEN)
926  {
927  token = get_sublist( level+1, TOKENIZE_DATA, &sublist_pos ); // recursive //ex2//
928 
929  if( last_token==TOKEN_ERROR )
930  {
931  DEUSE( token );
932  if( sublist_pos ) DEUSE(sublist_pos);
933  sublist_pos = NULL;
934  break;
935  }
936  }
937 
938  if( method==TOKENIZE_COMMANDS )
939  flags |= FLAG_TOKENIZED_COMMANDS;
940 
941  if( method==TOKENIZE_DATA )
942  flags |= FLAG_TOKENIZED_DATA;
943 
944  if( method==TOKENIZE_COMMANDS &&
945  last_token==TOKEN_WORD &&
946  LENGTH(token)>1 &&
947  *STRING(token)==TEXT('?') &&
948  *(STRING(token)+1)>=TEXT('0') &&
949  *(STRING(token)+1)<=TEXT('9') )
950  {
951  //printf(">>>%d %d\n", last_token==TOKEN_WORD, last_token==TOKEN_DIRTY_WORD);
952  // process template ?nn->(? nn) for command tokenization
953  //printf("append token **"); dump(token); printf("**\n");
954  atom_t new_qoken = new_subword( token, STRING(token), 1 );
955  atom_t new_token = new_subword( token, STRING(token)+1, LENGTH(token)-1 );
956  DEUSE( token );
957 
958  append( new_word(TEXT("("),-1), &result, &last ); // (
959  append( new_qoken, &result, &last ); // ?
960  append( new_token, &result, &last ); // nn
961  append( new_word(TEXT(")"),-1), &result, &last ); // )
962  }
963  else
964  {
965  // normal token, no more processing needed
966  if( sublist_pos ) //ex//
967  {
968  append_ex( token, &result, &last );
969  POS( last ) = sublist_pos;
970 
971  //printf("\n\nSET EXTENDED POSITION ");
972  //dump_atom(sublist_pos,1); printf("\n");
973  //printf("CURRENT RESULT ");
974  //dump_atom(result,1); printf("\n\n");
975 
976  sublist_pos = NULL;
977  }
978  else
979  {
980  append( token, &result, &last );
981  }
982  SET_FLAGS( last, flags );
983  }
984 
985  flags = 0;
986  if( last_token==TOKEN_ERROR ) break;
987  }
988 
989  // test for unmatching square brackets
990  // i.e. ...[... or ...]...
991  if( level )
992  {
993  if( last_token==TOKEN_END )
994  {
995  errpos = origlen-bracketlen-1;
996  last_token = TOKEN_ERROR;
997  }
998  }
999  else
1000  {
1001  if( last_token==TOKEN_CLOSE )
1002  {
1003  errpos = origlen-len-1;
1004  last_token = TOKEN_ERROR;
1005  }
1006  }
1007 
1008  int pos_to = source-STRING(input);
1009  if( pos_to ) pos_to--;
1010 
1011  if( pos && method == TOKENIZE_DATA ) //ex2//
1012  {
1013  *pos = new_subword( input, STRING(input)+pos_from, pos_to-pos_from+1 );
1014  }
1015 
1016  return result;
1017  } // get_sublist()
1018 
1019  atom_t result = get_sublist( 0, method, NULL ); //ex//
1020 
1021  // in case of error return empty list
1022  if( last_token==TOKEN_ERROR )
1023  {
1024  //printf("ERROR RESULT=");dumpln(result);
1025  DEUSE( result );
1026  result = new_parse_error( ERROR_INCOMPLETE_PAIR, errpos, input );
1027  }
1028 
1029 
1030  DEALLOC( buffer );
1031  //printf("#########");
1032  //dump_atom(result,1);
1033  //printf("######\n");
1034 
1035  return result;
1036 }
1037 
1038 
1039 
1040 
1041 //===================================================
1054 //===================================================
1056 {
1057  #ifdef SAFEMODE
1058  assert( IS_WORD(word) || IS_SUBWORD(word) );
1059  #endif
1060 
1061  chars_t source = STRING(word);
1062  int_t len = LENGTH(word);
1063 
1064  // if there are no enough characters just exit
1065  if( LENGTH(word)<2 ) return USE(word);
1066 
1067  // if the first two characters are not #! then exit
1068  if( *source!=TEXT('#') || *(source+1)!=TEXT('!') ) return USE(word);
1069 
1070  // skip the line
1071  while( len && *source!=TEXT('\n') )
1072  {
1073  source++;
1074  len--;
1075  }
1076 
1077  // return a subword. Pay attention to always reference
1078  // the main host word because the input could be a word
1079  // or a subword.
1080  if( IS_WORD(word) )
1081  return new_subword( word, source, len );
1082  else
1083  return new_subword( WORD(word), source, len );
1084 }
1085 
1086 
1087 
1088 
1089 //===================================================
1098 //===================================================
1100 {
1101  //return USE(word);
1102  #ifdef SAFEMODE
1103  assert( IS_WORD(word) || IS_SUBWORD(word) );
1104  #endif
1105 
1106  chars_t source = STRING(word);
1107  int_t len = LENGTH(word);
1108 
1109  chars_t buffer = alloca( CHAR_SIZE*len ); // buffer for the longest word
1110  chars_t bp = buffer;
1111 
1112  int need_enbar = 0;
1113  int is_mutated = 0; // set to 1 if the word is mutated
1114  int in_backslash = 0;
1115  int in_bars = 0;
1116  for( ; len; len--,source++ )
1117  {
1118  need_enbar = in_bars || in_backslash;
1119  if( in_backslash )
1120  {
1121  in_backslash = 0;
1122  }
1123  else if( *source==TEXT('\\') )
1124  {
1125  is_mutated = 1;
1126  in_backslash = 1;
1127  continue;
1128  }
1129  else if( *source==TEXT('|') )
1130  {
1131  is_mutated = 1;
1132  in_bars = !in_bars;
1133  continue;
1134  }
1135  if( need_enbar )
1136  *bp++ = ENBAR(*source);
1137  else
1138  *bp++ = *source;
1139 
1140  //if( need_enbar )
1141  //printf(" PURIFY %d %d\n",*source,ENBAR(*source));
1142  //else
1143  //printf(" PURIFY %d \n",*source );
1144  }
1145 
1146  *bp = NULL_CHAR;
1147 
1148  if( is_mutated )
1149  return new_word( buffer, bp-buffer );
1150  else
1151  return USE(word);
1152 }
1153 
1154 
1155 
1156 
1157 //===================================================
1166 //===================================================
1168 {
1169  atom_t result = empty_list;
1170 
1171  atom_t _paren( int level, atom_t openparen, int* last_flags )
1172  {
1173 #ifdef DEBUG_PARENTHESES
1174  printf("<PAREN> ENTER LEVEL %d INPUT=", level);
1175  dumpln(input);
1176 #endif //DEBUG_PARENTHESES
1177 
1178  *last_flags = 0;
1179 
1180  atom_t result = empty_list;
1181  atom_t last = empty_list;
1182  while( IS_NOT_EMPTY(input) )
1183  {
1184  // get current element
1185  atom_t elem = USE(CAR(input));
1186  int flags = FLAGS(input);
1187 
1188 #ifdef DEBUG_PARENTHESES
1189  printf("<PAREN> READ ELEMENT ");
1190  dumpln(elem);
1191 #endif //DEBUG_PARENTHESES
1192 
1193  // if it is ( then call _parse() recursively
1194  // if it is ) then exit current _parse()
1195  if( IS_WORD(elem)||IS_SUBWORD(elem) )
1196  if( LENGTH(elem)==1 )
1197  {
1198  if( *STRING(elem)==TEXT('(') ) // process (
1199  {
1200  int f;
1201  DEUSE( elem ); //compensate USE() when elem was retrieved
1202  input = CDR(input);
1203  elem = _paren( level+1, elem, &f );
1204  flags |= f;
1205  if( IS_ERROR(elem) )
1206  {
1207  DEUSE( result );
1208  return elem;
1209  }
1210  }
1211  else if( *STRING(elem)==TEXT(')') )// process )
1212  {
1213  DEUSE( elem ); //compensate USE() when elem was retrieved
1214  *last_flags = FLAGS(input);
1215  break;
1216  }
1217  }
1218 
1219 #ifdef DEBUG_PARENTESES
1220  printf("<PAREN> APPEND ");
1221  dumpln(elem);
1222 #endif //DEBUG_PARENTHESES
1223 
1224  // append current element
1225  if( IS_EXTENDED(input) )
1226  {
1227  append_ex( elem, &result, &last );
1228  DEUSE( POS(last) );
1229  POS( last ) = USE( POS( input ) );
1230  }
1231  else
1232  append( elem, &result, &last );
1233  FLAGS(last) |= flags;
1234 
1235  // empty list
1236  if( IS_EMPTY(input) ) break;
1237 
1238  // move to next element
1239  input = CDR(input);
1240  }
1241 
1242  if( level && IS_EMPTY(input) )
1243  {
1244  DEUSE( result );
1245  return new_error( ERROR_INCOMPLETE_PAIR, openparen );
1246  }
1247  if( !level && !IS_EMPTY(input) )
1248  {
1249  DEUSE( result );
1250  return new_error( ERROR_INCOMPLETE_PAIR, CAR(input) );
1251  }
1252  if( level && IS_EMPTY(result) )
1253  {
1254  return new_error( ERROR_EMPTY_EXPRESSION, openparen );
1255  }
1256 
1257 #ifdef DEBUG_PARENTHESES
1258  printf("<PAREN> EXIT LEVEL %d RESULT=", level);
1259  dumpln(result);
1260 #endif //DEBUG_PARENTHESES
1261  if( level ) SET_FLAGS( result, FLAG_EXPRESSION );
1262  return result;
1263  } // _paren()
1264 
1265 
1266  int f;
1267  result = _paren( 0, empty_list, &f );
1268 
1269  return result;
1270 }
1271 
1272 
1273 
1274 
1275 //===================================================
1309 //===================================================
1311 {
1312  #ifdef DEBUG_TO_END
1313  printf("<TO-END> DEFINING=");
1314  dumpln( input);
1315  #endif
1316 
1317  //atom_t last;
1318  atom_t a = input;
1319  atom_t name;
1320  atom_t lhi; // last_header_item
1321 
1322  // create the function
1323  atom_t function = new_var( word_to, parent, 1 ); //DEUSE(to);
1324  need_descr2( function );
1325  SET_FLAGS( function, FLAG_FUNCTION );
1326  LARGS( function ) = 0;
1327  RARGS( function ) = 0;
1328 
1329  // collecting left parameters
1330  while( IS_NOT_EMPTY(a) &&
1331  !GET_FLAGS(a,FLAG_NEWLINE) &&
1332  IS_ANY_WORD(CAR(a)) &&
1333  (*STRING(CAR(a))==TEXT(':')) )
1334  {
1335  lhi = CAR(a);
1336  atom_t var = new_local_var( CAR(a), function, 1 );
1337  if( IS_ERROR(var) )
1338  {
1339  DEUSE( input );
1340  //DEUSE( function );
1341  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1342  return var;
1343  }
1344 
1345  SET_FLAGS( var, FLAG_VARIABLE );
1346  LARGS( function )++;
1347 #ifdef SAFE_MODE
1348  assert( LARGS(function)<255 );
1349 #endif
1350  a = CDR(a);
1351  }
1352 
1353  // collecting name
1354  if( IS_NOT_EMPTY(a) &&
1355  !GET_FLAGS(a,FLAG_NEWLINE) &&
1356  IS_ANY_WORD(CAR(a)) &&
1357  (*STRING(CAR(a))!=TEXT('"')) )
1358  {
1359  lhi = CAR(a);
1360  name = CAR(a);
1361  a = CDR(a);
1362  }
1363  else
1364  {
1365  DEUSE( input );
1366  return new_error( ERROR_MISSING_NAME, a );
1367  }
1368 
1369  //printf("<TO-END> DEFINING="); dumpln( name );
1370 
1371  // collecting right parameters
1372  while( IS_NOT_EMPTY(a) &&
1373  !GET_FLAGS(a,FLAG_NEWLINE) &&
1374  IS_ANY_WORD(CAR(a)) &&
1375  (*STRING(CAR(a))==TEXT(':')) )
1376  {
1377  lhi = CAR(a);
1378  atom_t var = new_local_var( CAR(a), function, 1 );
1379  if( IS_ERROR(var) )
1380  {
1381  DEUSE( input );
1382  //DEUSE( function );
1383  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1384  return var;
1385  }
1386  SET_FLAGS( var, FLAG_VARIABLE );
1387  RARGS( function )++;
1388 #ifdef SAFE_MODE
1389  assert( RARGS(function)<255 );
1390 #endif
1391  a = CDR(a);
1392  }
1393 
1394  // collect "..."
1395  if( IS_NOT_EMPTY(a) &&
1396  !GET_FLAGS(a,FLAG_NEWLINE) &&
1397  IS_ANY_WORD(CAR(a)) &&
1398  (LENGTH(CAR(a))==3) &&
1399  (*(STRING(CAR(a))+0)==TEXT('.')) &&
1400  (*(STRING(CAR(a))+1)==TEXT('.')) &&
1401  (*(STRING(CAR(a))+2)==TEXT('.')) )
1402  {
1403  SET_FLAGS( function, FLAG_INFINITE_ARGS );
1404  a = CDR(a);
1405  }
1406 
1407 
1408  // set offset of parameters
1409  int offset = BASE_OFFSET_PARAMS; // this is the start offset
1410  atom_t x;
1411  for( x = LOCALS(function); IS_NOT_EMPTY(x); x=CDR(x) )
1412  {
1413  //printf("set offset of "); dump(NAME(CAR(x))); printf(" to be %d\n",offset);
1414  OFFSET(CAR(x)) = offset;
1415  offset += sizeof( atom_t );
1416  }
1417 
1418 
1419  // check whether the function is already defined
1420  atom_t var = find_local_var( name, parent );
1421  if( var )
1422  {
1423  if( LARGS(var)!=LARGS(function) || RARGS(var)!=RARGS(function) )
1424  {
1425  LOCALS(parent) = behead( LOCALS(parent) ); // this DEUSEs function
1427  DEUSE( input );
1428  return result;
1429  }
1430 
1431  // forget data of the old function and
1432  // reuse data of the new function
1433  DEUSE( FULLSOURCE(var) );
1434  DEUSE( SOURCE(var) );
1435  DEUSE( LOCALS(var) );
1436  DEUSE( BODY(var) );
1437  DEUSE( TREE(var) );
1438  DEUSE( BINARY(var) );
1439 
1440  TREE(var) = empty_list;
1441  BINARY(var) = empty_list;
1442 
1443  BODY(var) = USE(a); //USE(BODY(function));
1444  LOCALS(var) = USE(LOCALS(function));
1445 
1446  LOCALS(parent) = behead(LOCALS(parent));
1447  //DEUSE(function);
1448  function = var;
1449  }
1450  else
1451  {
1452  DEUSE( NAME(function) );
1453  NAME( function ) = USE(name);
1454  BODY( function ) = USE(a);
1455  }
1456 
1457  //LEVEL( function ) = level;
1458  ADDRESS( function ) = 0;
1459  PRIORITY( function ) = PRIORITY_FUN;
1460 
1461  // cut header and 'END' from source
1462  chars_t new_src = STRING(lhi)+LENGTH(lhi);
1463  int new_len = LENGTH(source) - (new_src-STRING(source)) - LENGTH(word_end);
1464  SOURCE( function ) = new_subword( source, new_src, new_len );
1465  FULLSOURCE( function ) = USE( source );
1466 
1467  //DEUSE( input );
1468  #ifdef DEBUG_TO_END
1469  printf("<TO-END> DEFINED FUNCTION "); dumpln(NAME(function));
1470  printf("<TO-END> SOURCE "); dumpln(SOURCE(function));
1471  printf("<TO-END> BODY "); dumpln(BODY(function));
1472  printf("<TO-END> TREE "); dumpln(TREE(function));
1473  #endif
1474 
1475  //printf("@@@@@@DEFINE "); dump_atom(NAME(function),1);
1476  //printf(" IN PARENT "); dump_atom(NAME(PARENT(function)),1);
1477  //printf("@@@@@@\n");
1478 
1479 
1480  return function;
1481 }
1482 
1483 
1484 
1485 
1486 //===================================================
1498 //===================================================
1499 atom_t preparse( atom_t input, atom_t parent, int level )
1500 {
1501  #ifdef DEBUG_TO_END
1502  printf("<TO-END> ENTER1=");
1503  dumpln( input);
1504  #endif
1505 
1506  atom_t a = input;
1507  atom_t before_a = NULL;
1508  atom_t before_to = NULL;
1509  atom_t first_to = NULL;
1510  int to_end_depth = 0;
1511  while( IS_NOT_EMPTY(a) )
1512  {
1513  if( IS_ANY_WORD(CAR(a)) )
1514  {
1515  // found TO or its synonym
1516  if( same_words(CAR(a),word_to) || same_words(CAR(a),word_to_syn) )
1517  {
1518  if( to_end_depth==0 )
1519  {
1520  before_to = before_a;
1521  first_to = a;
1522  }
1523  to_end_depth++;
1524  }
1525 
1526  // found END
1527  if( same_words( CAR(a), word_end ))
1528  {
1529  // END without to ?
1530  if( to_end_depth==0 )
1531  {
1532  atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(a) );
1533  DEUSE( input );
1534  return result;
1535  }
1536 
1537  to_end_depth--;
1538  if( to_end_depth==0 )
1539  {
1540  // found END corresponding to TO.
1541  // extract the whole TO..END
1542  if( first_to==before_a )
1543  {
1544  DEUSE( input );
1545  return new_error( ERROR_EMPTY_TO_END, first_to );
1546  }
1547 
1548  atom_t word = new_subword(WORD(CAR(first_to)),STRING(CAR(first_to)),(STRING(CAR(a))-STRING(CAR(first_to))+LENGTH(word_end)));
1549  //atom_t word = empty_list;
1550 
1551  first_to = behead(first_to);
1552  //first_to = CDR(first_to);
1553  CDR(before_a) = empty_list;
1554  before_a = before_to;
1555  a = behead(a);
1556  if( before_to )
1557  CDR(before_a) = a; // TO is not first item
1558  else
1559  input = a; // TO is first item
1560 
1561  #ifdef DEBUG_TO_END
1562  printf("<TO-END> EXTRACT:");
1563  dumpln( first_to );
1564  printf("<TO-END> CURRENT INPUT:");
1565  dumpln( input );
1566  printf("<TO-END> DEFINE IN PARENT:");
1567  dumpln( parent );
1568  #endif
1569 
1570  // create new command/function
1571  atom_t var = define_user_function( word, (first_to), parent );
1572  DEUSE( word );
1573  if( IS_ERROR(var) )
1574  {
1575  DEUSE( input );
1576  return var;
1577  }
1578  DEUSE( first_to );
1579 
1580  // process recursively the function
1581  // for nested to-end definitions
1582  BODY(var) = preparse( BODY(var), var, level+1 );
1583 
1584  // next item is set explicitely,
1585  SET_FLAGS( a, FLAG_NEWLINE );
1586  first_to = NULL;
1587  before_to = NULL;
1588  continue;
1589  }
1590  }
1591  }
1592 
1593  // next item
1594  before_a = a;
1595  a = CDR(a);
1596  }
1597 
1598  // TO without END ?
1599  if( first_to )
1600  {
1601  atom_t result = new_error( ERROR_INCOMPLETE_PAIR, CAR(first_to) );
1602  DEUSE(input);
1603  return result;
1604  }
1605 
1606  #ifdef DEBUG_TO_END
1607  printf("<TO-END> LEFTOVERS=");
1608  dumpln( input);
1609  printf("\n");
1610  #endif
1611 
1612  return input;
1613 }
1614 
1615 
1616 
1617 
1618 //===================================================
1630 //===================================================
1631 #define MAX_ELEMS 128
1632 atom_t parse( atom_t input, atom_t parent, int top_level )
1633 {
1635  atom_t data[MAX_ELEMS];
1636  atom_t poses[MAX_ELEMS]; // source position (if available)
1637  int pris[MAX_ELEMS];
1638  int usedby[MAX_ELEMS];
1639  int largs[MAX_ELEMS]; // number of unlinked left arguments
1640  int rargs[MAX_ELEMS]; // number of unlinked right arguments
1641  int count; // count of elements in above arrays
1642 
1643  atom_t aux = empty_list; // additional lists to delete
1644 
1645  //printf("parse==");dumpln(input);
1646 
1647  #ifdef DEBUG_PARSE
1648  void dump_arrays( )
1649  {
1650  int i;
1651  for( i=0; i<count; i++ ) if( pris[i]>=PRIORITY_MIN )
1652  {
1653  printf("<PARSE> %2d.[used by %2d; free=%d:%d] v%c0 pr=%d =",
1654  i, usedby[i], largs[i], rargs[i], vars[i]?'#':'=', pris[i]);
1655  dump_atom(data[i],1);
1656  if( !IS_UNBOUND(poses[i]) )
1657  {
1658  printf(" @ ");
1659  dump_atom(poses[i],1);
1660  }
1661  printf("\n");
1662  }
1663  printf("\n");
1664  }
1665  #endif
1666 
1667 
1668  // Finds the first line of input. Return a pointer
1669  // to the first node after the line. Sets arrays
1670  // vars[], data[] and prios[].
1671  atom_t get_line( atom_t input )
1672  {
1673  int has_no_infinite = 1;
1674  count = 0;
1675  while( IS_NOT_EMPTY(input) )
1676  {
1677  atom_t elem = USE(CAR(input));
1678  //printf("testing element ");
1679  //dumpln(elem);
1680 
1681  #ifdef SAFEMODE
1682  assert( count<MAX_ELEMS );
1683  #endif
1684  data[count] = elem;
1685  vars[count] = IS_ANY_WORD(elem) ? find_var(elem,parent) : NULL;
1686  if( VAR && !IS_COMMAND(VAR) && !IS_FUNCTION(VAR) )
1687  {
1688  // If we have variable called 4 (e.g. MAKE 4 "BOZA)
1689  // then VAR!=NULL, but this is allowed only for
1690  // functions and commands, so set VAR=NULL!
1691  VAR=NULL;
1692  }
1693  pris[count] = (VAR ? PRIORITY(VAR) : PRIORITY_MAX);
1694  usedby[count] = -1;
1695  largs[count] = VAR ? LARGS(VAR) : 0;
1696  rargs[count] = VAR ? RARGS(VAR) : 0;
1697  poses[count] = IS_EXTENDED(input)?POS(input):unbound;
1698 
1699  if( VAR && IS_VARIABLE(VAR) )
1700  {
1701  printf("found variable as function: ");
1702  dump_atom(NAME(VAR),1);
1703  printf("\n");
1704  }
1705 
1706  // if element is a word not starting with ":" and
1707  // cannot be transfered into a number and does not
1708  // exist as a variable, then this is unknown function
1709  float64_t x;
1710  if( IS_ANY_WORD(elem) &&
1711  !VAR &&
1712  *STRING(elem)!=TEXT(':') &&
1713  *STRING(elem)!=TEXT('"') &&
1714  !atom_to_float(elem,&x) )
1715  {
1716  //DEUSE( input );
1717  DEUSE( elem );
1718  return new_error( ERROR_DO_NOT_KNOW, elem );
1719  }
1720 
1721  if( !top_level && VAR && has_no_infinite )
1722  {
1724  {
1725  rargs[count]=-1;
1726  has_no_infinite = 0;
1727  }
1728  }
1729 
1730  // test for unary
1731  if( VAR && GET_FLAGS(VAR,FLAG_CAN_BE_UNARY) && largs[count]==1 )
1732  {
1733  // case 1: <spc> op <spc> -> binary
1734  // case 2: <spc> op -> unary
1735  // case 3: op <spc> -> binary
1736  // case 4: op -> binary
1738  == FLAG_AFTER_SPACE )
1739  largs[count]=0;
1740  }
1741 
1742  count++;
1743  //next:
1744  input = CDR(input);
1745 
1746  // if the next item is on a new line AND if it is
1747  // not constant-list AND we are not in a sublist
1748  // then break the line here
1749  if( top_level && IS_NOT_EMPTY(input) && GET_FLAGS(input,FLAG_NEWLINE) &&
1750  (!IS_LIST(CAR(input)) || IS_EXPRESSION(CAR(input))) )
1751  break;
1752  } // line while
1753 
1754  return input;
1755  } //get_line()
1756 
1757  #ifdef DEBUG_PARSE
1758  printf("<PARSE> ENTER=");
1759  dumpln( input);
1760  #endif
1761 
1762 
1763  //-----------------------------------------
1764  // STEP 1: Convert (..) into subexpressions
1765  //-----------------------------------------
1766  if( IS_EMPTY(input) ) return input;
1767  if( IS_ERROR(input) ) return input;
1768  atom_t orig_input = input = parentheses( input );
1769  if( IS_ERROR(input) ) return input;
1770 
1771  #ifdef DEBUG_PARSE
1772  printf("<PARSE> PARENTHESISEZ=");
1773  dumpln( input);
1774  #endif
1775 
1776  // if there is no parent consider the root as parent
1777  if( !parent ) parent = root;
1778 
1779 
1780  //-----------------------------------------
1781  // STEP 2-7: Convert list into lisp line-by-line
1782  //-----------------------------------------
1783  atom_t result = empty_list;
1784  atom_t last = empty_list;
1785 
1786  atom_t err = NULL;
1787 
1788  int i,j,finished;
1789  int* args;
1790 
1791 #define LINK \
1792  { \
1793  usedby[j] = i; \
1794  args[i]--; \
1795  }
1796  //printf("%d is used by %d\n",j,i);
1797 
1798 #define RELINK \
1799  { \
1800  if( j<usedby[j] ) { largs[usedby[j]]++; finished=0; } \
1801  if( j>usedby[j] ) { rargs[usedby[j]]++; finished=0; } \
1802  LINK; \
1803  }
1804 
1805  while( IS_NOT_EMPTY(input) ) // main while
1806  {
1807  next_line:
1808  #ifdef DEBUG_PARSE
1809  printf("\n\n");
1810  printf("<PARSE>------------------\n");
1811  printf("<PARSE> START A NEW LINE \n");
1812  printf("<PARSE>------------------\n");
1813  #endif
1814 
1815  //-----------------------------------
1816  // STEP 2: Get a line from the input
1817  //-----------------------------------
1818  input = get_line( input );
1819  if( IS_ERROR(input) )
1820  {
1821  err = input;
1822  break;
1823  }
1824  #ifdef DEBUG_PARSE
1825  printf("<PARSE> ORIGINAL PARSE_LIST:\n");
1826  dump_arrays( );
1827  #endif
1828  //printf("<PARSE> LEFTOVER:"); dumpln(input);
1829  //printf("------------------------------------\n");
1830 
1831 
1832  //-----------------------------------
1833  // STEP 3: Analyze and set dependencies between items
1834  //-----------------------------------
1835  finished = 0;
1836  while( !finished )
1837  {
1838  finished = 1;
1839  for( i=0; i<count; i++ )
1840  {
1841 
1842  // now process right parameters
1843  args = rargs;
1844  for( j=i+1; args[i] && j<count; j++ )
1845  {
1846  if( vars[j] && !IS_FUNCTION(vars[j]) )
1847  break;
1848  if( usedby[j]==-1 )
1849  LINK
1850  else
1851  if( (usedby[j]>j || usedby[j]<i) &&
1852  (pris[usedby[j]]<=pris[i]) )
1853  RELINK;
1854  } // for largs
1855 
1856  // now process left parameters
1857  args = largs;
1858  for( j=i-1; args[i] && j>=0; j-- )
1859  {
1860  //if(vars[j])
1861  //{
1862  //printf("j=%d cond=%d var=",j,!IS_FUNCTION(vars[j]));
1863  //dumpln(vars[j]);
1864  //}
1865  if( vars[j] && !IS_FUNCTION(vars[j]) )
1866  break;
1867  if( usedby[i]==j ) break;
1868  if( usedby[j]==-1 )
1869  {
1870  /*if( usedby[i]!=j )*/ LINK; // skip direct circular references
1871  }
1872  else
1873  if( (usedby[j]<j || usedby[j]>i) &&
1874  (pris[usedby[j]]<pris[i]) )
1875  {
1876  /*if( usedby[i]!=j )*/ RELINK; // skip direct circular reference
1877  }
1878  } // for largs
1879  } // for each element
1880  } // while not finished
1881 
1882  #ifdef DEBUG_PARSE
1883  printf("<PARSE> FINAL PARSE_LIST:\n");
1884  dump_arrays( );
1885  #endif
1886 
1887 
1888  //-----------------------------------
1889  // STEP 4: Check for extra or missing arguments
1890  //-----------------------------------
1891  #ifdef DEBUG_PARSE
1892  printf("<PARSE> START STEP 4:\n");
1893  dump_arrays( );
1894  #endif
1895 
1896  args = rargs;
1897  int top_level_pos = -1;
1898  for( j=0; j<count; j++ )
1899  {
1900  // check whether there are two or more free items
1901  // only one is allowed for non-top levels
1902  if( usedby[j]==-1 )
1903  {
1904  if( !top_level && top_level_pos>=0 )
1905  {
1906  // we are ready to announce that there are extra values
1907  // but before doing this check whether there is function
1908  // that would be so polite to take any of them
1909  //printf(">>> top_level_pos=%d\n",top_level_pos);
1910  if( rargs[top_level_pos]==0 &&
1911  vars[top_level_pos] &&
1912  GET_FLAGS(vars[top_level_pos],FLAG_MAY_HAVE_EXTRA_ARG) )
1913  {
1914  i = top_level_pos;
1915  //printf(">>> attach %d to %d\n",j,i);
1916  //printf(">>> rargs
1917  LINK;
1918  //printf(">>>>>>>"); dumpln(NAME(vars[top_level_pos]));
1919  }
1920  else
1921  {
1922  err = new_error( ERROR_CROWDED_EXPRESSION, data[j] );
1923  break;
1924  }
1925  }
1926  if( top_level_pos<0 ) top_level_pos = j;
1927  }
1928 
1929  // left paramers can be skipped only for unary functions
1930  if( largs[j] && !GET_FLAGS(vars[j],FLAG_CAN_BE_UNARY) )
1931  {
1932  err = new_error( ERROR_MISSING_LEFTS, data[j] );
1933  break;
1934  }
1935 
1936  // some functions may ignore 1 missing right parameter
1937  if( rargs[j]==1 && GET_FLAGS(vars[j],FLAG_MAY_SKIP_LAST_ARG) )
1938  continue;
1939 
1940  // when arguments can be infinite, this also mean any number
1941  if( rargs[j] && !GET_FLAGS(vars[j],FLAG_INFINITE_ARGS) )
1942  {
1943  //printf("rargs=%d\n",rargs[j]);
1944  err = new_error( ERROR_MISSING_RIGHTS, data[j] );
1945  break;
1946  }
1947  }
1948 
1949  if( err ) break;
1950 
1951 
1952  //-----------------------------------
1953  // STEP 5: Recursivelly process nested expressions
1954  // and arguments which must be lisp'ed too
1955  //-----------------------------------
1956  #ifdef DEBUG_PARSE
1957  printf("<PARSE> START STEP 5:\n");
1958  dump_arrays( );
1959  #endif
1960  for( i=0; i<count; i++ ) if( IS_LIST(data[i]) )
1961  {
1962  if( GET_FLAGS(data[i],FLAG_EXPRESSION) )
1963  { // nested expression
1964  atom_t expr = parse( data[i], parent, 0 );
1965 
1966  if( IS_ERROR(expr) )
1967  {
1968  err = expr;
1969  }
1970  else
1971  {
1972  DEUSE( data[i] );
1973  data[i] = expr;
1974  SET_FLAGS( data[i], FLAG_EXPRESSION );
1975  }
1976  }
1977  else
1978  { // possibly argument to be reparsed
1979  j=usedby[i];
1980  if( (j>=0) && vars[j] && GET_FLAGS(vars[j],FLAG_PROCESS_ARGS) )
1981  {
1982  atom_t expr = build_syntax_subtree( parent, data[i] );
1983  if( IS_ERROR(expr) )
1984  {
1985  err = expr;
1986  }
1987  else
1988  {
1989  DEUSE( data[i] );
1990  data[i] = expr;
1992  }
1993  }
1994  }
1995  }
1996  if( err ) break;
1997 
1998 
1999  //-----------------------------------
2000  // STEP 6: Group items of subexpressions, process numbers
2001  //-----------------------------------
2002  #ifdef DEBUG_PARSE
2003  printf("<PARSE> START STEP 6:\n");
2004  dump_arrays( );
2005  #endif
2006  for( i=0; i<count; i++ )
2007  {
2008  /*
2009  // convert all numeric data into numbers
2010  float64_t n;
2011  if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
2012  {
2013  DEUSE( data[i] );
2014  data[i] = new_float( n );
2015  }
2016 
2017  // create a list node for each element
2018  // whatever the element is, this list node will be used
2019  data[i] = new_list( data[i], empty_list );
2020  SET_FLAGS( data[i], FLAG_EXPRESSION );
2021  */
2022 
2023  // create a list node for each element
2024  // whatever the element is, this list node will be used
2025  float64_t n;
2026  if( IS_ANY_WORD(data[i]) && atom_to_float(data[i],&n) )
2027  {
2028  atom_t pos = data[i]; // old source of the number
2029  data[i] = new_list_ex( new_float(n), empty_list );
2030  POS( data[i] ) = pos;
2031  }
2032  else
2033  {
2034  data[i] = new_list( data[i], empty_list );
2035  }
2036  SET_FLAGS( data[i], FLAG_EXPRESSION );
2037  }
2038 
2039  for( i=0; i<count; i++ )
2040  {
2041  if( usedby[i]>=0 )
2042  {
2043  atom_t a = data[usedby[i]];
2044  atom_t name = CAR(data[i]);
2045  atom_t function = vars[usedby[i]];
2046  //printf("name(%d)=",i);dumpln(name);
2047  //printf("function(%d)=",usedby[i]);dumpln(function);
2048  //printf("a(%d)=",usedby[i]);dumpln(a);
2049 
2050  // if the item is word constant and is used by
2051  // a function which creates variables, then do
2052  // create the variables now.
2053  if( GET_FLAGS(function,FLAG_SET_ALL_VARS)
2054  && IS_ANY_WORD(name)
2055  && LENGTH(name)>1
2056  && *STRING(name)==TEXT('"') )
2057  {
2058  //printf("create local "); dumpln(name);
2059  //printf(" in "); dumpln(NAME(parent));
2060  atom_t var = new_local_var( name, parent, 1 );
2061  if( IS_ERROR(var) ) err = var;
2062  SET_FLAGS( var, FLAG_VARIABLE );
2063  }
2064 
2065  // if the item is word constant and is used by
2066  // a function which creates a single variable
2067  // (like MAKE), then do create the variable as
2068  // global if it does not exist
2069  // 2010.06.26: create the variable only if the
2070  // parsing has not been activated at run-time.
2071  atom_t real_name = NULL;
2073  && GET_FLAGS(function,FLAG_SET_ONE_VAR)
2074  && (i==usedby[i]+1)
2075  && (IS_INTEGER(name) || IS_FLOAT(name)) )
2076  {
2077  real_name = atom_to_word( name );
2078  goto use_numeric_name;
2079  }
2080 
2081  if(
2082  GET_FLAGS(function,FLAG_SET_ONE_VAR)
2083  && (i==usedby[i]+1)
2084  && IS_ANY_WORD(name)
2085  && LENGTH(name)>1
2086  && *STRING(name)==TEXT('"') )
2087  {
2088  real_name = new_subword( name, STRING(name)+1, LENGTH(name)-1 );
2089  use_numeric_name:
2090  if( !find_var( real_name, parent ) )
2091  {
2092  atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2087!");
2093  if( IS_ERROR(var) ) err = var;
2094  SET_FLAGS( var, FLAG_VARIABLE );
2095  }
2096  DEUSE( real_name );
2097 
2098  // patch for FOR command. If the control
2099  // variable is ABC then create ABC and
2100  // ^ABC
2101  if( ADDRESS(function)==(int)rt_for )
2102  {
2103  assert( IS_ANY_WORD(name) );
2104  atom_t real_name = new_word( STRING(name), LENGTH(name) );
2105  *STRING(real_name) = L'^';
2106  if( !find_var( real_name, parent ) )
2107  {
2108  atom_t var = new_local_var( real_name, parent/*root*/, 0 ); //printf("!2103!");
2109  if( IS_ERROR(var) ) err = var;
2110  SET_FLAGS( var, FLAG_VARIABLE );
2111  }
2112  DEUSE( real_name );
2113  }
2114 
2115  }
2116 
2117  // if the item is word constant and is used by
2118  // a function which load libraries, then do
2119  // load the library now.
2120  if( ADDRESS(function)==(int)rt_load
2121  && IS_ANY_WORD(name)
2122  && LENGTH(name)>1
2123  && *STRING(name)==TEXT('"') )
2124  {
2125  atom_t real_name = new_word( STRING(name)+1, LENGTH(name)-1 );
2126  atom_t word = read_word( STRING(real_name) );
2127  DEUSE( real_name );
2128 
2129  if( IS_ERROR(word) ) { err = USE(word); break; }
2130  atom_t trimmed = trim_shell_comment( word );
2131  DEUSE( word );
2132 
2133  atom_t tokens1 = tokenize( trimmed, TOKENIZE_DATA );
2134  DEUSE( trimmed );
2135  if( IS_ERROR(tokens1) )
2136  {
2137  err = tokens1;
2138  break;
2139  }
2140 
2141  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2142  DEUSE( tokens1 );
2143  if( IS_ERROR(tokens2) ) return tokens2;
2144 
2145  tokens2 = preparse( tokens2, root, LEVEL(root) );
2146  atom_t body = parentheses( tokens2 );
2147  DEUSE( tokens2 );
2148 
2149  //printf("BODY=");dumpln(body);
2150  //printf("input=");dumpln(input);
2151 
2152  if( IS_NOT_EMPTY(body) )
2153  {
2154  atom_t a;
2155  for( a = body; IS_NOT_EMPTY(CDR(a)); a=CDR(a) );
2156  CDR(a) = USE(input);
2157  input = body;
2158  }
2159 
2160  //printf("old aux="); dumpln(aux);
2161  aux = new_list( body, aux );
2162  //printf("new aux="); dumpln(aux);
2163  for( i=0; i<count; i++ ) DEUSE( data[i] );
2164  goto next_line;
2165  }
2166 
2167  SET_FLAGS( a, FLAG_EXPRESSION );
2168  while( IS_NOT_EMPTY(CDR(a)) ) a=CDR(a);
2169  CDR(a) = vars[i] ? new_list(data[i],empty_list) : data[i];
2170  }
2171  }
2172  if( err ) break;
2173 
2174 
2175  //-----------------------------------
2176  // STEP 7: Group top-most items into result list
2177  //-----------------------------------
2178  #ifdef DEBUG_PARSE
2179  printf("<PARSE> START STEP 7:\n");
2180  dump_arrays( );
2181  #endif
2182  //printf("data[0]=");dumpln(data[0]);
2183  //printf("vars[0]=");dumpln(vars[0]);
2184  for( i=0; i<count; i++ )
2185  if( usedby[i]==-1 )
2186  {
2187  //printf("i=%d var=%x is_expr=%d\n",i,vars[i],IS_EXPRESSION(CAR(data[i])));
2188  //boza if( !vars[i] && !IS_EXPRESSION(CAR(data[i])))
2189  //boza {
2190  //printf("i=%d\n",i);
2191  //boza err = new_error_atom( ERROR_UNUSED_VALUE, data[i] );
2192  //boza break;
2193  //boza }
2194  if( IS_UNBOUND(poses[i]) )
2195  {
2196  append( data[i], &result, &last );
2197  }
2198  else
2199  {
2200  append_ex( data[i], &result, &last );
2201  DEUSE( POS(last) );
2202  POS( last ) = USE( poses[i] );
2203  }
2204  }
2205 
2206  if( err ) break;
2207  } // main line
2208 
2209 
2210  //-----------------------------------------
2211  // STEP pre-8: Test for error and release unused resources
2212  //-----------------------------------------
2213  DEUSE( orig_input );
2214  DEUSE( aux );
2215  if( err )
2216  {
2217  //printf("ERRRRRRROR\n");
2218  for( i=0; i<count; i++ ) DEUSE( data[i] );
2219  DEUSE( result );
2220  return err;
2221  }
2222 
2223  //-----------------------------------
2224  // STEP 8: Remove unnecessary parentheses
2225  //-----------------------------------
2226  //printf("before=");dumpln(result);
2227  //
2228  // step 8 removed, because in some cases it removes more
2229  // parentheses than needed - e.g. when function body has
2230  // only one command
2231  // while( IS_EMPTY(CDR(result)) &&
2232  // IS_LIST(CAR(result)) &&
2233  // GET_FLAGS(CAR(result),FLAG_EXPRESSION) )
2234  // {
2235  // atom_t a = CAR(result);
2236  // CAR(result) = empty_list;
2237  // DEUSE( result );
2238  // result = a;
2239  // }
2240  //
2241  //printf(" after=");dumpln(result);
2242 
2243  if( !top_level ) SET_FLAGS( result, FLAG_EXPRESSION );
2244 
2245  #ifdef DEBUG_PARSE
2246  printf("<PARSE> EXIT=");
2247  dumpln(result);
2248  #endif
2249 
2250  if( OPTION_DUMP_AST )
2251  {
2252  outter( TEXT("Abstract Syntax Tree:\n\0"), UNKNOWN );
2253  dumpln( result );
2254  outter( TEXT("\n\0"), UNKNOWN );
2255  }
2256 
2257  return result;
2258 }
2259 
2260 
2261 
2262 
2263 //===================================================
2277 //===================================================
2279 {
2280  // exit is function is already treefied
2281  if( IS_NOT_EMPTY(TREE(func)) ) return empty_list;
2282 
2283  //printf("BUILD_SYNTAX_TREE(FUNC=");
2284  //dump(NAME(func));
2285  //printf(",SOURCE=");
2286  //dump(SOURCE(func));
2287  //printf(",BODY=");
2288  //dump(BODY(func));
2289  //printf(",LOCALS=");
2290  //dump(LOCALS(func));
2291  //printf(")\n\n");
2292 
2293  if( IS_EMPTY(BODY(func)) )
2294  {
2295  // Step 1. Tokenize
2296  //printf("SOURCE="); dumpln(SOURCE(func));
2297  atom_t tokens1 = tokenize( SOURCE(func), TOKENIZE_DATA );
2298  if( IS_ERROR(tokens1) ) return tokens1;
2299  //printf("TOKENS1="); dumpln(tokens1);
2300 
2301  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2302  DEUSE( tokens1 );
2303  if( IS_ERROR(tokens2) ) return tokens2;
2304  //printf("TOKENS2="); dumpln(tokens2);
2305 
2306  // Step 2. Extract TO..END's
2307  atom_t body = preparse( tokens2, func, LEVEL(func) );
2308  if( IS_ERROR(body) ) return body;
2309  DEUSE( BODY(func) );
2310  BODY(func) = body;
2311  //printf("BODY="); dumpln(BODY(func));
2312  }
2313 
2314 
2315  // Step 3. Parse function body
2316  //printf("BODY="); dumpln(BODY(func));
2317  atom_t tree = parse( BODY(func), func, 1 );
2318  if( IS_ERROR(tree) ) return tree;
2319  DEUSE(TREE(func));
2320  TREE(func) = tree;
2321  //printf("TREE="); dumpln(TREE(func));
2322 
2323  // because the might be some new TO..ENDs
2324  // scan all locals and build those which
2325  // have no trees
2326  atom_t local;
2327  atom_t locals;
2328  for( locals=LOCALS(func); IS_NOT_EMPTY(locals); locals=CDR(locals) )
2329  {
2330  local = CAR(locals);
2331  if( !DESCR2(local) ) continue;
2332  atom_t x = build_syntax_tree( local );
2333  if( IS_ERROR(x) ) return x;
2334  }
2335 
2336  return empty_list;
2337 }
2338 
2339 
2340 
2341 //===================================================
2355 //===================================================
2357 {
2358  //printf("BUILD_SYNTAX_SUBTREE(");
2359  //dump(NAME(function));
2360  //printf(",");
2361  //dump(source);
2362  //printf(")\n\n");
2363 
2364  // tokenize
2365  atom_t tokens1 = tokenize( source, TOKENIZE_DATA );
2366  if( IS_ERROR(tokens1) ) return tokens1;
2367  //printf("tokens1="); dumpln(tokens1);
2368 
2369  atom_t tokens2 = tokenize( tokens1, TOKENIZE_COMMANDS );
2370  DEUSE( tokens1 );
2371  if( IS_ERROR(tokens2) ) return tokens2;
2372  //printf("tokens2="); dumpln(tokens2);
2373 
2374  // extract TO..END's
2375  atom_t body = preparse( tokens2, function, LEVEL(function) );
2376  if( IS_ERROR(body) ) return body;
2377  //printf("body="); dumpln(body);
2378 
2379  // parse function body
2380  atom_t tree = parse( body, function, 1 );
2381  if( IS_ERROR(tree) ) return tree;
2382  //printf("tree="); dumpln(tree);
2383 
2384  DEUSE( tokens2 );
2385  //DEUSE( body );
2386  return tree;
2387 }

[ HOME | INDEX | ATOMS | VARS | REFERENCE ]
Lhogho Developer's Documentation
Wed Jul 10 2013