/* * Copyright (c) 1997 David I. Bell * Permission is granted to use, distribute, or modify this source, * provided that this copyright notice remains intact. * * Read input file characters into tokens */ #include #include #include "calc.h" #include "token.h" #include "string.h" #include "args.h" #include "math_error.h" #define isletter(ch) ((((ch) >= 'a') && ((ch) <= 'z')) || \ (((ch) >= 'A') && ((ch) <= 'Z'))) #define isdigit(ch) (((ch) >= '0') && ((ch) <= '9')) #define issymbol(ch) (isletter(ch) || isdigit(ch) || ((ch) == '_')) #define isoctal(ch) (((ch) >= '0') && ((ch) <= '7')) #define STRBUFSIZE 1024 /* * Current token. */ static struct { short t_type; /* type of token */ char *t_sym; /* symbol name */ long t_strindex; /* index of string value */ long t_numindex; /* index of numeric value */ } curtoken; static BOOL rescan; /* TRUE to reread current token */ static BOOL newlines; /* TRUE to return newlines as tokens */ static BOOL allsyms; /* TRUE if always want a symbol token */ static STRINGHEAD strings; /* list of constant strings */ static char *numbuf; /* buffer for numeric tokens */ static long numbufsize; /* current size of numeric buffer */ long errorcount = 0; /* number of compilation errors */ /* * Table of keywords */ struct keyword { char *k_name; /* keyword name */ int k_token; /* token number */ }; static struct keyword keywords[] = { {"if", T_IF}, {"else", T_ELSE}, {"for", T_FOR}, {"while", T_WHILE}, {"do", T_DO}, {"continue", T_CONTINUE}, {"break", T_BREAK}, {"goto", T_GOTO}, {"return", T_RETURN}, {"local", T_LOCAL}, {"global", T_GLOBAL}, {"static", T_STATIC}, {"switch", T_SWITCH}, {"case", T_CASE}, {"default", T_DEFAULT}, {"quit", T_QUIT}, {"exit", T_QUIT}, {"define", T_DEFINE}, {"read", T_READ}, {"show", T_SHOW}, {"help", T_HELP}, {"write", T_WRITE}, {"mat", T_MAT}, {"obj", T_OBJ}, {"print", T_PRINT}, {"cd", T_CD}, {"undefine", T_UNDEFINE}, {"abort", T_ABORT}, {NULL, 0} }; static void eatcomment(void); static void eatstring(int quotechar); static int eatsymbol(void); static int eatnumber(void); /* * Initialize all token information. */ void inittokens(void) { initstr(&strings); newlines = FALSE; allsyms = FALSE; rescan = FALSE; setprompt(conf->prompt1); } /* * Set the new token mode according to the specified flag, and return the * previous value of the flag. */ int tokenmode(int flag) { int oldflag; oldflag = TM_DEFAULT; if (newlines) oldflag |= TM_NEWLINES; if (allsyms) oldflag |= TM_ALLSYMS; newlines = FALSE; allsyms = FALSE; if (flag & TM_NEWLINES) newlines = TRUE; if (flag & TM_ALLSYMS) allsyms = TRUE; setprompt(newlines ? conf->prompt1 : conf->prompt2); return oldflag; } /* * Routine to read in the next token from the input stream. * The type of token is returned as a value. If the token is a string or * symbol name, information is saved so that the value can be retrieved. */ int gettoken(void) { int ch; /* current input character */ int type; /* token type */ if (rescan) { /* rescanning */ rescan = FALSE; return curtoken.t_type; } curtoken.t_sym = NULL; curtoken.t_strindex = 0; curtoken.t_numindex = 0; type = T_NULL; while (type == T_NULL) { ch = nextchar(); if (allsyms && ((ch!=' ') && (ch!=';') && (ch!='"') && (ch!='\n'))) { reread(); type = eatsymbol(); break; } switch (ch) { case ' ': case '\t': case '\0': break; case '\n': if (newlines) type = T_NEWLINE; break; case EOF: type = T_EOF; break; case '{': type = T_LEFTBRACE; break; case '}': type = T_RIGHTBRACE; break; case '(': type = T_LEFTPAREN; break; case ')': type = T_RIGHTPAREN; break; case '[': type = T_LEFTBRACKET; break; case ']': type = T_RIGHTBRACKET; break; case ';': type = T_SEMICOLON; break; case ':': type = T_COLON; break; case ',': type = T_COMMA; break; case '?': type = T_QUESTIONMARK; break; case '@': type = T_AT; break; case '`': type = T_BACKQUOTE; break; case '$': type = T_DOLLAR; break; case '"': case '\'': type = T_STRING; eatstring(ch); break; case '^': switch (nextchar()) { case '=': type = T_POWEREQUALS; break; default: type = T_POWER; reread(); } break; case '=': switch (nextchar()) { case '=': type = T_EQ; break; default: type = T_ASSIGN; reread(); } break; case '+': switch (nextchar()) { case '+': type = T_PLUSPLUS; break; case '=': type = T_PLUSEQUALS; break; default: type = T_PLUS; reread(); } break; case '-': switch (nextchar()) { case '-': type = T_MINUSMINUS; break; case '=': type = T_MINUSEQUALS; break; case '>': type = T_ARROW; break; default: type = T_MINUS; reread(); } break; case '*': switch (nextchar()) { case '=': type = T_MULTEQUALS; break; case '*': switch (nextchar()) { case '=': type = T_POWEREQUALS; break; default: type = T_POWER; reread(); } break; default: type = T_MULT; reread(); } break; case '/': switch (nextchar()) { case '/': switch (nextchar()) { case '=': type = T_SLASHSLASHEQUALS; break; default: reread(); type = T_SLASHSLASH; break; } break; case '=': type = T_DIVEQUALS; break; case '*': eatcomment(); break; default: type = T_DIV; reread(); } break; case '%': switch (nextchar()) { case '=': type = T_MODEQUALS; break; default: type = T_MOD; reread(); } break; case '<': switch (nextchar()) { case '=': type = T_LE; break; case '<': switch (nextchar()) { case '=': type = T_LSHIFTEQUALS; break; default: reread(); type = T_LEFTSHIFT; break; } break; default: type = T_LT; reread(); } break; case '>': switch (nextchar()) { case '=': type = T_GE; break; case '>': switch (nextchar()) { case '=': type = T_RSHIFTEQUALS; break; default: reread(); type = T_RIGHTSHIFT; break; } break; default: type = T_GT; reread(); } break; case '&': switch (nextchar()) { case '&': type = T_ANDAND; break; case '=': type = T_ANDEQUALS; break; default: type = T_AND; reread(); break; } break; case '|': switch (nextchar()) { case '|': type = T_OROR; break; case '=': type = T_OREQUALS; break; default: type = T_OR; reread(); break; } break; case '!': switch (nextchar()) { case '=': type = T_NE; break; default: type = T_NOT; reread(); break; } break; case '#': switch(nextchar()) { case '=': type = T_HASHEQUALS; break; default: type = T_HASH; reread(); } break; case '~': switch (nextchar()) { case '=': type = T_TILDEEQUALS; break; default: type = T_TILDE; reread(); } break; case '\\': switch (nextchar()) { case '\n': setprompt(conf->prompt2); break; case '=': type = T_BACKSLASHEQUALS; break; default: type = T_BACKSLASH; reread(); } break; default: if (isletter(ch) || ch == '_') { reread(); type = eatsymbol(); break; } if (isdigit(ch) || (ch == '.')) { reread(); type = eatnumber(); break; } scanerror(T_NULL, "Unknown token character '%c'", ch); } } curtoken.t_type = (short)type; return type; } /* * Continue to eat up a comment string. * The leading slash-asterisk has just been scanned at this point. */ static void eatcomment(void) { int ch; setprompt(conf->prompt2); for (;;) { ch = nextchar(); if (ch == '*') { ch = nextchar(); if (ch == '/') break; reread(); } if (ch == EOF || ch == '\0') { fprintf(stderr, "Unterminated comment ignored\n"); reread(); break; } } setprompt(conf->prompt1); } /* * Read in a string and add it to the literal string pool. * The leading single or double quote has been read in at this point. */ static void eatstring(int quotechar) { register char *cp; /* current character address */ int ch, cch; /* current character */ int i; /* index */ char buf[STRBUFSIZE]; /* buffer for string */ long len; /* length in buffer */ long totlen; /* total length, including '\0' */ char *str; BOOL done; str = buf; totlen = 0; done = FALSE; while (!done) { cp = buf; len = 0; while (!done && len < STRBUFSIZE) { ch = nextchar(); switch (ch) { case '\n': if (!newlines) break; case EOF: reread(); scanerror(T_NULL, "Unterminated string constant"); done = TRUE; ch = '\0'; break; case '\\': ch = nextchar(); if (isoctal(ch)) { ch = ch - '0'; for (i = 2; i > 0; i--) { cch = nextchar(); if (!isoctal(cch)) break; ch = 8 * ch + cch - '0'; } ch &= 0xff; if (i > 0) reread(); break; } switch (ch) { case 'n': ch = '\n'; break; case 'r': ch = '\r'; break; case 't': ch = '\t'; break; case 'b': ch = '\b'; break; case 'f': ch = '\f'; break; case 'v': ch = '\v'; break; case 'a': ch = '\007'; break; case 'e': ch = '\033'; break; case '\n': setprompt(conf->prompt2); continue; case EOF: reread(); continue; case 'x': ch = 0; for (i = 2; i > 0; i--) { cch = nextchar(); if (isdigit(cch)) ch = 16 * ch + cch - '0'; else if (cch >= 'a' && cch <= 'f') ch = 16 * ch + 10 + cch - 'a'; else if (cch >= 'A' && cch <= 'F') ch = 16 * ch + 10 + cch - 'A'; else break; } if (i > 0) reread(); } break; case '"': case '\'': if (ch == quotechar) { for (;;) { ch = nextchar(); if (ch != ' ' && ch != '\t' && (ch != '\n' || newlines)) break; } if (ch == '"' || ch == '\'') { quotechar = ch; continue; } reread(); done = TRUE; ch = '\0'; } break; } *cp++ = (char) ch; len++; } if (!done || totlen) { if (totlen) str = (char *) realloc(str, totlen + len); else str = (char *) malloc(len); if (str == NULL) { math_error("Out of memory for reading tokens"); /*NOTREACHED*/ } memcpy(str + totlen, buf, len); totlen += len; len = 0; } } curtoken.t_strindex = addstring(str, totlen + len); if (str != buf) free(str); } /* * Read in a symbol name which may or may not be a keyword. * If allsyms is set, keywords are not looked up and almost all chars * will be accepted for the symbol. Returns the type of symbol found. */ static int eatsymbol(void) { register struct keyword *kp; /* pointer to current keyword */ register char *cp; /* current character pointer */ int ch; /* current character */ int cc; /* character count */ static char buf[SYMBOLSIZE+1]; /* temporary buffer */ cp = buf; cc = SYMBOLSIZE; if (allsyms) { for (;;) { ch = nextchar(); if ((ch == ' ') || (ch == ';') || (ch == '\n')) break; if (cc-- > 0) *cp++ = (char)ch; } reread(); *cp = '\0'; if (cc < 0) scanerror(T_NULL, "Symbol too long"); curtoken.t_sym = buf; return T_SYMBOL; } for (;;) { ch = nextchar(); if (!issymbol(ch)) break; if (cc-- > 0) *cp++ = (char)ch; } reread(); *cp = '\0'; if (cc < 0) scanerror(T_NULL, "Symbol too long"); for (kp = keywords; kp->k_name; kp++) if (strcmp(kp->k_name, buf) == 0) return kp->k_token; curtoken.t_sym = buf; return T_SYMBOL; } /* * Read in and remember a possibly numeric constant value. * The constant is inserted into a constant table so further uses * of the same constant will not take more memory. This can also * return just a period, which is used for element accesses and for * the old numeric value. */ static int eatnumber(void) { register char *cp; /* current character pointer */ long len; /* parsed size of number */ long res; /* result of parsing number */ if (numbufsize == 0) { numbuf = (char *)malloc(128+1); if (numbuf == NULL) math_error("Cannot allocate number buffer"); numbufsize = 128; } cp = numbuf; len = 0; for (;;) { if (len >= numbufsize) { cp = (char *)realloc(numbuf, numbufsize + 1001); if (cp == NULL) { math_error("Cannot reallocate number buffer"); /*NOTREACHED*/ } numbuf = cp; numbufsize += 1000; cp = &numbuf[len]; } *cp = nextchar(); *(++cp) = '\0'; if ((numbuf[0] == '.') && isletter(numbuf[1])) { reread(); return T_PERIOD; } res = qparse(numbuf, QPF_IMAG); if (res < 0) { reread(); scanerror(T_NULL, "Badly formatted number"); curtoken.t_numindex = addnumber("0"); return T_NUMBER; } if (res != ++len) break; } cp[-1] = '\0'; reread(); if ((numbuf[0] == '.') && (numbuf[1] == '\0')) { curtoken.t_numindex = 0; return T_OLDVALUE; } cp -= 2; res = T_NUMBER; if ((*cp == 'i') || (*cp == 'I')) { *cp = '\0'; res = T_IMAGINARY; } curtoken.t_numindex = addnumber(numbuf); return (int)res; } /* * Return the index for string value of the current token. */ long tokenstring(void) { return curtoken.t_strindex; } /* * Return the constant index of a numeric token. */ long tokennumber(void) { return curtoken.t_numindex; } /* * Return the address of a symbol */ char * tokensymbol(void) { return curtoken.t_sym; } /* * Push back the token just read so that it will be seen again. */ void rescantoken(void) { rescan = TRUE; } /* * Describe an error message. * Then skip to the next specified token (or one more powerful). */ void scanerror(int skip, char *fmt, ...) { va_list ap; char *name; /* name of file with error */ char buf[MAXERROR+1]; /* count the error */ errorcount++; /* print the error message */ name = inputname(); if (name) fprintf(stderr, "\"%s\", line %ld: ", name, linenumber()); va_start(ap, fmt); vsprintf(buf, fmt, ap); va_end(ap); fprintf(stderr, "%s\n", buf); /* bail out if continuation not permitted */ if ((!c_flag && !stoponerror) || stoponerror > 0) longjmp(jmpbuf, 1); /* bail out if too many errors */ if (conf->maxscancount > 0 && errorcount > conf->maxscancount) { fputs("Too many scan errors, compilation aborted.\n", stderr); longjmp(jmpbuf, 1); /*NOTREACHED*/ } /* post-error report processing */ switch (skip) { case T_NULL: return; case T_COMMA: rescan = TRUE; for (;;) { switch (gettoken()) { case T_NEWLINE: case T_SEMICOLON: case T_LEFTBRACE: case T_RIGHTBRACE: case T_EOF: case T_COMMA: rescan = TRUE; return; } } default: fprintf(stderr, "Unknown skip token for scanerror\n"); /* fall into semicolon case */ /*FALLTHRU*/ case T_SEMICOLON: rescan = TRUE; for (;;) switch (gettoken()) { case T_NEWLINE: case T_SEMICOLON: case T_LEFTBRACE: case T_RIGHTBRACE: case T_EOF: rescan = TRUE; return; } } } /* END CODE */