在网上,看到还是有部分程序爱好者希望能编出自己的编译器.当然,这的确是件难事,许多人都说要去看什么编译原理和精通汇编语言,结果让这些爱好者都望而却步.但是,当我们亲手去做做后,发现要做一个简单的程序解释器(就像Java和Basic那样),还是挺容易的.你根本不用去看那些东西,只要你懂C语言,在看了本文后,就可以完成那样的解释器.
在网上,有许多大型C语言,Perl语言的编译器源代码.但当你下载后看看,竟发现一点都看不懂.其实那些东西还是不看为妙.看了本文后,我相信你宁愿自己动手编,也不愿意去领会那些庞大的源代码了.
少说废话了,我们开始讲解.
这是一篇Basic解释器的代码,十分经典,而且十分简单.
get_token()是词汇提取,譬如 PRINT A+B
通过调用一次get_token(),就在 字符串token里装上PRINT
再调用一次get_token(),token里就装上A
再调用一次get_token(),token里就装上+
再调用一次get_token(),token里就装上B
很简单吧!
putback()是将prog指针向回移动刚读出的token的长度,也就是把这个token退回到输入流.
其中包含了词法分析和十分关键的代数式求值get_exp(int *result)
关于它的代数式求值get_exp(int *result),用到递归函数
void get_exp(),level2(),level3(),level4(),level5();
void level6(),primitive(),arith(),unary();
,确实难看懂,不过你尽管拿来用就是了.
话不多说,你看源代码就是了.最后,我将给你看看C++中完整的源代码
/*
recursive descent parser for integer expression
which may include variables
*/
#include <ctype.h>
#include <math.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Token categories that get_token() stores in token_type. */
#define DELIMITER 1
#define VARIABLE 2
#define NUMBER 3
#define COMMAND 4
/* STRING is transient: get_token() sets it for an identifier and then replaces
   it with VARIABLE or COMMAND depending on what look_up() returns. */
#define STRING 5
#define QUOTE 6
/* Internal token codes that get_token() stores in tok: EOL when a carriage
   return is read, FINISHED when the NUL that ends the program text is read. */
#define EOL 9
#define FINISHED 10
/* Interpreter state used by the parser.  These are only declared here; their
   definitions are not in this part of the file. */
extern char *prog; /* current read position in the program text being analyzed */
extern jmp_buf e_buf; /* jump target that serror() returns to through longjmp() */
extern int variables[26]; /* variable values; find_var() indexes this by letter, 'A' being slot 0 */
/* Keyword table.  look_up() scans it until it reaches an entry whose command
   string is empty. */
extern struct commands {
char command[20]; /* keyword text, compared against the lowercased token */
char tok; /* internal code look_up() returns for this keyword */
} table[];
extern char token[80]; /* text of the most recently scanned token */
extern char token_type; /* category of that token (DELIMITER, NUMBER, ...) */
extern char tok; /* internal code of that token: a keyword code, EOL, FINISHED or 0 */
/*
 * Full prototypes for every routine defined in this file.
 *
 * Declarations with an empty parameter list are not compatible with the
 * definitions of arith() and unary(), whose first parameter is a char, and the
 * int-returning helpers were previously called with no declaration in scope at
 * all.  Spelling out the parameter types lets the compiler check every call.
 */
void get_exp(int *result);
void level2(int *result);
void level3(int *result);
void level4(int *result);
void level5(int *result);
void level6(int *result);
void primitive(int *result);
void arith(char o, int *r, int *h);
void unary(char o, int *r);
void serror(int error);
void putback(void);
int get_token(void);
int find_var(char *s);
int look_up(char *s);
int isdelim(char c);
int iswhite(char c);
/*
 * Parser entry point: evaluate the expression that starts at the current
 * input position and store its value in *result.
 *
 * An empty first token is reported as error 2 ("no expression present").
 * Otherwise the expression is evaluated through level2() and the token that
 * ended it is handed back to the input with putback().
 */
void get_exp(int *result)
{
    get_token();
    if (*token != '\0') {
        level2(result);
        putback(); /* return the last token read to the input stream */
        return;
    }
    serror(2);
}
/*
 * Add or subtract two terms.
 *
 * Evaluates one level3() term into *result, then keeps folding in further
 * terms for as long as the current token starts with '+' or '-'.
 */
void level2(int *result)
{
    int rhs;
    char op;

    level3(result);
    for (;;) {
        op = *token;
        if (op != '+' && op != '-')
            break;
        get_token();
        level3(&rhs);
        arith(op, result, &rhs);
    }
}
/*
 * Multiply, divide or take the remainder of two factors.
 *
 * Evaluates one level4() operand into *result, then folds in every following
 * '*', '/' or '%' operand from left to right.
 *
 * The right-hand operand is parsed with level4(), not level3(): recursing into
 * level3() here would consume the rest of the chain first and group 8/4/2 as
 * 8/(4/2) instead of (8/4)/2.
 */
void level3(int *result)
{
    char op;
    int hold;

    level4(result);
    while ((op = *token) == '*' || op == '/' || op == '%') {
        get_token();
        level4(&hold);
        arith(op, result, &hold);
    }
}
/*
 * Process an integer exponent.
 *
 * Evaluates one level5() operand into *result.  If the current token is '^',
 * the exponent is parsed by recursing into level4(), which makes the operator
 * group from the right, and the power is computed by arith().
 *
 * The operator is passed to arith() as the literal '^'; no local operator
 * variable is needed because this level handles exactly one operator.
 */
void level4(int *result)
{
    int hold;

    level5(result);
    if (*token == '^') {
        get_token();
        level4(&hold);
        arith('^', result, &hold);
    }
}
/*
 * Handle a unary + or -.
 *
 * If the current token is a DELIMITER that is '+' or '-', it is remembered and
 * skipped; the operand is then evaluated by level6() and the sign is applied
 * with unary().
 *
 * The sign test is parenthesized so that the DELIMITER requirement covers both
 * characters.  Without the parentheses && binds tighter than ||, and any token
 * whose text merely starts with '-' (a quoted string, for instance) would be
 * taken for a unary minus.
 */
void level5(int *result)
{
    char op = 0;

    if (token_type == DELIMITER && (*token == '+' || *token == '-')) {
        op = *token;
        get_token();
    }
    level6(result);
    if (op)
        unary(op, result);
}
/*
 * Process a parenthesized expression.
 *
 * Anything that is not a '(' delimiter is passed straight to primitive().
 * For a '(' the enclosed expression is evaluated with level2(); a missing ')'
 * is reported as error 1, and the token after the parenthesis is then read.
 */
void level6(int *result)
{
    if (*token != '(' || token_type != DELIMITER) {
        primitive(result);
        return;
    }

    get_token();
    level2(result);
    if (*token != ')')
        serror(1);
    get_token();
}
/*
 * Find the value of a number or a variable.
 *
 * A VARIABLE token is resolved through find_var(), a NUMBER token is converted
 * with atoi(); in both cases the next token is read afterwards.  Any other
 * token type is reported as error 0.
 */
void primitive(int *result)
{
    if (token_type == VARIABLE) {
        *result = find_var(token);
    } else if (token_type == NUMBER) {
        *result = atoi(token);
    } else {
        serror(0);
        return;
    }
    get_token();
}
/*
 * Perform the specified arithmetic: *r = *r <o> *h.
 *
 * o  operator character: '+', '-', '*', '/', '%' or '^'
 * r  left operand on entry, result on return
 * h  right operand, never written
 *
 * For '^' an exponent of 0 yields 1; otherwise *r is multiplied by its
 * original value (exponent - 1) times, so an exponent below 1 leaves *r
 * unchanged.  Any other operator character leaves *r untouched.
 *
 * NOTE(review): '/' and '%' do not check for a zero right operand.
 */
void arith(char o,int *r,int *h)
{
    const int rhs = *h;
    int base, n;

    if (o == '+') {
        *r += rhs;
    } else if (o == '-') {
        *r -= rhs;
    } else if (o == '*') {
        *r *= rhs;
    } else if (o == '/') {
        *r /= rhs;
    } else if (o == '%') {
        *r %= rhs;
    } else if (o == '^') {
        if (rhs == 0) {
            *r = 1;
        } else {
            base = *r;
            for (n = 1; n < rhs; n++)
                *r *= base;
        }
    }
}
/*
 * Apply a unary operator to *r: '-' negates the value, any other operator
 * character leaves it as it is.
 */
void unary(char o,int *r)
{
    if (o != '-')
        return;
    *r = -*r;
}
/*
 * Find the value of a variable.
 *
 * s  name of the variable; only its first character is significant and case
 *    is ignored.
 *
 * Returns the matching slot of variables[].  A name that does not start with
 * a letter in the range A-Z is reported as error 4 ("not a variable").
 *
 * The slot is computed from the argument itself rather than from the global
 * token buffer, so the result is right for any caller-supplied name.  The
 * explicit A-Z range test keeps the index inside the 26-entry array even when
 * the locale classifies other characters as letters, and the character is
 * converted to unsigned char before it reaches <ctype.h>.
 */
int find_var(char *s)
{
    const int letter = toupper((unsigned char)*s);

    if (letter < 'A' || letter > 'Z') {
        serror(4); /* not a variable */
        return 0;
    }
    return variables[letter - 'A'];
}
/*
 * Display an error message and abandon the current statement.
 *
 * error  index into the message table below.
 *
 * Prints the message followed by a newline, then jumps to the save point held
 * in e_buf with longjmp(), so this function does not return to its caller.
 * An index outside the table prints "unknown error" instead of reading past
 * the end of the array.
 */
void serror(int error)
{
    static const char *const e[] = {
        "syntax error",
        "unbalanced parentheses",
        "no expression present",
        "equal sign expected",
        "not a variable",
        "label table full",
        "duplicate label",
        "undefined label",
        "THEN expected",
        "TO expected",
        "too many nested FOR loops",
        "NEXT without FOR",
        "too many nested GOSUB",
        "RETURN without GOSUB"
    };
    const int count = (int)(sizeof e / sizeof e[0]);
    const char *msg = "unknown error";

    if (error >= 0 && error < count)
        msg = e[error];
    printf("%s\n", msg);
    longjmp(e_buf, 1); /* return to save point */
}
/*
 * Append one character to the token buffer.
 *
 * dst  next free position inside token[]
 * c    character to store
 *
 * Returns the position after the stored character.  When only the slot for
 * the terminating NUL is left, the buffer is terminated and error 0 is raised
 * instead of writing past the end of token[].
 */
static char *store_token_char(char *dst, char c)
{
    char *const last = token + sizeof token - 1; /* slot reserved for the NUL */

    if (dst >= last) {
        *last = '\0';
        serror(0);
        return last;
    }
    *dst = c;
    return dst + 1;
}

/*
 * Get a token.
 *
 * Scans the next token starting at prog, copies its text into token[], sets
 * tok and token_type, advances prog past the token and returns token_type:
 *
 *   end of text        token "",  tok FINISHED, type DELIMITER
 *   carriage return    token is the line ending consumed, tok EOL, type DELIMITER
 *   one of +-*^/%=;(),><   single-character token, type DELIMITER
 *   "..."              token is the text between the quotes, type QUOTE
 *   digit ...          characters up to the next delimiter, type NUMBER
 *   letter ...         characters up to the next delimiter; look_up() decides
 *                      between COMMAND (tok is the keyword code) and VARIABLE
 *
 * Blanks and tabs are skipped BEFORE the end-of-text test.  Testing first let
 * trailing blanks reach the delimiter check with *prog == '\0'; strchr() also
 * matches the terminator of its search string, so the NUL was consumed as a
 * delimiter and prog ran past the end of the program text.
 *
 * A carriage return consumes the following line feed only when one is there,
 * so a lone CR at the end of the text cannot skip over the terminator; token
 * always holds exactly the characters consumed, which keeps putback() exact.
 *
 * A quoted string that reaches a carriage return or the end of the text
 * without a closing quote raises error 1.  Every write into token[] is
 * bounded; an over-long token raises error 0.
 *
 * NOTE(review): a character that matches none of the cases above (a bare
 * line feed, for example) is returned as an empty token of type 0 without
 * advancing prog; callers must not retry blindly on that result.
 */
int get_token(void)
{
    char *temp = token;

    token_type = 0;
    tok = 0;

    while (iswhite(*prog))
        ++prog; /* skip over white space */

    if (*prog == '\0') { /* end of file */
        *token = 0;
        tok = FINISHED;
        return (token_type = DELIMITER);
    }

    if (*prog == '\r') { /* CR, normally followed by LF */
        *temp++ = *prog++;
        if (*prog == '\n')
            *temp++ = *prog++;
        *temp = 0;
        tok = EOL;
        return (token_type = DELIMITER);
    }

    if (strchr("+-*^/%=;(),><", *prog)) { /* delimiter */
        *temp++ = *prog++;
        *temp = 0;
        return (token_type = DELIMITER);
    }

    if (*prog == '"') { /* quoted string */
        prog++;
        while (*prog != '"' && *prog != '\r' && *prog != '\0')
            temp = store_token_char(temp, *prog++);
        if (*prog != '"')
            serror(1);
        prog++;
        *temp = 0;
        return (token_type = QUOTE);
    }

    if (isdigit((unsigned char)*prog)) { /* number */
        while (!isdelim(*prog))
            temp = store_token_char(temp, *prog++);
        *temp = '\0';
        return (token_type = NUMBER);
    }

    if (isalpha((unsigned char)*prog)) { /* var or command */
        while (!isdelim(*prog))
            temp = store_token_char(temp, *prog++);
        token_type = STRING;
    }
    *temp = '\0';

    /* see if a string is a command or a variable */
    if (token_type == STRING) {
        tok = look_up(token); /* convert to internal rep */
        if (!tok)
            token_type = VARIABLE;
        else
            token_type = COMMAND; /* is a command */
    }
    return token_type;
}
/*
 * Return a token to the input stream: prog is moved back by one position for
 * every character held in token[].
 */
void putback()
{
    const char *scan = token;

    while (*scan != '\0') {
        --prog;
        ++scan;
    }
}
/*
 * Look up the internal code of a keyword.
 *
 * s  token text; it is converted to lowercase IN PLACE before the search.
 *
 * Returns the tok field of the first table[] entry whose command string
 * equals s, or 0 when the end of the table (an entry with an empty command
 * string) is reached without a match.
 *
 * The return type is spelled out because implicit int is not valid C since
 * C99, and each character is converted to unsigned char before it is passed
 * to tolower(), as <ctype.h> requires.
 */
int look_up(char *s)
{
    char *p;
    int i;

    /* convert to lowercase */
    for (p = s; *p; p++)
        *p = (char)tolower((unsigned char)*p);

    /* see if token is in table */
    for (i = 0; *table[i].command; i++) {
        if (strcmp(table[i].command, s) == 0)
            return table[i].tok;
    }
    return 0; /* unknown command */
}
/*
 * Return 1 if c is a delimiter, 0 otherwise.
 *
 * Delimiters are the characters ; , + - < > / * % ^ = ( ) and space, plus
 * tab, carriage return and the NUL terminator.
 *
 * The return type is spelled out because implicit int is not valid C since
 * C99.  NUL is tested explicitly rather than left to strchr(), which would
 * match it only as the terminator of the search string.
 */
int isdelim(char c)
{
    if (c == '\0' || c == '\t' || c == '\r')
        return 1;
    return strchr(";,+-<>/*%^=() ", c) != NULL;
}
/*
 * Return 1 if c is a space or a tab, 0 otherwise.
 *
 * The return type is spelled out because implicit int is not valid C since
 * C99.
 */
int iswhite(char c)
{
    return c == ' ' || c == '\t';
}