构造使用类C语言的脚本引擎(4)作者 :kevin_qing
转贴请注明
同上一章一样,语法分析部分也不准备使用yacc直接生成代码,而是只使用yacc的生成的分析表。
BNF:
%token ID IF ELSE SWITCH GOTO RETURN NUMBER STRING MAIN
%token ADD_OP MUL_OP ASSIGN_OP CMP_OP LOGIC_OP1 LOGIC_OP2
%token LPAREN RPAREN LBRACE RBRACE COLON COMMA SEMI
%%
PROG
: MAIN LPAREN RPAREN CPD_SMT
;
CPD_SMT
: LBRACE SMT_SEQ RBRACE
;
SMT_SEQ
: LB_SMT
| SMT_SEQ LB_SMT
;
LABLE : ID COLON
;
LB_SMT
: SMT
| LABLE SMT
;
SMT
: EXPR SEMI
| SEMI
| GOTO ID SEMI
| RETURN SEMI
| IF LPAREN EXPR RPAREN SMT
| IF LPAREN EXPR RPAREN SMT ELSE SMT
| SWITCH LPAREN EXPR RPAREN LBRACE CASES RBRACE
| CPD_SMT
;
CASE : NUMBERS COLON SMT
;
CASES : CASE
| CASES CASE
;
NUMBERS : ADD_OP NUMBER
| NUMBER
;
STRINGS : STRING
| STRINGS STRING
;
PAM_LIST
: EXPR
| PAM_LIST COMMA EXPR
;
PRI_EXPR
: ID
| NUMBERS
| STRINGS
| LPAREN EXPR RPAREN
;
POST_EXPR
: PRI_EXPR
| ID LPAREN RPAREN
| ID LPAREN PAM_LIST RPAREN
;
MUL_EXPR
: POST_EXPR
| MUL_EXPR MUL_OP POST_EXPR
;
ADD_EXPR
: MUL_EXPR
| ADD_EXPR ADD_OP MUL_EXPR
;
CMP_EXPR
: ADD_EXPR
| ADD_EXPR CMP_OP ADD_EXPR
;
LOGIC_EXPR
: CMP_EXPR
| LOGIC_EXPR LOGIC_OP2 CMP_EXPR
| LOGIC_OP1 CMP_EXPR
;
EXPR
: LOGIC_EXPR
| ID ASSIGN_OP EXPR
;
%%
使用bison++ -o o -v bnf.txt编译后生成o.output和其他文件
我只使用o.output的输出.
o.output的部分内容
State 69 contains 1 shift/reduce conflict. //69条有一个关于悬挂else的冲突,这里yacc默认的状态转移已经是正确的了,就不需要再做其他处理
Grammar
语法规则,//我通过对此的识别生成规约函数
---------------
rule 1 PROG -> MAIN LPAREN RPAREN CPD_SMT
rule 2 CPD_SMT -> LBRACE SMT_SEQ RBRACE
rule 3 SMT_SEQ -> LB_SMT
--------------
状态转移,程序主要通过对状态转移行的识别来构建分析表.
左边为输入的标志符,右边为动作。
------------------------
state 0
MAIN shift, and go to state 1
PROG go to state 80
-------------------------
动作可能有4种
shift, and go to state [n]
reduce using rule [n] ...
go to state [n]
accept
通过状态转移表构造分析表。
在这里我是写了一个小程序实现的自动生成分析表和规约函数。
LPCSTR LableName[]={ "ID","IF","ELSE","SWITCH","GOTO","RETURN","NUMBER","STRING","MAIN",
"ADD_OP","MUL_OP","ASSIGN_OP","CMP_OP","LOGIC_OP1","LOGIC_OP2",
"LPAREN","RPAREN","LBRACE","RBRACE","COLON","COMMA","SEMI",
"PROG","CPD_SMT","SMT_SEQ","LABLE","LB_SMT","SMT","CASE","CASES",
"NUMBERS","STRINGS","PAM_LIST","PRI_EXPR","POST_EXPR","MUL_EXPR","ADD_EXPR","CMP_EXPR","LOGIC_EXPR","EXPR","$END"
};
#define CNT (sizeof(LableName)/sizeof(LableName[0]))
char* getToken(char* &p){
while(*p){
if((*p==' ')||(*p=='\t')){
++p;
}else{
char *r=p;
while(*p){
if(('\n'==*p)||('\t'==*p)|| (' '==*p)){
*p=0;
++p;
break;
}else{
++p;
}
}
return r;
}
};
return NULL;
}
bool sm(LPCSTR s,LPCSTR d){
if(s&&d){
return 0==strcmp(s,d);
}
return false;
}
#define m(x) (sm(getToken(p),x))
void yy2ini(){
FILE* fp=fopen("lalr分析表.txt","r");
FILE* fpo=fopen("lalr.ini","w");
FILE* fp1=fopen("Rfun.c","w");
char tmp[4096];
while(fgets(tmp,4096,fp)){
char* p=tmp;
char* r=getToken(p);
int i;
if(sm(r,"state")){
r=getToken(p);
if(r)
{
i=atoi(r);
fprintf(fpo,"[S%d]\n",i);
}
}else if(sm(r,"rule")){
char* r1=getToken(p);
if(r1){
int n=atoi(r1);
char* tmp[1024];
int i=0;
r=getToken(p);
if(r)if(m("->")){
while(tmp[i]=getToken(p))++i;
fprintf(fp1,"//Rule[%d] %s -> ",n,r);
for(int j=0;j<i;++j)
fprintf(fp1,"%s ",tmp[j]);
fprintf(fp1,
"\nvoid Reduce%d(){\n",n);
for(int j=0;j<i;++j)
fprintf(fp1,
" assert(TokenStack[%d]->tokenType==%s);\n",-j-1,tmp[i-j-1]);
if(i>1)
fprintf(fp1,
" TokenStack.pop(%d);\n",i-1);
fprintf(fp1,
" TokenStack[-1]->tokenType=%s;\n"
" StateStack.pop(%d);\n"
"}\n\n",r,i);
}
}
}else{
char* r1=getToken(p);
if(sm(r1,"shift,"))
{
if(m("and")&&m("go")&&m("to")&&m("state")){
r1=getToken(p);
if(r1){
i=atoi(r1);
fprintf(fpo,"%s=S%d\n",r,i);
}
}
}else if(sm(r1,"go")){
if(m("to")&&m("state")){
r1=getToken(p);
if(r1){
i=atoi(r1);
fprintf(fpo,"%s=G%d\n",r,i);
}
}
}else if(sm(r1,"reduce"))//reduce using rule
{
if(m("using")&&m("rule")){
r1=getToken(p);
if(r1){
i=atoi(r1);
fprintf(fpo,"%s=R%d\n",r,i);
}
}
}
}
}
fclose(fp);
fclose(fpo);
fprintf(fp1,"typedef void REDUCE();\n"
"REDUCE* Reduce[]={\n");
for(int i=1;i<44;++i)
fprintf(fp1,"\t&Reduce%d,\n",i);
fprintf(fp1,"};\n");
fclose(fp1);
}
主程序(lalr分析)
void main(){
SCCompiler sc;
sc.init();
sc.open("test.sc");
StateStack.push(0);
TokenStack.push(new Token(START,0));
while(1)
try{
Token*p=sc.getToken();
// printf("Get %s @%d\n",LableName[p->tokenType],p->line);
rescan:
if((StateStack[ST_TOP]==80)&&p->tokenType==ENDOFSTREAM)
break;
uint32_t r=LalrTab[StateStack[ST_TOP]][p->tokenType&0xffff];
switch(r&0xf0000){
case SMASK:
TokenStack.push(p);
StateStack.push(r&0xffff);
break;
case RMASK:{
uint32_t id=r;
do{
id&=0xffff;
Reduce[id-1]();
id=LalrTab[StateStack[ST_TOP]][TokenStack[ST_TOP]->tokenType&0xffff];
}while(ISR(id));
if(ISG(id)){
StateStack.push(id&0xffff);
//StateStack[0]=(id&0xffff);
goto rescan;
}else
assert(false);
}
default:
assert(false);
}
}catch(int err){ printf("EOF %d",err);break; }
printf("Accept\n");
printf("StateStack %d\n",StateStack[ST_TOP]);