/*****************************C_minus词法分析器 1.0版 *********************************
*作者:温铭
*Email: moonbingbing@gmail.com
*版权所有(C) 2005.11
***************************************************************************************/
#include<stdio.h>
#include<ctype.h>
#include<stdlib.h>
#include<string.h>
//****************************************
/*
 * Forward declarations of the table element types. Without them the struct
 * tags below would first appear inside a parameter list, where C gives them
 * prototype scope only, making these prototypes incompatible with the
 * function definitions later in the file.
 */
struct key;
struct symbol;

/* Copies the path string ps into the buffer pt and returns pt. */
char *change(char *ps, char *pt);
/* Binary search for word in a keyword table of n entries; index or -1. */
int searchkey(char *word, struct key *tab, int n);
/* Binary search for c in a symbol table of n entries; index or -1. */
int searchsymbol(char c, struct symbol *tab, int n);
/* Reads the rest of a token that starts with c from p into the global buffer. */
void getword(int c, FILE *p);
//****************************************
//用到的结构数组:
/*
 * Keyword table.
 * Rows are kept in ascending strcmp() order because searchkey() performs a
 * binary search over them.
 */
struct key {
    char *word;   /* spelling of the keyword */
    int value;    /* numeric code of the keyword (equal to its row index) */
} keytab[] = {
    { "else",   0 },
    { "if",     1 },
    { "int",    2 },
    { "return", 3 },
    { "void",   4 },
    { "while",  5 },
};
//****
/*
 * Symbol table.
 * Rows are kept in ascending character-code order because searchsymbol()
 * performs a binary search over them.
 */
struct symbol {
    char c;       /* the punctuation character */
    int value;    /* numeric code of the symbol (equal to its row index) */
} symboltab[] = {
    { '(',  0 },
    { ')',  1 },
    { '*',  2 },
    { '+',  3 },
    { ',',  4 },
    { '-',  5 },
    { '/',  6 },
    { ';',  7 },
    { '<',  8 },
    { '=',  9 },
    { '>', 10 },
    { '[', 11 },
    { ']', 12 },
    { '{', 13 },
    { '}', 14 },
};
//*****************************************
//用到的常量
/* Compile-time constants shared by the whole file. */
enum {
    /* Size in bytes of the token buffer and of the path input buffer. */
    MAX     = 50,
    /* Number of rows in keytab. */
    NKEYS   = sizeof(keytab) / sizeof(keytab[0]),
    /* Number of rows in symboltab. */
    NSYMBOL = sizeof(symboltab) / sizeof(symboltab[0])
};
//*****************************************
//用到的全局变量
/*
 * Set to 1 by getword() when a token contains a letter or a punctuation
 * character; main() checks it after reading a token that starts with a
 * digit so that input such as "10t" is reported instead of being accepted
 * as a number.
 */
int flagnum = 0;

/* Value printed for number tokens. */
int countnum = 0;

/* Running index printed for identifiers; advanced after each one. */
int countid = 0;

/* Value printed for illegal input. */
int countfault = 0;

/*
 * Token type codes, by index:
 * 0 keyword, 1 number, 2 identifier, 3 symbol, 4 error.
 */
int type[] = { 0, 1, 2, 3, 4 };

/* Text of the token most recently read by getword(). */
char array[MAX];
//*****************************************
/*
 * Entry point of the lexical analyzer.
 *
 * Prompts for the path of a source file, then prints one line per token in
 * the form "text<TAB>(type,value)". The type comes from type[]; the value
 * is the table index for keywords and symbols, and a running counter for
 * identifiers, numbers and illegal input.
 *
 * Returns 0 on success and EXIT_FAILURE when the path cannot be read or the
 * file cannot be opened.
 */
int main(void)
{
    int c;
    int flag;           /* result of a table lookup; negative means not found */
    char s[MAX];        /* path as typed by the user */
    char t[2 * MAX];    /* path after being passed through change() */
    FILE *p = NULL;

    printf("input the path of the file\n");
    /* Bounded read: an unbounded "%s" conversion could overflow s. */
    if (fgets(s, sizeof s, stdin) == NULL) {
        printf("open fail!\n");
        return EXIT_FAILURE;
    }
    s[strcspn(s, "\r\n")] = '\0';    /* drop the line terminator kept by fgets */

    p = fopen(change(s, t), "r");
    if (p == NULL) {                 /* wrong path or missing file */
        printf("open fail!\n");
        return EXIT_FAILURE;
    }

    printf("data \t (type,value)\n");
    while ((c = fgetc(p)) != EOF) {
        if (isspace(c))
            continue;

        if (isalpha(c)) {
            /* Starts with a letter: keyword or identifier. */
            getword(c, p);
            flag = searchkey(array, keytab, NKEYS);
            if (flag >= 0) {
                printf("%s\t(%d,%d)\n", array, type[0], flag);
            } else {
                printf("%s\t(%d,%d)\n", array, type[2], countid);
                countid++;
            }
        } else if (isdigit(c)) {
            /*
             * Starts with a digit: a number, unless getword() sees a letter
             * or punctuation inside it. The flag is cleared first so that an
             * earlier token cannot make this one look illegal.
             */
            flagnum = 0;
            getword(c, p);
            if (flagnum == 0) {
                printf("%s\t(%d,%d)\n", array, type[1], countnum);
                countnum++;
            } else {
                printf("%s\t(%d,%d)\t illegal input \n", array, type[4], countfault);
                countfault++;
            }
        } else if (ispunct(c)) {
            /* Single-character symbol. */
            flag = searchsymbol(c, symboltab, NSYMBOL);
            if (flag >= 0) {
                printf("%c\t(%d,%d)\n", c, type[3], flag);
            } else {
                printf("%c\t(%d,%d)\n", c, type[4], countfault);    /* unknown symbol */
                countfault++;
            }
        } else {
            printf("%c\t(%d,%d)\n", c, type[4], countfault);        /* unclassifiable character */
            countfault++;
        }
    }

    fclose(p);
    return 0;
}
//*******************************************
/*
 * Copies the NUL-terminated path ps into pt and returns pt.
 *
 * The result is a character-for-character copy: backslashes are not
 * doubled. Doubling is only needed when a path is written as a string
 * literal in source code, not for a path held in memory at run time.
 *
 * ps - source path
 * pt - destination buffer; must have room for strlen(ps) + 1 bytes
 */
char * change(char *ps,char *pt)
{
    size_t i;

    /* The loop condition performs the copy, terminator included. */
    for (i = 0; (pt[i] = ps[i]) != '\0'; i++)
        ;
    return pt;
}
//******************************************
/*
 * Binary search for a keyword.
 *
 * word - NUL-terminated string to look up
 * tab  - keyword table, sorted in ascending strcmp() order
 * n    - number of entries in tab
 *
 * Returns the index of the matching entry, or -1 when word is not in tab.
 */
int searchkey(char *word,struct key tab[],int n)
{
    int lo = 0;
    int hi = n - 1;

    while (lo <= hi) {
        int mid = (lo + hi) / 2;
        int cmp = strcmp(word, tab[mid].word);

        if (cmp == 0)
            return mid;
        if (cmp < 0)
            hi = mid - 1;    /* word sorts before the probe: keep the lower half */
        else
            lo = mid + 1;    /* word sorts after the probe: keep the upper half */
    }
    return -1;
}
//**********************************************
/*
 * Binary search for a punctuation character.
 *
 * c   - character to look up
 * tab - symbol table, sorted in ascending character-code order
 * n   - number of entries in tab
 *
 * Returns the index of the matching entry, or -1 when c is not in tab.
 */
int searchsymbol(char c,struct symbol tab[],int n)
{
    int lo;
    int hi;

    for (lo = 0, hi = n - 1; lo <= hi; ) {
        int mid = (lo + hi) / 2;
        char probe = tab[mid].c;

        if (c == probe)
            return mid;
        if (c < probe)
            hi = mid - 1;    /* continue in the lower half */
        else
            lo = mid + 1;    /* continue in the upper half */
    }
    return -1;
}
//*******************************************
/*
 * Reads one whitespace-delimited token into the global buffer `array`.
 *
 * c - first character of the token, already consumed by the caller
 * p - stream the remaining characters are read from
 *
 * Reading stops at the first whitespace character (which is consumed) or at
 * end of file. Characters that do not fit in `array` are read and discarded,
 * so the stored token is truncated rather than written past the buffer.
 *
 * flagnum is cleared on entry and set to 1 if any character after the first
 * is a letter or a punctuation character; callers use it to reject
 * malformed numbers such as "10t".
 */
void getword(int c,FILE * p)
{
    const size_t cap = sizeof array;    /* total buffer size, terminator included */
    size_t len = 0;

    flagnum = 0;    /* do not inherit the verdict of a previous token */
    array[len++] = (char)c;

    /* The EOF test prevents an endless loop when the file ends inside a token. */
    while ((c = fgetc(p)) != EOF && !isspace(c)) {
        if (isalpha(c) || ispunct(c))
            flagnum = 1;
        if (len < cap - 1)              /* always leave room for '\0' */
            array[len++] = (char)c;
    }
    array[len] = '\0';
}