/******************************************/
/* File:Unit2.h (scan.h) */
/* A Scanner for lexical analysis for C++ */
/* Author:zhanjiantao(compower) */
/******************************************/
#include <vcl.h>
#define MAXNO 48
// One token record produced by the scanner.
struct TypeToken
{
    int line;          // line number passed to scan::MakeTL for this token
    AnsiString words;  // the token text itself
    AnsiString type;   // category label of the token (e.g. the labels DoScan emits)
};
typedef TypeToken aToken;

// Pointer to a token record; this is what gets stored in scan::TokenList.
typedef aToken *Listnd;
class scan
{
public:
bool IsReserveWord(AnsiString Token);
void InitRW();
void DoScan(char *infile);
void Print(int lineno, AnsiString stoken, int strgrdl, AnsiString Type);
void MakeTL(int line,AnsiString words,AnsiString type);
void Compress(char *zipFname);
public:
char *file;
char ch;
AnsiString strToken;
AnsiString ReserveWs[MAXNO];
TList *TokenList;
Listnd Anode;
};
#endif
/******************************************/
/* File:Unit2.cpp (scan.cpp) */
/* A Scanner for lexical analysis for C++ */
/* Author:zhanjiantao(compower) */
/******************************************/
#include "Unit2.h"
#include <string.h>
#include <fstream.h>
//initiate Reserved Words list
void scan::InitRW()
{
const int buflen = 10;
char buf[buflen];
AnsiString gotWord = "";
ifstream iniRw("InitRW.ini");
int i = 0;
while(iniRw.getline(buf,buflen))
{
gotWord = buf;
ReserveWs[i++] = gotWord;
}
iniRw.close();
TokenList = new TList;
}
// Check whether Token is in the reserved-word table.
//
// Binary search over ReserveWs using AnsiString::AnsiCompare, so the table
// must be sorted in that order. Returns true on an exact match.
bool scan::IsReserveWord(AnsiString Token)
{
    int low = 0;
    int high = MAXNO-1;
    // Only the filled part of the table is sorted. Unused trailing slots are
    // empty strings, which compare lower than every word; leaving them in the
    // search range hides words near the end of the table and lets an empty
    // Token "match" an unused slot.
    while(high >= 0 && ReserveWs[high].Length() == 0)
        high--;
    while(low <= high)
    {
        const int mid = low + (high - low)/2;
        const int rsComp = Token.AnsiCompare(ReserveWs[mid]);
        if(rsComp == 0)
            return true;
        if(rsComp < 0)
            high = mid-1;
        else
            low = mid+1;
    }
    return false;
}
//print on StringGrid
void scan::Print(int lineno, AnsiString stoken, int strgrdl, AnsiString Type)
{
Form1->StringGrid1->RowCount++;
Form1->StringGrid1->Cells[0][strgrdl] = lineno;
Form1->StringGrid1->Cells[1][strgrdl] = stoken;
Form1->StringGrid1->Cells[2][strgrdl] = Type;
}
//make a token list
void scan::MakeTL(int line,AnsiString words,AnsiString type)
{
Anode = new aToken;
Anode->line = line;
Anode->words = words;
Anode->type = type;
TokenList->Add(Anode);
}
//scan--the hardcore of the scanner
void scan::DoScan(char *infile)
{
file = infile ;
ifstream scanFile(file);
int LineCount = 1; //the word in which line
strToken = ""; //member of class scan
int strgrdLine = 1; //temp var for show result on StringGrid
const int bflength = 254; //length of getline buffer
char buffer[bflength]; //getline buffer
bool comment = false; //for this kind of comment--"/**/"
char prech = '@'; //pre char for /**/ comment
AnsiString preToken = ""; //pre Token for judging pointee and multi '*'
while(scanFile.getline(buffer,bflength)) //get each line of the .cpp file
{
int lnscptr = 0;
while(buffer[lnscptr]==' ') //trim left space
lnscptr++;
ch = buffer[lnscptr];
/*scan:important arithmetic*/
if(comment)
{
prech = ch;
goto flag1;
}
else
{
while(ch!='\0') //while not the line finish symbol do analyse
{
if(isalpha(ch) || ch=='_') // ID or KeyWord
{
while(isalpha(ch) || isdigit(ch) || ch=='_')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
if(IsReserveWord(strToken)) //is ReserveWord
{
Print(LineCount,strToken,strgrdLine,"保留字");
MakeTL(LineCount,strToken,"保留字");
}
else //is ID
{
Print(LineCount,strToken,strgrdLine,"标识符");
MakeTL(LineCount,strToken,"标识符");
}
preToken = strToken;
strgrdLine++;
strToken.Delete(1,strToken.Length());
}
else if(isdigit(ch)) // Numerci
{
while(isdigit(ch) || ch=='.')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
bool isInt = true;
for(int pos=1; pos<=strToken.Length(); pos++)
{
if(strToken[pos]=='.')
{
isInt = false;
break;
}
}
if(isInt) //is Int
{
Print(LineCount,strToken,strgrdLine,"整数");
MakeTL(LineCount,strToken,"整数");
}
else //is Float
{
Print(LineCount,strToken,strgrdLine,"浮点数");
MakeTL(LineCount,strToken,"浮点数");
}
strgrdLine++;
strToken.Delete(1,strToken.Length());
}
else if(ch==' ' || ch=='\t' || ch=='\n') //skip space,tab and enter
{
ch = buffer[++lnscptr];
}
else //other special symbols
{
switch(ch)
{
case '#':
strToken = strToken + ch;
ch = buffer[++lnscptr];
while(ch!='\0' && ch!='/')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
Print(LineCount,strToken,strgrdLine,"预定义");
MakeTL(LineCount,strToken,"预定义");
break;
case '\'':
strToken = strToken + ch;
ch = buffer[++lnscptr];
while(ch!='\'')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"字符常量");
MakeTL(LineCount,strToken,"字符常量");
break;
case '"':
strToken = strToken + ch;
ch = buffer[++lnscptr];
while(ch!='"')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"字符串");
MakeTL(LineCount,strToken,"字符串");
break;
case '=':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"逻辑等");
MakeTL(LineCount,strToken,"逻辑等");
}
else
{
Print(LineCount,strToken,strgrdLine,"赋值号");
MakeTL(LineCount,strToken,"赋值号");
}
break;
case '+':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"+=运算符");
MakeTL(LineCount,strToken,"+=运算符");
}
else if(ch=='+')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"递增运算符");
MakeTL(LineCount,strToken,"递增运算符");
}
else
{
Print(LineCount,strToken,strgrdLine,"加号运算符");
MakeTL(LineCount,strToken,"加号运算符");
}
break;
case '-':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"-=运算符");
MakeTL(LineCount,strToken,"-=运算符");
}
else if(ch=='>')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"指针运算符");
MakeTL(LineCount,strToken,"指针运算符");
}
else if(ch=='-')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"递减运算符");
MakeTL(LineCount,strToken,"递减运算符");
}
else
{
Print(LineCount,strToken,strgrdLine,"减号运算符");
MakeTL(LineCount,strToken,"减号运算符");
}
break;
case '*':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"*=运算符");
MakeTL(LineCount,strToken,"*=运算符");
}
else if(IsReserveWord(preToken))
{
Print(LineCount,strToken,strgrdLine,"指针定义符");
MakeTL(LineCount,strToken,"指针定义符");
}
else
{
Print(LineCount,strToken,strgrdLine,"乘号运算符");
MakeTL(LineCount,strToken,"乘号运算符");
}
break;
case '/':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='/')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
while(ch!='\0')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
}
Print(LineCount,strToken,strgrdLine,"注释");
}
else if(ch=='*')
{
flag1:strToken = strToken + ch;
ch = buffer[++lnscptr];
while((prech!='*' || ch!='/') && ch!='\0')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
prech = buffer[lnscptr-1];
}
if(ch!='\0')
{strToken = strToken + ch;}
if(prech=='*' && ch=='/')
{ comment = false;}
else
{ comment = true;}
if(ch!='\0')
{ch = buffer[++lnscptr];}
Print(LineCount,strToken,strgrdLine,"注释");
}
else if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"/=运算符");
MakeTL(LineCount,strToken,"/=运算符");
}
else
{
Print(LineCount,strToken,strgrdLine,"除号运算符");
MakeTL(LineCount,strToken,"除号运算符");
}
break;
case '%':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"%=运算符");
MakeTL(LineCount,strToken,"%=运算符");
}
else
{
Print(LineCount,strToken,strgrdLine,"模运算符");
MakeTL(LineCount,strToken,"模运算符");
}
break;
case '<':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"小于等于号");
MakeTL(LineCount,strToken,"小于等于号");
}
else
{
Print(LineCount,strToken,strgrdLine,"小于号");
MakeTL(LineCount,strToken,"小于号");
}
break;
case '>':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"大于等于号");
MakeTL(LineCount,strToken,"大于等于号");
}
else
{
Print(LineCount,strToken,strgrdLine,"大于号");
MakeTL(LineCount,strToken,"大于号");
}
break;
case '!':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='=')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"不等于号");
MakeTL(LineCount,strToken,"不等于号");
}
else
{
Print(LineCount,strToken,strgrdLine,"逻辑非");
MakeTL(LineCount,strToken,"逻辑非");
}
break;
case '&':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='&')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"逻辑与");
MakeTL(LineCount,strToken,"逻辑与");
}
else
{
Print(LineCount,strToken,strgrdLine,"位与运算符");
MakeTL(LineCount,strToken,"位与运算符");
}
break;
case '|':
strToken = strToken + ch;
ch = buffer[++lnscptr];
if(ch=='|')
{
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"逻辑或");
MakeTL(LineCount,strToken,"逻辑或");
}
else
{
Print(LineCount,strToken,strgrdLine,"位或运算符");
MakeTL(LineCount,strToken,"位或运算符");
}
break;
case '^':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"异或运算符");
MakeTL(LineCount,strToken,"异或运算符");
break;
case '[':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"左方括号");
MakeTL(LineCount,strToken,"左方括号");
break;
case ']':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"右方括号");
MakeTL(LineCount,strToken,"右方括号");
break;
case '(':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"左圆括号");
MakeTL(LineCount,strToken,"左圆括号");
break;
case ')':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"右圆括号");
MakeTL(LineCount,strToken,"右圆括号");
break;
case '{':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"左花括号");
MakeTL(LineCount,strToken,"左花括号");
break;
case '}':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"右花括号");
MakeTL(LineCount,strToken,"右花括号");
break;
case ',':
case ';':
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"分界符");
MakeTL(LineCount,strToken,"分界符");
break;
default:
strToken = strToken + ch;
ch = buffer[++lnscptr];
Print(LineCount,strToken,strgrdLine,"其他特殊符号");
MakeTL(LineCount,strToken,"其他特殊符号");
break;
}//switch
strgrdLine++;
strToken.Delete(1,strToken.Length());
}//else
}//_while buffer[]!='/0'
}
/*scan:important arithmetic*/
LineCount++;
} //_while getline
scanFile.close();
}
void scan::Compress(char *zipFname)
{
ofstream compress(zipFname,ios::app);
for(int i=0; i<TokenList->Count; i++)
{
Anode = (Listnd)TokenList->Items[i];
if(Anode->type=="预定义")
{
compress<<Anode->words.c_str()<<endl;
}
else if(Anode->type=="保留字")
{
compress<<Anode->words.c_str();
compress<<" ";
}
else
{
compress<<Anode->words.c_str();
}
}
compress.close();
}