大学书信选1（关于编译器）

简序：

大学中，有很多梦想，写一个教学用的小编译器就是一个。也曾就此与我最喜欢的年轻老师之一薛老师进行讨论，后来更是联系了另外两个非常厉害的同学，准备大四一起做这个，运用有关模式，重新设计语言，把它做出来。

可是现在，学校中只我一人，他们都去北京实习去了。而自己要忙的事情也太多，想实践演化证明的思路，想通过内核代码把“进程”理解好，想整理自己数据结构的笔记，想把数学分析看完……这个梦想，或许也就永远留在心中了。

忽然看到原来写给薛老师的一封email，就原封不动地转贴在这里，纪念这段岁月时光！

不去评论过去的东西了；只是要记得，新的梦想，新的征程！

也愿所有的大学生朋友珍惜光阴，梦想成真。

percylee

2003-10-15于武汉

--------------------------------------------------------------------------------------

薛老师：

首先祝您节日快乐！

我是111001班的学生，前些日子和您说要写个小编译器，那时语言与词法分析器已写好了，可惜现在要忙于考试，但争取寒假前多写点。

语言很小，是基于代数结构<Z/rou;+,*>的，rou=mod（8），呵呵，目标语言是c++吧。希望能和您常联系。

下面是语言的小例子：

item a, b={3}, c;

a ={5};

c=b*(a +b)+a ;

out<<"c="<<c;

c=a+a*b;

out<<"now c="<<c;

if(c==a*b)

then

out<<"c is the mutlip of a and b";

else

out<<"c isnot mutlip of a and b";

end;

您看看部分程序吧，我用c++写的，感觉有不少困难；但总是想，如果有一天写完了，一定会很有收获---这都是您课堂上讲的呀；到时候我会到bbs上写系列的帖子，把所有的设计过程和源代码都展示给同学看。对了，我是那个kindman：），离散版块上问问题，真希望能得到您的回答---或者，我找您吧，去找蔡老师，怕他太忙了。

//一个文件

#pragma once

#include <iostream>

#include <fstream>

#include <string>

using namespace std;

// Compile-time capacity limits used by the lexer and its tables.
// Typed constants replace the former object-like macros so that the names
// follow C++ scoping rules; each one is still usable as an array bound.
const int SIZE = 50000;      // default maximum number of source-file characters
const int WORDNUM = 20000;   // maximum number of words (tokens)
const int CONSTNUM = 100;    // maximum number of constants
const int IDNUM = 200;       // maximum number of identifiers
const int ELENUM = 500;      // number of constant operand (element) entries
const int ERRORSIZE = 1000;  // capacity of the error-record array in class Error

// Tables shared by the compiler passes: the language keywords plus the
// constants, identifiers and operand elements collected from the source text.
class Sources
{
public:
    Sources();
    ~Sources();

    // Keyword test for keyWord; numOfkey is an output reference.
    // NOTE(review): presumably receives the keyword's index - implementation not shown.
    bool IsKeyword(const char *&keyWord, int &numOfkey);

    // Tests ch against the identifier follow set (idFollow).
    bool IsIdfollow(const char ch);

    // Insert a token's text into the identifier / element / constant table.
    // NOTE(review): the int result is stored by the lexer as the token's
    // codeNum; presumably the table index - confirm against the implementation.
    int InsertID(const char *&strToken);
    int InsertELE(const char *&strToken);
    int InsertConst(const char *&strToken);

    // ...

private:
    const char *keyWords[7];      // keywords
    char *constTable[CONSTNUM];   // constants (strings)
    char *idTable[IDNUM];         // user-defined identifiers
    char *elementTable[ELENUM];   // operand elements
    char idFollow[10];            // follow set of an identifier

    int idNum;
    int eleNum;
    int constNum;

    // ...
};

// Error record: the element type of the array held by class Error.
// NOTE(review): strError is declared static, so a single pointer is shared by
// every ERROR object instead of being stored per record - confirm this is
// intended.

class ERROR

{

public:

int row; // NOTE(review): presumably the source row the error was reported on - confirm against SetError

static char * strError; // shared (static) message pointer

};

class Error

{

static int errorNum;

//int waringNum;

ERROR error[ERRORSIZE];

public:

Error(void);

~Error(void);

int GetErrornum()const;

void SetError(const char *&strError,int row);

void PrintError();

//......

};

//词法分析

class Lexer

{

enum Token_value{

KW,ID,ELEMENT,CONST,

EQUAL,INPUT,OUTPUT,

ASSIGN='=',PLUS='+',MUL='*',PRINT=';',

LP='(',RP=')',COMMA=',',SEMI='"',ROWEND='\n'

};

char ch; //reading file

char memary[SIZE];//file managed a lettle

char *word;

int realSize; // of memary

int rowCount; //of row in file

int wordCount; //of token

int charCount; //of reading ch in memary

int tokenChCount;//of every tokenWord

std::fstream file;

char *& strToken;//complete word in your code

struct Token{

Token_value tokval;

int codeNum;

int rowNum;

}token[WORDNUM];//result of lexical analysis

public:

Lexer(void);

~Lexer(void);

void Openfile(const char filename[30]);

// 读一个字符至ch

void Readchar(void);

void ReCheckch();

//字符放入单词

void Concat();

bool IsLetter();

bool IsDigit();

bool IsKeyword(const char* strWord,int &numOfkey,Sources source);

//词法分析

void DoAnalysis(Sources source,Error error);

//编程需要：检查中间结果

void Test();

};

// Compilation driver: owns the error list, the shared tables and the lexer.
class Compiler
{
public:
    Compiler();
    virtual ~Compiler();

    // Reads in the file to be compiled.
    void Readfile();

    void Test();

    // ...

private:
    Error error;
    Sources source;   // resources used during compilation
    Lexer lexer;      // lexical analyzer
};

//另一个文件，实现词法分析

#include "compiler.h"

#using <mscorlib.dll>

// Creates a lexer with no input loaded.
// strToken is bound to the member pointer `word`, so it always refers to the
// buffer allocated below.
Lexer::Lexer(void):strToken(word)
{
    // realSize and ch were previously left uninitialized, so Readchar() or
    // DoAnalysis() called before Openfile() read indeterminate values.
    ch='\0';
    realSize=0;
    rowCount=0;
    charCount=0;
    wordCount=0;
    tokenChCount=0;
    word=new char[200];
    word[0]='\0';   // start with an empty, terminated token word
}

// Closes the source file and releases the token-word buffer.
Lexer::~Lexer(void)
{
    file.close();
    // word comes from new char[200], so it must be released with delete[];
    // plain delete on an array allocation is undefined behaviour.
    delete[] word;
}

void Lexer::Openfile(const char filename[30])

{

char nextCh=' ';

int num=0;

// __try{

file.open(filename,ios_base::in|ios_base::out);

/* }

catch(System::NullReferenceException *e)

{

std::cerr<<"Error: Can't open file "<<filename

<<",please make sure !"<<endl;

exit(-1);

}

while(!file.eof()){

file.get(ch);

memary[num++]=ch;

if(ch==' '){

file.get(nextCh);

while(!file.eof()&&nextCh==' ')

file.get(nextCh);

memary[num++]=nextCh;

}

realSize=num;

}

// Reads the next buffered character into ch and advances charCount.
// rowCount is incremented when the character read is a newline.
// Does nothing once all realSize characters have been consumed; in
// particular a newline left in ch is no longer counted again on every call.
void Lexer::Readchar()
{
    if(charCount>=realSize)
        return;
    ch=memary[charCount++];
    if(ch=='\n')
        rowCount++;
}

void Lexer::ReCheckch()

{

while(ch==' ')

Readchar();

}

// Appends the current character to the token word and keeps it
// null-terminated.
// The constructor allocates word as char[200], so at most 199 characters are
// stored; characters beyond that are dropped instead of overflowing the buffer.
void Lexer::Concat()
{
    if(tokenChCount>=199)
        return;
    word[tokenChCount++]=ch;
    word[tokenChCount]='\0';
}

// Reports whether the current character is an ASCII letter (A-Z or a-z).
bool Lexer::IsLetter()
{
    const bool upper=(ch>='A'&&ch<='Z');
    const bool lower=(ch>='a'&&ch<='z');
    return upper||lower;
}

// Reports whether the current character is a decimal digit ('0'..'9').
// The closing brace of the function was missing in the original text.
bool Lexer::IsDigit()
{
    return ch>='0'&&ch<='9';
}

// Forwards the keyword test to the given Sources object and returns its
// answer; numOfkey is passed through by reference.
bool Lexer::IsKeyword(const char* strWord,int &numOfkey,Sources source)
{
    return source.IsKeyword(strWord,numOfkey);
}

// Returns the character at `position`, or '\0' once `position` reaches `size`,
// so that callers can look ahead without consuming input.
static char PeekChar(const char *buffer,int position,int size)
{
    return position<size ? buffer[position] : '\0';
}

// True for an ASCII decimal digit.
static bool IsDigitChar(char c)
{
    return c>='0'&&c<='9';
}

// True for an ASCII letter or digit, i.e. a character that continues a word.
static bool IsWordChar(char c)
{
    return (c>='A'&&c<='Z')||(c>='a'&&c<='z')||IsDigitChar(c);
}

// Splits the buffered source text into tokens, filling token[] and advancing
// wordCount. Lines containing a malformed token are skipped up to and
// including their newline and reported through error.SetError().
//
// Changes relative to the original implementation:
//  - keyword tokens now get tokval = KW (it used to be left unset);
//  - the next character is inspected by look-ahead instead of read-then-push-
//    back, so a newline following an identifier or '=' is counted only once;
//  - every scanning loop stops at end of input instead of spinning forever;
//  - an identifier that ends exactly at end of input is accepted;
//  - the token word is reset for each token and filled with the digits of an
//    element ({...}) or the characters of a string ("..."), so the Insert*
//    calls no longer receive the previous token's text;
//  - the token text is passed through a `const char *` variable, because the
//    `char *&` member strToken cannot bind to a `const char *&` parameter;
//  - analysis stops when token[] is full.
//
// NOTE(review): source and error are received by value, so table insertions
// and non-static error state recorded here are made on copies; confirm whether
// the declarations should take references instead.
void Lexer::DoAnalysis(Sources source,Error error)
{
    // word is allocated as char[200] by the constructor; one slot is kept for
    // the terminating '\0'.
    const int wordLimit=199;
    int code=-1,value=-1;

    while(charCount<realSize&&wordCount<WORDNUM)
    {
        const char *tokenText=strToken;
        bool malformed=false;

        tokenChCount=0;
        word[0]='\0';
        Readchar();
        ReCheckch();

        Token &slot=token[wordCount];
        slot.rowNum=rowCount;

        if(IsLetter())
        {
            // Keyword or identifier: a letter followed by letters/digits.
            Concat();
            while(IsWordChar(PeekChar(memary,charCount,realSize)))
            {
                Readchar();
                if(tokenChCount<wordLimit)
                    Concat();
            }

            if(IsKeyword(tokenText,code,source))
            {
                slot.tokval=KW;
                slot.codeNum=code;
                wordCount++;
            }
            else if(charCount>=realSize||source.IsIdfollow(memary[charCount]))
            {
                slot.tokval=ID;
                value=source.InsertID(tokenText);
                slot.codeNum=value;
                wordCount++;
            }
            else
                malformed=true;
        }
        else
        {
            switch(ch)
            {
            case '{':   // element literal: '{' digits '}'
                while(IsDigitChar(PeekChar(memary,charCount,realSize)))
                {
                    Readchar();
                    if(tokenChCount<wordLimit)
                        Concat();
                }
                if(PeekChar(memary,charCount,realSize)=='}')
                {
                    Readchar();
                    slot.tokval=ELEMENT;
                    value=source.InsertELE(tokenText);
                    slot.codeNum=value;
                    wordCount++;
                }
                else
                    malformed=true;
                break;

            case '=':   // "==" or "="
                if(PeekChar(memary,charCount,realSize)=='=')
                {
                    Readchar();
                    slot.tokval=EQUAL;
                }
                else
                    slot.tokval=ASSIGN;
                wordCount++;
                break;

            case '+':
                slot.tokval=PLUS;
                wordCount++;
                break;

            case '*':
                slot.tokval=MUL;
                wordCount++;
                break;

            case '<':   // only "<<" is valid
                if(PeekChar(memary,charCount,realSize)=='<')
                {
                    Readchar();
                    slot.tokval=OUTPUT;
                    wordCount++;
                }
                else
                    malformed=true;
                break;

            case '>':   // only ">>" is valid
                if(PeekChar(memary,charCount,realSize)=='>')
                {
                    Readchar();
                    slot.tokval=INPUT;
                    wordCount++;
                }
                else
                    malformed=true;
                break;

            case '"':   // string constant; must close on the same line
                while(charCount<realSize
                      &&memary[charCount]!='\n'&&memary[charCount]!='"')
                {
                    Readchar();
                    if(tokenChCount<wordLimit)
                        Concat();
                }
                if(PeekChar(memary,charCount,realSize)=='"')
                {
                    Readchar();
                    slot.tokval=CONST;
                    value=source.InsertConst(tokenText);
                    slot.codeNum=value;
                    wordCount++;
                }
                else
                    malformed=true;
                break;

            case ',':
                slot.tokval=COMMA;
                wordCount++;
                break;

            case ';':
                slot.tokval=PRINT;
                wordCount++;
                break;

            case '(':
                slot.tokval=LP;
                wordCount++;
                break;

            case ')':
                slot.tokval=RP;
                wordCount++;
                break;

            case ' ':   // only reachable for a trailing space at end of input
                break;

            case '\n':
                slot.tokval=ROWEND;
                wordCount++;
                break;

            default:
                malformed=true;
                break;
            }
        }

        if(malformed)
        {
            // ch is never the newline here, so rowCount still names the row of
            // the bad token. Skip the rest of the line, newline included.
            const int errorRow=rowCount;
            while(ch!='\n'&&charCount<realSize)
                Readchar();
            error.SetError(tokenText,errorRow);
        }
    }
}

//-------------------------------------------------------------------------------------

// Development aid: prints the number of tokens, then for every token its row
// number followed by a short description of its category.
// The closing braces of the loop and of the function were missing in the
// original text and are restored here.
void Lexer::Test()
{
    cout<<"wordCount:"<<wordCount<<endl;

    for(int i=0;i<wordCount;i++)
    {
        cout<<endl<<"row:"<<token[i].rowNum<<endl;

        switch(token[i].tokval)
        {
        case KW:      cout<<"keyword";      break;
        case ID:      cout<<"identifer";    break;
        case ELEMENT: cout<<"element";      break;
        case CONST:   cout<<"const string"; break;
        case PLUS:    cout<<'+';            break;
        case MUL:     cout<<'*';            break;
        case ASSIGN:  cout<<'=';            break;
        case EQUAL:   cout<<"==";           break;
        case PRINT:   cout<<';';            break;
        case LP:      cout<<'(';            break;
        case RP:      cout<<')';            break;
        case COMMA:   cout<<',';            break;
        case INPUT:   cout<<">>";           break;
        case OUTPUT:  cout<<"<<";           break;
        case SEMI:    cout<<'"';            break;
        case ROWEND:  cout<<"row_end";      break;
        default:
            break;
        }
    }
}

祝您工作顺利。

percy