这个月初微软ATC笔试题 - 王朝网络宽屏版

因为不是正式招聘,所以只有 1 道题,限时 40 分钟完成。题目是:给定一个英文文本文件,统计文件中一共有多少个不同的英文单词,建议使用 C++,并注意程序效率。

今天有空了,就突然想完整实现一下这道题。当时我快到结束时才想出一个好方法,结果来不及写代码,只好用文字描述了算法……具体实现采用的是哈希+树。以下是实现:(有些错误没有处理,比如文件打开失败;如果文件中出现英文大小写字母以外的字符,程序会崩溃。这不是什么实用的东西,着重于算法实现,其他的懒得写了,呵呵。)

#include <fstream>

#include <string>

#include <iostream>

using namespace std;

/**
 * One node of a 26-way letter trie.
 *
 * A node is reached from its parent through the child slot selected by a
 * letter (slot 0 for 'a'/'A' ... slot 25 for 'z'/'Z').
 *
 * The default constructor puts the node into a well-defined "empty" state so
 * that a node obtained with `new node` never exposes indeterminate members.
 */
typedef struct node
{
    char c;                 // letter stored in this node; '\0' until assigned
    node* child[26];        // one slot per letter; NULL means "no such child"
    unsigned int hasChild;  // bookkeeping set by the trie-building code when a
                            // child is attached; 0 for a freshly created node

    // Zero-initialise every field: empty letter, no children, no flags.
    node() : c('\0'), hasChild(0)
    {
        for (int i = 0; i < 26; ++i) {
            child[i] = NULL;
        }
    }
}*PNODE,NODE;

/**
 * Clears the child table of a trie node.
 *
 * @param child  array of at least 26 node pointers; each of the first 26
 *               entries is overwritten with NULL ("no child").
 */
void initChild(node* child[])
{
    for (int i = 0; i < 26; ++i) {
        child[i] = NULL;
    }
}

int main(void)

{

ifstream in("plain.txt");

string strWord;

int count = 0,i,len,seq;

bool isSame = false;

NODE *root = new node;

initChild(root->child);

root->c = NULL;

PNODE cur = root;

//每次取一个单词进行处理

while(getline(in,strWord,' ')){

len = strWord.size();

for(i = 0 ;i < len; i ++){

if ((seq = (strWord[i] - 'a')) < 0) //哈希顺序

seq = strWord[i] - 'A';

if(cur->child[seq] && strWord[i] == cur->child[seq]->c){ //匹配

cur = cur->child[seq];

if (cur->hasChild == 1 && (i==(len-1)))//abcd a的情况,后者是前者的子集

isSame = false;

else

isSame = true;

continue;

}

else{//不匹配

cur->child[seq] = new node;

cur->hasChild = 1;

cur = cur->child[seq];

initChild(cur->child);

cur->c = strWord[i];

isSame = false;

}

if (!isSame) count++;

cur = root; //每个单词解析结束后cur都返回根部

}

delete root;

cout<<"The number of different word is "<<count<<endl;

return 0;

}