Super Power Dictionary
C.Third Edition
This is the third edition of my dictionary and it is very, very powerful now! Read in 20K words from a 2.3M text
file only uses less than 2seconds. However, ranking them will take 41seconds! As it need to track each node from
"WordTree" from "big to small" by "Frequency" and "rank" them by this sequence and then copy each node to a node
of "RankTree" then "insert" the new rankTree node into ranktree.
1。 Basic idea: I found out some mistakes in "ranking" functions. And it is really a hardtime to debug for a
whole day in lab(almost 8hours from 10:00AM to 18:30PM).
2。 Program design: The biggest problem is still how to walk round the "key" problem of RankTree by using "frequency"
as key to compare and at same keep all ancester class with their original functions.
3。 Major function:
A. void traverse(...);
I add one more parameters to let "RankTree Forest" to start traversing from its own "root".
B. void insertRank(Tree* node, void* userPtr)
This is a special "callback" function to construct "RankTree" forest by "traversing" each node of
"WordTree".
C. void Tree::maxWalk(...);
There are two format of this new MAXORDER way of traversing. Actually it is so-called "from big to small"
exactly opposite to "MIDORDER".
4。 Further improvement:
A. I have to go soon, so, no more time.
B. Still need improve.
C. Ranking counting and WordTree counting is differed by one, and I have not found the bug.
D. I hope I can rewrite the basic string-related part to enable the dictionary to read Chinese.
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <ctime>
using namespace std;
const char* DefaultBackup = "c:\\nickback.txt";
const MaxWordLength = 255;
enum TraverseOrder
{
PREORDER, MIDORDER, POSTORDER, MAXORDER
};
class Tree
{
private:
Tree* pLeft;
Tree* pRight;
Tree* pParent;
char* pName;
int index;
static int treeCounter;
static Tree* pRoot;
int freq;
int rank;
void maxWalk(Tree* node, void (*visit)(Tree* node));
void preWalk(Tree* node, void (*visit)(Tree* node));
void midWalk(Tree* node, void (*visit)(Tree* node));
void postWalk(Tree* node, void (*visit)(Tree* node));
void maxWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr);
void preWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr);
void midWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr);
void postWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr);
protected:
void* pKey;
public:
void decCount() {treeCounter --;}
const int getRank() { return rank;}
void setRank(const int newRank) { rank = newRank;}
const int getFreq() {return freq;}
void setFreq(const int newFreq) { freq = newFreq;}
void setFreq() {freq++;}
Tree* left();
Tree* right();
Tree* parent();
virtual void* key();
virtual void setKey(void* newKey);
void setRoot(Tree* node);
const char* name();
const int getIndex();
void setLeft(Tree* leftSon);
void setRight(Tree* rightSon);
void setParent(Tree* father);
void setIndex(const int number);
virtualint compare(const void* otherKey);
void setName(const char *str);
void giveBirth();
virtual bool operator>(Tree&);
virtual bool operator==(Tree&);
virtual bool operator<(Tree&);
Tree();
virtual ~Tree();
static Tree* root() {return pRoot;}
const int count();
void traverse(void ( *visit)(Tree* node), TraverseOrder defaultOrder= MIDORDER,
Tree* start= root());
void traverse(void (*visit)(Tree* node, void* userPtr), void* userPtr,
TraverseOrder defaultOrder = MIDORDER, Tree* start = root());
};
void visitRank(Tree* node, void* userPtr);
void defaultVisit(Tree* node);
void insertRank(Tree* node, void* userPtr);
class BinaryTree: public Tree
{
private:
public:
virtual void remove(BinaryTree* node);
virtual BinaryTree* next();
virtual BinaryTree* prev();
virtual bool insert(BinaryTree* node, BinaryTree* start=(BinaryTree*)root());
virtual ~BinaryTree();
};
class WordTree: public BinaryTree
{
private:
static int wordLength;
static int wordCounter;
WordTree* addNode(WordTree* node, FILE** stream);
protected:
void setWordCounter();
void readWord(FILE** file);
void addWord(char* buffer);
public:
WordTree* searchWord(WordTree* node, const char* buffer);
WordTree* readNode(WordTree* node, FILE** stream);
WordTree* readFromFile(const char* fileName);
void saveNode(WordTree* node, FILE** stream);
virtual int compare(const void *otherKey);
void setWordLength(const int newLength);
static const int getWordLength();
virtual void setKey(void* newKey);
virtual bool operator>(Tree&);
virtual bool operator==(Tree&);
virtual bool operator<(Tree&);
void openFile(const char* fileName);
WordTree(const char * fileName);
WordTree();
int wordCount();
virtual void setName(const char *str);
void saveToFile(const char* fileName);
~WordTree();
virtual WordTree* findWord(const char* buffer);
};
class RankTree: public WordTree
{
private:
protected:
public:
virtual bool operator>(Tree& node);
virtual bool operator<(Tree& node);
void setRankTree(WordTree* wordRoot);
RankTree();
};
int count=0;
int main()
{
WordTree *ptr = new WordTree;
char choice[6], fileName[30];
long start =0;
ptr = ptr->readFromFile(DefaultBackup);
cout<<"quit? or input?\nEnter your choice: \n";
cin>>choice;
while (strcmp(choice, "quit")!=0&&strcmp(choice, "input")!=0)
{
cout<<"quit? or input?\n Enter your choic: \n";
cin>>choice;
}
if (strcmp(choice, "quit")==0)
{
cout<<"The total number of words in dictionary is "<<ptr->wordCount()<<endl;
return 0;
}
else
{
if (strcmp(choice, "input")==0)
{
cout<<"\nnow input file name here(input 'quit' to exit):\n";
cin>>fileName;
while (strcmp(fileName, "quit")!=0)
{
start = time(0);
ptr->openFile(fileName);
cout<<"\nTotal times spent for read file is:"<<time(0) -start<<endl;
cout<<"\nnow input file name here(input quit to exit):\n";
cin>>fileName;
}
cout<<"\nThe total number of words in dictionary is:"<<ptr->count()<<endl;
cout<<"\nDo you want to see contents?(yes or no)\n";
cin>>choice;
if (strcmp(choice, "yes")==0)
{
ptr->traverse(defaultVisit, PREORDER);
}
cout<<"\nDo you want to rank the tree?(yes or no)\n";
cin>>choice;
if (strcmp(choice, "yes")==0)
{
start = time(0);
int rankCount =0;
RankTree *rankPtr = new RankTree;
rankPtr->setFreq(ptr->getFreq());
rankPtr->setName((char*)ptr->key());
ptr->traverse(insertRank, (void*)rankPtr);
rankPtr->traverse(visitRank, (void*)(&rankCount), MAXORDER, rankPtr);
cout<<"\nTotal time for ranking is :"<<time(0) - start<<endl;
cout<<"Do you want to see result of ranking?\n";
cin>>choice;
if (strcmp(choice, "yes")==0)
{
rankPtr->traverse(defaultVisit, MAXORDER, rankPtr);
}
cout<<"\nThe total number in ranktree is:"<<rankPtr->count()<<endl;
}
cout<<"The total number of words in dictionary is "<<ptr->wordCount();
ptr->saveToFile(DefaultBackup);
}
}
return 0;
}
int WordTree::wordCounter = 0;
int WordTree::wordLength = 20;
void insertRank(Tree* node, void* userPtr)
{
RankTree* rankPtr;//
rankPtr = (RankTree*)(((RankTree*)userPtr)->searchWord((RankTree*)userPtr,(char*)node->key()));
if (rankPtr == NULL)
{
rankPtr = new RankTree;
rankPtr->setName((char*)node->key());
rankPtr->setFreq(node->getFreq());
((RankTree*)(userPtr))->insert(rankPtr, (RankTree*)userPtr);
}
}
RankTree::RankTree()
{
decCount();
}
void visitRank(Tree* node, void* userPtr)
{
node->setRank(*(int*)(userPtr));
*(int*)(userPtr) +=1;
}
bool RankTree::operator <(Tree& node)
{
int result;
result = getFreq() - node.getFreq();
if (result !=0)
{
return (result<0);
}
else
{
return (strcmp((char*)key(), (char*)node.key())<0);
}
}
bool RankTree::operator >(Tree& node)
{
int result;
result = getFreq() - node.getFreq();
if (result !=0)
{
return (result>0);
}
else
{
return (strcmp((char*)key(), (char*)node.key())>0);
}
}
void RankTree::setRankTree(WordTree* wordRoot)
{
RankTree* ptr;
if (wordRoot!=NULL)
{
if (this->root()!=NULL)
{
ptr = new RankTree;
ptr->setFreq(wordRoot->getFreq());
ptr->setKey((void*)wordRoot->key());
ptr->insert(ptr);
}
else
{
this->setRoot(this);
this->setFreq(wordRoot->getFreq());
this->setKey((void*)wordRoot->key());
}
setRankTree((WordTree*)wordRoot->left());
setRankTree((WordTree*)wordRoot->right());
}
}
BinaryTree::~BinaryTree()
{
}
WordTree::~WordTree()
{
}
void WordTree::setWordCounter()
{
wordCounter ++;
}
const int WordTree::getWordLength()
{
return wordLength;
}
WordTree* WordTree::addNode(WordTree* node, FILE** stream)
{
char buffer[MaxWordLength];
int index, pIndex, number;
WordTree *domino;
char *ptr=buffer, ch;
fread((void*)(&index), sizeof(int), 1, *stream);
fread((void*)(&pIndex), sizeof(int), 1, *stream);
if (index==-1&&pIndex==-1)
{
return NULL;
}
do
{
ch = fgetc(*stream);
*ptr = ch;
ptr++;
}
while(ch!='\0');
fread((void*)(&number), sizeof(int), 1, *stream);
setWordCounter(); //add a word
if (pIndex== -1) //this is root
{
node->setIndex(index);
node->setFreq(number);
node->setName(buffer);
node->setRoot(node);
return node;
}
while (pIndex!=node->getIndex())
{
if (node->parent()==NULL)
{
return NULL;
}
node= (WordTree*)node->parent();
}
if (node!=NULL&&pIndex==node->getIndex())
{
domino = new WordTree;
domino->setIndex(index);
domino->setName(buffer);
domino->setFreq(number);
domino->setParent(node);
if (*domino<*node)
{
node->setLeft(domino);
}
else
{
node->setRight(domino);
}
return domino;
}
else
{
return NULL;
}
}
WordTree* WordTree::readNode(WordTree* node, FILE** stream)
{
WordTree* domino = node;
while (domino!=NULL)
{
domino = addNode(domino, stream);
}
return (WordTree*)node->root(); //this is ugly as I have to remove those sentinal
}
WordTree* WordTree::readFromFile(const char* fileName)
{
FILE* stream;
if ((stream=fopen(fileName, "r+b"))==NULL)
{
cout<<"unable to open file "<<fileName<<endl;
return NULL;
}
else
{
WordTree* ptr= new WordTree;
return readNode(ptr, &stream);
}
fclose(stream);
}
void WordTree::saveNode(WordTree* node, FILE** stream)
{
int first, second, number;
first = node->getIndex();
number = getFreq();
fwrite((void*)(&first), sizeof(int), 1, *stream);
if (node->parent()==NULL)
{
second = -1;
}
else
{
second = node->parent()->getIndex();
}
fwrite((void*)(&second), sizeof(int), 1, *stream);
fwrite((void*)(node->name()),sizeof(char), strlen(node->name()), *stream);
fputc('\0', *stream);//I have to put this '/n' otherwise there is always problem!
fwrite((void*)(&number), sizeof(int), 1, *stream);
}
void traverseTree(WordTree* node, FILE** stream)
{
if (node!=NULL)
{
node->saveNode(node, stream);
traverseTree((WordTree*)node->left(), stream);
traverseTree((WordTree*)node->right(), stream);
}
}
void writeEnd(FILE** stream)
{
int temp1=-1, temp2=-1;
fwrite((void*)(&temp1), sizeof(int), 1, *stream);
fwrite((void*)(&temp2), sizeof(int), 1, *stream);
}
void WordTree::saveToFile(const char* fileName)
{
void traverseTree(WordTree* node, FILE ** stream);
void writeEnd(FILE** stream);
WordTree *domino;
FILE * stream;
if ((stream=fopen("c:\\nickback.txt", "w+b"))==NULL)
{
cout<<"unable to save to file "<<fileName<<endl;
}
else
{
domino = (WordTree*)root();
traverseTree((WordTree*)domino, &stream);
}
writeEnd(&stream);
fclose(stream);
}
const int Tree::count()
{
return treeCounter;
}
void WordTree::setWordLength(const int newLength)
{
if (newLength>MaxWordLength)
{
cout<<"exceed max word length."<<endl;
}
else
{
wordLength = newLength;
}
}
int WordTree::compare(const void*otherKey)
{
return strcmp((char*)key(), (char*)otherKey);
}
int WordTree::wordCount()
{
return wordCounter;
}
WordTree::WordTree()
{
}
void WordTree::openFile(const char* fileName)
{
FILE * stream;
if ((stream=fopen(fileName, "r+b"))==NULL)
{
cout<<"unable open file!"<<endl;
}
else
{
readWord(&stream);
}
fclose(stream);
}
void WordTree::readWord(FILE **stream)
{
char buffer[MaxWordLength];
char *ptr = buffer;
char ch;
int counter =0;
while (!feof(*stream))
{
ptr = buffer;
counter = 0;
ch = toupper(fgetc(*stream));
while (isalnum(ch)&&!feof(*stream)&&counter<wordLength)
{
*ptr = ch;
ch = toupper(fgetc(*stream));
ptr++;
counter++;
}
if (ptr!=buffer)
{
*ptr='\0';
addWord(buffer);
}
}
}
WordTree* WordTree::searchWord(WordTree* node, const char* buffer)
{
int result;
if (node!=NULL)
{
result = strcmp((char*)(node->key()), buffer);
if (result ==0)
{
return node;
}
else
{
if (result < 0)
{
return searchWord((WordTree*)node->right(), buffer);
}
else
{
return searchWord((WordTree*)node->left(), buffer);
}
}
}
else
{
return NULL;
}
}
WordTree* WordTree::findWord(const char *buffer)
{
return searchWord((WordTree*)root(), buffer);
}
void WordTree::addWord(char * buffer)
{
//need add a findword method and find first and then new
WordTree *ptr;
ptr = findWord(buffer);
if (ptr==NULL)
{
ptr = new WordTree;
ptr->setName(buffer);
if (ptr->insert(ptr))
{
setWordCounter();
}
}
else
{
ptr->setFreq();
}
}
void WordTree::setName(const char *str)
{
Tree::setName(str);
setKey((void*)name());
}
int Tree::treeCounter= 0;
WordTree::WordTree(const char* fileName)
{
WordTree::WordTree();
openFile(fileName);
}
void WordTree::setKey(void * newKey)
{
pKey = newKey;
}
bool WordTree::operator <(Tree& second)
{
if (strcmp((char*)key(), (char*)second.key())<0)
{
return true;
}
else
{
return false;
}
}
bool WordTree::operator ==(Tree& second)
{
if (strcmp((char*)key(), (char*)second.key())==0)
{
return true;
}
else
{
return false;
}
}
bool WordTree::operator >(Tree& second)
{
if (strcmp((char*)key(), (char*)second.key())>0)
{
return true;
}
else
{
return false;
}
}
void defaultVisit(Tree* node)
{
cout<<"\n\n";
cout<<"default visiting tree index:"<<node->getIndex()<<endl;
cout<<"and tree name is:"<<node->name()<<endl;
cout<<"and its frequency is: "<<(node)->getFreq()<<endl;
cout<<"and its rank is:"<<node->getRank()<<endl;
if (node->parent()!=NULL)
{
cout<<"its parent is:"<<node->parent()->name()<<endl;
if (node==node->parent()->left())
{
cout<<"and it is left son."<<endl;
}
else
{
cout<<"and it is right son"<<endl;
}
}
else
{
cout<<"it has no parent"<<endl;
}
cout<<"\n\n";
}
void Tree::preWalk(Tree* node, void (*visit)(Tree* node))
{
if (node!=NULL)
{
visit(node);
preWalk(node->left(), visit);
preWalk(node->right(), visit);
}
}
void Tree::postWalk(Tree* node, void (*visit)(Tree* node))
{
if (node!=NULL)
{
postWalk(node->left(), visit);
postWalk(node->right(), visit);
visit(node);
}
}
void Tree::maxWalk(Tree* node, void (*visit)(Tree* node))
{
if (node!=NULL)
{
maxWalk(node->right(), visit);
visit(node);
maxWalk(node->left(), visit);
}
}
void Tree::midWalk(Tree* node, void (*visit)(Tree* node))
{
if (node!=NULL)
{
midWalk(node->left(), visit);
visit(node);
midWalk(node->right(), visit);
}
}
void Tree::preWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr)
{
if (node!=NULL)
{
visit(node, userPtr);
preWalk(node->left(), visit, userPtr);
preWalk(node->right(), visit, userPtr);
}
}
void Tree::midWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr)
{
if (node!=NULL)
{
midWalk(node->left(), visit, userPtr);
visit(node, userPtr);
midWalk(node->right(), visit, userPtr);
}
}
void Tree::postWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr)
{
if (node!=NULL)
{
postWalk(node->left(), visit, userPtr);
postWalk(node->right(), visit, userPtr);
visit(node, userPtr);
}
}
void Tree::maxWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr)
{
if (node!=NULL)
{
maxWalk(node->right(), visit, userPtr);
visit(node, userPtr);
maxWalk(node->left(), visit, userPtr);
}
}
void Tree::traverse(void (*visit)(Tree* node, void* userPtr), void* userPtr,
TraverseOrder defaultOrder, Tree* start)
{
Tree *domino;
domino = start;
switch(defaultOrder)
{
case PREORDER:
preWalk(domino, visit, userPtr);
break;
case MIDORDER:
midWalk(domino, visit, userPtr);
break;
case POSTORDER:
postWalk(domino, visit, userPtr);
break;
case MAXORDER:
maxWalk(domino, visit, userPtr);
break;
}
}
void Tree::traverse(void (*visit)(Tree* node), TraverseOrder defaultOrder, Tree* start)
{
Tree *domino;
domino = start;
switch(defaultOrder)
{
case PREORDER:
preWalk(domino, visit);
break;
case MIDORDER:
midWalk(domino, visit);
break;
case POSTORDER:
postWalk(domino, visit);
break;
case MAXORDER:
maxWalk(domino, visit);
break;
}
}
Tree* Tree::pRoot = NULL;
void BinaryTree::remove(BinaryTree* node)
{
BinaryTree* ptr=NULL;
if (node->left()==NULL || node->right()==NULL)
{
if (node->left()!=NULL)
{
node->parent()->setLeft(node->left());
node->left()->setParent(node->parent());
}
if (node->right()!=NULL)
{
node->parent()->setRight(node->right());
node->right()->setParent(node->parent());
}
}
else
{
ptr = node->next();
remove(ptr);
ptr->setParent(node->parent());
if (node->parent()==NULL)
{
node->setRoot(ptr);
}
else
{
if (node==node->parent()->left())
{
node->parent()->setLeft(ptr);
}
else
{
node->parent()->setRight(ptr);
}
ptr->setLeft(node->left());
ptr->setRight(node->right());
}
}
}
bool Tree::operator==(Tree& second)
{
if (this->compare(second.key())==0)
{
return true;
}
else
{
return false;
}
}
bool Tree::operator <(Tree& second)
{
if (this->compare(second.key())<0)
{
return true;
}
else
{
return false;
}
}
bool Tree::operator >(Tree& second)
{
if (this->compare(second.key())>0)
{
return true;
}
else
{
return false;
}
}
int Tree::compare(const void * otherKey)
{
return *((int*)(key()))- *((int*)(otherKey));
}
void* Tree::key()
{
return pKey;
}
void Tree::setKey(void *newKey)
{
pKey = malloc(sizeof(int));
*(int*)(pKey) = *(int*)(newKey);
}
void Tree::setRoot(Tree *node)
{
pRoot = node;
}
bool BinaryTree::insert(BinaryTree* node, BinaryTree* start)
{
Tree* ptr, *dummy;
ptr = start;
dummy = ptr;
while (ptr!=NULL)
{
dummy = ptr;
if (*ptr>*node)
{
ptr = ptr->left();
}
else
{
if (*ptr<*node)
{
ptr=ptr->right();
}
else
{
if (*ptr==*node)
{
return false;
}
}
}
}
node->setParent(dummy);
if (dummy==NULL)
{
setRoot(node);
}
else
{
//node->setRoot(dummy->root());
if (*dummy>*node)
{
dummy->setLeft(node);
}
else
{
if (*dummy<*node)
{
dummy->setRight(node);
}
} //if "== ", donot insert;
}
return true;
}
void Tree::setLeft(Tree* leftSon)
{
pLeft = leftSon;
}
void Tree::setRight(Tree* rightSon)
{
pRight = rightSon;
}
void Tree::setParent(Tree* father)
{
pParent = father;
}
void Tree::giveBirth()
{
Tree* leftTree = new Tree;
Tree* rightTree = new Tree;
char str[128];
leftTree->setIndex(this->getIndex() *2 );
rightTree->setIndex(this->getIndex() * 2 + 1);
this->setLeft(leftTree);
this->setRight(rightTree);
leftTree->setParent(this);
rightTree->setParent(this);
strcpy(str, "left son of ");
strcat(str, this->name());
leftTree->setName(str);
strcpy(str, "right son of ");
strcat(str, this->name());
rightTree->setName(str);
leftTree->setParent(this);
rightTree->setParent(this);
if (root()==NULL)
{
setRoot(this);
setParent(NULL);
}
/*
else
{
leftTree->setRoot(root());
rightTree->setRoot(root());
}
*/
}
void Tree::setIndex(const int number)
{
index = number;
}
const int Tree::getIndex()
{
return index;
}
const char* Tree::name()
{
return pName;
}
void Tree::setName(const char *str)
{
if (str!=NULL)
{
pName = (char *)malloc(strlen(str)+1);
strcpy(pName, str);
}
}
Tree::Tree()
{
setIndex(treeCounter);
treeCounter++;
pLeft = NULL;
pRight = NULL;
pParent = NULL;
pName = NULL;
pKey = NULL;
freq = 0;
rank =0;
}
Tree::~Tree()
{
if (pName!=NULL)
{
free(pName);
}
if (pKey!=NULL)
{
free(pKey);
}
}
Tree* Tree::left()
{
return pLeft;
}
Tree* Tree::right()
{
return pRight;
}
Tree* Tree::parent()
{
return pParent;
}
BinaryTree* BinaryTree::next()
{
Tree* result = NULL;
if (right()!=NULL)
{
result = right();
while (result->left!=NULL)
{
result = result->left();
}
}
else
{
if (parent()!= NULL)
{
result = this;
while(result->parent()!=NULL&&result==result->parent()->right())
{
result = result->parent();
}
}
//result = null
}
return (BinaryTree*)result;
}
BinaryTree* BinaryTree::prev()
{
Tree* result = NULL;
if (left()!=NULL)
{
result = left();
while (result->right()!=NULL)
{
result = result->right();
}
}
else
{
if (parent()!=NULL)
{
result = this;
while (result->parent()!=NULL&&result==result->parent()->left())
{
result = result->parent();
}
}
}
return (BinaryTree*)result;
}