bencoding编码解析器

王朝other·作者佚名  2006-01-09
窄屏简体版  字體: |||超大  

BT客户端开始一个下载首先要处理的就是torrent文件.

而torrent文件使用bencoding编码.

所以实现bencoding编码的解析器,就是第一步工作.

Bencoding is done as follows:

Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.

Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.

Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].

Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

下面是实现的bencoding解码器的VC++源代码:

// BEncode.h: interface for the CBEncode class.

//

//////////////////////////////////////////////////////////////////////

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)

#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000

#pragma once

#endif // _MSC_VER > 1000

#pragma warning( disable : 4786 )

#pragma warning( disable : 4355 )

#include <list>

#include <map>

#include <string>

#include <vector>

using namespace std;

// Error codes reported by the bencode parser.
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr     = 0, // no error
    enm_BEncodeErr_errString = 1, // malformed string
    enm_BEncodeErr_errInt    = 2, // malformed integer
    enm_BEncodeErr_errList   = 3, // malformed list
    enm_BEncodeErr_errDict   = 4, // malformed dictionary
    enm_BEncodeErr_End       = 5, // end of text
    enm_BEncodeErr_unknown   = 6  // unknown error
};

// Discriminator identifying the concrete kind of a parsed bencode object.
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0, // plain base object (no concrete kind)
    enum_BEncodeType_String     = 1, // byte string
    enum_BEncodeType_Int        = 2, // integer
    enum_BEncodeType_List       = 3, // list
    enum_BEncodeType_Dict       = 4  // dictionary
};

class CBEncodeObjectBase

{

public:

CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase){m_type = type;clear();}

virtual ~CBEncodeObjectBase(){};

void clear(){szPos = NULL;m_error = enm_BEncodeErr_noerr;}

public:

BEncodeObjectType m_type;//对象类型

char * szPos;//对象在字符串中的位置

int ilen;//对象的数据长度

BEncodeParserErrorCode m_error;//错误值

};

// A parsed bencode integer.
class CBEncodeInt : public CBEncodeObjectBase
{
public:
    // m_iValue starts at 0 so that an object whose parsing failed never
    // exposes an indeterminate value.
    CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int), m_iValue(0) {}

    virtual ~CBEncodeInt() {}

public:
    int m_iValue; // value of the integer object
};

// A parsed bencode string. The object does not own its bytes: m_szData is a
// plain pointer and this class never allocates or frees it.
class CBEncodeString : public CBEncodeObjectBase
{
public:
    // Both the data pointer and the length are initialized; the previous
    // version left m_ilen indeterminate.
    CBEncodeString()
        : CBEncodeObjectBase(enum_BEncodeType_String), m_szData(NULL), m_ilen(0)
    {
    }

    virtual ~CBEncodeString() {}

public:
    // Copies the string payload into strValue.
    // Returns true on success; returns false and leaves strValue untouched
    // when the object carries an error, has no data pointer, or has a
    // negative length (which would otherwise convert to a huge size).
    bool getstring(string & strValue) const
    {
        if(m_error != enm_BEncodeErr_noerr || m_szData == NULL || m_ilen < 0)
            return false;

        strValue.assign(m_szData, m_ilen);
        return true;
    }

    char * m_szData; // first byte of the payload
    int m_ilen;      // payload length in bytes
};

// A parsed bencode list. The list owns the element objects stored in
// m_listObj and deletes them in clear() and in the destructor.
class CBEncodeList : public CBEncodeObjectBase
{
public:
    CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}

    virtual ~CBEncodeList() { clear(); }

    // Deletes every element and empties the list.
    // Note: this hides CBEncodeObjectBase::clear(); the base-class position
    // and error fields are not reset here.
    void clear()
    {
        for(list<CBEncodeObjectBase *>::iterator it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete *it;
        m_listObj.clear();
    }

public:
    list<CBEncodeObjectBase*> m_listObj; // owned elements, in encoding order

private:
    // Copying is disabled: a copy would share the same raw element pointers
    // and both destructors would delete them.
    CBEncodeList(const CBEncodeList &);
    CBEncodeList & operator=(const CBEncodeList &);
};

// A parsed bencode dictionary. The objects themselves are owned by m_listObj;
// m_mapObj is only a lookup table that refers to the same pointers.
class CBEncodeDict : public CBEncodeObjectBase
{
public:
    CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}

    virtual ~CBEncodeDict() { clear(); }

    // Looks up the value stored under szName.
    // Returns the value object, or NULL when szName is NULL or no such key
    // exists. The returned pointer is still owned by this dictionary.
    CBEncodeObjectBase* getvalue(const char * szName)
    {
        // Building a std::string from a null pointer is undefined behavior.
        if(szName == NULL)
            return NULL;

        map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
        return it != m_mapObj.end() ? it->second : NULL;
    }

    // Deletes every owned object and empties both containers.
    // Note: this hides CBEncodeObjectBase::clear(); the base-class position
    // and error fields are not reset here.
    void clear()
    {
        for(list<CBEncodeObjectBase *>::iterator it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete *it;
        m_listObj.clear();
        m_mapObj.clear();
    }

public:
    map<string,CBEncodeObjectBase*> m_mapObj; // key -> value lookup (non-owning)

    // The real objects live in this list, stored as alternating name object /
    // value object. The map above merely references the value pointers.
    list<CBEncodeObjectBase*> m_listObj;

private:
    // Copying is disabled: a copy would share the same raw object pointers
    // and both destructors would delete them.
    CBEncodeDict(const CBEncodeDict &);
    CBEncodeDict & operator=(const CBEncodeDict &);
};

class CBEncode

{

public:

bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

bool parse(const char * szData);

CBEncode();

virtual ~CBEncode();

void clear()

{

list<CBEncodeObjectBase *>::iterator it;

for(it = m_listObj.begin();it!=m_listObj.end();++it)

delete (*it);

m_listObj.clear();

}

public:

list<CBEncodeObjectBase*> m_listObj;

CBEncodeObjectBase* m_plastObj;//解析出来的最后一个对象

char * m_szTxt;

};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)

// BEncode.cpp: implementation of the CBEncode class.

//

//////////////////////////////////////////////////////////////////////

#include "stdafx.h"

#include "BEncode.h"

#include <climits>

//////////////////////////////////////////////////////////////////////

// Construction/Destruction

//////////////////////////////////////////////////////////////////////

// Creates an empty decoder: no last-parsed object and no source text yet.
CBEncode::CBEncode()
    : m_plastObj(NULL), m_szTxt(NULL)
{
}

// Releases the parsed objects by delegating to clear().
CBEncode::~CBEncode()
{
    this->clear();
}

// Decodes the NUL-terminated text szData into m_listObj.
//
// Any previously parsed objects are released first. Top-level objects are
// read one after another until the terminating NUL is reached; the first
// error aborts the whole parse (objects read so far stay in m_listObj and the
// failing object is reachable through m_plastObj).
//
// szData is not copied: the created objects point into it, so the caller must
// keep the buffer alive for as long as the objects are used.
//
// Returns true when the entire text was consumed without error, false for a
// NULL or empty input or on any decoding error.
bool CBEncode::parse(const char *szData)
{
    // Compare the character with '\0', not with the null-pointer macro NULL.
    if(szData == NULL || *szData == '\0')
        return false;

    clear();
    // clear() deleted the object m_plastObj may have referred to.
    m_plastObj = NULL;

    m_szTxt = const_cast<char*>(szData);

    char * szCurPos = m_szTxt;
    int iendpos = 0;
    while(*szCurPos != '\0')
    {
        // The leading character selects the reader; anything that is not an
        // integer, list or dictionary marker is treated as a string.
        bool bRead;
        switch(*szCurPos)
        {
        case 'i':
            bRead = readint(szCurPos,iendpos,m_listObj);
            break;
        case 'l':
            bRead = readlist(szCurPos,iendpos,m_listObj);
            break;
        case 'd':
            bRead = readdict(szCurPos,iendpos,m_listObj);
            break;
        default:
            bRead = readstring(szCurPos,iendpos,m_listObj);
            break;
        }

        if(!bRead)
            break; // any error terminates the whole parse

        szCurPos += iendpos;
    }

    return *szCurPos == '\0'
        && m_plastObj != NULL
        && m_plastObj->m_error == enm_BEncodeErr_noerr;
}

//从当前位置读取一个字符串

// Reads one bencode string ("<length>:<bytes>") starting at szCurPos.
//
// A new CBEncodeString is always appended to listObj and becomes m_plastObj,
// even on failure, so the caller can inspect its m_error. iendpos receives
// the number of characters advanced over. Returns true when the string was
// read without error.
//
// Compared with the previous version:
//  - the length is accumulated digit by digit with an overflow check instead
//    of being copied into a fixed 20-byte buffer (which a long digit run
//    overflowed) and passed to atoi;
//  - a zero-length string ("0:") is accepted, as bencoding allows it;
//  - at least one length digit is still required.
//
// NOTE: the declared length cannot be checked against the end of the input
// here because no buffer length is available; the caller must pass
// well-formed data.
bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    CBEncodeString * pNewString = new CBEncodeString;
    pNewString->szPos = szCurPos;

    char * szTmp = szCurPos;
    int ilen = 0;
    bool bHasDigits = false;
    bool bOverflow = false;
    while(*szTmp >= '0' && *szTmp <= '9')
    {
        const int idigit = *szTmp - '0';
        if(ilen > (INT_MAX - idigit) / 10)
            bOverflow = true; // keep consuming digits, but remember the failure
        else
            ilen = ilen * 10 + idigit;
        bHasDigits = true;
        ++szTmp;
    }

    if(*szTmp == ':' && bHasDigits && !bOverflow)
    {
        pNewString->m_szData = ++szTmp; // payload starts right after the colon
        pNewString->m_ilen = ilen;
        szTmp += ilen;
    }
    else
        pNewString->m_error = enm_BEncodeErr_errString;

    listObj.push_back(pNewString);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewString;
    m_plastObj->ilen = iendpos;

    return pNewString->m_error == enm_BEncodeErr_noerr;
}

//读取一个整型数据

// Reads one bencode integer ("i<number>e") starting at szCurPos.
//
// A new CBEncodeInt is always appended to listObj and becomes m_plastObj,
// even on failure, so the caller can inspect its m_error. iendpos receives
// the number of characters advanced over. Returns true when the integer was
// read without error.
//
// Compared with the previous version:
//  - a leading '-' is accepted, so negative values such as "i-3e" parse;
//  - the value is accumulated digit by digit instead of being copied into a
//    fixed 20-byte buffer (which a long digit run overflowed);
//  - "ie" (no digits) is rejected instead of being read as 0;
//  - a value that does not fit in int is reported as an error.
// Leading zeros and "-0" are still accepted, as before.
bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    CBEncodeInt * pNewInt = new CBEncodeInt;
    pNewInt->szPos = szCurPos;

    char * szTmp = szCurPos;
    bool bOk = false;
    if(*szTmp == 'i')
    {
        ++szTmp;

        bool bNegative = false;
        if(*szTmp == '-')
        {
            bNegative = true;
            ++szTmp;
        }

        // The magnitude is accumulated unsigned so that INT_MIN, whose
        // magnitude is INT_MAX + 1, can be represented.
        const unsigned int ulimit = bNegative
            ? static_cast<unsigned int>(INT_MAX) + 1u
            : static_cast<unsigned int>(INT_MAX);
        unsigned int umagnitude = 0;
        bool bHasDigits = false;
        bool bOverflow = false;
        while(*szTmp >= '0' && *szTmp <= '9')
        {
            const unsigned int udigit = static_cast<unsigned int>(*szTmp - '0');
            if(umagnitude > (ulimit - udigit) / 10u)
                bOverflow = true; // keep consuming digits, but remember the failure
            else
                umagnitude = umagnitude * 10u + udigit;
            bHasDigits = true;
            ++szTmp;
        }

        if(*szTmp == 'e' && bHasDigits && !bOverflow)
        {
            if(!bNegative)
                pNewInt->m_iValue = static_cast<int>(umagnitude);
            else if(umagnitude > static_cast<unsigned int>(INT_MAX))
                pNewInt->m_iValue = INT_MIN; // magnitude is exactly INT_MAX + 1
            else
                pNewInt->m_iValue = -static_cast<int>(umagnitude);
            ++szTmp; // step over the closing 'e'
            bOk = true;
        }
    }

    if(!bOk)
        pNewInt->m_error = enm_BEncodeErr_errInt;

    listObj.push_back(pNewInt);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewInt;
    m_plastObj->ilen = iendpos;

    return pNewInt->m_error == enm_BEncodeErr_noerr;
}

//读取一个列表

// Reads one bencode list ("l<elements>e") starting at szCurPos.
//
// A new CBEncodeList is always appended to listObj and becomes m_plastObj,
// even on failure; its elements are stored in (and owned by) the new list.
// iendpos receives the number of characters advanced over. Returns true when
// the list and all of its elements were read without error.
//
// Element failures are tracked with a local flag. The previous version
// inspected m_plastObj->m_error after the loop, which for an empty list "le"
// read a pointer no element had set - NULL on a fresh decoder, or a stale
// pointer from an earlier object.
bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    CBEncodeList * pNewList = new CBEncodeList;
    pNewList->szPos = szCurPos;

    char * szTmp = szCurPos;
    bool bOk = false;
    if(*szTmp == 'l')
    {
        ++szTmp;
        bOk = true;

        int ielemlen = 0;
        while(*szTmp != 'e')
        {
            // The leading character selects the reader; anything that is not
            // an integer, list or dictionary marker is treated as a string
            // (this includes the terminating NUL, which readstring rejects).
            bool bRead;
            switch(*szTmp)
            {
            case 'i':
                bRead = readint(szTmp,ielemlen,pNewList->m_listObj);
                break;
            case 'l':
                bRead = readlist(szTmp,ielemlen,pNewList->m_listObj);
                break;
            case 'd':
                bRead = readdict(szTmp,ielemlen,pNewList->m_listObj);
                break;
            default:
                bRead = readstring(szTmp,ielemlen,pNewList->m_listObj);
                break;
            }

            if(!bRead)
            {
                bOk = false; // any error terminates the whole parse
                break;
            }
            szTmp += ielemlen;
        }

        if(bOk)
            ++szTmp; // the loop ended on the closing 'e'; step over it
    }

    if(!bOk)
        pNewList->m_error = enm_BEncodeErr_errList;

    listObj.push_back(pNewList);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewList;
    m_plastObj->ilen = iendpos;

    return pNewList->m_error == enm_BEncodeErr_noerr;
}

//读取一个字典

bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)

{

char * szTmp = szCurPos;

CBEncodeDict * pNewDict= new CBEncodeDict;

pNewDict->szPos = szCurPos;

if(*szTmp == 'd')

{

szTmp++;

int ilistendpos;

string strname;

while(*szTmp!='e')

{

if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))

break;

if(m_plastObj->m_type !=enum_BEncodeType_String)

break;

strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen);

szTmp+=ilistendpos;

if(*szTmp== 'i')

{

if(!readint(szTmp,ilistendpos,pNewDict->m_listObj))

break;//遇到任何错误都终止整个解析

szTmp+=ilistendpos;

}

else if(*szTmp== 'l')

{

if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

else if(*szTmp== 'd')

{

if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

else

{

if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj));

}

if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)

pNewDict->m_error = enm_BEncodeErr_errDict;

else

szTmp++;

}

else

pNewDict->m_error = enm_BEncodeErr_errDict;

listObj.push_back(pNewDict);

iendpos = szTmp-szCurPos;

m_plastObj = pNewDict;

m_plastObj->ilen = iendpos;

return pNewDict->m_error == enm_BEncodeErr_noerr?true:false;

}

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
© 2005- 王朝網路 版權所有 導航