分享
 
 
 

bencoding编码解析器

王朝other·作者佚名  2006-01-09
窄屏简体版  字體: |||超大  

BT客户端开始一个下载首先要处理的就是torrent文件.

而torrent文件使用bencoding编码.

所以实现bencoding编码的解析器,就是第一步工作.

Bencoding is done as follows:

Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.

Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.

Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].

Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

下面是实现的bencoding解码器的VC++源代码:

// BEncode.h: interface for the CBEncode class.

//

//////////////////////////////////////////////////////////////////////

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)

#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000

#pragma once

#endif // _MSC_VER > 1000

#pragma warning( disable : 4786 )

#pragma warning( disable : 4355 )

#include <climits>

#include <list>

#include <map>

#include <string>

#include <vector>

using namespace std;

// Error codes recorded on each parsed object (CBEncodeObjectBase::m_error).
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr     = 0, // no error
    enm_BEncodeErr_errString = 1, // malformed string
    enm_BEncodeErr_errInt    = 2, // malformed integer
    enm_BEncodeErr_errList   = 3, // malformed list
    enm_BEncodeErr_errDict   = 4, // malformed dictionary
    enm_BEncodeErr_End       = 5, // end of text
    enm_BEncodeErr_unknown   = 6  // unknown error
};

// Discriminator stored in CBEncodeObjectBase::m_type; tells which concrete
// CBEncode* class a base pointer actually refers to.
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0, // plain base object
    enum_BEncodeType_String     = 1, // CBEncodeString
    enum_BEncodeType_Int        = 2, // CBEncodeInt
    enum_BEncodeType_List       = 3, // CBEncodeList
    enum_BEncodeType_Dict       = 4  // CBEncodeDict
};

// Common base of every parsed bencoding object.
//
// Records the object's kind, where it starts in the source text, how many
// bytes of source text it occupies, and the error (if any) hit while
// parsing it.
class CBEncodeObjectBase
{
public:
    // type: the concrete kind of the object being constructed.
    // All bookkeeping fields start out in a defined state (previously ilen
    // was left uninitialised until a parser happened to assign it).
    CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase)
        : m_type(type), szPos(NULL), ilen(0), m_error(enm_BEncodeErr_noerr)
    {
    }

    virtual ~CBEncodeObjectBase() {}

    // Resets position, length and error state; m_type is left untouched.
    // Not virtual: CBEncodeList/CBEncodeDict declare their own clear() that
    // hides this one.
    void clear()
    {
        szPos = NULL;
        ilen = 0;
        m_error = enm_BEncodeErr_noerr;
    }

public:
    BEncodeObjectType m_type;        // kind of object
    char * szPos;                    // start of the object inside the parsed text (not owned)
    int ilen;                        // number of bytes of text the object occupies
    BEncodeParserErrorCode m_error;  // parse result for this object
};

// A parsed bencoded integer ("i<digits>e").
class CBEncodeInt : public CBEncodeObjectBase
{
public:
    // m_iValue starts at 0 so that an object whose parse failed never
    // exposes an indeterminate value.
    CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int), m_iValue(0) {}

    virtual ~CBEncodeInt() {}

public:
    // Decoded value. NOTE(review): stored as int, so values outside the int
    // range cannot be represented; the original text is still reachable
    // through szPos/ilen.
    int m_iValue;
};

// A parsed bencoded string ("<length>:<bytes>").
//
// The object does not copy the bytes: m_szData points into the text that
// was parsed, so that text must stay alive while the object is used.
class CBEncodeString : public CBEncodeObjectBase
{
public:
    // Both payload fields start in a defined state (m_ilen used to be left
    // uninitialised).
    CBEncodeString()
        : CBEncodeObjectBase(enum_BEncodeType_String), m_szData(NULL), m_ilen(0)
    {
    }

    virtual ~CBEncodeString() {}

public:
    // Copies the payload into strValue.
    // Returns true on success; returns false (leaving strValue untouched)
    // when the object recorded a parse error or has no payload pointer.
    bool getstring(string & strValue) const
    {
        if (m_error != enm_BEncodeErr_noerr || m_szData == NULL)
            return false;

        // Explicit length: the payload is not NUL-terminated and may itself
        // contain NUL bytes.
        strValue.assign(m_szData, m_ilen);
        return true;
    }

    char * m_szData; // first payload byte inside the parsed text (not owned)
    int m_ilen;      // payload length in bytes
};

class CBEncodeList : public CBEncodeObjectBase

{

public:

CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}

virtual ~CBEncodeList(){clear();}

void clear()

{

list<CBEncodeObjectBase *>::iterator it;

for(it = m_listObj.begin();it!=m_listObj.end();++it)

delete (*it);

m_listObj.clear();

}

public:

list<CBEncodeObjectBase*> m_listObj;

};

// A parsed bencoded dictionary ("d<key><value>...e").
//
// m_listObj owns every key and value object, stored alternately in source
// order (key, value, key, value, ...). m_mapObj is only a lookup table from
// key text to the corresponding value object; it owns nothing.
class CBEncodeDict : public CBEncodeObjectBase
{
public:
    CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}

    // Destroys every key/value object still held by the dictionary.
    virtual ~CBEncodeDict() { clear(); }

    // Looks up the value stored under szName.
    // Returns the value object (still owned by this dictionary), or NULL
    // when the key is absent or szName itself is NULL.
    CBEncodeObjectBase* getvalue(const char * szName)
    {
        // Building a std::string from a null pointer is undefined behaviour,
        // so treat a null name as "not found".
        if (szName == NULL)
            return NULL;

        map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
        if (it != m_mapObj.end())
            return it->second;
        return NULL;
    }

    // Deletes all key/value objects and empties both containers.
    // Hides (does not override) CBEncodeObjectBase::clear().
    void clear()
    {
        // Drop the lookup table first so it never holds pointers to objects
        // that have already been deleted.
        m_mapObj.clear();

        list<CBEncodeObjectBase *>::iterator it;
        for (it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();
    }

public:
    map<string,CBEncodeObjectBase*> m_mapObj; // key text -> value object (non-owning)
    list<CBEncodeObjectBase*> m_listObj;      // alternating key and value objects (owned)
};

// Bencoding parser.
//
// parse() turns a bencoded text into a tree of CBEncodeObjectBase-derived
// objects. The top-level objects are stored in m_listObj and owned by this
// parser; string objects point into the parsed text instead of copying it,
// so the text must outlive the parse results.
//
// NOTE(review): the class owns raw pointers but is still copyable; copying
// an instance would lead to the same objects being deleted twice.
class CBEncode
{
public:
    // Each read* function parses one object starting at szCurPos, appends
    // it to listObj (even on failure, with its m_error set), stores the
    // number of bytes consumed in iendpos, and returns true on success.
    bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
    bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);

    // Parses a complete bencoded text; returns true when all of it parsed
    // without error.
    bool parse(const char * szData);

    CBEncode();
    virtual ~CBEncode();

    // Deletes every parsed object and empties the result list.
    void clear()
    {
        list<CBEncodeObjectBase *>::iterator it;
        for (it = m_listObj.begin(); it != m_listObj.end(); ++it)
            delete (*it);
        m_listObj.clear();

        // m_plastObj pointed into the tree that was just destroyed; reset it
        // so it can never be dereferenced as a dangling pointer.
        m_plastObj = NULL;
    }

public:
    list<CBEncodeObjectBase*> m_listObj; // top-level parsed objects (owned)
    CBEncodeObjectBase* m_plastObj;      // most recently parsed object (non-owning)
    char * m_szTxt;                      // text handed to the last parse() call (not owned)
};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)

// BEncode.cpp: implementation of the CBEncode class.

//

//////////////////////////////////////////////////////////////////////

#include "stdafx.h"

#include "BEncode.h"

//////////////////////////////////////////////////////////////////////

// Construction/Destruction

//////////////////////////////////////////////////////////////////////

// Starts with no parsed objects and no associated text.
CBEncode::CBEncode()
    : m_plastObj(NULL),
      m_szTxt(NULL)
{
}

// Releases every object produced by the last parse.
CBEncode::~CBEncode()
{
    this->clear();
}

// Parses a complete bencoded text into m_listObj.
//
// szData: NUL-terminated bencoded text. The parser keeps pointers into it
//         (m_szTxt and every string object's m_szData), so it must outlive
//         the parse results.
// Returns true when the whole text was consumed without error; false for
// NULL/empty input or as soon as any object fails to parse. Objects parsed
// before the failure (and the failed one) remain in m_listObj.
//
// NOTE(review): the end of input is detected by a NUL byte at an object
// boundary, and string payloads are skipped by their declared length
// without bounds checking, so a truncated or hostile input can make the
// parser read past the end of the buffer. Fixing that needs a length
// parameter, which this interface does not have.
bool CBEncode::parse(const char *szData)
{
    // Compare the character against '\0', not against the NULL pointer
    // constant.
    if (szData == NULL || *szData == '\0')
        return false;

    clear();
    m_plastObj = NULL; // whatever it referred to was just deleted
    m_szTxt = const_cast<char*>(szData);

    char * szCurPos = m_szTxt;
    while (*szCurPos != '\0')
    {
        int iendpos = 0;
        bool bOk;

        // The first byte selects the object kind; anything that is not
        // 'i', 'l' or 'd' is attempted as a length-prefixed string.
        switch (*szCurPos)
        {
        case 'i':
            bOk = readint(szCurPos, iendpos, m_listObj);
            break;
        case 'l':
            bOk = readlist(szCurPos, iendpos, m_listObj);
            break;
        case 'd':
            bOk = readdict(szCurPos, iendpos, m_listObj);
            break;
        default:
            bOk = readstring(szCurPos, iendpos, m_listObj);
            break;
        }

        // Any error aborts the whole parse.
        if (!bOk)
            return false;

        szCurPos += iendpos;
    }

    // The loop ran at least once and every object parsed cleanly.
    return true;
}

// Reads one bencoded string ("<length>:<bytes>") starting at szCurPos.
//
// szCurPos: first byte of the candidate string.
// iendpos:  receives the number of bytes consumed from szCurPos.
// listObj:  receives the new CBEncodeString (appended even on failure, with
//           m_error set to enm_BEncodeErr_errString).
// Returns true when a well-formed string was read.
//
// Changes from the previous implementation:
//  - the length is accumulated directly instead of being copied into a
//    20-byte buffer, which overflowed on long digit runs;
//  - a zero-length string ("0:") is accepted, as bencoding allows;
//  - a length that does not fit in an int is reported as an error.
//
// NOTE(review): the payload is skipped by its declared length without
// checking it against the end of the buffer (the interface carries no
// buffer length), so a declared length larger than the remaining data
// still walks past the end.
bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    char * szTmp = szCurPos;
    CBEncodeString * pNewString = new CBEncodeString;
    pNewString->szPos = szCurPos;

    // A length needs at least one digit.
    bool bLenOk = (*szTmp >= '0' && *szTmp <= '9');
    int iPayloadLen = 0;
    while (*szTmp >= '0' && *szTmp <= '9')
    {
        const int iDigit = *szTmp - '0';
        if (iPayloadLen > (INT_MAX - iDigit) / 10)
            bLenOk = false; // would overflow int; keep consuming the digits
        else
            iPayloadLen = iPayloadLen * 10 + iDigit;
        ++szTmp;
    }

    if (bLenOk && *szTmp == ':')
    {
        ++szTmp; // skip ':'
        pNewString->m_szData = szTmp;
        pNewString->m_ilen = iPayloadLen;
        szTmp += iPayloadLen;
    }
    else
    {
        pNewString->m_error = enm_BEncodeErr_errString;
    }

    listObj.push_back(pNewString);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewString;
    m_plastObj->ilen = iendpos;
    return pNewString->m_error == enm_BEncodeErr_noerr;
}

// Reads one bencoded integer ("i<number>e") starting at szCurPos.
//
// szCurPos: first byte of the candidate integer (expected to be 'i').
// iendpos:  receives the number of bytes consumed from szCurPos.
// listObj:  receives the new CBEncodeInt (appended even on failure, with
//           m_error set to enm_BEncodeErr_errInt).
// Returns true when a well-formed integer was read.
//
// Changes from the previous implementation:
//  - a leading '-' is accepted, so negative values such as "i-3e" parse;
//  - digits are accumulated directly instead of being copied into a
//    20-byte buffer, which overflowed on long digit runs;
//  - "ie" (no digits at all) is rejected instead of being read as 0;
//  - values outside the int range saturate at INT_MAX / INT_MIN instead of
//    relying on atoi's overflow behaviour.
//
// Leading zeros and "-0" are still tolerated, although the bencoding
// specification declares them invalid.
bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    char * szTmp = szCurPos;
    CBEncodeInt * pNewInt = new CBEncodeInt;
    pNewInt->szPos = szCurPos;

    bool bOk = false;
    if (*szTmp == 'i')
    {
        ++szTmp;

        const bool bNegative = (*szTmp == '-');
        if (bNegative)
            ++szTmp;

        // Largest magnitude representable for the chosen sign.
        const unsigned int uLimit = bNegative
            ? static_cast<unsigned int>(INT_MAX) + 1u
            : static_cast<unsigned int>(INT_MAX);

        unsigned int uMagnitude = 0;
        bool bHasDigits = false;
        while (*szTmp >= '0' && *szTmp <= '9')
        {
            const unsigned int uDigit = static_cast<unsigned int>(*szTmp - '0');
            if (uMagnitude > (uLimit - uDigit) / 10u)
                uMagnitude = uLimit; // saturate; stays saturated for further digits
            else
                uMagnitude = uMagnitude * 10u + uDigit;
            bHasDigits = true;
            ++szTmp;
        }

        if (bHasDigits && *szTmp == 'e')
        {
            if (!bNegative)
                pNewInt->m_iValue = static_cast<int>(uMagnitude);
            else if (uMagnitude > static_cast<unsigned int>(INT_MAX))
                pNewInt->m_iValue = INT_MIN; // magnitude INT_MAX+1 has no positive counterpart
            else
                pNewInt->m_iValue = -static_cast<int>(uMagnitude);

            ++szTmp; // skip 'e'
            bOk = true;
        }
    }

    if (!bOk)
        pNewInt->m_error = enm_BEncodeErr_errInt;

    listObj.push_back(pNewInt);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewInt;
    m_plastObj->ilen = iendpos;
    return pNewInt->m_error == enm_BEncodeErr_noerr;
}

// Reads one bencoded list ("l<elements>e") starting at szCurPos.
//
// szCurPos: first byte of the candidate list (expected to be 'l').
// iendpos:  receives the number of bytes consumed from szCurPos; on failure
//           it stops at the element that could not be parsed.
// listObj:  receives the new CBEncodeList (appended even on failure, with
//           m_error set to enm_BEncodeErr_errList).
// Returns true when a well-formed list was read. Elements are parsed
// recursively and owned by the new list object.
//
// Success is tracked in a local flag. The previous code inspected
// m_plastObj->m_error after the element loop, which for an empty list
// ("le") dereferenced whatever m_plastObj happened to hold: NULL on a fresh
// parser, or an already-deleted object after clear().
//
// NOTE(review): nesting depth is not limited, so deeply nested input
// recurses correspondingly deep.
bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
    char * szTmp = szCurPos;
    CBEncodeList * pNewList = new CBEncodeList;
    pNewList->szPos = szCurPos;

    bool bOk = (*szTmp == 'l');
    if (bOk)
    {
        ++szTmp; // skip 'l'

        while (bOk && *szTmp != 'e')
        {
            int iElemLen = 0;

            // The first byte selects the element kind; anything that is not
            // 'i', 'l' or 'd' (including the terminating NUL of truncated
            // input) is attempted as a string and fails there if malformed.
            switch (*szTmp)
            {
            case 'i':
                bOk = readint(szTmp, iElemLen, pNewList->m_listObj);
                break;
            case 'l':
                bOk = readlist(szTmp, iElemLen, pNewList->m_listObj);
                break;
            case 'd':
                bOk = readdict(szTmp, iElemLen, pNewList->m_listObj);
                break;
            default:
                bOk = readstring(szTmp, iElemLen, pNewList->m_listObj);
                break;
            }

            // Only step over elements that parsed cleanly; any error ends
            // the list.
            if (bOk)
                szTmp += iElemLen;
        }

        if (bOk)
            ++szTmp; // skip the closing 'e'
    }

    if (!bOk)
        pNewList->m_error = enm_BEncodeErr_errList;

    listObj.push_back(pNewList);
    iendpos = static_cast<int>(szTmp - szCurPos);
    m_plastObj = pNewList;
    m_plastObj->ilen = iendpos;
    return pNewList->m_error == enm_BEncodeErr_noerr;
}

//读取一个字典

bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)

{

char * szTmp = szCurPos;

CBEncodeDict * pNewDict= new CBEncodeDict;

pNewDict->szPos = szCurPos;

if(*szTmp == 'd')

{

szTmp++;

int ilistendpos;

string strname;

while(*szTmp!='e')

{

if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))

break;

if(m_plastObj->m_type !=enum_BEncodeType_String)

break;

strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen);

szTmp+=ilistendpos;

if(*szTmp== 'i')

{

if(!readint(szTmp,ilistendpos,pNewDict->m_listObj))

break;//遇到任何错误都终止整个解析

szTmp+=ilistendpos;

}

else if(*szTmp== 'l')

{

if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

else if(*szTmp== 'd')

{

if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

else

{

if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))

break;

szTmp+=ilistendpos;

}

pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj));

}

if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)

pNewDict->m_error = enm_BEncodeErr_errDict;

else

szTmp++;

}

else

pNewDict->m_error = enm_BEncodeErr_errDict;

listObj.push_back(pNewDict);

iendpos = szTmp-szCurPos;

m_plastObj = pNewDict;

m_plastObj->ilen = iendpos;

return pNewDict->m_error == enm_BEncodeErr_noerr?true:false;

}

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
2023年上半年GDP全球前十五强
 百态   2023-10-24
美众议院议长启动对拜登的弹劾调查
 百态   2023-09-13
上海、济南、武汉等多地出现不明坠落物
 探索   2023-09-06
印度或要将国名改为“巴拉特”
 百态   2023-09-06
男子为女友送行,买票不登机被捕
 百态   2023-08-20
手机地震预警功能怎么开?
 干货   2023-08-06
女子4年卖2套房花700多万做美容:不但没变美脸,面部还出现变形
 百态   2023-08-04
住户一楼被水淹 还冲来8头猪
 百态   2023-07-31
女子体内爬出大量瓜子状活虫
 百态   2023-07-25
地球连续35年收到神秘规律性信号,网友:不要回答!
 探索   2023-07-21
全球镓价格本周大涨27%
 探索   2023-07-09
钱都流向了那些不缺钱的人,苦都留给了能吃苦的人
 探索   2023-07-02
倩女手游刀客魅者强控制(强混乱强眩晕强睡眠)和对应控制抗性的关系
 百态   2020-08-20
美国5月9日最新疫情:美国确诊人数突破131万
 百态   2020-05-09
荷兰政府宣布将集体辞职
 干货   2020-04-30
倩女幽魂手游师徒任务情义春秋猜成语答案逍遥观:鹏程万里
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案神机营:射石饮羽
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案昆仑山:拔刀相助
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案天工阁:鬼斧神工
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案丝路古道:单枪匹马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:与虎谋皮
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:李代桃僵
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案镇郊荒野:指鹿为马
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:小鸟依人
 干货   2019-11-12
倩女幽魂手游师徒任务情义春秋猜成语答案金陵:千金买邻
 干货   2019-11-12
 
推荐阅读
 
 
 
>>返回首頁<<
靜靜地坐在廢墟上,四周的荒凉一望無際,忽然覺得,淒涼也很美
© 2005- 王朝網路 版權所有