让我的C++程序直接阅读网页(4) HTTP访问

王朝c/c++·作者佚名  2006-02-01
窄屏简体版  字體: |||超大  

4. HTTP访问:Web简单来看就是HTML + HTTP。前面提供的HTML读取函数所需要的原始HTML文本,得通过HTTP协议从各个web site上读取到。用socket可以实现HTTP访问,但是想比较全面地支持HTTP协议,使用现成的HTTP服务显得更有效率。

微软提供了Windows平台上访问HTTP的两组API:WinINet和WinHTTP。

这里给出两个读取Web网页的函数:

* 使用WinINet(通过MFC的CHttpConnection封装)的readHttpFile

* 使用WinHTTP的getHttpFile

void

getHttpFile( const HINTERNET h_site,

string & rd,

const string & site,

const string & path,

UInt32 flags = 0 )

{

if ( path.empty() )

return;

HINTERNET h_file = NULL;

wchar_t w_str[WSTR_LENGTH];

::memset( w_str, 0, WSTR_LENGTH * sizeof(wchar_t) );

try

{

UInt32 l_0 = 0;

UInt32 l_1 = 0;

char *p_buf = NULL;

::mbstowcs( w_str, path.c_str(), path.size() );

h_file = ::WinHttpOpenRequest( h_site,

L"GET",

w_str,

NULL,

WINHTTP_NO_REFERER,

WINHTTP_DEFAULT_ACCEPT_TYPES,

flags );

if ( h_file == NULL )

throw ::GetLastError();

BOOL b_res = ::WinHttpSendRequest( h_file,

WINHTTP_NO_ADDITIONAL_HEADERS,

0,

WINHTTP_NO_REQUEST_DATA,

0,

0,

0 );

if ( ! b_res )

throw ::GetLastError();

b_res = ::WinHttpReceiveResponse( h_file, NULL );

if ( ! b_res )

throw ::GetLastError();

// 如果需要可以在这里或稍后读入http cookies

do

{

l_0 = 0;

b_res = ::WinHttpQueryDataAvailable( h_file, &l_0 );

p_buf = new char[l_0 + 1];

::ZeroMemory( p_buf, l_0 + 1 );

if ( b_res )

{

b_res = ::WinHttpReadData( h_file,

p_buf,

l_0,

&l_1 );

if ( b_res )

{

if ( l_1 > 0 )

rd.append( p_buf, l_1 );

}

}

delete [] p_buf;

} while ( l_0 > 0 );

if ( ! b_res )

throw ::GetLastError();

canonHTML( rd );

::WinHttpCloseHandle( h_file );

}

catch ( ... )

{

if ( h_file != NULL )

::WinHttpCloseHandle( h_file );

throw;

}

}

// Reads the page `src_page` from `server` with an HTTP GET (MFC WinINet
// wrapper classes), stores the response body in `rd` and runs canonHTML() on
// it.
//
// Parameters:
//   rd       - receives the page content; it is cleared once the server has
//              answered with HTTP_STATUS_OK and then filled with the body.
//   server   - connection to issue the request on. It is taken by const
//              reference but OpenRequest is non-const, hence the const_cast.
//   src_page - object name to request. An empty name is a no-op.
//
// Throws std::exception with the message
//   "failed"     - the request could not be opened, the status code could not
//                  be queried, or it is not HTTP_STATUS_OK;
//   "noContent"  - the response body is empty;
//   <error text> - a CInternetException was raised; its message is forwarded.
// The request object is closed and deleted on every exit path.
void
readHttpFile( string &rd,
              const CHttpConnection &server,
              const string & src_page )
{
    if ( src_page.empty() )
        return;

    // Owns the CHttpFile returned by OpenRequest. The caller of OpenRequest
    // is responsible for deleting that object, so the guard closes and
    // deletes it however the function is left. Close() errors are swallowed
    // here because a destructor must not throw.
    struct FileGuard
    {
        CHttpFile *p;
        FileGuard() : p( NULL ) {}
        ~FileGuard()
        {
            if ( p == NULL )
                return;
            try
            {
                p->Close();
            }
            catch ( CInternetException *p_ex )
            {
                p_ex->Delete();
            }
            catch ( ... )
            {
            }
            delete p;
        }
    } file;

    CHttpConnection *p_svr = const_cast<CHttpConnection *>(&server);

    try
    {
        file.p = p_svr->OpenRequest( CHttpConnection::HTTP_VERB_GET,
                                     src_page.c_str(),
                                     NULL,
                                     1,
                                     NULL,
                                     NULL,
                                     INTERNET_FLAG_EXISTING_CONNECT
                                     | INTERNET_FLAG_RELOAD );
        if ( file.p == NULL )
            throw std::exception( "failed" );

        file.p->SendRequest( "\r\n", 2 );

        // dw_ret starts at 0 so that a failed query can never be mistaken
        // for HTTP_STATUS_OK.
        DWORD dw_ret = 0;
        if ( ! file.p->QueryInfoStatusCode( dw_ret )
             || dw_ret != HTTP_STATUS_OK )
            throw std::exception( "failed" );

        // Self-releasing read buffer; no manual delete [] on any path.
        std::string rd_buf( BUF_SIZE, '\0' );

        rd.erase();

        for ( UINT l = file.p->Read( &rd_buf[0], BUF_SIZE );
              l > 0;
              l = file.p->Read( &rd_buf[0], BUF_SIZE ) )
        {
            rd.append( rd_buf.data(), l );
        }

        if ( rd.empty() )
            throw std::exception( "noContent" );

        canonHTML( rd );

        // Explicit close on the success path so that a close error is still
        // reported to the caller; the guard only has to delete afterwards.
        file.p->Close();
    }
    catch ( CInternetException *p_ex )
    {
        // Zero-initialised so the message is an empty string when
        // GetErrorMessage cannot supply one.
        TCHAR sz_err[255] = { 0 };

        p_ex->GetErrorMessage( sz_err, 255 );

        // MFC exceptions are thrown as pointers and must be released.
        p_ex->Delete();

        throw std::exception( sz_err );
    }
}

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
© 2005- 王朝網路 版權所有 導航