4. HTTP访问:Web简单看就是HTML + HTTP。前面提供的HTML读取函数所需要的原始HTML文本,得通过HTTP协议从各个web site上读取到。用socket可以实现HTTP访问,但是若想比较全面地支持HTTP协议,使用现成的HTTP服务显得更有效率。
微软提供了Windows平台上访问HTTP的两组API:WinINet和WinHTTP。
这里给出两个读取Web网页的函数:
* 使用WinINet的readHttpFile
* 使用WinHTTP的getHttpFile
void
getHttpFile( const HINTERNET h_site,
string & rd,
const string & site,
const string & path,
UInt32 flags = 0 )
{
if ( path.empty() )
return;
HINTERNET h_file = NULL;
wchar_t w_str[WSTR_LENGTH];
::memset( w_str, 0, WSTR_LENGTH * sizeof(wchar_t) );
try
{
UInt32 l_0 = 0;
UInt32 l_1 = 0;
char *p_buf = NULL;
::mbstowcs( w_str, path.c_str(), path.size() );
h_file = ::WinHttpOpenRequest( h_site,
L"GET",
w_str,
NULL,
WINHTTP_NO_REFERER,
WINHTTP_DEFAULT_ACCEPT_TYPES,
flags );
if ( h_file == NULL )
throw ::GetLastError();
BOOL b_res = ::WinHttpSendRequest( h_file,
WINHTTP_NO_ADDITIONAL_HEADERS,
0,
WINHTTP_NO_REQUEST_DATA,
0,
0,
0 );
if ( ! b_res )
throw ::GetLastError();
b_res = ::WinHttpReceiveResponse( h_file, NULL );
if ( ! b_res )
throw ::GetLastError();
// 如果需要可以在这里或稍后读入http cookies
do
{
l_0 = 0;
b_res = ::WinHttpQueryDataAvailable( h_file, &l_0 );
p_buf = new char[l_0 + 1];
::ZeroMemory( p_buf, l_0 + 1 );
if ( b_res )
{
b_res = ::WinHttpReadData( h_file,
p_buf,
l_0,
&l_1 );
if ( b_res )
{
if ( l_1 > 0 )
rd.append( p_buf, l_1 );
}
}
delete [] p_buf;
} while ( l_0 > 0 );
if ( ! b_res )
throw ::GetLastError();
canonHTML( rd );
::WinHttpCloseHandle( h_file );
}
catch ( ... )
{
if ( h_file != NULL )
::WinHttpCloseHandle( h_file );
throw;
}
}
void
readHttpFile( string &rd,
const CHttpConnection &server,
const string & src_page )
{
if ( src_page.empty() )
return;
DWORD dw_ret;
CHttpFile *p_file = NULL;
CHttpConnection *p_svr = const_cast<CHttpConnection *>(&server);
char *rd_buf = NULL;
try
{
p_file = p_svr->OpenRequest( CHttpConnection::HTTP_VERB_GET,
src_page.c_str(),
NULL,
1,
NULL,
NULL,
INTERNET_FLAG_EXISTING_CONNECT
| INTERNET_FLAG_RELOAD );
p_file->SendRequest( "\r\n", 2 );
p_file->QueryInfoStatusCode( dw_ret );
if ( dw_ret != HTTP_STATUS_OK )
throw std::exception( "failed" );
rd_buf = new char[BUF_SIZE];
if ( rd_buf == NULL )
throw std::exception( "insufficientMemory" );
rd.erase();
memset( rd_buf, 0, BUF_SIZE );
int l = p_file->Read( rd_buf, BUF_SIZE );
while ( l > 0 )
{
rd.append( rd_buf, l );
l = p_file->Read( rd_buf, BUF_SIZE );
}
if ( rd.empty() )
throw std::exception( "noContent" );
canonHTML( rd );
p_file->Close();
delete [] rd_buf;
}
catch ( CInternetException *p_ex )
{
p_file->Close();
if ( rd_buf ) delete [] rd_buf;
TCHAR sz_err[255];
p_ex->GetErrorMessage( sz_err, 255 );
throw std::exception( sz_err );
}
catch ( ... )
{
p_file->Close();
if ( rd_buf ) delete [] rd_buf;
throw;
}
}