/// <summary>
/// Downloads the content at the given URL and returns it as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The text read from the response stream, or the literal string "null" when
/// the download fails for any reason (callers rely on this sentinel).
/// </returns>
private string FetchPage(String url)
{
    String page = "null";
    try
    {
        // Dispose the client, the response stream and the reader deterministically,
        // even when reading throws part-way through.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream))
        {
            page = reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: any failure falls back to the "null" sentinel.
        // The error is written to the debug output so it is not lost entirely.
        System.Diagnostics.Debug.WriteLine("FetchPage failed for '" + url + "': " + ex.Message);
    }
    return page;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and collects the absolute
/// http/https links found in its text.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// Every matched link followed by a ';' separator, concatenated in order of
/// appearance, or the literal string "null" when nothing matched.
/// </returns>
public string GetLink(String url)
{
    String content = this.FetchPage(url);

    // The previous pattern began with a dangling '+' and had unbalanced
    // brackets, so constructing the Regex threw ArgumentException on every call.
    // This is the well-formed absolute-URL pattern: scheme, dotted host, optional path/query.
    Regex linkPattern = new Regex(@"https?://([\w-]+\.)+[\w-]+(/[\w./?%&=-]*)?", RegexOptions.IgnoreCase);

    StringBuilder sb = new StringBuilder();
    foreach (Match match in linkPattern.Matches(content))
    {
        sb.Append(match.Value);
        sb.Append(";");
    }

    if (sb.Length == 0)
    {
        sb.Append("null");
    }
    return sb.ToString();
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and collects the e-mail
/// addresses found in its text.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <returns>
/// Every matched address followed by a ';' separator, concatenated in order of
/// appearance, or the literal string "null" when nothing matched.
/// </returns>
public string GetEmailAddr(String url)
{
    String content = this.FetchPage(url);

    // The previous pattern ended in "[com|cn|...]{1,6}", which is a character
    // class rather than an alternation: it swallowed trailing '.' and '|'
    // characters and rejected domains whose last letter was not in that set.
    // It also allowed only a single character after a dot in the local part.
    // This is the unanchored form of the standard pattern: local part, '@', dotted domain.
    Regex emailPattern = new Regex(@"\w+((-\w+)|(\.\w+))*@\w+((\.|-)\w+)*\.\w+", RegexOptions.IgnoreCase);

    StringBuilder sb = new StringBuilder();
    foreach (Match match in emailPattern.Matches(content))
    {
        sb.Append(match.Value);
        sb.Append(";");
    }

    if (sb.Length == 0)
    {
        sb.Append("null");
    }
    return sb.ToString();
}
其中的部分代码要感谢我的朋友樊帆。