利用正则表达式去掉html代码

王朝html/css/js·作者佚名  2008-05-30
窄屏简体版  字體: |||超大  

using System.Text.RegularExpressions;//需要引用

/// <summary>
/// Removes every "&lt;...&gt;" span (i.e. anything that looks like an HTML tag)
/// from the given text using a regular expression.
/// </summary>
/// <param name="strHtml">Text that may contain HTML tags; may be null.</param>
/// <returns>
/// The text with all tag-like spans removed, or an empty string when
/// <paramref name="strHtml"/> is null or empty.
/// </returns>
private string StripHT(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // [^<>] (unlike '.') also matches line breaks, so tags whose attributes
    // wrap onto several lines are removed as well, while a lone '<' in text
    // cannot swallow everything up to the next real tag.
    return Regex.Replace(strHtml, "<[^<>]+>", "");
}

/// <summary>
/// Method 2: converts an HTML fragment to plain text. Script blocks, comments
/// and tags are removed, a handful of common character entities are decoded,
/// and any remaining angle brackets and carriage returns are stripped.
/// </summary>
/// <param name="strHtml">The HTML text to clean; may be null.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="strHtml"/> is
/// null or empty. The result never contains '&lt;', '&gt;' or '\r'.
/// </returns>
public static string DropHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // aryReg[i] is replaced by aryRep[i]; the two arrays must stay the same
    // length and in the same order. Patterns are applied in sequence.
    string[] aryReg =
    {
        // Whole <script> elements; (?s) lets '.' cross line breaks so that
        // multi-line script bodies are removed together with their tags.
        @"(?s)<script[^>]*?>.*?</script>",

        // Complete comments, including multi-line ones.
        @"(?s)<!--.*?-->",

        // Opening/closing tags and declarations. Quoted attribute values may
        // contain '>'. The three alternatives inside the loop start with
        // different characters, so the pattern cannot backtrack
        // exponentially (the previous tag pattern nested \w+ inside a
        // (...)*? loop and pegged the CPU on malformed markup).
        @"<(?:/\s*)?!?\w(?:""[^""]*""|'[^']*'|[^<>""'])*>",

        // A carriage return together with the whitespace that follows it.
        @"([\r])[\s]+",

        // Common character entities. Note: &amp; is decoded before the
        // others, so "&amp;lt;" ends up being decoded twice.
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",

        // Any other numeric entity is dropped.
        @"&#(\d+);",

        // Left-overs of unbalanced comments: a stray terminator, and an
        // unterminated opener together with the rest of its line.
        @"-->",
        @"<!--.*"
    };

    string[] aryRep =
    {
        "",
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // chr(161)
        "\xa2", // chr(162)
        "\xa3", // chr(163)
        "\xa9", // chr(169)
        "",
        "",
        ""
    };

    string strOutput = strHtml;

    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the results must be kept,
    // otherwise these calls have no effect. This also strips brackets that
    // were just decoded from &lt; / &gt;.
    return strOutput
        .Replace("<", "")
        .Replace(">", "")
        .Replace("\r", "");
}

http://www.cnblogs.com/wang123/archive/2006/09/16/505758.html

 
 
 
免责声明:本文为网络用户发布,其观点仅代表作者个人观点,与本站无关,本站仅提供信息存储服务。文中陈述内容未经本站证实,其真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
© 2005- 王朝網路 版權所有 導航