/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader
最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)
遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在! 如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
你可以为其添加新属性,原则是:
成员变量 Field 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
如: <dc:language>zh-CHS</dc:language>
将其映射为:
private string _dclanguage;
public string DcLanguage
{
get
{
return this._dclanguage;
}
}
2. Class RssItem 用于表示 Rss 的 Item
添加新属性的原则同 RssHeader!
3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
请仔细参阅 class SimpleRssReader 的 Travel 方法!
4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
表:
Channels (主表)
ChannelsDetails (细表)
字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
存储过程:
SP_AddChannel
SP_AddChannelsDetails
参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
命令行编译:
csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll
全部代码 SimpleRssReader.cs 在此下载
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar
*/
namespace Microshaoft
{
using System;
using System.Xml;
using System.Text;
using System.Reflection;
using System.Collections;
using System.Text.RegularExpressions;
// (A pasted duplicate of the header-comment tail and of the namespace/using preamble above was removed here; it left a stray "*/" and an unbalanced "{".)
/// <summary>
/// Channel-level ("header") information of an RSS 2.0 feed.
/// </summary>
/// <remarks>
/// This class itself only assigns <c>_URL</c>. The other private fields are filled in from
/// outside by <c>SimpleRssReader</c> through reflection: for every child element of the channel
/// it looks up a field called "_" + element name with ':' removed (dc:language maps to
/// <c>_dclanguage</c>). Renaming a field therefore breaks that mapping.
/// </remarks>
public class RssHeader
{
    // Assigned by the constructor.
    private string _URL;

    // Assigned via reflection by SimpleRssReader; never written inside this class.
    private string _title;
    private string _description;
    private string _link;
    private string _language;
    private string _generator;
    private string _ttl;
    private string _copyright;
    private string _pubDate;
    private string _category;
    private string _lastBuildDate;
    private string _managingEditor;
    private string _dclanguage; // dc:language

    /// <summary>Creates a header for the feed located at <paramref name="URL"/>.</summary>
    /// <param name="URL">Feed URL; exposed unchanged through <see cref="URL"/>.</param>
    public RssHeader(string URL)
    {
        _URL = URL;
    }

    /// <summary>Text of the channel's title element, or null when it was not set.</summary>
    public string Title { get { return _title; } }

    /// <summary>Text of the channel's description element, or null when it was not set.</summary>
    public string Description { get { return _description; } }

    /// <summary>Text of the channel's link element, or null when it was not set.</summary>
    public string Link { get { return _link; } }

    /// <summary>Text of the channel's language element, or null when it was not set.</summary>
    public string Language { get { return _language; } }

    /// <summary>Text of the channel's generator element, or null when it was not set.</summary>
    public string Generator { get { return _generator; } }

    /// <summary>Text of the channel's ttl element, or null when it was not set.</summary>
    public string Ttl { get { return _ttl; } }

    /// <summary>Text of the channel's copyright element, or null when it was not set.</summary>
    public string Copyright { get { return _copyright; } }

    /// <summary>The raw pubDate text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime PubDate { get { return Util.ParseDateTime(_pubDate); } }

    /// <summary>Text of the channel's category element, or null when it was not set.</summary>
    public string Category { get { return _category; } }

    /// <summary>The raw lastBuildDate text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime LastBuildDate { get { return Util.ParseDateTime(_lastBuildDate); } }

    /// <summary>Text of the channel's managingEditor element, or null when it was not set.</summary>
    public string ManagingEditor { get { return _managingEditor; } }

    /// <summary>The feed URL that was passed to the constructor.</summary>
    public string URL { get { return _URL; } }

    /// <summary>Text of the channel's dc:language element, or null when it was not set.</summary>
    public string DcLanguage { get { return _dclanguage; } }
}
/// <summary>
/// A single item of an RSS 2.0 feed.
/// </summary>
/// <remarks>
/// Nothing in this class assigns its private fields. <c>SimpleRssReader</c> fills them through
/// reflection, looking up a field called "_" + element name with ':' removed (wfw:commentRss maps
/// to <c>_wfwcommentRss</c>), and it also sets <c>_Header</c> by name. Renaming a field therefore
/// breaks that mapping.
/// </remarks>
public class RssItem
{
    // Back-reference to the owning channel; assigned via reflection by SimpleRssReader.
    private RssHeader _Header;

    // Element values; assigned via reflection by SimpleRssReader.
    private string _title;
    private string _link;
    private string _description;
    private string _category;
    private string _author;
    private string _pubDate;
    private string _comments;
    private string _guid;
    private string _slashcomments;  // slash:comments
    private string _wfwcomment;     // wfw:comment
    private string _wfwcommentRss;  // wfw:commentRss
    private string _trackbackping;  // presumably trackback:ping - confirm against real feeds

    /// <summary>Header of the channel this item belongs to, or null when it was not set.</summary>
    public RssHeader Header { get { return _Header; } }

    /// <summary>Text of the item's trackback ping element, or null when it was not set.</summary>
    public string TrackbackPing { get { return _trackbackping; } }

    /// <summary>Text of the item's wfw:commentRss element, or null when it was not set.</summary>
    public string WfwCommentRss { get { return _wfwcommentRss; } }

    /// <summary>Text of the item's wfw:comment element, or null when it was not set.</summary>
    public string WfwComment { get { return _wfwcomment; } }

    /// <summary>Text of the item's slash:comments element, or null when it was not set.</summary>
    public string SlashComments { get { return _slashcomments; } }

    /// <summary>Text of the item's title element, or null when it was not set.</summary>
    public string Title { get { return _title; } }

    /// <summary>Text of the item's link element, or null when it was not set.</summary>
    public string Link { get { return _link; } }

    /// <summary>Text of the item's description element, or null when it was not set.</summary>
    public string Description { get { return _description; } }

    /// <summary>Text of the item's category element, or null when it was not set.</summary>
    public string Category { get { return _category; } }

    /// <summary>Text of the item's author element, or null when it was not set.</summary>
    public string Author { get { return _author; } }

    /// <summary>The raw pubDate text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime PubDate { get { return Util.ParseDateTime(_pubDate); } }

    /// <summary>Text of the item's comments element, or null when it was not set.</summary>
    public string Comments { get { return _comments; } }

    /// <summary>Text of the item's guid element, or null when it was not set.</summary>
    public string Guid { get { return _guid; } }
}
/// <summary>
/// Loads an RSS 2.0 document and maps it onto one <c>RssHeader</c> and a list of <c>RssItem</c>s.
/// </summary>
/// <remarks>
/// Mapping is reflection-based: for every child element of a channel or item, the private field
/// named "_" + element name with ':' removed is looked up on the target type and, when it exists
/// and is a string field, set to the element's inner text. Elements without a matching field are
/// ignored, so feeds may carry optional extensions (slash:comments, wfw:comment, ...).
/// </remarks>
public class SimpleRssReader
{
    /// <summary>Handler type for <see cref="RssHeaderReceive"/>.</summary>
    public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);

    /// <summary>
    /// Raised once per /rss/channel node after the header fields have been filled and before any
    /// item of that channel is reported.
    /// </summary>
    public event RssHeaderReceiveEventHandler RssHeaderReceive;

    /// <summary>Handler type for <see cref="RssItemReceive"/>.</summary>
    public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);

    /// <summary>Raised once for every parsed item, in document order.</summary>
    public event RssItemReceiveEventHandler RssItemReceive;

    // Private fields are the mapping targets; Public is kept in the mask as in the original lookup.
    private const BindingFlags FieldLookupFlags =
        BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public;

    private Type _TRS;              // typeof(RssHeader)
    private Type _tri;              // typeof(RssItem)
    private ArrayList _RssItemsAL;  // items collected during the current Rss() call
    private RssHeader _rs;
    private RssItem[] _RssItems;

    /// <summary>Header produced by the last <see cref="Rss"/> call; null before the first call.</summary>
    public RssHeader RssHeader
    {
        get { return this._rs; }
    }

    /// <summary>
    /// Items produced by the last <see cref="Rss"/> call (empty when the feed has no items);
    /// null before the first call.
    /// </summary>
    public RssItem[] RssItems
    {
        get { return this._RssItems; }
    }

    /// <summary>
    /// Loads the feed at <paramref name="URL"/>, fills <see cref="RssHeader"/> and
    /// <see cref="RssItems"/>, and raises the receive events while doing so.
    /// </summary>
    /// <param name="URL">Location handed to <c>XmlDocument.Load</c>.</param>
    public void Rss(string URL)
    {
        XmlDocument document = new XmlDocument();
        // If this turns out to be slow, WebRequest could be used instead (original author's note).
        document.Load(URL);
        XmlNodeList channels = document.SelectNodes("/rss/channel");

        this._rs = new RssHeader(URL);
        this._TRS = typeof(RssHeader);
        this._tri = typeof(RssItem);
        this._RssItemsAL = new ArrayList();

        foreach (XmlNode channel in channels)
        {
            this.ReadChannel(channel);
        }

        // Always replace the previous result so a feed without items yields an empty array
        // instead of null or stale items from an earlier call.
        this._RssItems = (RssItem[]) this._RssItemsAL.ToArray(typeof(RssItem));
    }

    /// <summary>
    /// Parses one channel node: header first, then every item child in document order.
    /// </summary>
    /// <remarks>
    /// The previous implementation parsed the header only when it met the first item and then
    /// skipped that item, so the first item of every feed was lost and item-less feeds never
    /// reported a header. Both cases are handled here.
    /// </remarks>
    /// <param name="channel">A /rss/channel node.</param>
    private void ReadChannel(XmlNode channel)
    {
        this.AssignFields(this._TRS, this._rs, channel);

        // Copy the delegate before the null test so the check and the call see the same value.
        RssHeaderReceiveEventHandler headerHandler = this.RssHeaderReceive;
        if (headerHandler != null)
        {
            headerHandler(this, this._rs);
        }

        foreach (XmlNode child in channel.ChildNodes)
        {
            if (child.Name != "item")
            {
                continue;
            }

            RssItem item = new RssItem();
            this.AssignFields(this._tri, item, child);

            // Link the item back to its channel header.
            FieldInfo headerField = this._tri.GetField("_Header", FieldLookupFlags);
            if (headerField != null)
            {
                headerField.SetValue(item, this._rs);
            }

            RssItemReceiveEventHandler itemHandler = this.RssItemReceive;
            if (itemHandler != null)
            {
                itemHandler(this, item);
            }

            this._RssItemsAL.Add(item);
        }
    }

    /// <summary>
    /// Copies the inner text of each non-item child of <paramref name="parent"/> into the
    /// matching string field of <paramref name="target"/>.
    /// </summary>
    /// <param name="targetType">Type whose fields are looked up.</param>
    /// <param name="target">Instance that receives the values.</param>
    /// <param name="parent">Node whose children are mapped.</param>
    private void AssignFields(Type targetType, object target, XmlNode parent)
    {
        foreach (XmlNode element in parent.ChildNodes)
        {
            if (element.Name == "item")
            {
                continue;
            }

            FieldInfo field = targetType.GetField("_" + element.Name.Replace(":", ""), FieldLookupFlags);
            // Only string fields can take InnerText; this also keeps an element that happens to
            // be called "Header" from being written into RssItem._Header (which would throw).
            if (field != null && field.FieldType == typeof(string))
            {
                field.SetValue(target, element.InnerText);
            }
        }
    }
}
/// <summary>
/// Small text helpers used when reading feeds: lenient date parsing and HTML stripping.
/// </summary>
public class Util
{
    private const RegexOptions StripOptions = RegexOptions.Multiline | RegexOptions.IgnoreCase;

    // Patterns applied in order by StripHTML; StripReplacements holds the replacement at the same index.
    private static readonly string[] StripPatterns =
    {
        @"<script[^>]*?>.*?</script>",
        // Any opening/closing tag, with or without attributes. The escapes (\s, \w, \7, \\) had been
        // lost from this pattern, which made it miss every tag that contains whitespace or attributes.
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] StripReplacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // chr(161)
        "\xa2", // chr(162)
        "\xa3", // chr(163)
        "\xa9", // chr(169)
        "",
        "\r\n",
        ""
    };

    /// <summary>
    /// Parses <paramref name="s"/> with <c>DateTime.Parse</c>.
    /// </summary>
    /// <param name="s">Date text; may be null or empty.</param>
    /// <returns>
    /// The parsed value, or <c>DateTime.Now</c> when <paramref name="s"/> is null, empty or
    /// cannot be parsed.
    /// </returns>
    public static DateTime ParseDateTime(string s)
    {
        if (s == null || s.Length == 0)
        {
            return DateTime.Now;
        }

        try
        {
            return DateTime.Parse(s);
        }
        catch
        {
            // Deliberately lenient: a malformed date must not abort feed processing.
            return DateTime.Now;
        }
    }

    /// <summary>
    /// Removes HTML markup from a string and decodes a handful of common entities.
    /// </summary>
    /// <remarks>
    /// After the pattern pass, every remaining '&lt;', '&gt;' and "\r\n" is removed. The original
    /// code called <c>String.Replace</c> for these but discarded the results, so they never took
    /// effect. Note that this also drops angle brackets produced by decoding &amp;lt; / &amp;gt;.
    /// NOTE(review): the script pattern does not span line breaks because Singleline is not set.
    /// </remarks>
    /// <param name="HTML">Source text; may be null or empty.</param>
    /// <returns>The stripped text; null or empty input is returned unchanged.</returns>
    public static string StripHTML(string HTML)
    {
        if (HTML == null || HTML.Length == 0)
        {
            return HTML;
        }

        string s = HTML;
        for (int i = 0; i < StripPatterns.Length; i++)
        {
            s = Regex.Replace(s, StripPatterns[i], StripReplacements[i], StripOptions);
        }

        // Strings are immutable: the result of Replace has to be assigned back.
        s = s.Replace("<", "");
        s = s.Replace(">", "");
        s = s.Replace("\r\n", "");
        return s;
    }
}
}
//测试程序
namespace Test
{
using System;
using System.Data;
using System.Reflection;
using System.Data.SqlClient;
using Microshaoft;
using Microshaoft.Data;
/// <summary>
/// Console test driver: reads one feed with <c>SimpleRssReader</c>, prints what it receives and
/// stores header and items through the stored procedures SP_AddChannel / SP_AddChannelsDetails.
/// </summary>
class ConsoleApplication
{
    private SqlConnection _Connection;
    public string _Channel;

    /// <summary>Connection used by <c>SaveToDataBase</c>.</summary>
    public SqlConnection Connection
    {
        set { this._Connection = value; }
        get { return this._Connection; }
    }

    static void Main()
    {
        // Feed under test. Other feeds that were tried before:
        //   http://www.ccw.com.cn/rss/news2/1.xml
        //   http://dzh.mop.com/topic/rss.jsp?type=28
        //   http://www.ccw.com.cn/rss/news2/15.xml
        //   http://www.cnblogs.com/rss.aspx?id=-1
        //   http://weblog.siliconvalley.com/column/dangillmor/index.xml
        //   http://www.skyone.com.cn/sub/rss/list_jjsc.xml
        string s = "http://localhost/rss.xml";

        ConsoleApplication a = new ConsoleApplication();
        // NOTE(security): hard-coded "sa" login with an empty password; acceptable for a local
        // test only - move the connection string to configuration before any real use.
        a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
        try
        {
            a.Connection.Open();

            SimpleRssReader srr = new SimpleRssReader();
            srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
            srr.RssItemReceive += new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

            System.Console.WriteLine("waiting ....");
            srr.Rss(s); // TODO: make this multi-threaded or asynchronous later
            System.Console.WriteLine("print all rss Header and items ....");
            System.Console.ReadLine();

            System.Console.WriteLine("Header: " + srr.RssHeader.Title);
            // RssItems can be null when the reader collected no items.
            RssItem[] items = srr.RssItems;
            if (items != null)
            {
                foreach (RssItem ri in items)
                {
                    System.Console.WriteLine("item: " + ri.Title);
                }
            }
            System.Console.ReadLine();
        }
        finally
        {
            // Release the connection even when loading or saving throws.
            a.Connection.Dispose();
        }
    }

    // Prints the received header and stores it via SP_AddChannel.
    private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
    {
        System.Console.WriteLine("Header:" + Header.Link);
        System.Console.WriteLine("Header:" + Header.Title);
        this.SaveToDataBase("SP_AddChannel", typeof(RssHeader), Header);
    }

    // Prints the received item and stores it via SP_AddChannelsDetails.
    private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
    {
        System.Console.WriteLine("Item: " + Item.Title);
        System.Console.WriteLine("Item: " + Item.Link);
        // Items without a description leave the property null; do not pass null to StripHTML.
        string description = Item.Description;
        System.Console.WriteLine("Item: " + (description == null ? null : Util.StripHTML(description)));
        this.SaveToDataBase("SP_AddChannelsDetails", typeof(RssItem), Item);
    }

    /// <summary>
    /// Executes stored procedure <paramref name="sp"/>, feeding each input parameter from the
    /// public property of <paramref name="instance"/> whose name matches the parameter name
    /// (case-insensitively, without the leading '@').
    /// </summary>
    /// <param name="sp">Stored procedure name.</param>
    /// <param name="t">Type whose public properties are matched against the parameters.</param>
    /// <param name="instance">Object the property values are read from.</param>
    private void SaveToDataBase(string sp, Type t, object instance)
    {
        // Discover all parameters of the stored procedure.
        SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);

        // Normalized parameter name -> position in spa.
        System.Collections.Hashtable positions = new System.Collections.Hashtable();
        for (int i = 0; i < spa.Length; i++)
        {
            positions.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);
            // Start every parameter without a value.
            spa[i].Value = null;
        }

        foreach (PropertyInfo property in t.GetProperties())
        {
            object position = positions[property.Name.ToLower()]; // single lookup; null when absent
            if (position == null)
            {
                continue;
            }

            SqlParameter parameter = spa[(int) position];
            if (parameter.Direction != System.Data.ParameterDirection.Input
                && parameter.Direction != System.Data.ParameterDirection.InputOutput)
            {
                continue;
            }

            object value = property.GetValue(instance, null);
            if (value != null && property.PropertyType == typeof(string))
            {
                // Text is stored without markup.
                value = Util.StripHTML((string) value);
            }
            parameter.Value = value;
        }

        if (t == typeof(RssItem) && spa.Length > 0)
        {
            // RssItem has no URL property of its own; the feed URL comes from its header.
            // Prefer the parameter named "url"; fall back to the first parameter as before.
            object urlPosition = positions["url"];
            int urlIndex = (urlPosition == null) ? 0 : (int) urlPosition;
            spa[urlIndex].Value = ((RssItem) instance).Header.URL;
        }

        SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);

        // The last parameter is read back as the new row ID; DBNull there is reported as a failed save.
        object newId = (spa.Length > 0) ? spa[spa.Length - 1].Value : null;
        if (newId != null && newId != System.DBNull.Value)
        {
            System.Console.WriteLine("Save to ID: {0} successful!", newId);
        }
        else
        {
            System.Console.WriteLine("save failed! may be duplicate!");
        }
    }
}
}
//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO
CREATE TABLE [dbo].[Channels] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
[language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
[generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[ChannelsDetails] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[ChannelID] [int] NULL ,
[title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
CREATE proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
CREATE proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/