Author:水如烟
总目录:行政区划数据方案设计
这里所说的网上数据,是基于:
一、有固定网址发布最新数据的链接;
二、数据格式固定。
在去年的10月,曾写了个《全国县及县以上行政区划代码信息类 》
见:http://www.cnblogs.com/LzmTW/archive/2005/10/22/260066.html
现在仍以行政区划代码数据为例。
行政区划代码数据由国家统计局发布,网址为
http://www.stats.gov.cn/tjbz/xzqhdm/index.htm
数据格式是固定的:
如最新的为2005年12月31日
http://www.stats.gov.cn/tjbz/xzqhdm/t20041022_402301029.htm
最旧的为2001年10月的,
http://www.stats.gov.cn/tjbz/xzqhdm/t20021125_46781.htm
但是有例外,这在代码中说。
方案组织:
效果:
以下为代码:
NetConst.vb
Namespace NET
Public Class NetConst
Private Sub New()
End Sub
Public Const GOV_DEFAULT As String = "www.stats.gov.cn"
Public Const GOV_ADDRESS As String = "http://www.stats.gov.cn/tjbz/xzqhdm/"
Public Const WEBTABLE_INDEX As String = "9"
End Class
End Namespace
NetInformation.vb
Imports System.Net
Imports System.IO
Imports System.Text.RegularExpressions
Namespace NET
Public Class NetInformation
Private gNetUpdateInformations(-1) As NetUpdateInformationItem
Public ReadOnly Property UpdateInformationsTable() As DataTable
Get
Return GetUpdateInformationsTable()
End Get
End Property
Private Function GetUpdateInformationsTable() As DataTable
Dim mDataTable As New DataTable("UpdateInformations")
With mDataTable
.Columns.Add("Address")
.Columns.Add("LastDate")
For Each item As NetUpdateInformationItem In gNetUpdateInformations
.Rows.Add(New String() {item.Address, item.LastDate})
Next
.AcceptChanges()
End With
Return mDataTable
End Function
Public Sub DownloadInformationsFromNet()
Dim mRegex As New Regex("(?<date>2.*日)")
Dim mNetUpdateItems As NetUpdateItem() = GetNetUpdateItems()
Dim mNetUpdateInformationItem As NetUpdateInformationItem
Dim tmp As NetUpdateItem
'由于后两个不合规则,舍去不用。最后一个没有日期,倒数第二个提供的是附件数据。
For i As Integer = 0 To mNetUpdateItems.Length - 1 - 2
tmp = mNetUpdateItems(i)
mNetUpdateInformationItem = New NetUpdateInformationItem
With mNetUpdateInformationItem
.Address = tmp.Address
.LastDate = CType(mRegex.Match(tmp.Content).Value, Date).ToString("yyyyMMdd")
End With
AppendItem(Of NetUpdateInformationItem)(mNetUpdateInformationItem, gNetUpdateInformations)
Next
End Sub
Private Function GetNetUpdateItems() As NetUpdateItem()
Dim mResult(-1) As NetUpdateItem
Dim mRegex As New Regex("<a href='(?<href>.*)' target='_blank' >(?<content>.*行政区划代码.*)</a>")
Dim mCollection As MatchCollection
Dim mClient As New WebClient()
Dim mStream As Stream = mClient.OpenRead(NetConst.GOV_ADDRESS)
Dim mReader As New StreamReader(mStream, System.Text.Encoding.Default)
Dim mText As String = mReader.ReadToEnd
mReader.Close()
mStream.Close()
mClient.Dispose()
mCollection = mRegex.Matches(mText)
Dim tmpItem As NetUpdateItem
For Each m As Match In mCollection
tmpItem = New NetUpdateItem
With tmpItem
.Address = NetConst.GOV_ADDRESS & m.Groups(1).Value
.Content = m.Groups(2).Value
End With
AppendItem(Of NetUpdateItem)(tmpItem, mResult)
Next
Return mResult
End Function
Private Structure NetUpdateItem
Public Address As String
Public Content As String
End Structure
Private Structure NetUpdateInformationItem
Public Address As String
Public LastDate As String
End Structure
Private Sub AppendItem(Of T)(ByVal value As T, ByRef array As T())
ReDim Preserve array(array.Length)
array(array.Length - 1) = value
End Sub
End Class
End Namespace
ExcelQueryTable.vb
Option Strict Off
Namespace NET
Public Class ExcelQueryTable
Private gExcelApplication As Object
Private gWorkbook As Object
Private gWorksheet As Object
Private gQueryTable As Object
Sub New()
Initialize()
End Sub
Private Sub Initialize()
gExcelApplication = CreateObject("Excel.Application")
gExcelApplication.DisplayAlerts = False '使退出时不询问是否存盘
gWorkbook = gExcelApplication.Workbooks.Add
gWorksheet = gWorkbook.Worksheets.Add
End Sub
'这里只作简单处理,详细处理在我的BLOG上有相关“文章”作过介绍
Public Sub Close()
gWorkbook.Close()
gWorksheet = Nothing
gWorkbook = Nothing
gExcelApplication.DisplayAlerts = True
gExcelApplication.Quit()
gExcelApplication = Nothing
End Sub
Public Function Query(ByVal address As String) As DataTable
Dim mDataTable As DataTable = GetDataTable()
gWorksheet.Cells.Clear()
gQueryTable = gWorksheet.QueryTables.Add( _
Connection:=String.Format("URL;{0}", address), _
Destination:=gWorksheet.Range("A1"))
With gQueryTable
.WebTables = NetConst.WEBTABLE_INDEX '这是固定的
.Refresh(BackgroundQuery:=False)
End With
Dim mCell As Object
Dim mMaxRowIndex As Integer
Dim line As Object
mMaxRowIndex = gWorksheet.Cells.SpecialCells(11).Row 'Excel.XlCellType.xlCellTypeLastCell=11
mCell = gWorksheet.Range("A1")
For i As Integer = 0 To mMaxRowIndex
line = mCell.Offset(i, 0).Value
If line IsNot Nothing Then
AddRow(mDataTable, line.ToString)
End If
Next
gQueryTable.Delete()
gQueryTable = Nothing
Return mDataTable
End Function
Private Sub AddRow(ByVal table As DataTable, ByVal line As String)
line = line.Trim
If line.Length < 7 Then Exit Sub
Dim tmpCode As String
Dim tmpName As String
tmpCode = line.Substring(0, 6)
tmpName = line.Substring(6).Trim
If Not IsNumeric(tmpCode) Then Exit Sub '前六位需是数字
table.Rows.Add(New String() {tmpCode, tmpName})
End Sub
Private Function GetDataTable() As DataTable
'表的列名意义为:代码、名称
Dim mDataTable As New DataTable("RegionalCode")
With mDataTable.Columns
.Add("Code")
.Add("Name")
End With
Return mDataTable
End Function
End Class
End Namespace
测试代码:
MainForm.vb(界面部分省,在最后有整个方案供下载)
Public Class MainForm
Private gNetInformation As New RegionalCodeLibrary.NET.NetInformation
Private gQueryTable As RegionalCodeLibrary.NET.ExcelQueryTable
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
If Not CheckNetworkIsAvailable() Then Exit Sub
ShowMessage("正在下载数据信息
")gNetInformation.DownloadInformationsFromNet()
With Me.ComboBox1
.DataSource = gNetInformation.UpdateInformationsTable
.DisplayMember = "LastDate"
End With
ShowMessage("")
End Sub
Private Sub Button2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button2.Click
If String.IsNullOrEmpty(Me.ComboBox1.Text) Then Exit Sub
If Not CheckNetworkIsAvailable() Then Exit Sub
If gQueryTable Is Nothing Then
ShowMessage("正在启动Excel
")gQueryTable = New RegionalCodeLibrary.NET.ExcelQueryTable
End If
Dim mAddress As String = CType(Me.ComboBox1.SelectedItem, DataRowView).Row.Item("Address").ToString
ShowMessage(String.Format("正在下载{0}数据
", Me.ComboBox1.Text))Me.DataGridView1.DataSource = gQueryTable.Query(mAddress)
ShowMessage(String.Format("{0}共有数据{1}项", Me.ComboBox1.Text, Me.DataGridView1.RowCount))
End Sub
Private Sub Button3_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button3.Click
ClearEnvironment()
End Sub
Private Function CheckNetworkIsAvailable() As Boolean
Dim mResult As Boolean = False
mResult = My.Computer.Network.IsAvailable
If Not mResult Then
ShowMessage("本地连接无效")
Else
Try
mResult = My.Computer.Network.Ping(RegionalCodeLibrary.NET.NetConst.GOV_DEFAULT)
Catch ex As Exception
mResult = False
End Try
If Not mResult Then
ShowMessage(String.Format("本机没有连接Internet或发布网址{0}无效", RegionalCodeLibrary.NET.NetConst.GOV_ADDRESS))
End If
End If
Return mResult
End Function
Private Sub ShowMessage(ByVal msg As String)
If msg = "" Then msg = "待命"
Me.Label1.Text = String.Format("消息:{0}", msg)
Me.Label1.Refresh()
End Sub
Private Sub MainForm_FormClosing(ByVal sender As Object, ByVal e As System.Windows.Forms.FormClosingEventArgs) Handles Me.FormClosing
ClearEnvironment()
End Sub
Private Sub ClearEnvironment()
If gQueryTable Is Nothing Then Exit Sub
gQueryTable.Close()
gQueryTable = Nothing
End Sub
End Class
方案下载:代码
下一篇,行政区划数据数据库的设计(一)