' 这里使用 chinaZ 的接口查询已删除域名,中间省略了些函数,大家可以自己写一下。
Imports System.Text.RegularExpressions
' Form that drives an embedded WebBrowser control to page through the
' del.chinaz.com deleted-domain query results and append the domain names
' found on each page to a text file.
'
' NOTE(review): actionType and WebLogin are used below but are not defined in
' this part of the file; they are presumably among the omitted members.
Public Class Form1
    ' Number of the result page currently being processed (starts at 1).
    Public pageCount As Integer = 1
    ' Running count of domain names collected so far.
    Public domainCount As Integer = 0
    ' Whether get_next_page found a "next page" link on the current page.
    Public hasNext_Page As Boolean = False
    ' File that the collected domain names are appended to.
    Public outFileName As String = "域名查询结果.txt"
    ' Total number of result pages; parsed from the first result page by get_pages.
    Public totalPageCount As Integer = 0
    ' Total number of domains; parsed from the first result page by get_pages.
    Public totalDomainCount As Integer = 0
    ' Query URL. The meaning of each query parameter can be worked out from the
    ' del.chinaz.com page source and from the URL the site produces for a query.
    Public chinaz_DelUrl As String = "http://del.chinaz.com/?kw=&p=0&bl=4&el=8&ds[]=1&pl=&hs=1&sort=1&suffix[]=com&dt=1&date=1&pagesize=200&st=1"

    ' Starts a fresh query: resets all progress state, removes any previous
    ' output file and loads the first result page.
    Private Sub Button4_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button4.Click
        pageCount = 1
        ' Reset the counters as well, otherwise a second query keeps adding to
        ' the totals of the previous one.
        domainCount = 0
        totalPageCount = 0
        totalDomainCount = 0
        hasNext_Page = False
        If System.IO.File.Exists(outFileName) Then
            System.IO.File.Delete(outFileName)
        End If
        ' NOTE(review): the DocumentCompleted handler only calls get_pages when
        ' actionType = 1, and nothing visible here sets it - confirm that the
        ' omitted login code does.
        Me.WebBrowser1.Navigate(chinaz_DelUrl)
    End Sub

    ' Extracts the <tbody>...</tbody> portion(s) of an HTML string.
    '   doctext: HTML source to search.
    ' Returns each match prefixed with a line break, or an empty string when
    ' doctext is empty or contains no <tbody> element.
    Function get_match_text(ByVal doctext As String) As String
        If String.IsNullOrEmpty(doctext) Then
            Return String.Empty
        End If
        ' Singleline lets "." match line breaks too, which matches the same text
        ' as the former "(.|\n)*" without one capture per character.
        Dim tbodyRegex As New Regex("<tbody>.*</tbody>", RegexOptions.IgnoreCase Or RegexOptions.Singleline)
        Dim collected As New System.Text.StringBuilder()
        For Each found As Match In tbodyRegex.Matches(doctext)
            ' Append the raw value: passing HTML to String.Format would throw on
            ' any "{" or "}" in the page.
            collected.Append(vbCrLf).Append(found.Value)
        Next
        Return collected.ToString()
    End Function

    ' Collects the domain names shown on the page currently loaded in
    ' WebBrowser1, updates the running counter and the two progress text boxes.
    ' A result is an <a> element inside a table body whose id contains "domain_"
    ' and whose text contains a dot.
    ' Returns the names separated by line breaks. On pages after the first the
    ' text starts with a line break so it can be appended directly to the file.
    Function get_Domain_Detail() As String
        Dim pageDomains As New System.Text.StringBuilder()
        ' Read the live document directly. Creating a second WebBrowser per call
        ' leaked a control each time and relied on its Document being available
        ' immediately after an asynchronous Navigate.
        Dim liveDoc As HtmlDocument = WebBrowser1.Document
        If liveDoc IsNot Nothing Then
            For Each anchor As HtmlElement In liveDoc.GetElementsByTagName("a")
                Dim domainName As String = anchor.InnerText
                Dim anchorId As String = anchor.Id
                If String.IsNullOrEmpty(domainName) OrElse String.IsNullOrEmpty(anchorId) Then
                    Continue For
                End If
                If Not anchorId.Contains("domain_") OrElse Not domainName.Contains(".") Then
                    Continue For
                End If
                If Not IsInsideTableBody(anchor) Then
                    Continue For
                End If
                ' Only the very first name of the first page is written without
                ' a leading line break.
                If pageDomains.Length > 0 OrElse pageCount <> 1 Then
                    pageDomains.Append(vbCrLf)
                End If
                pageDomains.Append(domainName)
                domainCount += 1
            Next
        End If
        ' Show progress as "current/total".
        Me.TextBox1.Text = pageCount & "/" & totalPageCount
        Me.TextBox2.Text = domainCount & "/" & totalDomainCount
        Return pageDomains.ToString()
    End Function

    ' Returns True when the element has a TBODY ancestor.
    Private Shared Function IsInsideTableBody(ByVal element As HtmlElement) As Boolean
        Dim ancestor As HtmlElement = element.Parent
        While ancestor IsNot Nothing
            If String.Equals(ancestor.TagName, "TBODY", StringComparison.OrdinalIgnoreCase) Then
                Return True
            End If
            ancestor = ancestor.Parent
        End While
        Return False
    End Function

    ' Alternative paging strategy: saves the current page, then clicks the
    ' page's "next page" link if there is one. Does nothing until the browser
    ' has finished loading.
    Sub get_next_page()
        If WebBrowser1.ReadyState <> WebBrowserReadyState.Complete Then
            Return
        End If
        Dim doc As HtmlDocument = WebBrowser1.Document
        If doc Is Nothing Then
            Return
        End If
        ' Look for a link whose target contains "page=" and whose text contains
        ' the "next page" caption.
        Dim nextLink As HtmlElement = Nothing
        For Each link As HtmlElement In doc.GetElementsByTagName("a")
            Dim linkUrl As String = link.GetAttribute("href")
            Dim linkText As String = link.InnerText
            If Not String.IsNullOrEmpty(linkText) AndAlso linkUrl.Contains("page=") AndAlso linkText.Contains("下一页") Then
                nextLink = link
                Exit For
            End If
        Next
        ' Save the current page exactly once, before pageCount moves on.
        System.IO.File.AppendAllText(outFileName, get_Domain_Detail())
        ' Recompute the flag on every page so the last page does not inherit
        ' True from the page before it.
        hasNext_Page = nextLink IsNot Nothing
        If hasNext_Page Then
            pageCount += 1
            nextLink.InvokeMember("click")
        End If
    End Sub

    ' Paging strategy used by the DocumentCompleted handler. On the first page
    ' it parses the total record and page counts; on every page it saves the
    ' loaded results and then requests the next page, if any.
    Sub get_pages()
        If WebBrowser1.ReadyState <> WebBrowserReadyState.Complete Then
            Return
        End If
        If pageCount = 1 Then
            ' The totals are only read from the first result page.
            Dim liveDoc As HtmlDocument = WebBrowser1.Document
            If liveDoc IsNot Nothing AndAlso liveDoc.Body IsNot Nothing AndAlso liveDoc.Body.InnerHtml IsNot Nothing Then
                Dim summaryPattern As String = "找到 <b>(?<DnCount>\d+)</b> 条记录,共 <b>(?<DnPages>\d+)</b> 页"
                Dim summary As Match = Regex.Match(liveDoc.Body.InnerHtml, summaryPattern, RegexOptions.IgnoreCase)
                If summary.Success Then
                    Integer.TryParse(summary.Groups("DnCount").Value, totalDomainCount)
                    Integer.TryParse(summary.Groups("DnPages").Value, totalPageCount)
                End If
            End If
        End If
        ' Save the page that has just finished loading. Navigate is asynchronous,
        ' so saving after calling it would write the old page a second time and
        ' never write the last page.
        System.IO.File.AppendAllText(outFileName, get_Domain_Detail())
        If pageCount < totalPageCount Then
            pageCount += 1
            Me.WebBrowser1.Navigate(chinaz_DelUrl & "&page=" & pageCount)
        End If
    End Sub

    ' Runs after each page load: performs the login step while actionType is 0,
    ' and collects results while actionType is 1.
    Private Sub WebBrowser1_DocumentCompleted(ByVal sender As System.Object, ByVal e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser1.DocumentCompleted
        If actionType = 0 Then
            ' NOTE(review): credentials are hard-coded in source; move them to
            ' user input or protected configuration.
            WebLogin(WebBrowser1, "esonbest", "eson_165")
        ElseIf actionType = 1 AndAlso WebBrowser1.ReadyState = WebBrowserReadyState.Complete Then
            ' get_next_page() is the link-clicking alternative to get_pages().
            get_pages()
        End If
    End Sub

    ' Placeholder handler; not attached to any event and intentionally empty.
    Private Sub Button3_Click(ByVal sender As System.Object, ByVal e As System.EventArgs)
    End Sub

    ' Shows readme.html from the current working directory when the form opens.
    Private Sub Form1_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        Dim readmePath As String = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "readme.html")
        WebBrowser1.Navigate(readmePath)
    End Sub

    ' Switches to the login step and opens the chinaZ login page.
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        actionType = 0
        WebBrowser1.Navigate("http://my.chinaz.com/login.html")
    End Sub
End Class