首页 » ASP代码 » 自动采集IDC名录从IDC圈网站中

自动采集IDC名录从IDC圈网站中

有兴趣看源码的进入内页,本程序能自动将idcquan网的所有IDC名录抓取过来。地区和标题都有的哦。
如果你不知道这是什么,就点这里看我采集的结果好了。 www.suntw.com/demo/IDC2010a.html

Const logfile="C:\LINK2010.html"
Set oDic = CreateObject("scripting.dictionary")
URL="http://dh.idcquan.com/"
'首先我们要做的是把各地区的URL读出来
pagebody = xmlHttp(url,"")
pagebody=cutString(pagebody,"地区查询","地区查询")
Set diquList=regExec(pagebody,"href=""(.+?)""(.*?)>(.+?)</a>")
For Each match In diquList
	dqName=match.submatches(2)
	dqUrl=match.submatches(0)
	If Not odic.Exists(dqName) Then
		odic.Add dqName,dqUrl
	End If
Next
'1.遍历每个地区
For Each dq In oDic
writeLog "<dl><dt>" & dq & "</dt><dd>" 
url= odic(dq)
pagebody = xmlHttp(url,"")
s1=InStr (pagebody,"正在开展IDC业务的企业")
s2=InStr (s1,pagebody,"Chinaitlab Group 旗下网站")
pagebody = Mid(pagebody,s1,s2-s1)
Set urllist=regExec(pagebody,"href=""(.+?)""(.+?)>(.+?)</a>")
'2.遍历地区页中的每个IDC链接
For Each match In urllist
	infopage=""
	urlinfo=match.submatches(0)
	infopage=UCase(xmlHttp(urlinfo,""))
	If infopage<>"" Then
		urllink=cutString(infopage,"<STRONG>网  址</STRONG>:","</TD></TR>")
		urltext=match.submatches(2)
		'3.得到了IDC最终链接
		writeLog "<li>" & urltext & Chr(9) & urllink & "</li>"	
	End If
Next
writeLog "</dd></dl>"
Next
 
Sub writeLog(byval strng)
	WScript.Echo strng
	Set fso = CreateObject("scripting.filesystemobject")
	Set f=fso.OpenTextFile(logfile,8,True)
	f.Write(strng & vbCrLf)
	Set f=Nothing :Set fso=Nothing 
End Sub
 
Function cutString(byval strng,byval str1,byval str2)
	s1=instr(strng,str1)
	If s1>0 Then
		s1=s1+len(str1)
		s2=instr(s1,strng,str2)
		cutString=Mid(strng,s1,s2-s1)
	End If
End Function
 
Function regExec(byval strng,byval patng)
	Dim oreg : Set oreg=New RegExp
	oreg.Global=True : oreg.IgnoreCase=True
	oreg.Pattern = patng 
	Set regExec=oreg.Execute(strng)
	Set oreg=Nothing 
End Function 
 
Function xmlHttp(byval urlStr,byval pData)
	On Error Resume Next
	dim http,postype
	If InStr(urlstr,"?")=0 Then urlstr=urlstr&"?x44="&Timer() Else urlstr=urlstr&"&x44=" &Timer()
	If pData="" Then postype="GET" Else postype="POST"
	set http=CreateObject("WinHttp.WinHttpRequest.5.1")
	http.SetTimeouts 10000,10000,10000,10000
	http.Option(6)=0
	http.open postype,urlStr,False
	If postype="POST" Then
		http.setRequestHeader "CONTENT-TYPE","application/x-www-form-urlencoded"
	End if
	http.Send(pData)
	xmlHttp = BytesToBstr (http.ResponseBody,"GB2312")
	set http=Nothing
end Function
 
Function BytesToBstr(body,cchar)
  Dim objstream
  Set objstream = CreateObject("adodb.stream")
  objstream.Type = 1
  objstream.Mode = 3
  objstream.Open
  objstream.write body
  objstream.Position = 0
  objstream.Type = 2
  objstream.CharSet = cchar
  BytesToBstr = objstream.ReadText
  objstream.Close
  Set objstream = Nothing
End Function

, , , ,

转发到新浪微博 转发到新浪微博

目前这篇文章有38条评论(Rss)

我要评论