自动采集IDC名录从IDC圈网站中
有兴趣看源码的进入内页,本程序能自动将idcquan网的所有IDC名录抓取过来。地区和标题都有的哦。
如果你不知道这是什么,就点这里看我采集的结果好了。 www.suntw.com/demo/IDC2010a.html
Const logfile="C:\LINK2010.html" Set oDic = CreateObject("scripting.dictionary") URL="http://dh.idcquan.com/" '首先我们要做的是把各地区的URL读出来 pagebody = xmlHttp(url,"") pagebody=cutString(pagebody,"地区查询","地区查询") Set diquList=regExec(pagebody,"href=""(.+?)""(.*?)>(.+?)</a>") For Each match In diquList dqName=match.submatches(2) dqUrl=match.submatches(0) If Not odic.Exists(dqName) Then odic.Add dqName,dqUrl End If Next '1.遍历每个地区 For Each dq In oDic writeLog "<dl><dt>" & dq & "</dt><dd>" url= odic(dq) pagebody = xmlHttp(url,"") s1=InStr (pagebody,"正在开展IDC业务的企业") s2=InStr (s1,pagebody,"Chinaitlab Group 旗下网站") pagebody = Mid(pagebody,s1,s2-s1) Set urllist=regExec(pagebody,"href=""(.+?)""(.+?)>(.+?)</a>") '2.遍历地区页中的每个IDC链接 For Each match In urllist infopage="" urlinfo=match.submatches(0) infopage=UCase(xmlHttp(urlinfo,"")) If infopage<>"" Then urllink=cutString(infopage,"<STRONG>网 址</STRONG>:","</TD></TR>") urltext=match.submatches(2) '3.得到了IDC最终链接 writeLog "<li>" & urltext & Chr(9) & urllink & "</li>" End If Next writeLog "</dd></dl>" Next Sub writeLog(byval strng) WScript.Echo strng Set fso = CreateObject("scripting.filesystemobject") Set f=fso.OpenTextFile(logfile,8,True) f.Write(strng & vbCrLf) Set f=Nothing :Set fso=Nothing End Sub Function cutString(byval strng,byval str1,byval str2) s1=instr(strng,str1) If s1>0 Then s1=s1+len(str1) s2=instr(s1,strng,str2) cutString=Mid(strng,s1,s2-s1) End If End Function Function regExec(byval strng,byval patng) Dim oreg : Set oreg=New RegExp oreg.Global=True : oreg.IgnoreCase=True oreg.Pattern = patng Set regExec=oreg.Execute(strng) Set oreg=Nothing End Function Function xmlHttp(byval urlStr,byval pData) On Error Resume Next dim http,postype If InStr(urlstr,"?")=0 Then urlstr=urlstr&"?x44="&Timer() Else urlstr=urlstr&"&x44=" &Timer() If pData="" Then postype="GET" Else postype="POST" set http=CreateObject("WinHttp.WinHttpRequest.5.1") http.SetTimeouts 10000,10000,10000,10000 http.Option(6)=0 http.open postype,urlStr,False If postype="POST" Then http.setRequestHeader "CONTENT-TYPE","application/x-www-form-urlencoded" End if http.Send(pData) xmlHttp = BytesToBstr (http.ResponseBody,"GB2312") set http=Nothing end Function Function BytesToBstr(body,cchar) Dim objstream Set objstream = CreateObject("adodb.stream") objstream.Type = 1 objstream.Mode = 3 objstream.Open objstream.write body objstream.Position = 0 objstream.Type = 2 objstream.CharSet = cchar BytesToBstr = objstream.ReadText objstream.Close Set objstream = Nothing End Function
上一篇: 教你突破网吧的驱动防火墙网维大师
下一篇: 正则表达式中问号的作用
目前这篇文章有38条评论(Rss)