我有一个HTML文档,其中包含Javascript块中的地理信息.它是此网页的源代码:
https://energy.ehawaii.gov/epd/public/energy-projects-map.html
这可以被视为地图,也可以视为列表.
我想要实现的是在Excel中使用该列表,但是具有“纬度”字段和“经度”字段. Google地图标记指定Javascript中的LatLng.
我如何使用VB之类的东西来处理HTML文件的源代码,并组织成一个包含以下字段/列的表:
>描述(来自< a ... title =“等等”>)
>技术(来自< p>技术:Solar< / p>例如)
>纬度(来自google.maps.LatLng(纬度,经度);
> Longtitude(来自与纬度相同的代码行,但使用第二个变量)?
所有帮助赞赏!
解决方法
尝试基于XMLHTTP请求的此VBScript解决方案.只需复制下面的代码,粘贴到文本文件,将其保存为.vbs并运行它.脚本尚未优化,所有请求都不是异步,因此我的PC上需要大约40秒来获取所有数据.
Option Explicit Dim arrCells(),arrList,arrTmp,sRespHeaders,sRespText,arrSetHeaders,i,j,iTotal,oApp,oWB,oWS,oOutput ' Create output window Output oOutput ' Get cookies oOutput.write "Get cookies" XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html",Array(),sRespText ParseResponse "^Set-(Cookie): ([\S]*?=[\S]*?);[\s\S]*?$",arrSetHeaders ' Get project list oOutput.write "Get project list" arrList = Array() XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&iDisplayStart=1&iDisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true","",sRespText ParseProjects sRespText,iTotal oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal ' Rearrange to 2-dimensional array,get LatLng ReDim arrCells(UBound(arrList),8) ' Name,Technology,Island,Capacity,Location,RID,Type,Lat,Lng For i = 0 To UBound(arrList) For j = 0 To 6 arrCells(i,j) = arrList(i)(j) Next oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal arrTmp = RequestLatLng(arrList(i)(5)) arrCells(i,7) = arrTmp(0) arrCells(i,8) = arrTmp(1) Next ' Create Excel worksheet,output data oOutput.write "Export to Excel" Set oApp = CreateObject("Excel.Application") oApp.Visible = True Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet Set oWS = oWB.Worksheets(1) oWS.Range(oWS.Cells(1,1),oWS.Cells(UBound(arrCells) + 1,9)).Value = arrCells oWS.Columns.AutoFit oWB.Saved = True oOutput.write "Completed" Sub XmlHttpGet(sQuery,sRespText) Dim arrHeader With CreateObject("MSXML2.ServerXMLHTTP") .SetOption 2,13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS .Open "GET",sQuery,False For Each arrHeader In arrSetHeaders .SetRequestHeader arrHeader(0),arrHeader(1) Next .Send "" sRespHeaders = .GetAllResponseHeaders sRespText = .ResponseText End With End Sub Sub ParseResponse(sPattern,sResponse,aData) Dim oMatch,aTmp,sSubMatch aData = Array() With CreateObject("VBScript.RegExp") .Global = True .MultiLine = True .Pattern = sPattern For Each oMatch In .Execute(sResponse) If oMatch.SubMatches.Count = 1 Then PushItem aData,oMatch.SubMatches(0) Else aTmp = Array() For Each sSubMatch In oMatch.SubMatches PushItem aTmp,sSubMatch Next PushItem aData,aTmp End If Next End With End Sub Sub PushItem(aList,vItem) ReDim Preserve aList(UBound(aList) + 1) aList(UBound(aList)) = vItem End Sub Sub ParseProjects(sJson,arrProj,iTotalRecords) Dim i,q With CreateObject("htmlfile") With .parentwindow .execscript ";","jscript" .eval ("json = " & sJson & ";") iTotalRecords = CInt(.json.iTotalRecords) Do While .json.aaData.Length ReDim Preserve arrProj(UBound(arrProj) + 1) With .json.aaData.Shift() arrProj(UBound(arrProj)) = Array(.Shift(),.Shift(),.Shift()) End With Loop End With End With End Sub Function RequestLatLng(sRid) Dim sRespText,sTmp XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid,sRespText arrTmp = Split(sRespText,"google.maps.LatLng(") If UBound(arrTmp) >= 1 Then sTmp = arrTmp(1) arrTmp = Split(sTmp,"),") If UBound(arrTmp) >= 1 Then RequestLatLng = Split(arrTmp(0),",") Exit Function End If End If RequestLatLng = Array("#","#") End Function Sub Output(oWnd) Set oWnd = ShowWindow("energy.ehawaii.gov","",354,118) End Sub Function ShowWindow(sTitle,sBG,iWidth,iHeight) Set ShowWindow = CreateWindow() With ShowWindow With .document .title = sTitle .getElementsByTagName("head")(0).appendChild .createElement("style") .styleSheets(0).cssText = "* {font: 8pt tahoma; margin: 5px;}" .body.style.background = "buttonface" .body.style.backgroundRepeat = "no-repeat" .body.style.backgroundImage = "url(" & sBG & ")" .body.innerHTML = "" End With .resizeTo .screen.availWidth,.screen.availHeight .resizeTo iWidth + .screen.availWidth - .document.body.offsetWidth,iHeight + .screen.availHeight - .document.body.offsetHeight .moveTo CInt((.screen.availWidth - iWidth) / 2),CInt((.screen.availHeight - iHeight) / 2) .execScript "var handlers,thunks = {body_onunload: function() {handlers.WSHQuit()}};" Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class" Set .handlers = New clsHandlers Set .document.body.onunload = .thunks.body_onunload .execScript "var write = function(t) {document.body.innerHTML = t};" End With End Function Function CreateWindow() ' source http://forum.script-coding.com/viewtopic.PHP?pid=75356#p75356 Dim sSignature,oShellWnd,oProc On Error Resume Next sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid,38) Do Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveTo(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=RegisterAsBrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentWindow);</script></head>""") Do If oProc.Status > 0 Then Exit Do For Each oShellWnd In CreateObject("Shell.Application").Windows Set CreateWindow = oShellWnd.GetProperty(sSignature) If Err.Number = 0 Then Exit Function Err.Clear Next Loop Loop End Function