Почему мой код VBA не извлекает информацию из HTMLDoc сайта?
Я не уверен, почему мой код не работает (возвращая названия компаний, номера телефонов и контактные номера из HTMLDoc веб-сайта, с которого я пытаюсь получить информацию. Можете ли вы помочь определить, что я делаю неправильно (скорее всего, с типы данных IHTMLElement и IHTMLElementCollection и / или доступ к HTML через getElementsByTagName, getElementsByClassName и т. д.). Спасибо!
Option Explicit
Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"
'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String
'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement
Dim itemprop As String
Dim itemprop2 As String
Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement
Dim RowNumber As Long
'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all
'loop for business names
For Each BusinessName In BusinessNameCollection
Cells(RowNumber, 1).Value = BusinessName
RowNumber = RowNumber + 1
Next BusinessName
'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
If itemprop = "streetAddress" Then
Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
ElseIf itemprop = "addressLocality" Then
Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "addressRegion" Then
Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
ElseIf itemprop = "postalCode" Then
Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all
'loop for business addresses
For Each BusinessAddress In BusinessAddressCollection
BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
Cells(RowNumber, 2).Value = BusinessAddress
RowNumber = RowNumber + 1
Next BusinessAddress
'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
If itemprop2 = "telephone" Then
BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all
'loop for business phones
For Each BusinessPhone In BusinessPhoneCollection
Cells(RowNumber, 3).Value = BusinessPhone
RowNumber = RowNumber + 1
Next BusinessPhone
Range("A1").Activate
Set HTMLDoc = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "Manta.com Business Contacts"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
1 ответ
Это извлекает информацию. Вы не зациклили все страницы результатов в своем коде или упомянули об этом, поэтому я настроил это, чтобы показать вам, как сделать первую страницу результатов. Дайте мне знать, как это происходит.
Код:
Option Explicit
Public Sub FinalMantaSub() '<== Can't have ad blocker enabled for this site
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
IE.Visible = True
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
Set HTMLDoc = IE.document
Dim c As Object, i As Long
Set c = HTMLDoc.querySelectorAll("div.media-body")
Do While Not c(i) Is Nothing
Debug.Print "Result #" & i + 1
Debug.Print vbNewLine
Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
Debug.Print String$(20, Chr$(61))
i = i + 1
Loop
IE.Quit
End Sub
Снимок вывода:
Обновить:
Существует огромное количество результатов, но вы можете получить внешний цикл следующим образом. Вы могли бы затем превратить вышеупомянутое в подпункт, который называется.
Dim arr() As String, pageNo As Long
arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
pageNo = 1
Do While Err.Number = 0
On Error GoTo Errhand:
Dim url As String
url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
Debug.Print url
IE.navigate url
Do While IE.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
pageNo = pageNo + 1
Loop
Errhand:
Debug.Print "Stopped after " & pageNo & " pages."