Hello gautum123,
Welcome to the Forum!
The attached workbook walks down the URLs in column "A" starting with "A2" and going down until an empty cell is found. The data from the table is out in the same row as the URL.
There is a button in row 1 to call the macro. This uses a faster and more reliable method of retrieving data. Rather than using Internet Explorer, this macro uses the engine for Internet explorer to access the HTML data. There are actually three macros. The main macro is "GetData". This calls two other macros. First it calls "GetWebDocument" whichs creates an HTML DOM object from the web page's source text. The last macro "GetElemText" is called to parse the text from an HTML element recursively. This allows retrieving a full table row of data with a single call. All text elements are returned as a single string. Each "TD" element in the table is followed by a pipe character "|" to allow the string to split into the appropriate columns.
Module1 Macro Code
Global HTMLdoc As Object
Function GetElemText(ByRef Elem As Object, Optional ByRef ElemText As String) As String
' Written: March 18, 2015
' Author: Leith Ross
'
' This is a recursive procedure to extract text from between
' an element's start tag and end tag and everything in between.
If Elem Is Nothing Then ElemText = "~": Exit Function
' Is this element a text value?
If Elem.NodeType = 3 Then
' Separate text elements with a space character.
ElemText = ElemText & Elem.NodeValue & " "
Else
' Keep parsing - Element contains other non text elements.
For Each Elem In Elem.ChildNodes
Select Case UCase(Elem.NodeName)
Case Is = "BR": ElemText = vbLf
Case Is = "TD": If ElemText <> "" Then ElemText = ElemText & "|"
Case Is = "TR": ElemText = ElemText & vbLf
End Select
Call GetElemText(Elem, ElemText)
Next Elem
End If
GetElemText = ElemText
End Function
Function GetWebDocument(ByVal URL As String) As Variant
Dim Text As String
Set HTMLdoc = Nothing
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, True
.Send
While .readyState <> 4: DoEvents: Wend
If .Status <> 200 Then
GetWebDocument = "ERROR: " & .Status & " - " & .StatusResponse
Exit Function
End If
Text = .responseText
End With
Set HTMLdoc = CreateObject("htmlfile")
HTMLdoc.Write Text
HTMLdoc.Close
End Function
Sub GetData()
Dim Data As Variant
Dim n As Long
Dim oDiv As Object
Dim oTable As Object
Dim ret As Variant
Dim Rng As Range
Dim Text As String
Set Rng = Range("A2")
Do While Not IsEmpty(Rng)
ret = GetWebDocument(Rng)
' Check for a web page error.
If Not IsEmpty(ret) Then
Rng.Offset(0, 1).Value = ret
GoTo NextURL
End If
Set oDiv = HTMLdoc.getElementByID("vi-desc-maincntr")
' Locate the Item Specifics Table.
For n = 0 To oDiv.Children.Length - 1
If oDiv.Children(n).NodeType = 1 Then
If oDiv.Children(n).className = "itemAttr" Then
On Error Resume Next
Set oDiv = oDiv.Children(n)
Set oDiv = oDiv.Children(0)
Set oTable = oDiv.Children(2)
On Error GoTo 0
Exit For
End If
End If
Next n
' Check if Table exists.
If oTable Is Nothing Then
Rng.Offset(0, 1).Value = "Item Specifics were not found on this page."
GoTo NextURL
End If
c = 1
' Read the row data and output it to the worksheet.
For n = 0 To oTable.Rows.Length - 1
Text = ""
Text = GetElemText(oTable.Rows(n), Text)
' To avoid an error, check there is text to output.
If Text <> "" Then
Data = Split(Text, "|")
Rng.Offset(0, c).Resize(1, UBound(Data) + 1).Value = Data
c = c + UBound(Data) + 1
End If
Next n
NextURL:
Set Rng = Rng.Offset(1, 0)
Loop
End Sub
Bookmarks