First please excuse the lack of knowledge, im decent with vba but this is my first attempt at scraping data from IE
Ive been working on a macro for the past couple weeks which logs into a protected webapage a pulls a java generated report. ive managed (with limited knowledge) to get everything handled but the last step. I cant give out to much info because it is a secured site from work but i would appreciate any assistance.
My current code navigates to the site and logs in and navigates to the report section
Private Sub butLaunch_Click()
Sheet1.butLaunch.Visible = False
Dim url As String
url = "https://retaile....."
Dim element As Object
' Create InternetExplorer Object
Set IE = CreateObject("InternetExplorer.Application")
'IE.Visible = False
IE.Visible = True
IE.Navigate url
Sheet1.Range("C23").Value = "LOADING..."
' Wait while IE loading...
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
On Error Resume Next
'Input Username and Password
Set element = IE.Document.getElementById("UserName")
element.Value = Sheet1.Range("C4")
Set element = IE.Document.getElementById("Password")
element.Value = Sheet1.Range("C6")
Sheet1.Range("C23").Value = "LOGGING IN..."
' Wait while IE loading...
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
IE.Document.forms("LoginForm").submit
' Wait while IE loading...
Do While IE.Busy: DoEvents: Loop
IE.Navigate ("https://retail....")
' Wait while IE loading...
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
Dim i As Integer
Dim HTMLdoc As HTMLDocument
Dim link As HTMLLinkElement
Dim theFrame As HTMLIFrame
Dim newFrame As HTMLIFrame
Set HTMLdoc = IE.Document
Set theFrame = HTMLdoc.getElementsByName("iframe-reports")(0)
Set HTMLdoc = theFrame.contentWindow.Document
Set link = Nothing
i = 0
While i < HTMLdoc.Links.Length And link Is Nothing
If HTMLdoc.Links(i).innerText = "Reports" Then Set link = HTMLdoc.Links(i)
i = i + 1
Wend
If Not link Is Nothing Then
link.Focus
link.Click
End If
Set theFrame = Nothing
Application.Wait Now + TimeValue("00:00:02")
Dim frame As HTMLWindow2
Dim btnSubmit As HTMLInputElement
Set HTMLdoc = IE.Document
Set frame = HTMLdoc.frames("iframe-reports")
Set HTMLdoc = frame.Document
Set frame = HTMLdoc.frames("PaymentData")
Set HTMLdoc = frame.Document
Set btnSubmit = HTMLdoc.getElementById("btnSubmit")
'We find the number of choices in our option drop down list
Sheet1.Range("C23").Value = "LOADING DATES..."
Dim drp As HTMLFormElement
Set drp = HTMLdoc.getElementById("periodName")
Dim x As Long
x = HTMLdoc.forms.Length
'We get the option values into our worksheet
For x = 0 To 50
ThisWorkbook.Worksheets(2).Cells(x + 1, 1) = drp.Item(x).innerText
Next x
For x = 0 To 50
ThisWorkbook.Worksheets(2).Cells(x + 1, 2) = drp.Item(x).Index
Next x
'Cleanup for butGetInvoice click()
Sheet1.ListPeriodName.ListFillRange = ""
Sheet1.ListPeriodName.Visible = True
Sheet2.Range("C1").Cells.Clear
Sheet1.ListPeriodName.ListFillRange = "ReportDates"
Sheet1.butSelectDate.Visible = True
Sheet1.Range("C23").Value = "PLEASE SELECT INVOICE DATE"
End Sub
The reason for the second sub is to pause for the user to select the report date then finish navigating through the frames
Private Sub butSelectDate_Click()
Dim frame As HTMLWindow2
Dim btnSubmit As HTMLInputElement
Dim HTMLdoc As HTMLDocument
Dim link As HTMLLinkElement
Dim i As Integer
On Error Resume Next
Set HTMLdoc = IE.Document
Set frame = HTMLdoc.frames("iframe-reports")
Set HTMLdoc = frame.Document
Set frame = HTMLdoc.frames("PaymentData")
Set HTMLdoc = frame.Document
i = Sheet2.Range("C2").Value
HTMLdoc.getElementById("periodName").Item(i).Selected = True
HTMLdoc.forms(0).submit
' Wait while IE loading...
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
Sheet2.Range("C3") = HTMLdoc.getElementsByTagName("a")(0).href
HTMLdoc.getElementsByTagName("a")(0).Click
Sheet1.ListPeriodName.ListFillRange = ""
Sheet1.butSelectDate.Visible = False
Sheet1.Range("C23").Value = "DOWNLOADING INVOICE..."
Sheet1.butLaunch.Visible = True
Do While IE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("00:00:02"))
Set frame = HTMLdoc.frames("iframe-reports")
Set HTMLdoc = frame.Document
Set frame = HTMLdoc.frames("PaymentData")
Set HTMLdoc = frame.Document
Set frame = HTMLdoc.frames("reportframe")
Set HTMLdoc = frame.Document
'Set frame = HTMLdoc.getElementById("mainframeset")
'Set HTMLdoc = frame.Document
Debug.Print HTMLdoc.all
End Sub
My issue is that the resulting data is displayed on a form(not a table, which there are tons of examples for) in DIV tags. I now there is javascripit that "GET"s the info from a secured server but i never could get the request headers and cookies right to gain autherization using XMLHTTP. So as a last ditch effort im try to scrape the DIV elements for the info. Cant post the full HTML but the hierarchy goes like this once ive clicked the hyperlink generated by the Date selecting dropdown.
IFRAME id=PaymentData
HEAD
FRAMESET NAME="MAINFRAMSET"
FRAME NAME="TOOLBARFRAME"
FRAMESET NAME="REPORTFRAMESET
FRAMESET NAME= "TOCFRAME"
FRAME NAME="REPORTFRAME"
HTML
HEAD
BODY
DIV CLASS="C12"
DIV CLASS="C13" AND SO ON
This generates a 1000+ page report which can be download a handfull of pages at a time which is why im trying to come up with a better method. It gets old downloading 50 pages at a time then combining 30+ workbooks into one usable file.
I hope Ive given enough info for someone to put me on the right path.
Again any advice would be greatly appreicated. And maybe theres even a better mothod to go about doing this?
Bookmarks