Results 1 to 7 of 7

VBA scraping of HTML

Threaded View

  1. #1
    Forum Contributor
    Join Date
    09-21-2016
    Location
    UK
    MS-Off Ver
    2016
    Posts
    131

    VBA scraping of HTML

    Hi all,

    Currently I have got a VBA which scrapes through links and tables that are stored inside sheet called web_links. But if you check sheet web_data the data is a little off the columns not where it should be. My question would be how can I update this VBA so it only downloads data from tables with the following ID's: product-name, price-box?

    Sub check_prices()
     Dim oHtml As HTMLDocument
     Dim oElement As Object
     Dim a, x, i, ii, webX, LastRow As Long
     Dim nowDate, nowTime As Date
     Dim cat, website As String
     Dim weblinks As Variant
     Dim rng, MyRange As Range
     Dim SHweblinks, SHwebdata As Worksheet
     
     'On Error Resume Next
     
     nowDate = Date
     nowTime = Time
     
    Set SHweblinks = ThisWorkbook.Worksheets("web_links")
    Set SHwebdata = ThisWorkbook.Worksheets("web_data")
     
        With Application
        .ScreenUpdating = False
        .Calculation = xlCalculationManual
        .DisplayAlerts = False
        End With
    
    With SHwebdata
    If .AutoFilterMode = True Then .AutoFilterMode = False
    End With
    
     'Needs a Reference.
     'Go to Tools > Reference > Search for Microsoft HTML Object Library > tick the checkbox > OK
     Set oHtml = New HTMLDocument
    
        weblinks = SHweblinks.Cells(1).CurrentRegion.Value
    
    For webX = 2 To UBound(weblinks)
    
     With CreateObject("WINHTTP.WinHTTPRequest.5.1")
        .Open "GET", weblinks(webX, 3), False
        .send
        oHtml.body.innerHTML = .responseText
        Debug.Print
     End With
    
     ReDim a(1 To 100000, 1 To 60)
    ' For Each oElement In oHtml.getElementsByClassName(weblinks(webX, 4))
     For Each oElement In oHtml.getElementsByClassName(weblinks(webX, 4))
        i = i + 1
        x = Split(oElement.outerText, vbCr)
        
        For ii = 1 To UBound(x)
            a(i, 1) = nowDate
            a(i, 2) = nowTime
            a(i, 3) = weblinks(webX, 1)
            a(i, 4) = weblinks(webX, 2)
            a(i, ii + 4) = Trim$(x(ii))
        Next
    
     Next oElement
        
        With SHwebdata
            LastRow = .Cells(.Rows.Count, "A").End(xlUp).Row
            .Cells(LastRow + 1, 1).Resize(i, UBound(a, 2)) = a
            i = 0
        End With
     Next
    
     '' remove line breaks and sterling signs
     Set MyRange = SHwebdata.UsedRange
            MyRange.Replace Chr(10), ""
            MyRange.Replace "£", ""
    
    '' move prices from column H to G
        Set rng = SHwebdata.Range("H:H")
        For Each Cell In rng
            'test if cell is empty
            If Cell.Value <> "" Then
                'write to adjacent cell
                Cell.Offset(0, -1).Value = Cell.Value
                Cell.ClearContents
            End If
        Next
    
    With SHwebdata
        .Columns("A:A").NumberFormat = "dd/mm/yyyy"
        .Columns("B:B").NumberFormat = "hh:mm"
        .Columns("C:F").NumberFormat = "@"
        .Columns("G:G").NumberFormat = "$#,##0.00"
        .Cells.EntireColumn.AutoFit
        .Cells.EntireRow.AutoFit
        '.Range("A:G").RemoveDuplicates Columns:=Array(1, 2, 3, 5), _
        '    Header:=xlYes
    End With
    
        Sheets("pivot").PivotTables("PivotTable1").PivotCache.Refresh
        Sheets("overview").PivotTables("PivotTable1").PivotCache.Refresh
        
        With Application
        .ScreenUpdating = True
        .Calculation = xlCalculationAutomatic
        .DisplayAlerts = True
        End With
    End Sub
    Attached Files Attached Files

Thread Information

Users Browsing this Thread

There are currently 1 users browsing this thread. (0 members and 1 guests)

Similar Threads

  1. [SOLVED] VBA scraping specific value from website HTML
    By Strutland in forum Excel Programming / VBA / Macros
    Replies: 3
    Last Post: 03-25-2017, 10:06 AM
  2. Replies: 2
    Last Post: 01-07-2016, 12:25 PM
  3. VBA to Import HTML Table Locally Stored HTML file to excel
    By ermengard in forum Excel Programming / VBA / Macros
    Replies: 12
    Last Post: 01-09-2015, 10:48 AM
  4. Excel ws into outlook html body with current html signature
    By Cadelanne in forum Excel Programming / VBA / Macros
    Replies: 1
    Last Post: 01-29-2014, 09:03 AM
  5. HTML scraping using VBA
    By MacroLide in forum Excel Programming / VBA / Macros
    Replies: 4
    Last Post: 05-26-2012, 11:12 AM
  6. convert formated excel column to html source text with html tags
    By julia81 in forum Excel Programming / VBA / Macros
    Replies: 0
    Last Post: 12-19-2011, 04:22 PM
  7. How to add HTML header/footer to export as HTML?
    By JGCA in forum Excel Programming / VBA / Macros
    Replies: 1
    Last Post: 05-08-2009, 04:29 PM

Bookmarks

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts

Search Engine Friendly URLs by vBSEO 3.6.0 RC 1