Hi everyone. I think this is a bit outside the normal realm of this forum, but I'm hoping you can help. I've got a macro that reads through one sheet, and puts some data in another sheet, based on a few criteria. The macro is working, but it's very slow - it took about 7 minutes to get through 5,000 lines, and my full source data is about 300,000 lines. Ideally, I'd love to make this process much faster.
I've attached a sample file with the macros included. The "Process File" will do the following:- Clear all data on the "EIB" sheet
- Sort the data on the "ImportedData" sheet
- Remove the HTML tags from Columns B and C of the "ImportedData" sheet
- If there's no data in Column B, use the first 80 characters from Column C
- Reformat the dates from "YYYY-MM-DD HH:MM:SS UTC" to "YYYY-MM-DD" if it's a valid date (there are some data entry errors in the original source, for example a 3 or 5 digit year)
- If the "Status" column is NOT "Inactive", or the status is "Completed" with a date after 10/1/2020, copy some of the columns to the "EIB" sheet
I also manually processed the first 5 rows of the "ImportedData" sheet onto a new sheet called "Manual - 5 rows", so you can see what the output should look like.
Sub ProcessFile()
'This will sort, strip the HTML codes, and copy the data to the "EIB" sheet
'We will also be removing all "inactive" commitments, and any commitments completed before FY20
Dim lastRowData As Long
Dim lastRowEIB As Long
Dim dataRow As Long
Dim SpreadsheetKey As Long
Dim rowID As Long
Dim worker As String
Dim prevWorker As String
Dim comName As String
Dim comDesc As String
Dim dueDate As Date
Dim comStat As String
Dim compDate As Date
Dim wsData As Worksheet
Dim wbData As Workbook
Dim wsEIB As Worksheet
Dim toBeCopied As Boolean
Dim compDateLen As Integer
Dim dueDateLen As Integer
Dim chkDate As String
Dim chkDate2 As String
Set wbData = ActiveWorkbook
Set wsData = ActiveWorkbook.Sheets("ImportedData")
Set wsEIB = ActiveWorkbook.Sheets("EIB")
lastRowEIB = 0
lastRowData = 0
SpreadsheetKey = 0
rowID = 0
worker = ""
prevWorker = ""
comName = ""
comDesc = ""
comStat = ""
compDate = 0
dueDate = 0
toBeCopied = False
'message to user
MsgBox "Processing has started. Please be patient, this may take a few minutes, depending on the amount of data.", vbOKOnly, "Procesing Started"
'Write down when we started processing
wbData.Sheets("Main").Range("I4").Value = Now()
'On Error GoTo EH:
'First, disable Screen updates for Faster Processing
Application.ScreenUpdating = False
Application.Cursor = xlWait
Application.Calculation = xlCalculationManual
'clear the EIB Sheet
lastRowEIB = wsEIB.Cells(wsEIB.Rows.Count, "A").End(xlUp).Row
wsEIB.Range("A1:A" & lastRowEIB).EntireRow.Delete
'identify the last row of the Imported Data
lastRowData = wsData.Cells(wsData.Rows.Count, "A").End(xlUp).Row
'Add headers to EIB sheet
wsEIB.Range("A1").Value = "Spreadsheet Key"
wsEIB.Range("B1").Value = "Worker"
wsEIB.Range("C1").Value = "Row ID"
wsEIB.Range("D1").Value = "Name"
wsEIB.Range("E1").Value = "Description"
wsEIB.Range("F1").Value = "Organization Goal"
wsEIB.Range("G1").Value = "Due Date"
wsEIB.Range("H1").Value = "Status"
wsEIB.Range("I1").Value = "Completion Date"
'Sort the data on the Import sheet
With wsData.Sort
.SortFields.Add Key:=Range("A1"), Order:=xlAscending
.SetRange Range("A1:K" & lastRowData)
.Header = xlYes
.Apply
End With
SpreadsheetKey = 1
lastRowEIB = 2
'Loop through the Imported Data and copy it to the EIB
For dataRow = 2 To lastRowData
worker = ""
prevWorker = ""
comName = ""
comDesc = ""
comStat = ""
compDate = 0
compDateLen = 0
dueDate = 0
dueDateLen = 0
toBeCopied = True
'read data into variables
'pernr and previous row's pernr
worker = wsData.Range("A" & dataRow).Value
worker = Format(worker, "00000000")
'prevWorker = wsEIB.Range("B" & lastRowEIB - 1).Value
'commitment title
comName = HtmlToText(wsData.Range("B" & dataRow).Value)
'comName = wsData.Range("B" & dataRow).Value
'commitment description
comDesc = HtmlToText(wsData.Range("C" & dataRow).Value)
'comDesc = wsData.Range("C" & dataRow).Value
'commitment Status
comStat = Trim(wsData.Range("D" & dataRow).Value)
'commitment completion date
compDateLen = Len(wsData.Range("G" & dataRow).Value)
If compDateLen > 0 Then chkDate = DateValue(Left(wsData.Range("G" & dataRow).Value, 10))
If IsDate(chkDate) Then compDate = chkDate Else compDate = 0
'commitment due date
dueDateLen = Len(wsData.Range("F" & dataRow).Value)
If dueDateLen > 0 Then chkDate2 = DateValue(Left(wsData.Range("F" & dataRow).Value, 10))
If IsDate(chkDate2) Then dueDate = chkDate2 Else dueDate = 0
If dataRow Mod 100 = 0 Then
Application.StatusBar = "Processing record # " & dataRow & " of " & lastRowData
DoEvents
End If
'Then check the commitment status - if it is inactive, or if it is completed before FY21,
'we will not be bringing it into WD. FY21 started on 10/1/2020
If comStat = "Inactive" Then toBeCopied = False
'If comStat = "Completed" And compDate < DateValue("October 1, 2020") Then toBeCopied = False
If comStat = "Completed" And compDate < "10/1/2020" Then toBeCopied = False
'copy data to EIB if "tobecopied" is true
If toBeCopied = True Then
'populate spreadsheet key value into column A
wsEIB.Range("A" & lastRowEIB).Value = SpreadsheetKey
'populate PERNR into Column B
wsEIB.Range("B" & lastRowEIB).NumberFormat = "@"
wsEIB.Range("B" & lastRowEIB).Value = worker
'RowID column C
'check to see if the current pernr matches the previous row's pernr, if so, increment by one, otherwise start over
If prevWorker <> worker Then rowID = 1 Else rowID = rowID + 1
wsEIB.Range("C" & lastRowEIB).Value = rowID
'Commitment Title EIB column D / Data Column B
'check to see if length is zero. If so, use the first 50 characters of the description
If Len(comName) < 1 Then comName = Left(comDesc, 80)
'write the Commitment title to the EIB
wsEIB.Range("D" & lastRowEIB).Value = comName
'Commitment EIB column E / Data Column C
wsEIB.Range("E" & lastRowEIB).Value = comDesc
'Org Goal column F - not used
'Due Date EIB column G / Data Column F
If dueDate <> 0 Then wsEIB.Range("G" & lastRowEIB).Value = dueDate
'Status EIB column H / Data ColumnD
If LCase(comStat) = "not started" Then comStat = "NOT_STARTED"
If LCase(comStat) = "completed" Then comStat = "COMPLETED"
If LCase(comStat) = "in progress" Then comStat = "IN_PROGRESS"
wsEIB.Range("H" & lastRowEIB).Value = comStat
'Completion date EIB column G
If compDate <> 0 Then wsEIB.Range("G" & lastRowEIB).Value = compDate
'increment EIB row, spreadsheet Key
lastRowEIB = lastRowEIB + 1
SpreadsheetKey = SpreadsheetKey + 1
prevWorker = worker
End If
Next dataRow
'Re-enable Screen Updating
EH:
Application.ScreenUpdating = True
Application.Cursor = xlDefault
Application.Calculation = xlCalculationAutomatic
Application.StatusBar = False
'Calculate
'write down when we finished processing
wbData.Sheets("Main").Range("L4").Value = Now()
'message user that it is complete
MsgBox "Processing is complete.", vbOKOnly, "Done"
'clear variables from memory
Set wbData = Nothing
Set wsData = Nothing
Set wsEIB = Nothing
End Sub
Bookmarks