I posted a variation of this code earlier, which scans a designated PDF for numeric values (listed in Column A of a worksheet) and returns a PDF with only pages that contain that numeric value. However, I am looking to modify this macro to search for Strings instead of just numbers (e.g. dog, cat, etc.). I tried changing some of the settings, but it did not work.
Below is the code that works for numeric values only. Any ideas where to change for alphabetic words? Thanks.
Sub Extract_PDF_Num_Keyword()
'code based on http://vbcity.com/forums/t/51200.aspx
Dim xMsg As String
Dim xInput As String
Dim xOutput As String
Dim xResponse As Long
Dim xLast_Row As Long
Dim xErrors As Long
Dim xDeleted As Long
Dim i As Long
Dim j As Long
Dim AcroApp As CAcroApp
Dim AcroPDDoc As CAcroPDDoc
Dim AcroHiliteList As CAcroHiliteList
Dim AcroTextSelect As CAcroPDTextSelect
Dim xarray() As Variant
Dim PageNumber As Variant
Dim PageContent As Variant
Dim xContent As Variant
xInput = "D:\Test.pdf"
xOutput = "D:\Test_Output.pdf"
xLast_Row = [A1].SpecialCells(xlLastCell).Row
ReDim xarray(xLast_Row)
xResponse = MsgBox("About to delete all pages which contain values from the range A1:A" & xLast_Row & Chr(10) _
& Chr(10) & "Input:" & Chr(9) & xInput _
& Chr(10) & "Output:" & Chr(9) & xOutput _
& Chr(10) & Chr(10) & "('OK' to continue, 'Cancel' to quit.)", vbOKCancel, "Delete Pages")
If xResponse = 2 Then
MsgBox "User chose not to continue. Run terminated."
Exit Sub
End If
' Files and data OK?
If Dir(xInput) = "" Then xMsg = "Input file not found - " & xInput & Chr(10)
If Dir(xOutput) <> "" Then xMsg = "Output file exists - " & xOutput & Chr(10)
xarray = Application.Transpose(Range("A1:A" & xLast_Row))
For i = 1 To xLast_Row
Next
If xMsg <> "" Then
MsgBox (xMsg & Chr(10) & "Run cancelled.")
Exit Sub
End If
' Open the PDF...
Set AcroApp = CreateObject("AcroExch.App")
Set AcroPDDoc = CreateObject("AcroExch.PDDoc")
If AcroPDDoc.Open(xInput) <> True Then
MsgBox (xInput & " couldn't be opened - run cancelled.")
Exit Sub
End If
' Read each page...
For i = AcroPDDoc.GetNumPages - 1 To 0 Step -1
Set PageNumber = AcroPDDoc.AcquirePage(i)
Set PageContent = CreateObject("AcroExch.HiliteList")
'Get up to 9,999 words from page...
If PageContent.Add(0, 9999) <> True Then
Debug.Print "Add Error on Page " & i + 1
xErrors = xErrors + 1
Else
Set AcroTextSelect = PageNumber.CreatePageHilite(PageContent)
If Not AcroTextSelect Is Nothing Then
xContent = ""
For j = 0 To AcroTextSelect.GetNumText - 1
xContent = xContent & AcroTextSelect.GetText(j)
Next j
For j = 1 To xLast_Row
If Not InStr(1, xContent, xarray(j)) > 0 Then
Debug.Print "Page " & i + 1 & " contains " & xarray(j) & " - " & xContent
' To avoid problems with the delete...
Set AcroTextSelect = Nothing
Set PageContent = Nothing
Set PageNumber = Nothing
If AcroPDDoc.DeletePages(i, i) = False Then
MsgBox ("Error deleting page " & i + 1 & " - run cancelled.")
Exit Sub
End If
xDeleted = xDeleted + 1
Exit For
End If
Next
End If
End If
Next i
If AcroPDDoc.Save(PDSaveFull, xOutput) = False Then
MsgBox "Cannot save the modified document"
Exit Sub
Else
MsgBox (xDeleted & " pages deleted. (" & xErrors & " errors.)")
End If
AcroPDDoc.Close
AcroApp.Exit
End Sub
Bookmarks