如何讀取PDF的內容
如果PDF檔案裡面有文字、圖檔、框線，
要如何不用轉換成文字檔，而直接取得該PDF檔的文字？
http://partners.adobe.com/public/developer/acrobat/sdk/index.html
' Driver: opens the PDF through the Acrobat "acroexch.pddoc" automation object
' and calls getPDFTextFromPage once per page (page numbers are zero-based here).
' filelocation must be set by the surrounding code before this point.
Set myPDF = CreateObject("acroexch.pddoc")
'once again open the file
openResult = myPDF.Open(filelocation)
' Only walk the pages when the document actually opened; otherwise the
' page calls below would run against an unopened document.
If openResult Then
    ' pageCount was never assigned in this snippet, which made the loop run
    ' from 0 To -1 (i.e. not at all). Read it from the document itself.
    pageCount = myPDF.GetNumPages
    For pagenumber = 0 To pageCount - 1
        getPDFTextFromPage pagenumber
    Next
    ' Close the document explicitly before dropping the reference.
    myPDF.Close
End If
Set myPDF = Nothing
' Collects the text of a single page of the already-opened document myPDF.
'
' Parameters:
'   pagenumber - page index passed straight to myPDF.AcquirePage.
' Returns:
'   The concatenation of every text item reported by the page's text
'   selection, or an empty string when the page or the selection could not
'   be obtained.
' Side effects:
'   Appends the same text to pdfData, as the original routine did.
'   NOTE(review): pdfData is not declared in this routine - confirm that it is
'   a module-level variable if callers rely on the accumulated text.
'
' This was declared as a Sub but tried to assign a return value
' ("getPDFTextFromPage = getPDFTextFromPage=pdfData"); it is now a Function so
' the assignment is legal. Statement-style calls (getPDFTextFromPage n) still work.
Function getPDFTextFromPage(pagenumber) As String
    Dim pageText As String
    Dim i As Long

    Set myPDFPage = myPDF.AcquirePage(pagenumber)
    If Not myPDFPage Is Nothing Then
        Set myPageHilite = CreateObject("acroexch.hilitelist")
        ' Highlight range (0, 9000) is kept from the original.
        ' NOTE(review): presumably offset/length of the text items to select;
        ' a page with more items than this would be truncated - confirm
        ' against the Acrobat IAC reference.
        hiliteResult = myPageHilite.Add(0, 9000)
        Set pageSelect = myPDFPage.CreatePageHilite(myPageHilite)
        ' Guard against a missing selection (e.g. nothing selectable on the
        ' page) instead of failing on the GetNumText call.
        If Not pageSelect Is Nothing Then
            For i = 0 To pageSelect.GetNumText - 1
                pageText = pageText & pageSelect.GetText(i)
            Next
        End If
    End If

    'clean up
    Set pageSelect = Nothing
    Set myPageHilite = Nothing
    Set myPDFPage = Nothing

    pdfData = pdfData & pageText
    getPDFTextFromPage = pageText
End Function
留言列表