Spaces:
Build error
Build error
File size: 1,406 Bytes
dca8ede |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import { readFile } from "fs/promises"
import { existsSync } from "fs"
import { join } from "path"
import customPdfParse from "./pdf-parse-wrapper"
/**
 * Reads the PDF at `pdfPath` and returns the text reported by the parser.
 *
 * Each step is traced to the console. The file's bytes are handed to
 * `customPdfParse` together with the path, and the `text` field of its result
 * is what gets returned.
 *
 * @param pdfPath - Filesystem path of the PDF to read.
 * @returns The extracted text, or `""` (after a warning) when the parser
 *   produced no text.
 * @throws Error whose message starts with "Failed to extract text from PDF"
 *   when the file is missing, or when reading or parsing throws. The original
 *   error's message is appended when the thrown value is an `Error`.
 */
export async function extractTextFromPdf(pdfPath: string): Promise<string> {
  console.log("=== Starting PDF text extraction ===")

  try {
    console.log("1. Reading PDF file:", pdfPath)

    // Fail early with a path-specific message when nothing is at pdfPath.
    console.log("2. Checking if file exists")
    const fileIsPresent = existsSync(pdfPath)
    if (!fileIsPresent) {
      console.error("PDF file does not exist:", pdfPath)
      throw new Error(`PDF file does not exist: ${pdfPath}`)
    }
    console.log("3. File exists")

    console.log("4. Reading file into buffer")
    const pdfBytes = await readFile(pdfPath)
    console.log("5. Buffer size:", pdfBytes.length)

    console.log("6. Parsing PDF content")
    const parsed = await customPdfParse(pdfBytes, pdfPath)
    console.log("7. PDF parsed successfully")

    const extracted = parsed.text
    if (extracted && extracted.length !== 0) {
      console.log("8. Extracted text length:", extracted.length)
      return extracted
    }

    // Missing or empty text is reported as a warning, not as a failure.
    console.warn("8. No text extracted from PDF")
    return ""
  } catch (error) {
    console.error("Error extracting text from PDF:", error)

    // Every failure is surfaced under one prefix; detail is only available
    // when the thrown value is a real Error.
    const wrappedMessage =
      error instanceof Error
        ? `Failed to extract text from PDF: ${error.message}`
        : "Failed to extract text from PDF"
    throw new Error(wrappedMessage)
  }
}
|