Spaces:
Build error
Build error
File size: 1,318 Bytes
dca8ede |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import { readFile } from 'fs/promises';
import { existsSync } from 'fs';
import pdfParseFix from './pdf-parse-fix.js';
/**
 * Reads a PDF from disk and returns the text produced by `pdfParseFix`.
 *
 * Progress is reported through `console.log` at each stage (path check,
 * read, parse, result size).
 *
 * @param pdfPath - Filesystem path of the PDF to read.
 * @returns The parsed text, or an empty string when the parse result has no
 *   text.
 * @throws Error - Any failure (missing file, read error, parse error) is
 *   logged and rethrown as a new `Error` whose message starts with
 *   "Failed to extract text from PDF file".
 */
export async function extractTextFromPdfFile(pdfPath: string): Promise<string> {
  console.log("=== Starting PDF text extraction ===");

  try {
    console.log("PDF path:", pdfPath);

    // Check up front so a missing file gets a dedicated message.
    const fileIsPresent = existsSync(pdfPath);
    if (!fileIsPresent) {
      console.error("PDF file does not exist:", pdfPath);
      throw new Error(`PDF file does not exist: ${pdfPath}`);
    }

    console.log("Reading file...");
    const pdfBytes = await readFile(pdfPath);
    console.log(`Read ${pdfBytes.length} bytes`);

    console.log("Parsing PDF...");
    const parsed = await pdfParseFix(pdfBytes);
    console.log("PDF parsed successfully");

    const extracted = parsed.text;
    const hasText = Boolean(extracted) && extracted.length !== 0;
    if (hasText) {
      console.log(`Extracted ${extracted.length} characters of text`);
      return extracted;
    }

    // Nothing usable came back: report it and return an empty result.
    console.warn("No text extracted from PDF");
    return "";
  } catch (failure) {
    console.error("Error extracting text from PDF file:", failure);

    // Only Error instances contribute their message to the rethrown error.
    const wrappedMessage =
      failure instanceof Error
        ? `Failed to extract text from PDF file: ${failure.message}`
        : "Failed to extract text from PDF file";
    throw new Error(wrappedMessage);
  }
}