Spaces:
Sleeping
Sleeping
Upload 67 files
Browse files- utils/documentParser.ts +14 -3
utils/documentParser.ts
CHANGED
|
@@ -2,10 +2,17 @@
|
|
| 2 |
// @ts-ignore
|
| 3 |
import mammoth from 'mammoth';
|
| 4 |
// @ts-ignore
|
| 5 |
-
import * as
|
| 6 |
|
| 7 |
-
//
|
| 8 |
-
pdfjsLib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
export interface ParsedDocument {
|
| 11 |
fileName: string;
|
|
@@ -56,6 +63,10 @@ const parsePdf = async (file: File): Promise<string> => {
|
|
| 56 |
reader.onload = async (e) => {
|
| 57 |
try {
|
| 58 |
const typedarray = new Uint8Array(e.target?.result as ArrayBuffer);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
const pdf = await pdfjsLib.getDocument(typedarray).promise;
|
| 60 |
let fullText = '';
|
| 61 |
|
|
|
|
| 2 |
// @ts-ignore
|
| 3 |
import mammoth from 'mammoth';
|
| 4 |
// @ts-ignore
|
| 5 |
+
import * as pdfjsProxy from 'pdfjs-dist';
|
| 6 |
|
| 7 |
+
// ESM/CommonJS interop: if the module is exposed under `.default`, use that; otherwise use the namespace import itself.
|
| 8 |
+
const pdfjsLib = (pdfjsProxy as any).default || pdfjsProxy;
|
| 9 |
+
|
| 10 |
+
// Point PDF.js at its worker script on the esm.sh CDN. Keep the pinned version (3.11.174) in sync with the installed pdfjs-dist version.
|
| 11 |
+
if (pdfjsLib && pdfjsLib.GlobalWorkerOptions) {
|
| 12 |
+
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://esm.sh/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
|
| 13 |
+
} else {
|
| 14 |
+
console.warn("PDF.js GlobalWorkerOptions not found. PDF parsing might fail.");
|
| 15 |
+
}
|
| 16 |
|
| 17 |
export interface ParsedDocument {
|
| 18 |
fileName: string;
|
|
|
|
| 63 |
reader.onload = async (e) => {
|
| 64 |
try {
|
| 65 |
const typedarray = new Uint8Array(e.target?.result as ArrayBuffer);
|
| 66 |
+
// Fail fast with a clear error if the resolved PDF.js module is missing or does not expose getDocument
|
| 67 |
+
if (!pdfjsLib || !pdfjsLib.getDocument) {
|
| 68 |
+
throw new Error('PDF.js library not loaded correctly');
|
| 69 |
+
}
|
| 70 |
const pdf = await pdfjsLib.getDocument(typedarray).promise;
|
| 71 |
let fullText = '';
|
| 72 |
|