dvc890 committed on
Commit
5366622
·
verified ·
1 Parent(s): a7e03d0

Upload 67 files

Browse files
Files changed (1) hide show
  1. utils/documentParser.ts +14 -3
utils/documentParser.ts CHANGED
@@ -2,10 +2,17 @@
2
  // @ts-ignore
3
  import mammoth from 'mammoth';
4
  // @ts-ignore
5
- import * as pdfjsLib from 'pdfjs-dist';
6
 
7
- // Set worker for PDF.js. Using the same version from ESM CDN.
8
- pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://esm.sh/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
 
 
 
 
 
 
 
9
 
10
  export interface ParsedDocument {
11
  fileName: string;
@@ -56,6 +63,10 @@ const parsePdf = async (file: File): Promise<string> => {
56
  reader.onload = async (e) => {
57
  try {
58
  const typedarray = new Uint8Array(e.target?.result as ArrayBuffer);
 
 
 
 
59
  const pdf = await pdfjsLib.getDocument(typedarray).promise;
60
  let fullText = '';
61
 
 
2
  // @ts-ignore
3
  import mammoth from 'mammoth';
4
  // @ts-ignore
5
+ import * as pdfjsProxy from 'pdfjs-dist';
6
 
7
+ // Handle potential default export (ESM vs CommonJS interop issues)
8
+ const pdfjsLib = (pdfjsProxy as any).default || pdfjsProxy;
9
+
10
+ // Set worker for PDF.js safely. Using the same version from ESM CDN.
11
+ if (pdfjsLib && pdfjsLib.GlobalWorkerOptions) {
12
+ pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://esm.sh/pdfjs-dist@3.11.174/build/pdf.worker.min.js';
13
+ } else {
14
+ console.warn("PDF.js GlobalWorkerOptions not found. PDF parsing might fail.");
15
+ }
16
 
17
  export interface ParsedDocument {
18
  fileName: string;
 
63
  reader.onload = async (e) => {
64
  try {
65
  const typedarray = new Uint8Array(e.target?.result as ArrayBuffer);
66
+ // Ensure we call getDocument on the resolved library object
67
+ if (!pdfjsLib || !pdfjsLib.getDocument) {
68
+ throw new Error('PDF.js library not loaded correctly');
69
+ }
70
  const pdf = await pdfjsLib.getDocument(typedarray).promise;
71
  let fullText = '';
72