Spaces:

Nav772
/

rag-qa-document

Sleeping

Navneet Sai committed on Feb 14

Commit

376e7ad

1 Parent(s): 9ba2bd3

Switch from PyMuPDF to PyPDF for HF Compatibility

Files changed (2) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import tempfile
 from typing import Optional
 import chromadb
-import fitz  # PyMuPDF
 import gradio as gr
 from chromadb.utils import embedding_functions
 from openai import OpenAI
@@ -28,12 +28,11 @@ current_chunks = []
 def extract_text_from_pdf(file_path: str) -> str:
-    """Extract text from PDF using PyMuPDF."""
-    doc = fitz.open(file_path)
     text = ""
-    for page in doc:
-        text += page.get_text()
-    doc.close()
     return text

 from typing import Optional
 import chromadb
+from pypdf import PdfReader  # PyMuPDF
 import gradio as gr
 from chromadb.utils import embedding_functions
 from openai import OpenAI
 def extract_text_from_pdf(file_path: str) -> str:
+    """Extract text from PDF using pypdf."""
+    reader = PdfReader(file_path)
     text = ""
+    for page in reader.pages:
+        text += page.extract_text() or ""
     return text

requirements.txt CHANGED Viewed

@@ -1,5 +1,4 @@
-gradio==4.44.1
-chromadb==0.4.22
-sentence-transformers==2.2.2
-openai==1.12.0
-PyMuPDF==1.23.8

+chromadb
+sentence-transformers
+openai
+pypdf