Navneet Sai committed on
Commit
376e7ad
·
1 Parent(s): 9ba2bd3

Switch from PyMuPDF to PyPDF for HF Compatibility

Browse files
Files changed (2) hide show
  1. app.py +5 -6
  2. requirements.txt +4 -5
app.py CHANGED
@@ -8,7 +8,7 @@ import tempfile
8
  from typing import Optional
9
 
10
  import chromadb
11
- import fitz # PyMuPDF
12
  import gradio as gr
13
  from chromadb.utils import embedding_functions
14
  from openai import OpenAI
@@ -28,12 +28,11 @@ current_chunks = []
28
 
29
 
30
  def extract_text_from_pdf(file_path: str) -> str:
31
- """Extract text from PDF using PyMuPDF."""
32
- doc = fitz.open(file_path)
33
  text = ""
34
- for page in doc:
35
- text += page.get_text()
36
- doc.close()
37
  return text
38
 
39
 
 
8
  from typing import Optional
9
 
10
  import chromadb
11
+ from pypdf import PdfReader # pypdf (replaces PyMuPDF)
12
  import gradio as gr
13
  from chromadb.utils import embedding_functions
14
  from openai import OpenAI
 
28
 
29
 
30
  def extract_text_from_pdf(file_path: str) -> str:
31
+ """Extract text from PDF using pypdf."""
32
+ reader = PdfReader(file_path)
33
  text = ""
34
+ for page in reader.pages:
35
+ text += page.extract_text() or ""
 
36
  return text
37
 
38
 
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- gradio==4.44.1
2
- chromadb==0.4.22
3
- sentence-transformers==2.2.2
4
- openai==1.12.0
5
- PyMuPDF==1.23.8
 
1
+ chromadb
2
+ sentence-transformers
3
+ openai
4
+ pypdf