Spaces:
Sleeping
Sleeping
Update to fix ModuleNotFoundError: No module named 'PyMuPDF' (the PyMuPDF package is imported as `fitz`)
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
import uuid
|
| 4 |
import shutil
|
| 5 |
-
import PyMuPDF
|
| 6 |
from langchain_community.vectorstores import Chroma
|
| 7 |
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
|
| 8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
@@ -64,7 +64,7 @@ def process_pdf(pdf_file, state):
|
|
| 64 |
os.makedirs(state.vector_store_path)
|
| 65 |
|
| 66 |
# Extract text from the PDF
|
| 67 |
-
doc = PyMuPDF.open(pdf_file.name)
|
| 68 |
text = ""
|
| 69 |
for page in doc:
|
| 70 |
text += page.get_text()
|
|
|
|
| 2 |
import os
|
| 3 |
import uuid
|
| 4 |
import shutil
|
| 5 |
+
import fitz
|
| 6 |
from langchain_community.vectorstores import Chroma
|
| 7 |
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
|
| 8 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 64 |
os.makedirs(state.vector_store_path)
|
| 65 |
|
| 66 |
# Extract text from the PDF
|
| 67 |
+
doc = fitz.open(pdf_file.name)
|
| 68 |
text = ""
|
| 69 |
for page in doc:
|
| 70 |
text += page.get_text()
|