Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +6 -4
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -23,7 +23,8 @@ from langchain.chains.question_answering import load_qa_chain
|
|
| 23 |
from langchain.prompts import PromptTemplate
|
| 24 |
load_dotenv()
|
| 25 |
import datetime
|
| 26 |
-
|
|
|
|
| 27 |
history=[
|
| 28 |
{
|
| 29 |
"parts": [
|
|
@@ -462,9 +463,10 @@ history=[
|
|
| 462 |
def get_pdf_text(pdf_docs):
|
| 463 |
text = ""
|
| 464 |
for pdf in pdf_docs:
|
| 465 |
-
pdf_reader =
|
| 466 |
-
for
|
| 467 |
-
|
|
|
|
| 468 |
return text
|
| 469 |
|
| 470 |
def get_text_chunks(text):
|
|
|
|
| 23 |
from langchain.prompts import PromptTemplate
|
| 24 |
load_dotenv()
|
| 25 |
import datetime
|
| 26 |
+
import fitz # PyMuPDF
|
| 27 |
+
|
| 28 |
history=[
|
| 29 |
{
|
| 30 |
"parts": [
|
|
|
|
| 463 |
def get_pdf_text(pdf_docs):
|
| 464 |
text = ""
|
| 465 |
for pdf in pdf_docs:
|
| 466 |
+
pdf_reader = fitz.open(pdf)
|
| 467 |
+
for page_num in range(pdf_reader.page_count):
|
| 468 |
+
page = pdf_reader[page_num]
|
| 469 |
+
text += page.get_text()
|
| 470 |
return text
|
| 471 |
|
| 472 |
def get_text_chunks(text):
|
requirements.txt
CHANGED
|
@@ -7,4 +7,5 @@ PyPDF2
|
|
| 7 |
chromadb
|
| 8 |
faiss-cpu
|
| 9 |
langchain_google_genai
|
| 10 |
-
langchain-community
|
|
|
|
|
|
| 7 |
chromadb
|
| 8 |
faiss-cpu
|
| 9 |
langchain_google_genai
|
| 10 |
+
langchain-community
|
| 11 |
+
PyMuPDF
|