Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- app.py +22 -4
- requirements.txt +5 -12
app.py
CHANGED
|
@@ -422,6 +422,8 @@ import nltk
|
|
| 422 |
nltk.download('stopwords')
|
| 423 |
from nltk.corpus import stopwords
|
| 424 |
from collections import Counter
|
|
|
|
|
|
|
| 425 |
|
| 426 |
def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
| 427 |
# Create a temporary file to save the modified PDF
|
|
@@ -484,6 +486,19 @@ def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
|
| 484 |
|
| 485 |
# Example usage
|
| 486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
# Function to display PDF in Streamlit
|
| 488 |
def display_highlighted_pdf():
|
| 489 |
pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
|
|
@@ -494,12 +509,15 @@ def display_highlighted_pdf():
|
|
| 494 |
|
| 495 |
print(highlighted_pdf_path)
|
| 496 |
|
| 497 |
-
with open(highlighted_pdf_path, "rb") as file:
|
| 498 |
-
|
| 499 |
|
| 500 |
-
|
| 501 |
-
pdf_viewer(pdf_bytes, width=700)
|
|
|
|
| 502 |
|
|
|
|
|
|
|
| 503 |
|
| 504 |
display_highlighted_pdf()
|
| 505 |
|
|
|
|
| 422 |
nltk.download('stopwords')
|
| 423 |
from nltk.corpus import stopwords
|
| 424 |
from collections import Counter
|
| 425 |
+
from streamlit_image_zoom import image_zoom
|
| 426 |
+
from PIL import Image
|
| 427 |
|
| 428 |
def highlight_pdf(file_path, text_to_highlight, page_numbers):
|
| 429 |
# Create a temporary file to save the modified PDF
|
|
|
|
| 486 |
|
| 487 |
# Example usage
|
| 488 |
|
| 489 |
+
def pdf_to_images(pdf_path, page_numbers):
|
| 490 |
+
doc = fitz.open(pdf_path)
|
| 491 |
+
images = []
|
| 492 |
+
for page_number in page_numbers:
|
| 493 |
+
page = doc.load_page(page_number - 1)
|
| 494 |
+
pix = page.get_pixmap()
|
| 495 |
+
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 496 |
+
buf = io.BytesIO()
|
| 497 |
+
img.save(buf, format="PNG")
|
| 498 |
+
byte_im = buf.getvalue()
|
| 499 |
+
images.append(byte_im)
|
| 500 |
+
return images
|
| 501 |
+
|
| 502 |
# Function to display PDF in Streamlit
|
| 503 |
def display_highlighted_pdf():
|
| 504 |
pdf_path = "Bhagavad-Gita-As-It-Is.pdf"
|
|
|
|
| 509 |
|
| 510 |
print(highlighted_pdf_path)
|
| 511 |
|
| 512 |
+
# with open(highlighted_pdf_path, "rb") as file:
|
| 513 |
+
# pdf_bytes = file.read()
|
| 514 |
|
| 515 |
+
# # Use pdf_viewer to display the PDF in Streamlit
|
| 516 |
+
# pdf_viewer(pdf_bytes, width=700)
|
| 517 |
+
images = pdf_to_images(highlighted_pdf_path, sources)
|
| 518 |
|
| 519 |
+
for img in images:
|
| 520 |
+
image_zoom(img)
|
| 521 |
|
| 522 |
display_highlighted_pdf()
|
| 523 |
|
requirements.txt
CHANGED
|
@@ -1,22 +1,15 @@
|
|
| 1 |
-
|
| 2 |
-
sentence-transformers==2.2.2
|
| 3 |
datasets
|
| 4 |
torch
|
| 5 |
streamlit-chat-media
|
| 6 |
streamlit-chat
|
| 7 |
-
transformers
|
| 8 |
PyPDF2
|
| 9 |
ratelimit
|
| 10 |
backoff
|
| 11 |
tqdm
|
| 12 |
openai
|
| 13 |
PyMuPDF # instead of fitz
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
# faiss-gpu
|
| 18 |
-
faiss-cpu==1.7.2
|
| 19 |
-
stqdm
|
| 20 |
-
python-dotenv
|
| 21 |
-
# langchain-huggingface
|
| 22 |
-
streamlit_pdf_viewer
|
|
|
|
| 1 |
+
sentence-transformers
|
|
|
|
| 2 |
datasets
|
| 3 |
torch
|
| 4 |
streamlit-chat-media
|
| 5 |
streamlit-chat
|
| 6 |
+
transformers
|
| 7 |
PyPDF2
|
| 8 |
ratelimit
|
| 9 |
backoff
|
| 10 |
tqdm
|
| 11 |
openai
|
| 12 |
PyMuPDF # instead of fitz
|
| 13 |
+
reportlab
|
| 14 |
+
PyPDF2Highlight
|
| 15 |
+
streamlit-image-zoom
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|