Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,10 +3,13 @@ from PyPDF2 import PdfMerger
|
|
| 3 |
from pdf2image import convert_from_path
|
| 4 |
from google.cloud import vision
|
| 5 |
from google.oauth2 import service_account
|
| 6 |
-
import
|
|
|
|
|
|
|
| 7 |
import os
|
| 8 |
import tempfile
|
| 9 |
import json
|
|
|
|
| 10 |
|
| 11 |
# Load Google Cloud Vision credentials from secret
|
| 12 |
credentials_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
|
|
@@ -50,33 +53,70 @@ def process_pdf(file):
|
|
| 50 |
|
| 51 |
return text
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Function to merge PDFs
|
| 54 |
-
def merge_pdfs(
|
| 55 |
"""Merges multiple PDFs into one."""
|
| 56 |
merger = PdfMerger()
|
| 57 |
-
for
|
| 58 |
-
|
| 59 |
-
merger.append(
|
| 60 |
output_filename = "combined_document.pdf"
|
| 61 |
with open(output_filename, "wb") as output_file:
|
| 62 |
merger.write(output_file)
|
| 63 |
return output_filename
|
| 64 |
|
| 65 |
-
#
|
| 66 |
st.sidebar.title("Tool Selector")
|
| 67 |
selection = st.sidebar.radio("Choose a tool:", ["PDF Combiner", "PDF Transcriber"])
|
| 68 |
|
| 69 |
-
# PDF Combiner
|
| 70 |
if selection == "PDF Combiner":
|
| 71 |
-
st.title("PDF Combiner
|
| 72 |
-
st.write("Upload
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# PDF Transcriber Tool
|
| 82 |
elif selection == "PDF Transcriber":
|
|
|
|
| 3 |
from pdf2image import convert_from_path
|
| 4 |
from google.cloud import vision
|
| 5 |
from google.oauth2 import service_account
|
| 6 |
+
import fitz # PyMuPDF
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from io import BytesIO
|
| 9 |
import os
|
| 10 |
import tempfile
|
| 11 |
import json
|
| 12 |
+
from streamlit_sortables import sort_items
|
| 13 |
|
| 14 |
# Load Google Cloud Vision credentials from secret
|
| 15 |
credentials_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
|
|
|
|
| 53 |
|
| 54 |
return text
|
| 55 |
|
| 56 |
+
# Function to generate thumbnail from PDF
|
| 57 |
+
def get_pdf_thumbnail(uploaded_file, size=(100, 140)):
    """Render the first page of a PDF as a small RGB thumbnail image.

    Args:
        uploaded_file: Seekable binary file-like object containing a PDF.
            It is rewound before reading and left positioned at EOF.
        size: Maximum ``(width, height)`` of the thumbnail in pixels.
            ``Image.thumbnail`` shrinks in place and keeps the aspect ratio.

    Returns:
        A PIL image of the first page, no larger than ``size``.
    """
    # Rewind first: the same upload object is read more than once per run.
    uploaded_file.seek(0)
    # Close the PyMuPDF document deterministically instead of leaking one
    # open document per thumbnail on every script rerun.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        page = doc.load_page(0)
        # alpha=False keeps 3 bytes per pixel, matching the "RGB" mode below.
        pix = page.get_pixmap(alpha=False)
        # frombytes copies the pixel data, so the image outlives the document.
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    img.thumbnail(size)
    return img
|
| 66 |
+
|
| 67 |
# Function to merge PDFs
|
| 68 |
+
def merge_pdfs(reordered_files):
    """Merge multiple PDFs into one file, preserving the given order.

    Args:
        reordered_files: Iterable of seekable binary file-like objects, each
            containing a PDF, in the order they should appear in the output.

    Returns:
        The path of the written file, ``"combined_document.pdf"``, relative
        to the current working directory.
    """
    output_filename = "combined_document.pdf"
    merger = PdfMerger()
    try:
        for file in reordered_files:
            # The upload may already have been read (e.g. for a thumbnail).
            file.seek(0)
            merger.append(file)
        with open(output_filename, "wb") as output_file:
            merger.write(output_file)
    finally:
        # Release the merger's buffered inputs even if append/write fails.
        merger.close()
    return output_filename
|
| 78 |
|
| 79 |
+
# Sidebar Navigation
|
| 80 |
st.sidebar.title("Tool Selector")
selection = st.sidebar.radio("Choose a tool:", ["PDF Combiner", "PDF Transcriber"])

# PDF Combiner with Preview and Reordering
if selection == "PDF Combiner":
    st.title("PDF Combiner with Preview & Reordering")
    st.write("Upload individual PDF pages, visualize them, reorder, and merge into a single PDF.")

    uploaded_files = st.file_uploader("Upload PDF pages", type="pdf", accept_multiple_files=True)

    if uploaded_files:
        # Build one unique label per upload. Two uploads can share a filename,
        # and looking files up by bare name would map both to the first one,
        # silently dropping the other from the merge.
        seen_counts = {}
        files_by_label = {}
        for file in uploaded_files:
            occurrence = seen_counts.get(file.name, 0) + 1
            seen_counts[file.name] = occurrence
            # The first occurrence keeps the plain filename; later ones get a suffix.
            label = file.name if occurrence == 1 else f"{file.name} ({occurrence})"
            files_by_label[label] = file

        # Display the labels for drag-and-drop reordering
        st.write("**Drag and drop to reorder the PDFs:**")
        reordered_labels = sort_items(list(files_by_label))

        # Map the labels back to the corresponding files
        reordered_files = [files_by_label[label] for label in reordered_labels]

        # Display the thumbnails in the new order (each PDF is rendered once)
        st.write("**Preview of selected order:**")
        cols = st.columns(len(reordered_files))
        for col, file in zip(cols, reordered_files):
            with col:
                st.image(get_pdf_thumbnail(file), caption=file.name, use_container_width=True)

        # Merge PDFs in the specified order
        if st.button("Merge PDFs"):
            output_file = merge_pdfs(reordered_files)
            st.success("PDF pages combined successfully!")
            try:
                with open(output_file, "rb") as f:
                    pdf_bytes = f.read()
            finally:
                # Remove the on-disk copy after the handle is closed, even if
                # reading fails; the download is served from memory.
                os.remove(output_file)
            st.download_button("Download Combined PDF", pdf_bytes, file_name=output_file, mime="application/pdf")
|
| 120 |
|
| 121 |
# PDF Transcriber Tool
|
| 122 |
elif selection == "PDF Transcriber":
|