errantanomie committed on
Commit
85e5184
·
verified ·
1 Parent(s): ceec6a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -16
app.py CHANGED
@@ -3,10 +3,13 @@ from PyPDF2 import PdfMerger
3
  from pdf2image import convert_from_path
4
  from google.cloud import vision
5
  from google.oauth2 import service_account
6
- import io
 
 
7
  import os
8
  import tempfile
9
  import json
 
10
 
11
  # Load Google Cloud Vision credentials from secret
12
  credentials_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
@@ -50,33 +53,70 @@ def process_pdf(file):
50
 
51
  return text
52
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Function to merge PDFs
54
def merge_pdfs(uploaded_files):
    """Merge several PDFs into one file on disk.

    Args:
        uploaded_files: Iterable of binary, seekable file-like objects, each
            holding one PDF. They are appended in iteration order.

    Returns:
        The name of the file written, always ``"combined_document.pdf"``
        (relative to the current working directory).
    """
    output_filename = "combined_document.pdf"
    merger = PdfMerger()
    try:
        for uploaded_file in uploaded_files:
            # Rewind: the upload may already have been read earlier in the run.
            uploaded_file.seek(0)
            merger.append(uploaded_file)
        with open(output_filename, "wb") as output_file:
            merger.write(output_file)
    finally:
        # Release the merger's reader streams even if appending/writing fails.
        merger.close()
    return output_filename
64
 
65
# Sidebar: lets the user pick which tool the page shows.
st.sidebar.title("Tool Selector")
selection = st.sidebar.radio(
    label="Choose a tool:",
    options=["PDF Combiner", "PDF Transcriber"],
)

# PDF Combiner: upload several PDFs, merge them, offer the result for download.
if selection == "PDF Combiner":
    st.title("PDF Combiner Tool")
    st.write("Upload multiple PDF files to combine them into a single document.")
    uploaded_files = st.file_uploader(
        label="Upload PDF files",
        type="pdf",
        accept_multiple_files=True,
    )
    # The merge button is only rendered once at least one file is uploaded.
    if uploaded_files:
        if st.button("Merge PDFs"):
            output_file = merge_pdfs(uploaded_files)
            st.success("PDFs combined successfully!")
            with open(output_file, "rb") as combined_pdf:
                st.download_button(
                    label="Download Combined PDF",
                    data=combined_pdf,
                    file_name=output_file,
                    mime="application/pdf",
                )
            # The merged file on disk is removed right after the button is built.
            os.remove(output_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # PDF Transcriber Tool
82
  elif selection == "PDF Transcriber":
 
3
  from pdf2image import convert_from_path
4
  from google.cloud import vision
5
  from google.oauth2 import service_account
6
+ import fitz # PyMuPDF
7
+ from PIL import Image
8
+ from io import BytesIO
9
  import os
10
  import tempfile
11
  import json
12
+ from streamlit_sortables import sort_items
13
 
14
  # Load Google Cloud Vision credentials from secret
15
  credentials_json = os.getenv("GOOGLE_CREDENTIALS_JSON")
 
53
 
54
  return text
55
 
56
+ # Function to generate thumbnail from PDF
57
def get_pdf_thumbnail(uploaded_file):
    """Return a small preview image of the first page of a PDF.

    Args:
        uploaded_file: Binary, seekable file-like object holding a PDF.

    Returns:
        A PIL image in RGB mode, shrunk to fit within 100x140 pixels
        (``Image.thumbnail`` keeps the aspect ratio).
    """
    # Rewind first: the same upload object is read more than once per run.
    uploaded_file.seek(0)
    doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
    try:
        page = doc.load_page(0)
        # Pin RGB without alpha so the sample layout always matches the
        # "RGB" mode handed to Pillow below.
        pix = page.get_pixmap(colorspace=fitz.csRGB, alpha=False)
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    finally:
        # The image owns a copy of the pixels, so the document can be
        # released here instead of lingering across Streamlit reruns.
        doc.close()
    img.thumbnail((100, 140))
    return img
66
+
67
  # Function to merge PDFs
68
def merge_pdfs(reordered_files):
    """Merge several PDFs into one file on disk, in the given order.

    Args:
        reordered_files: Iterable of binary, seekable file-like objects, each
            holding one PDF. They are appended in iteration order.

    Returns:
        The name of the file written, always ``"combined_document.pdf"``
        (relative to the current working directory).
    """
    output_filename = "combined_document.pdf"
    merger = PdfMerger()
    try:
        for pdf_file in reordered_files:
            # Rewind: thumbnail generation has already read these uploads.
            pdf_file.seek(0)
            merger.append(pdf_file)
        with open(output_filename, "wb") as output_file:
            merger.write(output_file)
    finally:
        # Release the merger's reader streams even if appending/writing fails.
        merger.close()
    return output_filename
78
 
79
# Sidebar navigation: lets the user pick which tool the page shows.
st.sidebar.title("Tool Selector")
selection = st.sidebar.radio("Choose a tool:", ["PDF Combiner", "PDF Transcriber"])

# PDF Combiner with preview and reordering
if selection == "PDF Combiner":
    st.title("PDF Combiner with Preview & Reordering")
    st.write("Upload individual PDF pages, visualize them, reorder, and merge into a single PDF.")

    uploaded_files = st.file_uploader("Upload PDF pages", type="pdf", accept_multiple_files=True)

    if uploaded_files:
        # One label and one thumbnail per upload. Labels must be unique so
        # the reordered labels can be mapped back to the right file: two
        # uploads sharing a name would otherwise both resolve to the first.
        thumbnails = []
        filenames = []
        for position, file in enumerate(uploaded_files, start=1):
            label = file.name
            if label in filenames:
                # Disambiguate repeats with the upload position.
                label = f"{file.name} ({position})"
            thumbnails.append(get_pdf_thumbnail(file))
            filenames.append(label)
        previews = dict(zip(filenames, zip(uploaded_files, thumbnails)))

        # Sortable list of labels for drag-and-drop reordering
        st.write("**Drag and drop to reorder the PDFs:**")
        reordered_filenames = sort_items(filenames)

        # Map the labels back to the corresponding uploads
        reordered_files = [previews[label][0] for label in reordered_filenames]

        # Show the thumbnails in the chosen order, reusing the images
        # rendered above instead of rasterising every PDF a second time.
        st.write("**Preview of selected order:**")
        cols = st.columns(len(reordered_files))
        for col, label in zip(cols, reordered_filenames):
            with col:
                st.image(previews[label][1], caption=label, use_container_width=True)

        # Merge PDFs in the specified order
        if st.button("Merge PDFs"):
            output_file = merge_pdfs(reordered_files)
            st.success("PDF pages combined successfully!")
            with open(output_file, "rb") as f:
                st.download_button("Download Combined PDF", f, file_name=output_file, mime="application/pdf")
            # The merged file on disk is removed right after the button is built.
            os.remove(output_file)
120
 
121
  # PDF Transcriber Tool
122
  elif selection == "PDF Transcriber":