errantanomie committed on
Commit
652de8b
·
verified ·
1 Parent(s): c0b55ba

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Add this to your imports
2
+ from streamlit_sortables import sort_items
3
+ import uuid
4
+
5
+ # Function to get a thumbnail, using your original function
6
def get_pdf_thumbnail(uploaded_file, page_num):
    """Generate a thumbnail image of one page of an uploaded PDF.

    Args:
        uploaded_file: Seekable binary file-like object holding the PDF.
            It is rewound to the start before being read.
        page_num: Zero-based index of the page to render.

    Returns:
        The rendered page as an image, shrunk in place to fit within
        100x140 pixels.
    """
    uploaded_file.seek(0)
    # Close the document as soon as the page has been rasterised so that
    # repeated calls (one per page or per rerun) do not pile up open documents.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        pix = doc.load_page(page_num).get_pixmap()
        # Build the image while the document is still open; the pixel data
        # is copied out of the pixmap here.
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    img.thumbnail((100, 140))
    return img
15
+
16
+ # Sidebar Navigation
17
# Tools offered in the sidebar; the chosen label drives the branch below.
tool_options = [
    "PDF Combiner",
    "PDF Transcriber",
    "PDF Rotator",
    "PDF Document Separator",
]
st.sidebar.title("Tool Selector")
selection = st.sidebar.radio("Choose a tool:", tool_options)
19
+
20
+ # PDF Document Separator Tool
21
if selection == "PDF Document Separator":
    st.title("PDF Document Separator")
    st.write("Upload a multi-document PDF and separate the documents out.")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

    if uploaded_file:
        # Initialize session state variables if not already defined.
        if "uploaded_pdf" not in st.session_state:
            st.session_state.uploaded_pdf = None
        if "target_pages" not in st.session_state:
            # List of dicts with keys "page_index", "rotation", "uuid", "image".
            st.session_state.target_pages = []
        if "available_pages" not in st.session_state:
            st.session_state.available_pages = []

        # The page lists are cached across reruns. Drop them when a different
        # file is uploaded, otherwise the previous PDF's thumbnails stay on
        # screen. Name + size is used as a cheap identity for the upload.
        source_id = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("separator_source_id") != source_id:
            st.session_state.separator_source_id = source_id
            st.session_state.available_pages = []
            st.session_state.target_pages = []

        st.session_state.uploaded_pdf = uploaded_file

        if not st.session_state.available_pages:
            uploaded_file.seek(0)
            # Open the document once and render every page from it, instead of
            # re-reading and re-parsing the whole PDF for each page. The
            # rendering mirrors get_pdf_thumbnail (100x140 bounding box).
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                for page_num in range(len(doc)):
                    pix = doc.load_page(page_num).get_pixmap()
                    image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    image.thumbnail((100, 140))
                    st.session_state.available_pages.append(
                        {
                            "page_index": page_num,
                            "rotation": 0,
                            "uuid": str(uuid.uuid4()),
                            "image": image,
                        }
                    )

        left_col, right_col = st.columns(2)

        with left_col:
            st.header("Document Overview")
            st.write("Drag pages to the right to start creating the new document.")
            for page in st.session_state.available_pages:
                st.image(page["image"], caption=f"Page {page['page_index'] + 1}", use_container_width=True)

        with right_col:
            st.header("Target Document Builder")
            st.write("Reorder the pages, change rotation, and create the final document.")

            # NOTE(review): nothing in the visible code adds entries to
            # target_pages, so this branch only runs if another part of the
            # app populates it - confirm.
            if st.session_state.target_pages:
                # Use sort_items for the right-hand column to manage reordering.
                pages_by_uuid = {page["uuid"]: page for page in st.session_state.target_pages}
                target_page_uuids = list(pages_by_uuid)
                reordered_page_uuids = sort_items(target_page_uuids)
                # Named page_uuid so the uuid module is not shadowed.
                st.session_state.target_pages = [
                    pages_by_uuid[page_uuid] for page_uuid in reordered_page_uuids
                ]

                rotation_choices = [0, 90, 180, 270]
                for page in st.session_state.target_pages:
                    with st.expander(f"Page {page['page_index'] + 1} Options", expanded=True):
                        col1, col2 = st.columns([1, 1])
                        with col1:
                            st.image(page["image"], use_container_width=True)
                        with col2:
                            # The page's uuid keeps each selectbox key unique.
                            rotation = st.selectbox(
                                "Rotation",
                                rotation_choices,
                                key=page["uuid"],
                                index=rotation_choices.index(page["rotation"]),
                            )
                            if rotation != page["rotation"]:
                                page["rotation"] = rotation
76
+
77
+
78
+ # The rest of your original app.py code should go here
79
+ # PDF Combiner with Preview and Reordering
80
+ elif selection == "PDF Combiner":
81
+ st.title("PDF Combiner with Preview & Reordering")
82
+ st.write("Upload individual PDF pages, visualize them, reorder, and merge into a single PDF.")
83
+
84
+ uploaded_files = st.file_uploader("Upload PDF pages", type="pdf", accept_multiple_files=True)
85
+
86
+ if uploaded_files:
87
+ # Generate thumbnails and filenames for each uploaded PDF
88
+ thumbnails = []
89
+ filenames = []
90
+
91
+ for file in uploaded_files:
92
+ thumbnails.append(get_pdf_thumbnail(file, 0))
93
+ filenames.append(file.name)
94
+
95
+ # Display thumbnails with filenames for reordering
96
+ st.write("**Drag and drop to reorder the PDFs:**")
97
+ reordered_filenames = sort_items(filenames)
98
+
99
+ # Map the filenames back to the corresponding files
100
+ reordered_files = [uploaded_files[filenames.index(name)] for name in reordered_filenames]
101
+
102
+ # Display the thumbnails in the new order
103
+ st.write("**Preview of selected order:**")
104
+ cols = st.columns(len(reordered_files))
105
+ for idx, file in enumerate(reordered_files):
106
+ with cols[idx]:
107
+ st.image(get_pdf_thumbnail(file, 0), caption=file.name, use_container_width=True)
108
+
109
+ # Merge PDFs in the specified order
110
+ if st.button("Merge PDFs"):
111
+ output_file = merge_pdfs(reordered_files)
112
+ st.success("PDF pages combined successfully!")
113
+ download_file(output_file, generate_unique_filename("combined_document", "", ".pdf"), "application/pdf")
114
+ os.remove(output_file)
115
+
116
+ # PDF Transcriber Tool
117
+ elif selection == "PDF Transcriber":
118
+ st.title("PDF Transcriber Tool")
119
+ st.write("Upload a scanned PDF to transcribe the text.")
120
+ if not client:
121
+ st.error("Google Cloud credentials are not set. Please configure the secret.")
122
+ uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
123
+ if uploaded_file and st.button("Transcribe PDF"):
124
+ with st.spinner("Processing..."):
125
+ output_file = process_pdf(uploaded_file)
126
+ if output_file:
127
+ st.success("Text extraction complete!")
128
+ st.text_area("Extracted Text", open(output_file, encoding="utf-8").read(), height=400)
129
+ output_file_name = generate_unique_filename(uploaded_file.name, "(T)", ".txt")
130
+ with open(output_file, "rb") as f:
131
+ st.download_button("Download Extracted Text", f, file_name=output_file_name, mime="text/plain")
132
+ os.remove(output_file)
133
+ else:
134
+ st.error("Could not process the PDF, please try again")
135
+
136
+ # PDF Rotator Tool
137
+ elif selection == "PDF Rotator":
138
+ st.title("PDF Rotator")
139
+ st.write("Upload a PDF and rotate all pages by a specified angle.")
140
+
141
+ pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
142
+ rotation_angle = st.selectbox("Select rotation angle:", [90, 180, 270])
143
+
144
+ if pdf_file:
145
+ if 'rotated_doc' not in st.session_state:
146
+ st.session_state.rotated_doc = None
147
+ if st.button("Rotate PDF"):
148
+ with st.spinner("Rotating PDF..."):
149
+ st.session_state.rotated_doc = rotate_pdf(pdf_file, rotation_angle)
150
+ st.success("PDF rotated successfully!")
151
+ if st.session_state.rotated_doc:
152
+ display_pdf_preview(st.session_state.rotated_doc)
153
+ if st.button("Download Rotated PDF"):
154
+ with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
155
+ st.session_state.rotated_doc.save(temp_pdf.name)
156
+ download_file(temp_pdf.name, generate_unique_filename(pdf_file.name, "(R)", ".pdf"), "application/pdf")
157
+ os.remove(temp_pdf.name)