hzaustingg committed on
Commit
189733d
·
verified ·
1 Parent(s): 379d463

Upload streamlit_app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. streamlit_app.py +286 -0
streamlit_app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import tempfile
4
+ import fitz # PyMuPDF
5
+ from PIL import Image
6
+ import io
7
+ import base64
8
+ import time
9
+ from typing import Optional, List, Tuple
10
+ import logging
11
+
12
# Module-wide logger; INFO and above are emitted through the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Browser-tab title/icon and overall layout for the Streamlit page.
# NOTE(review): the page_icon text looks like an emoji that was decoded with
# the wrong charset — confirm against the upstream file before changing it.
_PAGE_OPTIONS = {
    "page_title": "PDF Viewer & Manager",
    "page_icon": "πŸ“„",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_OPTIONS)

# Stylesheet injected once at start-up: header banner, buttons, page frame,
# upload drop zone and statistics card.
_CUSTOM_CSS = """
<style>
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
color: white;
}
.stButton>button {
background-color: #667eea;
color: white;
border-radius: 5px;
border: none;
padding: 8px 16px;
transition: all 0.3s ease;
}
.stButton>button:hover {
background-color: #5a67d8;
transform: translateY(-1px);
}
.pdf-page {
background-color: white;
border-radius: 8px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
padding: 10px;
margin: 10px 0;
}
.upload-area {
border: 2px dashed #667eea;
border-radius: 10px;
padding: 30px;
text-align: center;
transition: all 0.3s ease;
}
.upload-area:hover {
border-color: #5a67d8;
background-color: #f8f9ff;
}
.stats-card {
background: white;
border-radius: 10px;
padding: 15px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin: 10px 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
73
+
74
def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
    """Render one page of a PDF as a small PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.
        width: Target width of the rendered image in pixels; the height
            follows from the page's own aspect ratio.

    Returns:
        The rendered page as an RGB image, or ``None`` when ``page_num`` is
        past the last page or when opening/rendering fails (the failure is
        logged, not raised).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            if page_num >= len(doc):
                return None
            page = doc.load_page(page_num)
            # Use ONE zoom factor for both axes. Scaling x by width/page_width
            # and y by width/page_height would force every page into a
            # width x width square and distort it.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        finally:
            # Always release the document, including on errors and early returns.
            doc.close()
    except Exception as e:
        logger.error(f"Error generating thumbnail: {e}")
    return None
88
+
89
def extract_pdf_info(pdf_path: str) -> dict:
    """Collect page count, metadata and file size for a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        On success, a dict with the keys ``page_count``, ``metadata``,
        ``file_size`` (in MB), ``created`` and ``modified``. ``created`` and
        ``modified`` fall back to ``"Unknown"`` when the metadata entry is
        missing or empty. On failure, ``{"error": <message>}`` (the failure
        is also logged).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            # Guard against a document that exposes no metadata mapping.
            metadata = doc.metadata or {}
            return {
                "page_count": len(doc),
                "metadata": metadata,
                "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
                # `or` (rather than a .get default) also covers keys that are
                # present but hold an empty string or None.
                "created": metadata.get("creationDate") or "Unknown",
                "modified": metadata.get("modDate") or "Unknown",
            }
        finally:
            # Release the document even when reading its properties fails.
            doc.close()
    except Exception as e:
        logger.error(f"Error extracting PDF info: {e}")
        return {"error": str(e)}
105
+
106
def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
    """Render a single PDF page into the Streamlit app.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to show.
        width: Width in pixels at which the page is rasterised; the height
            follows from the page's aspect ratio.

    Shows a warning when ``page_num`` is past the last page and an error
    message when opening or rendering fails; nothing is raised to the caller
    for those failures.
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            total_pages = len(doc)
            if page_num < total_pages:
                page = doc.load_page(page_num)
                # A single zoom factor keeps the page's aspect ratio; scaling
                # the two axes independently would squash it into a square.
                zoom = width / page.rect.width
                pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

                st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {total_pages}")
            else:
                st.warning(f"Page {page_num + 1} not found. PDF has {total_pages} pages.")
        finally:
            # Close the document on every path, including rendering errors.
            doc.close()
    except Exception as e:
        st.error(f"Error displaying PDF page: {e}")
121
+
122
def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
    """Show a row of clickable thumbnails for the first pages of a PDF.

    Args:
        pdf_path: Path of the PDF file to open.
        max_thumbnails: Upper bound on how many leading pages get a thumbnail.

    Each thumbnail has a "View Page N" button that stores the zero-based page
    index in ``st.session_state.current_page`` and reruns the script. Errors
    are reported with ``st.error`` instead of being raised.
    """
    try:
        # The document is only needed for its page count; close it before any
        # widget is drawn so an interrupted script run cannot leave it open.
        # Each thumbnail is rendered by get_pdf_thumbnail with its own handle.
        doc = fitz.open(pdf_path)
        try:
            page_total = len(doc)
        finally:
            doc.close()

        shown = min(max_thumbnails, page_total)
        if shown <= 0:
            # Nothing to lay out; asking Streamlit for zero columns is an error.
            return

        for i, col in enumerate(st.columns(shown)):
            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
            if thumbnail is None:
                continue
            with col:
                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
                if st.button(f"View Page {i+1}", key=f"page_{i}"):
                    st.session_state.current_page = i
                    st.rerun()
    except Exception as e:
        st.error(f"Error displaying thumbnails: {e}")
140
+
141
# Static HTML/markdown fragments rendered by main(); kept flush-left so the
# text handed to Streamlit is exactly what the inline literals contained.
# NOTE(review): several labels below look like emoji decoded with the wrong
# charset — confirm against the upstream file before changing them.
_HEADER_HTML = """
<div class="main-header">
<h1>πŸ“„ PDF Viewer & Manager</h1>
<p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">anycoder</a></p>
</div>
"""

_UPLOAD_AREA_HTML = """
<div class="upload-area">
<h3>Drop your PDF here or click to browse</h3>
<p>Supports PDF files only</p>
</div>
"""

_FEATURES = (
    "πŸ“– View PDF pages with high quality rendering",
    "πŸ–ΌοΈ Browse through thumbnails of all pages",
    "πŸ“₯ Download original PDF file",
    "πŸ“„ Extract text content from PDF",
    "πŸ“Š View detailed PDF metadata and statistics",
    "πŸ”„ Navigate between pages easily",
)


def _init_session_state() -> None:
    """Create every session-state key the app reads, if not present yet."""
    defaults = {
        "uploaded_file": None,   # path of the temp copy of the current PDF
        "current_page": 0,       # zero-based index of the page being shown
        "pdf_info": None,        # result of extract_pdf_info for that copy
        "upload_token": None,    # identity of the upload the temp copy came from
        "uploader_nonce": 0,     # bumped to give the uploader widget a fresh key
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _discard_current_pdf() -> None:
    """Delete the temp copy of the current PDF and reset all per-PDF state."""
    path = st.session_state.uploaded_file
    if path and os.path.exists(path):
        os.unlink(path)
    st.session_state.uploaded_file = None
    st.session_state.pdf_info = None
    st.session_state.upload_token = None
    st.session_state.current_page = 0


def _usable_pdf_info() -> Optional[dict]:
    """Return the stored PDF info, or None when it is missing or an error dict."""
    info = st.session_state.pdf_info
    if info and "error" not in info:
        return info
    return None


def _sync_upload(uploaded_file) -> None:
    """Bring the temp copy in session state in line with the uploader widget.

    The script reruns on every interaction, so the upload is written to disk
    only when it differs from the one already stored; the previous temp file
    is removed first. When the uploader has been emptied, the stored copy is
    discarded as well.
    """
    if uploaded_file is None:
        if st.session_state.upload_token is not None:
            _discard_current_pdf()
        return

    token = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )
    current_path = st.session_state.uploaded_file
    if (
        token == st.session_state.upload_token
        and current_path
        and os.path.exists(current_path)
    ):
        return  # same upload as last run; the temp copy is still valid

    # New upload: drop the old copy (this also resets current_page to 0).
    _discard_current_pdf()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        temp_path = tmp_file.name

    st.session_state.uploaded_file = temp_path
    st.session_state.pdf_info = extract_pdf_info(temp_path)
    st.session_state.upload_token = token


def _render_sidebar() -> None:
    """Draw the sidebar: uploader, file information, page selector, clear button."""
    with st.sidebar:
        st.header("πŸ“‹ Navigation")

        st.subheader("Upload PDF")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=["pdf"],
            help="Upload a PDF file to view and manage",
            # The key changes after "Clear PDF", which empties the uploader.
            key=f"pdf_uploader_{st.session_state.uploader_nonce}",
        )
        _sync_upload(uploaded_file)

        info = _usable_pdf_info()
        if info:
            st.markdown("### πŸ“Š File Information")
            st.write(f"**Pages:** {info['page_count']}")
            st.write(f"**Size:** {info['file_size']:.2f} MB")
            st.write(f"**Created:** {info.get('created', 'N/A')}")
            st.write(f"**Modified:** {info.get('modified', 'N/A')}")

            page_count = info["page_count"]
            if page_count >= 1:
                # Keep the stored index inside the document so the widget's
                # value can never fall outside [min_value, max_value].
                st.session_state.current_page = max(
                    0, min(st.session_state.current_page, page_count - 1)
                )
                _, middle, _ = st.columns([1, 2, 1])
                with middle:
                    current_page = st.number_input(
                        "Page",
                        min_value=1,
                        max_value=page_count,
                        value=st.session_state.current_page + 1,
                        key="page_input",
                    )
                if current_page != st.session_state.current_page + 1:
                    st.session_state.current_page = current_page - 1
                    st.rerun()

        if st.button("πŸ—‘οΈ Clear PDF", type="primary"):
            _discard_current_pdf()
            # A fresh widget key makes the uploader come back empty; otherwise
            # the same file would be saved again on the next run.
            st.session_state.uploader_nonce += 1
            st.rerun()


def _render_viewer() -> None:
    """Draw the main area for a loaded PDF: current page, thumbnails, actions."""
    pdf_path = st.session_state.uploaded_file
    info = _usable_pdf_info()

    st.markdown("### πŸ“„ PDF Content")
    st.markdown(f"#### Page {st.session_state.current_page + 1}")
    display_pdf_page(pdf_path, st.session_state.current_page)

    # `info` is None when extraction failed, so the error dict is never indexed.
    if info and info["page_count"] > 1:
        st.markdown("### πŸ–ΌοΈ Page Thumbnails")
        display_pdf_thumbnails(pdf_path)

    st.markdown("### ⚑ Actions")
    download_col, text_col, stats_col = st.columns(3)

    with download_col:
        if st.button("πŸ“₯ Download Original"):
            with open(pdf_path, "rb") as f:
                base64_pdf = base64.b64encode(f.read()).decode('utf-8')
            href = f'<a href="data:application/pdf;base64,{base64_pdf}" download="document.pdf">Download PDF</a>'
            st.markdown(href, unsafe_allow_html=True)

    with text_col:
        if st.button("πŸ“„ Extract Text"):
            try:
                doc = fitz.open(pdf_path)
                try:
                    text = "".join(page.get_text() for page in doc)
                finally:
                    doc.close()
                st.text_area("Extracted Text", text, height=200)
            except Exception as e:
                st.error(f"Error extracting text: {e}")

    with stats_col:
        if st.button("πŸ“Š PDF Stats"):
            if info:
                st.json({
                    "page_count": info["page_count"],
                    "file_size_mb": info["file_size"],
                    "metadata": info["metadata"]
                })
            else:
                st.warning("No PDF info available")


def _render_landing() -> None:
    """Draw the main area shown while no PDF is loaded."""
    st.markdown("### πŸ“€ Upload PDF File")
    st.markdown(_UPLOAD_AREA_HTML, unsafe_allow_html=True)

    st.markdown("### ✨ Features")
    for feature in _FEATURES:
        st.markdown(f"- {feature}")


def main():
    """Run one pass of the Streamlit script: header, sidebar, then main area.

    The main area shows the viewer when a temp copy of an uploaded PDF exists
    on disk, and the landing/upload hints otherwise.
    """
    _init_session_state()

    st.markdown(_HEADER_HTML, unsafe_allow_html=True)

    _render_sidebar()

    pdf_path = st.session_state.uploaded_file
    if pdf_path and os.path.exists(pdf_path):
        _render_viewer()
    else:
        _render_landing()
284
+
285
# Start the app only when this file is run as the main script, not on import.
if __name__ == "__main__":
    main()