# anycoder-99f49d97 / streamlit_app.py
# Uploaded by hzaustingg: "Upload streamlit_app.py with huggingface_hub"
# Commit 189733d (verified)
import streamlit as st
import os
import tempfile
import fitz # PyMuPDF
from PIL import Image
import io
import base64
import time
from typing import Optional, List, Tuple
import logging
# Configure logging: the root logger emits INFO-level records and above.
logging.basicConfig(level=logging.INFO)
# Module-level logger; the PDF helper functions in this file report failures through it.
logger = logging.getLogger(__name__)
# Set page config: browser-tab title and icon, wide layout, sidebar open on load.
st.set_page_config(
    page_title="PDF Viewer & Manager",
    # The icon literal was mis-encoded (UTF-8 bytes read with the wrong codec);
    # restored to the intended "page facing up" emoji.
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Inject the app's custom CSS. The rules style the gradient header banner
# (.main-header), Streamlit buttons and their hover state (.stButton>button),
# and the card-like helper classes (.pdf-page, .upload-area, .stats-card).
# unsafe_allow_html=True is passed so the <style> element is emitted as HTML.
st.markdown("""
<style>
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
color: white;
}
.stButton>button {
background-color: #667eea;
color: white;
border-radius: 5px;
border: none;
padding: 8px 16px;
transition: all 0.3s ease;
}
.stButton>button:hover {
background-color: #5a67d8;
transform: translateY(-1px);
}
.pdf-page {
background-color: white;
border-radius: 8px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
padding: 10px;
margin: 10px 0;
}
.upload-area {
border: 2px dashed #667eea;
border-radius: 10px;
padding: 30px;
text-align: center;
transition: all 0.3s ease;
}
.upload-area:hover {
border-color: #5a67d8;
background-color: #f8f9ff;
}
.stats-card {
background: white;
border-radius: 10px;
padding: 15px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin: 10px 0;
}
</style>
""", unsafe_allow_html=True)
def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
    """Render one page of a PDF as a PIL image ``width`` pixels wide.

    Args:
        pdf_path: Filesystem path of the PDF to open.
        page_num: Zero-based index of the page to render.
        width: Target image width in pixels; the height follows from the
            page's own aspect ratio.

    Returns:
        The rendered page as an RGB ``PIL.Image.Image``, or ``None`` when
        ``page_num`` is past the last page or rendering fails (the failure
        is logged rather than raised).
    """
    try:
        # The context manager closes the document on every path, including
        # when rendering raises part-way through.
        with fitz.open(pdf_path) as doc:
            if page_num < len(doc):
                page = doc.load_page(page_num)
                # A single zoom factor for both axes preserves the aspect
                # ratio. Scaling x by width/page_width and y by
                # width/page_height squashes every page into a square.
                zoom = width / page.rect.width
                pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
                return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    except Exception as e:
        logger.error(f"Error generating thumbnail: {e}")
    return None
def extract_pdf_info(pdf_path: str) -> dict:
    """Collect the page count, metadata and file size of a PDF.

    Args:
        pdf_path: Filesystem path of the PDF to inspect.

    Returns:
        On success, a dict with the keys ``page_count`` (int), ``metadata``
        (dict), ``file_size`` (float, megabytes), ``created`` and
        ``modified`` (strings, ``"Unknown"`` when the document does not
        provide them). On failure, ``{"error": <message>}``; the failure is
        also logged.
    """
    try:
        # The context manager closes the document even if a lookup below raises.
        with fitz.open(pdf_path) as doc:
            # The metadata attribute may be None rather than a dict; fall back
            # to an empty dict so the lookups below stay valid.
            metadata = doc.metadata or {}
            return {
                "page_count": len(doc),
                "metadata": metadata,
                "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
                # Keys can be present with a None/empty value, in which case
                # a .get() default is never used; `or` covers both cases.
                "created": metadata.get("creationDate") or "Unknown",
                "modified": metadata.get("modDate") or "Unknown",
            }
    except Exception as e:
        logger.error(f"Error extracting PDF info: {e}")
        return {"error": str(e)}
def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
    """Render one PDF page and show it in the Streamlit app.

    Args:
        pdf_path: Filesystem path of the PDF to open.
        page_num: Zero-based index of the page to show.
        width: Width in pixels at which the page is rasterised; the height
            follows from the page's aspect ratio.

    Shows a Streamlit warning when ``page_num`` is past the last page and a
    Streamlit error when opening or rendering fails; nothing is raised.
    """
    try:
        # The context manager closes the document on every path, including
        # when rendering raises.
        with fitz.open(pdf_path) as doc:
            total_pages = len(doc)
            if page_num >= total_pages:
                st.warning(f"Page {page_num + 1} not found. PDF has {total_pages} pages.")
                return
            page = doc.load_page(page_num)
            # One zoom factor for both axes keeps the page's proportions;
            # separate x/y factors would stretch every page into a square.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        # NOTE(review): newer Streamlit releases appear to deprecate
        # use_column_width; kept as-is for the version this app targets.
        st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {total_pages}")
    except Exception as e:
        st.error(f"Error displaying PDF page: {e}")
def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
    """Show a row of thumbnails for the first pages of a PDF.

    Each thumbnail gets a "View Page N" button that stores the chosen
    zero-based page index in ``st.session_state.current_page`` and reruns
    the script.

    Args:
        pdf_path: Filesystem path of the PDF.
        max_thumbnails: Upper bound on the number of thumbnails shown.

    Shows a Streamlit error if the PDF cannot be opened or a thumbnail
    cannot be drawn; nothing is raised for those failures.
    """
    try:
        # Only the page count is needed here. Close the document before any
        # widget is drawn: st.rerun() below stops the script immediately, so
        # a close() placed after the loop would be skipped.
        with fitz.open(pdf_path) as doc:
            page_total = len(doc)

        shown = min(max_thumbnails, page_total)
        if shown < 1:
            # Nothing to show; st.columns() rejects a count of zero.
            return

        for i, col in enumerate(st.columns(shown)):
            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
            if thumbnail is None:
                continue
            with col:
                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
                if st.button(f"View Page {i+1}", key=f"page_{i}"):
                    st.session_state.current_page = i
                    st.rerun()
    except Exception as e:
        st.error(f"Error displaying thumbnails: {e}")
def main():
    """Build the page: header banner, sidebar controls, then viewer or landing view."""
    _init_session_state()

    # Header with anycoder link
    st.markdown("""
<div class="main-header">
<h1>📄 PDF Viewer & Manager</h1>
<p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">anycoder</a></p>
</div>
""", unsafe_allow_html=True)

    _render_sidebar()

    pdf_path = st.session_state.uploaded_file
    if pdf_path and os.path.exists(pdf_path):
        _render_viewer(pdf_path)
    else:
        _render_landing()


def _init_session_state() -> None:
    """Create every session-state key the app reads, keeping existing values."""
    defaults = {
        "uploaded_file": None,  # path of the temp copy of the current PDF
        "upload_digest": None,  # SHA-256 of the bytes stored at that path
        "current_page": 0,      # zero-based index of the page being shown
        "pdf_info": None,       # extract_pdf_info() result for the current PDF
        "uploader_nonce": 0,    # bumped on "Clear" to give the uploader a new key
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _discard_temp_pdf() -> None:
    """Delete the stored temp PDF (if any) and reset the per-document state."""
    path = st.session_state.uploaded_file
    if path and os.path.exists(path):
        try:
            os.unlink(path)
        except OSError as exc:
            # Best effort: a file that cannot be removed must not break the UI.
            logger.warning("Could not remove temporary PDF %s: %s", path, exc)
    st.session_state.uploaded_file = None
    st.session_state.upload_digest = None
    st.session_state.pdf_info = None
    st.session_state.current_page = 0


def _store_uploaded_pdf(uploaded_file) -> None:
    """Persist a newly uploaded PDF to a temp file and cache its info.

    The script reruns on every interaction while the uploader still holds the
    file, so the upload is written only when its content differs from what is
    already stored; otherwise a new temp file would be created on every rerun.
    """
    import hashlib  # local import: only this helper needs it

    data = uploaded_file.getvalue()
    digest = hashlib.sha256(data).hexdigest()
    stored_path = st.session_state.uploaded_file
    if (
        digest == st.session_state.upload_digest
        and stored_path
        and os.path.exists(stored_path)
    ):
        return

    # New document: drop the previous temp file and start again at page 1 so
    # a page index from a longer, earlier PDF cannot be out of range.
    _discard_temp_pdf()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(data)
    # Inspect the file only after the handle is closed and fully flushed.
    st.session_state.uploaded_file = tmp_file.name
    st.session_state.upload_digest = digest
    st.session_state.pdf_info = extract_pdf_info(tmp_file.name)


def _sync_page_from_input() -> None:
    """on_change callback: copy the 1-based page input into ``current_page``."""
    st.session_state.current_page = st.session_state.page_input - 1


def _render_sidebar() -> None:
    """Draw the sidebar: uploader, file information, page selector, clear button."""
    with st.sidebar:
        st.header("📋 Navigation")

        # File upload section
        st.subheader("Upload PDF")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=["pdf"],
            help="Upload a PDF file to view and manage",
            # The key changes when "Clear PDF" is pressed, which makes
            # Streamlit create an empty uploader instead of handing the same
            # file back on the next rerun.
            key=f"pdf_uploader_{st.session_state.uploader_nonce}",
        )
        if uploaded_file:
            _store_uploaded_pdf(uploaded_file)
            # Display file info
            info = st.session_state.pdf_info
            if info and "error" not in info:
                st.markdown("### 📊 File Information")
                st.write(f"**Pages:** {info['page_count']}")
                st.write(f"**Size:** {info['file_size']:.2f} MB")
                st.write(f"**Created:** {info.get('created', 'N/A')}")
                st.write(f"**Modified:** {info.get('modified', 'N/A')}")

        # Page navigation
        info = st.session_state.pdf_info
        if info and "error" not in info and info["page_count"] >= 1:
            page_count = info["page_count"]
            # Keep the index inside the document before it seeds the widget.
            st.session_state.current_page = min(
                max(st.session_state.current_page, 0), page_count - 1
            )
            # current_page is the single source of truth: the widget is
            # seeded from it on every run (so thumbnail clicks show up here)
            # and writes back through the on_change callback.
            st.session_state.page_input = st.session_state.current_page + 1
            _left, middle, _right = st.columns([1, 2, 1])
            with middle:
                st.number_input(
                    "Page",
                    min_value=1,
                    max_value=page_count,
                    key="page_input",
                    on_change=_sync_page_from_input,
                )

        # Clear button
        if st.button("🗑️ Clear PDF", type="primary"):
            _discard_temp_pdf()
            st.session_state.uploader_nonce += 1
            st.rerun()


def _render_viewer(pdf_path: str) -> None:
    """Draw the main area for a loaded PDF: current page, thumbnails, actions."""
    info = st.session_state.pdf_info
    # pdf_info may be the {"error": ...} dict, which has no "page_count" key.
    info_ok = bool(info) and "error" not in info

    # Display PDF content
    st.markdown("### 📄 PDF Content")
    st.markdown(f"#### Page {st.session_state.current_page + 1}")
    display_pdf_page(pdf_path, st.session_state.current_page)

    # Display thumbnails if multiple pages
    if info_ok and info["page_count"] > 1:
        st.markdown("### 🖼️ Page Thumbnails")
        display_pdf_thumbnails(pdf_path)

    # Additional actions
    st.markdown("### ⚡ Actions")
    download_col, text_col, stats_col = st.columns(3)

    with download_col:
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
        # Streamlit's download widget serves the bytes directly instead of
        # embedding the whole file as a base64 data URI in the page markup.
        st.download_button(
            "📥 Download Original",
            data=pdf_bytes,
            file_name="document.pdf",
            mime="application/pdf",
        )

    with text_col:
        if st.button("📄 Extract Text"):
            try:
                with fitz.open(pdf_path) as doc:
                    text = "".join(page.get_text() for page in doc)
                st.text_area("Extracted Text", text, height=200)
            except Exception as e:
                st.error(f"Error extracting text: {e}")

    with stats_col:
        if st.button("📊 PDF Stats"):
            if info_ok:
                st.json({
                    "page_count": info["page_count"],
                    "file_size_mb": info["file_size"],
                    "metadata": info["metadata"],
                })
            else:
                st.warning("No PDF info available")


def _render_landing() -> None:
    """Draw the main area shown when no PDF is loaded: upload hint and feature list."""
    # Upload area
    st.markdown("### 📤 Upload PDF File")
    st.markdown("""
<div class="upload-area">
<h3>Drop your PDF here or click to browse</h3>
<p>Supports PDF files only</p>
</div>
""", unsafe_allow_html=True)

    # Features section
    st.markdown("### ✨ Features")
    features = [
        "📖 View PDF pages with high quality rendering",
        "🖼️ Browse through thumbnails of all pages",
        "📥 Download original PDF file",
        "📄 Extract text content from PDF",
        "📊 View detailed PDF metadata and statistics",
        "🔄 Navigate between pages easily",
    ]
    for feature in features:
        st.markdown(f"- {feature}")
if __name__ == "__main__":
    # Build the app only when this file is executed as the main script.
    main()