import streamlit as st
import os
import tempfile
import fitz # PyMuPDF
from PIL import Image
import io
import base64
import time
from typing import Optional, List, Tuple
import logging
# Logging: INFO level on the root logger, plus a module-scoped logger used by
# the PDF helper functions to report failures.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Browser tab title/icon and overall page layout for the Streamlit app.
st.set_page_config(
    page_title="PDF Viewer & Manager",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Hook for custom CSS. The markup is currently only a newline, so no styling
# is actually injected.
st.markdown("\n", unsafe_allow_html=True)
def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
    """Render one PDF page as a PIL image that is about ``width`` pixels wide.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.
        width: Target width of the rendered image in pixels. The height is
            derived from the page's own aspect ratio.

    Returns:
        The rendered page as an RGB image, or ``None`` when ``page_num`` is
        past the last page or when opening/rendering fails (the error is
        logged).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            if page_num >= len(doc):
                return None
            page = doc.load_page(page_num)
            # Use ONE zoom factor for both axes. Scaling x by width/page_width
            # and y by width/page_height would squash every page into a
            # width x width square.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        finally:
            # Always release the document, including when rendering raises.
            doc.close()
    except Exception as e:
        logger.error("Error generating thumbnail: %s", e)
        return None
def extract_pdf_info(pdf_path: str) -> dict:
    """Collect page count, metadata, size and date fields for a PDF file.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        On success, a dict with the keys ``page_count``, ``metadata``,
        ``file_size`` (in MB), ``created`` and ``modified``. On any failure,
        a dict with the single key ``error`` holding the error text (the
        error is also logged).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            # The metadata mapping itself may be missing, and individual
            # values may be None or empty; normalise both so the "Unknown"
            # fallback actually applies.
            metadata = doc.metadata or {}
            return {
                "page_count": len(doc),
                "metadata": metadata,
                "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
                "created": metadata.get("creationDate") or "Unknown",
                "modified": metadata.get("modDate") or "Unknown",
            }
        finally:
            # Close the document even if reading size/metadata raises.
            doc.close()
    except Exception as e:
        logger.error("Error extracting PDF info: %s", e)
        return {"error": str(e)}
def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
    """Render a single PDF page into the Streamlit app.

    Shows the page as an image with a "Page X of Y" caption, a warning when
    ``page_num`` is outside the document, or an error message when opening or
    rendering fails.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to show.
        width: Pixel width the page is rasterised at before being handed to
            ``st.image`` (which then fits it to the column).
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            total_pages = len(doc)
            # Reject negative indices as well, so the caption can never read
            # "Page 0 of N".
            if not 0 <= page_num < total_pages:
                st.warning(f"Page {page_num + 1} not found. PDF has {total_pages} pages.")
                return
            page = doc.load_page(page_num)
            # A single zoom factor for both axes preserves the page's aspect
            # ratio; separate x/y factors would force a square image.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {total_pages}")
        finally:
            # Release the document on every path, including render errors.
            doc.close()
    except Exception as e:
        st.error(f"Error displaying PDF page: {e}")
def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
    """Show a row of page thumbnails, each with a "View Page N" button.

    Clicking a button stores the chosen zero-based page index in
    ``st.session_state.current_page`` and reruns the script.

    Args:
        pdf_path: Path of the PDF file to open.
        max_thumbnails: Upper bound on how many leading pages get a thumbnail.
    """
    try:
        # The document is only needed for its page count, so close it before
        # rendering widgets: st.rerun() below aborts the script run and would
        # otherwise skip the close.
        doc = fitz.open(pdf_path)
        try:
            page_total = len(doc)
        finally:
            doc.close()

        shown = min(max_thumbnails, page_total)
        if shown <= 0:
            # Nothing to lay out; st.columns() needs a positive column count.
            return

        for i, col in enumerate(st.columns(shown)):
            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
            if thumbnail is None:
                continue
            with col:
                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
                if st.button(f"View Page {i+1}", key=f"page_{i}"):
                    st.session_state.current_page = i
                    st.rerun()
    except Exception as e:
        st.error(f"Error displaying thumbnails: {e}")
def _remove_temp_file(path: Optional[str]) -> None:
    """Delete the temp file at *path* if there is one; log (don't raise) on failure."""
    if not path:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        logger.warning("Could not remove temporary file %s: %s", path, exc)


def _store_upload(uploaded_file) -> None:
    """Copy a newly uploaded PDF to a temp file and refresh the session state.

    The script reruns on every interaction while the uploader keeps returning
    the same file, so the copy is skipped when this upload has already been
    stored. Otherwise each rerun would leave another temp file behind.
    """
    upload_id = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )
    stored_path = st.session_state.uploaded_file
    if (
        st.session_state.upload_id == upload_id
        and stored_path
        and os.path.exists(stored_path)
    ):
        return

    # A different file replaces the previous one: drop the old temp copy.
    _remove_temp_file(stored_path)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        temp_path = tmp_file.name

    st.session_state.uploaded_file = temp_path
    st.session_state.upload_id = upload_id
    st.session_state.upload_name = uploaded_file.name
    st.session_state.pdf_info = extract_pdf_info(temp_path)
    # The new document may be shorter than the previous one.
    st.session_state.current_page = 0


def _sync_page_from_input() -> None:
    """Widget callback: copy the 1-based page input into the 0-based current page."""
    st.session_state.current_page = st.session_state.page_input - 1


def _clear_pdf() -> None:
    """Forget the loaded PDF, delete its temp copy and reset the uploader widget."""
    _remove_temp_file(st.session_state.uploaded_file)
    st.session_state.uploaded_file = None
    st.session_state.pdf_info = None
    st.session_state.current_page = 0
    st.session_state.upload_id = None
    st.session_state.upload_name = None
    # Giving the uploader a fresh key makes it start empty on the next run;
    # otherwise it would still return the old file and re-load it at once.
    st.session_state.uploader_key += 1


def _render_sidebar() -> None:
    """Draw the sidebar: uploader, file information, page selector, clear button."""
    with st.sidebar:
        st.header("📋 Navigation")

        # File upload section
        st.subheader("Upload PDF")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=["pdf"],
            help="Upload a PDF file to view and manage",
            key=f"pdf_uploader_{st.session_state.uploader_key}",
        )
        if uploaded_file:
            _store_upload(uploaded_file)

        info = st.session_state.pdf_info
        if info and "error" in info:
            st.error(f"Could not read PDF: {info['error']}")
        elif info:
            # File information
            st.markdown("### 📊 File Information")
            st.write(f"**Pages:** {info['page_count']}")
            st.write(f"**Size:** {info['file_size']:.2f} MB")
            st.write(f"**Created:** {info.get('created', 'N/A')}")
            st.write(f"**Modified:** {info.get('modified', 'N/A')}")

            # Page navigation
            page_count = info["page_count"]
            if page_count > 0:
                # Keep the stored page inside the document, then push it into
                # the widget's own state BEFORE the widget is created. That
                # way changes made elsewhere (thumbnail buttons) show up in
                # the input, and edits to the input flow back through the
                # on_change callback.
                st.session_state.current_page = min(
                    max(st.session_state.current_page, 0), page_count - 1
                )
                st.session_state.page_input = st.session_state.current_page + 1
                _, centre, _ = st.columns([1, 2, 1])
                with centre:
                    st.number_input(
                        "Page",
                        min_value=1,
                        max_value=page_count,
                        key="page_input",
                        on_change=_sync_page_from_input,
                    )

        # Clear button
        if st.session_state.uploaded_file:
            if st.button("🗑️ Clear PDF", type="primary"):
                _clear_pdf()
                st.rerun()


def _render_viewer(pdf_path: str) -> None:
    """Draw the main area for a loaded PDF: current page, thumbnails, actions."""
    st.markdown("### 📄 PDF Content")
    st.markdown(f"#### Page {st.session_state.current_page + 1}")
    display_pdf_page(pdf_path, st.session_state.current_page)

    info = st.session_state.pdf_info or {}
    has_info = bool(info) and "error" not in info

    # Thumbnails only make sense for multi-page documents. ``.get`` avoids a
    # KeyError when ``pdf_info`` holds an error dict instead of real info.
    if info.get("page_count", 0) > 1:
        st.markdown("### 🖼️ Page Thumbnails")
        display_pdf_thumbnails(pdf_path)

    st.markdown("### ⚡ Actions")
    col1, col2, col3 = st.columns(3)

    with col1:
        # A real download widget; plain text with no link cannot deliver the file.
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
        st.download_button(
            "📥 Download Original",
            data=pdf_bytes,
            file_name=st.session_state.upload_name or "document.pdf",
            mime="application/pdf",
        )

    with col2:
        if st.button("📄 Extract Text"):
            try:
                doc = fitz.open(pdf_path)
                try:
                    text = "".join(page.get_text() for page in doc)
                finally:
                    doc.close()
                st.text_area("Extracted Text", text, height=200)
            except Exception as e:
                st.error(f"Error extracting text: {e}")

    with col3:
        if st.button("📊 PDF Stats"):
            if has_info:
                st.json({
                    "page_count": info["page_count"],
                    "file_size_mb": info["file_size"],
                    "metadata": info["metadata"],
                })
            else:
                st.warning("No PDF info available")


def _render_landing() -> None:
    """Draw the main area shown while no PDF is loaded: upload hint and feature list."""
    st.markdown("### 📤 Upload PDF File")
    st.markdown(
        "\nDrop your PDF here or click to browse\nSupports PDF files only\n",
        unsafe_allow_html=True,
    )

    st.markdown("### ✨ Features")
    features = [
        "📖 View PDF pages with high quality rendering",
        "🖼️ Browse through thumbnails of all pages",
        "📥 Download original PDF file",
        "📄 Extract text content from PDF",
        "📊 View detailed PDF metadata and statistics",
        "🔄 Navigate between pages easily",
    ]
    for feature in features:
        st.markdown(f"- {feature}")


def main():
    """Entry point of the app: set up session state, then draw sidebar and main area.

    Session-state keys used:
        uploaded_file: path of the temp copy of the loaded PDF, or ``None``.
        current_page: zero-based index of the page being shown.
        pdf_info: result of ``extract_pdf_info`` for the loaded PDF, or ``None``.
        upload_id / upload_name: identity and original name of the stored upload.
        uploader_key: counter used to reset the file-uploader widget.
    """
    # Initialize session state
    defaults = {
        "uploaded_file": None,
        "current_page": 0,
        "pdf_info": None,
        "upload_id": None,
        "upload_name": None,
        "uploader_key": 0,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value

    # Header with anycoder link
    st.markdown(
        "\n📄 PDF Viewer & Manager\nBuilt with anycoder\n",
        unsafe_allow_html=True,
    )

    _render_sidebar()

    # Main content area
    pdf_path = st.session_state.uploaded_file
    if pdf_path and os.path.exists(pdf_path):
        _render_viewer(pdf_path)
    else:
        _render_landing()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()