"""Streamlit app for uploading, viewing and inspecting a PDF file.

The app lets the user upload a PDF, page through it, jump to a page from a
thumbnail strip, download the original file, extract its text and view basic
metadata. Rendering is done with PyMuPDF (``fitz``) and PIL.
"""

import base64
import io
import logging
import os
import tempfile
import time
from typing import List, Optional, Tuple

import fitz  # PyMuPDF
import streamlit as st
from PIL import Image

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set page config (must be the first Streamlit command executed by the script)
st.set_page_config(
    page_title="PDF Viewer & Manager",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Add custom CSS for better styling
# NOTE(review): the string below carries no stylesheet; it looks like the
# <style> block was lost at some point - restore it here if one is wanted.
st.markdown(""" """, unsafe_allow_html=True)

# Session-state keys used by the app and their initial values.
_SESSION_DEFAULTS = {
    "uploaded_file": None,   # path of the temp copy of the current upload
    "uploaded_name": None,   # file name reported by the uploader
    "upload_id": None,       # identity of the upload the temp copy belongs to
    "pdf_info": None,        # result of extract_pdf_info() for the temp copy
    "current_page": 0,       # zero-based index of the page being shown
    "uploader_nonce": 0,     # bumped to reset the file_uploader widget
}


def _render_page(doc, page_num: int, width: int) -> Image.Image:
    """Rasterize page ``page_num`` of an open document to ``width`` pixels wide.

    The same zoom factor is applied on both axes so the page keeps its
    aspect ratio.
    """
    page = doc.load_page(page_num)
    scale = width / page.rect.width
    pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale), alpha=False)
    return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)


def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
    """Generate a thumbnail for a PDF page.

    Args:
        pdf_path: Path of the PDF file on disk.
        page_num: Zero-based page index.
        width: Target thumbnail width in pixels; height follows the page's
            aspect ratio.

    Returns:
        The rendered page as a PIL image, or ``None`` if the page index is
        past the end of the document or rendering fails (the failure is
        logged).
    """
    try:
        with fitz.open(pdf_path) as doc:
            if page_num < len(doc):
                return _render_page(doc, page_num, width)
    except Exception as e:
        logger.error("Error generating thumbnail: %s", e)
    return None


def extract_pdf_info(pdf_path: str) -> dict:
    """Extract metadata and basic info from a PDF.

    Args:
        pdf_path: Path of the PDF file on disk.

    Returns:
        On success a dict with ``page_count``, ``metadata``, ``file_size``
        (in MB), ``created`` and ``modified``. On failure a dict with a
        single ``error`` key holding the exception text.
    """
    try:
        with fitz.open(pdf_path) as doc:
            # PyMuPDF may report no metadata at all; treat that as empty.
            metadata = doc.metadata or {}
            return {
                "page_count": len(doc),
                "metadata": metadata,
                "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
                "created": metadata.get("creationDate", "Unknown"),
                "modified": metadata.get("modDate", "Unknown"),
            }
    except Exception as e:
        logger.error("Error extracting PDF info: %s", e)
        return {"error": str(e)}


def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
    """Display a single PDF page in the Streamlit app.

    Args:
        pdf_path: Path of the PDF file on disk.
        page_num: Zero-based page index.
        width: Render width in pixels; height follows the page's aspect
            ratio.

    Shows a warning when the page index is past the end of the document and
    an error message when the document cannot be opened or rendered.
    """
    try:
        with fitz.open(pdf_path) as doc:
            page_total = len(doc)
            if page_num < page_total:
                img = _render_page(doc, page_num, width)
                st.image(
                    img,
                    use_column_width=True,
                    caption=f"Page {page_num + 1} of {page_total}",
                )
            else:
                st.warning(f"Page {page_num + 1} not found. PDF has {page_total} pages.")
    except Exception as e:
        st.error(f"Error displaying PDF page: {e}")


def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
    """Display thumbnails of the first pages, each with a "view" button.

    Args:
        pdf_path: Path of the PDF file on disk.
        max_thumbnails: Upper bound on the number of thumbnails shown.

    Clicking a button stores the page index in
    ``st.session_state.current_page`` and reruns the script.
    """
    try:
        with fitz.open(pdf_path) as doc:
            page_total = len(doc)

        shown = min(max_thumbnails, page_total)
        if shown <= 0:
            # st.columns() rejects a count of zero; nothing to show anyway.
            return

        for i, col in enumerate(st.columns(shown)):
            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
            if not thumbnail:
                continue
            with col:
                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
                if st.button(f"View Page {i+1}", key=f"page_{i}"):
                    st.session_state.current_page = i
                    st.rerun()
    except Exception as e:
        st.error(f"Error displaying thumbnails: {e}")


def _remove_temp_file(path: Optional[str]) -> None:
    """Delete a temp file if it exists; a missing file is not an error."""
    if not path:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning("Could not remove temporary file %s: %s", path, e)


def _store_upload(uploaded_file) -> None:
    """Copy a new upload to a temp file and record it in session state.

    Streamlit reruns the whole script on every interaction while the
    uploader still holds the file, so the copy is made only when the upload
    actually changed (or the previous copy vanished); otherwise every rerun
    would leave another temp file behind.
    """
    state = st.session_state
    upload_id = getattr(uploaded_file, "file_id", None) or (
        f"{uploaded_file.name}:{uploaded_file.size}"
    )
    is_new = upload_id != state.upload_id
    if not is_new and state.uploaded_file and os.path.exists(state.uploaded_file):
        return

    _remove_temp_file(state.uploaded_file)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
    # The file is closed (and therefore flushed) before it is read back.
    state.uploaded_file = tmp_file.name
    state.uploaded_name = uploaded_file.name
    state.upload_id = upload_id
    state.pdf_info = extract_pdf_info(tmp_file.name)
    if is_new:
        # A page index from the previous document may not exist in this one.
        state.current_page = 0


def _clear_pdf() -> None:
    """Forget the current PDF, delete its temp copy and reset the uploader."""
    state = st.session_state
    _remove_temp_file(state.uploaded_file)
    state.uploaded_file = None
    state.uploaded_name = None
    state.upload_id = None
    state.pdf_info = None
    state.current_page = 0
    # A new widget key makes Streamlit create an empty uploader; without it
    # the old upload would be picked up again on the next rerun.
    state.uploader_nonce += 1


def _sync_current_page() -> None:
    """Copy the page number typed into the page box into ``current_page``."""
    st.session_state.current_page = st.session_state.page_input - 1


def _page_selector(page_count: int) -> None:
    """Render the one-based page number box for a ``page_count``-page PDF."""
    if page_count < 1:
        return
    state = st.session_state
    # Keep the index valid for this document, then seed the widget from it so
    # changes made elsewhere (thumbnail buttons) show up in the box.
    state.current_page = min(max(state.current_page, 0), page_count - 1)
    state.page_input = state.current_page + 1

    _, middle, _ = st.columns([1, 2, 1])
    with middle:
        st.number_input(
            "Page",
            min_value=1,
            max_value=page_count,
            step=1,
            key="page_input",
            on_change=_sync_current_page,
        )


def _render_sidebar() -> None:
    """Render the sidebar: uploader, file information, paging and clear."""
    state = st.session_state
    with st.sidebar:
        st.header("📋 Navigation")

        # File upload section
        st.subheader("Upload PDF")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=["pdf"],
            help="Upload a PDF file to view and manage",
            key=f"pdf_uploader_{state.uploader_nonce}",
        )
        if uploaded_file is not None:
            _store_upload(uploaded_file)

        info = state.pdf_info
        if info and "error" not in info:
            # Display file info
            st.markdown("### 📊 File Information")
            st.write(f"**Pages:** {info['page_count']}")
            st.write(f"**Size:** {info['file_size']:.2f} MB")
            st.write(f"**Created:** {info.get('created', 'N/A')}")
            st.write(f"**Modified:** {info.get('modified', 'N/A')}")

            # Page navigation
            _page_selector(info["page_count"])

        # Clear button
        if state.uploaded_file and st.button("🗑️ Clear PDF", type="primary"):
            _clear_pdf()
            st.rerun()


def _render_viewer(pdf_path: str) -> None:
    """Render the main area for a loaded PDF: page, thumbnails and actions."""
    state = st.session_state
    info = state.pdf_info
    has_info = bool(info) and "error" not in info

    # Display PDF content
    st.markdown("### 📄 PDF Content")

    # Display current page
    st.markdown(f"#### Page {state.current_page + 1}")
    display_pdf_page(pdf_path, state.current_page)

    # Display thumbnails if multiple pages
    if has_info and info["page_count"] > 1:
        st.markdown("### 🖼️ Page Thumbnails")
        display_pdf_thumbnails(pdf_path)

    # Additional actions
    st.markdown("### ⚡ Actions")
    col1, col2, col3 = st.columns(3)

    with col1:
        try:
            with open(pdf_path, "rb") as f:
                pdf_bytes = f.read()
        except OSError as e:
            st.error(f"Error reading PDF: {e}")
        else:
            st.download_button(
                "📥 Download Original",
                data=pdf_bytes,
                file_name=state.get("uploaded_name") or os.path.basename(pdf_path),
                mime="application/pdf",
            )

    with col2:
        if st.button("📄 Extract Text"):
            try:
                with fitz.open(pdf_path) as doc:
                    text = "".join(page.get_text() for page in doc)
                st.text_area("Extracted Text", text, height=200)
            except Exception as e:
                st.error(f"Error extracting text: {e}")

    with col3:
        if st.button("📊 PDF Stats"):
            if has_info:
                st.json({
                    "page_count": info["page_count"],
                    "file_size_mb": info["file_size"],
                    "metadata": info["metadata"],
                })
            else:
                st.warning("No PDF info available")


def _render_landing() -> None:
    """Render the main area shown while no PDF is loaded."""
    # Upload area
    st.markdown("### 📤 Upload PDF File")
    st.markdown("""

Drop your PDF here or click to browse

Supports PDF files only

""", unsafe_allow_html=True)

    # Features section
    st.markdown("### ✨ Features")
    features = [
        "📖 View PDF pages with high quality rendering",
        "🖼️ Browse through thumbnails of all pages",
        "📥 Download original PDF file",
        "📄 Extract text content from PDF",
        "📊 View detailed PDF metadata and statistics",
        "🔄 Navigate between pages easily",
    ]
    for feature in features:
        st.markdown(f"- {feature}")


def main():
    """Run the app: initialize session state, then draw sidebar and content."""
    # Initialize session state
    for key, default in _SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = default

    # Header with anycoder link
    st.markdown("""

📄 PDF Viewer & Manager

Built with anycoder

""", unsafe_allow_html=True)

    # Sidebar
    _render_sidebar()

    # Main content area
    pdf_path = st.session_state.uploaded_file
    if pdf_path and os.path.exists(pdf_path):
        _render_viewer(pdf_path)
    else:
        _render_landing()


if __name__ == "__main__":
    main()