Spaces:
Running
Running
| import streamlit as st | |
| import os | |
| import tempfile | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
| import io | |
| import base64 | |
| import time | |
| from typing import Optional, List, Tuple | |
| import logging | |
# Module-wide logging: INFO level on the root configuration, plus a logger
# named after this module for the helpers below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Streamlit page settings: browser-tab title and icon, wide layout, and a
# sidebar that starts expanded.
st.set_page_config(
    page_title="PDF Viewer & Manager",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Stylesheet injected into the page once per script run. It styles the
# gradient header, Streamlit buttons, and the page / upload / stats cards.
_CUSTOM_CSS = """
<style>
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
color: white;
}
.stButton>button {
background-color: #667eea;
color: white;
border-radius: 5px;
border: none;
padding: 8px 16px;
transition: all 0.3s ease;
}
.stButton>button:hover {
background-color: #5a67d8;
transform: translateY(-1px);
}
.pdf-page {
background-color: white;
border-radius: 8px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
padding: 10px;
margin: 10px 0;
}
.upload-area {
border: 2px dashed #667eea;
border-radius: 10px;
padding: 30px;
text-align: center;
transition: all 0.3s ease;
}
.upload-area:hover {
border-color: #5a67d8;
background-color: #f8f9ff;
}
.stats-card {
background: white;
border-radius: 10px;
padding: 15px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
margin: 10px 0;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is passed through
# instead of being escaped.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
    """Render one PDF page as a PIL image that is ``width`` pixels wide.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.
        width: Target width of the rendered image in pixels. The height
            follows from the page's own aspect ratio.

    Returns:
        The rendered page as an RGB ``PIL.Image.Image``, or ``None`` when
        ``page_num`` is past the last page or rendering fails (the failure
        is logged, not raised).
    """
    try:
        # The context manager closes the document even when rendering raises.
        with fitz.open(pdf_path) as doc:
            if page_num >= len(doc):
                return None
            page = doc.load_page(page_num)
            # Use one zoom factor for both axes so the aspect ratio is kept;
            # scaling each axis by its own factor squashes every page into a
            # width x width square.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    except Exception as e:
        logger.error(f"Error generating thumbnail: {e}")
    return None
def extract_pdf_info(pdf_path: str) -> dict:
    """Collect page count, metadata and file size for a PDF.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        On success, a dict with the keys ``page_count``, ``metadata`` (the
        document's metadata dict), ``file_size`` (in MB), ``created`` and
        ``modified`` (raw metadata date strings, or ``"Unknown"`` when
        absent or empty). On failure, ``{"error": <message>}``; the failure
        is also logged.
    """
    try:
        # The context manager closes the document even if a later step raises.
        with fitz.open(pdf_path) as doc:
            page_count = len(doc)
            # metadata may be None for documents that cannot be read yet;
            # fall back to an empty dict so the lookups below stay safe.
            metadata = doc.metadata or {}
        return {
            "page_count": page_count,
            "metadata": metadata,
            "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
            # `or` (not a .get default) so that present-but-empty values
            # also show up as "Unknown".
            "created": metadata.get("creationDate") or "Unknown",
            "modified": metadata.get("modDate") or "Unknown",
        }
    except Exception as e:
        logger.error(f"Error extracting PDF info: {e}")
        return {"error": str(e)}
def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
    """Render a single PDF page into the current Streamlit container.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to show.
        width: Pixel width the page is rasterized at before Streamlit
            stretches it to the column width.

    Shows a warning when ``page_num`` is past the last page and an error
    message when opening or rendering fails; nothing is raised to the caller.
    """
    try:
        # The context manager closes the document even when rendering raises.
        with fitz.open(pdf_path) as doc:
            total_pages = len(doc)
            if page_num >= total_pages:
                st.warning(f"Page {page_num + 1} not found. PDF has {total_pages} pages.")
                return
            page = doc.load_page(page_num)
            # Use one zoom factor for both axes so the aspect ratio is kept;
            # scaling each axis by its own factor distorts the page into a
            # square.
            zoom = width / page.rect.width
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {total_pages}")
    except Exception as e:
        st.error(f"Error displaying PDF page: {e}")
def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
    """Show thumbnails of the first pages, each with a "View Page" button.

    Args:
        pdf_path: Path of the PDF file to preview.
        max_thumbnails: Upper bound on how many leading pages get a
            thumbnail column.

    Clicking a button stores the page index in
    ``st.session_state.current_page`` and reruns the script. Errors are
    reported with ``st.error`` instead of being raised.
    """
    try:
        # Only the page count is needed here, so the document is closed
        # right away instead of staying open while widgets are rendered
        # (get_pdf_thumbnail opens the file itself).
        with fitz.open(pdf_path) as doc:
            page_count = len(doc)

        shown = min(max_thumbnails, page_count)
        if shown <= 0:
            # st.columns() rejects a non-positive column count.
            return

        for i, col in enumerate(st.columns(shown)):
            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
            if thumbnail is None:
                continue
            with col:
                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
                if st.button(f"View Page {i+1}", key=f"page_{i}"):
                    st.session_state.current_page = i
                    st.rerun()
    except Exception as e:
        st.error(f"Error displaying thumbnails: {e}")
def _init_session_state() -> None:
    """Create the session-state slots the app relies on, if they are missing."""
    defaults = {
        "uploaded_file": None,     # path of the temp copy of the current PDF
        "current_page": 0,         # zero-based index of the page being shown
        "pdf_info": None,          # result of extract_pdf_info() for that PDF
        "upload_signature": None,  # identifies which upload the temp copy holds
        "uploader_key": 0,         # bumped to reset the file_uploader widget
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _discard_current_pdf() -> None:
    """Delete the temp copy of the current PDF and reset all PDF state."""
    path = st.session_state.uploaded_file
    if path and os.path.exists(path):
        try:
            os.unlink(path)
        except OSError as e:
            # Best effort: a stale temp file must not break the UI.
            logger.warning(f"Could not remove temporary file {path}: {e}")
    st.session_state.uploaded_file = None
    st.session_state.pdf_info = None
    st.session_state.upload_signature = None
    st.session_state.current_page = 0


def _store_upload(uploaded_file) -> None:
    """Copy a newly uploaded PDF to a temp file and cache its info.

    The script reruns on every interaction, so the copy is only made when
    the upload differs from the one already stored; otherwise each rerun
    would leave another temp file behind.
    """
    signature = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )
    current_path = st.session_state.uploaded_file
    if (
        signature == st.session_state.upload_signature
        and current_path
        and os.path.exists(current_path)
    ):
        return

    # Drop the previous temp file and start the new document on page 1.
    _discard_current_pdf()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
    st.session_state.uploaded_file = tmp_file.name
    st.session_state.pdf_info = extract_pdf_info(tmp_file.name)
    st.session_state.upload_signature = signature


def _apply_page_input() -> None:
    """on_change callback: copy the sidebar page number into current_page."""
    st.session_state.current_page = st.session_state.page_input - 1


def _render_sidebar() -> None:
    """Draw the sidebar: uploader, file information, page input, clear button."""
    with st.sidebar:
        st.header("π Navigation")

        # File upload section
        st.subheader("Upload PDF")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=["pdf"],
            help="Upload a PDF file to view and manage",
            # A changing key makes Streamlit create a fresh, empty uploader,
            # which is how "Clear PDF" actually drops the uploaded file.
            key=f"pdf_uploader_{st.session_state.uploader_key}",
        )
        if uploaded_file:
            _store_upload(uploaded_file)

        info = st.session_state.pdf_info
        if info and "error" not in info:
            # Display file info
            st.markdown("### π File Information")
            st.write(f"**Pages:** {info['page_count']}")
            st.write(f"**Size:** {info['file_size']:.2f} MB")
            st.write(f"**Created:** {info.get('created', 'N/A')}")
            st.write(f"**Modified:** {info.get('modified', 'N/A')}")

            # Page navigation
            page_count = info["page_count"]
            if page_count > 0:
                # Keep the stored page inside the document so the widget
                # never receives a value above max_value.
                st.session_state.current_page = max(
                    0, min(st.session_state.current_page, page_count - 1)
                )
                # current_page is the source of truth (thumbnail buttons
                # change it too); push it into the widget state before the
                # widget is created.
                st.session_state["page_input"] = st.session_state.current_page + 1
                _, middle, _ = st.columns([1, 2, 1])
                with middle:
                    st.number_input(
                        "Page",
                        min_value=1,
                        max_value=page_count,
                        step=1,
                        key="page_input",
                        on_change=_apply_page_input,
                    )

        # Clear button
        if st.button("ποΈ Clear PDF", type="primary"):
            _discard_current_pdf()
            st.session_state.uploader_key += 1
            st.rerun()


def _render_viewer(pdf_path: str) -> None:
    """Draw the main area for a loaded PDF: page, thumbnails and actions."""
    # pdf_info may be None or an {"error": ...} dict; treat both as "no info".
    info = st.session_state.pdf_info or {}

    # Display PDF content
    st.markdown("### π PDF Content")
    st.markdown(f"#### Page {st.session_state.current_page + 1}")
    display_pdf_page(pdf_path, st.session_state.current_page)

    # Display thumbnails if multiple pages
    if info.get("page_count", 0) > 1:
        st.markdown("### πΌοΈ Page Thumbnails")
        display_pdf_thumbnails(pdf_path)

    # Additional actions
    st.markdown("### β‘ Actions")
    col1, col2, col3 = st.columns(3)

    with col1:
        # A real download button instead of a base64 data-URI link, which
        # grows with the file and disappears on the next rerun.
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
        st.download_button(
            "π₯ Download Original",
            data=pdf_bytes,
            file_name="document.pdf",
            mime="application/pdf",
        )

    with col2:
        if st.button("π Extract Text"):
            try:
                with fitz.open(pdf_path) as doc:
                    text = "".join(page.get_text() for page in doc)
                st.text_area("Extracted Text", text, height=200)
            except Exception as e:
                st.error(f"Error extracting text: {e}")

    with col3:
        if st.button("π PDF Stats"):
            if info and "error" not in info:
                st.json({
                    "page_count": info["page_count"],
                    "file_size_mb": info["file_size"],
                    "metadata": info["metadata"],
                })
            else:
                st.warning("No PDF info available")


def _render_landing() -> None:
    """Draw the main area shown while no PDF is loaded."""
    # Upload area
    st.markdown("### π€ Upload PDF File")
    st.markdown("""
    <div class="upload-area">
    <h3>Drop your PDF here or click to browse</h3>
    <p>Supports PDF files only</p>
    </div>
    """, unsafe_allow_html=True)

    # Features section
    st.markdown("### β¨ Features")
    features = [
        "π View PDF pages with high quality rendering",
        "πΌοΈ Browse through thumbnails of all pages",
        "π₯ Download original PDF file",
        "π Extract text content from PDF",
        "π View detailed PDF metadata and statistics",
        "π Navigate between pages easily",
    ]
    for feature in features:
        st.markdown(f"- {feature}")


def main():
    """Entry point: build the PDF viewer page for the current script run.

    Initializes session state, draws the header and sidebar, then shows
    either the viewer (when a temp copy of an uploaded PDF exists on disk)
    or the landing/upload screen.

    NOTE(review): the emoji in the UI labels appear to be mis-encoded in
    this copy of the file; they are kept as found and should be restored
    from the original source.
    """
    _init_session_state()

    # Header with anycoder link
    st.markdown("""
    <div class="main-header">
    <h1>π PDF Viewer & Manager</h1>
    <p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">anycoder</a></p>
    </div>
    """, unsafe_allow_html=True)

    _render_sidebar()

    # Main content area
    pdf_path = st.session_state.uploaded_file
    if pdf_path and os.path.exists(pdf_path):
        _render_viewer(pdf_path)
    else:
        _render_landing()


if __name__ == "__main__":
    main()