Spaces:

hzaustingg
/

anycoder-99f49d97

Running

App Files Files Community

hzaustingg committed on Dec 10, 2025

Commit

189733d

verified ·

1 Parent(s): 379d463

Upload streamlit_app.py with huggingface_hub

Browse files

Files changed (1) hide show

streamlit_app.py +286 -0

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,286 @@

+import streamlit as st
+import os
+import tempfile
+import fitz  # PyMuPDF
+from PIL import Image
+import io
+import base64
+import time
+from typing import Optional, List, Tuple
+import logging
+# Configure logging
+# The root logger is set to INFO; `logger` is the module-level logger that
+# get_pdf_thumbnail and extract_pdf_info use to report failures.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Set page config
+# NOTE(review): Streamlit has historically required set_page_config to run
+# before any other st.* command — keep this call ahead of the CSS injection.
+st.set_page_config(
+    page_title="PDF Viewer & Manager",
+    page_icon="📄",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Add custom CSS for better styling
+# The <style> block is injected as raw HTML (unsafe_allow_html=True).
+# Within this file only .main-header and .upload-area are referenced by
+# markup emitted from main(); .pdf-page and .stats-card have no visible users
+# here. The .stButton rules restyle every st.button on the page.
+st.markdown("""
+<style>
+    .main-header {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        padding: 20px;
+        border-radius: 10px;
+        margin-bottom: 20px;
+        color: white;
+    }
+    .stButton>button {
+        background-color: #667eea;
+        color: white;
+        border-radius: 5px;
+        border: none;
+        padding: 8px 16px;
+        transition: all 0.3s ease;
+    }
+    .stButton>button:hover {
+        background-color: #5a67d8;
+        transform: translateY(-1px);
+    }
+    .pdf-page {
+        background-color: white;
+        border-radius: 8px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        padding: 10px;
+        margin: 10px 0;
+    }
+    .upload-area {
+        border: 2px dashed #667eea;
+        border-radius: 10px;
+        padding: 30px;
+        text-align: center;
+        transition: all 0.3s ease;
+    }
+    .upload-area:hover {
+        border-color: #5a67d8;
+        background-color: #f8f9ff;
+    }
+    .stats-card {
+        background: white;
+        border-radius: 10px;
+        padding: 15px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        margin: 10px 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
+    """Generate a thumbnail for PDF page"""
+    try:
+        doc = fitz.open(pdf_path)
+        if page_num < len(doc):
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap(matrix=fitz.Matrix(width/page.rect.width, width/page.rect.height))
+            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+            doc.close()
+            return img
+        doc.close()
+    except Exception as e:
+        logger.error(f"Error generating thumbnail: {e}")
+    return None
+def extract_pdf_info(pdf_path: str) -> dict:
+    """Extract metadata and basic info from PDF"""
+    try:
+        doc = fitz.open(pdf_path)
+        info = {
+            "page_count": len(doc),
+            "metadata": doc.metadata,
+            "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
+            "created": doc.metadata.get("creationDate", "Unknown"),
+            "modified": doc.metadata.get("modDate", "Unknown")
+        }
+        doc.close()
+        return info
+    except Exception as e:
+        logger.error(f"Error extracting PDF info: {e}")
+        return {"error": str(e)}
+def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
+    """Display a single PDF page"""
+    try:
+        doc = fitz.open(pdf_path)
+        if page_num < len(doc):
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap(matrix=fitz.Matrix(width/page.rect.width, width/page.rect.height))
+            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+            st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {len(doc)}")
+        else:
+            st.warning(f"Page {page_num + 1} not found. PDF has {len(doc)} pages.")
+        doc.close()
+    except Exception as e:
+        st.error(f"Error displaying PDF page: {e}")
+def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
+    """Display PDF page thumbnails"""
+    try:
+        doc = fitz.open(pdf_path)
+        cols = st.columns(min(max_thumbnails, len(doc)))
+        for i, col in enumerate(cols):
+            if i < len(doc):
+                thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
+                if thumbnail:
+                    with col:
+                        st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
+                        if st.button(f"View Page {i+1}", key=f"page_{i}"):
+                            st.session_state.current_page = i
+                            st.rerun()
+        doc.close()
+    except Exception as e:
+        st.error(f"Error displaying thumbnails: {e}")
+def main():
+    # Initialize session state
+    if 'uploaded_file' not in st.session_state:
+        st.session_state.uploaded_file = None
+    if 'current_page' not in st.session_state:
+        st.session_state.current_page = 0
+    if 'pdf_info' not in st.session_state:
+        st.session_state.pdf_info = None
+    # Header with anycoder link
+    st.markdown("""
+    <div class="main-header">
+        <h1>📄 PDF Viewer & Manager</h1>
+        <p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: white; text-decoration: underline;">anycoder</a></p>
+    </div>
+    """, unsafe_allow_html=True)
+    # Sidebar
+    with st.sidebar:
+        st.header("📋 Navigation")
+        # File upload section
+        st.subheader("Upload PDF")
+        uploaded_file = st.file_uploader(
+            "Choose a PDF file",
+            type=["pdf"],
+            help="Upload a PDF file to view and manage"
+        )
+        if uploaded_file:
+            # Save uploaded file temporarily
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                tmp_file.write(uploaded_file.getvalue())
+                temp_path = tmp_file.name
+            st.session_state.uploaded_file = temp_path
+            st.session_state.pdf_info = extract_pdf_info(temp_path)
+            # Display file info
+            if st.session_state.pdf_info and "error" not in st.session_state.pdf_info:
+                info = st.session_state.pdf_info
+                st.markdown("### 📊 File Information")
+                st.write(f"**Pages:** {info['page_count']}")
+                st.write(f"**Size:** {info['file_size']:.2f} MB")
+                st.write(f"**Created:** {info.get('created', 'N/A')}")
+                st.write(f"**Modified:** {info.get('modified', 'N/A')}")
+            # Page navigation
+            if st.session_state.pdf_info and "error" not in st.session_state.pdf_info:
+                page_count = st.session_state.pdf_info["page_count"]
+                col1, col2, col3 = st.columns([1, 2, 1])
+                with col2:
+                    current_page = st.number_input(
+                        "Page",
+                        min_value=1,
+                        max_value=page_count,
+                        value=st.session_state.current_page + 1,
+                        key="page_input"
+                    )
+                if current_page != st.session_state.current_page + 1:
+                    st.session_state.current_page = current_page - 1
+                    st.rerun()
+        # Clear button
+        if st.button("🗑️ Clear PDF", type="primary"):
+            if st.session_state.uploaded_file and os.path.exists(st.session_state.uploaded_file):
+                os.unlink(st.session_state.uploaded_file)
+            st.session_state.uploaded_file = None
+            st.session_state.pdf_info = None
+            st.session_state.current_page = 0
+            st.rerun()
+    # Main content area
+    if st.session_state.uploaded_file and os.path.exists(st.session_state.uploaded_file):
+        # Display PDF content
+        st.markdown("### 📄 PDF Content")
+        # Display current page
+        st.markdown(f"#### Page {st.session_state.current_page + 1}")
+        display_pdf_page(st.session_state.uploaded_file, st.session_state.current_page)
+        # Display thumbnails if multiple pages
+        if st.session_state.pdf_info and st.session_state.pdf_info["page_count"] > 1:
+            st.markdown("### 🖼️ Page Thumbnails")
+            display_pdf_thumbnails(st.session_state.uploaded_file)
+        # Additional actions
+        st.markdown("### ⚡ Actions")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            if st.button("📥 Download Original"):
+                with open(st.session_state.uploaded_file, "rb") as f:
+                    base64_pdf = base64.b64encode(f.read()).decode('utf-8')
+                href = f'<a href="data:application/pdf;base64,{base64_pdf}" download="document.pdf">Download PDF</a>'
+                st.markdown(href, unsafe_allow_html=True)
+        with col2:
+            if st.button("📄 Extract Text"):
+                try:
+                    doc = fitz.open(st.session_state.uploaded_file)
+                    text = ""
+                    for page in doc:
+                        text += page.get_text()
+                    doc.close()
+                    st.text_area("Extracted Text", text, height=200)
+                except Exception as e:
+                    st.error(f"Error extracting text: {e}")
+        with col3:
+            if st.button("📊 PDF Stats"):
+                if st.session_state.pdf_info and "error" not in st.session_state.pdf_info:
+                    info = st.session_state.pdf_info
+                    st.json({
+                        "page_count": info["page_count"],
+                        "file_size_mb": info["file_size"],
+                        "metadata": info["metadata"]
+                    })
+                else:
+                    st.warning("No PDF info available")
+    else:
+        # Upload area
+        st.markdown("### 📤 Upload PDF File")
+        st.markdown("""
+        <div class="upload-area">
+            <h3>Drop your PDF here or click to browse</h3>
+            <p>Supports PDF files only</p>
+        </div>
+        """, unsafe_allow_html=True)
+        # Features section
+        st.markdown("### ✨ Features")
+        features = [
+            "📖 View PDF pages with high quality rendering",
+            "🖼️ Browse through thumbnails of all pages",
+            "📥 Download original PDF file",
+            "📄 Extract text content from PDF",
+            "📊 View detailed PDF metadata and statistics",
+            "🔄 Navigate between pages easily"
+        ]
+        for feature in features:
+            st.markdown(f"- {feature}")
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()