Spaces:

hzaustingg
/

anycoder-99f49d97

Running

App Files Files Community

anycoder-99f49d97 / streamlit_app.py

hzaustingg

Upload streamlit_app.py with huggingface_hub

189733d verified 3 months ago

raw

history blame contribute delete

10.4 kB

	import streamlit as st
	import os
	import tempfile
	import fitz # PyMuPDF
	from PIL import Image
	import io
	import base64
	import time
	from typing import Optional, List, Tuple
	import logging

	# Module-level logger for this app; the root logger level is set to INFO.
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)

	# Page-wide Streamlit settings: browser tab title and icon, wide layout,
	# and the sidebar opened on first load.
	st.set_page_config(
	    **{
	        "initial_sidebar_state": "expanded",
	        "layout": "wide",
	        "page_icon": "📄",
	        "page_title": "PDF Viewer & Manager",
	    }
	)

	# Inject the app's stylesheet as raw HTML (hence unsafe_allow_html=True).
	# It defines the classes used by the HTML snippets rendered in main()
	# (.main-header, .upload-area), restyles Streamlit buttons (.stButton),
	# and declares .pdf-page / .stats-card card styles.
	st.markdown("""
	<style>
	.main-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 20px;
	border-radius: 10px;
	margin-bottom: 20px;
	color: white;
	}
	.stButton>button {
	background-color: #667eea;
	color: white;
	border-radius: 5px;
	border: none;
	padding: 8px 16px;
	transition: all 0.3s ease;
	}
	.stButton>button:hover {
	background-color: #5a67d8;
	transform: translateY(-1px);
	}
	.pdf-page {
	background-color: white;
	border-radius: 8px;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	padding: 10px;
	margin: 10px 0;
	}
	.upload-area {
	border: 2px dashed #667eea;
	border-radius: 10px;
	padding: 30px;
	text-align: center;
	transition: all 0.3s ease;
	}
	.upload-area:hover {
	border-color: #5a67d8;
	background-color: #f8f9ff;
	}
	.stats-card {
	background: white;
	border-radius: 10px;
	padding: 15px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	margin: 10px 0;
	}
	</style>
	""", unsafe_allow_html=True)

	def get_pdf_thumbnail(pdf_path: str, page_num: int = 0, width: int = 200) -> Optional[Image.Image]:
	    """Render one page of a PDF as a PIL image scaled to a target pixel width.

	    Args:
	        pdf_path: Path of the PDF file to open.
	        page_num: Zero-based index of the page to render.
	        width: Desired width of the rendered image in pixels. The height
	            follows from the page's own aspect ratio.

	    Returns:
	        The rendered page as an RGB image, or ``None`` when ``page_num`` is
	        past the last page or when opening/rendering fails (the failure is
	        logged rather than raised).
	    """
	    try:
	        # The context manager closes the document even if rendering raises.
	        with fitz.open(pdf_path) as doc:
	            if page_num >= len(doc):
	                return None
	            page = doc.load_page(page_num)
	            # Use a single zoom factor for both axes. Scaling the y axis by
	            # width / page height would squash every page into a square.
	            zoom = width / page.rect.width
	            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
	            # Build the image while the document is still open.
	            return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
	    except Exception as e:
	        logger.error(f"Error generating thumbnail: {e}")
	    return None

	def extract_pdf_info(pdf_path: str) -> dict:
	    """Collect page count, metadata, size and dates for a PDF file.

	    Args:
	        pdf_path: Path of the PDF file to inspect.

	    Returns:
	        On success, a dict with the keys ``page_count`` (int), ``metadata``
	        (the document's metadata dict), ``file_size`` (size on disk in MB),
	        ``created`` and ``modified`` (raw metadata date strings, or
	        ``"Unknown"`` when absent or empty). On failure, a dict with the
	        single key ``error`` holding the exception message; the failure is
	        also logged.
	    """
	    try:
	        # The context manager closes the document even if a lookup raises.
	        with fitz.open(pdf_path) as doc:
	            # Fall back to an empty dict so a document without metadata
	            # still yields a usable result instead of an error.
	            metadata = doc.metadata or {}
	            return {
	                "page_count": len(doc),
	                "metadata": metadata,
	                "file_size": os.path.getsize(pdf_path) / (1024 * 1024),  # MB
	                # `or` also covers keys that are present but empty/None.
	                "created": metadata.get("creationDate") or "Unknown",
	                "modified": metadata.get("modDate") or "Unknown",
	            }
	    except Exception as e:
	        logger.error(f"Error extracting PDF info: {e}")
	        return {"error": str(e)}

	def display_pdf_page(pdf_path: str, page_num: int, width: int = 800) -> None:
	    """Render a single PDF page into the Streamlit app.

	    Shows the page as an image with a "Page X of Y" caption, a warning when
	    ``page_num`` is past the last page, or an error message when the file
	    cannot be opened or rendered.

	    Args:
	        pdf_path: Path of the PDF file to open.
	        page_num: Zero-based index of the page to show.
	        width: Pixel width at which the page is rasterised. The height
	            follows from the page's aspect ratio.
	    """
	    try:
	        # The context manager closes the document even if rendering raises.
	        with fitz.open(pdf_path) as doc:
	            total_pages = len(doc)
	            if page_num >= total_pages:
	                st.warning(f"Page {page_num + 1} not found. PDF has {total_pages} pages.")
	                return
	            page = doc.load_page(page_num)
	            # Use a single zoom factor for both axes. Scaling the y axis by
	            # width / page height would distort the page into a square.
	            zoom = width / page.rect.width
	            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
	            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

	        st.image(img, use_column_width=True, caption=f"Page {page_num + 1} of {total_pages}")
	    except Exception as e:
	        st.error(f"Error displaying PDF page: {e}")

	def display_pdf_thumbnails(pdf_path: str, max_thumbnails: int = 5) -> None:
	    """Show a row of clickable thumbnails for the first pages of a PDF.

	    Each thumbnail gets a "View Page N" button. Clicking it stores the
	    zero-based page index in ``st.session_state.current_page`` and reruns
	    the script.

	    Args:
	        pdf_path: Path of the PDF file to preview.
	        max_thumbnails: Upper bound on the number of thumbnails shown.
	    """
	    try:
	        # Only the page count is needed here, so close the document right
	        # away instead of holding it open while st.rerun() unwinds.
	        with fitz.open(pdf_path) as doc:
	            page_total = len(doc)

	        shown = min(max_thumbnails, page_total)
	        if shown < 1:
	            # Nothing to show; also avoids asking st.columns for 0 columns.
	            return

	        for i, col in enumerate(st.columns(shown)):
	            thumbnail = get_pdf_thumbnail(pdf_path, i, width=150)
	            if thumbnail is None:
	                continue
	            with col:
	                st.image(thumbnail, use_column_width=True, caption=f"Page {i+1}")
	                if st.button(f"View Page {i+1}", key=f"page_{i}"):
	                    st.session_state.current_page = i
	                    st.rerun()
	    except Exception as e:
	        st.error(f"Error displaying thumbnails: {e}")

	def _release_pdf() -> None:
	    """Delete the temp copy of the current PDF and reset PDF session state."""
	    temp_path = st.session_state.get("uploaded_file")
	    if temp_path and os.path.exists(temp_path):
	        try:
	            os.unlink(temp_path)
	        except OSError as e:
	            # Best effort: a leftover temp file must not break the UI.
	            logger.warning(f"Could not remove temporary file {temp_path}: {e}")
	    st.session_state.uploaded_file = None
	    st.session_state.pdf_info = None
	    st.session_state.current_page = 0
	    st.session_state.upload_signature = None


	def _store_upload(uploaded_file) -> None:
	    """Save a newly uploaded PDF to a temp file, replacing any previous one."""
	    _release_pdf()
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	        tmp_file.write(uploaded_file.getvalue())
	    st.session_state.uploaded_file = tmp_file.name
	    st.session_state.pdf_info = extract_pdf_info(tmp_file.name)
	    st.session_state.upload_name = uploaded_file.name


	def _sync_page_from_input() -> None:
	    """Copy the 1-based page widget value into the 0-based current_page."""
	    st.session_state.current_page = st.session_state.page_input - 1


	def main():
	    """Run the Streamlit PDF viewer.

	    The sidebar holds the uploader, file information, page navigation and
	    the "Clear PDF" button. The main area shows the current page, page
	    thumbnails and the download / extract-text / stats actions, or an
	    upload prompt with a feature list when no PDF is loaded.

	    Session state keys used: ``uploaded_file`` (temp file path),
	    ``pdf_info`` (result of ``extract_pdf_info``), ``current_page``
	    (zero-based), ``upload_signature`` (identity of the file already saved),
	    ``upload_name`` (original file name), ``uploader_key`` (counter used to
	    reset the uploader widget) and ``page_input`` (page number widget).
	    """
	    # Initialize session state
	    defaults = {
	        "uploaded_file": None,
	        "current_page": 0,
	        "pdf_info": None,
	        "upload_signature": None,
	        "upload_name": "document.pdf",
	        "uploader_key": 0,
	    }
	    for key, value in defaults.items():
	        if key not in st.session_state:
	            st.session_state[key] = value

	    # Header with anycoder link. Built from single-line pieces so no line
	    # of the HTML carries leading whitespace into the Markdown renderer.
	    st.markdown(
	        '<div class="main-header">'
	        "<h1>📄 PDF Viewer & Manager</h1>"
	        '<p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" '
	        'target="_blank" style="color: white; text-decoration: underline;">anycoder</a></p>'
	        "</div>",
	        unsafe_allow_html=True,
	    )

	    # Sidebar
	    with st.sidebar:
	        st.header("📋 Navigation")

	        # File upload section
	        st.subheader("Upload PDF")
	        uploaded_file = st.file_uploader(
	            "Choose a PDF file",
	            type=["pdf"],
	            help="Upload a PDF file to view and manage",
	            # Changing the key creates a fresh, empty uploader; this is
	            # how "Clear PDF" actually gets rid of the selected file.
	            key=f"pdf_uploader_{st.session_state.uploader_key}",
	        )

	        if uploaded_file is not None:
	            # The script reruns on every interaction, so only write a temp
	            # file when the upload is new or the previous copy is gone.
	            signature = (
	                getattr(uploaded_file, "file_id", None)
	                or f"{uploaded_file.name}:{uploaded_file.size}"
	            )
	            saved_path = st.session_state.uploaded_file
	            already_saved = (
	                signature == st.session_state.upload_signature
	                and bool(saved_path)
	                and os.path.exists(saved_path)
	            )
	            if not already_saved:
	                _store_upload(uploaded_file)
	                st.session_state.upload_signature = signature
	        elif st.session_state.uploaded_file:
	            # The file was removed through the uploader widget itself.
	            _release_pdf()

	        info = st.session_state.pdf_info
	        if info and "error" not in info:
	            # Display file info
	            st.markdown("### 📊 File Information")
	            st.write(f"Pages: {info['page_count']}")
	            st.write(f"Size: {info['file_size']:.2f} MB")
	            st.write(f"Created: {info.get('created', 'N/A')}")
	            st.write(f"Modified: {info.get('modified', 'N/A')}")

	            # Page navigation
	            page_count = info["page_count"]
	            if page_count > 0:
	                # Clamp first: a page index left over from a longer
	                # document would otherwise exceed the widget's max_value.
	                st.session_state.current_page = min(
	                    max(st.session_state.current_page, 0), page_count - 1
	                )
	                # current_page is the source of truth (thumbnail buttons
	                # write it too); mirror it into the widget before the
	                # widget is created.
	                st.session_state.page_input = st.session_state.current_page + 1
	                _, nav_col, _ = st.columns([1, 2, 1])
	                with nav_col:
	                    st.number_input(
	                        "Page",
	                        min_value=1,
	                        max_value=page_count,
	                        key="page_input",
	                        on_change=_sync_page_from_input,
	                    )

	        # Clear button
	        if st.session_state.uploaded_file:
	            if st.button("🗑️ Clear PDF", type="primary"):
	                _release_pdf()
	                st.session_state.uploader_key += 1
	                st.rerun()

	    # Main content area
	    pdf_path = st.session_state.uploaded_file
	    if pdf_path and os.path.exists(pdf_path):
	        # Display PDF content
	        st.markdown("### 📄 PDF Content")

	        # Display current page
	        st.markdown(f"#### Page {st.session_state.current_page + 1}")
	        display_pdf_page(pdf_path, st.session_state.current_page)

	        # Display thumbnails if multiple pages. .get() keeps this safe when
	        # pdf_info is the {"error": ...} dict of an unreadable file.
	        info = st.session_state.pdf_info or {}
	        if info.get("page_count", 0) > 1:
	            st.markdown("### 🖼️ Page Thumbnails")
	            display_pdf_thumbnails(pdf_path)

	        # Additional actions
	        st.markdown("### ⚡ Actions")
	        download_col, text_col, stats_col = st.columns(3)

	        with download_col:
	            # A real download button serves the bytes directly, without
	            # embedding the whole file in the page as a base64 link.
	            with open(pdf_path, "rb") as f:
	                st.download_button(
	                    "📥 Download Original",
	                    data=f.read(),
	                    file_name=st.session_state.upload_name,
	                    mime="application/pdf",
	                )

	        with text_col:
	            if st.button("📄 Extract Text"):
	                try:
	                    with fitz.open(pdf_path) as doc:
	                        text = "".join(page.get_text() for page in doc)
	                    st.text_area("Extracted Text", text, height=200)
	                except Exception as e:
	                    st.error(f"Error extracting text: {e}")

	        with stats_col:
	            if st.button("📊 PDF Stats"):
	                if info and "error" not in info:
	                    st.json({
	                        "page_count": info["page_count"],
	                        "file_size_mb": info["file_size"],
	                        "metadata": info["metadata"],
	                    })
	                else:
	                    st.warning("No PDF info available")
	    else:
	        # Upload area
	        st.markdown("### 📤 Upload PDF File")
	        st.markdown(
	            '<div class="upload-area">'
	            "<h3>Drop your PDF here or click to browse</h3>"
	            "<p>Supports PDF files only</p>"
	            "</div>",
	            unsafe_allow_html=True,
	        )

	        # Features section
	        st.markdown("### ✨ Features")
	        features = [
	            "📖 View PDF pages with high quality rendering",
	            "🖼️ Browse through thumbnails of all pages",
	            "📥 Download original PDF file",
	            "📄 Extract text content from PDF",
	            "📊 View detailed PDF metadata and statistics",
	            "🔄 Navigate between pages easily",
	        ]

	        for feature in features:
	            st.markdown(f"- {feature}")

	# Script entry point: build the UI when this file is executed as the main
	# module (as it is under `streamlit run`), not when it is imported.
	if __name__ == "__main__":
	    main()