# PDF_analyst / app_simple.py
# JatsTheAIGen's picture
# Fix build issues: simplify app.py and fix packages.txt
# 93c9801
# PDF Analysis & Orchestrator - Simplified for Hugging Face Spaces
import os
import asyncio
import uuid
from pathlib import Path
from typing import Optional, List, Tuple
import time
import gradio as gr
from agents import (
AnalysisAgent,
CollaborationAgent,
ConversationAgent,
MasterOrchestrator,
)
from utils import load_pdf_text
from utils.session import make_user_session
from utils.validation import validate_file_size
from utils.prompts import PromptManager
from utils.export import ExportManager
from config import Config
# ------------------------
# Initialize Components
# ------------------------
# Directory creation is best-effort: a failure is reported but does not stop
# the app from starting.
try:
    Config.ensure_directories()
except Exception as e:
    print(f"Warning: Could not ensure directories: {e}")

# Agent Roster - Focused on Analysis & Orchestration
AGENTS = {
    "analysis": AnalysisAgent(name="AnalysisAgent", model=Config.OPENAI_MODEL, tasks_completed=0),
    "collab": CollaborationAgent(name="CollaborationAgent", model=Config.OPENAI_MODEL, tasks_completed=0),
    "conversation": ConversationAgent(name="ConversationAgent", model=Config.OPENAI_MODEL, tasks_completed=0),
}
ORCHESTRATOR = MasterOrchestrator(agents=AGENTS)

# Initialize managers.
# Each manager gets its own try/except so that a failure in one of them does
# not also disable the other; a manager that cannot be built is set to None
# and the handlers below check for that before using it.
try:
    PROMPT_MANAGER = PromptManager()
except Exception as e:
    print(f"Warning: Could not initialize prompt manager: {e}")
    PROMPT_MANAGER = None

try:
    EXPORT_MANAGER = ExportManager()
except Exception as e:
    print(f"Warning: Could not initialize export manager: {e}")
    EXPORT_MANAGER = None
# ------------------------
# File Handling
# ------------------------
def save_uploaded_file(uploaded, username: str = "anonymous", session_dir: Optional[str] = None) -> str:
    """Copy an uploaded file into a session directory and return the new path.

    The copy is always written as ``upload_<random hex>.pdf`` inside
    ``session_dir``; the directory is created when it does not exist.

    Args:
        uploaded: The upload in any of the supported shapes:
            * a filesystem path (``str`` or ``os.PathLike``),
            * a dict with a ``"name"`` key holding a path,
            * an object with a ``name`` attribute holding a path,
            * a readable object exposing ``read()`` that returns bytes.
        username: Passed to ``make_user_session`` when ``session_dir`` is
            not supplied.
        session_dir: Target directory. When ``None`` it is obtained from
            ``make_user_session(username)``.

    Returns:
        The path of the saved copy, as a string.

    Raises:
        RuntimeError: If ``uploaded`` matches none of the supported shapes
            (or the path it names does not exist and it cannot be read).
    """
    from shutil import copyfile

    if session_dir is None:
        session_dir = make_user_session(username)
    target_dir = Path(session_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    dst = target_dir / f"upload_{uuid.uuid4().hex}.pdf"

    # Work out whether the upload names a file on disk.
    if isinstance(uploaded, (str, os.PathLike)):
        source_path = uploaded
    elif isinstance(uploaded, dict):
        source_path = uploaded.get("name")
    else:
        source_path = getattr(uploaded, "name", None)

    # Prefer copying from the on-disk path over read(): an upload wrapper may
    # expose a handle that is empty or already consumed while its ``name``
    # points at the real file, and read() would then save an empty PDF.
    # The isinstance check keeps integer ``name`` values (file descriptors)
    # from being treated as paths.
    if isinstance(source_path, (str, os.PathLike)) and os.path.exists(source_path):
        copyfile(source_path, dst)
        return str(dst)

    # Purely in-memory uploads (no usable path) are written from read().
    if hasattr(uploaded, "read"):
        with open(dst, "wb") as f:
            f.write(uploaded.read())
        return str(dst)

    raise RuntimeError("Unable to save uploaded file.")
# ------------------------
# Async wrapper
# ------------------------
def run_async(func, *args, **kwargs):
    """Run the async callable ``func`` to completion from synchronous code.

    A fresh event loop is created for the call, installed as the current
    loop of the calling thread while ``func(*args, **kwargs)`` runs, and
    closed afterwards.

    Args:
        func: A callable returning an awaitable (typically an ``async def``).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever the awaitable returned by ``func`` resolves to.

    Raises:
        Any exception raised by ``func`` or by the awaitable it returns.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(func(*args, **kwargs))
    finally:
        # Without this every call leaks an open loop (and its selector /
        # self-pipe descriptors) and leaves a stale loop registered as the
        # thread's current loop.
        asyncio.set_event_loop(None)
        loop.close()
# ------------------------
# Analysis Handlers - Core Features
# ------------------------
def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
    """Analyze a single uploaded PDF with the orchestrator's "analysis" target.

    Args:
        file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded.
        prompt: The user's instructions for the analysis.
        username: Identifier used for the upload session and passed to the
            orchestrator as ``user_id``. A missing or blank value falls back
            to ``"anonymous"``.
        use_streaming: Accepted for interface compatibility; not used here.

    Returns:
        A 3-tuple ``(message, None, None)``. ``message`` is the analysis
        text, a prompt to upload a file, or an ``"Error during analysis:"``
        message; exceptions are reported in the message rather than raised.
    """
    if file is None:
        return "Please upload a PDF.", None, None

    # An empty textbox submits "" (not the parameter default), so blank
    # values are mapped to the documented "anonymous" fallback explicitly.
    if username is None or (isinstance(username, str) and not username.strip()):
        username = "anonymous"

    try:
        validate_file_size(file)
        path = save_uploaded_file(file, username)
        # The orchestrator result is read with .get(), i.e. it is expected to
        # be a mapping with an "analysis" entry.
        result = run_async(
            ORCHESTRATOR.handle_user_prompt,
            user_id=username,
            prompt=prompt,
            file_path=path,
            targets=["analysis"],
        )
        return result.get("analysis", "No analysis result."), None, None
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # event handler.
        return f"Error during analysis: {str(e)}", None, None
def handle_batch_analysis(files, prompt, username="anonymous"):
    """Analyze several uploaded PDFs in one orchestrator batch call.

    Every file is size-validated and saved first; the saved paths are then
    passed to ``ORCHESTRATOR.handle_batch_analysis`` and the result is
    rendered as a plain-text report.

    Args:
        files: Sequence of uploaded files as delivered by the UI; ``None``
            or an empty sequence yields a prompt to upload a file.
        prompt: Instructions applied to every document.
        username: Identifier used for the upload sessions and passed to the
            orchestrator as ``user_id``. A missing or blank value falls back
            to ``"anonymous"``.

    Returns:
        A 3-tuple ``(report, None, None)``. ``report`` is the formatted
        batch report, a prompt to upload files, or an
        ``"Error during batch analysis:"`` message; exceptions are reported
        in the message rather than raised.
    """
    if not files:
        return "Please upload at least one PDF.", None, None

    # An empty textbox submits "" (not the parameter default), so blank
    # values are mapped to the documented "anonymous" fallback explicitly.
    if username is None or (isinstance(username, str) and not username.strip()):
        username = "anonymous"

    try:
        # Validate and save all files before any analysis starts; a failure
        # here aborts the whole batch via the except clause below.
        file_paths = []
        for file in files:
            validate_file_size(file)
            file_paths.append(save_uploaded_file(file, username))

        result = run_async(
            ORCHESTRATOR.handle_batch_analysis,
            user_id=username,
            prompt=prompt,
            file_paths=file_paths,
            targets=["analysis"],
        )

        # Format batch results. The result is read as a mapping with a
        # "summary" mapping and a "batch_results" list of per-file mappings.
        batch_summary = result.get("summary", {})
        batch_results = result.get("batch_results", [])
        stats = batch_summary.get("processing_stats", {})

        parts = [
            "πŸ“Š Batch Analysis Results\n",
            f"Total files: {stats.get('total_files', 0)}\n",
            f"Successful: {stats.get('successful', 0)}\n",
            f"Failed: {stats.get('failed', 0)}\n",
            f"Success rate: {stats.get('success_rate', '0%')}\n\n",
        ]
        if batch_summary.get("batch_analysis"):
            parts.append(f"πŸ“‹ Batch Summary:\n{batch_summary['batch_analysis']}\n\n")

        parts.append("πŸ“„ Individual Results:\n")
        for number, file_result in enumerate(batch_results, start=1):
            file_name = Path(file_result.get("file_path", "Unknown")).name
            parts.append(f"\n--- File {number}: {file_name} ---\n")
            if "error" in file_result:
                parts.append(f"❌ Error: {file_result['error']}\n")
            else:
                parts.append(f"βœ… {file_result.get('analysis', 'No analysis')}\n")

        return "".join(parts), None, None
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # event handler.
        return f"Error during batch analysis: {str(e)}", None, None
def handle_export(result_text, export_format, username="anonymous"):
    """Export analysis text through the export manager.

    Args:
        result_text: The text to export; ``None``, empty or whitespace-only
            text is rejected.
        export_format: One of ``"txt"``, ``"json"`` or ``"pdf"``.
        username: Passed to the export manager and, for JSON, recorded as
            ``exported_by``. A missing or blank value falls back to
            ``"anonymous"``.

    Returns:
        A 2-tuple ``(status_message, filepath)``. ``filepath`` is the value
        returned by the export manager on success and ``None`` otherwise;
        exceptions are reported in the message rather than raised.
    """
    if not result_text or not result_text.strip():
        return "No content to export.", None
    # The manager is None when it could not be constructed at start-up.
    if not EXPORT_MANAGER:
        return "Export functionality not available.", None

    # An empty textbox submits "" (not the parameter default), so blank
    # values are mapped to the documented "anonymous" fallback explicitly.
    if username is None or (isinstance(username, str) and not username.strip()):
        username = "anonymous"

    try:
        if export_format == "txt":
            filepath = EXPORT_MANAGER.export_text(result_text, username=username)
        elif export_format == "json":
            data = {"analysis": result_text, "exported_by": username, "timestamp": time.time()}
            filepath = EXPORT_MANAGER.export_json(data, username=username)
        elif export_format == "pdf":
            filepath = EXPORT_MANAGER.export_pdf(result_text, username=username)
        else:
            return f"Unsupported export format: {export_format}", None
        return f"βœ… Export successful! File saved to: {filepath}", filepath
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the
        # event handler.
        return f"❌ Export failed: {str(e)}", None
def get_custom_prompts():
    """Return the keys of all prompts known to the prompt manager.

    Returns:
        A list of the keys of ``PROMPT_MANAGER.get_all_prompts()``, or an
        empty list when no prompt manager is available.
    """
    if PROMPT_MANAGER:
        available = PROMPT_MANAGER.get_all_prompts()
        return list(available.keys())
    # The manager is None when it could not be constructed at start-up.
    return []
def load_custom_prompt(prompt_id):
    """Look up the prompt stored under ``prompt_id``.

    Args:
        prompt_id: Key handed to ``PROMPT_MANAGER.get_prompt``.

    Returns:
        The value returned by the prompt manager, or ``""`` when that value
        is falsy or no prompt manager is available.
    """
    if PROMPT_MANAGER:
        template = PROMPT_MANAGER.get_prompt(prompt_id)
        if template:
            return template
    # Fall back to an empty string so the target textbox is simply cleared.
    return ""
# ------------------------
# Gradio UI - Simplified Interface
# ------------------------
# Builds the whole Gradio interface: a single-document tab, a batch tab,
# the event wiring for both, and a list of example instructions.
# NOTE(review): the nesting below is reconstructed from the component order;
# confirm the layout against the running app.
with gr.Blocks(title="PDF Analysis & Orchestrator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ“„ PDF Analysis & Orchestrator - Intelligent Document Processing")
    gr.Markdown("Upload PDFs and provide instructions for analysis, summarization, or explanation.")

    with gr.Tabs():
        # Single Document Analysis Tab
        with gr.Tab("πŸ“„ Single Document Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], elem_id="file_upload")
                    username_input = gr.Textbox(label="Username (optional)", placeholder="anonymous", elem_id="username")

                    # Custom Prompts Section
                    with gr.Accordion("🎯 Custom Prompts", open=False):
                        # Choices are read once, when the UI is built.
                        prompt_dropdown = gr.Dropdown(
                            choices=get_custom_prompts(),
                            label="Select Custom Prompt",
                            value=None
                        )
                        load_prompt_btn = gr.Button("Load Prompt", size="sm")

                with gr.Column(scale=2):
                    gr.Markdown("### Analysis Instructions")
                    prompt_input = gr.Textbox(
                        lines=4,
                        placeholder="Describe what you want to do with the document...\nExamples:\n- Summarize this document in 3 key points\n- Explain this technical paper for a 10-year-old\n- Segment this document by themes\n- Analyze the key findings",
                        label="Instructions"
                    )
                    with gr.Row():
                        submit_btn = gr.Button("πŸ” Analyze & Orchestrate", variant="primary", size="lg")
                        clear_btn = gr.Button("πŸ—‘οΈ Clear", size="sm")

            # Results Section
            with gr.Row():
                with gr.Column(scale=2):
                    output_box = gr.Textbox(label="Analysis Result", lines=15, max_lines=25, show_copy_button=True)
                    status_box = gr.Textbox(label="Status", value="Ready to analyze documents", interactive=False)

                with gr.Column(scale=1):
                    # Export Section
                    with gr.Accordion("πŸ’Ύ Export Results", open=False):
                        export_format = gr.Dropdown(
                            choices=["txt", "json", "pdf"],
                            label="Export Format",
                            value="txt"
                        )
                        export_btn = gr.Button("πŸ“₯ Export", variant="secondary")
                        export_status = gr.Textbox(label="Export Status", interactive=False)

        # Batch Processing Tab
        with gr.Tab("πŸ“š Batch Processing"):
            with gr.Row():
                with gr.Column(scale=1):
                    batch_files = gr.File(
                        label="Upload Multiple PDFs",
                        file_count="multiple",
                        file_types=[".pdf"]
                    )
                    batch_username = gr.Textbox(label="Username (optional)", placeholder="anonymous")

                with gr.Column(scale=2):
                    batch_prompt = gr.Textbox(
                        lines=3,
                        placeholder="Enter analysis instructions for all documents...",
                        label="Batch Analysis Instructions"
                    )
                    batch_submit = gr.Button("πŸš€ Process Batch", variant="primary", size="lg")

            batch_output = gr.Textbox(label="Batch Results", lines=20, max_lines=30, show_copy_button=True)
            batch_status = gr.Textbox(label="Batch Status", interactive=False)

    # Event Handlers
    # Single document analysis. The handler returns three values; the third
    # goes to a throwaway State component, and gr.State(False) supplies the
    # handler's use_streaming argument.
    submit_btn.click(
        fn=handle_analysis,
        inputs=[pdf_in, prompt_input, username_input, gr.State(False)],
        outputs=[output_box, status_box, gr.State()]
    )

    # Load custom prompt
    load_prompt_btn.click(
        fn=load_custom_prompt,
        inputs=[prompt_dropdown],
        outputs=[prompt_input]
    )

    # Export functionality. The returned file path goes to a throwaway State.
    export_btn.click(
        fn=handle_export,
        inputs=[output_box, export_format, username_input],
        outputs=[export_status, gr.State()]
    )

    # Clear functionality. The file component is reset with None (its "no
    # file" value) rather than an empty string, which is not a file value.
    clear_btn.click(
        fn=lambda: (None, "", "", "Ready"),
        inputs=[],
        outputs=[pdf_in, prompt_input, output_box, status_box]
    )

    # Batch processing
    batch_submit.click(
        fn=handle_batch_analysis,
        inputs=[batch_files, batch_prompt, batch_username],
        outputs=[batch_output, batch_status, gr.State()]
    )

    # Examples: clicking one fills the single-document instructions box.
    gr.Examples(
        examples=[
            ["Summarize this document in 3 key points"],
            ["Explain this technical content for a general audience"],
            ["Segment this document by main themes or topics"],
            ["Analyze the key findings and recommendations"],
            ["Create an executive summary of this document"],
        ],
        inputs=prompt_input,
        label="Example Instructions"
    )
if __name__ == "__main__":
    # Listen on all interfaces; the port is taken from the PORT environment
    # variable when it is set and defaults to 7860 otherwise.
    demo.launch(
        server_port=int(os.environ.get("PORT", 7860)),
        server_name="0.0.0.0",
    )