# PDF Analysis & Orchestrator - Simplified for Hugging Face Spaces
import os
import asyncio
import uuid
import shutil
from pathlib import Path
from typing import Optional, List, Tuple
import time

import gradio as gr

from agents import (
    AnalysisAgent,
    CollaborationAgent,
    ConversationAgent,
    MasterOrchestrator,
)
from utils import load_pdf_text
from utils.session import make_user_session
from utils.validation import validate_file_size
from utils.prompts import PromptManager
from utils.export import ExportManager
from config import Config

# ------------------------
# Initialize Components
# ------------------------
try:
    Config.ensure_directories()
except Exception as e:
    # Best effort: the app still starts if the directories cannot be created.
    print(f"Warning: Could not ensure directories: {e}")

# Agent Roster - Focused on Analysis & Orchestration
AGENTS = {
    "analysis": AnalysisAgent(
        name="AnalysisAgent", model=Config.OPENAI_MODEL, tasks_completed=0
    ),
    "collab": CollaborationAgent(
        name="CollaborationAgent", model=Config.OPENAI_MODEL, tasks_completed=0
    ),
    "conversation": ConversationAgent(
        name="ConversationAgent", model=Config.OPENAI_MODEL, tasks_completed=0
    ),
}
ORCHESTRATOR = MasterOrchestrator(agents=AGENTS)

# Initialize managers
try:
    PROMPT_MANAGER = PromptManager()
    EXPORT_MANAGER = ExportManager()
except Exception as e:
    # If either manager fails, both features are disabled; the handlers below
    # check for None before using them.
    print(f"Warning: Could not initialize managers: {e}")
    PROMPT_MANAGER = None
    EXPORT_MANAGER = None

# Name used whenever the optional username field is left blank.
DEFAULT_USERNAME = "anonymous"


def _resolve_username(username: Optional[str]) -> str:
    """Return *username*, or ``DEFAULT_USERNAME`` when it is missing or blank.

    The UI marks the username textbox as optional; a blank textbox submits an
    empty string rather than triggering the handlers' Python default, so the
    fallback has to be applied explicitly.
    """
    if username and username.strip():
        return username
    return DEFAULT_USERNAME


# ------------------------
# File Handling
# ------------------------
def save_uploaded_file(
    uploaded, username: str = "anonymous", session_dir: Optional[str] = None
) -> str:
    """Copy an uploaded PDF into the session directory and return its path.

    Args:
        uploaded: One of: a path string to an existing file, an object with a
            ``read()`` method, or a dict whose ``"name"`` entry is the path of
            an existing file.
        username: Passed to ``make_user_session`` when no ``session_dir`` is
            given. Blank values fall back to ``DEFAULT_USERNAME``.
        session_dir: Directory to store the copy in; created if missing.

    Returns:
        The path of the stored copy, named ``upload_<random hex>.pdf``.

    Raises:
        RuntimeError: If ``uploaded`` matches none of the supported forms.
    """
    if session_dir is None:
        session_dir = make_user_session(_resolve_username(username))
    target_dir = Path(session_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    dst = target_dir / f"upload_{uuid.uuid4().hex}.pdf"

    if isinstance(uploaded, str) and os.path.exists(uploaded):
        shutil.copyfile(uploaded, dst)
        return str(dst)

    if hasattr(uploaded, "read"):
        with open(dst, "wb") as f:
            f.write(uploaded.read())
        return str(dst)

    if (
        isinstance(uploaded, dict)
        and "name" in uploaded
        and os.path.exists(uploaded["name"])
    ):
        shutil.copyfile(uploaded["name"], dst)
        return str(dst)

    raise RuntimeError("Unable to save uploaded file.")


# ------------------------
# Async wrapper
# ------------------------
def run_async(func, *args, **kwargs):
    """Run the awaitable returned by ``func(*args, **kwargs)`` to completion.

    A fresh event loop is created for the call and is always closed
    afterwards, so repeated calls do not accumulate unclosed loops.

    Returns:
        Whatever the awaited call returns.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(func(*args, **kwargs))
    finally:
        # Do not leave a closed loop installed as this thread's current loop.
        asyncio.set_event_loop(None)
        loop.close()


# ------------------------
# Analysis Handlers - Core Features
# ------------------------
def handle_analysis(
    file, prompt, username="anonymous", use_streaming=False
) -> Tuple[str, None, None]:
    """Analyze a single uploaded PDF according to *prompt*.

    Args:
        file: The uploaded file, or ``None`` when nothing was uploaded.
        prompt: The user's analysis instructions.
        username: Optional user name; blank values fall back to
            ``DEFAULT_USERNAME``.
        use_streaming: Accepted for interface compatibility; not used here.

    Returns:
        A 3-tuple whose first item is the analysis text (or an error message)
        and whose remaining items are ``None``.
    """
    if file is None:
        return "Please upload a PDF.", None, None

    username = _resolve_username(username)
    try:
        validate_file_size(file)
        path = save_uploaded_file(file, username)
        result = run_async(
            ORCHESTRATOR.handle_user_prompt,
            user_id=username,
            prompt=prompt,
            file_path=path,
            targets=["analysis"],
        )
        return result.get("analysis", "No analysis result."), None, None
    except Exception as e:
        # UI boundary: report the failure in the result box instead of raising.
        return f"Error during analysis: {str(e)}", None, None


def _format_batch_report(result: dict) -> str:
    """Render the orchestrator's batch result dict as a plain-text report."""
    batch_summary = result.get("summary", {})
    batch_results = result.get("batch_results", [])
    stats = batch_summary.get("processing_stats", {})

    parts: List[str] = [
        "📊 Batch Analysis Results\n",
        f"Total files: {stats.get('total_files', 0)}\n",
        f"Successful: {stats.get('successful', 0)}\n",
        f"Failed: {stats.get('failed', 0)}\n",
        f"Success rate: {stats.get('success_rate', '0%')}\n\n",
    ]

    if batch_summary.get("batch_analysis"):
        parts.append(f"📋 Batch Summary:\n{batch_summary['batch_analysis']}\n\n")

    parts.append("📄 Individual Results:\n")
    for index, file_result in enumerate(batch_results, start=1):
        file_name = Path(file_result.get("file_path", "Unknown")).name
        parts.append(f"\n--- File {index}: {file_name} ---\n")
        if "error" in file_result:
            parts.append(f"❌ Error: {file_result['error']}\n")
        else:
            parts.append(f"✅ {file_result.get('analysis', 'No analysis')}\n")

    return "".join(parts)


def handle_batch_analysis(
    files, prompt, username="anonymous"
) -> Tuple[str, None, None]:
    """Handle batch analysis of multiple PDFs.

    Args:
        files: The uploaded files; ``None`` or empty when nothing was uploaded.
        prompt: Analysis instructions applied to every document.
        username: Optional user name; blank values fall back to
            ``DEFAULT_USERNAME``.

    Returns:
        A 3-tuple whose first item is the formatted batch report (or an error
        message) and whose remaining items are ``None``.
    """
    if not files:
        return "Please upload at least one PDF.", None, None

    username = _resolve_username(username)
    try:
        # Validate and store every file before starting the analysis.
        file_paths = []
        for file in files:
            validate_file_size(file)
            file_paths.append(save_uploaded_file(file, username))

        result = run_async(
            ORCHESTRATOR.handle_batch_analysis,
            user_id=username,
            prompt=prompt,
            file_paths=file_paths,
            targets=["analysis"],
        )
        return _format_batch_report(result), None, None
    except Exception as e:
        # UI boundary: report the failure in the result box instead of raising.
        return f"Error during batch analysis: {str(e)}", None, None


def handle_export(
    result_text, export_format, username="anonymous"
) -> Tuple[str, Optional[str]]:
    """Handle export of analysis results.

    Args:
        result_text: The text to export.
        export_format: One of ``"txt"``, ``"json"`` or ``"pdf"``.
        username: Optional user name; blank values fall back to
            ``DEFAULT_USERNAME``.

    Returns:
        ``(status message, path)`` where ``path`` is whatever the export
        manager returned, or ``None`` when nothing was exported.
    """
    if not result_text or not result_text.strip():
        return "No content to export.", None
    if not EXPORT_MANAGER:
        return "Export functionality not available.", None

    username = _resolve_username(username)
    try:
        if export_format == "txt":
            filepath = EXPORT_MANAGER.export_text(result_text, username=username)
        elif export_format == "json":
            data = {
                "analysis": result_text,
                "exported_by": username,
                "timestamp": time.time(),
            }
            filepath = EXPORT_MANAGER.export_json(data, username=username)
        elif export_format == "pdf":
            filepath = EXPORT_MANAGER.export_pdf(result_text, username=username)
        else:
            return f"Unsupported export format: {export_format}", None
        return f"✅ Export successful! File saved to: {filepath}", filepath
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"❌ Export failed: {str(e)}", None


def get_custom_prompts() -> List[str]:
    """Get available custom prompts.

    Returns:
        The keys of the prompt manager's prompts, or an empty list when the
        prompt manager is unavailable.
    """
    if not PROMPT_MANAGER:
        return []
    prompts = PROMPT_MANAGER.get_all_prompts()
    return list(prompts.keys())


def load_custom_prompt(prompt_id):
    """Load a custom prompt template.

    Returns an empty string when the prompt manager is unavailable, when no
    prompt is selected, or when the manager returns a falsy value.
    """
    if not PROMPT_MANAGER or not prompt_id:
        return ""
    return PROMPT_MANAGER.get_prompt(prompt_id) or ""


def _reset_single_document_form() -> Tuple[None, str, str, str]:
    """Return cleared values for (file upload, instructions, result, status).

    The file upload is cleared with ``None``; an empty string is not a valid
    "no file" value for a file component.
    """
    return None, "", "", "Ready"


# ------------------------
# Gradio UI - Simplified Interface
# ------------------------
with gr.Blocks(title="PDF Analysis & Orchestrator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 PDF Analysis & Orchestrator - Intelligent Document Processing")
    gr.Markdown(
        "Upload PDFs and provide instructions for analysis, summarization, or explanation."
    )

    with gr.Tabs():
        # Single Document Analysis Tab
        with gr.Tab("📄 Single Document Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_in = gr.File(
                        label="Upload PDF", file_types=[".pdf"], elem_id="file_upload"
                    )
                    username_input = gr.Textbox(
                        label="Username (optional)",
                        placeholder="anonymous",
                        elem_id="username",
                    )

                    # Custom Prompts Section
                    with gr.Accordion("🎯 Custom Prompts", open=False):
                        prompt_dropdown = gr.Dropdown(
                            choices=get_custom_prompts(),
                            label="Select Custom Prompt",
                            value=None,
                        )
                        load_prompt_btn = gr.Button("Load Prompt", size="sm")

                with gr.Column(scale=2):
                    gr.Markdown("### Analysis Instructions")
                    prompt_input = gr.Textbox(
                        lines=4,
                        placeholder=(
                            "Describe what you want to do with the document...\n"
                            "Examples:\n"
                            "- Summarize this document in 3 key points\n"
                            "- Explain this technical paper for a 10-year-old\n"
                            "- Segment this document by themes\n"
                            "- Analyze the key findings"
                        ),
                        label="Instructions",
                    )
                    with gr.Row():
                        submit_btn = gr.Button(
                            "🔍 Analyze & Orchestrate", variant="primary", size="lg"
                        )
                        clear_btn = gr.Button("🗑️ Clear", size="sm")

            # Results Section
            with gr.Row():
                with gr.Column(scale=2):
                    output_box = gr.Textbox(
                        label="Analysis Result",
                        lines=15,
                        max_lines=25,
                        show_copy_button=True,
                    )
                    status_box = gr.Textbox(
                        label="Status",
                        value="Ready to analyze documents",
                        interactive=False,
                    )
                with gr.Column(scale=1):
                    # Export Section
                    with gr.Accordion("💾 Export Results", open=False):
                        export_format = gr.Dropdown(
                            choices=["txt", "json", "pdf"],
                            label="Export Format",
                            value="txt",
                        )
                        export_btn = gr.Button("📥 Export", variant="secondary")
                        export_status = gr.Textbox(
                            label="Export Status", interactive=False
                        )

        # Batch Processing Tab
        with gr.Tab("📚 Batch Processing"):
            with gr.Row():
                with gr.Column(scale=1):
                    batch_files = gr.File(
                        label="Upload Multiple PDFs",
                        file_count="multiple",
                        file_types=[".pdf"],
                    )
                    batch_username = gr.Textbox(
                        label="Username (optional)", placeholder="anonymous"
                    )
                with gr.Column(scale=2):
                    batch_prompt = gr.Textbox(
                        lines=3,
                        placeholder="Enter analysis instructions for all documents...",
                        label="Batch Analysis Instructions",
                    )
                    batch_submit = gr.Button(
                        "🚀 Process Batch", variant="primary", size="lg"
                    )
            batch_output = gr.Textbox(
                label="Batch Results", lines=20, max_lines=30, show_copy_button=True
            )
            batch_status = gr.Textbox(label="Batch Status", interactive=False)

    # Event Handlers
    # Single document analysis
    submit_btn.click(
        fn=handle_analysis,
        inputs=[pdf_in, prompt_input, username_input, gr.State(False)],
        outputs=[output_box, status_box, gr.State()],
    )

    # Load custom prompt
    load_prompt_btn.click(
        fn=load_custom_prompt,
        inputs=[prompt_dropdown],
        outputs=[prompt_input],
    )

    # Export functionality
    export_btn.click(
        fn=handle_export,
        inputs=[output_box, export_format, username_input],
        outputs=[export_status, gr.State()],
    )

    # Clear functionality
    clear_btn.click(
        fn=_reset_single_document_form,
        inputs=[],
        outputs=[pdf_in, prompt_input, output_box, status_box],
    )

    # Batch processing
    batch_submit.click(
        fn=handle_batch_analysis,
        inputs=[batch_files, batch_prompt, batch_username],
        outputs=[batch_output, batch_status, gr.State()],
    )

    # Examples
    gr.Examples(
        examples=[
            ["Summarize this document in 3 key points"],
            ["Explain this technical content for a general audience"],
            ["Segment this document by main themes or topics"],
            ["Analyze the key findings and recommendations"],
            ["Create an executive summary of this document"],
        ],
        inputs=prompt_input,
        label="Example Instructions",
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))