# Source: Hugging Face Space "JatinAutonomousLabs/PDF_analyst" (status: Paused)
# File size: 12,202 bytes - revision 93c9801

# PDF Analysis & Orchestrator - Simplified for Hugging Face Spaces
import os
import asyncio
import uuid
from pathlib import Path
from typing import Optional, List, Tuple
import time

import gradio as gr
from agents import (
    AnalysisAgent,
    CollaborationAgent,
    ConversationAgent,
    MasterOrchestrator,
)
from utils import load_pdf_text
from utils.session import make_user_session
from utils.validation import validate_file_size
from utils.prompts import PromptManager
from utils.export import ExportManager
from config import Config

# ------------------------
# Initialize Components
# ------------------------
# Make sure the configured directories exist. A failure here is only
# reported; start-up continues regardless.
try:
    Config.ensure_directories()
except Exception as exc:
    print(f"Warning: Could not ensure directories: {exc}")

# Agent Roster - Focused on Analysis & Orchestration
# Every agent is built with the same model and a zeroed task counter; only
# the registry key, the class and the display name differ.
AGENTS = {
    key: agent_cls(name=agent_name, model=Config.OPENAI_MODEL, tasks_completed=0)
    for key, agent_cls, agent_name in (
        ("analysis", AnalysisAgent, "AnalysisAgent"),
        ("collab", CollaborationAgent, "CollaborationAgent"),
        ("conversation", ConversationAgent, "ConversationAgent"),
    )
}
# The orchestrator receives the full roster keyed by the names above.
ORCHESTRATOR = MasterOrchestrator(agents=AGENTS)

# Initialize managers
# Build each manager on its own so that a failure in one does not also
# disable the other. A manager that cannot be constructed is left as None,
# which the handlers in this file check for before using it.
try:
    PROMPT_MANAGER = PromptManager()
except Exception as e:
    print(f"Warning: Could not initialize managers: {e}")
    PROMPT_MANAGER = None

try:
    EXPORT_MANAGER = ExportManager()
except Exception as e:
    print(f"Warning: Could not initialize managers: {e}")
    EXPORT_MANAGER = None

# ------------------------
# File Handling
# ------------------------
def save_uploaded_file(uploaded, username: str = "anonymous", session_dir: Optional[str] = None) -> str:
    """Store an uploaded PDF in a session directory and return the new path.

    ``uploaded`` may arrive in any of these forms:

    * a filesystem path (``str`` or ``os.PathLike``) to an existing file,
    * a ``dict`` carrying such a path under the ``"name"`` key,
    * an object whose ``name`` attribute is such a path (for example a
      temporary-file wrapper),
    * a readable file-like object (anything exposing ``read()``).

    Args:
        uploaded: The upload, in one of the forms listed above.
        username: Passed to ``make_user_session`` to obtain a directory when
            ``session_dir`` is not supplied.
        session_dir: Directory that receives the copy; created if missing.

    Returns:
        The path of the stored copy, named ``upload_<random hex>.pdf``.

    Raises:
        RuntimeError: If ``uploaded`` is in none of the supported forms, or
            refers to a path that does not exist and cannot be read.
    """
    import shutil

    if session_dir is None:
        session_dir = make_user_session(username)
    target_dir = Path(session_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    dst = target_dir / f"upload_{uuid.uuid4().hex}.pdf"

    # Work out whether the upload names a file on disk.
    if isinstance(uploaded, dict):
        candidate = uploaded.get("name")
    elif isinstance(uploaded, (str, os.PathLike)):
        candidate = uploaded
    else:
        candidate = getattr(uploaded, "name", None)

    # Prefer copying from disk: a wrapper object's own stream may be empty or
    # already consumed even though its ``name`` points at the real upload.
    if isinstance(candidate, (str, os.PathLike)) and os.path.exists(candidate):
        shutil.copyfile(candidate, dst)
        return str(dst)

    if hasattr(uploaded, "read"):
        # Read before opening the destination so a failing read does not
        # leave an empty file behind.
        payload = uploaded.read()
        with open(dst, "wb") as out:
            out.write(payload)
        return str(dst)

    raise RuntimeError("Unable to save uploaded file.")

# ------------------------
# Async wrapper
# ------------------------
def run_async(func, *args, **kwargs):
    """Run an async callable to completion from synchronous code.

    A fresh event loop is created for the call, installed as the current
    loop while the awaitable runs, and always closed afterwards so that
    repeated calls do not accumulate open loops.

    Args:
        func: Callable returning an awaitable (typically an ``async def``).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever the awaitable produced.

    Raises:
        Any exception raised by ``func`` or by the awaitable it returns.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(func(*args, **kwargs))
    finally:
        try:
            # Finalize any async generators that were left half-consumed.
            loop.run_until_complete(loop.shutdown_asyncgens())
        finally:
            # Do not leave a closed loop registered as the current one.
            asyncio.set_event_loop(None)
            loop.close()

# ------------------------
# Analysis Handlers - Core Features
# ------------------------
def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
    """Analyze a single uploaded PDF according to ``prompt``.

    Args:
        file: The upload from the file component; ``None`` when nothing was
            uploaded.
        prompt: Instructions forwarded to the orchestrator.
        username: Identifier used for the upload session and passed to the
            orchestrator as ``user_id``. Blank or ``None`` values (what an
            empty textbox delivers) fall back to ``"anonymous"``.
        use_streaming: Accepted for interface compatibility; not used here.

    Returns:
        A ``(result_text, status_text, None)`` tuple. Errors are reported in
        ``result_text`` rather than raised, so the UI always gets a value.
    """
    if file is None:
        return "Please upload a PDF.", "No file uploaded", None

    # An empty textbox yields "" / None, which would bypass the default.
    user = str(username).strip() if username else ""
    user = user or "anonymous"

    try:
        validate_file_size(file)
        path = save_uploaded_file(file, user)

        result = run_async(
            ORCHESTRATOR.handle_user_prompt,
            user_id=user,
            prompt=prompt,
            file_path=path,
            targets=["analysis"],
        )
        return result.get("analysis", "No analysis result."), "Analysis complete", None
    except Exception as e:
        return f"Error during analysis: {str(e)}", "Analysis failed", None

def _format_batch_results(result):
    """Render the orchestrator's batch result dict as the report text."""
    summary = result.get("summary") or {}
    stats = summary.get("processing_stats") or {}

    parts = [
        "📊 Batch Analysis Results\n",
        f"Total files: {stats.get('total_files', 0)}\n",
        f"Successful: {stats.get('successful', 0)}\n",
        f"Failed: {stats.get('failed', 0)}\n",
        f"Success rate: {stats.get('success_rate', '0%')}\n\n",
    ]

    if summary.get("batch_analysis"):
        parts.append(f"📋 Batch Summary:\n{summary['batch_analysis']}\n\n")

    parts.append("📄 Individual Results:\n")
    for index, file_result in enumerate(result.get("batch_results") or [], start=1):
        # Fall back to a placeholder when the entry carries no usable path.
        file_name = Path(file_result.get("file_path") or "Unknown").name
        parts.append(f"\n--- File {index}: {file_name} ---\n")
        if "error" in file_result:
            parts.append(f"❌ Error: {file_result['error']}\n")
        else:
            parts.append(f"✅ {file_result.get('analysis', 'No analysis')}\n")

    return "".join(parts)


def handle_batch_analysis(files, prompt, username="anonymous"):
    """Handle batch analysis of multiple PDFs.

    Args:
        files: Sequence of uploads; ``None`` or empty when nothing was
            uploaded.
        prompt: Instructions applied to every document.
        username: Identifier used for the upload sessions and passed to the
            orchestrator as ``user_id``. Blank or ``None`` values (what an
            empty textbox delivers) fall back to ``"anonymous"``.

    Returns:
        A ``(report_text, status_text, None)`` tuple. Errors are reported in
        ``report_text`` rather than raised.
    """
    if not files:
        return "Please upload at least one PDF.", "No files uploaded", None

    # An empty textbox yields "" / None, which would bypass the default.
    user = str(username).strip() if username else ""
    user = user or "anonymous"

    try:
        # Validate and store every upload before any analysis starts.
        file_paths = []
        for file in files:
            validate_file_size(file)
            file_paths.append(save_uploaded_file(file, user))

        result = run_async(
            ORCHESTRATOR.handle_batch_analysis,
            user_id=user,
            prompt=prompt,
            file_paths=file_paths,
            targets=["analysis"],
        )

        return _format_batch_results(result), "Batch processing complete", None
    except Exception as e:
        return f"Error during batch analysis: {str(e)}", "Batch processing failed", None

def handle_export(result_text, export_format, username="anonymous"):
    """Handle export of analysis results.

    Args:
        result_text: The text to export; blank or ``None`` is rejected.
        export_format: One of ``"txt"``, ``"json"`` or ``"pdf"``.
        username: Forwarded to the export manager. Blank or ``None`` values
            (what an empty textbox delivers) fall back to ``"anonymous"``.

    Returns:
        A ``(message, filepath)`` tuple; ``filepath`` is ``None`` whenever
        nothing was written.
    """
    if not result_text or not result_text.strip():
        return "No content to export.", None

    if not EXPORT_MANAGER:
        return "Export functionality not available.", None

    if export_format not in ("txt", "json", "pdf"):
        return f"Unsupported export format: {export_format}", None

    # An empty textbox yields "" / None, which would bypass the default.
    user = str(username).strip() if username else ""
    user = user or "anonymous"

    try:
        if export_format == "txt":
            filepath = EXPORT_MANAGER.export_text(result_text, username=user)
        elif export_format == "json":
            data = {"analysis": result_text, "exported_by": user, "timestamp": time.time()}
            filepath = EXPORT_MANAGER.export_json(data, username=user)
        else:
            filepath = EXPORT_MANAGER.export_pdf(result_text, username=user)

        return f"✅ Export successful! File saved to: {filepath}", filepath
    except Exception as e:
        return f"❌ Export failed: {str(e)}", None

def get_custom_prompts():
    """List the identifiers of every prompt known to the prompt manager.

    Returns an empty list when ``PROMPT_MANAGER`` is unset or falsy.
    """
    if PROMPT_MANAGER:
        return list(PROMPT_MANAGER.get_all_prompts().keys())
    return []

def load_custom_prompt(prompt_id):
    """Fetch the prompt template stored under ``prompt_id``.

    Returns an empty string when ``PROMPT_MANAGER`` is unset or falsy, or
    when the manager returns nothing (or an empty value) for that id.
    """
    if PROMPT_MANAGER:
        template = PROMPT_MANAGER.get_prompt(prompt_id)
        if template:
            return template
    return ""

# ------------------------
# Gradio UI - Simplified Interface
# ------------------------
# Build the whole interface inside one Blocks context: two tabs (single
# document and batch), followed by the event wiring and example prompts.
with gr.Blocks(title="PDF Analysis & Orchestrator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 PDF Analysis & Orchestrator - Intelligent Document Processing")
    gr.Markdown("Upload PDFs and provide instructions for analysis, summarization, or explanation.")

    with gr.Tabs():
        # Single Document Analysis Tab
        with gr.Tab("📄 Single Document Analysis"):
            with gr.Row():
                with gr.Column(scale=1):
                    pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], elem_id="file_upload")
                    username_input = gr.Textbox(label="Username (optional)", placeholder="anonymous", elem_id="username")

                    # Custom Prompts Section
                    with gr.Accordion("🎯 Custom Prompts", open=False):
                        prompt_dropdown = gr.Dropdown(
                            choices=get_custom_prompts(),
                            label="Select Custom Prompt",
                            value=None
                        )
                        load_prompt_btn = gr.Button("Load Prompt", size="sm")

                with gr.Column(scale=2):
                    gr.Markdown("### Analysis Instructions")
                    prompt_input = gr.Textbox(
                        lines=4,
                        placeholder="Describe what you want to do with the document...\nExamples:\n- Summarize this document in 3 key points\n- Explain this technical paper for a 10-year-old\n- Segment this document by themes\n- Analyze the key findings",
                        label="Instructions"
                    )

                    with gr.Row():
                        submit_btn = gr.Button("🔍 Analyze & Orchestrate", variant="primary", size="lg")
                        clear_btn = gr.Button("🗑️ Clear", size="sm")

            # Results Section
            with gr.Row():
                with gr.Column(scale=2):
                    output_box = gr.Textbox(label="Analysis Result", lines=15, max_lines=25, show_copy_button=True)
                    status_box = gr.Textbox(label="Status", value="Ready to analyze documents", interactive=False)

                with gr.Column(scale=1):
                    # Export Section
                    with gr.Accordion("💾 Export Results", open=False):
                        export_format = gr.Dropdown(
                            choices=["txt", "json", "pdf"],
                            label="Export Format",
                            value="txt"
                        )
                        export_btn = gr.Button("📥 Export", variant="secondary")
                        export_status = gr.Textbox(label="Export Status", interactive=False)

        # Batch Processing Tab
        with gr.Tab("📚 Batch Processing"):
            with gr.Row():
                with gr.Column(scale=1):
                    batch_files = gr.File(
                        label="Upload Multiple PDFs",
                        file_count="multiple",
                        file_types=[".pdf"]
                    )
                    batch_username = gr.Textbox(label="Username (optional)", placeholder="anonymous")

                with gr.Column(scale=2):
                    batch_prompt = gr.Textbox(
                        lines=3,
                        placeholder="Enter analysis instructions for all documents...",
                        label="Batch Analysis Instructions"
                    )
                    batch_submit = gr.Button("🚀 Process Batch", variant="primary", size="lg")

            batch_output = gr.Textbox(label="Batch Results", lines=20, max_lines=30, show_copy_button=True)
            batch_status = gr.Textbox(label="Batch Status", interactive=False)

    # Event Handlers
    # Single document analysis. The handler returns three values, so a
    # throwaway State absorbs the third one; the constant State(False)
    # supplies the handler's use_streaming argument.
    submit_btn.click(
        fn=handle_analysis,
        inputs=[pdf_in, prompt_input, username_input, gr.State(False)],
        outputs=[output_box, status_box, gr.State()]
    )

    # Load custom prompt
    load_prompt_btn.click(
        fn=load_custom_prompt,
        inputs=[prompt_dropdown],
        outputs=[prompt_input]
    )

    # Export functionality. The second return value (the exported file path)
    # is parked in a throwaway State.
    export_btn.click(
        fn=handle_export,
        inputs=[output_box, export_format, username_input],
        outputs=[export_status, gr.State()]
    )

    # Clear functionality. The file component is reset with None: an empty
    # string would be treated as a file path rather than "no file".
    clear_btn.click(
        fn=lambda: (None, "", "", "Ready"),
        inputs=[],
        outputs=[pdf_in, prompt_input, output_box, status_box]
    )

    # Batch processing
    batch_submit.click(
        fn=handle_batch_analysis,
        inputs=[batch_files, batch_prompt, batch_username],
        outputs=[batch_output, batch_status, gr.State()]
    )

    # Examples
    gr.Examples(
        examples=[
            ["Summarize this document in 3 key points"],
            ["Explain this technical content for a general audience"],
            ["Segment this document by main themes or topics"],
            ["Analyze the key findings and recommendations"],
            ["Create an executive summary of this document"],
        ],
        inputs=prompt_input,
        label="Example Instructions"
    )

if __name__ == "__main__":
    # Listen on every interface; the port comes from the PORT environment
    # variable when set, otherwise 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)