# Hugging Face Space: ikram98ai/litigation_research (status: Sleeping; file size: 7,485 bytes)

import gradio as gr
import pandas as pd
import json
import datetime
from pipeline import LitigationPipeline, MODELS
from db import init_db, DocumentLog


# --- DEFAULT PROMPTS ---
# Initial value of the "Summarizer Prompt" textbox; passed to run_pipeline
# as sum_prompt unless the user edits it.
DEFAULT_SUM_PROMPT = """You are an expert pharmaceutical litigation researcher. 
Summarize the provided clinical trial document or internal correspondence.
Focus on: Adverse events, off-label promotion, and knowledge of side effects.
Format output as Markdown.
"""

# Initial value of the "Verifier Prompt" textbox; passed to run_pipeline as
# ver_prompt. It asks for a "SCORE: .../10" line followed by a "FEEDBACK:" line.
DEFAULT_VER_PROMPT = """You are a Senior QC Editor. 
Review the provided summary against the original text snippet.
Rating Rubric:
- 10: Perfect capture of adverse events and dates.
- 1-9: Missing key dates or hallucinations.

Output format STRICTLY as:
SCORE: [0-10]/10
FEEDBACK: [Specific critique here]
"""

# --- UI LOGIC ---

def run_pipeline(files, api_key, model, context_limit, retries, sum_prompt, ver_prompt):
    """Process one batch of uploaded files and report the outcome.

    Returns a ``(status_markdown, logs_dataframe)`` pair. When no files or no
    API key are supplied, or when the pipeline raises, the Markdown describes
    the problem and the DataFrame is empty.
    """
    # Guard clauses: refuse to start without input files or credentials.
    if not files:
        return "⚠️ **Error**: No files uploaded", pd.DataFrame()
    if not api_key:
        return "⚠️ **Error**: Please enter an API Key", pd.DataFrame()

    try:
        runner = LitigationPipeline(api_key, model, context_limit)
        outcomes = runner.process_batch(files, sum_prompt, ver_prompt, retries)
        logs = runner.get_logs()

        # An outcome counts as successful when its second field is 'SUCCESS';
        # everything else in the upload is reported as failed.
        n_total = len(files)
        n_ok = sum(1 for outcome in outcomes if outcome[1] == 'SUCCESS')
        n_failed = n_total - n_ok

        if n_failed > 0:
            closing = f'⚠️ **Warning**: {n_failed} document(s) failed processing. Check logs below for details.'
        else:
            closing = '🎉 All documents processed successfully!'

        report = (
            "### ✅ Batch Processing Complete\n"
            "        \n"
            "**Summary:**\n"
            f"- **Total Documents**: {n_total}\n"
            f"- **✓ Successful**: {n_ok}\n"
            f"- **✗ Failed**: {n_failed}\n"
            "\n"
            f"{closing}\n"
        )
        return report, logs

    except ValueError as e:
        # Configuration problems (invalid API key, model, etc.).
        details = str(e)
        report = (
            "### ❌ Configuration Error\n"
            "\n"
            "**Error Details:**\n"
            "```\n"
            f"{details}\n"
            "```\n"
            "\n"
            "**Troubleshooting:**\n"
            "- Check that your API key is valid\n"
            "- Verify the selected model is supported\n"
            "- Ensure all configuration values are correct\n"
        )
        return report, pd.DataFrame()

    except Exception as e:
        # Anything else is surfaced to the user as an unexpected error.
        details = str(e)
        report = (
            "### ❌ Unexpected Error\n"
            "\n"
            "**Error Details:**\n"
            "```\n"
            f"{details}\n"
            "```\n"
            "\n"
            "**What to do:**\n"
            "- Check the console logs for more details\n"
            "- Verify your PDF files are not corrupted\n"
            "- Try processing fewer files at once\n"
            "- Contact support if the issue persists\n"
        )
        return report, pd.DataFrame()

def get_initial_logs():
    """Return the 50 most recent DocumentLog rows as a DataFrame for the UI."""
    session = init_db()
    recent = (
        session.query(DocumentLog)
        .order_by(DocumentLog.timestamp.desc())
        .limit(50)
    )
    frame = pd.read_sql(recent.statement, session.bind)
    # Shorten the two verifier column names before handing the frame to the UI.
    display_names = {
        "verification_score": "score",
        "verifier_feedback": "feedback",
    }
    return frame.rename(columns=display_names)

def export_logs_csv():
    """Dump every DocumentLog row, newest first, to a timestamped CSV.

    The file is written to the current working directory and its name is
    returned.
    """
    session = init_db()
    newest_first = session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
    frame = pd.read_sql(newest_first.statement, session.bind)

    # The local wall-clock time makes each export's filename distinct.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    out_name = f"litigation_logs_{stamp}.csv"
    frame.to_csv(out_name, index=False)
    return out_name

def export_logs_json():
    """Export logs to a timestamped JSON file and return its filename.

    Every DocumentLog row (newest first) is written as one JSON object in a
    top-level array. Timestamps are stored as strings, and missing float
    values are written as ``null`` so the file is valid JSON.

    Returns:
        The name of the JSON file created in the current working directory.
    """
    import math

    db_session = init_db()
    query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
    df = pd.read_sql(query.statement, db_session.bind)

    # Convert datetime to string for JSON serialization
    df['timestamp'] = df['timestamp'].astype(str)

    # json.dump would emit a bare NaN token for missing floats, which strict
    # JSON parsers reject; map those values to None so they become null.
    records = [
        {
            key: None if isinstance(value, float) and math.isnan(value) else value
            for key, value in row.items()
        }
        for row in df.to_dict(orient='records')
    ]

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"litigation_logs_{timestamp}.json"

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(records, f, indent=2)

    return filename
# --- GRADIO LAYOUT ---

# Dashboard layout: a narrow left column with prompts, upload and run
# configuration, and a wide right column with the status message, the log
# table and the export controls.
with gr.Blocks(title="Pharma Litigation AI Researcher") as demo:
    gr.Markdown("## ⚖️ Pharmaceutical Litigation Research Dashboard")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("Upload batch PDFs, configure the verifier loop, and extract litigation-critical insights.")
            # Prompt Engineering
            with gr.Accordion("📝 Prompt Engineering (Expand to Edit)", open=False):
                sum_prompt_box = gr.Textbox(label="Summarizer Prompt", value=DEFAULT_SUM_PROMPT, lines=5)
                ver_prompt_box = gr.Textbox(label="Verifier Prompt", value=DEFAULT_VER_PROMPT, lines=5)

            # File Upload
            file_uploader = gr.File(label="Upload Discovery Documents (PDF)", file_count="multiple", type="filepath")
            # Configuration Panel
            gr.Markdown("---")
            gr.Markdown("### ⚙️ Configuration")
            api_key_input = gr.Textbox(label="API Key", type="password", placeholder="api-key...")
            model_selector = gr.Dropdown(list(MODELS.keys()), label="Model", value=list(MODELS.keys())[0])
            context_limit = gr.Number(label="Reset Context After (N docs)", value=5, precision=0)
            retry_limit = gr.Slider(minimum=0, maximum=5, value=2, step=1, label="Max Verification Retries")

            run_btn = gr.Button("🚀 Start Analysis Batch", variant="primary")

        with gr.Column(scale=2):
            # Results Area
            with gr.Tabs():
                with gr.TabItem("📊 Live Execution Logs"):
                    status_output = gr.Markdown("Waiting for input...")
                    # Fix: the first header used to be " id" (stray leading space).
                    # NOTE(review): get_initial_logs renames verification_score /
                    # verifier_feedback to score / feedback, so these two headers
                    # may not match the frame it loads — confirm which is intended.
                    log_table = gr.Dataframe(
                        label="Processing Logs (Database)",
                        headers=[
                            "id",
                            "filename",
                            "status",
                            "retry_count",
                            "verification_score",
                            "verifier_feedback",
                            "final_summary",
                            "model_used",
                            "cost_estimate",
                            "timestamp",
                        ],
                    )

                    # Export Buttons
                    gr.Markdown("### 📥 Export Results")
                    with gr.Row():
                        export_csv_btn = gr.Button("📄 Download as CSV", size="sm")
                        export_json_btn = gr.Button("📋 Download as JSON", size="sm")

                    # Hidden until the matching export button has produced a file.
                    csv_download = gr.File(label="CSV Download", visible=False)
                    json_download = gr.File(label="JSON Download", visible=False)

    # Event Wiring
    run_btn.click(
        fn=run_pipeline,
        inputs=[
            file_uploader,
            api_key_input,
            model_selector,
            context_limit,
            retry_limit,
            sum_prompt_box,
            ver_prompt_box
        ],
        outputs=[status_output, log_table]
    )

    # Export event handlers: write the file first, then reveal the download widget.
    export_csv_btn.click(
        fn=export_logs_csv,
        inputs=[],
        outputs=[csv_download]
    ).then(
        lambda: gr.File(visible=True),
        outputs=[csv_download]
    )

    export_json_btn.click(
        fn=export_logs_json,
        inputs=[],
        outputs=[json_download]
    ).then(
        lambda: gr.File(visible=True),
        outputs=[json_download]
    )

    # Populate the log table with existing rows when the page loads.
    demo.load(get_initial_logs, None, log_table)

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()