File size: 7,485 Bytes
15bd23d
 
d560cbb
 
52b6de3
 
15bd23d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d560cbb
15bd23d
 
d560cbb
15bd23d
d560cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bd23d
52b6de3
 
 
 
 
 
 
 
 
 
 
d560cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15bd23d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d560cbb
52b6de3
15bd23d
 
 
 
 
 
 
 
 
 
d560cbb
 
 
 
 
 
 
 
 
 
 
15bd23d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d560cbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52b6de3
15bd23d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import gradio as gr
import pandas as pd
import json
import datetime
from pipeline import LitigationPipeline, MODELS
from db import init_db, DocumentLog


# --- DEFAULT PROMPTS ---
# Initial value of the "Summarizer Prompt" textbox in the layout below. The
# (possibly user-edited) textbox content is what run_pipeline forwards to
# LitigationPipeline.process_batch as `sum_prompt`.
DEFAULT_SUM_PROMPT = """You are an expert pharmaceutical litigation researcher. 
Summarize the provided clinical trial document or internal correspondence.
Focus on: Adverse events, off-label promotion, and knowledge of side effects.
Format output as Markdown.
"""

# Initial value of the "Verifier Prompt" textbox; forwarded by run_pipeline as
# `ver_prompt`.
# NOTE(review): the rubric describes scores 1-10, but the output format allows
# 0-10 -- confirm whether a score of 0 is intended.
# NOTE(review): presumably the pipeline parses the "SCORE:" / "FEEDBACK:" lines
# of the model reply -- verify against pipeline.py before changing the format.
DEFAULT_VER_PROMPT = """You are a Senior QC Editor. 
Review the provided summary against the original text snippet.
Rating Rubric:
- 10: Perfect capture of adverse events and dates.
- 1-9: Missing key dates or hallucinations.

Output format STRICTLY as:
SCORE: [0-10]/10
FEEDBACK: [Specific critique here]
"""

# --- UI LOGIC ---

def run_pipeline(files, api_key, model, context_limit, retries, sum_prompt, ver_prompt):
    """Run one analysis batch and build the status report shown in the UI.

    Args:
        files: Uploaded document paths from the file widget; falsy when
            nothing was uploaded.
        api_key: API key forwarded to ``LitigationPipeline``.
        model: Model name forwarded to ``LitigationPipeline``.
        context_limit: Forwarded to ``LitigationPipeline`` (the UI labels it
            "Reset Context After (N docs)").
        retries: Forwarded to ``process_batch`` (the UI labels it
            "Max Verification Retries").
        sum_prompt: Summarizer prompt text forwarded to ``process_batch``.
        ver_prompt: Verifier prompt text forwarded to ``process_batch``.

    Returns:
        A ``(markdown_status, logs_dataframe)`` tuple. On any validation or
        processing error the DataFrame is empty and the Markdown describes
        the problem; this function does not raise.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    if not files:
        return "⚠️ **Error**: No files uploaded", pd.DataFrame()

    if not api_key:
        return "⚠️ **Error**: Please enter an API Key", pd.DataFrame()

    try:
        # Initialize Pipeline
        pipeline = LitigationPipeline(api_key, model, context_limit)

        # Run Batch
        results_summary = pipeline.process_batch(files, sum_prompt, ver_prompt, retries)

        # Fetch Logs
        logs_df = pipeline.get_logs()

        # Anything not reported as SUCCESS counts as failed, including
        # documents for which no result came back at all.
        total_docs = len(files)
        successful = sum(1 for r in results_summary if r[1] == 'SUCCESS')
        failed = total_docs - successful

        if failed > 0:
            outcome = (
                f'⚠️ **Warning**: {failed} document(s) failed processing. '
                'Check logs below for details.'
            )
        else:
            outcome = '🎉 All documents processed successfully!'

        status_msg = f"""### ✅ Batch Processing Complete
        
**Summary:**
- **Total Documents**: {total_docs}
- **✓ Successful**: {successful}
- **✗ Failed**: {failed}

{outcome}
"""

        return status_msg, logs_df

    except ValueError as e:
        # Handle configuration errors (invalid API key, model, etc.)
        logger.warning("Pipeline configuration error: %s", e)
        error_msg = f"""### ❌ Configuration Error

**Error Details:**
```
{e}
```

**Troubleshooting:**
- Check that your API key is valid
- Verify the selected model is supported
- Ensure all configuration values are correct
"""
        return error_msg, pd.DataFrame()

    except Exception as e:
        # Top-level UI boundary: never let the callback raise, but do record
        # the traceback -- the message below points the user at the console.
        logger.exception("Unexpected error while processing batch")
        error_msg = f"""### ❌ Unexpected Error

**Error Details:**
```
{e}
```

**What to do:**
- Check the console logs for more details
- Verify your PDF files are not corrupted
- Try processing fewer files at once
- Contact support if the issue persists
"""
        return error_msg, pd.DataFrame()

def get_initial_logs():
    """Fetch the 50 most recent log rows to populate the UI table on load.

    Returns:
        A ``pandas.DataFrame`` of ``DocumentLog`` rows, newest first, with
        ``verification_score`` / ``verifier_feedback`` renamed to
        ``score`` / ``feedback``.
    """
    db_session = init_db()
    try:
        query = (
            db_session.query(DocumentLog)
            .order_by(DocumentLog.timestamp.desc())
            .limit(50)
        )
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # Release the session's connection once the rows are in memory
        # (assumes init_db() returns a SQLAlchemy session, as the
        # .query/.bind usage suggests).
        db_session.close()

    # NOTE(review): the log table in the layout declares the headers
    # "verification_score" / "verifier_feedback", while this rename produces
    # "score" / "feedback" -- confirm which naming pipeline.get_logs() uses
    # so both code paths feed the table the same columns.
    return df.rename(columns={
        "verification_score": "score",
        "verifier_feedback": "feedback",
    })

def export_logs_csv():
    """Export every log row to a timestamped CSV file.

    Returns:
        The name of the CSV file written to the current working directory
        (``litigation_logs_<YYYYmmdd_HHMMSS>.csv``).
    """
    db_session = init_db()
    try:
        query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # Release the session's connection once the rows are in memory
        # (assumes init_db() returns a SQLAlchemy session, as the
        # .query/.bind usage suggests).
        db_session.close()

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"litigation_logs_{timestamp}.csv"
    df.to_csv(filename, index=False)

    return filename

def export_logs_json():
    """Export every log row to a timestamped JSON file.

    Missing values (NULL columns, which pandas loads as NaN/None) are written
    as JSON ``null`` so the file is valid strict JSON.

    Returns:
        The name of the JSON file written to the current working directory
        (``litigation_logs_<YYYYmmdd_HHMMSS>.json``).
    """
    db_session = init_db()
    try:
        query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # Release the session's connection once the rows are in memory
        # (assumes init_db() returns a SQLAlchemy session, as the
        # .query/.bind usage suggests).
        db_session.close()

    # Convert datetime to string for JSON serialization
    df['timestamp'] = df['timestamp'].astype(str)

    # json.dump would emit the bare token NaN for missing numeric values,
    # which strict JSON parsers reject; map missing values to None (null).
    records = [
        {key: (None if pd.isna(value) else value) for key, value in row.items()}
        for row in df.to_dict(orient='records')
    ]

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"litigation_logs_{timestamp}.json"

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(records, f, indent=2)

    return filename
# --- GRADIO LAYOUT ---

# Two-column dashboard: inputs and configuration on the left, status, log
# table and export buttons on the right. Event wiring follows the layout.
with gr.Blocks(title="Pharma Litigation AI Researcher") as demo:
    gr.Markdown("## ⚖️ Pharmaceutical Litigation Research Dashboard")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("Upload batch PDFs, configure the verifier loop, and extract litigation-critical insights.")
            # Prompt Engineering
            with gr.Accordion("📝 Prompt Engineering (Expand to Edit)", open=False):
                sum_prompt_box = gr.Textbox(label="Summarizer Prompt", value=DEFAULT_SUM_PROMPT, lines=5)
                ver_prompt_box = gr.Textbox(label="Verifier Prompt", value=DEFAULT_VER_PROMPT, lines=5)

            # File Upload
            file_uploader = gr.File(label="Upload Discovery Documents (PDF)", file_count="multiple", type="filepath")
            # Configuration Panel
            gr.Markdown("---")
            gr.Markdown("### ⚙️ Configuration")
            api_key_input = gr.Textbox(label="API Key", type="password", placeholder="api-key...")
            model_selector = gr.Dropdown(list(MODELS.keys()), label="Model", value=list(MODELS.keys())[0])
            context_limit = gr.Number(label="Reset Context After (N docs)", value=5, precision=0)
            retry_limit = gr.Slider(minimum=0, maximum=5, value=2, step=1, label="Max Verification Retries")

            run_btn = gr.Button("🚀 Start Analysis Batch", variant="primary")

        with gr.Column(scale=2):
            # Results Area
            with gr.Tabs():
                with gr.TabItem("📊 Live Execution Logs"):
                    status_output = gr.Markdown("Waiting for input...")
                    log_table = gr.Dataframe(
                        label="Processing Logs (Database)",
                        headers=[
                            "id",
                            "filename",
                            "status",
                            "retry_count",
                            "verification_score",
                            "verifier_feedback",
                            "final_summary",
                            "model_used",
                            "cost_estimate",
                            "timestamp",
                        ],
                    )

                    # Export Buttons
                    gr.Markdown("### 📥 Export Results")
                    with gr.Row():
                        export_csv_btn = gr.Button("📄 Download as CSV", size="sm")
                        export_json_btn = gr.Button("📋 Download as JSON", size="sm")

                    # Hidden until an export has produced a file to offer.
                    csv_download = gr.File(label="CSV Download", visible=False)
                    json_download = gr.File(label="JSON Download", visible=False)

    # Event Wiring
    run_btn.click(
        fn=run_pipeline,
        inputs=[
            file_uploader,
            api_key_input,
            model_selector,
            context_limit,
            retry_limit,
            sum_prompt_box,
            ver_prompt_box,
        ],
        outputs=[status_output, log_table],
    )

    # Export event handlers: write the file first, then reveal the
    # download widget that now holds it.
    export_csv_btn.click(
        fn=export_logs_csv,
        inputs=[],
        outputs=[csv_download]
    ).then(
        lambda: gr.File(visible=True),
        outputs=[csv_download]
    )

    export_json_btn.click(
        fn=export_logs_json,
        inputs=[],
        outputs=[json_download]
    ).then(
        lambda: gr.File(visible=True),
        outputs=[json_download]
    )

    # Populate the log table from the database when the page loads.
    demo.load(get_initial_logs, None, log_table)

# Start the Gradio server only when this file is run as a script, so that
# importing the module (e.g. to reuse `demo`) does not launch it.
if __name__ == "__main__":
    demo.launch()