# Gradio dashboard for the pharmaceutical litigation research pipeline.
import datetime
import json

import gradio as gr
import pandas as pd

from db import init_db, DocumentLog
from pipeline import LitigationPipeline, MODELS
# --- DEFAULT PROMPTS ---
# System prompt for the summarization pass. Shown (editable) in the
# "Prompt Engineering" accordion before a batch is run.
DEFAULT_SUM_PROMPT = """You are an expert pharmaceutical litigation researcher.
Summarize the provided clinical trial document or internal correspondence.
Focus on: Adverse events, off-label promotion, and knowledge of side effects.
Format output as Markdown.
"""
# Prompt for the QC/verifier pass. NOTE(review): the pipeline presumably
# parses the "SCORE: .../10" line out of the model response — keep the
# "Output format STRICTLY as" section intact when editing (TODO confirm
# against pipeline.process_batch).
DEFAULT_VER_PROMPT = """You are a Senior QC Editor.
Review the provided summary against the original text snippet.
Rating Rubric:
- 10: Perfect capture of adverse events and dates.
- 1-9: Missing key dates or hallucinations.
Output format STRICTLY as:
SCORE: [0-10]/10
FEEDBACK: [Specific critique here]
"""
| # --- UI LOGIC --- | |
def run_pipeline(files, api_key, model, context_limit, retries, sum_prompt, ver_prompt):
    """Run a full batch through the litigation pipeline.

    Returns a tuple of (markdown status message, logs DataFrame), suitable
    for binding directly to the status Markdown and log Dataframe widgets.
    Errors never propagate to Gradio: every failure path is converted into
    a user-visible markdown message plus an empty DataFrame.
    """
    # Guard clauses: validate inputs up front instead of raising.
    if not files:
        return "β οΈ **Error**: No files uploaded", pd.DataFrame()
    if not api_key:
        return "β οΈ **Error**: Please enter an API Key", pd.DataFrame()

    try:
        engine = LitigationPipeline(api_key, model, context_limit)
        batch = engine.process_batch(files, sum_prompt, ver_prompt, retries)
        logs = engine.get_logs()

        # Tally outcomes; each batch entry's second field is its status.
        doc_count = len(files)
        ok = sum(1 for entry in batch if entry[1] == 'SUCCESS')
        bad = doc_count - ok
        status_msg = f"""### β Batch Processing Complete
**Summary:**
- **Total Documents**: {doc_count}
- **β Successful**: {ok}
- **β Failed**: {bad}
{f'β οΈ **Warning**: {bad} document(s) failed processing. Check logs below for details.' if bad > 0 else 'π All documents processed successfully!'}
"""
        return status_msg, logs
    except ValueError as e:
        # Configuration problems (bad key/model/limits) get targeted advice.
        return f"""### β Configuration Error
**Error Details:**
```
{str(e)}
```
**Troubleshooting:**
- Check that your API key is valid
- Verify the selected model is supported
- Ensure all configuration values are correct
""", pd.DataFrame()
    except Exception as e:
        # Anything else: surface the error text plus generic guidance.
        return f"""### β Unexpected Error
**Error Details:**
```
{str(e)}
```
**What to do:**
- Check the console logs for more details
- Verify your PDF files are not corrupted
- Try processing fewer files at once
- Contact support if the issue persists
""", pd.DataFrame()
def get_initial_logs():
    """Fetch the 50 most recent document logs to populate the UI table.

    Returns a DataFrame with `verification_score`/`verifier_feedback`
    renamed to `score`/`feedback`. NOTE(review): the Dataframe widget's
    default headers still use the un-renamed names — confirm which set
    the UI actually expects.
    """
    db_session = init_db()
    try:
        query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc()).limit(50)
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # FIX: the session was never closed; release it so every page
        # load / refresh doesn't leak a DB connection.
        db_session.close()
    return df.rename(columns={
        "verification_score": "score",
        "verifier_feedback": "feedback",
    })
def export_logs_csv():
    """Export all document logs to a timestamped CSV and return its path.

    The file is written to the current working directory; the returned
    filename is handed to a gr.File component for download.
    """
    db_session = init_db()
    try:
        query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # FIX: close the session so repeated exports don't leak connections.
        db_session.close()
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"litigation_logs_{timestamp}.csv"
    df.to_csv(filename, index=False)
    return filename
def export_logs_json():
    """Export all document logs to a timestamped JSON file and return its path.

    Timestamps are stringified first because datetime objects are not
    JSON-serializable.
    """
    db_session = init_db()
    try:
        query = db_session.query(DocumentLog).order_by(DocumentLog.timestamp.desc())
        df = pd.read_sql(query.statement, db_session.bind)
    finally:
        # FIX: close the session so repeated exports don't leak connections.
        db_session.close()
    df['timestamp'] = df['timestamp'].astype(str)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"litigation_logs_{timestamp}.json"
    with open(filename, 'w') as f:
        json.dump(df.to_dict(orient='records'), f, indent=2)
    return filename
# --- GRADIO LAYOUT ---
with gr.Blocks(title="Pharma Litigation AI Researcher") as demo:
    gr.Markdown("## βοΈ Pharmaceutical Litigation Research Dashboard")
    with gr.Row():
        # Left column: prompts, file upload and run configuration.
        with gr.Column(scale=1):
            gr.Markdown("Upload batch PDFs, configure the verifier loop, and extract litigation-critical insights.")
            # Prompt Engineering
            with gr.Accordion("π Prompt Engineering (Expand to Edit)", open=False):
                sum_prompt_box = gr.Textbox(label="Summarizer Prompt", value=DEFAULT_SUM_PROMPT, lines=5)
                ver_prompt_box = gr.Textbox(label="Verifier Prompt", value=DEFAULT_VER_PROMPT, lines=5)
            # File Upload
            file_uploader = gr.File(label="Upload Discovery Documents (PDF)", file_count="multiple", type="filepath")
            # Configuration Panel
            gr.Markdown("---")
            gr.Markdown("### βοΈ Configuration")
            api_key_input = gr.Textbox(label="API Key", type="password", placeholder="api-key...")
            model_selector = gr.Dropdown(list(MODELS.keys()), label="Model", value=list(MODELS.keys())[0])
            context_limit = gr.Number(label="Reset Context After (N docs)", value=5, precision=0)
            retry_limit = gr.Slider(minimum=0, maximum=5, value=2, step=1, label="Max Verification Retries")
            run_btn = gr.Button("π Start Analysis Batch", variant="primary")
        # Right column: live status, log table and export controls.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("π Live Execution Logs"):
                    status_output = gr.Markdown("Waiting for input...")
                    # Headers are defaults only; the real columns come from
                    # the DataFrames the handlers return. FIX: the first
                    # header was " id" (leading space), which could never
                    # match an actual "id" column.
                    log_table = gr.Dataframe(
                        label="Processing Logs (Database)",
                        headers=["id", "filename", "status", "retry_count",
                                 "verification_score", "verifier_feedback",
                                 "final_summary", "model_used",
                                 "cost_estimate", "timestamp"],
                    )
                    # Export Buttons
                    gr.Markdown("### π₯ Export Results")
                    with gr.Row():
                        export_csv_btn = gr.Button("π Download as CSV", size="sm")
                        export_json_btn = gr.Button("π Download as JSON", size="sm")
                    csv_download = gr.File(label="CSV Download", visible=False)
                    json_download = gr.File(label="JSON Download", visible=False)

    # Event wiring: feed every config widget into run_pipeline.
    run_btn.click(
        fn=run_pipeline,
        inputs=[
            file_uploader,
            api_key_input,
            model_selector,
            context_limit,
            retry_limit,
            sum_prompt_box,
            ver_prompt_box,
        ],
        outputs=[status_output, log_table],
    )
    # Export handlers: write the file, then un-hide the download component.
    export_csv_btn.click(
        fn=export_logs_csv,
        inputs=[],
        outputs=[csv_download],
    ).then(
        lambda: gr.File(visible=True),
        outputs=[csv_download],
    )
    export_json_btn.click(
        fn=export_logs_json,
        inputs=[],
        outputs=[json_download],
    ).then(
        lambda: gr.File(visible=True),
        outputs=[json_download],
    )
    # Populate the log table from the database on page load.
    demo.load(get_initial_logs, None, log_table)

if __name__ == "__main__":
    demo.launch()