# Cancer_LLM / app.py
# enshicoolsoda's picture
# try with different prompts
# 82c62db verified
# (Hugging Face page-header lines above commented out so the module parses as Python.)
import json
import os
import requests
import gradio as gr
# -----------------------------
# 1. Configuration & Data Mapping
# -----------------------------
# Cancer type (UI dropdown label) -> path of the combined per-patient feature JSON.
CANCER_MAP = {
    "Uterine Cancer": "data/ucec_combined_data.json",
    "Breast Cancer": "data/brca_combined_data.json",
    "Lung Cancer": "data/luad_combined_data.json",
    "Bladder Cancer": "data/blca_combined_data.json",
    "Head and Neck Cancer": "data/hnsc_combined_data.json",
}
# Cancer type (UI dropdown label) -> TCGA study code used as the top-level key
# in data/ground_truth_5yr_recurrence.json (see run_evaluation).
GT_MAP = {
    "Uterine Cancer": "UCEC",
    "Breast Cancer": "BRCA",
    "Lung Cancer": "LUAD",
    "Bladder Cancer": "BLCA",
    "Head and Neck Cancer": "HNSC",
}
# Common chemotherapy agents; not referenced elsewhere in this file's visible
# code — presumably consumed by UI code outside this view (TODO confirm).
COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Tamoxifen", "Other"]
# --- Prompt Templates ---
# "Direct" strategy: force a bare Yes/No reply; run_evaluation parses only the
# first few characters of the response.
PROMPT_DIRECT = "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."
# "Chain-of-Thought" strategy: step-by-step reasoning that must end with a
# FINAL_PREDICTION marker, which run_evaluation searches for (uppercased).
PROMPT_COT = """You are an oncology expert. Predict 5-year cancer recurrence.
Process:
1. Analyze demographics and tumor stage.
2. Evaluate treatment timeline and dosages.
3. Identify risk factors.
4. State your final prediction.
Constraint: You must end your response with 'FINAL_PREDICTION: YES' or 'FINAL_PREDICTION: NO'."""
# "Evidence Grading" strategy: graded reasoning ending in a "Decision: ..."
# line, which run_evaluation matches (uppercased) as "DECISION: YES".
PROMPT_GRADING = """You are a clinical oncology researcher. Evaluate 5-year recurrence risk by grading:
- Tumor Burden (Stage/Grade)
- Treatment Adequacy (Agents/Duration)
- Patient Baseline
Prediction Rule: If cumulative evidence suggests >50% likelihood of recurrence, predict Yes.
Output Format:
[Reasoning]
Decision: [Yes/No]"""
# -----------------------------
# 2. AI Backend Function
# -----------------------------
def ollama_chat(messages, temperature=0.1):
    """Send a chat-completion request to the Ollama server.

    Args:
        messages: list of {"role": ..., "content": ...} dicts in the
            Ollama/OpenAI chat format.
        temperature: sampling temperature; low default for reproducible
            clinical predictions.

    Returns:
        The assistant's reply text, or a human-readable "Error: ..." string.
        Never raises — callers display the return value directly.
    """
    endpoint = os.getenv("OLLAMA_ENDPOINT")
    if not endpoint:
        return "Error: OLLAMA_ENDPOINT not set."
    url = f"{endpoint}/api/chat"
    # The ngrok header skips the interstitial browser-warning page when the
    # endpoint is tunnelled through ngrok.
    headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
    payload = {
        "model": "qwen2.5:7b",
        "messages": messages,
        "stream": False,  # single JSON response, not chunked streaming
        "options": {"temperature": float(temperature), "num_ctx": 8192},
    }
    try:
        r = requests.post(url, json=payload, headers=headers, timeout=120)
        # Surface HTTP errors explicitly instead of an opaque JSON-decode
        # failure on an HTML error page.
        r.raise_for_status()
        return r.json().get("message", {}).get("content", "")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body from a misbehaving proxy.
        return f"Error: {str(e)}"
# -----------------------------
# 3. Evaluation Engine Logic
# -----------------------------
def run_evaluation(cancer_type, strategy):
    """Run a zero-shot recurrence-prediction experiment and stream progress.

    Generator used as a Gradio streaming callback: yields markdown progress
    strings, then a final metrics report (accuracy / sensitivity /
    specificity plus a confusion matrix).

    Args:
        cancer_type: key into CANCER_MAP / GT_MAP (UI dropdown value).
        strategy: "Direct", "Chain-of-Thought", or "Evidence Grading".
    """
    data_path = CANCER_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    # Guard: an unknown cancer_type makes data_path None, and
    # os.path.exists(None) raises TypeError — fold it into the same
    # user-facing error message instead of crashing the callback.
    if not data_path or not os.path.exists(data_path) or not os.path.exists(gt_path):
        yield "Error: Required data files not found in /data folder."
        return
    with open(data_path, 'r') as f:
        patient_db = json.load(f)
    with open(gt_path, 'r') as f:
        all_gt = json.load(f)
    gt_labels = all_gt.get(GT_MAP[cancer_type], {})
    # Evaluate only patients present in both the feature DB and ground truth.
    eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
    # Map strategy name to its system prompt (default: Direct).
    sys_content = PROMPT_COT if strategy == "Chain-of-Thought" else (PROMPT_GRADING if strategy == "Evidence Grading" else PROMPT_DIRECT)
    tp, tn, fp, fn = 0, 0, 0, 0
    yield f"🚀 Starting {strategy} inference for {len(eval_ids)} patients in {cancer_type}..."
    for i, pid in enumerate(eval_ids):
        actual = gt_labels[pid]  # expected to be "Yes" or "No" — TODO confirm schema
        patient_json = json.dumps(patient_db[pid])
        msgs = [{"role": "system", "content": sys_content}, {"role": "user", "content": f"Patient Data: {patient_json}"}]
        raw_res = ollama_chat(msgs).strip().upper()
        # Robust parsing tied to each strategy's mandated output format.
        if strategy == "Direct":
            # Only scan the first characters so stray "yes" later in an
            # explanation doesn't flip the prediction.
            pred = "Yes" if "YES" in raw_res[:10] else "No"
        elif strategy == "Chain-of-Thought":
            pred = "Yes" if "FINAL_PREDICTION: YES" in raw_res else "No"
        else:  # Evidence Grading
            pred = "Yes" if "DECISION: YES" in raw_res else "No"
        # Update confusion-matrix counts.
        if pred == "Yes" and actual == "Yes":
            tp += 1
        elif pred == "No" and actual == "No":
            tn += 1
        elif pred == "Yes" and actual == "No":
            fp += 1
        else:
            fn += 1
        if (i + 1) % 5 == 0:
            yield f"🔄 Progress: {i+1}/{len(eval_ids)} patients processed..."
    # Final metrics; every denominator is guarded against zero.
    total = len(eval_ids)
    acc = (tp + tn) / total if total > 0 else 0
    sens = tp / (tp + fn) if (tp + fn) > 0 else 0
    spec = tn / (tn + fp) if (tn + fp) > 0 else 0
    yield f"""
## {strategy} Strategy Results: {cancer_type}
- **Accuracy:** {acc:.2%}
- **Sensitivity (Recall):** {sens:.2%}
- **Specificity:** {spec:.2%}
**Confusion Matrix:**
| | Predicted YES | Predicted NO |
|---|---|---|
| **Actual YES** | {tp} (TP) | {fn} (FN) |
| **Actual NO** | {fp} (FP) | {tn} (TN) |
"""
# -----------------------------
# 4. Helper UI Logic (Chat)
# -----------------------------
def load_data(cancer_type):
    """Load the patient DB for a cancer type and refresh the ID dropdown.

    Args:
        cancer_type: key into CANCER_MAP (UI dropdown value).

    Returns:
        (gr.update for the patient-ID dropdown, full patient dict) — the
        dict is stored in a gr.State for later per-patient lookups.
    """
    path = CANCER_MAP.get(cancer_type)
    # Guard: unknown type or missing file -> empty dropdown, not a crash.
    if not path or not os.path.exists(path):
        return gr.update(choices=[], value=None), {}
    with open(path, "r") as f:
        data = json.load(f)
    ids = sorted(str(k) for k in data.keys())
    # Guard: an empty dataset would make ids[0] raise IndexError.
    return gr.update(choices=ids, value=ids[0] if ids else None), data
def respond(message, history):
    """Chat callback: forward the conversation to the LLM and append the turn.

    Args:
        message: user's new message (textbox value).
        history: prior gr.Chatbot history, or None on the first turn.

    Returns:
        ("", updated_history) — empty string clears the input textbox.
    """
    history = history or []
    # Standard system prompt for the Clinical Assistant tab.
    sys = {"role": "system", "content": "You are an oncology assistant. Summarize the case and predict outcomes."}
    # Normalize history before calling the API: depending on the Gradio
    # version/config, gr.Chatbot may deliver "messages"-format dicts or
    # legacy (user, assistant) tuple pairs. The original forwarded raw
    # history entries, which breaks the API call in tuple mode.
    msgs = []
    for turn in history:
        if isinstance(turn, dict):
            msgs.append({"role": turn.get("role"), "content": turn.get("content")})
        else:
            user_text, bot_text = turn
            if user_text:
                msgs.append({"role": "user", "content": user_text})
            if bot_text:
                msgs.append({"role": "assistant", "content": bot_text})
    res = ollama_chat([sys] + msgs + [{"role": "user", "content": message}])
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": res})
    return "", history
# -----------------------------
# 5. UI Layout
# -----------------------------
# Top-level Gradio app: two tabs (batch evaluation + interactive chat)
# sharing the same Ollama backend.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Oncology Research Platform")
    # Holds the full patient DB dict for the currently selected cancer type.
    full_data_state = gr.State({})
    with gr.Tabs():
        # TAB 1: Evaluation Engine
        with gr.TabItem("🔬 Performance Metrics"):
            gr.Markdown("### Zero-Shot Inference Experiments")
            with gr.Row():
                e_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                e_strat = gr.Dropdown(label="Prompting Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_btn = gr.Button("Start Experiment", variant="primary")
            results_md = gr.Markdown("Select criteria and start to see metrics.")
        # TAB 2: Clinical Assistant
        with gr.TabItem("💬 Clinical Assistant"):
            with gr.Row():
                with gr.Column(scale=1):
                    c_select = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                    p_select = gr.Dropdown(label="Patient ID")
                with gr.Column(scale=2):
                    # NOTE(review): no type="messages" here, but respond()
                    # appends dict messages — confirm the installed Gradio
                    # version accepts that history format.
                    chat = gr.Chatbot(height=400)
                    msg = gr.Textbox(label="Patient JSON / Message")
                    send = gr.Button("Analyze")
    # Bindings
    # run_evaluation is a generator, so results_md streams progress updates.
    run_btn.click(run_evaluation, [e_type, e_strat], results_md)
    c_select.change(load_data, c_select, [p_select, full_data_state])
    # Selecting a patient pre-fills the message box with their JSON record.
    p_select.change(lambda p, d: json.dumps(d.get(p), indent=2), [p_select, full_data_state], msg)
    send.click(respond, [msg, chat], [msg, chat])
    # Populate the patient dropdown once on initial page load.
    demo.load(load_data, c_select, [p_select, full_data_state])
demo.launch()
# import json
# import os
# import requests
# import gradio as gr
# import pandas as pd
# # -----------------------------
# # 1. Configuration & Data Mapping
# # -----------------------------
# CANCER_MAP = {
# "Uterine Cancer": "data/ucec_combined_data.json",
# "Breast Cancer": "data/brca_combined_data.json",
# "Lung Cancer": "data/luad_combined_data.json",
# "Bladder Cancer": "data/blca_combined_data.json",
# "Head and Neck Cancer": "data/hnsc_combined_data.json",
# }
# # Map for the Ground Truth JSON keys
# GT_MAP = {
# "Uterine Cancer": "UCEC",
# "Breast Cancer": "BRCA",
# "Lung Cancer": "LUAD",
# "Bladder Cancer": "BLCA",
# "Head and Neck Cancer": "HNSC",
# }
# COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"]
# # -----------------------------
# # 2. AI Backend Function
# # -----------------------------
# def ollama_chat(messages, temperature=0.1):
# endpoint = os.getenv("OLLAMA_ENDPOINT")
# if not endpoint: return "Error: Endpoint not set."
# url = f"{endpoint}/api/chat"
# headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
# payload = {
# "model": "qwen2.5:7b",
# "messages": messages,
# "stream": False,
# "options": {"temperature": float(temperature), "num_ctx": 8192}
# }
# try:
# r = requests.post(url, json=payload, headers=headers, timeout=120)
# return r.json().get("message", {}).get("content", "")
# except: return "Connection Error"
# # -----------------------------
# # 3. Evaluation Logic
# # -----------------------------
# def run_evaluation(cancer_type):
# # 1. Load Data
# data_path = CANCER_MAP.get(cancer_type)
# gt_path = "data/ground_truth_5yr_recurrence.json"
# if not os.path.exists(data_path) or not os.path.exists(gt_path):
# return "Error: Missing data or ground truth files."
# with open(data_path, 'r') as f: patient_db = json.load(f)
# with open(gt_path, 'r') as f: all_gt = json.load(f)
# gt_labels = all_gt.get(GT_MAP[cancer_type], {})
# # 2. Filter patients present in both
# eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
# results = []
# tp, tn, fp, fn = 0, 0, 0, 0
# yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..."
# for i, pid in enumerate(eval_ids):
# actual = gt_labels[pid] # "Yes" or "No"
# patient_json = json.dumps(patient_db[pid])
# # Zero-shot prompt
# eval_prompt = [
# {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."},
# {"role": "user", "content": f"Patient Data: {patient_json}"}
# ]
# prediction_raw = ollama_chat(eval_prompt).strip()
# # Simple parser to find Yes/No in response
# prediction = "Yes" if "yes" in prediction_raw.lower() else "No"
# # Calculate Metrics
# if prediction == "Yes" and actual == "Yes": tp += 1
# elif prediction == "No" and actual == "No": tn += 1
# elif prediction == "Yes" and actual == "No": fp += 1
# elif prediction == "No" and actual == "Yes": fn += 1
# if i % 5 == 0:
# yield f"Processed {i+1}/{len(eval_ids)} patients..."
# # 3. Final Metric Calculation
# acc = (tp + tn) / len(eval_ids) if eval_ids else 0
# sens = tp / (tp + fn) if (tp + fn) > 0 else 0
# spec = tn / (tn + fp) if (tn + fp) > 0 else 0
# summary = f"""
# ### Evaluation Results: {cancer_type}
# - **Total Patients Processed:** {len(eval_ids)}
# - **Unweighted Accuracy:** {acc:.2%}
# - **Sensitivity (Recall):** {sens:.2%}
# - **Specificity:** {spec:.2%}
# *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}*
# """
# yield summary
# # -----------------------------
# # 4. UI Layout (Modified)
# # -----------------------------
# with gr.Blocks(title="OncoRisk Eval & Demo") as demo:
# gr.HTML('<div style="text-align:center"><h1>Oncology Risk Assistant</h1></div>')
# with gr.Tabs():
# # Tab 1: Your original Chat/Simulation UI
# with gr.TabItem("Clinical Assistant"):
# with gr.Row():
# with gr.Column(scale=1):
# cancer_select = gr.Dropdown(label="Select Cancer Type", choices=list(CANCER_MAP.keys()))
# patient_select = gr.Dropdown(label="Select Patient ID")
# submit_btn = gr.Button("Analyze Case", variant="primary")
# missing_output = gr.HighlightedText(label="Completeness")
# with gr.Column(scale=2):
# chatbot = gr.Chatbot(height=500)
# msg_input = gr.Textbox(label="Input Box", lines=5)
# # Tab 2: NEW Evaluation Engine
# with gr.TabItem("Performance Metrics (Zero-Shot)"):
# gr.Markdown("### Run Zero-Shot Inference on Ground Truth")
# eval_cancer_type = gr.Dropdown(label="Select Cancer for Evaluation", choices=list(CANCER_MAP.keys()))
# run_eval_btn = gr.Button("Start Experiment", variant="secondary")
# eval_results = gr.Markdown("Results will appear here after inference...")
# # Logic for Evaluation
# run_eval_btn.click(fn=run_evaluation, inputs=eval_cancer_type, outputs=eval_results)
# # (Keep your existing Event Logic for Chat/Data Selection here...)
# # ... [Same as your provided code] ...
# demo.launch()