# File size: 16,170 Bytes
import json
import os
import requests
import gradio as gr
import re
from sklearn.metrics import roc_auc_score
# -----------------------------
# 1. Configuration & Data Mapping
# -----------------------------
# One row per supported cohort: (UI display name, cohort code).
# The code is the key used in the ground-truth file and, lower-cased,
# the prefix of the cohort's combined-data file name.
_COHORTS = (
    ("Uterine Cancer", "UCEC"),
    ("Breast Cancer", "BRCA"),
    ("Lung Cancer", "LUAD"),
    ("Bladder Cancer", "BLCA"),
    ("Head and Neck Cancer", "HNSC"),
)

# Display name -> path of the cohort's patient JSON file.
CANCER_MAP = {
    label: f"data/{code.lower()}_combined_data.json" for label, code in _COHORTS
}

# Display name -> cohort key inside the ground-truth JSON.
GT_MAP = dict(_COHORTS)

# Agent names offered by the app; "Other" is the catch-all entry.
COMMON_AGENTS = [
    "Carboplatin",
    "Paclitaxel",
    "Cisplatin",
    "Gemcitabine",
    "Doxorubicin",
    "Tamoxifen",
    "Other",
]
# --- Prompt Templates ---
# System prompts for the Yes/No evaluation in run_evaluation. Each template
# fixes an output convention that run_evaluation's parser relies on, so the
# prompt wording and the parser must be changed together.

# Default prompt: used for every strategy other than "Chain-of-Thought" and
# "Evidence Grading". The reply is expected to be a bare 'Yes' or 'No'.
PROMPT_DIRECT = "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."
# "Chain-of-Thought" prompt: free-form reasoning that must end with a
# FINAL_PREDICTION marker, which is what the parser looks for.
PROMPT_COT = """You are an oncology expert. Predict 5-year cancer recurrence.
Process:
1. Analyze demographics and tumor stage.
2. Evaluate treatment timeline and dosages.
3. Identify risk factors.
4. State your final prediction.
Constraint: You must end your response with 'FINAL_PREDICTION: YES' or 'FINAL_PREDICTION: NO'."""
# "Evidence Grading" prompt: the verdict is carried by the "Decision:" line,
# which is what the parser looks for.
PROMPT_GRADING = """You are a clinical oncology researcher. Evaluate 5-year recurrence risk by grading:
- Tumor Burden (Stage/Grade)
- Treatment Adequacy (Agents/Duration)
- Patient Baseline
Prediction Rule: If cumulative evidence suggests >50% likelihood of recurrence, predict Yes.
Output Format:
[Reasoning]
Decision: [Yes/No]"""
# -----------------------------
# 2. AI Backend Function
# -----------------------------
# --- AUC Prompt Templates ---
# System prompts for run_auc_experiment. Each asks the model for a 0-100
# risk percentage; run_auc_experiment extracts a number from the reply and
# divides it by 100 to obtain the score passed to roc_auc_score.

# Default prompt: used for every strategy other than "Chain-of-Thought" and
# "Evidence Grading". The reply is expected to be a bare number.
AUC_PROMPT_DIRECT = "You are an oncology expert. Predict the probability of 5-year recurrence for this patient. Respond strictly with a number from 0 to 100 representing the percentage (e.g., 75) and nothing else."
# "Chain-of-Thought" prompt: reasoning followed by a PROBABILITY marker.
AUC_PROMPT_COT = """You are an oncology expert. Analyze the patient data:
1. Evaluate Tumor Stage/Demographics.
2. Review treatment timing and cycles.
3. List recurrence risk factors.
Output Format: ... Reasoning ... PROBABILITY: [0-100]"""
# "Evidence Grading" prompt: grading followed by a FINAL_SCORE marker.
AUC_PROMPT_GRADING = """You are a clinical researcher. Grade the following:
- Tumor Burden
- Treatment Adequacy
Based on these grades, provide a final risk score.
Output Format: ... Reasoning ... FINAL_SCORE: [0-100]"""
def ollama_chat(messages, temperature=0.1):
    """Send a non-streaming chat request to the Ollama server and return the reply.

    The server base URL is read from the ``OLLAMA_ENDPOINT`` environment
    variable. This function never raises: every failure is reported as a
    string beginning with ``"Error: "`` so callers can keep treating the
    result as plain text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts sent as the
            conversation.
        temperature: Sampling temperature; coerced to ``float``.

    Returns:
        The assistant message content (``""`` when the response carries
        none), or an ``"Error: ..."`` string on any failure.
    """
    endpoint = os.getenv("OLLAMA_ENDPOINT")
    if not endpoint:
        return "Error: OLLAMA_ENDPOINT not set."

    # A trailing slash in the configured endpoint would otherwise produce
    # ".../​/api/chat".
    url = f"{endpoint.rstrip('/')}/api/chat"
    headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
    payload = {
        "model": "qwen2.5:7b",
        "messages": messages,
        "stream": False,
        "options": {"temperature": float(temperature), "num_ctx": 8192},
    }
    try:
        r = requests.post(url, json=payload, headers=headers, timeout=120)
        # Without this, an HTTP error body (which has no "message" key) would
        # be silently returned as an empty reply.
        r.raise_for_status()
        message = r.json().get("message") or {}
        content = message.get("content")
    except Exception as e:
        # Deliberately broad: this is the boundary that converts any network,
        # HTTP or decoding failure into the "Error: ..." string contract.
        return f"Error: {str(e)}"
    # Callers call str methods on the result, so never hand back None.
    return content or ""
# -----------------------------
# 3. Evaluation Engine Logic
# -----------------------------
def _parse_yes_no(raw_res, strategy):
    """Map an upper-cased model reply to "Yes" or "No" for the given strategy."""
    if strategy == "Chain-of-Thought":
        marker = "FINAL_PREDICTION"
    elif strategy == "Evidence Grading":
        marker = "DECISION"
    else:
        # Same fallback as the prompt selection: anything else is "Direct",
        # where the verdict must appear at the very start of the reply.
        return "Yes" if "YES" in raw_res[:10] else "No"
    # \W* tolerates punctuation/markdown between marker and verdict (":",
    # "**", "["), e.g. the "Decision: [Yes]" form the grading prompt shows.
    # The lookahead rejects an echoed "[YES/NO]" template.
    hit = re.search(marker + r"\W*YES\b(?!\s*/)", raw_res)
    return "Yes" if hit else "No"


def run_evaluation(cancer_type, strategy):
    """Stream a zero-shot Yes/No recurrence evaluation for one cohort.

    This is a generator: it yields progress strings while patients are being
    processed and finally a Markdown report with accuracy, sensitivity,
    specificity and the confusion matrix.

    Patients are skipped (and reported as skipped) when the ground-truth
    label is not exactly "Yes"/"No" or when the model backend returned an
    "Error: ..." string, so neither case is silently scored as a prediction.

    Args:
        cancer_type: Display name of the cohort (a key of CANCER_MAP).
        strategy: "Direct", "Chain-of-Thought" or "Evidence Grading"; any
            other value is treated as "Direct".

    Yields:
        Status strings, then the final Markdown report.
    """
    data_path = CANCER_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    # data_path is None for an unselected/unknown cohort; os.path.exists(None)
    # would raise TypeError, so test for it first.
    if not data_path or not os.path.exists(data_path) or not os.path.exists(gt_path):
        yield "Error: Required data files not found in /data folder."
        return

    with open(data_path, "r", encoding="utf-8") as f:
        patient_db = json.load(f)
    with open(gt_path, "r", encoding="utf-8") as f:
        all_gt = json.load(f)
    gt_labels = all_gt.get(GT_MAP.get(cancer_type), {})
    # Only patients present in both the labels and the cohort file are scored.
    eval_ids = [pid for pid in gt_labels if pid in patient_db]

    # Map strategy to system prompt
    if strategy == "Chain-of-Thought":
        sys_content = PROMPT_COT
    elif strategy == "Evidence Grading":
        sys_content = PROMPT_GRADING
    else:
        sys_content = PROMPT_DIRECT

    tp = tn = fp = fn = 0
    failed = unlabeled = 0
    yield f"π Starting {strategy} inference for {len(eval_ids)} patients in {cancer_type}..."

    for i, pid in enumerate(eval_ids):
        actual = gt_labels[pid]
        if actual not in ("Yes", "No"):
            # Not a usable label; counting it would corrupt the matrix.
            unlabeled += 1
        else:
            patient_json = json.dumps(patient_db[pid])
            msgs = [
                {"role": "system", "content": sys_content},
                {"role": "user", "content": f"Patient Data: {patient_json}"},
            ]
            raw_res = ollama_chat(msgs).strip().upper()
            if raw_res.startswith("ERROR:"):
                # ollama_chat reports failures as "Error: ..." text; that is
                # not a model prediction.
                failed += 1
            else:
                pred = _parse_yes_no(raw_res, strategy)
                if pred == "Yes" and actual == "Yes":
                    tp += 1
                elif pred == "No" and actual == "No":
                    tn += 1
                elif pred == "Yes" and actual == "No":
                    fp += 1
                else:
                    fn += 1
        if (i + 1) % 5 == 0:
            yield f"π Progress: {i+1}/{len(eval_ids)} patients processed..."

    # Metrics are computed over the patients that were actually scored.
    total = tp + tn + fp + fn
    acc = (tp + tn) / total if total > 0 else 0
    sens = tp / (tp + fn) if (tp + fn) > 0 else 0
    spec = tn / (tn + fp) if (tn + fp) > 0 else 0
    report_lines = [
        "",
        f"## {strategy} Strategy Results: {cancer_type}",
        f"- **Patients scored:** {total} of {len(eval_ids)} "
        f"(skipped: {failed} backend errors, {unlabeled} unusable labels)",
        f"- **Accuracy:** {acc:.2%}",
        f"- **Sensitivity (Recall):** {sens:.2%}",
        f"- **Specificity:** {spec:.2%}",
        "",
        "**Confusion Matrix:**",
        "",
        "| | Predicted YES | Predicted NO |",
        "|---|---|---|",
        f"| **Actual YES** | {tp} (TP) | {fn} (FN) |",
        f"| **Actual NO** | {fp} (FP) | {tn} (TN) |",
        "",
    ]
    yield "\n".join(report_lines)
def _extract_probability(raw_res):
    """Return the 0-1 risk score stated in a model reply, or None if unusable."""
    number = r"(\d+(?:\.\d+)?)"
    # Prefer the value attached to the marker the prompts ask for; reasoning
    # text often contains unrelated numbers (ages, doses, stages).
    candidates = re.findall(r"(?:PROBABILITY|FINAL_SCORE)\W*" + number, raw_res, re.IGNORECASE)
    if not candidates:
        # No marker (e.g. the Direct prompt): use the last number in the text.
        candidates = re.findall(number, raw_res)
    if not candidates:
        return None
    value = float(candidates[-1])
    # The prompts ask for a percentage; anything else is not a valid answer.
    if not 0.0 <= value <= 100.0:
        return None
    return value / 100.0


def run_auc_experiment(cancer_type, strategy):
    """Stream an AUC evaluation of model-predicted recurrence risk.

    This is a generator: it yields progress strings while patients are being
    processed and finally a Markdown summary with the ROC AUC, or a message
    explaining why it could not be computed.

    Patients are left out of the AUC when the backend returned an
    "Error: ..." string or when no 0-100 number can be read from the reply.

    Args:
        cancer_type: Display name of the cohort (a key of CANCER_MAP).
        strategy: "Direct", "Chain-of-Thought" or "Evidence Grading"; any
            other value is treated as "Direct".

    Yields:
        Status strings, then the final result or error message.
    """
    data_path = CANCER_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    # data_path is None for an unselected/unknown cohort; os.path.exists(None)
    # would raise TypeError, so test for it first.
    if not data_path or not os.path.exists(data_path) or not os.path.exists(gt_path):
        yield "Files not found."
        return

    with open(data_path, "r", encoding="utf-8") as f:
        patient_db = json.load(f)
    with open(gt_path, "r", encoding="utf-8") as f:
        all_gt = json.load(f)
    gt_labels = all_gt.get(GT_MAP.get(cancer_type), {})
    eval_ids = [pid for pid in gt_labels if pid in patient_db]

    # Map Strategy to Prompt
    if strategy == "Chain-of-Thought":
        sys_msg = AUC_PROMPT_COT
    elif strategy == "Evidence Grading":
        sys_msg = AUC_PROMPT_GRADING
    else:
        sys_msg = AUC_PROMPT_DIRECT

    y_true, y_scores = [], []
    yield f"π Starting AUC ({strategy}) for {len(eval_ids)} patients..."

    for i, pid in enumerate(eval_ids):
        actual = 1 if gt_labels[pid] == "Yes" else 0
        patient_json = json.dumps(patient_db[pid])
        raw_res = ollama_chat([
            {"role": "system", "content": sys_msg},
            {"role": "user", "content": f"Data: {patient_json}"},
        ])
        # ollama_chat reports failures as "Error: ..." text; digits inside an
        # error message (ports, status codes) must not be read as a score.
        if not raw_res.lstrip().upper().startswith("ERROR:"):
            score = _extract_probability(raw_res)
            if score is not None:
                y_true.append(actual)
                y_scores.append(score)
        if (i + 1) % 5 == 0:
            yield f"π Progress: {i+1}/{len(eval_ids)}..."

    # ROC AUC is undefined unless both outcomes are present among the
    # scored patients.
    if len(set(y_true)) < 2:
        yield (
            f"Error: AUC needs scored patients from both outcome classes "
            f"({len(y_true)} of {len(eval_ids)} patients scored)."
        )
        return
    try:
        final_auc = roc_auc_score(y_true, y_scores)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the generator.
        yield f"Error: {str(e)}"
        return
    yield (
        f"## AUC Result ({strategy})\n**Dataset:** {cancer_type}\n"
        f"**AUC Score:** {final_auc:.4f}\n"
        f"**Patients scored:** {len(y_true)} of {len(eval_ids)}"
    )
# -----------------------------
# 4. Helper UI Logic (Chat)
# -----------------------------
def load_data(cancer_type):
    """Load one cohort file and build the update for the patient-ID dropdown.

    Args:
        cancer_type: Display name of the cohort (a key of CANCER_MAP).

    Returns:
        A ``(dropdown_update, data)`` pair: a ``gr.update`` carrying the
        sorted patient IDs (first one preselected, or no selection when the
        cohort is empty) and the loaded cohort dict. An unselected or unknown
        cohort yields an empty dropdown and an empty dict.

    Raises:
        OSError: If the cohort file cannot be opened.
        json.JSONDecodeError: If the cohort file is not valid JSON.
    """
    path = CANCER_MAP.get(cancer_type)
    if not path:
        # Dropdown cleared or unknown label: open(None) would raise TypeError.
        return gr.update(choices=[], value=None), {}
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    ids = sorted(str(k) for k in data)
    # ids[0] would raise IndexError for an empty cohort file.
    first_id = ids[0] if ids else None
    return gr.update(choices=ids, value=first_id), data
def respond(message, history):
    """Run one chat turn against the model and extend the conversation.

    Args:
        message: Text of the new user turn.
        history: Existing list of role/content message dicts; a falsy value
            starts a fresh conversation. A non-empty list is extended in place.

    Returns:
        ``("", history)``: an empty string together with the conversation
        including the new user and assistant turns.
    """
    turns = history or []
    # Standard System Prompt for Chat
    system_turn = {
        "role": "system",
        "content": "You are an oncology assistant. Summarize the case and predict outcomes.",
    }
    reply = ollama_chat([system_turn] + turns + [{"role": "user", "content": message}])
    turns += [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ]
    return "", turns
# -----------------------------
# 5. UI Layout
# -----------------------------
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped — confirm the layout against the
# running app.
with gr.Blocks() as demo:
    gr.Markdown("# Oncology Research Platform")
    # Holds the cohort dict returned by load_data so the patient dropdown
    # handler can look records up without re-reading the file.
    full_data_state = gr.State({})
    with gr.Tabs():
        # TAB 1: Evaluation Engine
        with gr.TabItem("π¬ Performance Metrics"):
            gr.Markdown("### Zero-Shot Inference Experiments")
            with gr.Row():
                e_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                e_strat = gr.Dropdown(label="Prompting Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_btn = gr.Button("Start Experiment", variant="primary")
            results_md = gr.Markdown("Select criteria and start to see metrics.")
        # TAB 2: AUC Metrics
        with gr.TabItem("π AUC Metrics"):
            with gr.Row():
                # Defaults match the other tabs; without them, clicking
                # "Run AUC Test" before choosing sends None to the backend.
                auc_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                auc_strat = gr.Dropdown(label="Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_auc_btn = gr.Button("Run AUC Test", variant="primary")
            auc_results = gr.Markdown("AUC scores will appear here.")
        # TAB 3: Clinical Assistant
        with gr.TabItem("π¬ Clinical Assistant"):
            with gr.Row():
                with gr.Column(scale=1):
                    c_select = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                    p_select = gr.Dropdown(label="Patient ID")
                with gr.Column(scale=2):
                    chat = gr.Chatbot(height=400)
                    msg = gr.Textbox(label="Patient JSON / Message")
                    send = gr.Button("Analyze")
    # Bindings
    run_btn.click(run_evaluation, [e_type, e_strat], results_md)
    run_auc_btn.click(run_auc_experiment, [auc_type, auc_strat], auc_results)
    # Changing the cohort reloads the patient list and the cached cohort data.
    c_select.change(load_data, c_select, [p_select, full_data_state])
    # Selecting a patient puts that patient's record, as JSON, in the message box.
    p_select.change(lambda p, d: json.dumps(d.get(p), indent=2), [p_select, full_data_state], msg)
    send.click(respond, [msg, chat], [msg, chat])
    # Populate the patient list for the default cohort on page load.
    demo.load(load_data, c_select, [p_select, full_data_state])
# demo.launch()
demo.launch(server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft())
# import json
# import os
# import requests
# import gradio as gr
# import pandas as pd
# # -----------------------------
# # 1. Configuration & Data Mapping
# # -----------------------------
# CANCER_MAP = {
# "Uterine Cancer": "data/ucec_combined_data.json",
# "Breast Cancer": "data/brca_combined_data.json",
# "Lung Cancer": "data/luad_combined_data.json",
# "Bladder Cancer": "data/blca_combined_data.json",
# "Head and Neck Cancer": "data/hnsc_combined_data.json",
# }
# # Map for the Ground Truth JSON keys
# GT_MAP = {
# "Uterine Cancer": "UCEC",
# "Breast Cancer": "BRCA",
# "Lung Cancer": "LUAD",
# "Bladder Cancer": "BLCA",
# "Head and Neck Cancer": "HNSC",
# }
# COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"]
# # -----------------------------
# # 2. AI Backend Function
# # -----------------------------
# def ollama_chat(messages, temperature=0.1):
# endpoint = os.getenv("OLLAMA_ENDPOINT")
# if not endpoint: return "Error: Endpoint not set."
# url = f"{endpoint}/api/chat"
# headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
# payload = {
# "model": "qwen2.5:7b",
# "messages": messages,
# "stream": False,
# "options": {"temperature": float(temperature), "num_ctx": 8192}
# }
# try:
# r = requests.post(url, json=payload, headers=headers, timeout=120)
# return r.json().get("message", {}).get("content", "")
# except: return "Connection Error"
# # -----------------------------
# # 3. Evaluation Logic
# # -----------------------------
# def run_evaluation(cancer_type):
# # 1. Load Data
# data_path = CANCER_MAP.get(cancer_type)
# gt_path = "data/ground_truth_5yr_recurrence.json"
# if not os.path.exists(data_path) or not os.path.exists(gt_path):
# return "Error: Missing data or ground truth files."
# with open(data_path, 'r') as f: patient_db = json.load(f)
# with open(gt_path, 'r') as f: all_gt = json.load(f)
# gt_labels = all_gt.get(GT_MAP[cancer_type], {})
# # 2. Filter patients present in both
# eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
# results = []
# tp, tn, fp, fn = 0, 0, 0, 0
# yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..."
# for i, pid in enumerate(eval_ids):
# actual = gt_labels[pid] # "Yes" or "No"
# patient_json = json.dumps(patient_db[pid])
# # Zero-shot prompt
# eval_prompt = [
# {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."},
# {"role": "user", "content": f"Patient Data: {patient_json}"}
# ]
# prediction_raw = ollama_chat(eval_prompt).strip()
# # Simple parser to find Yes/No in response
# prediction = "Yes" if "yes" in prediction_raw.lower() else "No"
# # Calculate Metrics
# if prediction == "Yes" and actual == "Yes": tp += 1
# elif prediction == "No" and actual == "No": tn += 1
# elif prediction == "Yes" and actual == "No": fp += 1
# elif prediction == "No" and actual == "Yes": fn += 1
# if i % 5 == 0:
# yield f"Processed {i+1}/{len(eval_ids)} patients..."
# # 3. Final Metric Calculation
# acc = (tp + tn) / len(eval_ids) if eval_ids else 0
# sens = tp / (tp + fn) if (tp + fn) > 0 else 0
# spec = tn / (tn + fp) if (tn + fp) > 0 else 0
# summary = f"""
# ### Evaluation Results: {cancer_type}
# - **Total Patients Processed:** {len(eval_ids)}
# - **Unweighted Accuracy:** {acc:.2%}
# - **Sensitivity (Recall):** {sens:.2%}
# - **Specificity:** {spec:.2%}
# *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}*
# """
# yield summary
# # -----------------------------
# # 4. UI Layout (Modified)
# # -----------------------------
# with gr.Blocks(title="OncoRisk Eval & Demo") as demo:
# gr.HTML('<div style="text-align:center"><h1>Oncology Risk Assistant</h1></div>')
# with gr.Tabs():
# # Tab 1: Your original Chat/Simulation UI
# with gr.TabItem("Clinical Assistant"):
# with gr.Row():
# with gr.Column(scale=1):
# cancer_select = gr.Dropdown(label="Select Cancer Type", choices=list(CANCER_MAP.keys()))
# patient_select = gr.Dropdown(label="Select Patient ID")
# submit_btn = gr.Button("Analyze Case", variant="primary")
# missing_output = gr.HighlightedText(label="Completeness")
# with gr.Column(scale=2):
# chatbot = gr.Chatbot(height=500)
# msg_input = gr.Textbox(label="Input Box", lines=5)
# # Tab 2: NEW Evaluation Engine
# with gr.TabItem("Performance Metrics (Zero-Shot)"):
# gr.Markdown("### Run Zero-Shot Inference on Ground Truth")
# eval_cancer_type = gr.Dropdown(label="Select Cancer for Evaluation", choices=list(CANCER_MAP.keys()))
# run_eval_btn = gr.Button("Start Experiment", variant="secondary")
# eval_results = gr.Markdown("Results will appear here after inference...")
# # Logic for Evaluation
# run_eval_btn.click(fn=run_evaluation, inputs=eval_cancer_type, outputs=eval_results)
# # (Keep your existing Event Logic for Chat/Data Selection here...)
# # ... [Same as your provided code] ...
# demo.launch()