import json
import os

import requests
import gradio as gr

# -----------------------------
# 1. Configuration & Data Mapping
# -----------------------------
# Display name -> path of the combined per-patient feature JSON.
CANCER_MAP = {
    "Uterine Cancer": "data/ucec_combined_data.json",
    "Breast Cancer": "data/brca_combined_data.json",
    "Lung Cancer": "data/luad_combined_data.json",
    "Bladder Cancer": "data/blca_combined_data.json",
    "Head and Neck Cancer": "data/hnsc_combined_data.json",
}

# Display name -> cohort code used as the top-level key in the ground-truth JSON.
GT_MAP = {
    "Uterine Cancer": "UCEC",
    "Breast Cancer": "BRCA",
    "Lung Cancer": "LUAD",
    "Bladder Cancer": "BLCA",
    "Head and Neck Cancer": "HNSC",
}

COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Tamoxifen", "Other"]

# --- Prompt Templates ---
PROMPT_DIRECT = "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."

PROMPT_COT = """You are an oncology expert. Predict 5-year cancer recurrence.
Process:
1. Analyze demographics and tumor stage.
2. Evaluate treatment timeline and dosages.
3. Identify risk factors.
4. State your final prediction.
Constraint: You must end your response with 'FINAL_PREDICTION: YES' or 'FINAL_PREDICTION: NO'."""

PROMPT_GRADING = """You are a clinical oncology researcher. Evaluate 5-year recurrence risk by grading:
- Tumor Burden (Stage/Grade)
- Treatment Adequacy (Agents/Duration)
- Patient Baseline
Prediction Rule: If cumulative evidence suggests >50% likelihood of recurrence, predict Yes.
Output Format:
[Reasoning]
Decision: [Yes/No]"""


# -----------------------------
# 2. AI Backend Function
# -----------------------------
def ollama_chat(messages, temperature=0.1):
    """Send a chat request to the Ollama server named by $OLLAMA_ENDPOINT.

    Args:
        messages: list of {"role": ..., "content": ...} dicts (OpenAI style).
        temperature: sampling temperature forwarded to the model.

    Returns:
        The assistant's reply text, or a human-readable "Error: ..." string.
        Callers rely on always getting a string back, so no exception escapes.
    """
    endpoint = os.getenv("OLLAMA_ENDPOINT")
    if not endpoint:
        return "Error: OLLAMA_ENDPOINT not set."
    url = f"{endpoint}/api/chat"
    # ngrok header skips the interstitial browser-warning page on tunneled endpoints.
    headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
    payload = {
        "model": "qwen2.5:7b",
        "messages": messages,
        "stream": False,
        "options": {"temperature": float(temperature), "num_ctx": 8192},
    }
    try:
        r = requests.post(url, json=payload, headers=headers, timeout=120)
        # Fix: surface HTTP errors (4xx/5xx) instead of silently parsing an error body.
        r.raise_for_status()
        return r.json().get("message", {}).get("content", "")
    except Exception as e:
        return f"Error: {str(e)}"


# -----------------------------
# 3. Evaluation Engine Logic
# -----------------------------
def run_evaluation(cancer_type, strategy):
    """Run zero-shot 5-year recurrence prediction over all labelled patients.

    Args:
        cancer_type: key of CANCER_MAP / GT_MAP.
        strategy: "Direct", "Chain-of-Thought", or "Evidence Grading".

    Yields:
        Progress strings, then a final markdown report with accuracy,
        sensitivity, specificity, and the confusion matrix.
    """
    data_path = CANCER_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    # Fix: CANCER_MAP.get() returns None for an unknown type, and
    # os.path.exists(None) raises TypeError — guard the None case first.
    if not data_path or not os.path.exists(data_path) or not os.path.exists(gt_path):
        yield "Error: Required data files not found in /data folder."
        return

    with open(data_path, "r") as f:
        patient_db = json.load(f)
    with open(gt_path, "r") as f:
        all_gt = json.load(f)

    gt_labels = all_gt.get(GT_MAP[cancer_type], {})
    # Evaluate only patients present in both the feature DB and the label set.
    eval_ids = [pid for pid in gt_labels if pid in patient_db]

    # Map strategy to system prompt.
    if strategy == "Chain-of-Thought":
        sys_content = PROMPT_COT
    elif strategy == "Evidence Grading":
        sys_content = PROMPT_GRADING
    else:
        sys_content = PROMPT_DIRECT

    tp = tn = fp = fn = 0
    yield f"🚀 Starting {strategy} inference for {len(eval_ids)} patients in {cancer_type}..."

    for i, pid in enumerate(eval_ids):
        actual = gt_labels[pid]  # assumed "Yes" / "No" — matches comparisons below
        patient_json = json.dumps(patient_db[pid])
        msgs = [
            {"role": "system", "content": sys_content},
            {"role": "user", "content": f"Patient Data: {patient_json}"},
        ]
        raw_res = ollama_chat(msgs).strip().upper()

        # Robust parsing, matched to each prompt's output contract.
        if strategy == "Direct":
            # Direct prompt answers with a bare Yes/No, so only scan the head.
            pred = "Yes" if "YES" in raw_res[:10] else "No"
        elif strategy == "Chain-of-Thought":
            pred = "Yes" if "FINAL_PREDICTION: YES" in raw_res else "No"
        else:  # Evidence Grading
            pred = "Yes" if "DECISION: YES" in raw_res else "No"

        if pred == "Yes" and actual == "Yes":
            tp += 1
        elif pred == "No" and actual == "No":
            tn += 1
        elif pred == "Yes" and actual == "No":
            fp += 1
        else:
            fn += 1

        if (i + 1) % 5 == 0:
            yield f"🔄 Progress: {i+1}/{len(eval_ids)} patients processed..."

    # Metrics (guard every denominator against zero).
    total = len(eval_ids)
    acc = (tp + tn) / total if total > 0 else 0
    sens = tp / (tp + fn) if (tp + fn) > 0 else 0
    spec = tn / (tn + fp) if (tn + fp) > 0 else 0
    yield f"""
## {strategy} Strategy Results: {cancer_type}
- **Accuracy:** {acc:.2%}
- **Sensitivity (Recall):** {sens:.2%}
- **Specificity:** {spec:.2%}

**Confusion Matrix:**
| | Predicted YES | Predicted NO |
|---|---|---|
| **Actual YES** | {tp} (TP) | {fn} (FN) |
| **Actual NO** | {fp} (FP) | {tn} (TN) |
"""


# -----------------------------
# 4. Helper UI Logic (Chat)
# -----------------------------
def load_data(cancer_type):
    """Load the patient DB for a cancer type and refresh the patient dropdown.

    Returns:
        (gr.update for the patient-id dropdown, the loaded patient dict).
    """
    path = CANCER_MAP.get(cancer_type)
    # Fix: unknown type or missing file previously raised TypeError /
    # FileNotFoundError inside a UI callback; degrade gracefully instead.
    if not path or not os.path.exists(path):
        return gr.update(choices=[], value=None), {}
    with open(path, "r") as f:
        data = json.load(f)
    ids = sorted(str(k) for k in data.keys())
    # Fix: an empty DB previously crashed with IndexError on ids[0].
    return gr.update(choices=ids, value=ids[0] if ids else None), data


def respond(message, history):
    """Chat handler: send history + new message to the model, append the reply.

    Returns ("", updated_history) so the textbox is cleared after sending.
    """
    history = history or []
    # Standard system prompt for the chat tab.
    sys = {"role": "system", "content": "You are an oncology assistant. Summarize the case and predict outcomes."}
    res = ollama_chat([sys] + history + [{"role": "user", "content": message}])
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": res})
    return "", history


# -----------------------------
# 5. UI Layout
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Oncology Research Platform")
    # Holds the full patient DB for the currently selected cancer type.
    full_data_state = gr.State({})

    with gr.Tabs():
        # TAB 1: Evaluation Engine
        with gr.TabItem("🔬 Performance Metrics"):
            gr.Markdown("### Zero-Shot Inference Experiments")
            with gr.Row():
                e_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                e_strat = gr.Dropdown(label="Prompting Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_btn = gr.Button("Start Experiment", variant="primary")
            results_md = gr.Markdown("Select criteria and start to see metrics.")

        # TAB 2: Clinical Assistant
        with gr.TabItem("💬 Clinical Assistant"):
            with gr.Row():
                with gr.Column(scale=1):
                    c_select = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                    p_select = gr.Dropdown(label="Patient ID")
                with gr.Column(scale=2):
                    # Fix: respond() uses openai-style role/content dicts, which
                    # requires the messages format (default tuple format breaks).
                    chat = gr.Chatbot(height=400, type="messages")
                    msg = gr.Textbox(label="Patient JSON / Message")
                    send = gr.Button("Analyze")

    # Bindings
    run_btn.click(run_evaluation, [e_type, e_strat], results_md)
    c_select.change(load_data, c_select, [p_select, full_data_state])
    # Fix: default {} instead of None so a missing id shows "{}" rather than "null".
    p_select.change(lambda p, d: json.dumps(d.get(p, {}), indent=2), [p_select, full_data_state], msg)
    send.click(respond, [msg, chat], [msg, chat])
    demo.load(load_data, c_select, [p_select, full_data_state])

demo.launch()

# import json
# import os
# import requests
# import gradio as gr
# import pandas as pd
# # -----------------------------
# # 1. 
Configuration & Data Mapping # # ----------------------------- # CANCER_MAP = { # "Uterine Cancer": "data/ucec_combined_data.json", # "Breast Cancer": "data/brca_combined_data.json", # "Lung Cancer": "data/luad_combined_data.json", # "Bladder Cancer": "data/blca_combined_data.json", # "Head and Neck Cancer": "data/hnsc_combined_data.json", # } # # Map for the Ground Truth JSON keys # GT_MAP = { # "Uterine Cancer": "UCEC", # "Breast Cancer": "BRCA", # "Lung Cancer": "LUAD", # "Bladder Cancer": "BLCA", # "Head and Neck Cancer": "HNSC", # } # COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"] # # ----------------------------- # # 2. AI Backend Function # # ----------------------------- # def ollama_chat(messages, temperature=0.1): # endpoint = os.getenv("OLLAMA_ENDPOINT") # if not endpoint: return "Error: Endpoint not set." # url = f"{endpoint}/api/chat" # headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"} # payload = { # "model": "qwen2.5:7b", # "messages": messages, # "stream": False, # "options": {"temperature": float(temperature), "num_ctx": 8192} # } # try: # r = requests.post(url, json=payload, headers=headers, timeout=120) # return r.json().get("message", {}).get("content", "") # except: return "Connection Error" # # ----------------------------- # # 3. Evaluation Logic # # ----------------------------- # def run_evaluation(cancer_type): # # 1. Load Data # data_path = CANCER_MAP.get(cancer_type) # gt_path = "data/ground_truth_5yr_recurrence.json" # if not os.path.exists(data_path) or not os.path.exists(gt_path): # return "Error: Missing data or ground truth files." # with open(data_path, 'r') as f: patient_db = json.load(f) # with open(gt_path, 'r') as f: all_gt = json.load(f) # gt_labels = all_gt.get(GT_MAP[cancer_type], {}) # # 2. 
Filter patients present in both # eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db] # results = [] # tp, tn, fp, fn = 0, 0, 0, 0 # yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..." # for i, pid in enumerate(eval_ids): # actual = gt_labels[pid] # "Yes" or "No" # patient_json = json.dumps(patient_db[pid]) # # Zero-shot prompt # eval_prompt = [ # {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."}, # {"role": "user", "content": f"Patient Data: {patient_json}"} # ] # prediction_raw = ollama_chat(eval_prompt).strip() # # Simple parser to find Yes/No in response # prediction = "Yes" if "yes" in prediction_raw.lower() else "No" # # Calculate Metrics # if prediction == "Yes" and actual == "Yes": tp += 1 # elif prediction == "No" and actual == "No": tn += 1 # elif prediction == "Yes" and actual == "No": fp += 1 # elif prediction == "No" and actual == "Yes": fn += 1 # if i % 5 == 0: # yield f"Processed {i+1}/{len(eval_ids)} patients..." # # 3. Final Metric Calculation # acc = (tp + tn) / len(eval_ids) if eval_ids else 0 # sens = tp / (tp + fn) if (tp + fn) > 0 else 0 # spec = tn / (tn + fp) if (tn + fp) > 0 else 0 # summary = f""" # ### Evaluation Results: {cancer_type} # - **Total Patients Processed:** {len(eval_ids)} # - **Unweighted Accuracy:** {acc:.2%} # - **Sensitivity (Recall):** {sens:.2%} # - **Specificity:** {spec:.2%} # *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}* # """ # yield summary # # ----------------------------- # # 4. UI Layout (Modified) # # ----------------------------- # with gr.Blocks(title="OncoRisk Eval & Demo") as demo: # gr.HTML('