Spaces:

enshicoolsoda
/

Cancer_LLM

Sleeping

App Files Files Community

enshicoolsoda committed on Apr 2

Commit

82c62db

verified ·

1 Parent(s): cbfbd67

try with different prompts

Browse files

Files changed (1) hide show

app.py +251 -61

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import json
 import os
 import requests
 import gradio as gr
-import pandas as pd
 # -----------------------------
 # 1. Configuration & Data Mapping
@@ -15,7 +14,6 @@ CANCER_MAP = {
     "Head and Neck Cancer": "data/hnsc_combined_data.json",
 }
-# Map for the Ground Truth JSON keys
 GT_MAP = {
     "Uterine Cancer": "UCEC",
     "Breast Cancer": "BRCA",
@@ -24,14 +22,36 @@ GT_MAP = {
     "Head and Neck Cancer": "HNSC",
 }
-COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"]
 # -----------------------------
 # 2. AI Backend Function
 # -----------------------------
 def ollama_chat(messages, temperature=0.1):
     endpoint = os.getenv("OLLAMA_ENDPOINT")
-    if not endpoint: return "Error: Endpoint not set."
     url = f"{endpoint}/api/chat"
     headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
@@ -44,101 +64,271 @@ def ollama_chat(messages, temperature=0.1):
     try:
         r = requests.post(url, json=payload, headers=headers, timeout=120)
         return r.json().get("message", {}).get("content", "")
-    except: return "Connection Error"
 # -----------------------------
-# 3. Evaluation Logic
 # -----------------------------
-def run_evaluation(cancer_type):
-    # 1. Load Data
     data_path = CANCER_MAP.get(cancer_type)
     gt_path = "data/ground_truth_5yr_recurrence.json"
     if not os.path.exists(data_path) or not os.path.exists(gt_path):
-        return "Error: Missing data or ground truth files."
     with open(data_path, 'r') as f: patient_db = json.load(f)
     with open(gt_path, 'r') as f: all_gt = json.load(f)
     gt_labels = all_gt.get(GT_MAP[cancer_type], {})
-    # 2. Filter patients present in both
     eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
-    results = []
     tp, tn, fp, fn = 0, 0, 0, 0
-    yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..."
     for i, pid in enumerate(eval_ids):
-        actual = gt_labels[pid] # "Yes" or "No"
         patient_json = json.dumps(patient_db[pid])
-        # Zero-shot prompt
-        eval_prompt = [
-            {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."},
-            {"role": "user", "content": f"Patient Data: {patient_json}"}
-        ]
-        prediction_raw = ollama_chat(eval_prompt).strip()
-        # Simple parser to find Yes/No in response
-        prediction = "Yes" if "yes" in prediction_raw.lower() else "No"
-        # Calculate Metrics
-        if prediction == "Yes" and actual == "Yes": tp += 1
-        elif prediction == "No" and actual == "No": tn += 1
-        elif prediction == "Yes" and actual == "No": fp += 1
-        elif prediction == "No" and actual == "Yes": fn += 1
-        if i % 5 == 0:
-            yield f"Processed {i+1}/{len(eval_ids)} patients..."
-    # 3. Final Metric Calculation
-    acc = (tp + tn) / len(eval_ids) if eval_ids else 0
     sens = tp / (tp + fn) if (tp + fn) > 0 else 0
     spec = tn / (tn + fp) if (tn + fp) > 0 else 0
-    summary = f"""
-    ### Evaluation Results: {cancer_type}
-    - **Total Patients Processed:** {len(eval_ids)}
-    - **Unweighted Accuracy:** {acc:.2%}
     - **Sensitivity (Recall):** {sens:.2%}
     - **Specificity:** {spec:.2%}
-    *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}*
     """
-    yield summary
 # -----------------------------
-# 4. UI Layout (Modified)
 # -----------------------------
-with gr.Blocks(title="OncoRisk Eval & Demo") as demo:
-    gr.HTML('<div style="text-align:center"><h1>Oncology Risk Assistant</h1></div>')
     with gr.Tabs():
-        # Tab 1: Your original Chat/Simulation UI
-        with gr.TabItem("Clinical Assistant"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    cancer_select = gr.Dropdown(label="Select Cancer Type", choices=list(CANCER_MAP.keys()))
-                    patient_select = gr.Dropdown(label="Select Patient ID")
-                    submit_btn = gr.Button("Analyze Case", variant="primary")
-                    missing_output = gr.HighlightedText(label="Completeness")
                 with gr.Column(scale=2):
-                    chatbot = gr.Chatbot(height=500)
-                    msg_input = gr.Textbox(label="Input Box", lines=5)
-        # Tab 2: NEW Evaluation Engine
-        with gr.TabItem("Performance Metrics (Zero-Shot)"):
-            gr.Markdown("### Run Zero-Shot Inference on Ground Truth")
-            eval_cancer_type = gr.Dropdown(label="Select Cancer for Evaluation", choices=list(CANCER_MAP.keys()))
-            run_eval_btn = gr.Button("Start Experiment", variant="secondary")
-            eval_results = gr.Markdown("Results will appear here after inference...")
-    # Logic for Evaluation
-    run_eval_btn.click(fn=run_evaluation, inputs=eval_cancer_type, outputs=eval_results)
-    # (Keep your existing Event Logic for Chat/Data Selection here...)
-    # ... [Same as your provided code] ...
-demo.launch()

 import os
 import requests
 import gradio as gr
 # -----------------------------
 # 1. Configuration & Data Mapping
     "Head and Neck Cancer": "data/hnsc_combined_data.json",
 }
 GT_MAP = {
     "Uterine Cancer": "UCEC",
     "Breast Cancer": "BRCA",
     "Head and Neck Cancer": "HNSC",
 }
+COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Tamoxifen", "Other"]
+# --- Prompt Templates ---
+PROMPT_DIRECT = "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."
+PROMPT_COT = """You are an oncology expert. Predict 5-year cancer recurrence.
+Process:
+1. Analyze demographics and tumor stage.
+2. Evaluate treatment timeline and dosages.
+3. Identify risk factors.
+4. State your final prediction.
+Constraint: You must end your response with 'FINAL_PREDICTION: YES' or 'FINAL_PREDICTION: NO'."""
+PROMPT_GRADING = """You are a clinical oncology researcher. Evaluate 5-year recurrence risk by grading:
+- Tumor Burden (Stage/Grade)
+- Treatment Adequacy (Agents/Duration)
+- Patient Baseline
+Prediction Rule: If cumulative evidence suggests >50% likelihood of recurrence, predict Yes.
+Output Format:
+[Reasoning]
+Decision: [Yes/No]"""
 # -----------------------------
 # 2. AI Backend Function
 # -----------------------------
 def ollama_chat(messages, temperature=0.1):
     endpoint = os.getenv("OLLAMA_ENDPOINT")
+    if not endpoint: return "Error: OLLAMA_ENDPOINT not set."
     url = f"{endpoint}/api/chat"
     headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
     try:
         r = requests.post(url, json=payload, headers=headers, timeout=120)
         return r.json().get("message", {}).get("content", "")
+    except Exception as e:
+        return f"Error: {str(e)}"
 # -----------------------------
+# 3. Evaluation Engine Logic
 # -----------------------------
def _parse_prediction(strategy, raw_res):
    """Map an upper-cased model reply to "Yes" or "No" for the given strategy.

    A reply that lacks the strategy's positive marker is read as "No".
    Unknown strategies are parsed with the Direct rule, because they are also
    sent the Direct system prompt.
    """
    if strategy == "Chain-of-Thought":
        return "Yes" if "FINAL_PREDICTION: YES" in raw_res else "No"
    if strategy == "Evidence Grading":
        return "Yes" if "DECISION: YES" in raw_res else "No"
    # Direct: the prompt asks for a bare Yes/No, so only the start is inspected.
    return "Yes" if "YES" in raw_res[:10] else "No"


def run_evaluation(cancer_type, strategy):
    """Run a prompting strategy over every labelled patient and report metrics.

    This is a generator: it yields progress strings while inference runs and
    finally yields a Markdown report with accuracy, sensitivity, specificity
    and the confusion matrix.

    Args:
        cancer_type: A key of CANCER_MAP / GT_MAP (the dropdown value).
        strategy: "Direct", "Chain-of-Thought" or "Evidence Grading"; any
            other value falls back to the Direct prompt and parser.

    Yields:
        Status messages, then the final Markdown report. A single error
        message is yielded when the selection or data files are unusable.

    Patients whose backend call failed (ollama_chat returned an "Error: ..."
    string) or whose ground-truth value is not "Yes"/"No" are excluded from
    the metrics and counted separately, so they cannot pose as "No"
    predictions or false negatives.
    """
    if not cancer_type:
        # The dropdown can be cleared; without this guard os.path.exists(None) raises.
        yield "Error: No cancer type selected."
        return

    data_path = CANCER_MAP.get(cancer_type)
    gt_key = GT_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    if (
        data_path is None
        or gt_key is None
        or not os.path.exists(data_path)
        or not os.path.exists(gt_path)
    ):
        yield "Error: Required data files not found in /data folder."
        return

    with open(data_path, "r", encoding="utf-8") as f:
        patient_db = json.load(f)
    with open(gt_path, "r", encoding="utf-8") as f:
        all_gt = json.load(f)

    gt_labels = all_gt.get(gt_key, {})
    # Only patients present in both the ground truth and the patient file are evaluated.
    eval_ids = [pid for pid in gt_labels if pid in patient_db]
    total = len(eval_ids)

    # Map strategy to system prompt
    if strategy == "Chain-of-Thought":
        sys_content = PROMPT_COT
    elif strategy == "Evidence Grading":
        sys_content = PROMPT_GRADING
    else:
        sys_content = PROMPT_DIRECT

    tp = tn = fp = fn = 0
    failed = 0     # backend calls that came back as an error string
    unlabeled = 0  # ground-truth values other than "Yes" / "No"

    yield f"🚀 Starting {strategy} inference for {total} patients in {cancer_type}..."

    for i, pid in enumerate(eval_ids):
        actual = gt_labels[pid]
        if actual not in ("Yes", "No"):
            unlabeled += 1
        else:
            patient_json = json.dumps(patient_db[pid])
            msgs = [
                {"role": "system", "content": sys_content},
                {"role": "user", "content": f"Patient Data: {patient_json}"},
            ]
            raw_res = ollama_chat(msgs).strip().upper()
            if raw_res.startswith("ERROR:"):
                # ollama_chat reports failures as "Error: ..." text, not exceptions.
                failed += 1
            else:
                pred = _parse_prediction(strategy, raw_res)
                if pred == "Yes":
                    if actual == "Yes":
                        tp += 1
                    else:
                        fp += 1
                elif actual == "Yes":
                    fn += 1
                else:
                    tn += 1

        if (i + 1) % 5 == 0:
            yield f"🔄 Progress: {i+1}/{total} patients processed..."

    # Metrics are computed over the patients that were actually scored.
    scored = tp + tn + fp + fn
    acc = (tp + tn) / scored if scored > 0 else 0
    sens = tp / (tp + fn) if (tp + fn) > 0 else 0
    spec = tn / (tn + fp) if (tn + fp) > 0 else 0

    yield f"""
    ## {strategy} Strategy Results: {cancer_type}
    - **Patients Scored:** {scored} of {total} ({failed} backend errors, {unlabeled} unusable labels)
    - **Accuracy:** {acc:.2%}
    - **Sensitivity (Recall):** {sens:.2%}
    - **Specificity:** {spec:.2%}
    **Confusion Matrix:**
    | | Predicted YES | Predicted NO |
    |---|---|---|
    | **Actual YES** | {tp} (TP) | {fn} (FN) |
    | **Actual NO** | {fp} (FP) | {tn} (TN) |
    """
 # -----------------------------
+# 4. Helper UI Logic (Chat)
 # -----------------------------
def load_data(cancer_type):
    """Load the patient file for a cancer type and refresh the patient dropdown.

    Args:
        cancer_type: A key of CANCER_MAP (the cancer-type dropdown value).

    Returns:
        A tuple ``(dropdown_update, data)``: a ``gr.update`` carrying the
        sorted patient IDs with the first one preselected, and the loaded
        patient dict. When the cancer type is empty or unknown, the file is
        missing, or the file holds no patients, the dropdown is emptied
        (``value=None``) instead of raising.
    """
    path = CANCER_MAP.get(cancer_type) if cancer_type else None
    if not path or not os.path.exists(path):
        return gr.update(choices=[], value=None), {}

    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    ids = sorted(str(k) for k in data.keys())
    # ids[0] would raise IndexError on an empty dataset.
    first_id = ids[0] if ids else None
    return gr.update(choices=ids, value=first_id), data
def respond(message, history):
    """Send one chat turn to the model and return the updated conversation.

    Args:
        message: Text from the input box (a patient JSON or a free-form question).
        history: Prior conversation as a list of ``{"role", "content"}`` dicts,
            or None for a fresh chat.

    Returns:
        A tuple ``("", new_history)``: the empty string clears the input box,
        and ``new_history`` is a new list (the caller's list is not mutated)
        with the user turn and the assistant reply appended. A blank message
        returns the history unchanged without contacting the backend.
    """
    turns = list(history or [])
    if not (message or "").strip():
        # Nothing to send: avoid a pointless backend request and an empty chat turn.
        return "", turns

    # Standard System Prompt for Chat
    system_msg = {"role": "system", "content": "You are an oncology assistant. Summarize the case and predict outcomes."}
    user_msg = {"role": "user", "content": message}
    res = ollama_chat([system_msg] + turns + [user_msg])
    return "", turns + [user_msg, {"role": "assistant", "content": res}]
+# -----------------------------
+# 5. UI Layout
+# -----------------------------
def _patient_json(patient_id, data):
    """Pretty-print the selected patient's record, or "" when none is selected."""
    record = (data or {}).get(patient_id)
    # json.dumps(None) would put the literal text "null" into the message box.
    return "" if record is None else json.dumps(record, indent=2)


# Two-tab app: a batch evaluation tab and an interactive chat tab.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Oncology Research Platform")
    # Holds the patient dict returned by load_data for the selected cancer type.
    full_data_state = gr.State({})
    with gr.Tabs():
        # TAB 1: Evaluation Engine
        with gr.TabItem("🔬 Performance Metrics"):
            gr.Markdown("### Zero-Shot Inference Experiments")
            with gr.Row():
                e_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                e_strat = gr.Dropdown(label="Prompting Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_btn = gr.Button("Start Experiment", variant="primary")
            results_md = gr.Markdown("Select criteria and start to see metrics.")
        # TAB 2: Clinical Assistant
        with gr.TabItem("💬 Clinical Assistant"):
            with gr.Row():
                with gr.Column(scale=1):
                    c_select = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                    p_select = gr.Dropdown(label="Patient ID")
                with gr.Column(scale=2):
                    # NOTE(review): respond() returns history as role/content dicts;
                    # confirm the installed Gradio version's Chatbot accepts that
                    # format by default (some releases need type="messages").
                    chat = gr.Chatbot(height=400)
                    msg = gr.Textbox(label="Patient JSON / Message")
                    send = gr.Button("Analyze")
    # Bindings
    # run_evaluation is a generator, so each yielded string replaces results_md.
    run_btn.click(run_evaluation, [e_type, e_strat], results_md)
    # Changing the cancer type reloads the patient list and the cached data.
    c_select.change(load_data, c_select, [p_select, full_data_state])
    # Selecting a patient copies that patient's JSON into the message box.
    p_select.change(_patient_json, [p_select, full_data_state], msg)
    send.click(respond, [msg, chat], [msg, chat])
    # Populate the patient dropdown for the default cancer type on page load.
    demo.load(load_data, c_select, [p_select, full_data_state])
demo.launch()
+# import json
+# import os
+# import requests
+# import gradio as gr
+# import pandas as pd
+# # -----------------------------
+# # 1. Configuration & Data Mapping
+# # -----------------------------
+# CANCER_MAP = {
+#     "Uterine Cancer": "data/ucec_combined_data.json",
+#     "Breast Cancer": "data/brca_combined_data.json",
+#     "Lung Cancer": "data/luad_combined_data.json",
+#     "Bladder Cancer": "data/blca_combined_data.json",
+#     "Head and Neck Cancer": "data/hnsc_combined_data.json",
+# }
+# # Map for the Ground Truth JSON keys
+# GT_MAP = {
+#     "Uterine Cancer": "UCEC",
+#     "Breast Cancer": "BRCA",
+#     "Lung Cancer": "LUAD",
+#     "Bladder Cancer": "BLCA",
+#     "Head and Neck Cancer": "HNSC",
+# }
+# COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"]
+# # -----------------------------
+# # 2. AI Backend Function
+# # -----------------------------
+# def ollama_chat(messages, temperature=0.1):
+#     endpoint = os.getenv("OLLAMA_ENDPOINT")
+#     if not endpoint: return "Error: Endpoint not set."
+#     url = f"{endpoint}/api/chat"
+#     headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
+#     payload = {
+#         "model": "qwen2.5:7b",
+#         "messages": messages,
+#         "stream": False,
+#         "options": {"temperature": float(temperature), "num_ctx": 8192}
+#     }
+#     try:
+#         r = requests.post(url, json=payload, headers=headers, timeout=120)
+#         return r.json().get("message", {}).get("content", "")
+#     except: return "Connection Error"
+# # -----------------------------
+# # 3. Evaluation Logic
+# # -----------------------------
+# def run_evaluation(cancer_type):
+#     # 1. Load Data
+#     data_path = CANCER_MAP.get(cancer_type)
+#     gt_path = "data/ground_truth_5yr_recurrence.json"
+#     if not os.path.exists(data_path) or not os.path.exists(gt_path):
+#         return "Error: Missing data or ground truth files."
+#     with open(data_path, 'r') as f: patient_db = json.load(f)
+#     with open(gt_path, 'r') as f: all_gt = json.load(f)
+#     gt_labels = all_gt.get(GT_MAP[cancer_type], {})
+#     # 2. Filter patients present in both
+#     eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
+#     results = []
+#     tp, tn, fp, fn = 0, 0, 0, 0
+#     yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..."
+#     for i, pid in enumerate(eval_ids):
+#         actual = gt_labels[pid] # "Yes" or "No"
+#         patient_json = json.dumps(patient_db[pid])
+#         # Zero-shot prompt
+#         eval_prompt = [
+#             {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."},
+#             {"role": "user", "content": f"Patient Data: {patient_json}"}
+#         ]
+#         prediction_raw = ollama_chat(eval_prompt).strip()
+#         # Simple parser to find Yes/No in response
+#         prediction = "Yes" if "yes" in prediction_raw.lower() else "No"
+#         # Calculate Metrics
+#         if prediction == "Yes" and actual == "Yes": tp += 1
+#         elif prediction == "No" and actual == "No": tn += 1
+#         elif prediction == "Yes" and actual == "No": fp += 1
+#         elif prediction == "No" and actual == "Yes": fn += 1
+#         if i % 5 == 0:
+#             yield f"Processed {i+1}/{len(eval_ids)} patients..."
+#     # 3. Final Metric Calculation
+#     acc = (tp + tn) / len(eval_ids) if eval_ids else 0
+#     sens = tp / (tp + fn) if (tp + fn) > 0 else 0
+#     spec = tn / (tn + fp) if (tn + fp) > 0 else 0
+#     summary = f"""
+#     ### Evaluation Results: {cancer_type}
+#     - **Total Patients Processed:** {len(eval_ids)}
+#     - **Unweighted Accuracy:** {acc:.2%}
+#     - **Sensitivity (Recall):** {sens:.2%}
+#     - **Specificity:** {spec:.2%}
+#     *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}*
+#     """
+#     yield summary
+# # -----------------------------
+# # 4. UI Layout (Modified)
+# # -----------------------------
+# with gr.Blocks(title="OncoRisk Eval & Demo") as demo:
+#     gr.HTML('<div style="text-align:center"><h1>Oncology Risk Assistant</h1></div>')
+#     with gr.Tabs():
+#         # Tab 1: Your original Chat/Simulation UI
+#         with gr.TabItem("Clinical Assistant"):
+#             with gr.Row():
+#                 with gr.Column(scale=1):
+#                     cancer_select = gr.Dropdown(label="Select Cancer Type", choices=list(CANCER_MAP.keys()))
+#                     patient_select = gr.Dropdown(label="Select Patient ID")
+#                     submit_btn = gr.Button("Analyze Case", variant="primary")
+#                     missing_output = gr.HighlightedText(label="Completeness")
+#                 with gr.Column(scale=2):
+#                     chatbot = gr.Chatbot(height=500)
+#                     msg_input = gr.Textbox(label="Input Box", lines=5)
+#         # Tab 2: NEW Evaluation Engine
+#         with gr.TabItem("Performance Metrics (Zero-Shot)"):
+#             gr.Markdown("### Run Zero-Shot Inference on Ground Truth")
+#             eval_cancer_type = gr.Dropdown(label="Select Cancer for Evaluation", choices=list(CANCER_MAP.keys()))
+#             run_eval_btn = gr.Button("Start Experiment", variant="secondary")
+#             eval_results = gr.Markdown("Results will appear here after inference...")
+#     # Logic for Evaluation
+#     run_eval_btn.click(fn=run_evaluation, inputs=eval_cancer_type, outputs=eval_results)
+#     # (Keep your existing Event Logic for Chat/Data Selection here...)
+#     # ... [Same as your provided code] ...
+# demo.launch()