# Cancer_LLM / app.py
# enshicoolsoda's picture
# try with different prompts
# 82c62db verified
# (Hugging Face page-header lines above commented out so the module parses as Python.)
import json
import os
import requests
import gradio as gr
# -----------------------------
# 1. Configuration & Data Mapping
# -----------------------------
# Cancer type (UI dropdown label) -> path of the combined per-patient feature JSON.
CANCER_MAP = {
    "Uterine Cancer": "data/ucec_combined_data.json",
    "Breast Cancer": "data/brca_combined_data.json",
    "Lung Cancer": "data/luad_combined_data.json",
    "Bladder Cancer": "data/blca_combined_data.json",
    "Head and Neck Cancer": "data/hnsc_combined_data.json",
}
# Cancer type (UI dropdown label) -> TCGA study code used as the top-level key
# in data/ground_truth_5yr_recurrence.json (see run_evaluation).
GT_MAP = {
    "Uterine Cancer": "UCEC",
    "Breast Cancer": "BRCA",
    "Lung Cancer": "LUAD",
    "Bladder Cancer": "BLCA",
    "Head and Neck Cancer": "HNSC",
}
# Common chemotherapy agents; not referenced elsewhere in this file's visible
# code — presumably consumed by UI code outside this view (TODO confirm).
COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Tamoxifen", "Other"]
# --- Prompt Templates ---
# "Direct" strategy: force a bare Yes/No reply; run_evaluation parses only the
# first few characters of the response.
PROMPT_DIRECT = "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."
# "Chain-of-Thought" strategy: step-by-step reasoning that must end with a
# FINAL_PREDICTION marker, which run_evaluation searches for (uppercased).
PROMPT_COT = """You are an oncology expert. Predict 5-year cancer recurrence.
Process:
1. Analyze demographics and tumor stage.
2. Evaluate treatment timeline and dosages.
3. Identify risk factors.
4. State your final prediction.
Constraint: You must end your response with 'FINAL_PREDICTION: YES' or 'FINAL_PREDICTION: NO'."""
# "Evidence Grading" strategy: graded reasoning ending in a "Decision: ..."
# line, which run_evaluation matches (uppercased) as "DECISION: YES".
PROMPT_GRADING = """You are a clinical oncology researcher. Evaluate 5-year recurrence risk by grading:
- Tumor Burden (Stage/Grade)
- Treatment Adequacy (Agents/Duration)
- Patient Baseline
Prediction Rule: If cumulative evidence suggests >50% likelihood of recurrence, predict Yes.
Output Format:
[Reasoning]
Decision: [Yes/No]"""
# -----------------------------
# 2. AI Backend Function
# -----------------------------
def ollama_chat(messages, temperature=0.1):
    """Send a chat-completion request to the Ollama server.

    Args:
        messages: list of {"role": ..., "content": ...} dicts in the
            Ollama/OpenAI chat format.
        temperature: sampling temperature; low default for reproducible
            clinical predictions.

    Returns:
        The assistant's reply text, or a human-readable "Error: ..." string.
        Never raises — callers display the return value directly.
    """
    endpoint = os.getenv("OLLAMA_ENDPOINT")
    if not endpoint:
        return "Error: OLLAMA_ENDPOINT not set."
    url = f"{endpoint}/api/chat"
    # The ngrok header skips the interstitial browser-warning page when the
    # endpoint is tunnelled through ngrok.
    headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
    payload = {
        "model": "qwen2.5:7b",
        "messages": messages,
        "stream": False,  # single JSON response, not chunked streaming
        "options": {"temperature": float(temperature), "num_ctx": 8192},
    }
    try:
        r = requests.post(url, json=payload, headers=headers, timeout=120)
        # Surface HTTP errors explicitly instead of an opaque JSON-decode
        # failure on an HTML error page.
        r.raise_for_status()
        return r.json().get("message", {}).get("content", "")
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body from a misbehaving proxy.
        return f"Error: {str(e)}"
# -----------------------------
# 3. Evaluation Engine Logic
# -----------------------------
def run_evaluation(cancer_type, strategy):
    """Run a zero-shot recurrence-prediction experiment and stream progress.

    Generator used as a Gradio streaming callback: yields markdown progress
    strings, then a final metrics report (accuracy / sensitivity /
    specificity plus a confusion matrix).

    Args:
        cancer_type: key into CANCER_MAP / GT_MAP (UI dropdown value).
        strategy: "Direct", "Chain-of-Thought", or "Evidence Grading".
    """
    data_path = CANCER_MAP.get(cancer_type)
    gt_path = "data/ground_truth_5yr_recurrence.json"
    # Guard: an unknown cancer_type makes data_path None, and
    # os.path.exists(None) raises TypeError — fold it into the same
    # user-facing error message instead of crashing the callback.
    if not data_path or not os.path.exists(data_path) or not os.path.exists(gt_path):
        yield "Error: Required data files not found in /data folder."
        return
    with open(data_path, 'r') as f:
        patient_db = json.load(f)
    with open(gt_path, 'r') as f:
        all_gt = json.load(f)
    gt_labels = all_gt.get(GT_MAP[cancer_type], {})
    # Evaluate only patients present in both the feature DB and ground truth.
    eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
    # Map strategy name to its system prompt (default: Direct).
    sys_content = PROMPT_COT if strategy == "Chain-of-Thought" else (PROMPT_GRADING if strategy == "Evidence Grading" else PROMPT_DIRECT)
    tp, tn, fp, fn = 0, 0, 0, 0
    yield f"🚀 Starting {strategy} inference for {len(eval_ids)} patients in {cancer_type}..."
    for i, pid in enumerate(eval_ids):
        actual = gt_labels[pid]  # expected to be "Yes" or "No" — TODO confirm schema
        patient_json = json.dumps(patient_db[pid])
        msgs = [{"role": "system", "content": sys_content}, {"role": "user", "content": f"Patient Data: {patient_json}"}]
        raw_res = ollama_chat(msgs).strip().upper()
        # Robust parsing tied to each strategy's mandated output format.
        if strategy == "Direct":
            # Only scan the first characters so stray "yes" later in an
            # explanation doesn't flip the prediction.
            pred = "Yes" if "YES" in raw_res[:10] else "No"
        elif strategy == "Chain-of-Thought":
            pred = "Yes" if "FINAL_PREDICTION: YES" in raw_res else "No"
        else:  # Evidence Grading
            pred = "Yes" if "DECISION: YES" in raw_res else "No"
        # Update confusion-matrix counts.
        if pred == "Yes" and actual == "Yes":
            tp += 1
        elif pred == "No" and actual == "No":
            tn += 1
        elif pred == "Yes" and actual == "No":
            fp += 1
        else:
            fn += 1
        if (i + 1) % 5 == 0:
            yield f"🔄 Progress: {i+1}/{len(eval_ids)} patients processed..."
    # Final metrics; every denominator is guarded against zero.
    total = len(eval_ids)
    acc = (tp + tn) / total if total > 0 else 0
    sens = tp / (tp + fn) if (tp + fn) > 0 else 0
    spec = tn / (tn + fp) if (tn + fp) > 0 else 0
    yield f"""
## {strategy} Strategy Results: {cancer_type}
- **Accuracy:** {acc:.2%}
- **Sensitivity (Recall):** {sens:.2%}
- **Specificity:** {spec:.2%}
**Confusion Matrix:**
| | Predicted YES | Predicted NO |
|---|---|---|
| **Actual YES** | {tp} (TP) | {fn} (FN) |
| **Actual NO** | {fp} (FP) | {tn} (TN) |
"""
# -----------------------------
# 4. Helper UI Logic (Chat)
# -----------------------------
def load_data(cancer_type):
    """Load the patient DB for a cancer type and refresh the ID dropdown.

    Args:
        cancer_type: key into CANCER_MAP (UI dropdown value).

    Returns:
        (gr.update for the patient-ID dropdown, full patient dict) — the
        dict is stored in a gr.State for later per-patient lookups.
    """
    path = CANCER_MAP.get(cancer_type)
    # Guard: unknown type or missing file -> empty dropdown, not a crash.
    if not path or not os.path.exists(path):
        return gr.update(choices=[], value=None), {}
    with open(path, "r") as f:
        data = json.load(f)
    ids = sorted(str(k) for k in data.keys())
    # Guard: an empty dataset would make ids[0] raise IndexError.
    return gr.update(choices=ids, value=ids[0] if ids else None), data
def respond(message, history):
    """Chat callback: forward the conversation to the LLM and append the turn.

    Args:
        message: user's new message (textbox value).
        history: prior gr.Chatbot history, or None on the first turn.

    Returns:
        ("", updated_history) — empty string clears the input textbox.
    """
    history = history or []
    # Standard system prompt for the Clinical Assistant tab.
    sys = {"role": "system", "content": "You are an oncology assistant. Summarize the case and predict outcomes."}
    # Normalize history before calling the API: depending on the Gradio
    # version/config, gr.Chatbot may deliver "messages"-format dicts or
    # legacy (user, assistant) tuple pairs. The original forwarded raw
    # history entries, which breaks the API call in tuple mode.
    msgs = []
    for turn in history:
        if isinstance(turn, dict):
            msgs.append({"role": turn.get("role"), "content": turn.get("content")})
        else:
            user_text, bot_text = turn
            if user_text:
                msgs.append({"role": "user", "content": user_text})
            if bot_text:
                msgs.append({"role": "assistant", "content": bot_text})
    res = ollama_chat([sys] + msgs + [{"role": "user", "content": message}])
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": res})
    return "", history
# -----------------------------
# 5. UI Layout
# -----------------------------
# Top-level Gradio app: two tabs (batch evaluation + interactive chat)
# sharing the same Ollama backend.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Oncology Research Platform")
    # Holds the full patient DB dict for the currently selected cancer type.
    full_data_state = gr.State({})
    with gr.Tabs():
        # TAB 1: Evaluation Engine
        with gr.TabItem("🔬 Performance Metrics"):
            gr.Markdown("### Zero-Shot Inference Experiments")
            with gr.Row():
                e_type = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                e_strat = gr.Dropdown(label="Prompting Strategy", choices=["Direct", "Chain-of-Thought", "Evidence Grading"], value="Direct")
            run_btn = gr.Button("Start Experiment", variant="primary")
            results_md = gr.Markdown("Select criteria and start to see metrics.")
        # TAB 2: Clinical Assistant
        with gr.TabItem("💬 Clinical Assistant"):
            with gr.Row():
                with gr.Column(scale=1):
                    c_select = gr.Dropdown(label="Cancer Type", choices=list(CANCER_MAP.keys()), value="Uterine Cancer")
                    p_select = gr.Dropdown(label="Patient ID")
                with gr.Column(scale=2):
                    # NOTE(review): no type="messages" here, but respond()
                    # appends dict messages — confirm the installed Gradio
                    # version accepts that history format.
                    chat = gr.Chatbot(height=400)
                    msg = gr.Textbox(label="Patient JSON / Message")
                    send = gr.Button("Analyze")
    # Bindings
    # run_evaluation is a generator, so results_md streams progress updates.
    run_btn.click(run_evaluation, [e_type, e_strat], results_md)
    c_select.change(load_data, c_select, [p_select, full_data_state])
    # Selecting a patient pre-fills the message box with their JSON record.
    p_select.change(lambda p, d: json.dumps(d.get(p), indent=2), [p_select, full_data_state], msg)
    send.click(respond, [msg, chat], [msg, chat])
    # Populate the patient dropdown once on initial page load.
    demo.load(load_data, c_select, [p_select, full_data_state])
demo.launch()
# import json
# import os
# import requests
# import gradio as gr
# import pandas as pd
# # -----------------------------
# # 1. Configuration & Data Mapping
# # -----------------------------
# CANCER_MAP = {
# "Uterine Cancer": "data/ucec_combined_data.json",
# "Breast Cancer": "data/brca_combined_data.json",
# "Lung Cancer": "data/luad_combined_data.json",
# "Bladder Cancer": "data/blca_combined_data.json",
# "Head and Neck Cancer": "data/hnsc_combined_data.json",
# }
# # Map for the Ground Truth JSON keys
# GT_MAP = {
# "Uterine Cancer": "UCEC",
# "Breast Cancer": "BRCA",
# "Lung Cancer": "LUAD",
# "Bladder Cancer": "BLCA",
# "Head and Neck Cancer": "HNSC",
# }
# COMMON_AGENTS = ["Carboplatin", "Paclitaxel", "Cisplatin", "Gemcitabine", "Doxorubicin", "Other"]
# # -----------------------------
# # 2. AI Backend Function
# # -----------------------------
# def ollama_chat(messages, temperature=0.1):
# endpoint = os.getenv("OLLAMA_ENDPOINT")
# if not endpoint: return "Error: Endpoint not set."
# url = f"{endpoint}/api/chat"
# headers = {"Content-Type": "application/json", "ngrok-skip-browser-warning": "true"}
# payload = {
# "model": "qwen2.5:7b",
# "messages": messages,
# "stream": False,
# "options": {"temperature": float(temperature), "num_ctx": 8192}
# }
# try:
# r = requests.post(url, json=payload, headers=headers, timeout=120)
# return r.json().get("message", {}).get("content", "")
# except: return "Connection Error"
# # -----------------------------
# # 3. Evaluation Logic
# # -----------------------------
# def run_evaluation(cancer_type):
# # 1. Load Data
# data_path = CANCER_MAP.get(cancer_type)
# gt_path = "data/ground_truth_5yr_recurrence.json"
# if not os.path.exists(data_path) or not os.path.exists(gt_path):
# return "Error: Missing data or ground truth files."
# with open(data_path, 'r') as f: patient_db = json.load(f)
# with open(gt_path, 'r') as f: all_gt = json.load(f)
# gt_labels = all_gt.get(GT_MAP[cancer_type], {})
# # 2. Filter patients present in both
# eval_ids = [pid for pid in gt_labels.keys() if pid in patient_db]
# results = []
# tp, tn, fp, fn = 0, 0, 0, 0
# yield f"Starting inference for {len(eval_ids)} patients in {cancer_type}..."
# for i, pid in enumerate(eval_ids):
# actual = gt_labels[pid] # "Yes" or "No"
# patient_json = json.dumps(patient_db[pid])
# # Zero-shot prompt
# eval_prompt = [
# {"role": "system", "content": "You are an oncology expert. Predict 5-year recurrence based ONLY on the provided JSON. Respond strictly with 'Yes' or 'No' and nothing else."},
# {"role": "user", "content": f"Patient Data: {patient_json}"}
# ]
# prediction_raw = ollama_chat(eval_prompt).strip()
# # Simple parser to find Yes/No in response
# prediction = "Yes" if "yes" in prediction_raw.lower() else "No"
# # Calculate Metrics
# if prediction == "Yes" and actual == "Yes": tp += 1
# elif prediction == "No" and actual == "No": tn += 1
# elif prediction == "Yes" and actual == "No": fp += 1
# elif prediction == "No" and actual == "Yes": fn += 1
# if i % 5 == 0:
# yield f"Processed {i+1}/{len(eval_ids)} patients..."
# # 3. Final Metric Calculation
# acc = (tp + tn) / len(eval_ids) if eval_ids else 0
# sens = tp / (tp + fn) if (tp + fn) > 0 else 0
# spec = tn / (tn + fp) if (tn + fp) > 0 else 0
# summary = f"""
# ### Evaluation Results: {cancer_type}
# - **Total Patients Processed:** {len(eval_ids)}
# - **Unweighted Accuracy:** {acc:.2%}
# - **Sensitivity (Recall):** {sens:.2%}
# - **Specificity:** {spec:.2%}
# *Confusion Matrix: TP={tp}, TN={tn}, FP={fp}, FN={fn}*
# """
# yield summary
# # -----------------------------
# # 4. UI Layout (Modified)
# # -----------------------------
# with gr.Blocks(title="OncoRisk Eval & Demo") as demo:
# gr.HTML('<div style="text-align:center"><h1>Oncology Risk Assistant</h1></div>')
# with gr.Tabs():
# # Tab 1: Your original Chat/Simulation UI
# with gr.TabItem("Clinical Assistant"):
# with gr.Row():
# with gr.Column(scale=1):
# cancer_select = gr.Dropdown(label="Select Cancer Type", choices=list(CANCER_MAP.keys()))
# patient_select = gr.Dropdown(label="Select Patient ID")
# submit_btn = gr.Button("Analyze Case", variant="primary")
# missing_output = gr.HighlightedText(label="Completeness")
# with gr.Column(scale=2):
# chatbot = gr.Chatbot(height=500)
# msg_input = gr.Textbox(label="Input Box", lines=5)
# # Tab 2: NEW Evaluation Engine
# with gr.TabItem("Performance Metrics (Zero-Shot)"):
# gr.Markdown("### Run Zero-Shot Inference on Ground Truth")
# eval_cancer_type = gr.Dropdown(label="Select Cancer for Evaluation", choices=list(CANCER_MAP.keys()))
# run_eval_btn = gr.Button("Start Experiment", variant="secondary")
# eval_results = gr.Markdown("Results will appear here after inference...")
# # Logic for Evaluation
# run_eval_btn.click(fn=run_evaluation, inputs=eval_cancer_type, outputs=eval_results)
# # (Keep your existing Event Logic for Chat/Data Selection here...)
# # ... [Same as your provided code] ...
# demo.launch()