Spaces:

gumannic
/

neuroscan

Running

App Files Files Community

neuroscan / app.py

gumannic

Update app.py

20df99f verified 18 days ago

raw

history blame contribute delete

14.4 kB

	import gradio as gr
	import numpy as np
	import pandas as pd
	import joblib
	import json
	import os
	import requests
	from PIL import Image, ImageFilter
	import torch
	import torch.nn as nn
	from torchvision import transforms, models

	import sys

	# Clinical-risk artifacts, loaded once at import time.
	# NOTE(security): joblib.load unpickles the file, so these artifacts must
	# come from a trusted source.
	RF_MODEL = joblib.load("src/rf_model.pkl")
	LR_MODEL = joblib.load("src/lr_model.pkl")
	SCALER = joblib.load("src/scaler.pkl")

	# Read the JSON with an explicit UTF-8 encoding so the result does not
	# depend on the host's locale default.
	with open("src/prompts.json", encoding="utf-8") as f:
	    PROMPT_TEMPLATES = json.load(f)

	# Empty string when the OPENAI_API_KEY environment variable is not set.
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

	# State of the image classifier. load_cv_model() overwrites these on a
	# successful load; otherwise they keep the "nothing loaded" defaults below.
	CV_MODEL_LOADED = False
	CV_MODEL = CV_CLASS_NAMES_RAW = CV_TRANSFORM = None

	def load_cv_model():
	    """Load the MobileNetV2 checkpoint at ``src/cv_model.pth`` into the CV_* globals.

	    The checkpoint must provide ``class_names`` and ``model_state_dict``;
	    ``img_size`` is optional and defaults to 224.

	    The globals are assigned only after every step has succeeded, so a
	    failed load never leaves them half-populated.

	    Returns:
	        True when the model was loaded, False when the checkpoint file is
	        missing or any loading step raised.
	    """
	    global CV_MODEL, CV_CLASS_NAMES_RAW, CV_TRANSFORM, CV_MODEL_LOADED
	    model_path = "src/cv_model.pth"
	    if not os.path.exists(model_path):
	        print(f"[CV] Model file not found at {model_path} — using fallback heuristic")
	        return False

	    try:
	        # NOTE(security): weights_only=False unpickles arbitrary objects;
	        # only load checkpoints from a trusted source.
	        checkpoint = torch.load(model_path, map_location="cpu", weights_only=False)
	        class_names = checkpoint["class_names"]
	        img_size = checkpoint.get("img_size", 224)

	        # Rebuild the architecture with a custom head sized to the class
	        # list, then restore the saved weights into it.
	        model = models.mobilenet_v2()
	        model.classifier = nn.Sequential(
	            nn.Dropout(0.3),
	            nn.Linear(model.last_channel, 128),
	            nn.ReLU(),
	            nn.Dropout(0.2),
	            nn.Linear(128, len(class_names)),
	        )
	        model.load_state_dict(checkpoint["model_state_dict"])
	        model.eval()

	        transform = transforms.Compose([
	            transforms.Resize((img_size, img_size)),
	            transforms.ToTensor(),
	            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
	        ])
	    except Exception as e:
	        # Deliberate best-effort: any failure falls back to the heuristic
	        # classifier instead of crashing the app at import time.
	        print(f"[CV] Failed to load model: {e}")
	        return False

	    # Publish the state only now that loading has fully succeeded.
	    CV_CLASS_NAMES_RAW = class_names
	    CV_MODEL = model
	    CV_TRANSFORM = transform
	    CV_MODEL_LOADED = True
	    print(f"[CV] Model loaded successfully — classes: {CV_CLASS_NAMES_RAW}")
	    return True

	# Try to load the CNN once at import time. The return value is ignored;
	# CV_MODEL_LOADED records whether the load succeeded.
	load_cv_model()

	# Column order of the single-row frame passed to the scaler and the
	# clinical-risk models.
	FEATURES = [
	    # raw inputs
	    "age",
	    "sex",
	    "headache_freq",
	    "seizures",
	    "vision_problems",
	    "nausea",
	    "memory_issues",
	    "speech_issues",
	    "family_history",
	    "prior_radiation",
	    # derived values
	    "symptom_score",
	    "age_over_50",
	    "age_over_65",
	    "motor_neuro_score",
	    "sensory_score",
	    "risk_factor_count",
	]

	# Display labels, in the order used for every probability dictionary.
	CV_CLASSES = ["Glioma", "Meningioma", "Pituitary", "No Tumor"]

	# Raw class-name spellings mapped to display labels.
	_RAW_CLASS_ALIASES = {
	    "glioma": "Glioma",
	    "meningioma": "Meningioma",
	    "pituitary": "Pituitary",
	    "notumor": "No Tumor",
	    "no_tumor": "No Tumor",
	}

	# Aliases first, then every display label mapped to itself.
	CLASS_NAME_MAP = {**_RAW_CLASS_ALIASES, **{label: label for label in CV_CLASSES}}


	def preprocess_mri(image):
	    """Turn an MRI image into a smoothed 224x224 grayscale float32 array.

	    The image is converted to mode "L", resized, blurred with a Gaussian
	    filter of radius 0.8, and its pixel values are divided by 255.
	    Returns None when ``image`` is None.
	    """
	    if image is None:
	        return None

	    grayscale = image.convert("L")
	    resized = grayscale.resize((224, 224))
	    smoothed = resized.filter(ImageFilter.GaussianBlur(0.8))
	    pixels = np.array(smoothed, dtype=np.float32)
	    return pixels / 255.0


	def cv_predict_cnn(image):
	    """Classify an MRI image with the loaded CNN.

	    Returns:
	        ``(label, confidence, all_probs)``. ``all_probs`` maps every entry
	        of CV_CLASSES to its softmax probability rounded to 3 decimals
	        (0.0 for a class the checkpoint does not provide); ``label`` is the
	        entry with the highest value and ``confidence`` is that value.
	    """
	    batch = CV_TRANSFORM(image.convert("RGB")).unsqueeze(0)
	    with torch.no_grad():
	        probs = torch.softmax(CV_MODEL(batch), dim=1)[0].numpy()

	    # Translate the checkpoint's raw class names into display labels.
	    by_label = {
	        CLASS_NAME_MAP.get(raw_name, raw_name): round(float(p), 3)
	        for raw_name, p in zip(CV_CLASS_NAMES_RAW, probs)
	    }

	    # Report exactly the CV_CLASSES entries, in CV_CLASSES order.
	    all_probs = {label: by_label.get(label, 0.0) for label in CV_CLASSES}

	    pred_label = max(all_probs, key=all_probs.get)
	    return pred_label, round(all_probs[pred_label], 3), all_probs


	def cv_predict_heuristic(image):
	    """Fallback classifier used when no trained model is available.

	    Looks for a compact cluster of unusually bright pixels inside a
	    centred circular region and picks a tumour label from how far that
	    cluster lies from the image centre.

	    Returns:
	        ``(label, confidence, all_probs)`` where ``all_probs`` maps each
	        entry of CV_CLASSES to a score.
	    """
	    arr = preprocess_mri(image)
	    if arr is None:
	        # Same result cv_predict() reports for a missing image.
	        return "No image uploaded", 0.0, {c: 0.0 for c in CV_CLASSES}

	    h, w = arr.shape
	    cy, cx = h // 2, w // 2
	    yy, xx = np.ogrid[:h, :w]
	    brain_radius = min(h, w) * 0.42
	    # Circular mask: squared distance from the centre below radius squared.
	    brain_mask = ((yy - cy) ** 2 + (xx - cx) ** 2) < brain_radius ** 2
	    brain_pixels = arr[brain_mask]

	    if brain_pixels.size == 0:
	        return "No Tumor", 0.5, {"Glioma": 0.0, "Meningioma": 0.0, "Pituitary": 0.0, "No Tumor": 1.0}

	    p99 = np.percentile(brain_pixels, 99)
	    p75 = np.percentile(brain_pixels, 75)
	    p50 = np.percentile(brain_pixels, 50)
	    contrast = p99 - p50

	    # A pixel counts as "bright" when it is near the top of the intensity
	    # range and clearly above the upper quartile.
	    threshold = max(p99 * 0.87, p75 + 0.07)
	    bright_mask = (arr > threshold) & brain_mask
	    bright_count = int(bright_mask.sum())
	    brain_area = int(brain_mask.sum())
	    bright_ratio = bright_count / max(brain_area, 1)

	    # Focality approaches 1 when the bright pixels are tightly clustered.
	    if bright_count > 5:
	        ys_b, xs_b = np.where(bright_mask)
	        spread = (ys_b.std() + xs_b.std()) / 2
	        focality = 1.0 / (1.0 + spread / 25)
	    else:
	        focality = 0.0

	    has_focal_bright = (
	        0.004 < bright_ratio < 0.22
	        and contrast > 0.18
	        and focality > 0.42
	    )

	    if not has_focal_bright:
	        return "No Tumor", 0.82, {
	            "Glioma": 0.06, "Meningioma": 0.05, "Pituitary": 0.05, "No Tumor": 0.84
	        }

	    # Distance of the bright cluster's centroid from the image centre,
	    # as a fraction of the mask radius.
	    ys, xs = np.where(bright_mask)
	    mean_y = ys.mean()
	    mean_x = xs.mean()
	    dist_from_center = np.sqrt((mean_y - cy) ** 2 + (mean_x - cx) ** 2)
	    dist_norm = dist_from_center / brain_radius
	    tumor_conf = float(np.clip(0.55 + contrast * 0.8 + bright_ratio * 1.5, 0.55, 0.92))

	    # Scores are listed in CV_CLASSES order.
	    if dist_norm < 0.25:
	        pred = "Pituitary"
	        scores = [0.08, 0.06, tumor_conf, 1 - tumor_conf - 0.14]
	    elif dist_norm > 0.55:
	        pred = "Meningioma"
	        scores = [0.10, tumor_conf, 0.05, 1 - tumor_conf - 0.15]
	    else:
	        pred = "Glioma"
	        scores = [tumor_conf, 0.10, 0.05, 1 - tumor_conf - 0.15]

	    # The remainder term can go negative, so floor each score before
	    # renormalising to a sum of 1.
	    scores = np.clip(scores, 0.02, 1.0)
	    scores = scores / scores.sum()
	    all_probs = {c: round(float(s), 3) for c, s in zip(CV_CLASSES, scores)}
	    return pred, round(float(scores[CV_CLASSES.index(pred)]), 3), all_probs


	def cv_predict(image):
	    """Classify an MRI image with the CNN when loaded, else the heuristic.

	    Returns ``(label, confidence, all_probs)``. For a missing image the
	    label is "No image uploaded" and every class score is 0.0.
	    """
	    if image is None:
	        return "No image uploaded", 0.0, dict.fromkeys(CV_CLASSES, 0.0)

	    predictor = cv_predict_cnn if CV_MODEL_LOADED else cv_predict_heuristic
	    return predictor(image)


	def engineer_features(age, sex, headache_freq, seizures, vision, nausea,
	                      memory, speech, family, radiation):
	    """Build the single-row feature frame for the clinical-risk models.

	    Returns:
	        ``(row, symptom_score)``: a one-row DataFrame whose columns are
	        FEATURES, and the weighted symptom score rounded to 2 decimals.
	    """
	    over_50 = age > 50
	    over_65 = age > 65

	    # Weighted sum of the symptom and risk-factor inputs.
	    symptom_score = (
	        headache_freq * 0.5 + seizures * 3 + vision * 2 + nausea * 1 +
	        memory * 2 + speech * 2.5 + family * 1.5 + radiation * 2 +
	        over_50 * 1
	    )

	    motor_neuro_score = seizures + speech + memory
	    sensory_score = vision + headache_freq * 0.3
	    risk_factor_count = int(family) + int(radiation) + int(over_65)

	    # Values are listed in FEATURES order.
	    feature_values = [
	        age, sex, headache_freq, seizures, vision, nausea, memory, speech,
	        family, radiation, symptom_score,
	        int(over_50), int(over_65),
	        motor_neuro_score,
	        sensory_score,
	        risk_factor_count,
	    ]
	    row = pd.DataFrame([feature_values], columns=FEATURES)
	    return row, round(symptom_score, 2)


	def ml_predict(age, sex, headache_freq, seizures, vision, nausea,
	               memory, speech, family, radiation, model_choice):
	    """Estimate clinical risk from the patient inputs.

	    "Logistic Regression" scores the scaled features with LR_MODEL; any
	    other ``model_choice`` scores the unscaled features with RF_MODEL.

	    Returns:
	        ``(label, probability, symptom_score)``. ``label`` is "High Risk"
	        when the positive-class probability is at least 0.5, otherwise
	        "Low Risk".
	    """
	    row, symptom_score = engineer_features(
	        age, sex, headache_freq, seizures, vision, nausea,
	        memory, speech, family, radiation
	    )
	    scaled_row = pd.DataFrame(SCALER.transform(row), columns=FEATURES)

	    if model_choice == "Logistic Regression":
	        estimator, model_input = LR_MODEL, scaled_row
	    else:
	        estimator, model_input = RF_MODEL, row

	    # Column 1 of predict_proba is read as the positive-class probability.
	    prob = estimator.predict_proba(model_input)[0][1]

	    if prob >= 0.5:
	        label = "High Risk"
	    else:
	        label = "Low Risk"
	    return label, round(float(prob), 3), round(symptom_score, 2)


	def build_prompt(style, cv_pred, cv_conf, all_probs, risk_label, risk_prob,
	                 age, sex_val, headache_freq, seizures, vision, nausea,
	                 memory, speech, family, radiation, symptom_score, model_used):
	    """Compose the report-generation prompt from the CV and ML results.

	    Args:
	        style: "Minimal (Prompt A)" selects the short prompt; any other
	            value selects the structured prompt.
	        cv_pred, cv_conf, all_probs: image classification label, its
	            confidence (a fraction) and the per-class probabilities.
	        risk_label, risk_prob: clinical risk category and its probability.
	        age ... radiation: patient details; flags are rendered as Yes/No.
	            ``nausea`` is accepted but not included in the history text.
	        symptom_score: composite symptom score shown in the history.
	        model_used: name of the risk model, shown in the structured prompt.

	    Returns:
	        The prompt text.
	    """
	    sex_str = "Male" if sex_val else "Female"
	    # Plain "|" separators: the previous "\|" was an invalid escape
	    # sequence that put a literal backslash into the prompt.
	    hist = (
	        f"Age: {age} | Sex: {sex_str}\n"
	        f"Headache frequency: {headache_freq}x/week\n"
	        f"Seizures: {'Yes' if seizures else 'No'} | "
	        f"Vision problems: {'Yes' if vision else 'No'}\n"
	        f"Memory issues: {'Yes' if memory else 'No'} | "
	        f"Speech issues: {'Yes' if speech else 'No'}\n"
	        f"Family history: {'Yes' if family else 'No'} | "
	        f"Prior radiation: {'Yes' if radiation else 'No'}\n"
	        f"Composite symptom score: {symptom_score}"
	    )
	    if style == "Minimal (Prompt A)":
	        return (
	            f"You are a radiologist assistant. Write a brief 3-4 sentence radiology report.\n\n"
	            f"MRI: {cv_pred} ({cv_conf:.0%} confidence)\n"
	            f"Clinical Risk: {risk_label} ({risk_prob:.0%})\n"
	            f"Patient: {hist}\n\nBe concise and clinical."
	        )

	    probs_str = " | ".join(f"{k}: {v:.0%}" for k, v in all_probs.items())
	    return (
	        f"You are a senior neuroradiologist writing a formal MRI report for a referring neurologist.\n\n"
	        f"IMAGING FINDINGS:\n"
	        f" MRI Classification: {cv_pred} ({cv_conf:.0%} confidence)\n"
	        f" Differential probabilities — {probs_str}\n\n"
	        f"CLINICAL RISK ASSESSMENT (ML Model — {model_used}):\n"
	        f" Risk Category: {risk_label} ({risk_prob:.0%} probability)\n\n"
	        f"PATIENT HISTORY:\n{hist}\n\n"
	        f"Write a structured radiology report with:\n"
	        f"1. Imaging Interpretation\n"
	        f"2. Clinical Correlation\n"
	        f"3. Differential Diagnosis\n"
	        f"4. Recommended Next Steps\n\n"
	        f"Use precise medical language. Be objective and evidence-based."
	    )


	def call_openai(prompt):
	    """Send ``prompt`` to the OpenAI chat completions API and return the reply.

	    Always returns a string and never raises: the model's reply on
	    success, a setup hint when no API key is configured, or an
	    ``[API error: ...]`` message when the request or the response fails.
	    """
	    key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY", "")
	    if not key:
	        return (
	            "⚠️ No API key set.\n\n"
	            "Add OPENAI_API_KEY as a Space secret to enable AI reports.\n\n"
	            f"Prompt that would be sent:\n\n---\n{prompt[:400]}...\n---"
	        )
	    headers = {
	        "Authorization": f"Bearer {key}",
	        "Content-Type": "application/json",
	    }
	    body = {
	        "model": "gpt-4o-mini",
	        "max_tokens": 600,
	        "messages": [{"role": "user", "content": prompt}],
	    }
	    try:
	        resp = requests.post(
	            "https://api.openai.com/v1/chat/completions",
	            headers=headers, json=body, timeout=25
	        )
	        payload = resp.json()
	    except Exception as e:
	        # Deliberate catch-all at the UI boundary: a network or decoding
	        # failure becomes report text instead of crashing the request.
	        return f"[API error: {e}]"

	    try:
	        return payload["choices"][0]["message"]["content"]
	    except (KeyError, IndexError, TypeError):
	        # Failed calls carry no "choices"; report the HTTP status and the
	        # API's own message rather than an opaque KeyError.
	        error = payload.get("error") if isinstance(payload, dict) else None
	        detail = error.get("message") if isinstance(error, dict) else None
	        return f"[API error: HTTP {resp.status_code}: {detail or 'unexpected response format'}]"


	def run_analysis(image, age, sex_val, headache_freq, seizures, vision, nausea,
	                 memory, speech, family, radiation, ml_model, prompt_style):
	    """Run image classification, risk scoring and report generation.

	    Returns:
	        ``(cv_out, ml_out, report)``: the classification summary, the
	        risk summary, and the generated report text.
	    """
	    cv_pred, cv_conf, all_probs = cv_predict(image)

	    # Convert the checkbox values to 0/1 once and reuse them below.
	    sex_int = int(sex_val)
	    flags = [
	        int(value)
	        for value in (seizures, vision, nausea, memory, speech, family, radiation)
	    ]

	    risk_label, risk_prob, symptom_score = ml_predict(
	        age, sex_int, headache_freq, *flags, ml_model
	    )

	    prob_lines = "\n".join(f"- {k}: {v:.1%}" for k, v in all_probs.items())
	    cv_header = (
	        f"MRI Classification: {cv_pred} \n"
	        f"Confidence: {cv_conf:.1%} \n\n"
	        "Class Probabilities: \n"
	    )
	    cv_out = cv_header + prob_lines

	    ml_out = (
	        f"Clinical Risk: {risk_label} \n"
	        f"Probability: {risk_prob:.1%} \n"
	        f"Symptom Score: {symptom_score} \n"
	        f"Model: {ml_model}"
	    )

	    prompt = build_prompt(
	        prompt_style, cv_pred, cv_conf, all_probs,
	        risk_label, risk_prob, age, sex_int,
	        headache_freq, *flags,
	        symptom_score, ml_model
	    )
	    return cv_out, ml_out, call_openai(prompt)


	# Gradio layout: inputs in the left column, the three result panels in the
	# right column, and one button that runs run_analysis().
	with gr.Blocks(title="NeuroScan AI", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 🧠 NeuroScan AI — Brain Tumor Detection Assistant")
	    gr.Markdown(
	        "Upload a brain MRI scan and enter patient details. "
	        "The system classifies the scan (CV), assesses clinical risk (ML), "
	        "and generates a radiology report (NLP)."
	    )

	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.Markdown("### 🧠 MRI Scan")
	            image_input = gr.Image(type="pil", label="Upload Brain MRI Image")

	            gr.Markdown("### 🧑‍⚕️ Patient Details")
	            age = gr.Slider(18, 90, value=55, step=1, label="Age")
	            sex_val = gr.Checkbox(label="Male", value=True)
	            headache_freq = gr.Slider(0, 7, value=3, step=1, label="Headaches (days/week)")
	            seizures = gr.Checkbox(label="Seizures")
	            vision = gr.Checkbox(label="Vision Problems")
	            nausea = gr.Checkbox(label="Nausea")
	            memory = gr.Checkbox(label="Memory Issues")
	            speech = gr.Checkbox(label="Speech Issues")
	            family = gr.Checkbox(label="Family History of Brain Tumor")
	            radiation = gr.Checkbox(label="Prior Radiation Therapy")

	            gr.Markdown("### ⚙️ Settings")
	            ml_model = gr.Radio(
	                ["Random Forest", "Logistic Regression"],
	                value="Random Forest", label="Clinical Risk Model"
	            )
	            prompt_style = gr.Radio(
	                ["Minimal (Prompt A)", "Structured (Prompt B)"],
	                value="Structured (Prompt B)", label="Report Style"
	            )
	            run_btn = gr.Button("🔍 Analyse", variant="primary", size="lg")

	        with gr.Column(scale=1):
	            gr.Markdown("### 🧠 CV — MRI Classification")
	            cv_output = gr.Markdown(value="Upload an MRI and click Analyse")

	            gr.Markdown("### 📊 ML — Clinical Risk Assessment")
	            ml_output = gr.Markdown(value="Fill in patient details and click Analyse")

	            gr.Markdown("### 📝 NLP — Radiology Report")
	            nlp_output = gr.Textbox(
	                label="Generated Report",
	                lines=16,
	                interactive=False,
	                placeholder="Report will appear here after analysis..."
	            )

	    # The input order must match run_analysis()'s parameter order.
	    run_btn.click(
	        fn=run_analysis,
	        inputs=[image_input, age, sex_val, headache_freq, seizures,
	                vision, nausea, memory, speech, family, radiation,
	                ml_model, prompt_style],
	        outputs=[cv_output, ml_output, nlp_output]
	    )

	    gr.Markdown("---")
	    # The footer names the components this file actually uses: a
	    # MobileNetV2 classifier and the OpenAI chat completions API.
	    gr.Markdown(
	        "Blocks: Computer Vision (MobileNetV2) · ML Numeric Data (LR/RF) · NLP (OpenAI API) \n"
	        "Data: Kaggle Brain Tumor MRI Dataset · Synthetic Patient Data \n"
	        "⚠️ Research / student project only. Not for clinical use."
	    )

	# Start the Gradio app only when this file is run as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    demo.launch()