# Naija_MedModel / app.py
# Source: Hugging Face Space (Ephraimmm) — commit 044b2dd ("Update app.py", verified)
"""
NaijaMedModel v1.0 β€” ZeroGPU (H200) Gradio Space
Changes vs v1:
- Fixed: system prompt was a tuple not a string (caused Jinja2 crash).
- Fixed: gender leaking into SOAP output.
Layer 1 β€” system prompt explicitly forbids gender inference.
Layer 2 β€” user turn repeats the prohibition.
Layer 3 β€” _strip_gender() post-processes the raw model output
with regex before rendering, catching anything the model
still writes despite the instructions.
"""
import torch, os, tempfile, re
_orig_load = torch.load


def _patched_load(f, *args, **kwargs):
    """Wrapper around ``torch.load`` defaulting ``weights_only`` to False.

    Newer torch releases default ``weights_only=True``, which refuses
    checkpoints containing pickled Python objects (e.g. pyannote models).
    SECURITY NOTE: ``weights_only=False`` executes arbitrary pickled code,
    so this is only acceptable because every model ID below is hard-coded
    and trusted.

    Uses ``setdefault`` so a caller that explicitly requests
    ``weights_only=True`` is still honoured; only the default is changed.
    """
    kwargs.setdefault("weights_only", False)
    return _orig_load(f, *args, **kwargs)


torch.load = _patched_load
# Compatibility shim: rewrite the legacy ``use_auth_token`` kwarg to
# ``token`` before huggingface_hub's deprecation validator handles it
# (without clobbering an explicitly supplied ``token``).  Best-effort:
# the private ``_validators`` layout differs across hub versions, hence
# the broad except that simply skips the patch when it does not apply.
try:
    from huggingface_hub.utils import _validators as _hf_v
    _orig_smooth = _hf_v.smoothly_deprecate_legacy_arguments

    def _patched_smooth(fn_name, kwargs):
        # Map use_auth_token -> token, then defer to the original handler.
        if "use_auth_token" in kwargs:
            v = kwargs.pop("use_auth_token")
            if "token" not in kwargs and v is not None:
                kwargs["token"] = v
        return _orig_smooth(fn_name, kwargs)

    _hf_v.smoothly_deprecate_legacy_arguments = _patched_smooth
except Exception:
    pass
import numpy as np
import soundfile as sf
from scipy.signal import resample
import gradio as gr
import spaces
# Hugging Face Hub model IDs used by the pipeline.
ASR_EN_MODEL = "Ephraimmm/asrfinetuned"          # fine-tuned English ASR
ASR_YO_MODEL = "NCAIR1/Yoruba-ASR"               # Yoruba ASR
WHISPER_MODEL = "openai/whisper-large-v3"        # fallback ASR / speech translation
TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-yo-en"   # Yoruba -> English text MT
DIAR_MODEL = "pyannote/speaker-diarization-3.1"  # speaker diarization
SOAP_MODEL = "Edifon/SOAP_SFT_V1"                # SOAP-note generation LLM
HF_TOKEN = os.environ.get("HF_TOKEN", "")        # empty string when unset
# Choices for the speaker-configuration dropdowns in the UI.
ROLE_OPTIONS = ["Doctor", "Patient", "Parent / Guardian", "Nurse", "Interpreter", "Other"]
ORDER_OPTIONS = ["1st speaker", "2nd speaker", "3rd speaker", "4th speaker"]
# Process-wide model cache, filled lazily by _get_models().
_models: dict = {}
# ══════════════════════════════════════════════════════════════════════════
# MODEL LOADING
# ══════════════════════════════════════════════════════════════════════════
def _get_models() -> dict:
    """Lazily load every model into the process-wide ``_models`` cache.

    Called from inside the GPU-allocated ``run_pipeline`` so all weights
    land on CUDA.  Subsequent calls return the already-populated cache.
    """
    if _models:
        return _models
    # Imports deferred so the heavy ML stacks are only pulled in when a
    # GPU worker actually needs them.
    from transformers import (
        pipeline as hf_pipeline,
        AutoProcessor,
        AutoModelForSpeechSeq2Seq,
        MarianMTModel,
        MarianTokenizer,
        AutoTokenizer,
        AutoModelForCausalLM,
    )
    from pyannote.audio import Pipeline as DiarizationPipeline
    import whisper as _whisper
    print("⏳ Loading English ASR…")
    _models["asr_en"] = hf_pipeline(
        "automatic-speech-recognition", model=ASR_EN_MODEL,
        device="cuda", token=HF_TOKEN,
    )
    print("⏳ Loading Yoruba ASR…")
    _models["yo_processor"] = AutoProcessor.from_pretrained(ASR_YO_MODEL)
    _models["yo_model"] = AutoModelForSpeechSeq2Seq.from_pretrained(
        ASR_YO_MODEL, torch_dtype=torch.float16
    ).to("cuda")
    print("⏳ Loading Whisper large-v3…")
    # Whisper is configured for direct speech-to-English translation.
    _models["whisper_pipe"] = hf_pipeline(
        "automatic-speech-recognition", model=WHISPER_MODEL,
        torch_dtype=torch.float16, device="cuda",
        generate_kwargs={"task": "translate", "language": None},
    )
    print("⏳ Loading Yorubaβ†’English MT…")
    # MT is optional: on failure the keys are set to None and run_pipeline
    # falls back to Whisper's built-in speech translation.
    try:
        _models["mt_tokenizer"] = MarianTokenizer.from_pretrained(TRANSLATE_MODEL)
        _models["mt_model"] = MarianMTModel.from_pretrained(
            TRANSLATE_MODEL, torch_dtype=torch.float16
        ).to("cuda")
    except Exception as e:
        print(f"⚠️ MT skipped: {e}")
        _models["mt_tokenizer"] = None
        _models["mt_model"] = None
    print("⏳ Loading diarization…")
    diar = DiarizationPipeline.from_pretrained(DIAR_MODEL, use_auth_token=HF_TOKEN)
    _models["diar"] = diar.to(torch.device("cuda"))
    print("⏳ Loading Whisper tiny…")
    # Tiny Whisper is used only for per-segment language detection.
    _models["lang_model"] = _whisper.load_model("tiny", device="cuda")
    print("⏳ Loading SOAP model…")
    _models["soap_processor"] = AutoTokenizer.from_pretrained(SOAP_MODEL, token=HF_TOKEN)
    _models["soap_model"] = AutoModelForCausalLM.from_pretrained(
        SOAP_MODEL, torch_dtype=torch.bfloat16, device_map="cuda", token=HF_TOKEN,
    )
    # Keep the whisper module itself handy for pad_or_trim / mel helpers.
    _models["whisper_lib"] = _whisper
    print("βœ… All models ready.")
    return _models
# ══════════════════════════════════════════════════════════════════════════
# SPEAKER MAPPING β€” order-based
# ══════════════════════════════════════════════════════════════════════════
def _get_chunk(arr, start, end, sr=16000):
return arr[int(start * sr): int(end * sr)]
def _build_speaker_map(segs: list[dict], speaker_config: list[dict]) -> dict[str, str]:
"""
pyannote 3.x labels clusters in strict order of first appearance:
SPEAKER_00 = first voice heard, SPEAKER_01 = second, etc.
We sort the user config by their chosen speaking order and zip.
"""
all_clusters = sorted({s["speaker"] for s in segs})
sorted_cfg = sorted(speaker_config, key=lambda c: c["order"])
sp_map: dict[str, str] = {}
for cluster, cfg in zip(all_clusters, sorted_cfg):
sp_map[cluster] = cfg["role"]
for i, cluster in enumerate(all_clusters):
if cluster not in sp_map:
sp_map[cluster] = f"Speaker {i + 1}"
return sp_map
# ══════════════════════════════════════════════════════════════════════════
# GENDER STRIPPING β€” Layer 3 (deterministic safety net)
# ══════════════════════════════════════════════════════════════════════════
def _strip_gender(text: str) -> str:
"""
Remove gendered language the SOAP model writes despite prompt instructions.
This is a hard post-processing pass β€” it does not rely on model compliance.
Applied to the raw model output before any markdown conversion.
"""
# Remove demographic descriptors: "a male", "a female", "a young man", etc.
text = re.sub(
r',?\s*\ban?\s+(male|female|man|woman|boy|girl|gentleman|lady)\b',
'', text, flags=re.IGNORECASE
)
# Remove age+gender combos: "35-year-old male", "elderly woman"
text = re.sub(
r'\b\d+[-\s]year[-\s]old\s+(male|female|man|woman|boy|girl)\b',
'', text, flags=re.IGNORECASE
)
text = re.sub(
r'\b(elderly|young|middle[-\s]aged)\s+(male|female|man|woman|boy|girl)\b',
'', text, flags=re.IGNORECASE
)
# Replace gendered pronouns
text = re.sub(r'\bhe\b', 'the patient', text, flags=re.IGNORECASE)
text = re.sub(r'\bshe\b', 'the patient', text, flags=re.IGNORECASE)
text = re.sub(r'\bhis\b', "the patient's", text, flags=re.IGNORECASE)
text = re.sub(r'\bher\b', "the patient's", text, flags=re.IGNORECASE)
text = re.sub(r'\bhim\b', 'the patient', text, flags=re.IGNORECASE)
# Clean up any double spaces left behind
text = re.sub(r' +', ' ', text)
text = re.sub(r' ,', ',', text)
return text.strip()
# ══════════════════════════════════════════════════════════════════════════
# HELPERS
# ══════════════════════════════════════════════════════════════════════════
def _clean_fillers(text: str) -> str:
return re.sub(r'\b(emm?|erm+|uhh?|umm?|ah+)\b', '', text, flags=re.IGNORECASE).strip()
def to_markdown(text: str) -> str:
    """Convert bare SOAP section labels (S:, O:, A:, P:) into markdown headings.

    Labels match case-insensitively but only when NOT preceded by a letter.
    Fix: the previous unanchored pattern also matched the tail of ordinary
    words — e.g. the "s:" inside "Vitals:" became "## Subjective" — which
    corrupted the rendered note.
    """
    sections = {
        "S:": "## Subjective",
        "O:": "## Objective",
        "A:": "## Assessment",
        "P:": "## Plan",
    }
    for tag, heading in sections.items():
        # (?<![A-Za-z]) stops the tag matching inside a longer word.
        text = re.sub(rf"(?i)(?<![A-Za-z]){re.escape(tag)}", f"\n\n{heading}\n", text)
    return text.strip()
def _run_whisper_translate(m: dict, chunk: np.ndarray) -> str:
    """Translate a 16 kHz mono audio chunk straight to English text.

    Drives the Whisper large-v3 pipeline's components (feature extractor,
    model, tokenizer) directly rather than through the pipeline wrapper.
    """
    pipe = m["whisper_pipe"]
    features = pipe.feature_extractor(
        chunk, sampling_rate=16000, return_tensors="pt"
    ).input_features.to("cuda", dtype=torch.float16)
    with torch.no_grad():
        generated = pipe.model.generate(
            features, task="translate", language=None, return_timestamps=False
        )
    decoded = pipe.tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0].strip()
# ══════════════════════════════════════════════════════════════════════════
# MAIN GPU PIPELINE
# ══════════════════════════════════════════════════════════════════════════
@spaces.GPU(duration=300)
def run_pipeline(wav_path: str, speaker_config: list[dict]) -> str:
    """Full consultation pipeline on the GPU worker.

    Steps: diarize -> per-segment language detection -> ASR (and translation
    to English where needed) -> SOAP-note generation -> gender stripping ->
    markdown rendering.

    Args:
        wav_path: path to a 16 kHz mono WAV file (prepared by process_audio).
        speaker_config: one {"role": str, "order": int} dict per expected
            speaker.

    Returns:
        The SOAP note as markdown, or a "⚠️ ..." message on empty results.
    """
    m = _get_models()
    _whisper = m["whisper_lib"]
    # Constrain diarization to the user-declared speaker count when given.
    num_speakers = len(speaker_config) if speaker_config else None
    diar_kwargs = {"num_speakers": num_speakers} if num_speakers else {}
    diar_result = m["diar"](wav_path, **diar_kwargs)
    segs = [
        {"start": t.start, "end": t.end, "speaker": spk}
        for t, _, spk in diar_result.itertracks(yield_label=True)
    ]
    if not segs:
        return "⚠️ No speakers detected in the recording."
    arr, _ = sf.read(wav_path)
    arr = arr.astype(np.float32)
    sp_map = _build_speaker_map(segs, speaker_config)
    # ── ASR per segment ────────────────────────────────────────────────────
    transcript: list[dict] = []
    for seg in segs:
        chunk = _get_chunk(arr, seg["start"], seg["end"])
        if len(chunk) < 16000 * 0.5:
            # Skip segments shorter than half a second.
            continue
        # Detect the segment's language with tiny Whisper.
        padded = _whisper.pad_or_trim(chunk)
        mel = _whisper.log_mel_spectrogram(padded).to("cuda")
        _, probs = m["lang_model"].detect_language(mel)
        lang = max(probs, key=probs.get)
        if lang == "yo":
            # Yoruba: dedicated ASR, then text MT to English (or Whisper's
            # speech translation when the MT model failed to load).
            inputs = m["yo_processor"](chunk, sampling_rate=16000, return_tensors="pt")
            inputs = {
                k: v.to("cuda", dtype=torch.float16) if v.dtype == torch.float32 else v.to("cuda")
                for k, v in inputs.items()
            }
            with torch.no_grad():
                ids = m["yo_model"].generate(**inputs)
            yo_text = m["yo_processor"].batch_decode(ids, skip_special_tokens=True)[0].strip()
            if m["mt_model"]:
                tokens = m["mt_tokenizer"]([yo_text], return_tensors="pt", padding=True)
                tokens = {k: v.to("cuda") for k, v in tokens.items()}
                with torch.no_grad():
                    out = m["mt_model"].generate(**tokens)
                en_text = m["mt_tokenizer"].batch_decode(out, skip_special_tokens=True)[0].strip()
            else:
                en_text = _run_whisper_translate(m, chunk)
            transcript.append({**seg, "language": "yo", "text": en_text, "translated": True})
        elif lang != "en":
            # Any other language: Whisper translates directly to English.
            en_text = _run_whisper_translate(m, chunk)
            transcript.append({**seg, "language": lang, "text": en_text, "translated": True})
        else:
            # English: fine-tuned English ASR.
            fe = m["asr_en"].feature_extractor
            inputs = fe(chunk, sampling_rate=16000, return_tensors="pt",
                        truncation=True, return_attention_mask=True)
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
            with torch.no_grad():
                ids = m["asr_en"].model.generate(
                    inputs["input_features"],
                    attention_mask=inputs["attention_mask"],
                    generation_config=m["asr_en"].model.generation_config,
                    language="english",
                )
            en_text = m["asr_en"].tokenizer.batch_decode(ids, skip_special_tokens=True)[0].strip()
            transcript.append({**seg, "language": "en", "text": en_text, "translated": False})
    if not transcript:
        return "⚠️ No speech could be transcribed from the recording."
    # ── Build flat transcript for SOAP prompt ──────────────────────────────
    flat = "\n".join(
        f"{sp_map.get(s['speaker'], s['speaker'])}: {_clean_fillers(s['text'])}"
        for s in transcript
    )
    roles_present = sorted(set(sp_map.values()))
    roles_desc = ", ".join(roles_present)
    # Role-specific guidance injected into the system prompt, keyed by the
    # roles actually present in this recording.
    _role_instructions: dict[str, str] = {
        "Doctor":
            "The Doctor's questions, examinations and clinical reasoning drive the Assessment and Plan.",
        "Patient":
            "The Patient's reported symptoms, history and concerns form the core of the Subjective section.",
        "Parent / Guardian":
            "The Parent/Guardian provides collateral history on behalf of the patient; "
            "include this under Subjective, clearly noted as collateral.",
        "Nurse":
            "The Nurse may contribute observed vital signs or measurements; include under Objective.",
        "Interpreter":
            "The Interpreter facilitates communication only; do not attribute clinical statements to them.",
        "Other":
            "Include contributions from other speakers only if clinically relevant.",
    }
    role_guidance = "\n".join(
        _role_instructions[r] for r in roles_present if r in _role_instructions
    )
    # ── Layer 1: system prompt — explicit gender prohibition ───────────────
    system_content = (
        "You are an expert medical professor generating medically accurate SOAP notes.\n"
        f"Speakers present: {roles_desc}.\n"
        f"{role_guidance}\n"
        "Output format: use exactly these section labels on their own line: "
        "S:, O:, A:, P: β€” no markdown, no bullet points inside the headers.\n"
        "CRITICAL RULE β€” GENDER: You must never state, infer, or imply the gender of any speaker. "
        "Do not use the words male, female, man, woman, boy, girl, gentleman, or lady to describe any speaker. "
        "Do not use gendered pronouns: he, she, his, her, him, hers. "
        "Always refer to the patient as 'the patient' and the clinician as 'the clinician'. "
        "If gender is mentioned anywhere in the transcript, ignore it completely."
    )
    # ── Layer 2: user turn — repeat prohibition so it is in recent context ─
    user_content = (
        "Create a SOAP note for this consultation.\n"
        "Reminder: use no gendered pronouns and no gender descriptors for any speaker. "
        "Write 'the patient' and 'the clinician' throughout.\n\n"
        f"### Transcript:\n{flat}\n"
    )
    tok = m["soap_processor"]
    msgs = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]
    # apply_chat_template's return type varies across tokenizer versions;
    # normalise to a CUDA tensor of input ids.
    encoded = tok.apply_chat_template(msgs, add_generation_prompt=True, return_tensors="pt")
    if hasattr(encoded, "input_ids"):
        input_ids = encoded.input_ids.to("cuda")
    elif isinstance(encoded, dict):
        input_ids = encoded["input_ids"].to("cuda")
    else:
        input_ids = torch.tensor(encoded).unsqueeze(0).to("cuda")
    out_ids = m["soap_model"].generate(input_ids, max_new_tokens=2048, do_sample=False)
    # Decode only the newly generated tokens (drop the prompt prefix).
    soap = tok.decode(out_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    # ── Layer 3: post-process — deterministic gender stripping ────────────
    soap = _strip_gender(soap)
    return to_markdown(soap)
# ══════════════════════════════════════════════════════════════════════════
# GRADIO TOP-LEVEL HANDLER
# ══════════════════════════════════════════════════════════════════════════
def _order_str_to_int(order_str: str) -> int:
return {"1st speaker": 0, "2nd speaker": 1, "3rd speaker": 2, "4th speaker": 3}.get(order_str, 0)
def process_audio(
    audio_filepath,
    num_speakers: int,
    spk1_role: str, spk1_order: str,
    spk2_role: str, spk2_order: str,
    spk3_role: str, spk3_order: str,
    spk4_role: str, spk4_order: str,
) -> str:
    """Top-level Gradio handler for the Generate button.

    Normalises the uploaded audio to a 16 kHz mono WAV via soundfile/scipy,
    falling back to ffmpeg for containers soundfile cannot decode, then runs
    the GPU pipeline with the configured speakers.  The temp WAV is always
    removed afterwards.

    Returns the SOAP note as markdown, or a "⚠️ ..." message string.
    """
    if audio_filepath is None:
        return "⚠️ Please upload an audio file before generating."
    raw = [
        {"role": spk1_role, "order": _order_str_to_int(spk1_order)},
        {"role": spk2_role, "order": _order_str_to_int(spk2_order)},
        {"role": spk3_role, "order": _order_str_to_int(spk3_order)},
        {"role": spk4_role, "order": _order_str_to_int(spk4_order)},
    ]
    # Only the first num_speakers dropdown pairs are meaningful.
    speaker_config = raw[:int(num_speakers)]
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp: the file
    # is created atomically; soundfile/ffmpeg then reopen it by name.
    fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        arr, sr = sf.read(audio_filepath, always_2d=False)
        if sr != 16000:
            arr = resample(arr, int(len(arr) * 16000 / sr))
        if arr.ndim > 1:
            arr = arr.mean(axis=1)  # down-mix to mono
        arr = arr.astype(np.float32)
        sf.write(tmp_wav, arr, 16000)
    except Exception:
        # soundfile cannot decode every container (e.g. AAC/M4A) — let
        # ffmpeg handle the resample + down-mix instead ("-y" overwrites
        # the pre-created temp file).
        import subprocess
        subprocess.run(
            ["ffmpeg", "-i", audio_filepath, "-ar", "16000", "-ac", "1",
             tmp_wav, "-y", "-loglevel", "quiet"],
            check=True,
        )
    try:
        return run_pipeline(tmp_wav, speaker_config)
    finally:
        try:
            os.remove(tmp_wav)
        except Exception:
            pass
# ══════════════════════════════════════════════════════════════════════════
# CSS
# ══════════════════════════════════════════════════════════════════════════
# Custom dark-theme stylesheet injected into the Gradio Blocks app below.
# Pure presentation — no effect on the transcription/SOAP pipeline.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,300;0,400;0,500;0,600&family=DM+Mono:wght@300;400;500&display=swap');
:root {
--bg-void: #080c10;
--bg-card: #111820;
--bg-input: #0a0f14;
--border-dim: rgba(0,220,180,0.08);
--border-bright: rgba(0,220,180,0.28);
--teal: #00dca4;
--teal-dim: #00b884;
--amber: #f5a623;
--txt-hi: #e8eef4;
--txt-mid: #8899aa;
--txt-lo: #4a5d6e;
--mono: 'DM Mono', monospace;
--sans: 'DM Sans', sans-serif;
--r-sm: 6px; --r-md: 10px; --r-lg: 16px; --r-xl: 22px;
}
* { box-sizing: border-box; }
body, .gradio-container, gradio-app, #root {
background: var(--bg-void) !important;
font-family: var(--sans) !important;
color: var(--txt-hi) !important;
}
.gradio-container { max-width: 980px !important; margin: 0 auto !important; padding: 0 24px 80px !important; }
::-webkit-scrollbar { width: 4px; }
::-webkit-scrollbar-thumb { background: var(--border-bright); border-radius: 99px; }
/* Hero */
#hero-wrap { text-align:center; padding:64px 0 48px; position:relative; }
#hero-wrap::before {
content:''; position:absolute; top:0; left:50%; transform:translateX(-50%);
width:560px; height:260px;
background:radial-gradient(ellipse at 50% 0%,rgba(0,220,164,.09) 0%,transparent 70%);
pointer-events:none;
}
.badge-row { display:flex; align-items:center; justify-content:center; gap:8px; margin-bottom:22px; }
.badge { display:inline-flex; align-items:center; gap:5px; font-size:11px; font-family:var(--mono); letter-spacing:.06em; text-transform:uppercase; padding:4px 12px; border-radius:99px; border:1px solid; }
.badge-teal { color:var(--teal); border-color:rgba(0,220,164,.3); background:rgba(0,220,164,.06); }
.badge-amber { color:var(--amber); border-color:rgba(245,166,35,.3); background:rgba(245,166,35,.06); }
.badge-v { color:var(--txt-lo); border-color:var(--border-dim); }
.pulse-dot { width:6px; height:6px; border-radius:50%; background:var(--teal); animation:pulse 2s ease-in-out infinite; display:inline-block; }
@keyframes pulse { 0%,100%{box-shadow:0 0 0 0 rgba(0,220,164,.6)} 50%{box-shadow:0 0 0 5px transparent} }
.hero-title { font-size:clamp(32px,5vw,52px); font-weight:300; letter-spacing:-.02em; line-height:1.1; margin:0 0 6px; }
.hero-title span { font-weight:600; color:var(--teal); }
.hero-sub { font-size:15px; color:var(--txt-mid); font-weight:300; max-width:520px; margin:12px auto 0; line-height:1.65; }
/* Stat + pipeline rows */
.stat-row { display:grid; grid-template-columns:repeat(3,1fr); gap:12px; margin-bottom:28px; }
.stat-card { background:var(--bg-card); border:1px solid var(--border-dim); border-radius:var(--r-md); padding:18px 20px; text-align:center; transition:border-color .25s; }
.stat-card:hover { border-color:var(--border-bright); }
.stat-num { font-size:26px; font-weight:500; color:var(--teal); font-family:var(--mono); line-height:1; margin-bottom:5px; }
.stat-label { font-size:11px; color:var(--txt-lo); text-transform:uppercase; letter-spacing:.08em; }
.pipeline-row { display:grid; grid-template-columns:repeat(4,1fr); gap:8px; margin-bottom:28px; }
.pipe-step { background:var(--bg-card); border:1px solid var(--border-dim); border-radius:var(--r-md); padding:14px 12px; text-align:center; }
.pipe-icon { font-size:18px; margin-bottom:6px; display:block; }
.pipe-name { font-size:10px; font-family:var(--mono); color:var(--txt-lo); text-transform:uppercase; letter-spacing:.08em; line-height:1.3; }
/* Section panels */
#config-section, #upload-section, #output-section {
background:var(--bg-card); border:1px solid var(--border-dim);
border-radius:var(--r-xl); padding:28px 32px; margin-bottom:16px;
position:relative; overflow:hidden;
}
#config-section::after, #upload-section::after {
content:''; position:absolute; inset:0;
background:radial-gradient(ellipse at 50% -20%,rgba(0,220,164,.04) 0%,transparent 60%);
pointer-events:none;
}
.section-label {
display:flex; align-items:center; gap:10px;
font-size:12px; font-family:var(--mono); color:var(--txt-lo);
text-transform:uppercase; letter-spacing:.1em; margin-bottom:18px;
}
.section-label::before { content:''; display:inline-block; width:20px; height:1px; background:var(--teal-dim); }
.section-hint { font-size:13px; color:var(--txt-mid); margin:0 0 22px; line-height:1.65; }
/* Speaker cards */
.spk-header { display:flex; align-items:center; gap:10px; margin-bottom:14px; }
.spk-avatar {
width:34px; height:34px; border-radius:50%;
display:flex; align-items:center; justify-content:center;
font-size:11px; font-weight:600; font-family:var(--mono); flex-shrink:0;
}
.av1 { background:rgba(0,220,164,.14); color:#00dca4; border:1px solid rgba(0,220,164,.3); }
.av2 { background:rgba(245,166,35,.12); color:#f5a623; border:1px solid rgba(245,166,35,.3); }
.av3 { background:rgba(168,141,240,.12);color:#a88df0; border:1px solid rgba(168,141,240,.3); }
.av4 { background:rgba(255,107,133,.10);color:#ff6b85; border:1px solid rgba(255,107,133,.3); }
.spk-name { font-size:12px; color:var(--txt-mid); font-family:var(--mono); }
.order-callout {
background: rgba(0,220,164,0.06);
border: 1px solid rgba(0,220,164,0.18);
border-radius: var(--r-md);
padding: 12px 16px;
font-size: 12px;
color: var(--txt-mid);
line-height: 1.6;
margin-bottom: 20px;
}
.order-callout strong { color: var(--teal); font-weight: 500; }
/* Format chips */
.format-hints { display:flex; gap:8px; flex-wrap:wrap; margin-top:14px; }
.fmt-chip { font-size:10px; font-family:var(--mono); color:var(--txt-lo); border:1px solid var(--border-dim); border-radius:4px; padding:3px 8px; }
/* Audio widget */
[data-testid="audio"] { background:var(--bg-input) !important; border:1.5px dashed rgba(0,220,164,.2) !important; border-radius:var(--r-md) !important; }
[data-testid="audio"]:hover { border-color:rgba(0,220,164,.45) !important; }
/* CTA */
#run-btn button {
width:100% !important; height:52px !important;
background:linear-gradient(135deg,#00c49a,#00dca4,#00b884) !important;
border:none !important; border-radius:var(--r-md) !important;
color:#03120d !important; font-family:var(--sans) !important;
font-size:15px !important; font-weight:600 !important;
transition:transform .15s,box-shadow .25s !important;
}
#run-btn button:hover { transform:translateY(-1px) !important; box-shadow:0 8px 32px rgba(0,220,164,.25) !important; }
#run-btn button:active { transform:translateY(0) !important; }
/* SOAP output */
#output-section::before {
content:''; position:absolute; top:0; right:0; width:300px; height:300px;
background:radial-gradient(ellipse at 100% 0%,rgba(0,220,164,.04) 0%,transparent 65%);
pointer-events:none;
}
#soap-out, .gr-markdown { background:transparent !important; border:none !important; color:var(--txt-hi) !important; font-family:var(--sans) !important; }
#soap-out h2 {
font-size:13px !important; font-family:var(--mono) !important; font-weight:500 !important;
letter-spacing:.12em !important; text-transform:uppercase !important; color:var(--teal) !important;
margin:28px 0 10px !important; padding-bottom:8px !important;
border-bottom:1px solid var(--border-dim) !important;
}
#soap-out h2::before { content:'// '; opacity:.4; }
#soap-out p { font-size:15px !important; line-height:1.75 !important; color:var(--txt-mid) !important; margin:0 0 8px !important; }
#soap-out ul,#soap-out ol { color:var(--txt-mid) !important; font-size:15px !important; line-height:1.75 !important; padding-left:20px !important; }
#soap-out em { color:var(--txt-lo) !important; font-style:normal !important; font-size:14px !important; display:block; text-align:center; margin-top:60px; }
#soap-out strong { color:var(--txt-hi) !important; font-weight:500 !important; }
#soap-out hr { border:none !important; border-top:1px solid var(--border-dim) !important; margin:20px 0 !important; }
/* Footer */
#footer-bar { margin-top:48px; padding-top:20px; border-top:1px solid var(--border-dim); display:flex; align-items:center; justify-content:space-between; flex-wrap:wrap; gap:12px; }
.footer-txt { font-size:12px; color:var(--txt-lo); font-family:var(--mono); }
.footer-authors { font-size:12px; color:var(--txt-lo); }
.footer-authors span { color:var(--teal-dim); }
/* Gradio resets */
.gr-block,.block,[data-testid] { background:transparent !important; border:none !important; box-shadow:none !important; padding:0 !important; }
.gr-box,.wrap { background:transparent !important; border:none !important; }
label.float { display:none !important; }
footer,.footer { display:none !important; }
select { background:var(--bg-input) !important; border:1px solid var(--border-dim) !important; color:var(--txt-hi) !important; border-radius:var(--r-sm) !important; font-family:var(--sans) !important; font-size:13px !important; }
select:focus { border-color:var(--border-bright) !important; outline:none !important; }
.gr-form label, label { color:var(--txt-mid) !important; font-size:12px !important; font-family:var(--mono) !important; }
input[type=range]::-webkit-slider-thumb { background:var(--teal) !important; }
"""
# ══════════════════════════════════════════════════════════════════════════
# GRADIO UI
# ══════════════════════════════════════════════════════════════════════════
# Build the Gradio UI: hero header, speaker configuration, upload, output.
with gr.Blocks(
    title="NaijaMedModel v1",
    css=custom_css,
    # Dark teal theme tuned to match the custom CSS above.
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.Color(
            c50="#e0faf3", c100="#b3f2e0", c200="#66e6c2",
            c300="#00dca4", c400="#00c49a", c500="#00b884",
            c600="#009a6d", c700="#007d57", c800="#005f41",
            c900="#003e2a", c950="#001f15", name="teal",
        ),
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
        font_mono=[gr.themes.GoogleFont("DM Mono"), "monospace"],
    ).set(
        body_background_fill="#080c10",
        block_background_fill="#111820",
        block_border_color="rgba(0,220,164,0.08)",
        block_label_text_color="#4a5d6e",
        input_background_fill="#0a0f14",
        button_primary_background_fill="#00dca4",
        button_primary_text_color="#03120d",
        slider_color="#00dca4",
        color_accent="#00dca4",
    ),
) as demo:
    # Static hero banner, stat cards and pipeline diagram.
    gr.HTML("""
<div id="hero-wrap">
<div class="badge-row">
<span class="badge badge-teal"><span class="pulse-dot"></span> Live</span>
<span class="badge badge-amber">ZeroGPU Β· H200</span>
<span class="badge badge-v">v1</span>
</div>
<h1 class="hero-title">Naija<span>Med</span>Model</h1>
<p class="hero-sub">
Bilingual clinical speech recognition for Nigeria.<br>
English &amp; Yoruba consultations β†’ structured SOAP notes, automatically.
</p>
</div>
<div class="stat-row">
<div class="stat-card"><div class="stat-num">2</div><div class="stat-label">Languages</div></div>
<div class="stat-card"><div class="stat-num">NO</div><div class="stat-label">External API calls</div></div>
<div class="stat-card"><div class="stat-num">SOAP</div><div class="stat-label">Output Format</div></div>
</div>
<div class="pipeline-row">
<div class="pipe-step"><span class="pipe-icon">πŸŽ™</span><div class="pipe-name">Speaker<br>Diarization</div></div>
<div class="pipe-step"><span class="pipe-icon">πŸ”</span><div class="pipe-name">Language<br>Detection</div></div>
<div class="pipe-step"><span class="pipe-icon">πŸ“</span><div class="pipe-name">ASR &amp;<br>Translation</div></div>
<div class="pipe-step"><span class="pipe-icon">πŸ₯</span><div class="pipe-name">SOAP<br>Generation</div></div>
</div>
""")
    # ── Section 01: Speaker config ─────────────────────────────────────────
    with gr.Group(elem_id="config-section"):
        gr.HTML('<div class="section-label">01 β€” Speaker Configuration</div>')
        gr.HTML("""
<p class="section-hint">
Who is in the room and <strong style="color:var(--teal)">when each person speaks first</strong>.
The system maps voices by speaking order β€” pyannote always labels the first voice
heard as Speaker 1, the second as Speaker 2, and so on.
</p>
<div class="order-callout">
<strong>How to use:</strong> Play the recording β€” did the doctor speak first,
or did the patient? Set "1st speaker" to whoever opened the conversation.
If you get it wrong, just change the dropdowns and press Generate again β€”
no need to re-upload the file.
</div>
""")
        num_speakers = gr.Slider(
            minimum=1, maximum=4, value=2, step=1,
            label="Number of speakers in the recording",
        )
        with gr.Row():
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av1">S1</div><span class="spk-name">Speaker 1</span></div>')
                spk1_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Doctor", label="Clinical role")
                spk1_order = gr.Dropdown(choices=ORDER_OPTIONS, value="1st speaker", label="Speaks first in recording")
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av2">S2</div><span class="spk-name">Speaker 2</span></div>')
                spk2_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Patient", label="Clinical role")
                spk2_order = gr.Dropdown(choices=ORDER_OPTIONS, value="2nd speaker", label="Speaks first in recording")
        # Speakers 3 & 4 stay hidden until the slider asks for 3+ speakers.
        with gr.Row(visible=False) as spk34_row:
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av3">S3</div><span class="spk-name">Speaker 3</span></div>')
                spk3_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Parent / Guardian", label="Clinical role")
                spk3_order = gr.Dropdown(choices=ORDER_OPTIONS, value="3rd speaker", label="Speaks first in recording")
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av4">S4</div><span class="spk-name">Speaker 4</span></div>')
                spk4_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Nurse", label="Clinical role")
                spk4_order = gr.Dropdown(choices=ORDER_OPTIONS, value="4th speaker", label="Speaks first in recording")
    # Toggle the extra speaker row from the slider value.
    num_speakers.change(
        fn=lambda n: gr.update(visible=(n >= 3)),
        inputs=num_speakers,
        outputs=spk34_row,
    )
    # ── Section 02: Upload ─────────────────────────────────────────────────
    with gr.Group(elem_id="upload-section"):
        gr.HTML('<div class="section-label">02 β€” Upload Consultation</div>')
        audio_in = gr.Audio(type="filepath", label="", show_label=False)
        gr.HTML("""
<div class="format-hints">
<span class="fmt-chip">MP3</span><span class="fmt-chip">WAV</span>
<span class="fmt-chip">AAC</span><span class="fmt-chip">M4A</span>
<span class="fmt-chip">OGG</span><span class="fmt-chip">FLAC</span>
</div>
""")
    btn = gr.Button("Generate SOAP Note", variant="primary", size="lg", elem_id="run-btn")
    # ── Section 03: Output ─────────────────────────────────────────────────
    with gr.Group(elem_id="output-section"):
        gr.HTML('<div class="section-label">03 β€” Clinical SOAP Note</div>')
        soap_out = gr.Markdown(
            value="*Configure speakers, upload audio, then press Generate.*",
            elem_id="soap-out",
        )
    gr.HTML("""
<div id="footer-bar">
<span class="footer-txt">First request ~2 min Β· Models load on demand Β· v1</span>
<span class="footer-authors">Built by <span>Analytics Intelligence</span> Β· Uche &amp; Jimmy</span>
</div>
""")
    # Wire the generate button to the top-level handler.
    btn.click(
        fn=process_audio,
        inputs=[
            audio_in, num_speakers,
            spk1_role, spk1_order,
            spk2_role, spk2_order,
            spk3_role, spk3_order,
            spk4_role, spk4_order,
        ],
        outputs=soap_out,
    )

if __name__ == "__main__":
    demo.launch()