Spaces:
Running on Zero
Running on Zero
| """ | |
| NaijaMedModel v1.0 β ZeroGPU (H200) Gradio Space | |
| Changes vs v1: | |
| - Fixed: system prompt was a tuple not a string (caused Jinja2 crash). | |
| - Fixed: gender leaking into SOAP output. | |
| Layer 1 β system prompt explicitly forbids gender inference. | |
| Layer 2 β user turn repeats the prohibition. | |
| Layer 3 β _strip_gender() post-processes the raw model output | |
| with regex before rendering, catching anything the model | |
| still writes despite the instructions. | |
| """ | |
import torch, os, tempfile, re

# Monkeypatch torch.load to force weights_only=False so pickled checkpoints
# with custom classes (pyannote, whisper) still load.
# NOTE(review): presumably works around torch's newer weights_only=True
# default — confirm against the pinned torch version.
# SECURITY NOTE: weights_only=False executes arbitrary pickle code on load;
# only acceptable because every checkpoint here comes from fixed Hub repos.
_orig_load = torch.load
def _patched_load(f, *args, **kwargs):
    # Force-override even if a caller explicitly passed weights_only=True.
    kwargs["weights_only"] = False
    return _orig_load(f, *args, **kwargs)
torch.load = _patched_load

# Shim for huggingface_hub: rename the legacy use_auth_token kwarg (still
# emitted by pyannote at L92) to the modern token kwarg before the hub's
# deprecation validator sees it. Best-effort: silently skipped if the
# private _validators module moves in a future hub release.
try:
    from huggingface_hub.utils import _validators as _hf_v
    _orig_smooth = _hf_v.smoothly_deprecate_legacy_arguments
    def _patched_smooth(fn_name, kwargs):
        if "use_auth_token" in kwargs:
            v = kwargs.pop("use_auth_token")
            if "token" not in kwargs and v is not None:
                kwargs["token"] = v
        return _orig_smooth(fn_name, kwargs)
    _hf_v.smoothly_deprecate_legacy_arguments = _patched_smooth
except Exception:
    pass

import numpy as np
import soundfile as sf
from scipy.signal import resample
import gradio as gr
import spaces  # NOTE(review): imported but never used — is @spaces.GPU missing?
# ── Hugging Face repo ids for each pipeline stage ──────────────────────────
ASR_EN_MODEL = "Ephraimmm/asrfinetuned"          # fine-tuned English ASR
ASR_YO_MODEL = "NCAIR1/Yoruba-ASR"               # Yoruba speech-to-text (seq2seq)
WHISPER_MODEL = "openai/whisper-large-v3"        # fallback any-language → English speech translation
TRANSLATE_MODEL = "Helsinki-NLP/opus-mt-yo-en"   # Yoruba → English text MT
DIAR_MODEL = "pyannote/speaker-diarization-3.1"  # speaker diarization
SOAP_MODEL = "Edifon/SOAP_SFT_V1"                # SOAP-note generator LLM
HF_TOKEN = os.environ.get("HF_TOKEN", "")        # Hub token for gated repos (empty string if unset)
# Dropdown choices for the speaker-configuration UI
ROLE_OPTIONS = ["Doctor", "Patient", "Parent / Guardian", "Nurse", "Interpreter", "Other"]
ORDER_OPTIONS = ["1st speaker", "2nd speaker", "3rd speaker", "4th speaker"]
# Module-level model cache, populated lazily by _get_models()
_models: dict = {}
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # MODEL LOADING | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _get_models() -> dict:
    """Lazily load every model into the module-level ``_models`` cache.

    All models are placed on CUDA. The first call is slow (several large
    downloads); subsequent calls return the populated cache immediately.
    NOTE(review): not guarded against concurrent first calls — two threads
    could race and load models twice; confirm Gradio serializes requests here.
    """
    if _models:
        return _models
    # Heavy imports deferred so module import stays fast on ZeroGPU cold start.
    from transformers import (
        pipeline as hf_pipeline,
        AutoProcessor,
        AutoModelForSpeechSeq2Seq,
        MarianMTModel,
        MarianTokenizer,
        AutoTokenizer,
        AutoModelForCausalLM,
    )
    from pyannote.audio import Pipeline as DiarizationPipeline
    import whisper as _whisper
    print("⏳ Loading English ASR…")
    _models["asr_en"] = hf_pipeline(
        "automatic-speech-recognition", model=ASR_EN_MODEL,
        device="cuda", token=HF_TOKEN,
    )
    print("⏳ Loading Yoruba ASR…")
    # fp16 to halve VRAM for the seq2seq ASR weights
    _models["yo_processor"] = AutoProcessor.from_pretrained(ASR_YO_MODEL)
    _models["yo_model"] = AutoModelForSpeechSeq2Seq.from_pretrained(
        ASR_YO_MODEL, torch_dtype=torch.float16
    ).to("cuda")
    print("⏳ Loading Whisper large-v3…")
    # Used as the fallback speech → English translator (see _run_whisper_translate)
    _models["whisper_pipe"] = hf_pipeline(
        "automatic-speech-recognition", model=WHISPER_MODEL,
        torch_dtype=torch.float16, device="cuda",
        generate_kwargs={"task": "translate", "language": None},
    )
    print("⏳ Loading Yoruba→English MT…")
    # MT is optional: on any load failure both entries are set to None and
    # run_pipeline falls back to Whisper translation for Yoruba segments.
    try:
        _models["mt_tokenizer"] = MarianTokenizer.from_pretrained(TRANSLATE_MODEL)
        _models["mt_model"] = MarianMTModel.from_pretrained(
            TRANSLATE_MODEL, torch_dtype=torch.float16
        ).to("cuda")
    except Exception as e:
        print(f"⚠️ MT skipped: {e}")
        _models["mt_tokenizer"] = None
        _models["mt_model"] = None
    print("⏳ Loading diarization…")
    # pyannote still takes the legacy use_auth_token kwarg (shimmed at import time)
    diar = DiarizationPipeline.from_pretrained(DIAR_MODEL, use_auth_token=HF_TOKEN)
    _models["diar"] = diar.to(torch.device("cuda"))
    print("⏳ Loading Whisper tiny…")
    # whisper-tiny is only used for per-segment language identification
    _models["lang_model"] = _whisper.load_model("tiny", device="cuda")
    print("⏳ Loading SOAP model…")
    _models["soap_processor"] = AutoTokenizer.from_pretrained(SOAP_MODEL, token=HF_TOKEN)
    _models["soap_model"] = AutoModelForCausalLM.from_pretrained(
        SOAP_MODEL, torch_dtype=torch.bfloat16, device_map="cuda", token=HF_TOKEN,
    )
    # Keep the whisper module itself so run_pipeline can call its helpers
    _models["whisper_lib"] = _whisper
    print("✅ All models ready.")
    return _models
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # SPEAKER MAPPING β order-based | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _get_chunk(arr, start, end, sr=16000): | |
| return arr[int(start * sr): int(end * sr)] | |
| def _build_speaker_map(segs: list[dict], speaker_config: list[dict]) -> dict[str, str]: | |
| """ | |
| pyannote 3.x labels clusters in strict order of first appearance: | |
| SPEAKER_00 = first voice heard, SPEAKER_01 = second, etc. | |
| We sort the user config by their chosen speaking order and zip. | |
| """ | |
| all_clusters = sorted({s["speaker"] for s in segs}) | |
| sorted_cfg = sorted(speaker_config, key=lambda c: c["order"]) | |
| sp_map: dict[str, str] = {} | |
| for cluster, cfg in zip(all_clusters, sorted_cfg): | |
| sp_map[cluster] = cfg["role"] | |
| for i, cluster in enumerate(all_clusters): | |
| if cluster not in sp_map: | |
| sp_map[cluster] = f"Speaker {i + 1}" | |
| return sp_map | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # GENDER STRIPPING β Layer 3 (deterministic safety net) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _strip_gender(text: str) -> str: | |
| """ | |
| Remove gendered language the SOAP model writes despite prompt instructions. | |
| This is a hard post-processing pass β it does not rely on model compliance. | |
| Applied to the raw model output before any markdown conversion. | |
| """ | |
| # Remove demographic descriptors: "a male", "a female", "a young man", etc. | |
| text = re.sub( | |
| r',?\s*\ban?\s+(male|female|man|woman|boy|girl|gentleman|lady)\b', | |
| '', text, flags=re.IGNORECASE | |
| ) | |
| # Remove age+gender combos: "35-year-old male", "elderly woman" | |
| text = re.sub( | |
| r'\b\d+[-\s]year[-\s]old\s+(male|female|man|woman|boy|girl)\b', | |
| '', text, flags=re.IGNORECASE | |
| ) | |
| text = re.sub( | |
| r'\b(elderly|young|middle[-\s]aged)\s+(male|female|man|woman|boy|girl)\b', | |
| '', text, flags=re.IGNORECASE | |
| ) | |
| # Replace gendered pronouns | |
| text = re.sub(r'\bhe\b', 'the patient', text, flags=re.IGNORECASE) | |
| text = re.sub(r'\bshe\b', 'the patient', text, flags=re.IGNORECASE) | |
| text = re.sub(r'\bhis\b', "the patient's", text, flags=re.IGNORECASE) | |
| text = re.sub(r'\bher\b', "the patient's", text, flags=re.IGNORECASE) | |
| text = re.sub(r'\bhim\b', 'the patient', text, flags=re.IGNORECASE) | |
| # Clean up any double spaces left behind | |
| text = re.sub(r' +', ' ', text) | |
| text = re.sub(r' ,', ',', text) | |
| return text.strip() | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HELPERS | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _clean_fillers(text: str) -> str: | |
| return re.sub(r'\b(emm?|erm+|uhh?|umm?|ah+)\b', '', text, flags=re.IGNORECASE).strip() | |
def to_markdown(text: str) -> str:
    """Convert flat SOAP tags (S:/O:/A:/P:) into markdown section headings.

    Tags are only recognised at the start of a line: the previous unanchored
    (?i) substitution matched "s:" anywhere, so prose such as "symptoms:" was
    mangled into a bogus "## Subjective" heading.
    """
    sections = {
        "S:": "## Subjective",
        "O:": "## Objective",
        "A:": "## Assessment",
        "P:": "## Plan",
    }
    for tag, heading in sections.items():
        # (?im): case-insensitive, line-anchored; allow leading indentation
        text = re.sub(rf"(?im)^[ \t]*{re.escape(tag)}", f"\n\n{heading}\n", text)
    return text.strip()
def _run_whisper_translate(m: dict, chunk: np.ndarray) -> str:
    """Translate one 16 kHz mono audio chunk directly to English text.

    Drives the Whisper large-v3 model underneath the HF pipeline wrapper by
    hand so task="translate" is forced per call (language auto-detected).
    Returns the stripped decoded transcript of the first (only) sequence.
    """
    mdl = m["whisper_pipe"].model
    proc = m["whisper_pipe"].feature_extractor
    tok = m["whisper_pipe"].tokenizer
    inp = proc(chunk, sampling_rate=16000, return_tensors="pt").input_features
    # Match the fp16 weights the pipeline was loaded with
    inp = inp.to("cuda", dtype=torch.float16)
    with torch.no_grad():
        ids = mdl.generate(inp, task="translate", language=None, return_timestamps=False)
    return tok.batch_decode(ids, skip_special_tokens=True)[0].strip()
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # MAIN GPU PIPELINE | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def run_pipeline(wav_path: str, speaker_config: list[dict]) -> str:
    """Full pipeline: diarization → language ID → ASR/translation → SOAP note.

    Parameters
    ----------
    wav_path : path to a 16 kHz mono WAV file (process_audio guarantees this).
    speaker_config : one dict per expected speaker with "role" and "order" keys.

    Returns markdown text — either the rendered SOAP note or a ⚠️ warning
    string when nothing usable was found.

    NOTE(review): `spaces` is imported at module level but this function has
    no @spaces.GPU decorator — confirm the ZeroGPU Space actually gets a GPU
    allocated for these CUDA calls.
    """
    m = _get_models()
    _whisper = m["whisper_lib"]
    # Pin the diarizer to the user-declared speaker count when provided
    num_speakers = len(speaker_config) if speaker_config else None
    diar_kwargs = {"num_speakers": num_speakers} if num_speakers else {}
    diar_result = m["diar"](wav_path, **diar_kwargs)
    segs = [
        {"start": t.start, "end": t.end, "speaker": spk}
        for t, _, spk in diar_result.itertracks(yield_label=True)
    ]
    if not segs:
        return "⚠️ No speakers detected in the recording."
    arr, _ = sf.read(wav_path)
    arr = arr.astype(np.float32)
    sp_map = _build_speaker_map(segs, speaker_config)
    # ── ASR per segment ────────────────────────────────────────────────────
    transcript: list[dict] = []
    for seg in segs:
        chunk = _get_chunk(arr, seg["start"], seg["end"])
        # Skip segments under 0.5 s — too little signal for reliable ASR
        if len(chunk) < 16000 * 0.5:
            continue
        # Per-segment language identification with whisper-tiny
        padded = _whisper.pad_or_trim(chunk)
        mel = _whisper.log_mel_spectrogram(padded).to("cuda")
        _, probs = m["lang_model"].detect_language(mel)
        lang = max(probs, key=probs.get)
        if lang == "yo":
            # Yoruba: dedicated ASR, then MarianMT to English
            # (Whisper speech-translation is the fallback when MT is unavailable)
            inputs = m["yo_processor"](chunk, sampling_rate=16000, return_tensors="pt")
            # Cast float features to fp16 to match the model; leave int tensors alone
            inputs = {
                k: v.to("cuda", dtype=torch.float16) if v.dtype == torch.float32 else v.to("cuda")
                for k, v in inputs.items()
            }
            with torch.no_grad():
                ids = m["yo_model"].generate(**inputs)
            yo_text = m["yo_processor"].batch_decode(ids, skip_special_tokens=True)[0].strip()
            if m["mt_model"]:
                tokens = m["mt_tokenizer"]([yo_text], return_tensors="pt", padding=True)
                tokens = {k: v.to("cuda") for k, v in tokens.items()}
                with torch.no_grad():
                    out = m["mt_model"].generate(**tokens)
                en_text = m["mt_tokenizer"].batch_decode(out, skip_special_tokens=True)[0].strip()
            else:
                en_text = _run_whisper_translate(m, chunk)
            transcript.append({**seg, "language": "yo", "text": en_text, "translated": True})
        elif lang != "en":
            # Any other detected language: Whisper large-v3 speech → English
            en_text = _run_whisper_translate(m, chunk)
            transcript.append({**seg, "language": lang, "text": en_text, "translated": True})
        else:
            # English: fine-tuned English ASR, driven manually so we can pass
            # an explicit attention mask and force English decoding
            fe = m["asr_en"].feature_extractor
            inputs = fe(chunk, sampling_rate=16000, return_tensors="pt",
                        truncation=True, return_attention_mask=True)
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
            with torch.no_grad():
                ids = m["asr_en"].model.generate(
                    inputs["input_features"],
                    attention_mask=inputs["attention_mask"],
                    generation_config=m["asr_en"].model.generation_config,
                    language="english",
                )
            en_text = m["asr_en"].tokenizer.batch_decode(ids, skip_special_tokens=True)[0].strip()
            transcript.append({**seg, "language": "en", "text": en_text, "translated": False})
    if not transcript:
        return "⚠️ No speech could be transcribed from the recording."
    # ── Build flat transcript for SOAP prompt ──────────────────────────────
    flat = "\n".join(
        f"{sp_map.get(s['speaker'], s['speaker'])}: {_clean_fillers(s['text'])}"
        for s in transcript
    )
    roles_present = sorted(set(sp_map.values()))
    roles_desc = ", ".join(roles_present)
    # Per-role hints telling the SOAP model which section each voice feeds
    _role_instructions: dict[str, str] = {
        "Doctor":
            "The Doctor's questions, examinations and clinical reasoning drive the Assessment and Plan.",
        "Patient":
            "The Patient's reported symptoms, history and concerns form the core of the Subjective section.",
        "Parent / Guardian":
            "The Parent/Guardian provides collateral history on behalf of the patient; "
            "include this under Subjective, clearly noted as collateral.",
        "Nurse":
            "The Nurse may contribute observed vital signs or measurements; include under Objective.",
        "Interpreter":
            "The Interpreter facilitates communication only; do not attribute clinical statements to them.",
        "Other":
            "Include contributions from other speakers only if clinically relevant.",
    }
    role_guidance = "\n".join(
        _role_instructions[r] for r in roles_present if r in _role_instructions
    )
    # ── Layer 1: system prompt — explicit gender prohibition ───────────────
    system_content = (
        "You are an expert medical professor generating medically accurate SOAP notes.\n"
        f"Speakers present: {roles_desc}.\n"
        f"{role_guidance}\n"
        "Output format: use exactly these section labels on their own line: "
        "S:, O:, A:, P: — no markdown, no bullet points inside the headers.\n"
        "CRITICAL RULE — GENDER: You must never state, infer, or imply the gender of any speaker. "
        "Do not use the words male, female, man, woman, boy, girl, gentleman, or lady to describe any speaker. "
        "Do not use gendered pronouns: he, she, his, her, him, hers. "
        "Always refer to the patient as 'the patient' and the clinician as 'the clinician'. "
        "If gender is mentioned anywhere in the transcript, ignore it completely."
    )
    # ── Layer 2: user turn — repeat prohibition so it is in recent context ─
    user_content = (
        "Create a SOAP note for this consultation.\n"
        "Reminder: use no gendered pronouns and no gender descriptors for any speaker. "
        "Write 'the patient' and 'the clinician' throughout.\n\n"
        f"### Transcript:\n{flat}\n"
    )
    tok = m["soap_processor"]
    msgs = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]
    # apply_chat_template's return type varies across transformers versions
    # (BatchEncoding, dict, or a bare id list) — normalise all three below.
    encoded = tok.apply_chat_template(msgs, add_generation_prompt=True, return_tensors="pt")
    if hasattr(encoded, "input_ids"):
        input_ids = encoded.input_ids.to("cuda")
    elif isinstance(encoded, dict):
        input_ids = encoded["input_ids"].to("cuda")
    else:
        input_ids = torch.tensor(encoded).unsqueeze(0).to("cuda")
    # Greedy decoding (do_sample=False) for reproducible notes
    out_ids = m["soap_model"].generate(input_ids, max_new_tokens=2048, do_sample=False)
    # Decode only the newly generated tokens, not the echoed prompt
    soap = tok.decode(out_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    # ── Layer 3: post-process — deterministic gender stripping ─────────────
    soap = _strip_gender(soap)
    return to_markdown(soap)
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # GRADIO TOP-LEVEL HANDLER | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _order_str_to_int(order_str: str) -> int: | |
| return {"1st speaker": 0, "2nd speaker": 1, "3rd speaker": 2, "4th speaker": 3}.get(order_str, 0) | |
def process_audio(
    audio_filepath,
    num_speakers: int,
    spk1_role: str, spk1_order: str,
    spk2_role: str, spk2_order: str,
    spk3_role: str, spk3_order: str,
    spk4_role: str, spk4_order: str,
) -> str:
    """Gradio click handler: normalise the upload to 16 kHz mono WAV and run
    the GPU pipeline.

    Returns markdown (SOAP note or ⚠️ warning). Only the first *num_speakers*
    role/order pairs are used. The temp WAV is always deleted, even when the
    ffmpeg fallback or the pipeline raises.

    Fix vs previous version: tempfile.mktemp (deprecated, race-prone — the
    path could be claimed between creation and use) replaced with mkstemp.
    """
    if audio_filepath is None:
        return "⚠️ Please upload an audio file before generating."
    raw = [
        {"role": spk1_role, "order": _order_str_to_int(spk1_order)},
        {"role": spk2_role, "order": _order_str_to_int(spk2_order)},
        {"role": spk3_role, "order": _order_str_to_int(spk3_order)},
        {"role": spk4_role, "order": _order_str_to_int(spk4_order)},
    ]
    speaker_config = raw[:int(num_speakers)]
    # mkstemp creates the file atomically; close the fd — sf/ffmpeg reopen by path
    fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        try:
            arr, sr = sf.read(audio_filepath, always_2d=False)
            if sr != 16000:
                # resample operates along axis 0, so stereo (n, ch) is fine here
                arr = resample(arr, int(len(arr) * 16000 / sr))
            if arr.ndim > 1:
                arr = arr.mean(axis=1)  # downmix to mono
            arr = arr.astype(np.float32)
            sf.write(tmp_wav, arr, 16000)
        except Exception:
            # soundfile cannot decode every container (e.g. AAC/M4A);
            # fall back to ffmpeg for the decode + resample + downmix
            import subprocess
            subprocess.run(
                ["ffmpeg", "-i", audio_filepath, "-ar", "16000", "-ac", "1",
                 tmp_wav, "-y", "-loglevel", "quiet"],
                check=True,
            )
        return run_pipeline(tmp_wav, speaker_config)
    finally:
        try:
            os.remove(tmp_wav)
        except OSError:
            pass
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # CSS | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| custom_css = """ | |
| @import url('https://fonts.googleapis.com/css2?family=DM+Sans:ital,wght@0,300;0,400;0,500;0,600&family=DM+Mono:wght@300;400;500&display=swap'); | |
| :root { | |
| --bg-void: #080c10; | |
| --bg-card: #111820; | |
| --bg-input: #0a0f14; | |
| --border-dim: rgba(0,220,180,0.08); | |
| --border-bright: rgba(0,220,180,0.28); | |
| --teal: #00dca4; | |
| --teal-dim: #00b884; | |
| --amber: #f5a623; | |
| --txt-hi: #e8eef4; | |
| --txt-mid: #8899aa; | |
| --txt-lo: #4a5d6e; | |
| --mono: 'DM Mono', monospace; | |
| --sans: 'DM Sans', sans-serif; | |
| --r-sm: 6px; --r-md: 10px; --r-lg: 16px; --r-xl: 22px; | |
| } | |
| * { box-sizing: border-box; } | |
| body, .gradio-container, gradio-app, #root { | |
| background: var(--bg-void) !important; | |
| font-family: var(--sans) !important; | |
| color: var(--txt-hi) !important; | |
| } | |
| .gradio-container { max-width: 980px !important; margin: 0 auto !important; padding: 0 24px 80px !important; } | |
| ::-webkit-scrollbar { width: 4px; } | |
| ::-webkit-scrollbar-thumb { background: var(--border-bright); border-radius: 99px; } | |
| /* Hero */ | |
| #hero-wrap { text-align:center; padding:64px 0 48px; position:relative; } | |
| #hero-wrap::before { | |
| content:''; position:absolute; top:0; left:50%; transform:translateX(-50%); | |
| width:560px; height:260px; | |
| background:radial-gradient(ellipse at 50% 0%,rgba(0,220,164,.09) 0%,transparent 70%); | |
| pointer-events:none; | |
| } | |
| .badge-row { display:flex; align-items:center; justify-content:center; gap:8px; margin-bottom:22px; } | |
| .badge { display:inline-flex; align-items:center; gap:5px; font-size:11px; font-family:var(--mono); letter-spacing:.06em; text-transform:uppercase; padding:4px 12px; border-radius:99px; border:1px solid; } | |
| .badge-teal { color:var(--teal); border-color:rgba(0,220,164,.3); background:rgba(0,220,164,.06); } | |
| .badge-amber { color:var(--amber); border-color:rgba(245,166,35,.3); background:rgba(245,166,35,.06); } | |
| .badge-v { color:var(--txt-lo); border-color:var(--border-dim); } | |
| .pulse-dot { width:6px; height:6px; border-radius:50%; background:var(--teal); animation:pulse 2s ease-in-out infinite; display:inline-block; } | |
| @keyframes pulse { 0%,100%{box-shadow:0 0 0 0 rgba(0,220,164,.6)} 50%{box-shadow:0 0 0 5px transparent} } | |
| .hero-title { font-size:clamp(32px,5vw,52px); font-weight:300; letter-spacing:-.02em; line-height:1.1; margin:0 0 6px; } | |
| .hero-title span { font-weight:600; color:var(--teal); } | |
| .hero-sub { font-size:15px; color:var(--txt-mid); font-weight:300; max-width:520px; margin:12px auto 0; line-height:1.65; } | |
| /* Stat + pipeline rows */ | |
| .stat-row { display:grid; grid-template-columns:repeat(3,1fr); gap:12px; margin-bottom:28px; } | |
| .stat-card { background:var(--bg-card); border:1px solid var(--border-dim); border-radius:var(--r-md); padding:18px 20px; text-align:center; transition:border-color .25s; } | |
| .stat-card:hover { border-color:var(--border-bright); } | |
| .stat-num { font-size:26px; font-weight:500; color:var(--teal); font-family:var(--mono); line-height:1; margin-bottom:5px; } | |
| .stat-label { font-size:11px; color:var(--txt-lo); text-transform:uppercase; letter-spacing:.08em; } | |
| .pipeline-row { display:grid; grid-template-columns:repeat(4,1fr); gap:8px; margin-bottom:28px; } | |
| .pipe-step { background:var(--bg-card); border:1px solid var(--border-dim); border-radius:var(--r-md); padding:14px 12px; text-align:center; } | |
| .pipe-icon { font-size:18px; margin-bottom:6px; display:block; } | |
| .pipe-name { font-size:10px; font-family:var(--mono); color:var(--txt-lo); text-transform:uppercase; letter-spacing:.08em; line-height:1.3; } | |
| /* Section panels */ | |
| #config-section, #upload-section, #output-section { | |
| background:var(--bg-card); border:1px solid var(--border-dim); | |
| border-radius:var(--r-xl); padding:28px 32px; margin-bottom:16px; | |
| position:relative; overflow:hidden; | |
| } | |
| #config-section::after, #upload-section::after { | |
| content:''; position:absolute; inset:0; | |
| background:radial-gradient(ellipse at 50% -20%,rgba(0,220,164,.04) 0%,transparent 60%); | |
| pointer-events:none; | |
| } | |
| .section-label { | |
| display:flex; align-items:center; gap:10px; | |
| font-size:12px; font-family:var(--mono); color:var(--txt-lo); | |
| text-transform:uppercase; letter-spacing:.1em; margin-bottom:18px; | |
| } | |
| .section-label::before { content:''; display:inline-block; width:20px; height:1px; background:var(--teal-dim); } | |
| .section-hint { font-size:13px; color:var(--txt-mid); margin:0 0 22px; line-height:1.65; } | |
| /* Speaker cards */ | |
| .spk-header { display:flex; align-items:center; gap:10px; margin-bottom:14px; } | |
| .spk-avatar { | |
| width:34px; height:34px; border-radius:50%; | |
| display:flex; align-items:center; justify-content:center; | |
| font-size:11px; font-weight:600; font-family:var(--mono); flex-shrink:0; | |
| } | |
| .av1 { background:rgba(0,220,164,.14); color:#00dca4; border:1px solid rgba(0,220,164,.3); } | |
| .av2 { background:rgba(245,166,35,.12); color:#f5a623; border:1px solid rgba(245,166,35,.3); } | |
| .av3 { background:rgba(168,141,240,.12);color:#a88df0; border:1px solid rgba(168,141,240,.3); } | |
| .av4 { background:rgba(255,107,133,.10);color:#ff6b85; border:1px solid rgba(255,107,133,.3); } | |
| .spk-name { font-size:12px; color:var(--txt-mid); font-family:var(--mono); } | |
| .order-callout { | |
| background: rgba(0,220,164,0.06); | |
| border: 1px solid rgba(0,220,164,0.18); | |
| border-radius: var(--r-md); | |
| padding: 12px 16px; | |
| font-size: 12px; | |
| color: var(--txt-mid); | |
| line-height: 1.6; | |
| margin-bottom: 20px; | |
| } | |
| .order-callout strong { color: var(--teal); font-weight: 500; } | |
| /* Format chips */ | |
| .format-hints { display:flex; gap:8px; flex-wrap:wrap; margin-top:14px; } | |
| .fmt-chip { font-size:10px; font-family:var(--mono); color:var(--txt-lo); border:1px solid var(--border-dim); border-radius:4px; padding:3px 8px; } | |
| /* Audio widget */ | |
| [data-testid="audio"] { background:var(--bg-input) !important; border:1.5px dashed rgba(0,220,164,.2) !important; border-radius:var(--r-md) !important; } | |
| [data-testid="audio"]:hover { border-color:rgba(0,220,164,.45) !important; } | |
| /* CTA */ | |
| #run-btn button { | |
| width:100% !important; height:52px !important; | |
| background:linear-gradient(135deg,#00c49a,#00dca4,#00b884) !important; | |
| border:none !important; border-radius:var(--r-md) !important; | |
| color:#03120d !important; font-family:var(--sans) !important; | |
| font-size:15px !important; font-weight:600 !important; | |
| transition:transform .15s,box-shadow .25s !important; | |
| } | |
| #run-btn button:hover { transform:translateY(-1px) !important; box-shadow:0 8px 32px rgba(0,220,164,.25) !important; } | |
| #run-btn button:active { transform:translateY(0) !important; } | |
| /* SOAP output */ | |
| #output-section::before { | |
| content:''; position:absolute; top:0; right:0; width:300px; height:300px; | |
| background:radial-gradient(ellipse at 100% 0%,rgba(0,220,164,.04) 0%,transparent 65%); | |
| pointer-events:none; | |
| } | |
| #soap-out, .gr-markdown { background:transparent !important; border:none !important; color:var(--txt-hi) !important; font-family:var(--sans) !important; } | |
| #soap-out h2 { | |
| font-size:13px !important; font-family:var(--mono) !important; font-weight:500 !important; | |
| letter-spacing:.12em !important; text-transform:uppercase !important; color:var(--teal) !important; | |
| margin:28px 0 10px !important; padding-bottom:8px !important; | |
| border-bottom:1px solid var(--border-dim) !important; | |
| } | |
| #soap-out h2::before { content:'// '; opacity:.4; } | |
| #soap-out p { font-size:15px !important; line-height:1.75 !important; color:var(--txt-mid) !important; margin:0 0 8px !important; } | |
| #soap-out ul,#soap-out ol { color:var(--txt-mid) !important; font-size:15px !important; line-height:1.75 !important; padding-left:20px !important; } | |
| #soap-out em { color:var(--txt-lo) !important; font-style:normal !important; font-size:14px !important; display:block; text-align:center; margin-top:60px; } | |
| #soap-out strong { color:var(--txt-hi) !important; font-weight:500 !important; } | |
| #soap-out hr { border:none !important; border-top:1px solid var(--border-dim) !important; margin:20px 0 !important; } | |
| /* Footer */ | |
| #footer-bar { margin-top:48px; padding-top:20px; border-top:1px solid var(--border-dim); display:flex; align-items:center; justify-content:space-between; flex-wrap:wrap; gap:12px; } | |
| .footer-txt { font-size:12px; color:var(--txt-lo); font-family:var(--mono); } | |
| .footer-authors { font-size:12px; color:var(--txt-lo); } | |
| .footer-authors span { color:var(--teal-dim); } | |
| /* Gradio resets */ | |
| .gr-block,.block,[data-testid] { background:transparent !important; border:none !important; box-shadow:none !important; padding:0 !important; } | |
| .gr-box,.wrap { background:transparent !important; border:none !important; } | |
| label.float { display:none !important; } | |
| footer,.footer { display:none !important; } | |
| select { background:var(--bg-input) !important; border:1px solid var(--border-dim) !important; color:var(--txt-hi) !important; border-radius:var(--r-sm) !important; font-family:var(--sans) !important; font-size:13px !important; } | |
| select:focus { border-color:var(--border-bright) !important; outline:none !important; } | |
| .gr-form label, label { color:var(--txt-mid) !important; font-size:12px !important; font-family:var(--mono) !important; } | |
| input[type=range]::-webkit-slider-thumb { background:var(--teal) !important; } | |
| """ | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # GRADIO UI | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Build the Gradio app. The Base theme approximates the palette; custom_css
# (defined above) does the heavy styling lifting on top of it.
with gr.Blocks(
    title="NaijaMedModel v1",
    css=custom_css,
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.Color(
            c50="#e0faf3", c100="#b3f2e0", c200="#66e6c2",
            c300="#00dca4", c400="#00c49a", c500="#00b884",
            c600="#009a6d", c700="#007d57", c800="#005f41",
            c900="#003e2a", c950="#001f15", name="teal",
        ),
        neutral_hue=gr.themes.colors.slate,
        font=[gr.themes.GoogleFont("DM Sans"), "sans-serif"],
        font_mono=[gr.themes.GoogleFont("DM Mono"), "monospace"],
    ).set(
        body_background_fill="#080c10",
        block_background_fill="#111820",
        block_border_color="rgba(0,220,164,0.08)",
        block_label_text_color="#4a5d6e",
        input_background_fill="#0a0f14",
        button_primary_background_fill="#00dca4",
        button_primary_text_color="#03120d",
        slider_color="#00dca4",
        color_accent="#00dca4",
    ),
) as demo:
    # Static hero header, stat cards and pipeline overview
    gr.HTML("""
    <div id="hero-wrap">
      <div class="badge-row">
        <span class="badge badge-teal"><span class="pulse-dot"></span> Live</span>
        <span class="badge badge-amber">ZeroGPU · H200</span>
        <span class="badge badge-v">v1</span>
      </div>
      <h1 class="hero-title">Naija<span>Med</span>Model</h1>
      <p class="hero-sub">
        Bilingual clinical speech recognition for Nigeria.<br>
        English & Yoruba consultations → structured SOAP notes, automatically.
      </p>
    </div>
    <div class="stat-row">
      <div class="stat-card"><div class="stat-num">2</div><div class="stat-label">Languages</div></div>
      <div class="stat-card"><div class="stat-num">NO</div><div class="stat-label">External API calls</div></div>
      <div class="stat-card"><div class="stat-num">SOAP</div><div class="stat-label">Output Format</div></div>
    </div>
    <div class="pipeline-row">
      <div class="pipe-step"><span class="pipe-icon">π</span><div class="pipe-name">Speaker<br>Diarization</div></div>
      <div class="pipe-step"><span class="pipe-icon">π</span><div class="pipe-name">Language<br>Detection</div></div>
      <div class="pipe-step"><span class="pipe-icon">π</span><div class="pipe-name">ASR &<br>Translation</div></div>
      <div class="pipe-step"><span class="pipe-icon">π₯</span><div class="pipe-name">SOAP<br>Generation</div></div>
    </div>
    """)
    # ── Section 01: Speaker config ─────────────────────────────────────────
    # Roles/orders feed _build_speaker_map via process_audio.
    with gr.Group(elem_id="config-section"):
        gr.HTML('<div class="section-label">01 — Speaker Configuration</div>')
        gr.HTML("""
        <p class="section-hint">
          Who is in the room and <strong style="color:var(--teal)">when each person speaks first</strong>.
          The system maps voices by speaking order — pyannote always labels the first voice
          heard as Speaker 1, the second as Speaker 2, and so on.
        </p>
        <div class="order-callout">
          <strong>How to use:</strong> Play the recording — did the doctor speak first,
          or did the patient? Set "1st speaker" to whoever opened the conversation.
          If you get it wrong, just change the dropdowns and press Generate again —
          no need to re-upload the file.
        </div>
        """)
        num_speakers = gr.Slider(
            minimum=1, maximum=4, value=2, step=1,
            label="Number of speakers in the recording",
        )
        with gr.Row():
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av1">S1</div><span class="spk-name">Speaker 1</span></div>')
                spk1_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Doctor", label="Clinical role")
                spk1_order = gr.Dropdown(choices=ORDER_OPTIONS, value="1st speaker", label="Speaks first in recording")
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av2">S2</div><span class="spk-name">Speaker 2</span></div>')
                spk2_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Patient", label="Clinical role")
                spk2_order = gr.Dropdown(choices=ORDER_OPTIONS, value="2nd speaker", label="Speaks first in recording")
        # Speakers 3 & 4 live in one hidden row, revealed when needed.
        # NOTE(review): the row shows both cards even when num_speakers == 3;
        # process_audio slices the config, so the extra card is cosmetic only.
        with gr.Row(visible=False) as spk34_row:
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av3">S3</div><span class="spk-name">Speaker 3</span></div>')
                spk3_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Parent / Guardian", label="Clinical role")
                spk3_order = gr.Dropdown(choices=ORDER_OPTIONS, value="3rd speaker", label="Speaks first in recording")
            with gr.Column():
                gr.HTML('<div class="spk-header"><div class="spk-avatar av4">S4</div><span class="spk-name">Speaker 4</span></div>')
                spk4_role = gr.Dropdown(choices=ROLE_OPTIONS, value="Nurse", label="Clinical role")
                spk4_order = gr.Dropdown(choices=ORDER_OPTIONS, value="4th speaker", label="Speaks first in recording")
        # Reveal the extra speaker row only when 3+ speakers are declared
        num_speakers.change(
            fn=lambda n: gr.update(visible=(n >= 3)),
            inputs=num_speakers,
            outputs=spk34_row,
        )
    # ── Section 02: Upload ─────────────────────────────────────────────────
    with gr.Group(elem_id="upload-section"):
        gr.HTML('<div class="section-label">02 — Upload Consultation</div>')
        audio_in = gr.Audio(type="filepath", label="", show_label=False)
        gr.HTML("""
        <div class="format-hints">
          <span class="fmt-chip">MP3</span><span class="fmt-chip">WAV</span>
          <span class="fmt-chip">AAC</span><span class="fmt-chip">M4A</span>
          <span class="fmt-chip">OGG</span><span class="fmt-chip">FLAC</span>
        </div>
        """)
        btn = gr.Button("Generate SOAP Note", variant="primary", size="lg", elem_id="run-btn")
    # ── Section 03: Output ─────────────────────────────────────────────────
    with gr.Group(elem_id="output-section"):
        gr.HTML('<div class="section-label">03 — Clinical SOAP Note</div>')
        soap_out = gr.Markdown(
            value="*Configure speakers, upload audio, then press Generate.*",
            elem_id="soap-out",
        )
    gr.HTML("""
    <div id="footer-bar">
      <span class="footer-txt">First request ~2 min · Models load on demand · v1</span>
      <span class="footer-authors">Built by <span>Analytics Intelligence</span> · Uche & Jimmy</span>
    </div>
    """)
    # Wire the CTA: all four role/order pairs are always passed;
    # process_audio keeps only the first num_speakers of them.
    btn.click(
        fn=process_audio,
        inputs=[
            audio_in, num_speakers,
            spk1_role, spk1_order,
            spk2_role, spk2_order,
            spk3_role, spk3_order,
            spk4_role, spk4_order,
        ],
        outputs=soap_out,
    )

if __name__ == "__main__":
    demo.launch()