""" đĨ Nursing Language Translator Translates NHS clinical shorthand to formal language using NurseEmbed-300M """ import gradio as gr import json import re from sentence_transformers import SentenceTransformer from sklearn.metrics.pairwise import cosine_similarity import numpy as np # Load the NurseEmbed model print("Loading NurseEmbed-300M...") model = SentenceTransformer("NurseCitizenDeveloper/NurseEmbed-300M") print("â Model loaded!") # Load knowledge base with open("knowledge_base.json", "r") as f: KNOWLEDGE_BASE = json.load(f) # Pre-compute embeddings for all abbreviations print("Computing knowledge base embeddings...") KB_TEXTS = [item["abbrev"] for item in KNOWLEDGE_BASE] KB_EMBEDDINGS = model.encode(KB_TEXTS) print(f"â {len(KB_TEXTS)} abbreviations indexed!") # NEWS2 interpretation NEWS2_THRESHOLDS = { (0, 0): ("Low risk", "Routine monitoring", "đĸ"), (1, 4): ("Low-medium risk", "Increased monitoring frequency", "đĄ"), (5, 6): ("Medium risk", "Urgent response - inform senior nurse/doctor", "đ "), (7, 20): ("High risk", "Emergency response - immediate senior review, consider critical care", "đ´") } def interpret_news2(score): """Interpret NEWS2 score and return clinical action""" try: score = int(score) for (low, high), (risk, action, emoji) in NEWS2_THRESHOLDS.items(): if low <= score <= high: return f"{emoji} **NEWS2 {score}**: {risk}\n â {action}" return f"â ī¸ NEWS2 {score}: Invalid score (should be 0-20)" except: return None def find_abbreviation_match(text, threshold=0.3): """Find matching abbreviations using semantic similarity""" if not text.strip(): return [] # Encode the input text text_embedding = model.encode([text]) # Compute similarities similarities = cosine_similarity(text_embedding, KB_EMBEDDINGS)[0] # Get matches above threshold matches = [] for idx, sim in enumerate(similarities): if sim > threshold: matches.append({ "abbrev": KNOWLEDGE_BASE[idx]["abbrev"], "full": KNOWLEDGE_BASE[idx]["full"], "category": KNOWLEDGE_BASE[idx]["category"], "similarity": float(sim) }) # Sort by similarity matches.sort(key=lambda x: x["similarity"], reverse=True) return matches[:5] # Top 5 matches def extract_demographics(text): """Extract age and gender from text""" patterns = [ r'(\d+)\s*[yY]/[oO]', # 72 y/o r'(\d+)\s*[yY][oO]', # 72yo r'(\d+)\s*[yY]ear', # 72 year r'(\d+)\s*[mM]ale', # 72 male r'(\d+)\s*[fF]emale', # 72 female r'(\d+)\s*[MF]\b', # 72M or 72F ] age = None for pattern in patterns: match = re.search(pattern, text) if match: age = match.group(1) break gender = None if re.search(r'\b[mM]ale\b|\b[mM]\b|\bman\b|\bgentleman\b', text): gender = "Male" elif re.search(r'\b[fF]emale\b|\b[fF]\b|\bwoman\b|\blady\b', text): gender = "Female" result = "" if age: result += f"**Age**: {age} years old\n" if gender: result += f"**Gender**: {gender}\n" return result if result else None def translate_nursing_text(input_text): """Main translation function""" if not input_text.strip(): return "Please enter clinical text to translate." output = [] output.append("# đ Translation Report\n") output.append(f"**Original**: _{input_text}_\n") output.append("---\n") # Extract demographics demographics = extract_demographics(input_text) if demographics: output.append("## đ¤ Patient Demographics\n") output.append(demographics) output.append("") # Check for NEWS2 scores news_match = re.search(r'NEWS2?\s*(?:score\s*)?(?:is\s*|of\s*|=\s*)?(\d+)', input_text, re.IGNORECASE) if news_match: score = news_match.group(1) output.append("## â ī¸ Early Warning Score\n") output.append(interpret_news2(score)) output.append("") # Tokenize and find abbreviations words = re.findall(r'\b[\w/]+\b|[?#][\w]*', input_text) found_terms = [] seen = set() for word in words: if word.lower() in seen or len(word) < 2: continue seen.add(word.lower()) matches = find_abbreviation_match(word, threshold=0.4) if matches: best_match = matches[0] found_terms.append({ "original": word, "translation": best_match["full"], "category": best_match["category"], "confidence": best_match["similarity"] }) # Also check multi-word phrases phrases_to_check = [ "c/o", "y/o", "O/E", "U&E", "?PE", "NOF #" ] for phrase in phrases_to_check: if phrase.lower() in input_text.lower(): matches = find_abbreviation_match(phrase, threshold=0.4) if matches and phrase.lower() not in seen: seen.add(phrase.lower()) best_match = matches[0] found_terms.append({ "original": phrase, "translation": best_match["full"], "category": best_match["category"], "confidence": best_match["similarity"] }) # Sort by confidence found_terms.sort(key=lambda x: x["confidence"], reverse=True) if found_terms: output.append("## đ Clinical Terms Identified\n") output.append("| Term | Translation | Category | Confidence |") output.append("|------|-------------|----------|------------|") for term in found_terms: conf_bar = "đĸ" if term["confidence"] > 0.7 else ("đĄ" if term["confidence"] > 0.5 else "đ ") output.append(f"| `{term['original']}` | {term['translation']} | {term['category']} | {conf_bar} {term['confidence']:.0%} |") output.append("") # Generate formal translation output.append("## â Formal Translation\n") formal_text = input_text for term in found_terms: # Replace abbreviation with full form pattern = re.compile(re.escape(term["original"]), re.IGNORECASE) formal_text = pattern.sub(f"**{term['translation']}**", formal_text, count=1) output.append(f"> {formal_text}\n") return "\n".join(output) def get_abbreviation_list(): """Return formatted list of abbreviations by category""" categories = {} for item in KNOWLEDGE_BASE: cat = item["category"] if cat not in categories: categories[cat] = [] categories[cat].append(f"`{item['abbrev']}` â {item['full']}") output = ["# đ NHS Abbreviation Reference\n"] for cat in sorted(categories.keys()): output.append(f"## {cat}\n") output.append("\n".join(categories[cat])) output.append("") return "\n".join(output) # Build the Gradio interface with gr.Blocks( title="đĨ Nursing Language Translator", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan") ) as app: gr.Markdown(""" # đĨ Nursing Language Translator **Powered by NurseEmbed-300M** â A clinical embedding model trained on NHS nursing terminology. Translates clinical shorthand, abbreviations, and NEWS2 scores into formal language. """) with gr.Tabs(): with gr.Tab("đ Translate"): with gr.Row(): with gr.Column(scale=1): input_text = gr.Textbox( label="Clinical Shorthand Input", placeholder="e.g., 72M, c/o SOB, NEWS2=7, PMH: COPD, ?PE, started LMWH", lines=4 ) translate_btn = gr.Button("đ Translate", variant="primary") gr.Examples( examples=[ ["72M c/o SOB, NEWS2 score is 7, PMH: COPD, AF. Started on Salbutamol NEB and LMWH."], ["Pt admitted via A&E with ?PE. CXR NAD. ABG shows type 1 resp failure. For CT PA."], ["85F NOF # post-op day 2. Increasing confusion, Temp 38.2. ?UTI vs ?SSI. Sent MSU."], ["54M NSTEMI. ECG: ST depression V3-V6. Troponin elevated. For ECHO and cardiology review."], ["NEWS2 9 - patient deteriorating. RR 28, O2 sats 88% on 4L, HR 120, BP 90/60."] ], inputs=input_text, label="Example Clinical Notes" ) with gr.Column(scale=1): output_text = gr.Markdown(label="Translation") translate_btn.click( fn=translate_nursing_text, inputs=input_text, outputs=output_text ) with gr.Tab("đ Reference"): gr.Markdown(get_abbreviation_list()) with gr.Tab("âšī¸ About"): gr.Markdown(""" ## About This Tool The **Nursing Language Translator** uses **NurseEmbed-300M**, a clinical embedding model fine-tuned on NHS nursing terminology. ### How It Works 1. **Semantic Matching**: Uses vector embeddings to match abbreviations to their meanings 2. **NEWS2 Interpretation**: Automatically interprets Early Warning Scores 3. **Context-Aware**: Understands clinical context, not just string matching ### Model Details - **Base Model**: EmbeddingGemma-300M - **Training Data**: 10,000 medical Q&A pairs + 200 NHS nursing abbreviations - **Accuracy**: 81.3% Accuracy@1 on medical retrieval ### Author Created by **Lincoln Gombedza** ([@NurseCitizenDeveloper](https://huggingface.co/NurseCitizenDeveloper)) Part of the **Nursing Citizen Development** movement and **OpenEnv Challenge** submission. --- **Disclaimer**: This tool is for educational and assistive purposes only. Always verify clinical information and follow local trust policies. """) gr.Markdown(""" ---