NurseCitizenDeveloper's picture
Fix GitHub link to correct nursing-language-translator repo
56d9350
"""
πŸ₯ Nursing Language Translator
Translates NHS clinical shorthand to formal language using NurseEmbed-300M
"""
import gradio as gr
import json
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Load the NurseEmbed model once at import time; every request reuses it.
print("Loading NurseEmbed-300M...")
model = SentenceTransformer("NurseCitizenDeveloper/NurseEmbed-300M")
print("✅ Model loaded!")

# Load knowledge base: a list of entries, each read elsewhere in this file via
# the keys "abbrev", "full" and "category".
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# default (which is not UTF-8 on every system).
with open("knowledge_base.json", "r", encoding="utf-8") as f:
    KNOWLEDGE_BASE = json.load(f)

# Pre-compute embeddings for all abbreviations.
# KB_EMBEDDINGS[i] is the embedding of KNOWLEDGE_BASE[i]["abbrev"], so a
# similarity index maps straight back to its knowledge-base entry.
print("Computing knowledge base embeddings...")
KB_TEXTS = [item["abbrev"] for item in KNOWLEDGE_BASE]
KB_EMBEDDINGS = model.encode(KB_TEXTS)
print(f"✅ {len(KB_TEXTS)} abbreviations indexed!")
# NEWS2 interpretation.
# Maps an inclusive (low, high) score band to (risk level, recommended action,
# status emoji).
NEWS2_THRESHOLDS = {
    (0, 0): ("Low risk", "Routine monitoring", "🟢"),
    (1, 4): ("Low-medium risk", "Increased monitoring frequency", "🟡"),
    (5, 6): ("Medium risk", "Urgent response - inform senior nurse/doctor", "🟠"),
    (7, 20): (
        "High risk",
        "Emergency response - immediate senior review, consider critical care",
        "🔴",
    ),
}


def interpret_news2(score):
    """Interpret a NEWS2 score and return the matching clinical action.

    Args:
        score: The score as an int, or anything ``int()`` accepts (e.g. the
            digit string captured from free text).

    Returns:
        A Markdown string with the risk level and recommended action when the
        score falls inside a band of ``NEWS2_THRESHOLDS``; a warning string
        when it is an integer outside every band; ``None`` when ``score``
        cannot be converted to an integer.
    """
    # Only the conversion can fail, so keep the try body minimal and catch
    # just the conversion errors instead of a bare ``except``.
    try:
        score = int(score)
    except (TypeError, ValueError, OverflowError):
        return None

    for (low, high), (risk, action, emoji) in NEWS2_THRESHOLDS.items():
        if low <= score <= high:
            return f"{emoji} **NEWS2 {score}**: {risk}\n → {action}"
    return f"⚠️ NEWS2 {score}: Invalid score (should be 0-20)"
def find_abbreviation_match(text, threshold=0.3):
    """Return the knowledge-base entries most similar to *text*.

    The text is embedded with the module-level ``model`` and compared by
    cosine similarity against ``KB_EMBEDDINGS``.

    Args:
        text: The term or phrase to look up.
        threshold: Entries must score strictly above this similarity.

    Returns:
        At most five dicts with keys ``abbrev``, ``full``, ``category`` and
        ``similarity``, ordered from most to least similar. An empty list is
        returned for blank input.
    """
    if text.strip() == "":
        return []

    # One query row against every knowledge-base row; keep the single result row.
    query_vector = model.encode([text])
    scores = cosine_similarity(query_vector, KB_EMBEDDINGS)[0]

    candidates = [
        {
            "abbrev": KNOWLEDGE_BASE[position]["abbrev"],
            "full": KNOWLEDGE_BASE[position]["full"],
            "category": KNOWLEDGE_BASE[position]["category"],
            "similarity": float(score),
        }
        for position, score in enumerate(scores)
        if score > threshold
    ]

    # Best matches first, capped at the top five.
    ranked = sorted(candidates, key=lambda entry: entry["similarity"], reverse=True)
    return ranked[:5]
# Age patterns, tried in this order; the first one that matches supplies the age.
_AGE_PATTERNS = tuple(
    re.compile(source)
    for source in (
        r'(\d+)\s*[yY]/[oO]',   # 72 y/o
        r'(\d+)\s*[yY][oO]',    # 72yo
        r'(\d+)\s*[yY]ear',     # 72 year
        r'(\d+)\s*[mM]ale',     # 72 male
        r'(\d+)\s*[fF]emale',   # 72 female
        r'(\d+)\s*[MF]\b',      # 72M or 72F
    )
)

# Gender cues. The `(?<=\d)M\b` / `(?<=\d)F\b` alternatives cover the attached
# form ("72M", "85F"): there is no word boundary between a digit and a letter,
# so `\b[mM]\b` / `\b[fF]\b` alone can never match it.
_MALE_RE = re.compile(r'\b[mM]ale\b|\b[mM]\b|(?<=\d)M\b|\bman\b|\bgentleman\b')
_FEMALE_RE = re.compile(r'\b[fF]emale\b|\b[fF]\b|(?<=\d)F\b|\bwoman\b|\blady\b')


def extract_demographics(text):
    """Extract age and gender from clinical free text.

    Args:
        text: The clinical note to scan.

    Returns:
        A Markdown fragment with an ``**Age**`` line and/or a ``**Gender**``
        line (each newline-terminated), or ``None`` when neither is found.
    """
    age = None
    for pattern in _AGE_PATTERNS:
        match = pattern.search(text)
        if match:
            age = match.group(1)
            break

    # Male cues are checked first; female cues only when no male cue matched.
    gender = None
    if _MALE_RE.search(text):
        gender = "Male"
    elif _FEMALE_RE.search(text):
        gender = "Female"

    lines = []
    if age:
        lines.append(f"**Age**: {age} years old\n")
    if gender:
        lines.append(f"**Gender**: {gender}\n")
    return "".join(lines) or None
# Finds a NEWS / NEWS2 mention and captures the numeric score that follows.
# `(?:2|(?!2))` consumes the "2" of "NEWS2" whenever it is present and never
# gives it back, so the name's own digit cannot be mistaken for the score
# ("NEWS2: 7" -> 7, "NEWS2 elevated" -> no score). ":" is accepted as a
# separator alongside "=", "is" and "of".
NEWS2_SCORE_PATTERN = re.compile(
    r'NEWS(?:2|(?!2))\s*(?:score\s*)?(?:is\s*|of\s*|[:=]\s*)?(\d+)',
    re.IGNORECASE,
)

# Shorthand checked in addition to the tokenised words, because the tokenizer
# may split it or miss it (e.g. "U&E", "NOF #").
_EXTRA_PHRASES = ("c/o", "y/o", "O/E", "U&E", "?PE", "NOF #")


def _edge_guarded_pattern(term):
    """Return regex source that matches *term* only as a whole token."""
    # Guard only the edges that are word characters, so "ST" cannot match
    # inside "NSTEMI" while "?PE" and "NOF #" still match as written.
    lead = r"(?<!\w)" if re.match(r"\w", term) else ""
    trail = r"(?!\w)" if re.match(r"\w", term[-1]) else ""
    return f"{lead}{re.escape(term)}{trail}"


def _substitute_terms(text, found_terms):
    """Replace the first whole-token occurrence of each term with its bold translation."""
    # Lower-cased original -> translation still waiting to be substituted.
    pending = {}
    for term in found_terms:
        if term["original"]:
            pending.setdefault(term["original"].lower(), term["translation"])
    if not pending:
        return text

    # Longest alternative first, so "NOF #" wins over "NOF" at the same position.
    alternatives = sorted(pending, key=len, reverse=True)
    combined = re.compile(
        "|".join(_edge_guarded_pattern(alt) for alt in alternatives),
        re.IGNORECASE,
    )

    def replace_once(match):
        translation = pending.pop(match.group(0).lower(), None)
        if translation is None:
            # This term was already substituted earlier in the text.
            return match.group(0)
        return f"**{translation}**"

    # A single pass over the original text: inserted translations are never
    # re-scanned, and a callable replacement keeps any backslashes in a
    # translation literal.
    return combined.sub(replace_once, text)


def translate_nursing_text(input_text):
    """Translate clinical shorthand into a Markdown translation report.

    The report contains the original text, any demographics found, an
    interpretation of a NEWS/NEWS2 score if one is mentioned, a table of the
    clinical terms identified, and the text with those terms expanded.

    Args:
        input_text: The clinical note entered by the user.

    Returns:
        The report as a Markdown string, or a prompt to enter text when the
        input is empty, blank or ``None``.
    """
    if not input_text or not input_text.strip():
        return "Please enter clinical text to translate."

    output = [
        "# 📋 Translation Report\n",
        f"**Original**: _{input_text}_\n",
        "---\n",
    ]

    # Extract demographics
    demographics = extract_demographics(input_text)
    if demographics:
        output.extend(["## 👤 Patient Demographics\n", demographics, ""])

    # Check for NEWS2 scores
    news_match = NEWS2_SCORE_PATTERN.search(input_text)
    if news_match:
        interpretation = interpret_news2(news_match.group(1))
        # interpret_news2 returns None for an unparseable score; skip the
        # section then, since joining a None would raise.
        if interpretation is not None:
            output.extend(["## ⚠️ Early Warning Score\n", interpretation, ""])

    found_terms = []
    seen = set()

    def record_best_match(candidate):
        # Keep only the single best knowledge-base match for the candidate.
        matches = find_abbreviation_match(candidate, threshold=0.4)
        if matches:
            best_match = matches[0]
            found_terms.append({
                "original": candidate,
                "translation": best_match["full"],
                "category": best_match["category"],
                "confidence": best_match["similarity"],
            })

    # Tokenize and find abbreviations (each distinct token is looked up once).
    for word in re.findall(r'\b[\w/]+\b|[?#][\w]*', input_text):
        key = word.lower()
        if len(word) < 2 or key in seen:
            continue
        seen.add(key)
        record_best_match(word)

    # Also check multi-word phrases. `seen` is consulted first so phrases that
    # were already handled as tokens are not embedded a second time.
    lowered_text = input_text.lower()
    for phrase in _EXTRA_PHRASES:
        key = phrase.lower()
        if key in seen or key not in lowered_text:
            continue
        seen.add(key)
        record_best_match(phrase)

    # Sort by confidence
    found_terms.sort(key=lambda term: term["confidence"], reverse=True)

    if found_terms:
        output.append("## 📖 Clinical Terms Identified\n")
        output.append("| Term | Translation | Category | Confidence |")
        output.append("|------|-------------|----------|------------|")
        for term in found_terms:
            if term["confidence"] > 0.7:
                conf_bar = "🟢"
            elif term["confidence"] > 0.5:
                conf_bar = "🟡"
            else:
                conf_bar = "🟠"
            output.append(
                f"| `{term['original']}` | {term['translation']} | "
                f"{term['category']} | {conf_bar} {term['confidence']:.0%} |"
            )
        output.append("")

    # Generate formal translation
    output.append("## ✅ Formal Translation\n")
    formal_text = _substitute_terms(input_text, found_terms)
    output.append(f"> {formal_text}\n")

    return "\n".join(output)
def get_abbreviation_list(knowledge_base=None):
    """Return a Markdown reference of abbreviations grouped by category.

    Args:
        knowledge_base: Optional iterable of entries with the keys ``abbrev``,
            ``full`` and ``category``. Defaults to the module-level
            ``KNOWLEDGE_BASE``.

    Returns:
        A Markdown string: a title, then one section per category (sorted by
        category name) listing each abbreviation and its full form in
        knowledge-base order.
    """
    if knowledge_base is None:
        knowledge_base = KNOWLEDGE_BASE

    categories = {}
    for item in knowledge_base:
        # One bullet per entry: lines separated only by a single newline are
        # merged into one paragraph by Markdown renderers.
        categories.setdefault(item["category"], []).append(
            f"- `{item['abbrev']}` → {item['full']}"
        )

    output = ["# 📚 NHS Abbreviation Reference\n"]
    for cat in sorted(categories):
        output.append(f"## {cat}\n")
        output.append("\n".join(categories[cat]))
        output.append("")
    return "\n".join(output)
# Markdown shown in the interface. Kept as flush-left module constants so the
# rendered text does not depend on the indentation of the layout code below.
_HEADER_MD = """
# 🏥 Nursing Language Translator
**Powered by NurseEmbed-300M** — A clinical embedding model trained on NHS nursing terminology.
Translates clinical shorthand, abbreviations, and NEWS2 scores into formal language.
"""

# The blank line before `---` matters: directly under a paragraph line, `---`
# is read as a heading underline instead of a horizontal rule.
_ABOUT_MD = """
## About This Tool
The **Nursing Language Translator** uses **NurseEmbed-300M**, a clinical embedding model
fine-tuned on NHS nursing terminology.
### How It Works
1. **Semantic Matching**: Uses vector embeddings to match abbreviations to their meanings
2. **NEWS2 Interpretation**: Automatically interprets Early Warning Scores
3. **Context-Aware**: Understands clinical context, not just string matching
### Model Details
- **Base Model**: EmbeddingGemma-300M
- **Training Data**: 10,000 medical Q&A pairs + 200 NHS nursing abbreviations
- **Accuracy**: 81.3% Accuracy@1 on medical retrieval
### Author
Created by **Lincoln Gombedza** ([@NurseCitizenDeveloper](https://huggingface.co/NurseCitizenDeveloper))
Part of the **Nursing Citizen Development** movement and **OpenEnv Challenge** submission.

---
**Disclaimer**: This tool is for educational and assistive purposes only.
Always verify clinical information and follow local trust policies.
"""

_FOOTER_MD = """
---
<center>
🩺 Built with ❤️ for NHS Nurses |
<a href="https://huggingface.co/NurseCitizenDeveloper/NurseEmbed-300M">Model</a> |
<a href="https://github.com/Clinical-Quality-Artifical-Intelligence/nursing-language-translator">GitHub</a>
</center>
"""

# Build the Gradio interface
with gr.Blocks(
    title="🏥 Nursing Language Translator",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"),
) as app:
    gr.Markdown(_HEADER_MD)

    with gr.Tabs():
        # Translate tab: shorthand input on the left, rendered report on the right.
        with gr.Tab("🔄 Translate"):
            with gr.Row():
                with gr.Column(scale=1):
                    input_text = gr.Textbox(
                        label="Clinical Shorthand Input",
                        placeholder="e.g., 72M, c/o SOB, NEWS2=7, PMH: COPD, ?PE, started LMWH",
                        lines=4,
                    )
                    translate_btn = gr.Button("🔄 Translate", variant="primary")
                    gr.Examples(
                        examples=[
                            ["72M c/o SOB, NEWS2 score is 7, PMH: COPD, AF. Started on Salbutamol NEB and LMWH."],
                            ["Pt admitted via A&E with ?PE. CXR NAD. ABG shows type 1 resp failure. For CT PA."],
                            ["85F NOF # post-op day 2. Increasing confusion, Temp 38.2. ?UTI vs ?SSI. Sent MSU."],
                            ["54M NSTEMI. ECG: ST depression V3-V6. Troponin elevated. For ECHO and cardiology review."],
                            ["NEWS2 9 - patient deteriorating. RR 28, O2 sats 88% on 4L, HR 120, BP 90/60."],
                        ],
                        inputs=input_text,
                        label="Example Clinical Notes",
                    )
                with gr.Column(scale=1):
                    output_text = gr.Markdown(label="Translation")

            translate_btn.click(
                fn=translate_nursing_text,
                inputs=input_text,
                outputs=output_text,
            )

        # Reference tab: the abbreviation list is rendered once, at build time.
        with gr.Tab("📚 Reference"):
            gr.Markdown(get_abbreviation_list())

        with gr.Tab("ℹ️ About"):
            gr.Markdown(_ABOUT_MD)

    gr.Markdown(_FOOTER_MD)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860)