|
|
"""
🏥 Nursing Language Translator
Translates NHS clinical shorthand to formal language using NurseEmbed-300M
"""
|
|
import json
import re
from collections import defaultdict

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
# Load the embedding model once at import time; every lookup reuses this instance.
print("Loading NurseEmbed-300M...")
model = SentenceTransformer("NurseCitizenDeveloper/NurseEmbed-300M")
print("✅ Model loaded!")

# The knowledge base is a JSON list of entries, each providing at least an
# "abbrev" key. Decode explicitly as UTF-8 so loading does not depend on the
# platform's default locale encoding.
with open("knowledge_base.json", "r", encoding="utf-8") as f:
    KNOWLEDGE_BASE = json.load(f)

# Pre-compute one embedding per abbreviation so a query only has to encode
# its own text. KB_EMBEDDINGS[i] corresponds to KNOWLEDGE_BASE[i].
print("Computing knowledge base embeddings...")
KB_TEXTS = [item["abbrev"] for item in KNOWLEDGE_BASE]
KB_EMBEDDINGS = model.encode(KB_TEXTS)
print(f"✅ {len(KB_TEXTS)} abbreviations indexed!")
|
|
|
|
|
|
|
|
# NEWS2 score bands: inclusive (low, high) range -> (risk label, action, status glyph).
NEWS2_THRESHOLDS = {
    (0, 0): ("Low risk", "Routine monitoring", "🟢"),
    (1, 4): ("Low-medium risk", "Increased monitoring frequency", "🟡"),
    (5, 6): ("Medium risk", "Urgent response - inform senior nurse/doctor", "🟠"),
    (7, 20): ("High risk", "Emergency response - immediate senior review, consider critical care", "🔴"),
}


def interpret_news2(score):
    """Interpret a NEWS2 score and return the matching clinical action.

    Args:
        score: The score as an int or anything ``int()`` accepts (e.g. the
            digit string captured by a regex).

    Returns:
        A Markdown string naming the risk band and recommended action, a
        warning string when the number lies outside every band (0-20), or
        ``None`` when ``score`` cannot be converted to an integer.
    """
    # Only the conversion can legitimately fail on bad input; keep the try
    # narrow so unrelated errors are not silently turned into None.
    try:
        score = int(score)
    except (TypeError, ValueError):
        return None

    for (low, high), (risk, action, emoji) in NEWS2_THRESHOLDS.items():
        if low <= score <= high:
            # NOTE(review): the arrow was restored from a mis-encoded byte
            # sequence; confirm it is the intended character.
            return f"{emoji} **NEWS2 {score}**: {risk}\n → {action}"
    return f"⚠️ NEWS2 {score}: Invalid score (should be 0-20)"
|
|
|
|
|
def find_abbreviation_match(text, threshold=0.3, top_k=5):
    """Find knowledge-base abbreviations semantically similar to ``text``.

    The query is encoded with the module-level ``model`` and compared by
    cosine similarity against the pre-computed ``KB_EMBEDDINGS``.

    Args:
        text: Query string (a word or short phrase).
        threshold: Only entries whose similarity is strictly greater than
            this value are returned.
        top_k: Maximum number of matches to return (defaults to 5).

    Returns:
        A list of dicts with keys ``abbrev``, ``full``, ``category`` and
        ``similarity`` (a float), ordered from most to least similar. The
        list is empty for blank input, a non-positive ``top_k``, or an empty
        knowledge base.
    """
    if not text.strip() or top_k <= 0:
        return []
    if not KNOWLEDGE_BASE:
        # Nothing to compare against.
        return []

    query_embedding = model.encode([text])
    # Row 0 holds the similarities of the single query to every KB entry.
    similarities = cosine_similarity(query_embedding, KB_EMBEDDINGS)[0]

    matches = [
        {
            "abbrev": entry["abbrev"],
            "full": entry["full"],
            "category": entry["category"],
            "similarity": float(sim),
        }
        for entry, sim in zip(KNOWLEDGE_BASE, similarities)
        if sim > threshold
    ]
    matches.sort(key=lambda m: m["similarity"], reverse=True)
    return matches[:top_k]
|
|
|
|
|
def extract_demographics(text):
    """Extract age and gender from free-text clinical shorthand.

    Args:
        text: The clinical note to scan.

    Returns:
        A Markdown fragment with an ``**Age**`` line and/or a ``**Gender**``
        line (each terminated by a newline), or ``None`` when neither could
        be found.
    """
    # Tried in order; the first pattern that matches anywhere supplies the age.
    age_patterns = (
        r'(\d+)\s*[yY]/[oO]',
        r'(\d+)\s*[yY][oO]',
        r'(\d+)\s*[yY]ear',
        r'(\d+)\s*[mM]ale',
        r'(\d+)\s*[fF]emale',
        r'(\d+)\s*[MF]\b',
    )

    age = None
    for pattern in age_patterns:
        match = re.search(pattern, text)
        if match:
            age = match.group(1)
            break

    # The final alternative in each expression handles a sex letter written
    # directly after the age ("72M", "85F"): there is no word boundary between
    # a digit and a letter, so the standalone-letter alternative cannot match.
    gender = None
    if re.search(r'\b[mM]ale\b|\b[mM]\b|\bman\b|\bgentleman\b|\d\s*M\b', text):
        gender = "Male"
    elif re.search(r'\b[fF]emale\b|\b[fF]\b|\bwoman\b|\blady\b|\d\s*F\b', text):
        gender = "Female"

    result = ""
    if age:
        result += f"**Age**: {age} years old\n"
    if gender:
        result += f"**Gender**: {gender}\n"

    return result if result else None
|
|
|
|
|
# Matches a NEWS / NEWS2 mention followed by its numeric score. When the text
# reads "NEWS2", the "2" must be consumed as part of the name; otherwise the
# engine could backtrack and report that "2" as the score (e.g. for
# "NEWS2 elevated" or "NEWS2: 7").
_NEWS_SCORE_RE = re.compile(
    r'\bNEWS(?:2\b|(?!2\b))\s*(?:score\s*)?(?:is\s*|of\s*|=\s*|:\s*)?(\d+)',
    re.IGNORECASE,
)

# Shorthand containing punctuation or spaces, looked up as whole phrases in
# addition to the individual tokens.
_PHRASES_TO_CHECK = ("c/o", "y/o", "O/E", "U&E", "?PE", "NOF #")


def _collect_terms(input_text):
    """Look up each distinct token and known phrase; return matches, best first."""
    words = re.findall(r'\b[\w/]+\b|[?#][\w]*', input_text)
    lowered_text = input_text.lower()

    candidates = [word for word in words if len(word) >= 2]
    candidates += [p for p in _PHRASES_TO_CHECK if p.lower() in lowered_text]

    found_terms = []
    seen = set()
    for candidate in candidates:
        key = candidate.lower()
        # Check for duplicates before the (comparatively expensive) lookup.
        if key in seen:
            continue
        seen.add(key)

        matches = find_abbreviation_match(candidate, threshold=0.4)
        if matches:
            best_match = matches[0]
            found_terms.append({
                "original": candidate,
                "translation": best_match["full"],
                "category": best_match["category"],
                "confidence": best_match["similarity"],
            })

    found_terms.sort(key=lambda term: term["confidence"], reverse=True)
    return found_terms


def _apply_translations(text, terms):
    """Replace the first standalone occurrence of each term with its bold translation."""
    replacements = {}
    for term in terms:
        key = term["original"].lower()
        if key:
            replacements.setdefault(key, term["translation"])
    if not replacements:
        return text

    def bounded(literal):
        # Guard only the edges that are word characters: "AF" must not match
        # inside "after", while "?PE" or "NOF #" may sit next to punctuation.
        head = r'(?<!\w)' if re.match(r'\w', literal) else ''
        tail = r'(?!\w)' if re.search(r'\w$', literal) else ''
        return head + re.escape(literal) + tail

    # Longest alternatives first so a phrase wins over a token it contains.
    ordered = sorted(replacements, key=len, reverse=True)
    pattern = re.compile('|'.join(bounded(key) for key in ordered), re.IGNORECASE)
    pending = set(replacements)

    def swap(match):
        key = match.group(0).lower()
        if key not in pending:
            # Already replaced once (or case-folding produced an unknown key).
            return match.group(0)
        pending.discard(key)
        return f"**{replacements[key]}**"

    # A single pass over the ORIGINAL text: inserted translations are never
    # rescanned, and a callable replacement keeps backslashes in a translation
    # from being read as regex escapes.
    return pattern.sub(swap, text)


def translate_nursing_text(input_text):
    """Build a Markdown translation report for a clinical shorthand note.

    The report contains, where applicable: patient demographics, a NEWS2
    interpretation, a table of recognised terms with their confidence, and
    the note rewritten with recognised terms replaced by their translations.

    Args:
        input_text: The clinical note entered by the user.

    Returns:
        The report as a Markdown string, or a short prompt when the input is
        blank.
    """
    if not input_text.strip():
        return "Please enter clinical text to translate."

    # NOTE(review): the "π" prefix on two headings in this function is the
    # remnant of a mis-encoded emoji whose original character could not be
    # determined; it is left unchanged.
    output = [
        "# π Translation Report\n",
        f"**Original**: _{input_text}_\n",
        "---\n",
    ]

    demographics = extract_demographics(input_text)
    if demographics:
        output.extend(["## 👤 Patient Demographics\n", demographics, ""])

    news_match = _NEWS_SCORE_RE.search(input_text)
    if news_match:
        interpretation = interpret_news2(news_match.group(1))
        # interpret_news2 may return None; never hand None to str.join below.
        if interpretation:
            output.extend(["## ⚠️ Early Warning Score\n", interpretation, ""])

    found_terms = _collect_terms(input_text)

    if found_terms:
        output.append("## π Clinical Terms Identified\n")
        output.append("| Term | Translation | Category | Confidence |")
        output.append("|------|-------------|----------|------------|")
        for term in found_terms:
            if term["confidence"] > 0.7:
                conf_bar = "🟢"
            elif term["confidence"] > 0.5:
                conf_bar = "🟡"
            else:
                conf_bar = "🟠"
            output.append(
                f"| `{term['original']}` | {term['translation']} | "
                f"{term['category']} | {conf_bar} {term['confidence']:.0%} |"
            )
        output.append("")

    output.append("## ✅ Formal Translation\n")
    formal_text = _apply_translations(input_text, found_terms)
    output.append(f"> {formal_text}\n")

    return "\n".join(output)
|
|
|
|
|
|
|
|
def get_abbreviation_list():
    """Return the knowledge base as a Markdown reference grouped by category.

    Returns:
        A Markdown string with one ``##`` section per category (sorted by
        name). Each entry is its own bullet item, because consecutive lines
        separated by a single newline are otherwise merged into one paragraph
        by standard Markdown rendering.
    """
    entries_by_category = defaultdict(list)
    for item in KNOWLEDGE_BASE:
        entries_by_category[item["category"]].append(
            f"- `{item['abbrev']}` → {item['full']}"
        )

    # NOTE(review): the "π" in the title is the remnant of a mis-encoded emoji
    # whose original character could not be determined; left unchanged.
    output = ["# π NHS Abbreviation Reference\n"]
    for cat in sorted(entries_by_category):
        output.append(f"## {cat}\n")
        output.append("\n".join(entries_by_category[cat]))
        output.append("")

    return "\n".join(output)
|
|
|
|
|
|
|
|
|
|
|
# Gradio interface: a translate tab, a static abbreviation reference and an
# about page.
# NOTE(review): the "π" prefix on the tab and button labels is the remnant of
# mis-encoded emoji whose original characters could not be determined; those
# labels are left unchanged. Glyphs that could be identified were restored.
with gr.Blocks(
    title="🏥 Nursing Language Translator",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
) as app:
    gr.Markdown("""
    # 🏥 Nursing Language Translator

    **Powered by NurseEmbed-300M** — A clinical embedding model trained on NHS nursing terminology.

    Translates clinical shorthand, abbreviations, and NEWS2 scores into formal language.
    """)

    with gr.Tabs():
        with gr.Tab("π Translate"):
            with gr.Row():
                with gr.Column(scale=1):
                    input_text = gr.Textbox(
                        label="Clinical Shorthand Input",
                        placeholder="e.g., 72M, c/o SOB, NEWS2=7, PMH: COPD, ?PE, started LMWH",
                        lines=4
                    )
                    translate_btn = gr.Button("π Translate", variant="primary")

                    gr.Examples(
                        examples=[
                            ["72M c/o SOB, NEWS2 score is 7, PMH: COPD, AF. Started on Salbutamol NEB and LMWH."],
                            ["Pt admitted via A&E with ?PE. CXR NAD. ABG shows type 1 resp failure. For CT PA."],
                            ["85F NOF # post-op day 2. Increasing confusion, Temp 38.2. ?UTI vs ?SSI. Sent MSU."],
                            ["54M NSTEMI. ECG: ST depression V3-V6. Troponin elevated. For ECHO and cardiology review."],
                            ["NEWS2 9 - patient deteriorating. RR 28, O2 sats 88% on 4L, HR 120, BP 90/60."]
                        ],
                        inputs=input_text,
                        label="Example Clinical Notes"
                    )

                with gr.Column(scale=1):
                    output_text = gr.Markdown(label="Translation")

            # Clicking the button renders the report into the Markdown pane.
            translate_btn.click(
                fn=translate_nursing_text,
                inputs=input_text,
                outputs=output_text
            )

        with gr.Tab("π Reference"):
            # Rendered once, when the interface is built.
            gr.Markdown(get_abbreviation_list())

        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
            ## About This Tool

            The **Nursing Language Translator** uses **NurseEmbed-300M**, a clinical embedding model
            fine-tuned on NHS nursing terminology.

            ### How It Works
            1. **Semantic Matching**: Uses vector embeddings to match abbreviations to their meanings
            2. **NEWS2 Interpretation**: Automatically interprets Early Warning Scores
            3. **Context-Aware**: Understands clinical context, not just string matching

            ### Model Details
            - **Base Model**: EmbeddingGemma-300M
            - **Training Data**: 10,000 medical Q&A pairs + 200 NHS nursing abbreviations
            - **Accuracy**: 81.3% Accuracy@1 on medical retrieval

            ### Author
            Created by **Lincoln Gombedza** ([@NurseCitizenDeveloper](https://huggingface.co/NurseCitizenDeveloper))

            Part of the **Nursing Citizen Development** movement and **OpenEnv Challenge** submission.

            ---

            **Disclaimer**: This tool is for educational and assistive purposes only.
            Always verify clinical information and follow local trust policies.
            """)

    gr.Markdown("""
    ---
    <center>
    🩺 Built with ❤️ for NHS Nurses |
    <a href="https://huggingface.co/NurseCitizenDeveloper/NurseEmbed-300M">Model</a> |
    <a href="https://github.com/Clinical-Quality-Artifical-Intelligence/nursing-language-translator">GitHub</a>
    </center>
    """)


if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|