Spaces:

NurseCitizenDeveloper
/

Nursing-Language-Translator

Sleeping

App Files Files Community

Nursing-Language-Translator / app.py

NurseCitizenDeveloper

Fix GitHub link to correct nursing-language-translator repo

56d9350 8 days ago

raw

history blame contribute delete

11 kB

	"""
	🏥 Nursing Language Translator
	Translates NHS clinical shorthand to formal language using NurseEmbed-300M
	"""
	import gradio as gr
	import json
	import re
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import numpy as np

	# Load the NurseEmbed sentence-embedding model once at import time so that
	# every translation request reuses the same in-memory instance.
	print("Loading NurseEmbed-300M...")
	model = SentenceTransformer("NurseCitizenDeveloper/NurseEmbed-300M")
	print("✅ Model loaded!")

	# Load the knowledge base. Entries are read elsewhere in this file through the
	# keys "abbrev", "full" and "category". The encoding is pinned to UTF-8 so the
	# file is decoded the same way regardless of the host's locale default.
	with open("knowledge_base.json", "r", encoding="utf-8") as f:
	    KNOWLEDGE_BASE = json.load(f)

	# Pre-compute embeddings for all abbreviations so each lookup only has to
	# encode the query text. KB_EMBEDDINGS is index-aligned with KNOWLEDGE_BASE.
	print("Computing knowledge base embeddings...")
	KB_TEXTS = [item["abbrev"] for item in KNOWLEDGE_BASE]
	KB_EMBEDDINGS = model.encode(KB_TEXTS)
	print(f"✅ {len(KB_TEXTS)} abbreviations indexed!")

	# NEWS2 interpretation table: each key is an inclusive (low, high) score band
	# and each value is (risk label, recommended action, status emoji).
	NEWS2_THRESHOLDS = {
	    (0, 0): ("Low risk", "Routine monitoring", "🟢"),
	    (1, 4): ("Low-medium risk", "Increased monitoring frequency", "🟡"),
	    (5, 6): ("Medium risk", "Urgent response - inform senior nurse/doctor", "🟠"),
	    (7, 20): ("High risk", "Emergency response - immediate senior review, consider critical care", "🔴"),
	}


	def interpret_news2(score):
	    """Interpret a NEWS2 score and return the matching clinical action.

	    Args:
	        score: The score as an int, or anything ``int()`` can convert
	            (for example the digit string captured from free text).

	    Returns:
	        A two-line message with the risk band and recommended action, an
	        "Invalid score" message when the value falls outside every band in
	        ``NEWS2_THRESHOLDS``, or ``None`` when ``score`` cannot be converted
	        to an integer.
	    """
	    # Only the conversion can fail, so only that is guarded. Catching the
	    # specific conversion errors (rather than a bare ``except``) avoids
	    # swallowing KeyboardInterrupt/SystemExit or masking unrelated bugs.
	    try:
	        score = int(score)
	    except (TypeError, ValueError, OverflowError):
	        return None

	    for (low, high), (risk, action, emoji) in NEWS2_THRESHOLDS.items():
	        if low <= score <= high:
	            return f"{emoji} NEWS2 {score}: {risk}\n → {action}"
	    return f"⚠️ NEWS2 {score}: Invalid score (should be 0-20)"

	def find_abbreviation_match(text, threshold=0.3):
	    """Look up knowledge-base entries whose abbreviation is semantically close to ``text``.

	    Args:
	        text: The word or phrase to look up.
	        threshold: Cosine-similarity cut-off; only entries scoring strictly
	            above it are kept.

	    Returns:
	        At most five dicts with the keys "abbrev", "full", "category" and
	        "similarity", ordered from most to least similar. Blank input
	        yields an empty list without touching the model.
	    """
	    if not text.strip():
	        return []

	    # Embed the query and score it against every pre-computed abbreviation.
	    query_vector = model.encode([text])
	    scores = cosine_similarity(query_vector, KB_EMBEDDINGS)[0]

	    # Keep only the entries that clear the threshold.
	    candidates = [
	        {
	            "abbrev": KNOWLEDGE_BASE[position]["abbrev"],
	            "full": KNOWLEDGE_BASE[position]["full"],
	            "category": KNOWLEDGE_BASE[position]["category"],
	            "similarity": float(score),
	        }
	        for position, score in enumerate(scores)
	        if score > threshold
	    ]

	    # Best match first, capped at the top five.
	    ranked = sorted(candidates, key=lambda entry: entry["similarity"], reverse=True)
	    return ranked[:5]

	def extract_demographics(text):
	    """Extract the patient's age and gender from free-text clinical notes.

	    Age is taken from the first pattern that matches, tried in order:
	    "72 y/o", "72yo", "72 year", "72 male", "72 female", "72M"/"72F".
	    Gender is taken from a compact age/sex token ("72M", "85F") when one is
	    present, otherwise from the words male/man/gentleman or
	    female/woman/lady, or a stand-alone m/f letter.

	    Args:
	        text: The clinical note to scan.

	    Returns:
	        A string with an "Age: ..." line and/or a "Gender: ..." line (each
	        newline-terminated), or ``None`` when neither could be found.
	    """
	    age_patterns = [
	        r'(\d+)\s*[yY]/[oO]',   # 72 y/o
	        r'(\d+)\s*[yY][oO]',    # 72yo
	        r'(\d+)\s*[yY]ear',     # 72 year
	        r'(\d+)\s*[mM]ale',     # 72 male
	        r'(\d+)\s*[fF]emale',   # 72 female
	        r'(\d+)\s*[MF]\b',      # 72M or 72F
	    ]

	    age = None
	    for pattern in age_patterns:
	        match = re.search(pattern, text)
	        if match:
	            age = match.group(1)
	            break

	    gender = None
	    # A sex letter glued to the age ("72M") has no word boundary between the
	    # digit and the letter, so a plain \b[mM]\b can never see it; capture
	    # it explicitly. This token is the most specific cue, so it is checked first.
	    compact = re.search(r'\b\d+\s*([MF])\b', text)
	    if compact:
	        gender = "Male" if compact.group(1) == "M" else "Female"
	    # Alternatives are separated by a real regex "|" (an escaped "\|" would
	    # only match a literal pipe character and the check would never fire).
	    elif re.search(r'\b[mM]ale\b|\b[mM]\b|\bman\b|\bgentleman\b', text):
	        gender = "Male"
	    elif re.search(r'\b[fF]emale\b|\b[fF]\b|\bwoman\b|\blady\b', text):
	        gender = "Female"

	    lines = []
	    if age:
	        lines.append(f"Age: {age} years old\n")
	    if gender:
	        lines.append(f"Gender: {gender}\n")

	    return "".join(lines) if lines else None

	# "NEWS" or "NEWS2", an optional "score", an optional connector
	# ("is", "of", "=", ":"), then the numeric value. The \b after "2?" stops the
	# regex from backtracking and reading the "2" of a bare "NEWS2" as the score.
	_NEWS2_SCORE_RE = re.compile(
	    r'\bNEWS2?\b\s*(?:score\s*)?(?:is\s+|of\s+|[=:]\s*)?(\d+)',
	    re.IGNORECASE,
	)

	# Word-like tokens (letters, digits, "/") or tokens introduced by "?" / "#".
	_TOKEN_RE = re.compile(r'\b[\w/]+\b|[?#][\w]*')

	# Shorthand containing characters the tokenizer splits on or drops
	# (e.g. "&", a space), looked up verbatim when present in the input.
	_PHRASES_TO_CHECK = ("c/o", "y/o", "O/E", "U&E", "?PE", "NOF #")


	def _best_term(candidate):
	    """Return a term record for the closest knowledge-base match, or None."""
	    matches = find_abbreviation_match(candidate, threshold=0.4)
	    if not matches:
	        return None
	    best_match = matches[0]  # find_abbreviation_match returns best-first
	    return {
	        "original": candidate,
	        "translation": best_match["full"],
	        "category": best_match["category"],
	        "confidence": best_match["similarity"],
	    }


	def _collect_terms(input_text):
	    """Identify clinical terms in the text, sorted by descending confidence."""
	    found_terms = []
	    seen = set()

	    for word in _TOKEN_RE.findall(input_text):
	        key = word.lower()
	        if key in seen or len(word) < 2:
	            continue
	        seen.add(key)
	        term = _best_term(word)
	        if term:
	            found_terms.append(term)

	    lowered_input = input_text.lower()
	    for phrase in _PHRASES_TO_CHECK:
	        key = phrase.lower()
	        # Check the cheap conditions first so the model is only queried for
	        # phrases that are present and not already handled as a token.
	        if key in seen or key not in lowered_input:
	            continue
	        term = _best_term(phrase)
	        if term:
	            seen.add(key)
	            found_terms.append(term)

	    found_terms.sort(key=lambda x: x["confidence"], reverse=True)
	    return found_terms


	def _expand_terms(text, found_terms):
	    """Replace the first stand-alone occurrence of each term with its full form.

	    All terms are substituted in a single pass over the original text, so an
	    expansion is never re-scanned for further abbreviations, and a term only
	    matches when it is not embedded inside a longer word.
	    """
	    replacements = {term["original"].lower(): term["translation"] for term in found_terms}
	    if not replacements:
	        return text

	    # Longest alternative first so "NOF #" is preferred over "NOF".
	    alternatives = sorted((term["original"] for term in found_terms), key=len, reverse=True)
	    pattern = re.compile(
	        r'(?<!\w)(?:' + '|'.join(re.escape(alt) for alt in alternatives) + r')(?!\w)',
	        re.IGNORECASE,
	    )

	    used = set()

	    def _swap(match):
	        key = match.group(0).lower()
	        if key in used or key not in replacements:
	            return match.group(0)
	        used.add(key)
	        # Returned from a function, so backslashes in the translation are
	        # inserted literally rather than parsed as regex group references.
	        return replacements[key]

	    return pattern.sub(_swap, text)


	def translate_nursing_text(input_text):
	    """Translate clinical shorthand into a Markdown report.

	    The report contains, in order: the original text, patient demographics
	    (when detected), the NEWS2 interpretation (when a score is found), a table
	    of identified clinical terms with confidence, and the text with the
	    identified terms expanded.

	    Args:
	        input_text: The clinical note entered by the user.

	    Returns:
	        The Markdown report, or a prompt message when the input is blank.
	    """
	    if not input_text.strip():
	        return "Please enter clinical text to translate."

	    output = []
	    output.append("# 📋 Translation Report\n")
	    output.append(f"Original: _{input_text}_\n")
	    output.append("---\n")

	    # Extract demographics
	    demographics = extract_demographics(input_text)
	    if demographics:
	        output.append("## 👤 Patient Demographics\n")
	        output.append(demographics)
	        output.append("")

	    # Check for NEWS2 scores
	    news_match = _NEWS2_SCORE_RE.search(input_text)
	    if news_match:
	        interpretation = interpret_news2(news_match.group(1))
	        # interpret_news2 may return None; never put None into the list that
	        # is joined at the end.
	        if interpretation:
	            output.append("## ⚠️ Early Warning Score\n")
	            output.append(interpretation)
	            output.append("")

	    found_terms = _collect_terms(input_text)

	    if found_terms:
	        output.append("## 📖 Clinical Terms Identified\n")
	        output.append("| Term | Translation | Category | Confidence |")
	        output.append("|------|-------------|----------|------------|")
	        for term in found_terms:
	            conf_bar = "🟢" if term["confidence"] > 0.7 else ("🟡" if term["confidence"] > 0.5 else "🟠")
	            output.append(f"| `{term['original']}` | {term['translation']} | {term['category']} | {conf_bar} {term['confidence']:.0%} |")
	        output.append("")

	        # Generate formal translation
	        output.append("## ✅ Formal Translation\n")
	        formal_text = _expand_terms(input_text, found_terms)
	        output.append(f"> {formal_text}\n")

	    return "\n".join(output)


	def get_abbreviation_list():
	    """Build the Markdown reference page listing every abbreviation by category.

	    Returns:
	        A Markdown string: a top-level heading followed by one "## category"
	        section per category (alphabetical), each listing its entries as
	        "`abbrev` → full", one per line in knowledge-base order.
	    """
	    grouped = {}
	    for entry in KNOWLEDGE_BASE:
	        bucket = grouped.setdefault(entry["category"], [])
	        bucket.append(f"`{entry['abbrev']}` → {entry['full']}")

	    sections = ["# 📚 NHS Abbreviation Reference\n"]
	    for category_name in sorted(grouped):
	        sections.extend([
	            f"## {category_name}\n",
	            "\n".join(grouped[category_name]),
	            "",
	        ])

	    return "\n".join(sections)


	# Build the Gradio interface.
	# The Blocks container is bound to `app`, which the __main__ guard at the
	# bottom of the file launches. It holds a header, three tabs (Translate,
	# Reference, About) and a footer with links.
	with gr.Blocks(
	title="🏥 Nursing Language Translator",
	theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
	) as app:
	# Header banner.
	gr.Markdown("""
	# 🏥 Nursing Language Translator

	Powered by NurseEmbed-300M — A clinical embedding model trained on NHS nursing terminology.

	Translates clinical shorthand, abbreviations, and NEWS2 scores into formal language.
	""")

	with gr.Tabs():
	# Translate tab: an input column and an output column.
	with gr.Tab("🔄 Translate"):
	with gr.Row():
	with gr.Column(scale=1):
	# Free-text box for the clinical shorthand plus the button that triggers translation.
	input_text = gr.Textbox(
	label="Clinical Shorthand Input",
	placeholder="e.g., 72M, c/o SOB, NEWS2=7, PMH: COPD, ?PE, started LMWH",
	lines=4
	)
	translate_btn = gr.Button("🔄 Translate", variant="primary")

	# Sample clinical notes wired to the input textbox.
	gr.Examples(
	examples=[
	["72M c/o SOB, NEWS2 score is 7, PMH: COPD, AF. Started on Salbutamol NEB and LMWH."],
	["Pt admitted via A&E with ?PE. CXR NAD. ABG shows type 1 resp failure. For CT PA."],
	["85F NOF # post-op day 2. Increasing confusion, Temp 38.2. ?UTI vs ?SSI. Sent MSU."],
	["54M NSTEMI. ECG: ST depression V3-V6. Troponin elevated. For ECHO and cardiology review."],
	["NEWS2 9 - patient deteriorating. RR 28, O2 sats 88% on 4L, HR 120, BP 90/60."]
	],
	inputs=input_text,
	label="Example Clinical Notes"
	)

	with gr.Column(scale=1):
	# Markdown component that receives the report returned by translate_nursing_text.
	output_text = gr.Markdown(label="Translation")

	# On click, pass the textbox contents to translate_nursing_text and show
	# its return value in output_text.
	translate_btn.click(
	fn=translate_nursing_text,
	inputs=input_text,
	outputs=output_text
	)

	# Reference tab: get_abbreviation_list() is called here, while the UI is
	# being built, and its result is passed to the Markdown component.
	with gr.Tab("📚 Reference"):
	gr.Markdown(get_abbreviation_list())

	# About tab: static description of the tool, the model, and a disclaimer.
	with gr.Tab("ℹ️ About"):
	gr.Markdown("""
	## About This Tool

	The Nursing Language Translator uses NurseEmbed-300M, a clinical embedding model
	fine-tuned on NHS nursing terminology.

	### How It Works
	1. Semantic Matching: Uses vector embeddings to match abbreviations to their meanings
	2. NEWS2 Interpretation: Automatically interprets Early Warning Scores
	3. Context-Aware: Understands clinical context, not just string matching

	### Model Details
	- Base Model: EmbeddingGemma-300M
	- Training Data: 10,000 medical Q&A pairs + 200 NHS nursing abbreviations
	- Accuracy: 81.3% Accuracy@1 on medical retrieval

	### Author
	Created by Lincoln Gombedza ([@NurseCitizenDeveloper](https://huggingface.co/NurseCitizenDeveloper))

	Part of the Nursing Citizen Development movement and OpenEnv Challenge submission.

	---

	Disclaimer: This tool is for educational and assistive purposes only.
	Always verify clinical information and follow local trust policies.
	""")

	# Footer with links to the model card and the GitHub repository.
	gr.Markdown("""
	---
	<center>
	🩺 Built with ❤️ for NHS Nurses \|
	<a href="https://huggingface.co/NurseCitizenDeveloper/NurseEmbed-300M">Model</a> \|
	<a href="https://github.com/Clinical-Quality-Artifical-Intelligence/nursing-language-translator">GitHub</a>
	</center>
	""")

	if __name__ == "__main__":
	    # Start the web server only when the file is run as a script: listen on
	    # all network interfaces ("0.0.0.0") at port 7860.
	    app.launch(
	        server_port=7860,
	        server_name="0.0.0.0",
	    )