File size: 10,992 Bytes
e4963d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56d9350
e4963d6
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
"""
πŸ₯ Nursing Language Translator
Translates NHS clinical shorthand to formal language using NurseEmbed-300M
"""
import gradio as gr
import json
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the NurseEmbed sentence-embedding model once, at import time, so every
# request reuses the same instance.
print("Loading NurseEmbed-300M...")
model = SentenceTransformer("NurseCitizenDeveloper/NurseEmbed-300M")
print("βœ… Model loaded!")

# Load knowledge base: a list of entries, each read elsewhere in this file via
# the keys "abbrev", "full" and "category".
# JSON text is UTF-8 by specification; decode it explicitly instead of relying
# on the platform's locale-dependent default encoding.
with open("knowledge_base.json", "r", encoding="utf-8") as f:
    KNOWLEDGE_BASE = json.load(f)

# Pre-compute embeddings for all abbreviations so that a lookup only has to
# embed the query text. KB_EMBEDDINGS[i] corresponds to KNOWLEDGE_BASE[i].
print("Computing knowledge base embeddings...")
KB_TEXTS = [item["abbrev"] for item in KNOWLEDGE_BASE]
KB_EMBEDDINGS = model.encode(KB_TEXTS)
print(f"βœ… {len(KB_TEXTS)} abbreviations indexed!")

# NEWS2 interpretation.
# Maps an inclusive (low, high) score band to a tuple of
# (risk level, recommended action, indicator emoji).
NEWS2_THRESHOLDS = {
    (0, 0): ("Low risk", "Routine monitoring", "🟒"),
    (1, 4): ("Low-medium risk", "Increased monitoring frequency", "🟑"),
    (5, 6): ("Medium risk", "Urgent response - inform senior nurse/doctor", "🟠"),
    (7, 20): ("High risk", "Emergency response - immediate senior review, consider critical care", "πŸ”΄")
}


def interpret_news2(score):
    """Interpret a NEWS2 score and return the matching clinical action.

    Args:
        score: The score as an int or anything ``int()`` accepts
            (for example the digit string captured by a regex).

    Returns:
        A Markdown string naming the risk level and recommended action when
        the score falls inside one of the ``NEWS2_THRESHOLDS`` bands, a
        warning string when the number is outside every band, or ``None``
        when ``score`` cannot be converted to an integer.
    """
    # Only the conversion can legitimately fail; catching just those errors
    # keeps unrelated problems (and KeyboardInterrupt) from being swallowed.
    try:
        score = int(score)
    except (TypeError, ValueError, OverflowError):
        return None

    for (low, high), (risk, action, emoji) in NEWS2_THRESHOLDS.items():
        if low <= score <= high:
            return f"{emoji} **NEWS2 {score}**: {risk}\n   β†’ {action}"
    return f"⚠️ NEWS2 {score}: Invalid score (should be 0-20)"

def find_abbreviation_match(text, threshold=0.3):
    """Rank knowledge-base abbreviations by semantic closeness to ``text``.

    The text is embedded with the module-level model and compared, by cosine
    similarity, against the pre-computed knowledge-base embeddings.

    Args:
        text: The word or phrase to look up.
        threshold: Entries must score strictly above this similarity.

    Returns:
        At most five dicts with the keys ``abbrev``, ``full``, ``category``
        and ``similarity``, best match first. Empty list for blank input.
    """
    # Blank input has nothing to embed.
    if text.strip() == "":
        return []

    query_vector = model.encode([text])
    scores = cosine_similarity(query_vector, KB_EMBEDDINGS)[0]

    # Keep every entry that clears the threshold, in knowledge-base order.
    candidates = [
        {
            "abbrev": KNOWLEDGE_BASE[position]["abbrev"],
            "full": KNOWLEDGE_BASE[position]["full"],
            "category": KNOWLEDGE_BASE[position]["category"],
            "similarity": float(score),
        }
        for position, score in enumerate(scores)
        if score > threshold
    ]

    # Highest similarity first; only the top five are returned.
    ranked = sorted(candidates, key=lambda hit: hit["similarity"], reverse=True)
    return ranked[:5]

def extract_demographics(text):
    """Extract the patient's age and gender from clinical shorthand.

    Age is the number in front of the first age/gender marker found
    (``72 y/o``, ``72yo``, ``72 year``, ``72 male``, ``72 female``, ``72M``).
    Gender is taken from the words male/female/man/gentleman/woman/lady, a
    stand-alone ``M``/``F`` letter, or a marker glued directly to a number
    such as ``72M`` or ``85F``.

    Args:
        text: Free-text clinical note.

    Returns:
        A Markdown snippet with an ``**Age**`` line and/or a ``**Gender**``
        line (each newline-terminated), or ``None`` when neither is found.
    """
    # Tried in order; the first pattern that matches supplies the age.
    age_patterns = (
        r'(\d+)\s*[yY]/[oO]',  # 72 y/o
        r'(\d+)\s*[yY][oO]',   # 72yo
        r'(\d+)\s*[yY]ear',    # 72 year
        r'(\d+)\s*[mM]ale',    # 72 male
        r'(\d+)\s*[fF]emale',  # 72 female
        r'(\d+)\s*[MF]\b',     # 72M or 72F
    )

    age = None
    for pattern in age_patterns:
        match = re.search(pattern, text)
        if match:
            age = match.group(1)
            break

    # "\b" cannot match between a digit and a letter, so "72M" / "85F" need
    # their own alternative: a marker immediately preceded by a digit. Only
    # the upper-case single letter is accepted there, so that units such as
    # "20m" are not read as a gender.
    male_pattern = (
        r'\b[mM]ale\b|\b[mM]\b|(?<=\d)(?:[mM]ale|M)\b|\bman\b|\bgentleman\b'
    )
    female_pattern = (
        r'\b[fF]emale\b|\b[fF]\b|(?<=\d)(?:[fF]emale|F)\b|\bwoman\b|\blady\b'
    )

    gender = None
    if re.search(male_pattern, text):
        gender = "Male"
    elif re.search(female_pattern, text):
        gender = "Female"

    lines = []
    if age:
        lines.append(f"**Age**: {age} years old\n")
    if gender:
        lines.append(f"**Gender**: {gender}\n")

    return "".join(lines) or None

# Captures the numeric score that follows a NEWS / NEWS2 mention. The "2" of
# "NEWS2" is consumed as part of the name and can never be backtracked into
# the score group, so "NEWS2: 7" yields 7 and a bare "NEWS2" yields no score
# (instead of being misread as a score of 2).
_NEWS2_SCORE_RE = re.compile(
    r'NEWS(?:2(?!\d)|(?!2(?!\d))2?)\s*(?:score\s*)?(?:is\s*|of\s*|=\s*|:\s*)?(\d+)',
    re.IGNORECASE,
)


def _lookup_term(candidate):
    """Return a found-term record for the best knowledge-base match, or None."""
    matches = find_abbreviation_match(candidate, threshold=0.4)
    if not matches:
        return None
    best = matches[0]
    return {
        "original": candidate,
        "translation": best["full"],
        "category": best["category"],
        "confidence": best["similarity"],
    }


def _replace_terms_once(text, terms):
    """Replace the first occurrence of each term with its bold translation.

    All terms are substituted in a single pass over the original text, so a
    later term can never match inside a translation inserted for an earlier
    one. A stand-alone occurrence (not glued to other word characters) is
    preferred; if a term only occurs embedded in a longer token, the plain
    substring is used instead.
    """
    translations = {}
    for term in terms:
        # Terms arrive sorted by confidence: the first translation wins.
        translations.setdefault(term["original"].lower(), term["translation"])
    if not translations:
        return text

    def bounded(key):
        # Only require a boundary on an edge that is itself a word character,
        # so terms such as "?PE" or "NOF #" still match.
        head = r"(?<!\w)" if re.match(r"\w", key) else ""
        tail = r"(?!\w)" if re.search(r"\w\Z", key) else ""
        return f"{head}{re.escape(key)}{tail}"

    alternatives = []
    # Longest first so that "NOF #" is preferred over "NOF" at the same spot.
    for key in sorted(translations, key=len, reverse=True):
        strict = bounded(key)
        if re.search(strict, text, re.IGNORECASE):
            alternatives.append(strict)
        else:
            alternatives.append(re.escape(key))
    pattern = re.compile("|".join(alternatives), re.IGNORECASE)

    replaced = set()

    def substitute(match):
        key = match.group(0).lower()
        if key in replaced or key not in translations:
            return match.group(0)
        replaced.add(key)
        # Returned from a function, so backslashes in the translation are
        # inserted literally rather than parsed as a regex template.
        return f"**{translations[key]}**"

    return pattern.sub(substitute, text)


def translate_nursing_text(input_text):
    """Translate clinical shorthand into a Markdown report.

    The report contains, in order: the original text, patient demographics
    (when detected), the NEWS2 interpretation (when a score is present), a
    table of recognised terms with their confidence, and the input text with
    the first occurrence of each recognised term replaced by its full form.

    Args:
        input_text: The clinical note entered by the user.

    Returns:
        The report as a Markdown string, or a short prompt when the input is
        empty or whitespace only.
    """
    if not input_text.strip():
        return "Please enter clinical text to translate."

    output = [
        "# πŸ“‹ Translation Report\n",
        f"**Original**: _{input_text}_\n",
        "---\n",
    ]

    # Extract demographics
    demographics = extract_demographics(input_text)
    if demographics:
        output.extend(["## πŸ‘€ Patient Demographics\n", demographics, ""])

    # Check for NEWS2 scores
    news_match = _NEWS2_SCORE_RE.search(input_text)
    if news_match:
        interpretation = interpret_news2(news_match.group(1))
        # interpret_news2 returns None when the score cannot be parsed;
        # appending None would make the final join raise TypeError.
        if interpretation:
            output.extend(["## ⚠️ Early Warning Score\n", interpretation, ""])

    # Tokenize and find abbreviations
    words = re.findall(r'\b[\w/]+\b|[?#][\w]*', input_text)

    found_terms = []
    seen = set()

    for word in words:
        key = word.lower()
        if key in seen or len(word) < 2:
            continue
        seen.add(key)
        record = _lookup_term(word)
        if record:
            found_terms.append(record)

    # Also check phrases the tokenizer above splits apart or may miss
    lowered_input = input_text.lower()
    for phrase in ("c/o", "y/o", "O/E", "U&E", "?PE", "NOF #"):
        key = phrase.lower()
        # Skip before embedding: no model call for absent or known phrases.
        if key in seen or key not in lowered_input:
            continue
        record = _lookup_term(phrase)
        if record:
            seen.add(key)
            found_terms.append(record)

    # Sort by confidence
    found_terms.sort(key=lambda x: x["confidence"], reverse=True)

    if found_terms:
        output.append("## πŸ“– Clinical Terms Identified\n")
        output.append("| Term | Translation | Category | Confidence |")
        output.append("|------|-------------|----------|------------|")
        for term in found_terms:
            conf_bar = "🟒" if term["confidence"] > 0.7 else ("🟑" if term["confidence"] > 0.5 else "🟠")
            output.append(f"| `{term['original']}` | {term['translation']} | {term['category']} | {conf_bar} {term['confidence']:.0%} |")
        output.append("")

    # Generate formal translation
    output.append("## βœ… Formal Translation\n")
    formal_text = _replace_terms_once(input_text, found_terms)
    output.append(f"> {formal_text}\n")

    return "\n".join(output)


def get_abbreviation_list(knowledge_base=None):
    """Return a Markdown reference of abbreviations grouped by category.

    Categories are listed alphabetically; inside a category the entries keep
    their knowledge-base order. Each entry is its own bullet, because
    Markdown collapses consecutive plain lines into a single paragraph.

    Args:
        knowledge_base: Optional iterable of dicts with the keys ``abbrev``,
            ``full`` and ``category``. Defaults to the module-level
            ``KNOWLEDGE_BASE``.

    Returns:
        The reference document as a Markdown string.
    """
    entries = KNOWLEDGE_BASE if knowledge_base is None else knowledge_base

    categories = {}
    for item in entries:
        categories.setdefault(item["category"], []).append(
            f"- `{item['abbrev']}` β†’ {item['full']}"
        )

    output = ["# πŸ“š NHS Abbreviation Reference\n"]
    for cat in sorted(categories):
        output.append(f"## {cat}\n")
        output.append("\n".join(categories[cat]))
        output.append("")

    return "\n".join(output)


# Build the Gradio interface: a page header, three tabs (Translate, Reference,
# About) and a footer. The resulting Blocks object is bound to `app`.
with gr.Blocks(
    title="πŸ₯ Nursing Language Translator",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")
) as app:
    # Page header shown above the tabs.
    gr.Markdown("""
    # πŸ₯ Nursing Language Translator
    
    **Powered by NurseEmbed-300M** β€” A clinical embedding model trained on NHS nursing terminology.
    
    Translates clinical shorthand, abbreviations, and NEWS2 scores into formal language.
    """)
    
    with gr.Tabs():
        with gr.Tab("πŸ”„ Translate"):
            with gr.Row():
                # Left column: input box, translate button and example notes.
                with gr.Column(scale=1):
                    input_text = gr.Textbox(
                        label="Clinical Shorthand Input",
                        placeholder="e.g., 72M, c/o SOB, NEWS2=7, PMH: COPD, ?PE, started LMWH",
                        lines=4
                    )
                    translate_btn = gr.Button("πŸ”„ Translate", variant="primary")
                    
                    # Sample notes wired to the input textbox.
                    gr.Examples(
                        examples=[
                            ["72M c/o SOB, NEWS2 score is 7, PMH: COPD, AF. Started on Salbutamol NEB and LMWH."],
                            ["Pt admitted via A&E with ?PE. CXR NAD. ABG shows type 1 resp failure. For CT PA."],
                            ["85F NOF # post-op day 2. Increasing confusion, Temp 38.2. ?UTI vs ?SSI. Sent MSU."],
                            ["54M NSTEMI. ECG: ST depression V3-V6. Troponin elevated. For ECHO and cardiology review."],
                            ["NEWS2 9 - patient deteriorating. RR 28, O2 sats 88% on 4L, HR 120, BP 90/60."]
                        ],
                        inputs=input_text,
                        label="Example Clinical Notes"
                    )
                
                # Right column: the Markdown report from translate_nursing_text.
                with gr.Column(scale=1):
                    output_text = gr.Markdown(label="Translation")
            
            # Clicking the button passes the textbox content to
            # translate_nursing_text and shows the result in output_text.
            translate_btn.click(
                fn=translate_nursing_text,
                inputs=input_text,
                outputs=output_text
            )
        
        # The reference text is computed once, while the interface is built.
        with gr.Tab("πŸ“š Reference"):
            gr.Markdown(get_abbreviation_list())
        
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
            ## About This Tool
            
            The **Nursing Language Translator** uses **NurseEmbed-300M**, a clinical embedding model 
            fine-tuned on NHS nursing terminology.
            
            ### How It Works
            1. **Semantic Matching**: Uses vector embeddings to match abbreviations to their meanings
            2. **NEWS2 Interpretation**: Automatically interprets Early Warning Scores
            3. **Context-Aware**: Understands clinical context, not just string matching
            
            ### Model Details
            - **Base Model**: EmbeddingGemma-300M
            - **Training Data**: 10,000 medical Q&A pairs + 200 NHS nursing abbreviations
            - **Accuracy**: 81.3% Accuracy@1 on medical retrieval
            
            ### Author
            Created by **Lincoln Gombedza** ([@NurseCitizenDeveloper](https://huggingface.co/NurseCitizenDeveloper))
            
            Part of the **Nursing Citizen Development** movement and **OpenEnv Challenge** submission.
            
            ---
            
            **Disclaimer**: This tool is for educational and assistive purposes only. 
            Always verify clinical information and follow local trust policies.
            """)
    
    # Footer with links to the model and the source repository.
    gr.Markdown("""
    ---
    <center>
    🩺 Built with ❀️ for NHS Nurses | 
    <a href="https://huggingface.co/NurseCitizenDeveloper/NurseEmbed-300M">Model</a> | 
    <a href="https://github.com/Clinical-Quality-Artifical-Intelligence/nursing-language-translator">GitHub</a>
    </center>
    """)

# Start the web server only when this file is run as a script, not on import.
if __name__ == "__main__":
    # Listen on all network interfaces (0.0.0.0) on port 7860.
    # NOTE(review): presumably chosen for a container / hosted deployment - confirm.
    app.launch(server_name="0.0.0.0", server_port=7860)