Spaces:
Sleeping
Sleeping
MakPr016 committed on
Commit ·
6c66675
1
Parent(s): 648d9f3
Analysis includes NLP summary
Browse files- app/lab_processor.py +475 -456
- app/main.py +7 -12
- decrypt_response.py +1 -1
- generate_postman_request.py +1 -1
app/lab_processor.py
CHANGED
|
@@ -1,487 +1,446 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Lab Report Processing with Smart NER + Regex + ClinicalDistilBERT
|
| 3 |
-
Based on your proven local implementation
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
import spacy
|
| 7 |
import re
|
| 8 |
-
import
|
| 9 |
import torch
|
| 10 |
from datetime import datetime
|
| 11 |
-
|
| 12 |
-
from collections import defaultdict
|
| 13 |
-
from transformers import AutoTokenizer, AutoModel
|
| 14 |
|
| 15 |
-
REFERENCE_RANGES = {
|
| 16 |
-
"White Blood Cell Count": {"min": 4.0, "max": 11.0, "unit": "x10^9/L"},
|
| 17 |
-
"Red Blood Cell Count": {"min": 4.2, "max": 5.9, "unit": "x10^12/L"},
|
| 18 |
-
"Hemoglobin": {"min": 13.5, "max": 17.5, "unit": "g/dL"},
|
| 19 |
-
"Hematocrit": {"min": 38.3, "max": 48.6, "unit": "%"},
|
| 20 |
-
"Platelet Count": {"min": 150, "max": 450, "unit": "x10^9/L"},
|
| 21 |
-
"Glucose": {"min": 70, "max": 99, "unit": "mg/dL"},
|
| 22 |
-
"Creatinine": {"min": 0.6, "max": 1.2, "unit": "mg/dL"},
|
| 23 |
-
"Urea": {"min": 15, "max": 50, "unit": "mg/dL"},
|
| 24 |
-
"Cholesterol": {"min": 0, "max": 200, "unit": "mg/dL"},
|
| 25 |
-
"Alanine Aminotransferase": {"min": 7, "max": 56, "unit": "U/L"},
|
| 26 |
-
"Aspartate Aminotransferase": {"min": 8, "max": 48, "unit": "U/L"},
|
| 27 |
-
"Alkaline Phosphatase": {"min": 40, "max": 129, "unit": "U/L"},
|
| 28 |
-
"Bilirubin": {"min": 0.3, "max": 1.9, "unit": "mg/dL"},
|
| 29 |
-
"Albumin": {"min": 3.5, "max": 5.5, "unit": "g/dL"},
|
| 30 |
-
"Thyroid Stimulating Hormone": {"min": 0.5, "max": 4.5, "unit": "mIU/L"},
|
| 31 |
-
"Free Thyroxine": {"min": 0.9, "max": 1.7, "unit": "ng/dL"},
|
| 32 |
-
}
|
| 33 |
|
| 34 |
class RadioloLabProcessor:
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
# Metadata fields
|
| 61 |
-
'age', 'gender', 'email', 'sample', 'results', 'verified by',
|
| 62 |
-
'processing', 'details',
|
| 63 |
-
|
| 64 |
-
# Table headers
|
| 65 |
-
'test', 'result', 'unit', 'normal', 'range', 'status',
|
| 66 |
-
'normal range', 'result status',
|
| 67 |
-
|
| 68 |
-
# Section headers
|
| 69 |
-
'hematology', 'biochemistry', 'liver function', 'thyroid function',
|
| 70 |
-
'kidney function', 'lipid profile',
|
| 71 |
-
|
| 72 |
-
# Names (common in reports)
|
| 73 |
-
'john', 'doe', 'johnatan', 'emily', 'johnson', 'dr',
|
| 74 |
-
|
| 75 |
-
# Standalone numbers
|
| 76 |
-
'30', '123', '12345',
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
# Valid lab tests for NER filtering
|
| 80 |
-
self.valid_tests = {
|
| 81 |
-
'white blood cell count', 'wbc', 'red blood cell count', 'rbc',
|
| 82 |
-
'hemoglobin', 'hgb', 'hb', 'hematocrit', 'hct',
|
| 83 |
-
'platelet count', 'platelets', 'plt',
|
| 84 |
-
'mcv', 'mch', 'mchc',
|
| 85 |
-
'glucose', 'glu', 'creatinine', 'urea', 'bun',
|
| 86 |
-
'cholesterol', 'ldl', 'hdl', 'triglycerides',
|
| 87 |
-
'alt', 'ast', 'alp', 'bilirubin', 'albumin',
|
| 88 |
-
'tsh', 'ft4', 'free thyroxine', 'hba1c', 'a1c',
|
| 89 |
-
'sodium', 'potassium', 'calcium', 'chloride',
|
| 90 |
-
'aminotransferase', 'phosphatase',
|
| 91 |
}
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
r
|
| 98 |
-
r
|
| 99 |
-
|
| 100 |
-
r
|
| 101 |
-
r
|
| 102 |
-
r
|
| 103 |
-
r
|
| 104 |
-
r
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
name_lower = name.lower().strip()
|
| 114 |
-
|
| 115 |
-
mapping = {
|
| 116 |
-
'wbc': 'White Blood Cell Count',
|
| 117 |
-
'rbc': 'Red Blood Cell Count',
|
| 118 |
-
'hgb': 'Hemoglobin',
|
| 119 |
-
'hb': 'Hemoglobin',
|
| 120 |
-
'hct': 'Hematocrit',
|
| 121 |
-
'plt': 'Platelet Count',
|
| 122 |
-
'platelets': 'Platelet Count',
|
| 123 |
-
'glu': 'Glucose',
|
| 124 |
-
'alt': 'Alanine Aminotransferase',
|
| 125 |
-
'ast': 'Aspartate Aminotransferase',
|
| 126 |
-
'alp': 'Alkaline Phosphatase',
|
| 127 |
-
'tsh': 'Thyroid Stimulating Hormone',
|
| 128 |
-
'ft4': 'Free Thyroxine',
|
| 129 |
}
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
}
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
max_length=512,
|
| 172 |
padding=True,
|
| 173 |
return_token_type_ids=False
|
| 174 |
-
)
|
| 175 |
-
|
| 176 |
with torch.no_grad():
|
| 177 |
-
outputs = self.
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
"clinical_context_captured": True,
|
| 191 |
"embeddings_generated": True,
|
| 192 |
-
"diseases_detected":
|
| 193 |
-
"status_flags":
|
| 194 |
-
"abnormality_patterns":
|
| 195 |
-
"clinical_relevance_score":
|
| 196 |
}
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
)
|
| 214 |
else:
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
for ent in doc.ents:
|
| 228 |
-
ner_stats[ent.label_] += 1
|
| 229 |
-
|
| 230 |
-
if ent.label_ == 'TEST_NAME':
|
| 231 |
-
ent_lower = ent.text.lower()
|
| 232 |
-
|
| 233 |
-
# Skip if in stopwords
|
| 234 |
-
if ent_lower in self.stopwords:
|
| 235 |
-
continue
|
| 236 |
-
|
| 237 |
-
# Skip if looks like date
|
| 238 |
-
if re.match(r'\d+/\d+/\d+', ent.text):
|
| 239 |
-
continue
|
| 240 |
-
|
| 241 |
-
# Skip if just numbers
|
| 242 |
-
if re.match(r'^\d+$', ent.text):
|
| 243 |
-
continue
|
| 244 |
-
|
| 245 |
-
# Skip if already extracted by regex
|
| 246 |
-
if ent_lower in extracted_test_names:
|
| 247 |
-
continue
|
| 248 |
-
|
| 249 |
-
# Only add if contains valid medical keywords
|
| 250 |
-
if any(keyword in ent_lower for keyword in self.valid_tests):
|
| 251 |
-
additional_tests.append({
|
| 252 |
-
'testname': ent.text,
|
| 253 |
-
'value': None,
|
| 254 |
-
'unit': None,
|
| 255 |
-
'source': 'ner'
|
| 256 |
-
})
|
| 257 |
-
|
| 258 |
-
elif ent.label_ == 'DISEASE':
|
| 259 |
-
if ent.text.lower() not in self.stopwords:
|
| 260 |
-
diseases.add(ent.text)
|
| 261 |
-
|
| 262 |
-
elif ent.label_ == 'INTERPRETATION':
|
| 263 |
-
interpretations.add(ent.text)
|
| 264 |
-
|
| 265 |
-
return additional_tests, diseases, interpretations, ner_stats
|
| 266 |
-
|
| 267 |
-
def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> Dict:
|
| 268 |
-
"""Smart extraction using hybrid approach"""
|
| 269 |
-
start_time = time.time()
|
| 270 |
-
|
| 271 |
-
raw_tests = []
|
| 272 |
-
seen_tests = set()
|
| 273 |
-
|
| 274 |
-
# Step 1: Regex extraction (most reliable for structured data)
|
| 275 |
-
for match in self.lab_value_pattern.finditer(text):
|
| 276 |
-
test_name = self._normalize_test_name(match.group(1).strip())
|
| 277 |
-
try:
|
| 278 |
-
value = float(match.group(2))
|
| 279 |
-
unit = match.group(3) if match.group(3) else None
|
| 280 |
-
|
| 281 |
-
test_key = (test_name.lower(), value)
|
| 282 |
-
if test_key not in seen_tests:
|
| 283 |
-
raw_tests.append({
|
| 284 |
-
'testname': test_name,
|
| 285 |
-
'value': value,
|
| 286 |
-
'unit': unit,
|
| 287 |
-
'source': 'regex'
|
| 288 |
-
})
|
| 289 |
-
seen_tests.add(test_key)
|
| 290 |
-
except:
|
| 291 |
-
continue
|
| 292 |
-
|
| 293 |
-
extracted_test_names = {t['testname'].lower() for t in raw_tests}
|
| 294 |
-
|
| 295 |
-
# Step 2: Smart NER extraction with filtering
|
| 296 |
-
doc = self.nlp(text)
|
| 297 |
-
additional_tests, diseases, interpretations, ner_stats = self._smart_ner_extraction(
|
| 298 |
-
doc, extracted_test_names
|
| 299 |
-
)
|
| 300 |
-
|
| 301 |
-
# Extract status flags from text
|
| 302 |
-
for match in self.status_pattern.finditer(text):
|
| 303 |
-
context = text[max(0, match.start()-10):match.end()+10]
|
| 304 |
-
if 'Range' not in context: # Avoid "Normal Range"
|
| 305 |
-
interpretations.add(match.group(1))
|
| 306 |
-
|
| 307 |
-
# Collect entities for output
|
| 308 |
-
entities_for_output = []
|
| 309 |
-
for ent in doc.ents:
|
| 310 |
-
entities_for_output.append({
|
| 311 |
-
"text": ent.text,
|
| 312 |
-
"label": ent.label_,
|
| 313 |
-
"start_char": ent.start_char,
|
| 314 |
-
"end_char": ent.end_char,
|
| 315 |
-
"confidence": 0.92
|
| 316 |
-
})
|
| 317 |
-
|
| 318 |
-
# Step 3: Build test results with reference ranges
|
| 319 |
-
test_results = []
|
| 320 |
-
abnormal_results = []
|
| 321 |
-
|
| 322 |
-
for test in raw_tests:
|
| 323 |
-
test_name = test['testname']
|
| 324 |
-
value = test['value']
|
| 325 |
-
unit = test['unit']
|
| 326 |
-
|
| 327 |
-
ref_range = REFERENCE_RANGES.get(test_name, {})
|
| 328 |
-
status_info = self._calculate_status(test_name, value)
|
| 329 |
-
|
| 330 |
-
test_result = {
|
| 331 |
-
"test_name": test_name,
|
| 332 |
-
"value": value,
|
| 333 |
-
"unit": unit or ref_range.get('unit', ''),
|
| 334 |
-
"reference_range": {
|
| 335 |
-
"min": ref_range.get('min'),
|
| 336 |
-
"max": ref_range.get('max'),
|
| 337 |
-
"unit": ref_range.get('unit', unit or '')
|
| 338 |
-
} if ref_range else None,
|
| 339 |
-
"status": status_info['status'],
|
| 340 |
-
"deviation_percentage": status_info['deviation_percentage'],
|
| 341 |
-
"clinical_significance": status_info['clinical_significance'],
|
| 342 |
-
"trend": None,
|
| 343 |
-
"source": test['source']
|
| 344 |
-
}
|
| 345 |
-
|
| 346 |
-
test_results.append(test_result)
|
| 347 |
-
|
| 348 |
-
if status_info['status'] in ['low', 'high', 'critical_low', 'critical_high']:
|
| 349 |
-
severity = "critical" if 'critical' in status_info['status'] else "moderate"
|
| 350 |
-
abnormal_results.append({
|
| 351 |
-
"test_name": test_name,
|
| 352 |
-
"severity": severity,
|
| 353 |
-
"requires_attention": True
|
| 354 |
-
})
|
| 355 |
-
|
| 356 |
-
# Step 4: Generate summaries and insights
|
| 357 |
-
ai_summary = self._generate_summary(test_results, abnormal_results)
|
| 358 |
-
test_panels = self._group_into_panels(test_results)
|
| 359 |
-
visualization_data = self._generate_visualization_data(test_results)
|
| 360 |
-
|
| 361 |
-
# Step 5: Generate clinical insights with ClinicalDistilBERT
|
| 362 |
-
clinical_insights = self._generate_clinical_insights(
|
| 363 |
-
text, abnormal_results, diseases, interpretations
|
| 364 |
-
)
|
| 365 |
-
|
| 366 |
-
processing_time = int((time.time() - start_time) * 1000)
|
| 367 |
-
|
| 368 |
-
return {
|
| 369 |
-
"report_id": report_id or f"rep_{int(time.time())}",
|
| 370 |
-
"report_type": "laboratory",
|
| 371 |
-
"processing_time_ms": processing_time,
|
| 372 |
-
|
| 373 |
-
"classification": {
|
| 374 |
-
"test_category": self._determine_category(test_results),
|
| 375 |
-
"sub_category": "complete_blood_count",
|
| 376 |
-
"urgency_level": "critical" if any(r['severity'] == 'critical' for r in abnormal_results) else "abnormal" if abnormal_results else "routine",
|
| 377 |
-
"confidence": 0.96
|
| 378 |
},
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
-
"diseases_detected": len(diseases),
|
| 384 |
-
"interpretations_found": len(interpretations),
|
| 385 |
-
"ner_model_stats": dict(ner_stats)
|
| 386 |
},
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
"
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
"
|
| 399 |
-
"
|
| 400 |
-
"
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
}
|
| 407 |
}
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
else:
|
| 429 |
-
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
"Correlate with clinical symptoms",
|
| 432 |
"Consider follow-up testing if symptoms persist",
|
| 433 |
"Consult with healthcare provider for interpretation"
|
| 434 |
]
|
| 435 |
-
|
| 436 |
-
key_abnormalities = []
|
| 437 |
-
for result in abnormal_results:
|
| 438 |
-
test_detail = next((t for t in test_results if t['test_name'] == result['test_name']), None)
|
| 439 |
-
if test_detail:
|
| 440 |
-
key_abnormalities.append(
|
| 441 |
-
f"{result['test_name']}: {test_detail['clinical_significance']}"
|
| 442 |
-
)
|
| 443 |
-
|
| 444 |
-
return {
|
| 445 |
-
"overall_assessment": overall,
|
| 446 |
-
"key_abnormalities": key_abnormalities,
|
| 447 |
-
"normal_parameters": normal_tests,
|
| 448 |
-
"recommendations": recommendations
|
| 449 |
}
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
for
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
-
"
|
| 476 |
-
"
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
})
|
| 479 |
-
|
| 480 |
-
return panel_list
|
| 481 |
-
|
| 482 |
-
def _generate_visualization_data(self, test_results: List[Dict]) -> Dict:
|
| 483 |
chart_data = []
|
| 484 |
-
|
| 485 |
for test in test_results:
|
| 486 |
if test['reference_range']:
|
| 487 |
chart_data.append({
|
|
@@ -490,8 +449,8 @@ class RadioloLabProcessor:
|
|
| 490 |
"ref_min": test['reference_range']['min'],
|
| 491 |
"ref_max": test['reference_range']['max']
|
| 492 |
})
|
| 493 |
-
|
| 494 |
-
|
| 495 |
"charts": [{
|
| 496 |
"chart_type": "bar",
|
| 497 |
"title": "Lab Results vs Reference Range",
|
|
@@ -499,3 +458,63 @@ class RadioloLabProcessor:
|
|
| 499 |
}],
|
| 500 |
"trend_data": []
|
| 501 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import spacy
|
| 2 |
import re
|
| 3 |
+
from transformers import AutoTokenizer, AutoModel
|
| 4 |
import torch
|
| 5 |
from datetime import datetime
|
| 6 |
+
import time
|
|
|
|
|
|
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
class RadioloLabProcessor:
|
| 10 |
+
def __init__(self, model_path: str):
    """Load the NER pipeline and the clinical BERT model, and set reference ranges.

    Args:
        model_path: Value handed to ``spacy.load`` to obtain the NER pipeline.
    """
    bert_checkpoint = "nlpie/clinical-distilbert"

    self.nlp = spacy.load(model_path)
    self.clinical_bert_tokenizer = AutoTokenizer.from_pretrained(bert_checkpoint)
    self.clinical_bert_model = AutoModel.from_pretrained(bert_checkpoint)

    # One row per supported test: (name, unit, lower bound, upper bound).
    reference_rows = (
        ("White Blood Cell Count", "x10^9/L", 4.0, 11.0),
        ("Red Blood Cell Count", "x10^12/L", 4.2, 5.9),
        ("Hemoglobin", "g/dL", 13.5, 17.5),
        ("Hematocrit", "%", 38.3, 48.6),
        ("Platelet Count", "x10^9/L", 150, 450),
        ("Glucose", "mg/dL", 70, 99),
        ("Creatinine", "mg/dL", 0.6, 1.2),
        ("Urea", "mg/dL", 15, 50),
        ("Cholesterol", "mg/dL", 0, 200),
        ("ALT", "U/L", 7, 56),
        ("AST", "U/L", 10, 40),
        ("ALP", "U/L", 44, 147),
        ("Bilirubin", "mg/dL", 0.3, 1.9),
        ("Albumin", "g/dL", 3.5, 5.5),
        ("Thyroid Stimulating Hormone", "mIU/L", 0.5, 4.5),
        ("Free T4", "ng/dL", 0.8, 1.8),
    )
    # Reference table keyed by test name; read by extract_with_regex.
    self.lab_tests = {
        name: {"unit": unit, "min": lower, "max": upper}
        for name, unit, lower, upper in reference_rows
    }
|
| 35 |
+
|
| 36 |
+
def extract_with_regex(self, text: str) -> dict:
    """Extract known lab values from *text* and grade them against ``self.lab_tests``.

    For every supported test the first case-insensitive occurrence of
    ``<name><':' or whitespace><number><optional whitespace><unit>`` is used.
    The test name must start at a word boundary so that a longer word ending
    in the same letters (e.g. "SALT" for "ALT") is not mistaken for the test.

    Args:
        text: Raw report text.

    Returns:
        ``{"test_results": [...]}`` where each entry holds the test name,
        parsed value, matched unit, reference range, status (``normal``,
        ``low``, ``high``, ``critical_low`` or ``critical_high``), the
        percentage deviation from the violated bound, a human-readable
        significance string, ``trend`` (always ``None``) and ``source``
        (always ``"regex"``).
    """
    # Test name -> regex fragment for the unit expected after the value.
    unit_patterns = {
        "White Blood Cell Count": r"x10\^9/L",
        "Red Blood Cell Count": r"x10\^12/L",
        "Hemoglobin": r"g/dL",
        "Hematocrit": r"%",
        "Platelet Count": r"x10\^9/L",
        "Glucose": r"mg/dL",
        "Creatinine": r"mg/dL",
        "Urea": r"mg/dL",
        "Cholesterol": r"mg/dL",
        "ALT": r"U/L",
        "AST": r"U/L",
        "ALP": r"U/L",
        "Bilirubin": r"mg/dL",
        "Albumin": r"g/dL",
        "Thyroid Stimulating Hormone": r"mIU/L",
        "Free T4": r"ng/dL",
    }

    test_results = []

    for test_name, unit_pattern in unit_patterns.items():
        # Tests without a reference range cannot be graded, so skip them.
        ref_range = self.lab_tests.get(test_name)
        if ref_range is None:
            continue

        # \b keeps short names such as ALT/AST from matching inside longer words.
        pattern = rf"\b{re.escape(test_name)}[:\s]+(\d+\.?\d*)\s*({unit_pattern})"
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            continue

        value = float(match.group(1))
        unit = match.group(2)

        status = "normal"
        deviation = 0.0

        # Deviation is relative to the bound that was crossed; more than 20%
        # beyond the bound is graded as critical.
        if value < ref_range["min"]:
            deviation = ((ref_range["min"] - value) / ref_range["min"]) * 100
            status = "critical_low" if deviation > 20 else "low"
        elif value > ref_range["max"]:
            deviation = ((value - ref_range["max"]) / ref_range["max"]) * 100
            status = "critical_high" if deviation > 20 else "high"

        clinical_sig = "Within normal limits"
        if status != "normal":
            is_high = "high" in status
            direction = "↑" if is_high else "↓"
            clinical_sig = f"{'Above' if is_high else 'Below'} normal range ({direction}{deviation:.1f}%)"

        test_results.append({
            "test_name": test_name,
            "value": value,
            "unit": unit,
            "reference_range": {
                "min": ref_range["min"],
                "max": ref_range["max"],
                "unit": ref_range["unit"],
            },
            "status": status,
            "deviation_percentage": deviation,
            "clinical_significance": clinical_sig,
            "trend": None,
            "source": "regex",
        })

    return {"test_results": test_results}
|
| 100 |
+
|
| 101 |
+
def extract_with_ner(self, text: str) -> dict:
    """Run ``self.nlp`` over *text* and collect the entities worth reporting.

    ``TEST_NAME`` entities are kept only when they are longer than two
    characters and not one of the known false positives below; ``TEST_VALUE``,
    ``TEST_UNIT`` and ``MedicalCondition`` entities are always kept. Every
    other label is ignored.

    Args:
        text: Raw report text.

    Returns:
        ``{"entities": [...]}`` with one dict per kept entity (text, label,
        character offsets and a fixed confidence of 0.92).
    """
    # Lower-cased strings that must never be reported as a test name.
    rejected_names = {
        'hemolab', 'central', 'health', 'laboratory', 'medicity', 'wellbeing',
        'healthland', 'age', 'gender', 'email', 'male', 'sample', 'results',
        'verified by', 'dr', 'emily', 'johnson', 'normal', 'elevated', 'johnatan',
        'doe', 'page', 'blood test', 'hematology', 'processing details'
    }
    always_kept = ("TEST_VALUE", "TEST_UNIT", "MedicalCondition")

    def is_reportable(entity) -> bool:
        # Test names get the extra sanity filter; the other labels pass as-is.
        if entity.label_ == "TEST_NAME":
            return entity.text.lower() not in rejected_names and len(entity.text) > 2
        return entity.label_ in always_kept

    parsed = self.nlp(text)
    collected = [
        {
            "text": entity.text,
            "label": entity.label_,
            "start_char": entity.start_char,
            "end_char": entity.end_char,
            "confidence": 0.92,
        }
        for entity in parsed.ents
        if is_reportable(entity)
    ]

    return {"entities": collected}
|
| 132 |
+
|
| 133 |
+
def get_clinical_bert_embeddings(self, text: str):
    """Embed *text* with the clinical BERT model and return plain Python numbers.

    The text is tokenized (truncated to at most 512 tokens), passed through
    ``self.clinical_bert_model`` without gradient tracking, and the last
    hidden state is averaged over dimension 1 (presumably the token axis).

    Args:
        text: Text to embed.

    Returns:
        The pooled vector converted with ``.tolist()``.
    """
    tokenizer_options = {
        "return_tensors": "pt",
        "truncation": True,
        "max_length": 512,
        "padding": True,
        "return_token_type_ids": False,
    }
    encoded = self.clinical_bert_tokenizer(text, **tokenizer_options)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        model_output = self.clinical_bert_model(**encoded)

    pooled = model_output.last_hidden_state.mean(dim=1).squeeze()
    return pooled.numpy().tolist()
|
| 149 |
+
|
| 150 |
+
def analyze_with_clinical_bert(self, text: str, test_results: list):
    """Summarise abnormal findings and report that an embedding was produced.

    The embedding of *text* is computed through
    ``self.get_clinical_bert_embeddings`` but only its length is reported;
    diseases, flags and patterns are derived from ``test_results`` by rule.

    Args:
        text: Raw report text to embed.
        test_results: Graded test dicts; each must provide ``test_name`` and
            ``status``.

    Returns:
        A dict with the embedding dimension, two constant ``True`` markers,
        rule-based ``diseases_detected``, de-duplicated ``status_flags``
        (``["Normal"]`` when nothing is abnormal), ``abnormality_patterns``
        and ``clinical_relevance_score`` (percentage of abnormal tests,
        rounded to one decimal, ``0`` for an empty input).
    """
    embeddings = self.get_clinical_bert_embeddings(text)

    abnormal_tests = [t for t in test_results if t['status'] != 'normal']

    def is_elevated(keyword: str) -> bool:
        # True when any abnormal test whose name contains *keyword* is high.
        return any(
            keyword in t['test_name'].lower() and 'high' in t['status']
            for t in abnormal_tests
        )

    diseases_detected = []
    if is_elevated('glucose'):
        diseases_detected.append("Potential Diabetes")
    if is_elevated('cholesterol'):
        diseases_detected.append("Dyslipidemia")

    # De-duplicate on the formatted flag. Comparing the raw status against
    # lower-cased flags never matched for statuses containing an underscore
    # ("critical_high" vs "critical high"), so critical flags were repeated.
    status_flags = []
    for test in abnormal_tests:
        flag = test['status'].replace('_', ' ').title()
        if flag not in status_flags:
            status_flags.append(flag)

    if not status_flags:
        status_flags = ["Normal"]

    critical_count = sum(1 for t in test_results if 'critical' in t['status'])
    abnormal_count = len(abnormal_tests)

    abnormality_patterns = []
    if abnormal_count > 0:
        abnormality_patterns.append(
            f"Detected {abnormal_count} abnormal parameter(s)")
    if critical_count > 0:
        abnormality_patterns.append(
            f"{critical_count} critical finding(s) require immediate attention")

    clinical_relevance = min(
        100, (abnormal_count / len(test_results)) * 100) if test_results else 0

    return {
        "embedding_dimension": len(embeddings),
        "clinical_context_captured": True,
        "embeddings_generated": True,
        "diseases_detected": diseases_detected,
        "status_flags": status_flags,
        "abnormality_patterns": abnormality_patterns,
        "clinical_relevance_score": round(clinical_relevance, 1)
    }
|
| 195 |
+
|
| 196 |
+
def generate_patient_summary(self, test_results: list, abnormal_results: list) -> dict:
    """Build a plain-language summary of the lab results for the patient.

    Every test in ``test_results`` is considered, so an abnormal value is
    never left out of ``areas_of_concern`` because of its position in the
    list. Tests without a dedicated explanation fall back to a generic
    "Within/Above/Below normal range" text instead of being skipped.

    Args:
        test_results: Graded test dicts providing ``test_name``, ``value``,
            ``unit`` and ``status``.
        abnormal_results: Dicts for the abnormal tests; only ``severity`` is
            read here (``"critical"`` entries are counted).

    Returns:
        A dict with ``overall_status``, ``explanation``, up to five
        ``key_findings`` (normal tests), all ``areas_of_concern`` (high/low
        tests), ``next_steps`` and ``summary_stats``.
    """
    total_tests = len(test_results)
    normal_count = sum(1 for t in test_results if t['status'] == 'normal')
    abnormal_count = len(abnormal_results)
    critical_count = sum(
        1 for a in abnormal_results if a['severity'] == 'critical')

    # Headline, explanation and next steps depend on the worst severity present.
    if critical_count > 0:
        overall_status = "⚠️ URGENT - IMMEDIATE ATTENTION NEEDED"
        explanation = f"Your lab results show {critical_count} critical finding(s) that require immediate medical attention. Please consult your doctor as soon as possible."
        next_steps = [
            "Contact your doctor immediately",
            "Do not delay medical consultation",
            "Bring these results to your healthcare provider",
            "Follow your doctor's treatment recommendations"
        ]
    elif abnormal_count > 0:
        overall_status = "⚠️ ABNORMALITIES DETECTED"
        explanation = f"Your lab results show {abnormal_count} test(s) outside normal range. While not immediately critical, these findings should be discussed with your healthcare provider."
        next_steps = [
            "Schedule an appointment with your doctor within the next few days",
            "Discuss these results with your healthcare provider",
            "Your doctor may recommend additional tests",
            "Follow any lifestyle or treatment recommendations"
        ]
    else:
        overall_status = "✅ ALL TESTS NORMAL"
        explanation = f"Great news! All {total_tests} lab tests are within normal ranges. Your results indicate good health in the tested parameters."
        next_steps = [
            "Maintain your current healthy lifestyle",
            "Continue regular health checkups",
            "Keep these results for your medical records",
            "Discuss with your doctor during your next routine visit"
        ]

    # Patient-facing wording per test and per direction of deviation.
    test_explanations = {
        "White Blood Cell Count": {
            "normal": "Your immune system is functioning properly",
            "high": "Your body may be fighting an infection or inflammation",
            "low": "Your immune system may be weakened"
        },
        "Red Blood Cell Count": {
            "normal": "Your blood is carrying oxygen efficiently",
            "high": "You may have dehydration or a blood disorder requiring evaluation",
            "low": "You may have anemia, causing fatigue and weakness"
        },
        "Hemoglobin": {
            "normal": "Your blood oxygen levels are healthy",
            "high": "May indicate dehydration or lung problems",
            "low": "You may be anemic - your blood isn't carrying enough oxygen"
        },
        "Hematocrit": {
            "normal": "Blood volume and red blood cell ratio is normal",
            "high": "May indicate dehydration",
            "low": "May indicate anemia or blood loss"
        },
        "Platelet Count": {
            "normal": "Your blood clotting ability is normal",
            "high": "Increased risk of blood clots",
            "low": "Increased risk of bleeding"
        },
        "Glucose": {
            "normal": "Your blood sugar levels are well controlled",
            "high": "Your blood sugar is elevated - may indicate diabetes or prediabetes",
            "low": "Your blood sugar is low - may cause dizziness and weakness"
        },
        "Cholesterol": {
            "normal": "Your cholesterol levels are healthy for your heart",
            "high": "Elevated cholesterol increases heart disease risk",
            "low": "Unusually low cholesterol"
        },
        "Creatinine": {
            "normal": "Your kidneys are filtering waste properly",
            "high": "Your kidneys may not be working optimally",
            "low": "May indicate low muscle mass"
        },
        "Urea": {
            "normal": "Kidney function is normal",
            "high": "May indicate kidney problems or dehydration",
            "low": "May indicate liver problems"
        },
        "ALT": {
            "normal": "Your liver is functioning normally",
            "high": "Your liver may be inflamed or damaged",
            "low": "Generally not concerning"
        },
        "AST": {
            "normal": "Liver and heart function appear normal",
            "high": "May indicate liver or heart problems",
            "low": "Generally not concerning"
        },
        "Bilirubin": {
            "normal": "Liver is processing waste products normally",
            "high": "May cause jaundice - liver may not be functioning properly",
            "low": "Generally not concerning"
        },
        "Albumin": {
            "normal": "Good protein levels and liver function",
            "high": "May indicate dehydration",
            "low": "May indicate liver or kidney disease"
        },
        "Thyroid Stimulating Hormone": {
            "normal": "Your thyroid hormone levels are balanced",
            "high": "Your thyroid may be underactive (hypothyroidism)",
            "low": "Your thyroid may be overactive (hyperthyroidism)"
        },
        "Free T4": {
            "normal": "Thyroid hormone levels are appropriate",
            "high": "May indicate hyperthyroidism",
            "low": "May indicate hypothyroidism"
        }
    }

    key_findings = []
    areas_of_concern = []

    # Walk every test (not just the first ten) so no abnormal value is dropped.
    for test in test_results:
        test_name = test['test_name']
        status = test['status']
        name_lower = test_name.lower()

        # First explanation whose key occurs in the test name wins; an empty
        # dict makes the generic .get() defaults below apply for unknown tests.
        wording = next(
            (texts for key, texts in test_explanations.items()
             if key.lower() in name_lower),
            {},
        )

        if status == 'normal':
            key_findings.append({
                "finding": f"{test_name}: {test['value']} {test['unit']}",
                "explanation": wording.get('normal', 'Within normal range')
            })
        elif 'high' in status.lower():
            areas_of_concern.append({
                "finding": f"{test_name}: {test['value']} {test['unit']} (HIGH)",
                "explanation": wording.get('high', 'Above normal range'),
                "severity": "critical" if "critical" in status else "moderate"
            })
        elif 'low' in status.lower():
            areas_of_concern.append({
                "finding": f"{test_name}: {test['value']} {test['unit']} (LOW)",
                "explanation": wording.get('low', 'Below normal range'),
                "severity": "critical" if "critical" in status else "moderate"
            })

    return {
        "overall_status": overall_status,
        "explanation": explanation,
        # Normal findings are capped to keep the summary short.
        "key_findings": key_findings[:5],
        "areas_of_concern": areas_of_concern,
        "next_steps": next_steps,
        "summary_stats": {
            "total_tests": total_tests,
            "normal_tests": normal_count,
            "abnormal_tests": abnormal_count,
            "critical_findings": critical_count
        }
    }
|
| 357 |
+
|
| 358 |
+
def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> dict:
|
| 359 |
+
start_time = time.time()
|
| 360 |
+
|
| 361 |
+
regex_results = self.extract_with_regex(text)
|
| 362 |
+
ner_results = self.extract_with_ner(text)
|
| 363 |
+
|
| 364 |
+
test_results = regex_results['test_results']
|
| 365 |
+
entities_list = ner_results['entities']
|
| 366 |
+
|
| 367 |
+
abnormal_results = []
|
| 368 |
+
for test in test_results:
|
| 369 |
+
if test['status'] != 'normal':
|
| 370 |
+
severity = 'critical' if 'critical' in test['status'] else 'moderate'
|
| 371 |
+
abnormal_results.append({
|
| 372 |
+
"test_name": test['test_name'],
|
| 373 |
+
"severity": severity,
|
| 374 |
+
"requires_attention": 'critical' in test['status']
|
| 375 |
+
})
|
| 376 |
+
|
| 377 |
+
normal_params = [t['test_name']
|
| 378 |
+
for t in test_results if t['status'] == 'normal']
|
| 379 |
+
key_abnormalities = [
|
| 380 |
+
f"{t['test_name']}: {t['clinical_significance']}" for t in test_results if t['status'] != 'normal']
|
| 381 |
+
|
| 382 |
+
ai_summary = {
|
| 383 |
+
"overall_assessment": f"Detected {len(abnormal_results)} abnormal result(s). {len(normal_params)} parameters within normal limits.",
|
| 384 |
+
"key_abnormalities": key_abnormalities,
|
| 385 |
+
"normal_parameters": normal_params,
|
| 386 |
+
"recommendations": [
|
| 387 |
"Correlate with clinical symptoms",
|
| 388 |
"Consider follow-up testing if symptoms persist",
|
| 389 |
"Consult with healthcare provider for interpretation"
|
| 390 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
}
|
| 392 |
+
|
| 393 |
+
clinical_insights = self.analyze_with_clinical_bert(text, test_results)
|
| 394 |
+
|
| 395 |
+
patient_summary = self.generate_patient_summary(
|
| 396 |
+
test_results, abnormal_results)
|
| 397 |
+
|
| 398 |
+
test_panels = []
|
| 399 |
+
cbc_tests = [t for t in test_results if any(x in t['test_name'].lower(
|
| 400 |
+
) for x in ['blood cell', 'hemoglobin', 'hematocrit', 'platelet'])]
|
| 401 |
+
if cbc_tests:
|
| 402 |
+
test_panels.append({
|
| 403 |
+
"panel_name": "Complete Blood Count",
|
| 404 |
+
"tests_included": [t['test_name'] for t in cbc_tests],
|
| 405 |
+
"panel_status": "abnormal" if any(t['status'] != 'normal' for t in cbc_tests) else "normal",
|
| 406 |
+
"abnormal_count": len([t for t in cbc_tests if t['status'] != 'normal']),
|
| 407 |
+
"total_tests": len(cbc_tests)
|
| 408 |
+
})
|
| 409 |
+
|
| 410 |
+
chem_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
|
| 411 |
+
'glucose', 'creatinine', 'urea', 'cholesterol'])]
|
| 412 |
+
if chem_tests:
|
| 413 |
+
test_panels.append({
|
| 414 |
+
"panel_name": "General Chemistry",
|
| 415 |
+
"tests_included": [t['test_name'] for t in chem_tests],
|
| 416 |
+
"panel_status": "abnormal" if any(t['status'] != 'normal' for t in chem_tests) else "normal",
|
| 417 |
+
"abnormal_count": len([t for t in chem_tests if t['status'] != 'normal']),
|
| 418 |
+
"total_tests": len(chem_tests)
|
| 419 |
+
})
|
| 420 |
+
|
| 421 |
+
liver_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
|
| 422 |
+
'alt', 'ast', 'alp', 'bilirubin', 'albumin'])]
|
| 423 |
+
if liver_tests:
|
| 424 |
+
test_panels.append({
|
| 425 |
+
"panel_name": "Liver Function Panel",
|
| 426 |
+
"tests_included": [t['test_name'] for t in liver_tests],
|
| 427 |
+
"panel_status": "abnormal" if any(t['status'] != 'normal' for t in liver_tests) else "normal",
|
| 428 |
+
"abnormal_count": len([t for t in liver_tests if t['status'] != 'normal']),
|
| 429 |
+
"total_tests": len(liver_tests)
|
| 430 |
+
})
|
| 431 |
+
|
| 432 |
+
thyroid_tests = [t for t in test_results if any(
|
| 433 |
+
x in t['test_name'].lower() for x in ['thyroid', 'tsh', 't4', 't3'])]
|
| 434 |
+
if thyroid_tests:
|
| 435 |
+
test_panels.append({
|
| 436 |
+
"panel_name": "Thyroid Function Panel",
|
| 437 |
+
"tests_included": [t['test_name'] for t in thyroid_tests],
|
| 438 |
+
"panel_status": "abnormal" if any(t['status'] != 'normal' for t in thyroid_tests) else "normal",
|
| 439 |
+
"abnormal_count": len([t for t in thyroid_tests if t['status'] != 'normal']),
|
| 440 |
+
"total_tests": len(thyroid_tests)
|
| 441 |
})
|
| 442 |
+
|
|
|
|
|
|
|
|
|
|
| 443 |
chart_data = []
|
|
|
|
| 444 |
for test in test_results:
|
| 445 |
if test['reference_range']:
|
| 446 |
chart_data.append({
|
|
|
|
| 449 |
"ref_min": test['reference_range']['min'],
|
| 450 |
"ref_max": test['reference_range']['max']
|
| 451 |
})
|
| 452 |
+
|
| 453 |
+
visualization_data = {
|
| 454 |
"charts": [{
|
| 455 |
"chart_type": "bar",
|
| 456 |
"title": "Lab Results vs Reference Range",
|
|
|
|
| 458 |
}],
|
| 459 |
"trend_data": []
|
| 460 |
}
|
| 461 |
+
|
| 462 |
+
ner_stats = {}
|
| 463 |
+
for ent in entities_list:
|
| 464 |
+
label = ent['label']
|
| 465 |
+
ner_stats[label] = ner_stats.get(label, 0) + 1
|
| 466 |
+
|
| 467 |
+
test_category = "hematology"
|
| 468 |
+
sub_category = "complete_blood_count"
|
| 469 |
+
urgency_level = "critical" if len(
|
| 470 |
+
[a for a in abnormal_results if a['severity'] == 'critical']) > 0 else "routine"
|
| 471 |
+
|
| 472 |
+
if any('glucose' in t['test_name'].lower() for t in test_results):
|
| 473 |
+
test_category = "clinical_chemistry"
|
| 474 |
+
sub_category = "metabolic_panel"
|
| 475 |
+
|
| 476 |
+
classification = {
|
| 477 |
+
"test_category": test_category,
|
| 478 |
+
"sub_category": sub_category,
|
| 479 |
+
"urgency_level": urgency_level,
|
| 480 |
+
"confidence": 0.96
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
extraction_stats = {
|
| 484 |
+
"tests_with_values": len(test_results),
|
| 485 |
+
"additional_tests_found": len([e for e in entities_list if e['label'] == 'TEST_NAME']),
|
| 486 |
+
"diseases_detected": len(clinical_insights['diseases_detected']),
|
| 487 |
+
"interpretations_found": len([t for t in test_results if t['status'] != 'normal']),
|
| 488 |
+
"ner_model_stats": ner_stats
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
processing_time_ms = int((time.time() - start_time) * 1000)
|
| 492 |
+
|
| 493 |
+
metadata = {
|
| 494 |
+
"model_version": "radiolo_smart_ner_v2.0.0",
|
| 495 |
+
"processing_date": datetime.utcnow().isoformat() + "Z",
|
| 496 |
+
"tests_extracted": len(test_results),
|
| 497 |
+
"confidence_score": 0.94,
|
| 498 |
+
"nlp_models": {
|
| 499 |
+
"ner": "Custom Lab NER (Smart Filtered)",
|
| 500 |
+
"clinical_bert": "ClinicalDistilBERT",
|
| 501 |
+
"extraction_method": "Hybrid (Regex + Filtered NER)"
|
| 502 |
+
}
|
| 503 |
+
}
|
| 504 |
+
|
| 505 |
+
return {
|
| 506 |
+
"report_id": report_id or f"lab_{int(time.time())}",
|
| 507 |
+
"report_type": "laboratory",
|
| 508 |
+
"processing_time_ms": processing_time_ms,
|
| 509 |
+
"classification": classification,
|
| 510 |
+
"extraction_stats": extraction_stats,
|
| 511 |
+
"entities": entities_list,
|
| 512 |
+
"test_results": test_results,
|
| 513 |
+
"abnormal_results": abnormal_results,
|
| 514 |
+
"ai_summary": ai_summary,
|
| 515 |
+
"clinical_insights": clinical_insights,
|
| 516 |
+
"patient_friendly_summary": patient_summary,
|
| 517 |
+
"test_panels": test_panels,
|
| 518 |
+
"visualization_data": visualization_data,
|
| 519 |
+
"metadata": metadata
|
| 520 |
+
}
|
app/main.py
CHANGED
|
@@ -78,12 +78,13 @@ async def root():
|
|
| 78 |
"compression": "gzip",
|
| 79 |
"ocr_engine": "EasyOCR",
|
| 80 |
"ner_model": "Custom Lab NER",
|
|
|
|
| 81 |
"supported_tests": 16
|
| 82 |
},
|
| 83 |
"endpoints": {
|
| 84 |
"health": "/health",
|
| 85 |
"analyze": "/analyze-lab-secure",
|
| 86 |
-
"test": "/test-analyze"
|
| 87 |
},
|
| 88 |
"supported_formats": ["pdf", "image"],
|
| 89 |
"supported_lab_tests": [
|
|
@@ -107,13 +108,8 @@ async def health_check():
|
|
| 107 |
"supported_tests": 16
|
| 108 |
}
|
| 109 |
|
| 110 |
-
|
| 111 |
@app.post("/test-analyze", tags=["Testing"])
|
| 112 |
async def test_analyze(file: UploadFile = File(...)):
|
| 113 |
-
"""
|
| 114 |
-
Test endpoint without encryption - upload file directly
|
| 115 |
-
⚠️ WARNING: For testing only! No encryption!
|
| 116 |
-
"""
|
| 117 |
start_time = time.time()
|
| 118 |
|
| 119 |
try:
|
|
@@ -123,7 +119,7 @@ async def test_analyze(file: UploadFile = File(...)):
|
|
| 123 |
file_bytes = await file.read()
|
| 124 |
filename = file.filename
|
| 125 |
|
| 126 |
-
|
| 127 |
|
| 128 |
if filename.lower().endswith('.pdf'):
|
| 129 |
file_type = "pdf"
|
|
@@ -140,9 +136,9 @@ async def test_analyze(file: UploadFile = File(...)):
|
|
| 140 |
if not extracted_text or len(extracted_text.strip()) < 10:
|
| 141 |
raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
|
| 142 |
|
| 143 |
-
|
| 144 |
|
| 145 |
-
|
| 146 |
lab_analysis = lab_processor.extract_and_format(
|
| 147 |
extracted_text,
|
| 148 |
report_id=f"test_{int(time.time())}",
|
|
@@ -151,8 +147,8 @@ async def test_analyze(file: UploadFile = File(...)):
|
|
| 151 |
|
| 152 |
processing_time = time.time() - start_time
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
|
| 157 |
response_data = {
|
| 158 |
"status": "success",
|
|
@@ -177,7 +173,6 @@ async def test_analyze(file: UploadFile = File(...)):
|
|
| 177 |
traceback.print_exc()
|
| 178 |
raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
|
| 179 |
|
| 180 |
-
|
| 181 |
@app.post("/analyze-lab-secure", tags=["Lab Analysis"])
|
| 182 |
async def analyze_lab_secure(request: EncryptedRequest):
|
| 183 |
start_time = time.time()
|
|
|
|
| 78 |
"compression": "gzip",
|
| 79 |
"ocr_engine": "EasyOCR",
|
| 80 |
"ner_model": "Custom Lab NER",
|
| 81 |
+
"patient_friendly_summary": "AI-Generated Explanations",
|
| 82 |
"supported_tests": 16
|
| 83 |
},
|
| 84 |
"endpoints": {
|
| 85 |
"health": "/health",
|
| 86 |
"analyze": "/analyze-lab-secure",
|
| 87 |
+
"test": "/test-analyze"
|
| 88 |
},
|
| 89 |
"supported_formats": ["pdf", "image"],
|
| 90 |
"supported_lab_tests": [
|
|
|
|
| 108 |
"supported_tests": 16
|
| 109 |
}
|
| 110 |
|
|
|
|
| 111 |
@app.post("/test-analyze", tags=["Testing"])
|
| 112 |
async def test_analyze(file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
start_time = time.time()
|
| 114 |
|
| 115 |
try:
|
|
|
|
| 119 |
file_bytes = await file.read()
|
| 120 |
filename = file.filename
|
| 121 |
|
| 122 |
+
print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")
|
| 123 |
|
| 124 |
if filename.lower().endswith('.pdf'):
|
| 125 |
file_type = "pdf"
|
|
|
|
| 136 |
if not extracted_text or len(extracted_text.strip()) < 10:
|
| 137 |
raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
|
| 138 |
|
| 139 |
+
print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
|
| 140 |
|
| 141 |
+
print("🧠 Processing with NER + ClinicalDistilBERT...")
|
| 142 |
lab_analysis = lab_processor.extract_and_format(
|
| 143 |
extracted_text,
|
| 144 |
report_id=f"test_{int(time.time())}",
|
|
|
|
| 147 |
|
| 148 |
processing_time = time.time() - start_time
|
| 149 |
|
| 150 |
+
print(f"✅ Processing complete in {processing_time:.2f}s")
|
| 151 |
+
print(f" Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")
|
| 152 |
|
| 153 |
response_data = {
|
| 154 |
"status": "success",
|
|
|
|
| 173 |
traceback.print_exc()
|
| 174 |
raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
|
| 175 |
|
|
|
|
| 176 |
@app.post("/analyze-lab-secure", tags=["Lab Analysis"])
|
| 177 |
async def analyze_lab_secure(request: EncryptedRequest):
|
| 178 |
start_time = time.time()
|
decrypt_response.py
CHANGED
|
@@ -9,7 +9,7 @@ import os
|
|
| 9 |
from nacl.secret import SecretBox
|
| 10 |
|
| 11 |
# Your hex key from .env
|
| 12 |
-
SECRET_KEY_HEX = "
|
| 13 |
|
| 14 |
# Convert hex to bytes (32 bytes)
|
| 15 |
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
|
|
|
|
| 9 |
from nacl.secret import SecretBox
|
| 10 |
|
| 11 |
# Your hex key from .env
|
| 12 |
+
SECRET_KEY_HEX = "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66"
|
| 13 |
|
| 14 |
# Convert hex to bytes (32 bytes)
|
| 15 |
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
|
generate_postman_request.py
CHANGED
|
@@ -10,7 +10,7 @@ from nacl.secret import SecretBox
|
|
| 10 |
from nacl.utils import random
|
| 11 |
|
| 12 |
# Your 64-character hex key from .env
|
| 13 |
-
SECRET_KEY_HEX = "
|
| 14 |
|
| 15 |
# Convert hex to bytes (32 bytes)
|
| 16 |
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
|
|
|
|
| 10 |
from nacl.utils import random
|
| 11 |
|
| 12 |
# Your 64-character hex key from .env
|
| 13 |
+
SECRET_KEY_HEX = "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66"
|
| 14 |
|
| 15 |
# Convert hex to bytes (32 bytes)
|
| 16 |
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
|