MakPr016 committed on
Commit
6c66675
·
1 Parent(s): 648d9f3

Analysis includes NLP summary

Browse files
app/lab_processor.py CHANGED
@@ -1,487 +1,446 @@
1
- """
2
- Lab Report Processing with Smart NER + Regex + ClinicalDistilBERT
3
- Based on your proven local implementation
4
- """
5
-
6
  import spacy
7
  import re
8
- import time
9
  import torch
10
  from datetime import datetime
11
- from typing import Dict, List, Set
12
- from collections import defaultdict
13
- from transformers import AutoTokenizer, AutoModel
14
 
15
# Reference intervals for the lab tests this module can classify.
# Each tuple is (test name, lower bound, upper bound, unit); the comprehension
# expands it into the {"min", "max", "unit"} mapping stored under the test name.
REFERENCE_RANGES = {
    test_name: {"min": lower, "max": upper, "unit": unit}
    for test_name, lower, upper, unit in (
        ("White Blood Cell Count", 4.0, 11.0, "x10^9/L"),
        ("Red Blood Cell Count", 4.2, 5.9, "x10^12/L"),
        ("Hemoglobin", 13.5, 17.5, "g/dL"),
        ("Hematocrit", 38.3, 48.6, "%"),
        ("Platelet Count", 150, 450, "x10^9/L"),
        ("Glucose", 70, 99, "mg/dL"),
        ("Creatinine", 0.6, 1.2, "mg/dL"),
        ("Urea", 15, 50, "mg/dL"),
        ("Cholesterol", 0, 200, "mg/dL"),
        ("Alanine Aminotransferase", 7, 56, "U/L"),
        ("Aspartate Aminotransferase", 8, 48, "U/L"),
        ("Alkaline Phosphatase", 40, 129, "U/L"),
        ("Bilirubin", 0.3, 1.9, "mg/dL"),
        ("Albumin", 3.5, 5.5, "g/dL"),
        ("Thyroid Stimulating Hormone", 0.5, 4.5, "mIU/L"),
        ("Free Thyroxine", 0.9, 1.7, "ng/dL"),
    )
}
33
 
34
  class RadioloLabProcessor:
35
-
36
def __init__(self, ner_model_path: str):
    """Load the NER pipeline and ClinicalDistilBERT, then build the text filters.

    Args:
        ner_model_path: Model path or package name handed to ``spacy.load``.
    """
    # Custom spaCy pipeline used for entity tagging.
    self.nlp = spacy.load(ner_model_path)
    print(f"✓ Lab NER model loaded: {ner_model_path}")

    print("Loading ClinicalDistilBERT...")
    checkpoint = "nlpie/clinical-distilbert"
    self.clinical_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    self.clinical_model = AutoModel.from_pretrained(checkpoint)

    # Run on the GPU when one is visible, otherwise on the CPU; the model is
    # only used for inference, hence eval mode.
    target = "cuda" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(target)
    self.clinical_model = self.clinical_model.to(self.device)
    self.clinical_model.eval()
    print(f" ClinicalDistilBERT loaded on {self.device}")

    # Lower-cased entity texts that must never be reported as test names.
    layout_words = {
        'hemolab', 'central', 'medicity', 'wellbeing', 'healthland',
        'laboratory', 'health', 'ave', 'page',
    }
    metadata_words = {
        'age', 'gender', 'email', 'sample', 'results', 'verified by',
        'processing', 'details',
    }
    table_headers = {
        'test', 'result', 'unit', 'normal', 'range', 'status',
        'normal range', 'result status',
    }
    section_headers = {
        'hematology', 'biochemistry', 'liver function', 'thyroid function',
        'kidney function', 'lipid profile',
    }
    person_words = {'john', 'doe', 'johnatan', 'emily', 'johnson', 'dr'}
    bare_numbers = {'30', '123', '12345'}
    self.stopwords = set().union(
        layout_words, metadata_words, table_headers,
        section_headers, person_words, bare_numbers,
    )

    # Keywords an NER "TEST_NAME" entity must contain to be kept.
    self.valid_tests = set().union(
        {'white blood cell count', 'wbc', 'red blood cell count', 'rbc'},
        {'hemoglobin', 'hgb', 'hb', 'hematocrit', 'hct'},
        {'platelet count', 'platelets', 'plt'},
        {'mcv', 'mch', 'mchc'},
        {'glucose', 'glu', 'creatinine', 'urea', 'bun'},
        {'cholesterol', 'ldl', 'hdl', 'triglycerides'},
        {'alt', 'ast', 'alp', 'bilirubin', 'albumin'},
        {'tsh', 'ft4', 'free thyroxine', 'hba1c', 'a1c'},
        {'sodium', 'potassium', 'calcium', 'chloride'},
        {'aminotransferase', 'phosphatase'},
    )

    # "<test name> : <number> <optional unit>" with three capture groups:
    # name, numeric value, unit.
    recognised_names = (
        'White Blood Cell Count', 'Red Blood Cell Count', 'Hemoglobin',
        'Hematocrit', 'Platelet Count', 'Glucose', 'Creatinine', 'Urea',
        'Cholesterol', 'Alanine Aminotransferase',
        'Aspartate Aminotransferase', 'Alkaline Phosphatase', 'Bilirubin',
        'Albumin', 'Thyroid Stimulating Hormone', 'Free Thyroxine',
        'WBC', 'RBC', 'HGB', 'HCT', 'PLT', 'ALT', 'AST', 'ALP', 'TSH',
        'FT4', 'HbA1c',
    )
    name_group = '(' + '|'.join(recognised_names) + ')'
    value_tail = r'\s*[:\n]\s*(\d+\.?\d*)\s*([a-zA-Z/%^0-9]+)?'
    self.lab_value_pattern = re.compile(name_group + value_tail, re.IGNORECASE)

    # Free-text status words (case-sensitive).
    self.status_pattern = re.compile(r'\b(Elevated|High|Low|Normal|Critical|Abnormal)\b')
110
-
111
def _normalize_test_name(self, name: str) -> str:
    """Return the canonical display name for a lab test.

    Abbreviations (``wbc``, ``hgb``, ``tsh`` ...) are expanded to their full
    names, and full names are matched case-insensitively so that
    ``"HEMOGLOBIN"`` and ``"hemoglobin"`` both become ``"Hemoglobin"``.

    Args:
        name: Test name as found in the report text.

    Returns:
        The canonical name when the input is recognised, otherwise ``name``
        unchanged.
    """
    canonical_names = (
        'White Blood Cell Count', 'Red Blood Cell Count', 'Hemoglobin',
        'Hematocrit', 'Platelet Count', 'Glucose', 'Creatinine', 'Urea',
        'Cholesterol', 'Alanine Aminotransferase',
        'Aspartate Aminotransferase', 'Alkaline Phosphatase', 'Bilirubin',
        'Albumin', 'Thyroid Stimulating Hormone', 'Free Thyroxine', 'HbA1c',
    )
    abbreviations = {
        'wbc': 'White Blood Cell Count',
        'rbc': 'Red Blood Cell Count',
        'hgb': 'Hemoglobin',
        'hb': 'Hemoglobin',
        'hct': 'Hematocrit',
        'plt': 'Platelet Count',
        'platelets': 'Platelet Count',
        'glu': 'Glucose',
        'alt': 'Alanine Aminotransferase',
        'ast': 'Aspartate Aminotransferase',
        'alp': 'Alkaline Phosphatase',
        'tsh': 'Thyroid Stimulating Hormone',
        'ft4': 'Free Thyroxine',
    }

    # The extraction regex is case-insensitive while reference ranges are
    # keyed by the exact canonical spelling, so full names need their casing
    # restored too, not just the abbreviations.
    lookup = {full_name.lower(): full_name for full_name in canonical_names}
    lookup.update(abbreviations)

    return lookup.get(name.lower().strip(), name)
132
-
133
def _calculate_status(self, test_name: str, value: float) -> Dict:
    """Classify a value against the reference range of ``test_name``.

    Args:
        test_name: Key looked up in ``REFERENCE_RANGES``.
        value: Measured value.

    Returns:
        A dict with ``status`` (``unknown``, ``normal``, ``low``, ``high``,
        ``critical_low`` or ``critical_high``), ``deviation_percentage``
        (rounded to two decimals) and ``clinical_significance`` (a short
        human-readable sentence).
    """
    ref_range = REFERENCE_RANGES.get(test_name)
    if not ref_range:
        return {
            "status": "unknown",
            "deviation_percentage": 0.0,
            "clinical_significance": "Reference range not available"
        }

    min_val, max_val = ref_range['min'], ref_range['max']

    def percent_beyond(gap, bound):
        # Deviation is expressed relative to the violated bound. A bound of
        # zero (e.g. a lower limit of 0) would divide by zero, so fall back
        # to the width of the range, and finally to 1.
        base = abs(bound) or abs(max_val - min_val) or 1.0
        return (gap / base) * 100

    if value < min_val:
        deviation = percent_beyond(min_val - value, min_val)
        # More than 50% outside the bound is treated as critical.
        status = "critical_low" if deviation > 50 else "low"
        significance = f"Below normal range (↓{deviation:.1f}%)"
    elif value > max_val:
        deviation = percent_beyond(value - max_val, max_val)
        status = "critical_high" if deviation > 50 else "high"
        significance = f"Above normal range (↑{deviation:.1f}%)"
    else:
        deviation = 0.0
        status = "normal"
        significance = "Within normal limits"

    return {
        "status": status,
        "deviation_percentage": round(deviation, 2),
        "clinical_significance": significance
    }
164
-
165
def _get_clinical_embeddings(self, text: str) -> torch.Tensor:
    """Encode ``text`` with the clinical model and return its first-token vector.

    Args:
        text: Text to embed; the tokenizer truncates it to 512 tokens.

    Returns:
        ``last_hidden_state[:, 0, :]`` of the model output, i.e. the hidden
        state at sequence position 0 for every row of the batch.
    """
    tokenizer_options = {
        "return_tensors": "pt",
        "truncation": True,
        "max_length": 512,
        "padding": True,
        "return_token_type_ids": False,
    }
    encoded = self.clinical_tokenizer(text, **tokenizer_options).to(self.device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        hidden_states = self.clinical_model(**encoded).last_hidden_state
        first_token_vectors = hidden_states[:, 0, :]

    return first_token_vectors
181
-
182
def _generate_clinical_insights(self, text: str, abnormal_results: List[Dict],
                                diseases: Set[str], interpretations: Set[str]) -> Dict:
    """Summarise abnormal findings alongside embedding metadata.

    Args:
        text: Report text; only its first 512 characters are embedded.
        abnormal_results: Dicts that may carry a ``severity`` key; entries
            whose severity is ``"critical"`` weigh 30 points, all others 10.
        diseases: Disease mentions to report.
        interpretations: Status words to report.

    Returns:
        A dict with the embedding width, the (sorted) diseases and status
        flags, human-readable abnormality patterns and a relevance score
        capped at 100.
    """
    embeddings = self._get_clinical_embeddings(text[:512])

    total_abnormal = len(abnormal_results)
    critical_count = sum(1 for r in abnormal_results if r.get('severity') == 'critical')
    moderate_count = total_abnormal - critical_count

    if total_abnormal:
        patterns = [f"Detected {total_abnormal} abnormal parameter(s)"]
        if critical_count > 0:
            patterns.append(
                f"{critical_count} critical finding(s) require immediate attention"
            )
        relevance_score = round(
            min(100.0, (critical_count * 30.0) + (moderate_count * 10.0)), 2
        )
    else:
        patterns = ["All parameters within normal clinical ranges"]
        relevance_score = 0.0

    return {
        "embedding_dimension": embeddings.shape[1],
        "clinical_context_captured": True,
        "embeddings_generated": True,
        # Sets have no stable iteration order; sort so that identical
        # reports always produce identical output.
        "diseases_detected": sorted(diseases),
        "status_flags": sorted(interpretations),
        "abnormality_patterns": patterns,
        "clinical_relevance_score": relevance_score
    }
219
-
220
def _smart_ner_extraction(self, doc, extracted_test_names: Set[str]) -> tuple:
    """Collect extra test names, diseases and interpretations from NER entities.

    Args:
        doc: Object exposing ``ents``; each entity needs ``text`` and
            ``label_``.
        extracted_test_names: Lower-cased test names already found by the
            regex pass; entities naming one of them are skipped.

    Returns:
        ``(additional_tests, diseases, interpretations, ner_stats)`` where
        ``additional_tests`` is a list of value-less test dicts,
        ``diseases`` and ``interpretations`` are sets of entity texts and
        ``ner_stats`` counts every entity by label.
    """
    additional_tests = []
    diseases = set()
    interpretations = set()
    ner_stats = defaultdict(int)

    # Keywords must match as whole words (optionally pluralised). A plain
    # substring test lets 'ast' accept "fasting" and 'alt' accept "health".
    escaped_keywords = [re.escape(keyword) for keyword in self.valid_tests]
    keyword_matcher = (
        re.compile(r'\b(?:' + '|'.join(escaped_keywords) + r')s?\b')
        if escaped_keywords else None
    )

    for ent in doc.ents:
        ner_stats[ent.label_] += 1

        if ent.label_ == 'TEST_NAME':
            ent_lower = ent.text.lower()

            # Skip document boilerplate.
            if ent_lower in self.stopwords:
                continue

            # Skip things that look like a date.
            if re.match(r'\d+/\d+/\d+', ent.text):
                continue

            # Skip bare numbers.
            if re.match(r'^\d+$', ent.text):
                continue

            # Skip tests the regex pass already produced. The regex names
            # are canonical, so compare the canonical form as well: "WBC"
            # must not be re-added next to "White Blood Cell Count".
            canonical_lower = self._normalize_test_name(ent.text).lower()
            if ent_lower in extracted_test_names or canonical_lower in extracted_test_names:
                continue

            # Keep only entities that mention a known lab-test keyword.
            if keyword_matcher is not None and keyword_matcher.search(ent_lower):
                additional_tests.append({
                    'testname': ent.text,
                    'value': None,
                    'unit': None,
                    'source': 'ner'
                })

        elif ent.label_ == 'DISEASE':
            if ent.text.lower() not in self.stopwords:
                diseases.add(ent.text)

        elif ent.label_ == 'INTERPRETATION':
            interpretations.add(ent.text)

    return additional_tests, diseases, interpretations, ner_stats
266
-
267
def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> Dict:
    """Extract lab values from report text and assemble the response payload.

    The regex pass yields the valued tests, the NER pass contributes extra
    test names, diseases and interpretations, and every valued test is then
    classified against its reference range.

    Args:
        text: Raw report text.
        report_id: Identifier echoed in the payload; a time-based id is
            generated when omitted.
        patient_id: Accepted for interface compatibility; not used here.

    Returns:
        A dict with classification, extraction statistics, entities (at most
        20), test results, abnormal results, summaries, panels, chart data
        and metadata.
    """
    start_time = time.time()

    # Step 1: regex extraction of "<test name>: <number> <unit>" readings.
    raw_tests = []
    seen_tests = set()
    for match in self.lab_value_pattern.finditer(text):
        test_name = self._normalize_test_name(match.group(1).strip())
        try:
            value = float(match.group(2))
        except ValueError:
            # Only an unparsable number is skippable; any other error
            # should surface instead of being swallowed.
            continue

        # The same test with the same value is reported once.
        test_key = (test_name.lower(), value)
        if test_key in seen_tests:
            continue
        seen_tests.add(test_key)
        raw_tests.append({
            'testname': test_name,
            'value': value,
            'unit': match.group(3) or None,
            'source': 'regex'
        })

    extracted_test_names = {t['testname'].lower() for t in raw_tests}

    # Step 2: filtered NER extraction.
    doc = self.nlp(text)
    additional_tests, diseases, interpretations, ner_stats = self._smart_ner_extraction(
        doc, extracted_test_names
    )

    # Status words found in the text, except when they sit next to "Range"
    # (which would be the "Normal Range" column header).
    for match in self.status_pattern.finditer(text):
        context = text[max(0, match.start() - 10):match.end() + 10]
        if 'Range' not in context:
            interpretations.add(match.group(1))

    entities_for_output = [
        {
            "text": ent.text,
            "label": ent.label_,
            "start_char": ent.start_char,
            "end_char": ent.end_char,
            "confidence": 0.92
        }
        for ent in doc.ents
    ]

    # Step 3: attach reference ranges and classify each valued test.
    abnormal_statuses = ('low', 'high', 'critical_low', 'critical_high')
    test_results = []
    abnormal_results = []
    for test in raw_tests:
        test_name = test['testname']
        value = test['value']
        unit = test['unit']

        ref_range = REFERENCE_RANGES.get(test_name, {})
        status_info = self._calculate_status(test_name, value)

        test_results.append({
            "test_name": test_name,
            "value": value,
            "unit": unit or ref_range.get('unit', ''),
            "reference_range": {
                "min": ref_range.get('min'),
                "max": ref_range.get('max'),
                "unit": ref_range.get('unit', unit or '')
            } if ref_range else None,
            "status": status_info['status'],
            "deviation_percentage": status_info['deviation_percentage'],
            "clinical_significance": status_info['clinical_significance'],
            "trend": None,
            "source": test['source']
        })

        if status_info['status'] in abnormal_statuses:
            severity = "critical" if 'critical' in status_info['status'] else "moderate"
            abnormal_results.append({
                "test_name": test_name,
                "severity": severity,
                "requires_attention": True
            })

    # Step 4: summaries, panels and chart data.
    ai_summary = self._generate_summary(test_results, abnormal_results)
    test_panels = self._group_into_panels(test_results)
    visualization_data = self._generate_visualization_data(test_results)

    # Step 5: clinical insights.
    clinical_insights = self._generate_clinical_insights(
        text, abnormal_results, diseases, interpretations
    )

    if any(r['severity'] == 'critical' for r in abnormal_results):
        urgency_level = "critical"
    elif abnormal_results:
        urgency_level = "abnormal"
    else:
        urgency_level = "routine"

    processing_time = int((time.time() - start_time) * 1000)

    return {
        "report_id": report_id or f"rep_{int(time.time())}",
        "report_type": "laboratory",
        "processing_time_ms": processing_time,

        "classification": {
            "test_category": self._determine_category(test_results),
            "sub_category": "complete_blood_count",
            "urgency_level": urgency_level,
            "confidence": 0.96
        },

        "extraction_stats": {
            "tests_with_values": len(test_results),
            "additional_tests_found": len(additional_tests),
            "diseases_detected": len(diseases),
            "interpretations_found": len(interpretations),
            "ner_model_stats": dict(ner_stats)
        },

        "entities": entities_for_output[:20],
        "test_results": test_results,
        "abnormal_results": abnormal_results,
        "ai_summary": ai_summary,
        "clinical_insights": clinical_insights,
        "test_panels": test_panels,
        "visualization_data": visualization_data,

        "metadata": {
            "model_version": "radiolo_smart_ner_v2.0.0",
            "processing_date": datetime.utcnow().isoformat() + "Z",
            "tests_extracted": len(test_results),
            "confidence_score": 0.94,
            "nlp_models": {
                "ner": "Custom Lab NER (Smart Filtered)",
                "clinical_bert": "ClinicalDistilBERT",
                "extraction_method": "Hybrid (Regex + Filtered NER)"
            }
        }
    }
408
-
409
def _determine_category(self, test_results: List[Dict]) -> str:
    """Pick the report category from the names of the extracted tests.

    Categories are checked in priority order: hematology, liver function,
    thyroid function; anything else is general chemistry.

    Args:
        test_results: Dicts carrying a ``test_name`` key.

    Returns:
        ``"hematology"``, ``"liver_function"``, ``"thyroid_function"`` or
        ``"general_chemistry"``.
    """
    # Names are compared lower-cased. The platelet test is normalised to
    # "Platelet Count", so the bare word "platelet" alone never matched.
    hematology_names = {'hemoglobin', 'hematocrit', 'platelet count', 'platelets', 'platelet'}
    liver_names = {
        'alanine aminotransferase', 'aspartate aminotransferase',
        'alkaline phosphatase', 'bilirubin', 'albumin',
    }
    thyroid_names = {'thyroid stimulating hormone', 'free thyroxine'}

    test_names = {t['test_name'].lower() for t in test_results}

    if any('blood cell' in name or name in hematology_names for name in test_names):
        return "hematology"
    if any(name in liver_names for name in test_names):
        return "liver_function"
    if any('thyroid' in name or name in thyroid_names for name in test_names):
        return "thyroid_function"
    return "general_chemistry"
420
-
421
def _generate_summary(self, test_results: List[Dict], abnormal_results: List[Dict]) -> Dict:
    """Build the textual summary block of the report.

    Args:
        test_results: Classified tests (``test_name``, ``status``,
            ``clinical_significance``).
        abnormal_results: Abnormal entries (``test_name``).

    Returns:
        A dict with ``overall_assessment``, ``key_abnormalities``,
        ``normal_parameters`` and ``recommendations``.
    """
    normal_tests = []
    for entry in test_results:
        if entry['status'] == 'normal':
            normal_tests.append(entry['test_name'])
    abnormal_tests = [entry['test_name'] for entry in abnormal_results]

    if abnormal_tests:
        overall = f"Detected {len(abnormal_tests)} abnormal result(s). {len(normal_tests)} parameters within normal limits."
        recommendations = [
            "Correlate with clinical symptoms",
            "Consider follow-up testing if symptoms persist",
            "Consult with healthcare provider for interpretation"
        ]
    else:
        overall = "All test results are within normal limits."
        recommendations = ["No immediate action required", "Continue regular health monitoring"]

    def first_result_named(wanted):
        # First classified test carrying this name, or None.
        for candidate in test_results:
            if candidate['test_name'] == wanted:
                return candidate
        return None

    key_abnormalities = []
    for flagged in abnormal_results:
        detail = first_result_named(flagged['test_name'])
        if not detail:
            continue
        key_abnormalities.append(
            f"{flagged['test_name']}: {detail['clinical_significance']}"
        )

    summary = {}
    summary["overall_assessment"] = overall
    summary["key_abnormalities"] = key_abnormalities
    summary["normal_parameters"] = normal_tests
    summary["recommendations"] = recommendations
    return summary
450
-
451
def _group_into_panels(self, test_results: List[Dict]) -> List[Dict]:
    """Group classified tests into named panels and summarise each panel.

    Args:
        test_results: Dicts carrying ``test_name`` and ``status``.

    Returns:
        One dict per non-empty panel, in order of first appearance, with
        ``panel_name``, ``tests_included``, ``panel_status``,
        ``abnormal_count`` and ``total_tests``.
    """
    cbc_tests = {'White Blood Cell Count', 'Red Blood Cell Count', 'Hemoglobin', 'Hematocrit', 'Platelet Count'}
    liver_tests = {'Alanine Aminotransferase', 'Aspartate Aminotransferase', 'Alkaline Phosphatase', 'Bilirubin', 'Albumin'}
    thyroid_tests = {'Thyroid Stimulating Hormone', 'Free Thyroxine'}

    # Only out-of-range results count as abnormal. A test whose status is
    # "unknown" (no reference range) must not flag its whole panel.
    abnormal_statuses = {'low', 'high', 'critical_low', 'critical_high'}

    panels = defaultdict(list)
    for test in test_results:
        name = test['test_name']
        if name in cbc_tests:
            panel_name = 'Complete Blood Count'
        elif name in liver_tests:
            panel_name = 'Liver Function Panel'
        elif name in thyroid_tests:
            panel_name = 'Thyroid Function Panel'
        else:
            panel_name = 'General Chemistry'
        panels[panel_name].append(test)

    panel_list = []
    for panel_name, tests in panels.items():
        abnormal_count = sum(1 for t in tests if t['status'] in abnormal_statuses)
        panel_list.append({
            "panel_name": panel_name,
            "tests_included": [t['test_name'] for t in tests],
            "panel_status": "abnormal" if abnormal_count > 0 else "normal",
            "abnormal_count": abnormal_count,
            "total_tests": len(tests)
        })

    return panel_list
481
-
482
- def _generate_visualization_data(self, test_results: List[Dict]) -> Dict:
483
  chart_data = []
484
-
485
  for test in test_results:
486
  if test['reference_range']:
487
  chart_data.append({
@@ -490,8 +449,8 @@ class RadioloLabProcessor:
490
  "ref_min": test['reference_range']['min'],
491
  "ref_max": test['reference_range']['max']
492
  })
493
-
494
- return {
495
  "charts": [{
496
  "chart_type": "bar",
497
  "title": "Lab Results vs Reference Range",
@@ -499,3 +458,63 @@ class RadioloLabProcessor:
499
  }],
500
  "trend_data": []
501
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import spacy
2
  import re
3
+ from transformers import AutoTokenizer, AutoModel
4
  import torch
5
  from datetime import datetime
6
+ import time
 
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  class RadioloLabProcessor:
10
def __init__(self, model_path: str):
    """Load the spaCy pipeline and ClinicalDistilBERT and set up reference ranges.

    Args:
        model_path: Model path or package name handed to ``spacy.load``.
    """
    self.nlp = spacy.load(model_path)

    checkpoint = "nlpie/clinical-distilbert"
    self.clinical_bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    self.clinical_bert_model = AutoModel.from_pretrained(checkpoint)

    # (test name, unit, lower bound, upper bound) for every supported test.
    reference_rows = (
        ("White Blood Cell Count", "x10^9/L", 4.0, 11.0),
        ("Red Blood Cell Count", "x10^12/L", 4.2, 5.9),
        ("Hemoglobin", "g/dL", 13.5, 17.5),
        ("Hematocrit", "%", 38.3, 48.6),
        ("Platelet Count", "x10^9/L", 150, 450),
        ("Glucose", "mg/dL", 70, 99),
        ("Creatinine", "mg/dL", 0.6, 1.2),
        ("Urea", "mg/dL", 15, 50),
        ("Cholesterol", "mg/dL", 0, 200),
        ("ALT", "U/L", 7, 56),
        ("AST", "U/L", 10, 40),
        ("ALP", "U/L", 44, 147),
        ("Bilirubin", "mg/dL", 0.3, 1.9),
        ("Albumin", "g/dL", 3.5, 5.5),
        ("Thyroid Stimulating Hormone", "mIU/L", 0.5, 4.5),
        ("Free T4", "ng/dL", 0.8, 1.8),
    )
    self.lab_tests = {
        test_name: {"unit": unit, "min": lower, "max": upper}
        for test_name, unit, lower, upper in reference_rows
    }
35
+
36
def extract_with_regex(self, text: str) -> dict:
    """Find "<test name> <value> <unit>" readings and classify each one.

    For every supported test the first occurrence in ``text`` is used. The
    value is compared with the range stored in ``self.lab_tests``; a value
    more than 20% beyond a bound is labelled critical.

    Args:
        text: Raw report text.

    Returns:
        ``{"test_results": [...]}`` with one dict per test found, in the
        fixed order of the table below.
    """
    # (test name, unit as it must appear right after the value)
    expected_units = (
        ("White Blood Cell Count", "x10^9/L"),
        ("Red Blood Cell Count", "x10^12/L"),
        ("Hemoglobin", "g/dL"),
        ("Hematocrit", "%"),
        ("Platelet Count", "x10^9/L"),
        ("Glucose", "mg/dL"),
        ("Creatinine", "mg/dL"),
        ("Urea", "mg/dL"),
        ("Cholesterol", "mg/dL"),
        ("ALT", "U/L"),
        ("AST", "U/L"),
        ("ALP", "U/L"),
        ("Bilirubin", "mg/dL"),
        ("Albumin", "g/dL"),
        ("Thyroid Stimulating Hormone", "mIU/L"),
        ("Free T4", "ng/dL"),
    )

    test_results = []
    for test_name, unit_text in expected_units:
        if test_name not in self.lab_tests:
            continue

        # The leading \b keeps short, case-insensitive names such as AST or
        # ALT from matching inside other words ("Breakfast: 55 U/L").
        pattern = (
            r"\b" + re.escape(test_name)
            + r"[:\s]+(\d+\.?\d*)\s*("
            + re.escape(unit_text) + r")"
        )
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue

        value = float(match.group(1))
        unit = match.group(2)
        ref_range = self.lab_tests[test_name]

        status = "normal"
        deviation = 0.0
        clinical_sig = "Within normal limits"
        if value < ref_range["min"]:
            # Percentage below the lower bound.
            deviation = ((ref_range["min"] - value) / ref_range["min"]) * 100
            status = "critical_low" if deviation > 20 else "low"
            clinical_sig = f"Below normal range (↓{deviation:.1f}%)"
        elif value > ref_range["max"]:
            # Percentage above the upper bound.
            deviation = ((value - ref_range["max"]) / ref_range["max"]) * 100
            status = "critical_high" if deviation > 20 else "high"
            clinical_sig = f"Above normal range (↑{deviation:.1f}%)"

        test_results.append({
            "test_name": test_name,
            "value": value,
            "unit": unit,
            "reference_range": {
                "min": ref_range["min"],
                "max": ref_range["max"],
                "unit": ref_range["unit"]
            },
            "status": status,
            "deviation_percentage": deviation,
            "clinical_significance": clinical_sig,
            "trend": None,
            "source": "regex"
        })

    return {"test_results": test_results}
100
+
101
def extract_with_ner(self, text: str) -> dict:
    """Run the NER pipeline on ``text`` and keep the lab-related entities.

    ``TEST_NAME`` entities are kept unless their lower-cased text is a known
    false positive or the text is two characters or shorter; ``TEST_VALUE``,
    ``TEST_UNIT`` and ``MedicalCondition`` entities are always kept. All
    other labels are dropped.

    Args:
        text: Raw report text.

    Returns:
        ``{"entities": [...]}`` with text, label, character offsets and a
        fixed confidence of 0.92 per entity.
    """
    doc = self.nlp(text)

    # Lower-cased entity texts that are not real test names.
    invalid_test_names = {
        'hemolab', 'central', 'health', 'laboratory', 'medicity', 'wellbeing',
        'healthland', 'age', 'gender', 'email', 'male', 'sample', 'results',
        'verified by', 'dr', 'emily', 'johnson', 'normal', 'elevated', 'johnatan',
        'doe', 'page', 'blood test', 'hematology', 'processing details'
    }
    always_kept_labels = ["TEST_VALUE", "TEST_UNIT", "MedicalCondition"]

    def is_wanted(candidate):
        if candidate.label_ == "TEST_NAME":
            return candidate.text.lower() not in invalid_test_names and len(candidate.text) > 2
        return candidate.label_ in always_kept_labels

    entities = [
        {
            "text": candidate.text,
            "label": candidate.label_,
            "start_char": candidate.start_char,
            "end_char": candidate.end_char,
            "confidence": 0.92
        }
        for candidate in doc.ents
        if is_wanted(candidate)
    ]

    return {"entities": entities}
132
+
133
def get_clinical_bert_embeddings(self, text: str):
    """Return the mean of the model's last hidden state for ``text``.

    Args:
        text: Text to embed; the tokenizer truncates it to 512 tokens.

    Returns:
        The hidden states averaged over dimension 1, squeezed and converted
        to plain Python lists/floats.
    """
    tokenizer_options = {
        "return_tensors": "pt",
        "truncation": True,
        "max_length": 512,
        "padding": True,
        "return_token_type_ids": False,
    }
    encoded = self.clinical_bert_tokenizer(text, **tokenizer_options)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        model_output = self.clinical_bert_model(**encoded)

    averaged = model_output.last_hidden_state.mean(dim=1)
    as_array = averaged.squeeze().numpy()
    return as_array.tolist()
149
+
150
def analyze_with_clinical_bert(self, text: str, test_results: list):
    """Derive rule-based clinical flags from classified test results.

    The embedding of ``text`` is computed, but only its length is reported;
    everything else follows from the ``status`` of the tests.

    Args:
        text: Report text handed to ``get_clinical_bert_embeddings``.
        test_results: Dicts carrying ``test_name`` and ``status``.

    Returns:
        A dict with the embedding length, suspected conditions, the distinct
        status flags in order of first appearance (``["Normal"]`` when
        nothing is abnormal), abnormality patterns and the percentage of
        abnormal tests rounded to one decimal.
    """
    embeddings = self.get_clinical_bert_embeddings(text)

    abnormal_tests = [t for t in test_results if t['status'] != 'normal']

    def any_high(keyword):
        # True when an abnormal test mentioning `keyword` is high.
        return any(
            keyword in t['test_name'].lower() and 'high' in t['status']
            for t in abnormal_tests
        )

    diseases_detected = []
    if any_high('glucose'):
        diseases_detected.append("Potential Diabetes")
    if any_high('cholesterol'):
        diseases_detected.append("Dyslipidemia")

    # De-duplicate on the display label. Comparing the raw status
    # ("critical_high") with lower-cased labels ("critical high") never
    # matched, so critical flags were repeated once per test.
    status_flags = []
    for test in abnormal_tests:
        flag = test['status'].replace('_', ' ').title()
        if flag not in status_flags:
            status_flags.append(flag)
    if not status_flags:
        status_flags = ["Normal"]

    critical_count = sum(1 for t in test_results if 'critical' in t['status'])
    abnormal_count = len(abnormal_tests)

    abnormality_patterns = []
    if abnormal_count > 0:
        abnormality_patterns.append(
            f"Detected {abnormal_count} abnormal parameter(s)")
    if critical_count > 0:
        abnormality_patterns.append(
            f"{critical_count} critical finding(s) require immediate attention")

    clinical_relevance = min(
        100, (abnormal_count / len(test_results)) * 100) if test_results else 0

    return {
        "embedding_dimension": len(embeddings),
        "clinical_context_captured": True,
        "embeddings_generated": True,
        "diseases_detected": diseases_detected,
        "status_flags": status_flags,
        "abnormality_patterns": abnormality_patterns,
        "clinical_relevance_score": round(clinical_relevance, 1)
    }
195
+
196
def generate_patient_summary(self, test_results: list, abnormal_results: list) -> dict:
    """Build a plain-language summary of the lab results for the patient.

    Args:
        test_results: Classified tests (``test_name``, ``value``, ``unit``,
            ``status``).
        abnormal_results: Abnormal entries carrying a ``severity`` key.

    Returns:
        A dict with ``overall_status``, ``explanation``, up to five
        ``key_findings`` (normal results among the first ten tests), every
        ``areas_of_concern`` entry (high/low results), ``next_steps`` and
        ``summary_stats``.
    """
    normal_count = sum(1 for t in test_results if t['status'] == 'normal')
    total_tests = len(test_results)
    abnormal_count = len(abnormal_results)
    critical_count = sum(1 for a in abnormal_results if a['severity'] == 'critical')

    if critical_count > 0:
        overall_status = "⚠️ URGENT - IMMEDIATE ATTENTION NEEDED"
        explanation = f"Your lab results show {critical_count} critical finding(s) that require immediate medical attention. Please consult your doctor as soon as possible."
    elif abnormal_count > 0:
        overall_status = "⚠️ ABNORMALITIES DETECTED"
        explanation = f"Your lab results show {abnormal_count} test(s) outside normal range. While not immediately critical, these findings should be discussed with your healthcare provider."
    else:
        overall_status = "✅ ALL TESTS NORMAL"
        explanation = f"Great news! All {total_tests} lab tests are within normal ranges. Your results indicate good health in the tested parameters."

    test_explanations = {
        "White Blood Cell Count": {
            "normal": "Your immune system is functioning properly",
            "high": "Your body may be fighting an infection or inflammation",
            "low": "Your immune system may be weakened"
        },
        "Red Blood Cell Count": {
            "normal": "Your blood is carrying oxygen efficiently",
            "high": "You may have dehydration or a blood disorder requiring evaluation",
            "low": "You may have anemia, causing fatigue and weakness"
        },
        "Hemoglobin": {
            "normal": "Your blood oxygen levels are healthy",
            "high": "May indicate dehydration or lung problems",
            "low": "You may be anemic - your blood isn't carrying enough oxygen"
        },
        "Hematocrit": {
            "normal": "Blood volume and red blood cell ratio is normal",
            "high": "May indicate dehydration",
            "low": "May indicate anemia or blood loss"
        },
        "Platelet Count": {
            "normal": "Your blood clotting ability is normal",
            "high": "Increased risk of blood clots",
            "low": "Increased risk of bleeding"
        },
        "Glucose": {
            "normal": "Your blood sugar levels are well controlled",
            "high": "Your blood sugar is elevated - may indicate diabetes or prediabetes",
            "low": "Your blood sugar is low - may cause dizziness and weakness"
        },
        "Cholesterol": {
            "normal": "Your cholesterol levels are healthy for your heart",
            "high": "Elevated cholesterol increases heart disease risk",
            "low": "Unusually low cholesterol"
        },
        "Creatinine": {
            "normal": "Your kidneys are filtering waste properly",
            "high": "Your kidneys may not be working optimally",
            "low": "May indicate low muscle mass"
        },
        "Urea": {
            "normal": "Kidney function is normal",
            "high": "May indicate kidney problems or dehydration",
            "low": "May indicate liver problems"
        },
        "ALT": {
            "normal": "Your liver is functioning normally",
            "high": "Your liver may be inflamed or damaged",
            "low": "Generally not concerning"
        },
        "AST": {
            "normal": "Liver and heart function appear normal",
            "high": "May indicate liver or heart problems",
            "low": "Generally not concerning"
        },
        "Bilirubin": {
            "normal": "Liver is processing waste products normally",
            "high": "May cause jaundice - liver may not be functioning properly",
            "low": "Generally not concerning"
        },
        "Albumin": {
            "normal": "Good protein levels and liver function",
            "high": "May indicate dehydration",
            "low": "May indicate liver or kidney disease"
        },
        "Thyroid Stimulating Hormone": {
            "normal": "Your thyroid hormone levels are balanced",
            "high": "Your thyroid may be underactive (hypothyroidism)",
            "low": "Your thyroid may be overactive (hyperthyroidism)"
        },
        "Free T4": {
            "normal": "Thyroid hormone levels are appropriate",
            "high": "May indicate hyperthyroidism",
            "low": "May indicate hypothyroidism"
        }
    }

    # Normal results are only listed for the first few tests; abnormal
    # results are never dropped, whatever their position.
    normal_scan_limit = 10

    key_findings = []
    areas_of_concern = []
    for position, test in enumerate(test_results):
        test_name = test['test_name']
        status = test['status']
        lowered_name = test_name.lower()

        # First explanation set whose key occurs in the test name. Tests
        # without one (e.g. ALP) fall back to the generic wording below
        # instead of vanishing from the summary.
        wording = next(
            (texts for key, texts in test_explanations.items()
             if key.lower() in lowered_name),
            {},
        )
        reading = f"{test_name}: {test['value']} {test['unit']}"
        severity = "critical" if "critical" in status else "moderate"

        if status == 'normal':
            if position < normal_scan_limit:
                key_findings.append({
                    "finding": reading,
                    "explanation": wording.get('normal', 'Within normal range')
                })
        elif 'high' in status.lower():
            areas_of_concern.append({
                "finding": f"{reading} (HIGH)",
                "explanation": wording.get('high', 'Above normal range'),
                "severity": severity
            })
        elif 'low' in status.lower():
            areas_of_concern.append({
                "finding": f"{reading} (LOW)",
                "explanation": wording.get('low', 'Below normal range'),
                "severity": severity
            })

    if critical_count > 0:
        next_steps = [
            "Contact your doctor immediately",
            "Do not delay medical consultation",
            "Bring these results to your healthcare provider",
            "Follow your doctor's treatment recommendations"
        ]
    elif abnormal_count > 0:
        next_steps = [
            "Schedule an appointment with your doctor within the next few days",
            "Discuss these results with your healthcare provider",
            "Your doctor may recommend additional tests",
            "Follow any lifestyle or treatment recommendations"
        ]
    else:
        next_steps = [
            "Maintain your current healthy lifestyle",
            "Continue regular health checkups",
            "Keep these results for your medical records",
            "Discuss with your doctor during your next routine visit"
        ]

    return {
        "overall_status": overall_status,
        "explanation": explanation,
        "key_findings": key_findings[:5],
        "areas_of_concern": areas_of_concern,
        "next_steps": next_steps,
        "summary_stats": {
            "total_tests": total_tests,
            "normal_tests": normal_count,
            "abnormal_tests": abnormal_count,
            "critical_findings": critical_count
        }
    }
357
+
358
+ def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> dict:
359
+ start_time = time.time()
360
+
361
+ regex_results = self.extract_with_regex(text)
362
+ ner_results = self.extract_with_ner(text)
363
+
364
+ test_results = regex_results['test_results']
365
+ entities_list = ner_results['entities']
366
+
367
+ abnormal_results = []
368
+ for test in test_results:
369
+ if test['status'] != 'normal':
370
+ severity = 'critical' if 'critical' in test['status'] else 'moderate'
371
+ abnormal_results.append({
372
+ "test_name": test['test_name'],
373
+ "severity": severity,
374
+ "requires_attention": 'critical' in test['status']
375
+ })
376
+
377
+ normal_params = [t['test_name']
378
+ for t in test_results if t['status'] == 'normal']
379
+ key_abnormalities = [
380
+ f"{t['test_name']}: {t['clinical_significance']}" for t in test_results if t['status'] != 'normal']
381
+
382
+ ai_summary = {
383
+ "overall_assessment": f"Detected {len(abnormal_results)} abnormal result(s). {len(normal_params)} parameters within normal limits.",
384
+ "key_abnormalities": key_abnormalities,
385
+ "normal_parameters": normal_params,
386
+ "recommendations": [
387
  "Correlate with clinical symptoms",
388
  "Consider follow-up testing if symptoms persist",
389
  "Consult with healthcare provider for interpretation"
390
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  }
392
+
393
+ clinical_insights = self.analyze_with_clinical_bert(text, test_results)
394
+
395
+ patient_summary = self.generate_patient_summary(
396
+ test_results, abnormal_results)
397
+
398
+ test_panels = []
399
+ cbc_tests = [t for t in test_results if any(x in t['test_name'].lower(
400
+ ) for x in ['blood cell', 'hemoglobin', 'hematocrit', 'platelet'])]
401
+ if cbc_tests:
402
+ test_panels.append({
403
+ "panel_name": "Complete Blood Count",
404
+ "tests_included": [t['test_name'] for t in cbc_tests],
405
+ "panel_status": "abnormal" if any(t['status'] != 'normal' for t in cbc_tests) else "normal",
406
+ "abnormal_count": len([t for t in cbc_tests if t['status'] != 'normal']),
407
+ "total_tests": len(cbc_tests)
408
+ })
409
+
410
+ chem_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
411
+ 'glucose', 'creatinine', 'urea', 'cholesterol'])]
412
+ if chem_tests:
413
+ test_panels.append({
414
+ "panel_name": "General Chemistry",
415
+ "tests_included": [t['test_name'] for t in chem_tests],
416
+ "panel_status": "abnormal" if any(t['status'] != 'normal' for t in chem_tests) else "normal",
417
+ "abnormal_count": len([t for t in chem_tests if t['status'] != 'normal']),
418
+ "total_tests": len(chem_tests)
419
+ })
420
+
421
+ liver_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
422
+ 'alt', 'ast', 'alp', 'bilirubin', 'albumin'])]
423
+ if liver_tests:
424
+ test_panels.append({
425
+ "panel_name": "Liver Function Panel",
426
+ "tests_included": [t['test_name'] for t in liver_tests],
427
+ "panel_status": "abnormal" if any(t['status'] != 'normal' for t in liver_tests) else "normal",
428
+ "abnormal_count": len([t for t in liver_tests if t['status'] != 'normal']),
429
+ "total_tests": len(liver_tests)
430
+ })
431
+
432
+ thyroid_tests = [t for t in test_results if any(
433
+ x in t['test_name'].lower() for x in ['thyroid', 'tsh', 't4', 't3'])]
434
+ if thyroid_tests:
435
+ test_panels.append({
436
+ "panel_name": "Thyroid Function Panel",
437
+ "tests_included": [t['test_name'] for t in thyroid_tests],
438
+ "panel_status": "abnormal" if any(t['status'] != 'normal' for t in thyroid_tests) else "normal",
439
+ "abnormal_count": len([t for t in thyroid_tests if t['status'] != 'normal']),
440
+ "total_tests": len(thyroid_tests)
441
  })
442
+
 
 
 
443
  chart_data = []
 
444
  for test in test_results:
445
  if test['reference_range']:
446
  chart_data.append({
 
449
  "ref_min": test['reference_range']['min'],
450
  "ref_max": test['reference_range']['max']
451
  })
452
+
453
+ visualization_data = {
454
  "charts": [{
455
  "chart_type": "bar",
456
  "title": "Lab Results vs Reference Range",
 
458
  }],
459
  "trend_data": []
460
  }
461
+
462
+ ner_stats = {}
463
+ for ent in entities_list:
464
+ label = ent['label']
465
+ ner_stats[label] = ner_stats.get(label, 0) + 1
466
+
467
+ test_category = "hematology"
468
+ sub_category = "complete_blood_count"
469
+ urgency_level = "critical" if len(
470
+ [a for a in abnormal_results if a['severity'] == 'critical']) > 0 else "routine"
471
+
472
+ if any('glucose' in t['test_name'].lower() for t in test_results):
473
+ test_category = "clinical_chemistry"
474
+ sub_category = "metabolic_panel"
475
+
476
+ classification = {
477
+ "test_category": test_category,
478
+ "sub_category": sub_category,
479
+ "urgency_level": urgency_level,
480
+ "confidence": 0.96
481
+ }
482
+
483
+ extraction_stats = {
484
+ "tests_with_values": len(test_results),
485
+ "additional_tests_found": len([e for e in entities_list if e['label'] == 'TEST_NAME']),
486
+ "diseases_detected": len(clinical_insights['diseases_detected']),
487
+ "interpretations_found": len([t for t in test_results if t['status'] != 'normal']),
488
+ "ner_model_stats": ner_stats
489
+ }
490
+
491
+ processing_time_ms = int((time.time() - start_time) * 1000)
492
+
493
+ metadata = {
494
+ "model_version": "radiolo_smart_ner_v2.0.0",
495
+ "processing_date": datetime.utcnow().isoformat() + "Z",
496
+ "tests_extracted": len(test_results),
497
+ "confidence_score": 0.94,
498
+ "nlp_models": {
499
+ "ner": "Custom Lab NER (Smart Filtered)",
500
+ "clinical_bert": "ClinicalDistilBERT",
501
+ "extraction_method": "Hybrid (Regex + Filtered NER)"
502
+ }
503
+ }
504
+
505
+ return {
506
+ "report_id": report_id or f"lab_{int(time.time())}",
507
+ "report_type": "laboratory",
508
+ "processing_time_ms": processing_time_ms,
509
+ "classification": classification,
510
+ "extraction_stats": extraction_stats,
511
+ "entities": entities_list,
512
+ "test_results": test_results,
513
+ "abnormal_results": abnormal_results,
514
+ "ai_summary": ai_summary,
515
+ "clinical_insights": clinical_insights,
516
+ "patient_friendly_summary": patient_summary,
517
+ "test_panels": test_panels,
518
+ "visualization_data": visualization_data,
519
+ "metadata": metadata
520
+ }
app/main.py CHANGED
@@ -78,12 +78,13 @@ async def root():
78
  "compression": "gzip",
79
  "ocr_engine": "EasyOCR",
80
  "ner_model": "Custom Lab NER",
 
81
  "supported_tests": 16
82
  },
83
  "endpoints": {
84
  "health": "/health",
85
  "analyze": "/analyze-lab-secure",
86
- "test": "/test-analyze" # NEW
87
  },
88
  "supported_formats": ["pdf", "image"],
89
  "supported_lab_tests": [
@@ -107,13 +108,8 @@ async def health_check():
107
  "supported_tests": 16
108
  }
109
 
110
-
111
  @app.post("/test-analyze", tags=["Testing"])
112
  async def test_analyze(file: UploadFile = File(...)):
113
- """
114
- Test endpoint without encryption - upload file directly
115
- ⚠️ WARNING: For testing only! No encryption!
116
- """
117
  start_time = time.time()
118
 
119
  try:
@@ -123,7 +119,7 @@ async def test_analyze(file: UploadFile = File(...)):
123
  file_bytes = await file.read()
124
  filename = file.filename
125
 
126
- # print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")
127
 
128
  if filename.lower().endswith('.pdf'):
129
  file_type = "pdf"
@@ -140,9 +136,9 @@ async def test_analyze(file: UploadFile = File(...)):
140
  if not extracted_text or len(extracted_text.strip()) < 10:
141
  raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
142
 
143
- # print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
144
 
145
- # print("🧠 Processing with NER + ClinicalDistilBERT...")
146
  lab_analysis = lab_processor.extract_and_format(
147
  extracted_text,
148
  report_id=f"test_{int(time.time())}",
@@ -151,8 +147,8 @@ async def test_analyze(file: UploadFile = File(...)):
151
 
152
  processing_time = time.time() - start_time
153
 
154
- # print(f"✅ Processing complete in {processing_time:.2f}s")
155
- # print(f" Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")
156
 
157
  response_data = {
158
  "status": "success",
@@ -177,7 +173,6 @@ async def test_analyze(file: UploadFile = File(...)):
177
  traceback.print_exc()
178
  raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
179
 
180
-
181
  @app.post("/analyze-lab-secure", tags=["Lab Analysis"])
182
  async def analyze_lab_secure(request: EncryptedRequest):
183
  start_time = time.time()
 
78
  "compression": "gzip",
79
  "ocr_engine": "EasyOCR",
80
  "ner_model": "Custom Lab NER",
81
+ "patient_friendly_summary": "AI-Generated Explanations",
82
  "supported_tests": 16
83
  },
84
  "endpoints": {
85
  "health": "/health",
86
  "analyze": "/analyze-lab-secure",
87
+ "test": "/test-analyze"
88
  },
89
  "supported_formats": ["pdf", "image"],
90
  "supported_lab_tests": [
 
108
  "supported_tests": 16
109
  }
110
 
 
111
  @app.post("/test-analyze", tags=["Testing"])
112
  async def test_analyze(file: UploadFile = File(...)):
 
 
 
 
113
  start_time = time.time()
114
 
115
  try:
 
119
  file_bytes = await file.read()
120
  filename = file.filename
121
 
122
+ print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")
123
 
124
  if filename.lower().endswith('.pdf'):
125
  file_type = "pdf"
 
136
  if not extracted_text or len(extracted_text.strip()) < 10:
137
  raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
138
 
139
+ print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
140
 
141
+ print("🧠 Processing with NER + ClinicalDistilBERT...")
142
  lab_analysis = lab_processor.extract_and_format(
143
  extracted_text,
144
  report_id=f"test_{int(time.time())}",
 
147
 
148
  processing_time = time.time() - start_time
149
 
150
+ print(f"✅ Processing complete in {processing_time:.2f}s")
151
+ print(f" Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")
152
 
153
  response_data = {
154
  "status": "success",
 
173
  traceback.print_exc()
174
  raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
175
 
 
176
  @app.post("/analyze-lab-secure", tags=["Lab Analysis"])
177
  async def analyze_lab_secure(request: EncryptedRequest):
178
  start_time = time.time()
decrypt_response.py CHANGED
@@ -9,7 +9,7 @@ import os
9
  from nacl.secret import SecretBox
10
 
11
  # Your hex key from .env
12
- SECRET_KEY_HEX = "9aa68717fafd68f30b6bdd7f8af0f273b4ddb31aadc0f7a3a42db86dfdde0195"
13
 
14
  # Convert hex to bytes (32 bytes)
15
  SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
 
9
  from nacl.secret import SecretBox
10
 
11
# Hex-encoded SecretBox key (64 hex characters = 32 bytes).
# SECURITY: a real key should not live in source control. The SECRET_KEY_HEX
# environment variable takes precedence; the literal is kept only as a
# backward-compatible fallback so the script keeps working unchanged, and it
# should be removed (and the key rotated) once the environment is configured.
# NOTE(review): the variable name SECRET_KEY_HEX is an assumption - align it
# with the name actually used in the service's .env file.
SECRET_KEY_HEX = os.environ.get(
    "SECRET_KEY_HEX",
    "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66",
)

# Convert hex to bytes (32 bytes). bytes.fromhex raises ValueError on
# non-hex input; the length check fails early on a truncated or padded key
# instead of surfacing later as an obscure SecretBox error.
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
if len(SECRET_KEY) != 32:
    raise ValueError("SECRET_KEY_HEX must decode to exactly 32 bytes")
generate_postman_request.py CHANGED
@@ -10,7 +10,7 @@ from nacl.secret import SecretBox
10
  from nacl.utils import random
11
 
12
  # Your 64-character hex key from .env
13
- SECRET_KEY_HEX = "9aa68717fafd68f30b6bdd7f8af0f273b4ddb31aadc0f7a3a42db86dfdde0195"
14
 
15
  # Convert hex to bytes (32 bytes)
16
  SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
 
10
  from nacl.utils import random
11
 
12
import os  # local to this block: the visible hunk does not show os imported

# 64-character hex SecretBox key (decodes to 32 bytes).
# SECURITY: a real key should not live in source control. The SECRET_KEY_HEX
# environment variable takes precedence; the literal is kept only as a
# backward-compatible fallback so the script keeps working unchanged, and it
# should be removed (and the key rotated) once the environment is configured.
# NOTE(review): the variable name SECRET_KEY_HEX is an assumption - align it
# with the name actually used in the service's .env file.
SECRET_KEY_HEX = os.environ.get(
    "SECRET_KEY_HEX",
    "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66",
)

# Convert hex to bytes (32 bytes). bytes.fromhex raises ValueError on
# non-hex input; the length check fails early on a truncated or padded key
# instead of surfacing later as an obscure SecretBox error.
SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)
if len(SECRET_KEY) != 32:
    raise ValueError("SECRET_KEY_HEX must decode to exactly 32 bytes")