Spaces:

MakPr016
/

clinical-analysis-api

Sleeping

App Files Files Community

MakPr016 committed on Oct 19, 2025

Commit

6c66675

1 Parent(s): 648d9f3

Analysis includes NLP summary

Browse files

Files changed (4) hide show

app/lab_processor.py +475 -456
app/main.py +7 -12
decrypt_response.py +1 -1
generate_postman_request.py +1 -1

app/lab_processor.py CHANGED Viewed

@@ -1,487 +1,446 @@
-"""
-Lab Report Processing with Smart NER + Regex + ClinicalDistilBERT
-Based on your proven local implementation
-"""
 import spacy
 import re
-import time
 import torch
 from datetime import datetime
-from typing import Dict, List, Set
-from collections import defaultdict
-from transformers import AutoTokenizer, AutoModel
-REFERENCE_RANGES = {
-    "White Blood Cell Count": {"min": 4.0, "max": 11.0, "unit": "x10^9/L"},
-    "Red Blood Cell Count": {"min": 4.2, "max": 5.9, "unit": "x10^12/L"},
-    "Hemoglobin": {"min": 13.5, "max": 17.5, "unit": "g/dL"},
-    "Hematocrit": {"min": 38.3, "max": 48.6, "unit": "%"},
-    "Platelet Count": {"min": 150, "max": 450, "unit": "x10^9/L"},
-    "Glucose": {"min": 70, "max": 99, "unit": "mg/dL"},
-    "Creatinine": {"min": 0.6, "max": 1.2, "unit": "mg/dL"},
-    "Urea": {"min": 15, "max": 50, "unit": "mg/dL"},
-    "Cholesterol": {"min": 0, "max": 200, "unit": "mg/dL"},
-    "Alanine Aminotransferase": {"min": 7, "max": 56, "unit": "U/L"},
-    "Aspartate Aminotransferase": {"min": 8, "max": 48, "unit": "U/L"},
-    "Alkaline Phosphatase": {"min": 40, "max": 129, "unit": "U/L"},
-    "Bilirubin": {"min": 0.3, "max": 1.9, "unit": "mg/dL"},
-    "Albumin": {"min": 3.5, "max": 5.5, "unit": "g/dL"},
-    "Thyroid Stimulating Hormone": {"min": 0.5, "max": 4.5, "unit": "mIU/L"},
-    "Free Thyroxine": {"min": 0.9, "max": 1.7, "unit": "ng/dL"},
-}
 class RadioloLabProcessor:
-    def __init__(self, ner_model_path: str):
-        """Initialize smart lab processor with NER, stopwords, and ClinicalDistilBERT"""
-        # Load custom NER model
-        self.nlp = spacy.load(ner_model_path)
-        print(f"✓ Lab NER model loaded: {ner_model_path}")
-        # Load ClinicalDistilBERT
-        print("Loading ClinicalDistilBERT...")
-        self.clinical_tokenizer = AutoTokenizer.from_pretrained("nlpie/clinical-distilbert")
-        self.clinical_model = AutoModel.from_pretrained("nlpie/clinical-distilbert")
-        # Set device
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.clinical_model = self.clinical_model.to(self.device)
-        self.clinical_model.eval()
-        print(f"✓ ClinicalDistilBERT loaded on {self.device}")
-        # Strict stopwords to filter false positives
-        self.stopwords = {
-            # Document structure
-            'hemolab', 'central', 'medicity', 'wellbeing', 'healthland',
-            'laboratory', 'health', 'ave', 'page',
-            # Metadata fields
-            'age', 'gender', 'email', 'sample', 'results', 'verified by',
-            'processing', 'details',
-            # Table headers
-            'test', 'result', 'unit', 'normal', 'range', 'status',
-            'normal range', 'result status',
-            # Section headers
-            'hematology', 'biochemistry', 'liver function', 'thyroid function',
-            'kidney function', 'lipid profile',
-            # Names (common in reports)
-            'john', 'doe', 'johnatan', 'emily', 'johnson', 'dr',
-            # Standalone numbers
-            '30', '123', '12345',
-        }
-        # Valid lab tests for NER filtering
-        self.valid_tests = {
-            'white blood cell count', 'wbc', 'red blood cell count', 'rbc',
-            'hemoglobin', 'hgb', 'hb', 'hematocrit', 'hct',
-            'platelet count', 'platelets', 'plt',
-            'mcv', 'mch', 'mchc',
-            'glucose', 'glu', 'creatinine', 'urea', 'bun',
-            'cholesterol', 'ldl', 'hdl', 'triglycerides',
-            'alt', 'ast', 'alp', 'bilirubin', 'albumin',
-            'tsh', 'ft4', 'free thyroxine', 'hba1c', 'a1c',
-            'sodium', 'potassium', 'calcium', 'chloride',
-            'aminotransferase', 'phosphatase',
         }
-        # Targeted regex for structured lab values
-        self.lab_value_pattern = re.compile(
-            r'(White Blood Cell Count|Red Blood Cell Count|Hemoglobin|Hematocrit|'
-            r'Platelet Count|Glucose|Creatinine|Urea|Cholesterol|'
-            r'Alanine Aminotransferase|Aspartate Aminotransferase|'
-            r'Alkaline Phosphatase|Bilirubin|Albumin|'
-            r'Thyroid Stimulating Hormone|Free Thyroxine|'
-            r'WBC|RBC|HGB|HCT|PLT|ALT|AST|ALP|TSH|FT4|HbA1c)'
-            r'\s*[:\n]\s*'
-            r'(\d+\.?\d*)'
-            r'\s*'
-            r'([a-zA-Z/%^0-9]+)?',
-            re.IGNORECASE
-        )
-        # Status pattern for interpretations
-        self.status_pattern = re.compile(r'\b(Elevated|High|Low|Normal|Critical|Abnormal)\b')
-    def _normalize_test_name(self, name: str) -> str:
-        """Normalize test abbreviations to full names"""
-        name_lower = name.lower().strip()
-        mapping = {
-            'wbc': 'White Blood Cell Count',
-            'rbc': 'Red Blood Cell Count',
-            'hgb': 'Hemoglobin',
-            'hb': 'Hemoglobin',
-            'hct': 'Hematocrit',
-            'plt': 'Platelet Count',
-            'platelets': 'Platelet Count',
-            'glu': 'Glucose',
-            'alt': 'Alanine Aminotransferase',
-            'ast': 'Aspartate Aminotransferase',
-            'alp': 'Alkaline Phosphatase',
-            'tsh': 'Thyroid Stimulating Hormone',
-            'ft4': 'Free Thyroxine',
         }
-        return mapping.get(name_lower, name)
-    def _calculate_status(self, test_name: str, value: float) -> Dict:
-        """Calculate test status and deviation from reference range"""
-        ref_range = REFERENCE_RANGES.get(test_name)
-        if not ref_range:
-            return {
-                "status": "unknown",
-                "deviation_percentage": 0.0,
-                "clinical_significance": "Reference range not available"
-            }
-        min_val, max_val = ref_range['min'], ref_range['max']
-        if value < min_val:
-            deviation = ((min_val - value) / min_val) * 100
-            status = "critical_low" if deviation > 50 else "low"
-            significance = f"Below normal range (↓{deviation:.1f}%)"
-        elif value > max_val:
-            deviation = ((value - max_val) / max_val) * 100
-            status = "critical_high" if deviation > 50 else "high"
-            significance = f"Above normal range (↑{deviation:.1f}%)"
-        else:
-            deviation = 0.0
-            status = "normal"
-            significance = "Within normal limits"
-        return {
-            "status": status,
-            "deviation_percentage": round(deviation, 2),
-            "clinical_significance": significance
         }
-    def _get_clinical_embeddings(self, text: str) -> torch.Tensor:
-        """Get clinical embeddings using ClinicalDistilBERT"""
-        inputs = self.clinical_tokenizer(
-            text,
-            return_tensors="pt",
-            truncation=True,
             max_length=512,
             padding=True,
             return_token_type_ids=False
-        ).to(self.device)
         with torch.no_grad():
-            outputs = self.clinical_model(**inputs)
-            embeddings = outputs.last_hidden_state[:, 0, :]  # [CLS] token
-        return embeddings
-    def _generate_clinical_insights(self, text: str, abnormal_results: List[Dict],
-                                    diseases: Set[str], interpretations: Set[str]) -> Dict:
-        """Generate clinical insights using ClinicalDistilBERT"""
-        # Get embeddings
-        embeddings = self._get_clinical_embeddings(text[:512])
-        insights = {
-            "embedding_dimension": embeddings.shape[1],
             "clinical_context_captured": True,
             "embeddings_generated": True,
-            "diseases_detected": list(diseases),
-            "status_flags": list(interpretations),
-            "abnormality_patterns": [],
-            "clinical_relevance_score": 0.0
         }
-        # Analyze patterns
-        if len(abnormal_results) > 0:
-            critical_count = sum(1 for r in abnormal_results if r.get('severity') == 'critical')
-            moderate_count = len(abnormal_results) - critical_count
-            relevance_score = min(100.0, (critical_count * 30.0) + (moderate_count * 10.0))
-            insights["clinical_relevance_score"] = round(relevance_score, 2)
-            insights["abnormality_patterns"].append(
-                f"Detected {len(abnormal_results)} abnormal parameter(s)"
-            )
-            if critical_count > 0:
-                insights["abnormality_patterns"].append(
-                    f"{critical_count} critical finding(s) require immediate attention"
-                )
         else:
-            insights["clinical_relevance_score"] = 0.0
-            insights["abnormality_patterns"].append("All parameters within normal clinical ranges")
-        return insights
-    def _smart_ner_extraction(self, doc, extracted_test_names: Set[str]) -> tuple:
-        """Smart NER extraction with strict filtering"""
-        additional_tests = []
-        diseases = set()
-        interpretations = set()
-        ner_stats = defaultdict(int)
-        for ent in doc.ents:
-            ner_stats[ent.label_] += 1
-            if ent.label_ == 'TEST_NAME':
-                ent_lower = ent.text.lower()
-                # Skip if in stopwords
-                if ent_lower in self.stopwords:
-                    continue
-                # Skip if looks like date
-                if re.match(r'\d+/\d+/\d+', ent.text):
-                    continue
-                # Skip if just numbers
-                if re.match(r'^\d+$', ent.text):
-                    continue
-                # Skip if already extracted by regex
-                if ent_lower in extracted_test_names:
-                    continue
-                # Only add if contains valid medical keywords
-                if any(keyword in ent_lower for keyword in self.valid_tests):
-                    additional_tests.append({
-                        'testname': ent.text,
-                        'value': None,
-                        'unit': None,
-                        'source': 'ner'
-                    })
-            elif ent.label_ == 'DISEASE':
-                if ent.text.lower() not in self.stopwords:
-                    diseases.add(ent.text)
-            elif ent.label_ == 'INTERPRETATION':
-                interpretations.add(ent.text)
-        return additional_tests, diseases, interpretations, ner_stats
-    def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> Dict:
-        """Smart extraction using hybrid approach"""
-        start_time = time.time()
-        raw_tests = []
-        seen_tests = set()
-        # Step 1: Regex extraction (most reliable for structured data)
-        for match in self.lab_value_pattern.finditer(text):
-            test_name = self._normalize_test_name(match.group(1).strip())
-            try:
-                value = float(match.group(2))
-                unit = match.group(3) if match.group(3) else None
-                test_key = (test_name.lower(), value)
-                if test_key not in seen_tests:
-                    raw_tests.append({
-                        'testname': test_name,
-                        'value': value,
-                        'unit': unit,
-                        'source': 'regex'
-                    })
-                    seen_tests.add(test_key)
-            except:
-                continue
-        extracted_test_names = {t['testname'].lower() for t in raw_tests}
-        # Step 2: Smart NER extraction with filtering
-        doc = self.nlp(text)
-        additional_tests, diseases, interpretations, ner_stats = self._smart_ner_extraction(
-            doc, extracted_test_names
-        )
-        # Extract status flags from text
-        for match in self.status_pattern.finditer(text):
-            context = text[max(0, match.start()-10):match.end()+10]
-            if 'Range' not in context:  # Avoid "Normal Range"
-                interpretations.add(match.group(1))
-        # Collect entities for output
-        entities_for_output = []
-        for ent in doc.ents:
-            entities_for_output.append({
-                "text": ent.text,
-                "label": ent.label_,
-                "start_char": ent.start_char,
-                "end_char": ent.end_char,
-                "confidence": 0.92
-            })
-        # Step 3: Build test results with reference ranges
-        test_results = []
-        abnormal_results = []
-        for test in raw_tests:
-            test_name = test['testname']
-            value = test['value']
-            unit = test['unit']
-            ref_range = REFERENCE_RANGES.get(test_name, {})
-            status_info = self._calculate_status(test_name, value)
-            test_result = {
-                "test_name": test_name,
-                "value": value,
-                "unit": unit or ref_range.get('unit', ''),
-                "reference_range": {
-                    "min": ref_range.get('min'),
-                    "max": ref_range.get('max'),
-                    "unit": ref_range.get('unit', unit or '')
-                } if ref_range else None,
-                "status": status_info['status'],
-                "deviation_percentage": status_info['deviation_percentage'],
-                "clinical_significance": status_info['clinical_significance'],
-                "trend": None,
-                "source": test['source']
-            }
-            test_results.append(test_result)
-            if status_info['status'] in ['low', 'high', 'critical_low', 'critical_high']:
-                severity = "critical" if 'critical' in status_info['status'] else "moderate"
-                abnormal_results.append({
-                    "test_name": test_name,
-                    "severity": severity,
-                    "requires_attention": True
-                })
-        # Step 4: Generate summaries and insights
-        ai_summary = self._generate_summary(test_results, abnormal_results)
-        test_panels = self._group_into_panels(test_results)
-        visualization_data = self._generate_visualization_data(test_results)
-        # Step 5: Generate clinical insights with ClinicalDistilBERT
-        clinical_insights = self._generate_clinical_insights(
-            text, abnormal_results, diseases, interpretations
-        )
-        processing_time = int((time.time() - start_time) * 1000)
-        return {
-            "report_id": report_id or f"rep_{int(time.time())}",
-            "report_type": "laboratory",
-            "processing_time_ms": processing_time,
-            "classification": {
-                "test_category": self._determine_category(test_results),
-                "sub_category": "complete_blood_count",
-                "urgency_level": "critical" if any(r['severity'] == 'critical' for r in abnormal_results) else "abnormal" if abnormal_results else "routine",
-                "confidence": 0.96
             },
-            "extraction_stats": {
-                "tests_with_values": len(test_results),
-                "additional_tests_found": len(additional_tests),
-                "diseases_detected": len(diseases),
-                "interpretations_found": len(interpretations),
-                "ner_model_stats": dict(ner_stats)
             },
-            "entities": entities_for_output[:20],
-            "test_results": test_results,
-            "abnormal_results": abnormal_results,
-            "ai_summary": ai_summary,
-            "clinical_insights": clinical_insights,
-            "test_panels": test_panels,
-            "visualization_data": visualization_data,
-            "metadata": {
-                "model_version": "radiolo_smart_ner_v2.0.0",
-                "processing_date": datetime.utcnow().isoformat() + "Z",
-                "tests_extracted": len(test_results),
-                "confidence_score": 0.94,
-                "nlp_models": {
-                    "ner": "Custom Lab NER (Smart Filtered)",
-                    "clinical_bert": "ClinicalDistilBERT",
-                    "extraction_method": "Hybrid (Regex + Filtered NER)"
-                }
             }
         }
-    def _determine_category(self, test_results: List[Dict]) -> str:
-        test_names = {t['test_name'].lower() for t in test_results}
-        if any('blood cell' in name or name in ['hemoglobin', 'hematocrit', 'platelet'] for name in test_names):
-            return "hematology"
-        elif any(name in ['alanine aminotransferase', 'aspartate aminotransferase', 'alkaline phosphatase', 'bilirubin', 'albumin'] for name in test_names):
-            return "liver_function"
-        elif any('thyroid' in name or name in ['thyroid stimulating hormone', 'free thyroxine'] for name in test_names):
-            return "thyroid_function"
-        else:
-            return "general_chemistry"
-    def _generate_summary(self, test_results: List[Dict], abnormal_results: List[Dict]) -> Dict:
-        normal_tests = [t['test_name'] for t in test_results if t['status'] == 'normal']
-        abnormal_tests = [a['test_name'] for a in abnormal_results]
-        if not abnormal_tests:
-            overall = "All test results are within normal limits."
-            recommendations = ["No immediate action required", "Continue regular health monitoring"]
         else:
-            overall = f"Detected {len(abnormal_tests)} abnormal result(s). {len(normal_tests)} parameters within normal limits."
-            recommendations = [
                 "Correlate with clinical symptoms",
                 "Consider follow-up testing if symptoms persist",
                 "Consult with healthcare provider for interpretation"
             ]
-        key_abnormalities = []
-        for result in abnormal_results:
-            test_detail = next((t for t in test_results if t['test_name'] == result['test_name']), None)
-            if test_detail:
-                key_abnormalities.append(
-                    f"{result['test_name']}: {test_detail['clinical_significance']}"
-                )
-        return {
-            "overall_assessment": overall,
-            "key_abnormalities": key_abnormalities,
-            "normal_parameters": normal_tests,
-            "recommendations": recommendations
         }
-    def _group_into_panels(self, test_results: List[Dict]) -> List[Dict]:
-        panels = defaultdict(list)
-        cbc_tests = {'White Blood Cell Count', 'Red Blood Cell Count', 'Hemoglobin', 'Hematocrit', 'Platelet Count'}
-        liver_tests = {'Alanine Aminotransferase', 'Aspartate Aminotransferase', 'Alkaline Phosphatase', 'Bilirubin', 'Albumin'}
-        thyroid_tests = {'Thyroid Stimulating Hormone', 'Free Thyroxine'}
-        for test in test_results:
-            name = test['test_name']
-            if name in cbc_tests:
-                panels['Complete Blood Count'].append(test)
-            elif name in liver_tests:
-                panels['Liver Function Panel'].append(test)
-            elif name in thyroid_tests:
-                panels['Thyroid Function Panel'].append(test)
-            else:
-                panels['General Chemistry'].append(test)
-        panel_list = []
-        for panel_name, tests in panels.items():
-            abnormal_count = sum(1 for t in tests if t['status'] != 'normal')
-            panel_list.append({
-                "panel_name": panel_name,
-                "tests_included": [t['test_name'] for t in tests],
-                "panel_status": "abnormal" if abnormal_count > 0 else "normal",
-                "abnormal_count": abnormal_count,
-                "total_tests": len(tests)
             })
-        return panel_list
-    def _generate_visualization_data(self, test_results: List[Dict]) -> Dict:
         chart_data = []
         for test in test_results:
             if test['reference_range']:
                 chart_data.append({
@@ -490,8 +449,8 @@ class RadioloLabProcessor:
                     "ref_min": test['reference_range']['min'],
                     "ref_max": test['reference_range']['max']
                 })
-        return {
             "charts": [{
                 "chart_type": "bar",
                 "title": "Lab Results vs Reference Range",
@@ -499,3 +458,63 @@ class RadioloLabProcessor:
             }],
             "trend_data": []
         }

 import spacy
 import re
+from transformers import AutoTokenizer, AutoModel
 import torch
 from datetime import datetime
+import time
 class RadioloLabProcessor:
+    def __init__(self, model_path: str):
+        self.nlp = spacy.load(model_path)
+        self.clinical_bert_tokenizer = AutoTokenizer.from_pretrained(
+            "nlpie/clinical-distilbert")
+        self.clinical_bert_model = AutoModel.from_pretrained(
+            "nlpie/clinical-distilbert")
+        self.lab_tests = {
+            "White Blood Cell Count": {"unit": "x10^9/L", "min": 4.0, "max": 11.0},
+            "Red Blood Cell Count": {"unit": "x10^12/L", "min": 4.2, "max": 5.9},
+            "Hemoglobin": {"unit": "g/dL", "min": 13.5, "max": 17.5},
+            "Hematocrit": {"unit": "%", "min": 38.3, "max": 48.6},
+            "Platelet Count": {"unit": "x10^9/L", "min": 150, "max": 450},
+            "Glucose": {"unit": "mg/dL", "min": 70, "max": 99},
+            "Creatinine": {"unit": "mg/dL", "min": 0.6, "max": 1.2},
+            "Urea": {"unit": "mg/dL", "min": 15, "max": 50},
+            "Cholesterol": {"unit": "mg/dL", "min": 0, "max": 200},
+            "ALT": {"unit": "U/L", "min": 7, "max": 56},
+            "AST": {"unit": "U/L", "min": 10, "max": 40},
+            "ALP": {"unit": "U/L", "min": 44, "max": 147},
+            "Bilirubin": {"unit": "mg/dL", "min": 0.3, "max": 1.9},
+            "Albumin": {"unit": "g/dL", "min": 3.5, "max": 5.5},
+            "Thyroid Stimulating Hormone": {"unit": "mIU/L", "min": 0.5, "max": 4.5},
+            "Free T4": {"unit": "ng/dL", "min": 0.8, "max": 1.8}
         }
+    def extract_with_regex(self, text: str) -> dict:
+        test_results = []
+        patterns = {
+            "White Blood Cell Count": r"White Blood Cell Count[:\s]+(\d+\.?\d*)\s*(x10\^9/L)",
+            "Red Blood Cell Count": r"Red Blood Cell Count[:\s]+(\d+\.?\d*)\s*(x10\^12/L)",
+            "Hemoglobin": r"Hemoglobin[:\s]+(\d+\.?\d*)\s*(g/dL)",
+            "Hematocrit": r"Hematocrit[:\s]+(\d+\.?\d*)\s*(%)",
+            "Platelet Count": r"Platelet Count[:\s]+(\d+\.?\d*)\s*(x10\^9/L)",
+            "Glucose": r"Glucose[:\s]+(\d+\.?\d*)\s*(mg/dL)",
+            "Creatinine": r"Creatinine[:\s]+(\d+\.?\d*)\s*(mg/dL)",
+            "Urea": r"Urea[:\s]+(\d+\.?\d*)\s*(mg/dL)",
+            "Cholesterol": r"Cholesterol[:\s]+(\d+\.?\d*)\s*(mg/dL)",
+            "ALT": r"ALT[:\s]+(\d+\.?\d*)\s*(U/L)",
+            "AST": r"AST[:\s]+(\d+\.?\d*)\s*(U/L)",
+            "ALP": r"ALP[:\s]+(\d+\.?\d*)\s*(U/L)",
+            "Bilirubin": r"Bilirubin[:\s]+(\d+\.?\d*)\s*(mg/dL)",
+            "Albumin": r"Albumin[:\s]+(\d+\.?\d*)\s*(g/dL)",
+            "Thyroid Stimulating Hormone": r"Thyroid Stimulating Hormone[:\s]+(\d+\.?\d*)\s*(mIU/L)",
+            "Free T4": r"Free T4[:\s]+(\d+\.?\d*)\s*(ng/dL)"
         }
+        for test_name, pattern in patterns.items():
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                value = float(match.group(1))
+                unit = match.group(2)
+                if test_name in self.lab_tests:
+                    ref_range = self.lab_tests[test_name]
+                    status = "normal"
+                    deviation = 0.0
+                    if value < ref_range["min"]:
+                        deviation = (
+                            (ref_range["min"] - value) / ref_range["min"]) * 100
+                        status = "critical_low" if deviation > 20 else "low"
+                    elif value > ref_range["max"]:
+                        deviation = (
+                            (value - ref_range["max"]) / ref_range["max"]) * 100
+                        status = "critical_high" if deviation > 20 else "high"
+                    clinical_sig = "Within normal limits"
+                    if status != "normal":
+                        direction = "↑" if "high" in status else "↓"
+                        clinical_sig = f"{'Above' if 'high' in status else 'Below'} normal range ({direction}{deviation:.1f}%)"
+                    test_results.append({
+                        "test_name": test_name,
+                        "value": value,
+                        "unit": unit,
+                        "reference_range": {
+                            "min": ref_range["min"],
+                            "max": ref_range["max"],
+                            "unit": ref_range["unit"]
+                        },
+                        "status": status,
+                        "deviation_percentage": deviation,
+                        "clinical_significance": clinical_sig,
+                        "trend": None,
+                        "source": "regex"
+                    })
+        return {"test_results": test_results}
+    def extract_with_ner(self, text: str) -> dict:
+        doc = self.nlp(text)
+        invalid_test_names = {
+            'hemolab', 'central', 'health', 'laboratory', 'medicity', 'wellbeing',
+            'healthland', 'age', 'gender', 'email', 'male', 'sample', 'results',
+            'verified by', 'dr', 'emily', 'johnson', 'normal', 'elevated', 'johnatan',
+            'doe', 'page', 'blood test', 'hematology', 'processing details'
         }
+        entities = []
+        for ent in doc.ents:
+            if ent.label_ == "TEST_NAME":
+                if ent.text.lower() not in invalid_test_names and len(ent.text) > 2:
+                    entities.append({
+                        "text": ent.text,
+                        "label": ent.label_,
+                        "start_char": ent.start_char,
+                        "end_char": ent.end_char,
+                        "confidence": 0.92
+                    })
+            elif ent.label_ in ["TEST_VALUE", "TEST_UNIT", "MedicalCondition"]:
+                entities.append({
+                    "text": ent.text,
+                    "label": ent.label_,
+                    "start_char": ent.start_char,
+                    "end_char": ent.end_char,
+                    "confidence": 0.92
+                })
+        return {"entities": entities}
+    def get_clinical_bert_embeddings(self, text: str):
+        inputs = self.clinical_bert_tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
             max_length=512,
             padding=True,
             return_token_type_ids=False
+        )
         with torch.no_grad():
+            outputs = self.clinical_bert_model(**inputs)
+        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
+        return embeddings.tolist()
+    def analyze_with_clinical_bert(self, text: str, test_results: list):
+        embeddings = self.get_clinical_bert_embeddings(text)
+        diseases_detected = []
+        status_flags = []
+        abnormal_tests = [t for t in test_results if t['status'] != 'normal']
+        if any('glucose' in t['test_name'].lower() and 'high' in t['status'] for t in abnormal_tests):
+            diseases_detected.append("Potential Diabetes")
+        if any('cholesterol' in t['test_name'].lower() and 'high' in t['status'] for t in abnormal_tests):
+            diseases_detected.append("Dyslipidemia")
+        for test in test_results:
+            if test['status'] != 'normal' and test['status'] not in [s.lower() for s in status_flags]:
+                status_flags.append(test['status'].replace('_', ' ').title())
+        if not status_flags:
+            status_flags = ["Normal"]
+        abnormality_patterns = []
+        critical_count = len(
+            [t for t in test_results if 'critical' in t['status']])
+        abnormal_count = len(abnormal_tests)
+        if abnormal_count > 0:
+            abnormality_patterns.append(
+                f"Detected {abnormal_count} abnormal parameter(s)")
+        if critical_count > 0:
+            abnormality_patterns.append(
+                f"{critical_count} critical finding(s) require immediate attention")
+        clinical_relevance = min(
+            100, (abnormal_count / len(test_results)) * 100) if test_results else 0
+        return {
+            "embedding_dimension": len(embeddings),
             "clinical_context_captured": True,
             "embeddings_generated": True,
+            "diseases_detected": diseases_detected,
+            "status_flags": status_flags,
+            "abnormality_patterns": abnormality_patterns,
+            "clinical_relevance_score": round(clinical_relevance, 1)
         }
+    def generate_patient_summary(self, test_results: list, abnormal_results: list) -> dict:
+        normal_count = len(
+            [t for t in test_results if t['status'] == 'normal'])
+        total_tests = len(test_results)
+        abnormal_count = len(abnormal_results)
+        critical_count = len(
+            [a for a in abnormal_results if a['severity'] == 'critical'])
+        if critical_count > 0:
+            overall_status = "⚠️ URGENT - IMMEDIATE ATTENTION NEEDED"
+            explanation = f"Your lab results show {critical_count} critical finding(s) that require immediate medical attention. Please consult your doctor as soon as possible."
+        elif abnormal_count > 0:
+            overall_status = "⚠️ ABNORMALITIES DETECTED"
+            explanation = f"Your lab results show {abnormal_count} test(s) outside normal range. While not immediately critical, these findings should be discussed with your healthcare provider."
         else:
+            overall_status = "✅ ALL TESTS NORMAL"
+            explanation = f"Great news! All {total_tests} lab tests are within normal ranges. Your results indicate good health in the tested parameters."
+        key_findings = []
+        areas_of_concern = []
+        test_explanations = {
+            "White Blood Cell Count": {
+                "normal": "Your immune system is functioning properly",
+                "high": "Your body may be fighting an infection or inflammation",
+                "low": "Your immune system may be weakened"
             },
+            "Red Blood Cell Count": {
+                "normal": "Your blood is carrying oxygen efficiently",
+                "high": "You may have dehydration or a blood disorder requiring evaluation",
+                "low": "You may have anemia, causing fatigue and weakness"
             },
+            "Hemoglobin": {
+                "normal": "Your blood oxygen levels are healthy",
+                "high": "May indicate dehydration or lung problems",
+                "low": "You may be anemic - your blood isn't carrying enough oxygen"
+            },
+            "Hematocrit": {
+                "normal": "Blood volume and red blood cell ratio is normal",
+                "high": "May indicate dehydration",
+                "low": "May indicate anemia or blood loss"
+            },
+            "Platelet Count": {
+                "normal": "Your blood clotting ability is normal",
+                "high": "Increased risk of blood clots",
+                "low": "Increased risk of bleeding"
+            },
+            "Glucose": {
+                "normal": "Your blood sugar levels are well controlled",
+                "high": "Your blood sugar is elevated - may indicate diabetes or prediabetes",
+                "low": "Your blood sugar is low - may cause dizziness and weakness"
+            },
+            "Cholesterol": {
+                "normal": "Your cholesterol levels are healthy for your heart",
+                "high": "Elevated cholesterol increases heart disease risk",
+                "low": "Unusually low cholesterol"
+            },
+            "Creatinine": {
+                "normal": "Your kidneys are filtering waste properly",
+                "high": "Your kidneys may not be working optimally",
+                "low": "May indicate low muscle mass"
+            },
+            "Urea": {
+                "normal": "Kidney function is normal",
+                "high": "May indicate kidney problems or dehydration",
+                "low": "May indicate liver problems"
+            },
+            "ALT": {
+                "normal": "Your liver is functioning normally",
+                "high": "Your liver may be inflamed or damaged",
+                "low": "Generally not concerning"
+            },
+            "AST": {
+                "normal": "Liver and heart function appear normal",
+                "high": "May indicate liver or heart problems",
+                "low": "Generally not concerning"
+            },
+            "Bilirubin": {
+                "normal": "Liver is processing waste products normally",
+                "high": "May cause jaundice - liver may not be functioning properly",
+                "low": "Generally not concerning"
+            },
+            "Albumin": {
+                "normal": "Good protein levels and liver function",
+                "high": "May indicate dehydration",
+                "low": "May indicate liver or kidney disease"
+            },
+            "Thyroid Stimulating Hormone": {
+                "normal": "Your thyroid hormone levels are balanced",
+                "high": "Your thyroid may be underactive (hypothyroidism)",
+                "low": "Your thyroid may be overactive (hyperthyroidism)"
+            },
+            "Free T4": {
+                "normal": "Thyroid hormone levels are appropriate",
+                "high": "May indicate hyperthyroidism",
+                "low": "May indicate hypothyroidism"
             }
         }
+        for test in test_results[:10]:
+            test_name = test['test_name']
+            status = test['status']
+            for key in test_explanations:
+                if key.lower() in test_name.lower():
+                    if status == 'normal':
+                        key_findings.append({
+                            "finding": f"{test_name}: {test['value']} {test['unit']}",
+                            "explanation": test_explanations[key].get('normal', 'Within normal range')
+                        })
+                    elif 'high' in status.lower():
+                        areas_of_concern.append({
+                            "finding": f"{test_name}: {test['value']} {test['unit']} (HIGH)",
+                            "explanation": test_explanations[key].get('high', 'Above normal range'),
+                            "severity": "critical" if "critical" in status else "moderate"
+                        })
+                    elif 'low' in status.lower():
+                        areas_of_concern.append({
+                            "finding": f"{test_name}: {test['value']} {test['unit']} (LOW)",
+                            "explanation": test_explanations[key].get('low', 'Below normal range'),
+                            "severity": "critical" if "critical" in status else "moderate"
+                        })
+                    break
+        next_steps = []
+        if critical_count > 0:
+            next_steps = [
+                "Contact your doctor immediately",
+                "Do not delay medical consultation",
+                "Bring these results to your healthcare provider",
+                "Follow your doctor's treatment recommendations"
+            ]
+        elif abnormal_count > 0:
+            next_steps = [
+                "Schedule an appointment with your doctor within the next few days",
+                "Discuss these results with your healthcare provider",
+                "Your doctor may recommend additional tests",
+                "Follow any lifestyle or treatment recommendations"
+            ]
         else:
+            next_steps = [
+                "Maintain your current healthy lifestyle",
+                "Continue regular health checkups",
+                "Keep these results for your medical records",
+                "Discuss with your doctor during your next routine visit"
+            ]
+        return {
+            "overall_status": overall_status,
+            "explanation": explanation,
+            "key_findings": key_findings[:5],
+            "areas_of_concern": areas_of_concern,
+            "next_steps": next_steps,
+            "summary_stats": {
+                "total_tests": total_tests,
+                "normal_tests": normal_count,
+                "abnormal_tests": abnormal_count,
+                "critical_findings": critical_count
+            }
+        }
+    def extract_and_format(self, text: str, report_id: str = None, patient_id: str = None) -> dict:
         # Build the full analysis payload for one lab report.
         #
         # Runs the regex extractor and the NER extractor over `text`, derives
         # the abnormal-result list, a clinician-style summary, the
         # ClinicalBERT insights, the patient-friendly summary, per-panel
         # groupings, chart data, a coarse classification and metadata, and
         # returns them together as one dict.
         #
         # Args:
         #   text: raw report text to analyse.
         #   report_id: identifier echoed back in the result; when falsy a
         #     "lab_<epoch seconds>" id is generated.
         #   patient_id: accepted but not referenced anywhere in this method.
         #
         # Returns:
         #   dict with keys report_id, report_type, processing_time_ms,
         #   classification, extraction_stats, entities, test_results,
         #   abnormal_results, ai_summary, clinical_insights,
         #   patient_friendly_summary, test_panels, visualization_data, metadata.
         #
         # NOTE(review): this method calls time.time(); the import block in this
         # diff appears to drop `import time` - confirm it is still imported.
+        start_time = time.time()
+        regex_results = self.extract_with_regex(text)
+        ner_results = self.extract_with_ner(text)
+        test_results = regex_results['test_results']
+        entities_list = ner_results['entities']
         # Every test whose status is not exactly 'normal' is reported as
         # abnormal; it is 'critical' when the status string contains
         # "critical", otherwise 'moderate'.
+        abnormal_results = []
+        for test in test_results:
+            if test['status'] != 'normal':
+                severity = 'critical' if 'critical' in test['status'] else 'moderate'
+                abnormal_results.append({
+                    "test_name": test['test_name'],
+                    "severity": severity,
+                    "requires_attention": 'critical' in test['status']
+                })
+        normal_params = [t['test_name']
+                         for t in test_results if t['status'] == 'normal']
+        key_abnormalities = [
+            f"{t['test_name']}: {t['clinical_significance']}" for t in test_results if t['status'] != 'normal']
         # Clinician-oriented summary; the recommendations list is static text.
+        ai_summary = {
+            "overall_assessment": f"Detected {len(abnormal_results)} abnormal result(s). {len(normal_params)} parameters within normal limits.",
+            "key_abnormalities": key_abnormalities,
+            "normal_parameters": normal_params,
+            "recommendations": [
                 "Correlate with clinical symptoms",
                 "Consider follow-up testing if symptoms persist",
                 "Consult with healthcare provider for interpretation"
             ]
         }
+        clinical_insights = self.analyze_with_clinical_bert(text, test_results)
+        patient_summary = self.generate_patient_summary(
+            test_results, abnormal_results)
         # Group tests into panels by case-insensitive substring match on the
         # test name. A panel is only emitted when at least one test matched,
         # and a test can land in more than one panel.
+        test_panels = []
+        cbc_tests = [t for t in test_results if any(x in t['test_name'].lower(
+        ) for x in ['blood cell', 'hemoglobin', 'hematocrit', 'platelet'])]
+        if cbc_tests:
+            test_panels.append({
+                "panel_name": "Complete Blood Count",
+                "tests_included": [t['test_name'] for t in cbc_tests],
+                "panel_status": "abnormal" if any(t['status'] != 'normal' for t in cbc_tests) else "normal",
+                "abnormal_count": len([t for t in cbc_tests if t['status'] != 'normal']),
+                "total_tests": len(cbc_tests)
+            })
+        chem_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
+                                                     'glucose', 'creatinine', 'urea', 'cholesterol'])]
+        if chem_tests:
+            test_panels.append({
+                "panel_name": "General Chemistry",
+                "tests_included": [t['test_name'] for t in chem_tests],
+                "panel_status": "abnormal" if any(t['status'] != 'normal' for t in chem_tests) else "normal",
+                "abnormal_count": len([t for t in chem_tests if t['status'] != 'normal']),
+                "total_tests": len(chem_tests)
+            })
         # NOTE(review): the short keys below are plain substrings, so 'ast'
         # also matches any name containing "fasting", and spelled-out names
         # such as "Alanine Aminotransferase" do not contain 'alt' - confirm
         # which names the regex extractor actually emits.
+        liver_tests = [t for t in test_results if any(x in t['test_name'].lower() for x in [
+                                                      'alt', 'ast', 'alp', 'bilirubin', 'albumin'])]
+        if liver_tests:
+            test_panels.append({
+                "panel_name": "Liver Function Panel",
+                "tests_included": [t['test_name'] for t in liver_tests],
+                "panel_status": "abnormal" if any(t['status'] != 'normal' for t in liver_tests) else "normal",
+                "abnormal_count": len([t for t in liver_tests if t['status'] != 'normal']),
+                "total_tests": len(liver_tests)
+            })
+        thyroid_tests = [t for t in test_results if any(
+            x in t['test_name'].lower() for x in ['thyroid', 'tsh', 't4', 't3'])]
+        if thyroid_tests:
+            test_panels.append({
+                "panel_name": "Thyroid Function Panel",
+                "tests_included": [t['test_name'] for t in thyroid_tests],
+                "panel_status": "abnormal" if any(t['status'] != 'normal' for t in thyroid_tests) else "normal",
+                "abnormal_count": len([t for t in thyroid_tests if t['status'] != 'normal']),
+                "total_tests": len(thyroid_tests)
             })
         # One chart entry per test that has a truthy reference range.
         # NOTE(review): this rendering of the diff seems to omit some lines of
         # the chart/visualization dicts - check against the real file.
         chart_data = []
         for test in test_results:
             if test['reference_range']:
                 chart_data.append({
                     "ref_min": test['reference_range']['min'],
                     "ref_max": test['reference_range']['max']
                 })
+        visualization_data = {
             "charts": [{
                 "chart_type": "bar",
                 "title": "Lab Results vs Reference Range",
             }],
             "trend_data": []
         }
         # Count NER entities per label.
+        ner_stats = {}
+        for ent in entities_list:
+            label = ent['label']
+            ner_stats[label] = ner_stats.get(label, 0) + 1
         # Classification defaults to hematology / complete_blood_count and is
         # switched to clinical_chemistry / metabolic_panel as soon as any test
         # name contains "glucose". Urgency is 'critical' when at least one
         # abnormal result has critical severity, otherwise 'routine'.
+        test_category = "hematology"
+        sub_category = "complete_blood_count"
+        urgency_level = "critical" if len(
+            [a for a in abnormal_results if a['severity'] == 'critical']) > 0 else "routine"
+        if any('glucose' in t['test_name'].lower() for t in test_results):
+            test_category = "clinical_chemistry"
+            sub_category = "metabolic_panel"
         # The confidence value here is a fixed constant, not computed from
         # the extraction.
+        classification = {
+            "test_category": test_category,
+            "sub_category": sub_category,
+            "urgency_level": urgency_level,
+            "confidence": 0.96
+        }
+        extraction_stats = {
+            "tests_with_values": len(test_results),
+            "additional_tests_found": len([e for e in entities_list if e['label'] == 'TEST_NAME']),
+            "diseases_detected": len(clinical_insights['diseases_detected']),
+            "interpretations_found": len([t for t in test_results if t['status'] != 'normal']),
+            "ner_model_stats": ner_stats
+        }
         # Wall-clock duration of this method so far, in whole milliseconds.
+        processing_time_ms = int((time.time() - start_time) * 1000)
         # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; the
         # "Z" suffix is appended manually to the naive timestamp. The
         # confidence_score is likewise a fixed constant.
+        metadata = {
+            "model_version": "radiolo_smart_ner_v2.0.0",
+            "processing_date": datetime.utcnow().isoformat() + "Z",
+            "tests_extracted": len(test_results),
+            "confidence_score": 0.94,
+            "nlp_models": {
+                "ner": "Custom Lab NER (Smart Filtered)",
+                "clinical_bert": "ClinicalDistilBERT",
+                "extraction_method": "Hybrid (Regex + Filtered NER)"
+            }
+        }
+        return {
+            "report_id": report_id or f"lab_{int(time.time())}",
+            "report_type": "laboratory",
+            "processing_time_ms": processing_time_ms,
+            "classification": classification,
+            "extraction_stats": extraction_stats,
+            "entities": entities_list,
+            "test_results": test_results,
+            "abnormal_results": abnormal_results,
+            "ai_summary": ai_summary,
+            "clinical_insights": clinical_insights,
+            "patient_friendly_summary": patient_summary,
+            "test_panels": test_panels,
+            "visualization_data": visualization_data,
+            "metadata": metadata
+        }

app/main.py CHANGED Viewed

@@ -78,12 +78,13 @@ async def root():
             "compression": "gzip",
             "ocr_engine": "EasyOCR",
             "ner_model": "Custom Lab NER",
             "supported_tests": 16
         },
         "endpoints": {
             "health": "/health",
             "analyze": "/analyze-lab-secure",
-            "test": "/test-analyze"  # NEW
         },
         "supported_formats": ["pdf", "image"],
         "supported_lab_tests": [
@@ -107,13 +108,8 @@ async def health_check():
         "supported_tests": 16
     }
 @app.post("/test-analyze", tags=["Testing"])
 async def test_analyze(file: UploadFile = File(...)):
-    """
-    Test endpoint without encryption - upload file directly
-    ⚠️ WARNING: For testing only! No encryption!
-    """
     start_time = time.time()
     try:
@@ -123,7 +119,7 @@ async def test_analyze(file: UploadFile = File(...)):
         file_bytes = await file.read()
         filename = file.filename
-        # print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")
         if filename.lower().endswith('.pdf'):
             file_type = "pdf"
@@ -140,9 +136,9 @@ async def test_analyze(file: UploadFile = File(...)):
         if not extracted_text or len(extracted_text.strip()) < 10:
             raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
-        # print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
-        # print("🧠 Processing with NER + ClinicalDistilBERT...")
         lab_analysis = lab_processor.extract_and_format(
             extracted_text,
             report_id=f"test_{int(time.time())}",
@@ -151,8 +147,8 @@ async def test_analyze(file: UploadFile = File(...)):
         processing_time = time.time() - start_time
-        # print(f"✅ Processing complete in {processing_time:.2f}s")
-        # print(f"   Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")
         response_data = {
             "status": "success",
@@ -177,7 +173,6 @@ async def test_analyze(file: UploadFile = File(...)):
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
 @app.post("/analyze-lab-secure", tags=["Lab Analysis"])
 async def analyze_lab_secure(request: EncryptedRequest):
     start_time = time.time()

             "compression": "gzip",
             "ocr_engine": "EasyOCR",
             "ner_model": "Custom Lab NER",
+            "patient_friendly_summary": "AI-Generated Explanations",
             "supported_tests": 16
         },
         "endpoints": {
             "health": "/health",
             "analyze": "/analyze-lab-secure",
+            "test": "/test-analyze"
         },
         "supported_formats": ["pdf", "image"],
         "supported_lab_tests": [
         "supported_tests": 16
     }
 @app.post("/test-analyze", tags=["Testing"])
 async def test_analyze(file: UploadFile = File(...)):
     start_time = time.time()
     try:
         file_bytes = await file.read()
         filename = file.filename
+        print(f"\n📄 Processing test file: {filename} ({len(file_bytes)} bytes)")
         if filename.lower().endswith('.pdf'):
             file_type = "pdf"
         if not extracted_text or len(extracted_text.strip()) < 10:
             raise HTTPException(status_code=400, detail="Could not extract sufficient text from file")
+        print(f"✓ Extracted {len(extracted_text)} characters (OCR: {ocr_used})")
+        print("🧠 Processing with NER + ClinicalDistilBERT...")
         lab_analysis = lab_processor.extract_and_format(
             extracted_text,
             report_id=f"test_{int(time.time())}",
         processing_time = time.time() - start_time
+        print(f"✅ Processing complete in {processing_time:.2f}s")
+        print(f"   Tests extracted: {lab_analysis.get('metadata', {}).get('tests_extracted', 0)}\n")
         response_data = {
             "status": "success",
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
 @app.post("/analyze-lab-secure", tags=["Lab Analysis"])
 async def analyze_lab_secure(request: EncryptedRequest):
     start_time = time.time()

decrypt_response.py CHANGED Viewed

@@ -9,7 +9,7 @@ import os
 from nacl.secret import SecretBox
 # Your hex key from .env
-SECRET_KEY_HEX = "9aa68717fafd68f30b6bdd7f8af0f273b4ddb31aadc0f7a3a42db86dfdde0195"
 # Convert hex to bytes (32 bytes)
 SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)

 from nacl.secret import SecretBox
 # Your hex key from .env
+SECRET_KEY_HEX = "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66"
 # Convert hex to bytes (32 bytes)
 SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)

generate_postman_request.py CHANGED Viewed

@@ -10,7 +10,7 @@ from nacl.secret import SecretBox
 from nacl.utils import random
 # Your 64-character hex key from .env
-SECRET_KEY_HEX = "9aa68717fafd68f30b6bdd7f8af0f273b4ddb31aadc0f7a3a42db86dfdde0195"
 # Convert hex to bytes (32 bytes)
 SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)

 from nacl.utils import random
 # Your 64-character hex key from .env
+SECRET_KEY_HEX = "7633eeaf69156124e49025ce8f6a3adbdbf6be87f1e58529397a67168a65bd66"
 # Convert hex to bytes (32 bytes)
 SECRET_KEY = bytes.fromhex(SECRET_KEY_HEX)