Spaces:
Running
Running
| """ | |
| MediGuard AI — Shared Utilities | |
| Common functions used by both the main API and HuggingFace deployment: | |
| - Biomarker parsing | |
| - Disease scoring heuristics | |
| - Result formatting | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import re | |
| from typing import Any | |
| logger = logging.getLogger(__name__) | |
| # --------------------------------------------------------------------------- | |
| # Biomarker Parsing | |
| # --------------------------------------------------------------------------- | |
| # Canonical biomarker name mapping (aliases -> standard name) | |
# Alias table used by normalize_biomarker_name: each lower-case spelling maps
# to the canonical biomarker name. It is generated from (canonical, aliases)
# groups so every canonical name is spelled exactly once; insertion order
# follows the groups below.
BIOMARKER_ALIASES: dict[str, str] = {
    alias: canonical
    for canonical, aliases in (
        (
            "Glucose",
            (
                "glucose",
                "fasting glucose",
                "fastingglucose",
                "blood sugar",
                "blood glucose",
                "fbg",
                "fbs",
            ),
        ),
        (
            "HbA1c",
            (
                "hba1c",
                "a1c",
                "hemoglobin a1c",
                "hemoglobina1c",
                "glycated hemoglobin",
            ),
        ),
        (
            "Cholesterol",
            ("cholesterol", "total cholesterol", "totalcholesterol", "tc"),
        ),
        ("LDL", ("ldl", "ldl cholesterol", "ldlcholesterol", "ldl-c")),
        ("HDL", ("hdl", "hdl cholesterol", "hdlcholesterol", "hdl-c")),
        ("Triglycerides", ("triglycerides", "tg", "trigs")),
        ("Hemoglobin", ("hemoglobin", "hgb", "hb")),
        ("TSH", ("tsh", "thyroid stimulating hormone")),
        ("Creatinine", ("creatinine", "cr")),
        ("ALT", ("alt", "sgpt")),
        ("AST", ("ast", "sgot")),
        ("Systolic_BP", ("systolic", "systolic bp", "sbp")),
        ("Diastolic_BP", ("diastolic", "diastolic bp", "dbp")),
        ("BMI", ("bmi", "body mass index")),
    )
    for alias in aliases
}
def normalize_biomarker_name(name: str) -> str:
    """
    Normalize a biomarker name to its canonical form.

    The lookup key is built by lower-casing the name, turning underscores
    into spaces and collapsing every run of whitespace (including leading
    and trailing whitespace) into a single space, so "Total  Cholesterol",
    "total_cholesterol" and " TOTAL CHOLESTEROL " all resolve the same way.

    Args:
        name: Raw biomarker name (may be alias, mixed case, etc.)

    Returns:
        The canonical name from BIOMARKER_ALIASES, or ``name`` exactly as
        given when no alias matches.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty edge tokens, which also covers padding left by "_" replacement.
    key = " ".join(name.lower().replace("_", " ").split())
    return BIOMARKER_ALIASES.get(key, name)
# Connective words that are never a biomarker name; they are also removed
# from the front of a captured multi-word name ("and HbA1c" -> "HbA1c").
_FILLER_WORDS = frozenset(
    {"the", "a", "an", "and", "or", "is", "was", "are", "were", "be"}
)

# A well-formed decimal ("140", "7.5", ".5"). The trailing lookahead rejects
# digit runs with several dots ("1.2.3") instead of capturing a prefix, while
# still allowing sentence punctuation right after the value ("7.5.").
_NUMBER_PATTERN = r"(\d+(?:\.\d+)?|\.\d+)(?!\.?\d)"

# One name token: starts with a letter, may contain digits/underscores and up
# to two hyphen-joined parts ("HbA1c", "Systolic_BP", "LDL-C"). Lengths are
# bounded to keep regex backtracking bounded on long inputs.
_WORD_PATTERN = r"[A-Za-z][A-Za-z0-9_]{0,30}(?:-[A-Za-z][A-Za-z0-9_]{0,30}){0,2}"

# Optional unit after a free-text value. Both the Greek mu (U+03BC) and the
# micro sign (U+00B5) spellings of micromol are accepted.
_UNIT_PATTERN = (
    r"(?:mg/dL|mmol/L|%|g/dL|U/L|mIU/L|ng/mL|pg/mL"
    r"|\u03bcmol/L|\u00b5mol/L|umol/L)"
)

# "Glucose: 140", "Glucose = 140", "Glucose - 140"; the name may be up to four
# space-separated words on one line, each starting with a letter.
_KEY_VALUE_RE = re.compile(
    rf"(?<![A-Za-z0-9_])({_WORD_PATTERN}(?:[ \t]+{_WORD_PATTERN}){{0,3}})"
    rf"\s*[:=\-]\s*{_NUMBER_PATTERN}",
    re.IGNORECASE,
)

# "Glucose 140 mg/dL": a single name token, whitespace, value, optional unit,
# then whitespace / punctuation / end of text.
_FREE_TEXT_RE = re.compile(
    rf"(?<![A-Za-z0-9_])({_WORD_PATTERN})\s+{_NUMBER_PATTERN}"
    rf"\s*{_UNIT_PATTERN}?(?=[\s,;.)]|$)",
    re.IGNORECASE,
)


def _strip_leading_filler(raw_name: str) -> str:
    """Drop leading filler words from a captured name; "" if nothing is left."""
    words = raw_name.split()
    while words and words[0].lower() in _FILLER_WORDS:
        del words[0]
    return " ".join(words)


def _biomarkers_from_mapping(payload: dict[str, Any]) -> dict[str, float]:
    """Convert a decoded JSON object to biomarkers, skipping unusable values."""
    parsed: dict[str, float] = {}
    for raw_name, raw_value in payload.items():
        # bool is a subclass of int: float(True) would become a reading of 1.0.
        if isinstance(raw_value, bool):
            continue
        try:
            number = float(raw_value)
        except (TypeError, ValueError, OverflowError):
            logger.debug("Skipping non-numeric biomarker value for %r", raw_name)
            continue
        # json.loads accepts NaN/Infinity; they are not usable measurements.
        if not float("-inf") < number < float("inf"):
            logger.debug("Skipping non-finite biomarker value for %r", raw_name)
            continue
        parsed[normalize_biomarker_name(raw_name)] = number
    return parsed


def parse_biomarkers(text: str) -> dict[str, float]:
    """
    Parse biomarkers from natural language text or JSON.

    Supports formats like:
    - JSON: {"Glucose": 140, "HbA1c": 7.5}
    - Key-value: "Glucose: 140, HbA1c: 7.5"
    - Natural: "glucose 140 mg/dL and hba1c 7.5%"

    JSON objects are converted entry by entry: values that are not finite
    numbers (or numeric strings) are skipped instead of invalidating the
    whole payload, and a later key overwrites an earlier one that normalizes
    to the same canonical name. Text that starts with "{" but is not valid
    JSON falls back to the text patterns.

    For plain text the key-value pattern is applied first and the free-text
    pattern second; the first value found for a canonical name wins. Names
    consisting only of filler words ("the", "and", ...) are ignored, and
    leading filler words are stripped from multi-word names.

    Args:
        text: Input text containing biomarker values

    Returns:
        Dictionary of normalized biomarker names to float values
    """
    text = text.strip()
    if not text:
        return {}

    # Try JSON first
    if text.startswith("{"):
        try:
            payload = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            payload = None
        if isinstance(payload, dict):
            return _biomarkers_from_mapping(payload)

    biomarkers: dict[str, float] = {}
    for pattern in (_KEY_VALUE_RE, _FREE_TEXT_RE):
        for match in pattern.finditer(text):
            name = _strip_leading_filler(match.group(1))
            if not name:
                continue
            canonical = normalize_biomarker_name(name)
            # Don't overwrite if already found (first match wins). The number
            # group only matches well-formed decimals, so float() cannot fail.
            if canonical not in biomarkers:
                biomarkers[canonical] = float(match.group(2))
    return biomarkers
| # --------------------------------------------------------------------------- | |
| # Disease Scoring Heuristics | |
| # --------------------------------------------------------------------------- | |
| # Reference ranges for biomarkers (approximate clinical ranges) | |
# Approximate reference range per canonical biomarker: (low, high, unit).
# A bound of 0 or 999 is a placeholder for "no lower/upper limit"
# (e.g. HDL, where higher is better).
BIOMARKER_REFERENCE_RANGES: dict[str, tuple[float, float, str]] = {
    # (low, high, unit)
    "Glucose": (70, 100, "mg/dL"),
    "HbA1c": (4.0, 5.6, "%"),
    "Cholesterol": (0, 200, "mg/dL"),
    "LDL": (0, 100, "mg/dL"),
    "HDL": (40, 999, "mg/dL"),  # Higher is better
    "Triglycerides": (0, 150, "mg/dL"),
    "Hemoglobin": (12.0, 17.5, "g/dL"),
    "TSH": (0.4, 4.0, "mIU/L"),
    "Creatinine": (0.6, 1.2, "mg/dL"),
    "ALT": (7, 56, "U/L"),
    "AST": (10, 40, "U/L"),
    "Systolic_BP": (90, 120, "mmHg"),
    "Diastolic_BP": (60, 80, "mmHg"),
    # "\u00b2" is SUPERSCRIPT TWO; written as an escape so the unit survives
    # re-encoding of this file (it had previously been corrupted).
    "BMI": (18.5, 24.9, "kg/m\u00b2"),
}
def classify_biomarker(name: str, value: float) -> str:
    """
    Classify a biomarker value as normal, low, or high.

    Both range bounds are inclusive: a value equal to the low or high bound
    is "normal".

    Args:
        name: Canonical biomarker name
        value: Measured value

    Returns:
        "normal", "low", or "high"; "unknown" when the biomarker has no
        entry in BIOMARKER_REFERENCE_RANGES or the value is NaN.
    """
    ranges = BIOMARKER_REFERENCE_RANGES.get(name)
    if not ranges:
        return "unknown"
    # NaN is the only value not equal to itself. Without this guard it would
    # fail both comparisons below and be reported as "normal".
    if value != value:
        return "unknown"
    low, high, _ = ranges
    if value < low:
        return "low"
    if value > high:
        return "high"
    return "normal"
def score_disease_diabetes(biomarkers: dict[str, float]) -> tuple[float, str]:
    """
    Score diabetes risk based on biomarkers.

    HbA1c contributes 0.5 at >= 6.5 and 0.3 at >= 5.7; glucose contributes
    0.35 at >= 126 and 0.2 at >= 100. Missing biomarkers contribute nothing.

    Args:
        biomarkers: Dictionary of biomarker values; reads "Glucose" and "HbA1c"

    Returns: (score 0-1, severity) where severity is "high" for a score
        >= 0.7, "moderate" for >= 0.4 and "low" otherwise.
    """
    glucose = biomarkers.get("Glucose", 0)
    hba1c = biomarkers.get("HbA1c", 0)
    score = 0.0

    # HbA1c scoring (most important)
    if hba1c >= 6.5:
        score += 0.5  # diabetes threshold
    elif hba1c >= 5.7:
        score += 0.3  # prediabetes range

    # Fasting glucose scoring
    if glucose >= 126:
        score += 0.35  # diabetes threshold
    elif glucose >= 100:
        score += 0.2  # prediabetes range

    # Normalize to 0-1
    score = min(1.0, score)

    # Determine severity
    if score >= 0.7:
        severity = "high"
    elif score >= 0.4:
        severity = "moderate"
    else:
        severity = "low"
    return score, severity
def score_disease_dyslipidemia(biomarkers: dict[str, float]) -> tuple[float, str]:
    """
    Score dyslipidemia risk from the lipid panel.

    Points are accumulated per marker (total cholesterol, LDL, HDL,
    triglycerides, in that order) and capped at 1.0. Missing markers add
    nothing: absent values default to 0, except HDL which defaults to 999
    because only a low HDL adds risk.

    Returns:
        (score, severity) with severity "high" for a score >= 0.6,
        "moderate" for >= 0.3 and "low" otherwise.
    """

    def tiered(reading: float, upper_cut: float, upper_points: float,
               lower_cut: float, lower_points: float) -> float:
        # Points for a marker that has two escalating cut-offs.
        if reading >= upper_cut:
            return upper_points
        if reading >= lower_cut:
            return lower_points
        return 0.0

    total = 0.0
    total += tiered(biomarkers.get("Cholesterol", 0), 240, 0.3, 200, 0.15)
    total += tiered(biomarkers.get("LDL", 0), 160, 0.3, 130, 0.15)
    total += 0.2 if biomarkers.get("HDL", 999) < 40 else 0.0
    total += tiered(biomarkers.get("Triglycerides", 0), 200, 0.2, 150, 0.1)
    total = min(1.0, total)

    severity = "high" if total >= 0.6 else "moderate" if total >= 0.3 else "low"
    return total, severity
def score_disease_anemia(biomarkers: dict[str, float]) -> tuple[float, str]:
    """
    Score anemia risk from the "Hemoglobin" value.

    Returns:
        (score, severity). A missing or zero hemoglobin gives
        (0.0, "unknown"); otherwise the first ceiling the value falls below
        decides the result, and values of 13 or more are "normal".
    """
    level = biomarkers.get("Hemoglobin", 0)
    if not level:
        return 0.0, "unknown"

    # (exclusive ceiling, score, severity), checked from most to least severe.
    tiers = (
        (8, 0.9, "critical"),
        (10, 0.7, "high"),
        (12, 0.5, "moderate"),
        (13, 0.2, "low"),
    )
    for ceiling, risk, label in tiers:
        if level < ceiling:
            return risk, label
    return 0.0, "normal"
def score_disease_thyroid(biomarkers: dict[str, float]) -> tuple[float, str, str]:
    """
    Score thyroid disorder risk from the "TSH" value.

    Returns:
        (score, severity, direction). A missing or zero TSH gives
        (0.0, "unknown", "none"). TSH above 4.5 is "hypothyroid" (high
        severity above 10), TSH below 0.4 is "hyperthyroid" (high severity
        below 0.1), anything else is (0.0, "normal", "none").
    """
    reading = biomarkers.get("TSH", 0)
    if not reading:
        return 0.0, "unknown", "none"

    if reading > 4.5:
        if reading > 10:
            return 0.8, "high", "hypothyroid"
        return 0.5, "moderate", "hypothyroid"

    if reading < 0.4:
        if reading < 0.1:
            return 0.8, "high", "hyperthyroid"
        return 0.5, "moderate", "hyperthyroid"

    return 0.0, "normal", "none"
def score_all_diseases(biomarkers: dict[str, float]) -> dict[str, dict[str, Any]]:
    """
    Run every disease scorer and collect the ones with a positive score.

    Args:
        biomarkers: Dictionary of biomarker values

    Returns:
        Dictionary keyed by "diabetes", "dyslipidemia", "anemia" and
        "thyroid" (only those whose score is above zero, in that order);
        each value is {"disease", "confidence", "severity"}.
    """
    findings = {}

    def record(key: str, label: str, confidence: float, severity: str) -> None:
        # Scorers return 0 when nothing points at the disease; skip those.
        if confidence > 0:
            findings[key] = {
                "disease": label,
                "confidence": confidence,
                "severity": severity,
            }

    record("diabetes", "Diabetes", *score_disease_diabetes(biomarkers))
    record("dyslipidemia", "Dyslipidemia", *score_disease_dyslipidemia(biomarkers))
    record("anemia", "Anemia", *score_disease_anemia(biomarkers))

    # The thyroid scorer also reports a direction, which picks the label.
    thyroid_score, thyroid_severity, direction = score_disease_thyroid(biomarkers)
    thyroid_label = "Hypothyroidism" if direction == "hypothyroid" else "Hyperthyroidism"
    record("thyroid", thyroid_label, thyroid_score, thyroid_severity)

    return findings
def get_primary_prediction(biomarkers: dict[str, float]) -> dict[str, Any]:
    """
    Pick the disease prediction with the highest confidence.

    Args:
        biomarkers: Dictionary of biomarker values

    Returns:
        The {"disease", "confidence", "severity"} entry from
        score_all_diseases with the largest confidence (the earliest entry
        wins a tie), or a "General Health Screening" placeholder with
        confidence 0.5 when no disease scored above zero.
    """
    candidates = score_all_diseases(biomarkers)
    if candidates:
        return max(candidates.values(), key=lambda entry: entry["confidence"])

    # Nothing scored: report a neutral screening result.
    return {
        "disease": "General Health Screening",
        "confidence": 0.5,
        "severity": "low",
    }
| # --------------------------------------------------------------------------- | |
| # Biomarker Flagging | |
| # --------------------------------------------------------------------------- | |
def flag_biomarkers(biomarkers: dict[str, float]) -> list[dict[str, Any]]:
    """
    Describe every biomarker with its status and reference range.

    Args:
        biomarkers: Dictionary of biomarker values

    Returns:
        One dict per biomarker with "name", "value" and "status"; biomarkers
        that have a reference range also get "reference_range" and "unit",
        and every status other than "normal" (including "unknown") gets
        "flagged": True. Flagged entries come first, then entries are
        ordered by name.
    """

    def describe(marker: str, reading: float) -> dict[str, Any]:
        status = classify_biomarker(marker, reading)
        entry = {"name": marker, "value": reading, "status": status}
        reference = BIOMARKER_REFERENCE_RANGES.get(marker)
        if reference:
            lower, upper, unit = reference
            entry.update(reference_range=f"{lower}-{upper} {unit}", unit=unit)
        if status != "normal":
            entry["flagged"] = True
        return entry

    entries = [describe(marker, reading) for marker, reading in biomarkers.items()]
    # False sorts before True, so negating "flagged" puts flagged entries first.
    return sorted(
        entries,
        key=lambda entry: (not entry.get("flagged", False), entry["name"]),
    )
| # --------------------------------------------------------------------------- | |
| # Utility Functions | |
| # --------------------------------------------------------------------------- | |
def format_confidence_percent(score: float) -> str:
    """
    Format a confidence score as a whole-number percentage string.

    The value is rounded to the nearest percent rather than truncated:
    binary floating point makes 0.29 * 100 equal 28.999999999999996, which
    truncation would display as "28%".

    Args:
        score: Confidence as a fraction (e.g. 0.85)

    Returns:
        Percentage string such as "85%"
    """
    return f"{round(score * 100)}%"
def severity_to_emoji(severity: str) -> str:
    """
    Convert severity level to emoji.

    The emoji are written as escape sequences so they survive re-encoding of
    this file (the literals had previously been corrupted into mojibake).

    Args:
        severity: Severity label; matched case-insensitively

    Returns:
        The emoji for the severity, or a white circle for unrecognized labels
    """
    mapping = {
        "critical": "\U0001F534",  # large red circle
        "high": "\U0001F7E0",  # large orange circle
        "moderate": "\U0001F7E1",  # large yellow circle
        "low": "\U0001F7E2",  # large green circle
        "normal": "\u2705",  # white heavy check mark
        "unknown": "\u2753",  # black question mark ornament
    }
    return mapping.get(severity.lower(), "\u26AA")  # medium white circle