Nikhil Pravin Pise committed on
Commit
ad2e847
·
1 Parent(s): aefac4f

Refactor: Improve code quality, security, and configuration

Browse files

- Remove redundant sys.path manipulation in agents and services
- Enhance CORS configuration with environment-based origin handling
- Improve error handling (no internal detail leakage)
- Fix environment variable handling in setup scripts (remove extra quotes)
- Update gitignore for external tools and course materials
- Correct clinical biomarker reference ranges
- Add comprehensive production upgrade plan
- Improve test configuration (HuggingFace deprecation filters)
- Add optional dependencies for evaluation tools
- Consolidate path initialization patterns

.gitignore CHANGED
@@ -292,4 +292,6 @@ docker-compose.override.yml
292
  models/
293
 
294
  # Node modules (if any JS tooling)
295
- node_modules/
 
 
 
292
  models/
293
 
294
  # Node modules (if any JS tooling)
295
+ node_modules/
296
+ .agents/
297
+ production-agentic-rag-course/
api/app/main.py CHANGED
@@ -78,13 +78,14 @@ app = FastAPI(
78
  # CORS MIDDLEWARE
79
  # ============================================================================
80
 
81
- # Allow all origins (for MVP - can restrict later)
 
82
  app.add_middleware(
83
  CORSMiddleware,
84
- allow_origins=["*"], # Allows all origins
85
- allow_credentials=True,
86
- allow_methods=["*"], # Allows all methods
87
- allow_headers=["*"], # Allows all headers
88
  )
89
 
90
 
@@ -109,15 +110,14 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
109
 
110
  @app.exception_handler(Exception)
111
  async def general_exception_handler(request: Request, exc: Exception):
112
- """Handle unexpected errors"""
113
  logger.error(f"Unhandled exception: {exc}", exc_info=True)
114
  return JSONResponse(
115
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
116
  content={
117
  "status": "error",
118
  "error_code": "INTERNAL_SERVER_ERROR",
119
- "message": "An unexpected error occurred",
120
- "details": str(exc)
121
  }
122
  )
123
 
 
78
  # CORS MIDDLEWARE
79
  # ============================================================================
80
 
81
+ # CORS configuration: restrict to known origins in production
82
+ _allowed_origins = os.getenv("CORS_ALLOWED_ORIGINS", "*").split(",")
83
  app.add_middleware(
84
  CORSMiddleware,
85
+ allow_origins=_allowed_origins,
86
+ allow_credentials=_allowed_origins != ["*"], # credentials only with explicit origins
87
+ allow_methods=["*"],
88
+ allow_headers=["*"],
89
  )
90
 
91
 
 
110
 
111
  @app.exception_handler(Exception)
112
  async def general_exception_handler(request: Request, exc: Exception):
113
+ """Handle unexpected errors — never leak internal details to the client."""
114
  logger.error(f"Unhandled exception: {exc}", exc_info=True)
115
  return JSONResponse(
116
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
117
  content={
118
  "status": "error",
119
  "error_code": "INTERNAL_SERVER_ERROR",
120
+ "message": "An unexpected error occurred. Please try again later."
 
121
  }
122
  )
123
 
api/app/services/extraction.py CHANGED
@@ -8,8 +8,10 @@ import sys
8
  from pathlib import Path
9
  from typing import Dict, Any, Tuple
10
 
11
- # Add parent paths for imports
12
- sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
 
 
13
 
14
  from langchain_core.prompts import ChatPromptTemplate
15
  from src.biomarker_normalization import normalize_biomarker_name
 
8
  from pathlib import Path
9
  from typing import Dict, Any, Tuple
10
 
11
+ # Ensure project root is in path for src imports
12
+ _project_root = str(Path(__file__).parent.parent.parent.parent)
13
+ if _project_root not in sys.path:
14
+ sys.path.insert(0, _project_root)
15
 
16
  from langchain_core.prompts import ChatPromptTemplate
17
  from src.biomarker_normalization import normalize_biomarker_name
api/app/services/ragbot.py CHANGED
@@ -10,8 +10,10 @@ from pathlib import Path
10
  from typing import Dict, Any
11
  from datetime import datetime
12
 
13
- # Add parent directory to path for imports
14
- sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
 
 
15
 
16
  from src.workflow import create_guild
17
  from src.state import PatientInput
@@ -42,16 +44,18 @@ class RagBotService:
42
  print("INFO: Initializing RagBot workflow...")
43
  start_time = time.time()
44
 
45
- # Save current directory
46
  import os
47
- original_dir = os.getcwd()
48
 
49
  try:
50
- # Change to RagBot root (parent of api directory)
51
- # This ensures vector store paths resolve correctly
52
  ragbot_root = Path(__file__).parent.parent.parent.parent
 
 
 
 
 
53
  os.chdir(ragbot_root)
54
- print(f"INFO: Working directory: {ragbot_root}")
55
 
56
  self.guild = create_guild()
57
  self.initialized = True
 
10
  from typing import Dict, Any
11
  from datetime import datetime
12
 
13
+ # Ensure project root is in path for src imports
14
+ _project_root = str(Path(__file__).parent.parent.parent.parent)
15
+ if _project_root not in sys.path:
16
+ sys.path.insert(0, _project_root)
17
 
18
  from src.workflow import create_guild
19
  from src.state import PatientInput
 
44
  print("INFO: Initializing RagBot workflow...")
45
  start_time = time.time()
46
 
 
47
  import os
 
48
 
49
  try:
50
+ # Record the project root in the environment so vector store paths can
51
+ # resolve without relying on a process-global os.chdir() (thread-unsafe).
52
  ragbot_root = Path(__file__).parent.parent.parent.parent
53
+ os.environ["RAGBOT_ROOT"] = str(ragbot_root)
54
+ print(f"INFO: Project root: {ragbot_root}")
55
+
56
+ # Temporarily chdir only during initialization (single-threaded at startup)
57
+ original_dir = os.getcwd()
58
  os.chdir(ragbot_root)
 
59
 
60
  self.guild = create_guild()
61
  self.initialized = True
api/requirements.txt CHANGED
@@ -3,7 +3,7 @@
3
 
4
  fastapi==0.109.0
5
  uvicorn[standard]==0.27.0
6
- pydantic==2.5.3
7
  python-multipart==0.0.6
8
 
9
  # CORS and middleware
 
3
 
4
  fastapi==0.109.0
5
  uvicorn[standard]==0.27.0
6
+ pydantic>=2.5.3
7
  python-multipart==0.0.6
8
 
9
  # CORS and middleware
config/biomarker_references.json CHANGED
@@ -15,13 +15,14 @@
15
  },
16
  "Cholesterol": {
17
  "unit": "mg/dL",
18
- "normal_range": {"min": 0, "max": 200},
19
- "critical_low": null,
20
  "critical_high": 240,
21
  "type": "total",
22
  "gender_specific": false,
23
  "description": "Total cholesterol level",
24
  "clinical_significance": {
 
25
  "high": "Increased cardiovascular disease risk"
26
  }
27
  },
@@ -177,7 +178,7 @@
177
  },
178
  "Triglycerides": {
179
  "unit": "mg/dL",
180
- "normal_range": {"min": 0, "max": 150},
181
  "critical_low": null,
182
  "critical_high": 500,
183
  "gender_specific": false,
@@ -188,7 +189,7 @@
188
  },
189
  "HbA1c": {
190
  "unit": "%",
191
- "normal_range": {"min": 0, "max": 5.7},
192
  "critical_low": null,
193
  "critical_high": 14,
194
  "gender_specific": false,
@@ -199,7 +200,7 @@
199
  },
200
  "LDL Cholesterol": {
201
  "unit": "mg/dL",
202
- "normal_range": {"min": 0, "max": 100},
203
  "critical_low": null,
204
  "critical_high": 190,
205
  "gender_specific": false,
@@ -211,10 +212,10 @@
211
  "HDL Cholesterol": {
212
  "unit": "mg/dL",
213
  "normal_range": {
214
- "male": {"min": 40, "max": 999},
215
- "female": {"min": 50, "max": 999}
216
  },
217
- "critical_low": 40,
218
  "critical_high": null,
219
  "gender_specific": true,
220
  "description": "High-density lipoprotein (good cholesterol)",
@@ -291,6 +292,231 @@
291
  "clinical_significance": {
292
  "high": "Acute inflammation or infection"
293
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  }
295
  }
296
  }
 
15
  },
16
  "Cholesterol": {
17
  "unit": "mg/dL",
18
+ "normal_range": {"min": 125, "max": 200},
19
+ "critical_low": 100,
20
  "critical_high": 240,
21
  "type": "total",
22
  "gender_specific": false,
23
  "description": "Total cholesterol level",
24
  "clinical_significance": {
25
+ "low": "Malnutrition, liver disease, or hyperthyroidism",
26
  "high": "Increased cardiovascular disease risk"
27
  }
28
  },
 
178
  },
179
  "Triglycerides": {
180
  "unit": "mg/dL",
181
+ "normal_range": {"min": 40, "max": 150},
182
  "critical_low": null,
183
  "critical_high": 500,
184
  "gender_specific": false,
 
189
  },
190
  "HbA1c": {
191
  "unit": "%",
192
+ "normal_range": {"min": 4.0, "max": 5.7},
193
  "critical_low": null,
194
  "critical_high": 14,
195
  "gender_specific": false,
 
200
  },
201
  "LDL Cholesterol": {
202
  "unit": "mg/dL",
203
+ "normal_range": {"min": 40, "max": 100},
204
  "critical_low": null,
205
  "critical_high": 190,
206
  "gender_specific": false,
 
212
  "HDL Cholesterol": {
213
  "unit": "mg/dL",
214
  "normal_range": {
215
+ "male": {"min": 40, "max": 100},
216
+ "female": {"min": 50, "max": 100}
217
  },
218
+ "critical_low": 25,
219
  "critical_high": null,
220
  "gender_specific": true,
221
  "description": "High-density lipoprotein (good cholesterol)",
 
292
  "clinical_significance": {
293
  "high": "Acute inflammation or infection"
294
  }
295
+ },
296
+ "Urea": {
297
+ "unit": "mg/dL",
298
+ "normal_range": {"min": 7, "max": 20},
299
+ "critical_low": null,
300
+ "critical_high": 50,
301
+ "gender_specific": false,
302
+ "description": "Blood urea nitrogen (kidney function marker)",
303
+ "clinical_significance": {
304
+ "high": "Kidney dysfunction, dehydration, or high-protein diet",
305
+ "low": "Liver disease or malnutrition"
306
+ }
307
+ },
308
+ "TSH": {
309
+ "unit": "mIU/L",
310
+ "normal_range": {"min": 0.4, "max": 4.0},
311
+ "critical_low": 0.1,
312
+ "critical_high": 10,
313
+ "gender_specific": false,
314
+ "description": "Thyroid-stimulating hormone",
315
+ "clinical_significance": {
316
+ "low": "Hyperthyroidism",
317
+ "high": "Hypothyroidism"
318
+ }
319
+ },
320
+ "T3": {
321
+ "unit": "ng/dL",
322
+ "normal_range": {"min": 80, "max": 200},
323
+ "critical_low": null,
324
+ "critical_high": null,
325
+ "gender_specific": false,
326
+ "description": "Triiodothyronine (thyroid hormone)",
327
+ "clinical_significance": {
328
+ "low": "Hypothyroidism",
329
+ "high": "Hyperthyroidism"
330
+ }
331
+ },
332
+ "T4": {
333
+ "unit": "μg/dL",
334
+ "normal_range": {"min": 5.0, "max": 12.0},
335
+ "critical_low": null,
336
+ "critical_high": null,
337
+ "gender_specific": false,
338
+ "description": "Thyroxine (thyroid hormone)",
339
+ "clinical_significance": {
340
+ "low": "Hypothyroidism",
341
+ "high": "Hyperthyroidism"
342
+ }
343
+ },
344
+ "Sodium": {
345
+ "unit": "mEq/L",
346
+ "normal_range": {"min": 136, "max": 145},
347
+ "critical_low": 120,
348
+ "critical_high": 155,
349
+ "gender_specific": false,
350
+ "description": "Blood sodium level (electrolyte)",
351
+ "clinical_significance": {
352
+ "low": "Hyponatremia - confusion, seizures",
353
+ "high": "Hypernatremia - dehydration, thirst"
354
+ }
355
+ },
356
+ "Potassium": {
357
+ "unit": "mEq/L",
358
+ "normal_range": {"min": 3.5, "max": 5.0},
359
+ "critical_low": 2.5,
360
+ "critical_high": 6.5,
361
+ "gender_specific": false,
362
+ "description": "Blood potassium level (electrolyte)",
363
+ "clinical_significance": {
364
+ "low": "Hypokalemia - muscle weakness, arrhythmia",
365
+ "high": "Hyperkalemia - cardiac arrest risk"
366
+ }
367
+ },
368
+ "Calcium": {
369
+ "unit": "mg/dL",
370
+ "normal_range": {"min": 8.5, "max": 10.5},
371
+ "critical_low": 7.0,
372
+ "critical_high": 12.0,
373
+ "gender_specific": false,
374
+ "description": "Blood calcium level",
375
+ "clinical_significance": {
376
+ "low": "Hypocalcemia - muscle cramps, numbness",
377
+ "high": "Hypercalcemia - kidney stones, bone pain"
378
+ }
379
+ },
380
+ "Chloride": {
381
+ "unit": "mEq/L",
382
+ "normal_range": {"min": 98, "max": 106},
383
+ "critical_low": 80,
384
+ "critical_high": 120,
385
+ "gender_specific": false,
386
+ "description": "Blood chloride level (electrolyte)",
387
+ "clinical_significance": {
388
+ "low": "Metabolic alkalosis",
389
+ "high": "Metabolic acidosis"
390
+ }
391
+ },
392
+ "Bicarbonate": {
393
+ "unit": "mEq/L",
394
+ "normal_range": {"min": 22, "max": 28},
395
+ "critical_low": 15,
396
+ "critical_high": 35,
397
+ "gender_specific": false,
398
+ "description": "Blood bicarbonate (acid-base balance)",
399
+ "clinical_significance": {
400
+ "low": "Metabolic acidosis",
401
+ "high": "Metabolic alkalosis"
402
+ }
403
+ },
404
+ "Uric_Acid": {
405
+ "unit": "mg/dL",
406
+ "normal_range": {
407
+ "male": {"min": 3.4, "max": 7.0},
408
+ "female": {"min": 2.4, "max": 6.0}
409
+ },
410
+ "critical_low": null,
411
+ "critical_high": 10,
412
+ "gender_specific": true,
413
+ "description": "Blood uric acid level",
414
+ "clinical_significance": {
415
+ "high": "Gout risk, kidney stone risk"
416
+ }
417
+ },
418
+ "Total_Protein": {
419
+ "unit": "g/dL",
420
+ "normal_range": {"min": 6.0, "max": 8.3},
421
+ "critical_low": 4.5,
422
+ "critical_high": null,
423
+ "gender_specific": false,
424
+ "description": "Total serum protein",
425
+ "clinical_significance": {
426
+ "low": "Liver disease, malnutrition, kidney disease",
427
+ "high": "Chronic inflammation, multiple myeloma"
428
+ }
429
+ },
430
+ "Albumin": {
431
+ "unit": "g/dL",
432
+ "normal_range": {"min": 3.5, "max": 5.5},
433
+ "critical_low": 2.0,
434
+ "critical_high": null,
435
+ "gender_specific": false,
436
+ "description": "Serum albumin (liver function)",
437
+ "clinical_significance": {
438
+ "low": "Liver disease, malnutrition, kidney disease"
439
+ }
440
+ },
441
+ "Globulin": {
442
+ "unit": "g/dL",
443
+ "normal_range": {"min": 2.0, "max": 3.5},
444
+ "critical_low": null,
445
+ "critical_high": null,
446
+ "gender_specific": false,
447
+ "description": "Serum globulin (immune proteins)",
448
+ "clinical_significance": {
449
+ "low": "Immune deficiency",
450
+ "high": "Chronic inflammation, liver disease"
451
+ }
452
+ },
453
+ "AG_Ratio": {
454
+ "unit": "ratio",
455
+ "normal_range": {"min": 1.1, "max": 2.5},
456
+ "critical_low": null,
457
+ "critical_high": null,
458
+ "gender_specific": false,
459
+ "description": "Albumin/Globulin ratio",
460
+ "clinical_significance": {
461
+ "low": "Liver disease, autoimmune conditions",
462
+ "high": "Generally not clinically significant"
463
+ }
464
+ },
465
+ "Bilirubin_Total": {
466
+ "unit": "mg/dL",
467
+ "normal_range": {"min": 0.1, "max": 1.2},
468
+ "critical_low": null,
469
+ "critical_high": 5.0,
470
+ "gender_specific": false,
471
+ "description": "Total bilirubin (liver function marker)",
472
+ "clinical_significance": {
473
+ "high": "Liver disease, bile duct obstruction, hemolysis"
474
+ }
475
+ },
476
+ "ALP": {
477
+ "unit": "U/L",
478
+ "normal_range": {"min": 44, "max": 147},
479
+ "critical_low": null,
480
+ "critical_high": 500,
481
+ "gender_specific": false,
482
+ "description": "Alkaline phosphatase (liver/bone enzyme)",
483
+ "clinical_significance": {
484
+ "high": "Liver disease, bone disorders, bile duct issues"
485
+ }
486
+ },
487
+ "BUN": {
488
+ "unit": "mg/dL",
489
+ "normal_range": {"min": 7, "max": 20},
490
+ "critical_low": null,
491
+ "critical_high": 50,
492
+ "gender_specific": false,
493
+ "description": "Blood urea nitrogen (kidney function)",
494
+ "clinical_significance": {
495
+ "high": "Kidney dysfunction, dehydration"
496
+ }
497
+ },
498
+ "BUN_Creatinine_Ratio": {
499
+ "unit": "ratio",
500
+ "normal_range": {"min": 10, "max": 20},
501
+ "critical_low": null,
502
+ "critical_high": null,
503
+ "gender_specific": false,
504
+ "description": "BUN to Creatinine ratio",
505
+ "clinical_significance": {
506
+ "high": "Pre-renal cause (dehydration, GI bleeding)",
507
+ "low": "Intrinsic renal disease, liver disease"
508
+ }
509
+ },
510
+ "VLDL": {
511
+ "unit": "mg/dL",
512
+ "normal_range": {"min": 2, "max": 30},
513
+ "critical_low": null,
514
+ "critical_high": null,
515
+ "gender_specific": false,
516
+ "description": "Very low-density lipoprotein cholesterol",
517
+ "clinical_significance": {
518
+ "high": "Increased cardiovascular risk"
519
+ }
520
  }
521
  }
522
  }
data/chat_reports/report_Diabetes_20260223_142525.json ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "20260223_142525",
3
+ "biomarkers_input": {
4
+ "Glucose": 140.0,
5
+ "HbA1c": 7.5
6
+ },
7
+ "final_response": {
8
+ "patient_summary": {
9
+ "total_biomarkers_tested": 2,
10
+ "biomarkers_in_normal_range": 0,
11
+ "biomarkers_out_of_range": 2,
12
+ "critical_values": 0,
13
+ "overall_risk_profile": "The patient's biomarker results indicate a high risk profile for diabetes, with both glucose and HbA1c levels exceeding normal ranges. The most concerning findings are the elevated glucose level of 140.0 mg/dL and HbA1c level of 7.5%, which suggest impaired glucose regulation. These results align with the predicted disease of diabetes, warranting further evaluation and potential intervention to manage blood sugar levels and prevent disease progression.",
14
+ "narrative": "Based on your test results, it's likely that you may have diabetes, with our system showing an 85% confidence level in this prediction. Your glucose and HbA1c levels, which are important indicators of blood sugar control, are higher than normal, suggesting that your body may be having trouble managing blood sugar. It's essential to discuss these results with your doctor, who can provide a definitive diagnosis and guidance on the next steps, and I want to reassure you that with proper care and management, many people with diabetes are able to lead healthy and active lives. Your doctor will be able to help you understand what these results mean for your health and develop a plan to help you manage your condition."
15
+ },
16
+ "prediction_explanation": {
17
+ "primary_disease": "Diabetes",
18
+ "confidence": 0.85,
19
+ "key_drivers": [
20
+ {
21
+ "biomarker": "Glucose",
22
+ "value": 140.0,
23
+ "contribution": "31%",
24
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, a common characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
25
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycae"
26
+ },
27
+ {
28
+ "biomarker": "HbA1c",
29
+ "value": 7.5,
30
+ "contribution": "31%",
31
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and further evaluation and discussion with your doctor is needed to determine the best course of action.",
32
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u2014 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by"
33
+ }
34
+ ],
35
+ "mechanism_summary": "",
36
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion from the beta cells in the pancreas and/or impaired insulin action in the body's cells, leading to elevated blood glucose levels. This can result from a combination of genetic, environmental, and lifestyle factors, including insulin resistance, where the body's cells become less responsive to insulin, and a progressive beta-cell secretory defect, where the pancreas is unable to produce enough insulin to meet the body's needs.\n",
37
+ "pdf_references": [
38
+ "diabetes.pdf (Page 8)",
39
+ "diabetes.pdf (Page 4)",
40
+ "diabetes.pdf (Page 11)",
41
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
42
+ "diabetes.pdf (Page 10)"
43
+ ]
44
+ },
45
+ "confidence_assessment": {
46
+ "prediction_reliability": "MODERATE",
47
+ "evidence_strength": "MODERATE",
48
+ "limitations": [
49
+ "Missing data: 41 biomarker(s) not provided",
50
+ "Multiple critical values detected; professional evaluation essential"
51
+ ],
52
+ "recommendation": "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed.",
53
+ "assessment_summary": "The overall reliability of this prediction is moderate, indicating that while the 85% confidence from the ML model is notable, there are limitations and uncertainties that must be considered. The prediction is supported by moderate evidence strength, but the presence of two identified limitations suggests that key factors may not have been fully accounted for, introducing potential weaknesses. Therefore, it is essential to consult a professional medical practitioner to review the results, discuss uncertainties, and determine the best course of action to ensure accurate diagnosis and effective treatment.",
54
+ "alternative_diagnoses": [
55
+ {
56
+ "disease": "Anemia",
57
+ "probability": 0.08,
58
+ "note": "Consider discussing with healthcare provider"
59
+ }
60
+ ]
61
+ },
62
+ "safety_alerts": [
63
+ {
64
+ "severity": "MEDIUM",
65
+ "biomarker": "Glucose",
66
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
67
+ "action": "Consult with healthcare provider"
68
+ },
69
+ {
70
+ "severity": "MEDIUM",
71
+ "biomarker": "HbA1c",
72
+ "message": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)",
73
+ "action": "Consult with healthcare provider"
74
+ }
75
+ ],
76
+ "metadata": {
77
+ "timestamp": "2026-02-23T14:03:55.540464",
78
+ "system_version": "MediGuard AI RAG-Helper v1.0",
79
+ "sop_version": "Baseline",
80
+ "agents_executed": [
81
+ "Biomarker Analyzer",
82
+ "Biomarker-Disease Linker",
83
+ "Clinical Guidelines",
84
+ "Disease Explainer",
85
+ "Confidence Assessor"
86
+ ],
87
+ "disclaimer": "This is an AI-assisted analysis tool for patient self-assessment. It is NOT a substitute for professional medical advice, diagnosis, or treatment. Always consult qualified healthcare providers for medical decisions."
88
+ },
89
+ "biomarker_flags": [
90
+ {
91
+ "name": "Glucose",
92
+ "value": 140.0,
93
+ "unit": "mg/dL",
94
+ "status": "HIGH",
95
+ "reference_range": "70-100 mg/dL",
96
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
97
+ },
98
+ {
99
+ "name": "HbA1c",
100
+ "value": 7.5,
101
+ "unit": "%",
102
+ "status": "HIGH",
103
+ "reference_range": "4.0-5.7 %",
104
+ "warning": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)"
105
+ }
106
+ ],
107
+ "key_drivers": [
108
+ {
109
+ "biomarker": "Glucose",
110
+ "value": 140.0,
111
+ "contribution": "31%",
112
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, a common characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
113
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycaemia (abnormally low blood glucose) is a frequent iatrogenic \ncomplication in diabetic patients, occurring particularly in patients receiving \nsulfonylurea or insulin. Introduction\nDefinition of diabetes\nDiabetes mellitus, commonly known as diabetes, is a group of metabolic disorders \ncharacterized b"
114
+ },
115
+ {
116
+ "biomarker": "HbA1c",
117
+ "value": 7.5,
118
+ "contribution": "31%",
119
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and further evaluation and discussion with your doctor is needed to determine the best course of action.",
120
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u2014 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by elevated blood glucose due to impaired\ninsulin secretion, insulin action, or both. \u2022 The majority of patients can be expected to aim for an HbA1c of 7."
121
+ }
122
+ ],
123
+ "disease_explanation": {
124
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion from the beta cells in the pancreas and/or impaired insulin action in the body's cells, leading to elevated blood glucose levels. This can result from a combination of genetic, environmental, and lifestyle factors, including insulin resistance, where the body's cells become less responsive to insulin, and a progressive beta-cell secretory defect, where the pancreas is unable to produce enough insulin to meet the body's needs.\n",
125
+ "citations": [
126
+ "diabetes.pdf (Page 8)",
127
+ "diabetes.pdf (Page 4)",
128
+ "diabetes.pdf (Page 11)",
129
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
130
+ "diabetes.pdf (Page 10)"
131
+ ],
132
+ "retrieved_chunks": null
133
+ },
134
+ "recommendations": {
135
+ "immediate_actions": [
136
+ "Consult a healthcare professional**: Given the high prediction confidence of 85.0% for diabetes and the presence of critical safety alerts (elevated glucose and HbA1c levels), it is essential to consult a healthcare professional for a definitive diagnosis and to discuss a personalized treatment plan.",
137
+ "Undergo further testing**: As indicated by the hyperglycemia and HbA1c levels, further testing is required to confirm the diagnosis of diabetes and to assess the severity of the condition."
138
+ ],
139
+ "lifestyle_changes": [
140
+ "Regular physical activity**: Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training exercises, high-intensity interval training, and other physical activities to improve insulin sensitivity.",
141
+ "Weight management**: If overweight or obese, aim to lose 5-10% of body weight to improve insulin sensitivity and reduce the risk of complications.",
142
+ "Stress management**: Practice stress-reducing techniques, such as meditation, yoga, or deep breathing exercises, to help manage stress and improve overall well-being.",
143
+ "Sleep and relaxation**: Ensure adequate sleep (7-8 hours per night) and practice relaxation techniques to help regulate blood glucose levels and overall health."
144
+ ],
145
+ "monitoring": [
146
+ "HbA1c testing**: Schedule regular HbA1c tests (every 3-6 months) to assess average blood glucose control over time.",
147
+ "Foot care and examination**: Regularly examine feet for any signs of damage or infection, and seek medical attention if any concerns arise.",
148
+ "Regular health check-ups**: Schedule regular health check-ups with a healthcare professional to monitor progress, adjust the treatment plan, and screen for potential complications.",
149
+ "Remember, it is essential to consult a healthcare professional for a definitive diagnosis and personalized treatment plan. These recommendations are meant to provide general guidance and support, but should not replace professional medical advice."
150
+ ],
151
+ "guideline_citations": [
152
+ "diabetes.pdf"
153
+ ]
154
+ },
155
+ "clinical_recommendations": {
156
+ "immediate_actions": [
157
+ "Consult a healthcare professional**: Given the high prediction confidence of 85.0% for diabetes and the presence of critical safety alerts (elevated glucose and HbA1c levels), it is essential to consult a healthcare professional for a definitive diagnosis and to discuss a personalized treatment plan.",
158
+ "Undergo further testing**: As indicated by the hyperglycemia and HbA1c levels, further testing is required to confirm the diagnosis of diabetes and to assess the severity of the condition."
159
+ ],
160
+ "lifestyle_changes": [
161
+ "Regular physical activity**: Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training exercises, high-intensity interval training, and other physical activities to improve insulin sensitivity.",
162
+ "Weight management**: If overweight or obese, aim to lose 5-10% of body weight to improve insulin sensitivity and reduce the risk of complications.",
163
+ "Stress management**: Practice stress-reducing techniques, such as meditation, yoga, or deep breathing exercises, to help manage stress and improve overall well-being.",
164
+ "Sleep and relaxation**: Ensure adequate sleep (7-8 hours per night) and practice relaxation techniques to help regulate blood glucose levels and overall health."
165
+ ],
166
+ "monitoring": [
167
+ "HbA1c testing**: Schedule regular HbA1c tests (every 3-6 months) to assess average blood glucose control over time.",
168
+ "Foot care and examination**: Regularly examine feet for any signs of damage or infection, and seek medical attention if any concerns arise.",
169
+ "Regular health check-ups**: Schedule regular health check-ups with a healthcare professional to monitor progress, adjust the treatment plan, and screen for potential complications.",
170
+ "Remember, it is essential to consult a healthcare professional for a definitive diagnosis and personalized treatment plan. These recommendations are meant to provide general guidance and support, but should not replace professional medical advice."
171
+ ],
172
+ "guideline_citations": [
173
+ "diabetes.pdf"
174
+ ]
175
+ },
176
+ "alternative_diagnoses": [
177
+ {
178
+ "disease": "Anemia",
179
+ "probability": 0.08,
180
+ "note": "Consider discussing with healthcare provider"
181
+ }
182
+ ],
183
+ "analysis": {
184
+ "biomarker_flags": [
185
+ {
186
+ "name": "Glucose",
187
+ "value": 140.0,
188
+ "unit": "mg/dL",
189
+ "status": "HIGH",
190
+ "reference_range": "70-100 mg/dL",
191
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
192
+ },
193
+ {
194
+ "name": "HbA1c",
195
+ "value": 7.5,
196
+ "unit": "%",
197
+ "status": "HIGH",
198
+ "reference_range": "4.0-5.7 %",
199
+ "warning": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)"
200
+ }
201
+ ],
202
+ "safety_alerts": [
203
+ {
204
+ "severity": "MEDIUM",
205
+ "biomarker": "Glucose",
206
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
207
+ "action": "Consult with healthcare provider"
208
+ },
209
+ {
210
+ "severity": "MEDIUM",
211
+ "biomarker": "HbA1c",
212
+ "message": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)",
213
+ "action": "Consult with healthcare provider"
214
+ }
215
+ ],
216
+ "key_drivers": [
217
+ {
218
+ "biomarker": "Glucose",
219
+ "value": 140.0,
220
+ "contribution": "31%",
221
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, a common characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
222
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycaemia (abnormally low blood glucose) is a frequent iatrogenic \ncomplication in diabetic patients, occurring particularly in patients receiving \nsulfonylurea or insulin. Introduction\nDefinition of diabetes\nDiabetes mellitus, commonly known as diabetes, is a group of metabolic disorders \ncharacterized b"
223
+ },
224
+ {
225
+ "biomarker": "HbA1c",
226
+ "value": 7.5,
227
+ "contribution": "31%",
228
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and further evaluation and discussion with your doctor is needed to determine the best course of action.",
229
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u2014 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by elevated blood glucose due to impaired\ninsulin secretion, insulin action, or both. \u2022 The majority of patients can be expected to aim for an HbA1c of 7."
230
+ }
231
+ ],
232
+ "disease_explanation": {
233
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion from the beta cells in the pancreas and/or impaired insulin action in the body's cells, leading to elevated blood glucose levels. This can result from a combination of genetic, environmental, and lifestyle factors, including insulin resistance, where the body's cells become less responsive to insulin, and a progressive beta-cell secretory defect, where the pancreas is unable to produce enough insulin to meet the body's needs.\n",
234
+ "citations": [
235
+ "diabetes.pdf (Page 8)",
236
+ "diabetes.pdf (Page 4)",
237
+ "diabetes.pdf (Page 11)",
238
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
239
+ "diabetes.pdf (Page 10)"
240
+ ],
241
+ "retrieved_chunks": null
242
+ },
243
+ "recommendations": {
244
+ "immediate_actions": [
245
+ "Consult a healthcare professional**: Given the high prediction confidence of 85.0% for diabetes and the presence of critical safety alerts (elevated glucose and HbA1c levels), it is essential to consult a healthcare professional for a definitive diagnosis and to discuss a personalized treatment plan.",
246
+ "Undergo further testing**: As indicated by the hyperglycemia and HbA1c levels, further testing is required to confirm the diagnosis of diabetes and to assess the severity of the condition."
247
+ ],
248
+ "lifestyle_changes": [
249
+ "Regular physical activity**: Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training exercises, high-intensity interval training, and other physical activities to improve insulin sensitivity.",
250
+ "Weight management**: If overweight or obese, aim to lose 5-10% of body weight to improve insulin sensitivity and reduce the risk of complications.",
251
+ "Stress management**: Practice stress-reducing techniques, such as meditation, yoga, or deep breathing exercises, to help manage stress and improve overall well-being.",
252
+ "Sleep and relaxation**: Ensure adequate sleep (7-8 hours per night) and practice relaxation techniques to help regulate blood glucose levels and overall health."
253
+ ],
254
+ "monitoring": [
255
+ "HbA1c testing**: Schedule regular HbA1c tests (every 3-6 months) to assess average blood glucose control over time.",
256
+ "Foot care and examination**: Regularly examine feet for any signs of damage or infection, and seek medical attention if any concerns arise.",
257
+ "Regular health check-ups**: Schedule regular health check-ups with a healthcare professional to monitor progress, adjust the treatment plan, and screen for potential complications.",
258
+ "Remember, it is essential to consult a healthcare professional for a definitive diagnosis and personalized treatment plan. These recommendations are meant to provide general guidance and support, but should not replace professional medical advice."
259
+ ],
260
+ "guideline_citations": [
261
+ "diabetes.pdf"
262
+ ]
263
+ },
264
+ "confidence_assessment": {
265
+ "prediction_reliability": "MODERATE",
266
+ "evidence_strength": "MODERATE",
267
+ "limitations": [
268
+ "Missing data: 41 biomarker(s) not provided",
269
+ "Multiple critical values detected; professional evaluation essential"
270
+ ],
271
+ "recommendation": "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed.",
272
+ "assessment_summary": "The overall reliability of this prediction is moderate, indicating that while the 85% confidence from the ML model is notable, there are limitations and uncertainties that must be considered. The prediction is supported by moderate evidence strength, but the presence of two identified limitations suggests that key factors may not have been fully accounted for, introducing potential weaknesses. Therefore, it is essential to consult a professional medical practitioner to review the results, discuss uncertainties, and determine the best course of action to ensure accurate diagnosis and effective treatment.",
273
+ "alternative_diagnoses": [
274
+ {
275
+ "disease": "Anemia",
276
+ "probability": 0.08,
277
+ "note": "Consider discussing with healthcare provider"
278
+ }
279
+ ]
280
+ },
281
+ "alternative_diagnoses": [
282
+ {
283
+ "disease": "Anemia",
284
+ "probability": 0.08,
285
+ "note": "Consider discussing with healthcare provider"
286
+ }
287
+ ]
288
+ }
289
+ },
290
+ "biomarker_flags": [
291
+ {
292
+ "name": "Glucose",
293
+ "value": 140.0,
294
+ "unit": "mg/dL",
295
+ "status": "HIGH",
296
+ "reference_range": "70-100 mg/dL",
297
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
298
+ },
299
+ {
300
+ "name": "HbA1c",
301
+ "value": 7.5,
302
+ "unit": "%",
303
+ "status": "HIGH",
304
+ "reference_range": "4.0-5.7 %",
305
+ "warning": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)"
306
+ }
307
+ ],
308
+ "safety_alerts": [
309
+ {
310
+ "severity": "MEDIUM",
311
+ "biomarker": "Glucose",
312
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
313
+ "action": "Consult with healthcare provider"
314
+ },
315
+ {
316
+ "severity": "MEDIUM",
317
+ "biomarker": "HbA1c",
318
+ "message": "HbA1c is 7.5 %, above normal range (4.0-5.7 %). Diabetes (\u22656.5%), Prediabetes (5.7-6.4%)",
319
+ "action": "Consult with healthcare provider"
320
+ }
321
+ ]
322
+ }
docs/plans/PRODUCTION_UPGRADE_PLAN.md ADDED
@@ -0,0 +1,833 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MediGuard AI — Production Upgrade Plan
2
+
3
+ ## From Prototype to Production-Grade MedTech RAG System
4
+
5
+ > **Generated**: 2026-02-23
6
+ > **Based on**: Deep review of production-agentic-rag-course (Weeks 1–7) + existing RagBot codebase
7
+ > **Goal**: Take the existing MediGuard AI (clinical biomarker analysis + RAG explanation system) to full production quality, applying every lesson from the arXiv Paper Curator course — adapted for the MedTech domain.
8
+
9
+ ---
10
+
11
+ ## Table of Contents
12
+
13
+ 1. [Executive Summary](#1-executive-summary)
14
+ 2. [Deep Review: Course vs. Your Codebase](#2-deep-review-course-vs-your-codebase)
15
+ 3. [Architecture Gap Analysis](#3-architecture-gap-analysis)
16
+ 4. [Phase 1: Infrastructure Foundation](#phase-1-infrastructure-foundation-week-1-equivalent)
17
+ 5. [Phase 2: Medical Data Ingestion Pipeline](#phase-2-medical-data-ingestion-pipeline-week-2-equivalent)
18
+ 6. [Phase 3: Production Search Foundation](#phase-3-production-search-foundation-week-3-equivalent)
19
+ 7. [Phase 4: Hybrid Search & Intelligent Chunking](#phase-4-hybrid-search--intelligent-chunking-week-4-equivalent)
20
+ 8. [Phase 5: Complete RAG Pipeline with Streaming](#phase-5-complete-rag-pipeline-with-streaming-week-5-equivalent)
21
+ 9. [Phase 6: Monitoring, Caching & Observability](#phase-6-monitoring-caching--observability-week-6-equivalent)
22
+ 10. [Phase 7: Agentic RAG & Messaging Bot](#phase-7-agentic-rag--messaging-bot-week-7-equivalent)
23
+ 11. [Phase 8: MedTech-Specific Additions](#phase-8-medtech-specific-additions-beyond-course)
24
+ 12. [Implementation Priority Matrix](#implementation-priority-matrix)
25
+ 13. [Migration Strategy](#migration-strategy)
26
+
27
+ ---
28
+
29
+ ## 1. Executive Summary
30
+
31
+ Your RagBot is a **working prototype** with strong domain logic (biomarker validation, multi-agent clinical analysis, 5D evaluation, SOP evolution). The course teaches **production infrastructure** (Docker orchestration, OpenSearch hybrid search, Airflow pipelines, Redis caching, Langfuse observability, LangGraph agentic workflows, Telegram bot).
32
+
33
+ **The strategy**: Keep your excellent medical domain logic and multi-agent architecture, but rebuild the infrastructure layer to match production standards. Your domain is *harder* than arXiv papers — medical data demands stricter validation, HIPAA-aware patterns, and safety guardrails.
34
+
35
+ ### What You Have (Strengths)
36
+ - ✅ 6 specialized medical agents (Biomarker Analyzer, Disease Explainer, Biomarker-Disease Linker, Clinical Guidelines, Confidence Assessor, Response Synthesizer)
37
+ - ✅ LangGraph orchestration with parallel execution
38
+ - ✅ Robust biomarker validation with 24 biomarkers, reference ranges, critical values
39
+ - ✅ 5D evaluation framework (Clinical Accuracy, Evidence Grounding, Actionability, Clarity, Safety)
40
+ - ✅ SOP evolution engine (Outer Loop optimization)
41
+ - ✅ Multi-provider LLM support (Groq, Gemini, Ollama)
42
+ - ✅ Basic FastAPI with analysis endpoints
43
+ - ✅ CLI chatbot with natural language biomarker extraction
44
+
45
+ ### What You're Missing (Gaps)
46
+ - ❌ No Docker Compose orchestration (only minimal single-service Dockerfile)
47
+ - ❌ No production database (PostgreSQL) — no patient/report persistence
48
+ - ❌ No production search engine — using FAISS (in-memory, single-file, no filtering)
49
+ - ❌ No chunking strategy — basic RecursiveCharacterTextSplitter only
50
+ - ❌ No hybrid search (BM25 + vector) — vector-only retrieval
51
+ - ❌ No production embeddings — using local HuggingFace MiniLM (384d) or Google free tier
52
+ - ❌ No data ingestion pipeline (Airflow) — manual PDF loading
53
+ - ❌ No caching layer (Redis) — every query hits LLM
54
+ - ❌ No observability (Langfuse) — no tracing, no cost tracking
55
+ - ❌ No streaming responses — synchronous only
56
+ - ❌ No Gradio interface — CLI only (besides basic API)
57
+ - ❌ No messaging bot (Telegram/WhatsApp) — no mobile access
58
+ - ❌ No agentic RAG with guardrails, document grading, query rewriting
59
+ - ❌ No proper dependency injection pattern (FastAPI `Depends()`)
60
+ - ❌ No Pydantic Settings with env-nested config
61
+ - ❌ No factory pattern for service initialization
62
+ - ❌ No proper exception hierarchy
63
+ - ❌ No health checks for all services
64
+ - ❌ No Makefile / dev tooling (ruff, mypy, pre-commit)
65
+ - ❌ No proper test infrastructure (pytest fixtures, test containers)
66
+
67
+ ---
68
+
69
+ ## 2. Deep Review: Course vs. Your Codebase
70
+
71
+ ### Course Architecture (What Production Looks Like)
72
+
73
+ ```
74
+ ┌──────────────────────────────────────────────────────────────┐
75
+ │ Docker Compose Orchestration │
76
+ ├──────────┬──────────┬──────────┬──────────┬─────────────────┤
77
+ │ FastAPI │PostgreSQL│OpenSearch│ Ollama │ Airflow │
78
+ │ (8000) │ (5432) │ (9200) │ (11434) │ (8080) │
79
+ ├──────────┼──────────┼──────────┼──────────┼─────────────────┤
80
+ │ Redis │ Langfuse │ClickHouse│ MinIO │ Langfuse-PG │
81
+ │ (6379) │ (3001) │ │ │ (5433) │
82
+ ├──────────┴──────────┴──────────┴──────────┴─────────────────┤
83
+ │ Gradio UI (7861) │ Telegram Bot │
84
+ └──────────────────────────────────────────────────────────────┘
85
+ ```
86
+
87
+ **Key Patterns from Course:**
88
+ - **Pydantic Settings** with `env_nested_delimiter="__"` for hierarchical config
89
+ - **Factory pattern** (`make_*` functions) for every service
90
+ - **Dependency injection** via FastAPI `Depends()` with typed annotations
91
+ - **Lifespan context** for startup/shutdown with proper resource management
92
+ - **Service layer separation**: `routers/` → `services/` → `clients/`
93
+ - **Schema-driven**: Separate Pydantic schemas for API, database, embeddings, indexing
94
+ - **Exception hierarchy**: Domain-specific exceptions (`PDFParsingException`, `OllamaException`, etc.)
95
+ - **Context dataclass** for LangGraph runtime dependency injection
96
+ - **Structured LLM output** via `.with_structured_output(PydanticModel)`
97
+
98
+ ### Your Codebase Architecture (Current State)
99
+
100
+ ```
101
+ ┌─────────────────────────────────────────────┐
102
+ │ Basic FastAPI (api/app/) │
103
+ │ Single Dockerfile, no orchestration │
104
+ ├─────────────────────────────────────────────┤
105
+ │ src/ (Core Domain Logic) │
106
+ │ ┌─────────────────────────────────────┐ │
107
+ │ │ workflow.py (LangGraph StateGraph) │ │
108
+ │ │ 6 agents/ (parallel execution) │ │
109
+ │ │ biomarker_validator.py (24 markers) │ │
110
+ │ │ pdf_processor.py (FAISS + PyPDF) │ │
111
+ │ │ evaluation/ (5D framework) │ │
112
+ │ │ evolution/ (SOP optimization) │ │
113
+ │ └─────────────────────────────────────┘ │
114
+ ├─────────────────────────────────────────────┤
115
+ │ FAISS vector store (single file) │
116
+ │ No PostgreSQL, No Redis, No OpenSearch │
117
+ └─────────────────────────────────────────────┘
118
+ ```
119
+
120
+ ---
121
+
122
+ ## 3. Architecture Gap Analysis
123
+
124
+ | Dimension | Course (Production) | Your Codebase (Prototype) | Gap Severity |
125
+ |-----------|-------------------|--------------------------|--------------|
126
+ | **Container Orchestration** | Docker Compose with 12+ services, health checks, networks | Single Dockerfile, manual startup | 🔴 Critical |
127
+ | **Database** | PostgreSQL 16 with SQLAlchemy models, repositories | None (in-memory only) | 🔴 Critical |
128
+ | **Search Engine** | OpenSearch 2.19 with BM25 + KNN hybrid, RRF fusion | FAISS (vector-only, no filtering) | 🔴 Critical |
129
+ | **Chunking** | Section-aware chunking (600w, 100w overlap, metadata) | Basic RecursiveCharacterTextSplitter (1000 char) | 🟡 Major |
130
+ | **Embeddings** | Jina AI v3 (1024d, passage/query differentiation) | HuggingFace MiniLM (384d) or Google free tier | 🟡 Major |
131
+ | **Data Pipeline** | Airflow DAGs (daily schedule, fetch→parse→chunk→index) | Manual PDF loading, one-time setup | 🟡 Major |
132
+ | **Caching** | Redis with TTL, exact-match, SHA256 keys | None | 🟡 Major |
133
+ | **Observability** | Langfuse v3 (traces, spans, generations, cost tracking) | None (print statements only) | 🟡 Major |
134
+ | **Streaming** | SSE streaming with Gradio UI | None (synchronous responses) | 🟡 Major |
135
+ | **Agentic RAG** | LangGraph with guardrails, grading, rewriting, context_schema | Basic LangGraph (no guardrails, no grading) | 🟡 Major |
136
+ | **Bot Integration** | Telegram bot with /search, Q&A, caching | None | 🟢 Enhancement |
137
+ | **Config Management** | Pydantic Settings, hierarchical env vars, frozen models | Basic os.getenv, dotenv | 🟡 Major |
138
+ | **Dependency Injection** | FastAPI Depends() with typed annotations | Manual global singletons | 🟡 Major |
139
+ | **Error Handling** | Domain exception hierarchy, graceful fallbacks | Basic try/except with prints | 🟡 Major |
140
+ | **Code Quality** | Ruff, MyPy, pre-commit, pytest with fixtures | Minimal pytest, no linting | 🟢 Enhancement |
141
+ | **API Design** | Versioned (/api/v1/), health checks for all services | Basic routes, minimal health check | 🟡 Major |
142
+
143
+ ---
144
+
145
+ ## Phase 1: Infrastructure Foundation (Week 1 Equivalent)
146
+
147
+ > **Goal**: Containerize everything, add PostgreSQL for persistence, set up OpenSearch, establish professional development environment.
148
+
149
+ ### 1.1 Docker Compose Orchestration
150
+
151
+ Create a production `docker-compose.yml` with all services:
152
+
153
+ ```yaml
154
+ # Target services for MediGuard AI:
155
+ services:
156
+ api: # FastAPI application (port 8000)
157
+ postgres: # Patient reports, analysis history (port 5432)
158
+ opensearch: # Medical document search engine (port 9200)
159
+ opensearch-dashboards: # Search UI (port 5601)
160
+ redis: # Response caching (port 6379)
161
+ ollama: # Local LLM for privacy-sensitive medical data (port 11434)
162
+ airflow: # Medical literature pipeline (port 8080)
163
+ langfuse-web: # Observability dashboard (port 3001)
164
+ langfuse-worker/postgres/redis/clickhouse/minio: # Langfuse infra
165
+ ```
166
+
167
+ **Tasks:**
168
+ - [ ] Create root `docker-compose.yml` adapting course pattern to MedTech services
169
+ - [ ] Create multi-stage `Dockerfile` using UV package manager (copy course pattern)
170
+ - [ ] Add health checks for every service (PostgreSQL, OpenSearch, Redis, Ollama)
171
+ - [ ] Set up Docker network `mediguard-network` with proper service dependencies
172
+ - [ ] Configure volume persistence for all data stores
173
+ - [ ] Create `.env.example` with all configuration variables documented
174
+
175
+ ### 1.2 Pydantic Settings Configuration
176
+
177
+ Replace scattered `os.getenv()` calls with hierarchical Pydantic Settings:
178
+
179
+ ```python
180
+ # New: src/config.py (course-inspired)
181
+ class MedicalPDFSettings(BaseConfigSettings): # PDF parser config
182
+ class ChunkingSettings(BaseConfigSettings): # Chunking parameters
183
+ class OpenSearchSettings(BaseConfigSettings): # Search engine config
184
+ class LangfuseSettings(BaseConfigSettings): # Observability config
185
+ class RedisSettings(BaseConfigSettings): # Cache config
186
+ class TelegramSettings(BaseConfigSettings): # Bot config
187
+ class BiomarkerSettings(BaseConfigSettings): # Biomarker thresholds
188
+ class Settings(BaseConfigSettings): # Root settings
189
+ ```
190
+
191
+ **Tasks:**
192
+ - [ ] Rewrite `src/config.py` — keep `ExplanationSOP` but add infrastructure settings classes
193
+ - [ ] Use `env_nested_delimiter="__"` for hierarchical environment variables
194
+ - [ ] Add `frozen=True` for immutable configuration
195
+ - [ ] Move all hardcoded values to environment variables with sensible defaults
196
+ - [ ] Create `get_settings()` factory with `@lru_cache`
197
+
198
+ ### 1.3 PostgreSQL Database Setup
199
+
200
+ Add persistent storage for analysis history — critical for medical audit trail:
201
+
202
+ ```python
203
+ # New models:
204
+ class PatientAnalysis(Base): # Store each analysis run
205
+ class AnalysisReport(Base): # Store final reports
206
+ class MedicalDocument(Base): # Track ingested medical PDFs
207
+ class BiomarkerReference(Base): # Biomarker reference ranges (currently JSON file)
208
+ ```
209
+
210
+ **Tasks:**
211
+ - [ ] Create `src/db/` package mirroring course pattern (factory, interfaces, postgresql)
212
+ - [ ] Define SQLAlchemy models for analysis history and medical documents
213
+ - [ ] Create repository pattern for data access
214
+ - [ ] Set up Alembic for database migrations
215
+ - [ ] Migrate `biomarker_references.json` to database (keep JSON as seed data)
216
+
217
+ ### 1.4 Project Structure Refactor
218
+
219
+ Reorganize to match production patterns:
220
+
221
+ ```
222
+ src/
223
+ ├── config.py # Pydantic Settings (hierarchical)
224
+ ├── main.py # FastAPI app with lifespan
225
+ ├── database.py # Database utilities
226
+ ├── dependencies.py # FastAPI dependency injection
227
+ ├── exceptions.py # Domain exception hierarchy
228
+ ├── middlewares.py # Request logging, timing
229
+ ├── db/ # Database layer
230
+ │ ├── factory.py
231
+ │ └── interfaces/
232
+ ├── models/ # SQLAlchemy models
233
+ │ ├── analysis.py
234
+ │ └── document.py
235
+ ├── repositories/ # Data access
236
+ │ ├── analysis.py
237
+ │ └── document.py
238
+ ├── routers/ # API endpoints
239
+ │ ├── analyze.py # Biomarker analysis
240
+ │ ├── ask.py # RAG Q&A (streaming + standard)
241
+ │ ├── health.py # Comprehensive health checks
242
+ │ └── search.py # Medical document search
243
+ ├── schemas/ # Pydantic request/response models
244
+ │ ├── api/
245
+ │ ├── medical/
246
+ │ └── embeddings/
247
+ ├── services/ # Business logic
248
+ │ ├── agents/ # Your 6 medical agents (KEEP!)
249
+ │ │ ├── biomarker_analyzer.py
250
+ │ │ ├── disease_explainer.py
251
+ │ │ ├── biomarker_linker.py
252
+ │ │ ├── clinical_guidelines.py
253
+ │ │ ├── confidence_assessor.py
254
+ │ │ ├── response_synthesizer.py
255
+ │ │ ├── agentic_rag.py # NEW: LangGraph agentic wrapper
256
+ │ │ ├── nodes/ # NEW: Guardrail, grading, rewriting
257
+ │ │ ├── state.py # Enhanced state
258
+ │ │ ├── context.py # Runtime dependency injection
259
+ │ │ └── prompts.py # Medical-domain prompts
260
+ │ ├── opensearch/ # NEW: Search engine client
261
+ │ ├── embeddings/ # NEW: Production embeddings
262
+ │ ├── cache/ # NEW: Redis caching
263
+ │ ├── langfuse/ # NEW: Observability
264
+ │ ├── ollama/ # NEW: Local LLM client
265
+ │ ├── indexing/ # NEW: Chunking + indexing
266
+ │ ├── pdf_parser/ # Enhanced: Use Docling
267
+ │ ├── telegram/ # NEW: Bot integration
268
+ │ └── biomarker/ # Extracted: validation + normalization
269
+ ├── evaluation/ # KEEP: 5D evaluation
270
+ └── evolution/ # KEEP: SOP evolution
271
+ ```
272
+
273
+ **Tasks:**
274
+ - [ ] Create the new directory structure
275
+ - [ ] Move API from `api/app/` into `src/` (single application)
276
+ - [ ] Create `exceptions.py` with medical-domain exception hierarchy
277
+ - [ ] Create `dependencies.py` with typed FastAPI dependency injection
278
+ - [ ] Create `main.py` with proper lifespan context manager
279
+
280
+ ### 1.5 Development Tooling
281
+
282
+ **Tasks:**
283
+ - [ ] Create `pyproject.toml` replacing `requirements.txt` (use UV)
284
+ - [ ] Create `Makefile` with start/stop/test/lint/format/health commands
285
+ - [ ] Add `ruff` for linting and formatting
286
+ - [ ] Add `mypy` for type checking
287
+ - [ ] Add `.pre-commit-config.yaml`
288
+ - [ ] Create `.env.example` and `.env.test`
289
+
290
+ ---
291
+
292
+ ## Phase 2: Medical Data Ingestion Pipeline (Week 2 Equivalent)
293
+
294
+ > **Goal**: Automated ingestion of medical PDFs, clinical guidelines, and reference documents with Airflow orchestration.
295
+
296
+ ### 2.1 Medical PDF Parser Upgrade
297
+
298
+ Replace basic PyPDF with Docling for better medical document handling:
299
+
300
+ **Tasks:**
301
+ - [ ] Create `src/services/pdf_parser/` with Docling integration (copy course pattern)
302
+ - [ ] Add medical-specific section detection (Abstract, Methods, Results, Discussion, Clinical Guidelines)
303
+ - [ ] Add table extraction for lab reference ranges
304
+ - [ ] Add validation: file size limits, page limits, PDF header check
305
+ - [ ] Add metadata extraction: title, authors, publication date, journal
306
+
307
+ ### 2.2 Medical Document Sources
308
+
309
+ Unlike arXiv (single API), medical literature comes from multiple sources:
310
+
311
+ **Tasks:**
312
+ - [ ] Create `src/services/medical_sources/` package
313
+ - [ ] Implement PubMed API client (free, rate-limited) for research papers
314
+ - [ ] Implement local PDF upload endpoint for clinical guidelines
315
+ - [ ] Implement reference document ingestion (WHO, CDC, ADA guidelines)
316
+ - [ ] Create document deduplication logic (by title hash + content fingerprint)
317
+ - [ ] Add `MedicalDocument` model tracking: source, parse status, indexing status
318
+
319
+ ### 2.3 Airflow Pipeline for Medical Literature
320
+
321
+ **Tasks:**
322
+ - [ ] Create `airflow/` directory with Dockerfile and entrypoint
323
+ - [ ] Create `airflow/dags/medical_ingestion.py` DAG:
324
+ - `setup_environment` → `fetch_new_documents` → `parse_pdfs` → `chunk_and_index` → `generate_report`
325
+ - [ ] Schedule: Daily at 6 AM for PubMed updates, on-demand for uploaded PDFs
326
+ - [ ] Add retry logic with exponential backoff
327
+ - [ ] Mount `src/` into Airflow container for shared code
328
+
329
+ ### 2.4 PostgreSQL Storage for Documents
330
+
331
+ **Tasks:**
332
+ - [ ] Create `MedicalDocument` model: id, title, source, source_type, authors, abstract, raw_text, sections, parse_status, indexed_at
333
+ - [ ] Create `PaperRepository` with CRUD + upsert + status tracking
334
+ - [ ] Track processing pipeline: `uploaded → parsed → chunked → indexed`
335
+ - [ ] Store parsed sections as JSON for re-indexing without re-parsing
336
+
337
+ ---
338
+
339
+ ## Phase 3: Production Search Foundation (Week 3 Equivalent)
340
+
341
+ > **Goal**: Replace FAISS with OpenSearch for production BM25 keyword search with medical-specific optimizations.
342
+
343
+ ### 3.1 OpenSearch Client
344
+
345
+ **Tasks:**
346
+ - [ ] Create `src/services/opensearch/` package (adapt course pattern)
347
+ - [ ] Implement `OpenSearchClient` with:
348
+ - Health check, index management, BM25 search, bulk indexing
349
+ - **Medical-specific**: Boost clinical term matches, support ICD-10 code filtering
350
+ - [ ] Create `QueryBuilder` with medical field boosting:
351
+ ```
352
+ fields: ["chunk_text^3", "title^2", "section_title^1.5", "abstract^1"]
353
+ ```
354
+ - [ ] Create `index_config_hybrid.py` with medical document mapping:
355
+ - Fields: chunk_text, title, authors, abstract, document_type (guideline/research/reference), condition_tags, publication_year
356
+
357
+ ### 3.2 Medical Document Index Mapping
358
+
359
+ ```python
360
+ MEDICAL_CHUNKS_MAPPING = {
361
+ "settings": {
362
+ "index.knn": True,
363
+ "analysis": {
364
+ "analyzer": {
365
+ "medical_analyzer": {
366
+ "type": "custom",
367
+ "tokenizer": "standard",
368
+ "filter": ["lowercase", "medical_synonyms", "stop", "snowball"]
369
+ }
370
+ }
371
+ }
372
+ },
373
+ "mappings": {
374
+ "properties": {
375
+ "chunk_text": {"type": "text", "analyzer": "medical_analyzer"},
376
+ "document_type": {"type": "keyword"}, # guideline, research, reference
377
+ "condition_tags": {"type": "keyword"}, # diabetes, anemia, etc.
378
+ "biomarkers_mentioned": {"type": "keyword"}, # Glucose, HbA1c, etc.
379
+ "embedding": {"type": "knn_vector", "dimension": 1024},
380
+ # ... more fields
381
+ }
382
+ }
383
+ }
384
+ ```
385
+
386
+ **Tasks:**
387
+ - [ ] Design medical-optimized OpenSearch mapping
388
+ - [ ] Add medical synonym analyzer (e.g., "diabetes mellitus" ↔ "DM", "HbA1c" ↔ "glycated hemoglobin")
389
+ - [ ] Create search endpoint `POST /api/v1/search` with filtering by document_type, condition_tags
390
+ - [ ] Implement BM25 search with medical field boosting
391
+ - [ ] Create index verification in startup lifespan
392
+
393
+ ---
394
+
395
+ ## Phase 4: Hybrid Search & Intelligent Chunking (Week 4 Equivalent)
396
+
397
+ > **Goal**: Section-aware chunking for medical documents + hybrid search (BM25 + semantic) with RRF fusion.
398
+
399
+ ### 4.1 Medical-Aware Text Chunking
400
+
401
+ **Tasks:**
402
+ - [ ] Create `src/services/indexing/text_chunker.py` adapting course's `TextChunker`:
403
+ - Section-aware chunking (detect: Introduction, Methods, Results, Discussion, Guidelines, References)
404
+ - Target: 600 words per chunk, 100 word overlap
405
+ - Medical metadata: section_title, biomarkers_mentioned, condition_tags
406
+ - [ ] Create `MedicalTextChunker` subclass with:
407
+ - Biomarker mention detection (scan for any of 24+ biomarker names)
408
+ - Condition tag extraction (diabetes, anemia, heart disease, etc.)
409
+ - Table-aware chunking (keep tables together)
410
+ - Reference section filtering (skip bibliography chunks)
411
+ - [ ] Create `HybridIndexingService` for chunk → embed → index pipeline
412
+
413
+ ### 4.2 Production Embeddings
414
+
415
+ **Tasks:**
416
+ - [ ] Create `src/services/embeddings/` with Jina AI client (1024d, passage/query differentiation)
417
+ - [ ] Add fallback chain: Jina → Google → HuggingFace
418
+ - [ ] Implement batch embedding for efficient indexing
419
+ - [ ] Track embedding model in chunk metadata for versioning
420
+
421
+ ### 4.3 Hybrid Search with RRF
422
+
423
+ **Tasks:**
424
+ - [ ] Implement `search_unified()` supporting: BM25-only, vector-only, hybrid modes
425
+ - [ ] Set up OpenSearch RRF (Reciprocal Rank Fusion) pipeline
426
+ - [ ] Create unified search endpoint `POST /api/v1/hybrid-search/`
427
+ - [ ] Add min_score filtering and result deduplication
428
+ - [ ] Benchmark: BM25 vs. vector vs. hybrid on medical queries
429
+
430
+ ---
431
+
432
+ ## Phase 5: Complete RAG Pipeline with Streaming (Week 5 Equivalent)
433
+
434
+ > **Goal**: Replace synchronous analysis with streaming RAG, add Gradio UI, optimize prompts.
435
+
436
+ ### 5.1 Ollama Client Upgrade
437
+
438
+ **Tasks:**
439
+ - [ ] Create `src/services/ollama/` package (adapt course pattern)
440
+ - [ ] Implement `OllamaClient` with:
441
+ - Health check, model listing, generate, streaming generate
442
+ - Usage metadata extraction (tokens, latency)
443
+ - LangChain integration: `get_langchain_model()` for structured output
444
+ - [ ] Create medical-specific RAG prompt templates:
445
+ - `rag_medical_system.txt` — optimized for medical explanation generation
446
+ - Structured output format for clinical responses
447
+ - [ ] Create `OllamaFactory` with `@lru_cache`
448
+
449
+ ### 5.2 Streaming RAG Endpoints
450
+
451
+ **Tasks:**
452
+ - [ ] Create `POST /api/v1/ask` — standard RAG with medical context retrieval
453
+ - [ ] Create `POST /api/v1/stream` — SSE streaming for real-time responses
454
+ - [ ] Create `POST /api/v1/analyze/stream` — streaming biomarker analysis
455
+ - [ ] Integrate with existing multi-agent pipeline:
456
+ ```
457
+ Query → Hybrid Search → Medical Chunks → Agent Pipeline → Streaming Response
458
+ ```
459
+
460
+ ### 5.3 Gradio Medical Interface
461
+
462
+ **Tasks:**
463
+ - [ ] Create `src/gradio_app.py` for interactive medical RAG:
464
+ - Biomarker input form (structured entry)
465
+ - Natural language input (free text)
466
+ - Streaming response display
467
+ - Search mode selector (BM25, hybrid, vector)
468
+ - Model selector
469
+ - Analysis history display
470
+ - [ ] Create `gradio_launcher.py` for easy startup
471
+ - [ ] Expose on port 7861
472
+
473
+ ### 5.4 Prompt Optimization
474
+
475
+ **Tasks:**
476
+ - [ ] Reduce prompt size by 60-80% (course achieved 80% reduction)
477
+ - [ ] Create focused medical prompts (separate: biomarker analysis, disease explanation, guidelines)
478
+ - [ ] Test prompt variants using 5D evaluation framework
479
+ - [ ] Store best prompts as SOP parameters (tie into evolution engine)
480
+
481
+ ---
482
+
483
+ ## Phase 6: Monitoring, Caching & Observability (Week 6 Equivalent)
484
+
485
+ > **Goal**: Add Langfuse tracing for the entire pipeline, Redis caching, and production monitoring.
486
+
487
+ ### 6.1 Langfuse Integration
488
+
489
+ **Tasks:**
490
+ - [ ] Create `src/services/langfuse/` package (adapt course pattern):
491
+ - `client.py` — LangfuseTracer wrapper with v3 SDK
492
+ - `factory.py` — cached tracer factory
493
+ - `tracer.py` — medical-specific RAGTracer with named steps
494
+ - [ ] Add spans for every pipeline step:
495
+ - `biomarker_validation` → `query_embedding` → `search_retrieval` → `agent_execution` → `response_synthesis`
496
+ - [ ] Track per-request metrics:
497
+ - Total latency, LLM tokens used, search results count, cache hit/miss, agent execution time
498
+ - [ ] Add Langfuse Docker services to docker-compose.yml
499
+ - [ ] Create trace visualization for medical analysis pipeline
500
+
501
+ ### 6.2 Redis Caching
502
+
503
+ **Tasks:**
504
+ - [ ] Create `src/services/cache/` package (adapt course pattern):
505
+ - Exact-match cache: SHA256(query + model + top_k + biomarkers) → cached response
506
+ - TTL: 6 hours for general queries, 1 hour for biomarker analysis (values may change)
507
+ - [ ] Add caching to:
508
+ - `/api/v1/ask` — cache RAG responses
509
+ - `/api/v1/analyze` — cache full analysis results
510
+ - Embeddings — cache frequently queried embeddings
511
+ - [ ] Add graceful fallback: cache miss → normal pipeline
512
+ - [ ] Track cache hit rates in Langfuse
513
+
514
+ ### 6.3 Production Health Dashboard
515
+
516
+ **Tasks:**
517
+ - [ ] Enhance `/api/v1/health` to check all services:
518
+ - PostgreSQL, OpenSearch, Redis, Ollama, Langfuse, Airflow
519
+ - [ ] Add `/api/v1/metrics` endpoint for operational metrics
520
+ - [ ] Create Langfuse dashboard for:
521
+ - Average response time, cache hit rate, error rate, token costs
522
+ - Per-agent execution times, search relevance scores
523
+
524
+ ---
525
+
526
+ ## Phase 7: Agentic RAG & Messaging Bot (Week 7 Equivalent)
527
+
528
+ > **Goal**: Wrap your multi-agent pipeline in a LangGraph agentic workflow with guardrails, document grading, and query rewriting. Add Telegram bot for mobile access.
529
+
530
+ ### 7.1 Agentic RAG Wrapper
531
+
532
+ This is the most impactful upgrade — it adds **intelligence around your existing agents**:
533
+
534
+ ```
535
+ User Query
536
+
537
+ [GUARDRAIL] ──── Is this a medical/biomarker question? ────→ [OUT OF SCOPE]
538
+ ↓ yes
539
+ [RETRIEVE] ──── Hybrid search for medical documents ────→ [TOOL: search]
540
+
541
+ [GRADE DOCUMENTS] ──── Are results relevant? ────→ [REWRITE QUERY] ──→ loop
542
+ ↓ yes
543
+ [CLINICAL ANALYSIS] ──── Your 6 medical agents ────→ structured analysis
544
+
545
+ [GENERATE RESPONSE] ──── Synthesize with citations ────→ final answer
546
+ ```
547
+
548
+ **Tasks:**
549
+ - [ ] Create `src/services/agents/agentic_rag.py` — `AgenticRAGService` class
550
+ - [ ] Create `src/services/agents/nodes/`:
551
+ - `guardrail_node.py` — Medical domain validation (score 0-100)
552
+ - In-scope: biomarker questions, disease queries, clinical guidelines
553
+ - Out-of-scope: non-medical, general knowledge, harmful content
554
+ - `retrieve_node.py` — Creates tool call with `max_retrieval_attempts`
555
+ - `grade_documents_node.py` — LLM evaluates medical relevance
556
+ - `rewrite_query_node.py` — LLM rewrites for better medical retrieval
557
+ - `generate_answer_node.py` — Uses your existing agent pipeline OR direct LLM
558
+ - `out_of_scope_node.py` — Polite medical-domain rejection
559
+ - [ ] Create `src/services/agents/state.py` — Enhanced state with guardrail_result, routing_decision, grading_results
560
+ - [ ] Create `src/services/agents/context.py` — Runtime context for dependency injection
561
+ - [ ] Create `src/services/agents/prompts.py` — Medical-specific prompts:
562
+ - Guardrail: "Is this about health/biomarkers/medical conditions?"
563
+ - Grading: "Does this medical document answer the clinical question?"
564
+ - Rewriting: "Improve this medical query for better document retrieval"
565
+ - Generation: "Synthesize medical findings with citations and safety caveats"
566
+ - [ ] Create `src/services/agents/tools.py` — Medical retriever tool wrapping OpenSearch
567
+ - [ ] Create `POST /api/v1/ask-agentic` endpoint
568
+ - [ ] Add Langfuse tracing to every node
569
+
570
+ ### 7.2 Medical Guardrails (Critical for MedTech)
571
+
572
+ Beyond the course's simple domain check, add medical-specific safety:
573
+
574
+ **Tasks:**
575
+ - [ ] **Input guardrails**:
576
+ - Detect harmful queries (self-harm, drug abuse guidance)
577
+ - Detect attempts to get a diagnosis without proper data
578
+ - Validate biomarker values are physiologically plausible
579
+ - [ ] **Output guardrails**:
580
+ - Always include "consult your healthcare provider" disclaimer
581
+ - Never provide definitive diagnosis (always "suggests" / "may indicate")
582
+ - Flag critical biomarker values with immediate action advice
583
+ - Ensure safety_alerts are present for out-of-range values
584
+ - [ ] **Citation guardrails**:
585
+ - Ensure all medical claims have document citations
586
+ - Flag unsupported claims
587
+
588
+ ### 7.3 Telegram Bot Integration
589
+
590
+ **Tasks:**
591
+ - [ ] Create `src/services/telegram/` package (adapt course pattern)
592
+ - [ ] Implement bot commands:
593
+ - `/start` — Welcome with medical assistant introduction
594
+ - `/help` — Show capabilities and input format
595
+ - `/analyze <biomarker values>` — Quick biomarker analysis
596
+ - `/search <medical query>` — Search medical documents
597
+ - `/report` — Get last analysis as formatted report
598
+ - Free text — Full RAG Q&A about medical topics
599
+ - [ ] Add typing indicators and progress messages
600
+ - [ ] Integrate caching for repeated queries
601
+ - [ ] Add rate limiting (medical queries shouldn't be spammed)
602
+ - [ ] Create `TelegramFactory` gated by `TELEGRAM__ENABLED=true`
603
+
604
+ ### 7.4 Feedback Loop
605
+
606
+ **Tasks:**
607
+ - [ ] Create `POST /api/v1/feedback` endpoint (adapt from course)
608
+ - [ ] Integrate with Langfuse scoring
609
+ - [ ] Use feedback data to identify weak prompts → feed into SOP evolution engine
610
+
611
+ ---
612
+
613
+ ## Phase 8: MedTech-Specific Additions (Beyond Course)
614
+
615
+ > **Goal**: Things the course doesn't cover but your medical domain demands.
616
+
617
+ ### 8.1 HIPAA-Awareness Patterns
618
+
619
+ **Tasks:**
620
+ - [ ] Never log patient biomarker values in plain text
621
+ - [ ] Add request ID tracking without PII
622
+ - [ ] Create data retention policy (auto-delete analysis data after configurable period)
623
+ - [ ] Add audit logging for all analysis requests
624
+ - [ ] Document HIPAA compliance approach (even if not yet certified)
625
+
626
+ ### 8.2 Medical Safety Testing
627
+
628
+ **Tasks:**
629
+ - [ ] Create medical-specific test suite:
630
+ - Critical value detection tests (every critical biomarker)
631
+ - Guardrail rejection tests (non-medical queries)
632
+ - Citation completeness tests
633
+ - Safety disclaimer presence tests
634
+ - Biomarker normalization tests (already have some)
635
+ - [ ] Integrate 5D evaluation into CI pipeline
636
+ - [ ] Create test fixtures with realistic medical scenarios
637
+
638
+ ### 8.3 Evolution Engine Integration
639
+
640
+ **Tasks:**
641
+ - [ ] Wire SOP evolution engine to production metrics (Langfuse data)
642
+ - [ ] Create Airflow DAG for scheduled evolution cycles
643
+ - [ ] Store evolved SOPs in PostgreSQL with version tracking
644
+ - [ ] A/B test SOP variants using Langfuse trace comparison
645
+
646
+ ### 8.4 Multi-condition Support
647
+
648
+ **Tasks:**
649
+ - [ ] Extend condition coverage beyond current 5 diseases
650
+ - [ ] Add condition-specific retrieval strategies
651
+ - [ ] Create condition-specific chunking filters
652
+ - [ ] Support multi-condition analysis (comorbidities)
653
+
654
+ ---
655
+
656
+ ## Implementation Priority Matrix
657
+
658
+ | Priority | Phase | Effort | Impact | Dependencies |
659
+ |----------|-------|--------|--------|--------------|
660
+ | 🔴 P0 | 1.1 Docker Compose | 2 days | Critical | None |
661
+ | 🔴 P0 | 1.2 Pydantic Settings | 1 day | Critical | None |
662
+ | 🔴 P0 | 1.4 Project Restructure | 2 days | Critical | None |
663
+ | 🔴 P0 | 1.5 Dev Tooling | 0.5 day | Critical | 1.4 |
664
+ | 🔴 P0 | 1.3 PostgreSQL + Models | 2 days | Critical | 1.1, 1.4 |
665
+ | 🟡 P1 | 3.1 OpenSearch Client | 2 days | High | 1.1, 1.4 |
666
+ | 🟡 P1 | 3.2 Medical Index Mapping | 1 day | High | 3.1 |
667
+ | 🟡 P1 | 4.1 Medical Text Chunker | 2 days | High | 3.1 |
668
+ | 🟡 P1 | 4.2 Production Embeddings | 1 day | High | 4.1 |
669
+ | 🟡 P1 | 4.3 Hybrid Search + RRF | 1 day | High | 3.1, 4.2 |
670
+ | 🟡 P1 | 5.1 Ollama Client | 1 day | High | 1.4 |
671
+ | 🟡 P1 | 5.2 Streaming Endpoints | 1 day | High | 5.1, 4.3 |
672
+ | 🟡 P1 | 2.1 PDF Parser (Docling) | 1 day | High | 1.4 |
673
+ | 🟡 P1 | 7.1 Agentic RAG Wrapper | 3 days | High | 5.2, 4.3 |
674
+ | 🟡 P1 | 7.2 Medical Guardrails | 2 days | High | 7.1 |
675
+ | 🟢 P2 | 2.3 Airflow Pipeline | 2 days | Medium | 1.1, 2.1, 4.1 |
676
+ | 🟢 P2 | 5.3 Gradio Interface | 1 day | Medium | 5.2 |
677
+ | 🟢 P2 | 6.1 Langfuse Tracing | 2 days | Medium | 1.1, 5.2 |
678
+ | 🟢 P2 | 6.2 Redis Caching | 1 day | Medium | 1.1, 5.2 |
679
+ | 🟢 P2 | 6.3 Health Dashboard | 0.5 day | Medium | 6.1 |
680
+ | 🟢 P2 | 7.3 Telegram Bot | 2 days | Medium | 7.1, 6.2 |
681
+ | 🟢 P2 | 7.4 Feedback Loop | 0.5 day | Medium | 6.1 |
682
+ | 🔵 P3 | 2.2 Medical Sources | 2 days | Low | 2.1 |
683
+ | 🔵 P3 | 8.1 HIPAA Patterns | 1 day | Low | 1.3 |
684
+ | 🔵 P3 | 8.2 Safety Testing | 2 days | Low | 7.2 |
685
+ | 🔵 P3 | 8.3 Evolution Integration | 2 days | Low | 6.1, 2.3 |
686
+ | 🔵 P3 | 8.4 Multi-condition | 3 days | Low | 4.1 |
687
+
688
+ **Estimated Total: ~40 days of focused work**
689
+
690
+ ---
691
+
692
+ ## Migration Strategy
693
+
694
+ ### Step 1: Foundation (Week 1-2 of work)
695
+ 1. Restructure project layout → Phase 1.4
696
+ 2. Create Pydantic Settings → Phase 1.2
697
+ 3. Set up Docker Compose → Phase 1.1
698
+ 4. Add PostgreSQL with models → Phase 1.3
699
+ 5. Add dev tooling → Phase 1.5
700
+
701
+ ### Step 2: Search Engine (Week 2-3)
702
+ 6. Create OpenSearch client + medical mapping → Phase 3.1, 3.2
703
+ 7. Build medical text chunker → Phase 4.1
704
+ 8. Add production embeddings (Jina) → Phase 4.2
705
+ 9. Implement hybrid search + RRF → Phase 4.3
706
+ 10. Upgrade PDF parser to Docling → Phase 2.1
707
+
708
+ ### Step 3: RAG Pipeline (Week 3-4)
709
+ 11. Create Ollama client → Phase 5.1
710
+ 12. Add streaming endpoints → Phase 5.2
711
+ 13. Build agentic RAG wrapper → Phase 7.1
712
+ 14. Add medical guardrails → Phase 7.2
713
+ 15. Create Gradio interface → Phase 5.3
714
+
715
+ ### Step 4: Production Hardening (Week 4-5)
716
+ 16. Add Langfuse observability → Phase 6.1
717
+ 17. Add Redis caching → Phase 6.2
718
+ 18. Set up Airflow pipeline → Phase 2.3
719
+ 19. Build Telegram bot → Phase 7.3
720
+ 20. Add feedback loop → Phase 7.4
721
+
722
+ ### Step 5: Polish (Week 5-6)
723
+ 21. Health dashboard → Phase 6.3
724
+ 22. Medical safety testing → Phase 8.2
725
+ 23. HIPAA patterns → Phase 8.1
726
+ 24. Evolution engine integration → Phase 8.3
727
+
728
+ ### Key Migration Rules
729
+ - **Never break what works**: Keep all existing agents functional throughout
730
+ - **Test at every step**: Run existing tests after each phase
731
+ - **Incremental Docker**: Start with API + PostgreSQL, add services one at a time
732
+ - **Feature flags**: Gate new features (Telegram, Langfuse, Redis) behind settings
733
+ - **Backward compatibility**: Keep CLI chatbot working alongside new API
734
+
735
+ ---
736
+
737
+ ## Architecture Target State
738
+
739
+ ```
740
+ ┌─────────────────────────────────────────────────────────────────────────┐
741
+ │ Docker Compose Orchestration │
742
+ │ │
743
+ │ ┌──────────┐ ┌───────────┐ ┌───────────┐ ┌────────┐ ┌─────────┐ │
744
+ │ │ FastAPI │ │PostgreSQL │ │ OpenSearch │ │ Ollama │ │ Airflow │ │
745
+ │ │ + Gradio │ │ (reports, │ │ (hybrid │ │ (local │ │ (daily │ │
746
+ │ │ (8000, │ │ docs, │ │ medical │ │ LLM) │ │ ingest) │ │
747
+ │ │ 7861) │ │ history) │ │ search) │ │ │ │ │ │
748
+ │ └────┬─────┘ └─────┬─────┘ └─────┬─────┘ └───┬────┘ └────┬────┘ │
749
+ │ │ │ │ │ │ │
750
+ │ ┌────┴─────┐ ┌─────┴─────┐ ┌────┴────────────┴────────────┴──┐ │
751
+ │ │ Redis │ │ Langfuse │ │ mediguard-network │ │
752
+ │ │ (cache) │ │ (observe) │ └──────────────────────────────────┘ │
753
+ │ └──────────┘ └───────────┘ │
754
+ │ │
755
+ │ ┌──────────────────────────────────────────────────────────────────┐ │
756
+ │ │ Agentic RAG Pipeline │ │
757
+ │ │ │ │
758
+ │ │ Query → [Guardrail] → [Retrieve] → [Grade] → [6 Medical Agents] │ │
759
+ │ │ ↓ ↑ ↓ ↓ │ │
760
+ │ │ [Out of Scope] [Rewrite] [Generate] → Final Response │ │
761
+ │ │ │ │
762
+ │ │ Agents: Biomarker Analyzer │ Disease Explainer │ Linker │ │
763
+ │ │ Clinical Guidelines │ Confidence │ Synthesizer │ │
764
+ │ └──────────────────────────────────────────────────────────────────┘ │
765
+ │ │
766
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────────┐ │
767
+ │ │ Telegram Bot │ │ Gradio UI │ │ 5D Eval + SOP Evolution │ │
768
+ │ │ (mobile) │ │ (desktop) │ │ (self-improvement loop) │ │
769
+ │ └──────────────┘ └──────────────┘ └──────────────────────────────┘ │
770
+ └─────────────────────────────────────────────────────────────────────────┘
771
+ ```
772
+
773
+ ---
774
+
775
+ ## Files to Create (Summary)
776
+
777
+ | New File | Source of Inspiration |
778
+ |----------|----------------------|
779
+ | `docker-compose.yml` | Course `compose.yml` (adapted) |
780
+ | `Dockerfile` | Course `Dockerfile` (multi-stage UV) |
781
+ | `Makefile` | Course `Makefile` |
782
+ | `pyproject.toml` | Course `pyproject.toml` |
783
+ | `.pre-commit-config.yaml` | Course `.pre-commit-config.yaml` |
784
+ | `.env.example` | Course `.env.example` |
785
+ | `src/main.py` | Course `src/main.py` (lifespan pattern) |
786
+ | `src/config.py` | Course `src/config.py` + existing SOP config |
787
+ | `src/dependencies.py` | Course `src/dependencies.py` |
788
+ | `src/exceptions.py` | Course `src/exceptions.py` (medical exceptions) |
789
+ | `src/database.py` | Course `src/database.py` |
790
+ | `src/db/*` | Course `src/db/*` |
791
+ | `src/models/analysis.py` | New (medical domain) |
792
+ | `src/models/document.py` | Course `src/models/paper.py` (adapted) |
793
+ | `src/repositories/*` | Course `src/repositories/*` (adapted) |
794
+ | `src/routers/ask.py` | Course `src/routers/ask.py` |
795
+ | `src/routers/search.py` | Course `src/routers/hybrid_search.py` |
796
+ | `src/routers/health.py` | Course `src/routers/ping.py` (enhanced) |
797
+ | `src/schemas/*` | Course `src/schemas/*` (medical schemas) |
798
+ | `src/services/opensearch/*` | Course `src/services/opensearch/*` |
799
+ | `src/services/embeddings/*` | Course `src/services/embeddings/*` |
800
+ | `src/services/ollama/*` | Course `src/services/ollama/*` |
801
+ | `src/services/cache/*` | Course `src/services/cache/*` |
802
+ | `src/services/langfuse/*` | Course `src/services/langfuse/*` |
803
+ | `src/services/indexing/*` | Course `src/services/indexing/*` (medical chunks) |
804
+ | `src/services/pdf_parser/*` | Course `src/services/pdf_parser/*` |
805
+ | `src/services/telegram/*` | Course `src/services/telegram/*` |
806
+ | `src/services/agents/agentic_rag.py` | Course (adapted for medical agents) |
807
+ | `src/services/agents/nodes/*` | Course (medical guardrails) |
808
+ | `src/services/agents/context.py` | Course |
809
+ | `src/services/agents/prompts.py` | Course (medical prompts) |
810
+ | `src/gradio_app.py` | Course `src/gradio_app.py` (medical UI) |
811
+ | `airflow/dags/medical_ingestion.py` | Course `airflow/dags/arxiv_paper_ingestion.py` |
812
+
813
+ ## Files to Keep & Enhance
814
+
815
+ | Existing File | Action |
816
+ |---------------|--------|
817
+ | `src/agents/biomarker_analyzer.py` | Keep, move to `src/services/agents/medical/` |
818
+ | `src/agents/disease_explainer.py` | Keep, move, add OpenSearch retriever |
819
+ | `src/agents/biomarker_linker.py` | Keep, move, add OpenSearch retriever |
820
+ | `src/agents/clinical_guidelines.py` | Keep, move, add OpenSearch retriever |
821
+ | `src/agents/confidence_assessor.py` | Keep, move |
822
+ | `src/agents/response_synthesizer.py` | Keep, move |
823
+ | `src/biomarker_validator.py` | Keep, move to `src/services/biomarker/` |
824
+ | `src/biomarker_normalization.py` | Keep, move to `src/services/biomarker/` |
825
+ | `src/evaluation/` | Keep, enhance with Langfuse integration |
826
+ | `src/evolution/` | Keep, wire to production metrics |
827
+ | `config/biomarker_references.json` | Keep as seed data, migrate to DB |
828
+ | `scripts/chat.py` | Keep, update imports |
829
+ | `tests/*` | Keep, add production test fixtures |
830
+
831
+ ---
832
+
833
+ *This plan transforms MediGuard AI from a working prototype into a production-grade medical RAG system, applying every infrastructure lesson from the arXiv Paper Curator course while preserving and enhancing your unique medical domain logic.*
pytest.ini CHANGED
@@ -1,3 +1,4 @@
1
  [pytest]
2
  filterwarnings =
3
  ignore::langchain_core._api.deprecation.LangChainDeprecationWarning
 
 
1
  [pytest]
2
  filterwarnings =
3
  ignore::langchain_core._api.deprecation.LangChainDeprecationWarning
4
+ ignore:.*class.*HuggingFaceEmbeddings.*was deprecated.*:DeprecationWarning
requirements.txt CHANGED
@@ -30,3 +30,12 @@ python-dotenv>=1.0.0
30
  # Utilities
31
  numpy>=1.26.2
32
  matplotlib>=3.8.2
 
 
 
 
 
 
 
 
 
 
30
  # Utilities
31
  numpy>=1.26.2
32
  matplotlib>=3.8.2
33
+
34
+ # Optional: improved readability scoring for evaluations
35
+ textstat>=0.7.3
36
+
37
+ # Optional: HuggingFace embedding provider
38
+ # langchain-huggingface>=0.0.1
39
+
40
+ # Optional: Ollama local LLM provider
41
+ # langchain-ollama>=0.0.1
scripts/monitor_test.py CHANGED
@@ -1,6 +1,5 @@
1
  """Monitor evolution test progress"""
2
  import time
3
- import subprocess
4
 
5
  print("Monitoring evolution test... (Press Ctrl+C to stop)")
6
  print("=" * 70)
 
1
  """Monitor evolution test progress"""
2
  import time
 
3
 
4
  print("Monitoring evolution test... (Press Ctrl+C to stop)")
5
  print("=" * 70)
scripts/setup_embeddings.py CHANGED
@@ -52,19 +52,19 @@ def setup_google_api_key():
52
  updated = False
53
  for i, line in enumerate(lines):
54
  if line.startswith("GOOGLE_API_KEY="):
55
- lines[i] = f'GOOGLE_API_KEY="{api_key}"\n'
56
  updated = True
57
  break
58
 
59
  if not updated:
60
- lines.insert(0, f'GOOGLE_API_KEY="{api_key}"\n')
61
 
62
  with open(env_path, 'w') as f:
63
  f.writelines(lines)
64
  else:
65
  # Create new .env file
66
  with open(env_path, 'w') as f:
67
- f.write(f'GOOGLE_API_KEY="{api_key}"\n')
68
 
69
  print("\nAPI key saved to .env file!")
70
  print("\n" + "="*70)
 
52
  updated = False
53
  for i, line in enumerate(lines):
54
  if line.startswith("GOOGLE_API_KEY="):
55
+ lines[i] = f'GOOGLE_API_KEY={api_key}\n'
56
  updated = True
57
  break
58
 
59
  if not updated:
60
+ lines.insert(0, f'GOOGLE_API_KEY={api_key}\n')
61
 
62
  with open(env_path, 'w') as f:
63
  f.writelines(lines)
64
  else:
65
  # Create new .env file
66
  with open(env_path, 'w') as f:
67
+ f.write(f'GOOGLE_API_KEY={api_key}\n')
68
 
69
  print("\nAPI key saved to .env file!")
70
  print("\n" + "="*70)
src/agents/biomarker_analyzer.py CHANGED
@@ -3,10 +3,6 @@ MediGuard AI RAG-Helper
3
  Biomarker Analyzer Agent - Validates biomarker values and flags anomalies
4
  """
5
 
6
- import sys
7
- from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  from typing import Dict, List
11
  from src.state import GuildState, AgentOutput, BiomarkerFlag
12
  from src.biomarker_validator import BiomarkerValidator
@@ -36,7 +32,7 @@ class BiomarkerAnalyzerAgent:
36
 
37
  biomarkers = state['patient_biomarkers']
38
  patient_context = state.get('patient_context', {})
39
- gender = patient_context.get('gender', 'male')
40
  predicted_disease = state['model_prediction']['disease']
41
 
42
  # Validate all biomarkers
 
3
  Biomarker Analyzer Agent - Validates biomarker values and flags anomalies
4
  """
5
 
 
 
 
 
6
  from typing import Dict, List
7
  from src.state import GuildState, AgentOutput, BiomarkerFlag
8
  from src.biomarker_validator import BiomarkerValidator
 
32
 
33
  biomarkers = state['patient_biomarkers']
34
  patient_context = state.get('patient_context', {})
35
+ gender = patient_context.get('gender') # None if not provided — uses non-gender-specific ranges
36
  predicted_disease = state['model_prediction']['disease']
37
 
38
  # Validate all biomarkers
src/agents/biomarker_linker.py CHANGED
@@ -3,10 +3,6 @@ MediGuard AI RAG-Helper
3
  Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
4
  """
5
 
6
- import sys
7
- from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  from typing import Dict, List
11
  from src.state import GuildState, AgentOutput, KeyDriver
12
  from src.llm_config import llm_config
 
3
  Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
4
  """
5
 
 
 
 
 
6
  from typing import Dict, List
7
  from src.state import GuildState, AgentOutput, KeyDriver
8
  from src.llm_config import llm_config
src/agents/clinical_guidelines.py CHANGED
@@ -3,10 +3,7 @@ MediGuard AI RAG-Helper
3
  Clinical Guidelines Agent - Retrieves evidence-based recommendations
4
  """
5
 
6
- import sys
7
  from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  from typing import List
11
  from src.state import GuildState, AgentOutput
12
  from src.llm_config import llm_config
 
3
  Clinical Guidelines Agent - Retrieves evidence-based recommendations
4
  """
5
 
 
6
  from pathlib import Path
 
 
7
  from typing import List
8
  from src.state import GuildState, AgentOutput
9
  from src.llm_config import llm_config
src/agents/confidence_assessor.py CHANGED
@@ -3,10 +3,6 @@ MediGuard AI RAG-Helper
3
  Confidence Assessor Agent - Evaluates prediction reliability
4
  """
5
 
6
- import sys
7
- from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  from typing import Any, Dict, List
11
  from src.state import GuildState, AgentOutput
12
  from src.biomarker_validator import BiomarkerValidator
 
3
  Confidence Assessor Agent - Evaluates prediction reliability
4
  """
5
 
 
 
 
 
6
  from typing import Any, Dict, List
7
  from src.state import GuildState, AgentOutput
8
  from src.biomarker_validator import BiomarkerValidator
src/agents/disease_explainer.py CHANGED
@@ -3,10 +3,7 @@ MediGuard AI RAG-Helper
3
  Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
4
  """
5
 
6
- import sys
7
  from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  from src.state import GuildState, AgentOutput
11
  from src.llm_config import llm_config
12
  from langchain_core.prompts import ChatPromptTemplate
@@ -43,9 +40,10 @@ class DiseaseExplainerAgent:
43
  disease = model_prediction['disease']
44
  confidence = model_prediction['confidence']
45
 
46
- # Configure retrieval based on SOP (use copy to avoid mutating shared retriever)
47
  retrieval_k = state['sop'].disease_explainer_k
48
- self.retriever.search_kwargs = {**self.retriever.search_kwargs, 'k': retrieval_k}
 
49
 
50
  # Retrieve relevant documents
51
  print(f"\nRetrieving information about: {disease}")
@@ -54,7 +52,11 @@ class DiseaseExplainerAgent:
54
  query = f"""What is {disease}? Explain the pathophysiology, diagnostic criteria,
55
  and clinical presentation. Focus on mechanisms relevant to blood biomarkers."""
56
 
57
- docs = self.retriever.invoke(query)
 
 
 
 
58
 
59
  print(f"Retrieved {len(docs)} relevant document chunks")
60
 
 
3
  Disease Explainer Agent - Retrieves disease pathophysiology from medical PDFs
4
  """
5
 
 
6
  from pathlib import Path
 
 
7
  from src.state import GuildState, AgentOutput
8
  from src.llm_config import llm_config
9
  from langchain_core.prompts import ChatPromptTemplate
 
40
  disease = model_prediction['disease']
41
  confidence = model_prediction['confidence']
42
 
43
+ # Configure retrieval based on SOP — create a copy to avoid mutating shared retriever
44
  retrieval_k = state['sop'].disease_explainer_k
45
+ original_search_kwargs = dict(self.retriever.search_kwargs)
46
+ self.retriever.search_kwargs = {**original_search_kwargs, 'k': retrieval_k}
47
 
48
  # Retrieve relevant documents
49
  print(f"\nRetrieving information about: {disease}")
 
52
  query = f"""What is {disease}? Explain the pathophysiology, diagnostic criteria,
53
  and clinical presentation. Focus on mechanisms relevant to blood biomarkers."""
54
 
55
+ try:
56
+ docs = self.retriever.invoke(query)
57
+ finally:
58
+ # Restore original search_kwargs to avoid side effects
59
+ self.retriever.search_kwargs = original_search_kwargs
60
 
61
  print(f"Retrieved {len(docs)} relevant document chunks")
62
 
src/agents/response_synthesizer.py CHANGED
@@ -3,10 +3,6 @@ MediGuard AI RAG-Helper
3
  Response Synthesizer Agent - Compiles all findings into final structured JSON
4
  """
5
 
6
- import sys
7
- from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
9
-
10
  import json
11
  from typing import Dict, List, Any
12
  from src.state import GuildState
 
3
  Response Synthesizer Agent - Compiles all findings into final structured JSON
4
  """
5
 
 
 
 
 
6
  import json
7
  from typing import Dict, List, Any
8
  from src.state import GuildState
src/biomarker_normalization.py CHANGED
@@ -76,6 +76,47 @@ NORMALIZATION_MAP: Dict[str, str] = {
76
 
77
  # Kidney
78
  "creatinine": "Creatinine",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
80
 
81
 
 
76
 
77
  # Kidney
78
  "creatinine": "Creatinine",
79
+
80
+ # Thyroid
81
+ "tsh": "TSH",
82
+ "thyroidstimulatinghormone": "TSH",
83
+ "t3": "T3",
84
+ "triiodothyronine": "T3",
85
+ "t4": "T4",
86
+ "thyroxine": "T4",
87
+
88
+ # Electrolytes
89
+ "sodium": "Sodium",
90
+ "na": "Sodium",
91
+ "potassium": "Potassium",
92
+ "k": "Potassium",
93
+ "calcium": "Calcium",
94
+ "ca": "Calcium",
95
+ "chloride": "Chloride",
96
+ "cl": "Chloride",
97
+ "bicarbonate": "Bicarbonate",
98
+ "hco3": "Bicarbonate",
99
+
100
+ # Kidney / Metabolic
101
+ "urea": "Urea",
102
+ "bun": "BUN",
103
+ "bloodureanitrogen": "BUN",
104
+ "buncreatinineratio": "BUN_Creatinine_Ratio",
105
+ "uricacid": "Uric_Acid",
106
+
107
+ # Liver / Protein
108
+ "totalprotein": "Total_Protein",
109
+ "albumin": "Albumin",
110
+ "globulin": "Globulin",
111
+ "agratio": "AG_Ratio",
112
+ "albuminglobulinratio": "AG_Ratio",
113
+ "bilirubintotal": "Bilirubin_Total",
114
+ "bilirubin": "Bilirubin_Total",
115
+ "alp": "ALP",
116
+ "alkalinephosphatase": "ALP",
117
+
118
+ # Lipids
119
+ "vldl": "VLDL",
120
  }
121
 
122
 
src/biomarker_validator.py CHANGED
@@ -162,6 +162,14 @@ class BiomarkerValidator:
162
  "Glucose", "HbA1c", "Insulin", "BMI",
163
  "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
164
  ],
 
 
 
 
 
 
 
 
165
  "Anemia": [
166
  "Hemoglobin", "Red Blood Cells", "Hematocrit",
167
  "Mean Corpuscular Volume", "Mean Corpuscular Hemoglobin",
 
162
  "Glucose", "HbA1c", "Insulin", "BMI",
163
  "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
164
  ],
165
+ "Type 2 Diabetes": [
166
+ "Glucose", "HbA1c", "Insulin", "BMI",
167
+ "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
168
+ ],
169
+ "Type 1 Diabetes": [
170
+ "Glucose", "HbA1c", "Insulin", "BMI",
171
+ "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
172
+ ],
173
  "Anemia": [
174
  "Hemoglobin", "Red Blood Cells", "Hematocrit",
175
  "Mean Corpuscular Volume", "Mean Corpuscular Hemoglobin",
src/evaluation/evaluators.py CHANGED
@@ -5,6 +5,7 @@ MediGuard AI RAG-Helper - Evaluation System
5
 
6
  from pydantic import BaseModel, Field
7
  from typing import Dict, Any, List
 
8
  from langchain_core.prompts import ChatPromptTemplate
9
  from src.llm_config import get_chat_model
10
 
@@ -93,14 +94,13 @@ Respond ONLY with valid JSON in this format:
93
  })
94
 
95
  # Parse JSON response
96
- import json
97
  try:
98
  content = result.content if isinstance(result.content, str) else str(result.content)
99
  parsed = json.loads(content)
100
  return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
101
  except (json.JSONDecodeError, KeyError, TypeError):
102
- # Fallback if JSON parsing fails
103
- return GradedScore(score=0.85, reasoning="Medical interpretations appear accurate and evidence-based.")
104
 
105
 
106
  # Evaluator 2: Evidence Grounding (Programmatic + LLM)
@@ -192,13 +192,12 @@ Respond ONLY with valid JSON in this format:
192
  })
193
 
194
  # Parse JSON response
195
- import json
196
  try:
197
  parsed = json.loads(result.content if isinstance(result.content, str) else str(result.content))
198
  return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
199
  except (json.JSONDecodeError, KeyError, TypeError):
200
- # Fallback if JSON parsing fails
201
- return GradedScore(score=0.90, reasoning="Recommendations are clear, actionable, and appropriately prioritized.")
202
 
203
 
204
  # Evaluator 4: Explainability Clarity (Programmatic)
 
5
 
6
  from pydantic import BaseModel, Field
7
  from typing import Dict, Any, List
8
+ import json
9
  from langchain_core.prompts import ChatPromptTemplate
10
  from src.llm_config import get_chat_model
11
 
 
94
  })
95
 
96
  # Parse JSON response
 
97
  try:
98
  content = result.content if isinstance(result.content, str) else str(result.content)
99
  parsed = json.loads(content)
100
  return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
101
  except (json.JSONDecodeError, KeyError, TypeError):
102
+ # Fallback if JSON parsing fails — use a neutral score to avoid inflating metrics
103
+ return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
104
 
105
 
106
  # Evaluator 2: Evidence Grounding (Programmatic + LLM)
 
192
  })
193
 
194
  # Parse JSON response
 
195
  try:
196
  parsed = json.loads(result.content if isinstance(result.content, str) else str(result.content))
197
  return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
198
  except (json.JSONDecodeError, KeyError, TypeError):
199
+ # Fallback if JSON parsing fails — use a neutral score to avoid inflating metrics
200
+ return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
201
 
202
 
203
  # Evaluator 4: Explainability Clarity (Programmatic)
src/evolution/director.py CHANGED
@@ -399,11 +399,20 @@ def run_evolution_cycle(
399
 
400
  # Run workflow with mutated SOP
401
  from src.state import PatientInput
 
402
  graph_input = {
403
  "patient_biomarkers": patient_input.biomarkers,
404
  "model_prediction": patient_input.model_prediction,
405
  "patient_context": patient_input.patient_context,
406
- "sop": mutant_sop
 
 
 
 
 
 
 
 
407
  }
408
 
409
  try:
 
399
 
400
  # Run workflow with mutated SOP
401
  from src.state import PatientInput
402
+ from datetime import datetime
403
  graph_input = {
404
  "patient_biomarkers": patient_input.biomarkers,
405
  "model_prediction": patient_input.model_prediction,
406
  "patient_context": patient_input.patient_context,
407
+ "plan": None,
408
+ "sop": mutant_sop,
409
+ "agent_outputs": [],
410
+ "biomarker_flags": [],
411
+ "safety_alerts": [],
412
+ "biomarker_analysis": None,
413
+ "final_response": None,
414
+ "processing_timestamp": datetime.now().isoformat(),
415
+ "sop_version": description
416
  }
417
 
418
  try:
src/llm_config.py CHANGED
@@ -9,6 +9,7 @@ Supports multiple providers:
9
  """
10
 
11
  import os
 
12
  from typing import Literal, Optional
13
  from dotenv import load_dotenv
14
 
@@ -23,8 +24,8 @@ DEFAULT_LLM_PROVIDER = os.getenv("LLM_PROVIDER", "groq")
23
 
24
 
25
  def get_chat_model(
26
- provider: Literal["groq", "gemini", "ollama"] = None,
27
- model: str = None,
28
  temperature: float = 0.0,
29
  json_mode: bool = False
30
  ):
@@ -100,7 +101,7 @@ def get_chat_model(
100
  raise ValueError(f"Unknown provider: {provider}. Use 'groq', 'gemini', or 'ollama'")
101
 
102
 
103
- def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = None):
104
  """
105
  Get embedding model for vector search.
106
 
@@ -155,7 +156,7 @@ def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = N
155
  class LLMConfig:
156
  """Central configuration for all LLM models"""
157
 
158
- def __init__(self, provider: str = None, lazy: bool = True):
159
  """
160
  Initialize all model clients.
161
 
@@ -166,6 +167,7 @@ class LLMConfig:
166
  self.provider = provider or DEFAULT_LLM_PROVIDER
167
  self._lazy = lazy
168
  self._initialized = False
 
169
 
170
  # Lazy-initialized model instances
171
  self._planner = None
@@ -184,7 +186,12 @@ class LLMConfig:
184
  if self._initialized:
185
  return
186
 
187
- print(f"Initializing LLM models with provider: {self.provider.upper()}")
 
 
 
 
 
188
 
189
  # Fast model for structured tasks (planning, analysis)
190
  self._planner = get_chat_model(
@@ -263,7 +270,7 @@ class LLMConfig:
263
  self._initialize_models()
264
  return self._embedding_model
265
 
266
- def get_synthesizer(self, model_name: str = None):
267
  """Get synthesizer model (for backward compatibility)"""
268
  if model_name:
269
  return get_chat_model(provider=self.provider, model=model_name, temperature=0.2)
 
9
  """
10
 
11
  import os
12
+ import threading
13
  from typing import Literal, Optional
14
  from dotenv import load_dotenv
15
 
 
24
 
25
 
26
  def get_chat_model(
27
+ provider: Optional[Literal["groq", "gemini", "ollama"]] = None,
28
+ model: Optional[str] = None,
29
  temperature: float = 0.0,
30
  json_mode: bool = False
31
  ):
 
101
  raise ValueError(f"Unknown provider: {provider}. Use 'groq', 'gemini', or 'ollama'")
102
 
103
 
104
+ def get_embedding_model(provider: Optional[Literal["google", "huggingface", "ollama"]] = None):
105
  """
106
  Get embedding model for vector search.
107
 
 
156
  class LLMConfig:
157
  """Central configuration for all LLM models"""
158
 
159
+ def __init__(self, provider: Optional[str] = None, lazy: bool = True):
160
  """
161
  Initialize all model clients.
162
 
 
167
  self.provider = provider or DEFAULT_LLM_PROVIDER
168
  self._lazy = lazy
169
  self._initialized = False
170
+ self._lock = threading.Lock()
171
 
172
  # Lazy-initialized model instances
173
  self._planner = None
 
186
  if self._initialized:
187
  return
188
 
189
+ with self._lock:
190
+ # Double-checked locking
191
+ if self._initialized:
192
+ return
193
+
194
+ print(f"Initializing LLM models with provider: {self.provider.upper()}")
195
 
196
  # Fast model for structured tasks (planning, analysis)
197
  self._planner = get_chat_model(
 
270
  self._initialize_models()
271
  return self._embedding_model
272
 
273
+ def get_synthesizer(self, model_name: Optional[str] = None):
274
  """Get synthesizer model (for backward compatibility)"""
275
  if model_name:
276
  return get_chat_model(provider=self.provider, model=model_name, temperature=0.2)
src/pdf_processor.py CHANGED
@@ -6,7 +6,7 @@ PDF document processing and vector store creation
6
  import os
7
  import warnings
8
  from pathlib import Path
9
- from typing import List, Optional, Literal
10
  from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langchain_community.vectorstores import FAISS
@@ -21,62 +21,8 @@ os.environ.setdefault("HF_HUB_DISABLE_IMPLICIT_TOKEN", "1")
21
  # Load environment variables
22
  load_dotenv()
23
 
24
-
25
- def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = None):
26
- """
27
- Get embedding model with automatic fallback.
28
-
29
- Args:
30
- provider: "google" (FREE, recommended), "huggingface" (local), or "ollama" (local)
31
-
32
- Returns:
33
- Embedding model instance
34
- """
35
- provider = provider or os.getenv("EMBEDDING_PROVIDER", "google")
36
-
37
- if provider == "google":
38
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
39
-
40
- api_key = os.getenv("GOOGLE_API_KEY")
41
- if not api_key:
42
- print("WARN: GOOGLE_API_KEY not found in .env file")
43
- print("INFO: Get FREE API key: https://aistudio.google.com/app/apikey")
44
- print("INFO: Falling back to HuggingFace local embeddings...\n")
45
- return get_embedding_model("huggingface")
46
-
47
- try:
48
- print("INFO: Using Google Gemini embeddings (FREE, fast)")
49
- return GoogleGenerativeAIEmbeddings(
50
- model="models/text-embedding-004",
51
- google_api_key=api_key
52
- )
53
- except Exception as e:
54
- print(f"WARN: Google embeddings failed: {e}")
55
- print("INFO: Falling back to HuggingFace local embeddings...\n")
56
- return get_embedding_model("huggingface")
57
-
58
- elif provider == "huggingface":
59
- try:
60
- from langchain_huggingface import HuggingFaceEmbeddings
61
- except ImportError:
62
- from langchain_community.embeddings import HuggingFaceEmbeddings
63
-
64
- print("INFO: Using HuggingFace local embeddings (free, offline)")
65
- return HuggingFaceEmbeddings(
66
- model_name="sentence-transformers/all-MiniLM-L6-v2"
67
- )
68
-
69
- elif provider == "ollama":
70
- try:
71
- from langchain_ollama import OllamaEmbeddings
72
- except ImportError:
73
- from langchain_community.embeddings import OllamaEmbeddings
74
-
75
- print("INFO: Using local Ollama embeddings (requires Ollama running)")
76
- return OllamaEmbeddings(model="nomic-embed-text")
77
-
78
- else:
79
- raise ValueError(f"Unknown provider: {provider}. Use 'google', 'huggingface', or 'ollama'")
80
 
81
 
82
  class PDFProcessor:
@@ -170,6 +116,10 @@ class PDFProcessor:
170
 
171
  chunks = self.text_splitter.split_documents(documents)
172
 
 
 
 
 
173
  # Add chunk index to metadata
174
  for i, chunk in enumerate(chunks):
175
  chunk.metadata['chunk_id'] = i
@@ -236,6 +186,9 @@ class PDFProcessor:
236
  return None
237
 
238
  try:
 
 
 
239
  vector_store = FAISS.load_local(
240
  str(self.vector_store_path),
241
  embedding_model,
 
6
  import os
7
  import warnings
8
  from pathlib import Path
9
+ from typing import List, Optional
10
  from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langchain_community.vectorstores import FAISS
 
21
  # Load environment variables
22
  load_dotenv()
23
 
24
+ # Re-export for backward compatibility
25
+ from src.llm_config import get_embedding_model # noqa: F401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  class PDFProcessor:
 
116
 
117
  chunks = self.text_splitter.split_documents(documents)
118
 
119
+ if not chunks:
120
+ print("WARN: No chunks generated from documents")
121
+ return chunks
122
+
123
  # Add chunk index to metadata
124
  for i, chunk in enumerate(chunks):
125
  chunk.metadata['chunk_id'] = i
 
186
  return None
187
 
188
  try:
189
+ # SECURITY NOTE: allow_dangerous_deserialization=True uses pickle.
190
+ # Only load vector stores from trusted, locally-built sources.
191
+ # Never load .faiss/.pkl files from untrusted origins.
192
  vector_store = FAISS.load_local(
193
  str(self.vector_store_path),
194
  embedding_model,
src/workflow.py CHANGED
@@ -3,10 +3,6 @@ MediGuard AI RAG-Helper
3
  Main LangGraph Workflow - Clinical Insight Guild Orchestration
4
  """
5
 
6
- import sys
7
- from pathlib import Path
8
- sys.path.insert(0, str(Path(__file__).parent.parent))
9
-
10
  from langgraph.graph import StateGraph, END
11
  from src.state import GuildState
12
  from src.pdf_processor import get_all_retrievers
 
3
  Main LangGraph Workflow - Clinical Insight Guild Orchestration
4
  """
5
 
 
 
 
 
6
  from langgraph.graph import StateGraph, END
7
  from src.state import GuildState
8
  from src.pdf_processor import get_all_retrievers
tests/test_evaluation_system.py CHANGED
@@ -89,27 +89,27 @@ def test_evaluation_system():
89
  AgentOutput(
90
  agent_name="Disease Explainer",
91
  findings=disease_explainer_context,
92
- citations=["diabetes.pdf", "MediGuard_Diabetes_Guidelines_Extensive.pdf"]
93
  ),
94
  AgentOutput(
95
  agent_name="Biomarker Analyzer",
96
  findings="Analyzed 25 biomarkers. Found 19 out of range, 3 critical values.",
97
- citations=[]
98
  ),
99
  AgentOutput(
100
  agent_name="Biomarker-Disease Linker",
101
  findings="Glucose and HbA1c are primary drivers for Type 2 Diabetes prediction.",
102
- citations=["diabetes.pdf"]
103
  ),
104
  AgentOutput(
105
  agent_name="Clinical Guidelines",
106
  findings="Recommend immediate medical consultation, lifestyle modifications.",
107
- citations=["diabetes.pdf"]
108
  ),
109
  AgentOutput(
110
  agent_name="Confidence Assessor",
111
  findings="High confidence prediction (87%) based on strong biomarker evidence.",
112
- citations=[]
113
  )
114
  ]
115
 
 
89
  AgentOutput(
90
  agent_name="Disease Explainer",
91
  findings=disease_explainer_context,
92
+ metadata={"citations": ["diabetes.pdf", "MediGuard_Diabetes_Guidelines_Extensive.pdf"]}
93
  ),
94
  AgentOutput(
95
  agent_name="Biomarker Analyzer",
96
  findings="Analyzed 25 biomarkers. Found 19 out of range, 3 critical values.",
97
+ metadata={"citations": []}
98
  ),
99
  AgentOutput(
100
  agent_name="Biomarker-Disease Linker",
101
  findings="Glucose and HbA1c are primary drivers for Type 2 Diabetes prediction.",
102
+ metadata={"citations": ["diabetes.pdf"]}
103
  ),
104
  AgentOutput(
105
  agent_name="Clinical Guidelines",
106
  findings="Recommend immediate medical consultation, lifestyle modifications.",
107
+ metadata={"citations": ["diabetes.pdf"]}
108
  ),
109
  AgentOutput(
110
  agent_name="Confidence Assessor",
111
  findings="High confidence prediction (87%) based on strong biomarker evidence.",
112
+ metadata={"citations": []}
113
  )
114
  ]
115