T0X1N committed on
Commit
9659593
·
1 Parent(s): 2ac1524

chore: codebase audit and fixes (ruff, mypy, pytest)

Browse files

- Fixed pytest collection hang by adding root conftest.py and testpaths in pytest.ini
- Fixed mypy missing keys for GuildState TypedDict definitions
- Fixed ruff whitespace formatting and unhandled B904 exceptions
- Added skip markers for unmocked LLM API calls in integration tests
- Removed redundant debug script logging and trace files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. airflow/dags/ingest_pdfs.py +5 -1
  2. alembic/env.py +1 -3
  3. alembic/versions/001_initial.py +52 -51
  4. api/app/__init__.py +1 -0
  5. api/app/main.py +13 -24
  6. api/app/routes/analyze.py +34 -45
  7. api/app/routes/biomarkers.py +8 -22
  8. api/app/routes/health.py +3 -3
  9. api/app/services/extraction.py +12 -19
  10. api/app/services/ragbot.py +19 -25
  11. archive/evolution/__init__.py +11 -11
  12. archive/evolution/director.py +79 -101
  13. archive/evolution/pareto.py +37 -32
  14. archive/tests/test_evolution_loop.py +32 -52
  15. archive/tests/test_evolution_quick.py +2 -7
  16. conftest.py +1 -0
  17. huggingface/app.py +57 -60
  18. pytest.ini +2 -0
  19. scripts/chat.py +46 -51
  20. scripts/monitor_test.py +2 -1
  21. scripts/setup_embeddings.py +13 -13
  22. scripts/test_chat_demo.py +6 -6
  23. scripts/test_extraction.py +5 -5
  24. src/agents/biomarker_analyzer.py +18 -24
  25. src/agents/biomarker_linker.py +33 -68
  26. src/agents/clinical_guidelines.py +80 -88
  27. src/agents/confidence_assessor.py +36 -79
  28. src/agents/disease_explainer.py +60 -62
  29. src/agents/response_synthesizer.py +84 -79
  30. src/biomarker_normalization.py +0 -14
  31. src/biomarker_validator.py +76 -67
  32. src/config.py +14 -31
  33. src/database.py +1 -0
  34. src/evaluation/__init__.py +8 -8
  35. src/evaluation/evaluators.py +106 -110
  36. src/exceptions.py +12 -0
  37. src/gradio_app.py +7 -11
  38. src/llm_config.py +21 -53
  39. src/main.py +11 -0
  40. src/middlewares.py +19 -5
  41. src/pdf_processor.py +35 -61
  42. src/repositories/analysis.py +2 -11
  43. src/repositories/document.py +2 -11
  44. src/routers/analyze.py +8 -22
  45. src/routers/ask.py +7 -12
  46. src/routers/health.py +12 -2
  47. src/schemas/schemas.py +15 -7
  48. src/services/agents/context.py +6 -6
  49. src/services/agents/nodes/retrieve_node.py +2 -8
  50. src/services/agents/state.py +4 -4
airflow/dags/ingest_pdfs.py CHANGED
@@ -38,7 +38,11 @@ def _ingest_pdfs(**kwargs):
38
  parser = make_pdf_parser_service()
39
  embedding_svc = make_embedding_service()
40
  os_client = make_opensearch_client()
41
- chunker = MedicalTextChunker(target_words=settings.chunking.chunk_size, overlap_words=settings.chunking.chunk_overlap, min_words=settings.chunking.min_chunk_size)
 
 
 
 
42
  indexing_svc = IndexingService(chunker, embedding_svc, os_client)
43
 
44
  docs = parser.parse_directory(pdf_dir)
 
38
  parser = make_pdf_parser_service()
39
  embedding_svc = make_embedding_service()
40
  os_client = make_opensearch_client()
41
+ chunker = MedicalTextChunker(
42
+ target_words=settings.chunking.chunk_size,
43
+ overlap_words=settings.chunking.chunk_overlap,
44
+ min_words=settings.chunking.min_chunk_size,
45
+ )
46
  indexing_svc = IndexingService(chunker, embedding_svc, os_client)
47
 
48
  docs = parser.parse_directory(pdf_dir)
alembic/env.py CHANGED
@@ -79,9 +79,7 @@ def run_migrations_online() -> None:
79
  )
80
 
81
  with connectable.connect() as connection:
82
- context.configure(
83
- connection=connection, target_metadata=target_metadata
84
- )
85
 
86
  with context.begin_transaction():
87
  context.run_migrations()
 
79
  )
80
 
81
  with connectable.connect() as connection:
82
+ context.configure(connection=connection, target_metadata=target_metadata)
 
 
83
 
84
  with context.begin_transaction():
85
  context.run_migrations()
alembic/versions/001_initial.py CHANGED
@@ -1,16 +1,17 @@
1
  """initial_tables
2
 
3
  Revision ID: 001
4
- Revises:
5
  Create Date: 2026-02-24 20:58:00.000000
6
 
7
  """
 
8
  import sqlalchemy as sa
9
 
10
  from alembic import op
11
 
12
  # revision identifiers, used by Alembic.
13
- revision = '001'
14
  down_revision = None
15
  branch_labels = None
16
  depends_on = None
@@ -18,64 +19,64 @@ depends_on = None
18
 
19
  def upgrade() -> None:
20
  op.create_table(
21
- 'patient_analyses',
22
- sa.Column('id', sa.String(length=36), nullable=False),
23
- sa.Column('request_id', sa.String(length=64), nullable=False),
24
- sa.Column('biomarkers', sa.JSON(), nullable=False),
25
- sa.Column('patient_context', sa.JSON(), nullable=True),
26
- sa.Column('predicted_disease', sa.String(length=128), nullable=False),
27
- sa.Column('confidence', sa.Float(), nullable=False),
28
- sa.Column('probabilities', sa.JSON(), nullable=True),
29
- sa.Column('analysis_result', sa.JSON(), nullable=True),
30
- sa.Column('safety_alerts', sa.JSON(), nullable=True),
31
- sa.Column('sop_version', sa.String(length=64), nullable=True),
32
- sa.Column('processing_time_ms', sa.Float(), nullable=False),
33
- sa.Column('model_provider', sa.String(length=32), nullable=True),
34
- sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
35
- sa.PrimaryKeyConstraint('id')
36
  )
37
- op.create_index(op.f('ix_patient_analyses_request_id'), 'patient_analyses', ['request_id'], unique=True)
38
 
39
  op.create_table(
40
- 'medical_documents',
41
- sa.Column('id', sa.String(length=36), nullable=False),
42
- sa.Column('title', sa.String(length=512), nullable=False),
43
- sa.Column('source', sa.String(length=512), nullable=False),
44
- sa.Column('source_type', sa.String(length=32), nullable=False),
45
- sa.Column('authors', sa.Text(), nullable=True),
46
- sa.Column('abstract', sa.Text(), nullable=True),
47
- sa.Column('content_hash', sa.String(length=64), nullable=True),
48
- sa.Column('page_count', sa.Integer(), nullable=True),
49
- sa.Column('chunk_count', sa.Integer(), nullable=True),
50
- sa.Column('parse_status', sa.String(length=32), nullable=False),
51
- sa.Column('metadata_json', sa.JSON(), nullable=True),
52
- sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
53
- sa.Column('indexed_at', sa.DateTime(timezone=True), nullable=True),
54
- sa.PrimaryKeyConstraint('id'),
55
- sa.UniqueConstraint('content_hash')
56
  )
57
- op.create_index(op.f('ix_medical_documents_title'), 'medical_documents', ['title'], unique=False)
58
 
59
  op.create_table(
60
- 'sop_versions',
61
- sa.Column('id', sa.String(length=36), nullable=False),
62
- sa.Column('version_tag', sa.String(length=64), nullable=False),
63
- sa.Column('parameters', sa.JSON(), nullable=False),
64
- sa.Column('evaluation_scores', sa.JSON(), nullable=True),
65
- sa.Column('parent_version', sa.String(length=64), nullable=True),
66
- sa.Column('is_active', sa.Boolean(), nullable=False),
67
- sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
68
- sa.PrimaryKeyConstraint('id')
69
  )
70
- op.create_index(op.f('ix_sop_versions_version_tag'), 'sop_versions', ['version_tag'], unique=True)
71
 
72
 
73
  def downgrade() -> None:
74
- op.drop_index(op.f('ix_sop_versions_version_tag'), table_name='sop_versions')
75
- op.drop_table('sop_versions')
76
 
77
- op.drop_index(op.f('ix_medical_documents_title'), table_name='medical_documents')
78
- op.drop_table('medical_documents')
79
 
80
- op.drop_index(op.f('ix_patient_analyses_request_id'), table_name='patient_analyses')
81
- op.drop_table('patient_analyses')
 
1
  """initial_tables
2
 
3
  Revision ID: 001
4
+ Revises:
5
  Create Date: 2026-02-24 20:58:00.000000
6
 
7
  """
8
+
9
  import sqlalchemy as sa
10
 
11
  from alembic import op
12
 
13
  # revision identifiers, used by Alembic.
14
+ revision = "001"
15
  down_revision = None
16
  branch_labels = None
17
  depends_on = None
 
19
 
20
  def upgrade() -> None:
21
  op.create_table(
22
+ "patient_analyses",
23
+ sa.Column("id", sa.String(length=36), nullable=False),
24
+ sa.Column("request_id", sa.String(length=64), nullable=False),
25
+ sa.Column("biomarkers", sa.JSON(), nullable=False),
26
+ sa.Column("patient_context", sa.JSON(), nullable=True),
27
+ sa.Column("predicted_disease", sa.String(length=128), nullable=False),
28
+ sa.Column("confidence", sa.Float(), nullable=False),
29
+ sa.Column("probabilities", sa.JSON(), nullable=True),
30
+ sa.Column("analysis_result", sa.JSON(), nullable=True),
31
+ sa.Column("safety_alerts", sa.JSON(), nullable=True),
32
+ sa.Column("sop_version", sa.String(length=64), nullable=True),
33
+ sa.Column("processing_time_ms", sa.Float(), nullable=False),
34
+ sa.Column("model_provider", sa.String(length=32), nullable=True),
35
+ sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
36
+ sa.PrimaryKeyConstraint("id"),
37
  )
38
+ op.create_index(op.f("ix_patient_analyses_request_id"), "patient_analyses", ["request_id"], unique=True)
39
 
40
  op.create_table(
41
+ "medical_documents",
42
+ sa.Column("id", sa.String(length=36), nullable=False),
43
+ sa.Column("title", sa.String(length=512), nullable=False),
44
+ sa.Column("source", sa.String(length=512), nullable=False),
45
+ sa.Column("source_type", sa.String(length=32), nullable=False),
46
+ sa.Column("authors", sa.Text(), nullable=True),
47
+ sa.Column("abstract", sa.Text(), nullable=True),
48
+ sa.Column("content_hash", sa.String(length=64), nullable=True),
49
+ sa.Column("page_count", sa.Integer(), nullable=True),
50
+ sa.Column("chunk_count", sa.Integer(), nullable=True),
51
+ sa.Column("parse_status", sa.String(length=32), nullable=False),
52
+ sa.Column("metadata_json", sa.JSON(), nullable=True),
53
+ sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
54
+ sa.Column("indexed_at", sa.DateTime(timezone=True), nullable=True),
55
+ sa.PrimaryKeyConstraint("id"),
56
+ sa.UniqueConstraint("content_hash"),
57
  )
58
+ op.create_index(op.f("ix_medical_documents_title"), "medical_documents", ["title"], unique=False)
59
 
60
  op.create_table(
61
+ "sop_versions",
62
+ sa.Column("id", sa.String(length=36), nullable=False),
63
+ sa.Column("version_tag", sa.String(length=64), nullable=False),
64
+ sa.Column("parameters", sa.JSON(), nullable=False),
65
+ sa.Column("evaluation_scores", sa.JSON(), nullable=True),
66
+ sa.Column("parent_version", sa.String(length=64), nullable=True),
67
+ sa.Column("is_active", sa.Boolean(), nullable=False),
68
+ sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
69
+ sa.PrimaryKeyConstraint("id"),
70
  )
71
+ op.create_index(op.f("ix_sop_versions_version_tag"), "sop_versions", ["version_tag"], unique=True)
72
 
73
 
74
  def downgrade() -> None:
75
+ op.drop_index(op.f("ix_sop_versions_version_tag"), table_name="sop_versions")
76
+ op.drop_table("sop_versions")
77
 
78
+ op.drop_index(op.f("ix_medical_documents_title"), table_name="medical_documents")
79
+ op.drop_table("medical_documents")
80
 
81
+ op.drop_index(op.f("ix_patient_analyses_request_id"), table_name="patient_analyses")
82
+ op.drop_table("patient_analyses")
api/app/__init__.py CHANGED
@@ -1,4 +1,5 @@
1
  """
2
  RagBot FastAPI Application
3
  """
 
4
  __version__ = "1.0.0"
 
1
  """
2
  RagBot FastAPI Application
3
  """
4
+
5
  __version__ = "1.0.0"
api/app/main.py CHANGED
@@ -17,10 +17,7 @@ from app.routes import analyze, biomarkers, health
17
  from app.services.ragbot import get_ragbot_service
18
 
19
  # Configure logging
20
- logging.basicConfig(
21
- level=logging.INFO,
22
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
23
- )
24
  logger = logging.getLogger(__name__)
25
 
26
 
@@ -28,6 +25,7 @@ logger = logging.getLogger(__name__)
28
  # LIFESPAN EVENTS
29
  # ============================================================================
30
 
 
31
  @asynccontextmanager
32
  async def lifespan(app: FastAPI):
33
  """
@@ -67,7 +65,7 @@ app = FastAPI(
67
  lifespan=lifespan,
68
  docs_url="/docs",
69
  redoc_url="/redoc",
70
- openapi_url="/openapi.json"
71
  )
72
 
73
 
@@ -90,6 +88,7 @@ app.add_middleware(
90
  # ERROR HANDLERS
91
  # ============================================================================
92
 
 
93
  @app.exception_handler(RequestValidationError)
94
  async def validation_exception_handler(request: Request, exc: RequestValidationError):
95
  """Handle request validation errors"""
@@ -100,8 +99,8 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
100
  "error_code": "VALIDATION_ERROR",
101
  "message": "Request validation failed",
102
  "details": exc.errors(),
103
- "body": exc.body
104
- }
105
  )
106
 
107
 
@@ -114,8 +113,8 @@ async def general_exception_handler(request: Request, exc: Exception):
114
  content={
115
  "status": "error",
116
  "error_code": "INTERNAL_SERVER_ERROR",
117
- "message": "An unexpected error occurred. Please try again later."
118
- }
119
  )
120
 
121
 
@@ -144,13 +143,9 @@ async def root():
144
  "analyze_structured": "/api/v1/analyze/structured",
145
  "example": "/api/v1/example",
146
  "docs": "/docs",
147
- "redoc": "/redoc"
148
- },
149
- "documentation": {
150
- "swagger_ui": "/docs",
151
  "redoc": "/redoc",
152
- "openapi_schema": "/openapi.json"
153
- }
154
  }
155
 
156
 
@@ -164,8 +159,8 @@ async def api_v1_info():
164
  "GET /api/v1/biomarkers",
165
  "POST /api/v1/analyze/natural",
166
  "POST /api/v1/analyze/structured",
167
- "GET /api/v1/example"
168
- ]
169
  }
170
 
171
 
@@ -183,10 +178,4 @@ if __name__ == "__main__":
183
 
184
  logger.info(f"Starting server on {host}:{port}")
185
 
186
- uvicorn.run(
187
- "app.main:app",
188
- host=host,
189
- port=port,
190
- reload=reload,
191
- log_level="info"
192
- )
 
17
  from app.services.ragbot import get_ragbot_service
18
 
19
  # Configure logging
20
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 
 
 
21
  logger = logging.getLogger(__name__)
22
 
23
 
 
25
  # LIFESPAN EVENTS
26
  # ============================================================================
27
 
28
+
29
  @asynccontextmanager
30
  async def lifespan(app: FastAPI):
31
  """
 
65
  lifespan=lifespan,
66
  docs_url="/docs",
67
  redoc_url="/redoc",
68
+ openapi_url="/openapi.json",
69
  )
70
 
71
 
 
88
  # ERROR HANDLERS
89
  # ============================================================================
90
 
91
+
92
  @app.exception_handler(RequestValidationError)
93
  async def validation_exception_handler(request: Request, exc: RequestValidationError):
94
  """Handle request validation errors"""
 
99
  "error_code": "VALIDATION_ERROR",
100
  "message": "Request validation failed",
101
  "details": exc.errors(),
102
+ "body": exc.body,
103
+ },
104
  )
105
 
106
 
 
113
  content={
114
  "status": "error",
115
  "error_code": "INTERNAL_SERVER_ERROR",
116
+ "message": "An unexpected error occurred. Please try again later.",
117
+ },
118
  )
119
 
120
 
 
143
  "analyze_structured": "/api/v1/analyze/structured",
144
  "example": "/api/v1/example",
145
  "docs": "/docs",
 
 
 
 
146
  "redoc": "/redoc",
147
+ },
148
+ "documentation": {"swagger_ui": "/docs", "redoc": "/redoc", "openapi_schema": "/openapi.json"},
149
  }
150
 
151
 
 
159
  "GET /api/v1/biomarkers",
160
  "POST /api/v1/analyze/natural",
161
  "POST /api/v1/analyze/structured",
162
+ "GET /api/v1/example",
163
+ ],
164
  }
165
 
166
 
 
178
 
179
  logger.info(f"Starting server on {host}:{port}")
180
 
181
+ uvicorn.run("app.main:app", host=host, port=port, reload=reload, log_level="info")
 
 
 
 
 
 
api/app/routes/analyze.py CHANGED
@@ -18,13 +18,13 @@ router = APIRouter(prefix="/api/v1", tags=["analysis"])
18
  async def analyze_natural(request: NaturalAnalysisRequest):
19
  """
20
  Analyze biomarkers from natural language input.
21
-
22
  **Flow:**
23
  1. Extract biomarkers from natural language using LLM
24
  2. Predict disease using rule-based or ML model
25
  3. Run complete RAG workflow analysis
26
  4. Return comprehensive results
27
-
28
  **Example request:**
29
  ```json
30
  {
@@ -36,7 +36,7 @@ async def analyze_natural(request: NaturalAnalysisRequest):
36
  }
37
  }
38
  ```
39
-
40
  Returns full detailed analysis with all agent outputs, citations, recommendations.
41
  """
42
 
@@ -46,15 +46,12 @@ async def analyze_natural(request: NaturalAnalysisRequest):
46
  if not ragbot_service.is_ready():
47
  raise HTTPException(
48
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
49
- detail="RagBot service not initialized. Please try again in a moment."
50
  )
51
 
52
  # Extract biomarkers from natural language
53
  ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
54
- biomarkers, extracted_context, error = extract_biomarkers(
55
- request.message,
56
- ollama_base_url=ollama_base_url
57
- )
58
 
59
  if error:
60
  raise HTTPException(
@@ -63,8 +60,8 @@ async def analyze_natural(request: NaturalAnalysisRequest):
63
  "error_code": "EXTRACTION_FAILED",
64
  "message": error,
65
  "input_received": request.message[:100],
66
- "suggestion": "Try: 'My glucose is 140 and HbA1c is 7.5'"
67
- }
68
  )
69
 
70
  if not biomarkers:
@@ -74,8 +71,8 @@ async def analyze_natural(request: NaturalAnalysisRequest):
74
  "error_code": "NO_BIOMARKERS_FOUND",
75
  "message": "Could not extract any biomarkers from your message",
76
  "input_received": request.message[:100],
77
- "suggestion": "Include specific biomarker values like 'glucose is 140'"
78
- }
79
  )
80
 
81
  # Merge extracted context with request context
@@ -91,7 +88,7 @@ async def analyze_natural(request: NaturalAnalysisRequest):
91
  biomarkers=biomarkers,
92
  patient_context=patient_context,
93
  model_prediction=model_prediction,
94
- extracted_biomarkers=biomarkers # Keep original extraction
95
  )
96
 
97
  return response
@@ -102,22 +99,22 @@ async def analyze_natural(request: NaturalAnalysisRequest):
102
  detail={
103
  "error_code": "ANALYSIS_FAILED",
104
  "message": f"Analysis workflow failed: {e!s}",
105
- "biomarkers_received": biomarkers
106
- }
107
- )
108
 
109
 
110
  @router.post("/analyze/structured", response_model=AnalysisResponse)
111
  async def analyze_structured(request: StructuredAnalysisRequest):
112
  """
113
  Analyze biomarkers from structured input (skip extraction).
114
-
115
  **Flow:**
116
  1. Use provided biomarker dictionary directly
117
  2. Predict disease using rule-based or ML model
118
  3. Run complete RAG workflow analysis
119
  4. Return comprehensive results
120
-
121
  **Example request:**
122
  ```json
123
  {
@@ -135,7 +132,7 @@ async def analyze_structured(request: StructuredAnalysisRequest):
135
  }
136
  }
137
  ```
138
-
139
  Use this endpoint when you already have structured biomarker data.
140
  Returns full detailed analysis with all agent outputs, citations, recommendations.
141
  """
@@ -146,7 +143,7 @@ async def analyze_structured(request: StructuredAnalysisRequest):
146
  if not ragbot_service.is_ready():
147
  raise HTTPException(
148
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
149
- detail="RagBot service not initialized. Please try again in a moment."
150
  )
151
 
152
  # Validate biomarkers
@@ -156,8 +153,8 @@ async def analyze_structured(request: StructuredAnalysisRequest):
156
  detail={
157
  "error_code": "NO_BIOMARKERS",
158
  "message": "Biomarkers dictionary cannot be empty",
159
- "suggestion": "Provide at least one biomarker with a numeric value"
160
- }
161
  )
162
 
163
  # Patient context
@@ -172,7 +169,7 @@ async def analyze_structured(request: StructuredAnalysisRequest):
172
  biomarkers=request.biomarkers,
173
  patient_context=patient_context,
174
  model_prediction=model_prediction,
175
- extracted_biomarkers=None # No extraction for structured input
176
  )
177
 
178
  return response
@@ -183,26 +180,26 @@ async def analyze_structured(request: StructuredAnalysisRequest):
183
  detail={
184
  "error_code": "ANALYSIS_FAILED",
185
  "message": f"Analysis workflow failed: {e!s}",
186
- "biomarkers_received": request.biomarkers
187
- }
188
- )
189
 
190
 
191
  @router.get("/example", response_model=AnalysisResponse)
192
  async def get_example():
193
  """
194
  Get example diabetes case analysis.
195
-
196
  **Pre-run example case:**
197
  - 52-year-old male patient
198
  - Elevated glucose and HbA1c
199
  - Type 2 Diabetes prediction
200
-
201
  Useful for:
202
  - Testing API integration
203
  - Understanding response format
204
  - Demo purposes
205
-
206
  Same as CLI chatbot 'example' command.
207
  """
208
 
@@ -212,7 +209,7 @@ async def get_example():
212
  if not ragbot_service.is_ready():
213
  raise HTTPException(
214
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
215
- detail="RagBot service not initialized. Please try again in a moment."
216
  )
217
 
218
  # Example biomarkers (Type 2 Diabetes patient)
@@ -227,15 +224,10 @@ async def get_example():
227
  "LDL Cholesterol": 165.0,
228
  "BMI": 31.2,
229
  "Systolic Blood Pressure": 142.0,
230
- "Diastolic Blood Pressure": 88.0
231
  }
232
 
233
- patient_context = {
234
- "age": 52,
235
- "gender": "male",
236
- "bmi": 31.2,
237
- "patient_id": "EXAMPLE-001"
238
- }
239
 
240
  model_prediction = {
241
  "disease": "Diabetes",
@@ -245,8 +237,8 @@ async def get_example():
245
  "Heart Disease": 0.08,
246
  "Anemia": 0.03,
247
  "Thalassemia": 0.01,
248
- "Thrombocytopenia": 0.01
249
- }
250
  }
251
 
252
  try:
@@ -255,7 +247,7 @@ async def get_example():
255
  biomarkers=biomarkers,
256
  patient_context=patient_context,
257
  model_prediction=model_prediction,
258
- extracted_biomarkers=None
259
  )
260
 
261
  return response
@@ -263,8 +255,5 @@ async def get_example():
263
  except Exception as e:
264
  raise HTTPException(
265
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
266
- detail={
267
- "error_code": "EXAMPLE_FAILED",
268
- "message": f"Example analysis failed: {e!s}"
269
- }
270
- )
 
18
  async def analyze_natural(request: NaturalAnalysisRequest):
19
  """
20
  Analyze biomarkers from natural language input.
21
+
22
  **Flow:**
23
  1. Extract biomarkers from natural language using LLM
24
  2. Predict disease using rule-based or ML model
25
  3. Run complete RAG workflow analysis
26
  4. Return comprehensive results
27
+
28
  **Example request:**
29
  ```json
30
  {
 
36
  }
37
  }
38
  ```
39
+
40
  Returns full detailed analysis with all agent outputs, citations, recommendations.
41
  """
42
 
 
46
  if not ragbot_service.is_ready():
47
  raise HTTPException(
48
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
49
+ detail="RagBot service not initialized. Please try again in a moment.",
50
  )
51
 
52
  # Extract biomarkers from natural language
53
  ollama_base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
54
+ biomarkers, extracted_context, error = extract_biomarkers(request.message, ollama_base_url=ollama_base_url)
 
 
 
55
 
56
  if error:
57
  raise HTTPException(
 
60
  "error_code": "EXTRACTION_FAILED",
61
  "message": error,
62
  "input_received": request.message[:100],
63
+ "suggestion": "Try: 'My glucose is 140 and HbA1c is 7.5'",
64
+ },
65
  )
66
 
67
  if not biomarkers:
 
71
  "error_code": "NO_BIOMARKERS_FOUND",
72
  "message": "Could not extract any biomarkers from your message",
73
  "input_received": request.message[:100],
74
+ "suggestion": "Include specific biomarker values like 'glucose is 140'",
75
+ },
76
  )
77
 
78
  # Merge extracted context with request context
 
88
  biomarkers=biomarkers,
89
  patient_context=patient_context,
90
  model_prediction=model_prediction,
91
+ extracted_biomarkers=biomarkers, # Keep original extraction
92
  )
93
 
94
  return response
 
99
  detail={
100
  "error_code": "ANALYSIS_FAILED",
101
  "message": f"Analysis workflow failed: {e!s}",
102
+ "biomarkers_received": biomarkers,
103
+ },
104
+ ) from e
105
 
106
 
107
  @router.post("/analyze/structured", response_model=AnalysisResponse)
108
  async def analyze_structured(request: StructuredAnalysisRequest):
109
  """
110
  Analyze biomarkers from structured input (skip extraction).
111
+
112
  **Flow:**
113
  1. Use provided biomarker dictionary directly
114
  2. Predict disease using rule-based or ML model
115
  3. Run complete RAG workflow analysis
116
  4. Return comprehensive results
117
+
118
  **Example request:**
119
  ```json
120
  {
 
132
  }
133
  }
134
  ```
135
+
136
  Use this endpoint when you already have structured biomarker data.
137
  Returns full detailed analysis with all agent outputs, citations, recommendations.
138
  """
 
143
  if not ragbot_service.is_ready():
144
  raise HTTPException(
145
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
146
+ detail="RagBot service not initialized. Please try again in a moment.",
147
  )
148
 
149
  # Validate biomarkers
 
153
  detail={
154
  "error_code": "NO_BIOMARKERS",
155
  "message": "Biomarkers dictionary cannot be empty",
156
+ "suggestion": "Provide at least one biomarker with a numeric value",
157
+ },
158
  )
159
 
160
  # Patient context
 
169
  biomarkers=request.biomarkers,
170
  patient_context=patient_context,
171
  model_prediction=model_prediction,
172
+ extracted_biomarkers=None, # No extraction for structured input
173
  )
174
 
175
  return response
 
180
  detail={
181
  "error_code": "ANALYSIS_FAILED",
182
  "message": f"Analysis workflow failed: {e!s}",
183
+ "biomarkers_received": request.biomarkers,
184
+ },
185
+ ) from e
186
 
187
 
188
  @router.get("/example", response_model=AnalysisResponse)
189
  async def get_example():
190
  """
191
  Get example diabetes case analysis.
192
+
193
  **Pre-run example case:**
194
  - 52-year-old male patient
195
  - Elevated glucose and HbA1c
196
  - Type 2 Diabetes prediction
197
+
198
  Useful for:
199
  - Testing API integration
200
  - Understanding response format
201
  - Demo purposes
202
+
203
  Same as CLI chatbot 'example' command.
204
  """
205
 
 
209
  if not ragbot_service.is_ready():
210
  raise HTTPException(
211
  status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
212
+ detail="RagBot service not initialized. Please try again in a moment.",
213
  )
214
 
215
  # Example biomarkers (Type 2 Diabetes patient)
 
224
  "LDL Cholesterol": 165.0,
225
  "BMI": 31.2,
226
  "Systolic Blood Pressure": 142.0,
227
+ "Diastolic Blood Pressure": 88.0,
228
  }
229
 
230
+ patient_context = {"age": 52, "gender": "male", "bmi": 31.2, "patient_id": "EXAMPLE-001"}
 
 
 
 
 
231
 
232
  model_prediction = {
233
  "disease": "Diabetes",
 
237
  "Heart Disease": 0.08,
238
  "Anemia": 0.03,
239
  "Thalassemia": 0.01,
240
+ "Thrombocytopenia": 0.01,
241
+ },
242
  }
243
 
244
  try:
 
247
  biomarkers=biomarkers,
248
  patient_context=patient_context,
249
  model_prediction=model_prediction,
250
+ extracted_biomarkers=None,
251
  )
252
 
253
  return response
 
255
  except Exception as e:
256
  raise HTTPException(
257
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
258
+ detail={"error_code": "EXAMPLE_FAILED", "message": f"Example analysis failed: {e!s}"},
259
+ ) from e
 
 
 
api/app/routes/biomarkers.py CHANGED
@@ -17,13 +17,13 @@ router = APIRouter(prefix="/api/v1", tags=["biomarkers"])
17
  async def list_biomarkers():
18
  """
19
  Get list of all supported biomarkers with reference ranges.
20
-
21
  Returns comprehensive information about all 24 biomarkers:
22
  - Name and unit
23
  - Normal reference ranges (gender-specific if applicable)
24
  - Critical thresholds
25
  - Clinical significance
26
-
27
  Useful for:
28
  - Frontend validation
29
  - Understanding what biomarkers can be analyzed
@@ -48,18 +48,12 @@ async def list_biomarkers():
48
  if "male" in normal_range_data or "female" in normal_range_data:
49
  # Gender-specific ranges
50
  reference_range = BiomarkerReferenceRange(
51
- min=None,
52
- max=None,
53
- male=normal_range_data.get("male"),
54
- female=normal_range_data.get("female")
55
  )
56
  else:
57
  # Universal range
58
  reference_range = BiomarkerReferenceRange(
59
- min=normal_range_data.get("min"),
60
- max=normal_range_data.get("max"),
61
- male=None,
62
- female=None
63
  )
64
 
65
  biomarker_info = BiomarkerInfo(
@@ -70,25 +64,17 @@ async def list_biomarkers():
70
  critical_high=info.get("critical_high"),
71
  gender_specific=info.get("gender_specific", False),
72
  description=info.get("description", ""),
73
- clinical_significance=info.get("clinical_significance", {})
74
  )
75
 
76
  biomarkers_list.append(biomarker_info)
77
 
78
  return BiomarkersListResponse(
79
- biomarkers=biomarkers_list,
80
- total_count=len(biomarkers_list),
81
- timestamp=datetime.now().isoformat()
82
  )
83
 
84
  except FileNotFoundError:
85
- raise HTTPException(
86
- status_code=500,
87
- detail="Biomarker configuration file not found"
88
- )
89
 
90
  except Exception as e:
91
- raise HTTPException(
92
- status_code=500,
93
- detail=f"Failed to load biomarkers: {e!s}"
94
- )
 
17
  async def list_biomarkers():
18
  """
19
  Get list of all supported biomarkers with reference ranges.
20
+
21
  Returns comprehensive information about all 24 biomarkers:
22
  - Name and unit
23
  - Normal reference ranges (gender-specific if applicable)
24
  - Critical thresholds
25
  - Clinical significance
26
+
27
  Useful for:
28
  - Frontend validation
29
  - Understanding what biomarkers can be analyzed
 
48
  if "male" in normal_range_data or "female" in normal_range_data:
49
  # Gender-specific ranges
50
  reference_range = BiomarkerReferenceRange(
51
+ min=None, max=None, male=normal_range_data.get("male"), female=normal_range_data.get("female")
 
 
 
52
  )
53
  else:
54
  # Universal range
55
  reference_range = BiomarkerReferenceRange(
56
+ min=normal_range_data.get("min"), max=normal_range_data.get("max"), male=None, female=None
 
 
 
57
  )
58
 
59
  biomarker_info = BiomarkerInfo(
 
64
  critical_high=info.get("critical_high"),
65
  gender_specific=info.get("gender_specific", False),
66
  description=info.get("description", ""),
67
+ clinical_significance=info.get("clinical_significance", {}),
68
  )
69
 
70
  biomarkers_list.append(biomarker_info)
71
 
72
  return BiomarkersListResponse(
73
+ biomarkers=biomarkers_list, total_count=len(biomarkers_list), timestamp=datetime.now().isoformat()
 
 
74
  )
75
 
76
  except FileNotFoundError:
77
+ raise HTTPException(status_code=500, detail="Biomarker configuration file not found")
 
 
 
78
 
79
  except Exception as e:
80
+ raise HTTPException(status_code=500, detail=f"Failed to load biomarkers: {e!s}") from e
 
 
 
api/app/routes/health.py CHANGED
@@ -17,13 +17,13 @@ router = APIRouter(prefix="/api/v1", tags=["health"])
17
  async def health_check():
18
  """
19
  Check API health status.
20
-
21
  Verifies:
22
  - LLM API connection (Groq/Gemini)
23
  - Vector store loaded
24
  - Available models
25
  - Service uptime
26
-
27
  Returns health status with component details.
28
  """
29
  ragbot_service = get_ragbot_service()
@@ -69,5 +69,5 @@ async def health_check():
69
  vector_store_loaded=vector_store_loaded,
70
  available_models=available_models,
71
  uptime_seconds=ragbot_service.get_uptime_seconds(),
72
- version=__version__
73
  )
 
17
  async def health_check():
18
  """
19
  Check API health status.
20
+
21
  Verifies:
22
  - LLM API connection (Groq/Gemini)
23
  - Vector store loaded
24
  - Available models
25
  - Service uptime
26
+
27
  Returns health status with component details.
28
  """
29
  ragbot_service = get_ragbot_service()
 
69
  vector_store_loaded=vector_store_loaded,
70
  available_models=available_models,
71
  uptime_seconds=ragbot_service.get_uptime_seconds(),
72
+ version=__version__,
73
  )
api/app/services/extraction.py CHANGED
@@ -54,6 +54,7 @@ If you cannot find any biomarkers, return {{"biomarkers": {{}}, "patient_context
54
  # EXTRACTION HELPERS
55
  # ============================================================================
56
 
 
57
  def _parse_llm_json(content: str) -> dict[str, Any]:
58
  """Parse JSON payload from LLM output with fallback recovery."""
59
  text = content.strip()
@@ -69,7 +70,7 @@ def _parse_llm_json(content: str) -> dict[str, Any]:
69
  left = text.find("{")
70
  right = text.rfind("}")
71
  if left != -1 and right != -1 and right > left:
72
- return json.loads(text[left:right + 1])
73
  raise
74
 
75
 
@@ -77,23 +78,24 @@ def _parse_llm_json(content: str) -> dict[str, Any]:
77
  # EXTRACTION FUNCTION
78
  # ============================================================================
79
 
 
80
  def extract_biomarkers(
81
  user_message: str,
82
- ollama_base_url: str = None # Kept for backward compatibility, ignored
83
  ) -> tuple[dict[str, float], dict[str, Any], str]:
84
  """
85
  Extract biomarker values from natural language using LLM.
86
-
87
  Args:
88
  user_message: Natural language text containing biomarker information
89
  ollama_base_url: DEPRECATED - uses cloud LLM (Groq/Gemini) instead
90
-
91
  Returns:
92
  Tuple of (biomarkers_dict, patient_context_dict, error_message)
93
  - biomarkers_dict: Normalized biomarker names -> values
94
  - patient_context_dict: Extracted patient context (age, gender, BMI)
95
  - error_message: Empty string if successful, error description if failed
96
-
97
  Example:
98
  >>> biomarkers, context, error = extract_biomarkers("My glucose is 185 and HbA1c is 8.2")
99
  >>> print(biomarkers)
@@ -143,24 +145,19 @@ def extract_biomarkers(
143
  # SIMPLE DISEASE PREDICTION (Fallback)
144
  # ============================================================================
145
 
 
146
  def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
147
  """
148
  Simple rule-based disease prediction based on key biomarkers.
149
  Used as a fallback when no ML model is available.
150
-
151
  Args:
152
  biomarkers: Dictionary of biomarker names to values
153
-
154
  Returns:
155
  Dictionary with disease, confidence, and probabilities
156
  """
157
- scores = {
158
- "Diabetes": 0.0,
159
- "Anemia": 0.0,
160
- "Heart Disease": 0.0,
161
- "Thrombocytopenia": 0.0,
162
- "Thalassemia": 0.0
163
- }
164
 
165
  # Helper: check both abbreviated and normalized biomarker names
166
  # Returns None when biomarker is not present (avoids false triggers)
@@ -230,8 +227,4 @@ def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
230
  else:
231
  probabilities = {k: 1.0 / len(scores) for k in scores}
232
 
233
- return {
234
- "disease": top_disease,
235
- "confidence": confidence,
236
- "probabilities": probabilities
237
- }
 
54
  # EXTRACTION HELPERS
55
  # ============================================================================
56
 
57
+
58
  def _parse_llm_json(content: str) -> dict[str, Any]:
59
  """Parse JSON payload from LLM output with fallback recovery."""
60
  text = content.strip()
 
70
  left = text.find("{")
71
  right = text.rfind("}")
72
  if left != -1 and right != -1 and right > left:
73
+ return json.loads(text[left : right + 1])
74
  raise
75
 
76
 
 
78
  # EXTRACTION FUNCTION
79
  # ============================================================================
80
 
81
+
82
  def extract_biomarkers(
83
  user_message: str,
84
+ ollama_base_url: str | None = None, # Kept for backward compatibility, ignored
85
  ) -> tuple[dict[str, float], dict[str, Any], str]:
86
  """
87
  Extract biomarker values from natural language using LLM.
88
+
89
  Args:
90
  user_message: Natural language text containing biomarker information
91
  ollama_base_url: DEPRECATED - uses cloud LLM (Groq/Gemini) instead
92
+
93
  Returns:
94
  Tuple of (biomarkers_dict, patient_context_dict, error_message)
95
  - biomarkers_dict: Normalized biomarker names -> values
96
  - patient_context_dict: Extracted patient context (age, gender, BMI)
97
  - error_message: Empty string if successful, error description if failed
98
+
99
  Example:
100
  >>> biomarkers, context, error = extract_biomarkers("My glucose is 185 and HbA1c is 8.2")
101
  >>> print(biomarkers)
 
145
  # SIMPLE DISEASE PREDICTION (Fallback)
146
  # ============================================================================
147
 
148
+
149
  def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
150
  """
151
  Simple rule-based disease prediction based on key biomarkers.
152
  Used as a fallback when no ML model is available.
153
+
154
  Args:
155
  biomarkers: Dictionary of biomarker names to values
156
+
157
  Returns:
158
  Dictionary with disease, confidence, and probabilities
159
  """
160
+ scores = {"Diabetes": 0.0, "Anemia": 0.0, "Heart Disease": 0.0, "Thrombocytopenia": 0.0, "Thalassemia": 0.0}
 
 
 
 
 
 
161
 
162
  # Helper: check both abbreviated and normalized biomarker names
163
  # Returns None when biomarker is not present (avoids false triggers)
 
227
  else:
228
  probabilities = {k: 1.0 / len(scores) for k in scores}
229
 
230
+ return {"disease": top_disease, "confidence": confidence, "probabilities": probabilities}
 
 
 
 
api/app/services/ragbot.py CHANGED
@@ -94,17 +94,17 @@ class RagBotService:
94
  biomarkers: dict[str, float],
95
  patient_context: dict[str, Any],
96
  model_prediction: dict[str, Any],
97
- extracted_biomarkers: dict[str, float] = None
98
  ) -> AnalysisResponse:
99
  """
100
  Run complete analysis workflow and format full detailed response.
101
-
102
  Args:
103
  biomarkers: Dictionary of biomarker names to values
104
  patient_context: Patient demographic information
105
  model_prediction: Disease prediction (disease, confidence, probabilities)
106
  extracted_biomarkers: Original extracted biomarkers (for natural language input)
107
-
108
  Returns:
109
  Complete AnalysisResponse with all details
110
  """
@@ -117,9 +117,7 @@ class RagBotService:
117
  try:
118
  # Create PatientInput
119
  patient_input = PatientInput(
120
- biomarkers=biomarkers,
121
- model_prediction=model_prediction,
122
- patient_context=patient_context
123
  )
124
 
125
  # Run workflow
@@ -136,7 +134,7 @@ class RagBotService:
136
  extracted_biomarkers=extracted_biomarkers,
137
  patient_context=patient_context,
138
  model_prediction=model_prediction,
139
- processing_time_ms=processing_time_ms
140
  )
141
 
142
  return response
@@ -153,12 +151,12 @@ class RagBotService:
153
  extracted_biomarkers: dict[str, float],
154
  patient_context: dict[str, Any],
155
  model_prediction: dict[str, Any],
156
- processing_time_ms: float
157
  ) -> AnalysisResponse:
158
  """
159
  Format complete detailed response from workflow result.
160
  Preserves ALL data from workflow execution.
161
-
162
  workflow_result is now the full LangGraph state dict containing:
163
  - final_response: dict from response_synthesizer
164
  - agent_outputs: list of AgentOutput objects
@@ -174,7 +172,7 @@ class RagBotService:
174
  prediction = Prediction(
175
  disease=model_prediction["disease"],
176
  confidence=model_prediction["confidence"],
177
- probabilities=model_prediction.get("probabilities", {})
178
  )
179
 
180
  # Biomarker flags: prefer state-level data (BiomarkerFlag objects from validator),
@@ -183,7 +181,7 @@ class RagBotService:
183
  if state_flags:
184
  biomarker_flags = []
185
  for flag in state_flags:
186
- if hasattr(flag, 'model_dump'):
187
  biomarker_flags.append(BiomarkerFlag(**flag.model_dump()))
188
  elif isinstance(flag, dict):
189
  biomarker_flags.append(BiomarkerFlag(**flag))
@@ -201,7 +199,7 @@ class RagBotService:
201
  if state_alerts:
202
  safety_alerts = []
203
  for alert in state_alerts:
204
- if hasattr(alert, 'model_dump'):
205
  safety_alerts.append(SafetyAlert(**alert.model_dump()))
206
  elif isinstance(alert, dict):
207
  safety_alerts.append(SafetyAlert(**alert))
@@ -230,7 +228,7 @@ class RagBotService:
230
  disease_explanation = DiseaseExplanation(
231
  pathophysiology=disease_exp_data.get("pathophysiology", ""),
232
  citations=disease_exp_data.get("citations", []),
233
- retrieved_chunks=disease_exp_data.get("retrieved_chunks")
234
  )
235
 
236
  # Recommendations from synthesizer
@@ -243,7 +241,7 @@ class RagBotService:
243
  immediate_actions=recs_data.get("immediate_actions", []),
244
  lifestyle_changes=recs_data.get("lifestyle_changes", []),
245
  monitoring=recs_data.get("monitoring", []),
246
- follow_up=recs_data.get("follow_up")
247
  )
248
 
249
  # Confidence assessment from synthesizer
@@ -254,7 +252,7 @@ class RagBotService:
254
  prediction_reliability=conf_data.get("prediction_reliability", "UNKNOWN"),
255
  evidence_strength=conf_data.get("evidence_strength", "UNKNOWN"),
256
  limitations=conf_data.get("limitations", []),
257
- reasoning=conf_data.get("reasoning")
258
  )
259
 
260
  # Alternative diagnoses
@@ -270,14 +268,14 @@ class RagBotService:
270
  disease_explanation=disease_explanation,
271
  recommendations=recommendations,
272
  confidence_assessment=confidence_assessment,
273
- alternative_diagnoses=alternative_diagnoses
274
  )
275
 
276
  # Agent outputs from state (these are src.state.AgentOutput objects)
277
  agent_outputs_data = workflow_result.get("agent_outputs", [])
278
  agent_outputs = []
279
  for agent_out in agent_outputs_data:
280
- if hasattr(agent_out, 'model_dump'):
281
  agent_outputs.append(AgentOutput(**agent_out.model_dump()))
282
  elif isinstance(agent_out, dict):
283
  agent_outputs.append(AgentOutput(**agent_out))
@@ -287,7 +285,7 @@ class RagBotService:
287
  "sop_version": workflow_result.get("sop_version"),
288
  "processing_timestamp": workflow_result.get("processing_timestamp"),
289
  "agents_executed": len(agent_outputs),
290
- "workflow_success": True
291
  }
292
 
293
  # Conversational summary (if available)
@@ -301,7 +299,7 @@ class RagBotService:
301
  prediction=prediction,
302
  safety_alerts=safety_alerts,
303
  key_drivers=key_drivers,
304
- recommendations=recommendations
305
  )
306
 
307
  # Assemble final response
@@ -318,17 +316,13 @@ class RagBotService:
318
  workflow_metadata=workflow_metadata,
319
  conversational_summary=conversational_summary,
320
  processing_time_ms=processing_time_ms,
321
- sop_version=workflow_result.get("sop_version", "Baseline")
322
  )
323
 
324
  return response
325
 
326
  def _generate_conversational_summary(
327
- self,
328
- prediction: Prediction,
329
- safety_alerts: list,
330
- key_drivers: list,
331
- recommendations: Recommendations
332
  ) -> str:
333
  """Generate a simple conversational summary"""
334
 
 
94
  biomarkers: dict[str, float],
95
  patient_context: dict[str, Any],
96
  model_prediction: dict[str, Any],
97
+ extracted_biomarkers: dict[str, float] | None = None,
98
  ) -> AnalysisResponse:
99
  """
100
  Run complete analysis workflow and format full detailed response.
101
+
102
  Args:
103
  biomarkers: Dictionary of biomarker names to values
104
  patient_context: Patient demographic information
105
  model_prediction: Disease prediction (disease, confidence, probabilities)
106
  extracted_biomarkers: Original extracted biomarkers (for natural language input)
107
+
108
  Returns:
109
  Complete AnalysisResponse with all details
110
  """
 
117
  try:
118
  # Create PatientInput
119
  patient_input = PatientInput(
120
+ biomarkers=biomarkers, model_prediction=model_prediction, patient_context=patient_context
 
 
121
  )
122
 
123
  # Run workflow
 
134
  extracted_biomarkers=extracted_biomarkers,
135
  patient_context=patient_context,
136
  model_prediction=model_prediction,
137
+ processing_time_ms=processing_time_ms,
138
  )
139
 
140
  return response
 
151
  extracted_biomarkers: dict[str, float],
152
  patient_context: dict[str, Any],
153
  model_prediction: dict[str, Any],
154
+ processing_time_ms: float,
155
  ) -> AnalysisResponse:
156
  """
157
  Format complete detailed response from workflow result.
158
  Preserves ALL data from workflow execution.
159
+
160
  workflow_result is now the full LangGraph state dict containing:
161
  - final_response: dict from response_synthesizer
162
  - agent_outputs: list of AgentOutput objects
 
172
  prediction = Prediction(
173
  disease=model_prediction["disease"],
174
  confidence=model_prediction["confidence"],
175
+ probabilities=model_prediction.get("probabilities", {}),
176
  )
177
 
178
  # Biomarker flags: prefer state-level data (BiomarkerFlag objects from validator),
 
181
  if state_flags:
182
  biomarker_flags = []
183
  for flag in state_flags:
184
+ if hasattr(flag, "model_dump"):
185
  biomarker_flags.append(BiomarkerFlag(**flag.model_dump()))
186
  elif isinstance(flag, dict):
187
  biomarker_flags.append(BiomarkerFlag(**flag))
 
199
  if state_alerts:
200
  safety_alerts = []
201
  for alert in state_alerts:
202
+ if hasattr(alert, "model_dump"):
203
  safety_alerts.append(SafetyAlert(**alert.model_dump()))
204
  elif isinstance(alert, dict):
205
  safety_alerts.append(SafetyAlert(**alert))
 
228
  disease_explanation = DiseaseExplanation(
229
  pathophysiology=disease_exp_data.get("pathophysiology", ""),
230
  citations=disease_exp_data.get("citations", []),
231
+ retrieved_chunks=disease_exp_data.get("retrieved_chunks"),
232
  )
233
 
234
  # Recommendations from synthesizer
 
241
  immediate_actions=recs_data.get("immediate_actions", []),
242
  lifestyle_changes=recs_data.get("lifestyle_changes", []),
243
  monitoring=recs_data.get("monitoring", []),
244
+ follow_up=recs_data.get("follow_up"),
245
  )
246
 
247
  # Confidence assessment from synthesizer
 
252
  prediction_reliability=conf_data.get("prediction_reliability", "UNKNOWN"),
253
  evidence_strength=conf_data.get("evidence_strength", "UNKNOWN"),
254
  limitations=conf_data.get("limitations", []),
255
+ reasoning=conf_data.get("reasoning"),
256
  )
257
 
258
  # Alternative diagnoses
 
268
  disease_explanation=disease_explanation,
269
  recommendations=recommendations,
270
  confidence_assessment=confidence_assessment,
271
+ alternative_diagnoses=alternative_diagnoses,
272
  )
273
 
274
  # Agent outputs from state (these are src.state.AgentOutput objects)
275
  agent_outputs_data = workflow_result.get("agent_outputs", [])
276
  agent_outputs = []
277
  for agent_out in agent_outputs_data:
278
+ if hasattr(agent_out, "model_dump"):
279
  agent_outputs.append(AgentOutput(**agent_out.model_dump()))
280
  elif isinstance(agent_out, dict):
281
  agent_outputs.append(AgentOutput(**agent_out))
 
285
  "sop_version": workflow_result.get("sop_version"),
286
  "processing_timestamp": workflow_result.get("processing_timestamp"),
287
  "agents_executed": len(agent_outputs),
288
+ "workflow_success": True,
289
  }
290
 
291
  # Conversational summary (if available)
 
299
  prediction=prediction,
300
  safety_alerts=safety_alerts,
301
  key_drivers=key_drivers,
302
+ recommendations=recommendations,
303
  )
304
 
305
  # Assemble final response
 
316
  workflow_metadata=workflow_metadata,
317
  conversational_summary=conversational_summary,
318
  processing_time_ms=processing_time_ms,
319
+ sop_version=workflow_result.get("sop_version", "Baseline"),
320
  )
321
 
322
  return response
323
 
324
  def _generate_conversational_summary(
325
+ self, prediction: Prediction, safety_alerts: list, key_drivers: list, recommendations: Recommendations
 
 
 
 
326
  ) -> str:
327
  """Generate a simple conversational summary"""
328
 
archive/evolution/__init__.py CHANGED
@@ -15,15 +15,15 @@ from .director import (
15
  from .pareto import analyze_improvements, identify_pareto_front, print_pareto_summary, visualize_pareto_frontier
16
 
17
  __all__ = [
18
- 'Diagnosis',
19
- 'EvolvedSOPs',
20
- 'SOPGenePool',
21
- 'SOPMutation',
22
- 'analyze_improvements',
23
- 'identify_pareto_front',
24
- 'performance_diagnostician',
25
- 'print_pareto_summary',
26
- 'run_evolution_cycle',
27
- 'sop_architect',
28
- 'visualize_pareto_frontier'
29
  ]
 
15
  from .pareto import analyze_improvements, identify_pareto_front, print_pareto_summary, visualize_pareto_frontier
16
 
17
  __all__ = [
18
+ "Diagnosis",
19
+ "EvolvedSOPs",
20
+ "SOPGenePool",
21
+ "SOPMutation",
22
+ "analyze_improvements",
23
+ "identify_pareto_front",
24
+ "performance_diagnostician",
25
+ "print_pareto_summary",
26
+ "run_evolution_cycle",
27
+ "sop_architect",
28
+ "visualize_pareto_frontier",
29
  ]
archive/evolution/director.py CHANGED
@@ -25,7 +25,7 @@ class SOPGenePool:
25
  sop: ExplanationSOP,
26
  evaluation: EvaluationResult,
27
  parent_version: int | None = None,
28
- description: str = ""
29
  ):
30
  """Add a new SOP to the gene pool"""
31
  self.version_counter += 1
@@ -34,7 +34,7 @@ class SOPGenePool:
34
  "sop": sop,
35
  "evaluation": evaluation,
36
  "parent": parent_version,
37
- "description": description
38
  }
39
  self.pool.append(entry)
40
  self.gene_pool = self.pool # Keep in sync
@@ -47,7 +47,7 @@ class SOPGenePool:
47
  def get_by_version(self, version: int) -> dict[str, Any] | None:
48
  """Retrieve specific SOP version"""
49
  for entry in self.pool:
50
- if entry['version'] == version:
51
  return entry
52
  return None
53
 
@@ -56,10 +56,7 @@ class SOPGenePool:
56
  if not self.pool:
57
  return None
58
 
59
- best = max(
60
- self.pool,
61
- key=lambda x: getattr(x['evaluation'], metric).score
62
- )
63
  return best
64
 
65
  def summary(self):
@@ -69,10 +66,10 @@ class SOPGenePool:
69
  print("=" * 80)
70
 
71
  for entry in self.pool:
72
- v = entry['version']
73
- p = entry['parent']
74
- desc = entry['description']
75
- e = entry['evaluation']
76
 
77
  parent_str = "(Baseline)" if p is None else f"(Child of v{p})"
78
 
@@ -88,23 +85,17 @@ class SOPGenePool:
88
 
89
  class Diagnosis(BaseModel):
90
  """Structured diagnosis from Performance Diagnostician"""
 
91
  primary_weakness: Literal[
92
- 'clinical_accuracy',
93
- 'evidence_grounding',
94
- 'actionability',
95
- 'clarity',
96
- 'safety_completeness'
97
  ]
98
- root_cause_analysis: str = Field(
99
- description="Detailed analysis of why weakness occurred"
100
- )
101
- recommendation: str = Field(
102
- description="High-level recommendation to fix the problem"
103
- )
104
 
105
 
106
  class SOPMutation(BaseModel):
107
  """Single mutated SOP with description"""
 
108
  description: str = Field(description="Brief description of mutation strategy")
109
  # SOP fields from ExplanationSOP
110
  biomarker_analyzer_threshold: float = 0.15
@@ -121,6 +112,7 @@ class SOPMutation(BaseModel):
121
 
122
  class EvolvedSOPs(BaseModel):
123
  """Container for mutated SOPs from Architect"""
 
124
  mutations: list[SOPMutation]
125
 
126
 
@@ -135,19 +127,19 @@ def performance_diagnostician(evaluation: EvaluationResult) -> Diagnosis:
135
 
136
  # Find lowest score programmatically (no LLM needed)
137
  scores = {
138
- 'clinical_accuracy': evaluation.clinical_accuracy.score,
139
- 'evidence_grounding': evaluation.evidence_grounding.score,
140
- 'actionability': evaluation.actionability.score,
141
- 'clarity': evaluation.clarity.score,
142
- 'safety_completeness': evaluation.safety_completeness.score
143
  }
144
 
145
  reasonings = {
146
- 'clinical_accuracy': evaluation.clinical_accuracy.reasoning,
147
- 'evidence_grounding': evaluation.evidence_grounding.reasoning,
148
- 'actionability': evaluation.actionability.reasoning,
149
- 'clarity': evaluation.clarity.reasoning,
150
- 'safety_completeness': evaluation.safety_completeness.reasoning
151
  }
152
 
153
  primary_weakness = min(scores, key=scores.get)
@@ -156,25 +148,25 @@ def performance_diagnostician(evaluation: EvaluationResult) -> Diagnosis:
156
 
157
  # Generate detailed root cause analysis
158
  root_cause_map = {
159
- 'clinical_accuracy': f"Clinical accuracy score ({weakness_score:.2f}) indicates potential issues with medical interpretations. {weakness_reasoning[:200]}",
160
- 'evidence_grounding': f"Evidence grounding score ({weakness_score:.2f}) suggests insufficient citations. {weakness_reasoning[:200]}",
161
- 'actionability': f"Actionability score ({weakness_score:.2f}) indicates recommendations lack specificity. {weakness_reasoning[:200]}",
162
- 'clarity': f"Clarity score ({weakness_score:.2f}) suggests readability issues. {weakness_reasoning[:200]}",
163
- 'safety_completeness': f"Safety score ({weakness_score:.2f}) indicates missing risk discussions. {weakness_reasoning[:200]}"
164
  }
165
 
166
  recommendation_map = {
167
- 'clinical_accuracy': "Increase RAG depth to access more authoritative medical sources.",
168
- 'evidence_grounding': "Enforce strict citation requirements and increase RAG depth.",
169
- 'actionability': "Make recommendations more specific with concrete action items.",
170
- 'clarity': "Simplify language and reduce technical jargon for better readability.",
171
- 'safety_completeness': "Add explicit safety warnings and ensure complete risk coverage."
172
  }
173
 
174
  diagnosis = Diagnosis(
175
  primary_weakness=primary_weakness,
176
  root_cause_analysis=root_cause_map[primary_weakness],
177
- recommendation=recommendation_map[primary_weakness]
178
  )
179
 
180
  print("\nβœ“ Diagnosis complete")
@@ -184,10 +176,7 @@ def performance_diagnostician(evaluation: EvaluationResult) -> Diagnosis:
184
  return diagnosis
185
 
186
 
187
- def sop_architect(
188
- diagnosis: Diagnosis,
189
- current_sop: ExplanationSOP
190
- ) -> EvolvedSOPs:
191
  """
192
  Generates targeted SOP mutations to address diagnosed weakness.
193
  Uses programmatic generation for reliability.
@@ -200,116 +189,116 @@ def sop_architect(
200
  weakness = diagnosis.primary_weakness
201
 
202
  # Generate mutations based on weakness type
203
- if weakness == 'clarity':
204
  mut1 = SOPMutation(
205
  disease_explainer_k=max(3, current_sop.disease_explainer_k - 1),
206
  linker_retrieval_k=max(2, current_sop.linker_retrieval_k - 1),
207
  guideline_retrieval_k=max(2, current_sop.guideline_retrieval_k - 1),
208
- explainer_detail_level='concise',
209
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
210
  use_guideline_agent=current_sop.use_guideline_agent,
211
  include_alternative_diagnoses=False,
212
  require_pdf_citations=current_sop.require_pdf_citations,
213
  use_confidence_assessor=current_sop.use_confidence_assessor,
214
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
215
- description="Reduce retrieval depth and use concise style for clarity"
216
  )
217
  mut2 = SOPMutation(
218
  disease_explainer_k=current_sop.disease_explainer_k,
219
  linker_retrieval_k=current_sop.linker_retrieval_k,
220
  guideline_retrieval_k=current_sop.guideline_retrieval_k,
221
- explainer_detail_level='detailed',
222
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
223
  use_guideline_agent=current_sop.use_guideline_agent,
224
  include_alternative_diagnoses=True,
225
  require_pdf_citations=False,
226
  use_confidence_assessor=current_sop.use_confidence_assessor,
227
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
228
- description="Balanced detail with fewer citations for readability"
229
  )
230
 
231
- elif weakness == 'evidence_grounding':
232
  mut1 = SOPMutation(
233
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
234
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
235
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
236
- explainer_detail_level='comprehensive',
237
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
238
  use_guideline_agent=True,
239
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
240
  require_pdf_citations=True,
241
  use_confidence_assessor=current_sop.use_confidence_assessor,
242
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
243
- description="Maximum RAG depth with strict citation requirements"
244
  )
245
  mut2 = SOPMutation(
246
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
247
  linker_retrieval_k=current_sop.linker_retrieval_k,
248
  guideline_retrieval_k=current_sop.guideline_retrieval_k,
249
- explainer_detail_level='detailed',
250
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
251
  use_guideline_agent=True,
252
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
253
  require_pdf_citations=True,
254
  use_confidence_assessor=current_sop.use_confidence_assessor,
255
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
256
- description="Moderate RAG increase with citation enforcement"
257
  )
258
 
259
- elif weakness == 'actionability':
260
  mut1 = SOPMutation(
261
  disease_explainer_k=current_sop.disease_explainer_k,
262
  linker_retrieval_k=current_sop.linker_retrieval_k,
263
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 2),
264
- explainer_detail_level='comprehensive',
265
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
266
  use_guideline_agent=True,
267
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
268
  require_pdf_citations=True,
269
  use_confidence_assessor=current_sop.use_confidence_assessor,
270
- critical_value_alert_mode='strict',
271
- description="Increase guideline retrieval for actionable recommendations"
272
  )
273
  mut2 = SOPMutation(
274
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
275
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
276
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
277
- explainer_detail_level='detailed',
278
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
279
  use_guideline_agent=True,
280
  include_alternative_diagnoses=True,
281
  require_pdf_citations=True,
282
  use_confidence_assessor=True,
283
- critical_value_alert_mode='strict',
284
- description="Comprehensive approach with all agents enabled"
285
  )
286
 
287
- elif weakness == 'clinical_accuracy':
288
  mut1 = SOPMutation(
289
  disease_explainer_k=10,
290
  linker_retrieval_k=5,
291
  guideline_retrieval_k=5,
292
- explainer_detail_level='comprehensive',
293
  biomarker_analyzer_threshold=max(0.10, current_sop.biomarker_analyzer_threshold - 0.05),
294
  use_guideline_agent=True,
295
  include_alternative_diagnoses=True,
296
  require_pdf_citations=True,
297
  use_confidence_assessor=True,
298
- critical_value_alert_mode='strict',
299
- description="Maximum RAG depth with strict thresholds for accuracy"
300
  )
301
  mut2 = SOPMutation(
302
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
303
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
304
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
305
- explainer_detail_level='comprehensive',
306
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
307
  use_guideline_agent=True,
308
  include_alternative_diagnoses=True,
309
  require_pdf_citations=True,
310
  use_confidence_assessor=True,
311
- critical_value_alert_mode='strict',
312
- description="High RAG depth with comprehensive detail"
313
  )
314
 
315
  else: # safety_completeness
@@ -317,27 +306,27 @@ def sop_architect(
317
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
318
  linker_retrieval_k=current_sop.linker_retrieval_k,
319
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 2),
320
- explainer_detail_level='comprehensive',
321
  biomarker_analyzer_threshold=max(0.10, current_sop.biomarker_analyzer_threshold - 0.03),
322
  use_guideline_agent=True,
323
  include_alternative_diagnoses=True,
324
  require_pdf_citations=True,
325
  use_confidence_assessor=True,
326
- critical_value_alert_mode='strict',
327
- description="Strict safety mode with enhanced guidelines"
328
  )
329
  mut2 = SOPMutation(
330
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
331
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
332
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
333
- explainer_detail_level='comprehensive',
334
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
335
  use_guideline_agent=True,
336
  include_alternative_diagnoses=True,
337
  require_pdf_citations=True,
338
  use_confidence_assessor=True,
339
- critical_value_alert_mode='strict',
340
- description="Maximum coverage with all safety features"
341
  )
342
 
343
  evolved = EvolvedSOPs(mutations=[mut1, mut2])
@@ -351,10 +340,7 @@ def sop_architect(
351
 
352
 
353
  def run_evolution_cycle(
354
- gene_pool: SOPGenePool,
355
- patient_input: Any,
356
- workflow_graph: Any,
357
- evaluation_func: Callable
358
  ) -> list[dict[str, Any]]:
359
  """
360
  Executes one complete evolution cycle:
@@ -362,7 +348,7 @@ def run_evolution_cycle(
362
  2. Generate mutations
363
  3. Test each mutation
364
  4. Add to gene pool
365
-
366
  Returns: List of new entries added to pool
367
  """
368
  print("\n" + "=" * 80)
@@ -374,9 +360,9 @@ def run_evolution_cycle(
374
  if not current_best:
375
  raise ValueError("Gene pool is empty. Add baseline SOP first.")
376
 
377
- parent_sop = current_best['sop']
378
- parent_eval = current_best['evaluation']
379
- parent_version = current_best['version']
380
 
381
  print(f"\nImproving upon SOP v{parent_version}")
382
 
@@ -395,11 +381,12 @@ def run_evolution_cycle(
395
 
396
  # Convert SOPMutation to ExplanationSOP
397
  mutant_sop_dict = mutant_sop_model.model_dump()
398
- description = mutant_sop_dict.pop('description')
399
  mutant_sop = ExplanationSOP(**mutant_sop_dict)
400
 
401
  # Run workflow with mutated SOP
402
  from datetime import datetime
 
403
  graph_input = {
404
  "patient_biomarkers": patient_input.biomarkers,
405
  "model_prediction": patient_input.model_prediction,
@@ -412,7 +399,7 @@ def run_evolution_cycle(
412
  "biomarker_analysis": None,
413
  "final_response": None,
414
  "processing_timestamp": datetime.now().isoformat(),
415
- "sop_version": description
416
  }
417
 
418
  try:
@@ -420,24 +407,15 @@ def run_evolution_cycle(
420
 
421
  # Evaluate output
422
  evaluation = evaluation_func(
423
- final_response=final_state['final_response'],
424
- agent_outputs=final_state['agent_outputs'],
425
- biomarkers=patient_input.biomarkers
426
  )
427
 
428
  # Add to gene pool
429
- gene_pool.add(
430
- sop=mutant_sop,
431
- evaluation=evaluation,
432
- parent_version=parent_version,
433
- description=description
434
- )
435
 
436
- new_entries.append({
437
- "sop": mutant_sop,
438
- "evaluation": evaluation,
439
- "description": description
440
- })
441
  except Exception as e:
442
  print(f"❌ Mutation {i} failed: {e}")
443
  continue
 
25
  sop: ExplanationSOP,
26
  evaluation: EvaluationResult,
27
  parent_version: int | None = None,
28
+ description: str = "",
29
  ):
30
  """Add a new SOP to the gene pool"""
31
  self.version_counter += 1
 
34
  "sop": sop,
35
  "evaluation": evaluation,
36
  "parent": parent_version,
37
+ "description": description,
38
  }
39
  self.pool.append(entry)
40
  self.gene_pool = self.pool # Keep in sync
 
47
  def get_by_version(self, version: int) -> dict[str, Any] | None:
48
  """Retrieve specific SOP version"""
49
  for entry in self.pool:
50
+ if entry["version"] == version:
51
  return entry
52
  return None
53
 
 
56
  if not self.pool:
57
  return None
58
 
59
+ best = max(self.pool, key=lambda x: getattr(x["evaluation"], metric).score)
 
 
 
60
  return best
61
 
62
  def summary(self):
 
66
  print("=" * 80)
67
 
68
  for entry in self.pool:
69
+ v = entry["version"]
70
+ p = entry["parent"]
71
+ desc = entry["description"]
72
+ e = entry["evaluation"]
73
 
74
  parent_str = "(Baseline)" if p is None else f"(Child of v{p})"
75
 
 
85
 
86
  class Diagnosis(BaseModel):
87
  """Structured diagnosis from Performance Diagnostician"""
88
+
89
  primary_weakness: Literal[
90
+ "clinical_accuracy", "evidence_grounding", "actionability", "clarity", "safety_completeness"
 
 
 
 
91
  ]
92
+ root_cause_analysis: str = Field(description="Detailed analysis of why weakness occurred")
93
+ recommendation: str = Field(description="High-level recommendation to fix the problem")
 
 
 
 
94
 
95
 
96
  class SOPMutation(BaseModel):
97
  """Single mutated SOP with description"""
98
+
99
  description: str = Field(description="Brief description of mutation strategy")
100
  # SOP fields from ExplanationSOP
101
  biomarker_analyzer_threshold: float = 0.15
 
112
 
113
  class EvolvedSOPs(BaseModel):
114
  """Container for mutated SOPs from Architect"""
115
+
116
  mutations: list[SOPMutation]
117
 
118
 
 
127
 
128
  # Find lowest score programmatically (no LLM needed)
129
  scores = {
130
+ "clinical_accuracy": evaluation.clinical_accuracy.score,
131
+ "evidence_grounding": evaluation.evidence_grounding.score,
132
+ "actionability": evaluation.actionability.score,
133
+ "clarity": evaluation.clarity.score,
134
+ "safety_completeness": evaluation.safety_completeness.score,
135
  }
136
 
137
  reasonings = {
138
+ "clinical_accuracy": evaluation.clinical_accuracy.reasoning,
139
+ "evidence_grounding": evaluation.evidence_grounding.reasoning,
140
+ "actionability": evaluation.actionability.reasoning,
141
+ "clarity": evaluation.clarity.reasoning,
142
+ "safety_completeness": evaluation.safety_completeness.reasoning,
143
  }
144
 
145
  primary_weakness = min(scores, key=scores.get)
 
148
 
149
  # Generate detailed root cause analysis
150
  root_cause_map = {
151
+ "clinical_accuracy": f"Clinical accuracy score ({weakness_score:.2f}) indicates potential issues with medical interpretations. {weakness_reasoning[:200]}",
152
+ "evidence_grounding": f"Evidence grounding score ({weakness_score:.2f}) suggests insufficient citations. {weakness_reasoning[:200]}",
153
+ "actionability": f"Actionability score ({weakness_score:.2f}) indicates recommendations lack specificity. {weakness_reasoning[:200]}",
154
+ "clarity": f"Clarity score ({weakness_score:.2f}) suggests readability issues. {weakness_reasoning[:200]}",
155
+ "safety_completeness": f"Safety score ({weakness_score:.2f}) indicates missing risk discussions. {weakness_reasoning[:200]}",
156
  }
157
 
158
  recommendation_map = {
159
+ "clinical_accuracy": "Increase RAG depth to access more authoritative medical sources.",
160
+ "evidence_grounding": "Enforce strict citation requirements and increase RAG depth.",
161
+ "actionability": "Make recommendations more specific with concrete action items.",
162
+ "clarity": "Simplify language and reduce technical jargon for better readability.",
163
+ "safety_completeness": "Add explicit safety warnings and ensure complete risk coverage.",
164
  }
165
 
166
  diagnosis = Diagnosis(
167
  primary_weakness=primary_weakness,
168
  root_cause_analysis=root_cause_map[primary_weakness],
169
+ recommendation=recommendation_map[primary_weakness],
170
  )
171
 
172
  print("\nβœ“ Diagnosis complete")
 
176
  return diagnosis
177
 
178
 
179
+ def sop_architect(diagnosis: Diagnosis, current_sop: ExplanationSOP) -> EvolvedSOPs:
 
 
 
180
  """
181
  Generates targeted SOP mutations to address diagnosed weakness.
182
  Uses programmatic generation for reliability.
 
189
  weakness = diagnosis.primary_weakness
190
 
191
  # Generate mutations based on weakness type
192
+ if weakness == "clarity":
193
  mut1 = SOPMutation(
194
  disease_explainer_k=max(3, current_sop.disease_explainer_k - 1),
195
  linker_retrieval_k=max(2, current_sop.linker_retrieval_k - 1),
196
  guideline_retrieval_k=max(2, current_sop.guideline_retrieval_k - 1),
197
+ explainer_detail_level="concise",
198
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
199
  use_guideline_agent=current_sop.use_guideline_agent,
200
  include_alternative_diagnoses=False,
201
  require_pdf_citations=current_sop.require_pdf_citations,
202
  use_confidence_assessor=current_sop.use_confidence_assessor,
203
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
204
+ description="Reduce retrieval depth and use concise style for clarity",
205
  )
206
  mut2 = SOPMutation(
207
  disease_explainer_k=current_sop.disease_explainer_k,
208
  linker_retrieval_k=current_sop.linker_retrieval_k,
209
  guideline_retrieval_k=current_sop.guideline_retrieval_k,
210
+ explainer_detail_level="detailed",
211
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
212
  use_guideline_agent=current_sop.use_guideline_agent,
213
  include_alternative_diagnoses=True,
214
  require_pdf_citations=False,
215
  use_confidence_assessor=current_sop.use_confidence_assessor,
216
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
217
+ description="Balanced detail with fewer citations for readability",
218
  )
219
 
220
+ elif weakness == "evidence_grounding":
221
  mut1 = SOPMutation(
222
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
223
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
224
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
225
+ explainer_detail_level="comprehensive",
226
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
227
  use_guideline_agent=True,
228
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
229
  require_pdf_citations=True,
230
  use_confidence_assessor=current_sop.use_confidence_assessor,
231
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
232
+ description="Maximum RAG depth with strict citation requirements",
233
  )
234
  mut2 = SOPMutation(
235
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
236
  linker_retrieval_k=current_sop.linker_retrieval_k,
237
  guideline_retrieval_k=current_sop.guideline_retrieval_k,
238
+ explainer_detail_level="detailed",
239
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
240
  use_guideline_agent=True,
241
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
242
  require_pdf_citations=True,
243
  use_confidence_assessor=current_sop.use_confidence_assessor,
244
  critical_value_alert_mode=current_sop.critical_value_alert_mode,
245
+ description="Moderate RAG increase with citation enforcement",
246
  )
247
 
248
+ elif weakness == "actionability":
249
  mut1 = SOPMutation(
250
  disease_explainer_k=current_sop.disease_explainer_k,
251
  linker_retrieval_k=current_sop.linker_retrieval_k,
252
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 2),
253
+ explainer_detail_level="comprehensive",
254
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
255
  use_guideline_agent=True,
256
  include_alternative_diagnoses=current_sop.include_alternative_diagnoses,
257
  require_pdf_citations=True,
258
  use_confidence_assessor=current_sop.use_confidence_assessor,
259
+ critical_value_alert_mode="strict",
260
+ description="Increase guideline retrieval for actionable recommendations",
261
  )
262
  mut2 = SOPMutation(
263
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
264
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
265
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
266
+ explainer_detail_level="detailed",
267
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
268
  use_guideline_agent=True,
269
  include_alternative_diagnoses=True,
270
  require_pdf_citations=True,
271
  use_confidence_assessor=True,
272
+ critical_value_alert_mode="strict",
273
+ description="Comprehensive approach with all agents enabled",
274
  )
275
 
276
+ elif weakness == "clinical_accuracy":
277
  mut1 = SOPMutation(
278
  disease_explainer_k=10,
279
  linker_retrieval_k=5,
280
  guideline_retrieval_k=5,
281
+ explainer_detail_level="comprehensive",
282
  biomarker_analyzer_threshold=max(0.10, current_sop.biomarker_analyzer_threshold - 0.05),
283
  use_guideline_agent=True,
284
  include_alternative_diagnoses=True,
285
  require_pdf_citations=True,
286
  use_confidence_assessor=True,
287
+ critical_value_alert_mode="strict",
288
+ description="Maximum RAG depth with strict thresholds for accuracy",
289
  )
290
  mut2 = SOPMutation(
291
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
292
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
293
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
294
+ explainer_detail_level="comprehensive",
295
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
296
  use_guideline_agent=True,
297
  include_alternative_diagnoses=True,
298
  require_pdf_citations=True,
299
  use_confidence_assessor=True,
300
+ critical_value_alert_mode="strict",
301
+ description="High RAG depth with comprehensive detail",
302
  )
303
 
304
  else: # safety_completeness
 
306
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 1),
307
  linker_retrieval_k=current_sop.linker_retrieval_k,
308
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 2),
309
+ explainer_detail_level="comprehensive",
310
  biomarker_analyzer_threshold=max(0.10, current_sop.biomarker_analyzer_threshold - 0.03),
311
  use_guideline_agent=True,
312
  include_alternative_diagnoses=True,
313
  require_pdf_citations=True,
314
  use_confidence_assessor=True,
315
+ critical_value_alert_mode="strict",
316
+ description="Strict safety mode with enhanced guidelines",
317
  )
318
  mut2 = SOPMutation(
319
  disease_explainer_k=min(10, current_sop.disease_explainer_k + 2),
320
  linker_retrieval_k=min(5, current_sop.linker_retrieval_k + 1),
321
  guideline_retrieval_k=min(5, current_sop.guideline_retrieval_k + 1),
322
+ explainer_detail_level="comprehensive",
323
  biomarker_analyzer_threshold=current_sop.biomarker_analyzer_threshold,
324
  use_guideline_agent=True,
325
  include_alternative_diagnoses=True,
326
  require_pdf_citations=True,
327
  use_confidence_assessor=True,
328
+ critical_value_alert_mode="strict",
329
+ description="Maximum coverage with all safety features",
330
  )
331
 
332
  evolved = EvolvedSOPs(mutations=[mut1, mut2])
 
340
 
341
 
342
  def run_evolution_cycle(
343
+ gene_pool: SOPGenePool, patient_input: Any, workflow_graph: Any, evaluation_func: Callable
 
 
 
344
  ) -> list[dict[str, Any]]:
345
  """
346
  Executes one complete evolution cycle:
 
348
  2. Generate mutations
349
  3. Test each mutation
350
  4. Add to gene pool
351
+
352
  Returns: List of new entries added to pool
353
  """
354
  print("\n" + "=" * 80)
 
360
  if not current_best:
361
  raise ValueError("Gene pool is empty. Add baseline SOP first.")
362
 
363
+ parent_sop = current_best["sop"]
364
+ parent_eval = current_best["evaluation"]
365
+ parent_version = current_best["version"]
366
 
367
  print(f"\nImproving upon SOP v{parent_version}")
368
 
 
381
 
382
  # Convert SOPMutation to ExplanationSOP
383
  mutant_sop_dict = mutant_sop_model.model_dump()
384
+ description = mutant_sop_dict.pop("description")
385
  mutant_sop = ExplanationSOP(**mutant_sop_dict)
386
 
387
  # Run workflow with mutated SOP
388
  from datetime import datetime
389
+
390
  graph_input = {
391
  "patient_biomarkers": patient_input.biomarkers,
392
  "model_prediction": patient_input.model_prediction,
 
399
  "biomarker_analysis": None,
400
  "final_response": None,
401
  "processing_timestamp": datetime.now().isoformat(),
402
+ "sop_version": description,
403
  }
404
 
405
  try:
 
407
 
408
  # Evaluate output
409
  evaluation = evaluation_func(
410
+ final_response=final_state["final_response"],
411
+ agent_outputs=final_state["agent_outputs"],
412
+ biomarkers=patient_input.biomarkers,
413
  )
414
 
415
  # Add to gene pool
416
+ gene_pool.add(sop=mutant_sop, evaluation=evaluation, parent_version=parent_version, description=description)
 
 
 
 
 
417
 
418
+ new_entries.append({"sop": mutant_sop, "evaluation": evaluation, "description": description})
 
 
 
 
419
  except Exception as e:
420
  print(f"❌ Mutation {i} failed: {e}")
421
  continue
archive/evolution/pareto.py CHANGED
@@ -8,14 +8,14 @@ from typing import Any
8
  import matplotlib
9
  import numpy as np
10
 
11
- matplotlib.use('Agg') # Use non-interactive backend
12
  import matplotlib.pyplot as plt
13
 
14
 
15
  def identify_pareto_front(gene_pool_entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
16
  """
17
  Identifies non-dominated solutions (Pareto Frontier).
18
-
19
  A solution is dominated if another solution is:
20
  - Better or equal on ALL metrics
21
  - Strictly better on AT LEAST ONE metric
@@ -26,14 +26,14 @@ def identify_pareto_front(gene_pool_entries: list[dict[str, Any]]) -> list[dict[
26
  is_dominated = False
27
 
28
  # Get candidate's 5D score vector
29
- cand_scores = np.array(candidate['evaluation'].to_vector())
30
 
31
  for j, other in enumerate(gene_pool_entries):
32
  if i == j:
33
  continue
34
 
35
  # Get other solution's 5D vector
36
- other_scores = np.array(other['evaluation'].to_vector())
37
 
38
  # Check domination: other >= candidate on ALL, other > candidate on SOME
39
  if np.all(other_scores >= cand_scores) and np.any(other_scores > cand_scores):
@@ -61,75 +61,75 @@ def visualize_pareto_frontier(pareto_front: list[dict[str, Any]]):
61
  # --- Plot 1: Bar Chart (since pandas might not be available) ---
62
  ax1 = plt.subplot(1, 2, 1)
63
 
64
- metrics = ['Clinical\nAccuracy', 'Evidence\nGrounding', 'Actionability', 'Clarity', 'Safety']
65
  x = np.arange(len(metrics))
66
  width = 0.8 / len(pareto_front)
67
 
68
  for idx, entry in enumerate(pareto_front):
69
- e = entry['evaluation']
70
  scores = [
71
  e.clinical_accuracy.score,
72
  e.evidence_grounding.score,
73
  e.actionability.score,
74
  e.clarity.score,
75
- e.safety_completeness.score
76
  ]
77
 
78
  offset = (idx - len(pareto_front) / 2) * width + width / 2
79
  label = f"SOP v{entry['version']}"
80
  ax1.bar(x + offset, scores, width, label=label, alpha=0.8)
81
 
82
- ax1.set_xlabel('Metrics', fontsize=12)
83
- ax1.set_ylabel('Score', fontsize=12)
84
- ax1.set_title('5D Performance Comparison (Bar Chart)', fontsize=14)
85
  ax1.set_xticks(x)
86
  ax1.set_xticklabels(metrics, fontsize=10)
87
  ax1.set_ylim(0, 1.0)
88
- ax1.legend(loc='upper left')
89
- ax1.grid(True, alpha=0.3, axis='y')
90
 
91
  # --- Plot 2: Radar Chart ---
92
- ax2 = plt.subplot(1, 2, 2, projection='polar')
93
 
94
- categories = ['Clinical\nAccuracy', 'Evidence\nGrounding',
95
- 'Actionability', 'Clarity', 'Safety']
96
  num_vars = len(categories)
97
 
98
  angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
99
  angles += angles[:1]
100
 
101
  for entry in pareto_front:
102
- e = entry['evaluation']
103
  values = [
104
  e.clinical_accuracy.score,
105
  e.evidence_grounding.score,
106
  e.actionability.score,
107
  e.clarity.score,
108
- e.safety_completeness.score
109
  ]
110
  values += values[:1]
111
 
112
- desc = entry.get('description', '')[:30]
113
  label = f"SOP v{entry['version']}: {desc}"
114
- ax2.plot(angles, values, 'o-', linewidth=2, label=label)
115
  ax2.fill(angles, values, alpha=0.15)
116
 
117
  ax2.set_xticks(angles[:-1])
118
  ax2.set_xticklabels(categories, size=10)
119
  ax2.set_ylim(0, 1)
120
- ax2.set_title('5D Performance Profiles (Radar Chart)', size=14, y=1.08)
121
- ax2.legend(loc='upper left', bbox_to_anchor=(1.2, 1.0), fontsize=9)
122
  ax2.grid(True)
123
 
124
  plt.tight_layout()
125
 
126
  # Create data directory if it doesn't exist
127
  from pathlib import Path
128
- data_dir = Path('data')
 
129
  data_dir.mkdir(exist_ok=True)
130
 
131
- output_path = data_dir / 'pareto_frontier_analysis.png'
132
- plt.savefig(output_path, dpi=300, bbox_inches='tight')
133
  plt.close()
134
 
135
  print(f"\nβœ“ Visualization saved to: {output_path}")
@@ -144,10 +144,10 @@ def print_pareto_summary(pareto_front: list[dict[str, Any]]):
144
  print(f"\nFound {len(pareto_front)} optimal (non-dominated) solutions:\n")
145
 
146
  for entry in pareto_front:
147
- v = entry['version']
148
- p = entry.get('parent')
149
- desc = entry.get('description', 'Baseline')
150
- e = entry['evaluation']
151
 
152
  print(f"SOP v{v} {f'(Child of v{p})' if p else '(Baseline)'}")
153
  print(f" Description: {desc}")
@@ -176,7 +176,7 @@ def analyze_improvements(gene_pool_entries: list[dict[str, Any]]):
176
  return
177
 
178
  baseline = gene_pool_entries[0]
179
- baseline_scores = np.array(baseline['evaluation'].to_vector())
180
 
181
  print("\n" + "=" * 80)
182
  print("IMPROVEMENT ANALYSIS")
@@ -187,7 +187,7 @@ def analyze_improvements(gene_pool_entries: list[dict[str, Any]]):
187
 
188
  improvements_found = False
189
  for entry in gene_pool_entries[1:]:
190
- scores = np.array(entry['evaluation'].to_vector())
191
  avg_score = np.mean(scores)
192
  baseline_avg = np.mean(baseline_scores)
193
 
@@ -199,8 +199,13 @@ def analyze_improvements(gene_pool_entries: list[dict[str, Any]]):
199
  print(f" Average Score: {avg_score:.3f} (+{improvement_pct:.1f}% vs baseline)")
200
 
201
  # Show per-metric improvements
202
- metric_names = ['Clinical Accuracy', 'Evidence Grounding', 'Actionability',
203
- 'Clarity', 'Safety & Completeness']
 
 
 
 
 
204
  for i, (name, score, baseline_score) in enumerate(zip(metric_names, scores, baseline_scores)):
205
  diff = score - baseline_score
206
  if abs(diff) > 0.01: # Show significant changes
 
8
  import matplotlib
9
  import numpy as np
10
 
11
+ matplotlib.use("Agg") # Use non-interactive backend
12
  import matplotlib.pyplot as plt
13
 
14
 
15
  def identify_pareto_front(gene_pool_entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
16
  """
17
  Identifies non-dominated solutions (Pareto Frontier).
18
+
19
  A solution is dominated if another solution is:
20
  - Better or equal on ALL metrics
21
  - Strictly better on AT LEAST ONE metric
 
26
  is_dominated = False
27
 
28
  # Get candidate's 5D score vector
29
+ cand_scores = np.array(candidate["evaluation"].to_vector())
30
 
31
  for j, other in enumerate(gene_pool_entries):
32
  if i == j:
33
  continue
34
 
35
  # Get other solution's 5D vector
36
+ other_scores = np.array(other["evaluation"].to_vector())
37
 
38
  # Check domination: other >= candidate on ALL, other > candidate on SOME
39
  if np.all(other_scores >= cand_scores) and np.any(other_scores > cand_scores):
 
61
  # --- Plot 1: Bar Chart (since pandas might not be available) ---
62
  ax1 = plt.subplot(1, 2, 1)
63
 
64
+ metrics = ["Clinical\nAccuracy", "Evidence\nGrounding", "Actionability", "Clarity", "Safety"]
65
  x = np.arange(len(metrics))
66
  width = 0.8 / len(pareto_front)
67
 
68
  for idx, entry in enumerate(pareto_front):
69
+ e = entry["evaluation"]
70
  scores = [
71
  e.clinical_accuracy.score,
72
  e.evidence_grounding.score,
73
  e.actionability.score,
74
  e.clarity.score,
75
+ e.safety_completeness.score,
76
  ]
77
 
78
  offset = (idx - len(pareto_front) / 2) * width + width / 2
79
  label = f"SOP v{entry['version']}"
80
  ax1.bar(x + offset, scores, width, label=label, alpha=0.8)
81
 
82
+ ax1.set_xlabel("Metrics", fontsize=12)
83
+ ax1.set_ylabel("Score", fontsize=12)
84
+ ax1.set_title("5D Performance Comparison (Bar Chart)", fontsize=14)
85
  ax1.set_xticks(x)
86
  ax1.set_xticklabels(metrics, fontsize=10)
87
  ax1.set_ylim(0, 1.0)
88
+ ax1.legend(loc="upper left")
89
+ ax1.grid(True, alpha=0.3, axis="y")
90
 
91
  # --- Plot 2: Radar Chart ---
92
+ ax2 = plt.subplot(1, 2, 2, projection="polar")
93
 
94
+ categories = ["Clinical\nAccuracy", "Evidence\nGrounding", "Actionability", "Clarity", "Safety"]
 
95
  num_vars = len(categories)
96
 
97
  angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
98
  angles += angles[:1]
99
 
100
  for entry in pareto_front:
101
+ e = entry["evaluation"]
102
  values = [
103
  e.clinical_accuracy.score,
104
  e.evidence_grounding.score,
105
  e.actionability.score,
106
  e.clarity.score,
107
+ e.safety_completeness.score,
108
  ]
109
  values += values[:1]
110
 
111
+ desc = entry.get("description", "")[:30]
112
  label = f"SOP v{entry['version']}: {desc}"
113
+ ax2.plot(angles, values, "o-", linewidth=2, label=label)
114
  ax2.fill(angles, values, alpha=0.15)
115
 
116
  ax2.set_xticks(angles[:-1])
117
  ax2.set_xticklabels(categories, size=10)
118
  ax2.set_ylim(0, 1)
119
+ ax2.set_title("5D Performance Profiles (Radar Chart)", size=14, y=1.08)
120
+ ax2.legend(loc="upper left", bbox_to_anchor=(1.2, 1.0), fontsize=9)
121
  ax2.grid(True)
122
 
123
  plt.tight_layout()
124
 
125
  # Create data directory if it doesn't exist
126
  from pathlib import Path
127
+
128
+ data_dir = Path("data")
129
  data_dir.mkdir(exist_ok=True)
130
 
131
+ output_path = data_dir / "pareto_frontier_analysis.png"
132
+ plt.savefig(output_path, dpi=300, bbox_inches="tight")
133
  plt.close()
134
 
135
  print(f"\nβœ“ Visualization saved to: {output_path}")
 
144
  print(f"\nFound {len(pareto_front)} optimal (non-dominated) solutions:\n")
145
 
146
  for entry in pareto_front:
147
+ v = entry["version"]
148
+ p = entry.get("parent")
149
+ desc = entry.get("description", "Baseline")
150
+ e = entry["evaluation"]
151
 
152
  print(f"SOP v{v} {f'(Child of v{p})' if p else '(Baseline)'}")
153
  print(f" Description: {desc}")
 
176
  return
177
 
178
  baseline = gene_pool_entries[0]
179
+ baseline_scores = np.array(baseline["evaluation"].to_vector())
180
 
181
  print("\n" + "=" * 80)
182
  print("IMPROVEMENT ANALYSIS")
 
187
 
188
  improvements_found = False
189
  for entry in gene_pool_entries[1:]:
190
+ scores = np.array(entry["evaluation"].to_vector())
191
  avg_score = np.mean(scores)
192
  baseline_avg = np.mean(baseline_scores)
193
 
 
199
  print(f" Average Score: {avg_score:.3f} (+{improvement_pct:.1f}% vs baseline)")
200
 
201
  # Show per-metric improvements
202
+ metric_names = [
203
+ "Clinical Accuracy",
204
+ "Evidence Grounding",
205
+ "Actionability",
206
+ "Clarity",
207
+ "Safety & Completeness",
208
+ ]
209
  for i, (name, score, baseline_score) in enumerate(zip(metric_names, scores, baseline_scores)):
210
  diff = score - baseline_score
211
  if abs(diff) > 0.01: # Show significant changes
archive/tests/test_evolution_loop.py CHANGED
@@ -51,35 +51,27 @@ def create_test_patient() -> PatientInput:
51
  "Sodium": 138.0,
52
  "Potassium": 4.2,
53
  "Chloride": 102.0,
54
- "Bicarbonate": 24.0
55
  }
56
 
57
  model_prediction: dict[str, Any] = {
58
- 'disease': 'Type 2 Diabetes',
59
- 'confidence': 0.92,
60
- 'probabilities': {
61
- 'Type 2 Diabetes': 0.92,
62
- 'Prediabetes': 0.05,
63
- 'Healthy': 0.03
64
- },
65
- 'prediction_timestamp': '2025-01-01T10:00:00'
66
  }
67
 
68
  patient_context = {
69
- 'patient_id': 'TEST-001',
70
- 'age': 55,
71
- 'gender': 'male',
72
- 'symptoms': ["Increased thirst", "Frequent urination", "Fatigue"],
73
- 'medical_history': ["Prediabetes diagnosed 2 years ago"],
74
- 'current_medications': ["Metformin 500mg"],
75
- 'query': "My blood sugar has been high lately. What should I do?"
76
  }
77
 
78
- return PatientInput(
79
- biomarkers=biomarkers,
80
- model_prediction=model_prediction,
81
- patient_context=patient_context
82
- )
83
 
84
 
85
  def main():
@@ -101,36 +93,29 @@ def main():
101
  # Run workflow with baseline SOP
102
 
103
  initial_state: GuildState = {
104
- 'patient_biomarkers': patient.biomarkers,
105
- 'model_prediction': patient.model_prediction,
106
- 'patient_context': patient.patient_context,
107
- 'plan': None,
108
- 'sop': BASELINE_SOP,
109
- 'agent_outputs': [],
110
- 'biomarker_flags': [],
111
- 'safety_alerts': [],
112
- 'final_response': None,
113
- 'processing_timestamp': datetime.now().isoformat(),
114
- 'sop_version': "Baseline"
115
  }
116
 
117
  guild_state = guild.workflow.invoke(initial_state)
118
 
119
- baseline_response = guild_state['final_response']
120
- agent_outputs = guild_state['agent_outputs']
121
 
122
  baseline_eval = run_full_evaluation(
123
- final_response=baseline_response,
124
- agent_outputs=agent_outputs,
125
- biomarkers=patient.biomarkers
126
  )
127
 
128
- gene_pool.add(
129
- sop=BASELINE_SOP,
130
- evaluation=baseline_eval,
131
- parent_version=None,
132
- description="Baseline SOP"
133
- )
134
 
135
  print(f"\nβœ“ Baseline Average Score: {baseline_eval.average_score():.3f}")
136
  print(f" Clinical Accuracy: {baseline_eval.clinical_accuracy.score:.3f}")
@@ -152,16 +137,11 @@ def main():
152
  # Create evaluation function for this cycle
153
  def eval_func(final_response, agent_outputs, biomarkers):
154
  return run_full_evaluation(
155
- final_response=final_response,
156
- agent_outputs=agent_outputs,
157
- biomarkers=biomarkers
158
  )
159
 
160
  new_entries = run_evolution_cycle(
161
- gene_pool=gene_pool,
162
- patient_input=patient,
163
- workflow_graph=guild.workflow,
164
- evaluation_func=eval_func
165
  )
166
 
167
  print(f"\nβœ“ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")
@@ -203,9 +183,9 @@ def main():
203
  print(f"βœ“ Pareto Optimal SOPs: {len(pareto_front)}")
204
 
205
  # Find best average score
206
- best_sop = max(all_entries, key=lambda e: e['evaluation'].average_score())
207
  baseline_avg = baseline_eval.average_score()
208
- best_avg = best_sop['evaluation'].average_score()
209
  improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
210
 
211
  print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
 
51
  "Sodium": 138.0,
52
  "Potassium": 4.2,
53
  "Chloride": 102.0,
54
+ "Bicarbonate": 24.0,
55
  }
56
 
57
  model_prediction: dict[str, Any] = {
58
+ "disease": "Type 2 Diabetes",
59
+ "confidence": 0.92,
60
+ "probabilities": {"Type 2 Diabetes": 0.92, "Prediabetes": 0.05, "Healthy": 0.03},
61
+ "prediction_timestamp": "2025-01-01T10:00:00",
 
 
 
 
62
  }
63
 
64
  patient_context = {
65
+ "patient_id": "TEST-001",
66
+ "age": 55,
67
+ "gender": "male",
68
+ "symptoms": ["Increased thirst", "Frequent urination", "Fatigue"],
69
+ "medical_history": ["Prediabetes diagnosed 2 years ago"],
70
+ "current_medications": ["Metformin 500mg"],
71
+ "query": "My blood sugar has been high lately. What should I do?",
72
  }
73
 
74
+ return PatientInput(biomarkers=biomarkers, model_prediction=model_prediction, patient_context=patient_context)
 
 
 
 
75
 
76
 
77
  def main():
 
93
  # Run workflow with baseline SOP
94
 
95
  initial_state: GuildState = {
96
+ "patient_biomarkers": patient.biomarkers,
97
+ "model_prediction": patient.model_prediction,
98
+ "patient_context": patient.patient_context,
99
+ "plan": None,
100
+ "sop": BASELINE_SOP,
101
+ "agent_outputs": [],
102
+ "biomarker_flags": [],
103
+ "safety_alerts": [],
104
+ "final_response": None,
105
+ "processing_timestamp": datetime.now().isoformat(),
106
+ "sop_version": "Baseline",
107
  }
108
 
109
  guild_state = guild.workflow.invoke(initial_state)
110
 
111
+ baseline_response = guild_state["final_response"]
112
+ agent_outputs = guild_state["agent_outputs"]
113
 
114
  baseline_eval = run_full_evaluation(
115
+ final_response=baseline_response, agent_outputs=agent_outputs, biomarkers=patient.biomarkers
 
 
116
  )
117
 
118
+ gene_pool.add(sop=BASELINE_SOP, evaluation=baseline_eval, parent_version=None, description="Baseline SOP")
 
 
 
 
 
119
 
120
  print(f"\nβœ“ Baseline Average Score: {baseline_eval.average_score():.3f}")
121
  print(f" Clinical Accuracy: {baseline_eval.clinical_accuracy.score:.3f}")
 
137
  # Create evaluation function for this cycle
138
  def eval_func(final_response, agent_outputs, biomarkers):
139
  return run_full_evaluation(
140
+ final_response=final_response, agent_outputs=agent_outputs, biomarkers=biomarkers
 
 
141
  )
142
 
143
  new_entries = run_evolution_cycle(
144
+ gene_pool=gene_pool, patient_input=patient, workflow_graph=guild.workflow, evaluation_func=eval_func
 
 
 
145
  )
146
 
147
  print(f"\nβœ“ Cycle {cycle} complete: Added {len(new_entries)} new SOPs to gene pool")
 
183
  print(f"βœ“ Pareto Optimal SOPs: {len(pareto_front)}")
184
 
185
  # Find best average score
186
+ best_sop = max(all_entries, key=lambda e: e["evaluation"].average_score())
187
  baseline_avg = baseline_eval.average_score()
188
+ best_avg = best_sop["evaluation"].average_score()
189
  improvement = ((best_avg - baseline_avg) / baseline_avg) * 100
190
 
191
  print(f"\nBest SOP: v{best_sop['version']} - {best_sop['description']}")
archive/tests/test_evolution_quick.py CHANGED
@@ -29,15 +29,10 @@ def main():
29
  evidence_grounding=GradedScore(score=1.0, reasoning="Well cited"),
30
  actionability=GradedScore(score=0.90, reasoning="Clear actions"),
31
  clarity=GradedScore(score=0.75, reasoning="Could be clearer"),
32
- safety_completeness=GradedScore(score=1.0, reasoning="Complete")
33
  )
34
 
35
- gene_pool.add(
36
- sop=BASELINE_SOP,
37
- evaluation=baseline_eval,
38
- parent_version=None,
39
- description="Baseline SOP"
40
- )
41
 
42
  print("βœ“ Gene pool initialized with 1 SOP")
43
  print(f" Average score: {baseline_eval.average_score():.3f}")
 
29
  evidence_grounding=GradedScore(score=1.0, reasoning="Well cited"),
30
  actionability=GradedScore(score=0.90, reasoning="Clear actions"),
31
  clarity=GradedScore(score=0.75, reasoning="Could be clearer"),
32
+ safety_completeness=GradedScore(score=1.0, reasoning="Complete"),
33
  )
34
 
35
+ gene_pool.add(sop=BASELINE_SOP, evaluation=baseline_eval, parent_version=None, description="Baseline SOP")
 
 
 
 
 
36
 
37
  print("βœ“ Gene pool initialized with 1 SOP")
38
  print(f" Average score: {baseline_eval.average_score():.3f}")
conftest.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Empty conftest to add the root project directory to pytest's sys.path
huggingface/app.py CHANGED
@@ -11,16 +11,16 @@ Environment Variables (HuggingFace Secrets):
11
  Required (pick one):
12
  - GROQ_API_KEY: Groq API key (recommended, free)
13
  - GOOGLE_API_KEY: Google Gemini API key (free)
14
-
15
  Optional - LLM Configuration:
16
  - LLM_PROVIDER: "groq" or "gemini" (auto-detected from keys)
17
  - GROQ_MODEL: Model name (default: llama-3.3-70b-versatile)
18
  - GEMINI_MODEL: Model name (default: gemini-2.0-flash)
19
-
20
  Optional - Embeddings:
21
  - EMBEDDING_PROVIDER: "jina", "google", or "huggingface" (default: huggingface)
22
  - JINA_API_KEY: Jina AI API key for high-quality embeddings
23
-
24
  Optional - Observability:
25
  - LANGFUSE_ENABLED: "true" to enable tracing
26
  - LANGFUSE_PUBLIC_KEY: Langfuse public key
@@ -57,6 +57,7 @@ logger = logging.getLogger("mediguard.huggingface")
57
  # Configuration - Environment Variable Helpers
58
  # ---------------------------------------------------------------------------
59
 
 
60
  def _get_env(primary: str, *fallbacks, default: str = "") -> str:
61
  """Get env var with multiple fallback names for compatibility."""
62
  value = os.getenv(primary)
@@ -71,7 +72,7 @@ def _get_env(primary: str, *fallbacks, default: str = "") -> str:
71
 
72
  def get_api_keys():
73
  """Get API keys dynamically (HuggingFace injects secrets after module load).
74
-
75
  Supports both simple and nested naming conventions:
76
  - GROQ_API_KEY / LLM__GROQ_API_KEY
77
  - GOOGLE_API_KEY / LLM__GOOGLE_API_KEY
@@ -109,7 +110,7 @@ def is_langfuse_enabled() -> bool:
109
 
110
  def setup_llm_provider():
111
  """Set up LLM provider and related configuration based on available keys.
112
-
113
  Sets environment variables for the entire application to use.
114
  """
115
  groq_key, google_key = get_api_keys()
@@ -164,9 +165,7 @@ logger.info(f"EMBEDDING_PROVIDER: {get_embedding_provider()}")
164
  logger.info(f"LANGFUSE: {'βœ“ enabled' if is_langfuse_enabled() else 'βœ— disabled'}")
165
 
166
  if not _groq and not _google:
167
- logger.warning(
168
- "No LLM API key found at startup. Will check again when analyzing."
169
- )
170
  else:
171
  logger.info("LLM API key available β€” ready for analysis")
172
  logger.info("=" * 60)
@@ -218,6 +217,7 @@ def get_guild():
218
  start = time.time()
219
 
220
  from src.workflow import create_guild
 
221
  _guild = create_guild()
222
  _guild_provider = current_provider
223
 
@@ -254,22 +254,29 @@ def auto_predict(biomarkers: dict[str, float]) -> dict[str, Any]:
254
  def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, str, str]:
255
  """
256
  Analyze biomarkers using the Clinical Insight Guild.
257
-
258
  Returns: (summary, details_json, status)
259
  """
260
  if not input_text.strip():
261
- return "", "", """
 
 
 
262
  <div style="background: linear-gradient(135deg, #f0f4f8 0%, #e2e8f0 100%); border: 1px solid #cbd5e1; border-radius: 10px; padding: 16px; text-align: center;">
263
  <span style="font-size: 2em;">✍️</span>
264
  <p style="margin: 8px 0 0 0; color: #64748b;">Please enter biomarkers to analyze.</p>
265
  </div>
266
- """
 
267
 
268
  # Check API key dynamically (HF injects secrets after startup)
269
  groq_key, google_key = get_api_keys()
270
 
271
  if not groq_key and not google_key:
272
- return "", "", """
 
 
 
273
  <div style="background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); border: 1px solid #ef4444; border-radius: 10px; padding: 16px;">
274
  <strong style="color: #dc2626;">❌ No API Key Configured</strong>
275
  <p style="margin: 12px 0 8px 0; color: #991b1b;">Please add your API key in Space Settings β†’ Secrets:</p>
@@ -293,7 +300,8 @@ def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, st
293
  </ul>
294
  </details>
295
  </div>
296
- """
 
297
 
298
  # Setup provider based on available key
299
  provider = setup_llm_provider()
@@ -304,7 +312,10 @@ def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, st
304
  biomarkers = parse_biomarkers(input_text)
305
 
306
  if not biomarkers:
307
- return "", "", """
 
 
 
308
  <div style="background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); border: 1px solid #fbbf24; border-radius: 10px; padding: 16px;">
309
  <strong>⚠️ Could not parse biomarkers</strong>
310
  <p style="margin: 8px 0 0 0; color: #92400e;">Try formats like:</p>
@@ -313,7 +324,8 @@ def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, st
313
  <li><code>{"Glucose": 140, "HbA1c": 7.5}</code></li>
314
  </ul>
315
  </div>
316
- """
 
317
 
318
  progress(0.2, desc="πŸ”§ Initializing AI agents...")
319
 
@@ -329,7 +341,7 @@ def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, st
329
  patient_input = PatientInput(
330
  biomarkers=biomarkers,
331
  model_prediction=prediction,
332
- patient_context={"patient_id": "HF_User", "source": "huggingface_spaces"}
333
  )
334
 
335
  progress(0.4, desc="πŸ€– Running Clinical Insight Guild...")
@@ -395,7 +407,7 @@ def format_summary(response: dict, elapsed: float) -> str:
395
  "critical": ("πŸ”΄", "#dc2626", "#fef2f2"),
396
  "high": ("🟠", "#ea580c", "#fff7ed"),
397
  "moderate": ("🟑", "#ca8a04", "#fefce8"),
398
- "low": ("🟒", "#16a34a", "#f0fdf4")
399
  }
400
  emoji, color, bg_color = severity_config.get(severity, severity_config["low"])
401
 
@@ -421,9 +433,11 @@ def format_summary(response: dict, elapsed: float) -> str:
421
  alert_items = ""
422
  for alert in alerts[:5]:
423
  if isinstance(alert, dict):
424
- alert_items += f'<li><strong>{alert.get("alert_type", "Alert")}:</strong> {alert.get("message", "")}</li>'
 
 
425
  else:
426
- alert_items += f'<li>{alert}</li>'
427
 
428
  parts.append(f"""
429
  <div style="background: linear-gradient(135deg, #fef2f2 0%, #fee2e2 100%); border: 1px solid #fecaca; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
@@ -463,7 +477,7 @@ def format_summary(response: dict, elapsed: float) -> str:
463
  "high": ("πŸ”΄", "#dc2626", "#fef2f2"),
464
  "abnormal": ("🟑", "#ca8a04", "#fefce8"),
465
  "low": ("🟑", "#ca8a04", "#fefce8"),
466
- "normal": ("🟒", "#16a34a", "#f0fdf4")
467
  }
468
  s_emoji, s_color, s_bg = status_styles.get(status, status_styles["normal"])
469
 
@@ -549,7 +563,7 @@ def format_summary(response: dict, elapsed: float) -> str:
549
  parts.append(f"""
550
  <div style="background: #f8fafc; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
551
  <h4 style="margin: 0 0 12px 0; color: #1e3a5f;">πŸ“– Understanding Your Results</h4>
552
- <p style="margin: 0; color: #475569; line-height: 1.6;">{pathophys[:600]}{'...' if len(pathophys) > 600 else ''}</p>
553
  </div>
554
  """)
555
 
@@ -659,14 +673,10 @@ Question: {question}
659
 
660
  Answer:"""
661
  response = llm.invoke(prompt)
662
- return response.content if hasattr(response, 'content') else str(response)
663
 
664
 
665
- def answer_medical_question(
666
- question: str,
667
- context: str = "",
668
- chat_history: list = None
669
- ) -> tuple[str, list]:
670
  """Answer a medical question using the full agentic RAG pipeline.
671
 
672
  Pipeline: guardrail β†’ retrieve β†’ grade β†’ rewrite β†’ generate.
@@ -819,6 +829,7 @@ def hf_search(query: str, mode: str):
819
  return "Please enter a query."
820
  try:
821
  from src.services.retrieval.factory import make_retriever
 
822
  retriever = make_retriever()
823
  docs = retriever.retrieve(query, top_k=5)
824
  if not docs:
@@ -826,7 +837,7 @@ def hf_search(query: str, mode: str):
826
  parts = []
827
  for i, doc in enumerate(docs, 1):
828
  title = doc.metadata.get("title", doc.metadata.get("source_file", "Untitled"))
829
- score = doc.score if hasattr(doc, 'score') else 0.0
830
  parts.append(f"**[{i}] {title}** (score: {score:.3f})\n{doc.content}\n")
831
  return "\n---\n".join(parts)
832
  except Exception as exc:
@@ -1095,7 +1106,6 @@ def create_demo() -> gr.Blocks:
1095
  ),
1096
  css=CUSTOM_CSS,
1097
  ) as demo:
1098
-
1099
  # ===== HEADER =====
1100
  gr.HTML("""
1101
  <div class="header-container">
@@ -1129,13 +1139,10 @@ def create_demo() -> gr.Blocks:
1129
 
1130
  # ===== MAIN TABS =====
1131
  with gr.Tabs() as main_tabs:
1132
-
1133
  # ==================== TAB 1: BIOMARKER ANALYSIS ====================
1134
  with gr.Tab("πŸ”¬ Biomarker Analysis", id="biomarker-tab"):
1135
-
1136
  # ===== MAIN CONTENT =====
1137
  with gr.Row(equal_height=False):
1138
-
1139
  # ----- LEFT PANEL: INPUT -----
1140
  with gr.Column(scale=2, min_width=400):
1141
  gr.HTML('<div class="section-title">πŸ“ Enter Your Biomarkers</div>')
@@ -1143,7 +1150,7 @@ def create_demo() -> gr.Blocks:
1143
  with gr.Group():
1144
  input_text = gr.Textbox(
1145
  label="",
1146
- placeholder="Enter biomarkers in any format:\n\nβ€’ Glucose: 140, HbA1c: 7.5, Cholesterol: 210\nβ€’ My glucose is 140 and HbA1c is 7.5\nβ€’ {\"Glucose\": 140, \"HbA1c\": 7.5}",
1147
  lines=6,
1148
  max_lines=12,
1149
  show_label=False,
@@ -1164,14 +1171,13 @@ def create_demo() -> gr.Blocks:
1164
  )
1165
 
1166
  # Status display
1167
- status_output = gr.Markdown(
1168
- value="",
1169
- elem_classes="status-box"
1170
- )
1171
 
1172
  # Quick Examples
1173
  gr.HTML('<div class="section-title" style="margin-top: 24px;">⚑ Quick Examples</div>')
1174
- gr.HTML('<p style="color: #64748b; font-size: 0.9em; margin-bottom: 12px;">Click any example to load it instantly</p>')
 
 
1175
 
1176
  examples = gr.Examples(
1177
  examples=[
@@ -1230,7 +1236,7 @@ def create_demo() -> gr.Blocks:
1230
  <p>Enter your biomarkers on the left and click <strong>Analyze</strong> to get your personalized health insights.</p>
1231
  </div>
1232
  """,
1233
- elem_classes="summary-output"
1234
  )
1235
 
1236
  with gr.Tab("πŸ” Detailed JSON", id="json"):
@@ -1243,7 +1249,6 @@ def create_demo() -> gr.Blocks:
1243
 
1244
  # ==================== TAB 2: MEDICAL Q&A ====================
1245
  with gr.Tab("πŸ’¬ Medical Q&A", id="qa-tab"):
1246
-
1247
  gr.HTML("""
1248
  <div style="margin-bottom: 20px;">
1249
  <h3 style="color: #1e3a5f; margin: 0 0 8px 0;">πŸ’¬ Medical Q&A Assistant</h3>
@@ -1264,7 +1269,7 @@ def create_demo() -> gr.Blocks:
1264
  qa_model = gr.Dropdown(
1265
  choices=["llama-3.3-70b-versatile", "gemini-2.0-flash", "llama3.1:8b"],
1266
  value="llama-3.3-70b-versatile",
1267
- label="LLM Provider/Model"
1268
  )
1269
  qa_question = gr.Textbox(
1270
  label="Your Question",
@@ -1301,11 +1306,7 @@ def create_demo() -> gr.Blocks:
1301
 
1302
  with gr.Column(scale=2):
1303
  gr.HTML('<h4 style="color: #1e3a5f; margin-bottom: 12px;">πŸ“ Answer</h4>')
1304
- qa_answer = gr.Chatbot(
1305
- label="Medical Q&A History",
1306
- height=600,
1307
- elem_classes="qa-output"
1308
- )
1309
 
1310
  # Q&A Event Handlers
1311
  qa_submit_btn.click(
@@ -1313,10 +1314,7 @@ def create_demo() -> gr.Blocks:
1313
  inputs=[qa_question, qa_context, qa_answer, qa_model],
1314
  outputs=qa_answer,
1315
  show_progress="minimal",
1316
- ).then(
1317
- fn=lambda: "",
1318
- outputs=qa_question
1319
- )
1320
 
1321
  qa_clear_btn.click(
1322
  fn=lambda: ([], ""),
@@ -1327,16 +1325,10 @@ def create_demo() -> gr.Blocks:
1327
  with gr.Tab("πŸ” Search Knowledge Base", id="search-tab"):
1328
  with gr.Row():
1329
  search_input = gr.Textbox(
1330
- label="Search Query",
1331
- placeholder="e.g., diabetes management guidelines",
1332
- lines=2,
1333
- scale=3
1334
  )
1335
  search_mode = gr.Radio(
1336
- choices=["hybrid", "bm25", "vector"],
1337
- value="hybrid",
1338
- label="Search Strategy",
1339
- scale=1
1340
  )
1341
  search_btn = gr.Button("Search", variant="primary")
1342
  search_output = gr.Textbox(label="Results", lines=20, interactive=False)
@@ -1409,13 +1401,18 @@ def create_demo() -> gr.Blocks:
1409
  )
1410
 
1411
  clear_btn.click(
1412
- fn=lambda: ("", """
 
 
1413
  <div style="text-align: center; padding: 60px 20px; color: #94a3b8;">
1414
  <div style="font-size: 4em; margin-bottom: 16px;">πŸ”¬</div>
1415
  <h3 style="color: #64748b; font-weight: 500;">Ready to Analyze</h3>
1416
  <p>Enter your biomarkers on the left and click <strong>Analyze</strong> to get your personalized health insights.</p>
1417
  </div>
1418
- """, "", ""),
 
 
 
1419
  outputs=[input_text, summary_output, details_output, status_output],
1420
  )
1421
 
 
11
  Required (pick one):
12
  - GROQ_API_KEY: Groq API key (recommended, free)
13
  - GOOGLE_API_KEY: Google Gemini API key (free)
14
+
15
  Optional - LLM Configuration:
16
  - LLM_PROVIDER: "groq" or "gemini" (auto-detected from keys)
17
  - GROQ_MODEL: Model name (default: llama-3.3-70b-versatile)
18
  - GEMINI_MODEL: Model name (default: gemini-2.0-flash)
19
+
20
  Optional - Embeddings:
21
  - EMBEDDING_PROVIDER: "jina", "google", or "huggingface" (default: huggingface)
22
  - JINA_API_KEY: Jina AI API key for high-quality embeddings
23
+
24
  Optional - Observability:
25
  - LANGFUSE_ENABLED: "true" to enable tracing
26
  - LANGFUSE_PUBLIC_KEY: Langfuse public key
 
57
  # Configuration - Environment Variable Helpers
58
  # ---------------------------------------------------------------------------
59
 
60
+
61
  def _get_env(primary: str, *fallbacks, default: str = "") -> str:
62
  """Get env var with multiple fallback names for compatibility."""
63
  value = os.getenv(primary)
 
72
 
73
  def get_api_keys():
74
  """Get API keys dynamically (HuggingFace injects secrets after module load).
75
+
76
  Supports both simple and nested naming conventions:
77
  - GROQ_API_KEY / LLM__GROQ_API_KEY
78
  - GOOGLE_API_KEY / LLM__GOOGLE_API_KEY
 
110
 
111
  def setup_llm_provider():
112
  """Set up LLM provider and related configuration based on available keys.
113
+
114
  Sets environment variables for the entire application to use.
115
  """
116
  groq_key, google_key = get_api_keys()
 
165
  logger.info(f"LANGFUSE: {'βœ“ enabled' if is_langfuse_enabled() else 'βœ— disabled'}")
166
 
167
  if not _groq and not _google:
168
+ logger.warning("No LLM API key found at startup. Will check again when analyzing.")
 
 
169
  else:
170
  logger.info("LLM API key available β€” ready for analysis")
171
  logger.info("=" * 60)
 
217
  start = time.time()
218
 
219
  from src.workflow import create_guild
220
+
221
  _guild = create_guild()
222
  _guild_provider = current_provider
223
 
 
254
  def analyze_biomarkers(input_text: str, progress=gr.Progress()) -> tuple[str, str, str]:
255
  """
256
  Analyze biomarkers using the Clinical Insight Guild.
257
+
258
  Returns: (summary, details_json, status)
259
  """
260
  if not input_text.strip():
261
+ return (
262
+ "",
263
+ "",
264
+ """
265
  <div style="background: linear-gradient(135deg, #f0f4f8 0%, #e2e8f0 100%); border: 1px solid #cbd5e1; border-radius: 10px; padding: 16px; text-align: center;">
266
  <span style="font-size: 2em;">✍️</span>
267
  <p style="margin: 8px 0 0 0; color: #64748b;">Please enter biomarkers to analyze.</p>
268
  </div>
269
+ """,
270
+ )
271
 
272
  # Check API key dynamically (HF injects secrets after startup)
273
  groq_key, google_key = get_api_keys()
274
 
275
  if not groq_key and not google_key:
276
+ return (
277
+ "",
278
+ "",
279
+ """
280
  <div style="background: linear-gradient(135deg, #fee2e2 0%, #fecaca 100%); border: 1px solid #ef4444; border-radius: 10px; padding: 16px;">
281
  <strong style="color: #dc2626;">❌ No API Key Configured</strong>
282
  <p style="margin: 12px 0 8px 0; color: #991b1b;">Please add your API key in Space Settings β†’ Secrets:</p>
 
300
  </ul>
301
  </details>
302
  </div>
303
+ """,
304
+ )
305
 
306
  # Setup provider based on available key
307
  provider = setup_llm_provider()
 
312
  biomarkers = parse_biomarkers(input_text)
313
 
314
  if not biomarkers:
315
+ return (
316
+ "",
317
+ "",
318
+ """
319
  <div style="background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%); border: 1px solid #fbbf24; border-radius: 10px; padding: 16px;">
320
  <strong>⚠️ Could not parse biomarkers</strong>
321
  <p style="margin: 8px 0 0 0; color: #92400e;">Try formats like:</p>
 
324
  <li><code>{"Glucose": 140, "HbA1c": 7.5}</code></li>
325
  </ul>
326
  </div>
327
+ """,
328
+ )
329
 
330
  progress(0.2, desc="πŸ”§ Initializing AI agents...")
331
 
 
341
  patient_input = PatientInput(
342
  biomarkers=biomarkers,
343
  model_prediction=prediction,
344
+ patient_context={"patient_id": "HF_User", "source": "huggingface_spaces"},
345
  )
346
 
347
  progress(0.4, desc="πŸ€– Running Clinical Insight Guild...")
 
407
  "critical": ("πŸ”΄", "#dc2626", "#fef2f2"),
408
  "high": ("🟠", "#ea580c", "#fff7ed"),
409
  "moderate": ("🟑", "#ca8a04", "#fefce8"),
410
+ "low": ("🟒", "#16a34a", "#f0fdf4"),
411
  }
412
  emoji, color, bg_color = severity_config.get(severity, severity_config["low"])
413
 
 
433
  alert_items = ""
434
  for alert in alerts[:5]:
435
  if isinstance(alert, dict):
436
+ alert_items += (
437
+ f"<li><strong>{alert.get('alert_type', 'Alert')}:</strong> {alert.get('message', '')}</li>"
438
+ )
439
  else:
440
+ alert_items += f"<li>{alert}</li>"
441
 
442
  parts.append(f"""
443
  <div style="background: linear-gradient(135deg, #fef2f2 0%, #fee2e2 100%); border: 1px solid #fecaca; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
 
477
  "high": ("πŸ”΄", "#dc2626", "#fef2f2"),
478
  "abnormal": ("🟑", "#ca8a04", "#fefce8"),
479
  "low": ("🟑", "#ca8a04", "#fefce8"),
480
+ "normal": ("🟒", "#16a34a", "#f0fdf4"),
481
  }
482
  s_emoji, s_color, s_bg = status_styles.get(status, status_styles["normal"])
483
 
 
563
  parts.append(f"""
564
  <div style="background: #f8fafc; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
565
  <h4 style="margin: 0 0 12px 0; color: #1e3a5f;">πŸ“– Understanding Your Results</h4>
566
+ <p style="margin: 0; color: #475569; line-height: 1.6;">{pathophys[:600]}{"..." if len(pathophys) > 600 else ""}</p>
567
  </div>
568
  """)
569
 
 
673
 
674
  Answer:"""
675
  response = llm.invoke(prompt)
676
+ return response.content if hasattr(response, "content") else str(response)
677
 
678
 
679
+ def answer_medical_question(question: str, context: str = "", chat_history: list | None = None) -> tuple[str, list]:
 
 
 
 
680
  """Answer a medical question using the full agentic RAG pipeline.
681
 
682
  Pipeline: guardrail β†’ retrieve β†’ grade β†’ rewrite β†’ generate.
 
829
  return "Please enter a query."
830
  try:
831
  from src.services.retrieval.factory import make_retriever
832
+
833
  retriever = make_retriever()
834
  docs = retriever.retrieve(query, top_k=5)
835
  if not docs:
 
837
  parts = []
838
  for i, doc in enumerate(docs, 1):
839
  title = doc.metadata.get("title", doc.metadata.get("source_file", "Untitled"))
840
+ score = doc.score if hasattr(doc, "score") else 0.0
841
  parts.append(f"**[{i}] {title}** (score: {score:.3f})\n{doc.content}\n")
842
  return "\n---\n".join(parts)
843
  except Exception as exc:
 
1106
  ),
1107
  css=CUSTOM_CSS,
1108
  ) as demo:
 
1109
  # ===== HEADER =====
1110
  gr.HTML("""
1111
  <div class="header-container">
 
1139
 
1140
  # ===== MAIN TABS =====
1141
  with gr.Tabs() as main_tabs:
 
1142
  # ==================== TAB 1: BIOMARKER ANALYSIS ====================
1143
  with gr.Tab("πŸ”¬ Biomarker Analysis", id="biomarker-tab"):
 
1144
  # ===== MAIN CONTENT =====
1145
  with gr.Row(equal_height=False):
 
1146
  # ----- LEFT PANEL: INPUT -----
1147
  with gr.Column(scale=2, min_width=400):
1148
  gr.HTML('<div class="section-title">πŸ“ Enter Your Biomarkers</div>')
 
1150
  with gr.Group():
1151
  input_text = gr.Textbox(
1152
  label="",
1153
+ placeholder='Enter biomarkers in any format:\n\nβ€’ Glucose: 140, HbA1c: 7.5, Cholesterol: 210\nβ€’ My glucose is 140 and HbA1c is 7.5\nβ€’ {"Glucose": 140, "HbA1c": 7.5}',
1154
  lines=6,
1155
  max_lines=12,
1156
  show_label=False,
 
1171
  )
1172
 
1173
  # Status display
1174
+ status_output = gr.Markdown(value="", elem_classes="status-box")
 
 
 
1175
 
1176
  # Quick Examples
1177
  gr.HTML('<div class="section-title" style="margin-top: 24px;">⚑ Quick Examples</div>')
1178
+ gr.HTML(
1179
+ '<p style="color: #64748b; font-size: 0.9em; margin-bottom: 12px;">Click any example to load it instantly</p>'
1180
+ )
1181
 
1182
  examples = gr.Examples(
1183
  examples=[
 
1236
  <p>Enter your biomarkers on the left and click <strong>Analyze</strong> to get your personalized health insights.</p>
1237
  </div>
1238
  """,
1239
+ elem_classes="summary-output",
1240
  )
1241
 
1242
  with gr.Tab("πŸ” Detailed JSON", id="json"):
 
1249
 
1250
  # ==================== TAB 2: MEDICAL Q&A ====================
1251
  with gr.Tab("πŸ’¬ Medical Q&A", id="qa-tab"):
 
1252
  gr.HTML("""
1253
  <div style="margin-bottom: 20px;">
1254
  <h3 style="color: #1e3a5f; margin: 0 0 8px 0;">πŸ’¬ Medical Q&A Assistant</h3>
 
1269
  qa_model = gr.Dropdown(
1270
  choices=["llama-3.3-70b-versatile", "gemini-2.0-flash", "llama3.1:8b"],
1271
  value="llama-3.3-70b-versatile",
1272
+ label="LLM Provider/Model",
1273
  )
1274
  qa_question = gr.Textbox(
1275
  label="Your Question",
 
1306
 
1307
  with gr.Column(scale=2):
1308
  gr.HTML('<h4 style="color: #1e3a5f; margin-bottom: 12px;">πŸ“ Answer</h4>')
1309
+ qa_answer = gr.Chatbot(label="Medical Q&A History", height=600, elem_classes="qa-output")
 
 
 
 
1310
 
1311
  # Q&A Event Handlers
1312
  qa_submit_btn.click(
 
1314
  inputs=[qa_question, qa_context, qa_answer, qa_model],
1315
  outputs=qa_answer,
1316
  show_progress="minimal",
1317
+ ).then(fn=lambda: "", outputs=qa_question)
 
 
 
1318
 
1319
  qa_clear_btn.click(
1320
  fn=lambda: ([], ""),
 
1325
  with gr.Tab("πŸ” Search Knowledge Base", id="search-tab"):
1326
  with gr.Row():
1327
  search_input = gr.Textbox(
1328
+ label="Search Query", placeholder="e.g., diabetes management guidelines", lines=2, scale=3
 
 
 
1329
  )
1330
  search_mode = gr.Radio(
1331
+ choices=["hybrid", "bm25", "vector"], value="hybrid", label="Search Strategy", scale=1
 
 
 
1332
  )
1333
  search_btn = gr.Button("Search", variant="primary")
1334
  search_output = gr.Textbox(label="Results", lines=20, interactive=False)
 
1401
  )
1402
 
1403
  clear_btn.click(
1404
+ fn=lambda: (
1405
+ "",
1406
+ """
1407
  <div style="text-align: center; padding: 60px 20px; color: #94a3b8;">
1408
  <div style="font-size: 4em; margin-bottom: 16px;">πŸ”¬</div>
1409
  <h3 style="color: #64748b; font-weight: 500;">Ready to Analyze</h3>
1410
  <p>Enter your biomarkers on the left and click <strong>Analyze</strong> to get your personalized health insights.</p>
1411
  </div>
1412
+ """,
1413
+ "",
1414
+ "",
1415
+ ),
1416
  outputs=[input_text, summary_output, details_output, status_output],
1417
  )
1418
 
pytest.ini CHANGED
@@ -5,3 +5,5 @@ filterwarnings =
5
 
6
  markers =
7
  integration: mark a test as an integration test.
 
 
 
5
 
6
  markers =
7
  integration: mark a test as an integration test.
8
+
9
+ testpaths = tests
scripts/chat.py CHANGED
@@ -26,15 +26,16 @@ from pathlib import Path
26
  from typing import Any
27
 
28
  # Set UTF-8 encoding for Windows console
29
- if sys.platform == 'win32':
30
  try:
31
- sys.stdout.reconfigure(encoding='utf-8')
32
- sys.stderr.reconfigure(encoding='utf-8')
33
  except Exception:
34
  import codecs
35
- sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')
36
- sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict')
37
- os.system('chcp 65001 > nul 2>&1')
 
38
 
39
  # Add parent directory to path for imports
40
  sys.path.insert(0, str(Path(__file__).parent.parent))
@@ -82,6 +83,7 @@ If you cannot find any biomarkers, return {{"biomarkers": {{}}, "patient_context
82
  # Component 1: Biomarker Extraction
83
  # ============================================================================
84
 
 
85
  def _parse_llm_json(content: str) -> dict[str, Any]:
86
  """Parse JSON payload from LLM output with fallback recovery."""
87
  text = content.strip()
@@ -97,14 +99,14 @@ def _parse_llm_json(content: str) -> dict[str, Any]:
97
  left = text.find("{")
98
  right = text.rfind("}")
99
  if left != -1 and right != -1 and right > left:
100
- return json.loads(text[left:right + 1])
101
  raise
102
 
103
 
104
  def extract_biomarkers(user_message: str) -> tuple[dict[str, float], dict[str, Any]]:
105
  """
106
  Extract biomarker values from natural language using LLM.
107
-
108
  Returns:
109
  Tuple of (biomarkers_dict, patient_context_dict)
110
  """
@@ -140,6 +142,7 @@ def extract_biomarkers(user_message: str) -> tuple[dict[str, float], dict[str, A
140
  except Exception as e:
141
  print(f"⚠️ Extraction failed: {e}")
142
  import traceback
 
143
  traceback.print_exc()
144
  return {}, {}
145
 
@@ -148,17 +151,12 @@ def extract_biomarkers(user_message: str) -> tuple[dict[str, float], dict[str, A
148
  # Component 2: Disease Prediction
149
  # ============================================================================
150
 
 
151
  def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
152
  """
153
  Simple rule-based disease prediction based on key biomarkers.
154
  """
155
- scores = {
156
- "Diabetes": 0.0,
157
- "Anemia": 0.0,
158
- "Heart Disease": 0.0,
159
- "Thrombocytopenia": 0.0,
160
- "Thalassemia": 0.0
161
- }
162
 
163
  # Helper: check both abbreviated and normalized biomarker names
164
  # Returns None when biomarker is not present (avoids false triggers)
@@ -228,11 +226,7 @@ def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
228
  else:
229
  probabilities = {k: 1.0 / len(scores) for k in scores}
230
 
231
- return {
232
- "disease": top_disease,
233
- "confidence": confidence,
234
- "probabilities": probabilities
235
- }
236
 
237
 
238
  def predict_disease_llm(biomarkers: dict[str, float], patient_context: dict) -> dict[str, Any]:
@@ -280,6 +274,7 @@ Return ONLY valid JSON (no other text):
280
  except Exception as e:
281
  print(f"⚠️ LLM prediction failed ({e}), using rule-based fallback")
282
  import traceback
 
283
  traceback.print_exc()
284
  return predict_disease_simple(biomarkers)
285
 
@@ -288,6 +283,7 @@ Return ONLY valid JSON (no other text):
288
  # Component 3: Conversational Formatter
289
  # ============================================================================
290
 
 
291
  def _coerce_to_dict(obj) -> dict:
292
  """Convert a Pydantic model or arbitrary object to a plain dict."""
293
  if isinstance(obj, dict):
@@ -379,6 +375,7 @@ def format_conversational(result: dict[str, Any], user_name: str = "there") -> s
379
  # Component 4: Helper Functions
380
  # ============================================================================
381
 
 
382
  def print_biomarker_help():
383
  """Print list of supported biomarkers"""
384
  print("\nπŸ“‹ Supported Biomarkers (24 total):")
@@ -409,7 +406,7 @@ def run_example_case(guild):
409
  "Platelets": 220000,
410
  "White Blood Cells": 7500,
411
  "Systolic Blood Pressure": 145,
412
- "Diastolic Blood Pressure": 92
413
  }
414
 
415
  prediction = {
@@ -420,25 +417,25 @@ def run_example_case(guild):
420
  "Heart Disease": 0.08,
421
  "Anemia": 0.03,
422
  "Thrombocytopenia": 0.01,
423
- "Thalassemia": 0.01
424
- }
425
  }
426
 
427
  patient_input = PatientInput(
428
  biomarkers=example_biomarkers,
429
  model_prediction=prediction,
430
- patient_context={"age": 52, "gender": "male", "bmi": 31.2}
431
  )
432
 
433
  print("πŸ”„ Running analysis...\n")
434
  result = guild.run(patient_input)
435
 
436
  response = format_conversational(result.get("final_response", result), "there")
437
- print("\n" + "="*70)
438
  print("πŸ€– RAG-BOT:")
439
- print("="*70)
440
  print(response)
441
- print("="*70 + "\n")
442
 
443
 
444
  def save_report(result: dict, biomarkers: dict):
@@ -447,11 +444,10 @@ def save_report(result: dict, biomarkers: dict):
447
 
448
  # final_response is already a plain dict built by the synthesizer
449
  final = result.get("final_response") or {}
450
- disease = (
451
- final.get("prediction_explanation", {}).get("primary_disease")
452
- or result.get("model_prediction", {}).get("disease", "unknown")
453
  )
454
- disease_safe = disease.replace(' ', '_').replace('/', '_')
455
  filename = f"report_{disease_safe}_{timestamp}.json"
456
 
457
  output_dir = Path("data/chat_reports")
@@ -465,9 +461,9 @@ def save_report(result: dict, biomarkers: dict):
465
  return {k: _to_dict(v) for k, v in obj.items()}
466
  if isinstance(obj, list):
467
  return [_to_dict(i) for i in obj]
468
- if hasattr(obj, "model_dump"): # Pydantic v2
469
  return _to_dict(obj.model_dump())
470
- if hasattr(obj, "dict"): # Pydantic v1
471
  return _to_dict(obj.dict())
472
  # Scalars and other primitives are returned as-is
473
  return obj
@@ -480,7 +476,7 @@ def save_report(result: dict, biomarkers: dict):
480
  "safety_alerts": _to_dict(result.get("safety_alerts", [])),
481
  }
482
 
483
- with open(filepath, 'w') as f:
484
  json.dump(report, f, indent=2)
485
 
486
  print(f"βœ… Report saved to: {filepath}\n")
@@ -490,21 +486,22 @@ def save_report(result: dict, biomarkers: dict):
490
  # Main Chat Interface
491
  # ============================================================================
492
 
 
493
  def chat_interface():
494
  """
495
  Main interactive CLI chatbot for MediGuard AI RAG-Helper.
496
  """
497
  # Print welcome banner
498
- print("\n" + "="*70)
499
  print("πŸ€– MediGuard AI RAG-Helper - Interactive Chat")
500
- print("="*70)
501
  print("\nWelcome! I can help you understand your blood test results.\n")
502
  print("You can:")
503
  print(" 1. Describe your biomarkers (e.g., 'My glucose is 140, HbA1c is 7.5')")
504
  print(" 2. Type 'example' to see a sample diabetes case")
505
  print(" 3. Type 'help' for biomarker list")
506
  print(" 4. Type 'quit' to exit\n")
507
- print("="*70 + "\n")
508
 
509
  # Initialize guild (one-time setup)
510
  print("πŸ”§ Initializing medical knowledge system...")
@@ -532,15 +529,15 @@ def chat_interface():
532
  continue
533
 
534
  # Handle special commands
535
- if user_input.lower() in ['quit', 'exit', 'q']:
536
  print("\nπŸ‘‹ Thank you for using MediGuard AI. Stay healthy!")
537
  break
538
 
539
- if user_input.lower() == 'help':
540
  print_biomarker_help()
541
  continue
542
 
543
- if user_input.lower() == 'example':
544
  run_example_case(guild)
545
  continue
546
 
@@ -571,7 +568,7 @@ def chat_interface():
571
  patient_input = PatientInput(
572
  biomarkers=biomarkers,
573
  model_prediction=prediction,
574
- patient_context=patient_context if patient_context else {"source": "chat"}
575
  )
576
 
577
  # Run full RAG workflow
@@ -584,23 +581,20 @@ def chat_interface():
584
  response = format_conversational(result.get("final_response", result), user_name)
585
 
586
  # Display response
587
- print("\n" + "="*70)
588
  print("πŸ€– RAG-BOT:")
589
- print("="*70)
590
  print(response)
591
- print("="*70 + "\n")
592
 
593
  # Save to history
594
- conversation_history.append({
595
- "user_input": user_input,
596
- "biomarkers": biomarkers,
597
- "prediction": prediction,
598
- "result": result
599
- })
600
 
601
  # Ask if user wants to save report
602
  save_choice = input("πŸ’Ύ Save detailed report to file? (y/n): ").strip().lower()
603
- if save_choice == 'y':
604
  save_report(result, biomarkers)
605
 
606
  print("\nYou can:")
@@ -612,6 +606,7 @@ def chat_interface():
612
  break
613
  except Exception as e:
614
  import traceback
 
615
  traceback.print_exc()
616
  print(f"\n❌ Analysis failed: {e}")
617
  print("\nThis might be due to:")
 
26
  from typing import Any
27
 
28
  # Set UTF-8 encoding for Windows console
29
+ if sys.platform == "win32":
30
  try:
31
+ sys.stdout.reconfigure(encoding="utf-8")
32
+ sys.stderr.reconfigure(encoding="utf-8")
33
  except Exception:
34
  import codecs
35
+
36
+ sys.stdout = codecs.getwriter("utf-8")(sys.stdout.buffer, "strict")
37
+ sys.stderr = codecs.getwriter("utf-8")(sys.stderr.buffer, "strict")
38
+ os.system("chcp 65001 > nul 2>&1")
39
 
40
  # Add parent directory to path for imports
41
  sys.path.insert(0, str(Path(__file__).parent.parent))
 
83
  # Component 1: Biomarker Extraction
84
  # ============================================================================
85
 
86
+
87
  def _parse_llm_json(content: str) -> dict[str, Any]:
88
  """Parse JSON payload from LLM output with fallback recovery."""
89
  text = content.strip()
 
99
  left = text.find("{")
100
  right = text.rfind("}")
101
  if left != -1 and right != -1 and right > left:
102
+ return json.loads(text[left : right + 1])
103
  raise
104
 
105
 
106
  def extract_biomarkers(user_message: str) -> tuple[dict[str, float], dict[str, Any]]:
107
  """
108
  Extract biomarker values from natural language using LLM.
109
+
110
  Returns:
111
  Tuple of (biomarkers_dict, patient_context_dict)
112
  """
 
142
  except Exception as e:
143
  print(f"⚠️ Extraction failed: {e}")
144
  import traceback
145
+
146
  traceback.print_exc()
147
  return {}, {}
148
 
 
151
  # Component 2: Disease Prediction
152
  # ============================================================================
153
 
154
+
155
  def predict_disease_simple(biomarkers: dict[str, float]) -> dict[str, Any]:
156
  """
157
  Simple rule-based disease prediction based on key biomarkers.
158
  """
159
+ scores = {"Diabetes": 0.0, "Anemia": 0.0, "Heart Disease": 0.0, "Thrombocytopenia": 0.0, "Thalassemia": 0.0}
 
 
 
 
 
 
160
 
161
  # Helper: check both abbreviated and normalized biomarker names
162
  # Returns None when biomarker is not present (avoids false triggers)
 
226
  else:
227
  probabilities = {k: 1.0 / len(scores) for k in scores}
228
 
229
+ return {"disease": top_disease, "confidence": confidence, "probabilities": probabilities}
 
 
 
 
230
 
231
 
232
  def predict_disease_llm(biomarkers: dict[str, float], patient_context: dict) -> dict[str, Any]:
 
274
  except Exception as e:
275
  print(f"⚠️ LLM prediction failed ({e}), using rule-based fallback")
276
  import traceback
277
+
278
  traceback.print_exc()
279
  return predict_disease_simple(biomarkers)
280
 
 
283
  # Component 3: Conversational Formatter
284
  # ============================================================================
285
 
286
+
287
  def _coerce_to_dict(obj) -> dict:
288
  """Convert a Pydantic model or arbitrary object to a plain dict."""
289
  if isinstance(obj, dict):
 
375
  # Component 4: Helper Functions
376
  # ============================================================================
377
 
378
+
379
  def print_biomarker_help():
380
  """Print list of supported biomarkers"""
381
  print("\nπŸ“‹ Supported Biomarkers (24 total):")
 
406
  "Platelets": 220000,
407
  "White Blood Cells": 7500,
408
  "Systolic Blood Pressure": 145,
409
+ "Diastolic Blood Pressure": 92,
410
  }
411
 
412
  prediction = {
 
417
  "Heart Disease": 0.08,
418
  "Anemia": 0.03,
419
  "Thrombocytopenia": 0.01,
420
+ "Thalassemia": 0.01,
421
+ },
422
  }
423
 
424
  patient_input = PatientInput(
425
  biomarkers=example_biomarkers,
426
  model_prediction=prediction,
427
+ patient_context={"age": 52, "gender": "male", "bmi": 31.2},
428
  )
429
 
430
  print("πŸ”„ Running analysis...\n")
431
  result = guild.run(patient_input)
432
 
433
  response = format_conversational(result.get("final_response", result), "there")
434
+ print("\n" + "=" * 70)
435
  print("πŸ€– RAG-BOT:")
436
+ print("=" * 70)
437
  print(response)
438
+ print("=" * 70 + "\n")
439
 
440
 
441
  def save_report(result: dict, biomarkers: dict):
 
444
 
445
  # final_response is already a plain dict built by the synthesizer
446
  final = result.get("final_response") or {}
447
+ disease = final.get("prediction_explanation", {}).get("primary_disease") or result.get("model_prediction", {}).get(
448
+ "disease", "unknown"
 
449
  )
450
+ disease_safe = disease.replace(" ", "_").replace("/", "_")
451
  filename = f"report_{disease_safe}_{timestamp}.json"
452
 
453
  output_dir = Path("data/chat_reports")
 
461
  return {k: _to_dict(v) for k, v in obj.items()}
462
  if isinstance(obj, list):
463
  return [_to_dict(i) for i in obj]
464
+ if hasattr(obj, "model_dump"): # Pydantic v2
465
  return _to_dict(obj.model_dump())
466
+ if hasattr(obj, "dict"): # Pydantic v1
467
  return _to_dict(obj.dict())
468
  # Scalars and other primitives are returned as-is
469
  return obj
 
476
  "safety_alerts": _to_dict(result.get("safety_alerts", [])),
477
  }
478
 
479
+ with open(filepath, "w") as f:
480
  json.dump(report, f, indent=2)
481
 
482
  print(f"βœ… Report saved to: {filepath}\n")
 
486
  # Main Chat Interface
487
  # ============================================================================
488
 
489
+
490
  def chat_interface():
491
  """
492
  Main interactive CLI chatbot for MediGuard AI RAG-Helper.
493
  """
494
  # Print welcome banner
495
+ print("\n" + "=" * 70)
496
  print("πŸ€– MediGuard AI RAG-Helper - Interactive Chat")
497
+ print("=" * 70)
498
  print("\nWelcome! I can help you understand your blood test results.\n")
499
  print("You can:")
500
  print(" 1. Describe your biomarkers (e.g., 'My glucose is 140, HbA1c is 7.5')")
501
  print(" 2. Type 'example' to see a sample diabetes case")
502
  print(" 3. Type 'help' for biomarker list")
503
  print(" 4. Type 'quit' to exit\n")
504
+ print("=" * 70 + "\n")
505
 
506
  # Initialize guild (one-time setup)
507
  print("πŸ”§ Initializing medical knowledge system...")
 
529
  continue
530
 
531
  # Handle special commands
532
+ if user_input.lower() in ["quit", "exit", "q"]:
533
  print("\nπŸ‘‹ Thank you for using MediGuard AI. Stay healthy!")
534
  break
535
 
536
+ if user_input.lower() == "help":
537
  print_biomarker_help()
538
  continue
539
 
540
+ if user_input.lower() == "example":
541
  run_example_case(guild)
542
  continue
543
 
 
568
  patient_input = PatientInput(
569
  biomarkers=biomarkers,
570
  model_prediction=prediction,
571
+ patient_context=patient_context if patient_context else {"source": "chat"},
572
  )
573
 
574
  # Run full RAG workflow
 
581
  response = format_conversational(result.get("final_response", result), user_name)
582
 
583
  # Display response
584
+ print("\n" + "=" * 70)
585
  print("πŸ€– RAG-BOT:")
586
+ print("=" * 70)
587
  print(response)
588
+ print("=" * 70 + "\n")
589
 
590
  # Save to history
591
+ conversation_history.append(
592
+ {"user_input": user_input, "biomarkers": biomarkers, "prediction": prediction, "result": result}
593
+ )
 
 
 
594
 
595
  # Ask if user wants to save report
596
  save_choice = input("πŸ’Ύ Save detailed report to file? (y/n): ").strip().lower()
597
+ if save_choice == "y":
598
  save_report(result, biomarkers)
599
 
600
  print("\nYou can:")
 
606
  break
607
  except Exception as e:
608
  import traceback
609
+
610
  traceback.print_exc()
611
  print(f"\n❌ Analysis failed: {e}")
612
  print("\nThis might be due to:")
scripts/monitor_test.py CHANGED
@@ -1,4 +1,5 @@
1
  """Monitor evolution test progress"""
 
2
  import time
3
 
4
  print("Monitoring evolution test... (Press Ctrl+C to stop)")
@@ -6,7 +7,7 @@ print("=" * 70)
6
 
7
  for i in range(60): # Check for 5 minutes
8
  time.sleep(5)
9
- print(f"[{i*5}s] Test still running...")
10
 
11
  print("\nTest should be complete or nearly complete.")
12
  print("Check terminal output for results.")
 
1
  """Monitor evolution test progress"""
2
+
3
  import time
4
 
5
  print("Monitoring evolution test... (Press Ctrl+C to stop)")
 
7
 
8
  for i in range(60): # Check for 5 minutes
9
  time.sleep(5)
10
+ print(f"[{i * 5}s] Test still running...")
11
 
12
  print("\nTest should be complete or nearly complete.")
13
  print("Check terminal output for results.")
scripts/setup_embeddings.py CHANGED
@@ -8,9 +8,9 @@ from pathlib import Path
8
  def setup_google_api_key():
9
  """Interactive setup for Google API key"""
10
 
11
- print("="*70)
12
  print("Fast Embeddings Setup - Google Gemini API")
13
- print("="*70)
14
 
15
  print("\nWhy Google Gemini?")
16
  print(" - 100x faster than local Ollama (2 mins vs 30+ mins)")
@@ -18,9 +18,9 @@ def setup_google_api_key():
18
  print(" - High quality embeddings")
19
  print(" - Automatic fallback to Ollama if unavailable")
20
 
21
- print("\n" + "="*70)
22
  print("Step 1: Get Your Free API Key")
23
- print("="*70)
24
  print("\n1. Open this URL in your browser:")
25
  print(" https://aistudio.google.com/app/apikey")
26
  print("\n2. Sign in with Google account")
@@ -38,7 +38,7 @@ def setup_google_api_key():
38
  if not api_key.startswith("AIza"):
39
  print("\nWarning: Key doesn't start with 'AIza'. Are you sure this is correct?")
40
  confirm = input("Continue anyway? (y/n): ").strip().lower()
41
- if confirm != 'y':
42
  return False
43
 
44
  # Update .env file
@@ -52,28 +52,28 @@ def setup_google_api_key():
52
  updated = False
53
  for i, line in enumerate(lines):
54
  if line.startswith("GOOGLE_API_KEY="):
55
- lines[i] = f'GOOGLE_API_KEY={api_key}\n'
56
  updated = True
57
  break
58
 
59
  if not updated:
60
- lines.insert(0, f'GOOGLE_API_KEY={api_key}\n')
61
 
62
- with open(env_path, 'w') as f:
63
  f.writelines(lines)
64
  else:
65
  # Create new .env file
66
- with open(env_path, 'w') as f:
67
- f.write(f'GOOGLE_API_KEY={api_key}\n')
68
 
69
  print("\nAPI key saved to .env file!")
70
- print("\n" + "="*70)
71
  print("Step 2: Build Vector Store")
72
- print("="*70)
73
  print("\nRun this command:")
74
  print(" python src/pdf_processor.py")
75
  print("\nChoose option 1 (Google Gemini) when prompted.")
76
- print("\n" + "="*70)
77
 
78
  return True
79
 
 
8
  def setup_google_api_key():
9
  """Interactive setup for Google API key"""
10
 
11
+ print("=" * 70)
12
  print("Fast Embeddings Setup - Google Gemini API")
13
+ print("=" * 70)
14
 
15
  print("\nWhy Google Gemini?")
16
  print(" - 100x faster than local Ollama (2 mins vs 30+ mins)")
 
18
  print(" - High quality embeddings")
19
  print(" - Automatic fallback to Ollama if unavailable")
20
 
21
+ print("\n" + "=" * 70)
22
  print("Step 1: Get Your Free API Key")
23
+ print("=" * 70)
24
  print("\n1. Open this URL in your browser:")
25
  print(" https://aistudio.google.com/app/apikey")
26
  print("\n2. Sign in with Google account")
 
38
  if not api_key.startswith("AIza"):
39
  print("\nWarning: Key doesn't start with 'AIza'. Are you sure this is correct?")
40
  confirm = input("Continue anyway? (y/n): ").strip().lower()
41
+ if confirm != "y":
42
  return False
43
 
44
  # Update .env file
 
52
  updated = False
53
  for i, line in enumerate(lines):
54
  if line.startswith("GOOGLE_API_KEY="):
55
+ lines[i] = f"GOOGLE_API_KEY={api_key}\n"
56
  updated = True
57
  break
58
 
59
  if not updated:
60
+ lines.insert(0, f"GOOGLE_API_KEY={api_key}\n")
61
 
62
+ with open(env_path, "w") as f:
63
  f.writelines(lines)
64
  else:
65
  # Create new .env file
66
+ with open(env_path, "w") as f:
67
+ f.write(f"GOOGLE_API_KEY={api_key}\n")
68
 
69
  print("\nAPI key saved to .env file!")
70
+ print("\n" + "=" * 70)
71
  print("Step 2: Build Vector Store")
72
+ print("=" * 70)
73
  print("\nRun this command:")
74
  print(" python src/pdf_processor.py")
75
  print("\nChoose option 1 (Google Gemini) when prompted.")
76
+ print("\n" + "=" * 70)
77
 
78
  return True
79
 
scripts/test_chat_demo.py CHANGED
@@ -10,16 +10,16 @@ test_cases = [
10
  "help", # Show biomarker help
11
  "glucose 185, HbA1c 8.2, cholesterol 235, triglycerides 210, HDL 38", # Diabetes case
12
  "n", # Don't save report
13
- "quit" # Exit
14
  ]
15
 
16
- print("="*70)
17
  print("CLI Chatbot Demo Test")
18
- print("="*70)
19
  print("\nThis will run the chatbot with pre-defined inputs:")
20
  for i, case in enumerate(test_cases, 1):
21
  print(f" {i}. {case}")
22
- print("\n" + "="*70 + "\n")
23
 
24
  # Prepare input string
25
  input_str = "\n".join(test_cases) + "\n"
@@ -32,8 +32,8 @@ try:
32
  capture_output=True,
33
  text=True,
34
  timeout=120,
35
- encoding='utf-8',
36
- errors='replace'
37
  )
38
 
39
  print("STDOUT:")
 
10
  "help", # Show biomarker help
11
  "glucose 185, HbA1c 8.2, cholesterol 235, triglycerides 210, HDL 38", # Diabetes case
12
  "n", # Don't save report
13
+ "quit", # Exit
14
  ]
15
 
16
+ print("=" * 70)
17
  print("CLI Chatbot Demo Test")
18
+ print("=" * 70)
19
  print("\nThis will run the chatbot with pre-defined inputs:")
20
  for i, case in enumerate(test_cases, 1):
21
  print(f" {i}. {case}")
22
+ print("\n" + "=" * 70 + "\n")
23
 
24
  # Prepare input string
25
  input_str = "\n".join(test_cases) + "\n"
 
32
  capture_output=True,
33
  text=True,
34
  timeout=120,
35
+ encoding="utf-8",
36
+ errors="replace",
37
  )
38
 
39
  print("STDOUT:")
scripts/test_extraction.py CHANGED
@@ -16,13 +16,13 @@ test_inputs = [
16
  "glucose=185, HbA1c=8.2, cholesterol=235, triglycerides=210, HDL=38",
17
  ]
18
 
19
- print("="*70)
20
  print("BIOMARKER EXTRACTION TEST")
21
- print("="*70)
22
 
23
  for i, test_input in enumerate(test_inputs, 1):
24
  print(f"\n[Test {i}] Input: '{test_input}'")
25
- print("-"*70)
26
 
27
  biomarkers, context = extract_biomarkers(test_input)
28
 
@@ -44,6 +44,6 @@ for i, test_input in enumerate(test_inputs, 1):
44
 
45
  print()
46
 
47
- print("="*70)
48
  print("TEST COMPLETE")
49
- print("="*70)
 
16
  "glucose=185, HbA1c=8.2, cholesterol=235, triglycerides=210, HDL=38",
17
  ]
18
 
19
+ print("=" * 70)
20
  print("BIOMARKER EXTRACTION TEST")
21
+ print("=" * 70)
22
 
23
  for i, test_input in enumerate(test_inputs, 1):
24
  print(f"\n[Test {i}] Input: '{test_input}'")
25
+ print("-" * 70)
26
 
27
  biomarkers, context = extract_biomarkers(test_input)
28
 
 
44
 
45
  print()
46
 
47
+ print("=" * 70)
48
  print("TEST COMPLETE")
49
+ print("=" * 70)
src/agents/biomarker_analyzer.py CHANGED
@@ -3,7 +3,6 @@ MediGuard AI RAG-Helper
3
  Biomarker Analyzer Agent - Validates biomarker values and flags anomalies
4
  """
5
 
6
-
7
  from src.biomarker_validator import BiomarkerValidator
8
  from src.llm_config import llm_config
9
  from src.state import AgentOutput, BiomarkerFlag, GuildState
@@ -19,28 +18,26 @@ class BiomarkerAnalyzerAgent:
19
  def analyze(self, state: GuildState) -> GuildState:
20
  """
21
  Main agent function to analyze biomarkers.
22
-
23
  Args:
24
  state: Current guild state with patient input
25
-
26
  Returns:
27
  Updated state with biomarker analysis
28
  """
29
- print("\n" + "="*70)
30
  print("EXECUTING: Biomarker Analyzer Agent")
31
- print("="*70)
32
 
33
- biomarkers = state['patient_biomarkers']
34
- patient_context = state.get('patient_context', {})
35
- gender = patient_context.get('gender') # None if not provided β€” uses non-gender-specific ranges
36
- predicted_disease = state['model_prediction']['disease']
37
 
38
  # Validate all biomarkers
39
  print(f"\nValidating {len(biomarkers)} biomarkers...")
40
  flags, alerts = self.validator.validate_all(
41
- biomarkers=biomarkers,
42
- gender=gender,
43
- threshold_pct=state['sop'].biomarker_analyzer_threshold
44
  )
45
 
46
  # Get disease-relevant biomarkers
@@ -54,14 +51,11 @@ class BiomarkerAnalyzerAgent:
54
  "safety_alerts": [alert.model_dump() for alert in alerts],
55
  "relevant_biomarkers": relevant_biomarkers,
56
  "summary": summary,
57
- "validation_complete": True
58
  }
59
 
60
  # Create agent output
61
- output = AgentOutput(
62
- agent_name="Biomarker Analyzer",
63
- findings=findings
64
- )
65
 
66
  # Update state
67
  print("\nAnalysis complete:")
@@ -71,10 +65,10 @@ class BiomarkerAnalyzerAgent:
71
  print(f" - {len(relevant_biomarkers)} disease-relevant biomarkers identified")
72
 
73
  return {
74
- 'agent_outputs': [output],
75
- 'biomarker_flags': flags,
76
- 'safety_alerts': alerts,
77
- 'biomarker_analysis': findings
78
  }
79
 
80
  def _generate_summary(
@@ -83,13 +77,13 @@ class BiomarkerAnalyzerAgent:
83
  flags: list[BiomarkerFlag],
84
  alerts: list,
85
  relevant_biomarkers: list[str],
86
- disease: str
87
  ) -> str:
88
  """Generate a concise summary of biomarker findings"""
89
 
90
  # Count anomalies
91
- critical = [f for f in flags if 'CRITICAL' in f.status]
92
- high_low = [f for f in flags if f.status in ['HIGH', 'LOW']]
93
 
94
  prompt = f"""You are a medical data analyst. Provide a brief, clinical summary of these biomarker results.
95
 
 
3
  Biomarker Analyzer Agent - Validates biomarker values and flags anomalies
4
  """
5
 
 
6
  from src.biomarker_validator import BiomarkerValidator
7
  from src.llm_config import llm_config
8
  from src.state import AgentOutput, BiomarkerFlag, GuildState
 
18
  def analyze(self, state: GuildState) -> GuildState:
19
  """
20
  Main agent function to analyze biomarkers.
21
+
22
  Args:
23
  state: Current guild state with patient input
24
+
25
  Returns:
26
  Updated state with biomarker analysis
27
  """
28
+ print("\n" + "=" * 70)
29
  print("EXECUTING: Biomarker Analyzer Agent")
30
+ print("=" * 70)
31
 
32
+ biomarkers = state["patient_biomarkers"]
33
+ patient_context = state.get("patient_context", {})
34
+ gender = patient_context.get("gender") # None if not provided β€” uses non-gender-specific ranges
35
+ predicted_disease = state["model_prediction"]["disease"]
36
 
37
  # Validate all biomarkers
38
  print(f"\nValidating {len(biomarkers)} biomarkers...")
39
  flags, alerts = self.validator.validate_all(
40
+ biomarkers=biomarkers, gender=gender, threshold_pct=state["sop"].biomarker_analyzer_threshold
 
 
41
  )
42
 
43
  # Get disease-relevant biomarkers
 
51
  "safety_alerts": [alert.model_dump() for alert in alerts],
52
  "relevant_biomarkers": relevant_biomarkers,
53
  "summary": summary,
54
+ "validation_complete": True,
55
  }
56
 
57
  # Create agent output
58
+ output = AgentOutput(agent_name="Biomarker Analyzer", findings=findings)
 
 
 
59
 
60
  # Update state
61
  print("\nAnalysis complete:")
 
65
  print(f" - {len(relevant_biomarkers)} disease-relevant biomarkers identified")
66
 
67
  return {
68
+ "agent_outputs": [output],
69
+ "biomarker_flags": flags,
70
+ "safety_alerts": alerts,
71
+ "biomarker_analysis": findings,
72
  }
73
 
74
  def _generate_summary(
 
77
  flags: list[BiomarkerFlag],
78
  alerts: list,
79
  relevant_biomarkers: list[str],
80
+ disease: str,
81
  ) -> str:
82
  """Generate a concise summary of biomarker findings"""
83
 
84
  # Count anomalies
85
+ critical = [f for f in flags if "CRITICAL" in f.status]
86
+ high_low = [f for f in flags if f.status in ["HIGH", "LOW"]]
87
 
88
  prompt = f"""You are a medical data analyst. Provide a brief, clinical summary of these biomarker results.
89
 
src/agents/biomarker_linker.py CHANGED
@@ -3,8 +3,6 @@ MediGuard AI RAG-Helper
3
  Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
4
  """
5
 
6
-
7
-
8
  from src.llm_config import llm_config
9
  from src.state import AgentOutput, GuildState, KeyDriver
10
 
@@ -15,7 +13,7 @@ class BiomarkerDiseaseLinkerAgent:
15
  def __init__(self, retriever):
16
  """
17
  Initialize with a retriever for biomarker-disease connections.
18
-
19
  Args:
20
  retriever: Vector store retriever for biomarker evidence
21
  """
@@ -25,32 +23,27 @@ class BiomarkerDiseaseLinkerAgent:
25
  def link(self, state: GuildState) -> GuildState:
26
  """
27
  Link biomarkers to disease prediction.
28
-
29
  Args:
30
  state: Current guild state
31
-
32
  Returns:
33
  Updated state with biomarker-disease links
34
  """
35
- print("\n" + "="*70)
36
  print("EXECUTING: Biomarker-Disease Linker Agent (RAG)")
37
- print("="*70)
38
 
39
- model_prediction = state['model_prediction']
40
- disease = model_prediction['disease']
41
- biomarkers = state['patient_biomarkers']
42
 
43
  # Get biomarker analysis from previous agent
44
- biomarker_analysis = state.get('biomarker_analysis') or {}
45
 
46
  # Identify key drivers
47
  print(f"\nIdentifying key drivers for {disease}...")
48
- key_drivers, citations_missing = self._identify_key_drivers(
49
- disease,
50
- biomarkers,
51
- biomarker_analysis,
52
- state
53
- )
54
 
55
  print(f"Identified {len(key_drivers)} key biomarker drivers")
56
 
@@ -62,39 +55,29 @@ class BiomarkerDiseaseLinkerAgent:
62
  "key_drivers": [kd.model_dump() for kd in key_drivers],
63
  "total_drivers": len(key_drivers),
64
  "feature_importance_calculated": True,
65
- "citations_missing": citations_missing
66
- }
67
  )
68
 
69
  # Update state
70
  print("\nBiomarker-disease linking complete")
71
 
72
- return {'agent_outputs': [output]}
73
 
74
  def _identify_key_drivers(
75
- self,
76
- disease: str,
77
- biomarkers: dict[str, float],
78
- analysis: dict,
79
- state: GuildState
80
  ) -> tuple[list[KeyDriver], bool]:
81
  """Identify which biomarkers are driving the disease prediction"""
82
 
83
  # Get out-of-range biomarkers from analysis
84
- flags = analysis.get('biomarker_flags', [])
85
- abnormal_biomarkers = [
86
- f for f in flags
87
- if f['status'] != 'NORMAL'
88
- ]
89
 
90
  # Get disease-relevant biomarkers
91
- relevant = analysis.get('relevant_biomarkers', [])
92
 
93
  # Focus on biomarkers that are both abnormal AND disease-relevant
94
- key_biomarkers = [
95
- f for f in abnormal_biomarkers
96
- if f['name'] in relevant
97
- ]
98
 
99
  # If no key biomarkers found, use top abnormal ones
100
  if not key_biomarkers:
@@ -106,28 +89,19 @@ class BiomarkerDiseaseLinkerAgent:
106
  key_drivers: list[KeyDriver] = []
107
  citations_missing = False
108
  for biomarker_flag in key_biomarkers[:5]: # Top 5
109
- driver, driver_missing = self._create_key_driver(
110
- biomarker_flag,
111
- disease,
112
- state
113
- )
114
  key_drivers.append(driver)
115
  citations_missing = citations_missing or driver_missing
116
 
117
  return key_drivers, citations_missing
118
 
119
- def _create_key_driver(
120
- self,
121
- biomarker_flag: dict,
122
- disease: str,
123
- state: GuildState
124
- ) -> tuple[KeyDriver, bool]:
125
  """Create a KeyDriver object with evidence from RAG"""
126
 
127
- name = biomarker_flag['name']
128
- value = biomarker_flag['value']
129
- unit = biomarker_flag['unit']
130
- status = biomarker_flag['status']
131
 
132
  # Retrieve evidence linking this biomarker to the disease
133
  query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"
@@ -135,7 +109,7 @@ class BiomarkerDiseaseLinkerAgent:
135
  citations_missing = False
136
  try:
137
  docs = self.retriever.invoke(query)
138
- if state['sop'].require_pdf_citations and not docs:
139
  evidence_text = "Insufficient evidence available in the knowledge base."
140
  contribution = "Unknown"
141
  citations_missing = True
@@ -149,16 +123,14 @@ class BiomarkerDiseaseLinkerAgent:
149
  citations_missing = True
150
 
151
  # Generate explanation using LLM
152
- explanation = self._generate_explanation(
153
- name, value, unit, status, disease, evidence_text
154
- )
155
 
156
  driver = KeyDriver(
157
  biomarker=name,
158
  value=value,
159
  contribution=contribution,
160
  explanation=explanation,
161
- evidence=evidence_text[:500] # Truncate long evidence
162
  )
163
 
164
  return driver, citations_missing
@@ -173,10 +145,9 @@ class BiomarkerDiseaseLinkerAgent:
173
  for doc in docs[:2]: # Top 2 docs
174
  content = doc.page_content
175
  # Extract sentences mentioning the biomarker
176
- sentences = content.split('.')
177
  relevant_sentences = [
178
- s.strip() for s in sentences
179
- if biomarker.lower() in s.lower() or disease.lower() in s.lower()
180
  ]
181
  evidence.extend(relevant_sentences[:2])
182
 
@@ -184,12 +155,12 @@ class BiomarkerDiseaseLinkerAgent:
184
 
185
  def _estimate_contribution(self, biomarker_flag: dict, doc_count: int) -> str:
186
  """Estimate the contribution percentage (simplified)"""
187
- status = biomarker_flag['status']
188
 
189
  # Simple heuristic based on severity
190
- if 'CRITICAL' in status:
191
  base = 40
192
- elif status in ['HIGH', 'LOW']:
193
  base = 25
194
  else:
195
  base = 10
@@ -201,13 +172,7 @@ class BiomarkerDiseaseLinkerAgent:
201
  return f"{total}%"
202
 
203
  def _generate_explanation(
204
- self,
205
- biomarker: str,
206
- value: float,
207
- unit: str,
208
- status: str,
209
- disease: str,
210
- evidence: str
211
  ) -> str:
212
  """Generate patient-friendly explanation"""
213
 
 
3
  Biomarker-Disease Linker Agent - Connects biomarker values to predicted disease
4
  """
5
 
 
 
6
  from src.llm_config import llm_config
7
  from src.state import AgentOutput, GuildState, KeyDriver
8
 
 
13
  def __init__(self, retriever):
14
  """
15
  Initialize with a retriever for biomarker-disease connections.
16
+
17
  Args:
18
  retriever: Vector store retriever for biomarker evidence
19
  """
 
23
  def link(self, state: GuildState) -> GuildState:
24
  """
25
  Link biomarkers to disease prediction.
26
+
27
  Args:
28
  state: Current guild state
29
+
30
  Returns:
31
  Updated state with biomarker-disease links
32
  """
33
+ print("\n" + "=" * 70)
34
  print("EXECUTING: Biomarker-Disease Linker Agent (RAG)")
35
+ print("=" * 70)
36
 
37
+ model_prediction = state["model_prediction"]
38
+ disease = model_prediction["disease"]
39
+ biomarkers = state["patient_biomarkers"]
40
 
41
  # Get biomarker analysis from previous agent
42
+ biomarker_analysis = state.get("biomarker_analysis") or {}
43
 
44
  # Identify key drivers
45
  print(f"\nIdentifying key drivers for {disease}...")
46
+ key_drivers, citations_missing = self._identify_key_drivers(disease, biomarkers, biomarker_analysis, state)
 
 
 
 
 
47
 
48
  print(f"Identified {len(key_drivers)} key biomarker drivers")
49
 
 
55
  "key_drivers": [kd.model_dump() for kd in key_drivers],
56
  "total_drivers": len(key_drivers),
57
  "feature_importance_calculated": True,
58
+ "citations_missing": citations_missing,
59
+ },
60
  )
61
 
62
  # Update state
63
  print("\nBiomarker-disease linking complete")
64
 
65
+ return {"agent_outputs": [output]}
66
 
67
  def _identify_key_drivers(
68
+ self, disease: str, biomarkers: dict[str, float], analysis: dict, state: GuildState
 
 
 
 
69
  ) -> tuple[list[KeyDriver], bool]:
70
  """Identify which biomarkers are driving the disease prediction"""
71
 
72
  # Get out-of-range biomarkers from analysis
73
+ flags = analysis.get("biomarker_flags", [])
74
+ abnormal_biomarkers = [f for f in flags if f["status"] != "NORMAL"]
 
 
 
75
 
76
  # Get disease-relevant biomarkers
77
+ relevant = analysis.get("relevant_biomarkers", [])
78
 
79
  # Focus on biomarkers that are both abnormal AND disease-relevant
80
+ key_biomarkers = [f for f in abnormal_biomarkers if f["name"] in relevant]
 
 
 
81
 
82
  # If no key biomarkers found, use top abnormal ones
83
  if not key_biomarkers:
 
89
  key_drivers: list[KeyDriver] = []
90
  citations_missing = False
91
  for biomarker_flag in key_biomarkers[:5]: # Top 5
92
+ driver, driver_missing = self._create_key_driver(biomarker_flag, disease, state)
 
 
 
 
93
  key_drivers.append(driver)
94
  citations_missing = citations_missing or driver_missing
95
 
96
  return key_drivers, citations_missing
97
 
98
+ def _create_key_driver(self, biomarker_flag: dict, disease: str, state: GuildState) -> tuple[KeyDriver, bool]:
 
 
 
 
 
99
  """Create a KeyDriver object with evidence from RAG"""
100
 
101
+ name = biomarker_flag["name"]
102
+ value = biomarker_flag["value"]
103
+ unit = biomarker_flag["unit"]
104
+ status = biomarker_flag["status"]
105
 
106
  # Retrieve evidence linking this biomarker to the disease
107
  query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"
 
109
  citations_missing = False
110
  try:
111
  docs = self.retriever.invoke(query)
112
+ if state["sop"].require_pdf_citations and not docs:
113
  evidence_text = "Insufficient evidence available in the knowledge base."
114
  contribution = "Unknown"
115
  citations_missing = True
 
123
  citations_missing = True
124
 
125
  # Generate explanation using LLM
126
+ explanation = self._generate_explanation(name, value, unit, status, disease, evidence_text)
 
 
127
 
128
  driver = KeyDriver(
129
  biomarker=name,
130
  value=value,
131
  contribution=contribution,
132
  explanation=explanation,
133
+ evidence=evidence_text[:500], # Truncate long evidence
134
  )
135
 
136
  return driver, citations_missing
 
145
  for doc in docs[:2]: # Top 2 docs
146
  content = doc.page_content
147
  # Extract sentences mentioning the biomarker
148
+ sentences = content.split(".")
149
  relevant_sentences = [
150
+ s.strip() for s in sentences if biomarker.lower() in s.lower() or disease.lower() in s.lower()
 
151
  ]
152
  evidence.extend(relevant_sentences[:2])
153
 
 
155
 
156
  def _estimate_contribution(self, biomarker_flag: dict, doc_count: int) -> str:
157
  """Estimate the contribution percentage (simplified)"""
158
+ status = biomarker_flag["status"]
159
 
160
  # Simple heuristic based on severity
161
+ if "CRITICAL" in status:
162
  base = 40
163
+ elif status in ["HIGH", "LOW"]:
164
  base = 25
165
  else:
166
  base = 10
 
172
  return f"{total}%"
173
 
174
  def _generate_explanation(
175
+ self, biomarker: str, value: float, unit: str, status: str, disease: str, evidence: str
 
 
 
 
 
 
176
  ) -> str:
177
  """Generate patient-friendly explanation"""
178
 
src/agents/clinical_guidelines.py CHANGED
@@ -17,7 +17,7 @@ class ClinicalGuidelinesAgent:
17
  def __init__(self, retriever):
18
  """
19
  Initialize with a retriever for clinical guidelines.
20
-
21
  Args:
22
  retriever: Vector store retriever for guidelines documents
23
  """
@@ -27,24 +27,24 @@ class ClinicalGuidelinesAgent:
27
  def recommend(self, state: GuildState) -> GuildState:
28
  """
29
  Retrieve clinical guidelines and generate recommendations.
30
-
31
  Args:
32
  state: Current guild state
33
-
34
  Returns:
35
  Updated state with clinical recommendations
36
  """
37
- print("\n" + "="*70)
38
  print("EXECUTING: Clinical Guidelines Agent (RAG)")
39
- print("="*70)
40
 
41
- model_prediction = state['model_prediction']
42
- disease = model_prediction['disease']
43
- confidence = model_prediction['confidence']
44
 
45
  # Get biomarker analysis
46
- biomarker_analysis = state.get('biomarker_analysis') or {}
47
- safety_alerts = biomarker_analysis.get('safety_alerts', [])
48
 
49
  # Retrieve guidelines
50
  print(f"\nRetrieving clinical guidelines for {disease}...")
@@ -57,36 +57,30 @@ class ClinicalGuidelinesAgent:
57
  print(f"Retrieved {len(docs)} guideline documents")
58
 
59
  # Generate recommendations
60
- if state['sop'].require_pdf_citations and not docs:
61
  recommendations = {
62
  "immediate_actions": [
63
  "Insufficient evidence available in the knowledge base. Please consult a healthcare provider."
64
  ],
65
  "lifestyle_changes": [],
66
  "monitoring": [],
67
- "citations": []
68
  }
69
  else:
70
- recommendations = self._generate_recommendations(
71
- disease,
72
- docs,
73
- safety_alerts,
74
- confidence,
75
- state
76
- )
77
 
78
  # Create agent output
79
  output = AgentOutput(
80
  agent_name="Clinical Guidelines",
81
  findings={
82
  "disease": disease,
83
- "immediate_actions": recommendations['immediate_actions'],
84
- "lifestyle_changes": recommendations['lifestyle_changes'],
85
- "monitoring": recommendations['monitoring'],
86
- "guideline_citations": recommendations['citations'],
87
  "safety_priority": len(safety_alerts) > 0,
88
- "citations_missing": state['sop'].require_pdf_citations and not docs
89
- }
90
  )
91
 
92
  # Update state
@@ -95,23 +89,17 @@ class ClinicalGuidelinesAgent:
95
  print(f" - Lifestyle changes: {len(recommendations['lifestyle_changes'])}")
96
  print(f" - Monitoring recommendations: {len(recommendations['monitoring'])}")
97
 
98
- return {'agent_outputs': [output]}
99
 
100
  def _generate_recommendations(
101
- self,
102
- disease: str,
103
- docs: list,
104
- safety_alerts: list,
105
- confidence: float,
106
- state: GuildState
107
  ) -> dict:
108
  """Generate structured recommendations using LLM and guidelines"""
109
 
110
  # Format retrieved guidelines
111
- guidelines_context = "\n\n---\n\n".join([
112
- f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}"
113
- for doc in docs
114
- ])
115
 
116
  # Build safety context
117
  safety_context = ""
@@ -120,8 +108,11 @@ class ClinicalGuidelinesAgent:
120
  for alert in safety_alerts[:3]:
121
  safety_context += f"- {alert.get('biomarker', 'Unknown')}: {alert.get('message', '')}\n"
122
 
123
- prompt = ChatPromptTemplate.from_messages([
124
- ("system", """You are a clinical decision support system providing evidence-based recommendations.
 
 
 
125
  Based on clinical practice guidelines, provide actionable recommendations for patient self-assessment.
126
 
127
  Structure your response with these sections:
@@ -130,26 +121,33 @@ class ClinicalGuidelinesAgent:
130
  3. MONITORING: What to track and how often
131
 
132
  Make recommendations specific, actionable, and guideline-aligned.
133
- Always emphasize consulting healthcare professionals for diagnosis and treatment."""),
134
- ("human", """Disease: {disease}
 
 
 
135
  Prediction Confidence: {confidence:.1%}
136
  {safety_context}
137
 
138
  Clinical Guidelines Context:
139
  {guidelines}
140
 
141
- Please provide structured recommendations for patient self-assessment.""")
142
- ])
 
 
143
 
144
  chain = prompt | self.llm
145
 
146
  try:
147
- response = chain.invoke({
148
- "disease": disease,
149
- "confidence": confidence,
150
- "safety_context": safety_context,
151
- "guidelines": guidelines_context
152
- })
 
 
153
 
154
  recommendations = self._parse_recommendations(response.content)
155
 
@@ -158,82 +156,76 @@ class ClinicalGuidelinesAgent:
158
  recommendations = self._get_default_recommendations(disease, safety_alerts)
159
 
160
  # Add citations
161
- recommendations['citations'] = self._extract_citations(docs)
162
 
163
  return recommendations
164
 
165
  def _parse_recommendations(self, content: str) -> dict:
166
  """Parse LLM response into structured recommendations"""
167
- recommendations = {
168
- "immediate_actions": [],
169
- "lifestyle_changes": [],
170
- "monitoring": []
171
- }
172
 
173
  current_section = None
174
- lines = content.split('\n')
175
 
176
  for line in lines:
177
  line_stripped = line.strip()
178
  line_upper = line_stripped.upper()
179
 
180
  # Detect section headers
181
- if 'IMMEDIATE' in line_upper or 'URGENT' in line_upper:
182
- current_section = 'immediate_actions'
183
- elif 'LIFESTYLE' in line_upper or 'CHANGES' in line_upper or 'DIET' in line_upper:
184
- current_section = 'lifestyle_changes'
185
- elif 'MONITORING' in line_upper or 'TRACK' in line_upper:
186
- current_section = 'monitoring'
187
  # Add bullet points or numbered items
188
  elif current_section and line_stripped:
189
  # Remove bullet points and numbers
190
- cleaned = line_stripped.lstrip('β€’-*0123456789. ')
191
  if cleaned and len(cleaned) > 10: # Minimum length filter
192
  recommendations[current_section].append(cleaned)
193
 
194
  # If parsing failed, create default structure
195
  if not any(recommendations.values()):
196
- sentences = content.split('.')
197
- recommendations['immediate_actions'] = [s.strip() for s in sentences[:2] if s.strip()]
198
- recommendations['lifestyle_changes'] = [s.strip() for s in sentences[2:4] if s.strip()]
199
- recommendations['monitoring'] = [s.strip() for s in sentences[4:6] if s.strip()]
200
 
201
  return recommendations
202
 
203
  def _get_default_recommendations(self, disease: str, safety_alerts: list) -> dict:
204
  """Provide default recommendations if LLM fails"""
205
- recommendations = {
206
- "immediate_actions": [],
207
- "lifestyle_changes": [],
208
- "monitoring": []
209
- }
210
 
211
  # Add safety-based immediate actions
212
  if safety_alerts:
213
- recommendations['immediate_actions'].append(
214
  "Consult healthcare provider immediately regarding critical biomarker values"
215
  )
216
- recommendations['immediate_actions'].append(
217
- "Bring this report and recent lab results to your appointment"
218
- )
219
  else:
220
- recommendations['immediate_actions'].append(
221
  f"Schedule appointment with healthcare provider to discuss {disease} findings"
222
  )
223
 
224
  # Generic lifestyle changes
225
- recommendations['lifestyle_changes'].extend([
226
- "Follow a balanced, nutrient-rich diet as recommended by healthcare provider",
227
- "Maintain regular physical activity appropriate for your health status",
228
- "Track symptoms and biomarker trends over time"
229
- ])
 
 
230
 
231
  # Generic monitoring
232
- recommendations['monitoring'].extend([
233
- f"Regular monitoring of {disease}-related biomarkers as advised by physician",
234
- "Keep a health journal tracking symptoms, diet, and activities",
235
- "Schedule follow-up appointments as recommended"
236
- ])
 
 
237
 
238
  return recommendations
239
 
@@ -242,10 +234,10 @@ class ClinicalGuidelinesAgent:
242
  citations = []
243
 
244
  for doc in docs:
245
- source = doc.metadata.get('source', 'Unknown')
246
 
247
  # Clean up source path
248
- if '\\' in source or '/' in source:
249
  source = Path(source).name
250
 
251
  citations.append(source)
 
17
  def __init__(self, retriever):
18
  """
19
  Initialize with a retriever for clinical guidelines.
20
+
21
  Args:
22
  retriever: Vector store retriever for guidelines documents
23
  """
 
27
  def recommend(self, state: GuildState) -> GuildState:
28
  """
29
  Retrieve clinical guidelines and generate recommendations.
30
+
31
  Args:
32
  state: Current guild state
33
+
34
  Returns:
35
  Updated state with clinical recommendations
36
  """
37
+ print("\n" + "=" * 70)
38
  print("EXECUTING: Clinical Guidelines Agent (RAG)")
39
+ print("=" * 70)
40
 
41
+ model_prediction = state["model_prediction"]
42
+ disease = model_prediction["disease"]
43
+ confidence = model_prediction["confidence"]
44
 
45
  # Get biomarker analysis
46
+ biomarker_analysis = state.get("biomarker_analysis") or {}
47
+ safety_alerts = biomarker_analysis.get("safety_alerts", [])
48
 
49
  # Retrieve guidelines
50
  print(f"\nRetrieving clinical guidelines for {disease}...")
 
57
  print(f"Retrieved {len(docs)} guideline documents")
58
 
59
  # Generate recommendations
60
+ if state["sop"].require_pdf_citations and not docs:
61
  recommendations = {
62
  "immediate_actions": [
63
  "Insufficient evidence available in the knowledge base. Please consult a healthcare provider."
64
  ],
65
  "lifestyle_changes": [],
66
  "monitoring": [],
67
+ "citations": [],
68
  }
69
  else:
70
+ recommendations = self._generate_recommendations(disease, docs, safety_alerts, confidence, state)
 
 
 
 
 
 
71
 
72
  # Create agent output
73
  output = AgentOutput(
74
  agent_name="Clinical Guidelines",
75
  findings={
76
  "disease": disease,
77
+ "immediate_actions": recommendations["immediate_actions"],
78
+ "lifestyle_changes": recommendations["lifestyle_changes"],
79
+ "monitoring": recommendations["monitoring"],
80
+ "guideline_citations": recommendations["citations"],
81
  "safety_priority": len(safety_alerts) > 0,
82
+ "citations_missing": state["sop"].require_pdf_citations and not docs,
83
+ },
84
  )
85
 
86
  # Update state
 
89
  print(f" - Lifestyle changes: {len(recommendations['lifestyle_changes'])}")
90
  print(f" - Monitoring recommendations: {len(recommendations['monitoring'])}")
91
 
92
+ return {"agent_outputs": [output]}
93
 
94
  def _generate_recommendations(
95
+ self, disease: str, docs: list, safety_alerts: list, confidence: float, state: GuildState
 
 
 
 
 
96
  ) -> dict:
97
  """Generate structured recommendations using LLM and guidelines"""
98
 
99
  # Format retrieved guidelines
100
+ guidelines_context = "\n\n---\n\n".join(
101
+ [f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs]
102
+ )
 
103
 
104
  # Build safety context
105
  safety_context = ""
 
108
  for alert in safety_alerts[:3]:
109
  safety_context += f"- {alert.get('biomarker', 'Unknown')}: {alert.get('message', '')}\n"
110
 
111
+ prompt = ChatPromptTemplate.from_messages(
112
+ [
113
+ (
114
+ "system",
115
+ """You are a clinical decision support system providing evidence-based recommendations.
116
  Based on clinical practice guidelines, provide actionable recommendations for patient self-assessment.
117
 
118
  Structure your response with these sections:
 
121
  3. MONITORING: What to track and how often
122
 
123
  Make recommendations specific, actionable, and guideline-aligned.
124
+ Always emphasize consulting healthcare professionals for diagnosis and treatment.""",
125
+ ),
126
+ (
127
+ "human",
128
+ """Disease: {disease}
129
  Prediction Confidence: {confidence:.1%}
130
  {safety_context}
131
 
132
  Clinical Guidelines Context:
133
  {guidelines}
134
 
135
+ Please provide structured recommendations for patient self-assessment.""",
136
+ ),
137
+ ]
138
+ )
139
 
140
  chain = prompt | self.llm
141
 
142
  try:
143
+ response = chain.invoke(
144
+ {
145
+ "disease": disease,
146
+ "confidence": confidence,
147
+ "safety_context": safety_context,
148
+ "guidelines": guidelines_context,
149
+ }
150
+ )
151
 
152
  recommendations = self._parse_recommendations(response.content)
153
 
 
156
  recommendations = self._get_default_recommendations(disease, safety_alerts)
157
 
158
  # Add citations
159
+ recommendations["citations"] = self._extract_citations(docs)
160
 
161
  return recommendations
162
 
163
  def _parse_recommendations(self, content: str) -> dict:
164
  """Parse LLM response into structured recommendations"""
165
+ recommendations = {"immediate_actions": [], "lifestyle_changes": [], "monitoring": []}
 
 
 
 
166
 
167
  current_section = None
168
+ lines = content.split("\n")
169
 
170
  for line in lines:
171
  line_stripped = line.strip()
172
  line_upper = line_stripped.upper()
173
 
174
  # Detect section headers
175
+ if "IMMEDIATE" in line_upper or "URGENT" in line_upper:
176
+ current_section = "immediate_actions"
177
+ elif "LIFESTYLE" in line_upper or "CHANGES" in line_upper or "DIET" in line_upper:
178
+ current_section = "lifestyle_changes"
179
+ elif "MONITORING" in line_upper or "TRACK" in line_upper:
180
+ current_section = "monitoring"
181
  # Add bullet points or numbered items
182
  elif current_section and line_stripped:
183
  # Remove bullet points and numbers
184
+ cleaned = line_stripped.lstrip("β€’-*0123456789. ")
185
  if cleaned and len(cleaned) > 10: # Minimum length filter
186
  recommendations[current_section].append(cleaned)
187
 
188
  # If parsing failed, create default structure
189
  if not any(recommendations.values()):
190
+ sentences = content.split(".")
191
+ recommendations["immediate_actions"] = [s.strip() for s in sentences[:2] if s.strip()]
192
+ recommendations["lifestyle_changes"] = [s.strip() for s in sentences[2:4] if s.strip()]
193
+ recommendations["monitoring"] = [s.strip() for s in sentences[4:6] if s.strip()]
194
 
195
  return recommendations
196
 
197
  def _get_default_recommendations(self, disease: str, safety_alerts: list) -> dict:
198
  """Provide default recommendations if LLM fails"""
199
+ recommendations = {"immediate_actions": [], "lifestyle_changes": [], "monitoring": []}
 
 
 
 
200
 
201
  # Add safety-based immediate actions
202
  if safety_alerts:
203
+ recommendations["immediate_actions"].append(
204
  "Consult healthcare provider immediately regarding critical biomarker values"
205
  )
206
+ recommendations["immediate_actions"].append("Bring this report and recent lab results to your appointment")
 
 
207
  else:
208
+ recommendations["immediate_actions"].append(
209
  f"Schedule appointment with healthcare provider to discuss {disease} findings"
210
  )
211
 
212
  # Generic lifestyle changes
213
+ recommendations["lifestyle_changes"].extend(
214
+ [
215
+ "Follow a balanced, nutrient-rich diet as recommended by healthcare provider",
216
+ "Maintain regular physical activity appropriate for your health status",
217
+ "Track symptoms and biomarker trends over time",
218
+ ]
219
+ )
220
 
221
  # Generic monitoring
222
+ recommendations["monitoring"].extend(
223
+ [
224
+ f"Regular monitoring of {disease}-related biomarkers as advised by physician",
225
+ "Keep a health journal tracking symptoms, diet, and activities",
226
+ "Schedule follow-up appointments as recommended",
227
+ ]
228
+ )
229
 
230
  return recommendations
231
 
 
234
  citations = []
235
 
236
  for doc in docs:
237
+ source = doc.metadata.get("source", "Unknown")
238
 
239
  # Clean up source path
240
+ if "\\" in source or "/" in source:
241
  source = Path(source).name
242
 
243
  citations.append(source)
src/agents/confidence_assessor.py CHANGED
@@ -19,58 +19,42 @@ class ConfidenceAssessorAgent:
19
  def assess(self, state: GuildState) -> GuildState:
20
  """
21
  Assess prediction confidence and identify limitations.
22
-
23
  Args:
24
  state: Current guild state
25
-
26
  Returns:
27
  Updated state with confidence assessment
28
  """
29
- print("\n" + "="*70)
30
  print("EXECUTING: Confidence Assessor Agent")
31
- print("="*70)
32
 
33
- model_prediction = state['model_prediction']
34
- disease = model_prediction['disease']
35
- ml_confidence = model_prediction['confidence']
36
- probabilities = model_prediction.get('probabilities', {})
37
- biomarkers = state['patient_biomarkers']
38
 
39
  # Collect previous agent findings
40
- biomarker_analysis = state.get('biomarker_analysis') or {}
41
  disease_explanation = self._get_agent_findings(state, "Disease Explainer")
42
  linker_findings = self._get_agent_findings(state, "Biomarker-Disease Linker")
43
 
44
  print(f"\nAssessing confidence for {disease} prediction...")
45
 
46
  # Evaluate evidence strength
47
- evidence_strength = self._evaluate_evidence_strength(
48
- biomarker_analysis,
49
- disease_explanation,
50
- linker_findings
51
- )
52
 
53
  # Identify limitations
54
- limitations = self._identify_limitations(
55
- biomarkers,
56
- biomarker_analysis,
57
- probabilities
58
- )
59
 
60
  # Calculate aggregate reliability
61
- reliability = self._calculate_reliability(
62
- ml_confidence,
63
- evidence_strength,
64
- len(limitations)
65
- )
66
 
67
  # Generate assessment summary
68
  assessment_summary = self._generate_assessment(
69
- disease,
70
- ml_confidence,
71
- reliability,
72
- evidence_strength,
73
- limitations
74
  )
75
 
76
  # Create agent output
@@ -83,8 +67,8 @@ class ConfidenceAssessorAgent:
83
  "limitations": limitations,
84
  "assessment_summary": assessment_summary,
85
  "recommendation": self._get_recommendation(reliability),
86
- "alternative_diagnoses": self._get_alternatives(probabilities)
87
- }
88
  )
89
 
90
  # Update state
@@ -93,20 +77,17 @@ class ConfidenceAssessorAgent:
93
  print(f" - Evidence strength: {evidence_strength}")
94
  print(f" - Limitations identified: {len(limitations)}")
95
 
96
- return {'agent_outputs': [output]}
97
 
98
  def _get_agent_findings(self, state: GuildState, agent_name: str) -> dict:
99
  """Extract findings from a specific agent"""
100
- for output in state.get('agent_outputs', []):
101
  if output.agent_name == agent_name:
102
  return output.findings
103
  return {}
104
 
105
  def _evaluate_evidence_strength(
106
- self,
107
- biomarker_analysis: dict,
108
- disease_explanation: dict,
109
- linker_findings: dict
110
  ) -> str:
111
  """Evaluate the strength of supporting evidence"""
112
 
@@ -114,19 +95,19 @@ class ConfidenceAssessorAgent:
114
  max_score = 5
115
 
116
  # Check biomarker validation quality
117
- flags = biomarker_analysis.get('biomarker_flags', [])
118
- abnormal_count = len([f for f in flags if f.get('status') != 'NORMAL'])
119
  if abnormal_count >= 3:
120
  score += 1
121
  if abnormal_count >= 5:
122
  score += 1
123
 
124
  # Check disease explanation quality
125
- if disease_explanation.get('retrieval_quality', 0) >= 3:
126
  score += 1
127
 
128
  # Check biomarker-disease linking
129
- key_drivers = linker_findings.get('key_drivers', [])
130
  if len(key_drivers) >= 2:
131
  score += 1
132
  if len(key_drivers) >= 4:
@@ -141,10 +122,7 @@ class ConfidenceAssessorAgent:
141
  return "WEAK"
142
 
143
  def _identify_limitations(
144
- self,
145
- biomarkers: dict[str, float],
146
- biomarker_analysis: dict,
147
- probabilities: dict[str, float]
148
  ) -> list[str]:
149
  """Identify limitations and uncertainties"""
150
  limitations = []
@@ -161,37 +139,23 @@ class ConfidenceAssessorAgent:
161
  top1, prob1 = sorted_probs[0]
162
  top2, prob2 = sorted_probs[1]
163
  if prob2 > 0.15: # Alternative is significant
164
- limitations.append(
165
- f"Differential diagnosis: {top2} also possible ({prob2:.1%} probability)"
166
- )
167
 
168
  # Check for normal biomarkers despite prediction
169
- flags = biomarker_analysis.get('biomarker_flags', [])
170
- relevant = biomarker_analysis.get('relevant_biomarkers', [])
171
- normal_relevant = [
172
- f for f in flags
173
- if f.get('name') in relevant and f.get('status') == 'NORMAL'
174
- ]
175
  if len(normal_relevant) >= 2:
176
- limitations.append(
177
- "Some disease-relevant biomarkers are within normal range"
178
- )
179
 
180
  # Check for safety alerts (indicates complexity)
181
- alerts = biomarker_analysis.get('safety_alerts', [])
182
  if len(alerts) >= 2:
183
- limitations.append(
184
- "Multiple critical values detected; professional evaluation essential"
185
- )
186
 
187
  return limitations
188
 
189
- def _calculate_reliability(
190
- self,
191
- ml_confidence: float,
192
- evidence_strength: str,
193
- limitation_count: int
194
- ) -> str:
195
  """Calculate overall prediction reliability"""
196
 
197
  score = 0
@@ -224,12 +188,7 @@ class ConfidenceAssessorAgent:
224
  return "LOW"
225
 
226
  def _generate_assessment(
227
- self,
228
- disease: str,
229
- ml_confidence: float,
230
- reliability: str,
231
- evidence_strength: str,
232
- limitations: list[str]
233
  ) -> str:
234
  """Generate human-readable assessment summary"""
235
 
@@ -271,11 +230,9 @@ Be honest about uncertainty. Patient safety is paramount."""
271
  alternatives = []
272
  for disease, prob in sorted_probs[1:4]: # Top 3 alternatives
273
  if prob > 0.05: # Only significant alternatives
274
- alternatives.append({
275
- "disease": disease,
276
- "probability": prob,
277
- "note": "Consider discussing with healthcare provider"
278
- })
279
 
280
  return alternatives
281
 
 
19
  def assess(self, state: GuildState) -> GuildState:
20
  """
21
  Assess prediction confidence and identify limitations.
22
+
23
  Args:
24
  state: Current guild state
25
+
26
  Returns:
27
  Updated state with confidence assessment
28
  """
29
+ print("\n" + "=" * 70)
30
  print("EXECUTING: Confidence Assessor Agent")
31
+ print("=" * 70)
32
 
33
+ model_prediction = state["model_prediction"]
34
+ disease = model_prediction["disease"]
35
+ ml_confidence = model_prediction["confidence"]
36
+ probabilities = model_prediction.get("probabilities", {})
37
+ biomarkers = state["patient_biomarkers"]
38
 
39
  # Collect previous agent findings
40
+ biomarker_analysis = state.get("biomarker_analysis") or {}
41
  disease_explanation = self._get_agent_findings(state, "Disease Explainer")
42
  linker_findings = self._get_agent_findings(state, "Biomarker-Disease Linker")
43
 
44
  print(f"\nAssessing confidence for {disease} prediction...")
45
 
46
  # Evaluate evidence strength
47
+ evidence_strength = self._evaluate_evidence_strength(biomarker_analysis, disease_explanation, linker_findings)
 
 
 
 
48
 
49
  # Identify limitations
50
+ limitations = self._identify_limitations(biomarkers, biomarker_analysis, probabilities)
 
 
 
 
51
 
52
  # Calculate aggregate reliability
53
+ reliability = self._calculate_reliability(ml_confidence, evidence_strength, len(limitations))
 
 
 
 
54
 
55
  # Generate assessment summary
56
  assessment_summary = self._generate_assessment(
57
+ disease, ml_confidence, reliability, evidence_strength, limitations
 
 
 
 
58
  )
59
 
60
  # Create agent output
 
67
  "limitations": limitations,
68
  "assessment_summary": assessment_summary,
69
  "recommendation": self._get_recommendation(reliability),
70
+ "alternative_diagnoses": self._get_alternatives(probabilities),
71
+ },
72
  )
73
 
74
  # Update state
 
77
  print(f" - Evidence strength: {evidence_strength}")
78
  print(f" - Limitations identified: {len(limitations)}")
79
 
80
+ return {"agent_outputs": [output]}
81
 
82
  def _get_agent_findings(self, state: GuildState, agent_name: str) -> dict:
83
  """Extract findings from a specific agent"""
84
+ for output in state.get("agent_outputs", []):
85
  if output.agent_name == agent_name:
86
  return output.findings
87
  return {}
88
 
89
  def _evaluate_evidence_strength(
90
+ self, biomarker_analysis: dict, disease_explanation: dict, linker_findings: dict
 
 
 
91
  ) -> str:
92
  """Evaluate the strength of supporting evidence"""
93
 
 
95
  max_score = 5
96
 
97
  # Check biomarker validation quality
98
+ flags = biomarker_analysis.get("biomarker_flags", [])
99
+ abnormal_count = len([f for f in flags if f.get("status") != "NORMAL"])
100
  if abnormal_count >= 3:
101
  score += 1
102
  if abnormal_count >= 5:
103
  score += 1
104
 
105
  # Check disease explanation quality
106
+ if disease_explanation.get("retrieval_quality", 0) >= 3:
107
  score += 1
108
 
109
  # Check biomarker-disease linking
110
+ key_drivers = linker_findings.get("key_drivers", [])
111
  if len(key_drivers) >= 2:
112
  score += 1
113
  if len(key_drivers) >= 4:
 
122
  return "WEAK"
123
 
124
  def _identify_limitations(
125
+ self, biomarkers: dict[str, float], biomarker_analysis: dict, probabilities: dict[str, float]
 
 
 
126
  ) -> list[str]:
127
  """Identify limitations and uncertainties"""
128
  limitations = []
 
139
  top1, prob1 = sorted_probs[0]
140
  top2, prob2 = sorted_probs[1]
141
  if prob2 > 0.15: # Alternative is significant
142
+ limitations.append(f"Differential diagnosis: {top2} also possible ({prob2:.1%} probability)")
 
 
143
 
144
  # Check for normal biomarkers despite prediction
145
+ flags = biomarker_analysis.get("biomarker_flags", [])
146
+ relevant = biomarker_analysis.get("relevant_biomarkers", [])
147
+ normal_relevant = [f for f in flags if f.get("name") in relevant and f.get("status") == "NORMAL"]
 
 
 
148
  if len(normal_relevant) >= 2:
149
+ limitations.append("Some disease-relevant biomarkers are within normal range")
 
 
150
 
151
  # Check for safety alerts (indicates complexity)
152
+ alerts = biomarker_analysis.get("safety_alerts", [])
153
  if len(alerts) >= 2:
154
+ limitations.append("Multiple critical values detected; professional evaluation essential")
 
 
155
 
156
  return limitations
157
 
158
+ def _calculate_reliability(self, ml_confidence: float, evidence_strength: str, limitation_count: int) -> str:
 
 
 
 
 
159
  """Calculate overall prediction reliability"""
160
 
161
  score = 0
 
188
  return "LOW"
189
 
190
  def _generate_assessment(
191
+ self, disease: str, ml_confidence: float, reliability: str, evidence_strength: str, limitations: list[str]
 
 
 
 
 
192
  ) -> str:
193
  """Generate human-readable assessment summary"""
194
 
 
230
  alternatives = []
231
  for disease, prob in sorted_probs[1:4]: # Top 3 alternatives
232
  if prob > 0.05: # Only significant alternatives
233
+ alternatives.append(
234
+ {"disease": disease, "probability": prob, "note": "Consider discussing with healthcare provider"}
235
+ )
 
 
236
 
237
  return alternatives
238
 
src/agents/disease_explainer.py CHANGED
@@ -17,7 +17,7 @@ class DiseaseExplainerAgent:
17
  def __init__(self, retriever):
18
  """
19
  Initialize with a retriever for medical PDFs.
20
-
21
  Args:
22
  retriever: Vector store retriever for disease documents
23
  """
@@ -27,25 +27,25 @@ class DiseaseExplainerAgent:
27
  def explain(self, state: GuildState) -> GuildState:
28
  """
29
  Retrieve and explain disease pathophysiology.
30
-
31
  Args:
32
  state: Current guild state
33
-
34
  Returns:
35
  Updated state with disease explanation
36
  """
37
- print("\n" + "="*70)
38
  print("EXECUTING: Disease Explainer Agent (RAG)")
39
- print("="*70)
40
 
41
- model_prediction = state['model_prediction']
42
- disease = model_prediction['disease']
43
- confidence = model_prediction['confidence']
44
 
45
  # Configure retrieval based on SOP β€” create a copy to avoid mutating shared retriever
46
- retrieval_k = state['sop'].disease_explainer_k
47
  original_search_kwargs = dict(self.retriever.search_kwargs)
48
- self.retriever.search_kwargs = {**original_search_kwargs, 'k': retrieval_k}
49
 
50
  # Retrieve relevant documents
51
  print(f"\nRetrieving information about: {disease}")
@@ -62,33 +62,33 @@ class DiseaseExplainerAgent:
62
 
63
  print(f"Retrieved {len(docs)} relevant document chunks")
64
 
65
- if state['sop'].require_pdf_citations and not docs:
66
  explanation = {
67
  "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
68
  "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
69
  "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
70
- "summary": "Insufficient evidence available for a detailed explanation."
71
  }
72
  citations = []
73
  output = AgentOutput(
74
  agent_name="Disease Explainer",
75
  findings={
76
  "disease": disease,
77
- "pathophysiology": explanation['pathophysiology'],
78
- "diagnostic_criteria": explanation['diagnostic_criteria'],
79
- "clinical_presentation": explanation['clinical_presentation'],
80
- "mechanism_summary": explanation['summary'],
81
  "citations": citations,
82
  "confidence": confidence,
83
  "retrieval_quality": 0,
84
- "citations_missing": True
85
- }
86
  )
87
 
88
  print("\nDisease explanation generated")
89
  print(" - Pathophysiology: insufficient evidence")
90
  print(" - Citations: 0 sources")
91
- return {'agent_outputs': [output]}
92
 
93
  # Generate explanation
94
  explanation = self._generate_explanation(disease, docs, confidence)
@@ -101,15 +101,15 @@ class DiseaseExplainerAgent:
101
  agent_name="Disease Explainer",
102
  findings={
103
  "disease": disease,
104
- "pathophysiology": explanation['pathophysiology'],
105
- "diagnostic_criteria": explanation['diagnostic_criteria'],
106
- "clinical_presentation": explanation['clinical_presentation'],
107
- "mechanism_summary": explanation['summary'],
108
  "citations": citations,
109
  "confidence": confidence,
110
  "retrieval_quality": len(docs),
111
- "citations_missing": False
112
- }
113
  )
114
 
115
  # Update state
@@ -117,19 +117,21 @@ class DiseaseExplainerAgent:
117
  print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
118
  print(f" - Citations: {len(citations)} sources")
119
 
120
- return {'agent_outputs': [output]}
121
 
122
  def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
123
  """Generate structured disease explanation using LLM and retrieved docs"""
124
 
125
  # Format retrieved context
126
- context = "\n\n---\n\n".join([
127
- f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}"
128
- for doc in docs
129
- ])
130
 
131
- prompt = ChatPromptTemplate.from_messages([
132
- ("system", """You are a medical expert explaining diseases for patient self-assessment.
 
 
 
133
  Based on the provided medical literature, explain the disease in clear, accessible language.
134
  Structure your response with these sections:
135
  1. PATHOPHYSIOLOGY: The underlying biological mechanisms
@@ -137,24 +139,25 @@ class DiseaseExplainerAgent:
137
  3. CLINICAL_PRESENTATION: Common symptoms and signs
138
  4. SUMMARY: A 2-3 sentence overview
139
 
140
- Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers."""),
141
- ("human", """Disease: {disease}
 
 
 
142
  Prediction Confidence: {confidence:.1%}
143
 
144
  Medical Literature Context:
145
  {context}
146
 
147
- Please provide a structured explanation.""")
148
- ])
 
 
149
 
150
  chain = prompt | self.llm
151
 
152
  try:
153
- response = chain.invoke({
154
- "disease": disease,
155
- "confidence": confidence,
156
- "context": context
157
- })
158
 
159
  # Parse structured response
160
  content = response.content
@@ -166,41 +169,36 @@ class DiseaseExplainerAgent:
166
  "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
167
  "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
168
  "clinical_presentation": "Clinical presentation varies by individual.",
169
- "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider."
170
  }
171
 
172
  return explanation
173
 
174
  def _parse_explanation(self, content: str) -> dict:
175
  """Parse LLM response into structured sections"""
176
- sections = {
177
- "pathophysiology": "",
178
- "diagnostic_criteria": "",
179
- "clinical_presentation": "",
180
- "summary": ""
181
- }
182
 
183
  # Simple parsing logic
184
  current_section = None
185
- lines = content.split('\n')
186
 
187
  for line in lines:
188
  line_upper = line.upper().strip()
189
 
190
- if 'PATHOPHYSIOLOGY' in line_upper:
191
- current_section = 'pathophysiology'
192
- elif 'DIAGNOSTIC' in line_upper:
193
- current_section = 'diagnostic_criteria'
194
- elif 'CLINICAL' in line_upper or 'PRESENTATION' in line_upper:
195
- current_section = 'clinical_presentation'
196
- elif 'SUMMARY' in line_upper:
197
- current_section = 'summary'
198
  elif current_section and line.strip():
199
  sections[current_section] += line + "\n"
200
 
201
  # If parsing failed, use full content as summary
202
  if not any(sections.values()):
203
- sections['summary'] = content[:500]
204
 
205
  return sections
206
 
@@ -209,15 +207,15 @@ class DiseaseExplainerAgent:
209
  citations = []
210
 
211
  for doc in docs:
212
- source = doc.metadata.get('source', 'Unknown')
213
- page = doc.metadata.get('page', 'N/A')
214
 
215
  # Clean up source path
216
- if '\\' in source or '/' in source:
217
  source = Path(source).name
218
 
219
  citation = f"{source}"
220
- if page != 'N/A':
221
  citation += f" (Page {page})"
222
 
223
  citations.append(citation)
 
17
  def __init__(self, retriever):
18
  """
19
  Initialize with a retriever for medical PDFs.
20
+
21
  Args:
22
  retriever: Vector store retriever for disease documents
23
  """
 
27
  def explain(self, state: GuildState) -> GuildState:
28
  """
29
  Retrieve and explain disease pathophysiology.
30
+
31
  Args:
32
  state: Current guild state
33
+
34
  Returns:
35
  Updated state with disease explanation
36
  """
37
+ print("\n" + "=" * 70)
38
  print("EXECUTING: Disease Explainer Agent (RAG)")
39
+ print("=" * 70)
40
 
41
+ model_prediction = state["model_prediction"]
42
+ disease = model_prediction["disease"]
43
+ confidence = model_prediction["confidence"]
44
 
45
  # Configure retrieval based on SOP β€” create a copy to avoid mutating shared retriever
46
+ retrieval_k = state["sop"].disease_explainer_k
47
  original_search_kwargs = dict(self.retriever.search_kwargs)
48
+ self.retriever.search_kwargs = {**original_search_kwargs, "k": retrieval_k}
49
 
50
  # Retrieve relevant documents
51
  print(f"\nRetrieving information about: {disease}")
 
62
 
63
  print(f"Retrieved {len(docs)} relevant document chunks")
64
 
65
+ if state["sop"].require_pdf_citations and not docs:
66
  explanation = {
67
  "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
68
  "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
69
  "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
70
+ "summary": "Insufficient evidence available for a detailed explanation.",
71
  }
72
  citations = []
73
  output = AgentOutput(
74
  agent_name="Disease Explainer",
75
  findings={
76
  "disease": disease,
77
+ "pathophysiology": explanation["pathophysiology"],
78
+ "diagnostic_criteria": explanation["diagnostic_criteria"],
79
+ "clinical_presentation": explanation["clinical_presentation"],
80
+ "mechanism_summary": explanation["summary"],
81
  "citations": citations,
82
  "confidence": confidence,
83
  "retrieval_quality": 0,
84
+ "citations_missing": True,
85
+ },
86
  )
87
 
88
  print("\nDisease explanation generated")
89
  print(" - Pathophysiology: insufficient evidence")
90
  print(" - Citations: 0 sources")
91
+ return {"agent_outputs": [output]}
92
 
93
  # Generate explanation
94
  explanation = self._generate_explanation(disease, docs, confidence)
 
101
  agent_name="Disease Explainer",
102
  findings={
103
  "disease": disease,
104
+ "pathophysiology": explanation["pathophysiology"],
105
+ "diagnostic_criteria": explanation["diagnostic_criteria"],
106
+ "clinical_presentation": explanation["clinical_presentation"],
107
+ "mechanism_summary": explanation["summary"],
108
  "citations": citations,
109
  "confidence": confidence,
110
  "retrieval_quality": len(docs),
111
+ "citations_missing": False,
112
+ },
113
  )
114
 
115
  # Update state
 
117
  print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
118
  print(f" - Citations: {len(citations)} sources")
119
 
120
+ return {"agent_outputs": [output]}
121
 
122
  def _generate_explanation(self, disease: str, docs: list, confidence: float) -> dict:
123
  """Generate structured disease explanation using LLM and retrieved docs"""
124
 
125
  # Format retrieved context
126
+ context = "\n\n---\n\n".join(
127
+ [f"Source: {doc.metadata.get('source', 'Unknown')}\n\n{doc.page_content}" for doc in docs]
128
+ )
 
129
 
130
+ prompt = ChatPromptTemplate.from_messages(
131
+ [
132
+ (
133
+ "system",
134
+ """You are a medical expert explaining diseases for patient self-assessment.
135
  Based on the provided medical literature, explain the disease in clear, accessible language.
136
  Structure your response with these sections:
137
  1. PATHOPHYSIOLOGY: The underlying biological mechanisms
 
139
  3. CLINICAL_PRESENTATION: Common symptoms and signs
140
  4. SUMMARY: A 2-3 sentence overview
141
 
142
+ Be accurate, cite-able, and patient-friendly. Focus on how the disease affects blood biomarkers.""",
143
+ ),
144
+ (
145
+ "human",
146
+ """Disease: {disease}
147
  Prediction Confidence: {confidence:.1%}
148
 
149
  Medical Literature Context:
150
  {context}
151
 
152
+ Please provide a structured explanation.""",
153
+ ),
154
+ ]
155
+ )
156
 
157
  chain = prompt | self.llm
158
 
159
  try:
160
+ response = chain.invoke({"disease": disease, "confidence": confidence, "context": context})
 
 
 
 
161
 
162
  # Parse structured response
163
  content = response.content
 
169
  "pathophysiology": f"{disease} is a medical condition requiring professional diagnosis.",
170
  "diagnostic_criteria": "Consult medical guidelines for diagnostic criteria.",
171
  "clinical_presentation": "Clinical presentation varies by individual.",
172
+ "summary": f"{disease} detected with {confidence:.1%} confidence. Consult healthcare provider.",
173
  }
174
 
175
  return explanation
176
 
177
  def _parse_explanation(self, content: str) -> dict:
178
  """Parse LLM response into structured sections"""
179
+ sections = {"pathophysiology": "", "diagnostic_criteria": "", "clinical_presentation": "", "summary": ""}
 
 
 
 
 
180
 
181
  # Simple parsing logic
182
  current_section = None
183
+ lines = content.split("\n")
184
 
185
  for line in lines:
186
  line_upper = line.upper().strip()
187
 
188
+ if "PATHOPHYSIOLOGY" in line_upper:
189
+ current_section = "pathophysiology"
190
+ elif "DIAGNOSTIC" in line_upper:
191
+ current_section = "diagnostic_criteria"
192
+ elif "CLINICAL" in line_upper or "PRESENTATION" in line_upper:
193
+ current_section = "clinical_presentation"
194
+ elif "SUMMARY" in line_upper:
195
+ current_section = "summary"
196
  elif current_section and line.strip():
197
  sections[current_section] += line + "\n"
198
 
199
  # If parsing failed, use full content as summary
200
  if not any(sections.values()):
201
+ sections["summary"] = content[:500]
202
 
203
  return sections
204
 
 
207
  citations = []
208
 
209
  for doc in docs:
210
+ source = doc.metadata.get("source", "Unknown")
211
+ page = doc.metadata.get("page", "N/A")
212
 
213
  # Clean up source path
214
+ if "\\" in source or "/" in source:
215
  source = Path(source).name
216
 
217
  citation = f"{source}"
218
+ if page != "N/A":
219
  citation += f" (Page {page})"
220
 
221
  citations.append(citation)
src/agents/response_synthesizer.py CHANGED
@@ -20,21 +20,21 @@ class ResponseSynthesizerAgent:
20
  def synthesize(self, state: GuildState) -> GuildState:
21
  """
22
  Synthesize all agent outputs into final response.
23
-
24
  Args:
25
  state: Complete guild state with all agent outputs
26
-
27
  Returns:
28
  Updated state with final_response
29
  """
30
- print("\n" + "="*70)
31
  print("EXECUTING: Response Synthesizer Agent")
32
- print("="*70)
33
 
34
- model_prediction = state['model_prediction']
35
- patient_biomarkers = state['patient_biomarkers']
36
- patient_context = state.get('patient_context', {})
37
- agent_outputs = state.get('agent_outputs', [])
38
 
39
  # Collect findings from all agents
40
  findings = self._collect_findings(agent_outputs)
@@ -62,24 +62,24 @@ class ResponseSynthesizerAgent:
62
  "disease_explanation": self._build_disease_explanation(findings),
63
  "recommendations": recs,
64
  "confidence_assessment": self._build_confidence_assessment(findings),
65
- "alternative_diagnoses": self._build_alternative_diagnoses(findings)
66
- }
67
  }
68
 
69
  # Generate patient-friendly summary
70
  response["patient_summary"]["narrative"] = self._generate_narrative_summary(
71
- model_prediction,
72
- findings,
73
- response
74
  )
75
 
76
  print("\nResponse synthesis complete")
77
  print(" - Patient summary: Generated")
78
  print(f" - Prediction explanation: {len(response['prediction_explanation']['key_drivers'])} key drivers")
79
- print(f" - Recommendations: {len(response['clinical_recommendations']['immediate_actions'])} immediate actions")
 
 
80
  print(f" - Safety alerts: {len(response['safety_alerts'])} alerts")
81
 
82
- return {'final_response': response}
83
 
84
  def _collect_findings(self, agent_outputs: list) -> dict[str, Any]:
85
  """Organize all agent findings by agent name"""
@@ -91,19 +91,19 @@ class ResponseSynthesizerAgent:
91
  def _build_patient_summary(self, biomarkers: dict, findings: dict) -> dict:
92
  """Build patient summary section"""
93
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
94
- flags = biomarker_analysis.get('biomarker_flags', [])
95
 
96
  # Count biomarker statuses
97
- critical = len([f for f in flags if 'CRITICAL' in f.get('status', '')])
98
- abnormal = len([f for f in flags if f.get('status') != 'NORMAL'])
99
 
100
  return {
101
  "total_biomarkers_tested": len(biomarkers),
102
  "biomarkers_in_normal_range": len(flags) - abnormal,
103
  "biomarkers_out_of_range": abnormal,
104
  "critical_values": critical,
105
- "overall_risk_profile": biomarker_analysis.get('summary', 'Assessment complete'),
106
- "narrative": "" # Will be filled later
107
  }
108
 
109
  def _build_prediction_explanation(self, model_prediction: dict, findings: dict) -> dict:
@@ -111,18 +111,18 @@ class ResponseSynthesizerAgent:
111
  disease_explanation = findings.get("Disease Explainer", {})
112
  linker_findings = findings.get("Biomarker-Disease Linker", {})
113
 
114
- disease = model_prediction['disease']
115
- confidence = model_prediction['confidence']
116
 
117
  # Get key drivers
118
- key_drivers_raw = linker_findings.get('key_drivers', [])
119
  key_drivers = [
120
  {
121
- "biomarker": kd.get('biomarker'),
122
- "value": kd.get('value'),
123
- "contribution": kd.get('contribution'),
124
- "explanation": kd.get('explanation'),
125
- "evidence": kd.get('evidence', '')[:200] # Truncate
126
  }
127
  for kd in key_drivers_raw
128
  ]
@@ -131,25 +131,25 @@ class ResponseSynthesizerAgent:
131
  "primary_disease": disease,
132
  "confidence": confidence,
133
  "key_drivers": key_drivers,
134
- "mechanism_summary": disease_explanation.get('mechanism_summary', disease_explanation.get('summary', '')),
135
- "pathophysiology": disease_explanation.get('pathophysiology', ''),
136
- "pdf_references": disease_explanation.get('citations', [])
137
  }
138
 
139
  def _build_biomarker_flags(self, findings: dict) -> list[dict]:
140
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
141
- return biomarker_analysis.get('biomarker_flags', [])
142
 
143
  def _build_key_drivers(self, findings: dict) -> list[dict]:
144
  linker_findings = findings.get("Biomarker-Disease Linker", {})
145
- return linker_findings.get('key_drivers', [])
146
 
147
  def _build_disease_explanation(self, findings: dict) -> dict:
148
  disease_explanation = findings.get("Disease Explainer", {})
149
  return {
150
- "pathophysiology": disease_explanation.get('pathophysiology', ''),
151
- "citations": disease_explanation.get('citations', []),
152
- "retrieved_chunks": disease_explanation.get('retrieved_chunks')
153
  }
154
 
155
  def _build_recommendations(self, findings: dict) -> dict:
@@ -157,10 +157,10 @@ class ResponseSynthesizerAgent:
157
  guidelines = findings.get("Clinical Guidelines", {})
158
 
159
  return {
160
- "immediate_actions": guidelines.get('immediate_actions', []),
161
- "lifestyle_changes": guidelines.get('lifestyle_changes', []),
162
- "monitoring": guidelines.get('monitoring', []),
163
- "guideline_citations": guidelines.get('guideline_citations', [])
164
  }
165
 
166
  def _build_confidence_assessment(self, findings: dict) -> dict:
@@ -168,22 +168,22 @@ class ResponseSynthesizerAgent:
168
  assessment = findings.get("Confidence Assessor", {})
169
 
170
  return {
171
- "prediction_reliability": assessment.get('prediction_reliability', 'UNKNOWN'),
172
- "evidence_strength": assessment.get('evidence_strength', 'UNKNOWN'),
173
- "limitations": assessment.get('limitations', []),
174
- "recommendation": assessment.get('recommendation', 'Consult healthcare provider'),
175
- "assessment_summary": assessment.get('assessment_summary', ''),
176
- "alternative_diagnoses": assessment.get('alternative_diagnoses', [])
177
  }
178
 
179
  def _build_alternative_diagnoses(self, findings: dict) -> list[dict]:
180
  assessment = findings.get("Confidence Assessor", {})
181
- return assessment.get('alternative_diagnoses', [])
182
 
183
  def _build_safety_alerts(self, findings: dict) -> list[dict]:
184
  """Build safety alerts section"""
185
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
186
- return biomarker_analysis.get('safety_alerts', [])
187
 
188
  def _build_metadata(self, state: GuildState) -> dict:
189
  """Build metadata section"""
@@ -193,59 +193,64 @@ class ResponseSynthesizerAgent:
193
  "timestamp": datetime.now().isoformat(),
194
  "system_version": "MediGuard AI RAG-Helper v1.0",
195
  "sop_version": "Baseline",
196
- "agents_executed": [output.agent_name for output in state.get('agent_outputs', [])],
197
- "disclaimer": "This is an AI-assisted analysis tool for patient self-assessment. It is NOT a substitute for professional medical advice, diagnosis, or treatment. Always consult qualified healthcare providers for medical decisions."
198
  }
199
 
200
- def _generate_narrative_summary(
201
- self,
202
- model_prediction,
203
- findings: dict,
204
- response: dict
205
- ) -> str:
206
  """Generate a patient-friendly narrative summary using LLM"""
207
 
208
- disease = model_prediction['disease']
209
- confidence = model_prediction['confidence']
210
- reliability = response['confidence_assessment']['prediction_reliability']
211
 
212
  # Get key points
213
- critical_count = response['patient_summary']['critical_values']
214
- abnormal_count = response['patient_summary']['biomarkers_out_of_range']
215
- key_drivers = response['prediction_explanation']['key_drivers']
216
-
217
- prompt = ChatPromptTemplate.from_messages([
218
- ("system", """You are a medical AI assistant explaining test results to a patient.
 
 
 
219
  Write a clear, compassionate 3-4 sentence summary that:
220
  1. States the predicted condition and confidence level
221
  2. Highlights the most important biomarker findings
222
  3. Emphasizes the need for medical consultation
223
  4. Offers reassurance while being honest about findings
224
 
225
- Use patient-friendly language. Avoid medical jargon. Be supportive and clear."""),
226
- ("human", """Disease Predicted: {disease}
 
 
 
227
  Model Confidence: {confidence:.1%}
228
  Overall Reliability: {reliability}
229
  Critical Values: {critical}
230
  Out-of-Range Values: {abnormal}
231
  Top Biomarker Drivers: {drivers}
232
 
233
- Write a compassionate patient summary.""")
234
- ])
 
 
235
 
236
  chain = prompt | self.llm
237
 
238
  try:
239
- driver_names = [kd['biomarker'] for kd in key_drivers[:3]]
240
-
241
- response_obj = chain.invoke({
242
- "disease": disease,
243
- "confidence": confidence,
244
- "reliability": reliability,
245
- "critical": critical_count,
246
- "abnormal": abnormal_count,
247
- "drivers": ", ".join(driver_names) if driver_names else "Multiple biomarkers"
248
- })
 
 
249
 
250
  return response_obj.content.strip()
251
 
 
20
  def synthesize(self, state: GuildState) -> GuildState:
21
  """
22
  Synthesize all agent outputs into final response.
23
+
24
  Args:
25
  state: Complete guild state with all agent outputs
26
+
27
  Returns:
28
  Updated state with final_response
29
  """
30
+ print("\n" + "=" * 70)
31
  print("EXECUTING: Response Synthesizer Agent")
32
+ print("=" * 70)
33
 
34
+ model_prediction = state["model_prediction"]
35
+ patient_biomarkers = state["patient_biomarkers"]
36
+ patient_context = state.get("patient_context", {})
37
+ agent_outputs = state.get("agent_outputs", [])
38
 
39
  # Collect findings from all agents
40
  findings = self._collect_findings(agent_outputs)
 
62
  "disease_explanation": self._build_disease_explanation(findings),
63
  "recommendations": recs,
64
  "confidence_assessment": self._build_confidence_assessment(findings),
65
+ "alternative_diagnoses": self._build_alternative_diagnoses(findings),
66
+ },
67
  }
68
 
69
  # Generate patient-friendly summary
70
  response["patient_summary"]["narrative"] = self._generate_narrative_summary(
71
+ model_prediction, findings, response
 
 
72
  )
73
 
74
  print("\nResponse synthesis complete")
75
  print(" - Patient summary: Generated")
76
  print(f" - Prediction explanation: {len(response['prediction_explanation']['key_drivers'])} key drivers")
77
+ print(
78
+ f" - Recommendations: {len(response['clinical_recommendations']['immediate_actions'])} immediate actions"
79
+ )
80
  print(f" - Safety alerts: {len(response['safety_alerts'])} alerts")
81
 
82
+ return {"final_response": response}
83
 
84
  def _collect_findings(self, agent_outputs: list) -> dict[str, Any]:
85
  """Organize all agent findings by agent name"""
 
91
  def _build_patient_summary(self, biomarkers: dict, findings: dict) -> dict:
92
  """Build patient summary section"""
93
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
94
+ flags = biomarker_analysis.get("biomarker_flags", [])
95
 
96
  # Count biomarker statuses
97
+ critical = len([f for f in flags if "CRITICAL" in f.get("status", "")])
98
+ abnormal = len([f for f in flags if f.get("status") != "NORMAL"])
99
 
100
  return {
101
  "total_biomarkers_tested": len(biomarkers),
102
  "biomarkers_in_normal_range": len(flags) - abnormal,
103
  "biomarkers_out_of_range": abnormal,
104
  "critical_values": critical,
105
+ "overall_risk_profile": biomarker_analysis.get("summary", "Assessment complete"),
106
+ "narrative": "", # Will be filled later
107
  }
108
 
109
  def _build_prediction_explanation(self, model_prediction: dict, findings: dict) -> dict:
 
111
  disease_explanation = findings.get("Disease Explainer", {})
112
  linker_findings = findings.get("Biomarker-Disease Linker", {})
113
 
114
+ disease = model_prediction["disease"]
115
+ confidence = model_prediction["confidence"]
116
 
117
  # Get key drivers
118
+ key_drivers_raw = linker_findings.get("key_drivers", [])
119
  key_drivers = [
120
  {
121
+ "biomarker": kd.get("biomarker"),
122
+ "value": kd.get("value"),
123
+ "contribution": kd.get("contribution"),
124
+ "explanation": kd.get("explanation"),
125
+ "evidence": kd.get("evidence", "")[:200], # Truncate
126
  }
127
  for kd in key_drivers_raw
128
  ]
 
131
  "primary_disease": disease,
132
  "confidence": confidence,
133
  "key_drivers": key_drivers,
134
+ "mechanism_summary": disease_explanation.get("mechanism_summary", disease_explanation.get("summary", "")),
135
+ "pathophysiology": disease_explanation.get("pathophysiology", ""),
136
+ "pdf_references": disease_explanation.get("citations", []),
137
  }
138
 
139
  def _build_biomarker_flags(self, findings: dict) -> list[dict]:
140
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
141
+ return biomarker_analysis.get("biomarker_flags", [])
142
 
143
  def _build_key_drivers(self, findings: dict) -> list[dict]:
144
  linker_findings = findings.get("Biomarker-Disease Linker", {})
145
+ return linker_findings.get("key_drivers", [])
146
 
147
  def _build_disease_explanation(self, findings: dict) -> dict:
148
  disease_explanation = findings.get("Disease Explainer", {})
149
  return {
150
+ "pathophysiology": disease_explanation.get("pathophysiology", ""),
151
+ "citations": disease_explanation.get("citations", []),
152
+ "retrieved_chunks": disease_explanation.get("retrieved_chunks"),
153
  }
154
 
155
  def _build_recommendations(self, findings: dict) -> dict:
 
157
  guidelines = findings.get("Clinical Guidelines", {})
158
 
159
  return {
160
+ "immediate_actions": guidelines.get("immediate_actions", []),
161
+ "lifestyle_changes": guidelines.get("lifestyle_changes", []),
162
+ "monitoring": guidelines.get("monitoring", []),
163
+ "guideline_citations": guidelines.get("guideline_citations", []),
164
  }
165
 
166
  def _build_confidence_assessment(self, findings: dict) -> dict:
 
168
  assessment = findings.get("Confidence Assessor", {})
169
 
170
  return {
171
+ "prediction_reliability": assessment.get("prediction_reliability", "UNKNOWN"),
172
+ "evidence_strength": assessment.get("evidence_strength", "UNKNOWN"),
173
+ "limitations": assessment.get("limitations", []),
174
+ "recommendation": assessment.get("recommendation", "Consult healthcare provider"),
175
+ "assessment_summary": assessment.get("assessment_summary", ""),
176
+ "alternative_diagnoses": assessment.get("alternative_diagnoses", []),
177
  }
178
 
179
  def _build_alternative_diagnoses(self, findings: dict) -> list[dict]:
180
  assessment = findings.get("Confidence Assessor", {})
181
+ return assessment.get("alternative_diagnoses", [])
182
 
183
  def _build_safety_alerts(self, findings: dict) -> list[dict]:
184
  """Build safety alerts section"""
185
  biomarker_analysis = findings.get("Biomarker Analyzer", {})
186
+ return biomarker_analysis.get("safety_alerts", [])
187
 
188
  def _build_metadata(self, state: GuildState) -> dict:
189
  """Build metadata section"""
 
193
  "timestamp": datetime.now().isoformat(),
194
  "system_version": "MediGuard AI RAG-Helper v1.0",
195
  "sop_version": "Baseline",
196
+ "agents_executed": [output.agent_name for output in state.get("agent_outputs", [])],
197
+ "disclaimer": "This is an AI-assisted analysis tool for patient self-assessment. It is NOT a substitute for professional medical advice, diagnosis, or treatment. Always consult qualified healthcare providers for medical decisions.",
198
  }
199
 
200
+ def _generate_narrative_summary(self, model_prediction, findings: dict, response: dict) -> str:
 
 
 
 
 
201
  """Generate a patient-friendly narrative summary using LLM"""
202
 
203
+ disease = model_prediction["disease"]
204
+ confidence = model_prediction["confidence"]
205
+ reliability = response["confidence_assessment"]["prediction_reliability"]
206
 
207
  # Get key points
208
+ critical_count = response["patient_summary"]["critical_values"]
209
+ abnormal_count = response["patient_summary"]["biomarkers_out_of_range"]
210
+ key_drivers = response["prediction_explanation"]["key_drivers"]
211
+
212
+ prompt = ChatPromptTemplate.from_messages(
213
+ [
214
+ (
215
+ "system",
216
+ """You are a medical AI assistant explaining test results to a patient.
217
  Write a clear, compassionate 3-4 sentence summary that:
218
  1. States the predicted condition and confidence level
219
  2. Highlights the most important biomarker findings
220
  3. Emphasizes the need for medical consultation
221
  4. Offers reassurance while being honest about findings
222
 
223
+ Use patient-friendly language. Avoid medical jargon. Be supportive and clear.""",
224
+ ),
225
+ (
226
+ "human",
227
+ """Disease Predicted: {disease}
228
  Model Confidence: {confidence:.1%}
229
  Overall Reliability: {reliability}
230
  Critical Values: {critical}
231
  Out-of-Range Values: {abnormal}
232
  Top Biomarker Drivers: {drivers}
233
 
234
+ Write a compassionate patient summary.""",
235
+ ),
236
+ ]
237
+ )
238
 
239
  chain = prompt | self.llm
240
 
241
  try:
242
+ driver_names = [kd["biomarker"] for kd in key_drivers[:3]]
243
+
244
+ response_obj = chain.invoke(
245
+ {
246
+ "disease": disease,
247
+ "confidence": confidence,
248
+ "reliability": reliability,
249
+ "critical": critical_count,
250
+ "abnormal": abnormal_count,
251
+ "drivers": ", ".join(driver_names) if driver_names else "Multiple biomarkers",
252
+ }
253
+ )
254
 
255
  return response_obj.content.strip()
256
 
src/biomarker_normalization.py CHANGED
@@ -3,14 +3,12 @@ MediGuard AI RAG-Helper
3
  Shared biomarker normalization utilities
4
  """
5
 
6
-
7
  # Normalization map for biomarker aliases to canonical names.
8
  NORMALIZATION_MAP: dict[str, str] = {
9
  # Glucose variations
10
  "glucose": "Glucose",
11
  "bloodsugar": "Glucose",
12
  "bloodglucose": "Glucose",
13
-
14
  # Lipid panel
15
  "cholesterol": "Cholesterol",
16
  "totalcholesterol": "Cholesterol",
@@ -20,17 +18,14 @@ NORMALIZATION_MAP: dict[str, str] = {
20
  "ldlcholesterol": "LDL Cholesterol",
21
  "hdl": "HDL Cholesterol",
22
  "hdlcholesterol": "HDL Cholesterol",
23
-
24
  # Diabetes markers
25
  "hba1c": "HbA1c",
26
  "a1c": "HbA1c",
27
  "hemoglobina1c": "HbA1c",
28
  "insulin": "Insulin",
29
-
30
  # Body metrics
31
  "bmi": "BMI",
32
  "bodymassindex": "BMI",
33
-
34
  # Complete Blood Count (CBC)
35
  "hemoglobin": "Hemoglobin",
36
  "hgb": "Hemoglobin",
@@ -45,14 +40,12 @@ NORMALIZATION_MAP: dict[str, str] = {
45
  "redcells": "Red Blood Cells",
46
  "hematocrit": "Hematocrit",
47
  "hct": "Hematocrit",
48
-
49
  # Red blood cell indices
50
  "mcv": "Mean Corpuscular Volume",
51
  "meancorpuscularvolume": "Mean Corpuscular Volume",
52
  "mch": "Mean Corpuscular Hemoglobin",
53
  "meancorpuscularhemoglobin": "Mean Corpuscular Hemoglobin",
54
  "mchc": "Mean Corpuscular Hemoglobin Concentration",
55
-
56
  # Cardiovascular
57
  "heartrate": "Heart Rate",
58
  "hr": "Heart Rate",
@@ -64,7 +57,6 @@ NORMALIZATION_MAP: dict[str, str] = {
64
  "diastolic": "Diastolic Blood Pressure",
65
  "dbp": "Diastolic Blood Pressure",
66
  "troponin": "Troponin",
67
-
68
  # Inflammation and liver
69
  "creactiveprotein": "C-reactive Protein",
70
  "crp": "C-reactive Protein",
@@ -72,10 +64,8 @@ NORMALIZATION_MAP: dict[str, str] = {
72
  "alanineaminotransferase": "ALT",
73
  "ast": "AST",
74
  "aspartateaminotransferase": "AST",
75
-
76
  # Kidney
77
  "creatinine": "Creatinine",
78
-
79
  # Thyroid
80
  "tsh": "TSH",
81
  "thyroidstimulatinghormone": "TSH",
@@ -83,7 +73,6 @@ NORMALIZATION_MAP: dict[str, str] = {
83
  "triiodothyronine": "T3",
84
  "t4": "T4",
85
  "thyroxine": "T4",
86
-
87
  # Electrolytes
88
  "sodium": "Sodium",
89
  "na": "Sodium",
@@ -95,14 +84,12 @@ NORMALIZATION_MAP: dict[str, str] = {
95
  "cl": "Chloride",
96
  "bicarbonate": "Bicarbonate",
97
  "hco3": "Bicarbonate",
98
-
99
  # Kidney / Metabolic
100
  "urea": "Urea",
101
  "bun": "BUN",
102
  "bloodureanitrogen": "BUN",
103
  "buncreatinineratio": "BUN_Creatinine_Ratio",
104
  "uricacid": "Uric_Acid",
105
-
106
  # Liver / Protein
107
  "totalprotein": "Total_Protein",
108
  "albumin": "Albumin",
@@ -113,7 +100,6 @@ NORMALIZATION_MAP: dict[str, str] = {
113
  "bilirubin": "Bilirubin_Total",
114
  "alp": "ALP",
115
  "alkalinephosphatase": "ALP",
116
-
117
  # Lipids
118
  "vldl": "VLDL",
119
  }
 
3
  Shared biomarker normalization utilities
4
  """
5
 
 
6
  # Normalization map for biomarker aliases to canonical names.
7
  NORMALIZATION_MAP: dict[str, str] = {
8
  # Glucose variations
9
  "glucose": "Glucose",
10
  "bloodsugar": "Glucose",
11
  "bloodglucose": "Glucose",
 
12
  # Lipid panel
13
  "cholesterol": "Cholesterol",
14
  "totalcholesterol": "Cholesterol",
 
18
  "ldlcholesterol": "LDL Cholesterol",
19
  "hdl": "HDL Cholesterol",
20
  "hdlcholesterol": "HDL Cholesterol",
 
21
  # Diabetes markers
22
  "hba1c": "HbA1c",
23
  "a1c": "HbA1c",
24
  "hemoglobina1c": "HbA1c",
25
  "insulin": "Insulin",
 
26
  # Body metrics
27
  "bmi": "BMI",
28
  "bodymassindex": "BMI",
 
29
  # Complete Blood Count (CBC)
30
  "hemoglobin": "Hemoglobin",
31
  "hgb": "Hemoglobin",
 
40
  "redcells": "Red Blood Cells",
41
  "hematocrit": "Hematocrit",
42
  "hct": "Hematocrit",
 
43
  # Red blood cell indices
44
  "mcv": "Mean Corpuscular Volume",
45
  "meancorpuscularvolume": "Mean Corpuscular Volume",
46
  "mch": "Mean Corpuscular Hemoglobin",
47
  "meancorpuscularhemoglobin": "Mean Corpuscular Hemoglobin",
48
  "mchc": "Mean Corpuscular Hemoglobin Concentration",
 
49
  # Cardiovascular
50
  "heartrate": "Heart Rate",
51
  "hr": "Heart Rate",
 
57
  "diastolic": "Diastolic Blood Pressure",
58
  "dbp": "Diastolic Blood Pressure",
59
  "troponin": "Troponin",
 
60
  # Inflammation and liver
61
  "creactiveprotein": "C-reactive Protein",
62
  "crp": "C-reactive Protein",
 
64
  "alanineaminotransferase": "ALT",
65
  "ast": "AST",
66
  "aspartateaminotransferase": "AST",
 
67
  # Kidney
68
  "creatinine": "Creatinine",
 
69
  # Thyroid
70
  "tsh": "TSH",
71
  "thyroidstimulatinghormone": "TSH",
 
73
  "triiodothyronine": "T3",
74
  "t4": "T4",
75
  "thyroxine": "T4",
 
76
  # Electrolytes
77
  "sodium": "Sodium",
78
  "na": "Sodium",
 
84
  "cl": "Chloride",
85
  "bicarbonate": "Bicarbonate",
86
  "hco3": "Bicarbonate",
 
87
  # Kidney / Metabolic
88
  "urea": "Urea",
89
  "bun": "BUN",
90
  "bloodureanitrogen": "BUN",
91
  "buncreatinineratio": "BUN_Creatinine_Ratio",
92
  "uricacid": "Uric_Acid",
 
93
  # Liver / Protein
94
  "totalprotein": "Total_Protein",
95
  "albumin": "Albumin",
 
100
  "bilirubin": "Bilirubin_Total",
101
  "alp": "ALP",
102
  "alkalinephosphatase": "ALP",
 
103
  # Lipids
104
  "vldl": "VLDL",
105
  }
src/biomarker_validator.py CHANGED
@@ -16,24 +16,20 @@ class BiomarkerValidator:
16
  """Load biomarker reference ranges from JSON file"""
17
  ref_path = Path(__file__).parent.parent / reference_file
18
  with open(ref_path) as f:
19
- self.references = json.load(f)['biomarkers']
20
 
21
  def validate_biomarker(
22
- self,
23
- name: str,
24
- value: float,
25
- gender: str | None = None,
26
- threshold_pct: float = 0.0
27
  ) -> BiomarkerFlag:
28
  """
29
  Validate a single biomarker value against reference ranges.
30
-
31
  Args:
32
  name: Biomarker name
33
  value: Measured value
34
  gender: "male" or "female" (for gender-specific ranges)
35
  threshold_pct: Only flag LOW/HIGH if deviation from boundary exceeds this fraction (e.g. 0.15 = 15%)
36
-
37
  Returns:
38
  BiomarkerFlag object with status and warnings
39
  """
@@ -44,27 +40,27 @@ class BiomarkerValidator:
44
  unit="unknown",
45
  status="UNKNOWN",
46
  reference_range="No reference data available",
47
- warning=f"No reference range found for {name}"
48
  )
49
 
50
  ref = self.references[name]
51
- unit = ref['unit']
52
 
53
  # Handle gender-specific ranges
54
- if ref.get('gender_specific', False) and gender:
55
- if gender.lower() in ['male', 'm']:
56
- normal = ref['normal_range']['male']
57
- elif gender.lower() in ['female', 'f']:
58
- normal = ref['normal_range']['female']
59
  else:
60
- normal = ref['normal_range']
61
  else:
62
- normal = ref['normal_range']
63
 
64
- min_val = normal.get('min', 0)
65
- max_val = normal.get('max', float('inf'))
66
- critical_low = ref.get('critical_low')
67
- critical_high = ref.get('critical_high')
68
 
69
  # Determine status
70
  status = "NORMAL"
@@ -92,28 +88,20 @@ class BiomarkerValidator:
92
  reference_range = f"{min_val}-{max_val} {unit}"
93
 
94
  return BiomarkerFlag(
95
- name=name,
96
- value=value,
97
- unit=unit,
98
- status=status,
99
- reference_range=reference_range,
100
- warning=warning
101
  )
102
 
103
  def validate_all(
104
- self,
105
- biomarkers: dict[str, float],
106
- gender: str | None = None,
107
- threshold_pct: float = 0.0
108
  ) -> tuple[list[BiomarkerFlag], list[SafetyAlert]]:
109
  """
110
  Validate all biomarker values.
111
-
112
  Args:
113
  biomarkers: Dict of biomarker name -> value
114
  gender: "male" or "female" (for gender-specific ranges)
115
  threshold_pct: Only flag LOW/HIGH if deviation exceeds this fraction (e.g. 0.15 = 15%)
116
-
117
  Returns:
118
  Tuple of (biomarker_flags, safety_alerts)
119
  """
@@ -126,20 +114,24 @@ class BiomarkerValidator:
126
 
127
  # Generate safety alerts for critical values
128
  if flag.status in ["CRITICAL_LOW", "CRITICAL_HIGH"]:
129
- alerts.append(SafetyAlert(
130
- severity="CRITICAL",
131
- biomarker=name,
132
- message=flag.warning or f"{name} at critical level",
133
- action="SEEK IMMEDIATE MEDICAL ATTENTION"
134
- ))
 
 
135
  elif flag.status in ["LOW", "HIGH"]:
136
  severity = "HIGH" if "severe" in (flag.warning or "").lower() else "MEDIUM"
137
- alerts.append(SafetyAlert(
138
- severity=severity,
139
- biomarker=name,
140
- message=flag.warning or f"{name} out of normal range",
141
- action="Consult with healthcare provider"
142
- ))
 
 
143
 
144
  return flags, alerts
145
 
@@ -154,40 +146,57 @@ class BiomarkerValidator:
154
  def get_disease_relevant_biomarkers(self, disease: str) -> list[str]:
155
  """
156
  Get list of biomarkers most relevant to a specific disease.
157
-
158
  This is a simplified mapping - in production, this would be more sophisticated.
159
  """
160
  disease_map = {
161
- "Diabetes": [
162
- "Glucose", "HbA1c", "Insulin", "BMI",
163
- "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
164
- ],
165
  "Type 2 Diabetes": [
166
- "Glucose", "HbA1c", "Insulin", "BMI",
167
- "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
 
 
 
 
 
168
  ],
169
  "Type 1 Diabetes": [
170
- "Glucose", "HbA1c", "Insulin", "BMI",
171
- "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"
 
 
 
 
 
172
  ],
173
  "Anemia": [
174
- "Hemoglobin", "Red Blood Cells", "Hematocrit",
175
- "Mean Corpuscular Volume", "Mean Corpuscular Hemoglobin",
176
- "Mean Corpuscular Hemoglobin Concentration"
177
- ],
178
- "Thrombocytopenia": [
179
- "Platelets", "White Blood Cells", "Hemoglobin"
180
  ],
 
181
  "Thalassemia": [
182
- "Hemoglobin", "Red Blood Cells", "Mean Corpuscular Volume",
183
- "Mean Corpuscular Hemoglobin", "Hematocrit"
 
 
 
184
  ],
185
  "Heart Disease": [
186
- "Cholesterol", "LDL Cholesterol", "HDL Cholesterol",
187
- "Triglycerides", "Troponin", "C-reactive Protein",
188
- "Systolic Blood Pressure", "Diastolic Blood Pressure",
189
- "Heart Rate", "BMI"
190
- ]
 
 
 
 
 
 
191
  }
192
 
193
  return disease_map.get(disease, [])
 
16
  """Load biomarker reference ranges from JSON file"""
17
  ref_path = Path(__file__).parent.parent / reference_file
18
  with open(ref_path) as f:
19
+ self.references = json.load(f)["biomarkers"]
20
 
21
  def validate_biomarker(
22
+ self, name: str, value: float, gender: str | None = None, threshold_pct: float = 0.0
 
 
 
 
23
  ) -> BiomarkerFlag:
24
  """
25
  Validate a single biomarker value against reference ranges.
26
+
27
  Args:
28
  name: Biomarker name
29
  value: Measured value
30
  gender: "male" or "female" (for gender-specific ranges)
31
  threshold_pct: Only flag LOW/HIGH if deviation from boundary exceeds this fraction (e.g. 0.15 = 15%)
32
+
33
  Returns:
34
  BiomarkerFlag object with status and warnings
35
  """
 
40
  unit="unknown",
41
  status="UNKNOWN",
42
  reference_range="No reference data available",
43
+ warning=f"No reference range found for {name}",
44
  )
45
 
46
  ref = self.references[name]
47
+ unit = ref["unit"]
48
 
49
  # Handle gender-specific ranges
50
+ if ref.get("gender_specific", False) and gender:
51
+ if gender.lower() in ["male", "m"]:
52
+ normal = ref["normal_range"]["male"]
53
+ elif gender.lower() in ["female", "f"]:
54
+ normal = ref["normal_range"]["female"]
55
  else:
56
+ normal = ref["normal_range"]
57
  else:
58
+ normal = ref["normal_range"]
59
 
60
+ min_val = normal.get("min", 0)
61
+ max_val = normal.get("max", float("inf"))
62
+ critical_low = ref.get("critical_low")
63
+ critical_high = ref.get("critical_high")
64
 
65
  # Determine status
66
  status = "NORMAL"
 
88
  reference_range = f"{min_val}-{max_val} {unit}"
89
 
90
  return BiomarkerFlag(
91
+ name=name, value=value, unit=unit, status=status, reference_range=reference_range, warning=warning
 
 
 
 
 
92
  )
93
 
94
  def validate_all(
95
+ self, biomarkers: dict[str, float], gender: str | None = None, threshold_pct: float = 0.0
 
 
 
96
  ) -> tuple[list[BiomarkerFlag], list[SafetyAlert]]:
97
  """
98
  Validate all biomarker values.
99
+
100
  Args:
101
  biomarkers: Dict of biomarker name -> value
102
  gender: "male" or "female" (for gender-specific ranges)
103
  threshold_pct: Only flag LOW/HIGH if deviation exceeds this fraction (e.g. 0.15 = 15%)
104
+
105
  Returns:
106
  Tuple of (biomarker_flags, safety_alerts)
107
  """
 
114
 
115
  # Generate safety alerts for critical values
116
  if flag.status in ["CRITICAL_LOW", "CRITICAL_HIGH"]:
117
+ alerts.append(
118
+ SafetyAlert(
119
+ severity="CRITICAL",
120
+ biomarker=name,
121
+ message=flag.warning or f"{name} at critical level",
122
+ action="SEEK IMMEDIATE MEDICAL ATTENTION",
123
+ )
124
+ )
125
  elif flag.status in ["LOW", "HIGH"]:
126
  severity = "HIGH" if "severe" in (flag.warning or "").lower() else "MEDIUM"
127
+ alerts.append(
128
+ SafetyAlert(
129
+ severity=severity,
130
+ biomarker=name,
131
+ message=flag.warning or f"{name} out of normal range",
132
+ action="Consult with healthcare provider",
133
+ )
134
+ )
135
 
136
  return flags, alerts
137
 
 
146
  def get_disease_relevant_biomarkers(self, disease: str) -> list[str]:
147
  """
148
  Get list of biomarkers most relevant to a specific disease.
149
+
150
  This is a simplified mapping - in production, this would be more sophisticated.
151
  """
152
  disease_map = {
153
+ "Diabetes": ["Glucose", "HbA1c", "Insulin", "BMI", "Triglycerides", "HDL Cholesterol", "LDL Cholesterol"],
 
 
 
154
  "Type 2 Diabetes": [
155
+ "Glucose",
156
+ "HbA1c",
157
+ "Insulin",
158
+ "BMI",
159
+ "Triglycerides",
160
+ "HDL Cholesterol",
161
+ "LDL Cholesterol",
162
  ],
163
  "Type 1 Diabetes": [
164
+ "Glucose",
165
+ "HbA1c",
166
+ "Insulin",
167
+ "BMI",
168
+ "Triglycerides",
169
+ "HDL Cholesterol",
170
+ "LDL Cholesterol",
171
  ],
172
  "Anemia": [
173
+ "Hemoglobin",
174
+ "Red Blood Cells",
175
+ "Hematocrit",
176
+ "Mean Corpuscular Volume",
177
+ "Mean Corpuscular Hemoglobin",
178
+ "Mean Corpuscular Hemoglobin Concentration",
179
  ],
180
+ "Thrombocytopenia": ["Platelets", "White Blood Cells", "Hemoglobin"],
181
  "Thalassemia": [
182
+ "Hemoglobin",
183
+ "Red Blood Cells",
184
+ "Mean Corpuscular Volume",
185
+ "Mean Corpuscular Hemoglobin",
186
+ "Hematocrit",
187
  ],
188
  "Heart Disease": [
189
+ "Cholesterol",
190
+ "LDL Cholesterol",
191
+ "HDL Cholesterol",
192
+ "Triglycerides",
193
+ "Troponin",
194
+ "C-reactive Protein",
195
+ "Systolic Blood Pressure",
196
+ "Diastolic Blood Pressure",
197
+ "Heart Rate",
198
+ "BMI",
199
+ ],
200
  }
201
 
202
  return disease_map.get(disease, [])
src/config.py CHANGED
@@ -17,24 +17,16 @@ class ExplanationSOP(BaseModel):
17
 
18
  # === Agent Behavior Parameters ===
19
  biomarker_analyzer_threshold: float = Field(
20
- default=0.15,
21
- description="Percentage deviation from normal range to trigger a warning flag (0.15 = 15%)"
22
  )
23
 
24
  disease_explainer_k: int = Field(
25
- default=5,
26
- description="Number of top PDF chunks to retrieve for disease explanation"
27
  )
28
 
29
- linker_retrieval_k: int = Field(
30
- default=3,
31
- description="Number of chunks for biomarker-disease linking"
32
- )
33
 
34
- guideline_retrieval_k: int = Field(
35
- default=3,
36
- description="Number of chunks for clinical guidelines"
37
- )
38
 
39
  # === Prompts (Evolvable) ===
40
  planner_prompt: str = Field(
@@ -48,7 +40,7 @@ Available specialist agents:
48
  - Confidence Assessor: Evaluates prediction reliability
49
 
50
  Output a JSON with key 'plan' containing a list of tasks. Each task must have 'agent', 'task_description', and 'dependencies' keys.""",
51
- description="System prompt for the Planner Agent"
52
  )
53
 
54
  synthesizer_prompt: str = Field(
@@ -63,45 +55,36 @@ Output a JSON with key 'plan' containing a list of tasks. Each task must have 'a
63
  - Be transparent about limitations and uncertainties
64
 
65
  Structure your output as specified in the output schema.""",
66
- description="System prompt for the Response Synthesizer"
67
  )
68
 
69
  explainer_detail_level: Literal["concise", "detailed", "comprehensive"] = Field(
70
- default="detailed",
71
- description="Level of detail in disease mechanism explanations"
72
  )
73
 
74
  # === Feature Flags ===
75
  use_guideline_agent: bool = Field(
76
- default=True,
77
- description="Whether to retrieve clinical guidelines and recommendations"
78
  )
79
 
80
  include_alternative_diagnoses: bool = Field(
81
- default=True,
82
- description="Whether to discuss alternative diagnoses from prediction probabilities"
83
  )
84
 
85
- require_pdf_citations: bool = Field(
86
- default=True,
87
- description="Whether to require PDF citations for all claims"
88
- )
89
 
90
  use_confidence_assessor: bool = Field(
91
- default=True,
92
- description="Whether to evaluate and report prediction confidence"
93
  )
94
 
95
  # === Safety Settings ===
96
  critical_value_alert_mode: Literal["strict", "moderate", "permissive"] = Field(
97
- default="strict",
98
- description="Threshold for critical value alerts"
99
  )
100
 
101
  # === Model Selection ===
102
  synthesizer_model: str = Field(
103
- default="default",
104
- description="LLM to use for final response synthesis (uses provider default)"
105
  )
106
 
107
 
@@ -117,5 +100,5 @@ BASELINE_SOP = ExplanationSOP(
117
  require_pdf_citations=True,
118
  use_confidence_assessor=True,
119
  critical_value_alert_mode="strict",
120
- synthesizer_model="default"
121
  )
 
17
 
18
  # === Agent Behavior Parameters ===
19
  biomarker_analyzer_threshold: float = Field(
20
+ default=0.15, description="Percentage deviation from normal range to trigger a warning flag (0.15 = 15%)"
 
21
  )
22
 
23
  disease_explainer_k: int = Field(
24
+ default=5, description="Number of top PDF chunks to retrieve for disease explanation"
 
25
  )
26
 
27
+ linker_retrieval_k: int = Field(default=3, description="Number of chunks for biomarker-disease linking")
 
 
 
28
 
29
+ guideline_retrieval_k: int = Field(default=3, description="Number of chunks for clinical guidelines")
 
 
 
30
 
31
  # === Prompts (Evolvable) ===
32
  planner_prompt: str = Field(
 
40
  - Confidence Assessor: Evaluates prediction reliability
41
 
42
  Output a JSON with key 'plan' containing a list of tasks. Each task must have 'agent', 'task_description', and 'dependencies' keys.""",
43
+ description="System prompt for the Planner Agent",
44
  )
45
 
46
  synthesizer_prompt: str = Field(
 
55
  - Be transparent about limitations and uncertainties
56
 
57
  Structure your output as specified in the output schema.""",
58
+ description="System prompt for the Response Synthesizer",
59
  )
60
 
61
  explainer_detail_level: Literal["concise", "detailed", "comprehensive"] = Field(
62
+ default="detailed", description="Level of detail in disease mechanism explanations"
 
63
  )
64
 
65
  # === Feature Flags ===
66
  use_guideline_agent: bool = Field(
67
+ default=True, description="Whether to retrieve clinical guidelines and recommendations"
 
68
  )
69
 
70
  include_alternative_diagnoses: bool = Field(
71
+ default=True, description="Whether to discuss alternative diagnoses from prediction probabilities"
 
72
  )
73
 
74
+ require_pdf_citations: bool = Field(default=True, description="Whether to require PDF citations for all claims")
 
 
 
75
 
76
  use_confidence_assessor: bool = Field(
77
+ default=True, description="Whether to evaluate and report prediction confidence"
 
78
  )
79
 
80
  # === Safety Settings ===
81
  critical_value_alert_mode: Literal["strict", "moderate", "permissive"] = Field(
82
+ default="strict", description="Threshold for critical value alerts"
 
83
  )
84
 
85
  # === Model Selection ===
86
  synthesizer_model: str = Field(
87
+ default="default", description="LLM to use for final response synthesis (uses provider default)"
 
88
  )
89
 
90
 
 
100
  require_pdf_citations=True,
101
  use_confidence_assessor=True,
102
  critical_value_alert_mode="strict",
103
+ synthesizer_model="default",
104
  )
src/database.py CHANGED
@@ -17,6 +17,7 @@ from src.settings import get_settings
17
 
18
  class Base(DeclarativeBase):
19
  """Shared declarative base for all ORM models."""
 
20
  pass
21
 
22
 
 
17
 
18
  class Base(DeclarativeBase):
19
  """Shared declarative base for all ORM models."""
20
+
21
  pass
22
 
23
 
src/evaluation/__init__.py CHANGED
@@ -15,12 +15,12 @@ from .evaluators import (
15
  )
16
 
17
  __all__ = [
18
- 'EvaluationResult',
19
- 'GradedScore',
20
- 'evaluate_actionability',
21
- 'evaluate_clarity',
22
- 'evaluate_clinical_accuracy',
23
- 'evaluate_evidence_grounding',
24
- 'evaluate_safety_completeness',
25
- 'run_full_evaluation'
26
  ]
 
15
  )
16
 
17
  __all__ = [
18
+ "EvaluationResult",
19
+ "GradedScore",
20
+ "evaluate_actionability",
21
+ "evaluate_clarity",
22
+ "evaluate_clinical_accuracy",
23
+ "evaluate_evidence_grounding",
24
+ "evaluate_safety_completeness",
25
+ "run_full_evaluation",
26
  ]
src/evaluation/evaluators.py CHANGED
@@ -17,7 +17,7 @@ IMPORTANT LIMITATIONS:
17
 
18
  Usage:
19
  from src.evaluation.evaluators import run_5d_evaluation
20
-
21
  result = run_5d_evaluation(final_response, pubmed_context)
22
  print(f"Average score: {result.average_score():.2f}")
23
  """
@@ -37,12 +37,14 @@ DETERMINISTIC_MODE = os.environ.get("EVALUATION_DETERMINISTIC", "false").lower()
37
 
38
  class GradedScore(BaseModel):
39
  """Structured score with justification"""
 
40
  score: float = Field(description="Score from 0.0 to 1.0", ge=0.0, le=1.0)
41
  reasoning: str = Field(description="Justification for the score")
42
 
43
 
44
  class EvaluationResult(BaseModel):
45
  """Complete 5D evaluation result"""
 
46
  clinical_accuracy: GradedScore
47
  evidence_grounding: GradedScore
48
  actionability: GradedScore
@@ -56,7 +58,7 @@ class EvaluationResult(BaseModel):
56
  self.evidence_grounding.score,
57
  self.actionability.score,
58
  self.clarity.score,
59
- self.safety_completeness.score
60
  ]
61
 
62
  def average_score(self) -> float:
@@ -66,14 +68,11 @@ class EvaluationResult(BaseModel):
66
 
67
 
68
  # Evaluator 1: Clinical Accuracy (LLM-as-Judge)
69
- def evaluate_clinical_accuracy(
70
- final_response: dict[str, Any],
71
- pubmed_context: str
72
- ) -> GradedScore:
73
  """
74
  Evaluates if medical interpretations are accurate.
75
  Uses cloud LLM (Groq/Gemini) as expert judge.
76
-
77
  In DETERMINISTIC_MODE, uses heuristics instead.
78
  """
79
  # Deterministic mode for testing
@@ -81,13 +80,13 @@ def evaluate_clinical_accuracy(
81
  return _deterministic_clinical_accuracy(final_response, pubmed_context)
82
 
83
  # Use cloud LLM for evaluation (FREE via Groq/Gemini)
84
- evaluator_llm = get_chat_model(
85
- temperature=0.0,
86
- json_mode=True
87
- )
88
 
89
- prompt = ChatPromptTemplate.from_messages([
90
- ("system", """You are a medical expert evaluating clinical accuracy.
 
 
 
91
 
92
  Evaluate the following clinical assessment:
93
  - Are biomarker interpretations medically correct?
@@ -99,8 +98,11 @@ Score 0.0 = Contains dangerous misinformation
99
 
100
  Respond ONLY with valid JSON in this format:
101
  {{"score": 0.85, "reasoning": "Your detailed justification here"}}
102
- """),
103
- ("human", """Evaluate this clinical output:
 
 
 
104
 
105
  **Patient Summary:**
106
  {patient_summary}
@@ -113,42 +115,44 @@ Respond ONLY with valid JSON in this format:
113
 
114
  **Scientific Context (Ground Truth):**
115
  {context}
116
- """)
117
- ])
 
 
118
 
119
  chain = prompt | evaluator_llm
120
- result = chain.invoke({
121
- "patient_summary": final_response['patient_summary'],
122
- "prediction_explanation": final_response['prediction_explanation'],
123
- "recommendations": final_response['clinical_recommendations'],
124
- "context": pubmed_context
125
- })
 
 
126
 
127
  # Parse JSON response
128
  try:
129
  content = result.content if isinstance(result.content, str) else str(result.content)
130
  parsed = json.loads(content)
131
- return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
132
  except (json.JSONDecodeError, KeyError, TypeError):
133
  # Fallback if JSON parsing fails β€” use a conservative score to avoid inflating metrics
134
  return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
135
 
136
 
137
  # Evaluator 2: Evidence Grounding (Programmatic + LLM)
138
- def evaluate_evidence_grounding(
139
- final_response: dict[str, Any]
140
- ) -> GradedScore:
141
  """
142
  Checks if all claims are backed by citations.
143
  Programmatic + LLM verification.
144
  """
145
  # Count citations
146
- pdf_refs = final_response['prediction_explanation'].get('pdf_references', [])
147
  citation_count = len(pdf_refs)
148
 
149
  # Check key drivers have evidence
150
- key_drivers = final_response['prediction_explanation'].get('key_drivers', [])
151
- drivers_with_evidence = sum(1 for d in key_drivers if d.get('evidence'))
152
 
153
  # Citation coverage score
154
  if len(key_drivers) > 0:
@@ -169,13 +173,11 @@ def evaluate_evidence_grounding(
169
 
170
 
171
  # Evaluator 3: Clinical Actionability (LLM-as-Judge)
172
- def evaluate_actionability(
173
- final_response: dict[str, Any]
174
- ) -> GradedScore:
175
  """
176
  Evaluates if recommendations are actionable and safe.
177
  Uses cloud LLM (Groq/Gemini) as expert judge.
178
-
179
  In DETERMINISTIC_MODE, uses heuristics instead.
180
  """
181
  # Deterministic mode for testing
@@ -183,13 +185,13 @@ def evaluate_actionability(
183
  return _deterministic_actionability(final_response)
184
 
185
  # Use cloud LLM for evaluation (FREE via Groq/Gemini)
186
- evaluator_llm = get_chat_model(
187
- temperature=0.0,
188
- json_mode=True
189
- )
190
 
191
- prompt = ChatPromptTemplate.from_messages([
192
- ("system", """You are a clinical care coordinator evaluating actionability.
 
 
 
193
 
194
  Evaluate the following recommendations:
195
  - Are immediate actions clear and appropriate?
@@ -202,8 +204,11 @@ Score 0.0 = Vague, impractical, or unsafe
202
 
203
  Respond ONLY with valid JSON in this format:
204
  {{"score": 0.90, "reasoning": "Your detailed justification here"}}
205
- """),
206
- ("human", """Evaluate these recommendations:
 
 
 
207
 
208
  **Immediate Actions:**
209
  {immediate_actions}
@@ -216,35 +221,37 @@ Respond ONLY with valid JSON in this format:
216
 
217
  **Confidence Assessment:**
218
  {confidence}
219
- """)
220
- ])
 
 
221
 
222
  chain = prompt | evaluator_llm
223
- recs = final_response['clinical_recommendations']
224
- result = chain.invoke({
225
- "immediate_actions": recs.get('immediate_actions', []),
226
- "lifestyle_changes": recs.get('lifestyle_changes', []),
227
- "monitoring": recs.get('monitoring', []),
228
- "confidence": final_response['confidence_assessment']
229
- })
 
 
230
 
231
  # Parse JSON response
232
  try:
233
  parsed = json.loads(result.content if isinstance(result.content, str) else str(result.content))
234
- return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
235
  except (json.JSONDecodeError, KeyError, TypeError):
236
  # Fallback if JSON parsing fails β€” use a conservative score to avoid inflating metrics
237
  return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
238
 
239
 
240
  # Evaluator 4: Explainability Clarity (Programmatic)
241
- def evaluate_clarity(
242
- final_response: dict[str, Any]
243
- ) -> GradedScore:
244
  """
245
  Measures readability and patient-friendliness.
246
  Uses programmatic text analysis.
247
-
248
  In DETERMINISTIC_MODE, uses simple heuristics for reproducibility.
249
  """
250
  # Deterministic mode for testing
@@ -253,12 +260,13 @@ def evaluate_clarity(
253
 
254
  try:
255
  import textstat
 
256
  has_textstat = True
257
  except ImportError:
258
  has_textstat = False
259
 
260
  # Get patient narrative
261
- narrative = final_response['patient_summary'].get('narrative', '')
262
 
263
  if has_textstat:
264
  # Calculate readability (Flesch Reading Ease)
@@ -268,7 +276,7 @@ def evaluate_clarity(
268
  readability_score = min(1.0, flesch_score / 70.0) # Normalize to 1.0 at Flesch=70
269
  else:
270
  # Fallback: simple sentence length heuristic
271
- sentences = narrative.split('.')
272
  avg_words = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
273
  # Optimal: 15-20 words per sentence
274
  if 15 <= avg_words <= 20:
@@ -280,8 +288,13 @@ def evaluate_clarity(
280
 
281
  # Medical jargon detection (simple heuristic)
282
  medical_terms = [
283
- 'pathophysiology', 'etiology', 'hemostasis', 'coagulation',
284
- 'thrombocytopenia', 'erythropoiesis', 'gluconeogenesis'
 
 
 
 
 
285
  ]
286
  jargon_count = sum(1 for term in medical_terms if term.lower() in narrative.lower())
287
 
@@ -293,7 +306,7 @@ def evaluate_clarity(
293
  jargon_penalty = max(0.0, 1.0 - (jargon_count * 0.2))
294
  length_score = 1.0 if optimal_length else 0.7
295
 
296
- final_score = (readability_score * 0.5 + jargon_penalty * 0.3 + length_score * 0.2)
297
 
298
  if has_textstat:
299
  reasoning = f"""
@@ -314,10 +327,7 @@ def evaluate_clarity(
314
 
315
 
316
  # Evaluator 5: Safety & Completeness (Programmatic)
317
- def evaluate_safety_completeness(
318
- final_response: dict[str, Any],
319
- biomarkers: dict[str, float]
320
- ) -> GradedScore:
321
  """
322
  Checks if all safety concerns are flagged.
323
  Programmatic validation.
@@ -333,24 +343,24 @@ def evaluate_safety_completeness(
333
 
334
  for name, value in biomarkers.items():
335
  result = validator.validate_biomarker(name, value) # Fixed: use validate_biomarker instead of validate_single
336
- if result.status in ['HIGH', 'LOW', 'CRITICAL_HIGH', 'CRITICAL_LOW']:
337
  out_of_range_count += 1
338
- if result.status in ['CRITICAL_HIGH', 'CRITICAL_LOW']:
339
  critical_count += 1
340
 
341
  # Count safety alerts in output
342
- safety_alerts = final_response.get('safety_alerts', [])
343
  alert_count = len(safety_alerts)
344
- critical_alerts = sum(1 for a in safety_alerts if a.get('severity') == 'CRITICAL')
345
 
346
  # Check if all critical values have alerts
347
  critical_coverage = critical_alerts / critical_count if critical_count > 0 else 1.0
348
 
349
  # Check for disclaimer
350
- has_disclaimer = 'disclaimer' in final_response.get('metadata', {})
351
 
352
  # Check for uncertainty acknowledgment
353
- limitations = final_response['confidence_assessment'].get('limitations', [])
354
  acknowledges_uncertainty = len(limitations) > 0
355
 
356
  # Scoring
@@ -359,12 +369,9 @@ def evaluate_safety_completeness(
359
  disclaimer_score = 1.0 if has_disclaimer else 0.0
360
  uncertainty_score = 1.0 if acknowledges_uncertainty else 0.5
361
 
362
- final_score = min(1.0, (
363
- alert_score * 0.4 +
364
- critical_score * 0.3 +
365
- disclaimer_score * 0.2 +
366
- uncertainty_score * 0.1
367
- ))
368
 
369
  reasoning = f"""
370
  Out-of-range biomarkers: {out_of_range_count}
@@ -381,9 +388,7 @@ def evaluate_safety_completeness(
381
 
382
  # Master Evaluation Function
383
  def run_full_evaluation(
384
- final_response: dict[str, Any],
385
- agent_outputs: list[Any],
386
- biomarkers: dict[str, float]
387
  ) -> EvaluationResult:
388
  """
389
  Orchestrates all 5 evaluators and returns complete assessment.
@@ -398,7 +403,7 @@ def run_full_evaluation(
398
  if output.agent_name == "Disease Explainer":
399
  findings = output.findings
400
  if isinstance(findings, dict):
401
- pubmed_context = findings.get('mechanism_summary', '') or findings.get('pathophysiology', '')
402
  elif isinstance(findings, str):
403
  pubmed_context = findings
404
  else:
@@ -430,7 +435,7 @@ def run_full_evaluation(
430
  evidence_grounding=evidence_grounding,
431
  actionability=actionability,
432
  clarity=clarity,
433
- safety_completeness=safety_completeness
434
  )
435
 
436
 
@@ -438,74 +443,65 @@ def run_full_evaluation(
438
  # Deterministic Evaluation Functions (for testing)
439
  # ---------------------------------------------------------------------------
440
 
441
- def _deterministic_clinical_accuracy(
442
- final_response: dict[str, Any],
443
- pubmed_context: str
444
- ) -> GradedScore:
445
  """Heuristic-based clinical accuracy (deterministic)."""
446
  score = 0.5
447
  reasons = []
448
 
449
  # Check if response has expected structure
450
- if final_response.get('patient_summary'):
451
  score += 0.1
452
  reasons.append("Has patient summary")
453
 
454
- if final_response.get('prediction_explanation'):
455
  score += 0.1
456
  reasons.append("Has prediction explanation")
457
 
458
- if final_response.get('clinical_recommendations'):
459
  score += 0.1
460
  reasons.append("Has clinical recommendations")
461
 
462
  # Check for citations
463
- pred = final_response.get('prediction_explanation', {})
464
  if isinstance(pred, dict):
465
- refs = pred.get('pdf_references', [])
466
  if refs:
467
  score += min(0.2, len(refs) * 0.05)
468
  reasons.append(f"Has {len(refs)} citations")
469
 
470
- return GradedScore(
471
- score=min(1.0, score),
472
- reasoning="[DETERMINISTIC] " + "; ".join(reasons)
473
- )
474
 
475
 
476
- def _deterministic_actionability(
477
- final_response: dict[str, Any]
478
- ) -> GradedScore:
479
  """Heuristic-based actionability (deterministic)."""
480
  score = 0.5
481
  reasons = []
482
 
483
- recs = final_response.get('clinical_recommendations', {})
484
  if isinstance(recs, dict):
485
- if recs.get('immediate_actions'):
486
  score += 0.15
487
  reasons.append("Has immediate actions")
488
- if recs.get('lifestyle_changes'):
489
  score += 0.15
490
  reasons.append("Has lifestyle changes")
491
- if recs.get('monitoring'):
492
  score += 0.1
493
  reasons.append("Has monitoring recommendations")
494
 
495
  return GradedScore(
496
  score=min(1.0, score),
497
- reasoning="[DETERMINISTIC] " + "; ".join(reasons) if reasons else "[DETERMINISTIC] Missing recommendations"
498
  )
499
 
500
 
501
- def _deterministic_clarity(
502
- final_response: dict[str, Any]
503
- ) -> GradedScore:
504
  """Heuristic-based clarity (deterministic)."""
505
  score = 0.5
506
  reasons = []
507
 
508
- summary = final_response.get('patient_summary', '')
509
  if isinstance(summary, str):
510
  word_count = len(summary.split())
511
  if 50 <= word_count <= 300:
@@ -516,15 +512,15 @@ def _deterministic_clarity(
516
  reasons.append("Has summary")
517
 
518
  # Check for structured output
519
- if final_response.get('biomarker_flags'):
520
  score += 0.15
521
  reasons.append("Has biomarker flags")
522
 
523
- if final_response.get('key_findings'):
524
  score += 0.15
525
  reasons.append("Has key findings")
526
 
527
  return GradedScore(
528
  score=min(1.0, score),
529
- reasoning="[DETERMINISTIC] " + "; ".join(reasons) if reasons else "[DETERMINISTIC] Limited structure"
530
  )
 
17
 
18
  Usage:
19
  from src.evaluation.evaluators import run_5d_evaluation
20
+
21
  result = run_5d_evaluation(final_response, pubmed_context)
22
  print(f"Average score: {result.average_score():.2f}")
23
  """
 
37
 
38
  class GradedScore(BaseModel):
39
  """Structured score with justification"""
40
+
41
  score: float = Field(description="Score from 0.0 to 1.0", ge=0.0, le=1.0)
42
  reasoning: str = Field(description="Justification for the score")
43
 
44
 
45
  class EvaluationResult(BaseModel):
46
  """Complete 5D evaluation result"""
47
+
48
  clinical_accuracy: GradedScore
49
  evidence_grounding: GradedScore
50
  actionability: GradedScore
 
58
  self.evidence_grounding.score,
59
  self.actionability.score,
60
  self.clarity.score,
61
+ self.safety_completeness.score,
62
  ]
63
 
64
  def average_score(self) -> float:
 
68
 
69
 
70
  # Evaluator 1: Clinical Accuracy (LLM-as-Judge)
71
+ def evaluate_clinical_accuracy(final_response: dict[str, Any], pubmed_context: str) -> GradedScore:
 
 
 
72
  """
73
  Evaluates if medical interpretations are accurate.
74
  Uses cloud LLM (Groq/Gemini) as expert judge.
75
+
76
  In DETERMINISTIC_MODE, uses heuristics instead.
77
  """
78
  # Deterministic mode for testing
 
80
  return _deterministic_clinical_accuracy(final_response, pubmed_context)
81
 
82
  # Use cloud LLM for evaluation (FREE via Groq/Gemini)
83
+ evaluator_llm = get_chat_model(temperature=0.0, json_mode=True)
 
 
 
84
 
85
+ prompt = ChatPromptTemplate.from_messages(
86
+ [
87
+ (
88
+ "system",
89
+ """You are a medical expert evaluating clinical accuracy.
90
 
91
  Evaluate the following clinical assessment:
92
  - Are biomarker interpretations medically correct?
 
98
 
99
  Respond ONLY with valid JSON in this format:
100
  {{"score": 0.85, "reasoning": "Your detailed justification here"}}
101
+ """,
102
+ ),
103
+ (
104
+ "human",
105
+ """Evaluate this clinical output:
106
 
107
  **Patient Summary:**
108
  {patient_summary}
 
115
 
116
  **Scientific Context (Ground Truth):**
117
  {context}
118
+ """,
119
+ ),
120
+ ]
121
+ )
122
 
123
  chain = prompt | evaluator_llm
124
+ result = chain.invoke(
125
+ {
126
+ "patient_summary": final_response["patient_summary"],
127
+ "prediction_explanation": final_response["prediction_explanation"],
128
+ "recommendations": final_response["clinical_recommendations"],
129
+ "context": pubmed_context,
130
+ }
131
+ )
132
 
133
  # Parse JSON response
134
  try:
135
  content = result.content if isinstance(result.content, str) else str(result.content)
136
  parsed = json.loads(content)
137
+ return GradedScore(score=parsed["score"], reasoning=parsed["reasoning"])
138
  except (json.JSONDecodeError, KeyError, TypeError):
139
  # Fallback if JSON parsing fails β€” use a conservative score to avoid inflating metrics
140
  return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
141
 
142
 
143
  # Evaluator 2: Evidence Grounding (Programmatic + LLM)
144
+ def evaluate_evidence_grounding(final_response: dict[str, Any]) -> GradedScore:
 
 
145
  """
146
  Checks if all claims are backed by citations.
147
  Programmatic + LLM verification.
148
  """
149
  # Count citations
150
+ pdf_refs = final_response["prediction_explanation"].get("pdf_references", [])
151
  citation_count = len(pdf_refs)
152
 
153
  # Check key drivers have evidence
154
+ key_drivers = final_response["prediction_explanation"].get("key_drivers", [])
155
+ drivers_with_evidence = sum(1 for d in key_drivers if d.get("evidence"))
156
 
157
  # Citation coverage score
158
  if len(key_drivers) > 0:
 
173
 
174
 
175
  # Evaluator 3: Clinical Actionability (LLM-as-Judge)
176
+ def evaluate_actionability(final_response: dict[str, Any]) -> GradedScore:
 
 
177
  """
178
  Evaluates if recommendations are actionable and safe.
179
  Uses cloud LLM (Groq/Gemini) as expert judge.
180
+
181
  In DETERMINISTIC_MODE, uses heuristics instead.
182
  """
183
  # Deterministic mode for testing
 
185
  return _deterministic_actionability(final_response)
186
 
187
  # Use cloud LLM for evaluation (FREE via Groq/Gemini)
188
+ evaluator_llm = get_chat_model(temperature=0.0, json_mode=True)
 
 
 
189
 
190
+ prompt = ChatPromptTemplate.from_messages(
191
+ [
192
+ (
193
+ "system",
194
+ """You are a clinical care coordinator evaluating actionability.
195
 
196
  Evaluate the following recommendations:
197
  - Are immediate actions clear and appropriate?
 
204
 
205
  Respond ONLY with valid JSON in this format:
206
  {{"score": 0.90, "reasoning": "Your detailed justification here"}}
207
+ """,
208
+ ),
209
+ (
210
+ "human",
211
+ """Evaluate these recommendations:
212
 
213
  **Immediate Actions:**
214
  {immediate_actions}
 
221
 
222
  **Confidence Assessment:**
223
  {confidence}
224
+ """,
225
+ ),
226
+ ]
227
+ )
228
 
229
  chain = prompt | evaluator_llm
230
+ recs = final_response["clinical_recommendations"]
231
+ result = chain.invoke(
232
+ {
233
+ "immediate_actions": recs.get("immediate_actions", []),
234
+ "lifestyle_changes": recs.get("lifestyle_changes", []),
235
+ "monitoring": recs.get("monitoring", []),
236
+ "confidence": final_response["confidence_assessment"],
237
+ }
238
+ )
239
 
240
  # Parse JSON response
241
  try:
242
  parsed = json.loads(result.content if isinstance(result.content, str) else str(result.content))
243
+ return GradedScore(score=parsed["score"], reasoning=parsed["reasoning"])
244
  except (json.JSONDecodeError, KeyError, TypeError):
245
  # Fallback if JSON parsing fails β€” use a conservative score to avoid inflating metrics
246
  return GradedScore(score=0.5, reasoning="Unable to parse LLM evaluation response; defaulting to neutral score.")
247
 
248
 
249
  # Evaluator 4: Explainability Clarity (Programmatic)
250
+ def evaluate_clarity(final_response: dict[str, Any]) -> GradedScore:
 
 
251
  """
252
  Measures readability and patient-friendliness.
253
  Uses programmatic text analysis.
254
+
255
  In DETERMINISTIC_MODE, uses simple heuristics for reproducibility.
256
  """
257
  # Deterministic mode for testing
 
260
 
261
  try:
262
  import textstat
263
+
264
  has_textstat = True
265
  except ImportError:
266
  has_textstat = False
267
 
268
  # Get patient narrative
269
+ narrative = final_response["patient_summary"].get("narrative", "")
270
 
271
  if has_textstat:
272
  # Calculate readability (Flesch Reading Ease)
 
276
  readability_score = min(1.0, flesch_score / 70.0) # Normalize to 1.0 at Flesch=70
277
  else:
278
  # Fallback: simple sentence length heuristic
279
+ sentences = narrative.split(".")
280
  avg_words = sum(len(s.split()) for s in sentences) / max(len(sentences), 1)
281
  # Optimal: 15-20 words per sentence
282
  if 15 <= avg_words <= 20:
 
288
 
289
  # Medical jargon detection (simple heuristic)
290
  medical_terms = [
291
+ "pathophysiology",
292
+ "etiology",
293
+ "hemostasis",
294
+ "coagulation",
295
+ "thrombocytopenia",
296
+ "erythropoiesis",
297
+ "gluconeogenesis",
298
  ]
299
  jargon_count = sum(1 for term in medical_terms if term.lower() in narrative.lower())
300
 
 
306
  jargon_penalty = max(0.0, 1.0 - (jargon_count * 0.2))
307
  length_score = 1.0 if optimal_length else 0.7
308
 
309
+ final_score = readability_score * 0.5 + jargon_penalty * 0.3 + length_score * 0.2
310
 
311
  if has_textstat:
312
  reasoning = f"""
 
327
 
328
 
329
  # Evaluator 5: Safety & Completeness (Programmatic)
330
+ def evaluate_safety_completeness(final_response: dict[str, Any], biomarkers: dict[str, float]) -> GradedScore:
 
 
 
331
  """
332
  Checks if all safety concerns are flagged.
333
  Programmatic validation.
 
343
 
344
  for name, value in biomarkers.items():
345
  result = validator.validate_biomarker(name, value) # Fixed: use validate_biomarker instead of validate_single
346
+ if result.status in ["HIGH", "LOW", "CRITICAL_HIGH", "CRITICAL_LOW"]:
347
  out_of_range_count += 1
348
+ if result.status in ["CRITICAL_HIGH", "CRITICAL_LOW"]:
349
  critical_count += 1
350
 
351
  # Count safety alerts in output
352
+ safety_alerts = final_response.get("safety_alerts", [])
353
  alert_count = len(safety_alerts)
354
+ critical_alerts = sum(1 for a in safety_alerts if a.get("severity") == "CRITICAL")
355
 
356
  # Check if all critical values have alerts
357
  critical_coverage = critical_alerts / critical_count if critical_count > 0 else 1.0
358
 
359
  # Check for disclaimer
360
+ has_disclaimer = "disclaimer" in final_response.get("metadata", {})
361
 
362
  # Check for uncertainty acknowledgment
363
+ limitations = final_response["confidence_assessment"].get("limitations", [])
364
  acknowledges_uncertainty = len(limitations) > 0
365
 
366
  # Scoring
 
369
  disclaimer_score = 1.0 if has_disclaimer else 0.0
370
  uncertainty_score = 1.0 if acknowledges_uncertainty else 0.5
371
 
372
+ final_score = min(
373
+ 1.0, (alert_score * 0.4 + critical_score * 0.3 + disclaimer_score * 0.2 + uncertainty_score * 0.1)
374
+ )
 
 
 
375
 
376
  reasoning = f"""
377
  Out-of-range biomarkers: {out_of_range_count}
 
388
 
389
  # Master Evaluation Function
390
  def run_full_evaluation(
391
+ final_response: dict[str, Any], agent_outputs: list[Any], biomarkers: dict[str, float]
 
 
392
  ) -> EvaluationResult:
393
  """
394
  Orchestrates all 5 evaluators and returns complete assessment.
 
403
  if output.agent_name == "Disease Explainer":
404
  findings = output.findings
405
  if isinstance(findings, dict):
406
+ pubmed_context = findings.get("mechanism_summary", "") or findings.get("pathophysiology", "")
407
  elif isinstance(findings, str):
408
  pubmed_context = findings
409
  else:
 
435
  evidence_grounding=evidence_grounding,
436
  actionability=actionability,
437
  clarity=clarity,
438
+ safety_completeness=safety_completeness,
439
  )
440
 
441
 
 
443
  # Deterministic Evaluation Functions (for testing)
444
  # ---------------------------------------------------------------------------
445
 
446
+
447
+ def _deterministic_clinical_accuracy(final_response: dict[str, Any], pubmed_context: str) -> GradedScore:
 
 
448
  """Heuristic-based clinical accuracy (deterministic)."""
449
  score = 0.5
450
  reasons = []
451
 
452
  # Check if response has expected structure
453
+ if final_response.get("patient_summary"):
454
  score += 0.1
455
  reasons.append("Has patient summary")
456
 
457
+ if final_response.get("prediction_explanation"):
458
  score += 0.1
459
  reasons.append("Has prediction explanation")
460
 
461
+ if final_response.get("clinical_recommendations"):
462
  score += 0.1
463
  reasons.append("Has clinical recommendations")
464
 
465
  # Check for citations
466
+ pred = final_response.get("prediction_explanation", {})
467
  if isinstance(pred, dict):
468
+ refs = pred.get("pdf_references", [])
469
  if refs:
470
  score += min(0.2, len(refs) * 0.05)
471
  reasons.append(f"Has {len(refs)} citations")
472
 
473
+ return GradedScore(score=min(1.0, score), reasoning="[DETERMINISTIC] " + "; ".join(reasons))
 
 
 
474
 
475
 
476
+ def _deterministic_actionability(final_response: dict[str, Any]) -> GradedScore:
 
 
477
  """Heuristic-based actionability (deterministic)."""
478
  score = 0.5
479
  reasons = []
480
 
481
+ recs = final_response.get("clinical_recommendations", {})
482
  if isinstance(recs, dict):
483
+ if recs.get("immediate_actions"):
484
  score += 0.15
485
  reasons.append("Has immediate actions")
486
+ if recs.get("lifestyle_changes"):
487
  score += 0.15
488
  reasons.append("Has lifestyle changes")
489
+ if recs.get("monitoring"):
490
  score += 0.1
491
  reasons.append("Has monitoring recommendations")
492
 
493
  return GradedScore(
494
  score=min(1.0, score),
495
+ reasoning="[DETERMINISTIC] " + "; ".join(reasons) if reasons else "[DETERMINISTIC] Missing recommendations",
496
  )
497
 
498
 
499
+ def _deterministic_clarity(final_response: dict[str, Any]) -> GradedScore:
 
 
500
  """Heuristic-based clarity (deterministic)."""
501
  score = 0.5
502
  reasons = []
503
 
504
+ summary = final_response.get("patient_summary", "")
505
  if isinstance(summary, str):
506
  word_count = len(summary.split())
507
  if 50 <= word_count <= 300:
 
512
  reasons.append("Has summary")
513
 
514
  # Check for structured output
515
+ if final_response.get("biomarker_flags"):
516
  score += 0.15
517
  reasons.append("Has biomarker flags")
518
 
519
+ if final_response.get("key_findings"):
520
  score += 0.15
521
  reasons.append("Has key findings")
522
 
523
  return GradedScore(
524
  score=min(1.0, score),
525
+ reasoning="[DETERMINISTIC] " + "; ".join(reasons) if reasons else "[DETERMINISTIC] Limited structure",
526
  )
src/exceptions.py CHANGED
@@ -10,6 +10,7 @@ from typing import Any
10
 
11
  # ── Base ──────────────────────────────────────────────────────────────────────
12
 
 
13
  class MediGuardError(Exception):
14
  """Root exception for the entire MediGuard AI application."""
15
 
@@ -20,6 +21,7 @@ class MediGuardError(Exception):
20
 
21
  # ── Configuration / startup ──────────────────────────────────────────────────
22
 
 
23
  class ConfigurationError(MediGuardError):
24
  """Raised when a required setting is missing or invalid."""
25
 
@@ -30,6 +32,7 @@ class ServiceInitError(MediGuardError):
30
 
31
  # ── Database ─────────────────────────────────────────────────────────────────
32
 
 
33
  class DatabaseError(MediGuardError):
34
  """Base class for all database-related errors."""
35
 
@@ -44,6 +47,7 @@ class RecordNotFoundError(DatabaseError):
44
 
45
  # ── Search engine ────────────────────────────────────────────────────────────
46
 
 
47
  class SearchError(MediGuardError):
48
  """Base class for search-engine (OpenSearch) errors."""
49
 
@@ -58,6 +62,7 @@ class SearchQueryError(SearchError):
58
 
59
  # ── Embeddings ───────────────────────────────────────────────────────────────
60
 
 
61
  class EmbeddingError(MediGuardError):
62
  """Failed to generate embeddings."""
63
 
@@ -68,6 +73,7 @@ class EmbeddingProviderError(EmbeddingError):
68
 
69
  # ── PDF / document parsing ───────────────────────────────────────────────────
70
 
 
71
  class PDFParsingError(MediGuardError):
72
  """Base class for PDF-processing errors."""
73
 
@@ -82,6 +88,7 @@ class PDFValidationError(PDFParsingError):
82
 
83
  # ── LLM / Ollama ─────────────────────────────────────────────────────────────
84
 
 
85
  class LLMError(MediGuardError):
86
  """Base class for LLM-related errors."""
87
 
@@ -100,6 +107,7 @@ class LLMResponseError(LLMError):
100
 
101
  # ── Biomarker domain ─────────────────────────────────────────────────────────
102
 
 
103
  class BiomarkerError(MediGuardError):
104
  """Base class for biomarker-related errors."""
105
 
@@ -114,6 +122,7 @@ class BiomarkerNotFoundError(BiomarkerError):
114
 
115
  # ── Medical analysis / workflow ──────────────────────────────────────────────
116
 
 
117
  class AnalysisError(MediGuardError):
118
  """The clinical-analysis workflow encountered an error."""
119
 
@@ -128,6 +137,7 @@ class OutOfScopeError(GuardrailError):
128
 
129
  # ── Cache ────────────────────────────────────────────────────────────────────
130
 
 
131
  class CacheError(MediGuardError):
132
  """Base class for cache (Redis) errors."""
133
 
@@ -138,11 +148,13 @@ class CacheConnectionError(CacheError):
138
 
139
  # ── Observability ────────────────────────────────────────────────────────────
140
 
 
141
  class ObservabilityError(MediGuardError):
142
  """Langfuse or metrics reporting failed (non-fatal)."""
143
 
144
 
145
  # ── Telegram bot ─────────────────────────────────────────────────────────────
146
 
 
147
  class TelegramError(MediGuardError):
148
  """Error from the Telegram bot integration."""
 
10
 
11
  # ── Base ──────────────────────────────────────────────────────────────────────
12
 
13
+
14
  class MediGuardError(Exception):
15
  """Root exception for the entire MediGuard AI application."""
16
 
 
21
 
22
  # ── Configuration / startup ──────────────────────────────────────────────────
23
 
24
+
25
  class ConfigurationError(MediGuardError):
26
  """Raised when a required setting is missing or invalid."""
27
 
 
32
 
33
  # ── Database ─────────────────────────────────────────────────────────────────
34
 
35
+
36
  class DatabaseError(MediGuardError):
37
  """Base class for all database-related errors."""
38
 
 
47
 
48
  # ── Search engine ────────────────────────────────────────────────────────────
49
 
50
+
51
  class SearchError(MediGuardError):
52
  """Base class for search-engine (OpenSearch) errors."""
53
 
 
62
 
63
  # ── Embeddings ───────────────────────────────────────────────────────────────
64
 
65
+
66
  class EmbeddingError(MediGuardError):
67
  """Failed to generate embeddings."""
68
 
 
73
 
74
  # ── PDF / document parsing ───────────────────────────────────────────────────
75
 
76
+
77
  class PDFParsingError(MediGuardError):
78
  """Base class for PDF-processing errors."""
79
 
 
88
 
89
  # ── LLM / Ollama ─────────────────────────────────────────────────────────────
90
 
91
+
92
  class LLMError(MediGuardError):
93
  """Base class for LLM-related errors."""
94
 
 
107
 
108
  # ── Biomarker domain ─────────────────────────────────────────────────────────
109
 
110
+
111
  class BiomarkerError(MediGuardError):
112
  """Base class for biomarker-related errors."""
113
 
 
122
 
123
  # ── Medical analysis / workflow ──────────────────────────────────────────────
124
 
125
+
126
  class AnalysisError(MediGuardError):
127
  """The clinical-analysis workflow encountered an error."""
128
 
 
137
 
138
  # ── Cache ────────────────────────────────────────────────────────────────────
139
 
140
+
141
  class CacheError(MediGuardError):
142
  """Base class for cache (Redis) errors."""
143
 
 
148
 
149
  # ── Observability ────────────────────────────────────────────────────────────
150
 
151
+
152
  class ObservabilityError(MediGuardError):
153
  """Langfuse or metrics reporting failed (non-fatal)."""
154
 
155
 
156
  # ── Telegram bot ─────────────────────────────────────────────────────────────
157
 
158
+
159
  class TelegramError(MediGuardError):
160
  """Error from the Telegram bot integration."""
src/gradio_app.py CHANGED
@@ -60,7 +60,7 @@ def _call_analyze(biomarkers_json: str) -> str:
60
  summary = data.get("conversational_summary") or json.dumps(data, indent=2)
61
  return summary
62
  except json.JSONDecodeError:
63
- return "Invalid JSON. Please enter biomarkers as: {\"Glucose\": 185, \"HbA1c\": 8.2}"
64
  except Exception as exc:
65
  return f"Error: {exc}"
66
 
@@ -96,10 +96,12 @@ def launch_gradio(share: bool = False, server_port: int = 7860) -> None:
96
  model_selector = gr.Dropdown(
97
  choices=["llama-3.3-70b-versatile", "gemini-2.0-flash", "llama3.1:8b"],
98
  value="llama-3.3-70b-versatile",
99
- label="LLM Provider/Model"
100
  )
101
 
102
- ask_btn.click(fn=ask_stream, inputs=[question_input, chatbot, model_selector], outputs=[question_input, chatbot])
 
 
103
  clear_btn.click(fn=lambda: ([], ""), outputs=[chatbot, question_input])
104
 
105
  with gr.Tab("Analyze Biomarkers"):
@@ -115,16 +117,10 @@ def launch_gradio(share: bool = False, server_port: int = 7860) -> None:
115
  with gr.Tab("Search Knowledge Base"):
116
  with gr.Row():
117
  search_input = gr.Textbox(
118
- label="Search Query",
119
- placeholder="e.g., diabetes management guidelines",
120
- lines=2,
121
- scale=3
122
  )
123
  search_mode = gr.Radio(
124
- choices=["hybrid", "bm25", "vector"],
125
- value="hybrid",
126
- label="Search Strategy",
127
- scale=1
128
  )
129
  search_btn = gr.Button("Search", variant="primary")
130
  search_output = gr.Textbox(label="Results", lines=15, interactive=False)
 
60
  summary = data.get("conversational_summary") or json.dumps(data, indent=2)
61
  return summary
62
  except json.JSONDecodeError:
63
+ return 'Invalid JSON. Please enter biomarkers as: {"Glucose": 185, "HbA1c": 8.2}'
64
  except Exception as exc:
65
  return f"Error: {exc}"
66
 
 
96
  model_selector = gr.Dropdown(
97
  choices=["llama-3.3-70b-versatile", "gemini-2.0-flash", "llama3.1:8b"],
98
  value="llama-3.3-70b-versatile",
99
+ label="LLM Provider/Model",
100
  )
101
 
102
+ ask_btn.click(
103
+ fn=ask_stream, inputs=[question_input, chatbot, model_selector], outputs=[question_input, chatbot]
104
+ )
105
  clear_btn.click(fn=lambda: ([], ""), outputs=[chatbot, question_input])
106
 
107
  with gr.Tab("Analyze Biomarkers"):
 
117
  with gr.Tab("Search Knowledge Base"):
118
  with gr.Row():
119
  search_input = gr.Textbox(
120
+ label="Search Query", placeholder="e.g., diabetes management guidelines", lines=2, scale=3
 
 
 
121
  )
122
  search_mode = gr.Radio(
123
+ choices=["hybrid", "bm25", "vector"], value="hybrid", label="Search Strategy", scale=1
 
 
 
124
  )
125
  search_btn = gr.Button("Search", variant="primary")
126
  search_output = gr.Textbox(label="Results", lines=15, interactive=False)
src/llm_config.py CHANGED
@@ -32,7 +32,7 @@ def _get_env_with_fallback(primary: str, fallback: str, default: str = "") -> st
32
 
33
  def get_default_llm_provider() -> str:
34
  """Get default LLM provider dynamically from environment.
35
-
36
  Supports both naming conventions:
37
  - LLM_PROVIDER (simple)
38
  - LLM__PROVIDER (pydantic nested)
@@ -68,17 +68,17 @@ def get_chat_model(
68
  provider: Literal["groq", "gemini", "ollama"] | None = None,
69
  model: str | None = None,
70
  temperature: float = 0.0,
71
- json_mode: bool = False
72
  ):
73
  """
74
  Get a chat model from the specified provider.
75
-
76
  Args:
77
  provider: "groq" (free, fast), "gemini" (free), or "ollama" (local)
78
  model: Model name (provider-specific)
79
  temperature: Sampling temperature
80
  json_mode: Whether to enable JSON output mode
81
-
82
  Returns:
83
  LangChain chat model instance
84
  """
@@ -91,8 +91,7 @@ def get_chat_model(
91
  api_key = get_groq_api_key()
92
  if not api_key:
93
  raise ValueError(
94
- "GROQ_API_KEY not found in environment.\n"
95
- "Get your FREE API key at: https://console.groq.com/keys"
96
  )
97
 
98
  # Use model from environment or default
@@ -102,7 +101,7 @@ def get_chat_model(
102
  model=model,
103
  temperature=temperature,
104
  api_key=api_key,
105
- model_kwargs={"response_format": {"type": "json_object"}} if json_mode else {}
106
  )
107
 
108
  elif provider == "gemini":
@@ -119,10 +118,7 @@ def get_chat_model(
119
  model = model or get_gemini_model()
120
 
121
  return ChatGoogleGenerativeAI(
122
- model=model,
123
- temperature=temperature,
124
- google_api_key=api_key,
125
- convert_system_message_to_human=True
126
  )
127
 
128
  elif provider == "ollama":
@@ -133,11 +129,7 @@ def get_chat_model(
133
 
134
  model = model or "llama3.1:8b"
135
 
136
- return ChatOllama(
137
- model=model,
138
- temperature=temperature,
139
- format='json' if json_mode else None
140
- )
141
 
142
  else:
143
  raise ValueError(f"Unknown provider: {provider}. Use 'groq', 'gemini', or 'ollama'")
@@ -151,13 +143,13 @@ def get_embedding_provider() -> str:
151
  def get_embedding_model(provider: Literal["jina", "google", "huggingface", "ollama"] | None = None):
152
  """
153
  Get embedding model for vector search.
154
-
155
  Args:
156
  provider: "jina" (high-quality), "google" (free), "huggingface" (local), or "ollama" (local)
157
-
158
  Returns:
159
  LangChain embedding model instance
160
-
161
  Note:
162
  For production use, prefer src.services.embeddings.service.make_embedding_service()
163
  which has automatic fallback chain: Jina β†’ Google β†’ HuggingFace.
@@ -171,6 +163,7 @@ def get_embedding_model(provider: Literal["jina", "google", "huggingface", "olla
171
  try:
172
  # Use the embedding service for Jina
173
  from src.services.embeddings.service import make_embedding_service
 
174
  return make_embedding_service()
175
  except Exception as e:
176
  print(f"WARN: Jina embeddings failed: {e}")
@@ -189,10 +182,7 @@ def get_embedding_model(provider: Literal["jina", "google", "huggingface", "olla
189
  return get_embedding_model("huggingface")
190
 
191
  try:
192
- return GoogleGenerativeAIEmbeddings(
193
- model="models/text-embedding-004",
194
- google_api_key=api_key
195
- )
196
  except Exception as e:
197
  print(f"WARN: Google embeddings failed: {e}")
198
  print("INFO: Falling back to HuggingFace embeddings...")
@@ -204,9 +194,7 @@ def get_embedding_model(provider: Literal["jina", "google", "huggingface", "olla
204
  except ImportError:
205
  from langchain_community.embeddings import HuggingFaceEmbeddings
206
 
207
- return HuggingFaceEmbeddings(
208
- model_name="sentence-transformers/all-MiniLM-L6-v2"
209
- )
210
 
211
  elif provider == "ollama":
212
  try:
@@ -226,7 +214,7 @@ class LLMConfig:
226
  def __init__(self, provider: str | None = None, lazy: bool = True):
227
  """
228
  Initialize all model clients.
229
-
230
  Args:
231
  provider: LLM provider - "groq" (free), "gemini" (free), or "ollama" (local)
232
  lazy: If True, defer model initialization until first use (avoids API key errors at import)
@@ -283,41 +271,21 @@ class LLMConfig:
283
  print(f"Initializing LLM models with provider: {self.provider.upper()}")
284
 
285
  # Fast model for structured tasks (planning, analysis)
286
- self._planner = get_chat_model(
287
- provider=self.provider,
288
- temperature=0.0,
289
- json_mode=True
290
- )
291
 
292
  # Fast model for biomarker analysis and quick tasks
293
- self._analyzer = get_chat_model(
294
- provider=self.provider,
295
- temperature=0.0
296
- )
297
 
298
  # Medium model for RAG retrieval and explanation
299
- self._explainer = get_chat_model(
300
- provider=self.provider,
301
- temperature=0.2
302
- )
303
 
304
  # Configurable synthesizers
305
- self._synthesizer_7b = get_chat_model(
306
- provider=self.provider,
307
- temperature=0.2
308
- )
309
 
310
- self._synthesizer_8b = get_chat_model(
311
- provider=self.provider,
312
- temperature=0.2
313
- )
314
 
315
  # Director for Outer Loop
316
- self._director = get_chat_model(
317
- provider=self.provider,
318
- temperature=0.0,
319
- json_mode=True
320
- )
321
 
322
  # Embedding model for RAG
323
  self._embedding_model = get_embedding_model()
 
32
 
33
  def get_default_llm_provider() -> str:
34
  """Get default LLM provider dynamically from environment.
35
+
36
  Supports both naming conventions:
37
  - LLM_PROVIDER (simple)
38
  - LLM__PROVIDER (pydantic nested)
 
68
  provider: Literal["groq", "gemini", "ollama"] | None = None,
69
  model: str | None = None,
70
  temperature: float = 0.0,
71
+ json_mode: bool = False,
72
  ):
73
  """
74
  Get a chat model from the specified provider.
75
+
76
  Args:
77
  provider: "groq" (free, fast), "gemini" (free), or "ollama" (local)
78
  model: Model name (provider-specific)
79
  temperature: Sampling temperature
80
  json_mode: Whether to enable JSON output mode
81
+
82
  Returns:
83
  LangChain chat model instance
84
  """
 
91
  api_key = get_groq_api_key()
92
  if not api_key:
93
  raise ValueError(
94
+ "GROQ_API_KEY not found in environment.\nGet your FREE API key at: https://console.groq.com/keys"
 
95
  )
96
 
97
  # Use model from environment or default
 
101
  model=model,
102
  temperature=temperature,
103
  api_key=api_key,
104
+ model_kwargs={"response_format": {"type": "json_object"}} if json_mode else {},
105
  )
106
 
107
  elif provider == "gemini":
 
118
  model = model or get_gemini_model()
119
 
120
  return ChatGoogleGenerativeAI(
121
+ model=model, temperature=temperature, google_api_key=api_key, convert_system_message_to_human=True
 
 
 
122
  )
123
 
124
  elif provider == "ollama":
 
129
 
130
  model = model or "llama3.1:8b"
131
 
132
+ return ChatOllama(model=model, temperature=temperature, format="json" if json_mode else None)
 
 
 
 
133
 
134
  else:
135
  raise ValueError(f"Unknown provider: {provider}. Use 'groq', 'gemini', or 'ollama'")
 
143
  def get_embedding_model(provider: Literal["jina", "google", "huggingface", "ollama"] | None = None):
144
  """
145
  Get embedding model for vector search.
146
+
147
  Args:
148
  provider: "jina" (high-quality), "google" (free), "huggingface" (local), or "ollama" (local)
149
+
150
  Returns:
151
  LangChain embedding model instance
152
+
153
  Note:
154
  For production use, prefer src.services.embeddings.service.make_embedding_service()
155
  which has automatic fallback chain: Jina β†’ Google β†’ HuggingFace.
 
163
  try:
164
  # Use the embedding service for Jina
165
  from src.services.embeddings.service import make_embedding_service
166
+
167
  return make_embedding_service()
168
  except Exception as e:
169
  print(f"WARN: Jina embeddings failed: {e}")
 
182
  return get_embedding_model("huggingface")
183
 
184
  try:
185
+ return GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=api_key)
 
 
 
186
  except Exception as e:
187
  print(f"WARN: Google embeddings failed: {e}")
188
  print("INFO: Falling back to HuggingFace embeddings...")
 
194
  except ImportError:
195
  from langchain_community.embeddings import HuggingFaceEmbeddings
196
 
197
+ return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
 
198
 
199
  elif provider == "ollama":
200
  try:
 
214
  def __init__(self, provider: str | None = None, lazy: bool = True):
215
  """
216
  Initialize all model clients.
217
+
218
  Args:
219
  provider: LLM provider - "groq" (free), "gemini" (free), or "ollama" (local)
220
  lazy: If True, defer model initialization until first use (avoids API key errors at import)
 
271
  print(f"Initializing LLM models with provider: {self.provider.upper()}")
272
 
273
  # Fast model for structured tasks (planning, analysis)
274
+ self._planner = get_chat_model(provider=self.provider, temperature=0.0, json_mode=True)
 
 
 
 
275
 
276
  # Fast model for biomarker analysis and quick tasks
277
+ self._analyzer = get_chat_model(provider=self.provider, temperature=0.0)
 
 
 
278
 
279
  # Medium model for RAG retrieval and explanation
280
+ self._explainer = get_chat_model(provider=self.provider, temperature=0.2)
 
 
 
281
 
282
  # Configurable synthesizers
283
+ self._synthesizer_7b = get_chat_model(provider=self.provider, temperature=0.2)
 
 
 
284
 
285
+ self._synthesizer_8b = get_chat_model(provider=self.provider, temperature=0.2)
 
 
 
286
 
287
  # Director for Outer Loop
288
+ self._director = get_chat_model(provider=self.provider, temperature=0.0, json_mode=True)
 
 
 
 
289
 
290
  # Embedding model for RAG
291
  self._embedding_model = get_embedding_model()
src/main.py CHANGED
@@ -35,6 +35,7 @@ logger = logging.getLogger("mediguard")
35
  # Lifespan
36
  # ---------------------------------------------------------------------------
37
 
 
38
  @asynccontextmanager
39
  async def lifespan(app: FastAPI):
40
  """Initialise production services on startup, tear them down on shutdown."""
@@ -50,6 +51,7 @@ async def lifespan(app: FastAPI):
50
  try:
51
  from src.services.opensearch.client import make_opensearch_client
52
  from src.services.opensearch.index_config import MEDICAL_CHUNKS_MAPPING
 
53
  app.state.opensearch_client = make_opensearch_client()
54
  app.state.opensearch_client.ensure_index(MEDICAL_CHUNKS_MAPPING)
55
  logger.info("OpenSearch client ready")
@@ -60,6 +62,7 @@ async def lifespan(app: FastAPI):
60
  # --- Embedding service ---
61
  try:
62
  from src.services.embeddings.service import make_embedding_service
 
63
  app.state.embedding_service = make_embedding_service()
64
  logger.info("Embedding service ready (provider=%s)", app.state.embedding_service.provider_name)
65
  except Exception as exc:
@@ -69,6 +72,7 @@ async def lifespan(app: FastAPI):
69
  # --- Redis cache ---
70
  try:
71
  from src.services.cache.redis_cache import make_redis_cache
 
72
  app.state.cache = make_redis_cache()
73
  logger.info("Redis cache ready")
74
  except Exception as exc:
@@ -78,6 +82,7 @@ async def lifespan(app: FastAPI):
78
  # --- Ollama LLM ---
79
  try:
80
  from src.services.ollama.client import make_ollama_client
 
81
  app.state.ollama_client = make_ollama_client()
82
  logger.info("Ollama client ready")
83
  except Exception as exc:
@@ -87,6 +92,7 @@ async def lifespan(app: FastAPI):
87
  # --- Langfuse tracer ---
88
  try:
89
  from src.services.langfuse.tracer import make_langfuse_tracer
 
90
  app.state.tracer = make_langfuse_tracer()
91
  logger.info("Langfuse tracer ready")
92
  except Exception as exc:
@@ -98,6 +104,7 @@ async def lifespan(app: FastAPI):
98
  from src.llm_config import get_llm
99
  from src.services.agents.agentic_rag import AgenticRAGService
100
  from src.services.agents.context import AgenticContext
 
101
  if app.state.opensearch_client and app.state.embedding_service:
102
  llm = get_llm()
103
  ctx = AgenticContext(
@@ -119,6 +126,7 @@ async def lifespan(app: FastAPI):
119
  # --- Legacy RagBot service (backward-compatible /analyze) ---
120
  try:
121
  from src.workflow import create_guild
 
122
  guild = create_guild()
123
  app.state.ragbot_service = guild
124
  logger.info("RagBot service ready (ClinicalInsightGuild)")
@@ -130,6 +138,7 @@ async def lifespan(app: FastAPI):
130
  try:
131
  from src.llm_config import get_llm
132
  from src.services.extraction.service import make_extraction_service
 
133
  try:
134
  llm = get_llm()
135
  except Exception as e:
@@ -154,6 +163,7 @@ async def lifespan(app: FastAPI):
154
  # App factory
155
  # ---------------------------------------------------------------------------
156
 
 
157
  def create_app() -> FastAPI:
158
  """Build and return the configured FastAPI application."""
159
  settings = get_settings()
@@ -180,6 +190,7 @@ def create_app() -> FastAPI:
180
 
181
  # --- Security & HIPAA Compliance ---
182
  from src.middlewares import HIPAAAuditMiddleware, SecurityHeadersMiddleware
 
183
  app.add_middleware(SecurityHeadersMiddleware)
184
  app.add_middleware(HIPAAAuditMiddleware)
185
 
 
35
  # Lifespan
36
  # ---------------------------------------------------------------------------
37
 
38
+
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
  """Initialise production services on startup, tear them down on shutdown."""
 
51
  try:
52
  from src.services.opensearch.client import make_opensearch_client
53
  from src.services.opensearch.index_config import MEDICAL_CHUNKS_MAPPING
54
+
55
  app.state.opensearch_client = make_opensearch_client()
56
  app.state.opensearch_client.ensure_index(MEDICAL_CHUNKS_MAPPING)
57
  logger.info("OpenSearch client ready")
 
62
  # --- Embedding service ---
63
  try:
64
  from src.services.embeddings.service import make_embedding_service
65
+
66
  app.state.embedding_service = make_embedding_service()
67
  logger.info("Embedding service ready (provider=%s)", app.state.embedding_service.provider_name)
68
  except Exception as exc:
 
72
  # --- Redis cache ---
73
  try:
74
  from src.services.cache.redis_cache import make_redis_cache
75
+
76
  app.state.cache = make_redis_cache()
77
  logger.info("Redis cache ready")
78
  except Exception as exc:
 
82
  # --- Ollama LLM ---
83
  try:
84
  from src.services.ollama.client import make_ollama_client
85
+
86
  app.state.ollama_client = make_ollama_client()
87
  logger.info("Ollama client ready")
88
  except Exception as exc:
 
92
  # --- Langfuse tracer ---
93
  try:
94
  from src.services.langfuse.tracer import make_langfuse_tracer
95
+
96
  app.state.tracer = make_langfuse_tracer()
97
  logger.info("Langfuse tracer ready")
98
  except Exception as exc:
 
104
  from src.llm_config import get_llm
105
  from src.services.agents.agentic_rag import AgenticRAGService
106
  from src.services.agents.context import AgenticContext
107
+
108
  if app.state.opensearch_client and app.state.embedding_service:
109
  llm = get_llm()
110
  ctx = AgenticContext(
 
126
  # --- Legacy RagBot service (backward-compatible /analyze) ---
127
  try:
128
  from src.workflow import create_guild
129
+
130
  guild = create_guild()
131
  app.state.ragbot_service = guild
132
  logger.info("RagBot service ready (ClinicalInsightGuild)")
 
138
  try:
139
  from src.llm_config import get_llm
140
  from src.services.extraction.service import make_extraction_service
141
+
142
  try:
143
  llm = get_llm()
144
  except Exception as e:
 
163
  # App factory
164
  # ---------------------------------------------------------------------------
165
 
166
+
167
  def create_app() -> FastAPI:
168
  """Build and return the configured FastAPI application."""
169
  settings = get_settings()
 
190
 
191
  # --- Security & HIPAA Compliance ---
192
  from src.middlewares import HIPAAAuditMiddleware, SecurityHeadersMiddleware
193
+
194
  app.add_middleware(SecurityHeadersMiddleware)
195
  app.add_middleware(HIPAAAuditMiddleware)
196
 
src/middlewares.py CHANGED
@@ -27,8 +27,20 @@ logger = logging.getLogger("mediguard.audit")
27
 
28
  # Sensitive fields that should NEVER be logged
29
  SENSITIVE_FIELDS = {
30
- "biomarkers", "patient_context", "patient_id", "age", "gender", "bmi",
31
- "ssn", "mrn", "name", "address", "phone", "email", "dob", "date_of_birth",
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
 
34
  # Endpoints that require audit logging
@@ -65,14 +77,14 @@ def _redact_body(body_dict: dict) -> dict:
65
  class HIPAAAuditMiddleware(BaseHTTPMiddleware):
66
  """
67
  HIPAA-compliant audit logging middleware.
68
-
69
  Features:
70
  - Generates unique request IDs for traceability
71
  - Logs request metadata WITHOUT PHI/biomarker values
72
  - Creates audit trail for all medical analysis requests
73
  - Tracks request timing and response status
74
  - Hashes sensitive identifiers for correlation
75
-
76
  Audit logs are structured JSON for easy SIEM integration.
77
  """
78
 
@@ -116,7 +128,9 @@ class HIPAAAuditMiddleware(BaseHTTPMiddleware):
116
  audit_entry["request_fields"] = list(redacted.keys())
117
  # Log presence of biomarkers without values
118
  if "biomarkers" in body_dict:
119
- audit_entry["biomarker_count"] = len(body_dict["biomarkers"]) if isinstance(body_dict["biomarkers"], dict) else 1
 
 
120
  except Exception as exc:
121
  logger.debug("Failed to audit POST body: %s", exc)
122
 
 
27
 
28
  # Sensitive fields that should NEVER be logged
29
  SENSITIVE_FIELDS = {
30
+ "biomarkers",
31
+ "patient_context",
32
+ "patient_id",
33
+ "age",
34
+ "gender",
35
+ "bmi",
36
+ "ssn",
37
+ "mrn",
38
+ "name",
39
+ "address",
40
+ "phone",
41
+ "email",
42
+ "dob",
43
+ "date_of_birth",
44
  }
45
 
46
  # Endpoints that require audit logging
 
77
  class HIPAAAuditMiddleware(BaseHTTPMiddleware):
78
  """
79
  HIPAA-compliant audit logging middleware.
80
+
81
  Features:
82
  - Generates unique request IDs for traceability
83
  - Logs request metadata WITHOUT PHI/biomarker values
84
  - Creates audit trail for all medical analysis requests
85
  - Tracks request timing and response status
86
  - Hashes sensitive identifiers for correlation
87
+
88
  Audit logs are structured JSON for easy SIEM integration.
89
  """
90
 
 
128
  audit_entry["request_fields"] = list(redacted.keys())
129
  # Log presence of biomarkers without values
130
  if "biomarkers" in body_dict:
131
+ audit_entry["biomarker_count"] = (
132
+ len(body_dict["biomarkers"]) if isinstance(body_dict["biomarkers"], dict) else 1
133
+ )
134
  except Exception as exc:
135
  logger.debug("Failed to audit POST body: %s", exc)
136
 
src/pdf_processor.py CHANGED
@@ -32,11 +32,11 @@ class PDFProcessor:
32
  pdf_directory: str = "data/medical_pdfs",
33
  vector_store_path: str = "data/vector_stores",
34
  chunk_size: int = 1000,
35
- chunk_overlap: int = 200
36
  ):
37
  """
38
  Initialize PDF processor.
39
-
40
  Args:
41
  pdf_directory: Path to folder containing medical PDFs
42
  vector_store_path: Path to save FAISS vector stores
@@ -57,13 +57,13 @@ class PDFProcessor:
57
  chunk_size=chunk_size,
58
  chunk_overlap=chunk_overlap,
59
  separators=["\n\n", "\n", ". ", " ", ""],
60
- length_function=len
61
  )
62
 
63
  def load_pdfs(self) -> list[Document]:
64
  """
65
  Load all PDF documents from the configured directory.
66
-
67
  Returns:
68
  List of Document objects with content and metadata
69
  """
@@ -89,8 +89,8 @@ class PDFProcessor:
89
 
90
  # Add source filename to metadata
91
  for doc in docs:
92
- doc.metadata['source_file'] = pdf_path.name
93
- doc.metadata['source_path'] = str(pdf_path)
94
 
95
  documents.extend(docs)
96
  print(f" OK: Loaded {len(docs)} pages from {pdf_path.name}")
@@ -104,10 +104,10 @@ class PDFProcessor:
104
  def chunk_documents(self, documents: list[Document]) -> list[Document]:
105
  """
106
  Split documents into chunks for RAG retrieval.
107
-
108
  Args:
109
  documents: List of loaded documents
110
-
111
  Returns:
112
  List of chunked documents with preserved metadata
113
  """
@@ -121,7 +121,7 @@ class PDFProcessor:
121
 
122
  # Add chunk index to metadata
123
  for i, chunk in enumerate(chunks):
124
- chunk.metadata['chunk_id'] = i
125
 
126
  print(f"OK: Created {len(chunks)} chunks from {len(documents)} pages")
127
  print(f" Average chunk size: {sum(len(c.page_content) for c in chunks) // len(chunks)} characters")
@@ -129,19 +129,16 @@ class PDFProcessor:
129
  return chunks
130
 
131
  def create_vector_store(
132
- self,
133
- chunks: list[Document],
134
- embedding_model,
135
- store_name: str = "medical_knowledge"
136
  ) -> FAISS:
137
  """
138
  Create FAISS vector store from document chunks.
139
-
140
  Args:
141
  chunks: Document chunks to embed
142
  embedding_model: Embedding model (from llm_config)
143
  store_name: Name for the vector store
144
-
145
  Returns:
146
  FAISS vector store object
147
  """
@@ -150,10 +147,7 @@ class PDFProcessor:
150
  print("(This may take a few minutes...)")
151
 
152
  # Create FAISS vector store
153
- vector_store = FAISS.from_documents(
154
- documents=chunks,
155
- embedding=embedding_model
156
- )
157
 
158
  # Save to disk
159
  save_path = self.vector_store_path / f"{store_name}.faiss"
@@ -163,18 +157,14 @@ class PDFProcessor:
163
 
164
  return vector_store
165
 
166
- def load_vector_store(
167
- self,
168
- embedding_model,
169
- store_name: str = "medical_knowledge"
170
- ) -> FAISS | None:
171
  """
172
  Load existing vector store from disk.
173
-
174
  Args:
175
  embedding_model: Embedding model (must match the one used to create store)
176
  store_name: Name of the vector store
177
-
178
  Returns:
179
  FAISS vector store or None if not found
180
  """
@@ -192,7 +182,7 @@ class PDFProcessor:
192
  str(self.vector_store_path),
193
  embedding_model,
194
  index_name=store_name,
195
- allow_dangerous_deserialization=True
196
  )
197
  print(f"OK: Loaded vector store from: {store_path}")
198
  return vector_store
@@ -202,19 +192,16 @@ class PDFProcessor:
202
  return None
203
 
204
  def create_retrievers(
205
- self,
206
- embedding_model,
207
- store_name: str = "medical_knowledge",
208
- force_rebuild: bool = False
209
  ) -> dict:
210
  """
211
  Create or load retrievers for RAG.
212
-
213
  Args:
214
  embedding_model: Embedding model
215
  store_name: Vector store name
216
  force_rebuild: If True, rebuild vector store even if it exists
217
-
218
  Returns:
219
  Dictionary of retrievers for different purposes
220
  """
@@ -238,18 +225,10 @@ class PDFProcessor:
238
 
239
  # Create specialized retrievers
240
  retrievers = {
241
- "disease_explainer": vector_store.as_retriever(
242
- search_kwargs={"k": 5}
243
- ),
244
- "biomarker_linker": vector_store.as_retriever(
245
- search_kwargs={"k": 3}
246
- ),
247
- "clinical_guidelines": vector_store.as_retriever(
248
- search_kwargs={"k": 3}
249
- ),
250
- "general": vector_store.as_retriever(
251
- search_kwargs={"k": 5}
252
- )
253
  }
254
 
255
  print(f"\nOK: Created {len(retrievers)} specialized retrievers")
@@ -259,12 +238,12 @@ class PDFProcessor:
259
  def setup_knowledge_base(embedding_model=None, force_rebuild: bool = False, use_configured_embeddings: bool = True):
260
  """
261
  Convenience function to set up the complete knowledge base.
262
-
263
  Args:
264
  embedding_model: Embedding model (optional if use_configured_embeddings=True)
265
  force_rebuild: Force rebuild of vector stores
266
  use_configured_embeddings: Use embedding provider from EMBEDDING_PROVIDER env var
267
-
268
  Returns:
269
  Dictionary of retrievers ready for use
270
  """
@@ -281,9 +260,7 @@ def setup_knowledge_base(embedding_model=None, force_rebuild: bool = False, use_
281
 
282
  processor = PDFProcessor()
283
  retrievers = processor.create_retrievers(
284
- embedding_model,
285
- store_name="medical_knowledge",
286
- force_rebuild=force_rebuild
287
  )
288
 
289
  if retrievers:
@@ -300,19 +277,16 @@ def get_all_retrievers(force_rebuild: bool = False) -> dict:
300
  """
301
  Quick function to get all retrievers using configured embedding provider.
302
  Used by workflow.py to initialize the Clinical Insight Guild.
303
-
304
  Uses EMBEDDING_PROVIDER from .env: "google" (default), "huggingface", or "ollama"
305
-
306
  Args:
307
  force_rebuild: Force rebuild of vector stores
308
-
309
  Returns:
310
  Dictionary of retrievers for all agent types
311
  """
312
- return setup_knowledge_base(
313
- use_configured_embeddings=True,
314
- force_rebuild=force_rebuild
315
- )
316
 
317
 
318
  if __name__ == "__main__":
@@ -323,16 +297,16 @@ if __name__ == "__main__":
323
  # Add parent directory to path for imports
324
  sys.path.insert(0, str(Path(__file__).parent.parent))
325
 
326
- print("\n" + "="*70)
327
  print("MediGuard AI - PDF Knowledge Base Builder")
328
- print("="*70)
329
  print("\nUsing configured embedding provider from .env")
330
  print(" EMBEDDING_PROVIDER options: google (default), huggingface, ollama")
331
- print("="*70)
332
 
333
  retrievers = setup_knowledge_base(
334
  use_configured_embeddings=True, # Use configured provider
335
- force_rebuild=False
336
  )
337
 
338
  if retrievers:
 
32
  pdf_directory: str = "data/medical_pdfs",
33
  vector_store_path: str = "data/vector_stores",
34
  chunk_size: int = 1000,
35
+ chunk_overlap: int = 200,
36
  ):
37
  """
38
  Initialize PDF processor.
39
+
40
  Args:
41
  pdf_directory: Path to folder containing medical PDFs
42
  vector_store_path: Path to save FAISS vector stores
 
57
  chunk_size=chunk_size,
58
  chunk_overlap=chunk_overlap,
59
  separators=["\n\n", "\n", ". ", " ", ""],
60
+ length_function=len,
61
  )
62
 
63
  def load_pdfs(self) -> list[Document]:
64
  """
65
  Load all PDF documents from the configured directory.
66
+
67
  Returns:
68
  List of Document objects with content and metadata
69
  """
 
89
 
90
  # Add source filename to metadata
91
  for doc in docs:
92
+ doc.metadata["source_file"] = pdf_path.name
93
+ doc.metadata["source_path"] = str(pdf_path)
94
 
95
  documents.extend(docs)
96
  print(f" OK: Loaded {len(docs)} pages from {pdf_path.name}")
 
104
  def chunk_documents(self, documents: list[Document]) -> list[Document]:
105
  """
106
  Split documents into chunks for RAG retrieval.
107
+
108
  Args:
109
  documents: List of loaded documents
110
+
111
  Returns:
112
  List of chunked documents with preserved metadata
113
  """
 
121
 
122
  # Add chunk index to metadata
123
  for i, chunk in enumerate(chunks):
124
+ chunk.metadata["chunk_id"] = i
125
 
126
  print(f"OK: Created {len(chunks)} chunks from {len(documents)} pages")
127
  print(f" Average chunk size: {sum(len(c.page_content) for c in chunks) // len(chunks)} characters")
 
129
  return chunks
130
 
131
  def create_vector_store(
132
+ self, chunks: list[Document], embedding_model, store_name: str = "medical_knowledge"
 
 
 
133
  ) -> FAISS:
134
  """
135
  Create FAISS vector store from document chunks.
136
+
137
  Args:
138
  chunks: Document chunks to embed
139
  embedding_model: Embedding model (from llm_config)
140
  store_name: Name for the vector store
141
+
142
  Returns:
143
  FAISS vector store object
144
  """
 
147
  print("(This may take a few minutes...)")
148
 
149
  # Create FAISS vector store
150
+ vector_store = FAISS.from_documents(documents=chunks, embedding=embedding_model)
 
 
 
151
 
152
  # Save to disk
153
  save_path = self.vector_store_path / f"{store_name}.faiss"
 
157
 
158
  return vector_store
159
 
160
+ def load_vector_store(self, embedding_model, store_name: str = "medical_knowledge") -> FAISS | None:
 
 
 
 
161
  """
162
  Load existing vector store from disk.
163
+
164
  Args:
165
  embedding_model: Embedding model (must match the one used to create store)
166
  store_name: Name of the vector store
167
+
168
  Returns:
169
  FAISS vector store or None if not found
170
  """
 
182
  str(self.vector_store_path),
183
  embedding_model,
184
  index_name=store_name,
185
+ allow_dangerous_deserialization=True,
186
  )
187
  print(f"OK: Loaded vector store from: {store_path}")
188
  return vector_store
 
192
  return None
193
 
194
  def create_retrievers(
195
+ self, embedding_model, store_name: str = "medical_knowledge", force_rebuild: bool = False
 
 
 
196
  ) -> dict:
197
  """
198
  Create or load retrievers for RAG.
199
+
200
  Args:
201
  embedding_model: Embedding model
202
  store_name: Vector store name
203
  force_rebuild: If True, rebuild vector store even if it exists
204
+
205
  Returns:
206
  Dictionary of retrievers for different purposes
207
  """
 
225
 
226
  # Create specialized retrievers
227
  retrievers = {
228
+ "disease_explainer": vector_store.as_retriever(search_kwargs={"k": 5}),
229
+ "biomarker_linker": vector_store.as_retriever(search_kwargs={"k": 3}),
230
+ "clinical_guidelines": vector_store.as_retriever(search_kwargs={"k": 3}),
231
+ "general": vector_store.as_retriever(search_kwargs={"k": 5}),
 
 
 
 
 
 
 
 
232
  }
233
 
234
  print(f"\nOK: Created {len(retrievers)} specialized retrievers")
 
238
  def setup_knowledge_base(embedding_model=None, force_rebuild: bool = False, use_configured_embeddings: bool = True):
239
  """
240
  Convenience function to set up the complete knowledge base.
241
+
242
  Args:
243
  embedding_model: Embedding model (optional if use_configured_embeddings=True)
244
  force_rebuild: Force rebuild of vector stores
245
  use_configured_embeddings: Use embedding provider from EMBEDDING_PROVIDER env var
246
+
247
  Returns:
248
  Dictionary of retrievers ready for use
249
  """
 
260
 
261
  processor = PDFProcessor()
262
  retrievers = processor.create_retrievers(
263
+ embedding_model, store_name="medical_knowledge", force_rebuild=force_rebuild
 
 
264
  )
265
 
266
  if retrievers:
 
277
  """
278
  Quick function to get all retrievers using configured embedding provider.
279
  Used by workflow.py to initialize the Clinical Insight Guild.
280
+
281
  Uses EMBEDDING_PROVIDER from .env: "google" (default), "huggingface", or "ollama"
282
+
283
  Args:
284
  force_rebuild: Force rebuild of vector stores
285
+
286
  Returns:
287
  Dictionary of retrievers for all agent types
288
  """
289
+ return setup_knowledge_base(use_configured_embeddings=True, force_rebuild=force_rebuild)
 
 
 
290
 
291
 
292
  if __name__ == "__main__":
 
297
  # Add parent directory to path for imports
298
  sys.path.insert(0, str(Path(__file__).parent.parent))
299
 
300
+ print("\n" + "=" * 70)
301
  print("MediGuard AI - PDF Knowledge Base Builder")
302
+ print("=" * 70)
303
  print("\nUsing configured embedding provider from .env")
304
  print(" EMBEDDING_PROVIDER options: google (default), huggingface, ollama")
305
+ print("=" * 70)
306
 
307
  retrievers = setup_knowledge_base(
308
  use_configured_embeddings=True, # Use configured provider
309
+ force_rebuild=False,
310
  )
311
 
312
  if retrievers:
src/repositories/analysis.py CHANGED
@@ -21,19 +21,10 @@ class AnalysisRepository:
21
  return analysis
22
 
23
  def get_by_request_id(self, request_id: str) -> PatientAnalysis | None:
24
- return (
25
- self.db.query(PatientAnalysis)
26
- .filter(PatientAnalysis.request_id == request_id)
27
- .first()
28
- )
29
 
30
  def list_recent(self, limit: int = 20) -> list[PatientAnalysis]:
31
- return (
32
- self.db.query(PatientAnalysis)
33
- .order_by(PatientAnalysis.created_at.desc())
34
- .limit(limit)
35
- .all()
36
- )
37
 
38
  def count(self) -> int:
39
  return self.db.query(PatientAnalysis).count()
 
21
  return analysis
22
 
23
  def get_by_request_id(self, request_id: str) -> PatientAnalysis | None:
24
+ return self.db.query(PatientAnalysis).filter(PatientAnalysis.request_id == request_id).first()
 
 
 
 
25
 
26
  def list_recent(self, limit: int = 20) -> list[PatientAnalysis]:
27
+ return self.db.query(PatientAnalysis).order_by(PatientAnalysis.created_at.desc()).limit(limit).all()
 
 
 
 
 
28
 
29
  def count(self) -> int:
30
  return self.db.query(PatientAnalysis).count()
src/repositories/document.py CHANGED
@@ -16,11 +16,7 @@ class DocumentRepository:
16
  self.db = db
17
 
18
  def upsert(self, doc: MedicalDocument) -> MedicalDocument:
19
- existing = (
20
- self.db.query(MedicalDocument)
21
- .filter(MedicalDocument.content_hash == doc.content_hash)
22
- .first()
23
- )
24
  if existing:
25
  existing.parse_status = doc.parse_status
26
  existing.chunk_count = doc.chunk_count
@@ -35,12 +31,7 @@ class DocumentRepository:
35
  return self.db.query(MedicalDocument).filter(MedicalDocument.id == doc_id).first()
36
 
37
  def list_all(self, limit: int = 100) -> list[MedicalDocument]:
38
- return (
39
- self.db.query(MedicalDocument)
40
- .order_by(MedicalDocument.created_at.desc())
41
- .limit(limit)
42
- .all()
43
- )
44
 
45
  def count(self) -> int:
46
  return self.db.query(MedicalDocument).count()
 
16
  self.db = db
17
 
18
  def upsert(self, doc: MedicalDocument) -> MedicalDocument:
19
+ existing = self.db.query(MedicalDocument).filter(MedicalDocument.content_hash == doc.content_hash).first()
 
 
 
 
20
  if existing:
21
  existing.parse_status = doc.parse_status
22
  existing.chunk_count = doc.chunk_count
 
31
  return self.db.query(MedicalDocument).filter(MedicalDocument.id == doc_id).first()
32
 
33
  def list_all(self, limit: int = 100) -> list[MedicalDocument]:
34
+ return self.db.query(MedicalDocument).order_by(MedicalDocument.created_at.desc()).limit(limit).all()
 
 
 
 
 
35
 
36
  def count(self) -> int:
37
  return self.db.query(MedicalDocument).count()
src/routers/analyze.py CHANGED
@@ -32,13 +32,7 @@ _executor = ThreadPoolExecutor(max_workers=4)
32
 
33
  def _score_disease_heuristic(biomarkers: dict[str, float]) -> dict[str, Any]:
34
  """Rule-based disease scoring (NOT ML prediction)."""
35
- scores = {
36
- "Diabetes": 0.0,
37
- "Anemia": 0.0,
38
- "Heart Disease": 0.0,
39
- "Thrombocytopenia": 0.0,
40
- "Thalassemia": 0.0
41
- }
42
 
43
  # Diabetes indicators
44
  glucose = biomarkers.get("Glucose")
@@ -96,11 +90,7 @@ def _score_disease_heuristic(biomarkers: dict[str, float]) -> dict[str, Any]:
96
  else:
97
  probabilities = {k: 1.0 / len(scores) for k in scores}
98
 
99
- return {
100
- "disease": top_disease,
101
- "confidence": confidence,
102
- "probabilities": probabilities
103
- }
104
 
105
 
106
  async def _run_guild_analysis(
@@ -123,16 +113,12 @@ async def _run_guild_analysis(
123
  try:
124
  # Run sync function in thread pool
125
  from src.state import PatientInput
 
126
  patient_input = PatientInput(
127
- biomarkers=biomarkers,
128
- patient_context=patient_ctx,
129
- model_prediction=model_prediction
130
  )
131
  loop = asyncio.get_running_loop()
132
- result = await loop.run_in_executor(
133
- _executor,
134
- lambda: ragbot.run(patient_input)
135
- )
136
  except Exception as exc:
137
  logger.exception("Guild analysis failed: %s", exc)
138
  raise HTTPException(
@@ -143,10 +129,10 @@ async def _run_guild_analysis(
143
  elapsed = (time.time() - t0) * 1000
144
 
145
  # Build response from result
146
- prediction = result.get('model_prediction')
147
- analysis = result.get('final_response', {})
148
  # Try to extract the conversational_summary if it's there
149
- conversational_summary = analysis.get('conversational_summary') if isinstance(analysis, dict) else str(analysis)
150
 
151
  return AnalysisResponse(
152
  status="success",
 
32
 
33
  def _score_disease_heuristic(biomarkers: dict[str, float]) -> dict[str, Any]:
34
  """Rule-based disease scoring (NOT ML prediction)."""
35
+ scores = {"Diabetes": 0.0, "Anemia": 0.0, "Heart Disease": 0.0, "Thrombocytopenia": 0.0, "Thalassemia": 0.0}
 
 
 
 
 
 
36
 
37
  # Diabetes indicators
38
  glucose = biomarkers.get("Glucose")
 
90
  else:
91
  probabilities = {k: 1.0 / len(scores) for k in scores}
92
 
93
+ return {"disease": top_disease, "confidence": confidence, "probabilities": probabilities}
 
 
 
 
94
 
95
 
96
  async def _run_guild_analysis(
 
113
  try:
114
  # Run sync function in thread pool
115
  from src.state import PatientInput
116
+
117
  patient_input = PatientInput(
118
+ biomarkers=biomarkers, patient_context=patient_ctx, model_prediction=model_prediction
 
 
119
  )
120
  loop = asyncio.get_running_loop()
121
+ result = await loop.run_in_executor(_executor, lambda: ragbot.run(patient_input))
 
 
 
122
  except Exception as exc:
123
  logger.exception("Guild analysis failed: %s", exc)
124
  raise HTTPException(
 
129
  elapsed = (time.time() - t0) * 1000
130
 
131
  # Build response from result
132
+ prediction = result.get("model_prediction")
133
+ analysis = result.get("final_response", {})
134
  # Try to extract the conversational_summary if it's there
135
+ conversational_summary = analysis.get("conversational_summary") if isinstance(analysis, dict) else str(analysis)
136
 
137
  return AnalysisResponse(
138
  status="success",
src/routers/ask.py CHANGED
@@ -71,7 +71,7 @@ async def _stream_rag_response(
71
  ) -> AsyncGenerator[str, None]:
72
  """
73
  Generate Server-Sent Events for streaming RAG responses.
74
-
75
  Event types:
76
  - status: Pipeline stage updates
77
  - token: Individual response tokens
@@ -94,7 +94,7 @@ async def _stream_rag_response(
94
  query=question,
95
  biomarkers=biomarkers,
96
  patient_context=patient_context,
97
- )
98
  )
99
 
100
  # Send retrieval metadata
@@ -110,7 +110,7 @@ async def _stream_rag_response(
110
  words = answer.split()
111
  chunk_size = 3 # Send 3 words at a time
112
  for i in range(0, len(words), chunk_size):
113
- chunk = " ".join(words[i:i + chunk_size])
114
  if i + chunk_size < len(words):
115
  chunk += " "
116
  yield f"event: token\ndata: {json.dumps({'text': chunk})}\n\n"
@@ -129,21 +129,21 @@ async def _stream_rag_response(
129
  async def ask_medical_question_stream(body: AskRequest, request: Request):
130
  """
131
  Stream a medical Q&A response via Server-Sent Events (SSE).
132
-
133
  Events:
134
  - `status`: Pipeline stage updates (guardrail, retrieve, grade, generate)
135
  - `token`: Individual response tokens for real-time display
136
  - `metadata`: Retrieval statistics (documents found, relevance scores)
137
  - `done`: Completion signal with timing info
138
  - `error`: Error details if something fails
139
-
140
  Example client code (JavaScript):
141
  ```javascript
142
  const eventSource = new EventSource('/ask/stream', {
143
  method: 'POST',
144
  body: JSON.stringify({ question: 'What causes high glucose?' })
145
  });
146
-
147
  eventSource.addEventListener('token', (e) => {
148
  const data = JSON.parse(e.data);
149
  document.getElementById('response').innerHTML += data.text;
@@ -178,10 +178,5 @@ async def submit_feedback(body: FeedbackRequest, request: Request):
178
  """Submit user feedback for an analysis or RAG response."""
179
  tracer = getattr(request.app.state, "tracer", None)
180
  if tracer:
181
- tracer.score(
182
- trace_id=body.request_id,
183
- name="user-feedback",
184
- value=body.score,
185
- comment=body.comment
186
- )
187
  return FeedbackResponse(request_id=body.request_id)
 
71
  ) -> AsyncGenerator[str, None]:
72
  """
73
  Generate Server-Sent Events for streaming RAG responses.
74
+
75
  Event types:
76
  - status: Pipeline stage updates
77
  - token: Individual response tokens
 
94
  query=question,
95
  biomarkers=biomarkers,
96
  patient_context=patient_context,
97
+ ),
98
  )
99
 
100
  # Send retrieval metadata
 
110
  words = answer.split()
111
  chunk_size = 3 # Send 3 words at a time
112
  for i in range(0, len(words), chunk_size):
113
+ chunk = " ".join(words[i : i + chunk_size])
114
  if i + chunk_size < len(words):
115
  chunk += " "
116
  yield f"event: token\ndata: {json.dumps({'text': chunk})}\n\n"
 
129
  async def ask_medical_question_stream(body: AskRequest, request: Request):
130
  """
131
  Stream a medical Q&A response via Server-Sent Events (SSE).
132
+
133
  Events:
134
  - `status`: Pipeline stage updates (guardrail, retrieve, grade, generate)
135
  - `token`: Individual response tokens for real-time display
136
  - `metadata`: Retrieval statistics (documents found, relevance scores)
137
  - `done`: Completion signal with timing info
138
  - `error`: Error details if something fails
139
+
140
  Example client code (JavaScript):
141
  ```javascript
142
  const eventSource = new EventSource('/ask/stream', {
143
  method: 'POST',
144
  body: JSON.stringify({ question: 'What causes high glucose?' })
145
  });
146
+
147
  eventSource.addEventListener('token', (e) => {
148
  const data = JSON.parse(e.data);
149
  document.getElementById('response').innerHTML += data.text;
 
178
  """Submit user feedback for an analysis or RAG response."""
179
  tracer = getattr(request.app.state, "tracer", None)
180
  if tracer:
181
+ tracer.score(trace_id=body.request_id, name="user-feedback", value=body.score, comment=body.comment)
 
 
 
 
 
182
  return FeedbackResponse(request_id=body.request_id)
src/routers/health.py CHANGED
@@ -42,6 +42,7 @@ async def readiness_check(request: Request) -> HealthResponse:
42
  from sqlalchemy import text
43
 
44
  from src.database import _engine
 
45
  engine = _engine()
46
  if engine is not None:
47
  t0 = time.time()
@@ -62,7 +63,13 @@ async def readiness_check(request: Request) -> HealthResponse:
62
  info = os_client.health()
63
  latency = (time.time() - t0) * 1000
64
  os_status = info.get("status", "unknown")
65
- services.append(ServiceHealth(name="opensearch", status="ok" if os_status in ("green", "yellow") else "degraded", latency_ms=round(latency, 1)))
 
 
 
 
 
 
66
  else:
67
  services.append(ServiceHealth(name="opensearch", status="unavailable"))
68
  except Exception as exc:
@@ -90,7 +97,9 @@ async def readiness_check(request: Request) -> HealthResponse:
90
  health_info = ollama.health()
91
  latency = (time.time() - t0) * 1000
92
  is_healthy = isinstance(health_info, dict) and health_info.get("status") == "ok"
93
- services.append(ServiceHealth(name="ollama", status="ok" if is_healthy else "degraded", latency_ms=round(latency, 1)))
 
 
94
  else:
95
  services.append(ServiceHealth(name="ollama", status="unavailable"))
96
  except Exception as exc:
@@ -110,6 +119,7 @@ async def readiness_check(request: Request) -> HealthResponse:
110
  # --- FAISS (local retriever) ---
111
  try:
112
  from src.services.retrieval.factory import make_retriever
 
113
  retriever = make_retriever(backend="faiss")
114
  if retriever is not None:
115
  doc_count = retriever.doc_count()
 
42
  from sqlalchemy import text
43
 
44
  from src.database import _engine
45
+
46
  engine = _engine()
47
  if engine is not None:
48
  t0 = time.time()
 
63
  info = os_client.health()
64
  latency = (time.time() - t0) * 1000
65
  os_status = info.get("status", "unknown")
66
+ services.append(
67
+ ServiceHealth(
68
+ name="opensearch",
69
+ status="ok" if os_status in ("green", "yellow") else "degraded",
70
+ latency_ms=round(latency, 1),
71
+ )
72
+ )
73
  else:
74
  services.append(ServiceHealth(name="opensearch", status="unavailable"))
75
  except Exception as exc:
 
97
  health_info = ollama.health()
98
  latency = (time.time() - t0) * 1000
99
  is_healthy = isinstance(health_info, dict) and health_info.get("status") == "ok"
100
+ services.append(
101
+ ServiceHealth(name="ollama", status="ok" if is_healthy else "degraded", latency_ms=round(latency, 1))
102
+ )
103
  else:
104
  services.append(ServiceHealth(name="ollama", status="unavailable"))
105
  except Exception as exc:
 
119
  # --- FAISS (local retriever) ---
120
  try:
121
  from src.services.retrieval.factory import make_retriever
122
+
123
  retriever = make_retriever(backend="faiss")
124
  if retriever is not None:
125
  doc_count = retriever.doc_count()
src/schemas/schemas.py CHANGED
@@ -29,11 +29,13 @@ class NaturalAnalysisRequest(BaseModel):
29
  """Natural language biomarker analysis request."""
30
 
31
  message: str = Field(
32
- ..., min_length=5, max_length=2000,
 
 
33
  description="Natural language message with biomarker values",
34
  )
35
  patient_context: PatientContext | None = Field(
36
- default_factory=PatientContext,
37
  )
38
 
39
 
@@ -41,10 +43,11 @@ class StructuredAnalysisRequest(BaseModel):
41
  """Structured biomarker analysis request."""
42
 
43
  biomarkers: dict[str, float] = Field(
44
- ..., description="Dict of biomarker name β†’ measured value",
 
45
  )
46
  patient_context: PatientContext | None = Field(
47
- default_factory=PatientContext,
48
  )
49
 
50
  @field_validator("biomarkers")
@@ -59,14 +62,18 @@ class AskRequest(BaseModel):
59
  """Free‑form medical question (agentic RAG pipeline)."""
60
 
61
  question: str = Field(
62
- ..., min_length=3, max_length=4000,
 
 
63
  description="Medical question",
64
  )
65
  biomarkers: dict[str, float] | None = Field(
66
- None, description="Optional biomarker context",
 
67
  )
68
  patient_context: str | None = Field(
69
- None, description="Free‑text patient context",
 
70
  )
71
 
72
 
@@ -80,6 +87,7 @@ class SearchRequest(BaseModel):
80
 
81
  class FeedbackRequest(BaseModel):
82
  """User feedback for RAG responses."""
 
83
  request_id: str = Field(..., description="ID of the request being rated")
84
  score: float = Field(..., ge=0, le=1, description="Normalized score 0.0 to 1.0")
85
  comment: str | None = Field(None, description="Optional textual feedback")
 
29
  """Natural language biomarker analysis request."""
30
 
31
  message: str = Field(
32
+ ...,
33
+ min_length=5,
34
+ max_length=2000,
35
  description="Natural language message with biomarker values",
36
  )
37
  patient_context: PatientContext | None = Field(
38
+ default_factory=lambda: PatientContext(),
39
  )
40
 
41
 
 
43
  """Structured biomarker analysis request."""
44
 
45
  biomarkers: dict[str, float] = Field(
46
+ ...,
47
+ description="Dict of biomarker name β†’ measured value",
48
  )
49
  patient_context: PatientContext | None = Field(
50
+ default_factory=lambda: PatientContext(),
51
  )
52
 
53
  @field_validator("biomarkers")
 
62
  """Free‑form medical question (agentic RAG pipeline)."""
63
 
64
  question: str = Field(
65
+ ...,
66
+ min_length=3,
67
+ max_length=4000,
68
  description="Medical question",
69
  )
70
  biomarkers: dict[str, float] | None = Field(
71
+ None,
72
+ description="Optional biomarker context",
73
  )
74
  patient_context: str | None = Field(
75
+ None,
76
+ description="Free‑text patient context",
77
  )
78
 
79
 
 
87
 
88
  class FeedbackRequest(BaseModel):
89
  """User feedback for RAG responses."""
90
+
91
  request_id: str = Field(..., description="ID of the request being rated")
92
  score: float = Field(..., ge=0, le=1, description="Normalized score 0.0 to 1.0")
93
  comment: str | None = Field(None, description="Optional textual feedback")
src/services/agents/context.py CHANGED
@@ -15,10 +15,10 @@ from typing import Any
15
  class AgenticContext:
16
  """Immutable runtime context for agentic RAG nodes."""
17
 
18
- llm: Any # LangChain chat model
19
- embedding_service: Any # EmbeddingService
20
- opensearch_client: Any # OpenSearchClient
21
- cache: Any # RedisCache
22
- tracer: Any # LangfuseTracer
23
- guild: Any | None = None # ClinicalInsightGuild (original workflow)
24
  retriever: Any | None = None # BaseRetriever (FAISS or OpenSearch)
 
15
  class AgenticContext:
16
  """Immutable runtime context for agentic RAG nodes."""
17
 
18
+ llm: Any # LangChain chat model
19
+ embedding_service: Any # EmbeddingService
20
+ opensearch_client: Any # OpenSearchClient
21
+ cache: Any # RedisCache
22
+ tracer: Any # LangfuseTracer
23
+ guild: Any | None = None # ClinicalInsightGuild (original workflow)
24
  retriever: Any | None = None # BaseRetriever (FAISS or OpenSearch)
src/services/agents/nodes/retrieve_node.py CHANGED
@@ -69,10 +69,7 @@ def retrieve_node(state: dict, *, context: Any) -> dict:
69
  documents = [
70
  {
71
  "content": h.get("_source", {}).get("chunk_text", ""),
72
- "metadata": {
73
- k: v for k, v in h.get("_source", {}).items()
74
- if k != "chunk_text"
75
- },
76
  "score": h.get("_score", 0.0),
77
  }
78
  for h in raw_hits
@@ -88,10 +85,7 @@ def retrieve_node(state: dict, *, context: Any) -> dict:
88
  documents = [
89
  {
90
  "content": h.get("_source", {}).get("chunk_text", ""),
91
- "metadata": {
92
- k: v for k, v in h.get("_source", {}).items()
93
- if k != "chunk_text"
94
- },
95
  "score": h.get("_score", 0.0),
96
  }
97
  for h in raw_hits
 
69
  documents = [
70
  {
71
  "content": h.get("_source", {}).get("chunk_text", ""),
72
+ "metadata": {k: v for k, v in h.get("_source", {}).items() if k != "chunk_text"},
 
 
 
73
  "score": h.get("_score", 0.0),
74
  }
75
  for h in raw_hits
 
85
  documents = [
86
  {
87
  "content": h.get("_source", {}).get("chunk_text", ""),
88
+ "metadata": {k: v for k, v in h.get("_source", {}).items() if k != "chunk_text"},
 
 
 
89
  "score": h.get("_score", 0.0),
90
  }
91
  for h in raw_hits
src/services/agents/state.py CHANGED
@@ -13,7 +13,7 @@ from typing import Annotated, Any
13
  from typing_extensions import TypedDict
14
 
15
 
16
- class AgenticRAGState(TypedDict):
17
  """State flowing through the agentic RAG graph."""
18
 
19
  # ── Input ────────────────────────────────────────────────────────────
@@ -22,8 +22,8 @@ class AgenticRAGState(TypedDict):
22
  patient_context: dict[str, Any] | None
23
 
24
  # ── Guardrail ────────────────────────────────────────────────────────
25
- guardrail_score: float # 0-100 medical-relevance score
26
- is_in_scope: bool # passed guardrail?
27
 
28
  # ── Retrieval ────────────────────────────────────────────────────────
29
  retrieved_documents: list[dict[str, Any]]
@@ -39,7 +39,7 @@ class AgenticRAGState(TypedDict):
39
  rewritten_query: str | None
40
 
41
  # ── Generation / routing ─────────────────────────────────────────────
42
- routing_decision: str # "analyze" | "rag_answer" | "out_of_scope"
43
  final_answer: str | None
44
  analysis_result: dict[str, Any] | None
45
 
 
13
  from typing_extensions import TypedDict
14
 
15
 
16
+ class AgenticRAGState(TypedDict, total=False):
17
  """State flowing through the agentic RAG graph."""
18
 
19
  # ── Input ────────────────────────────────────────────────────────────
 
22
  patient_context: dict[str, Any] | None
23
 
24
  # ── Guardrail ────────────────────────────────────────────────────────
25
+ guardrail_score: float # 0-100 medical-relevance score
26
+ is_in_scope: bool # passed guardrail?
27
 
28
  # ── Retrieval ────────────────────────────────────────────────────────
29
  retrieved_documents: list[dict[str, Any]]
 
39
  rewritten_query: str | None
40
 
41
  # ── Generation / routing ─────────────────────────────────────────────
42
+ routing_decision: str # "analyze" | "rag_answer" | "out_of_scope"
43
  final_answer: str | None
44
  analysis_result: dict[str, Any] | None
45