Spaces:

ghitaben
/

AMR-Guard

Running on Zero

App Files Files Community

ghitaben committed on Feb 16

Commit

793d027

1 Parent(s): 1abf8b9

Add ChromaDB vector store for unstructured document retrieval and import functionality

Browse files

Files changed (19) hide show

.gitignore +3 -0
app.py +451 -0
docs/KNOWLEDGE_STORAGE_STRATEGY.md +611 -0
pyproject.toml +2 -1
setup_demo.py +57 -0
src/__init__.py +1 -0
src/agents.py +16 -0
src/agents/__init__.py +0 -0
src/db/__init__.py +34 -0
src/db/database.py +61 -0
src/db/import_data.py +382 -0
src/db/schema.sql +108 -0
src/db/vector_store.py +312 -0
src/tools/__init__.py +67 -0
src/tools/antibiotic_tools.py +210 -0
src/tools/rag_tools.py +185 -0
src/tools/resistance_tools.py +244 -0
src/tools/safety_tools.py +250 -0
uv.lock +2 -0

.gitignore CHANGED Viewed

	@@ -1 +1,4 @@
1	.DS_Store

 .DS_Store
+.env
+data/
+*.pyc

app.py CHANGED Viewed

	@@ -0,0 +1,451 @@

+"""
+Med-I-C: AMR-Guard Demo Application
+Infection Lifecycle Orchestrator - Streamlit Interface
+"""
+import streamlit as st
+import sys
+from pathlib import Path
+# Add project root to path
+PROJECT_ROOT = Path(__file__).parent
+sys.path.insert(0, str(PROJECT_ROOT))
+from src.tools import (
+    query_antibiotic_info,
+    get_antibiotics_by_category,
+    interpret_mic_value,
+    get_breakpoints_for_pathogen,
+    query_resistance_pattern,
+    get_most_effective_antibiotics,
+    calculate_mic_trend,
+    check_drug_interactions,
+    screen_antibiotic_safety,
+    search_clinical_guidelines,
+    get_treatment_recommendation,
+    get_empirical_therapy_guidance,
+)
+# Page configuration
+st.set_page_config(
+    page_title="Med-I-C: AMR-Guard",
+    page_icon="🦠",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        font-weight: bold;
+        color: #1E88E5;
+        margin-bottom: 0;
+    }
+    .sub-header {
+        font-size: 1.2rem;
+        color: #666;
+        margin-top: 0;
+    }
+    .risk-high {
+        background-color: #FFCDD2;
+        padding: 10px;
+        border-radius: 5px;
+        border-left: 4px solid #D32F2F;
+    }
+    .risk-moderate {
+        background-color: #FFE0B2;
+        padding: 10px;
+        border-radius: 5px;
+        border-left: 4px solid #F57C00;
+    }
+    .risk-low {
+        background-color: #C8E6C9;
+        padding: 10px;
+        border-radius: 5px;
+        border-left: 4px solid #388E3C;
+    }
+    .info-box {
+        background-color: #E3F2FD;
+        padding: 15px;
+        border-radius: 5px;
+        margin: 10px 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+def main():
+    # Header
+    st.markdown('<p class="main-header">🦠 Med-I-C: AMR-Guard</p>', unsafe_allow_html=True)
+    st.markdown('<p class="sub-header">Infection Lifecycle Orchestrator Demo</p>', unsafe_allow_html=True)
+    # Sidebar navigation
+    st.sidebar.title("Navigation")
+    page = st.sidebar.radio(
+        "Select Module",
+        [
+            "🏠 Overview",
+            "💊 Stage 1: Empirical Advisor",
+            "🔬 Stage 2: Lab Interpretation",
+            "📊 MIC Trend Analysis",
+            "⚠️ Drug Safety Check",
+            "📚 Clinical Guidelines Search"
+        ]
+    )
+    if page == "🏠 Overview":
+        show_overview()
+    elif page == "💊 Stage 1: Empirical Advisor":
+        show_empirical_advisor()
+    elif page == "🔬 Stage 2: Lab Interpretation":
+        show_lab_interpretation()
+    elif page == "📊 MIC Trend Analysis":
+        show_mic_trend_analysis()
+    elif page == "⚠️ Drug Safety Check":
+        show_drug_safety()
+    elif page == "📚 Clinical Guidelines Search":
+        show_guidelines_search()
+def show_overview():
+    st.header("System Overview")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("Stage 1: Empirical Phase")
+        st.markdown("""
+        **The "First 24 Hours"**
+        Before lab results are available, the system:
+        - Analyzes patient history and risk factors
+        - Suggests empirical antibiotics based on:
+            - Suspected pathogen
+            - Local resistance patterns
+            - WHO stewardship guidelines (ACCESS → WATCH → RESERVE)
+        - Checks drug interactions with current medications
+        """)
+    with col2:
+        st.subheader("Stage 2: Targeted Phase")
+        st.markdown("""
+        **The "Lab Interpretation"**
+        Once antibiogram is available, the system:
+        - Interprets MIC values against EUCAST breakpoints
+        - Detects "MIC Creep" from historical data
+        - Refines antibiotic selection
+        - Provides evidence-based treatment recommendations
+        """)
+    st.divider()
+    st.subheader("Knowledge Sources")
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("WHO EML", "264", "antibiotics classified")
+    with col2:
+        st.metric("ATLAS Data", "10K+", "susceptibility records")
+    with col3:
+        st.metric("Breakpoints", "41", "pathogen groups")
+    with col4:
+        st.metric("Interactions", "191K+", "drug pairs")
+def show_empirical_advisor():
+    st.header("💊 Stage 1: Empirical Advisor")
+    st.markdown("*Recommend empirical therapy before lab results*")
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        infection_type = st.selectbox(
+            "Infection Type",
+            ["Urinary Tract Infection (UTI)", "Pneumonia", "Sepsis",
+             "Skin/Soft Tissue", "Intra-abdominal", "Meningitis"]
+        )
+        suspected_pathogen = st.text_input(
+            "Suspected Pathogen (optional)",
+            placeholder="e.g., E. coli, Klebsiella pneumoniae"
+        )
+        risk_factors = st.multiselect(
+            "Risk Factors",
+            ["Prior MRSA infection", "Recent antibiotic use (<90 days)",
+             "Healthcare-associated", "Immunocompromised",
+             "Renal impairment", "Prior MDR infection"]
+        )
+    with col2:
+        st.markdown("**WHO Stewardship Categories**")
+        st.markdown("""
+        - **ACCESS**: First-line, low resistance
+        - **WATCH**: Higher resistance potential
+        - **RESERVE**: Last resort antibiotics
+        """)
+    if st.button("Get Empirical Recommendation", type="primary"):
+        with st.spinner("Searching guidelines and resistance data..."):
+            # Get recommendations from guidelines
+            guidance = get_empirical_therapy_guidance(
+                infection_type.split("(")[0].strip(),
+                risk_factors
+            )
+            st.subheader("Recommendations")
+            if guidance.get("recommendations"):
+                for i, rec in enumerate(guidance["recommendations"][:3], 1):
+                    with st.expander(f"Guideline Excerpt {i} (Relevance: {rec.get('relevance_score', 0):.2f})"):
+                        st.markdown(rec.get("content", ""))
+                        st.caption(f"Source: {rec.get('source', 'IDSA Guidelines')}")
+            # If pathogen specified, show resistance patterns
+            if suspected_pathogen:
+                st.subheader(f"Resistance Patterns for {suspected_pathogen}")
+                effective = get_most_effective_antibiotics(suspected_pathogen, min_susceptibility=70)
+                if effective:
+                    st.markdown("**Most Effective Antibiotics (>70% susceptibility)**")
+                    for ab in effective[:5]:
+                        st.write(f"- **{ab.get('antibiotic')}**: {ab.get('avg_susceptibility', 0):.1f}% susceptible")
+                else:
+                    st.info("No resistance data found for this pathogen.")
+def show_lab_interpretation():
+    st.header("🔬 Stage 2: Lab Interpretation")
+    st.markdown("*Interpret antibiogram MIC values*")
+    col1, col2 = st.columns(2)
+    with col1:
+        pathogen = st.text_input(
+            "Identified Pathogen",
+            placeholder="e.g., Escherichia coli, Pseudomonas aeruginosa"
+        )
+        antibiotic = st.text_input(
+            "Antibiotic",
+            placeholder="e.g., Ciprofloxacin, Meropenem"
+        )
+        mic_value = st.number_input(
+            "MIC Value (mg/L)",
+            min_value=0.001,
+            max_value=1024.0,
+            value=1.0,
+            step=0.5
+        )
+    with col2:
+        st.markdown("**How to Read Results**")
+        st.markdown("""
+        - **S (Susceptible)**: MIC ≤ breakpoint - antibiotic likely effective
+        - **I (Intermediate)**: May work with higher doses
+        - **R (Resistant)**: MIC > breakpoint - do not use
+        """)
+    if st.button("Interpret MIC", type="primary"):
+        if pathogen and antibiotic:
+            with st.spinner("Checking breakpoints..."):
+                result = interpret_mic_value(pathogen, antibiotic, mic_value)
+                interpretation = result.get("interpretation", "UNKNOWN")
+                if interpretation == "SUSCEPTIBLE":
+                    st.success(f"✅ **{interpretation}**")
+                elif interpretation == "RESISTANT":
+                    st.error(f"❌ **{interpretation}**")
+                elif interpretation == "INTERMEDIATE":
+                    st.warning(f"⚠️ **{interpretation}**")
+                else:
+                    st.info(f"❓ **{interpretation}**")
+                st.markdown(f"**Details:** {result.get('message', '')}")
+                if result.get("breakpoints"):
+                    bp = result["breakpoints"]
+                    st.markdown(f"""
+                    **Breakpoints:**
+                    - S ≤ {bp.get('susceptible', 'N/A')} mg/L
+                    - R > {bp.get('resistant', 'N/A')} mg/L
+                    """)
+                if result.get("notes"):
+                    st.info(f"**Note:** {result.get('notes')}")
+        else:
+            st.warning("Please enter both pathogen and antibiotic names.")
+def show_mic_trend_analysis():
+    st.header("📊 MIC Trend Analysis")
+    st.markdown("*Detect MIC creep over time*")
+    st.markdown("""
+    Enter historical MIC values to detect resistance velocity.
+    **MIC Creep**: A gradual increase in MIC that may predict treatment failure
+    even when the organism is still classified as "Susceptible".
+    """)
+    # Input for historical MICs
+    num_readings = st.slider("Number of historical readings", 2, 6, 3)
+    mic_values = []
+    cols = st.columns(num_readings)
+    for i, col in enumerate(cols):
+        with col:
+            mic = col.number_input(
+                f"MIC {i+1}",
+                min_value=0.001,
+                max_value=256.0,
+                value=float(2 ** i),  # Default: 1, 2, 4, ...
+                key=f"mic_{i}"
+            )
+            mic_values.append({"date": f"T{i}", "mic_value": mic})
+    if st.button("Analyze Trend", type="primary"):
+        result = calculate_mic_trend(mic_values)
+        risk_level = result.get("risk_level", "UNKNOWN")
+        if risk_level == "HIGH":
+            st.markdown(f'<div class="risk-high"><strong>🚨 HIGH RISK</strong><br>{result.get("alert", "")}</div>',
+                       unsafe_allow_html=True)
+        elif risk_level == "MODERATE":
+            st.markdown(f'<div class="risk-moderate"><strong>⚠️ MODERATE RISK</strong><br>{result.get("alert", "")}</div>',
+                       unsafe_allow_html=True)
+        else:
+            st.markdown(f'<div class="risk-low"><strong>✅ LOW RISK</strong><br>{result.get("alert", "")}</div>',
+                       unsafe_allow_html=True)
+        st.divider()
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Baseline MIC", f"{result.get('baseline_mic', 'N/A')} mg/L")
+        with col2:
+            st.metric("Current MIC", f"{result.get('current_mic', 'N/A')} mg/L")
+        with col3:
+            st.metric("Fold Change", f"{result.get('ratio', 'N/A')}x")
+        st.markdown(f"**Trend:** {result.get('trend', 'N/A')}")
+        st.markdown(f"**Resistance Velocity:** {result.get('velocity', 'N/A')}x per time point")
+def show_drug_safety():
+    st.header("⚠️ Drug Safety Check")
+    st.markdown("*Screen for drug interactions*")
+    col1, col2 = st.columns(2)
+    with col1:
+        antibiotic = st.text_input(
+            "Proposed Antibiotic",
+            placeholder="e.g., Ciprofloxacin"
+        )
+        current_meds = st.text_area(
+            "Current Medications (one per line)",
+            placeholder="Warfarin\nMetformin\nAmlodipine",
+            height=150
+        )
+    with col2:
+        allergies = st.text_area(
+            "Known Allergies (one per line)",
+            placeholder="Penicillin\nSulfa",
+            height=100
+        )
+    if st.button("Check Safety", type="primary"):
+        if antibiotic:
+            medications = [m.strip() for m in current_meds.split("\n") if m.strip()]
+            allergy_list = [a.strip() for a in allergies.split("\n") if a.strip()]
+            with st.spinner("Checking interactions..."):
+                result = screen_antibiotic_safety(antibiotic, medications, allergy_list)
+                if result.get("safe_to_use"):
+                    st.success("✅ No critical safety concerns identified")
+                else:
+                    st.error("❌ SAFETY CONCERNS IDENTIFIED")
+                # Show alerts
+                if result.get("alerts"):
+                    st.subheader("Alerts")
+                    for alert in result["alerts"]:
+                        level = alert.get("level", "WARNING")
+                        if level == "CRITICAL":
+                            st.error(f"🚨 {alert.get('message', '')}")
+                        else:
+                            st.warning(f"⚠️ {alert.get('message', '')}")
+                # Show allergy warnings
+                if result.get("allergy_warnings"):
+                    st.subheader("Allergy Warnings")
+                    for warn in result["allergy_warnings"]:
+                        st.error(f"🚫 {warn.get('message', '')}")
+                # Show interactions
+                if result.get("interactions"):
+                    st.subheader("Drug Interactions Found")
+                    for interaction in result["interactions"][:5]:
+                        severity = interaction.get("severity", "unknown")
+                        icon = "🔴" if severity == "major" else "🟡" if severity == "moderate" else "🟢"
+                        st.markdown(f"""
+                        {icon} **{interaction.get('drug_1')}** ↔ **{interaction.get('drug_2')}**
+                        - Severity: {severity.upper()}
+                        - {interaction.get('interaction_description', '')}
+                        """)
+        else:
+            st.warning("Please enter an antibiotic name.")
+def show_guidelines_search():
+    st.header("📚 Clinical Guidelines Search")
+    st.markdown("*Search IDSA treatment guidelines*")
+    query = st.text_input(
+        "Search Query",
+        placeholder="e.g., treatment for ESBL E. coli UTI"
+    )
+    pathogen_filter = st.selectbox(
+        "Filter by Pathogen Type (optional)",
+        ["All", "ESBL-E", "CRE", "CRAB", "DTR-PA", "S.maltophilia", "AmpC-E"]
+    )
+    if st.button("Search Guidelines", type="primary"):
+        if query:
+            with st.spinner("Searching clinical guidelines..."):
+                filter_value = None if pathogen_filter == "All" else pathogen_filter
+                results = search_clinical_guidelines(query, pathogen_filter=filter_value, n_results=5)
+                if results:
+                    st.subheader(f"Found {len(results)} relevant excerpts")
+                    for i, result in enumerate(results, 1):
+                        with st.expander(
+                            f"Result {i} - {result.get('pathogen_type', 'General')} "
+                            f"(Relevance: {result.get('relevance_score', 0):.2f})"
+                        ):
+                            st.markdown(result.get("content", ""))
+                            st.caption(f"Source: {result.get('source', 'IDSA Guidelines')}")
+                else:
+                    st.info("No results found. Try a different query or remove the filter.")
+        else:
+            st.warning("Please enter a search query.")
+# Render the UI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

docs/KNOWLEDGE_STORAGE_STRATEGY.md ADDED Viewed

	@@ -0,0 +1,611 @@

+# Med-I-C Knowledge Storage Strategy
+## Overview
+This document defines how each document in the `docs/` folder will be stored and queried to support the **AMR-Guard: Infection Lifecycle Orchestrator** workflow.
+---
+## Document Classification Summary
+| Document | Type | Storage | Purpose in Workflow |
+|----------|------|---------|---------------------|
+| EML exports (ACCESS/RESERVE/WATCH) | XLSX | **SQLite** | Antibiotic classification & stewardship |
+| ATLAS Susceptibility Data | XLSX | **SQLite** | Pathogen resistance patterns |
+| MIC Breakpoint Tables | XLSX | **SQLite** | Susceptibility interpretation |
+| Drug Interactions | CSV | **SQLite** | Drug safety screening |
+| IDSA Guidance (ciae403.pdf) | PDF | **ChromaDB** | Clinical treatment guidelines |
+| MIC Breakpoint Tables (PDF) | PDF | **ChromaDB** | Reference documentation |
+---
+## Part 1: Structured Data (SQLite)
+### 1.1 EML Antibiotic Classification Tables
+**Source Files:**
+- `antibiotic_guidelines/EML export ACCESS group.xlsx`
+- `antibiotic_guidelines/EML export RESERVE group.xlsx`
+- `antibiotic_guidelines/EML export WATCH group.xlsx`
+**Database Table: `eml_antibiotics`**
+```sql
+CREATE TABLE eml_antibiotics (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    medicine_name TEXT NOT NULL,
+    who_category TEXT NOT NULL,  -- 'ACCESS', 'RESERVE', 'WATCH'
+    eml_section TEXT,
+    formulations TEXT,
+    indication TEXT,
+    atc_codes TEXT,
+    combined_with TEXT,
+    status TEXT,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX idx_medicine_name ON eml_antibiotics(medicine_name);
+CREATE INDEX idx_who_category ON eml_antibiotics(who_category);
+CREATE INDEX idx_atc_codes ON eml_antibiotics(atc_codes);
+```
+**Usage in Workflow:**
+- **Agent 1 (Intake Historian):** Query to identify antibiotic stewardship category
+- **Agent 4 (Clinical Pharmacologist):** Suggest ACCESS antibiotics first, escalate to WATCH/RESERVE only when necessary
+---
+### 1.2 ATLAS Pathogen Susceptibility Data
+**Source File:** `pathogen_resistance/ATLAS Susceptibility Data Export.xlsx`
+**Database Tables:**
+```sql
+CREATE TABLE atlas_susceptibility_percent (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    pathogen TEXT NOT NULL,
+    antibiotic TEXT NOT NULL,
+    region TEXT,
+    year INTEGER,
+    susceptibility_percent REAL,
+    sample_size INTEGER,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE TABLE atlas_susceptibility_absolute (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    pathogen TEXT NOT NULL,
+    antibiotic TEXT NOT NULL,
+    region TEXT,
+    year INTEGER,
+    susceptible_count INTEGER,
+    intermediate_count INTEGER,
+    resistant_count INTEGER,
+    total_isolates INTEGER,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX idx_pathogen ON atlas_susceptibility_percent(pathogen);
+CREATE INDEX idx_antibiotic ON atlas_susceptibility_percent(antibiotic);
+CREATE INDEX idx_pathogen_abs ON atlas_susceptibility_absolute(pathogen);
+```
+**Usage in Workflow:**
+- **Agent 1 (Empirical Phase):** Retrieve local/regional resistance patterns for empirical therapy
+- **Agent 3 (Trend Analyst):** Compare current MIC with population-level trends
+---
+### 1.3 MIC Breakpoint Tables
+**Source File:** `mic_breakpoints/v_16.0__BreakpointTables.xlsx`
+**Database Tables:**
+```sql
+CREATE TABLE mic_breakpoints (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    pathogen_group TEXT NOT NULL,  -- e.g., 'Enterobacterales', 'Staphylococcus'
+    antibiotic TEXT NOT NULL,
+    route TEXT,  -- 'IV', 'Oral', 'Topical'
+    mic_susceptible REAL,  -- S breakpoint (mg/L)
+    mic_resistant REAL,    -- R breakpoint (mg/L)
+    disk_susceptible REAL, -- Zone diameter (mm)
+    disk_resistant REAL,
+    notes TEXT,
+    eucast_version TEXT DEFAULT '16.0',
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE TABLE dosage_guidance (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    antibiotic TEXT NOT NULL,
+    standard_dose TEXT,
+    high_dose TEXT,
+    renal_adjustment TEXT,
+    notes TEXT
+);
+CREATE INDEX idx_bp_pathogen ON mic_breakpoints(pathogen_group);
+CREATE INDEX idx_bp_antibiotic ON mic_breakpoints(antibiotic);
+```
+**Usage in Workflow:**
+- **Agent 2 (Vision Specialist):** Validate extracted MIC values against breakpoints
+- **Agent 3 (Trend Analyst):** Interpret S/I/R classification from MIC values
+- **Agent 4 (Clinical Pharmacologist):** Use dosage guidance for prescriptions
+---
+### 1.4 Drug Interactions Database
+**Source File:** `drug_safety/db_drug_interactions.csv`
+**Database Table:**
+```sql
+CREATE TABLE drug_interactions (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    drug_1 TEXT NOT NULL,
+    drug_2 TEXT NOT NULL,
+    interaction_description TEXT,
+    severity TEXT,  -- Derived: 'major', 'moderate', 'minor'
+    mechanism TEXT, -- Derived from description
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX idx_drug_1 ON drug_interactions(drug_1);
+CREATE INDEX idx_drug_2 ON drug_interactions(drug_2);
+CREATE INDEX idx_severity ON drug_interactions(severity);
+-- View for bidirectional lookup
+CREATE VIEW drug_interaction_lookup AS
+SELECT drug_1, drug_2, interaction_description, severity FROM drug_interactions
+UNION ALL
+SELECT drug_2, drug_1, interaction_description, severity FROM drug_interactions;
+```
+**Usage in Workflow:**
+- **Agent 4 (Clinical Pharmacologist):** Check for interactions with patient's current medications
+- **Safety Alerts:** Flag potential toxicity issues
+---
+## Part 2: Unstructured Data (ChromaDB)
+### 2.1 IDSA Clinical Guidelines
+**Source File:** `antibiotic_guidelines/ciae403.pdf`
+**ChromaDB Collection: `idsa_treatment_guidelines`**
+```python
+collection_config = {
+    "name": "idsa_treatment_guidelines",
+    "metadata": {
+        "source": "IDSA 2024 Guidance",
+        "doi": "10.1093/cid/ciae403",
+        "version": "2024"
+    },
+    "embedding_function": "sentence-transformers/all-MiniLM-L6-v2"
+}
+# Document chunking strategy
+chunk_config = {
+    "chunk_size": 1000,
+    "chunk_overlap": 200,
+    "separators": ["\n\n", "\n", ". "],
+    "metadata_fields": ["section", "pathogen_type", "recommendation_type"]
+}
+```
+**Metadata Schema per Chunk:**
+```python
+{
+    "section": "Treatment Recommendations",
+    "pathogen_type": "ESBL-E | CRE | CRAB | DTR-PA | S.maltophilia",
+    "recommendation_strength": "Strong | Conditional",
+    "evidence_quality": "High | Moderate | Low",
+    "page_number": int
+}
+```
+**Usage in Workflow:**
+- **Agent 1 (Empirical Phase):** Retrieve treatment recommendations for suspected pathogens
+- **Agent 4 (Clinical Pharmacologist):** Provide evidence-based justification for antibiotic selection
+---
+### 2.2 MIC Breakpoint Reference (PDF)
+**Source File:** `mic_breakpoints/v_16.0_Breakpoint_Tables.pdf`
+**ChromaDB Collection: `mic_reference_docs`**
+```python
+collection_config = {
+    "name": "mic_reference_docs",
+    "metadata": {
+        "source": "EUCAST Breakpoint Tables",
+        "version": "16.0"
+    },
+    "embedding_function": "sentence-transformers/all-MiniLM-L6-v2"
+}
+```
+**Usage in Workflow:**
+- **Supplementary Context:** Provide detailed explanations for breakpoint interpretations
+- **Edge Cases:** Handle unusual pathogens or antibiotic combinations not in structured tables
+---
+## Part 3: Query Tools Definition
+### Tool 1: `query_antibiotic_info`
+**Purpose:** Retrieve antibiotic classification and formulation details
+```python
+def query_antibiotic_info(
+    antibiotic_name: str,
+    include_category: bool = True,
+    include_formulations: bool = True
+) -> dict:
+    """
+    Query EML antibiotic database for classification and details.
+    Args:
+        antibiotic_name: Name of the antibiotic (partial match supported)
+        include_category: Include WHO stewardship category
+        include_formulations: Include available formulations
+    Returns:
+        dict with antibiotic details, category, indications
+    Used by: Agent 1, Agent 4
+    """
+```
+**SQL Query:**
+```sql
+SELECT medicine_name, who_category, formulations, indication, combined_with
+FROM eml_antibiotics
+WHERE LOWER(medicine_name) LIKE LOWER(?)
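+ORDER BY CASE who_category  -- explicit rank: alphabetical order would put RESERVE before WATCH
+    WHEN 'ACCESS' THEN 1
+    WHEN 'WATCH' THEN 2
+    WHEN 'RESERVE' THEN 3
+    ELSE 4
+END;  -- ACCESS first, then WATCH, then RESERVE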
+```
+---
+### Tool 2: `query_resistance_pattern`
+**Purpose:** Get susceptibility data for pathogen-antibiotic combinations
+```python
+def query_resistance_pattern(
+    pathogen: str,
+    antibiotic: str = None,
+    region: str = None,
+    year: int = None
+) -> dict:
+    """
+    Query ATLAS susceptibility data for resistance patterns.
+    Args:
+        pathogen: Pathogen name (e.g., "E. coli", "K. pneumoniae")
+        antibiotic: Optional specific antibiotic to check
+        region: Optional geographic region filter
+        year: Optional year filter (defaults to most recent)
+    Returns:
+        dict with susceptibility percentages and trends
+    Used by: Agent 1 (Empirical), Agent 3 (Trend Analysis)
+    """
+```
+**SQL Query:**
+```sql
+SELECT antibiotic, susceptibility_percent, sample_size, year
+FROM atlas_susceptibility_percent
+WHERE LOWER(pathogen) LIKE LOWER(?)
+  AND (antibiotic = ? OR ? IS NULL)
+  AND (region = ? OR ? IS NULL)
+  AND (year = ? OR ? IS NULL)
+ORDER BY year DESC, susceptibility_percent DESC;
+```
+---
+### Tool 3: `interpret_mic_value`
+**Purpose:** Classify MIC as S/I/R based on EUCAST breakpoints
+```python
+def interpret_mic_value(
+    pathogen: str,
+    antibiotic: str,
+    mic_value: float,
+    route: str = "IV"
+) -> dict:
+    """
+    Interpret MIC value against EUCAST breakpoints.
+    Args:
+        pathogen: Pathogen name or group
+        antibiotic: Antibiotic name
+        mic_value: MIC value in mg/L
+        route: Administration route (IV, Oral)
+    Returns:
+        dict with interpretation (S/I/R), breakpoint values, dosing notes
+    Used by: Agent 2, Agent 3
+    """
+```
+**SQL Query:**
+```sql
+SELECT mic_susceptible, mic_resistant, notes
+FROM mic_breakpoints
+WHERE LOWER(pathogen_group) LIKE LOWER(?)
+  AND LOWER(antibiotic) LIKE LOWER(?)
+  AND (route = ? OR route IS NULL);
+```
+**Interpretation Logic:**
+```python
+if mic_value <= mic_susceptible:
+    return "Susceptible"
+elif mic_value > mic_resistant:
+    return "Resistant"
+else:
+    return "Intermediate (Susceptible, Increased Exposure)"
+```
+---
+### Tool 4: `check_drug_interactions`
+**Purpose:** Screen for drug-drug interactions
+```python
+def check_drug_interactions(
+    target_drug: str,
+    patient_medications: list[str],
+    severity_filter: str = None
+) -> list[dict]:
+    """
+    Check for interactions between target drug and patient's medications.
+    Args:
+        target_drug: Antibiotic being considered
+        patient_medications: List of patient's current medications
+        severity_filter: Optional filter ('major', 'moderate', 'minor')
+    Returns:
+        list of interaction dicts with severity and description
+    Used by: Agent 4 (Safety Check)
+    """
+```
+**SQL Query:**
+```sql
+SELECT drug_1, drug_2, interaction_description, severity
+FROM drug_interaction_lookup
+WHERE LOWER(drug_1) LIKE LOWER(?)
+  AND LOWER(drug_2) IN (SELECT LOWER(value) FROM json_each(?))
+  AND (severity = ? OR ? IS NULL)
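+ORDER BY CASE severity  -- explicit rank: text sort DESC would give moderate, minor, major
+    WHEN 'major' THEN 1
+    WHEN 'moderate' THEN 2
+    WHEN 'minor' THEN 3
+    ELSE 4
+END;  -- most severe first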
+```
+---
+### Tool 5: `search_clinical_guidelines`
+**Purpose:** RAG search over IDSA guidelines for treatment recommendations
+```python
+def search_clinical_guidelines(
+    query: str,
+    pathogen_filter: str = None,
+    n_results: int = 5
+) -> list[dict]:
+    """
+    Semantic search over IDSA clinical guidelines.
+    Args:
+        query: Natural language query about treatment
+        pathogen_filter: Optional pathogen type filter
+        n_results: Number of results to return
+    Returns:
+        list of relevant guideline excerpts with metadata
+    Used by: Agent 1 (Empirical), Agent 4 (Justification)
+    """
+```
+**ChromaDB Query:**
+```python
+results = collection.query(
+    query_texts=[query],
+    n_results=n_results,
+    where={"pathogen_type": pathogen_filter} if pathogen_filter else None,
+    include=["documents", "metadatas", "distances"]
+)
+```
+---
+### Tool 6: `calculate_mic_trend`
+**Purpose:** Analyze MIC creep over time
+```python
+def calculate_mic_trend(
+    patient_id: str,
+    pathogen: str,
+    antibiotic: str,
+    historical_mics: list[dict]  # [{date, mic_value}, ...]
+) -> dict:
+    """
+    Calculate resistance velocity and MIC trend.
+    Args:
+        patient_id: Patient identifier
+        pathogen: Identified pathogen
+        antibiotic: Target antibiotic
+        historical_mics: List of historical MIC readings
+    Returns:
+        dict with trend analysis, resistance_velocity, risk_level
+    Used by: Agent 3 (Trend Analyst)
+    """
+```
+**Logic:**
+```python
+# Calculate resistance velocity
+if len(historical_mics) >= 2:
+    baseline_mic = historical_mics[0]["mic_value"]
+    current_mic = historical_mics[-1]["mic_value"]
+    ratio = current_mic / baseline_mic
+    if ratio >= 4:  # Two-step dilution increase
+        risk_level = "HIGH"
+        alert = "MIC Creep Detected - Risk of Treatment Failure"
+    elif ratio >= 2:
+        risk_level = "MODERATE"
+        alert = "MIC Trending Upward - Monitor Closely"
+    else:
+        risk_level = "LOW"
+        alert = None
+```
+---
+## Part 4: Workflow Integration
+### Stage 1: Empirical Phase (Before Lab Results)
+```
+Input: Patient history, symptoms, infection site
+    │
+    ▼
+┌─────────────────────────────────────────────────────────┐
+│  Agent 1: Intake Historian (MedGemma 1.5)               │
+│  ├── Tool: search_clinical_guidelines()                 │
+│  │   └── ChromaDB: idsa_treatment_guidelines            │
+│  ├── Tool: query_resistance_pattern()                   │
+│  │   └── SQLite: atlas_susceptibility_percent           │
+│  └── Tool: query_antibiotic_info()                      │
+│      └── SQLite: eml_antibiotics                        │
+└─────────────────────────────────────────────────────────┘
+    │
+    ▼
+┌─────────────────────────────────────────────────────────┐
+│  Agent 4: Clinical Pharmacologist (TxGemma)             │
+│  ├── Tool: check_drug_interactions()                    │
+│  │   └── SQLite: drug_interactions                      │
+│  └── Tool: query_antibiotic_info() [dosing]             │
+│      └── SQLite: eml_antibiotics + dosage_guidance      │
+└─────────────────────────────────────────────────────────┘
+    │
+    ▼
+Output: Empirical therapy recommendation with safety check
+```
+### Stage 2: Targeted Phase (After Lab Results)
+```
+Input: Lab report (antibiogram image/PDF)
+    │
+    ▼
+┌─────────────────────────────────────────────────────────┐
+│  Agent 2: Vision Specialist (MedGemma 4B)               │
+│  ├── Extract: Pathogen name, MIC values                 │
+│  └── Tool: interpret_mic_value()                        │
+│      └── SQLite: mic_breakpoints                        │
+└─────────────────────────────────────────────────────────┘
+    │
+    ▼
+┌─────────────────────────────────────────────────────────┐
+│  Agent 3: Trend Analyst (MedGemma 27B)                  │
+│  ├── Tool: calculate_mic_trend()                        │
+│  │   └── Patient historical data + current MIC          │
+│  └── Tool: query_resistance_pattern()                   │
+│      └── SQLite: atlas_susceptibility (population data) │
+└─────────────────────────────────────────────────────────┘
+    │
+    ▼
+┌─────────────────────────────────────────────────────────┐
+│  Agent 4: Clinical Pharmacologist (TxGemma)             │
+│  ├── Tool: search_clinical_guidelines()                 │
+│  │   └── ChromaDB: idsa_treatment_guidelines            │
+│  ├── Tool: check_drug_interactions()                    │
+│  │   └── SQLite: drug_interactions                      │
+│  └── Generate: Final prescription with justification    │
+└─────────────────────────────────────────────────────────┘
+    │
+    ▼
+Output: Targeted therapy with MIC trend analysis & safety alerts
+```
+---
+## Part 5: Implementation Checklist
+### SQLite Setup
+- [ ] Create database schema with all tables
+- [ ] Import EML Excel files (ACCESS, RESERVE, WATCH)
+- [ ] Import ATLAS susceptibility data (both sheets)
+- [ ] Import MIC breakpoint tables (41 sheets)
+- [ ] Import drug interactions CSV
+- [ ] Add severity classification to interactions
+- [ ] Create indexes for efficient queries
+### ChromaDB Setup
+- [ ] Initialize ChromaDB persistent storage
+- [ ] Process ciae403.pdf with chunking strategy
+- [ ] Process MIC breakpoint PDF
+- [ ] Add metadata to all chunks
+- [ ] Test semantic search queries
+### Tool Implementation
+- [ ] Implement `query_antibiotic_info()`
+- [ ] Implement `query_resistance_pattern()`
+- [ ] Implement `interpret_mic_value()`
+- [ ] Implement `check_drug_interactions()`
+- [ ] Implement `search_clinical_guidelines()`
+- [ ] Implement `calculate_mic_trend()`
+- [ ] Create unified tool interface for LangGraph
+---
+## File Structure
+```
+Med-I-C/
+├── docs/                          # Source documents
+├── data/
+│   ├── medic.db                   # SQLite database
+│   └── chroma/                    # ChromaDB persistent storage
+├── src/
+│   ├── db/
+│   │   ├── schema.sql             # Database schema
+│   │   └── import_data.py         # Data import scripts
+│   ├── tools/
+│   │   ├── antibiotic_tools.py    # query_antibiotic_info, interpret_mic
+│   │   ├── resistance_tools.py   # query_resistance_pattern, calculate_mic_trend
+│   │   ├── safety_tools.py       # check_drug_interactions
+│   │   └── rag_tools.py          # search_clinical_guidelines
+│   └── agents/
+│       ├── intake_historian.py    # Agent 1
+│       ├── vision_specialist.py   # Agent 2
+│       ├── trend_analyst.py       # Agent 3
+│       └── clinical_pharmacologist.py  # Agent 4
+└── KNOWLEDGE_STORAGE_STRATEGY.md  # This document
+```

pyproject.toml CHANGED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "Med-I-C"
 version = "0.1.0"
-description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
@@ -25,4 +25,5 @@ dependencies = [
     "pypdf",
     "langchain-community>=0.4.1",
     "jq>=1.11.0",
 ]

 [project]
 name = "Med-I-C"
 version = "0.1.0"
+description = "AMR-Guard: Infection Lifecycle Orchestrator Demo"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "pypdf",
     "langchain-community>=0.4.1",
     "jq>=1.11.0",
+    "pandas>=2.0.0",
 ]

setup_demo.py ADDED Viewed

	@@ -0,0 +1,57 @@

+#!/usr/bin/env python3
+"""
+Setup script for Med-I-C Demo
+Initializes the database and imports all data.
+"""
+import sys
+from pathlib import Path
+# Add project root to path
+PROJECT_ROOT = Path(__file__).parent
+sys.path.insert(0, str(PROJECT_ROOT))
+def main():
+    """Run the full demo setup and print a summary of what was imported.
+    Step 1 loads the structured sources into SQLite (drug interactions capped
+    at 50,000 rows), step 2 loads the PDFs into ChromaDB. The project imports
+    happen inside the function, after PROJECT_ROOT was added to sys.path at
+    module level. Nothing is returned; all output goes to stdout.
+    """
+    print("=" * 60)
+    print("Med-I-C Demo Setup")
+    print("AMR-Guard: Infection Lifecycle Orchestrator")
+    print("=" * 60)
+    print()
+    # Step 1: Import structured data into SQLite
+    print("Step 1: Importing structured data into SQLite...")
+    print("-" * 40)
+    from src.db.import_data import import_all_data
+    # Limit interactions to 50k for faster demo setup
+    structured_results = import_all_data(interactions_limit=50000)
+    # Step 2: Import PDFs into ChromaDB
+    print("\nStep 2: Importing PDFs into ChromaDB (Vector Store)...")
+    print("-" * 40)
+    from src.db.vector_store import import_all_vectors
+    vector_results = import_all_vectors()
+    # Summary
+    print("\n" + "=" * 60)
+    print("Setup Complete!")
+    print("=" * 60)
+    print("\nData imported:")
+    print(f"  - EML Antibiotics: {structured_results.get('eml_antibiotics', 0)} records")
+    print(f"  - ATLAS Susceptibility: {structured_results.get('atlas_susceptibility', 0)} records")
+    print(f"  - MIC Breakpoints: {structured_results.get('mic_breakpoints', 0)} records")
+    print(f"  - Drug Interactions: {structured_results.get('drug_interactions', 0)} records")
+    print(f"  - IDSA Guidelines: {vector_results.get('idsa_guidelines', 0)} chunks")
+    print(f"  - MIC Reference: {vector_results.get('mic_reference', 0)} chunks")
+    print("\nTo run the demo app:")
+    print("  uv run streamlit run app.py")
+    print()
+if __name__ == "__main__":
+    main()

src/__init__.py CHANGED Viewed

	@@ -0,0 +1 @@


1	+ """Med-I-C: AMR-Guard - Infection Lifecycle Orchestrator."""

src/agents.py CHANGED Viewed

	@@ -0,0 +1,16 @@

+import os
+from langchain.agents import create_agent
+from langchain.chat_models import init_chat_model
+from dotenv import load_dotenv
+os.environ["GOOGLE_API_KEY"] = load_dotenv().get("GOOGLE_API_KEY")
+model = init_chat_model(
+    "google_genai:gemini-2.5-flash-lite",
+    # Kwargs passed to the model:
+    temperature=0.7,
+    timeout=30,
+    max_tokens=1000,
+)
+Intake_Historian = create_agent(model=model, tools=["google_search"], verbose=True)

src/agents/__init__.py ADDED Viewed

File without changes

src/db/__init__.py ADDED Viewed

	@@ -0,0 +1,34 @@

+"""Database modules for Med-I-C."""
+from .database import (
+    init_database,
+    get_connection,
+    execute_query,
+    execute_insert,
+    execute_many,
+    DB_PATH,
+    DATA_DIR,
+    DOCS_DIR,
+)
+from .vector_store import (
+    get_chroma_client,
+    search_guidelines,
+    search_mic_reference,
+    import_all_vectors,
+)
+__all__ = [
+    "init_database",
+    "get_connection",
+    "execute_query",
+    "execute_insert",
+    "execute_many",
+    "DB_PATH",
+    "DATA_DIR",
+    "DOCS_DIR",
+    "get_chroma_client",
+    "search_guidelines",
+    "search_mic_reference",
+    "import_all_vectors",
+]

src/db/database.py ADDED Viewed

	@@ -0,0 +1,61 @@

+"""Database connection and initialization for Med-I-C."""
+import sqlite3
+from pathlib import Path
+from contextlib import contextmanager
+# Project paths
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+DATA_DIR = PROJECT_ROOT / "data"
+DOCS_DIR = PROJECT_ROOT / "docs"
+DB_PATH = DATA_DIR / "medic.db"
+SCHEMA_PATH = Path(__file__).parent / "schema.sql"
+def init_database() -> None:
+    """Initialize the database with schema."""
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    with get_connection() as conn:
+        with open(SCHEMA_PATH, 'r') as f:
+            conn.executescript(f.read())
+        conn.commit()
+    print(f"Database initialized at {DB_PATH}")
+@contextmanager
+def get_connection():
+    """Context manager yielding a sqlite3 connection to DB_PATH.
+    Rows are returned as sqlite3.Row objects. The connection is always closed
+    on exit; nothing is committed here, so callers must commit their own writes.
+    """
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    try:
+        yield conn
+    finally:
+        conn.close()
+def execute_query(query: str, params: tuple = ()) -> list[dict]:
+    """Execute a query and return results as list of dicts."""
+    with get_connection() as conn:
+        cursor = conn.execute(query, params)
+        columns = [description[0] for description in cursor.description]
+        return [dict(zip(columns, row)) for row in cursor.fetchall()]
+def execute_insert(query: str, params: tuple = ()) -> int:
+    """Execute an insert and return the last row id."""
+    with get_connection() as conn:
+        cursor = conn.execute(query, params)
+        conn.commit()
+        return cursor.lastrowid
+def execute_many(query: str, params_list: list[tuple]) -> None:
+    """Execute many inserts."""
+    with get_connection() as conn:
+        conn.executemany(query, params_list)
+        conn.commit()
+if __name__ == "__main__":
+    init_database()

src/db/import_data.py ADDED Viewed

	@@ -0,0 +1,382 @@

+"""Data import scripts for Med-I-C structured documents."""
+import pandas as pd
+import re
+from pathlib import Path
+from .database import (
+    get_connection, init_database, execute_many,
+    DOCS_DIR, DB_PATH
+)
+def safe_float(value):
+    """Safely convert a value to float, returning None on failure."""
+    if pd.isna(value):
+        return None
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return None
+def safe_int(value):
+    """Safely convert a value to int, returning None on failure."""
+    if pd.isna(value):
+        return None
+    try:
+        return int(float(value))
+    except (ValueError, TypeError):
+        return None
+def classify_severity(description: str) -> str:
+    """Classify drug interaction severity based on description keywords."""
+    if not description:
+        return "unknown"
+    desc_lower = description.lower()
+    # Major severity indicators
+    major_keywords = [
+        "cardiotoxic", "nephrotoxic", "hepatotoxic", "neurotoxic",
+        "fatal", "death", "severe", "contraindicated", "arrhythmia",
+        "qt prolongation", "seizure", "bleeding", "hemorrhage",
+        "serotonin syndrome", "neuroleptic malignant"
+    ]
+    # Moderate severity indicators
+    moderate_keywords = [
+        "increase", "decrease", "reduce", "enhance", "inhibit",
+        "metabolism", "concentration", "absorption", "excretion",
+        "therapeutic effect", "adverse effect", "toxicity"
+    ]
+    for keyword in major_keywords:
+        if keyword in desc_lower:
+            return "major"
+    for keyword in moderate_keywords:
+        if keyword in desc_lower:
+            return "moderate"
+    return "minor"
+def import_eml_antibiotics() -> int:
+    """Import WHO EML antibiotic classification data."""
+    print("Importing EML antibiotic data...")
+    eml_files = {
+        "ACCESS": DOCS_DIR / "antibiotic_guidelines" / "EML export-ACCESS group.xlsx",
+        "RESERVE": DOCS_DIR / "antibiotic_guidelines" / "EML export-RESERVE group.xlsx",
+        "WATCH": DOCS_DIR / "antibiotic_guidelines" / "EML export-WATCH group.xlsx",
+    }
+    records = []
+    for category, filepath in eml_files.items():
+        if not filepath.exists():
+            print(f"  Warning: {filepath} not found, skipping...")
+            continue
+        try:
+            # Use openpyxl directly with read_only=True for faster loading
+            import openpyxl
+            wb = openpyxl.load_workbook(filepath, read_only=True)
+            ws = wb.active
+            # Get headers from first row
+            headers = []
+            for cell in ws[1]:
+                headers.append(str(cell.value).strip().lower().replace(' ', '_') if cell.value else f'col_{len(headers)}')
+            # Process data rows
+            for row_idx, row in enumerate(ws.iter_rows(min_row=2, values_only=True), start=2):
+                row_dict = dict(zip(headers, row))
+                medicine = str(row_dict.get('medicine_name', row_dict.get('medicine', '')))
+                if not medicine or medicine == 'None' or medicine == 'nan':
+                    continue
+                def safe_str(val):
+                    if val is None or pd.isna(val):
+                        return ''
+                    return str(val)
+                records.append((
+                    medicine,
+                    category,
+                    safe_str(row_dict.get('eml_section', '')),
+                    safe_str(row_dict.get('formulations', '')),
+                    safe_str(row_dict.get('indication', '')),
+                    safe_str(row_dict.get('atc_codes', row_dict.get('atc_code', ''))),
+                    safe_str(row_dict.get('combined_with', '')),
+                    safe_str(row_dict.get('status', '')),
+                ))
+            wb.close()
+            print(f"  Loaded {len([r for r in records if r[1] == category])} from {category}")
+        except Exception as e:
+            print(f"  Warning: Error reading {filepath}: {e}")
+            continue
+    if records:
+        query = """
+            INSERT INTO eml_antibiotics
+            (medicine_name, who_category, eml_section, formulations,
+             indication, atc_codes, combined_with, status)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """
+        execute_many(query, records)
+        print(f"  Imported {len(records)} EML antibiotic records total")
+    return len(records)
+def import_atlas_susceptibility() -> int:
+    """Import ATLAS antimicrobial susceptibility data.
+    Reads the "Percent" sheet twice: once without a header to find the region
+    (text after "from" in one of the first five rows) and the header row (first
+    of the top ten rows containing 'Antibacterial', default row 4), then again
+    with that header. Rows are kept only when both the isolate count and the
+    susceptible percentage convert to numbers.
+    Every record is stored with species "General", an empty family, year 2024
+    and source "ATLAS" -- these are constants here, not read from the file.
+    Returns the number of records collected for insertion (0 if the file is missing).
+    """
+    print("Importing ATLAS susceptibility data...")
+    filepath = DOCS_DIR / "pathogen_resistance" / "ATLAS Susceptibility Data Export.xlsx"
+    if not filepath.exists():
+        print(f"  Warning: {filepath} not found, skipping...")
+        return 0
+    # Read the raw data to find the header row and extract region
+    df_raw = pd.read_excel(filepath, sheet_name="Percent", header=None)
+    # Extract region from the title (row 1)
+    region = "Unknown"
+    for idx, row in df_raw.head(5).iterrows():
+        cell = str(row.iloc[0]) if pd.notna(row.iloc[0]) else ""
+        if "from" in cell.lower():
+            # Extract country from "Percentage Susceptibility from Argentina"
+            parts = cell.split("from")
+            if len(parts) > 1:
+                region = parts[1].strip()
+            break
+    # Find the header row (contains 'Antibacterial' or 'N')
+    header_row = 4  # Default
+    for idx, row in df_raw.head(10).iterrows():
+        if any('Antibacterial' in str(v) for v in row.values if pd.notna(v)):
+            header_row = idx
+            break
+    # Read with proper header
+    df = pd.read_excel(filepath, sheet_name="Percent", header=header_row)
+    # Standardize column names
+    df.columns = [str(col).strip().lower().replace(' ', '_').replace('.', '') for col in df.columns]
+    records = []
+    for _, row in df.iterrows():
+        antibiotic = str(row.get('antibacterial', ''))
+        # Skip empty or non-antibiotic rows
+        if not antibiotic or antibiotic == 'nan' or 'omitted' in antibiotic.lower():
+            continue
+        if 'in vitro' in antibiotic.lower() or 'table cells' in antibiotic.lower():
+            continue
+        # Get susceptibility values
+        n_value = row.get('n', None)
+        pct_s = row.get('susc', row.get('susceptible', None))
+        pct_i = row.get('int', row.get('intermediate', None))
+        pct_r = row.get('res', row.get('resistant', None))
+        # Use safe conversion functions
+        n_int = safe_int(n_value)
+        s_float = safe_float(pct_s)
+        if n_int is not None and s_float is not None:
+            records.append((
+                "General",  # Species - will be refined if more data available
+                "",  # Family
+                antibiotic,
+                s_float,
+                safe_float(pct_i),
+                safe_float(pct_r),
+                n_int,
+                2024,  # Year - from the data context
+                region,
+                "ATLAS"
+            ))
+    if records:
+        query = """
+            INSERT INTO atlas_susceptibility
+            (species, family, antibiotic, percent_susceptible,
+             percent_intermediate, percent_resistant, total_isolates,
+             year, region, source)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """
+        execute_many(query, records)
+        print(f"  Imported {len(records)} ATLAS susceptibility records from {region}")
+    return len(records)
+def import_mic_breakpoints() -> int:
+    """Import EUCAST MIC breakpoint tables."""
+    print("Importing MIC breakpoint data...")
+    filepath = DOCS_DIR / "mic_breakpoints" / "v_16.0__BreakpointTables.xlsx"
+    if not filepath.exists():
+        print(f"  Warning: {filepath} not found, skipping...")
+        return 0
+    # Get all sheet names
+    xl = pd.ExcelFile(filepath)
+    # Skip non-pathogen sheets
+    skip_sheets = {'Content', 'Changes', 'Notes', 'Guidance', 'Dosages',
+                   'Technical uncertainty', 'PK PD breakpoints', 'PK PD cutoffs'}
+    records = []
+    for sheet_name in xl.sheet_names:
+        if sheet_name in skip_sheets:
+            continue
+        try:
+            df = pd.read_excel(filepath, sheet_name=sheet_name, header=None)
+            # Try to find antibiotic data - look for rows with MIC values
+            pathogen_group = sheet_name
+            # Simple heuristic: look for rows that might contain antibiotic names and MIC values
+            for idx, row in df.iterrows():
+                row_values = [str(v).strip() for v in row.values if pd.notna(v)]
+                # Look for rows that might be antibiotic entries
+                if len(row_values) >= 2:
+                    potential_antibiotic = row_values[0]
+                    # Skip header-like rows
+                    if any(kw in potential_antibiotic.lower() for kw in
+                           ['antibiotic', 'agent', 'note', 'disk', 'mic', 'breakpoint']):
+                        continue
+                    # Try to extract MIC values (numbers)
+                    mic_values = []
+                    for v in row_values[1:]:
+                        try:
+                            mic_values.append(float(v.replace('≤', '').replace('>', '').replace('<', '').strip()))
+                        except (ValueError, AttributeError):
+                            pass
+                    if len(mic_values) >= 2 and len(potential_antibiotic) > 2:
+                        records.append((
+                            pathogen_group,
+                            potential_antibiotic,
+                            None,  # route
+                            mic_values[0] if len(mic_values) > 0 else None,  # S breakpoint
+                            mic_values[1] if len(mic_values) > 1 else None,  # R breakpoint
+                            None,  # disk S
+                            None,  # disk R
+                            None,  # notes
+                            "16.0"
+                        ))
+        except Exception as e:
+            print(f"  Warning: Could not parse sheet '{sheet_name}': {e}")
+            continue
+    if records:
+        query = """
+            INSERT INTO mic_breakpoints
+            (pathogen_group, antibiotic, route, mic_susceptible, mic_resistant,
+             disk_susceptible, disk_resistant, notes, eucast_version)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+        """
+        execute_many(query, records)
+        print(f"  Imported {len(records)} MIC breakpoint records")
+    return len(records)
+def import_drug_interactions(limit: int = None) -> int:
+    """Import drug-drug interaction database."""
+    print("Importing drug interactions data...")
+    filepath = DOCS_DIR / "drug_safety" / "db_drug_interactions.csv"
+    if not filepath.exists():
+        print(f"  Warning: {filepath} not found, skipping...")
+        return 0
+    # Read CSV in chunks due to large size
+    chunk_size = 10000
+    total_records = 0
+    for chunk in pd.read_csv(filepath, chunksize=chunk_size):
+        # Standardize column names
+        chunk.columns = [col.strip().lower().replace(' ', '_') for col in chunk.columns]
+        records = []
+        for _, row in chunk.iterrows():
+            drug_1 = str(row.get('drug_1', row.get('drug1', row.iloc[0] if len(row) > 0 else '')))
+            drug_2 = str(row.get('drug_2', row.get('drug2', row.iloc[1] if len(row) > 1 else '')))
+            description = str(row.get('interaction_description', row.get('description',
+                             row.get('interaction', row.iloc[2] if len(row) > 2 else ''))))
+            severity = classify_severity(description)
+            if drug_1 and drug_2:
+                records.append((drug_1, drug_2, description, severity))
+        if records:
+            query = """
+                INSERT INTO drug_interactions
+                (drug_1, drug_2, interaction_description, severity)
+                VALUES (?, ?, ?, ?)
+            """
+            execute_many(query, records)
+            total_records += len(records)
+        if limit and total_records >= limit:
+            break
+    print(f"  Imported {total_records} drug interaction records")
+    return total_records
+def import_all_data(interactions_limit: int = None) -> dict:
+    """Import all structured data into the database.
+    Applies the schema, deletes every row from the four imported tables, then
+    runs the importers in the order EML, ATLAS, MIC breakpoints, drug
+    interactions. interactions_limit is passed to import_drug_interactions as
+    its limit. Returns a dict mapping table name to the count each importer
+    returned.
+    """
+    print(f"\n{'='*50}")
+    print("Med-I-C Data Import")
+    print(f"{'='*50}\n")
+    # Initialize database
+    init_database()
+    # Clear existing data
+    with get_connection() as conn:
+        conn.execute("DELETE FROM eml_antibiotics")
+        conn.execute("DELETE FROM atlas_susceptibility")
+        conn.execute("DELETE FROM mic_breakpoints")
+        conn.execute("DELETE FROM drug_interactions")
+        conn.commit()
+    print("Cleared existing data\n")
+    # Import all data
+    results = {
+        "eml_antibiotics": import_eml_antibiotics(),
+        "atlas_susceptibility": import_atlas_susceptibility(),
+        "mic_breakpoints": import_mic_breakpoints(),
+        "drug_interactions": import_drug_interactions(limit=interactions_limit),
+    }
+    print(f"\n{'='*50}")
+    print("Import Summary:")
+    for table, count in results.items():
+        print(f"  {table}: {count} records")
+    print(f"{'='*50}\n")
+    return results
+if __name__ == "__main__":
+    # Import with a limit on interactions for faster demo
+    import_all_data(interactions_limit=50000)

src/db/schema.sql ADDED Viewed

	@@ -0,0 +1,108 @@

+-- Med-I-C Database Schema
+-- AMR-Guard: Infection Lifecycle Orchestrator
+-- EML Antibiotic Classification Table
+-- One row per medicine read from the ACCESS / RESERVE / WATCH export files.
+CREATE TABLE IF NOT EXISTS eml_antibiotics (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    medicine_name TEXT NOT NULL,
+    who_category TEXT NOT NULL,  -- 'ACCESS', 'RESERVE', 'WATCH'
+    eml_section TEXT,
+    formulations TEXT,
+    indication TEXT,
+    atc_codes TEXT,
+    combined_with TEXT,
+    status TEXT,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+-- Lookup indexes: by medicine name, by WHO category, by ATC code string
+CREATE INDEX IF NOT EXISTS idx_eml_medicine_name ON eml_antibiotics(medicine_name);
+CREATE INDEX IF NOT EXISTS idx_eml_who_category ON eml_antibiotics(who_category);
+CREATE INDEX IF NOT EXISTS idx_eml_atc_codes ON eml_antibiotics(atc_codes);
+-- ATLAS Susceptibility Data (Percent)
+-- The importer in import_data.py currently writes species = 'General',
+-- family = '' and year = 2024 for every row.
+CREATE TABLE IF NOT EXISTS atlas_susceptibility (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    species TEXT,
+    family TEXT,
+    antibiotic TEXT,
+    percent_susceptible REAL,
+    percent_intermediate REAL,
+    percent_resistant REAL,
+    total_isolates INTEGER,
+    year INTEGER,
+    region TEXT,
+    source TEXT,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_atlas_species ON atlas_susceptibility(species);
+CREATE INDEX IF NOT EXISTS idx_atlas_antibiotic ON atlas_susceptibility(antibiotic);
+CREATE INDEX IF NOT EXISTS idx_atlas_family ON atlas_susceptibility(family);
+-- MIC Breakpoints Table
+-- pathogen_group holds the source sheet name; the importer in import_data.py
+-- leaves route, disk_susceptible, disk_resistant and notes as NULL.
+CREATE TABLE IF NOT EXISTS mic_breakpoints (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    pathogen_group TEXT NOT NULL,
+    antibiotic TEXT NOT NULL,
+    route TEXT,
+    mic_susceptible REAL,
+    mic_resistant REAL,
+    disk_susceptible REAL,
+    disk_resistant REAL,
+    notes TEXT,
+    eucast_version TEXT DEFAULT '16.0',
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_bp_pathogen ON mic_breakpoints(pathogen_group);
+CREATE INDEX IF NOT EXISTS idx_bp_antibiotic ON mic_breakpoints(antibiotic);
+-- Dosage Guidance Table
+-- NOTE(review): no importer in import_data.py writes to this table; confirm
+-- where it is meant to be populated.
+CREATE TABLE IF NOT EXISTS dosage_guidance (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    antibiotic TEXT NOT NULL,
+    standard_dose TEXT,
+    high_dose TEXT,
+    renal_adjustment TEXT,
+    notes TEXT,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_dosage_antibiotic ON dosage_guidance(antibiotic);
+-- Drug Interactions Table
+-- Each pair is stored once, in the order it appears in the source CSV.
+CREATE TABLE IF NOT EXISTS drug_interactions (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    drug_1 TEXT NOT NULL,
+    drug_2 TEXT NOT NULL,
+    interaction_description TEXT,
+    severity TEXT,  -- 'major', 'moderate', 'minor' or 'unknown' (see classify_severity)
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_di_drug_1 ON drug_interactions(drug_1);
+CREATE INDEX IF NOT EXISTS idx_di_drug_2 ON drug_interactions(drug_2);
+CREATE INDEX IF NOT EXISTS idx_di_severity ON drug_interactions(severity);
+-- View for bidirectional drug interaction lookup
+-- Every stored pair appears twice, once per direction, and both rows share
+-- the same id, so id is not unique within the view.
+CREATE VIEW IF NOT EXISTS drug_interaction_lookup AS
+SELECT id, drug_1, drug_2, interaction_description, severity FROM drug_interactions
+UNION ALL
+SELECT id, drug_2 as drug_1, drug_1 as drug_2, interaction_description, severity FROM drug_interactions;
+-- Patient History Table (for demo purposes)
+-- One row per recorded MIC result for a patient; not touched by import_all_data.
+CREATE TABLE IF NOT EXISTS patient_history (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    patient_id TEXT NOT NULL,
+    infection_date DATE,
+    pathogen TEXT,
+    antibiotic TEXT,
+    mic_value REAL,
+    interpretation TEXT,
+    outcome TEXT,
+    notes TEXT,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_ph_patient ON patient_history(patient_id);
+CREATE INDEX IF NOT EXISTS idx_ph_pathogen ON patient_history(pathogen);

src/db/vector_store.py ADDED Viewed

	@@ -0,0 +1,312 @@

+"""ChromaDB vector store for unstructured document RAG."""
+import chromadb
+from chromadb.utils import embedding_functions
+from pathlib import Path
+from typing import Optional
+import hashlib
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from pypdf import PdfReader
+# Project paths
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+DATA_DIR = PROJECT_ROOT / "data"
+DOCS_DIR = PROJECT_ROOT / "docs"
+CHROMA_DIR = DATA_DIR / "chroma"
+def get_chroma_client() -> chromadb.PersistentClient:
+    """Get ChromaDB persistent client."""
+    CHROMA_DIR.mkdir(parents=True, exist_ok=True)
+    return chromadb.PersistentClient(path=str(CHROMA_DIR))
+def get_embedding_function():
+    """Get the embedding function for ChromaDB.
+    Builds a new SentenceTransformerEmbeddingFunction for the
+    "all-MiniLM-L6-v2" model on every call.
+    """
+    return embedding_functions.SentenceTransformerEmbeddingFunction(
+        model_name="all-MiniLM-L6-v2"
+    )
+def extract_pdf_text(pdf_path: Path) -> str:
+    """Extract text from PDF file."""
+    reader = PdfReader(pdf_path)
+    text = ""
+    for page in reader.pages:
+        text += page.extract_text() + "\n\n"
+    return text
+def chunk_text(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> list[str]:
+    """Split text into chunks for embedding.
+    Uses RecursiveCharacterTextSplitter with the given chunk_size and
+    chunk_overlap, trying paragraph breaks first, then line breaks, sentence
+    ends, spaces and finally single characters.
+    """
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        separators=["\n\n", "\n", ". ", " ", ""]
+    )
+    return splitter.split_text(text)
+def generate_doc_id(text: str, index: int) -> str:
+    """Generate a unique document ID."""
+    hash_input = f"{text[:100]}_{index}"
+    return hashlib.md5(hash_input.encode()).hexdigest()
+def init_idsa_guidelines_collection() -> chromadb.Collection:
+    """Initialize the IDSA treatment guidelines collection.
+    Any existing "idsa_treatment_guidelines" collection is deleted first (any
+    error from the delete is ignored), so the returned collection is
+    always freshly created and empty.
+    """
+    client = get_chroma_client()
+    ef = get_embedding_function()
+    # Delete existing collection if exists
+    try:
+        client.delete_collection("idsa_treatment_guidelines")
+    except Exception:
+        pass
+    collection = client.create_collection(
+        name="idsa_treatment_guidelines",
+        embedding_function=ef,
+        metadata={
+            "source": "IDSA 2024 Guidance",
+            "doi": "10.1093/cid/ciae403",
+            "description": "Antimicrobial-Resistant Gram-Negative Infections Treatment Guidelines"
+        }
+    )
+    return collection
+def init_mic_reference_collection() -> chromadb.Collection:
+    """Initialize the MIC reference documentation collection.
+    Any existing "mic_reference_docs" collection is deleted first (any error
+    from the delete is ignored), so the returned collection is always
+    freshly created and empty.
+    """
+    client = get_chroma_client()
+    ef = get_embedding_function()
+    # Delete existing collection if exists
+    try:
+        client.delete_collection("mic_reference_docs")
+    except Exception:
+        pass
+    collection = client.create_collection(
+        name="mic_reference_docs",
+        embedding_function=ef,
+        metadata={
+            "source": "EUCAST Breakpoint Tables",
+            "version": "16.0",
+            "description": "MIC Breakpoint Reference Documentation"
+        }
+    )
+    return collection
+def classify_chunk_pathogen(text: str) -> str:
+    """Classify which pathogen type a chunk relates to."""
+    text_lower = text.lower()
+    pathogen_keywords = {
+        "ESBL-E": ["esbl", "extended-spectrum beta-lactamase", "esbl-e", "esbl-producing"],
+        "CRE": ["carbapenem-resistant enterobacterales", "cre", "carbapenemase"],
+        "CRAB": ["acinetobacter baumannii", "crab", "carbapenem-resistant acinetobacter"],
+        "DTR-PA": ["pseudomonas aeruginosa", "dtr-p", "difficult-to-treat resistance"],
+        "S.maltophilia": ["stenotrophomonas maltophilia", "s. maltophilia"],
+        "AmpC-E": ["ampc", "ampc-e", "ampc-producing"],
+    }
+    for pathogen, keywords in pathogen_keywords.items():
+        for keyword in keywords:
+            if keyword in text_lower:
+                return pathogen
+    return "General"
+def import_idsa_guidelines() -> int:
+    """Import IDSA guidelines PDF into ChromaDB."""
+    print("Importing IDSA guidelines into ChromaDB...")
+    pdf_path = DOCS_DIR / "antibiotic_guidelines" / "ciae403.pdf"
+    if not pdf_path.exists():
+        print(f"  Warning: {pdf_path} not found, skipping...")
+        return 0
+    # Extract text from PDF
+    print("  Extracting text from PDF...")
+    text = extract_pdf_text(pdf_path)
+    # Chunk the text
+    print("  Chunking text...")
+    chunks = chunk_text(text)
+    # Initialize collection
+    collection = init_idsa_guidelines_collection()
+    # Prepare documents for insertion
+    documents = []
+    metadatas = []
+    ids = []
+    for i, chunk in enumerate(chunks):
+        documents.append(chunk)
+        metadatas.append({
+            "source": "ciae403.pdf",
+            "chunk_index": i,
+            "pathogen_type": classify_chunk_pathogen(chunk),
+            "page_estimate": i // 3  # Rough estimate
+        })
+        ids.append(generate_doc_id(chunk, i))
+    # Add to collection
+    print(f"  Adding {len(documents)} chunks to collection...")
+    collection.add(
+        documents=documents,
+        metadatas=metadatas,
+        ids=ids
+    )
+    print(f"  Imported {len(documents)} chunks from IDSA guidelines")
+    return len(documents)
+def import_mic_reference() -> int:
+    """Import MIC breakpoint PDF into ChromaDB."""
+    print("Importing MIC reference PDF into ChromaDB...")
+    pdf_path = DOCS_DIR / "mic_breakpoints" / "v_16.0_Breakpoint_Tables.pdf"
+    if not pdf_path.exists():
+        print(f"  Warning: {pdf_path} not found, skipping...")
+        return 0
+    # Extract text from PDF
+    print("  Extracting text from PDF...")
+    text = extract_pdf_text(pdf_path)
+    # Chunk the text
+    print("  Chunking text...")
+    chunks = chunk_text(text, chunk_size=800, chunk_overlap=150)
+    # Initialize collection
+    collection = init_mic_reference_collection()
+    # Prepare documents for insertion
+    documents = []
+    metadatas = []
+    ids = []
+    for i, chunk in enumerate(chunks):
+        documents.append(chunk)
+        metadatas.append({
+            "source": "v_16.0_Breakpoint_Tables.pdf",
+            "chunk_index": i,
+            "document_type": "mic_reference"
+        })
+        ids.append(generate_doc_id(chunk, i))
+    # Add to collection
+    print(f"  Adding {len(documents)} chunks to collection...")
+    collection.add(
+        documents=documents,
+        metadatas=metadatas,
+        ids=ids
+    )
+    print(f"  Imported {len(documents)} chunks from MIC reference")
+    return len(documents)
+def get_collection(name: str) -> Optional[chromadb.Collection]:
+    """Get a collection by name.
+    Returns None when the lookup raises for any reason (for example because
+    the collection has not been imported yet); the error itself is discarded.
+    """
+    client = get_chroma_client()
+    ef = get_embedding_function()
+    try:
+        return client.get_collection(name=name, embedding_function=ef)
+    except Exception:
+        return None
+def search_guidelines(
+    query: str,
+    n_results: int = 5,
+    pathogen_filter: str = None
+) -> list[dict]:
+    """Search the IDSA guidelines collection."""
+    collection = get_collection("idsa_treatment_guidelines")
+    if collection is None:
+        return []
+    where_filter = None
+    if pathogen_filter:
+        where_filter = {"pathogen_type": pathogen_filter}
+    results = collection.query(
+        query_texts=[query],
+        n_results=n_results,
+        where=where_filter,
+        include=["documents", "metadatas", "distances"]
+    )
+    # Format results
+    formatted = []
+    for i in range(len(results['documents'][0])):
+        formatted.append({
+            "content": results['documents'][0][i],
+            "metadata": results['metadatas'][0][i],
+            "distance": results['distances'][0][i]
+        })
+    return formatted
+def search_mic_reference(query: str, n_results: int = 3) -> list[dict]:
+    """Search the MIC reference collection."""
+    collection = get_collection("mic_reference_docs")
+    if collection is None:
+        return []
+    results = collection.query(
+        query_texts=[query],
+        n_results=n_results,
+        include=["documents", "metadatas", "distances"]
+    )
+    # Format results
+    formatted = []
+    for i in range(len(results['documents'][0])):
+        formatted.append({
+            "content": results['documents'][0][i],
+            "metadata": results['metadatas'][0][i],
+            "distance": results['distances'][0][i]
+        })
+    return formatted
+def import_all_vectors() -> dict:
+    """Import all PDFs into ChromaDB.
+    Runs the IDSA guidelines import and then the MIC reference import, prints
+    a summary, and returns a dict mapping "idsa_guidelines" and
+    "mic_reference" to the chunk count each importer returned.
+    """
+    print(f"\n{'='*50}")
+    print("ChromaDB Vector Import")
+    print(f"{'='*50}\n")
+    results = {
+        "idsa_guidelines": import_idsa_guidelines(),
+        "mic_reference": import_mic_reference(),
+    }
+    print(f"\n{'='*50}")
+    print("Vector Import Summary:")
+    for collection, count in results.items():
+        print(f"  {collection}: {count} chunks")
+    print(f"{'='*50}\n")
+    return results
+if __name__ == "__main__":
+    import_all_vectors()

src/tools/__init__.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Med-I-C Query Tools for AMR-Guard Workflow."""
+from .antibiotic_tools import (
+    query_antibiotic_info,
+    get_antibiotics_by_category,
+    get_antibiotic_for_indication,
+    interpret_mic_value,
+    get_breakpoints_for_pathogen,
+)
+from .resistance_tools import (
+    query_resistance_pattern,
+    get_most_effective_antibiotics,
+    get_resistance_trend,
+    calculate_mic_trend,
+    get_pathogen_families,
+    get_pathogens_by_family,
+)
+from .safety_tools import (
+    check_drug_interactions,
+    check_single_interaction,
+    get_all_interactions_for_drug,
+    get_major_interactions_for_drug,
+    screen_antibiotic_safety,
+    get_interaction_statistics,
+)
+from .rag_tools import (
+    search_clinical_guidelines,
+    search_mic_reference_docs,
+    get_treatment_recommendation,
+    explain_mic_interpretation,
+    get_empirical_therapy_guidance,
+)
+# Public names re-exported by this package, grouped by the tool module they are imported from
+__all__ = [
+    # Antibiotic tools
+    "query_antibiotic_info",
+    "get_antibiotics_by_category",
+    "get_antibiotic_for_indication",
+    "interpret_mic_value",
+    "get_breakpoints_for_pathogen",
+    # Resistance tools
+    "query_resistance_pattern",
+    "get_most_effective_antibiotics",
+    "get_resistance_trend",
+    "calculate_mic_trend",
+    "get_pathogen_families",
+    "get_pathogens_by_family",
+    # Safety tools
+    "check_drug_interactions",
+    "check_single_interaction",
+    "get_all_interactions_for_drug",
+    "get_major_interactions_for_drug",
+    "screen_antibiotic_safety",
+    "get_interaction_statistics",
+    # RAG tools
+    "search_clinical_guidelines",
+    "search_mic_reference_docs",
+    "get_treatment_recommendation",
+    "explain_mic_interpretation",
+    "get_empirical_therapy_guidance",
+]

src/tools/antibiotic_tools.py ADDED Viewed

	@@ -0,0 +1,210 @@

+"""Antibiotic query tools for Med-I-C workflow."""
+from typing import Optional
+from src.db.database import execute_query
+def query_antibiotic_info(
+    antibiotic_name: str,
+    include_category: bool = True,
+    include_formulations: bool = True
+) -> list[dict]:
+    """
+    Query EML antibiotic database for classification and details.
+    Args:
+        antibiotic_name: Name of the antibiotic (partial match supported)
+        include_category: Include WHO stewardship category
+        include_formulations: Include available formulations
+    Returns:
+        List of matching antibiotics with details
+    Used by: Agent 1, Agent 4
+    """
+    query = """
+        SELECT
+            medicine_name,
+            who_category,
+            eml_section,
+            formulations,
+            indication,
+            atc_codes,
+            combined_with,
+            status
+        FROM eml_antibiotics
+        WHERE LOWER(medicine_name) LIKE LOWER(?)
+        ORDER BY
+            CASE who_category
+                WHEN 'ACCESS' THEN 1
+                WHEN 'WATCH' THEN 2
+                WHEN 'RESERVE' THEN 3
+                ELSE 4
+            END
+    """
+    results = execute_query(query, (f"%{antibiotic_name}%",))
+    # Filter columns based on parameters
+    if not include_category or not include_formulations:
+        filtered_results = []
+        for r in results:
+            filtered = dict(r)
+            if not include_category:
+                filtered.pop('who_category', None)
+            if not include_formulations:
+                filtered.pop('formulations', None)
+            filtered_results.append(filtered)
+        return filtered_results
+    return results
+def get_antibiotics_by_category(category: str) -> list[dict]:
+    """
+    Get all antibiotics in a specific WHO category.
+    Args:
+        category: WHO category ('ACCESS', 'WATCH', 'RESERVE')
+    Returns:
+        List of antibiotics in that category
+    """
+    query = """
+        SELECT medicine_name, indication, formulations, atc_codes
+        FROM eml_antibiotics
+        WHERE UPPER(who_category) = UPPER(?)
+        ORDER BY medicine_name
+    """
+    return execute_query(query, (category,))
+def get_antibiotic_for_indication(indication_keyword: str) -> list[dict]:
+    """
+    Find antibiotics based on indication keywords.
+    Args:
+        indication_keyword: Keyword to search in indications
+    Returns:
+        List of matching antibiotics with indications
+    """
+    query = """
+        SELECT
+            medicine_name,
+            who_category,
+            indication,
+            formulations
+        FROM eml_antibiotics
+        WHERE LOWER(indication) LIKE LOWER(?)
+        ORDER BY
+            CASE who_category
+                WHEN 'ACCESS' THEN 1
+                WHEN 'WATCH' THEN 2
+                WHEN 'RESERVE' THEN 3
+                ELSE 4
+            END
+    """
+    return execute_query(query, (f"%{indication_keyword}%",))
+def interpret_mic_value(
+    pathogen: str,
+    antibiotic: str,
+    mic_value: float,
+    route: str = None
+) -> dict:
+    """
+    Interpret MIC value against EUCAST breakpoints.
+    Args:
+        pathogen: Pathogen name or group
+        antibiotic: Antibiotic name
+        mic_value: MIC value in mg/L
+        route: Administration route (IV, Oral)
+    Returns:
+        Dict with interpretation (S/I/R), breakpoint values, clinical notes
+    Used by: Agent 2, Agent 3
+    """
+    query = """
+        SELECT
+            pathogen_group,
+            antibiotic,
+            mic_susceptible,
+            mic_resistant,
+            notes,
+            route
+        FROM mic_breakpoints
+        WHERE LOWER(pathogen_group) LIKE LOWER(?)
+          AND LOWER(antibiotic) LIKE LOWER(?)
+        LIMIT 1
+    """
+    results = execute_query(query, (f"%{pathogen}%", f"%{antibiotic}%"))
+    if not results:
+        return {
+            "interpretation": "UNKNOWN",
+            "message": f"No breakpoint found for {antibiotic} against {pathogen}",
+            "mic_value": mic_value,
+            "breakpoints": None
+        }
+    bp = results[0]
+    mic_s = bp.get('mic_susceptible')
+    mic_r = bp.get('mic_resistant')
+    # Determine interpretation
+    if mic_s is not None and mic_value <= mic_s:
+        interpretation = "SUSCEPTIBLE"
+        message = f"MIC ({mic_value} mg/L) ≤ S breakpoint ({mic_s} mg/L)"
+    elif mic_r is not None and mic_value > mic_r:
+        interpretation = "RESISTANT"
+        message = f"MIC ({mic_value} mg/L) > R breakpoint ({mic_r} mg/L)"
+    elif mic_s is not None and mic_r is not None:
+        interpretation = "INTERMEDIATE"
+        message = f"MIC ({mic_value} mg/L) between S ({mic_s}) and R ({mic_r}) breakpoints"
+    else:
+        interpretation = "UNKNOWN"
+        message = "Incomplete breakpoint data"
+    return {
+        "interpretation": interpretation,
+        "message": message,
+        "mic_value": mic_value,
+        "breakpoints": {
+            "susceptible": mic_s,
+            "resistant": mic_r
+        },
+        "pathogen_group": bp.get('pathogen_group'),
+        "notes": bp.get('notes')
+    }
+def get_breakpoints_for_pathogen(pathogen: str) -> list[dict]:
+    """
+    Get all available breakpoints for a pathogen.
+    Args:
+        pathogen: Pathogen name or group
+    Returns:
+        List of antibiotic breakpoints for the pathogen
+    """
+    query = """
+        SELECT
+            antibiotic,
+            mic_susceptible,
+            mic_resistant,
+            route,
+            notes
+        FROM mic_breakpoints
+        WHERE LOWER(pathogen_group) LIKE LOWER(?)
+        ORDER BY antibiotic
+    """
+    return execute_query(query, (f"%{pathogen}%",))

src/tools/rag_tools.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""RAG tools for querying clinical guidelines via ChromaDB."""
+from typing import Optional
+from src.db.vector_store import search_guidelines, search_mic_reference
+def search_clinical_guidelines(
+    query: str,
+    pathogen_filter: str = None,
+    n_results: int = 5
+) -> list[dict]:
+    """
+    Semantic search over IDSA clinical guidelines.
+    Args:
+        query: Natural language query about treatment
+        pathogen_filter: Optional pathogen type filter
+            Options: 'ESBL-E', 'CRE', 'CRAB', 'DTR-PA', 'S.maltophilia', 'AmpC-E', 'General'
+        n_results: Number of results to return
+    Returns:
+        List of relevant guideline excerpts with metadata
+    Used by: Agent 1 (Empirical), Agent 4 (Justification)
+    """
+    results = search_guidelines(query, n_results, pathogen_filter)
+    # Format for agent consumption
+    formatted = []
+    for r in results:
+        formatted.append({
+            "content": r.get("content", ""),
+            "pathogen_type": r.get("metadata", {}).get("pathogen_type", "General"),
+            "source": r.get("metadata", {}).get("source", "IDSA Guidelines"),
+            "relevance_score": 1 - r.get("distance", 1)  # Convert distance to similarity
+        })
+    return formatted
+def search_mic_reference_docs(query: str, n_results: int = 3) -> list[dict]:
+    """
+    Search MIC breakpoint reference documentation.
+    Args:
+        query: Query about MIC interpretation or breakpoints
+        n_results: Number of results to return
+    Returns:
+        List of relevant reference excerpts
+    """
+    results = search_mic_reference(query, n_results)
+    formatted = []
+    for r in results:
+        formatted.append({
+            "content": r.get("content", ""),
+            "source": r.get("metadata", {}).get("source", "EUCAST Breakpoints"),
+            "relevance_score": 1 - r.get("distance", 1)
+        })
+    return formatted
+def get_treatment_recommendation(
+    pathogen: str,
+    infection_site: str = None,
+    patient_factors: list[str] = None
+) -> dict:
+    """
+    Get treatment recommendation by searching guidelines.
+    Args:
+        pathogen: Identified or suspected pathogen
+        infection_site: Location of infection (e.g., "urinary", "respiratory")
+        patient_factors: List of patient factors (e.g., ["renal impairment", "pregnancy"])
+    Returns:
+        Treatment recommendation with guideline citations
+    """
+    # Build comprehensive query
+    query_parts = [f"treatment for {pathogen} infection"]
+    if infection_site:
+        query_parts.append(f"in {infection_site}")
+    if patient_factors:
+        query_parts.append(f"considering {', '.join(patient_factors)}")
+    query = " ".join(query_parts)
+    # Search guidelines
+    results = search_clinical_guidelines(query, n_results=5)
+    # Try to determine pathogen category
+    pathogen_category = None
+    pathogen_lower = pathogen.lower()
+    pathogen_mapping = {
+        "ESBL-E": ["esbl", "extended-spectrum", "e. coli", "klebsiella"],
+        "CRE": ["carbapenem-resistant", "cre", "carbapenemase"],
+        "CRAB": ["acinetobacter", "crab"],
+        "DTR-PA": ["pseudomonas", "dtr"],
+        "S.maltophilia": ["stenotrophomonas", "maltophilia"],
+    }
+    for category, keywords in pathogen_mapping.items():
+        for keyword in keywords:
+            if keyword in pathogen_lower:
+                pathogen_category = category
+                break
+    # Search with pathogen filter if category identified
+    if pathogen_category:
+        filtered_results = search_clinical_guidelines(
+            query, pathogen_filter=pathogen_category, n_results=3
+        )
+        if filtered_results:
+            results = filtered_results + results[:2]  # Combine results
+    return {
+        "query": query,
+        "pathogen_category": pathogen_category or "General",
+        "recommendations": results[:5],
+        "note": "These recommendations are from IDSA 2024 guidelines. Always verify with current institutional protocols."
+    }
+def explain_mic_interpretation(
+    pathogen: str,
+    antibiotic: str,
+    mic_value: float
+) -> dict:
+    """
+    Get detailed explanation for MIC interpretation from reference docs.
+    Args:
+        pathogen: Pathogen name
+        antibiotic: Antibiotic name
+        mic_value: The MIC value to interpret
+    Returns:
+        Detailed explanation with reference citations
+    """
+    query = f"MIC breakpoint interpretation for {antibiotic} against {pathogen}"
+    results = search_mic_reference_docs(query, n_results=3)
+    return {
+        "query": query,
+        "mic_value": mic_value,
+        "reference_excerpts": results,
+        "note": "Refer to current EUCAST v16.0 breakpoint tables for official interpretation."
+    }
+def get_empirical_therapy_guidance(
+    infection_type: str,
+    risk_factors: list[str] = None
+) -> dict:
+    """
+    Get empirical therapy guidance for an infection type.
+    Args:
+        infection_type: Type of infection (e.g., "UTI", "pneumonia", "sepsis")
+        risk_factors: List of risk factors (e.g., ["prior MRSA", "recent antibiotics"])
+    Returns:
+        Empirical therapy recommendations
+    """
+    query_parts = [f"empirical therapy for {infection_type}"]
+    if risk_factors:
+        query_parts.append(f"with risk factors: {', '.join(risk_factors)}")
+    query = " ".join(query_parts)
+    results = search_clinical_guidelines(query, n_results=5)
+    return {
+        "infection_type": infection_type,
+        "risk_factors": risk_factors or [],
+        "recommendations": results,
+        "note": "Empirical therapy should be de-escalated based on culture results."
+    }

src/tools/resistance_tools.py ADDED Viewed

	@@ -0,0 +1,244 @@

+"""Resistance pattern and trend analysis tools for Med-I-C workflow."""
+from typing import Optional
+from src.db.database import execute_query
+def query_resistance_pattern(
+    pathogen: str,
+    antibiotic: str = None,
+    region: str = None,
+    year: int = None
+) -> list[dict]:
+    """
+    Query ATLAS susceptibility data for resistance patterns.
+    Args:
+        pathogen: Pathogen name (e.g., "E. coli", "K. pneumoniae")
+        antibiotic: Optional specific antibiotic to check
+        region: Optional geographic region filter
+        year: Optional year filter (defaults to most recent)
+    Returns:
+        List of susceptibility records with percentages
+    Used by: Agent 1 (Empirical), Agent 3 (Trend Analysis)
+    """
+    conditions = ["LOWER(species) LIKE LOWER(?)"]
+    params = [f"%{pathogen}%"]
+    if antibiotic:
+        conditions.append("LOWER(antibiotic) LIKE LOWER(?)")
+        params.append(f"%{antibiotic}%")
+    if region:
+        conditions.append("LOWER(region) LIKE LOWER(?)")
+        params.append(f"%{region}%")
+    if year:
+        conditions.append("year = ?")
+        params.append(year)
+    where_clause = " AND ".join(conditions)
+    query = f"""
+        SELECT
+            species,
+            family,
+            antibiotic,
+            percent_susceptible,
+            percent_intermediate,
+            percent_resistant,
+            total_isolates,
+            year,
+            region
+        FROM atlas_susceptibility
+        WHERE {where_clause}
+        ORDER BY year DESC, percent_susceptible DESC
+        LIMIT 50
+    """
+    return execute_query(query, tuple(params))
+def get_most_effective_antibiotics(
+    pathogen: str,
+    min_susceptibility: float = 80.0,
+    limit: int = 10
+) -> list[dict]:
+    """
+    Find antibiotics with highest susceptibility for a pathogen.
+    Args:
+        pathogen: Pathogen name
+        min_susceptibility: Minimum susceptibility percentage (default 80%)
+        limit: Maximum number of results
+    Returns:
+        List of effective antibiotics sorted by susceptibility
+    """
+    query = """
+        SELECT
+            antibiotic,
+            AVG(percent_susceptible) as avg_susceptibility,
+            SUM(total_isolates) as total_samples,
+            MAX(year) as latest_year
+        FROM atlas_susceptibility
+        WHERE LOWER(species) LIKE LOWER(?)
+          AND percent_susceptible >= ?
+        GROUP BY antibiotic
+        ORDER BY avg_susceptibility DESC
+        LIMIT ?
+    """
+    return execute_query(query, (f"%{pathogen}%", min_susceptibility, limit))
+def get_resistance_trend(
+    pathogen: str,
+    antibiotic: str
+) -> list[dict]:
+    """
+    Get resistance trend over time for pathogen-antibiotic combination.
+    Args:
+        pathogen: Pathogen name
+        antibiotic: Antibiotic name
+    Returns:
+        List of yearly susceptibility data
+    """
+    query = """
+        SELECT
+            year,
+            AVG(percent_susceptible) as avg_susceptibility,
+            AVG(percent_resistant) as avg_resistance,
+            SUM(total_isolates) as total_samples
+        FROM atlas_susceptibility
+        WHERE LOWER(species) LIKE LOWER(?)
+          AND LOWER(antibiotic) LIKE LOWER(?)
+          AND year IS NOT NULL
+        GROUP BY year
+        ORDER BY year ASC
+    """
+    return execute_query(query, (f"%{pathogen}%", f"%{antibiotic}%"))
+def calculate_mic_trend(
+    historical_mics: list[dict],
+    current_mic: float = None
+) -> dict:
+    """
+    Calculate resistance velocity and MIC trend from historical data.
+    Args:
+        historical_mics: List of historical MIC readings [{"date": ..., "mic_value": ...}, ...]
+        current_mic: Optional current MIC value (if not in historical_mics)
+    Returns:
+        Dict with trend analysis, resistance_velocity, risk_level
+    Used by: Agent 3 (Trend Analyst)
+    Logic:
+        - If MIC increases by 4x (two-step dilution), flag HIGH risk
+        - If MIC increases by 2x (one-step dilution), flag MODERATE risk
+        - Otherwise, LOW risk
+    """
+    if not historical_mics:
+        return {
+            "risk_level": "UNKNOWN",
+            "message": "No historical MIC data available",
+            "trend": None,
+            "velocity": None
+        }
+    # Sort by date if available
+    sorted_mics = sorted(
+        historical_mics,
+        key=lambda x: x.get('date', '0')
+    )
+    mic_values = [m['mic_value'] for m in sorted_mics if m.get('mic_value')]
+    if current_mic:
+        mic_values.append(current_mic)
+    if len(mic_values) < 2:
+        return {
+            "risk_level": "UNKNOWN",
+            "message": "Insufficient MIC history (need at least 2 values)",
+            "trend": None,
+            "velocity": None,
+            "values": mic_values
+        }
+    baseline_mic = mic_values[0]
+    latest_mic = mic_values[-1]
+    # Avoid division by zero
+    if baseline_mic == 0:
+        baseline_mic = 0.001
+    ratio = latest_mic / baseline_mic
+    # Calculate velocity (fold change per time point)
+    velocity = ratio ** (1 / (len(mic_values) - 1)) if len(mic_values) > 1 else 1
+    # Determine trend direction
+    if ratio > 1.5:
+        trend = "INCREASING"
+    elif ratio < 0.67:
+        trend = "DECREASING"
+    else:
+        trend = "STABLE"
+    # Determine risk level
+    if ratio >= 4:
+        risk_level = "HIGH"
+        alert = "MIC CREEP DETECTED - Two-step dilution increase. High risk of treatment failure even if currently 'Susceptible'."
+    elif ratio >= 2:
+        risk_level = "MODERATE"
+        alert = "MIC trending upward (one-step dilution increase). Monitor closely and consider alternative agents."
+    elif trend == "INCREASING":
+        risk_level = "LOW"
+        alert = "Slight MIC increase observed. Continue current therapy with monitoring."
+    else:
+        risk_level = "LOW"
+        alert = "MIC stable or decreasing. Current therapy appears effective."
+    return {
+        "risk_level": risk_level,
+        "alert": alert,
+        "trend": trend,
+        "velocity": round(velocity, 2),
+        "ratio": round(ratio, 2),
+        "baseline_mic": baseline_mic,
+        "current_mic": latest_mic,
+        "data_points": len(mic_values),
+        "values": mic_values
+    }
+def get_pathogen_families() -> list[dict]:
+    """Get list of unique pathogen families in the database."""
+    query = """
+        SELECT DISTINCT family, COUNT(DISTINCT species) as species_count
+        FROM atlas_susceptibility
+        WHERE family IS NOT NULL AND family != ''
+        GROUP BY family
+        ORDER BY species_count DESC
+    """
+    return execute_query(query)
+def get_pathogens_by_family(family: str) -> list[dict]:
+    """Get all pathogens in a specific family."""
+    query = """
+        SELECT DISTINCT species
+        FROM atlas_susceptibility
+        WHERE LOWER(family) LIKE LOWER(?)
+        ORDER BY species
+    """
+    return execute_query(query, (f"%{family}%",))

src/tools/safety_tools.py ADDED Viewed

	@@ -0,0 +1,250 @@

+"""Drug safety and interaction tools for Med-I-C workflow."""
+from typing import Optional
+from src.db.database import execute_query
+def check_drug_interactions(
+    target_drug: str,
+    patient_medications: list[str],
+    severity_filter: str = None
+) -> list[dict]:
+    """
+    Check for interactions between target drug and patient's medications.
+    Args:
+        target_drug: Antibiotic being considered
+        patient_medications: List of patient's current medications
+        severity_filter: Optional filter ('major', 'moderate', 'minor')
+    Returns:
+        List of interaction dicts with severity and description
+    Used by: Agent 4 (Safety Check)
+    """
+    if not patient_medications:
+        return []
+    # Build query with proper parameter handling
+    placeholders = ','.join(['?' for _ in patient_medications])
+    conditions = [f"LOWER(drug_2) IN ({placeholders})"]
+    params = [med.lower() for med in patient_medications]
+    # Add target drug condition
+    conditions.append("LOWER(drug_1) LIKE LOWER(?)")
+    params.append(f"%{target_drug}%")
+    if severity_filter:
+        conditions.append("severity = ?")
+        params.append(severity_filter)
+    where_clause = " AND ".join(conditions)
+    query = f"""
+        SELECT
+            drug_1,
+            drug_2,
+            interaction_description,
+            severity
+        FROM drug_interaction_lookup
+        WHERE {where_clause}
+        ORDER BY
+            CASE severity
+                WHEN 'major' THEN 1
+                WHEN 'moderate' THEN 2
+                WHEN 'minor' THEN 3
+                ELSE 4
+            END
+    """
+    return execute_query(query, tuple(params))
+def check_single_interaction(drug_1: str, drug_2: str) -> Optional[dict]:
+    """
+    Check for interaction between two specific drugs.
+    Args:
+        drug_1: First drug name
+        drug_2: Second drug name
+    Returns:
+        Interaction details or None if no interaction found
+    """
+    query = """
+        SELECT
+            drug_1,
+            drug_2,
+            interaction_description,
+            severity
+        FROM drug_interaction_lookup
+        WHERE (LOWER(drug_1) LIKE LOWER(?) AND LOWER(drug_2) LIKE LOWER(?))
+        LIMIT 1
+    """
+    results = execute_query(query, (f"%{drug_1}%", f"%{drug_2}%"))
+    return results[0] if results else None
+def get_all_interactions_for_drug(drug: str) -> list[dict]:
+    """
+    Get all known interactions for a specific drug.
+    Args:
+        drug: Drug name to check
+    Returns:
+        List of all interactions involving this drug
+    """
+    query = """
+        SELECT
+            drug_1,
+            drug_2,
+            interaction_description,
+            severity
+        FROM drug_interaction_lookup
+        WHERE LOWER(drug_1) LIKE LOWER(?)
+        ORDER BY
+            CASE severity
+                WHEN 'major' THEN 1
+                WHEN 'moderate' THEN 2
+                WHEN 'minor' THEN 3
+                ELSE 4
+            END
+        LIMIT 100
+    """
+    return execute_query(query, (f"%{drug}%",))
+def get_major_interactions_for_drug(drug: str) -> list[dict]:
+    """
+    Get only major interactions for a specific drug.
+    Args:
+        drug: Drug name to check
+    Returns:
+        List of major severity interactions
+    """
+    query = """
+        SELECT
+            drug_1,
+            drug_2,
+            interaction_description
+        FROM drug_interaction_lookup
+        WHERE LOWER(drug_1) LIKE LOWER(?)
+          AND severity = 'major'
+        LIMIT 50
+    """
+    return execute_query(query, (f"%{drug}%",))
+def screen_antibiotic_safety(
+    antibiotic: str,
+    patient_medications: list[str],
+    patient_allergies: list[str] = None
+) -> dict:
+    """
+    Comprehensive safety screening for an antibiotic choice.
+    Args:
+        antibiotic: Proposed antibiotic
+        patient_medications: List of current medications
+        patient_allergies: List of known allergies (optional)
+    Returns:
+        Safety assessment with interactions and alerts
+    Used by: Agent 4 (Clinical Pharmacologist)
+    """
+    safety_report = {
+        "antibiotic": antibiotic,
+        "safe_to_use": True,
+        "alerts": [],
+        "interactions": [],
+        "allergy_warnings": []
+    }
+    # Check drug interactions
+    interactions = check_drug_interactions(antibiotic, patient_medications)
+    if interactions:
+        safety_report["interactions"] = interactions
+        # Check for major interactions
+        major = [i for i in interactions if i.get('severity') == 'major']
+        moderate = [i for i in interactions if i.get('severity') == 'moderate']
+        if major:
+            safety_report["safe_to_use"] = False
+            safety_report["alerts"].append({
+                "level": "CRITICAL",
+                "message": f"Found {len(major)} major drug interaction(s). Review required before prescribing."
+            })
+        if moderate:
+            safety_report["alerts"].append({
+                "level": "WARNING",
+                "message": f"Found {len(moderate)} moderate drug interaction(s). Consider dose adjustment or monitoring."
+            })
+    # Check allergies (basic check for cross-reactivity)
+    if patient_allergies:
+        antibiotic_lower = antibiotic.lower()
+        # Common antibiotic class cross-reactivity patterns
+        cross_reactivity = {
+            "penicillin": ["amoxicillin", "ampicillin", "piperacillin", "cephalosporin"],
+            "cephalosporin": ["ceftriaxone", "cefotaxime", "ceftazidime", "cefepime"],
+            "sulfa": ["sulfamethoxazole", "trimethoprim-sulfamethoxazole", "bactrim"],
+            "fluoroquinolone": ["ciprofloxacin", "levofloxacin", "moxifloxacin"],
+        }
+        for allergy in patient_allergies:
+            allergy_lower = allergy.lower()
+            # Direct match
+            if allergy_lower in antibiotic_lower:
+                safety_report["safe_to_use"] = False
+                safety_report["allergy_warnings"].append({
+                    "level": "CRITICAL",
+                    "message": f"Patient has documented allergy to {allergy}. CONTRAINDICATED."
+                })
+            # Cross-reactivity check
+            for allergen, related in cross_reactivity.items():
+                if allergen in allergy_lower:
+                    for related_drug in related:
+                        if related_drug in antibiotic_lower:
+                            safety_report["alerts"].append({
+                                "level": "WARNING",
+                                "message": f"Potential cross-reactivity: Patient allergic to {allergy}, {antibiotic} is in related class."
+                            })
+    # Summary
+    if safety_report["safe_to_use"]:
+        safety_report["summary"] = "No critical safety concerns identified."
+    else:
+        safety_report["summary"] = "SAFETY CONCERNS IDENTIFIED - Review required before prescribing."
+    return safety_report
+def get_interaction_statistics() -> dict:
+    """Get statistics about the drug interaction database."""
+    queries = {
+        "total": "SELECT COUNT(*) as count FROM drug_interactions",
+        "major": "SELECT COUNT(*) as count FROM drug_interactions WHERE severity = 'major'",
+        "moderate": "SELECT COUNT(*) as count FROM drug_interactions WHERE severity = 'moderate'",
+        "minor": "SELECT COUNT(*) as count FROM drug_interactions WHERE severity = 'minor'",
+    }
+    stats = {}
+    for key, query in queries.items():
+        result = execute_query(query)
+        stats[key] = result[0]['count'] if result else 0
+    return stats

uv.lock CHANGED Viewed

@@ -2041,6 +2041,7 @@ dependencies = [
     { name = "langchain-text-splitters" },
     { name = "langgraph" },
     { name = "openpyxl" },
     { name = "pillow" },
     { name = "pydantic" },
     { name = "pypdf" },
@@ -2065,6 +2066,7 @@ requires-dist = [
     { name = "langchain-text-splitters" },
     { name = "langgraph", specifier = ">=0.0.15" },
     { name = "openpyxl" },
     { name = "pillow" },
     { name = "pydantic", specifier = ">=2.0" },
     { name = "pypdf" },

     { name = "langchain-text-splitters" },
     { name = "langgraph" },
     { name = "openpyxl" },
+    { name = "pandas" },
     { name = "pillow" },
     { name = "pydantic" },
     { name = "pypdf" },
     { name = "langchain-text-splitters" },
     { name = "langgraph", specifier = ">=0.0.15" },
     { name = "openpyxl" },
+    { name = "pandas", specifier = ">=2.0.0" },
     { name = "pillow" },
     { name = "pydantic", specifier = ">=2.0" },
     { name = "pypdf" },