Temporary commit — needs to be checked
Browse files- .python-version +1 -0
- README.md +0 -0
- main.py +6 -0
- pyproject.toml +28 -0
- scripts/ingest_data.py +40 -0
- src/Med_I_C.egg-info/PKG-INFO +24 -0
- src/Med_I_C.egg-info/SOURCES.txt +17 -0
- src/Med_I_C.egg-info/dependency_links.txt +1 -0
- src/Med_I_C.egg-info/requires.txt +18 -0
- src/Med_I_C.egg-info/top_level.txt +9 -0
- src/config.py +150 -0
- src/loader.py +201 -0
- src/state.py +125 -0
- uv.lock +0 -0
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.13
|
README.md
ADDED
|
File without changes
|
main.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def main() -> None:
    """Entry point for the Med-I-C package: emit a greeting to stdout."""
    print("Hello from med-i-c!")


if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "Med-I-C"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.10"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"langgraph>=0.0.15",
|
| 9 |
+
"langchain>=0.3.0",
|
| 10 |
+
"langchain-text-splitters",
|
| 11 |
+
"langchain-google-vertexai",
|
| 12 |
+
"google-cloud-aiplatform",
|
| 13 |
+
"chromadb>=0.4.0",
|
| 14 |
+
"sentence-transformers",
|
| 15 |
+
"transformers>=4.50.0",
|
| 16 |
+
"torch",
|
| 17 |
+
"accelerate",
|
| 18 |
+
"bitsandbytes",
|
| 19 |
+
"streamlit",
|
| 20 |
+
"pillow",
|
| 21 |
+
"pydantic>=2.0",
|
| 22 |
+
"python-dotenv",
|
| 23 |
+
"openpyxl",
|
| 24 |
+
"requests",
|
| 25 |
+
"pypdf",
|
| 26 |
+
"langchain-community>=0.4.1",
|
| 27 |
+
"jq>=1.11.0",
|
| 28 |
+
]
|
scripts/ingest_data.py
CHANGED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import chromadb
from chromadb.utils import embedding_functions
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader, JSONLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Setup Chroma Persistence
CHROMA_PATH = "data/chroma_db"
DATA_PATH = "data/Med-I-C/raw"


def ingest_medical_data():
    """
    Load antibiotic-guideline PDFs, chunk them, and store the chunks in a
    persistent ChromaDB collection named "antibiotic_guidelines".

    Reads PDFs from ``{DATA_PATH}/guidelines`` and persists the vector store
    under ``CHROMA_PATH``. Prints a summary and returns None.
    """
    # Persistent client for the competition (Kaggle/Local)
    client = chromadb.PersistentClient(path=CHROMA_PATH)

    # Embedding model used for the guideline collection
    model_name = "all-MiniLM-L6-v2"
    ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=model_name)

    # 2. Ingest Guidelines (PDFs)
    # We create a specific collection for cleaner retrieval
    guideline_col = client.get_or_create_collection(
        name="antibiotic_guidelines", embedding_function=ef
    )

    loader = DirectoryLoader(f"{DATA_PATH}/guidelines", glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()

    # 1000/100 split as discussed for clinical coherence
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents(documents)

    # Guard: chromadb raises on an empty `ids` list, so bail out early when
    # no guideline PDFs were found instead of crashing.
    if not chunks:
        print("No guideline documents found; nothing to ingest.")
        return

    # NOTE(review): re-running this script against an existing collection will
    # raise on the duplicate "guideline_{i}" ids — consider `upsert` or
    # deleting the collection first. TODO confirm desired re-run semantics.
    guideline_col.add(
        ids=[f"guideline_{i}" for i in range(len(chunks))],
        documents=[c.page_content for c in chunks],
        metadatas=[c.metadata for c in chunks],
    )

    print(f"Successfully ingested {len(chunks)} guideline chunks.")


if __name__ == "__main__":
    ingest_medical_data()
|
src/Med_I_C.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: Med-I-C
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Add your description here
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Description-Content-Type: text/markdown
|
| 7 |
+
Requires-Dist: langgraph>=0.0.15
|
| 8 |
+
Requires-Dist: langchain>=0.3.0
|
| 9 |
+
Requires-Dist: langchain-text-splitters
|
| 10 |
+
Requires-Dist: langchain-google-vertexai
|
| 11 |
+
Requires-Dist: google-cloud-aiplatform
|
| 12 |
+
Requires-Dist: chromadb>=0.4.0
|
| 13 |
+
Requires-Dist: sentence-transformers
|
| 14 |
+
Requires-Dist: transformers>=4.50.0
|
| 15 |
+
Requires-Dist: torch
|
| 16 |
+
Requires-Dist: accelerate
|
| 17 |
+
Requires-Dist: bitsandbytes
|
| 18 |
+
Requires-Dist: streamlit
|
| 19 |
+
Requires-Dist: pillow
|
| 20 |
+
Requires-Dist: pydantic>=2.0
|
| 21 |
+
Requires-Dist: python-dotenv
|
| 22 |
+
Requires-Dist: openpyxl
|
| 23 |
+
Requires-Dist: requests
|
| 24 |
+
Requires-Dist: pypdf
|
src/Med_I_C.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
src/__init__.py
|
| 4 |
+
src/agents.py
|
| 5 |
+
src/config.py
|
| 6 |
+
src/graph.py
|
| 7 |
+
src/loader.py
|
| 8 |
+
src/prompts.py
|
| 9 |
+
src/rag.py
|
| 10 |
+
src/state.py
|
| 11 |
+
src/utils.py
|
| 12 |
+
src/Med_I_C.egg-info/PKG-INFO
|
| 13 |
+
src/Med_I_C.egg-info/SOURCES.txt
|
| 14 |
+
src/Med_I_C.egg-info/dependency_links.txt
|
| 15 |
+
src/Med_I_C.egg-info/requires.txt
|
| 16 |
+
src/Med_I_C.egg-info/top_level.txt
|
| 17 |
+
tests/test_pipeline.py
|
src/Med_I_C.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
src/Med_I_C.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langgraph>=0.0.15
|
| 2 |
+
langchain>=0.3.0
|
| 3 |
+
langchain-text-splitters
|
| 4 |
+
langchain-google-vertexai
|
| 5 |
+
google-cloud-aiplatform
|
| 6 |
+
chromadb>=0.4.0
|
| 7 |
+
sentence-transformers
|
| 8 |
+
transformers>=4.50.0
|
| 9 |
+
torch
|
| 10 |
+
accelerate
|
| 11 |
+
bitsandbytes
|
| 12 |
+
streamlit
|
| 13 |
+
pillow
|
| 14 |
+
pydantic>=2.0
|
| 15 |
+
python-dotenv
|
| 16 |
+
openpyxl
|
| 17 |
+
requests
|
| 18 |
+
pypdf
|
src/Med_I_C.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__init__
|
| 2 |
+
agents
|
| 3 |
+
config
|
| 4 |
+
graph
|
| 5 |
+
loader
|
| 6 |
+
prompts
|
| 7 |
+
rag
|
| 8 |
+
state
|
| 9 |
+
utils
|
src/config.py
CHANGED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Literal, Optional
|
| 8 |
+
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Load variables from a local .env if present (handy for local dev)
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Settings(BaseModel):
    """
    Central configuration object for Med-I-C.

    Values are read from environment variables where possible so that
    the same code can run locally, on Kaggle, and in production.

    All defaults are computed via ``default_factory`` so the environment is
    read at instantiation time (after ``load_dotenv()`` has run), not at
    import time.
    """

    # ------------------------------------------------------------------
    # General environment
    # ------------------------------------------------------------------
    # NOTE(review): an unexpected MEDIC_ENV value will fail pydantic's
    # Literal validation when Settings() is first built — confirm that a
    # hard failure (rather than a fallback to "local") is intended.
    environment: Literal["local", "kaggle", "production"] = Field(
        default_factory=lambda: os.getenv("MEDIC_ENV", "local")
    )

    # Repository root, derived from this file's location (src/config.py -> repo/).
    project_root: Path = Field(
        default_factory=lambda: Path(__file__).resolve().parents[1]
    )

    # NOTE(review): the default "data" is relative to the current working
    # directory, not to project_root — confirm callers always run from the
    # repo root or set MEDIC_DATA_DIR explicitly.
    data_dir: Path = Field(
        default_factory=lambda: Path(
            os.getenv("MEDIC_DATA_DIR", "data")
        )
    )

    chroma_db_dir: Path = Field(
        default_factory=lambda: Path(
            os.getenv("MEDIC_CHROMA_DB_DIR", "data/chroma_db")
        )
    )

    # ------------------------------------------------------------------
    # Model + deployment preferences
    # ------------------------------------------------------------------
    default_backend: Literal["vertex", "local"] = Field(
        default_factory=lambda: os.getenv("MEDIC_DEFAULT_BACKEND", "vertex")  # type: ignore[arg-type]
    )

    # Quantization mode for local models (consumed by src/loader.py when
    # loading transformers weights).
    quantization: Literal["none", "4bit"] = Field(
        default_factory=lambda: os.getenv("MEDIC_QUANTIZATION", "4bit")  # type: ignore[arg-type]
    )

    # Embedding model used for ChromaDB / RAG
    embedding_model_name: str = Field(
        default_factory=lambda: os.getenv(
            "MEDIC_EMBEDDING_MODEL",
            "sentence-transformers/all-MiniLM-L6-v2",
        )
    )

    # ------------------------------------------------------------------
    # Vertex AI configuration (MedGemma / TxGemma hosted on Vertex)
    # ------------------------------------------------------------------
    # Truthy values: "1", "true", "yes" (case-insensitive); anything else,
    # including "on" or "enabled", disables Vertex.
    use_vertex: bool = Field(
        default_factory=lambda: os.getenv("MEDIC_USE_VERTEX", "true").lower()
        in {"1", "true", "yes"}
    )

    # Required when the Vertex backend is used; src/loader.py raises if unset.
    vertex_project_id: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEDIC_VERTEX_PROJECT_ID")
    )
    vertex_location: str = Field(
        default_factory=lambda: os.getenv("MEDIC_VERTEX_LOCATION", "us-central1")
    )

    # Model IDs as expected by Vertex / langchain-google-vertexai
    vertex_medgemma_4b_model: str = Field(
        default_factory=lambda: os.getenv(
            "MEDIC_VERTEX_MEDGEMMA_4B_MODEL",
            "med-gemma-4b-it",
        )
    )
    vertex_medgemma_27b_model: str = Field(
        default_factory=lambda: os.getenv(
            "MEDIC_VERTEX_MEDGEMMA_27B_MODEL",
            "med-gemma-27b-text-it",
        )
    )
    vertex_txgemma_9b_model: str = Field(
        default_factory=lambda: os.getenv(
            "MEDIC_VERTEX_TXGEMMA_9B_MODEL",
            "tx-gemma-9b",
        )
    )
    vertex_txgemma_2b_model: str = Field(
        default_factory=lambda: os.getenv(
            "MEDIC_VERTEX_TXGEMMA_2B_MODEL",
            "tx-gemma-2b",
        )
    )

    # Standard GOOGLE_APPLICATION_CREDENTIALS path, if needed
    google_application_credentials: Optional[Path] = Field(
        default_factory=lambda: (
            Path(os.environ["GOOGLE_APPLICATION_CREDENTIALS"])
            if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ
            else None
        )
    )

    # ------------------------------------------------------------------
    # Local model paths (for offline / Kaggle GPU usage)
    # ------------------------------------------------------------------
    # These default to None; src/loader.py raises a RuntimeError when a
    # local model is requested but its path is unset.
    local_medgemma_4b_model: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEDIC_LOCAL_MEDGEMMA_4B_MODEL")
    )
    local_medgemma_27b_model: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEDIC_LOCAL_MEDGEMMA_27B_MODEL")
    )
    local_txgemma_9b_model: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEDIC_LOCAL_TXGEMMA_9B_MODEL")
    )
    local_txgemma_2b_model: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEDIC_LOCAL_TXGEMMA_2B_MODEL")
    )
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """
    Return a process-wide, lazily constructed Settings instance.

    The first call builds Settings (reading the environment at that moment);
    every later call returns the same cached object. Prefer this helper over
    instantiating Settings directly:

        from src.config import get_settings
        settings = get_settings()
    """
    return Settings()


__all__ = ["Settings", "get_settings"]
|
| 150 |
+
|
src/loader.py
CHANGED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import logging
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
from typing import Any, Callable, Dict, Literal, Optional, Tuple
|
| 7 |
+
|
| 8 |
+
from .config import get_settings
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
TextBackend = Literal["vertex", "local"]
|
| 14 |
+
TextModelName = Literal["medgemma_4b", "medgemma_27b", "txgemma_9b", "txgemma_2b"]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _resolve_backend(
    requested: Optional[TextBackend],
) -> TextBackend:
    """
    Resolve the effective text backend.

    An explicit request wins; otherwise the configured default is used.
    A "vertex" choice is downgraded to "local" when Vertex is disabled
    in settings.
    """
    settings = get_settings()
    backend = requested or settings.default_backend  # type: ignore[assignment]
    if backend != "vertex" or settings.use_vertex:
        return backend
    logger.info("Vertex disabled in settings, falling back to local backend.")
    return "local"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@lru_cache(maxsize=8)
def _get_vertex_chat_model(model_name: TextModelName):
    """
    Lazily construct a Vertex AI chat model via langchain-google-vertexai.

    Returns an object with an .invoke(str) method; we wrap this in a simple
    callable for downstream use.

    Raises:
        RuntimeError: if langchain-google-vertexai cannot be imported, or if
            MEDIC_VERTEX_PROJECT_ID is not configured.
    """

    # Import lazily so the local backend works without the Vertex extras.
    try:
        from langchain_google_vertexai import ChatVertexAI
    except Exception as exc:  # pragma: no cover - import-time failure
        raise RuntimeError(
            "langchain-google-vertexai is not available; "
            "install it or switch MEDIC_DEFAULT_BACKEND=local."
        ) from exc

    settings = get_settings()

    if settings.vertex_project_id is None:
        raise RuntimeError(
            "MEDIC_VERTEX_PROJECT_ID is not set. "
            "Set it in your environment or .env to use Vertex AI."
        )

    # Map the short internal model name to the Vertex model ID from settings.
    model_id_map: Dict[TextModelName, str] = {
        "medgemma_4b": settings.vertex_medgemma_4b_model,
        "medgemma_27b": settings.vertex_medgemma_27b_model,
        "txgemma_9b": settings.vertex_txgemma_9b_model,
        "txgemma_2b": settings.vertex_txgemma_2b_model,
    }
    model_id = model_id_map[model_name]

    llm = ChatVertexAI(
        model=model_id,
        project=settings.vertex_project_id,
        location=settings.vertex_location,
        temperature=0.2,
    )

    def _call(prompt: str, **kwargs: Any) -> str:
        """Thin wrapper returning plain text from ChatVertexAI."""

        # NOTE(review): kwargs are forwarded verbatim to llm.invoke();
        # run_inference() passes max_new_tokens/temperature, which may not be
        # accepted keyword arguments for ChatVertexAI.invoke — TODO confirm.
        result = llm.invoke(prompt, **kwargs)
        # langchain BaseMessage or plain string
        content = getattr(result, "content", result)
        return str(content)

    return _call
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@lru_cache(maxsize=8)
def _get_local_causal_lm(model_name: TextModelName):
    """
    Lazily load a local transformers model for offline / Kaggle usage.

    Assumes model paths are provided via MEDIC_LOCAL_* env vars and that
    the appropriate model weights are available in the environment.

    Returns:
        A callable ``(prompt, max_new_tokens=512, temperature=0.2, **kw) -> str``
        that decodes only the newly generated completion.

    Raises:
        RuntimeError: if no local model path is configured for *model_name*.
    """

    # Import lazily so the Vertex backend works without torch/transformers.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import torch

    settings = get_settings()

    model_path_map: Dict[TextModelName, Optional[str]] = {
        "medgemma_4b": settings.local_medgemma_4b_model,
        "medgemma_27b": settings.local_medgemma_27b_model,
        "txgemma_9b": settings.local_txgemma_9b_model,
        "txgemma_2b": settings.local_txgemma_2b_model,
    }

    model_path = model_path_map[model_name]
    if not model_path:
        raise RuntimeError(
            f"No local model path configured for {model_name}. "
            f"Set MEDIC_LOCAL_*_MODEL or use the Vertex backend."
        )

    load_kwargs: Dict[str, Any] = {
        "device_map": "auto",
    }

    # Optional 4-bit quantization via bitsandbytes
    # NOTE(review): `load_in_4bit=True` is deprecated in recent transformers
    # releases in favor of BitsAndBytesConfig — confirm the pinned version.
    if get_settings().quantization == "4bit":
        load_kwargs["load_in_4bit"] = True

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

    def _call(
        prompt: str,
        max_new_tokens: int = 512,
        temperature: float = 0.2,
        **generate_kwargs: Any,
    ) -> str:
        # Tokenize and move tensors to wherever device_map placed the model.
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # temperature <= 0 is treated as greedy decoding.
        do_sample = temperature > 0

        # NOTE(review): passing temperature=0.0 alongside do_sample=False
        # triggers a transformers warning in some versions — consider omitting
        # temperature entirely for the greedy path. TODO confirm.
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                do_sample=do_sample,
                temperature=temperature if do_sample else 0.0,
                max_new_tokens=max_new_tokens,
                **generate_kwargs,
            )

        # Drop the prompt tokens and decode only the completion
        generated_ids = output_ids[0, inputs["input_ids"].shape[1] :]
        text = tokenizer.decode(generated_ids, skip_special_tokens=True)
        return text.strip()

    return _call
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@lru_cache(maxsize=32)
def get_text_model(
    model_name: TextModelName = "medgemma_4b",
    backend: Optional[TextBackend] = None,
) -> Callable[..., str]:
    """
    Return a cached text-generation callable for the given model and backend.

    Example:

        from src.loader import get_text_model
        model = get_text_model("medgemma_4b")
        answer = model("Explain ESBL in simple terms.")
    """
    resolved = _resolve_backend(backend)
    factory = _get_vertex_chat_model if resolved == "vertex" else _get_local_causal_lm
    return factory(model_name)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def run_inference(
    prompt: str,
    model_name: TextModelName = "medgemma_4b",
    backend: Optional[TextBackend] = None,
    max_new_tokens: int = 512,
    temperature: float = 0.2,
    **kwargs: Any,
) -> str:
    """
    Convenience wrapper around `get_text_model`.

    This is the simplest entry point to use inside agents:

        from src.loader import run_inference
        text = run_inference(prompt, model_name="medgemma_4b")
    """
    # Resolve (and cache) the backend-specific callable, then delegate.
    generate = get_text_model(model_name=model_name, backend=backend)
    return generate(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        **kwargs,
    )
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
__all__ = [
|
| 196 |
+
"TextBackend",
|
| 197 |
+
"TextModelName",
|
| 198 |
+
"get_text_model",
|
| 199 |
+
"run_inference",
|
| 200 |
+
]
|
| 201 |
+
|
src/state.py
CHANGED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from typing import Dict, List, Literal, NotRequired, Optional, TypedDict
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LabResult(TypedDict, total=False):
    """Structured representation of a single lab value."""

    # NOTE(review): with total=False every key here is already optional, so
    # the NotRequired wrappers are redundant (and `name`/`value` are NOT
    # actually required). Also, `typing.NotRequired` needs Python 3.11+ while
    # pyproject declares requires-python >=3.10 — consider typing_extensions.
    name: str
    value: str  # stored as text, not parsed to a number
    unit: NotRequired[Optional[str]]
    reference_range: NotRequired[Optional[str]]
    flag: NotRequired[Optional[Literal["low", "normal", "high", "critical"]]]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class MICDatum(TypedDict, total=False):
    """Single MIC measurement for a bug–drug pair."""

    # total=False: every key is optional; NotRequired markers below are
    # informational only.
    organism: str
    antibiotic: str
    mic_value: str  # kept as text (e.g. ">=16"), not parsed numerically
    mic_unit: NotRequired[Optional[str]]
    interpretation: NotRequired[Optional[Literal["S", "I", "R"]]]
    breakpoint_source: NotRequired[Optional[str]]  # e.g. EUCAST v16.0
    year: NotRequired[Optional[int]]
    site: NotRequired[Optional[str]]  # e.g. blood, urine
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Recommendation(TypedDict, total=False):
    """Final clinical recommendation assembled by Agent 4."""

    # total=False makes every key optional; Optional[...] additionally allows
    # an explicit None value for a present key.
    primary_antibiotic: Optional[str]
    backup_antibiotic: NotRequired[Optional[str]]
    dose: Optional[str]
    route: Optional[str]
    frequency: Optional[str]
    duration: Optional[str]
    rationale: Optional[str]
    references: NotRequired[List[str]]
    safety_alerts: NotRequired[List[str]]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class InfectionState(TypedDict, total=False):
    """
    Global LangGraph state for the Med-I-C pipeline.

    All agents read from and write back to this object.
    Most keys are optional to keep the schema flexible across stages.
    (total=False already makes every key optional; the NotRequired markers
    are informational.)
    """

    # ------------------------------------------------------------------
    # Patient identity & demographics
    # ------------------------------------------------------------------
    patient_id: NotRequired[Optional[str]]
    age_years: NotRequired[Optional[float]]
    sex: NotRequired[Optional[Literal["male", "female", "other", "unknown"]]]
    weight_kg: NotRequired[Optional[float]]
    height_cm: NotRequired[Optional[float]]

    # ------------------------------------------------------------------
    # Clinical context
    # ------------------------------------------------------------------
    suspected_source: NotRequired[Optional[str]]  # e.g. "community UTI"
    comorbidities: NotRequired[List[str]]
    medications: NotRequired[List[str]]
    allergies: NotRequired[List[str]]
    infection_site: NotRequired[Optional[str]]
    country_or_region: NotRequired[Optional[str]]

    # ------------------------------------------------------------------
    # Renal function / vitals
    # ------------------------------------------------------------------
    serum_creatinine_mg_dl: NotRequired[Optional[float]]
    creatinine_clearance_ml_min: NotRequired[Optional[float]]
    vitals: NotRequired[Dict[str, str]]  # flexible key/value, e.g. {"BP": "120/80"}

    # ------------------------------------------------------------------
    # Lab data & MICs
    # ------------------------------------------------------------------
    labs_raw_text: NotRequired[Optional[str]]  # raw OCR / PDF text
    labs_parsed: NotRequired[List[LabResult]]

    mic_data: NotRequired[List[MICDatum]]
    mic_trend_summary: NotRequired[Optional[str]]

    # ------------------------------------------------------------------
    # Stage / routing metadata
    # ------------------------------------------------------------------
    stage: NotRequired[Literal["empirical", "targeted"]]
    route_to_vision: NotRequired[bool]
    route_to_trend_analyst: NotRequired[bool]

    # ------------------------------------------------------------------
    # Agent outputs
    # ------------------------------------------------------------------
    intake_notes: NotRequired[Optional[str]]  # Agent 1
    vision_notes: NotRequired[Optional[str]]  # Agent 2
    trend_notes: NotRequired[Optional[str]]  # Agent 3
    pharmacology_notes: NotRequired[Optional[str]]  # Agent 4

    recommendation: NotRequired[Optional[Recommendation]]

    # ------------------------------------------------------------------
    # RAG / context + safety
    # ------------------------------------------------------------------
    rag_context: NotRequired[Optional[str]]
    guideline_sources: NotRequired[List[str]]
    breakpoint_sources: NotRequired[List[str]]
    safety_warnings: NotRequired[List[str]]

    # ------------------------------------------------------------------
    # Diagnostics / debugging
    # ------------------------------------------------------------------
    errors: NotRequired[List[str]]
    debug_log: NotRequired[List[str]]
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
__all__ = [
|
| 120 |
+
"LabResult",
|
| 121 |
+
"MICDatum",
|
| 122 |
+
"Recommendation",
|
| 123 |
+
"InfectionState",
|
| 124 |
+
]
|
| 125 |
+
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|