File size: 1,305 Bytes
b7f3196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
from pathlib import Path
from typing import Dict, List
import torch
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    # Model Configuration
    MODEL_NAME: str = "sentence-transformers/embeddinggemma-300m-medical"
    CLASSIFIER_NAME: str = "davidgray/health-query-triage"
    CATEGORIES: List[str] = ["medical", "insurance"]

    # Paths
    CHECKPOINT_PATH: str = "classifier/checkpoints"
    CACHE_DIR: str = ".cache/embeddings"

    # Device
    DEVICE: str = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")

    # Corpora Configuration
    CORPORA_CONFIG: Dict[str, dict] = {
        "medical_qa": {"path": "data/corpora/medical_qa.jsonl",
                       "text_fields": ["question", "answer", "title"]},
        "miriad":     {"path": "data/corpora/miriad_text.jsonl",
                       "text_fields": ["question", "answer", "title"]},
        "pubmed":     {"path": "data/corpora/pubmed.json",
                       "text_fields": ["contents","title"]},
        "unidoc":     {"path": "data/corpora/unidoc_qa.jsonl",
                       "text_fields": ["question", "answer", "title"]},
    }

    class Config:
        env_file = ".env"

settings = Settings()