"""
MediGuard AI — Pydantic Settings (hierarchical, env-driven)

All runtime configuration lives here. Values are read from environment
variables (with ``env_nested_delimiter="__"``), so ``OPENSEARCH__HOST``
maps to ``settings.opensearch.host``.

Usage::

    from src.settings import get_settings

    settings = get_settings()
    print(settings.opensearch.host)
"""
from __future__ import annotations
from functools import lru_cache
from typing import Literal
from pydantic import Field
from pydantic_settings import BaseSettings
# ── Helpers ──────────────────────────────────────────────────────────────────
class _Base(BaseSettings):
    """Common ``BaseSettings`` subclass: nested env vars, immutable, lenient.

    Every settings class in this module inherits from here so that all of
    them share the same environment-variable semantics.
    """

    # env_nested_delimiter: "OPENSEARCH__HOST" -> settings.opensearch.host
    # frozen: instances are immutable after validation
    # extra: unknown environment keys are silently ignored
    model_config = dict(
        env_nested_delimiter="__",
        frozen=True,
        extra="ignore",
    )
# ── Sub-settings ─────────────────────────────────────────────────────────────
class APISettings(_Base):
    """HTTP API server options (read from ``API__*`` env vars)."""

    host: str = Field(default="0.0.0.0")  # bind address
    port: int = Field(default=8000)
    reload: bool = Field(default=False)  # auto-reload (dev only)
    workers: int = Field(default=4)
    cors_origins: str = Field(default="*")
    log_level: str = Field(default="INFO")

    model_config = dict(env_prefix="API__")
class PostgresSettings(_Base):
    """Relational database connection (read from ``POSTGRES__*`` env vars)."""

    # SQLAlchemy-style URL; the default points at a local dev instance.
    database_url: str = Field(
        default="postgresql+psycopg2://mediguard:mediguard@localhost:5432/mediguard_db"
    )

    model_config = dict(env_prefix="POSTGRES__")
class OpenSearchSettings(_Base):
    """OpenSearch cluster access (read from ``OPENSEARCH__*`` env vars)."""

    host: str = Field(default="http://localhost:9200")
    index_name: str = Field(default="medical_chunks")
    username: str = Field(default="")  # empty -> no auth
    password: str = Field(default="")
    verify_certs: bool = Field(default=False)  # dev default; enable for TLS prod
    timeout: int = Field(default=30)  # seconds

    model_config = dict(env_prefix="OPENSEARCH__")
class RedisSettings(_Base):
    """Redis cache connection (read from ``REDIS__*`` env vars)."""

    host: str = Field(default="localhost")
    port: int = Field(default=6379)
    db: int = Field(default=0)
    ttl_seconds: int = Field(default=21600)  # 6 hours default
    enabled: bool = Field(default=True)  # flip off to bypass caching entirely

    model_config = dict(env_prefix="REDIS__")
class OllamaSettings(_Base):
    """Local Ollama server options (read from ``OLLAMA__*`` env vars)."""

    host: str = Field(default="http://localhost:11434")
    model: str = Field(default="llama3.1:8b")
    embedding_model: str = Field(default="nomic-embed-text")
    timeout: int = Field(default=120)  # seconds; local inference can be slow
    num_ctx: int = Field(default=8192)  # context window in tokens

    model_config = dict(env_prefix="OLLAMA__")
class LLMSettings(_Base):
    """Chat-LLM provider selection (read from ``LLM__*`` env vars)."""

    # Which backend to use; provider-specific keys/models live below.
    provider: Literal["groq", "gemini", "ollama"] = Field(default="groq")
    temperature: float = Field(default=0.0)  # deterministic output by default
    groq_api_key: str = Field(default="")
    groq_model: str = Field(default="llama-3.3-70b-versatile")
    google_api_key: str = Field(default="")
    gemini_model: str = Field(default="gemini-2.0-flash")

    model_config = dict(env_prefix="LLM__")
class EmbeddingSettings(_Base):
    """Embedding provider selection (read from ``EMBEDDING__*`` env vars)."""

    provider: Literal["jina", "google", "huggingface", "ollama"] = Field(default="google")
    jina_api_key: str = Field(default="")
    jina_model: str = Field(default="jina-embeddings-v3")
    dimension: int = Field(default=1024)  # embedding vector size
    google_api_key: str = Field(default="")
    huggingface_model: str = Field(default="sentence-transformers/all-MiniLM-L6-v2")
    batch_size: int = Field(default=64)  # texts embedded per request

    model_config = dict(env_prefix="EMBEDDING__")
class ChunkingSettings(_Base):
    """Document chunking parameters (read from ``CHUNKING__*`` env vars)."""

    chunk_size: int = Field(default=600)  # words
    chunk_overlap: int = Field(default=100)  # words
    min_chunk_size: int = Field(default=50)  # drop fragments smaller than this
    section_aware: bool = Field(default=True)  # respect section boundaries

    model_config = dict(env_prefix="CHUNKING__")
class LangfuseSettings(_Base):
    """Langfuse observability options (read from ``LANGFUSE__*`` env vars)."""

    enabled: bool = Field(default=False)  # tracing is opt-in
    public_key: str = Field(default="")
    secret_key: str = Field(default="")
    host: str = Field(default="http://localhost:3001")

    model_config = dict(env_prefix="LANGFUSE__")
class TelegramSettings(_Base):
    """Telegram bot options (read from ``TELEGRAM__*`` env vars)."""

    enabled: bool = Field(default=False)
    bot_token: str = Field(default="")
    allowed_users: str = Field(default="")  # comma-separated user IDs

    model_config = dict(env_prefix="TELEGRAM__")
class BiomarkerSettings(_Base):
    """Biomarker analysis options (read from ``BIOMARKER__*`` env vars)."""

    reference_file: str = Field(default="config/biomarker_references.json")
    analyzer_threshold: float = Field(default=0.15)
    critical_alert_mode: Literal["strict", "moderate", "permissive"] = Field(default="strict")

    model_config = dict(env_prefix="BIOMARKER__")
class MedicalPDFSettings(_Base):
    """Medical-PDF ingestion options (read from ``PDF__*`` env vars)."""

    pdf_directory: str = Field(default="data/medical_pdfs")
    vector_store_path: str = Field(default="data/vector_stores")
    max_file_size_mb: int = Field(default=50)  # reject larger uploads
    max_pages: int = Field(default=500)

    model_config = dict(env_prefix="PDF__")
# ── Root settings ────────────────────────────────────────────────────────────
class Settings(_Base):
    """Root configuration — aggregates all sub-settings.

    Inherits ``env_nested_delimiter="__"``, ``frozen`` and ``extra="ignore"``
    from ``_Base`` (pydantic v2 merges ``model_config`` across base classes),
    so the previously restated ``model_config`` here was redundant and has
    been removed to keep a single source of truth.
    """
    app_name: str = "MediGuard AI"
    app_version: str = "2.0.0"
    environment: Literal["development", "staging", "production"] = "development"
    debug: bool = False
    # Sub-settings (populated from env with nesting). default_factory ensures
    # each group reads its own env vars at Settings() construction time.
    api: APISettings = Field(default_factory=APISettings)
    postgres: PostgresSettings = Field(default_factory=PostgresSettings)
    opensearch: OpenSearchSettings = Field(default_factory=OpenSearchSettings)
    redis: RedisSettings = Field(default_factory=RedisSettings)
    ollama: OllamaSettings = Field(default_factory=OllamaSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    embedding: EmbeddingSettings = Field(default_factory=EmbeddingSettings)
    chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
    langfuse: LangfuseSettings = Field(default_factory=LangfuseSettings)
    telegram: TelegramSettings = Field(default_factory=TelegramSettings)
    biomarker: BiomarkerSettings = Field(default_factory=BiomarkerSettings)
    pdf: MedicalPDFSettings = Field(default_factory=MedicalPDFSettings)
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the process-wide frozen ``Settings`` instance.

    The first call constructs and validates the object (reading the
    environment once); every later call returns the cached instance.
    ``get_settings.cache_clear()`` forces a re-read, which is handy in tests.
    """
    return Settings()