File size: 2,723 Bytes
e33977d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""Central configuration loaded from environment variables.

All other modules import `settings` from here. Never call os.environ directly
in agent code — keeps the surface small and testable.
"""
from __future__ import annotations

import os
from dataclasses import dataclass
from pathlib import Path

from dotenv import load_dotenv

# Load variables from a local .env file (via python-dotenv) into the process
# environment before any configuration value below is read from os.environ.
load_dotenv()


@dataclass(frozen=True)
class Settings:
    """Immutable container for the application's configuration values.

    The dataclass is frozen, so attributes cannot be reassigned once an
    instance has been constructed. Field order is part of the constructor
    signature; append new fields rather than reordering existing ones.
    """

    # LLM provider switch: selects which provider's models the aliases use
    llm_provider: str   # 'openai' or 'gemini'

    # OpenAI credentials and model names
    openai_api_key: str
    openai_reasoning_model: str
    openai_bulk_model: str

    # Gemini credentials and model names
    gemini_api_key: str
    gemini_reasoning_model: str
    gemini_bulk_model: str

    # Convenience aliases (backward compat with older code that uses these);
    # they carry the reasoning/bulk model names of the active provider.
    reasoning_model: str
    bulk_model: str

    # Embeddings: sentence-transformers model identifier
    sbert_model: str

    # Filesystem locations
    data_dir: Path
    processed_dir: Path
    chroma_persist_dir: Path

    # Service ports for the task A / task B API and UI processes
    task_a_api_port: int
    task_a_ui_port: int
    task_b_api_port: int
    task_b_ui_port: int


def _env(name: str, default: str) -> str:
    """Return environment variable *name* with surrounding whitespace removed.

    An unset or blank variable yields *default*, so an empty ``KEY=`` line in
    a ``.env`` file behaves the same as leaving the key out entirely.
    """
    value = os.environ.get(name, "").strip()
    return value or default


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    An unset or blank variable yields *default*.

    Raises:
        ValueError: if the variable is set to a non-integer value. The
            message names the offending variable.
    """
    raw = _env(name, "")
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _build() -> Settings:
    """Assemble a ``Settings`` instance from the process environment.

    Every value falls back to a built-in default when its variable is unset
    or blank. Directory settings are returned as absolute paths so they stay
    valid if the working directory changes later.

    Returns:
        A fully populated, frozen ``Settings`` object.

    Raises:
        ValueError: if one of the ``*_PORT`` variables is not an integer.
    """
    data_dir = Path(_env("DATA_DIR", "./data")).resolve()
    provider = _env("LLM_PROVIDER", "openai").lower()

    openai_reasoning = _env("OPENAI_REASONING_MODEL", "gpt-4o")
    openai_bulk = _env("OPENAI_BULK_MODEL", "gpt-4o-mini")
    gemini_reasoning = _env("GEMINI_REASONING_MODEL", "gemini-2.5-flash")
    gemini_bulk = _env("GEMINI_BULK_MODEL", "gemini-2.5-flash-lite")

    # Convenience aliases point to the active provider's models. Any provider
    # other than 'gemini' keeps the OpenAI models, as before.
    if provider == "gemini":
        active_reasoning, active_bulk = gemini_reasoning, gemini_bulk
    else:
        active_reasoning, active_bulk = openai_reasoning, openai_bulk

    # Resolve an explicitly configured Chroma directory too, so it is absolute
    # like data_dir and the default location derived from it.
    chroma_raw = _env("CHROMA_PERSIST_DIR", "")
    chroma_dir = Path(chroma_raw).resolve() if chroma_raw else data_dir / "chroma"

    return Settings(
        llm_provider=provider,
        openai_api_key=_env("OPENAI_API_KEY", ""),
        openai_reasoning_model=openai_reasoning,
        openai_bulk_model=openai_bulk,
        gemini_api_key=_env("GEMINI_API_KEY", ""),
        gemini_reasoning_model=gemini_reasoning,
        gemini_bulk_model=gemini_bulk,
        reasoning_model=active_reasoning,
        bulk_model=active_bulk,
        sbert_model=_env("SBERT_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
        data_dir=data_dir,
        processed_dir=data_dir / "processed",
        chroma_persist_dir=chroma_dir,
        task_a_api_port=_env_int("TASK_A_API_PORT", 8001),
        task_a_ui_port=_env_int("TASK_A_UI_PORT", 8501),
        task_b_api_port=_env_int("TASK_B_API_PORT", 8002),
        task_b_ui_port=_env_int("TASK_B_UI_PORT", 8502),
    )


# Module-level settings object, built once when this module is first imported;
# other modules import `settings` from here instead of reading os.environ.
settings = _build()