File size: 6,504 Bytes
069f0a0
 
 
 
 
 
 
 
 
fd1472e
069f0a0
 
 
 
 
 
 
 
 
 
 
 
 
fd1472e
627c291
fd1472e
069f0a0
 
 
cd7c282
 
069f0a0
 
622c8ba
9e9bc6b
 
 
b2929fc
ea28d9c
 
 
b2929fc
ea28d9c
b2929fc
 
3f60cec
b2929fc
069f0a0
20c7bad
 
 
 
 
 
 
 
 
 
 
069f0a0
 
 
 
 
 
 
599a754
 
 
 
 
 
 
 
 
 
 
 
069f0a0
cb46aac
 
599a754
cb46aac
069f0a0
 
 
 
9e9bc6b
3aa91e9
 
 
7cc8b69
 
 
9e9bc6b
7cc8b69
3aa91e9
069f0a0
 
cd7c282
 
 
 
069f0a0
 
 
 
cd7c282
069f0a0
 
 
 
 
 
9d21bf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd7c282
 
 
 
 
b2929fc
 
 
 
 
9d21bf8
 
 
cd7c282
 
 
 
 
 
9d21bf8
069f0a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
"""Application configuration using Pydantic Settings."""

import logging
from typing import Literal

import structlog
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from src.config.domain import ResearchDomain
from src.utils.exceptions import ConfigurationError


class Settings(BaseSettings):
    """Strongly-typed application settings.

    Values are loaded by pydantic-settings from the environment and from a
    UTF-8 encoded ``.env`` file. Environment variable names are matched
    case-insensitively and unrecognised variables are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Domain configuration
    research_domain: ResearchDomain = ResearchDomain.SEXUAL_HEALTH

    # LLM Configuration
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    anthropic_api_key: str | None = Field(default=None, description="Anthropic API key")
    gemini_api_key: str | None = Field(default=None, description="Google Gemini API key")
    llm_provider: Literal["openai", "anthropic", "huggingface", "gemini"] = Field(
        default="openai", description="Which LLM provider to use"
    )
    openai_model: str = Field(default="gpt-5", description="OpenAI model name")
    anthropic_model: str = Field(
        default="claude-sonnet-4-5-20250929", description="Anthropic model"
    )
    # HuggingFace (free tier)
    # NOTE: Large models (70B+) are routed to third-party providers (Novita, Hyperbolic) which are
    # unreliable (500/401 errors). We use Qwen2.5-7B-Instruct as it is small enough to run on
    # Hugging Face's native serverless infrastructure.
    huggingface_model: str | None = Field(
        default="Qwen/Qwen2.5-7B-Instruct", description="HuggingFace model name"
    )
    hf_token: str | None = Field(
        default=None, alias="HF_TOKEN", description="HuggingFace API token"
    )

    # Embedding Configuration
    # Note: OpenAI embeddings require OPENAI_API_KEY (Anthropic has no embeddings API)
    openai_embedding_model: str = Field(
        default="text-embedding-3-small",
        description="OpenAI embedding model (used by LlamaIndex RAG)",
    )
    local_embedding_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Local sentence-transformers model (used by EmbeddingService)",
    )

    # PubMed Configuration
    ncbi_api_key: str | None = Field(
        default=None, description="NCBI API key for higher rate limits"
    )

    # Agent Configuration
    max_iterations: int = Field(default=10, ge=1, le=50)
    advanced_max_rounds: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Max coordination rounds for Advanced mode (default 5 for faster demos)",
    )
    advanced_timeout: float = Field(
        default=300.0,
        ge=60.0,
        le=900.0,
        description="Timeout for Advanced mode in seconds (default 5 min)",
    )
    search_timeout: int = Field(default=30, description="Seconds to wait for search")
    magentic_timeout: int = Field(
        default=600,
        description="Timeout for Magentic mode in seconds (deprecated, use advanced_timeout)",
    )

    # Logging
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    # External Services
    modal_token_id: str | None = Field(default=None, description="Modal token ID")
    modal_token_secret: str | None = Field(default=None, description="Modal token secret")
    chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")

    @property
    def modal_available(self) -> bool:
        """Check if Modal credentials are configured (both ID and secret set)."""
        return bool(self.modal_token_id and self.modal_token_secret)

    def get_api_key(self) -> str:
        """Get the API key for the configured provider.

        Every value accepted by ``llm_provider`` is resolved to its matching
        credential field.

        Returns:
            The non-empty credential for ``llm_provider``.

        Raises:
            ConfigurationError: If the credential for the configured provider
                is missing or empty, or if the provider is not recognised.
        """
        # Normalize provider for case-insensitive matching
        provider_lower = self.llm_provider.lower() if self.llm_provider else ""

        # provider -> (environment variable named in the error, configured value)
        credentials: dict[str, tuple[str, str | None]] = {
            "openai": ("OPENAI_API_KEY", self.openai_api_key),
            "anthropic": ("ANTHROPIC_API_KEY", self.anthropic_api_key),
            "gemini": ("GEMINI_API_KEY", self.gemini_api_key),
            "huggingface": ("HF_TOKEN", self.hf_token),
        }

        if provider_lower not in credentials:
            raise ConfigurationError(f"Unknown LLM provider: {self.llm_provider}")

        env_name, api_key = credentials[provider_lower]
        if not api_key:
            raise ConfigurationError(f"{env_name} not set")
        return api_key

    def get_openai_api_key(self) -> str:
        """Get OpenAI API key (required for Magentic function calling).

        Returns:
            The configured OpenAI API key.

        Raises:
            ConfigurationError: If ``openai_api_key`` is missing or empty.
        """
        if not self.openai_api_key:
            raise ConfigurationError(
                "OPENAI_API_KEY not set. Magentic mode requires OpenAI for function calling. "
                "Use mode='simple' for other providers."
            )
        return self.openai_api_key

    @property
    def has_openai_key(self) -> bool:
        """Check if OpenAI API key is available."""
        return bool(self.openai_api_key)

    @property
    def has_anthropic_key(self) -> bool:
        """Check if Anthropic API key is available."""
        return bool(self.anthropic_api_key)

    @property
    def has_gemini_key(self) -> bool:
        """Check if Gemini API key is available."""
        return bool(self.gemini_api_key)

    @property
    def has_huggingface_key(self) -> bool:
        """Check if HuggingFace token is available."""
        return bool(self.hf_token)

    @property
    def has_any_llm_key(self) -> bool:
        """Check if any LLM API key is available."""
        return (
            self.has_openai_key
            or self.has_anthropic_key
            or self.has_huggingface_key
            or self.has_gemini_key
        )


def get_settings() -> Settings:
    """Build and return a fresh ``Settings`` instance.

    Kept as a standalone factory so tests can mock it.
    """
    loaded = Settings()
    return loaded


def configure_logging(settings: Settings) -> None:
    """Set up stdlib logging and structlog using ``settings.log_level``.

    Args:
        settings: Settings object whose ``log_level`` names the stdlib
            logging level to apply.
    """
    # Translate the level name (e.g. "INFO") into the stdlib numeric constant.
    stdlib_level = getattr(logging, settings.log_level)
    logging.basicConfig(level=stdlib_level, format="%(message)s")

    # Processor order matters: filter by level first, render to JSON last.
    processor_chain = [
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer(),
    ]

    structlog.configure(
        processors=processor_chain,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
    )


# Singleton for easy import: built once, via get_settings(), when this module
# is first imported.
settings: Settings = get_settings()