File size: 1,576 Bytes
88bdcff
 
 
 
 
 
 
 
 
 
 
 
f3ebc82
 
 
88bdcff
78caafb
 
88bdcff
 
14c59e5
 
706520f
 
 
 
14c59e5
 
88bdcff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""Application settings with environment variable support."""

from typing import Literal
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """FDAM AI Pipeline configuration.

    Each field is populated (case-insensitively, no prefix) from the
    environment or from a local ``.env`` file, falling back to the
    declared default. Example: ``MOCK_MODELS=true`` overrides
    ``mock_models``.
    """

    # Environment
    # Restricted to the two deployment modes; any other value fails validation.
    environment: Literal["development", "production"] = "development"

    # Logging - set LOG_LEVEL=DEBUG for detailed troubleshooting on HF Spaces
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

    # Model loading - set MOCK_MODELS=true for local dev on RTX 4090
    # Default is False for production (HuggingFace Spaces)
    mock_models: bool = False

    # Model paths (for production on HuggingFace Spaces)
    # 4B dense model - fits single GPU, no tensor parallelism needed
    vision_model: str = "Qwen/Qwen3-VL-4B-Thinking"
    embedding_model: str = "Qwen/Qwen3-VL-Embedding-2B"
    reranker_model: str = "Qwen/Qwen3-VL-Reranker-2B"

    # vLLM configuration
    vllm_tensor_parallel_size: int = 1  # Single GPU - 4B model fits on one L4
    vllm_max_model_len: int = 16384  # 4B supports up to 256K, 16K is sufficient

    # ChromaDB
    # Directory for the persistent vector-store files (relative to CWD).
    chroma_persist_dir: str = "./chroma_db"

    # Knowledge base
    # Source documents ingested into the RAG vector store.
    knowledge_base_dir: str = "./RAG-KB"

    # Gradio server (0.0.0.0 required for WSL)
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    # Assessment limits
    # Upper bound on images accepted per assessment request.
    max_images_per_assessment: int = 20

    # pydantic-settings behavior: read ".env" if present; env var names map
    # to fields directly (empty prefix) and match case-insensitively.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_prefix="",
        case_sensitive=False,
    )


# Singleton instance
# Instantiated at import time, so the environment / .env file is read once
# when this module is first imported; import `settings` elsewhere to share it.
settings = Settings()