| """Application settings with environment variable support.""" | |
| from typing import Literal | |
| from pydantic_settings import BaseSettings, SettingsConfigDict | |
| class Settings(BaseSettings): | |
| """FDAM AI Pipeline configuration.""" | |
| # Environment | |
| environment: Literal["development", "production"] = "development" | |
| # Logging - set LOG_LEVEL=DEBUG for detailed troubleshooting on HF Spaces | |
| log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" | |
| # Model loading - set MOCK_MODELS=true for local dev on RTX 4090 | |
| # Default is False for production (HuggingFace Spaces) | |
| mock_models: bool = False | |
| # Model paths (for production on HuggingFace Spaces) | |
| # 4B dense model - fits single GPU, no tensor parallelism needed | |
| vision_model: str = "Qwen/Qwen3-VL-4B-Thinking" | |
| embedding_model: str = "Qwen/Qwen3-VL-Embedding-2B" | |
| reranker_model: str = "Qwen/Qwen3-VL-Reranker-2B" | |
| # vLLM configuration | |
| vllm_tensor_parallel_size: int = 1 # Single GPU - 4B model fits on one L4 | |
| vllm_max_model_len: int = 16384 # 4B supports up to 256K, 16K is sufficient | |
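
    # How these two fields map onto vLLM (illustrative sketch, not from this
    # file - the actual engine wiring lives elsewhere in the pipeline).
    # tensor_parallel_size and max_model_len are real vllm.LLM constructor
    # arguments:
    #
    #     from vllm import LLM
    #     llm = LLM(
    #         model=settings.vision_model,
    #         tensor_parallel_size=settings.vllm_tensor_parallel_size,
    #         max_model_len=settings.vllm_max_model_len,
    #     )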

    # ChromaDB
    chroma_persist_dir: str = "./chroma_db"

    # Knowledge base
    knowledge_base_dir: str = "./RAG-KB"

    # Gradio server (bind 0.0.0.0 so the app is reachable from outside WSL)
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    # Assessment limits
    max_images_per_assessment: int = 20

    model_config = SettingsConfigDict(
        env_file=".env",
        env_prefix="",
        case_sensitive=False,
    )
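
# Example .env for local development. Values are illustrative; the variable
# names follow pydantic-settings' default field-name-to-env-var mapping,
# matched case-insensitively because case_sensitive=False above:
#
#     ENVIRONMENT=development
#     LOG_LEVEL=DEBUG
#     MOCK_MODELS=true
#     SERVER_PORT=7860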

# Singleton instance
settings = Settings()
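
# Minimal smoke test (an assumption, not part of the original file): running
# `python config.py` prints the resolved configuration so you can check which
# values came from the environment / .env versus the defaults above.
if __name__ == "__main__":
    import json

    print(json.dumps(settings.model_dump(), indent=2))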