Spaces:
Paused
Paused
File size: 1,576 Bytes
88bdcff f3ebc82 88bdcff 78caafb 88bdcff 14c59e5 706520f 14c59e5 88bdcff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
"""Application settings with environment variable support."""
from typing import Literal
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """FDAM AI Pipeline configuration.

    Every field can be overridden by an environment variable of the same
    name (matched case-insensitively, no prefix) or by entries in a local
    ``.env`` file — see ``model_config`` at the bottom of the class.
    """

    # Environment
    # Deployment mode; only these two literal values validate.
    environment: Literal["development", "production"] = "development"

    # Logging - set LOG_LEVEL=DEBUG for detailed troubleshooting on HF Spaces
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

    # Model loading - set MOCK_MODELS=true for local dev on RTX 4090
    # Default is False for production (HuggingFace Spaces)
    mock_models: bool = False

    # Model paths (for production on HuggingFace Spaces)
    # 4B dense model - fits single GPU, no tensor parallelism needed
    vision_model: str = "Qwen/Qwen3-VL-4B-Thinking"
    embedding_model: str = "Qwen/Qwen3-VL-Embedding-2B"
    reranker_model: str = "Qwen/Qwen3-VL-Reranker-2B"

    # vLLM configuration
    vllm_tensor_parallel_size: int = 1  # Single GPU - 4B model fits on one L4
    vllm_max_model_len: int = 16384  # 4B supports up to 256K, 16K is sufficient

    # ChromaDB
    # Directory where the vector store is persisted (relative to CWD).
    chroma_persist_dir: str = "./chroma_db"

    # Knowledge base
    # Root directory of the RAG knowledge-base documents (relative to CWD).
    knowledge_base_dir: str = "./RAG-KB"

    # Gradio server (0.0.0.0 required for WSL)
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    # Assessment limits
    # Hard cap on images accepted per assessment request.
    max_images_per_assessment: int = 20

    # Settings-loading behavior: read overrides from a local .env file,
    # match env-var names without any prefix, and case-insensitively
    # (so LOG_LEVEL maps onto log_level).
    model_config = SettingsConfigDict(
        env_file=".env",
        env_prefix="",
        case_sensitive=False,
    )
# Singleton instance
# NOTE: constructed at import time, so environment variables and the .env
# file are read exactly once, when this module is first imported.
settings = Settings()
|