SmokeScan / config /settings.py
KinetoLabs's picture
Switch to Qwen3-VL-4B-Thinking for single-GPU simplicity
14c59e5
"""Application settings with environment variable support."""
from typing import Literal
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """FDAM AI Pipeline configuration.

    Each field may be overridden by an environment variable of the same
    name (no prefix, matched case-insensitively) or by an entry in a local
    ``.env`` file. When neither is present, the default declared here is
    used.
    """

    # Environment
    environment: Literal["development", "production"] = "development"

    # Logging - set LOG_LEVEL=DEBUG for detailed troubleshooting on HF Spaces
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

    # Model loading - set MOCK_MODELS=true for local dev on RTX 4090
    # Default is False for production (HuggingFace Spaces)
    mock_models: bool = False

    # Model paths (for production on HuggingFace Spaces)
    # 4B dense model - fits single GPU, no tensor parallelism needed
    vision_model: str = "Qwen/Qwen3-VL-4B-Thinking"
    embedding_model: str = "Qwen/Qwen3-VL-Embedding-2B"
    reranker_model: str = "Qwen/Qwen3-VL-Reranker-2B"

    # vLLM configuration
    vllm_tensor_parallel_size: int = 1  # Single GPU - 4B model fits on one L4
    vllm_max_model_len: int = 16384  # 4B supports up to 256K, 16K is sufficient

    # ChromaDB
    chroma_persist_dir: str = "./chroma_db"

    # Knowledge base
    knowledge_base_dir: str = "./RAG-KB"

    # Gradio server (0.0.0.0 required for WSL)
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    # Assessment limits
    max_images_per_assessment: int = 20

    model_config = SettingsConfigDict(
        env_file=".env",
        env_prefix="",
        case_sensitive=False,
        # With an empty prefix every key in .env is offered to the model.
        # BaseSettings forbids unknown keys by default, so an unrelated
        # entry (e.g. a token used by another tool) would abort startup.
        # Ignore keys that are not declared as fields above.
        extra="ignore",
    )
# Singleton instance: constructed once when this module is first imported,
# so environment / .env values are read at import time. Other modules are
# presumably meant to import this object rather than build their own Settings.
settings = Settings()