Spaces:

KinetoLabs
/

SmokeScan

Paused

SmokeScan / config /settings.py

Switch to Qwen3-VL-4B-Thinking for single-GPU simplicity

14c59e5 2 days ago

1.58 kB

	"""Application settings with environment variable support."""

	from typing import Literal
	from pydantic_settings import BaseSettings, SettingsConfigDict


	class Settings(BaseSettings):
	    """FDAM AI Pipeline configuration.

	    Each field below can be supplied through an environment variable of the
	    same name (no prefix, matched case-insensitively) or through an entry
	    in a local ``.env`` file; when neither is present the declared default
	    is used.
	    """

	    # Environment
	    environment: Literal["development", "production"] = "development"

	    # Logging - set LOG_LEVEL=DEBUG for detailed troubleshooting on HF Spaces
	    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

	    # Model loading - set MOCK_MODELS=true for local dev on RTX 4090.
	    # Default is False for production (HuggingFace Spaces).
	    mock_models: bool = False

	    # Model paths (for production on HuggingFace Spaces).
	    # 4B dense model - fits single GPU, no tensor parallelism needed.
	    vision_model: str = "Qwen/Qwen3-VL-4B-Thinking"
	    embedding_model: str = "Qwen/Qwen3-VL-Embedding-2B"
	    reranker_model: str = "Qwen/Qwen3-VL-Reranker-2B"

	    # vLLM configuration
	    vllm_tensor_parallel_size: int = 1  # Single GPU - 4B model fits on one L4
	    vllm_max_model_len: int = 16384  # 4B supports up to 256K, 16K is sufficient

	    # ChromaDB persistence directory
	    chroma_persist_dir: str = "./chroma_db"

	    # Knowledge base directory
	    knowledge_base_dir: str = "./RAG-KB"

	    # Gradio server (0.0.0.0 required for WSL)
	    server_host: str = "0.0.0.0"
	    server_port: int = 7860

	    # Assessment limits
	    max_images_per_assessment: int = 20

	    model_config = SettingsConfigDict(
	        env_file=".env",
	        env_prefix="",
	        case_sensitive=False,
	        # With an empty prefix every key found in .env is passed to the
	        # model. pydantic-settings defaults to extra="forbid", so an
	        # unrelated entry (e.g. an API token kept in the same .env) would
	        # raise a ValidationError when Settings() is constructed. Ignore
	        # keys that do not correspond to a declared field instead.
	        extra="ignore",
	    )


	# Singleton instance: built once when this module is first imported, so the
	# rest of the application can share it via `from config.settings import settings`
	# (import path assumed from the file location - adjust if the package differs).
	settings: Settings = Settings()