Spaces:
Sleeping
Sleeping
shivam0897-i
fix(backend): Convert PyTorch thread execution to bounded async pool to prevent OOM on HF Spaces
4eae08d | """ | |
| Configuration management using Pydantic Settings. | |
| """ | |
| from pydantic_settings import BaseSettings | |
| from typing import List | |
| from pydantic import Field | |
class Settings(BaseSettings):
    """Application configuration.

    Every attribute below is a Pydantic Settings field. Per ``model_config``
    values may be supplied through a ``.env`` file, names are matched
    case-sensitively, and unknown keys are ignored rather than rejected.
    ``API_KEY`` is the only field without a default and must be provided.
    """

    # ------------------------------------------------------------------
    # Core API settings
    # ------------------------------------------------------------------
    API_KEY: str = Field(..., description="API Key for authentication")
    PORT: int = Field(7860, description="Server port")
    WEBSITE_URL: str = Field(
        default="https://voice-detection-nu.vercel.app/",
        description="Project or Portfolio URL",
    )

    # Security: Swagger/OpenAPI docs are disabled by default in production.
    # Set DOCS_ENABLED=true in .env for local development.
    DOCS_ENABLED: bool = Field(
        default=False,
        description="Enable /docs, /redoc, and /openapi.json endpoints (disable in production)",
    )

    # ------------------------------------------------------------------
    # CORS settings
    # ------------------------------------------------------------------
    # The raw value is declared as ``str`` and read through the
    # ``ALLOWED_ORIGINS`` alias so Pydantic does not try to JSON-decode the
    # environment value itself; parsing happens in ``ALLOWED_ORIGINS()``.
    ALLOWED_ORIGINS_RAW: str = Field(default="*", alias="ALLOWED_ORIGINS")

    # NOTE(review): this is a plain method in the visible source, so callers
    # must invoke it (``settings.ALLOWED_ORIGINS()``). Confirm whether a
    # ``@property`` decorator was intended before changing call sites.
    def ALLOWED_ORIGINS(self) -> List[str]:
        """Parse the raw CORS origins string into a list of origins.

        Two input formats are accepted:

        * a JSON array, e.g. ``'["https://a.example", "https://b.example"]'``
        * a comma-separated string, e.g. ``'https://a.example, https://b.example'``

        Both formats are normalised the same way: surrounding whitespace is
        stripped and empty entries are dropped. Non-string items inside a JSON
        array are discarded so the result always matches ``List[str]``.
        A value that starts with ``[`` but is not valid JSON falls back to
        comma-separated parsing.

        Returns:
            The list of allowed origins (``["*"]`` for the default value).
        """
        import json  # local import, as in the original implementation

        raw_value: str = self.ALLOWED_ORIGINS_RAW

        if raw_value.strip().startswith("["):
            try:
                parsed = json.loads(raw_value)
            except json.JSONDecodeError:
                # Deliberate best-effort: treat malformed JSON as a plain
                # comma-separated string instead of failing.
                parsed = None
            if isinstance(parsed, list):
                return [
                    origin.strip()
                    for origin in parsed
                    if isinstance(origin, str) and origin.strip()
                ]

        return [origin.strip() for origin in raw_value.split(",") if origin.strip()]

    # ------------------------------------------------------------------
    # Audio constraints
    # ------------------------------------------------------------------
    MAX_AUDIO_SIZE_MB: int = 10
    SUPPORTED_LANGUAGES: List[str] = [
        "Auto", "English", "Hindi", "Hinglish", "Mixed",
        "Tamil", "Malayalam", "Telugu",
    ]
    SUPPORTED_FORMATS: List[str] = [
        "mp3", "wav", "flac", "ogg", "m4a", "mp4", "webm",
    ]

    # ------------------------------------------------------------------
    # ASR settings
    # ------------------------------------------------------------------
    ASR_ENABLED: bool = Field(
        default=True,
        description="Enable speech-to-text analysis for realtime sessions",
    )
    ASR_MODEL_SIZE: str = Field(default="tiny", description="faster-whisper model size")
    ASR_COMPUTE_TYPE: str = Field(default="int8", description="faster-whisper compute type")
    ASR_BEAM_SIZE: int = Field(default=3, description="Beam size for ASR decoding")
    ASR_TIMEOUT_MS: int = Field(
        default=1200,
        ge=200,
        le=15000,
        description="Max realtime ASR duration per chunk before timeout fallback",
    )
    # Concurrency caps for background inference work (see field descriptions).
    ASR_MAX_INFLIGHT_TASKS: int = Field(
        default=3,
        ge=1,
        le=8,
        description="Maximum concurrent ASR background tasks allowed to prevent thread pileups",
    )
    VOICE_MAX_INFLIGHT_TASKS: int = Field(
        default=2,
        ge=1,
        le=8,
        description="Maximum concurrent Voice Analysis PyTorch tasks allowed to prevent OOM thread pileups",
    )
    # Startup warm-up toggles.
    ASR_WARMUP_ENABLED: bool = Field(
        default=True,
        description="Warm faster-whisper model during startup to avoid first-chunk latency spike",
    )
    AUDIO_PIPELINE_WARMUP_ENABLED: bool = Field(
        default=True,
        description="Warm audio decoding/resampling pipeline during startup",
    )
    VOICE_WARMUP_ENABLED: bool = Field(
        default=True,
        description="Run one startup inference through voice analyzer to avoid first-chunk latency spikes",
    )

    # ------------------------------------------------------------------
    # Voice classification model settings
    # ------------------------------------------------------------------
    VOICE_MODEL_ID: str = Field(
        default="shivam-2211/voice-detection-model",
        description="Primary Hugging Face model id for AI voice detection",
    )
    VOICE_MODEL_BACKUP_ID: str = Field(
        default="mo-thecreator/Deepfake-audio-detection",
        description="Backup model id if primary model load fails",
    )
    VOICE_MODEL_LOCAL_PATH: str = Field(
        default="./fine_tuned_model",
        description="Optional local model path that takes priority when present",
    )
    MODEL_LOGIT_TEMPERATURE: float = Field(
        default=1.5,
        ge=1.0,
        le=10.0,
        description="Temperature scaling for model logits before softmax. Higher values reduce overconfidence. 1.0 = no scaling.",
    )
    REALTIME_LIGHTWEIGHT_AUDIO: bool = Field(
        default=False,
        description="Use lightweight audio analysis path for realtime chunk processing (set true for throughput-first mode)",
    )
    LEGACY_FALLBACK_RETURNS_UNCERTAIN: bool = Field(
        default=True,
        description="Return UNCERTAIN classification on legacy endpoint when ML fallback occurs",
    )

    # ------------------------------------------------------------------
    # Risk policy (versioned + configurable weights)
    # ------------------------------------------------------------------
    # The four default weights sum to 1.0; nothing in this class enforces
    # that invariant when they are overridden.
    RISK_POLICY_VERSION: str = Field(default="v1.2", description="Version tag for realtime risk policy")
    RISK_WEIGHT_AUDIO: float = Field(default=0.45, ge=0.0, le=1.0)
    RISK_WEIGHT_KEYWORD: float = Field(default=0.20, ge=0.0, le=1.0)
    RISK_WEIGHT_SEMANTIC: float = Field(default=0.15, ge=0.0, le=1.0)
    RISK_WEIGHT_BEHAVIOUR: float = Field(default=0.20, ge=0.0, le=1.0)
    RISK_DELTA_BOOST_FACTOR: float = Field(
        default=0.30,
        ge=0.0,
        le=1.0,
        description="How strongly risk increases when per-chunk delta is positive",
    )

    # ------------------------------------------------------------------
    # Optional LLM semantic verifier (second-layer, not primary classifier)
    # ------------------------------------------------------------------
    LLM_SEMANTIC_ENABLED: bool = Field(default=False)
    LLM_PROVIDER: str = Field(default="gemini", description="LLM provider: openai or gemini")
    LLM_SEMANTIC_MODEL: str = Field(
        default="gemini-2.5-flash",
        description="Model name for selected LLM provider (optional)",
    )
    LLM_SEMANTIC_TIMEOUT_MS: int = Field(default=900, ge=100, le=5000)
    LLM_SEMANTIC_MIN_ASR_CONFIDENCE: float = Field(default=0.35, ge=0.0, le=1.0)
    LLM_SEMANTIC_CHUNK_INTERVAL: int = Field(default=2, ge=1, le=20)
    LLM_SEMANTIC_BLEND_WEIGHT: float = Field(
        default=0.20,
        ge=0.0,
        le=1.0,
        description="Weight assigned to LLM semantic score in fused semantic score",
    )
    OPENAI_API_KEY: str | None = Field(
        default=None,
        description="Optional OpenAI API key for LLM semantic verifier",
    )
    GEMINI_API_KEY: str | None = Field(
        default=None,
        description="Optional Gemini API key for LLM semantic verifier",
    )

    # ------------------------------------------------------------------
    # Session store backend
    # ------------------------------------------------------------------
    # NOTE(review): the default backend is "redis" while REDIS_URL defaults to
    # None; this class does not validate that combination, so whatever
    # consumes these settings presumably handles a missing URL - confirm.
    SESSION_STORE_BACKEND: str = Field(
        default="redis",
        description="Session store backend: memory or redis",
    )
    REDIS_URL: str | None = Field(
        default=None,
        description="Redis URL for session state and queue (required when SESSION_STORE_BACKEND=redis)",
    )
    REDIS_PREFIX: str = Field(
        default="ai_call_shield",
        description="Redis key prefix namespace",
    )
    REDIS_CONNECT_TIMEOUT_MS: int = Field(default=2000, ge=100, le=30000)
    REDIS_IO_TIMEOUT_MS: int = Field(default=2000, ge=100, le=30000)

    # ------------------------------------------------------------------
    # Deep-lane async verification controls
    # ------------------------------------------------------------------
    DEEP_LANE_ENABLED: bool = Field(
        default=False,
        description="Enable asynchronous deep-lane verification after fast-lane decision",
    )
    DEEP_LANE_QUEUE_BACKEND: str = Field(
        default="memory",
        description="Queue backend: memory or redis",
    )
    DEEP_LANE_MAX_WORKERS: int = Field(default=2, ge=1, le=16)
    DEEP_LANE_MAX_RETRIES: int = Field(default=1, ge=0, le=10)
    DEEP_LANE_RETRY_BACKOFF_MS: int = Field(default=500, ge=0, le=60000)
    DEEP_LANE_TARGET_LATENCY_MS: int = Field(default=3000, ge=200, le=10000)

    # ------------------------------------------------------------------
    # Performance targets (for harness/reporting and CI gates)
    # ------------------------------------------------------------------
    PERF_CHUNK_P95_TARGET_MS: int = Field(default=1200, ge=100, le=10000)
    PERF_ALERT_P95_TARGET_MS: int = Field(default=2500, ge=100, le=10000)

    # ------------------------------------------------------------------
    # Session retention and privacy controls
    # ------------------------------------------------------------------
    SESSION_ACTIVE_RETENTION_SECONDS: int = Field(
        default=1800,
        description="Retention TTL for active sessions with no updates",
    )
    SESSION_ENDED_RETENTION_SECONDS: int = Field(
        default=300,
        description="Retention TTL for ended sessions before purge",
    )
    MASK_TRANSCRIPT_OUTPUT: bool = Field(
        default=True,
        description="Mask sensitive entities from transcript before returning response",
    )

    # ------------------------------------------------------------------
    # WebSocket limits
    # ------------------------------------------------------------------
    WS_MAX_DURATION_SECONDS: int = Field(
        default=1800,
        description="Maximum WebSocket connection duration in seconds (30 min)",
    )
    WS_IDLE_TIMEOUT_SECONDS: int = Field(
        default=120,
        description="Close WebSocket if no message received within this many seconds",
    )

    # ------------------------------------------------------------------
    # Environment specific
    # ------------------------------------------------------------------
    SPACE_ID: str | None = Field(
        default=None,
        description="Hugging Face Space ID if running in Spaces",
    )

    # Settings-source behaviour: read ".env", match names case-sensitively,
    # and ignore keys that do not correspond to a declared field.
    model_config = {
        "env_file": ".env",
        "case_sensitive": True,
        "extra": "ignore",
    }
# Global settings instance, constructed once when this module is imported.
# API_KEY is declared without a default, so it must be supplied (environment
# or .env) for this construction to succeed.
settings = Settings()