# Spaces: Running
# Compose services. Every ${VAR:-default} below is resolved by Compose
# variable interpolation: the host/.env value is used when set and non-empty,
# otherwise the default after ":-" applies.
services:
  app:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Build-time port; defaults to the same value as the published port.
        - PORT=${PORT:-7860}
    container_name: precisionvoice
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "${PORT:-7860}:${PORT:-7860}"
    volumes:
      # Persist uploaded/processed files
      - ./data:/app/data
      # Cache models to avoid re-downloading
      # (named volumes are declared under the top-level `volumes` key)
      - model_cache_hf:/root/.cache/huggingface
      - model_cache_torch:/root/.cache/torch
      - model_cache_mdx:/root/.audio-separator-models
    environment:
      # HuggingFace token (required for pyannote.audio)
      # Defaults to an empty string when HF_TOKEN is not set on the host.
      - HF_TOKEN=${HF_TOKEN:-}
      # Model settings
      - WHISPER_MODEL=${WHISPER_MODEL:-erax-ai/EraX-WoW-Turbo-V1.1-CT2}
      - DIARIZATION_MODEL=${DIARIZATION_MODEL:-pyannote/speaker-diarization-3.1}
      # Device (auto, cuda, cpu)
      - DEVICE=${DEVICE:-auto}
      # Speech Enhancement (SpeechBrain SepFormer)
      - ENABLE_SPEECH_ENHANCEMENT=${ENABLE_SPEECH_ENHANCEMENT:-True}
      - ENHANCEMENT_MODEL=${ENHANCEMENT_MODEL:-speechbrain/sepformer-dns4-16k-enhancement}
      # MDX-Net Vocal Separation
      - ENABLE_VOCAL_SEPARATION=${ENABLE_VOCAL_SEPARATION:-True}
      - MDX_MODEL=${MDX_MODEL:-UVR-MDX-NET-Voc_FT}
      # Upload settings
      - MAX_UPLOAD_SIZE_MB=${MAX_UPLOAD_SIZE_MB:-100}
      # Optimization settings
      - ENABLE_LOUDNORM=${ENABLE_LOUDNORM:-True}
      - ENABLE_NOISE_REDUCTION=${ENABLE_NOISE_REDUCTION:-True}
      # VAD settings
      - VAD_THRESHOLD=${VAD_THRESHOLD:-0.5}
      - VAD_MIN_SPEECH_DURATION_MS=${VAD_MIN_SPEECH_DURATION_MS:-250}
      - VAD_MIN_SILENCE_DURATION_MS=${VAD_MIN_SILENCE_DURATION_MS:-500}
      # Clustering settings
      - MERGE_THRESHOLD_S=${MERGE_THRESHOLD_S:-0.5}
      - MIN_SEGMENT_DURATION_S=${MIN_SEGMENT_DURATION_S:-0.3}
    restart: unless-stopped
    # GPU support (uncomment for NVIDIA GPU)
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
# Named volumes backing the model caches mounted by the `app` service.
# Each entry sets an explicit `name` for the underlying volume.
volumes:
  model_cache_hf:
    name: precisionvoice_hf_cache
  model_cache_torch:
    name: precisionvoice_torch_cache
  model_cache_mdx:
    name: precisionvoice_mdx_cache