File size: 2,162 Bytes
4d6b6c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
services:
  # The application container, built from the Dockerfile in this directory.
  app:
    container_name: precisionvoice
    restart: unless-stopped
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Build-time port; 7860 is used when PORT is unset or empty
        - "PORT=${PORT:-7860}"
    ports:
      # Host and container side use the same port number
      - "${PORT:-7860}:${PORT:-7860}"
    environment:
      # --- Credentials ---
      # HuggingFace token (required for pyannote.audio); empty when not provided
      - "HF_TOKEN=${HF_TOKEN:-}"
      # --- Models ---
      - "WHISPER_MODEL=${WHISPER_MODEL:-erax-ai/EraX-WoW-Turbo-V1.1-CT2}"
      - "DIARIZATION_MODEL=${DIARIZATION_MODEL:-pyannote/speaker-diarization-3.1}"
      # --- Compute device: auto, cuda or cpu ---
      - "DEVICE=${DEVICE:-auto}"
      # --- Speech enhancement (SpeechBrain SepFormer) ---
      - "ENABLE_SPEECH_ENHANCEMENT=${ENABLE_SPEECH_ENHANCEMENT:-True}"
      - "ENHANCEMENT_MODEL=${ENHANCEMENT_MODEL:-speechbrain/sepformer-dns4-16k-enhancement}"
      # --- Vocal separation (MDX-Net) ---
      - "ENABLE_VOCAL_SEPARATION=${ENABLE_VOCAL_SEPARATION:-True}"
      - "MDX_MODEL=${MDX_MODEL:-UVR-MDX-NET-Voc_FT}"
      # --- Uploads ---
      - "MAX_UPLOAD_SIZE_MB=${MAX_UPLOAD_SIZE_MB:-100}"
      # --- Audio optimization toggles ---
      - "ENABLE_LOUDNORM=${ENABLE_LOUDNORM:-True}"
      - "ENABLE_NOISE_REDUCTION=${ENABLE_NOISE_REDUCTION:-True}"
      # --- Voice activity detection (VAD) ---
      - "VAD_THRESHOLD=${VAD_THRESHOLD:-0.5}"
      - "VAD_MIN_SPEECH_DURATION_MS=${VAD_MIN_SPEECH_DURATION_MS:-250}"
      - "VAD_MIN_SILENCE_DURATION_MS=${VAD_MIN_SILENCE_DURATION_MS:-500}"
      # --- Clustering ---
      - "MERGE_THRESHOLD_S=${MERGE_THRESHOLD_S:-0.5}"
      - "MIN_SEGMENT_DURATION_S=${MIN_SEGMENT_DURATION_S:-0.3}"
    volumes:
      # Bind mount: keeps uploaded/processed files on the host under ./data
      - "./data:/app/data"
      # Named volumes: cache downloaded models so they are not fetched again
      - "model_cache_hf:/root/.cache/huggingface"
      - "model_cache_torch:/root/.cache/torch"
      - "model_cache_mdx:/root/.audio-separator-models"
    # GPU support (uncomment for NVIDIA GPU)
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]

# Named volumes used by the app service as model caches.
# Each one sets an explicit `name` instead of relying on a generated one.
volumes:
  model_cache_hf:
    name: precisionvoice_hf_cache
  model_cache_mdx:
    name: precisionvoice_mdx_cache
  model_cache_torch:
    name: precisionvoice_torch_cache