Spaces:
Sleeping
Sleeping
| # Environment Configuration for PrecisionVoice | |
| # HuggingFace access token (required to download the pyannote.audio diarization model) | |
| # Create a token at: https://huggingface.co/settings/tokens | |
| # Accept the model's user conditions at: https://huggingface.co/pyannote/speaker-diarization-3.1 | |
| HF_TOKEN=your_huggingface_token_here | |
| # Model settings | |
| WHISPER_MODEL=kiendt/PhoWhisper-large-ct2 | |
| DIARIZATION_MODEL=pyannote/speaker-diarization-3.1 | |
| # Device settings (cuda, cpu, or auto) | |
| DEVICE=auto | |
| # --- Denoising (Speech Enhancement) --- | |
| # Enable speech enhancement (removes background noise, hum, etc.) | |
| ENABLE_DENOISER=True | |
| # Denoiser model: dns64 (standard), dns48, or master64 | |
| DENOISER_MODEL=dns64 | |
| # --- MDX-Net Vocal Separation --- | |
| # Enable vocal separation before transcription (isolates voice from music/noise) | |
| # More effective than the basic Demucs implementation. | |
| ENABLE_VOCAL_SEPARATION=True | |
| # MDX-Net model: Kim_Vocal_2.onnx (recommended for vocals) | |
| MDX_MODEL=Kim_Vocal_2.onnx | |
| # Upload settings | |
| MAX_UPLOAD_SIZE_MB=100 | |
| # --- Optimization Settings --- | |
| # Enable noise reduction: a subtle high-pass filter that removes low-frequency rumble below 80 Hz | |
| ENABLE_NOISE_REDUCTION=True | |
| # Enable loudness normalization (EBU R128) | |
| ENABLE_LOUDNORM=True | |
| # --- VAD (Voice Activity Detection) Settings --- | |
| # Threshold for detecting speech (0.0 to 1.0). Higher values are stricter, so less audio is classified as speech | |
| VAD_THRESHOLD=0.5 | |
| # Ignore speech segments shorter than this (milliseconds) | |
| VAD_MIN_SPEECH_DURATION_MS=250 | |
| # Minimum silence duration to split segments (milliseconds) | |
| VAD_MIN_SILENCE_DURATION_MS=500 | |
| # --- Post-processing (Clustering) Settings --- | |
| # Merge segments from same speaker if gap is less than this (seconds) | |
| MERGE_THRESHOLD_S=0.5 | |
| # Filter out segments shorter than this (seconds) - removes blips/noise | |
| MIN_SEGMENT_DURATION_S=0.3 | |
| # Server settings (HOST=0.0.0.0 listens on all network interfaces; use 127.0.0.1 for local-only access) | |
| HOST=0.0.0.0 | |
| PORT=8000 | |