# video-dubbing-agent / config.py
# dashhdata's picture
# Upload folder using huggingface_hub
# ea2dffa verified
"""
Video Dubbing Agent — Configuration
Deployed version with HuggingFace GPU transcription + male-only voices.
"""
import os
from pathlib import Path
# Filesystem layout: every path is anchored at the directory holding this module.
BASE_DIR = Path(__file__).parents[0]
TEMP_DIR = BASE_DIR.joinpath("temp_jobs")
STATIC_DIR = BASE_DIR.joinpath("static")
TEMPLATES_DIR = BASE_DIR.joinpath("templates")

# Import-time side effect: create the temp_jobs directory if it is missing.
TEMP_DIR.mkdir(exist_ok=True)
# Server
HOST = "0.0.0.0"  # wildcard IPv4 address
# PORT is read from the environment. An unset, empty, or whitespace-only value
# falls back to 7860 (HF Spaces uses 7860) instead of raising ValueError from
# int(""). A non-numeric value still raises, so misconfiguration stays visible.
PORT = int(os.getenv("PORT", "").strip() or 7860)
# DEBUG is on only for the literal "true" (case-insensitive); whitespace
# around the value is ignored.
DEBUG = os.getenv("DEBUG", "false").strip().lower() == "true"
# YouTube
# Format alternatives joined with "/" into a single selector string
# (looks like a yt-dlp-style format selector — confirm against the downloader).
YT_FORMAT = "/".join(
    (
        "bestvideo[ext=mp4]+bestaudio[ext=m4a]",
        "best[ext=mp4]",
        "best",
    )
)
YT_MAX_DURATION = 4 * 60 * 60  # 14400; presumably seconds (4 hours) — confirm

# Audio
AUDIO_SAMPLE_RATE = 16_000  # presumably Hz — confirm against the consumer
AUDIO_CHANNELS = 1
# === TRANSCRIPTION: HuggingFace Free GPU API ===
# Primary path: HF's free hosted Inference API (GPU), so no local model is needed.
HF_API_URL = (
    "https://api-inference.huggingface.co"
    "/models/openai/whisper-small"
)
# Optional token (empty string when unset); set it for higher rate limits.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Fallback path: local faster-whisper, used if the HF API fails.
WHISPER_MODEL_SIZE = os.environ.get("WHISPER_MODEL", "tiny")
WHISPER_COMPUTE_TYPE = "int8"
# === VOICE: MALE ONLY ===
# When True, every speaker gets the male voice regardless of detected gender.
FORCE_MALE_VOICE = True

# Edge TTS voice map: language code -> {"male": voice, "female": voice}.
# Only male voices are configured, so both gender keys of a language point at
# the same voice name; the table is therefore built from one voice per language.
EDGE_TTS_VOICES = {
    lang_code: {"male": voice_name, "female": voice_name}
    for lang_code, voice_name in (
        ("mr", "mr-IN-ManoharNeural"),
        ("hi", "hi-IN-MadhurNeural"),
        ("en", "en-US-GuyNeural"),
        ("es", "es-ES-AlvaroNeural"),
        ("fr", "fr-FR-HenriNeural"),
        ("de", "de-DE-ConradNeural"),
        ("ja", "ja-JP-KeitaNeural"),
        ("zh", "zh-CN-YunxiNeural"),
        ("pt", "pt-BR-AntonioNeural"),
        ("ar", "ar-SA-HamedNeural"),
        ("ko", "ko-KR-InJoonNeural"),
        ("ru", "ru-RU-DmitryNeural"),
        ("it", "it-IT-DiegoNeural"),
        ("ta", "ta-IN-ValluvarNeural"),
        ("te", "te-IN-MohanNeural"),
        ("bn", "bn-IN-BashkarNeural"),
        ("gu", "gu-IN-NiranjanNeural"),
        ("kn", "kn-IN-GaganNeural"),
        ("ml", "ml-IN-MidhunNeural"),
        ("ur", "ur-PK-AsadNeural"),
    )
}
# Translation
# Engine name is overridable through the TRANSLATION_ENGINE environment variable.
TRANSLATION_ENGINE = os.environ.get("TRANSLATION_ENGINE", "google")
TRANSLATION_BATCH_SIZE = 50

# WhisperX settings (used by local fallback)
WHISPER_BATCH_SIZE = 16
CHUNK_DURATION_SEC = 10 * 60  # 600 seconds

# Gender detection (not used when FORCE_MALE_VOICE=True, but imported by old code)
GENDER_PITCH_THRESHOLD = 165  # presumably Hz — confirm against the detector
GENDER_MIN_SAMPLE_SEC = 3
# TTS
TTS_MAX_SPEED_FACTOR = 1.3
TTS_CROSSFADE_MS = 50  # milliseconds, per the name

# Background audio level and job limits
# (how these are applied is not visible in this module).
BACKGROUND_VOLUME = 0.15
JOB_EXPIRY_HOURS = 12
MAX_CONCURRENT_JOBS = 2

# Audio chunk size for HF API (30 seconds per chunk)
HF_CHUNK_DURATION_SEC = 30
# Language code -> English display name for every supported language.
SUPPORTED_LANGUAGES = dict(
    mr="Marathi",
    hi="Hindi",
    en="English",
    es="Spanish",
    fr="French",
    de="German",
    ja="Japanese",
    zh="Chinese",
    pt="Portuguese",
    ar="Arabic",
    ko="Korean",
    ru="Russian",
    it="Italian",
    ta="Tamil",
    te="Telugu",
    bn="Bengali",
    gu="Gujarati",
    kn="Kannada",
    ml="Malayalam",
    ur="Urdu",
)