Live_Commentary_App / languages.py
PlotweaverModel's picture
Upload 8 files
bad74fd verified
raw
history blame
10.5 kB
"""
Language configuration for PlotWeaver.
Three TTS/translation engines:
1. "qwen" — Qwen 3.5 Omni (end-to-end speech-to-speech, best for global languages)
2. "yourvoic" — YourVoic API (TTS only, paired with NLLB MT)
3. "local" — Local MMS-TTS on GPU (for Yoruba, Hausa, etc.), paired with NLLB MT
"""
# Voice names offered by the Qwen Omni engine. The list is not keyed by
# language: the same voices are used for every Qwen-backed language.
QWEN_VOICES = [
    "Cherry",
    "Serena",
    "Ethan",
    "Chelsie",
    "Momo",
    "Vivian",
    "Moon",
    "Maia",
    "Kai",
    "Nofish",
    "Bella",
    "Jennifer",
    "Ryan",
    "Katerina",
    "Aiden",
    "Eldric Sage",
    "Mia",
    "Mochi",
    "Bellona",
    "Vincent",
    "Bunny",
    "Neil",
    "Elias",
    "Arthur",
    "Seren",
    "Bodega",
    "Sonrisa",
    "Alek",
    "Dolce",
    "Sohee",
    "Ono Anna",
    "Lenn",
    "Emilien",
    "Andre",
]
# Schema of every LANGUAGES value (keys always appear in this order):
#   "nllb"            NLLB-200 language code, used for translation in the
#                     local / YourVoic pipelines
#   "yourvoic_lang"   YourVoic language code, or None
#   "yourvoic_voices" list of YourVoic voice names (may be empty)
#   "tts_engine"      one of "qwen", "yourvoic", "local"
#   "qwen_code"       short language code for Qwen prompts, or None
#   "qwen_name"       full language name for the Qwen system prompt, or None


def _entry(nllb, tts_engine, yourvoic_lang, yourvoic_voices,
           qwen_code=None, qwen_name=None):
    """Build one language record with the six schema keys in fixed order."""
    return {
        "nllb": nllb,
        "yourvoic_lang": yourvoic_lang,
        # Copy so that every record owns an independent list object.
        "yourvoic_voices": list(yourvoic_voices),
        "tts_engine": tts_engine,
        "qwen_code": qwen_code,
        "qwen_name": qwen_name,
    }


def _qwen(nllb, yourvoic_lang, yourvoic_voices, qwen_code, qwen_name):
    """Record for a language whose tts_engine is "qwen"."""
    return _entry(nllb, "qwen", yourvoic_lang, yourvoic_voices, qwen_code, qwen_name)


def _yourvoic(nllb, yourvoic_lang, yourvoic_voices):
    """Record for a language whose tts_engine is "yourvoic" (no Qwen fields)."""
    return _entry(nllb, "yourvoic", yourvoic_lang, yourvoic_voices)


def _local(nllb):
    """Record for a language whose tts_engine is "local" (no YourVoic/Qwen fields)."""
    return _entry(nllb, "local", None, ())


LANGUAGES = {
    # ---- Global languages: all routed to the Qwen engine ----
    "Arabic": _qwen(
        "arb_Arab", "ar-SA", ["Peter"],
        "ar", "Modern Standard Arabic (العربية الفصحى)",
    ),
    "Spanish": _qwen("spa_Latn", "es-ES", ["Peter", "Kylie"], "es", "Spanish"),
    "French": _qwen("fra_Latn", "fr-FR", ["Peter", "Kylie"], "fr", "French"),
    "German": _qwen("deu_Latn", "de-DE", ["Peter", "Kylie"], "de", "German"),
    "Mandarin": _qwen("zho_Hans", "zh-CN", ["Peter", "Kylie"], "zh", "Mandarin Chinese"),
    "Italian": _qwen("ita_Latn", "it-IT", ["Peter", "Kylie"], "it", "Italian"),
    "Japanese": _qwen("jpn_Jpan", "ja-JP", ["Peter", "Kylie"], "ja", "Japanese"),
    "Portuguese": _qwen("por_Latn", "pt-BR", ["Peter", "Kylie"], "pt", "Portuguese"),
    "Hindi": _qwen("hin_Deva", "hi-IN", ["Rahul", "Deepika", "Aditya"], "hi", "Hindi"),
    "Korean": _qwen("kor_Hang", "ko-KR", ["Peter", "Kylie"], "ko", "Korean"),
    "Russian": _qwen("rus_Cyrl", "ru-RU", ["Peter", "Kylie"], "ru", "Russian"),

    # ---- African languages ----
    # Mixed engines: Yoruba, Hausa, Igbo and Zulu use the local pipeline
    # (Whisper → NLLB → MMS-TTS); Swahili, Amharic and Afrikaans use YourVoic.
    "Yoruba": _local("yor_Latn"),
    "Hausa": _local("hau_Latn"),
    "Igbo": _local("ibo_Latn"),
    "Swahili": _yourvoic("swh_Latn", "sw-KE", ["Peter"]),
    "Zulu": _local("zul_Latn"),
    "Amharic": _yourvoic("amh_Ethi", "am-ET", ["Peter"]),
    "Afrikaans": _yourvoic("afr_Latn", "af-ZA", ["Peter"]),

    # ---- South Asian: YourVoic TTS + NLLB MT ----
    "Bengali": _yourvoic("ben_Beng", "bn-IN", ["Sneha", "Aryan"]),
    "Tamil": _yourvoic("tam_Taml", "ta-IN", ["Priya", "Kumar"]),
    "Telugu": _yourvoic("tel_Telu", "te-IN", ["Arjun", "Lakshmi"]),
    "Marathi": _yourvoic("mar_Deva", "mr-IN", ["Anjali", "Rohan"]),
    "Urdu": _yourvoic("urd_Arab", "ur-PK", ["Peter"]),
    "Nepali": _yourvoic("npi_Deva", "ne-NP", ["Peter"]),

    # ---- Southeast Asian: YourVoic ----
    "Indonesian": _yourvoic("ind_Latn", "id-ID", ["Peter"]),
    "Vietnamese": _yourvoic("vie_Latn", "vi-VN", ["Peter"]),
    "Thai": _yourvoic("tha_Thai", "th-TH", ["Peter"]),
    "Malay": _yourvoic("zsm_Latn", "ms-MY", ["Peter"]),
    "Filipino": _yourvoic("tgl_Latn", "fil-PH", ["Peter"]),

    # ---- European: YourVoic ----
    "Dutch": _yourvoic("nld_Latn", "nl-NL", ["Peter"]),
    "Polish": _yourvoic("pol_Latn", "pl-PL", ["Peter"]),
    "Turkish": _yourvoic("tur_Latn", "tr-TR", ["Peter"]),
    "Swedish": _yourvoic("swe_Latn", "sv-SE", ["Peter"]),
    "Romanian": _yourvoic("ron_Latn", "ro-RO", ["Peter"]),
    "Greek": _yourvoic("ell_Grek", "el-GR", ["Peter"]),
    "Ukrainian": _yourvoic("ukr_Cyrl", "uk-UA", ["Peter"]),
    "Finnish": _yourvoic("fin_Latn", "fi-FI", ["Peter"]),
    "Danish": _yourvoic("dan_Latn", "da-DK", ["Peter"]),
    "Norwegian": _yourvoic("nob_Latn", "nb-NO", ["Peter"]),

    # ---- Middle Eastern: YourVoic ----
    "Persian": _yourvoic("pes_Arab", "fa-IR", ["Peter"]),
    "Hebrew": _yourvoic("heb_Hebr", "he-IL", ["Peter"]),
}
# UI grouping: category label -> display names, in the order they should be
# listed. Every name here is also a key of LANGUAGES.
LANGUAGE_GROUPS = {
    "Global Languages": [
        "Spanish",
        "French",
        "German",
        "Mandarin",
        "Italian",
        "Japanese",
        "Portuguese",
        "Hindi",
        "Arabic",
        "Korean",
        "Russian",
    ],
    "African Languages": [
        "Yoruba",
        "Hausa",
        "Igbo",
        "Swahili",
        "Zulu",
        "Amharic",
        "Afrikaans",
    ],
    "South Asian": [
        "Bengali",
        "Tamil",
        "Telugu",
        "Marathi",
        "Urdu",
        "Nepali",
    ],
    "Southeast Asian": [
        "Indonesian",
        "Vietnamese",
        "Thai",
        "Malay",
        "Filipino",
    ],
    "European": [
        "Dutch",
        "Polish",
        "Turkish",
        "Swedish",
        "Romanian",
        "Greek",
        "Ukrainian",
        "Finnish",
        "Danish",
        "Norwegian",
    ],
    "Middle Eastern": [
        "Persian",
        "Hebrew",
    ],
}
# Every display name in alphabetical order (used to populate dropdowns).
ALL_LANGUAGE_NAMES = sorted(LANGUAGES)

# Display names whose "tts_engine" is "local", in LANGUAGES order.
LOCAL_TTS_LANGUAGES = [
    name for name, config in LANGUAGES.items()
    if config["tts_engine"] == "local"
]

# Display names whose "tts_engine" is "yourvoic", in LANGUAGES order.
YOURVOIC_LANGUAGES = [
    name for name, config in LANGUAGES.items()
    if config["tts_engine"] == "yourvoic"
]