File size: 2,581 Bytes
43f8b96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Language code mappings between TTS display names, Whisper ISO-639-1, and NLLB BCP-47."""

from dataclasses import dataclass
from typing import Mapping


@dataclass(frozen=True)
class LangInfo:
    """Immutable record grouping the identifiers used for one language.

    Attributes:
        display: Display name matching TTS_LANGUAGES (e.g., "Portuguese").
        nllb: NLLB-200 flores code (e.g., "por_Latn").
        whisper: Whisper ISO 639-1 code (e.g., "pt").
    """

    display: str
    nllb: str
    whisper: str


# Lookup table keyed by display name. Each key is taken from the record's own
# ``display`` field, so the key and the stored display name always agree.
# Entries are listed in the same order in which the mapping is iterated.
LANG_MAP: Mapping[str, LangInfo] = {
    entry.display: entry
    for entry in (
        LangInfo(display="Arabic", nllb="arb_Arab", whisper="ar"),
        LangInfo(display="Danish", nllb="dan_Latn", whisper="da"),
        LangInfo(display="German", nllb="deu_Latn", whisper="de"),
        LangInfo(display="Greek", nllb="ell_Grek", whisper="el"),
        LangInfo(display="English", nllb="eng_Latn", whisper="en"),
        LangInfo(display="Spanish", nllb="spa_Latn", whisper="es"),
        LangInfo(display="Finnish", nllb="fin_Latn", whisper="fi"),
        LangInfo(display="French", nllb="fra_Latn", whisper="fr"),
        LangInfo(display="Hebrew", nllb="heb_Hebr", whisper="he"),
        LangInfo(display="Hindi", nllb="hin_Deva", whisper="hi"),
        LangInfo(display="Italian", nllb="ita_Latn", whisper="it"),
        LangInfo(display="Japanese", nllb="jpn_Jpan", whisper="ja"),
        LangInfo(display="Korean", nllb="kor_Hang", whisper="ko"),
        LangInfo(display="Malay", nllb="zsm_Latn", whisper="ms"),
        LangInfo(display="Dutch", nllb="nld_Latn", whisper="nl"),
        LangInfo(display="Norwegian", nllb="nob_Latn", whisper="no"),
        LangInfo(display="Polish", nllb="pol_Latn", whisper="pl"),
        LangInfo(display="Portuguese", nllb="por_Latn", whisper="pt"),
        LangInfo(display="Russian", nllb="rus_Cyrl", whisper="ru"),
        LangInfo(display="Swedish", nllb="swe_Latn", whisper="sv"),
        LangInfo(display="Swahili", nllb="swh_Latn", whisper="sw"),
        LangInfo(display="Turkish", nllb="tur_Latn", whisper="tr"),
        LangInfo(display="Chinese", nllb="zho_Hans", whisper="zh"),
    )
}


def get_nllb_code(lang_display: str) -> str:
    """Return the NLLB code registered for a display name.

    Args:
        lang_display: Exact key of LANG_MAP (e.g., "Portuguese").

    Returns:
        The ``nllb`` field of the matching LANG_MAP entry.

    Raises:
        ValueError: If ``lang_display`` is not a key of LANG_MAP.
    """
    if lang_display not in LANG_MAP:
        raise ValueError(f"Unknown language: {lang_display!r}")
    return LANG_MAP[lang_display].nllb


def get_whisper_code(lang_display: str) -> str:
    """Return the Whisper code registered for a display name.

    Args:
        lang_display: Exact key of LANG_MAP (e.g., "Portuguese").

    Returns:
        The ``whisper`` field of the matching LANG_MAP entry.

    Raises:
        ValueError: If ``lang_display`` is not a key of LANG_MAP.
    """
    if lang_display not in LANG_MAP:
        raise ValueError(f"Unknown language: {lang_display!r}")
    return LANG_MAP[lang_display].whisper


def whisper_code_to_display(whisper_code: str) -> str | None:
    """Reverse lookup: map a Whisper code back to its display name.

    Args:
        whisper_code: Value compared for equality against each entry's
            ``whisper`` field.

    Returns:
        The ``display`` of the first LANG_MAP entry (in iteration order)
        whose ``whisper`` equals ``whisper_code``, or ``None`` if no entry
        matches.
    """
    candidates = (
        entry.display
        for entry in LANG_MAP.values()
        if entry.whisper == whisper_code
    )
    # next() with a default yields the first match or None when exhausted.
    return next(candidates, None)


def nllb_code_to_display(nllb_code: str) -> str | None:
    """Reverse lookup: map an NLLB code back to its display name.

    Args:
        nllb_code: Value compared for equality against each entry's
            ``nllb`` field.

    Returns:
        The ``display`` of the first LANG_MAP entry (in iteration order)
        whose ``nllb`` equals ``nllb_code``, or ``None`` if no entry
        matches.
    """
    candidates = (
        entry.display
        for entry in LANG_MAP.values()
        if entry.nllb == nllb_code
    )
    # next() with a default yields the first match or None when exhausted.
    return next(candidates, None)