Spaces:

Resilient-Coders
/

aidoc-tts

Sleeping

App Files Files Community

JustinJoshi committed on Apr 17

Commit

a23c103

1 Parent(s): 8fedf61

Add vi back via fairseq model_name + TTS_HOME mirror approach

Browse files

Files changed (1) hide show

app.py +43 -6

app.py CHANGED Viewed

@@ -32,10 +32,16 @@ DEFAULT_SPEAKER = os.environ.get("COQUI_DEFAULT_SPEAKER", "p228")
 REPOS: dict[str, str] = {
     "en": os.environ.get("HF_TTS_EN_REPO", "Resilient-Coders/coqui-vctk-en"),
     "es": os.environ.get("HF_TTS_ES_REPO", "Resilient-Coders/coqui-css10-es"),
-    # "vi" (Vietnamese MMS fairseq) uses a config format incompatible with
-    # coqui-tts 0.27.x. Vietnamese is handled by the local sidecar / cloud fallback.
 }
 WEIGHT_FILE_CANDIDATES = ["model.pth", "model_file.pth.tar", "model_file.pth"]
@@ -124,6 +130,29 @@ def patch_config(local_dir: str) -> str:
     return config_path
 def get_tts(lang: str) -> TTS:
     if lang not in REPOS:
         raise HTTPException(status_code=400, detail=f"Unsupported language: {lang}")
@@ -131,10 +160,18 @@ def get_tts(lang: str) -> TTS:
         repo_id = REPOS[lang]
         print(f"[tts] downloading repo for {lang}: {repo_id}", flush=True)
         local_dir = snapshot_download(repo_id=repo_id)
-        weights = resolve_weights(local_dir)
-        config_path = patch_config(local_dir)
-        print(f"[tts] loading {weights}", flush=True)
-        tts_instances[lang] = TTS(model_path=weights, config_path=config_path, progress_bar=False).to("cpu")
     return tts_instances[lang]

 REPOS: dict[str, str] = {
     "en": os.environ.get("HF_TTS_EN_REPO", "Resilient-Coders/coqui-vctk-en"),
     "es": os.environ.get("HF_TTS_ES_REPO", "Resilient-Coders/coqui-css10-es"),
+    "vi": os.environ.get("HF_TTS_VI_REPO", "Resilient-Coders/mms-tts-vie"),
 }
+# Vietnamese uses Fairseq format. Coqui loads it via model_name (model_dir path),
+# which calls _load_fairseq_from_dir and never reads config.json.
+# We mirror the HF snapshot files into TTS_HOME so model_name lookup finds them.
+TTS_HOME = os.path.join(os.path.expanduser("~"), ".local", "share", "tts")
+VI_MODEL_NAME = "tts_models/vie/fairseq/vits"
+VI_TTS_HOME_DIR = os.path.join(TTS_HOME, "tts_models--vie--fairseq--vits")
 WEIGHT_FILE_CANDIDATES = ["model.pth", "model_file.pth.tar", "model_file.pth"]
     return config_path
+def setup_fairseq_vi(local_dir: str) -> None:
+    """Mirror HF snapshot files for the Vietnamese fairseq model into TTS_HOME.
+    Coqui's fairseq loader uses model_name -> model_dir -> _load_fairseq_from_dir,
+    which creates a blank VitsConfig and never reads config.json. Setting up the
+    TTS_HOME directory lets us use model_name without re-downloading from Coqui's
+    (defunct) registry, and avoids the config format incompatibility.
+    """
+    os.makedirs(VI_TTS_HOME_DIR, exist_ok=True)
+    for fname in os.listdir(local_dir):
+        if fname.startswith("."):
+            continue
+        src = os.path.realpath(os.path.join(local_dir, fname))
+        dst = os.path.join(VI_TTS_HOME_DIR, fname)
+        if not os.path.exists(dst) and os.path.isfile(src):
+            try:
+                os.symlink(src, dst)
+            except OSError:
+                import shutil
+                shutil.copy2(src, dst)
+            print(f"[tts] vi: linked {fname}", flush=True)
 def get_tts(lang: str) -> TTS:
     if lang not in REPOS:
         raise HTTPException(status_code=400, detail=f"Unsupported language: {lang}")
         repo_id = REPOS[lang]
         print(f"[tts] downloading repo for {lang}: {repo_id}", flush=True)
         local_dir = snapshot_download(repo_id=repo_id)
+        if lang == "vi":
+            # Fairseq format: use model_name so Coqui routes through
+            # _load_fairseq_from_dir (blank VitsConfig, bypasses config.json parse).
+            setup_fairseq_vi(local_dir)
+            print(f"[tts] loading vi via model_name={VI_MODEL_NAME}", flush=True)
+            tts_instances[lang] = TTS(model_name=VI_MODEL_NAME, progress_bar=False).to("cpu")
+        else:
+            weights = resolve_weights(local_dir)
+            config_path = patch_config(local_dir)
+            print(f"[tts] loading {weights}", flush=True)
+            tts_instances[lang] = TTS(model_path=weights, config_path=config_path, progress_bar=False).to("cpu")
     return tts_instances[lang]