Spaces:

harmonicsnail
/

Modern_TalkNET

Sleeping

harmonicsnail committed on Oct 28, 2025

Commit

2379562

1 Parent(s): 13ae703

added json dump and implemented fix

Files changed (1) hide show

model_inference.py CHANGED Viewed

@@ -13,30 +13,16 @@ CMUDICT_PATH = "cmudict.dict.txt"
 STATE_DICT_PATH = os.environ.get("NETTALK_STATE_DICT", "nettalk_state_dict.pt")
 # --- 1) Rebuild vocab from CMUdict (same method you used in notebook) ---
-def build_vocab(cmudict_path=CMUDICT_PATH):
-    words = []
-    phones_all = []
-    with open(cmudict_path, "r", encoding="utf-8", errors="ignore") as f:
-        for line in f:
-            if line.strip() and not line.startswith(";;;"):
-                parts = line.strip().split()
-                w = parts[0]
-                p = parts[1:]
-                words.append(w)
-                phones_all.append(p)
-    # character vocab from words (include space for padding)
-    char_vocab = set("".join(words))
-    char_vocab.add(" ")  # ensure space exists
-    char_to_idx = {c: i+1 for i, c in enumerate(sorted(char_vocab))}  # reserve 0 for unknown/pad
-    char_to_idx["<PAD>"] = 0
-    idx_to_char = {i: c for c, i in char_to_idx.items()}
-    phone_vocab = set(phone for p_list in phones_all for phone in p_list)
-    phone_to_idx = {p: i for i, p in enumerate(sorted(phone_vocab))}
-    idx_to_phone = {i: p for p, i in phone_to_idx.items()}
-    return char_to_idx, idx_to_char, phone_to_idx, idx_to_phone
 CHAR_TO_IDX, IDX_TO_CHAR, PHONE_TO_IDX, IDX_TO_PHONE = build_vocab()

 STATE_DICT_PATH = os.environ.get("NETTALK_STATE_DICT", "nettalk_state_dict.pt")
 # --- 1) Rebuild vocab from CMUdict (same method you used in notebook) ---
+import json
+def load_vocab():
+    with open("char_vocab.json") as f:
+        char_to_idx = json.load(f)
+    idx_to_char = {i: c for c, i in char_to_idx.items()}
+    return char_to_idx, idx_to_char
+CHAR_TO_IDX, IDX_TO_CHAR = load_vocab()
+VOCAB_SIZE = len(CHAR_TO_IDX)
 CHAR_TO_IDX, IDX_TO_CHAR, PHONE_TO_IDX, IDX_TO_PHONE = build_vocab()