Spaces:

trysem
/

PrahaTTS-ML

Sleeping

App Files Files Community

trysem committed 17 days ago

Commit

9175bc3

verified ·

1 Parent(s): e82747c

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -14

app.py CHANGED Viewed

@@ -30,33 +30,47 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 repo_id = "Praha-Labs/PrahaTTS-ML"
 def load_model():
-    print(f"Loading base Chatterbox model on {device}...")
-    model = ChatterboxTTS.from_pretrained(device=device)
     print("Downloading custom Indic tokenizer and config...")
     try:
         tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
         config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
-        # 1. OVERRIDE THE TOKENIZER PROPERLY
-        # Instead of 'load_from_file', we must instantiate the Chatterbox EnTokenizer class directly.
-        model.tokenizer = EnTokenizer(tokenizer_path)
-        print("Indic Tokenizer successfully injected.")
-        # 2. DYNAMICALLY LOAD THE CONFIG SCRIPT
-        # This prevents the English text cleaner from stripping out Malayalam characters.
         spec = importlib.util.spec_from_file_location("config_indic", config_path)
         config_indic = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(config_indic)
-        # If the config script provides a specific override function, call it
         if hasattr(config_indic, 'apply_config'):
-            config_indic.apply_config(model)
-        print("Indic Configuration/Alphabet applied.")
     except Exception as e:
-        print(f"Error during tokenizer/config load: {e}")
     print("Loading LoRA adapter weights...")
     try:
         if hasattr(model, 't3'):

 repo_id = "Praha-Labs/PrahaTTS-ML"
 def load_model():
     print("Downloading custom Indic tokenizer and config...")
     try:
         tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
         config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
+        # 1. DYNAMICALLY LOAD THE CONFIG SCRIPT FIRST!
+        # This patches the Chatterbox character/symbol lists before the model initializes.
         spec = importlib.util.spec_from_file_location("config_indic", config_path)
         config_indic = importlib.util.module_from_spec(spec)
         spec.loader.exec_module(config_indic)
         if hasattr(config_indic, 'apply_config'):
+            try:
+                config_indic.apply_config()
+            except TypeError:
+                pass # Try again later if it requires the model object
+        print("Indic Configuration applied. Vocabulary patched.")
     except Exception as e:
+        print(f"Error during config load: {e}")
+    # 2. NOW LOAD THE BASE MODEL
+    # It will now initialize its PyTorch embeddings using the NEW patched vocabulary size!
+    print(f"Loading base Chatterbox model on {device}...")
+    model = ChatterboxTTS.from_pretrained(device=device)
+    # Run the override again if it specifically required the initialized model object
+    if hasattr(config_indic, 'apply_config'):
+        try:
+            config_indic.apply_config(model)
+        except TypeError:
+            pass
+    # 3. APPLY TOKENIZER
+    try:
+        model.tokenizer = EnTokenizer(tokenizer_path)
+        print("Indic Tokenizer successfully injected.")
+    except Exception as e:
+        print(f"Error during tokenizer inject: {e}")
+    # 4. LOAD LORA ADAPTER
     print("Loading LoRA adapter weights...")
     try:
         if hasattr(model, 't3'):