Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,33 +30,47 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 30 |
repo_id = "Praha-Labs/PrahaTTS-ML"
|
| 31 |
|
| 32 |
def load_model():
|
| 33 |
-
print(f"Loading base Chatterbox model on {device}...")
|
| 34 |
-
model = ChatterboxTTS.from_pretrained(device=device)
|
| 35 |
-
|
| 36 |
print("Downloading custom Indic tokenizer and config...")
|
| 37 |
try:
|
| 38 |
tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
|
| 39 |
config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
|
| 40 |
|
| 41 |
-
# 1.
|
| 42 |
-
#
|
| 43 |
-
model.tokenizer = EnTokenizer(tokenizer_path)
|
| 44 |
-
print("Indic Tokenizer successfully injected.")
|
| 45 |
-
|
| 46 |
-
# 2. DYNAMICALLY LOAD THE CONFIG SCRIPT
|
| 47 |
-
# This prevents the English text cleaner from stripping out Malayalam characters.
|
| 48 |
spec = importlib.util.spec_from_file_location("config_indic", config_path)
|
| 49 |
config_indic = importlib.util.module_from_spec(spec)
|
| 50 |
spec.loader.exec_module(config_indic)
|
| 51 |
|
| 52 |
-
# If the config script provides a specific override function, call it
|
| 53 |
if hasattr(config_indic, 'apply_config'):
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
except Exception as e:
|
| 58 |
-
print(f"Error during
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
|
|
|
| 60 |
print("Loading LoRA adapter weights...")
|
| 61 |
try:
|
| 62 |
if hasattr(model, 't3'):
|
|
|
|
| 30 |
repo_id = "Praha-Labs/PrahaTTS-ML"
|
| 31 |
|
| 32 |
def load_model():
|
|
|
|
|
|
|
|
|
|
| 33 |
print("Downloading custom Indic tokenizer and config...")
|
| 34 |
try:
|
| 35 |
tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
|
| 36 |
config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
|
| 37 |
|
| 38 |
+
# 1. DYNAMICALLY LOAD THE CONFIG SCRIPT FIRST!
|
| 39 |
+
# This patches the Chatterbox character/symbol lists before the model initializes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
spec = importlib.util.spec_from_file_location("config_indic", config_path)
|
| 41 |
config_indic = importlib.util.module_from_spec(spec)
|
| 42 |
spec.loader.exec_module(config_indic)
|
| 43 |
|
|
|
|
| 44 |
if hasattr(config_indic, 'apply_config'):
|
| 45 |
+
try:
|
| 46 |
+
config_indic.apply_config()
|
| 47 |
+
except TypeError:
|
| 48 |
+
pass # Try again later if it requires the model object
|
| 49 |
+
print("Indic Configuration applied. Vocabulary patched.")
|
| 50 |
|
| 51 |
except Exception as e:
|
| 52 |
+
print(f"Error during config load: {e}")
|
| 53 |
+
|
| 54 |
+
# 2. NOW LOAD THE BASE MODEL
|
| 55 |
+
# Intent: the model should now build its PyTorch embeddings with the patched vocabulary size (assumes from_pretrained reads the patched config; verify).
|
| 56 |
+
print(f"Loading base Chatterbox model on {device}...")
|
| 57 |
+
model = ChatterboxTTS.from_pretrained(device=device)
|
| 58 |
+
|
| 59 |
+
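# Call the override again, this time passing the model; a TypeError (e.g. apply_config takes no arguments) is ignored. NOTE(review): config_indic is unbound here if the config load above failed.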
|
| 60 |
+
if hasattr(config_indic, 'apply_config'):
|
| 61 |
+
try:
|
| 62 |
+
config_indic.apply_config(model)
|
| 63 |
+
except TypeError:
|
| 64 |
+
pass
|
| 65 |
+
|
| 66 |
+
# 3. APPLY TOKENIZER
|
| 67 |
+
try:
|
| 68 |
+
model.tokenizer = EnTokenizer(tokenizer_path)
|
| 69 |
+
print("Indic Tokenizer successfully injected.")
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"Error during tokenizer inject: {e}")
|
| 72 |
|
| 73 |
+
# 4. LOAD LORA ADAPTER
|
| 74 |
print("Loading LoRA adapter weights...")
|
| 75 |
try:
|
| 76 |
if hasattr(model, 't3'):
|