trysem committed on
Commit
9175bc3
·
verified ·
1 Parent(s): e82747c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -14
app.py CHANGED
@@ -30,33 +30,47 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
30
  repo_id = "Praha-Labs/PrahaTTS-ML"
31
 
32
  def load_model():
33
- print(f"Loading base Chatterbox model on {device}...")
34
- model = ChatterboxTTS.from_pretrained(device=device)
35
-
36
  print("Downloading custom Indic tokenizer and config...")
37
  try:
38
  tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
39
  config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
40
 
41
- # 1. OVERRIDE THE TOKENIZER PROPERLY
42
- # Instead of 'load_from_file', we must instantiate the Chatterbox EnTokenizer class directly.
43
- model.tokenizer = EnTokenizer(tokenizer_path)
44
- print("Indic Tokenizer successfully injected.")
45
-
46
- # 2. DYNAMICALLY LOAD THE CONFIG SCRIPT
47
- # This prevents the English text cleaner from stripping out Malayalam characters.
48
  spec = importlib.util.spec_from_file_location("config_indic", config_path)
49
  config_indic = importlib.util.module_from_spec(spec)
50
  spec.loader.exec_module(config_indic)
51
 
52
- # If the config script provides a specific override function, call it
53
  if hasattr(config_indic, 'apply_config'):
54
- config_indic.apply_config(model)
55
- print("Indic Configuration/Alphabet applied.")
 
 
 
56
 
57
  except Exception as e:
58
- print(f"Error during tokenizer/config load: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
 
60
  print("Loading LoRA adapter weights...")
61
  try:
62
  if hasattr(model, 't3'):
 
30
  repo_id = "Praha-Labs/PrahaTTS-ML"
31
 
32
  def load_model():
 
 
 
33
  print("Downloading custom Indic tokenizer and config...")
34
  try:
35
  tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer_indic.json")
36
  config_path = hf_hub_download(repo_id=repo_id, filename="config_indic.py")
37
 
38
+ # 1. DYNAMICALLY LOAD THE CONFIG SCRIPT FIRST!
39
+ # This patches the Chatterbox character/symbol lists before the model initializes.
 
 
 
 
 
40
  spec = importlib.util.spec_from_file_location("config_indic", config_path)
41
  config_indic = importlib.util.module_from_spec(spec)
42
  spec.loader.exec_module(config_indic)
43
 
 
44
  if hasattr(config_indic, 'apply_config'):
45
+ try:
46
+ config_indic.apply_config()
47
+ except TypeError:
48
+ pass # Try again later if it requires the model object
49
+ print("Indic Configuration applied. Vocabulary patched.")
50
 
51
  except Exception as e:
52
+ print(f"Error during config load: {e}")
53
+
54
+ # 2. NOW LOAD THE BASE MODEL
55
+ # It will now initialize its PyTorch embeddings using the NEW patched vocabulary size!
56
+ print(f"Loading base Chatterbox model on {device}...")
57
+ model = ChatterboxTTS.from_pretrained(device=device)
58
+
59
+ # Run the override again if it specifically required the initialized model object
60
+ if hasattr(config_indic, 'apply_config'):
61
+ try:
62
+ config_indic.apply_config(model)
63
+ except TypeError:
64
+ pass
65
+
66
+ # 3. APPLY TOKENIZER
67
+ try:
68
+ model.tokenizer = EnTokenizer(tokenizer_path)
69
+ print("Indic Tokenizer successfully injected.")
70
+ except Exception as e:
71
+ print(f"Error during tokenizer inject: {e}")
72
 
73
+ # 4. LOAD LORA ADAPTER
74
  print("Loading LoRA adapter weights...")
75
  try:
76
  if hasattr(model, 't3'):