rahul7star commited on
Commit
06f6230
Β·
verified Β·
1 Parent(s): 3d3296e

Chatterbox fine-tuned model + logs

Browse files
Files changed (1) hide show
  1. training.log +79 -27
training.log CHANGED
@@ -1,7 +1,7 @@
1
 
2
  /usr/local/lib/python3.13/site-packages/perth/perth_net/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
3
  from pkg_resources import resource_filename
4
- 02/06/2026 05:10:16 - INFO - __main__ - Training/evaluation parameters CustomTrainingArguments(
5
  accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
6
  adam_beta1=0.9,
7
  adam_beta2=0.999,
@@ -113,58 +113,110 @@ warmup_ratio=None,
113
  warmup_steps=1.0,
114
  weight_decay=0.0,
115
  )
116
- 02/06/2026 05:10:16 - INFO - __main__ - Model parameters ModelArguments(model_name_or_path='ResembleAI/chatterbox', local_model_dir=None, cache_dir=None, freeze_voice_encoder=True, freeze_s3gen=True)
117
- 02/06/2026 05:10:16 - INFO - __main__ - Data parameters DataArguments(language='hi', dataset_dir=None, metadata_file=None, dataset_name='maddi99/bengali-banspeech', dataset_config_name=None, train_split_name='train', eval_split_name='validation', text_column_name='text_scribe', audio_column_name='audio', max_text_len=256, max_speech_len=800, audio_prompt_duration_s=3.0, eval_split_size=0.0002, preprocessing_num_workers=None, ignore_verifications=False)
118
- 02/06/2026 05:10:16 - INFO - __main__ - Loading ChatterboxTTS model...
119
- 02/06/2026 05:10:16 - INFO - __main__ - Loading model from Hugging Face Hub: ResembleAI/chatterbox
120
  /usr/local/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py:202: UserWarning: The `local_dir_use_symlinks` argument is deprecated and ignored in `hf_hub_download`. Downloading to a local directory does not use symlinks anymore.
121
  warnings.warn(
122
- 02/06/2026 05:10:16 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/ve.safetensors "HTTP/1.1 302 Found"
123
- 02/06/2026 05:10:16 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/models/ResembleAI/chatterbox/xet-read-token/05e904af2b5c7f8e482687a9d7336c5c824467d9 "HTTP/1.1 200 OK"
124
 
125
 
126
  ve.safetensors: 0%| | 0.00/5.70M [00:00<?, ?B/s]
127
- ve.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.70M/5.70M [00:00<00:00, 38.2MB/s]
128
- 02/06/2026 05:10:16 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/t3_mtl23ls_v2.safetensors "HTTP/1.1 302 Found"
129
 
130
 
131
  t3_mtl23ls_v2.safetensors: 0%| | 0.00/2.14G [00:00<?, ?B/s]
132
 
133
- t3_mtl23ls_v2.safetensors: 0%| | 7.60M/2.14G [00:01<07:58, 4.47MB/s]
134
 
135
- t3_mtl23ls_v2.safetensors: 4%|β–Ž | 78.7M/2.14G [00:02<00:59, 34.6MB/s]
136
 
137
- t3_mtl23ls_v2.safetensors: 10%|β–ˆ | 221M/2.14G [00:03<00:27, 70.5MB/s] 
138
 
139
- t3_mtl23ls_v2.safetensors: 24%|β–ˆβ–ˆβ–Ž | 505M/2.14G [00:06<00:16, 101MB/s] 
140
 
141
- t3_mtl23ls_v2.safetensors: 30%|β–ˆβ–ˆβ–ˆ | 647M/2.14G [00:07<00:15, 97.3MB/s]
142
- t3_mtl23ls_v2.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2.14G/2.14G [00:08<00:00, 246MB/s]
143
- 02/06/2026 05:10:25 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/s3gen.safetensors "HTTP/1.1 302 Found"
144
 
145
 
146
  s3gen.safetensors: 0%| | 0.00/1.06G [00:00<?, ?B/s]
147
 
148
- s3gen.safetensors: 5%|▍ | 50.7M/1.06G [00:01<00:27, 36.9MB/s]
149
 
150
- s3gen.safetensors: 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 654M/1.06G [00:02<00:01, 311MB/s] 
151
- s3gen.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.06G/1.06G [00:02<00:00, 391MB/s]
152
- 02/06/2026 05:10:28 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/mtl_tokenizer.json "HTTP/1.1 307 Temporary Redirect"
153
- 02/06/2026 05:10:28 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/mtl_tokenizer.json "HTTP/1.1 200 OK"
154
- 02/06/2026 05:10:28 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/mtl_tokenizer.json "HTTP/1.1 200 OK"
155
 
156
 
157
  mtl_tokenizer.json: 0%| | 0.00/68.1k [00:00<?, ?B/s]
158
- mtl_tokenizer.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 68.1k/68.1k [00:00<00:00, 145MB/s]
159
- 02/06/2026 05:10:28 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/conds.pt "HTTP/1.1 302 Found"
160
 
161
 
162
  conds.pt: 0%| | 0.00/107k [00:00<?, ?B/s]
163
- conds.pt: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 107k/107k [00:00<00:00, 1.30MB/s]
164
- Traceback (most recent call last):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  File "/app/chatterbox-multilingual-finetuning/src/finetune_t3.py", line 848, in <module>
166
  main()
167
  ~~~~^^
168
  File "/app/chatterbox-multilingual-finetuning/src/finetune_t3.py", line 616, in main
169
  chatterbox_model = ChatterboxMultilingualTTS.from_pretrained(device="cpu")
170
- TypeError: ChatterboxMultilingualTTS.from_pretrained() got an unexpected keyword argument 'device'
 
 
 
 
 
 
1
 
2
  /usr/local/lib/python3.13/site-packages/perth/perth_net/__init__.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
3
  from pkg_resources import resource_filename
4
+ 02/06/2026 05:27:31 - INFO - __main__ - Training/evaluation parameters CustomTrainingArguments(
5
  accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
6
  adam_beta1=0.9,
7
  adam_beta2=0.999,
 
113
  warmup_steps=1.0,
114
  weight_decay=0.0,
115
  )
116
+ 02/06/2026 05:27:31 - INFO - __main__ - Model parameters ModelArguments(model_name_or_path='ResembleAI/chatterbox', local_model_dir=None, cache_dir=None, freeze_voice_encoder=True, freeze_s3gen=True)
117
+ 02/06/2026 05:27:31 - INFO - __main__ - Data parameters DataArguments(language='bn', dataset_dir=None, metadata_file=None, dataset_name=' maddi99/bengali-banspeech', dataset_config_name=None, train_split_name='train', eval_split_name='validation', text_column_name='text_scribe', audio_column_name='audio', max_text_len=256, max_speech_len=800, audio_prompt_duration_s=3.0, eval_split_size=0.0002, preprocessing_num_workers=None, ignore_verifications=False)
118
+ 02/06/2026 05:27:31 - INFO - __main__ - Loading ChatterboxTTS model...
119
+ 02/06/2026 05:27:31 - INFO - __main__ - Loading model from Hugging Face Hub: ResembleAI/chatterbox
120
  /usr/local/lib/python3.13/site-packages/huggingface_hub/utils/_validators.py:202: UserWarning: The `local_dir_use_symlinks` argument is deprecated and ignored in `hf_hub_download`. Downloading to a local directory does not use symlinks anymore.
121
  warnings.warn(
122
+ 02/06/2026 05:27:31 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/ve.safetensors "HTTP/1.1 302 Found"
123
+ 02/06/2026 05:27:31 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/models/ResembleAI/chatterbox/xet-read-token/05e904af2b5c7f8e482687a9d7336c5c824467d9 "HTTP/1.1 200 OK"
124
 
125
 
126
  ve.safetensors: 0%| | 0.00/5.70M [00:00<?, ?B/s]
127
+ ve.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 5.70M/5.70M [00:00<00:00, 21.1MB/s]
128
+ 02/06/2026 05:27:32 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/t3_mtl23ls_v2.safetensors "HTTP/1.1 302 Found"
129
 
130
 
131
  t3_mtl23ls_v2.safetensors: 0%| | 0.00/2.14G [00:00<?, ?B/s]
132
 
133
+ t3_mtl23ls_v2.safetensors: 0%| | 7.60M/2.14G [00:01<07:04, 5.04MB/s]
134
 
135
+ t3_mtl23ls_v2.safetensors: 4%|▍ | 80.9M/2.14G [00:05<02:07, 16.2MB/s]
136
 
137
+ t3_mtl23ls_v2.safetensors: 27%|β–ˆβ–ˆβ–‹ | 579M/2.14G [00:06<00:13, 118MB/s] 
138
 
139
+ t3_mtl23ls_v2.safetensors: 37%|β–ˆβ–ˆβ–ˆβ–‹ | 792M/2.14G [00:07<00:10, 128MB/s]
140
 
141
+ t3_mtl23ls_v2.safetensors: 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1.29G/2.14G [00:09<00:04, 211MB/s]
142
+ t3_mtl23ls_v2.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2.14G/2.14G [00:09<00:00, 221MB/s]
143
+ 02/06/2026 05:27:41 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/s3gen.safetensors "HTTP/1.1 302 Found"
144
 
145
 
146
  s3gen.safetensors: 0%| | 0.00/1.06G [00:00<?, ?B/s]
147
 
148
+ s3gen.safetensors: 6%|β–‹ | 67.0M/1.06G [00:01<00:29, 33.9MB/s]
149
 
150
+ s3gen.safetensors: 56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 587M/1.06G [00:03<00:02, 229MB/s] 
151
+ s3gen.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.06G/1.06G [00:03<00:00, 322MB/s]
152
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/mtl_tokenizer.json "HTTP/1.1 307 Temporary Redirect"
153
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/mtl_tokenizer.json "HTTP/1.1 200 OK"
154
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/mtl_tokenizer.json "HTTP/1.1 200 OK"
155
 
156
 
157
  mtl_tokenizer.json: 0%| | 0.00/68.1k [00:00<?, ?B/s]
158
+ mtl_tokenizer.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 68.1k/68.1k [00:00<00:00, 134MB/s]
159
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/main/conds.pt "HTTP/1.1 302 Found"
160
 
161
 
162
  conds.pt: 0%| | 0.00/107k [00:00<?, ?B/s]
163
+ conds.pt: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 107k/107k [00:00<00:00, 1.31MB/s]
164
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/models/ResembleAI/chatterbox/revision/main "HTTP/1.1 200 OK"
165
+
166
+
167
+ Downloading (incomplete total...): 0.00B [00:00, ?B/s]
168
+
169
+ Fetching 6 files: 0%| | 0/6 [00:00<?, ?it/s]02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/Cangjie5_TC.json "HTTP/1.1 307 Temporary Redirect"
170
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/conds.pt "HTTP/1.1 302 Found"
171
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/s3gen.pt "HTTP/1.1 302 Found"
172
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/ve.pt "HTTP/1.1 302 Found"
173
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/t3_mtl23ls_v2.safetensors "HTTP/1.1 302 Found"
174
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/ResembleAI/chatterbox/resolve/05e904af2b5c7f8e482687a9d7336c5c824467d9/grapheme_mtl_merged_expanded_v1.json "HTTP/1.1 307 Temporary Redirect"
175
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/Cangjie5_TC.json "HTTP/1.1 200 OK"
176
+
177
+
178
+ Downloading (incomplete total...): 0%| | 0.00/2.14G [00:00<?, ?B/s]
179
+
180
+ Downloading (incomplete total...): 0%| | 0.00/3.20G [00:00<?, ?B/s]
181
+
182
+ Downloading (incomplete total...): 0%| | 0.00/3.21G [00:00<?, ?B/s]
183
+
184
+ Downloading (incomplete total...): 0%| | 0.00/3.21G [00:00<?, ?B/s]02/06/2026 05:27:45 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/Cangjie5_TC.json "HTTP/1.1 200 OK"
185
+
186
+
187
+ Downloading (incomplete total...): 0%| | 0.00/3.21G [00:00<?, ?B/s]02/06/2026 05:27:45 - INFO - httpx - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/grapheme_mtl_merged_expanded_v1.json "HTTP/1.1 200 OK"
188
+ 02/06/2026 05:27:45 - INFO - httpx - HTTP Request: GET https://huggingface.co/api/resolve-cache/models/ResembleAI/chatterbox/05e904af2b5c7f8e482687a9d7336c5c824467d9/grapheme_mtl_merged_expanded_v1.json "HTTP/1.1 200 OK"
189
+
190
+
191
+ Downloading (incomplete total...): 0%| | 1.92M/3.21G [00:00<02:36, 20.5MB/s]
192
+
193
+ Downloading (incomplete total...): 0%| | 15.4M/3.21G [00:01<05:28, 9.71MB/s]
194
+
195
+ Downloading (incomplete total...): 3%|β–Ž | 86.5M/3.21G [00:06<03:42, 14.0MB/s]
196
+
197
+ Downloading (incomplete total...): 5%|▍ | 158M/3.21G [00:07<02:13, 22.9MB/s] 
198
+
199
+ Downloading (incomplete total...): 9%|β–Š | 280M/3.21G [00:09<01:17, 37.6MB/s]
200
+
201
+ Downloading (incomplete total...): 28%|β–ˆβ–ˆβ–Š | 908M/3.21G [00:10<00:14, 155MB/s] 
202
+
203
+ Downloading (incomplete total...): 44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 1.40G/3.21G [00:11<00:08, 223MB/s]
204
+
205
+ Fetching 6 files: 67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:12<00:06, 3.09s/it]
206
+
207
+ Downloading (incomplete total...): 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2.78G/3.21G [00:12<00:00, 493MB/s]
208
+ Fetching 6 files: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 6/6 [00:13<00:00, 2.20s/it]
209
+
210
+
211
+ Download complete: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.21G/3.21G [00:13<00:00, 493MB/s] Traceback (most recent call last):
212
  File "/app/chatterbox-multilingual-finetuning/src/finetune_t3.py", line 848, in <module>
213
  main()
214
  ~~~~^^
215
  File "/app/chatterbox-multilingual-finetuning/src/finetune_t3.py", line 616, in main
216
  chatterbox_model = ChatterboxMultilingualTTS.from_pretrained(device="cpu")
217
+ File "/app/chatterbox-multilingual-finetuning/src/chatterbox/mtl_tts.py", line 188, in from_pretrained
218
+ return cls.from_local(ckpt_dir, device)
219
+ ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^
220
+ TypeError: ChatterboxMultilingualTTS.from_local() takes 2 positional arguments but 3 were given
221
+
222
+ Download complete: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.21G/3.21G [00:13<00:00, 239MB/s]