omarelshehy commited on
Commit
b51e8f7
·
verified ·
1 Parent(s): b643fd7

Upload folder using huggingface_hub

Browse files
all_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9932075471698112,
3
+ "eval_runtime": 0.8671,
4
+ "eval_samples_per_second": 6.919,
5
+ "eval_steps_per_second": 1.153,
6
+ "total_flos": 0.0,
7
+ "train_loss": 3.8633889573993105,
8
+ "train_runtime": 2643.5458,
9
+ "train_samples_per_second": 8.02,
10
+ "train_steps_per_second": 0.125
11
+ }
eval_results.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9932075471698112,
3
+ "eval_runtime": 0.8671,
4
+ "eval_samples_per_second": 6.919,
5
+ "eval_steps_per_second": 1.153
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03280e5c7cdcd527337ae5509dcf44f5212259b74fdd465695ed4e04103a2604
3
+ size 2143990656
t3_full_finetuned.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5967346b10f31d670dca74905451f68bf617d770c6e6cda026f5b86703609440
3
+ size 2143989752
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9932075471698112,
3
+ "total_flos": 0.0,
4
+ "train_loss": 3.8633889573993105,
5
+ "train_runtime": 2643.5458,
6
+ "train_samples_per_second": 8.02,
7
+ "train_steps_per_second": 0.125
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9932075471698112,
5
+ "eval_steps": 500.0,
6
+ "global_step": 330,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06037735849056604,
13
+ "grad_norm": 102.50332641601562,
14
+ "learning_rate": 3e-06,
15
+ "loss": 5.0564,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.12075471698113208,
20
+ "grad_norm": 100.99700164794922,
21
+ "learning_rate": 8.000000000000001e-06,
22
+ "loss": 4.6092,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.1811320754716981,
27
+ "grad_norm": 79.72400665283203,
28
+ "learning_rate": 1.3000000000000001e-05,
29
+ "loss": 4.456,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.24150943396226415,
34
+ "grad_norm": 73.68656921386719,
35
+ "learning_rate": 1.8e-05,
36
+ "loss": 4.3422,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.3018867924528302,
41
+ "grad_norm": 63.52485275268555,
42
+ "learning_rate": 2.3000000000000003e-05,
43
+ "loss": 4.0122,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.3622641509433962,
48
+ "grad_norm": 81.21367645263672,
49
+ "learning_rate": 2.8000000000000003e-05,
50
+ "loss": 4.1135,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.4226415094339623,
55
+ "grad_norm": 53.12013244628906,
56
+ "learning_rate": 3.3e-05,
57
+ "loss": 3.8562,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.4830188679245283,
62
+ "grad_norm": 66.63668823242188,
63
+ "learning_rate": 3.8e-05,
64
+ "loss": 4.2626,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.5433962264150943,
69
+ "grad_norm": 64.65432739257812,
70
+ "learning_rate": 4.3e-05,
71
+ "loss": 4.0323,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.6037735849056604,
76
+ "grad_norm": 58.95002365112305,
77
+ "learning_rate": 4.8e-05,
78
+ "loss": 4.1304,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.6641509433962264,
83
+ "grad_norm": 71.38043975830078,
84
+ "learning_rate": 4.8695652173913046e-05,
85
+ "loss": 3.8795,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.7245283018867924,
90
+ "grad_norm": 57.298316955566406,
91
+ "learning_rate": 4.6521739130434785e-05,
92
+ "loss": 4.0055,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.7849056603773585,
97
+ "grad_norm": 59.41974639892578,
98
+ "learning_rate": 4.4347826086956525e-05,
99
+ "loss": 4.1426,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.8452830188679246,
104
+ "grad_norm": 58.00967788696289,
105
+ "learning_rate": 4.2173913043478264e-05,
106
+ "loss": 4.0285,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.9056603773584906,
111
+ "grad_norm": 63.804954528808594,
112
+ "learning_rate": 4e-05,
113
+ "loss": 4.1778,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.9660377358490566,
118
+ "grad_norm": 61.9858512878418,
119
+ "learning_rate": 3.7826086956521736e-05,
120
+ "loss": 4.2706,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 1.0271698113207548,
125
+ "grad_norm": 56.96372985839844,
126
+ "learning_rate": 3.565217391304348e-05,
127
+ "loss": 3.7699,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 1.0875471698113208,
132
+ "grad_norm": 70.83443450927734,
133
+ "learning_rate": 3.347826086956522e-05,
134
+ "loss": 3.7837,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 1.1479245283018868,
139
+ "grad_norm": 61.61659240722656,
140
+ "learning_rate": 3.130434782608696e-05,
141
+ "loss": 3.6811,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 1.2083018867924529,
146
+ "grad_norm": 63.96455001831055,
147
+ "learning_rate": 2.9130434782608696e-05,
148
+ "loss": 3.7169,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 1.268679245283019,
153
+ "grad_norm": 54.126808166503906,
154
+ "learning_rate": 2.6956521739130436e-05,
155
+ "loss": 3.4802,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 1.329056603773585,
160
+ "grad_norm": 66.76802062988281,
161
+ "learning_rate": 2.4782608695652175e-05,
162
+ "loss": 3.7544,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 1.389433962264151,
167
+ "grad_norm": 62.32306671142578,
168
+ "learning_rate": 2.2608695652173914e-05,
169
+ "loss": 3.2619,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 1.449811320754717,
174
+ "grad_norm": 70.75401306152344,
175
+ "learning_rate": 2.0434782608695654e-05,
176
+ "loss": 3.4987,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 1.510188679245283,
181
+ "grad_norm": 75.13993072509766,
182
+ "learning_rate": 1.8260869565217393e-05,
183
+ "loss": 3.4636,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 1.570566037735849,
188
+ "grad_norm": 75.18315887451172,
189
+ "learning_rate": 1.608695652173913e-05,
190
+ "loss": 3.4056,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 1.630943396226415,
195
+ "grad_norm": 65.60991668701172,
196
+ "learning_rate": 1.391304347826087e-05,
197
+ "loss": 3.5469,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 1.691320754716981,
202
+ "grad_norm": 61.672264099121094,
203
+ "learning_rate": 1.173913043478261e-05,
204
+ "loss": 3.4558,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 1.751698113207547,
209
+ "grad_norm": 80.44284057617188,
210
+ "learning_rate": 9.565217391304349e-06,
211
+ "loss": 3.644,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 1.8120754716981131,
216
+ "grad_norm": 63.004974365234375,
217
+ "learning_rate": 7.391304347826088e-06,
218
+ "loss": 3.2974,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 1.8724528301886791,
223
+ "grad_norm": 63.83254623413086,
224
+ "learning_rate": 5.217391304347826e-06,
225
+ "loss": 3.4935,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 1.9328301886792452,
230
+ "grad_norm": 77.79533386230469,
231
+ "learning_rate": 3.0434782608695654e-06,
232
+ "loss": 3.1741,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 1.9932075471698112,
237
+ "grad_norm": 54.132991790771484,
238
+ "learning_rate": 8.695652173913044e-07,
239
+ "loss": 3.6886,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 1.9932075471698112,
244
+ "step": 330,
245
+ "total_flos": 0.0,
246
+ "train_loss": 3.8633889573993105,
247
+ "train_runtime": 2643.5458,
248
+ "train_samples_per_second": 8.02,
249
+ "train_steps_per_second": 0.125
250
+ }
251
+ ],
252
+ "logging_steps": 10,
253
+ "max_steps": 330,
254
+ "num_input_tokens_seen": 0,
255
+ "num_train_epochs": 2,
256
+ "save_steps": 500,
257
+ "stateful_callbacks": {
258
+ "TrainerControl": {
259
+ "args": {
260
+ "should_epoch_stop": false,
261
+ "should_evaluate": false,
262
+ "should_log": false,
263
+ "should_save": true,
264
+ "should_training_stop": true
265
+ },
266
+ "attributes": {}
267
+ }
268
+ },
269
+ "total_flos": 0.0,
270
+ "train_batch_size": 4,
271
+ "trial_name": null,
272
+ "trial_params": null
273
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49acf4bb8926bbd127842d3bcffe2e9347353bf753a309caaae7398506c8c0c4
3
+ size 5304
training_log.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ================================================================================
2
+ ChatterboxMultilingualTTS Full Fine-Tuning Log
3
+ ================================================================================
4
+
5
+ Training started at step 0
6
+ Total steps: 330
7
+ Epochs: 2.0
8
+ --------------------------------------------------------------------------------
9
+
10
+ Step 10: loss=5.0564 lr=3.00e-06 epoch=0.06
11
+ Step 20: loss=4.6092 lr=8.00e-06 epoch=0.12
12
+ Step 30: loss=4.4560 lr=1.30e-05 epoch=0.18
13
+ Step 40: loss=4.3422 lr=1.80e-05 epoch=0.24
14
+ Step 50: loss=4.0122 lr=2.30e-05 epoch=0.30
15
+ Step 60: loss=4.1135 lr=2.80e-05 epoch=0.36
16
+ Step 70: loss=3.8562 lr=3.30e-05 epoch=0.42
17
+ Step 80: loss=4.2626 lr=3.80e-05 epoch=0.48
18
+ Step 90: loss=4.0323 lr=4.30e-05 epoch=0.54
19
+ Completed step 100/330
20
+ Step 100: loss=4.1304 lr=4.80e-05 epoch=0.60
21
+ Step 110: loss=3.8795 lr=4.87e-05 epoch=0.66
22
+ Step 120: loss=4.0055 lr=4.65e-05 epoch=0.72
23
+ Step 130: loss=4.1426 lr=4.43e-05 epoch=0.78
24
+ Step 140: loss=4.0285 lr=4.22e-05 epoch=0.85
25
+ Step 150: loss=4.1778 lr=4.00e-05 epoch=0.91
26
+ Step 160: loss=4.2706 lr=3.78e-05 epoch=0.97
27
+ Step 170: loss=3.7699 lr=3.57e-05 epoch=1.03
28
+ Step 180: loss=3.7837 lr=3.35e-05 epoch=1.09
29
+ Step 190: loss=3.6811 lr=3.13e-05 epoch=1.15
30
+ Completed step 200/330
31
+ Step 200: loss=3.7169 lr=2.91e-05 epoch=1.21
32
+ Step 210: loss=3.4802 lr=2.70e-05 epoch=1.27
33
+ Step 220: loss=3.7544 lr=2.48e-05 epoch=1.33
34
+ Step 230: loss=3.2619 lr=2.26e-05 epoch=1.39
35
+ Step 240: loss=3.4987 lr=2.04e-05 epoch=1.45
36
+ Step 250: loss=3.4636 lr=1.83e-05 epoch=1.51
37
+ Step 260: loss=3.4056 lr=1.61e-05 epoch=1.57
38
+ Step 270: loss=3.5469 lr=1.39e-05 epoch=1.63
39
+ Step 280: loss=3.4558 lr=1.17e-05 epoch=1.69
40
+ Step 290: loss=3.6440 lr=9.57e-06 epoch=1.75
41
+ Completed step 300/330
42
+ Step 300: loss=3.2974 lr=7.39e-06 epoch=1.81
43
+ Step 310: loss=3.4935 lr=5.22e-06 epoch=1.87
44
+ Step 320: loss=3.1741 lr=3.04e-06 epoch=1.93
45
+ Step 330: loss=3.6886 lr=8.70e-07 epoch=1.99
46
+ Checkpoint saved at step 330
47
+ Step 330: epoch=1.99
48
+
49
+ ================================================================================
50
+ Training completed!
51
+ Final step: 330
52
+ ================================================================================
53
+ Step 330: epoch=1.99