hypaai committed on
Commit
68541df
·
verified ·
1 Parent(s): a5e7fa6

Training in progress, step 742, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b4f9dd35810f12923748d23e35764e5750ddb129ab09001d6715ee1e8ff7115
3
  size 187692184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cdacbd734c7cfb118194306ec5555eb3b914cf0396802811744ac319ae8c417
3
  size 187692184
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2e5b19e34b0a8a70707fff10f280e22411e9090d5cec43ce4fb7b7265eec999
3
  size 14921099
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d465bff954eafc049f58779851244d37423037d3ab0cda46ca37f7614c9b8761
3
  size 14921099
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30e1366c381bb5999ff6352e727b7113930f0b6f58258b41406c9717642b2b42
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1b8e11a439755f89b031703828e2394b2f01fe6c8cf0ce4fa480ade641b1fb
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30858f23bcb22d0baef45bd4add9d6fa474141308c12653c706077b87d932e49
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310d4ff86f0a19ce65aaa92b0f116d7a6212129aa045ee2dbca2cd74a0f71639
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e35cd02002112061ce66dbfac8fcad9e2a69fb1c642d85e5afa7ee322f4e9dd5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17de27c8ddf89af6f120bf81b9f82a5499bf088092eeb924c1c9ecb04d3351b5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0673854447439353,
6
  "eval_steps": 50,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -136,6 +136,102 @@
136
  "eval_steps_per_second": 9.246,
137
  "eval_wer": 105.19633626553006,
138
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "logging_steps": 50,
@@ -150,12 +246,12 @@
150
  "should_evaluate": false,
151
  "should_log": false,
152
  "should_save": true,
153
- "should_training_stop": false
154
  },
155
  "attributes": {}
156
  }
157
  },
158
- "total_flos": 1.5297411022848e+19,
159
  "train_batch_size": 64,
160
  "trial_name": null,
161
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4609164420485175,
6
  "eval_steps": 50,
7
+ "global_step": 742,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
136
  "eval_steps_per_second": 9.246,
137
  "eval_wer": 105.19633626553006,
138
  "step": 400
139
+ },
140
+ {
141
+ "epoch": 0.0673854447439353,
142
+ "grad_norm": 2.3205716609954834,
143
+ "learning_rate": 3.975576662143827e-05,
144
+ "loss": 8.901954345703125,
145
+ "step": 450
146
+ },
147
+ {
148
+ "epoch": 0.0673854447439353,
149
+ "eval_loss": 4.385607719421387,
150
+ "eval_runtime": 95.6681,
151
+ "eval_samples_per_second": 10.035,
152
+ "eval_steps_per_second": 10.035,
153
+ "eval_wer": 102.23088782080347,
154
+ "step": 450
155
+ },
156
+ {
157
+ "epoch": 0.1347708894878706,
158
+ "grad_norm": 1.2967826128005981,
159
+ "learning_rate": 3.297150610583447e-05,
160
+ "loss": 8.103807983398438,
161
+ "step": 500
162
+ },
163
+ {
164
+ "epoch": 0.1347708894878706,
165
+ "eval_loss": 4.036848068237305,
166
+ "eval_runtime": 90.525,
167
+ "eval_samples_per_second": 10.605,
168
+ "eval_steps_per_second": 10.605,
169
+ "eval_wer": 99.10220368187177,
170
+ "step": 500
171
+ },
172
+ {
173
+ "epoch": 0.20215633423180593,
174
+ "grad_norm": 2.0856359004974365,
175
+ "learning_rate": 2.6187245590230662e-05,
176
+ "loss": 9.242673950195313,
177
+ "step": 550
178
+ },
179
+ {
180
+ "epoch": 0.20215633423180593,
181
+ "eval_loss": 3.9937756061553955,
182
+ "eval_runtime": 90.3336,
183
+ "eval_samples_per_second": 10.627,
184
+ "eval_steps_per_second": 10.627,
185
+ "eval_wer": 98.02303437018229,
186
+ "step": 550
187
+ },
188
+ {
189
+ "epoch": 0.2695417789757412,
190
+ "grad_norm": 1.1551530361175537,
191
+ "learning_rate": 1.9402985074626868e-05,
192
+ "loss": 7.580842895507812,
193
+ "step": 600
194
+ },
195
+ {
196
+ "epoch": 0.2695417789757412,
197
+ "eval_loss": 3.8142902851104736,
198
+ "eval_runtime": 90.2808,
199
+ "eval_samples_per_second": 10.633,
200
+ "eval_steps_per_second": 10.633,
201
+ "eval_wer": 95.92817629454974,
202
+ "step": 600
203
+ },
204
+ {
205
+ "epoch": 0.33692722371967654,
206
+ "grad_norm": 1.4342340230941772,
207
+ "learning_rate": 1.2618724559023069e-05,
208
+ "loss": 9.35689208984375,
209
+ "step": 650
210
+ },
211
+ {
212
+ "epoch": 0.33692722371967654,
213
+ "eval_loss": 4.0508809089660645,
214
+ "eval_runtime": 89.1496,
215
+ "eval_samples_per_second": 10.768,
216
+ "eval_steps_per_second": 10.768,
217
+ "eval_wer": 98.15906411535322,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 0.40431266846361186,
222
+ "grad_norm": 1.9096912145614624,
223
+ "learning_rate": 5.834464043419268e-06,
224
+ "loss": 9.176519775390625,
225
+ "step": 700
226
+ },
227
+ {
228
+ "epoch": 0.40431266846361186,
229
+ "eval_loss": 3.9893789291381836,
230
+ "eval_runtime": 90.4288,
231
+ "eval_samples_per_second": 10.616,
232
+ "eval_steps_per_second": 10.616,
233
+ "eval_wer": 96.53577582297996,
234
+ "step": 700
235
  }
236
  ],
237
  "logging_steps": 50,
 
246
  "should_evaluate": false,
247
  "should_log": false,
248
  "should_save": true,
249
+ "should_training_stop": true
250
  },
251
  "attributes": {}
252
  }
253
  },
254
+ "total_flos": 2.837669744738304e+19,
255
  "train_batch_size": 64,
256
  "trial_name": null,
257
  "trial_params": null