NairaRahim commited on
Commit
2297f9d
·
verified ·
1 Parent(s): 6268252

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf382415b8b4e319747d4c583bb2c22fdcb33f73e65a25fb65dde51f022f0b3a
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c450bc0ab6aae2aa695b08c5f17070da86a392ef1d21ad63fdcff23b36b0281c
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb172aa8a3695c37694e8b016fb08371ff016bf0005d26a5ad71d0066147ef06
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd17e14a85b790579fc23c4dffeabc9df3be8e2f4b9762f22e4935e57438ad2
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fda2382d9098c76a47d94697adf0e77400f7b6a3f4a525cb2f195f30c9813189
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff8dada29d1f5265289e75197b18e7b964b1af6e44a0a6b6522b1cf938eb114
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2590224ad91e1be553a4c5db4bec4d60b1f52733b47fba11c73ce6465c9447e8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b374d94603410fd6652078786e8573cde53c3c6aef9163768dada58a03a48fd5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 35.211631774902344,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-2610",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 2610,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -205,6 +205,105 @@
205
  "eval_samples_per_second": 26.452,
206
  "eval_steps_per_second": 3.324,
207
  "step": 2610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  }
209
  ],
210
  "logging_steps": 100,
@@ -233,7 +332,7 @@
233
  "attributes": {}
234
  }
235
  },
236
- "total_flos": 2814621806094336.0,
237
  "train_batch_size": 8,
238
  "trial_name": null,
239
  "trial_params": null
 
1
  {
2
+ "best_metric": 35.00273132324219,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-3915",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 3915,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
205
  "eval_samples_per_second": 26.452,
206
  "eval_steps_per_second": 3.324,
207
  "step": 2610
208
+ },
209
+ {
210
+ "epoch": 2.0689655172413794,
211
+ "grad_norm": 6.074384689331055,
212
+ "learning_rate": 4.8707375478927206e-05,
213
+ "loss": 33.6587,
214
+ "step": 2700
215
+ },
216
+ {
217
+ "epoch": 2.1455938697318007,
218
+ "grad_norm": 3.770009994506836,
219
+ "learning_rate": 4.865948275862069e-05,
220
+ "loss": 34.5023,
221
+ "step": 2800
222
+ },
223
+ {
224
+ "epoch": 2.2222222222222223,
225
+ "grad_norm": 4.6336140632629395,
226
+ "learning_rate": 4.861159003831418e-05,
227
+ "loss": 34.1806,
228
+ "step": 2900
229
+ },
230
+ {
231
+ "epoch": 2.2988505747126435,
232
+ "grad_norm": 5.440792083740234,
233
+ "learning_rate": 4.856369731800767e-05,
234
+ "loss": 34.6645,
235
+ "step": 3000
236
+ },
237
+ {
238
+ "epoch": 2.375478927203065,
239
+ "grad_norm": 2.98138165473938,
240
+ "learning_rate": 4.8515804597701154e-05,
241
+ "loss": 34.1371,
242
+ "step": 3100
243
+ },
244
+ {
245
+ "epoch": 2.4521072796934864,
246
+ "grad_norm": 2.4175803661346436,
247
+ "learning_rate": 4.846791187739464e-05,
248
+ "loss": 33.8015,
249
+ "step": 3200
250
+ },
251
+ {
252
+ "epoch": 2.528735632183908,
253
+ "grad_norm": 3.846370220184326,
254
+ "learning_rate": 4.842001915708813e-05,
255
+ "loss": 34.0589,
256
+ "step": 3300
257
+ },
258
+ {
259
+ "epoch": 2.6053639846743293,
260
+ "grad_norm": 4.001793384552002,
261
+ "learning_rate": 4.8372126436781614e-05,
262
+ "loss": 33.7327,
263
+ "step": 3400
264
+ },
265
+ {
266
+ "epoch": 2.681992337164751,
267
+ "grad_norm": 3.7779624462127686,
268
+ "learning_rate": 4.83242337164751e-05,
269
+ "loss": 34.3508,
270
+ "step": 3500
271
+ },
272
+ {
273
+ "epoch": 2.7586206896551726,
274
+ "grad_norm": 3.5112695693969727,
275
+ "learning_rate": 4.827634099616858e-05,
276
+ "loss": 33.5653,
277
+ "step": 3600
278
+ },
279
+ {
280
+ "epoch": 2.835249042145594,
281
+ "grad_norm": 2.3443048000335693,
282
+ "learning_rate": 4.822844827586207e-05,
283
+ "loss": 33.798,
284
+ "step": 3700
285
+ },
286
+ {
287
+ "epoch": 2.9118773946360155,
288
+ "grad_norm": 2.5035479068756104,
289
+ "learning_rate": 4.8180555555555555e-05,
290
+ "loss": 33.4353,
291
+ "step": 3800
292
+ },
293
+ {
294
+ "epoch": 2.9885057471264367,
295
+ "grad_norm": 3.4322028160095215,
296
+ "learning_rate": 4.813266283524904e-05,
297
+ "loss": 33.948,
298
+ "step": 3900
299
+ },
300
+ {
301
+ "epoch": 3.0,
302
+ "eval_loss": 35.00273132324219,
303
+ "eval_runtime": 49.3242,
304
+ "eval_samples_per_second": 26.458,
305
+ "eval_steps_per_second": 3.325,
306
+ "step": 3915
307
  }
308
  ],
309
  "logging_steps": 100,
 
332
  "attributes": {}
333
  }
334
  },
335
+ "total_flos": 4221932709141504.0,
336
  "train_batch_size": 8,
337
  "trial_name": null,
338
  "trial_params": null