NairaRahim commited on
Commit
54c0d08
·
verified ·
1 Parent(s): 2e77195

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c450bc0ab6aae2aa695b08c5f17070da86a392ef1d21ad63fdcff23b36b0281c
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43744e9d3a7df899c77712de3afb6af1a054747752266c81e6c564a6bbdfc9fc
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd17e14a85b790579fc23c4dffeabc9df3be8e2f4b9762f22e4935e57438ad2
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dc38bc58189826542c01bb812237dd78de2565f1b21ebc12593e6867e65ffec
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff8dada29d1f5265289e75197b18e7b964b1af6e44a0a6b6522b1cf938eb114
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd03df90c7c1260e5c9a0b8fad9ec21a69a6cc6367e61c044d90f7a2513787fb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b374d94603410fd6652078786e8573cde53c3c6aef9163768dada58a03a48fd5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e03a74488d48b3a98579050f742070bcb62d3183a7aab3987e0d0c9c802d894
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 35.00273132324219,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-3915",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 3915,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -304,6 +304,105 @@
304
  "eval_samples_per_second": 26.458,
305
  "eval_steps_per_second": 3.325,
306
  "step": 3915
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  }
308
  ],
309
  "logging_steps": 100,
@@ -332,7 +431,7 @@
332
  "attributes": {}
333
  }
334
  },
335
- "total_flos": 4221932709141504.0,
336
  "train_batch_size": 8,
337
  "trial_name": null,
338
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.954986572265625,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-5220",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 5220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
304
  "eval_samples_per_second": 26.458,
305
  "eval_steps_per_second": 3.325,
306
  "step": 3915
307
+ },
308
+ {
309
+ "epoch": 3.0651340996168583,
310
+ "grad_norm": 2.8833682537078857,
311
+ "learning_rate": 4.808477011494253e-05,
312
+ "loss": 34.523,
313
+ "step": 4000
314
+ },
315
+ {
316
+ "epoch": 3.1417624521072796,
317
+ "grad_norm": 2.8744261264801025,
318
+ "learning_rate": 4.803735632183908e-05,
319
+ "loss": 33.921,
320
+ "step": 4100
321
+ },
322
+ {
323
+ "epoch": 3.218390804597701,
324
+ "grad_norm": 2.928616762161255,
325
+ "learning_rate": 4.798946360153257e-05,
326
+ "loss": 33.6903,
327
+ "step": 4200
328
+ },
329
+ {
330
+ "epoch": 3.2950191570881224,
331
+ "grad_norm": 3.0579280853271484,
332
+ "learning_rate": 4.7941570881226054e-05,
333
+ "loss": 33.0608,
334
+ "step": 4300
335
+ },
336
+ {
337
+ "epoch": 3.371647509578544,
338
+ "grad_norm": 1.6688510179519653,
339
+ "learning_rate": 4.789367816091954e-05,
340
+ "loss": 33.8769,
341
+ "step": 4400
342
+ },
343
+ {
344
+ "epoch": 3.4482758620689653,
345
+ "grad_norm": 2.6190459728240967,
346
+ "learning_rate": 4.784578544061303e-05,
347
+ "loss": 33.2974,
348
+ "step": 4500
349
+ },
350
+ {
351
+ "epoch": 3.524904214559387,
352
+ "grad_norm": 2.6260671615600586,
353
+ "learning_rate": 4.7797892720306515e-05,
354
+ "loss": 34.0589,
355
+ "step": 4600
356
+ },
357
+ {
358
+ "epoch": 3.6015325670498086,
359
+ "grad_norm": 3.191978693008423,
360
+ "learning_rate": 4.775e-05,
361
+ "loss": 33.9493,
362
+ "step": 4700
363
+ },
364
+ {
365
+ "epoch": 3.67816091954023,
366
+ "grad_norm": 2.759941339492798,
367
+ "learning_rate": 4.770210727969349e-05,
368
+ "loss": 33.5936,
369
+ "step": 4800
370
+ },
371
+ {
372
+ "epoch": 3.7547892720306515,
373
+ "grad_norm": 2.262294054031372,
374
+ "learning_rate": 4.7654214559386976e-05,
375
+ "loss": 34.06,
376
+ "step": 4900
377
+ },
378
+ {
379
+ "epoch": 3.8314176245210727,
380
+ "grad_norm": 4.6808600425720215,
381
+ "learning_rate": 4.760632183908046e-05,
382
+ "loss": 34.1592,
383
+ "step": 5000
384
+ },
385
+ {
386
+ "epoch": 3.9080459770114944,
387
+ "grad_norm": 4.294464111328125,
388
+ "learning_rate": 4.755842911877395e-05,
389
+ "loss": 34.4652,
390
+ "step": 5100
391
+ },
392
+ {
393
+ "epoch": 3.9846743295019156,
394
+ "grad_norm": 2.7845072746276855,
395
+ "learning_rate": 4.7510536398467436e-05,
396
+ "loss": 34.2075,
397
+ "step": 5200
398
+ },
399
+ {
400
+ "epoch": 4.0,
401
+ "eval_loss": 34.954986572265625,
402
+ "eval_runtime": 49.2865,
403
+ "eval_samples_per_second": 26.478,
404
+ "eval_steps_per_second": 3.327,
405
+ "step": 5220
406
  }
407
  ],
408
  "logging_steps": 100,
 
431
  "attributes": {}
432
  }
433
  },
434
+ "total_flos": 5629243612188672.0,
435
  "train_batch_size": 8,
436
  "trial_name": null,
437
  "trial_params": null