NairaRahim commited on
Commit
b96df0a
·
verified ·
1 Parent(s): 4faa57b

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43744e9d3a7df899c77712de3afb6af1a054747752266c81e6c564a6bbdfc9fc
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2968fae491fefd2f9a431c5a0bf13b850a49d5465bf2f3ab25c45e33c0a41886
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc38bc58189826542c01bb812237dd78de2565f1b21ebc12593e6867e65ffec
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938bb6b2c29f2be8620725e1d9819dff7d9e79ec433558bbd6ac24951cd0c258
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd03df90c7c1260e5c9a0b8fad9ec21a69a6cc6367e61c044d90f7a2513787fb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb257991d06d0ad08909803a2d396d17f96d7f13a21d29dde85f6747c2c6f53
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e03a74488d48b3a98579050f742070bcb62d3183a7aab3987e0d0c9c802d894
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c587e6b31550b01ed65ddb502a6dbeb722c15426dc145d4dd3a0afea5fb120
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.954986572265625,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-5220",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 5220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -403,6 +403,105 @@
403
  "eval_samples_per_second": 26.478,
404
  "eval_steps_per_second": 3.327,
405
  "step": 5220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  }
407
  ],
408
  "logging_steps": 100,
@@ -431,7 +530,7 @@
431
  "attributes": {}
432
  }
433
  },
434
- "total_flos": 5629243612188672.0,
435
  "train_batch_size": 8,
436
  "trial_name": null,
437
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.924800872802734,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-6525",
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 6525,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
403
  "eval_samples_per_second": 26.478,
404
  "eval_steps_per_second": 3.327,
405
  "step": 5220
406
+ },
407
+ {
408
+ "epoch": 4.061302681992337,
409
+ "grad_norm": 4.420943260192871,
410
+ "learning_rate": 4.746264367816092e-05,
411
+ "loss": 34.5735,
412
+ "step": 5300
413
+ },
414
+ {
415
+ "epoch": 4.137931034482759,
416
+ "grad_norm": 2.898287534713745,
417
+ "learning_rate": 4.741475095785441e-05,
418
+ "loss": 34.0739,
419
+ "step": 5400
420
+ },
421
+ {
422
+ "epoch": 4.21455938697318,
423
+ "grad_norm": 4.703996658325195,
424
+ "learning_rate": 4.73668582375479e-05,
425
+ "loss": 33.7022,
426
+ "step": 5500
427
+ },
428
+ {
429
+ "epoch": 4.291187739463601,
430
+ "grad_norm": 2.2913658618927,
431
+ "learning_rate": 4.7318965517241384e-05,
432
+ "loss": 33.6581,
433
+ "step": 5600
434
+ },
435
+ {
436
+ "epoch": 4.3678160919540225,
437
+ "grad_norm": 3.895615339279175,
438
+ "learning_rate": 4.727107279693487e-05,
439
+ "loss": 34.0314,
440
+ "step": 5700
441
+ },
442
+ {
443
+ "epoch": 4.444444444444445,
444
+ "grad_norm": 4.635524749755859,
445
+ "learning_rate": 4.722318007662835e-05,
446
+ "loss": 34.5266,
447
+ "step": 5800
448
+ },
449
+ {
450
+ "epoch": 4.521072796934866,
451
+ "grad_norm": 3.451066017150879,
452
+ "learning_rate": 4.717528735632184e-05,
453
+ "loss": 33.1786,
454
+ "step": 5900
455
+ },
456
+ {
457
+ "epoch": 4.597701149425287,
458
+ "grad_norm": 2.552107810974121,
459
+ "learning_rate": 4.7127394636015325e-05,
460
+ "loss": 33.6118,
461
+ "step": 6000
462
+ },
463
+ {
464
+ "epoch": 4.674329501915709,
465
+ "grad_norm": 2.359786033630371,
466
+ "learning_rate": 4.707998084291188e-05,
467
+ "loss": 33.9903,
468
+ "step": 6100
469
+ },
470
+ {
471
+ "epoch": 4.75095785440613,
472
+ "grad_norm": 2.2611875534057617,
473
+ "learning_rate": 4.703208812260537e-05,
474
+ "loss": 34.0762,
475
+ "step": 6200
476
+ },
477
+ {
478
+ "epoch": 4.827586206896552,
479
+ "grad_norm": 1.8199210166931152,
480
+ "learning_rate": 4.698419540229885e-05,
481
+ "loss": 33.6635,
482
+ "step": 6300
483
+ },
484
+ {
485
+ "epoch": 4.904214559386973,
486
+ "grad_norm": 2.7332305908203125,
487
+ "learning_rate": 4.693630268199234e-05,
488
+ "loss": 33.0946,
489
+ "step": 6400
490
+ },
491
+ {
492
+ "epoch": 4.980842911877395,
493
+ "grad_norm": 2.9454078674316406,
494
+ "learning_rate": 4.6888409961685824e-05,
495
+ "loss": 33.9173,
496
+ "step": 6500
497
+ },
498
+ {
499
+ "epoch": 5.0,
500
+ "eval_loss": 34.924800872802734,
501
+ "eval_runtime": 49.3002,
502
+ "eval_samples_per_second": 26.47,
503
+ "eval_steps_per_second": 3.327,
504
+ "step": 6525
505
  }
506
  ],
507
  "logging_steps": 100,
 
530
  "attributes": {}
531
  }
532
  },
533
+ "total_flos": 7036554515235840.0,
534
  "train_batch_size": 8,
535
  "trial_name": null,
536
  "trial_params": null