NairaRahim commited on
Commit
125273a
·
verified ·
1 Parent(s): d231127

Training in progress, epoch 6, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2968fae491fefd2f9a431c5a0bf13b850a49d5465bf2f3ab25c45e33c0a41886
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c40f891a6ad2cd6bdb721e2f111292589ff390313316ee8f0d082edb0b9b03
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:938bb6b2c29f2be8620725e1d9819dff7d9e79ec433558bbd6ac24951cd0c258
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e7d057f22cb8197d547d1ed0d192390fd1e34fb36aa35aa6b76f03d0e2f9d9
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb257991d06d0ad08909803a2d396d17f96d7f13a21d29dde85f6747c2c6f53
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7c95ba6d299e128ae454cc0731e509722836b2913c0cc0546da0aa648a6383
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c587e6b31550b01ed65ddb502a6dbeb722c15426dc145d4dd3a0afea5fb120
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a099afb4d9a9c6bf9e5e93d59bc1aa866f860cc49e0492bfafa53bc834b220ce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.924800872802734,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-6525",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 6525,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -502,6 +502,105 @@
502
  "eval_samples_per_second": 26.47,
503
  "eval_steps_per_second": 3.327,
504
  "step": 6525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  }
506
  ],
507
  "logging_steps": 100,
@@ -530,7 +629,7 @@
530
  "attributes": {}
531
  }
532
  },
533
- "total_flos": 7036554515235840.0,
534
  "train_batch_size": 8,
535
  "trial_name": null,
536
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.841033935546875,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-7830",
4
+ "epoch": 6.0,
5
  "eval_steps": 500,
6
+ "global_step": 7830,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
502
  "eval_samples_per_second": 26.47,
503
  "eval_steps_per_second": 3.327,
504
  "step": 6525
505
+ },
506
+ {
507
+ "epoch": 5.057471264367816,
508
+ "grad_norm": 2.3083884716033936,
509
+ "learning_rate": 4.684051724137931e-05,
510
+ "loss": 33.8987,
511
+ "step": 6600
512
+ },
513
+ {
514
+ "epoch": 5.134099616858237,
515
+ "grad_norm": 2.228327751159668,
516
+ "learning_rate": 4.67926245210728e-05,
517
+ "loss": 33.8189,
518
+ "step": 6700
519
+ },
520
+ {
521
+ "epoch": 5.210727969348659,
522
+ "grad_norm": 3.6814918518066406,
523
+ "learning_rate": 4.6744731800766284e-05,
524
+ "loss": 33.8364,
525
+ "step": 6800
526
+ },
527
+ {
528
+ "epoch": 5.287356321839081,
529
+ "grad_norm": 2.5758285522460938,
530
+ "learning_rate": 4.669683908045977e-05,
531
+ "loss": 33.7093,
532
+ "step": 6900
533
+ },
534
+ {
535
+ "epoch": 5.363984674329502,
536
+ "grad_norm": 4.175839900970459,
537
+ "learning_rate": 4.6648946360153265e-05,
538
+ "loss": 33.6689,
539
+ "step": 7000
540
+ },
541
+ {
542
+ "epoch": 5.440613026819923,
543
+ "grad_norm": 2.213092088699341,
544
+ "learning_rate": 4.6601053639846745e-05,
545
+ "loss": 33.7936,
546
+ "step": 7100
547
+ },
548
+ {
549
+ "epoch": 5.517241379310345,
550
+ "grad_norm": 2.4982571601867676,
551
+ "learning_rate": 4.655316091954023e-05,
552
+ "loss": 33.3686,
553
+ "step": 7200
554
+ },
555
+ {
556
+ "epoch": 5.593869731800766,
557
+ "grad_norm": 3.635983943939209,
558
+ "learning_rate": 4.6505747126436784e-05,
559
+ "loss": 33.5493,
560
+ "step": 7300
561
+ },
562
+ {
563
+ "epoch": 5.670498084291188,
564
+ "grad_norm": 4.315894603729248,
565
+ "learning_rate": 4.645785440613027e-05,
566
+ "loss": 33.6607,
567
+ "step": 7400
568
+ },
569
+ {
570
+ "epoch": 5.747126436781609,
571
+ "grad_norm": 2.6151223182678223,
572
+ "learning_rate": 4.640996168582376e-05,
573
+ "loss": 34.7535,
574
+ "step": 7500
575
+ },
576
+ {
577
+ "epoch": 5.823754789272031,
578
+ "grad_norm": 4.03953218460083,
579
+ "learning_rate": 4.6362068965517244e-05,
580
+ "loss": 33.9865,
581
+ "step": 7600
582
+ },
583
+ {
584
+ "epoch": 5.900383141762452,
585
+ "grad_norm": 2.512362480163574,
586
+ "learning_rate": 4.6314176245210724e-05,
587
+ "loss": 33.0343,
588
+ "step": 7700
589
+ },
590
+ {
591
+ "epoch": 5.977011494252873,
592
+ "grad_norm": 4.745575428009033,
593
+ "learning_rate": 4.626628352490422e-05,
594
+ "loss": 33.4544,
595
+ "step": 7800
596
+ },
597
+ {
598
+ "epoch": 6.0,
599
+ "eval_loss": 34.841033935546875,
600
+ "eval_runtime": 49.3059,
601
+ "eval_samples_per_second": 26.467,
602
+ "eval_steps_per_second": 3.326,
603
+ "step": 7830
604
  }
605
  ],
606
  "logging_steps": 100,
 
629
  "attributes": {}
630
  }
631
  },
632
+ "total_flos": 8443865418283008.0,
633
  "train_batch_size": 8,
634
  "trial_name": null,
635
  "trial_params": null