NairaRahim committed on
Commit
0c30893
·
verified ·
1 Parent(s): 17eb667

Training in progress, epoch 27, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e71eff0d61a84e366db8484360ee95668b8ea2394397d8e4e593ed6e3506b9
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6578b9c4d75748b6280cbd87cff77c51810b19211f34f787c2c20a45bba7f69a
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7fb9eb6216192ce22490824fdf15cbc77c71775a288290e6a20a495b35fff2
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebfa5e3b95d0b2043f86b91a91689cd4bbf2940ae6a300f2f266d892b9b6aa2a
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a30464a0259a27646e9e215f48107081e6b2f053cf9eb8cd5707cfedf93581ee
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:701bb29f6de724269b66c860403f8e84b96f5ac92e54ca67ad415c85cf650a00
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68265dc14a2d04db70bc11c529ede2fb6197ce4bf18435eda07a0317b0f9a6b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf4aead81b9bf3b888621453ce55acb21aa6ea9c3c6f5c0de4c20591eb5cb4b1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.53865432739258,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-28710",
4
- "epoch": 26.0,
5
  "eval_steps": 500,
6
- "global_step": 33930,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2588,6 +2588,105 @@
2588
  "eval_samples_per_second": 26.471,
2589
  "eval_steps_per_second": 3.327,
2590
  "step": 33930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2591
  }
2592
  ],
2593
  "logging_steps": 100,
@@ -2602,7 +2701,7 @@
2602
  "early_stopping_threshold": 0.0
2603
  },
2604
  "attributes": {
2605
- "early_stopping_patience_counter": 4
2606
  }
2607
  },
2608
  "TrainerControl": {
@@ -2616,7 +2715,7 @@
2616
  "attributes": {}
2617
  }
2618
  },
2619
- "total_flos": 3.659008347922637e+16,
2620
  "train_batch_size": 8,
2621
  "trial_name": null,
2622
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.47444152832031,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-35235",
4
+ "epoch": 27.0,
5
  "eval_steps": 500,
6
+ "global_step": 35235,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2588
  "eval_samples_per_second": 26.471,
2589
  "eval_steps_per_second": 3.327,
2590
  "step": 33930
2591
+ },
2592
+ {
2593
+ "epoch": 26.053639846743295,
2594
+ "grad_norm": 3.76274037361145,
2595
+ "learning_rate": 3.372461685823755e-05,
2596
+ "loss": 33.5457,
2597
+ "step": 34000
2598
+ },
2599
+ {
2600
+ "epoch": 26.130268199233715,
2601
+ "grad_norm": 5.3265061378479,
2602
+ "learning_rate": 3.3676724137931036e-05,
2603
+ "loss": 33.2299,
2604
+ "step": 34100
2605
+ },
2606
+ {
2607
+ "epoch": 26.20689655172414,
2608
+ "grad_norm": 4.5878987312316895,
2609
+ "learning_rate": 3.362883141762452e-05,
2610
+ "loss": 33.7869,
2611
+ "step": 34200
2612
+ },
2613
+ {
2614
+ "epoch": 26.28352490421456,
2615
+ "grad_norm": 3.673882007598877,
2616
+ "learning_rate": 3.358093869731801e-05,
2617
+ "loss": 32.6976,
2618
+ "step": 34300
2619
+ },
2620
+ {
2621
+ "epoch": 26.36015325670498,
2622
+ "grad_norm": 3.5689809322357178,
2623
+ "learning_rate": 3.35330459770115e-05,
2624
+ "loss": 33.6335,
2625
+ "step": 34400
2626
+ },
2627
+ {
2628
+ "epoch": 26.436781609195403,
2629
+ "grad_norm": 5.735408306121826,
2630
+ "learning_rate": 3.3485153256704984e-05,
2631
+ "loss": 33.116,
2632
+ "step": 34500
2633
+ },
2634
+ {
2635
+ "epoch": 26.513409961685824,
2636
+ "grad_norm": 1.9485822916030884,
2637
+ "learning_rate": 3.343726053639847e-05,
2638
+ "loss": 33.9434,
2639
+ "step": 34600
2640
+ },
2641
+ {
2642
+ "epoch": 26.590038314176244,
2643
+ "grad_norm": 4.049289226531982,
2644
+ "learning_rate": 3.338936781609196e-05,
2645
+ "loss": 33.2275,
2646
+ "step": 34700
2647
+ },
2648
+ {
2649
+ "epoch": 26.666666666666668,
2650
+ "grad_norm": 2.392415761947632,
2651
+ "learning_rate": 3.334147509578544e-05,
2652
+ "loss": 33.1124,
2653
+ "step": 34800
2654
+ },
2655
+ {
2656
+ "epoch": 26.743295019157088,
2657
+ "grad_norm": 3.01650333404541,
2658
+ "learning_rate": 3.3293582375478924e-05,
2659
+ "loss": 33.0733,
2660
+ "step": 34900
2661
+ },
2662
+ {
2663
+ "epoch": 26.81992337164751,
2664
+ "grad_norm": 3.1701831817626953,
2665
+ "learning_rate": 3.324568965517241e-05,
2666
+ "loss": 33.0029,
2667
+ "step": 35000
2668
+ },
2669
+ {
2670
+ "epoch": 26.896551724137932,
2671
+ "grad_norm": 2.6294217109680176,
2672
+ "learning_rate": 3.3197796934865905e-05,
2673
+ "loss": 32.9881,
2674
+ "step": 35100
2675
+ },
2676
+ {
2677
+ "epoch": 26.973180076628353,
2678
+ "grad_norm": 3.4342799186706543,
2679
+ "learning_rate": 3.314990421455939e-05,
2680
+ "loss": 33.047,
2681
+ "step": 35200
2682
+ },
2683
+ {
2684
+ "epoch": 27.0,
2685
+ "eval_loss": 34.47444152832031,
2686
+ "eval_runtime": 49.2859,
2687
+ "eval_samples_per_second": 26.478,
2688
+ "eval_steps_per_second": 3.328,
2689
+ "step": 35235
2690
  }
2691
  ],
2692
  "logging_steps": 100,
 
2701
  "early_stopping_threshold": 0.0
2702
  },
2703
  "attributes": {
2704
+ "early_stopping_patience_counter": 0
2705
  }
2706
  },
2707
  "TrainerControl": {
 
2715
  "attributes": {}
2716
  }
2717
  },
2718
+ "total_flos": 3.799739438227354e+16,
2719
  "train_batch_size": 8,
2720
  "trial_name": null,
2721
  "trial_params": null