NairaRahim commited on
Commit
4111fa7
·
verified ·
1 Parent(s): dc91e54

Training in progress, epoch 28, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6578b9c4d75748b6280cbd87cff77c51810b19211f34f787c2c20a45bba7f69a
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7192af0b20e9ae92f8d286a2da65a9c5747ee4020c50c48aa473a3e43703773
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebfa5e3b95d0b2043f86b91a91689cd4bbf2940ae6a300f2f266d892b9b6aa2a
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff33db56d63c67d85cfa088d479ea506d59eff9e087bf55d1971168fc3e3f331
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:701bb29f6de724269b66c860403f8e84b96f5ac92e54ca67ad415c85cf650a00
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28a85b72b009dea1edbe23a0bf87db6524c747b48f4861eabc329f71b30d8145
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf4aead81b9bf3b888621453ce55acb21aa6ea9c3c6f5c0de4c20591eb5cb4b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a5b99942864785620ec565d7775aff86fb1bb9970466bd2d05892314489a53a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.47444152832031,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-35235",
4
- "epoch": 27.0,
5
  "eval_steps": 500,
6
- "global_step": 35235,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2687,6 +2687,105 @@
2687
  "eval_samples_per_second": 26.478,
2688
  "eval_steps_per_second": 3.328,
2689
  "step": 35235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2690
  }
2691
  ],
2692
  "logging_steps": 100,
@@ -2715,7 +2814,7 @@
2715
  "attributes": {}
2716
  }
2717
  },
2718
- "total_flos": 3.799739438227354e+16,
2719
  "train_batch_size": 8,
2720
  "trial_name": null,
2721
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.4583740234375,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
+ "epoch": 28.0,
5
  "eval_steps": 500,
6
+ "global_step": 36540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2687
  "eval_samples_per_second": 26.478,
2688
  "eval_steps_per_second": 3.328,
2689
  "step": 35235
2690
+ },
2691
+ {
2692
+ "epoch": 27.049808429118773,
2693
+ "grad_norm": 2.2080352306365967,
2694
+ "learning_rate": 3.310201149425288e-05,
2695
+ "loss": 33.0059,
2696
+ "step": 35300
2697
+ },
2698
+ {
2699
+ "epoch": 27.126436781609197,
2700
+ "grad_norm": 3.0985817909240723,
2701
+ "learning_rate": 3.3054118773946366e-05,
2702
+ "loss": 33.8201,
2703
+ "step": 35400
2704
+ },
2705
+ {
2706
+ "epoch": 27.203065134099617,
2707
+ "grad_norm": 3.165069103240967,
2708
+ "learning_rate": 3.300622605363985e-05,
2709
+ "loss": 33.7343,
2710
+ "step": 35500
2711
+ },
2712
+ {
2713
+ "epoch": 27.279693486590038,
2714
+ "grad_norm": 3.2427308559417725,
2715
+ "learning_rate": 3.295833333333333e-05,
2716
+ "loss": 32.8829,
2717
+ "step": 35600
2718
+ },
2719
+ {
2720
+ "epoch": 27.35632183908046,
2721
+ "grad_norm": 2.973548412322998,
2722
+ "learning_rate": 3.291044061302682e-05,
2723
+ "loss": 33.2656,
2724
+ "step": 35700
2725
+ },
2726
+ {
2727
+ "epoch": 27.43295019157088,
2728
+ "grad_norm": 2.892834424972534,
2729
+ "learning_rate": 3.2862547892720306e-05,
2730
+ "loss": 33.085,
2731
+ "step": 35800
2732
+ },
2733
+ {
2734
+ "epoch": 27.509578544061302,
2735
+ "grad_norm": 2.4037787914276123,
2736
+ "learning_rate": 3.281465517241379e-05,
2737
+ "loss": 32.7549,
2738
+ "step": 35900
2739
+ },
2740
+ {
2741
+ "epoch": 27.586206896551722,
2742
+ "grad_norm": 3.7890052795410156,
2743
+ "learning_rate": 3.276676245210728e-05,
2744
+ "loss": 33.4256,
2745
+ "step": 36000
2746
+ },
2747
+ {
2748
+ "epoch": 27.662835249042146,
2749
+ "grad_norm": 3.4910600185394287,
2750
+ "learning_rate": 3.271886973180077e-05,
2751
+ "loss": 33.3707,
2752
+ "step": 36100
2753
+ },
2754
+ {
2755
+ "epoch": 27.739463601532567,
2756
+ "grad_norm": 2.895573854446411,
2757
+ "learning_rate": 3.2670977011494254e-05,
2758
+ "loss": 32.699,
2759
+ "step": 36200
2760
+ },
2761
+ {
2762
+ "epoch": 27.816091954022987,
2763
+ "grad_norm": 4.670979022979736,
2764
+ "learning_rate": 3.262308429118774e-05,
2765
+ "loss": 33.5898,
2766
+ "step": 36300
2767
+ },
2768
+ {
2769
+ "epoch": 27.89272030651341,
2770
+ "grad_norm": 2.362605571746826,
2771
+ "learning_rate": 3.257519157088123e-05,
2772
+ "loss": 33.4235,
2773
+ "step": 36400
2774
+ },
2775
+ {
2776
+ "epoch": 27.96934865900383,
2777
+ "grad_norm": 4.695677280426025,
2778
+ "learning_rate": 3.2527298850574715e-05,
2779
+ "loss": 33.9318,
2780
+ "step": 36500
2781
+ },
2782
+ {
2783
+ "epoch": 28.0,
2784
+ "eval_loss": 34.4583740234375,
2785
+ "eval_runtime": 49.3101,
2786
+ "eval_samples_per_second": 26.465,
2787
+ "eval_steps_per_second": 3.326,
2788
+ "step": 36540
2789
  }
2790
  ],
2791
  "logging_steps": 100,
 
2814
  "attributes": {}
2815
  }
2816
  },
2817
+ "total_flos": 3.94047052853207e+16,
2818
  "train_batch_size": 8,
2819
  "trial_name": null,
2820
  "trial_params": null