NairaRahim committed on
Commit
0e2c802
·
verified ·
1 Parent(s): 4a0be3d

Training in progress, epoch 30, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daef4e55ad3f9cac7476db8fc6c0558852fa9a93b615790669d2f15c7a8efc1b
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2803b5ea78a77c33b077e0060ee73753f95d174d834b993bc91485092dcde1c9
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4d1575be4a450eb389f45ccce71c57c7f233f5a2b6bd3384717eae5a12f344b
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e5494189f649e0e13e6f06d32a81e216f34cb7f0ab96e7dcedf6d8cc0cf2ad
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4ea3a90506cd06614bd07f48e2ae5e6d0bcae66d2c1234c0b2b3e5e6f04045
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e90b56db58b69cb5e05f5a06dece018d9fd278779eef8662306f8a599fad84
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ca503ac3f6a648f00e869d6e43b6905f33cfb3af705d6edf930285c1fae1c03
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3041d0db6b547a1562ecb7021cfeb0bcc92669d8d944852d4251894eeac567
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
- "epoch": 29.0,
5
  "eval_steps": 500,
6
- "global_step": 37845,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2885,6 +2885,105 @@
2885
  "eval_samples_per_second": 26.444,
2886
  "eval_steps_per_second": 3.323,
2887
  "step": 37845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2888
  }
2889
  ],
2890
  "logging_steps": 100,
@@ -2899,7 +2998,7 @@
2899
  "early_stopping_threshold": 0.0
2900
  },
2901
  "attributes": {
2902
- "early_stopping_patience_counter": 1
2903
  }
2904
  },
2905
  "TrainerControl": {
@@ -2913,7 +3012,7 @@
2913
  "attributes": {}
2914
  }
2915
  },
2916
- "total_flos": 4.081201618836787e+16,
2917
  "train_batch_size": 8,
2918
  "trial_name": null,
2919
  "trial_params": null
 
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 39150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2885
  "eval_samples_per_second": 26.444,
2886
  "eval_steps_per_second": 3.323,
2887
  "step": 37845
2888
+ },
2889
+ {
2890
+ "epoch": 29.042145593869733,
2891
+ "grad_norm": 4.515305042266846,
2892
+ "learning_rate": 3.185727969348659e-05,
2893
+ "loss": 33.5753,
2894
+ "step": 37900
2895
+ },
2896
+ {
2897
+ "epoch": 29.118773946360154,
2898
+ "grad_norm": 3.7501096725463867,
2899
+ "learning_rate": 3.1809386973180076e-05,
2900
+ "loss": 33.3131,
2901
+ "step": 38000
2902
+ },
2903
+ {
2904
+ "epoch": 29.195402298850574,
2905
+ "grad_norm": 3.431818723678589,
2906
+ "learning_rate": 3.176149425287356e-05,
2907
+ "loss": 33.085,
2908
+ "step": 38100
2909
+ },
2910
+ {
2911
+ "epoch": 29.272030651340994,
2912
+ "grad_norm": 3.4503543376922607,
2913
+ "learning_rate": 3.171360153256705e-05,
2914
+ "loss": 32.7894,
2915
+ "step": 38200
2916
+ },
2917
+ {
2918
+ "epoch": 29.34865900383142,
2919
+ "grad_norm": 4.361378192901611,
2920
+ "learning_rate": 3.1665708812260536e-05,
2921
+ "loss": 33.4922,
2922
+ "step": 38300
2923
+ },
2924
+ {
2925
+ "epoch": 29.42528735632184,
2926
+ "grad_norm": 2.354480504989624,
2927
+ "learning_rate": 3.161781609195402e-05,
2928
+ "loss": 33.3214,
2929
+ "step": 38400
2930
+ },
2931
+ {
2932
+ "epoch": 29.50191570881226,
2933
+ "grad_norm": 3.3123044967651367,
2934
+ "learning_rate": 3.156992337164751e-05,
2935
+ "loss": 33.3181,
2936
+ "step": 38500
2937
+ },
2938
+ {
2939
+ "epoch": 29.578544061302683,
2940
+ "grad_norm": 2.3824117183685303,
2941
+ "learning_rate": 3.1522030651341e-05,
2942
+ "loss": 33.0926,
2943
+ "step": 38600
2944
+ },
2945
+ {
2946
+ "epoch": 29.655172413793103,
2947
+ "grad_norm": 2.811178684234619,
2948
+ "learning_rate": 3.1474137931034484e-05,
2949
+ "loss": 33.3361,
2950
+ "step": 38700
2951
+ },
2952
+ {
2953
+ "epoch": 29.731800766283524,
2954
+ "grad_norm": 4.715090751647949,
2955
+ "learning_rate": 3.142624521072797e-05,
2956
+ "loss": 32.8444,
2957
+ "step": 38800
2958
+ },
2959
+ {
2960
+ "epoch": 29.808429118773947,
2961
+ "grad_norm": 2.191209316253662,
2962
+ "learning_rate": 3.137835249042146e-05,
2963
+ "loss": 33.9677,
2964
+ "step": 38900
2965
+ },
2966
+ {
2967
+ "epoch": 29.885057471264368,
2968
+ "grad_norm": 2.606814384460449,
2969
+ "learning_rate": 3.1330459770114945e-05,
2970
+ "loss": 33.3536,
2971
+ "step": 39000
2972
+ },
2973
+ {
2974
+ "epoch": 29.961685823754788,
2975
+ "grad_norm": 4.8533172607421875,
2976
+ "learning_rate": 3.128256704980843e-05,
2977
+ "loss": 33.2721,
2978
+ "step": 39100
2979
+ },
2980
+ {
2981
+ "epoch": 30.0,
2982
+ "eval_loss": 34.46094512939453,
2983
+ "eval_runtime": 49.4265,
2984
+ "eval_samples_per_second": 26.403,
2985
+ "eval_steps_per_second": 3.318,
2986
+ "step": 39150
2987
  }
2988
  ],
2989
  "logging_steps": 100,
 
2998
  "early_stopping_threshold": 0.0
2999
  },
3000
  "attributes": {
3001
+ "early_stopping_patience_counter": 2
3002
  }
3003
  },
3004
  "TrainerControl": {
 
3012
  "attributes": {}
3013
  }
3014
  },
3015
+ "total_flos": 4.221932709141504e+16,
3016
  "train_batch_size": 8,
3017
  "trial_name": null,
3018
  "trial_params": null