goyalayus commited on
Commit
2c6591f
·
verified ·
1 Parent(s): d801503

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84dc19866663b90543a5cc28be1f6e8bff087836139765f45314465458b8abcc
3
  size 84962944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefc043abf545b1a9056c0abf733392ba05d898ea8ec803b6799e50eeaa1f44c
3
  size 84962944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1f351de7fa9c7c965f3987cce588d1a1045c4b50b8b4c5da49e74ba84d14ecc
3
  size 43387339
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c8d28949f0d88f53c349673f9eb37b7c9d7b5324f0fea87d89f07ce4a3d0bf
3
  size 43387339
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ab9d84b07e05a0de6cf94054dbda159d79b30eff688eeb5d3abec7ce76ed9f
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3da3a8ea9a349e34715a0e680e246db9bce3ac95cfe61981f857abc88096d4e9
3
  size 14709
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64f8b0b08f6088c90a3341aa1b8a14244d3700ce315516380355bdd802a502a0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8098b00a7c8df434ccbc8255df3dab9ab568965934b2c15b0908bddeb0e9b559
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 110,
3
- "best_metric": 1.211287498474121,
4
- "best_model_checkpoint": "./outputs/ctrl_abort_real_right/checkpoint-110",
5
- "epoch": 0.23567220139260847,
6
  "eval_steps": 5,
7
- "global_step": 110,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -954,6 +954,92 @@
954
  "eval_samples_per_second": 13.609,
955
  "eval_steps_per_second": 6.805,
956
  "step": 110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
  }
958
  ],
959
  "logging_steps": 1,
@@ -977,12 +1063,12 @@
977
  "should_evaluate": false,
978
  "should_log": false,
979
  "should_save": true,
980
- "should_training_stop": false
981
  },
982
  "attributes": {}
983
  }
984
  },
985
- "total_flos": 1.08491123136e+16,
986
  "train_batch_size": 2,
987
  "trial_name": null,
988
  "trial_params": null
 
1
  {
2
+ "best_global_step": 120,
3
+ "best_metric": 1.2101209163665771,
4
+ "best_model_checkpoint": "./outputs/ctrl_abort_real_right/checkpoint-120",
5
+ "epoch": 0.2570969469737547,
6
  "eval_steps": 5,
7
+ "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
954
  "eval_samples_per_second": 13.609,
955
  "eval_steps_per_second": 6.805,
956
  "step": 110
957
+ },
958
+ {
959
+ "epoch": 0.2378146759507231,
960
+ "grad_norm": 0.696169912815094,
961
+ "learning_rate": 2.0253513192751374e-07,
962
+ "loss": 1.1758,
963
+ "step": 111
964
+ },
965
+ {
966
+ "epoch": 0.2399571505088377,
967
+ "grad_norm": 0.6544473767280579,
968
+ "learning_rate": 1.6426572649021477e-07,
969
+ "loss": 1.1242,
970
+ "step": 112
971
+ },
972
+ {
973
+ "epoch": 0.24209962506695232,
974
+ "grad_norm": 0.8569499254226685,
975
+ "learning_rate": 1.2994041528833267e-07,
976
+ "loss": 1.2727,
977
+ "step": 113
978
+ },
979
+ {
980
+ "epoch": 0.24424209962506696,
981
+ "grad_norm": 0.85068279504776,
982
+ "learning_rate": 9.958719453803278e-08,
983
+ "loss": 1.2417,
984
+ "step": 114
985
+ },
986
+ {
987
+ "epoch": 0.24638457418318158,
988
+ "grad_norm": 0.8679143786430359,
989
+ "learning_rate": 7.32308207615351e-08,
990
+ "loss": 1.2296,
991
+ "step": 115
992
+ },
993
+ {
994
+ "epoch": 0.24638457418318158,
995
+ "eval_loss": 1.2105430364608765,
996
+ "eval_runtime": 1.1269,
997
+ "eval_samples_per_second": 14.198,
998
+ "eval_steps_per_second": 7.099,
999
+ "step": 115
1000
+ },
1001
+ {
1002
+ "epoch": 0.2485270487412962,
1003
+ "grad_norm": 0.674493670463562,
1004
+ "learning_rate": 5.089279059533658e-08,
1005
+ "loss": 1.1677,
1006
+ "step": 116
1007
+ },
1008
+ {
1009
+ "epoch": 0.25066952329941083,
1010
+ "grad_norm": 0.6304970979690552,
1011
+ "learning_rate": 3.25913232572489e-08,
1012
+ "loss": 1.1482,
1013
+ "step": 117
1014
+ },
1015
+ {
1016
+ "epoch": 0.25281199785752545,
1017
+ "grad_norm": 0.6986701488494873,
1018
+ "learning_rate": 1.834134568654333e-08,
1019
+ "loss": 1.1476,
1020
+ "step": 118
1021
+ },
1022
+ {
1023
+ "epoch": 0.25495447241564007,
1024
+ "grad_norm": 0.6568459868431091,
1025
+ "learning_rate": 8.15448036932176e-09,
1026
+ "loss": 1.109,
1027
+ "step": 119
1028
+ },
1029
+ {
1030
+ "epoch": 0.2570969469737547,
1031
+ "grad_norm": 0.8193196654319763,
1032
+ "learning_rate": 2.0390358590538507e-09,
1033
+ "loss": 1.1736,
1034
+ "step": 120
1035
+ },
1036
+ {
1037
+ "epoch": 0.2570969469737547,
1038
+ "eval_loss": 1.2101209163665771,
1039
+ "eval_runtime": 1.4407,
1040
+ "eval_samples_per_second": 11.106,
1041
+ "eval_steps_per_second": 5.553,
1042
+ "step": 120
1043
  }
1044
  ],
1045
  "logging_steps": 1,
 
1063
  "should_evaluate": false,
1064
  "should_log": false,
1065
  "should_save": true,
1066
+ "should_training_stop": true
1067
  },
1068
  "attributes": {}
1069
  }
1070
  },
1071
+ "total_flos": 1.1935339253376e+16,
1072
  "train_batch_size": 2,
1073
  "trial_name": null,
1074
  "trial_params": null