Rakhman16 committed on
Commit
158e1ae
·
verified ·
1 Parent(s): 0acc9b7

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0673723b6fea73b97632e58ff883b0fcedfe7c681e3b064768751625f89426b0
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccdc97af0c664a598630c782a7b331756b66dbb34a0f09170e5fc260495d1b53
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6a5cfafd84b1b27fd2b7eb7cde6b07d68bac4292ccc06119a3b5fcbffec4d1
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e5a38645e0ad06e7337b8afd1b8688354e44665215d46144294c375e1b14ec6
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b65d52be0a01d1387b6ab5b94c8d85e39f197ebf9d2ff39d8953e7c1331a8faf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f750e8c47e9e6edd21fa1108074fa273b123ea44b89fc5876f119d3a8a4022f1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2af37ede7aaba01e654492d7c7cd23480899eab3274bf26dc98e9ba5f8aeadfb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e794023d937cb42e6b824ae46ca100bd6dbedd5057ac527c5cededbfc6fc3265
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.20200392603874207,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-4500",
4
- "epoch": 3.161222339304531,
5
  "eval_steps": 100,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -997,6 +997,116 @@
997
  "eval_samples_per_second": 66.585,
998
  "eval_steps_per_second": 2.09,
999
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  }
1001
  ],
1002
  "logging_steps": 50,
@@ -1016,7 +1126,7 @@
1016
  "attributes": {}
1017
  }
1018
  },
1019
- "total_flos": 4.38381173440512e+16,
1020
  "train_batch_size": 32,
1021
  "trial_name": null,
1022
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2013118416070938,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-5000",
4
+ "epoch": 3.512469265893923,
5
  "eval_steps": 100,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
997
  "eval_samples_per_second": 66.585,
998
  "eval_steps_per_second": 2.09,
999
  "step": 4500
1000
+ },
1001
+ {
1002
+ "epoch": 3.1963470319634704,
1003
+ "grad_norm": 30103.6484375,
1004
+ "learning_rate": 6.018973998594519e-06,
1005
+ "loss": 0.1891,
1006
+ "step": 4550
1007
+ },
1008
+ {
1009
+ "epoch": 3.2314717246224096,
1010
+ "grad_norm": 22014.908203125,
1011
+ "learning_rate": 5.755446240337316e-06,
1012
+ "loss": 0.1933,
1013
+ "step": 4600
1014
+ },
1015
+ {
1016
+ "epoch": 3.2314717246224096,
1017
+ "eval_loss": 0.20177535712718964,
1018
+ "eval_runtime": 66.8767,
1019
+ "eval_samples_per_second": 66.69,
1020
+ "eval_steps_per_second": 2.093,
1021
+ "step": 4600
1022
+ },
1023
+ {
1024
+ "epoch": 3.266596417281349,
1025
+ "grad_norm": 24894.115234375,
1026
+ "learning_rate": 5.491918482080113e-06,
1027
+ "loss": 0.1921,
1028
+ "step": 4650
1029
+ },
1030
+ {
1031
+ "epoch": 3.301721109940288,
1032
+ "grad_norm": 21648.677734375,
1033
+ "learning_rate": 5.2283907238229096e-06,
1034
+ "loss": 0.1914,
1035
+ "step": 4700
1036
+ },
1037
+ {
1038
+ "epoch": 3.301721109940288,
1039
+ "eval_loss": 0.20187227427959442,
1040
+ "eval_runtime": 66.9001,
1041
+ "eval_samples_per_second": 66.667,
1042
+ "eval_steps_per_second": 2.093,
1043
+ "step": 4700
1044
+ },
1045
+ {
1046
+ "epoch": 3.3368458025992274,
1047
+ "grad_norm": 24555.294921875,
1048
+ "learning_rate": 4.964862965565706e-06,
1049
+ "loss": 0.1914,
1050
+ "step": 4750
1051
+ },
1052
+ {
1053
+ "epoch": 3.3719704952581666,
1054
+ "grad_norm": 44338.69921875,
1055
+ "learning_rate": 4.7013352073085035e-06,
1056
+ "loss": 0.1936,
1057
+ "step": 4800
1058
+ },
1059
+ {
1060
+ "epoch": 3.3719704952581666,
1061
+ "eval_loss": 0.20171089470386505,
1062
+ "eval_runtime": 67.0479,
1063
+ "eval_samples_per_second": 66.52,
1064
+ "eval_steps_per_second": 2.088,
1065
+ "step": 4800
1066
+ },
1067
+ {
1068
+ "epoch": 3.407095187917106,
1069
+ "grad_norm": 23296.537109375,
1070
+ "learning_rate": 4.4378074490513e-06,
1071
+ "loss": 0.1949,
1072
+ "step": 4850
1073
+ },
1074
+ {
1075
+ "epoch": 3.442219880576045,
1076
+ "grad_norm": 21337.087890625,
1077
+ "learning_rate": 4.1742796907940974e-06,
1078
+ "loss": 0.1902,
1079
+ "step": 4900
1080
+ },
1081
+ {
1082
+ "epoch": 3.442219880576045,
1083
+ "eval_loss": 0.20151035487651825,
1084
+ "eval_runtime": 66.9445,
1085
+ "eval_samples_per_second": 66.622,
1086
+ "eval_steps_per_second": 2.091,
1087
+ "step": 4900
1088
+ },
1089
+ {
1090
+ "epoch": 3.4773445732349844,
1091
+ "grad_norm": 20258.736328125,
1092
+ "learning_rate": 3.910751932536894e-06,
1093
+ "loss": 0.1966,
1094
+ "step": 4950
1095
+ },
1096
+ {
1097
+ "epoch": 3.512469265893923,
1098
+ "grad_norm": 22937.763671875,
1099
+ "learning_rate": 3.647224174279691e-06,
1100
+ "loss": 0.1949,
1101
+ "step": 5000
1102
+ },
1103
+ {
1104
+ "epoch": 3.512469265893923,
1105
+ "eval_loss": 0.2013118416070938,
1106
+ "eval_runtime": 67.0166,
1107
+ "eval_samples_per_second": 66.551,
1108
+ "eval_steps_per_second": 2.089,
1109
+ "step": 5000
1110
  }
1111
  ],
1112
  "logging_steps": 50,
 
1126
  "attributes": {}
1127
  }
1128
  },
1129
+ "total_flos": 4.87097804685312e+16,
1130
  "train_batch_size": 32,
1131
  "trial_name": null,
1132
  "trial_params": null