Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b6a100ff38d4f00b501f20a5190982d96bca76e8f9a3dd9afd41838295e088c
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:318e4ae9012739627ee7e1642d03ed5987f8ac51a72f0db40543b41f04528304
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3343121e0ab3aeb674ab29d872307564462c4bd82cdd92e6577a4ff26999fc00
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:528ba9a1d2a5739586b1652bb1454f9e977f93a6ae9e9c38a71b51bc41c45de4
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6101,6 +6101,364 @@
|
|
| 6101 |
"eval_samples_per_second": 186.962,
|
| 6102 |
"eval_steps_per_second": 3.926,
|
| 6103 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6104 |
}
|
| 6105 |
],
|
| 6106 |
"logging_steps": 10,
|
|
@@ -6120,7 +6478,7 @@
|
|
| 6120 |
"attributes": {}
|
| 6121 |
}
|
| 6122 |
},
|
| 6123 |
-
"total_flos":
|
| 6124 |
"train_batch_size": 48,
|
| 6125 |
"trial_name": null,
|
| 6126 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.5205271160669032,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 9000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6101 |
"eval_samples_per_second": 186.962,
|
| 6102 |
"eval_steps_per_second": 3.926,
|
| 6103 |
"step": 8500
|
| 6104 |
+
},
|
| 6105 |
+
{
|
| 6106 |
+
"epoch": 1.4377428619699273,
|
| 6107 |
+
"grad_norm": 0.4886073172092438,
|
| 6108 |
+
"learning_rate": 7.706964398504293e-05,
|
| 6109 |
+
"loss": 4.375003814697266,
|
| 6110 |
+
"step": 8510
|
| 6111 |
+
},
|
| 6112 |
+
{
|
| 6113 |
+
"epoch": 1.439432336543335,
|
| 6114 |
+
"grad_norm": 0.48204493522644043,
|
| 6115 |
+
"learning_rate": 7.665144535049224e-05,
|
| 6116 |
+
"loss": 4.33798828125,
|
| 6117 |
+
"step": 8520
|
| 6118 |
+
},
|
| 6119 |
+
{
|
| 6120 |
+
"epoch": 1.4411218111167428,
|
| 6121 |
+
"grad_norm": 0.46427401900291443,
|
| 6122 |
+
"learning_rate": 7.623399467409416e-05,
|
| 6123 |
+
"loss": 4.350474166870117,
|
| 6124 |
+
"step": 8530
|
| 6125 |
+
},
|
| 6126 |
+
{
|
| 6127 |
+
"epoch": 1.4428112856901505,
|
| 6128 |
+
"grad_norm": 0.48773905634880066,
|
| 6129 |
+
"learning_rate": 7.581729621272386e-05,
|
| 6130 |
+
"loss": 4.331120300292969,
|
| 6131 |
+
"step": 8540
|
| 6132 |
+
},
|
| 6133 |
+
{
|
| 6134 |
+
"epoch": 1.444500760263558,
|
| 6135 |
+
"grad_norm": 0.49667221307754517,
|
| 6136 |
+
"learning_rate": 7.540135421558585e-05,
|
| 6137 |
+
"loss": 4.313655090332031,
|
| 6138 |
+
"step": 8550
|
| 6139 |
+
},
|
| 6140 |
+
{
|
| 6141 |
+
"epoch": 1.4461902348369657,
|
| 6142 |
+
"grad_norm": 0.4690784215927124,
|
| 6143 |
+
"learning_rate": 7.498617292417074e-05,
|
| 6144 |
+
"loss": 4.368997573852539,
|
| 6145 |
+
"step": 8560
|
| 6146 |
+
},
|
| 6147 |
+
{
|
| 6148 |
+
"epoch": 1.4478797094103735,
|
| 6149 |
+
"grad_norm": 0.48315441608428955,
|
| 6150 |
+
"learning_rate": 7.457175657221194e-05,
|
| 6151 |
+
"loss": 4.365554428100586,
|
| 6152 |
+
"step": 8570
|
| 6153 |
+
},
|
| 6154 |
+
{
|
| 6155 |
+
"epoch": 1.449569183983781,
|
| 6156 |
+
"grad_norm": 0.47335466742515564,
|
| 6157 |
+
"learning_rate": 7.415810938564277e-05,
|
| 6158 |
+
"loss": 4.337088394165039,
|
| 6159 |
+
"step": 8580
|
| 6160 |
+
},
|
| 6161 |
+
{
|
| 6162 |
+
"epoch": 1.4512586585571887,
|
| 6163 |
+
"grad_norm": 0.45826077461242676,
|
| 6164 |
+
"learning_rate": 7.37452355825528e-05,
|
| 6165 |
+
"loss": 4.34198112487793,
|
| 6166 |
+
"step": 8590
|
| 6167 |
+
},
|
| 6168 |
+
{
|
| 6169 |
+
"epoch": 1.4529481331305965,
|
| 6170 |
+
"grad_norm": 0.4623316824436188,
|
| 6171 |
+
"learning_rate": 7.333313937314548e-05,
|
| 6172 |
+
"loss": 4.346709442138672,
|
| 6173 |
+
"step": 8600
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 1.454637607704004,
|
| 6177 |
+
"grad_norm": 0.48673200607299805,
|
| 6178 |
+
"learning_rate": 7.292182495969462e-05,
|
| 6179 |
+
"loss": 4.370217514038086,
|
| 6180 |
+
"step": 8610
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 1.4563270822774117,
|
| 6184 |
+
"grad_norm": 0.4870317280292511,
|
| 6185 |
+
"learning_rate": 7.251129653650206e-05,
|
| 6186 |
+
"loss": 4.340325927734375,
|
| 6187 |
+
"step": 8620
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 1.4580165568508194,
|
| 6191 |
+
"grad_norm": 0.4829833507537842,
|
| 6192 |
+
"learning_rate": 7.210155828985447e-05,
|
| 6193 |
+
"loss": 4.333442687988281,
|
| 6194 |
+
"step": 8630
|
| 6195 |
+
},
|
| 6196 |
+
{
|
| 6197 |
+
"epoch": 1.459706031424227,
|
| 6198 |
+
"grad_norm": 0.4647566080093384,
|
| 6199 |
+
"learning_rate": 7.169261439798083e-05,
|
| 6200 |
+
"loss": 4.3144184112548825,
|
| 6201 |
+
"step": 8640
|
| 6202 |
+
},
|
| 6203 |
+
{
|
| 6204 |
+
"epoch": 1.4613955059976347,
|
| 6205 |
+
"grad_norm": 0.48941001296043396,
|
| 6206 |
+
"learning_rate": 7.128446903101004e-05,
|
| 6207 |
+
"loss": 4.31253662109375,
|
| 6208 |
+
"step": 8650
|
| 6209 |
+
},
|
| 6210 |
+
{
|
| 6211 |
+
"epoch": 1.4630849805710424,
|
| 6212 |
+
"grad_norm": 0.46602746844291687,
|
| 6213 |
+
"learning_rate": 7.087712635092802e-05,
|
| 6214 |
+
"loss": 4.346303176879883,
|
| 6215 |
+
"step": 8660
|
| 6216 |
+
},
|
| 6217 |
+
{
|
| 6218 |
+
"epoch": 1.4647744551444501,
|
| 6219 |
+
"grad_norm": 0.5055034756660461,
|
| 6220 |
+
"learning_rate": 7.047059051153538e-05,
|
| 6221 |
+
"loss": 4.3370361328125,
|
| 6222 |
+
"step": 8670
|
| 6223 |
+
},
|
| 6224 |
+
{
|
| 6225 |
+
"epoch": 1.4664639297178579,
|
| 6226 |
+
"grad_norm": 0.49361884593963623,
|
| 6227 |
+
"learning_rate": 7.006486565840532e-05,
|
| 6228 |
+
"loss": 4.337132263183594,
|
| 6229 |
+
"step": 8680
|
| 6230 |
+
},
|
| 6231 |
+
{
|
| 6232 |
+
"epoch": 1.4681534042912654,
|
| 6233 |
+
"grad_norm": 0.4785706400871277,
|
| 6234 |
+
"learning_rate": 6.96599559288411e-05,
|
| 6235 |
+
"loss": 4.349030303955078,
|
| 6236 |
+
"step": 8690
|
| 6237 |
+
},
|
| 6238 |
+
{
|
| 6239 |
+
"epoch": 1.4698428788646731,
|
| 6240 |
+
"grad_norm": 0.49940159916877747,
|
| 6241 |
+
"learning_rate": 6.925586545183383e-05,
|
| 6242 |
+
"loss": 4.356793212890625,
|
| 6243 |
+
"step": 8700
|
| 6244 |
+
},
|
| 6245 |
+
{
|
| 6246 |
+
"epoch": 1.4715323534380809,
|
| 6247 |
+
"grad_norm": 0.4632912576198578,
|
| 6248 |
+
"learning_rate": 6.885259834802042e-05,
|
| 6249 |
+
"loss": 4.333657836914062,
|
| 6250 |
+
"step": 8710
|
| 6251 |
+
},
|
| 6252 |
+
{
|
| 6253 |
+
"epoch": 1.4732218280114884,
|
| 6254 |
+
"grad_norm": 0.4802776575088501,
|
| 6255 |
+
"learning_rate": 6.845015872964179e-05,
|
| 6256 |
+
"loss": 4.345002365112305,
|
| 6257 |
+
"step": 8720
|
| 6258 |
+
},
|
| 6259 |
+
{
|
| 6260 |
+
"epoch": 1.4749113025848961,
|
| 6261 |
+
"grad_norm": 0.4794064164161682,
|
| 6262 |
+
"learning_rate": 6.80485507005005e-05,
|
| 6263 |
+
"loss": 4.348992538452149,
|
| 6264 |
+
"step": 8730
|
| 6265 |
+
},
|
| 6266 |
+
{
|
| 6267 |
+
"epoch": 1.4766007771583038,
|
| 6268 |
+
"grad_norm": 0.48898664116859436,
|
| 6269 |
+
"learning_rate": 6.764777835591921e-05,
|
| 6270 |
+
"loss": 4.341244125366211,
|
| 6271 |
+
"step": 8740
|
| 6272 |
+
},
|
| 6273 |
+
{
|
| 6274 |
+
"epoch": 1.4782902517317114,
|
| 6275 |
+
"grad_norm": 0.4965602159500122,
|
| 6276 |
+
"learning_rate": 6.724784578269892e-05,
|
| 6277 |
+
"loss": 4.321900939941406,
|
| 6278 |
+
"step": 8750
|
| 6279 |
+
},
|
| 6280 |
+
{
|
| 6281 |
+
"epoch": 1.479979726305119,
|
| 6282 |
+
"grad_norm": 0.4652167856693268,
|
| 6283 |
+
"learning_rate": 6.684875705907722e-05,
|
| 6284 |
+
"loss": 4.334490203857422,
|
| 6285 |
+
"step": 8760
|
| 6286 |
+
},
|
| 6287 |
+
{
|
| 6288 |
+
"epoch": 1.4816692008785268,
|
| 6289 |
+
"grad_norm": 0.4919753968715668,
|
| 6290 |
+
"learning_rate": 6.645051625468657e-05,
|
| 6291 |
+
"loss": 4.318844604492187,
|
| 6292 |
+
"step": 8770
|
| 6293 |
+
},
|
| 6294 |
+
{
|
| 6295 |
+
"epoch": 1.4833586754519343,
|
| 6296 |
+
"grad_norm": 0.48315659165382385,
|
| 6297 |
+
"learning_rate": 6.605312743051297e-05,
|
| 6298 |
+
"loss": 4.349975967407227,
|
| 6299 |
+
"step": 8780
|
| 6300 |
+
},
|
| 6301 |
+
{
|
| 6302 |
+
"epoch": 1.485048150025342,
|
| 6303 |
+
"grad_norm": 0.4814257323741913,
|
| 6304 |
+
"learning_rate": 6.565659463885467e-05,
|
| 6305 |
+
"loss": 4.339570236206055,
|
| 6306 |
+
"step": 8790
|
| 6307 |
+
},
|
| 6308 |
+
{
|
| 6309 |
+
"epoch": 1.4867376245987498,
|
| 6310 |
+
"grad_norm": 0.48735612630844116,
|
| 6311 |
+
"learning_rate": 6.526092192328048e-05,
|
| 6312 |
+
"loss": 4.335529708862305,
|
| 6313 |
+
"step": 8800
|
| 6314 |
+
},
|
| 6315 |
+
{
|
| 6316 |
+
"epoch": 1.4884270991721575,
|
| 6317 |
+
"grad_norm": 0.4753458499908447,
|
| 6318 |
+
"learning_rate": 6.486611331858879e-05,
|
| 6319 |
+
"loss": 4.328804779052734,
|
| 6320 |
+
"step": 8810
|
| 6321 |
+
},
|
| 6322 |
+
{
|
| 6323 |
+
"epoch": 1.490116573745565,
|
| 6324 |
+
"grad_norm": 0.46705493330955505,
|
| 6325 |
+
"learning_rate": 6.447217285076651e-05,
|
| 6326 |
+
"loss": 4.353744125366211,
|
| 6327 |
+
"step": 8820
|
| 6328 |
+
},
|
| 6329 |
+
{
|
| 6330 |
+
"epoch": 1.4918060483189728,
|
| 6331 |
+
"grad_norm": 0.4967743456363678,
|
| 6332 |
+
"learning_rate": 6.407910453694782e-05,
|
| 6333 |
+
"loss": 4.356158065795898,
|
| 6334 |
+
"step": 8830
|
| 6335 |
+
},
|
| 6336 |
+
{
|
| 6337 |
+
"epoch": 1.4934955228923805,
|
| 6338 |
+
"grad_norm": 0.4624764621257782,
|
| 6339 |
+
"learning_rate": 6.368691238537321e-05,
|
| 6340 |
+
"loss": 4.316521453857422,
|
| 6341 |
+
"step": 8840
|
| 6342 |
+
},
|
| 6343 |
+
{
|
| 6344 |
+
"epoch": 1.4951849974657883,
|
| 6345 |
+
"grad_norm": 0.5081548094749451,
|
| 6346 |
+
"learning_rate": 6.329560039534874e-05,
|
| 6347 |
+
"loss": 4.3620750427246096,
|
| 6348 |
+
"step": 8850
|
| 6349 |
+
},
|
| 6350 |
+
{
|
| 6351 |
+
"epoch": 1.4968744720391958,
|
| 6352 |
+
"grad_norm": 0.486570805311203,
|
| 6353 |
+
"learning_rate": 6.290517255720505e-05,
|
| 6354 |
+
"loss": 4.351879501342774,
|
| 6355 |
+
"step": 8860
|
| 6356 |
+
},
|
| 6357 |
+
{
|
| 6358 |
+
"epoch": 1.4985639466126035,
|
| 6359 |
+
"grad_norm": 0.4706440567970276,
|
| 6360 |
+
"learning_rate": 6.251563285225707e-05,
|
| 6361 |
+
"loss": 4.324571228027343,
|
| 6362 |
+
"step": 8870
|
| 6363 |
+
},
|
| 6364 |
+
{
|
| 6365 |
+
"epoch": 1.5002534211860112,
|
| 6366 |
+
"grad_norm": 0.49965882301330566,
|
| 6367 |
+
"learning_rate": 6.212698525276294e-05,
|
| 6368 |
+
"loss": 4.34442367553711,
|
| 6369 |
+
"step": 8880
|
| 6370 |
+
},
|
| 6371 |
+
{
|
| 6372 |
+
"epoch": 1.5019428957594188,
|
| 6373 |
+
"grad_norm": 0.4871665835380554,
|
| 6374 |
+
"learning_rate": 6.173923372188372e-05,
|
| 6375 |
+
"loss": 4.329629516601562,
|
| 6376 |
+
"step": 8890
|
| 6377 |
+
},
|
| 6378 |
+
{
|
| 6379 |
+
"epoch": 1.5036323703328265,
|
| 6380 |
+
"grad_norm": 0.47697439789772034,
|
| 6381 |
+
"learning_rate": 6.135238221364313e-05,
|
| 6382 |
+
"loss": 4.3523296356201175,
|
| 6383 |
+
"step": 8900
|
| 6384 |
+
},
|
| 6385 |
+
{
|
| 6386 |
+
"epoch": 1.5053218449062342,
|
| 6387 |
+
"grad_norm": 0.48661452531814575,
|
| 6388 |
+
"learning_rate": 6.096643467288703e-05,
|
| 6389 |
+
"loss": 4.330023956298828,
|
| 6390 |
+
"step": 8910
|
| 6391 |
+
},
|
| 6392 |
+
{
|
| 6393 |
+
"epoch": 1.5070113194796417,
|
| 6394 |
+
"grad_norm": 0.4829593002796173,
|
| 6395 |
+
"learning_rate": 6.058139503524314e-05,
|
| 6396 |
+
"loss": 4.348539352416992,
|
| 6397 |
+
"step": 8920
|
| 6398 |
+
},
|
| 6399 |
+
{
|
| 6400 |
+
"epoch": 1.5087007940530495,
|
| 6401 |
+
"grad_norm": 0.47934937477111816,
|
| 6402 |
+
"learning_rate": 6.019726722708104e-05,
|
| 6403 |
+
"loss": 4.323921966552734,
|
| 6404 |
+
"step": 8930
|
| 6405 |
+
},
|
| 6406 |
+
{
|
| 6407 |
+
"epoch": 1.5103902686264572,
|
| 6408 |
+
"grad_norm": 0.5149379372596741,
|
| 6409 |
+
"learning_rate": 5.981405516547222e-05,
|
| 6410 |
+
"loss": 4.312050628662109,
|
| 6411 |
+
"step": 8940
|
| 6412 |
+
},
|
| 6413 |
+
{
|
| 6414 |
+
"epoch": 1.5120797431998647,
|
| 6415 |
+
"grad_norm": 0.48116961121559143,
|
| 6416 |
+
"learning_rate": 5.9431762758149875e-05,
|
| 6417 |
+
"loss": 4.327413940429688,
|
| 6418 |
+
"step": 8950
|
| 6419 |
+
},
|
| 6420 |
+
{
|
| 6421 |
+
"epoch": 1.5137692177732727,
|
| 6422 |
+
"grad_norm": 0.49428287148475647,
|
| 6423 |
+
"learning_rate": 5.9050393903469215e-05,
|
| 6424 |
+
"loss": 4.323257827758789,
|
| 6425 |
+
"step": 8960
|
| 6426 |
+
},
|
| 6427 |
+
{
|
| 6428 |
+
"epoch": 1.5154586923466802,
|
| 6429 |
+
"grad_norm": 0.5180572271347046,
|
| 6430 |
+
"learning_rate": 5.866995249036775e-05,
|
| 6431 |
+
"loss": 4.333328628540039,
|
| 6432 |
+
"step": 8970
|
| 6433 |
+
},
|
| 6434 |
+
{
|
| 6435 |
+
"epoch": 1.5171481669200877,
|
| 6436 |
+
"grad_norm": 0.4911746382713318,
|
| 6437 |
+
"learning_rate": 5.829044239832564e-05,
|
| 6438 |
+
"loss": 4.323813247680664,
|
| 6439 |
+
"step": 8980
|
| 6440 |
+
},
|
| 6441 |
+
{
|
| 6442 |
+
"epoch": 1.5188376414934956,
|
| 6443 |
+
"grad_norm": 0.49372172355651855,
|
| 6444 |
+
"learning_rate": 5.791186749732594e-05,
|
| 6445 |
+
"loss": 4.345953750610351,
|
| 6446 |
+
"step": 8990
|
| 6447 |
+
},
|
| 6448 |
+
{
|
| 6449 |
+
"epoch": 1.5205271160669032,
|
| 6450 |
+
"grad_norm": 0.4822508990764618,
|
| 6451 |
+
"learning_rate": 5.7534231647815244e-05,
|
| 6452 |
+
"loss": 4.349853134155273,
|
| 6453 |
+
"step": 9000
|
| 6454 |
+
},
|
| 6455 |
+
{
|
| 6456 |
+
"epoch": 1.5205271160669032,
|
| 6457 |
+
"eval_loss": 4.292741298675537,
|
| 6458 |
+
"eval_runtime": 3.7165,
|
| 6459 |
+
"eval_samples_per_second": 269.07,
|
| 6460 |
+
"eval_steps_per_second": 5.65,
|
| 6461 |
+
"step": 9000
|
| 6462 |
}
|
| 6463 |
],
|
| 6464 |
"logging_steps": 10,
|
|
|
|
| 6478 |
"attributes": {}
|
| 6479 |
}
|
| 6480 |
},
|
| 6481 |
+
"total_flos": 3.010090484178616e+17,
|
| 6482 |
"train_batch_size": 48,
|
| 6483 |
"trial_name": null,
|
| 6484 |
"trial_params": null
|