Training in progress, step 9000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5822c5d51ff1a3f6c8d63d9491441c689004f44619d361568f98a19df1caeab
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2c88f007992dd9990ea0216c73aaca02a8b4aebfac4c43fbb77c941bb9cf18e
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88595be53afbf68c948f838fbf4b1fa7776619d23de4baf3620fece471fafed5
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:528ba9a1d2a5739586b1652bb1454f9e977f93a6ae9e9c38a71b51bc41c45de4
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -6101,6 +6101,364 @@
|
|
| 6101 |
"eval_samples_per_second": 271.089,
|
| 6102 |
"eval_steps_per_second": 5.693,
|
| 6103 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6104 |
}
|
| 6105 |
],
|
| 6106 |
"logging_steps": 10,
|
|
@@ -6120,7 +6478,7 @@
|
|
| 6120 |
"attributes": {}
|
| 6121 |
}
|
| 6122 |
},
|
| 6123 |
-
"total_flos":
|
| 6124 |
"train_batch_size": 48,
|
| 6125 |
"trial_name": null,
|
| 6126 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.5205271160669032,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 9000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 6101 |
"eval_samples_per_second": 271.089,
|
| 6102 |
"eval_steps_per_second": 5.693,
|
| 6103 |
"step": 8500
|
| 6104 |
+
},
|
| 6105 |
+
{
|
| 6106 |
+
"epoch": 1.4377428619699273,
|
| 6107 |
+
"grad_norm": 0.4925293028354645,
|
| 6108 |
+
"learning_rate": 7.706964398504293e-05,
|
| 6109 |
+
"loss": 4.376210403442383,
|
| 6110 |
+
"step": 8510
|
| 6111 |
+
},
|
| 6112 |
+
{
|
| 6113 |
+
"epoch": 1.439432336543335,
|
| 6114 |
+
"grad_norm": 0.4719123840332031,
|
| 6115 |
+
"learning_rate": 7.665144535049224e-05,
|
| 6116 |
+
"loss": 4.338931274414063,
|
| 6117 |
+
"step": 8520
|
| 6118 |
+
},
|
| 6119 |
+
{
|
| 6120 |
+
"epoch": 1.4411218111167428,
|
| 6121 |
+
"grad_norm": 0.4722173511981964,
|
| 6122 |
+
"learning_rate": 7.623399467409416e-05,
|
| 6123 |
+
"loss": 4.352537536621094,
|
| 6124 |
+
"step": 8530
|
| 6125 |
+
},
|
| 6126 |
+
{
|
| 6127 |
+
"epoch": 1.4428112856901505,
|
| 6128 |
+
"grad_norm": 0.4844585955142975,
|
| 6129 |
+
"learning_rate": 7.581729621272386e-05,
|
| 6130 |
+
"loss": 4.332356262207031,
|
| 6131 |
+
"step": 8540
|
| 6132 |
+
},
|
| 6133 |
+
{
|
| 6134 |
+
"epoch": 1.444500760263558,
|
| 6135 |
+
"grad_norm": 0.49630841612815857,
|
| 6136 |
+
"learning_rate": 7.540135421558585e-05,
|
| 6137 |
+
"loss": 4.3133392333984375,
|
| 6138 |
+
"step": 8550
|
| 6139 |
+
},
|
| 6140 |
+
{
|
| 6141 |
+
"epoch": 1.4461902348369657,
|
| 6142 |
+
"grad_norm": 0.472133994102478,
|
| 6143 |
+
"learning_rate": 7.498617292417074e-05,
|
| 6144 |
+
"loss": 4.3697349548339846,
|
| 6145 |
+
"step": 8560
|
| 6146 |
+
},
|
| 6147 |
+
{
|
| 6148 |
+
"epoch": 1.4478797094103735,
|
| 6149 |
+
"grad_norm": 0.48327624797821045,
|
| 6150 |
+
"learning_rate": 7.457175657221194e-05,
|
| 6151 |
+
"loss": 4.366666030883789,
|
| 6152 |
+
"step": 8570
|
| 6153 |
+
},
|
| 6154 |
+
{
|
| 6155 |
+
"epoch": 1.449569183983781,
|
| 6156 |
+
"grad_norm": 0.4768034815788269,
|
| 6157 |
+
"learning_rate": 7.415810938564277e-05,
|
| 6158 |
+
"loss": 4.33704719543457,
|
| 6159 |
+
"step": 8580
|
| 6160 |
+
},
|
| 6161 |
+
{
|
| 6162 |
+
"epoch": 1.4512586585571887,
|
| 6163 |
+
"grad_norm": 0.4592680037021637,
|
| 6164 |
+
"learning_rate": 7.37452355825528e-05,
|
| 6165 |
+
"loss": 4.343940734863281,
|
| 6166 |
+
"step": 8590
|
| 6167 |
+
},
|
| 6168 |
+
{
|
| 6169 |
+
"epoch": 1.4529481331305965,
|
| 6170 |
+
"grad_norm": 0.4643280804157257,
|
| 6171 |
+
"learning_rate": 7.333313937314548e-05,
|
| 6172 |
+
"loss": 4.346873474121094,
|
| 6173 |
+
"step": 8600
|
| 6174 |
+
},
|
| 6175 |
+
{
|
| 6176 |
+
"epoch": 1.454637607704004,
|
| 6177 |
+
"grad_norm": 0.4980602264404297,
|
| 6178 |
+
"learning_rate": 7.292182495969462e-05,
|
| 6179 |
+
"loss": 4.370085525512695,
|
| 6180 |
+
"step": 8610
|
| 6181 |
+
},
|
| 6182 |
+
{
|
| 6183 |
+
"epoch": 1.4563270822774117,
|
| 6184 |
+
"grad_norm": 0.4845782518386841,
|
| 6185 |
+
"learning_rate": 7.251129653650206e-05,
|
| 6186 |
+
"loss": 4.3420463562011715,
|
| 6187 |
+
"step": 8620
|
| 6188 |
+
},
|
| 6189 |
+
{
|
| 6190 |
+
"epoch": 1.4580165568508194,
|
| 6191 |
+
"grad_norm": 0.47701558470726013,
|
| 6192 |
+
"learning_rate": 7.210155828985447e-05,
|
| 6193 |
+
"loss": 4.333865356445313,
|
| 6194 |
+
"step": 8630
|
| 6195 |
+
},
|
| 6196 |
+
{
|
| 6197 |
+
"epoch": 1.459706031424227,
|
| 6198 |
+
"grad_norm": 0.4681967794895172,
|
| 6199 |
+
"learning_rate": 7.169261439798083e-05,
|
| 6200 |
+
"loss": 4.315822982788086,
|
| 6201 |
+
"step": 8640
|
| 6202 |
+
},
|
| 6203 |
+
{
|
| 6204 |
+
"epoch": 1.4613955059976347,
|
| 6205 |
+
"grad_norm": 0.48438313603401184,
|
| 6206 |
+
"learning_rate": 7.128446903101004e-05,
|
| 6207 |
+
"loss": 4.31340446472168,
|
| 6208 |
+
"step": 8650
|
| 6209 |
+
},
|
| 6210 |
+
{
|
| 6211 |
+
"epoch": 1.4630849805710424,
|
| 6212 |
+
"grad_norm": 0.4675985872745514,
|
| 6213 |
+
"learning_rate": 7.087712635092802e-05,
|
| 6214 |
+
"loss": 4.347599792480469,
|
| 6215 |
+
"step": 8660
|
| 6216 |
+
},
|
| 6217 |
+
{
|
| 6218 |
+
"epoch": 1.4647744551444501,
|
| 6219 |
+
"grad_norm": 0.5026019215583801,
|
| 6220 |
+
"learning_rate": 7.047059051153538e-05,
|
| 6221 |
+
"loss": 4.3385356903076175,
|
| 6222 |
+
"step": 8670
|
| 6223 |
+
},
|
| 6224 |
+
{
|
| 6225 |
+
"epoch": 1.4664639297178579,
|
| 6226 |
+
"grad_norm": 0.4908424913883209,
|
| 6227 |
+
"learning_rate": 7.006486565840532e-05,
|
| 6228 |
+
"loss": 4.337771224975586,
|
| 6229 |
+
"step": 8680
|
| 6230 |
+
},
|
| 6231 |
+
{
|
| 6232 |
+
"epoch": 1.4681534042912654,
|
| 6233 |
+
"grad_norm": 0.47692814469337463,
|
| 6234 |
+
"learning_rate": 6.96599559288411e-05,
|
| 6235 |
+
"loss": 4.350002288818359,
|
| 6236 |
+
"step": 8690
|
| 6237 |
+
},
|
| 6238 |
+
{
|
| 6239 |
+
"epoch": 1.4698428788646731,
|
| 6240 |
+
"grad_norm": 0.4985916316509247,
|
| 6241 |
+
"learning_rate": 6.925586545183383e-05,
|
| 6242 |
+
"loss": 4.357270812988281,
|
| 6243 |
+
"step": 8700
|
| 6244 |
+
},
|
| 6245 |
+
{
|
| 6246 |
+
"epoch": 1.4715323534380809,
|
| 6247 |
+
"grad_norm": 0.4779921770095825,
|
| 6248 |
+
"learning_rate": 6.885259834802042e-05,
|
| 6249 |
+
"loss": 4.3343353271484375,
|
| 6250 |
+
"step": 8710
|
| 6251 |
+
},
|
| 6252 |
+
{
|
| 6253 |
+
"epoch": 1.4732218280114884,
|
| 6254 |
+
"grad_norm": 0.4964430630207062,
|
| 6255 |
+
"learning_rate": 6.845015872964179e-05,
|
| 6256 |
+
"loss": 4.345649337768554,
|
| 6257 |
+
"step": 8720
|
| 6258 |
+
},
|
| 6259 |
+
{
|
| 6260 |
+
"epoch": 1.4749113025848961,
|
| 6261 |
+
"grad_norm": 0.4816732108592987,
|
| 6262 |
+
"learning_rate": 6.80485507005005e-05,
|
| 6263 |
+
"loss": 4.349812316894531,
|
| 6264 |
+
"step": 8730
|
| 6265 |
+
},
|
| 6266 |
+
{
|
| 6267 |
+
"epoch": 1.4766007771583038,
|
| 6268 |
+
"grad_norm": 0.4839925765991211,
|
| 6269 |
+
"learning_rate": 6.764777835591921e-05,
|
| 6270 |
+
"loss": 4.342644119262696,
|
| 6271 |
+
"step": 8740
|
| 6272 |
+
},
|
| 6273 |
+
{
|
| 6274 |
+
"epoch": 1.4782902517317114,
|
| 6275 |
+
"grad_norm": 0.5161303877830505,
|
| 6276 |
+
"learning_rate": 6.724784578269892e-05,
|
| 6277 |
+
"loss": 4.322945022583008,
|
| 6278 |
+
"step": 8750
|
| 6279 |
+
},
|
| 6280 |
+
{
|
| 6281 |
+
"epoch": 1.479979726305119,
|
| 6282 |
+
"grad_norm": 0.4845769703388214,
|
| 6283 |
+
"learning_rate": 6.684875705907722e-05,
|
| 6284 |
+
"loss": 4.33643798828125,
|
| 6285 |
+
"step": 8760
|
| 6286 |
+
},
|
| 6287 |
+
{
|
| 6288 |
+
"epoch": 1.4816692008785268,
|
| 6289 |
+
"grad_norm": 0.48371464014053345,
|
| 6290 |
+
"learning_rate": 6.645051625468657e-05,
|
| 6291 |
+
"loss": 4.319810104370117,
|
| 6292 |
+
"step": 8770
|
| 6293 |
+
},
|
| 6294 |
+
{
|
| 6295 |
+
"epoch": 1.4833586754519343,
|
| 6296 |
+
"grad_norm": 0.4810192286968231,
|
| 6297 |
+
"learning_rate": 6.605312743051297e-05,
|
| 6298 |
+
"loss": 4.350659561157227,
|
| 6299 |
+
"step": 8780
|
| 6300 |
+
},
|
| 6301 |
+
{
|
| 6302 |
+
"epoch": 1.485048150025342,
|
| 6303 |
+
"grad_norm": 0.4886019825935364,
|
| 6304 |
+
"learning_rate": 6.565659463885467e-05,
|
| 6305 |
+
"loss": 4.340823364257813,
|
| 6306 |
+
"step": 8790
|
| 6307 |
+
},
|
| 6308 |
+
{
|
| 6309 |
+
"epoch": 1.4867376245987498,
|
| 6310 |
+
"grad_norm": 0.4922144114971161,
|
| 6311 |
+
"learning_rate": 6.526092192328048e-05,
|
| 6312 |
+
"loss": 4.337167358398437,
|
| 6313 |
+
"step": 8800
|
| 6314 |
+
},
|
| 6315 |
+
{
|
| 6316 |
+
"epoch": 1.4884270991721575,
|
| 6317 |
+
"grad_norm": 0.47720760107040405,
|
| 6318 |
+
"learning_rate": 6.486611331858879e-05,
|
| 6319 |
+
"loss": 4.330669403076172,
|
| 6320 |
+
"step": 8810
|
| 6321 |
+
},
|
| 6322 |
+
{
|
| 6323 |
+
"epoch": 1.490116573745565,
|
| 6324 |
+
"grad_norm": 0.45629069209098816,
|
| 6325 |
+
"learning_rate": 6.447217285076651e-05,
|
| 6326 |
+
"loss": 4.354007339477539,
|
| 6327 |
+
"step": 8820
|
| 6328 |
+
},
|
| 6329 |
+
{
|
| 6330 |
+
"epoch": 1.4918060483189728,
|
| 6331 |
+
"grad_norm": 0.4794461727142334,
|
| 6332 |
+
"learning_rate": 6.407910453694782e-05,
|
| 6333 |
+
"loss": 4.356667327880859,
|
| 6334 |
+
"step": 8830
|
| 6335 |
+
},
|
| 6336 |
+
{
|
| 6337 |
+
"epoch": 1.4934955228923805,
|
| 6338 |
+
"grad_norm": 0.4836932420730591,
|
| 6339 |
+
"learning_rate": 6.368691238537321e-05,
|
| 6340 |
+
"loss": 4.3167163848876955,
|
| 6341 |
+
"step": 8840
|
| 6342 |
+
},
|
| 6343 |
+
{
|
| 6344 |
+
"epoch": 1.4951849974657883,
|
| 6345 |
+
"grad_norm": 0.5060141086578369,
|
| 6346 |
+
"learning_rate": 6.329560039534874e-05,
|
| 6347 |
+
"loss": 4.362548828125,
|
| 6348 |
+
"step": 8850
|
| 6349 |
+
},
|
| 6350 |
+
{
|
| 6351 |
+
"epoch": 1.4968744720391958,
|
| 6352 |
+
"grad_norm": 0.48216700553894043,
|
| 6353 |
+
"learning_rate": 6.290517255720505e-05,
|
| 6354 |
+
"loss": 4.3512012481689455,
|
| 6355 |
+
"step": 8860
|
| 6356 |
+
},
|
| 6357 |
+
{
|
| 6358 |
+
"epoch": 1.4985639466126035,
|
| 6359 |
+
"grad_norm": 0.46019911766052246,
|
| 6360 |
+
"learning_rate": 6.251563285225707e-05,
|
| 6361 |
+
"loss": 4.32593002319336,
|
| 6362 |
+
"step": 8870
|
| 6363 |
+
},
|
| 6364 |
+
{
|
| 6365 |
+
"epoch": 1.5002534211860112,
|
| 6366 |
+
"grad_norm": 0.4773600697517395,
|
| 6367 |
+
"learning_rate": 6.212698525276294e-05,
|
| 6368 |
+
"loss": 4.345823287963867,
|
| 6369 |
+
"step": 8880
|
| 6370 |
+
},
|
| 6371 |
+
{
|
| 6372 |
+
"epoch": 1.5019428957594188,
|
| 6373 |
+
"grad_norm": 0.4903421401977539,
|
| 6374 |
+
"learning_rate": 6.173923372188372e-05,
|
| 6375 |
+
"loss": 4.330167770385742,
|
| 6376 |
+
"step": 8890
|
| 6377 |
+
},
|
| 6378 |
+
{
|
| 6379 |
+
"epoch": 1.5036323703328265,
|
| 6380 |
+
"grad_norm": 0.47027841210365295,
|
| 6381 |
+
"learning_rate": 6.135238221364313e-05,
|
| 6382 |
+
"loss": 4.352994155883789,
|
| 6383 |
+
"step": 8900
|
| 6384 |
+
},
|
| 6385 |
+
{
|
| 6386 |
+
"epoch": 1.5053218449062342,
|
| 6387 |
+
"grad_norm": 0.4893588125705719,
|
| 6388 |
+
"learning_rate": 6.096643467288703e-05,
|
| 6389 |
+
"loss": 4.3315269470214846,
|
| 6390 |
+
"step": 8910
|
| 6391 |
+
},
|
| 6392 |
+
{
|
| 6393 |
+
"epoch": 1.5070113194796417,
|
| 6394 |
+
"grad_norm": 0.4835808277130127,
|
| 6395 |
+
"learning_rate": 6.058139503524314e-05,
|
| 6396 |
+
"loss": 4.349056625366211,
|
| 6397 |
+
"step": 8920
|
| 6398 |
+
},
|
| 6399 |
+
{
|
| 6400 |
+
"epoch": 1.5087007940530495,
|
| 6401 |
+
"grad_norm": 0.4750809967517853,
|
| 6402 |
+
"learning_rate": 6.019726722708104e-05,
|
| 6403 |
+
"loss": 4.325545120239258,
|
| 6404 |
+
"step": 8930
|
| 6405 |
+
},
|
| 6406 |
+
{
|
| 6407 |
+
"epoch": 1.5103902686264572,
|
| 6408 |
+
"grad_norm": 0.4945700466632843,
|
| 6409 |
+
"learning_rate": 5.981405516547222e-05,
|
| 6410 |
+
"loss": 4.312815093994141,
|
| 6411 |
+
"step": 8940
|
| 6412 |
+
},
|
| 6413 |
+
{
|
| 6414 |
+
"epoch": 1.5120797431998647,
|
| 6415 |
+
"grad_norm": 0.4704221487045288,
|
| 6416 |
+
"learning_rate": 5.9431762758149875e-05,
|
| 6417 |
+
"loss": 4.328189849853516,
|
| 6418 |
+
"step": 8950
|
| 6419 |
+
},
|
| 6420 |
+
{
|
| 6421 |
+
"epoch": 1.5137692177732727,
|
| 6422 |
+
"grad_norm": 0.48752453923225403,
|
| 6423 |
+
"learning_rate": 5.9050393903469215e-05,
|
| 6424 |
+
"loss": 4.324124145507812,
|
| 6425 |
+
"step": 8960
|
| 6426 |
+
},
|
| 6427 |
+
{
|
| 6428 |
+
"epoch": 1.5154586923466802,
|
| 6429 |
+
"grad_norm": 0.5149093270301819,
|
| 6430 |
+
"learning_rate": 5.866995249036775e-05,
|
| 6431 |
+
"loss": 4.334346771240234,
|
| 6432 |
+
"step": 8970
|
| 6433 |
+
},
|
| 6434 |
+
{
|
| 6435 |
+
"epoch": 1.5171481669200877,
|
| 6436 |
+
"grad_norm": 0.49064958095550537,
|
| 6437 |
+
"learning_rate": 5.829044239832564e-05,
|
| 6438 |
+
"loss": 4.324323654174805,
|
| 6439 |
+
"step": 8980
|
| 6440 |
+
},
|
| 6441 |
+
{
|
| 6442 |
+
"epoch": 1.5188376414934956,
|
| 6443 |
+
"grad_norm": 0.486092746257782,
|
| 6444 |
+
"learning_rate": 5.791186749732594e-05,
|
| 6445 |
+
"loss": 4.346895599365235,
|
| 6446 |
+
"step": 8990
|
| 6447 |
+
},
|
| 6448 |
+
{
|
| 6449 |
+
"epoch": 1.5205271160669032,
|
| 6450 |
+
"grad_norm": 0.48512768745422363,
|
| 6451 |
+
"learning_rate": 5.7534231647815244e-05,
|
| 6452 |
+
"loss": 4.350548934936524,
|
| 6453 |
+
"step": 9000
|
| 6454 |
+
},
|
| 6455 |
+
{
|
| 6456 |
+
"epoch": 1.5205271160669032,
|
| 6457 |
+
"eval_loss": 4.312350273132324,
|
| 6458 |
+
"eval_runtime": 4.1596,
|
| 6459 |
+
"eval_samples_per_second": 240.409,
|
| 6460 |
+
"eval_steps_per_second": 5.049,
|
| 6461 |
+
"step": 9000
|
| 6462 |
}
|
| 6463 |
],
|
| 6464 |
"logging_steps": 10,
|
|
|
|
| 6478 |
"attributes": {}
|
| 6479 |
}
|
| 6480 |
},
|
| 6481 |
+
"total_flos": 3.010090484178616e+17,
|
| 6482 |
"train_batch_size": 48,
|
| 6483 |
"trial_name": null,
|
| 6484 |
"trial_params": null
|