Training in progress, epoch 29, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 990185320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c66c8955128e5e62b623b11b9ae6effa8174d3e5b88cc5a8d94a8e6d659abc1b
|
| 3 |
size 990185320
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1980541387
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c77cbc69914cf82936274255b687c22dd295cf06c93e14ff29417415459cea06
|
| 3 |
size 1980541387
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ed1a06b153dad4a8a660e42029973a714386f051e63eb7e369425dfe3df9276
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f1547202e5461888783dd093e6ac1ad6ae74788ba3d5b6af2761bd28f88426a
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3242,6 +3242,126 @@
|
|
| 3242 |
"eval_samples_per_second": 22.068,
|
| 3243 |
"eval_steps_per_second": 2.759,
|
| 3244 |
"step": 43092
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3245 |
}
|
| 3246 |
],
|
| 3247 |
"logging_steps": 100,
|
|
@@ -3261,7 +3381,7 @@
|
|
| 3261 |
"attributes": {}
|
| 3262 |
}
|
| 3263 |
},
|
| 3264 |
-
"total_flos": 6.
|
| 3265 |
"train_batch_size": 8,
|
| 3266 |
"trial_name": null,
|
| 3267 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 29.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 44631,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3242 |
"eval_samples_per_second": 22.068,
|
| 3243 |
"eval_steps_per_second": 2.759,
|
| 3244 |
"step": 43092
|
| 3245 |
+
},
|
| 3246 |
+
{
|
| 3247 |
+
"epoch": 28.00519818063678,
|
| 3248 |
+
"grad_norm": 4.655136585235596,
|
| 3249 |
+
"learning_rate": 3.3257526532380337e-06,
|
| 3250 |
+
"loss": 1.4119,
|
| 3251 |
+
"step": 43100
|
| 3252 |
+
},
|
| 3253 |
+
{
|
| 3254 |
+
"epoch": 28.07017543859649,
|
| 3255 |
+
"grad_norm": 5.602964878082275,
|
| 3256 |
+
"learning_rate": 3.2174572233051767e-06,
|
| 3257 |
+
"loss": 1.3656,
|
| 3258 |
+
"step": 43200
|
| 3259 |
+
},
|
| 3260 |
+
{
|
| 3261 |
+
"epoch": 28.135152696556204,
|
| 3262 |
+
"grad_norm": 3.6727871894836426,
|
| 3263 |
+
"learning_rate": 3.10916179337232e-06,
|
| 3264 |
+
"loss": 1.4041,
|
| 3265 |
+
"step": 43300
|
| 3266 |
+
},
|
| 3267 |
+
{
|
| 3268 |
+
"epoch": 28.20012995451592,
|
| 3269 |
+
"grad_norm": 5.562687397003174,
|
| 3270 |
+
"learning_rate": 3.000866363439463e-06,
|
| 3271 |
+
"loss": 1.4439,
|
| 3272 |
+
"step": 43400
|
| 3273 |
+
},
|
| 3274 |
+
{
|
| 3275 |
+
"epoch": 28.265107212475634,
|
| 3276 |
+
"grad_norm": 3.7093451023101807,
|
| 3277 |
+
"learning_rate": 2.892570933506606e-06,
|
| 3278 |
+
"loss": 1.3836,
|
| 3279 |
+
"step": 43500
|
| 3280 |
+
},
|
| 3281 |
+
{
|
| 3282 |
+
"epoch": 28.33008447043535,
|
| 3283 |
+
"grad_norm": 6.225944519042969,
|
| 3284 |
+
"learning_rate": 2.7842755035737496e-06,
|
| 3285 |
+
"loss": 1.4018,
|
| 3286 |
+
"step": 43600
|
| 3287 |
+
},
|
| 3288 |
+
{
|
| 3289 |
+
"epoch": 28.395061728395063,
|
| 3290 |
+
"grad_norm": 3.9284849166870117,
|
| 3291 |
+
"learning_rate": 2.6759800736408926e-06,
|
| 3292 |
+
"loss": 1.4189,
|
| 3293 |
+
"step": 43700
|
| 3294 |
+
},
|
| 3295 |
+
{
|
| 3296 |
+
"epoch": 28.460038986354775,
|
| 3297 |
+
"grad_norm": 4.287786483764648,
|
| 3298 |
+
"learning_rate": 2.5676846437080356e-06,
|
| 3299 |
+
"loss": 1.4119,
|
| 3300 |
+
"step": 43800
|
| 3301 |
+
},
|
| 3302 |
+
{
|
| 3303 |
+
"epoch": 28.52501624431449,
|
| 3304 |
+
"grad_norm": 5.376986980438232,
|
| 3305 |
+
"learning_rate": 2.459389213775179e-06,
|
| 3306 |
+
"loss": 1.4671,
|
| 3307 |
+
"step": 43900
|
| 3308 |
+
},
|
| 3309 |
+
{
|
| 3310 |
+
"epoch": 28.589993502274204,
|
| 3311 |
+
"grad_norm": 4.01196813583374,
|
| 3312 |
+
"learning_rate": 2.351093783842322e-06,
|
| 3313 |
+
"loss": 1.413,
|
| 3314 |
+
"step": 44000
|
| 3315 |
+
},
|
| 3316 |
+
{
|
| 3317 |
+
"epoch": 28.65497076023392,
|
| 3318 |
+
"grad_norm": 9.470341682434082,
|
| 3319 |
+
"learning_rate": 2.242798353909465e-06,
|
| 3320 |
+
"loss": 1.4883,
|
| 3321 |
+
"step": 44100
|
| 3322 |
+
},
|
| 3323 |
+
{
|
| 3324 |
+
"epoch": 28.719948018193634,
|
| 3325 |
+
"grad_norm": 3.921780586242676,
|
| 3326 |
+
"learning_rate": 2.1345029239766084e-06,
|
| 3327 |
+
"loss": 1.4371,
|
| 3328 |
+
"step": 44200
|
| 3329 |
+
},
|
| 3330 |
+
{
|
| 3331 |
+
"epoch": 28.784925276153345,
|
| 3332 |
+
"grad_norm": 6.419370651245117,
|
| 3333 |
+
"learning_rate": 2.0262074940437514e-06,
|
| 3334 |
+
"loss": 1.468,
|
| 3335 |
+
"step": 44300
|
| 3336 |
+
},
|
| 3337 |
+
{
|
| 3338 |
+
"epoch": 28.84990253411306,
|
| 3339 |
+
"grad_norm": 3.46016263961792,
|
| 3340 |
+
"learning_rate": 1.9179120641108944e-06,
|
| 3341 |
+
"loss": 1.4476,
|
| 3342 |
+
"step": 44400
|
| 3343 |
+
},
|
| 3344 |
+
{
|
| 3345 |
+
"epoch": 28.914879792072774,
|
| 3346 |
+
"grad_norm": 5.6550822257995605,
|
| 3347 |
+
"learning_rate": 1.8096166341780376e-06,
|
| 3348 |
+
"loss": 1.4096,
|
| 3349 |
+
"step": 44500
|
| 3350 |
+
},
|
| 3351 |
+
{
|
| 3352 |
+
"epoch": 28.97985705003249,
|
| 3353 |
+
"grad_norm": 4.346546173095703,
|
| 3354 |
+
"learning_rate": 1.701321204245181e-06,
|
| 3355 |
+
"loss": 1.4135,
|
| 3356 |
+
"step": 44600
|
| 3357 |
+
},
|
| 3358 |
+
{
|
| 3359 |
+
"epoch": 29.0,
|
| 3360 |
+
"eval_loss": 1.3682384490966797,
|
| 3361 |
+
"eval_runtime": 61.7947,
|
| 3362 |
+
"eval_samples_per_second": 22.138,
|
| 3363 |
+
"eval_steps_per_second": 2.767,
|
| 3364 |
+
"step": 44631
|
| 3365 |
}
|
| 3366 |
],
|
| 3367 |
"logging_steps": 100,
|
|
|
|
| 3381 |
"attributes": {}
|
| 3382 |
}
|
| 3383 |
},
|
| 3384 |
+
"total_flos": 6.774142686776525e+16,
|
| 3385 |
"train_batch_size": 8,
|
| 3386 |
"trial_name": null,
|
| 3387 |
"trial_params": null
|