Upload trainer_state.json with huggingface_hub
Browse files- trainer_state.json +25 -4
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 10,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -344,6 +344,27 @@
|
|
| 344 |
"eval_samples_per_second": 4.337,
|
| 345 |
"eval_steps_per_second": 1.084,
|
| 346 |
"step": 160
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
}
|
| 348 |
],
|
| 349 |
"logging_steps": 10,
|
|
@@ -358,12 +379,12 @@
|
|
| 358 |
"should_evaluate": false,
|
| 359 |
"should_log": false,
|
| 360 |
"should_save": true,
|
| 361 |
-
"should_training_stop":
|
| 362 |
},
|
| 363 |
"attributes": {}
|
| 364 |
}
|
| 365 |
},
|
| 366 |
-
"total_flos":
|
| 367 |
"train_batch_size": 4,
|
| 368 |
"trial_name": null,
|
| 369 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
"eval_steps": 10,
|
| 7 |
+
"global_step": 178,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 344 |
"eval_samples_per_second": 4.337,
|
| 345 |
"eval_steps_per_second": 1.084,
|
| 346 |
"step": 160
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"entropy": 6.240383183956146,
|
| 350 |
+
"epoch": 1.9152542372881356,
|
| 351 |
+
"grad_norm": 0.34765625,
|
| 352 |
+
"learning_rate": 1.0465116279069768e-05,
|
| 353 |
+
"loss": 6.1943107604980465,
|
| 354 |
+
"mean_token_accuracy": 0.2432584844529629,
|
| 355 |
+
"num_tokens": 2869517.0,
|
| 356 |
+
"step": 170
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"epoch": 1.9152542372881356,
|
| 360 |
+
"eval_entropy": 6.599712918202083,
|
| 361 |
+
"eval_loss": 6.369909763336182,
|
| 362 |
+
"eval_mean_token_accuracy": 0.20116472554703554,
|
| 363 |
+
"eval_num_tokens": 2869517.0,
|
| 364 |
+
"eval_runtime": 43.9021,
|
| 365 |
+
"eval_samples_per_second": 4.373,
|
| 366 |
+
"eval_steps_per_second": 1.093,
|
| 367 |
+
"step": 170
|
| 368 |
}
|
| 369 |
],
|
| 370 |
"logging_steps": 10,
|
|
|
|
| 379 |
"should_evaluate": false,
|
| 380 |
"should_log": false,
|
| 381 |
"should_save": true,
|
| 382 |
+
"should_training_stop": true
|
| 383 |
},
|
| 384 |
"attributes": {}
|
| 385 |
}
|
| 386 |
},
|
| 387 |
+
"total_flos": 2.1389497039169126e+17,
|
| 388 |
"train_batch_size": 4,
|
| 389 |
"trial_name": null,
|
| 390 |
"trial_params": null
|