| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 8910, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.20035457557497433, | |
| "eval_loss": 5.137700080871582, | |
| "eval_runtime": 4.7614, | |
| "eval_samples_per_second": 42.424, | |
| "eval_steps_per_second": 1.47, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.122334455667789, | |
| "grad_norm": 12257.177734375, | |
| "learning_rate": 0.0005993999999999999, | |
| "loss": 1.2877, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.22360543295034005, | |
| "eval_loss": 4.890064239501953, | |
| "eval_runtime": 4.3477, | |
| "eval_samples_per_second": 46.461, | |
| "eval_steps_per_second": 1.61, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.244668911335578, | |
| "grad_norm": 16755.361328125, | |
| "learning_rate": 0.0005242225031605562, | |
| "loss": 1.1355, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.2355505609269342, | |
| "eval_loss": 4.7771735191345215, | |
| "eval_runtime": 4.3663, | |
| "eval_samples_per_second": 46.264, | |
| "eval_steps_per_second": 1.603, | |
| "step": 2673 | |
| }, | |
| { | |
| "epoch": 3.3670033670033668, | |
| "grad_norm": 27167.396484375, | |
| "learning_rate": 0.0004483691529709229, | |
| "loss": 1.0964, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.2399488481137742, | |
| "eval_loss": 4.702260971069336, | |
| "eval_runtime": 4.4874, | |
| "eval_samples_per_second": 45.014, | |
| "eval_steps_per_second": 1.56, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 4.489337822671156, | |
| "grad_norm": 20150.478515625, | |
| "learning_rate": 0.00037251580278128944, | |
| "loss": 1.0755, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.2460231346030885, | |
| "eval_loss": 4.61249303817749, | |
| "eval_runtime": 4.3579, | |
| "eval_samples_per_second": 46.353, | |
| "eval_steps_per_second": 1.606, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 5.611672278338945, | |
| "grad_norm": 24080.505859375, | |
| "learning_rate": 0.0002966624525916561, | |
| "loss": 1.0502, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.2551200325512003, | |
| "eval_loss": 4.534069538116455, | |
| "eval_runtime": 4.3651, | |
| "eval_samples_per_second": 46.276, | |
| "eval_steps_per_second": 1.604, | |
| "step": 5346 | |
| }, | |
| { | |
| "epoch": 6.7340067340067336, | |
| "grad_norm": 19178.21484375, | |
| "learning_rate": 0.00022080910240202274, | |
| "loss": 1.0248, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.2642556819282711, | |
| "eval_loss": 4.448388576507568, | |
| "eval_runtime": 4.4493, | |
| "eval_samples_per_second": 45.401, | |
| "eval_steps_per_second": 1.573, | |
| "step": 6237 | |
| }, | |
| { | |
| "epoch": 7.856341189674523, | |
| "grad_norm": 13593.7294921875, | |
| "learning_rate": 0.00014495575221238938, | |
| "loss": 1.0035, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.2731878863033074, | |
| "eval_loss": 4.369141578674316, | |
| "eval_runtime": 4.3376, | |
| "eval_samples_per_second": 46.569, | |
| "eval_steps_per_second": 1.614, | |
| "step": 7128 | |
| }, | |
| { | |
| "epoch": 8.978675645342312, | |
| "grad_norm": 12907.32421875, | |
| "learning_rate": 6.9102402022756e-05, | |
| "loss": 0.9814, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.27882621921683365, | |
| "eval_loss": 4.313453197479248, | |
| "eval_runtime": 4.3378, | |
| "eval_samples_per_second": 46.567, | |
| "eval_steps_per_second": 1.614, | |
| "step": 8019 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.28208133924938483, | |
| "eval_loss": 4.283874988555908, | |
| "eval_runtime": 4.4601, | |
| "eval_samples_per_second": 45.29, | |
| "eval_steps_per_second": 1.569, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 8910, | |
| "total_flos": 7.44290353152e+16, | |
| "train_loss": 1.0702610208530619, | |
| "train_runtime": 9181.6961, | |
| "train_samples_per_second": 31.024, | |
| "train_steps_per_second": 0.97 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 8910, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.44290353152e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |