{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 8910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.20035457557497433, "eval_loss": 5.137700080871582, "eval_runtime": 4.7614, "eval_samples_per_second": 42.424, "eval_steps_per_second": 1.47, "step": 891 }, { "epoch": 1.122334455667789, "grad_norm": 12257.177734375, "learning_rate": 0.0005993999999999999, "loss": 1.2877, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.22360543295034005, "eval_loss": 4.890064239501953, "eval_runtime": 4.3477, "eval_samples_per_second": 46.461, "eval_steps_per_second": 1.61, "step": 1782 }, { "epoch": 2.244668911335578, "grad_norm": 16755.361328125, "learning_rate": 0.0005242225031605562, "loss": 1.1355, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.2355505609269342, "eval_loss": 4.7771735191345215, "eval_runtime": 4.3663, "eval_samples_per_second": 46.264, "eval_steps_per_second": 1.603, "step": 2673 }, { "epoch": 3.3670033670033668, "grad_norm": 27167.396484375, "learning_rate": 0.0004483691529709229, "loss": 1.0964, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.2399488481137742, "eval_loss": 4.702260971069336, "eval_runtime": 4.4874, "eval_samples_per_second": 45.014, "eval_steps_per_second": 1.56, "step": 3564 }, { "epoch": 4.489337822671156, "grad_norm": 20150.478515625, "learning_rate": 0.00037251580278128944, "loss": 1.0755, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.2460231346030885, "eval_loss": 4.61249303817749, "eval_runtime": 4.3579, "eval_samples_per_second": 46.353, "eval_steps_per_second": 1.606, "step": 4455 }, { "epoch": 5.611672278338945, "grad_norm": 24080.505859375, "learning_rate": 0.0002966624525916561, "loss": 1.0502, "step": 5000 }, { "epoch": 6.0, "eval_accuracy": 0.2551200325512003, "eval_loss": 4.534069538116455, "eval_runtime": 4.3651, "eval_samples_per_second": 46.276, "eval_steps_per_second": 1.604, "step": 5346 }, { "epoch": 6.7340067340067336, "grad_norm": 19178.21484375, "learning_rate": 0.00022080910240202274, "loss": 1.0248, "step": 6000 }, { "epoch": 7.0, "eval_accuracy": 0.2642556819282711, "eval_loss": 4.448388576507568, "eval_runtime": 4.4493, "eval_samples_per_second": 45.401, "eval_steps_per_second": 1.573, "step": 6237 }, { "epoch": 7.856341189674523, "grad_norm": 13593.7294921875, "learning_rate": 0.00014495575221238938, "loss": 1.0035, "step": 7000 }, { "epoch": 8.0, "eval_accuracy": 0.2731878863033074, "eval_loss": 4.369141578674316, "eval_runtime": 4.3376, "eval_samples_per_second": 46.569, "eval_steps_per_second": 1.614, "step": 7128 }, { "epoch": 8.978675645342312, "grad_norm": 12907.32421875, "learning_rate": 6.9102402022756e-05, "loss": 0.9814, "step": 8000 }, { "epoch": 9.0, "eval_accuracy": 0.27882621921683365, "eval_loss": 4.313453197479248, "eval_runtime": 4.3378, "eval_samples_per_second": 46.567, "eval_steps_per_second": 1.614, "step": 8019 }, { "epoch": 10.0, "eval_accuracy": 0.28208133924938483, "eval_loss": 4.283874988555908, "eval_runtime": 4.4601, "eval_samples_per_second": 45.29, "eval_steps_per_second": 1.569, "step": 8910 }, { "epoch": 10.0, "step": 8910, "total_flos": 7.44290353152e+16, "train_loss": 1.0702610208530619, "train_runtime": 9181.6961, "train_samples_per_second": 31.024, "train_steps_per_second": 0.97 } ], "logging_steps": 1000, "max_steps": 8910, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.44290353152e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }