| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 240, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 5.815704345703125, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.9944, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.37742123007774353, | |
| "eval_runtime": 10.6636, | |
| "eval_samples_per_second": 6.471, | |
| "eval_steps_per_second": 1.125, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy_fol": 0.9855072463768116, | |
| "eval_loss": 0.36990299820899963, | |
| "eval_runtime": 10.0571, | |
| "eval_samples_per_second": 6.861, | |
| "eval_steps_per_second": 1.193, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.4505714774131775, | |
| "eval_runtime": 10.0403, | |
| "eval_samples_per_second": 6.872, | |
| "eval_steps_per_second": 1.195, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.49083924293518066, | |
| "eval_runtime": 10.1223, | |
| "eval_samples_per_second": 6.817, | |
| "eval_steps_per_second": 1.186, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.551354706287384, | |
| "eval_runtime": 9.943, | |
| "eval_samples_per_second": 6.94, | |
| "eval_steps_per_second": 1.207, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy_fol": 0.9565217391304348, | |
| "eval_loss": 0.5758901834487915, | |
| "eval_runtime": 10.0414, | |
| "eval_samples_per_second": 6.872, | |
| "eval_steps_per_second": 1.195, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.6346259117126465, | |
| "eval_runtime": 10.0121, | |
| "eval_samples_per_second": 6.892, | |
| "eval_steps_per_second": 1.199, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy_fol": 0.9855072463768116, | |
| "eval_loss": 0.6786300539970398, | |
| "eval_runtime": 9.853, | |
| "eval_samples_per_second": 7.003, | |
| "eval_steps_per_second": 1.218, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy_fol": 0.9855072463768116, | |
| "eval_loss": 0.7070556282997131, | |
| "eval_runtime": 10.0666, | |
| "eval_samples_per_second": 6.854, | |
| "eval_steps_per_second": 1.192, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.7301931381225586, | |
| "eval_runtime": 9.8811, | |
| "eval_samples_per_second": 6.983, | |
| "eval_steps_per_second": 1.214, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.7777052521705627, | |
| "eval_runtime": 10.0443, | |
| "eval_samples_per_second": 6.87, | |
| "eval_steps_per_second": 1.195, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.7906161546707153, | |
| "eval_runtime": 9.9021, | |
| "eval_samples_per_second": 6.968, | |
| "eval_steps_per_second": 1.212, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.7852453589439392, | |
| "eval_runtime": 10.0952, | |
| "eval_samples_per_second": 6.835, | |
| "eval_steps_per_second": 1.189, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8031890392303467, | |
| "eval_runtime": 9.9012, | |
| "eval_samples_per_second": 6.969, | |
| "eval_steps_per_second": 1.212, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8141315579414368, | |
| "eval_runtime": 10.0811, | |
| "eval_samples_per_second": 6.845, | |
| "eval_steps_per_second": 1.19, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8203307390213013, | |
| "eval_runtime": 10.0556, | |
| "eval_samples_per_second": 6.862, | |
| "eval_steps_per_second": 1.193, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8233786225318909, | |
| "eval_runtime": 10.038, | |
| "eval_samples_per_second": 6.874, | |
| "eval_steps_per_second": 1.195, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8255409002304077, | |
| "eval_runtime": 9.8663, | |
| "eval_samples_per_second": 6.993, | |
| "eval_steps_per_second": 1.216, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8259706497192383, | |
| "eval_runtime": 9.9818, | |
| "eval_samples_per_second": 6.913, | |
| "eval_steps_per_second": 1.202, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy_fol": 1.0, | |
| "eval_loss": 0.8260682821273804, | |
| "eval_runtime": 9.8198, | |
| "eval_samples_per_second": 7.027, | |
| "eval_steps_per_second": 1.222, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 240, | |
| "total_flos": 4.93214716919808e+16, | |
| "train_loss": 0.09651691963275273, | |
| "train_runtime": 472.7445, | |
| "train_samples_per_second": 11.846, | |
| "train_steps_per_second": 0.508 | |
| } | |
| ], | |
| "logging_steps": 700, | |
| "max_steps": 240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.93214716919808e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |