| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 417130, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999988013329178e-05, | |
| "loss": 5.0345, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.5e-05, | |
| "loss": 3.5452, | |
| "step": 41713 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.4118760802529946, | |
| "eval_loss": 3.3738181591033936, | |
| "eval_runtime": 157.5504, | |
| "eval_samples_per_second": 129.368, | |
| "eval_steps_per_second": 16.173, | |
| "step": 41713 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4e-05, | |
| "loss": 3.4132, | |
| "step": 83426 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.4179529258872079, | |
| "eval_loss": 3.326674461364746, | |
| "eval_runtime": 156.5806, | |
| "eval_samples_per_second": 130.169, | |
| "eval_steps_per_second": 16.273, | |
| "step": 83426 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 3.5e-05, | |
| "loss": 3.3573, | |
| "step": 125139 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.4213834655713664, | |
| "eval_loss": 3.3009300231933594, | |
| "eval_runtime": 156.2147, | |
| "eval_samples_per_second": 130.474, | |
| "eval_steps_per_second": 16.311, | |
| "step": 125139 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 3e-05, | |
| "loss": 3.3196, | |
| "step": 166852 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.42338731797471446, | |
| "eval_loss": 3.2852749824523926, | |
| "eval_runtime": 157.4446, | |
| "eval_samples_per_second": 129.455, | |
| "eval_steps_per_second": 16.183, | |
| "step": 166852 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 3.2912, | |
| "step": 208565 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.42540662325952266, | |
| "eval_loss": 3.2730937004089355, | |
| "eval_runtime": 155.9547, | |
| "eval_samples_per_second": 130.692, | |
| "eval_steps_per_second": 16.338, | |
| "step": 208565 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 2e-05, | |
| "loss": 3.2688, | |
| "step": 250278 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.42657331580976576, | |
| "eval_loss": 3.265730857849121, | |
| "eval_runtime": 157.36, | |
| "eval_samples_per_second": 129.525, | |
| "eval_steps_per_second": 16.192, | |
| "step": 250278 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.5e-05, | |
| "loss": 3.2506, | |
| "step": 291991 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.4276302929016416, | |
| "eval_loss": 3.25875186920166, | |
| "eval_runtime": 157.1763, | |
| "eval_samples_per_second": 129.676, | |
| "eval_steps_per_second": 16.211, | |
| "step": 291991 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1e-05, | |
| "loss": 3.2358, | |
| "step": 333704 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.4284013916865043, | |
| "eval_loss": 3.2545275688171387, | |
| "eval_runtime": 155.6431, | |
| "eval_samples_per_second": 130.953, | |
| "eval_steps_per_second": 16.371, | |
| "step": 333704 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5e-06, | |
| "loss": 3.2237, | |
| "step": 375417 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.4288819762999157, | |
| "eval_loss": 3.2520158290863037, | |
| "eval_runtime": 156.3518, | |
| "eval_samples_per_second": 130.36, | |
| "eval_steps_per_second": 16.297, | |
| "step": 375417 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0, | |
| "loss": 3.2144, | |
| "step": 417130 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.42910256618275966, | |
| "eval_loss": 3.251399040222168, | |
| "eval_runtime": 154.5215, | |
| "eval_samples_per_second": 131.904, | |
| "eval_steps_per_second": 16.49, | |
| "step": 417130 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 417130, | |
| "total_flos": 6.5394993954816e+17, | |
| "train_loss": 3.311987802313766, | |
| "train_runtime": 196254.5716, | |
| "train_samples_per_second": 51.01, | |
| "train_steps_per_second": 2.125 | |
| } | |
| ], | |
| "max_steps": 417130, | |
| "num_train_epochs": 10, | |
| "total_flos": 6.5394993954816e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |