{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.99985909539242,
  "global_step": 17740,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.28,
      "learning_rate": 4.8590755355129655e-05,
      "loss": 0.9393,
      "step": 500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.71815107102593e-05,
      "loss": 0.3539,
      "step": 1000
    },
    {
      "epoch": 0.56,
      "eval_acc": 0.56,
      "eval_loss": 0.2668898105621338,
      "eval_num": 14748,
      "eval_runtime": 3131.2422,
      "eval_samples_per_second": 4.71,
      "eval_steps_per_second": 1.177,
      "eval_true_num": 8264,
      "step": 1000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.5772266065388954e-05,
      "loss": 0.2978,
      "step": 1500
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.43630214205186e-05,
      "loss": 0.2523,
      "step": 2000
    },
    {
      "epoch": 1.13,
      "eval_acc": 0.56,
      "eval_loss": 0.203141987323761,
      "eval_num": 14748,
      "eval_runtime": 3124.7339,
      "eval_samples_per_second": 4.72,
      "eval_steps_per_second": 1.18,
      "eval_true_num": 8317,
      "step": 2000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.295377677564825e-05,
      "loss": 0.2261,
      "step": 2500
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.1544532130777905e-05,
      "loss": 0.2003,
      "step": 3000
    },
    {
      "epoch": 1.69,
      "eval_acc": 0.58,
      "eval_loss": 0.149781733751297,
      "eval_num": 14748,
      "eval_runtime": 3130.9718,
      "eval_samples_per_second": 4.71,
      "eval_steps_per_second": 1.178,
      "eval_true_num": 8496,
      "step": 3000
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.013528748590756e-05,
      "loss": 0.1805,
      "step": 3500
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.872604284103721e-05,
      "loss": 0.1609,
      "step": 4000
    },
    {
      "epoch": 2.25,
      "eval_acc": 0.58,
      "eval_loss": 0.12837813794612885,
      "eval_num": 14748,
      "eval_runtime": 3114.0685,
      "eval_samples_per_second": 4.736,
      "eval_steps_per_second": 1.184,
      "eval_true_num": 8612,
      "step": 4000
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.731679819616686e-05,
      "loss": 0.1538,
      "step": 4500
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.590755355129651e-05,
      "loss": 0.1431,
      "step": 5000
    },
    {
      "epoch": 2.82,
      "eval_acc": 0.59,
      "eval_loss": 0.11188653111457825,
      "eval_num": 14748,
      "eval_runtime": 3118.5595,
      "eval_samples_per_second": 4.729,
      "eval_steps_per_second": 1.182,
      "eval_true_num": 8675,
      "step": 5000
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.4498308906426155e-05,
      "loss": 0.1341,
      "step": 5500
    },
    {
      "epoch": 3.38,
      "learning_rate": 3.308906426155581e-05,
      "loss": 0.1236,
      "step": 6000
    },
    {
      "epoch": 3.38,
      "eval_acc": 0.59,
      "eval_loss": 0.10543708503246307,
      "eval_num": 14748,
      "eval_runtime": 3115.5827,
      "eval_samples_per_second": 4.734,
      "eval_steps_per_second": 1.183,
      "eval_true_num": 8737,
      "step": 6000
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.167981961668546e-05,
      "loss": 0.1191,
      "step": 6500
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.0270574971815107e-05,
      "loss": 0.1172,
      "step": 7000
    },
    {
      "epoch": 3.95,
      "eval_acc": 0.59,
      "eval_loss": 0.09806588292121887,
      "eval_num": 14748,
      "eval_runtime": 3113.6457,
      "eval_samples_per_second": 4.737,
      "eval_steps_per_second": 1.184,
      "eval_true_num": 8773,
      "step": 7000
    },
    {
      "epoch": 4.23,
      "learning_rate": 2.8861330326944756e-05,
      "loss": 0.1059,
      "step": 7500
    },
    {
      "epoch": 4.51,
      "learning_rate": 2.745208568207441e-05,
      "loss": 0.1027,
      "step": 8000
    },
    {
      "epoch": 4.51,
      "eval_acc": 0.6,
      "eval_loss": 0.09554540365934372,
      "eval_num": 14748,
      "eval_runtime": 3108.2274,
      "eval_samples_per_second": 4.745,
      "eval_steps_per_second": 1.186,
      "eval_true_num": 8787,
      "step": 8000
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.604284103720406e-05,
      "loss": 0.102,
      "step": 8500
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.463359639233371e-05,
      "loss": 0.0968,
      "step": 9000
    },
    {
      "epoch": 5.07,
      "eval_acc": 0.6,
      "eval_loss": 0.09313096106052399,
      "eval_num": 14748,
      "eval_runtime": 3181.4547,
      "eval_samples_per_second": 4.636,
      "eval_steps_per_second": 1.159,
      "eval_true_num": 8807,
      "step": 9000
    },
    {
      "epoch": 5.36,
      "learning_rate": 2.322435174746336e-05,
      "loss": 0.0923,
      "step": 9500
    },
    {
      "epoch": 5.64,
      "learning_rate": 2.181510710259301e-05,
      "loss": 0.0911,
      "step": 10000
    },
    {
      "epoch": 5.64,
      "eval_acc": 0.6,
      "eval_loss": 0.08947672694921494,
      "eval_num": 14748,
      "eval_runtime": 3114.0147,
      "eval_samples_per_second": 4.736,
      "eval_steps_per_second": 1.184,
      "eval_true_num": 8787,
      "step": 10000
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.040586245772266e-05,
      "loss": 0.0903,
      "step": 10500
    },
    {
      "epoch": 6.2,
      "learning_rate": 1.8996617812852312e-05,
      "loss": 0.0852,
      "step": 11000
    },
    {
      "epoch": 6.2,
      "eval_acc": 0.6,
      "eval_loss": 0.09117105603218079,
      "eval_num": 14748,
      "eval_runtime": 3130.4988,
      "eval_samples_per_second": 4.711,
      "eval_steps_per_second": 1.178,
      "eval_true_num": 8840,
      "step": 11000
    },
    {
      "epoch": 6.48,
      "learning_rate": 1.7587373167981965e-05,
      "loss": 0.0819,
      "step": 11500
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.6178128523111614e-05,
      "loss": 0.0823,
      "step": 12000
    },
    {
      "epoch": 6.76,
      "eval_acc": 0.6,
      "eval_loss": 0.08801201730966568,
      "eval_num": 14748,
      "eval_runtime": 3126.5232,
      "eval_samples_per_second": 4.717,
      "eval_steps_per_second": 1.179,
      "eval_true_num": 8846,
      "step": 12000
    },
    {
      "epoch": 7.05,
      "learning_rate": 1.4768883878241263e-05,
      "loss": 0.0798,
      "step": 12500
    },
    {
      "epoch": 7.33,
      "learning_rate": 1.3359639233370913e-05,
      "loss": 0.0768,
      "step": 13000
    },
    {
      "epoch": 7.33,
      "eval_acc": 0.6,
      "eval_loss": 0.09153631329536438,
      "eval_num": 14748,
      "eval_runtime": 3122.3149,
      "eval_samples_per_second": 4.723,
      "eval_steps_per_second": 1.181,
      "eval_true_num": 8879,
      "step": 13000
    },
    {
      "epoch": 7.61,
      "learning_rate": 1.1950394588500564e-05,
      "loss": 0.0766,
      "step": 13500
    },
    {
      "epoch": 7.89,
      "learning_rate": 1.0541149943630215e-05,
      "loss": 0.0758,
      "step": 14000
    },
    {
      "epoch": 7.89,
      "eval_acc": 0.6,
      "eval_loss": 0.08920902758836746,
      "eval_num": 14748,
      "eval_runtime": 3113.1818,
      "eval_samples_per_second": 4.737,
      "eval_steps_per_second": 1.184,
      "eval_true_num": 8853,
      "step": 14000
    },
    {
      "epoch": 8.17,
      "learning_rate": 9.131905298759866e-06,
      "loss": 0.0723,
      "step": 14500
    },
    {
      "epoch": 8.46,
      "learning_rate": 7.722660653889515e-06,
      "loss": 0.0708,
      "step": 15000
    },
    {
      "epoch": 8.46,
      "eval_acc": 0.6,
      "eval_loss": 0.08845733851194382,
      "eval_num": 14748,
      "eval_runtime": 3118.0936,
      "eval_samples_per_second": 4.73,
      "eval_steps_per_second": 1.182,
      "eval_true_num": 8884,
      "step": 15000
    },
    {
      "epoch": 8.74,
      "learning_rate": 6.313416009019166e-06,
      "loss": 0.0725,
      "step": 15500
    },
    {
      "epoch": 9.02,
      "learning_rate": 4.904171364148816e-06,
      "loss": 0.0701,
      "step": 16000
    },
    {
      "epoch": 9.02,
      "eval_acc": 0.6,
      "eval_loss": 0.08842625468969345,
      "eval_num": 14748,
      "eval_runtime": 3110.625,
      "eval_samples_per_second": 4.741,
      "eval_steps_per_second": 1.185,
      "eval_true_num": 8915,
      "step": 16000
    },
    {
      "epoch": 9.3,
      "learning_rate": 3.494926719278467e-06,
      "loss": 0.0686,
      "step": 16500
    },
    {
      "epoch": 9.58,
      "learning_rate": 2.0856820744081176e-06,
      "loss": 0.0685,
      "step": 17000
    },
    {
      "epoch": 9.58,
      "eval_acc": 0.6,
      "eval_loss": 0.08841479569673538,
      "eval_num": 14748,
      "eval_runtime": 3115.2193,
      "eval_samples_per_second": 4.734,
      "eval_steps_per_second": 1.184,
      "eval_true_num": 8921,
      "step": 17000
    },
    {
      "epoch": 9.86,
      "learning_rate": 6.764374295377678e-07,
      "loss": 0.0676,
      "step": 17500
    },
    {
      "epoch": 10.0,
      "step": 17740,
      "total_flos": 1.671210021126144e+18,
      "train_loss": 0.14555114742868244,
      "train_runtime": 234335.1808,
      "train_samples_per_second": 4.846,
      "train_steps_per_second": 0.076
    }
  ],
  "max_steps": 17740,
  "num_train_epochs": 10,
  "total_flos": 1.671210021126144e+18,
  "trial_name": null,
  "trial_params": null
}