{ "best_global_step": 237, "best_metric": 0.9935535559662932, "best_model_checkpoint": "ckpt/checkpoint-237", "epoch": 3.0, "eval_steps": 500, "global_step": 237, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08917197452229299, "grad_norm": 36.30582809448242, "learning_rate": 5.2173913043478265e-06, "loss": 1.1125, "step": 7 }, { "epoch": 0.17834394904458598, "grad_norm": 35.81989669799805, "learning_rate": 1.1304347826086957e-05, "loss": 0.8511, "step": 14 }, { "epoch": 0.267515923566879, "grad_norm": 6.439170837402344, "learning_rate": 1.739130434782609e-05, "loss": 0.6037, "step": 21 }, { "epoch": 0.35668789808917195, "grad_norm": 3.1305227279663086, "learning_rate": 1.9626168224299065e-05, "loss": 0.3597, "step": 28 }, { "epoch": 0.445859872611465, "grad_norm": 4.9628095626831055, "learning_rate": 1.8971962616822433e-05, "loss": 0.2122, "step": 35 }, { "epoch": 0.535031847133758, "grad_norm": 3.5573127269744873, "learning_rate": 1.8317757009345797e-05, "loss": 0.1176, "step": 42 }, { "epoch": 0.6242038216560509, "grad_norm": 2.0348973274230957, "learning_rate": 1.766355140186916e-05, "loss": 0.0712, "step": 49 }, { "epoch": 0.7133757961783439, "grad_norm": 6.060424327850342, "learning_rate": 1.7009345794392526e-05, "loss": 0.0753, "step": 56 }, { "epoch": 0.802547770700637, "grad_norm": 3.5385732650756836, "learning_rate": 1.635514018691589e-05, "loss": 0.0575, "step": 63 }, { "epoch": 0.89171974522293, "grad_norm": 2.5258700847625732, "learning_rate": 1.5700934579439254e-05, "loss": 0.039, "step": 70 }, { "epoch": 0.9808917197452229, "grad_norm": 0.7659706473350525, "learning_rate": 1.5046728971962619e-05, "loss": 0.0656, "step": 77 }, { "epoch": 1.0, "eval_accuracy": 0.9894872195867869, "eval_f1": 0.9894880819914448, "eval_loss": 0.03790666535496712, "eval_runtime": 2530.3213, "eval_samples_per_second": 22.744, "eval_steps_per_second": 0.356, "step": 79 }, { "epoch": 1.0636942675159236, "grad_norm": 0.6808683276176453, "learning_rate": 1.4392523364485981e-05, "loss": 0.0528, "step": 84 }, { "epoch": 1.1528662420382165, "grad_norm": 26.949949264526367, "learning_rate": 1.3738317757009347e-05, "loss": 0.099, "step": 91 }, { "epoch": 1.2420382165605095, "grad_norm": 1.5863674879074097, "learning_rate": 1.308411214953271e-05, "loss": 0.0336, "step": 98 }, { "epoch": 1.3312101910828025, "grad_norm": 2.183351755142212, "learning_rate": 1.2429906542056076e-05, "loss": 0.0173, "step": 105 }, { "epoch": 1.4203821656050954, "grad_norm": 2.4681448936462402, "learning_rate": 1.177570093457944e-05, "loss": 0.017, "step": 112 }, { "epoch": 1.5095541401273884, "grad_norm": 6.7833476066589355, "learning_rate": 1.1121495327102804e-05, "loss": 0.0249, "step": 119 }, { "epoch": 1.5987261146496814, "grad_norm": 0.06058590114116669, "learning_rate": 1.0467289719626168e-05, "loss": 0.0244, "step": 126 }, { "epoch": 1.6878980891719744, "grad_norm": 0.17764343321323395, "learning_rate": 9.813084112149533e-06, "loss": 0.0357, "step": 133 }, { "epoch": 1.7770700636942676, "grad_norm": 12.890459060668945, "learning_rate": 9.158878504672899e-06, "loss": 0.0439, "step": 140 }, { "epoch": 1.8662420382165605, "grad_norm": 7.447948455810547, "learning_rate": 8.504672897196263e-06, "loss": 0.0415, "step": 147 }, { "epoch": 1.9554140127388535, "grad_norm": 0.40099361538887024, "learning_rate": 7.850467289719627e-06, "loss": 0.0141, "step": 154 }, { "epoch": 2.0, "eval_accuracy": 0.9897478670350484, "eval_f1": 0.9897461756211305, "eval_loss": 0.03573331609368324, "eval_runtime": 2458.8654, "eval_samples_per_second": 23.405, "eval_steps_per_second": 0.366, "step": 158 }, { "epoch": 2.038216560509554, "grad_norm": 0.18038131296634674, "learning_rate": 7.196261682242991e-06, "loss": 0.0107, "step": 161 }, { "epoch": 2.127388535031847, "grad_norm": 8.816960334777832, "learning_rate": 6.542056074766355e-06, "loss": 0.0197, "step": 168 }, { "epoch": 2.21656050955414, "grad_norm": 0.17330680787563324, "learning_rate": 5.88785046728972e-06, "loss": 0.0492, "step": 175 }, { "epoch": 2.305732484076433, "grad_norm": 0.10762328654527664, "learning_rate": 5.233644859813084e-06, "loss": 0.0223, "step": 182 }, { "epoch": 2.394904458598726, "grad_norm": 0.06711781769990921, "learning_rate": 4.579439252336449e-06, "loss": 0.0263, "step": 189 }, { "epoch": 2.484076433121019, "grad_norm": 0.7490471601486206, "learning_rate": 3.925233644859814e-06, "loss": 0.004, "step": 196 }, { "epoch": 2.573248407643312, "grad_norm": 0.060274120420217514, "learning_rate": 3.2710280373831774e-06, "loss": 0.0124, "step": 203 }, { "epoch": 2.662420382165605, "grad_norm": 12.112130165100098, "learning_rate": 2.616822429906542e-06, "loss": 0.013, "step": 210 }, { "epoch": 2.7515923566878984, "grad_norm": 0.036105964332818985, "learning_rate": 1.962616822429907e-06, "loss": 0.0034, "step": 217 }, { "epoch": 2.840764331210191, "grad_norm": 0.03519793599843979, "learning_rate": 1.308411214953271e-06, "loss": 0.0106, "step": 224 }, { "epoch": 2.9299363057324843, "grad_norm": 0.6189459562301636, "learning_rate": 6.542056074766355e-07, "loss": 0.0095, "step": 231 }, { "epoch": 3.0, "eval_accuracy": 0.993553319779666, "eval_f1": 0.9935535559662932, "eval_loss": 0.0235657449811697, "eval_runtime": 2541.1299, "eval_samples_per_second": 22.647, "eval_steps_per_second": 0.354, "step": 237 } ], "logging_steps": 7, "max_steps": 237, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1973350632960000.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }