{
  "best_global_step": 237,
  "best_metric": 0.9935535559662932,
  "best_model_checkpoint": "ckpt/checkpoint-237",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 237,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08917197452229299,
      "grad_norm": 36.30582809448242,
      "learning_rate": 5.2173913043478265e-06,
      "loss": 1.1125,
      "step": 7
    },
    {
      "epoch": 0.17834394904458598,
      "grad_norm": 35.81989669799805,
      "learning_rate": 1.1304347826086957e-05,
      "loss": 0.8511,
      "step": 14
    },
    {
      "epoch": 0.267515923566879,
      "grad_norm": 6.439170837402344,
      "learning_rate": 1.739130434782609e-05,
      "loss": 0.6037,
      "step": 21
    },
    {
      "epoch": 0.35668789808917195,
      "grad_norm": 3.1305227279663086,
      "learning_rate": 1.9626168224299065e-05,
      "loss": 0.3597,
      "step": 28
    },
    {
      "epoch": 0.445859872611465,
      "grad_norm": 4.9628095626831055,
      "learning_rate": 1.8971962616822433e-05,
      "loss": 0.2122,
      "step": 35
    },
    {
      "epoch": 0.535031847133758,
      "grad_norm": 3.5573127269744873,
      "learning_rate": 1.8317757009345797e-05,
      "loss": 0.1176,
      "step": 42
    },
    {
      "epoch": 0.6242038216560509,
      "grad_norm": 2.0348973274230957,
      "learning_rate": 1.766355140186916e-05,
      "loss": 0.0712,
      "step": 49
    },
    {
      "epoch": 0.7133757961783439,
      "grad_norm": 6.060424327850342,
      "learning_rate": 1.7009345794392526e-05,
      "loss": 0.0753,
      "step": 56
    },
    {
      "epoch": 0.802547770700637,
      "grad_norm": 3.5385732650756836,
      "learning_rate": 1.635514018691589e-05,
      "loss": 0.0575,
      "step": 63
    },
    {
      "epoch": 0.89171974522293,
      "grad_norm": 2.5258700847625732,
      "learning_rate": 1.5700934579439254e-05,
      "loss": 0.039,
      "step": 70
    },
    {
      "epoch": 0.9808917197452229,
      "grad_norm": 0.7659706473350525,
      "learning_rate": 1.5046728971962619e-05,
      "loss": 0.0656,
      "step": 77
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9894872195867869,
      "eval_f1": 0.9894880819914448,
      "eval_loss": 0.03790666535496712,
      "eval_runtime": 2530.3213,
      "eval_samples_per_second": 22.744,
      "eval_steps_per_second": 0.356,
      "step": 79
    },
    {
      "epoch": 1.0636942675159236,
      "grad_norm": 0.6808683276176453,
      "learning_rate": 1.4392523364485981e-05,
      "loss": 0.0528,
      "step": 84
    },
    {
      "epoch": 1.1528662420382165,
      "grad_norm": 26.949949264526367,
      "learning_rate": 1.3738317757009347e-05,
      "loss": 0.099,
      "step": 91
    },
    {
      "epoch": 1.2420382165605095,
      "grad_norm": 1.5863674879074097,
      "learning_rate": 1.308411214953271e-05,
      "loss": 0.0336,
      "step": 98
    },
    {
      "epoch": 1.3312101910828025,
      "grad_norm": 2.183351755142212,
      "learning_rate": 1.2429906542056076e-05,
      "loss": 0.0173,
      "step": 105
    },
    {
      "epoch": 1.4203821656050954,
      "grad_norm": 2.4681448936462402,
      "learning_rate": 1.177570093457944e-05,
      "loss": 0.017,
      "step": 112
    },
    {
      "epoch": 1.5095541401273884,
      "grad_norm": 6.7833476066589355,
      "learning_rate": 1.1121495327102804e-05,
      "loss": 0.0249,
      "step": 119
    },
    {
      "epoch": 1.5987261146496814,
      "grad_norm": 0.06058590114116669,
      "learning_rate": 1.0467289719626168e-05,
      "loss": 0.0244,
      "step": 126
    },
    {
      "epoch": 1.6878980891719744,
      "grad_norm": 0.17764343321323395,
      "learning_rate": 9.813084112149533e-06,
      "loss": 0.0357,
      "step": 133
    },
    {
      "epoch": 1.7770700636942676,
      "grad_norm": 12.890459060668945,
      "learning_rate": 9.158878504672899e-06,
      "loss": 0.0439,
      "step": 140
    },
    {
      "epoch": 1.8662420382165605,
      "grad_norm": 7.447948455810547,
      "learning_rate": 8.504672897196263e-06,
      "loss": 0.0415,
      "step": 147
    },
    {
      "epoch": 1.9554140127388535,
      "grad_norm": 0.40099361538887024,
      "learning_rate": 7.850467289719627e-06,
      "loss": 0.0141,
      "step": 154
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9897478670350484,
      "eval_f1": 0.9897461756211305,
      "eval_loss": 0.03573331609368324,
      "eval_runtime": 2458.8654,
      "eval_samples_per_second": 23.405,
      "eval_steps_per_second": 0.366,
      "step": 158
    },
    {
      "epoch": 2.038216560509554,
      "grad_norm": 0.18038131296634674,
      "learning_rate": 7.196261682242991e-06,
      "loss": 0.0107,
      "step": 161
    },
    {
      "epoch": 2.127388535031847,
      "grad_norm": 8.816960334777832,
      "learning_rate": 6.542056074766355e-06,
      "loss": 0.0197,
      "step": 168
    },
    {
      "epoch": 2.21656050955414,
      "grad_norm": 0.17330680787563324,
      "learning_rate": 5.88785046728972e-06,
      "loss": 0.0492,
      "step": 175
    },
    {
      "epoch": 2.305732484076433,
      "grad_norm": 0.10762328654527664,
      "learning_rate": 5.233644859813084e-06,
      "loss": 0.0223,
      "step": 182
    },
    {
      "epoch": 2.394904458598726,
      "grad_norm": 0.06711781769990921,
      "learning_rate": 4.579439252336449e-06,
      "loss": 0.0263,
      "step": 189
    },
    {
      "epoch": 2.484076433121019,
      "grad_norm": 0.7490471601486206,
      "learning_rate": 3.925233644859814e-06,
      "loss": 0.004,
      "step": 196
    },
    {
      "epoch": 2.573248407643312,
      "grad_norm": 0.060274120420217514,
      "learning_rate": 3.2710280373831774e-06,
      "loss": 0.0124,
      "step": 203
    },
    {
      "epoch": 2.662420382165605,
      "grad_norm": 12.112130165100098,
      "learning_rate": 2.616822429906542e-06,
      "loss": 0.013,
      "step": 210
    },
    {
      "epoch": 2.7515923566878984,
      "grad_norm": 0.036105964332818985,
      "learning_rate": 1.962616822429907e-06,
      "loss": 0.0034,
      "step": 217
    },
    {
      "epoch": 2.840764331210191,
      "grad_norm": 0.03519793599843979,
      "learning_rate": 1.308411214953271e-06,
      "loss": 0.0106,
      "step": 224
    },
    {
      "epoch": 2.9299363057324843,
      "grad_norm": 0.6189459562301636,
      "learning_rate": 6.542056074766355e-07,
      "loss": 0.0095,
      "step": 231
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.993553319779666,
      "eval_f1": 0.9935535559662932,
      "eval_loss": 0.0235657449811697,
      "eval_runtime": 2541.1299,
      "eval_samples_per_second": 22.647,
      "eval_steps_per_second": 0.354,
      "step": 237
    }
  ],
  "logging_steps": 7,
  "max_steps": 237,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1973350632960000.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}