{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 7840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.5211859610345628, "las": 67.4814491488433, "step": 392, "uas": 78.04452204277608 }, { "epoch": 1.2755102040816326, "grad_norm": 11.628578186035156, "learning_rate": 0.00018724489795918367, "loss": 271.7516, "step": 500 }, { "epoch": 2.0, "eval_loss": 1.4270889030562506, "las": 69.53295504146661, "step": 784, "uas": 79.79048450458315 }, { "epoch": 2.5510204081632653, "grad_norm": 6.459884166717529, "learning_rate": 0.00017448979591836735, "loss": 271.7582, "step": 1000 }, { "epoch": 3.0, "eval_loss": 1.7154893933071031, "las": 72.28284591881274, "step": 1176, "uas": 81.53644696639022 }, { "epoch": 3.826530612244898, "grad_norm": 3.9133880138397217, "learning_rate": 0.00016173469387755103, "loss": 268.2068, "step": 1500 }, { "epoch": 4.0, "eval_loss": 1.7123144169648488, "las": 73.63596682671323, "step": 1568, "uas": 81.97293758184199 }, { "epoch": 5.0, "eval_loss": 1.9232268912924662, "las": 72.98123090353558, "step": 1960, "uas": 82.1475338280227 }, { "epoch": 5.1020408163265305, "grad_norm": 2.6702146530151367, "learning_rate": 0.00014897959183673472, "loss": 267.1583, "step": 2000 }, { "epoch": 6.0, "eval_loss": 2.1728447212113275, "las": 73.06852902662592, "step": 2352, "uas": 81.36185072020952 }, { "epoch": 6.377551020408164, "grad_norm": 2.6859347820281982, "learning_rate": 0.00013622448979591837, "loss": 266.0369, "step": 2500 }, { "epoch": 7.0, "eval_loss": 2.3242934031618967, "las": 74.11610650371017, "step": 2744, "uas": 83.02051505892624 }, { "epoch": 7.653061224489796, "grad_norm": 1.6134849786758423, "learning_rate": 0.00012346938775510203, "loss": 265.9112, "step": 3000 }, { "epoch": 8.0, "eval_loss": 2.5794437726338706, "las": 73.24312527280664, "step": 3136, "uas": 82.5403753819293 }, { "epoch": 8.928571428571429, "grad_norm": 1.1696821451187134, "learning_rate": 0.00011071428571428572, "loss": 267.02, "step": 3500 }, { "epoch": 9.0, "eval_loss": 2.7259935074382358, "las": 73.24312527280664, "step": 3528, "uas": 82.1475338280227 }, { "epoch": 10.0, "eval_loss": 2.6596836613284216, "las": 73.5050196420777, "step": 3920, "uas": 82.67132256656483 }, { "epoch": 10.204081632653061, "grad_norm": 2.175758123397827, "learning_rate": 9.79591836734694e-05, "loss": 266.0427, "step": 4000 }, { "epoch": 11.0, "eval_loss": 2.5932833121882544, "las": 74.07245744216499, "step": 4312, "uas": 83.50065473592318 }, { "epoch": 11.479591836734693, "grad_norm": 1.4794198274612427, "learning_rate": 8.520408163265306e-05, "loss": 265.7745, "step": 4500 }, { "epoch": 12.0, "eval_loss": 2.7420518745978675, "las": 74.37800087298123, "step": 4704, "uas": 83.2824094281973 }, { "epoch": 12.755102040816327, "grad_norm": 1.8547543287277222, "learning_rate": 7.244897959183675e-05, "loss": 265.777, "step": 5000 }, { "epoch": 13.0, "eval_loss": 2.854385713736216, "las": 74.59624618070711, "step": 5096, "uas": 83.15146224356177 }, { "epoch": 14.0, "eval_loss": 3.256368252966139, "las": 74.77084242688782, "step": 5488, "uas": 82.97686599738105 }, { "epoch": 14.03061224489796, "grad_norm": 1.382800817489624, "learning_rate": 5.9693877551020416e-05, "loss": 266.4641, "step": 5500 }, { "epoch": 15.0, "eval_loss": 3.261543388168017, "las": 74.15975556525535, "step": 5880, "uas": 82.8895678742907 }, { "epoch": 15.306122448979592, "grad_norm": 0.6841826438903809, "learning_rate": 4.6938775510204086e-05, "loss": 267.6857, "step": 6000 }, { "epoch": 16.0, "eval_loss": 3.268537084261576, "las": 73.63596682671323, "step": 6272, "uas": 82.67132256656483 }, { "epoch": 16.581632653061224, "grad_norm": 0.8982020020484924, "learning_rate": 3.4183673469387755e-05, "loss": 265.4994, "step": 6500 }, { "epoch": 17.0, "eval_loss": 3.46340329779519, "las": 74.68354430379746, "step": 6664, "uas": 83.02051505892624 }, { "epoch": 17.857142857142858, "grad_norm": 0.9878342151641846, "learning_rate": 2.1428571428571428e-05, "loss": 265.6774, "step": 7000 }, { "epoch": 18.0, "eval_loss": 3.7613406744268207, "las": 74.42164993452641, "step": 7056, "uas": 83.2824094281973 }, { "epoch": 19.0, "eval_loss": 3.8515510294172497, "las": 74.6398952422523, "step": 7448, "uas": 83.32605848974246 }, { "epoch": 19.132653061224488, "grad_norm": 1.085435390472412, "learning_rate": 8.673469387755103e-06, "loss": 266.1408, "step": 7500 }, { "epoch": 20.0, "eval_loss": 3.952014442947176, "las": 74.72719336534264, "step": 7840, "uas": 83.54430379746836 }, { "epoch": 20.0, "step": 7840, "total_flos": 8.698923198185472e+16, "train_loss": 267.07814692283165, "train_runtime": 7682.2301, "train_samples_per_second": 65.314, "train_steps_per_second": 1.021 } ], "logging_steps": 500, "max_steps": 7840, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.698923198185472e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }