{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 3530, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.056657223796033995, "grad_norm": 0.49070337414741516, "learning_rate": 6.289308176100629e-05, "loss": 1.948, "step": 100 }, { "epoch": 0.11331444759206799, "grad_norm": 0.35821089148521423, "learning_rate": 9.998427702677944e-05, "loss": 1.8537, "step": 200 }, { "epoch": 0.16997167138810199, "grad_norm": 0.8048006892204285, "learning_rate": 9.981415164444553e-05, "loss": 1.8535, "step": 300 }, { "epoch": 0.22662889518413598, "grad_norm": 0.3545685410499573, "learning_rate": 9.945770301725816e-05, "loss": 1.8401, "step": 400 }, { "epoch": 0.28328611898017, "grad_norm": 0.2784646451473236, "learning_rate": 9.892258840153075e-05, "loss": 1.8068, "step": 500 }, { "epoch": 0.33994334277620397, "grad_norm": 0.31996163725852966, "learning_rate": 9.820000233095179e-05, "loss": 1.8332, "step": 600 }, { "epoch": 0.39660056657223797, "grad_norm": 0.8525365591049194, "learning_rate": 9.729713052750826e-05, "loss": 1.8503, "step": 700 }, { "epoch": 0.45325779036827196, "grad_norm": 0.42982813715934753, "learning_rate": 9.621735006374983e-05, "loss": 1.8055, "step": 800 }, { "epoch": 0.509915014164306, "grad_norm": 0.3288513720035553, "learning_rate": 9.49646997156688e-05, "loss": 1.8356, "step": 900 }, { "epoch": 0.56657223796034, "grad_norm": 0.3136463165283203, "learning_rate": 9.354386485619264e-05, "loss": 1.8165, "step": 1000 }, { "epoch": 0.623229461756374, "grad_norm": 0.3262263238430023, "learning_rate": 9.196015993016642e-05, "loss": 1.7969, "step": 1100 }, { "epoch": 0.6798866855524079, "grad_norm": 0.3740326762199402, "learning_rate": 9.021950857637486e-05, "loss": 1.8197, "step": 1200 }, { "epoch": 0.7365439093484419, "grad_norm": 0.2732725739479065, "learning_rate": 8.832842147095495e-05, "loss": 1.7799, "step": 1300 }, { "epoch": 0.7932011331444759, "grad_norm": 0.3327942490577698, "learning_rate": 8.629397197507315e-05, "loss": 1.8248, "step": 1400 }, { "epoch": 0.8498583569405099, "grad_norm": 0.33730548620224, "learning_rate": 8.412376967795361e-05, "loss": 1.8325, "step": 1500 }, { "epoch": 0.9065155807365439, "grad_norm": 0.3892277181148529, "learning_rate": 8.182593193421625e-05, "loss": 1.8233, "step": 1600 }, { "epoch": 0.9631728045325779, "grad_norm": 0.31279096007347107, "learning_rate": 7.940905350198551e-05, "loss": 1.8086, "step": 1700 }, { "epoch": 1.0, "eval_loss": 1.804076910018921, "eval_runtime": 252.4049, "eval_samples_per_second": 1.074, "eval_steps_per_second": 1.074, "step": 1765 }, { "epoch": 1.019830028328612, "grad_norm": 0.3374468684196472, "learning_rate": 7.68821743953346e-05, "loss": 1.7881, "step": 1800 }, { "epoch": 1.0764872521246458, "grad_norm": 0.3738987147808075, "learning_rate": 7.425474607130858e-05, "loss": 1.7664, "step": 1900 }, { "epoch": 1.13314447592068, "grad_norm": 0.44481009244918823, "learning_rate": 7.153659607799951e-05, "loss": 1.7459, "step": 2000 }, { "epoch": 1.1898016997167138, "grad_norm": 0.39869531989097595, "learning_rate": 6.873789129590286e-05, "loss": 1.7631, "step": 2100 }, { "epoch": 1.246458923512748, "grad_norm": 0.378170371055603, "learning_rate": 6.586909991004586e-05, "loss": 1.7557, "step": 2200 }, { "epoch": 1.3031161473087818, "grad_norm": 0.4261433780193329, "learning_rate": 6.294095225512603e-05, "loss": 1.7353, "step": 2300 }, { "epoch": 1.3597733711048159, "grad_norm": 0.3958815634250641, "learning_rate": 5.996440068011383e-05, "loss": 1.7579, "step": 2400 }, { "epoch": 1.41643059490085, "grad_norm": 0.37525269389152527, "learning_rate": 5.695057858243988e-05, "loss": 1.7623, "step": 2500 }, { "epoch": 1.4730878186968839, "grad_norm": 0.4011896252632141, "learning_rate": 5.391075876499483e-05, "loss": 1.7363, "step": 2600 }, { "epoch": 1.5297450424929178, "grad_norm": 0.45209965109825134, "learning_rate": 5.0856311271701064e-05, "loss": 1.7745, "step": 2700 }, { "epoch": 1.5864022662889519, "grad_norm": 0.4531945288181305, "learning_rate": 4.7798660859367615e-05, "loss": 1.7513, "step": 2800 }, { "epoch": 1.643059490084986, "grad_norm": 0.5212255120277405, "learning_rate": 4.474924426489847e-05, "loss": 1.7438, "step": 2900 }, { "epoch": 1.6997167138810199, "grad_norm": 0.4203312397003174, "learning_rate": 4.171946742769108e-05, "loss": 1.7491, "step": 3000 }, { "epoch": 1.7563739376770537, "grad_norm": 1.2426502704620361, "learning_rate": 3.872066282722877e-05, "loss": 1.7699, "step": 3100 }, { "epoch": 1.8130311614730878, "grad_norm": 1.147980809211731, "learning_rate": 3.576404709544031e-05, "loss": 1.7475, "step": 3200 }, { "epoch": 1.869688385269122, "grad_norm": 0.6595035791397095, "learning_rate": 3.286067906237188e-05, "loss": 1.7306, "step": 3300 }, { "epoch": 1.9263456090651558, "grad_norm": 0.42627865076065063, "learning_rate": 3.0021418392096213e-05, "loss": 1.7464, "step": 3400 }, { "epoch": 1.9830028328611897, "grad_norm": 0.5331133604049683, "learning_rate": 2.7256884963575536e-05, "loss": 1.7512, "step": 3500 }, { "epoch": 2.0, "eval_loss": 1.7998833656311035, "eval_runtime": 252.3326, "eval_samples_per_second": 1.074, "eval_steps_per_second": 1.074, "step": 3530 } ], "logging_steps": 100, "max_steps": 5295, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.828436663001416e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }