{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 28712,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01741432153803288,
      "grad_norm": 6.671968936920166,
      "learning_rate": 1.160092807424594e-05,
      "loss": 3.4084,
      "step": 500
    },
    {
      "epoch": 0.03482864307606576,
      "grad_norm": 4.760310173034668,
      "learning_rate": 1.9900897666068224e-05,
      "loss": 2.8756,
      "step": 1000
    },
    {
      "epoch": 0.052242964614098636,
      "grad_norm": 10.023279190063477,
      "learning_rate": 1.9541831238779175e-05,
      "loss": 2.7465,
      "step": 1500
    },
    {
      "epoch": 0.06965728615213151,
      "grad_norm": 13.3428316116333,
      "learning_rate": 1.9182764811490127e-05,
      "loss": 2.8027,
      "step": 2000
    },
    {
      "epoch": 0.0870716076901644,
      "grad_norm": 10.00976276397705,
      "learning_rate": 1.882369838420108e-05,
      "loss": 2.5838,
      "step": 2500
    },
    {
      "epoch": 0.10448592922819727,
      "grad_norm": 9.512397766113281,
      "learning_rate": 1.846463195691203e-05,
      "loss": 2.5855,
      "step": 3000
    },
    {
      "epoch": 0.12190025076623015,
      "grad_norm": 7.561283588409424,
      "learning_rate": 1.8105565529622982e-05,
      "loss": 2.5998,
      "step": 3500
    },
    {
      "epoch": 0.13931457230426303,
      "grad_norm": 5.637691497802734,
      "learning_rate": 1.7746499102333934e-05,
      "loss": 2.6146,
      "step": 4000
    },
    {
      "epoch": 0.1567288938422959,
      "grad_norm": 5.674258232116699,
      "learning_rate": 1.7387432675044886e-05,
      "loss": 2.5343,
      "step": 4500
    },
    {
      "epoch": 0.1741432153803288,
      "grad_norm": 6.290276527404785,
      "learning_rate": 1.7028366247755838e-05,
      "loss": 2.5986,
      "step": 5000
    },
    {
      "epoch": 0.19155753691836166,
      "grad_norm": 9.24161148071289,
      "learning_rate": 1.6669299820466786e-05,
      "loss": 2.5192,
      "step": 5500
    },
    {
      "epoch": 0.20897185845639454,
      "grad_norm": 5.9918413162231445,
      "learning_rate": 1.6310233393177738e-05,
      "loss": 2.44,
      "step": 6000
    },
    {
      "epoch": 0.22638617999442742,
      "grad_norm": 5.031735420227051,
      "learning_rate": 1.5951166965888693e-05,
      "loss": 2.5443,
      "step": 6500
    },
    {
      "epoch": 0.2438005015324603,
      "grad_norm": 8.406722068786621,
      "learning_rate": 1.559210053859964e-05,
      "loss": 2.4336,
      "step": 7000
    },
    {
      "epoch": 0.26121482307049315,
      "grad_norm": 5.958395957946777,
      "learning_rate": 1.5233034111310595e-05,
      "loss": 2.5202,
      "step": 7500
    },
    {
      "epoch": 0.27862914460852606,
      "grad_norm": 6.343221187591553,
      "learning_rate": 1.4873967684021545e-05,
      "loss": 2.4703,
      "step": 8000
    },
    {
      "epoch": 0.2960434661465589,
      "grad_norm": 6.622114181518555,
      "learning_rate": 1.4514901256732496e-05,
      "loss": 2.4571,
      "step": 8500
    },
    {
      "epoch": 0.3134577876845918,
      "grad_norm": 7.682774543762207,
      "learning_rate": 1.4155834829443448e-05,
      "loss": 2.3446,
      "step": 9000
    },
    {
      "epoch": 0.33087210922262467,
      "grad_norm": 12.083649635314941,
      "learning_rate": 1.37967684021544e-05,
      "loss": 2.4335,
      "step": 9500
    },
    {
      "epoch": 0.3482864307606576,
      "grad_norm": 15.165790557861328,
      "learning_rate": 1.3437701974865352e-05,
      "loss": 2.4165,
      "step": 10000
    },
    {
      "epoch": 0.3657007522986904,
      "grad_norm": 4.316242694854736,
      "learning_rate": 1.3078635547576302e-05,
      "loss": 2.3593,
      "step": 10500
    },
    {
      "epoch": 0.38311507383672333,
      "grad_norm": 13.665470123291016,
      "learning_rate": 1.2719569120287253e-05,
      "loss": 2.3966,
      "step": 11000
    },
    {
      "epoch": 0.4005293953747562,
      "grad_norm": 7.673960208892822,
      "learning_rate": 1.2360502692998207e-05,
      "loss": 2.3842,
      "step": 11500
    },
    {
      "epoch": 0.4179437169127891,
      "grad_norm": 6.637418746948242,
      "learning_rate": 1.2001436265709157e-05,
      "loss": 2.333,
      "step": 12000
    },
    {
      "epoch": 0.43535803845082194,
      "grad_norm": 12.264994621276855,
      "learning_rate": 1.1642369838420109e-05,
      "loss": 2.4412,
      "step": 12500
    },
    {
      "epoch": 0.45277235998885484,
      "grad_norm": 8.014012336730957,
      "learning_rate": 1.1283303411131059e-05,
      "loss": 2.4107,
      "step": 13000
    },
    {
      "epoch": 0.4701866815268877,
      "grad_norm": 5.616372585296631,
      "learning_rate": 1.0924236983842012e-05,
      "loss": 2.3865,
      "step": 13500
    },
    {
      "epoch": 0.4876010030649206,
      "grad_norm": 12.474315643310547,
      "learning_rate": 1.0565170556552964e-05,
      "loss": 2.4172,
      "step": 14000
    },
    {
      "epoch": 0.5050153246029535,
      "grad_norm": 4.360251426696777,
      "learning_rate": 1.0206104129263914e-05,
      "loss": 2.3679,
      "step": 14500
    },
    {
      "epoch": 0.5224296461409863,
      "grad_norm": 11.085514068603516,
      "learning_rate": 9.847037701974866e-06,
      "loss": 2.2899,
      "step": 15000
    },
    {
      "epoch": 0.5398439676790192,
      "grad_norm": 8.206655502319336,
      "learning_rate": 9.487971274685817e-06,
      "loss": 2.396,
      "step": 15500
    },
    {
      "epoch": 0.5572582892170521,
      "grad_norm": 8.702625274658203,
      "learning_rate": 9.128904847396769e-06,
      "loss": 2.4141,
      "step": 16000
    },
    {
      "epoch": 0.574672610755085,
      "grad_norm": 5.285048007965088,
      "learning_rate": 8.769838420107721e-06,
      "loss": 2.386,
      "step": 16500
    },
    {
      "epoch": 0.5920869322931178,
      "grad_norm": 12.492161750793457,
      "learning_rate": 8.410771992818673e-06,
      "loss": 2.3857,
      "step": 17000
    },
    {
      "epoch": 0.6095012538311507,
      "grad_norm": 6.360340595245361,
      "learning_rate": 8.051705565529624e-06,
      "loss": 2.3265,
      "step": 17500
    },
    {
      "epoch": 0.6269155753691836,
      "grad_norm": 9.026312828063965,
      "learning_rate": 7.692639138240574e-06,
      "loss": 2.2468,
      "step": 18000
    },
    {
      "epoch": 0.6443298969072165,
      "grad_norm": 15.688992500305176,
      "learning_rate": 7.333572710951526e-06,
      "loss": 2.416,
      "step": 18500
    },
    {
      "epoch": 0.6617442184452493,
      "grad_norm": 14.925408363342285,
      "learning_rate": 6.974506283662478e-06,
      "loss": 2.3618,
      "step": 19000
    },
    {
      "epoch": 0.6791585399832822,
      "grad_norm": 6.57327127456665,
      "learning_rate": 6.6154398563734305e-06,
      "loss": 2.3216,
      "step": 19500
    },
    {
      "epoch": 0.6965728615213151,
      "grad_norm": 4.341097831726074,
      "learning_rate": 6.2563734290843814e-06,
      "loss": 2.2302,
      "step": 20000
    },
    {
      "epoch": 0.713987183059348,
      "grad_norm": 6.36518669128418,
      "learning_rate": 5.897307001795332e-06,
      "loss": 2.3242,
      "step": 20500
    },
    {
      "epoch": 0.7314015045973808,
      "grad_norm": 5.405521869659424,
      "learning_rate": 5.538240574506284e-06,
      "loss": 2.1918,
      "step": 21000
    },
    {
      "epoch": 0.7488158261354138,
      "grad_norm": 11.361534118652344,
      "learning_rate": 5.179174147217235e-06,
      "loss": 2.2957,
      "step": 21500
    },
    {
      "epoch": 0.7662301476734467,
      "grad_norm": 12.7646484375,
      "learning_rate": 4.820107719928187e-06,
      "loss": 2.3009,
      "step": 22000
    },
    {
      "epoch": 0.7836444692114796,
      "grad_norm": 7.689826488494873,
      "learning_rate": 4.4610412926391385e-06,
      "loss": 2.2011,
      "step": 22500
    },
    {
      "epoch": 0.8010587907495124,
      "grad_norm": 12.123157501220703,
      "learning_rate": 4.10197486535009e-06,
      "loss": 2.3082,
      "step": 23000
    },
    {
      "epoch": 0.8184731122875453,
      "grad_norm": 7.985175132751465,
      "learning_rate": 3.7429084380610415e-06,
      "loss": 2.2633,
      "step": 23500
    },
    {
      "epoch": 0.8358874338255782,
      "grad_norm": 7.724106311798096,
      "learning_rate": 3.3838420107719933e-06,
      "loss": 2.3609,
      "step": 24000
    },
    {
      "epoch": 0.8533017553636111,
      "grad_norm": 11.675145149230957,
      "learning_rate": 3.0247755834829446e-06,
      "loss": 2.2966,
      "step": 24500
    },
    {
      "epoch": 0.8707160769016439,
      "grad_norm": 10.773253440856934,
      "learning_rate": 2.6657091561938963e-06,
      "loss": 2.3305,
      "step": 25000
    },
    {
      "epoch": 0.8881303984396768,
      "grad_norm": 13.044085502624512,
      "learning_rate": 2.3066427289048477e-06,
      "loss": 2.2828,
      "step": 25500
    },
    {
      "epoch": 0.9055447199777097,
      "grad_norm": 7.912332534790039,
      "learning_rate": 1.947576301615799e-06,
      "loss": 2.2439,
      "step": 26000
    },
    {
      "epoch": 0.9229590415157426,
      "grad_norm": 13.101786613464355,
      "learning_rate": 1.5885098743267505e-06,
      "loss": 2.289,
      "step": 26500
    },
    {
      "epoch": 0.9403733630537754,
      "grad_norm": 5.365452766418457,
      "learning_rate": 1.229443447037702e-06,
      "loss": 2.302,
      "step": 27000
    },
    {
      "epoch": 0.9577876845918083,
      "grad_norm": 11.440293312072754,
      "learning_rate": 8.703770197486536e-07,
      "loss": 2.2542,
      "step": 27500
    },
    {
      "epoch": 0.9752020061298412,
      "grad_norm": 23.63568115234375,
      "learning_rate": 5.11310592459605e-07,
      "loss": 2.2199,
      "step": 28000
    },
    {
      "epoch": 0.9926163276678741,
      "grad_norm": 14.987226486206055,
      "learning_rate": 1.5224416517055656e-07,
      "loss": 2.2635,
      "step": 28500
    }
  ],
  "logging_steps": 500,
  "max_steps": 28712,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500.0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}