{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.927444794952681,
  "eval_steps": 500,
  "global_step": 840,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14195583596214512,
      "grad_norm": 1.257095217704773,
      "learning_rate": 6.511627906976745e-05,
      "loss": 2.7447,
      "step": 15
    },
    {
      "epoch": 0.28391167192429023,
      "grad_norm": 0.6703794598579407,
      "learning_rate": 0.00013488372093023256,
      "loss": 1.7492,
      "step": 30
    },
    {
      "epoch": 0.42586750788643535,
      "grad_norm": 0.5519043207168579,
      "learning_rate": 0.00019999923848679644,
      "loss": 1.2311,
      "step": 45
    },
    {
      "epoch": 0.5678233438485805,
      "grad_norm": 0.5175157785415649,
      "learning_rate": 0.00019980511570519505,
      "loss": 1.1063,
      "step": 60
    },
    {
      "epoch": 0.7097791798107256,
      "grad_norm": 0.6619298458099365,
      "learning_rate": 0.00019926907703615428,
      "loss": 1.0206,
      "step": 75
    },
    {
      "epoch": 0.8517350157728707,
      "grad_norm": 0.7122862935066223,
      "learning_rate": 0.00019839295885986296,
      "loss": 0.9689,
      "step": 90
    },
    {
      "epoch": 0.9936908517350158,
      "grad_norm": 0.851209819316864,
      "learning_rate": 0.0001971797626128871,
      "loss": 0.9688,
      "step": 105
    },
    {
      "epoch": 1.1324921135646688,
      "grad_norm": 0.6464285850524902,
      "learning_rate": 0.00019563364450574252,
      "loss": 0.8574,
      "step": 120
    },
    {
      "epoch": 1.274447949526814,
      "grad_norm": 0.6553785800933838,
      "learning_rate": 0.00019375990128440204,
      "loss": 0.8527,
      "step": 135
    },
    {
      "epoch": 1.416403785488959,
      "grad_norm": 0.6542381644248962,
      "learning_rate": 0.00019156495208451658,
      "loss": 0.8163,
      "step": 150
    },
    {
      "epoch": 1.5583596214511042,
      "grad_norm": 0.7524316906929016,
      "learning_rate": 0.0001890563164405134,
      "loss": 0.823,
      "step": 165
    },
    {
      "epoch": 1.7003154574132493,
      "grad_norm": 0.715350866317749,
      "learning_rate": 0.0001862425885249098,
      "loss": 0.8389,
      "step": 180
    },
    {
      "epoch": 1.8422712933753944,
      "grad_norm": 0.7483017444610596,
      "learning_rate": 0.00018313340770609367,
      "loss": 0.7991,
      "step": 195
    },
    {
      "epoch": 1.9842271293375395,
      "grad_norm": 0.8540375232696533,
      "learning_rate": 0.00017973942552543503,
      "loss": 0.7916,
      "step": 210
    },
    {
      "epoch": 2.1230283911671926,
      "grad_norm": 0.9041481018066406,
      "learning_rate": 0.00017607226920685976,
      "loss": 0.6324,
      "step": 225
    },
    {
      "epoch": 2.2649842271293377,
      "grad_norm": 0.9540446996688843,
      "learning_rate": 0.00017214450182389559,
      "loss": 0.6236,
      "step": 240
    },
    {
      "epoch": 2.406940063091483,
      "grad_norm": 0.9503123760223389,
      "learning_rate": 0.00016796957926065134,
      "loss": 0.6384,
      "step": 255
    },
    {
      "epoch": 2.548895899053628,
      "grad_norm": 1.0240495204925537,
      "learning_rate": 0.00016356180411417447,
      "loss": 0.5756,
      "step": 270
    },
    {
      "epoch": 2.690851735015773,
      "grad_norm": 1.0138286352157593,
      "learning_rate": 0.00015893627669610926,
      "loss": 0.6549,
      "step": 285
    },
    {
      "epoch": 2.832807570977918,
      "grad_norm": 1.0231430530548096,
      "learning_rate": 0.00015410884330151626,
      "loss": 0.6267,
      "step": 300
    },
    {
      "epoch": 2.9747634069400632,
      "grad_norm": 0.9881237149238586,
      "learning_rate": 0.00014909604192207568,
      "loss": 0.6235,
      "step": 315
    },
    {
      "epoch": 3.1135646687697163,
      "grad_norm": 1.4114665985107422,
      "learning_rate": 0.00014391504558965157,
      "loss": 0.4734,
      "step": 330
    },
    {
      "epoch": 3.2555205047318614,
      "grad_norm": 1.3432203531265259,
      "learning_rate": 0.00013858360354431355,
      "loss": 0.4271,
      "step": 345
    },
    {
      "epoch": 3.3974763406940065,
      "grad_norm": 1.1701949834823608,
      "learning_rate": 0.00013311998042836356,
      "loss": 0.4547,
      "step": 360
    },
    {
      "epoch": 3.5394321766561516,
      "grad_norm": 1.1254725456237793,
      "learning_rate": 0.00012754289371467986,
      "loss": 0.421,
      "step": 375
    },
    {
      "epoch": 3.6813880126182967,
      "grad_norm": 1.2798420190811157,
      "learning_rate": 0.00012187144958373793,
      "loss": 0.4518,
      "step": 390
    },
    {
      "epoch": 3.823343848580442,
      "grad_norm": 1.2269246578216553,
      "learning_rate": 0.00011612507746898307,
      "loss": 0.4841,
      "step": 405
    },
    {
      "epoch": 3.965299684542587,
      "grad_norm": 1.3260618448257446,
      "learning_rate": 0.00011032346349479161,
      "loss": 0.4677,
      "step": 420
    },
    {
      "epoch": 4.10410094637224,
      "grad_norm": 1.6245781183242798,
      "learning_rate": 0.00010448648303505151,
      "loss": 0.348,
      "step": 435
    },
    {
      "epoch": 4.246056782334385,
      "grad_norm": 1.2491908073425293,
      "learning_rate": 9.863413262340491e-05,
      "loss": 0.3101,
      "step": 450
    },
    {
      "epoch": 4.38801261829653,
      "grad_norm": 1.3882880210876465,
      "learning_rate": 9.278646144841758e-05,
      "loss": 0.3044,
      "step": 465
    },
    {
      "epoch": 4.529968454258675,
      "grad_norm": 1.356520175933838,
      "learning_rate": 8.696350266836128e-05,
      "loss": 0.3251,
      "step": 480
    },
    {
      "epoch": 4.6719242902208205,
      "grad_norm": 1.248788595199585,
      "learning_rate": 8.118520478091311e-05,
      "loss": 0.3087,
      "step": 495
    },
    {
      "epoch": 4.813880126182966,
      "grad_norm": 1.6940261125564575,
      "learning_rate": 7.547136328288814e-05,
      "loss": 0.3158,
      "step": 510
    },
    {
      "epoch": 4.955835962145111,
      "grad_norm": 1.34111487865448,
      "learning_rate": 6.9841552854128e-05,
      "loss": 0.3222,
      "step": 525
    },
    {
      "epoch": 5.094637223974764,
      "grad_norm": 1.0496258735656738,
      "learning_rate": 6.431506029787189e-05,
      "loss": 0.2501,
      "step": 540
    },
    {
      "epoch": 5.236593059936909,
      "grad_norm": 1.2102131843566895,
      "learning_rate": 5.8910818467345185e-05,
      "loss": 0.2205,
      "step": 555
    },
    {
      "epoch": 5.378548895899054,
      "grad_norm": 1.1957101821899414,
      "learning_rate": 5.3647341404923134e-05,
      "loss": 0.2214,
      "step": 570
    },
    {
      "epoch": 5.520504731861199,
      "grad_norm": 1.1445940732955933,
      "learning_rate": 4.8542660916070736e-05,
      "loss": 0.2252,
      "step": 585
    },
    {
      "epoch": 5.662460567823344,
      "grad_norm": 1.2054235935211182,
      "learning_rate": 4.361426479534753e-05,
      "loss": 0.2275,
      "step": 600
    },
    {
      "epoch": 5.804416403785489,
      "grad_norm": 2.122396469116211,
      "learning_rate": 3.8879036916103704e-05,
      "loss": 0.2306,
      "step": 615
    },
    {
      "epoch": 5.946372239747634,
      "grad_norm": 1.7328449487686157,
      "learning_rate": 3.4353199389111065e-05,
      "loss": 0.2344,
      "step": 630
    },
    {
      "epoch": 6.085173501577287,
      "grad_norm": 0.821266770362854,
      "learning_rate": 3.005225698828338e-05,
      "loss": 0.1922,
      "step": 645
    },
    {
      "epoch": 6.2271293375394325,
      "grad_norm": 0.9995691180229187,
      "learning_rate": 2.599094403387481e-05,
      "loss": 0.1655,
      "step": 660
    },
    {
      "epoch": 6.369085173501578,
      "grad_norm": 1.0223170518875122,
      "learning_rate": 2.2183173915125656e-05,
      "loss": 0.1731,
      "step": 675
    },
    {
      "epoch": 6.511041009463723,
      "grad_norm": 1.3553900718688965,
      "learning_rate": 1.8641991425282345e-05,
      "loss": 0.1696,
      "step": 690
    },
    {
      "epoch": 6.652996845425868,
      "grad_norm": 1.330947995185852,
      "learning_rate": 1.53795280722846e-05,
      "loss": 0.1706,
      "step": 705
    },
    {
      "epoch": 6.794952681388013,
      "grad_norm": 1.1109322309494019,
      "learning_rate": 1.2406960518217326e-05,
      "loss": 0.1749,
      "step": 720
    },
    {
      "epoch": 6.936908517350158,
      "grad_norm": 1.1352533102035522,
      "learning_rate": 9.734472289907182e-06,
      "loss": 0.1683,
      "step": 735
    },
    {
      "epoch": 7.075709779179811,
      "grad_norm": 1.0221686363220215,
      "learning_rate": 7.3712188918370285e-06,
      "loss": 0.1664,
      "step": 750
    },
    {
      "epoch": 7.217665615141956,
      "grad_norm": 0.9193073511123657,
      "learning_rate": 5.325296440895622e-06,
      "loss": 0.1444,
      "step": 765
    },
    {
      "epoch": 7.3596214511041005,
      "grad_norm": 1.8977042436599731,
      "learning_rate": 3.6037139304146762e-06,
      "loss": 0.1528,
      "step": 780
    },
    {
      "epoch": 7.501577287066246,
      "grad_norm": 0.9380797147750854,
      "learning_rate": 2.212369218512078e-06,
      "loss": 0.1438,
      "step": 795
    },
    {
      "epoch": 7.643533123028391,
      "grad_norm": 1.0754660367965698,
      "learning_rate": 1.1560288230015203e-06,
      "loss": 0.1502,
      "step": 810
    },
    {
      "epoch": 7.785488958990536,
      "grad_norm": 0.9819433093070984,
      "learning_rate": 4.383115920874814e-07,
      "loss": 0.1452,
      "step": 825
    },
    {
      "epoch": 7.927444794952681,
      "grad_norm": 1.4535789489746094,
      "learning_rate": 6.16763067873949e-08,
      "loss": 0.1484,
      "step": 840
    }
  ],
  "logging_steps": 15,
  "max_steps": 848,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 15,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.111385862982861e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}
|
|