{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7479431563201197,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018698578908002993,
      "grad_norm": 9.572704315185547,
      "learning_rate": 3.048780487804878e-05,
      "loss": 2.5708,
      "step": 25
    },
    {
      "epoch": 0.037397157816005985,
      "grad_norm": 6.995045185089111,
      "learning_rate": 4.999405067699773e-05,
      "loss": 2.6291,
      "step": 50
    },
    {
      "epoch": 0.05609573672400898,
      "grad_norm": 7.9323601722717285,
      "learning_rate": 4.991513829823945e-05,
      "loss": 2.7109,
      "step": 75
    },
    {
      "epoch": 0.07479431563201197,
      "grad_norm": 7.052701473236084,
      "learning_rate": 4.9744751398665467e-05,
      "loss": 2.857,
      "step": 100
    },
    {
      "epoch": 0.09349289454001496,
      "grad_norm": 6.857184410095215,
      "learning_rate": 4.948351554413879e-05,
      "loss": 2.8639,
      "step": 125
    },
    {
      "epoch": 0.11219147344801796,
      "grad_norm": 6.162296772003174,
      "learning_rate": 4.9132389847321244e-05,
      "loss": 2.7908,
      "step": 150
    },
    {
      "epoch": 0.13089005235602094,
      "grad_norm": 96.66787719726562,
      "learning_rate": 4.869266344634556e-05,
      "loss": 3.0533,
      "step": 175
    },
    {
      "epoch": 0.14958863126402394,
      "grad_norm": 6.0340375900268555,
      "learning_rate": 4.816595077181764e-05,
      "loss": 2.8847,
      "step": 200
    },
    {
      "epoch": 0.1682872101720269,
      "grad_norm": 5.803041934967041,
      "learning_rate": 4.755418561952595e-05,
      "loss": 3.2308,
      "step": 225
    },
    {
      "epoch": 0.1869857890800299,
      "grad_norm": 5.644896030426025,
      "learning_rate": 4.6859614050619644e-05,
      "loss": 3.66,
      "step": 250
    },
    {
      "epoch": 0.2056843679880329,
      "grad_norm": 6.0701751708984375,
      "learning_rate": 4.608478614532215e-05,
      "loss": 3.2233,
      "step": 275
    },
    {
      "epoch": 0.2243829468960359,
      "grad_norm": 7.215365409851074,
      "learning_rate": 4.523254664045583e-05,
      "loss": 3.3212,
      "step": 300
    },
    {
      "epoch": 0.24308152580403888,
      "grad_norm": 42.88441848754883,
      "learning_rate": 4.430602448515173e-05,
      "loss": 5.5739,
      "step": 325
    },
    {
      "epoch": 0.2617801047120419,
      "grad_norm": 6.698093414306641,
      "learning_rate": 4.330862135308981e-05,
      "loss": 6.687,
      "step": 350
    },
    {
      "epoch": 0.28047868362004486,
      "grad_norm": 7.0725321769714355,
      "learning_rate": 4.2243999153446444e-05,
      "loss": 6.676,
      "step": 375
    },
    {
      "epoch": 0.2991772625280479,
      "grad_norm": 4.725111961364746,
      "learning_rate": 4.111606658640209e-05,
      "loss": 6.6665,
      "step": 400
    },
    {
      "epoch": 0.31787584143605085,
      "grad_norm": 6.662052631378174,
      "learning_rate": 3.9928964792569655e-05,
      "loss": 6.9399,
      "step": 425
    },
    {
      "epoch": 0.3365744203440538,
      "grad_norm": 8.17912769317627,
      "learning_rate": 3.868705214903098e-05,
      "loss": 6.2064,
      "step": 450
    },
    {
      "epoch": 0.35527299925205685,
      "grad_norm": 4.715211868286133,
      "learning_rate": 3.7394888267801986e-05,
      "loss": 5.9489,
      "step": 475
    },
    {
      "epoch": 0.3739715781600598,
      "grad_norm": 7.79115629196167,
      "learning_rate": 3.6057217255475034e-05,
      "loss": 5.7699,
      "step": 500
    },
    {
      "epoch": 0.39267015706806285,
      "grad_norm": 40.13149642944336,
      "learning_rate": 3.4678950295500015e-05,
      "loss": 5.6122,
      "step": 525
    },
    {
      "epoch": 0.4113687359760658,
      "grad_norm": 5.954702854156494,
      "learning_rate": 3.326514761705209e-05,
      "loss": 5.4873,
      "step": 550
    },
    {
      "epoch": 0.4300673148840688,
      "grad_norm": 6.408680438995361,
      "learning_rate": 3.182099991668653e-05,
      "loss": 5.4693,
      "step": 575
    },
    {
      "epoch": 0.4487658937920718,
      "grad_norm": 5.902630805969238,
      "learning_rate": 3.035180930098997e-05,
      "loss": 5.3252,
      "step": 600
    },
    {
      "epoch": 0.4674644727000748,
      "grad_norm": 6.601041793823242,
      "learning_rate": 2.8862969820196016e-05,
      "loss": 5.2455,
      "step": 625
    },
    {
      "epoch": 0.48616305160807777,
      "grad_norm": 7.046901702880859,
      "learning_rate": 2.7359947664234937e-05,
      "loss": 5.1306,
      "step": 650
    },
    {
      "epoch": 0.5048616305160808,
      "grad_norm": 5.962765216827393,
      "learning_rate": 2.5848261093926563e-05,
      "loss": 5.0464,
      "step": 675
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 6.858371734619141,
      "learning_rate": 2.433346018099786e-05,
      "loss": 5.0013,
      "step": 700
    },
    {
      "epoch": 0.5422587883320867,
      "grad_norm": 5.1485443115234375,
      "learning_rate": 2.2821106431308544e-05,
      "loss": 4.8979,
      "step": 725
    },
    {
      "epoch": 0.5609573672400897,
      "grad_norm": 5.508594512939453,
      "learning_rate": 2.1316752366096948e-05,
      "loss": 4.8487,
      "step": 750
    },
    {
      "epoch": 0.5796559461480928,
      "grad_norm": 6.006443023681641,
      "learning_rate": 1.982592113621237e-05,
      "loss": 4.8909,
      "step": 775
    },
    {
      "epoch": 0.5983545250560958,
      "grad_norm": 5.325293064117432,
      "learning_rate": 1.835408624417918e-05,
      "loss": 4.7447,
      "step": 800
    },
    {
      "epoch": 0.6170531039640987,
      "grad_norm": 5.54567289352417,
      "learning_rate": 1.690665144854198e-05,
      "loss": 4.7865,
      "step": 825
    },
    {
      "epoch": 0.6357516828721017,
      "grad_norm": 5.164028167724609,
      "learning_rate": 1.5488930924271722e-05,
      "loss": 4.72,
      "step": 850
    },
    {
      "epoch": 0.6544502617801047,
      "grad_norm": 6.280778884887695,
      "learning_rate": 1.4106129752073022e-05,
      "loss": 4.6891,
      "step": 875
    },
    {
      "epoch": 0.6731488406881077,
      "grad_norm": 5.060294151306152,
      "learning_rate": 1.276332480822468e-05,
      "loss": 4.677,
      "step": 900
    },
    {
      "epoch": 0.6918474195961107,
      "grad_norm": 5.764603137969971,
      "learning_rate": 1.1465446125115758e-05,
      "loss": 4.5532,
      "step": 925
    },
    {
      "epoch": 0.7105459985041137,
      "grad_norm": 5.3928046226501465,
      "learning_rate": 1.0217258790910448e-05,
      "loss": 4.5508,
      "step": 950
    },
    {
      "epoch": 0.7292445774121167,
      "grad_norm": 6.133120536804199,
      "learning_rate": 9.023345454796459e-06,
      "loss": 4.5482,
      "step": 975
    },
    {
      "epoch": 0.7479431563201197,
      "grad_norm": 5.548661708831787,
      "learning_rate": 7.88808950204783e-06,
      "loss": 4.5212,
      "step": 1000
    }
  ],
  "logging_steps": 25,
  "max_steps": 1337,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 9.2220514369536e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}