| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.5771297006907137, |
| "eval_steps": 2000.0, |
| "global_step": 420, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06139677666922486, |
| "grad_norm": 0.1343451291322708, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.293, |
| "num_tokens": 7752366.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12279355333844973, |
| "grad_norm": 0.14936372637748718, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.2405, |
| "num_tokens": 15390227.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1841903300076746, |
| "grad_norm": 0.10722821950912476, |
| "learning_rate": 3e-06, |
| "loss": 0.2648, |
| "num_tokens": 23123703.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.24558710667689945, |
| "grad_norm": 0.0795913115143776, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.3012, |
| "num_tokens": 30930492.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3069838833461243, |
| "grad_norm": 0.11849294602870941, |
| "learning_rate": 5e-06, |
| "loss": 0.2342, |
| "num_tokens": 38599104.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3683806600153492, |
| "grad_norm": 0.13234864175319672, |
| "learning_rate": 6e-06, |
| "loss": 0.2306, |
| "num_tokens": 46329582.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4297774366845741, |
| "grad_norm": 0.12520423531532288, |
| "learning_rate": 7e-06, |
| "loss": 0.2753, |
| "num_tokens": 54163228.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4911742133537989, |
| "grad_norm": 0.1749039590358734, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.2848, |
| "num_tokens": 61755924.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5525709900230238, |
| "grad_norm": 0.13144172728061676, |
| "learning_rate": 9e-06, |
| "loss": 0.2499, |
| "num_tokens": 69324388.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6139677666922486, |
| "grad_norm": 0.1269148290157318, |
| "learning_rate": 1e-05, |
| "loss": 0.2941, |
| "num_tokens": 76977972.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6753645433614736, |
| "grad_norm": 0.13263040781021118, |
| "learning_rate": 9.998945997517957e-06, |
| "loss": 0.3423, |
| "num_tokens": 84625449.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.7367613200306984, |
| "grad_norm": 0.12442048639059067, |
| "learning_rate": 9.99578443444032e-06, |
| "loss": 0.2522, |
| "num_tokens": 92380333.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7981580966999232, |
| "grad_norm": 0.12848921120166779, |
| "learning_rate": 9.990516643685222e-06, |
| "loss": 0.2305, |
| "num_tokens": 100278549.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8595548733691482, |
| "grad_norm": 0.18896037340164185, |
| "learning_rate": 9.983144846158472e-06, |
| "loss": 0.2682, |
| "num_tokens": 107896424.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.920951650038373, |
| "grad_norm": 0.10803721845149994, |
| "learning_rate": 9.973672149817232e-06, |
| "loss": 0.2951, |
| "num_tokens": 115687353.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9823484267075978, |
| "grad_norm": 0.1490592062473297, |
| "learning_rate": 9.96210254835968e-06, |
| "loss": 0.2354, |
| "num_tokens": 123478527.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0429777436684573, |
| "grad_norm": 0.11919503659009933, |
| "learning_rate": 9.948440919541277e-06, |
| "loss": 0.313, |
| "num_tokens": 131158765.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.1043745203376822, |
| "grad_norm": 0.14419515430927277, |
| "learning_rate": 9.932693023118299e-06, |
| "loss": 0.3636, |
| "num_tokens": 138879586.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.1657712970069072, |
| "grad_norm": 0.13056258857250214, |
| "learning_rate": 9.91486549841951e-06, |
| "loss": 0.237, |
| "num_tokens": 146732371.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.2271680736761321, |
| "grad_norm": 0.1365155279636383, |
| "learning_rate": 9.894965861547023e-06, |
| "loss": 0.2511, |
| "num_tokens": 154459979.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.2885648503453568, |
| "grad_norm": 0.2808520495891571, |
| "learning_rate": 9.873002502207502e-06, |
| "loss": 0.2508, |
| "num_tokens": 162175757.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.3499616270145818, |
| "grad_norm": 0.0974242314696312, |
| "learning_rate": 9.848984680175049e-06, |
| "loss": 0.2109, |
| "num_tokens": 169696281.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.4113584036838067, |
| "grad_norm": 0.1405002772808075, |
| "learning_rate": 9.822922521387277e-06, |
| "loss": 0.2564, |
| "num_tokens": 177488034.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.4727551803530314, |
| "grad_norm": 0.17119945585727692, |
| "learning_rate": 9.794827013676206e-06, |
| "loss": 0.2494, |
| "num_tokens": 185380889.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.5341519570222564, |
| "grad_norm": 0.11872359365224838, |
| "learning_rate": 9.764710002135784e-06, |
| "loss": 0.3133, |
| "num_tokens": 193120160.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.5955487336914813, |
| "grad_norm": 0.14314331114292145, |
| "learning_rate": 9.732584184127973e-06, |
| "loss": 0.3011, |
| "num_tokens": 200841296.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.656945510360706, |
| "grad_norm": 0.1306375414133072, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.2421, |
| "num_tokens": 208630495.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.718342287029931, |
| "grad_norm": 0.18562458455562592, |
| "learning_rate": 9.66236114702178e-06, |
| "loss": 0.2501, |
| "num_tokens": 216297417.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.779739063699156, |
| "grad_norm": 0.3968285322189331, |
| "learning_rate": 9.62429353402556e-06, |
| "loss": 0.2719, |
| "num_tokens": 224014081.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.8411358403683806, |
| "grad_norm": 0.13298219442367554, |
| "learning_rate": 9.584276314284316e-06, |
| "loss": 0.2728, |
| "num_tokens": 231641746.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.9025326170376056, |
| "grad_norm": 0.11644992977380753, |
| "learning_rate": 9.542326359097619e-06, |
| "loss": 0.2557, |
| "num_tokens": 239183635.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.9639293937068305, |
| "grad_norm": 0.13967359066009521, |
| "learning_rate": 9.498461354608228e-06, |
| "loss": 0.24, |
| "num_tokens": 246880516.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.02455871066769, |
| "grad_norm": 0.10428839921951294, |
| "learning_rate": 9.452699794345583e-06, |
| "loss": 0.2238, |
| "num_tokens": 254662159.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.0859554873369146, |
| "grad_norm": 0.1438775211572647, |
| "learning_rate": 9.405060971428924e-06, |
| "loss": 0.228, |
| "num_tokens": 262332176.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.1473522640061398, |
| "grad_norm": 0.13422533869743347, |
| "learning_rate": 9.355564970433288e-06, |
| "loss": 0.2599, |
| "num_tokens": 270068159.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.2087490406753645, |
| "grad_norm": 0.18186450004577637, |
| "learning_rate": 9.30423265892184e-06, |
| "loss": 0.2758, |
| "num_tokens": 277876746.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.2701458173445896, |
| "grad_norm": 0.1439521163702011, |
| "learning_rate": 9.251085678648072e-06, |
| "loss": 0.2618, |
| "num_tokens": 285680808.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.3315425940138144, |
| "grad_norm": 0.11770835518836975, |
| "learning_rate": 9.196146436431635e-06, |
| "loss": 0.2424, |
| "num_tokens": 293359797.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.392939370683039, |
| "grad_norm": 0.11337260156869888, |
| "learning_rate": 9.13943809471159e-06, |
| "loss": 0.2129, |
| "num_tokens": 301088235.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.4543361473522642, |
| "grad_norm": 0.11012344807386398, |
| "learning_rate": 9.08098456178111e-06, |
| "loss": 0.2094, |
| "num_tokens": 308725104.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.515732924021489, |
| "grad_norm": 0.1827051341533661, |
| "learning_rate": 9.020810481707709e-06, |
| "loss": 0.2676, |
| "num_tokens": 316685360.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.5771297006907137, |
| "grad_norm": 0.16157183051109314, |
| "learning_rate": 8.958941223943292e-06, |
| "loss": 0.27, |
| "num_tokens": 324401853.0, |
| "step": 420 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1630, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 60, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5150507719439417e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|