{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 300000000000000000,
  "global_step": 4230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02364066193853428,
      "grad_norm": 4265.77734375,
      "learning_rate": 2.695035460992908e-06,
      "loss": 19.7865,
      "step": 50
    },
    {
      "epoch": 0.04728132387706856,
      "grad_norm": null,
      "learning_rate": 6.2411347517730495e-06,
      "loss": 9.5661,
      "step": 100
    },
    {
      "epoch": 0.07092198581560284,
      "grad_norm": 1082.762939453125,
      "learning_rate": 9.645390070921986e-06,
      "loss": 2.3602,
      "step": 150
    },
    {
      "epoch": 0.09456264775413711,
      "grad_norm": 249.59776306152344,
      "learning_rate": 1.3191489361702129e-05,
      "loss": 1.7304,
      "step": 200
    },
    {
      "epoch": 0.1182033096926714,
      "grad_norm": 171.15611267089844,
      "learning_rate": 1.673758865248227e-05,
      "loss": 1.6707,
      "step": 250
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 64.97687530517578,
      "learning_rate": 2.028368794326241e-05,
      "loss": 1.5563,
      "step": 300
    },
    {
      "epoch": 0.16548463356973994,
      "grad_norm": 22.22820472717285,
      "learning_rate": 2.3829787234042553e-05,
      "loss": 1.4809,
      "step": 350
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 17.94223403930664,
      "learning_rate": 2.7375886524822697e-05,
      "loss": 1.3877,
      "step": 400
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 6.133880615234375,
      "learning_rate": 2.999913686685402e-05,
      "loss": 1.2877,
      "step": 450
    },
    {
      "epoch": 0.2364066193853428,
      "grad_norm": 5.359964370727539,
      "learning_rate": 2.9979733510221677e-05,
      "loss": 1.1985,
      "step": 500
    },
    {
      "epoch": 0.26004728132387706,
      "grad_norm": 8.637283325195312,
      "learning_rate": 2.9934831545542617e-05,
      "loss": 1.1513,
      "step": 550
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 13.667312622070312,
      "learning_rate": 2.986450740525784e-05,
      "loss": 1.0896,
      "step": 600
    },
    {
      "epoch": 0.3073286052009456,
      "grad_norm": 6.358010292053223,
      "learning_rate": 2.9768880795615002e-05,
      "loss": 1.0343,
      "step": 650
    },
    {
      "epoch": 0.3309692671394799,
      "grad_norm": 4.579405784606934,
      "learning_rate": 2.9648114492903583e-05,
      "loss": 0.9778,
      "step": 700
    },
    {
      "epoch": 0.3546099290780142,
      "grad_norm": 4.688839435577393,
      "learning_rate": 2.950241406637593e-05,
      "loss": 0.9688,
      "step": 750
    },
    {
      "epoch": 0.37825059101654845,
      "grad_norm": 5.001623153686523,
      "learning_rate": 2.9332027528325834e-05,
      "loss": 0.9246,
      "step": 800
    },
    {
      "epoch": 0.40189125295508277,
      "grad_norm": 5.845919609069824,
      "learning_rate": 2.9137244911920255e-05,
      "loss": 0.9075,
      "step": 850
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 6.357454299926758,
      "learning_rate": 2.891839777750281e-05,
      "loss": 0.9032,
      "step": 900
    },
    {
      "epoch": 0.4491725768321513,
      "grad_norm": 4.689474105834961,
      "learning_rate": 2.8675858648209442e-05,
      "loss": 0.8833,
      "step": 950
    },
    {
      "epoch": 0.4728132387706856,
      "grad_norm": 5.220682621002197,
      "learning_rate": 2.841004037585688e-05,
      "loss": 0.8512,
      "step": 1000
    },
    {
      "epoch": 0.49645390070921985,
      "grad_norm": 6.682931423187256,
      "learning_rate": 2.8121395438183372e-05,
      "loss": 0.8526,
      "step": 1050
    },
    {
      "epoch": 0.5200945626477541,
      "grad_norm": 5.913801670074463,
      "learning_rate": 2.7810415168637912e-05,
      "loss": 0.8187,
      "step": 1100
    },
    {
      "epoch": 0.5437352245862884,
      "grad_norm": 5.719775676727295,
      "learning_rate": 2.7477628920028935e-05,
      "loss": 0.8174,
      "step": 1150
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 7.033783912658691,
      "learning_rate": 2.712360316345627e-05,
      "loss": 0.8378,
      "step": 1200
    },
    {
      "epoch": 0.5910165484633569,
      "grad_norm": 10.351150512695312,
      "learning_rate": 2.6748940524060027e-05,
      "loss": 0.81,
      "step": 1250
    },
    {
      "epoch": 0.6146572104018913,
      "grad_norm": 6.334929466247559,
      "learning_rate": 2.6354278755227802e-05,
      "loss": 0.8019,
      "step": 1300
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 4.533697605133057,
      "learning_rate": 2.5940289653006427e-05,
      "loss": 0.7874,
      "step": 1350
    },
    {
      "epoch": 0.6619385342789598,
      "grad_norm": 5.18574857711792,
      "learning_rate": 2.550767791256593e-05,
      "loss": 0.7758,
      "step": 1400
    },
    {
      "epoch": 0.6855791962174941,
      "grad_norm": 5.3152923583984375,
      "learning_rate": 2.5057179928662506e-05,
      "loss": 0.7811,
      "step": 1450
    },
    {
      "epoch": 0.7092198581560284,
      "grad_norm": 6.021467685699463,
      "learning_rate": 2.458956254214211e-05,
      "loss": 0.7849,
      "step": 1500
    },
    {
      "epoch": 0.7328605200945626,
      "grad_norm": 9.204541206359863,
      "learning_rate": 2.4105621734618613e-05,
      "loss": 0.7653,
      "step": 1550
    },
    {
      "epoch": 0.7565011820330969,
      "grad_norm": 7.0010504722595215,
      "learning_rate": 2.3606181273548253e-05,
      "loss": 0.7578,
      "step": 1600
    },
    {
      "epoch": 0.7801418439716312,
      "grad_norm": 4.713606357574463,
      "learning_rate": 2.309209131000687e-05,
      "loss": 0.743,
      "step": 1650
    },
    {
      "epoch": 0.8037825059101655,
      "grad_norm": 8.350544929504395,
      "learning_rate": 2.256422693155675e-05,
      "loss": 0.7327,
      "step": 1700
    },
    {
      "epoch": 0.8274231678486997,
      "grad_norm": 5.2699713706970215,
      "learning_rate": 2.2023486672666385e-05,
      "loss": 0.7252,
      "step": 1750
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 5.1293625831604,
      "learning_rate": 2.1470790985218804e-05,
      "loss": 0.7169,
      "step": 1800
    },
    {
      "epoch": 0.8747044917257684,
      "grad_norm": 5.4763407707214355,
      "learning_rate": 2.0907080671711832e-05,
      "loss": 0.7208,
      "step": 1850
    },
    {
      "epoch": 0.8983451536643026,
      "grad_norm": 13.955887794494629,
      "learning_rate": 2.0333315283817486e-05,
      "loss": 0.7309,
      "step": 1900
    },
    {
      "epoch": 0.9219858156028369,
      "grad_norm": 8.627326011657715,
      "learning_rate": 1.975047148902632e-05,
      "loss": 0.7155,
      "step": 1950
    },
    {
      "epoch": 0.9456264775413712,
      "grad_norm": 8.94743537902832,
      "learning_rate": 1.9159541408157158e-05,
      "loss": 0.7068,
      "step": 2000
    },
    {
      "epoch": 0.9692671394799054,
      "grad_norm": 5.135090351104736,
      "learning_rate": 1.8561530926562023e-05,
      "loss": 0.6918,
      "step": 2050
    },
    {
      "epoch": 0.9929078014184397,
      "grad_norm": 38.368629455566406,
      "learning_rate": 1.795745798190099e-05,
      "loss": 0.6818,
      "step": 2100
    },
    {
      "epoch": 1.016548463356974,
      "grad_norm": 25.27336311340332,
      "learning_rate": 1.734835083140153e-05,
      "loss": 0.685,
      "step": 2150
    },
    {
      "epoch": 1.0401891252955082,
      "grad_norm": 7.319797992706299,
      "learning_rate": 1.6735246301551825e-05,
      "loss": 0.6739,
      "step": 2200
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 10.93574333190918,
      "learning_rate": 1.6119188023207348e-05,
      "loss": 0.6828,
      "step": 2250
    },
    {
      "epoch": 1.0874704491725768,
      "grad_norm": 51.116207122802734,
      "learning_rate": 1.5501224655115118e-05,
      "loss": 0.6514,
      "step": 2300
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 17.643468856811523,
      "learning_rate": 1.4882408098879367e-05,
      "loss": 0.6708,
      "step": 2350
    },
    {
      "epoch": 1.1347517730496455,
      "grad_norm": 24.42500114440918,
      "learning_rate": 1.426379170840718e-05,
      "loss": 0.6595,
      "step": 2400
    },
    {
      "epoch": 1.1583924349881798,
      "grad_norm": 7.433150291442871,
      "learning_rate": 1.364642849688209e-05,
      "loss": 0.6545,
      "step": 2450
    },
    {
      "epoch": 1.1820330969267139,
      "grad_norm": 12.09028148651123,
      "learning_rate": 1.3031369344317569e-05,
      "loss": 0.6533,
      "step": 2500
    },
    {
      "epoch": 1.2056737588652482,
      "grad_norm": 8.16505241394043,
      "learning_rate": 1.2419661208741687e-05,
      "loss": 0.6466,
      "step": 2550
    },
    {
      "epoch": 1.2293144208037825,
      "grad_norm": 8.545854568481445,
      "learning_rate": 1.181234534405775e-05,
      "loss": 0.6311,
      "step": 2600
    },
    {
      "epoch": 1.2529550827423168,
      "grad_norm": 7.951985836029053,
      "learning_rate": 1.1210455527614574e-05,
      "loss": 0.6338,
      "step": 2650
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 17.269718170166016,
      "learning_rate": 1.061501630050338e-05,
      "loss": 0.6214,
      "step": 2700
    },
    {
      "epoch": 1.3002364066193852,
      "grad_norm": 14.052486419677734,
      "learning_rate": 1.0027041223576735e-05,
      "loss": 0.6307,
      "step": 2750
    },
    {
      "epoch": 1.3238770685579198,
      "grad_norm": 7.13311767578125,
      "learning_rate": 9.447531152158089e-06,
      "loss": 0.6273,
      "step": 2800
    },
    {
      "epoch": 1.3475177304964538,
      "grad_norm": 9.32107162475586,
      "learning_rate": 8.877472532378836e-06,
      "loss": 0.6218,
      "step": 2850
    },
    {
      "epoch": 1.3711583924349882,
      "grad_norm": 6.4954118728637695,
      "learning_rate": 8.317835722042693e-06,
      "loss": 0.6085,
      "step": 2900
    },
    {
      "epoch": 1.3947990543735225,
      "grad_norm": 7.570118427276611,
      "learning_rate": 7.769573338875851e-06,
      "loss": 0.6007,
      "step": 2950
    },
    {
      "epoch": 1.4184397163120568,
      "grad_norm": 18.045612335205078,
      "learning_rate": 7.2336186389743095e-06,
      "loss": 0.6177,
      "step": 3000
    },
    {
      "epoch": 1.442080378250591,
      "grad_norm": 10.485590934753418,
      "learning_rate": 6.710883928208835e-06,
      "loss": 0.5859,
      "step": 3050
    },
    {
      "epoch": 1.4657210401891252,
      "grad_norm": 13.521890640258789,
      "learning_rate": 6.202259009291401e-06,
      "loss": 0.6117,
      "step": 3100
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 10.763672828674316,
      "learning_rate": 5.708609667146788e-06,
      "loss": 0.5647,
      "step": 3150
    },
    {
      "epoch": 1.5130023640661938,
      "grad_norm": 13.315629959106445,
      "learning_rate": 5.2307761951673425e-06,
      "loss": 0.6115,
      "step": 3200
    },
    {
      "epoch": 1.5366430260047281,
      "grad_norm": 13.952415466308594,
      "learning_rate": 4.769571964859664e-06,
      "loss": 0.594,
      "step": 3250
    },
    {
      "epoch": 1.5602836879432624,
      "grad_norm": 9.341636657714844,
      "learning_rate": 4.325782041317874e-06,
      "loss": 0.5803,
      "step": 3300
    },
    {
      "epoch": 1.5839243498817965,
      "grad_norm": 23.52338218688965,
      "learning_rate": 3.900161846880281e-06,
      "loss": 0.5648,
      "step": 3350
    },
    {
      "epoch": 1.607565011820331,
      "grad_norm": 22.669052124023438,
      "learning_rate": 3.4934358752441315e-06,
      "loss": 0.5588,
      "step": 3400
    },
    {
      "epoch": 1.6312056737588652,
      "grad_norm": 50.38961410522461,
      "learning_rate": 3.106296458227363e-06,
      "loss": 0.5777,
      "step": 3450
    },
    {
      "epoch": 1.6548463356973995,
      "grad_norm": 6.503214359283447,
      "learning_rate": 2.7394025872764556e-06,
      "loss": 0.5688,
      "step": 3500
    },
    {
      "epoch": 1.6784869976359338,
      "grad_norm": 81.57908630371094,
      "learning_rate": 2.39337879172658e-06,
      "loss": 0.5849,
      "step": 3550
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 10.680161476135254,
      "learning_rate": 2.0688140757233428e-06,
      "loss": 0.5761,
      "step": 3600
    },
    {
      "epoch": 1.7257683215130024,
      "grad_norm": 6.659136772155762,
      "learning_rate": 1.7662609156157749e-06,
      "loss": 0.566,
      "step": 3650
    },
    {
      "epoch": 1.7494089834515365,
      "grad_norm": 13.552983283996582,
      "learning_rate": 1.486234319527186e-06,
      "loss": 0.5625,
      "step": 3700
    },
    {
      "epoch": 1.773049645390071,
      "grad_norm": 22.759199142456055,
      "learning_rate": 1.2292109507047273e-06,
      "loss": 0.557,
      "step": 3750
    },
    {
      "epoch": 1.7966903073286051,
      "grad_norm": 36.262474060058594,
      "learning_rate": 9.956283161398172e-07,
      "loss": 0.5561,
      "step": 3800
    },
    {
      "epoch": 1.8203309692671394,
      "grad_norm": 8.722681999206543,
      "learning_rate": 7.858840218406921e-07,
      "loss": 0.5624,
      "step": 3850
    },
    {
      "epoch": 1.8439716312056738,
      "grad_norm": 6.087028980255127,
      "learning_rate": 6.00335096024619e-07,
      "loss": 0.5747,
      "step": 3900
    },
    {
      "epoch": 1.867612293144208,
      "grad_norm": 9.753291130065918,
      "learning_rate": 4.3929738138196787e-07,
      "loss": 0.5629,
      "step": 3950
    },
    {
      "epoch": 1.8912529550827424,
      "grad_norm": 13.233976364135742,
      "learning_rate": 3.030449974465471e-07,
      "loss": 0.5588,
      "step": 4000
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 23.338430404663086,
      "learning_rate": 1.9180987398740358e-07,
      "loss": 0.5632,
      "step": 4050
    },
    {
      "epoch": 1.938534278959811,
      "grad_norm": 30.731971740722656,
      "learning_rate": 1.0578135621633178e-07,
      "loss": 0.5566,
      "step": 4100
    },
    {
      "epoch": 1.962174940898345,
      "grad_norm": 12.004512786865234,
      "learning_rate": 4.5105882483119643e-08,
      "loss": 0.5446,
      "step": 4150
    },
    {
      "epoch": 1.9858156028368794,
      "grad_norm": 18.55535125732422,
      "learning_rate": 9.886735007152425e-09,
      "loss": 0.5539,
      "step": 4200
    }
  ],
  "logging_steps": 50,
  "max_steps": 4230,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.480992965002527e+17,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}