| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 325, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.015384615384615385, | |
| "grad_norm": 1.4592643976211548, | |
| "learning_rate": 1.4634146341463416e-06, | |
| "loss": 1.3666, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03076923076923077, | |
| "grad_norm": 0.9001633524894714, | |
| "learning_rate": 3.2926829268292685e-06, | |
| "loss": 1.353, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.046153846153846156, | |
| "grad_norm": 0.7452582716941833, | |
| "learning_rate": 5.121951219512195e-06, | |
| "loss": 1.3232, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06153846153846154, | |
| "grad_norm": 0.6290757656097412, | |
| "learning_rate": 6.951219512195123e-06, | |
| "loss": 1.3691, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.4892721474170685, | |
| "learning_rate": 8.780487804878048e-06, | |
| "loss": 1.3552, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.09230769230769231, | |
| "grad_norm": 0.5850591659545898, | |
| "learning_rate": 1.0609756097560975e-05, | |
| "loss": 1.3401, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1076923076923077, | |
| "grad_norm": 0.5620784163475037, | |
| "learning_rate": 1.2439024390243903e-05, | |
| "loss": 1.2889, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.12307692307692308, | |
| "grad_norm": 0.5427800416946411, | |
| "learning_rate": 1.4268292682926829e-05, | |
| "loss": 1.2625, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13846153846153847, | |
| "grad_norm": 0.5011927485466003, | |
| "learning_rate": 1.6097560975609757e-05, | |
| "loss": 1.2391, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.5249335169792175, | |
| "learning_rate": 1.7926829268292684e-05, | |
| "loss": 1.2474, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16923076923076924, | |
| "grad_norm": 0.50667804479599, | |
| "learning_rate": 1.975609756097561e-05, | |
| "loss": 1.2475, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.18461538461538463, | |
| "grad_norm": 0.48870664834976196, | |
| "learning_rate": 2.1585365853658537e-05, | |
| "loss": 1.2505, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.44815170764923096, | |
| "learning_rate": 2.3414634146341466e-05, | |
| "loss": 1.2401, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.2153846153846154, | |
| "grad_norm": 0.4926839768886566, | |
| "learning_rate": 2.524390243902439e-05, | |
| "loss": 1.2612, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.6198599934577942, | |
| "learning_rate": 2.707317073170732e-05, | |
| "loss": 1.2474, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 0.49995529651641846, | |
| "learning_rate": 2.8902439024390242e-05, | |
| "loss": 1.2197, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.26153846153846155, | |
| "grad_norm": 0.5290886163711548, | |
| "learning_rate": 2.9999875637756577e-05, | |
| "loss": 1.1353, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.27692307692307694, | |
| "grad_norm": 0.5151082873344421, | |
| "learning_rate": 2.9998476586200195e-05, | |
| "loss": 1.1778, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2923076923076923, | |
| "grad_norm": 0.6584219932556152, | |
| "learning_rate": 2.9995523175756406e-05, | |
| "loss": 1.1296, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.5882734060287476, | |
| "learning_rate": 2.9991015712500275e-05, | |
| "loss": 1.1377, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3230769230769231, | |
| "grad_norm": 0.5423492193222046, | |
| "learning_rate": 2.9984954663560287e-05, | |
| "loss": 1.1778, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.3384615384615385, | |
| "grad_norm": 0.7454944849014282, | |
| "learning_rate": 2.9977340657069916e-05, | |
| "loss": 1.0789, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.35384615384615387, | |
| "grad_norm": 0.5375627875328064, | |
| "learning_rate": 2.9968174482102552e-05, | |
| "loss": 1.1178, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.36923076923076925, | |
| "grad_norm": 0.7350935339927673, | |
| "learning_rate": 2.9957457088589697e-05, | |
| "loss": 1.0682, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.624808669090271, | |
| "learning_rate": 2.994518958722255e-05, | |
| "loss": 1.0331, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.679461658000946, | |
| "learning_rate": 2.993137324933688e-05, | |
| "loss": 1.1324, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4153846153846154, | |
| "grad_norm": 0.6046873331069946, | |
| "learning_rate": 2.9916009506781284e-05, | |
| "loss": 1.0288, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4307692307692308, | |
| "grad_norm": 0.7874692678451538, | |
| "learning_rate": 2.9899099951768775e-05, | |
| "loss": 1.118, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4461538461538462, | |
| "grad_norm": 0.6846326589584351, | |
| "learning_rate": 2.988064633671181e-05, | |
| "loss": 1.109, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.6826982498168945, | |
| "learning_rate": 2.9860650574040666e-05, | |
| "loss": 1.0325, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.47692307692307695, | |
| "grad_norm": 0.9006183743476868, | |
| "learning_rate": 2.9839114736005216e-05, | |
| "loss": 1.0584, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.49230769230769234, | |
| "grad_norm": 0.8280394077301025, | |
| "learning_rate": 2.981604105446022e-05, | |
| "loss": 0.9616, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5076923076923077, | |
| "grad_norm": 0.7612792253494263, | |
| "learning_rate": 2.979143192063399e-05, | |
| "loss": 1.0125, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.5230769230769231, | |
| "grad_norm": 0.8651503324508667, | |
| "learning_rate": 2.976528988488061e-05, | |
| "loss": 0.9571, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.8503299951553345, | |
| "learning_rate": 2.97376176564156e-05, | |
| "loss": 0.9423, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5538461538461539, | |
| "grad_norm": 0.7497279047966003, | |
| "learning_rate": 2.9708418103035166e-05, | |
| "loss": 1.014, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5692307692307692, | |
| "grad_norm": 0.881376326084137, | |
| "learning_rate": 2.9677694250818998e-05, | |
| "loss": 0.9457, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5846153846153846, | |
| "grad_norm": 0.8077456951141357, | |
| "learning_rate": 2.9645449283816644e-05, | |
| "loss": 0.9003, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.8906092047691345, | |
| "learning_rate": 2.9611686543717565e-05, | |
| "loss": 0.944, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.8934299349784851, | |
| "learning_rate": 2.95764095295048e-05, | |
| "loss": 0.9277, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6307692307692307, | |
| "grad_norm": 0.9526508450508118, | |
| "learning_rate": 2.9539621897092342e-05, | |
| "loss": 0.8852, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6461538461538462, | |
| "grad_norm": 0.8791632056236267, | |
| "learning_rate": 2.950132745894629e-05, | |
| "loss": 0.9177, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6615384615384615, | |
| "grad_norm": 0.9002313613891602, | |
| "learning_rate": 2.946153018368971e-05, | |
| "loss": 0.8687, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.676923076923077, | |
| "grad_norm": 0.8795701861381531, | |
| "learning_rate": 2.9420234195691383e-05, | |
| "loss": 0.8655, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.8691343069076538, | |
| "learning_rate": 2.9377443774638358e-05, | |
| "loss": 0.8868, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.7076923076923077, | |
| "grad_norm": 0.9766488075256348, | |
| "learning_rate": 2.933316335509242e-05, | |
| "loss": 0.8838, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7230769230769231, | |
| "grad_norm": 0.9698830842971802, | |
| "learning_rate": 2.928739752603055e-05, | |
| "loss": 0.8904, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7384615384615385, | |
| "grad_norm": 0.9286680221557617, | |
| "learning_rate": 2.9240151030369314e-05, | |
| "loss": 0.8388, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.7538461538461538, | |
| "grad_norm": 0.8796747326850891, | |
| "learning_rate": 2.919142876447335e-05, | |
| "loss": 0.8116, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.0402207374572754, | |
| "learning_rate": 2.914123577764795e-05, | |
| "loss": 0.8384, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7846153846153846, | |
| "grad_norm": 1.0921474695205688, | |
| "learning_rate": 2.9089577271615735e-05, | |
| "loss": 0.8572, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0024511814117432, | |
| "learning_rate": 2.9036458599977625e-05, | |
| "loss": 0.7863, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8153846153846154, | |
| "grad_norm": 0.9102811217308044, | |
| "learning_rate": 2.8981885267658e-05, | |
| "loss": 0.8251, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.8307692307692308, | |
| "grad_norm": 1.1564185619354248, | |
| "learning_rate": 2.892586293033419e-05, | |
| "loss": 0.7996, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.9751123785972595, | |
| "learning_rate": 2.886839739385037e-05, | |
| "loss": 0.792, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.8615384615384616, | |
| "grad_norm": 0.9479052424430847, | |
| "learning_rate": 2.880949461361587e-05, | |
| "loss": 0.7419, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8769230769230769, | |
| "grad_norm": 1.006443738937378, | |
| "learning_rate": 2.874916069398798e-05, | |
| "loss": 0.8018, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.8923076923076924, | |
| "grad_norm": 0.9871719479560852, | |
| "learning_rate": 2.8687401887639343e-05, | |
| "loss": 0.7702, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9076923076923077, | |
| "grad_norm": 1.0391933917999268, | |
| "learning_rate": 2.8624224594909953e-05, | |
| "loss": 0.7881, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.9722039699554443, | |
| "learning_rate": 2.8559635363143857e-05, | |
| "loss": 0.764, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.9384615384615385, | |
| "grad_norm": 1.0607675313949585, | |
| "learning_rate": 2.849364088601063e-05, | |
| "loss": 0.7665, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.9538461538461539, | |
| "grad_norm": 1.122786045074463, | |
| "learning_rate": 2.8426248002811686e-05, | |
| "loss": 0.7217, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.9692307692307692, | |
| "grad_norm": 1.0698699951171875, | |
| "learning_rate": 2.8357463697771474e-05, | |
| "loss": 0.7319, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.9846153846153847, | |
| "grad_norm": 1.0934139490127563, | |
| "learning_rate": 2.8287295099313694e-05, | |
| "loss": 0.7172, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.0971643924713135, | |
| "learning_rate": 2.8215749479322523e-05, | |
| "loss": 0.6796, | |
| "step": 325 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1625, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.566837909793014e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |