| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 468, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0106951871657754, |
| "grad_norm": 5.869461536407471, |
| "learning_rate": 9.30232558139535e-07, |
| "loss": 0.3054, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0213903743315508, |
| "grad_norm": 4.7690653800964355, |
| "learning_rate": 2.0930232558139536e-06, |
| "loss": 0.2661, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03208556149732621, |
| "grad_norm": 4.248436450958252, |
| "learning_rate": 3.2558139534883724e-06, |
| "loss": 0.2409, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0427807486631016, |
| "grad_norm": 4.08921480178833, |
| "learning_rate": 4.418604651162791e-06, |
| "loss": 0.2228, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.053475935828877004, |
| "grad_norm": 4.846882343292236, |
| "learning_rate": 5.58139534883721e-06, |
| "loss": 0.2258, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06417112299465241, |
| "grad_norm": 4.230620384216309, |
| "learning_rate": 6.744186046511628e-06, |
| "loss": 0.2103, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0748663101604278, |
| "grad_norm": 4.4064130783081055, |
| "learning_rate": 7.906976744186048e-06, |
| "loss": 0.2122, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0855614973262032, |
| "grad_norm": 4.86531400680542, |
| "learning_rate": 9.069767441860465e-06, |
| "loss": 0.1996, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0962566844919786, |
| "grad_norm": 5.53223991394043, |
| "learning_rate": 9.999986679414613e-06, |
| "loss": 0.2311, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 4.2916178703308105, |
| "learning_rate": 9.999520466378376e-06, |
| "loss": 0.2319, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 4.200897216796875, |
| "learning_rate": 9.998388295046227e-06, |
| "loss": 0.2368, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12834224598930483, |
| "grad_norm": 4.808861255645752, |
| "learning_rate": 9.996590316228402e-06, |
| "loss": 0.2354, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13903743315508021, |
| "grad_norm": 4.5134077072143555, |
| "learning_rate": 9.994126769423656e-06, |
| "loss": 0.234, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1497326203208556, |
| "grad_norm": 4.348900318145752, |
| "learning_rate": 9.990997982787348e-06, |
| "loss": 0.2541, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 4.4039306640625, |
| "learning_rate": 9.98720437308773e-06, |
| "loss": 0.2496, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1711229946524064, |
| "grad_norm": 4.726532936096191, |
| "learning_rate": 9.982746445650437e-06, |
| "loss": 0.2797, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 4.256592273712158, |
| "learning_rate": 9.977624794291172e-06, |
| "loss": 0.2518, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1925133689839572, |
| "grad_norm": 4.330168724060059, |
| "learning_rate": 9.97184010123661e-06, |
| "loss": 0.261, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20320855614973263, |
| "grad_norm": 4.281119346618652, |
| "learning_rate": 9.965393137033512e-06, |
| "loss": 0.2564, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 4.247673511505127, |
| "learning_rate": 9.958284760446104e-06, |
| "loss": 0.2501, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22459893048128343, |
| "grad_norm": 3.930504322052002, |
| "learning_rate": 9.950515918341666e-06, |
| "loss": 0.2504, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 4.084280967712402, |
| "learning_rate": 9.942087645564415e-06, |
| "loss": 0.2706, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.24598930481283424, |
| "grad_norm": 3.869400978088379, |
| "learning_rate": 9.93300106479766e-06, |
| "loss": 0.2499, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.25668449197860965, |
| "grad_norm": 4.0887675285339355, |
| "learning_rate": 9.923257386414253e-06, |
| "loss": 0.2628, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.26737967914438504, |
| "grad_norm": 4.140570163726807, |
| "learning_rate": 9.912857908315363e-06, |
| "loss": 0.2642, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.27807486631016043, |
| "grad_norm": 4.731294631958008, |
| "learning_rate": 9.901804015757588e-06, |
| "loss": 0.2669, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2887700534759358, |
| "grad_norm": 4.113650321960449, |
| "learning_rate": 9.89009718116843e-06, |
| "loss": 0.2809, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2994652406417112, |
| "grad_norm": 3.8770499229431152, |
| "learning_rate": 9.877738963950175e-06, |
| "loss": 0.2705, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.31016042780748665, |
| "grad_norm": 4.077112197875977, |
| "learning_rate": 9.864731010272152e-06, |
| "loss": 0.2548, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 4.771427631378174, |
| "learning_rate": 9.851075052851476e-06, |
| "loss": 0.2534, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3315508021390374, |
| "grad_norm": 3.84387469291687, |
| "learning_rate": 9.83677291072223e-06, |
| "loss": 0.2438, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3422459893048128, |
| "grad_norm": 4.427935600280762, |
| "learning_rate": 9.821826488993168e-06, |
| "loss": 0.2791, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 4.1868767738342285, |
| "learning_rate": 9.806237778593941e-06, |
| "loss": 0.2734, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 3.8055779933929443, |
| "learning_rate": 9.790008856009902e-06, |
| "loss": 0.2463, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.37433155080213903, |
| "grad_norm": 4.041962623596191, |
| "learning_rate": 9.773141883005507e-06, |
| "loss": 0.2813, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3850267379679144, |
| "grad_norm": 3.6363329887390137, |
| "learning_rate": 9.755639106336347e-06, |
| "loss": 0.2724, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.39572192513368987, |
| "grad_norm": 3.50486421585083, |
| "learning_rate": 9.737502857449894e-06, |
| "loss": 0.271, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.40641711229946526, |
| "grad_norm": 3.4967920780181885, |
| "learning_rate": 9.718735552174923e-06, |
| "loss": 0.2467, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.41711229946524064, |
| "grad_norm": 3.3797831535339355, |
| "learning_rate": 9.699339690399717e-06, |
| "loss": 0.2621, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 15.109771728515625, |
| "learning_rate": 9.679317855739073e-06, |
| "loss": 0.2791, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4385026737967914, |
| "grad_norm": 3.828474760055542, |
| "learning_rate": 9.658672715190151e-06, |
| "loss": 0.2829, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.44919786096256686, |
| "grad_norm": 3.2750980854034424, |
| "learning_rate": 9.637407018777224e-06, |
| "loss": 0.2866, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45989304812834225, |
| "grad_norm": 3.615290880203247, |
| "learning_rate": 9.615523599185353e-06, |
| "loss": 0.2422, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 3.5919339656829834, |
| "learning_rate": 9.593025371383064e-06, |
| "loss": 0.269, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 4.708985805511475, |
| "learning_rate": 9.569915332234068e-06, |
| "loss": 0.2686, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4919786096256685, |
| "grad_norm": 3.5988545417785645, |
| "learning_rate": 9.546196560098062e-06, |
| "loss": 0.273, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5026737967914439, |
| "grad_norm": 3.4772067070007324, |
| "learning_rate": 9.521872214420668e-06, |
| "loss": 0.2395, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5133689839572193, |
| "grad_norm": 3.7974278926849365, |
| "learning_rate": 9.496945535312597e-06, |
| "loss": 0.2682, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5240641711229946, |
| "grad_norm": 3.69970440864563, |
| "learning_rate": 9.471419843118036e-06, |
| "loss": 0.2557, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 3.470737934112549, |
| "learning_rate": 9.44529853797238e-06, |
| "loss": 0.266, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 3.733236312866211, |
| "learning_rate": 9.418585099349306e-06, |
| "loss": 0.2779, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5561497326203209, |
| "grad_norm": 3.434349298477173, |
| "learning_rate": 9.391283085597299e-06, |
| "loss": 0.2508, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5668449197860963, |
| "grad_norm": 3.5163183212280273, |
| "learning_rate": 9.36339613346565e-06, |
| "loss": 0.2727, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5775401069518716, |
| "grad_norm": 4.07936429977417, |
| "learning_rate": 9.33492795762005e-06, |
| "loss": 0.2714, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 3.171149492263794, |
| "learning_rate": 9.305882350147763e-06, |
| "loss": 0.2585, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5989304812834224, |
| "grad_norm": 3.424302101135254, |
| "learning_rate": 9.276263180052498e-06, |
| "loss": 0.2584, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6096256684491979, |
| "grad_norm": 3.800421714782715, |
| "learning_rate": 9.246074392739057e-06, |
| "loss": 0.2771, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6203208556149733, |
| "grad_norm": 3.4711737632751465, |
| "learning_rate": 9.21532000948778e-06, |
| "loss": 0.265, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6310160427807486, |
| "grad_norm": 3.5598926544189453, |
| "learning_rate": 9.184004126918891e-06, |
| "loss": 0.2653, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 3.320875644683838, |
| "learning_rate": 9.152130916446817e-06, |
| "loss": 0.2373, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6524064171122995, |
| "grad_norm": 3.606067419052124, |
| "learning_rate": 9.119704623724528e-06, |
| "loss": 0.2779, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6631016042780749, |
| "grad_norm": 3.189316511154175, |
| "learning_rate": 9.086729568078006e-06, |
| "loss": 0.2813, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6737967914438503, |
| "grad_norm": 3.725550651550293, |
| "learning_rate": 9.05321014193089e-06, |
| "loss": 0.284, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6844919786096256, |
| "grad_norm": 3.6915953159332275, |
| "learning_rate": 9.019150810219376e-06, |
| "loss": 0.2646, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6951871657754011, |
| "grad_norm": 3.499095916748047, |
| "learning_rate": 8.984556109797484e-06, |
| "loss": 0.2693, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 3.671689510345459, |
| "learning_rate": 8.949430648832716e-06, |
| "loss": 0.2662, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7165775401069518, |
| "grad_norm": 3.549403429031372, |
| "learning_rate": 8.91377910619223e-06, |
| "loss": 0.2745, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 3.2107975482940674, |
| "learning_rate": 8.8776062308196e-06, |
| "loss": 0.2462, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7379679144385026, |
| "grad_norm": 3.040982246398926, |
| "learning_rate": 8.84091684110223e-06, |
| "loss": 0.2663, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 3.460141658782959, |
| "learning_rate": 8.803715824229525e-06, |
| "loss": 0.2595, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7593582887700535, |
| "grad_norm": 3.5115394592285156, |
| "learning_rate": 8.766008135541896e-06, |
| "loss": 0.2476, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7700534759358288, |
| "grad_norm": 3.2345032691955566, |
| "learning_rate": 8.727798797870688e-06, |
| "loss": 0.2399, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7807486631016043, |
| "grad_norm": 3.489104986190796, |
| "learning_rate": 8.689092900869112e-06, |
| "loss": 0.2568, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7914438502673797, |
| "grad_norm": 3.4822070598602295, |
| "learning_rate": 8.649895600334284e-06, |
| "loss": 0.2779, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 3.6014904975891113, |
| "learning_rate": 8.610212117520453e-06, |
| "loss": 0.2738, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8128342245989305, |
| "grad_norm": 3.3353493213653564, |
| "learning_rate": 8.570047738443502e-06, |
| "loss": 0.2564, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 3.1294198036193848, |
| "learning_rate": 8.52940781317683e-06, |
| "loss": 0.2459, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8342245989304813, |
| "grad_norm": 3.2122206687927246, |
| "learning_rate": 8.48829775513869e-06, |
| "loss": 0.2639, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8449197860962567, |
| "grad_norm": 3.2475640773773193, |
| "learning_rate": 8.446723040371114e-06, |
| "loss": 0.2545, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 3.113609552383423, |
| "learning_rate": 8.40468920681047e-06, |
| "loss": 0.2712, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8663101604278075, |
| "grad_norm": 3.231935501098633, |
| "learning_rate": 8.362201853549777e-06, |
| "loss": 0.27, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8770053475935828, |
| "grad_norm": 3.3826744556427, |
| "learning_rate": 8.319266640092899e-06, |
| "loss": 0.2823, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8877005347593583, |
| "grad_norm": 3.3691983222961426, |
| "learning_rate": 8.275889285600656e-06, |
| "loss": 0.2555, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8983957219251337, |
| "grad_norm": 3.4460556507110596, |
| "learning_rate": 8.23207556812902e-06, |
| "loss": 0.2615, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 3.0713820457458496, |
| "learning_rate": 8.187831323859445e-06, |
| "loss": 0.2512, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9197860962566845, |
| "grad_norm": 2.781736373901367, |
| "learning_rate": 8.143162446321465e-06, |
| "loss": 0.2568, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.93048128342246, |
| "grad_norm": 3.0802292823791504, |
| "learning_rate": 8.098074885607646e-06, |
| "loss": 0.2506, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 3.0498411655426025, |
| "learning_rate": 8.052574647581009e-06, |
| "loss": 0.2586, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9518716577540107, |
| "grad_norm": 3.0598437786102295, |
| "learning_rate": 8.006667793075026e-06, |
| "loss": 0.2576, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 2.935920238494873, |
| "learning_rate": 7.960360437086287e-06, |
| "loss": 0.2363, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9732620320855615, |
| "grad_norm": 3.2348110675811768, |
| "learning_rate": 7.91365874795995e-06, |
| "loss": 0.2677, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.983957219251337, |
| "grad_norm": 3.4138996601104736, |
| "learning_rate": 7.866568946568107e-06, |
| "loss": 0.2702, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9946524064171123, |
| "grad_norm": 2.949201822280884, |
| "learning_rate": 7.819097305481112e-06, |
| "loss": 0.2487, |
| "step": 465 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1404, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.619093694675681e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|