{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9995220557226798,
  "eval_steps": 500,
  "global_step": 1111,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00899659816132025,
      "grad_norm": 9.407082207126406,
      "learning_rate": 1.323529411764706e-06,
      "loss": 1.1112,
      "step": 10
    },
    {
      "epoch": 0.0179931963226405,
      "grad_norm": 1.6078560053885997,
      "learning_rate": 2.7941176470588237e-06,
      "loss": 0.9362,
      "step": 20
    },
    {
      "epoch": 0.02698979448396075,
      "grad_norm": 1.624171273563677,
      "learning_rate": 4.264705882352942e-06,
      "loss": 0.8806,
      "step": 30
    },
    {
      "epoch": 0.035986392645281,
      "grad_norm": 1.4554360159842772,
      "learning_rate": 4.9997341046993195e-06,
      "loss": 0.8947,
      "step": 40
    },
    {
      "epoch": 0.04498299080660125,
      "grad_norm": 1.4128774633791414,
      "learning_rate": 4.997607281643338e-06,
      "loss": 0.8764,
      "step": 50
    },
    {
      "epoch": 0.0539795889679215,
      "grad_norm": 1.4582318570520234,
      "learning_rate": 4.993355445074358e-06,
      "loss": 0.8758,
      "step": 60
    },
    {
      "epoch": 0.06297618712924176,
      "grad_norm": 1.241568669359911,
      "learning_rate": 4.986982212538754e-06,
      "loss": 0.8839,
      "step": 70
    },
    {
      "epoch": 0.071972785290562,
      "grad_norm": 1.4053494360834542,
      "learning_rate": 4.978493006508408e-06,
      "loss": 0.8741,
      "step": 80
    },
    {
      "epoch": 0.08096938345188226,
      "grad_norm": 1.4737910013351594,
      "learning_rate": 4.967895049767168e-06,
      "loss": 0.8859,
      "step": 90
    },
    {
      "epoch": 0.0899659816132025,
      "grad_norm": 1.3138356543765093,
      "learning_rate": 4.9551973592655565e-06,
      "loss": 0.8506,
      "step": 100
    },
    {
      "epoch": 0.09896257977452276,
      "grad_norm": 1.3758428201688702,
      "learning_rate": 4.940410738448974e-06,
      "loss": 0.9021,
      "step": 110
    },
    {
      "epoch": 0.107959177935843,
      "grad_norm": 1.4334286224262116,
      "learning_rate": 4.923547768065916e-06,
      "loss": 0.8752,
      "step": 120
    },
    {
      "epoch": 0.11695577609716326,
      "grad_norm": 1.3787955824383673,
      "learning_rate": 4.904622795464018e-06,
      "loss": 0.8824,
      "step": 130
    },
    {
      "epoch": 0.12595237425848352,
      "grad_norm": 1.3627983383165039,
      "learning_rate": 4.883651922383059e-06,
      "loss": 0.8519,
      "step": 140
    },
    {
      "epoch": 0.13494897241980378,
      "grad_norm": 1.3424633268190644,
      "learning_rate": 4.860652991255274e-06,
      "loss": 0.8565,
      "step": 150
    },
    {
      "epoch": 0.143945570581124,
      "grad_norm": 1.4227597124829214,
      "learning_rate": 4.835645570024666e-06,
      "loss": 0.8884,
      "step": 160
    },
    {
      "epoch": 0.15294216874244426,
      "grad_norm": 1.3745439024877604,
      "learning_rate": 4.808650935498216e-06,
      "loss": 0.8394,
      "step": 170
    },
    {
      "epoch": 0.16193876690376452,
      "grad_norm": 1.1913982454389247,
      "learning_rate": 4.779692055243149e-06,
      "loss": 0.8542,
      "step": 180
    },
    {
      "epoch": 0.17093536506508478,
      "grad_norm": 1.3183554482150048,
      "learning_rate": 4.748793568045682e-06,
      "loss": 0.8633,
      "step": 190
    },
    {
      "epoch": 0.179931963226405,
      "grad_norm": 1.4397694519607327,
      "learning_rate": 4.715981762947854e-06,
      "loss": 0.8532,
      "step": 200
    },
    {
      "epoch": 0.18892856138772526,
      "grad_norm": 1.2038269967391835,
      "learning_rate": 4.681284556880294e-06,
      "loss": 0.8544,
      "step": 210
    },
    {
      "epoch": 0.19792515954904552,
      "grad_norm": 1.2701132880820947,
      "learning_rate": 4.6447314709099436e-06,
      "loss": 0.8463,
      "step": 220
    },
    {
      "epoch": 0.20692175771036578,
      "grad_norm": 1.3356581457961003,
      "learning_rate": 4.606353605122954e-06,
      "loss": 0.8652,
      "step": 230
    },
    {
      "epoch": 0.215918355871686,
      "grad_norm": 1.3140257601689191,
      "learning_rate": 4.566183612164116e-06,
      "loss": 0.8737,
      "step": 240
    },
    {
      "epoch": 0.22491495403300626,
      "grad_norm": 1.2150684493886468,
      "learning_rate": 4.52425566945535e-06,
      "loss": 0.8448,
      "step": 250
    },
    {
      "epoch": 0.23391155219432652,
      "grad_norm": 1.3356045800197514,
      "learning_rate": 4.480605450116879e-06,
      "loss": 0.8531,
      "step": 260
    },
    {
      "epoch": 0.24290815035564678,
      "grad_norm": 1.3621827029728837,
      "learning_rate": 4.435270092615835e-06,
      "loss": 0.8569,
      "step": 270
    },
    {
      "epoch": 0.25190474851696704,
      "grad_norm": 1.2958362488368202,
      "learning_rate": 4.388288169168121e-06,
      "loss": 0.8559,
      "step": 280
    },
    {
      "epoch": 0.2609013466782873,
      "grad_norm": 1.315904667574477,
      "learning_rate": 4.339699652920407e-06,
      "loss": 0.8487,
      "step": 290
    },
    {
      "epoch": 0.26989794483960755,
      "grad_norm": 1.192500405781859,
      "learning_rate": 4.28954588394019e-06,
      "loss": 0.8427,
      "step": 300
    },
    {
      "epoch": 0.27889454300092775,
      "grad_norm": 1.3463647117462414,
      "learning_rate": 4.237869534042848e-06,
      "loss": 0.8645,
      "step": 310
    },
    {
      "epoch": 0.287891141162248,
      "grad_norm": 1.302446463869839,
      "learning_rate": 4.184714570485619e-06,
      "loss": 0.8739,
      "step": 320
    },
    {
      "epoch": 0.29688773932356827,
      "grad_norm": 1.2998957392856052,
      "learning_rate": 4.130126218559396e-06,
      "loss": 0.8388,
      "step": 330
    },
    {
      "epoch": 0.3058843374848885,
      "grad_norm": 1.1895515530331358,
      "learning_rate": 4.074150923110149e-06,
      "loss": 0.8344,
      "step": 340
    },
    {
      "epoch": 0.3148809356462088,
      "grad_norm": 1.2291112546963492,
      "learning_rate": 4.0168363090227425e-06,
      "loss": 0.8489,
      "step": 350
    },
    {
      "epoch": 0.32387753380752904,
      "grad_norm": 1.2187350361306888,
      "learning_rate": 3.958231140700742e-06,
      "loss": 0.8827,
      "step": 360
    },
    {
      "epoch": 0.3328741319688493,
      "grad_norm": 1.2251085843679796,
      "learning_rate": 3.898385280576696e-06,
      "loss": 0.8257,
      "step": 370
    },
    {
      "epoch": 0.34187073013016955,
      "grad_norm": 1.2708814495416274,
      "learning_rate": 3.8373496466881986e-06,
      "loss": 0.8586,
      "step": 380
    },
    {
      "epoch": 0.35086732829148976,
      "grad_norm": 1.3212728062854284,
      "learning_rate": 3.775176169355816e-06,
      "loss": 0.8633,
      "step": 390
    },
    {
      "epoch": 0.35986392645281,
      "grad_norm": 1.3153556245818046,
      "learning_rate": 3.7119177469997506e-06,
      "loss": 0.8385,
      "step": 400
    },
    {
      "epoch": 0.36886052461413027,
      "grad_norm": 1.2072741564966885,
      "learning_rate": 3.647628201132818e-06,
      "loss": 0.8167,
      "step": 410
    },
    {
      "epoch": 0.3778571227754505,
      "grad_norm": 1.2379563543143086,
      "learning_rate": 3.582362230568044e-06,
      "loss": 0.8331,
      "step": 420
    },
    {
      "epoch": 0.3868537209367708,
      "grad_norm": 1.2879168901856428,
      "learning_rate": 3.5161753648798367e-06,
      "loss": 0.8337,
      "step": 430
    },
    {
      "epoch": 0.39585031909809104,
      "grad_norm": 1.29505065565257,
      "learning_rate": 3.449123917158331e-06,
      "loss": 0.8371,
      "step": 440
    },
    {
      "epoch": 0.4048469172594113,
      "grad_norm": 1.2295083553559263,
      "learning_rate": 3.3812649360970988e-06,
      "loss": 0.8368,
      "step": 450
    },
    {
      "epoch": 0.41384351542073156,
      "grad_norm": 1.3554176223589234,
      "learning_rate": 3.3126561574549975e-06,
      "loss": 0.8322,
      "step": 460
    },
    {
      "epoch": 0.4228401135820518,
      "grad_norm": 1.3079414538272678,
      "learning_rate": 3.2433559549334475e-06,
      "loss": 0.8295,
      "step": 470
    },
    {
      "epoch": 0.431836711743372,
      "grad_norm": 1.2635231121274326,
      "learning_rate": 3.173423290510937e-06,
      "loss": 0.8343,
      "step": 480
    },
    {
      "epoch": 0.44083330990469227,
      "grad_norm": 1.3150644499613324,
      "learning_rate": 3.102917664277007e-06,
      "loss": 0.8548,
      "step": 490
    },
    {
      "epoch": 0.44982990806601253,
      "grad_norm": 1.366414847286227,
      "learning_rate": 3.0318990638084055e-06,
      "loss": 0.8558,
      "step": 500
    },
    {
      "epoch": 0.4588265062273328,
      "grad_norm": 1.2798875567653052,
      "learning_rate": 2.9604279131304685e-06,
      "loss": 0.8441,
      "step": 510
    },
    {
      "epoch": 0.46782310438865304,
      "grad_norm": 1.2205525459157436,
      "learning_rate": 2.8885650213071746e-06,
      "loss": 0.8346,
      "step": 520
    },
    {
      "epoch": 0.4768197025499733,
      "grad_norm": 1.3131391162470558,
      "learning_rate": 2.8163715307035897e-06,
      "loss": 0.8363,
      "step": 530
    },
    {
      "epoch": 0.48581630071129356,
      "grad_norm": 1.2817420417534302,
      "learning_rate": 2.743908864964741e-06,
      "loss": 0.8637,
      "step": 540
    },
    {
      "epoch": 0.4948128988726138,
      "grad_norm": 1.4117777334581971,
      "learning_rate": 2.6712386767551663e-06,
      "loss": 0.8439,
      "step": 550
    },
    {
      "epoch": 0.5038094970339341,
      "grad_norm": 1.3316023582639032,
      "learning_rate": 2.5984227953036124e-06,
      "loss": 0.8485,
      "step": 560
    },
    {
      "epoch": 0.5128060951952543,
      "grad_norm": 1.220300819321901,
      "learning_rate": 2.52552317379751e-06,
      "loss": 0.8279,
      "step": 570
    },
    {
      "epoch": 0.5218026933565746,
      "grad_norm": 1.2405242839005477,
      "learning_rate": 2.452601836671977e-06,
      "loss": 0.8744,
      "step": 580
    },
    {
      "epoch": 0.5307992915178948,
      "grad_norm": 1.160142751072927,
      "learning_rate": 2.3797208268382096e-06,
      "loss": 0.817,
      "step": 590
    },
    {
      "epoch": 0.5397958896792151,
      "grad_norm": 1.2558513559088742,
      "learning_rate": 2.3069421528961493e-06,
      "loss": 0.8454,
      "step": 600
    },
    {
      "epoch": 0.5487924878405352,
      "grad_norm": 1.2495369982031077,
      "learning_rate": 2.2343277363763437e-06,
      "loss": 0.821,
      "step": 610
    },
    {
      "epoch": 0.5577890860018555,
      "grad_norm": 1.2645404439096872,
      "learning_rate": 2.1619393590558857e-06,
      "loss": 0.8444,
      "step": 620
    },
    {
      "epoch": 0.5667856841631758,
      "grad_norm": 1.2701388082173177,
      "learning_rate": 2.0898386103932634e-06,
      "loss": 0.8296,
      "step": 630
    },
    {
      "epoch": 0.575782282324496,
      "grad_norm": 1.2620244980098876,
      "learning_rate": 2.018086835126831e-06,
      "loss": 0.8425,
      "step": 640
    },
    {
      "epoch": 0.5847788804858163,
      "grad_norm": 1.1594661341735868,
      "learning_rate": 1.9467450810814984e-06,
      "loss": 0.8356,
      "step": 650
    },
    {
      "epoch": 0.5937754786471365,
      "grad_norm": 1.2189222560662132,
      "learning_rate": 1.8758740472280372e-06,
      "loss": 0.8123,
      "step": 660
    },
    {
      "epoch": 0.6027720768084568,
      "grad_norm": 1.2444702547045237,
      "learning_rate": 1.8055340320392002e-06,
      "loss": 0.8124,
      "step": 670
    },
    {
      "epoch": 0.611768674969777,
      "grad_norm": 1.2315293692576161,
      "learning_rate": 1.7357848821865914e-06,
      "loss": 0.8469,
      "step": 680
    },
    {
      "epoch": 0.6207652731310973,
      "grad_norm": 1.2995895579660914,
      "learning_rate": 1.6666859416219384e-06,
      "loss": 0.8171,
      "step": 690
    },
    {
      "epoch": 0.6297618712924176,
      "grad_norm": 1.2091823608533048,
      "learning_rate": 1.5982960010860882e-06,
      "loss": 0.8105,
      "step": 700
    },
    {
      "epoch": 0.6387584694537378,
      "grad_norm": 1.2705740309640616,
      "learning_rate": 1.530673248088687e-06,
      "loss": 0.8342,
      "step": 710
    },
    {
      "epoch": 0.6477550676150581,
      "grad_norm": 1.2981521802148184,
      "learning_rate": 1.463875217401099e-06,
      "loss": 0.8715,
      "step": 720
    },
    {
      "epoch": 0.6567516657763783,
      "grad_norm": 1.1925093116586027,
      "learning_rate": 1.397958742104687e-06,
      "loss": 0.8355,
      "step": 730
    },
    {
      "epoch": 0.6657482639376986,
      "grad_norm": 1.1985043813628813,
      "learning_rate": 1.3329799052361094e-06,
      "loss": 0.8148,
      "step": 740
    },
    {
      "epoch": 0.6747448620990188,
      "grad_norm": 1.1010299588672359,
      "learning_rate": 1.2689939920707667e-06,
      "loss": 0.8229,
      "step": 750
    },
    {
      "epoch": 0.6837414602603391,
      "grad_norm": 1.2430718303897152,
      "learning_rate": 1.2060554430849951e-06,
      "loss": 0.8409,
      "step": 760
    },
    {
      "epoch": 0.6927380584216594,
      "grad_norm": 1.269761061806062,
      "learning_rate": 1.1442178076370426e-06,
      "loss": 0.8258,
      "step": 770
    },
    {
      "epoch": 0.7017346565829795,
      "grad_norm": 1.157246567841852,
      "learning_rate": 1.0835336984062122e-06,
      "loss": 0.8162,
      "step": 780
    },
    {
      "epoch": 0.7107312547442998,
      "grad_norm": 1.2172574409120116,
      "learning_rate": 1.0240547466289646e-06,
      "loss": 0.808,
      "step": 790
    },
    {
      "epoch": 0.71972785290562,
      "grad_norm": 1.2870129001272486,
      "learning_rate": 9.65831558170037e-07,
      "loss": 0.8314,
      "step": 800
    },
    {
      "epoch": 0.7287244510669403,
      "grad_norm": 1.2970054704276492,
      "learning_rate": 9.089136704659823e-07,
      "loss": 0.8437,
      "step": 810
    },
    {
      "epoch": 0.7377210492282605,
      "grad_norm": 1.1999856118846624,
      "learning_rate": 8.533495103777367e-07,
      "loss": 0.8052,
      "step": 820
    },
    {
      "epoch": 0.7467176473895808,
      "grad_norm": 1.233946813798178,
      "learning_rate": 7.991863529880936e-07,
      "loss": 0.7951,
      "step": 830
    },
    {
      "epoch": 0.755714245550901,
      "grad_norm": 1.2668438696715465,
      "learning_rate": 7.464702813791308e-07,
      "loss": 0.8461,
      "step": 840
    },
    {
      "epoch": 0.7647108437122213,
      "grad_norm": 1.2263597703226994,
      "learning_rate": 6.952461474238182e-07,
      "loss": 0.8045,
      "step": 850
    },
    {
      "epoch": 0.7737074418735416,
      "grad_norm": 1.2325614523967854,
      "learning_rate": 6.455575336251549e-07,
      "loss": 0.8562,
      "step": 860
    },
    {
      "epoch": 0.7827040400348618,
      "grad_norm": 1.243498384623771,
      "learning_rate": 5.974467160353226e-07,
      "loss": 0.8345,
      "step": 870
    },
    {
      "epoch": 0.7917006381961821,
      "grad_norm": 1.158311860753632,
      "learning_rate": 5.509546282863806e-07,
      "loss": 0.8344,
      "step": 880
    },
    {
      "epoch": 0.8006972363575023,
      "grad_norm": 1.2163583553306607,
      "learning_rate": 5.061208267631315e-07,
      "loss": 0.8256,
      "step": 890
    },
    {
      "epoch": 0.8096938345188226,
      "grad_norm": 1.2102463242463388,
      "learning_rate": 4.629834569477626e-07,
      "loss": 0.8427,
      "step": 900
    },
    {
      "epoch": 0.8186904326801429,
      "grad_norm": 1.1969681545587425,
      "learning_rate": 4.21579220964923e-07,
      "loss": 0.8195,
      "step": 910
    },
    {
      "epoch": 0.8276870308414631,
      "grad_norm": 1.2573696493635405,
      "learning_rate": 3.8194334635482777e-07,
      "loss": 0.8277,
      "step": 920
    },
    {
      "epoch": 0.8366836290027834,
      "grad_norm": 1.2979421497920385,
      "learning_rate": 3.4410955610097745e-07,
      "loss": 0.811,
      "step": 930
    },
    {
      "epoch": 0.8456802271641036,
      "grad_norm": 1.1946033424208522,
      "learning_rate": 3.0811003993797327e-07,
      "loss": 0.8187,
      "step": 940
    },
    {
      "epoch": 0.8546768253254238,
      "grad_norm": 1.1959972915578303,
      "learning_rate": 2.7397542696386145e-07,
      "loss": 0.8352,
      "step": 950
    },
    {
      "epoch": 0.863673423486744,
      "grad_norm": 1.2612864041438945,
      "learning_rate": 2.4173475958028855e-07,
      "loss": 0.8076,
      "step": 960
    },
    {
      "epoch": 0.8726700216480643,
      "grad_norm": 1.2393073900072233,
      "learning_rate": 2.1141546878265696e-07,
      "loss": 0.8357,
      "step": 970
    },
    {
      "epoch": 0.8816666198093845,
      "grad_norm": 1.3961098525762,
      "learning_rate": 1.8304335082129032e-07,
      "loss": 0.8375,
      "step": 980
    },
    {
      "epoch": 0.8906632179707048,
      "grad_norm": 1.2304674879812914,
      "learning_rate": 1.566425452534784e-07,
      "loss": 0.8482,
      "step": 990
    },
    {
      "epoch": 0.8996598161320251,
      "grad_norm": 1.1275632452975384,
      "learning_rate": 1.3223551440506244e-07,
      "loss": 0.8334,
      "step": 1000
    },
    {
      "epoch": 0.9086564142933453,
      "grad_norm": 1.1697390212239938,
      "learning_rate": 1.0984302425904869e-07,
      "loss": 0.8218,
      "step": 1010
    },
    {
      "epoch": 0.9176530124546656,
      "grad_norm": 1.1549729040350385,
      "learning_rate": 8.94841267874974e-08,
      "loss": 0.8242,
      "step": 1020
    },
    {
      "epoch": 0.9266496106159858,
      "grad_norm": 1.2550852449415948,
      "learning_rate": 7.117614374173353e-08,
      "loss": 0.8276,
      "step": 1030
    },
    {
      "epoch": 0.9356462087773061,
      "grad_norm": 1.1789618223129013,
      "learning_rate": 5.493465191465458e-08,
      "loss": 0.8399,
      "step": 1040
    },
    {
      "epoch": 0.9446428069386263,
      "grad_norm": 1.308555979964644,
      "learning_rate": 4.0773469887692154e-08,
      "loss": 0.8231,
      "step": 1050
    },
    {
      "epoch": 0.9536394050999466,
      "grad_norm": 1.1941396262140982,
      "learning_rate": 2.8704646273687298e-08,
      "loss": 0.8089,
      "step": 1060
    },
    {
      "epoch": 0.9626360032612669,
      "grad_norm": 1.2336678582691885,
      "learning_rate": 1.873844946569614e-08,
      "loss": 0.8546,
      "step": 1070
    },
    {
      "epoch": 0.9716326014225871,
      "grad_norm": 1.2123027368688872,
      "learning_rate": 1.0883358900435626e-08,
      "loss": 0.8213,
      "step": 1080
    },
    {
      "epoch": 0.9806291995839074,
      "grad_norm": 1.101205104982958,
      "learning_rate": 5.146057843814223e-09,
      "loss": 0.8261,
      "step": 1090
    },
    {
      "epoch": 0.9896257977452276,
      "grad_norm": 1.138150004760349,
      "learning_rate": 1.531427704675459e-09,
      "loss": 0.8431,
      "step": 1100
    },
    {
      "epoch": 0.9986223959065479,
      "grad_norm": 1.1694006998873614,
      "learning_rate": 4.25438816009649e-11,
      "loss": 0.8082,
      "step": 1110
    }
  ],
  "logging_steps": 10,
  "max_steps": 1111,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 600,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 145966987345920.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}