| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 222, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0045045045045045045, | |
| "grad_norm": 0.8704756498336792, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.31640625, | |
| "step": 1, | |
| "token_acc": 0.901016281855159 | |
| }, | |
| { | |
| "epoch": 0.009009009009009009, | |
| "grad_norm": 0.867336094379425, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.34814453125, | |
| "step": 2, | |
| "token_acc": 0.8902718604179347 | |
| }, | |
| { | |
| "epoch": 0.013513513513513514, | |
| "grad_norm": 0.8713582158088684, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 0.404052734375, | |
| "step": 3, | |
| "token_acc": 0.8748633439589399 | |
| }, | |
| { | |
| "epoch": 0.018018018018018018, | |
| "grad_norm": 1.0100644826889038, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.3514404296875, | |
| "step": 4, | |
| "token_acc": 0.8913569321533923 | |
| }, | |
| { | |
| "epoch": 0.02252252252252252, | |
| "grad_norm": 0.7727848887443542, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 0.337158203125, | |
| "step": 5, | |
| "token_acc": 0.8953797468354431 | |
| }, | |
| { | |
| "epoch": 0.02702702702702703, | |
| "grad_norm": 0.8298861980438232, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.376220703125, | |
| "step": 6, | |
| "token_acc": 0.8836344783462395 | |
| }, | |
| { | |
| "epoch": 0.03153153153153153, | |
| "grad_norm": 0.8222936391830444, | |
| "learning_rate": 5e-06, | |
| "loss": 0.40087890625, | |
| "step": 7, | |
| "token_acc": 0.8779121330766402 | |
| }, | |
| { | |
| "epoch": 0.036036036036036036, | |
| "grad_norm": 0.762649655342102, | |
| "learning_rate": 4.999733114418726e-06, | |
| "loss": 0.385498046875, | |
| "step": 8, | |
| "token_acc": 0.8799979877251233 | |
| }, | |
| { | |
| "epoch": 0.04054054054054054, | |
| "grad_norm": 0.8142457604408264, | |
| "learning_rate": 4.998932514657232e-06, | |
| "loss": 0.334716796875, | |
| "step": 9, | |
| "token_acc": 0.8947475570032574 | |
| }, | |
| { | |
| "epoch": 0.04504504504504504, | |
| "grad_norm": 1.020268201828003, | |
| "learning_rate": 4.997598371650346e-06, | |
| "loss": 0.395263671875, | |
| "step": 10, | |
| "token_acc": 0.8798358928868437 | |
| }, | |
| { | |
| "epoch": 0.04954954954954955, | |
| "grad_norm": 1.1546354293823242, | |
| "learning_rate": 4.995730970248893e-06, | |
| "loss": 0.3499755859375, | |
| "step": 11, | |
| "token_acc": 0.8920448295669816 | |
| }, | |
| { | |
| "epoch": 0.05405405405405406, | |
| "grad_norm": 0.9397904872894287, | |
| "learning_rate": 4.993330709158879e-06, | |
| "loss": 0.3543701171875, | |
| "step": 12, | |
| "token_acc": 0.8902679534039811 | |
| }, | |
| { | |
| "epoch": 0.05855855855855856, | |
| "grad_norm": 0.957818329334259, | |
| "learning_rate": 4.990398100856367e-06, | |
| "loss": 0.33349609375, | |
| "step": 13, | |
| "token_acc": 0.897339547629675 | |
| }, | |
| { | |
| "epoch": 0.06306306306306306, | |
| "grad_norm": 1.097841501235962, | |
| "learning_rate": 4.986933771478052e-06, | |
| "loss": 0.3480224609375, | |
| "step": 14, | |
| "token_acc": 0.8923554684891254 | |
| }, | |
| { | |
| "epoch": 0.06756756756756757, | |
| "grad_norm": 1.1507660150527954, | |
| "learning_rate": 4.982938460687583e-06, | |
| "loss": 0.3604736328125, | |
| "step": 15, | |
| "token_acc": 0.8908350807943752 | |
| }, | |
| { | |
| "epoch": 0.07207207207207207, | |
| "grad_norm": 0.8487851023674011, | |
| "learning_rate": 4.978413021517634e-06, | |
| "loss": 0.34765625, | |
| "step": 16, | |
| "token_acc": 0.8925047582349576 | |
| }, | |
| { | |
| "epoch": 0.07657657657657657, | |
| "grad_norm": 0.7968904376029968, | |
| "learning_rate": 4.973358420187776e-06, | |
| "loss": 0.318359375, | |
| "step": 17, | |
| "token_acc": 0.8994016642783518 | |
| }, | |
| { | |
| "epoch": 0.08108108108108109, | |
| "grad_norm": 0.8453375101089478, | |
| "learning_rate": 4.967775735898179e-06, | |
| "loss": 0.3338623046875, | |
| "step": 18, | |
| "token_acc": 0.8963307821629692 | |
| }, | |
| { | |
| "epoch": 0.08558558558558559, | |
| "grad_norm": 0.8234410285949707, | |
| "learning_rate": 4.961666160599198e-06, | |
| "loss": 0.344482421875, | |
| "step": 19, | |
| "token_acc": 0.8938563663904482 | |
| }, | |
| { | |
| "epoch": 0.09009009009009009, | |
| "grad_norm": 0.9092895984649658, | |
| "learning_rate": 4.955030998736876e-06, | |
| "loss": 0.323974609375, | |
| "step": 20, | |
| "token_acc": 0.8994619407758588 | |
| }, | |
| { | |
| "epoch": 0.0945945945945946, | |
| "grad_norm": 0.9384039044380188, | |
| "learning_rate": 4.947871666974438e-06, | |
| "loss": 0.40478515625, | |
| "step": 21, | |
| "token_acc": 0.8766958028536647 | |
| }, | |
| { | |
| "epoch": 0.0990990990990991, | |
| "grad_norm": 0.9205092787742615, | |
| "learning_rate": 4.940189693889819e-06, | |
| "loss": 0.32330322265625, | |
| "step": 22, | |
| "token_acc": 0.8981442098900853 | |
| }, | |
| { | |
| "epoch": 0.1036036036036036, | |
| "grad_norm": 0.9223892092704773, | |
| "learning_rate": 4.931986719649298e-06, | |
| "loss": 0.3792724609375, | |
| "step": 23, | |
| "token_acc": 0.8823543361236927 | |
| }, | |
| { | |
| "epoch": 0.10810810810810811, | |
| "grad_norm": 0.6995769739151001, | |
| "learning_rate": 4.923264495657319e-06, | |
| "loss": 0.3433837890625, | |
| "step": 24, | |
| "token_acc": 0.8946257568082592 | |
| }, | |
| { | |
| "epoch": 0.11261261261261261, | |
| "grad_norm": 1.3750187158584595, | |
| "learning_rate": 4.914024884182535e-06, | |
| "loss": 0.40557861328125, | |
| "step": 25, | |
| "token_acc": 0.878056546947765 | |
| }, | |
| { | |
| "epoch": 0.11711711711711711, | |
| "grad_norm": 1.1581752300262451, | |
| "learning_rate": 4.904269857960208e-06, | |
| "loss": 0.3367919921875, | |
| "step": 26, | |
| "token_acc": 0.8954026071562684 | |
| }, | |
| { | |
| "epoch": 0.12162162162162163, | |
| "grad_norm": 1.0632562637329102, | |
| "learning_rate": 4.894001499771015e-06, | |
| "loss": 0.358154296875, | |
| "step": 27, | |
| "token_acc": 0.8903309816311065 | |
| }, | |
| { | |
| "epoch": 0.12612612612612611, | |
| "grad_norm": 0.9011189341545105, | |
| "learning_rate": 4.883222001996352e-06, | |
| "loss": 0.3438720703125, | |
| "step": 28, | |
| "token_acc": 0.8913533999298984 | |
| }, | |
| { | |
| "epoch": 0.13063063063063063, | |
| "grad_norm": 1.0672448873519897, | |
| "learning_rate": 4.871933666150239e-06, | |
| "loss": 0.3509521484375, | |
| "step": 29, | |
| "token_acc": 0.8918955084077967 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 0.8111361861228943, | |
| "learning_rate": 4.8601389023879395e-06, | |
| "loss": 0.3671875, | |
| "step": 30, | |
| "token_acc": 0.8869003430910295 | |
| }, | |
| { | |
| "epoch": 0.13963963963963963, | |
| "grad_norm": 0.9260621666908264, | |
| "learning_rate": 4.8478402289913566e-06, | |
| "loss": 0.3306884765625, | |
| "step": 31, | |
| "token_acc": 0.8972916061574209 | |
| }, | |
| { | |
| "epoch": 0.14414414414414414, | |
| "grad_norm": 0.9479268193244934, | |
| "learning_rate": 4.835040271831371e-06, | |
| "loss": 0.35076904296875, | |
| "step": 32, | |
| "token_acc": 0.89304353165101 | |
| }, | |
| { | |
| "epoch": 0.14864864864864866, | |
| "grad_norm": 0.9695205688476562, | |
| "learning_rate": 4.821741763807186e-06, | |
| "loss": 0.360107421875, | |
| "step": 33, | |
| "token_acc": 0.8890506207294774 | |
| }, | |
| { | |
| "epoch": 0.15315315315315314, | |
| "grad_norm": 0.8806270956993103, | |
| "learning_rate": 4.807947544262838e-06, | |
| "loss": 0.37542724609375, | |
| "step": 34, | |
| "token_acc": 0.883788106662847 | |
| }, | |
| { | |
| "epoch": 0.15765765765765766, | |
| "grad_norm": 0.8483520150184631, | |
| "learning_rate": 4.793660558380969e-06, | |
| "loss": 0.359619140625, | |
| "step": 35, | |
| "token_acc": 0.8883863056108398 | |
| }, | |
| { | |
| "epoch": 0.16216216216216217, | |
| "grad_norm": 0.9061219692230225, | |
| "learning_rate": 4.7788838565540044e-06, | |
| "loss": 0.33489990234375, | |
| "step": 36, | |
| "token_acc": 0.8967343771172348 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.7820679545402527, | |
| "learning_rate": 4.763620593732867e-06, | |
| "loss": 0.350830078125, | |
| "step": 37, | |
| "token_acc": 0.890262264871789 | |
| }, | |
| { | |
| "epoch": 0.17117117117117117, | |
| "grad_norm": 0.9904912114143372, | |
| "learning_rate": 4.747874028753375e-06, | |
| "loss": 0.4389495849609375, | |
| "step": 38, | |
| "token_acc": 0.8688650397289224 | |
| }, | |
| { | |
| "epoch": 0.17567567567567569, | |
| "grad_norm": 0.871274471282959, | |
| "learning_rate": 4.731647523640446e-06, | |
| "loss": 0.3544921875, | |
| "step": 39, | |
| "token_acc": 0.8903172772836772 | |
| }, | |
| { | |
| "epoch": 0.18018018018018017, | |
| "grad_norm": 0.7513757348060608, | |
| "learning_rate": 4.7149445428902794e-06, | |
| "loss": 0.3541259765625, | |
| "step": 40, | |
| "token_acc": 0.8904149382414799 | |
| }, | |
| { | |
| "epoch": 0.18468468468468469, | |
| "grad_norm": 1.159681797027588, | |
| "learning_rate": 4.697768652730656e-06, | |
| "loss": 0.3160400390625, | |
| "step": 41, | |
| "token_acc": 0.9033266799733866 | |
| }, | |
| { | |
| "epoch": 0.1891891891891892, | |
| "grad_norm": 0.8280705213546753, | |
| "learning_rate": 4.68012352035952e-06, | |
| "loss": 0.3328857421875, | |
| "step": 42, | |
| "token_acc": 0.8980802792321116 | |
| }, | |
| { | |
| "epoch": 0.19369369369369369, | |
| "grad_norm": 0.8860488533973694, | |
| "learning_rate": 4.662012913161998e-06, | |
| "loss": 0.3543701171875, | |
| "step": 43, | |
| "token_acc": 0.890051324589144 | |
| }, | |
| { | |
| "epoch": 0.1981981981981982, | |
| "grad_norm": 0.8635104298591614, | |
| "learning_rate": 4.643440697906033e-06, | |
| "loss": 0.34234619140625, | |
| "step": 44, | |
| "token_acc": 0.8932650377240949 | |
| }, | |
| { | |
| "epoch": 0.20270270270270271, | |
| "grad_norm": 0.846026599407196, | |
| "learning_rate": 4.624410839916798e-06, | |
| "loss": 0.3629150390625, | |
| "step": 45, | |
| "token_acc": 0.8890571013466493 | |
| }, | |
| { | |
| "epoch": 0.2072072072072072, | |
| "grad_norm": 0.8589869737625122, | |
| "learning_rate": 4.604927402230061e-06, | |
| "loss": 0.359130859375, | |
| "step": 46, | |
| "token_acc": 0.8904908189625078 | |
| }, | |
| { | |
| "epoch": 0.21171171171171171, | |
| "grad_norm": 0.8538748025894165, | |
| "learning_rate": 4.584994544724695e-06, | |
| "loss": 0.373016357421875, | |
| "step": 47, | |
| "token_acc": 0.8841772321548235 | |
| }, | |
| { | |
| "epoch": 0.21621621621621623, | |
| "grad_norm": 0.8112604022026062, | |
| "learning_rate": 4.564616523234511e-06, | |
| "loss": 0.3341064453125, | |
| "step": 48, | |
| "token_acc": 0.8964978147002773 | |
| }, | |
| { | |
| "epoch": 0.22072072072072071, | |
| "grad_norm": 0.9030938148498535, | |
| "learning_rate": 4.543797688639596e-06, | |
| "loss": 0.407958984375, | |
| "step": 49, | |
| "token_acc": 0.8771987646635173 | |
| }, | |
| { | |
| "epoch": 0.22522522522522523, | |
| "grad_norm": 0.8748745918273926, | |
| "learning_rate": 4.522542485937369e-06, | |
| "loss": 0.37548828125, | |
| "step": 50, | |
| "token_acc": 0.8819748719843742 | |
| }, | |
| { | |
| "epoch": 0.22972972972972974, | |
| "grad_norm": 0.7620592713356018, | |
| "learning_rate": 4.500855453293532e-06, | |
| "loss": 0.31634521484375, | |
| "step": 51, | |
| "token_acc": 0.8998812351543943 | |
| }, | |
| { | |
| "epoch": 0.23423423423423423, | |
| "grad_norm": 0.9173205494880676, | |
| "learning_rate": 4.478741221073136e-06, | |
| "loss": 0.337432861328125, | |
| "step": 52, | |
| "token_acc": 0.8988951978520181 | |
| }, | |
| { | |
| "epoch": 0.23873873873873874, | |
| "grad_norm": 1.211668610572815, | |
| "learning_rate": 4.456204510851957e-06, | |
| "loss": 0.361572265625, | |
| "step": 53, | |
| "token_acc": 0.8882260347359092 | |
| }, | |
| { | |
| "epoch": 0.24324324324324326, | |
| "grad_norm": 1.1069424152374268, | |
| "learning_rate": 4.433250134408401e-06, | |
| "loss": 0.3758544921875, | |
| "step": 54, | |
| "token_acc": 0.8838936369355408 | |
| }, | |
| { | |
| "epoch": 0.24774774774774774, | |
| "grad_norm": 1.0697734355926514, | |
| "learning_rate": 4.4098829926961485e-06, | |
| "loss": 0.3668212890625, | |
| "step": 55, | |
| "token_acc": 0.8864855281237534 | |
| }, | |
| { | |
| "epoch": 0.25225225225225223, | |
| "grad_norm": 0.9282281994819641, | |
| "learning_rate": 4.386108074797757e-06, | |
| "loss": 0.3770751953125, | |
| "step": 56, | |
| "token_acc": 0.8851603202168664 | |
| }, | |
| { | |
| "epoch": 0.25675675675675674, | |
| "grad_norm": 0.7715709209442139, | |
| "learning_rate": 4.361930456859455e-06, | |
| "loss": 0.391845703125, | |
| "step": 57, | |
| "token_acc": 0.8783604400170836 | |
| }, | |
| { | |
| "epoch": 0.26126126126126126, | |
| "grad_norm": 0.9285658001899719, | |
| "learning_rate": 4.337355301007336e-06, | |
| "loss": 0.3463134765625, | |
| "step": 58, | |
| "token_acc": 0.8939376088180235 | |
| }, | |
| { | |
| "epoch": 0.26576576576576577, | |
| "grad_norm": 1.034256935119629, | |
| "learning_rate": 4.312387854245201e-06, | |
| "loss": 0.3419189453125, | |
| "step": 59, | |
| "token_acc": 0.8928964642593341 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 1.0528221130371094, | |
| "learning_rate": 4.287033447334286e-06, | |
| "loss": 0.317138671875, | |
| "step": 60, | |
| "token_acc": 0.901003937801551 | |
| }, | |
| { | |
| "epoch": 0.2747747747747748, | |
| "grad_norm": 0.8839389681816101, | |
| "learning_rate": 4.261297493655092e-06, | |
| "loss": 0.3155517578125, | |
| "step": 61, | |
| "token_acc": 0.9004427280397325 | |
| }, | |
| { | |
| "epoch": 0.27927927927927926, | |
| "grad_norm": 0.8743260502815247, | |
| "learning_rate": 4.2351854880515856e-06, | |
| "loss": 0.34619140625, | |
| "step": 62, | |
| "token_acc": 0.8932852856301954 | |
| }, | |
| { | |
| "epoch": 0.28378378378378377, | |
| "grad_norm": 0.7228661179542542, | |
| "learning_rate": 4.208703005658e-06, | |
| "loss": 0.348663330078125, | |
| "step": 63, | |
| "token_acc": 0.8947115994978206 | |
| }, | |
| { | |
| "epoch": 0.2882882882882883, | |
| "grad_norm": 0.8091106414794922, | |
| "learning_rate": 4.1818557007085e-06, | |
| "loss": 0.3548583984375, | |
| "step": 64, | |
| "token_acc": 0.8893367675466839 | |
| }, | |
| { | |
| "epoch": 0.2927927927927928, | |
| "grad_norm": 0.9842603802680969, | |
| "learning_rate": 4.154649305329959e-06, | |
| "loss": 0.34814453125, | |
| "step": 65, | |
| "token_acc": 0.8905122962458113 | |
| }, | |
| { | |
| "epoch": 0.2972972972972973, | |
| "grad_norm": 0.6639820337295532, | |
| "learning_rate": 4.12708962831809e-06, | |
| "loss": 0.33953857421875, | |
| "step": 66, | |
| "token_acc": 0.8963516432198938 | |
| }, | |
| { | |
| "epoch": 0.30180180180180183, | |
| "grad_norm": 0.9688128232955933, | |
| "learning_rate": 4.099182553897228e-06, | |
| "loss": 0.34039306640625, | |
| "step": 67, | |
| "token_acc": 0.893565994838982 | |
| }, | |
| { | |
| "epoch": 0.3063063063063063, | |
| "grad_norm": 0.7545742392539978, | |
| "learning_rate": 4.070934040463999e-06, | |
| "loss": 0.341400146484375, | |
| "step": 68, | |
| "token_acc": 0.894034692141854 | |
| }, | |
| { | |
| "epoch": 0.3108108108108108, | |
| "grad_norm": 0.834723711013794, | |
| "learning_rate": 4.042350119315142e-06, | |
| "loss": 0.39501953125, | |
| "step": 69, | |
| "token_acc": 0.8793881644934805 | |
| }, | |
| { | |
| "epoch": 0.3153153153153153, | |
| "grad_norm": 0.7959160804748535, | |
| "learning_rate": 4.013436893359787e-06, | |
| "loss": 0.37982177734375, | |
| "step": 70, | |
| "token_acc": 0.8827430920972124 | |
| }, | |
| { | |
| "epoch": 0.31981981981981983, | |
| "grad_norm": 0.7727994322776794, | |
| "learning_rate": 3.984200535816427e-06, | |
| "loss": 0.3292236328125, | |
| "step": 71, | |
| "token_acc": 0.8962622549019608 | |
| }, | |
| { | |
| "epoch": 0.32432432432432434, | |
| "grad_norm": 1.1143063306808472, | |
| "learning_rate": 3.9546472888948825e-06, | |
| "loss": 0.3765869140625, | |
| "step": 72, | |
| "token_acc": 0.8862365334289776 | |
| }, | |
| { | |
| "epoch": 0.32882882882882886, | |
| "grad_norm": 0.802827775478363, | |
| "learning_rate": 3.924783462463541e-06, | |
| "loss": 0.3514404296875, | |
| "step": 73, | |
| "token_acc": 0.8915428441438926 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.7230685353279114, | |
| "learning_rate": 3.894615432702144e-06, | |
| "loss": 0.326416015625, | |
| "step": 74, | |
| "token_acc": 0.8988927052463204 | |
| }, | |
| { | |
| "epoch": 0.33783783783783783, | |
| "grad_norm": 0.6452018618583679, | |
| "learning_rate": 3.8641496407404165e-06, | |
| "loss": 0.344970703125, | |
| "step": 75, | |
| "token_acc": 0.8912708883025505 | |
| }, | |
| { | |
| "epoch": 0.34234234234234234, | |
| "grad_norm": 0.8161414265632629, | |
| "learning_rate": 3.833392591282838e-06, | |
| "loss": 0.352294921875, | |
| "step": 76, | |
| "token_acc": 0.8910519555745838 | |
| }, | |
| { | |
| "epoch": 0.34684684684684686, | |
| "grad_norm": 0.9082568287849426, | |
| "learning_rate": 3.802350851219826e-06, | |
| "loss": 0.31591796875, | |
| "step": 77, | |
| "token_acc": 0.903437424998 | |
| }, | |
| { | |
| "epoch": 0.35135135135135137, | |
| "grad_norm": 0.8428945541381836, | |
| "learning_rate": 3.771031048225653e-06, | |
| "loss": 0.3326416015625, | |
| "step": 78, | |
| "token_acc": 0.8960439385497094 | |
| }, | |
| { | |
| "epoch": 0.35585585585585583, | |
| "grad_norm": 0.7270176410675049, | |
| "learning_rate": 3.7394398693433798e-06, | |
| "loss": 0.34326171875, | |
| "step": 79, | |
| "token_acc": 0.8919539041909978 | |
| }, | |
| { | |
| "epoch": 0.36036036036036034, | |
| "grad_norm": 0.9635636210441589, | |
| "learning_rate": 3.70758405955712e-06, | |
| "loss": 0.324951171875, | |
| "step": 80, | |
| "token_acc": 0.8983449519816702 | |
| }, | |
| { | |
| "epoch": 0.36486486486486486, | |
| "grad_norm": 0.7542657256126404, | |
| "learning_rate": 3.675470420351921e-06, | |
| "loss": 0.363525390625, | |
| "step": 81, | |
| "token_acc": 0.8872009164518733 | |
| }, | |
| { | |
| "epoch": 0.36936936936936937, | |
| "grad_norm": 0.8671934008598328, | |
| "learning_rate": 3.6431058082615966e-06, | |
| "loss": 0.31597900390625, | |
| "step": 82, | |
| "token_acc": 0.9009948542024013 | |
| }, | |
| { | |
| "epoch": 0.3738738738738739, | |
| "grad_norm": 1.0081602334976196, | |
| "learning_rate": 3.6104971334047954e-06, | |
| "loss": 0.3199462890625, | |
| "step": 83, | |
| "token_acc": 0.8997704785831705 | |
| }, | |
| { | |
| "epoch": 0.3783783783783784, | |
| "grad_norm": 1.0129483938217163, | |
| "learning_rate": 3.5776513580096316e-06, | |
| "loss": 0.3701171875, | |
| "step": 84, | |
| "token_acc": 0.88471046201057 | |
| }, | |
| { | |
| "epoch": 0.38288288288288286, | |
| "grad_norm": 0.8052798509597778, | |
| "learning_rate": 3.5445754949271925e-06, | |
| "loss": 0.3421630859375, | |
| "step": 85, | |
| "token_acc": 0.8927845528455285 | |
| }, | |
| { | |
| "epoch": 0.38738738738738737, | |
| "grad_norm": 0.8934619426727295, | |
| "learning_rate": 3.5112766061342346e-06, | |
| "loss": 0.3060302734375, | |
| "step": 86, | |
| "token_acc": 0.9026350477994344 | |
| }, | |
| { | |
| "epoch": 0.3918918918918919, | |
| "grad_norm": 0.6580748558044434, | |
| "learning_rate": 3.47776180122539e-06, | |
| "loss": 0.359619140625, | |
| "step": 87, | |
| "token_acc": 0.8894399733815792 | |
| }, | |
| { | |
| "epoch": 0.3963963963963964, | |
| "grad_norm": 0.8089192509651184, | |
| "learning_rate": 3.4440382358952116e-06, | |
| "loss": 0.3321533203125, | |
| "step": 88, | |
| "token_acc": 0.8961488812392426 | |
| }, | |
| { | |
| "epoch": 0.4009009009009009, | |
| "grad_norm": 0.8350925445556641, | |
| "learning_rate": 3.4101131104103664e-06, | |
| "loss": 0.3330078125, | |
| "step": 89, | |
| "token_acc": 0.8948240976437569 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 0.8865199089050293, | |
| "learning_rate": 3.3759936680723238e-06, | |
| "loss": 0.3404541015625, | |
| "step": 90, | |
| "token_acc": 0.8960275592341626 | |
| }, | |
| { | |
| "epoch": 0.4099099099099099, | |
| "grad_norm": 0.9881960153579712, | |
| "learning_rate": 3.341687193670844e-06, | |
| "loss": 0.4093017578125, | |
| "step": 91, | |
| "token_acc": 0.8754743340444411 | |
| }, | |
| { | |
| "epoch": 0.4144144144144144, | |
| "grad_norm": 0.6525192856788635, | |
| "learning_rate": 3.3072010119286156e-06, | |
| "loss": 0.343994140625, | |
| "step": 92, | |
| "token_acc": 0.8952931153825675 | |
| }, | |
| { | |
| "epoch": 0.4189189189189189, | |
| "grad_norm": 0.8146042823791504, | |
| "learning_rate": 3.272542485937369e-06, | |
| "loss": 0.32464599609375, | |
| "step": 93, | |
| "token_acc": 0.8996927651139742 | |
| }, | |
| { | |
| "epoch": 0.42342342342342343, | |
| "grad_norm": 0.7997490763664246, | |
| "learning_rate": 3.237719015585787e-06, | |
| "loss": 0.3258056640625, | |
| "step": 94, | |
| "token_acc": 0.8986706407255705 | |
| }, | |
| { | |
| "epoch": 0.42792792792792794, | |
| "grad_norm": 0.7047693133354187, | |
| "learning_rate": 3.202738035979571e-06, | |
| "loss": 0.3316650390625, | |
| "step": 95, | |
| "token_acc": 0.8959871462508255 | |
| }, | |
| { | |
| "epoch": 0.43243243243243246, | |
| "grad_norm": 0.6673498749732971, | |
| "learning_rate": 3.167607015853983e-06, | |
| "loss": 0.35711669921875, | |
| "step": 96, | |
| "token_acc": 0.8914680520593433 | |
| }, | |
| { | |
| "epoch": 0.4369369369369369, | |
| "grad_norm": 0.9044120907783508, | |
| "learning_rate": 3.132333455979202e-06, | |
| "loss": 0.29888916015625, | |
| "step": 97, | |
| "token_acc": 0.9081531160305777 | |
| }, | |
| { | |
| "epoch": 0.44144144144144143, | |
| "grad_norm": 0.6456412672996521, | |
| "learning_rate": 3.0969248875588547e-06, | |
| "loss": 0.3343048095703125, | |
| "step": 98, | |
| "token_acc": 0.8963899986716667 | |
| }, | |
| { | |
| "epoch": 0.44594594594594594, | |
| "grad_norm": 0.8044923543930054, | |
| "learning_rate": 3.0613888706220336e-06, | |
| "loss": 0.3668212890625, | |
| "step": 99, | |
| "token_acc": 0.8872929258325789 | |
| }, | |
| { | |
| "epoch": 0.45045045045045046, | |
| "grad_norm": 0.8912515044212341, | |
| "learning_rate": 3.025732992409166e-06, | |
| "loss": 0.3749237060546875, | |
| "step": 100, | |
| "token_acc": 0.8863157390554651 | |
| }, | |
| { | |
| "epoch": 0.45495495495495497, | |
| "grad_norm": 0.9178464412689209, | |
| "learning_rate": 2.989964865752079e-06, | |
| "loss": 0.31494140625, | |
| "step": 101, | |
| "token_acc": 0.9031758326878389 | |
| }, | |
| { | |
| "epoch": 0.4594594594594595, | |
| "grad_norm": 0.7799769639968872, | |
| "learning_rate": 2.9540921274485913e-06, | |
| "loss": 0.316070556640625, | |
| "step": 102, | |
| "token_acc": 0.9010862029119482 | |
| }, | |
| { | |
| "epoch": 0.46396396396396394, | |
| "grad_norm": 0.9977706670761108, | |
| "learning_rate": 2.9181224366319947e-06, | |
| "loss": 0.335693359375, | |
| "step": 103, | |
| "token_acc": 0.8947431320117716 | |
| }, | |
| { | |
| "epoch": 0.46846846846846846, | |
| "grad_norm": 1.2078821659088135, | |
| "learning_rate": 2.882063473135763e-06, | |
| "loss": 0.4012451171875, | |
| "step": 104, | |
| "token_acc": 0.8755390944456183 | |
| }, | |
| { | |
| "epoch": 0.47297297297297297, | |
| "grad_norm": 0.9138604998588562, | |
| "learning_rate": 2.845922935853841e-06, | |
| "loss": 0.31298828125, | |
| "step": 105, | |
| "token_acc": 0.9045985448046975 | |
| }, | |
| { | |
| "epoch": 0.4774774774774775, | |
| "grad_norm": 0.9735301733016968, | |
| "learning_rate": 2.80970854109687e-06, | |
| "loss": 0.312744140625, | |
| "step": 106, | |
| "token_acc": 0.9002951864614082 | |
| }, | |
| { | |
| "epoch": 0.481981981981982, | |
| "grad_norm": 0.8821303844451904, | |
| "learning_rate": 2.773428020944687e-06, | |
| "loss": 0.316650390625, | |
| "step": 107, | |
| "token_acc": 0.9018892572372231 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 0.7590733766555786, | |
| "learning_rate": 2.7370891215954572e-06, | |
| "loss": 0.362060546875, | |
| "step": 108, | |
| "token_acc": 0.8878953544431608 | |
| }, | |
| { | |
| "epoch": 0.49099099099099097, | |
| "grad_norm": 1.1650851964950562, | |
| "learning_rate": 2.7006996017118033e-06, | |
| "loss": 0.349365234375, | |
| "step": 109, | |
| "token_acc": 0.8941975079040357 | |
| }, | |
| { | |
| "epoch": 0.4954954954954955, | |
| "grad_norm": 0.8107997179031372, | |
| "learning_rate": 2.6642672307642575e-06, | |
| "loss": 0.3140869140625, | |
| "step": 110, | |
| "token_acc": 0.9023156522104909 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.9272815585136414, | |
| "learning_rate": 2.627799787372418e-06, | |
| "loss": 0.33642578125, | |
| "step": 111, | |
| "token_acc": 0.8949056603773585 | |
| }, | |
| { | |
| "epoch": 0.5045045045045045, | |
| "grad_norm": 0.8578754663467407, | |
| "learning_rate": 2.591305057644148e-06, | |
| "loss": 0.34564208984375, | |
| "step": 112, | |
| "token_acc": 0.8929581334646075 | |
| }, | |
| { | |
| "epoch": 0.509009009009009, | |
| "grad_norm": 0.8179190754890442, | |
| "learning_rate": 2.5547908335131704e-06, | |
| "loss": 0.3253173828125, | |
| "step": 113, | |
| "token_acc": 0.8980514754137483 | |
| }, | |
| { | |
| "epoch": 0.5135135135135135, | |
| "grad_norm": 0.937614917755127, | |
| "learning_rate": 2.5182649110754325e-06, | |
| "loss": 0.30389404296875, | |
| "step": 114, | |
| "token_acc": 0.9068263329281652 | |
| }, | |
| { | |
| "epoch": 0.5180180180180181, | |
| "grad_norm": 0.9342759251594543, | |
| "learning_rate": 2.4817350889245675e-06, | |
| "loss": 0.362548828125, | |
| "step": 115, | |
| "token_acc": 0.8870322400184855 | |
| }, | |
| { | |
| "epoch": 0.5225225225225225, | |
| "grad_norm": 0.7444611191749573, | |
| "learning_rate": 2.44520916648683e-06, | |
| "loss": 0.36907958984375, | |
| "step": 116, | |
| "token_acc": 0.8857218663533336 | |
| }, | |
| { | |
| "epoch": 0.527027027027027, | |
| "grad_norm": 0.9588095545768738, | |
| "learning_rate": 2.408694942355853e-06, | |
| "loss": 0.38671875, | |
| "step": 117, | |
| "token_acc": 0.8824941943771926 | |
| }, | |
| { | |
| "epoch": 0.5315315315315315, | |
| "grad_norm": 0.9178715348243713, | |
| "learning_rate": 2.3722002126275826e-06, | |
| "loss": 0.34375, | |
| "step": 118, | |
| "token_acc": 0.8941754417885328 | |
| }, | |
| { | |
| "epoch": 0.536036036036036, | |
| "grad_norm": 0.8871239423751831, | |
| "learning_rate": 2.3357327692357434e-06, | |
| "loss": 0.3515625, | |
| "step": 119, | |
| "token_acc": 0.8914658528885615 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 0.7818154096603394, | |
| "learning_rate": 2.2993003982881976e-06, | |
| "loss": 0.30548095703125, | |
| "step": 120, | |
| "token_acc": 0.9041648931367617 | |
| }, | |
| { | |
| "epoch": 0.545045045045045, | |
| "grad_norm": 0.7676111459732056, | |
| "learning_rate": 2.262910878404544e-06, | |
| "loss": 0.2769775390625, | |
| "step": 121, | |
| "token_acc": 0.9139403881271473 | |
| }, | |
| { | |
| "epoch": 0.5495495495495496, | |
| "grad_norm": 0.9711487889289856, | |
| "learning_rate": 2.2265719790553147e-06, | |
| "loss": 0.3883056640625, | |
| "step": 122, | |
| "token_acc": 0.8804856085838586 | |
| }, | |
| { | |
| "epoch": 0.5540540540540541, | |
| "grad_norm": 0.8610119223594666, | |
| "learning_rate": 2.19029145890313e-06, | |
| "loss": 0.3262939453125, | |
| "step": 123, | |
| "token_acc": 0.8978232618583496 | |
| }, | |
| { | |
| "epoch": 0.5585585585585585, | |
| "grad_norm": 0.8020825386047363, | |
| "learning_rate": 2.154077064146159e-06, | |
| "loss": 0.3035888671875, | |
| "step": 124, | |
| "token_acc": 0.9039610929800985 | |
| }, | |
| { | |
| "epoch": 0.5630630630630631, | |
| "grad_norm": 0.9404456615447998, | |
| "learning_rate": 2.1179365268642377e-06, | |
| "loss": 0.32861328125, | |
| "step": 125, | |
| "token_acc": 0.8969406998672906 | |
| }, | |
| { | |
| "epoch": 0.5675675675675675, | |
| "grad_norm": 0.9896916747093201, | |
| "learning_rate": 2.0818775633680057e-06, | |
| "loss": 0.36248779296875, | |
| "step": 126, | |
| "token_acc": 0.8876241534257928 | |
| }, | |
| { | |
| "epoch": 0.5720720720720721, | |
| "grad_norm": 0.738699734210968, | |
| "learning_rate": 2.045907872551409e-06, | |
| "loss": 0.3084716796875, | |
| "step": 127, | |
| "token_acc": 0.9047011482971833 | |
| }, | |
| { | |
| "epoch": 0.5765765765765766, | |
| "grad_norm": 0.987114429473877, | |
| "learning_rate": 2.010035134247922e-06, | |
| "loss": 0.3638916015625, | |
| "step": 128, | |
| "token_acc": 0.8856382245027449 | |
| }, | |
| { | |
| "epoch": 0.581081081081081, | |
| "grad_norm": 0.8161911368370056, | |
| "learning_rate": 1.9742670075908353e-06, | |
| "loss": 0.3726806640625, | |
| "step": 129, | |
| "token_acc": 0.8862522268667838 | |
| }, | |
| { | |
| "epoch": 0.5855855855855856, | |
| "grad_norm": 1.0528193712234497, | |
| "learning_rate": 1.9386111293779673e-06, | |
| "loss": 0.3369140625, | |
| "step": 130, | |
| "token_acc": 0.8956112595402446 | |
| }, | |
| { | |
| "epoch": 0.5900900900900901, | |
| "grad_norm": 0.9027992486953735, | |
| "learning_rate": 1.903075112441145e-06, | |
| "loss": 0.3206787109375, | |
| "step": 131, | |
| "token_acc": 0.9009896596103493 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 0.7341856360435486, | |
| "learning_rate": 1.8676665440207982e-06, | |
| "loss": 0.2967529296875, | |
| "step": 132, | |
| "token_acc": 0.9068318389675675 | |
| }, | |
| { | |
| "epoch": 0.5990990990990991, | |
| "grad_norm": 0.735024094581604, | |
| "learning_rate": 1.832392984146018e-06, | |
| "loss": 0.31787109375, | |
| "step": 133, | |
| "token_acc": 0.8996451126455548 | |
| }, | |
| { | |
| "epoch": 0.6036036036036037, | |
| "grad_norm": 0.9426040053367615, | |
| "learning_rate": 1.7972619640204298e-06, | |
| "loss": 0.387451171875, | |
| "step": 134, | |
| "token_acc": 0.8815383862602143 | |
| }, | |
| { | |
| "epoch": 0.6081081081081081, | |
| "grad_norm": 0.7768560647964478, | |
| "learning_rate": 1.7622809844142138e-06, | |
| "loss": 0.35296630859375, | |
| "step": 135, | |
| "token_acc": 0.8923252350367681 | |
| }, | |
| { | |
| "epoch": 0.6126126126126126, | |
| "grad_norm": 0.8278442025184631, | |
| "learning_rate": 1.7274575140626318e-06, | |
| "loss": 0.3173828125, | |
| "step": 136, | |
| "token_acc": 0.8998283663598066 | |
| }, | |
| { | |
| "epoch": 0.6171171171171171, | |
| "grad_norm": 0.8853439092636108, | |
| "learning_rate": 1.6927989880713852e-06, | |
| "loss": 0.3544158935546875, | |
| "step": 137, | |
| "token_acc": 0.8905075533139094 | |
| }, | |
| { | |
| "epoch": 0.6216216216216216, | |
| "grad_norm": 0.6611644625663757, | |
| "learning_rate": 1.6583128063291576e-06, | |
| "loss": 0.33984375, | |
| "step": 138, | |
| "token_acc": 0.8959825350762759 | |
| }, | |
| { | |
| "epoch": 0.6261261261261262, | |
| "grad_norm": 0.7384758591651917, | |
| "learning_rate": 1.6240063319276767e-06, | |
| "loss": 0.32476806640625, | |
| "step": 139, | |
| "token_acc": 0.8985073953315207 | |
| }, | |
| { | |
| "epoch": 0.6306306306306306, | |
| "grad_norm": 0.773961067199707, | |
| "learning_rate": 1.5898868895896336e-06, | |
| "loss": 0.4095458984375, | |
| "step": 140, | |
| "token_acc": 0.8739141708803319 | |
| }, | |
| { | |
| "epoch": 0.6351351351351351, | |
| "grad_norm": 0.8257731199264526, | |
| "learning_rate": 1.5559617641047886e-06, | |
| "loss": 0.3592529296875, | |
| "step": 141, | |
| "token_acc": 0.8883896425588295 | |
| }, | |
| { | |
| "epoch": 0.6396396396396397, | |
| "grad_norm": 0.787449061870575, | |
| "learning_rate": 1.5222381987746104e-06, | |
| "loss": 0.357177734375, | |
| "step": 142, | |
| "token_acc": 0.888363484849953 | |
| }, | |
| { | |
| "epoch": 0.6441441441441441, | |
| "grad_norm": 0.6886521577835083, | |
| "learning_rate": 1.488723393865766e-06, | |
| "loss": 0.360107421875, | |
| "step": 143, | |
| "token_acc": 0.8890432232736252 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 0.7612305283546448, | |
| "learning_rate": 1.4554245050728085e-06, | |
| "loss": 0.3238525390625, | |
| "step": 144, | |
| "token_acc": 0.8999899789558072 | |
| }, | |
| { | |
| "epoch": 0.6531531531531531, | |
| "grad_norm": 0.8425935506820679, | |
| "learning_rate": 1.4223486419903692e-06, | |
| "loss": 0.327606201171875, | |
| "step": 145, | |
| "token_acc": 0.8979553119730186 | |
| }, | |
| { | |
| "epoch": 0.6576576576576577, | |
| "grad_norm": 1.0973107814788818, | |
| "learning_rate": 1.389502866595206e-06, | |
| "loss": 0.319122314453125, | |
| "step": 146, | |
| "token_acc": 0.9017525376916615 | |
| }, | |
| { | |
| "epoch": 0.6621621621621622, | |
| "grad_norm": 0.7503589391708374, | |
| "learning_rate": 1.3568941917384038e-06, | |
| "loss": 0.31683349609375, | |
| "step": 147, | |
| "token_acc": 0.9008651974055598 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.7911334037780762, | |
| "learning_rate": 1.324529579648079e-06, | |
| "loss": 0.28924560546875, | |
| "step": 148, | |
| "token_acc": 0.9068919780898478 | |
| }, | |
| { | |
| "epoch": 0.6711711711711712, | |
| "grad_norm": 0.771323025226593, | |
| "learning_rate": 1.2924159404428804e-06, | |
| "loss": 0.373291015625, | |
| "step": 149, | |
| "token_acc": 0.884785335262904 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 0.9364131689071655, | |
| "learning_rate": 1.2605601306566206e-06, | |
| "loss": 0.3597412109375, | |
| "step": 150, | |
| "token_acc": 0.888817750535287 | |
| }, | |
| { | |
| "epoch": 0.6801801801801802, | |
| "grad_norm": 0.7087746262550354, | |
| "learning_rate": 1.2289689517743475e-06, | |
| "loss": 0.3809814453125, | |
| "step": 151, | |
| "token_acc": 0.8821097824929922 | |
| }, | |
| { | |
| "epoch": 0.6846846846846847, | |
| "grad_norm": 0.6490341424942017, | |
| "learning_rate": 1.1976491487801747e-06, | |
| "loss": 0.32562255859375, | |
| "step": 152, | |
| "token_acc": 0.8976476697136251 | |
| }, | |
| { | |
| "epoch": 0.6891891891891891, | |
| "grad_norm": 0.7317127585411072, | |
| "learning_rate": 1.1666074087171628e-06, | |
| "loss": 0.330810546875, | |
| "step": 153, | |
| "token_acc": 0.9007194922339586 | |
| }, | |
| { | |
| "epoch": 0.6936936936936937, | |
| "grad_norm": 0.7992354035377502, | |
| "learning_rate": 1.1358503592595837e-06, | |
| "loss": 0.33154296875, | |
| "step": 154, | |
| "token_acc": 0.8951166965888689 | |
| }, | |
| { | |
| "epoch": 0.6981981981981982, | |
| "grad_norm": 0.8374021053314209, | |
| "learning_rate": 1.1053845672978567e-06, | |
| "loss": 0.3441162109375, | |
| "step": 155, | |
| "token_acc": 0.8938746596418214 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 0.747456431388855, | |
| "learning_rate": 1.0752165375364593e-06, | |
| "loss": 0.3369140625, | |
| "step": 156, | |
| "token_acc": 0.8960199252801992 | |
| }, | |
| { | |
| "epoch": 0.7072072072072072, | |
| "grad_norm": 0.6646521091461182, | |
| "learning_rate": 1.0453527111051183e-06, | |
| "loss": 0.302581787109375, | |
| "step": 157, | |
| "token_acc": 0.9050437347721778 | |
| }, | |
| { | |
| "epoch": 0.7117117117117117, | |
| "grad_norm": 0.7055298686027527, | |
| "learning_rate": 1.0157994641835737e-06, | |
| "loss": 0.301239013671875, | |
| "step": 158, | |
| "token_acc": 0.9069595645412131 | |
| }, | |
| { | |
| "epoch": 0.7162162162162162, | |
| "grad_norm": 0.9243741631507874, | |
| "learning_rate": 9.865631066402138e-07, | |
| "loss": 0.35394287109375, | |
| "step": 159, | |
| "token_acc": 0.8925179495143776 | |
| }, | |
| { | |
| "epoch": 0.7207207207207207, | |
| "grad_norm": 0.8311925530433655, | |
| "learning_rate": 9.576498806848592e-07, | |
| "loss": 0.340087890625, | |
| "step": 160, | |
| "token_acc": 0.894881932249615 | |
| }, | |
| { | |
| "epoch": 0.7252252252252253, | |
| "grad_norm": 0.8588569164276123, | |
| "learning_rate": 9.290659595360019e-07, | |
| "loss": 0.3267822265625, | |
| "step": 161, | |
| "token_acc": 0.8974585635359116 | |
| }, | |
| { | |
| "epoch": 0.7297297297297297, | |
| "grad_norm": 0.7929273247718811, | |
| "learning_rate": 9.008174461027724e-07, | |
| "loss": 0.340576171875, | |
| "step": 162, | |
| "token_acc": 0.8939879994458003 | |
| }, | |
| { | |
| "epoch": 0.7342342342342343, | |
| "grad_norm": 1.2013729810714722, | |
| "learning_rate": 8.729103716819113e-07, | |
| "loss": 0.370819091796875, | |
| "step": 163, | |
| "token_acc": 0.8868560581265955 | |
| }, | |
| { | |
| "epoch": 0.7387387387387387, | |
| "grad_norm": 0.7468942403793335, | |
| "learning_rate": 8.453506946700419e-07, | |
| "loss": 0.2955322265625, | |
| "step": 164, | |
| "token_acc": 0.9082374491957181 | |
| }, | |
| { | |
| "epoch": 0.7432432432432432, | |
| "grad_norm": 1.1810017824172974, | |
| "learning_rate": 8.181442992915001e-07, | |
| "loss": 0.378662109375, | |
| "step": 165, | |
| "token_acc": 0.8825104623355918 | |
| }, | |
| { | |
| "epoch": 0.7477477477477478, | |
| "grad_norm": 0.7701326012611389, | |
| "learning_rate": 7.912969943420018e-07, | |
| "loss": 0.3447265625, | |
| "step": 166, | |
| "token_acc": 0.8957051550086613 | |
| }, | |
| { | |
| "epoch": 0.7522522522522522, | |
| "grad_norm": 0.8558163046836853, | |
| "learning_rate": 7.648145119484152e-07, | |
| "loss": 0.3280029296875, | |
| "step": 167, | |
| "token_acc": 0.89935299183094 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 0.6991153955459595, | |
| "learning_rate": 7.387025063449082e-07, | |
| "loss": 0.3521728515625, | |
| "step": 168, | |
| "token_acc": 0.8900763667218877 | |
| }, | |
| { | |
| "epoch": 0.7612612612612613, | |
| "grad_norm": 1.034488558769226, | |
| "learning_rate": 7.129665526657145e-07, | |
| "loss": 0.3106231689453125, | |
| "step": 169, | |
| "token_acc": 0.9047021491417857 | |
| }, | |
| { | |
| "epoch": 0.7657657657657657, | |
| "grad_norm": 0.733146071434021, | |
| "learning_rate": 6.876121457547996e-07, | |
| "loss": 0.310302734375, | |
| "step": 170, | |
| "token_acc": 0.9026569890363783 | |
| }, | |
| { | |
| "epoch": 0.7702702702702703, | |
| "grad_norm": 0.8405783176422119, | |
| "learning_rate": 6.626446989926652e-07, | |
| "loss": 0.3514404296875, | |
| "step": 171, | |
| "token_acc": 0.890449830708873 | |
| }, | |
| { | |
| "epoch": 0.7747747747747747, | |
| "grad_norm": 0.8425772190093994, | |
| "learning_rate": 6.380695431405453e-07, | |
| "loss": 0.361083984375, | |
| "step": 172, | |
| "token_acc": 0.8888163230910221 | |
| }, | |
| { | |
| "epoch": 0.7792792792792793, | |
| "grad_norm": 0.9122714996337891, | |
| "learning_rate": 6.138919252022435e-07, | |
| "loss": 0.3192138671875, | |
| "step": 173, | |
| "token_acc": 0.899907008200186 | |
| }, | |
| { | |
| "epoch": 0.7837837837837838, | |
| "grad_norm": 0.9160909652709961, | |
| "learning_rate": 5.901170073038523e-07, | |
| "loss": 0.3465576171875, | |
| "step": 174, | |
| "token_acc": 0.8923485456678811 | |
| }, | |
| { | |
| "epoch": 0.7882882882882883, | |
| "grad_norm": 0.7696585655212402, | |
| "learning_rate": 5.667498655916002e-07, | |
| "loss": 0.3604736328125, | |
| "step": 175, | |
| "token_acc": 0.8889116998746597 | |
| }, | |
| { | |
| "epoch": 0.7927927927927928, | |
| "grad_norm": 0.7901983261108398, | |
| "learning_rate": 5.437954891480443e-07, | |
| "loss": 0.3160400390625, | |
| "step": 176, | |
| "token_acc": 0.9012986436881038 | |
| }, | |
| { | |
| "epoch": 0.7972972972972973, | |
| "grad_norm": 0.7334163188934326, | |
| "learning_rate": 5.21258778926865e-07, | |
| "loss": 0.34423828125, | |
| "step": 177, | |
| "token_acc": 0.8941695594530863 | |
| }, | |
| { | |
| "epoch": 0.8018018018018018, | |
| "grad_norm": 0.7236452698707581, | |
| "learning_rate": 4.99144546706469e-07, | |
| "loss": 0.34686279296875, | |
| "step": 178, | |
| "token_acc": 0.8923213125253115 | |
| }, | |
| { | |
| "epoch": 0.8063063063063063, | |
| "grad_norm": 0.7739962935447693, | |
| "learning_rate": 4.774575140626317e-07, | |
| "loss": 0.349609375, | |
| "step": 179, | |
| "token_acc": 0.8923176451560731 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 0.8533901572227478, | |
| "learning_rate": 4.5620231136040414e-07, | |
| "loss": 0.332275390625, | |
| "step": 180, | |
| "token_acc": 0.8971639258853548 | |
| }, | |
| { | |
| "epoch": 0.8153153153153153, | |
| "grad_norm": 1.0738646984100342, | |
| "learning_rate": 4.3538347676548965e-07, | |
| "loss": 0.3839111328125, | |
| "step": 181, | |
| "token_acc": 0.8830640641762606 | |
| }, | |
| { | |
| "epoch": 0.8198198198198198, | |
| "grad_norm": 0.7544999122619629, | |
| "learning_rate": 4.150054552753055e-07, | |
| "loss": 0.349273681640625, | |
| "step": 182, | |
| "token_acc": 0.8917029877374043 | |
| }, | |
| { | |
| "epoch": 0.8243243243243243, | |
| "grad_norm": 0.6967670917510986, | |
| "learning_rate": 3.950725977699396e-07, | |
| "loss": 0.308349609375, | |
| "step": 183, | |
| "token_acc": 0.9040394422226454 | |
| }, | |
| { | |
| "epoch": 0.8288288288288288, | |
| "grad_norm": 1.0199168920516968, | |
| "learning_rate": 3.7558916008320263e-07, | |
| "loss": 0.3571624755859375, | |
| "step": 184, | |
| "token_acc": 0.8911331997805311 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.776905357837677, | |
| "learning_rate": 3.5655930209396784e-07, | |
| "loss": 0.34765625, | |
| "step": 185, | |
| "token_acc": 0.8915000262425865 | |
| }, | |
| { | |
| "epoch": 0.8378378378378378, | |
| "grad_norm": 0.9011301398277283, | |
| "learning_rate": 3.379870868380031e-07, | |
| "loss": 0.37677001953125, | |
| "step": 186, | |
| "token_acc": 0.8837367137532784 | |
| }, | |
| { | |
| "epoch": 0.8423423423423423, | |
| "grad_norm": 0.7519142627716064, | |
| "learning_rate": 3.1987647964048075e-07, | |
| "loss": 0.3240966796875, | |
| "step": 187, | |
| "token_acc": 0.8982510044906642 | |
| }, | |
| { | |
| "epoch": 0.8468468468468469, | |
| "grad_norm": 0.8182622194290161, | |
| "learning_rate": 3.022313472693447e-07, | |
| "loss": 0.31689453125, | |
| "step": 188, | |
| "token_acc": 0.8999964010221098 | |
| }, | |
| { | |
| "epoch": 0.8513513513513513, | |
| "grad_norm": 0.7286121845245361, | |
| "learning_rate": 2.850554571097211e-07, | |
| "loss": 0.3243408203125, | |
| "step": 189, | |
| "token_acc": 0.900392670157068 | |
| }, | |
| { | |
| "epoch": 0.8558558558558559, | |
| "grad_norm": 0.8014657497406006, | |
| "learning_rate": 2.6835247635955466e-07, | |
| "loss": 0.3505859375, | |
| "step": 190, | |
| "token_acc": 0.8923208057153752 | |
| }, | |
| { | |
| "epoch": 0.8603603603603603, | |
| "grad_norm": 0.8122464418411255, | |
| "learning_rate": 2.521259712466256e-07, | |
| "loss": 0.3648681640625, | |
| "step": 191, | |
| "token_acc": 0.8863166841775962 | |
| }, | |
| { | |
| "epoch": 0.8648648648648649, | |
| "grad_norm": 1.0286812782287598, | |
| "learning_rate": 2.3637940626713346e-07, | |
| "loss": 0.3612060546875, | |
| "step": 192, | |
| "token_acc": 0.8910263284894633 | |
| }, | |
| { | |
| "epoch": 0.8693693693693694, | |
| "grad_norm": 0.8132110238075256, | |
| "learning_rate": 2.2111614344599686e-07, | |
| "loss": 0.3125152587890625, | |
| "step": 193, | |
| "token_acc": 0.9036560888233949 | |
| }, | |
| { | |
| "epoch": 0.8738738738738738, | |
| "grad_norm": 0.7871835231781006, | |
| "learning_rate": 2.0633944161903147e-07, | |
| "loss": 0.3336181640625, | |
| "step": 194, | |
| "token_acc": 0.8960117156590649 | |
| }, | |
| { | |
| "epoch": 0.8783783783783784, | |
| "grad_norm": 0.8867942690849304, | |
| "learning_rate": 1.9205245573716196e-07, | |
| "loss": 0.29541015625, | |
| "step": 195, | |
| "token_acc": 0.9067175216003737 | |
| }, | |
| { | |
| "epoch": 0.8828828828828829, | |
| "grad_norm": 0.8739636540412903, | |
| "learning_rate": 1.7825823619281452e-07, | |
| "loss": 0.3477783203125, | |
| "step": 196, | |
| "token_acc": 0.8919099564915821 | |
| }, | |
| { | |
| "epoch": 0.8873873873873874, | |
| "grad_norm": 0.7999213933944702, | |
| "learning_rate": 1.649597281686302e-07, | |
| "loss": 0.32855224609375, | |
| "step": 197, | |
| "token_acc": 0.8981510210179152 | |
| }, | |
| { | |
| "epoch": 0.8918918918918919, | |
| "grad_norm": 0.8212850093841553, | |
| "learning_rate": 1.5215977100864394e-07, | |
| "loss": 0.392333984375, | |
| "step": 198, | |
| "token_acc": 0.8798142365281447 | |
| }, | |
| { | |
| "epoch": 0.8963963963963963, | |
| "grad_norm": 0.8761787414550781, | |
| "learning_rate": 1.3986109761206097e-07, | |
| "loss": 0.3126220703125, | |
| "step": 199, | |
| "token_acc": 0.9025795889471472 | |
| }, | |
| { | |
| "epoch": 0.9009009009009009, | |
| "grad_norm": 0.7608596086502075, | |
| "learning_rate": 1.2806633384976092e-07, | |
| "loss": 0.332550048828125, | |
| "step": 200, | |
| "token_acc": 0.8978598516872388 | |
| }, | |
| { | |
| "epoch": 0.9054054054054054, | |
| "grad_norm": 0.807299017906189, | |
| "learning_rate": 1.1677799800364958e-07, | |
| "loss": 0.3438720703125, | |
| "step": 201, | |
| "token_acc": 0.8938019253593564 | |
| }, | |
| { | |
| "epoch": 0.9099099099099099, | |
| "grad_norm": 0.906044065952301, | |
| "learning_rate": 1.0599850022898539e-07, | |
| "loss": 0.3448486328125, | |
| "step": 202, | |
| "token_acc": 0.8919713642686692 | |
| }, | |
| { | |
| "epoch": 0.9144144144144144, | |
| "grad_norm": 0.7889218330383301, | |
| "learning_rate": 9.573014203979241e-08, | |
| "loss": 0.28619384765625, | |
| "step": 203, | |
| "token_acc": 0.9101828456205424 | |
| }, | |
| { | |
| "epoch": 0.918918918918919, | |
| "grad_norm": 0.7564137578010559, | |
| "learning_rate": 8.597511581746626e-08, | |
| "loss": 0.3443603515625, | |
| "step": 204, | |
| "token_acc": 0.891580368127471 | |
| }, | |
| { | |
| "epoch": 0.9234234234234234, | |
| "grad_norm": 0.9052029252052307, | |
| "learning_rate": 7.673550434268123e-08, | |
| "loss": 0.30670166015625, | |
| "step": 205, | |
| "token_acc": 0.9040779490944689 | |
| }, | |
| { | |
| "epoch": 0.9279279279279279, | |
| "grad_norm": 0.8498448729515076, | |
| "learning_rate": 6.801328035070138e-08, | |
| "loss": 0.2917938232421875, | |
| "step": 206, | |
| "token_acc": 0.9076634951423842 | |
| }, | |
| { | |
| "epoch": 0.9324324324324325, | |
| "grad_norm": 0.940307080745697, | |
| "learning_rate": 5.981030611018235e-08, | |
| "loss": 0.3585205078125, | |
| "step": 207, | |
| "token_acc": 0.8883763592374977 | |
| }, | |
| { | |
| "epoch": 0.9369369369369369, | |
| "grad_norm": 0.9120367765426636, | |
| "learning_rate": 5.212833302556258e-08, | |
| "loss": 0.325927734375, | |
| "step": 208, | |
| "token_acc": 0.8974414543043994 | |
| }, | |
| { | |
| "epoch": 0.9414414414414415, | |
| "grad_norm": 1.141761064529419, | |
| "learning_rate": 4.4969001263124314e-08, | |
| "loss": 0.3285980224609375, | |
| "step": 209, | |
| "token_acc": 0.8957855034437521 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 0.8518276810646057, | |
| "learning_rate": 3.833383940080232e-08, | |
| "loss": 0.3089599609375, | |
| "step": 210, | |
| "token_acc": 0.9050746193405264 | |
| }, | |
| { | |
| "epoch": 0.9504504504504504, | |
| "grad_norm": 0.8082460761070251, | |
| "learning_rate": 3.222426410182111e-08, | |
| "loss": 0.363555908203125, | |
| "step": 211, | |
| "token_acc": 0.8873174557723192 | |
| }, | |
| { | |
| "epoch": 0.954954954954955, | |
| "grad_norm": 1.4015358686447144, | |
| "learning_rate": 2.6641579812224373e-08, | |
| "loss": 0.41107177734375, | |
| "step": 212, | |
| "token_acc": 0.8763219103737013 | |
| }, | |
| { | |
| "epoch": 0.9594594594594594, | |
| "grad_norm": 0.7203386425971985, | |
| "learning_rate": 2.1586978482366072e-08, | |
| "loss": 0.3438720703125, | |
| "step": 213, | |
| "token_acc": 0.8924183661803375 | |
| }, | |
| { | |
| "epoch": 0.963963963963964, | |
| "grad_norm": 0.8376468420028687, | |
| "learning_rate": 1.7061539312417107e-08, | |
| "loss": 0.3333740234375, | |
| "step": 214, | |
| "token_acc": 0.8958556698301764 | |
| }, | |
| { | |
| "epoch": 0.9684684684684685, | |
| "grad_norm": 0.8015531897544861, | |
| "learning_rate": 1.3066228521948221e-08, | |
| "loss": 0.35986328125, | |
| "step": 215, | |
| "token_acc": 0.8891843543006334 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "grad_norm": 0.7691966891288757, | |
| "learning_rate": 9.60189914363363e-09, | |
| "loss": 0.35791015625, | |
| "step": 216, | |
| "token_acc": 0.8902418854661981 | |
| }, | |
| { | |
| "epoch": 0.9774774774774775, | |
| "grad_norm": 0.9731364250183105, | |
| "learning_rate": 6.66929084112089e-09, | |
| "loss": 0.352081298828125, | |
| "step": 217, | |
| "token_acc": 0.8928305631356983 | |
| }, | |
| { | |
| "epoch": 0.9819819819819819, | |
| "grad_norm": 0.7616921067237854, | |
| "learning_rate": 4.269029751107489e-09, | |
| "loss": 0.3609619140625, | |
| "step": 218, | |
| "token_acc": 0.8907411504424779 | |
| }, | |
| { | |
| "epoch": 0.9864864864864865, | |
| "grad_norm": 1.2653754949569702, | |
| "learning_rate": 2.4016283496544614e-09, | |
| "loss": 0.370086669921875, | |
| "step": 219, | |
| "token_acc": 0.8855589798345022 | |
| }, | |
| { | |
| "epoch": 0.990990990990991, | |
| "grad_norm": 0.9321889281272888, | |
| "learning_rate": 1.0674853427683484e-09, | |
| "loss": 0.31292724609375, | |
| "step": 220, | |
| "token_acc": 0.9013229718149482 | |
| }, | |
| { | |
| "epoch": 0.9954954954954955, | |
| "grad_norm": 0.8537278771400452, | |
| "learning_rate": 2.668855812748561e-10, | |
| "loss": 0.325958251953125, | |
| "step": 221, | |
| "token_acc": 0.89793930894784 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.9416584372520447, | |
| "learning_rate": 0.0, | |
| "loss": 0.33929443359375, | |
| "step": 222, | |
| "token_acc": 0.8940414967192765 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 222, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1423772948666778e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |