| { |
| "best_global_step": 5950, |
| "best_metric": 0.7729265244559362, |
| "best_model_checkpoint": "pseudo_model/checkpoint-5950", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5950, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008403361344537815, |
| "grad_norm": 70.3099365234375, |
| "learning_rate": 1.991764705882353e-05, |
| "loss": 0.0639, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01680672268907563, |
| "grad_norm": 0.251952201128006, |
| "learning_rate": 1.983361344537815e-05, |
| "loss": 0.1373, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.025210084033613446, |
| "grad_norm": 54.500118255615234, |
| "learning_rate": 1.9749579831932776e-05, |
| "loss": 0.187, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03361344537815126, |
| "grad_norm": 17.24431037902832, |
| "learning_rate": 1.9665546218487398e-05, |
| "loss": 0.1268, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04201680672268908, |
| "grad_norm": 0.7904719114303589, |
| "learning_rate": 1.958151260504202e-05, |
| "loss": 0.1147, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05042016806722689, |
| "grad_norm": 15.803120613098145, |
| "learning_rate": 1.949747899159664e-05, |
| "loss": 0.0939, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 21.106233596801758, |
| "learning_rate": 1.9413445378151262e-05, |
| "loss": 0.1392, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.06722689075630252, |
| "grad_norm": 4.552457809448242, |
| "learning_rate": 1.9329411764705883e-05, |
| "loss": 0.0754, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07563025210084033, |
| "grad_norm": 59.192100524902344, |
| "learning_rate": 1.9245378151260505e-05, |
| "loss": 0.0864, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.08403361344537816, |
| "grad_norm": 24.423683166503906, |
| "learning_rate": 1.9161344537815126e-05, |
| "loss": 0.1462, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09243697478991597, |
| "grad_norm": 20.817564010620117, |
| "learning_rate": 1.907731092436975e-05, |
| "loss": 0.1025, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.10084033613445378, |
| "grad_norm": 16.254493713378906, |
| "learning_rate": 1.8993277310924372e-05, |
| "loss": 0.1588, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1092436974789916, |
| "grad_norm": 43.414573669433594, |
| "learning_rate": 1.8909243697478993e-05, |
| "loss": 0.0737, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 25.70012664794922, |
| "learning_rate": 1.8825210084033615e-05, |
| "loss": 0.0638, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.12605042016806722, |
| "grad_norm": 4.720924377441406, |
| "learning_rate": 1.8741176470588236e-05, |
| "loss": 0.1043, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.13445378151260504, |
| "grad_norm": 57.87378692626953, |
| "learning_rate": 1.8657142857142858e-05, |
| "loss": 0.1188, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 23.307783126831055, |
| "learning_rate": 1.857310924369748e-05, |
| "loss": 0.1161, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.15126050420168066, |
| "grad_norm": 44.87445831298828, |
| "learning_rate": 1.8489075630252104e-05, |
| "loss": 0.0917, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.15966386554621848, |
| "grad_norm": 14.766839981079102, |
| "learning_rate": 1.8405042016806725e-05, |
| "loss": 0.1296, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 8.999948501586914, |
| "learning_rate": 1.8321008403361346e-05, |
| "loss": 0.0966, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 27.70500946044922, |
| "learning_rate": 1.8236974789915968e-05, |
| "loss": 0.0847, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.18487394957983194, |
| "grad_norm": 45.68607711791992, |
| "learning_rate": 1.815294117647059e-05, |
| "loss": 0.0774, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.19327731092436976, |
| "grad_norm": 9.21766185760498, |
| "learning_rate": 1.806890756302521e-05, |
| "loss": 0.0832, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.20168067226890757, |
| "grad_norm": 84.41997528076172, |
| "learning_rate": 1.7984873949579832e-05, |
| "loss": 0.1061, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.21008403361344538, |
| "grad_norm": 11.963436126708984, |
| "learning_rate": 1.7900840336134457e-05, |
| "loss": 0.1119, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.2184873949579832, |
| "grad_norm": 30.294477462768555, |
| "learning_rate": 1.7816806722689078e-05, |
| "loss": 0.089, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.226890756302521, |
| "grad_norm": 0.17758768796920776, |
| "learning_rate": 1.77327731092437e-05, |
| "loss": 0.0671, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.24741335213184357, |
| "learning_rate": 1.764873949579832e-05, |
| "loss": 0.0875, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.24369747899159663, |
| "grad_norm": 79.4989013671875, |
| "learning_rate": 1.7564705882352942e-05, |
| "loss": 0.139, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.25210084033613445, |
| "grad_norm": 40.87698745727539, |
| "learning_rate": 1.7480672268907564e-05, |
| "loss": 0.0666, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2605042016806723, |
| "grad_norm": 4.265988826751709, |
| "learning_rate": 1.7396638655462185e-05, |
| "loss": 0.0957, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2689075630252101, |
| "grad_norm": 31.657917022705078, |
| "learning_rate": 1.7312605042016806e-05, |
| "loss": 0.1013, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2773109243697479, |
| "grad_norm": 48.78037643432617, |
| "learning_rate": 1.722857142857143e-05, |
| "loss": 0.0721, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.1120712086558342, |
| "learning_rate": 1.7144537815126052e-05, |
| "loss": 0.0865, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.8699374198913574, |
| "learning_rate": 1.7060504201680674e-05, |
| "loss": 0.0779, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.3025210084033613, |
| "grad_norm": 64.08531951904297, |
| "learning_rate": 1.6976470588235295e-05, |
| "loss": 0.0938, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.31092436974789917, |
| "grad_norm": 36.72616195678711, |
| "learning_rate": 1.6892436974789917e-05, |
| "loss": 0.068, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.31932773109243695, |
| "grad_norm": 24.38673973083496, |
| "learning_rate": 1.6808403361344538e-05, |
| "loss": 0.0667, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3277310924369748, |
| "grad_norm": 4.634633541107178, |
| "learning_rate": 1.672436974789916e-05, |
| "loss": 0.1098, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 0.005323654506355524, |
| "learning_rate": 1.6640336134453784e-05, |
| "loss": 0.0578, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3445378151260504, |
| "grad_norm": 30.870010375976562, |
| "learning_rate": 1.6556302521008405e-05, |
| "loss": 0.0702, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.3597383201122284, |
| "learning_rate": 1.6472268907563027e-05, |
| "loss": 0.087, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.36134453781512604, |
| "grad_norm": 43.77628707885742, |
| "learning_rate": 1.6388235294117648e-05, |
| "loss": 0.0865, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3697478991596639, |
| "grad_norm": 5.36348819732666, |
| "learning_rate": 1.630420168067227e-05, |
| "loss": 0.1211, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.37815126050420167, |
| "grad_norm": 5.006982326507568, |
| "learning_rate": 1.622016806722689e-05, |
| "loss": 0.0772, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.3865546218487395, |
| "grad_norm": 0.21585534512996674, |
| "learning_rate": 1.6136134453781512e-05, |
| "loss": 0.0873, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3949579831932773, |
| "grad_norm": 5.9591193199157715, |
| "learning_rate": 1.6052100840336137e-05, |
| "loss": 0.0678, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.40336134453781514, |
| "grad_norm": 5.950344085693359, |
| "learning_rate": 1.596806722689076e-05, |
| "loss": 0.0583, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 62.205570220947266, |
| "learning_rate": 1.588403361344538e-05, |
| "loss": 0.0799, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 11.344050407409668, |
| "learning_rate": 1.58e-05, |
| "loss": 0.0925, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 20.887693405151367, |
| "learning_rate": 1.5715966386554623e-05, |
| "loss": 0.0515, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.4369747899159664, |
| "grad_norm": 4.148672580718994, |
| "learning_rate": 1.5631932773109244e-05, |
| "loss": 0.0836, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.44537815126050423, |
| "grad_norm": 0.019104987382888794, |
| "learning_rate": 1.5547899159663865e-05, |
| "loss": 0.0663, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.453781512605042, |
| "grad_norm": 55.08736038208008, |
| "learning_rate": 1.5463865546218487e-05, |
| "loss": 0.0573, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.46218487394957986, |
| "grad_norm": 24.77686882019043, |
| "learning_rate": 1.537983193277311e-05, |
| "loss": 0.0995, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.2827483117580414, |
| "learning_rate": 1.5295798319327733e-05, |
| "loss": 0.0931, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.4789915966386555, |
| "grad_norm": 51.127052307128906, |
| "learning_rate": 1.5211764705882354e-05, |
| "loss": 0.0674, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.48739495798319327, |
| "grad_norm": 28.012380599975586, |
| "learning_rate": 1.5127731092436977e-05, |
| "loss": 0.071, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4957983193277311, |
| "grad_norm": 62.96744155883789, |
| "learning_rate": 1.5043697478991597e-05, |
| "loss": 0.0802, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 24.443973541259766, |
| "learning_rate": 1.4959663865546218e-05, |
| "loss": 0.0822, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5126050420168067, |
| "grad_norm": 0.09382598847150803, |
| "learning_rate": 1.4875630252100841e-05, |
| "loss": 0.0893, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.5210084033613446, |
| "grad_norm": 8.969447135925293, |
| "learning_rate": 1.4791596638655463e-05, |
| "loss": 0.0587, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.33123093843460083, |
| "learning_rate": 1.4707563025210086e-05, |
| "loss": 0.0662, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.5378151260504201, |
| "grad_norm": 0.34799447655677795, |
| "learning_rate": 1.4623529411764707e-05, |
| "loss": 0.0471, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5462184873949579, |
| "grad_norm": 0.10502827167510986, |
| "learning_rate": 1.453949579831933e-05, |
| "loss": 0.0608, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.5546218487394958, |
| "grad_norm": 46.54685974121094, |
| "learning_rate": 1.4455462184873952e-05, |
| "loss": 0.0756, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5630252100840336, |
| "grad_norm": 25.307750701904297, |
| "learning_rate": 1.4371428571428571e-05, |
| "loss": 0.0703, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.03593587502837181, |
| "learning_rate": 1.4287394957983194e-05, |
| "loss": 0.0509, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5798319327731093, |
| "grad_norm": 6.873576641082764, |
| "learning_rate": 1.4203361344537816e-05, |
| "loss": 0.0751, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 39.44701385498047, |
| "learning_rate": 1.4119327731092439e-05, |
| "loss": 0.0698, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5966386554621849, |
| "grad_norm": 0.7871583104133606, |
| "learning_rate": 1.403529411764706e-05, |
| "loss": 0.0701, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.6050420168067226, |
| "grad_norm": 0.06720981746912003, |
| "learning_rate": 1.3951260504201683e-05, |
| "loss": 0.0613, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6134453781512605, |
| "grad_norm": 87.64744567871094, |
| "learning_rate": 1.3867226890756305e-05, |
| "loss": 0.0657, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.6218487394957983, |
| "grad_norm": 92.26810455322266, |
| "learning_rate": 1.3783193277310924e-05, |
| "loss": 0.0915, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6302521008403361, |
| "grad_norm": 0.13392935693264008, |
| "learning_rate": 1.3699159663865547e-05, |
| "loss": 0.0552, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.6386554621848739, |
| "grad_norm": 0.2624678313732147, |
| "learning_rate": 1.3615126050420169e-05, |
| "loss": 0.061, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 58.510833740234375, |
| "learning_rate": 1.353109243697479e-05, |
| "loss": 0.0475, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.6554621848739496, |
| "grad_norm": 0.19354496896266937, |
| "learning_rate": 1.3447058823529413e-05, |
| "loss": 0.0423, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6638655462184874, |
| "grad_norm": 20.584426879882812, |
| "learning_rate": 1.3363025210084035e-05, |
| "loss": 0.1135, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 1.4565203189849854, |
| "learning_rate": 1.3278991596638658e-05, |
| "loss": 0.0264, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.680672268907563, |
| "grad_norm": 0.0815153419971466, |
| "learning_rate": 1.3194957983193279e-05, |
| "loss": 0.0956, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.6890756302521008, |
| "grad_norm": 45.101993560791016, |
| "learning_rate": 1.3110924369747899e-05, |
| "loss": 0.0853, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.6974789915966386, |
| "grad_norm": 8.502076148986816, |
| "learning_rate": 1.3026890756302522e-05, |
| "loss": 0.0407, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.01488969475030899, |
| "learning_rate": 1.2942857142857143e-05, |
| "loss": 0.0506, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 128.31639099121094, |
| "learning_rate": 1.2858823529411766e-05, |
| "loss": 0.0527, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.7226890756302521, |
| "grad_norm": 69.5967025756836, |
| "learning_rate": 1.2774789915966388e-05, |
| "loss": 0.0437, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7310924369747899, |
| "grad_norm": 51.27083206176758, |
| "learning_rate": 1.269075630252101e-05, |
| "loss": 0.0511, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.7394957983193278, |
| "grad_norm": 1.9427380561828613, |
| "learning_rate": 1.2606722689075632e-05, |
| "loss": 0.0938, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7478991596638656, |
| "grad_norm": 17.49405288696289, |
| "learning_rate": 1.2522689075630252e-05, |
| "loss": 0.0799, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.7563025210084033, |
| "grad_norm": 2.69028377532959, |
| "learning_rate": 1.2438655462184875e-05, |
| "loss": 0.0556, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 91.98591613769531, |
| "learning_rate": 1.2354621848739496e-05, |
| "loss": 0.0341, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.773109243697479, |
| "grad_norm": 37.63540267944336, |
| "learning_rate": 1.2270588235294119e-05, |
| "loss": 0.0748, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7815126050420168, |
| "grad_norm": 24.502737045288086, |
| "learning_rate": 1.218655462184874e-05, |
| "loss": 0.0693, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.7899159663865546, |
| "grad_norm": 0.010893125087022781, |
| "learning_rate": 1.2102521008403364e-05, |
| "loss": 0.0371, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.7983193277310925, |
| "grad_norm": 39.78943634033203, |
| "learning_rate": 1.2018487394957985e-05, |
| "loss": 0.0547, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.8067226890756303, |
| "grad_norm": 21.680444717407227, |
| "learning_rate": 1.1934453781512605e-05, |
| "loss": 0.0405, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8151260504201681, |
| "grad_norm": 91.88570404052734, |
| "learning_rate": 1.1850420168067228e-05, |
| "loss": 0.048, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 47.21152114868164, |
| "learning_rate": 1.1766386554621849e-05, |
| "loss": 0.0957, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8319327731092437, |
| "grad_norm": 0.03324070945382118, |
| "learning_rate": 1.1682352941176472e-05, |
| "loss": 0.0525, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 0.4629770517349243, |
| "learning_rate": 1.1598319327731094e-05, |
| "loss": 0.0682, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.8487394957983193, |
| "grad_norm": 45.92827224731445, |
| "learning_rate": 1.1514285714285715e-05, |
| "loss": 0.0424, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 32.47571563720703, |
| "learning_rate": 1.1430252100840338e-05, |
| "loss": 0.0794, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.865546218487395, |
| "grad_norm": 17.017562866210938, |
| "learning_rate": 1.134621848739496e-05, |
| "loss": 0.0436, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.8739495798319328, |
| "grad_norm": 2.4690027236938477, |
| "learning_rate": 1.1262184873949579e-05, |
| "loss": 0.0618, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 91.57916259765625, |
| "learning_rate": 1.1178151260504202e-05, |
| "loss": 0.0628, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.8907563025210085, |
| "grad_norm": 0.11016395688056946, |
| "learning_rate": 1.1094117647058823e-05, |
| "loss": 0.076, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.8991596638655462, |
| "grad_norm": 42.38503646850586, |
| "learning_rate": 1.1010084033613447e-05, |
| "loss": 0.0759, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.907563025210084, |
| "grad_norm": 4.813717365264893, |
| "learning_rate": 1.0926050420168068e-05, |
| "loss": 0.0349, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.9159663865546218, |
| "grad_norm": 0.016100173816084862, |
| "learning_rate": 1.0842016806722691e-05, |
| "loss": 0.0506, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.9243697478991597, |
| "grad_norm": 0.07725575566291809, |
| "learning_rate": 1.0757983193277312e-05, |
| "loss": 0.0581, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9327731092436975, |
| "grad_norm": 69.73644256591797, |
| "learning_rate": 1.0673949579831932e-05, |
| "loss": 0.0552, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.0879245325922966, |
| "learning_rate": 1.0589915966386555e-05, |
| "loss": 0.059, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.9495798319327731, |
| "grad_norm": 0.06376896053552628, |
| "learning_rate": 1.0505882352941176e-05, |
| "loss": 0.046, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.957983193277311, |
| "grad_norm": 98.75856018066406, |
| "learning_rate": 1.04218487394958e-05, |
| "loss": 0.0319, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.9663865546218487, |
| "grad_norm": 0.07460715621709824, |
| "learning_rate": 1.0337815126050421e-05, |
| "loss": 0.0349, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.9747899159663865, |
| "grad_norm": 88.64122772216797, |
| "learning_rate": 1.0253781512605044e-05, |
| "loss": 0.0577, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.9831932773109243, |
| "grad_norm": 0.0315723791718483, |
| "learning_rate": 1.0169747899159665e-05, |
| "loss": 0.0655, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.9915966386554622, |
| "grad_norm": 74.6689682006836, |
| "learning_rate": 1.0085714285714288e-05, |
| "loss": 0.0457, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.06222504749894142, |
| "learning_rate": 1.0001680672268908e-05, |
| "loss": 0.0391, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.7729265244559362, |
| "eval_loss": 1.5595977306365967, |
| "eval_runtime": 0.0922, |
| "eval_samples_per_second": 2364.65, |
| "eval_steps_per_second": 43.388, |
| "step": 5950 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 11900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8495403899188410.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|