| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 100, |
| "global_step": 378, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005291005291005291, |
| "grad_norm": 0.77734375, |
| "learning_rate": 0.0, |
| "loss": 2.5063, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.010582010582010581, |
| "grad_norm": 0.82421875, |
| "learning_rate": 2e-05, |
| "loss": 2.5479, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.015873015873015872, |
| "grad_norm": 0.7734375, |
| "learning_rate": 4e-05, |
| "loss": 2.458, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.021164021164021163, |
| "grad_norm": 0.8359375, |
| "learning_rate": 6e-05, |
| "loss": 2.3926, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.026455026455026454, |
| "grad_norm": 0.765625, |
| "learning_rate": 8e-05, |
| "loss": 2.0942, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.031746031746031744, |
| "grad_norm": 0.39453125, |
| "learning_rate": 0.0001, |
| "loss": 1.832, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.037037037037037035, |
| "grad_norm": 0.234375, |
| "learning_rate": 0.00012, |
| "loss": 1.559, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.042328042328042326, |
| "grad_norm": 0.1953125, |
| "learning_rate": 0.00014, |
| "loss": 1.3943, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.047619047619047616, |
| "grad_norm": 0.16015625, |
| "learning_rate": 0.00016, |
| "loss": 1.354, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 0.00018, |
| "loss": 1.3038, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0582010582010582, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 0.0002, |
| "loss": 1.2313, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.06349206349206349, |
| "grad_norm": 0.109375, |
| "learning_rate": 0.00019945652173913045, |
| "loss": 1.2218, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06878306878306878, |
| "grad_norm": 0.11279296875, |
| "learning_rate": 0.00019891304347826087, |
| "loss": 1.1812, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.07407407407407407, |
| "grad_norm": 0.1025390625, |
| "learning_rate": 0.0001983695652173913, |
| "loss": 1.174, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.07936507936507936, |
| "grad_norm": 0.09814453125, |
| "learning_rate": 0.00019782608695652175, |
| "loss": 1.132, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08465608465608465, |
| "grad_norm": 0.08642578125, |
| "learning_rate": 0.0001972826086956522, |
| "loss": 1.1224, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.08994708994708994, |
| "grad_norm": 0.07470703125, |
| "learning_rate": 0.00019673913043478263, |
| "loss": 1.0804, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.00019619565217391305, |
| "loss": 1.055, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.10052910052910052, |
| "grad_norm": 0.056640625, |
| "learning_rate": 0.0001956521739130435, |
| "loss": 1.0442, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.0001951086956521739, |
| "loss": 1.0217, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 0.00019456521739130434, |
| "loss": 1.0435, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1164021164021164, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.00019402173913043479, |
| "loss": 1.0062, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.12169312169312169, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 0.00019347826086956523, |
| "loss": 1.0207, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.12698412698412698, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 0.00019293478260869567, |
| "loss": 0.9658, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.13227513227513227, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.0001923913043478261, |
| "loss": 0.9755, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.13756613756613756, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00019184782608695653, |
| "loss": 0.9438, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.042724609375, |
| "learning_rate": 0.00019130434782608697, |
| "loss": 0.9861, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.00019076086956521738, |
| "loss": 0.9287, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.15343915343915343, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 0.00019021739130434782, |
| "loss": 0.9081, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.0001896739130434783, |
| "loss": 0.9301, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.164021164021164, |
| "grad_norm": 0.03759765625, |
| "learning_rate": 0.0001891304347826087, |
| "loss": 0.8619, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1693121693121693, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 0.00018858695652173915, |
| "loss": 0.8892, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1746031746031746, |
| "grad_norm": 0.03759765625, |
| "learning_rate": 0.00018804347826086956, |
| "loss": 0.889, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.17989417989417988, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.0001875, |
| "loss": 0.8299, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.18518518518518517, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 0.00018695652173913045, |
| "loss": 0.8749, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.0001864130434782609, |
| "loss": 0.8511, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.19576719576719576, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 0.00018586956521739133, |
| "loss": 0.8657, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.20105820105820105, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00018532608695652174, |
| "loss": 0.8095, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.20634920634920634, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.00018478260869565218, |
| "loss": 0.8281, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.00018423913043478263, |
| "loss": 0.8226, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21693121693121692, |
| "grad_norm": 0.044921875, |
| "learning_rate": 0.00018369565217391304, |
| "loss": 0.8463, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 0.00018315217391304348, |
| "loss": 0.8109, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2275132275132275, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 0.00018260869565217392, |
| "loss": 0.8052, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2328042328042328, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00018206521739130437, |
| "loss": 0.7974, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 0.0001815217391304348, |
| "loss": 0.8002, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.24338624338624337, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 0.00018097826086956522, |
| "loss": 0.8308, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.24867724867724866, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.00018043478260869566, |
| "loss": 0.8053, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.25396825396825395, |
| "grad_norm": 0.03955078125, |
| "learning_rate": 0.0001798913043478261, |
| "loss": 0.7827, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.25925925925925924, |
| "grad_norm": 0.0390625, |
| "learning_rate": 0.00017934782608695652, |
| "loss": 0.8008, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00017880434782608696, |
| "loss": 0.752, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2698412698412698, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 0.0001782608695652174, |
| "loss": 0.7542, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2751322751322751, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 0.00017771739130434784, |
| "loss": 0.7599, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2804232804232804, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 0.00017717391304347829, |
| "loss": 0.7798, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.0001766304347826087, |
| "loss": 0.7674, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.291005291005291, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 0.00017608695652173914, |
| "loss": 0.7321, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 0.00017554347826086956, |
| "loss": 0.7249, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.30158730158730157, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.000175, |
| "loss": 0.7482, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.30687830687830686, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00017445652173913044, |
| "loss": 0.7246, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.31216931216931215, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00017391304347826088, |
| "loss": 0.7331, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 0.04296875, |
| "learning_rate": 0.00017336956521739132, |
| "loss": 0.7256, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32275132275132273, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 0.00017282608695652174, |
| "loss": 0.7171, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.328042328042328, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 0.00017228260869565218, |
| "loss": 0.7364, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.041748046875, |
| "learning_rate": 0.00017173913043478262, |
| "loss": 0.708, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.3386243386243386, |
| "grad_norm": 0.04296875, |
| "learning_rate": 0.00017119565217391304, |
| "loss": 0.6989, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3439153439153439, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 0.00017065217391304348, |
| "loss": 0.6899, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3492063492063492, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 0.00017010869565217392, |
| "loss": 0.7274, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3544973544973545, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 0.00016956521739130436, |
| "loss": 0.7028, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.35978835978835977, |
| "grad_norm": 0.042724609375, |
| "learning_rate": 0.0001690217391304348, |
| "loss": 0.6927, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.36507936507936506, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 0.00016847826086956522, |
| "loss": 0.6844, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 0.00016793478260869566, |
| "loss": 0.7191, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.37566137566137564, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.0001673913043478261, |
| "loss": 0.6815, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.00016684782608695651, |
| "loss": 0.6975, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3862433862433862, |
| "grad_norm": 0.046875, |
| "learning_rate": 0.00016630434782608698, |
| "loss": 0.6809, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3915343915343915, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 0.0001657608695652174, |
| "loss": 0.684, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3968253968253968, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 0.00016521739130434784, |
| "loss": 0.6843, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4021164021164021, |
| "grad_norm": 0.045654296875, |
| "learning_rate": 0.00016467391304347828, |
| "loss": 0.6873, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4074074074074074, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 0.0001641304347826087, |
| "loss": 0.6694, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4126984126984127, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.00016358695652173914, |
| "loss": 0.6902, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.41798941798941797, |
| "grad_norm": 0.04833984375, |
| "learning_rate": 0.00016304347826086955, |
| "loss": 0.6729, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.42328042328042326, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 0.00016250000000000002, |
| "loss": 0.6822, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 0.00016195652173913046, |
| "loss": 0.6738, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.43386243386243384, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00016141304347826088, |
| "loss": 0.6987, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.43915343915343913, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 0.00016086956521739132, |
| "loss": 0.6789, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 0.00016032608695652173, |
| "loss": 0.6736, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4497354497354497, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 0.00015978260869565217, |
| "loss": 0.6626, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.455026455026455, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 0.00015923913043478262, |
| "loss": 0.6724, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4603174603174603, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 0.00015869565217391306, |
| "loss": 0.6501, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.4656084656084656, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.0001581521739130435, |
| "loss": 0.6459, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4708994708994709, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 0.0001576086956521739, |
| "loss": 0.6841, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.052734375, |
| "learning_rate": 0.00015706521739130435, |
| "loss": 0.6659, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.48148148148148145, |
| "grad_norm": 0.0478515625, |
| "learning_rate": 0.0001565217391304348, |
| "loss": 0.6402, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.48677248677248675, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.0001559782608695652, |
| "loss": 0.6458, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.49206349206349204, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 0.00015543478260869565, |
| "loss": 0.6622, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4973544973544973, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.0001548913043478261, |
| "loss": 0.6696, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5026455026455027, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 0.00015434782608695654, |
| "loss": 0.6391, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5079365079365079, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 0.00015380434782608698, |
| "loss": 0.6656, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5132275132275133, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.0001532608695652174, |
| "loss": 0.6702, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5185185185185185, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00015271739130434783, |
| "loss": 0.6746, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5238095238095238, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.00015217391304347827, |
| "loss": 0.675, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5291005291005291, |
| "grad_norm": 0.0537109375, |
| "learning_rate": 0.0001516304347826087, |
| "loss": 0.6625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5291005291005291, |
| "eval_loss": 0.6503328680992126, |
| "eval_model_preparation_time": 0.0048, |
| "eval_runtime": 17.4703, |
| "eval_samples_per_second": 153.862, |
| "eval_steps_per_second": 9.616, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5343915343915344, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00015108695652173913, |
| "loss": 0.6307, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5396825396825397, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 0.00015054347826086957, |
| "loss": 0.6433, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.544973544973545, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 0.6398, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5502645502645502, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00014945652173913046, |
| "loss": 0.6443, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 0.00014891304347826087, |
| "loss": 0.6508, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5608465608465608, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 0.0001483695652173913, |
| "loss": 0.6475, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5661375661375662, |
| "grad_norm": 0.064453125, |
| "learning_rate": 0.00014782608695652173, |
| "loss": 0.6364, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.00014728260869565217, |
| "loss": 0.6281, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5767195767195767, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.00014673913043478264, |
| "loss": 0.6439, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.582010582010582, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.00014619565217391305, |
| "loss": 0.613, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5873015873015873, |
| "grad_norm": 0.05859375, |
| "learning_rate": 0.0001456521739130435, |
| "loss": 0.6611, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.0001451086956521739, |
| "loss": 0.6111, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5978835978835979, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 0.00014456521739130435, |
| "loss": 0.6406, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6031746031746031, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 0.0001440217391304348, |
| "loss": 0.6377, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6084656084656085, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 0.0001434782608695652, |
| "loss": 0.6266, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6137566137566137, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.00014293478260869567, |
| "loss": 0.6248, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6190476190476191, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00014239130434782612, |
| "loss": 0.6274, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6243386243386243, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 0.00014184782608695653, |
| "loss": 0.6311, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6296296296296297, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 0.00014130434782608697, |
| "loss": 0.6017, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 0.00014076086956521739, |
| "loss": 0.6472, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6402116402116402, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 0.00014021739130434783, |
| "loss": 0.6374, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.6455026455026455, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.00013967391304347827, |
| "loss": 0.6244, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6507936507936508, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 0.0001391304347826087, |
| "loss": 0.6681, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.656084656084656, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.00013858695652173915, |
| "loss": 0.6318, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6613756613756614, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.00013804347826086957, |
| "loss": 0.6431, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 0.0001375, |
| "loss": 0.6453, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.671957671957672, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00013695652173913045, |
| "loss": 0.6238, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.6772486772486772, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 0.00013641304347826086, |
| "loss": 0.6039, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6825396825396826, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 0.0001358695652173913, |
| "loss": 0.6183, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6878306878306878, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00013532608695652175, |
| "loss": 0.6786, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6931216931216931, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.0001347826086956522, |
| "loss": 0.6242, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.6984126984126984, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 0.00013423913043478263, |
| "loss": 0.6251, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7037037037037037, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00013369565217391305, |
| "loss": 0.6445, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.708994708994709, |
| "grad_norm": 0.060546875, |
| "learning_rate": 0.0001331521739130435, |
| "loss": 0.6138, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.0001326086956521739, |
| "loss": 0.5826, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7195767195767195, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 0.00013206521739130434, |
| "loss": 0.6027, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7248677248677249, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 0.00013152173913043478, |
| "loss": 0.6296, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.7301587301587301, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.00013097826086956523, |
| "loss": 0.6448, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.7354497354497355, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.00013043478260869567, |
| "loss": 0.6336, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.052734375, |
| "learning_rate": 0.0001298913043478261, |
| "loss": 0.577, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.746031746031746, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.00012934782608695652, |
| "loss": 0.6114, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.7513227513227513, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 0.00012880434782608697, |
| "loss": 0.6114, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.7566137566137566, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 0.00012826086956521738, |
| "loss": 0.6307, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 0.00012771739130434782, |
| "loss": 0.5979, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.7671957671957672, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 0.00012717391304347826, |
| "loss": 0.6094, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7724867724867724, |
| "grad_norm": 0.052734375, |
| "learning_rate": 0.0001266304347826087, |
| "loss": 0.6184, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00012608695652173915, |
| "loss": 0.6139, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.783068783068783, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 0.00012554347826086956, |
| "loss": 0.6369, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7883597883597884, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 0.000125, |
| "loss": 0.6227, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 0.0537109375, |
| "learning_rate": 0.00012445652173913044, |
| "loss": 0.6396, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.798941798941799, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 0.00012391304347826086, |
| "loss": 0.5942, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.8042328042328042, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 0.00012336956521739133, |
| "loss": 0.5837, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8095238095238095, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.00012282608695652174, |
| "loss": 0.5945, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8148148148148148, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 0.00012228260869565218, |
| "loss": 0.6097, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8201058201058201, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 0.00012173913043478263, |
| "loss": 0.6062, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.8253968253968254, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 0.00012119565217391304, |
| "loss": 0.5891, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.8306878306878307, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 0.00012065217391304348, |
| "loss": 0.5707, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.8359788359788359, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.00012010869565217391, |
| "loss": 0.6178, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8412698412698413, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00011956521739130435, |
| "loss": 0.5776, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.8465608465608465, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 0.00011902173913043479, |
| "loss": 0.6096, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8518518518518519, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 0.00011847826086956522, |
| "loss": 0.6006, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.05908203125, |
| "learning_rate": 0.00011793478260869566, |
| "loss": 0.6119, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.8624338624338624, |
| "grad_norm": 0.062255859375, |
| "learning_rate": 0.0001173913043478261, |
| "loss": 0.5744, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.8677248677248677, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00011684782608695652, |
| "loss": 0.6051, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.873015873015873, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.00011630434782608697, |
| "loss": 0.6083, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.8783068783068783, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.00011576086956521739, |
| "loss": 0.5878, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8835978835978836, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 0.00011521739130434783, |
| "loss": 0.5912, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00011467391304347827, |
| "loss": 0.5783, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.8941798941798942, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 0.0001141304347826087, |
| "loss": 0.6034, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.8994708994708994, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 0.00011358695652173914, |
| "loss": 0.5775, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9047619047619048, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 0.00011304347826086956, |
| "loss": 0.5793, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.91005291005291, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00011250000000000001, |
| "loss": 0.5688, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9153439153439153, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.00011195652173913045, |
| "loss": 0.6247, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.9206349206349206, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.00011141304347826087, |
| "loss": 0.5996, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.9259259259259259, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00011086956521739131, |
| "loss": 0.5958, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.9312169312169312, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 0.00011032608695652174, |
| "loss": 0.5934, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9365079365079365, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 0.00010978260869565218, |
| "loss": 0.5922, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.9417989417989417, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00010923913043478262, |
| "loss": 0.5832, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.9470899470899471, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00010869565217391305, |
| "loss": 0.5876, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.00010815217391304349, |
| "loss": 0.5922, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9576719576719577, |
| "grad_norm": 0.056640625, |
| "learning_rate": 0.0001076086956521739, |
| "loss": 0.5832, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.9629629629629629, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 0.00010706521739130435, |
| "loss": 0.5972, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.9682539682539683, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.0001065217391304348, |
| "loss": 0.5676, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.9735449735449735, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00010597826086956521, |
| "loss": 0.598, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.9788359788359788, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 0.00010543478260869566, |
| "loss": 0.5837, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.9841269841269841, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 0.0001048913043478261, |
| "loss": 0.6075, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9894179894179894, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 0.00010434782608695653, |
| "loss": 0.6048, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.9947089947089947, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.00010380434782608697, |
| "loss": 0.5913, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 0.00010326086956521738, |
| "loss": 0.5679, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.0052910052910053, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 0.00010271739130434784, |
| "loss": 0.57, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0105820105820107, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 0.00010217391304347828, |
| "loss": 0.5678, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.0158730158730158, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 0.0001016304347826087, |
| "loss": 0.5514, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.0211640211640212, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 0.00010108695652173914, |
| "loss": 0.558, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.0264550264550265, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.00010054347826086956, |
| "loss": 0.5788, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.0317460317460316, |
| "grad_norm": 0.056640625, |
| "learning_rate": 0.0001, |
| "loss": 0.6105, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.037037037037037, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 9.945652173913043e-05, |
| "loss": 0.5778, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.0423280423280423, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 9.891304347826087e-05, |
| "loss": 0.5871, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.0476190476190477, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 9.836956521739132e-05, |
| "loss": 0.576, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.052910052910053, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 9.782608695652174e-05, |
| "loss": 0.5928, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.0582010582010581, |
| "grad_norm": 0.0546875, |
| "learning_rate": 9.728260869565217e-05, |
| "loss": 0.5645, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0582010582010581, |
| "eval_loss": 0.5890559554100037, |
| "eval_model_preparation_time": 0.0048, |
| "eval_runtime": 17.3104, |
| "eval_samples_per_second": 155.283, |
| "eval_steps_per_second": 9.705, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0634920634920635, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 9.673913043478261e-05, |
| "loss": 0.5626, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.0687830687830688, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 9.619565217391306e-05, |
| "loss": 0.6014, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.074074074074074, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 9.565217391304348e-05, |
| "loss": 0.5686, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.0793650793650793, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 9.510869565217391e-05, |
| "loss": 0.5634, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.0846560846560847, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 9.456521739130435e-05, |
| "loss": 0.5702, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.08994708994709, |
| "grad_norm": 0.0546875, |
| "learning_rate": 9.402173913043478e-05, |
| "loss": 0.5914, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.0952380952380953, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 9.347826086956522e-05, |
| "loss": 0.5543, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.1005291005291005, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 9.293478260869566e-05, |
| "loss": 0.5934, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.1058201058201058, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 9.239130434782609e-05, |
| "loss": 0.5753, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.056640625, |
| "learning_rate": 9.184782608695652e-05, |
| "loss": 0.5678, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1164021164021163, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 9.130434782608696e-05, |
| "loss": 0.5743, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.1216931216931216, |
| "grad_norm": 0.0546875, |
| "learning_rate": 9.07608695652174e-05, |
| "loss": 0.5861, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.126984126984127, |
| "grad_norm": 0.06103515625, |
| "learning_rate": 9.021739130434783e-05, |
| "loss": 0.5655, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.1322751322751323, |
| "grad_norm": 0.052734375, |
| "learning_rate": 8.967391304347826e-05, |
| "loss": 0.5566, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.1375661375661377, |
| "grad_norm": 0.05859375, |
| "learning_rate": 8.91304347826087e-05, |
| "loss": 0.5626, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 8.858695652173914e-05, |
| "loss": 0.5848, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.1481481481481481, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 8.804347826086957e-05, |
| "loss": 0.557, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.1534391534391535, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 8.75e-05, |
| "loss": 0.5729, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.1587301587301586, |
| "grad_norm": 0.0546875, |
| "learning_rate": 8.695652173913044e-05, |
| "loss": 0.5715, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.164021164021164, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 8.641304347826087e-05, |
| "loss": 0.5783, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1693121693121693, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 8.586956521739131e-05, |
| "loss": 0.5637, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.1746031746031746, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 8.532608695652174e-05, |
| "loss": 0.582, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.17989417989418, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 8.478260869565218e-05, |
| "loss": 0.5796, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 8.423913043478261e-05, |
| "loss": 0.5491, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.1904761904761905, |
| "grad_norm": 0.061279296875, |
| "learning_rate": 8.369565217391305e-05, |
| "loss": 0.5423, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.1957671957671958, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 8.315217391304349e-05, |
| "loss": 0.5723, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.201058201058201, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 8.260869565217392e-05, |
| "loss": 0.5253, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.2063492063492063, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 8.206521739130435e-05, |
| "loss": 0.5645, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2116402116402116, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 8.152173913043478e-05, |
| "loss": 0.5621, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.216931216931217, |
| "grad_norm": 0.06103515625, |
| "learning_rate": 8.097826086956523e-05, |
| "loss": 0.5717, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 8.043478260869566e-05, |
| "loss": 0.5881, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.2275132275132274, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 7.989130434782609e-05, |
| "loss": 0.5674, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.2328042328042328, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 7.934782608695653e-05, |
| "loss": 0.5791, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.2380952380952381, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 7.880434782608696e-05, |
| "loss": 0.5491, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.2433862433862433, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 7.82608695652174e-05, |
| "loss": 0.5627, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.2486772486772486, |
| "grad_norm": 0.0546875, |
| "learning_rate": 7.771739130434783e-05, |
| "loss": 0.5783, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.253968253968254, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 7.717391304347827e-05, |
| "loss": 0.5673, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.2592592592592593, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 7.66304347826087e-05, |
| "loss": 0.5774, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.2645502645502646, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 7.608695652173914e-05, |
| "loss": 0.5643, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.2698412698412698, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 7.554347826086957e-05, |
| "loss": 0.5508, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2751322751322751, |
| "grad_norm": 0.0537109375, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.5482, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.2804232804232805, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 7.445652173913044e-05, |
| "loss": 0.5838, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 7.391304347826086e-05, |
| "loss": 0.5381, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.291005291005291, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 7.336956521739132e-05, |
| "loss": 0.5473, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.2962962962962963, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 7.282608695652175e-05, |
| "loss": 0.5684, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.3015873015873016, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 7.228260869565217e-05, |
| "loss": 0.5655, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.306878306878307, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 7.17391304347826e-05, |
| "loss": 0.5358, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.312169312169312, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 7.119565217391306e-05, |
| "loss": 0.6069, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.3174603174603174, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 7.065217391304349e-05, |
| "loss": 0.551, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.3227513227513228, |
| "grad_norm": 0.06103515625, |
| "learning_rate": 7.010869565217391e-05, |
| "loss": 0.5978, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.328042328042328, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 6.956521739130436e-05, |
| "loss": 0.5152, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 6.902173913043478e-05, |
| "loss": 0.5699, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.3386243386243386, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 6.847826086956522e-05, |
| "loss": 0.5397, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.343915343915344, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 6.793478260869565e-05, |
| "loss": 0.5679, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.3492063492063493, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 6.73913043478261e-05, |
| "loss": 0.5535, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.3544973544973544, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 6.684782608695652e-05, |
| "loss": 0.5745, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.3597883597883598, |
| "grad_norm": 0.05859375, |
| "learning_rate": 6.630434782608695e-05, |
| "loss": 0.5692, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.3650793650793651, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 6.576086956521739e-05, |
| "loss": 0.5669, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.3703703703703702, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 6.521739130434783e-05, |
| "loss": 0.5725, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.3756613756613756, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 6.467391304347826e-05, |
| "loss": 0.5528, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.380952380952381, |
| "grad_norm": 0.060791015625, |
| "learning_rate": 6.413043478260869e-05, |
| "loss": 0.5691, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.3862433862433863, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 6.358695652173913e-05, |
| "loss": 0.5526, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.3915343915343916, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 6.304347826086957e-05, |
| "loss": 0.5615, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.3968253968253967, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 6.25e-05, |
| "loss": 0.5372, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.402116402116402, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 6.195652173913043e-05, |
| "loss": 0.5693, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.4074074074074074, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 6.141304347826087e-05, |
| "loss": 0.5398, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4126984126984126, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 6.086956521739131e-05, |
| "loss": 0.5624, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.417989417989418, |
| "grad_norm": 0.0625, |
| "learning_rate": 6.032608695652174e-05, |
| "loss": 0.5522, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.4232804232804233, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 5.9782608695652175e-05, |
| "loss": 0.549, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 5.923913043478261e-05, |
| "loss": 0.5395, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.433862433862434, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 5.869565217391305e-05, |
| "loss": 0.5482, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.439153439153439, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 5.815217391304349e-05, |
| "loss": 0.5726, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 0.05908203125, |
| "learning_rate": 5.7608695652173915e-05, |
| "loss": 0.5587, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.4497354497354498, |
| "grad_norm": 0.05859375, |
| "learning_rate": 5.706521739130435e-05, |
| "loss": 0.5447, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.455026455026455, |
| "grad_norm": 0.05859375, |
| "learning_rate": 5.652173913043478e-05, |
| "loss": 0.5294, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.4603174603174602, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 5.5978260869565226e-05, |
| "loss": 0.5549, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.4656084656084656, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 5.5434782608695654e-05, |
| "loss": 0.572, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.470899470899471, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 5.489130434782609e-05, |
| "loss": 0.5487, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.4761904761904763, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 5.4347826086956524e-05, |
| "loss": 0.5559, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 5.380434782608695e-05, |
| "loss": 0.5495, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4867724867724867, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 5.32608695652174e-05, |
| "loss": 0.541, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.492063492063492, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 5.271739130434783e-05, |
| "loss": 0.5492, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.4973544973544972, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 5.217391304347826e-05, |
| "loss": 0.5423, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.5026455026455028, |
| "grad_norm": 0.05859375, |
| "learning_rate": 5.163043478260869e-05, |
| "loss": 0.568, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.507936507936508, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 5.108695652173914e-05, |
| "loss": 0.5554, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.5132275132275133, |
| "grad_norm": 0.060546875, |
| "learning_rate": 5.054347826086957e-05, |
| "loss": 0.5691, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.5185185185185186, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 5e-05, |
| "loss": 0.5371, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.5238095238095237, |
| "grad_norm": 0.060302734375, |
| "learning_rate": 4.945652173913044e-05, |
| "loss": 0.5452, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.529100529100529, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 4.891304347826087e-05, |
| "loss": 0.5289, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.5343915343915344, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 4.836956521739131e-05, |
| "loss": 0.5486, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5396825396825395, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 4.782608695652174e-05, |
| "loss": 0.5582, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.544973544973545, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.7282608695652177e-05, |
| "loss": 0.5809, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.5502645502645502, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 4.673913043478261e-05, |
| "loss": 0.5286, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 4.6195652173913046e-05, |
| "loss": 0.5648, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.560846560846561, |
| "grad_norm": 0.052734375, |
| "learning_rate": 4.565217391304348e-05, |
| "loss": 0.5714, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.566137566137566, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.5108695652173916e-05, |
| "loss": 0.5569, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.456521739130435e-05, |
| "loss": 0.5481, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.5767195767195767, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 4.4021739130434786e-05, |
| "loss": 0.5631, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.5820105820105819, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 4.347826086956522e-05, |
| "loss": 0.5802, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.2934782608695655e-05, |
| "loss": 0.5475, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5873015873015874, |
| "eval_loss": 0.5692652463912964, |
| "eval_model_preparation_time": 0.0048, |
| "eval_runtime": 17.3273, |
| "eval_samples_per_second": 155.131, |
| "eval_steps_per_second": 9.696, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5925925925925926, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 4.239130434782609e-05, |
| "loss": 0.5809, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.597883597883598, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 4.1847826086956525e-05, |
| "loss": 0.5451, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6031746031746033, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.130434782608696e-05, |
| "loss": 0.5668, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.6084656084656084, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 4.076086956521739e-05, |
| "loss": 0.5267, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.6137566137566137, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 4.021739130434783e-05, |
| "loss": 0.5538, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.619047619047619, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 3.9673913043478264e-05, |
| "loss": 0.5603, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.6243386243386242, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 3.91304347826087e-05, |
| "loss": 0.5694, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.6296296296296298, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 3.8586956521739134e-05, |
| "loss": 0.571, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.6349206349206349, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 3.804347826086957e-05, |
| "loss": 0.5489, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.6402116402116402, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.5258, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6455026455026456, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 3.695652173913043e-05, |
| "loss": 0.5607, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.6507936507936507, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 3.641304347826087e-05, |
| "loss": 0.5509, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.656084656084656, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 3.58695652173913e-05, |
| "loss": 0.5425, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.6613756613756614, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 3.532608695652174e-05, |
| "loss": 0.5489, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 3.478260869565218e-05, |
| "loss": 0.5491, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.671957671957672, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 3.423913043478261e-05, |
| "loss": 0.556, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.6772486772486772, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 3.369565217391305e-05, |
| "loss": 0.5609, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.6825396825396826, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 3.3152173913043475e-05, |
| "loss": 0.5359, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.687830687830688, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 3.260869565217392e-05, |
| "loss": 0.5578, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.693121693121693, |
| "grad_norm": 0.0546875, |
| "learning_rate": 3.2065217391304345e-05, |
| "loss": 0.559, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6984126984126984, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 3.152173913043479e-05, |
| "loss": 0.5559, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.7037037037037037, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 3.0978260869565215e-05, |
| "loss": 0.5645, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.7089947089947088, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 3.0434782608695656e-05, |
| "loss": 0.5668, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 2.9891304347826088e-05, |
| "loss": 0.5673, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.7195767195767195, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 2.9347826086956526e-05, |
| "loss": 0.5764, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.7248677248677249, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 2.8804347826086957e-05, |
| "loss": 0.6039, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.7301587301587302, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 2.826086956521739e-05, |
| "loss": 0.5714, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.7354497354497354, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 2.7717391304347827e-05, |
| "loss": 0.545, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.7407407407407407, |
| "grad_norm": 0.0546875, |
| "learning_rate": 2.7173913043478262e-05, |
| "loss": 0.5523, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.746031746031746, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 2.66304347826087e-05, |
| "loss": 0.5558, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7513227513227512, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 2.608695652173913e-05, |
| "loss": 0.5465, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.7566137566137567, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 2.554347826086957e-05, |
| "loss": 0.5211, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.7619047619047619, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 2.5e-05, |
| "loss": 0.5461, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.7671957671957672, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 2.4456521739130436e-05, |
| "loss": 0.5763, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.7724867724867726, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 2.391304347826087e-05, |
| "loss": 0.5661, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 2.3369565217391306e-05, |
| "loss": 0.5408, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.783068783068783, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 2.282608695652174e-05, |
| "loss": 0.546, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.7883597883597884, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 2.2282608695652175e-05, |
| "loss": 0.5479, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.7936507936507935, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 2.173913043478261e-05, |
| "loss": 0.5505, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.798941798941799, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 2.1195652173913045e-05, |
| "loss": 0.54, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8042328042328042, |
| "grad_norm": 0.056640625, |
| "learning_rate": 2.065217391304348e-05, |
| "loss": 0.5601, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.8095238095238095, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 2.0108695652173915e-05, |
| "loss": 0.5396, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.8148148148148149, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 1.956521739130435e-05, |
| "loss": 0.5429, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.82010582010582, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 1.9021739130434784e-05, |
| "loss": 0.5481, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.8253968253968254, |
| "grad_norm": 0.05615234375, |
| "learning_rate": 1.8478260869565216e-05, |
| "loss": 0.5218, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.8306878306878307, |
| "grad_norm": 0.05859375, |
| "learning_rate": 1.793478260869565e-05, |
| "loss": 0.544, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.8359788359788358, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 0.5343, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.8412698412698414, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 1.6847826086956524e-05, |
| "loss": 0.5489, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.8465608465608465, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 1.630434782608696e-05, |
| "loss": 0.558, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 1.5760869565217393e-05, |
| "loss": 0.5682, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 1.5217391304347828e-05, |
| "loss": 0.5429, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.8624338624338623, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 1.4673913043478263e-05, |
| "loss": 0.5682, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.8677248677248677, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 1.4130434782608694e-05, |
| "loss": 0.5568, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.873015873015873, |
| "grad_norm": 0.0546875, |
| "learning_rate": 1.3586956521739131e-05, |
| "loss": 0.5407, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.8783068783068781, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 0.5263, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.8835978835978837, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 1.25e-05, |
| "loss": 0.5173, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.0537109375, |
| "learning_rate": 1.1956521739130435e-05, |
| "loss": 0.5434, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.8941798941798942, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 1.141304347826087e-05, |
| "loss": 0.5599, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.8994708994708995, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 1.0869565217391305e-05, |
| "loss": 0.5277, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.0546875, |
| "learning_rate": 1.032608695652174e-05, |
| "loss": 0.5381, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.91005291005291, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 9.782608695652175e-06, |
| "loss": 0.5278, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.9153439153439153, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 9.239130434782608e-06, |
| "loss": 0.5732, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.9206349206349205, |
| "grad_norm": 0.0546875, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 0.542, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.925925925925926, |
| "grad_norm": 0.057373046875, |
| "learning_rate": 8.15217391304348e-06, |
| "loss": 0.5716, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.9312169312169312, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 7.608695652173914e-06, |
| "loss": 0.5486, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.9365079365079365, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 7.065217391304347e-06, |
| "loss": 0.5487, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.9417989417989419, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 0.5321, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.947089947089947, |
| "grad_norm": 0.056640625, |
| "learning_rate": 5.978260869565218e-06, |
| "loss": 0.5404, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.9523809523809523, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 5.4347826086956525e-06, |
| "loss": 0.5565, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.9576719576719577, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 4.891304347826087e-06, |
| "loss": 0.5524, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9629629629629628, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 0.562, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.9682539682539684, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 3.804347826086957e-06, |
| "loss": 0.5583, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.9735449735449735, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 3.2608695652173914e-06, |
| "loss": 0.6053, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.9788359788359788, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 2.7173913043478263e-06, |
| "loss": 0.5485, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.9841269841269842, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 0.5181, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.9894179894179893, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 1.6304347826086957e-06, |
| "loss": 0.5413, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.9947089947089947, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 1.0869565217391306e-06, |
| "loss": 0.5374, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 5.434782608695653e-07, |
| "loss": 0.5506, |
| "step": 378 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 378, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.276650144564511e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|