{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 230,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004347826086956522,
      "grad_norm": 209.3180389404297,
      "learning_rate": 0.0,
      "loss": 5.8188,
      "step": 1
    },
    {
      "epoch": 0.008695652173913044,
      "grad_norm": 215.69874572753906,
      "learning_rate": 4.347826086956522e-06,
      "loss": 5.9259,
      "step": 2
    },
    {
      "epoch": 0.013043478260869565,
      "grad_norm": 62.712825775146484,
      "learning_rate": 8.695652173913044e-06,
      "loss": 5.4202,
      "step": 3
    },
    {
      "epoch": 0.017391304347826087,
      "grad_norm": 85.59194946289062,
      "learning_rate": 1.3043478260869566e-05,
      "loss": 5.3079,
      "step": 4
    },
    {
      "epoch": 0.021739130434782608,
      "grad_norm": 22.901897430419922,
      "learning_rate": 1.739130434782609e-05,
      "loss": 5.0196,
      "step": 5
    },
    {
      "epoch": 0.02608695652173913,
      "grad_norm": 22.081829071044922,
      "learning_rate": 2.173913043478261e-05,
      "loss": 4.8222,
      "step": 6
    },
    {
      "epoch": 0.030434782608695653,
      "grad_norm": 11.022245407104492,
      "learning_rate": 2.608695652173913e-05,
      "loss": 4.4617,
      "step": 7
    },
    {
      "epoch": 0.034782608695652174,
      "grad_norm": 7.274469375610352,
      "learning_rate": 3.0434782608695656e-05,
      "loss": 4.335,
      "step": 8
    },
    {
      "epoch": 0.0391304347826087,
      "grad_norm": 3.8645834922790527,
      "learning_rate": 3.478260869565218e-05,
      "loss": 4.0476,
      "step": 9
    },
    {
      "epoch": 0.043478260869565216,
      "grad_norm": 2.6724016666412354,
      "learning_rate": 3.91304347826087e-05,
      "loss": 3.8387,
      "step": 10
    },
    {
      "epoch": 0.04782608695652174,
      "grad_norm": 2.258195161819458,
      "learning_rate": 4.347826086956522e-05,
      "loss": 3.8144,
      "step": 11
    },
    {
      "epoch": 0.05217391304347826,
      "grad_norm": 1.8822625875473022,
      "learning_rate": 4.782608695652174e-05,
      "loss": 3.4008,
      "step": 12
    },
    {
      "epoch": 0.05652173913043478,
      "grad_norm": 2.047840118408203,
      "learning_rate": 5.217391304347826e-05,
      "loss": 3.2554,
      "step": 13
    },
    {
      "epoch": 0.06086956521739131,
      "grad_norm": 1.8671568632125854,
      "learning_rate": 5.652173913043478e-05,
      "loss": 3.2461,
      "step": 14
    },
    {
      "epoch": 0.06521739130434782,
      "grad_norm": 1.6069483757019043,
      "learning_rate": 6.086956521739131e-05,
      "loss": 2.9738,
      "step": 15
    },
    {
      "epoch": 0.06956521739130435,
      "grad_norm": 1.3096915483474731,
      "learning_rate": 6.521739130434783e-05,
      "loss": 2.7823,
      "step": 16
    },
    {
      "epoch": 0.07391304347826087,
      "grad_norm": 1.3594956398010254,
      "learning_rate": 6.956521739130436e-05,
      "loss": 2.6255,
      "step": 17
    },
    {
      "epoch": 0.0782608695652174,
      "grad_norm": 1.0210895538330078,
      "learning_rate": 7.391304347826086e-05,
      "loss": 2.4501,
      "step": 18
    },
    {
      "epoch": 0.08260869565217391,
      "grad_norm": 0.8942164182662964,
      "learning_rate": 7.82608695652174e-05,
      "loss": 2.2934,
      "step": 19
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 0.8361735343933105,
      "learning_rate": 8.260869565217392e-05,
      "loss": 2.2029,
      "step": 20
    },
    {
      "epoch": 0.09130434782608696,
      "grad_norm": 0.794482409954071,
      "learning_rate": 8.695652173913044e-05,
      "loss": 2.0223,
      "step": 21
    },
    {
      "epoch": 0.09565217391304348,
      "grad_norm": 0.7513137459754944,
      "learning_rate": 9.130434782608696e-05,
      "loss": 1.8504,
      "step": 22
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.76312655210495,
      "learning_rate": 9.565217391304348e-05,
      "loss": 1.6577,
      "step": 23
    },
    {
      "epoch": 0.10434782608695652,
      "grad_norm": 0.8560758829116821,
      "learning_rate": 0.0001,
      "loss": 1.5565,
      "step": 24
    },
    {
      "epoch": 0.10869565217391304,
      "grad_norm": 0.7479954957962036,
      "learning_rate": 0.00010434782608695653,
      "loss": 1.4364,
      "step": 25
    },
    {
      "epoch": 0.11304347826086956,
      "grad_norm": 0.5951140522956848,
      "learning_rate": 0.00010869565217391305,
      "loss": 1.2957,
      "step": 26
    },
    {
      "epoch": 0.11739130434782609,
      "grad_norm": 0.503224790096283,
      "learning_rate": 0.00011304347826086956,
      "loss": 1.1799,
      "step": 27
    },
    {
      "epoch": 0.12173913043478261,
      "grad_norm": 0.47480374574661255,
      "learning_rate": 0.0001173913043478261,
      "loss": 1.1277,
      "step": 28
    },
    {
      "epoch": 0.12608695652173912,
      "grad_norm": 0.38552260398864746,
      "learning_rate": 0.00012173913043478263,
      "loss": 1.0744,
      "step": 29
    },
    {
      "epoch": 0.13043478260869565,
      "grad_norm": 0.35596558451652527,
      "learning_rate": 0.00012608695652173915,
      "loss": 1.0023,
      "step": 30
    },
    {
      "epoch": 0.13478260869565217,
      "grad_norm": 0.32971665263175964,
      "learning_rate": 0.00013043478260869567,
      "loss": 0.9691,
      "step": 31
    },
    {
      "epoch": 0.1391304347826087,
      "grad_norm": 0.37770169973373413,
      "learning_rate": 0.0001347826086956522,
      "loss": 0.9116,
      "step": 32
    },
    {
      "epoch": 0.14347826086956522,
      "grad_norm": 0.22640736401081085,
      "learning_rate": 0.0001391304347826087,
      "loss": 0.8613,
      "step": 33
    },
    {
      "epoch": 0.14782608695652175,
      "grad_norm": 0.20925410091876984,
      "learning_rate": 0.0001434782608695652,
      "loss": 0.8836,
      "step": 34
    },
    {
      "epoch": 0.15217391304347827,
      "grad_norm": 0.20542123913764954,
      "learning_rate": 0.00014782608695652173,
      "loss": 0.8502,
      "step": 35
    },
    {
      "epoch": 0.1565217391304348,
      "grad_norm": 0.16715222597122192,
      "learning_rate": 0.00015217391304347827,
      "loss": 0.8292,
      "step": 36
    },
    {
      "epoch": 0.1608695652173913,
      "grad_norm": 0.1648133248090744,
      "learning_rate": 0.0001565217391304348,
      "loss": 0.8189,
      "step": 37
    },
    {
      "epoch": 0.16521739130434782,
      "grad_norm": 0.13562779128551483,
      "learning_rate": 0.00016086956521739132,
      "loss": 0.8078,
      "step": 38
    },
    {
      "epoch": 0.16956521739130434,
      "grad_norm": 0.1290610432624817,
      "learning_rate": 0.00016521739130434784,
      "loss": 0.7712,
      "step": 39
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 0.11024343967437744,
      "learning_rate": 0.00016956521739130436,
      "loss": 0.7448,
      "step": 40
    },
    {
      "epoch": 0.1782608695652174,
      "grad_norm": 0.12418993562459946,
      "learning_rate": 0.00017391304347826088,
      "loss": 0.7633,
      "step": 41
    },
    {
      "epoch": 0.1826086956521739,
      "grad_norm": 0.10319849103689194,
      "learning_rate": 0.0001782608695652174,
      "loss": 0.7463,
      "step": 42
    },
    {
      "epoch": 0.18695652173913044,
      "grad_norm": 0.10371455550193787,
      "learning_rate": 0.00018260869565217392,
      "loss": 0.7516,
      "step": 43
    },
    {
      "epoch": 0.19130434782608696,
      "grad_norm": 0.09219090640544891,
      "learning_rate": 0.00018695652173913045,
      "loss": 0.7265,
      "step": 44
    },
    {
      "epoch": 0.1956521739130435,
      "grad_norm": 0.09577666968107224,
      "learning_rate": 0.00019130434782608697,
      "loss": 0.7382,
      "step": 45
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.08755916357040405,
      "learning_rate": 0.0001956521739130435,
      "loss": 0.7392,
      "step": 46
    },
    {
      "epoch": 0.20434782608695654,
      "grad_norm": 0.08335893601179123,
      "learning_rate": 0.0002,
      "loss": 0.7182,
      "step": 47
    },
    {
      "epoch": 0.20869565217391303,
      "grad_norm": 0.08622466027736664,
      "learning_rate": 0.00019999712083215463,
      "loss": 0.7196,
      "step": 48
    },
    {
      "epoch": 0.21304347826086956,
      "grad_norm": 0.07222707569599152,
      "learning_rate": 0.00019998848349441062,
      "loss": 0.7014,
      "step": 49
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 0.07286012172698975,
      "learning_rate": 0.00019997408848413493,
      "loss": 0.6986,
      "step": 50
    },
    {
      "epoch": 0.2217391304347826,
      "grad_norm": 0.07811558246612549,
      "learning_rate": 0.00019995393663024054,
      "loss": 0.6922,
      "step": 51
    },
    {
      "epoch": 0.22608695652173913,
      "grad_norm": 0.07095416635274887,
      "learning_rate": 0.0001999280290931388,
      "loss": 0.7188,
      "step": 52
    },
    {
      "epoch": 0.23043478260869565,
      "grad_norm": 0.0705651044845581,
      "learning_rate": 0.00019989636736467278,
      "loss": 0.7135,
      "step": 53
    },
    {
      "epoch": 0.23478260869565218,
      "grad_norm": 0.0649741142988205,
      "learning_rate": 0.00019985895326803097,
      "loss": 0.6833,
      "step": 54
    },
    {
      "epoch": 0.2391304347826087,
      "grad_norm": 0.07023416459560394,
      "learning_rate": 0.00019981578895764273,
      "loss": 0.6902,
      "step": 55
    },
    {
      "epoch": 0.24347826086956523,
      "grad_norm": 0.065043605864048,
      "learning_rate": 0.00019976687691905393,
      "loss": 0.6933,
      "step": 56
    },
    {
      "epoch": 0.24782608695652175,
      "grad_norm": 0.0647321566939354,
      "learning_rate": 0.00019971221996878394,
      "loss": 0.6946,
      "step": 57
    },
    {
      "epoch": 0.25217391304347825,
      "grad_norm": 0.08214448392391205,
      "learning_rate": 0.0001996518212541634,
      "loss": 0.6789,
      "step": 58
    },
    {
      "epoch": 0.2565217391304348,
      "grad_norm": 0.06106014922261238,
      "learning_rate": 0.00019958568425315314,
      "loss": 0.6826,
      "step": 59
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 0.06052952632308006,
      "learning_rate": 0.0001995138127741436,
      "loss": 0.6706,
      "step": 60
    },
    {
      "epoch": 0.26521739130434785,
      "grad_norm": 0.06265316903591156,
      "learning_rate": 0.00019943621095573586,
      "loss": 0.6809,
      "step": 61
    },
    {
      "epoch": 0.26956521739130435,
      "grad_norm": 0.0603368878364563,
      "learning_rate": 0.00019935288326650312,
      "loss": 0.6728,
      "step": 62
    },
    {
      "epoch": 0.27391304347826084,
      "grad_norm": 0.06611189991235733,
      "learning_rate": 0.00019926383450473344,
      "loss": 0.6499,
      "step": 63
    },
    {
      "epoch": 0.2782608695652174,
      "grad_norm": 0.06278355419635773,
      "learning_rate": 0.00019916906979815347,
      "loss": 0.6561,
      "step": 64
    },
    {
      "epoch": 0.2826086956521739,
      "grad_norm": 0.07379094511270523,
      "learning_rate": 0.00019906859460363307,
      "loss": 0.6786,
      "step": 65
    },
    {
      "epoch": 0.28695652173913044,
      "grad_norm": 0.09574166685342789,
      "learning_rate": 0.0001989624147068713,
      "loss": 0.6625,
      "step": 66
    },
    {
      "epoch": 0.29130434782608694,
      "grad_norm": 0.08743462711572647,
      "learning_rate": 0.00019885053622206304,
      "loss": 0.648,
      "step": 67
    },
    {
      "epoch": 0.2956521739130435,
      "grad_norm": 0.08914034813642502,
      "learning_rate": 0.00019873296559154698,
      "loss": 0.6561,
      "step": 68
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.06804706901311874,
      "learning_rate": 0.0001986097095854347,
      "loss": 0.658,
      "step": 69
    },
    {
      "epoch": 0.30434782608695654,
      "grad_norm": 0.09893489629030228,
      "learning_rate": 0.00019848077530122083,
      "loss": 0.6708,
      "step": 70
    },
    {
      "epoch": 0.30869565217391304,
      "grad_norm": 0.07928409427404404,
      "learning_rate": 0.0001983461701633742,
      "loss": 0.6407,
      "step": 71
    },
    {
      "epoch": 0.3130434782608696,
      "grad_norm": 0.07455449551343918,
      "learning_rate": 0.0001982059019229106,
      "loss": 0.676,
      "step": 72
    },
    {
      "epoch": 0.3173913043478261,
      "grad_norm": 0.0770968496799469,
      "learning_rate": 0.00019805997865694614,
      "loss": 0.6639,
      "step": 73
    },
    {
      "epoch": 0.3217391304347826,
      "grad_norm": 0.06771919876337051,
      "learning_rate": 0.00019790840876823232,
      "loss": 0.6486,
      "step": 74
    },
    {
      "epoch": 0.32608695652173914,
      "grad_norm": 0.07457810640335083,
      "learning_rate": 0.0001977512009846721,
      "loss": 0.6681,
      "step": 75
    },
    {
      "epoch": 0.33043478260869563,
      "grad_norm": 0.0826922208070755,
      "learning_rate": 0.00019758836435881746,
      "loss": 0.6356,
      "step": 76
    },
    {
      "epoch": 0.3347826086956522,
      "grad_norm": 0.07923886179924011,
      "learning_rate": 0.00019741990826734794,
      "loss": 0.6682,
      "step": 77
    },
    {
      "epoch": 0.3391304347826087,
      "grad_norm": 0.11045071482658386,
      "learning_rate": 0.0001972458424105307,
      "loss": 0.6203,
      "step": 78
    },
    {
      "epoch": 0.34347826086956523,
      "grad_norm": 0.11731227487325668,
      "learning_rate": 0.00019706617681166218,
      "loss": 0.66,
      "step": 79
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.12649305164813995,
      "learning_rate": 0.00019688092181649065,
      "loss": 0.6613,
      "step": 80
    },
    {
      "epoch": 0.3521739130434783,
      "grad_norm": 0.1144268661737442,
      "learning_rate": 0.00019669008809262062,
      "loss": 0.6606,
      "step": 81
    },
    {
      "epoch": 0.3565217391304348,
      "grad_norm": 0.11361440271139145,
      "learning_rate": 0.00019649368662889855,
      "loss": 0.629,
      "step": 82
    },
    {
      "epoch": 0.36086956521739133,
      "grad_norm": 0.12539249658584595,
      "learning_rate": 0.00019629172873477995,
      "loss": 0.6676,
      "step": 83
    },
    {
      "epoch": 0.3652173913043478,
      "grad_norm": 0.11141279339790344,
      "learning_rate": 0.00019608422603967836,
      "loss": 0.6376,
      "step": 84
    },
    {
      "epoch": 0.3695652173913043,
      "grad_norm": 0.09837634861469269,
      "learning_rate": 0.00019587119049229557,
      "loss": 0.6503,
      "step": 85
    },
    {
      "epoch": 0.3739130434782609,
      "grad_norm": 0.15677575767040253,
      "learning_rate": 0.0001956526343599335,
      "loss": 0.6638,
      "step": 86
    },
    {
      "epoch": 0.3782608695652174,
      "grad_norm": 0.252825528383255,
      "learning_rate": 0.0001954285702277879,
      "loss": 0.6713,
      "step": 87
    },
    {
      "epoch": 0.3826086956521739,
      "grad_norm": 0.3602813482284546,
      "learning_rate": 0.00019519901099822372,
      "loss": 0.6596,
      "step": 88
    },
    {
      "epoch": 0.3869565217391304,
      "grad_norm": 0.3970949053764343,
      "learning_rate": 0.00019496396989003193,
      "loss": 0.6617,
      "step": 89
    },
    {
      "epoch": 0.391304347826087,
      "grad_norm": 0.284343421459198,
      "learning_rate": 0.00019472346043766865,
      "loss": 0.6229,
      "step": 90
    },
    {
      "epoch": 0.39565217391304347,
      "grad_norm": 0.19832171499729156,
      "learning_rate": 0.00019447749649047542,
      "loss": 0.6665,
      "step": 91
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.24541743099689484,
      "learning_rate": 0.00019422609221188207,
      "loss": 0.6585,
      "step": 92
    },
    {
      "epoch": 0.4043478260869565,
      "grad_norm": 0.1915537267923355,
      "learning_rate": 0.00019396926207859084,
      "loss": 0.6343,
      "step": 93
    },
    {
      "epoch": 0.40869565217391307,
      "grad_norm": 0.20492875576019287,
      "learning_rate": 0.00019370702087974302,
      "loss": 0.6438,
      "step": 94
    },
    {
      "epoch": 0.41304347826086957,
      "grad_norm": 0.25835996866226196,
      "learning_rate": 0.00019343938371606712,
      "loss": 0.6502,
      "step": 95
    },
    {
      "epoch": 0.41739130434782606,
      "grad_norm": 0.2585464417934418,
      "learning_rate": 0.00019316636599900946,
      "loss": 0.6393,
      "step": 96
    },
    {
      "epoch": 0.4217391304347826,
      "grad_norm": 0.2317182868719101,
      "learning_rate": 0.00019288798344984672,
      "loss": 0.6275,
      "step": 97
    },
    {
      "epoch": 0.4260869565217391,
      "grad_norm": 0.23632416129112244,
      "learning_rate": 0.00019260425209878052,
      "loss": 0.6414,
      "step": 98
    },
    {
      "epoch": 0.43043478260869567,
      "grad_norm": 0.1801244169473648,
      "learning_rate": 0.00019231518828401458,
      "loss": 0.6491,
      "step": 99
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.24871514737606049,
      "learning_rate": 0.00019202080865081368,
      "loss": 0.6581,
      "step": 100
    },
    {
      "epoch": 0.4391304347826087,
      "grad_norm": 0.26276353001594543,
      "learning_rate": 0.00019172113015054532,
      "loss": 0.644,
      "step": 101
    },
    {
      "epoch": 0.4434782608695652,
      "grad_norm": 0.19743724167346954,
      "learning_rate": 0.0001914161700397035,
      "loss": 0.6519,
      "step": 102
    },
    {
      "epoch": 0.44782608695652176,
      "grad_norm": 0.31385916471481323,
      "learning_rate": 0.00019110594587891519,
      "loss": 0.6462,
      "step": 103
    },
    {
      "epoch": 0.45217391304347826,
      "grad_norm": 0.2689647674560547,
      "learning_rate": 0.0001907904755319289,
      "loss": 0.6517,
      "step": 104
    },
    {
      "epoch": 0.45652173913043476,
      "grad_norm": 0.17245543003082275,
      "learning_rate": 0.00019046977716458626,
      "loss": 0.6245,
      "step": 105
    },
    {
      "epoch": 0.4608695652173913,
      "grad_norm": 0.4380849003791809,
      "learning_rate": 0.00019014386924377582,
      "loss": 0.6519,
      "step": 106
    },
    {
      "epoch": 0.4652173913043478,
      "grad_norm": 0.305043488740921,
      "learning_rate": 0.0001898127705363696,
      "loss": 0.6606,
      "step": 107
    },
    {
      "epoch": 0.46956521739130436,
      "grad_norm": 0.20340269804000854,
      "learning_rate": 0.0001894765001081428,
      "loss": 0.6359,
      "step": 108
    },
    {
      "epoch": 0.47391304347826085,
      "grad_norm": 0.15703125298023224,
      "learning_rate": 0.0001891350773226754,
      "loss": 0.6461,
      "step": 109
    },
    {
      "epoch": 0.4782608695652174,
      "grad_norm": 0.16932646930217743,
      "learning_rate": 0.0001887885218402375,
      "loss": 0.6413,
      "step": 110
    },
    {
      "epoch": 0.4826086956521739,
      "grad_norm": 0.1790553480386734,
      "learning_rate": 0.00018843685361665723,
      "loss": 0.6378,
      "step": 111
    },
    {
      "epoch": 0.48695652173913045,
      "grad_norm": 0.24903282523155212,
      "learning_rate": 0.00018808009290217136,
      "loss": 0.6308,
      "step": 112
    },
    {
      "epoch": 0.49130434782608695,
      "grad_norm": 0.20529182255268097,
      "learning_rate": 0.00018771826024025946,
      "loss": 0.6315,
      "step": 113
    },
    {
      "epoch": 0.4956521739130435,
      "grad_norm": 0.18206629157066345,
      "learning_rate": 0.00018735137646646078,
      "loss": 0.6409,
      "step": 114
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.22906547784805298,
      "learning_rate": 0.00018697946270717467,
      "loss": 0.6522,
      "step": 115
    },
    {
      "epoch": 0.5043478260869565,
      "grad_norm": 0.23560722172260284,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.6424,
      "step": 116
    },
    {
      "epoch": 0.508695652173913,
      "grad_norm": 0.3479248881340027,
      "learning_rate": 0.00018622063118472134,
      "loss": 0.6591,
      "step": 117
    },
    {
      "epoch": 0.5130434782608696,
      "grad_norm": 0.48405924439430237,
      "learning_rate": 0.00018583375711762052,
      "loss": 0.6312,
      "step": 118
    },
    {
      "epoch": 0.5173913043478261,
      "grad_norm": 0.6660999655723572,
      "learning_rate": 0.00018544194045464886,
      "loss": 0.6492,
      "step": 119
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 0.6070662140846252,
      "learning_rate": 0.0001850452037579251,
      "loss": 0.631,
      "step": 120
    },
    {
      "epoch": 0.5260869565217391,
      "grad_norm": 0.2432556301355362,
      "learning_rate": 0.00018464356987288013,
      "loss": 0.6192,
      "step": 121
    },
    {
      "epoch": 0.5304347826086957,
      "grad_norm": 0.4718700647354126,
      "learning_rate": 0.00018423706192694116,
      "loss": 0.6385,
      "step": 122
    },
    {
      "epoch": 0.5347826086956522,
      "grad_norm": 0.41220200061798096,
      "learning_rate": 0.00018382570332820043,
      "loss": 0.6362,
      "step": 123
    },
    {
      "epoch": 0.5391304347826087,
      "grad_norm": 0.24313992261886597,
      "learning_rate": 0.00018340951776406694,
      "loss": 0.659,
      "step": 124
    },
    {
      "epoch": 0.5434782608695652,
      "grad_norm": 0.42307668924331665,
      "learning_rate": 0.00018298852919990252,
      "loss": 0.6484,
      "step": 125
    },
    {
      "epoch": 0.5478260869565217,
      "grad_norm": 0.2858572006225586,
      "learning_rate": 0.00018256276187764197,
      "loss": 0.6437,
      "step": 126
    },
    {
      "epoch": 0.5521739130434783,
      "grad_norm": 0.2318851351737976,
      "learning_rate": 0.0001821322403143969,
      "loss": 0.6191,
      "step": 127
    },
    {
      "epoch": 0.5565217391304348,
      "grad_norm": 0.3861188292503357,
      "learning_rate": 0.0001816969893010442,
      "loss": 0.639,
      "step": 128
    },
    {
      "epoch": 0.5608695652173913,
      "grad_norm": 0.2969801127910614,
      "learning_rate": 0.0001812570339007983,
      "loss": 0.6624,
      "step": 129
    },
    {
      "epoch": 0.5652173913043478,
      "grad_norm": 0.29341548681259155,
      "learning_rate": 0.00018081239944776805,
      "loss": 0.639,
      "step": 130
    },
    {
      "epoch": 0.5695652173913044,
      "grad_norm": 0.43678849935531616,
      "learning_rate": 0.00018036311154549784,
      "loss": 0.6384,
      "step": 131
    },
    {
      "epoch": 0.5739130434782609,
      "grad_norm": 0.5248069167137146,
      "learning_rate": 0.00017990919606549328,
      "loss": 0.6451,
      "step": 132
    },
    {
      "epoch": 0.5782608695652174,
      "grad_norm": 0.5387030243873596,
      "learning_rate": 0.00017945067914573146,
      "loss": 0.6198,
      "step": 133
    },
    {
      "epoch": 0.5826086956521739,
      "grad_norm": 0.55666184425354,
      "learning_rate": 0.00017898758718915586,
      "loss": 0.6391,
      "step": 134
    },
    {
      "epoch": 0.5869565217391305,
      "grad_norm": 0.4839560389518738,
      "learning_rate": 0.0001785199468621559,
      "loss": 0.6411,
      "step": 135
    },
    {
      "epoch": 0.591304347826087,
      "grad_norm": 0.5173195004463196,
      "learning_rate": 0.00017804778509303138,
      "loss": 0.6318,
      "step": 136
    },
    {
      "epoch": 0.5956521739130435,
      "grad_norm": 0.341448038816452,
      "learning_rate": 0.000177571129070442,
      "loss": 0.6427,
      "step": 137
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2654604911804199,
      "learning_rate": 0.00017709000624184162,
      "loss": 0.616,
      "step": 138
    },
    {
      "epoch": 0.6043478260869565,
      "grad_norm": 0.4000408351421356,
      "learning_rate": 0.0001766044443118978,
      "loss": 0.611,
      "step": 139
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 0.2812383770942688,
      "learning_rate": 0.00017611447124089649,
      "loss": 0.6508,
      "step": 140
    },
    {
      "epoch": 0.6130434782608696,
      "grad_norm": 0.30483949184417725,
      "learning_rate": 0.00017562011524313185,
      "loss": 0.6628,
      "step": 141
    },
    {
      "epoch": 0.6173913043478261,
      "grad_norm": 0.4457907974720001,
      "learning_rate": 0.0001751214047852818,
      "loss": 0.6274,
      "step": 142
    },
    {
      "epoch": 0.6217391304347826,
      "grad_norm": 0.38395488262176514,
      "learning_rate": 0.00017461836858476856,
      "loss": 0.6528,
      "step": 143
    },
    {
      "epoch": 0.6260869565217392,
      "grad_norm": 0.573344886302948,
      "learning_rate": 0.00017411103560810526,
      "loss": 0.6504,
      "step": 144
    },
    {
      "epoch": 0.6304347826086957,
      "grad_norm": 0.5133661031723022,
      "learning_rate": 0.00017359943506922774,
      "loss": 0.6334,
      "step": 145
    },
    {
      "epoch": 0.6347826086956522,
      "grad_norm": 0.2995568513870239,
      "learning_rate": 0.00017308359642781242,
      "loss": 0.6328,
      "step": 146
    },
    {
      "epoch": 0.6391304347826087,
      "grad_norm": 0.5677820444107056,
      "learning_rate": 0.0001725635493875799,
      "loss": 0.639,
      "step": 147
    },
    {
      "epoch": 0.6434782608695652,
      "grad_norm": 0.4751092791557312,
      "learning_rate": 0.00017203932389458454,
      "loss": 0.6229,
      "step": 148
    },
    {
      "epoch": 0.6478260869565218,
      "grad_norm": 0.4374710023403168,
      "learning_rate": 0.00017151095013548994,
      "loss": 0.6377,
      "step": 149
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 0.4172927439212799,
      "learning_rate": 0.0001709784585358309,
      "loss": 0.6277,
      "step": 150
    },
    {
      "epoch": 0.6565217391304348,
      "grad_norm": 0.3994798958301544,
      "learning_rate": 0.00017044187975826124,
      "loss": 0.637,
      "step": 151
    },
    {
      "epoch": 0.6608695652173913,
      "grad_norm": 0.34366917610168457,
      "learning_rate": 0.00016990124470078822,
      "loss": 0.6556,
      "step": 152
    },
    {
      "epoch": 0.6652173913043479,
      "grad_norm": 0.533347487449646,
      "learning_rate": 0.0001693565844949933,
      "loss": 0.6073,
      "step": 153
    },
    {
      "epoch": 0.6695652173913044,
      "grad_norm": 0.4292946457862854,
      "learning_rate": 0.0001688079305042395,
      "loss": 0.6548,
      "step": 154
    },
    {
      "epoch": 0.6739130434782609,
      "grad_norm": 0.2770076394081116,
      "learning_rate": 0.00016825531432186543,
      "loss": 0.6014,
      "step": 155
    },
    {
      "epoch": 0.6782608695652174,
      "grad_norm": 0.377838134765625,
      "learning_rate": 0.0001676987677693659,
      "loss": 0.6406,
      "step": 156
    },
    {
      "epoch": 0.6826086956521739,
      "grad_norm": 0.421268492937088,
      "learning_rate": 0.0001671383228945597,
      "loss": 0.6288,
      "step": 157
    },
    {
      "epoch": 0.6869565217391305,
      "grad_norm": 0.4219221770763397,
      "learning_rate": 0.00016657401196974405,
      "loss": 0.647,
      "step": 158
    },
    {
      "epoch": 0.691304347826087,
      "grad_norm": 0.3563760221004486,
      "learning_rate": 0.00016600586748983641,
      "loss": 0.6307,
      "step": 159
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 0.39387866854667664,
      "learning_rate": 0.00016543392217050314,
      "loss": 0.631,
      "step": 160
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.36268243193626404,
      "learning_rate": 0.0001648582089462756,
      "loss": 0.6429,
      "step": 161
    },
    {
      "epoch": 0.7043478260869566,
      "grad_norm": 0.3702019155025482,
      "learning_rate": 0.00016427876096865394,
      "loss": 0.6338,
      "step": 162
    },
    {
      "epoch": 0.7086956521739131,
      "grad_norm": 0.44408297538757324,
      "learning_rate": 0.00016369561160419784,
      "loss": 0.6416,
      "step": 163
    },
    {
      "epoch": 0.7130434782608696,
      "grad_norm": 0.5986080765724182,
      "learning_rate": 0.00016310879443260528,
      "loss": 0.6187,
      "step": 164
    },
    {
      "epoch": 0.717391304347826,
      "grad_norm": 0.7963016629219055,
      "learning_rate": 0.0001625183432447789,
      "loss": 0.6365,
      "step": 165
    },
    {
      "epoch": 0.7217391304347827,
      "grad_norm": 1.2156025171279907,
      "learning_rate": 0.0001619242920408802,
      "loss": 0.6625,
      "step": 166
    },
    {
      "epoch": 0.7260869565217392,
      "grad_norm": 0.7924716472625732,
      "learning_rate": 0.00016132667502837165,
      "loss": 0.6276,
      "step": 167
    },
    {
      "epoch": 0.7304347826086957,
      "grad_norm": 0.29551273584365845,
      "learning_rate": 0.00016072552662004696,
      "loss": 0.6159,
      "step": 168
    },
    {
      "epoch": 0.7347826086956522,
      "grad_norm": 0.7566269040107727,
      "learning_rate": 0.00016012088143204953,
      "loss": 0.6485,
      "step": 169
    },
    {
      "epoch": 0.7391304347826086,
      "grad_norm": 1.001354455947876,
      "learning_rate": 0.00015951277428187898,
      "loss": 0.6323,
      "step": 170
    },
    {
      "epoch": 0.7434782608695653,
      "grad_norm": 0.9103027582168579,
      "learning_rate": 0.00015890124018638638,
      "loss": 0.6255,
      "step": 171
    },
    {
      "epoch": 0.7478260869565218,
      "grad_norm": 0.3885137736797333,
      "learning_rate": 0.00015828631435975784,
      "loss": 0.6323,
      "step": 172
    },
    {
      "epoch": 0.7521739130434782,
      "grad_norm": 0.6141281723976135,
      "learning_rate": 0.00015766803221148673,
      "loss": 0.6504,
      "step": 173
    },
    {
      "epoch": 0.7565217391304347,
      "grad_norm": 0.8024821281433105,
      "learning_rate": 0.0001570464293443346,
      "loss": 0.641,
      "step": 174
    },
    {
      "epoch": 0.7608695652173914,
      "grad_norm": 0.43333736062049866,
      "learning_rate": 0.00015642154155228122,
      "loss": 0.627,
      "step": 175
    },
    {
      "epoch": 0.7652173913043478,
      "grad_norm": 0.649389922618866,
      "learning_rate": 0.00015579340481846336,
      "loss": 0.6483,
      "step": 176
    },
    {
      "epoch": 0.7695652173913043,
      "grad_norm": 1.0359424352645874,
      "learning_rate": 0.00015516205531310273,
      "loss": 0.6332,
      "step": 177
    },
    {
      "epoch": 0.7739130434782608,
      "grad_norm": 0.7209396362304688,
      "learning_rate": 0.00015452752939142328,
      "loss": 0.6524,
      "step": 178
    },
    {
      "epoch": 0.7782608695652173,
      "grad_norm": 0.6178513169288635,
      "learning_rate": 0.00015388986359155758,
      "loss": 0.645,
      "step": 179
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 0.9886595606803894,
      "learning_rate": 0.00015324909463244296,
      "loss": 0.6642,
      "step": 180
    },
    {
      "epoch": 0.7869565217391304,
      "grad_norm": 0.7466373443603516,
      "learning_rate": 0.00015260525941170712,
      "loss": 0.6315,
      "step": 181
    },
    {
      "epoch": 0.7913043478260869,
      "grad_norm": 0.5552679896354675,
      "learning_rate": 0.00015195839500354335,
      "loss": 0.6207,
      "step": 182
    },
    {
      "epoch": 0.7956521739130434,
      "grad_norm": 0.5576688647270203,
      "learning_rate": 0.0001513085386565758,
      "loss": 0.6421,
      "step": 183
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.4000707268714905,
      "learning_rate": 0.00015065572779171432,
      "loss": 0.6398,
      "step": 184
    },
    {
      "epoch": 0.8043478260869565,
      "grad_norm": 0.4978863298892975,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.6456,
      "step": 185
    },
    {
      "epoch": 0.808695652173913,
      "grad_norm": 0.4530424177646637,
      "learning_rate": 0.00014934139304044033,
      "loss": 0.6453,
      "step": 186
    },
    {
      "epoch": 0.8130434782608695,
      "grad_norm": 0.29163071513175964,
      "learning_rate": 0.00014867994483783485,
      "loss": 0.6558,
      "step": 187
    },
    {
      "epoch": 0.8173913043478261,
      "grad_norm": 0.33445900678634644,
      "learning_rate": 0.00014801569348059157,
      "loss": 0.6291,
      "step": 188
    },
    {
      "epoch": 0.8217391304347826,
      "grad_norm": 0.3891032934188843,
      "learning_rate": 0.0001473486772185334,
      "loss": 0.6458,
      "step": 189
    },
    {
      "epoch": 0.8260869565217391,
      "grad_norm": 0.4320944845676422,
      "learning_rate": 0.00014667893446069588,
      "loss": 0.6275,
      "step": 190
    },
    {
      "epoch": 0.8304347826086956,
      "grad_norm": 0.3652418553829193,
      "learning_rate": 0.00014600650377311522,
      "loss": 0.6434,
      "step": 191
    },
    {
      "epoch": 0.8347826086956521,
      "grad_norm": 0.2939096689224243,
      "learning_rate": 0.00014533142387660773,
      "loss": 0.6462,
      "step": 192
    },
    {
      "epoch": 0.8391304347826087,
      "grad_norm": 0.36094796657562256,
      "learning_rate": 0.00014465373364454001,
      "loss": 0.6259,
      "step": 193
    },
    {
      "epoch": 0.8434782608695652,
      "grad_norm": 0.503746747970581,
      "learning_rate": 0.00014397347210059057,
      "loss": 0.6565,
      "step": 194
    },
    {
      "epoch": 0.8478260869565217,
      "grad_norm": 0.501377522945404,
      "learning_rate": 0.00014329067841650274,
      "loss": 0.6358,
      "step": 195
    },
    {
      "epoch": 0.8521739130434782,
      "grad_norm": 0.40720251202583313,
      "learning_rate": 0.00014260539190982886,
      "loss": 0.636,
      "step": 196
    },
    {
      "epoch": 0.8565217391304348,
      "grad_norm": 0.3170947730541229,
      "learning_rate": 0.00014191765204166643,
      "loss": 0.6343,
      "step": 197
    },
    {
      "epoch": 0.8608695652173913,
      "grad_norm": 0.43554455041885376,
      "learning_rate": 0.00014122749841438575,
      "loss": 0.6319,
      "step": 198
    },
    {
      "epoch": 0.8652173913043478,
      "grad_norm": 0.5128415822982788,
      "learning_rate": 0.00014053497076934948,
      "loss": 0.6326,
      "step": 199
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.44992515444755554,
      "learning_rate": 0.00013984010898462416,
      "loss": 0.6343,
      "step": 200
    },
    {
      "epoch": 0.8739130434782608,
      "grad_norm": 0.506968080997467,
      "learning_rate": 0.00013914295307268396,
      "loss": 0.6472,
      "step": 201
    },
    {
      "epoch": 0.8782608695652174,
      "grad_norm": 0.6257392764091492,
      "learning_rate": 0.0001384435431781065,
      "loss": 0.6535,
      "step": 202
    },
    {
      "epoch": 0.8826086956521739,
      "grad_norm": 0.9480230808258057,
      "learning_rate": 0.00013774191957526143,
      "loss": 0.6628,
      "step": 203
    },
    {
      "epoch": 0.8869565217391304,
      "grad_norm": 1.2171893119812012,
      "learning_rate": 0.00013703812266599113,
      "loss": 0.6585,
      "step": 204
    },
    {
      "epoch": 0.8913043478260869,
      "grad_norm": 0.3134421110153198,
      "learning_rate": 0.00013633219297728416,
      "loss": 0.6629,
      "step": 205
    },
    {
      "epoch": 0.8956521739130435,
      "grad_norm": 1.003349781036377,
      "learning_rate": 0.00013562417115894172,
      "loss": 0.6516,
      "step": 206
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.246419906616211,
      "learning_rate": 0.00013491409798123687,
      "loss": 0.6418,
      "step": 207
    },
    {
      "epoch": 0.9043478260869565,
      "grad_norm": 0.46948862075805664,
      "learning_rate": 0.00013420201433256689,
      "loss": 0.6441,
      "step": 208
    },
    {
      "epoch": 0.908695652173913,
      "grad_norm": 1.628340244293213,
      "learning_rate": 0.00013348796121709862,
      "loss": 0.6661,
      "step": 209
    },
    {
      "epoch": 0.9130434782608695,
      "grad_norm": 0.4027623236179352,
      "learning_rate": 0.0001327719797524075,
      "loss": 0.6342,
      "step": 210
    },
    {
      "epoch": 0.9173913043478261,
      "grad_norm": 1.3196384906768799,
      "learning_rate": 0.00013205411116710972,
      "loss": 0.6724,
      "step": 211
    },
    {
      "epoch": 0.9217391304347826,
      "grad_norm": 0.561631977558136,
      "learning_rate": 0.00013133439679848823,
      "loss": 0.6541,
      "step": 212
    },
    {
      "epoch": 0.9260869565217391,
      "grad_norm": 0.7715569734573364,
      "learning_rate": 0.00013061287809011242,
      "loss": 0.6419,
      "step": 213
    },
    {
      "epoch": 0.9304347826086956,
      "grad_norm": 0.8591257333755493,
      "learning_rate": 0.0001298895965894516,
      "loss": 0.6197,
      "step": 214
    },
    {
      "epoch": 0.9347826086956522,
      "grad_norm": 0.4229847192764282,
      "learning_rate": 0.0001291645939454825,
      "loss": 0.6472,
      "step": 215
    },
    {
      "epoch": 0.9391304347826087,
      "grad_norm": 0.7943733930587769,
      "learning_rate": 0.0001284379119062912,
      "loss": 0.6576,
      "step": 216
    },
    {
      "epoch": 0.9434782608695652,
      "grad_norm": 0.7454273104667664,
      "learning_rate": 0.0001277095923166689,
      "loss": 0.6245,
      "step": 217
    },
    {
      "epoch": 0.9478260869565217,
      "grad_norm": 0.4976602792739868,
      "learning_rate": 0.00012697967711570242,
      "loss": 0.644,
      "step": 218
    },
    {
      "epoch": 0.9521739130434783,
      "grad_norm": 0.6845293641090393,
      "learning_rate": 0.00012624820833435937,
      "loss": 0.6412,
      "step": 219
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 0.7265484929084778,
      "learning_rate": 0.0001255152280930676,
      "loss": 0.6438,
      "step": 220
    },
    {
      "epoch": 0.9608695652173913,
      "grad_norm": 0.4346272647380829,
      "learning_rate": 0.00012478077859929,
      "loss": 0.6116,
      "step": 221
    },
    {
      "epoch": 0.9652173913043478,
      "grad_norm": 0.5768253803253174,
      "learning_rate": 0.00012404490214509386,
      "loss": 0.6242,
      "step": 222
    },
    {
      "epoch": 0.9695652173913043,
      "grad_norm": 0.688556969165802,
      "learning_rate": 0.00012330764110471566,
      "loss": 0.6546,
      "step": 223
    },
    {
      "epoch": 0.9739130434782609,
      "grad_norm": 0.6147114634513855,
      "learning_rate": 0.00012256903793212107,
      "loss": 0.6286,
      "step": 224
    },
    {
      "epoch": 0.9782608695652174,
      "grad_norm": 0.6598117351531982,
      "learning_rate": 0.00012182913515856015,
      "loss": 0.65,
      "step": 225
    },
    {
      "epoch": 0.9826086956521739,
      "grad_norm": 0.6232290863990784,
      "learning_rate": 0.00012108797539011847,
      "loss": 0.6465,
      "step": 226
    },
    {
      "epoch": 0.9869565217391304,
      "grad_norm": 0.3764599561691284,
      "learning_rate": 0.0001203456013052634,
      "loss": 0.6397,
      "step": 227
    },
    {
      "epoch": 0.991304347826087,
      "grad_norm": 0.4177006781101227,
      "learning_rate": 0.00011960205565238684,
      "loss": 0.6324,
      "step": 228
    },
    {
      "epoch": 0.9956521739130435,
      "grad_norm": 0.6632861495018005,
      "learning_rate": 0.00011885738124734358,
      "loss": 0.6394,
      "step": 229
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8406037092208862,
      "learning_rate": 0.00011811162097098558,
      "loss": 0.6563,
      "step": 230
    }
  ],
  "logging_steps": 1,
  "max_steps": 460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.970864260913562e+18,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}