{ "best_global_step": 4952, "best_metric": 2.4348788261413574, "best_model_checkpoint": "./output_dir/Video_Games_stage1_full_rollthenthink/checkpoint-4952", "epoch": 2.0, "eval_steps": 500, "global_step": 4952, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00040401999898995, "grad_norm": 34.5, "learning_rate": 0.0, "loss": 2.5641, "step": 1 }, { "epoch": 0.0008080399979799, "grad_norm": 25.625, "learning_rate": 1.4999999999999999e-05, "loss": 1.8995, "step": 2 }, { "epoch": 0.00121205999696985, "grad_norm": 14.75, "learning_rate": 2.9999999999999997e-05, "loss": 1.6299, "step": 3 }, { "epoch": 0.0016160799959598, "grad_norm": 8.9375, "learning_rate": 4.4999999999999996e-05, "loss": 1.8235, "step": 4 }, { "epoch": 0.0020200999949497502, "grad_norm": 3.140625, "learning_rate": 5.9999999999999995e-05, "loss": 1.4428, "step": 5 }, { "epoch": 0.0024241199939397, "grad_norm": 3.359375, "learning_rate": 7.5e-05, "loss": 1.6732, "step": 6 }, { "epoch": 0.00282813999292965, "grad_norm": 2.578125, "learning_rate": 8.999999999999999e-05, "loss": 1.4055, "step": 7 }, { "epoch": 0.0032321599919196, "grad_norm": 1.765625, "learning_rate": 0.00010499999999999999, "loss": 1.3038, "step": 8 }, { "epoch": 0.00363617999090955, "grad_norm": 1.9765625, "learning_rate": 0.00011999999999999999, "loss": 1.4008, "step": 9 }, { "epoch": 0.0040401999898995004, "grad_norm": 1.9140625, "learning_rate": 0.000135, "loss": 1.3479, "step": 10 }, { "epoch": 0.00444421998888945, "grad_norm": 1.5078125, "learning_rate": 0.00015, "loss": 1.0933, "step": 11 }, { "epoch": 0.0048482399878794, "grad_norm": 1.59375, "learning_rate": 0.000165, "loss": 1.2659, "step": 12 }, { "epoch": 0.00525225998686935, "grad_norm": 1.4453125, "learning_rate": 0.00017999999999999998, "loss": 1.2401, "step": 13 }, { "epoch": 0.0056562799858593, "grad_norm": 1.5859375, "learning_rate": 0.000195, "loss": 1.2296, "step": 14 }, { "epoch": 0.00606029998484925, "grad_norm": 1.203125, "learning_rate": 0.00020999999999999998, "loss": 1.0735, "step": 15 }, { "epoch": 0.0064643199838392, "grad_norm": 1.3125, "learning_rate": 0.000225, "loss": 1.2826, "step": 16 }, { "epoch": 0.00686833998282915, "grad_norm": 1.2265625, "learning_rate": 0.00023999999999999998, "loss": 1.0531, "step": 17 }, { "epoch": 0.0072723599818191, "grad_norm": 1.296875, "learning_rate": 0.00025499999999999996, "loss": 1.2165, "step": 18 }, { "epoch": 0.00767637998080905, "grad_norm": 1.296875, "learning_rate": 0.00027, "loss": 1.1774, "step": 19 }, { "epoch": 0.008080399979799001, "grad_norm": 1.2734375, "learning_rate": 0.000285, "loss": 1.1318, "step": 20 }, { "epoch": 0.008484419978788951, "grad_norm": 1.0625, "learning_rate": 0.0003, "loss": 1.0383, "step": 21 }, { "epoch": 0.0088884399777789, "grad_norm": 1.4375, "learning_rate": 0.00029997572815533975, "loss": 1.0022, "step": 22 }, { "epoch": 0.00929245997676885, "grad_norm": 1.3515625, "learning_rate": 0.0002999514563106796, "loss": 1.2895, "step": 23 }, { "epoch": 0.0096964799757588, "grad_norm": 1.3671875, "learning_rate": 0.0002999271844660194, "loss": 1.0691, "step": 24 }, { "epoch": 0.01010049997474875, "grad_norm": 1.4453125, "learning_rate": 0.0002999029126213592, "loss": 1.2265, "step": 25 }, { "epoch": 0.0105045199737387, "grad_norm": 1.3046875, "learning_rate": 0.000299878640776699, "loss": 1.288, "step": 26 }, { "epoch": 0.01090853997272865, "grad_norm": 1.28125, "learning_rate": 0.0002998543689320388, "loss": 1.0984, "step": 27 }, { "epoch": 0.0113125599717186, "grad_norm": 1.296875, "learning_rate": 0.0002998300970873786, "loss": 1.3475, "step": 28 }, { "epoch": 0.01171657997070855, "grad_norm": 1.453125, "learning_rate": 0.00029980582524271845, "loss": 1.3382, "step": 29 }, { "epoch": 0.0121205999696985, "grad_norm": 1.2421875, "learning_rate": 0.0002997815533980582, "loss": 1.3884, "step": 30 }, { "epoch": 0.01252461996868845, "grad_norm": 1.3515625, "learning_rate": 0.00029975728155339805, "loss": 1.0333, "step": 31 }, { "epoch": 0.0129286399676784, "grad_norm": 1.15625, "learning_rate": 0.0002997330097087378, "loss": 1.1701, "step": 32 }, { "epoch": 0.01333265996666835, "grad_norm": 3.65625, "learning_rate": 0.00029970873786407766, "loss": 1.1252, "step": 33 }, { "epoch": 0.0137366799656583, "grad_norm": 1.296875, "learning_rate": 0.00029968446601941743, "loss": 1.1997, "step": 34 }, { "epoch": 0.01414069996464825, "grad_norm": 1.1328125, "learning_rate": 0.00029966019417475726, "loss": 1.183, "step": 35 }, { "epoch": 0.0145447199636382, "grad_norm": 1.1015625, "learning_rate": 0.00029963592233009704, "loss": 1.1745, "step": 36 }, { "epoch": 0.01494873996262815, "grad_norm": 1.1328125, "learning_rate": 0.00029961165048543687, "loss": 1.2271, "step": 37 }, { "epoch": 0.0153527599616181, "grad_norm": 1.1015625, "learning_rate": 0.00029958737864077664, "loss": 1.1588, "step": 38 }, { "epoch": 0.01575677996060805, "grad_norm": 1.21875, "learning_rate": 0.00029956310679611647, "loss": 1.2681, "step": 39 }, { "epoch": 0.016160799959598002, "grad_norm": 1.0390625, "learning_rate": 0.0002995388349514563, "loss": 1.0733, "step": 40 }, { "epoch": 0.01656481995858795, "grad_norm": 1.015625, "learning_rate": 0.0002995145631067961, "loss": 0.9785, "step": 41 }, { "epoch": 0.016968839957577902, "grad_norm": 0.9296875, "learning_rate": 0.0002994902912621359, "loss": 1.1494, "step": 42 }, { "epoch": 0.01737285995656785, "grad_norm": 1.0625, "learning_rate": 0.0002994660194174757, "loss": 1.1549, "step": 43 }, { "epoch": 0.0177768799555578, "grad_norm": 0.97265625, "learning_rate": 0.0002994417475728155, "loss": 1.2268, "step": 44 }, { "epoch": 0.01818089995454775, "grad_norm": 0.9609375, "learning_rate": 0.00029941747572815534, "loss": 1.1043, "step": 45 }, { "epoch": 0.0185849199535377, "grad_norm": 1.203125, "learning_rate": 0.0002993932038834951, "loss": 1.3574, "step": 46 }, { "epoch": 0.01898893995252765, "grad_norm": 1.0625, "learning_rate": 0.00029936893203883494, "loss": 1.2589, "step": 47 }, { "epoch": 0.0193929599515176, "grad_norm": 1.0, "learning_rate": 0.0002993446601941747, "loss": 1.0932, "step": 48 }, { "epoch": 0.01979697995050755, "grad_norm": 0.90625, "learning_rate": 0.00029932038834951455, "loss": 1.053, "step": 49 }, { "epoch": 0.0202009999494975, "grad_norm": 0.9765625, "learning_rate": 0.0002992961165048544, "loss": 1.0963, "step": 50 }, { "epoch": 0.02060501994848745, "grad_norm": 1.046875, "learning_rate": 0.00029927184466019415, "loss": 1.2248, "step": 51 }, { "epoch": 0.0210090399474774, "grad_norm": 1.078125, "learning_rate": 0.00029924757281553393, "loss": 1.1382, "step": 52 }, { "epoch": 0.02141305994646735, "grad_norm": 1.0390625, "learning_rate": 0.00029922330097087376, "loss": 1.2549, "step": 53 }, { "epoch": 0.0218170799454573, "grad_norm": 0.99609375, "learning_rate": 0.0002991990291262136, "loss": 1.2379, "step": 54 }, { "epoch": 0.02222109994444725, "grad_norm": 0.9921875, "learning_rate": 0.00029917475728155336, "loss": 1.2471, "step": 55 }, { "epoch": 0.0226251199434372, "grad_norm": 1.3984375, "learning_rate": 0.0002991504854368932, "loss": 1.3349, "step": 56 }, { "epoch": 0.02302913994242715, "grad_norm": 1.1796875, "learning_rate": 0.00029912621359223297, "loss": 1.2337, "step": 57 }, { "epoch": 0.0234331599414171, "grad_norm": 1.2421875, "learning_rate": 0.0002991019417475728, "loss": 1.297, "step": 58 }, { "epoch": 0.02383717994040705, "grad_norm": 1.0234375, "learning_rate": 0.00029907766990291257, "loss": 1.1379, "step": 59 }, { "epoch": 0.024241199939397, "grad_norm": 0.9140625, "learning_rate": 0.0002990533980582524, "loss": 1.0976, "step": 60 }, { "epoch": 0.02464521993838695, "grad_norm": 1.15625, "learning_rate": 0.00029902912621359223, "loss": 1.2591, "step": 61 }, { "epoch": 0.0250492399373769, "grad_norm": 0.94921875, "learning_rate": 0.000299004854368932, "loss": 1.1093, "step": 62 }, { "epoch": 0.02545325993636685, "grad_norm": 1.09375, "learning_rate": 0.0002989805825242718, "loss": 1.3025, "step": 63 }, { "epoch": 0.0258572799353568, "grad_norm": 0.921875, "learning_rate": 0.0002989563106796116, "loss": 0.9812, "step": 64 }, { "epoch": 0.02626129993434675, "grad_norm": 1.09375, "learning_rate": 0.00029893203883495144, "loss": 1.1996, "step": 65 }, { "epoch": 0.0266653199333367, "grad_norm": 1.09375, "learning_rate": 0.0002989077669902912, "loss": 1.2251, "step": 66 }, { "epoch": 0.02706933993232665, "grad_norm": 0.91015625, "learning_rate": 0.00029888349514563104, "loss": 1.002, "step": 67 }, { "epoch": 0.0274733599313166, "grad_norm": 0.96875, "learning_rate": 0.0002988592233009708, "loss": 1.1654, "step": 68 }, { "epoch": 0.02787737993030655, "grad_norm": 1.0546875, "learning_rate": 0.00029883495145631065, "loss": 1.308, "step": 69 }, { "epoch": 0.0282813999292965, "grad_norm": 1.21875, "learning_rate": 0.0002988106796116505, "loss": 1.3042, "step": 70 }, { "epoch": 0.02868541992828645, "grad_norm": 1.0390625, "learning_rate": 0.00029878640776699025, "loss": 1.1926, "step": 71 }, { "epoch": 0.0290894399272764, "grad_norm": 0.98828125, "learning_rate": 0.0002987621359223301, "loss": 1.1621, "step": 72 }, { "epoch": 0.02949345992626635, "grad_norm": 0.91796875, "learning_rate": 0.00029873786407766986, "loss": 1.1024, "step": 73 }, { "epoch": 0.0298974799252563, "grad_norm": 1.0078125, "learning_rate": 0.0002987135922330097, "loss": 1.2151, "step": 74 }, { "epoch": 0.03030149992424625, "grad_norm": 0.8984375, "learning_rate": 0.0002986893203883495, "loss": 1.0686, "step": 75 }, { "epoch": 0.0307055199232362, "grad_norm": 0.921875, "learning_rate": 0.0002986650485436893, "loss": 1.0877, "step": 76 }, { "epoch": 0.03110953992222615, "grad_norm": 1.0, "learning_rate": 0.0002986407766990291, "loss": 1.2617, "step": 77 }, { "epoch": 0.0315135599212161, "grad_norm": 0.9140625, "learning_rate": 0.0002986165048543689, "loss": 0.9851, "step": 78 }, { "epoch": 0.03191757992020605, "grad_norm": 0.98046875, "learning_rate": 0.0002985922330097087, "loss": 1.031, "step": 79 }, { "epoch": 0.032321599919196004, "grad_norm": 1.078125, "learning_rate": 0.00029856796116504856, "loss": 1.323, "step": 80 }, { "epoch": 0.03272561991818595, "grad_norm": 1.09375, "learning_rate": 0.00029854368932038833, "loss": 1.2017, "step": 81 }, { "epoch": 0.0331296399171759, "grad_norm": 1.046875, "learning_rate": 0.0002985194174757281, "loss": 1.1801, "step": 82 }, { "epoch": 0.03353365991616585, "grad_norm": 0.9765625, "learning_rate": 0.00029849514563106794, "loss": 1.0822, "step": 83 }, { "epoch": 0.033937679915155804, "grad_norm": 1.1875, "learning_rate": 0.00029847087378640777, "loss": 1.1898, "step": 84 }, { "epoch": 0.03434169991414575, "grad_norm": 0.98828125, "learning_rate": 0.00029844660194174754, "loss": 1.0311, "step": 85 }, { "epoch": 0.0347457199131357, "grad_norm": 1.1640625, "learning_rate": 0.00029842233009708737, "loss": 1.2454, "step": 86 }, { "epoch": 0.03514973991212565, "grad_norm": 0.92578125, "learning_rate": 0.00029839805825242715, "loss": 1.0596, "step": 87 }, { "epoch": 0.0355537599111156, "grad_norm": 0.97265625, "learning_rate": 0.000298373786407767, "loss": 1.0798, "step": 88 }, { "epoch": 0.03595777991010555, "grad_norm": 1.0625, "learning_rate": 0.00029834951456310675, "loss": 1.172, "step": 89 }, { "epoch": 0.0363617999090955, "grad_norm": 1.546875, "learning_rate": 0.0002983252427184466, "loss": 1.1422, "step": 90 }, { "epoch": 0.03676581990808545, "grad_norm": 1.0390625, "learning_rate": 0.0002983009708737864, "loss": 1.1116, "step": 91 }, { "epoch": 0.0371698399070754, "grad_norm": 0.875, "learning_rate": 0.0002982766990291262, "loss": 1.0879, "step": 92 }, { "epoch": 0.03757385990606535, "grad_norm": 0.875, "learning_rate": 0.00029825242718446596, "loss": 0.9441, "step": 93 }, { "epoch": 0.0379778799050553, "grad_norm": 0.8671875, "learning_rate": 0.0002982281553398058, "loss": 0.9607, "step": 94 }, { "epoch": 0.03838189990404525, "grad_norm": 0.8359375, "learning_rate": 0.0002982038834951456, "loss": 1.0844, "step": 95 }, { "epoch": 0.0387859199030352, "grad_norm": 1.0, "learning_rate": 0.0002981796116504854, "loss": 1.0862, "step": 96 }, { "epoch": 0.03918993990202515, "grad_norm": 0.87109375, "learning_rate": 0.0002981553398058252, "loss": 1.0541, "step": 97 }, { "epoch": 0.0395939599010151, "grad_norm": 0.92578125, "learning_rate": 0.000298131067961165, "loss": 1.1086, "step": 98 }, { "epoch": 0.03999797990000505, "grad_norm": 0.859375, "learning_rate": 0.00029810679611650483, "loss": 1.034, "step": 99 }, { "epoch": 0.040401999898995, "grad_norm": 0.8125, "learning_rate": 0.00029808252427184466, "loss": 0.9623, "step": 100 }, { "epoch": 0.04080601989798495, "grad_norm": 0.8203125, "learning_rate": 0.00029805825242718443, "loss": 1.1094, "step": 101 }, { "epoch": 0.0412100398969749, "grad_norm": 0.7109375, "learning_rate": 0.00029803398058252426, "loss": 0.9687, "step": 102 }, { "epoch": 0.04161405989596485, "grad_norm": 0.79296875, "learning_rate": 0.00029800970873786404, "loss": 0.9616, "step": 103 }, { "epoch": 0.0420180798949548, "grad_norm": 0.8125, "learning_rate": 0.00029798543689320387, "loss": 1.0682, "step": 104 }, { "epoch": 0.04242209989394475, "grad_norm": 0.6875, "learning_rate": 0.0002979611650485437, "loss": 0.8879, "step": 105 }, { "epoch": 0.0428261198929347, "grad_norm": 0.796875, "learning_rate": 0.00029793689320388347, "loss": 1.0175, "step": 106 }, { "epoch": 0.04323013989192465, "grad_norm": 0.91796875, "learning_rate": 0.0002979126213592233, "loss": 1.1171, "step": 107 }, { "epoch": 0.0436341598909146, "grad_norm": 0.765625, "learning_rate": 0.0002978883495145631, "loss": 0.9731, "step": 108 }, { "epoch": 0.04403817988990455, "grad_norm": 0.8125, "learning_rate": 0.0002978640776699029, "loss": 1.0341, "step": 109 }, { "epoch": 0.0444421998888945, "grad_norm": 0.90234375, "learning_rate": 0.0002978398058252427, "loss": 1.0358, "step": 110 }, { "epoch": 0.04484621988788445, "grad_norm": 0.8046875, "learning_rate": 0.0002978155339805825, "loss": 1.0316, "step": 111 }, { "epoch": 0.0452502398868744, "grad_norm": 1.015625, "learning_rate": 0.0002977912621359223, "loss": 1.0325, "step": 112 }, { "epoch": 0.04565425988586435, "grad_norm": 0.890625, "learning_rate": 0.0002977669902912621, "loss": 1.1484, "step": 113 }, { "epoch": 0.0460582798848543, "grad_norm": 0.95703125, "learning_rate": 0.0002977427184466019, "loss": 0.9972, "step": 114 }, { "epoch": 0.04646229988384425, "grad_norm": 0.75390625, "learning_rate": 0.0002977184466019417, "loss": 0.9471, "step": 115 }, { "epoch": 0.0468663198828342, "grad_norm": 0.89453125, "learning_rate": 0.00029769417475728155, "loss": 0.9588, "step": 116 }, { "epoch": 0.04727033988182415, "grad_norm": 0.80859375, "learning_rate": 0.0002976699029126213, "loss": 1.1364, "step": 117 }, { "epoch": 0.0476743598808141, "grad_norm": 0.9140625, "learning_rate": 0.00029764563106796115, "loss": 0.9852, "step": 118 }, { "epoch": 0.048078379879804053, "grad_norm": 0.94140625, "learning_rate": 0.00029762135922330093, "loss": 1.1316, "step": 119 }, { "epoch": 0.048482399878794, "grad_norm": 0.98046875, "learning_rate": 0.00029759708737864076, "loss": 1.1558, "step": 120 }, { "epoch": 0.04888641987778395, "grad_norm": 0.91015625, "learning_rate": 0.0002975728155339806, "loss": 1.0148, "step": 121 }, { "epoch": 0.0492904398767739, "grad_norm": 0.859375, "learning_rate": 0.00029754854368932036, "loss": 1.0293, "step": 122 }, { "epoch": 0.049694459875763854, "grad_norm": 0.86328125, "learning_rate": 0.00029752427184466014, "loss": 1.0008, "step": 123 }, { "epoch": 0.0500984798747538, "grad_norm": 0.8359375, "learning_rate": 0.00029749999999999997, "loss": 0.9754, "step": 124 }, { "epoch": 0.05050249987374375, "grad_norm": 0.91796875, "learning_rate": 0.0002974757281553398, "loss": 1.0863, "step": 125 }, { "epoch": 0.0509065198727337, "grad_norm": 0.95703125, "learning_rate": 0.00029745145631067957, "loss": 1.0305, "step": 126 }, { "epoch": 0.05131053987172365, "grad_norm": 0.90625, "learning_rate": 0.0002974271844660194, "loss": 0.9882, "step": 127 }, { "epoch": 0.0517145598707136, "grad_norm": 1.1015625, "learning_rate": 0.0002974029126213592, "loss": 1.1303, "step": 128 }, { "epoch": 0.05211857986970355, "grad_norm": 1.0078125, "learning_rate": 0.000297378640776699, "loss": 1.1039, "step": 129 }, { "epoch": 0.0525225998686935, "grad_norm": 0.83203125, "learning_rate": 0.00029735436893203884, "loss": 1.1055, "step": 130 }, { "epoch": 0.05292661986768345, "grad_norm": 0.94921875, "learning_rate": 0.0002973300970873786, "loss": 1.133, "step": 131 }, { "epoch": 0.0533306398666734, "grad_norm": 0.87109375, "learning_rate": 0.00029730582524271844, "loss": 1.0825, "step": 132 }, { "epoch": 0.05373465986566335, "grad_norm": 0.77734375, "learning_rate": 0.0002972815533980582, "loss": 0.9782, "step": 133 }, { "epoch": 0.0541386798646533, "grad_norm": 0.875, "learning_rate": 0.00029725728155339804, "loss": 0.9056, "step": 134 }, { "epoch": 0.05454269986364325, "grad_norm": 0.89453125, "learning_rate": 0.0002972330097087378, "loss": 1.0426, "step": 135 }, { "epoch": 0.0549467198626332, "grad_norm": 0.96484375, "learning_rate": 0.00029720873786407765, "loss": 1.1402, "step": 136 }, { "epoch": 0.05535073986162315, "grad_norm": 0.80859375, "learning_rate": 0.0002971844660194174, "loss": 0.9809, "step": 137 }, { "epoch": 0.0557547598606131, "grad_norm": 0.80859375, "learning_rate": 0.00029716019417475725, "loss": 0.9833, "step": 138 }, { "epoch": 0.05615877985960305, "grad_norm": 0.859375, "learning_rate": 0.00029713592233009703, "loss": 1.0878, "step": 139 }, { "epoch": 0.056562799858593, "grad_norm": 0.828125, "learning_rate": 0.00029711165048543686, "loss": 1.0767, "step": 140 }, { "epoch": 0.05696681985758295, "grad_norm": 0.76953125, "learning_rate": 0.0002970873786407767, "loss": 0.9885, "step": 141 }, { "epoch": 0.0573708398565729, "grad_norm": 0.79296875, "learning_rate": 0.00029706310679611646, "loss": 0.8937, "step": 142 }, { "epoch": 0.05777485985556285, "grad_norm": 0.9140625, "learning_rate": 0.0002970388349514563, "loss": 1.1756, "step": 143 }, { "epoch": 0.0581788798545528, "grad_norm": 0.6953125, "learning_rate": 0.00029701456310679607, "loss": 0.9337, "step": 144 }, { "epoch": 0.05858289985354275, "grad_norm": 0.890625, "learning_rate": 0.0002969902912621359, "loss": 1.033, "step": 145 }, { "epoch": 0.0589869198525327, "grad_norm": 0.91796875, "learning_rate": 0.0002969660194174757, "loss": 1.2059, "step": 146 }, { "epoch": 0.05939093985152265, "grad_norm": 0.91796875, "learning_rate": 0.0002969417475728155, "loss": 1.0209, "step": 147 }, { "epoch": 0.0597949598505126, "grad_norm": 0.84765625, "learning_rate": 0.00029691747572815533, "loss": 1.0435, "step": 148 }, { "epoch": 0.06019897984950255, "grad_norm": 0.875, "learning_rate": 0.0002968932038834951, "loss": 0.973, "step": 149 }, { "epoch": 0.0606029998484925, "grad_norm": 0.87109375, "learning_rate": 0.00029686893203883494, "loss": 0.9803, "step": 150 }, { "epoch": 0.06100701984748245, "grad_norm": 0.8046875, "learning_rate": 0.00029684466019417477, "loss": 0.9996, "step": 151 }, { "epoch": 0.0614110398464724, "grad_norm": 0.82421875, "learning_rate": 0.00029682038834951454, "loss": 1.0078, "step": 152 }, { "epoch": 0.06181505984546235, "grad_norm": 0.80859375, "learning_rate": 0.0002967961165048543, "loss": 1.0159, "step": 153 }, { "epoch": 0.0622190798444523, "grad_norm": 0.70703125, "learning_rate": 0.00029677184466019415, "loss": 0.8266, "step": 154 }, { "epoch": 0.06262309984344225, "grad_norm": 0.84375, "learning_rate": 0.000296747572815534, "loss": 0.9275, "step": 155 }, { "epoch": 0.0630271198424322, "grad_norm": 0.9609375, "learning_rate": 0.00029672330097087375, "loss": 1.0159, "step": 156 }, { "epoch": 0.06343113984142215, "grad_norm": 0.9296875, "learning_rate": 0.0002966990291262136, "loss": 1.1252, "step": 157 }, { "epoch": 0.0638351598404121, "grad_norm": 0.9765625, "learning_rate": 0.00029667475728155336, "loss": 1.1385, "step": 158 }, { "epoch": 0.06423917983940206, "grad_norm": 0.75390625, "learning_rate": 0.0002966504854368932, "loss": 1.032, "step": 159 }, { "epoch": 0.06464319983839201, "grad_norm": 0.90234375, "learning_rate": 0.00029662621359223296, "loss": 0.9425, "step": 160 }, { "epoch": 0.06504721983738194, "grad_norm": 0.83203125, "learning_rate": 0.0002966019417475728, "loss": 1.0501, "step": 161 }, { "epoch": 0.0654512398363719, "grad_norm": 0.77734375, "learning_rate": 0.0002965776699029126, "loss": 0.9881, "step": 162 }, { "epoch": 0.06585525983536185, "grad_norm": 0.796875, "learning_rate": 0.0002965533980582524, "loss": 0.931, "step": 163 }, { "epoch": 0.0662592798343518, "grad_norm": 0.8515625, "learning_rate": 0.00029652912621359217, "loss": 0.933, "step": 164 }, { "epoch": 0.06666329983334175, "grad_norm": 0.7109375, "learning_rate": 0.000296504854368932, "loss": 0.9364, "step": 165 }, { "epoch": 0.0670673198323317, "grad_norm": 0.94921875, "learning_rate": 0.00029648058252427183, "loss": 1.0804, "step": 166 }, { "epoch": 0.06747133983132166, "grad_norm": 0.78125, "learning_rate": 0.0002964563106796116, "loss": 0.9827, "step": 167 }, { "epoch": 0.06787535983031161, "grad_norm": 0.98828125, "learning_rate": 0.00029643203883495143, "loss": 1.1814, "step": 168 }, { "epoch": 0.06827937982930155, "grad_norm": 0.734375, "learning_rate": 0.0002964077669902912, "loss": 0.9523, "step": 169 }, { "epoch": 0.0686833998282915, "grad_norm": 0.76953125, "learning_rate": 0.00029638349514563104, "loss": 0.9115, "step": 170 }, { "epoch": 0.06908741982728145, "grad_norm": 0.73828125, "learning_rate": 0.00029635922330097087, "loss": 0.9798, "step": 171 }, { "epoch": 0.0694914398262714, "grad_norm": 0.89453125, "learning_rate": 0.00029633495145631064, "loss": 1.0933, "step": 172 }, { "epoch": 0.06989545982526135, "grad_norm": 0.83203125, "learning_rate": 0.00029631067961165047, "loss": 1.0964, "step": 173 }, { "epoch": 0.0702994798242513, "grad_norm": 0.97265625, "learning_rate": 0.00029628640776699025, "loss": 0.8969, "step": 174 }, { "epoch": 0.07070349982324126, "grad_norm": 0.96875, "learning_rate": 0.0002962621359223301, "loss": 1.0792, "step": 175 }, { "epoch": 0.0711075198222312, "grad_norm": 0.86328125, "learning_rate": 0.0002962378640776699, "loss": 1.1553, "step": 176 }, { "epoch": 0.07151153982122115, "grad_norm": 0.8984375, "learning_rate": 0.0002962135922330097, "loss": 0.851, "step": 177 }, { "epoch": 0.0719155598202111, "grad_norm": 0.75390625, "learning_rate": 0.0002961893203883495, "loss": 0.8923, "step": 178 }, { "epoch": 0.07231957981920105, "grad_norm": 0.83984375, "learning_rate": 0.0002961650485436893, "loss": 1.0365, "step": 179 }, { "epoch": 0.072723599818191, "grad_norm": 0.8203125, "learning_rate": 0.0002961407766990291, "loss": 0.9531, "step": 180 }, { "epoch": 0.07312761981718095, "grad_norm": 0.80078125, "learning_rate": 0.00029611650485436894, "loss": 1.011, "step": 181 }, { "epoch": 0.0735316398161709, "grad_norm": 0.8515625, "learning_rate": 0.0002960922330097087, "loss": 1.0127, "step": 182 }, { "epoch": 0.07393565981516086, "grad_norm": 0.8828125, "learning_rate": 0.0002960679611650485, "loss": 1.092, "step": 183 }, { "epoch": 0.0743396798141508, "grad_norm": 0.7734375, "learning_rate": 0.0002960436893203883, "loss": 1.0027, "step": 184 }, { "epoch": 0.07474369981314075, "grad_norm": 0.68359375, "learning_rate": 0.0002960194174757281, "loss": 0.8608, "step": 185 }, { "epoch": 0.0751477198121307, "grad_norm": 0.8984375, "learning_rate": 0.00029599514563106793, "loss": 1.1078, "step": 186 }, { "epoch": 0.07555173981112065, "grad_norm": 0.8828125, "learning_rate": 0.00029597087378640776, "loss": 1.1133, "step": 187 }, { "epoch": 0.0759557598101106, "grad_norm": 0.90625, "learning_rate": 0.00029594660194174753, "loss": 0.9431, "step": 188 }, { "epoch": 0.07635977980910055, "grad_norm": 0.8046875, "learning_rate": 0.00029592233009708736, "loss": 1.0513, "step": 189 }, { "epoch": 0.0767637998080905, "grad_norm": 0.88671875, "learning_rate": 0.00029589805825242714, "loss": 1.1064, "step": 190 }, { "epoch": 0.07716781980708046, "grad_norm": 0.6875, "learning_rate": 0.00029587378640776697, "loss": 0.889, "step": 191 }, { "epoch": 0.0775718398060704, "grad_norm": 0.6640625, "learning_rate": 0.0002958495145631068, "loss": 0.832, "step": 192 }, { "epoch": 0.07797585980506035, "grad_norm": 0.6796875, "learning_rate": 0.00029582524271844657, "loss": 0.8738, "step": 193 }, { "epoch": 0.0783798798040503, "grad_norm": 0.94140625, "learning_rate": 0.00029580097087378635, "loss": 1.1953, "step": 194 }, { "epoch": 0.07878389980304025, "grad_norm": 1.0546875, "learning_rate": 0.0002957766990291262, "loss": 1.2061, "step": 195 }, { "epoch": 0.0791879198020302, "grad_norm": 0.83984375, "learning_rate": 0.000295752427184466, "loss": 1.1278, "step": 196 }, { "epoch": 0.07959193980102015, "grad_norm": 0.87890625, "learning_rate": 0.0002957281553398058, "loss": 0.9646, "step": 197 }, { "epoch": 0.0799959598000101, "grad_norm": 0.87109375, "learning_rate": 0.0002957038834951456, "loss": 1.0379, "step": 198 }, { "epoch": 0.08039997979900006, "grad_norm": 0.8515625, "learning_rate": 0.0002956796116504854, "loss": 1.1067, "step": 199 }, { "epoch": 0.08080399979799, "grad_norm": 0.75, "learning_rate": 0.0002956553398058252, "loss": 0.9496, "step": 200 }, { "epoch": 0.08120801979697995, "grad_norm": 0.71875, "learning_rate": 0.00029563106796116505, "loss": 0.9241, "step": 201 }, { "epoch": 0.0816120397959699, "grad_norm": 0.6875, "learning_rate": 0.0002956067961165048, "loss": 0.9225, "step": 202 }, { "epoch": 0.08201605979495985, "grad_norm": 0.828125, "learning_rate": 0.00029558252427184465, "loss": 1.1124, "step": 203 }, { "epoch": 0.0824200797939498, "grad_norm": 0.80078125, "learning_rate": 0.0002955582524271844, "loss": 1.0216, "step": 204 }, { "epoch": 0.08282409979293975, "grad_norm": 0.8515625, "learning_rate": 0.00029553398058252425, "loss": 1.1123, "step": 205 }, { "epoch": 0.0832281197919297, "grad_norm": 0.7578125, "learning_rate": 0.0002955097087378641, "loss": 0.9313, "step": 206 }, { "epoch": 0.08363213979091966, "grad_norm": 0.90625, "learning_rate": 0.00029548543689320386, "loss": 0.9868, "step": 207 }, { "epoch": 0.0840361597899096, "grad_norm": 0.76171875, "learning_rate": 0.0002954611650485437, "loss": 0.8957, "step": 208 }, { "epoch": 0.08444017978889955, "grad_norm": 0.8046875, "learning_rate": 0.00029543689320388346, "loss": 1.0486, "step": 209 }, { "epoch": 0.0848441997878895, "grad_norm": 0.6953125, "learning_rate": 0.0002954126213592233, "loss": 0.9594, "step": 210 }, { "epoch": 0.08524821978687945, "grad_norm": 0.7734375, "learning_rate": 0.00029538834951456307, "loss": 1.0598, "step": 211 }, { "epoch": 0.0856522397858694, "grad_norm": 1.1640625, "learning_rate": 0.0002953640776699029, "loss": 0.9287, "step": 212 }, { "epoch": 0.08605625978485935, "grad_norm": 0.83984375, "learning_rate": 0.0002953398058252427, "loss": 0.9725, "step": 213 }, { "epoch": 0.0864602797838493, "grad_norm": 0.6640625, "learning_rate": 0.0002953155339805825, "loss": 0.9781, "step": 214 }, { "epoch": 0.08686429978283924, "grad_norm": 0.8359375, "learning_rate": 0.0002952912621359223, "loss": 0.8706, "step": 215 }, { "epoch": 0.0872683197818292, "grad_norm": 0.80859375, "learning_rate": 0.0002952669902912621, "loss": 0.9089, "step": 216 }, { "epoch": 0.08767233978081915, "grad_norm": 0.83203125, "learning_rate": 0.00029524271844660194, "loss": 1.0379, "step": 217 }, { "epoch": 0.0880763597798091, "grad_norm": 0.765625, "learning_rate": 0.0002952184466019417, "loss": 1.0308, "step": 218 }, { "epoch": 0.08848037977879905, "grad_norm": 0.7421875, "learning_rate": 0.00029519417475728154, "loss": 0.9835, "step": 219 }, { "epoch": 0.088884399777789, "grad_norm": 1.15625, "learning_rate": 0.0002951699029126213, "loss": 0.9932, "step": 220 }, { "epoch": 0.08928841977677895, "grad_norm": 0.80078125, "learning_rate": 0.00029514563106796115, "loss": 1.0426, "step": 221 }, { "epoch": 0.0896924397757689, "grad_norm": 0.74609375, "learning_rate": 0.000295121359223301, "loss": 0.9382, "step": 222 }, { "epoch": 0.09009645977475884, "grad_norm": 0.94140625, "learning_rate": 0.00029509708737864075, "loss": 1.0283, "step": 223 }, { "epoch": 0.0905004797737488, "grad_norm": 0.91796875, "learning_rate": 0.0002950728155339805, "loss": 1.0652, "step": 224 }, { "epoch": 0.09090449977273875, "grad_norm": 0.80078125, "learning_rate": 0.00029504854368932036, "loss": 1.0468, "step": 225 }, { "epoch": 0.0913085197717287, "grad_norm": 1.4609375, "learning_rate": 0.0002950242718446602, "loss": 1.0454, "step": 226 }, { "epoch": 0.09171253977071865, "grad_norm": 0.8359375, "learning_rate": 0.00029499999999999996, "loss": 1.0093, "step": 227 }, { "epoch": 0.0921165597697086, "grad_norm": 0.97265625, "learning_rate": 0.0002949757281553398, "loss": 0.9629, "step": 228 }, { "epoch": 0.09252057976869855, "grad_norm": 0.88671875, "learning_rate": 0.00029495145631067957, "loss": 0.9917, "step": 229 }, { "epoch": 0.0929245997676885, "grad_norm": 0.9609375, "learning_rate": 0.0002949271844660194, "loss": 1.1664, "step": 230 }, { "epoch": 0.09332861976667844, "grad_norm": 0.87890625, "learning_rate": 0.0002949029126213592, "loss": 1.0614, "step": 231 }, { "epoch": 0.0937326397656684, "grad_norm": 0.88671875, "learning_rate": 0.000294878640776699, "loss": 1.0827, "step": 232 }, { "epoch": 0.09413665976465835, "grad_norm": 0.8125, "learning_rate": 0.00029485436893203883, "loss": 0.8871, "step": 233 }, { "epoch": 0.0945406797636483, "grad_norm": 0.75390625, "learning_rate": 0.0002948300970873786, "loss": 1.0594, "step": 234 }, { "epoch": 0.09494469976263825, "grad_norm": 1.6796875, "learning_rate": 0.00029480582524271843, "loss": 1.0821, "step": 235 }, { "epoch": 0.0953487197616282, "grad_norm": 1.0, "learning_rate": 0.0002947815533980582, "loss": 0.9732, "step": 236 }, { "epoch": 0.09575273976061816, "grad_norm": 0.859375, "learning_rate": 0.00029475728155339804, "loss": 0.9982, "step": 237 }, { "epoch": 0.09615675975960811, "grad_norm": 0.734375, "learning_rate": 0.0002947330097087378, "loss": 0.8946, "step": 238 }, { "epoch": 0.09656077975859804, "grad_norm": 0.80078125, "learning_rate": 0.00029470873786407764, "loss": 1.0789, "step": 239 }, { "epoch": 0.096964799757588, "grad_norm": 0.7265625, "learning_rate": 0.0002946844660194174, "loss": 1.0303, "step": 240 }, { "epoch": 0.09736881975657795, "grad_norm": 0.71484375, "learning_rate": 0.00029466019417475725, "loss": 1.0468, "step": 241 }, { "epoch": 0.0977728397555679, "grad_norm": 0.734375, "learning_rate": 0.0002946359223300971, "loss": 0.9786, "step": 242 }, { "epoch": 0.09817685975455785, "grad_norm": 0.77734375, "learning_rate": 0.00029461165048543685, "loss": 0.9, "step": 243 }, { "epoch": 0.0985808797535478, "grad_norm": 0.7890625, "learning_rate": 0.0002945873786407767, "loss": 1.064, "step": 244 }, { "epoch": 0.09898489975253776, "grad_norm": 0.7109375, "learning_rate": 0.00029456310679611646, "loss": 1.0181, "step": 245 }, { "epoch": 0.09938891975152771, "grad_norm": 0.78515625, "learning_rate": 0.0002945388349514563, "loss": 1.0734, "step": 246 }, { "epoch": 0.09979293975051765, "grad_norm": 0.703125, "learning_rate": 0.0002945145631067961, "loss": 0.9849, "step": 247 }, { "epoch": 0.1001969597495076, "grad_norm": 0.72265625, "learning_rate": 0.0002944902912621359, "loss": 1.0185, "step": 248 }, { "epoch": 0.10060097974849755, "grad_norm": 0.8359375, "learning_rate": 0.0002944660194174757, "loss": 0.9724, "step": 249 }, { "epoch": 0.1010049997474875, "grad_norm": 0.93359375, "learning_rate": 0.0002944417475728155, "loss": 0.9866, "step": 250 }, { "epoch": 0.10140901974647745, "grad_norm": 0.80078125, "learning_rate": 0.0002944174757281553, "loss": 1.0369, "step": 251 }, { "epoch": 0.1018130397454674, "grad_norm": 0.6875, "learning_rate": 0.00029439320388349515, "loss": 0.936, "step": 252 }, { "epoch": 0.10221705974445736, "grad_norm": 0.84375, "learning_rate": 0.00029436893203883493, "loss": 1.1222, "step": 253 }, { "epoch": 0.1026210797434473, "grad_norm": 0.7734375, "learning_rate": 0.0002943446601941747, "loss": 0.9793, "step": 254 }, { "epoch": 0.10302509974243725, "grad_norm": 0.73046875, "learning_rate": 0.00029432038834951453, "loss": 0.8875, "step": 255 }, { "epoch": 0.1034291197414272, "grad_norm": 0.78125, "learning_rate": 0.00029429611650485436, "loss": 0.9942, "step": 256 }, { "epoch": 0.10383313974041715, "grad_norm": 0.7734375, "learning_rate": 0.00029427184466019414, "loss": 0.9042, "step": 257 }, { "epoch": 0.1042371597394071, "grad_norm": 0.73046875, "learning_rate": 0.00029424757281553397, "loss": 0.9496, "step": 258 }, { "epoch": 0.10464117973839705, "grad_norm": 0.80078125, "learning_rate": 0.00029422330097087374, "loss": 0.9903, "step": 259 }, { "epoch": 0.105045199737387, "grad_norm": 0.61328125, "learning_rate": 0.0002941990291262136, "loss": 0.9087, "step": 260 }, { "epoch": 0.10544921973637696, "grad_norm": 0.703125, "learning_rate": 0.00029417475728155335, "loss": 0.9785, "step": 261 }, { "epoch": 0.1058532397353669, "grad_norm": 0.80859375, "learning_rate": 0.0002941504854368932, "loss": 1.0158, "step": 262 }, { "epoch": 0.10625725973435685, "grad_norm": 0.8671875, "learning_rate": 0.000294126213592233, "loss": 1.0027, "step": 263 }, { "epoch": 0.1066612797333468, "grad_norm": 0.91796875, "learning_rate": 0.0002941019417475728, "loss": 1.1421, "step": 264 }, { "epoch": 0.10706529973233675, "grad_norm": 0.75390625, "learning_rate": 0.00029407766990291256, "loss": 0.8618, "step": 265 }, { "epoch": 0.1074693197313267, "grad_norm": 0.75, "learning_rate": 0.0002940533980582524, "loss": 0.9511, "step": 266 }, { "epoch": 0.10787333973031665, "grad_norm": 2.96875, "learning_rate": 0.0002940291262135922, "loss": 0.8486, "step": 267 }, { "epoch": 0.1082773597293066, "grad_norm": 0.8359375, "learning_rate": 0.000294004854368932, "loss": 0.9963, "step": 268 }, { "epoch": 0.10868137972829656, "grad_norm": 0.77734375, "learning_rate": 0.0002939805825242718, "loss": 0.9622, "step": 269 }, { "epoch": 0.1090853997272865, "grad_norm": 0.6875, "learning_rate": 0.0002939563106796116, "loss": 0.9933, "step": 270 }, { "epoch": 0.10948941972627645, "grad_norm": 0.70703125, "learning_rate": 0.0002939320388349514, "loss": 0.8924, "step": 271 }, { "epoch": 0.1098934397252664, "grad_norm": 0.7421875, "learning_rate": 0.00029390776699029126, "loss": 0.8703, "step": 272 }, { "epoch": 0.11029745972425635, "grad_norm": 0.6796875, "learning_rate": 0.00029388349514563103, "loss": 0.8987, "step": 273 }, { "epoch": 0.1107014797232463, "grad_norm": 0.72265625, "learning_rate": 0.00029385922330097086, "loss": 1.0079, "step": 274 }, { "epoch": 0.11110549972223625, "grad_norm": 0.8515625, "learning_rate": 0.00029383495145631064, "loss": 1.112, "step": 275 }, { "epoch": 0.1115095197212262, "grad_norm": 0.6953125, "learning_rate": 0.00029381067961165047, "loss": 0.9664, "step": 276 }, { "epoch": 0.11191353972021616, "grad_norm": 0.73828125, "learning_rate": 0.0002937864077669903, "loss": 0.8644, "step": 277 }, { "epoch": 0.1123175597192061, "grad_norm": 0.76953125, "learning_rate": 0.00029376213592233007, "loss": 0.9472, "step": 278 }, { "epoch": 0.11272157971819605, "grad_norm": 0.8125, "learning_rate": 0.0002937378640776699, "loss": 0.9776, "step": 279 }, { "epoch": 0.113125599717186, "grad_norm": 0.81640625, "learning_rate": 0.0002937135922330097, "loss": 1.0549, "step": 280 }, { "epoch": 0.11352961971617595, "grad_norm": 0.7890625, "learning_rate": 0.0002936893203883495, "loss": 0.9088, "step": 281 }, { "epoch": 0.1139336397151659, "grad_norm": 0.78515625, "learning_rate": 0.00029366504854368933, "loss": 1.0217, "step": 282 }, { "epoch": 0.11433765971415585, "grad_norm": 0.71875, "learning_rate": 0.0002936407766990291, "loss": 0.9649, "step": 283 }, { "epoch": 0.1147416797131458, "grad_norm": 3.28125, "learning_rate": 0.0002936165048543689, "loss": 1.1714, "step": 284 }, { "epoch": 0.11514569971213576, "grad_norm": 0.73046875, "learning_rate": 0.0002935922330097087, "loss": 1.1016, "step": 285 }, { "epoch": 0.1155497197111257, "grad_norm": 0.6796875, "learning_rate": 0.0002935679611650485, "loss": 0.9771, "step": 286 }, { "epoch": 0.11595373971011565, "grad_norm": 0.67578125, "learning_rate": 0.0002935436893203883, "loss": 0.9563, "step": 287 }, { "epoch": 0.1163577597091056, "grad_norm": 0.66796875, "learning_rate": 0.00029351941747572815, "loss": 0.9222, "step": 288 }, { "epoch": 0.11676177970809555, "grad_norm": 0.6875, "learning_rate": 0.0002934951456310679, "loss": 0.9364, "step": 289 }, { "epoch": 0.1171657997070855, "grad_norm": 0.72265625, "learning_rate": 0.00029347087378640775, "loss": 1.0015, "step": 290 }, { "epoch": 0.11756981970607545, "grad_norm": 0.703125, "learning_rate": 0.00029344660194174753, "loss": 0.9835, "step": 291 }, { "epoch": 0.1179738397050654, "grad_norm": 0.81640625, "learning_rate": 0.00029342233009708736, "loss": 1.0738, "step": 292 }, { "epoch": 0.11837785970405536, "grad_norm": 0.734375, "learning_rate": 0.0002933980582524272, "loss": 1.006, "step": 293 }, { "epoch": 0.1187818797030453, "grad_norm": 1.1015625, "learning_rate": 0.00029337378640776696, "loss": 0.9796, "step": 294 }, { "epoch": 0.11918589970203525, "grad_norm": 3.296875, "learning_rate": 0.00029334951456310674, "loss": 1.0246, "step": 295 }, { "epoch": 0.1195899197010252, "grad_norm": 0.83203125, "learning_rate": 0.00029332524271844657, "loss": 0.9388, "step": 296 }, { "epoch": 0.11999393970001515, "grad_norm": 0.765625, "learning_rate": 0.0002933009708737864, "loss": 1.0142, "step": 297 }, { "epoch": 0.1203979596990051, "grad_norm": 0.89453125, "learning_rate": 0.00029327669902912617, "loss": 1.0849, "step": 298 }, { "epoch": 0.12080197969799505, "grad_norm": 0.81640625, "learning_rate": 0.000293252427184466, "loss": 0.9702, "step": 299 }, { "epoch": 0.121205999696985, "grad_norm": 0.80859375, "learning_rate": 0.0002932281553398058, "loss": 1.1123, "step": 300 }, { "epoch": 0.12161001969597494, "grad_norm": 0.85546875, "learning_rate": 0.0002932038834951456, "loss": 0.9149, "step": 301 }, { "epoch": 0.1220140396949649, "grad_norm": 0.8125, "learning_rate": 0.00029317961165048543, "loss": 0.9886, "step": 302 }, { "epoch": 0.12241805969395485, "grad_norm": 0.83203125, "learning_rate": 0.0002931553398058252, "loss": 0.9678, "step": 303 }, { "epoch": 0.1228220796929448, "grad_norm": 0.75, "learning_rate": 0.00029313106796116504, "loss": 0.8933, "step": 304 }, { "epoch": 0.12322609969193475, "grad_norm": 0.81640625, "learning_rate": 0.0002931067961165048, "loss": 1.0086, "step": 305 }, { "epoch": 0.1236301196909247, "grad_norm": 0.67578125, "learning_rate": 0.00029308252427184464, "loss": 0.9656, "step": 306 }, { "epoch": 0.12403413968991465, "grad_norm": 0.73828125, "learning_rate": 0.0002930582524271845, "loss": 1.0638, "step": 307 }, { "epoch": 0.1244381596889046, "grad_norm": 0.69921875, "learning_rate": 0.00029303398058252425, "loss": 0.9599, "step": 308 }, { "epoch": 0.12484217968789454, "grad_norm": 0.75390625, "learning_rate": 0.0002930097087378641, "loss": 1.0094, "step": 309 }, { "epoch": 0.1252461996868845, "grad_norm": 0.76953125, "learning_rate": 0.00029298543689320385, "loss": 0.9661, "step": 310 }, { "epoch": 0.12565021968587445, "grad_norm": 0.96875, "learning_rate": 0.0002929611650485437, "loss": 0.865, "step": 311 }, { "epoch": 0.1260542396848644, "grad_norm": 0.828125, "learning_rate": 0.00029293689320388346, "loss": 1.0541, "step": 312 }, { "epoch": 0.12645825968385435, "grad_norm": 0.6484375, "learning_rate": 0.0002929126213592233, "loss": 0.8174, "step": 313 }, { "epoch": 0.1268622796828443, "grad_norm": 0.734375, "learning_rate": 0.00029288834951456306, "loss": 1.0021, "step": 314 }, { "epoch": 0.12726629968183426, "grad_norm": 0.66015625, "learning_rate": 0.0002928640776699029, "loss": 0.8684, "step": 315 }, { "epoch": 0.1276703196808242, "grad_norm": 0.66015625, "learning_rate": 0.00029283980582524267, "loss": 0.9502, "step": 316 }, { "epoch": 0.12807433967981416, "grad_norm": 0.71875, "learning_rate": 0.0002928155339805825, "loss": 1.0258, "step": 317 }, { "epoch": 0.1284783596788041, "grad_norm": 0.76953125, "learning_rate": 0.0002927912621359223, "loss": 0.998, "step": 318 }, { "epoch": 0.12888237967779406, "grad_norm": 0.76171875, "learning_rate": 0.0002927669902912621, "loss": 1.1369, "step": 319 }, { "epoch": 0.12928639967678401, "grad_norm": 0.67578125, "learning_rate": 0.00029274271844660193, "loss": 1.0052, "step": 320 }, { "epoch": 0.12969041967577394, "grad_norm": 0.72265625, "learning_rate": 0.0002927184466019417, "loss": 1.0297, "step": 321 }, { "epoch": 0.1300944396747639, "grad_norm": 0.6328125, "learning_rate": 0.00029269417475728154, "loss": 0.8447, "step": 322 }, { "epoch": 0.13049845967375384, "grad_norm": 0.69921875, "learning_rate": 0.00029266990291262136, "loss": 0.8856, "step": 323 }, { "epoch": 0.1309024796727438, "grad_norm": 0.7890625, "learning_rate": 0.00029264563106796114, "loss": 1.0237, "step": 324 }, { "epoch": 0.13130649967173375, "grad_norm": 0.7109375, "learning_rate": 0.0002926213592233009, "loss": 0.9483, "step": 325 }, { "epoch": 0.1317105196707237, "grad_norm": 0.69921875, "learning_rate": 0.00029259708737864074, "loss": 0.9272, "step": 326 }, { "epoch": 0.13211453966971365, "grad_norm": 0.6796875, "learning_rate": 0.0002925728155339806, "loss": 0.9646, "step": 327 }, { "epoch": 0.1325185596687036, "grad_norm": 0.6484375, "learning_rate": 0.00029254854368932035, "loss": 0.9909, "step": 328 }, { "epoch": 0.13292257966769355, "grad_norm": 0.765625, "learning_rate": 0.0002925242718446602, "loss": 1.0682, "step": 329 }, { "epoch": 0.1333265996666835, "grad_norm": 0.71875, "learning_rate": 0.00029249999999999995, "loss": 1.0289, "step": 330 }, { "epoch": 0.13373061966567346, "grad_norm": 0.74609375, "learning_rate": 0.0002924757281553398, "loss": 1.1211, "step": 331 }, { "epoch": 0.1341346396646634, "grad_norm": 0.6796875, "learning_rate": 0.0002924514563106796, "loss": 0.9447, "step": 332 }, { "epoch": 0.13453865966365336, "grad_norm": 0.578125, "learning_rate": 0.0002924271844660194, "loss": 0.9128, "step": 333 }, { "epoch": 0.1349426796626433, "grad_norm": 0.7109375, "learning_rate": 0.0002924029126213592, "loss": 0.9529, "step": 334 }, { "epoch": 0.13534669966163326, "grad_norm": 0.703125, "learning_rate": 0.000292378640776699, "loss": 0.92, "step": 335 }, { "epoch": 0.13575071966062321, "grad_norm": 0.73828125, "learning_rate": 0.0002923543689320388, "loss": 0.9382, "step": 336 }, { "epoch": 0.13615473965961314, "grad_norm": 0.78125, "learning_rate": 0.0002923300970873786, "loss": 0.9448, "step": 337 }, { "epoch": 0.1365587596586031, "grad_norm": 0.77734375, "learning_rate": 0.0002923058252427184, "loss": 1.0835, "step": 338 }, { "epoch": 0.13696277965759304, "grad_norm": 0.66015625, "learning_rate": 0.0002922815533980582, "loss": 0.9923, "step": 339 }, { "epoch": 0.137366799656583, "grad_norm": 0.78125, "learning_rate": 0.00029225728155339803, "loss": 1.0372, "step": 340 }, { "epoch": 0.13777081965557295, "grad_norm": 0.65234375, "learning_rate": 0.0002922330097087378, "loss": 0.8467, "step": 341 }, { "epoch": 0.1381748396545629, "grad_norm": 0.71875, "learning_rate": 0.00029220873786407764, "loss": 1.0142, "step": 342 }, { "epoch": 0.13857885965355285, "grad_norm": 0.61328125, "learning_rate": 0.00029218446601941747, "loss": 0.9025, "step": 343 }, { "epoch": 0.1389828796525428, "grad_norm": 0.71484375, "learning_rate": 0.00029216019417475724, "loss": 0.921, "step": 344 }, { "epoch": 0.13938689965153275, "grad_norm": 0.69140625, "learning_rate": 0.00029213592233009707, "loss": 1.0033, "step": 345 }, { "epoch": 0.1397909196505227, "grad_norm": 0.68359375, "learning_rate": 0.00029211165048543685, "loss": 0.8607, "step": 346 }, { "epoch": 0.14019493964951266, "grad_norm": 0.64453125, "learning_rate": 0.0002920873786407767, "loss": 0.9075, "step": 347 }, { "epoch": 0.1405989596485026, "grad_norm": 0.74609375, "learning_rate": 0.0002920631067961165, "loss": 1.0099, "step": 348 }, { "epoch": 0.14100297964749256, "grad_norm": 0.83203125, "learning_rate": 0.0002920388349514563, "loss": 0.9787, "step": 349 }, { "epoch": 0.1414069996464825, "grad_norm": 1.59375, "learning_rate": 0.0002920145631067961, "loss": 1.046, "step": 350 }, { "epoch": 0.14181101964547246, "grad_norm": 0.69140625, "learning_rate": 0.0002919902912621359, "loss": 0.95, "step": 351 }, { "epoch": 0.1422150396444624, "grad_norm": 0.68359375, "learning_rate": 0.0002919660194174757, "loss": 0.9394, "step": 352 }, { "epoch": 0.14261905964345234, "grad_norm": 0.8359375, "learning_rate": 0.00029194174757281554, "loss": 1.0869, "step": 353 }, { "epoch": 0.1430230796424423, "grad_norm": 0.66796875, "learning_rate": 0.0002919174757281553, "loss": 0.8593, "step": 354 }, { "epoch": 0.14342709964143224, "grad_norm": 0.8046875, "learning_rate": 0.0002918932038834951, "loss": 0.9251, "step": 355 }, { "epoch": 0.1438311196404222, "grad_norm": 0.72265625, "learning_rate": 0.0002918689320388349, "loss": 1.0662, "step": 356 }, { "epoch": 0.14423513963941215, "grad_norm": 0.6796875, "learning_rate": 0.00029184466019417475, "loss": 0.9809, "step": 357 }, { "epoch": 0.1446391596384021, "grad_norm": 0.62109375, "learning_rate": 0.00029182038834951453, "loss": 0.9219, "step": 358 }, { "epoch": 0.14504317963739205, "grad_norm": 0.6640625, "learning_rate": 0.00029179611650485436, "loss": 0.9277, "step": 359 }, { "epoch": 0.145447199636382, "grad_norm": 0.65234375, "learning_rate": 0.00029177184466019413, "loss": 0.9504, "step": 360 }, { "epoch": 0.14585121963537195, "grad_norm": 0.6640625, "learning_rate": 0.00029174757281553396, "loss": 0.947, "step": 361 }, { "epoch": 0.1462552396343619, "grad_norm": 0.6953125, "learning_rate": 0.00029172330097087374, "loss": 0.978, "step": 362 }, { "epoch": 0.14665925963335186, "grad_norm": 0.66015625, "learning_rate": 0.00029169902912621357, "loss": 0.8699, "step": 363 }, { "epoch": 0.1470632796323418, "grad_norm": 0.83203125, "learning_rate": 0.0002916747572815534, "loss": 1.1887, "step": 364 }, { "epoch": 0.14746729963133176, "grad_norm": 1.046875, "learning_rate": 0.00029165048543689317, "loss": 1.0645, "step": 365 }, { "epoch": 0.1478713196303217, "grad_norm": 0.76953125, "learning_rate": 0.00029162621359223295, "loss": 0.9479, "step": 366 }, { "epoch": 0.14827533962931166, "grad_norm": 0.79296875, "learning_rate": 0.0002916019417475728, "loss": 1.1128, "step": 367 }, { "epoch": 0.1486793596283016, "grad_norm": 0.6328125, "learning_rate": 0.0002915776699029126, "loss": 0.8824, "step": 368 }, { "epoch": 0.14908337962729154, "grad_norm": 0.83984375, "learning_rate": 0.0002915533980582524, "loss": 1.0144, "step": 369 }, { "epoch": 0.1494873996262815, "grad_norm": 0.78125, "learning_rate": 0.0002915291262135922, "loss": 0.9871, "step": 370 }, { "epoch": 0.14989141962527144, "grad_norm": 0.640625, "learning_rate": 0.000291504854368932, "loss": 0.9508, "step": 371 }, { "epoch": 0.1502954396242614, "grad_norm": 0.73046875, "learning_rate": 0.0002914805825242718, "loss": 0.9727, "step": 372 }, { "epoch": 0.15069945962325135, "grad_norm": 0.796875, "learning_rate": 0.00029145631067961164, "loss": 0.9153, "step": 373 }, { "epoch": 0.1511034796222413, "grad_norm": 0.671875, "learning_rate": 0.0002914320388349514, "loss": 0.9147, "step": 374 }, { "epoch": 0.15150749962123125, "grad_norm": 0.6875, "learning_rate": 0.00029140776699029125, "loss": 0.9147, "step": 375 }, { "epoch": 0.1519115196202212, "grad_norm": 0.84765625, "learning_rate": 0.000291383495145631, "loss": 1.018, "step": 376 }, { "epoch": 0.15231553961921115, "grad_norm": 0.71875, "learning_rate": 0.00029135922330097085, "loss": 0.8901, "step": 377 }, { "epoch": 0.1527195596182011, "grad_norm": 0.94921875, "learning_rate": 0.0002913349514563107, "loss": 0.9848, "step": 378 }, { "epoch": 0.15312357961719106, "grad_norm": 0.75, "learning_rate": 0.00029131067961165046, "loss": 1.0765, "step": 379 }, { "epoch": 0.153527599616181, "grad_norm": 0.66796875, "learning_rate": 0.0002912864077669903, "loss": 0.9138, "step": 380 }, { "epoch": 0.15393161961517096, "grad_norm": 0.78125, "learning_rate": 0.00029126213592233006, "loss": 0.8982, "step": 381 }, { "epoch": 0.1543356396141609, "grad_norm": 0.671875, "learning_rate": 0.0002912378640776699, "loss": 0.9418, "step": 382 }, { "epoch": 0.15473965961315084, "grad_norm": 0.7421875, "learning_rate": 0.0002912135922330097, "loss": 0.9668, "step": 383 }, { "epoch": 0.1551436796121408, "grad_norm": 0.68359375, "learning_rate": 0.0002911893203883495, "loss": 0.9175, "step": 384 }, { "epoch": 0.15554769961113074, "grad_norm": 0.82421875, "learning_rate": 0.00029116504854368927, "loss": 1.0371, "step": 385 }, { "epoch": 0.1559517196101207, "grad_norm": 0.7421875, "learning_rate": 0.0002911407766990291, "loss": 1.0674, "step": 386 }, { "epoch": 0.15635573960911064, "grad_norm": 0.80859375, "learning_rate": 0.0002911165048543689, "loss": 1.0343, "step": 387 }, { "epoch": 0.1567597596081006, "grad_norm": 1.4609375, "learning_rate": 0.0002910922330097087, "loss": 0.9558, "step": 388 }, { "epoch": 0.15716377960709055, "grad_norm": 0.71484375, "learning_rate": 0.00029106796116504854, "loss": 1.0743, "step": 389 }, { "epoch": 0.1575677996060805, "grad_norm": 0.6875, "learning_rate": 0.0002910436893203883, "loss": 1.0006, "step": 390 }, { "epoch": 0.15797181960507045, "grad_norm": 0.6953125, "learning_rate": 0.00029101941747572814, "loss": 1.0064, "step": 391 }, { "epoch": 0.1583758396040604, "grad_norm": 0.65234375, "learning_rate": 0.0002909951456310679, "loss": 1.0054, "step": 392 }, { "epoch": 0.15877985960305036, "grad_norm": 0.6796875, "learning_rate": 0.00029097087378640775, "loss": 0.821, "step": 393 }, { "epoch": 0.1591838796020403, "grad_norm": 0.66015625, "learning_rate": 0.0002909466019417476, "loss": 0.9554, "step": 394 }, { "epoch": 0.15958789960103026, "grad_norm": 0.5859375, "learning_rate": 0.00029092233009708735, "loss": 0.8887, "step": 395 }, { "epoch": 0.1599919196000202, "grad_norm": 0.75, "learning_rate": 0.0002908980582524271, "loss": 1.0168, "step": 396 }, { "epoch": 0.16039593959901016, "grad_norm": 0.67578125, "learning_rate": 0.00029087378640776695, "loss": 0.9522, "step": 397 }, { "epoch": 0.16079995959800011, "grad_norm": 0.76953125, "learning_rate": 0.0002908495145631068, "loss": 0.96, "step": 398 }, { "epoch": 0.16120397959699004, "grad_norm": 0.6484375, "learning_rate": 0.00029082524271844656, "loss": 0.9688, "step": 399 }, { "epoch": 0.16160799959598, "grad_norm": 0.6484375, "learning_rate": 0.0002908009708737864, "loss": 0.9729, "step": 400 }, { "epoch": 0.16201201959496994, "grad_norm": 0.73046875, "learning_rate": 0.00029077669902912616, "loss": 0.9569, "step": 401 }, { "epoch": 0.1624160395939599, "grad_norm": 0.69921875, "learning_rate": 0.000290752427184466, "loss": 0.9364, "step": 402 }, { "epoch": 0.16282005959294985, "grad_norm": 0.625, "learning_rate": 0.0002907281553398058, "loss": 0.8842, "step": 403 }, { "epoch": 0.1632240795919398, "grad_norm": 0.6953125, "learning_rate": 0.0002907038834951456, "loss": 0.8996, "step": 404 }, { "epoch": 0.16362809959092975, "grad_norm": 0.58984375, "learning_rate": 0.00029067961165048543, "loss": 0.8592, "step": 405 }, { "epoch": 0.1640321195899197, "grad_norm": 0.58984375, "learning_rate": 0.0002906553398058252, "loss": 0.946, "step": 406 }, { "epoch": 0.16443613958890965, "grad_norm": 0.59765625, "learning_rate": 0.00029063106796116503, "loss": 0.9374, "step": 407 }, { "epoch": 0.1648401595878996, "grad_norm": 0.64453125, "learning_rate": 0.00029060679611650486, "loss": 0.9407, "step": 408 }, { "epoch": 0.16524417958688956, "grad_norm": 0.66796875, "learning_rate": 0.00029058252427184464, "loss": 0.9434, "step": 409 }, { "epoch": 0.1656481995858795, "grad_norm": 0.6015625, "learning_rate": 0.00029055825242718447, "loss": 0.8999, "step": 410 }, { "epoch": 0.16605221958486946, "grad_norm": 0.9765625, "learning_rate": 0.00029053398058252424, "loss": 0.9597, "step": 411 }, { "epoch": 0.1664562395838594, "grad_norm": 0.62109375, "learning_rate": 0.00029050970873786407, "loss": 0.9277, "step": 412 }, { "epoch": 0.16686025958284936, "grad_norm": 0.6796875, "learning_rate": 0.00029048543689320385, "loss": 0.9732, "step": 413 }, { "epoch": 0.16726427958183931, "grad_norm": 0.625, "learning_rate": 0.0002904611650485437, "loss": 0.9582, "step": 414 }, { "epoch": 0.16766829958082924, "grad_norm": 0.609375, "learning_rate": 0.00029043689320388345, "loss": 0.8973, "step": 415 }, { "epoch": 0.1680723195798192, "grad_norm": 0.640625, "learning_rate": 0.0002904126213592233, "loss": 0.9233, "step": 416 }, { "epoch": 0.16847633957880914, "grad_norm": 0.74609375, "learning_rate": 0.00029038834951456306, "loss": 1.0908, "step": 417 }, { "epoch": 0.1688803595777991, "grad_norm": 0.7265625, "learning_rate": 0.0002903640776699029, "loss": 0.9227, "step": 418 }, { "epoch": 0.16928437957678905, "grad_norm": 0.6328125, "learning_rate": 0.0002903398058252427, "loss": 0.8926, "step": 419 }, { "epoch": 0.169688399575779, "grad_norm": 0.859375, "learning_rate": 0.0002903155339805825, "loss": 1.0873, "step": 420 }, { "epoch": 0.17009241957476895, "grad_norm": 0.5625, "learning_rate": 0.0002902912621359223, "loss": 0.8927, "step": 421 }, { "epoch": 0.1704964395737589, "grad_norm": 0.96484375, "learning_rate": 0.0002902669902912621, "loss": 0.9399, "step": 422 }, { "epoch": 0.17090045957274885, "grad_norm": 1.03125, "learning_rate": 0.0002902427184466019, "loss": 0.9971, "step": 423 }, { "epoch": 0.1713044795717388, "grad_norm": 0.6328125, "learning_rate": 0.00029021844660194175, "loss": 0.9264, "step": 424 }, { "epoch": 0.17170849957072876, "grad_norm": 0.59765625, "learning_rate": 0.00029019417475728153, "loss": 0.8329, "step": 425 }, { "epoch": 0.1721125195697187, "grad_norm": 0.70703125, "learning_rate": 0.0002901699029126213, "loss": 0.9387, "step": 426 }, { "epoch": 0.17251653956870866, "grad_norm": 0.60546875, "learning_rate": 0.00029014563106796113, "loss": 0.9553, "step": 427 }, { "epoch": 0.1729205595676986, "grad_norm": 0.71484375, "learning_rate": 0.00029012135922330096, "loss": 1.0072, "step": 428 }, { "epoch": 0.17332457956668856, "grad_norm": 0.91796875, "learning_rate": 0.00029009708737864074, "loss": 1.0054, "step": 429 }, { "epoch": 0.1737285995656785, "grad_norm": 0.6640625, "learning_rate": 0.00029007281553398057, "loss": 0.7583, "step": 430 }, { "epoch": 0.17413261956466844, "grad_norm": 0.640625, "learning_rate": 0.00029004854368932034, "loss": 0.8443, "step": 431 }, { "epoch": 0.1745366395636584, "grad_norm": 0.71484375, "learning_rate": 0.00029002427184466017, "loss": 0.9096, "step": 432 }, { "epoch": 0.17494065956264834, "grad_norm": 0.6328125, "learning_rate": 0.00029, "loss": 0.8336, "step": 433 }, { "epoch": 0.1753446795616383, "grad_norm": 0.6015625, "learning_rate": 0.0002899757281553398, "loss": 0.8841, "step": 434 }, { "epoch": 0.17574869956062825, "grad_norm": 0.6484375, "learning_rate": 0.0002899514563106796, "loss": 1.0032, "step": 435 }, { "epoch": 0.1761527195596182, "grad_norm": 0.80078125, "learning_rate": 0.0002899271844660194, "loss": 0.9524, "step": 436 }, { "epoch": 0.17655673955860815, "grad_norm": 0.58203125, "learning_rate": 0.0002899029126213592, "loss": 0.8574, "step": 437 }, { "epoch": 0.1769607595575981, "grad_norm": 1.015625, "learning_rate": 0.000289878640776699, "loss": 1.0049, "step": 438 }, { "epoch": 0.17736477955658805, "grad_norm": 0.7890625, "learning_rate": 0.0002898543689320388, "loss": 1.0577, "step": 439 }, { "epoch": 0.177768799555578, "grad_norm": 0.82421875, "learning_rate": 0.0002898300970873786, "loss": 0.9255, "step": 440 }, { "epoch": 0.17817281955456796, "grad_norm": 0.59765625, "learning_rate": 0.0002898058252427184, "loss": 0.8805, "step": 441 }, { "epoch": 0.1785768395535579, "grad_norm": 1.46875, "learning_rate": 0.0002897815533980582, "loss": 1.116, "step": 442 }, { "epoch": 0.17898085955254786, "grad_norm": 0.66796875, "learning_rate": 0.000289757281553398, "loss": 0.8507, "step": 443 }, { "epoch": 0.1793848795515378, "grad_norm": 0.72265625, "learning_rate": 0.00028973300970873785, "loss": 0.9159, "step": 444 }, { "epoch": 0.17978889955052776, "grad_norm": 0.6796875, "learning_rate": 0.00028970873786407763, "loss": 0.9899, "step": 445 }, { "epoch": 0.1801929195495177, "grad_norm": 0.6796875, "learning_rate": 0.00028968446601941746, "loss": 1.0286, "step": 446 }, { "epoch": 0.18059693954850764, "grad_norm": 0.91015625, "learning_rate": 0.00028966019417475723, "loss": 1.0754, "step": 447 }, { "epoch": 0.1810009595474976, "grad_norm": 0.71484375, "learning_rate": 0.00028963592233009706, "loss": 0.9649, "step": 448 }, { "epoch": 0.18140497954648754, "grad_norm": 0.72265625, "learning_rate": 0.0002896116504854369, "loss": 0.9307, "step": 449 }, { "epoch": 0.1818089995454775, "grad_norm": 0.74609375, "learning_rate": 0.00028958737864077667, "loss": 1.0451, "step": 450 }, { "epoch": 0.18221301954446745, "grad_norm": 0.71875, "learning_rate": 0.0002895631067961165, "loss": 1.0017, "step": 451 }, { "epoch": 0.1826170395434574, "grad_norm": 0.60546875, "learning_rate": 0.0002895388349514563, "loss": 0.8848, "step": 452 }, { "epoch": 0.18302105954244735, "grad_norm": 0.69921875, "learning_rate": 0.0002895145631067961, "loss": 0.9599, "step": 453 }, { "epoch": 0.1834250795414373, "grad_norm": 0.78125, "learning_rate": 0.00028949029126213593, "loss": 1.0509, "step": 454 }, { "epoch": 0.18382909954042725, "grad_norm": 0.64453125, "learning_rate": 0.0002894660194174757, "loss": 0.9038, "step": 455 }, { "epoch": 0.1842331195394172, "grad_norm": 0.62109375, "learning_rate": 0.0002894417475728155, "loss": 0.9076, "step": 456 }, { "epoch": 0.18463713953840716, "grad_norm": 0.64453125, "learning_rate": 0.0002894174757281553, "loss": 0.8815, "step": 457 }, { "epoch": 0.1850411595373971, "grad_norm": 0.76171875, "learning_rate": 0.00028939320388349514, "loss": 1.1018, "step": 458 }, { "epoch": 0.18544517953638706, "grad_norm": 0.734375, "learning_rate": 0.0002893689320388349, "loss": 0.8771, "step": 459 }, { "epoch": 0.185849199535377, "grad_norm": 0.64453125, "learning_rate": 0.00028934466019417475, "loss": 0.9859, "step": 460 }, { "epoch": 0.18625321953436696, "grad_norm": 0.7890625, "learning_rate": 0.0002893203883495145, "loss": 1.0455, "step": 461 }, { "epoch": 0.1866572395333569, "grad_norm": 0.546875, "learning_rate": 0.00028929611650485435, "loss": 0.8593, "step": 462 }, { "epoch": 0.18706125953234684, "grad_norm": 0.71875, "learning_rate": 0.0002892718446601941, "loss": 0.9824, "step": 463 }, { "epoch": 0.1874652795313368, "grad_norm": 0.69140625, "learning_rate": 0.00028924757281553396, "loss": 0.9396, "step": 464 }, { "epoch": 0.18786929953032674, "grad_norm": 0.734375, "learning_rate": 0.0002892233009708738, "loss": 0.9899, "step": 465 }, { "epoch": 0.1882733195293167, "grad_norm": 0.6875, "learning_rate": 0.00028919902912621356, "loss": 1.0053, "step": 466 }, { "epoch": 0.18867733952830665, "grad_norm": 0.6875, "learning_rate": 0.00028917475728155334, "loss": 0.9602, "step": 467 }, { "epoch": 0.1890813595272966, "grad_norm": 0.71875, "learning_rate": 0.00028915048543689316, "loss": 0.9674, "step": 468 }, { "epoch": 0.18948537952628655, "grad_norm": 0.6484375, "learning_rate": 0.000289126213592233, "loss": 0.9548, "step": 469 }, { "epoch": 0.1898893995252765, "grad_norm": 0.6484375, "learning_rate": 0.00028910194174757277, "loss": 0.9667, "step": 470 }, { "epoch": 0.19029341952426646, "grad_norm": 0.66015625, "learning_rate": 0.0002890776699029126, "loss": 0.9601, "step": 471 }, { "epoch": 0.1906974395232564, "grad_norm": 1.390625, "learning_rate": 0.0002890533980582524, "loss": 1.0437, "step": 472 }, { "epoch": 0.19110145952224636, "grad_norm": 0.71484375, "learning_rate": 0.0002890291262135922, "loss": 1.013, "step": 473 }, { "epoch": 0.1915054795212363, "grad_norm": 0.73046875, "learning_rate": 0.00028900485436893203, "loss": 0.9955, "step": 474 }, { "epoch": 0.19190949952022626, "grad_norm": 6.5, "learning_rate": 0.0002889805825242718, "loss": 1.217, "step": 475 }, { "epoch": 0.19231351951921621, "grad_norm": 0.6171875, "learning_rate": 0.00028895631067961164, "loss": 0.9331, "step": 476 }, { "epoch": 0.19271753951820614, "grad_norm": 0.8046875, "learning_rate": 0.0002889320388349514, "loss": 0.9918, "step": 477 }, { "epoch": 0.1931215595171961, "grad_norm": 0.703125, "learning_rate": 0.00028890776699029124, "loss": 0.9231, "step": 478 }, { "epoch": 0.19352557951618604, "grad_norm": 0.58203125, "learning_rate": 0.00028888349514563107, "loss": 0.8452, "step": 479 }, { "epoch": 0.193929599515176, "grad_norm": 0.625, "learning_rate": 0.00028885922330097085, "loss": 0.9044, "step": 480 }, { "epoch": 0.19433361951416595, "grad_norm": 0.90625, "learning_rate": 0.0002888349514563107, "loss": 0.9561, "step": 481 }, { "epoch": 0.1947376395131559, "grad_norm": 0.68359375, "learning_rate": 0.00028881067961165045, "loss": 0.9036, "step": 482 }, { "epoch": 0.19514165951214585, "grad_norm": 0.609375, "learning_rate": 0.0002887864077669903, "loss": 0.8151, "step": 483 }, { "epoch": 0.1955456795111358, "grad_norm": 0.68359375, "learning_rate": 0.0002887621359223301, "loss": 0.8803, "step": 484 }, { "epoch": 0.19594969951012575, "grad_norm": 0.73828125, "learning_rate": 0.0002887378640776699, "loss": 0.9758, "step": 485 }, { "epoch": 0.1963537195091157, "grad_norm": 0.6171875, "learning_rate": 0.00028871359223300966, "loss": 0.8342, "step": 486 }, { "epoch": 0.19675773950810566, "grad_norm": 0.73828125, "learning_rate": 0.0002886893203883495, "loss": 0.9107, "step": 487 }, { "epoch": 0.1971617595070956, "grad_norm": 0.62890625, "learning_rate": 0.00028866504854368927, "loss": 0.937, "step": 488 }, { "epoch": 0.19756577950608556, "grad_norm": 0.76171875, "learning_rate": 0.0002886407766990291, "loss": 0.9575, "step": 489 }, { "epoch": 0.1979697995050755, "grad_norm": 0.796875, "learning_rate": 0.0002886165048543689, "loss": 0.9534, "step": 490 }, { "epoch": 0.19837381950406546, "grad_norm": 0.77734375, "learning_rate": 0.0002885922330097087, "loss": 0.9845, "step": 491 }, { "epoch": 0.19877783950305541, "grad_norm": 0.78125, "learning_rate": 0.00028856796116504853, "loss": 1.0751, "step": 492 }, { "epoch": 0.19918185950204534, "grad_norm": 0.73828125, "learning_rate": 0.0002885436893203883, "loss": 0.9038, "step": 493 }, { "epoch": 0.1995858795010353, "grad_norm": 0.6875, "learning_rate": 0.00028851941747572813, "loss": 0.9239, "step": 494 }, { "epoch": 0.19998989950002524, "grad_norm": 0.671875, "learning_rate": 0.00028849514563106796, "loss": 0.9502, "step": 495 }, { "epoch": 0.2003939194990152, "grad_norm": 0.609375, "learning_rate": 0.00028847087378640774, "loss": 0.8342, "step": 496 }, { "epoch": 0.20079793949800515, "grad_norm": 0.62890625, "learning_rate": 0.0002884466019417475, "loss": 0.8999, "step": 497 }, { "epoch": 0.2012019594969951, "grad_norm": 0.71875, "learning_rate": 0.00028842233009708734, "loss": 1.0026, "step": 498 }, { "epoch": 0.20160597949598505, "grad_norm": 0.625, "learning_rate": 0.0002883980582524272, "loss": 0.957, "step": 499 }, { "epoch": 0.202009999494975, "grad_norm": 0.609375, "learning_rate": 0.00028837378640776695, "loss": 0.8801, "step": 500 }, { "epoch": 0.20241401949396495, "grad_norm": 0.7578125, "learning_rate": 0.0002883495145631068, "loss": 0.9967, "step": 501 }, { "epoch": 0.2028180394929549, "grad_norm": 0.79296875, "learning_rate": 0.00028832524271844655, "loss": 0.9743, "step": 502 }, { "epoch": 0.20322205949194486, "grad_norm": 0.68359375, "learning_rate": 0.0002883009708737864, "loss": 1.0198, "step": 503 }, { "epoch": 0.2036260794909348, "grad_norm": 0.69921875, "learning_rate": 0.0002882766990291262, "loss": 0.9923, "step": 504 }, { "epoch": 0.20403009948992476, "grad_norm": 0.75390625, "learning_rate": 0.000288252427184466, "loss": 1.0495, "step": 505 }, { "epoch": 0.2044341194889147, "grad_norm": 0.73828125, "learning_rate": 0.0002882281553398058, "loss": 1.0756, "step": 506 }, { "epoch": 0.20483813948790466, "grad_norm": 0.65625, "learning_rate": 0.0002882038834951456, "loss": 0.8983, "step": 507 }, { "epoch": 0.2052421594868946, "grad_norm": 0.69140625, "learning_rate": 0.0002881796116504854, "loss": 0.9395, "step": 508 }, { "epoch": 0.20564617948588454, "grad_norm": 0.57421875, "learning_rate": 0.00028815533980582525, "loss": 0.8725, "step": 509 }, { "epoch": 0.2060501994848745, "grad_norm": 0.77734375, "learning_rate": 0.000288131067961165, "loss": 1.0873, "step": 510 }, { "epoch": 0.20645421948386444, "grad_norm": 0.65234375, "learning_rate": 0.00028810679611650486, "loss": 0.9895, "step": 511 }, { "epoch": 0.2068582394828544, "grad_norm": 0.734375, "learning_rate": 0.00028808252427184463, "loss": 0.884, "step": 512 }, { "epoch": 0.20726225948184435, "grad_norm": 0.58984375, "learning_rate": 0.0002880582524271844, "loss": 0.869, "step": 513 }, { "epoch": 0.2076662794808343, "grad_norm": 0.8203125, "learning_rate": 0.00028803398058252424, "loss": 0.9871, "step": 514 }, { "epoch": 0.20807029947982425, "grad_norm": 0.65234375, "learning_rate": 0.00028800970873786406, "loss": 0.8309, "step": 515 }, { "epoch": 0.2084743194788142, "grad_norm": 0.8046875, "learning_rate": 0.00028798543689320384, "loss": 1.0642, "step": 516 }, { "epoch": 0.20887833947780415, "grad_norm": 0.66015625, "learning_rate": 0.00028796116504854367, "loss": 0.9013, "step": 517 }, { "epoch": 0.2092823594767941, "grad_norm": 0.89453125, "learning_rate": 0.00028793689320388344, "loss": 1.138, "step": 518 }, { "epoch": 0.20968637947578406, "grad_norm": 0.76953125, "learning_rate": 0.0002879126213592233, "loss": 0.8747, "step": 519 }, { "epoch": 0.210090399474774, "grad_norm": 0.9296875, "learning_rate": 0.0002878883495145631, "loss": 1.0327, "step": 520 }, { "epoch": 0.21049441947376396, "grad_norm": 0.6640625, "learning_rate": 0.0002878640776699029, "loss": 0.9591, "step": 521 }, { "epoch": 0.2108984394727539, "grad_norm": 0.6796875, "learning_rate": 0.0002878398058252427, "loss": 0.9316, "step": 522 }, { "epoch": 0.21130245947174386, "grad_norm": 0.7109375, "learning_rate": 0.0002878155339805825, "loss": 1.0135, "step": 523 }, { "epoch": 0.2117064794707338, "grad_norm": 0.7265625, "learning_rate": 0.0002877912621359223, "loss": 0.884, "step": 524 }, { "epoch": 0.21211049946972374, "grad_norm": 0.93359375, "learning_rate": 0.00028776699029126214, "loss": 1.0663, "step": 525 }, { "epoch": 0.2125145194687137, "grad_norm": 0.65234375, "learning_rate": 0.0002877427184466019, "loss": 0.892, "step": 526 }, { "epoch": 0.21291853946770364, "grad_norm": 0.83203125, "learning_rate": 0.0002877184466019417, "loss": 1.0309, "step": 527 }, { "epoch": 0.2133225594666936, "grad_norm": 0.65234375, "learning_rate": 0.0002876941747572815, "loss": 0.8718, "step": 528 }, { "epoch": 0.21372657946568355, "grad_norm": 0.63671875, "learning_rate": 0.00028766990291262135, "loss": 0.983, "step": 529 }, { "epoch": 0.2141305994646735, "grad_norm": 0.65234375, "learning_rate": 0.0002876456310679611, "loss": 0.9089, "step": 530 }, { "epoch": 0.21453461946366345, "grad_norm": 0.703125, "learning_rate": 0.00028762135922330096, "loss": 1.0427, "step": 531 }, { "epoch": 0.2149386394626534, "grad_norm": 0.6015625, "learning_rate": 0.00028759708737864073, "loss": 0.9013, "step": 532 }, { "epoch": 0.21534265946164335, "grad_norm": 0.65234375, "learning_rate": 0.00028757281553398056, "loss": 0.8577, "step": 533 }, { "epoch": 0.2157466794606333, "grad_norm": 0.6171875, "learning_rate": 0.0002875485436893204, "loss": 0.896, "step": 534 }, { "epoch": 0.21615069945962326, "grad_norm": 0.640625, "learning_rate": 0.00028752427184466017, "loss": 0.8859, "step": 535 }, { "epoch": 0.2165547194586132, "grad_norm": 0.69140625, "learning_rate": 0.0002875, "loss": 0.8967, "step": 536 }, { "epoch": 0.21695873945760316, "grad_norm": 0.66015625, "learning_rate": 0.00028747572815533977, "loss": 0.9091, "step": 537 }, { "epoch": 0.2173627594565931, "grad_norm": 0.625, "learning_rate": 0.0002874514563106796, "loss": 0.9385, "step": 538 }, { "epoch": 0.21776677945558306, "grad_norm": 1.0703125, "learning_rate": 0.0002874271844660194, "loss": 1.0022, "step": 539 }, { "epoch": 0.218170799454573, "grad_norm": 0.546875, "learning_rate": 0.0002874029126213592, "loss": 0.8623, "step": 540 }, { "epoch": 0.21857481945356294, "grad_norm": 0.8046875, "learning_rate": 0.000287378640776699, "loss": 1.0123, "step": 541 }, { "epoch": 0.2189788394525529, "grad_norm": 0.67578125, "learning_rate": 0.0002873543689320388, "loss": 0.9064, "step": 542 }, { "epoch": 0.21938285945154284, "grad_norm": 0.57421875, "learning_rate": 0.0002873300970873786, "loss": 0.8887, "step": 543 }, { "epoch": 0.2197868794505328, "grad_norm": 1.8203125, "learning_rate": 0.0002873058252427184, "loss": 0.9723, "step": 544 }, { "epoch": 0.22019089944952275, "grad_norm": 0.546875, "learning_rate": 0.00028728155339805824, "loss": 0.8342, "step": 545 }, { "epoch": 0.2205949194485127, "grad_norm": 0.68359375, "learning_rate": 0.000287257281553398, "loss": 0.9256, "step": 546 }, { "epoch": 0.22099893944750265, "grad_norm": 0.6171875, "learning_rate": 0.00028723300970873785, "loss": 0.924, "step": 547 }, { "epoch": 0.2214029594464926, "grad_norm": 0.6171875, "learning_rate": 0.0002872087378640776, "loss": 0.9493, "step": 548 }, { "epoch": 0.22180697944548255, "grad_norm": 0.6328125, "learning_rate": 0.00028718446601941745, "loss": 0.7858, "step": 549 }, { "epoch": 0.2222109994444725, "grad_norm": 0.62890625, "learning_rate": 0.0002871601941747573, "loss": 0.9084, "step": 550 }, { "epoch": 0.22261501944346246, "grad_norm": 0.64453125, "learning_rate": 0.00028713592233009706, "loss": 0.887, "step": 551 }, { "epoch": 0.2230190394424524, "grad_norm": 0.7421875, "learning_rate": 0.0002871116504854369, "loss": 1.0842, "step": 552 }, { "epoch": 0.22342305944144236, "grad_norm": 0.734375, "learning_rate": 0.00028708737864077666, "loss": 0.9642, "step": 553 }, { "epoch": 0.2238270794404323, "grad_norm": 0.6875, "learning_rate": 0.0002870631067961165, "loss": 0.9266, "step": 554 }, { "epoch": 0.22423109943942224, "grad_norm": 0.63671875, "learning_rate": 0.0002870388349514563, "loss": 0.8952, "step": 555 }, { "epoch": 0.2246351194384122, "grad_norm": 0.5703125, "learning_rate": 0.0002870145631067961, "loss": 0.8276, "step": 556 }, { "epoch": 0.22503913943740214, "grad_norm": 0.58203125, "learning_rate": 0.00028699029126213587, "loss": 0.8826, "step": 557 }, { "epoch": 0.2254431594363921, "grad_norm": 0.69140625, "learning_rate": 0.0002869660194174757, "loss": 0.8993, "step": 558 }, { "epoch": 0.22584717943538204, "grad_norm": 0.640625, "learning_rate": 0.00028694174757281553, "loss": 0.9939, "step": 559 }, { "epoch": 0.226251199434372, "grad_norm": 0.67578125, "learning_rate": 0.0002869174757281553, "loss": 0.8858, "step": 560 }, { "epoch": 0.22665521943336195, "grad_norm": 0.7265625, "learning_rate": 0.00028689320388349513, "loss": 1.034, "step": 561 }, { "epoch": 0.2270592394323519, "grad_norm": 0.76171875, "learning_rate": 0.0002868689320388349, "loss": 0.9795, "step": 562 }, { "epoch": 0.22746325943134185, "grad_norm": 0.765625, "learning_rate": 0.00028684466019417474, "loss": 1.0035, "step": 563 }, { "epoch": 0.2278672794303318, "grad_norm": 0.6640625, "learning_rate": 0.0002868203883495145, "loss": 0.9007, "step": 564 }, { "epoch": 0.22827129942932176, "grad_norm": 1.125, "learning_rate": 0.00028679611650485434, "loss": 0.9216, "step": 565 }, { "epoch": 0.2286753194283117, "grad_norm": 0.59375, "learning_rate": 0.0002867718446601942, "loss": 0.8631, "step": 566 }, { "epoch": 0.22907933942730166, "grad_norm": 1.4140625, "learning_rate": 0.00028674757281553395, "loss": 1.0407, "step": 567 }, { "epoch": 0.2294833594262916, "grad_norm": 0.69921875, "learning_rate": 0.0002867233009708737, "loss": 0.8925, "step": 568 }, { "epoch": 0.22988737942528156, "grad_norm": 1.390625, "learning_rate": 0.00028669902912621355, "loss": 1.1478, "step": 569 }, { "epoch": 0.23029139942427151, "grad_norm": 0.69140625, "learning_rate": 0.0002866747572815534, "loss": 0.9599, "step": 570 }, { "epoch": 0.23069541942326144, "grad_norm": 0.796875, "learning_rate": 0.00028665048543689316, "loss": 1.0164, "step": 571 }, { "epoch": 0.2310994394222514, "grad_norm": 0.69921875, "learning_rate": 0.000286626213592233, "loss": 0.8927, "step": 572 }, { "epoch": 0.23150345942124134, "grad_norm": 0.6328125, "learning_rate": 0.00028660194174757276, "loss": 0.9198, "step": 573 }, { "epoch": 0.2319074794202313, "grad_norm": 0.8515625, "learning_rate": 0.0002865776699029126, "loss": 1.0202, "step": 574 }, { "epoch": 0.23231149941922125, "grad_norm": 0.76953125, "learning_rate": 0.0002865533980582524, "loss": 0.9704, "step": 575 }, { "epoch": 0.2327155194182112, "grad_norm": 0.76171875, "learning_rate": 0.0002865291262135922, "loss": 1.1209, "step": 576 }, { "epoch": 0.23311953941720115, "grad_norm": 0.70703125, "learning_rate": 0.000286504854368932, "loss": 1.0094, "step": 577 }, { "epoch": 0.2335235594161911, "grad_norm": 0.62109375, "learning_rate": 0.0002864805825242718, "loss": 0.8508, "step": 578 }, { "epoch": 0.23392757941518105, "grad_norm": 0.734375, "learning_rate": 0.00028645631067961163, "loss": 1.1793, "step": 579 }, { "epoch": 0.234331599414171, "grad_norm": 0.7109375, "learning_rate": 0.00028643203883495146, "loss": 0.9887, "step": 580 }, { "epoch": 0.23473561941316096, "grad_norm": 0.63671875, "learning_rate": 0.00028640776699029124, "loss": 0.9601, "step": 581 }, { "epoch": 0.2351396394121509, "grad_norm": 0.70703125, "learning_rate": 0.00028638349514563107, "loss": 1.0661, "step": 582 }, { "epoch": 0.23554365941114086, "grad_norm": 0.60546875, "learning_rate": 0.00028635922330097084, "loss": 0.8554, "step": 583 }, { "epoch": 0.2359476794101308, "grad_norm": 0.859375, "learning_rate": 0.00028633495145631067, "loss": 0.8983, "step": 584 }, { "epoch": 0.23635169940912076, "grad_norm": 0.6015625, "learning_rate": 0.0002863106796116505, "loss": 0.9391, "step": 585 }, { "epoch": 0.23675571940811072, "grad_norm": 2.328125, "learning_rate": 0.0002862864077669903, "loss": 0.9954, "step": 586 }, { "epoch": 0.23715973940710064, "grad_norm": 0.578125, "learning_rate": 0.00028626213592233005, "loss": 0.8463, "step": 587 }, { "epoch": 0.2375637594060906, "grad_norm": 0.60546875, "learning_rate": 0.0002862378640776699, "loss": 0.8266, "step": 588 }, { "epoch": 0.23796777940508054, "grad_norm": 0.75390625, "learning_rate": 0.00028621359223300965, "loss": 1.1127, "step": 589 }, { "epoch": 0.2383717994040705, "grad_norm": 0.7109375, "learning_rate": 0.0002861893203883495, "loss": 1.0256, "step": 590 }, { "epoch": 0.23877581940306045, "grad_norm": 0.83203125, "learning_rate": 0.0002861650485436893, "loss": 0.834, "step": 591 }, { "epoch": 0.2391798394020504, "grad_norm": 0.6328125, "learning_rate": 0.0002861407766990291, "loss": 0.8038, "step": 592 }, { "epoch": 0.23958385940104035, "grad_norm": 0.66796875, "learning_rate": 0.0002861165048543689, "loss": 0.9478, "step": 593 }, { "epoch": 0.2399878794000303, "grad_norm": 0.79296875, "learning_rate": 0.0002860922330097087, "loss": 1.0882, "step": 594 }, { "epoch": 0.24039189939902025, "grad_norm": 0.6328125, "learning_rate": 0.0002860679611650485, "loss": 0.9208, "step": 595 }, { "epoch": 0.2407959193980102, "grad_norm": 0.60546875, "learning_rate": 0.00028604368932038835, "loss": 0.9013, "step": 596 }, { "epoch": 0.24119993939700016, "grad_norm": 0.6328125, "learning_rate": 0.00028601941747572813, "loss": 0.8794, "step": 597 }, { "epoch": 0.2416039593959901, "grad_norm": 0.6953125, "learning_rate": 0.0002859951456310679, "loss": 0.9299, "step": 598 }, { "epoch": 0.24200797939498006, "grad_norm": 0.78125, "learning_rate": 0.00028597087378640773, "loss": 1.034, "step": 599 }, { "epoch": 0.24241199939397, "grad_norm": 0.640625, "learning_rate": 0.00028594660194174756, "loss": 0.9038, "step": 600 }, { "epoch": 0.24281601939295996, "grad_norm": 0.7265625, "learning_rate": 0.00028592233009708734, "loss": 0.9749, "step": 601 }, { "epoch": 0.2432200393919499, "grad_norm": 0.79296875, "learning_rate": 0.00028589805825242717, "loss": 0.8349, "step": 602 }, { "epoch": 0.24362405939093984, "grad_norm": 0.6796875, "learning_rate": 0.00028587378640776694, "loss": 0.9917, "step": 603 }, { "epoch": 0.2440280793899298, "grad_norm": 0.6015625, "learning_rate": 0.00028584951456310677, "loss": 0.9405, "step": 604 }, { "epoch": 0.24443209938891974, "grad_norm": 0.6640625, "learning_rate": 0.0002858252427184466, "loss": 0.8991, "step": 605 }, { "epoch": 0.2448361193879097, "grad_norm": 0.69140625, "learning_rate": 0.0002858009708737864, "loss": 0.9084, "step": 606 }, { "epoch": 0.24524013938689965, "grad_norm": 0.6328125, "learning_rate": 0.0002857766990291262, "loss": 0.9519, "step": 607 }, { "epoch": 0.2456441593858896, "grad_norm": 0.640625, "learning_rate": 0.000285752427184466, "loss": 1.0394, "step": 608 }, { "epoch": 0.24604817938487955, "grad_norm": 0.6171875, "learning_rate": 0.0002857281553398058, "loss": 0.822, "step": 609 }, { "epoch": 0.2464521993838695, "grad_norm": 0.60546875, "learning_rate": 0.00028570388349514564, "loss": 0.8921, "step": 610 }, { "epoch": 0.24685621938285945, "grad_norm": 0.98046875, "learning_rate": 0.0002856796116504854, "loss": 1.0395, "step": 611 }, { "epoch": 0.2472602393818494, "grad_norm": 0.68359375, "learning_rate": 0.00028565533980582524, "loss": 1.0082, "step": 612 }, { "epoch": 0.24766425938083936, "grad_norm": 0.546875, "learning_rate": 0.000285631067961165, "loss": 0.8228, "step": 613 }, { "epoch": 0.2480682793798293, "grad_norm": 0.578125, "learning_rate": 0.0002856067961165048, "loss": 0.9422, "step": 614 }, { "epoch": 0.24847229937881926, "grad_norm": 0.640625, "learning_rate": 0.0002855825242718446, "loss": 0.8533, "step": 615 }, { "epoch": 0.2488763193778092, "grad_norm": 0.65234375, "learning_rate": 0.00028555825242718445, "loss": 0.9887, "step": 616 }, { "epoch": 0.24928033937679916, "grad_norm": 0.69140625, "learning_rate": 0.00028553398058252423, "loss": 0.9982, "step": 617 }, { "epoch": 0.2496843593757891, "grad_norm": 0.67578125, "learning_rate": 0.00028550970873786406, "loss": 0.9553, "step": 618 }, { "epoch": 0.25008837937477907, "grad_norm": 0.71875, "learning_rate": 0.00028548543689320383, "loss": 1.0163, "step": 619 }, { "epoch": 0.250492399373769, "grad_norm": 0.5859375, "learning_rate": 0.00028546116504854366, "loss": 0.8816, "step": 620 }, { "epoch": 0.25089641937275897, "grad_norm": 0.62890625, "learning_rate": 0.0002854368932038835, "loss": 0.8976, "step": 621 }, { "epoch": 0.2513004393717489, "grad_norm": 0.671875, "learning_rate": 0.00028541262135922327, "loss": 1.0007, "step": 622 }, { "epoch": 0.2517044593707389, "grad_norm": 0.56640625, "learning_rate": 0.0002853883495145631, "loss": 0.8655, "step": 623 }, { "epoch": 0.2521084793697288, "grad_norm": 0.63671875, "learning_rate": 0.00028536407766990287, "loss": 0.8199, "step": 624 }, { "epoch": 0.2525124993687188, "grad_norm": 0.69921875, "learning_rate": 0.0002853398058252427, "loss": 0.9979, "step": 625 }, { "epoch": 0.2529165193677087, "grad_norm": 0.5859375, "learning_rate": 0.00028531553398058253, "loss": 0.8656, "step": 626 }, { "epoch": 0.2533205393666986, "grad_norm": 0.64453125, "learning_rate": 0.0002852912621359223, "loss": 0.9553, "step": 627 }, { "epoch": 0.2537245593656886, "grad_norm": 0.61328125, "learning_rate": 0.0002852669902912621, "loss": 0.9857, "step": 628 }, { "epoch": 0.25412857936467853, "grad_norm": 0.72265625, "learning_rate": 0.0002852427184466019, "loss": 1.0002, "step": 629 }, { "epoch": 0.2545325993636685, "grad_norm": 0.6953125, "learning_rate": 0.00028521844660194174, "loss": 1.0198, "step": 630 }, { "epoch": 0.25493661936265843, "grad_norm": 0.65625, "learning_rate": 0.0002851941747572815, "loss": 0.9464, "step": 631 }, { "epoch": 0.2553406393616484, "grad_norm": 0.7265625, "learning_rate": 0.00028516990291262135, "loss": 0.9267, "step": 632 }, { "epoch": 0.25574465936063834, "grad_norm": 0.84765625, "learning_rate": 0.0002851456310679611, "loss": 1.0059, "step": 633 }, { "epoch": 0.2561486793596283, "grad_norm": 0.59765625, "learning_rate": 0.00028512135922330095, "loss": 0.8549, "step": 634 }, { "epoch": 0.25655269935861824, "grad_norm": 0.62109375, "learning_rate": 0.0002850970873786408, "loss": 0.9661, "step": 635 }, { "epoch": 0.2569567193576082, "grad_norm": 0.625, "learning_rate": 0.00028507281553398055, "loss": 0.8712, "step": 636 }, { "epoch": 0.25736073935659814, "grad_norm": 0.66015625, "learning_rate": 0.0002850485436893204, "loss": 0.8885, "step": 637 }, { "epoch": 0.2577647593555881, "grad_norm": 0.56640625, "learning_rate": 0.00028502427184466016, "loss": 0.8802, "step": 638 }, { "epoch": 0.25816877935457805, "grad_norm": 0.67578125, "learning_rate": 0.000285, "loss": 0.8805, "step": 639 }, { "epoch": 0.25857279935356803, "grad_norm": 0.59375, "learning_rate": 0.00028497572815533976, "loss": 0.8873, "step": 640 }, { "epoch": 0.25897681935255795, "grad_norm": 0.82421875, "learning_rate": 0.0002849514563106796, "loss": 1.1061, "step": 641 }, { "epoch": 0.2593808393515479, "grad_norm": 0.62109375, "learning_rate": 0.00028492718446601937, "loss": 0.8934, "step": 642 }, { "epoch": 0.25978485935053786, "grad_norm": 1.015625, "learning_rate": 0.0002849029126213592, "loss": 0.9627, "step": 643 }, { "epoch": 0.2601888793495278, "grad_norm": 0.57421875, "learning_rate": 0.000284878640776699, "loss": 0.8594, "step": 644 }, { "epoch": 0.26059289934851776, "grad_norm": 0.796875, "learning_rate": 0.0002848543689320388, "loss": 1.0497, "step": 645 }, { "epoch": 0.2609969193475077, "grad_norm": 0.65234375, "learning_rate": 0.00028483009708737863, "loss": 0.8962, "step": 646 }, { "epoch": 0.26140093934649766, "grad_norm": 0.55859375, "learning_rate": 0.0002848058252427184, "loss": 0.9408, "step": 647 }, { "epoch": 0.2618049593454876, "grad_norm": 0.578125, "learning_rate": 0.00028478155339805824, "loss": 0.9544, "step": 648 }, { "epoch": 0.26220897934447757, "grad_norm": 0.66796875, "learning_rate": 0.000284757281553398, "loss": 0.9871, "step": 649 }, { "epoch": 0.2626129993434675, "grad_norm": 0.6171875, "learning_rate": 0.00028473300970873784, "loss": 0.893, "step": 650 }, { "epoch": 0.26301701934245747, "grad_norm": 0.66796875, "learning_rate": 0.00028470873786407767, "loss": 1.0196, "step": 651 }, { "epoch": 0.2634210393414474, "grad_norm": 0.59375, "learning_rate": 0.00028468446601941745, "loss": 0.8983, "step": 652 }, { "epoch": 0.2638250593404374, "grad_norm": 0.6796875, "learning_rate": 0.0002846601941747573, "loss": 0.921, "step": 653 }, { "epoch": 0.2642290793394273, "grad_norm": 0.6328125, "learning_rate": 0.00028463592233009705, "loss": 1.0104, "step": 654 }, { "epoch": 0.2646330993384173, "grad_norm": 0.58203125, "learning_rate": 0.0002846116504854369, "loss": 0.9128, "step": 655 }, { "epoch": 0.2650371193374072, "grad_norm": 0.578125, "learning_rate": 0.0002845873786407767, "loss": 0.8141, "step": 656 }, { "epoch": 0.2654411393363971, "grad_norm": 0.66015625, "learning_rate": 0.0002845631067961165, "loss": 0.9438, "step": 657 }, { "epoch": 0.2658451593353871, "grad_norm": 0.640625, "learning_rate": 0.00028453883495145626, "loss": 0.9011, "step": 658 }, { "epoch": 0.26624917933437703, "grad_norm": 4.71875, "learning_rate": 0.0002845145631067961, "loss": 1.2513, "step": 659 }, { "epoch": 0.266653199333367, "grad_norm": 0.84375, "learning_rate": 0.0002844902912621359, "loss": 1.1288, "step": 660 }, { "epoch": 0.26705721933235693, "grad_norm": 0.83203125, "learning_rate": 0.0002844660194174757, "loss": 1.1012, "step": 661 }, { "epoch": 0.2674612393313469, "grad_norm": 0.6875, "learning_rate": 0.0002844417475728155, "loss": 0.9636, "step": 662 }, { "epoch": 0.26786525933033684, "grad_norm": 0.6796875, "learning_rate": 0.0002844174757281553, "loss": 0.9212, "step": 663 }, { "epoch": 0.2682692793293268, "grad_norm": 0.6640625, "learning_rate": 0.00028439320388349513, "loss": 0.9566, "step": 664 }, { "epoch": 0.26867329932831674, "grad_norm": 1.6015625, "learning_rate": 0.0002843689320388349, "loss": 1.0172, "step": 665 }, { "epoch": 0.2690773193273067, "grad_norm": 0.6796875, "learning_rate": 0.00028434466019417473, "loss": 0.9269, "step": 666 }, { "epoch": 0.26948133932629664, "grad_norm": 0.68359375, "learning_rate": 0.00028432038834951456, "loss": 0.8695, "step": 667 }, { "epoch": 0.2698853593252866, "grad_norm": 0.7421875, "learning_rate": 0.00028429611650485434, "loss": 0.9769, "step": 668 }, { "epoch": 0.27028937932427655, "grad_norm": 0.61328125, "learning_rate": 0.0002842718446601941, "loss": 0.9047, "step": 669 }, { "epoch": 0.2706933993232665, "grad_norm": 0.6328125, "learning_rate": 0.00028424757281553394, "loss": 0.9375, "step": 670 }, { "epoch": 0.27109741932225645, "grad_norm": 0.609375, "learning_rate": 0.00028422330097087377, "loss": 0.9032, "step": 671 }, { "epoch": 0.27150143932124643, "grad_norm": 0.671875, "learning_rate": 0.00028419902912621355, "loss": 0.9476, "step": 672 }, { "epoch": 0.27190545932023635, "grad_norm": 0.6328125, "learning_rate": 0.0002841747572815534, "loss": 0.9191, "step": 673 }, { "epoch": 0.2723094793192263, "grad_norm": 0.66796875, "learning_rate": 0.00028415048543689315, "loss": 1.0055, "step": 674 }, { "epoch": 0.27271349931821626, "grad_norm": 0.53515625, "learning_rate": 0.000284126213592233, "loss": 0.8339, "step": 675 }, { "epoch": 0.2731175193172062, "grad_norm": 0.640625, "learning_rate": 0.0002841019417475728, "loss": 0.9243, "step": 676 }, { "epoch": 0.27352153931619616, "grad_norm": 0.6015625, "learning_rate": 0.0002840776699029126, "loss": 0.842, "step": 677 }, { "epoch": 0.2739255593151861, "grad_norm": 0.59765625, "learning_rate": 0.0002840533980582524, "loss": 0.8922, "step": 678 }, { "epoch": 0.27432957931417606, "grad_norm": 0.6171875, "learning_rate": 0.0002840291262135922, "loss": 0.9799, "step": 679 }, { "epoch": 0.274733599313166, "grad_norm": 0.62109375, "learning_rate": 0.000284004854368932, "loss": 0.8862, "step": 680 }, { "epoch": 0.27513761931215597, "grad_norm": 0.52734375, "learning_rate": 0.00028398058252427185, "loss": 0.8683, "step": 681 }, { "epoch": 0.2755416393111459, "grad_norm": 0.6171875, "learning_rate": 0.0002839563106796116, "loss": 0.9163, "step": 682 }, { "epoch": 0.27594565931013587, "grad_norm": 0.7578125, "learning_rate": 0.00028393203883495145, "loss": 1.1206, "step": 683 }, { "epoch": 0.2763496793091258, "grad_norm": 0.59375, "learning_rate": 0.00028390776699029123, "loss": 0.9849, "step": 684 }, { "epoch": 0.2767536993081158, "grad_norm": 0.53515625, "learning_rate": 0.00028388349514563106, "loss": 0.8585, "step": 685 }, { "epoch": 0.2771577193071057, "grad_norm": 0.72265625, "learning_rate": 0.0002838592233009709, "loss": 0.9802, "step": 686 }, { "epoch": 0.2775617393060957, "grad_norm": 0.640625, "learning_rate": 0.00028383495145631066, "loss": 1.0202, "step": 687 }, { "epoch": 0.2779657593050856, "grad_norm": 0.67578125, "learning_rate": 0.00028381067961165044, "loss": 1.012, "step": 688 }, { "epoch": 0.2783697793040755, "grad_norm": 0.6640625, "learning_rate": 0.00028378640776699027, "loss": 0.9712, "step": 689 }, { "epoch": 0.2787737993030655, "grad_norm": 0.68359375, "learning_rate": 0.00028376213592233004, "loss": 0.9298, "step": 690 }, { "epoch": 0.27917781930205543, "grad_norm": 0.640625, "learning_rate": 0.0002837378640776699, "loss": 0.9768, "step": 691 }, { "epoch": 0.2795818393010454, "grad_norm": 0.65234375, "learning_rate": 0.0002837135922330097, "loss": 0.903, "step": 692 }, { "epoch": 0.27998585930003533, "grad_norm": 0.69140625, "learning_rate": 0.0002836893203883495, "loss": 0.9531, "step": 693 }, { "epoch": 0.2803898792990253, "grad_norm": 0.68359375, "learning_rate": 0.0002836650485436893, "loss": 0.8968, "step": 694 }, { "epoch": 0.28079389929801524, "grad_norm": 3.109375, "learning_rate": 0.0002836407766990291, "loss": 0.8792, "step": 695 }, { "epoch": 0.2811979192970052, "grad_norm": 0.84375, "learning_rate": 0.0002836165048543689, "loss": 1.1256, "step": 696 }, { "epoch": 0.28160193929599514, "grad_norm": 0.56640625, "learning_rate": 0.00028359223300970874, "loss": 0.7954, "step": 697 }, { "epoch": 0.2820059592949851, "grad_norm": 0.5859375, "learning_rate": 0.0002835679611650485, "loss": 0.9192, "step": 698 }, { "epoch": 0.28240997929397504, "grad_norm": 0.5859375, "learning_rate": 0.0002835436893203883, "loss": 0.9071, "step": 699 }, { "epoch": 0.282813999292965, "grad_norm": 0.65625, "learning_rate": 0.0002835194174757281, "loss": 0.9338, "step": 700 }, { "epoch": 0.28321801929195495, "grad_norm": 0.640625, "learning_rate": 0.00028349514563106795, "loss": 0.8559, "step": 701 }, { "epoch": 0.2836220392909449, "grad_norm": 0.60546875, "learning_rate": 0.0002834708737864077, "loss": 0.8797, "step": 702 }, { "epoch": 0.28402605928993485, "grad_norm": 0.5703125, "learning_rate": 0.00028344660194174756, "loss": 0.9025, "step": 703 }, { "epoch": 0.2844300792889248, "grad_norm": 0.6875, "learning_rate": 0.00028342233009708733, "loss": 0.9541, "step": 704 }, { "epoch": 0.28483409928791475, "grad_norm": 0.609375, "learning_rate": 0.00028339805825242716, "loss": 0.8582, "step": 705 }, { "epoch": 0.2852381192869047, "grad_norm": 0.71875, "learning_rate": 0.000283373786407767, "loss": 1.0486, "step": 706 }, { "epoch": 0.28564213928589466, "grad_norm": 0.6796875, "learning_rate": 0.00028334951456310676, "loss": 0.9017, "step": 707 }, { "epoch": 0.2860461592848846, "grad_norm": 0.6953125, "learning_rate": 0.0002833252427184466, "loss": 0.9046, "step": 708 }, { "epoch": 0.28645017928387456, "grad_norm": 0.703125, "learning_rate": 0.00028330097087378637, "loss": 0.96, "step": 709 }, { "epoch": 0.2868541992828645, "grad_norm": 0.5546875, "learning_rate": 0.0002832766990291262, "loss": 0.826, "step": 710 }, { "epoch": 0.28725821928185447, "grad_norm": 0.5703125, "learning_rate": 0.00028325242718446603, "loss": 0.8317, "step": 711 }, { "epoch": 0.2876622392808444, "grad_norm": 0.74609375, "learning_rate": 0.0002832281553398058, "loss": 1.112, "step": 712 }, { "epoch": 0.28806625927983437, "grad_norm": 0.5546875, "learning_rate": 0.00028320388349514563, "loss": 0.8887, "step": 713 }, { "epoch": 0.2884702792788243, "grad_norm": 0.6328125, "learning_rate": 0.0002831796116504854, "loss": 0.8881, "step": 714 }, { "epoch": 0.2888742992778143, "grad_norm": 0.6484375, "learning_rate": 0.0002831553398058252, "loss": 0.8682, "step": 715 }, { "epoch": 0.2892783192768042, "grad_norm": 0.60546875, "learning_rate": 0.000283131067961165, "loss": 0.9852, "step": 716 }, { "epoch": 0.2896823392757942, "grad_norm": 0.5859375, "learning_rate": 0.00028310679611650484, "loss": 0.8942, "step": 717 }, { "epoch": 0.2900863592747841, "grad_norm": 0.54296875, "learning_rate": 0.0002830825242718446, "loss": 0.8331, "step": 718 }, { "epoch": 0.290490379273774, "grad_norm": 0.67578125, "learning_rate": 0.00028305825242718445, "loss": 0.9571, "step": 719 }, { "epoch": 0.290894399272764, "grad_norm": 7.125, "learning_rate": 0.0002830339805825242, "loss": 1.1611, "step": 720 }, { "epoch": 0.29129841927175393, "grad_norm": 0.85546875, "learning_rate": 0.00028300970873786405, "loss": 1.0906, "step": 721 }, { "epoch": 0.2917024392707439, "grad_norm": 0.78125, "learning_rate": 0.0002829854368932039, "loss": 0.7732, "step": 722 }, { "epoch": 0.29210645926973383, "grad_norm": 1.8046875, "learning_rate": 0.00028296116504854366, "loss": 0.8838, "step": 723 }, { "epoch": 0.2925104792687238, "grad_norm": 0.75390625, "learning_rate": 0.0002829368932038835, "loss": 1.0307, "step": 724 }, { "epoch": 0.29291449926771373, "grad_norm": 0.734375, "learning_rate": 0.00028291262135922326, "loss": 0.9486, "step": 725 }, { "epoch": 0.2933185192667037, "grad_norm": 0.58203125, "learning_rate": 0.0002828883495145631, "loss": 0.8739, "step": 726 }, { "epoch": 0.29372253926569364, "grad_norm": 1.109375, "learning_rate": 0.0002828640776699029, "loss": 0.7921, "step": 727 }, { "epoch": 0.2941265592646836, "grad_norm": 1.6328125, "learning_rate": 0.0002828398058252427, "loss": 1.1104, "step": 728 }, { "epoch": 0.29453057926367354, "grad_norm": 0.66796875, "learning_rate": 0.00028281553398058247, "loss": 0.8588, "step": 729 }, { "epoch": 0.2949345992626635, "grad_norm": 2.171875, "learning_rate": 0.0002827912621359223, "loss": 1.0735, "step": 730 }, { "epoch": 0.29533861926165345, "grad_norm": 2.265625, "learning_rate": 0.00028276699029126213, "loss": 0.9293, "step": 731 }, { "epoch": 0.2957426392606434, "grad_norm": 0.68359375, "learning_rate": 0.0002827427184466019, "loss": 1.1045, "step": 732 }, { "epoch": 0.29614665925963335, "grad_norm": 0.62890625, "learning_rate": 0.00028271844660194173, "loss": 0.974, "step": 733 }, { "epoch": 0.29655067925862333, "grad_norm": 0.8359375, "learning_rate": 0.0002826941747572815, "loss": 0.8526, "step": 734 }, { "epoch": 0.29695469925761325, "grad_norm": 3.421875, "learning_rate": 0.00028266990291262134, "loss": 0.9843, "step": 735 }, { "epoch": 0.2973587192566032, "grad_norm": 0.63671875, "learning_rate": 0.00028264563106796117, "loss": 0.9592, "step": 736 }, { "epoch": 0.29776273925559316, "grad_norm": 0.70703125, "learning_rate": 0.00028262135922330094, "loss": 0.8841, "step": 737 }, { "epoch": 0.2981667592545831, "grad_norm": 0.63671875, "learning_rate": 0.00028259708737864077, "loss": 1.0498, "step": 738 }, { "epoch": 0.29857077925357306, "grad_norm": 0.66796875, "learning_rate": 0.00028257281553398055, "loss": 0.9624, "step": 739 }, { "epoch": 0.298974799252563, "grad_norm": 0.58984375, "learning_rate": 0.0002825485436893204, "loss": 0.9093, "step": 740 }, { "epoch": 0.29937881925155296, "grad_norm": 0.66796875, "learning_rate": 0.00028252427184466015, "loss": 0.8723, "step": 741 }, { "epoch": 0.2997828392505429, "grad_norm": 0.5703125, "learning_rate": 0.0002825, "loss": 0.7886, "step": 742 }, { "epoch": 0.30018685924953287, "grad_norm": 0.65625, "learning_rate": 0.00028247572815533976, "loss": 0.9585, "step": 743 }, { "epoch": 0.3005908792485228, "grad_norm": 0.6953125, "learning_rate": 0.0002824514563106796, "loss": 0.9489, "step": 744 }, { "epoch": 0.30099489924751277, "grad_norm": 0.6171875, "learning_rate": 0.00028242718446601936, "loss": 0.9185, "step": 745 }, { "epoch": 0.3013989192465027, "grad_norm": 0.73828125, "learning_rate": 0.0002824029126213592, "loss": 0.9173, "step": 746 }, { "epoch": 0.3018029392454927, "grad_norm": 0.70703125, "learning_rate": 0.000282378640776699, "loss": 0.9368, "step": 747 }, { "epoch": 0.3022069592444826, "grad_norm": 0.71875, "learning_rate": 0.0002823543689320388, "loss": 1.0581, "step": 748 }, { "epoch": 0.3026109792434726, "grad_norm": 0.89453125, "learning_rate": 0.0002823300970873786, "loss": 1.091, "step": 749 }, { "epoch": 0.3030149992424625, "grad_norm": 0.71484375, "learning_rate": 0.0002823058252427184, "loss": 0.8759, "step": 750 }, { "epoch": 0.3034190192414524, "grad_norm": 0.58203125, "learning_rate": 0.00028228155339805823, "loss": 0.8685, "step": 751 }, { "epoch": 0.3038230392404424, "grad_norm": 1.0859375, "learning_rate": 0.00028225728155339806, "loss": 0.9998, "step": 752 }, { "epoch": 0.30422705923943233, "grad_norm": 0.609375, "learning_rate": 0.00028223300970873783, "loss": 0.9506, "step": 753 }, { "epoch": 0.3046310792384223, "grad_norm": 0.7265625, "learning_rate": 0.00028220873786407766, "loss": 0.9506, "step": 754 }, { "epoch": 0.30503509923741223, "grad_norm": 0.6640625, "learning_rate": 0.00028218446601941744, "loss": 0.8966, "step": 755 }, { "epoch": 0.3054391192364022, "grad_norm": 0.58984375, "learning_rate": 0.00028216019417475727, "loss": 0.8222, "step": 756 }, { "epoch": 0.30584313923539214, "grad_norm": 0.74609375, "learning_rate": 0.0002821359223300971, "loss": 1.0288, "step": 757 }, { "epoch": 0.3062471592343821, "grad_norm": 0.6171875, "learning_rate": 0.0002821116504854369, "loss": 0.7993, "step": 758 }, { "epoch": 0.30665117923337204, "grad_norm": 0.74609375, "learning_rate": 0.00028208737864077665, "loss": 1.0813, "step": 759 }, { "epoch": 0.307055199232362, "grad_norm": 0.66015625, "learning_rate": 0.0002820631067961165, "loss": 0.9298, "step": 760 }, { "epoch": 0.30745921923135194, "grad_norm": 0.58203125, "learning_rate": 0.0002820388349514563, "loss": 0.796, "step": 761 }, { "epoch": 0.3078632392303419, "grad_norm": 0.74609375, "learning_rate": 0.0002820145631067961, "loss": 1.0613, "step": 762 }, { "epoch": 0.30826725922933185, "grad_norm": 0.69921875, "learning_rate": 0.0002819902912621359, "loss": 0.9907, "step": 763 }, { "epoch": 0.3086712792283218, "grad_norm": 0.59765625, "learning_rate": 0.0002819660194174757, "loss": 0.8779, "step": 764 }, { "epoch": 0.30907529922731175, "grad_norm": 0.85546875, "learning_rate": 0.0002819417475728155, "loss": 0.8005, "step": 765 }, { "epoch": 0.3094793192263017, "grad_norm": 0.6796875, "learning_rate": 0.0002819174757281553, "loss": 1.0027, "step": 766 }, { "epoch": 0.30988333922529165, "grad_norm": 3.390625, "learning_rate": 0.0002818932038834951, "loss": 1.2817, "step": 767 }, { "epoch": 0.3102873592242816, "grad_norm": 0.78125, "learning_rate": 0.00028186893203883495, "loss": 1.1035, "step": 768 }, { "epoch": 0.31069137922327156, "grad_norm": 1.0234375, "learning_rate": 0.0002818446601941747, "loss": 0.9828, "step": 769 }, { "epoch": 0.3110953992222615, "grad_norm": 0.6171875, "learning_rate": 0.0002818203883495145, "loss": 0.9242, "step": 770 }, { "epoch": 0.31149941922125146, "grad_norm": 0.57421875, "learning_rate": 0.00028179611650485433, "loss": 0.7967, "step": 771 }, { "epoch": 0.3119034392202414, "grad_norm": 0.7109375, "learning_rate": 0.00028177184466019416, "loss": 0.9886, "step": 772 }, { "epoch": 0.31230745921923136, "grad_norm": 0.9609375, "learning_rate": 0.00028174757281553394, "loss": 0.914, "step": 773 }, { "epoch": 0.3127114792182213, "grad_norm": 0.64453125, "learning_rate": 0.00028172330097087377, "loss": 0.9278, "step": 774 }, { "epoch": 0.31311549921721127, "grad_norm": 0.82421875, "learning_rate": 0.00028169902912621354, "loss": 0.867, "step": 775 }, { "epoch": 0.3135195192162012, "grad_norm": 0.63671875, "learning_rate": 0.00028167475728155337, "loss": 0.8429, "step": 776 }, { "epoch": 0.31392353921519117, "grad_norm": 0.65234375, "learning_rate": 0.0002816504854368932, "loss": 1.0028, "step": 777 }, { "epoch": 0.3143275592141811, "grad_norm": 0.6796875, "learning_rate": 0.000281626213592233, "loss": 0.9794, "step": 778 }, { "epoch": 0.3147315792131711, "grad_norm": 0.66015625, "learning_rate": 0.0002816019417475728, "loss": 0.8115, "step": 779 }, { "epoch": 0.315135599212161, "grad_norm": 0.72265625, "learning_rate": 0.0002815776699029126, "loss": 0.783, "step": 780 }, { "epoch": 0.315539619211151, "grad_norm": 0.52734375, "learning_rate": 0.0002815533980582524, "loss": 0.7876, "step": 781 }, { "epoch": 0.3159436392101409, "grad_norm": 0.6015625, "learning_rate": 0.00028152912621359224, "loss": 0.792, "step": 782 }, { "epoch": 0.3163476592091308, "grad_norm": 0.6953125, "learning_rate": 0.000281504854368932, "loss": 0.8519, "step": 783 }, { "epoch": 0.3167516792081208, "grad_norm": 0.640625, "learning_rate": 0.00028148058252427184, "loss": 0.9951, "step": 784 }, { "epoch": 0.31715569920711073, "grad_norm": 0.62890625, "learning_rate": 0.0002814563106796116, "loss": 0.91, "step": 785 }, { "epoch": 0.3175597192061007, "grad_norm": 0.578125, "learning_rate": 0.00028143203883495145, "loss": 0.9021, "step": 786 }, { "epoch": 0.31796373920509063, "grad_norm": 0.5546875, "learning_rate": 0.0002814077669902913, "loss": 0.7973, "step": 787 }, { "epoch": 0.3183677592040806, "grad_norm": 0.6796875, "learning_rate": 0.00028138349514563105, "loss": 0.9338, "step": 788 }, { "epoch": 0.31877177920307054, "grad_norm": 0.78125, "learning_rate": 0.00028135922330097083, "loss": 0.9241, "step": 789 }, { "epoch": 0.3191757992020605, "grad_norm": 0.6171875, "learning_rate": 0.00028133495145631066, "loss": 0.8217, "step": 790 }, { "epoch": 0.31957981920105044, "grad_norm": 0.64453125, "learning_rate": 0.00028131067961165043, "loss": 0.8936, "step": 791 }, { "epoch": 0.3199838392000404, "grad_norm": 0.8125, "learning_rate": 0.00028128640776699026, "loss": 0.9677, "step": 792 }, { "epoch": 0.32038785919903034, "grad_norm": 0.6171875, "learning_rate": 0.0002812621359223301, "loss": 0.9456, "step": 793 }, { "epoch": 0.3207918791980203, "grad_norm": 0.609375, "learning_rate": 0.00028123786407766987, "loss": 0.9369, "step": 794 }, { "epoch": 0.32119589919701025, "grad_norm": 0.5390625, "learning_rate": 0.0002812135922330097, "loss": 0.8844, "step": 795 }, { "epoch": 0.32159991919600023, "grad_norm": 0.55859375, "learning_rate": 0.00028118932038834947, "loss": 0.8333, "step": 796 }, { "epoch": 0.32200393919499015, "grad_norm": 0.58984375, "learning_rate": 0.0002811650485436893, "loss": 0.908, "step": 797 }, { "epoch": 0.3224079591939801, "grad_norm": 0.56640625, "learning_rate": 0.00028114077669902913, "loss": 0.8693, "step": 798 }, { "epoch": 0.32281197919297006, "grad_norm": 0.71484375, "learning_rate": 0.0002811165048543689, "loss": 0.9163, "step": 799 }, { "epoch": 0.32321599919196, "grad_norm": 0.66015625, "learning_rate": 0.0002810922330097087, "loss": 0.9512, "step": 800 }, { "epoch": 0.32362001919094996, "grad_norm": 0.5859375, "learning_rate": 0.0002810679611650485, "loss": 0.8617, "step": 801 }, { "epoch": 0.3240240391899399, "grad_norm": 0.57421875, "learning_rate": 0.00028104368932038834, "loss": 0.8462, "step": 802 }, { "epoch": 0.32442805918892986, "grad_norm": 0.61328125, "learning_rate": 0.0002810194174757281, "loss": 0.9403, "step": 803 }, { "epoch": 0.3248320791879198, "grad_norm": 0.60546875, "learning_rate": 0.00028099514563106794, "loss": 0.9139, "step": 804 }, { "epoch": 0.32523609918690977, "grad_norm": 0.5703125, "learning_rate": 0.0002809708737864077, "loss": 0.8284, "step": 805 }, { "epoch": 0.3256401191858997, "grad_norm": 0.78125, "learning_rate": 0.00028094660194174755, "loss": 1.1109, "step": 806 }, { "epoch": 0.32604413918488967, "grad_norm": 0.83984375, "learning_rate": 0.0002809223300970874, "loss": 0.9832, "step": 807 }, { "epoch": 0.3264481591838796, "grad_norm": 0.61328125, "learning_rate": 0.00028089805825242715, "loss": 0.8983, "step": 808 }, { "epoch": 0.3268521791828696, "grad_norm": 0.68359375, "learning_rate": 0.000280873786407767, "loss": 1.0228, "step": 809 }, { "epoch": 0.3272561991818595, "grad_norm": 0.56640625, "learning_rate": 0.00028084951456310676, "loss": 0.7923, "step": 810 }, { "epoch": 0.3276602191808495, "grad_norm": 0.6015625, "learning_rate": 0.0002808252427184466, "loss": 0.9158, "step": 811 }, { "epoch": 0.3280642391798394, "grad_norm": 0.6328125, "learning_rate": 0.0002808009708737864, "loss": 0.8969, "step": 812 }, { "epoch": 0.3284682591788293, "grad_norm": 0.74609375, "learning_rate": 0.0002807766990291262, "loss": 1.0301, "step": 813 }, { "epoch": 0.3288722791778193, "grad_norm": 0.8359375, "learning_rate": 0.000280752427184466, "loss": 0.9933, "step": 814 }, { "epoch": 0.32927629917680923, "grad_norm": 0.5625, "learning_rate": 0.0002807281553398058, "loss": 0.886, "step": 815 }, { "epoch": 0.3296803191757992, "grad_norm": 0.546875, "learning_rate": 0.00028070388349514557, "loss": 0.8494, "step": 816 }, { "epoch": 0.33008433917478913, "grad_norm": 0.6171875, "learning_rate": 0.0002806796116504854, "loss": 0.9817, "step": 817 }, { "epoch": 0.3304883591737791, "grad_norm": 0.6015625, "learning_rate": 0.00028065533980582523, "loss": 0.9445, "step": 818 }, { "epoch": 0.33089237917276904, "grad_norm": 0.95703125, "learning_rate": 0.000280631067961165, "loss": 0.9852, "step": 819 }, { "epoch": 0.331296399171759, "grad_norm": 0.61328125, "learning_rate": 0.00028060679611650484, "loss": 0.9259, "step": 820 }, { "epoch": 0.33170041917074894, "grad_norm": 0.6171875, "learning_rate": 0.0002805825242718446, "loss": 0.8589, "step": 821 }, { "epoch": 0.3321044391697389, "grad_norm": 0.6875, "learning_rate": 0.00028055825242718444, "loss": 0.9431, "step": 822 }, { "epoch": 0.33250845916872884, "grad_norm": 0.78125, "learning_rate": 0.00028053398058252427, "loss": 0.9913, "step": 823 }, { "epoch": 0.3329124791677188, "grad_norm": 0.796875, "learning_rate": 0.00028050970873786405, "loss": 0.9774, "step": 824 }, { "epoch": 0.33331649916670875, "grad_norm": 0.7421875, "learning_rate": 0.0002804854368932039, "loss": 0.9538, "step": 825 }, { "epoch": 0.3337205191656987, "grad_norm": 0.69140625, "learning_rate": 0.00028046116504854365, "loss": 0.9812, "step": 826 }, { "epoch": 0.33412453916468865, "grad_norm": 0.6328125, "learning_rate": 0.0002804368932038835, "loss": 0.9436, "step": 827 }, { "epoch": 0.33452855916367863, "grad_norm": 0.59375, "learning_rate": 0.0002804126213592233, "loss": 0.9036, "step": 828 }, { "epoch": 0.33493257916266855, "grad_norm": 0.6171875, "learning_rate": 0.0002803883495145631, "loss": 0.9794, "step": 829 }, { "epoch": 0.3353365991616585, "grad_norm": 0.7421875, "learning_rate": 0.00028036407766990286, "loss": 0.8456, "step": 830 }, { "epoch": 0.33574061916064846, "grad_norm": 0.54296875, "learning_rate": 0.0002803398058252427, "loss": 0.8338, "step": 831 }, { "epoch": 0.3361446391596384, "grad_norm": 0.76171875, "learning_rate": 0.0002803155339805825, "loss": 0.9217, "step": 832 }, { "epoch": 0.33654865915862836, "grad_norm": 0.76953125, "learning_rate": 0.0002802912621359223, "loss": 1.0137, "step": 833 }, { "epoch": 0.3369526791576183, "grad_norm": 0.6015625, "learning_rate": 0.0002802669902912621, "loss": 0.8218, "step": 834 }, { "epoch": 0.33735669915660826, "grad_norm": 0.77734375, "learning_rate": 0.0002802427184466019, "loss": 0.8556, "step": 835 }, { "epoch": 0.3377607191555982, "grad_norm": 0.67578125, "learning_rate": 0.00028021844660194173, "loss": 0.9511, "step": 836 }, { "epoch": 0.33816473915458817, "grad_norm": 0.6484375, "learning_rate": 0.00028019417475728156, "loss": 0.9537, "step": 837 }, { "epoch": 0.3385687591535781, "grad_norm": 0.6953125, "learning_rate": 0.00028016990291262133, "loss": 1.0392, "step": 838 }, { "epoch": 0.33897277915256807, "grad_norm": 0.6796875, "learning_rate": 0.00028014563106796116, "loss": 0.9596, "step": 839 }, { "epoch": 0.339376799151558, "grad_norm": 0.63671875, "learning_rate": 0.00028012135922330094, "loss": 1.0211, "step": 840 }, { "epoch": 0.339780819150548, "grad_norm": 0.51953125, "learning_rate": 0.0002800970873786407, "loss": 0.803, "step": 841 }, { "epoch": 0.3401848391495379, "grad_norm": 0.640625, "learning_rate": 0.00028007281553398054, "loss": 0.9732, "step": 842 }, { "epoch": 0.3405888591485279, "grad_norm": 0.65625, "learning_rate": 0.00028004854368932037, "loss": 1.0467, "step": 843 }, { "epoch": 0.3409928791475178, "grad_norm": 0.6640625, "learning_rate": 0.0002800242718446602, "loss": 0.9668, "step": 844 }, { "epoch": 0.3413968991465077, "grad_norm": 0.609375, "learning_rate": 0.00028, "loss": 0.8563, "step": 845 }, { "epoch": 0.3418009191454977, "grad_norm": 0.58203125, "learning_rate": 0.00027997572815533975, "loss": 0.9716, "step": 846 }, { "epoch": 0.34220493914448763, "grad_norm": 0.71875, "learning_rate": 0.0002799514563106796, "loss": 0.9822, "step": 847 }, { "epoch": 0.3426089591434776, "grad_norm": 0.61328125, "learning_rate": 0.0002799271844660194, "loss": 0.8337, "step": 848 }, { "epoch": 0.34301297914246753, "grad_norm": 0.55859375, "learning_rate": 0.0002799029126213592, "loss": 0.8042, "step": 849 }, { "epoch": 0.3434169991414575, "grad_norm": 0.93359375, "learning_rate": 0.000279878640776699, "loss": 0.9065, "step": 850 }, { "epoch": 0.34382101914044744, "grad_norm": 0.66015625, "learning_rate": 0.0002798543689320388, "loss": 0.9616, "step": 851 }, { "epoch": 0.3442250391394374, "grad_norm": 0.6171875, "learning_rate": 0.0002798300970873786, "loss": 0.8142, "step": 852 }, { "epoch": 0.34462905913842734, "grad_norm": 0.640625, "learning_rate": 0.00027980582524271845, "loss": 0.8965, "step": 853 }, { "epoch": 0.3450330791374173, "grad_norm": 0.609375, "learning_rate": 0.0002797815533980582, "loss": 0.8453, "step": 854 }, { "epoch": 0.34543709913640724, "grad_norm": 0.62890625, "learning_rate": 0.00027975728155339805, "loss": 0.8563, "step": 855 }, { "epoch": 0.3458411191353972, "grad_norm": 0.6875, "learning_rate": 0.00027973300970873783, "loss": 0.9214, "step": 856 }, { "epoch": 0.34624513913438715, "grad_norm": 0.5859375, "learning_rate": 0.00027970873786407766, "loss": 0.8165, "step": 857 }, { "epoch": 0.3466491591333771, "grad_norm": 0.71875, "learning_rate": 0.0002796844660194175, "loss": 1.0236, "step": 858 }, { "epoch": 0.34705317913236705, "grad_norm": 0.52734375, "learning_rate": 0.00027966019417475726, "loss": 0.8289, "step": 859 }, { "epoch": 0.347457199131357, "grad_norm": 0.59765625, "learning_rate": 0.00027963592233009704, "loss": 0.8696, "step": 860 }, { "epoch": 0.34786121913034695, "grad_norm": 0.62890625, "learning_rate": 0.00027961165048543687, "loss": 0.9748, "step": 861 }, { "epoch": 0.3482652391293369, "grad_norm": 0.640625, "learning_rate": 0.0002795873786407767, "loss": 0.9792, "step": 862 }, { "epoch": 0.34866925912832686, "grad_norm": 0.6328125, "learning_rate": 0.00027956310679611647, "loss": 0.9177, "step": 863 }, { "epoch": 0.3490732791273168, "grad_norm": 0.6484375, "learning_rate": 0.0002795388349514563, "loss": 1.0146, "step": 864 }, { "epoch": 0.34947729912630676, "grad_norm": 0.56640625, "learning_rate": 0.0002795145631067961, "loss": 0.9266, "step": 865 }, { "epoch": 0.3498813191252967, "grad_norm": 0.56640625, "learning_rate": 0.0002794902912621359, "loss": 0.8002, "step": 866 }, { "epoch": 0.35028533912428667, "grad_norm": 0.6328125, "learning_rate": 0.0002794660194174757, "loss": 0.9918, "step": 867 }, { "epoch": 0.3506893591232766, "grad_norm": 0.70703125, "learning_rate": 0.0002794417475728155, "loss": 1.0639, "step": 868 }, { "epoch": 0.35109337912226657, "grad_norm": 0.671875, "learning_rate": 0.00027941747572815534, "loss": 0.9588, "step": 869 }, { "epoch": 0.3514973991212565, "grad_norm": 0.57421875, "learning_rate": 0.0002793932038834951, "loss": 0.7633, "step": 870 }, { "epoch": 0.3519014191202465, "grad_norm": 0.63671875, "learning_rate": 0.0002793689320388349, "loss": 0.896, "step": 871 }, { "epoch": 0.3523054391192364, "grad_norm": 0.59765625, "learning_rate": 0.0002793446601941747, "loss": 0.8646, "step": 872 }, { "epoch": 0.3527094591182264, "grad_norm": 0.51953125, "learning_rate": 0.00027932038834951455, "loss": 0.8272, "step": 873 }, { "epoch": 0.3531134791172163, "grad_norm": 0.6171875, "learning_rate": 0.0002792961165048543, "loss": 0.9748, "step": 874 }, { "epoch": 0.3535174991162063, "grad_norm": 0.55859375, "learning_rate": 0.00027927184466019415, "loss": 0.8959, "step": 875 }, { "epoch": 0.3539215191151962, "grad_norm": 0.65234375, "learning_rate": 0.00027924757281553393, "loss": 0.8645, "step": 876 }, { "epoch": 0.3543255391141861, "grad_norm": 0.59765625, "learning_rate": 0.00027922330097087376, "loss": 0.8721, "step": 877 }, { "epoch": 0.3547295591131761, "grad_norm": 0.5859375, "learning_rate": 0.0002791990291262136, "loss": 0.891, "step": 878 }, { "epoch": 0.35513357911216603, "grad_norm": 0.75, "learning_rate": 0.00027917475728155336, "loss": 0.9983, "step": 879 }, { "epoch": 0.355537599111156, "grad_norm": 0.578125, "learning_rate": 0.0002791504854368932, "loss": 0.9142, "step": 880 }, { "epoch": 0.35594161911014593, "grad_norm": 0.546875, "learning_rate": 0.00027912621359223297, "loss": 0.8134, "step": 881 }, { "epoch": 0.3563456391091359, "grad_norm": 0.6015625, "learning_rate": 0.0002791019417475728, "loss": 0.826, "step": 882 }, { "epoch": 0.35674965910812584, "grad_norm": 0.59765625, "learning_rate": 0.0002790776699029126, "loss": 0.8607, "step": 883 }, { "epoch": 0.3571536791071158, "grad_norm": 0.58203125, "learning_rate": 0.0002790533980582524, "loss": 0.8213, "step": 884 }, { "epoch": 0.35755769910610574, "grad_norm": 0.5625, "learning_rate": 0.00027902912621359223, "loss": 0.8384, "step": 885 }, { "epoch": 0.3579617191050957, "grad_norm": 0.609375, "learning_rate": 0.000279004854368932, "loss": 1.02, "step": 886 }, { "epoch": 0.35836573910408565, "grad_norm": 0.53125, "learning_rate": 0.00027898058252427184, "loss": 0.8105, "step": 887 }, { "epoch": 0.3587697591030756, "grad_norm": 0.57421875, "learning_rate": 0.0002789563106796116, "loss": 0.8466, "step": 888 }, { "epoch": 0.35917377910206555, "grad_norm": 0.59765625, "learning_rate": 0.00027893203883495144, "loss": 0.8651, "step": 889 }, { "epoch": 0.35957779910105553, "grad_norm": 0.55859375, "learning_rate": 0.0002789077669902912, "loss": 0.7706, "step": 890 }, { "epoch": 0.35998181910004545, "grad_norm": 0.6015625, "learning_rate": 0.00027888349514563105, "loss": 0.8377, "step": 891 }, { "epoch": 0.3603858390990354, "grad_norm": 0.55859375, "learning_rate": 0.0002788592233009708, "loss": 0.8844, "step": 892 }, { "epoch": 0.36078985909802536, "grad_norm": 0.62890625, "learning_rate": 0.00027883495145631065, "loss": 0.9498, "step": 893 }, { "epoch": 0.3611938790970153, "grad_norm": 0.578125, "learning_rate": 0.0002788106796116505, "loss": 0.9199, "step": 894 }, { "epoch": 0.36159789909600526, "grad_norm": 0.5859375, "learning_rate": 0.00027878640776699026, "loss": 0.9291, "step": 895 }, { "epoch": 0.3620019190949952, "grad_norm": 0.61328125, "learning_rate": 0.0002787621359223301, "loss": 0.8296, "step": 896 }, { "epoch": 0.36240593909398516, "grad_norm": 0.62109375, "learning_rate": 0.00027873786407766986, "loss": 0.8558, "step": 897 }, { "epoch": 0.3628099590929751, "grad_norm": 0.63671875, "learning_rate": 0.0002787135922330097, "loss": 0.9345, "step": 898 }, { "epoch": 0.36321397909196507, "grad_norm": 0.60546875, "learning_rate": 0.0002786893203883495, "loss": 0.9616, "step": 899 }, { "epoch": 0.363617999090955, "grad_norm": 0.51953125, "learning_rate": 0.0002786650485436893, "loss": 0.7173, "step": 900 }, { "epoch": 0.36402201908994497, "grad_norm": 0.5859375, "learning_rate": 0.00027864077669902907, "loss": 0.9432, "step": 901 }, { "epoch": 0.3644260390889349, "grad_norm": 0.55078125, "learning_rate": 0.0002786165048543689, "loss": 0.8143, "step": 902 }, { "epoch": 0.3648300590879249, "grad_norm": 0.55078125, "learning_rate": 0.00027859223300970873, "loss": 0.8476, "step": 903 }, { "epoch": 0.3652340790869148, "grad_norm": 0.65625, "learning_rate": 0.0002785679611650485, "loss": 0.9911, "step": 904 }, { "epoch": 0.3656380990859048, "grad_norm": 0.50390625, "learning_rate": 0.00027854368932038833, "loss": 0.7352, "step": 905 }, { "epoch": 0.3660421190848947, "grad_norm": 0.609375, "learning_rate": 0.0002785194174757281, "loss": 0.8646, "step": 906 }, { "epoch": 0.3664461390838846, "grad_norm": 0.578125, "learning_rate": 0.00027849514563106794, "loss": 0.891, "step": 907 }, { "epoch": 0.3668501590828746, "grad_norm": 0.578125, "learning_rate": 0.00027847087378640777, "loss": 0.8848, "step": 908 }, { "epoch": 0.36725417908186453, "grad_norm": 0.640625, "learning_rate": 0.00027844660194174754, "loss": 1.0004, "step": 909 }, { "epoch": 0.3676581990808545, "grad_norm": 0.64453125, "learning_rate": 0.00027842233009708737, "loss": 1.0562, "step": 910 }, { "epoch": 0.36806221907984443, "grad_norm": 0.671875, "learning_rate": 0.00027839805825242715, "loss": 0.9096, "step": 911 }, { "epoch": 0.3684662390788344, "grad_norm": 0.55078125, "learning_rate": 0.000278373786407767, "loss": 0.8454, "step": 912 }, { "epoch": 0.36887025907782434, "grad_norm": 0.5546875, "learning_rate": 0.0002783495145631068, "loss": 0.8723, "step": 913 }, { "epoch": 0.3692742790768143, "grad_norm": 0.671875, "learning_rate": 0.0002783252427184466, "loss": 0.9513, "step": 914 }, { "epoch": 0.36967829907580424, "grad_norm": 0.6953125, "learning_rate": 0.0002783009708737864, "loss": 0.9708, "step": 915 }, { "epoch": 0.3700823190747942, "grad_norm": 0.7109375, "learning_rate": 0.0002782766990291262, "loss": 0.9693, "step": 916 }, { "epoch": 0.37048633907378414, "grad_norm": 0.60546875, "learning_rate": 0.00027825242718446596, "loss": 0.8826, "step": 917 }, { "epoch": 0.3708903590727741, "grad_norm": 0.58984375, "learning_rate": 0.0002782281553398058, "loss": 0.9152, "step": 918 }, { "epoch": 0.37129437907176405, "grad_norm": 0.578125, "learning_rate": 0.0002782038834951456, "loss": 0.9224, "step": 919 }, { "epoch": 0.371698399070754, "grad_norm": 0.6015625, "learning_rate": 0.0002781796116504854, "loss": 0.8963, "step": 920 }, { "epoch": 0.37210241906974395, "grad_norm": 0.6015625, "learning_rate": 0.0002781553398058252, "loss": 0.8334, "step": 921 }, { "epoch": 0.37250643906873393, "grad_norm": 0.53515625, "learning_rate": 0.000278131067961165, "loss": 0.7856, "step": 922 }, { "epoch": 0.37291045906772385, "grad_norm": 0.62890625, "learning_rate": 0.00027810679611650483, "loss": 0.9035, "step": 923 }, { "epoch": 0.3733144790667138, "grad_norm": 0.59765625, "learning_rate": 0.00027808252427184466, "loss": 0.9386, "step": 924 }, { "epoch": 0.37371849906570376, "grad_norm": 0.609375, "learning_rate": 0.00027805825242718443, "loss": 0.8479, "step": 925 }, { "epoch": 0.3741225190646937, "grad_norm": 0.6015625, "learning_rate": 0.00027803398058252426, "loss": 0.8504, "step": 926 }, { "epoch": 0.37452653906368366, "grad_norm": 0.64453125, "learning_rate": 0.00027800970873786404, "loss": 0.7408, "step": 927 }, { "epoch": 0.3749305590626736, "grad_norm": 0.56640625, "learning_rate": 0.00027798543689320387, "loss": 0.871, "step": 928 }, { "epoch": 0.37533457906166356, "grad_norm": 0.60546875, "learning_rate": 0.0002779611650485437, "loss": 0.9568, "step": 929 }, { "epoch": 0.3757385990606535, "grad_norm": 0.54296875, "learning_rate": 0.00027793689320388347, "loss": 0.8517, "step": 930 }, { "epoch": 0.37614261905964347, "grad_norm": 0.59375, "learning_rate": 0.00027791262135922325, "loss": 0.8995, "step": 931 }, { "epoch": 0.3765466390586334, "grad_norm": 0.59765625, "learning_rate": 0.0002778883495145631, "loss": 0.8633, "step": 932 }, { "epoch": 0.37695065905762337, "grad_norm": 0.5625, "learning_rate": 0.0002778640776699029, "loss": 0.7833, "step": 933 }, { "epoch": 0.3773546790566133, "grad_norm": 0.53515625, "learning_rate": 0.0002778398058252427, "loss": 0.86, "step": 934 }, { "epoch": 0.3777586990556033, "grad_norm": 0.63671875, "learning_rate": 0.0002778155339805825, "loss": 0.9247, "step": 935 }, { "epoch": 0.3781627190545932, "grad_norm": 0.6171875, "learning_rate": 0.0002777912621359223, "loss": 0.8955, "step": 936 }, { "epoch": 0.3785667390535832, "grad_norm": 0.62109375, "learning_rate": 0.0002777669902912621, "loss": 0.9596, "step": 937 }, { "epoch": 0.3789707590525731, "grad_norm": 0.671875, "learning_rate": 0.00027774271844660195, "loss": 0.9683, "step": 938 }, { "epoch": 0.379374779051563, "grad_norm": 0.62109375, "learning_rate": 0.0002777184466019417, "loss": 0.8889, "step": 939 }, { "epoch": 0.379778799050553, "grad_norm": 0.51171875, "learning_rate": 0.00027769417475728155, "loss": 0.8788, "step": 940 }, { "epoch": 0.38018281904954293, "grad_norm": 0.5859375, "learning_rate": 0.0002776699029126213, "loss": 0.8881, "step": 941 }, { "epoch": 0.3805868390485329, "grad_norm": 0.66015625, "learning_rate": 0.0002776456310679611, "loss": 0.9409, "step": 942 }, { "epoch": 0.38099085904752283, "grad_norm": 0.515625, "learning_rate": 0.00027762135922330093, "loss": 0.8313, "step": 943 }, { "epoch": 0.3813948790465128, "grad_norm": 0.53515625, "learning_rate": 0.00027759708737864076, "loss": 0.8661, "step": 944 }, { "epoch": 0.38179889904550274, "grad_norm": 0.62890625, "learning_rate": 0.0002775728155339806, "loss": 0.9431, "step": 945 }, { "epoch": 0.3822029190444927, "grad_norm": 0.56640625, "learning_rate": 0.00027754854368932036, "loss": 0.8562, "step": 946 }, { "epoch": 0.38260693904348264, "grad_norm": 0.609375, "learning_rate": 0.00027752427184466014, "loss": 0.8767, "step": 947 }, { "epoch": 0.3830109590424726, "grad_norm": 0.70703125, "learning_rate": 0.00027749999999999997, "loss": 0.9938, "step": 948 }, { "epoch": 0.38341497904146254, "grad_norm": 0.578125, "learning_rate": 0.0002774757281553398, "loss": 0.895, "step": 949 }, { "epoch": 0.3838189990404525, "grad_norm": 0.5859375, "learning_rate": 0.0002774514563106796, "loss": 0.8302, "step": 950 }, { "epoch": 0.38422301903944245, "grad_norm": 0.58203125, "learning_rate": 0.0002774271844660194, "loss": 0.9076, "step": 951 }, { "epoch": 0.38462703903843243, "grad_norm": 0.609375, "learning_rate": 0.0002774029126213592, "loss": 0.8623, "step": 952 }, { "epoch": 0.38503105903742235, "grad_norm": 0.5234375, "learning_rate": 0.000277378640776699, "loss": 0.874, "step": 953 }, { "epoch": 0.3854350790364123, "grad_norm": 0.56640625, "learning_rate": 0.00027735436893203884, "loss": 0.9533, "step": 954 }, { "epoch": 0.38583909903540226, "grad_norm": 0.59375, "learning_rate": 0.0002773300970873786, "loss": 1.0447, "step": 955 }, { "epoch": 0.3862431190343922, "grad_norm": 0.58203125, "learning_rate": 0.00027730582524271844, "loss": 0.7848, "step": 956 }, { "epoch": 0.38664713903338216, "grad_norm": 0.58984375, "learning_rate": 0.0002772815533980582, "loss": 0.8539, "step": 957 }, { "epoch": 0.3870511590323721, "grad_norm": 0.71875, "learning_rate": 0.00027725728155339805, "loss": 0.9533, "step": 958 }, { "epoch": 0.38745517903136206, "grad_norm": 0.65625, "learning_rate": 0.0002772330097087379, "loss": 0.8743, "step": 959 }, { "epoch": 0.387859199030352, "grad_norm": 0.59375, "learning_rate": 0.00027720873786407765, "loss": 0.8219, "step": 960 }, { "epoch": 0.38826321902934197, "grad_norm": 0.66796875, "learning_rate": 0.0002771844660194174, "loss": 0.9349, "step": 961 }, { "epoch": 0.3886672390283319, "grad_norm": 0.55078125, "learning_rate": 0.00027716019417475726, "loss": 0.888, "step": 962 }, { "epoch": 0.38907125902732187, "grad_norm": 2.84375, "learning_rate": 0.0002771359223300971, "loss": 0.9392, "step": 963 }, { "epoch": 0.3894752790263118, "grad_norm": 0.62109375, "learning_rate": 0.00027711165048543686, "loss": 0.9082, "step": 964 }, { "epoch": 0.3898792990253018, "grad_norm": 0.6015625, "learning_rate": 0.0002770873786407767, "loss": 0.884, "step": 965 }, { "epoch": 0.3902833190242917, "grad_norm": 0.5859375, "learning_rate": 0.00027706310679611647, "loss": 0.9095, "step": 966 }, { "epoch": 0.3906873390232817, "grad_norm": 0.6015625, "learning_rate": 0.0002770388349514563, "loss": 0.9312, "step": 967 }, { "epoch": 0.3910913590222716, "grad_norm": 0.69921875, "learning_rate": 0.00027701456310679607, "loss": 0.9183, "step": 968 }, { "epoch": 0.3914953790212615, "grad_norm": 0.72265625, "learning_rate": 0.0002769902912621359, "loss": 0.9443, "step": 969 }, { "epoch": 0.3918993990202515, "grad_norm": 0.5625, "learning_rate": 0.00027696601941747573, "loss": 0.896, "step": 970 }, { "epoch": 0.39230341901924143, "grad_norm": 0.62890625, "learning_rate": 0.0002769417475728155, "loss": 0.8419, "step": 971 }, { "epoch": 0.3927074390182314, "grad_norm": 0.79296875, "learning_rate": 0.0002769174757281553, "loss": 0.9442, "step": 972 }, { "epoch": 0.39311145901722133, "grad_norm": 0.64453125, "learning_rate": 0.0002768932038834951, "loss": 0.9117, "step": 973 }, { "epoch": 0.3935154790162113, "grad_norm": 0.578125, "learning_rate": 0.00027686893203883494, "loss": 0.8801, "step": 974 }, { "epoch": 0.39391949901520124, "grad_norm": 0.66796875, "learning_rate": 0.0002768446601941747, "loss": 0.913, "step": 975 }, { "epoch": 0.3943235190141912, "grad_norm": 0.69921875, "learning_rate": 0.00027682038834951454, "loss": 0.9184, "step": 976 }, { "epoch": 0.39472753901318114, "grad_norm": 0.60546875, "learning_rate": 0.0002767961165048543, "loss": 0.8952, "step": 977 }, { "epoch": 0.3951315590121711, "grad_norm": 0.609375, "learning_rate": 0.00027677184466019415, "loss": 0.824, "step": 978 }, { "epoch": 0.39553557901116104, "grad_norm": 0.5625, "learning_rate": 0.000276747572815534, "loss": 0.8904, "step": 979 }, { "epoch": 0.395939599010151, "grad_norm": 0.55859375, "learning_rate": 0.00027672330097087375, "loss": 0.8198, "step": 980 }, { "epoch": 0.39634361900914095, "grad_norm": 0.81640625, "learning_rate": 0.0002766990291262136, "loss": 0.9305, "step": 981 }, { "epoch": 0.3967476390081309, "grad_norm": 0.578125, "learning_rate": 0.00027667475728155336, "loss": 0.877, "step": 982 }, { "epoch": 0.39715165900712085, "grad_norm": 0.62109375, "learning_rate": 0.0002766504854368932, "loss": 0.881, "step": 983 }, { "epoch": 0.39755567900611083, "grad_norm": 0.65625, "learning_rate": 0.000276626213592233, "loss": 0.9868, "step": 984 }, { "epoch": 0.39795969900510075, "grad_norm": 0.63671875, "learning_rate": 0.0002766019417475728, "loss": 0.9481, "step": 985 }, { "epoch": 0.3983637190040907, "grad_norm": 2.5625, "learning_rate": 0.0002765776699029126, "loss": 0.8312, "step": 986 }, { "epoch": 0.39876773900308066, "grad_norm": 0.5390625, "learning_rate": 0.0002765533980582524, "loss": 0.8199, "step": 987 }, { "epoch": 0.3991717590020706, "grad_norm": 0.7734375, "learning_rate": 0.0002765291262135922, "loss": 0.9141, "step": 988 }, { "epoch": 0.39957577900106056, "grad_norm": 0.55078125, "learning_rate": 0.000276504854368932, "loss": 0.9191, "step": 989 }, { "epoch": 0.3999797990000505, "grad_norm": 0.55078125, "learning_rate": 0.00027648058252427183, "loss": 0.7712, "step": 990 }, { "epoch": 0.40038381899904046, "grad_norm": 0.57421875, "learning_rate": 0.0002764563106796116, "loss": 0.9265, "step": 991 }, { "epoch": 0.4007878389980304, "grad_norm": 0.65234375, "learning_rate": 0.00027643203883495143, "loss": 0.8154, "step": 992 }, { "epoch": 0.40119185899702037, "grad_norm": 0.67578125, "learning_rate": 0.0002764077669902912, "loss": 0.9648, "step": 993 }, { "epoch": 0.4015958789960103, "grad_norm": 0.68359375, "learning_rate": 0.00027638349514563104, "loss": 0.8854, "step": 994 }, { "epoch": 0.40199989899500027, "grad_norm": 0.62109375, "learning_rate": 0.00027635922330097087, "loss": 0.9464, "step": 995 }, { "epoch": 0.4024039189939902, "grad_norm": 0.6015625, "learning_rate": 0.00027633495145631064, "loss": 0.9086, "step": 996 }, { "epoch": 0.4028079389929802, "grad_norm": 0.8125, "learning_rate": 0.0002763106796116505, "loss": 1.0486, "step": 997 }, { "epoch": 0.4032119589919701, "grad_norm": 0.609375, "learning_rate": 0.00027628640776699025, "loss": 0.9475, "step": 998 }, { "epoch": 0.4036159789909601, "grad_norm": 0.68359375, "learning_rate": 0.0002762621359223301, "loss": 0.985, "step": 999 }, { "epoch": 0.40401999898995, "grad_norm": 0.71484375, "learning_rate": 0.0002762378640776699, "loss": 0.9732, "step": 1000 }, { "epoch": 0.4044240189889399, "grad_norm": 0.546875, "learning_rate": 0.0002762135922330097, "loss": 0.9496, "step": 1001 }, { "epoch": 0.4048280389879299, "grad_norm": 0.6015625, "learning_rate": 0.00027618932038834946, "loss": 0.798, "step": 1002 }, { "epoch": 0.40523205898691983, "grad_norm": 0.640625, "learning_rate": 0.0002761650485436893, "loss": 0.9126, "step": 1003 }, { "epoch": 0.4056360789859098, "grad_norm": 0.5625, "learning_rate": 0.0002761407766990291, "loss": 0.8748, "step": 1004 }, { "epoch": 0.40604009898489973, "grad_norm": 0.55078125, "learning_rate": 0.0002761165048543689, "loss": 0.8633, "step": 1005 }, { "epoch": 0.4064441189838897, "grad_norm": 0.5234375, "learning_rate": 0.0002760922330097087, "loss": 0.928, "step": 1006 }, { "epoch": 0.40684813898287964, "grad_norm": 0.66015625, "learning_rate": 0.0002760679611650485, "loss": 0.8375, "step": 1007 }, { "epoch": 0.4072521589818696, "grad_norm": 0.68359375, "learning_rate": 0.0002760436893203883, "loss": 0.9303, "step": 1008 }, { "epoch": 0.40765617898085954, "grad_norm": 0.5703125, "learning_rate": 0.00027601941747572816, "loss": 0.8973, "step": 1009 }, { "epoch": 0.4080601989798495, "grad_norm": 0.609375, "learning_rate": 0.00027599514563106793, "loss": 0.8821, "step": 1010 }, { "epoch": 0.40846421897883944, "grad_norm": 0.64453125, "learning_rate": 0.00027597087378640776, "loss": 0.8353, "step": 1011 }, { "epoch": 0.4088682389778294, "grad_norm": 0.69921875, "learning_rate": 0.00027594660194174754, "loss": 1.0146, "step": 1012 }, { "epoch": 0.40927225897681935, "grad_norm": 1.453125, "learning_rate": 0.00027592233009708736, "loss": 0.8854, "step": 1013 }, { "epoch": 0.4096762789758093, "grad_norm": 0.53515625, "learning_rate": 0.0002758980582524272, "loss": 0.8582, "step": 1014 }, { "epoch": 0.41008029897479925, "grad_norm": 0.5234375, "learning_rate": 0.00027587378640776697, "loss": 0.8418, "step": 1015 }, { "epoch": 0.4104843189737892, "grad_norm": 0.6484375, "learning_rate": 0.0002758495145631068, "loss": 0.8894, "step": 1016 }, { "epoch": 0.41088833897277915, "grad_norm": 0.546875, "learning_rate": 0.0002758252427184466, "loss": 0.948, "step": 1017 }, { "epoch": 0.4112923589717691, "grad_norm": 0.60546875, "learning_rate": 0.00027580097087378635, "loss": 0.8727, "step": 1018 }, { "epoch": 0.41169637897075906, "grad_norm": 0.671875, "learning_rate": 0.0002757766990291262, "loss": 1.0001, "step": 1019 }, { "epoch": 0.412100398969749, "grad_norm": 0.5859375, "learning_rate": 0.000275752427184466, "loss": 0.8405, "step": 1020 }, { "epoch": 0.41250441896873896, "grad_norm": 0.64453125, "learning_rate": 0.0002757281553398058, "loss": 0.9275, "step": 1021 }, { "epoch": 0.4129084389677289, "grad_norm": 0.578125, "learning_rate": 0.0002757038834951456, "loss": 0.9575, "step": 1022 }, { "epoch": 0.41331245896671887, "grad_norm": 0.5078125, "learning_rate": 0.0002756796116504854, "loss": 0.8439, "step": 1023 }, { "epoch": 0.4137164789657088, "grad_norm": 0.56640625, "learning_rate": 0.0002756553398058252, "loss": 0.8572, "step": 1024 }, { "epoch": 0.41412049896469877, "grad_norm": 0.59765625, "learning_rate": 0.00027563106796116505, "loss": 0.842, "step": 1025 }, { "epoch": 0.4145245189636887, "grad_norm": 0.625, "learning_rate": 0.0002756067961165048, "loss": 0.854, "step": 1026 }, { "epoch": 0.4149285389626787, "grad_norm": 0.5703125, "learning_rate": 0.00027558252427184465, "loss": 0.921, "step": 1027 }, { "epoch": 0.4153325589616686, "grad_norm": 0.53515625, "learning_rate": 0.00027555825242718443, "loss": 0.7984, "step": 1028 }, { "epoch": 0.4157365789606586, "grad_norm": 0.54296875, "learning_rate": 0.00027553398058252426, "loss": 0.912, "step": 1029 }, { "epoch": 0.4161405989596485, "grad_norm": 0.53515625, "learning_rate": 0.0002755097087378641, "loss": 0.8318, "step": 1030 }, { "epoch": 0.4165446189586385, "grad_norm": 0.58984375, "learning_rate": 0.00027548543689320386, "loss": 0.8686, "step": 1031 }, { "epoch": 0.4169486389576284, "grad_norm": 0.625, "learning_rate": 0.00027546116504854364, "loss": 0.9132, "step": 1032 }, { "epoch": 0.4173526589566183, "grad_norm": 0.62890625, "learning_rate": 0.00027543689320388347, "loss": 0.8349, "step": 1033 }, { "epoch": 0.4177566789556083, "grad_norm": 0.62109375, "learning_rate": 0.0002754126213592233, "loss": 0.8891, "step": 1034 }, { "epoch": 0.41816069895459823, "grad_norm": 0.57421875, "learning_rate": 0.00027538834951456307, "loss": 0.98, "step": 1035 }, { "epoch": 0.4185647189535882, "grad_norm": 0.61328125, "learning_rate": 0.0002753640776699029, "loss": 0.8228, "step": 1036 }, { "epoch": 0.41896873895257813, "grad_norm": 0.5859375, "learning_rate": 0.0002753398058252427, "loss": 0.9978, "step": 1037 }, { "epoch": 0.4193727589515681, "grad_norm": 0.640625, "learning_rate": 0.0002753155339805825, "loss": 0.82, "step": 1038 }, { "epoch": 0.41977677895055804, "grad_norm": 0.54296875, "learning_rate": 0.00027529126213592233, "loss": 0.9223, "step": 1039 }, { "epoch": 0.420180798949548, "grad_norm": 0.61328125, "learning_rate": 0.0002752669902912621, "loss": 0.9154, "step": 1040 }, { "epoch": 0.42058481894853794, "grad_norm": 0.62890625, "learning_rate": 0.00027524271844660194, "loss": 0.988, "step": 1041 }, { "epoch": 0.4209888389475279, "grad_norm": 0.5703125, "learning_rate": 0.0002752184466019417, "loss": 0.8191, "step": 1042 }, { "epoch": 0.42139285894651785, "grad_norm": 0.62890625, "learning_rate": 0.0002751941747572815, "loss": 1.0068, "step": 1043 }, { "epoch": 0.4217968789455078, "grad_norm": 0.640625, "learning_rate": 0.0002751699029126213, "loss": 0.907, "step": 1044 }, { "epoch": 0.42220089894449775, "grad_norm": 0.515625, "learning_rate": 0.00027514563106796115, "loss": 0.7958, "step": 1045 }, { "epoch": 0.42260491894348773, "grad_norm": 0.625, "learning_rate": 0.000275121359223301, "loss": 0.8603, "step": 1046 }, { "epoch": 0.42300893894247765, "grad_norm": 0.53125, "learning_rate": 0.00027509708737864075, "loss": 0.821, "step": 1047 }, { "epoch": 0.4234129589414676, "grad_norm": 0.71484375, "learning_rate": 0.00027507281553398053, "loss": 0.8767, "step": 1048 }, { "epoch": 0.42381697894045756, "grad_norm": 0.6796875, "learning_rate": 0.00027504854368932036, "loss": 0.9093, "step": 1049 }, { "epoch": 0.4242209989394475, "grad_norm": 0.58203125, "learning_rate": 0.0002750242718446602, "loss": 0.7834, "step": 1050 }, { "epoch": 0.42462501893843746, "grad_norm": 0.6796875, "learning_rate": 0.00027499999999999996, "loss": 0.9573, "step": 1051 }, { "epoch": 0.4250290389374274, "grad_norm": 0.84765625, "learning_rate": 0.0002749757281553398, "loss": 0.9883, "step": 1052 }, { "epoch": 0.42543305893641736, "grad_norm": 0.58984375, "learning_rate": 0.00027495145631067957, "loss": 0.8401, "step": 1053 }, { "epoch": 0.4258370789354073, "grad_norm": 0.71484375, "learning_rate": 0.0002749271844660194, "loss": 0.9947, "step": 1054 }, { "epoch": 0.42624109893439727, "grad_norm": 0.6171875, "learning_rate": 0.0002749029126213592, "loss": 0.8611, "step": 1055 }, { "epoch": 0.4266451189333872, "grad_norm": 0.984375, "learning_rate": 0.000274878640776699, "loss": 0.9094, "step": 1056 }, { "epoch": 0.42704913893237717, "grad_norm": 0.62890625, "learning_rate": 0.00027485436893203883, "loss": 0.9064, "step": 1057 }, { "epoch": 0.4274531589313671, "grad_norm": 0.5859375, "learning_rate": 0.0002748300970873786, "loss": 0.924, "step": 1058 }, { "epoch": 0.4278571789303571, "grad_norm": 0.53125, "learning_rate": 0.00027480582524271844, "loss": 0.866, "step": 1059 }, { "epoch": 0.428261198929347, "grad_norm": 0.59375, "learning_rate": 0.00027478155339805826, "loss": 0.9385, "step": 1060 }, { "epoch": 0.428665218928337, "grad_norm": 0.58203125, "learning_rate": 0.00027475728155339804, "loss": 0.7783, "step": 1061 }, { "epoch": 0.4290692389273269, "grad_norm": 0.734375, "learning_rate": 0.0002747330097087378, "loss": 1.1032, "step": 1062 }, { "epoch": 0.4294732589263168, "grad_norm": 0.5625, "learning_rate": 0.00027470873786407764, "loss": 0.847, "step": 1063 }, { "epoch": 0.4298772789253068, "grad_norm": 0.54296875, "learning_rate": 0.0002746844660194175, "loss": 0.8389, "step": 1064 }, { "epoch": 0.43028129892429673, "grad_norm": 0.625, "learning_rate": 0.00027466019417475725, "loss": 0.9199, "step": 1065 }, { "epoch": 0.4306853189232867, "grad_norm": 0.703125, "learning_rate": 0.0002746359223300971, "loss": 0.9854, "step": 1066 }, { "epoch": 0.43108933892227663, "grad_norm": 0.78125, "learning_rate": 0.00027461165048543685, "loss": 1.0216, "step": 1067 }, { "epoch": 0.4314933589212666, "grad_norm": 0.58984375, "learning_rate": 0.0002745873786407767, "loss": 0.8628, "step": 1068 }, { "epoch": 0.43189737892025654, "grad_norm": 0.56640625, "learning_rate": 0.00027456310679611646, "loss": 0.7613, "step": 1069 }, { "epoch": 0.4323013989192465, "grad_norm": 0.6796875, "learning_rate": 0.0002745388349514563, "loss": 0.9457, "step": 1070 }, { "epoch": 0.43270541891823644, "grad_norm": 0.515625, "learning_rate": 0.0002745145631067961, "loss": 0.8058, "step": 1071 }, { "epoch": 0.4331094389172264, "grad_norm": 0.5859375, "learning_rate": 0.0002744902912621359, "loss": 0.8303, "step": 1072 }, { "epoch": 0.43351345891621634, "grad_norm": 0.49609375, "learning_rate": 0.00027446601941747567, "loss": 0.7541, "step": 1073 }, { "epoch": 0.4339174789152063, "grad_norm": 0.5859375, "learning_rate": 0.0002744417475728155, "loss": 0.8887, "step": 1074 }, { "epoch": 0.43432149891419625, "grad_norm": 0.6484375, "learning_rate": 0.0002744174757281553, "loss": 0.9178, "step": 1075 }, { "epoch": 0.4347255189131862, "grad_norm": 0.62890625, "learning_rate": 0.0002743932038834951, "loss": 0.9269, "step": 1076 }, { "epoch": 0.43512953891217615, "grad_norm": 0.53125, "learning_rate": 0.00027436893203883493, "loss": 0.8439, "step": 1077 }, { "epoch": 0.43553355891116613, "grad_norm": 0.59375, "learning_rate": 0.0002743446601941747, "loss": 0.92, "step": 1078 }, { "epoch": 0.43593757891015605, "grad_norm": 0.6171875, "learning_rate": 0.00027432038834951454, "loss": 0.9346, "step": 1079 }, { "epoch": 0.436341598909146, "grad_norm": 0.5390625, "learning_rate": 0.00027429611650485437, "loss": 0.8247, "step": 1080 }, { "epoch": 0.43674561890813596, "grad_norm": 0.8671875, "learning_rate": 0.00027427184466019414, "loss": 0.7588, "step": 1081 }, { "epoch": 0.4371496389071259, "grad_norm": 0.61328125, "learning_rate": 0.00027424757281553397, "loss": 0.9359, "step": 1082 }, { "epoch": 0.43755365890611586, "grad_norm": 0.6015625, "learning_rate": 0.00027422330097087375, "loss": 0.9474, "step": 1083 }, { "epoch": 0.4379576789051058, "grad_norm": 0.609375, "learning_rate": 0.0002741990291262136, "loss": 0.8526, "step": 1084 }, { "epoch": 0.43836169890409576, "grad_norm": 0.65234375, "learning_rate": 0.0002741747572815534, "loss": 0.9287, "step": 1085 }, { "epoch": 0.4387657189030857, "grad_norm": 0.578125, "learning_rate": 0.0002741504854368932, "loss": 0.8832, "step": 1086 }, { "epoch": 0.43916973890207567, "grad_norm": 0.66796875, "learning_rate": 0.000274126213592233, "loss": 0.9876, "step": 1087 }, { "epoch": 0.4395737589010656, "grad_norm": 0.59765625, "learning_rate": 0.0002741019417475728, "loss": 0.8628, "step": 1088 }, { "epoch": 0.43997777890005557, "grad_norm": 0.58984375, "learning_rate": 0.0002740776699029126, "loss": 0.8763, "step": 1089 }, { "epoch": 0.4403817988990455, "grad_norm": 0.66015625, "learning_rate": 0.0002740533980582524, "loss": 0.8421, "step": 1090 }, { "epoch": 0.4407858188980355, "grad_norm": 0.640625, "learning_rate": 0.0002740291262135922, "loss": 0.8245, "step": 1091 }, { "epoch": 0.4411898388970254, "grad_norm": 0.6640625, "learning_rate": 0.000274004854368932, "loss": 0.9276, "step": 1092 }, { "epoch": 0.4415938588960154, "grad_norm": 0.5234375, "learning_rate": 0.0002739805825242718, "loss": 0.8337, "step": 1093 }, { "epoch": 0.4419978788950053, "grad_norm": 0.7890625, "learning_rate": 0.0002739563106796116, "loss": 1.0179, "step": 1094 }, { "epoch": 0.4424018988939952, "grad_norm": 0.59765625, "learning_rate": 0.00027393203883495143, "loss": 0.8817, "step": 1095 }, { "epoch": 0.4428059188929852, "grad_norm": 0.60546875, "learning_rate": 0.00027390776699029126, "loss": 0.9082, "step": 1096 }, { "epoch": 0.44320993889197513, "grad_norm": 0.546875, "learning_rate": 0.00027388349514563103, "loss": 0.7794, "step": 1097 }, { "epoch": 0.4436139588909651, "grad_norm": 0.48046875, "learning_rate": 0.00027385922330097086, "loss": 0.7876, "step": 1098 }, { "epoch": 0.44401797888995503, "grad_norm": 0.54296875, "learning_rate": 0.00027383495145631064, "loss": 0.8368, "step": 1099 }, { "epoch": 0.444421998888945, "grad_norm": 0.64453125, "learning_rate": 0.00027381067961165047, "loss": 0.9497, "step": 1100 }, { "epoch": 0.44482601888793494, "grad_norm": 0.609375, "learning_rate": 0.0002737864077669903, "loss": 0.9198, "step": 1101 }, { "epoch": 0.4452300388869249, "grad_norm": 0.5625, "learning_rate": 0.00027376213592233007, "loss": 0.8391, "step": 1102 }, { "epoch": 0.44563405888591484, "grad_norm": 0.49609375, "learning_rate": 0.00027373786407766985, "loss": 0.8131, "step": 1103 }, { "epoch": 0.4460380788849048, "grad_norm": 0.64453125, "learning_rate": 0.0002737135922330097, "loss": 0.8853, "step": 1104 }, { "epoch": 0.44644209888389474, "grad_norm": 0.8203125, "learning_rate": 0.0002736893203883495, "loss": 0.8473, "step": 1105 }, { "epoch": 0.4468461188828847, "grad_norm": 0.6328125, "learning_rate": 0.0002736650485436893, "loss": 0.9862, "step": 1106 }, { "epoch": 0.44725013888187465, "grad_norm": 0.56640625, "learning_rate": 0.0002736407766990291, "loss": 0.802, "step": 1107 }, { "epoch": 0.4476541588808646, "grad_norm": 0.56640625, "learning_rate": 0.0002736165048543689, "loss": 0.9025, "step": 1108 }, { "epoch": 0.44805817887985455, "grad_norm": 0.640625, "learning_rate": 0.0002735922330097087, "loss": 0.9466, "step": 1109 }, { "epoch": 0.4484621988788445, "grad_norm": 0.51171875, "learning_rate": 0.00027356796116504854, "loss": 0.7939, "step": 1110 }, { "epoch": 0.44886621887783446, "grad_norm": 0.53125, "learning_rate": 0.0002735436893203883, "loss": 0.8642, "step": 1111 }, { "epoch": 0.4492702388768244, "grad_norm": 0.609375, "learning_rate": 0.00027351941747572815, "loss": 0.9476, "step": 1112 }, { "epoch": 0.44967425887581436, "grad_norm": 0.60546875, "learning_rate": 0.0002734951456310679, "loss": 0.9102, "step": 1113 }, { "epoch": 0.4500782788748043, "grad_norm": 0.52734375, "learning_rate": 0.00027347087378640775, "loss": 0.8822, "step": 1114 }, { "epoch": 0.45048229887379426, "grad_norm": 0.54296875, "learning_rate": 0.0002734466019417476, "loss": 0.8893, "step": 1115 }, { "epoch": 0.4508863188727842, "grad_norm": 0.58984375, "learning_rate": 0.00027342233009708736, "loss": 0.8921, "step": 1116 }, { "epoch": 0.45129033887177417, "grad_norm": 0.578125, "learning_rate": 0.0002733980582524272, "loss": 0.8277, "step": 1117 }, { "epoch": 0.4516943588707641, "grad_norm": 0.5859375, "learning_rate": 0.00027337378640776696, "loss": 0.8439, "step": 1118 }, { "epoch": 0.45209837886975407, "grad_norm": 0.6875, "learning_rate": 0.00027334951456310674, "loss": 1.0, "step": 1119 }, { "epoch": 0.452502398868744, "grad_norm": 0.5859375, "learning_rate": 0.00027332524271844657, "loss": 0.946, "step": 1120 }, { "epoch": 0.452906418867734, "grad_norm": 0.58203125, "learning_rate": 0.0002733009708737864, "loss": 1.0198, "step": 1121 }, { "epoch": 0.4533104388667239, "grad_norm": 0.75, "learning_rate": 0.00027327669902912617, "loss": 0.9032, "step": 1122 }, { "epoch": 0.4537144588657139, "grad_norm": 0.66796875, "learning_rate": 0.000273252427184466, "loss": 0.9265, "step": 1123 }, { "epoch": 0.4541184788647038, "grad_norm": 0.609375, "learning_rate": 0.0002732281553398058, "loss": 0.8998, "step": 1124 }, { "epoch": 0.4545224988636938, "grad_norm": 0.671875, "learning_rate": 0.0002732038834951456, "loss": 1.0331, "step": 1125 }, { "epoch": 0.4549265188626837, "grad_norm": 0.59375, "learning_rate": 0.00027317961165048544, "loss": 0.8052, "step": 1126 }, { "epoch": 0.45533053886167363, "grad_norm": 0.63671875, "learning_rate": 0.0002731553398058252, "loss": 0.8508, "step": 1127 }, { "epoch": 0.4557345588606636, "grad_norm": 0.57421875, "learning_rate": 0.00027313106796116504, "loss": 0.834, "step": 1128 }, { "epoch": 0.45613857885965353, "grad_norm": 0.57421875, "learning_rate": 0.0002731067961165048, "loss": 0.852, "step": 1129 }, { "epoch": 0.4565425988586435, "grad_norm": 0.60546875, "learning_rate": 0.00027308252427184465, "loss": 0.9022, "step": 1130 }, { "epoch": 0.45694661885763344, "grad_norm": 0.54296875, "learning_rate": 0.0002730582524271845, "loss": 0.9075, "step": 1131 }, { "epoch": 0.4573506388566234, "grad_norm": 0.63671875, "learning_rate": 0.00027303398058252425, "loss": 0.9308, "step": 1132 }, { "epoch": 0.45775465885561334, "grad_norm": 0.578125, "learning_rate": 0.000273009708737864, "loss": 0.9852, "step": 1133 }, { "epoch": 0.4581586788546033, "grad_norm": 0.5390625, "learning_rate": 0.00027298543689320385, "loss": 0.8516, "step": 1134 }, { "epoch": 0.45856269885359324, "grad_norm": 0.60546875, "learning_rate": 0.0002729611650485437, "loss": 0.8424, "step": 1135 }, { "epoch": 0.4589667188525832, "grad_norm": 0.5, "learning_rate": 0.00027293689320388346, "loss": 0.8186, "step": 1136 }, { "epoch": 0.45937073885157315, "grad_norm": 0.5859375, "learning_rate": 0.0002729126213592233, "loss": 0.9224, "step": 1137 }, { "epoch": 0.4597747588505631, "grad_norm": 0.61328125, "learning_rate": 0.00027288834951456306, "loss": 0.8946, "step": 1138 }, { "epoch": 0.46017877884955305, "grad_norm": 0.60546875, "learning_rate": 0.0002728640776699029, "loss": 0.9042, "step": 1139 }, { "epoch": 0.46058279884854303, "grad_norm": 0.484375, "learning_rate": 0.0002728398058252427, "loss": 0.8686, "step": 1140 }, { "epoch": 0.46098681884753295, "grad_norm": 0.76171875, "learning_rate": 0.0002728155339805825, "loss": 1.0408, "step": 1141 }, { "epoch": 0.4613908388465229, "grad_norm": 0.68359375, "learning_rate": 0.00027279126213592233, "loss": 0.93, "step": 1142 }, { "epoch": 0.46179485884551286, "grad_norm": 0.5234375, "learning_rate": 0.0002727669902912621, "loss": 0.8782, "step": 1143 }, { "epoch": 0.4621988788445028, "grad_norm": 1.4453125, "learning_rate": 0.0002727427184466019, "loss": 1.1381, "step": 1144 }, { "epoch": 0.46260289884349276, "grad_norm": 0.5078125, "learning_rate": 0.0002727184466019417, "loss": 0.7424, "step": 1145 }, { "epoch": 0.4630069188424827, "grad_norm": 0.65625, "learning_rate": 0.00027269417475728154, "loss": 0.9146, "step": 1146 }, { "epoch": 0.46341093884147266, "grad_norm": 0.69140625, "learning_rate": 0.00027266990291262137, "loss": 0.8364, "step": 1147 }, { "epoch": 0.4638149588404626, "grad_norm": 0.57421875, "learning_rate": 0.00027264563106796114, "loss": 0.9727, "step": 1148 }, { "epoch": 0.46421897883945257, "grad_norm": 0.64453125, "learning_rate": 0.0002726213592233009, "loss": 0.8953, "step": 1149 }, { "epoch": 0.4646229988384425, "grad_norm": 0.6953125, "learning_rate": 0.00027259708737864075, "loss": 0.8648, "step": 1150 }, { "epoch": 0.46502701883743247, "grad_norm": 0.66796875, "learning_rate": 0.0002725728155339806, "loss": 0.9128, "step": 1151 }, { "epoch": 0.4654310388364224, "grad_norm": 0.8046875, "learning_rate": 0.00027254854368932035, "loss": 0.8492, "step": 1152 }, { "epoch": 0.4658350588354124, "grad_norm": 0.55078125, "learning_rate": 0.0002725242718446602, "loss": 0.8966, "step": 1153 }, { "epoch": 0.4662390788344023, "grad_norm": 0.54296875, "learning_rate": 0.00027249999999999996, "loss": 0.8473, "step": 1154 }, { "epoch": 0.4666430988333923, "grad_norm": 0.65234375, "learning_rate": 0.0002724757281553398, "loss": 0.8258, "step": 1155 }, { "epoch": 0.4670471188323822, "grad_norm": 0.5703125, "learning_rate": 0.0002724514563106796, "loss": 0.8523, "step": 1156 }, { "epoch": 0.4674511388313721, "grad_norm": 0.7421875, "learning_rate": 0.0002724271844660194, "loss": 0.8554, "step": 1157 }, { "epoch": 0.4678551588303621, "grad_norm": 0.546875, "learning_rate": 0.0002724029126213592, "loss": 0.8096, "step": 1158 }, { "epoch": 0.46825917882935203, "grad_norm": 0.6171875, "learning_rate": 0.000272378640776699, "loss": 0.8683, "step": 1159 }, { "epoch": 0.468663198828342, "grad_norm": 0.53125, "learning_rate": 0.0002723543689320388, "loss": 0.8513, "step": 1160 }, { "epoch": 0.46906721882733193, "grad_norm": 0.5859375, "learning_rate": 0.00027233009708737865, "loss": 0.7921, "step": 1161 }, { "epoch": 0.4694712388263219, "grad_norm": 1.9375, "learning_rate": 0.00027230582524271843, "loss": 1.013, "step": 1162 }, { "epoch": 0.46987525882531184, "grad_norm": 0.66796875, "learning_rate": 0.0002722815533980582, "loss": 0.8156, "step": 1163 }, { "epoch": 0.4702792788243018, "grad_norm": 0.65234375, "learning_rate": 0.00027225728155339803, "loss": 0.9932, "step": 1164 }, { "epoch": 0.47068329882329174, "grad_norm": 0.640625, "learning_rate": 0.00027223300970873786, "loss": 0.9747, "step": 1165 }, { "epoch": 0.4710873188222817, "grad_norm": 0.65625, "learning_rate": 0.00027220873786407764, "loss": 0.9227, "step": 1166 }, { "epoch": 0.47149133882127164, "grad_norm": 0.56640625, "learning_rate": 0.00027218446601941747, "loss": 0.8498, "step": 1167 }, { "epoch": 0.4718953588202616, "grad_norm": 0.5234375, "learning_rate": 0.00027216019417475724, "loss": 0.8587, "step": 1168 }, { "epoch": 0.47229937881925155, "grad_norm": 0.6171875, "learning_rate": 0.00027213592233009707, "loss": 0.8981, "step": 1169 }, { "epoch": 0.4727033988182415, "grad_norm": 0.55078125, "learning_rate": 0.00027211165048543685, "loss": 0.8665, "step": 1170 }, { "epoch": 0.47310741881723145, "grad_norm": 0.54296875, "learning_rate": 0.0002720873786407767, "loss": 0.8903, "step": 1171 }, { "epoch": 0.47351143881622143, "grad_norm": 0.70703125, "learning_rate": 0.0002720631067961165, "loss": 0.914, "step": 1172 }, { "epoch": 0.47391545881521135, "grad_norm": 0.5234375, "learning_rate": 0.0002720388349514563, "loss": 0.8625, "step": 1173 }, { "epoch": 0.4743194788142013, "grad_norm": 0.57421875, "learning_rate": 0.00027201456310679606, "loss": 0.8259, "step": 1174 }, { "epoch": 0.47472349881319126, "grad_norm": 0.6171875, "learning_rate": 0.0002719902912621359, "loss": 0.9212, "step": 1175 }, { "epoch": 0.4751275188121812, "grad_norm": 0.58203125, "learning_rate": 0.0002719660194174757, "loss": 0.8988, "step": 1176 }, { "epoch": 0.47553153881117116, "grad_norm": 0.55078125, "learning_rate": 0.0002719417475728155, "loss": 0.9419, "step": 1177 }, { "epoch": 0.4759355588101611, "grad_norm": 0.55078125, "learning_rate": 0.0002719174757281553, "loss": 0.9106, "step": 1178 }, { "epoch": 0.47633957880915107, "grad_norm": 0.58984375, "learning_rate": 0.0002718932038834951, "loss": 0.9255, "step": 1179 }, { "epoch": 0.476743598808141, "grad_norm": 0.5703125, "learning_rate": 0.0002718689320388349, "loss": 0.9378, "step": 1180 }, { "epoch": 0.47714761880713097, "grad_norm": 0.50390625, "learning_rate": 0.00027184466019417475, "loss": 0.8826, "step": 1181 }, { "epoch": 0.4775516388061209, "grad_norm": 0.466796875, "learning_rate": 0.00027182038834951453, "loss": 0.7676, "step": 1182 }, { "epoch": 0.4779556588051109, "grad_norm": 0.51953125, "learning_rate": 0.00027179611650485436, "loss": 0.8612, "step": 1183 }, { "epoch": 0.4783596788041008, "grad_norm": 0.5390625, "learning_rate": 0.00027177184466019413, "loss": 0.7875, "step": 1184 }, { "epoch": 0.4787636988030908, "grad_norm": 0.48046875, "learning_rate": 0.00027174757281553396, "loss": 0.8312, "step": 1185 }, { "epoch": 0.4791677188020807, "grad_norm": 0.54296875, "learning_rate": 0.0002717233009708738, "loss": 0.7684, "step": 1186 }, { "epoch": 0.4795717388010707, "grad_norm": 0.74609375, "learning_rate": 0.00027169902912621357, "loss": 0.931, "step": 1187 }, { "epoch": 0.4799757588000606, "grad_norm": 0.62109375, "learning_rate": 0.0002716747572815534, "loss": 0.9964, "step": 1188 }, { "epoch": 0.4803797787990505, "grad_norm": 0.6015625, "learning_rate": 0.0002716504854368932, "loss": 0.899, "step": 1189 }, { "epoch": 0.4807837987980405, "grad_norm": 0.49609375, "learning_rate": 0.000271626213592233, "loss": 0.8272, "step": 1190 }, { "epoch": 0.48118781879703043, "grad_norm": 0.56640625, "learning_rate": 0.0002716019417475728, "loss": 0.9005, "step": 1191 }, { "epoch": 0.4815918387960204, "grad_norm": 0.51953125, "learning_rate": 0.0002715776699029126, "loss": 0.8782, "step": 1192 }, { "epoch": 0.48199585879501033, "grad_norm": 1.3203125, "learning_rate": 0.0002715533980582524, "loss": 1.1729, "step": 1193 }, { "epoch": 0.4823998787940003, "grad_norm": 0.5703125, "learning_rate": 0.0002715291262135922, "loss": 0.8858, "step": 1194 }, { "epoch": 0.48280389879299024, "grad_norm": 0.5390625, "learning_rate": 0.000271504854368932, "loss": 0.8897, "step": 1195 }, { "epoch": 0.4832079187919802, "grad_norm": 0.58984375, "learning_rate": 0.0002714805825242718, "loss": 0.9079, "step": 1196 }, { "epoch": 0.48361193879097014, "grad_norm": 0.66015625, "learning_rate": 0.00027145631067961165, "loss": 0.9274, "step": 1197 }, { "epoch": 0.4840159587899601, "grad_norm": 0.63671875, "learning_rate": 0.0002714320388349514, "loss": 0.958, "step": 1198 }, { "epoch": 0.48441997878895005, "grad_norm": 1.765625, "learning_rate": 0.00027140776699029125, "loss": 0.9068, "step": 1199 }, { "epoch": 0.48482399878794, "grad_norm": 0.65234375, "learning_rate": 0.000271383495145631, "loss": 0.8989, "step": 1200 }, { "epoch": 0.48522801878692995, "grad_norm": 0.68359375, "learning_rate": 0.00027135922330097086, "loss": 0.848, "step": 1201 }, { "epoch": 0.48563203878591993, "grad_norm": 0.78125, "learning_rate": 0.0002713349514563107, "loss": 0.7974, "step": 1202 }, { "epoch": 0.48603605878490985, "grad_norm": 0.5703125, "learning_rate": 0.00027131067961165046, "loss": 0.8219, "step": 1203 }, { "epoch": 0.4864400787838998, "grad_norm": 0.6640625, "learning_rate": 0.00027128640776699024, "loss": 0.8716, "step": 1204 }, { "epoch": 0.48684409878288976, "grad_norm": 0.59375, "learning_rate": 0.00027126213592233006, "loss": 0.8425, "step": 1205 }, { "epoch": 0.4872481187818797, "grad_norm": 0.625, "learning_rate": 0.0002712378640776699, "loss": 0.9162, "step": 1206 }, { "epoch": 0.48765213878086966, "grad_norm": 0.5703125, "learning_rate": 0.00027121359223300967, "loss": 0.797, "step": 1207 }, { "epoch": 0.4880561587798596, "grad_norm": 0.60546875, "learning_rate": 0.0002711893203883495, "loss": 0.8058, "step": 1208 }, { "epoch": 0.48846017877884956, "grad_norm": 0.62890625, "learning_rate": 0.0002711650485436893, "loss": 0.8699, "step": 1209 }, { "epoch": 0.4888641987778395, "grad_norm": 0.6875, "learning_rate": 0.0002711407766990291, "loss": 0.9559, "step": 1210 }, { "epoch": 0.48926821877682947, "grad_norm": 0.73046875, "learning_rate": 0.00027111650485436893, "loss": 1.0552, "step": 1211 }, { "epoch": 0.4896722387758194, "grad_norm": 0.5546875, "learning_rate": 0.0002710922330097087, "loss": 0.8338, "step": 1212 }, { "epoch": 0.49007625877480937, "grad_norm": 0.5390625, "learning_rate": 0.00027106796116504854, "loss": 0.8045, "step": 1213 }, { "epoch": 0.4904802787737993, "grad_norm": 0.52734375, "learning_rate": 0.0002710436893203883, "loss": 0.8167, "step": 1214 }, { "epoch": 0.4908842987727893, "grad_norm": 0.859375, "learning_rate": 0.00027101941747572814, "loss": 0.9096, "step": 1215 }, { "epoch": 0.4912883187717792, "grad_norm": 0.73046875, "learning_rate": 0.0002709951456310679, "loss": 0.9511, "step": 1216 }, { "epoch": 0.4916923387707692, "grad_norm": 0.62890625, "learning_rate": 0.00027097087378640775, "loss": 0.9182, "step": 1217 }, { "epoch": 0.4920963587697591, "grad_norm": 0.640625, "learning_rate": 0.0002709466019417476, "loss": 0.9703, "step": 1218 }, { "epoch": 0.4925003787687491, "grad_norm": 0.53515625, "learning_rate": 0.00027092233009708735, "loss": 0.9121, "step": 1219 }, { "epoch": 0.492904398767739, "grad_norm": 0.6328125, "learning_rate": 0.00027089805825242713, "loss": 0.8757, "step": 1220 }, { "epoch": 0.49330841876672893, "grad_norm": 0.80078125, "learning_rate": 0.00027087378640776696, "loss": 0.8899, "step": 1221 }, { "epoch": 0.4937124387657189, "grad_norm": 0.65234375, "learning_rate": 0.0002708495145631068, "loss": 0.9543, "step": 1222 }, { "epoch": 0.49411645876470883, "grad_norm": 0.5078125, "learning_rate": 0.00027082524271844656, "loss": 0.817, "step": 1223 }, { "epoch": 0.4945204787636988, "grad_norm": 0.53125, "learning_rate": 0.0002708009708737864, "loss": 0.8611, "step": 1224 }, { "epoch": 0.49492449876268874, "grad_norm": 0.7421875, "learning_rate": 0.00027077669902912617, "loss": 0.9017, "step": 1225 }, { "epoch": 0.4953285187616787, "grad_norm": 0.55078125, "learning_rate": 0.000270752427184466, "loss": 0.9115, "step": 1226 }, { "epoch": 0.49573253876066864, "grad_norm": 0.578125, "learning_rate": 0.0002707281553398058, "loss": 0.8844, "step": 1227 }, { "epoch": 0.4961365587596586, "grad_norm": 0.498046875, "learning_rate": 0.0002707038834951456, "loss": 0.8466, "step": 1228 }, { "epoch": 0.49654057875864854, "grad_norm": 0.5390625, "learning_rate": 0.00027067961165048543, "loss": 0.8262, "step": 1229 }, { "epoch": 0.4969445987576385, "grad_norm": 0.50390625, "learning_rate": 0.0002706553398058252, "loss": 0.8224, "step": 1230 }, { "epoch": 0.49734861875662845, "grad_norm": 0.66015625, "learning_rate": 0.00027063106796116503, "loss": 0.888, "step": 1231 }, { "epoch": 0.4977526387556184, "grad_norm": 0.578125, "learning_rate": 0.00027060679611650486, "loss": 0.8551, "step": 1232 }, { "epoch": 0.49815665875460835, "grad_norm": 0.55078125, "learning_rate": 0.00027058252427184464, "loss": 0.9137, "step": 1233 }, { "epoch": 0.49856067875359833, "grad_norm": 0.5234375, "learning_rate": 0.0002705582524271844, "loss": 0.8991, "step": 1234 }, { "epoch": 0.49896469875258825, "grad_norm": 0.64453125, "learning_rate": 0.00027053398058252424, "loss": 0.8508, "step": 1235 }, { "epoch": 0.4993687187515782, "grad_norm": 0.546875, "learning_rate": 0.0002705097087378641, "loss": 0.8386, "step": 1236 }, { "epoch": 0.49977273875056816, "grad_norm": 0.6796875, "learning_rate": 0.00027048543689320385, "loss": 0.9948, "step": 1237 }, { "epoch": 0.5001767587495581, "grad_norm": 0.53125, "learning_rate": 0.0002704611650485437, "loss": 0.7951, "step": 1238 }, { "epoch": 0.500580778748548, "grad_norm": 0.59765625, "learning_rate": 0.00027043689320388345, "loss": 0.9541, "step": 1239 }, { "epoch": 0.500984798747538, "grad_norm": 0.67578125, "learning_rate": 0.0002704126213592233, "loss": 1.0335, "step": 1240 }, { "epoch": 0.501388818746528, "grad_norm": 0.5546875, "learning_rate": 0.0002703883495145631, "loss": 0.9389, "step": 1241 }, { "epoch": 0.5017928387455179, "grad_norm": 0.5703125, "learning_rate": 0.0002703640776699029, "loss": 0.9152, "step": 1242 }, { "epoch": 0.5021968587445078, "grad_norm": 0.5703125, "learning_rate": 0.0002703398058252427, "loss": 0.9164, "step": 1243 }, { "epoch": 0.5026008787434978, "grad_norm": 0.98046875, "learning_rate": 0.0002703155339805825, "loss": 0.8994, "step": 1244 }, { "epoch": 0.5030048987424878, "grad_norm": 0.58203125, "learning_rate": 0.00027029126213592227, "loss": 0.8968, "step": 1245 }, { "epoch": 0.5034089187414778, "grad_norm": 0.609375, "learning_rate": 0.0002702669902912621, "loss": 0.8585, "step": 1246 }, { "epoch": 0.5038129387404676, "grad_norm": 0.65625, "learning_rate": 0.0002702427184466019, "loss": 0.9829, "step": 1247 }, { "epoch": 0.5042169587394576, "grad_norm": 0.63671875, "learning_rate": 0.00027021844660194176, "loss": 0.9664, "step": 1248 }, { "epoch": 0.5046209787384476, "grad_norm": 0.63671875, "learning_rate": 0.00027019417475728153, "loss": 0.9245, "step": 1249 }, { "epoch": 0.5050249987374376, "grad_norm": 0.5390625, "learning_rate": 0.0002701699029126213, "loss": 0.925, "step": 1250 }, { "epoch": 0.5054290187364274, "grad_norm": 0.62890625, "learning_rate": 0.00027014563106796114, "loss": 0.9181, "step": 1251 }, { "epoch": 0.5058330387354174, "grad_norm": 0.5078125, "learning_rate": 0.00027012135922330096, "loss": 0.8261, "step": 1252 }, { "epoch": 0.5062370587344074, "grad_norm": 0.57421875, "learning_rate": 0.00027009708737864074, "loss": 0.8602, "step": 1253 }, { "epoch": 0.5066410787333973, "grad_norm": 0.62890625, "learning_rate": 0.00027007281553398057, "loss": 0.9508, "step": 1254 }, { "epoch": 0.5070450987323872, "grad_norm": 0.61328125, "learning_rate": 0.00027004854368932034, "loss": 0.8274, "step": 1255 }, { "epoch": 0.5074491187313772, "grad_norm": 0.57421875, "learning_rate": 0.0002700242718446602, "loss": 0.8809, "step": 1256 }, { "epoch": 0.5078531387303672, "grad_norm": 0.54296875, "learning_rate": 0.00027, "loss": 0.8377, "step": 1257 }, { "epoch": 0.5082571587293571, "grad_norm": 0.62109375, "learning_rate": 0.0002699757281553398, "loss": 0.9392, "step": 1258 }, { "epoch": 0.508661178728347, "grad_norm": 0.6171875, "learning_rate": 0.0002699514563106796, "loss": 0.9637, "step": 1259 }, { "epoch": 0.509065198727337, "grad_norm": 0.55859375, "learning_rate": 0.0002699271844660194, "loss": 0.8862, "step": 1260 }, { "epoch": 0.509469218726327, "grad_norm": 0.68359375, "learning_rate": 0.0002699029126213592, "loss": 1.0409, "step": 1261 }, { "epoch": 0.5098732387253169, "grad_norm": 0.62890625, "learning_rate": 0.00026987864077669904, "loss": 1.0121, "step": 1262 }, { "epoch": 0.5102772587243068, "grad_norm": 0.58203125, "learning_rate": 0.0002698543689320388, "loss": 0.8254, "step": 1263 }, { "epoch": 0.5106812787232968, "grad_norm": 0.53125, "learning_rate": 0.0002698300970873786, "loss": 0.8587, "step": 1264 }, { "epoch": 0.5110852987222868, "grad_norm": 0.52734375, "learning_rate": 0.0002698058252427184, "loss": 0.8594, "step": 1265 }, { "epoch": 0.5114893187212767, "grad_norm": 10.375, "learning_rate": 0.00026978155339805825, "loss": 0.9726, "step": 1266 }, { "epoch": 0.5118933387202667, "grad_norm": 0.51171875, "learning_rate": 0.000269757281553398, "loss": 0.8971, "step": 1267 }, { "epoch": 0.5122973587192566, "grad_norm": 0.70703125, "learning_rate": 0.00026973300970873786, "loss": 0.9351, "step": 1268 }, { "epoch": 0.5127013787182465, "grad_norm": 0.48046875, "learning_rate": 0.00026970873786407763, "loss": 0.8054, "step": 1269 }, { "epoch": 0.5131053987172365, "grad_norm": 0.546875, "learning_rate": 0.00026968446601941746, "loss": 0.8099, "step": 1270 }, { "epoch": 0.5135094187162265, "grad_norm": 0.6015625, "learning_rate": 0.00026966019417475724, "loss": 0.9183, "step": 1271 }, { "epoch": 0.5139134387152164, "grad_norm": 0.609375, "learning_rate": 0.00026963592233009707, "loss": 0.8274, "step": 1272 }, { "epoch": 0.5143174587142063, "grad_norm": 0.55859375, "learning_rate": 0.0002696116504854369, "loss": 0.9206, "step": 1273 }, { "epoch": 0.5147214787131963, "grad_norm": 0.50390625, "learning_rate": 0.00026958737864077667, "loss": 0.7776, "step": 1274 }, { "epoch": 0.5151254987121863, "grad_norm": 0.56640625, "learning_rate": 0.00026956310679611645, "loss": 0.8768, "step": 1275 }, { "epoch": 0.5155295187111762, "grad_norm": 0.6015625, "learning_rate": 0.0002695388349514563, "loss": 0.8698, "step": 1276 }, { "epoch": 0.5159335387101661, "grad_norm": 0.50390625, "learning_rate": 0.0002695145631067961, "loss": 0.8156, "step": 1277 }, { "epoch": 0.5163375587091561, "grad_norm": 0.59765625, "learning_rate": 0.0002694902912621359, "loss": 0.8356, "step": 1278 }, { "epoch": 0.5167415787081461, "grad_norm": 0.59375, "learning_rate": 0.0002694660194174757, "loss": 0.8301, "step": 1279 }, { "epoch": 0.5171455987071361, "grad_norm": 0.5625, "learning_rate": 0.0002694417475728155, "loss": 0.8422, "step": 1280 }, { "epoch": 0.5175496187061259, "grad_norm": 0.546875, "learning_rate": 0.0002694174757281553, "loss": 0.8379, "step": 1281 }, { "epoch": 0.5179536387051159, "grad_norm": 0.7109375, "learning_rate": 0.00026939320388349514, "loss": 1.0175, "step": 1282 }, { "epoch": 0.5183576587041059, "grad_norm": 0.6015625, "learning_rate": 0.0002693689320388349, "loss": 0.9491, "step": 1283 }, { "epoch": 0.5187616787030958, "grad_norm": 0.59375, "learning_rate": 0.00026934466019417475, "loss": 0.8644, "step": 1284 }, { "epoch": 0.5191656987020857, "grad_norm": 0.52734375, "learning_rate": 0.0002693203883495145, "loss": 0.7552, "step": 1285 }, { "epoch": 0.5195697187010757, "grad_norm": 0.61328125, "learning_rate": 0.00026929611650485435, "loss": 0.9852, "step": 1286 }, { "epoch": 0.5199737387000657, "grad_norm": 0.470703125, "learning_rate": 0.0002692718446601942, "loss": 0.7786, "step": 1287 }, { "epoch": 0.5203777586990556, "grad_norm": 0.625, "learning_rate": 0.00026924757281553396, "loss": 0.8616, "step": 1288 }, { "epoch": 0.5207817786980455, "grad_norm": 0.578125, "learning_rate": 0.0002692233009708738, "loss": 0.8604, "step": 1289 }, { "epoch": 0.5211857986970355, "grad_norm": 0.51953125, "learning_rate": 0.00026919902912621356, "loss": 0.8128, "step": 1290 }, { "epoch": 0.5215898186960255, "grad_norm": 0.6015625, "learning_rate": 0.0002691747572815534, "loss": 0.9263, "step": 1291 }, { "epoch": 0.5219938386950154, "grad_norm": 0.54296875, "learning_rate": 0.00026915048543689317, "loss": 0.7916, "step": 1292 }, { "epoch": 0.5223978586940053, "grad_norm": 0.6171875, "learning_rate": 0.000269126213592233, "loss": 0.9226, "step": 1293 }, { "epoch": 0.5228018786929953, "grad_norm": 0.56640625, "learning_rate": 0.00026910194174757277, "loss": 0.829, "step": 1294 }, { "epoch": 0.5232058986919853, "grad_norm": 0.734375, "learning_rate": 0.0002690776699029126, "loss": 0.9897, "step": 1295 }, { "epoch": 0.5236099186909752, "grad_norm": 0.62890625, "learning_rate": 0.0002690533980582524, "loss": 0.9497, "step": 1296 }, { "epoch": 0.5240139386899652, "grad_norm": 0.65234375, "learning_rate": 0.0002690291262135922, "loss": 0.9457, "step": 1297 }, { "epoch": 0.5244179586889551, "grad_norm": 0.69921875, "learning_rate": 0.00026900485436893203, "loss": 0.9793, "step": 1298 }, { "epoch": 0.524821978687945, "grad_norm": 0.55859375, "learning_rate": 0.0002689805825242718, "loss": 0.9198, "step": 1299 }, { "epoch": 0.525225998686935, "grad_norm": 0.66796875, "learning_rate": 0.00026895631067961164, "loss": 1.003, "step": 1300 }, { "epoch": 0.525630018685925, "grad_norm": 0.5546875, "learning_rate": 0.0002689320388349514, "loss": 0.8591, "step": 1301 }, { "epoch": 0.5260340386849149, "grad_norm": 0.53125, "learning_rate": 0.00026890776699029124, "loss": 0.7966, "step": 1302 }, { "epoch": 0.5264380586839048, "grad_norm": 0.578125, "learning_rate": 0.0002688834951456311, "loss": 0.8801, "step": 1303 }, { "epoch": 0.5268420786828948, "grad_norm": 0.66796875, "learning_rate": 0.00026885922330097085, "loss": 0.9625, "step": 1304 }, { "epoch": 0.5272460986818848, "grad_norm": 0.51171875, "learning_rate": 0.0002688349514563106, "loss": 0.8616, "step": 1305 }, { "epoch": 0.5276501186808747, "grad_norm": 0.5234375, "learning_rate": 0.00026881067961165045, "loss": 0.9248, "step": 1306 }, { "epoch": 0.5280541386798646, "grad_norm": 0.62890625, "learning_rate": 0.0002687864077669903, "loss": 0.9521, "step": 1307 }, { "epoch": 0.5284581586788546, "grad_norm": 0.6015625, "learning_rate": 0.00026876213592233006, "loss": 1.0446, "step": 1308 }, { "epoch": 0.5288621786778446, "grad_norm": 0.4609375, "learning_rate": 0.0002687378640776699, "loss": 0.7983, "step": 1309 }, { "epoch": 0.5292661986768346, "grad_norm": 0.57421875, "learning_rate": 0.00026871359223300966, "loss": 0.7752, "step": 1310 }, { "epoch": 0.5296702186758244, "grad_norm": 0.58984375, "learning_rate": 0.0002686893203883495, "loss": 0.977, "step": 1311 }, { "epoch": 0.5300742386748144, "grad_norm": 0.53125, "learning_rate": 0.0002686650485436893, "loss": 0.8583, "step": 1312 }, { "epoch": 0.5304782586738044, "grad_norm": 0.52734375, "learning_rate": 0.0002686407766990291, "loss": 0.7913, "step": 1313 }, { "epoch": 0.5308822786727942, "grad_norm": 0.59375, "learning_rate": 0.0002686165048543689, "loss": 0.8825, "step": 1314 }, { "epoch": 0.5312862986717842, "grad_norm": 0.50390625, "learning_rate": 0.0002685922330097087, "loss": 0.9315, "step": 1315 }, { "epoch": 0.5316903186707742, "grad_norm": 0.58984375, "learning_rate": 0.00026856796116504853, "loss": 0.8631, "step": 1316 }, { "epoch": 0.5320943386697642, "grad_norm": 0.59765625, "learning_rate": 0.0002685436893203883, "loss": 0.8694, "step": 1317 }, { "epoch": 0.5324983586687541, "grad_norm": 0.68359375, "learning_rate": 0.00026851941747572814, "loss": 0.9226, "step": 1318 }, { "epoch": 0.532902378667744, "grad_norm": 0.6015625, "learning_rate": 0.00026849514563106797, "loss": 0.9228, "step": 1319 }, { "epoch": 0.533306398666734, "grad_norm": 0.55078125, "learning_rate": 0.00026847087378640774, "loss": 0.8604, "step": 1320 }, { "epoch": 0.533710418665724, "grad_norm": 0.53515625, "learning_rate": 0.0002684466019417475, "loss": 0.7949, "step": 1321 }, { "epoch": 0.5341144386647139, "grad_norm": 0.5546875, "learning_rate": 0.00026842233009708735, "loss": 0.8495, "step": 1322 }, { "epoch": 0.5345184586637038, "grad_norm": 0.51953125, "learning_rate": 0.0002683980582524272, "loss": 0.7764, "step": 1323 }, { "epoch": 0.5349224786626938, "grad_norm": 0.5234375, "learning_rate": 0.00026837378640776695, "loss": 0.875, "step": 1324 }, { "epoch": 0.5353264986616838, "grad_norm": 0.5859375, "learning_rate": 0.0002683495145631068, "loss": 0.8986, "step": 1325 }, { "epoch": 0.5357305186606737, "grad_norm": 0.5546875, "learning_rate": 0.00026832524271844655, "loss": 0.8497, "step": 1326 }, { "epoch": 0.5361345386596637, "grad_norm": 0.59375, "learning_rate": 0.0002683009708737864, "loss": 0.9331, "step": 1327 }, { "epoch": 0.5365385586586536, "grad_norm": 0.6171875, "learning_rate": 0.0002682766990291262, "loss": 0.8472, "step": 1328 }, { "epoch": 0.5369425786576435, "grad_norm": 0.46484375, "learning_rate": 0.000268252427184466, "loss": 0.7695, "step": 1329 }, { "epoch": 0.5373465986566335, "grad_norm": 0.56640625, "learning_rate": 0.0002682281553398058, "loss": 0.8567, "step": 1330 }, { "epoch": 0.5377506186556235, "grad_norm": 0.46484375, "learning_rate": 0.0002682038834951456, "loss": 0.6902, "step": 1331 }, { "epoch": 0.5381546386546134, "grad_norm": 0.6015625, "learning_rate": 0.0002681796116504854, "loss": 0.8895, "step": 1332 }, { "epoch": 0.5385586586536033, "grad_norm": 0.53515625, "learning_rate": 0.00026815533980582525, "loss": 0.8503, "step": 1333 }, { "epoch": 0.5389626786525933, "grad_norm": 0.6484375, "learning_rate": 0.00026813106796116503, "loss": 0.9392, "step": 1334 }, { "epoch": 0.5393666986515833, "grad_norm": 0.578125, "learning_rate": 0.0002681067961165048, "loss": 0.8602, "step": 1335 }, { "epoch": 0.5397707186505732, "grad_norm": 0.50390625, "learning_rate": 0.00026808252427184463, "loss": 0.79, "step": 1336 }, { "epoch": 0.5401747386495631, "grad_norm": 0.6015625, "learning_rate": 0.00026805825242718446, "loss": 0.9084, "step": 1337 }, { "epoch": 0.5405787586485531, "grad_norm": 0.5234375, "learning_rate": 0.00026803398058252424, "loss": 0.8115, "step": 1338 }, { "epoch": 0.5409827786475431, "grad_norm": 0.55078125, "learning_rate": 0.00026800970873786407, "loss": 0.814, "step": 1339 }, { "epoch": 0.541386798646533, "grad_norm": 0.58203125, "learning_rate": 0.00026798543689320384, "loss": 0.8673, "step": 1340 }, { "epoch": 0.5417908186455229, "grad_norm": 0.5390625, "learning_rate": 0.00026796116504854367, "loss": 0.8535, "step": 1341 }, { "epoch": 0.5421948386445129, "grad_norm": 0.50390625, "learning_rate": 0.0002679368932038835, "loss": 0.8312, "step": 1342 }, { "epoch": 0.5425988586435029, "grad_norm": 0.5546875, "learning_rate": 0.0002679126213592233, "loss": 0.8686, "step": 1343 }, { "epoch": 0.5430028786424929, "grad_norm": 0.5625, "learning_rate": 0.0002678883495145631, "loss": 0.8561, "step": 1344 }, { "epoch": 0.5434068986414827, "grad_norm": 0.5703125, "learning_rate": 0.0002678640776699029, "loss": 0.811, "step": 1345 }, { "epoch": 0.5438109186404727, "grad_norm": 0.4765625, "learning_rate": 0.00026783980582524266, "loss": 0.7897, "step": 1346 }, { "epoch": 0.5442149386394627, "grad_norm": 0.494140625, "learning_rate": 0.0002678155339805825, "loss": 0.7833, "step": 1347 }, { "epoch": 0.5446189586384526, "grad_norm": 0.5625, "learning_rate": 0.0002677912621359223, "loss": 0.9201, "step": 1348 }, { "epoch": 0.5450229786374425, "grad_norm": 0.61328125, "learning_rate": 0.00026776699029126214, "loss": 0.9207, "step": 1349 }, { "epoch": 0.5454269986364325, "grad_norm": 0.6015625, "learning_rate": 0.0002677427184466019, "loss": 0.8278, "step": 1350 }, { "epoch": 0.5458310186354225, "grad_norm": 0.5859375, "learning_rate": 0.0002677184466019417, "loss": 0.9118, "step": 1351 }, { "epoch": 0.5462350386344124, "grad_norm": 0.5859375, "learning_rate": 0.0002676941747572815, "loss": 0.9001, "step": 1352 }, { "epoch": 0.5466390586334023, "grad_norm": 0.5546875, "learning_rate": 0.00026766990291262135, "loss": 0.9134, "step": 1353 }, { "epoch": 0.5470430786323923, "grad_norm": 0.51953125, "learning_rate": 0.00026764563106796113, "loss": 0.7841, "step": 1354 }, { "epoch": 0.5474470986313823, "grad_norm": 0.55078125, "learning_rate": 0.00026762135922330096, "loss": 0.8856, "step": 1355 }, { "epoch": 0.5478511186303722, "grad_norm": 0.609375, "learning_rate": 0.00026759708737864073, "loss": 0.8935, "step": 1356 }, { "epoch": 0.5482551386293621, "grad_norm": 0.58203125, "learning_rate": 0.00026757281553398056, "loss": 0.915, "step": 1357 }, { "epoch": 0.5486591586283521, "grad_norm": 0.5390625, "learning_rate": 0.0002675485436893204, "loss": 0.8841, "step": 1358 }, { "epoch": 0.5490631786273421, "grad_norm": 0.609375, "learning_rate": 0.00026752427184466017, "loss": 0.8439, "step": 1359 }, { "epoch": 0.549467198626332, "grad_norm": 0.5, "learning_rate": 0.0002675, "loss": 0.7601, "step": 1360 }, { "epoch": 0.549871218625322, "grad_norm": 0.52734375, "learning_rate": 0.00026747572815533977, "loss": 0.8422, "step": 1361 }, { "epoch": 0.5502752386243119, "grad_norm": 0.5546875, "learning_rate": 0.0002674514563106796, "loss": 0.8543, "step": 1362 }, { "epoch": 0.5506792586233018, "grad_norm": 0.6796875, "learning_rate": 0.00026742718446601943, "loss": 0.959, "step": 1363 }, { "epoch": 0.5510832786222918, "grad_norm": 0.54296875, "learning_rate": 0.0002674029126213592, "loss": 0.9374, "step": 1364 }, { "epoch": 0.5514872986212818, "grad_norm": 0.5625, "learning_rate": 0.000267378640776699, "loss": 0.9523, "step": 1365 }, { "epoch": 0.5518913186202717, "grad_norm": 0.62890625, "learning_rate": 0.0002673543689320388, "loss": 0.8168, "step": 1366 }, { "epoch": 0.5522953386192616, "grad_norm": 0.5625, "learning_rate": 0.00026733009708737864, "loss": 0.8726, "step": 1367 }, { "epoch": 0.5526993586182516, "grad_norm": 0.5859375, "learning_rate": 0.0002673058252427184, "loss": 0.8816, "step": 1368 }, { "epoch": 0.5531033786172416, "grad_norm": 0.66796875, "learning_rate": 0.00026728155339805825, "loss": 0.9373, "step": 1369 }, { "epoch": 0.5535073986162315, "grad_norm": 0.52734375, "learning_rate": 0.000267257281553398, "loss": 0.8561, "step": 1370 }, { "epoch": 0.5539114186152214, "grad_norm": 0.578125, "learning_rate": 0.00026723300970873785, "loss": 0.7618, "step": 1371 }, { "epoch": 0.5543154386142114, "grad_norm": 0.59375, "learning_rate": 0.0002672087378640776, "loss": 0.9182, "step": 1372 }, { "epoch": 0.5547194586132014, "grad_norm": 0.5546875, "learning_rate": 0.00026718446601941745, "loss": 0.7475, "step": 1373 }, { "epoch": 0.5551234786121914, "grad_norm": 0.54296875, "learning_rate": 0.0002671601941747573, "loss": 0.8739, "step": 1374 }, { "epoch": 0.5555274986111812, "grad_norm": 0.5234375, "learning_rate": 0.00026713592233009706, "loss": 0.8771, "step": 1375 }, { "epoch": 0.5559315186101712, "grad_norm": 0.5546875, "learning_rate": 0.00026711165048543683, "loss": 0.9402, "step": 1376 }, { "epoch": 0.5563355386091612, "grad_norm": 0.625, "learning_rate": 0.00026708737864077666, "loss": 0.9041, "step": 1377 }, { "epoch": 0.556739558608151, "grad_norm": 0.6328125, "learning_rate": 0.0002670631067961165, "loss": 0.9188, "step": 1378 }, { "epoch": 0.557143578607141, "grad_norm": 0.5625, "learning_rate": 0.00026703883495145627, "loss": 0.8953, "step": 1379 }, { "epoch": 0.557547598606131, "grad_norm": 23.125, "learning_rate": 0.0002670145631067961, "loss": 1.0533, "step": 1380 }, { "epoch": 0.557951618605121, "grad_norm": 0.63671875, "learning_rate": 0.0002669902912621359, "loss": 0.9172, "step": 1381 }, { "epoch": 0.5583556386041109, "grad_norm": 0.6796875, "learning_rate": 0.0002669660194174757, "loss": 0.9318, "step": 1382 }, { "epoch": 0.5587596586031008, "grad_norm": 0.51953125, "learning_rate": 0.00026694174757281553, "loss": 0.9607, "step": 1383 }, { "epoch": 0.5591636786020908, "grad_norm": 0.5859375, "learning_rate": 0.0002669174757281553, "loss": 0.857, "step": 1384 }, { "epoch": 0.5595676986010808, "grad_norm": 0.7265625, "learning_rate": 0.00026689320388349514, "loss": 0.9261, "step": 1385 }, { "epoch": 0.5599717186000707, "grad_norm": 0.43359375, "learning_rate": 0.0002668689320388349, "loss": 0.798, "step": 1386 }, { "epoch": 0.5603757385990606, "grad_norm": 0.59375, "learning_rate": 0.00026684466019417474, "loss": 0.8823, "step": 1387 }, { "epoch": 0.5607797585980506, "grad_norm": 0.4765625, "learning_rate": 0.00026682038834951457, "loss": 0.8425, "step": 1388 }, { "epoch": 0.5611837785970406, "grad_norm": 0.53125, "learning_rate": 0.00026679611650485435, "loss": 0.7664, "step": 1389 }, { "epoch": 0.5615877985960305, "grad_norm": 0.55859375, "learning_rate": 0.0002667718446601942, "loss": 0.8678, "step": 1390 }, { "epoch": 0.5619918185950205, "grad_norm": 0.64453125, "learning_rate": 0.00026674757281553395, "loss": 0.8871, "step": 1391 }, { "epoch": 0.5623958385940104, "grad_norm": 0.5078125, "learning_rate": 0.0002667233009708738, "loss": 0.7607, "step": 1392 }, { "epoch": 0.5627998585930003, "grad_norm": 0.578125, "learning_rate": 0.00026669902912621356, "loss": 0.8138, "step": 1393 }, { "epoch": 0.5632038785919903, "grad_norm": 0.578125, "learning_rate": 0.0002666747572815534, "loss": 0.8115, "step": 1394 }, { "epoch": 0.5636078985909803, "grad_norm": 0.53515625, "learning_rate": 0.00026665048543689316, "loss": 0.8145, "step": 1395 }, { "epoch": 0.5640119185899702, "grad_norm": 0.55078125, "learning_rate": 0.000266626213592233, "loss": 0.88, "step": 1396 }, { "epoch": 0.5644159385889601, "grad_norm": 0.68359375, "learning_rate": 0.00026660194174757276, "loss": 0.9771, "step": 1397 }, { "epoch": 0.5648199585879501, "grad_norm": 0.546875, "learning_rate": 0.0002665776699029126, "loss": 0.8715, "step": 1398 }, { "epoch": 0.5652239785869401, "grad_norm": 0.75, "learning_rate": 0.0002665533980582524, "loss": 0.8296, "step": 1399 }, { "epoch": 0.56562799858593, "grad_norm": 0.6171875, "learning_rate": 0.0002665291262135922, "loss": 0.9253, "step": 1400 }, { "epoch": 0.5660320185849199, "grad_norm": 0.58203125, "learning_rate": 0.00026650485436893203, "loss": 0.827, "step": 1401 }, { "epoch": 0.5664360385839099, "grad_norm": 0.6484375, "learning_rate": 0.0002664805825242718, "loss": 0.8737, "step": 1402 }, { "epoch": 0.5668400585828999, "grad_norm": 0.6796875, "learning_rate": 0.00026645631067961163, "loss": 0.9696, "step": 1403 }, { "epoch": 0.5672440785818899, "grad_norm": 0.58203125, "learning_rate": 0.00026643203883495146, "loss": 0.8544, "step": 1404 }, { "epoch": 0.5676480985808797, "grad_norm": 0.55078125, "learning_rate": 0.00026640776699029124, "loss": 0.828, "step": 1405 }, { "epoch": 0.5680521185798697, "grad_norm": 0.609375, "learning_rate": 0.000266383495145631, "loss": 0.9082, "step": 1406 }, { "epoch": 0.5684561385788597, "grad_norm": 0.59765625, "learning_rate": 0.00026635922330097084, "loss": 0.7719, "step": 1407 }, { "epoch": 0.5688601585778496, "grad_norm": 0.5390625, "learning_rate": 0.00026633495145631067, "loss": 0.7672, "step": 1408 }, { "epoch": 0.5692641785768395, "grad_norm": 0.6875, "learning_rate": 0.00026631067961165045, "loss": 0.9306, "step": 1409 }, { "epoch": 0.5696681985758295, "grad_norm": 0.49609375, "learning_rate": 0.0002662864077669903, "loss": 0.784, "step": 1410 }, { "epoch": 0.5700722185748195, "grad_norm": 0.53125, "learning_rate": 0.00026626213592233005, "loss": 0.8627, "step": 1411 }, { "epoch": 0.5704762385738094, "grad_norm": 0.54296875, "learning_rate": 0.0002662378640776699, "loss": 0.8744, "step": 1412 }, { "epoch": 0.5708802585727993, "grad_norm": 0.546875, "learning_rate": 0.0002662135922330097, "loss": 0.8406, "step": 1413 }, { "epoch": 0.5712842785717893, "grad_norm": 0.5390625, "learning_rate": 0.0002661893203883495, "loss": 0.9038, "step": 1414 }, { "epoch": 0.5716882985707793, "grad_norm": 0.45703125, "learning_rate": 0.0002661650485436893, "loss": 0.7531, "step": 1415 }, { "epoch": 0.5720923185697692, "grad_norm": 0.55859375, "learning_rate": 0.0002661407766990291, "loss": 0.8826, "step": 1416 }, { "epoch": 0.5724963385687591, "grad_norm": 0.56640625, "learning_rate": 0.0002661165048543689, "loss": 0.8811, "step": 1417 }, { "epoch": 0.5729003585677491, "grad_norm": 0.5625, "learning_rate": 0.0002660922330097087, "loss": 0.8921, "step": 1418 }, { "epoch": 0.5733043785667391, "grad_norm": 0.5390625, "learning_rate": 0.0002660679611650485, "loss": 0.846, "step": 1419 }, { "epoch": 0.573708398565729, "grad_norm": 0.5078125, "learning_rate": 0.00026604368932038835, "loss": 0.7617, "step": 1420 }, { "epoch": 0.574112418564719, "grad_norm": 0.74609375, "learning_rate": 0.00026601941747572813, "loss": 1.0166, "step": 1421 }, { "epoch": 0.5745164385637089, "grad_norm": 0.48828125, "learning_rate": 0.0002659951456310679, "loss": 0.697, "step": 1422 }, { "epoch": 0.5749204585626988, "grad_norm": 0.6484375, "learning_rate": 0.00026597087378640773, "loss": 0.9225, "step": 1423 }, { "epoch": 0.5753244785616888, "grad_norm": 0.49609375, "learning_rate": 0.00026594660194174756, "loss": 0.7972, "step": 1424 }, { "epoch": 0.5757284985606788, "grad_norm": 0.60546875, "learning_rate": 0.00026592233009708734, "loss": 0.8962, "step": 1425 }, { "epoch": 0.5761325185596687, "grad_norm": 0.5390625, "learning_rate": 0.00026589805825242717, "loss": 0.8348, "step": 1426 }, { "epoch": 0.5765365385586586, "grad_norm": 0.5390625, "learning_rate": 0.00026587378640776694, "loss": 0.8568, "step": 1427 }, { "epoch": 0.5769405585576486, "grad_norm": 0.62890625, "learning_rate": 0.00026584951456310677, "loss": 0.8632, "step": 1428 }, { "epoch": 0.5773445785566386, "grad_norm": 0.5859375, "learning_rate": 0.0002658252427184466, "loss": 0.954, "step": 1429 }, { "epoch": 0.5777485985556285, "grad_norm": 0.5234375, "learning_rate": 0.0002658009708737864, "loss": 0.8269, "step": 1430 }, { "epoch": 0.5781526185546184, "grad_norm": 0.53515625, "learning_rate": 0.0002657766990291262, "loss": 0.8599, "step": 1431 }, { "epoch": 0.5785566385536084, "grad_norm": 0.58984375, "learning_rate": 0.000265752427184466, "loss": 0.8896, "step": 1432 }, { "epoch": 0.5789606585525984, "grad_norm": 0.58203125, "learning_rate": 0.0002657281553398058, "loss": 0.9318, "step": 1433 }, { "epoch": 0.5793646785515884, "grad_norm": 0.5625, "learning_rate": 0.00026570388349514564, "loss": 0.741, "step": 1434 }, { "epoch": 0.5797686985505782, "grad_norm": 0.53515625, "learning_rate": 0.0002656796116504854, "loss": 0.8059, "step": 1435 }, { "epoch": 0.5801727185495682, "grad_norm": 0.5546875, "learning_rate": 0.0002656553398058252, "loss": 0.9362, "step": 1436 }, { "epoch": 0.5805767385485582, "grad_norm": 0.64453125, "learning_rate": 0.000265631067961165, "loss": 0.8147, "step": 1437 }, { "epoch": 0.580980758547548, "grad_norm": 0.53125, "learning_rate": 0.00026560679611650485, "loss": 0.7813, "step": 1438 }, { "epoch": 0.581384778546538, "grad_norm": 0.5234375, "learning_rate": 0.0002655825242718446, "loss": 0.8497, "step": 1439 }, { "epoch": 0.581788798545528, "grad_norm": 0.546875, "learning_rate": 0.00026555825242718446, "loss": 0.8207, "step": 1440 }, { "epoch": 0.582192818544518, "grad_norm": 0.578125, "learning_rate": 0.00026553398058252423, "loss": 0.8307, "step": 1441 }, { "epoch": 0.5825968385435079, "grad_norm": 0.625, "learning_rate": 0.00026550970873786406, "loss": 0.9556, "step": 1442 }, { "epoch": 0.5830008585424978, "grad_norm": 0.52734375, "learning_rate": 0.0002654854368932039, "loss": 0.8701, "step": 1443 }, { "epoch": 0.5834048785414878, "grad_norm": 0.671875, "learning_rate": 0.00026546116504854366, "loss": 0.928, "step": 1444 }, { "epoch": 0.5838088985404778, "grad_norm": 0.51171875, "learning_rate": 0.0002654368932038835, "loss": 0.7628, "step": 1445 }, { "epoch": 0.5842129185394677, "grad_norm": 0.49609375, "learning_rate": 0.00026541262135922327, "loss": 0.8048, "step": 1446 }, { "epoch": 0.5846169385384576, "grad_norm": 0.56640625, "learning_rate": 0.00026538834951456304, "loss": 0.8467, "step": 1447 }, { "epoch": 0.5850209585374476, "grad_norm": 0.63671875, "learning_rate": 0.0002653640776699029, "loss": 0.8846, "step": 1448 }, { "epoch": 0.5854249785364376, "grad_norm": 0.5546875, "learning_rate": 0.0002653398058252427, "loss": 0.8429, "step": 1449 }, { "epoch": 0.5858289985354275, "grad_norm": 0.5703125, "learning_rate": 0.00026531553398058253, "loss": 0.8839, "step": 1450 }, { "epoch": 0.5862330185344174, "grad_norm": 0.55859375, "learning_rate": 0.0002652912621359223, "loss": 0.8515, "step": 1451 }, { "epoch": 0.5866370385334074, "grad_norm": 0.59765625, "learning_rate": 0.0002652669902912621, "loss": 0.8253, "step": 1452 }, { "epoch": 0.5870410585323974, "grad_norm": 0.53515625, "learning_rate": 0.0002652427184466019, "loss": 0.8056, "step": 1453 }, { "epoch": 0.5874450785313873, "grad_norm": 0.56640625, "learning_rate": 0.00026521844660194174, "loss": 0.8781, "step": 1454 }, { "epoch": 0.5878490985303773, "grad_norm": 0.5234375, "learning_rate": 0.0002651941747572815, "loss": 0.8379, "step": 1455 }, { "epoch": 0.5882531185293672, "grad_norm": 0.5703125, "learning_rate": 0.00026516990291262135, "loss": 0.8203, "step": 1456 }, { "epoch": 0.5886571385283571, "grad_norm": 0.56640625, "learning_rate": 0.0002651456310679611, "loss": 0.8279, "step": 1457 }, { "epoch": 0.5890611585273471, "grad_norm": 0.6171875, "learning_rate": 0.00026512135922330095, "loss": 0.9737, "step": 1458 }, { "epoch": 0.5894651785263371, "grad_norm": 0.62890625, "learning_rate": 0.0002650970873786408, "loss": 0.919, "step": 1459 }, { "epoch": 0.589869198525327, "grad_norm": 0.5546875, "learning_rate": 0.00026507281553398056, "loss": 0.8825, "step": 1460 }, { "epoch": 0.5902732185243169, "grad_norm": 0.55078125, "learning_rate": 0.0002650485436893204, "loss": 0.928, "step": 1461 }, { "epoch": 0.5906772385233069, "grad_norm": 0.6484375, "learning_rate": 0.00026502427184466016, "loss": 0.9006, "step": 1462 }, { "epoch": 0.5910812585222969, "grad_norm": 0.640625, "learning_rate": 0.000265, "loss": 0.9638, "step": 1463 }, { "epoch": 0.5914852785212869, "grad_norm": 0.62109375, "learning_rate": 0.0002649757281553398, "loss": 0.8346, "step": 1464 }, { "epoch": 0.5918892985202767, "grad_norm": 0.58984375, "learning_rate": 0.0002649514563106796, "loss": 0.9346, "step": 1465 }, { "epoch": 0.5922933185192667, "grad_norm": 0.53515625, "learning_rate": 0.00026492718446601937, "loss": 0.8417, "step": 1466 }, { "epoch": 0.5926973385182567, "grad_norm": 0.60546875, "learning_rate": 0.0002649029126213592, "loss": 0.9153, "step": 1467 }, { "epoch": 0.5931013585172467, "grad_norm": 0.52734375, "learning_rate": 0.00026487864077669903, "loss": 0.9231, "step": 1468 }, { "epoch": 0.5935053785162365, "grad_norm": 0.58203125, "learning_rate": 0.0002648543689320388, "loss": 0.7981, "step": 1469 }, { "epoch": 0.5939093985152265, "grad_norm": 0.6015625, "learning_rate": 0.00026483009708737863, "loss": 0.8825, "step": 1470 }, { "epoch": 0.5943134185142165, "grad_norm": 0.5625, "learning_rate": 0.0002648058252427184, "loss": 0.8667, "step": 1471 }, { "epoch": 0.5947174385132064, "grad_norm": 0.474609375, "learning_rate": 0.00026478155339805824, "loss": 0.744, "step": 1472 }, { "epoch": 0.5951214585121963, "grad_norm": 0.62109375, "learning_rate": 0.000264757281553398, "loss": 0.9815, "step": 1473 }, { "epoch": 0.5955254785111863, "grad_norm": 0.54296875, "learning_rate": 0.00026473300970873784, "loss": 0.8079, "step": 1474 }, { "epoch": 0.5959294985101763, "grad_norm": 0.484375, "learning_rate": 0.00026470873786407767, "loss": 0.7513, "step": 1475 }, { "epoch": 0.5963335185091662, "grad_norm": 0.5234375, "learning_rate": 0.00026468446601941745, "loss": 0.8679, "step": 1476 }, { "epoch": 0.5967375385081561, "grad_norm": 0.5625, "learning_rate": 0.0002646601941747572, "loss": 0.8251, "step": 1477 }, { "epoch": 0.5971415585071461, "grad_norm": 0.466796875, "learning_rate": 0.00026463592233009705, "loss": 0.7255, "step": 1478 }, { "epoch": 0.5975455785061361, "grad_norm": 0.62109375, "learning_rate": 0.0002646116504854369, "loss": 0.895, "step": 1479 }, { "epoch": 0.597949598505126, "grad_norm": 0.578125, "learning_rate": 0.00026458737864077666, "loss": 0.8851, "step": 1480 }, { "epoch": 0.598353618504116, "grad_norm": 0.60546875, "learning_rate": 0.0002645631067961165, "loss": 0.8974, "step": 1481 }, { "epoch": 0.5987576385031059, "grad_norm": 0.7265625, "learning_rate": 0.00026453883495145626, "loss": 0.9873, "step": 1482 }, { "epoch": 0.5991616585020959, "grad_norm": 0.61328125, "learning_rate": 0.0002645145631067961, "loss": 0.882, "step": 1483 }, { "epoch": 0.5995656785010858, "grad_norm": 0.55078125, "learning_rate": 0.0002644902912621359, "loss": 0.9511, "step": 1484 }, { "epoch": 0.5999696985000758, "grad_norm": 0.6171875, "learning_rate": 0.0002644660194174757, "loss": 0.9369, "step": 1485 }, { "epoch": 0.6003737184990657, "grad_norm": 0.55859375, "learning_rate": 0.0002644417475728155, "loss": 0.8879, "step": 1486 }, { "epoch": 0.6007777384980556, "grad_norm": 0.8125, "learning_rate": 0.0002644174757281553, "loss": 1.0533, "step": 1487 }, { "epoch": 0.6011817584970456, "grad_norm": 0.6796875, "learning_rate": 0.00026439320388349513, "loss": 0.891, "step": 1488 }, { "epoch": 0.6015857784960356, "grad_norm": 0.546875, "learning_rate": 0.00026436893203883496, "loss": 0.836, "step": 1489 }, { "epoch": 0.6019897984950255, "grad_norm": 0.609375, "learning_rate": 0.00026434466019417473, "loss": 0.881, "step": 1490 }, { "epoch": 0.6023938184940154, "grad_norm": 0.53125, "learning_rate": 0.00026432038834951456, "loss": 0.8601, "step": 1491 }, { "epoch": 0.6027978384930054, "grad_norm": 0.5234375, "learning_rate": 0.00026429611650485434, "loss": 0.8605, "step": 1492 }, { "epoch": 0.6032018584919954, "grad_norm": 0.609375, "learning_rate": 0.00026427184466019417, "loss": 0.9349, "step": 1493 }, { "epoch": 0.6036058784909853, "grad_norm": 0.494140625, "learning_rate": 0.00026424757281553394, "loss": 0.8687, "step": 1494 }, { "epoch": 0.6040098984899752, "grad_norm": 0.5546875, "learning_rate": 0.0002642233009708738, "loss": 0.7745, "step": 1495 }, { "epoch": 0.6044139184889652, "grad_norm": 0.4921875, "learning_rate": 0.00026419902912621355, "loss": 0.8211, "step": 1496 }, { "epoch": 0.6048179384879552, "grad_norm": 0.64453125, "learning_rate": 0.0002641747572815534, "loss": 0.9613, "step": 1497 }, { "epoch": 0.6052219584869452, "grad_norm": 0.49609375, "learning_rate": 0.00026415048543689315, "loss": 0.8203, "step": 1498 }, { "epoch": 0.605625978485935, "grad_norm": 0.609375, "learning_rate": 0.000264126213592233, "loss": 1.0009, "step": 1499 }, { "epoch": 0.606029998484925, "grad_norm": 0.53125, "learning_rate": 0.0002641019417475728, "loss": 0.8339, "step": 1500 }, { "epoch": 0.606434018483915, "grad_norm": 0.5390625, "learning_rate": 0.0002640776699029126, "loss": 0.8334, "step": 1501 }, { "epoch": 0.6068380384829049, "grad_norm": 0.69921875, "learning_rate": 0.0002640533980582524, "loss": 1.026, "step": 1502 }, { "epoch": 0.6072420584818948, "grad_norm": 0.5625, "learning_rate": 0.0002640291262135922, "loss": 0.8851, "step": 1503 }, { "epoch": 0.6076460784808848, "grad_norm": 0.5859375, "learning_rate": 0.000264004854368932, "loss": 0.9841, "step": 1504 }, { "epoch": 0.6080500984798748, "grad_norm": 0.52734375, "learning_rate": 0.00026398058252427185, "loss": 0.8624, "step": 1505 }, { "epoch": 0.6084541184788647, "grad_norm": 0.494140625, "learning_rate": 0.0002639563106796116, "loss": 0.8132, "step": 1506 }, { "epoch": 0.6088581384778546, "grad_norm": 0.50390625, "learning_rate": 0.0002639320388349514, "loss": 0.7483, "step": 1507 }, { "epoch": 0.6092621584768446, "grad_norm": 0.52734375, "learning_rate": 0.00026390776699029123, "loss": 0.8643, "step": 1508 }, { "epoch": 0.6096661784758346, "grad_norm": 0.63671875, "learning_rate": 0.00026388349514563106, "loss": 0.8543, "step": 1509 }, { "epoch": 0.6100701984748245, "grad_norm": 0.484375, "learning_rate": 0.00026385922330097084, "loss": 0.8106, "step": 1510 }, { "epoch": 0.6104742184738144, "grad_norm": 0.6015625, "learning_rate": 0.00026383495145631067, "loss": 0.8402, "step": 1511 }, { "epoch": 0.6108782384728044, "grad_norm": 0.515625, "learning_rate": 0.00026381067961165044, "loss": 0.8676, "step": 1512 }, { "epoch": 0.6112822584717944, "grad_norm": 0.703125, "learning_rate": 0.00026378640776699027, "loss": 0.8976, "step": 1513 }, { "epoch": 0.6116862784707843, "grad_norm": 0.52734375, "learning_rate": 0.0002637621359223301, "loss": 0.9, "step": 1514 }, { "epoch": 0.6120902984697743, "grad_norm": 0.5625, "learning_rate": 0.0002637378640776699, "loss": 0.8517, "step": 1515 }, { "epoch": 0.6124943184687642, "grad_norm": 0.5625, "learning_rate": 0.0002637135922330097, "loss": 0.852, "step": 1516 }, { "epoch": 0.6128983384677541, "grad_norm": 0.546875, "learning_rate": 0.0002636893203883495, "loss": 0.8336, "step": 1517 }, { "epoch": 0.6133023584667441, "grad_norm": 0.578125, "learning_rate": 0.0002636650485436893, "loss": 0.8887, "step": 1518 }, { "epoch": 0.6137063784657341, "grad_norm": 0.66796875, "learning_rate": 0.0002636407766990291, "loss": 0.9086, "step": 1519 }, { "epoch": 0.614110398464724, "grad_norm": 0.53125, "learning_rate": 0.0002636165048543689, "loss": 0.7106, "step": 1520 }, { "epoch": 0.6145144184637139, "grad_norm": 0.486328125, "learning_rate": 0.00026359223300970874, "loss": 0.8399, "step": 1521 }, { "epoch": 0.6149184384627039, "grad_norm": 0.5234375, "learning_rate": 0.0002635679611650485, "loss": 0.7809, "step": 1522 }, { "epoch": 0.6153224584616939, "grad_norm": 0.4609375, "learning_rate": 0.0002635436893203883, "loss": 0.7927, "step": 1523 }, { "epoch": 0.6157264784606838, "grad_norm": 0.796875, "learning_rate": 0.0002635194174757281, "loss": 0.8558, "step": 1524 }, { "epoch": 0.6161304984596737, "grad_norm": 0.640625, "learning_rate": 0.00026349514563106795, "loss": 0.9081, "step": 1525 }, { "epoch": 0.6165345184586637, "grad_norm": 0.55078125, "learning_rate": 0.00026347087378640773, "loss": 0.7776, "step": 1526 }, { "epoch": 0.6169385384576537, "grad_norm": 0.5546875, "learning_rate": 0.00026344660194174756, "loss": 0.7591, "step": 1527 }, { "epoch": 0.6173425584566437, "grad_norm": 0.53125, "learning_rate": 0.00026342233009708733, "loss": 0.7919, "step": 1528 }, { "epoch": 0.6177465784556335, "grad_norm": 0.6015625, "learning_rate": 0.00026339805825242716, "loss": 0.8048, "step": 1529 }, { "epoch": 0.6181505984546235, "grad_norm": 0.59375, "learning_rate": 0.000263373786407767, "loss": 0.8535, "step": 1530 }, { "epoch": 0.6185546184536135, "grad_norm": 0.55078125, "learning_rate": 0.00026334951456310677, "loss": 0.8246, "step": 1531 }, { "epoch": 0.6189586384526033, "grad_norm": 0.515625, "learning_rate": 0.0002633252427184466, "loss": 0.8646, "step": 1532 }, { "epoch": 0.6193626584515933, "grad_norm": 0.5234375, "learning_rate": 0.00026330097087378637, "loss": 0.7301, "step": 1533 }, { "epoch": 0.6197666784505833, "grad_norm": 0.59765625, "learning_rate": 0.0002632766990291262, "loss": 0.796, "step": 1534 }, { "epoch": 0.6201706984495733, "grad_norm": 0.58203125, "learning_rate": 0.00026325242718446603, "loss": 0.8621, "step": 1535 }, { "epoch": 0.6205747184485632, "grad_norm": 0.50390625, "learning_rate": 0.0002632281553398058, "loss": 0.7405, "step": 1536 }, { "epoch": 0.6209787384475531, "grad_norm": 0.58203125, "learning_rate": 0.0002632038834951456, "loss": 0.7882, "step": 1537 }, { "epoch": 0.6213827584465431, "grad_norm": 0.5546875, "learning_rate": 0.0002631796116504854, "loss": 0.9358, "step": 1538 }, { "epoch": 0.6217867784455331, "grad_norm": 0.58203125, "learning_rate": 0.00026315533980582524, "loss": 0.7745, "step": 1539 }, { "epoch": 0.622190798444523, "grad_norm": 0.625, "learning_rate": 0.000263131067961165, "loss": 0.8686, "step": 1540 }, { "epoch": 0.6225948184435129, "grad_norm": 0.65625, "learning_rate": 0.00026310679611650484, "loss": 0.8572, "step": 1541 }, { "epoch": 0.6229988384425029, "grad_norm": 0.61328125, "learning_rate": 0.0002630825242718446, "loss": 0.8297, "step": 1542 }, { "epoch": 0.6234028584414929, "grad_norm": 0.625, "learning_rate": 0.00026305825242718445, "loss": 0.7796, "step": 1543 }, { "epoch": 0.6238068784404828, "grad_norm": 0.55859375, "learning_rate": 0.0002630339805825242, "loss": 0.9628, "step": 1544 }, { "epoch": 0.6242108984394727, "grad_norm": 0.66015625, "learning_rate": 0.00026300970873786405, "loss": 0.7887, "step": 1545 }, { "epoch": 0.6246149184384627, "grad_norm": 0.6015625, "learning_rate": 0.0002629854368932039, "loss": 0.9062, "step": 1546 }, { "epoch": 0.6250189384374527, "grad_norm": 0.55859375, "learning_rate": 0.00026296116504854366, "loss": 0.8268, "step": 1547 }, { "epoch": 0.6254229584364426, "grad_norm": 0.54296875, "learning_rate": 0.00026293689320388343, "loss": 0.9628, "step": 1548 }, { "epoch": 0.6258269784354326, "grad_norm": 0.55078125, "learning_rate": 0.00026291262135922326, "loss": 0.8598, "step": 1549 }, { "epoch": 0.6262309984344225, "grad_norm": 0.55859375, "learning_rate": 0.0002628883495145631, "loss": 0.8596, "step": 1550 }, { "epoch": 0.6266350184334124, "grad_norm": 0.74609375, "learning_rate": 0.0002628640776699029, "loss": 0.8437, "step": 1551 }, { "epoch": 0.6270390384324024, "grad_norm": 0.5625, "learning_rate": 0.0002628398058252427, "loss": 0.9297, "step": 1552 }, { "epoch": 0.6274430584313924, "grad_norm": 0.53515625, "learning_rate": 0.00026281553398058247, "loss": 0.8867, "step": 1553 }, { "epoch": 0.6278470784303823, "grad_norm": 0.92578125, "learning_rate": 0.0002627912621359223, "loss": 0.92, "step": 1554 }, { "epoch": 0.6282510984293722, "grad_norm": 0.640625, "learning_rate": 0.00026276699029126213, "loss": 0.9713, "step": 1555 }, { "epoch": 0.6286551184283622, "grad_norm": 0.63671875, "learning_rate": 0.0002627427184466019, "loss": 0.8656, "step": 1556 }, { "epoch": 0.6290591384273522, "grad_norm": 0.5546875, "learning_rate": 0.00026271844660194174, "loss": 0.8678, "step": 1557 }, { "epoch": 0.6294631584263422, "grad_norm": 0.6328125, "learning_rate": 0.0002626941747572815, "loss": 0.8887, "step": 1558 }, { "epoch": 0.629867178425332, "grad_norm": 0.71875, "learning_rate": 0.00026266990291262134, "loss": 0.872, "step": 1559 }, { "epoch": 0.630271198424322, "grad_norm": 0.609375, "learning_rate": 0.00026264563106796117, "loss": 0.7638, "step": 1560 }, { "epoch": 0.630675218423312, "grad_norm": 0.5546875, "learning_rate": 0.00026262135922330094, "loss": 0.7935, "step": 1561 }, { "epoch": 0.631079238422302, "grad_norm": 0.5859375, "learning_rate": 0.0002625970873786408, "loss": 0.8592, "step": 1562 }, { "epoch": 0.6314832584212918, "grad_norm": 0.59375, "learning_rate": 0.00026257281553398055, "loss": 0.8783, "step": 1563 }, { "epoch": 0.6318872784202818, "grad_norm": 0.51171875, "learning_rate": 0.0002625485436893204, "loss": 0.8388, "step": 1564 }, { "epoch": 0.6322912984192718, "grad_norm": 10.75, "learning_rate": 0.0002625242718446602, "loss": 0.8766, "step": 1565 }, { "epoch": 0.6326953184182617, "grad_norm": 0.63671875, "learning_rate": 0.0002625, "loss": 0.8473, "step": 1566 }, { "epoch": 0.6330993384172516, "grad_norm": 0.61328125, "learning_rate": 0.00026247572815533976, "loss": 0.7857, "step": 1567 }, { "epoch": 0.6335033584162416, "grad_norm": 0.57421875, "learning_rate": 0.0002624514563106796, "loss": 0.8694, "step": 1568 }, { "epoch": 0.6339073784152316, "grad_norm": 0.59375, "learning_rate": 0.0002624271844660194, "loss": 0.8763, "step": 1569 }, { "epoch": 0.6343113984142215, "grad_norm": 0.478515625, "learning_rate": 0.0002624029126213592, "loss": 0.8392, "step": 1570 }, { "epoch": 0.6347154184132114, "grad_norm": 0.5859375, "learning_rate": 0.000262378640776699, "loss": 0.7515, "step": 1571 }, { "epoch": 0.6351194384122014, "grad_norm": 0.5546875, "learning_rate": 0.0002623543689320388, "loss": 0.8578, "step": 1572 }, { "epoch": 0.6355234584111914, "grad_norm": 0.4453125, "learning_rate": 0.00026233009708737863, "loss": 0.7757, "step": 1573 }, { "epoch": 0.6359274784101813, "grad_norm": 0.55859375, "learning_rate": 0.0002623058252427184, "loss": 0.8749, "step": 1574 }, { "epoch": 0.6363314984091712, "grad_norm": 0.58984375, "learning_rate": 0.00026228155339805823, "loss": 0.7762, "step": 1575 }, { "epoch": 0.6367355184081612, "grad_norm": 0.484375, "learning_rate": 0.00026225728155339806, "loss": 0.801, "step": 1576 }, { "epoch": 0.6371395384071512, "grad_norm": 0.515625, "learning_rate": 0.00026223300970873784, "loss": 0.8372, "step": 1577 }, { "epoch": 0.6375435584061411, "grad_norm": 0.53515625, "learning_rate": 0.0002622087378640776, "loss": 0.8079, "step": 1578 }, { "epoch": 0.637947578405131, "grad_norm": 0.6796875, "learning_rate": 0.00026218446601941744, "loss": 0.9032, "step": 1579 }, { "epoch": 0.638351598404121, "grad_norm": 0.59765625, "learning_rate": 0.00026216019417475727, "loss": 0.9409, "step": 1580 }, { "epoch": 0.6387556184031109, "grad_norm": 0.51171875, "learning_rate": 0.00026213592233009705, "loss": 0.9317, "step": 1581 }, { "epoch": 0.6391596384021009, "grad_norm": 0.53515625, "learning_rate": 0.0002621116504854369, "loss": 0.7918, "step": 1582 }, { "epoch": 0.6395636584010909, "grad_norm": 0.4765625, "learning_rate": 0.00026208737864077665, "loss": 0.7524, "step": 1583 }, { "epoch": 0.6399676784000808, "grad_norm": 0.50390625, "learning_rate": 0.0002620631067961165, "loss": 0.8121, "step": 1584 }, { "epoch": 0.6403716983990707, "grad_norm": 0.6640625, "learning_rate": 0.0002620388349514563, "loss": 0.9001, "step": 1585 }, { "epoch": 0.6407757183980607, "grad_norm": 0.49609375, "learning_rate": 0.0002620145631067961, "loss": 0.7823, "step": 1586 }, { "epoch": 0.6411797383970507, "grad_norm": 0.58203125, "learning_rate": 0.0002619902912621359, "loss": 0.8913, "step": 1587 }, { "epoch": 0.6415837583960406, "grad_norm": 0.55078125, "learning_rate": 0.0002619660194174757, "loss": 0.6801, "step": 1588 }, { "epoch": 0.6419877783950305, "grad_norm": 0.59375, "learning_rate": 0.0002619417475728155, "loss": 0.9737, "step": 1589 }, { "epoch": 0.6423917983940205, "grad_norm": 0.76171875, "learning_rate": 0.00026191747572815535, "loss": 0.9765, "step": 1590 }, { "epoch": 0.6427958183930105, "grad_norm": 0.96875, "learning_rate": 0.0002618932038834951, "loss": 0.8534, "step": 1591 }, { "epoch": 0.6431998383920005, "grad_norm": 0.60546875, "learning_rate": 0.00026186893203883495, "loss": 0.8907, "step": 1592 }, { "epoch": 0.6436038583909903, "grad_norm": 0.53125, "learning_rate": 0.00026184466019417473, "loss": 0.8464, "step": 1593 }, { "epoch": 0.6440078783899803, "grad_norm": 0.5859375, "learning_rate": 0.00026182038834951456, "loss": 0.8277, "step": 1594 }, { "epoch": 0.6444118983889703, "grad_norm": 0.50390625, "learning_rate": 0.00026179611650485433, "loss": 0.8341, "step": 1595 }, { "epoch": 0.6448159183879602, "grad_norm": 0.64453125, "learning_rate": 0.00026177184466019416, "loss": 0.8328, "step": 1596 }, { "epoch": 0.6452199383869501, "grad_norm": 0.51171875, "learning_rate": 0.00026174757281553394, "loss": 0.8189, "step": 1597 }, { "epoch": 0.6456239583859401, "grad_norm": 0.51953125, "learning_rate": 0.00026172330097087377, "loss": 0.8785, "step": 1598 }, { "epoch": 0.6460279783849301, "grad_norm": 0.6171875, "learning_rate": 0.00026169902912621354, "loss": 0.992, "step": 1599 }, { "epoch": 0.64643199838392, "grad_norm": 0.51953125, "learning_rate": 0.00026167475728155337, "loss": 0.8591, "step": 1600 }, { "epoch": 0.6468360183829099, "grad_norm": 1.4609375, "learning_rate": 0.0002616504854368932, "loss": 0.8045, "step": 1601 }, { "epoch": 0.6472400383818999, "grad_norm": 0.5625, "learning_rate": 0.000261626213592233, "loss": 0.7515, "step": 1602 }, { "epoch": 0.6476440583808899, "grad_norm": 0.80078125, "learning_rate": 0.0002616019417475728, "loss": 0.8424, "step": 1603 }, { "epoch": 0.6480480783798798, "grad_norm": 0.546875, "learning_rate": 0.0002615776699029126, "loss": 0.8114, "step": 1604 }, { "epoch": 0.6484520983788697, "grad_norm": 0.5859375, "learning_rate": 0.0002615533980582524, "loss": 0.8419, "step": 1605 }, { "epoch": 0.6488561183778597, "grad_norm": 0.57421875, "learning_rate": 0.00026152912621359224, "loss": 0.8351, "step": 1606 }, { "epoch": 0.6492601383768497, "grad_norm": 0.490234375, "learning_rate": 0.000261504854368932, "loss": 0.7582, "step": 1607 }, { "epoch": 0.6496641583758396, "grad_norm": 0.546875, "learning_rate": 0.0002614805825242718, "loss": 0.8756, "step": 1608 }, { "epoch": 0.6500681783748296, "grad_norm": 0.515625, "learning_rate": 0.0002614563106796116, "loss": 0.8338, "step": 1609 }, { "epoch": 0.6504721983738195, "grad_norm": 0.5625, "learning_rate": 0.00026143203883495145, "loss": 0.8204, "step": 1610 }, { "epoch": 0.6508762183728094, "grad_norm": 0.5546875, "learning_rate": 0.0002614077669902912, "loss": 0.8597, "step": 1611 }, { "epoch": 0.6512802383717994, "grad_norm": 0.498046875, "learning_rate": 0.00026138349514563105, "loss": 0.8046, "step": 1612 }, { "epoch": 0.6516842583707894, "grad_norm": 0.5625, "learning_rate": 0.00026135922330097083, "loss": 0.8544, "step": 1613 }, { "epoch": 0.6520882783697793, "grad_norm": 0.63671875, "learning_rate": 0.00026133495145631066, "loss": 1.023, "step": 1614 }, { "epoch": 0.6524922983687692, "grad_norm": 0.52734375, "learning_rate": 0.0002613106796116505, "loss": 0.7778, "step": 1615 }, { "epoch": 0.6528963183677592, "grad_norm": 1.1015625, "learning_rate": 0.00026128640776699026, "loss": 0.7586, "step": 1616 }, { "epoch": 0.6533003383667492, "grad_norm": 0.4609375, "learning_rate": 0.0002612621359223301, "loss": 0.7047, "step": 1617 }, { "epoch": 0.6537043583657391, "grad_norm": 0.56640625, "learning_rate": 0.00026123786407766987, "loss": 0.9178, "step": 1618 }, { "epoch": 0.654108378364729, "grad_norm": 0.79296875, "learning_rate": 0.0002612135922330097, "loss": 0.9086, "step": 1619 }, { "epoch": 0.654512398363719, "grad_norm": 0.578125, "learning_rate": 0.00026118932038834947, "loss": 0.9267, "step": 1620 }, { "epoch": 0.654916418362709, "grad_norm": 0.609375, "learning_rate": 0.0002611650485436893, "loss": 0.883, "step": 1621 }, { "epoch": 0.655320438361699, "grad_norm": 0.58203125, "learning_rate": 0.00026114077669902913, "loss": 0.8088, "step": 1622 }, { "epoch": 0.6557244583606888, "grad_norm": 0.61328125, "learning_rate": 0.0002611165048543689, "loss": 0.9362, "step": 1623 }, { "epoch": 0.6561284783596788, "grad_norm": 0.54296875, "learning_rate": 0.0002610922330097087, "loss": 0.82, "step": 1624 }, { "epoch": 0.6565324983586688, "grad_norm": 0.56640625, "learning_rate": 0.0002610679611650485, "loss": 0.753, "step": 1625 }, { "epoch": 0.6569365183576586, "grad_norm": 0.5078125, "learning_rate": 0.00026104368932038834, "loss": 0.7307, "step": 1626 }, { "epoch": 0.6573405383566486, "grad_norm": 0.58203125, "learning_rate": 0.0002610194174757281, "loss": 0.8488, "step": 1627 }, { "epoch": 0.6577445583556386, "grad_norm": 0.58984375, "learning_rate": 0.00026099514563106795, "loss": 0.8737, "step": 1628 }, { "epoch": 0.6581485783546286, "grad_norm": 0.578125, "learning_rate": 0.0002609708737864077, "loss": 0.8815, "step": 1629 }, { "epoch": 0.6585525983536185, "grad_norm": 0.55859375, "learning_rate": 0.00026094660194174755, "loss": 0.8425, "step": 1630 }, { "epoch": 0.6589566183526084, "grad_norm": 0.55078125, "learning_rate": 0.0002609223300970874, "loss": 0.8403, "step": 1631 }, { "epoch": 0.6593606383515984, "grad_norm": 0.55859375, "learning_rate": 0.00026089805825242716, "loss": 0.8852, "step": 1632 }, { "epoch": 0.6597646583505884, "grad_norm": 0.51953125, "learning_rate": 0.000260873786407767, "loss": 0.7646, "step": 1633 }, { "epoch": 0.6601686783495783, "grad_norm": 0.50390625, "learning_rate": 0.00026084951456310676, "loss": 0.8933, "step": 1634 }, { "epoch": 0.6605726983485682, "grad_norm": 0.578125, "learning_rate": 0.0002608252427184466, "loss": 0.8618, "step": 1635 }, { "epoch": 0.6609767183475582, "grad_norm": 0.515625, "learning_rate": 0.0002608009708737864, "loss": 0.7939, "step": 1636 }, { "epoch": 0.6613807383465482, "grad_norm": 0.6015625, "learning_rate": 0.0002607766990291262, "loss": 0.9402, "step": 1637 }, { "epoch": 0.6617847583455381, "grad_norm": 0.5625, "learning_rate": 0.00026075242718446597, "loss": 0.9615, "step": 1638 }, { "epoch": 0.662188778344528, "grad_norm": 0.7421875, "learning_rate": 0.0002607281553398058, "loss": 0.9714, "step": 1639 }, { "epoch": 0.662592798343518, "grad_norm": 0.58984375, "learning_rate": 0.00026070388349514563, "loss": 0.7713, "step": 1640 }, { "epoch": 0.6629968183425079, "grad_norm": 0.68359375, "learning_rate": 0.0002606796116504854, "loss": 0.957, "step": 1641 }, { "epoch": 0.6634008383414979, "grad_norm": 0.74609375, "learning_rate": 0.00026065533980582523, "loss": 0.9017, "step": 1642 }, { "epoch": 0.6638048583404879, "grad_norm": 0.546875, "learning_rate": 0.000260631067961165, "loss": 0.9436, "step": 1643 }, { "epoch": 0.6642088783394778, "grad_norm": 0.58984375, "learning_rate": 0.00026060679611650484, "loss": 0.7444, "step": 1644 }, { "epoch": 0.6646128983384677, "grad_norm": 0.5546875, "learning_rate": 0.0002605825242718446, "loss": 0.8836, "step": 1645 }, { "epoch": 0.6650169183374577, "grad_norm": 0.65625, "learning_rate": 0.00026055825242718444, "loss": 0.9279, "step": 1646 }, { "epoch": 0.6654209383364477, "grad_norm": 0.64453125, "learning_rate": 0.00026053398058252427, "loss": 0.992, "step": 1647 }, { "epoch": 0.6658249583354376, "grad_norm": 0.59765625, "learning_rate": 0.00026050970873786405, "loss": 0.9085, "step": 1648 }, { "epoch": 0.6662289783344275, "grad_norm": 0.55859375, "learning_rate": 0.0002604854368932038, "loss": 0.8339, "step": 1649 }, { "epoch": 0.6666329983334175, "grad_norm": 0.5703125, "learning_rate": 0.00026046116504854365, "loss": 0.8012, "step": 1650 }, { "epoch": 0.6670370183324075, "grad_norm": 0.53125, "learning_rate": 0.0002604368932038835, "loss": 0.7868, "step": 1651 }, { "epoch": 0.6674410383313975, "grad_norm": 0.5234375, "learning_rate": 0.0002604126213592233, "loss": 0.9254, "step": 1652 }, { "epoch": 0.6678450583303873, "grad_norm": 0.578125, "learning_rate": 0.0002603883495145631, "loss": 0.8265, "step": 1653 }, { "epoch": 0.6682490783293773, "grad_norm": 0.51171875, "learning_rate": 0.00026036407766990286, "loss": 0.7228, "step": 1654 }, { "epoch": 0.6686530983283673, "grad_norm": 0.54296875, "learning_rate": 0.0002603398058252427, "loss": 0.8516, "step": 1655 }, { "epoch": 0.6690571183273573, "grad_norm": 0.53515625, "learning_rate": 0.0002603155339805825, "loss": 0.8268, "step": 1656 }, { "epoch": 0.6694611383263471, "grad_norm": 0.435546875, "learning_rate": 0.0002602912621359223, "loss": 0.8074, "step": 1657 }, { "epoch": 0.6698651583253371, "grad_norm": 0.53515625, "learning_rate": 0.0002602669902912621, "loss": 0.8148, "step": 1658 }, { "epoch": 0.6702691783243271, "grad_norm": 0.48046875, "learning_rate": 0.0002602427184466019, "loss": 0.8223, "step": 1659 }, { "epoch": 0.670673198323317, "grad_norm": 0.5078125, "learning_rate": 0.00026021844660194173, "loss": 0.753, "step": 1660 }, { "epoch": 0.6710772183223069, "grad_norm": 0.5078125, "learning_rate": 0.00026019417475728156, "loss": 0.7832, "step": 1661 }, { "epoch": 0.6714812383212969, "grad_norm": 0.6328125, "learning_rate": 0.00026016990291262133, "loss": 0.9772, "step": 1662 }, { "epoch": 0.6718852583202869, "grad_norm": 0.5703125, "learning_rate": 0.00026014563106796116, "loss": 0.9529, "step": 1663 }, { "epoch": 0.6722892783192768, "grad_norm": 0.53125, "learning_rate": 0.00026012135922330094, "loss": 0.8717, "step": 1664 }, { "epoch": 0.6726932983182667, "grad_norm": 0.73046875, "learning_rate": 0.00026009708737864077, "loss": 0.9791, "step": 1665 }, { "epoch": 0.6730973183172567, "grad_norm": 0.52734375, "learning_rate": 0.0002600728155339806, "loss": 0.8087, "step": 1666 }, { "epoch": 0.6735013383162467, "grad_norm": 0.58203125, "learning_rate": 0.00026004854368932037, "loss": 0.8142, "step": 1667 }, { "epoch": 0.6739053583152366, "grad_norm": 0.515625, "learning_rate": 0.00026002427184466015, "loss": 0.7557, "step": 1668 }, { "epoch": 0.6743093783142265, "grad_norm": 0.6484375, "learning_rate": 0.00026, "loss": 0.9216, "step": 1669 }, { "epoch": 0.6747133983132165, "grad_norm": 0.486328125, "learning_rate": 0.0002599757281553398, "loss": 0.7794, "step": 1670 }, { "epoch": 0.6751174183122065, "grad_norm": 0.57421875, "learning_rate": 0.0002599514563106796, "loss": 0.9087, "step": 1671 }, { "epoch": 0.6755214383111964, "grad_norm": 0.55078125, "learning_rate": 0.0002599271844660194, "loss": 0.8142, "step": 1672 }, { "epoch": 0.6759254583101864, "grad_norm": 0.64453125, "learning_rate": 0.0002599029126213592, "loss": 0.9093, "step": 1673 }, { "epoch": 0.6763294783091763, "grad_norm": 0.53125, "learning_rate": 0.000259878640776699, "loss": 0.7984, "step": 1674 }, { "epoch": 0.6767334983081662, "grad_norm": 0.546875, "learning_rate": 0.0002598543689320388, "loss": 0.8211, "step": 1675 }, { "epoch": 0.6771375183071562, "grad_norm": 0.46484375, "learning_rate": 0.0002598300970873786, "loss": 0.7566, "step": 1676 }, { "epoch": 0.6775415383061462, "grad_norm": 0.5625, "learning_rate": 0.00025980582524271845, "loss": 0.9492, "step": 1677 }, { "epoch": 0.6779455583051361, "grad_norm": 0.5703125, "learning_rate": 0.0002597815533980582, "loss": 0.8231, "step": 1678 }, { "epoch": 0.678349578304126, "grad_norm": 0.5390625, "learning_rate": 0.000259757281553398, "loss": 0.8303, "step": 1679 }, { "epoch": 0.678753598303116, "grad_norm": 0.5859375, "learning_rate": 0.00025973300970873783, "loss": 0.8797, "step": 1680 }, { "epoch": 0.679157618302106, "grad_norm": 0.466796875, "learning_rate": 0.00025970873786407766, "loss": 0.7776, "step": 1681 }, { "epoch": 0.679561638301096, "grad_norm": 0.48828125, "learning_rate": 0.00025968446601941743, "loss": 0.7801, "step": 1682 }, { "epoch": 0.6799656583000858, "grad_norm": 0.51953125, "learning_rate": 0.00025966019417475726, "loss": 0.8728, "step": 1683 }, { "epoch": 0.6803696782990758, "grad_norm": 0.51953125, "learning_rate": 0.00025963592233009704, "loss": 0.8154, "step": 1684 }, { "epoch": 0.6807736982980658, "grad_norm": 0.64453125, "learning_rate": 0.00025961165048543687, "loss": 0.9054, "step": 1685 }, { "epoch": 0.6811777182970558, "grad_norm": 0.5234375, "learning_rate": 0.0002595873786407767, "loss": 0.9182, "step": 1686 }, { "epoch": 0.6815817382960456, "grad_norm": 0.5390625, "learning_rate": 0.0002595631067961165, "loss": 0.8584, "step": 1687 }, { "epoch": 0.6819857582950356, "grad_norm": 0.5078125, "learning_rate": 0.0002595388349514563, "loss": 0.8433, "step": 1688 }, { "epoch": 0.6823897782940256, "grad_norm": 0.5625, "learning_rate": 0.0002595145631067961, "loss": 0.8373, "step": 1689 }, { "epoch": 0.6827937982930155, "grad_norm": 0.5625, "learning_rate": 0.0002594902912621359, "loss": 0.8246, "step": 1690 }, { "epoch": 0.6831978182920054, "grad_norm": 0.51171875, "learning_rate": 0.00025946601941747574, "loss": 0.8005, "step": 1691 }, { "epoch": 0.6836018382909954, "grad_norm": 0.58984375, "learning_rate": 0.0002594417475728155, "loss": 0.873, "step": 1692 }, { "epoch": 0.6840058582899854, "grad_norm": 0.546875, "learning_rate": 0.00025941747572815534, "loss": 0.8595, "step": 1693 }, { "epoch": 0.6844098782889753, "grad_norm": 0.4921875, "learning_rate": 0.0002593932038834951, "loss": 0.8171, "step": 1694 }, { "epoch": 0.6848138982879652, "grad_norm": 0.4453125, "learning_rate": 0.00025936893203883495, "loss": 0.7807, "step": 1695 }, { "epoch": 0.6852179182869552, "grad_norm": 0.48046875, "learning_rate": 0.0002593446601941747, "loss": 0.8237, "step": 1696 }, { "epoch": 0.6856219382859452, "grad_norm": 0.54296875, "learning_rate": 0.00025932038834951455, "loss": 0.8707, "step": 1697 }, { "epoch": 0.6860259582849351, "grad_norm": 0.5078125, "learning_rate": 0.0002592961165048543, "loss": 0.8332, "step": 1698 }, { "epoch": 0.686429978283925, "grad_norm": 0.6640625, "learning_rate": 0.00025927184466019416, "loss": 0.7912, "step": 1699 }, { "epoch": 0.686833998282915, "grad_norm": 0.484375, "learning_rate": 0.00025924757281553393, "loss": 0.783, "step": 1700 }, { "epoch": 0.687238018281905, "grad_norm": 0.5625, "learning_rate": 0.00025922330097087376, "loss": 0.9004, "step": 1701 }, { "epoch": 0.6876420382808949, "grad_norm": 0.62109375, "learning_rate": 0.0002591990291262136, "loss": 1.0038, "step": 1702 }, { "epoch": 0.6880460582798849, "grad_norm": 0.4609375, "learning_rate": 0.00025917475728155337, "loss": 0.8143, "step": 1703 }, { "epoch": 0.6884500782788748, "grad_norm": 0.494140625, "learning_rate": 0.0002591504854368932, "loss": 0.8283, "step": 1704 }, { "epoch": 0.6888540982778647, "grad_norm": 0.59765625, "learning_rate": 0.00025912621359223297, "loss": 0.9488, "step": 1705 }, { "epoch": 0.6892581182768547, "grad_norm": 0.51953125, "learning_rate": 0.0002591019417475728, "loss": 0.9083, "step": 1706 }, { "epoch": 0.6896621382758447, "grad_norm": 0.4765625, "learning_rate": 0.00025907766990291263, "loss": 0.813, "step": 1707 }, { "epoch": 0.6900661582748346, "grad_norm": 2.640625, "learning_rate": 0.0002590533980582524, "loss": 0.9248, "step": 1708 }, { "epoch": 0.6904701782738245, "grad_norm": 0.5703125, "learning_rate": 0.0002590291262135922, "loss": 0.9025, "step": 1709 }, { "epoch": 0.6908741982728145, "grad_norm": 0.52734375, "learning_rate": 0.000259004854368932, "loss": 0.7858, "step": 1710 }, { "epoch": 0.6912782182718045, "grad_norm": 0.578125, "learning_rate": 0.00025898058252427184, "loss": 0.8969, "step": 1711 }, { "epoch": 0.6916822382707944, "grad_norm": 0.59765625, "learning_rate": 0.0002589563106796116, "loss": 0.8747, "step": 1712 }, { "epoch": 0.6920862582697843, "grad_norm": 0.8515625, "learning_rate": 0.00025893203883495144, "loss": 0.8751, "step": 1713 }, { "epoch": 0.6924902782687743, "grad_norm": 0.59375, "learning_rate": 0.0002589077669902912, "loss": 0.779, "step": 1714 }, { "epoch": 0.6928942982677643, "grad_norm": 0.75, "learning_rate": 0.00025888349514563105, "loss": 0.8276, "step": 1715 }, { "epoch": 0.6932983182667543, "grad_norm": 0.5390625, "learning_rate": 0.0002588592233009709, "loss": 0.8251, "step": 1716 }, { "epoch": 0.6937023382657441, "grad_norm": 0.58203125, "learning_rate": 0.00025883495145631065, "loss": 0.89, "step": 1717 }, { "epoch": 0.6941063582647341, "grad_norm": 0.671875, "learning_rate": 0.0002588106796116505, "loss": 0.9096, "step": 1718 }, { "epoch": 0.6945103782637241, "grad_norm": 0.6328125, "learning_rate": 0.00025878640776699026, "loss": 0.9023, "step": 1719 }, { "epoch": 0.694914398262714, "grad_norm": 0.51171875, "learning_rate": 0.0002587621359223301, "loss": 0.803, "step": 1720 }, { "epoch": 0.6953184182617039, "grad_norm": 0.55859375, "learning_rate": 0.00025873786407766986, "loss": 0.8224, "step": 1721 }, { "epoch": 0.6957224382606939, "grad_norm": 0.466796875, "learning_rate": 0.0002587135922330097, "loss": 0.865, "step": 1722 }, { "epoch": 0.6961264582596839, "grad_norm": 0.482421875, "learning_rate": 0.0002586893203883495, "loss": 0.771, "step": 1723 }, { "epoch": 0.6965304782586738, "grad_norm": 0.53515625, "learning_rate": 0.0002586650485436893, "loss": 0.7785, "step": 1724 }, { "epoch": 0.6969344982576637, "grad_norm": 0.46484375, "learning_rate": 0.00025864077669902907, "loss": 0.7762, "step": 1725 }, { "epoch": 0.6973385182566537, "grad_norm": 0.62109375, "learning_rate": 0.0002586165048543689, "loss": 0.852, "step": 1726 }, { "epoch": 0.6977425382556437, "grad_norm": 0.5, "learning_rate": 0.00025859223300970873, "loss": 0.8337, "step": 1727 }, { "epoch": 0.6981465582546336, "grad_norm": 0.5859375, "learning_rate": 0.0002585679611650485, "loss": 0.9023, "step": 1728 }, { "epoch": 0.6985505782536235, "grad_norm": 0.462890625, "learning_rate": 0.00025854368932038833, "loss": 0.8488, "step": 1729 }, { "epoch": 0.6989545982526135, "grad_norm": 0.474609375, "learning_rate": 0.0002585194174757281, "loss": 0.7977, "step": 1730 }, { "epoch": 0.6993586182516035, "grad_norm": 0.494140625, "learning_rate": 0.00025849514563106794, "loss": 0.7607, "step": 1731 }, { "epoch": 0.6997626382505934, "grad_norm": 0.57421875, "learning_rate": 0.00025847087378640777, "loss": 1.0481, "step": 1732 }, { "epoch": 0.7001666582495834, "grad_norm": 0.5234375, "learning_rate": 0.00025844660194174754, "loss": 0.7442, "step": 1733 }, { "epoch": 0.7005706782485733, "grad_norm": 0.5, "learning_rate": 0.0002584223300970874, "loss": 0.8008, "step": 1734 }, { "epoch": 0.7009746982475632, "grad_norm": 0.703125, "learning_rate": 0.00025839805825242715, "loss": 0.9379, "step": 1735 }, { "epoch": 0.7013787182465532, "grad_norm": 0.61328125, "learning_rate": 0.000258373786407767, "loss": 0.9058, "step": 1736 }, { "epoch": 0.7017827382455432, "grad_norm": 0.53515625, "learning_rate": 0.0002583495145631068, "loss": 0.9504, "step": 1737 }, { "epoch": 0.7021867582445331, "grad_norm": 0.59765625, "learning_rate": 0.0002583252427184466, "loss": 0.907, "step": 1738 }, { "epoch": 0.702590778243523, "grad_norm": 0.59765625, "learning_rate": 0.00025830097087378636, "loss": 0.8134, "step": 1739 }, { "epoch": 0.702994798242513, "grad_norm": 0.73828125, "learning_rate": 0.0002582766990291262, "loss": 0.9219, "step": 1740 }, { "epoch": 0.703398818241503, "grad_norm": 0.546875, "learning_rate": 0.000258252427184466, "loss": 0.8357, "step": 1741 }, { "epoch": 0.703802838240493, "grad_norm": 0.58984375, "learning_rate": 0.0002582281553398058, "loss": 0.8951, "step": 1742 }, { "epoch": 0.7042068582394828, "grad_norm": 0.50390625, "learning_rate": 0.0002582038834951456, "loss": 0.7786, "step": 1743 }, { "epoch": 0.7046108782384728, "grad_norm": 0.578125, "learning_rate": 0.0002581796116504854, "loss": 0.7897, "step": 1744 }, { "epoch": 0.7050148982374628, "grad_norm": 0.490234375, "learning_rate": 0.0002581553398058252, "loss": 0.7769, "step": 1745 }, { "epoch": 0.7054189182364528, "grad_norm": 0.5234375, "learning_rate": 0.000258131067961165, "loss": 0.8079, "step": 1746 }, { "epoch": 0.7058229382354426, "grad_norm": 0.55859375, "learning_rate": 0.00025810679611650483, "loss": 0.8809, "step": 1747 }, { "epoch": 0.7062269582344326, "grad_norm": 0.62109375, "learning_rate": 0.00025808252427184466, "loss": 0.892, "step": 1748 }, { "epoch": 0.7066309782334226, "grad_norm": 0.67578125, "learning_rate": 0.00025805825242718444, "loss": 0.9397, "step": 1749 }, { "epoch": 0.7070349982324126, "grad_norm": 0.546875, "learning_rate": 0.0002580339805825242, "loss": 0.8077, "step": 1750 }, { "epoch": 0.7074390182314024, "grad_norm": 0.58984375, "learning_rate": 0.00025800970873786404, "loss": 0.9855, "step": 1751 }, { "epoch": 0.7078430382303924, "grad_norm": 0.6171875, "learning_rate": 0.00025798543689320387, "loss": 0.8312, "step": 1752 }, { "epoch": 0.7082470582293824, "grad_norm": 0.6328125, "learning_rate": 0.0002579611650485437, "loss": 0.8648, "step": 1753 }, { "epoch": 0.7086510782283723, "grad_norm": 0.5390625, "learning_rate": 0.0002579368932038835, "loss": 0.8785, "step": 1754 }, { "epoch": 0.7090550982273622, "grad_norm": 0.53125, "learning_rate": 0.00025791262135922325, "loss": 0.8303, "step": 1755 }, { "epoch": 0.7094591182263522, "grad_norm": 0.50390625, "learning_rate": 0.0002578883495145631, "loss": 0.8023, "step": 1756 }, { "epoch": 0.7098631382253422, "grad_norm": 0.54296875, "learning_rate": 0.0002578640776699029, "loss": 0.9063, "step": 1757 }, { "epoch": 0.7102671582243321, "grad_norm": 0.498046875, "learning_rate": 0.0002578398058252427, "loss": 0.8493, "step": 1758 }, { "epoch": 0.710671178223322, "grad_norm": 0.6015625, "learning_rate": 0.0002578155339805825, "loss": 0.9583, "step": 1759 }, { "epoch": 0.711075198222312, "grad_norm": 0.5234375, "learning_rate": 0.0002577912621359223, "loss": 0.8141, "step": 1760 }, { "epoch": 0.711479218221302, "grad_norm": 0.6953125, "learning_rate": 0.0002577669902912621, "loss": 0.9046, "step": 1761 }, { "epoch": 0.7118832382202919, "grad_norm": 0.64453125, "learning_rate": 0.00025774271844660195, "loss": 0.8559, "step": 1762 }, { "epoch": 0.7122872582192818, "grad_norm": 0.76953125, "learning_rate": 0.0002577184466019417, "loss": 0.961, "step": 1763 }, { "epoch": 0.7126912782182718, "grad_norm": 0.482421875, "learning_rate": 0.00025769417475728155, "loss": 0.7478, "step": 1764 }, { "epoch": 0.7130952982172618, "grad_norm": 0.60546875, "learning_rate": 0.00025766990291262133, "loss": 0.885, "step": 1765 }, { "epoch": 0.7134993182162517, "grad_norm": 0.47265625, "learning_rate": 0.00025764563106796116, "loss": 0.8306, "step": 1766 }, { "epoch": 0.7139033382152417, "grad_norm": 0.58984375, "learning_rate": 0.000257621359223301, "loss": 0.9117, "step": 1767 }, { "epoch": 0.7143073582142316, "grad_norm": 0.458984375, "learning_rate": 0.00025759708737864076, "loss": 0.7534, "step": 1768 }, { "epoch": 0.7147113782132215, "grad_norm": 0.54296875, "learning_rate": 0.00025757281553398054, "loss": 0.8442, "step": 1769 }, { "epoch": 0.7151153982122115, "grad_norm": 0.52734375, "learning_rate": 0.00025754854368932037, "loss": 0.7731, "step": 1770 }, { "epoch": 0.7155194182112015, "grad_norm": 0.4921875, "learning_rate": 0.00025752427184466014, "loss": 0.8251, "step": 1771 }, { "epoch": 0.7159234382101914, "grad_norm": 0.5234375, "learning_rate": 0.00025749999999999997, "loss": 0.8504, "step": 1772 }, { "epoch": 0.7163274582091813, "grad_norm": 0.5078125, "learning_rate": 0.0002574757281553398, "loss": 0.8677, "step": 1773 }, { "epoch": 0.7167314782081713, "grad_norm": 0.5703125, "learning_rate": 0.0002574514563106796, "loss": 0.8526, "step": 1774 }, { "epoch": 0.7171354982071613, "grad_norm": 0.59375, "learning_rate": 0.0002574271844660194, "loss": 0.9193, "step": 1775 }, { "epoch": 0.7175395182061513, "grad_norm": 0.61328125, "learning_rate": 0.0002574029126213592, "loss": 0.8379, "step": 1776 }, { "epoch": 0.7179435382051411, "grad_norm": 0.484375, "learning_rate": 0.000257378640776699, "loss": 0.8221, "step": 1777 }, { "epoch": 0.7183475582041311, "grad_norm": 0.50390625, "learning_rate": 0.00025735436893203884, "loss": 0.858, "step": 1778 }, { "epoch": 0.7187515782031211, "grad_norm": 0.470703125, "learning_rate": 0.0002573300970873786, "loss": 0.7306, "step": 1779 }, { "epoch": 0.7191555982021111, "grad_norm": 0.6015625, "learning_rate": 0.0002573058252427184, "loss": 1.0154, "step": 1780 }, { "epoch": 0.7195596182011009, "grad_norm": 0.482421875, "learning_rate": 0.0002572815533980582, "loss": 0.8555, "step": 1781 }, { "epoch": 0.7199636382000909, "grad_norm": 0.484375, "learning_rate": 0.00025725728155339805, "loss": 0.755, "step": 1782 }, { "epoch": 0.7203676581990809, "grad_norm": 0.77734375, "learning_rate": 0.0002572330097087378, "loss": 1.0106, "step": 1783 }, { "epoch": 0.7207716781980708, "grad_norm": 0.5625, "learning_rate": 0.00025720873786407765, "loss": 0.8106, "step": 1784 }, { "epoch": 0.7211756981970607, "grad_norm": 0.4921875, "learning_rate": 0.00025718446601941743, "loss": 0.8004, "step": 1785 }, { "epoch": 0.7215797181960507, "grad_norm": 0.50390625, "learning_rate": 0.00025716019417475726, "loss": 0.8083, "step": 1786 }, { "epoch": 0.7219837381950407, "grad_norm": 0.474609375, "learning_rate": 0.0002571359223300971, "loss": 0.7499, "step": 1787 }, { "epoch": 0.7223877581940306, "grad_norm": 0.57421875, "learning_rate": 0.00025711165048543686, "loss": 0.8362, "step": 1788 }, { "epoch": 0.7227917781930205, "grad_norm": 0.62109375, "learning_rate": 0.0002570873786407767, "loss": 0.866, "step": 1789 }, { "epoch": 0.7231957981920105, "grad_norm": 0.63671875, "learning_rate": 0.00025706310679611647, "loss": 0.9669, "step": 1790 }, { "epoch": 0.7235998181910005, "grad_norm": 0.48046875, "learning_rate": 0.0002570388349514563, "loss": 0.748, "step": 1791 }, { "epoch": 0.7240038381899904, "grad_norm": 0.51953125, "learning_rate": 0.0002570145631067961, "loss": 0.9098, "step": 1792 }, { "epoch": 0.7244078581889803, "grad_norm": 0.5078125, "learning_rate": 0.0002569902912621359, "loss": 0.8389, "step": 1793 }, { "epoch": 0.7248118781879703, "grad_norm": 0.55859375, "learning_rate": 0.00025696601941747573, "loss": 0.8068, "step": 1794 }, { "epoch": 0.7252158981869603, "grad_norm": 0.54296875, "learning_rate": 0.0002569417475728155, "loss": 0.9114, "step": 1795 }, { "epoch": 0.7256199181859502, "grad_norm": 0.73828125, "learning_rate": 0.00025691747572815534, "loss": 0.9136, "step": 1796 }, { "epoch": 0.7260239381849402, "grad_norm": 0.58203125, "learning_rate": 0.0002568932038834951, "loss": 0.8574, "step": 1797 }, { "epoch": 0.7264279581839301, "grad_norm": 0.546875, "learning_rate": 0.00025686893203883494, "loss": 0.8397, "step": 1798 }, { "epoch": 0.72683197818292, "grad_norm": 0.5078125, "learning_rate": 0.0002568446601941747, "loss": 0.8049, "step": 1799 }, { "epoch": 0.72723599818191, "grad_norm": 0.61328125, "learning_rate": 0.00025682038834951454, "loss": 0.6389, "step": 1800 }, { "epoch": 0.7276400181809, "grad_norm": 0.6484375, "learning_rate": 0.0002567961165048543, "loss": 0.8799, "step": 1801 }, { "epoch": 0.7280440381798899, "grad_norm": 0.55078125, "learning_rate": 0.00025677184466019415, "loss": 0.9025, "step": 1802 }, { "epoch": 0.7284480581788798, "grad_norm": 0.447265625, "learning_rate": 0.000256747572815534, "loss": 0.8093, "step": 1803 }, { "epoch": 0.7288520781778698, "grad_norm": 0.55078125, "learning_rate": 0.00025672330097087375, "loss": 0.8179, "step": 1804 }, { "epoch": 0.7292560981768598, "grad_norm": 0.48828125, "learning_rate": 0.0002566990291262136, "loss": 0.688, "step": 1805 }, { "epoch": 0.7296601181758497, "grad_norm": 0.61328125, "learning_rate": 0.00025667475728155336, "loss": 0.8309, "step": 1806 }, { "epoch": 0.7300641381748396, "grad_norm": 0.58984375, "learning_rate": 0.0002566504854368932, "loss": 0.775, "step": 1807 }, { "epoch": 0.7304681581738296, "grad_norm": 0.578125, "learning_rate": 0.000256626213592233, "loss": 0.8834, "step": 1808 }, { "epoch": 0.7308721781728196, "grad_norm": 0.490234375, "learning_rate": 0.0002566019417475728, "loss": 0.7341, "step": 1809 }, { "epoch": 0.7312761981718096, "grad_norm": 0.46875, "learning_rate": 0.00025657766990291257, "loss": 0.7763, "step": 1810 }, { "epoch": 0.7316802181707994, "grad_norm": 0.65625, "learning_rate": 0.0002565533980582524, "loss": 0.8456, "step": 1811 }, { "epoch": 0.7320842381697894, "grad_norm": 0.5625, "learning_rate": 0.0002565291262135922, "loss": 0.7771, "step": 1812 }, { "epoch": 0.7324882581687794, "grad_norm": 0.578125, "learning_rate": 0.000256504854368932, "loss": 0.8539, "step": 1813 }, { "epoch": 0.7328922781677693, "grad_norm": 0.54296875, "learning_rate": 0.00025648058252427183, "loss": 0.8267, "step": 1814 }, { "epoch": 0.7332962981667592, "grad_norm": 0.55078125, "learning_rate": 0.0002564563106796116, "loss": 0.7828, "step": 1815 }, { "epoch": 0.7337003181657492, "grad_norm": 0.5625, "learning_rate": 0.00025643203883495144, "loss": 0.8447, "step": 1816 }, { "epoch": 0.7341043381647392, "grad_norm": 0.6171875, "learning_rate": 0.00025640776699029127, "loss": 0.9017, "step": 1817 }, { "epoch": 0.7345083581637291, "grad_norm": 0.5390625, "learning_rate": 0.00025638349514563104, "loss": 0.8866, "step": 1818 }, { "epoch": 0.734912378162719, "grad_norm": 0.59375, "learning_rate": 0.00025635922330097087, "loss": 0.8241, "step": 1819 }, { "epoch": 0.735316398161709, "grad_norm": 0.58203125, "learning_rate": 0.00025633495145631065, "loss": 0.7949, "step": 1820 }, { "epoch": 0.735720418160699, "grad_norm": 0.71484375, "learning_rate": 0.0002563106796116505, "loss": 1.0072, "step": 1821 }, { "epoch": 0.7361244381596889, "grad_norm": 0.53125, "learning_rate": 0.00025628640776699025, "loss": 0.8291, "step": 1822 }, { "epoch": 0.7365284581586788, "grad_norm": 0.51171875, "learning_rate": 0.0002562621359223301, "loss": 0.7817, "step": 1823 }, { "epoch": 0.7369324781576688, "grad_norm": 0.546875, "learning_rate": 0.0002562378640776699, "loss": 0.8822, "step": 1824 }, { "epoch": 0.7373364981566588, "grad_norm": 0.53125, "learning_rate": 0.0002562135922330097, "loss": 0.8385, "step": 1825 }, { "epoch": 0.7377405181556487, "grad_norm": 0.46484375, "learning_rate": 0.00025618932038834946, "loss": 0.7055, "step": 1826 }, { "epoch": 0.7381445381546387, "grad_norm": 0.482421875, "learning_rate": 0.0002561650485436893, "loss": 0.7493, "step": 1827 }, { "epoch": 0.7385485581536286, "grad_norm": 0.58984375, "learning_rate": 0.0002561407766990291, "loss": 0.9298, "step": 1828 }, { "epoch": 0.7389525781526185, "grad_norm": 0.466796875, "learning_rate": 0.0002561165048543689, "loss": 0.8167, "step": 1829 }, { "epoch": 0.7393565981516085, "grad_norm": 0.6875, "learning_rate": 0.0002560922330097087, "loss": 1.0511, "step": 1830 }, { "epoch": 0.7397606181505985, "grad_norm": 0.60546875, "learning_rate": 0.0002560679611650485, "loss": 0.9132, "step": 1831 }, { "epoch": 0.7401646381495884, "grad_norm": 0.5625, "learning_rate": 0.00025604368932038833, "loss": 0.7909, "step": 1832 }, { "epoch": 0.7405686581485783, "grad_norm": 0.5234375, "learning_rate": 0.00025601941747572816, "loss": 0.7946, "step": 1833 }, { "epoch": 0.7409726781475683, "grad_norm": 0.625, "learning_rate": 0.00025599514563106793, "loss": 0.8096, "step": 1834 }, { "epoch": 0.7413766981465583, "grad_norm": 0.5390625, "learning_rate": 0.00025597087378640776, "loss": 0.8051, "step": 1835 }, { "epoch": 0.7417807181455482, "grad_norm": 0.51953125, "learning_rate": 0.00025594660194174754, "loss": 0.826, "step": 1836 }, { "epoch": 0.7421847381445381, "grad_norm": 0.546875, "learning_rate": 0.00025592233009708737, "loss": 0.8319, "step": 1837 }, { "epoch": 0.7425887581435281, "grad_norm": 0.52734375, "learning_rate": 0.0002558980582524272, "loss": 0.7899, "step": 1838 }, { "epoch": 0.7429927781425181, "grad_norm": 0.5, "learning_rate": 0.00025587378640776697, "loss": 0.7917, "step": 1839 }, { "epoch": 0.743396798141508, "grad_norm": 0.55859375, "learning_rate": 0.00025584951456310675, "loss": 0.9038, "step": 1840 }, { "epoch": 0.7438008181404979, "grad_norm": 0.68359375, "learning_rate": 0.0002558252427184466, "loss": 0.9313, "step": 1841 }, { "epoch": 0.7442048381394879, "grad_norm": 0.484375, "learning_rate": 0.0002558009708737864, "loss": 0.7767, "step": 1842 }, { "epoch": 0.7446088581384779, "grad_norm": 0.5859375, "learning_rate": 0.0002557766990291262, "loss": 0.8683, "step": 1843 }, { "epoch": 0.7450128781374679, "grad_norm": 0.50390625, "learning_rate": 0.000255752427184466, "loss": 0.7713, "step": 1844 }, { "epoch": 0.7454168981364577, "grad_norm": 0.5, "learning_rate": 0.0002557281553398058, "loss": 0.7639, "step": 1845 }, { "epoch": 0.7458209181354477, "grad_norm": 0.5703125, "learning_rate": 0.0002557038834951456, "loss": 0.7837, "step": 1846 }, { "epoch": 0.7462249381344377, "grad_norm": 0.54296875, "learning_rate": 0.0002556796116504854, "loss": 0.8214, "step": 1847 }, { "epoch": 0.7466289581334276, "grad_norm": 0.57421875, "learning_rate": 0.0002556553398058252, "loss": 0.8532, "step": 1848 }, { "epoch": 0.7470329781324175, "grad_norm": 0.6015625, "learning_rate": 0.00025563106796116505, "loss": 0.93, "step": 1849 }, { "epoch": 0.7474369981314075, "grad_norm": 0.5234375, "learning_rate": 0.0002556067961165048, "loss": 0.8245, "step": 1850 }, { "epoch": 0.7478410181303975, "grad_norm": 0.5625, "learning_rate": 0.0002555825242718446, "loss": 0.9363, "step": 1851 }, { "epoch": 0.7482450381293874, "grad_norm": 0.51953125, "learning_rate": 0.00025555825242718443, "loss": 0.7932, "step": 1852 }, { "epoch": 0.7486490581283773, "grad_norm": 0.5234375, "learning_rate": 0.00025553398058252426, "loss": 0.8714, "step": 1853 }, { "epoch": 0.7490530781273673, "grad_norm": 0.58203125, "learning_rate": 0.0002555097087378641, "loss": 0.7787, "step": 1854 }, { "epoch": 0.7494570981263573, "grad_norm": 0.63671875, "learning_rate": 0.00025548543689320386, "loss": 0.8465, "step": 1855 }, { "epoch": 0.7498611181253472, "grad_norm": 0.5078125, "learning_rate": 0.00025546116504854364, "loss": 0.7784, "step": 1856 }, { "epoch": 0.7502651381243371, "grad_norm": 0.50390625, "learning_rate": 0.00025543689320388347, "loss": 0.8079, "step": 1857 }, { "epoch": 0.7506691581233271, "grad_norm": 0.59765625, "learning_rate": 0.0002554126213592233, "loss": 1.0206, "step": 1858 }, { "epoch": 0.7510731781223171, "grad_norm": 0.462890625, "learning_rate": 0.00025538834951456307, "loss": 0.8244, "step": 1859 }, { "epoch": 0.751477198121307, "grad_norm": 0.474609375, "learning_rate": 0.0002553640776699029, "loss": 0.7088, "step": 1860 }, { "epoch": 0.751881218120297, "grad_norm": 0.5390625, "learning_rate": 0.0002553398058252427, "loss": 0.8365, "step": 1861 }, { "epoch": 0.7522852381192869, "grad_norm": 0.55078125, "learning_rate": 0.0002553155339805825, "loss": 0.8639, "step": 1862 }, { "epoch": 0.7526892581182768, "grad_norm": 0.625, "learning_rate": 0.00025529126213592234, "loss": 0.8707, "step": 1863 }, { "epoch": 0.7530932781172668, "grad_norm": 0.458984375, "learning_rate": 0.0002552669902912621, "loss": 0.7749, "step": 1864 }, { "epoch": 0.7534972981162568, "grad_norm": 0.57421875, "learning_rate": 0.00025524271844660194, "loss": 0.8179, "step": 1865 }, { "epoch": 0.7539013181152467, "grad_norm": 0.58984375, "learning_rate": 0.0002552184466019417, "loss": 0.8884, "step": 1866 }, { "epoch": 0.7543053381142366, "grad_norm": 0.72265625, "learning_rate": 0.00025519417475728155, "loss": 1.0114, "step": 1867 }, { "epoch": 0.7547093581132266, "grad_norm": 0.4453125, "learning_rate": 0.0002551699029126214, "loss": 0.7617, "step": 1868 }, { "epoch": 0.7551133781122166, "grad_norm": 0.6015625, "learning_rate": 0.00025514563106796115, "loss": 0.8995, "step": 1869 }, { "epoch": 0.7555173981112066, "grad_norm": 0.5, "learning_rate": 0.0002551213592233009, "loss": 0.9006, "step": 1870 }, { "epoch": 0.7559214181101964, "grad_norm": 0.5546875, "learning_rate": 0.00025509708737864075, "loss": 0.786, "step": 1871 }, { "epoch": 0.7563254381091864, "grad_norm": 0.494140625, "learning_rate": 0.00025507281553398053, "loss": 0.8471, "step": 1872 }, { "epoch": 0.7567294581081764, "grad_norm": 0.470703125, "learning_rate": 0.00025504854368932036, "loss": 0.7444, "step": 1873 }, { "epoch": 0.7571334781071664, "grad_norm": 0.625, "learning_rate": 0.0002550242718446602, "loss": 0.9541, "step": 1874 }, { "epoch": 0.7575374981061562, "grad_norm": 0.53125, "learning_rate": 0.00025499999999999996, "loss": 0.7789, "step": 1875 }, { "epoch": 0.7579415181051462, "grad_norm": 0.55859375, "learning_rate": 0.0002549757281553398, "loss": 0.8566, "step": 1876 }, { "epoch": 0.7583455381041362, "grad_norm": 0.5, "learning_rate": 0.00025495145631067957, "loss": 0.853, "step": 1877 }, { "epoch": 0.758749558103126, "grad_norm": 0.5234375, "learning_rate": 0.0002549271844660194, "loss": 0.8337, "step": 1878 }, { "epoch": 0.759153578102116, "grad_norm": 0.5234375, "learning_rate": 0.00025490291262135923, "loss": 0.8475, "step": 1879 }, { "epoch": 0.759557598101106, "grad_norm": 0.55078125, "learning_rate": 0.000254878640776699, "loss": 0.8532, "step": 1880 }, { "epoch": 0.759961618100096, "grad_norm": 0.55078125, "learning_rate": 0.0002548543689320388, "loss": 0.8091, "step": 1881 }, { "epoch": 0.7603656380990859, "grad_norm": 0.5078125, "learning_rate": 0.0002548300970873786, "loss": 0.7661, "step": 1882 }, { "epoch": 0.7607696580980758, "grad_norm": 0.48046875, "learning_rate": 0.00025480582524271844, "loss": 0.7983, "step": 1883 }, { "epoch": 0.7611736780970658, "grad_norm": 0.54296875, "learning_rate": 0.0002547815533980582, "loss": 0.911, "step": 1884 }, { "epoch": 0.7615776980960558, "grad_norm": 0.50390625, "learning_rate": 0.00025475728155339804, "loss": 0.7735, "step": 1885 }, { "epoch": 0.7619817180950457, "grad_norm": 0.55078125, "learning_rate": 0.0002547330097087378, "loss": 0.8211, "step": 1886 }, { "epoch": 0.7623857380940356, "grad_norm": 0.5390625, "learning_rate": 0.00025470873786407765, "loss": 0.729, "step": 1887 }, { "epoch": 0.7627897580930256, "grad_norm": 0.52734375, "learning_rate": 0.0002546844660194175, "loss": 0.8824, "step": 1888 }, { "epoch": 0.7631937780920156, "grad_norm": 0.51171875, "learning_rate": 0.00025466019417475725, "loss": 0.7683, "step": 1889 }, { "epoch": 0.7635977980910055, "grad_norm": 0.5390625, "learning_rate": 0.0002546359223300971, "loss": 0.8902, "step": 1890 }, { "epoch": 0.7640018180899955, "grad_norm": 0.7890625, "learning_rate": 0.00025461165048543686, "loss": 0.8277, "step": 1891 }, { "epoch": 0.7644058380889854, "grad_norm": 0.51171875, "learning_rate": 0.0002545873786407767, "loss": 0.8123, "step": 1892 }, { "epoch": 0.7648098580879753, "grad_norm": 0.470703125, "learning_rate": 0.0002545631067961165, "loss": 0.7491, "step": 1893 }, { "epoch": 0.7652138780869653, "grad_norm": 0.5703125, "learning_rate": 0.0002545388349514563, "loss": 0.811, "step": 1894 }, { "epoch": 0.7656178980859553, "grad_norm": 0.57421875, "learning_rate": 0.0002545145631067961, "loss": 0.856, "step": 1895 }, { "epoch": 0.7660219180849452, "grad_norm": 0.484375, "learning_rate": 0.0002544902912621359, "loss": 0.7683, "step": 1896 }, { "epoch": 0.7664259380839351, "grad_norm": 0.4609375, "learning_rate": 0.0002544660194174757, "loss": 0.6771, "step": 1897 }, { "epoch": 0.7668299580829251, "grad_norm": 0.5390625, "learning_rate": 0.0002544417475728155, "loss": 0.8451, "step": 1898 }, { "epoch": 0.7672339780819151, "grad_norm": 0.62109375, "learning_rate": 0.00025441747572815533, "loss": 1.0596, "step": 1899 }, { "epoch": 0.767637998080905, "grad_norm": 0.52734375, "learning_rate": 0.0002543932038834951, "loss": 0.8038, "step": 1900 }, { "epoch": 0.7680420180798949, "grad_norm": 0.65625, "learning_rate": 0.00025436893203883493, "loss": 0.835, "step": 1901 }, { "epoch": 0.7684460380788849, "grad_norm": 0.60546875, "learning_rate": 0.0002543446601941747, "loss": 0.8118, "step": 1902 }, { "epoch": 0.7688500580778749, "grad_norm": 0.6171875, "learning_rate": 0.00025432038834951454, "loss": 0.8567, "step": 1903 }, { "epoch": 0.7692540780768649, "grad_norm": 0.53125, "learning_rate": 0.00025429611650485437, "loss": 0.7365, "step": 1904 }, { "epoch": 0.7696580980758547, "grad_norm": 0.5, "learning_rate": 0.00025427184466019414, "loss": 0.8271, "step": 1905 }, { "epoch": 0.7700621180748447, "grad_norm": 0.51953125, "learning_rate": 0.00025424757281553397, "loss": 0.7548, "step": 1906 }, { "epoch": 0.7704661380738347, "grad_norm": 0.69921875, "learning_rate": 0.00025422330097087375, "loss": 0.9258, "step": 1907 }, { "epoch": 0.7708701580728246, "grad_norm": 0.6796875, "learning_rate": 0.0002541990291262136, "loss": 0.9004, "step": 1908 }, { "epoch": 0.7712741780718145, "grad_norm": 0.6015625, "learning_rate": 0.0002541747572815534, "loss": 0.9835, "step": 1909 }, { "epoch": 0.7716781980708045, "grad_norm": 0.53125, "learning_rate": 0.0002541504854368932, "loss": 0.8316, "step": 1910 }, { "epoch": 0.7720822180697945, "grad_norm": 0.59765625, "learning_rate": 0.00025412621359223296, "loss": 0.9146, "step": 1911 }, { "epoch": 0.7724862380687844, "grad_norm": 0.90234375, "learning_rate": 0.0002541019417475728, "loss": 0.9122, "step": 1912 }, { "epoch": 0.7728902580677743, "grad_norm": 0.703125, "learning_rate": 0.0002540776699029126, "loss": 0.8863, "step": 1913 }, { "epoch": 0.7732942780667643, "grad_norm": 0.53125, "learning_rate": 0.0002540533980582524, "loss": 0.8385, "step": 1914 }, { "epoch": 0.7736982980657543, "grad_norm": 0.515625, "learning_rate": 0.0002540291262135922, "loss": 0.8125, "step": 1915 }, { "epoch": 0.7741023180647442, "grad_norm": 0.62890625, "learning_rate": 0.000254004854368932, "loss": 0.8991, "step": 1916 }, { "epoch": 0.7745063380637341, "grad_norm": 0.4765625, "learning_rate": 0.0002539805825242718, "loss": 0.7323, "step": 1917 }, { "epoch": 0.7749103580627241, "grad_norm": 0.59765625, "learning_rate": 0.00025395631067961165, "loss": 0.9481, "step": 1918 }, { "epoch": 0.7753143780617141, "grad_norm": 0.58984375, "learning_rate": 0.00025393203883495143, "loss": 0.94, "step": 1919 }, { "epoch": 0.775718398060704, "grad_norm": 0.49609375, "learning_rate": 0.00025390776699029126, "loss": 0.7283, "step": 1920 }, { "epoch": 0.776122418059694, "grad_norm": 0.515625, "learning_rate": 0.00025388349514563103, "loss": 0.8425, "step": 1921 }, { "epoch": 0.7765264380586839, "grad_norm": 0.63671875, "learning_rate": 0.00025385922330097086, "loss": 0.9606, "step": 1922 }, { "epoch": 0.7769304580576738, "grad_norm": 0.54296875, "learning_rate": 0.00025383495145631064, "loss": 0.8344, "step": 1923 }, { "epoch": 0.7773344780566638, "grad_norm": 0.4921875, "learning_rate": 0.00025381067961165047, "loss": 0.8516, "step": 1924 }, { "epoch": 0.7777384980556538, "grad_norm": 0.53515625, "learning_rate": 0.0002537864077669903, "loss": 0.8352, "step": 1925 }, { "epoch": 0.7781425180546437, "grad_norm": 0.47265625, "learning_rate": 0.0002537621359223301, "loss": 0.8973, "step": 1926 }, { "epoch": 0.7785465380536336, "grad_norm": 1.0390625, "learning_rate": 0.00025373786407766985, "loss": 0.9225, "step": 1927 }, { "epoch": 0.7789505580526236, "grad_norm": 0.546875, "learning_rate": 0.0002537135922330097, "loss": 0.8249, "step": 1928 }, { "epoch": 0.7793545780516136, "grad_norm": 0.50390625, "learning_rate": 0.0002536893203883495, "loss": 0.843, "step": 1929 }, { "epoch": 0.7797585980506035, "grad_norm": 0.6796875, "learning_rate": 0.0002536650485436893, "loss": 0.9307, "step": 1930 }, { "epoch": 0.7801626180495934, "grad_norm": 0.52734375, "learning_rate": 0.0002536407766990291, "loss": 0.7809, "step": 1931 }, { "epoch": 0.7805666380485834, "grad_norm": 0.59765625, "learning_rate": 0.0002536165048543689, "loss": 0.8802, "step": 1932 }, { "epoch": 0.7809706580475734, "grad_norm": 0.5078125, "learning_rate": 0.0002535922330097087, "loss": 0.7848, "step": 1933 }, { "epoch": 0.7813746780465634, "grad_norm": 0.65625, "learning_rate": 0.00025356796116504855, "loss": 0.9166, "step": 1934 }, { "epoch": 0.7817786980455532, "grad_norm": 0.58984375, "learning_rate": 0.0002535436893203883, "loss": 0.7757, "step": 1935 }, { "epoch": 0.7821827180445432, "grad_norm": 0.52734375, "learning_rate": 0.00025351941747572815, "loss": 0.8152, "step": 1936 }, { "epoch": 0.7825867380435332, "grad_norm": 0.90625, "learning_rate": 0.0002534951456310679, "loss": 0.8939, "step": 1937 }, { "epoch": 0.782990758042523, "grad_norm": 0.6484375, "learning_rate": 0.00025347087378640776, "loss": 0.7867, "step": 1938 }, { "epoch": 0.783394778041513, "grad_norm": 0.484375, "learning_rate": 0.0002534466019417476, "loss": 0.7182, "step": 1939 }, { "epoch": 0.783798798040503, "grad_norm": 0.57421875, "learning_rate": 0.00025342233009708736, "loss": 0.8763, "step": 1940 }, { "epoch": 0.784202818039493, "grad_norm": 0.65234375, "learning_rate": 0.00025339805825242714, "loss": 1.0015, "step": 1941 }, { "epoch": 0.7846068380384829, "grad_norm": 0.53515625, "learning_rate": 0.00025337378640776696, "loss": 0.8119, "step": 1942 }, { "epoch": 0.7850108580374728, "grad_norm": 0.5859375, "learning_rate": 0.0002533495145631068, "loss": 0.9161, "step": 1943 }, { "epoch": 0.7854148780364628, "grad_norm": 0.5625, "learning_rate": 0.00025332524271844657, "loss": 0.7606, "step": 1944 }, { "epoch": 0.7858188980354528, "grad_norm": 0.546875, "learning_rate": 0.0002533009708737864, "loss": 0.7427, "step": 1945 }, { "epoch": 0.7862229180344427, "grad_norm": 0.609375, "learning_rate": 0.0002532766990291262, "loss": 0.8243, "step": 1946 }, { "epoch": 0.7866269380334326, "grad_norm": 0.486328125, "learning_rate": 0.000253252427184466, "loss": 0.7121, "step": 1947 }, { "epoch": 0.7870309580324226, "grad_norm": 0.52734375, "learning_rate": 0.0002532281553398058, "loss": 0.7468, "step": 1948 }, { "epoch": 0.7874349780314126, "grad_norm": 0.53125, "learning_rate": 0.0002532038834951456, "loss": 0.8463, "step": 1949 }, { "epoch": 0.7878389980304025, "grad_norm": 0.55078125, "learning_rate": 0.00025317961165048544, "loss": 0.6875, "step": 1950 }, { "epoch": 0.7882430180293925, "grad_norm": 0.515625, "learning_rate": 0.0002531553398058252, "loss": 0.7961, "step": 1951 }, { "epoch": 0.7886470380283824, "grad_norm": 0.5390625, "learning_rate": 0.000253131067961165, "loss": 0.8459, "step": 1952 }, { "epoch": 0.7890510580273724, "grad_norm": 0.48828125, "learning_rate": 0.0002531067961165048, "loss": 0.7585, "step": 1953 }, { "epoch": 0.7894550780263623, "grad_norm": 0.56640625, "learning_rate": 0.00025308252427184465, "loss": 0.9844, "step": 1954 }, { "epoch": 0.7898590980253523, "grad_norm": 0.6328125, "learning_rate": 0.0002530582524271845, "loss": 0.8868, "step": 1955 }, { "epoch": 0.7902631180243422, "grad_norm": 0.54296875, "learning_rate": 0.00025303398058252425, "loss": 0.7895, "step": 1956 }, { "epoch": 0.7906671380233321, "grad_norm": 0.58984375, "learning_rate": 0.000253009708737864, "loss": 0.7841, "step": 1957 }, { "epoch": 0.7910711580223221, "grad_norm": 0.484375, "learning_rate": 0.00025298543689320386, "loss": 0.774, "step": 1958 }, { "epoch": 0.7914751780213121, "grad_norm": 0.48828125, "learning_rate": 0.0002529611650485437, "loss": 0.7831, "step": 1959 }, { "epoch": 0.791879198020302, "grad_norm": 0.50390625, "learning_rate": 0.00025293689320388346, "loss": 0.8803, "step": 1960 }, { "epoch": 0.7922832180192919, "grad_norm": 0.59765625, "learning_rate": 0.0002529126213592233, "loss": 0.8391, "step": 1961 }, { "epoch": 0.7926872380182819, "grad_norm": 0.48046875, "learning_rate": 0.00025288834951456307, "loss": 0.7628, "step": 1962 }, { "epoch": 0.7930912580172719, "grad_norm": 0.5078125, "learning_rate": 0.0002528640776699029, "loss": 0.7577, "step": 1963 }, { "epoch": 0.7934952780162619, "grad_norm": 0.546875, "learning_rate": 0.0002528398058252427, "loss": 0.7054, "step": 1964 }, { "epoch": 0.7938992980152517, "grad_norm": 0.55859375, "learning_rate": 0.0002528155339805825, "loss": 0.7381, "step": 1965 }, { "epoch": 0.7943033180142417, "grad_norm": 0.58203125, "learning_rate": 0.00025279126213592233, "loss": 0.8606, "step": 1966 }, { "epoch": 0.7947073380132317, "grad_norm": 0.50390625, "learning_rate": 0.0002527669902912621, "loss": 0.8658, "step": 1967 }, { "epoch": 0.7951113580122217, "grad_norm": 0.482421875, "learning_rate": 0.00025274271844660193, "loss": 0.7463, "step": 1968 }, { "epoch": 0.7955153780112115, "grad_norm": 0.5390625, "learning_rate": 0.00025271844660194176, "loss": 0.8618, "step": 1969 }, { "epoch": 0.7959193980102015, "grad_norm": 0.5, "learning_rate": 0.00025269417475728154, "loss": 0.8495, "step": 1970 }, { "epoch": 0.7963234180091915, "grad_norm": 0.458984375, "learning_rate": 0.0002526699029126213, "loss": 0.8656, "step": 1971 }, { "epoch": 0.7967274380081814, "grad_norm": 0.5625, "learning_rate": 0.00025264563106796114, "loss": 0.8092, "step": 1972 }, { "epoch": 0.7971314580071713, "grad_norm": 0.61328125, "learning_rate": 0.0002526213592233009, "loss": 0.9838, "step": 1973 }, { "epoch": 0.7975354780061613, "grad_norm": 0.53125, "learning_rate": 0.00025259708737864075, "loss": 0.8973, "step": 1974 }, { "epoch": 0.7979394980051513, "grad_norm": 0.53125, "learning_rate": 0.0002525728155339806, "loss": 0.8577, "step": 1975 }, { "epoch": 0.7983435180041412, "grad_norm": 0.5390625, "learning_rate": 0.00025254854368932035, "loss": 0.8721, "step": 1976 }, { "epoch": 0.7987475380031311, "grad_norm": 0.4609375, "learning_rate": 0.0002525242718446602, "loss": 0.7285, "step": 1977 }, { "epoch": 0.7991515580021211, "grad_norm": 0.5390625, "learning_rate": 0.00025249999999999996, "loss": 0.7906, "step": 1978 }, { "epoch": 0.7995555780011111, "grad_norm": 0.462890625, "learning_rate": 0.0002524757281553398, "loss": 0.6765, "step": 1979 }, { "epoch": 0.799959598000101, "grad_norm": 0.466796875, "learning_rate": 0.0002524514563106796, "loss": 0.7873, "step": 1980 }, { "epoch": 0.800363617999091, "grad_norm": 0.6015625, "learning_rate": 0.0002524271844660194, "loss": 0.827, "step": 1981 }, { "epoch": 0.8007676379980809, "grad_norm": 0.53515625, "learning_rate": 0.00025240291262135917, "loss": 0.8464, "step": 1982 }, { "epoch": 0.8011716579970709, "grad_norm": 0.55078125, "learning_rate": 0.000252378640776699, "loss": 0.855, "step": 1983 }, { "epoch": 0.8015756779960608, "grad_norm": 0.498046875, "learning_rate": 0.0002523543689320388, "loss": 0.8048, "step": 1984 }, { "epoch": 0.8019796979950508, "grad_norm": 0.53515625, "learning_rate": 0.0002523300970873786, "loss": 0.8415, "step": 1985 }, { "epoch": 0.8023837179940407, "grad_norm": 0.482421875, "learning_rate": 0.00025230582524271843, "loss": 0.7625, "step": 1986 }, { "epoch": 0.8027877379930306, "grad_norm": 0.53125, "learning_rate": 0.0002522815533980582, "loss": 0.8774, "step": 1987 }, { "epoch": 0.8031917579920206, "grad_norm": 0.51171875, "learning_rate": 0.00025225728155339804, "loss": 0.8135, "step": 1988 }, { "epoch": 0.8035957779910106, "grad_norm": 0.5546875, "learning_rate": 0.00025223300970873786, "loss": 0.7615, "step": 1989 }, { "epoch": 0.8039997979900005, "grad_norm": 0.44140625, "learning_rate": 0.00025220873786407764, "loss": 0.685, "step": 1990 }, { "epoch": 0.8044038179889904, "grad_norm": 0.486328125, "learning_rate": 0.00025218446601941747, "loss": 0.78, "step": 1991 }, { "epoch": 0.8048078379879804, "grad_norm": 0.46875, "learning_rate": 0.00025216019417475724, "loss": 0.7299, "step": 1992 }, { "epoch": 0.8052118579869704, "grad_norm": 0.55078125, "learning_rate": 0.0002521359223300971, "loss": 0.8101, "step": 1993 }, { "epoch": 0.8056158779859603, "grad_norm": 0.51953125, "learning_rate": 0.0002521116504854369, "loss": 0.8856, "step": 1994 }, { "epoch": 0.8060198979849502, "grad_norm": 0.53515625, "learning_rate": 0.0002520873786407767, "loss": 0.7669, "step": 1995 }, { "epoch": 0.8064239179839402, "grad_norm": 0.65234375, "learning_rate": 0.0002520631067961165, "loss": 0.841, "step": 1996 }, { "epoch": 0.8068279379829302, "grad_norm": 0.53515625, "learning_rate": 0.0002520388349514563, "loss": 0.8257, "step": 1997 }, { "epoch": 0.8072319579819202, "grad_norm": 0.5546875, "learning_rate": 0.0002520145631067961, "loss": 0.8575, "step": 1998 }, { "epoch": 0.80763597798091, "grad_norm": 0.53125, "learning_rate": 0.0002519902912621359, "loss": 0.8621, "step": 1999 }, { "epoch": 0.8080399979799, "grad_norm": 0.57421875, "learning_rate": 0.0002519660194174757, "loss": 0.7655, "step": 2000 }, { "epoch": 0.80844401797889, "grad_norm": 0.66796875, "learning_rate": 0.0002519417475728155, "loss": 0.9522, "step": 2001 }, { "epoch": 0.8088480379778799, "grad_norm": 0.58984375, "learning_rate": 0.0002519174757281553, "loss": 0.887, "step": 2002 }, { "epoch": 0.8092520579768698, "grad_norm": 0.51953125, "learning_rate": 0.0002518932038834951, "loss": 0.9039, "step": 2003 }, { "epoch": 0.8096560779758598, "grad_norm": 0.453125, "learning_rate": 0.0002518689320388349, "loss": 0.7313, "step": 2004 }, { "epoch": 0.8100600979748498, "grad_norm": 0.59765625, "learning_rate": 0.00025184466019417476, "loss": 0.9392, "step": 2005 }, { "epoch": 0.8104641179738397, "grad_norm": 0.59765625, "learning_rate": 0.00025182038834951453, "loss": 0.8118, "step": 2006 }, { "epoch": 0.8108681379728296, "grad_norm": 0.54296875, "learning_rate": 0.00025179611650485436, "loss": 0.8801, "step": 2007 }, { "epoch": 0.8112721579718196, "grad_norm": 0.69921875, "learning_rate": 0.00025177184466019414, "loss": 0.9204, "step": 2008 }, { "epoch": 0.8116761779708096, "grad_norm": 0.51953125, "learning_rate": 0.00025174757281553397, "loss": 0.7986, "step": 2009 }, { "epoch": 0.8120801979697995, "grad_norm": 0.60546875, "learning_rate": 0.0002517233009708738, "loss": 0.9228, "step": 2010 }, { "epoch": 0.8124842179687894, "grad_norm": 0.73046875, "learning_rate": 0.00025169902912621357, "loss": 0.7954, "step": 2011 }, { "epoch": 0.8128882379677794, "grad_norm": 0.5, "learning_rate": 0.00025167475728155335, "loss": 0.8183, "step": 2012 }, { "epoch": 0.8132922579667694, "grad_norm": 0.6171875, "learning_rate": 0.0002516504854368932, "loss": 0.871, "step": 2013 }, { "epoch": 0.8136962779657593, "grad_norm": 0.6015625, "learning_rate": 0.000251626213592233, "loss": 0.726, "step": 2014 }, { "epoch": 0.8141002979647493, "grad_norm": 0.62890625, "learning_rate": 0.0002516019417475728, "loss": 0.9704, "step": 2015 }, { "epoch": 0.8145043179637392, "grad_norm": 0.80078125, "learning_rate": 0.0002515776699029126, "loss": 0.8682, "step": 2016 }, { "epoch": 0.8149083379627291, "grad_norm": 0.50390625, "learning_rate": 0.0002515533980582524, "loss": 0.7542, "step": 2017 }, { "epoch": 0.8153123579617191, "grad_norm": 0.6796875, "learning_rate": 0.0002515291262135922, "loss": 1.011, "step": 2018 }, { "epoch": 0.8157163779607091, "grad_norm": 0.5859375, "learning_rate": 0.00025150485436893204, "loss": 0.832, "step": 2019 }, { "epoch": 0.816120397959699, "grad_norm": 0.51953125, "learning_rate": 0.0002514805825242718, "loss": 0.8639, "step": 2020 }, { "epoch": 0.8165244179586889, "grad_norm": 0.486328125, "learning_rate": 0.00025145631067961165, "loss": 0.8033, "step": 2021 }, { "epoch": 0.8169284379576789, "grad_norm": 0.62109375, "learning_rate": 0.0002514320388349514, "loss": 0.8414, "step": 2022 }, { "epoch": 0.8173324579566689, "grad_norm": 0.53515625, "learning_rate": 0.00025140776699029125, "loss": 0.8359, "step": 2023 }, { "epoch": 0.8177364779556588, "grad_norm": 0.46875, "learning_rate": 0.00025138349514563103, "loss": 0.7571, "step": 2024 }, { "epoch": 0.8181404979546487, "grad_norm": 0.63671875, "learning_rate": 0.00025135922330097086, "loss": 1.082, "step": 2025 }, { "epoch": 0.8185445179536387, "grad_norm": 0.50390625, "learning_rate": 0.0002513349514563107, "loss": 0.8374, "step": 2026 }, { "epoch": 0.8189485379526287, "grad_norm": 0.5, "learning_rate": 0.00025131067961165046, "loss": 0.8659, "step": 2027 }, { "epoch": 0.8193525579516187, "grad_norm": 0.63671875, "learning_rate": 0.00025128640776699024, "loss": 0.8392, "step": 2028 }, { "epoch": 0.8197565779506085, "grad_norm": 0.66015625, "learning_rate": 0.00025126213592233007, "loss": 0.8356, "step": 2029 }, { "epoch": 0.8201605979495985, "grad_norm": 0.51953125, "learning_rate": 0.0002512378640776699, "loss": 0.7584, "step": 2030 }, { "epoch": 0.8205646179485885, "grad_norm": 0.50390625, "learning_rate": 0.00025121359223300967, "loss": 0.7475, "step": 2031 }, { "epoch": 0.8209686379475784, "grad_norm": 0.58203125, "learning_rate": 0.0002511893203883495, "loss": 0.8234, "step": 2032 }, { "epoch": 0.8213726579465683, "grad_norm": 0.609375, "learning_rate": 0.0002511650485436893, "loss": 0.8767, "step": 2033 }, { "epoch": 0.8217766779455583, "grad_norm": 0.5859375, "learning_rate": 0.0002511407766990291, "loss": 0.752, "step": 2034 }, { "epoch": 0.8221806979445483, "grad_norm": 0.64453125, "learning_rate": 0.00025111650485436893, "loss": 0.8553, "step": 2035 }, { "epoch": 0.8225847179435382, "grad_norm": 0.64453125, "learning_rate": 0.0002510922330097087, "loss": 0.8091, "step": 2036 }, { "epoch": 0.8229887379425281, "grad_norm": 0.5546875, "learning_rate": 0.00025106796116504854, "loss": 0.8662, "step": 2037 }, { "epoch": 0.8233927579415181, "grad_norm": 0.76171875, "learning_rate": 0.0002510436893203883, "loss": 0.9187, "step": 2038 }, { "epoch": 0.8237967779405081, "grad_norm": 0.462890625, "learning_rate": 0.00025101941747572814, "loss": 0.786, "step": 2039 }, { "epoch": 0.824200797939498, "grad_norm": 0.66015625, "learning_rate": 0.000250995145631068, "loss": 0.8948, "step": 2040 }, { "epoch": 0.8246048179384879, "grad_norm": 0.640625, "learning_rate": 0.00025097087378640775, "loss": 0.7899, "step": 2041 }, { "epoch": 0.8250088379374779, "grad_norm": 0.50390625, "learning_rate": 0.0002509466019417475, "loss": 0.8046, "step": 2042 }, { "epoch": 0.8254128579364679, "grad_norm": 0.7265625, "learning_rate": 0.00025092233009708735, "loss": 0.9616, "step": 2043 }, { "epoch": 0.8258168779354578, "grad_norm": 0.546875, "learning_rate": 0.0002508980582524272, "loss": 0.79, "step": 2044 }, { "epoch": 0.8262208979344478, "grad_norm": 0.56640625, "learning_rate": 0.00025087378640776696, "loss": 0.7974, "step": 2045 }, { "epoch": 0.8266249179334377, "grad_norm": 0.51171875, "learning_rate": 0.0002508495145631068, "loss": 0.8337, "step": 2046 }, { "epoch": 0.8270289379324277, "grad_norm": 0.55859375, "learning_rate": 0.00025082524271844656, "loss": 0.8338, "step": 2047 }, { "epoch": 0.8274329579314176, "grad_norm": 0.5078125, "learning_rate": 0.0002508009708737864, "loss": 0.7865, "step": 2048 }, { "epoch": 0.8278369779304076, "grad_norm": 0.5, "learning_rate": 0.00025077669902912617, "loss": 0.892, "step": 2049 }, { "epoch": 0.8282409979293975, "grad_norm": 0.6171875, "learning_rate": 0.000250752427184466, "loss": 0.9296, "step": 2050 }, { "epoch": 0.8286450179283874, "grad_norm": 0.50390625, "learning_rate": 0.0002507281553398058, "loss": 0.7757, "step": 2051 }, { "epoch": 0.8290490379273774, "grad_norm": 0.52734375, "learning_rate": 0.0002507038834951456, "loss": 0.7318, "step": 2052 }, { "epoch": 0.8294530579263674, "grad_norm": 0.51171875, "learning_rate": 0.0002506796116504854, "loss": 0.8212, "step": 2053 }, { "epoch": 0.8298570779253573, "grad_norm": 0.62109375, "learning_rate": 0.0002506553398058252, "loss": 0.9084, "step": 2054 }, { "epoch": 0.8302610979243472, "grad_norm": 0.5234375, "learning_rate": 0.00025063106796116504, "loss": 0.8986, "step": 2055 }, { "epoch": 0.8306651179233372, "grad_norm": 0.54296875, "learning_rate": 0.00025060679611650487, "loss": 0.8831, "step": 2056 }, { "epoch": 0.8310691379223272, "grad_norm": 0.490234375, "learning_rate": 0.00025058252427184464, "loss": 0.8216, "step": 2057 }, { "epoch": 0.8314731579213172, "grad_norm": 0.482421875, "learning_rate": 0.0002505582524271844, "loss": 0.8147, "step": 2058 }, { "epoch": 0.831877177920307, "grad_norm": 0.54296875, "learning_rate": 0.00025053398058252425, "loss": 0.7787, "step": 2059 }, { "epoch": 0.832281197919297, "grad_norm": 0.494140625, "learning_rate": 0.0002505097087378641, "loss": 0.7728, "step": 2060 }, { "epoch": 0.832685217918287, "grad_norm": 0.71875, "learning_rate": 0.00025048543689320385, "loss": 1.0049, "step": 2061 }, { "epoch": 0.833089237917277, "grad_norm": 0.482421875, "learning_rate": 0.0002504611650485437, "loss": 0.746, "step": 2062 }, { "epoch": 0.8334932579162668, "grad_norm": 0.578125, "learning_rate": 0.00025043689320388345, "loss": 0.9051, "step": 2063 }, { "epoch": 0.8338972779152568, "grad_norm": 0.455078125, "learning_rate": 0.0002504126213592233, "loss": 0.7467, "step": 2064 }, { "epoch": 0.8343012979142468, "grad_norm": 0.53515625, "learning_rate": 0.0002503883495145631, "loss": 0.8165, "step": 2065 }, { "epoch": 0.8347053179132367, "grad_norm": 0.53515625, "learning_rate": 0.0002503640776699029, "loss": 0.9335, "step": 2066 }, { "epoch": 0.8351093379122266, "grad_norm": 0.64453125, "learning_rate": 0.0002503398058252427, "loss": 0.9288, "step": 2067 }, { "epoch": 0.8355133579112166, "grad_norm": 0.455078125, "learning_rate": 0.0002503155339805825, "loss": 0.766, "step": 2068 }, { "epoch": 0.8359173779102066, "grad_norm": 0.5078125, "learning_rate": 0.0002502912621359223, "loss": 0.7418, "step": 2069 }, { "epoch": 0.8363213979091965, "grad_norm": 0.515625, "learning_rate": 0.00025026699029126215, "loss": 0.7875, "step": 2070 }, { "epoch": 0.8367254179081864, "grad_norm": 0.6171875, "learning_rate": 0.00025024271844660193, "loss": 0.7943, "step": 2071 }, { "epoch": 0.8371294379071764, "grad_norm": 0.609375, "learning_rate": 0.0002502184466019417, "loss": 0.8219, "step": 2072 }, { "epoch": 0.8375334579061664, "grad_norm": 0.609375, "learning_rate": 0.00025019417475728153, "loss": 0.8787, "step": 2073 }, { "epoch": 0.8379374779051563, "grad_norm": 0.55859375, "learning_rate": 0.0002501699029126213, "loss": 0.8422, "step": 2074 }, { "epoch": 0.8383414979041462, "grad_norm": 0.5546875, "learning_rate": 0.00025014563106796114, "loss": 0.8906, "step": 2075 }, { "epoch": 0.8387455179031362, "grad_norm": 0.65234375, "learning_rate": 0.00025012135922330097, "loss": 0.9403, "step": 2076 }, { "epoch": 0.8391495379021262, "grad_norm": 0.470703125, "learning_rate": 0.00025009708737864074, "loss": 0.7219, "step": 2077 }, { "epoch": 0.8395535579011161, "grad_norm": 0.64453125, "learning_rate": 0.00025007281553398057, "loss": 0.942, "step": 2078 }, { "epoch": 0.8399575779001061, "grad_norm": 0.5234375, "learning_rate": 0.00025004854368932035, "loss": 0.7658, "step": 2079 }, { "epoch": 0.840361597899096, "grad_norm": 0.61328125, "learning_rate": 0.0002500242718446602, "loss": 0.8327, "step": 2080 }, { "epoch": 0.8407656178980859, "grad_norm": 0.55078125, "learning_rate": 0.00025, "loss": 0.929, "step": 2081 }, { "epoch": 0.8411696378970759, "grad_norm": 0.58203125, "learning_rate": 0.0002499757281553398, "loss": 0.8686, "step": 2082 }, { "epoch": 0.8415736578960659, "grad_norm": 0.5859375, "learning_rate": 0.00024995145631067956, "loss": 0.8603, "step": 2083 }, { "epoch": 0.8419776778950558, "grad_norm": 0.51953125, "learning_rate": 0.0002499271844660194, "loss": 0.7427, "step": 2084 }, { "epoch": 0.8423816978940457, "grad_norm": 0.51953125, "learning_rate": 0.0002499029126213592, "loss": 0.8506, "step": 2085 }, { "epoch": 0.8427857178930357, "grad_norm": 0.54296875, "learning_rate": 0.000249878640776699, "loss": 0.8287, "step": 2086 }, { "epoch": 0.8431897378920257, "grad_norm": 0.7265625, "learning_rate": 0.0002498543689320388, "loss": 1.0063, "step": 2087 }, { "epoch": 0.8435937578910156, "grad_norm": 0.5546875, "learning_rate": 0.0002498300970873786, "loss": 0.8335, "step": 2088 }, { "epoch": 0.8439977778900055, "grad_norm": 0.5234375, "learning_rate": 0.0002498058252427184, "loss": 0.7714, "step": 2089 }, { "epoch": 0.8444017978889955, "grad_norm": 0.609375, "learning_rate": 0.00024978155339805825, "loss": 0.8799, "step": 2090 }, { "epoch": 0.8448058178879855, "grad_norm": 0.5, "learning_rate": 0.00024975728155339803, "loss": 0.7935, "step": 2091 }, { "epoch": 0.8452098378869755, "grad_norm": 0.5625, "learning_rate": 0.00024973300970873786, "loss": 0.854, "step": 2092 }, { "epoch": 0.8456138578859653, "grad_norm": 0.58203125, "learning_rate": 0.00024970873786407763, "loss": 0.8619, "step": 2093 }, { "epoch": 0.8460178778849553, "grad_norm": 0.55078125, "learning_rate": 0.00024968446601941746, "loss": 0.9443, "step": 2094 }, { "epoch": 0.8464218978839453, "grad_norm": 0.49609375, "learning_rate": 0.0002496601941747573, "loss": 0.785, "step": 2095 }, { "epoch": 0.8468259178829352, "grad_norm": 0.5546875, "learning_rate": 0.00024963592233009707, "loss": 0.8044, "step": 2096 }, { "epoch": 0.8472299378819251, "grad_norm": 0.58203125, "learning_rate": 0.0002496116504854369, "loss": 0.8722, "step": 2097 }, { "epoch": 0.8476339578809151, "grad_norm": 0.462890625, "learning_rate": 0.00024958737864077667, "loss": 0.8358, "step": 2098 }, { "epoch": 0.8480379778799051, "grad_norm": 0.6171875, "learning_rate": 0.00024956310679611645, "loss": 0.8329, "step": 2099 }, { "epoch": 0.848441997878895, "grad_norm": 0.494140625, "learning_rate": 0.0002495388349514563, "loss": 0.7529, "step": 2100 }, { "epoch": 0.8488460178778849, "grad_norm": 0.53125, "learning_rate": 0.0002495145631067961, "loss": 0.8119, "step": 2101 }, { "epoch": 0.8492500378768749, "grad_norm": 0.578125, "learning_rate": 0.0002494902912621359, "loss": 0.7948, "step": 2102 }, { "epoch": 0.8496540578758649, "grad_norm": 0.5859375, "learning_rate": 0.0002494660194174757, "loss": 0.9167, "step": 2103 }, { "epoch": 0.8500580778748548, "grad_norm": 0.494140625, "learning_rate": 0.0002494417475728155, "loss": 0.8221, "step": 2104 }, { "epoch": 0.8504620978738447, "grad_norm": 0.439453125, "learning_rate": 0.0002494174757281553, "loss": 0.7619, "step": 2105 }, { "epoch": 0.8508661178728347, "grad_norm": 0.546875, "learning_rate": 0.00024939320388349515, "loss": 0.7957, "step": 2106 }, { "epoch": 0.8512701378718247, "grad_norm": 0.58203125, "learning_rate": 0.0002493689320388349, "loss": 0.8636, "step": 2107 }, { "epoch": 0.8516741578708146, "grad_norm": 0.546875, "learning_rate": 0.00024934466019417475, "loss": 0.7903, "step": 2108 }, { "epoch": 0.8520781778698046, "grad_norm": 0.60546875, "learning_rate": 0.0002493203883495145, "loss": 0.8143, "step": 2109 }, { "epoch": 0.8524821978687945, "grad_norm": 0.55859375, "learning_rate": 0.00024929611650485435, "loss": 0.9263, "step": 2110 }, { "epoch": 0.8528862178677844, "grad_norm": 0.50390625, "learning_rate": 0.0002492718446601942, "loss": 0.7413, "step": 2111 }, { "epoch": 0.8532902378667744, "grad_norm": 0.546875, "learning_rate": 0.00024924757281553396, "loss": 0.8778, "step": 2112 }, { "epoch": 0.8536942578657644, "grad_norm": 0.53515625, "learning_rate": 0.00024922330097087373, "loss": 0.7576, "step": 2113 }, { "epoch": 0.8540982778647543, "grad_norm": 0.47265625, "learning_rate": 0.00024919902912621356, "loss": 0.7962, "step": 2114 }, { "epoch": 0.8545022978637442, "grad_norm": 0.578125, "learning_rate": 0.0002491747572815534, "loss": 0.8293, "step": 2115 }, { "epoch": 0.8549063178627342, "grad_norm": 0.59765625, "learning_rate": 0.00024915048543689317, "loss": 0.848, "step": 2116 }, { "epoch": 0.8553103378617242, "grad_norm": 0.5703125, "learning_rate": 0.000249126213592233, "loss": 0.8651, "step": 2117 }, { "epoch": 0.8557143578607141, "grad_norm": 0.474609375, "learning_rate": 0.0002491019417475728, "loss": 0.8458, "step": 2118 }, { "epoch": 0.856118377859704, "grad_norm": 0.498046875, "learning_rate": 0.0002490776699029126, "loss": 0.7742, "step": 2119 }, { "epoch": 0.856522397858694, "grad_norm": 0.4375, "learning_rate": 0.00024905339805825243, "loss": 0.6913, "step": 2120 }, { "epoch": 0.856926417857684, "grad_norm": 0.482421875, "learning_rate": 0.0002490291262135922, "loss": 0.8009, "step": 2121 }, { "epoch": 0.857330437856674, "grad_norm": 0.5625, "learning_rate": 0.00024900485436893204, "loss": 0.7903, "step": 2122 }, { "epoch": 0.8577344578556638, "grad_norm": 0.46875, "learning_rate": 0.0002489805825242718, "loss": 0.7584, "step": 2123 }, { "epoch": 0.8581384778546538, "grad_norm": 0.55078125, "learning_rate": 0.00024895631067961164, "loss": 0.9547, "step": 2124 }, { "epoch": 0.8585424978536438, "grad_norm": 0.52734375, "learning_rate": 0.0002489320388349514, "loss": 0.9224, "step": 2125 }, { "epoch": 0.8589465178526337, "grad_norm": 0.50390625, "learning_rate": 0.00024890776699029125, "loss": 0.782, "step": 2126 }, { "epoch": 0.8593505378516236, "grad_norm": 0.48828125, "learning_rate": 0.0002488834951456311, "loss": 0.7863, "step": 2127 }, { "epoch": 0.8597545578506136, "grad_norm": 0.5390625, "learning_rate": 0.00024885922330097085, "loss": 0.7901, "step": 2128 }, { "epoch": 0.8601585778496036, "grad_norm": 0.58984375, "learning_rate": 0.0002488349514563106, "loss": 0.8203, "step": 2129 }, { "epoch": 0.8605625978485935, "grad_norm": 0.46875, "learning_rate": 0.00024881067961165046, "loss": 0.7943, "step": 2130 }, { "epoch": 0.8609666178475834, "grad_norm": 0.54296875, "learning_rate": 0.0002487864077669903, "loss": 0.912, "step": 2131 }, { "epoch": 0.8613706378465734, "grad_norm": 0.671875, "learning_rate": 0.00024876213592233006, "loss": 0.8662, "step": 2132 }, { "epoch": 0.8617746578455634, "grad_norm": 0.48828125, "learning_rate": 0.0002487378640776699, "loss": 0.8475, "step": 2133 }, { "epoch": 0.8621786778445533, "grad_norm": 0.458984375, "learning_rate": 0.00024871359223300966, "loss": 0.7681, "step": 2134 }, { "epoch": 0.8625826978435432, "grad_norm": 0.52734375, "learning_rate": 0.0002486893203883495, "loss": 0.7617, "step": 2135 }, { "epoch": 0.8629867178425332, "grad_norm": 0.51953125, "learning_rate": 0.0002486650485436893, "loss": 0.8166, "step": 2136 }, { "epoch": 0.8633907378415232, "grad_norm": 0.54296875, "learning_rate": 0.0002486407766990291, "loss": 0.7719, "step": 2137 }, { "epoch": 0.8637947578405131, "grad_norm": 0.57421875, "learning_rate": 0.00024861650485436893, "loss": 0.8793, "step": 2138 }, { "epoch": 0.864198777839503, "grad_norm": 0.48046875, "learning_rate": 0.0002485922330097087, "loss": 0.7441, "step": 2139 }, { "epoch": 0.864602797838493, "grad_norm": 0.53125, "learning_rate": 0.00024856796116504853, "loss": 0.6972, "step": 2140 }, { "epoch": 0.865006817837483, "grad_norm": 0.474609375, "learning_rate": 0.00024854368932038836, "loss": 0.8296, "step": 2141 }, { "epoch": 0.8654108378364729, "grad_norm": 0.46875, "learning_rate": 0.00024851941747572814, "loss": 0.8329, "step": 2142 }, { "epoch": 0.8658148578354629, "grad_norm": 0.470703125, "learning_rate": 0.0002484951456310679, "loss": 0.805, "step": 2143 }, { "epoch": 0.8662188778344528, "grad_norm": 0.484375, "learning_rate": 0.00024847087378640774, "loss": 0.7829, "step": 2144 }, { "epoch": 0.8666228978334427, "grad_norm": 0.52734375, "learning_rate": 0.00024844660194174757, "loss": 0.8937, "step": 2145 }, { "epoch": 0.8670269178324327, "grad_norm": 0.462890625, "learning_rate": 0.00024842233009708735, "loss": 0.7418, "step": 2146 }, { "epoch": 0.8674309378314227, "grad_norm": 0.55859375, "learning_rate": 0.0002483980582524272, "loss": 0.8781, "step": 2147 }, { "epoch": 0.8678349578304126, "grad_norm": 0.51953125, "learning_rate": 0.00024837378640776695, "loss": 0.7678, "step": 2148 }, { "epoch": 0.8682389778294025, "grad_norm": 0.59765625, "learning_rate": 0.0002483495145631068, "loss": 0.7625, "step": 2149 }, { "epoch": 0.8686429978283925, "grad_norm": 0.484375, "learning_rate": 0.00024832524271844656, "loss": 0.7013, "step": 2150 }, { "epoch": 0.8690470178273825, "grad_norm": 0.55859375, "learning_rate": 0.0002483009708737864, "loss": 0.7965, "step": 2151 }, { "epoch": 0.8694510378263725, "grad_norm": 0.478515625, "learning_rate": 0.0002482766990291262, "loss": 0.7895, "step": 2152 }, { "epoch": 0.8698550578253623, "grad_norm": 0.55859375, "learning_rate": 0.000248252427184466, "loss": 0.8216, "step": 2153 }, { "epoch": 0.8702590778243523, "grad_norm": 0.48046875, "learning_rate": 0.00024822815533980577, "loss": 0.735, "step": 2154 }, { "epoch": 0.8706630978233423, "grad_norm": 0.5078125, "learning_rate": 0.0002482038834951456, "loss": 0.7698, "step": 2155 }, { "epoch": 0.8710671178223323, "grad_norm": 0.59765625, "learning_rate": 0.0002481796116504854, "loss": 0.9599, "step": 2156 }, { "epoch": 0.8714711378213221, "grad_norm": 0.5859375, "learning_rate": 0.00024815533980582525, "loss": 0.8189, "step": 2157 }, { "epoch": 0.8718751578203121, "grad_norm": 0.498046875, "learning_rate": 0.00024813106796116503, "loss": 0.8148, "step": 2158 }, { "epoch": 0.8722791778193021, "grad_norm": 0.578125, "learning_rate": 0.0002481067961165048, "loss": 0.8466, "step": 2159 }, { "epoch": 0.872683197818292, "grad_norm": 0.6484375, "learning_rate": 0.00024808252427184463, "loss": 0.9533, "step": 2160 }, { "epoch": 0.8730872178172819, "grad_norm": 0.53125, "learning_rate": 0.00024805825242718446, "loss": 0.6904, "step": 2161 }, { "epoch": 0.8734912378162719, "grad_norm": 0.5859375, "learning_rate": 0.00024803398058252424, "loss": 0.6946, "step": 2162 }, { "epoch": 0.8738952578152619, "grad_norm": 0.6015625, "learning_rate": 0.00024800970873786407, "loss": 0.8714, "step": 2163 }, { "epoch": 0.8742992778142518, "grad_norm": 0.5078125, "learning_rate": 0.00024798543689320384, "loss": 0.7367, "step": 2164 }, { "epoch": 0.8747032978132417, "grad_norm": 0.47265625, "learning_rate": 0.00024796116504854367, "loss": 0.8131, "step": 2165 }, { "epoch": 0.8751073178122317, "grad_norm": 0.546875, "learning_rate": 0.0002479368932038835, "loss": 0.7071, "step": 2166 }, { "epoch": 0.8755113378112217, "grad_norm": 0.5390625, "learning_rate": 0.0002479126213592233, "loss": 0.8652, "step": 2167 }, { "epoch": 0.8759153578102116, "grad_norm": 0.54296875, "learning_rate": 0.0002478883495145631, "loss": 0.8671, "step": 2168 }, { "epoch": 0.8763193778092015, "grad_norm": 0.5390625, "learning_rate": 0.0002478640776699029, "loss": 0.8046, "step": 2169 }, { "epoch": 0.8767233978081915, "grad_norm": 0.5, "learning_rate": 0.0002478398058252427, "loss": 0.7597, "step": 2170 }, { "epoch": 0.8771274178071815, "grad_norm": 0.50390625, "learning_rate": 0.00024781553398058254, "loss": 0.7525, "step": 2171 }, { "epoch": 0.8775314378061714, "grad_norm": 0.4921875, "learning_rate": 0.0002477912621359223, "loss": 0.7309, "step": 2172 }, { "epoch": 0.8779354578051614, "grad_norm": 0.6640625, "learning_rate": 0.0002477669902912621, "loss": 0.9239, "step": 2173 }, { "epoch": 0.8783394778041513, "grad_norm": 0.61328125, "learning_rate": 0.0002477427184466019, "loss": 0.897, "step": 2174 }, { "epoch": 0.8787434978031412, "grad_norm": 0.52734375, "learning_rate": 0.0002477184466019417, "loss": 0.816, "step": 2175 }, { "epoch": 0.8791475178021312, "grad_norm": 0.5625, "learning_rate": 0.0002476941747572815, "loss": 0.9232, "step": 2176 }, { "epoch": 0.8795515378011212, "grad_norm": 0.5859375, "learning_rate": 0.00024766990291262136, "loss": 0.8328, "step": 2177 }, { "epoch": 0.8799555578001111, "grad_norm": 0.453125, "learning_rate": 0.00024764563106796113, "loss": 0.7356, "step": 2178 }, { "epoch": 0.880359577799101, "grad_norm": 0.6484375, "learning_rate": 0.00024762135922330096, "loss": 0.9307, "step": 2179 }, { "epoch": 0.880763597798091, "grad_norm": 0.58984375, "learning_rate": 0.00024759708737864074, "loss": 0.7461, "step": 2180 }, { "epoch": 0.881167617797081, "grad_norm": 0.43359375, "learning_rate": 0.00024757281553398056, "loss": 0.8155, "step": 2181 }, { "epoch": 0.881571637796071, "grad_norm": 0.58203125, "learning_rate": 0.0002475485436893204, "loss": 0.9277, "step": 2182 }, { "epoch": 0.8819756577950608, "grad_norm": 0.494140625, "learning_rate": 0.00024752427184466017, "loss": 0.8649, "step": 2183 }, { "epoch": 0.8823796777940508, "grad_norm": 0.5546875, "learning_rate": 0.00024749999999999994, "loss": 0.8771, "step": 2184 }, { "epoch": 0.8827836977930408, "grad_norm": 0.486328125, "learning_rate": 0.0002474757281553398, "loss": 0.774, "step": 2185 }, { "epoch": 0.8831877177920308, "grad_norm": 0.5703125, "learning_rate": 0.0002474514563106796, "loss": 0.852, "step": 2186 }, { "epoch": 0.8835917377910206, "grad_norm": 0.46875, "learning_rate": 0.0002474271844660194, "loss": 0.7961, "step": 2187 }, { "epoch": 0.8839957577900106, "grad_norm": 0.50390625, "learning_rate": 0.0002474029126213592, "loss": 0.7441, "step": 2188 }, { "epoch": 0.8843997777890006, "grad_norm": 0.5078125, "learning_rate": 0.000247378640776699, "loss": 0.8155, "step": 2189 }, { "epoch": 0.8848037977879905, "grad_norm": 0.53515625, "learning_rate": 0.0002473543689320388, "loss": 0.8531, "step": 2190 }, { "epoch": 0.8852078177869804, "grad_norm": 0.46484375, "learning_rate": 0.00024733009708737864, "loss": 0.7841, "step": 2191 }, { "epoch": 0.8856118377859704, "grad_norm": 0.46484375, "learning_rate": 0.0002473058252427184, "loss": 0.7612, "step": 2192 }, { "epoch": 0.8860158577849604, "grad_norm": 0.671875, "learning_rate": 0.00024728155339805825, "loss": 0.8863, "step": 2193 }, { "epoch": 0.8864198777839503, "grad_norm": 0.486328125, "learning_rate": 0.000247257281553398, "loss": 0.8257, "step": 2194 }, { "epoch": 0.8868238977829402, "grad_norm": 0.5703125, "learning_rate": 0.00024723300970873785, "loss": 0.8744, "step": 2195 }, { "epoch": 0.8872279177819302, "grad_norm": 0.466796875, "learning_rate": 0.0002472087378640777, "loss": 0.7236, "step": 2196 }, { "epoch": 0.8876319377809202, "grad_norm": 0.498046875, "learning_rate": 0.00024718446601941746, "loss": 0.8046, "step": 2197 }, { "epoch": 0.8880359577799101, "grad_norm": 0.5234375, "learning_rate": 0.0002471601941747573, "loss": 0.8376, "step": 2198 }, { "epoch": 0.8884399777789, "grad_norm": 0.53515625, "learning_rate": 0.00024713592233009706, "loss": 0.8666, "step": 2199 }, { "epoch": 0.88884399777789, "grad_norm": 0.54296875, "learning_rate": 0.00024711165048543684, "loss": 0.8913, "step": 2200 }, { "epoch": 0.88924801777688, "grad_norm": 0.5625, "learning_rate": 0.00024708737864077667, "loss": 0.8095, "step": 2201 }, { "epoch": 0.8896520377758699, "grad_norm": 0.5859375, "learning_rate": 0.0002470631067961165, "loss": 0.8852, "step": 2202 }, { "epoch": 0.8900560577748599, "grad_norm": 0.61328125, "learning_rate": 0.00024703883495145627, "loss": 0.8554, "step": 2203 }, { "epoch": 0.8904600777738498, "grad_norm": 0.490234375, "learning_rate": 0.0002470145631067961, "loss": 0.6999, "step": 2204 }, { "epoch": 0.8908640977728397, "grad_norm": 0.5, "learning_rate": 0.0002469902912621359, "loss": 0.7925, "step": 2205 }, { "epoch": 0.8912681177718297, "grad_norm": 0.56640625, "learning_rate": 0.0002469660194174757, "loss": 0.8087, "step": 2206 }, { "epoch": 0.8916721377708197, "grad_norm": 0.51953125, "learning_rate": 0.00024694174757281553, "loss": 0.8497, "step": 2207 }, { "epoch": 0.8920761577698096, "grad_norm": 0.6640625, "learning_rate": 0.0002469174757281553, "loss": 0.9606, "step": 2208 }, { "epoch": 0.8924801777687995, "grad_norm": 0.453125, "learning_rate": 0.00024689320388349514, "loss": 0.7019, "step": 2209 }, { "epoch": 0.8928841977677895, "grad_norm": 0.546875, "learning_rate": 0.0002468689320388349, "loss": 0.7827, "step": 2210 }, { "epoch": 0.8932882177667795, "grad_norm": 0.51953125, "learning_rate": 0.00024684466019417474, "loss": 0.7655, "step": 2211 }, { "epoch": 0.8936922377657694, "grad_norm": 0.65625, "learning_rate": 0.00024682038834951457, "loss": 1.0427, "step": 2212 }, { "epoch": 0.8940962577647593, "grad_norm": 0.5546875, "learning_rate": 0.00024679611650485435, "loss": 0.8762, "step": 2213 }, { "epoch": 0.8945002777637493, "grad_norm": 0.48828125, "learning_rate": 0.0002467718446601941, "loss": 0.8468, "step": 2214 }, { "epoch": 0.8949042977627393, "grad_norm": 0.5625, "learning_rate": 0.00024674757281553395, "loss": 0.906, "step": 2215 }, { "epoch": 0.8953083177617293, "grad_norm": 0.578125, "learning_rate": 0.0002467233009708738, "loss": 0.8247, "step": 2216 }, { "epoch": 0.8957123377607191, "grad_norm": 0.498046875, "learning_rate": 0.00024669902912621356, "loss": 0.7538, "step": 2217 }, { "epoch": 0.8961163577597091, "grad_norm": 0.52734375, "learning_rate": 0.0002466747572815534, "loss": 0.8855, "step": 2218 }, { "epoch": 0.8965203777586991, "grad_norm": 0.578125, "learning_rate": 0.00024665048543689316, "loss": 0.8608, "step": 2219 }, { "epoch": 0.896924397757689, "grad_norm": 0.578125, "learning_rate": 0.000246626213592233, "loss": 0.8328, "step": 2220 }, { "epoch": 0.8973284177566789, "grad_norm": 0.515625, "learning_rate": 0.0002466019417475728, "loss": 0.8033, "step": 2221 }, { "epoch": 0.8977324377556689, "grad_norm": 0.447265625, "learning_rate": 0.0002465776699029126, "loss": 0.8049, "step": 2222 }, { "epoch": 0.8981364577546589, "grad_norm": 0.734375, "learning_rate": 0.0002465533980582524, "loss": 0.8108, "step": 2223 }, { "epoch": 0.8985404777536488, "grad_norm": 0.56640625, "learning_rate": 0.0002465291262135922, "loss": 0.8753, "step": 2224 }, { "epoch": 0.8989444977526387, "grad_norm": 0.51171875, "learning_rate": 0.00024650485436893203, "loss": 0.8558, "step": 2225 }, { "epoch": 0.8993485177516287, "grad_norm": 0.578125, "learning_rate": 0.0002464805825242718, "loss": 0.8319, "step": 2226 }, { "epoch": 0.8997525377506187, "grad_norm": 0.50390625, "learning_rate": 0.00024645631067961163, "loss": 0.7316, "step": 2227 }, { "epoch": 0.9001565577496086, "grad_norm": 0.55859375, "learning_rate": 0.00024643203883495146, "loss": 0.8059, "step": 2228 }, { "epoch": 0.9005605777485985, "grad_norm": 0.51171875, "learning_rate": 0.00024640776699029124, "loss": 0.8344, "step": 2229 }, { "epoch": 0.9009645977475885, "grad_norm": 0.447265625, "learning_rate": 0.000246383495145631, "loss": 0.8563, "step": 2230 }, { "epoch": 0.9013686177465785, "grad_norm": 0.5703125, "learning_rate": 0.00024635922330097084, "loss": 0.8364, "step": 2231 }, { "epoch": 0.9017726377455684, "grad_norm": 0.50390625, "learning_rate": 0.0002463349514563107, "loss": 0.7385, "step": 2232 }, { "epoch": 0.9021766577445584, "grad_norm": 0.6171875, "learning_rate": 0.00024631067961165045, "loss": 0.7999, "step": 2233 }, { "epoch": 0.9025806777435483, "grad_norm": 0.5234375, "learning_rate": 0.0002462864077669903, "loss": 0.8091, "step": 2234 }, { "epoch": 0.9029846977425382, "grad_norm": 0.58984375, "learning_rate": 0.00024626213592233005, "loss": 0.8858, "step": 2235 }, { "epoch": 0.9033887177415282, "grad_norm": 0.4921875, "learning_rate": 0.0002462378640776699, "loss": 0.8192, "step": 2236 }, { "epoch": 0.9037927377405182, "grad_norm": 0.4921875, "learning_rate": 0.0002462135922330097, "loss": 0.8104, "step": 2237 }, { "epoch": 0.9041967577395081, "grad_norm": 0.5546875, "learning_rate": 0.0002461893203883495, "loss": 0.9714, "step": 2238 }, { "epoch": 0.904600777738498, "grad_norm": 0.466796875, "learning_rate": 0.0002461650485436893, "loss": 0.7774, "step": 2239 }, { "epoch": 0.905004797737488, "grad_norm": 0.5703125, "learning_rate": 0.0002461407766990291, "loss": 0.7746, "step": 2240 }, { "epoch": 0.905408817736478, "grad_norm": 0.50390625, "learning_rate": 0.0002461165048543689, "loss": 0.7923, "step": 2241 }, { "epoch": 0.905812837735468, "grad_norm": 0.59375, "learning_rate": 0.00024609223300970875, "loss": 0.9247, "step": 2242 }, { "epoch": 0.9062168577344578, "grad_norm": 0.51953125, "learning_rate": 0.0002460679611650485, "loss": 0.8243, "step": 2243 }, { "epoch": 0.9066208777334478, "grad_norm": 0.54296875, "learning_rate": 0.0002460436893203883, "loss": 0.9159, "step": 2244 }, { "epoch": 0.9070248977324378, "grad_norm": 0.478515625, "learning_rate": 0.00024601941747572813, "loss": 0.7041, "step": 2245 }, { "epoch": 0.9074289177314278, "grad_norm": 0.59375, "learning_rate": 0.00024599514563106796, "loss": 0.8102, "step": 2246 }, { "epoch": 0.9078329377304176, "grad_norm": 0.52734375, "learning_rate": 0.00024597087378640774, "loss": 0.7113, "step": 2247 }, { "epoch": 0.9082369577294076, "grad_norm": 0.5078125, "learning_rate": 0.00024594660194174757, "loss": 0.8125, "step": 2248 }, { "epoch": 0.9086409777283976, "grad_norm": 0.5078125, "learning_rate": 0.00024592233009708734, "loss": 0.8871, "step": 2249 }, { "epoch": 0.9090449977273876, "grad_norm": 0.55859375, "learning_rate": 0.00024589805825242717, "loss": 0.9702, "step": 2250 }, { "epoch": 0.9094490177263774, "grad_norm": 0.546875, "learning_rate": 0.00024587378640776695, "loss": 0.8018, "step": 2251 }, { "epoch": 0.9098530377253674, "grad_norm": 0.55078125, "learning_rate": 0.0002458495145631068, "loss": 0.9382, "step": 2252 }, { "epoch": 0.9102570577243574, "grad_norm": 0.53515625, "learning_rate": 0.0002458252427184466, "loss": 0.8248, "step": 2253 }, { "epoch": 0.9106610777233473, "grad_norm": 0.56640625, "learning_rate": 0.0002458009708737864, "loss": 0.808, "step": 2254 }, { "epoch": 0.9110650977223372, "grad_norm": 0.4921875, "learning_rate": 0.00024577669902912615, "loss": 0.7667, "step": 2255 }, { "epoch": 0.9114691177213272, "grad_norm": 0.474609375, "learning_rate": 0.000245752427184466, "loss": 0.784, "step": 2256 }, { "epoch": 0.9118731377203172, "grad_norm": 0.52734375, "learning_rate": 0.0002457281553398058, "loss": 0.87, "step": 2257 }, { "epoch": 0.9122771577193071, "grad_norm": 0.50390625, "learning_rate": 0.00024570388349514564, "loss": 0.8249, "step": 2258 }, { "epoch": 0.912681177718297, "grad_norm": 0.48046875, "learning_rate": 0.0002456796116504854, "loss": 0.8377, "step": 2259 }, { "epoch": 0.913085197717287, "grad_norm": 0.6328125, "learning_rate": 0.0002456553398058252, "loss": 0.8055, "step": 2260 }, { "epoch": 0.913489217716277, "grad_norm": 0.5, "learning_rate": 0.000245631067961165, "loss": 0.8963, "step": 2261 }, { "epoch": 0.9138932377152669, "grad_norm": 0.45703125, "learning_rate": 0.00024560679611650485, "loss": 0.753, "step": 2262 }, { "epoch": 0.9142972577142569, "grad_norm": 0.5234375, "learning_rate": 0.00024558252427184463, "loss": 0.7734, "step": 2263 }, { "epoch": 0.9147012777132468, "grad_norm": 0.56640625, "learning_rate": 0.00024555825242718446, "loss": 0.7899, "step": 2264 }, { "epoch": 0.9151052977122368, "grad_norm": 0.515625, "learning_rate": 0.00024553398058252423, "loss": 0.7598, "step": 2265 }, { "epoch": 0.9155093177112267, "grad_norm": 0.5625, "learning_rate": 0.00024550970873786406, "loss": 0.8358, "step": 2266 }, { "epoch": 0.9159133377102167, "grad_norm": 0.52734375, "learning_rate": 0.0002454854368932039, "loss": 0.8817, "step": 2267 }, { "epoch": 0.9163173577092066, "grad_norm": 0.515625, "learning_rate": 0.00024546116504854367, "loss": 0.8672, "step": 2268 }, { "epoch": 0.9167213777081965, "grad_norm": 0.57421875, "learning_rate": 0.0002454368932038835, "loss": 0.87, "step": 2269 }, { "epoch": 0.9171253977071865, "grad_norm": 0.5, "learning_rate": 0.00024541262135922327, "loss": 0.7389, "step": 2270 }, { "epoch": 0.9175294177061765, "grad_norm": 0.498046875, "learning_rate": 0.0002453883495145631, "loss": 0.7761, "step": 2271 }, { "epoch": 0.9179334377051664, "grad_norm": 0.51953125, "learning_rate": 0.00024536407766990293, "loss": 0.8494, "step": 2272 }, { "epoch": 0.9183374577041563, "grad_norm": 0.51953125, "learning_rate": 0.0002453398058252427, "loss": 0.8415, "step": 2273 }, { "epoch": 0.9187414777031463, "grad_norm": 0.55859375, "learning_rate": 0.0002453155339805825, "loss": 0.8298, "step": 2274 }, { "epoch": 0.9191454977021363, "grad_norm": 0.53125, "learning_rate": 0.0002452912621359223, "loss": 0.8386, "step": 2275 }, { "epoch": 0.9195495177011263, "grad_norm": 0.59765625, "learning_rate": 0.0002452669902912621, "loss": 0.952, "step": 2276 }, { "epoch": 0.9199535377001161, "grad_norm": 0.53515625, "learning_rate": 0.0002452427184466019, "loss": 0.8976, "step": 2277 }, { "epoch": 0.9203575576991061, "grad_norm": 0.5546875, "learning_rate": 0.00024521844660194174, "loss": 0.8303, "step": 2278 }, { "epoch": 0.9207615776980961, "grad_norm": 0.52734375, "learning_rate": 0.0002451941747572815, "loss": 0.8572, "step": 2279 }, { "epoch": 0.9211655976970861, "grad_norm": 0.466796875, "learning_rate": 0.00024516990291262135, "loss": 0.7421, "step": 2280 }, { "epoch": 0.9215696176960759, "grad_norm": 0.5078125, "learning_rate": 0.0002451456310679611, "loss": 0.8683, "step": 2281 }, { "epoch": 0.9219736376950659, "grad_norm": 0.47265625, "learning_rate": 0.00024512135922330095, "loss": 0.7718, "step": 2282 }, { "epoch": 0.9223776576940559, "grad_norm": 0.435546875, "learning_rate": 0.0002450970873786408, "loss": 0.7586, "step": 2283 }, { "epoch": 0.9227816776930458, "grad_norm": 0.55078125, "learning_rate": 0.00024507281553398056, "loss": 0.9129, "step": 2284 }, { "epoch": 0.9231856976920357, "grad_norm": 0.45703125, "learning_rate": 0.00024504854368932033, "loss": 0.7577, "step": 2285 }, { "epoch": 0.9235897176910257, "grad_norm": 0.486328125, "learning_rate": 0.00024502427184466016, "loss": 0.7433, "step": 2286 }, { "epoch": 0.9239937376900157, "grad_norm": 0.55078125, "learning_rate": 0.000245, "loss": 0.8101, "step": 2287 }, { "epoch": 0.9243977576890056, "grad_norm": 0.5703125, "learning_rate": 0.00024497572815533977, "loss": 0.8149, "step": 2288 }, { "epoch": 0.9248017776879955, "grad_norm": 0.51953125, "learning_rate": 0.0002449514563106796, "loss": 0.7963, "step": 2289 }, { "epoch": 0.9252057976869855, "grad_norm": 0.53125, "learning_rate": 0.00024492718446601937, "loss": 0.789, "step": 2290 }, { "epoch": 0.9256098176859755, "grad_norm": 0.5390625, "learning_rate": 0.0002449029126213592, "loss": 0.8137, "step": 2291 }, { "epoch": 0.9260138376849654, "grad_norm": 0.55078125, "learning_rate": 0.00024487864077669903, "loss": 0.8352, "step": 2292 }, { "epoch": 0.9264178576839553, "grad_norm": 0.5546875, "learning_rate": 0.0002448543689320388, "loss": 0.8374, "step": 2293 }, { "epoch": 0.9268218776829453, "grad_norm": 0.5390625, "learning_rate": 0.00024483009708737864, "loss": 0.7237, "step": 2294 }, { "epoch": 0.9272258976819353, "grad_norm": 0.51171875, "learning_rate": 0.0002448058252427184, "loss": 0.7325, "step": 2295 }, { "epoch": 0.9276299176809252, "grad_norm": 0.51171875, "learning_rate": 0.00024478155339805824, "loss": 0.8198, "step": 2296 }, { "epoch": 0.9280339376799152, "grad_norm": 0.458984375, "learning_rate": 0.00024475728155339807, "loss": 0.726, "step": 2297 }, { "epoch": 0.9284379576789051, "grad_norm": 0.5703125, "learning_rate": 0.00024473300970873784, "loss": 0.8109, "step": 2298 }, { "epoch": 0.928841977677895, "grad_norm": 0.51171875, "learning_rate": 0.0002447087378640777, "loss": 0.6926, "step": 2299 }, { "epoch": 0.929245997676885, "grad_norm": 0.45703125, "learning_rate": 0.00024468446601941745, "loss": 0.8012, "step": 2300 }, { "epoch": 0.929650017675875, "grad_norm": 0.5078125, "learning_rate": 0.0002446601941747572, "loss": 0.8122, "step": 2301 }, { "epoch": 0.9300540376748649, "grad_norm": 0.494140625, "learning_rate": 0.00024463592233009705, "loss": 0.7443, "step": 2302 }, { "epoch": 0.9304580576738548, "grad_norm": 0.50390625, "learning_rate": 0.0002446116504854369, "loss": 0.8479, "step": 2303 }, { "epoch": 0.9308620776728448, "grad_norm": 0.59765625, "learning_rate": 0.00024458737864077666, "loss": 0.7338, "step": 2304 }, { "epoch": 0.9312660976718348, "grad_norm": 0.62109375, "learning_rate": 0.0002445631067961165, "loss": 0.9079, "step": 2305 }, { "epoch": 0.9316701176708247, "grad_norm": 0.58203125, "learning_rate": 0.00024453883495145626, "loss": 0.8914, "step": 2306 }, { "epoch": 0.9320741376698146, "grad_norm": 0.5078125, "learning_rate": 0.0002445145631067961, "loss": 0.7793, "step": 2307 }, { "epoch": 0.9324781576688046, "grad_norm": 0.470703125, "learning_rate": 0.0002444902912621359, "loss": 0.8334, "step": 2308 }, { "epoch": 0.9328821776677946, "grad_norm": 0.482421875, "learning_rate": 0.0002444660194174757, "loss": 0.7742, "step": 2309 }, { "epoch": 0.9332861976667846, "grad_norm": 0.5703125, "learning_rate": 0.00024444174757281553, "loss": 0.8011, "step": 2310 }, { "epoch": 0.9336902176657744, "grad_norm": 0.53125, "learning_rate": 0.0002444174757281553, "loss": 0.8831, "step": 2311 }, { "epoch": 0.9340942376647644, "grad_norm": 0.4765625, "learning_rate": 0.00024439320388349513, "loss": 0.7364, "step": 2312 }, { "epoch": 0.9344982576637544, "grad_norm": 0.51953125, "learning_rate": 0.00024436893203883496, "loss": 0.8454, "step": 2313 }, { "epoch": 0.9349022776627443, "grad_norm": 0.5, "learning_rate": 0.00024434466019417474, "loss": 0.8867, "step": 2314 }, { "epoch": 0.9353062976617342, "grad_norm": 0.50390625, "learning_rate": 0.0002443203883495145, "loss": 0.743, "step": 2315 }, { "epoch": 0.9357103176607242, "grad_norm": 0.546875, "learning_rate": 0.00024429611650485434, "loss": 0.9102, "step": 2316 }, { "epoch": 0.9361143376597142, "grad_norm": 0.58984375, "learning_rate": 0.00024427184466019417, "loss": 0.8955, "step": 2317 }, { "epoch": 0.9365183576587041, "grad_norm": 0.5078125, "learning_rate": 0.00024424757281553395, "loss": 0.8683, "step": 2318 }, { "epoch": 0.936922377657694, "grad_norm": 0.53515625, "learning_rate": 0.0002442233009708738, "loss": 0.8126, "step": 2319 }, { "epoch": 0.937326397656684, "grad_norm": 0.5703125, "learning_rate": 0.00024419902912621355, "loss": 0.8359, "step": 2320 }, { "epoch": 0.937730417655674, "grad_norm": 0.53515625, "learning_rate": 0.0002441747572815534, "loss": 0.9271, "step": 2321 }, { "epoch": 0.9381344376546639, "grad_norm": 0.4765625, "learning_rate": 0.0002441504854368932, "loss": 0.7565, "step": 2322 }, { "epoch": 0.9385384576536538, "grad_norm": 0.59765625, "learning_rate": 0.00024412621359223298, "loss": 0.7957, "step": 2323 }, { "epoch": 0.9389424776526438, "grad_norm": 0.474609375, "learning_rate": 0.00024410194174757281, "loss": 0.6883, "step": 2324 }, { "epoch": 0.9393464976516338, "grad_norm": 0.53515625, "learning_rate": 0.0002440776699029126, "loss": 0.7711, "step": 2325 }, { "epoch": 0.9397505176506237, "grad_norm": 0.52734375, "learning_rate": 0.0002440533980582524, "loss": 0.7066, "step": 2326 }, { "epoch": 0.9401545376496137, "grad_norm": 0.625, "learning_rate": 0.00024402912621359222, "loss": 0.8653, "step": 2327 }, { "epoch": 0.9405585576486036, "grad_norm": 0.55859375, "learning_rate": 0.00024400485436893202, "loss": 0.8369, "step": 2328 }, { "epoch": 0.9409625776475935, "grad_norm": 0.494140625, "learning_rate": 0.00024398058252427183, "loss": 0.6516, "step": 2329 }, { "epoch": 0.9413665976465835, "grad_norm": 0.52734375, "learning_rate": 0.00024395631067961163, "loss": 0.8957, "step": 2330 }, { "epoch": 0.9417706176455735, "grad_norm": 0.6171875, "learning_rate": 0.00024393203883495143, "loss": 0.8387, "step": 2331 }, { "epoch": 0.9421746376445634, "grad_norm": 0.53125, "learning_rate": 0.00024390776699029126, "loss": 0.7192, "step": 2332 }, { "epoch": 0.9425786576435533, "grad_norm": 0.4921875, "learning_rate": 0.00024388349514563104, "loss": 0.754, "step": 2333 }, { "epoch": 0.9429826776425433, "grad_norm": 0.466796875, "learning_rate": 0.00024385922330097084, "loss": 0.647, "step": 2334 }, { "epoch": 0.9433866976415333, "grad_norm": 0.62890625, "learning_rate": 0.00024383495145631067, "loss": 0.8795, "step": 2335 }, { "epoch": 0.9437907176405232, "grad_norm": 0.44921875, "learning_rate": 0.00024381067961165047, "loss": 0.7682, "step": 2336 }, { "epoch": 0.9441947376395131, "grad_norm": 0.5390625, "learning_rate": 0.00024378640776699024, "loss": 0.8812, "step": 2337 }, { "epoch": 0.9445987576385031, "grad_norm": 0.58203125, "learning_rate": 0.00024376213592233007, "loss": 0.7165, "step": 2338 }, { "epoch": 0.9450027776374931, "grad_norm": 0.60546875, "learning_rate": 0.00024373786407766988, "loss": 0.8452, "step": 2339 }, { "epoch": 0.945406797636483, "grad_norm": 0.546875, "learning_rate": 0.0002437135922330097, "loss": 0.8352, "step": 2340 }, { "epoch": 0.9458108176354729, "grad_norm": 0.5078125, "learning_rate": 0.0002436893203883495, "loss": 0.7456, "step": 2341 }, { "epoch": 0.9462148376344629, "grad_norm": 0.5234375, "learning_rate": 0.00024366504854368928, "loss": 0.7641, "step": 2342 }, { "epoch": 0.9466188576334529, "grad_norm": 0.54296875, "learning_rate": 0.0002436407766990291, "loss": 0.7939, "step": 2343 }, { "epoch": 0.9470228776324429, "grad_norm": 0.51953125, "learning_rate": 0.00024361650485436892, "loss": 0.824, "step": 2344 }, { "epoch": 0.9474268976314327, "grad_norm": 0.5390625, "learning_rate": 0.0002435922330097087, "loss": 0.7981, "step": 2345 }, { "epoch": 0.9478309176304227, "grad_norm": 0.77734375, "learning_rate": 0.00024356796116504852, "loss": 0.9573, "step": 2346 }, { "epoch": 0.9482349376294127, "grad_norm": 0.546875, "learning_rate": 0.00024354368932038832, "loss": 0.7942, "step": 2347 }, { "epoch": 0.9486389576284026, "grad_norm": 0.5234375, "learning_rate": 0.00024351941747572812, "loss": 0.8108, "step": 2348 }, { "epoch": 0.9490429776273925, "grad_norm": 0.546875, "learning_rate": 0.00024349514563106795, "loss": 0.7443, "step": 2349 }, { "epoch": 0.9494469976263825, "grad_norm": 0.6484375, "learning_rate": 0.00024347087378640773, "loss": 0.9047, "step": 2350 }, { "epoch": 0.9498510176253725, "grad_norm": 0.515625, "learning_rate": 0.00024344660194174756, "loss": 0.8379, "step": 2351 }, { "epoch": 0.9502550376243624, "grad_norm": 0.546875, "learning_rate": 0.00024342233009708736, "loss": 0.8623, "step": 2352 }, { "epoch": 0.9506590576233523, "grad_norm": 0.478515625, "learning_rate": 0.00024339805825242716, "loss": 0.8022, "step": 2353 }, { "epoch": 0.9510630776223423, "grad_norm": 0.46875, "learning_rate": 0.000243373786407767, "loss": 0.7087, "step": 2354 }, { "epoch": 0.9514670976213323, "grad_norm": 0.59375, "learning_rate": 0.00024334951456310677, "loss": 0.9102, "step": 2355 }, { "epoch": 0.9518711176203222, "grad_norm": 0.49609375, "learning_rate": 0.00024332524271844657, "loss": 0.8093, "step": 2356 }, { "epoch": 0.9522751376193122, "grad_norm": 0.5703125, "learning_rate": 0.0002433009708737864, "loss": 0.9117, "step": 2357 }, { "epoch": 0.9526791576183021, "grad_norm": 0.72265625, "learning_rate": 0.00024327669902912618, "loss": 0.9443, "step": 2358 }, { "epoch": 0.9530831776172921, "grad_norm": 0.5625, "learning_rate": 0.000243252427184466, "loss": 0.7972, "step": 2359 }, { "epoch": 0.953487197616282, "grad_norm": 0.54296875, "learning_rate": 0.0002432281553398058, "loss": 0.83, "step": 2360 }, { "epoch": 0.953891217615272, "grad_norm": 0.52734375, "learning_rate": 0.0002432038834951456, "loss": 0.6922, "step": 2361 }, { "epoch": 0.9542952376142619, "grad_norm": 0.51953125, "learning_rate": 0.00024317961165048544, "loss": 0.7308, "step": 2362 }, { "epoch": 0.9546992576132518, "grad_norm": 0.5234375, "learning_rate": 0.00024315533980582521, "loss": 0.8073, "step": 2363 }, { "epoch": 0.9551032776122418, "grad_norm": 0.5546875, "learning_rate": 0.00024313106796116502, "loss": 0.8674, "step": 2364 }, { "epoch": 0.9555072976112318, "grad_norm": 0.49609375, "learning_rate": 0.00024310679611650485, "loss": 0.7358, "step": 2365 }, { "epoch": 0.9559113176102217, "grad_norm": 0.54296875, "learning_rate": 0.00024308252427184465, "loss": 0.7659, "step": 2366 }, { "epoch": 0.9563153376092116, "grad_norm": 0.74609375, "learning_rate": 0.00024305825242718442, "loss": 0.9964, "step": 2367 }, { "epoch": 0.9567193576082016, "grad_norm": 0.60546875, "learning_rate": 0.00024303398058252425, "loss": 0.8175, "step": 2368 }, { "epoch": 0.9571233776071916, "grad_norm": 0.7421875, "learning_rate": 0.00024300970873786406, "loss": 1.0361, "step": 2369 }, { "epoch": 0.9575273976061816, "grad_norm": 0.52734375, "learning_rate": 0.00024298543689320388, "loss": 0.8798, "step": 2370 }, { "epoch": 0.9579314176051714, "grad_norm": 0.546875, "learning_rate": 0.00024296116504854366, "loss": 0.8493, "step": 2371 }, { "epoch": 0.9583354376041614, "grad_norm": 0.5625, "learning_rate": 0.00024293689320388346, "loss": 0.9715, "step": 2372 }, { "epoch": 0.9587394576031514, "grad_norm": 0.5, "learning_rate": 0.0002429126213592233, "loss": 0.887, "step": 2373 }, { "epoch": 0.9591434776021414, "grad_norm": 0.498046875, "learning_rate": 0.0002428883495145631, "loss": 0.7447, "step": 2374 }, { "epoch": 0.9595474976011312, "grad_norm": 0.474609375, "learning_rate": 0.00024286407766990287, "loss": 0.7537, "step": 2375 }, { "epoch": 0.9599515176001212, "grad_norm": 0.56640625, "learning_rate": 0.0002428398058252427, "loss": 0.8169, "step": 2376 }, { "epoch": 0.9603555375991112, "grad_norm": 0.67578125, "learning_rate": 0.0002428155339805825, "loss": 0.885, "step": 2377 }, { "epoch": 0.960759557598101, "grad_norm": 1.0078125, "learning_rate": 0.0002427912621359223, "loss": 0.8568, "step": 2378 }, { "epoch": 0.961163577597091, "grad_norm": 0.6171875, "learning_rate": 0.00024276699029126213, "loss": 0.9919, "step": 2379 }, { "epoch": 0.961567597596081, "grad_norm": 0.55078125, "learning_rate": 0.0002427427184466019, "loss": 0.7867, "step": 2380 }, { "epoch": 0.961971617595071, "grad_norm": 0.5234375, "learning_rate": 0.00024271844660194174, "loss": 0.8673, "step": 2381 }, { "epoch": 0.9623756375940609, "grad_norm": 0.52734375, "learning_rate": 0.00024269417475728154, "loss": 0.7885, "step": 2382 }, { "epoch": 0.9627796575930508, "grad_norm": 0.56640625, "learning_rate": 0.00024266990291262131, "loss": 0.8356, "step": 2383 }, { "epoch": 0.9631836775920408, "grad_norm": 0.57421875, "learning_rate": 0.00024264563106796114, "loss": 0.8174, "step": 2384 }, { "epoch": 0.9635876975910308, "grad_norm": 0.54296875, "learning_rate": 0.00024262135922330095, "loss": 0.7922, "step": 2385 }, { "epoch": 0.9639917175900207, "grad_norm": 0.5078125, "learning_rate": 0.00024259708737864075, "loss": 0.6862, "step": 2386 }, { "epoch": 0.9643957375890106, "grad_norm": 0.6328125, "learning_rate": 0.00024257281553398058, "loss": 0.8054, "step": 2387 }, { "epoch": 0.9647997575880006, "grad_norm": 0.7890625, "learning_rate": 0.00024254854368932035, "loss": 1.0563, "step": 2388 }, { "epoch": 0.9652037775869906, "grad_norm": 0.6015625, "learning_rate": 0.00024252427184466018, "loss": 0.8065, "step": 2389 }, { "epoch": 0.9656077975859805, "grad_norm": 0.51953125, "learning_rate": 0.00024249999999999999, "loss": 0.7753, "step": 2390 }, { "epoch": 0.9660118175849705, "grad_norm": 0.478515625, "learning_rate": 0.0002424757281553398, "loss": 0.8233, "step": 2391 }, { "epoch": 0.9664158375839604, "grad_norm": 0.498046875, "learning_rate": 0.0002424514563106796, "loss": 0.82, "step": 2392 }, { "epoch": 0.9668198575829503, "grad_norm": 0.625, "learning_rate": 0.0002424271844660194, "loss": 0.8846, "step": 2393 }, { "epoch": 0.9672238775819403, "grad_norm": 0.482421875, "learning_rate": 0.0002424029126213592, "loss": 0.7787, "step": 2394 }, { "epoch": 0.9676278975809303, "grad_norm": 0.5234375, "learning_rate": 0.00024237864077669902, "loss": 0.8095, "step": 2395 }, { "epoch": 0.9680319175799202, "grad_norm": 0.486328125, "learning_rate": 0.0002423543689320388, "loss": 0.788, "step": 2396 }, { "epoch": 0.9684359375789101, "grad_norm": 0.59765625, "learning_rate": 0.0002423300970873786, "loss": 0.9105, "step": 2397 }, { "epoch": 0.9688399575779001, "grad_norm": 0.44921875, "learning_rate": 0.00024230582524271843, "loss": 0.7041, "step": 2398 }, { "epoch": 0.9692439775768901, "grad_norm": 0.57421875, "learning_rate": 0.00024228155339805823, "loss": 0.76, "step": 2399 }, { "epoch": 0.96964799757588, "grad_norm": 0.490234375, "learning_rate": 0.00024225728155339806, "loss": 0.7965, "step": 2400 }, { "epoch": 0.9700520175748699, "grad_norm": 0.6171875, "learning_rate": 0.00024223300970873784, "loss": 0.8131, "step": 2401 }, { "epoch": 0.9704560375738599, "grad_norm": 0.51171875, "learning_rate": 0.00024220873786407764, "loss": 0.7216, "step": 2402 }, { "epoch": 0.9708600575728499, "grad_norm": 0.53125, "learning_rate": 0.00024218446601941747, "loss": 0.8673, "step": 2403 }, { "epoch": 0.9712640775718399, "grad_norm": 0.5625, "learning_rate": 0.00024216019417475727, "loss": 0.8363, "step": 2404 }, { "epoch": 0.9716680975708297, "grad_norm": 0.478515625, "learning_rate": 0.00024213592233009705, "loss": 0.8205, "step": 2405 }, { "epoch": 0.9720721175698197, "grad_norm": 0.45703125, "learning_rate": 0.00024211165048543688, "loss": 0.8702, "step": 2406 }, { "epoch": 0.9724761375688097, "grad_norm": 0.48046875, "learning_rate": 0.00024208737864077668, "loss": 0.7992, "step": 2407 }, { "epoch": 0.9728801575677996, "grad_norm": 0.52734375, "learning_rate": 0.00024206310679611645, "loss": 0.8002, "step": 2408 }, { "epoch": 0.9732841775667895, "grad_norm": 0.4609375, "learning_rate": 0.00024203883495145628, "loss": 0.7289, "step": 2409 }, { "epoch": 0.9736881975657795, "grad_norm": 0.61328125, "learning_rate": 0.0002420145631067961, "loss": 0.811, "step": 2410 }, { "epoch": 0.9740922175647695, "grad_norm": 0.53125, "learning_rate": 0.00024199029126213592, "loss": 0.7272, "step": 2411 }, { "epoch": 0.9744962375637594, "grad_norm": 0.51953125, "learning_rate": 0.00024196601941747572, "loss": 0.8135, "step": 2412 }, { "epoch": 0.9749002575627493, "grad_norm": 0.515625, "learning_rate": 0.0002419417475728155, "loss": 0.7297, "step": 2413 }, { "epoch": 0.9753042775617393, "grad_norm": 0.59765625, "learning_rate": 0.00024191747572815532, "loss": 0.8167, "step": 2414 }, { "epoch": 0.9757082975607293, "grad_norm": 0.61328125, "learning_rate": 0.00024189320388349513, "loss": 0.9756, "step": 2415 }, { "epoch": 0.9761123175597192, "grad_norm": 0.5546875, "learning_rate": 0.00024186893203883493, "loss": 0.8452, "step": 2416 }, { "epoch": 0.9765163375587091, "grad_norm": 0.59375, "learning_rate": 0.00024184466019417476, "loss": 0.8521, "step": 2417 }, { "epoch": 0.9769203575576991, "grad_norm": 0.494140625, "learning_rate": 0.00024182038834951453, "loss": 0.7248, "step": 2418 }, { "epoch": 0.9773243775566891, "grad_norm": 0.482421875, "learning_rate": 0.00024179611650485433, "loss": 0.7601, "step": 2419 }, { "epoch": 0.977728397555679, "grad_norm": 0.546875, "learning_rate": 0.00024177184466019416, "loss": 0.8844, "step": 2420 }, { "epoch": 0.978132417554669, "grad_norm": 0.5703125, "learning_rate": 0.00024174757281553394, "loss": 0.7862, "step": 2421 }, { "epoch": 0.9785364375536589, "grad_norm": 0.494140625, "learning_rate": 0.00024172330097087377, "loss": 0.7872, "step": 2422 }, { "epoch": 0.9789404575526488, "grad_norm": 0.6796875, "learning_rate": 0.00024169902912621357, "loss": 0.8859, "step": 2423 }, { "epoch": 0.9793444775516388, "grad_norm": 0.5234375, "learning_rate": 0.00024167475728155337, "loss": 0.7984, "step": 2424 }, { "epoch": 0.9797484975506288, "grad_norm": 0.466796875, "learning_rate": 0.0002416504854368932, "loss": 0.8413, "step": 2425 }, { "epoch": 0.9801525175496187, "grad_norm": 0.484375, "learning_rate": 0.00024162621359223298, "loss": 0.8328, "step": 2426 }, { "epoch": 0.9805565375486086, "grad_norm": 0.474609375, "learning_rate": 0.00024160194174757278, "loss": 0.8012, "step": 2427 }, { "epoch": 0.9809605575475986, "grad_norm": 0.62890625, "learning_rate": 0.0002415776699029126, "loss": 0.7994, "step": 2428 }, { "epoch": 0.9813645775465886, "grad_norm": 0.44140625, "learning_rate": 0.0002415533980582524, "loss": 0.7204, "step": 2429 }, { "epoch": 0.9817685975455785, "grad_norm": 0.58203125, "learning_rate": 0.00024152912621359221, "loss": 0.8335, "step": 2430 }, { "epoch": 0.9821726175445684, "grad_norm": 0.478515625, "learning_rate": 0.00024150485436893202, "loss": 0.7426, "step": 2431 }, { "epoch": 0.9825766375435584, "grad_norm": 0.4453125, "learning_rate": 0.00024148058252427182, "loss": 0.7487, "step": 2432 }, { "epoch": 0.9829806575425484, "grad_norm": 0.625, "learning_rate": 0.00024145631067961165, "loss": 0.8205, "step": 2433 }, { "epoch": 0.9833846775415384, "grad_norm": 0.5390625, "learning_rate": 0.00024143203883495142, "loss": 0.9071, "step": 2434 }, { "epoch": 0.9837886975405282, "grad_norm": 0.6796875, "learning_rate": 0.00024140776699029123, "loss": 0.9609, "step": 2435 }, { "epoch": 0.9841927175395182, "grad_norm": 0.61328125, "learning_rate": 0.00024138349514563106, "loss": 0.8492, "step": 2436 }, { "epoch": 0.9845967375385082, "grad_norm": 0.5703125, "learning_rate": 0.00024135922330097086, "loss": 0.8571, "step": 2437 }, { "epoch": 0.9850007575374982, "grad_norm": 0.51171875, "learning_rate": 0.00024133495145631063, "loss": 0.8154, "step": 2438 }, { "epoch": 0.985404777536488, "grad_norm": 0.59765625, "learning_rate": 0.00024131067961165046, "loss": 0.9073, "step": 2439 }, { "epoch": 0.985808797535478, "grad_norm": 0.5546875, "learning_rate": 0.00024128640776699027, "loss": 0.823, "step": 2440 }, { "epoch": 0.986212817534468, "grad_norm": 0.44140625, "learning_rate": 0.0002412621359223301, "loss": 0.6553, "step": 2441 }, { "epoch": 0.9866168375334579, "grad_norm": 0.5625, "learning_rate": 0.0002412378640776699, "loss": 0.8678, "step": 2442 }, { "epoch": 0.9870208575324478, "grad_norm": 0.58984375, "learning_rate": 0.00024121359223300967, "loss": 0.8356, "step": 2443 }, { "epoch": 0.9874248775314378, "grad_norm": 0.462890625, "learning_rate": 0.0002411893203883495, "loss": 0.867, "step": 2444 }, { "epoch": 0.9878288975304278, "grad_norm": 0.486328125, "learning_rate": 0.0002411650485436893, "loss": 0.8268, "step": 2445 }, { "epoch": 0.9882329175294177, "grad_norm": 0.54296875, "learning_rate": 0.00024114077669902908, "loss": 0.8013, "step": 2446 }, { "epoch": 0.9886369375284076, "grad_norm": 0.5625, "learning_rate": 0.0002411165048543689, "loss": 0.9483, "step": 2447 }, { "epoch": 0.9890409575273976, "grad_norm": 0.5, "learning_rate": 0.0002410922330097087, "loss": 0.7918, "step": 2448 }, { "epoch": 0.9894449775263876, "grad_norm": 0.423828125, "learning_rate": 0.0002410679611650485, "loss": 0.7221, "step": 2449 }, { "epoch": 0.9898489975253775, "grad_norm": 0.52734375, "learning_rate": 0.00024104368932038834, "loss": 0.7196, "step": 2450 }, { "epoch": 0.9902530175243675, "grad_norm": 0.5234375, "learning_rate": 0.00024101941747572812, "loss": 0.8404, "step": 2451 }, { "epoch": 0.9906570375233574, "grad_norm": 0.54296875, "learning_rate": 0.00024099514563106795, "loss": 0.8332, "step": 2452 }, { "epoch": 0.9910610575223474, "grad_norm": 0.63671875, "learning_rate": 0.00024097087378640775, "loss": 0.8611, "step": 2453 }, { "epoch": 0.9914650775213373, "grad_norm": 0.515625, "learning_rate": 0.00024094660194174755, "loss": 0.7987, "step": 2454 }, { "epoch": 0.9918690975203273, "grad_norm": 0.5234375, "learning_rate": 0.00024092233009708738, "loss": 0.8154, "step": 2455 }, { "epoch": 0.9922731175193172, "grad_norm": 0.55859375, "learning_rate": 0.00024089805825242716, "loss": 0.8416, "step": 2456 }, { "epoch": 0.9926771375183071, "grad_norm": 0.54296875, "learning_rate": 0.00024087378640776696, "loss": 0.9035, "step": 2457 }, { "epoch": 0.9930811575172971, "grad_norm": 0.6015625, "learning_rate": 0.0002408495145631068, "loss": 0.9502, "step": 2458 }, { "epoch": 0.9934851775162871, "grad_norm": 0.47265625, "learning_rate": 0.00024082524271844656, "loss": 0.7709, "step": 2459 }, { "epoch": 0.993889197515277, "grad_norm": 0.5, "learning_rate": 0.0002408009708737864, "loss": 0.8336, "step": 2460 }, { "epoch": 0.9942932175142669, "grad_norm": 0.498046875, "learning_rate": 0.0002407766990291262, "loss": 0.8775, "step": 2461 }, { "epoch": 0.9946972375132569, "grad_norm": 0.5859375, "learning_rate": 0.000240752427184466, "loss": 0.8024, "step": 2462 }, { "epoch": 0.9951012575122469, "grad_norm": 0.5, "learning_rate": 0.00024072815533980583, "loss": 0.6933, "step": 2463 }, { "epoch": 0.9955052775112369, "grad_norm": 0.482421875, "learning_rate": 0.0002407038834951456, "loss": 0.802, "step": 2464 }, { "epoch": 0.9959092975102267, "grad_norm": 0.5703125, "learning_rate": 0.0002406796116504854, "loss": 0.9256, "step": 2465 }, { "epoch": 0.9963133175092167, "grad_norm": 0.8359375, "learning_rate": 0.00024065533980582523, "loss": 0.9415, "step": 2466 }, { "epoch": 0.9967173375082067, "grad_norm": 0.5078125, "learning_rate": 0.00024063106796116504, "loss": 0.8145, "step": 2467 }, { "epoch": 0.9971213575071967, "grad_norm": 0.50390625, "learning_rate": 0.0002406067961165048, "loss": 0.8682, "step": 2468 }, { "epoch": 0.9975253775061865, "grad_norm": 0.5546875, "learning_rate": 0.00024058252427184464, "loss": 0.903, "step": 2469 }, { "epoch": 0.9979293975051765, "grad_norm": 0.59765625, "learning_rate": 0.00024055825242718444, "loss": 0.9728, "step": 2470 }, { "epoch": 0.9983334175041665, "grad_norm": 0.5390625, "learning_rate": 0.00024053398058252427, "loss": 0.7879, "step": 2471 }, { "epoch": 0.9987374375031564, "grad_norm": 0.484375, "learning_rate": 0.00024050970873786405, "loss": 0.7909, "step": 2472 }, { "epoch": 0.9991414575021463, "grad_norm": 0.48046875, "learning_rate": 0.00024048543689320385, "loss": 0.7694, "step": 2473 }, { "epoch": 0.9995454775011363, "grad_norm": 0.55859375, "learning_rate": 0.00024046116504854368, "loss": 0.9309, "step": 2474 }, { "epoch": 0.9999494975001263, "grad_norm": 0.58203125, "learning_rate": 0.00024043689320388348, "loss": 0.8757, "step": 2475 }, { "epoch": 1.0, "grad_norm": 0.95703125, "learning_rate": 0.00024041262135922326, "loss": 0.6065, "step": 2476 }, { "epoch": 1.0, "eval_loss": 2.582169771194458, "eval_runtime": 214.302, "eval_samples_per_second": 28.66, "eval_steps_per_second": 28.66, "step": 2476 }, { "epoch": 1.0, "eval_title2sid_loss": 1.17689049243927, "eval_title2sid_runtime": 136.7525, "eval_title2sid_samples_per_second": 27.831, "eval_title2sid_steps_per_second": 27.831, "step": 2476 }, { "epoch": 1.0, "eval_title2sid_loss": 1.17689049243927, "eval_title2sid_runtime": 136.7525, "eval_title2sid_samples_per_second": 27.831, "eval_title2sid_steps_per_second": 27.831, "step": 2476 }, { "epoch": 1.0, "eval_sid2title_loss": 0.3095063865184784, "eval_sid2title_runtime": 133.7967, "eval_sid2title_samples_per_second": 28.603, "eval_sid2title_steps_per_second": 28.603, "step": 2476 }, { "epoch": 1.0, "eval_sid2title_loss": 0.3095063865184784, "eval_sid2title_runtime": 133.7967, "eval_sid2title_samples_per_second": 28.603, "eval_sid2title_steps_per_second": 28.603, "step": 2476 }, { "epoch": 1.0004040199989899, "grad_norm": 0.71484375, "learning_rate": 0.0002403883495145631, "loss": 0.7187, "step": 2477 }, { "epoch": 1.00080803999798, "grad_norm": 0.5703125, "learning_rate": 0.0002403640776699029, "loss": 0.642, "step": 2478 }, { "epoch": 1.0012120599969698, "grad_norm": 0.62109375, "learning_rate": 0.0002403398058252427, "loss": 0.677, "step": 2479 }, { "epoch": 1.0016160799959597, "grad_norm": 0.55859375, "learning_rate": 0.00024031553398058252, "loss": 0.6586, "step": 2480 }, { "epoch": 1.0020200999949498, "grad_norm": 0.7421875, "learning_rate": 0.0002402912621359223, "loss": 0.6067, "step": 2481 }, { "epoch": 1.0024241199939397, "grad_norm": 0.68359375, "learning_rate": 0.00024026699029126213, "loss": 0.6873, "step": 2482 }, { "epoch": 1.0028281399929297, "grad_norm": 0.68359375, "learning_rate": 0.00024024271844660193, "loss": 0.6084, "step": 2483 }, { "epoch": 1.0032321599919196, "grad_norm": 0.69921875, "learning_rate": 0.0002402184466019417, "loss": 0.651, "step": 2484 }, { "epoch": 1.0036361799909095, "grad_norm": 0.65234375, "learning_rate": 0.00024019417475728153, "loss": 0.715, "step": 2485 }, { "epoch": 1.0040401999898996, "grad_norm": 0.62890625, "learning_rate": 0.00024016990291262134, "loss": 0.5067, "step": 2486 }, { "epoch": 1.0044442199888894, "grad_norm": 0.498046875, "learning_rate": 0.00024014563106796114, "loss": 0.5855, "step": 2487 }, { "epoch": 1.0048482399878793, "grad_norm": 0.5, "learning_rate": 0.00024012135922330097, "loss": 0.6369, "step": 2488 }, { "epoch": 1.0052522599868694, "grad_norm": 0.51171875, "learning_rate": 0.00024009708737864074, "loss": 0.6105, "step": 2489 }, { "epoch": 1.0056562799858593, "grad_norm": 0.65625, "learning_rate": 0.00024007281553398057, "loss": 0.6978, "step": 2490 }, { "epoch": 1.0060602999848494, "grad_norm": 0.5703125, "learning_rate": 0.00024004854368932037, "loss": 0.6266, "step": 2491 }, { "epoch": 1.0064643199838392, "grad_norm": 0.52734375, "learning_rate": 0.00024002427184466018, "loss": 0.6198, "step": 2492 }, { "epoch": 1.006868339982829, "grad_norm": 0.5703125, "learning_rate": 0.00023999999999999998, "loss": 0.6427, "step": 2493 }, { "epoch": 1.0072723599818192, "grad_norm": 0.5234375, "learning_rate": 0.00023997572815533978, "loss": 0.5797, "step": 2494 }, { "epoch": 1.007676379980809, "grad_norm": 0.53515625, "learning_rate": 0.00023995145631067958, "loss": 0.6279, "step": 2495 }, { "epoch": 1.008080399979799, "grad_norm": 0.546875, "learning_rate": 0.0002399271844660194, "loss": 0.6687, "step": 2496 }, { "epoch": 1.008484419978789, "grad_norm": 0.59765625, "learning_rate": 0.0002399029126213592, "loss": 0.6112, "step": 2497 }, { "epoch": 1.0088884399777789, "grad_norm": 0.5625, "learning_rate": 0.000239878640776699, "loss": 0.6217, "step": 2498 }, { "epoch": 1.0092924599767688, "grad_norm": 0.5234375, "learning_rate": 0.00023985436893203882, "loss": 0.6079, "step": 2499 }, { "epoch": 1.0096964799757588, "grad_norm": 0.53515625, "learning_rate": 0.00023983009708737862, "loss": 0.6188, "step": 2500 }, { "epoch": 1.0101004999747487, "grad_norm": 0.48828125, "learning_rate": 0.00023980582524271845, "loss": 0.6205, "step": 2501 }, { "epoch": 1.0105045199737388, "grad_norm": 0.51953125, "learning_rate": 0.00023978155339805823, "loss": 0.624, "step": 2502 }, { "epoch": 1.0109085399727287, "grad_norm": 0.50390625, "learning_rate": 0.00023975728155339803, "loss": 0.6325, "step": 2503 }, { "epoch": 1.0113125599717185, "grad_norm": 0.53125, "learning_rate": 0.00023973300970873786, "loss": 0.6086, "step": 2504 }, { "epoch": 1.0117165799707086, "grad_norm": 0.58984375, "learning_rate": 0.00023970873786407766, "loss": 0.6182, "step": 2505 }, { "epoch": 1.0121205999696985, "grad_norm": 0.53125, "learning_rate": 0.00023968446601941744, "loss": 0.6279, "step": 2506 }, { "epoch": 1.0125246199686884, "grad_norm": 0.578125, "learning_rate": 0.00023966019417475727, "loss": 0.6523, "step": 2507 }, { "epoch": 1.0129286399676785, "grad_norm": 0.54296875, "learning_rate": 0.00023963592233009707, "loss": 0.6298, "step": 2508 }, { "epoch": 1.0133326599666683, "grad_norm": 0.53515625, "learning_rate": 0.00023961165048543684, "loss": 0.6395, "step": 2509 }, { "epoch": 1.0137366799656582, "grad_norm": 0.61328125, "learning_rate": 0.00023958737864077667, "loss": 0.6636, "step": 2510 }, { "epoch": 1.0141406999646483, "grad_norm": 0.58203125, "learning_rate": 0.00023956310679611648, "loss": 0.6637, "step": 2511 }, { "epoch": 1.0145447199636382, "grad_norm": 0.5703125, "learning_rate": 0.0002395388349514563, "loss": 0.7063, "step": 2512 }, { "epoch": 1.0149487399626282, "grad_norm": 0.48828125, "learning_rate": 0.0002395145631067961, "loss": 0.6278, "step": 2513 }, { "epoch": 1.0153527599616181, "grad_norm": 0.55078125, "learning_rate": 0.00023949029126213588, "loss": 0.6792, "step": 2514 }, { "epoch": 1.015756779960608, "grad_norm": 0.53125, "learning_rate": 0.0002394660194174757, "loss": 0.5815, "step": 2515 }, { "epoch": 1.016160799959598, "grad_norm": 0.52734375, "learning_rate": 0.00023944174757281551, "loss": 0.6637, "step": 2516 }, { "epoch": 1.016564819958588, "grad_norm": 0.431640625, "learning_rate": 0.00023941747572815532, "loss": 0.5477, "step": 2517 }, { "epoch": 1.0169688399575778, "grad_norm": 0.5, "learning_rate": 0.00023939320388349515, "loss": 0.5707, "step": 2518 }, { "epoch": 1.017372859956568, "grad_norm": 0.6171875, "learning_rate": 0.00023936893203883492, "loss": 0.6742, "step": 2519 }, { "epoch": 1.0177768799555578, "grad_norm": 0.83984375, "learning_rate": 0.00023934466019417472, "loss": 0.717, "step": 2520 }, { "epoch": 1.0181808999545479, "grad_norm": 0.5859375, "learning_rate": 0.00023932038834951455, "loss": 0.6633, "step": 2521 }, { "epoch": 1.0185849199535377, "grad_norm": 0.62109375, "learning_rate": 0.00023929611650485433, "loss": 0.6604, "step": 2522 }, { "epoch": 1.0189889399525276, "grad_norm": 0.6015625, "learning_rate": 0.00023927184466019416, "loss": 0.6839, "step": 2523 }, { "epoch": 1.0193929599515177, "grad_norm": 0.53515625, "learning_rate": 0.00023924757281553396, "loss": 0.6571, "step": 2524 }, { "epoch": 1.0197969799505076, "grad_norm": 0.5234375, "learning_rate": 0.00023922330097087376, "loss": 0.6464, "step": 2525 }, { "epoch": 1.0202009999494974, "grad_norm": 0.71484375, "learning_rate": 0.0002391990291262136, "loss": 0.6756, "step": 2526 }, { "epoch": 1.0206050199484875, "grad_norm": 0.5625, "learning_rate": 0.00023917475728155337, "loss": 0.6615, "step": 2527 }, { "epoch": 1.0210090399474774, "grad_norm": 0.55859375, "learning_rate": 0.00023915048543689317, "loss": 0.6513, "step": 2528 }, { "epoch": 1.0214130599464672, "grad_norm": 0.61328125, "learning_rate": 0.000239126213592233, "loss": 0.6333, "step": 2529 }, { "epoch": 1.0218170799454573, "grad_norm": 0.5234375, "learning_rate": 0.0002391019417475728, "loss": 0.596, "step": 2530 }, { "epoch": 1.0222210999444472, "grad_norm": 0.453125, "learning_rate": 0.0002390776699029126, "loss": 0.5896, "step": 2531 }, { "epoch": 1.0226251199434373, "grad_norm": 0.6015625, "learning_rate": 0.0002390533980582524, "loss": 0.7025, "step": 2532 }, { "epoch": 1.0230291399424272, "grad_norm": 0.609375, "learning_rate": 0.0002390291262135922, "loss": 0.6333, "step": 2533 }, { "epoch": 1.023433159941417, "grad_norm": 0.609375, "learning_rate": 0.00023900485436893204, "loss": 0.6266, "step": 2534 }, { "epoch": 1.0238371799404071, "grad_norm": 0.458984375, "learning_rate": 0.0002389805825242718, "loss": 0.5724, "step": 2535 }, { "epoch": 1.024241199939397, "grad_norm": 0.5390625, "learning_rate": 0.00023895631067961162, "loss": 0.644, "step": 2536 }, { "epoch": 1.0246452199383869, "grad_norm": 0.5703125, "learning_rate": 0.00023893203883495144, "loss": 0.6339, "step": 2537 }, { "epoch": 1.025049239937377, "grad_norm": 0.68359375, "learning_rate": 0.00023890776699029125, "loss": 0.6635, "step": 2538 }, { "epoch": 1.0254532599363668, "grad_norm": 0.546875, "learning_rate": 0.00023888349514563102, "loss": 0.6459, "step": 2539 }, { "epoch": 1.0258572799353567, "grad_norm": 0.482421875, "learning_rate": 0.00023885922330097085, "loss": 0.6353, "step": 2540 }, { "epoch": 1.0262612999343468, "grad_norm": 1.078125, "learning_rate": 0.00023883495145631065, "loss": 0.7637, "step": 2541 }, { "epoch": 1.0266653199333367, "grad_norm": 0.63671875, "learning_rate": 0.00023881067961165048, "loss": 0.6834, "step": 2542 }, { "epoch": 1.0270693399323267, "grad_norm": 0.486328125, "learning_rate": 0.00023878640776699029, "loss": 0.6115, "step": 2543 }, { "epoch": 1.0274733599313166, "grad_norm": 0.53515625, "learning_rate": 0.00023876213592233006, "loss": 0.6358, "step": 2544 }, { "epoch": 1.0278773799303065, "grad_norm": 0.6484375, "learning_rate": 0.0002387378640776699, "loss": 0.7311, "step": 2545 }, { "epoch": 1.0282813999292966, "grad_norm": 0.486328125, "learning_rate": 0.0002387135922330097, "loss": 0.6402, "step": 2546 }, { "epoch": 1.0286854199282864, "grad_norm": 0.5390625, "learning_rate": 0.00023868932038834947, "loss": 0.6095, "step": 2547 }, { "epoch": 1.0290894399272763, "grad_norm": 0.5390625, "learning_rate": 0.0002386650485436893, "loss": 0.5385, "step": 2548 }, { "epoch": 1.0294934599262664, "grad_norm": 0.5390625, "learning_rate": 0.0002386407766990291, "loss": 0.636, "step": 2549 }, { "epoch": 1.0298974799252563, "grad_norm": 0.6953125, "learning_rate": 0.0002386165048543689, "loss": 0.6655, "step": 2550 }, { "epoch": 1.0303014999242464, "grad_norm": 0.5703125, "learning_rate": 0.00023859223300970873, "loss": 0.7073, "step": 2551 }, { "epoch": 1.0307055199232362, "grad_norm": 0.54296875, "learning_rate": 0.0002385679611650485, "loss": 0.614, "step": 2552 }, { "epoch": 1.031109539922226, "grad_norm": 0.71484375, "learning_rate": 0.00023854368932038834, "loss": 0.729, "step": 2553 }, { "epoch": 1.0315135599212162, "grad_norm": 0.59375, "learning_rate": 0.00023851941747572814, "loss": 0.6979, "step": 2554 }, { "epoch": 1.031917579920206, "grad_norm": 0.546875, "learning_rate": 0.00023849514563106794, "loss": 0.6647, "step": 2555 }, { "epoch": 1.032321599919196, "grad_norm": 0.8359375, "learning_rate": 0.00023847087378640774, "loss": 0.8056, "step": 2556 }, { "epoch": 1.032725619918186, "grad_norm": 0.49609375, "learning_rate": 0.00023844660194174755, "loss": 0.6144, "step": 2557 }, { "epoch": 1.0331296399171759, "grad_norm": 0.61328125, "learning_rate": 0.00023842233009708735, "loss": 0.5566, "step": 2558 }, { "epoch": 1.0335336599161657, "grad_norm": 0.58203125, "learning_rate": 0.00023839805825242718, "loss": 0.5884, "step": 2559 }, { "epoch": 1.0339376799151558, "grad_norm": 0.50390625, "learning_rate": 0.00023837378640776695, "loss": 0.6065, "step": 2560 }, { "epoch": 1.0343416999141457, "grad_norm": 0.66796875, "learning_rate": 0.00023834951456310678, "loss": 0.6614, "step": 2561 }, { "epoch": 1.0347457199131358, "grad_norm": 0.5234375, "learning_rate": 0.00023832524271844658, "loss": 0.6355, "step": 2562 }, { "epoch": 1.0351497399121257, "grad_norm": 0.546875, "learning_rate": 0.0002383009708737864, "loss": 0.6912, "step": 2563 }, { "epoch": 1.0355537599111155, "grad_norm": 0.47265625, "learning_rate": 0.00023827669902912622, "loss": 0.5746, "step": 2564 }, { "epoch": 1.0359577799101056, "grad_norm": 0.51953125, "learning_rate": 0.000238252427184466, "loss": 0.6522, "step": 2565 }, { "epoch": 1.0363617999090955, "grad_norm": 0.546875, "learning_rate": 0.0002382281553398058, "loss": 0.6347, "step": 2566 }, { "epoch": 1.0367658199080854, "grad_norm": 0.486328125, "learning_rate": 0.00023820388349514562, "loss": 0.6004, "step": 2567 }, { "epoch": 1.0371698399070755, "grad_norm": 0.474609375, "learning_rate": 0.00023817961165048543, "loss": 0.5656, "step": 2568 }, { "epoch": 1.0375738599060653, "grad_norm": 0.56640625, "learning_rate": 0.0002381553398058252, "loss": 0.6058, "step": 2569 }, { "epoch": 1.0379778799050552, "grad_norm": 1.546875, "learning_rate": 0.00023813106796116503, "loss": 0.6526, "step": 2570 }, { "epoch": 1.0383818999040453, "grad_norm": 0.63671875, "learning_rate": 0.00023810679611650483, "loss": 0.663, "step": 2571 }, { "epoch": 1.0387859199030351, "grad_norm": 0.49609375, "learning_rate": 0.00023808252427184466, "loss": 0.5645, "step": 2572 }, { "epoch": 1.0391899399020252, "grad_norm": 0.515625, "learning_rate": 0.00023805825242718444, "loss": 0.6588, "step": 2573 }, { "epoch": 1.039593959901015, "grad_norm": 0.66015625, "learning_rate": 0.00023803398058252424, "loss": 0.6525, "step": 2574 }, { "epoch": 1.039997979900005, "grad_norm": 0.54296875, "learning_rate": 0.00023800970873786407, "loss": 0.6371, "step": 2575 }, { "epoch": 1.040401999898995, "grad_norm": 0.67578125, "learning_rate": 0.00023798543689320387, "loss": 0.6408, "step": 2576 }, { "epoch": 1.040806019897985, "grad_norm": 0.58203125, "learning_rate": 0.00023796116504854365, "loss": 0.6807, "step": 2577 }, { "epoch": 1.0412100398969748, "grad_norm": 0.58984375, "learning_rate": 0.00023793689320388348, "loss": 0.6741, "step": 2578 }, { "epoch": 1.041614059895965, "grad_norm": 0.56640625, "learning_rate": 0.00023791262135922328, "loss": 0.6421, "step": 2579 }, { "epoch": 1.0420180798949548, "grad_norm": 0.5625, "learning_rate": 0.00023788834951456308, "loss": 0.6752, "step": 2580 }, { "epoch": 1.0424220998939449, "grad_norm": 0.58984375, "learning_rate": 0.0002378640776699029, "loss": 0.6381, "step": 2581 }, { "epoch": 1.0428261198929347, "grad_norm": 0.54296875, "learning_rate": 0.00023783980582524269, "loss": 0.5701, "step": 2582 }, { "epoch": 1.0432301398919246, "grad_norm": 0.5703125, "learning_rate": 0.00023781553398058251, "loss": 0.6297, "step": 2583 }, { "epoch": 1.0436341598909147, "grad_norm": 0.703125, "learning_rate": 0.00023779126213592232, "loss": 0.7234, "step": 2584 }, { "epoch": 1.0440381798899045, "grad_norm": 0.5078125, "learning_rate": 0.0002377669902912621, "loss": 0.593, "step": 2585 }, { "epoch": 1.0444421998888944, "grad_norm": 0.5390625, "learning_rate": 0.00023774271844660192, "loss": 0.6959, "step": 2586 }, { "epoch": 1.0448462198878845, "grad_norm": 0.54296875, "learning_rate": 0.00023771844660194172, "loss": 0.6133, "step": 2587 }, { "epoch": 1.0452502398868744, "grad_norm": 0.57421875, "learning_rate": 0.00023769417475728153, "loss": 0.6307, "step": 2588 }, { "epoch": 1.0456542598858642, "grad_norm": 0.48828125, "learning_rate": 0.00023766990291262136, "loss": 0.5794, "step": 2589 }, { "epoch": 1.0460582798848543, "grad_norm": 0.75390625, "learning_rate": 0.00023764563106796113, "loss": 0.6444, "step": 2590 }, { "epoch": 1.0464622998838442, "grad_norm": 0.490234375, "learning_rate": 0.00023762135922330096, "loss": 0.6009, "step": 2591 }, { "epoch": 1.0468663198828343, "grad_norm": 0.546875, "learning_rate": 0.00023759708737864076, "loss": 0.6135, "step": 2592 }, { "epoch": 1.0472703398818242, "grad_norm": 0.59765625, "learning_rate": 0.00023757281553398057, "loss": 0.6476, "step": 2593 }, { "epoch": 1.047674359880814, "grad_norm": 0.57421875, "learning_rate": 0.00023754854368932037, "loss": 0.6628, "step": 2594 }, { "epoch": 1.0480783798798041, "grad_norm": 0.482421875, "learning_rate": 0.00023752427184466017, "loss": 0.6309, "step": 2595 }, { "epoch": 1.048482399878794, "grad_norm": 0.5390625, "learning_rate": 0.00023749999999999997, "loss": 0.6709, "step": 2596 }, { "epoch": 1.0488864198777839, "grad_norm": 0.5703125, "learning_rate": 0.0002374757281553398, "loss": 0.6564, "step": 2597 }, { "epoch": 1.049290439876774, "grad_norm": 0.462890625, "learning_rate": 0.00023745145631067958, "loss": 0.5685, "step": 2598 }, { "epoch": 1.0496944598757638, "grad_norm": 0.56640625, "learning_rate": 0.00023742718446601938, "loss": 0.6169, "step": 2599 }, { "epoch": 1.050098479874754, "grad_norm": 0.447265625, "learning_rate": 0.0002374029126213592, "loss": 0.6011, "step": 2600 }, { "epoch": 1.0505024998737438, "grad_norm": 0.6015625, "learning_rate": 0.000237378640776699, "loss": 0.6913, "step": 2601 }, { "epoch": 1.0509065198727336, "grad_norm": 0.52734375, "learning_rate": 0.00023735436893203884, "loss": 0.5935, "step": 2602 }, { "epoch": 1.0513105398717237, "grad_norm": 0.490234375, "learning_rate": 0.00023733009708737862, "loss": 0.5769, "step": 2603 }, { "epoch": 1.0517145598707136, "grad_norm": 0.75, "learning_rate": 0.00023730582524271842, "loss": 0.6944, "step": 2604 }, { "epoch": 1.0521185798697035, "grad_norm": 0.57421875, "learning_rate": 0.00023728155339805825, "loss": 0.6961, "step": 2605 }, { "epoch": 1.0525225998686936, "grad_norm": 0.63671875, "learning_rate": 0.00023725728155339805, "loss": 0.6464, "step": 2606 }, { "epoch": 1.0529266198676834, "grad_norm": 0.6328125, "learning_rate": 0.00023723300970873783, "loss": 0.6172, "step": 2607 }, { "epoch": 1.0533306398666733, "grad_norm": 0.53515625, "learning_rate": 0.00023720873786407765, "loss": 0.6273, "step": 2608 }, { "epoch": 1.0537346598656634, "grad_norm": 0.62890625, "learning_rate": 0.00023718446601941746, "loss": 0.6325, "step": 2609 }, { "epoch": 1.0541386798646533, "grad_norm": 0.462890625, "learning_rate": 0.00023716019417475723, "loss": 0.5675, "step": 2610 }, { "epoch": 1.0545426998636434, "grad_norm": 0.55859375, "learning_rate": 0.00023713592233009706, "loss": 0.6156, "step": 2611 }, { "epoch": 1.0549467198626332, "grad_norm": 0.515625, "learning_rate": 0.00023711165048543686, "loss": 0.6076, "step": 2612 }, { "epoch": 1.055350739861623, "grad_norm": 0.53515625, "learning_rate": 0.0002370873786407767, "loss": 0.6973, "step": 2613 }, { "epoch": 1.0557547598606132, "grad_norm": 0.53515625, "learning_rate": 0.0002370631067961165, "loss": 0.5916, "step": 2614 }, { "epoch": 1.056158779859603, "grad_norm": 0.53125, "learning_rate": 0.00023703883495145627, "loss": 0.601, "step": 2615 }, { "epoch": 1.056562799858593, "grad_norm": 0.60546875, "learning_rate": 0.0002370145631067961, "loss": 0.6077, "step": 2616 }, { "epoch": 1.056966819857583, "grad_norm": 0.58203125, "learning_rate": 0.0002369902912621359, "loss": 0.6046, "step": 2617 }, { "epoch": 1.0573708398565729, "grad_norm": 0.57421875, "learning_rate": 0.0002369660194174757, "loss": 0.6556, "step": 2618 }, { "epoch": 1.0577748598555627, "grad_norm": 0.609375, "learning_rate": 0.00023694174757281553, "loss": 0.6414, "step": 2619 }, { "epoch": 1.0581788798545528, "grad_norm": 0.60546875, "learning_rate": 0.0002369174757281553, "loss": 0.6256, "step": 2620 }, { "epoch": 1.0585828998535427, "grad_norm": 0.4921875, "learning_rate": 0.0002368932038834951, "loss": 0.6229, "step": 2621 }, { "epoch": 1.0589869198525328, "grad_norm": 0.490234375, "learning_rate": 0.00023686893203883494, "loss": 0.629, "step": 2622 }, { "epoch": 1.0593909398515227, "grad_norm": 0.65234375, "learning_rate": 0.00023684466019417472, "loss": 0.5684, "step": 2623 }, { "epoch": 1.0597949598505125, "grad_norm": 0.51171875, "learning_rate": 0.00023682038834951455, "loss": 0.6184, "step": 2624 }, { "epoch": 1.0601989798495026, "grad_norm": 0.67578125, "learning_rate": 0.00023679611650485435, "loss": 0.6569, "step": 2625 }, { "epoch": 1.0606029998484925, "grad_norm": 0.5390625, "learning_rate": 0.00023677184466019415, "loss": 0.6273, "step": 2626 }, { "epoch": 1.0610070198474824, "grad_norm": 0.474609375, "learning_rate": 0.00023674757281553398, "loss": 0.6063, "step": 2627 }, { "epoch": 1.0614110398464724, "grad_norm": 0.53125, "learning_rate": 0.00023672330097087376, "loss": 0.591, "step": 2628 }, { "epoch": 1.0618150598454623, "grad_norm": 0.462890625, "learning_rate": 0.00023669902912621356, "loss": 0.571, "step": 2629 }, { "epoch": 1.0622190798444524, "grad_norm": 0.546875, "learning_rate": 0.0002366747572815534, "loss": 0.6736, "step": 2630 }, { "epoch": 1.0626230998434423, "grad_norm": 0.5390625, "learning_rate": 0.0002366504854368932, "loss": 0.6567, "step": 2631 }, { "epoch": 1.0630271198424321, "grad_norm": 0.625, "learning_rate": 0.000236626213592233, "loss": 0.6736, "step": 2632 }, { "epoch": 1.0634311398414222, "grad_norm": 0.51171875, "learning_rate": 0.0002366019417475728, "loss": 0.6682, "step": 2633 }, { "epoch": 1.063835159840412, "grad_norm": 0.4296875, "learning_rate": 0.0002365776699029126, "loss": 0.5849, "step": 2634 }, { "epoch": 1.064239179839402, "grad_norm": 0.48828125, "learning_rate": 0.00023655339805825243, "loss": 0.598, "step": 2635 }, { "epoch": 1.064643199838392, "grad_norm": 0.5859375, "learning_rate": 0.0002365291262135922, "loss": 0.6548, "step": 2636 }, { "epoch": 1.065047219837382, "grad_norm": 0.671875, "learning_rate": 0.000236504854368932, "loss": 0.6975, "step": 2637 }, { "epoch": 1.0654512398363718, "grad_norm": 0.5859375, "learning_rate": 0.00023648058252427183, "loss": 0.6769, "step": 2638 }, { "epoch": 1.065855259835362, "grad_norm": 0.6015625, "learning_rate": 0.00023645631067961164, "loss": 0.6746, "step": 2639 }, { "epoch": 1.0662592798343518, "grad_norm": 0.50390625, "learning_rate": 0.0002364320388349514, "loss": 0.5758, "step": 2640 }, { "epoch": 1.0666632998333418, "grad_norm": 0.5078125, "learning_rate": 0.00023640776699029124, "loss": 0.5712, "step": 2641 }, { "epoch": 1.0670673198323317, "grad_norm": 0.75, "learning_rate": 0.00023638349514563104, "loss": 0.7304, "step": 2642 }, { "epoch": 1.0674713398313216, "grad_norm": 0.65625, "learning_rate": 0.00023635922330097087, "loss": 0.6654, "step": 2643 }, { "epoch": 1.0678753598303117, "grad_norm": 0.55078125, "learning_rate": 0.00023633495145631067, "loss": 0.7117, "step": 2644 }, { "epoch": 1.0682793798293015, "grad_norm": 0.54296875, "learning_rate": 0.00023631067961165045, "loss": 0.6652, "step": 2645 }, { "epoch": 1.0686833998282914, "grad_norm": 1.0625, "learning_rate": 0.00023628640776699028, "loss": 0.8525, "step": 2646 }, { "epoch": 1.0690874198272815, "grad_norm": 0.53515625, "learning_rate": 0.00023626213592233008, "loss": 0.5976, "step": 2647 }, { "epoch": 1.0694914398262714, "grad_norm": 0.5, "learning_rate": 0.00023623786407766986, "loss": 0.567, "step": 2648 }, { "epoch": 1.0698954598252612, "grad_norm": 0.5390625, "learning_rate": 0.00023621359223300969, "loss": 0.6012, "step": 2649 }, { "epoch": 1.0702994798242513, "grad_norm": 0.65234375, "learning_rate": 0.0002361893203883495, "loss": 0.7015, "step": 2650 }, { "epoch": 1.0707034998232412, "grad_norm": 0.51171875, "learning_rate": 0.0002361650485436893, "loss": 0.6033, "step": 2651 }, { "epoch": 1.0711075198222313, "grad_norm": 0.5703125, "learning_rate": 0.00023614077669902912, "loss": 0.6318, "step": 2652 }, { "epoch": 1.0715115398212212, "grad_norm": 0.51953125, "learning_rate": 0.0002361165048543689, "loss": 0.6655, "step": 2653 }, { "epoch": 1.071915559820211, "grad_norm": 0.5078125, "learning_rate": 0.00023609223300970873, "loss": 0.5778, "step": 2654 }, { "epoch": 1.0723195798192011, "grad_norm": 0.48046875, "learning_rate": 0.00023606796116504853, "loss": 0.5438, "step": 2655 }, { "epoch": 1.072723599818191, "grad_norm": 0.51953125, "learning_rate": 0.00023604368932038833, "loss": 0.6356, "step": 2656 }, { "epoch": 1.0731276198171809, "grad_norm": 0.54296875, "learning_rate": 0.00023601941747572813, "loss": 0.6455, "step": 2657 }, { "epoch": 1.073531639816171, "grad_norm": 0.796875, "learning_rate": 0.00023599514563106793, "loss": 0.5962, "step": 2658 }, { "epoch": 1.0739356598151608, "grad_norm": 0.466796875, "learning_rate": 0.00023597087378640774, "loss": 0.5791, "step": 2659 }, { "epoch": 1.074339679814151, "grad_norm": 0.546875, "learning_rate": 0.00023594660194174757, "loss": 0.5814, "step": 2660 }, { "epoch": 1.0747436998131408, "grad_norm": 0.49609375, "learning_rate": 0.00023592233009708734, "loss": 0.6439, "step": 2661 }, { "epoch": 1.0751477198121306, "grad_norm": 0.6171875, "learning_rate": 0.00023589805825242717, "loss": 0.645, "step": 2662 }, { "epoch": 1.0755517398111207, "grad_norm": 0.453125, "learning_rate": 0.00023587378640776697, "loss": 0.5496, "step": 2663 }, { "epoch": 1.0759557598101106, "grad_norm": 0.5234375, "learning_rate": 0.00023584951456310678, "loss": 0.604, "step": 2664 }, { "epoch": 1.0763597798091005, "grad_norm": 0.6328125, "learning_rate": 0.0002358252427184466, "loss": 0.6551, "step": 2665 }, { "epoch": 1.0767637998080906, "grad_norm": 0.53515625, "learning_rate": 0.00023580097087378638, "loss": 0.7027, "step": 2666 }, { "epoch": 1.0771678198070804, "grad_norm": 0.55078125, "learning_rate": 0.00023577669902912618, "loss": 0.6348, "step": 2667 }, { "epoch": 1.0775718398060703, "grad_norm": 0.6015625, "learning_rate": 0.000235752427184466, "loss": 0.6735, "step": 2668 }, { "epoch": 1.0779758598050604, "grad_norm": 0.5390625, "learning_rate": 0.00023572815533980581, "loss": 0.6474, "step": 2669 }, { "epoch": 1.0783798798040503, "grad_norm": 0.59765625, "learning_rate": 0.0002357038834951456, "loss": 0.6253, "step": 2670 }, { "epoch": 1.0787838998030403, "grad_norm": 0.6171875, "learning_rate": 0.00023567961165048542, "loss": 0.6302, "step": 2671 }, { "epoch": 1.0791879198020302, "grad_norm": 0.54296875, "learning_rate": 0.00023565533980582522, "loss": 0.6045, "step": 2672 }, { "epoch": 1.07959193980102, "grad_norm": 0.54296875, "learning_rate": 0.00023563106796116505, "loss": 0.6319, "step": 2673 }, { "epoch": 1.0799959598000102, "grad_norm": 0.54296875, "learning_rate": 0.00023560679611650483, "loss": 0.677, "step": 2674 }, { "epoch": 1.080399979799, "grad_norm": 0.75390625, "learning_rate": 0.00023558252427184463, "loss": 0.7009, "step": 2675 }, { "epoch": 1.08080399979799, "grad_norm": 0.49609375, "learning_rate": 0.00023555825242718446, "loss": 0.5265, "step": 2676 }, { "epoch": 1.08120801979698, "grad_norm": 0.466796875, "learning_rate": 0.00023553398058252426, "loss": 0.5982, "step": 2677 }, { "epoch": 1.0816120397959699, "grad_norm": 0.5234375, "learning_rate": 0.00023550970873786404, "loss": 0.6513, "step": 2678 }, { "epoch": 1.08201605979496, "grad_norm": 0.61328125, "learning_rate": 0.00023548543689320386, "loss": 0.6917, "step": 2679 }, { "epoch": 1.0824200797939498, "grad_norm": 0.5546875, "learning_rate": 0.00023546116504854367, "loss": 0.6639, "step": 2680 }, { "epoch": 1.0828240997929397, "grad_norm": 0.66796875, "learning_rate": 0.00023543689320388347, "loss": 0.7033, "step": 2681 }, { "epoch": 1.0832281197919298, "grad_norm": 0.75, "learning_rate": 0.0002354126213592233, "loss": 0.7631, "step": 2682 }, { "epoch": 1.0836321397909197, "grad_norm": 0.55078125, "learning_rate": 0.00023538834951456307, "loss": 0.6212, "step": 2683 }, { "epoch": 1.0840361597899095, "grad_norm": 0.5859375, "learning_rate": 0.0002353640776699029, "loss": 0.5849, "step": 2684 }, { "epoch": 1.0844401797888996, "grad_norm": 0.61328125, "learning_rate": 0.0002353398058252427, "loss": 0.6638, "step": 2685 }, { "epoch": 1.0848441997878895, "grad_norm": 1.6171875, "learning_rate": 0.00023531553398058248, "loss": 0.7474, "step": 2686 }, { "epoch": 1.0852482197868794, "grad_norm": 0.53125, "learning_rate": 0.0002352912621359223, "loss": 0.6747, "step": 2687 }, { "epoch": 1.0856522397858694, "grad_norm": 0.52734375, "learning_rate": 0.0002352669902912621, "loss": 0.6325, "step": 2688 }, { "epoch": 1.0860562597848593, "grad_norm": 0.55859375, "learning_rate": 0.00023524271844660192, "loss": 0.5817, "step": 2689 }, { "epoch": 1.0864602797838494, "grad_norm": 0.515625, "learning_rate": 0.00023521844660194174, "loss": 0.663, "step": 2690 }, { "epoch": 1.0868642997828393, "grad_norm": 0.5234375, "learning_rate": 0.00023519417475728152, "loss": 0.5802, "step": 2691 }, { "epoch": 1.0872683197818291, "grad_norm": 0.63671875, "learning_rate": 0.00023516990291262135, "loss": 0.5979, "step": 2692 }, { "epoch": 1.0876723397808192, "grad_norm": 0.6640625, "learning_rate": 0.00023514563106796115, "loss": 0.6811, "step": 2693 }, { "epoch": 1.088076359779809, "grad_norm": 0.458984375, "learning_rate": 0.00023512135922330095, "loss": 0.6143, "step": 2694 }, { "epoch": 1.088480379778799, "grad_norm": 0.455078125, "learning_rate": 0.00023509708737864076, "loss": 0.5903, "step": 2695 }, { "epoch": 1.088884399777789, "grad_norm": 0.5078125, "learning_rate": 0.00023507281553398056, "loss": 0.6265, "step": 2696 }, { "epoch": 1.089288419776779, "grad_norm": 0.6484375, "learning_rate": 0.00023504854368932036, "loss": 0.6929, "step": 2697 }, { "epoch": 1.089692439775769, "grad_norm": 0.5625, "learning_rate": 0.0002350242718446602, "loss": 0.6229, "step": 2698 }, { "epoch": 1.0900964597747589, "grad_norm": 0.5390625, "learning_rate": 0.00023499999999999997, "loss": 0.6617, "step": 2699 }, { "epoch": 1.0905004797737488, "grad_norm": 0.52734375, "learning_rate": 0.00023497572815533977, "loss": 0.6148, "step": 2700 }, { "epoch": 1.0909044997727388, "grad_norm": 0.62890625, "learning_rate": 0.0002349514563106796, "loss": 0.6203, "step": 2701 }, { "epoch": 1.0913085197717287, "grad_norm": 0.60546875, "learning_rate": 0.0002349271844660194, "loss": 0.6503, "step": 2702 }, { "epoch": 1.0917125397707186, "grad_norm": 0.54296875, "learning_rate": 0.00023490291262135923, "loss": 0.6631, "step": 2703 }, { "epoch": 1.0921165597697087, "grad_norm": 0.56640625, "learning_rate": 0.000234878640776699, "loss": 0.6746, "step": 2704 }, { "epoch": 1.0925205797686985, "grad_norm": 0.609375, "learning_rate": 0.0002348543689320388, "loss": 0.5825, "step": 2705 }, { "epoch": 1.0929245997676884, "grad_norm": 0.59765625, "learning_rate": 0.00023483009708737864, "loss": 0.6053, "step": 2706 }, { "epoch": 1.0933286197666785, "grad_norm": 0.66015625, "learning_rate": 0.00023480582524271844, "loss": 0.6552, "step": 2707 }, { "epoch": 1.0937326397656684, "grad_norm": 0.57421875, "learning_rate": 0.00023478155339805821, "loss": 0.6709, "step": 2708 }, { "epoch": 1.0941366597646582, "grad_norm": 0.5546875, "learning_rate": 0.00023475728155339804, "loss": 0.6604, "step": 2709 }, { "epoch": 1.0945406797636483, "grad_norm": 0.56640625, "learning_rate": 0.00023473300970873785, "loss": 0.6674, "step": 2710 }, { "epoch": 1.0949446997626382, "grad_norm": 0.68359375, "learning_rate": 0.00023470873786407762, "loss": 0.7394, "step": 2711 }, { "epoch": 1.0953487197616283, "grad_norm": 0.451171875, "learning_rate": 0.00023468446601941745, "loss": 0.6232, "step": 2712 }, { "epoch": 1.0957527397606182, "grad_norm": 0.51171875, "learning_rate": 0.00023466019417475725, "loss": 0.5744, "step": 2713 }, { "epoch": 1.096156759759608, "grad_norm": 0.59765625, "learning_rate": 0.00023463592233009708, "loss": 0.5881, "step": 2714 }, { "epoch": 1.0965607797585981, "grad_norm": 0.671875, "learning_rate": 0.00023461165048543688, "loss": 0.5816, "step": 2715 }, { "epoch": 1.096964799757588, "grad_norm": 0.5546875, "learning_rate": 0.00023458737864077666, "loss": 0.6, "step": 2716 }, { "epoch": 1.0973688197565779, "grad_norm": 0.51953125, "learning_rate": 0.0002345631067961165, "loss": 0.6562, "step": 2717 }, { "epoch": 1.097772839755568, "grad_norm": 0.72265625, "learning_rate": 0.0002345388349514563, "loss": 0.6439, "step": 2718 }, { "epoch": 1.0981768597545578, "grad_norm": 0.58203125, "learning_rate": 0.0002345145631067961, "loss": 0.6526, "step": 2719 }, { "epoch": 1.098580879753548, "grad_norm": 0.59765625, "learning_rate": 0.0002344902912621359, "loss": 0.6213, "step": 2720 }, { "epoch": 1.0989848997525378, "grad_norm": 0.5, "learning_rate": 0.0002344660194174757, "loss": 0.6009, "step": 2721 }, { "epoch": 1.0993889197515276, "grad_norm": 0.578125, "learning_rate": 0.0002344417475728155, "loss": 0.6478, "step": 2722 }, { "epoch": 1.0997929397505177, "grad_norm": 0.55859375, "learning_rate": 0.00023441747572815533, "loss": 0.5254, "step": 2723 }, { "epoch": 1.1001969597495076, "grad_norm": 0.625, "learning_rate": 0.0002343932038834951, "loss": 0.6446, "step": 2724 }, { "epoch": 1.1006009797484975, "grad_norm": 0.55859375, "learning_rate": 0.00023436893203883494, "loss": 0.6376, "step": 2725 }, { "epoch": 1.1010049997474876, "grad_norm": 0.6875, "learning_rate": 0.00023434466019417474, "loss": 0.7427, "step": 2726 }, { "epoch": 1.1014090197464774, "grad_norm": 0.57421875, "learning_rate": 0.00023432038834951454, "loss": 0.6072, "step": 2727 }, { "epoch": 1.1018130397454673, "grad_norm": 0.58984375, "learning_rate": 0.00023429611650485437, "loss": 0.7211, "step": 2728 }, { "epoch": 1.1022170597444574, "grad_norm": 0.6015625, "learning_rate": 0.00023427184466019414, "loss": 0.6538, "step": 2729 }, { "epoch": 1.1026210797434473, "grad_norm": 0.466796875, "learning_rate": 0.00023424757281553395, "loss": 0.6543, "step": 2730 }, { "epoch": 1.1030250997424373, "grad_norm": 0.66796875, "learning_rate": 0.00023422330097087378, "loss": 0.6772, "step": 2731 }, { "epoch": 1.1034291197414272, "grad_norm": 0.64453125, "learning_rate": 0.00023419902912621358, "loss": 0.6434, "step": 2732 }, { "epoch": 1.103833139740417, "grad_norm": 0.474609375, "learning_rate": 0.00023417475728155338, "loss": 0.5985, "step": 2733 }, { "epoch": 1.1042371597394072, "grad_norm": 0.6328125, "learning_rate": 0.00023415048543689318, "loss": 0.5976, "step": 2734 }, { "epoch": 1.104641179738397, "grad_norm": 0.546875, "learning_rate": 0.00023412621359223299, "loss": 0.5814, "step": 2735 }, { "epoch": 1.105045199737387, "grad_norm": 0.51953125, "learning_rate": 0.00023410194174757282, "loss": 0.6047, "step": 2736 }, { "epoch": 1.105449219736377, "grad_norm": 0.578125, "learning_rate": 0.0002340776699029126, "loss": 0.5981, "step": 2737 }, { "epoch": 1.1058532397353669, "grad_norm": 0.58203125, "learning_rate": 0.0002340533980582524, "loss": 0.6162, "step": 2738 }, { "epoch": 1.106257259734357, "grad_norm": 0.51171875, "learning_rate": 0.00023402912621359222, "loss": 0.6092, "step": 2739 }, { "epoch": 1.1066612797333468, "grad_norm": 0.59765625, "learning_rate": 0.00023400485436893202, "loss": 0.5619, "step": 2740 }, { "epoch": 1.1070652997323367, "grad_norm": 0.72265625, "learning_rate": 0.0002339805825242718, "loss": 0.6803, "step": 2741 }, { "epoch": 1.1074693197313268, "grad_norm": 0.498046875, "learning_rate": 0.00023395631067961163, "loss": 0.6158, "step": 2742 }, { "epoch": 1.1078733397303167, "grad_norm": 0.62109375, "learning_rate": 0.00023393203883495143, "loss": 0.7064, "step": 2743 }, { "epoch": 1.1082773597293065, "grad_norm": 0.6171875, "learning_rate": 0.00023390776699029126, "loss": 0.7374, "step": 2744 }, { "epoch": 1.1086813797282966, "grad_norm": 0.65625, "learning_rate": 0.00023388349514563106, "loss": 0.6024, "step": 2745 }, { "epoch": 1.1090853997272865, "grad_norm": 0.63671875, "learning_rate": 0.00023385922330097084, "loss": 0.6616, "step": 2746 }, { "epoch": 1.1094894197262763, "grad_norm": 0.5546875, "learning_rate": 0.00023383495145631067, "loss": 0.5919, "step": 2747 }, { "epoch": 1.1098934397252664, "grad_norm": 0.5, "learning_rate": 0.00023381067961165047, "loss": 0.631, "step": 2748 }, { "epoch": 1.1102974597242563, "grad_norm": 0.45703125, "learning_rate": 0.00023378640776699025, "loss": 0.6313, "step": 2749 }, { "epoch": 1.1107014797232464, "grad_norm": 0.59765625, "learning_rate": 0.00023376213592233007, "loss": 0.6669, "step": 2750 }, { "epoch": 1.1111054997222363, "grad_norm": 0.5859375, "learning_rate": 0.00023373786407766988, "loss": 0.6212, "step": 2751 }, { "epoch": 1.1115095197212261, "grad_norm": 0.5078125, "learning_rate": 0.00023371359223300968, "loss": 0.5539, "step": 2752 }, { "epoch": 1.1119135397202162, "grad_norm": 0.4453125, "learning_rate": 0.0002336893203883495, "loss": 0.5884, "step": 2753 }, { "epoch": 1.112317559719206, "grad_norm": 2.078125, "learning_rate": 0.00023366504854368928, "loss": 0.6446, "step": 2754 }, { "epoch": 1.112721579718196, "grad_norm": 0.61328125, "learning_rate": 0.00023364077669902911, "loss": 0.6866, "step": 2755 }, { "epoch": 1.113125599717186, "grad_norm": 0.490234375, "learning_rate": 0.00023361650485436892, "loss": 0.588, "step": 2756 }, { "epoch": 1.113529619716176, "grad_norm": 0.61328125, "learning_rate": 0.00023359223300970872, "loss": 0.6638, "step": 2757 }, { "epoch": 1.113933639715166, "grad_norm": 0.73828125, "learning_rate": 0.00023356796116504852, "loss": 0.7065, "step": 2758 }, { "epoch": 1.1143376597141559, "grad_norm": 0.625, "learning_rate": 0.00023354368932038832, "loss": 0.7035, "step": 2759 }, { "epoch": 1.1147416797131457, "grad_norm": 0.5859375, "learning_rate": 0.00023351941747572813, "loss": 0.6817, "step": 2760 }, { "epoch": 1.1151456997121358, "grad_norm": 0.578125, "learning_rate": 0.00023349514563106795, "loss": 0.6492, "step": 2761 }, { "epoch": 1.1155497197111257, "grad_norm": 0.51953125, "learning_rate": 0.00023347087378640773, "loss": 0.6268, "step": 2762 }, { "epoch": 1.1159537397101156, "grad_norm": 0.51171875, "learning_rate": 0.00023344660194174756, "loss": 0.6292, "step": 2763 }, { "epoch": 1.1163577597091057, "grad_norm": 0.75, "learning_rate": 0.00023342233009708736, "loss": 0.6126, "step": 2764 }, { "epoch": 1.1167617797080955, "grad_norm": 0.65625, "learning_rate": 0.00023339805825242716, "loss": 0.7697, "step": 2765 }, { "epoch": 1.1171657997070854, "grad_norm": 0.5859375, "learning_rate": 0.000233373786407767, "loss": 0.6449, "step": 2766 }, { "epoch": 1.1175698197060755, "grad_norm": 0.50390625, "learning_rate": 0.00023334951456310677, "loss": 0.5851, "step": 2767 }, { "epoch": 1.1179738397050654, "grad_norm": 0.5703125, "learning_rate": 0.00023332524271844657, "loss": 0.6431, "step": 2768 }, { "epoch": 1.1183778597040555, "grad_norm": 0.54296875, "learning_rate": 0.0002333009708737864, "loss": 0.6306, "step": 2769 }, { "epoch": 1.1187818797030453, "grad_norm": 0.578125, "learning_rate": 0.0002332766990291262, "loss": 0.6649, "step": 2770 }, { "epoch": 1.1191858997020352, "grad_norm": 0.50390625, "learning_rate": 0.00023325242718446598, "loss": 0.645, "step": 2771 }, { "epoch": 1.1195899197010253, "grad_norm": 0.546875, "learning_rate": 0.0002332281553398058, "loss": 0.678, "step": 2772 }, { "epoch": 1.1199939397000152, "grad_norm": 0.49609375, "learning_rate": 0.0002332038834951456, "loss": 0.6105, "step": 2773 }, { "epoch": 1.120397959699005, "grad_norm": 0.51171875, "learning_rate": 0.00023317961165048544, "loss": 0.6134, "step": 2774 }, { "epoch": 1.120801979697995, "grad_norm": 0.5078125, "learning_rate": 0.00023315533980582521, "loss": 0.5235, "step": 2775 }, { "epoch": 1.121205999696985, "grad_norm": 0.62109375, "learning_rate": 0.00023313106796116502, "loss": 0.6946, "step": 2776 }, { "epoch": 1.1216100196959748, "grad_norm": 0.52734375, "learning_rate": 0.00023310679611650485, "loss": 0.5917, "step": 2777 }, { "epoch": 1.122014039694965, "grad_norm": 0.59765625, "learning_rate": 0.00023308252427184465, "loss": 0.6687, "step": 2778 }, { "epoch": 1.1224180596939548, "grad_norm": 0.6015625, "learning_rate": 0.00023305825242718442, "loss": 0.6973, "step": 2779 }, { "epoch": 1.122822079692945, "grad_norm": 0.462890625, "learning_rate": 0.00023303398058252425, "loss": 0.6144, "step": 2780 }, { "epoch": 1.1232260996919348, "grad_norm": 0.63671875, "learning_rate": 0.00023300970873786406, "loss": 0.649, "step": 2781 }, { "epoch": 1.1236301196909246, "grad_norm": 0.5, "learning_rate": 0.00023298543689320386, "loss": 0.6289, "step": 2782 }, { "epoch": 1.1240341396899147, "grad_norm": 0.5078125, "learning_rate": 0.0002329611650485437, "loss": 0.6036, "step": 2783 }, { "epoch": 1.1244381596889046, "grad_norm": 0.546875, "learning_rate": 0.00023293689320388346, "loss": 0.6513, "step": 2784 }, { "epoch": 1.1248421796878945, "grad_norm": 0.6015625, "learning_rate": 0.0002329126213592233, "loss": 0.7036, "step": 2785 }, { "epoch": 1.1252461996868846, "grad_norm": 0.50390625, "learning_rate": 0.0002328883495145631, "loss": 0.6231, "step": 2786 }, { "epoch": 1.1256502196858744, "grad_norm": 0.51953125, "learning_rate": 0.00023286407766990287, "loss": 0.5893, "step": 2787 }, { "epoch": 1.1260542396848643, "grad_norm": 0.5703125, "learning_rate": 0.0002328398058252427, "loss": 0.6353, "step": 2788 }, { "epoch": 1.1264582596838544, "grad_norm": 0.55078125, "learning_rate": 0.0002328155339805825, "loss": 0.5902, "step": 2789 }, { "epoch": 1.1268622796828442, "grad_norm": 0.419921875, "learning_rate": 0.0002327912621359223, "loss": 0.5546, "step": 2790 }, { "epoch": 1.1272662996818343, "grad_norm": 0.58203125, "learning_rate": 0.00023276699029126213, "loss": 0.622, "step": 2791 }, { "epoch": 1.1276703196808242, "grad_norm": 0.51171875, "learning_rate": 0.0002327427184466019, "loss": 0.6151, "step": 2792 }, { "epoch": 1.128074339679814, "grad_norm": 0.609375, "learning_rate": 0.00023271844660194174, "loss": 0.6075, "step": 2793 }, { "epoch": 1.1284783596788042, "grad_norm": 0.57421875, "learning_rate": 0.00023269417475728154, "loss": 0.6576, "step": 2794 }, { "epoch": 1.128882379677794, "grad_norm": 0.5234375, "learning_rate": 0.00023266990291262134, "loss": 0.6591, "step": 2795 }, { "epoch": 1.1292863996767841, "grad_norm": 0.53515625, "learning_rate": 0.00023264563106796115, "loss": 0.6269, "step": 2796 }, { "epoch": 1.129690419675774, "grad_norm": 0.55078125, "learning_rate": 0.00023262135922330095, "loss": 0.6385, "step": 2797 }, { "epoch": 1.1300944396747639, "grad_norm": 0.515625, "learning_rate": 0.00023259708737864075, "loss": 0.6228, "step": 2798 }, { "epoch": 1.130498459673754, "grad_norm": 0.60546875, "learning_rate": 0.00023257281553398058, "loss": 0.7343, "step": 2799 }, { "epoch": 1.1309024796727438, "grad_norm": 0.474609375, "learning_rate": 0.00023254854368932035, "loss": 0.6461, "step": 2800 }, { "epoch": 1.1313064996717337, "grad_norm": 0.44140625, "learning_rate": 0.00023252427184466016, "loss": 0.5399, "step": 2801 }, { "epoch": 1.1317105196707238, "grad_norm": 0.53515625, "learning_rate": 0.00023249999999999999, "loss": 0.715, "step": 2802 }, { "epoch": 1.1321145396697136, "grad_norm": 0.53515625, "learning_rate": 0.0002324757281553398, "loss": 0.5644, "step": 2803 }, { "epoch": 1.1325185596687035, "grad_norm": 0.58203125, "learning_rate": 0.00023245145631067962, "loss": 0.6418, "step": 2804 }, { "epoch": 1.1329225796676936, "grad_norm": 0.58203125, "learning_rate": 0.0002324271844660194, "loss": 0.6915, "step": 2805 }, { "epoch": 1.1333265996666835, "grad_norm": 0.5234375, "learning_rate": 0.0002324029126213592, "loss": 0.5982, "step": 2806 }, { "epoch": 1.1337306196656733, "grad_norm": 0.5390625, "learning_rate": 0.00023237864077669903, "loss": 0.654, "step": 2807 }, { "epoch": 1.1341346396646634, "grad_norm": 0.470703125, "learning_rate": 0.00023235436893203883, "loss": 0.5894, "step": 2808 }, { "epoch": 1.1345386596636533, "grad_norm": 0.5859375, "learning_rate": 0.0002323300970873786, "loss": 0.6119, "step": 2809 }, { "epoch": 1.1349426796626434, "grad_norm": 0.765625, "learning_rate": 0.00023230582524271843, "loss": 0.566, "step": 2810 }, { "epoch": 1.1353466996616333, "grad_norm": 0.5859375, "learning_rate": 0.00023228155339805823, "loss": 0.6108, "step": 2811 }, { "epoch": 1.1357507196606231, "grad_norm": 0.50390625, "learning_rate": 0.000232257281553398, "loss": 0.635, "step": 2812 }, { "epoch": 1.1361547396596132, "grad_norm": 0.5625, "learning_rate": 0.00023223300970873784, "loss": 0.6665, "step": 2813 }, { "epoch": 1.136558759658603, "grad_norm": 0.466796875, "learning_rate": 0.00023220873786407764, "loss": 0.5492, "step": 2814 }, { "epoch": 1.136962779657593, "grad_norm": 0.6484375, "learning_rate": 0.00023218446601941747, "loss": 0.645, "step": 2815 }, { "epoch": 1.137366799656583, "grad_norm": 0.458984375, "learning_rate": 0.00023216019417475727, "loss": 0.5562, "step": 2816 }, { "epoch": 1.137770819655573, "grad_norm": 0.546875, "learning_rate": 0.00023213592233009705, "loss": 0.6224, "step": 2817 }, { "epoch": 1.138174839654563, "grad_norm": 0.6015625, "learning_rate": 0.00023211165048543688, "loss": 0.6852, "step": 2818 }, { "epoch": 1.1385788596535529, "grad_norm": 0.55859375, "learning_rate": 0.00023208737864077668, "loss": 0.6595, "step": 2819 }, { "epoch": 1.1389828796525427, "grad_norm": 0.5859375, "learning_rate": 0.00023206310679611648, "loss": 0.6021, "step": 2820 }, { "epoch": 1.1393868996515328, "grad_norm": 0.482421875, "learning_rate": 0.00023203883495145629, "loss": 0.5979, "step": 2821 }, { "epoch": 1.1397909196505227, "grad_norm": 0.56640625, "learning_rate": 0.0002320145631067961, "loss": 0.6003, "step": 2822 }, { "epoch": 1.1401949396495126, "grad_norm": 0.61328125, "learning_rate": 0.0002319902912621359, "loss": 0.7235, "step": 2823 }, { "epoch": 1.1405989596485027, "grad_norm": 0.53125, "learning_rate": 0.00023196601941747572, "loss": 0.6038, "step": 2824 }, { "epoch": 1.1410029796474925, "grad_norm": 0.54296875, "learning_rate": 0.0002319417475728155, "loss": 0.6534, "step": 2825 }, { "epoch": 1.1414069996464824, "grad_norm": 0.5546875, "learning_rate": 0.00023191747572815532, "loss": 0.6298, "step": 2826 }, { "epoch": 1.1418110196454725, "grad_norm": 0.484375, "learning_rate": 0.00023189320388349513, "loss": 0.5727, "step": 2827 }, { "epoch": 1.1422150396444624, "grad_norm": 0.484375, "learning_rate": 0.00023186893203883493, "loss": 0.5984, "step": 2828 }, { "epoch": 1.1426190596434522, "grad_norm": 0.55078125, "learning_rate": 0.00023184466019417476, "loss": 0.6497, "step": 2829 }, { "epoch": 1.1430230796424423, "grad_norm": 0.703125, "learning_rate": 0.00023182038834951453, "loss": 0.6806, "step": 2830 }, { "epoch": 1.1434270996414322, "grad_norm": 0.578125, "learning_rate": 0.00023179611650485434, "loss": 0.6242, "step": 2831 }, { "epoch": 1.1438311196404223, "grad_norm": 0.578125, "learning_rate": 0.00023177184466019417, "loss": 0.6558, "step": 2832 }, { "epoch": 1.1442351396394121, "grad_norm": 0.53125, "learning_rate": 0.00023174757281553397, "loss": 0.66, "step": 2833 }, { "epoch": 1.144639159638402, "grad_norm": 0.53515625, "learning_rate": 0.00023172330097087377, "loss": 0.64, "step": 2834 }, { "epoch": 1.145043179637392, "grad_norm": 0.546875, "learning_rate": 0.00023169902912621357, "loss": 0.6199, "step": 2835 }, { "epoch": 1.145447199636382, "grad_norm": 0.5703125, "learning_rate": 0.00023167475728155337, "loss": 0.6064, "step": 2836 }, { "epoch": 1.145851219635372, "grad_norm": 0.482421875, "learning_rate": 0.0002316504854368932, "loss": 0.6279, "step": 2837 }, { "epoch": 1.146255239634362, "grad_norm": 0.50390625, "learning_rate": 0.00023162621359223298, "loss": 0.5549, "step": 2838 }, { "epoch": 1.1466592596333518, "grad_norm": 0.6640625, "learning_rate": 0.00023160194174757278, "loss": 0.6374, "step": 2839 }, { "epoch": 1.147063279632342, "grad_norm": 0.49609375, "learning_rate": 0.0002315776699029126, "loss": 0.6862, "step": 2840 }, { "epoch": 1.1474672996313318, "grad_norm": 0.6015625, "learning_rate": 0.0002315533980582524, "loss": 0.7036, "step": 2841 }, { "epoch": 1.1478713196303216, "grad_norm": 0.5703125, "learning_rate": 0.0002315291262135922, "loss": 0.6651, "step": 2842 }, { "epoch": 1.1482753396293117, "grad_norm": 0.5390625, "learning_rate": 0.00023150485436893202, "loss": 0.5972, "step": 2843 }, { "epoch": 1.1486793596283016, "grad_norm": 0.6171875, "learning_rate": 0.00023148058252427182, "loss": 0.6264, "step": 2844 }, { "epoch": 1.1490833796272915, "grad_norm": 0.546875, "learning_rate": 0.00023145631067961165, "loss": 0.5288, "step": 2845 }, { "epoch": 1.1494873996262815, "grad_norm": 0.578125, "learning_rate": 0.00023143203883495145, "loss": 0.6815, "step": 2846 }, { "epoch": 1.1498914196252714, "grad_norm": 0.51171875, "learning_rate": 0.00023140776699029123, "loss": 0.6442, "step": 2847 }, { "epoch": 1.1502954396242613, "grad_norm": 0.73828125, "learning_rate": 0.00023138349514563106, "loss": 0.7739, "step": 2848 }, { "epoch": 1.1506994596232514, "grad_norm": 0.625, "learning_rate": 0.00023135922330097086, "loss": 0.7192, "step": 2849 }, { "epoch": 1.1511034796222412, "grad_norm": 0.54296875, "learning_rate": 0.00023133495145631063, "loss": 0.6611, "step": 2850 }, { "epoch": 1.1515074996212313, "grad_norm": 0.55078125, "learning_rate": 0.00023131067961165046, "loss": 0.67, "step": 2851 }, { "epoch": 1.1519115196202212, "grad_norm": 0.5546875, "learning_rate": 0.00023128640776699027, "loss": 0.6631, "step": 2852 }, { "epoch": 1.152315539619211, "grad_norm": 0.6640625, "learning_rate": 0.00023126213592233007, "loss": 0.6729, "step": 2853 }, { "epoch": 1.1527195596182012, "grad_norm": 0.5703125, "learning_rate": 0.0002312378640776699, "loss": 0.5821, "step": 2854 }, { "epoch": 1.153123579617191, "grad_norm": 0.55859375, "learning_rate": 0.00023121359223300967, "loss": 0.6267, "step": 2855 }, { "epoch": 1.1535275996161811, "grad_norm": 0.515625, "learning_rate": 0.0002311893203883495, "loss": 0.5908, "step": 2856 }, { "epoch": 1.153931619615171, "grad_norm": 0.58984375, "learning_rate": 0.0002311650485436893, "loss": 0.7274, "step": 2857 }, { "epoch": 1.1543356396141609, "grad_norm": 0.5703125, "learning_rate": 0.0002311407766990291, "loss": 0.6444, "step": 2858 }, { "epoch": 1.154739659613151, "grad_norm": 0.55078125, "learning_rate": 0.0002311165048543689, "loss": 0.6522, "step": 2859 }, { "epoch": 1.1551436796121408, "grad_norm": 0.48828125, "learning_rate": 0.0002310922330097087, "loss": 0.6456, "step": 2860 }, { "epoch": 1.1555476996111307, "grad_norm": 0.8046875, "learning_rate": 0.00023106796116504851, "loss": 0.7205, "step": 2861 }, { "epoch": 1.1559517196101208, "grad_norm": 0.5703125, "learning_rate": 0.00023104368932038834, "loss": 0.639, "step": 2862 }, { "epoch": 1.1563557396091106, "grad_norm": 0.5234375, "learning_rate": 0.00023101941747572812, "loss": 0.582, "step": 2863 }, { "epoch": 1.1567597596081005, "grad_norm": 0.48046875, "learning_rate": 0.00023099514563106795, "loss": 0.5623, "step": 2864 }, { "epoch": 1.1571637796070906, "grad_norm": 0.55078125, "learning_rate": 0.00023097087378640775, "loss": 0.6209, "step": 2865 }, { "epoch": 1.1575677996060805, "grad_norm": 0.5390625, "learning_rate": 0.00023094660194174755, "loss": 0.6129, "step": 2866 }, { "epoch": 1.1579718196050703, "grad_norm": 0.5390625, "learning_rate": 0.00023092233009708738, "loss": 0.6451, "step": 2867 }, { "epoch": 1.1583758396040604, "grad_norm": 0.5625, "learning_rate": 0.00023089805825242716, "loss": 0.6155, "step": 2868 }, { "epoch": 1.1587798596030503, "grad_norm": 0.56640625, "learning_rate": 0.00023087378640776696, "loss": 0.6128, "step": 2869 }, { "epoch": 1.1591838796020404, "grad_norm": 0.640625, "learning_rate": 0.0002308495145631068, "loss": 0.7287, "step": 2870 }, { "epoch": 1.1595878996010303, "grad_norm": 0.474609375, "learning_rate": 0.0002308252427184466, "loss": 0.6099, "step": 2871 }, { "epoch": 1.1599919196000201, "grad_norm": 0.494140625, "learning_rate": 0.00023080097087378637, "loss": 0.5753, "step": 2872 }, { "epoch": 1.1603959395990102, "grad_norm": 0.52734375, "learning_rate": 0.0002307766990291262, "loss": 0.6352, "step": 2873 }, { "epoch": 1.160799959598, "grad_norm": 0.6015625, "learning_rate": 0.000230752427184466, "loss": 0.6263, "step": 2874 }, { "epoch": 1.16120397959699, "grad_norm": 0.59765625, "learning_rate": 0.00023072815533980583, "loss": 0.6274, "step": 2875 }, { "epoch": 1.16160799959598, "grad_norm": 0.6328125, "learning_rate": 0.0002307038834951456, "loss": 0.6381, "step": 2876 }, { "epoch": 1.16201201959497, "grad_norm": 0.48828125, "learning_rate": 0.0002306796116504854, "loss": 0.621, "step": 2877 }, { "epoch": 1.16241603959396, "grad_norm": 0.609375, "learning_rate": 0.00023065533980582524, "loss": 0.7122, "step": 2878 }, { "epoch": 1.1628200595929499, "grad_norm": 0.6796875, "learning_rate": 0.00023063106796116504, "loss": 0.6792, "step": 2879 }, { "epoch": 1.1632240795919397, "grad_norm": 0.494140625, "learning_rate": 0.0002306067961165048, "loss": 0.596, "step": 2880 }, { "epoch": 1.1636280995909298, "grad_norm": 0.62109375, "learning_rate": 0.00023058252427184464, "loss": 0.6686, "step": 2881 }, { "epoch": 1.1640321195899197, "grad_norm": 0.5390625, "learning_rate": 0.00023055825242718444, "loss": 0.6046, "step": 2882 }, { "epoch": 1.1644361395889096, "grad_norm": 0.453125, "learning_rate": 0.00023053398058252425, "loss": 0.5581, "step": 2883 }, { "epoch": 1.1648401595878997, "grad_norm": 0.490234375, "learning_rate": 0.00023050970873786405, "loss": 0.6124, "step": 2884 }, { "epoch": 1.1652441795868895, "grad_norm": 0.64453125, "learning_rate": 0.00023048543689320385, "loss": 0.6749, "step": 2885 }, { "epoch": 1.1656481995858794, "grad_norm": 0.56640625, "learning_rate": 0.00023046116504854368, "loss": 0.6051, "step": 2886 }, { "epoch": 1.1660522195848695, "grad_norm": 0.6875, "learning_rate": 0.00023043689320388348, "loss": 0.6973, "step": 2887 }, { "epoch": 1.1664562395838594, "grad_norm": 0.6015625, "learning_rate": 0.00023041262135922326, "loss": 0.6339, "step": 2888 }, { "epoch": 1.1668602595828494, "grad_norm": 0.46875, "learning_rate": 0.0002303883495145631, "loss": 0.5784, "step": 2889 }, { "epoch": 1.1672642795818393, "grad_norm": 0.4453125, "learning_rate": 0.0002303640776699029, "loss": 0.5779, "step": 2890 }, { "epoch": 1.1676682995808292, "grad_norm": 0.578125, "learning_rate": 0.0002303398058252427, "loss": 0.6698, "step": 2891 }, { "epoch": 1.1680723195798193, "grad_norm": 0.5703125, "learning_rate": 0.00023031553398058252, "loss": 0.6285, "step": 2892 }, { "epoch": 1.1684763395788091, "grad_norm": 0.53515625, "learning_rate": 0.0002302912621359223, "loss": 0.6145, "step": 2893 }, { "epoch": 1.168880359577799, "grad_norm": 0.439453125, "learning_rate": 0.00023026699029126213, "loss": 0.5725, "step": 2894 }, { "epoch": 1.169284379576789, "grad_norm": 0.5859375, "learning_rate": 0.00023024271844660193, "loss": 0.6744, "step": 2895 }, { "epoch": 1.169688399575779, "grad_norm": 0.51953125, "learning_rate": 0.00023021844660194173, "loss": 0.6562, "step": 2896 }, { "epoch": 1.170092419574769, "grad_norm": 0.7890625, "learning_rate": 0.00023019417475728153, "loss": 0.7428, "step": 2897 }, { "epoch": 1.170496439573759, "grad_norm": 0.52734375, "learning_rate": 0.00023016990291262134, "loss": 0.6207, "step": 2898 }, { "epoch": 1.1709004595727488, "grad_norm": 0.578125, "learning_rate": 0.00023014563106796114, "loss": 0.6556, "step": 2899 }, { "epoch": 1.1713044795717389, "grad_norm": 0.4765625, "learning_rate": 0.00023012135922330097, "loss": 0.5926, "step": 2900 }, { "epoch": 1.1717084995707288, "grad_norm": 0.5078125, "learning_rate": 0.00023009708737864074, "loss": 0.5746, "step": 2901 }, { "epoch": 1.1721125195697186, "grad_norm": 0.5859375, "learning_rate": 0.00023007281553398055, "loss": 0.5622, "step": 2902 }, { "epoch": 1.1725165395687087, "grad_norm": 0.48046875, "learning_rate": 0.00023004854368932038, "loss": 0.6172, "step": 2903 }, { "epoch": 1.1729205595676986, "grad_norm": 0.52734375, "learning_rate": 0.00023002427184466018, "loss": 0.6778, "step": 2904 }, { "epoch": 1.1733245795666885, "grad_norm": 0.439453125, "learning_rate": 0.00023, "loss": 0.563, "step": 2905 }, { "epoch": 1.1737285995656785, "grad_norm": 0.484375, "learning_rate": 0.00022997572815533978, "loss": 0.6677, "step": 2906 }, { "epoch": 1.1741326195646684, "grad_norm": 0.515625, "learning_rate": 0.00022995145631067958, "loss": 0.6481, "step": 2907 }, { "epoch": 1.1745366395636583, "grad_norm": 0.53125, "learning_rate": 0.00022992718446601941, "loss": 0.6394, "step": 2908 }, { "epoch": 1.1749406595626484, "grad_norm": 0.498046875, "learning_rate": 0.00022990291262135922, "loss": 0.659, "step": 2909 }, { "epoch": 1.1753446795616382, "grad_norm": 0.56640625, "learning_rate": 0.000229878640776699, "loss": 0.6071, "step": 2910 }, { "epoch": 1.1757486995606283, "grad_norm": 0.53125, "learning_rate": 0.00022985436893203882, "loss": 0.6982, "step": 2911 }, { "epoch": 1.1761527195596182, "grad_norm": 0.49609375, "learning_rate": 0.00022983009708737862, "loss": 0.6374, "step": 2912 }, { "epoch": 1.176556739558608, "grad_norm": 0.51953125, "learning_rate": 0.0002298058252427184, "loss": 0.6705, "step": 2913 }, { "epoch": 1.1769607595575982, "grad_norm": 0.5546875, "learning_rate": 0.00022978155339805823, "loss": 0.6543, "step": 2914 }, { "epoch": 1.177364779556588, "grad_norm": 0.60546875, "learning_rate": 0.00022975728155339803, "loss": 0.6798, "step": 2915 }, { "epoch": 1.1777687995555781, "grad_norm": 0.54296875, "learning_rate": 0.00022973300970873786, "loss": 0.6604, "step": 2916 }, { "epoch": 1.178172819554568, "grad_norm": 0.5703125, "learning_rate": 0.00022970873786407766, "loss": 0.6012, "step": 2917 }, { "epoch": 1.1785768395535579, "grad_norm": 0.640625, "learning_rate": 0.00022968446601941744, "loss": 0.6577, "step": 2918 }, { "epoch": 1.178980859552548, "grad_norm": 0.6875, "learning_rate": 0.00022966019417475727, "loss": 0.7368, "step": 2919 }, { "epoch": 1.1793848795515378, "grad_norm": 0.515625, "learning_rate": 0.00022963592233009707, "loss": 0.6272, "step": 2920 }, { "epoch": 1.1797888995505277, "grad_norm": 0.470703125, "learning_rate": 0.00022961165048543687, "loss": 0.5667, "step": 2921 }, { "epoch": 1.1801929195495178, "grad_norm": 0.56640625, "learning_rate": 0.00022958737864077667, "loss": 0.6569, "step": 2922 }, { "epoch": 1.1805969395485076, "grad_norm": 0.51953125, "learning_rate": 0.00022956310679611648, "loss": 0.6392, "step": 2923 }, { "epoch": 1.1810009595474975, "grad_norm": 0.46875, "learning_rate": 0.00022953883495145628, "loss": 0.6147, "step": 2924 }, { "epoch": 1.1814049795464876, "grad_norm": 0.435546875, "learning_rate": 0.0002295145631067961, "loss": 0.6043, "step": 2925 }, { "epoch": 1.1818089995454775, "grad_norm": 0.59765625, "learning_rate": 0.00022949029126213588, "loss": 0.6775, "step": 2926 }, { "epoch": 1.1822130195444673, "grad_norm": 0.58984375, "learning_rate": 0.0002294660194174757, "loss": 0.6235, "step": 2927 }, { "epoch": 1.1826170395434574, "grad_norm": 0.59375, "learning_rate": 0.00022944174757281551, "loss": 0.5505, "step": 2928 }, { "epoch": 1.1830210595424473, "grad_norm": 0.546875, "learning_rate": 0.00022941747572815532, "loss": 0.6077, "step": 2929 }, { "epoch": 1.1834250795414374, "grad_norm": 0.53125, "learning_rate": 0.00022939320388349515, "loss": 0.6393, "step": 2930 }, { "epoch": 1.1838290995404273, "grad_norm": 0.54296875, "learning_rate": 0.00022936893203883492, "loss": 0.6851, "step": 2931 }, { "epoch": 1.1842331195394171, "grad_norm": 0.52734375, "learning_rate": 0.00022934466019417472, "loss": 0.6048, "step": 2932 }, { "epoch": 1.1846371395384072, "grad_norm": 0.52734375, "learning_rate": 0.00022932038834951455, "loss": 0.559, "step": 2933 }, { "epoch": 1.185041159537397, "grad_norm": 0.53125, "learning_rate": 0.00022929611650485436, "loss": 0.6645, "step": 2934 }, { "epoch": 1.1854451795363872, "grad_norm": 0.64453125, "learning_rate": 0.00022927184466019416, "loss": 0.7475, "step": 2935 }, { "epoch": 1.185849199535377, "grad_norm": 0.55078125, "learning_rate": 0.00022924757281553396, "loss": 0.6278, "step": 2936 }, { "epoch": 1.186253219534367, "grad_norm": 0.546875, "learning_rate": 0.00022922330097087376, "loss": 0.6764, "step": 2937 }, { "epoch": 1.186657239533357, "grad_norm": 0.51953125, "learning_rate": 0.0002291990291262136, "loss": 0.6947, "step": 2938 }, { "epoch": 1.1870612595323469, "grad_norm": 0.50390625, "learning_rate": 0.00022917475728155337, "loss": 0.7014, "step": 2939 }, { "epoch": 1.1874652795313367, "grad_norm": 0.5234375, "learning_rate": 0.00022915048543689317, "loss": 0.6263, "step": 2940 }, { "epoch": 1.1878692995303268, "grad_norm": 0.5390625, "learning_rate": 0.000229126213592233, "loss": 0.6674, "step": 2941 }, { "epoch": 1.1882733195293167, "grad_norm": 0.609375, "learning_rate": 0.0002291019417475728, "loss": 0.7065, "step": 2942 }, { "epoch": 1.1886773395283066, "grad_norm": 0.51953125, "learning_rate": 0.00022907766990291258, "loss": 0.6183, "step": 2943 }, { "epoch": 1.1890813595272967, "grad_norm": 0.54296875, "learning_rate": 0.0002290533980582524, "loss": 0.5799, "step": 2944 }, { "epoch": 1.1894853795262865, "grad_norm": 0.48046875, "learning_rate": 0.0002290291262135922, "loss": 0.6661, "step": 2945 }, { "epoch": 1.1898893995252764, "grad_norm": 0.5, "learning_rate": 0.00022900485436893204, "loss": 0.5795, "step": 2946 }, { "epoch": 1.1902934195242665, "grad_norm": 0.5625, "learning_rate": 0.00022898058252427184, "loss": 0.6358, "step": 2947 }, { "epoch": 1.1906974395232564, "grad_norm": 0.49609375, "learning_rate": 0.00022895631067961162, "loss": 0.6328, "step": 2948 }, { "epoch": 1.1911014595222464, "grad_norm": 0.5703125, "learning_rate": 0.00022893203883495145, "loss": 0.6236, "step": 2949 }, { "epoch": 1.1915054795212363, "grad_norm": 0.546875, "learning_rate": 0.00022890776699029125, "loss": 0.668, "step": 2950 }, { "epoch": 1.1919094995202262, "grad_norm": 0.53515625, "learning_rate": 0.00022888349514563102, "loss": 0.619, "step": 2951 }, { "epoch": 1.1923135195192163, "grad_norm": 0.50390625, "learning_rate": 0.00022885922330097085, "loss": 0.6701, "step": 2952 }, { "epoch": 1.1927175395182061, "grad_norm": 0.52734375, "learning_rate": 0.00022883495145631065, "loss": 0.6196, "step": 2953 }, { "epoch": 1.193121559517196, "grad_norm": 0.625, "learning_rate": 0.00022881067961165046, "loss": 0.6951, "step": 2954 }, { "epoch": 1.193525579516186, "grad_norm": 0.486328125, "learning_rate": 0.0002287864077669903, "loss": 0.6031, "step": 2955 }, { "epoch": 1.193929599515176, "grad_norm": 0.55078125, "learning_rate": 0.00022876213592233006, "loss": 0.6037, "step": 2956 }, { "epoch": 1.194333619514166, "grad_norm": 0.4921875, "learning_rate": 0.0002287378640776699, "loss": 0.6188, "step": 2957 }, { "epoch": 1.194737639513156, "grad_norm": 0.5078125, "learning_rate": 0.0002287135922330097, "loss": 0.6582, "step": 2958 }, { "epoch": 1.1951416595121458, "grad_norm": 0.5234375, "learning_rate": 0.0002286893203883495, "loss": 0.6337, "step": 2959 }, { "epoch": 1.1955456795111359, "grad_norm": 0.67578125, "learning_rate": 0.0002286650485436893, "loss": 0.625, "step": 2960 }, { "epoch": 1.1959496995101258, "grad_norm": 0.48828125, "learning_rate": 0.0002286407766990291, "loss": 0.6396, "step": 2961 }, { "epoch": 1.1963537195091156, "grad_norm": 0.7421875, "learning_rate": 0.0002286165048543689, "loss": 0.7611, "step": 2962 }, { "epoch": 1.1967577395081057, "grad_norm": 0.482421875, "learning_rate": 0.00022859223300970873, "loss": 0.5095, "step": 2963 }, { "epoch": 1.1971617595070956, "grad_norm": 0.59375, "learning_rate": 0.0002285679611650485, "loss": 0.6981, "step": 2964 }, { "epoch": 1.1975657795060854, "grad_norm": 0.58203125, "learning_rate": 0.00022854368932038834, "loss": 0.6595, "step": 2965 }, { "epoch": 1.1979697995050755, "grad_norm": 0.55078125, "learning_rate": 0.00022851941747572814, "loss": 0.623, "step": 2966 }, { "epoch": 1.1983738195040654, "grad_norm": 0.6015625, "learning_rate": 0.00022849514563106794, "loss": 0.6796, "step": 2967 }, { "epoch": 1.1987778395030555, "grad_norm": 0.474609375, "learning_rate": 0.00022847087378640777, "loss": 0.6776, "step": 2968 }, { "epoch": 1.1991818595020454, "grad_norm": 0.546875, "learning_rate": 0.00022844660194174755, "loss": 0.5996, "step": 2969 }, { "epoch": 1.1995858795010352, "grad_norm": 0.52734375, "learning_rate": 0.00022842233009708735, "loss": 0.5918, "step": 2970 }, { "epoch": 1.1999898995000253, "grad_norm": 0.59765625, "learning_rate": 0.00022839805825242718, "loss": 0.6616, "step": 2971 }, { "epoch": 1.2003939194990152, "grad_norm": 0.53125, "learning_rate": 0.00022837378640776698, "loss": 0.5747, "step": 2972 }, { "epoch": 1.200797939498005, "grad_norm": 0.59765625, "learning_rate": 0.00022834951456310676, "loss": 0.697, "step": 2973 }, { "epoch": 1.2012019594969952, "grad_norm": 0.61328125, "learning_rate": 0.00022832524271844659, "loss": 0.6607, "step": 2974 }, { "epoch": 1.201605979495985, "grad_norm": 0.515625, "learning_rate": 0.0002283009708737864, "loss": 0.6211, "step": 2975 }, { "epoch": 1.2020099994949751, "grad_norm": 0.50390625, "learning_rate": 0.00022827669902912622, "loss": 0.5532, "step": 2976 }, { "epoch": 1.202414019493965, "grad_norm": 0.53515625, "learning_rate": 0.000228252427184466, "loss": 0.6515, "step": 2977 }, { "epoch": 1.2028180394929548, "grad_norm": 0.51171875, "learning_rate": 0.0002282281553398058, "loss": 0.6655, "step": 2978 }, { "epoch": 1.203222059491945, "grad_norm": 0.6484375, "learning_rate": 0.00022820388349514562, "loss": 0.7328, "step": 2979 }, { "epoch": 1.2036260794909348, "grad_norm": 0.44140625, "learning_rate": 0.00022817961165048543, "loss": 0.5316, "step": 2980 }, { "epoch": 1.2040300994899247, "grad_norm": 0.5390625, "learning_rate": 0.0002281553398058252, "loss": 0.5576, "step": 2981 }, { "epoch": 1.2044341194889148, "grad_norm": 0.48046875, "learning_rate": 0.00022813106796116503, "loss": 0.6336, "step": 2982 }, { "epoch": 1.2048381394879046, "grad_norm": 0.640625, "learning_rate": 0.00022810679611650483, "loss": 0.6814, "step": 2983 }, { "epoch": 1.2052421594868945, "grad_norm": 0.56640625, "learning_rate": 0.00022808252427184464, "loss": 0.5864, "step": 2984 }, { "epoch": 1.2056461794858846, "grad_norm": 0.5625, "learning_rate": 0.00022805825242718444, "loss": 0.5819, "step": 2985 }, { "epoch": 1.2060501994848745, "grad_norm": 0.546875, "learning_rate": 0.00022803398058252424, "loss": 0.5951, "step": 2986 }, { "epoch": 1.2064542194838643, "grad_norm": 0.54296875, "learning_rate": 0.00022800970873786407, "loss": 0.6458, "step": 2987 }, { "epoch": 1.2068582394828544, "grad_norm": 0.515625, "learning_rate": 0.00022798543689320387, "loss": 0.611, "step": 2988 }, { "epoch": 1.2072622594818443, "grad_norm": 0.5546875, "learning_rate": 0.00022796116504854365, "loss": 0.6777, "step": 2989 }, { "epoch": 1.2076662794808344, "grad_norm": 0.498046875, "learning_rate": 0.00022793689320388348, "loss": 0.5651, "step": 2990 }, { "epoch": 1.2080702994798242, "grad_norm": 0.5234375, "learning_rate": 0.00022791262135922328, "loss": 0.711, "step": 2991 }, { "epoch": 1.2084743194788141, "grad_norm": 0.51171875, "learning_rate": 0.00022788834951456308, "loss": 0.5651, "step": 2992 }, { "epoch": 1.2088783394778042, "grad_norm": 0.53515625, "learning_rate": 0.0002278640776699029, "loss": 0.6674, "step": 2993 }, { "epoch": 1.209282359476794, "grad_norm": 0.498046875, "learning_rate": 0.00022783980582524269, "loss": 0.6407, "step": 2994 }, { "epoch": 1.2096863794757842, "grad_norm": 0.453125, "learning_rate": 0.00022781553398058252, "loss": 0.6424, "step": 2995 }, { "epoch": 1.210090399474774, "grad_norm": 0.455078125, "learning_rate": 0.00022779126213592232, "loss": 0.5, "step": 2996 }, { "epoch": 1.210494419473764, "grad_norm": 0.455078125, "learning_rate": 0.00022776699029126212, "loss": 0.5823, "step": 2997 }, { "epoch": 1.210898439472754, "grad_norm": 0.4453125, "learning_rate": 0.00022774271844660192, "loss": 0.579, "step": 2998 }, { "epoch": 1.2113024594717439, "grad_norm": 0.59765625, "learning_rate": 0.00022771844660194173, "loss": 0.6524, "step": 2999 }, { "epoch": 1.2117064794707337, "grad_norm": 0.478515625, "learning_rate": 0.00022769417475728153, "loss": 0.6072, "step": 3000 }, { "epoch": 1.2121104994697238, "grad_norm": 0.52734375, "learning_rate": 0.00022766990291262136, "loss": 0.6015, "step": 3001 }, { "epoch": 1.2125145194687137, "grad_norm": 0.4609375, "learning_rate": 0.00022764563106796113, "loss": 0.5741, "step": 3002 }, { "epoch": 1.2129185394677036, "grad_norm": 0.5625, "learning_rate": 0.00022762135922330093, "loss": 0.6582, "step": 3003 }, { "epoch": 1.2133225594666937, "grad_norm": 0.4375, "learning_rate": 0.00022759708737864076, "loss": 0.6581, "step": 3004 }, { "epoch": 1.2137265794656835, "grad_norm": 0.5859375, "learning_rate": 0.00022757281553398057, "loss": 0.6706, "step": 3005 }, { "epoch": 1.2141305994646734, "grad_norm": 0.53515625, "learning_rate": 0.0002275485436893204, "loss": 0.621, "step": 3006 }, { "epoch": 1.2145346194636635, "grad_norm": 0.515625, "learning_rate": 0.00022752427184466017, "loss": 0.5977, "step": 3007 }, { "epoch": 1.2149386394626533, "grad_norm": 0.474609375, "learning_rate": 0.00022749999999999997, "loss": 0.5926, "step": 3008 }, { "epoch": 1.2153426594616434, "grad_norm": 0.546875, "learning_rate": 0.0002274757281553398, "loss": 0.5955, "step": 3009 }, { "epoch": 1.2157466794606333, "grad_norm": 0.5390625, "learning_rate": 0.0002274514563106796, "loss": 0.6293, "step": 3010 }, { "epoch": 1.2161506994596232, "grad_norm": 0.578125, "learning_rate": 0.00022742718446601938, "loss": 0.6916, "step": 3011 }, { "epoch": 1.2165547194586133, "grad_norm": 0.66015625, "learning_rate": 0.0002274029126213592, "loss": 0.6747, "step": 3012 }, { "epoch": 1.2169587394576031, "grad_norm": 0.4921875, "learning_rate": 0.000227378640776699, "loss": 0.6004, "step": 3013 }, { "epoch": 1.2173627594565932, "grad_norm": 0.5078125, "learning_rate": 0.0002273543689320388, "loss": 0.5784, "step": 3014 }, { "epoch": 1.217766779455583, "grad_norm": 0.58203125, "learning_rate": 0.00022733009708737862, "loss": 0.6585, "step": 3015 }, { "epoch": 1.218170799454573, "grad_norm": 0.5, "learning_rate": 0.00022730582524271842, "loss": 0.6723, "step": 3016 }, { "epoch": 1.218574819453563, "grad_norm": 0.51171875, "learning_rate": 0.00022728155339805825, "loss": 0.6445, "step": 3017 }, { "epoch": 1.218978839452553, "grad_norm": 0.451171875, "learning_rate": 0.00022725728155339805, "loss": 0.5757, "step": 3018 }, { "epoch": 1.2193828594515428, "grad_norm": 0.65234375, "learning_rate": 0.00022723300970873783, "loss": 0.6881, "step": 3019 }, { "epoch": 1.2197868794505329, "grad_norm": 0.68359375, "learning_rate": 0.00022720873786407766, "loss": 0.6793, "step": 3020 }, { "epoch": 1.2201908994495227, "grad_norm": 0.546875, "learning_rate": 0.00022718446601941746, "loss": 0.6039, "step": 3021 }, { "epoch": 1.2205949194485126, "grad_norm": 0.54296875, "learning_rate": 0.00022716019417475726, "loss": 0.6879, "step": 3022 }, { "epoch": 1.2209989394475027, "grad_norm": 0.453125, "learning_rate": 0.00022713592233009706, "loss": 0.5525, "step": 3023 }, { "epoch": 1.2214029594464926, "grad_norm": 0.494140625, "learning_rate": 0.00022711165048543686, "loss": 0.6323, "step": 3024 }, { "epoch": 1.2218069794454824, "grad_norm": 0.5390625, "learning_rate": 0.00022708737864077667, "loss": 0.6766, "step": 3025 }, { "epoch": 1.2222109994444725, "grad_norm": 0.8515625, "learning_rate": 0.0002270631067961165, "loss": 0.6601, "step": 3026 }, { "epoch": 1.2226150194434624, "grad_norm": 0.58984375, "learning_rate": 0.00022703883495145627, "loss": 0.6738, "step": 3027 }, { "epoch": 1.2230190394424525, "grad_norm": 0.57421875, "learning_rate": 0.0002270145631067961, "loss": 0.6353, "step": 3028 }, { "epoch": 1.2234230594414424, "grad_norm": 0.515625, "learning_rate": 0.0002269902912621359, "loss": 0.5983, "step": 3029 }, { "epoch": 1.2238270794404322, "grad_norm": 0.7265625, "learning_rate": 0.0002269660194174757, "loss": 0.635, "step": 3030 }, { "epoch": 1.2242310994394223, "grad_norm": 0.61328125, "learning_rate": 0.00022694174757281554, "loss": 0.6961, "step": 3031 }, { "epoch": 1.2246351194384122, "grad_norm": 0.62890625, "learning_rate": 0.0002269174757281553, "loss": 0.6178, "step": 3032 }, { "epoch": 1.225039139437402, "grad_norm": 0.5546875, "learning_rate": 0.0002268932038834951, "loss": 0.6007, "step": 3033 }, { "epoch": 1.2254431594363921, "grad_norm": 0.55078125, "learning_rate": 0.00022686893203883494, "loss": 0.5725, "step": 3034 }, { "epoch": 1.225847179435382, "grad_norm": 0.50390625, "learning_rate": 0.00022684466019417474, "loss": 0.63, "step": 3035 }, { "epoch": 1.226251199434372, "grad_norm": 0.46875, "learning_rate": 0.00022682038834951455, "loss": 0.5602, "step": 3036 }, { "epoch": 1.226655219433362, "grad_norm": 0.486328125, "learning_rate": 0.00022679611650485435, "loss": 0.6953, "step": 3037 }, { "epoch": 1.2270592394323518, "grad_norm": 0.5390625, "learning_rate": 0.00022677184466019415, "loss": 0.6006, "step": 3038 }, { "epoch": 1.227463259431342, "grad_norm": 0.47265625, "learning_rate": 0.00022674757281553398, "loss": 0.6344, "step": 3039 }, { "epoch": 1.2278672794303318, "grad_norm": 0.56640625, "learning_rate": 0.00022672330097087376, "loss": 0.6611, "step": 3040 }, { "epoch": 1.2282712994293217, "grad_norm": 0.4765625, "learning_rate": 0.00022669902912621356, "loss": 0.5959, "step": 3041 }, { "epoch": 1.2286753194283118, "grad_norm": 0.4765625, "learning_rate": 0.0002266747572815534, "loss": 0.5891, "step": 3042 }, { "epoch": 1.2290793394273016, "grad_norm": 0.5078125, "learning_rate": 0.0002266504854368932, "loss": 0.6764, "step": 3043 }, { "epoch": 1.2294833594262915, "grad_norm": 0.5078125, "learning_rate": 0.00022662621359223297, "loss": 0.6652, "step": 3044 }, { "epoch": 1.2298873794252816, "grad_norm": 0.48828125, "learning_rate": 0.0002266019417475728, "loss": 0.5992, "step": 3045 }, { "epoch": 1.2302913994242715, "grad_norm": 0.51171875, "learning_rate": 0.0002265776699029126, "loss": 0.634, "step": 3046 }, { "epoch": 1.2306954194232613, "grad_norm": 0.5546875, "learning_rate": 0.00022655339805825243, "loss": 0.6697, "step": 3047 }, { "epoch": 1.2310994394222514, "grad_norm": 0.494140625, "learning_rate": 0.0002265291262135922, "loss": 0.6413, "step": 3048 }, { "epoch": 1.2315034594212413, "grad_norm": 0.5078125, "learning_rate": 0.000226504854368932, "loss": 0.5343, "step": 3049 }, { "epoch": 1.2319074794202314, "grad_norm": 0.50390625, "learning_rate": 0.00022648058252427183, "loss": 0.6335, "step": 3050 }, { "epoch": 1.2323114994192212, "grad_norm": 0.5703125, "learning_rate": 0.00022645631067961164, "loss": 0.638, "step": 3051 }, { "epoch": 1.2327155194182111, "grad_norm": 0.65625, "learning_rate": 0.0002264320388349514, "loss": 0.6709, "step": 3052 }, { "epoch": 1.2331195394172012, "grad_norm": 0.53125, "learning_rate": 0.00022640776699029124, "loss": 0.6678, "step": 3053 }, { "epoch": 1.233523559416191, "grad_norm": 0.51171875, "learning_rate": 0.00022638349514563104, "loss": 0.5775, "step": 3054 }, { "epoch": 1.2339275794151812, "grad_norm": 0.53515625, "learning_rate": 0.00022635922330097085, "loss": 0.6152, "step": 3055 }, { "epoch": 1.234331599414171, "grad_norm": 0.55078125, "learning_rate": 0.00022633495145631068, "loss": 0.6511, "step": 3056 }, { "epoch": 1.234735619413161, "grad_norm": 0.63671875, "learning_rate": 0.00022631067961165045, "loss": 0.6967, "step": 3057 }, { "epoch": 1.235139639412151, "grad_norm": 0.490234375, "learning_rate": 0.00022628640776699028, "loss": 0.6, "step": 3058 }, { "epoch": 1.2355436594111409, "grad_norm": 0.50390625, "learning_rate": 0.00022626213592233008, "loss": 0.6334, "step": 3059 }, { "epoch": 1.2359476794101307, "grad_norm": 0.5078125, "learning_rate": 0.00022623786407766988, "loss": 0.5561, "step": 3060 }, { "epoch": 1.2363516994091208, "grad_norm": 0.5078125, "learning_rate": 0.0002262135922330097, "loss": 0.6726, "step": 3061 }, { "epoch": 1.2367557194081107, "grad_norm": 0.5, "learning_rate": 0.0002261893203883495, "loss": 0.5927, "step": 3062 }, { "epoch": 1.2371597394071006, "grad_norm": 0.427734375, "learning_rate": 0.0002261650485436893, "loss": 0.5507, "step": 3063 }, { "epoch": 1.2375637594060906, "grad_norm": 0.50390625, "learning_rate": 0.00022614077669902912, "loss": 0.5499, "step": 3064 }, { "epoch": 1.2379677794050805, "grad_norm": 0.443359375, "learning_rate": 0.0002261165048543689, "loss": 0.6028, "step": 3065 }, { "epoch": 1.2383717994040704, "grad_norm": 0.58984375, "learning_rate": 0.00022609223300970873, "loss": 0.6535, "step": 3066 }, { "epoch": 1.2387758194030605, "grad_norm": 0.54296875, "learning_rate": 0.00022606796116504853, "loss": 0.6215, "step": 3067 }, { "epoch": 1.2391798394020503, "grad_norm": 0.86328125, "learning_rate": 0.00022604368932038833, "loss": 0.6988, "step": 3068 }, { "epoch": 1.2395838594010404, "grad_norm": 0.67578125, "learning_rate": 0.00022601941747572816, "loss": 0.8085, "step": 3069 }, { "epoch": 1.2399878794000303, "grad_norm": 0.515625, "learning_rate": 0.00022599514563106794, "loss": 0.6163, "step": 3070 }, { "epoch": 1.2403918993990202, "grad_norm": 0.490234375, "learning_rate": 0.00022597087378640774, "loss": 0.5696, "step": 3071 }, { "epoch": 1.2407959193980103, "grad_norm": 0.65625, "learning_rate": 0.00022594660194174757, "loss": 0.6489, "step": 3072 }, { "epoch": 1.2411999393970001, "grad_norm": 0.55859375, "learning_rate": 0.00022592233009708737, "loss": 0.663, "step": 3073 }, { "epoch": 1.2416039593959902, "grad_norm": 0.578125, "learning_rate": 0.00022589805825242714, "loss": 0.6576, "step": 3074 }, { "epoch": 1.24200797939498, "grad_norm": 0.578125, "learning_rate": 0.00022587378640776697, "loss": 0.6759, "step": 3075 }, { "epoch": 1.24241199939397, "grad_norm": 0.6015625, "learning_rate": 0.00022584951456310678, "loss": 0.7291, "step": 3076 }, { "epoch": 1.24281601939296, "grad_norm": 0.51171875, "learning_rate": 0.0002258252427184466, "loss": 0.5859, "step": 3077 }, { "epoch": 1.24322003939195, "grad_norm": 0.703125, "learning_rate": 0.00022580097087378638, "loss": 0.7104, "step": 3078 }, { "epoch": 1.2436240593909398, "grad_norm": 0.474609375, "learning_rate": 0.00022577669902912618, "loss": 0.6344, "step": 3079 }, { "epoch": 1.2440280793899299, "grad_norm": 0.4765625, "learning_rate": 0.000225752427184466, "loss": 0.5929, "step": 3080 }, { "epoch": 1.2444320993889197, "grad_norm": 0.53515625, "learning_rate": 0.00022572815533980582, "loss": 0.6556, "step": 3081 }, { "epoch": 1.2448361193879096, "grad_norm": 0.5546875, "learning_rate": 0.0002257038834951456, "loss": 0.6159, "step": 3082 }, { "epoch": 1.2452401393868997, "grad_norm": 0.55859375, "learning_rate": 0.00022567961165048542, "loss": 0.6185, "step": 3083 }, { "epoch": 1.2456441593858896, "grad_norm": 0.53515625, "learning_rate": 0.00022565533980582522, "loss": 0.5967, "step": 3084 }, { "epoch": 1.2460481793848794, "grad_norm": 0.58984375, "learning_rate": 0.00022563106796116502, "loss": 0.6162, "step": 3085 }, { "epoch": 1.2464521993838695, "grad_norm": 0.546875, "learning_rate": 0.00022560679611650483, "loss": 0.6039, "step": 3086 }, { "epoch": 1.2468562193828594, "grad_norm": 0.625, "learning_rate": 0.00022558252427184463, "loss": 0.7004, "step": 3087 }, { "epoch": 1.2472602393818495, "grad_norm": 0.6796875, "learning_rate": 0.00022555825242718446, "loss": 0.6924, "step": 3088 }, { "epoch": 1.2476642593808394, "grad_norm": 0.5234375, "learning_rate": 0.00022553398058252426, "loss": 0.6197, "step": 3089 }, { "epoch": 1.2480682793798292, "grad_norm": 0.51953125, "learning_rate": 0.00022550970873786404, "loss": 0.6292, "step": 3090 }, { "epoch": 1.2484722993788193, "grad_norm": 0.5078125, "learning_rate": 0.00022548543689320387, "loss": 0.5759, "step": 3091 }, { "epoch": 1.2488763193778092, "grad_norm": 0.5703125, "learning_rate": 0.00022546116504854367, "loss": 0.681, "step": 3092 }, { "epoch": 1.2492803393767993, "grad_norm": 0.55078125, "learning_rate": 0.00022543689320388347, "loss": 0.6294, "step": 3093 }, { "epoch": 1.2496843593757891, "grad_norm": 0.5078125, "learning_rate": 0.0002254126213592233, "loss": 0.6519, "step": 3094 }, { "epoch": 1.250088379374779, "grad_norm": 0.4921875, "learning_rate": 0.00022538834951456308, "loss": 0.658, "step": 3095 }, { "epoch": 1.250492399373769, "grad_norm": 0.48828125, "learning_rate": 0.0002253640776699029, "loss": 0.6487, "step": 3096 }, { "epoch": 1.250896419372759, "grad_norm": 0.5546875, "learning_rate": 0.0002253398058252427, "loss": 0.5974, "step": 3097 }, { "epoch": 1.2513004393717488, "grad_norm": 0.51171875, "learning_rate": 0.0002253155339805825, "loss": 0.6589, "step": 3098 }, { "epoch": 1.251704459370739, "grad_norm": 0.5703125, "learning_rate": 0.0002252912621359223, "loss": 0.695, "step": 3099 }, { "epoch": 1.2521084793697288, "grad_norm": 0.5625, "learning_rate": 0.00022526699029126211, "loss": 0.6701, "step": 3100 }, { "epoch": 1.2525124993687187, "grad_norm": 0.5546875, "learning_rate": 0.00022524271844660192, "loss": 0.6291, "step": 3101 }, { "epoch": 1.2529165193677088, "grad_norm": 0.5546875, "learning_rate": 0.00022521844660194175, "loss": 0.5957, "step": 3102 }, { "epoch": 1.2533205393666986, "grad_norm": 0.54296875, "learning_rate": 0.00022519417475728152, "loss": 0.5809, "step": 3103 }, { "epoch": 1.2537245593656885, "grad_norm": 0.56640625, "learning_rate": 0.00022516990291262132, "loss": 0.6652, "step": 3104 }, { "epoch": 1.2541285793646786, "grad_norm": 0.5625, "learning_rate": 0.00022514563106796115, "loss": 0.6433, "step": 3105 }, { "epoch": 1.2545325993636685, "grad_norm": 0.65625, "learning_rate": 0.00022512135922330096, "loss": 0.6678, "step": 3106 }, { "epoch": 1.2549366193626583, "grad_norm": 0.4765625, "learning_rate": 0.00022509708737864078, "loss": 0.5966, "step": 3107 }, { "epoch": 1.2553406393616484, "grad_norm": 0.65625, "learning_rate": 0.00022507281553398056, "loss": 0.7103, "step": 3108 }, { "epoch": 1.2557446593606383, "grad_norm": 0.58984375, "learning_rate": 0.00022504854368932036, "loss": 0.7071, "step": 3109 }, { "epoch": 1.2561486793596284, "grad_norm": 0.42578125, "learning_rate": 0.0002250242718446602, "loss": 0.5828, "step": 3110 }, { "epoch": 1.2565526993586182, "grad_norm": 0.5234375, "learning_rate": 0.000225, "loss": 0.5748, "step": 3111 }, { "epoch": 1.2569567193576083, "grad_norm": 0.490234375, "learning_rate": 0.00022497572815533977, "loss": 0.5992, "step": 3112 }, { "epoch": 1.2573607393565982, "grad_norm": 0.578125, "learning_rate": 0.0002249514563106796, "loss": 0.6387, "step": 3113 }, { "epoch": 1.257764759355588, "grad_norm": 0.55078125, "learning_rate": 0.0002249271844660194, "loss": 0.6773, "step": 3114 }, { "epoch": 1.2581687793545782, "grad_norm": 0.482421875, "learning_rate": 0.00022490291262135918, "loss": 0.6217, "step": 3115 }, { "epoch": 1.258572799353568, "grad_norm": 0.5078125, "learning_rate": 0.000224878640776699, "loss": 0.6751, "step": 3116 }, { "epoch": 1.258976819352558, "grad_norm": 0.55078125, "learning_rate": 0.0002248543689320388, "loss": 0.6621, "step": 3117 }, { "epoch": 1.259380839351548, "grad_norm": 0.51953125, "learning_rate": 0.00022483009708737864, "loss": 0.6292, "step": 3118 }, { "epoch": 1.2597848593505379, "grad_norm": 0.54296875, "learning_rate": 0.00022480582524271844, "loss": 0.618, "step": 3119 }, { "epoch": 1.2601888793495277, "grad_norm": 0.462890625, "learning_rate": 0.00022478155339805821, "loss": 0.6144, "step": 3120 }, { "epoch": 1.2605928993485178, "grad_norm": 0.53125, "learning_rate": 0.00022475728155339804, "loss": 0.6348, "step": 3121 }, { "epoch": 1.2609969193475077, "grad_norm": 0.5078125, "learning_rate": 0.00022473300970873785, "loss": 0.6406, "step": 3122 }, { "epoch": 1.2614009393464976, "grad_norm": 0.48046875, "learning_rate": 0.00022470873786407765, "loss": 0.5564, "step": 3123 }, { "epoch": 1.2618049593454876, "grad_norm": 0.46484375, "learning_rate": 0.00022468446601941745, "loss": 0.5946, "step": 3124 }, { "epoch": 1.2622089793444775, "grad_norm": 0.51953125, "learning_rate": 0.00022466019417475725, "loss": 0.6334, "step": 3125 }, { "epoch": 1.2626129993434674, "grad_norm": 0.5859375, "learning_rate": 0.00022463592233009706, "loss": 0.6369, "step": 3126 }, { "epoch": 1.2630170193424575, "grad_norm": 0.578125, "learning_rate": 0.00022461165048543689, "loss": 0.6136, "step": 3127 }, { "epoch": 1.2634210393414473, "grad_norm": 0.51171875, "learning_rate": 0.00022458737864077666, "loss": 0.5752, "step": 3128 }, { "epoch": 1.2638250593404374, "grad_norm": 0.50390625, "learning_rate": 0.0002245631067961165, "loss": 0.6628, "step": 3129 }, { "epoch": 1.2642290793394273, "grad_norm": 0.64453125, "learning_rate": 0.0002245388349514563, "loss": 0.7376, "step": 3130 }, { "epoch": 1.2646330993384174, "grad_norm": 0.5390625, "learning_rate": 0.0002245145631067961, "loss": 0.7014, "step": 3131 }, { "epoch": 1.2650371193374073, "grad_norm": 0.61328125, "learning_rate": 0.00022449029126213592, "loss": 0.6512, "step": 3132 }, { "epoch": 1.2654411393363971, "grad_norm": 0.546875, "learning_rate": 0.0002244660194174757, "loss": 0.6708, "step": 3133 }, { "epoch": 1.2658451593353872, "grad_norm": 0.494140625, "learning_rate": 0.0002244417475728155, "loss": 0.6739, "step": 3134 }, { "epoch": 1.266249179334377, "grad_norm": 0.53515625, "learning_rate": 0.00022441747572815533, "loss": 0.7016, "step": 3135 }, { "epoch": 1.266653199333367, "grad_norm": 0.52734375, "learning_rate": 0.00022439320388349513, "loss": 0.6151, "step": 3136 }, { "epoch": 1.267057219332357, "grad_norm": 0.5703125, "learning_rate": 0.00022436893203883494, "loss": 0.6715, "step": 3137 }, { "epoch": 1.267461239331347, "grad_norm": 0.515625, "learning_rate": 0.00022434466019417474, "loss": 0.6529, "step": 3138 }, { "epoch": 1.2678652593303368, "grad_norm": 0.478515625, "learning_rate": 0.00022432038834951454, "loss": 0.6186, "step": 3139 }, { "epoch": 1.2682692793293269, "grad_norm": 0.66796875, "learning_rate": 0.00022429611650485437, "loss": 0.69, "step": 3140 }, { "epoch": 1.2686732993283167, "grad_norm": 0.5078125, "learning_rate": 0.00022427184466019415, "loss": 0.5891, "step": 3141 }, { "epoch": 1.2690773193273066, "grad_norm": 0.52734375, "learning_rate": 0.00022424757281553395, "loss": 0.6042, "step": 3142 }, { "epoch": 1.2694813393262967, "grad_norm": 0.4453125, "learning_rate": 0.00022422330097087378, "loss": 0.534, "step": 3143 }, { "epoch": 1.2698853593252866, "grad_norm": 0.58203125, "learning_rate": 0.00022419902912621358, "loss": 0.7062, "step": 3144 }, { "epoch": 1.2702893793242764, "grad_norm": 0.46875, "learning_rate": 0.00022417475728155335, "loss": 0.5998, "step": 3145 }, { "epoch": 1.2706933993232665, "grad_norm": 0.55859375, "learning_rate": 0.00022415048543689318, "loss": 0.7113, "step": 3146 }, { "epoch": 1.2710974193222564, "grad_norm": 0.474609375, "learning_rate": 0.000224126213592233, "loss": 0.6454, "step": 3147 }, { "epoch": 1.2715014393212465, "grad_norm": 0.6875, "learning_rate": 0.00022410194174757282, "loss": 0.6604, "step": 3148 }, { "epoch": 1.2719054593202364, "grad_norm": 0.62109375, "learning_rate": 0.0002240776699029126, "loss": 0.7094, "step": 3149 }, { "epoch": 1.2723094793192262, "grad_norm": 0.52734375, "learning_rate": 0.0002240533980582524, "loss": 0.5945, "step": 3150 }, { "epoch": 1.2727134993182163, "grad_norm": 0.65234375, "learning_rate": 0.00022402912621359222, "loss": 0.6893, "step": 3151 }, { "epoch": 1.2731175193172062, "grad_norm": 0.59375, "learning_rate": 0.00022400485436893203, "loss": 0.6204, "step": 3152 }, { "epoch": 1.2735215393161963, "grad_norm": 0.53125, "learning_rate": 0.0002239805825242718, "loss": 0.6447, "step": 3153 }, { "epoch": 1.2739255593151861, "grad_norm": 0.53515625, "learning_rate": 0.00022395631067961163, "loss": 0.6172, "step": 3154 }, { "epoch": 1.274329579314176, "grad_norm": 0.546875, "learning_rate": 0.00022393203883495143, "loss": 0.6606, "step": 3155 }, { "epoch": 1.274733599313166, "grad_norm": 0.55078125, "learning_rate": 0.00022390776699029123, "loss": 0.5303, "step": 3156 }, { "epoch": 1.275137619312156, "grad_norm": 0.5546875, "learning_rate": 0.00022388349514563106, "loss": 0.6354, "step": 3157 }, { "epoch": 1.2755416393111458, "grad_norm": 0.55859375, "learning_rate": 0.00022385922330097084, "loss": 0.6351, "step": 3158 }, { "epoch": 1.275945659310136, "grad_norm": 0.5390625, "learning_rate": 0.00022383495145631067, "loss": 0.6659, "step": 3159 }, { "epoch": 1.2763496793091258, "grad_norm": 0.5703125, "learning_rate": 0.00022381067961165047, "loss": 0.6879, "step": 3160 }, { "epoch": 1.2767536993081157, "grad_norm": 0.5390625, "learning_rate": 0.00022378640776699027, "loss": 0.6865, "step": 3161 }, { "epoch": 1.2771577193071058, "grad_norm": 0.5234375, "learning_rate": 0.00022376213592233008, "loss": 0.5823, "step": 3162 }, { "epoch": 1.2775617393060956, "grad_norm": 0.58984375, "learning_rate": 0.00022373786407766988, "loss": 0.6159, "step": 3163 }, { "epoch": 1.2779657593050855, "grad_norm": 0.55859375, "learning_rate": 0.00022371359223300968, "loss": 0.6944, "step": 3164 }, { "epoch": 1.2783697793040756, "grad_norm": 0.59375, "learning_rate": 0.0002236893203883495, "loss": 0.6033, "step": 3165 }, { "epoch": 1.2787737993030655, "grad_norm": 0.5078125, "learning_rate": 0.00022366504854368929, "loss": 0.651, "step": 3166 }, { "epoch": 1.2791778193020553, "grad_norm": 0.5546875, "learning_rate": 0.00022364077669902911, "loss": 0.6198, "step": 3167 }, { "epoch": 1.2795818393010454, "grad_norm": 0.57421875, "learning_rate": 0.00022361650485436892, "loss": 0.6544, "step": 3168 }, { "epoch": 1.2799858593000353, "grad_norm": 0.5078125, "learning_rate": 0.00022359223300970872, "loss": 0.5368, "step": 3169 }, { "epoch": 1.2803898792990254, "grad_norm": 0.51171875, "learning_rate": 0.00022356796116504855, "loss": 0.6065, "step": 3170 }, { "epoch": 1.2807938992980152, "grad_norm": 0.58203125, "learning_rate": 0.00022354368932038832, "loss": 0.6685, "step": 3171 }, { "epoch": 1.2811979192970053, "grad_norm": 0.56640625, "learning_rate": 0.00022351941747572813, "loss": 0.6893, "step": 3172 }, { "epoch": 1.2816019392959952, "grad_norm": 0.498046875, "learning_rate": 0.00022349514563106796, "loss": 0.6508, "step": 3173 }, { "epoch": 1.282005959294985, "grad_norm": 0.625, "learning_rate": 0.00022347087378640776, "loss": 0.6408, "step": 3174 }, { "epoch": 1.2824099792939752, "grad_norm": 0.56640625, "learning_rate": 0.00022344660194174753, "loss": 0.6613, "step": 3175 }, { "epoch": 1.282813999292965, "grad_norm": 0.54296875, "learning_rate": 0.00022342233009708736, "loss": 0.6147, "step": 3176 }, { "epoch": 1.283218019291955, "grad_norm": 0.55078125, "learning_rate": 0.00022339805825242717, "loss": 0.6672, "step": 3177 }, { "epoch": 1.283622039290945, "grad_norm": 0.71875, "learning_rate": 0.000223373786407767, "loss": 0.8321, "step": 3178 }, { "epoch": 1.2840260592899349, "grad_norm": 0.51953125, "learning_rate": 0.00022334951456310677, "loss": 0.5539, "step": 3179 }, { "epoch": 1.2844300792889247, "grad_norm": 0.5625, "learning_rate": 0.00022332524271844657, "loss": 0.6721, "step": 3180 }, { "epoch": 1.2848340992879148, "grad_norm": 0.5703125, "learning_rate": 0.0002233009708737864, "loss": 0.6923, "step": 3181 }, { "epoch": 1.2852381192869047, "grad_norm": 0.6875, "learning_rate": 0.0002232766990291262, "loss": 0.6967, "step": 3182 }, { "epoch": 1.2856421392858945, "grad_norm": 0.4921875, "learning_rate": 0.00022325242718446598, "loss": 0.6017, "step": 3183 }, { "epoch": 1.2860461592848846, "grad_norm": 0.490234375, "learning_rate": 0.0002232281553398058, "loss": 0.6246, "step": 3184 }, { "epoch": 1.2864501792838745, "grad_norm": 0.474609375, "learning_rate": 0.0002232038834951456, "loss": 0.6025, "step": 3185 }, { "epoch": 1.2868541992828644, "grad_norm": 0.58203125, "learning_rate": 0.0002231796116504854, "loss": 0.6774, "step": 3186 }, { "epoch": 1.2872582192818545, "grad_norm": 0.5234375, "learning_rate": 0.00022315533980582522, "loss": 0.6025, "step": 3187 }, { "epoch": 1.2876622392808443, "grad_norm": 0.466796875, "learning_rate": 0.00022313106796116502, "loss": 0.5127, "step": 3188 }, { "epoch": 1.2880662592798344, "grad_norm": 0.484375, "learning_rate": 0.00022310679611650485, "loss": 0.6149, "step": 3189 }, { "epoch": 1.2884702792788243, "grad_norm": 0.453125, "learning_rate": 0.00022308252427184465, "loss": 0.592, "step": 3190 }, { "epoch": 1.2888742992778144, "grad_norm": 0.458984375, "learning_rate": 0.00022305825242718443, "loss": 0.6005, "step": 3191 }, { "epoch": 1.2892783192768043, "grad_norm": 0.5234375, "learning_rate": 0.00022303398058252425, "loss": 0.5909, "step": 3192 }, { "epoch": 1.2896823392757941, "grad_norm": 0.53515625, "learning_rate": 0.00022300970873786406, "loss": 0.7299, "step": 3193 }, { "epoch": 1.2900863592747842, "grad_norm": 0.64453125, "learning_rate": 0.00022298543689320386, "loss": 0.5746, "step": 3194 }, { "epoch": 1.290490379273774, "grad_norm": 0.51953125, "learning_rate": 0.0002229611650485437, "loss": 0.6323, "step": 3195 }, { "epoch": 1.290894399272764, "grad_norm": 0.5234375, "learning_rate": 0.00022293689320388346, "loss": 0.6349, "step": 3196 }, { "epoch": 1.291298419271754, "grad_norm": 0.54296875, "learning_rate": 0.0002229126213592233, "loss": 0.5936, "step": 3197 }, { "epoch": 1.291702439270744, "grad_norm": 0.49609375, "learning_rate": 0.0002228883495145631, "loss": 0.569, "step": 3198 }, { "epoch": 1.2921064592697338, "grad_norm": 0.625, "learning_rate": 0.0002228640776699029, "loss": 0.6698, "step": 3199 }, { "epoch": 1.2925104792687239, "grad_norm": 0.83984375, "learning_rate": 0.0002228398058252427, "loss": 0.7301, "step": 3200 }, { "epoch": 1.2929144992677137, "grad_norm": 0.52734375, "learning_rate": 0.0002228155339805825, "loss": 0.6626, "step": 3201 }, { "epoch": 1.2933185192667036, "grad_norm": 0.58203125, "learning_rate": 0.0002227912621359223, "loss": 0.6999, "step": 3202 }, { "epoch": 1.2937225392656937, "grad_norm": 0.48046875, "learning_rate": 0.00022276699029126213, "loss": 0.6062, "step": 3203 }, { "epoch": 1.2941265592646836, "grad_norm": 0.6328125, "learning_rate": 0.0002227427184466019, "loss": 0.6995, "step": 3204 }, { "epoch": 1.2945305792636734, "grad_norm": 0.5390625, "learning_rate": 0.0002227184466019417, "loss": 0.6171, "step": 3205 }, { "epoch": 1.2949345992626635, "grad_norm": 0.515625, "learning_rate": 0.00022269417475728154, "loss": 0.6382, "step": 3206 }, { "epoch": 1.2953386192616534, "grad_norm": 0.6171875, "learning_rate": 0.00022266990291262134, "loss": 0.6154, "step": 3207 }, { "epoch": 1.2957426392606435, "grad_norm": 0.53515625, "learning_rate": 0.00022264563106796117, "loss": 0.6279, "step": 3208 }, { "epoch": 1.2961466592596333, "grad_norm": 0.474609375, "learning_rate": 0.00022262135922330095, "loss": 0.6338, "step": 3209 }, { "epoch": 1.2965506792586234, "grad_norm": 0.5546875, "learning_rate": 0.00022259708737864075, "loss": 0.6783, "step": 3210 }, { "epoch": 1.2969546992576133, "grad_norm": 0.51953125, "learning_rate": 0.00022257281553398058, "loss": 0.6556, "step": 3211 }, { "epoch": 1.2973587192566032, "grad_norm": 0.6640625, "learning_rate": 0.00022254854368932036, "loss": 0.7159, "step": 3212 }, { "epoch": 1.2977627392555933, "grad_norm": 0.45703125, "learning_rate": 0.00022252427184466016, "loss": 0.6505, "step": 3213 }, { "epoch": 1.2981667592545831, "grad_norm": 0.53515625, "learning_rate": 0.0002225, "loss": 0.6939, "step": 3214 }, { "epoch": 1.298570779253573, "grad_norm": 0.5390625, "learning_rate": 0.0002224757281553398, "loss": 0.6395, "step": 3215 }, { "epoch": 1.298974799252563, "grad_norm": 0.484375, "learning_rate": 0.00022245145631067956, "loss": 0.6565, "step": 3216 }, { "epoch": 1.299378819251553, "grad_norm": 0.55859375, "learning_rate": 0.0002224271844660194, "loss": 0.6171, "step": 3217 }, { "epoch": 1.2997828392505428, "grad_norm": 0.42578125, "learning_rate": 0.0002224029126213592, "loss": 0.5633, "step": 3218 }, { "epoch": 1.300186859249533, "grad_norm": 0.56640625, "learning_rate": 0.00022237864077669903, "loss": 0.7106, "step": 3219 }, { "epoch": 1.3005908792485228, "grad_norm": 0.5234375, "learning_rate": 0.00022235436893203883, "loss": 0.6676, "step": 3220 }, { "epoch": 1.3009948992475127, "grad_norm": 0.48046875, "learning_rate": 0.0002223300970873786, "loss": 0.5903, "step": 3221 }, { "epoch": 1.3013989192465028, "grad_norm": 0.55859375, "learning_rate": 0.00022230582524271843, "loss": 0.6586, "step": 3222 }, { "epoch": 1.3018029392454926, "grad_norm": 0.5625, "learning_rate": 0.00022228155339805824, "loss": 0.6452, "step": 3223 }, { "epoch": 1.3022069592444825, "grad_norm": 0.68359375, "learning_rate": 0.00022225728155339804, "loss": 0.6641, "step": 3224 }, { "epoch": 1.3026109792434726, "grad_norm": 0.50390625, "learning_rate": 0.00022223300970873784, "loss": 0.6946, "step": 3225 }, { "epoch": 1.3030149992424624, "grad_norm": 0.5390625, "learning_rate": 0.00022220873786407764, "loss": 0.5461, "step": 3226 }, { "epoch": 1.3034190192414523, "grad_norm": 0.55078125, "learning_rate": 0.00022218446601941744, "loss": 0.5739, "step": 3227 }, { "epoch": 1.3038230392404424, "grad_norm": 0.50390625, "learning_rate": 0.00022216019417475727, "loss": 0.6292, "step": 3228 }, { "epoch": 1.3042270592394323, "grad_norm": 0.56640625, "learning_rate": 0.00022213592233009705, "loss": 0.6211, "step": 3229 }, { "epoch": 1.3046310792384224, "grad_norm": 0.65234375, "learning_rate": 0.00022211165048543688, "loss": 0.5906, "step": 3230 }, { "epoch": 1.3050350992374122, "grad_norm": 0.57421875, "learning_rate": 0.00022208737864077668, "loss": 0.6712, "step": 3231 }, { "epoch": 1.3054391192364023, "grad_norm": 0.45703125, "learning_rate": 0.00022206310679611648, "loss": 0.6, "step": 3232 }, { "epoch": 1.3058431392353922, "grad_norm": 0.43359375, "learning_rate": 0.0002220388349514563, "loss": 0.5944, "step": 3233 }, { "epoch": 1.306247159234382, "grad_norm": 0.625, "learning_rate": 0.0002220145631067961, "loss": 0.6864, "step": 3234 }, { "epoch": 1.3066511792333722, "grad_norm": 0.44921875, "learning_rate": 0.0002219902912621359, "loss": 0.5949, "step": 3235 }, { "epoch": 1.307055199232362, "grad_norm": 0.49609375, "learning_rate": 0.00022196601941747572, "loss": 0.5269, "step": 3236 }, { "epoch": 1.3074592192313519, "grad_norm": 0.5546875, "learning_rate": 0.00022194174757281552, "loss": 0.6366, "step": 3237 }, { "epoch": 1.307863239230342, "grad_norm": 0.46875, "learning_rate": 0.00022191747572815532, "loss": 0.6068, "step": 3238 }, { "epoch": 1.3082672592293318, "grad_norm": 0.5078125, "learning_rate": 0.00022189320388349513, "loss": 0.5918, "step": 3239 }, { "epoch": 1.3086712792283217, "grad_norm": 0.5625, "learning_rate": 0.00022186893203883493, "loss": 0.5518, "step": 3240 }, { "epoch": 1.3090752992273118, "grad_norm": 0.466796875, "learning_rate": 0.00022184466019417476, "loss": 0.5655, "step": 3241 }, { "epoch": 1.3094793192263017, "grad_norm": 0.51953125, "learning_rate": 0.00022182038834951453, "loss": 0.623, "step": 3242 }, { "epoch": 1.3098833392252915, "grad_norm": 0.578125, "learning_rate": 0.00022179611650485434, "loss": 0.6746, "step": 3243 }, { "epoch": 1.3102873592242816, "grad_norm": 0.59375, "learning_rate": 0.00022177184466019417, "loss": 0.6491, "step": 3244 }, { "epoch": 1.3106913792232715, "grad_norm": 0.55859375, "learning_rate": 0.00022174757281553397, "loss": 0.697, "step": 3245 }, { "epoch": 1.3110953992222614, "grad_norm": 0.625, "learning_rate": 0.00022172330097087374, "loss": 0.6655, "step": 3246 }, { "epoch": 1.3114994192212515, "grad_norm": 0.48828125, "learning_rate": 0.00022169902912621357, "loss": 0.5603, "step": 3247 }, { "epoch": 1.3119034392202413, "grad_norm": 0.54296875, "learning_rate": 0.00022167475728155338, "loss": 0.6264, "step": 3248 }, { "epoch": 1.3123074592192314, "grad_norm": 0.58984375, "learning_rate": 0.0002216504854368932, "loss": 0.6287, "step": 3249 }, { "epoch": 1.3127114792182213, "grad_norm": 0.5859375, "learning_rate": 0.00022162621359223298, "loss": 0.6727, "step": 3250 }, { "epoch": 1.3131154992172114, "grad_norm": 0.4765625, "learning_rate": 0.00022160194174757278, "loss": 0.6166, "step": 3251 }, { "epoch": 1.3135195192162012, "grad_norm": 0.494140625, "learning_rate": 0.0002215776699029126, "loss": 0.5569, "step": 3252 }, { "epoch": 1.3139235392151911, "grad_norm": 0.470703125, "learning_rate": 0.00022155339805825241, "loss": 0.6009, "step": 3253 }, { "epoch": 1.3143275592141812, "grad_norm": 0.51171875, "learning_rate": 0.0002215291262135922, "loss": 0.6322, "step": 3254 }, { "epoch": 1.314731579213171, "grad_norm": 0.5390625, "learning_rate": 0.00022150485436893202, "loss": 0.6309, "step": 3255 }, { "epoch": 1.315135599212161, "grad_norm": 0.55859375, "learning_rate": 0.00022148058252427182, "loss": 0.6345, "step": 3256 }, { "epoch": 1.315539619211151, "grad_norm": 0.5703125, "learning_rate": 0.00022145631067961162, "loss": 0.6115, "step": 3257 }, { "epoch": 1.315943639210141, "grad_norm": 0.6875, "learning_rate": 0.00022143203883495145, "loss": 0.716, "step": 3258 }, { "epoch": 1.3163476592091308, "grad_norm": 27.625, "learning_rate": 0.00022140776699029123, "loss": 0.7329, "step": 3259 }, { "epoch": 1.3167516792081209, "grad_norm": 0.5078125, "learning_rate": 0.00022138349514563106, "loss": 0.6029, "step": 3260 }, { "epoch": 1.3171556992071107, "grad_norm": 0.6328125, "learning_rate": 0.00022135922330097086, "loss": 0.6529, "step": 3261 }, { "epoch": 1.3175597192061006, "grad_norm": 0.51171875, "learning_rate": 0.00022133495145631066, "loss": 0.657, "step": 3262 }, { "epoch": 1.3179637392050907, "grad_norm": 0.54296875, "learning_rate": 0.00022131067961165046, "loss": 0.5948, "step": 3263 }, { "epoch": 1.3183677592040806, "grad_norm": 0.470703125, "learning_rate": 0.00022128640776699027, "loss": 0.5842, "step": 3264 }, { "epoch": 1.3187717792030704, "grad_norm": 0.62890625, "learning_rate": 0.00022126213592233007, "loss": 0.6079, "step": 3265 }, { "epoch": 1.3191757992020605, "grad_norm": 0.546875, "learning_rate": 0.0002212378640776699, "loss": 0.6164, "step": 3266 }, { "epoch": 1.3195798192010504, "grad_norm": 0.59375, "learning_rate": 0.00022121359223300967, "loss": 0.6987, "step": 3267 }, { "epoch": 1.3199838392000405, "grad_norm": 0.41796875, "learning_rate": 0.0002211893203883495, "loss": 0.5473, "step": 3268 }, { "epoch": 1.3203878591990303, "grad_norm": 0.578125, "learning_rate": 0.0002211650485436893, "loss": 0.6653, "step": 3269 }, { "epoch": 1.3207918791980204, "grad_norm": 0.4765625, "learning_rate": 0.0002211407766990291, "loss": 0.6489, "step": 3270 }, { "epoch": 1.3211958991970103, "grad_norm": 0.5234375, "learning_rate": 0.00022111650485436894, "loss": 0.5801, "step": 3271 }, { "epoch": 1.3215999191960002, "grad_norm": 0.56640625, "learning_rate": 0.0002210922330097087, "loss": 0.5383, "step": 3272 }, { "epoch": 1.3220039391949903, "grad_norm": 0.57421875, "learning_rate": 0.00022106796116504852, "loss": 0.6694, "step": 3273 }, { "epoch": 1.3224079591939801, "grad_norm": 0.65234375, "learning_rate": 0.00022104368932038834, "loss": 0.7713, "step": 3274 }, { "epoch": 1.32281197919297, "grad_norm": 0.5390625, "learning_rate": 0.00022101941747572812, "loss": 0.6518, "step": 3275 }, { "epoch": 1.32321599919196, "grad_norm": 0.6015625, "learning_rate": 0.00022099514563106792, "loss": 0.6441, "step": 3276 }, { "epoch": 1.32362001919095, "grad_norm": 0.5390625, "learning_rate": 0.00022097087378640775, "loss": 0.59, "step": 3277 }, { "epoch": 1.3240240391899398, "grad_norm": 0.51171875, "learning_rate": 0.00022094660194174755, "loss": 0.6417, "step": 3278 }, { "epoch": 1.32442805918893, "grad_norm": 0.66015625, "learning_rate": 0.00022092233009708738, "loss": 0.6957, "step": 3279 }, { "epoch": 1.3248320791879198, "grad_norm": 0.6875, "learning_rate": 0.00022089805825242716, "loss": 0.66, "step": 3280 }, { "epoch": 1.3252360991869097, "grad_norm": 0.703125, "learning_rate": 0.00022087378640776696, "loss": 0.6415, "step": 3281 }, { "epoch": 1.3256401191858997, "grad_norm": 0.52734375, "learning_rate": 0.0002208495145631068, "loss": 0.6608, "step": 3282 }, { "epoch": 1.3260441391848896, "grad_norm": 0.625, "learning_rate": 0.0002208252427184466, "loss": 0.7031, "step": 3283 }, { "epoch": 1.3264481591838795, "grad_norm": 0.5546875, "learning_rate": 0.00022080097087378637, "loss": 0.6195, "step": 3284 }, { "epoch": 1.3268521791828696, "grad_norm": 0.60546875, "learning_rate": 0.0002207766990291262, "loss": 0.5775, "step": 3285 }, { "epoch": 1.3272561991818594, "grad_norm": 0.52734375, "learning_rate": 0.000220752427184466, "loss": 0.6764, "step": 3286 }, { "epoch": 1.3276602191808495, "grad_norm": 0.51171875, "learning_rate": 0.0002207281553398058, "loss": 0.5957, "step": 3287 }, { "epoch": 1.3280642391798394, "grad_norm": 0.515625, "learning_rate": 0.0002207038834951456, "loss": 0.6212, "step": 3288 }, { "epoch": 1.3284682591788293, "grad_norm": 48.25, "learning_rate": 0.0002206796116504854, "loss": 1.4445, "step": 3289 }, { "epoch": 1.3288722791778194, "grad_norm": 0.60546875, "learning_rate": 0.00022065533980582524, "loss": 0.6644, "step": 3290 }, { "epoch": 1.3292762991768092, "grad_norm": 0.54296875, "learning_rate": 0.00022063106796116504, "loss": 0.6775, "step": 3291 }, { "epoch": 1.3296803191757993, "grad_norm": 0.61328125, "learning_rate": 0.00022060679611650481, "loss": 0.7477, "step": 3292 }, { "epoch": 1.3300843391747892, "grad_norm": 0.62109375, "learning_rate": 0.00022058252427184464, "loss": 0.6357, "step": 3293 }, { "epoch": 1.330488359173779, "grad_norm": 0.48828125, "learning_rate": 0.00022055825242718445, "loss": 0.6383, "step": 3294 }, { "epoch": 1.3308923791727691, "grad_norm": 0.478515625, "learning_rate": 0.00022053398058252425, "loss": 0.5942, "step": 3295 }, { "epoch": 1.331296399171759, "grad_norm": 0.66015625, "learning_rate": 0.00022050970873786408, "loss": 0.68, "step": 3296 }, { "epoch": 1.3317004191707489, "grad_norm": 0.53515625, "learning_rate": 0.00022048543689320385, "loss": 0.6227, "step": 3297 }, { "epoch": 1.332104439169739, "grad_norm": 0.578125, "learning_rate": 0.00022046116504854368, "loss": 0.6535, "step": 3298 }, { "epoch": 1.3325084591687288, "grad_norm": 0.45703125, "learning_rate": 0.00022043689320388348, "loss": 0.6057, "step": 3299 }, { "epoch": 1.3329124791677187, "grad_norm": 0.5234375, "learning_rate": 0.0002204126213592233, "loss": 0.6233, "step": 3300 }, { "epoch": 1.3333164991667088, "grad_norm": 0.62109375, "learning_rate": 0.0002203883495145631, "loss": 0.6635, "step": 3301 }, { "epoch": 1.3337205191656987, "grad_norm": 0.671875, "learning_rate": 0.0002203640776699029, "loss": 0.6545, "step": 3302 }, { "epoch": 1.3341245391646885, "grad_norm": 0.5234375, "learning_rate": 0.0002203398058252427, "loss": 0.6351, "step": 3303 }, { "epoch": 1.3345285591636786, "grad_norm": 0.48828125, "learning_rate": 0.00022031553398058252, "loss": 0.5497, "step": 3304 }, { "epoch": 1.3349325791626685, "grad_norm": 0.5546875, "learning_rate": 0.0002202912621359223, "loss": 0.6456, "step": 3305 }, { "epoch": 1.3353365991616584, "grad_norm": 0.53125, "learning_rate": 0.0002202669902912621, "loss": 0.6802, "step": 3306 }, { "epoch": 1.3357406191606485, "grad_norm": 0.60546875, "learning_rate": 0.00022024271844660193, "loss": 0.7348, "step": 3307 }, { "epoch": 1.3361446391596383, "grad_norm": 0.54296875, "learning_rate": 0.00022021844660194173, "loss": 0.6553, "step": 3308 }, { "epoch": 1.3365486591586284, "grad_norm": 0.8671875, "learning_rate": 0.00022019417475728156, "loss": 0.693, "step": 3309 }, { "epoch": 1.3369526791576183, "grad_norm": 0.578125, "learning_rate": 0.00022016990291262134, "loss": 0.6318, "step": 3310 }, { "epoch": 1.3373566991566084, "grad_norm": 0.42578125, "learning_rate": 0.00022014563106796114, "loss": 0.5965, "step": 3311 }, { "epoch": 1.3377607191555982, "grad_norm": 0.55078125, "learning_rate": 0.00022012135922330097, "loss": 0.6854, "step": 3312 }, { "epoch": 1.3381647391545881, "grad_norm": 0.53125, "learning_rate": 0.00022009708737864074, "loss": 0.6571, "step": 3313 }, { "epoch": 1.3385687591535782, "grad_norm": 0.7890625, "learning_rate": 0.00022007281553398055, "loss": 0.7127, "step": 3314 }, { "epoch": 1.338972779152568, "grad_norm": 0.6328125, "learning_rate": 0.00022004854368932038, "loss": 0.6646, "step": 3315 }, { "epoch": 1.339376799151558, "grad_norm": 0.51171875, "learning_rate": 0.00022002427184466018, "loss": 0.6238, "step": 3316 }, { "epoch": 1.339780819150548, "grad_norm": 0.52734375, "learning_rate": 0.00021999999999999995, "loss": 0.6446, "step": 3317 }, { "epoch": 1.340184839149538, "grad_norm": 0.5546875, "learning_rate": 0.00021997572815533978, "loss": 0.6581, "step": 3318 }, { "epoch": 1.3405888591485278, "grad_norm": 4.875, "learning_rate": 0.00021995145631067959, "loss": 1.0206, "step": 3319 }, { "epoch": 1.3409928791475179, "grad_norm": 0.671875, "learning_rate": 0.00021992718446601941, "loss": 0.7179, "step": 3320 }, { "epoch": 1.3413968991465077, "grad_norm": 0.58203125, "learning_rate": 0.00021990291262135922, "loss": 0.6647, "step": 3321 }, { "epoch": 1.3418009191454976, "grad_norm": 0.64453125, "learning_rate": 0.000219878640776699, "loss": 0.6259, "step": 3322 }, { "epoch": 1.3422049391444877, "grad_norm": 0.5546875, "learning_rate": 0.00021985436893203882, "loss": 0.6533, "step": 3323 }, { "epoch": 1.3426089591434776, "grad_norm": 0.6171875, "learning_rate": 0.00021983009708737862, "loss": 0.7159, "step": 3324 }, { "epoch": 1.3430129791424674, "grad_norm": 0.5390625, "learning_rate": 0.00021980582524271843, "loss": 0.6158, "step": 3325 }, { "epoch": 1.3434169991414575, "grad_norm": 0.5625, "learning_rate": 0.00021978155339805823, "loss": 0.6724, "step": 3326 }, { "epoch": 1.3438210191404474, "grad_norm": 0.6015625, "learning_rate": 0.00021975728155339803, "loss": 0.6262, "step": 3327 }, { "epoch": 1.3442250391394375, "grad_norm": 0.5078125, "learning_rate": 0.00021973300970873783, "loss": 0.6274, "step": 3328 }, { "epoch": 1.3446290591384273, "grad_norm": 0.56640625, "learning_rate": 0.00021970873786407766, "loss": 0.617, "step": 3329 }, { "epoch": 1.3450330791374174, "grad_norm": 0.482421875, "learning_rate": 0.00021968446601941744, "loss": 0.617, "step": 3330 }, { "epoch": 1.3454370991364073, "grad_norm": 0.578125, "learning_rate": 0.00021966019417475727, "loss": 0.6756, "step": 3331 }, { "epoch": 1.3458411191353972, "grad_norm": 0.55078125, "learning_rate": 0.00021963592233009707, "loss": 0.6644, "step": 3332 }, { "epoch": 1.3462451391343873, "grad_norm": 0.515625, "learning_rate": 0.00021961165048543687, "loss": 0.6016, "step": 3333 }, { "epoch": 1.3466491591333771, "grad_norm": 0.5, "learning_rate": 0.0002195873786407767, "loss": 0.6186, "step": 3334 }, { "epoch": 1.347053179132367, "grad_norm": 0.5546875, "learning_rate": 0.00021956310679611648, "loss": 0.6838, "step": 3335 }, { "epoch": 1.347457199131357, "grad_norm": 0.80859375, "learning_rate": 0.00021953883495145628, "loss": 0.7366, "step": 3336 }, { "epoch": 1.347861219130347, "grad_norm": 0.515625, "learning_rate": 0.0002195145631067961, "loss": 0.6455, "step": 3337 }, { "epoch": 1.3482652391293368, "grad_norm": 0.490234375, "learning_rate": 0.0002194902912621359, "loss": 0.6124, "step": 3338 }, { "epoch": 1.348669259128327, "grad_norm": 0.53515625, "learning_rate": 0.0002194660194174757, "loss": 0.6827, "step": 3339 }, { "epoch": 1.3490732791273168, "grad_norm": 0.59375, "learning_rate": 0.00021944174757281552, "loss": 0.734, "step": 3340 }, { "epoch": 1.3494772991263067, "grad_norm": 0.51953125, "learning_rate": 0.00021941747572815532, "loss": 0.7011, "step": 3341 }, { "epoch": 1.3498813191252967, "grad_norm": 0.59375, "learning_rate": 0.00021939320388349515, "loss": 0.6627, "step": 3342 }, { "epoch": 1.3502853391242866, "grad_norm": 0.53125, "learning_rate": 0.00021936893203883492, "loss": 0.6585, "step": 3343 }, { "epoch": 1.3506893591232765, "grad_norm": 0.478515625, "learning_rate": 0.00021934466019417473, "loss": 0.644, "step": 3344 }, { "epoch": 1.3510933791222666, "grad_norm": 0.490234375, "learning_rate": 0.00021932038834951455, "loss": 0.5599, "step": 3345 }, { "epoch": 1.3514973991212564, "grad_norm": 0.5078125, "learning_rate": 0.00021929611650485436, "loss": 0.6526, "step": 3346 }, { "epoch": 1.3519014191202465, "grad_norm": 0.609375, "learning_rate": 0.00021927184466019413, "loss": 0.6558, "step": 3347 }, { "epoch": 1.3523054391192364, "grad_norm": 0.5703125, "learning_rate": 0.00021924757281553396, "loss": 0.7175, "step": 3348 }, { "epoch": 1.3527094591182265, "grad_norm": 0.61328125, "learning_rate": 0.00021922330097087376, "loss": 0.7383, "step": 3349 }, { "epoch": 1.3531134791172164, "grad_norm": 0.478515625, "learning_rate": 0.0002191990291262136, "loss": 0.5558, "step": 3350 }, { "epoch": 1.3535174991162062, "grad_norm": 0.53515625, "learning_rate": 0.00021917475728155337, "loss": 0.5651, "step": 3351 }, { "epoch": 1.3539215191151963, "grad_norm": 0.546875, "learning_rate": 0.00021915048543689317, "loss": 0.5862, "step": 3352 }, { "epoch": 1.3543255391141862, "grad_norm": 0.55859375, "learning_rate": 0.000219126213592233, "loss": 0.6788, "step": 3353 }, { "epoch": 1.354729559113176, "grad_norm": 0.5859375, "learning_rate": 0.0002191019417475728, "loss": 0.6116, "step": 3354 }, { "epoch": 1.3551335791121661, "grad_norm": 0.55859375, "learning_rate": 0.00021907766990291258, "loss": 0.6822, "step": 3355 }, { "epoch": 1.355537599111156, "grad_norm": 0.59765625, "learning_rate": 0.0002190533980582524, "loss": 0.7149, "step": 3356 }, { "epoch": 1.3559416191101459, "grad_norm": 0.55078125, "learning_rate": 0.0002190291262135922, "loss": 0.612, "step": 3357 }, { "epoch": 1.356345639109136, "grad_norm": 0.4453125, "learning_rate": 0.000219004854368932, "loss": 0.5689, "step": 3358 }, { "epoch": 1.3567496591081258, "grad_norm": 0.62890625, "learning_rate": 0.00021898058252427184, "loss": 0.659, "step": 3359 }, { "epoch": 1.3571536791071157, "grad_norm": 0.46484375, "learning_rate": 0.00021895631067961162, "loss": 0.5643, "step": 3360 }, { "epoch": 1.3575576991061058, "grad_norm": 0.5234375, "learning_rate": 0.00021893203883495145, "loss": 0.5815, "step": 3361 }, { "epoch": 1.3579617191050957, "grad_norm": 0.482421875, "learning_rate": 0.00021890776699029125, "loss": 0.563, "step": 3362 }, { "epoch": 1.3583657391040855, "grad_norm": 0.462890625, "learning_rate": 0.00021888349514563105, "loss": 0.5777, "step": 3363 }, { "epoch": 1.3587697591030756, "grad_norm": 0.66015625, "learning_rate": 0.00021885922330097085, "loss": 0.6447, "step": 3364 }, { "epoch": 1.3591737791020655, "grad_norm": 0.5625, "learning_rate": 0.00021883495145631066, "loss": 0.6807, "step": 3365 }, { "epoch": 1.3595777991010556, "grad_norm": 0.61328125, "learning_rate": 0.00021881067961165046, "loss": 0.7098, "step": 3366 }, { "epoch": 1.3599818191000455, "grad_norm": 0.55078125, "learning_rate": 0.0002187864077669903, "loss": 0.6288, "step": 3367 }, { "epoch": 1.3603858390990353, "grad_norm": 0.46484375, "learning_rate": 0.00021876213592233006, "loss": 0.5953, "step": 3368 }, { "epoch": 1.3607898590980254, "grad_norm": 0.5390625, "learning_rate": 0.0002187378640776699, "loss": 0.6296, "step": 3369 }, { "epoch": 1.3611938790970153, "grad_norm": 0.61328125, "learning_rate": 0.0002187135922330097, "loss": 0.6725, "step": 3370 }, { "epoch": 1.3615978990960054, "grad_norm": 0.5234375, "learning_rate": 0.0002186893203883495, "loss": 0.6248, "step": 3371 }, { "epoch": 1.3620019190949952, "grad_norm": 0.52734375, "learning_rate": 0.00021866504854368933, "loss": 0.6538, "step": 3372 }, { "epoch": 1.362405939093985, "grad_norm": 0.515625, "learning_rate": 0.0002186407766990291, "loss": 0.6246, "step": 3373 }, { "epoch": 1.3628099590929752, "grad_norm": 0.490234375, "learning_rate": 0.0002186165048543689, "loss": 0.5743, "step": 3374 }, { "epoch": 1.363213979091965, "grad_norm": 0.439453125, "learning_rate": 0.00021859223300970873, "loss": 0.587, "step": 3375 }, { "epoch": 1.363617999090955, "grad_norm": 0.59765625, "learning_rate": 0.0002185679611650485, "loss": 0.6488, "step": 3376 }, { "epoch": 1.364022019089945, "grad_norm": 0.7109375, "learning_rate": 0.0002185436893203883, "loss": 0.7139, "step": 3377 }, { "epoch": 1.364426039088935, "grad_norm": 0.45703125, "learning_rate": 0.00021851941747572814, "loss": 0.6657, "step": 3378 }, { "epoch": 1.3648300590879248, "grad_norm": 0.54296875, "learning_rate": 0.00021849514563106794, "loss": 0.5943, "step": 3379 }, { "epoch": 1.3652340790869149, "grad_norm": 0.56640625, "learning_rate": 0.00021847087378640777, "loss": 0.6827, "step": 3380 }, { "epoch": 1.3656380990859047, "grad_norm": 0.5703125, "learning_rate": 0.00021844660194174755, "loss": 0.6146, "step": 3381 }, { "epoch": 1.3660421190848946, "grad_norm": 0.54296875, "learning_rate": 0.00021842233009708735, "loss": 0.6962, "step": 3382 }, { "epoch": 1.3664461390838847, "grad_norm": 0.49609375, "learning_rate": 0.00021839805825242718, "loss": 0.6343, "step": 3383 }, { "epoch": 1.3668501590828745, "grad_norm": 0.53125, "learning_rate": 0.00021837378640776698, "loss": 0.6747, "step": 3384 }, { "epoch": 1.3672541790818644, "grad_norm": 0.52734375, "learning_rate": 0.00021834951456310676, "loss": 0.6608, "step": 3385 }, { "epoch": 1.3676581990808545, "grad_norm": 0.55078125, "learning_rate": 0.00021832524271844659, "loss": 0.5884, "step": 3386 }, { "epoch": 1.3680622190798444, "grad_norm": 0.65625, "learning_rate": 0.0002183009708737864, "loss": 0.6625, "step": 3387 }, { "epoch": 1.3684662390788345, "grad_norm": 0.46875, "learning_rate": 0.0002182766990291262, "loss": 0.5706, "step": 3388 }, { "epoch": 1.3688702590778243, "grad_norm": 0.48046875, "learning_rate": 0.000218252427184466, "loss": 0.6568, "step": 3389 }, { "epoch": 1.3692742790768144, "grad_norm": 0.478515625, "learning_rate": 0.0002182281553398058, "loss": 0.6199, "step": 3390 }, { "epoch": 1.3696782990758043, "grad_norm": 0.5703125, "learning_rate": 0.00021820388349514562, "loss": 0.6724, "step": 3391 }, { "epoch": 1.3700823190747942, "grad_norm": 0.5234375, "learning_rate": 0.00021817961165048543, "loss": 0.6733, "step": 3392 }, { "epoch": 1.3704863390737843, "grad_norm": 0.56640625, "learning_rate": 0.0002181553398058252, "loss": 0.6903, "step": 3393 }, { "epoch": 1.3708903590727741, "grad_norm": 0.4609375, "learning_rate": 0.00021813106796116503, "loss": 0.5983, "step": 3394 }, { "epoch": 1.371294379071764, "grad_norm": 0.671875, "learning_rate": 0.00021810679611650483, "loss": 0.7094, "step": 3395 }, { "epoch": 1.371698399070754, "grad_norm": 0.47265625, "learning_rate": 0.00021808252427184464, "loss": 0.6468, "step": 3396 }, { "epoch": 1.372102419069744, "grad_norm": 0.55859375, "learning_rate": 0.00021805825242718447, "loss": 0.6449, "step": 3397 }, { "epoch": 1.3725064390687338, "grad_norm": 0.431640625, "learning_rate": 0.00021803398058252424, "loss": 0.5523, "step": 3398 }, { "epoch": 1.372910459067724, "grad_norm": 0.6328125, "learning_rate": 0.00021800970873786407, "loss": 0.6244, "step": 3399 }, { "epoch": 1.3733144790667138, "grad_norm": 0.50390625, "learning_rate": 0.00021798543689320387, "loss": 0.6793, "step": 3400 }, { "epoch": 1.3737184990657036, "grad_norm": 0.56640625, "learning_rate": 0.00021796116504854368, "loss": 0.7095, "step": 3401 }, { "epoch": 1.3741225190646937, "grad_norm": 0.59375, "learning_rate": 0.00021793689320388348, "loss": 0.674, "step": 3402 }, { "epoch": 1.3745265390636836, "grad_norm": 0.59765625, "learning_rate": 0.00021791262135922328, "loss": 0.6394, "step": 3403 }, { "epoch": 1.3749305590626735, "grad_norm": 0.4765625, "learning_rate": 0.00021788834951456308, "loss": 0.6218, "step": 3404 }, { "epoch": 1.3753345790616636, "grad_norm": 0.55859375, "learning_rate": 0.0002178640776699029, "loss": 0.67, "step": 3405 }, { "epoch": 1.3757385990606534, "grad_norm": 0.5546875, "learning_rate": 0.0002178398058252427, "loss": 0.6726, "step": 3406 }, { "epoch": 1.3761426190596435, "grad_norm": 0.4765625, "learning_rate": 0.0002178155339805825, "loss": 0.6088, "step": 3407 }, { "epoch": 1.3765466390586334, "grad_norm": 0.515625, "learning_rate": 0.00021779126213592232, "loss": 0.5836, "step": 3408 }, { "epoch": 1.3769506590576235, "grad_norm": 0.56640625, "learning_rate": 0.00021776699029126212, "loss": 0.6087, "step": 3409 }, { "epoch": 1.3773546790566134, "grad_norm": 0.5625, "learning_rate": 0.00021774271844660195, "loss": 0.6519, "step": 3410 }, { "epoch": 1.3777586990556032, "grad_norm": 0.578125, "learning_rate": 0.00021771844660194173, "loss": 0.7228, "step": 3411 }, { "epoch": 1.3781627190545933, "grad_norm": 0.515625, "learning_rate": 0.00021769417475728153, "loss": 0.5863, "step": 3412 }, { "epoch": 1.3785667390535832, "grad_norm": 0.466796875, "learning_rate": 0.00021766990291262136, "loss": 0.6195, "step": 3413 }, { "epoch": 1.378970759052573, "grad_norm": 0.4609375, "learning_rate": 0.00021764563106796113, "loss": 0.6058, "step": 3414 }, { "epoch": 1.3793747790515631, "grad_norm": 0.6015625, "learning_rate": 0.00021762135922330094, "loss": 0.6674, "step": 3415 }, { "epoch": 1.379778799050553, "grad_norm": 0.515625, "learning_rate": 0.00021759708737864076, "loss": 0.6522, "step": 3416 }, { "epoch": 1.3801828190495429, "grad_norm": 0.48046875, "learning_rate": 0.00021757281553398057, "loss": 0.5214, "step": 3417 }, { "epoch": 1.380586839048533, "grad_norm": 0.4765625, "learning_rate": 0.00021754854368932034, "loss": 0.6038, "step": 3418 }, { "epoch": 1.3809908590475228, "grad_norm": 0.62890625, "learning_rate": 0.00021752427184466017, "loss": 0.7276, "step": 3419 }, { "epoch": 1.3813948790465127, "grad_norm": 0.48046875, "learning_rate": 0.00021749999999999997, "loss": 0.6114, "step": 3420 }, { "epoch": 1.3817988990455028, "grad_norm": 0.5625, "learning_rate": 0.0002174757281553398, "loss": 0.667, "step": 3421 }, { "epoch": 1.3822029190444927, "grad_norm": 0.6015625, "learning_rate": 0.0002174514563106796, "loss": 0.6705, "step": 3422 }, { "epoch": 1.3826069390434825, "grad_norm": 0.55859375, "learning_rate": 0.00021742718446601938, "loss": 0.5662, "step": 3423 }, { "epoch": 1.3830109590424726, "grad_norm": 0.52734375, "learning_rate": 0.0002174029126213592, "loss": 0.602, "step": 3424 }, { "epoch": 1.3834149790414625, "grad_norm": 0.5, "learning_rate": 0.000217378640776699, "loss": 0.6181, "step": 3425 }, { "epoch": 1.3838189990404526, "grad_norm": 0.51171875, "learning_rate": 0.00021735436893203882, "loss": 0.5713, "step": 3426 }, { "epoch": 1.3842230190394424, "grad_norm": 0.58203125, "learning_rate": 0.00021733009708737862, "loss": 0.7062, "step": 3427 }, { "epoch": 1.3846270390384325, "grad_norm": 0.498046875, "learning_rate": 0.00021730582524271842, "loss": 0.6063, "step": 3428 }, { "epoch": 1.3850310590374224, "grad_norm": 0.45703125, "learning_rate": 0.00021728155339805822, "loss": 0.6148, "step": 3429 }, { "epoch": 1.3854350790364123, "grad_norm": 0.4765625, "learning_rate": 0.00021725728155339805, "loss": 0.6125, "step": 3430 }, { "epoch": 1.3858390990354024, "grad_norm": 0.4765625, "learning_rate": 0.00021723300970873783, "loss": 0.6041, "step": 3431 }, { "epoch": 1.3862431190343922, "grad_norm": 0.5, "learning_rate": 0.00021720873786407766, "loss": 0.5922, "step": 3432 }, { "epoch": 1.386647139033382, "grad_norm": 0.47265625, "learning_rate": 0.00021718446601941746, "loss": 0.5636, "step": 3433 }, { "epoch": 1.3870511590323722, "grad_norm": 0.56640625, "learning_rate": 0.00021716019417475726, "loss": 0.6363, "step": 3434 }, { "epoch": 1.387455179031362, "grad_norm": 0.6953125, "learning_rate": 0.0002171359223300971, "loss": 0.7088, "step": 3435 }, { "epoch": 1.387859199030352, "grad_norm": 0.578125, "learning_rate": 0.00021711165048543687, "loss": 0.6349, "step": 3436 }, { "epoch": 1.388263219029342, "grad_norm": 0.54296875, "learning_rate": 0.00021708737864077667, "loss": 0.6471, "step": 3437 }, { "epoch": 1.388667239028332, "grad_norm": 0.609375, "learning_rate": 0.0002170631067961165, "loss": 0.6839, "step": 3438 }, { "epoch": 1.3890712590273218, "grad_norm": 0.53125, "learning_rate": 0.00021703883495145627, "loss": 0.6577, "step": 3439 }, { "epoch": 1.3894752790263118, "grad_norm": 0.484375, "learning_rate": 0.0002170145631067961, "loss": 0.5965, "step": 3440 }, { "epoch": 1.3898792990253017, "grad_norm": 0.50390625, "learning_rate": 0.0002169902912621359, "loss": 0.6096, "step": 3441 }, { "epoch": 1.3902833190242916, "grad_norm": 0.7421875, "learning_rate": 0.0002169660194174757, "loss": 0.7264, "step": 3442 }, { "epoch": 1.3906873390232817, "grad_norm": 0.494140625, "learning_rate": 0.00021694174757281554, "loss": 0.6694, "step": 3443 }, { "epoch": 1.3910913590222715, "grad_norm": 0.59765625, "learning_rate": 0.0002169174757281553, "loss": 0.7045, "step": 3444 }, { "epoch": 1.3914953790212614, "grad_norm": 0.56640625, "learning_rate": 0.00021689320388349511, "loss": 0.6181, "step": 3445 }, { "epoch": 1.3918993990202515, "grad_norm": 0.46484375, "learning_rate": 0.00021686893203883494, "loss": 0.623, "step": 3446 }, { "epoch": 1.3923034190192414, "grad_norm": 0.5390625, "learning_rate": 0.00021684466019417475, "loss": 0.6434, "step": 3447 }, { "epoch": 1.3927074390182315, "grad_norm": 0.65625, "learning_rate": 0.00021682038834951452, "loss": 0.7365, "step": 3448 }, { "epoch": 1.3931114590172213, "grad_norm": 0.55078125, "learning_rate": 0.00021679611650485435, "loss": 0.6771, "step": 3449 }, { "epoch": 1.3935154790162114, "grad_norm": 0.59765625, "learning_rate": 0.00021677184466019415, "loss": 0.6924, "step": 3450 }, { "epoch": 1.3939194990152013, "grad_norm": 0.70703125, "learning_rate": 0.00021674757281553398, "loss": 0.7827, "step": 3451 }, { "epoch": 1.3943235190141912, "grad_norm": 0.5078125, "learning_rate": 0.00021672330097087376, "loss": 0.6151, "step": 3452 }, { "epoch": 1.3947275390131813, "grad_norm": 0.5703125, "learning_rate": 0.00021669902912621356, "loss": 0.656, "step": 3453 }, { "epoch": 1.3951315590121711, "grad_norm": 0.51953125, "learning_rate": 0.0002166747572815534, "loss": 0.6115, "step": 3454 }, { "epoch": 1.395535579011161, "grad_norm": 0.53515625, "learning_rate": 0.0002166504854368932, "loss": 0.6358, "step": 3455 }, { "epoch": 1.395939599010151, "grad_norm": 0.68359375, "learning_rate": 0.00021662621359223297, "loss": 0.6226, "step": 3456 }, { "epoch": 1.396343619009141, "grad_norm": 0.57421875, "learning_rate": 0.0002166019417475728, "loss": 0.6212, "step": 3457 }, { "epoch": 1.3967476390081308, "grad_norm": 0.48828125, "learning_rate": 0.0002165776699029126, "loss": 0.6065, "step": 3458 }, { "epoch": 1.397151659007121, "grad_norm": 0.5859375, "learning_rate": 0.0002165533980582524, "loss": 0.6571, "step": 3459 }, { "epoch": 1.3975556790061108, "grad_norm": 0.640625, "learning_rate": 0.00021652912621359223, "loss": 0.7419, "step": 3460 }, { "epoch": 1.3979596990051006, "grad_norm": 0.56640625, "learning_rate": 0.000216504854368932, "loss": 0.6586, "step": 3461 }, { "epoch": 1.3983637190040907, "grad_norm": 0.51171875, "learning_rate": 0.00021648058252427184, "loss": 0.6186, "step": 3462 }, { "epoch": 1.3987677390030806, "grad_norm": 0.5625, "learning_rate": 0.00021645631067961164, "loss": 0.6749, "step": 3463 }, { "epoch": 1.3991717590020705, "grad_norm": 0.52734375, "learning_rate": 0.00021643203883495144, "loss": 0.7164, "step": 3464 }, { "epoch": 1.3995757790010606, "grad_norm": 0.53515625, "learning_rate": 0.00021640776699029124, "loss": 0.6068, "step": 3465 }, { "epoch": 1.3999797990000504, "grad_norm": 0.52734375, "learning_rate": 0.00021638349514563104, "loss": 0.5822, "step": 3466 }, { "epoch": 1.4003838189990405, "grad_norm": 0.55859375, "learning_rate": 0.00021635922330097085, "loss": 0.682, "step": 3467 }, { "epoch": 1.4007878389980304, "grad_norm": 0.51953125, "learning_rate": 0.00021633495145631068, "loss": 0.635, "step": 3468 }, { "epoch": 1.4011918589970205, "grad_norm": 0.59765625, "learning_rate": 0.00021631067961165045, "loss": 0.5917, "step": 3469 }, { "epoch": 1.4015958789960103, "grad_norm": 0.466796875, "learning_rate": 0.00021628640776699028, "loss": 0.6072, "step": 3470 }, { "epoch": 1.4019998989950002, "grad_norm": 0.5234375, "learning_rate": 0.00021626213592233008, "loss": 0.6802, "step": 3471 }, { "epoch": 1.4024039189939903, "grad_norm": 0.6640625, "learning_rate": 0.00021623786407766989, "loss": 0.684, "step": 3472 }, { "epoch": 1.4028079389929802, "grad_norm": 0.57421875, "learning_rate": 0.00021621359223300972, "loss": 0.6759, "step": 3473 }, { "epoch": 1.40321195899197, "grad_norm": 0.51171875, "learning_rate": 0.0002161893203883495, "loss": 0.616, "step": 3474 }, { "epoch": 1.4036159789909601, "grad_norm": 0.55078125, "learning_rate": 0.0002161650485436893, "loss": 0.6577, "step": 3475 }, { "epoch": 1.40401999898995, "grad_norm": 0.5625, "learning_rate": 0.00021614077669902912, "loss": 0.6014, "step": 3476 }, { "epoch": 1.4044240189889399, "grad_norm": 0.53125, "learning_rate": 0.0002161165048543689, "loss": 0.5679, "step": 3477 }, { "epoch": 1.40482803898793, "grad_norm": 0.62109375, "learning_rate": 0.0002160922330097087, "loss": 0.5954, "step": 3478 }, { "epoch": 1.4052320589869198, "grad_norm": 0.5859375, "learning_rate": 0.00021606796116504853, "loss": 0.6689, "step": 3479 }, { "epoch": 1.4056360789859097, "grad_norm": 0.62109375, "learning_rate": 0.00021604368932038833, "loss": 0.6929, "step": 3480 }, { "epoch": 1.4060400989848998, "grad_norm": 0.46484375, "learning_rate": 0.00021601941747572816, "loss": 0.6079, "step": 3481 }, { "epoch": 1.4064441189838897, "grad_norm": 0.482421875, "learning_rate": 0.00021599514563106794, "loss": 0.6067, "step": 3482 }, { "epoch": 1.4068481389828795, "grad_norm": 0.77734375, "learning_rate": 0.00021597087378640774, "loss": 0.5938, "step": 3483 }, { "epoch": 1.4072521589818696, "grad_norm": 0.578125, "learning_rate": 0.00021594660194174757, "loss": 0.6693, "step": 3484 }, { "epoch": 1.4076561789808595, "grad_norm": 0.5546875, "learning_rate": 0.00021592233009708737, "loss": 0.6134, "step": 3485 }, { "epoch": 1.4080601989798496, "grad_norm": 0.51171875, "learning_rate": 0.00021589805825242715, "loss": 0.6228, "step": 3486 }, { "epoch": 1.4084642189788394, "grad_norm": 0.48828125, "learning_rate": 0.00021587378640776697, "loss": 0.5946, "step": 3487 }, { "epoch": 1.4088682389778295, "grad_norm": 0.6484375, "learning_rate": 0.00021584951456310678, "loss": 0.6884, "step": 3488 }, { "epoch": 1.4092722589768194, "grad_norm": 0.5625, "learning_rate": 0.00021582524271844658, "loss": 0.6733, "step": 3489 }, { "epoch": 1.4096762789758093, "grad_norm": 0.5625, "learning_rate": 0.00021580097087378638, "loss": 0.7001, "step": 3490 }, { "epoch": 1.4100802989747994, "grad_norm": 0.47265625, "learning_rate": 0.00021577669902912618, "loss": 0.5942, "step": 3491 }, { "epoch": 1.4104843189737892, "grad_norm": 0.50390625, "learning_rate": 0.00021575242718446601, "loss": 0.6846, "step": 3492 }, { "epoch": 1.410888338972779, "grad_norm": 0.51171875, "learning_rate": 0.00021572815533980582, "loss": 0.6188, "step": 3493 }, { "epoch": 1.4112923589717692, "grad_norm": 0.490234375, "learning_rate": 0.0002157038834951456, "loss": 0.6219, "step": 3494 }, { "epoch": 1.411696378970759, "grad_norm": 0.451171875, "learning_rate": 0.00021567961165048542, "loss": 0.5974, "step": 3495 }, { "epoch": 1.412100398969749, "grad_norm": 0.431640625, "learning_rate": 0.00021565533980582522, "loss": 0.5647, "step": 3496 }, { "epoch": 1.412504418968739, "grad_norm": 0.625, "learning_rate": 0.00021563106796116503, "loss": 0.7005, "step": 3497 }, { "epoch": 1.4129084389677289, "grad_norm": 0.53125, "learning_rate": 0.00021560679611650485, "loss": 0.6336, "step": 3498 }, { "epoch": 1.4133124589667188, "grad_norm": 0.490234375, "learning_rate": 0.00021558252427184463, "loss": 0.6206, "step": 3499 }, { "epoch": 1.4137164789657088, "grad_norm": 0.58984375, "learning_rate": 0.00021555825242718446, "loss": 0.7263, "step": 3500 }, { "epoch": 1.4141204989646987, "grad_norm": 0.5625, "learning_rate": 0.00021553398058252426, "loss": 0.6881, "step": 3501 }, { "epoch": 1.4145245189636886, "grad_norm": 0.50390625, "learning_rate": 0.00021550970873786406, "loss": 0.6101, "step": 3502 }, { "epoch": 1.4149285389626787, "grad_norm": 0.65625, "learning_rate": 0.00021548543689320387, "loss": 0.6201, "step": 3503 }, { "epoch": 1.4153325589616685, "grad_norm": 0.57421875, "learning_rate": 0.00021546116504854367, "loss": 0.6837, "step": 3504 }, { "epoch": 1.4157365789606586, "grad_norm": 0.578125, "learning_rate": 0.00021543689320388347, "loss": 0.641, "step": 3505 }, { "epoch": 1.4161405989596485, "grad_norm": 0.66796875, "learning_rate": 0.0002154126213592233, "loss": 0.6873, "step": 3506 }, { "epoch": 1.4165446189586386, "grad_norm": 0.55078125, "learning_rate": 0.00021538834951456308, "loss": 0.6894, "step": 3507 }, { "epoch": 1.4169486389576285, "grad_norm": 0.44921875, "learning_rate": 0.00021536407766990288, "loss": 0.5937, "step": 3508 }, { "epoch": 1.4173526589566183, "grad_norm": 0.58984375, "learning_rate": 0.0002153398058252427, "loss": 0.6313, "step": 3509 }, { "epoch": 1.4177566789556084, "grad_norm": 0.474609375, "learning_rate": 0.0002153155339805825, "loss": 0.6077, "step": 3510 }, { "epoch": 1.4181606989545983, "grad_norm": 0.53125, "learning_rate": 0.00021529126213592234, "loss": 0.5768, "step": 3511 }, { "epoch": 1.4185647189535882, "grad_norm": 0.484375, "learning_rate": 0.00021526699029126211, "loss": 0.6107, "step": 3512 }, { "epoch": 1.4189687389525782, "grad_norm": 0.48046875, "learning_rate": 0.00021524271844660192, "loss": 0.5473, "step": 3513 }, { "epoch": 1.4193727589515681, "grad_norm": 0.5234375, "learning_rate": 0.00021521844660194175, "loss": 0.6554, "step": 3514 }, { "epoch": 1.419776778950558, "grad_norm": 0.5546875, "learning_rate": 0.00021519417475728152, "loss": 0.6759, "step": 3515 }, { "epoch": 1.420180798949548, "grad_norm": 0.55859375, "learning_rate": 0.00021516990291262132, "loss": 0.6594, "step": 3516 }, { "epoch": 1.420584818948538, "grad_norm": 0.4609375, "learning_rate": 0.00021514563106796115, "loss": 0.5934, "step": 3517 }, { "epoch": 1.4209888389475278, "grad_norm": 0.52734375, "learning_rate": 0.00021512135922330096, "loss": 0.6523, "step": 3518 }, { "epoch": 1.421392858946518, "grad_norm": 0.52734375, "learning_rate": 0.00021509708737864073, "loss": 0.631, "step": 3519 }, { "epoch": 1.4217968789455078, "grad_norm": 0.55078125, "learning_rate": 0.00021507281553398056, "loss": 0.6641, "step": 3520 }, { "epoch": 1.4222008989444976, "grad_norm": 0.60546875, "learning_rate": 0.00021504854368932036, "loss": 0.6439, "step": 3521 }, { "epoch": 1.4226049189434877, "grad_norm": 0.451171875, "learning_rate": 0.0002150242718446602, "loss": 0.5728, "step": 3522 }, { "epoch": 1.4230089389424776, "grad_norm": 0.470703125, "learning_rate": 0.000215, "loss": 0.5853, "step": 3523 }, { "epoch": 1.4234129589414675, "grad_norm": 0.55078125, "learning_rate": 0.00021497572815533977, "loss": 0.6223, "step": 3524 }, { "epoch": 1.4238169789404576, "grad_norm": 0.515625, "learning_rate": 0.0002149514563106796, "loss": 0.6293, "step": 3525 }, { "epoch": 1.4242209989394474, "grad_norm": 0.52734375, "learning_rate": 0.0002149271844660194, "loss": 0.6437, "step": 3526 }, { "epoch": 1.4246250189384375, "grad_norm": 0.6015625, "learning_rate": 0.0002149029126213592, "loss": 0.5898, "step": 3527 }, { "epoch": 1.4250290389374274, "grad_norm": 0.57421875, "learning_rate": 0.000214878640776699, "loss": 0.6223, "step": 3528 }, { "epoch": 1.4254330589364175, "grad_norm": 0.5546875, "learning_rate": 0.0002148543689320388, "loss": 0.653, "step": 3529 }, { "epoch": 1.4258370789354073, "grad_norm": 0.494140625, "learning_rate": 0.0002148300970873786, "loss": 0.6817, "step": 3530 }, { "epoch": 1.4262410989343972, "grad_norm": 0.482421875, "learning_rate": 0.00021480582524271844, "loss": 0.6204, "step": 3531 }, { "epoch": 1.4266451189333873, "grad_norm": 0.67578125, "learning_rate": 0.00021478155339805822, "loss": 0.6449, "step": 3532 }, { "epoch": 1.4270491389323772, "grad_norm": 0.53125, "learning_rate": 0.00021475728155339805, "loss": 0.6323, "step": 3533 }, { "epoch": 1.427453158931367, "grad_norm": 0.57421875, "learning_rate": 0.00021473300970873785, "loss": 0.677, "step": 3534 }, { "epoch": 1.4278571789303571, "grad_norm": 0.53515625, "learning_rate": 0.00021470873786407765, "loss": 0.6069, "step": 3535 }, { "epoch": 1.428261198929347, "grad_norm": 0.53515625, "learning_rate": 0.00021468446601941748, "loss": 0.5775, "step": 3536 }, { "epoch": 1.4286652189283369, "grad_norm": 0.57421875, "learning_rate": 0.00021466019417475725, "loss": 0.6532, "step": 3537 }, { "epoch": 1.429069238927327, "grad_norm": 0.7109375, "learning_rate": 0.00021463592233009706, "loss": 0.7, "step": 3538 }, { "epoch": 1.4294732589263168, "grad_norm": 0.5234375, "learning_rate": 0.00021461165048543689, "loss": 0.5789, "step": 3539 }, { "epoch": 1.4298772789253067, "grad_norm": 0.62890625, "learning_rate": 0.00021458737864077666, "loss": 0.7028, "step": 3540 }, { "epoch": 1.4302812989242968, "grad_norm": 0.51171875, "learning_rate": 0.0002145631067961165, "loss": 0.5638, "step": 3541 }, { "epoch": 1.4306853189232867, "grad_norm": 0.490234375, "learning_rate": 0.0002145388349514563, "loss": 0.5948, "step": 3542 }, { "epoch": 1.4310893389222765, "grad_norm": 0.5859375, "learning_rate": 0.0002145145631067961, "loss": 0.7497, "step": 3543 }, { "epoch": 1.4314933589212666, "grad_norm": 0.49609375, "learning_rate": 0.00021449029126213593, "loss": 0.6616, "step": 3544 }, { "epoch": 1.4318973789202565, "grad_norm": 0.5546875, "learning_rate": 0.0002144660194174757, "loss": 0.6583, "step": 3545 }, { "epoch": 1.4323013989192466, "grad_norm": 0.421875, "learning_rate": 0.0002144417475728155, "loss": 0.5986, "step": 3546 }, { "epoch": 1.4327054189182364, "grad_norm": 0.490234375, "learning_rate": 0.00021441747572815533, "loss": 0.6203, "step": 3547 }, { "epoch": 1.4331094389172265, "grad_norm": 0.609375, "learning_rate": 0.00021439320388349513, "loss": 0.7094, "step": 3548 }, { "epoch": 1.4335134589162164, "grad_norm": 0.443359375, "learning_rate": 0.0002143689320388349, "loss": 0.5438, "step": 3549 }, { "epoch": 1.4339174789152063, "grad_norm": 0.484375, "learning_rate": 0.00021434466019417474, "loss": 0.5448, "step": 3550 }, { "epoch": 1.4343214989141964, "grad_norm": 0.416015625, "learning_rate": 0.00021432038834951454, "loss": 0.5497, "step": 3551 }, { "epoch": 1.4347255189131862, "grad_norm": 0.6875, "learning_rate": 0.00021429611650485437, "loss": 0.673, "step": 3552 }, { "epoch": 1.435129538912176, "grad_norm": 0.5390625, "learning_rate": 0.00021427184466019415, "loss": 0.6937, "step": 3553 }, { "epoch": 1.4355335589111662, "grad_norm": 0.48046875, "learning_rate": 0.00021424757281553395, "loss": 0.6438, "step": 3554 }, { "epoch": 1.435937578910156, "grad_norm": 0.71484375, "learning_rate": 0.00021422330097087378, "loss": 0.7469, "step": 3555 }, { "epoch": 1.436341598909146, "grad_norm": 0.5546875, "learning_rate": 0.00021419902912621358, "loss": 0.6753, "step": 3556 }, { "epoch": 1.436745618908136, "grad_norm": 0.578125, "learning_rate": 0.00021417475728155336, "loss": 0.639, "step": 3557 }, { "epoch": 1.4371496389071259, "grad_norm": 0.462890625, "learning_rate": 0.00021415048543689319, "loss": 0.6419, "step": 3558 }, { "epoch": 1.4375536589061157, "grad_norm": 0.55859375, "learning_rate": 0.000214126213592233, "loss": 0.6015, "step": 3559 }, { "epoch": 1.4379576789051058, "grad_norm": 0.44921875, "learning_rate": 0.0002141019417475728, "loss": 0.5926, "step": 3560 }, { "epoch": 1.4383616989040957, "grad_norm": 0.4765625, "learning_rate": 0.00021407766990291262, "loss": 0.6175, "step": 3561 }, { "epoch": 1.4387657189030856, "grad_norm": 0.58203125, "learning_rate": 0.0002140533980582524, "loss": 0.6548, "step": 3562 }, { "epoch": 1.4391697389020757, "grad_norm": 0.486328125, "learning_rate": 0.00021402912621359222, "loss": 0.5398, "step": 3563 }, { "epoch": 1.4395737589010655, "grad_norm": 0.625, "learning_rate": 0.00021400485436893203, "loss": 0.653, "step": 3564 }, { "epoch": 1.4399777789000556, "grad_norm": 0.6875, "learning_rate": 0.00021398058252427183, "loss": 0.6867, "step": 3565 }, { "epoch": 1.4403817988990455, "grad_norm": 0.52734375, "learning_rate": 0.00021395631067961163, "loss": 0.5803, "step": 3566 }, { "epoch": 1.4407858188980356, "grad_norm": 0.57421875, "learning_rate": 0.00021393203883495143, "loss": 0.6925, "step": 3567 }, { "epoch": 1.4411898388970255, "grad_norm": 0.6171875, "learning_rate": 0.00021390776699029124, "loss": 0.7536, "step": 3568 }, { "epoch": 1.4415938588960153, "grad_norm": 0.466796875, "learning_rate": 0.00021388349514563107, "loss": 0.5707, "step": 3569 }, { "epoch": 1.4419978788950054, "grad_norm": 0.51171875, "learning_rate": 0.00021385922330097084, "loss": 0.5808, "step": 3570 }, { "epoch": 1.4424018988939953, "grad_norm": 0.578125, "learning_rate": 0.00021383495145631067, "loss": 0.6727, "step": 3571 }, { "epoch": 1.4428059188929852, "grad_norm": 0.515625, "learning_rate": 0.00021381067961165047, "loss": 0.6688, "step": 3572 }, { "epoch": 1.4432099388919752, "grad_norm": 0.50390625, "learning_rate": 0.00021378640776699027, "loss": 0.5913, "step": 3573 }, { "epoch": 1.443613958890965, "grad_norm": 0.67578125, "learning_rate": 0.0002137621359223301, "loss": 0.7058, "step": 3574 }, { "epoch": 1.444017978889955, "grad_norm": 0.515625, "learning_rate": 0.00021373786407766988, "loss": 0.6198, "step": 3575 }, { "epoch": 1.444421998888945, "grad_norm": 0.48828125, "learning_rate": 0.00021371359223300968, "loss": 0.6136, "step": 3576 }, { "epoch": 1.444826018887935, "grad_norm": 0.67578125, "learning_rate": 0.0002136893203883495, "loss": 0.6812, "step": 3577 }, { "epoch": 1.4452300388869248, "grad_norm": 0.416015625, "learning_rate": 0.00021366504854368929, "loss": 0.5516, "step": 3578 }, { "epoch": 1.445634058885915, "grad_norm": 0.4921875, "learning_rate": 0.0002136407766990291, "loss": 0.714, "step": 3579 }, { "epoch": 1.4460380788849048, "grad_norm": 0.546875, "learning_rate": 0.00021361650485436892, "loss": 0.6447, "step": 3580 }, { "epoch": 1.4464420988838946, "grad_norm": 0.51171875, "learning_rate": 0.00021359223300970872, "loss": 0.5749, "step": 3581 }, { "epoch": 1.4468461188828847, "grad_norm": 0.46484375, "learning_rate": 0.00021356796116504855, "loss": 0.5793, "step": 3582 }, { "epoch": 1.4472501388818746, "grad_norm": 0.67578125, "learning_rate": 0.00021354368932038832, "loss": 0.7881, "step": 3583 }, { "epoch": 1.4476541588808647, "grad_norm": 0.50390625, "learning_rate": 0.00021351941747572813, "loss": 0.6559, "step": 3584 }, { "epoch": 1.4480581788798546, "grad_norm": 0.51171875, "learning_rate": 0.00021349514563106796, "loss": 0.5933, "step": 3585 }, { "epoch": 1.4484621988788444, "grad_norm": 0.515625, "learning_rate": 0.00021347087378640776, "loss": 0.5659, "step": 3586 }, { "epoch": 1.4488662188778345, "grad_norm": 0.55859375, "learning_rate": 0.00021344660194174753, "loss": 0.6305, "step": 3587 }, { "epoch": 1.4492702388768244, "grad_norm": 0.671875, "learning_rate": 0.00021342233009708736, "loss": 0.6904, "step": 3588 }, { "epoch": 1.4496742588758145, "grad_norm": 0.5234375, "learning_rate": 0.00021339805825242717, "loss": 0.5739, "step": 3589 }, { "epoch": 1.4500782788748043, "grad_norm": 0.609375, "learning_rate": 0.00021337378640776697, "loss": 0.6526, "step": 3590 }, { "epoch": 1.4504822988737942, "grad_norm": 0.54296875, "learning_rate": 0.00021334951456310677, "loss": 0.6428, "step": 3591 }, { "epoch": 1.4508863188727843, "grad_norm": 0.470703125, "learning_rate": 0.00021332524271844657, "loss": 0.4988, "step": 3592 }, { "epoch": 1.4512903388717742, "grad_norm": 0.53125, "learning_rate": 0.0002133009708737864, "loss": 0.6553, "step": 3593 }, { "epoch": 1.451694358870764, "grad_norm": 0.58984375, "learning_rate": 0.0002132766990291262, "loss": 0.6279, "step": 3594 }, { "epoch": 1.4520983788697541, "grad_norm": 0.462890625, "learning_rate": 0.00021325242718446598, "loss": 0.553, "step": 3595 }, { "epoch": 1.452502398868744, "grad_norm": 0.59375, "learning_rate": 0.0002132281553398058, "loss": 0.6859, "step": 3596 }, { "epoch": 1.4529064188677339, "grad_norm": 0.51171875, "learning_rate": 0.0002132038834951456, "loss": 0.5649, "step": 3597 }, { "epoch": 1.453310438866724, "grad_norm": 0.51953125, "learning_rate": 0.00021317961165048541, "loss": 0.5719, "step": 3598 }, { "epoch": 1.4537144588657138, "grad_norm": 0.53125, "learning_rate": 0.00021315533980582524, "loss": 0.6826, "step": 3599 }, { "epoch": 1.4541184788647037, "grad_norm": 0.54296875, "learning_rate": 0.00021313106796116502, "loss": 0.6109, "step": 3600 }, { "epoch": 1.4545224988636938, "grad_norm": 0.58984375, "learning_rate": 0.00021310679611650485, "loss": 0.6704, "step": 3601 }, { "epoch": 1.4549265188626836, "grad_norm": 0.455078125, "learning_rate": 0.00021308252427184465, "loss": 0.5779, "step": 3602 }, { "epoch": 1.4553305388616735, "grad_norm": 0.5625, "learning_rate": 0.00021305825242718443, "loss": 0.6682, "step": 3603 }, { "epoch": 1.4557345588606636, "grad_norm": 0.482421875, "learning_rate": 0.00021303398058252426, "loss": 0.6642, "step": 3604 }, { "epoch": 1.4561385788596535, "grad_norm": 0.5390625, "learning_rate": 0.00021300970873786406, "loss": 0.6886, "step": 3605 }, { "epoch": 1.4565425988586436, "grad_norm": 0.5625, "learning_rate": 0.00021298543689320386, "loss": 0.6188, "step": 3606 }, { "epoch": 1.4569466188576334, "grad_norm": 0.515625, "learning_rate": 0.0002129611650485437, "loss": 0.6772, "step": 3607 }, { "epoch": 1.4573506388566235, "grad_norm": 0.58203125, "learning_rate": 0.00021293689320388346, "loss": 0.6517, "step": 3608 }, { "epoch": 1.4577546588556134, "grad_norm": 0.67578125, "learning_rate": 0.00021291262135922327, "loss": 0.6936, "step": 3609 }, { "epoch": 1.4581586788546033, "grad_norm": 0.455078125, "learning_rate": 0.0002128883495145631, "loss": 0.5793, "step": 3610 }, { "epoch": 1.4585626988535934, "grad_norm": 0.75390625, "learning_rate": 0.0002128640776699029, "loss": 0.7104, "step": 3611 }, { "epoch": 1.4589667188525832, "grad_norm": 0.5, "learning_rate": 0.00021283980582524273, "loss": 0.6165, "step": 3612 }, { "epoch": 1.459370738851573, "grad_norm": 0.55078125, "learning_rate": 0.0002128155339805825, "loss": 0.6439, "step": 3613 }, { "epoch": 1.4597747588505632, "grad_norm": 0.57421875, "learning_rate": 0.0002127912621359223, "loss": 0.6538, "step": 3614 }, { "epoch": 1.460178778849553, "grad_norm": 0.55859375, "learning_rate": 0.00021276699029126214, "loss": 0.6224, "step": 3615 }, { "epoch": 1.460582798848543, "grad_norm": 0.49609375, "learning_rate": 0.0002127427184466019, "loss": 0.5657, "step": 3616 }, { "epoch": 1.460986818847533, "grad_norm": 0.423828125, "learning_rate": 0.0002127184466019417, "loss": 0.5849, "step": 3617 }, { "epoch": 1.4613908388465229, "grad_norm": 0.5546875, "learning_rate": 0.00021269417475728154, "loss": 0.6263, "step": 3618 }, { "epoch": 1.4617948588455127, "grad_norm": 0.51953125, "learning_rate": 0.00021266990291262134, "loss": 0.6518, "step": 3619 }, { "epoch": 1.4621988788445028, "grad_norm": 0.494140625, "learning_rate": 0.00021264563106796112, "loss": 0.6036, "step": 3620 }, { "epoch": 1.4626028988434927, "grad_norm": 0.6875, "learning_rate": 0.00021262135922330095, "loss": 0.7846, "step": 3621 }, { "epoch": 1.4630069188424826, "grad_norm": 0.76171875, "learning_rate": 0.00021259708737864075, "loss": 0.6917, "step": 3622 }, { "epoch": 1.4634109388414727, "grad_norm": 0.62109375, "learning_rate": 0.00021257281553398058, "loss": 0.6263, "step": 3623 }, { "epoch": 1.4638149588404625, "grad_norm": 0.49609375, "learning_rate": 0.00021254854368932038, "loss": 0.5791, "step": 3624 }, { "epoch": 1.4642189788394526, "grad_norm": 0.45703125, "learning_rate": 0.00021252427184466016, "loss": 0.5416, "step": 3625 }, { "epoch": 1.4646229988384425, "grad_norm": 0.515625, "learning_rate": 0.0002125, "loss": 0.6606, "step": 3626 }, { "epoch": 1.4650270188374326, "grad_norm": 0.64453125, "learning_rate": 0.0002124757281553398, "loss": 0.7093, "step": 3627 }, { "epoch": 1.4654310388364225, "grad_norm": 0.5625, "learning_rate": 0.0002124514563106796, "loss": 0.6371, "step": 3628 }, { "epoch": 1.4658350588354123, "grad_norm": 0.5390625, "learning_rate": 0.0002124271844660194, "loss": 0.5894, "step": 3629 }, { "epoch": 1.4662390788344024, "grad_norm": 0.498046875, "learning_rate": 0.0002124029126213592, "loss": 0.6012, "step": 3630 }, { "epoch": 1.4666430988333923, "grad_norm": 0.453125, "learning_rate": 0.000212378640776699, "loss": 0.5589, "step": 3631 }, { "epoch": 1.4670471188323821, "grad_norm": 0.59375, "learning_rate": 0.00021235436893203883, "loss": 0.6295, "step": 3632 }, { "epoch": 1.4674511388313722, "grad_norm": 0.5078125, "learning_rate": 0.0002123300970873786, "loss": 0.6238, "step": 3633 }, { "epoch": 1.467855158830362, "grad_norm": 0.515625, "learning_rate": 0.00021230582524271843, "loss": 0.6208, "step": 3634 }, { "epoch": 1.468259178829352, "grad_norm": 0.50390625, "learning_rate": 0.00021228155339805824, "loss": 0.6059, "step": 3635 }, { "epoch": 1.468663198828342, "grad_norm": 0.58203125, "learning_rate": 0.00021225728155339804, "loss": 0.6787, "step": 3636 }, { "epoch": 1.469067218827332, "grad_norm": 0.51171875, "learning_rate": 0.00021223300970873787, "loss": 0.6307, "step": 3637 }, { "epoch": 1.4694712388263218, "grad_norm": 0.5546875, "learning_rate": 0.00021220873786407764, "loss": 0.658, "step": 3638 }, { "epoch": 1.469875258825312, "grad_norm": 0.482421875, "learning_rate": 0.00021218446601941745, "loss": 0.5611, "step": 3639 }, { "epoch": 1.4702792788243018, "grad_norm": 0.455078125, "learning_rate": 0.00021216019417475728, "loss": 0.5641, "step": 3640 }, { "epoch": 1.4706832988232916, "grad_norm": 0.59375, "learning_rate": 0.00021213592233009705, "loss": 0.6724, "step": 3641 }, { "epoch": 1.4710873188222817, "grad_norm": 0.51953125, "learning_rate": 0.00021211165048543688, "loss": 0.6522, "step": 3642 }, { "epoch": 1.4714913388212716, "grad_norm": 0.546875, "learning_rate": 0.00021208737864077668, "loss": 0.6802, "step": 3643 }, { "epoch": 1.4718953588202617, "grad_norm": 0.4921875, "learning_rate": 0.00021206310679611648, "loss": 0.6283, "step": 3644 }, { "epoch": 1.4722993788192515, "grad_norm": 0.48828125, "learning_rate": 0.00021203883495145631, "loss": 0.6528, "step": 3645 }, { "epoch": 1.4727033988182416, "grad_norm": 0.546875, "learning_rate": 0.0002120145631067961, "loss": 0.5767, "step": 3646 }, { "epoch": 1.4731074188172315, "grad_norm": 0.5234375, "learning_rate": 0.0002119902912621359, "loss": 0.6531, "step": 3647 }, { "epoch": 1.4735114388162214, "grad_norm": 0.462890625, "learning_rate": 0.00021196601941747572, "loss": 0.684, "step": 3648 }, { "epoch": 1.4739154588152115, "grad_norm": 0.62109375, "learning_rate": 0.00021194174757281552, "loss": 0.6947, "step": 3649 }, { "epoch": 1.4743194788142013, "grad_norm": 0.51171875, "learning_rate": 0.0002119174757281553, "loss": 0.626, "step": 3650 }, { "epoch": 1.4747234988131912, "grad_norm": 0.5859375, "learning_rate": 0.00021189320388349513, "loss": 0.6168, "step": 3651 }, { "epoch": 1.4751275188121813, "grad_norm": 0.62109375, "learning_rate": 0.00021186893203883493, "loss": 0.7477, "step": 3652 }, { "epoch": 1.4755315388111712, "grad_norm": 0.486328125, "learning_rate": 0.00021184466019417476, "loss": 0.6507, "step": 3653 }, { "epoch": 1.475935558810161, "grad_norm": 0.5390625, "learning_rate": 0.00021182038834951454, "loss": 0.6085, "step": 3654 }, { "epoch": 1.4763395788091511, "grad_norm": 0.6484375, "learning_rate": 0.00021179611650485434, "loss": 0.6112, "step": 3655 }, { "epoch": 1.476743598808141, "grad_norm": 0.51953125, "learning_rate": 0.00021177184466019417, "loss": 0.655, "step": 3656 }, { "epoch": 1.4771476188071309, "grad_norm": 0.671875, "learning_rate": 0.00021174757281553397, "loss": 0.6717, "step": 3657 }, { "epoch": 1.477551638806121, "grad_norm": 0.55078125, "learning_rate": 0.00021172330097087374, "loss": 0.5964, "step": 3658 }, { "epoch": 1.4779556588051108, "grad_norm": 0.4765625, "learning_rate": 0.00021169902912621357, "loss": 0.5808, "step": 3659 }, { "epoch": 1.4783596788041007, "grad_norm": 0.486328125, "learning_rate": 0.00021167475728155338, "loss": 0.5842, "step": 3660 }, { "epoch": 1.4787636988030908, "grad_norm": 0.625, "learning_rate": 0.00021165048543689318, "loss": 0.7122, "step": 3661 }, { "epoch": 1.4791677188020806, "grad_norm": 0.498046875, "learning_rate": 0.000211626213592233, "loss": 0.5631, "step": 3662 }, { "epoch": 1.4795717388010707, "grad_norm": 0.61328125, "learning_rate": 0.00021160194174757278, "loss": 0.6216, "step": 3663 }, { "epoch": 1.4799757588000606, "grad_norm": 0.4453125, "learning_rate": 0.0002115776699029126, "loss": 0.5553, "step": 3664 }, { "epoch": 1.4803797787990505, "grad_norm": 0.62890625, "learning_rate": 0.00021155339805825241, "loss": 0.718, "step": 3665 }, { "epoch": 1.4807837987980406, "grad_norm": 0.5390625, "learning_rate": 0.00021152912621359222, "loss": 0.6176, "step": 3666 }, { "epoch": 1.4811878187970304, "grad_norm": 0.515625, "learning_rate": 0.00021150485436893202, "loss": 0.6233, "step": 3667 }, { "epoch": 1.4815918387960205, "grad_norm": 0.5078125, "learning_rate": 0.00021148058252427182, "loss": 0.6342, "step": 3668 }, { "epoch": 1.4819958587950104, "grad_norm": 0.55078125, "learning_rate": 0.00021145631067961162, "loss": 0.7509, "step": 3669 }, { "epoch": 1.4823998787940003, "grad_norm": 0.439453125, "learning_rate": 0.00021143203883495145, "loss": 0.5741, "step": 3670 }, { "epoch": 1.4828038987929903, "grad_norm": 0.416015625, "learning_rate": 0.00021140776699029123, "loss": 0.6421, "step": 3671 }, { "epoch": 1.4832079187919802, "grad_norm": 0.53125, "learning_rate": 0.00021138349514563106, "loss": 0.6128, "step": 3672 }, { "epoch": 1.48361193879097, "grad_norm": 0.62890625, "learning_rate": 0.00021135922330097086, "loss": 0.6015, "step": 3673 }, { "epoch": 1.4840159587899602, "grad_norm": 0.5625, "learning_rate": 0.00021133495145631066, "loss": 0.5858, "step": 3674 }, { "epoch": 1.48441997878895, "grad_norm": 0.53125, "learning_rate": 0.0002113106796116505, "loss": 0.6693, "step": 3675 }, { "epoch": 1.48482399878794, "grad_norm": 0.65234375, "learning_rate": 0.00021128640776699027, "loss": 0.695, "step": 3676 }, { "epoch": 1.48522801878693, "grad_norm": 0.48828125, "learning_rate": 0.00021126213592233007, "loss": 0.5803, "step": 3677 }, { "epoch": 1.4856320387859199, "grad_norm": 0.4375, "learning_rate": 0.0002112378640776699, "loss": 0.5651, "step": 3678 }, { "epoch": 1.4860360587849097, "grad_norm": 0.4609375, "learning_rate": 0.00021121359223300967, "loss": 0.5878, "step": 3679 }, { "epoch": 1.4864400787838998, "grad_norm": 0.48828125, "learning_rate": 0.00021118932038834948, "loss": 0.5718, "step": 3680 }, { "epoch": 1.4868440987828897, "grad_norm": 0.60546875, "learning_rate": 0.0002111650485436893, "loss": 0.6372, "step": 3681 }, { "epoch": 1.4872481187818796, "grad_norm": 0.58984375, "learning_rate": 0.0002111407766990291, "loss": 0.7082, "step": 3682 }, { "epoch": 1.4876521387808697, "grad_norm": 0.54296875, "learning_rate": 0.00021111650485436894, "loss": 0.5532, "step": 3683 }, { "epoch": 1.4880561587798595, "grad_norm": 0.640625, "learning_rate": 0.00021109223300970871, "loss": 0.6145, "step": 3684 }, { "epoch": 1.4884601787788496, "grad_norm": 0.58984375, "learning_rate": 0.00021106796116504852, "loss": 0.6445, "step": 3685 }, { "epoch": 1.4888641987778395, "grad_norm": 0.5859375, "learning_rate": 0.00021104368932038835, "loss": 0.6516, "step": 3686 }, { "epoch": 1.4892682187768296, "grad_norm": 0.58203125, "learning_rate": 0.00021101941747572815, "loss": 0.6069, "step": 3687 }, { "epoch": 1.4896722387758194, "grad_norm": 0.578125, "learning_rate": 0.00021099514563106792, "loss": 0.6603, "step": 3688 }, { "epoch": 1.4900762587748093, "grad_norm": 0.6328125, "learning_rate": 0.00021097087378640775, "loss": 0.653, "step": 3689 }, { "epoch": 1.4904802787737994, "grad_norm": 0.486328125, "learning_rate": 0.00021094660194174755, "loss": 0.6048, "step": 3690 }, { "epoch": 1.4908842987727893, "grad_norm": 0.474609375, "learning_rate": 0.00021092233009708736, "loss": 0.617, "step": 3691 }, { "epoch": 1.4912883187717791, "grad_norm": 0.52734375, "learning_rate": 0.00021089805825242716, "loss": 0.6622, "step": 3692 }, { "epoch": 1.4916923387707692, "grad_norm": 0.5, "learning_rate": 0.00021087378640776696, "loss": 0.591, "step": 3693 }, { "epoch": 1.492096358769759, "grad_norm": 0.5078125, "learning_rate": 0.0002108495145631068, "loss": 0.71, "step": 3694 }, { "epoch": 1.492500378768749, "grad_norm": 0.54296875, "learning_rate": 0.0002108252427184466, "loss": 0.6588, "step": 3695 }, { "epoch": 1.492904398767739, "grad_norm": 0.61328125, "learning_rate": 0.00021080097087378637, "loss": 0.6478, "step": 3696 }, { "epoch": 1.493308418766729, "grad_norm": 0.44140625, "learning_rate": 0.0002107766990291262, "loss": 0.6151, "step": 3697 }, { "epoch": 1.4937124387657188, "grad_norm": 0.6015625, "learning_rate": 0.000210752427184466, "loss": 0.665, "step": 3698 }, { "epoch": 1.4941164587647089, "grad_norm": 0.73046875, "learning_rate": 0.0002107281553398058, "loss": 0.8003, "step": 3699 }, { "epoch": 1.4945204787636988, "grad_norm": 0.482421875, "learning_rate": 0.00021070388349514563, "loss": 0.5502, "step": 3700 }, { "epoch": 1.4949244987626886, "grad_norm": 0.71875, "learning_rate": 0.0002106796116504854, "loss": 0.6302, "step": 3701 }, { "epoch": 1.4953285187616787, "grad_norm": 0.55859375, "learning_rate": 0.00021065533980582524, "loss": 0.5996, "step": 3702 }, { "epoch": 1.4957325387606686, "grad_norm": 0.53515625, "learning_rate": 0.00021063106796116504, "loss": 0.6498, "step": 3703 }, { "epoch": 1.4961365587596587, "grad_norm": 0.490234375, "learning_rate": 0.00021060679611650481, "loss": 0.6314, "step": 3704 }, { "epoch": 1.4965405787586485, "grad_norm": 0.44140625, "learning_rate": 0.00021058252427184464, "loss": 0.5495, "step": 3705 }, { "epoch": 1.4969445987576386, "grad_norm": 0.55078125, "learning_rate": 0.00021055825242718445, "loss": 0.7145, "step": 3706 }, { "epoch": 1.4973486187566285, "grad_norm": 0.5390625, "learning_rate": 0.00021053398058252425, "loss": 0.5876, "step": 3707 }, { "epoch": 1.4977526387556184, "grad_norm": 0.54296875, "learning_rate": 0.00021050970873786408, "loss": 0.6417, "step": 3708 }, { "epoch": 1.4981566587546085, "grad_norm": 0.66796875, "learning_rate": 0.00021048543689320385, "loss": 0.6894, "step": 3709 }, { "epoch": 1.4985606787535983, "grad_norm": 0.55078125, "learning_rate": 0.00021046116504854366, "loss": 0.5751, "step": 3710 }, { "epoch": 1.4989646987525882, "grad_norm": 0.6171875, "learning_rate": 0.00021043689320388349, "loss": 0.6748, "step": 3711 }, { "epoch": 1.4993687187515783, "grad_norm": 0.490234375, "learning_rate": 0.0002104126213592233, "loss": 0.6441, "step": 3712 }, { "epoch": 1.4997727387505682, "grad_norm": 0.6015625, "learning_rate": 0.00021038834951456312, "loss": 0.6742, "step": 3713 }, { "epoch": 1.500176758749558, "grad_norm": 0.61328125, "learning_rate": 0.0002103640776699029, "loss": 0.7085, "step": 3714 }, { "epoch": 1.5005807787485481, "grad_norm": 0.5703125, "learning_rate": 0.0002103398058252427, "loss": 0.5641, "step": 3715 }, { "epoch": 1.500984798747538, "grad_norm": 0.478515625, "learning_rate": 0.00021031553398058252, "loss": 0.5913, "step": 3716 }, { "epoch": 1.5013888187465279, "grad_norm": 0.546875, "learning_rate": 0.0002102912621359223, "loss": 0.6452, "step": 3717 }, { "epoch": 1.501792838745518, "grad_norm": 0.5390625, "learning_rate": 0.0002102669902912621, "loss": 0.6829, "step": 3718 }, { "epoch": 1.5021968587445078, "grad_norm": 0.5234375, "learning_rate": 0.00021024271844660193, "loss": 0.6481, "step": 3719 }, { "epoch": 1.5026008787434977, "grad_norm": 0.52734375, "learning_rate": 0.00021021844660194173, "loss": 0.6092, "step": 3720 }, { "epoch": 1.5030048987424878, "grad_norm": 0.5234375, "learning_rate": 0.0002101941747572815, "loss": 0.641, "step": 3721 }, { "epoch": 1.5034089187414779, "grad_norm": 0.455078125, "learning_rate": 0.00021016990291262134, "loss": 0.58, "step": 3722 }, { "epoch": 1.5038129387404675, "grad_norm": 0.50390625, "learning_rate": 0.00021014563106796114, "loss": 0.6462, "step": 3723 }, { "epoch": 1.5042169587394576, "grad_norm": 0.60546875, "learning_rate": 0.00021012135922330097, "loss": 0.6807, "step": 3724 }, { "epoch": 1.5046209787384477, "grad_norm": 0.546875, "learning_rate": 0.00021009708737864077, "loss": 0.6564, "step": 3725 }, { "epoch": 1.5050249987374376, "grad_norm": 0.5859375, "learning_rate": 0.00021007281553398055, "loss": 0.7164, "step": 3726 }, { "epoch": 1.5054290187364274, "grad_norm": 0.466796875, "learning_rate": 0.00021004854368932038, "loss": 0.6551, "step": 3727 }, { "epoch": 1.5058330387354175, "grad_norm": 0.54296875, "learning_rate": 0.00021002427184466018, "loss": 0.6212, "step": 3728 }, { "epoch": 1.5062370587344074, "grad_norm": 0.515625, "learning_rate": 0.00020999999999999998, "loss": 0.6548, "step": 3729 }, { "epoch": 1.5066410787333973, "grad_norm": 0.59375, "learning_rate": 0.00020997572815533978, "loss": 0.6163, "step": 3730 }, { "epoch": 1.5070450987323873, "grad_norm": 0.61328125, "learning_rate": 0.00020995145631067959, "loss": 0.7089, "step": 3731 }, { "epoch": 1.5074491187313772, "grad_norm": 0.46484375, "learning_rate": 0.0002099271844660194, "loss": 0.614, "step": 3732 }, { "epoch": 1.507853138730367, "grad_norm": 0.474609375, "learning_rate": 0.00020990291262135922, "loss": 0.6541, "step": 3733 }, { "epoch": 1.5082571587293572, "grad_norm": 0.515625, "learning_rate": 0.000209878640776699, "loss": 0.616, "step": 3734 }, { "epoch": 1.508661178728347, "grad_norm": 0.46484375, "learning_rate": 0.00020985436893203882, "loss": 0.5896, "step": 3735 }, { "epoch": 1.509065198727337, "grad_norm": 0.5546875, "learning_rate": 0.00020983009708737863, "loss": 0.6135, "step": 3736 }, { "epoch": 1.509469218726327, "grad_norm": 0.5234375, "learning_rate": 0.00020980582524271843, "loss": 0.6765, "step": 3737 }, { "epoch": 1.5098732387253169, "grad_norm": 0.5078125, "learning_rate": 0.00020978155339805826, "loss": 0.6006, "step": 3738 }, { "epoch": 1.5102772587243067, "grad_norm": 0.5, "learning_rate": 0.00020975728155339803, "loss": 0.6095, "step": 3739 }, { "epoch": 1.5106812787232968, "grad_norm": 0.466796875, "learning_rate": 0.00020973300970873783, "loss": 0.5988, "step": 3740 }, { "epoch": 1.511085298722287, "grad_norm": 0.5078125, "learning_rate": 0.00020970873786407766, "loss": 0.6968, "step": 3741 }, { "epoch": 1.5114893187212766, "grad_norm": 0.45703125, "learning_rate": 0.00020968446601941744, "loss": 0.628, "step": 3742 }, { "epoch": 1.5118933387202667, "grad_norm": 0.56640625, "learning_rate": 0.00020966019417475727, "loss": 0.7147, "step": 3743 }, { "epoch": 1.5122973587192567, "grad_norm": 0.6015625, "learning_rate": 0.00020963592233009707, "loss": 0.6873, "step": 3744 }, { "epoch": 1.5127013787182464, "grad_norm": 0.51953125, "learning_rate": 0.00020961165048543687, "loss": 0.656, "step": 3745 }, { "epoch": 1.5131053987172365, "grad_norm": 0.58203125, "learning_rate": 0.0002095873786407767, "loss": 0.617, "step": 3746 }, { "epoch": 1.5135094187162266, "grad_norm": 0.54296875, "learning_rate": 0.00020956310679611648, "loss": 0.6524, "step": 3747 }, { "epoch": 1.5139134387152164, "grad_norm": 0.494140625, "learning_rate": 0.00020953883495145628, "loss": 0.5901, "step": 3748 }, { "epoch": 1.5143174587142063, "grad_norm": 0.49609375, "learning_rate": 0.0002095145631067961, "loss": 0.6639, "step": 3749 }, { "epoch": 1.5147214787131964, "grad_norm": 0.69140625, "learning_rate": 0.0002094902912621359, "loss": 0.6903, "step": 3750 }, { "epoch": 1.5151254987121863, "grad_norm": 0.5234375, "learning_rate": 0.0002094660194174757, "loss": 0.705, "step": 3751 }, { "epoch": 1.5155295187111761, "grad_norm": 0.5625, "learning_rate": 0.00020944174757281552, "loss": 0.6682, "step": 3752 }, { "epoch": 1.5159335387101662, "grad_norm": 0.486328125, "learning_rate": 0.00020941747572815532, "loss": 0.6563, "step": 3753 }, { "epoch": 1.516337558709156, "grad_norm": 0.5078125, "learning_rate": 0.00020939320388349515, "loss": 0.5872, "step": 3754 }, { "epoch": 1.516741578708146, "grad_norm": 0.6328125, "learning_rate": 0.00020936893203883492, "loss": 0.685, "step": 3755 }, { "epoch": 1.517145598707136, "grad_norm": 0.56640625, "learning_rate": 0.00020934466019417473, "loss": 0.6657, "step": 3756 }, { "epoch": 1.517549618706126, "grad_norm": 0.478515625, "learning_rate": 0.00020932038834951456, "loss": 0.5917, "step": 3757 }, { "epoch": 1.5179536387051158, "grad_norm": 0.55078125, "learning_rate": 0.00020929611650485436, "loss": 0.6301, "step": 3758 }, { "epoch": 1.5183576587041059, "grad_norm": 0.44921875, "learning_rate": 0.00020927184466019413, "loss": 0.5756, "step": 3759 }, { "epoch": 1.5187616787030958, "grad_norm": 0.427734375, "learning_rate": 0.00020924757281553396, "loss": 0.542, "step": 3760 }, { "epoch": 1.5191656987020856, "grad_norm": 0.4140625, "learning_rate": 0.00020922330097087376, "loss": 0.5964, "step": 3761 }, { "epoch": 1.5195697187010757, "grad_norm": 0.51953125, "learning_rate": 0.00020919902912621357, "loss": 0.5952, "step": 3762 }, { "epoch": 1.5199737387000658, "grad_norm": 0.6484375, "learning_rate": 0.0002091747572815534, "loss": 0.6672, "step": 3763 }, { "epoch": 1.5203777586990554, "grad_norm": 0.5234375, "learning_rate": 0.00020915048543689317, "loss": 0.6473, "step": 3764 }, { "epoch": 1.5207817786980455, "grad_norm": 0.546875, "learning_rate": 0.000209126213592233, "loss": 0.6768, "step": 3765 }, { "epoch": 1.5211857986970356, "grad_norm": 0.6484375, "learning_rate": 0.0002091019417475728, "loss": 0.664, "step": 3766 }, { "epoch": 1.5215898186960255, "grad_norm": 0.5859375, "learning_rate": 0.00020907766990291258, "loss": 0.6748, "step": 3767 }, { "epoch": 1.5219938386950154, "grad_norm": 0.51953125, "learning_rate": 0.0002090533980582524, "loss": 0.6665, "step": 3768 }, { "epoch": 1.5223978586940055, "grad_norm": 0.498046875, "learning_rate": 0.0002090291262135922, "loss": 0.6028, "step": 3769 }, { "epoch": 1.5228018786929953, "grad_norm": 0.53515625, "learning_rate": 0.000209004854368932, "loss": 0.673, "step": 3770 }, { "epoch": 1.5232058986919852, "grad_norm": 0.494140625, "learning_rate": 0.00020898058252427184, "loss": 0.6203, "step": 3771 }, { "epoch": 1.5236099186909753, "grad_norm": 0.53515625, "learning_rate": 0.00020895631067961162, "loss": 0.6295, "step": 3772 }, { "epoch": 1.5240139386899652, "grad_norm": 0.484375, "learning_rate": 0.00020893203883495145, "loss": 0.6095, "step": 3773 }, { "epoch": 1.524417958688955, "grad_norm": 0.515625, "learning_rate": 0.00020890776699029125, "loss": 0.5955, "step": 3774 }, { "epoch": 1.5248219786879451, "grad_norm": 0.4765625, "learning_rate": 0.00020888349514563105, "loss": 0.5402, "step": 3775 }, { "epoch": 1.525225998686935, "grad_norm": 0.578125, "learning_rate": 0.00020885922330097088, "loss": 0.7139, "step": 3776 }, { "epoch": 1.5256300186859248, "grad_norm": 0.55859375, "learning_rate": 0.00020883495145631066, "loss": 0.6138, "step": 3777 }, { "epoch": 1.526034038684915, "grad_norm": 0.478515625, "learning_rate": 0.00020881067961165046, "loss": 0.6105, "step": 3778 }, { "epoch": 1.5264380586839048, "grad_norm": 0.53515625, "learning_rate": 0.0002087864077669903, "loss": 0.6215, "step": 3779 }, { "epoch": 1.5268420786828947, "grad_norm": 0.44921875, "learning_rate": 0.00020876213592233006, "loss": 0.5421, "step": 3780 }, { "epoch": 1.5272460986818848, "grad_norm": 0.52734375, "learning_rate": 0.00020873786407766987, "loss": 0.6858, "step": 3781 }, { "epoch": 1.5276501186808749, "grad_norm": 0.51953125, "learning_rate": 0.0002087135922330097, "loss": 0.5097, "step": 3782 }, { "epoch": 1.5280541386798645, "grad_norm": 0.55078125, "learning_rate": 0.0002086893203883495, "loss": 0.6151, "step": 3783 }, { "epoch": 1.5284581586788546, "grad_norm": 0.49609375, "learning_rate": 0.00020866504854368933, "loss": 0.5974, "step": 3784 }, { "epoch": 1.5288621786778447, "grad_norm": 0.578125, "learning_rate": 0.0002086407766990291, "loss": 0.5967, "step": 3785 }, { "epoch": 1.5292661986768346, "grad_norm": 0.640625, "learning_rate": 0.0002086165048543689, "loss": 0.6958, "step": 3786 }, { "epoch": 1.5296702186758244, "grad_norm": 0.57421875, "learning_rate": 0.00020859223300970873, "loss": 0.6156, "step": 3787 }, { "epoch": 1.5300742386748145, "grad_norm": 0.451171875, "learning_rate": 0.00020856796116504854, "loss": 0.5857, "step": 3788 }, { "epoch": 1.5304782586738044, "grad_norm": 0.65234375, "learning_rate": 0.0002085436893203883, "loss": 0.7216, "step": 3789 }, { "epoch": 1.5308822786727942, "grad_norm": 0.95703125, "learning_rate": 0.00020851941747572814, "loss": 0.6874, "step": 3790 }, { "epoch": 1.5312862986717843, "grad_norm": 0.546875, "learning_rate": 0.00020849514563106794, "loss": 0.6131, "step": 3791 }, { "epoch": 1.5316903186707742, "grad_norm": 0.59765625, "learning_rate": 0.00020847087378640775, "loss": 0.6565, "step": 3792 }, { "epoch": 1.532094338669764, "grad_norm": 0.57421875, "learning_rate": 0.00020844660194174755, "loss": 0.7061, "step": 3793 }, { "epoch": 1.5324983586687542, "grad_norm": 0.703125, "learning_rate": 0.00020842233009708735, "loss": 0.682, "step": 3794 }, { "epoch": 1.532902378667744, "grad_norm": 0.5546875, "learning_rate": 0.00020839805825242718, "loss": 0.6224, "step": 3795 }, { "epoch": 1.533306398666734, "grad_norm": 0.58984375, "learning_rate": 0.00020837378640776698, "loss": 0.6348, "step": 3796 }, { "epoch": 1.533710418665724, "grad_norm": 0.50390625, "learning_rate": 0.00020834951456310676, "loss": 0.6318, "step": 3797 }, { "epoch": 1.5341144386647139, "grad_norm": 0.546875, "learning_rate": 0.0002083252427184466, "loss": 0.6876, "step": 3798 }, { "epoch": 1.5345184586637037, "grad_norm": 0.734375, "learning_rate": 0.0002083009708737864, "loss": 0.6783, "step": 3799 }, { "epoch": 1.5349224786626938, "grad_norm": 0.46484375, "learning_rate": 0.0002082766990291262, "loss": 0.5544, "step": 3800 }, { "epoch": 1.535326498661684, "grad_norm": 0.52734375, "learning_rate": 0.00020825242718446602, "loss": 0.616, "step": 3801 }, { "epoch": 1.5357305186606736, "grad_norm": 0.5390625, "learning_rate": 0.0002082281553398058, "loss": 0.7117, "step": 3802 }, { "epoch": 1.5361345386596637, "grad_norm": 0.49609375, "learning_rate": 0.00020820388349514563, "loss": 0.6313, "step": 3803 }, { "epoch": 1.5365385586586537, "grad_norm": 0.53125, "learning_rate": 0.00020817961165048543, "loss": 0.6413, "step": 3804 }, { "epoch": 1.5369425786576434, "grad_norm": 0.5, "learning_rate": 0.0002081553398058252, "loss": 0.6089, "step": 3805 }, { "epoch": 1.5373465986566335, "grad_norm": 0.48828125, "learning_rate": 0.00020813106796116503, "loss": 0.6374, "step": 3806 }, { "epoch": 1.5377506186556236, "grad_norm": 0.5234375, "learning_rate": 0.00020810679611650484, "loss": 0.6443, "step": 3807 }, { "epoch": 1.5381546386546134, "grad_norm": 0.6171875, "learning_rate": 0.00020808252427184464, "loss": 0.638, "step": 3808 }, { "epoch": 1.5385586586536033, "grad_norm": 0.53515625, "learning_rate": 0.00020805825242718447, "loss": 0.6072, "step": 3809 }, { "epoch": 1.5389626786525934, "grad_norm": 0.486328125, "learning_rate": 0.00020803398058252424, "loss": 0.6473, "step": 3810 }, { "epoch": 1.5393666986515833, "grad_norm": 0.57421875, "learning_rate": 0.00020800970873786404, "loss": 0.702, "step": 3811 }, { "epoch": 1.5397707186505731, "grad_norm": 0.55859375, "learning_rate": 0.00020798543689320387, "loss": 0.6719, "step": 3812 }, { "epoch": 1.5401747386495632, "grad_norm": 0.61328125, "learning_rate": 0.00020796116504854368, "loss": 0.702, "step": 3813 }, { "epoch": 1.540578758648553, "grad_norm": 0.6171875, "learning_rate": 0.0002079368932038835, "loss": 0.7115, "step": 3814 }, { "epoch": 1.540982778647543, "grad_norm": 0.498046875, "learning_rate": 0.00020791262135922328, "loss": 0.6068, "step": 3815 }, { "epoch": 1.541386798646533, "grad_norm": 0.63671875, "learning_rate": 0.00020788834951456308, "loss": 0.6245, "step": 3816 }, { "epoch": 1.541790818645523, "grad_norm": 0.58984375, "learning_rate": 0.0002078640776699029, "loss": 0.643, "step": 3817 }, { "epoch": 1.5421948386445128, "grad_norm": 0.51171875, "learning_rate": 0.0002078398058252427, "loss": 0.6509, "step": 3818 }, { "epoch": 1.5425988586435029, "grad_norm": 0.4375, "learning_rate": 0.0002078155339805825, "loss": 0.59, "step": 3819 }, { "epoch": 1.543002878642493, "grad_norm": 0.498046875, "learning_rate": 0.00020779126213592232, "loss": 0.5924, "step": 3820 }, { "epoch": 1.5434068986414826, "grad_norm": 0.458984375, "learning_rate": 0.00020776699029126212, "loss": 0.6229, "step": 3821 }, { "epoch": 1.5438109186404727, "grad_norm": 0.51171875, "learning_rate": 0.0002077427184466019, "loss": 0.5962, "step": 3822 }, { "epoch": 1.5442149386394628, "grad_norm": 0.609375, "learning_rate": 0.00020771844660194173, "loss": 0.6826, "step": 3823 }, { "epoch": 1.5446189586384524, "grad_norm": 0.515625, "learning_rate": 0.00020769417475728153, "loss": 0.6108, "step": 3824 }, { "epoch": 1.5450229786374425, "grad_norm": 0.5703125, "learning_rate": 0.00020766990291262136, "loss": 0.6445, "step": 3825 }, { "epoch": 1.5454269986364326, "grad_norm": 0.55859375, "learning_rate": 0.00020764563106796116, "loss": 0.6325, "step": 3826 }, { "epoch": 1.5458310186354225, "grad_norm": 0.57421875, "learning_rate": 0.00020762135922330094, "loss": 0.733, "step": 3827 }, { "epoch": 1.5462350386344124, "grad_norm": 0.48046875, "learning_rate": 0.00020759708737864077, "loss": 0.653, "step": 3828 }, { "epoch": 1.5466390586334025, "grad_norm": 0.54296875, "learning_rate": 0.00020757281553398057, "loss": 0.6301, "step": 3829 }, { "epoch": 1.5470430786323923, "grad_norm": 0.63671875, "learning_rate": 0.00020754854368932037, "loss": 0.6981, "step": 3830 }, { "epoch": 1.5474470986313822, "grad_norm": 0.546875, "learning_rate": 0.00020752427184466017, "loss": 0.6348, "step": 3831 }, { "epoch": 1.5478511186303723, "grad_norm": 0.470703125, "learning_rate": 0.00020749999999999998, "loss": 0.6017, "step": 3832 }, { "epoch": 1.5482551386293621, "grad_norm": 0.5078125, "learning_rate": 0.00020747572815533978, "loss": 0.649, "step": 3833 }, { "epoch": 1.548659158628352, "grad_norm": 0.474609375, "learning_rate": 0.0002074514563106796, "loss": 0.6309, "step": 3834 }, { "epoch": 1.549063178627342, "grad_norm": 0.546875, "learning_rate": 0.00020742718446601938, "loss": 0.6274, "step": 3835 }, { "epoch": 1.549467198626332, "grad_norm": 0.55078125, "learning_rate": 0.0002074029126213592, "loss": 0.7542, "step": 3836 }, { "epoch": 1.5498712186253218, "grad_norm": 0.61328125, "learning_rate": 0.00020737864077669901, "loss": 0.5998, "step": 3837 }, { "epoch": 1.550275238624312, "grad_norm": 0.474609375, "learning_rate": 0.00020735436893203882, "loss": 0.6202, "step": 3838 }, { "epoch": 1.5506792586233018, "grad_norm": 0.5234375, "learning_rate": 0.00020733009708737865, "loss": 0.5876, "step": 3839 }, { "epoch": 1.5510832786222917, "grad_norm": 0.59375, "learning_rate": 0.00020730582524271842, "loss": 0.6564, "step": 3840 }, { "epoch": 1.5514872986212818, "grad_norm": 0.4921875, "learning_rate": 0.00020728155339805822, "loss": 0.5879, "step": 3841 }, { "epoch": 1.5518913186202719, "grad_norm": 0.54296875, "learning_rate": 0.00020725728155339805, "loss": 0.575, "step": 3842 }, { "epoch": 1.5522953386192615, "grad_norm": 0.6015625, "learning_rate": 0.00020723300970873783, "loss": 0.6303, "step": 3843 }, { "epoch": 1.5526993586182516, "grad_norm": 0.5859375, "learning_rate": 0.00020720873786407766, "loss": 0.6327, "step": 3844 }, { "epoch": 1.5531033786172417, "grad_norm": 0.54296875, "learning_rate": 0.00020718446601941746, "loss": 0.5899, "step": 3845 }, { "epoch": 1.5535073986162315, "grad_norm": 0.515625, "learning_rate": 0.00020716019417475726, "loss": 0.6741, "step": 3846 }, { "epoch": 1.5539114186152214, "grad_norm": 0.41796875, "learning_rate": 0.0002071359223300971, "loss": 0.5437, "step": 3847 }, { "epoch": 1.5543154386142115, "grad_norm": 0.5625, "learning_rate": 0.00020711165048543687, "loss": 0.7247, "step": 3848 }, { "epoch": 1.5547194586132014, "grad_norm": 0.68359375, "learning_rate": 0.00020708737864077667, "loss": 0.7495, "step": 3849 }, { "epoch": 1.5551234786121912, "grad_norm": 0.50390625, "learning_rate": 0.0002070631067961165, "loss": 0.6384, "step": 3850 }, { "epoch": 1.5555274986111813, "grad_norm": 0.57421875, "learning_rate": 0.0002070388349514563, "loss": 0.6434, "step": 3851 }, { "epoch": 1.5559315186101712, "grad_norm": 0.58203125, "learning_rate": 0.00020701456310679608, "loss": 0.6262, "step": 3852 }, { "epoch": 1.556335538609161, "grad_norm": 0.48046875, "learning_rate": 0.0002069902912621359, "loss": 0.5567, "step": 3853 }, { "epoch": 1.5567395586081512, "grad_norm": 0.546875, "learning_rate": 0.0002069660194174757, "loss": 0.5745, "step": 3854 }, { "epoch": 1.557143578607141, "grad_norm": 0.46875, "learning_rate": 0.00020694174757281554, "loss": 0.5862, "step": 3855 }, { "epoch": 1.557547598606131, "grad_norm": 0.62109375, "learning_rate": 0.0002069174757281553, "loss": 0.6507, "step": 3856 }, { "epoch": 1.557951618605121, "grad_norm": 0.5390625, "learning_rate": 0.00020689320388349511, "loss": 0.6274, "step": 3857 }, { "epoch": 1.5583556386041109, "grad_norm": 0.55859375, "learning_rate": 0.00020686893203883494, "loss": 0.6829, "step": 3858 }, { "epoch": 1.5587596586031007, "grad_norm": 0.486328125, "learning_rate": 0.00020684466019417475, "loss": 0.6406, "step": 3859 }, { "epoch": 1.5591636786020908, "grad_norm": 0.46875, "learning_rate": 0.00020682038834951452, "loss": 0.6709, "step": 3860 }, { "epoch": 1.559567698601081, "grad_norm": 0.5546875, "learning_rate": 0.00020679611650485435, "loss": 0.6642, "step": 3861 }, { "epoch": 1.5599717186000706, "grad_norm": 0.458984375, "learning_rate": 0.00020677184466019415, "loss": 0.5737, "step": 3862 }, { "epoch": 1.5603757385990606, "grad_norm": 0.48828125, "learning_rate": 0.00020674757281553396, "loss": 0.6487, "step": 3863 }, { "epoch": 1.5607797585980507, "grad_norm": 0.50390625, "learning_rate": 0.00020672330097087379, "loss": 0.6651, "step": 3864 }, { "epoch": 1.5611837785970406, "grad_norm": 0.58203125, "learning_rate": 0.00020669902912621356, "loss": 0.7214, "step": 3865 }, { "epoch": 1.5615877985960305, "grad_norm": 0.56640625, "learning_rate": 0.0002066747572815534, "loss": 0.6017, "step": 3866 }, { "epoch": 1.5619918185950206, "grad_norm": 0.4765625, "learning_rate": 0.0002066504854368932, "loss": 0.656, "step": 3867 }, { "epoch": 1.5623958385940104, "grad_norm": 0.64453125, "learning_rate": 0.00020662621359223297, "loss": 0.6941, "step": 3868 }, { "epoch": 1.5627998585930003, "grad_norm": 0.51953125, "learning_rate": 0.0002066019417475728, "loss": 0.5422, "step": 3869 }, { "epoch": 1.5632038785919904, "grad_norm": 0.55859375, "learning_rate": 0.0002065776699029126, "loss": 0.6178, "step": 3870 }, { "epoch": 1.5636078985909803, "grad_norm": 0.4453125, "learning_rate": 0.0002065533980582524, "loss": 0.5417, "step": 3871 }, { "epoch": 1.5640119185899701, "grad_norm": 0.5390625, "learning_rate": 0.00020652912621359223, "loss": 0.6172, "step": 3872 }, { "epoch": 1.5644159385889602, "grad_norm": 0.55078125, "learning_rate": 0.000206504854368932, "loss": 0.6617, "step": 3873 }, { "epoch": 1.56481995858795, "grad_norm": 0.4609375, "learning_rate": 0.00020648058252427184, "loss": 0.5799, "step": 3874 }, { "epoch": 1.56522397858694, "grad_norm": 0.42578125, "learning_rate": 0.00020645631067961164, "loss": 0.5487, "step": 3875 }, { "epoch": 1.56562799858593, "grad_norm": 0.5546875, "learning_rate": 0.00020643203883495144, "loss": 0.6072, "step": 3876 }, { "epoch": 1.56603201858492, "grad_norm": 0.49609375, "learning_rate": 0.00020640776699029127, "loss": 0.5958, "step": 3877 }, { "epoch": 1.5664360385839098, "grad_norm": 0.58984375, "learning_rate": 0.00020638349514563105, "loss": 0.5823, "step": 3878 }, { "epoch": 1.5668400585828999, "grad_norm": 0.63671875, "learning_rate": 0.00020635922330097085, "loss": 0.73, "step": 3879 }, { "epoch": 1.56724407858189, "grad_norm": 0.52734375, "learning_rate": 0.00020633495145631068, "loss": 0.6375, "step": 3880 }, { "epoch": 1.5676480985808796, "grad_norm": 0.486328125, "learning_rate": 0.00020631067961165045, "loss": 0.6462, "step": 3881 }, { "epoch": 1.5680521185798697, "grad_norm": 0.484375, "learning_rate": 0.00020628640776699025, "loss": 0.5897, "step": 3882 }, { "epoch": 1.5684561385788598, "grad_norm": 0.5546875, "learning_rate": 0.00020626213592233008, "loss": 0.6266, "step": 3883 }, { "epoch": 1.5688601585778494, "grad_norm": 0.5625, "learning_rate": 0.0002062378640776699, "loss": 0.6802, "step": 3884 }, { "epoch": 1.5692641785768395, "grad_norm": 0.52734375, "learning_rate": 0.00020621359223300972, "loss": 0.6325, "step": 3885 }, { "epoch": 1.5696681985758296, "grad_norm": 0.50390625, "learning_rate": 0.0002061893203883495, "loss": 0.608, "step": 3886 }, { "epoch": 1.5700722185748195, "grad_norm": 0.462890625, "learning_rate": 0.0002061650485436893, "loss": 0.551, "step": 3887 }, { "epoch": 1.5704762385738094, "grad_norm": 0.53125, "learning_rate": 0.00020614077669902912, "loss": 0.6607, "step": 3888 }, { "epoch": 1.5708802585727994, "grad_norm": 0.5234375, "learning_rate": 0.00020611650485436893, "loss": 0.6326, "step": 3889 }, { "epoch": 1.5712842785717893, "grad_norm": 0.498046875, "learning_rate": 0.0002060922330097087, "loss": 0.628, "step": 3890 }, { "epoch": 1.5716882985707792, "grad_norm": 0.54296875, "learning_rate": 0.00020606796116504853, "loss": 0.6616, "step": 3891 }, { "epoch": 1.5720923185697693, "grad_norm": 0.640625, "learning_rate": 0.00020604368932038833, "loss": 0.6626, "step": 3892 }, { "epoch": 1.5724963385687591, "grad_norm": 0.48046875, "learning_rate": 0.00020601941747572813, "loss": 0.6102, "step": 3893 }, { "epoch": 1.572900358567749, "grad_norm": 0.546875, "learning_rate": 0.00020599514563106794, "loss": 0.6207, "step": 3894 }, { "epoch": 1.573304378566739, "grad_norm": 1.625, "learning_rate": 0.00020597087378640774, "loss": 0.6629, "step": 3895 }, { "epoch": 1.573708398565729, "grad_norm": 0.431640625, "learning_rate": 0.00020594660194174757, "loss": 0.5822, "step": 3896 }, { "epoch": 1.5741124185647188, "grad_norm": 0.546875, "learning_rate": 0.00020592233009708737, "loss": 0.6405, "step": 3897 }, { "epoch": 1.574516438563709, "grad_norm": 0.62890625, "learning_rate": 0.00020589805825242715, "loss": 0.7791, "step": 3898 }, { "epoch": 1.5749204585626988, "grad_norm": 0.609375, "learning_rate": 0.00020587378640776698, "loss": 0.6579, "step": 3899 }, { "epoch": 1.5753244785616887, "grad_norm": 0.60546875, "learning_rate": 0.00020584951456310678, "loss": 0.6349, "step": 3900 }, { "epoch": 1.5757284985606788, "grad_norm": 0.5546875, "learning_rate": 0.00020582524271844658, "loss": 0.6549, "step": 3901 }, { "epoch": 1.5761325185596688, "grad_norm": 0.59765625, "learning_rate": 0.0002058009708737864, "loss": 0.5699, "step": 3902 }, { "epoch": 1.5765365385586585, "grad_norm": 0.48828125, "learning_rate": 0.00020577669902912619, "loss": 0.6531, "step": 3903 }, { "epoch": 1.5769405585576486, "grad_norm": 0.46484375, "learning_rate": 0.00020575242718446601, "loss": 0.5481, "step": 3904 }, { "epoch": 1.5773445785566387, "grad_norm": 0.51953125, "learning_rate": 0.00020572815533980582, "loss": 0.6711, "step": 3905 }, { "epoch": 1.5777485985556285, "grad_norm": 0.4296875, "learning_rate": 0.0002057038834951456, "loss": 0.5609, "step": 3906 }, { "epoch": 1.5781526185546184, "grad_norm": 0.453125, "learning_rate": 0.00020567961165048542, "loss": 0.6308, "step": 3907 }, { "epoch": 1.5785566385536085, "grad_norm": 0.498046875, "learning_rate": 0.00020565533980582522, "loss": 0.6594, "step": 3908 }, { "epoch": 1.5789606585525984, "grad_norm": 0.59765625, "learning_rate": 0.00020563106796116503, "loss": 0.6345, "step": 3909 }, { "epoch": 1.5793646785515882, "grad_norm": 0.46875, "learning_rate": 0.00020560679611650486, "loss": 0.5328, "step": 3910 }, { "epoch": 1.5797686985505783, "grad_norm": 0.5078125, "learning_rate": 0.00020558252427184463, "loss": 0.6168, "step": 3911 }, { "epoch": 1.5801727185495682, "grad_norm": 0.64453125, "learning_rate": 0.00020555825242718443, "loss": 0.7012, "step": 3912 }, { "epoch": 1.580576738548558, "grad_norm": 0.53125, "learning_rate": 0.00020553398058252426, "loss": 0.6596, "step": 3913 }, { "epoch": 1.5809807585475482, "grad_norm": 0.52734375, "learning_rate": 0.00020550970873786407, "loss": 0.5898, "step": 3914 }, { "epoch": 1.581384778546538, "grad_norm": 0.5078125, "learning_rate": 0.00020548543689320387, "loss": 0.618, "step": 3915 }, { "epoch": 1.581788798545528, "grad_norm": 0.490234375, "learning_rate": 0.00020546116504854367, "loss": 0.5734, "step": 3916 }, { "epoch": 1.582192818544518, "grad_norm": 0.53515625, "learning_rate": 0.00020543689320388347, "loss": 0.625, "step": 3917 }, { "epoch": 1.5825968385435079, "grad_norm": 0.6171875, "learning_rate": 0.0002054126213592233, "loss": 0.6831, "step": 3918 }, { "epoch": 1.5830008585424977, "grad_norm": 0.498046875, "learning_rate": 0.00020538834951456308, "loss": 0.6203, "step": 3919 }, { "epoch": 1.5834048785414878, "grad_norm": 0.66015625, "learning_rate": 0.00020536407766990288, "loss": 0.7314, "step": 3920 }, { "epoch": 1.583808898540478, "grad_norm": 0.6171875, "learning_rate": 0.0002053398058252427, "loss": 0.7615, "step": 3921 }, { "epoch": 1.5842129185394676, "grad_norm": 0.52734375, "learning_rate": 0.0002053155339805825, "loss": 0.6298, "step": 3922 }, { "epoch": 1.5846169385384576, "grad_norm": 0.62890625, "learning_rate": 0.00020529126213592229, "loss": 0.7101, "step": 3923 }, { "epoch": 1.5850209585374477, "grad_norm": 0.5546875, "learning_rate": 0.00020526699029126212, "loss": 0.6217, "step": 3924 }, { "epoch": 1.5854249785364376, "grad_norm": 0.54296875, "learning_rate": 0.00020524271844660192, "loss": 0.6128, "step": 3925 }, { "epoch": 1.5858289985354275, "grad_norm": 0.55078125, "learning_rate": 0.00020521844660194175, "loss": 0.6123, "step": 3926 }, { "epoch": 1.5862330185344176, "grad_norm": 0.6484375, "learning_rate": 0.00020519417475728155, "loss": 0.6941, "step": 3927 }, { "epoch": 1.5866370385334074, "grad_norm": 0.53125, "learning_rate": 0.00020516990291262132, "loss": 0.6615, "step": 3928 }, { "epoch": 1.5870410585323973, "grad_norm": 0.609375, "learning_rate": 0.00020514563106796115, "loss": 0.5408, "step": 3929 }, { "epoch": 1.5874450785313874, "grad_norm": 0.4765625, "learning_rate": 0.00020512135922330096, "loss": 0.5992, "step": 3930 }, { "epoch": 1.5878490985303773, "grad_norm": 0.56640625, "learning_rate": 0.00020509708737864073, "loss": 0.641, "step": 3931 }, { "epoch": 1.5882531185293671, "grad_norm": 0.45703125, "learning_rate": 0.00020507281553398056, "loss": 0.6089, "step": 3932 }, { "epoch": 1.5886571385283572, "grad_norm": 0.49609375, "learning_rate": 0.00020504854368932036, "loss": 0.5982, "step": 3933 }, { "epoch": 1.589061158527347, "grad_norm": 0.609375, "learning_rate": 0.0002050242718446602, "loss": 0.5614, "step": 3934 }, { "epoch": 1.589465178526337, "grad_norm": 0.53515625, "learning_rate": 0.000205, "loss": 0.5966, "step": 3935 }, { "epoch": 1.589869198525327, "grad_norm": 0.49609375, "learning_rate": 0.00020497572815533977, "loss": 0.6466, "step": 3936 }, { "epoch": 1.590273218524317, "grad_norm": 0.51953125, "learning_rate": 0.0002049514563106796, "loss": 0.5528, "step": 3937 }, { "epoch": 1.5906772385233068, "grad_norm": 0.66796875, "learning_rate": 0.0002049271844660194, "loss": 0.6176, "step": 3938 }, { "epoch": 1.5910812585222969, "grad_norm": 0.46875, "learning_rate": 0.0002049029126213592, "loss": 0.6234, "step": 3939 }, { "epoch": 1.591485278521287, "grad_norm": 0.5546875, "learning_rate": 0.00020487864077669903, "loss": 0.5724, "step": 3940 }, { "epoch": 1.5918892985202766, "grad_norm": 0.52734375, "learning_rate": 0.0002048543689320388, "loss": 0.6259, "step": 3941 }, { "epoch": 1.5922933185192667, "grad_norm": 0.486328125, "learning_rate": 0.0002048300970873786, "loss": 0.6012, "step": 3942 }, { "epoch": 1.5926973385182568, "grad_norm": 0.62890625, "learning_rate": 0.00020480582524271844, "loss": 0.6161, "step": 3943 }, { "epoch": 1.5931013585172467, "grad_norm": 0.53125, "learning_rate": 0.00020478155339805822, "loss": 0.7152, "step": 3944 }, { "epoch": 1.5935053785162365, "grad_norm": 0.4921875, "learning_rate": 0.00020475728155339805, "loss": 0.6497, "step": 3945 }, { "epoch": 1.5939093985152266, "grad_norm": 0.57421875, "learning_rate": 0.00020473300970873785, "loss": 0.7031, "step": 3946 }, { "epoch": 1.5943134185142165, "grad_norm": 0.5078125, "learning_rate": 0.00020470873786407765, "loss": 0.6416, "step": 3947 }, { "epoch": 1.5947174385132064, "grad_norm": 0.62890625, "learning_rate": 0.00020468446601941748, "loss": 0.7407, "step": 3948 }, { "epoch": 1.5951214585121964, "grad_norm": 0.435546875, "learning_rate": 0.00020466019417475726, "loss": 0.5733, "step": 3949 }, { "epoch": 1.5955254785111863, "grad_norm": 0.494140625, "learning_rate": 0.00020463592233009706, "loss": 0.5785, "step": 3950 }, { "epoch": 1.5959294985101762, "grad_norm": 0.5625, "learning_rate": 0.0002046116504854369, "loss": 0.6206, "step": 3951 }, { "epoch": 1.5963335185091663, "grad_norm": 0.5703125, "learning_rate": 0.0002045873786407767, "loss": 0.6514, "step": 3952 }, { "epoch": 1.5967375385081561, "grad_norm": 0.52734375, "learning_rate": 0.00020456310679611646, "loss": 0.627, "step": 3953 }, { "epoch": 1.597141558507146, "grad_norm": 0.53515625, "learning_rate": 0.0002045388349514563, "loss": 0.6799, "step": 3954 }, { "epoch": 1.597545578506136, "grad_norm": 0.546875, "learning_rate": 0.0002045145631067961, "loss": 0.7212, "step": 3955 }, { "epoch": 1.597949598505126, "grad_norm": 0.50390625, "learning_rate": 0.00020449029126213593, "loss": 0.5971, "step": 3956 }, { "epoch": 1.5983536185041158, "grad_norm": 0.578125, "learning_rate": 0.0002044660194174757, "loss": 0.636, "step": 3957 }, { "epoch": 1.598757638503106, "grad_norm": 0.54296875, "learning_rate": 0.0002044417475728155, "loss": 0.6414, "step": 3958 }, { "epoch": 1.599161658502096, "grad_norm": 0.55078125, "learning_rate": 0.00020441747572815533, "loss": 0.7094, "step": 3959 }, { "epoch": 1.5995656785010857, "grad_norm": 0.482421875, "learning_rate": 0.00020439320388349514, "loss": 0.5592, "step": 3960 }, { "epoch": 1.5999696985000758, "grad_norm": 0.458984375, "learning_rate": 0.0002043689320388349, "loss": 0.6281, "step": 3961 }, { "epoch": 1.6003737184990658, "grad_norm": 0.54296875, "learning_rate": 0.00020434466019417474, "loss": 0.615, "step": 3962 }, { "epoch": 1.6007777384980555, "grad_norm": 0.53515625, "learning_rate": 0.00020432038834951454, "loss": 0.6312, "step": 3963 }, { "epoch": 1.6011817584970456, "grad_norm": 0.515625, "learning_rate": 0.00020429611650485434, "loss": 0.5859, "step": 3964 }, { "epoch": 1.6015857784960357, "grad_norm": 0.53125, "learning_rate": 0.00020427184466019417, "loss": 0.6863, "step": 3965 }, { "epoch": 1.6019897984950255, "grad_norm": 0.5546875, "learning_rate": 0.00020424757281553395, "loss": 0.5888, "step": 3966 }, { "epoch": 1.6023938184940154, "grad_norm": 0.68359375, "learning_rate": 0.00020422330097087378, "loss": 0.6954, "step": 3967 }, { "epoch": 1.6027978384930055, "grad_norm": 0.5703125, "learning_rate": 0.00020419902912621358, "loss": 0.6476, "step": 3968 }, { "epoch": 1.6032018584919954, "grad_norm": 0.486328125, "learning_rate": 0.00020417475728155336, "loss": 0.6177, "step": 3969 }, { "epoch": 1.6036058784909852, "grad_norm": 0.58984375, "learning_rate": 0.00020415048543689319, "loss": 0.6916, "step": 3970 }, { "epoch": 1.6040098984899753, "grad_norm": 0.5078125, "learning_rate": 0.000204126213592233, "loss": 0.6222, "step": 3971 }, { "epoch": 1.6044139184889652, "grad_norm": 0.49609375, "learning_rate": 0.0002041019417475728, "loss": 0.6812, "step": 3972 }, { "epoch": 1.604817938487955, "grad_norm": 0.609375, "learning_rate": 0.00020407766990291262, "loss": 0.6573, "step": 3973 }, { "epoch": 1.6052219584869452, "grad_norm": 0.474609375, "learning_rate": 0.0002040533980582524, "loss": 0.5921, "step": 3974 }, { "epoch": 1.605625978485935, "grad_norm": 0.490234375, "learning_rate": 0.00020402912621359222, "loss": 0.628, "step": 3975 }, { "epoch": 1.606029998484925, "grad_norm": 0.47265625, "learning_rate": 0.00020400485436893203, "loss": 0.5745, "step": 3976 }, { "epoch": 1.606434018483915, "grad_norm": 0.5234375, "learning_rate": 0.00020398058252427183, "loss": 0.6059, "step": 3977 }, { "epoch": 1.6068380384829049, "grad_norm": 0.55078125, "learning_rate": 0.00020395631067961163, "loss": 0.6315, "step": 3978 }, { "epoch": 1.6072420584818947, "grad_norm": 0.5703125, "learning_rate": 0.00020393203883495143, "loss": 0.662, "step": 3979 }, { "epoch": 1.6076460784808848, "grad_norm": 0.58984375, "learning_rate": 0.00020390776699029124, "loss": 0.639, "step": 3980 }, { "epoch": 1.608050098479875, "grad_norm": 0.462890625, "learning_rate": 0.00020388349514563107, "loss": 0.5664, "step": 3981 }, { "epoch": 1.6084541184788645, "grad_norm": 0.5234375, "learning_rate": 0.00020385922330097084, "loss": 0.6522, "step": 3982 }, { "epoch": 1.6088581384778546, "grad_norm": 0.5625, "learning_rate": 0.00020383495145631064, "loss": 0.6732, "step": 3983 }, { "epoch": 1.6092621584768447, "grad_norm": 0.515625, "learning_rate": 0.00020381067961165047, "loss": 0.4989, "step": 3984 }, { "epoch": 1.6096661784758346, "grad_norm": 0.515625, "learning_rate": 0.00020378640776699028, "loss": 0.6988, "step": 3985 }, { "epoch": 1.6100701984748245, "grad_norm": 0.4375, "learning_rate": 0.0002037621359223301, "loss": 0.5717, "step": 3986 }, { "epoch": 1.6104742184738146, "grad_norm": 0.44140625, "learning_rate": 0.00020373786407766988, "loss": 0.5895, "step": 3987 }, { "epoch": 1.6108782384728044, "grad_norm": 0.46875, "learning_rate": 0.00020371359223300968, "loss": 0.6481, "step": 3988 }, { "epoch": 1.6112822584717943, "grad_norm": 0.474609375, "learning_rate": 0.0002036893203883495, "loss": 0.6022, "step": 3989 }, { "epoch": 1.6116862784707844, "grad_norm": 0.51953125, "learning_rate": 0.00020366504854368931, "loss": 0.6123, "step": 3990 }, { "epoch": 1.6120902984697743, "grad_norm": 0.51953125, "learning_rate": 0.0002036407766990291, "loss": 0.6326, "step": 3991 }, { "epoch": 1.6124943184687641, "grad_norm": 0.470703125, "learning_rate": 0.00020361650485436892, "loss": 0.6212, "step": 3992 }, { "epoch": 1.6128983384677542, "grad_norm": 0.474609375, "learning_rate": 0.00020359223300970872, "loss": 0.6145, "step": 3993 }, { "epoch": 1.613302358466744, "grad_norm": 0.5859375, "learning_rate": 0.0002035679611650485, "loss": 0.6237, "step": 3994 }, { "epoch": 1.613706378465734, "grad_norm": 0.59375, "learning_rate": 0.00020354368932038833, "loss": 0.6693, "step": 3995 }, { "epoch": 1.614110398464724, "grad_norm": 0.5390625, "learning_rate": 0.00020351941747572813, "loss": 0.6278, "step": 3996 }, { "epoch": 1.614514418463714, "grad_norm": 0.439453125, "learning_rate": 0.00020349514563106796, "loss": 0.5634, "step": 3997 }, { "epoch": 1.6149184384627038, "grad_norm": 0.474609375, "learning_rate": 0.00020347087378640776, "loss": 0.6013, "step": 3998 }, { "epoch": 1.6153224584616939, "grad_norm": 0.6953125, "learning_rate": 0.00020344660194174754, "loss": 0.6692, "step": 3999 }, { "epoch": 1.615726478460684, "grad_norm": 0.51953125, "learning_rate": 0.00020342233009708736, "loss": 0.6191, "step": 4000 }, { "epoch": 1.6161304984596736, "grad_norm": 0.59375, "learning_rate": 0.00020339805825242717, "loss": 0.7013, "step": 4001 }, { "epoch": 1.6165345184586637, "grad_norm": 0.51171875, "learning_rate": 0.00020337378640776697, "loss": 0.59, "step": 4002 }, { "epoch": 1.6169385384576538, "grad_norm": 0.5, "learning_rate": 0.0002033495145631068, "loss": 0.6179, "step": 4003 }, { "epoch": 1.6173425584566437, "grad_norm": 0.60546875, "learning_rate": 0.00020332524271844657, "loss": 0.6925, "step": 4004 }, { "epoch": 1.6177465784556335, "grad_norm": 0.62109375, "learning_rate": 0.0002033009708737864, "loss": 0.6969, "step": 4005 }, { "epoch": 1.6181505984546236, "grad_norm": 0.5390625, "learning_rate": 0.0002032766990291262, "loss": 0.5476, "step": 4006 }, { "epoch": 1.6185546184536135, "grad_norm": 0.57421875, "learning_rate": 0.00020325242718446598, "loss": 0.6579, "step": 4007 }, { "epoch": 1.6189586384526033, "grad_norm": 0.59375, "learning_rate": 0.0002032281553398058, "loss": 0.6946, "step": 4008 }, { "epoch": 1.6193626584515934, "grad_norm": 0.5625, "learning_rate": 0.0002032038834951456, "loss": 0.6262, "step": 4009 }, { "epoch": 1.6197666784505833, "grad_norm": 0.51171875, "learning_rate": 0.00020317961165048542, "loss": 0.6049, "step": 4010 }, { "epoch": 1.6201706984495732, "grad_norm": 0.5625, "learning_rate": 0.00020315533980582524, "loss": 0.6093, "step": 4011 }, { "epoch": 1.6205747184485633, "grad_norm": 0.443359375, "learning_rate": 0.00020313106796116502, "loss": 0.57, "step": 4012 }, { "epoch": 1.6209787384475531, "grad_norm": 0.5546875, "learning_rate": 0.00020310679611650482, "loss": 0.6637, "step": 4013 }, { "epoch": 1.621382758446543, "grad_norm": 0.5703125, "learning_rate": 0.00020308252427184465, "loss": 0.6427, "step": 4014 }, { "epoch": 1.621786778445533, "grad_norm": 0.4609375, "learning_rate": 0.00020305825242718445, "loss": 0.5973, "step": 4015 }, { "epoch": 1.622190798444523, "grad_norm": 0.41796875, "learning_rate": 0.00020303398058252426, "loss": 0.5773, "step": 4016 }, { "epoch": 1.6225948184435128, "grad_norm": 0.470703125, "learning_rate": 0.00020300970873786406, "loss": 0.6271, "step": 4017 }, { "epoch": 1.622998838442503, "grad_norm": 0.5078125, "learning_rate": 0.00020298543689320386, "loss": 0.5433, "step": 4018 }, { "epoch": 1.623402858441493, "grad_norm": 0.51171875, "learning_rate": 0.0002029611650485437, "loss": 0.5935, "step": 4019 }, { "epoch": 1.6238068784404827, "grad_norm": 0.59765625, "learning_rate": 0.00020293689320388347, "loss": 0.6409, "step": 4020 }, { "epoch": 1.6242108984394727, "grad_norm": 0.466796875, "learning_rate": 0.00020291262135922327, "loss": 0.6279, "step": 4021 }, { "epoch": 1.6246149184384628, "grad_norm": 0.61328125, "learning_rate": 0.0002028883495145631, "loss": 0.6356, "step": 4022 }, { "epoch": 1.6250189384374527, "grad_norm": 0.69140625, "learning_rate": 0.0002028640776699029, "loss": 0.7348, "step": 4023 }, { "epoch": 1.6254229584364426, "grad_norm": 0.6171875, "learning_rate": 0.00020283980582524267, "loss": 0.5765, "step": 4024 }, { "epoch": 1.6258269784354327, "grad_norm": 0.58203125, "learning_rate": 0.0002028155339805825, "loss": 0.6259, "step": 4025 }, { "epoch": 1.6262309984344225, "grad_norm": 0.68359375, "learning_rate": 0.0002027912621359223, "loss": 0.764, "step": 4026 }, { "epoch": 1.6266350184334124, "grad_norm": 0.49609375, "learning_rate": 0.00020276699029126214, "loss": 0.6181, "step": 4027 }, { "epoch": 1.6270390384324025, "grad_norm": 0.65625, "learning_rate": 0.00020274271844660194, "loss": 0.6367, "step": 4028 }, { "epoch": 1.6274430584313924, "grad_norm": 0.625, "learning_rate": 0.00020271844660194171, "loss": 0.6025, "step": 4029 }, { "epoch": 1.6278470784303822, "grad_norm": 0.46875, "learning_rate": 0.00020269417475728154, "loss": 0.5981, "step": 4030 }, { "epoch": 1.6282510984293723, "grad_norm": 0.5234375, "learning_rate": 0.00020266990291262135, "loss": 0.5751, "step": 4031 }, { "epoch": 1.6286551184283622, "grad_norm": 0.59765625, "learning_rate": 0.00020264563106796112, "loss": 0.7564, "step": 4032 }, { "epoch": 1.629059138427352, "grad_norm": 0.50390625, "learning_rate": 0.00020262135922330095, "loss": 0.5732, "step": 4033 }, { "epoch": 1.6294631584263422, "grad_norm": 0.55859375, "learning_rate": 0.00020259708737864075, "loss": 0.6683, "step": 4034 }, { "epoch": 1.629867178425332, "grad_norm": 0.60546875, "learning_rate": 0.00020257281553398058, "loss": 0.5864, "step": 4035 }, { "epoch": 1.6302711984243219, "grad_norm": 0.5078125, "learning_rate": 0.00020254854368932038, "loss": 0.5603, "step": 4036 }, { "epoch": 1.630675218423312, "grad_norm": 0.486328125, "learning_rate": 0.00020252427184466016, "loss": 0.636, "step": 4037 }, { "epoch": 1.631079238422302, "grad_norm": 0.53125, "learning_rate": 0.0002025, "loss": 0.5818, "step": 4038 }, { "epoch": 1.6314832584212917, "grad_norm": 0.546875, "learning_rate": 0.0002024757281553398, "loss": 0.6539, "step": 4039 }, { "epoch": 1.6318872784202818, "grad_norm": 0.486328125, "learning_rate": 0.0002024514563106796, "loss": 0.5583, "step": 4040 }, { "epoch": 1.632291298419272, "grad_norm": 0.515625, "learning_rate": 0.00020242718446601942, "loss": 0.6434, "step": 4041 }, { "epoch": 1.6326953184182615, "grad_norm": 0.51953125, "learning_rate": 0.0002024029126213592, "loss": 0.6367, "step": 4042 }, { "epoch": 1.6330993384172516, "grad_norm": 0.50390625, "learning_rate": 0.000202378640776699, "loss": 0.6103, "step": 4043 }, { "epoch": 1.6335033584162417, "grad_norm": 0.44140625, "learning_rate": 0.00020235436893203883, "loss": 0.6087, "step": 4044 }, { "epoch": 1.6339073784152316, "grad_norm": 0.55078125, "learning_rate": 0.0002023300970873786, "loss": 0.6074, "step": 4045 }, { "epoch": 1.6343113984142215, "grad_norm": 0.62890625, "learning_rate": 0.00020230582524271843, "loss": 0.6868, "step": 4046 }, { "epoch": 1.6347154184132116, "grad_norm": 0.4453125, "learning_rate": 0.00020228155339805824, "loss": 0.6379, "step": 4047 }, { "epoch": 1.6351194384122014, "grad_norm": 0.427734375, "learning_rate": 0.00020225728155339804, "loss": 0.6029, "step": 4048 }, { "epoch": 1.6355234584111913, "grad_norm": 0.49609375, "learning_rate": 0.00020223300970873787, "loss": 0.6393, "step": 4049 }, { "epoch": 1.6359274784101814, "grad_norm": 0.51953125, "learning_rate": 0.00020220873786407764, "loss": 0.5569, "step": 4050 }, { "epoch": 1.6363314984091712, "grad_norm": 0.625, "learning_rate": 0.00020218446601941745, "loss": 0.6752, "step": 4051 }, { "epoch": 1.6367355184081611, "grad_norm": 0.66015625, "learning_rate": 0.00020216019417475728, "loss": 0.5866, "step": 4052 }, { "epoch": 1.6371395384071512, "grad_norm": 0.50390625, "learning_rate": 0.00020213592233009708, "loss": 0.6311, "step": 4053 }, { "epoch": 1.637543558406141, "grad_norm": 0.494140625, "learning_rate": 0.00020211165048543685, "loss": 0.5931, "step": 4054 }, { "epoch": 1.637947578405131, "grad_norm": 0.498046875, "learning_rate": 0.00020208737864077668, "loss": 0.5893, "step": 4055 }, { "epoch": 1.638351598404121, "grad_norm": 0.57421875, "learning_rate": 0.00020206310679611649, "loss": 0.5986, "step": 4056 }, { "epoch": 1.638755618403111, "grad_norm": 0.55859375, "learning_rate": 0.00020203883495145631, "loss": 0.6039, "step": 4057 }, { "epoch": 1.6391596384021008, "grad_norm": 0.546875, "learning_rate": 0.0002020145631067961, "loss": 0.6164, "step": 4058 }, { "epoch": 1.6395636584010909, "grad_norm": 0.494140625, "learning_rate": 0.0002019902912621359, "loss": 0.6531, "step": 4059 }, { "epoch": 1.639967678400081, "grad_norm": 0.5234375, "learning_rate": 0.00020196601941747572, "loss": 0.614, "step": 4060 }, { "epoch": 1.6403716983990706, "grad_norm": 0.54296875, "learning_rate": 0.00020194174757281552, "loss": 0.6463, "step": 4061 }, { "epoch": 1.6407757183980607, "grad_norm": 0.47265625, "learning_rate": 0.0002019174757281553, "loss": 0.6887, "step": 4062 }, { "epoch": 1.6411797383970508, "grad_norm": 0.416015625, "learning_rate": 0.00020189320388349513, "loss": 0.5982, "step": 4063 }, { "epoch": 1.6415837583960406, "grad_norm": 0.65234375, "learning_rate": 0.00020186893203883493, "loss": 0.7205, "step": 4064 }, { "epoch": 1.6419877783950305, "grad_norm": 0.578125, "learning_rate": 0.00020184466019417473, "loss": 0.6422, "step": 4065 }, { "epoch": 1.6423917983940206, "grad_norm": 0.51953125, "learning_rate": 0.00020182038834951456, "loss": 0.5709, "step": 4066 }, { "epoch": 1.6427958183930105, "grad_norm": 0.546875, "learning_rate": 0.00020179611650485434, "loss": 0.6591, "step": 4067 }, { "epoch": 1.6431998383920003, "grad_norm": 0.51171875, "learning_rate": 0.00020177184466019417, "loss": 0.6177, "step": 4068 }, { "epoch": 1.6436038583909904, "grad_norm": 0.51953125, "learning_rate": 0.00020174757281553397, "loss": 0.6671, "step": 4069 }, { "epoch": 1.6440078783899803, "grad_norm": 0.5234375, "learning_rate": 0.00020172330097087375, "loss": 0.6676, "step": 4070 }, { "epoch": 1.6444118983889702, "grad_norm": 0.59375, "learning_rate": 0.00020169902912621357, "loss": 0.6786, "step": 4071 }, { "epoch": 1.6448159183879603, "grad_norm": 0.6953125, "learning_rate": 0.00020167475728155338, "loss": 0.7389, "step": 4072 }, { "epoch": 1.6452199383869501, "grad_norm": 0.478515625, "learning_rate": 0.00020165048543689318, "loss": 0.6544, "step": 4073 }, { "epoch": 1.64562395838594, "grad_norm": 0.5078125, "learning_rate": 0.000201626213592233, "loss": 0.5852, "step": 4074 }, { "epoch": 1.64602797838493, "grad_norm": 0.51953125, "learning_rate": 0.00020160194174757278, "loss": 0.6623, "step": 4075 }, { "epoch": 1.64643199838392, "grad_norm": 0.72265625, "learning_rate": 0.0002015776699029126, "loss": 0.7391, "step": 4076 }, { "epoch": 1.6468360183829098, "grad_norm": 0.546875, "learning_rate": 0.00020155339805825242, "loss": 0.6432, "step": 4077 }, { "epoch": 1.6472400383819, "grad_norm": 0.546875, "learning_rate": 0.00020152912621359222, "loss": 0.6078, "step": 4078 }, { "epoch": 1.64764405838089, "grad_norm": 0.60546875, "learning_rate": 0.00020150485436893202, "loss": 0.6073, "step": 4079 }, { "epoch": 1.6480480783798797, "grad_norm": 0.49609375, "learning_rate": 0.00020148058252427182, "loss": 0.6553, "step": 4080 }, { "epoch": 1.6484520983788697, "grad_norm": 0.46875, "learning_rate": 0.00020145631067961163, "loss": 0.609, "step": 4081 }, { "epoch": 1.6488561183778598, "grad_norm": 0.58203125, "learning_rate": 0.00020143203883495145, "loss": 0.6865, "step": 4082 }, { "epoch": 1.6492601383768497, "grad_norm": 0.6484375, "learning_rate": 0.00020140776699029123, "loss": 0.6918, "step": 4083 }, { "epoch": 1.6496641583758396, "grad_norm": 0.515625, "learning_rate": 0.00020138349514563103, "loss": 0.6827, "step": 4084 }, { "epoch": 1.6500681783748297, "grad_norm": 0.5234375, "learning_rate": 0.00020135922330097086, "loss": 0.6366, "step": 4085 }, { "epoch": 1.6504721983738195, "grad_norm": 0.5390625, "learning_rate": 0.00020133495145631066, "loss": 0.676, "step": 4086 }, { "epoch": 1.6508762183728094, "grad_norm": 0.50390625, "learning_rate": 0.0002013106796116505, "loss": 0.6307, "step": 4087 }, { "epoch": 1.6512802383717995, "grad_norm": 0.5, "learning_rate": 0.00020128640776699027, "loss": 0.6253, "step": 4088 }, { "epoch": 1.6516842583707894, "grad_norm": 0.5234375, "learning_rate": 0.00020126213592233007, "loss": 0.6758, "step": 4089 }, { "epoch": 1.6520882783697792, "grad_norm": 0.53125, "learning_rate": 0.0002012378640776699, "loss": 0.6228, "step": 4090 }, { "epoch": 1.6524922983687693, "grad_norm": 0.66015625, "learning_rate": 0.0002012135922330097, "loss": 0.7523, "step": 4091 }, { "epoch": 1.6528963183677592, "grad_norm": 0.4453125, "learning_rate": 0.00020118932038834948, "loss": 0.591, "step": 4092 }, { "epoch": 1.653300338366749, "grad_norm": 0.49609375, "learning_rate": 0.0002011650485436893, "loss": 0.6281, "step": 4093 }, { "epoch": 1.6537043583657391, "grad_norm": 0.52734375, "learning_rate": 0.0002011407766990291, "loss": 0.656, "step": 4094 }, { "epoch": 1.654108378364729, "grad_norm": 0.51171875, "learning_rate": 0.00020111650485436889, "loss": 0.683, "step": 4095 }, { "epoch": 1.6545123983637189, "grad_norm": 0.69140625, "learning_rate": 0.00020109223300970871, "loss": 0.6874, "step": 4096 }, { "epoch": 1.654916418362709, "grad_norm": 0.57421875, "learning_rate": 0.00020106796116504852, "loss": 0.6464, "step": 4097 }, { "epoch": 1.655320438361699, "grad_norm": 0.490234375, "learning_rate": 0.00020104368932038835, "loss": 0.6508, "step": 4098 }, { "epoch": 1.6557244583606887, "grad_norm": 0.498046875, "learning_rate": 0.00020101941747572815, "loss": 0.5731, "step": 4099 }, { "epoch": 1.6561284783596788, "grad_norm": 0.53515625, "learning_rate": 0.00020099514563106792, "loss": 0.5883, "step": 4100 }, { "epoch": 1.656532498358669, "grad_norm": 0.466796875, "learning_rate": 0.00020097087378640775, "loss": 0.6026, "step": 4101 }, { "epoch": 1.6569365183576585, "grad_norm": 0.5078125, "learning_rate": 0.00020094660194174756, "loss": 0.6133, "step": 4102 }, { "epoch": 1.6573405383566486, "grad_norm": 0.515625, "learning_rate": 0.00020092233009708736, "loss": 0.6144, "step": 4103 }, { "epoch": 1.6577445583556387, "grad_norm": 0.486328125, "learning_rate": 0.0002008980582524272, "loss": 0.6164, "step": 4104 }, { "epoch": 1.6581485783546286, "grad_norm": 0.51171875, "learning_rate": 0.00020087378640776696, "loss": 0.5736, "step": 4105 }, { "epoch": 1.6585525983536185, "grad_norm": 0.5234375, "learning_rate": 0.0002008495145631068, "loss": 0.6586, "step": 4106 }, { "epoch": 1.6589566183526085, "grad_norm": 0.5703125, "learning_rate": 0.0002008252427184466, "loss": 0.5987, "step": 4107 }, { "epoch": 1.6593606383515984, "grad_norm": 0.458984375, "learning_rate": 0.00020080097087378637, "loss": 0.574, "step": 4108 }, { "epoch": 1.6597646583505883, "grad_norm": 0.5390625, "learning_rate": 0.0002007766990291262, "loss": 0.6166, "step": 4109 }, { "epoch": 1.6601686783495784, "grad_norm": 0.53125, "learning_rate": 0.000200752427184466, "loss": 0.5908, "step": 4110 }, { "epoch": 1.6605726983485682, "grad_norm": 0.48046875, "learning_rate": 0.0002007281553398058, "loss": 0.5935, "step": 4111 }, { "epoch": 1.6609767183475581, "grad_norm": 0.48828125, "learning_rate": 0.00020070388349514563, "loss": 0.6019, "step": 4112 }, { "epoch": 1.6613807383465482, "grad_norm": 0.486328125, "learning_rate": 0.0002006796116504854, "loss": 0.5827, "step": 4113 }, { "epoch": 1.661784758345538, "grad_norm": 0.51953125, "learning_rate": 0.0002006553398058252, "loss": 0.6532, "step": 4114 }, { "epoch": 1.662188778344528, "grad_norm": 0.50390625, "learning_rate": 0.00020063106796116504, "loss": 0.5974, "step": 4115 }, { "epoch": 1.662592798343518, "grad_norm": 0.5703125, "learning_rate": 0.00020060679611650484, "loss": 0.7018, "step": 4116 }, { "epoch": 1.662996818342508, "grad_norm": 0.5625, "learning_rate": 0.00020058252427184464, "loss": 0.5909, "step": 4117 }, { "epoch": 1.6634008383414978, "grad_norm": 0.609375, "learning_rate": 0.00020055825242718445, "loss": 0.6562, "step": 4118 }, { "epoch": 1.6638048583404879, "grad_norm": 0.5234375, "learning_rate": 0.00020053398058252425, "loss": 0.606, "step": 4119 }, { "epoch": 1.664208878339478, "grad_norm": 0.609375, "learning_rate": 0.00020050970873786408, "loss": 0.6799, "step": 4120 }, { "epoch": 1.6646128983384676, "grad_norm": 0.5703125, "learning_rate": 0.00020048543689320385, "loss": 0.6155, "step": 4121 }, { "epoch": 1.6650169183374577, "grad_norm": 0.439453125, "learning_rate": 0.00020046116504854366, "loss": 0.6144, "step": 4122 }, { "epoch": 1.6654209383364478, "grad_norm": 0.51171875, "learning_rate": 0.00020043689320388349, "loss": 0.6098, "step": 4123 }, { "epoch": 1.6658249583354376, "grad_norm": 0.7578125, "learning_rate": 0.0002004126213592233, "loss": 0.5577, "step": 4124 }, { "epoch": 1.6662289783344275, "grad_norm": 0.59375, "learning_rate": 0.00020038834951456306, "loss": 0.679, "step": 4125 }, { "epoch": 1.6666329983334176, "grad_norm": 0.62890625, "learning_rate": 0.0002003640776699029, "loss": 0.6668, "step": 4126 }, { "epoch": 1.6670370183324075, "grad_norm": 0.53125, "learning_rate": 0.0002003398058252427, "loss": 0.7131, "step": 4127 }, { "epoch": 1.6674410383313973, "grad_norm": 0.5625, "learning_rate": 0.00020031553398058252, "loss": 0.6416, "step": 4128 }, { "epoch": 1.6678450583303874, "grad_norm": 0.54296875, "learning_rate": 0.00020029126213592233, "loss": 0.6659, "step": 4129 }, { "epoch": 1.6682490783293773, "grad_norm": 0.50390625, "learning_rate": 0.0002002669902912621, "loss": 0.6144, "step": 4130 }, { "epoch": 1.6686530983283672, "grad_norm": 0.486328125, "learning_rate": 0.00020024271844660193, "loss": 0.5277, "step": 4131 }, { "epoch": 1.6690571183273573, "grad_norm": 0.5625, "learning_rate": 0.00020021844660194173, "loss": 0.6558, "step": 4132 }, { "epoch": 1.6694611383263471, "grad_norm": 0.443359375, "learning_rate": 0.0002001941747572815, "loss": 0.6127, "step": 4133 }, { "epoch": 1.669865158325337, "grad_norm": 0.5234375, "learning_rate": 0.00020016990291262134, "loss": 0.6383, "step": 4134 }, { "epoch": 1.670269178324327, "grad_norm": 0.5703125, "learning_rate": 0.00020014563106796114, "loss": 0.6041, "step": 4135 }, { "epoch": 1.670673198323317, "grad_norm": 0.494140625, "learning_rate": 0.00020012135922330097, "loss": 0.6077, "step": 4136 }, { "epoch": 1.6710772183223068, "grad_norm": 0.462890625, "learning_rate": 0.00020009708737864077, "loss": 0.6192, "step": 4137 }, { "epoch": 1.671481238321297, "grad_norm": 0.58984375, "learning_rate": 0.00020007281553398055, "loss": 0.6973, "step": 4138 }, { "epoch": 1.671885258320287, "grad_norm": 0.5234375, "learning_rate": 0.00020004854368932038, "loss": 0.584, "step": 4139 }, { "epoch": 1.6722892783192767, "grad_norm": 0.515625, "learning_rate": 0.00020002427184466018, "loss": 0.6627, "step": 4140 }, { "epoch": 1.6726932983182667, "grad_norm": 0.455078125, "learning_rate": 0.00019999999999999998, "loss": 0.5462, "step": 4141 }, { "epoch": 1.6730973183172568, "grad_norm": 0.578125, "learning_rate": 0.00019997572815533978, "loss": 0.7097, "step": 4142 }, { "epoch": 1.6735013383162467, "grad_norm": 0.46484375, "learning_rate": 0.0001999514563106796, "loss": 0.5178, "step": 4143 }, { "epoch": 1.6739053583152366, "grad_norm": 0.451171875, "learning_rate": 0.0001999271844660194, "loss": 0.6051, "step": 4144 }, { "epoch": 1.6743093783142267, "grad_norm": 0.43359375, "learning_rate": 0.00019990291262135922, "loss": 0.5271, "step": 4145 }, { "epoch": 1.6747133983132165, "grad_norm": 0.65234375, "learning_rate": 0.000199878640776699, "loss": 0.6586, "step": 4146 }, { "epoch": 1.6751174183122064, "grad_norm": 0.453125, "learning_rate": 0.00019985436893203882, "loss": 0.6023, "step": 4147 }, { "epoch": 1.6755214383111965, "grad_norm": 0.51953125, "learning_rate": 0.00019983009708737863, "loss": 0.6284, "step": 4148 }, { "epoch": 1.6759254583101864, "grad_norm": 0.53515625, "learning_rate": 0.00019980582524271843, "loss": 0.5607, "step": 4149 }, { "epoch": 1.6763294783091762, "grad_norm": 0.6328125, "learning_rate": 0.00019978155339805826, "loss": 0.6816, "step": 4150 }, { "epoch": 1.6767334983081663, "grad_norm": 0.5390625, "learning_rate": 0.00019975728155339803, "loss": 0.5999, "step": 4151 }, { "epoch": 1.6771375183071562, "grad_norm": 0.458984375, "learning_rate": 0.00019973300970873784, "loss": 0.5757, "step": 4152 }, { "epoch": 1.677541538306146, "grad_norm": 0.5390625, "learning_rate": 0.00019970873786407766, "loss": 0.6697, "step": 4153 }, { "epoch": 1.6779455583051361, "grad_norm": 0.494140625, "learning_rate": 0.00019968446601941747, "loss": 0.605, "step": 4154 }, { "epoch": 1.678349578304126, "grad_norm": 0.5703125, "learning_rate": 0.00019966019417475724, "loss": 0.5784, "step": 4155 }, { "epoch": 1.6787535983031159, "grad_norm": 0.546875, "learning_rate": 0.00019963592233009707, "loss": 0.6555, "step": 4156 }, { "epoch": 1.679157618302106, "grad_norm": 0.431640625, "learning_rate": 0.00019961165048543687, "loss": 0.5971, "step": 4157 }, { "epoch": 1.679561638301096, "grad_norm": 0.47265625, "learning_rate": 0.0001995873786407767, "loss": 0.6121, "step": 4158 }, { "epoch": 1.6799656583000857, "grad_norm": 0.62109375, "learning_rate": 0.00019956310679611648, "loss": 0.6157, "step": 4159 }, { "epoch": 1.6803696782990758, "grad_norm": 0.609375, "learning_rate": 0.00019953883495145628, "loss": 0.6872, "step": 4160 }, { "epoch": 1.6807736982980659, "grad_norm": 0.5078125, "learning_rate": 0.0001995145631067961, "loss": 0.511, "step": 4161 }, { "epoch": 1.6811777182970558, "grad_norm": 0.58203125, "learning_rate": 0.0001994902912621359, "loss": 0.5778, "step": 4162 }, { "epoch": 1.6815817382960456, "grad_norm": 0.5078125, "learning_rate": 0.0001994660194174757, "loss": 0.6096, "step": 4163 }, { "epoch": 1.6819857582950357, "grad_norm": 0.466796875, "learning_rate": 0.00019944174757281552, "loss": 0.662, "step": 4164 }, { "epoch": 1.6823897782940256, "grad_norm": 0.625, "learning_rate": 0.00019941747572815532, "loss": 0.6532, "step": 4165 }, { "epoch": 1.6827937982930155, "grad_norm": 0.53515625, "learning_rate": 0.00019939320388349512, "loss": 0.7075, "step": 4166 }, { "epoch": 1.6831978182920055, "grad_norm": 0.498046875, "learning_rate": 0.00019936893203883495, "loss": 0.6672, "step": 4167 }, { "epoch": 1.6836018382909954, "grad_norm": 0.75390625, "learning_rate": 0.00019934466019417473, "loss": 0.7211, "step": 4168 }, { "epoch": 1.6840058582899853, "grad_norm": 0.63671875, "learning_rate": 0.00019932038834951456, "loss": 0.6628, "step": 4169 }, { "epoch": 1.6844098782889754, "grad_norm": 0.5546875, "learning_rate": 0.00019929611650485436, "loss": 0.5906, "step": 4170 }, { "epoch": 1.6848138982879652, "grad_norm": 0.55859375, "learning_rate": 0.00019927184466019413, "loss": 0.656, "step": 4171 }, { "epoch": 1.685217918286955, "grad_norm": 0.453125, "learning_rate": 0.00019924757281553396, "loss": 0.5852, "step": 4172 }, { "epoch": 1.6856219382859452, "grad_norm": 0.6796875, "learning_rate": 0.00019922330097087377, "loss": 0.6146, "step": 4173 }, { "epoch": 1.686025958284935, "grad_norm": 0.55859375, "learning_rate": 0.00019919902912621357, "loss": 0.6353, "step": 4174 }, { "epoch": 1.686429978283925, "grad_norm": 0.578125, "learning_rate": 0.0001991747572815534, "loss": 0.635, "step": 4175 }, { "epoch": 1.686833998282915, "grad_norm": 0.52734375, "learning_rate": 0.00019915048543689317, "loss": 0.6439, "step": 4176 }, { "epoch": 1.6872380182819051, "grad_norm": 0.59375, "learning_rate": 0.000199126213592233, "loss": 0.6345, "step": 4177 }, { "epoch": 1.6876420382808948, "grad_norm": 0.546875, "learning_rate": 0.0001991019417475728, "loss": 0.717, "step": 4178 }, { "epoch": 1.6880460582798849, "grad_norm": 0.609375, "learning_rate": 0.0001990776699029126, "loss": 0.652, "step": 4179 }, { "epoch": 1.688450078278875, "grad_norm": 0.5625, "learning_rate": 0.0001990533980582524, "loss": 0.7065, "step": 4180 }, { "epoch": 1.6888540982778646, "grad_norm": 0.56640625, "learning_rate": 0.0001990291262135922, "loss": 0.6722, "step": 4181 }, { "epoch": 1.6892581182768547, "grad_norm": 0.4609375, "learning_rate": 0.00019900485436893201, "loss": 0.5743, "step": 4182 }, { "epoch": 1.6896621382758448, "grad_norm": 0.5703125, "learning_rate": 0.00019898058252427184, "loss": 0.7896, "step": 4183 }, { "epoch": 1.6900661582748346, "grad_norm": 0.55078125, "learning_rate": 0.00019895631067961162, "loss": 0.6326, "step": 4184 }, { "epoch": 1.6904701782738245, "grad_norm": 0.5078125, "learning_rate": 0.00019893203883495142, "loss": 0.6492, "step": 4185 }, { "epoch": 1.6908741982728146, "grad_norm": 0.490234375, "learning_rate": 0.00019890776699029125, "loss": 0.6449, "step": 4186 }, { "epoch": 1.6912782182718045, "grad_norm": 0.54296875, "learning_rate": 0.00019888349514563105, "loss": 0.6816, "step": 4187 }, { "epoch": 1.6916822382707943, "grad_norm": 0.486328125, "learning_rate": 0.00019885922330097088, "loss": 0.5527, "step": 4188 }, { "epoch": 1.6920862582697844, "grad_norm": 6.46875, "learning_rate": 0.00019883495145631066, "loss": 0.6755, "step": 4189 }, { "epoch": 1.6924902782687743, "grad_norm": 0.51953125, "learning_rate": 0.00019881067961165046, "loss": 0.6113, "step": 4190 }, { "epoch": 1.6928942982677642, "grad_norm": 0.50390625, "learning_rate": 0.0001987864077669903, "loss": 0.5618, "step": 4191 }, { "epoch": 1.6932983182667543, "grad_norm": 0.59765625, "learning_rate": 0.0001987621359223301, "loss": 0.6712, "step": 4192 }, { "epoch": 1.6937023382657441, "grad_norm": 0.5234375, "learning_rate": 0.00019873786407766987, "loss": 0.6631, "step": 4193 }, { "epoch": 1.694106358264734, "grad_norm": 0.515625, "learning_rate": 0.0001987135922330097, "loss": 0.6104, "step": 4194 }, { "epoch": 1.694510378263724, "grad_norm": 0.41015625, "learning_rate": 0.0001986893203883495, "loss": 0.5819, "step": 4195 }, { "epoch": 1.694914398262714, "grad_norm": 0.57421875, "learning_rate": 0.00019866504854368927, "loss": 0.654, "step": 4196 }, { "epoch": 1.6953184182617038, "grad_norm": 0.4921875, "learning_rate": 0.0001986407766990291, "loss": 0.5736, "step": 4197 }, { "epoch": 1.695722438260694, "grad_norm": 0.50390625, "learning_rate": 0.0001986165048543689, "loss": 0.6528, "step": 4198 }, { "epoch": 1.696126458259684, "grad_norm": 0.4609375, "learning_rate": 0.00019859223300970874, "loss": 0.4809, "step": 4199 }, { "epoch": 1.6965304782586736, "grad_norm": 0.58203125, "learning_rate": 0.00019856796116504854, "loss": 0.7014, "step": 4200 }, { "epoch": 1.6969344982576637, "grad_norm": 0.470703125, "learning_rate": 0.0001985436893203883, "loss": 0.6123, "step": 4201 }, { "epoch": 1.6973385182566538, "grad_norm": 0.52734375, "learning_rate": 0.00019851941747572814, "loss": 0.6353, "step": 4202 }, { "epoch": 1.6977425382556437, "grad_norm": 0.5546875, "learning_rate": 0.00019849514563106794, "loss": 0.604, "step": 4203 }, { "epoch": 1.6981465582546336, "grad_norm": 0.447265625, "learning_rate": 0.00019847087378640775, "loss": 0.5791, "step": 4204 }, { "epoch": 1.6985505782536237, "grad_norm": 1.53125, "learning_rate": 0.00019844660194174758, "loss": 0.6939, "step": 4205 }, { "epoch": 1.6989545982526135, "grad_norm": 0.5234375, "learning_rate": 0.00019842233009708735, "loss": 0.6363, "step": 4206 }, { "epoch": 1.6993586182516034, "grad_norm": 0.48828125, "learning_rate": 0.00019839805825242718, "loss": 0.609, "step": 4207 }, { "epoch": 1.6997626382505935, "grad_norm": 0.52734375, "learning_rate": 0.00019837378640776698, "loss": 0.5936, "step": 4208 }, { "epoch": 1.7001666582495834, "grad_norm": 0.48828125, "learning_rate": 0.00019834951456310676, "loss": 0.6, "step": 4209 }, { "epoch": 1.7005706782485732, "grad_norm": 0.470703125, "learning_rate": 0.0001983252427184466, "loss": 0.6541, "step": 4210 }, { "epoch": 1.7009746982475633, "grad_norm": 0.451171875, "learning_rate": 0.0001983009708737864, "loss": 0.5541, "step": 4211 }, { "epoch": 1.7013787182465532, "grad_norm": 0.6796875, "learning_rate": 0.0001982766990291262, "loss": 0.7211, "step": 4212 }, { "epoch": 1.701782738245543, "grad_norm": 0.52734375, "learning_rate": 0.00019825242718446602, "loss": 0.646, "step": 4213 }, { "epoch": 1.7021867582445331, "grad_norm": 0.48828125, "learning_rate": 0.0001982281553398058, "loss": 0.655, "step": 4214 }, { "epoch": 1.702590778243523, "grad_norm": 0.4765625, "learning_rate": 0.0001982038834951456, "loss": 0.5879, "step": 4215 }, { "epoch": 1.7029947982425129, "grad_norm": 0.65234375, "learning_rate": 0.00019817961165048543, "loss": 0.7266, "step": 4216 }, { "epoch": 1.703398818241503, "grad_norm": 0.5078125, "learning_rate": 0.00019815533980582523, "loss": 0.5886, "step": 4217 }, { "epoch": 1.703802838240493, "grad_norm": 0.60546875, "learning_rate": 0.00019813106796116503, "loss": 0.7317, "step": 4218 }, { "epoch": 1.7042068582394827, "grad_norm": 0.515625, "learning_rate": 0.00019810679611650484, "loss": 0.6115, "step": 4219 }, { "epoch": 1.7046108782384728, "grad_norm": 0.48046875, "learning_rate": 0.00019808252427184464, "loss": 0.6268, "step": 4220 }, { "epoch": 1.7050148982374629, "grad_norm": 0.56640625, "learning_rate": 0.00019805825242718447, "loss": 0.5872, "step": 4221 }, { "epoch": 1.7054189182364528, "grad_norm": 0.50390625, "learning_rate": 0.00019803398058252424, "loss": 0.7107, "step": 4222 }, { "epoch": 1.7058229382354426, "grad_norm": 0.451171875, "learning_rate": 0.00019800970873786405, "loss": 0.6167, "step": 4223 }, { "epoch": 1.7062269582344327, "grad_norm": 0.515625, "learning_rate": 0.00019798543689320387, "loss": 0.6443, "step": 4224 }, { "epoch": 1.7066309782334226, "grad_norm": 0.51171875, "learning_rate": 0.00019796116504854368, "loss": 0.5877, "step": 4225 }, { "epoch": 1.7070349982324124, "grad_norm": 0.53125, "learning_rate": 0.00019793689320388345, "loss": 0.5694, "step": 4226 }, { "epoch": 1.7074390182314025, "grad_norm": 0.478515625, "learning_rate": 0.00019791262135922328, "loss": 0.5858, "step": 4227 }, { "epoch": 1.7078430382303924, "grad_norm": 0.50390625, "learning_rate": 0.00019788834951456308, "loss": 0.6234, "step": 4228 }, { "epoch": 1.7082470582293823, "grad_norm": 0.515625, "learning_rate": 0.00019786407766990291, "loss": 0.6255, "step": 4229 }, { "epoch": 1.7086510782283724, "grad_norm": 0.52734375, "learning_rate": 0.00019783980582524272, "loss": 0.6396, "step": 4230 }, { "epoch": 1.7090550982273622, "grad_norm": 0.53515625, "learning_rate": 0.0001978155339805825, "loss": 0.6725, "step": 4231 }, { "epoch": 1.709459118226352, "grad_norm": 0.58984375, "learning_rate": 0.00019779126213592232, "loss": 0.5899, "step": 4232 }, { "epoch": 1.7098631382253422, "grad_norm": 0.55859375, "learning_rate": 0.00019776699029126212, "loss": 0.6283, "step": 4233 }, { "epoch": 1.710267158224332, "grad_norm": 0.53125, "learning_rate": 0.0001977427184466019, "loss": 0.6149, "step": 4234 }, { "epoch": 1.710671178223322, "grad_norm": 0.55859375, "learning_rate": 0.00019771844660194173, "loss": 0.5615, "step": 4235 }, { "epoch": 1.711075198222312, "grad_norm": 0.671875, "learning_rate": 0.00019769417475728153, "loss": 0.7406, "step": 4236 }, { "epoch": 1.7114792182213021, "grad_norm": 0.56640625, "learning_rate": 0.00019766990291262136, "loss": 0.6056, "step": 4237 }, { "epoch": 1.7118832382202918, "grad_norm": 0.5078125, "learning_rate": 0.00019764563106796116, "loss": 0.6069, "step": 4238 }, { "epoch": 1.7122872582192818, "grad_norm": 0.494140625, "learning_rate": 0.00019762135922330094, "loss": 0.6153, "step": 4239 }, { "epoch": 1.712691278218272, "grad_norm": 0.53125, "learning_rate": 0.00019759708737864077, "loss": 0.6172, "step": 4240 }, { "epoch": 1.7130952982172618, "grad_norm": 0.474609375, "learning_rate": 0.00019757281553398057, "loss": 0.602, "step": 4241 }, { "epoch": 1.7134993182162517, "grad_norm": 0.58984375, "learning_rate": 0.00019754854368932037, "loss": 0.7025, "step": 4242 }, { "epoch": 1.7139033382152418, "grad_norm": 0.515625, "learning_rate": 0.00019752427184466017, "loss": 0.622, "step": 4243 }, { "epoch": 1.7143073582142316, "grad_norm": 0.46875, "learning_rate": 0.00019749999999999998, "loss": 0.5565, "step": 4244 }, { "epoch": 1.7147113782132215, "grad_norm": 0.66015625, "learning_rate": 0.00019747572815533978, "loss": 0.6998, "step": 4245 }, { "epoch": 1.7151153982122116, "grad_norm": 0.56640625, "learning_rate": 0.0001974514563106796, "loss": 0.6242, "step": 4246 }, { "epoch": 1.7155194182112015, "grad_norm": 0.5234375, "learning_rate": 0.00019742718446601938, "loss": 0.6021, "step": 4247 }, { "epoch": 1.7159234382101913, "grad_norm": 0.484375, "learning_rate": 0.0001974029126213592, "loss": 0.5925, "step": 4248 }, { "epoch": 1.7163274582091814, "grad_norm": 0.52734375, "learning_rate": 0.00019737864077669901, "loss": 0.6809, "step": 4249 }, { "epoch": 1.7167314782081713, "grad_norm": 0.474609375, "learning_rate": 0.00019735436893203882, "loss": 0.6225, "step": 4250 }, { "epoch": 1.7171354982071612, "grad_norm": 0.462890625, "learning_rate": 0.00019733009708737865, "loss": 0.5768, "step": 4251 }, { "epoch": 1.7175395182061513, "grad_norm": 0.4765625, "learning_rate": 0.00019730582524271842, "loss": 0.5596, "step": 4252 }, { "epoch": 1.7179435382051411, "grad_norm": 0.49609375, "learning_rate": 0.00019728155339805822, "loss": 0.5921, "step": 4253 }, { "epoch": 1.718347558204131, "grad_norm": 0.55859375, "learning_rate": 0.00019725728155339805, "loss": 0.6899, "step": 4254 }, { "epoch": 1.718751578203121, "grad_norm": 0.62109375, "learning_rate": 0.00019723300970873786, "loss": 0.666, "step": 4255 }, { "epoch": 1.7191555982021112, "grad_norm": 0.546875, "learning_rate": 0.00019720873786407763, "loss": 0.6341, "step": 4256 }, { "epoch": 1.7195596182011008, "grad_norm": 0.5390625, "learning_rate": 0.00019718446601941746, "loss": 0.6192, "step": 4257 }, { "epoch": 1.719963638200091, "grad_norm": 0.58984375, "learning_rate": 0.00019716019417475726, "loss": 0.6355, "step": 4258 }, { "epoch": 1.720367658199081, "grad_norm": 0.55078125, "learning_rate": 0.0001971359223300971, "loss": 0.6137, "step": 4259 }, { "epoch": 1.7207716781980706, "grad_norm": 0.5078125, "learning_rate": 0.00019711165048543687, "loss": 0.6713, "step": 4260 }, { "epoch": 1.7211756981970607, "grad_norm": 0.443359375, "learning_rate": 0.00019708737864077667, "loss": 0.5303, "step": 4261 }, { "epoch": 1.7215797181960508, "grad_norm": 0.47265625, "learning_rate": 0.0001970631067961165, "loss": 0.5571, "step": 4262 }, { "epoch": 1.7219837381950407, "grad_norm": 0.486328125, "learning_rate": 0.0001970388349514563, "loss": 0.589, "step": 4263 }, { "epoch": 1.7223877581940306, "grad_norm": 0.59765625, "learning_rate": 0.00019701456310679608, "loss": 0.6458, "step": 4264 }, { "epoch": 1.7227917781930207, "grad_norm": 0.458984375, "learning_rate": 0.0001969902912621359, "loss": 0.598, "step": 4265 }, { "epoch": 1.7231957981920105, "grad_norm": 0.53515625, "learning_rate": 0.0001969660194174757, "loss": 0.6846, "step": 4266 }, { "epoch": 1.7235998181910004, "grad_norm": 0.447265625, "learning_rate": 0.0001969417475728155, "loss": 0.5785, "step": 4267 }, { "epoch": 1.7240038381899905, "grad_norm": 0.7890625, "learning_rate": 0.00019691747572815534, "loss": 0.6694, "step": 4268 }, { "epoch": 1.7244078581889803, "grad_norm": 0.53515625, "learning_rate": 0.00019689320388349512, "loss": 0.5859, "step": 4269 }, { "epoch": 1.7248118781879702, "grad_norm": 0.474609375, "learning_rate": 0.00019686893203883495, "loss": 0.5672, "step": 4270 }, { "epoch": 1.7252158981869603, "grad_norm": 0.50390625, "learning_rate": 0.00019684466019417475, "loss": 0.6107, "step": 4271 }, { "epoch": 1.7256199181859502, "grad_norm": 0.59765625, "learning_rate": 0.00019682038834951452, "loss": 0.6687, "step": 4272 }, { "epoch": 1.72602393818494, "grad_norm": 0.546875, "learning_rate": 0.00019679611650485435, "loss": 0.5938, "step": 4273 }, { "epoch": 1.7264279581839301, "grad_norm": 0.431640625, "learning_rate": 0.00019677184466019415, "loss": 0.5936, "step": 4274 }, { "epoch": 1.72683197818292, "grad_norm": 0.478515625, "learning_rate": 0.00019674757281553396, "loss": 0.6218, "step": 4275 }, { "epoch": 1.7272359981819099, "grad_norm": 0.5546875, "learning_rate": 0.00019672330097087379, "loss": 0.6352, "step": 4276 }, { "epoch": 1.7276400181809, "grad_norm": 0.55859375, "learning_rate": 0.00019669902912621356, "loss": 0.6044, "step": 4277 }, { "epoch": 1.72804403817989, "grad_norm": 0.4609375, "learning_rate": 0.0001966747572815534, "loss": 0.6511, "step": 4278 }, { "epoch": 1.7284480581788797, "grad_norm": 0.5859375, "learning_rate": 0.0001966504854368932, "loss": 0.6773, "step": 4279 }, { "epoch": 1.7288520781778698, "grad_norm": 0.5, "learning_rate": 0.000196626213592233, "loss": 0.5552, "step": 4280 }, { "epoch": 1.7292560981768599, "grad_norm": 0.47265625, "learning_rate": 0.0001966019417475728, "loss": 0.5321, "step": 4281 }, { "epoch": 1.7296601181758497, "grad_norm": 0.54296875, "learning_rate": 0.0001965776699029126, "loss": 0.6235, "step": 4282 }, { "epoch": 1.7300641381748396, "grad_norm": 0.45703125, "learning_rate": 0.0001965533980582524, "loss": 0.5795, "step": 4283 }, { "epoch": 1.7304681581738297, "grad_norm": 0.5625, "learning_rate": 0.00019652912621359223, "loss": 0.6645, "step": 4284 }, { "epoch": 1.7308721781728196, "grad_norm": 0.57421875, "learning_rate": 0.000196504854368932, "loss": 0.5845, "step": 4285 }, { "epoch": 1.7312761981718094, "grad_norm": 0.6171875, "learning_rate": 0.0001964805825242718, "loss": 0.6175, "step": 4286 }, { "epoch": 1.7316802181707995, "grad_norm": 0.51953125, "learning_rate": 0.00019645631067961164, "loss": 0.6152, "step": 4287 }, { "epoch": 1.7320842381697894, "grad_norm": 0.51171875, "learning_rate": 0.00019643203883495144, "loss": 0.6025, "step": 4288 }, { "epoch": 1.7324882581687793, "grad_norm": 0.53515625, "learning_rate": 0.00019640776699029127, "loss": 0.5097, "step": 4289 }, { "epoch": 1.7328922781677694, "grad_norm": 0.546875, "learning_rate": 0.00019638349514563105, "loss": 0.5831, "step": 4290 }, { "epoch": 1.7332962981667592, "grad_norm": 0.5390625, "learning_rate": 0.00019635922330097085, "loss": 0.6224, "step": 4291 }, { "epoch": 1.733700318165749, "grad_norm": 0.6640625, "learning_rate": 0.00019633495145631068, "loss": 0.6417, "step": 4292 }, { "epoch": 1.7341043381647392, "grad_norm": 0.49609375, "learning_rate": 0.00019631067961165048, "loss": 0.6879, "step": 4293 }, { "epoch": 1.734508358163729, "grad_norm": 0.59375, "learning_rate": 0.00019628640776699026, "loss": 0.6752, "step": 4294 }, { "epoch": 1.734912378162719, "grad_norm": 0.4921875, "learning_rate": 0.00019626213592233009, "loss": 0.5898, "step": 4295 }, { "epoch": 1.735316398161709, "grad_norm": 0.625, "learning_rate": 0.0001962378640776699, "loss": 0.6029, "step": 4296 }, { "epoch": 1.735720418160699, "grad_norm": 0.462890625, "learning_rate": 0.00019621359223300966, "loss": 0.5708, "step": 4297 }, { "epoch": 1.7361244381596888, "grad_norm": 0.5546875, "learning_rate": 0.0001961893203883495, "loss": 0.6672, "step": 4298 }, { "epoch": 1.7365284581586788, "grad_norm": 0.59375, "learning_rate": 0.0001961650485436893, "loss": 0.7009, "step": 4299 }, { "epoch": 1.736932478157669, "grad_norm": 0.55859375, "learning_rate": 0.00019614077669902912, "loss": 0.6662, "step": 4300 }, { "epoch": 1.7373364981566588, "grad_norm": 0.515625, "learning_rate": 0.00019611650485436893, "loss": 0.5905, "step": 4301 }, { "epoch": 1.7377405181556487, "grad_norm": 0.4296875, "learning_rate": 0.0001960922330097087, "loss": 0.5131, "step": 4302 }, { "epoch": 1.7381445381546388, "grad_norm": 0.474609375, "learning_rate": 0.00019606796116504853, "loss": 0.5799, "step": 4303 }, { "epoch": 1.7385485581536286, "grad_norm": 0.4921875, "learning_rate": 0.00019604368932038833, "loss": 0.5772, "step": 4304 }, { "epoch": 1.7389525781526185, "grad_norm": 0.55859375, "learning_rate": 0.00019601941747572814, "loss": 0.6366, "step": 4305 }, { "epoch": 1.7393565981516086, "grad_norm": 0.482421875, "learning_rate": 0.00019599514563106794, "loss": 0.6162, "step": 4306 }, { "epoch": 1.7397606181505985, "grad_norm": 0.4765625, "learning_rate": 0.00019597087378640774, "loss": 0.5966, "step": 4307 }, { "epoch": 1.7401646381495883, "grad_norm": 0.5, "learning_rate": 0.00019594660194174757, "loss": 0.592, "step": 4308 }, { "epoch": 1.7405686581485784, "grad_norm": 0.49609375, "learning_rate": 0.00019592233009708737, "loss": 0.5984, "step": 4309 }, { "epoch": 1.7409726781475683, "grad_norm": 0.5703125, "learning_rate": 0.00019589805825242715, "loss": 0.6118, "step": 4310 }, { "epoch": 1.7413766981465582, "grad_norm": 0.4765625, "learning_rate": 0.00019587378640776698, "loss": 0.6164, "step": 4311 }, { "epoch": 1.7417807181455482, "grad_norm": 0.51953125, "learning_rate": 0.00019584951456310678, "loss": 0.7055, "step": 4312 }, { "epoch": 1.7421847381445381, "grad_norm": 0.5625, "learning_rate": 0.00019582524271844658, "loss": 0.5721, "step": 4313 }, { "epoch": 1.742588758143528, "grad_norm": 0.53515625, "learning_rate": 0.0001958009708737864, "loss": 0.7131, "step": 4314 }, { "epoch": 1.742992778142518, "grad_norm": 0.47265625, "learning_rate": 0.00019577669902912619, "loss": 0.6004, "step": 4315 }, { "epoch": 1.7433967981415082, "grad_norm": 0.59375, "learning_rate": 0.000195752427184466, "loss": 0.5967, "step": 4316 }, { "epoch": 1.7438008181404978, "grad_norm": 0.447265625, "learning_rate": 0.00019572815533980582, "loss": 0.5855, "step": 4317 }, { "epoch": 1.744204838139488, "grad_norm": 0.5625, "learning_rate": 0.00019570388349514562, "loss": 0.6388, "step": 4318 }, { "epoch": 1.744608858138478, "grad_norm": 0.58984375, "learning_rate": 0.00019567961165048542, "loss": 0.686, "step": 4319 }, { "epoch": 1.7450128781374679, "grad_norm": 0.466796875, "learning_rate": 0.00019565533980582522, "loss": 0.6187, "step": 4320 }, { "epoch": 1.7454168981364577, "grad_norm": 0.5546875, "learning_rate": 0.00019563106796116503, "loss": 0.6163, "step": 4321 }, { "epoch": 1.7458209181354478, "grad_norm": 0.625, "learning_rate": 0.00019560679611650486, "loss": 0.6442, "step": 4322 }, { "epoch": 1.7462249381344377, "grad_norm": 0.54296875, "learning_rate": 0.00019558252427184463, "loss": 0.6387, "step": 4323 }, { "epoch": 1.7466289581334276, "grad_norm": 0.49609375, "learning_rate": 0.00019555825242718443, "loss": 0.6395, "step": 4324 }, { "epoch": 1.7470329781324176, "grad_norm": 0.5078125, "learning_rate": 0.00019553398058252426, "loss": 0.6259, "step": 4325 }, { "epoch": 1.7474369981314075, "grad_norm": 0.5, "learning_rate": 0.00019550970873786407, "loss": 0.5683, "step": 4326 }, { "epoch": 1.7478410181303974, "grad_norm": 0.494140625, "learning_rate": 0.00019548543689320384, "loss": 0.5691, "step": 4327 }, { "epoch": 1.7482450381293875, "grad_norm": 0.46875, "learning_rate": 0.00019546116504854367, "loss": 0.5573, "step": 4328 }, { "epoch": 1.7486490581283773, "grad_norm": 0.52734375, "learning_rate": 0.00019543689320388347, "loss": 0.6123, "step": 4329 }, { "epoch": 1.7490530781273672, "grad_norm": 0.58984375, "learning_rate": 0.0001954126213592233, "loss": 0.6836, "step": 4330 }, { "epoch": 1.7494570981263573, "grad_norm": 0.55078125, "learning_rate": 0.0001953883495145631, "loss": 0.6794, "step": 4331 }, { "epoch": 1.7498611181253472, "grad_norm": 0.55078125, "learning_rate": 0.00019536407766990288, "loss": 0.5777, "step": 4332 }, { "epoch": 1.750265138124337, "grad_norm": 0.54296875, "learning_rate": 0.0001953398058252427, "loss": 0.5087, "step": 4333 }, { "epoch": 1.7506691581233271, "grad_norm": 0.5078125, "learning_rate": 0.0001953155339805825, "loss": 0.5604, "step": 4334 }, { "epoch": 1.7510731781223172, "grad_norm": 0.51953125, "learning_rate": 0.0001952912621359223, "loss": 0.5877, "step": 4335 }, { "epoch": 1.7514771981213069, "grad_norm": 0.5546875, "learning_rate": 0.00019526699029126212, "loss": 0.5693, "step": 4336 }, { "epoch": 1.751881218120297, "grad_norm": 0.5546875, "learning_rate": 0.00019524271844660192, "loss": 0.6514, "step": 4337 }, { "epoch": 1.752285238119287, "grad_norm": 0.45703125, "learning_rate": 0.00019521844660194175, "loss": 0.569, "step": 4338 }, { "epoch": 1.7526892581182767, "grad_norm": 0.5625, "learning_rate": 0.00019519417475728155, "loss": 0.609, "step": 4339 }, { "epoch": 1.7530932781172668, "grad_norm": 0.5546875, "learning_rate": 0.00019516990291262133, "loss": 0.6879, "step": 4340 }, { "epoch": 1.7534972981162569, "grad_norm": 0.57421875, "learning_rate": 0.00019514563106796116, "loss": 0.6702, "step": 4341 }, { "epoch": 1.7539013181152467, "grad_norm": 0.5078125, "learning_rate": 0.00019512135922330096, "loss": 0.6774, "step": 4342 }, { "epoch": 1.7543053381142366, "grad_norm": 0.58203125, "learning_rate": 0.00019509708737864076, "loss": 0.6977, "step": 4343 }, { "epoch": 1.7547093581132267, "grad_norm": 0.53515625, "learning_rate": 0.00019507281553398056, "loss": 0.6235, "step": 4344 }, { "epoch": 1.7551133781122166, "grad_norm": 0.51953125, "learning_rate": 0.00019504854368932036, "loss": 0.6604, "step": 4345 }, { "epoch": 1.7555173981112064, "grad_norm": 0.50390625, "learning_rate": 0.00019502427184466017, "loss": 0.5341, "step": 4346 }, { "epoch": 1.7559214181101965, "grad_norm": 0.53515625, "learning_rate": 0.000195, "loss": 0.6446, "step": 4347 }, { "epoch": 1.7563254381091864, "grad_norm": 0.4609375, "learning_rate": 0.00019497572815533977, "loss": 0.5524, "step": 4348 }, { "epoch": 1.7567294581081763, "grad_norm": 0.5546875, "learning_rate": 0.0001949514563106796, "loss": 0.6641, "step": 4349 }, { "epoch": 1.7571334781071664, "grad_norm": 0.5234375, "learning_rate": 0.0001949271844660194, "loss": 0.5644, "step": 4350 }, { "epoch": 1.7575374981061562, "grad_norm": 0.427734375, "learning_rate": 0.0001949029126213592, "loss": 0.5749, "step": 4351 }, { "epoch": 1.757941518105146, "grad_norm": 0.466796875, "learning_rate": 0.00019487864077669904, "loss": 0.5015, "step": 4352 }, { "epoch": 1.7583455381041362, "grad_norm": 0.5625, "learning_rate": 0.0001948543689320388, "loss": 0.6637, "step": 4353 }, { "epoch": 1.758749558103126, "grad_norm": 0.46484375, "learning_rate": 0.0001948300970873786, "loss": 0.5448, "step": 4354 }, { "epoch": 1.759153578102116, "grad_norm": 0.46875, "learning_rate": 0.00019480582524271844, "loss": 0.5872, "step": 4355 }, { "epoch": 1.759557598101106, "grad_norm": 0.5, "learning_rate": 0.00019478155339805824, "loss": 0.5861, "step": 4356 }, { "epoch": 1.759961618100096, "grad_norm": 0.484375, "learning_rate": 0.00019475728155339802, "loss": 0.5536, "step": 4357 }, { "epoch": 1.7603656380990857, "grad_norm": 0.5625, "learning_rate": 0.00019473300970873785, "loss": 0.6411, "step": 4358 }, { "epoch": 1.7607696580980758, "grad_norm": 0.484375, "learning_rate": 0.00019470873786407765, "loss": 0.5972, "step": 4359 }, { "epoch": 1.761173678097066, "grad_norm": 0.5546875, "learning_rate": 0.00019468446601941748, "loss": 0.5727, "step": 4360 }, { "epoch": 1.7615776980960558, "grad_norm": 0.484375, "learning_rate": 0.00019466019417475726, "loss": 0.6261, "step": 4361 }, { "epoch": 1.7619817180950457, "grad_norm": 0.486328125, "learning_rate": 0.00019463592233009706, "loss": 0.6115, "step": 4362 }, { "epoch": 1.7623857380940358, "grad_norm": 0.462890625, "learning_rate": 0.0001946116504854369, "loss": 0.4896, "step": 4363 }, { "epoch": 1.7627897580930256, "grad_norm": 0.6171875, "learning_rate": 0.0001945873786407767, "loss": 0.6699, "step": 4364 }, { "epoch": 1.7631937780920155, "grad_norm": 0.455078125, "learning_rate": 0.00019456310679611647, "loss": 0.5617, "step": 4365 }, { "epoch": 1.7635977980910056, "grad_norm": 0.53515625, "learning_rate": 0.0001945388349514563, "loss": 0.572, "step": 4366 }, { "epoch": 1.7640018180899955, "grad_norm": 0.6328125, "learning_rate": 0.0001945145631067961, "loss": 0.6897, "step": 4367 }, { "epoch": 1.7644058380889853, "grad_norm": 0.53125, "learning_rate": 0.0001944902912621359, "loss": 0.6226, "step": 4368 }, { "epoch": 1.7648098580879754, "grad_norm": 0.45703125, "learning_rate": 0.00019446601941747573, "loss": 0.5579, "step": 4369 }, { "epoch": 1.7652138780869653, "grad_norm": 0.515625, "learning_rate": 0.0001944417475728155, "loss": 0.5803, "step": 4370 }, { "epoch": 1.7656178980859552, "grad_norm": 0.59375, "learning_rate": 0.00019441747572815533, "loss": 0.6396, "step": 4371 }, { "epoch": 1.7660219180849452, "grad_norm": 0.51953125, "learning_rate": 0.00019439320388349514, "loss": 0.6675, "step": 4372 }, { "epoch": 1.766425938083935, "grad_norm": 0.5625, "learning_rate": 0.0001943689320388349, "loss": 0.663, "step": 4373 }, { "epoch": 1.766829958082925, "grad_norm": 0.515625, "learning_rate": 0.00019434466019417474, "loss": 0.5754, "step": 4374 }, { "epoch": 1.767233978081915, "grad_norm": 0.56640625, "learning_rate": 0.00019432038834951454, "loss": 0.6463, "step": 4375 }, { "epoch": 1.7676379980809052, "grad_norm": 0.45703125, "learning_rate": 0.00019429611650485435, "loss": 0.5425, "step": 4376 }, { "epoch": 1.7680420180798948, "grad_norm": 0.416015625, "learning_rate": 0.00019427184466019418, "loss": 0.5751, "step": 4377 }, { "epoch": 1.768446038078885, "grad_norm": 0.546875, "learning_rate": 0.00019424757281553395, "loss": 0.6292, "step": 4378 }, { "epoch": 1.768850058077875, "grad_norm": 0.55859375, "learning_rate": 0.00019422330097087378, "loss": 0.6046, "step": 4379 }, { "epoch": 1.7692540780768649, "grad_norm": 0.51171875, "learning_rate": 0.00019419902912621358, "loss": 0.6239, "step": 4380 }, { "epoch": 1.7696580980758547, "grad_norm": 0.51953125, "learning_rate": 0.00019417475728155338, "loss": 0.6019, "step": 4381 }, { "epoch": 1.7700621180748448, "grad_norm": 0.57421875, "learning_rate": 0.0001941504854368932, "loss": 0.6274, "step": 4382 }, { "epoch": 1.7704661380738347, "grad_norm": 0.6171875, "learning_rate": 0.000194126213592233, "loss": 0.6021, "step": 4383 }, { "epoch": 1.7708701580728246, "grad_norm": 0.56640625, "learning_rate": 0.0001941019417475728, "loss": 0.6647, "step": 4384 }, { "epoch": 1.7712741780718146, "grad_norm": 0.51171875, "learning_rate": 0.00019407766990291262, "loss": 0.5437, "step": 4385 }, { "epoch": 1.7716781980708045, "grad_norm": 0.578125, "learning_rate": 0.0001940533980582524, "loss": 0.5816, "step": 4386 }, { "epoch": 1.7720822180697944, "grad_norm": 0.5390625, "learning_rate": 0.0001940291262135922, "loss": 0.6439, "step": 4387 }, { "epoch": 1.7724862380687845, "grad_norm": 0.578125, "learning_rate": 0.00019400485436893203, "loss": 0.582, "step": 4388 }, { "epoch": 1.7728902580677743, "grad_norm": 0.474609375, "learning_rate": 0.00019398058252427183, "loss": 0.6177, "step": 4389 }, { "epoch": 1.7732942780667642, "grad_norm": 0.546875, "learning_rate": 0.00019395631067961166, "loss": 0.6604, "step": 4390 }, { "epoch": 1.7736982980657543, "grad_norm": 0.494140625, "learning_rate": 0.00019393203883495143, "loss": 0.5638, "step": 4391 }, { "epoch": 1.7741023180647442, "grad_norm": 0.47265625, "learning_rate": 0.00019390776699029124, "loss": 0.5843, "step": 4392 }, { "epoch": 1.774506338063734, "grad_norm": 0.451171875, "learning_rate": 0.00019388349514563107, "loss": 0.5678, "step": 4393 }, { "epoch": 1.7749103580627241, "grad_norm": 0.52734375, "learning_rate": 0.00019385922330097087, "loss": 0.5805, "step": 4394 }, { "epoch": 1.7753143780617142, "grad_norm": 0.55859375, "learning_rate": 0.00019383495145631064, "loss": 0.6078, "step": 4395 }, { "epoch": 1.7757183980607039, "grad_norm": 0.53515625, "learning_rate": 0.00019381067961165047, "loss": 0.6096, "step": 4396 }, { "epoch": 1.776122418059694, "grad_norm": 0.494140625, "learning_rate": 0.00019378640776699028, "loss": 0.6692, "step": 4397 }, { "epoch": 1.776526438058684, "grad_norm": 0.6484375, "learning_rate": 0.00019376213592233005, "loss": 0.6724, "step": 4398 }, { "epoch": 1.7769304580576737, "grad_norm": 0.48828125, "learning_rate": 0.00019373786407766988, "loss": 0.5543, "step": 4399 }, { "epoch": 1.7773344780566638, "grad_norm": 0.55078125, "learning_rate": 0.00019371359223300968, "loss": 0.703, "step": 4400 }, { "epoch": 1.7777384980556539, "grad_norm": 0.466796875, "learning_rate": 0.0001936893203883495, "loss": 0.5532, "step": 4401 }, { "epoch": 1.7781425180546437, "grad_norm": 0.51953125, "learning_rate": 0.00019366504854368931, "loss": 0.6188, "step": 4402 }, { "epoch": 1.7785465380536336, "grad_norm": 0.4765625, "learning_rate": 0.0001936407766990291, "loss": 0.5519, "step": 4403 }, { "epoch": 1.7789505580526237, "grad_norm": 0.55078125, "learning_rate": 0.00019361650485436892, "loss": 0.6434, "step": 4404 }, { "epoch": 1.7793545780516136, "grad_norm": 0.6796875, "learning_rate": 0.00019359223300970872, "loss": 0.6295, "step": 4405 }, { "epoch": 1.7797585980506034, "grad_norm": 0.64453125, "learning_rate": 0.00019356796116504852, "loss": 0.7072, "step": 4406 }, { "epoch": 1.7801626180495935, "grad_norm": 0.5390625, "learning_rate": 0.00019354368932038833, "loss": 0.5749, "step": 4407 }, { "epoch": 1.7805666380485834, "grad_norm": 0.5390625, "learning_rate": 0.00019351941747572813, "loss": 0.6727, "step": 4408 }, { "epoch": 1.7809706580475733, "grad_norm": 0.498046875, "learning_rate": 0.00019349514563106796, "loss": 0.6101, "step": 4409 }, { "epoch": 1.7813746780465634, "grad_norm": 0.4921875, "learning_rate": 0.00019347087378640776, "loss": 0.6239, "step": 4410 }, { "epoch": 1.7817786980455532, "grad_norm": 0.455078125, "learning_rate": 0.00019344660194174754, "loss": 0.5985, "step": 4411 }, { "epoch": 1.782182718044543, "grad_norm": 0.484375, "learning_rate": 0.00019342233009708737, "loss": 0.6285, "step": 4412 }, { "epoch": 1.7825867380435332, "grad_norm": 0.43359375, "learning_rate": 0.00019339805825242717, "loss": 0.5521, "step": 4413 }, { "epoch": 1.782990758042523, "grad_norm": 0.546875, "learning_rate": 0.00019337378640776697, "loss": 0.6153, "step": 4414 }, { "epoch": 1.783394778041513, "grad_norm": 0.65234375, "learning_rate": 0.0001933495145631068, "loss": 0.6497, "step": 4415 }, { "epoch": 1.783798798040503, "grad_norm": 0.4921875, "learning_rate": 0.00019332524271844657, "loss": 0.5736, "step": 4416 }, { "epoch": 1.784202818039493, "grad_norm": 0.5703125, "learning_rate": 0.00019330097087378638, "loss": 0.6196, "step": 4417 }, { "epoch": 1.7846068380384827, "grad_norm": 0.671875, "learning_rate": 0.0001932766990291262, "loss": 0.7193, "step": 4418 }, { "epoch": 1.7850108580374728, "grad_norm": 0.46875, "learning_rate": 0.000193252427184466, "loss": 0.5981, "step": 4419 }, { "epoch": 1.785414878036463, "grad_norm": 0.59765625, "learning_rate": 0.0001932281553398058, "loss": 0.6164, "step": 4420 }, { "epoch": 1.7858188980354528, "grad_norm": 0.60546875, "learning_rate": 0.00019320388349514561, "loss": 0.6517, "step": 4421 }, { "epoch": 1.7862229180344427, "grad_norm": 0.58203125, "learning_rate": 0.00019317961165048542, "loss": 0.6348, "step": 4422 }, { "epoch": 1.7866269380334328, "grad_norm": 0.435546875, "learning_rate": 0.00019315533980582525, "loss": 0.6268, "step": 4423 }, { "epoch": 1.7870309580324226, "grad_norm": 0.4609375, "learning_rate": 0.00019313106796116502, "loss": 0.6205, "step": 4424 }, { "epoch": 1.7874349780314125, "grad_norm": 0.486328125, "learning_rate": 0.00019310679611650482, "loss": 0.624, "step": 4425 }, { "epoch": 1.7878389980304026, "grad_norm": 0.5078125, "learning_rate": 0.00019308252427184465, "loss": 0.6648, "step": 4426 }, { "epoch": 1.7882430180293925, "grad_norm": 0.48828125, "learning_rate": 0.00019305825242718445, "loss": 0.597, "step": 4427 }, { "epoch": 1.7886470380283823, "grad_norm": 0.5234375, "learning_rate": 0.00019303398058252423, "loss": 0.6712, "step": 4428 }, { "epoch": 1.7890510580273724, "grad_norm": 0.5390625, "learning_rate": 0.00019300970873786406, "loss": 0.6216, "step": 4429 }, { "epoch": 1.7894550780263623, "grad_norm": 0.66796875, "learning_rate": 0.00019298543689320386, "loss": 0.6468, "step": 4430 }, { "epoch": 1.7898590980253521, "grad_norm": 0.4765625, "learning_rate": 0.0001929611650485437, "loss": 0.6169, "step": 4431 }, { "epoch": 1.7902631180243422, "grad_norm": 0.478515625, "learning_rate": 0.0001929368932038835, "loss": 0.5881, "step": 4432 }, { "epoch": 1.790667138023332, "grad_norm": 0.51171875, "learning_rate": 0.00019291262135922327, "loss": 0.5926, "step": 4433 }, { "epoch": 1.791071158022322, "grad_norm": 0.61328125, "learning_rate": 0.0001928883495145631, "loss": 0.6746, "step": 4434 }, { "epoch": 1.791475178021312, "grad_norm": 0.46484375, "learning_rate": 0.0001928640776699029, "loss": 0.5416, "step": 4435 }, { "epoch": 1.7918791980203022, "grad_norm": 0.5859375, "learning_rate": 0.00019283980582524268, "loss": 0.5604, "step": 4436 }, { "epoch": 1.7922832180192918, "grad_norm": 0.486328125, "learning_rate": 0.0001928155339805825, "loss": 0.5756, "step": 4437 }, { "epoch": 1.792687238018282, "grad_norm": 0.5546875, "learning_rate": 0.0001927912621359223, "loss": 0.6492, "step": 4438 }, { "epoch": 1.793091258017272, "grad_norm": 0.4921875, "learning_rate": 0.00019276699029126214, "loss": 0.5834, "step": 4439 }, { "epoch": 1.7934952780162619, "grad_norm": 0.5, "learning_rate": 0.00019274271844660194, "loss": 0.512, "step": 4440 }, { "epoch": 1.7938992980152517, "grad_norm": 0.45703125, "learning_rate": 0.00019271844660194171, "loss": 0.5548, "step": 4441 }, { "epoch": 1.7943033180142418, "grad_norm": 0.4921875, "learning_rate": 0.00019269417475728154, "loss": 0.6352, "step": 4442 }, { "epoch": 1.7947073380132317, "grad_norm": 0.53515625, "learning_rate": 0.00019266990291262135, "loss": 0.6609, "step": 4443 }, { "epoch": 1.7951113580122215, "grad_norm": 0.48046875, "learning_rate": 0.00019264563106796115, "loss": 0.6094, "step": 4444 }, { "epoch": 1.7955153780112116, "grad_norm": 0.63671875, "learning_rate": 0.00019262135922330095, "loss": 0.69, "step": 4445 }, { "epoch": 1.7959193980102015, "grad_norm": 0.498046875, "learning_rate": 0.00019259708737864075, "loss": 0.6269, "step": 4446 }, { "epoch": 1.7963234180091914, "grad_norm": 0.578125, "learning_rate": 0.00019257281553398056, "loss": 0.6288, "step": 4447 }, { "epoch": 1.7967274380081815, "grad_norm": 0.388671875, "learning_rate": 0.00019254854368932039, "loss": 0.5247, "step": 4448 }, { "epoch": 1.7971314580071713, "grad_norm": 0.62890625, "learning_rate": 0.00019252427184466016, "loss": 0.6968, "step": 4449 }, { "epoch": 1.7975354780061612, "grad_norm": 0.51953125, "learning_rate": 0.0001925, "loss": 0.641, "step": 4450 }, { "epoch": 1.7979394980051513, "grad_norm": 0.5078125, "learning_rate": 0.0001924757281553398, "loss": 0.5877, "step": 4451 }, { "epoch": 1.7983435180041412, "grad_norm": 0.47265625, "learning_rate": 0.0001924514563106796, "loss": 0.5945, "step": 4452 }, { "epoch": 1.798747538003131, "grad_norm": 0.47265625, "learning_rate": 0.00019242718446601942, "loss": 0.5583, "step": 4453 }, { "epoch": 1.7991515580021211, "grad_norm": 0.703125, "learning_rate": 0.0001924029126213592, "loss": 0.72, "step": 4454 }, { "epoch": 1.7995555780011112, "grad_norm": 0.69140625, "learning_rate": 0.000192378640776699, "loss": 0.695, "step": 4455 }, { "epoch": 1.7999595980001009, "grad_norm": 0.5078125, "learning_rate": 0.00019235436893203883, "loss": 0.6164, "step": 4456 }, { "epoch": 1.800363617999091, "grad_norm": 0.58984375, "learning_rate": 0.00019233009708737863, "loss": 0.6614, "step": 4457 }, { "epoch": 1.800767637998081, "grad_norm": 0.458984375, "learning_rate": 0.0001923058252427184, "loss": 0.6055, "step": 4458 }, { "epoch": 1.801171657997071, "grad_norm": 0.578125, "learning_rate": 0.00019228155339805824, "loss": 0.6586, "step": 4459 }, { "epoch": 1.8015756779960608, "grad_norm": 0.486328125, "learning_rate": 0.00019225728155339804, "loss": 0.6417, "step": 4460 }, { "epoch": 1.8019796979950509, "grad_norm": 0.47265625, "learning_rate": 0.00019223300970873787, "loss": 0.6007, "step": 4461 }, { "epoch": 1.8023837179940407, "grad_norm": 0.546875, "learning_rate": 0.00019220873786407765, "loss": 0.5834, "step": 4462 }, { "epoch": 1.8027877379930306, "grad_norm": 0.53515625, "learning_rate": 0.00019218446601941745, "loss": 0.6595, "step": 4463 }, { "epoch": 1.8031917579920207, "grad_norm": 0.48828125, "learning_rate": 0.00019216019417475728, "loss": 0.5303, "step": 4464 }, { "epoch": 1.8035957779910106, "grad_norm": 0.5078125, "learning_rate": 0.00019213592233009708, "loss": 0.6588, "step": 4465 }, { "epoch": 1.8039997979900004, "grad_norm": 0.41015625, "learning_rate": 0.00019211165048543685, "loss": 0.482, "step": 4466 }, { "epoch": 1.8044038179889905, "grad_norm": 0.5, "learning_rate": 0.00019208737864077668, "loss": 0.5017, "step": 4467 }, { "epoch": 1.8048078379879804, "grad_norm": 0.5546875, "learning_rate": 0.00019206310679611649, "loss": 0.6329, "step": 4468 }, { "epoch": 1.8052118579869703, "grad_norm": 0.546875, "learning_rate": 0.0001920388349514563, "loss": 0.6062, "step": 4469 }, { "epoch": 1.8056158779859603, "grad_norm": 0.58984375, "learning_rate": 0.0001920145631067961, "loss": 0.6541, "step": 4470 }, { "epoch": 1.8060198979849502, "grad_norm": 0.61328125, "learning_rate": 0.0001919902912621359, "loss": 0.6053, "step": 4471 }, { "epoch": 1.80642391798394, "grad_norm": 0.6171875, "learning_rate": 0.00019196601941747572, "loss": 0.6753, "step": 4472 }, { "epoch": 1.8068279379829302, "grad_norm": 0.55859375, "learning_rate": 0.00019194174757281553, "loss": 0.6436, "step": 4473 }, { "epoch": 1.8072319579819203, "grad_norm": 0.6015625, "learning_rate": 0.0001919174757281553, "loss": 0.6639, "step": 4474 }, { "epoch": 1.80763597798091, "grad_norm": 0.484375, "learning_rate": 0.00019189320388349513, "loss": 0.6331, "step": 4475 }, { "epoch": 1.8080399979799, "grad_norm": 0.5703125, "learning_rate": 0.00019186893203883493, "loss": 0.6678, "step": 4476 }, { "epoch": 1.80844401797889, "grad_norm": 0.5390625, "learning_rate": 0.00019184466019417473, "loss": 0.6958, "step": 4477 }, { "epoch": 1.8088480379778797, "grad_norm": 0.484375, "learning_rate": 0.00019182038834951456, "loss": 0.6148, "step": 4478 }, { "epoch": 1.8092520579768698, "grad_norm": 0.5234375, "learning_rate": 0.00019179611650485434, "loss": 0.6177, "step": 4479 }, { "epoch": 1.80965607797586, "grad_norm": 0.50390625, "learning_rate": 0.00019177184466019417, "loss": 0.6018, "step": 4480 }, { "epoch": 1.8100600979748498, "grad_norm": 0.478515625, "learning_rate": 0.00019174757281553397, "loss": 0.6429, "step": 4481 }, { "epoch": 1.8104641179738397, "grad_norm": 0.51953125, "learning_rate": 0.00019172330097087377, "loss": 0.6173, "step": 4482 }, { "epoch": 1.8108681379728298, "grad_norm": 0.58984375, "learning_rate": 0.00019169902912621358, "loss": 0.6407, "step": 4483 }, { "epoch": 1.8112721579718196, "grad_norm": 0.58203125, "learning_rate": 0.00019167475728155338, "loss": 0.6209, "step": 4484 }, { "epoch": 1.8116761779708095, "grad_norm": 0.5625, "learning_rate": 0.00019165048543689318, "loss": 0.6814, "step": 4485 }, { "epoch": 1.8120801979697996, "grad_norm": 0.50390625, "learning_rate": 0.000191626213592233, "loss": 0.5928, "step": 4486 }, { "epoch": 1.8124842179687894, "grad_norm": 0.57421875, "learning_rate": 0.00019160194174757278, "loss": 0.65, "step": 4487 }, { "epoch": 1.8128882379677793, "grad_norm": 0.53125, "learning_rate": 0.0001915776699029126, "loss": 0.6111, "step": 4488 }, { "epoch": 1.8132922579667694, "grad_norm": 0.50390625, "learning_rate": 0.00019155339805825242, "loss": 0.6041, "step": 4489 }, { "epoch": 1.8136962779657593, "grad_norm": 0.51171875, "learning_rate": 0.00019152912621359222, "loss": 0.6118, "step": 4490 }, { "epoch": 1.8141002979647491, "grad_norm": 0.494140625, "learning_rate": 0.00019150485436893205, "loss": 0.637, "step": 4491 }, { "epoch": 1.8145043179637392, "grad_norm": 0.52734375, "learning_rate": 0.00019148058252427182, "loss": 0.561, "step": 4492 }, { "epoch": 1.814908337962729, "grad_norm": 0.5859375, "learning_rate": 0.00019145631067961163, "loss": 0.6401, "step": 4493 }, { "epoch": 1.815312357961719, "grad_norm": 0.4921875, "learning_rate": 0.00019143203883495146, "loss": 0.6385, "step": 4494 }, { "epoch": 1.815716377960709, "grad_norm": 0.50390625, "learning_rate": 0.00019140776699029126, "loss": 0.553, "step": 4495 }, { "epoch": 1.8161203979596992, "grad_norm": 0.5234375, "learning_rate": 0.00019138349514563103, "loss": 0.6017, "step": 4496 }, { "epoch": 1.8165244179586888, "grad_norm": 0.458984375, "learning_rate": 0.00019135922330097086, "loss": 0.6372, "step": 4497 }, { "epoch": 1.8169284379576789, "grad_norm": 0.484375, "learning_rate": 0.00019133495145631066, "loss": 0.5955, "step": 4498 }, { "epoch": 1.817332457956669, "grad_norm": 0.58203125, "learning_rate": 0.00019131067961165044, "loss": 0.6753, "step": 4499 }, { "epoch": 1.8177364779556588, "grad_norm": 0.458984375, "learning_rate": 0.00019128640776699027, "loss": 0.5713, "step": 4500 }, { "epoch": 1.8181404979546487, "grad_norm": 0.51171875, "learning_rate": 0.00019126213592233007, "loss": 0.5865, "step": 4501 }, { "epoch": 1.8185445179536388, "grad_norm": 0.478515625, "learning_rate": 0.0001912378640776699, "loss": 0.5909, "step": 4502 }, { "epoch": 1.8189485379526287, "grad_norm": 0.52734375, "learning_rate": 0.0001912135922330097, "loss": 0.6465, "step": 4503 }, { "epoch": 1.8193525579516185, "grad_norm": 0.56640625, "learning_rate": 0.00019118932038834948, "loss": 0.6115, "step": 4504 }, { "epoch": 1.8197565779506086, "grad_norm": 0.51171875, "learning_rate": 0.0001911650485436893, "loss": 0.6047, "step": 4505 }, { "epoch": 1.8201605979495985, "grad_norm": 0.5, "learning_rate": 0.0001911407766990291, "loss": 0.6209, "step": 4506 }, { "epoch": 1.8205646179485884, "grad_norm": 0.498046875, "learning_rate": 0.0001911165048543689, "loss": 0.5896, "step": 4507 }, { "epoch": 1.8209686379475785, "grad_norm": 0.55078125, "learning_rate": 0.00019109223300970872, "loss": 0.6246, "step": 4508 }, { "epoch": 1.8213726579465683, "grad_norm": 0.5078125, "learning_rate": 0.00019106796116504852, "loss": 0.7146, "step": 4509 }, { "epoch": 1.8217766779455582, "grad_norm": 0.498046875, "learning_rate": 0.00019104368932038835, "loss": 0.6379, "step": 4510 }, { "epoch": 1.8221806979445483, "grad_norm": 0.51953125, "learning_rate": 0.00019101941747572815, "loss": 0.6219, "step": 4511 }, { "epoch": 1.8225847179435382, "grad_norm": 0.61328125, "learning_rate": 0.00019099514563106792, "loss": 0.6937, "step": 4512 }, { "epoch": 1.822988737942528, "grad_norm": 0.453125, "learning_rate": 0.00019097087378640775, "loss": 0.5628, "step": 4513 }, { "epoch": 1.8233927579415181, "grad_norm": 0.462890625, "learning_rate": 0.00019094660194174756, "loss": 0.5738, "step": 4514 }, { "epoch": 1.8237967779405082, "grad_norm": 0.59375, "learning_rate": 0.00019092233009708736, "loss": 0.697, "step": 4515 }, { "epoch": 1.8242007979394979, "grad_norm": 0.44921875, "learning_rate": 0.0001908980582524272, "loss": 0.585, "step": 4516 }, { "epoch": 1.824604817938488, "grad_norm": 0.4921875, "learning_rate": 0.00019087378640776696, "loss": 0.6062, "step": 4517 }, { "epoch": 1.825008837937478, "grad_norm": 0.51171875, "learning_rate": 0.00019084951456310677, "loss": 0.6204, "step": 4518 }, { "epoch": 1.825412857936468, "grad_norm": 0.62890625, "learning_rate": 0.0001908252427184466, "loss": 0.691, "step": 4519 }, { "epoch": 1.8258168779354578, "grad_norm": 0.5234375, "learning_rate": 0.0001908009708737864, "loss": 0.6391, "step": 4520 }, { "epoch": 1.8262208979344479, "grad_norm": 0.52734375, "learning_rate": 0.0001907766990291262, "loss": 0.6266, "step": 4521 }, { "epoch": 1.8266249179334377, "grad_norm": 0.5, "learning_rate": 0.000190752427184466, "loss": 0.64, "step": 4522 }, { "epoch": 1.8270289379324276, "grad_norm": 0.484375, "learning_rate": 0.0001907281553398058, "loss": 0.6004, "step": 4523 }, { "epoch": 1.8274329579314177, "grad_norm": 0.53125, "learning_rate": 0.00019070388349514563, "loss": 0.6428, "step": 4524 }, { "epoch": 1.8278369779304076, "grad_norm": 0.5859375, "learning_rate": 0.0001906796116504854, "loss": 0.6656, "step": 4525 }, { "epoch": 1.8282409979293974, "grad_norm": 0.466796875, "learning_rate": 0.0001906553398058252, "loss": 0.5566, "step": 4526 }, { "epoch": 1.8286450179283875, "grad_norm": 0.51171875, "learning_rate": 0.00019063106796116504, "loss": 0.6297, "step": 4527 }, { "epoch": 1.8290490379273774, "grad_norm": 0.462890625, "learning_rate": 0.00019060679611650484, "loss": 0.6324, "step": 4528 }, { "epoch": 1.8294530579263673, "grad_norm": 0.625, "learning_rate": 0.00019058252427184462, "loss": 0.6555, "step": 4529 }, { "epoch": 1.8298570779253573, "grad_norm": 0.5078125, "learning_rate": 0.00019055825242718445, "loss": 0.6293, "step": 4530 }, { "epoch": 1.8302610979243472, "grad_norm": 0.62890625, "learning_rate": 0.00019053398058252425, "loss": 0.6753, "step": 4531 }, { "epoch": 1.830665117923337, "grad_norm": 0.71875, "learning_rate": 0.00019050970873786408, "loss": 0.6713, "step": 4532 }, { "epoch": 1.8310691379223272, "grad_norm": 0.51171875, "learning_rate": 0.00019048543689320388, "loss": 0.5463, "step": 4533 }, { "epoch": 1.8314731579213173, "grad_norm": 0.53515625, "learning_rate": 0.00019046116504854366, "loss": 0.6034, "step": 4534 }, { "epoch": 1.831877177920307, "grad_norm": 0.4296875, "learning_rate": 0.0001904368932038835, "loss": 0.5397, "step": 4535 }, { "epoch": 1.832281197919297, "grad_norm": 0.53515625, "learning_rate": 0.0001904126213592233, "loss": 0.6753, "step": 4536 }, { "epoch": 1.832685217918287, "grad_norm": 0.462890625, "learning_rate": 0.00019038834951456306, "loss": 0.5854, "step": 4537 }, { "epoch": 1.833089237917277, "grad_norm": 0.494140625, "learning_rate": 0.0001903640776699029, "loss": 0.6046, "step": 4538 }, { "epoch": 1.8334932579162668, "grad_norm": 0.58203125, "learning_rate": 0.0001903398058252427, "loss": 0.6685, "step": 4539 }, { "epoch": 1.833897277915257, "grad_norm": 0.48828125, "learning_rate": 0.00019031553398058253, "loss": 0.6471, "step": 4540 }, { "epoch": 1.8343012979142468, "grad_norm": 0.6015625, "learning_rate": 0.00019029126213592233, "loss": 0.6748, "step": 4541 }, { "epoch": 1.8347053179132367, "grad_norm": 0.58203125, "learning_rate": 0.0001902669902912621, "loss": 0.6187, "step": 4542 }, { "epoch": 1.8351093379122267, "grad_norm": 0.49609375, "learning_rate": 0.00019024271844660193, "loss": 0.6376, "step": 4543 }, { "epoch": 1.8355133579112166, "grad_norm": 0.462890625, "learning_rate": 0.00019021844660194174, "loss": 0.5722, "step": 4544 }, { "epoch": 1.8359173779102065, "grad_norm": 0.4921875, "learning_rate": 0.00019019417475728154, "loss": 0.5946, "step": 4545 }, { "epoch": 1.8363213979091966, "grad_norm": 0.5234375, "learning_rate": 0.00019016990291262134, "loss": 0.649, "step": 4546 }, { "epoch": 1.8367254179081864, "grad_norm": 0.5546875, "learning_rate": 0.00019014563106796114, "loss": 0.6391, "step": 4547 }, { "epoch": 1.8371294379071763, "grad_norm": 0.51171875, "learning_rate": 0.00019012135922330094, "loss": 0.6373, "step": 4548 }, { "epoch": 1.8375334579061664, "grad_norm": 0.435546875, "learning_rate": 0.00019009708737864077, "loss": 0.5726, "step": 4549 }, { "epoch": 1.8379374779051563, "grad_norm": 0.53125, "learning_rate": 0.00019007281553398055, "loss": 0.5689, "step": 4550 }, { "epoch": 1.8383414979041461, "grad_norm": 0.6015625, "learning_rate": 0.00019004854368932038, "loss": 0.6914, "step": 4551 }, { "epoch": 1.8387455179031362, "grad_norm": 0.5078125, "learning_rate": 0.00019002427184466018, "loss": 0.6539, "step": 4552 }, { "epoch": 1.8391495379021263, "grad_norm": 0.53125, "learning_rate": 0.00018999999999999998, "loss": 0.5913, "step": 4553 }, { "epoch": 1.839553557901116, "grad_norm": 0.55078125, "learning_rate": 0.0001899757281553398, "loss": 0.6389, "step": 4554 }, { "epoch": 1.839957577900106, "grad_norm": 0.54296875, "learning_rate": 0.0001899514563106796, "loss": 0.6279, "step": 4555 }, { "epoch": 1.8403615978990961, "grad_norm": 0.478515625, "learning_rate": 0.0001899271844660194, "loss": 0.5743, "step": 4556 }, { "epoch": 1.8407656178980858, "grad_norm": 0.478515625, "learning_rate": 0.00018990291262135922, "loss": 0.5965, "step": 4557 }, { "epoch": 1.8411696378970759, "grad_norm": 0.53125, "learning_rate": 0.00018987864077669902, "loss": 0.6032, "step": 4558 }, { "epoch": 1.841573657896066, "grad_norm": 0.6875, "learning_rate": 0.0001898543689320388, "loss": 0.6457, "step": 4559 }, { "epoch": 1.8419776778950558, "grad_norm": 0.52734375, "learning_rate": 0.00018983009708737863, "loss": 0.5781, "step": 4560 }, { "epoch": 1.8423816978940457, "grad_norm": 0.470703125, "learning_rate": 0.00018980582524271843, "loss": 0.5983, "step": 4561 }, { "epoch": 1.8427857178930358, "grad_norm": 0.439453125, "learning_rate": 0.00018978155339805826, "loss": 0.5904, "step": 4562 }, { "epoch": 1.8431897378920257, "grad_norm": 0.439453125, "learning_rate": 0.00018975728155339803, "loss": 0.6336, "step": 4563 }, { "epoch": 1.8435937578910155, "grad_norm": 0.55078125, "learning_rate": 0.00018973300970873784, "loss": 0.6736, "step": 4564 }, { "epoch": 1.8439977778900056, "grad_norm": 0.484375, "learning_rate": 0.00018970873786407767, "loss": 0.5521, "step": 4565 }, { "epoch": 1.8444017978889955, "grad_norm": 0.55078125, "learning_rate": 0.00018968446601941747, "loss": 0.6379, "step": 4566 }, { "epoch": 1.8448058178879854, "grad_norm": 0.546875, "learning_rate": 0.00018966019417475724, "loss": 0.6871, "step": 4567 }, { "epoch": 1.8452098378869755, "grad_norm": 0.4765625, "learning_rate": 0.00018963592233009707, "loss": 0.5311, "step": 4568 }, { "epoch": 1.8456138578859653, "grad_norm": 0.64453125, "learning_rate": 0.00018961165048543688, "loss": 0.5928, "step": 4569 }, { "epoch": 1.8460178778849552, "grad_norm": 0.62109375, "learning_rate": 0.00018958737864077668, "loss": 0.6423, "step": 4570 }, { "epoch": 1.8464218978839453, "grad_norm": 0.52734375, "learning_rate": 0.00018956310679611648, "loss": 0.6155, "step": 4571 }, { "epoch": 1.8468259178829352, "grad_norm": 0.47265625, "learning_rate": 0.00018953883495145628, "loss": 0.642, "step": 4572 }, { "epoch": 1.847229937881925, "grad_norm": 0.53125, "learning_rate": 0.0001895145631067961, "loss": 0.609, "step": 4573 }, { "epoch": 1.8476339578809151, "grad_norm": 0.51171875, "learning_rate": 0.00018949029126213591, "loss": 0.5377, "step": 4574 }, { "epoch": 1.8480379778799052, "grad_norm": 0.482421875, "learning_rate": 0.0001894660194174757, "loss": 0.5993, "step": 4575 }, { "epoch": 1.8484419978788948, "grad_norm": 0.69140625, "learning_rate": 0.00018944174757281552, "loss": 0.6855, "step": 4576 }, { "epoch": 1.848846017877885, "grad_norm": 0.443359375, "learning_rate": 0.00018941747572815532, "loss": 0.5707, "step": 4577 }, { "epoch": 1.849250037876875, "grad_norm": 0.51953125, "learning_rate": 0.00018939320388349512, "loss": 0.5973, "step": 4578 }, { "epoch": 1.849654057875865, "grad_norm": 0.61328125, "learning_rate": 0.00018936893203883495, "loss": 0.6816, "step": 4579 }, { "epoch": 1.8500580778748548, "grad_norm": 0.46484375, "learning_rate": 0.00018934466019417473, "loss": 0.5946, "step": 4580 }, { "epoch": 1.8504620978738449, "grad_norm": 0.416015625, "learning_rate": 0.00018932038834951456, "loss": 0.5885, "step": 4581 }, { "epoch": 1.8508661178728347, "grad_norm": 0.6640625, "learning_rate": 0.00018929611650485436, "loss": 0.6183, "step": 4582 }, { "epoch": 1.8512701378718246, "grad_norm": 0.57421875, "learning_rate": 0.00018927184466019416, "loss": 0.7045, "step": 4583 }, { "epoch": 1.8516741578708147, "grad_norm": 0.431640625, "learning_rate": 0.00018924757281553396, "loss": 0.6132, "step": 4584 }, { "epoch": 1.8520781778698046, "grad_norm": 0.4765625, "learning_rate": 0.00018922330097087377, "loss": 0.6642, "step": 4585 }, { "epoch": 1.8524821978687944, "grad_norm": 0.44140625, "learning_rate": 0.00018919902912621357, "loss": 0.5905, "step": 4586 }, { "epoch": 1.8528862178677845, "grad_norm": 0.498046875, "learning_rate": 0.0001891747572815534, "loss": 0.6681, "step": 4587 }, { "epoch": 1.8532902378667744, "grad_norm": 0.453125, "learning_rate": 0.00018915048543689317, "loss": 0.5879, "step": 4588 }, { "epoch": 1.8536942578657642, "grad_norm": 0.53515625, "learning_rate": 0.00018912621359223298, "loss": 0.5853, "step": 4589 }, { "epoch": 1.8540982778647543, "grad_norm": 0.4765625, "learning_rate": 0.0001891019417475728, "loss": 0.6134, "step": 4590 }, { "epoch": 1.8545022978637442, "grad_norm": 0.53515625, "learning_rate": 0.0001890776699029126, "loss": 0.6569, "step": 4591 }, { "epoch": 1.854906317862734, "grad_norm": 35.25, "learning_rate": 0.00018905339805825244, "loss": 0.5831, "step": 4592 }, { "epoch": 1.8553103378617242, "grad_norm": 0.6015625, "learning_rate": 0.0001890291262135922, "loss": 0.5924, "step": 4593 }, { "epoch": 1.8557143578607143, "grad_norm": 0.5234375, "learning_rate": 0.00018900485436893201, "loss": 0.5976, "step": 4594 }, { "epoch": 1.856118377859704, "grad_norm": 0.5625, "learning_rate": 0.00018898058252427184, "loss": 0.5996, "step": 4595 }, { "epoch": 1.856522397858694, "grad_norm": 0.5625, "learning_rate": 0.00018895631067961165, "loss": 0.6146, "step": 4596 }, { "epoch": 1.856926417857684, "grad_norm": 0.47265625, "learning_rate": 0.00018893203883495142, "loss": 0.5797, "step": 4597 }, { "epoch": 1.857330437856674, "grad_norm": 0.671875, "learning_rate": 0.00018890776699029125, "loss": 0.6574, "step": 4598 }, { "epoch": 1.8577344578556638, "grad_norm": 0.51171875, "learning_rate": 0.00018888349514563105, "loss": 0.6288, "step": 4599 }, { "epoch": 1.858138477854654, "grad_norm": 0.5859375, "learning_rate": 0.00018885922330097083, "loss": 0.6412, "step": 4600 }, { "epoch": 1.8585424978536438, "grad_norm": 0.427734375, "learning_rate": 0.00018883495145631066, "loss": 0.6159, "step": 4601 }, { "epoch": 1.8589465178526337, "grad_norm": 0.53515625, "learning_rate": 0.00018881067961165046, "loss": 0.6143, "step": 4602 }, { "epoch": 1.8593505378516237, "grad_norm": 0.498046875, "learning_rate": 0.0001887864077669903, "loss": 0.626, "step": 4603 }, { "epoch": 1.8597545578506136, "grad_norm": 0.5546875, "learning_rate": 0.0001887621359223301, "loss": 0.6385, "step": 4604 }, { "epoch": 1.8601585778496035, "grad_norm": 0.45703125, "learning_rate": 0.00018873786407766987, "loss": 0.5892, "step": 4605 }, { "epoch": 1.8605625978485936, "grad_norm": 0.6171875, "learning_rate": 0.0001887135922330097, "loss": 0.6808, "step": 4606 }, { "epoch": 1.8609666178475834, "grad_norm": 0.46875, "learning_rate": 0.0001886893203883495, "loss": 0.5921, "step": 4607 }, { "epoch": 1.8613706378465733, "grad_norm": 0.4453125, "learning_rate": 0.0001886650485436893, "loss": 0.5776, "step": 4608 }, { "epoch": 1.8617746578455634, "grad_norm": 0.5234375, "learning_rate": 0.0001886407766990291, "loss": 0.6412, "step": 4609 }, { "epoch": 1.8621786778445533, "grad_norm": 0.515625, "learning_rate": 0.0001886165048543689, "loss": 0.5503, "step": 4610 }, { "epoch": 1.8625826978435431, "grad_norm": 0.5390625, "learning_rate": 0.00018859223300970874, "loss": 0.6488, "step": 4611 }, { "epoch": 1.8629867178425332, "grad_norm": 0.515625, "learning_rate": 0.00018856796116504854, "loss": 0.6673, "step": 4612 }, { "epoch": 1.8633907378415233, "grad_norm": 0.55078125, "learning_rate": 0.0001885436893203883, "loss": 0.5831, "step": 4613 }, { "epoch": 1.863794757840513, "grad_norm": 0.498046875, "learning_rate": 0.00018851941747572814, "loss": 0.5515, "step": 4614 }, { "epoch": 1.864198777839503, "grad_norm": 0.515625, "learning_rate": 0.00018849514563106795, "loss": 0.53, "step": 4615 }, { "epoch": 1.8646027978384931, "grad_norm": 0.54296875, "learning_rate": 0.00018847087378640775, "loss": 0.5976, "step": 4616 }, { "epoch": 1.865006817837483, "grad_norm": 0.58984375, "learning_rate": 0.00018844660194174758, "loss": 0.6714, "step": 4617 }, { "epoch": 1.8654108378364729, "grad_norm": 0.4921875, "learning_rate": 0.00018842233009708735, "loss": 0.5284, "step": 4618 }, { "epoch": 1.865814857835463, "grad_norm": 0.47265625, "learning_rate": 0.00018839805825242715, "loss": 0.5952, "step": 4619 }, { "epoch": 1.8662188778344528, "grad_norm": 0.54296875, "learning_rate": 0.00018837378640776698, "loss": 0.6395, "step": 4620 }, { "epoch": 1.8666228978334427, "grad_norm": 0.5078125, "learning_rate": 0.0001883495145631068, "loss": 0.5695, "step": 4621 }, { "epoch": 1.8670269178324328, "grad_norm": 0.484375, "learning_rate": 0.0001883252427184466, "loss": 0.5747, "step": 4622 }, { "epoch": 1.8674309378314227, "grad_norm": 0.49609375, "learning_rate": 0.0001883009708737864, "loss": 0.5903, "step": 4623 }, { "epoch": 1.8678349578304125, "grad_norm": 0.5703125, "learning_rate": 0.0001882766990291262, "loss": 0.5895, "step": 4624 }, { "epoch": 1.8682389778294026, "grad_norm": 0.5078125, "learning_rate": 0.00018825242718446602, "loss": 0.6891, "step": 4625 }, { "epoch": 1.8686429978283925, "grad_norm": 0.5078125, "learning_rate": 0.0001882281553398058, "loss": 0.6493, "step": 4626 }, { "epoch": 1.8690470178273824, "grad_norm": 0.50390625, "learning_rate": 0.0001882038834951456, "loss": 0.6532, "step": 4627 }, { "epoch": 1.8694510378263725, "grad_norm": 0.4765625, "learning_rate": 0.00018817961165048543, "loss": 0.6218, "step": 4628 }, { "epoch": 1.8698550578253623, "grad_norm": 0.478515625, "learning_rate": 0.00018815533980582523, "loss": 0.5613, "step": 4629 }, { "epoch": 1.8702590778243522, "grad_norm": 0.431640625, "learning_rate": 0.000188131067961165, "loss": 0.5554, "step": 4630 }, { "epoch": 1.8706630978233423, "grad_norm": 0.515625, "learning_rate": 0.00018810679611650484, "loss": 0.663, "step": 4631 }, { "epoch": 1.8710671178223324, "grad_norm": 0.466796875, "learning_rate": 0.00018808252427184464, "loss": 0.6132, "step": 4632 }, { "epoch": 1.871471137821322, "grad_norm": 0.423828125, "learning_rate": 0.00018805825242718447, "loss": 0.567, "step": 4633 }, { "epoch": 1.871875157820312, "grad_norm": 0.4921875, "learning_rate": 0.00018803398058252424, "loss": 0.5978, "step": 4634 }, { "epoch": 1.8722791778193022, "grad_norm": 0.55859375, "learning_rate": 0.00018800970873786405, "loss": 0.6488, "step": 4635 }, { "epoch": 1.8726831978182918, "grad_norm": 0.60546875, "learning_rate": 0.00018798543689320388, "loss": 0.6097, "step": 4636 }, { "epoch": 1.873087217817282, "grad_norm": 0.455078125, "learning_rate": 0.00018796116504854368, "loss": 0.6448, "step": 4637 }, { "epoch": 1.873491237816272, "grad_norm": 0.546875, "learning_rate": 0.00018793689320388345, "loss": 0.6652, "step": 4638 }, { "epoch": 1.873895257815262, "grad_norm": 0.57421875, "learning_rate": 0.00018791262135922328, "loss": 0.6804, "step": 4639 }, { "epoch": 1.8742992778142518, "grad_norm": 0.5234375, "learning_rate": 0.00018788834951456309, "loss": 0.6402, "step": 4640 }, { "epoch": 1.8747032978132419, "grad_norm": 0.56640625, "learning_rate": 0.00018786407766990291, "loss": 0.6747, "step": 4641 }, { "epoch": 1.8751073178122317, "grad_norm": 0.494140625, "learning_rate": 0.00018783980582524272, "loss": 0.5224, "step": 4642 }, { "epoch": 1.8755113378112216, "grad_norm": 0.6015625, "learning_rate": 0.0001878155339805825, "loss": 0.7081, "step": 4643 }, { "epoch": 1.8759153578102117, "grad_norm": 0.5390625, "learning_rate": 0.00018779126213592232, "loss": 0.6241, "step": 4644 }, { "epoch": 1.8763193778092015, "grad_norm": 0.5078125, "learning_rate": 0.00018776699029126212, "loss": 0.607, "step": 4645 }, { "epoch": 1.8767233978081914, "grad_norm": 0.435546875, "learning_rate": 0.00018774271844660193, "loss": 0.6235, "step": 4646 }, { "epoch": 1.8771274178071815, "grad_norm": 0.55078125, "learning_rate": 0.00018771844660194173, "loss": 0.6208, "step": 4647 }, { "epoch": 1.8775314378061714, "grad_norm": 0.48046875, "learning_rate": 0.00018769417475728153, "loss": 0.5288, "step": 4648 }, { "epoch": 1.8779354578051612, "grad_norm": 0.58984375, "learning_rate": 0.00018766990291262133, "loss": 0.6275, "step": 4649 }, { "epoch": 1.8783394778041513, "grad_norm": 0.5078125, "learning_rate": 0.00018764563106796116, "loss": 0.585, "step": 4650 }, { "epoch": 1.8787434978031412, "grad_norm": 0.578125, "learning_rate": 0.00018762135922330094, "loss": 0.6215, "step": 4651 }, { "epoch": 1.879147517802131, "grad_norm": 0.53515625, "learning_rate": 0.00018759708737864077, "loss": 0.6432, "step": 4652 }, { "epoch": 1.8795515378011212, "grad_norm": 0.494140625, "learning_rate": 0.00018757281553398057, "loss": 0.622, "step": 4653 }, { "epoch": 1.8799555578001113, "grad_norm": 0.5703125, "learning_rate": 0.00018754854368932037, "loss": 0.605, "step": 4654 }, { "epoch": 1.880359577799101, "grad_norm": 0.43359375, "learning_rate": 0.0001875242718446602, "loss": 0.5662, "step": 4655 }, { "epoch": 1.880763597798091, "grad_norm": 0.5234375, "learning_rate": 0.00018749999999999998, "loss": 0.6474, "step": 4656 }, { "epoch": 1.881167617797081, "grad_norm": 0.51953125, "learning_rate": 0.00018747572815533978, "loss": 0.6551, "step": 4657 }, { "epoch": 1.881571637796071, "grad_norm": 0.515625, "learning_rate": 0.0001874514563106796, "loss": 0.6211, "step": 4658 }, { "epoch": 1.8819756577950608, "grad_norm": 0.6015625, "learning_rate": 0.0001874271844660194, "loss": 0.5792, "step": 4659 }, { "epoch": 1.882379677794051, "grad_norm": 0.482421875, "learning_rate": 0.00018740291262135919, "loss": 0.6033, "step": 4660 }, { "epoch": 1.8827836977930408, "grad_norm": 0.5234375, "learning_rate": 0.00018737864077669902, "loss": 0.6701, "step": 4661 }, { "epoch": 1.8831877177920306, "grad_norm": 0.484375, "learning_rate": 0.00018735436893203882, "loss": 0.5988, "step": 4662 }, { "epoch": 1.8835917377910207, "grad_norm": 0.5703125, "learning_rate": 0.00018733009708737865, "loss": 0.6473, "step": 4663 }, { "epoch": 1.8839957577900106, "grad_norm": 0.458984375, "learning_rate": 0.00018730582524271842, "loss": 0.5677, "step": 4664 }, { "epoch": 1.8843997777890005, "grad_norm": 0.462890625, "learning_rate": 0.00018728155339805822, "loss": 0.5491, "step": 4665 }, { "epoch": 1.8848037977879906, "grad_norm": 0.458984375, "learning_rate": 0.00018725728155339805, "loss": 0.6139, "step": 4666 }, { "epoch": 1.8852078177869804, "grad_norm": 0.61328125, "learning_rate": 0.00018723300970873786, "loss": 0.6144, "step": 4667 }, { "epoch": 1.8856118377859703, "grad_norm": 0.55078125, "learning_rate": 0.00018720873786407763, "loss": 0.6216, "step": 4668 }, { "epoch": 1.8860158577849604, "grad_norm": 0.6171875, "learning_rate": 0.00018718446601941746, "loss": 0.6412, "step": 4669 }, { "epoch": 1.8864198777839503, "grad_norm": 0.5234375, "learning_rate": 0.00018716019417475726, "loss": 0.5961, "step": 4670 }, { "epoch": 1.8868238977829401, "grad_norm": 0.51953125, "learning_rate": 0.00018713592233009707, "loss": 0.5669, "step": 4671 }, { "epoch": 1.8872279177819302, "grad_norm": 0.60546875, "learning_rate": 0.00018711165048543687, "loss": 0.6199, "step": 4672 }, { "epoch": 1.8876319377809203, "grad_norm": 0.5078125, "learning_rate": 0.00018708737864077667, "loss": 0.6312, "step": 4673 }, { "epoch": 1.88803595777991, "grad_norm": 0.64453125, "learning_rate": 0.0001870631067961165, "loss": 0.6905, "step": 4674 }, { "epoch": 1.8884399777789, "grad_norm": 0.4921875, "learning_rate": 0.0001870388349514563, "loss": 0.6435, "step": 4675 }, { "epoch": 1.8888439977778901, "grad_norm": 0.515625, "learning_rate": 0.00018701456310679608, "loss": 0.6576, "step": 4676 }, { "epoch": 1.88924801777688, "grad_norm": 0.63671875, "learning_rate": 0.0001869902912621359, "loss": 0.6391, "step": 4677 }, { "epoch": 1.8896520377758699, "grad_norm": 0.546875, "learning_rate": 0.0001869660194174757, "loss": 0.6528, "step": 4678 }, { "epoch": 1.89005605777486, "grad_norm": 0.60546875, "learning_rate": 0.0001869417475728155, "loss": 0.667, "step": 4679 }, { "epoch": 1.8904600777738498, "grad_norm": 0.5703125, "learning_rate": 0.00018691747572815534, "loss": 0.7255, "step": 4680 }, { "epoch": 1.8908640977728397, "grad_norm": 0.5390625, "learning_rate": 0.00018689320388349512, "loss": 0.6289, "step": 4681 }, { "epoch": 1.8912681177718298, "grad_norm": 0.5, "learning_rate": 0.00018686893203883495, "loss": 0.5854, "step": 4682 }, { "epoch": 1.8916721377708197, "grad_norm": 0.53515625, "learning_rate": 0.00018684466019417475, "loss": 0.6466, "step": 4683 }, { "epoch": 1.8920761577698095, "grad_norm": 0.50390625, "learning_rate": 0.00018682038834951455, "loss": 0.5807, "step": 4684 }, { "epoch": 1.8924801777687996, "grad_norm": 0.55078125, "learning_rate": 0.00018679611650485435, "loss": 0.5473, "step": 4685 }, { "epoch": 1.8928841977677895, "grad_norm": 0.5, "learning_rate": 0.00018677184466019416, "loss": 0.6322, "step": 4686 }, { "epoch": 1.8932882177667794, "grad_norm": 0.50390625, "learning_rate": 0.00018674757281553396, "loss": 0.5954, "step": 4687 }, { "epoch": 1.8936922377657694, "grad_norm": 0.49609375, "learning_rate": 0.0001867233009708738, "loss": 0.6541, "step": 4688 }, { "epoch": 1.8940962577647593, "grad_norm": 0.4140625, "learning_rate": 0.00018669902912621356, "loss": 0.5511, "step": 4689 }, { "epoch": 1.8945002777637492, "grad_norm": 0.55078125, "learning_rate": 0.00018667475728155336, "loss": 0.632, "step": 4690 }, { "epoch": 1.8949042977627393, "grad_norm": 0.51171875, "learning_rate": 0.0001866504854368932, "loss": 0.583, "step": 4691 }, { "epoch": 1.8953083177617294, "grad_norm": 0.58984375, "learning_rate": 0.000186626213592233, "loss": 0.641, "step": 4692 }, { "epoch": 1.895712337760719, "grad_norm": 0.4453125, "learning_rate": 0.00018660194174757283, "loss": 0.5568, "step": 4693 }, { "epoch": 1.896116357759709, "grad_norm": 0.50390625, "learning_rate": 0.0001865776699029126, "loss": 0.5496, "step": 4694 }, { "epoch": 1.8965203777586992, "grad_norm": 0.46875, "learning_rate": 0.0001865533980582524, "loss": 0.5856, "step": 4695 }, { "epoch": 1.8969243977576888, "grad_norm": 0.54296875, "learning_rate": 0.00018652912621359223, "loss": 0.6079, "step": 4696 }, { "epoch": 1.897328417756679, "grad_norm": 0.60546875, "learning_rate": 0.000186504854368932, "loss": 0.6647, "step": 4697 }, { "epoch": 1.897732437755669, "grad_norm": 0.474609375, "learning_rate": 0.0001864805825242718, "loss": 0.5367, "step": 4698 }, { "epoch": 1.898136457754659, "grad_norm": 0.56640625, "learning_rate": 0.00018645631067961164, "loss": 0.5719, "step": 4699 }, { "epoch": 1.8985404777536488, "grad_norm": 0.890625, "learning_rate": 0.00018643203883495144, "loss": 0.7013, "step": 4700 }, { "epoch": 1.8989444977526388, "grad_norm": 0.486328125, "learning_rate": 0.00018640776699029122, "loss": 0.5949, "step": 4701 }, { "epoch": 1.8993485177516287, "grad_norm": 0.58984375, "learning_rate": 0.00018638349514563105, "loss": 0.5951, "step": 4702 }, { "epoch": 1.8997525377506186, "grad_norm": 0.5390625, "learning_rate": 0.00018635922330097085, "loss": 0.58, "step": 4703 }, { "epoch": 1.9001565577496087, "grad_norm": 0.55078125, "learning_rate": 0.00018633495145631068, "loss": 0.6089, "step": 4704 }, { "epoch": 1.9005605777485985, "grad_norm": 0.6484375, "learning_rate": 0.00018631067961165048, "loss": 0.6494, "step": 4705 }, { "epoch": 1.9009645977475884, "grad_norm": 0.62109375, "learning_rate": 0.00018628640776699026, "loss": 0.6633, "step": 4706 }, { "epoch": 1.9013686177465785, "grad_norm": 0.50390625, "learning_rate": 0.00018626213592233009, "loss": 0.6249, "step": 4707 }, { "epoch": 1.9017726377455684, "grad_norm": 0.62109375, "learning_rate": 0.0001862378640776699, "loss": 0.7218, "step": 4708 }, { "epoch": 1.9021766577445582, "grad_norm": 0.48828125, "learning_rate": 0.0001862135922330097, "loss": 0.6133, "step": 4709 }, { "epoch": 1.9025806777435483, "grad_norm": 0.453125, "learning_rate": 0.0001861893203883495, "loss": 0.5615, "step": 4710 }, { "epoch": 1.9029846977425382, "grad_norm": 0.5625, "learning_rate": 0.0001861650485436893, "loss": 0.6608, "step": 4711 }, { "epoch": 1.903388717741528, "grad_norm": 0.53125, "learning_rate": 0.00018614077669902912, "loss": 0.6088, "step": 4712 }, { "epoch": 1.9037927377405182, "grad_norm": 0.41796875, "learning_rate": 0.00018611650485436893, "loss": 0.5175, "step": 4713 }, { "epoch": 1.9041967577395083, "grad_norm": 0.53125, "learning_rate": 0.0001860922330097087, "loss": 0.5677, "step": 4714 }, { "epoch": 1.904600777738498, "grad_norm": 0.5234375, "learning_rate": 0.00018606796116504853, "loss": 0.5513, "step": 4715 }, { "epoch": 1.905004797737488, "grad_norm": 0.50390625, "learning_rate": 0.00018604368932038833, "loss": 0.5619, "step": 4716 }, { "epoch": 1.905408817736478, "grad_norm": 0.466796875, "learning_rate": 0.00018601941747572814, "loss": 0.5667, "step": 4717 }, { "epoch": 1.905812837735468, "grad_norm": 0.490234375, "learning_rate": 0.00018599514563106797, "loss": 0.628, "step": 4718 }, { "epoch": 1.9062168577344578, "grad_norm": 0.546875, "learning_rate": 0.00018597087378640774, "loss": 0.6496, "step": 4719 }, { "epoch": 1.906620877733448, "grad_norm": 0.4609375, "learning_rate": 0.00018594660194174754, "loss": 0.5561, "step": 4720 }, { "epoch": 1.9070248977324378, "grad_norm": 0.455078125, "learning_rate": 0.00018592233009708737, "loss": 0.5854, "step": 4721 }, { "epoch": 1.9074289177314276, "grad_norm": 0.43359375, "learning_rate": 0.00018589805825242718, "loss": 0.6027, "step": 4722 }, { "epoch": 1.9078329377304177, "grad_norm": 0.49609375, "learning_rate": 0.00018587378640776698, "loss": 0.5959, "step": 4723 }, { "epoch": 1.9082369577294076, "grad_norm": 0.55859375, "learning_rate": 0.00018584951456310678, "loss": 0.6913, "step": 4724 }, { "epoch": 1.9086409777283975, "grad_norm": 0.57421875, "learning_rate": 0.00018582524271844658, "loss": 0.6813, "step": 4725 }, { "epoch": 1.9090449977273876, "grad_norm": 0.423828125, "learning_rate": 0.0001858009708737864, "loss": 0.5392, "step": 4726 }, { "epoch": 1.9094490177263774, "grad_norm": 0.482421875, "learning_rate": 0.0001857766990291262, "loss": 0.6317, "step": 4727 }, { "epoch": 1.9098530377253673, "grad_norm": 0.6015625, "learning_rate": 0.000185752427184466, "loss": 0.7168, "step": 4728 }, { "epoch": 1.9102570577243574, "grad_norm": 0.5390625, "learning_rate": 0.00018572815533980582, "loss": 0.606, "step": 4729 }, { "epoch": 1.9106610777233473, "grad_norm": 0.578125, "learning_rate": 0.00018570388349514562, "loss": 0.687, "step": 4730 }, { "epoch": 1.9110650977223371, "grad_norm": 0.5390625, "learning_rate": 0.0001856796116504854, "loss": 0.609, "step": 4731 }, { "epoch": 1.9114691177213272, "grad_norm": 0.453125, "learning_rate": 0.00018565533980582523, "loss": 0.5683, "step": 4732 }, { "epoch": 1.9118731377203173, "grad_norm": 0.56640625, "learning_rate": 0.00018563106796116503, "loss": 0.6262, "step": 4733 }, { "epoch": 1.912277157719307, "grad_norm": 0.455078125, "learning_rate": 0.00018560679611650486, "loss": 0.6142, "step": 4734 }, { "epoch": 1.912681177718297, "grad_norm": 0.54296875, "learning_rate": 0.00018558252427184463, "loss": 0.599, "step": 4735 }, { "epoch": 1.9130851977172871, "grad_norm": 0.490234375, "learning_rate": 0.00018555825242718444, "loss": 0.5484, "step": 4736 }, { "epoch": 1.913489217716277, "grad_norm": 0.515625, "learning_rate": 0.00018553398058252426, "loss": 0.5258, "step": 4737 }, { "epoch": 1.9138932377152669, "grad_norm": 0.515625, "learning_rate": 0.00018550970873786407, "loss": 0.5991, "step": 4738 }, { "epoch": 1.914297257714257, "grad_norm": 0.640625, "learning_rate": 0.00018548543689320384, "loss": 0.7087, "step": 4739 }, { "epoch": 1.9147012777132468, "grad_norm": 0.482421875, "learning_rate": 0.00018546116504854367, "loss": 0.584, "step": 4740 }, { "epoch": 1.9151052977122367, "grad_norm": 0.546875, "learning_rate": 0.00018543689320388347, "loss": 0.5925, "step": 4741 }, { "epoch": 1.9155093177112268, "grad_norm": 0.52734375, "learning_rate": 0.0001854126213592233, "loss": 0.5568, "step": 4742 }, { "epoch": 1.9159133377102167, "grad_norm": 0.453125, "learning_rate": 0.0001853883495145631, "loss": 0.6234, "step": 4743 }, { "epoch": 1.9163173577092065, "grad_norm": 0.5390625, "learning_rate": 0.00018536407766990288, "loss": 0.6529, "step": 4744 }, { "epoch": 1.9167213777081966, "grad_norm": 0.48828125, "learning_rate": 0.0001853398058252427, "loss": 0.6019, "step": 4745 }, { "epoch": 1.9171253977071865, "grad_norm": 0.5078125, "learning_rate": 0.0001853155339805825, "loss": 0.574, "step": 4746 }, { "epoch": 1.9175294177061764, "grad_norm": 0.46484375, "learning_rate": 0.00018529126213592232, "loss": 0.6006, "step": 4747 }, { "epoch": 1.9179334377051664, "grad_norm": 0.498046875, "learning_rate": 0.00018526699029126212, "loss": 0.5921, "step": 4748 }, { "epoch": 1.9183374577041563, "grad_norm": 0.486328125, "learning_rate": 0.00018524271844660192, "loss": 0.585, "step": 4749 }, { "epoch": 1.9187414777031462, "grad_norm": 0.734375, "learning_rate": 0.00018521844660194172, "loss": 0.657, "step": 4750 }, { "epoch": 1.9191454977021363, "grad_norm": 0.447265625, "learning_rate": 0.00018519417475728155, "loss": 0.5993, "step": 4751 }, { "epoch": 1.9195495177011264, "grad_norm": 0.62109375, "learning_rate": 0.00018516990291262133, "loss": 0.6926, "step": 4752 }, { "epoch": 1.919953537700116, "grad_norm": 0.486328125, "learning_rate": 0.00018514563106796116, "loss": 0.5514, "step": 4753 }, { "epoch": 1.920357557699106, "grad_norm": 0.51953125, "learning_rate": 0.00018512135922330096, "loss": 0.5898, "step": 4754 }, { "epoch": 1.9207615776980962, "grad_norm": 0.5, "learning_rate": 0.00018509708737864076, "loss": 0.6821, "step": 4755 }, { "epoch": 1.921165597697086, "grad_norm": 0.451171875, "learning_rate": 0.0001850728155339806, "loss": 0.5102, "step": 4756 }, { "epoch": 1.921569617696076, "grad_norm": 0.5234375, "learning_rate": 0.00018504854368932037, "loss": 0.6354, "step": 4757 }, { "epoch": 1.921973637695066, "grad_norm": 0.515625, "learning_rate": 0.00018502427184466017, "loss": 0.6232, "step": 4758 }, { "epoch": 1.9223776576940559, "grad_norm": 0.46484375, "learning_rate": 0.000185, "loss": 0.6067, "step": 4759 }, { "epoch": 1.9227816776930458, "grad_norm": 0.42578125, "learning_rate": 0.0001849757281553398, "loss": 0.5676, "step": 4760 }, { "epoch": 1.9231856976920358, "grad_norm": 0.62890625, "learning_rate": 0.00018495145631067957, "loss": 0.6196, "step": 4761 }, { "epoch": 1.9235897176910257, "grad_norm": 0.470703125, "learning_rate": 0.0001849271844660194, "loss": 0.6366, "step": 4762 }, { "epoch": 1.9239937376900156, "grad_norm": 0.5078125, "learning_rate": 0.0001849029126213592, "loss": 0.5936, "step": 4763 }, { "epoch": 1.9243977576890057, "grad_norm": 0.466796875, "learning_rate": 0.00018487864077669904, "loss": 0.6008, "step": 4764 }, { "epoch": 1.9248017776879955, "grad_norm": 0.64453125, "learning_rate": 0.0001848543689320388, "loss": 0.7194, "step": 4765 }, { "epoch": 1.9252057976869854, "grad_norm": 0.5390625, "learning_rate": 0.00018483009708737861, "loss": 0.6609, "step": 4766 }, { "epoch": 1.9256098176859755, "grad_norm": 0.47265625, "learning_rate": 0.00018480582524271844, "loss": 0.5393, "step": 4767 }, { "epoch": 1.9260138376849654, "grad_norm": 0.478515625, "learning_rate": 0.00018478155339805825, "loss": 0.6052, "step": 4768 }, { "epoch": 1.9264178576839552, "grad_norm": 0.57421875, "learning_rate": 0.00018475728155339802, "loss": 0.6683, "step": 4769 }, { "epoch": 1.9268218776829453, "grad_norm": 0.4453125, "learning_rate": 0.00018473300970873785, "loss": 0.5462, "step": 4770 }, { "epoch": 1.9272258976819354, "grad_norm": 0.6796875, "learning_rate": 0.00018470873786407765, "loss": 0.6946, "step": 4771 }, { "epoch": 1.927629917680925, "grad_norm": 0.625, "learning_rate": 0.00018468446601941745, "loss": 0.7003, "step": 4772 }, { "epoch": 1.9280339376799152, "grad_norm": 0.64453125, "learning_rate": 0.00018466019417475726, "loss": 0.7, "step": 4773 }, { "epoch": 1.9284379576789052, "grad_norm": 0.52734375, "learning_rate": 0.00018463592233009706, "loss": 0.551, "step": 4774 }, { "epoch": 1.928841977677895, "grad_norm": 0.56640625, "learning_rate": 0.0001846116504854369, "loss": 0.6524, "step": 4775 }, { "epoch": 1.929245997676885, "grad_norm": 0.482421875, "learning_rate": 0.0001845873786407767, "loss": 0.606, "step": 4776 }, { "epoch": 1.929650017675875, "grad_norm": 0.44140625, "learning_rate": 0.00018456310679611647, "loss": 0.5033, "step": 4777 }, { "epoch": 1.930054037674865, "grad_norm": 0.5546875, "learning_rate": 0.0001845388349514563, "loss": 0.6652, "step": 4778 }, { "epoch": 1.9304580576738548, "grad_norm": 0.55859375, "learning_rate": 0.0001845145631067961, "loss": 0.5714, "step": 4779 }, { "epoch": 1.930862077672845, "grad_norm": 0.515625, "learning_rate": 0.0001844902912621359, "loss": 0.6326, "step": 4780 }, { "epoch": 1.9312660976718348, "grad_norm": 0.5, "learning_rate": 0.00018446601941747573, "loss": 0.5858, "step": 4781 }, { "epoch": 1.9316701176708246, "grad_norm": 0.51171875, "learning_rate": 0.0001844417475728155, "loss": 0.6605, "step": 4782 }, { "epoch": 1.9320741376698147, "grad_norm": 0.42578125, "learning_rate": 0.00018441747572815533, "loss": 0.5641, "step": 4783 }, { "epoch": 1.9324781576688046, "grad_norm": 0.4921875, "learning_rate": 0.00018439320388349514, "loss": 0.571, "step": 4784 }, { "epoch": 1.9328821776677945, "grad_norm": 0.5234375, "learning_rate": 0.00018436893203883494, "loss": 0.6421, "step": 4785 }, { "epoch": 1.9332861976667846, "grad_norm": 0.4375, "learning_rate": 0.00018434466019417474, "loss": 0.6435, "step": 4786 }, { "epoch": 1.9336902176657744, "grad_norm": 0.53125, "learning_rate": 0.00018432038834951454, "loss": 0.6207, "step": 4787 }, { "epoch": 1.9340942376647643, "grad_norm": 0.50390625, "learning_rate": 0.00018429611650485435, "loss": 0.5997, "step": 4788 }, { "epoch": 1.9344982576637544, "grad_norm": 0.58984375, "learning_rate": 0.00018427184466019418, "loss": 0.6071, "step": 4789 }, { "epoch": 1.9349022776627443, "grad_norm": 0.5625, "learning_rate": 0.00018424757281553395, "loss": 0.712, "step": 4790 }, { "epoch": 1.9353062976617341, "grad_norm": 0.46875, "learning_rate": 0.00018422330097087375, "loss": 0.5839, "step": 4791 }, { "epoch": 1.9357103176607242, "grad_norm": 0.5234375, "learning_rate": 0.00018419902912621358, "loss": 0.5827, "step": 4792 }, { "epoch": 1.9361143376597143, "grad_norm": 0.439453125, "learning_rate": 0.00018417475728155339, "loss": 0.5612, "step": 4793 }, { "epoch": 1.936518357658704, "grad_norm": 0.5234375, "learning_rate": 0.00018415048543689321, "loss": 0.6506, "step": 4794 }, { "epoch": 1.936922377657694, "grad_norm": 0.51953125, "learning_rate": 0.000184126213592233, "loss": 0.6051, "step": 4795 }, { "epoch": 1.9373263976566841, "grad_norm": 0.58984375, "learning_rate": 0.0001841019417475728, "loss": 0.6074, "step": 4796 }, { "epoch": 1.937730417655674, "grad_norm": 0.50390625, "learning_rate": 0.00018407766990291262, "loss": 0.5777, "step": 4797 }, { "epoch": 1.9381344376546639, "grad_norm": 0.478515625, "learning_rate": 0.0001840533980582524, "loss": 0.5693, "step": 4798 }, { "epoch": 1.938538457653654, "grad_norm": 0.52734375, "learning_rate": 0.0001840291262135922, "loss": 0.7221, "step": 4799 }, { "epoch": 1.9389424776526438, "grad_norm": 0.5546875, "learning_rate": 0.00018400485436893203, "loss": 0.585, "step": 4800 }, { "epoch": 1.9393464976516337, "grad_norm": 0.5, "learning_rate": 0.00018398058252427183, "loss": 0.5923, "step": 4801 }, { "epoch": 1.9397505176506238, "grad_norm": 0.466796875, "learning_rate": 0.0001839563106796116, "loss": 0.6124, "step": 4802 }, { "epoch": 1.9401545376496137, "grad_norm": 0.515625, "learning_rate": 0.00018393203883495144, "loss": 0.6275, "step": 4803 }, { "epoch": 1.9405585576486035, "grad_norm": 0.486328125, "learning_rate": 0.00018390776699029124, "loss": 0.5879, "step": 4804 }, { "epoch": 1.9409625776475936, "grad_norm": 0.55859375, "learning_rate": 0.00018388349514563107, "loss": 0.7243, "step": 4805 }, { "epoch": 1.9413665976465835, "grad_norm": 0.47265625, "learning_rate": 0.00018385922330097087, "loss": 0.5489, "step": 4806 }, { "epoch": 1.9417706176455733, "grad_norm": 0.52734375, "learning_rate": 0.00018383495145631065, "loss": 0.5793, "step": 4807 }, { "epoch": 1.9421746376445634, "grad_norm": 0.474609375, "learning_rate": 0.00018381067961165047, "loss": 0.6101, "step": 4808 }, { "epoch": 1.9425786576435533, "grad_norm": 0.4453125, "learning_rate": 0.00018378640776699028, "loss": 0.5413, "step": 4809 }, { "epoch": 1.9429826776425432, "grad_norm": 0.5625, "learning_rate": 0.00018376213592233008, "loss": 0.5781, "step": 4810 }, { "epoch": 1.9433866976415333, "grad_norm": 0.478515625, "learning_rate": 0.00018373786407766988, "loss": 0.6248, "step": 4811 }, { "epoch": 1.9437907176405234, "grad_norm": 0.53515625, "learning_rate": 0.00018371359223300968, "loss": 0.577, "step": 4812 }, { "epoch": 1.944194737639513, "grad_norm": 0.62109375, "learning_rate": 0.0001836893203883495, "loss": 0.6692, "step": 4813 }, { "epoch": 1.944598757638503, "grad_norm": 0.484375, "learning_rate": 0.00018366504854368932, "loss": 0.5554, "step": 4814 }, { "epoch": 1.9450027776374932, "grad_norm": 0.466796875, "learning_rate": 0.0001836407766990291, "loss": 0.5911, "step": 4815 }, { "epoch": 1.945406797636483, "grad_norm": 0.6484375, "learning_rate": 0.00018361650485436892, "loss": 0.6242, "step": 4816 }, { "epoch": 1.945810817635473, "grad_norm": 0.498046875, "learning_rate": 0.00018359223300970872, "loss": 0.6061, "step": 4817 }, { "epoch": 1.946214837634463, "grad_norm": 0.578125, "learning_rate": 0.00018356796116504853, "loss": 0.6348, "step": 4818 }, { "epoch": 1.9466188576334529, "grad_norm": 0.4921875, "learning_rate": 0.00018354368932038835, "loss": 0.5995, "step": 4819 }, { "epoch": 1.9470228776324427, "grad_norm": 0.55078125, "learning_rate": 0.00018351941747572813, "loss": 0.7065, "step": 4820 }, { "epoch": 1.9474268976314328, "grad_norm": 0.478515625, "learning_rate": 0.00018349514563106793, "loss": 0.5193, "step": 4821 }, { "epoch": 1.9478309176304227, "grad_norm": 0.51171875, "learning_rate": 0.00018347087378640776, "loss": 0.5875, "step": 4822 }, { "epoch": 1.9482349376294126, "grad_norm": 0.640625, "learning_rate": 0.00018344660194174756, "loss": 0.6635, "step": 4823 }, { "epoch": 1.9486389576284027, "grad_norm": 0.59765625, "learning_rate": 0.00018342233009708737, "loss": 0.6571, "step": 4824 }, { "epoch": 1.9490429776273925, "grad_norm": 0.5234375, "learning_rate": 0.00018339805825242717, "loss": 0.6176, "step": 4825 }, { "epoch": 1.9494469976263824, "grad_norm": 0.462890625, "learning_rate": 0.00018337378640776697, "loss": 0.5891, "step": 4826 }, { "epoch": 1.9498510176253725, "grad_norm": 0.53125, "learning_rate": 0.0001833495145631068, "loss": 0.6101, "step": 4827 }, { "epoch": 1.9502550376243624, "grad_norm": 0.5703125, "learning_rate": 0.00018332524271844658, "loss": 0.6648, "step": 4828 }, { "epoch": 1.9506590576233522, "grad_norm": 0.48828125, "learning_rate": 0.00018330097087378638, "loss": 0.6068, "step": 4829 }, { "epoch": 1.9510630776223423, "grad_norm": 0.44921875, "learning_rate": 0.0001832766990291262, "loss": 0.5734, "step": 4830 }, { "epoch": 1.9514670976213324, "grad_norm": 0.45703125, "learning_rate": 0.000183252427184466, "loss": 0.589, "step": 4831 }, { "epoch": 1.951871117620322, "grad_norm": 0.5234375, "learning_rate": 0.00018322815533980579, "loss": 0.6016, "step": 4832 }, { "epoch": 1.9522751376193122, "grad_norm": 0.72265625, "learning_rate": 0.00018320388349514561, "loss": 0.7619, "step": 4833 }, { "epoch": 1.9526791576183022, "grad_norm": 0.5546875, "learning_rate": 0.00018317961165048542, "loss": 0.6835, "step": 4834 }, { "epoch": 1.953083177617292, "grad_norm": 0.55078125, "learning_rate": 0.00018315533980582525, "loss": 0.6371, "step": 4835 }, { "epoch": 1.953487197616282, "grad_norm": 0.48828125, "learning_rate": 0.00018313106796116502, "loss": 0.5932, "step": 4836 }, { "epoch": 1.953891217615272, "grad_norm": 0.50390625, "learning_rate": 0.00018310679611650482, "loss": 0.5911, "step": 4837 }, { "epoch": 1.954295237614262, "grad_norm": 0.52734375, "learning_rate": 0.00018308252427184465, "loss": 0.6284, "step": 4838 }, { "epoch": 1.9546992576132518, "grad_norm": 0.6328125, "learning_rate": 0.00018305825242718446, "loss": 0.6824, "step": 4839 }, { "epoch": 1.955103277612242, "grad_norm": 0.546875, "learning_rate": 0.00018303398058252423, "loss": 0.6757, "step": 4840 }, { "epoch": 1.9555072976112318, "grad_norm": 0.546875, "learning_rate": 0.00018300970873786406, "loss": 0.6386, "step": 4841 }, { "epoch": 1.9559113176102216, "grad_norm": 0.5625, "learning_rate": 0.00018298543689320386, "loss": 0.6165, "step": 4842 }, { "epoch": 1.9563153376092117, "grad_norm": 0.4453125, "learning_rate": 0.0001829611650485437, "loss": 0.5488, "step": 4843 }, { "epoch": 1.9567193576082016, "grad_norm": 0.55859375, "learning_rate": 0.0001829368932038835, "loss": 0.6295, "step": 4844 }, { "epoch": 1.9571233776071915, "grad_norm": 0.408203125, "learning_rate": 0.00018291262135922327, "loss": 0.5815, "step": 4845 }, { "epoch": 1.9575273976061816, "grad_norm": 0.5703125, "learning_rate": 0.0001828883495145631, "loss": 0.6677, "step": 4846 }, { "epoch": 1.9579314176051714, "grad_norm": 0.52734375, "learning_rate": 0.0001828640776699029, "loss": 0.626, "step": 4847 }, { "epoch": 1.9583354376041613, "grad_norm": 0.5234375, "learning_rate": 0.0001828398058252427, "loss": 0.5712, "step": 4848 }, { "epoch": 1.9587394576031514, "grad_norm": 0.5546875, "learning_rate": 0.0001828155339805825, "loss": 0.5848, "step": 4849 }, { "epoch": 1.9591434776021415, "grad_norm": 0.5, "learning_rate": 0.0001827912621359223, "loss": 0.5663, "step": 4850 }, { "epoch": 1.9595474976011311, "grad_norm": 0.52734375, "learning_rate": 0.0001827669902912621, "loss": 0.6471, "step": 4851 }, { "epoch": 1.9599515176001212, "grad_norm": 0.5, "learning_rate": 0.00018274271844660194, "loss": 0.6182, "step": 4852 }, { "epoch": 1.9603555375991113, "grad_norm": 0.546875, "learning_rate": 0.00018271844660194172, "loss": 0.6595, "step": 4853 }, { "epoch": 1.960759557598101, "grad_norm": 0.578125, "learning_rate": 0.00018269417475728154, "loss": 0.5994, "step": 4854 }, { "epoch": 1.961163577597091, "grad_norm": 0.4921875, "learning_rate": 0.00018266990291262135, "loss": 0.5898, "step": 4855 }, { "epoch": 1.9615675975960811, "grad_norm": 0.5703125, "learning_rate": 0.00018264563106796115, "loss": 0.5818, "step": 4856 }, { "epoch": 1.961971617595071, "grad_norm": 0.47265625, "learning_rate": 0.00018262135922330098, "loss": 0.5395, "step": 4857 }, { "epoch": 1.9623756375940609, "grad_norm": 0.51171875, "learning_rate": 0.00018259708737864075, "loss": 0.5988, "step": 4858 }, { "epoch": 1.962779657593051, "grad_norm": 0.6640625, "learning_rate": 0.00018257281553398056, "loss": 0.687, "step": 4859 }, { "epoch": 1.9631836775920408, "grad_norm": 0.490234375, "learning_rate": 0.00018254854368932039, "loss": 0.5858, "step": 4860 }, { "epoch": 1.9635876975910307, "grad_norm": 0.55078125, "learning_rate": 0.00018252427184466016, "loss": 0.6466, "step": 4861 }, { "epoch": 1.9639917175900208, "grad_norm": 0.53515625, "learning_rate": 0.00018249999999999996, "loss": 0.6215, "step": 4862 }, { "epoch": 1.9643957375890106, "grad_norm": 0.55078125, "learning_rate": 0.0001824757281553398, "loss": 0.6344, "step": 4863 }, { "epoch": 1.9647997575880005, "grad_norm": 0.61328125, "learning_rate": 0.0001824514563106796, "loss": 0.7049, "step": 4864 }, { "epoch": 1.9652037775869906, "grad_norm": 0.54296875, "learning_rate": 0.00018242718446601942, "loss": 0.6539, "step": 4865 }, { "epoch": 1.9656077975859805, "grad_norm": 0.54296875, "learning_rate": 0.0001824029126213592, "loss": 0.6718, "step": 4866 }, { "epoch": 1.9660118175849703, "grad_norm": 0.453125, "learning_rate": 0.000182378640776699, "loss": 0.6545, "step": 4867 }, { "epoch": 1.9664158375839604, "grad_norm": 0.5078125, "learning_rate": 0.00018235436893203883, "loss": 0.6047, "step": 4868 }, { "epoch": 1.9668198575829503, "grad_norm": 0.474609375, "learning_rate": 0.00018233009708737863, "loss": 0.6331, "step": 4869 }, { "epoch": 1.9672238775819402, "grad_norm": 0.458984375, "learning_rate": 0.0001823058252427184, "loss": 0.6036, "step": 4870 }, { "epoch": 1.9676278975809303, "grad_norm": 0.58984375, "learning_rate": 0.00018228155339805824, "loss": 0.6573, "step": 4871 }, { "epoch": 1.9680319175799204, "grad_norm": 0.5625, "learning_rate": 0.00018225728155339804, "loss": 0.6222, "step": 4872 }, { "epoch": 1.96843593757891, "grad_norm": 0.54296875, "learning_rate": 0.00018223300970873784, "loss": 0.5818, "step": 4873 }, { "epoch": 1.9688399575779, "grad_norm": 0.58203125, "learning_rate": 0.00018220873786407765, "loss": 0.5809, "step": 4874 }, { "epoch": 1.9692439775768902, "grad_norm": 0.478515625, "learning_rate": 0.00018218446601941745, "loss": 0.585, "step": 4875 }, { "epoch": 1.96964799757588, "grad_norm": 0.50390625, "learning_rate": 0.00018216019417475728, "loss": 0.594, "step": 4876 }, { "epoch": 1.97005201757487, "grad_norm": 0.515625, "learning_rate": 0.00018213592233009708, "loss": 0.4994, "step": 4877 }, { "epoch": 1.97045603757386, "grad_norm": 0.609375, "learning_rate": 0.00018211165048543686, "loss": 0.5394, "step": 4878 }, { "epoch": 1.9708600575728499, "grad_norm": 0.57421875, "learning_rate": 0.00018208737864077668, "loss": 0.6305, "step": 4879 }, { "epoch": 1.9712640775718397, "grad_norm": 0.57421875, "learning_rate": 0.0001820631067961165, "loss": 0.6272, "step": 4880 }, { "epoch": 1.9716680975708298, "grad_norm": 0.44921875, "learning_rate": 0.0001820388349514563, "loss": 0.5566, "step": 4881 }, { "epoch": 1.9720721175698197, "grad_norm": 0.53125, "learning_rate": 0.00018201456310679612, "loss": 0.6288, "step": 4882 }, { "epoch": 1.9724761375688096, "grad_norm": 0.486328125, "learning_rate": 0.0001819902912621359, "loss": 0.5678, "step": 4883 }, { "epoch": 1.9728801575677997, "grad_norm": 0.466796875, "learning_rate": 0.00018196601941747572, "loss": 0.5828, "step": 4884 }, { "epoch": 1.9732841775667895, "grad_norm": 0.484375, "learning_rate": 0.00018194174757281553, "loss": 0.5643, "step": 4885 }, { "epoch": 1.9736881975657794, "grad_norm": 0.6875, "learning_rate": 0.00018191747572815533, "loss": 0.6031, "step": 4886 }, { "epoch": 1.9740922175647695, "grad_norm": 0.609375, "learning_rate": 0.00018189320388349513, "loss": 0.6955, "step": 4887 }, { "epoch": 1.9744962375637594, "grad_norm": 0.5625, "learning_rate": 0.00018186893203883493, "loss": 0.6263, "step": 4888 }, { "epoch": 1.9749002575627492, "grad_norm": 0.51171875, "learning_rate": 0.00018184466019417474, "loss": 0.6036, "step": 4889 }, { "epoch": 1.9753042775617393, "grad_norm": 0.490234375, "learning_rate": 0.00018182038834951456, "loss": 0.6111, "step": 4890 }, { "epoch": 1.9757082975607294, "grad_norm": 0.61328125, "learning_rate": 0.00018179611650485434, "loss": 0.6762, "step": 4891 }, { "epoch": 1.976112317559719, "grad_norm": 0.470703125, "learning_rate": 0.00018177184466019414, "loss": 0.6036, "step": 4892 }, { "epoch": 1.9765163375587091, "grad_norm": 0.453125, "learning_rate": 0.00018174757281553397, "loss": 0.5552, "step": 4893 }, { "epoch": 1.9769203575576992, "grad_norm": 0.455078125, "learning_rate": 0.00018172330097087377, "loss": 0.5612, "step": 4894 }, { "epoch": 1.977324377556689, "grad_norm": 0.52734375, "learning_rate": 0.0001816990291262136, "loss": 0.5987, "step": 4895 }, { "epoch": 1.977728397555679, "grad_norm": 0.498046875, "learning_rate": 0.00018167475728155338, "loss": 0.5713, "step": 4896 }, { "epoch": 1.978132417554669, "grad_norm": 0.51953125, "learning_rate": 0.00018165048543689318, "loss": 0.6092, "step": 4897 }, { "epoch": 1.978536437553659, "grad_norm": 0.52734375, "learning_rate": 0.000181626213592233, "loss": 0.6406, "step": 4898 }, { "epoch": 1.9789404575526488, "grad_norm": 0.609375, "learning_rate": 0.00018160194174757279, "loss": 0.6589, "step": 4899 }, { "epoch": 1.979344477551639, "grad_norm": 0.50390625, "learning_rate": 0.0001815776699029126, "loss": 0.5505, "step": 4900 }, { "epoch": 1.9797484975506288, "grad_norm": 0.53515625, "learning_rate": 0.00018155339805825242, "loss": 0.5697, "step": 4901 }, { "epoch": 1.9801525175496186, "grad_norm": 0.58203125, "learning_rate": 0.00018152912621359222, "loss": 0.5362, "step": 4902 }, { "epoch": 1.9805565375486087, "grad_norm": 1.1015625, "learning_rate": 0.000181504854368932, "loss": 0.7898, "step": 4903 }, { "epoch": 1.9809605575475986, "grad_norm": 0.462890625, "learning_rate": 0.00018148058252427182, "loss": 0.5807, "step": 4904 }, { "epoch": 1.9813645775465885, "grad_norm": 0.44921875, "learning_rate": 0.00018145631067961163, "loss": 0.5288, "step": 4905 }, { "epoch": 1.9817685975455785, "grad_norm": 0.6171875, "learning_rate": 0.00018143203883495146, "loss": 0.7049, "step": 4906 }, { "epoch": 1.9821726175445684, "grad_norm": 0.53515625, "learning_rate": 0.00018140776699029126, "loss": 0.608, "step": 4907 }, { "epoch": 1.9825766375435583, "grad_norm": 0.474609375, "learning_rate": 0.00018138349514563103, "loss": 0.5033, "step": 4908 }, { "epoch": 1.9829806575425484, "grad_norm": 0.5390625, "learning_rate": 0.00018135922330097086, "loss": 0.6532, "step": 4909 }, { "epoch": 1.9833846775415385, "grad_norm": 0.58203125, "learning_rate": 0.00018133495145631067, "loss": 0.6683, "step": 4910 }, { "epoch": 1.9837886975405281, "grad_norm": 0.5078125, "learning_rate": 0.00018131067961165047, "loss": 0.5844, "step": 4911 }, { "epoch": 1.9841927175395182, "grad_norm": 0.58203125, "learning_rate": 0.00018128640776699027, "loss": 0.655, "step": 4912 }, { "epoch": 1.9845967375385083, "grad_norm": 0.5234375, "learning_rate": 0.00018126213592233007, "loss": 0.6468, "step": 4913 }, { "epoch": 1.9850007575374982, "grad_norm": 0.69140625, "learning_rate": 0.0001812378640776699, "loss": 0.6575, "step": 4914 }, { "epoch": 1.985404777536488, "grad_norm": 0.58203125, "learning_rate": 0.0001812135922330097, "loss": 0.6421, "step": 4915 }, { "epoch": 1.9858087975354781, "grad_norm": 0.51171875, "learning_rate": 0.00018118932038834948, "loss": 0.5766, "step": 4916 }, { "epoch": 1.986212817534468, "grad_norm": 0.486328125, "learning_rate": 0.0001811650485436893, "loss": 0.6008, "step": 4917 }, { "epoch": 1.9866168375334579, "grad_norm": 0.65234375, "learning_rate": 0.0001811407766990291, "loss": 0.6701, "step": 4918 }, { "epoch": 1.987020857532448, "grad_norm": 0.447265625, "learning_rate": 0.00018111650485436891, "loss": 0.5376, "step": 4919 }, { "epoch": 1.9874248775314378, "grad_norm": 0.6015625, "learning_rate": 0.00018109223300970874, "loss": 0.5981, "step": 4920 }, { "epoch": 1.9878288975304277, "grad_norm": 0.51171875, "learning_rate": 0.00018106796116504852, "loss": 0.5876, "step": 4921 }, { "epoch": 1.9882329175294178, "grad_norm": 0.55078125, "learning_rate": 0.00018104368932038832, "loss": 0.6429, "step": 4922 }, { "epoch": 1.9886369375284076, "grad_norm": 0.5078125, "learning_rate": 0.00018101941747572815, "loss": 0.637, "step": 4923 }, { "epoch": 1.9890409575273975, "grad_norm": 0.49609375, "learning_rate": 0.00018099514563106795, "loss": 0.5877, "step": 4924 }, { "epoch": 1.9894449775263876, "grad_norm": 0.51171875, "learning_rate": 0.00018097087378640776, "loss": 0.6946, "step": 4925 }, { "epoch": 1.9898489975253775, "grad_norm": 0.490234375, "learning_rate": 0.00018094660194174756, "loss": 0.6038, "step": 4926 }, { "epoch": 1.9902530175243673, "grad_norm": 0.48828125, "learning_rate": 0.00018092233009708736, "loss": 0.6368, "step": 4927 }, { "epoch": 1.9906570375233574, "grad_norm": 0.515625, "learning_rate": 0.0001808980582524272, "loss": 0.6769, "step": 4928 }, { "epoch": 1.9910610575223475, "grad_norm": 0.447265625, "learning_rate": 0.00018087378640776696, "loss": 0.5713, "step": 4929 }, { "epoch": 1.9914650775213372, "grad_norm": 0.55859375, "learning_rate": 0.00018084951456310677, "loss": 0.6566, "step": 4930 }, { "epoch": 1.9918690975203273, "grad_norm": 0.55078125, "learning_rate": 0.0001808252427184466, "loss": 0.6129, "step": 4931 }, { "epoch": 1.9922731175193173, "grad_norm": 0.578125, "learning_rate": 0.0001808009708737864, "loss": 0.6452, "step": 4932 }, { "epoch": 1.992677137518307, "grad_norm": 0.5234375, "learning_rate": 0.00018077669902912617, "loss": 0.6099, "step": 4933 }, { "epoch": 1.993081157517297, "grad_norm": 0.546875, "learning_rate": 0.000180752427184466, "loss": 0.6303, "step": 4934 }, { "epoch": 1.9934851775162872, "grad_norm": 0.6015625, "learning_rate": 0.0001807281553398058, "loss": 0.6086, "step": 4935 }, { "epoch": 1.993889197515277, "grad_norm": 0.54296875, "learning_rate": 0.00018070388349514564, "loss": 0.6491, "step": 4936 }, { "epoch": 1.994293217514267, "grad_norm": 0.486328125, "learning_rate": 0.0001806796116504854, "loss": 0.623, "step": 4937 }, { "epoch": 1.994697237513257, "grad_norm": 0.52734375, "learning_rate": 0.0001806553398058252, "loss": 0.6871, "step": 4938 }, { "epoch": 1.9951012575122469, "grad_norm": 0.58203125, "learning_rate": 0.00018063106796116504, "loss": 0.5789, "step": 4939 }, { "epoch": 1.9955052775112367, "grad_norm": 0.53125, "learning_rate": 0.00018060679611650484, "loss": 0.6759, "step": 4940 }, { "epoch": 1.9959092975102268, "grad_norm": 0.46875, "learning_rate": 0.00018058252427184462, "loss": 0.6401, "step": 4941 }, { "epoch": 1.9963133175092167, "grad_norm": 0.5625, "learning_rate": 0.00018055825242718445, "loss": 0.6444, "step": 4942 }, { "epoch": 1.9967173375082066, "grad_norm": 0.498046875, "learning_rate": 0.00018053398058252425, "loss": 0.6397, "step": 4943 }, { "epoch": 1.9971213575071967, "grad_norm": 0.55078125, "learning_rate": 0.00018050970873786408, "loss": 0.571, "step": 4944 }, { "epoch": 1.9975253775061865, "grad_norm": 0.515625, "learning_rate": 0.00018048543689320388, "loss": 0.6403, "step": 4945 }, { "epoch": 1.9979293975051764, "grad_norm": 0.5, "learning_rate": 0.00018046116504854366, "loss": 0.6238, "step": 4946 }, { "epoch": 1.9983334175041665, "grad_norm": 0.5078125, "learning_rate": 0.0001804368932038835, "loss": 0.6702, "step": 4947 }, { "epoch": 1.9987374375031564, "grad_norm": 0.48828125, "learning_rate": 0.0001804126213592233, "loss": 0.6376, "step": 4948 }, { "epoch": 1.9991414575021462, "grad_norm": 0.5234375, "learning_rate": 0.0001803883495145631, "loss": 0.6554, "step": 4949 }, { "epoch": 1.9995454775011363, "grad_norm": 0.490234375, "learning_rate": 0.0001803640776699029, "loss": 0.5481, "step": 4950 }, { "epoch": 1.9999494975001264, "grad_norm": 0.482421875, "learning_rate": 0.0001803398058252427, "loss": 0.5856, "step": 4951 }, { "epoch": 2.0, "grad_norm": 2.3125, "learning_rate": 0.0001803155339805825, "loss": 0.4942, "step": 4952 }, { "epoch": 2.0, "eval_loss": 2.4348788261413574, "eval_runtime": 213.7191, "eval_samples_per_second": 28.739, "eval_steps_per_second": 28.739, "step": 4952 }, { "epoch": 2.0, "eval_title2sid_loss": 0.7016080617904663, "eval_title2sid_runtime": 126.8384, "eval_title2sid_samples_per_second": 30.007, "eval_title2sid_steps_per_second": 30.007, "step": 4952 }, { "epoch": 2.0, "eval_title2sid_loss": 0.7016080617904663, "eval_title2sid_runtime": 126.8384, "eval_title2sid_samples_per_second": 30.007, "eval_title2sid_steps_per_second": 30.007, "step": 4952 }, { "epoch": 2.0, "eval_sid2title_loss": 0.12928932905197144, "eval_sid2title_runtime": 127.9496, "eval_sid2title_samples_per_second": 29.91, "eval_sid2title_steps_per_second": 29.91, "step": 4952 }, { "epoch": 2.0, "eval_sid2title_loss": 0.12928932905197144, "eval_sid2title_runtime": 127.9496, "eval_sid2title_samples_per_second": 29.91, "eval_sid2title_steps_per_second": 29.91, "step": 4952 } ], "logging_steps": 1, "max_steps": 12380, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.383451815677231e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }