{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 606,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.024783147459727387,
      "grad_norm": 2.3770346641540527,
      "learning_rate": 8e-05,
      "loss": 2.289,
      "step": 5
    },
    {
      "epoch": 0.04956629491945477,
      "grad_norm": 1.5179001092910767,
      "learning_rate": 0.00018,
      "loss": 1.6219,
      "step": 10
    },
    {
      "epoch": 0.07434944237918216,
      "grad_norm": 0.6122242212295532,
      "learning_rate": 0.0001986577181208054,
      "loss": 0.9076,
      "step": 15
    },
    {
      "epoch": 0.09913258983890955,
      "grad_norm": 0.7408744096755981,
      "learning_rate": 0.00019697986577181208,
      "loss": 1.0936,
      "step": 20
    },
    {
      "epoch": 0.12391573729863693,
      "grad_norm": 0.773099958896637,
      "learning_rate": 0.0001953020134228188,
      "loss": 1.0541,
      "step": 25
    },
    {
      "epoch": 0.14869888475836432,
      "grad_norm": 0.5606175661087036,
      "learning_rate": 0.00019362416107382552,
      "loss": 0.857,
      "step": 30
    },
    {
      "epoch": 0.1734820322180917,
      "grad_norm": 0.5474881529808044,
      "learning_rate": 0.0001919463087248322,
      "loss": 0.8884,
      "step": 35
    },
    {
      "epoch": 0.1982651796778191,
      "grad_norm": 0.6062418818473816,
      "learning_rate": 0.00019026845637583895,
      "loss": 0.7933,
      "step": 40
    },
    {
      "epoch": 0.22304832713754646,
      "grad_norm": 0.5491968393325806,
      "learning_rate": 0.00018859060402684564,
      "loss": 1.0993,
      "step": 45
    },
    {
      "epoch": 0.24783147459727387,
      "grad_norm": 0.6508728861808777,
      "learning_rate": 0.00018691275167785236,
      "loss": 0.7969,
      "step": 50
    },
    {
      "epoch": 0.27261462205700127,
      "grad_norm": 0.5214135050773621,
      "learning_rate": 0.00018523489932885907,
      "loss": 0.7337,
      "step": 55
    },
    {
      "epoch": 0.29739776951672864,
      "grad_norm": 0.6263068318367004,
      "learning_rate": 0.0001835570469798658,
      "loss": 0.7676,
      "step": 60
    },
    {
      "epoch": 0.322180916976456,
      "grad_norm": 0.905588686466217,
      "learning_rate": 0.00018187919463087248,
      "loss": 0.6215,
      "step": 65
    },
    {
      "epoch": 0.3469640644361834,
      "grad_norm": 0.7323358654975891,
      "learning_rate": 0.0001802013422818792,
      "loss": 0.8739,
      "step": 70
    },
    {
      "epoch": 0.37174721189591076,
      "grad_norm": 0.7098009586334229,
      "learning_rate": 0.0001785234899328859,
      "loss": 0.8578,
      "step": 75
    },
    {
      "epoch": 0.3965303593556382,
      "grad_norm": 0.6922330260276794,
      "learning_rate": 0.00017684563758389263,
      "loss": 0.9465,
      "step": 80
    },
    {
      "epoch": 0.42131350681536556,
      "grad_norm": 0.5942096710205078,
      "learning_rate": 0.00017516778523489935,
      "loss": 0.8533,
      "step": 85
    },
    {
      "epoch": 0.44609665427509293,
      "grad_norm": 0.5012693405151367,
      "learning_rate": 0.00017348993288590604,
      "loss": 0.9949,
      "step": 90
    },
    {
      "epoch": 0.4708798017348203,
      "grad_norm": 0.6599251627922058,
      "learning_rate": 0.00017181208053691275,
      "loss": 0.6861,
      "step": 95
    },
    {
      "epoch": 0.49566294919454773,
      "grad_norm": 0.6302607655525208,
      "learning_rate": 0.00017013422818791947,
      "loss": 0.7804,
      "step": 100
    },
    {
      "epoch": 0.5204460966542751,
      "grad_norm": 0.4946972727775574,
      "learning_rate": 0.00016845637583892619,
      "loss": 0.8112,
      "step": 105
    },
    {
      "epoch": 0.5452292441140025,
      "grad_norm": 0.5818770527839661,
      "learning_rate": 0.0001667785234899329,
      "loss": 0.7273,
      "step": 110
    },
    {
      "epoch": 0.5700123915737298,
      "grad_norm": 0.8738688826560974,
      "learning_rate": 0.00016510067114093962,
      "loss": 0.8921,
      "step": 115
    },
    {
      "epoch": 0.5947955390334573,
      "grad_norm": 0.6265502572059631,
      "learning_rate": 0.0001634228187919463,
      "loss": 0.7654,
      "step": 120
    },
    {
      "epoch": 0.6195786864931846,
      "grad_norm": 0.6213403344154358,
      "learning_rate": 0.00016174496644295302,
      "loss": 0.8194,
      "step": 125
    },
    {
      "epoch": 0.644361833952912,
      "grad_norm": 0.6503012776374817,
      "learning_rate": 0.00016006711409395974,
      "loss": 0.8546,
      "step": 130
    },
    {
      "epoch": 0.6691449814126395,
      "grad_norm": 0.4928428828716278,
      "learning_rate": 0.00015838926174496643,
      "loss": 0.528,
      "step": 135
    },
    {
      "epoch": 0.6939281288723668,
      "grad_norm": 0.581334114074707,
      "learning_rate": 0.00015671140939597315,
      "loss": 0.5692,
      "step": 140
    },
    {
      "epoch": 0.7187112763320942,
      "grad_norm": 0.7810402512550354,
      "learning_rate": 0.0001550335570469799,
      "loss": 1.1702,
      "step": 145
    },
    {
      "epoch": 0.7434944237918215,
      "grad_norm": 0.6355721354484558,
      "learning_rate": 0.00015335570469798658,
      "loss": 0.6699,
      "step": 150
    },
    {
      "epoch": 0.7682775712515489,
      "grad_norm": 0.521967887878418,
      "learning_rate": 0.0001516778523489933,
      "loss": 0.6565,
      "step": 155
    },
    {
      "epoch": 0.7930607187112764,
      "grad_norm": 0.4311043322086334,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.7537,
      "step": 160
    },
    {
      "epoch": 0.8178438661710037,
      "grad_norm": 0.5549885630607605,
      "learning_rate": 0.0001483221476510067,
      "loss": 0.6691,
      "step": 165
    },
    {
      "epoch": 0.8426270136307311,
      "grad_norm": 0.4813348650932312,
      "learning_rate": 0.00014664429530201342,
      "loss": 0.5279,
      "step": 170
    },
    {
      "epoch": 0.8674101610904585,
      "grad_norm": 0.6353710293769836,
      "learning_rate": 0.00014496644295302014,
      "loss": 0.6643,
      "step": 175
    },
    {
      "epoch": 0.8921933085501859,
      "grad_norm": 0.49870240688323975,
      "learning_rate": 0.00014328859060402685,
      "loss": 0.7134,
      "step": 180
    },
    {
      "epoch": 0.9169764560099133,
      "grad_norm": 0.536192774772644,
      "learning_rate": 0.00014161073825503357,
      "loss": 0.6358,
      "step": 185
    },
    {
      "epoch": 0.9417596034696406,
      "grad_norm": 0.6779680848121643,
      "learning_rate": 0.0001399328859060403,
      "loss": 0.716,
      "step": 190
    },
    {
      "epoch": 0.966542750929368,
      "grad_norm": 0.6344008445739746,
      "learning_rate": 0.00013825503355704698,
      "loss": 0.6644,
      "step": 195
    },
    {
      "epoch": 0.9913258983890955,
      "grad_norm": 0.5447779297828674,
      "learning_rate": 0.0001365771812080537,
      "loss": 0.9557,
      "step": 200
    },
    {
      "epoch": 1.0148698884758365,
      "grad_norm": 0.45053160190582275,
      "learning_rate": 0.0001348993288590604,
      "loss": 0.6475,
      "step": 205
    },
    {
      "epoch": 1.0396530359355638,
      "grad_norm": 0.3626702129840851,
      "learning_rate": 0.0001332214765100671,
      "loss": 0.3968,
      "step": 210
    },
    {
      "epoch": 1.0644361833952911,
      "grad_norm": 0.6345260739326477,
      "learning_rate": 0.00013154362416107384,
      "loss": 0.6513,
      "step": 215
    },
    {
      "epoch": 1.0892193308550187,
      "grad_norm": 0.6892905235290527,
      "learning_rate": 0.00012986577181208056,
      "loss": 0.7689,
      "step": 220
    },
    {
      "epoch": 1.114002478314746,
      "grad_norm": 0.5781119465827942,
      "learning_rate": 0.00012818791946308725,
      "loss": 0.7809,
      "step": 225
    },
    {
      "epoch": 1.1387856257744733,
      "grad_norm": 0.5030850768089294,
      "learning_rate": 0.00012651006711409397,
      "loss": 0.6872,
      "step": 230
    },
    {
      "epoch": 1.1635687732342008,
      "grad_norm": 0.7697097063064575,
      "learning_rate": 0.00012483221476510068,
      "loss": 0.58,
      "step": 235
    },
    {
      "epoch": 1.1883519206939281,
      "grad_norm": 0.6653311252593994,
      "learning_rate": 0.00012315436241610737,
      "loss": 0.7252,
      "step": 240
    },
    {
      "epoch": 1.2131350681536555,
      "grad_norm": 0.5599316358566284,
      "learning_rate": 0.00012147651006711409,
      "loss": 0.5401,
      "step": 245
    },
    {
      "epoch": 1.2379182156133828,
      "grad_norm": 0.621579110622406,
      "learning_rate": 0.00011979865771812082,
      "loss": 0.6159,
      "step": 250
    },
    {
      "epoch": 1.2627013630731103,
      "grad_norm": 0.6578307151794434,
      "learning_rate": 0.00011812080536912754,
      "loss": 0.4616,
      "step": 255
    },
    {
      "epoch": 1.2874845105328376,
      "grad_norm": 0.6472283601760864,
      "learning_rate": 0.00011644295302013424,
      "loss": 0.4225,
      "step": 260
    },
    {
      "epoch": 1.3122676579925652,
      "grad_norm": 0.5076863765716553,
      "learning_rate": 0.00011476510067114094,
      "loss": 0.5308,
      "step": 265
    },
    {
      "epoch": 1.3370508054522925,
      "grad_norm": 0.5231316089630127,
      "learning_rate": 0.00011308724832214766,
      "loss": 0.5059,
      "step": 270
    },
    {
      "epoch": 1.3618339529120198,
      "grad_norm": 0.7917241454124451,
      "learning_rate": 0.00011140939597315436,
      "loss": 0.4856,
      "step": 275
    },
    {
      "epoch": 1.3866171003717471,
      "grad_norm": 0.616142988204956,
      "learning_rate": 0.00010973154362416106,
      "loss": 0.4773,
      "step": 280
    },
    {
      "epoch": 1.4114002478314747,
      "grad_norm": 0.6872631311416626,
      "learning_rate": 0.0001080536912751678,
      "loss": 0.5274,
      "step": 285
    },
    {
      "epoch": 1.436183395291202,
      "grad_norm": 0.7652924656867981,
      "learning_rate": 0.00010637583892617451,
      "loss": 0.495,
      "step": 290
    },
    {
      "epoch": 1.4609665427509293,
      "grad_norm": 0.3928225636482239,
      "learning_rate": 0.00010469798657718121,
      "loss": 0.3075,
      "step": 295
    },
    {
      "epoch": 1.4857496902106568,
      "grad_norm": 0.5579555630683899,
      "learning_rate": 0.00010302013422818793,
      "loss": 0.5002,
      "step": 300
    },
    {
      "epoch": 1.5105328376703842,
      "grad_norm": 0.530565083026886,
      "learning_rate": 0.00010134228187919463,
      "loss": 0.712,
      "step": 305
    },
    {
      "epoch": 1.5353159851301115,
      "grad_norm": 0.9171392321586609,
      "learning_rate": 9.966442953020134e-05,
      "loss": 0.5517,
      "step": 310
    },
    {
      "epoch": 1.5600991325898388,
      "grad_norm": 0.7776666879653931,
      "learning_rate": 9.798657718120807e-05,
      "loss": 0.4571,
      "step": 315
    },
    {
      "epoch": 1.5848822800495663,
      "grad_norm": 0.672423243522644,
      "learning_rate": 9.630872483221477e-05,
      "loss": 0.5058,
      "step": 320
    },
    {
      "epoch": 1.6096654275092936,
      "grad_norm": 0.5513662695884705,
      "learning_rate": 9.463087248322147e-05,
      "loss": 0.63,
      "step": 325
    },
    {
      "epoch": 1.6344485749690212,
      "grad_norm": 0.4629589319229126,
      "learning_rate": 9.295302013422819e-05,
      "loss": 0.4052,
      "step": 330
    },
    {
      "epoch": 1.6592317224287485,
      "grad_norm": 0.6519562005996704,
      "learning_rate": 9.127516778523491e-05,
      "loss": 0.5305,
      "step": 335
    },
    {
      "epoch": 1.6840148698884758,
      "grad_norm": 0.5246181488037109,
      "learning_rate": 8.959731543624161e-05,
      "loss": 0.4193,
      "step": 340
    },
    {
      "epoch": 1.7087980173482031,
      "grad_norm": 0.7614356279373169,
      "learning_rate": 8.791946308724833e-05,
      "loss": 0.5908,
      "step": 345
    },
    {
      "epoch": 1.7335811648079305,
      "grad_norm": 0.6947569847106934,
      "learning_rate": 8.624161073825504e-05,
      "loss": 0.4335,
      "step": 350
    },
    {
      "epoch": 1.758364312267658,
      "grad_norm": 0.4542942941188812,
      "learning_rate": 8.456375838926175e-05,
      "loss": 0.6186,
      "step": 355
    },
    {
      "epoch": 1.7831474597273855,
      "grad_norm": 0.563624382019043,
      "learning_rate": 8.288590604026846e-05,
      "loss": 0.4605,
      "step": 360
    },
    {
      "epoch": 1.8079306071871128,
      "grad_norm": 0.5522857904434204,
      "learning_rate": 8.120805369127518e-05,
      "loss": 0.5918,
      "step": 365
    },
    {
      "epoch": 1.8327137546468402,
      "grad_norm": 0.3432987928390503,
      "learning_rate": 7.953020134228188e-05,
      "loss": 0.4177,
      "step": 370
    },
    {
      "epoch": 1.8574969021065675,
      "grad_norm": 0.5656675100326538,
      "learning_rate": 7.78523489932886e-05,
      "loss": 0.6676,
      "step": 375
    },
    {
      "epoch": 1.8822800495662948,
      "grad_norm": 0.5128700733184814,
      "learning_rate": 7.61744966442953e-05,
      "loss": 0.5425,
      "step": 380
    },
    {
      "epoch": 1.9070631970260223,
      "grad_norm": 0.6921593546867371,
      "learning_rate": 7.449664429530202e-05,
      "loss": 0.4439,
      "step": 385
    },
    {
      "epoch": 1.9318463444857497,
      "grad_norm": 0.6611471176147461,
      "learning_rate": 7.281879194630872e-05,
      "loss": 0.5939,
      "step": 390
    },
    {
      "epoch": 1.9566294919454772,
      "grad_norm": 0.8047774434089661,
      "learning_rate": 7.114093959731544e-05,
      "loss": 0.7568,
      "step": 395
    },
    {
      "epoch": 1.9814126394052045,
      "grad_norm": 0.6147902607917786,
      "learning_rate": 6.946308724832216e-05,
      "loss": 0.5265,
      "step": 400
    },
    {
      "epoch": 2.0049566294919456,
      "grad_norm": 0.4429222643375397,
      "learning_rate": 6.778523489932886e-05,
      "loss": 0.5753,
      "step": 405
    },
    {
      "epoch": 2.029739776951673,
      "grad_norm": 0.49188828468322754,
      "learning_rate": 6.610738255033558e-05,
      "loss": 0.4439,
      "step": 410
    },
    {
      "epoch": 2.0545229244114003,
      "grad_norm": 0.7511292695999146,
      "learning_rate": 6.442953020134228e-05,
      "loss": 0.3947,
      "step": 415
    },
    {
      "epoch": 2.0793060718711276,
      "grad_norm": 0.5809934735298157,
      "learning_rate": 6.2751677852349e-05,
      "loss": 0.3935,
      "step": 420
    },
    {
      "epoch": 2.104089219330855,
      "grad_norm": 1.1626038551330566,
      "learning_rate": 6.107382550335571e-05,
      "loss": 0.388,
      "step": 425
    },
    {
      "epoch": 2.1288723667905822,
      "grad_norm": 0.6084951758384705,
      "learning_rate": 5.9395973154362415e-05,
      "loss": 0.4371,
      "step": 430
    },
    {
      "epoch": 2.15365551425031,
      "grad_norm": 0.5379754304885864,
      "learning_rate": 5.771812080536914e-05,
      "loss": 0.3975,
      "step": 435
    },
    {
      "epoch": 2.1784386617100373,
      "grad_norm": 0.4898189902305603,
      "learning_rate": 5.604026845637584e-05,
      "loss": 0.5212,
      "step": 440
    },
    {
      "epoch": 2.2032218091697646,
      "grad_norm": 0.8490257859230042,
      "learning_rate": 5.436241610738255e-05,
      "loss": 0.3889,
      "step": 445
    },
    {
      "epoch": 2.228004956629492,
      "grad_norm": 0.8492045998573303,
      "learning_rate": 5.268456375838926e-05,
      "loss": 0.354,
      "step": 450
    },
    {
      "epoch": 2.2527881040892193,
      "grad_norm": 0.3837261497974396,
      "learning_rate": 5.100671140939598e-05,
      "loss": 0.2432,
      "step": 455
    },
    {
      "epoch": 2.2775712515489466,
      "grad_norm": 0.617027759552002,
      "learning_rate": 4.932885906040269e-05,
      "loss": 0.298,
      "step": 460
    },
    {
      "epoch": 2.3023543990086743,
      "grad_norm": 0.6203614473342896,
      "learning_rate": 4.76510067114094e-05,
      "loss": 0.4173,
      "step": 465
    },
    {
      "epoch": 2.3271375464684017,
      "grad_norm": 0.6991643309593201,
      "learning_rate": 4.597315436241611e-05,
      "loss": 0.2615,
      "step": 470
    },
    {
      "epoch": 2.351920693928129,
      "grad_norm": 0.8322747349739075,
      "learning_rate": 4.4295302013422824e-05,
      "loss": 0.354,
      "step": 475
    },
    {
      "epoch": 2.3767038413878563,
      "grad_norm": 0.8549102544784546,
      "learning_rate": 4.2617449664429534e-05,
      "loss": 0.3329,
      "step": 480
    },
    {
      "epoch": 2.4014869888475836,
      "grad_norm": 0.7914723753929138,
      "learning_rate": 4.0939597315436244e-05,
      "loss": 0.4457,
      "step": 485
    },
    {
      "epoch": 2.426270136307311,
      "grad_norm": 0.24238666892051697,
      "learning_rate": 3.9261744966442954e-05,
      "loss": 0.1957,
      "step": 490
    },
    {
      "epoch": 2.4510532837670382,
      "grad_norm": 0.7603949904441833,
      "learning_rate": 3.758389261744967e-05,
      "loss": 0.363,
      "step": 495
    },
    {
      "epoch": 2.4758364312267656,
      "grad_norm": 0.5621479153633118,
      "learning_rate": 3.5906040268456373e-05,
      "loss": 0.1987,
      "step": 500
    },
    {
      "epoch": 2.5006195786864933,
      "grad_norm": 0.6900079846382141,
      "learning_rate": 3.422818791946309e-05,
      "loss": 0.3846,
      "step": 505
    },
    {
      "epoch": 2.5254027261462206,
      "grad_norm": 0.7483950853347778,
      "learning_rate": 3.25503355704698e-05,
      "loss": 0.4089,
      "step": 510
    },
    {
      "epoch": 2.550185873605948,
      "grad_norm": 0.7797295451164246,
      "learning_rate": 3.087248322147651e-05,
      "loss": 0.3883,
      "step": 515
    },
    {
      "epoch": 2.5749690210656753,
      "grad_norm": 1.0466924905776978,
      "learning_rate": 2.9194630872483227e-05,
      "loss": 0.5279,
      "step": 520
    },
    {
      "epoch": 2.5997521685254026,
      "grad_norm": 0.9217528700828552,
      "learning_rate": 2.7516778523489933e-05,
      "loss": 0.4031,
      "step": 525
    },
    {
      "epoch": 2.6245353159851303,
      "grad_norm": 0.7957612872123718,
      "learning_rate": 2.5838926174496646e-05,
      "loss": 0.3163,
      "step": 530
    },
    {
      "epoch": 2.6493184634448577,
      "grad_norm": 0.7865810990333557,
      "learning_rate": 2.416107382550336e-05,
      "loss": 0.5586,
      "step": 535
    },
    {
      "epoch": 2.674101610904585,
      "grad_norm": 0.5190469026565552,
      "learning_rate": 2.248322147651007e-05,
      "loss": 0.2444,
      "step": 540
    },
    {
      "epoch": 2.6988847583643123,
      "grad_norm": 0.790671706199646,
      "learning_rate": 2.080536912751678e-05,
      "loss": 0.6483,
      "step": 545
    },
    {
      "epoch": 2.7236679058240396,
      "grad_norm": 0.9736090302467346,
      "learning_rate": 1.9127516778523493e-05,
      "loss": 0.3612,
      "step": 550
    },
    {
      "epoch": 2.748451053283767,
      "grad_norm": 0.7225416898727417,
      "learning_rate": 1.7449664429530202e-05,
      "loss": 0.3578,
      "step": 555
    },
    {
      "epoch": 2.7732342007434942,
      "grad_norm": 0.5790948867797852,
      "learning_rate": 1.5771812080536912e-05,
      "loss": 0.2573,
      "step": 560
    },
    {
      "epoch": 2.7980173482032216,
      "grad_norm": 0.6138997673988342,
      "learning_rate": 1.4093959731543624e-05,
      "loss": 0.5458,
      "step": 565
    },
    {
      "epoch": 2.8228004956629493,
      "grad_norm": 0.5243539810180664,
      "learning_rate": 1.2416107382550337e-05,
      "loss": 0.3168,
      "step": 570
    },
    {
      "epoch": 2.8475836431226766,
      "grad_norm": 1.29122793674469,
      "learning_rate": 1.0738255033557047e-05,
      "loss": 0.5166,
      "step": 575
    },
    {
      "epoch": 2.872366790582404,
      "grad_norm": 0.6179217100143433,
      "learning_rate": 9.060402684563759e-06,
      "loss": 0.2481,
      "step": 580
    },
    {
      "epoch": 2.8971499380421313,
      "grad_norm": 0.7408438324928284,
      "learning_rate": 7.382550335570471e-06,
      "loss": 0.2677,
      "step": 585
    },
    {
      "epoch": 2.9219330855018586,
      "grad_norm": 0.4825621247291565,
      "learning_rate": 5.704697986577182e-06,
      "loss": 0.3451,
      "step": 590
    },
    {
      "epoch": 2.9467162329615864,
      "grad_norm": 0.9304247498512268,
      "learning_rate": 4.026845637583892e-06,
      "loss": 0.3597,
      "step": 595
    },
    {
      "epoch": 2.9714993804213137,
      "grad_norm": 0.7385866045951843,
      "learning_rate": 2.3489932885906044e-06,
      "loss": 0.5111,
      "step": 600
    },
    {
      "epoch": 2.996282527881041,
      "grad_norm": 0.7714264988899231,
      "learning_rate": 6.711409395973154e-07,
      "loss": 0.3069,
      "step": 605
    },
    {
      "epoch": 3.0,
      "step": 606,
      "total_flos": 2.9833309199010816e+16,
      "train_loss": 0.5936956948327152,
      "train_runtime": 983.9366,
      "train_samples_per_second": 4.921,
      "train_steps_per_second": 0.616
    }
  ],
  "logging_steps": 5,
  "max_steps": 606,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.9833309199010816e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}