| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 99.5475113122172, |
| "eval_steps": 20000, |
| "global_step": 308000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 122.48411560058594, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 5.087, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 87.30136108398438, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.5234, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 14.430279731750488, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 4.1619, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 7.7585768699646, |
| "learning_rate": 3.99e-05, |
| "loss": 4.0647, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 29.10972785949707, |
| "learning_rate": 4.99e-05, |
| "loss": 4.0281, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 2.4226176738739014, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.9624, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 6.9928483963012695, |
| "learning_rate": 6.99e-05, |
| "loss": 3.9599, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 2.1512234210968018, |
| "learning_rate": 7.99e-05, |
| "loss": 3.9121, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 2.635658025741577, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 3.8726, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 2.5363662242889404, |
| "learning_rate": 9.99e-05, |
| "loss": 3.8475, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 2.313490390777588, |
| "learning_rate": 0.0001099, |
| "loss": 3.8057, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 4.086860656738281, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 3.7642, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 2.341607093811035, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 3.7752, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 2.455345630645752, |
| "learning_rate": 0.0001399, |
| "loss": 3.759, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 2.450104236602783, |
| "learning_rate": 0.0001499, |
| "loss": 3.7248, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 15.239123344421387, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 3.7233, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 2.546140670776367, |
| "learning_rate": 0.0001699, |
| "loss": 3.6952, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 2.6291635036468506, |
| "learning_rate": 0.0001799, |
| "loss": 3.6845, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 2.4870760440826416, |
| "learning_rate": 0.0001899, |
| "loss": 3.6955, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 2.939796209335327, |
| "learning_rate": 0.0001999, |
| "loss": 3.6431, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 2.5228323936462402, |
| "learning_rate": 0.0002099, |
| "loss": 3.6324, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 2.6174564361572266, |
| "learning_rate": 0.0002199, |
| "loss": 3.6422, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 2.353490114212036, |
| "learning_rate": 0.0002299, |
| "loss": 3.6162, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 2.3185763359069824, |
| "learning_rate": 0.0002399, |
| "loss": 3.6186, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 3.45831036567688, |
| "learning_rate": 0.0002499, |
| "loss": 3.5984, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.644831895828247, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 3.5993, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 2.6962873935699463, |
| "learning_rate": 0.0002699, |
| "loss": 3.6063, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 2.297571897506714, |
| "learning_rate": 0.0002799, |
| "loss": 3.6051, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 2.1946680545806885, |
| "learning_rate": 0.0002899, |
| "loss": 3.5727, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 2.3791117668151855, |
| "learning_rate": 0.0002999, |
| "loss": 3.5559, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 1.4081765413284302, |
| "learning_rate": 0.0003099, |
| "loss": 3.5623, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 1.5577131509780884, |
| "learning_rate": 0.0003199, |
| "loss": 3.5032, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 1.6537963151931763, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 3.5059, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 1.1358909606933594, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 3.4895, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 1.8418935537338257, |
| "learning_rate": 0.0003499, |
| "loss": 3.5043, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 1.2883477210998535, |
| "learning_rate": 0.0003599, |
| "loss": 3.4928, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 2.2627828121185303, |
| "learning_rate": 0.0003699, |
| "loss": 3.484, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.3333110809326172, |
| "learning_rate": 0.0003799, |
| "loss": 3.4796, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.2305139303207397, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 3.4748, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 1.0505295991897583, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 3.4668, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 1.9814112186431885, |
| "learning_rate": 0.0004099, |
| "loss": 3.472, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 1.0137572288513184, |
| "learning_rate": 0.0004199, |
| "loss": 3.476, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.4311473369598389, |
| "learning_rate": 0.0004299, |
| "loss": 3.4757, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 1.5800271034240723, |
| "learning_rate": 0.0004399, |
| "loss": 3.4591, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.2509582042694092, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 3.4495, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 1.3119666576385498, |
| "learning_rate": 0.0004599, |
| "loss": 3.4512, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 1.1141108274459839, |
| "learning_rate": 0.0004699, |
| "loss": 3.4655, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.295493721961975, |
| "learning_rate": 0.0004799, |
| "loss": 3.4512, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 0.9966570734977722, |
| "learning_rate": 0.0004899, |
| "loss": 3.4436, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 0.8886629939079285, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 3.4193, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 1.174604892730713, |
| "learning_rate": 0.0005099, |
| "loss": 3.4435, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.2078009843826294, |
| "learning_rate": 0.0005199, |
| "loss": 3.4152, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 1.044562816619873, |
| "learning_rate": 0.0005299, |
| "loss": 3.4225, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.065690517425537, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 3.4187, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 0.9627252817153931, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 3.4267, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 0.9242098331451416, |
| "learning_rate": 0.0005599, |
| "loss": 3.4098, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.0979822874069214, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 3.4007, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 0.9085142016410828, |
| "learning_rate": 0.0005799, |
| "loss": 3.3882, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.1204533576965332, |
| "learning_rate": 0.0005899, |
| "loss": 3.3966, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 1.6729990243911743, |
| "learning_rate": 0.0005999, |
| "loss": 3.391, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 1.193272590637207, |
| "learning_rate": 0.0006099, |
| "loss": 3.398, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 0.9911984801292419, |
| "learning_rate": 0.0006199, |
| "loss": 3.3991, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 1.0938817262649536, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 3.2833, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 1.0412477254867554, |
| "learning_rate": 0.0006399, |
| "loss": 3.3089, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 1.0674519538879395, |
| "learning_rate": 0.0006499, |
| "loss": 3.3082, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.0387150049209595, |
| "learning_rate": 0.0006599, |
| "loss": 3.2791, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 1.2339116334915161, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 3.3188, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 1.027089238166809, |
| "learning_rate": 0.0006799, |
| "loss": 3.3212, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 0.8910061120986938, |
| "learning_rate": 0.0006899, |
| "loss": 3.303, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 1.0685038566589355, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 3.3141, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 1.1801034212112427, |
| "learning_rate": 0.0007099, |
| "loss": 3.3029, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 0.927495539188385, |
| "learning_rate": 0.0007199, |
| "loss": 3.3018, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 0.9997018575668335, |
| "learning_rate": 0.0007299, |
| "loss": 3.3107, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 0.9022483825683594, |
| "learning_rate": 0.0007399, |
| "loss": 3.3105, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.1087478399276733, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 3.3093, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 0.8586679100990295, |
| "learning_rate": 0.0007599, |
| "loss": 3.2884, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 0.9941036105155945, |
| "learning_rate": 0.0007699, |
| "loss": 3.286, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.8098440766334534, |
| "learning_rate": 0.0007799, |
| "loss": 3.292, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 0.8971424102783203, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 3.3003, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 0.7246127724647522, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 3.2941, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 1.125386118888855, |
| "learning_rate": 0.0008099, |
| "loss": 3.2923, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 0.8445305228233337, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 3.2854, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 0.9022735953330994, |
| "learning_rate": 0.0008299, |
| "loss": 3.3128, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 2.568601608276367, |
| "learning_rate": 0.0008399, |
| "loss": 3.2935, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 0.9319080114364624, |
| "learning_rate": 0.0008499, |
| "loss": 3.2836, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 0.9496336579322815, |
| "learning_rate": 0.0008599, |
| "loss": 3.2652, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 0.9994519948959351, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 3.2707, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 0.8992823958396912, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 3.2758, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 0.9746232032775879, |
| "learning_rate": 0.0008899, |
| "loss": 3.2662, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 1.0509666204452515, |
| "learning_rate": 0.0008999, |
| "loss": 3.287, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.1123683452606201, |
| "learning_rate": 0.0009099, |
| "loss": 3.2874, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 0.8263154029846191, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 3.2478, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 0.9960469007492065, |
| "learning_rate": 0.0009299, |
| "loss": 3.2641, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 0.9817208051681519, |
| "learning_rate": 0.0009399, |
| "loss": 3.1547, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 0.8626974821090698, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 3.1857, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 0.8734513521194458, |
| "learning_rate": 0.0009599, |
| "loss": 3.1828, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 0.9288407564163208, |
| "learning_rate": 0.0009699, |
| "loss": 3.1667, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 1.0279629230499268, |
| "learning_rate": 0.0009799, |
| "loss": 3.1817, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 0.8655309677124023, |
| "learning_rate": 0.0009899, |
| "loss": 3.1927, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 0.8720046877861023, |
| "learning_rate": 0.0009999, |
| "loss": 3.1844, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 0.9178308844566345, |
| "learning_rate": 0.001, |
| "loss": 3.1809, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 1.1305406093597412, |
| "learning_rate": 0.001, |
| "loss": 3.1835, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.032468318939209, |
| "learning_rate": 0.001, |
| "loss": 3.1599, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 0.9638125896453857, |
| "learning_rate": 0.001, |
| "loss": 3.1734, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 0.8970602750778198, |
| "learning_rate": 0.001, |
| "loss": 3.1596, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 0.9424933195114136, |
| "learning_rate": 0.001, |
| "loss": 3.1612, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 1.008062720298767, |
| "learning_rate": 0.001, |
| "loss": 3.1634, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 0.8778404593467712, |
| "learning_rate": 0.001, |
| "loss": 3.1653, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 1.0320971012115479, |
| "learning_rate": 0.001, |
| "loss": 3.1862, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 0.7651787996292114, |
| "learning_rate": 0.001, |
| "loss": 3.1588, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 1.0109679698944092, |
| "learning_rate": 0.001, |
| "loss": 3.1633, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 1.1707773208618164, |
| "learning_rate": 0.001, |
| "loss": 3.1605, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 0.8497399091720581, |
| "learning_rate": 0.001, |
| "loss": 3.1628, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 1.1759374141693115, |
| "learning_rate": 0.001, |
| "loss": 3.1498, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 1.3136417865753174, |
| "learning_rate": 0.001, |
| "loss": 3.1711, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 0.8515832424163818, |
| "learning_rate": 0.001, |
| "loss": 3.1654, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 0.9094183444976807, |
| "learning_rate": 0.001, |
| "loss": 3.1736, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 0.7091506719589233, |
| "learning_rate": 0.001, |
| "loss": 3.1555, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.7586659789085388, |
| "learning_rate": 0.001, |
| "loss": 3.1599, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 0.8182700276374817, |
| "learning_rate": 0.001, |
| "loss": 3.1678, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 0.8606002926826477, |
| "learning_rate": 0.001, |
| "loss": 3.1539, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 1.0754919052124023, |
| "learning_rate": 0.001, |
| "loss": 3.1526, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.1159805059432983, |
| "learning_rate": 0.001, |
| "loss": 3.1545, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 0.8374843597412109, |
| "learning_rate": 0.001, |
| "loss": 3.1302, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 0.8675714135169983, |
| "learning_rate": 0.001, |
| "loss": 3.0011, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 0.9006522297859192, |
| "learning_rate": 0.001, |
| "loss": 3.0227, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 0.8449679613113403, |
| "learning_rate": 0.001, |
| "loss": 3.0373, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 1.1275441646575928, |
| "learning_rate": 0.001, |
| "loss": 3.029, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 0.9259504079818726, |
| "learning_rate": 0.001, |
| "loss": 3.0351, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 1.0209553241729736, |
| "learning_rate": 0.001, |
| "loss": 3.0349, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 0.9716935753822327, |
| "learning_rate": 0.001, |
| "loss": 3.0446, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 0.8766793608665466, |
| "learning_rate": 0.001, |
| "loss": 3.0304, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 0.9728161692619324, |
| "learning_rate": 0.001, |
| "loss": 3.0246, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 0.8230871558189392, |
| "learning_rate": 0.001, |
| "loss": 3.0515, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 0.8542620539665222, |
| "learning_rate": 0.001, |
| "loss": 3.0577, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 0.7272443175315857, |
| "learning_rate": 0.001, |
| "loss": 3.0452, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 1.0077018737792969, |
| "learning_rate": 0.001, |
| "loss": 3.0209, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 1.0092633962631226, |
| "learning_rate": 0.001, |
| "loss": 3.05, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 0.7395654916763306, |
| "learning_rate": 0.001, |
| "loss": 3.0595, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 0.9956360459327698, |
| "learning_rate": 0.001, |
| "loss": 3.0167, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 0.9180870652198792, |
| "learning_rate": 0.001, |
| "loss": 3.0449, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 0.9045624732971191, |
| "learning_rate": 0.001, |
| "loss": 3.0553, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 0.795102596282959, |
| "learning_rate": 0.001, |
| "loss": 3.0544, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 1.0817164182662964, |
| "learning_rate": 0.001, |
| "loss": 3.0537, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 0.9697984457015991, |
| "learning_rate": 0.001, |
| "loss": 3.0513, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 0.903374969959259, |
| "learning_rate": 0.001, |
| "loss": 3.0283, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 1.154241681098938, |
| "learning_rate": 0.001, |
| "loss": 3.0389, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 0.7839650511741638, |
| "learning_rate": 0.001, |
| "loss": 3.0233, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 1.3288406133651733, |
| "learning_rate": 0.001, |
| "loss": 3.0315, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 0.9391863942146301, |
| "learning_rate": 0.001, |
| "loss": 3.0416, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.9329890608787537, |
| "learning_rate": 0.001, |
| "loss": 3.0265, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 0.9532723426818848, |
| "learning_rate": 0.001, |
| "loss": 3.038, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 0.9412986040115356, |
| "learning_rate": 0.001, |
| "loss": 3.0333, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 1.205365538597107, |
| "learning_rate": 0.001, |
| "loss": 3.0419, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 0.9483180642127991, |
| "learning_rate": 0.001, |
| "loss": 3.0027, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 1.1812211275100708, |
| "learning_rate": 0.001, |
| "loss": 2.8706, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 0.8328335881233215, |
| "learning_rate": 0.001, |
| "loss": 2.9088, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 0.9921544790267944, |
| "learning_rate": 0.001, |
| "loss": 2.9154, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 0.984326183795929, |
| "learning_rate": 0.001, |
| "loss": 2.9198, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 0.9495807886123657, |
| "learning_rate": 0.001, |
| "loss": 2.9274, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 1.0462549924850464, |
| "learning_rate": 0.001, |
| "loss": 2.9166, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 0.8766588568687439, |
| "learning_rate": 0.001, |
| "loss": 2.9068, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 0.7101683616638184, |
| "learning_rate": 0.001, |
| "loss": 2.9257, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 0.957855761051178, |
| "learning_rate": 0.001, |
| "loss": 2.9323, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 1.2239158153533936, |
| "learning_rate": 0.001, |
| "loss": 2.9191, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 0.8347125053405762, |
| "learning_rate": 0.001, |
| "loss": 2.9195, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 0.8946273922920227, |
| "learning_rate": 0.001, |
| "loss": 2.9198, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 0.8327140212059021, |
| "learning_rate": 0.001, |
| "loss": 2.9386, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 0.9591065049171448, |
| "learning_rate": 0.001, |
| "loss": 2.9233, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 0.9572122693061829, |
| "learning_rate": 0.001, |
| "loss": 2.9417, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 1.2478572130203247, |
| "learning_rate": 0.001, |
| "loss": 2.9227, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 0.7681442499160767, |
| "learning_rate": 0.001, |
| "loss": 2.9318, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 0.9959331750869751, |
| "learning_rate": 0.001, |
| "loss": 2.9099, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 1.1006550788879395, |
| "learning_rate": 0.001, |
| "loss": 2.9279, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 0.9101115465164185, |
| "learning_rate": 0.001, |
| "loss": 2.9323, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 1.1711993217468262, |
| "learning_rate": 0.001, |
| "loss": 2.9567, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 0.9631717205047607, |
| "learning_rate": 0.001, |
| "loss": 2.9452, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 1.028151035308838, |
| "learning_rate": 0.001, |
| "loss": 2.9354, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 0.9212917685508728, |
| "learning_rate": 0.001, |
| "loss": 2.9225, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 1.1386103630065918, |
| "learning_rate": 0.001, |
| "loss": 2.9372, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 0.8606694936752319, |
| "learning_rate": 0.001, |
| "loss": 2.9466, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.7786224484443665, |
| "learning_rate": 0.001, |
| "loss": 2.9463, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 0.8389948010444641, |
| "learning_rate": 0.001, |
| "loss": 2.959, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 1.326764464378357, |
| "learning_rate": 0.001, |
| "loss": 2.9324, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 1.221278190612793, |
| "learning_rate": 0.001, |
| "loss": 2.938, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 0.9087767004966736, |
| "learning_rate": 0.001, |
| "loss": 2.8858, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 0.9513673782348633, |
| "learning_rate": 0.001, |
| "loss": 2.7961, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 0.9944157004356384, |
| "learning_rate": 0.001, |
| "loss": 2.7991, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 1.06227707862854, |
| "learning_rate": 0.001, |
| "loss": 2.8068, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 0.8983224034309387, |
| "learning_rate": 0.001, |
| "loss": 2.8201, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 0.7782119512557983, |
| "learning_rate": 0.001, |
| "loss": 2.8152, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 0.8689744472503662, |
| "learning_rate": 0.001, |
| "loss": 2.8198, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 1.1996628046035767, |
| "learning_rate": 0.001, |
| "loss": 2.8325, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 0.844738245010376, |
| "learning_rate": 0.001, |
| "loss": 2.8508, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 0.9009729027748108, |
| "learning_rate": 0.001, |
| "loss": 2.8238, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 1.2987245321273804, |
| "learning_rate": 0.001, |
| "loss": 2.8231, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 1.0361422300338745, |
| "learning_rate": 0.001, |
| "loss": 2.8338, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 1.0403621196746826, |
| "learning_rate": 0.001, |
| "loss": 2.8508, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 0.9830465912818909, |
| "learning_rate": 0.001, |
| "loss": 2.833, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.7783878445625305, |
| "learning_rate": 0.001, |
| "loss": 2.8237, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 0.8542726635932922, |
| "learning_rate": 0.001, |
| "loss": 2.8433, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 0.9122792482376099, |
| "learning_rate": 0.001, |
| "loss": 2.8397, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 1.1444542407989502, |
| "learning_rate": 0.001, |
| "loss": 2.8342, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 1.118303894996643, |
| "learning_rate": 0.001, |
| "loss": 2.8437, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 1.069883108139038, |
| "learning_rate": 0.001, |
| "loss": 2.8436, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 0.9313819408416748, |
| "learning_rate": 0.001, |
| "loss": 2.8396, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 1.0891497135162354, |
| "learning_rate": 0.001, |
| "loss": 2.8338, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.9340724945068359, |
| "learning_rate": 0.001, |
| "loss": 2.8487, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 0.8091602325439453, |
| "learning_rate": 0.001, |
| "loss": 2.8663, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.878212034702301, |
| "learning_rate": 0.001, |
| "loss": 2.8463, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 0.7118565440177917, |
| "learning_rate": 0.001, |
| "loss": 2.8466, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 0.7867164611816406, |
| "learning_rate": 0.001, |
| "loss": 2.8717, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 1.037338137626648, |
| "learning_rate": 0.001, |
| "loss": 2.8507, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 1.0783958435058594, |
| "learning_rate": 0.001, |
| "loss": 2.8654, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 1.2687408924102783, |
| "learning_rate": 0.001, |
| "loss": 2.8545, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 1.236606478691101, |
| "learning_rate": 0.001, |
| "loss": 2.8371, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 0.8000703454017639, |
| "learning_rate": 0.001, |
| "loss": 2.7781, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 1.101654052734375, |
| "learning_rate": 0.001, |
| "loss": 2.7135, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 1.2058213949203491, |
| "learning_rate": 0.001, |
| "loss": 2.7033, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 1.1319844722747803, |
| "learning_rate": 0.001, |
| "loss": 2.7142, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.2110048532485962, |
| "learning_rate": 0.001, |
| "loss": 2.7249, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 1.1869632005691528, |
| "learning_rate": 0.001, |
| "loss": 2.7229, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 0.8604994416236877, |
| "learning_rate": 0.001, |
| "loss": 2.7444, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 1.1467421054840088, |
| "learning_rate": 0.001, |
| "loss": 2.7492, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 0.9088888764381409, |
| "learning_rate": 0.001, |
| "loss": 2.7485, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 1.0480924844741821, |
| "learning_rate": 0.001, |
| "loss": 2.7441, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 0.8503157496452332, |
| "learning_rate": 0.001, |
| "loss": 2.7429, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 0.9091809391975403, |
| "learning_rate": 0.001, |
| "loss": 2.765, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 1.3554952144622803, |
| "learning_rate": 0.001, |
| "loss": 2.7709, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 1.0642281770706177, |
| "learning_rate": 0.001, |
| "loss": 2.7586, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 1.150550127029419, |
| "learning_rate": 0.001, |
| "loss": 2.7648, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 0.9647549986839294, |
| "learning_rate": 0.001, |
| "loss": 2.7699, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 1.3209148645401, |
| "learning_rate": 0.001, |
| "loss": 2.7576, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 0.9192158579826355, |
| "learning_rate": 0.001, |
| "loss": 2.7804, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 1.0111809968948364, |
| "learning_rate": 0.001, |
| "loss": 2.7506, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 0.9270694851875305, |
| "learning_rate": 0.001, |
| "loss": 2.7643, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 1.0087300539016724, |
| "learning_rate": 0.001, |
| "loss": 2.7594, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 1.1877171993255615, |
| "learning_rate": 0.001, |
| "loss": 2.7515, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 1.1692532300949097, |
| "learning_rate": 0.001, |
| "loss": 2.7635, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 0.9619380831718445, |
| "learning_rate": 0.001, |
| "loss": 2.7688, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 1.0232017040252686, |
| "learning_rate": 0.001, |
| "loss": 2.7836, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 1.0013618469238281, |
| "learning_rate": 0.001, |
| "loss": 2.7778, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 0.97769695520401, |
| "learning_rate": 0.001, |
| "loss": 2.7579, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 0.9150475859642029, |
| "learning_rate": 0.001, |
| "loss": 2.7683, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 1.0076943635940552, |
| "learning_rate": 0.001, |
| "loss": 2.767, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 1.0352141857147217, |
| "learning_rate": 0.001, |
| "loss": 2.8031, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 0.9151076078414917, |
| "learning_rate": 0.001, |
| "loss": 2.7802, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 1.3630614280700684, |
| "learning_rate": 0.001, |
| "loss": 2.6941, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 1.331531047821045, |
| "learning_rate": 0.001, |
| "loss": 2.6443, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 1.6701931953430176, |
| "learning_rate": 0.001, |
| "loss": 2.6514, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 1.3195340633392334, |
| "learning_rate": 0.001, |
| "loss": 2.6601, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 1.5026133060455322, |
| "learning_rate": 0.001, |
| "loss": 2.6744, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 1.1848560571670532, |
| "learning_rate": 0.001, |
| "loss": 2.6525, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 1.428328275680542, |
| "learning_rate": 0.001, |
| "loss": 2.661, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 1.4003264904022217, |
| "learning_rate": 0.001, |
| "loss": 2.6613, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 1.0878551006317139, |
| "learning_rate": 0.001, |
| "loss": 2.6452, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 1.2808568477630615, |
| "learning_rate": 0.001, |
| "loss": 2.6624, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 1.1686146259307861, |
| "learning_rate": 0.001, |
| "loss": 2.6687, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 1.0730260610580444, |
| "learning_rate": 0.001, |
| "loss": 2.6768, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.2151321172714233, |
| "learning_rate": 0.001, |
| "loss": 2.6459, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 1.6539074182510376, |
| "learning_rate": 0.001, |
| "loss": 2.6671, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 1.3920577764511108, |
| "learning_rate": 0.001, |
| "loss": 2.6961, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 0.968061089515686, |
| "learning_rate": 0.001, |
| "loss": 2.6942, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 1.220590353012085, |
| "learning_rate": 0.001, |
| "loss": 2.688, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 1.3109445571899414, |
| "learning_rate": 0.001, |
| "loss": 2.6982, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 1.6531808376312256, |
| "learning_rate": 0.001, |
| "loss": 2.6929, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 1.1348577737808228, |
| "learning_rate": 0.001, |
| "loss": 2.7053, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 1.5206853151321411, |
| "learning_rate": 0.001, |
| "loss": 2.687, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 1.5895841121673584, |
| "learning_rate": 0.001, |
| "loss": 2.7004, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 1.1910431385040283, |
| "learning_rate": 0.001, |
| "loss": 2.71, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 1.8874714374542236, |
| "learning_rate": 0.001, |
| "loss": 2.6958, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 1.01585054397583, |
| "learning_rate": 0.001, |
| "loss": 2.6898, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 1.2293158769607544, |
| "learning_rate": 0.001, |
| "loss": 2.7001, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 1.2832460403442383, |
| "learning_rate": 0.001, |
| "loss": 2.7213, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 1.0620850324630737, |
| "learning_rate": 0.001, |
| "loss": 2.7072, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 1.327416181564331, |
| "learning_rate": 0.001, |
| "loss": 2.69, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 0.9163597226142883, |
| "learning_rate": 0.001, |
| "loss": 2.7301, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 1.2720648050308228, |
| "learning_rate": 0.001, |
| "loss": 2.7143, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.5404466390609741, |
| "learning_rate": 0.001, |
| "loss": 2.622, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.5179469585418701, |
| "learning_rate": 0.001, |
| "loss": 2.5708, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 1.2205370664596558, |
| "learning_rate": 0.001, |
| "loss": 2.5866, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.3983556032180786, |
| "learning_rate": 0.001, |
| "loss": 2.5853, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.6919760704040527, |
| "learning_rate": 0.001, |
| "loss": 2.5896, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 1.3373256921768188, |
| "learning_rate": 0.001, |
| "loss": 2.5889, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.7062350511550903, |
| "learning_rate": 0.001, |
| "loss": 2.5728, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.7507829666137695, |
| "learning_rate": 0.001, |
| "loss": 2.5811, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 1.2600959539413452, |
| "learning_rate": 0.001, |
| "loss": 2.5818, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 1.0494632720947266, |
| "learning_rate": 0.001, |
| "loss": 2.6243, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.623203158378601, |
| "learning_rate": 0.001, |
| "loss": 2.6241, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.0560330152511597, |
| "learning_rate": 0.001, |
| "loss": 2.6189, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.1647675037384033, |
| "learning_rate": 0.001, |
| "loss": 2.6266, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.3463765382766724, |
| "learning_rate": 0.001, |
| "loss": 2.6203, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 0.9838733673095703, |
| "learning_rate": 0.001, |
| "loss": 2.6273, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.7701879739761353, |
| "learning_rate": 0.001, |
| "loss": 2.6278, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 0.891035795211792, |
| "learning_rate": 0.001, |
| "loss": 2.6208, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 1.5723954439163208, |
| "learning_rate": 0.001, |
| "loss": 2.6093, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.0477232933044434, |
| "learning_rate": 0.001, |
| "loss": 2.6275, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 1.5019673109054565, |
| "learning_rate": 0.001, |
| "loss": 2.659, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 1.2560871839523315, |
| "learning_rate": 0.001, |
| "loss": 2.6101, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 1.1805680990219116, |
| "learning_rate": 0.001, |
| "loss": 2.6174, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.0397446155548096, |
| "learning_rate": 0.001, |
| "loss": 2.6498, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.6623647212982178, |
| "learning_rate": 0.001, |
| "loss": 2.6359, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 1.26282799243927, |
| "learning_rate": 0.001, |
| "loss": 2.6579, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.2911651134490967, |
| "learning_rate": 0.001, |
| "loss": 2.6161, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.3239177465438843, |
| "learning_rate": 0.001, |
| "loss": 2.6292, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 0.9709534049034119, |
| "learning_rate": 0.001, |
| "loss": 2.627, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.332767367362976, |
| "learning_rate": 0.001, |
| "loss": 2.6526, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 1.4657008647918701, |
| "learning_rate": 0.001, |
| "loss": 2.6515, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.639960765838623, |
| "learning_rate": 0.001, |
| "loss": 2.6621, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 1.050683617591858, |
| "learning_rate": 0.001, |
| "loss": 2.5767, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 1.1165498495101929, |
| "learning_rate": 0.001, |
| "loss": 2.5307, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.3107905387878418, |
| "learning_rate": 0.001, |
| "loss": 2.5127, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 1.1361440420150757, |
| "learning_rate": 0.001, |
| "loss": 2.5248, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 1.4246487617492676, |
| "learning_rate": 0.001, |
| "loss": 2.5209, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 0.9404054880142212, |
| "learning_rate": 0.001, |
| "loss": 2.5429, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 0.9457871913909912, |
| "learning_rate": 0.001, |
| "loss": 2.5414, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 1.1399489641189575, |
| "learning_rate": 0.001, |
| "loss": 2.5448, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 1.1223640441894531, |
| "learning_rate": 0.001, |
| "loss": 2.5353, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 1.446655511856079, |
| "learning_rate": 0.001, |
| "loss": 2.5589, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 0.9820685982704163, |
| "learning_rate": 0.001, |
| "loss": 2.5305, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 0.9038324952125549, |
| "learning_rate": 0.001, |
| "loss": 2.5389, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 1.184463381767273, |
| "learning_rate": 0.001, |
| "loss": 2.5791, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 0.9426921606063843, |
| "learning_rate": 0.001, |
| "loss": 2.5612, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.2373493909835815, |
| "learning_rate": 0.001, |
| "loss": 2.5603, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 1.0498517751693726, |
| "learning_rate": 0.001, |
| "loss": 2.5471, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 1.1927897930145264, |
| "learning_rate": 0.001, |
| "loss": 2.5663, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.0986390113830566, |
| "learning_rate": 0.001, |
| "loss": 2.589, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.0975826978683472, |
| "learning_rate": 0.001, |
| "loss": 2.5746, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 1.4507718086242676, |
| "learning_rate": 0.001, |
| "loss": 2.5584, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 0.9978941082954407, |
| "learning_rate": 0.001, |
| "loss": 2.5884, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 1.0040268898010254, |
| "learning_rate": 0.001, |
| "loss": 2.5802, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.1611864566802979, |
| "learning_rate": 0.001, |
| "loss": 2.5745, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 0.9444515705108643, |
| "learning_rate": 0.001, |
| "loss": 2.5742, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 0.770430326461792, |
| "learning_rate": 0.001, |
| "loss": 2.5833, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 1.0619391202926636, |
| "learning_rate": 0.001, |
| "loss": 2.5904, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 0.9841474890708923, |
| "learning_rate": 0.001, |
| "loss": 2.5825, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 0.8374784588813782, |
| "learning_rate": 0.001, |
| "loss": 2.5847, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 1.0929527282714844, |
| "learning_rate": 0.001, |
| "loss": 2.5871, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 1.063624382019043, |
| "learning_rate": 0.001, |
| "loss": 2.5833, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 0.9693267941474915, |
| "learning_rate": 0.001, |
| "loss": 2.5765, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.0682049989700317, |
| "learning_rate": 0.001, |
| "loss": 2.5111, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 1.2485145330429077, |
| "learning_rate": 0.001, |
| "loss": 2.4233, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.2661662101745605, |
| "learning_rate": 0.001, |
| "loss": 2.4521, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 0.9464433789253235, |
| "learning_rate": 0.001, |
| "loss": 2.4781, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.4966256618499756, |
| "learning_rate": 0.001, |
| "loss": 2.4606, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 0.9548174738883972, |
| "learning_rate": 0.001, |
| "loss": 2.474, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 1.2072795629501343, |
| "learning_rate": 0.001, |
| "loss": 2.4905, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 1.1185228824615479, |
| "learning_rate": 0.001, |
| "loss": 2.4699, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 1.3499680757522583, |
| "learning_rate": 0.001, |
| "loss": 2.4886, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 0.9926376938819885, |
| "learning_rate": 0.001, |
| "loss": 2.4868, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 1.0508836507797241, |
| "learning_rate": 0.001, |
| "loss": 2.4988, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 1.0693625211715698, |
| "learning_rate": 0.001, |
| "loss": 2.4944, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 1.2590545415878296, |
| "learning_rate": 0.001, |
| "loss": 2.5104, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 1.0293058156967163, |
| "learning_rate": 0.001, |
| "loss": 2.5196, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 1.299343466758728, |
| "learning_rate": 0.001, |
| "loss": 2.5146, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 1.1421232223510742, |
| "learning_rate": 0.001, |
| "loss": 2.5063, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.8818128108978271, |
| "learning_rate": 0.001, |
| "loss": 2.5242, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 1.0025140047073364, |
| "learning_rate": 0.001, |
| "loss": 2.4983, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 0.9715794920921326, |
| "learning_rate": 0.001, |
| "loss": 2.5207, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 1.076883316040039, |
| "learning_rate": 0.001, |
| "loss": 2.5222, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 1.0112929344177246, |
| "learning_rate": 0.001, |
| "loss": 2.5248, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 1.1007529497146606, |
| "learning_rate": 0.001, |
| "loss": 2.5448, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 0.9470750093460083, |
| "learning_rate": 0.001, |
| "loss": 2.5358, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 1.0336370468139648, |
| "learning_rate": 0.001, |
| "loss": 2.5418, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 1.0315172672271729, |
| "learning_rate": 0.001, |
| "loss": 2.5124, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 1.18720543384552, |
| "learning_rate": 0.001, |
| "loss": 2.5141, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 1.034296989440918, |
| "learning_rate": 0.001, |
| "loss": 2.5322, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 1.0513291358947754, |
| "learning_rate": 0.001, |
| "loss": 2.5226, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 1.0058174133300781, |
| "learning_rate": 0.001, |
| "loss": 2.5452, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 1.342353105545044, |
| "learning_rate": 0.001, |
| "loss": 2.546, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 1.0556999444961548, |
| "learning_rate": 0.001, |
| "loss": 2.5334, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 1.2010694742202759, |
| "learning_rate": 0.001, |
| "loss": 2.4479, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 1.0156108140945435, |
| "learning_rate": 0.001, |
| "loss": 2.4137, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 1.216589093208313, |
| "learning_rate": 0.001, |
| "loss": 2.3993, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 1.1752727031707764, |
| "learning_rate": 0.001, |
| "loss": 2.3963, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.212253451347351, |
| "learning_rate": 0.001, |
| "loss": 2.4086, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.0437322854995728, |
| "learning_rate": 0.001, |
| "loss": 2.4183, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 1.2049281597137451, |
| "learning_rate": 0.001, |
| "loss": 2.4453, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 1.014501929283142, |
| "learning_rate": 0.001, |
| "loss": 2.4162, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 1.2967098951339722, |
| "learning_rate": 0.001, |
| "loss": 2.4231, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 1.0043957233428955, |
| "learning_rate": 0.001, |
| "loss": 2.4235, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 1.1719709634780884, |
| "learning_rate": 0.001, |
| "loss": 2.4608, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 1.6381487846374512, |
| "learning_rate": 0.001, |
| "loss": 2.453, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 1.1440620422363281, |
| "learning_rate": 0.001, |
| "loss": 2.4596, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 1.389151692390442, |
| "learning_rate": 0.001, |
| "loss": 2.4657, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 1.1156370639801025, |
| "learning_rate": 0.001, |
| "loss": 2.4662, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 0.8124172687530518, |
| "learning_rate": 0.001, |
| "loss": 2.4504, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 1.3011460304260254, |
| "learning_rate": 0.001, |
| "loss": 2.4797, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 0.8509902954101562, |
| "learning_rate": 0.001, |
| "loss": 2.4685, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.1421103477478027, |
| "learning_rate": 0.001, |
| "loss": 2.4656, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 1.0977075099945068, |
| "learning_rate": 0.001, |
| "loss": 2.4641, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 0.9898512959480286, |
| "learning_rate": 0.001, |
| "loss": 2.4688, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 1.0552868843078613, |
| "learning_rate": 0.001, |
| "loss": 2.4752, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 0.7937177419662476, |
| "learning_rate": 0.001, |
| "loss": 2.4834, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 1.175423264503479, |
| "learning_rate": 0.001, |
| "loss": 2.476, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 0.835038959980011, |
| "learning_rate": 0.001, |
| "loss": 2.4991, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.1652710437774658, |
| "learning_rate": 0.001, |
| "loss": 2.4946, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 0.8950003385543823, |
| "learning_rate": 0.001, |
| "loss": 2.47, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.1447982788085938, |
| "learning_rate": 0.001, |
| "loss": 2.4728, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 1.3896085023880005, |
| "learning_rate": 0.001, |
| "loss": 2.473, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 1.0610136985778809, |
| "learning_rate": 0.001, |
| "loss": 2.4644, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 1.2621580362319946, |
| "learning_rate": 0.001, |
| "loss": 2.4881, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 1.0967028141021729, |
| "learning_rate": 0.001, |
| "loss": 2.3948, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 0.9754540920257568, |
| "learning_rate": 0.001, |
| "loss": 2.362, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 1.023298740386963, |
| "learning_rate": 0.001, |
| "loss": 2.3488, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 0.9918268322944641, |
| "learning_rate": 0.001, |
| "loss": 2.3396, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 0.865107536315918, |
| "learning_rate": 0.001, |
| "loss": 2.3641, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 1.0310488939285278, |
| "learning_rate": 0.001, |
| "loss": 2.3617, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 0.8639553189277649, |
| "learning_rate": 0.001, |
| "loss": 2.3812, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 1.0706894397735596, |
| "learning_rate": 0.001, |
| "loss": 2.3639, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 1.1262027025222778, |
| "learning_rate": 0.001, |
| "loss": 2.3943, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 1.2430620193481445, |
| "learning_rate": 0.001, |
| "loss": 2.3872, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 0.976929783821106, |
| "learning_rate": 0.001, |
| "loss": 2.3958, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 1.1228851079940796, |
| "learning_rate": 0.001, |
| "loss": 2.3995, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 1.3405524492263794, |
| "learning_rate": 0.001, |
| "loss": 2.3798, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 1.1651748418807983, |
| "learning_rate": 0.001, |
| "loss": 2.4097, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 0.9650669693946838, |
| "learning_rate": 0.001, |
| "loss": 2.4045, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 1.1848506927490234, |
| "learning_rate": 0.001, |
| "loss": 2.4206, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 1.2294769287109375, |
| "learning_rate": 0.001, |
| "loss": 2.4142, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 0.9633001685142517, |
| "learning_rate": 0.001, |
| "loss": 2.4102, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 1.231916904449463, |
| "learning_rate": 0.001, |
| "loss": 2.4432, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 1.1208678483963013, |
| "learning_rate": 0.001, |
| "loss": 2.4254, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 1.0216820240020752, |
| "learning_rate": 0.001, |
| "loss": 2.4281, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 1.2707115411758423, |
| "learning_rate": 0.001, |
| "loss": 2.4229, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 1.4039192199707031, |
| "learning_rate": 0.001, |
| "loss": 2.4345, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 0.8980364799499512, |
| "learning_rate": 0.001, |
| "loss": 2.4371, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 1.0485930442810059, |
| "learning_rate": 0.001, |
| "loss": 2.4274, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 0.94874507188797, |
| "learning_rate": 0.001, |
| "loss": 2.4408, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 1.0252995491027832, |
| "learning_rate": 0.001, |
| "loss": 2.4494, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 1.4931409358978271, |
| "learning_rate": 0.001, |
| "loss": 2.4155, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 1.2104835510253906, |
| "learning_rate": 0.001, |
| "loss": 2.4439, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 1.1501274108886719, |
| "learning_rate": 0.001, |
| "loss": 2.4477, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 1.2111330032348633, |
| "learning_rate": 0.001, |
| "loss": 2.4257, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 1.2563148736953735, |
| "learning_rate": 0.001, |
| "loss": 2.3305, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 1.1479803323745728, |
| "learning_rate": 0.001, |
| "loss": 2.3233, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.4190595149993896, |
| "learning_rate": 0.001, |
| "loss": 2.3296, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.0025237798690796, |
| "learning_rate": 0.001, |
| "loss": 2.3046, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 0.9679207801818848, |
| "learning_rate": 0.001, |
| "loss": 2.3314, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 1.1125279664993286, |
| "learning_rate": 0.001, |
| "loss": 2.3419, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 0.9743191599845886, |
| "learning_rate": 0.001, |
| "loss": 2.3285, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 1.4148427248001099, |
| "learning_rate": 0.001, |
| "loss": 2.3147, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 1.012438178062439, |
| "learning_rate": 0.001, |
| "loss": 2.325, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 1.4741400480270386, |
| "learning_rate": 0.001, |
| "loss": 2.3371, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 1.0528957843780518, |
| "learning_rate": 0.001, |
| "loss": 2.347, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 1.1542372703552246, |
| "learning_rate": 0.001, |
| "loss": 2.3617, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 1.225242018699646, |
| "learning_rate": 0.001, |
| "loss": 2.363, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 1.0331146717071533, |
| "learning_rate": 0.001, |
| "loss": 2.3536, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.1673269271850586, |
| "learning_rate": 0.001, |
| "loss": 2.3626, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 1.0871409177780151, |
| "learning_rate": 0.001, |
| "loss": 2.3564, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.0477733612060547, |
| "learning_rate": 0.001, |
| "loss": 2.361, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 1.1164947748184204, |
| "learning_rate": 0.001, |
| "loss": 2.368, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 1.1241072416305542, |
| "learning_rate": 0.001, |
| "loss": 2.3684, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.1364701986312866, |
| "learning_rate": 0.001, |
| "loss": 2.3814, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 1.3487145900726318, |
| "learning_rate": 0.001, |
| "loss": 2.3935, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 1.0357153415679932, |
| "learning_rate": 0.001, |
| "loss": 2.3942, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 1.5036461353302002, |
| "learning_rate": 0.001, |
| "loss": 2.4049, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.0585894584655762, |
| "learning_rate": 0.001, |
| "loss": 2.3785, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 0.9382624626159668, |
| "learning_rate": 0.001, |
| "loss": 2.3854, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 1.0960943698883057, |
| "learning_rate": 0.001, |
| "loss": 2.3735, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 1.1341259479522705, |
| "learning_rate": 0.001, |
| "loss": 2.3893, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 0.8915718197822571, |
| "learning_rate": 0.001, |
| "loss": 2.3789, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.1936798095703125, |
| "learning_rate": 0.001, |
| "loss": 2.4101, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 1.2099779844284058, |
| "learning_rate": 0.001, |
| "loss": 2.4176, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 0.8536863327026367, |
| "learning_rate": 0.001, |
| "loss": 2.3897, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.2651400566101074, |
| "learning_rate": 0.001, |
| "loss": 2.2709, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 1.1207914352416992, |
| "learning_rate": 0.001, |
| "loss": 2.2619, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.0109786987304688, |
| "learning_rate": 0.001, |
| "loss": 2.275, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.6071540117263794, |
| "learning_rate": 0.001, |
| "loss": 2.2764, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 1.2585033178329468, |
| "learning_rate": 0.001, |
| "loss": 2.291, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 1.313089370727539, |
| "learning_rate": 0.001, |
| "loss": 2.277, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 0.8985034227371216, |
| "learning_rate": 0.001, |
| "loss": 2.2836, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.3318949937820435, |
| "learning_rate": 0.001, |
| "loss": 2.2753, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.6827569007873535, |
| "learning_rate": 0.001, |
| "loss": 2.2958, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.5016133785247803, |
| "learning_rate": 0.001, |
| "loss": 2.3056, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.3888620138168335, |
| "learning_rate": 0.001, |
| "loss": 2.3095, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 0.9856404662132263, |
| "learning_rate": 0.001, |
| "loss": 2.294, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.1952743530273438, |
| "learning_rate": 0.001, |
| "loss": 2.3156, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.3369444608688354, |
| "learning_rate": 0.001, |
| "loss": 2.3049, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.258984923362732, |
| "learning_rate": 0.001, |
| "loss": 2.3135, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 1.1995731592178345, |
| "learning_rate": 0.001, |
| "loss": 2.3567, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.1886205673217773, |
| "learning_rate": 0.001, |
| "loss": 2.3331, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 1.1722344160079956, |
| "learning_rate": 0.001, |
| "loss": 2.3292, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.4253953695297241, |
| "learning_rate": 0.001, |
| "loss": 2.3449, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 1.1338748931884766, |
| "learning_rate": 0.001, |
| "loss": 2.3234, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 1.2208466529846191, |
| "learning_rate": 0.001, |
| "loss": 2.3402, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.2015690803527832, |
| "learning_rate": 0.001, |
| "loss": 2.3361, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.0186476707458496, |
| "learning_rate": 0.001, |
| "loss": 2.3455, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 1.4595075845718384, |
| "learning_rate": 0.001, |
| "loss": 2.3425, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 1.0360519886016846, |
| "learning_rate": 0.001, |
| "loss": 2.3361, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 0.9627425074577332, |
| "learning_rate": 0.001, |
| "loss": 2.3359, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 0.9317019581794739, |
| "learning_rate": 0.001, |
| "loss": 2.3536, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.2822422981262207, |
| "learning_rate": 0.001, |
| "loss": 2.3559, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 1.5487724542617798, |
| "learning_rate": 0.001, |
| "loss": 2.3575, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.4528721570968628, |
| "learning_rate": 0.001, |
| "loss": 2.351, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 1.8106807470321655, |
| "learning_rate": 0.001, |
| "loss": 2.3607, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.4708492755889893, |
| "learning_rate": 0.001, |
| "loss": 2.2137, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.2355926036834717, |
| "learning_rate": 0.001, |
| "loss": 2.2269, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 2.1963531970977783, |
| "learning_rate": 0.001, |
| "loss": 2.2293, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 1.46670663356781, |
| "learning_rate": 0.001, |
| "loss": 2.2242, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 1.289772629737854, |
| "learning_rate": 0.001, |
| "loss": 2.2503, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.4664719104766846, |
| "learning_rate": 0.001, |
| "loss": 2.2517, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.0733598470687866, |
| "learning_rate": 0.001, |
| "loss": 2.2682, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 1.5189355611801147, |
| "learning_rate": 0.001, |
| "loss": 2.2561, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 1.3452093601226807, |
| "learning_rate": 0.001, |
| "loss": 2.2513, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.3040879964828491, |
| "learning_rate": 0.001, |
| "loss": 2.249, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.6627320051193237, |
| "learning_rate": 0.001, |
| "loss": 2.2694, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 1.5199756622314453, |
| "learning_rate": 0.001, |
| "loss": 2.2729, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.412865161895752, |
| "learning_rate": 0.001, |
| "loss": 2.2636, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.1884099245071411, |
| "learning_rate": 0.001, |
| "loss": 2.281, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.5446977615356445, |
| "learning_rate": 0.001, |
| "loss": 2.2719, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 1.6802927255630493, |
| "learning_rate": 0.001, |
| "loss": 2.2849, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.2737250328063965, |
| "learning_rate": 0.001, |
| "loss": 2.2851, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 1.3500823974609375, |
| "learning_rate": 0.001, |
| "loss": 2.3008, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.1918673515319824, |
| "learning_rate": 0.001, |
| "loss": 2.2985, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.1218078136444092, |
| "learning_rate": 0.001, |
| "loss": 2.2982, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.5831869840621948, |
| "learning_rate": 0.001, |
| "loss": 2.3034, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 1.3978112936019897, |
| "learning_rate": 0.001, |
| "loss": 2.3151, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.4054855108261108, |
| "learning_rate": 0.001, |
| "loss": 2.286, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 0.9958290457725525, |
| "learning_rate": 0.001, |
| "loss": 2.3094, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 1.3509937524795532, |
| "learning_rate": 0.001, |
| "loss": 2.2887, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 2.143278121948242, |
| "learning_rate": 0.001, |
| "loss": 2.3244, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 1.5843268632888794, |
| "learning_rate": 0.001, |
| "loss": 2.3059, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.4028642177581787, |
| "learning_rate": 0.001, |
| "loss": 2.3053, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 1.5138201713562012, |
| "learning_rate": 0.001, |
| "loss": 2.3194, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.6168174743652344, |
| "learning_rate": 0.001, |
| "loss": 2.3039, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.1482666730880737, |
| "learning_rate": 0.001, |
| "loss": 2.302, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.4270600080490112, |
| "learning_rate": 0.001, |
| "loss": 2.1655, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.8423911333084106, |
| "learning_rate": 0.001, |
| "loss": 2.1833, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.6184693574905396, |
| "learning_rate": 0.001, |
| "loss": 2.1812, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.0999650955200195, |
| "learning_rate": 0.001, |
| "loss": 2.194, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 1.5708781480789185, |
| "learning_rate": 0.001, |
| "loss": 2.1813, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 1.5949249267578125, |
| "learning_rate": 0.001, |
| "loss": 2.1901, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.490331768989563, |
| "learning_rate": 0.001, |
| "loss": 2.2041, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.5599461793899536, |
| "learning_rate": 0.001, |
| "loss": 2.2346, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 1.9876192808151245, |
| "learning_rate": 0.001, |
| "loss": 2.2065, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 1.8799535036087036, |
| "learning_rate": 0.001, |
| "loss": 2.241, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 2.051346778869629, |
| "learning_rate": 0.001, |
| "loss": 2.2141, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 2.10467529296875, |
| "learning_rate": 0.001, |
| "loss": 2.2266, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.1607954502105713, |
| "learning_rate": 0.001, |
| "loss": 2.2431, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 1.8151345252990723, |
| "learning_rate": 0.001, |
| "loss": 2.2645, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.368027925491333, |
| "learning_rate": 0.001, |
| "loss": 2.2507, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.8371237516403198, |
| "learning_rate": 0.001, |
| "loss": 2.2506, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.2181835174560547, |
| "learning_rate": 0.001, |
| "loss": 2.2249, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.3113195896148682, |
| "learning_rate": 0.001, |
| "loss": 2.233, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.2849704027175903, |
| "learning_rate": 0.001, |
| "loss": 2.2755, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 1.2958779335021973, |
| "learning_rate": 0.001, |
| "loss": 2.2668, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 1.5940552949905396, |
| "learning_rate": 0.001, |
| "loss": 2.2648, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 1.6866202354431152, |
| "learning_rate": 0.001, |
| "loss": 2.2646, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.7330199480056763, |
| "learning_rate": 0.001, |
| "loss": 2.2786, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.4443870782852173, |
| "learning_rate": 0.001, |
| "loss": 2.2595, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 1.4749608039855957, |
| "learning_rate": 0.001, |
| "loss": 2.2711, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.5407294034957886, |
| "learning_rate": 0.001, |
| "loss": 2.263, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.521584153175354, |
| "learning_rate": 0.001, |
| "loss": 2.2822, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.7927467823028564, |
| "learning_rate": 0.001, |
| "loss": 2.273, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.3927431106567383, |
| "learning_rate": 0.001, |
| "loss": 2.2797, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 2.010420799255371, |
| "learning_rate": 0.001, |
| "loss": 2.2923, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.3878021240234375, |
| "learning_rate": 0.001, |
| "loss": 2.2861, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.6571372747421265, |
| "learning_rate": 0.001, |
| "loss": 2.1246, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 1.3559300899505615, |
| "learning_rate": 0.001, |
| "loss": 2.1514, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 1.1670371294021606, |
| "learning_rate": 0.001, |
| "loss": 2.1436, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 1.331639051437378, |
| "learning_rate": 0.001, |
| "loss": 2.1682, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.2827179431915283, |
| "learning_rate": 0.001, |
| "loss": 2.1676, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 1.556694746017456, |
| "learning_rate": 0.001, |
| "loss": 2.1648, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.428536057472229, |
| "learning_rate": 0.001, |
| "loss": 2.1962, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 1.1887167692184448, |
| "learning_rate": 0.001, |
| "loss": 2.1626, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.2648141384124756, |
| "learning_rate": 0.001, |
| "loss": 2.1923, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 1.1101640462875366, |
| "learning_rate": 0.001, |
| "loss": 2.217, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 0.9787229895591736, |
| "learning_rate": 0.001, |
| "loss": 2.1767, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.4049594402313232, |
| "learning_rate": 0.001, |
| "loss": 2.1986, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 1.415755033493042, |
| "learning_rate": 0.001, |
| "loss": 2.2025, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 1.1880539655685425, |
| "learning_rate": 0.001, |
| "loss": 2.2029, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.154875636100769, |
| "learning_rate": 0.001, |
| "loss": 2.2117, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 0.9201914668083191, |
| "learning_rate": 0.001, |
| "loss": 2.2086, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 1.2845484018325806, |
| "learning_rate": 0.001, |
| "loss": 2.2137, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.026379108428955, |
| "learning_rate": 0.001, |
| "loss": 2.2051, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 0.9726206064224243, |
| "learning_rate": 0.001, |
| "loss": 2.2064, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.5916303396224976, |
| "learning_rate": 0.001, |
| "loss": 2.216, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.2654246091842651, |
| "learning_rate": 0.001, |
| "loss": 2.2234, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.6823699474334717, |
| "learning_rate": 0.001, |
| "loss": 2.2388, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 1.0970423221588135, |
| "learning_rate": 0.001, |
| "loss": 2.2332, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.136433720588684, |
| "learning_rate": 0.001, |
| "loss": 2.2384, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.2828449010849, |
| "learning_rate": 0.001, |
| "loss": 2.2142, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.010236144065857, |
| "learning_rate": 0.001, |
| "loss": 2.2349, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.3149129152297974, |
| "learning_rate": 0.001, |
| "loss": 2.2471, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 1.5493013858795166, |
| "learning_rate": 0.001, |
| "loss": 2.2713, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.3114402294158936, |
| "learning_rate": 0.001, |
| "loss": 2.2392, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 0.9853785037994385, |
| "learning_rate": 0.001, |
| "loss": 2.2405, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.2043988704681396, |
| "learning_rate": 0.001, |
| "loss": 2.2477, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 1.6636931896209717, |
| "learning_rate": 0.001, |
| "loss": 2.0882, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 0.8756181597709656, |
| "learning_rate": 0.001, |
| "loss": 2.0999, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.1063123941421509, |
| "learning_rate": 0.001, |
| "loss": 2.1199, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.2338377237319946, |
| "learning_rate": 0.001, |
| "loss": 2.1267, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 1.44084632396698, |
| "learning_rate": 0.001, |
| "loss": 2.1371, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 1.1796813011169434, |
| "learning_rate": 0.001, |
| "loss": 2.1499, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 1.1995784044265747, |
| "learning_rate": 0.001, |
| "loss": 2.1438, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 0.9502636790275574, |
| "learning_rate": 0.001, |
| "loss": 2.1585, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 0.9679586291313171, |
| "learning_rate": 0.001, |
| "loss": 2.1544, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.2242112159729004, |
| "learning_rate": 0.001, |
| "loss": 2.1633, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.2602921724319458, |
| "learning_rate": 0.001, |
| "loss": 2.157, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 1.55171799659729, |
| "learning_rate": 0.001, |
| "loss": 2.1749, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.2272818088531494, |
| "learning_rate": 0.001, |
| "loss": 2.1719, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.1695352792739868, |
| "learning_rate": 0.001, |
| "loss": 2.1562, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.1918383836746216, |
| "learning_rate": 0.001, |
| "loss": 2.1781, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 1.1158689260482788, |
| "learning_rate": 0.001, |
| "loss": 2.175, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.2081654071807861, |
| "learning_rate": 0.001, |
| "loss": 2.1811, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 1.0976276397705078, |
| "learning_rate": 0.001, |
| "loss": 2.18, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.0541235208511353, |
| "learning_rate": 0.001, |
| "loss": 2.1821, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.1526302099227905, |
| "learning_rate": 0.001, |
| "loss": 2.1947, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 1.3280059099197388, |
| "learning_rate": 0.001, |
| "loss": 2.1674, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 1.2853764295578003, |
| "learning_rate": 0.001, |
| "loss": 2.1935, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.0324232578277588, |
| "learning_rate": 0.001, |
| "loss": 2.2013, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 1.3006898164749146, |
| "learning_rate": 0.001, |
| "loss": 2.2024, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 1.1228374242782593, |
| "learning_rate": 0.001, |
| "loss": 2.193, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 1.2684869766235352, |
| "learning_rate": 0.001, |
| "loss": 2.2083, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 0.8672001361846924, |
| "learning_rate": 0.001, |
| "loss": 2.2101, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.0683246850967407, |
| "learning_rate": 0.001, |
| "loss": 2.2139, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 1.1728013753890991, |
| "learning_rate": 0.001, |
| "loss": 2.1988, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 1.2360259294509888, |
| "learning_rate": 0.001, |
| "loss": 2.206, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.1226767301559448, |
| "learning_rate": 0.001, |
| "loss": 2.1834, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 1.2612074613571167, |
| "learning_rate": 0.001, |
| "loss": 2.0784, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.2865725755691528, |
| "learning_rate": 0.001, |
| "loss": 2.0913, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.2894771099090576, |
| "learning_rate": 0.001, |
| "loss": 2.0799, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 1.3527390956878662, |
| "learning_rate": 0.001, |
| "loss": 2.0959, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.1928151845932007, |
| "learning_rate": 0.001, |
| "loss": 2.103, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.0168992280960083, |
| "learning_rate": 0.001, |
| "loss": 2.0941, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.0409342050552368, |
| "learning_rate": 0.001, |
| "loss": 2.1038, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 1.2547892332077026, |
| "learning_rate": 0.001, |
| "loss": 2.0966, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.2054287195205688, |
| "learning_rate": 0.001, |
| "loss": 2.1298, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 1.0080070495605469, |
| "learning_rate": 0.001, |
| "loss": 2.1166, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.103588342666626, |
| "learning_rate": 0.001, |
| "loss": 2.1441, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.1245242357254028, |
| "learning_rate": 0.001, |
| "loss": 2.1216, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.236649513244629, |
| "learning_rate": 0.001, |
| "loss": 2.1274, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 1.1738601922988892, |
| "learning_rate": 0.001, |
| "loss": 2.1229, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 1.0897395610809326, |
| "learning_rate": 0.001, |
| "loss": 2.1431, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.4749375581741333, |
| "learning_rate": 0.001, |
| "loss": 2.1278, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.4643535614013672, |
| "learning_rate": 0.001, |
| "loss": 2.1512, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 0.9917904734611511, |
| "learning_rate": 0.001, |
| "loss": 2.1503, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.2945317029953003, |
| "learning_rate": 0.001, |
| "loss": 2.1454, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.1126689910888672, |
| "learning_rate": 0.001, |
| "loss": 2.1353, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.2969932556152344, |
| "learning_rate": 0.001, |
| "loss": 2.1496, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 1.3333299160003662, |
| "learning_rate": 0.001, |
| "loss": 2.1623, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.0493760108947754, |
| "learning_rate": 0.001, |
| "loss": 2.1668, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 1.0862622261047363, |
| "learning_rate": 0.001, |
| "loss": 2.1579, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.3162381649017334, |
| "learning_rate": 0.001, |
| "loss": 2.1769, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 1.7097973823547363, |
| "learning_rate": 0.001, |
| "loss": 2.1642, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.0230882167816162, |
| "learning_rate": 0.001, |
| "loss": 2.1939, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 0.8909283876419067, |
| "learning_rate": 0.001, |
| "loss": 2.1795, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 1.1475738286972046, |
| "learning_rate": 0.001, |
| "loss": 2.1833, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 1.3730794191360474, |
| "learning_rate": 0.001, |
| "loss": 2.1868, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.518537163734436, |
| "learning_rate": 0.001, |
| "loss": 2.1312, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.2940282821655273, |
| "learning_rate": 0.001, |
| "loss": 2.0509, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.860131859779358, |
| "learning_rate": 0.001, |
| "loss": 2.0563, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 1.4131841659545898, |
| "learning_rate": 0.001, |
| "loss": 2.0643, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.429236888885498, |
| "learning_rate": 0.001, |
| "loss": 2.0731, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.1104481220245361, |
| "learning_rate": 0.001, |
| "loss": 2.062, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 1.18645179271698, |
| "learning_rate": 0.001, |
| "loss": 2.0773, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.384711742401123, |
| "learning_rate": 0.001, |
| "loss": 2.0734, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.1969033479690552, |
| "learning_rate": 0.001, |
| "loss": 2.0751, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 1.297878384590149, |
| "learning_rate": 0.001, |
| "loss": 2.0942, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.4161620140075684, |
| "learning_rate": 0.001, |
| "loss": 2.1058, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 1.1466299295425415, |
| "learning_rate": 0.001, |
| "loss": 2.0875, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 1.0590342283248901, |
| "learning_rate": 0.001, |
| "loss": 2.0883, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 1.4948805570602417, |
| "learning_rate": 0.001, |
| "loss": 2.1005, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 1.6177111864089966, |
| "learning_rate": 0.001, |
| "loss": 2.1093, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 1.3227946758270264, |
| "learning_rate": 0.001, |
| "loss": 2.0811, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 1.3259391784667969, |
| "learning_rate": 0.001, |
| "loss": 2.0773, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.352561354637146, |
| "learning_rate": 0.001, |
| "loss": 2.111, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 1.1337064504623413, |
| "learning_rate": 0.001, |
| "loss": 2.1252, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.2298682928085327, |
| "learning_rate": 0.001, |
| "loss": 2.1198, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.2822353839874268, |
| "learning_rate": 0.001, |
| "loss": 2.1258, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 0.9597915410995483, |
| "learning_rate": 0.001, |
| "loss": 2.1283, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 0.9812031388282776, |
| "learning_rate": 0.001, |
| "loss": 2.1439, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 1.0487492084503174, |
| "learning_rate": 0.001, |
| "loss": 2.1357, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.1485785245895386, |
| "learning_rate": 0.001, |
| "loss": 2.1601, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 0.9800519347190857, |
| "learning_rate": 0.001, |
| "loss": 2.1565, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 1.2664602994918823, |
| "learning_rate": 0.001, |
| "loss": 2.1442, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 1.3896493911743164, |
| "learning_rate": 0.001, |
| "loss": 2.1419, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 1.5186612606048584, |
| "learning_rate": 0.001, |
| "loss": 2.1331, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.1328755617141724, |
| "learning_rate": 0.001, |
| "loss": 2.1423, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.7095478773117065, |
| "learning_rate": 0.001, |
| "loss": 2.144, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 1.1905454397201538, |
| "learning_rate": 0.001, |
| "loss": 2.0849, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 1.151082992553711, |
| "learning_rate": 0.001, |
| "loss": 2.0151, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 1.414963960647583, |
| "learning_rate": 0.001, |
| "loss": 2.0046, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.0465073585510254, |
| "learning_rate": 0.001, |
| "loss": 2.0297, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 1.1696205139160156, |
| "learning_rate": 0.001, |
| "loss": 2.0333, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.165142297744751, |
| "learning_rate": 0.001, |
| "loss": 2.0432, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.5192077159881592, |
| "learning_rate": 0.001, |
| "loss": 2.0496, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.7254102230072021, |
| "learning_rate": 0.001, |
| "loss": 2.04, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.0662051439285278, |
| "learning_rate": 0.001, |
| "loss": 2.0316, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.3857476711273193, |
| "learning_rate": 0.001, |
| "loss": 2.0576, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.332242727279663, |
| "learning_rate": 0.001, |
| "loss": 2.0349, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 1.3924357891082764, |
| "learning_rate": 0.001, |
| "loss": 2.0848, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.1637581586837769, |
| "learning_rate": 0.001, |
| "loss": 2.0862, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 1.474599003791809, |
| "learning_rate": 0.001, |
| "loss": 2.0771, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 1.2520018815994263, |
| "learning_rate": 0.001, |
| "loss": 2.0771, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 1.3370542526245117, |
| "learning_rate": 0.001, |
| "loss": 2.0893, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 0.9184688925743103, |
| "learning_rate": 0.001, |
| "loss": 2.0766, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.0229378938674927, |
| "learning_rate": 0.001, |
| "loss": 2.0814, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 1.6382076740264893, |
| "learning_rate": 0.001, |
| "loss": 2.0853, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 1.4604605436325073, |
| "learning_rate": 0.001, |
| "loss": 2.094, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 0.9782185554504395, |
| "learning_rate": 0.001, |
| "loss": 2.0849, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.2251577377319336, |
| "learning_rate": 0.001, |
| "loss": 2.1201, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 0.9563422799110413, |
| "learning_rate": 0.001, |
| "loss": 2.105, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.2224578857421875, |
| "learning_rate": 0.001, |
| "loss": 2.1025, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.3068078756332397, |
| "learning_rate": 0.001, |
| "loss": 2.1108, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 1.2106717824935913, |
| "learning_rate": 0.001, |
| "loss": 2.1068, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.1505300998687744, |
| "learning_rate": 0.001, |
| "loss": 2.0991, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.1176759004592896, |
| "learning_rate": 0.001, |
| "loss": 2.1206, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.4180165529251099, |
| "learning_rate": 0.001, |
| "loss": 2.108, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.2236053943634033, |
| "learning_rate": 0.001, |
| "loss": 2.1049, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 1.3975863456726074, |
| "learning_rate": 0.001, |
| "loss": 2.1208, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.246389627456665, |
| "learning_rate": 0.001, |
| "loss": 2.0397, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.5850071907043457, |
| "learning_rate": 0.001, |
| "loss": 1.9887, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.1955056190490723, |
| "learning_rate": 0.001, |
| "loss": 1.9772, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.3734443187713623, |
| "learning_rate": 0.001, |
| "loss": 1.9873, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.61884343624115, |
| "learning_rate": 0.001, |
| "loss": 2.0123, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.601148009300232, |
| "learning_rate": 0.001, |
| "loss": 2.0035, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.3028818368911743, |
| "learning_rate": 0.001, |
| "loss": 2.0261, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 1.3658092021942139, |
| "learning_rate": 0.001, |
| "loss": 2.0145, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.2859296798706055, |
| "learning_rate": 0.001, |
| "loss": 2.0193, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.5372207164764404, |
| "learning_rate": 0.001, |
| "loss": 2.0275, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.2374789714813232, |
| "learning_rate": 0.001, |
| "loss": 2.0197, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.213862657546997, |
| "learning_rate": 0.001, |
| "loss": 2.0317, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.0549333095550537, |
| "learning_rate": 0.001, |
| "loss": 2.0409, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.0349382162094116, |
| "learning_rate": 0.001, |
| "loss": 2.0287, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.197283387184143, |
| "learning_rate": 0.001, |
| "loss": 2.0392, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.5242730379104614, |
| "learning_rate": 0.001, |
| "loss": 2.048, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.352638602256775, |
| "learning_rate": 0.001, |
| "loss": 2.0576, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 0.8858992457389832, |
| "learning_rate": 0.001, |
| "loss": 2.0455, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.298511028289795, |
| "learning_rate": 0.001, |
| "loss": 2.0653, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.6291836500167847, |
| "learning_rate": 0.001, |
| "loss": 2.0624, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.0975252389907837, |
| "learning_rate": 0.001, |
| "loss": 2.0855, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.221786618232727, |
| "learning_rate": 0.001, |
| "loss": 2.0684, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.5622029304504395, |
| "learning_rate": 0.001, |
| "loss": 2.0895, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.9987596273422241, |
| "learning_rate": 0.001, |
| "loss": 2.083, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.3780443668365479, |
| "learning_rate": 0.001, |
| "loss": 2.1051, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 1.5061843395233154, |
| "learning_rate": 0.001, |
| "loss": 2.0651, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 1.7464600801467896, |
| "learning_rate": 0.001, |
| "loss": 2.0765, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.4121919870376587, |
| "learning_rate": 0.001, |
| "loss": 2.0719, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.759332299232483, |
| "learning_rate": 0.001, |
| "loss": 2.0994, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 1.6104328632354736, |
| "learning_rate": 0.001, |
| "loss": 2.0959, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.543933391571045, |
| "learning_rate": 0.001, |
| "loss": 2.0865, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 1.7527161836624146, |
| "learning_rate": 0.001, |
| "loss": 2.0309, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.540932297706604, |
| "learning_rate": 0.001, |
| "loss": 1.9614, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.3225823640823364, |
| "learning_rate": 0.001, |
| "loss": 1.9619, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.4268288612365723, |
| "learning_rate": 0.001, |
| "loss": 1.9617, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 1.436618685722351, |
| "learning_rate": 0.001, |
| "loss": 1.9802, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 2.0793025493621826, |
| "learning_rate": 0.001, |
| "loss": 2.0074, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.332802176475525, |
| "learning_rate": 0.001, |
| "loss": 1.9643, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.3457491397857666, |
| "learning_rate": 0.001, |
| "loss": 1.99, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 1.360344409942627, |
| "learning_rate": 0.001, |
| "loss": 2.0099, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.846103549003601, |
| "learning_rate": 0.001, |
| "loss": 2.0058, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 1.547523856163025, |
| "learning_rate": 0.001, |
| "loss": 2.0142, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 1.4123824834823608, |
| "learning_rate": 0.001, |
| "loss": 1.9988, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.568945050239563, |
| "learning_rate": 0.001, |
| "loss": 1.9981, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 1.4623258113861084, |
| "learning_rate": 0.001, |
| "loss": 2.0148, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 1.5970401763916016, |
| "learning_rate": 0.001, |
| "loss": 1.9942, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 1.8230788707733154, |
| "learning_rate": 0.001, |
| "loss": 2.0233, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 1.3407267332077026, |
| "learning_rate": 0.001, |
| "loss": 2.0037, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.5265939235687256, |
| "learning_rate": 0.001, |
| "loss": 2.0392, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.3966178894042969, |
| "learning_rate": 0.001, |
| "loss": 2.0244, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.5279021263122559, |
| "learning_rate": 0.001, |
| "loss": 2.0293, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.4958832263946533, |
| "learning_rate": 0.001, |
| "loss": 2.0439, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.5250979661941528, |
| "learning_rate": 0.001, |
| "loss": 2.0475, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.5153076648712158, |
| "learning_rate": 0.001, |
| "loss": 2.0397, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 1.655045747756958, |
| "learning_rate": 0.001, |
| "loss": 2.0539, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.5840027332305908, |
| "learning_rate": 0.001, |
| "loss": 2.0401, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 1.7858777046203613, |
| "learning_rate": 0.001, |
| "loss": 2.0484, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.2337720394134521, |
| "learning_rate": 0.001, |
| "loss": 2.0456, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 1.4665465354919434, |
| "learning_rate": 0.001, |
| "loss": 2.086, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.840309977531433, |
| "learning_rate": 0.001, |
| "loss": 2.0634, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 1.268938660621643, |
| "learning_rate": 0.001, |
| "loss": 2.0702, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.476412057876587, |
| "learning_rate": 0.001, |
| "loss": 2.066, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 2.2744739055633545, |
| "learning_rate": 0.001, |
| "loss": 1.9527, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 2.6017308235168457, |
| "learning_rate": 0.001, |
| "loss": 1.9749, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 2.104499101638794, |
| "learning_rate": 0.001, |
| "loss": 1.9307, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 2.4056954383850098, |
| "learning_rate": 0.001, |
| "loss": 1.9341, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 2.033249855041504, |
| "learning_rate": 0.001, |
| "loss": 1.9518, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 1.6417148113250732, |
| "learning_rate": 0.001, |
| "loss": 1.9826, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 1.9145283699035645, |
| "learning_rate": 0.001, |
| "loss": 1.9765, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 1.8713525533676147, |
| "learning_rate": 0.001, |
| "loss": 1.964, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 1.9054548740386963, |
| "learning_rate": 0.001, |
| "loss": 1.9553, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 1.673055648803711, |
| "learning_rate": 0.001, |
| "loss": 1.9904, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 2.0248286724090576, |
| "learning_rate": 0.001, |
| "loss": 1.9886, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 1.8936235904693604, |
| "learning_rate": 0.001, |
| "loss": 1.9842, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 2.2731919288635254, |
| "learning_rate": 0.001, |
| "loss": 1.9873, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 1.883629560470581, |
| "learning_rate": 0.001, |
| "loss": 1.9706, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 1.7781099081039429, |
| "learning_rate": 0.001, |
| "loss": 1.9913, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 1.5534332990646362, |
| "learning_rate": 0.001, |
| "loss": 1.9678, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 1.9587346315383911, |
| "learning_rate": 0.001, |
| "loss": 2.0279, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 1.8949452638626099, |
| "learning_rate": 0.001, |
| "loss": 1.9936, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 1.916925311088562, |
| "learning_rate": 0.001, |
| "loss": 2.0152, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 2.057035446166992, |
| "learning_rate": 0.001, |
| "loss": 1.9873, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 2.0889744758605957, |
| "learning_rate": 0.001, |
| "loss": 2.0126, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 2.0734827518463135, |
| "learning_rate": 0.001, |
| "loss": 2.0183, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 1.9695285558700562, |
| "learning_rate": 0.001, |
| "loss": 2.0178, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 1.844465970993042, |
| "learning_rate": 0.001, |
| "loss": 2.0045, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 2.1013882160186768, |
| "learning_rate": 0.001, |
| "loss": 2.0245, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 2.3713462352752686, |
| "learning_rate": 0.001, |
| "loss": 2.0091, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 1.981223464012146, |
| "learning_rate": 0.001, |
| "loss": 2.0356, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 2.089397668838501, |
| "learning_rate": 0.001, |
| "loss": 2.0271, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 2.173301935195923, |
| "learning_rate": 0.001, |
| "loss": 2.0314, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 1.814848780632019, |
| "learning_rate": 0.001, |
| "loss": 2.0307, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 2.236610174179077, |
| "learning_rate": 0.001, |
| "loss": 2.0317, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 1.870568037033081, |
| "learning_rate": 0.001, |
| "loss": 1.9713, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 1.495255947113037, |
| "learning_rate": 0.001, |
| "loss": 1.9004, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.3151029348373413, |
| "learning_rate": 0.001, |
| "loss": 1.9098, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 1.3545511960983276, |
| "learning_rate": 0.001, |
| "loss": 1.9286, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.536292552947998, |
| "learning_rate": 0.001, |
| "loss": 1.9249, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.2461029291152954, |
| "learning_rate": 0.001, |
| "loss": 1.9315, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.3342663049697876, |
| "learning_rate": 0.001, |
| "loss": 1.9299, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.148798942565918, |
| "learning_rate": 0.001, |
| "loss": 1.9422, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.2964235544204712, |
| "learning_rate": 0.001, |
| "loss": 1.9585, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 1.3100039958953857, |
| "learning_rate": 0.001, |
| "loss": 1.9527, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.6557549238204956, |
| "learning_rate": 0.001, |
| "loss": 1.9658, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.9575376510620117, |
| "learning_rate": 0.001, |
| "loss": 1.9577, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.3891215324401855, |
| "learning_rate": 0.001, |
| "loss": 1.9592, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.4893115758895874, |
| "learning_rate": 0.001, |
| "loss": 1.9441, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.5343430042266846, |
| "learning_rate": 0.001, |
| "loss": 1.9507, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.5884416103363037, |
| "learning_rate": 0.001, |
| "loss": 1.9655, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.4402801990509033, |
| "learning_rate": 0.001, |
| "loss": 1.9629, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.8175873756408691, |
| "learning_rate": 0.001, |
| "loss": 1.9686, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.0564377307891846, |
| "learning_rate": 0.001, |
| "loss": 1.977, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.5843056440353394, |
| "learning_rate": 0.001, |
| "loss": 1.9762, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.2666752338409424, |
| "learning_rate": 0.001, |
| "loss": 1.9911, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.8625491857528687, |
| "learning_rate": 0.001, |
| "loss": 1.9856, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.5447437763214111, |
| "learning_rate": 0.001, |
| "loss": 1.9967, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.376990795135498, |
| "learning_rate": 0.001, |
| "loss": 1.9884, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 1.804949164390564, |
| "learning_rate": 0.001, |
| "loss": 2.005, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.2310376167297363, |
| "learning_rate": 0.001, |
| "loss": 2.0221, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.287807583808899, |
| "learning_rate": 0.001, |
| "loss": 2.0364, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.3025513887405396, |
| "learning_rate": 0.001, |
| "loss": 2.0196, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.4322696924209595, |
| "learning_rate": 0.001, |
| "loss": 1.9999, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.3139070272445679, |
| "learning_rate": 0.001, |
| "loss": 2.015, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.5377211570739746, |
| "learning_rate": 0.001, |
| "loss": 2.0096, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.1557625532150269, |
| "learning_rate": 0.001, |
| "loss": 1.9483, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.446763277053833, |
| "learning_rate": 0.001, |
| "loss": 1.8683, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.820147156715393, |
| "learning_rate": 0.001, |
| "loss": 1.8726, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 1.3174505233764648, |
| "learning_rate": 0.001, |
| "loss": 1.9013, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.4255985021591187, |
| "learning_rate": 0.001, |
| "loss": 1.9021, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.037857174873352, |
| "learning_rate": 0.001, |
| "loss": 1.8889, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.2322443723678589, |
| "learning_rate": 0.001, |
| "loss": 1.919, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 1.1215150356292725, |
| "learning_rate": 0.001, |
| "loss": 1.8952, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 1.5352097749710083, |
| "learning_rate": 0.001, |
| "loss": 1.9379, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.6588249206542969, |
| "learning_rate": 0.001, |
| "loss": 1.9141, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.0232058763504028, |
| "learning_rate": 0.001, |
| "loss": 1.9484, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.1663068532943726, |
| "learning_rate": 0.001, |
| "loss": 1.9368, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.4564636945724487, |
| "learning_rate": 0.001, |
| "loss": 1.9377, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.569733738899231, |
| "learning_rate": 0.001, |
| "loss": 1.9459, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.4780761003494263, |
| "learning_rate": 0.001, |
| "loss": 1.9596, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.6503818035125732, |
| "learning_rate": 0.001, |
| "loss": 1.9514, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.4085288047790527, |
| "learning_rate": 0.001, |
| "loss": 1.9569, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.341328501701355, |
| "learning_rate": 0.001, |
| "loss": 1.9578, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.232920527458191, |
| "learning_rate": 0.001, |
| "loss": 1.9485, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.274560809135437, |
| "learning_rate": 0.001, |
| "loss": 1.9575, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.2799341678619385, |
| "learning_rate": 0.001, |
| "loss": 1.9717, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.42722749710083, |
| "learning_rate": 0.001, |
| "loss": 1.9646, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.578813076019287, |
| "learning_rate": 0.001, |
| "loss": 1.965, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.4803584814071655, |
| "learning_rate": 0.001, |
| "loss": 1.9622, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.6228522062301636, |
| "learning_rate": 0.001, |
| "loss": 1.9464, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 1.2919968366622925, |
| "learning_rate": 0.001, |
| "loss": 1.9683, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 1.221200942993164, |
| "learning_rate": 0.001, |
| "loss": 1.9735, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.462760090827942, |
| "learning_rate": 0.001, |
| "loss": 1.9756, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 0.960984468460083, |
| "learning_rate": 0.001, |
| "loss": 1.9819, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.4694859981536865, |
| "learning_rate": 0.001, |
| "loss": 1.9928, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.3951972723007202, |
| "learning_rate": 0.001, |
| "loss": 1.9907, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.2100982666015625, |
| "learning_rate": 0.001, |
| "loss": 1.9312, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.3661807775497437, |
| "learning_rate": 0.001, |
| "loss": 1.8497, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.0485259294509888, |
| "learning_rate": 0.001, |
| "loss": 1.8621, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.33681321144104, |
| "learning_rate": 0.001, |
| "loss": 1.8819, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.643082857131958, |
| "learning_rate": 0.001, |
| "loss": 1.8681, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.5186346769332886, |
| "learning_rate": 0.001, |
| "loss": 1.8834, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 0.9780005812644958, |
| "learning_rate": 0.001, |
| "loss": 1.8804, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.3443195819854736, |
| "learning_rate": 0.001, |
| "loss": 1.8871, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.2283108234405518, |
| "learning_rate": 0.001, |
| "loss": 1.9005, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.2878503799438477, |
| "learning_rate": 0.001, |
| "loss": 1.9133, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.4921066761016846, |
| "learning_rate": 0.001, |
| "loss": 1.8821, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 1.0940135717391968, |
| "learning_rate": 0.001, |
| "loss": 1.9035, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.5041978359222412, |
| "learning_rate": 0.001, |
| "loss": 1.9182, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 1.3685961961746216, |
| "learning_rate": 0.001, |
| "loss": 1.9104, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 0.9715288281440735, |
| "learning_rate": 0.001, |
| "loss": 1.8933, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.1276092529296875, |
| "learning_rate": 0.001, |
| "loss": 1.9218, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.2015578746795654, |
| "learning_rate": 0.001, |
| "loss": 1.9177, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.405713438987732, |
| "learning_rate": 0.001, |
| "loss": 1.9161, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 1.2195446491241455, |
| "learning_rate": 0.001, |
| "loss": 1.9379, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 1.1534994840621948, |
| "learning_rate": 0.001, |
| "loss": 1.9198, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.2401294708251953, |
| "learning_rate": 0.001, |
| "loss": 1.9546, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.0914874076843262, |
| "learning_rate": 0.001, |
| "loss": 1.9235, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.068054437637329, |
| "learning_rate": 0.001, |
| "loss": 1.9317, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 1.1873078346252441, |
| "learning_rate": 0.001, |
| "loss": 1.9632, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 1.529497504234314, |
| "learning_rate": 0.001, |
| "loss": 1.9459, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 1.1832150220870972, |
| "learning_rate": 0.001, |
| "loss": 1.9613, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.134078860282898, |
| "learning_rate": 0.001, |
| "loss": 1.9669, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.0518510341644287, |
| "learning_rate": 0.001, |
| "loss": 1.9613, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.4630978107452393, |
| "learning_rate": 0.001, |
| "loss": 1.9742, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.2870162725448608, |
| "learning_rate": 0.001, |
| "loss": 1.9635, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 1.2716412544250488, |
| "learning_rate": 0.001, |
| "loss": 1.9711, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.6266603469848633, |
| "learning_rate": 0.001, |
| "loss": 1.8602, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 1.565943956375122, |
| "learning_rate": 0.001, |
| "loss": 1.8294, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.1085901260375977, |
| "learning_rate": 0.001, |
| "loss": 1.8302, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 1.1395728588104248, |
| "learning_rate": 0.001, |
| "loss": 1.8454, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.566756248474121, |
| "learning_rate": 0.001, |
| "loss": 1.8563, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 0.9699623584747314, |
| "learning_rate": 0.001, |
| "loss": 1.8623, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 1.4705579280853271, |
| "learning_rate": 0.001, |
| "loss": 1.8527, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.355370044708252, |
| "learning_rate": 0.001, |
| "loss": 1.893, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 1.3909432888031006, |
| "learning_rate": 0.001, |
| "loss": 1.8812, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.1816900968551636, |
| "learning_rate": 0.001, |
| "loss": 1.8655, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 1.5452797412872314, |
| "learning_rate": 0.001, |
| "loss": 1.877, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 1.3184372186660767, |
| "learning_rate": 0.001, |
| "loss": 1.8757, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.2689801454544067, |
| "learning_rate": 0.001, |
| "loss": 1.8703, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 1.005915641784668, |
| "learning_rate": 0.001, |
| "loss": 1.8925, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.5966804027557373, |
| "learning_rate": 0.001, |
| "loss": 1.8971, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.195678949356079, |
| "learning_rate": 0.001, |
| "loss": 1.911, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.8636404275894165, |
| "learning_rate": 0.001, |
| "loss": 1.9171, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.2436506748199463, |
| "learning_rate": 0.001, |
| "loss": 1.8951, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 1.3966453075408936, |
| "learning_rate": 0.001, |
| "loss": 1.9076, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.2091529369354248, |
| "learning_rate": 0.001, |
| "loss": 1.9215, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 1.365408182144165, |
| "learning_rate": 0.001, |
| "loss": 1.9221, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 1.1305558681488037, |
| "learning_rate": 0.001, |
| "loss": 1.9288, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 1.3631125688552856, |
| "learning_rate": 0.001, |
| "loss": 1.9105, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.4561924934387207, |
| "learning_rate": 0.001, |
| "loss": 1.9325, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 1.1379411220550537, |
| "learning_rate": 0.001, |
| "loss": 1.9197, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.0893300771713257, |
| "learning_rate": 0.001, |
| "loss": 1.9179, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 1.2213011980056763, |
| "learning_rate": 0.001, |
| "loss": 1.942, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.494968056678772, |
| "learning_rate": 0.001, |
| "loss": 1.9272, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.1858004331588745, |
| "learning_rate": 0.001, |
| "loss": 1.9369, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.262840747833252, |
| "learning_rate": 0.001, |
| "loss": 1.9503, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.2398407459259033, |
| "learning_rate": 0.001, |
| "loss": 1.9521, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.3323872089385986, |
| "learning_rate": 0.001, |
| "loss": 1.8326, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 1.5650134086608887, |
| "learning_rate": 0.001, |
| "loss": 1.8103, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.6720815896987915, |
| "learning_rate": 0.001, |
| "loss": 1.8213, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.2393896579742432, |
| "learning_rate": 0.001, |
| "loss": 1.8266, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.468858242034912, |
| "learning_rate": 0.001, |
| "loss": 1.8387, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.7441996335983276, |
| "learning_rate": 0.001, |
| "loss": 1.8402, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 1.106845498085022, |
| "learning_rate": 0.001, |
| "loss": 1.8426, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 1.4239120483398438, |
| "learning_rate": 0.001, |
| "loss": 1.8457, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.1072075366973877, |
| "learning_rate": 0.001, |
| "loss": 1.8712, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.4010498523712158, |
| "learning_rate": 0.001, |
| "loss": 1.8449, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 1.7070467472076416, |
| "learning_rate": 0.001, |
| "loss": 1.8591, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.3719133138656616, |
| "learning_rate": 0.001, |
| "loss": 1.8659, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.2224758863449097, |
| "learning_rate": 0.001, |
| "loss": 1.8523, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.2274152040481567, |
| "learning_rate": 0.001, |
| "loss": 1.8777, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.23996901512146, |
| "learning_rate": 0.001, |
| "loss": 1.8823, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 1.41676926612854, |
| "learning_rate": 0.001, |
| "loss": 1.8862, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 1.3740698099136353, |
| "learning_rate": 0.001, |
| "loss": 1.8776, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.242758870124817, |
| "learning_rate": 0.001, |
| "loss": 1.8893, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.2667688131332397, |
| "learning_rate": 0.001, |
| "loss": 1.8815, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.2830685377120972, |
| "learning_rate": 0.001, |
| "loss": 1.8888, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.6480231285095215, |
| "learning_rate": 0.001, |
| "loss": 1.9095, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.4405111074447632, |
| "learning_rate": 0.001, |
| "loss": 1.8965, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 1.1821390390396118, |
| "learning_rate": 0.001, |
| "loss": 1.8876, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.060267686843872, |
| "learning_rate": 0.001, |
| "loss": 1.9112, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.1657590866088867, |
| "learning_rate": 0.001, |
| "loss": 1.8993, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.247584581375122, |
| "learning_rate": 0.001, |
| "loss": 1.8987, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.1204445362091064, |
| "learning_rate": 0.001, |
| "loss": 1.9222, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.2892838716506958, |
| "learning_rate": 0.001, |
| "loss": 1.8837, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.3222150802612305, |
| "learning_rate": 0.001, |
| "loss": 1.9183, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 1.1754868030548096, |
| "learning_rate": 0.001, |
| "loss": 1.8873, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 2.0036423206329346, |
| "learning_rate": 0.001, |
| "loss": 1.8993, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 1.5189635753631592, |
| "learning_rate": 0.001, |
| "loss": 1.8267, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 1.5205469131469727, |
| "learning_rate": 0.001, |
| "loss": 1.7728, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.5693349838256836, |
| "learning_rate": 0.001, |
| "loss": 1.7906, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.1951963901519775, |
| "learning_rate": 0.001, |
| "loss": 1.7979, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.4093998670578003, |
| "learning_rate": 0.001, |
| "loss": 1.807, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.1882504224777222, |
| "learning_rate": 0.001, |
| "loss": 1.8112, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.4225773811340332, |
| "learning_rate": 0.001, |
| "loss": 1.808, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.5231945514678955, |
| "learning_rate": 0.001, |
| "loss": 1.817, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.1047606468200684, |
| "learning_rate": 0.001, |
| "loss": 1.8177, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.7534669637680054, |
| "learning_rate": 0.001, |
| "loss": 1.8447, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.1873900890350342, |
| "learning_rate": 0.001, |
| "loss": 1.8534, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.5391671657562256, |
| "learning_rate": 0.001, |
| "loss": 1.8465, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.2355151176452637, |
| "learning_rate": 0.001, |
| "loss": 1.8367, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 1.5391733646392822, |
| "learning_rate": 0.001, |
| "loss": 1.8533, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.5837594270706177, |
| "learning_rate": 0.001, |
| "loss": 1.852, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.621614694595337, |
| "learning_rate": 0.001, |
| "loss": 1.8666, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.3326274156570435, |
| "learning_rate": 0.001, |
| "loss": 1.8625, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.7983978986740112, |
| "learning_rate": 0.001, |
| "loss": 1.855, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 1.7443538904190063, |
| "learning_rate": 0.001, |
| "loss": 1.8759, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.2091596126556396, |
| "learning_rate": 0.001, |
| "loss": 1.8732, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 1.17062509059906, |
| "learning_rate": 0.001, |
| "loss": 1.8758, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.6161521673202515, |
| "learning_rate": 0.001, |
| "loss": 1.8711, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.6055936813354492, |
| "learning_rate": 0.001, |
| "loss": 1.8694, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.7089295387268066, |
| "learning_rate": 0.001, |
| "loss": 1.8783, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 1.6026934385299683, |
| "learning_rate": 0.001, |
| "loss": 1.8962, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.5359739065170288, |
| "learning_rate": 0.001, |
| "loss": 1.8792, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.1889594793319702, |
| "learning_rate": 0.001, |
| "loss": 1.9086, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.362362265586853, |
| "learning_rate": 0.001, |
| "loss": 1.8811, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.6680059432983398, |
| "learning_rate": 0.001, |
| "loss": 1.8937, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 1.2521178722381592, |
| "learning_rate": 0.001, |
| "loss": 1.8801, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.616936206817627, |
| "learning_rate": 0.001, |
| "loss": 1.8914, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.6018805503845215, |
| "learning_rate": 0.001, |
| "loss": 1.7961, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.572320818901062, |
| "learning_rate": 0.001, |
| "loss": 1.7659, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.987259030342102, |
| "learning_rate": 0.001, |
| "loss": 1.7972, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.6123111248016357, |
| "learning_rate": 0.001, |
| "loss": 1.7724, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.2040059566497803, |
| "learning_rate": 0.001, |
| "loss": 1.7894, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.4743309020996094, |
| "learning_rate": 0.001, |
| "loss": 1.8069, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.7867310047149658, |
| "learning_rate": 0.001, |
| "loss": 1.8028, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.8878384828567505, |
| "learning_rate": 0.001, |
| "loss": 1.8005, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 1.6064954996109009, |
| "learning_rate": 0.001, |
| "loss": 1.8001, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 1.521106243133545, |
| "learning_rate": 0.001, |
| "loss": 1.7954, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.6338764429092407, |
| "learning_rate": 0.001, |
| "loss": 1.8231, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 1.635077714920044, |
| "learning_rate": 0.001, |
| "loss": 1.8095, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.2968257665634155, |
| "learning_rate": 0.001, |
| "loss": 1.8286, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.4315623044967651, |
| "learning_rate": 0.001, |
| "loss": 1.8202, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.7625807523727417, |
| "learning_rate": 0.001, |
| "loss": 1.8428, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.3693516254425049, |
| "learning_rate": 0.001, |
| "loss": 1.8412, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 1.441098690032959, |
| "learning_rate": 0.001, |
| "loss": 1.8156, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.4699690341949463, |
| "learning_rate": 0.001, |
| "loss": 1.8586, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 2.550903558731079, |
| "learning_rate": 0.001, |
| "loss": 1.8482, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.6919152736663818, |
| "learning_rate": 0.001, |
| "loss": 1.8602, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.5435961484909058, |
| "learning_rate": 0.001, |
| "loss": 1.8381, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.6188323497772217, |
| "learning_rate": 0.001, |
| "loss": 1.8566, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 1.7124555110931396, |
| "learning_rate": 0.001, |
| "loss": 1.8561, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 1.8261075019836426, |
| "learning_rate": 0.001, |
| "loss": 1.8466, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 1.6595410108566284, |
| "learning_rate": 0.001, |
| "loss": 1.858, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.3070226907730103, |
| "learning_rate": 0.001, |
| "loss": 1.8703, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 1.761932373046875, |
| "learning_rate": 0.001, |
| "loss": 1.8731, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.431853175163269, |
| "learning_rate": 0.001, |
| "loss": 1.8585, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.4691671133041382, |
| "learning_rate": 0.001, |
| "loss": 1.8466, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 1.5675320625305176, |
| "learning_rate": 0.001, |
| "loss": 1.8724, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 1.831527829170227, |
| "learning_rate": 0.001, |
| "loss": 1.8737, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 1.768676519393921, |
| "learning_rate": 0.001, |
| "loss": 1.7363, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 1.7403342723846436, |
| "learning_rate": 0.001, |
| "loss": 1.7487, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 1.5318952798843384, |
| "learning_rate": 0.001, |
| "loss": 1.765, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 1.767539143562317, |
| "learning_rate": 0.001, |
| "loss": 1.7783, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 2.2086029052734375, |
| "learning_rate": 0.001, |
| "loss": 1.7807, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 2.058333396911621, |
| "learning_rate": 0.001, |
| "loss": 1.7926, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 1.7875993251800537, |
| "learning_rate": 0.001, |
| "loss": 1.7916, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 2.276167869567871, |
| "learning_rate": 0.001, |
| "loss": 1.7611, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.4356229305267334, |
| "learning_rate": 0.001, |
| "loss": 1.7932, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 2.1756107807159424, |
| "learning_rate": 0.001, |
| "loss": 1.7796, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 2.1314845085144043, |
| "learning_rate": 0.001, |
| "loss": 1.7952, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 1.6774673461914062, |
| "learning_rate": 0.001, |
| "loss": 1.8078, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 1.5583240985870361, |
| "learning_rate": 0.001, |
| "loss": 1.7953, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 2.216525077819824, |
| "learning_rate": 0.001, |
| "loss": 1.7887, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 1.5822532176971436, |
| "learning_rate": 0.001, |
| "loss": 1.7857, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 2.059272527694702, |
| "learning_rate": 0.001, |
| "loss": 1.8239, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 2.067791223526001, |
| "learning_rate": 0.001, |
| "loss": 1.8228, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.7426116466522217, |
| "learning_rate": 0.001, |
| "loss": 1.8187, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 2.092301845550537, |
| "learning_rate": 0.001, |
| "loss": 1.8198, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 1.4148643016815186, |
| "learning_rate": 0.001, |
| "loss": 1.8446, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 2.115337610244751, |
| "learning_rate": 0.001, |
| "loss": 1.8341, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 1.8922119140625, |
| "learning_rate": 0.001, |
| "loss": 1.8314, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 2.526000499725342, |
| "learning_rate": 0.001, |
| "loss": 1.8322, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 1.95648992061615, |
| "learning_rate": 0.001, |
| "loss": 1.8507, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 2.0690479278564453, |
| "learning_rate": 0.001, |
| "loss": 1.8452, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 1.7053970098495483, |
| "learning_rate": 0.001, |
| "loss": 1.8557, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 1.5044736862182617, |
| "learning_rate": 0.001, |
| "loss": 1.8416, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.7018338441848755, |
| "learning_rate": 0.001, |
| "loss": 1.8698, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 1.5839446783065796, |
| "learning_rate": 0.001, |
| "loss": 1.8678, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 1.8129023313522339, |
| "learning_rate": 0.001, |
| "loss": 1.8497, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 2.0042531490325928, |
| "learning_rate": 0.001, |
| "loss": 1.8455, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 2.1890511512756348, |
| "learning_rate": 0.001, |
| "loss": 1.7252, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 1.5610796213150024, |
| "learning_rate": 0.001, |
| "loss": 1.7255, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 1.540030837059021, |
| "learning_rate": 0.001, |
| "loss": 1.7401, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.5150138139724731, |
| "learning_rate": 0.001, |
| "loss": 1.7475, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 1.734230875968933, |
| "learning_rate": 0.001, |
| "loss": 1.7407, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 1.4981082677841187, |
| "learning_rate": 0.001, |
| "loss": 1.7366, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 1.9089019298553467, |
| "learning_rate": 0.001, |
| "loss": 1.756, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 1.513126254081726, |
| "learning_rate": 0.001, |
| "loss": 1.772, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 1.825900912284851, |
| "learning_rate": 0.001, |
| "loss": 1.7671, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 1.3419309854507446, |
| "learning_rate": 0.001, |
| "loss": 1.7717, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.7643647193908691, |
| "learning_rate": 0.001, |
| "loss": 1.7825, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 1.6718119382858276, |
| "learning_rate": 0.001, |
| "loss": 1.7848, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 1.4287859201431274, |
| "learning_rate": 0.001, |
| "loss": 1.7878, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.3736450672149658, |
| "learning_rate": 0.001, |
| "loss": 1.7913, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.4067167043685913, |
| "learning_rate": 0.001, |
| "loss": 1.7939, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.4027693271636963, |
| "learning_rate": 0.001, |
| "loss": 1.7663, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.3773787021636963, |
| "learning_rate": 0.001, |
| "loss": 1.8117, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.795181155204773, |
| "learning_rate": 0.001, |
| "loss": 1.7823, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.54495108127594, |
| "learning_rate": 0.001, |
| "loss": 1.7914, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 2.048011064529419, |
| "learning_rate": 0.001, |
| "loss": 1.8232, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.1294292211532593, |
| "learning_rate": 0.001, |
| "loss": 1.8143, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 1.6068726778030396, |
| "learning_rate": 0.001, |
| "loss": 1.8199, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.8478591442108154, |
| "learning_rate": 0.001, |
| "loss": 1.8298, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 1.4019745588302612, |
| "learning_rate": 0.001, |
| "loss": 1.8242, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 1.3042278289794922, |
| "learning_rate": 0.001, |
| "loss": 1.8359, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.7335820198059082, |
| "learning_rate": 0.001, |
| "loss": 1.8355, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 1.7621225118637085, |
| "learning_rate": 0.001, |
| "loss": 1.8295, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.2129218578338623, |
| "learning_rate": 0.001, |
| "loss": 1.839, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.3511784076690674, |
| "learning_rate": 0.001, |
| "loss": 1.8281, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 1.543796181678772, |
| "learning_rate": 0.001, |
| "loss": 1.8349, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.3133898973464966, |
| "learning_rate": 0.001, |
| "loss": 1.8232, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.309926986694336, |
| "learning_rate": 0.001, |
| "loss": 1.71, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 1.7792454957962036, |
| "learning_rate": 0.001, |
| "loss": 1.7366, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.2482664585113525, |
| "learning_rate": 0.001, |
| "loss": 1.7138, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 1.5493508577346802, |
| "learning_rate": 0.001, |
| "loss": 1.7139, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.4078187942504883, |
| "learning_rate": 0.001, |
| "loss": 1.7421, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 1.512757420539856, |
| "learning_rate": 0.001, |
| "loss": 1.753, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.444258451461792, |
| "learning_rate": 0.001, |
| "loss": 1.7136, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 1.8412730693817139, |
| "learning_rate": 0.001, |
| "loss": 1.7397, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 1.1742063760757446, |
| "learning_rate": 0.001, |
| "loss": 1.7359, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 1.5857884883880615, |
| "learning_rate": 0.001, |
| "loss": 1.773, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.8618083000183105, |
| "learning_rate": 0.001, |
| "loss": 1.775, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.5522814989089966, |
| "learning_rate": 0.001, |
| "loss": 1.7807, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 1.4634019136428833, |
| "learning_rate": 0.001, |
| "loss": 1.7788, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.684096097946167, |
| "learning_rate": 0.001, |
| "loss": 1.7812, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.911356806755066, |
| "learning_rate": 0.001, |
| "loss": 1.7775, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.1707732677459717, |
| "learning_rate": 0.001, |
| "loss": 1.7715, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 1.108163595199585, |
| "learning_rate": 0.001, |
| "loss": 1.7673, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.791603684425354, |
| "learning_rate": 0.001, |
| "loss": 1.7828, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.3673819303512573, |
| "learning_rate": 0.001, |
| "loss": 1.7974, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.5180928707122803, |
| "learning_rate": 0.001, |
| "loss": 1.7823, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.694899082183838, |
| "learning_rate": 0.001, |
| "loss": 1.7895, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.395687222480774, |
| "learning_rate": 0.001, |
| "loss": 1.7905, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 1.430392861366272, |
| "learning_rate": 0.001, |
| "loss": 1.8125, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.394409418106079, |
| "learning_rate": 0.001, |
| "loss": 1.7847, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 2.1065635681152344, |
| "learning_rate": 0.001, |
| "loss": 1.8037, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 1.502328634262085, |
| "learning_rate": 0.001, |
| "loss": 1.8033, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.1700212955474854, |
| "learning_rate": 0.001, |
| "loss": 1.8106, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.6413511037826538, |
| "learning_rate": 0.001, |
| "loss": 1.8146, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 1.7164719104766846, |
| "learning_rate": 0.001, |
| "loss": 1.8177, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.4148567914962769, |
| "learning_rate": 0.001, |
| "loss": 1.805, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.369500994682312, |
| "learning_rate": 0.001, |
| "loss": 1.7962, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.2702453136444092, |
| "learning_rate": 0.001, |
| "loss": 1.6798, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.3947583436965942, |
| "learning_rate": 0.001, |
| "loss": 1.7028, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 1.4396507740020752, |
| "learning_rate": 0.001, |
| "loss": 1.7236, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.1698938608169556, |
| "learning_rate": 0.001, |
| "loss": 1.6912, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.5718566179275513, |
| "learning_rate": 0.001, |
| "loss": 1.7184, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.6437325477600098, |
| "learning_rate": 0.001, |
| "loss": 1.7065, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 1.4717180728912354, |
| "learning_rate": 0.001, |
| "loss": 1.709, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.5429532527923584, |
| "learning_rate": 0.001, |
| "loss": 1.7359, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.5812757015228271, |
| "learning_rate": 0.001, |
| "loss": 1.7223, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 1.7356760501861572, |
| "learning_rate": 0.001, |
| "loss": 1.7252, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.375969409942627, |
| "learning_rate": 0.001, |
| "loss": 1.7433, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.131496787071228, |
| "learning_rate": 0.001, |
| "loss": 1.7571, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.422635793685913, |
| "learning_rate": 0.001, |
| "loss": 1.7496, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.3067173957824707, |
| "learning_rate": 0.001, |
| "loss": 1.7501, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 1.524130940437317, |
| "learning_rate": 0.001, |
| "loss": 1.7565, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 1.2107754945755005, |
| "learning_rate": 0.001, |
| "loss": 1.7719, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.3486363887786865, |
| "learning_rate": 0.001, |
| "loss": 1.7725, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 1.1192071437835693, |
| "learning_rate": 0.001, |
| "loss": 1.7699, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.3475347757339478, |
| "learning_rate": 0.001, |
| "loss": 1.7544, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.3668153285980225, |
| "learning_rate": 0.001, |
| "loss": 1.7797, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 1.4461634159088135, |
| "learning_rate": 0.001, |
| "loss": 1.7845, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.4558709859848022, |
| "learning_rate": 0.001, |
| "loss": 1.7933, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.2568235397338867, |
| "learning_rate": 0.001, |
| "loss": 1.7781, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 1.2808581590652466, |
| "learning_rate": 0.001, |
| "loss": 1.7947, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.3239548206329346, |
| "learning_rate": 0.001, |
| "loss": 1.787, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.446629285812378, |
| "learning_rate": 0.001, |
| "loss": 1.7931, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.5585201978683472, |
| "learning_rate": 0.001, |
| "loss": 1.7733, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.1408222913742065, |
| "learning_rate": 0.001, |
| "loss": 1.807, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.409403681755066, |
| "learning_rate": 0.001, |
| "loss": 1.7972, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.6675604581832886, |
| "learning_rate": 0.001, |
| "loss": 1.7797, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.211574673652649, |
| "learning_rate": 0.001, |
| "loss": 1.7839, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.6282230615615845, |
| "learning_rate": 0.001, |
| "loss": 1.6775, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 1.6066936254501343, |
| "learning_rate": 0.001, |
| "loss": 1.668, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.384175419807434, |
| "learning_rate": 0.001, |
| "loss": 1.6721, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 1.2160314321517944, |
| "learning_rate": 0.001, |
| "loss": 1.6847, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.1695581674575806, |
| "learning_rate": 0.001, |
| "loss": 1.6923, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 1.4204388856887817, |
| "learning_rate": 0.001, |
| "loss": 1.6962, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.1568971872329712, |
| "learning_rate": 0.001, |
| "loss": 1.709, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.467090129852295, |
| "learning_rate": 0.001, |
| "loss": 1.7089, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.3767518997192383, |
| "learning_rate": 0.001, |
| "loss": 1.7442, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.2017751932144165, |
| "learning_rate": 0.001, |
| "loss": 1.7259, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.2458261251449585, |
| "learning_rate": 0.001, |
| "loss": 1.7122, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.1405456066131592, |
| "learning_rate": 0.001, |
| "loss": 1.7276, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.1807092428207397, |
| "learning_rate": 0.001, |
| "loss": 1.7349, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.2478501796722412, |
| "learning_rate": 0.001, |
| "loss": 1.7379, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.7146801948547363, |
| "learning_rate": 0.001, |
| "loss": 1.7503, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.3230942487716675, |
| "learning_rate": 0.001, |
| "loss": 1.7545, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.4975101947784424, |
| "learning_rate": 0.001, |
| "loss": 1.7507, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.3942639827728271, |
| "learning_rate": 0.001, |
| "loss": 1.7369, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 1.3690227270126343, |
| "learning_rate": 0.001, |
| "loss": 1.7387, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.3362698554992676, |
| "learning_rate": 0.001, |
| "loss": 1.7416, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 1.4864155054092407, |
| "learning_rate": 0.001, |
| "loss": 1.7471, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.328788161277771, |
| "learning_rate": 0.001, |
| "loss": 1.7511, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 1.7826188802719116, |
| "learning_rate": 0.001, |
| "loss": 1.7506, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.579100489616394, |
| "learning_rate": 0.001, |
| "loss": 1.7819, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.4960875511169434, |
| "learning_rate": 0.001, |
| "loss": 1.7827, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.3491441011428833, |
| "learning_rate": 0.001, |
| "loss": 1.7732, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.6198680400848389, |
| "learning_rate": 0.001, |
| "loss": 1.7596, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.4920895099639893, |
| "learning_rate": 0.001, |
| "loss": 1.7824, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.17668879032135, |
| "learning_rate": 0.001, |
| "loss": 1.7766, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.381245493888855, |
| "learning_rate": 0.001, |
| "loss": 1.7859, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 1.8120436668395996, |
| "learning_rate": 0.001, |
| "loss": 1.7456, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.2770622968673706, |
| "learning_rate": 0.001, |
| "loss": 1.6473, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.495666265487671, |
| "learning_rate": 0.001, |
| "loss": 1.6665, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.3319461345672607, |
| "learning_rate": 0.001, |
| "loss": 1.6721, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.181028127670288, |
| "learning_rate": 0.001, |
| "loss": 1.6755, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.7435935735702515, |
| "learning_rate": 0.001, |
| "loss": 1.6728, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 1.2086992263793945, |
| "learning_rate": 0.001, |
| "loss": 1.6858, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.749821662902832, |
| "learning_rate": 0.001, |
| "loss": 1.692, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 1.2589175701141357, |
| "learning_rate": 0.001, |
| "loss": 1.6806, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 1.6938502788543701, |
| "learning_rate": 0.001, |
| "loss": 1.7078, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 1.2834974527359009, |
| "learning_rate": 0.001, |
| "loss": 1.7057, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.4331868886947632, |
| "learning_rate": 0.001, |
| "loss": 1.7216, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 1.5875442028045654, |
| "learning_rate": 0.001, |
| "loss": 1.718, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.463834285736084, |
| "learning_rate": 0.001, |
| "loss": 1.7014, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.7719365358352661, |
| "learning_rate": 0.001, |
| "loss": 1.7055, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.751819372177124, |
| "learning_rate": 0.001, |
| "loss": 1.7061, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.264541506767273, |
| "learning_rate": 0.001, |
| "loss": 1.7394, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.691881537437439, |
| "learning_rate": 0.001, |
| "loss": 1.7198, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 1.7032718658447266, |
| "learning_rate": 0.001, |
| "loss": 1.736, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.6821051836013794, |
| "learning_rate": 0.001, |
| "loss": 1.7352, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.8984935283660889, |
| "learning_rate": 0.001, |
| "loss": 1.7463, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.8338229656219482, |
| "learning_rate": 0.001, |
| "loss": 1.7445, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.3673371076583862, |
| "learning_rate": 0.001, |
| "loss": 1.7467, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.401082992553711, |
| "learning_rate": 0.001, |
| "loss": 1.7207, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.3542741537094116, |
| "learning_rate": 0.001, |
| "loss": 1.7632, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.6251832246780396, |
| "learning_rate": 0.001, |
| "loss": 1.751, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.4141253232955933, |
| "learning_rate": 0.001, |
| "loss": 1.7618, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.387152075767517, |
| "learning_rate": 0.001, |
| "loss": 1.7421, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 2.0019166469573975, |
| "learning_rate": 0.001, |
| "loss": 1.7439, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 1.5084810256958008, |
| "learning_rate": 0.001, |
| "loss": 1.7447, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 1.3848720788955688, |
| "learning_rate": 0.001, |
| "loss": 1.7649, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.547417402267456, |
| "learning_rate": 0.001, |
| "loss": 1.7236, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.2557010650634766, |
| "learning_rate": 0.001, |
| "loss": 1.6258, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.5394682884216309, |
| "learning_rate": 0.001, |
| "loss": 1.6437, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.2757493257522583, |
| "learning_rate": 0.001, |
| "loss": 1.6441, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 1.468744158744812, |
| "learning_rate": 0.001, |
| "loss": 1.6495, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.1792622804641724, |
| "learning_rate": 0.001, |
| "loss": 1.6726, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.330483078956604, |
| "learning_rate": 0.001, |
| "loss": 1.6983, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.6947458982467651, |
| "learning_rate": 0.001, |
| "loss": 1.6798, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.8113237619400024, |
| "learning_rate": 0.001, |
| "loss": 1.6815, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.3059000968933105, |
| "learning_rate": 0.001, |
| "loss": 1.68, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 2.113922119140625, |
| "learning_rate": 0.001, |
| "loss": 1.6893, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.4525998830795288, |
| "learning_rate": 0.001, |
| "loss": 1.6622, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.6511586904525757, |
| "learning_rate": 0.001, |
| "loss": 1.6923, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 1.427304744720459, |
| "learning_rate": 0.001, |
| "loss": 1.6679, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.3397293090820312, |
| "learning_rate": 0.001, |
| "loss": 1.7052, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.3981614112854004, |
| "learning_rate": 0.001, |
| "loss": 1.697, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.4614068269729614, |
| "learning_rate": 0.001, |
| "loss": 1.7255, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.6252069473266602, |
| "learning_rate": 0.001, |
| "loss": 1.7105, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.5491560697555542, |
| "learning_rate": 0.001, |
| "loss": 1.7287, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.5289865732192993, |
| "learning_rate": 0.001, |
| "loss": 1.7312, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 1.3649866580963135, |
| "learning_rate": 0.001, |
| "loss": 1.7001, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.8227938413619995, |
| "learning_rate": 0.001, |
| "loss": 1.7325, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 1.4305684566497803, |
| "learning_rate": 0.001, |
| "loss": 1.7297, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 2.0663459300994873, |
| "learning_rate": 0.001, |
| "loss": 1.7248, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.6483802795410156, |
| "learning_rate": 0.001, |
| "loss": 1.7143, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.3471261262893677, |
| "learning_rate": 0.001, |
| "loss": 1.7331, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 1.501815676689148, |
| "learning_rate": 0.001, |
| "loss": 1.7242, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 2.0373356342315674, |
| "learning_rate": 0.001, |
| "loss": 1.7363, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.8736695051193237, |
| "learning_rate": 0.001, |
| "loss": 1.727, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.5386576652526855, |
| "learning_rate": 0.001, |
| "loss": 1.7602, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 1.287049412727356, |
| "learning_rate": 0.001, |
| "loss": 1.7529, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.3691580295562744, |
| "learning_rate": 0.001, |
| "loss": 1.6845, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.5578200817108154, |
| "learning_rate": 0.001, |
| "loss": 1.624, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.394068956375122, |
| "learning_rate": 0.001, |
| "loss": 1.6299, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.5257561206817627, |
| "learning_rate": 0.001, |
| "loss": 1.6362, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.6746106147766113, |
| "learning_rate": 0.001, |
| "loss": 1.6353, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.3863023519515991, |
| "learning_rate": 0.001, |
| "loss": 1.6562, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.9362484216690063, |
| "learning_rate": 0.001, |
| "loss": 1.6472, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 1.3526794910430908, |
| "learning_rate": 0.001, |
| "loss": 1.6398, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 1.2201699018478394, |
| "learning_rate": 0.001, |
| "loss": 1.6627, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.4147565364837646, |
| "learning_rate": 0.001, |
| "loss": 1.653, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 1.7582532167434692, |
| "learning_rate": 0.001, |
| "loss": 1.6675, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 1.7984539270401, |
| "learning_rate": 0.001, |
| "loss": 1.6676, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.4224402904510498, |
| "learning_rate": 0.001, |
| "loss": 1.6743, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 1.6389150619506836, |
| "learning_rate": 0.001, |
| "loss": 1.6881, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 1.8313899040222168, |
| "learning_rate": 0.001, |
| "loss": 1.6708, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 2.3224472999572754, |
| "learning_rate": 0.001, |
| "loss": 1.6873, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 1.3956483602523804, |
| "learning_rate": 0.001, |
| "loss": 1.6993, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.9109262228012085, |
| "learning_rate": 0.001, |
| "loss": 1.6995, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.7382196187973022, |
| "learning_rate": 0.001, |
| "loss": 1.6979, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 1.6517763137817383, |
| "learning_rate": 0.001, |
| "loss": 1.6993, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 1.7835556268692017, |
| "learning_rate": 0.001, |
| "loss": 1.6925, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 2.0330710411071777, |
| "learning_rate": 0.001, |
| "loss": 1.7008, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.426033854484558, |
| "learning_rate": 0.001, |
| "loss": 1.7268, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.7070313692092896, |
| "learning_rate": 0.001, |
| "loss": 1.7111, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 2.436343193054199, |
| "learning_rate": 0.001, |
| "loss": 1.7108, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.3666476011276245, |
| "learning_rate": 0.001, |
| "loss": 1.7239, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.7341768741607666, |
| "learning_rate": 0.001, |
| "loss": 1.7275, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 1.452822208404541, |
| "learning_rate": 0.001, |
| "loss": 1.7243, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.5342082977294922, |
| "learning_rate": 0.001, |
| "loss": 1.7287, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.5138624906539917, |
| "learning_rate": 0.001, |
| "loss": 1.7283, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 1.7864232063293457, |
| "learning_rate": 0.001, |
| "loss": 1.7306, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 1.4858065843582153, |
| "learning_rate": 0.001, |
| "loss": 1.6671, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 2.3251335620880127, |
| "learning_rate": 0.001, |
| "loss": 1.6027, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 2.122349262237549, |
| "learning_rate": 0.001, |
| "loss": 1.6185, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.4440858364105225, |
| "learning_rate": 0.001, |
| "loss": 1.6143, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 1.6865544319152832, |
| "learning_rate": 0.001, |
| "loss": 1.6151, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 1.3845254182815552, |
| "learning_rate": 0.001, |
| "loss": 1.6216, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 1.5724061727523804, |
| "learning_rate": 0.001, |
| "loss": 1.6334, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 1.6744935512542725, |
| "learning_rate": 0.001, |
| "loss": 1.6441, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 1.6147488355636597, |
| "learning_rate": 0.001, |
| "loss": 1.6485, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 1.930953025817871, |
| "learning_rate": 0.001, |
| "loss": 1.6538, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 1.6779670715332031, |
| "learning_rate": 0.001, |
| "loss": 1.6641, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 2.094191074371338, |
| "learning_rate": 0.001, |
| "loss": 1.638, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 1.5966159105300903, |
| "learning_rate": 0.001, |
| "loss": 1.6603, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 1.9661147594451904, |
| "learning_rate": 0.001, |
| "loss": 1.6588, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 2.126356363296509, |
| "learning_rate": 0.001, |
| "loss": 1.6594, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 1.5801960229873657, |
| "learning_rate": 0.001, |
| "loss": 1.6669, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 1.6362019777297974, |
| "learning_rate": 0.001, |
| "loss": 1.6799, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 1.570550799369812, |
| "learning_rate": 0.001, |
| "loss": 1.6684, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.7719006538391113, |
| "learning_rate": 0.001, |
| "loss": 1.6921, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.702435851097107, |
| "learning_rate": 0.001, |
| "loss": 1.6774, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 1.8023364543914795, |
| "learning_rate": 0.001, |
| "loss": 1.71, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 1.429628849029541, |
| "learning_rate": 0.001, |
| "loss": 1.6754, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 1.6636650562286377, |
| "learning_rate": 0.001, |
| "loss": 1.6908, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 1.9453206062316895, |
| "learning_rate": 0.001, |
| "loss": 1.7009, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 1.7370038032531738, |
| "learning_rate": 0.001, |
| "loss": 1.7091, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 1.8082547187805176, |
| "learning_rate": 0.001, |
| "loss": 1.7062, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.7985368967056274, |
| "learning_rate": 0.001, |
| "loss": 1.6996, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 1.752468466758728, |
| "learning_rate": 0.001, |
| "loss": 1.7066, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 1.7616372108459473, |
| "learning_rate": 0.001, |
| "loss": 1.6999, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.5394361019134521, |
| "learning_rate": 0.001, |
| "loss": 1.7093, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 1.9267899990081787, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 1.9508917331695557, |
| "learning_rate": 0.001, |
| "loss": 1.6271, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 1.8610914945602417, |
| "learning_rate": 0.001, |
| "loss": 1.5836, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 1.9641599655151367, |
| "learning_rate": 0.001, |
| "loss": 1.6183, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 1.5812021493911743, |
| "learning_rate": 0.001, |
| "loss": 1.5993, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 2.2505440711975098, |
| "learning_rate": 0.001, |
| "loss": 1.6031, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 1.7632458209991455, |
| "learning_rate": 0.001, |
| "loss": 1.6272, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 1.9542481899261475, |
| "learning_rate": 0.001, |
| "loss": 1.6128, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 2.813657283782959, |
| "learning_rate": 0.001, |
| "loss": 1.6198, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 1.9514821767807007, |
| "learning_rate": 0.001, |
| "loss": 1.6152, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 2.014662981033325, |
| "learning_rate": 0.001, |
| "loss": 1.6251, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 2.202014446258545, |
| "learning_rate": 0.001, |
| "loss": 1.6405, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 2.675868034362793, |
| "learning_rate": 0.001, |
| "loss": 1.6475, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 2.102691888809204, |
| "learning_rate": 0.001, |
| "loss": 1.6568, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 2.1783900260925293, |
| "learning_rate": 0.001, |
| "loss": 1.637, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 1.476436734199524, |
| "learning_rate": 0.001, |
| "loss": 1.6476, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 1.7832789421081543, |
| "learning_rate": 0.001, |
| "loss": 1.6625, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 2.12839674949646, |
| "learning_rate": 0.001, |
| "loss": 1.6465, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 1.5003304481506348, |
| "learning_rate": 0.001, |
| "loss": 1.6794, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 1.5726544857025146, |
| "learning_rate": 0.001, |
| "loss": 1.6767, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 2.2940423488616943, |
| "learning_rate": 0.001, |
| "loss": 1.6638, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 1.992763638496399, |
| "learning_rate": 0.001, |
| "loss": 1.6713, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 2.10316801071167, |
| "learning_rate": 0.001, |
| "loss": 1.6719, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 1.1966867446899414, |
| "learning_rate": 0.001, |
| "loss": 1.6687, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 1.6396232843399048, |
| "learning_rate": 0.001, |
| "loss": 1.6927, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 2.2217977046966553, |
| "learning_rate": 0.001, |
| "loss": 1.6911, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 1.3404631614685059, |
| "learning_rate": 0.001, |
| "loss": 1.6951, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 2.155172348022461, |
| "learning_rate": 0.001, |
| "loss": 1.7034, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 1.7725328207015991, |
| "learning_rate": 0.001, |
| "loss": 1.6975, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 1.6837446689605713, |
| "learning_rate": 0.001, |
| "loss": 1.7219, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 1.77871835231781, |
| "learning_rate": 0.001, |
| "loss": 1.7004, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 1.9019044637680054, |
| "learning_rate": 0.001, |
| "loss": 1.7157, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.3710943460464478, |
| "learning_rate": 0.001, |
| "loss": 1.6493, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 1.582306146621704, |
| "learning_rate": 0.001, |
| "loss": 1.5689, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.378099799156189, |
| "learning_rate": 0.001, |
| "loss": 1.5964, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.4372540712356567, |
| "learning_rate": 0.001, |
| "loss": 1.5997, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.6468878984451294, |
| "learning_rate": 0.001, |
| "loss": 1.5879, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.480918049812317, |
| "learning_rate": 0.001, |
| "loss": 1.5902, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 1.351524829864502, |
| "learning_rate": 0.001, |
| "loss": 1.6, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.486530065536499, |
| "learning_rate": 0.001, |
| "loss": 1.5967, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.9295352697372437, |
| "learning_rate": 0.001, |
| "loss": 1.6096, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.7292808294296265, |
| "learning_rate": 0.001, |
| "loss": 1.6064, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.8573253154754639, |
| "learning_rate": 0.001, |
| "loss": 1.6132, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 1.320823311805725, |
| "learning_rate": 0.001, |
| "loss": 1.6472, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.955378532409668, |
| "learning_rate": 0.001, |
| "loss": 1.6251, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.4546319246292114, |
| "learning_rate": 0.001, |
| "loss": 1.6083, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 2.06282639503479, |
| "learning_rate": 0.001, |
| "loss": 1.6473, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.3789846897125244, |
| "learning_rate": 0.001, |
| "loss": 1.64, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 1.3253904581069946, |
| "learning_rate": 0.001, |
| "loss": 1.643, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.5497514009475708, |
| "learning_rate": 0.001, |
| "loss": 1.6411, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.4692950248718262, |
| "learning_rate": 0.001, |
| "loss": 1.6484, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.3008520603179932, |
| "learning_rate": 0.001, |
| "loss": 1.6697, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.8452352285385132, |
| "learning_rate": 0.001, |
| "loss": 1.6622, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.386189341545105, |
| "learning_rate": 0.001, |
| "loss": 1.6648, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.5888075828552246, |
| "learning_rate": 0.001, |
| "loss": 1.6699, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.8917977809906006, |
| "learning_rate": 0.001, |
| "loss": 1.6639, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.619673252105713, |
| "learning_rate": 0.001, |
| "loss": 1.67, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 1.4577540159225464, |
| "learning_rate": 0.001, |
| "loss": 1.6833, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.8329697847366333, |
| "learning_rate": 0.001, |
| "loss": 1.6777, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.7369554042816162, |
| "learning_rate": 0.001, |
| "loss": 1.6815, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.4946892261505127, |
| "learning_rate": 0.001, |
| "loss": 1.6964, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.434802532196045, |
| "learning_rate": 0.001, |
| "loss": 1.695, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 2.0379035472869873, |
| "learning_rate": 0.001, |
| "loss": 1.699, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.2588499784469604, |
| "learning_rate": 0.001, |
| "loss": 1.6163, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.443988561630249, |
| "learning_rate": 0.001, |
| "loss": 1.5757, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.7067530155181885, |
| "learning_rate": 0.001, |
| "loss": 1.5694, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 1.8109402656555176, |
| "learning_rate": 0.001, |
| "loss": 1.5745, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.415626883506775, |
| "learning_rate": 0.001, |
| "loss": 1.5756, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.2374643087387085, |
| "learning_rate": 0.001, |
| "loss": 1.5818, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.1942877769470215, |
| "learning_rate": 0.001, |
| "loss": 1.6047, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.377187967300415, |
| "learning_rate": 0.001, |
| "loss": 1.5992, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.3861560821533203, |
| "learning_rate": 0.001, |
| "loss": 1.6016, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.5110067129135132, |
| "learning_rate": 0.001, |
| "loss": 1.612, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.3622841835021973, |
| "learning_rate": 0.001, |
| "loss": 1.6092, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.599679708480835, |
| "learning_rate": 0.001, |
| "loss": 1.6228, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.0782546997070312, |
| "learning_rate": 0.001, |
| "loss": 1.6112, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.423081398010254, |
| "learning_rate": 0.001, |
| "loss": 1.6277, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.9224975109100342, |
| "learning_rate": 0.001, |
| "loss": 1.6241, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.9734402894973755, |
| "learning_rate": 0.001, |
| "loss": 1.6014, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 1.341483235359192, |
| "learning_rate": 0.001, |
| "loss": 1.6249, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.2303094863891602, |
| "learning_rate": 0.001, |
| "loss": 1.6322, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.8994379043579102, |
| "learning_rate": 0.001, |
| "loss": 1.65, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.8510922193527222, |
| "learning_rate": 0.001, |
| "loss": 1.6313, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 1.5857359170913696, |
| "learning_rate": 0.001, |
| "loss": 1.6423, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.4746818542480469, |
| "learning_rate": 0.001, |
| "loss": 1.6534, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.5729411840438843, |
| "learning_rate": 0.001, |
| "loss": 1.6496, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.5360504388809204, |
| "learning_rate": 0.001, |
| "loss": 1.6552, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.364283800125122, |
| "learning_rate": 0.001, |
| "loss": 1.6658, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.3980382680892944, |
| "learning_rate": 0.001, |
| "loss": 1.6547, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.4113725423812866, |
| "learning_rate": 0.001, |
| "loss": 1.6739, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.829128623008728, |
| "learning_rate": 0.001, |
| "loss": 1.6694, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 1.3401802778244019, |
| "learning_rate": 0.001, |
| "loss": 1.6741, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.678005337715149, |
| "learning_rate": 0.001, |
| "loss": 1.6568, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.1385924816131592, |
| "learning_rate": 0.001, |
| "loss": 1.6321, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.8373512029647827, |
| "learning_rate": 0.001, |
| "loss": 1.6038, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.5865776538848877, |
| "learning_rate": 0.001, |
| "loss": 1.5343, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.5001276731491089, |
| "learning_rate": 0.001, |
| "loss": 1.5549, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 1.2797919511795044, |
| "learning_rate": 0.001, |
| "loss": 1.5443, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.455429196357727, |
| "learning_rate": 0.001, |
| "loss": 1.5619, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.4949995279312134, |
| "learning_rate": 0.001, |
| "loss": 1.5724, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.2570242881774902, |
| "learning_rate": 0.001, |
| "loss": 1.5687, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.2480852603912354, |
| "learning_rate": 0.001, |
| "loss": 1.5789, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.5493923425674438, |
| "learning_rate": 0.001, |
| "loss": 1.5761, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.478066086769104, |
| "learning_rate": 0.001, |
| "loss": 1.5899, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.2514175176620483, |
| "learning_rate": 0.001, |
| "loss": 1.5733, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.3240177631378174, |
| "learning_rate": 0.001, |
| "loss": 1.6065, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.09498929977417, |
| "learning_rate": 0.001, |
| "loss": 1.6061, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.4801580905914307, |
| "learning_rate": 0.001, |
| "loss": 1.6009, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.4095863103866577, |
| "learning_rate": 0.001, |
| "loss": 1.606, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.3383103609085083, |
| "learning_rate": 0.001, |
| "loss": 1.5947, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.3287218809127808, |
| "learning_rate": 0.001, |
| "loss": 1.6218, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.1489287614822388, |
| "learning_rate": 0.001, |
| "loss": 1.6112, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.3265559673309326, |
| "learning_rate": 0.001, |
| "loss": 1.6198, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.4024713039398193, |
| "learning_rate": 0.001, |
| "loss": 1.6408, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.2570629119873047, |
| "learning_rate": 0.001, |
| "loss": 1.6241, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.234506607055664, |
| "learning_rate": 0.001, |
| "loss": 1.6337, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.523465871810913, |
| "learning_rate": 0.001, |
| "loss": 1.6442, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.3609179258346558, |
| "learning_rate": 0.001, |
| "loss": 1.6457, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 2.108837604522705, |
| "learning_rate": 0.001, |
| "loss": 1.6568, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.3636060953140259, |
| "learning_rate": 0.001, |
| "loss": 1.6442, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.3937108516693115, |
| "learning_rate": 0.001, |
| "loss": 1.6542, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.1740082502365112, |
| "learning_rate": 0.001, |
| "loss": 1.6613, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.1534056663513184, |
| "learning_rate": 0.001, |
| "loss": 1.6553, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.6877473592758179, |
| "learning_rate": 0.001, |
| "loss": 1.6476, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.163017988204956, |
| "learning_rate": 0.001, |
| "loss": 1.6809, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.32700777053833, |
| "learning_rate": 0.001, |
| "loss": 1.5725, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.7165447473526, |
| "learning_rate": 0.001, |
| "loss": 1.5315, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 1.582828402519226, |
| "learning_rate": 0.001, |
| "loss": 1.542, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 1.316243290901184, |
| "learning_rate": 0.001, |
| "loss": 1.5475, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.4395389556884766, |
| "learning_rate": 0.001, |
| "loss": 1.5609, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 1.7110323905944824, |
| "learning_rate": 0.001, |
| "loss": 1.5623, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.58608078956604, |
| "learning_rate": 0.001, |
| "loss": 1.5753, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.436051845550537, |
| "learning_rate": 0.001, |
| "loss": 1.5688, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 1.8955553770065308, |
| "learning_rate": 0.001, |
| "loss": 1.5732, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.6417391300201416, |
| "learning_rate": 0.001, |
| "loss": 1.5687, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.4715759754180908, |
| "learning_rate": 0.001, |
| "loss": 1.568, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.410852074623108, |
| "learning_rate": 0.001, |
| "loss": 1.5836, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.6234686374664307, |
| "learning_rate": 0.001, |
| "loss": 1.5823, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.5069587230682373, |
| "learning_rate": 0.001, |
| "loss": 1.5963, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.4912326335906982, |
| "learning_rate": 0.001, |
| "loss": 1.6026, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 2.0091121196746826, |
| "learning_rate": 0.001, |
| "loss": 1.5956, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.2358951568603516, |
| "learning_rate": 0.001, |
| "loss": 1.5907, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.4463825225830078, |
| "learning_rate": 0.001, |
| "loss": 1.5941, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.6849217414855957, |
| "learning_rate": 0.001, |
| "loss": 1.6197, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.4761693477630615, |
| "learning_rate": 0.001, |
| "loss": 1.6111, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 1.9993269443511963, |
| "learning_rate": 0.001, |
| "loss": 1.6298, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.3961095809936523, |
| "learning_rate": 0.001, |
| "loss": 1.6271, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 1.3440773487091064, |
| "learning_rate": 0.001, |
| "loss": 1.6165, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.7641288042068481, |
| "learning_rate": 0.001, |
| "loss": 1.6196, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 2.0289649963378906, |
| "learning_rate": 0.001, |
| "loss": 1.6436, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.651640772819519, |
| "learning_rate": 0.001, |
| "loss": 1.628, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.3994982242584229, |
| "learning_rate": 0.001, |
| "loss": 1.6434, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 1.3219499588012695, |
| "learning_rate": 0.001, |
| "loss": 1.6448, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.4943182468414307, |
| "learning_rate": 0.001, |
| "loss": 1.6185, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.338044285774231, |
| "learning_rate": 0.001, |
| "loss": 1.6472, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.2849805355072021, |
| "learning_rate": 0.001, |
| "loss": 1.6408, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.4934114217758179, |
| "learning_rate": 0.001, |
| "loss": 1.549, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 1.7186708450317383, |
| "learning_rate": 0.001, |
| "loss": 1.5166, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 1.237968921661377, |
| "learning_rate": 0.001, |
| "loss": 1.5266, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 1.4882655143737793, |
| "learning_rate": 0.001, |
| "loss": 1.5339, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.618738055229187, |
| "learning_rate": 0.001, |
| "loss": 1.5317, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 2.116640090942383, |
| "learning_rate": 0.001, |
| "loss": 1.5316, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.3013315200805664, |
| "learning_rate": 0.001, |
| "loss": 1.5584, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 2.401732921600342, |
| "learning_rate": 0.001, |
| "loss": 1.5343, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.1736658811569214, |
| "learning_rate": 0.001, |
| "loss": 1.5728, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.4418681859970093, |
| "learning_rate": 0.001, |
| "loss": 1.5624, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.4937100410461426, |
| "learning_rate": 0.001, |
| "loss": 1.5712, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.6570427417755127, |
| "learning_rate": 0.001, |
| "loss": 1.5789, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.9760403633117676, |
| "learning_rate": 0.001, |
| "loss": 1.5811, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 1.6386915445327759, |
| "learning_rate": 0.001, |
| "loss": 1.5614, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 1.7690401077270508, |
| "learning_rate": 0.001, |
| "loss": 1.563, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.3809525966644287, |
| "learning_rate": 0.001, |
| "loss": 1.5941, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.2929129600524902, |
| "learning_rate": 0.001, |
| "loss": 1.6022, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.6390870809555054, |
| "learning_rate": 0.001, |
| "loss": 1.5974, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 1.5908600091934204, |
| "learning_rate": 0.001, |
| "loss": 1.605, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.6766973733901978, |
| "learning_rate": 0.001, |
| "loss": 1.6013, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 1.483106017112732, |
| "learning_rate": 0.001, |
| "loss": 1.6074, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.9718176126480103, |
| "learning_rate": 0.001, |
| "loss": 1.5976, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.3451755046844482, |
| "learning_rate": 0.001, |
| "loss": 1.607, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.6685712337493896, |
| "learning_rate": 0.001, |
| "loss": 1.6157, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.4344922304153442, |
| "learning_rate": 0.001, |
| "loss": 1.6238, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.1716681718826294, |
| "learning_rate": 0.001, |
| "loss": 1.613, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.5936914682388306, |
| "learning_rate": 0.001, |
| "loss": 1.622, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.8645175695419312, |
| "learning_rate": 0.001, |
| "loss": 1.6352, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.4023220539093018, |
| "learning_rate": 0.001, |
| "loss": 1.6131, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.3344358205795288, |
| "learning_rate": 0.001, |
| "loss": 1.6347, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 1.6620981693267822, |
| "learning_rate": 0.001, |
| "loss": 1.606, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.550585389137268, |
| "learning_rate": 0.001, |
| "loss": 1.5141, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 2.354085922241211, |
| "learning_rate": 0.001, |
| "loss": 1.5095, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.6882182359695435, |
| "learning_rate": 0.001, |
| "loss": 1.5126, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.8086196184158325, |
| "learning_rate": 0.001, |
| "loss": 1.5342, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.2464903593063354, |
| "learning_rate": 0.001, |
| "loss": 1.5145, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.5906083583831787, |
| "learning_rate": 0.001, |
| "loss": 1.5339, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.7611628770828247, |
| "learning_rate": 0.001, |
| "loss": 1.5241, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.8777153491973877, |
| "learning_rate": 0.001, |
| "loss": 1.5343, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 1.7436455488204956, |
| "learning_rate": 0.001, |
| "loss": 1.5538, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.5883677005767822, |
| "learning_rate": 0.001, |
| "loss": 1.5295, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 1.8835762739181519, |
| "learning_rate": 0.001, |
| "loss": 1.5582, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.5518923997879028, |
| "learning_rate": 0.001, |
| "loss": 1.5665, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 1.4821540117263794, |
| "learning_rate": 0.001, |
| "loss": 1.5448, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.5595805644989014, |
| "learning_rate": 0.001, |
| "loss": 1.577, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 1.2543803453445435, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.782151699066162, |
| "learning_rate": 0.001, |
| "loss": 1.5728, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 1.6886430978775024, |
| "learning_rate": 0.001, |
| "loss": 1.5822, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.2113796472549438, |
| "learning_rate": 0.001, |
| "loss": 1.5753, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.291333794593811, |
| "learning_rate": 0.001, |
| "loss": 1.5937, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 2.103907823562622, |
| "learning_rate": 0.001, |
| "loss": 1.5899, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.4811424016952515, |
| "learning_rate": 0.001, |
| "loss": 1.5865, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 1.5195327997207642, |
| "learning_rate": 0.001, |
| "loss": 1.598, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.237692952156067, |
| "learning_rate": 0.001, |
| "loss": 1.5986, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.5451624393463135, |
| "learning_rate": 0.001, |
| "loss": 1.6045, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.2580842971801758, |
| "learning_rate": 0.001, |
| "loss": 1.6003, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 1.950588345527649, |
| "learning_rate": 0.001, |
| "loss": 1.6034, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 2.6268444061279297, |
| "learning_rate": 0.001, |
| "loss": 1.618, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.5038946866989136, |
| "learning_rate": 0.001, |
| "loss": 1.6149, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.7941875457763672, |
| "learning_rate": 0.001, |
| "loss": 1.6236, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 1.6113241910934448, |
| "learning_rate": 0.001, |
| "loss": 1.6138, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 2.102522134780884, |
| "learning_rate": 0.001, |
| "loss": 1.5985, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.8292262554168701, |
| "learning_rate": 0.001, |
| "loss": 1.4972, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.5576742887496948, |
| "learning_rate": 0.001, |
| "loss": 1.5018, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 1.8177223205566406, |
| "learning_rate": 0.001, |
| "loss": 1.5011, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 1.7465415000915527, |
| "learning_rate": 0.001, |
| "loss": 1.5105, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.664346694946289, |
| "learning_rate": 0.001, |
| "loss": 1.5247, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 1.8005129098892212, |
| "learning_rate": 0.001, |
| "loss": 1.5064, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 1.9255499839782715, |
| "learning_rate": 0.001, |
| "loss": 1.5209, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.71510648727417, |
| "learning_rate": 0.001, |
| "loss": 1.5359, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 1.9309266805648804, |
| "learning_rate": 0.001, |
| "loss": 1.5303, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.8289529085159302, |
| "learning_rate": 0.001, |
| "loss": 1.5395, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 1.4219285249710083, |
| "learning_rate": 0.001, |
| "loss": 1.5633, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.34114408493042, |
| "learning_rate": 0.001, |
| "loss": 1.5322, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 2.261221408843994, |
| "learning_rate": 0.001, |
| "loss": 1.55, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.7596498727798462, |
| "learning_rate": 0.001, |
| "loss": 1.5583, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 2.1398353576660156, |
| "learning_rate": 0.001, |
| "loss": 1.5551, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 1.5906637907028198, |
| "learning_rate": 0.001, |
| "loss": 1.5674, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 1.7475155591964722, |
| "learning_rate": 0.001, |
| "loss": 1.5704, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 1.7171398401260376, |
| "learning_rate": 0.001, |
| "loss": 1.5796, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.4808603525161743, |
| "learning_rate": 0.001, |
| "loss": 1.5742, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 2.099364995956421, |
| "learning_rate": 0.001, |
| "loss": 1.5767, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.7997703552246094, |
| "learning_rate": 0.001, |
| "loss": 1.5549, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 1.9364619255065918, |
| "learning_rate": 0.001, |
| "loss": 1.5791, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 1.8328566551208496, |
| "learning_rate": 0.001, |
| "loss": 1.5916, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.5092872381210327, |
| "learning_rate": 0.001, |
| "loss": 1.5775, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 1.8696229457855225, |
| "learning_rate": 0.001, |
| "loss": 1.574, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.8740304708480835, |
| "learning_rate": 0.001, |
| "loss": 1.5895, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 1.6615220308303833, |
| "learning_rate": 0.001, |
| "loss": 1.596, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.6122123003005981, |
| "learning_rate": 0.001, |
| "loss": 1.5956, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 1.8043103218078613, |
| "learning_rate": 0.001, |
| "loss": 1.5898, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 1.4336738586425781, |
| "learning_rate": 0.001, |
| "loss": 1.6143, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 2.6835567951202393, |
| "learning_rate": 0.001, |
| "loss": 1.5645, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 1.7635300159454346, |
| "learning_rate": 0.001, |
| "loss": 1.4718, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 2.1710870265960693, |
| "learning_rate": 0.001, |
| "loss": 1.4931, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 1.8564825057983398, |
| "learning_rate": 0.001, |
| "loss": 1.5021, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 2.041884422302246, |
| "learning_rate": 0.001, |
| "loss": 1.51, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 1.9597643613815308, |
| "learning_rate": 0.001, |
| "loss": 1.5013, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 2.2390921115875244, |
| "learning_rate": 0.001, |
| "loss": 1.5206, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 2.3989622592926025, |
| "learning_rate": 0.001, |
| "loss": 1.5063, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 2.2737696170806885, |
| "learning_rate": 0.001, |
| "loss": 1.5145, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 1.896588683128357, |
| "learning_rate": 0.001, |
| "loss": 1.5299, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 3.11960506439209, |
| "learning_rate": 0.001, |
| "loss": 1.5229, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 2.0307984352111816, |
| "learning_rate": 0.001, |
| "loss": 1.5363, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 1.7273118495941162, |
| "learning_rate": 0.001, |
| "loss": 1.536, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 2.4120867252349854, |
| "learning_rate": 0.001, |
| "loss": 1.5523, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 1.8841583728790283, |
| "learning_rate": 0.001, |
| "loss": 1.535, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 2.2487380504608154, |
| "learning_rate": 0.001, |
| "loss": 1.5375, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 2.0784618854522705, |
| "learning_rate": 0.001, |
| "loss": 1.5397, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 2.4231741428375244, |
| "learning_rate": 0.001, |
| "loss": 1.5446, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 2.341740131378174, |
| "learning_rate": 0.001, |
| "loss": 1.5486, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 1.7603386640548706, |
| "learning_rate": 0.001, |
| "loss": 1.5682, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 2.165457248687744, |
| "learning_rate": 0.001, |
| "loss": 1.5494, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 1.895553708076477, |
| "learning_rate": 0.001, |
| "loss": 1.5577, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 2.9701766967773438, |
| "learning_rate": 0.001, |
| "loss": 1.5556, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 1.9630506038665771, |
| "learning_rate": 0.001, |
| "loss": 1.5856, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 2.3503477573394775, |
| "learning_rate": 0.001, |
| "loss": 1.561, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 2.1852824687957764, |
| "learning_rate": 0.001, |
| "loss": 1.5687, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 1.8669474124908447, |
| "learning_rate": 0.001, |
| "loss": 1.5816, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 1.9535120725631714, |
| "learning_rate": 0.001, |
| "loss": 1.5901, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 2.5935044288635254, |
| "learning_rate": 0.001, |
| "loss": 1.5801, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 1.8464397192001343, |
| "learning_rate": 0.001, |
| "loss": 1.5987, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 2.1178231239318848, |
| "learning_rate": 0.001, |
| "loss": 1.5834, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.3759008646011353, |
| "learning_rate": 0.001, |
| "loss": 1.6042, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.1606357097625732, |
| "learning_rate": 0.001, |
| "loss": 1.4657, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.3527188301086426, |
| "learning_rate": 0.001, |
| "loss": 1.4664, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.8263195753097534, |
| "learning_rate": 0.001, |
| "loss": 1.485, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 2.2132692337036133, |
| "learning_rate": 0.001, |
| "loss": 1.4937, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.8447751998901367, |
| "learning_rate": 0.001, |
| "loss": 1.4897, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.335484266281128, |
| "learning_rate": 0.001, |
| "loss": 1.4808, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.5198925733566284, |
| "learning_rate": 0.001, |
| "loss": 1.5121, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 1.7248674631118774, |
| "learning_rate": 0.001, |
| "loss": 1.5077, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 1.5273159742355347, |
| "learning_rate": 0.001, |
| "loss": 1.5231, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 2.1961324214935303, |
| "learning_rate": 0.001, |
| "loss": 1.5029, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.5844553709030151, |
| "learning_rate": 0.001, |
| "loss": 1.5136, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.2531520128250122, |
| "learning_rate": 0.001, |
| "loss": 1.5301, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 1.56679368019104, |
| "learning_rate": 0.001, |
| "loss": 1.5119, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.5963170528411865, |
| "learning_rate": 0.001, |
| "loss": 1.5456, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.8117672204971313, |
| "learning_rate": 0.001, |
| "loss": 1.536, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 1.429930567741394, |
| "learning_rate": 0.001, |
| "loss": 1.5189, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 2.074106216430664, |
| "learning_rate": 0.001, |
| "loss": 1.5547, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 1.420963168144226, |
| "learning_rate": 0.001, |
| "loss": 1.5466, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.265044093132019, |
| "learning_rate": 0.001, |
| "loss": 1.5397, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.8755286931991577, |
| "learning_rate": 0.001, |
| "loss": 1.5581, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 1.8416346311569214, |
| "learning_rate": 0.001, |
| "loss": 1.5525, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.696976900100708, |
| "learning_rate": 0.001, |
| "loss": 1.5643, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 1.5795621871948242, |
| "learning_rate": 0.001, |
| "loss": 1.561, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.3417747020721436, |
| "learning_rate": 0.001, |
| "loss": 1.5681, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.4509358406066895, |
| "learning_rate": 0.001, |
| "loss": 1.5706, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 1.5150309801101685, |
| "learning_rate": 0.001, |
| "loss": 1.561, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.656905174255371, |
| "learning_rate": 0.001, |
| "loss": 1.57, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 1.7837170362472534, |
| "learning_rate": 0.001, |
| "loss": 1.5764, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 1.3933733701705933, |
| "learning_rate": 0.001, |
| "loss": 1.5688, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.3882452249526978, |
| "learning_rate": 0.001, |
| "loss": 1.5767, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.3166024684906006, |
| "learning_rate": 0.001, |
| "loss": 1.5777, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.8539063930511475, |
| "learning_rate": 0.001, |
| "loss": 1.4721, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.6465736627578735, |
| "learning_rate": 0.001, |
| "loss": 1.4729, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 1.6113590002059937, |
| "learning_rate": 0.001, |
| "loss": 1.4844, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.2973045110702515, |
| "learning_rate": 0.001, |
| "loss": 1.4766, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.8479392528533936, |
| "learning_rate": 0.001, |
| "loss": 1.4733, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 2.0937154293060303, |
| "learning_rate": 0.001, |
| "loss": 1.4808, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.845787525177002, |
| "learning_rate": 0.001, |
| "loss": 1.4955, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.343607783317566, |
| "learning_rate": 0.001, |
| "loss": 1.4973, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.5876832008361816, |
| "learning_rate": 0.001, |
| "loss": 1.4992, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.8784267902374268, |
| "learning_rate": 0.001, |
| "loss": 1.4917, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.451918601989746, |
| "learning_rate": 0.001, |
| "loss": 1.5066, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.790123701095581, |
| "learning_rate": 0.001, |
| "loss": 1.5211, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 1.447482943534851, |
| "learning_rate": 0.001, |
| "loss": 1.521, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.4804128408432007, |
| "learning_rate": 0.001, |
| "loss": 1.5163, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.4562474489212036, |
| "learning_rate": 0.001, |
| "loss": 1.5279, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 2.2251856327056885, |
| "learning_rate": 0.001, |
| "loss": 1.5263, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.5162007808685303, |
| "learning_rate": 0.001, |
| "loss": 1.5191, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.2665784358978271, |
| "learning_rate": 0.001, |
| "loss": 1.5238, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.884289264678955, |
| "learning_rate": 0.001, |
| "loss": 1.5426, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.6486436128616333, |
| "learning_rate": 0.001, |
| "loss": 1.5336, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 1.4439828395843506, |
| "learning_rate": 0.001, |
| "loss": 1.5398, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.2459328174591064, |
| "learning_rate": 0.001, |
| "loss": 1.5498, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 1.3897027969360352, |
| "learning_rate": 0.001, |
| "loss": 1.5223, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.3424687385559082, |
| "learning_rate": 0.001, |
| "loss": 1.529, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 1.7006032466888428, |
| "learning_rate": 0.001, |
| "loss": 1.5353, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.9055922031402588, |
| "learning_rate": 0.001, |
| "loss": 1.5481, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.2229498624801636, |
| "learning_rate": 0.001, |
| "loss": 1.5771, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 1.1328636407852173, |
| "learning_rate": 0.001, |
| "loss": 1.5554, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.1441473960876465, |
| "learning_rate": 0.001, |
| "loss": 1.5656, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.0366441011428833, |
| "learning_rate": 0.001, |
| "loss": 1.5629, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.6409794092178345, |
| "learning_rate": 0.001, |
| "loss": 1.5575, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.6734297275543213, |
| "learning_rate": 0.001, |
| "loss": 1.46, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.6403851509094238, |
| "learning_rate": 0.001, |
| "loss": 1.4606, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.1308412551879883, |
| "learning_rate": 0.001, |
| "loss": 1.4333, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 1.4857252836227417, |
| "learning_rate": 0.001, |
| "loss": 1.4513, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.3973990678787231, |
| "learning_rate": 0.001, |
| "loss": 1.4633, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.6644614934921265, |
| "learning_rate": 0.001, |
| "loss": 1.4753, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.3670461177825928, |
| "learning_rate": 0.001, |
| "loss": 1.4784, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.745236873626709, |
| "learning_rate": 0.001, |
| "loss": 1.4781, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.3107473850250244, |
| "learning_rate": 0.001, |
| "loss": 1.4922, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.3133132457733154, |
| "learning_rate": 0.001, |
| "loss": 1.4846, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.6218743324279785, |
| "learning_rate": 0.001, |
| "loss": 1.4851, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.9341623783111572, |
| "learning_rate": 0.001, |
| "loss": 1.5117, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 2.1856162548065186, |
| "learning_rate": 0.001, |
| "loss": 1.4999, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.6803065538406372, |
| "learning_rate": 0.001, |
| "loss": 1.5102, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.2509381771087646, |
| "learning_rate": 0.001, |
| "loss": 1.5022, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 1.7496399879455566, |
| "learning_rate": 0.001, |
| "loss": 1.5154, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.5907295942306519, |
| "learning_rate": 0.001, |
| "loss": 1.5101, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.764585256576538, |
| "learning_rate": 0.001, |
| "loss": 1.4961, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.676477074623108, |
| "learning_rate": 0.001, |
| "loss": 1.5318, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 1.4604623317718506, |
| "learning_rate": 0.001, |
| "loss": 1.5397, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.1267753839492798, |
| "learning_rate": 0.001, |
| "loss": 1.5256, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.5615739822387695, |
| "learning_rate": 0.001, |
| "loss": 1.5222, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.6060326099395752, |
| "learning_rate": 0.001, |
| "loss": 1.5438, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.4747138023376465, |
| "learning_rate": 0.001, |
| "loss": 1.5469, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.2557308673858643, |
| "learning_rate": 0.001, |
| "loss": 1.5408, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.4080893993377686, |
| "learning_rate": 0.001, |
| "loss": 1.5441, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 1.555981159210205, |
| "learning_rate": 0.001, |
| "loss": 1.5635, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 1.2787331342697144, |
| "learning_rate": 0.001, |
| "loss": 1.5212, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.1718072891235352, |
| "learning_rate": 0.001, |
| "loss": 1.5554, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.5635219812393188, |
| "learning_rate": 0.001, |
| "loss": 1.5665, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.2549020051956177, |
| "learning_rate": 0.001, |
| "loss": 1.5393, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.4312152862548828, |
| "learning_rate": 0.001, |
| "loss": 1.43, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.757952332496643, |
| "learning_rate": 0.001, |
| "loss": 1.439, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 1.4769724607467651, |
| "learning_rate": 0.001, |
| "loss": 1.4521, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.9982845783233643, |
| "learning_rate": 0.001, |
| "loss": 1.4369, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.7478052377700806, |
| "learning_rate": 0.001, |
| "loss": 1.4519, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.38131844997406, |
| "learning_rate": 0.001, |
| "loss": 1.4713, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.747186303138733, |
| "learning_rate": 0.001, |
| "loss": 1.4481, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 1.3846583366394043, |
| "learning_rate": 0.001, |
| "loss": 1.4738, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.2487694025039673, |
| "learning_rate": 0.001, |
| "loss": 1.5031, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 1.5017812252044678, |
| "learning_rate": 0.001, |
| "loss": 1.4877, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.3167939186096191, |
| "learning_rate": 0.001, |
| "loss": 1.4894, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 1.418789267539978, |
| "learning_rate": 0.001, |
| "loss": 1.4917, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.5393143892288208, |
| "learning_rate": 0.001, |
| "loss": 1.4844, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.560189127922058, |
| "learning_rate": 0.001, |
| "loss": 1.4849, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.949209213256836, |
| "learning_rate": 0.001, |
| "loss": 1.5023, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.287278175354004, |
| "learning_rate": 0.001, |
| "loss": 1.5138, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.3032569885253906, |
| "learning_rate": 0.001, |
| "loss": 1.4824, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.5704611539840698, |
| "learning_rate": 0.001, |
| "loss": 1.4988, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.5014654397964478, |
| "learning_rate": 0.001, |
| "loss": 1.5112, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.3570882081985474, |
| "learning_rate": 0.001, |
| "loss": 1.5177, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.2611489295959473, |
| "learning_rate": 0.001, |
| "loss": 1.525, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.2757374048233032, |
| "learning_rate": 0.001, |
| "loss": 1.5313, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.571677327156067, |
| "learning_rate": 0.001, |
| "loss": 1.5313, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.2518600225448608, |
| "learning_rate": 0.001, |
| "loss": 1.5307, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 1.4769446849822998, |
| "learning_rate": 0.001, |
| "loss": 1.5224, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.2939919233322144, |
| "learning_rate": 0.001, |
| "loss": 1.5528, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 1.5968825817108154, |
| "learning_rate": 0.001, |
| "loss": 1.5279, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.4943368434906006, |
| "learning_rate": 0.001, |
| "loss": 1.5162, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 1.4535393714904785, |
| "learning_rate": 0.001, |
| "loss": 1.5206, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 1.3544466495513916, |
| "learning_rate": 0.001, |
| "loss": 1.5482, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.2927088737487793, |
| "learning_rate": 0.001, |
| "loss": 1.4979, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 1.5981316566467285, |
| "learning_rate": 0.001, |
| "loss": 1.4178, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.2203686237335205, |
| "learning_rate": 0.001, |
| "loss": 1.4376, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.7450850009918213, |
| "learning_rate": 0.001, |
| "loss": 1.4392, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 1.4528957605361938, |
| "learning_rate": 0.001, |
| "loss": 1.4457, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 1.6601139307022095, |
| "learning_rate": 0.001, |
| "loss": 1.4468, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.4350769519805908, |
| "learning_rate": 0.001, |
| "loss": 1.4602, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.992567777633667, |
| "learning_rate": 0.001, |
| "loss": 1.4495, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 1.6500948667526245, |
| "learning_rate": 0.001, |
| "loss": 1.4729, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.863330364227295, |
| "learning_rate": 0.001, |
| "loss": 1.4435, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.3665803670883179, |
| "learning_rate": 0.001, |
| "loss": 1.4696, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.5130021572113037, |
| "learning_rate": 0.001, |
| "loss": 1.4687, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.8200185298919678, |
| "learning_rate": 0.001, |
| "loss": 1.4711, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.1587629318237305, |
| "learning_rate": 0.001, |
| "loss": 1.4784, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 1.3702373504638672, |
| "learning_rate": 0.001, |
| "loss": 1.4809, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.5741314888000488, |
| "learning_rate": 0.001, |
| "loss": 1.4934, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.3632556200027466, |
| "learning_rate": 0.001, |
| "loss": 1.4945, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.5309301614761353, |
| "learning_rate": 0.001, |
| "loss": 1.489, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 1.448372721672058, |
| "learning_rate": 0.001, |
| "loss": 1.4978, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 1.7779254913330078, |
| "learning_rate": 0.001, |
| "loss": 1.5016, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 2.014350652694702, |
| "learning_rate": 0.001, |
| "loss": 1.4941, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 1.3058133125305176, |
| "learning_rate": 0.001, |
| "loss": 1.5198, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.4762943983078003, |
| "learning_rate": 0.001, |
| "loss": 1.4958, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.649828314781189, |
| "learning_rate": 0.001, |
| "loss": 1.5166, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 2.230229616165161, |
| "learning_rate": 0.001, |
| "loss": 1.4897, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.9107069969177246, |
| "learning_rate": 0.001, |
| "loss": 1.5423, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.5066547393798828, |
| "learning_rate": 0.001, |
| "loss": 1.5171, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.2946594953536987, |
| "learning_rate": 0.001, |
| "loss": 1.5258, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 1.9029631614685059, |
| "learning_rate": 0.001, |
| "loss": 1.5255, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.4923566579818726, |
| "learning_rate": 0.001, |
| "loss": 1.5301, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.3440992832183838, |
| "learning_rate": 0.001, |
| "loss": 1.5311, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 1.6094200611114502, |
| "learning_rate": 0.001, |
| "loss": 1.486, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 1.6978893280029297, |
| "learning_rate": 0.001, |
| "loss": 1.4141, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.399122714996338, |
| "learning_rate": 0.001, |
| "loss": 1.4154, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 1.4120837450027466, |
| "learning_rate": 0.001, |
| "loss": 1.4248, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 1.6494534015655518, |
| "learning_rate": 0.001, |
| "loss": 1.4429, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.2671856880187988, |
| "learning_rate": 0.001, |
| "loss": 1.425, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.5630028247833252, |
| "learning_rate": 0.001, |
| "loss": 1.4297, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 1.9019289016723633, |
| "learning_rate": 0.001, |
| "loss": 1.4784, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 1.5625338554382324, |
| "learning_rate": 0.001, |
| "loss": 1.464, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.8091297149658203, |
| "learning_rate": 0.001, |
| "loss": 1.4641, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.6739461421966553, |
| "learning_rate": 0.001, |
| "loss": 1.4426, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.7141815423965454, |
| "learning_rate": 0.001, |
| "loss": 1.4348, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 1.2906224727630615, |
| "learning_rate": 0.001, |
| "loss": 1.4667, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.4699000120162964, |
| "learning_rate": 0.001, |
| "loss": 1.4707, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.8251467943191528, |
| "learning_rate": 0.001, |
| "loss": 1.4793, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.5201164484024048, |
| "learning_rate": 0.001, |
| "loss": 1.4912, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 1.3517299890518188, |
| "learning_rate": 0.001, |
| "loss": 1.488, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 1.4030410051345825, |
| "learning_rate": 0.001, |
| "loss": 1.4762, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 1.5813250541687012, |
| "learning_rate": 0.001, |
| "loss": 1.4818, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 1.785956859588623, |
| "learning_rate": 0.001, |
| "loss": 1.4726, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.752875566482544, |
| "learning_rate": 0.001, |
| "loss": 1.4817, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.5739023685455322, |
| "learning_rate": 0.001, |
| "loss": 1.4835, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 1.4878536462783813, |
| "learning_rate": 0.001, |
| "loss": 1.4908, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.5592995882034302, |
| "learning_rate": 0.001, |
| "loss": 1.5006, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.7241151332855225, |
| "learning_rate": 0.001, |
| "loss": 1.4934, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.491036295890808, |
| "learning_rate": 0.001, |
| "loss": 1.5126, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.9235769510269165, |
| "learning_rate": 0.001, |
| "loss": 1.5234, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.616104245185852, |
| "learning_rate": 0.001, |
| "loss": 1.5355, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.8652502298355103, |
| "learning_rate": 0.001, |
| "loss": 1.5193, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.5958667993545532, |
| "learning_rate": 0.001, |
| "loss": 1.5206, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.3792390823364258, |
| "learning_rate": 0.001, |
| "loss": 1.5136, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.4925225973129272, |
| "learning_rate": 0.001, |
| "loss": 1.4569, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 1.5995019674301147, |
| "learning_rate": 0.001, |
| "loss": 1.414, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 1.842695951461792, |
| "learning_rate": 0.001, |
| "loss": 1.4157, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 2.0299296379089355, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 1.4141572713851929, |
| "learning_rate": 0.001, |
| "loss": 1.4208, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.6466530561447144, |
| "learning_rate": 0.001, |
| "loss": 1.4173, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.8495573997497559, |
| "learning_rate": 0.001, |
| "loss": 1.4368, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 1.8739982843399048, |
| "learning_rate": 0.001, |
| "loss": 1.4374, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 1.5880706310272217, |
| "learning_rate": 0.001, |
| "loss": 1.4305, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 2.1057231426239014, |
| "learning_rate": 0.001, |
| "loss": 1.4469, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.5921692848205566, |
| "learning_rate": 0.001, |
| "loss": 1.4301, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 1.5669559240341187, |
| "learning_rate": 0.001, |
| "loss": 1.4547, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 1.8174595832824707, |
| "learning_rate": 0.001, |
| "loss": 1.4568, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.5995457172393799, |
| "learning_rate": 0.001, |
| "loss": 1.4631, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 1.9321708679199219, |
| "learning_rate": 0.001, |
| "loss": 1.4577, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.4424487352371216, |
| "learning_rate": 0.001, |
| "loss": 1.4615, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 2.30383038520813, |
| "learning_rate": 0.001, |
| "loss": 1.4617, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 1.7871912717819214, |
| "learning_rate": 0.001, |
| "loss": 1.4794, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 1.3103487491607666, |
| "learning_rate": 0.001, |
| "loss": 1.4688, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 1.822178602218628, |
| "learning_rate": 0.001, |
| "loss": 1.4942, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.6625324487686157, |
| "learning_rate": 0.001, |
| "loss": 1.4696, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.7712842226028442, |
| "learning_rate": 0.001, |
| "loss": 1.4974, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.611838698387146, |
| "learning_rate": 0.001, |
| "loss": 1.5015, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 1.9365757703781128, |
| "learning_rate": 0.001, |
| "loss": 1.4802, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.673500657081604, |
| "learning_rate": 0.001, |
| "loss": 1.5027, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.4796686172485352, |
| "learning_rate": 0.001, |
| "loss": 1.5001, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 1.7874492406845093, |
| "learning_rate": 0.001, |
| "loss": 1.4892, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.7060503959655762, |
| "learning_rate": 0.001, |
| "loss": 1.5039, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 1.9481016397476196, |
| "learning_rate": 0.001, |
| "loss": 1.5026, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 1.857441782951355, |
| "learning_rate": 0.001, |
| "loss": 1.5044, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 1.7272157669067383, |
| "learning_rate": 0.001, |
| "loss": 1.5088, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 1.9517948627471924, |
| "learning_rate": 0.001, |
| "loss": 1.4247, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 1.648769736289978, |
| "learning_rate": 0.001, |
| "loss": 1.3887, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 2.3552958965301514, |
| "learning_rate": 0.001, |
| "loss": 1.3846, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 1.8240126371383667, |
| "learning_rate": 0.001, |
| "loss": 1.3953, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 2.1630699634552, |
| "learning_rate": 0.001, |
| "loss": 1.4126, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 1.6678168773651123, |
| "learning_rate": 0.001, |
| "loss": 1.4289, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 1.5961699485778809, |
| "learning_rate": 0.001, |
| "loss": 1.4081, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 2.3336539268493652, |
| "learning_rate": 0.001, |
| "loss": 1.4226, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 2.197392463684082, |
| "learning_rate": 0.001, |
| "loss": 1.438, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 2.0521631240844727, |
| "learning_rate": 0.001, |
| "loss": 1.4269, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 1.9566603899002075, |
| "learning_rate": 0.001, |
| "loss": 1.4343, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 2.1111741065979004, |
| "learning_rate": 0.001, |
| "loss": 1.4378, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 2.358926296234131, |
| "learning_rate": 0.001, |
| "loss": 1.4458, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 1.9776195287704468, |
| "learning_rate": 0.001, |
| "loss": 1.4596, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 1.6591037511825562, |
| "learning_rate": 0.001, |
| "loss": 1.4484, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 2.1159605979919434, |
| "learning_rate": 0.001, |
| "loss": 1.4584, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 1.8147565126419067, |
| "learning_rate": 0.001, |
| "loss": 1.4609, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 2.349036455154419, |
| "learning_rate": 0.001, |
| "loss": 1.4592, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 1.9788084030151367, |
| "learning_rate": 0.001, |
| "loss": 1.4589, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 1.711326003074646, |
| "learning_rate": 0.001, |
| "loss": 1.4564, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 1.824481725692749, |
| "learning_rate": 0.001, |
| "loss": 1.4739, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 1.8846087455749512, |
| "learning_rate": 0.001, |
| "loss": 1.4962, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 1.3911494016647339, |
| "learning_rate": 0.001, |
| "loss": 1.4869, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 2.168950080871582, |
| "learning_rate": 0.001, |
| "loss": 1.4737, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 2.162898063659668, |
| "learning_rate": 0.001, |
| "loss": 1.4896, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 1.8316357135772705, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 1.8830862045288086, |
| "learning_rate": 0.001, |
| "loss": 1.4822, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 1.4162304401397705, |
| "learning_rate": 0.001, |
| "loss": 1.4798, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 2.3811752796173096, |
| "learning_rate": 0.001, |
| "loss": 1.5042, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 2.115408182144165, |
| "learning_rate": 0.001, |
| "loss": 1.4937, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 1.5884422063827515, |
| "learning_rate": 0.001, |
| "loss": 1.4966, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 1.5762377977371216, |
| "learning_rate": 0.001, |
| "loss": 1.4325, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 1.9034700393676758, |
| "learning_rate": 0.001, |
| "loss": 1.3811, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 1.8262866735458374, |
| "learning_rate": 0.001, |
| "loss": 1.4013, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 1.5713839530944824, |
| "learning_rate": 0.001, |
| "loss": 1.3897, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 2.6167213916778564, |
| "learning_rate": 0.001, |
| "loss": 1.4069, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.6956098079681396, |
| "learning_rate": 0.001, |
| "loss": 1.4126, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 1.5501548051834106, |
| "learning_rate": 0.001, |
| "loss": 1.4041, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 1.632919430732727, |
| "learning_rate": 0.001, |
| "loss": 1.4135, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 1.7773116827011108, |
| "learning_rate": 0.001, |
| "loss": 1.4199, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 1.6854921579360962, |
| "learning_rate": 0.001, |
| "loss": 1.4134, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 1.7805248498916626, |
| "learning_rate": 0.001, |
| "loss": 1.4246, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 1.943892002105713, |
| "learning_rate": 0.001, |
| "loss": 1.4288, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 1.6803494691848755, |
| "learning_rate": 0.001, |
| "loss": 1.4369, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 2.133312940597534, |
| "learning_rate": 0.001, |
| "loss": 1.452, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 1.6594949960708618, |
| "learning_rate": 0.001, |
| "loss": 1.4436, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.515779733657837, |
| "learning_rate": 0.001, |
| "loss": 1.4516, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.8933998346328735, |
| "learning_rate": 0.001, |
| "loss": 1.444, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 1.962734580039978, |
| "learning_rate": 0.001, |
| "loss": 1.4462, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.573004126548767, |
| "learning_rate": 0.001, |
| "loss": 1.4622, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 2.281193256378174, |
| "learning_rate": 0.001, |
| "loss": 1.4556, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.6149003505706787, |
| "learning_rate": 0.001, |
| "loss": 1.4537, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 1.7026011943817139, |
| "learning_rate": 0.001, |
| "loss": 1.4564, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.9733768701553345, |
| "learning_rate": 0.001, |
| "loss": 1.4769, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 1.584242343902588, |
| "learning_rate": 0.001, |
| "loss": 1.4786, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.8707184791564941, |
| "learning_rate": 0.001, |
| "loss": 1.4709, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 2.813775062561035, |
| "learning_rate": 0.001, |
| "loss": 1.4704, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 1.6593128442764282, |
| "learning_rate": 0.001, |
| "loss": 1.4652, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 2.4981613159179688, |
| "learning_rate": 0.001, |
| "loss": 1.4661, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.8080812692642212, |
| "learning_rate": 0.001, |
| "loss": 1.4756, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 1.5147720575332642, |
| "learning_rate": 0.001, |
| "loss": 1.4866, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 2.0482633113861084, |
| "learning_rate": 0.001, |
| "loss": 1.4873, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.222092628479004, |
| "learning_rate": 0.001, |
| "loss": 1.4183, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 1.309107780456543, |
| "learning_rate": 0.001, |
| "loss": 1.3756, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.9363985061645508, |
| "learning_rate": 0.001, |
| "loss": 1.3947, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 1.837631106376648, |
| "learning_rate": 0.001, |
| "loss": 1.3782, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 1.3715769052505493, |
| "learning_rate": 0.001, |
| "loss": 1.3752, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.2659372091293335, |
| "learning_rate": 0.001, |
| "loss": 1.3928, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.2593166828155518, |
| "learning_rate": 0.001, |
| "loss": 1.3924, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.8180540800094604, |
| "learning_rate": 0.001, |
| "loss": 1.3932, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 2.077974557876587, |
| "learning_rate": 0.001, |
| "loss": 1.4231, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 1.648301124572754, |
| "learning_rate": 0.001, |
| "loss": 1.4192, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 1.756912350654602, |
| "learning_rate": 0.001, |
| "loss": 1.4235, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 1.4692977666854858, |
| "learning_rate": 0.001, |
| "loss": 1.4013, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.488180160522461, |
| "learning_rate": 0.001, |
| "loss": 1.4257, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.2721911668777466, |
| "learning_rate": 0.001, |
| "loss": 1.4242, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.282834529876709, |
| "learning_rate": 0.001, |
| "loss": 1.4192, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.254542589187622, |
| "learning_rate": 0.001, |
| "loss": 1.4391, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.5919004678726196, |
| "learning_rate": 0.001, |
| "loss": 1.4214, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.6546337604522705, |
| "learning_rate": 0.001, |
| "loss": 1.4659, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 1.2467652559280396, |
| "learning_rate": 0.001, |
| "loss": 1.4507, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.228142499923706, |
| "learning_rate": 0.001, |
| "loss": 1.4256, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.2312934398651123, |
| "learning_rate": 0.001, |
| "loss": 1.4597, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 1.362423062324524, |
| "learning_rate": 0.001, |
| "loss": 1.456, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.8674683570861816, |
| "learning_rate": 0.001, |
| "loss": 1.4538, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.8995897769927979, |
| "learning_rate": 0.001, |
| "loss": 1.4637, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.3285086154937744, |
| "learning_rate": 0.001, |
| "loss": 1.4647, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.7458430528640747, |
| "learning_rate": 0.001, |
| "loss": 1.4563, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.4835847616195679, |
| "learning_rate": 0.001, |
| "loss": 1.4792, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 2.0184285640716553, |
| "learning_rate": 0.001, |
| "loss": 1.471, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 2.2625019550323486, |
| "learning_rate": 0.001, |
| "loss": 1.4764, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.3040214776992798, |
| "learning_rate": 0.001, |
| "loss": 1.4861, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 2.382171392440796, |
| "learning_rate": 0.001, |
| "loss": 1.4735, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.2037104368209839, |
| "learning_rate": 0.001, |
| "loss": 1.4076, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.1736313104629517, |
| "learning_rate": 0.001, |
| "loss": 1.369, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.3122565746307373, |
| "learning_rate": 0.001, |
| "loss": 1.369, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 1.4312031269073486, |
| "learning_rate": 0.001, |
| "loss": 1.3899, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.6407980918884277, |
| "learning_rate": 0.001, |
| "loss": 1.3582, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.3378984928131104, |
| "learning_rate": 0.001, |
| "loss": 1.3773, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 1.4513931274414062, |
| "learning_rate": 0.001, |
| "loss": 1.3769, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.5741044282913208, |
| "learning_rate": 0.001, |
| "loss": 1.3863, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 2.1121723651885986, |
| "learning_rate": 0.001, |
| "loss": 1.3781, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 1.7127248048782349, |
| "learning_rate": 0.001, |
| "loss": 1.4187, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 1.1498795747756958, |
| "learning_rate": 0.001, |
| "loss": 1.4092, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 1.63968825340271, |
| "learning_rate": 0.001, |
| "loss": 1.3977, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.6286283731460571, |
| "learning_rate": 0.001, |
| "loss": 1.426, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.6149277687072754, |
| "learning_rate": 0.001, |
| "loss": 1.4122, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.7483794689178467, |
| "learning_rate": 0.001, |
| "loss": 1.4236, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.2920759916305542, |
| "learning_rate": 0.001, |
| "loss": 1.4363, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.121927261352539, |
| "learning_rate": 0.001, |
| "loss": 1.4203, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.5375155210494995, |
| "learning_rate": 0.001, |
| "loss": 1.4306, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.3647403717041016, |
| "learning_rate": 0.001, |
| "loss": 1.4306, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 1.3525989055633545, |
| "learning_rate": 0.001, |
| "loss": 1.4301, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.311113715171814, |
| "learning_rate": 0.001, |
| "loss": 1.4498, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 1.514700174331665, |
| "learning_rate": 0.001, |
| "loss": 1.436, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.467130422592163, |
| "learning_rate": 0.001, |
| "loss": 1.4369, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.9764527082443237, |
| "learning_rate": 0.001, |
| "loss": 1.4512, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.0968976020812988, |
| "learning_rate": 0.001, |
| "loss": 1.4429, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.7983734607696533, |
| "learning_rate": 0.001, |
| "loss": 1.4687, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.818927526473999, |
| "learning_rate": 0.001, |
| "loss": 1.4679, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.3303331136703491, |
| "learning_rate": 0.001, |
| "loss": 1.4805, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 1.4884530305862427, |
| "learning_rate": 0.001, |
| "loss": 1.4622, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.3443821668624878, |
| "learning_rate": 0.001, |
| "loss": 1.4598, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.5385671854019165, |
| "learning_rate": 0.001, |
| "loss": 1.4846, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.9123036861419678, |
| "learning_rate": 0.001, |
| "loss": 1.4038, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.686424970626831, |
| "learning_rate": 0.001, |
| "loss": 1.3439, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.4864493608474731, |
| "learning_rate": 0.001, |
| "loss": 1.3631, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.57008957862854, |
| "learning_rate": 0.001, |
| "loss": 1.3621, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 1.1772888898849487, |
| "learning_rate": 0.001, |
| "loss": 1.3641, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 1.5883642435073853, |
| "learning_rate": 0.001, |
| "loss": 1.3795, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.4293068647384644, |
| "learning_rate": 0.001, |
| "loss": 1.38, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.596917986869812, |
| "learning_rate": 0.001, |
| "loss": 1.388, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 1.4482003450393677, |
| "learning_rate": 0.001, |
| "loss": 1.3861, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.8990685939788818, |
| "learning_rate": 0.001, |
| "loss": 1.3957, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.6335766315460205, |
| "learning_rate": 0.001, |
| "loss": 1.3829, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.9832218885421753, |
| "learning_rate": 0.001, |
| "loss": 1.4018, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 1.4106504917144775, |
| "learning_rate": 0.001, |
| "loss": 1.4156, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 1.4505540132522583, |
| "learning_rate": 0.001, |
| "loss": 1.3913, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.5402984619140625, |
| "learning_rate": 0.001, |
| "loss": 1.4008, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.4702680110931396, |
| "learning_rate": 0.001, |
| "loss": 1.4007, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.7161102294921875, |
| "learning_rate": 0.001, |
| "loss": 1.4311, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.1644012928009033, |
| "learning_rate": 0.001, |
| "loss": 1.4264, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.5992450714111328, |
| "learning_rate": 0.001, |
| "loss": 1.4156, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.2773569822311401, |
| "learning_rate": 0.001, |
| "loss": 1.4371, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.7517127990722656, |
| "learning_rate": 0.001, |
| "loss": 1.4243, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.5832035541534424, |
| "learning_rate": 0.001, |
| "loss": 1.4346, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.5175061225891113, |
| "learning_rate": 0.001, |
| "loss": 1.4373, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 1.4983898401260376, |
| "learning_rate": 0.001, |
| "loss": 1.4354, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 1.923129916191101, |
| "learning_rate": 0.001, |
| "loss": 1.4652, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.5332492589950562, |
| "learning_rate": 0.001, |
| "loss": 1.4446, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.3865859508514404, |
| "learning_rate": 0.001, |
| "loss": 1.4332, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.3664981126785278, |
| "learning_rate": 0.001, |
| "loss": 1.4324, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.507145881652832, |
| "learning_rate": 0.001, |
| "loss": 1.477, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.1725382804870605, |
| "learning_rate": 0.001, |
| "loss": 1.4616, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.6565682888031006, |
| "learning_rate": 0.001, |
| "loss": 1.4521, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.4437625408172607, |
| "learning_rate": 0.001, |
| "loss": 1.3588, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.420046329498291, |
| "learning_rate": 0.001, |
| "loss": 1.3529, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.674965262413025, |
| "learning_rate": 0.001, |
| "loss": 1.3499, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.4057883024215698, |
| "learning_rate": 0.001, |
| "loss": 1.3682, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.432799220085144, |
| "learning_rate": 0.001, |
| "loss": 1.3541, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 1.248239517211914, |
| "learning_rate": 0.001, |
| "loss": 1.3637, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.353018045425415, |
| "learning_rate": 0.001, |
| "loss": 1.3654, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.305019497871399, |
| "learning_rate": 0.001, |
| "loss": 1.3749, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.4432069063186646, |
| "learning_rate": 0.001, |
| "loss": 1.3658, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.7704466581344604, |
| "learning_rate": 0.001, |
| "loss": 1.3877, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 1.480355978012085, |
| "learning_rate": 0.001, |
| "loss": 1.3974, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 1.5801160335540771, |
| "learning_rate": 0.001, |
| "loss": 1.3832, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.4862293004989624, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.883362889289856, |
| "learning_rate": 0.001, |
| "loss": 1.3955, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.3884245157241821, |
| "learning_rate": 0.001, |
| "loss": 1.3998, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 1.5210504531860352, |
| "learning_rate": 0.001, |
| "loss": 1.4087, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 1.1657111644744873, |
| "learning_rate": 0.001, |
| "loss": 1.3979, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 1.3850380182266235, |
| "learning_rate": 0.001, |
| "loss": 1.3997, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.7572051286697388, |
| "learning_rate": 0.001, |
| "loss": 1.3992, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.5824946165084839, |
| "learning_rate": 0.001, |
| "loss": 1.4276, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.6165339946746826, |
| "learning_rate": 0.001, |
| "loss": 1.4299, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 1.657211422920227, |
| "learning_rate": 0.001, |
| "loss": 1.4234, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 2.030299186706543, |
| "learning_rate": 0.001, |
| "loss": 1.4291, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 1.864864706993103, |
| "learning_rate": 0.001, |
| "loss": 1.4228, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.3653168678283691, |
| "learning_rate": 0.001, |
| "loss": 1.4244, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.5868542194366455, |
| "learning_rate": 0.001, |
| "loss": 1.4279, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.6072509288787842, |
| "learning_rate": 0.001, |
| "loss": 1.463, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.2431442737579346, |
| "learning_rate": 0.001, |
| "loss": 1.4426, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.8268156051635742, |
| "learning_rate": 0.001, |
| "loss": 1.4428, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.5013717412948608, |
| "learning_rate": 0.001, |
| "loss": 1.4635, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.5922578573226929, |
| "learning_rate": 0.001, |
| "loss": 1.4469, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 1.7445586919784546, |
| "learning_rate": 0.001, |
| "loss": 1.3447, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.9003431797027588, |
| "learning_rate": 0.001, |
| "loss": 1.3404, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.6859298944473267, |
| "learning_rate": 0.001, |
| "loss": 1.3413, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 1.992749571800232, |
| "learning_rate": 0.001, |
| "loss": 1.3571, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.800107717514038, |
| "learning_rate": 0.001, |
| "loss": 1.3495, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 1.5717955827713013, |
| "learning_rate": 0.001, |
| "loss": 1.3485, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.43135404586792, |
| "learning_rate": 0.001, |
| "loss": 1.373, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.5628668069839478, |
| "learning_rate": 0.001, |
| "loss": 1.3637, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 1.3284069299697876, |
| "learning_rate": 0.001, |
| "loss": 1.3696, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 1.7991653680801392, |
| "learning_rate": 0.001, |
| "loss": 1.3685, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.3946183919906616, |
| "learning_rate": 0.001, |
| "loss": 1.3773, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.5314682722091675, |
| "learning_rate": 0.001, |
| "loss": 1.3858, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 1.5692931413650513, |
| "learning_rate": 0.001, |
| "loss": 1.3734, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 2.0414280891418457, |
| "learning_rate": 0.001, |
| "loss": 1.3784, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.4214924573898315, |
| "learning_rate": 0.001, |
| "loss": 1.3956, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 1.3940602540969849, |
| "learning_rate": 0.001, |
| "loss": 1.406, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 2.5642523765563965, |
| "learning_rate": 0.001, |
| "loss": 1.4053, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.6160247325897217, |
| "learning_rate": 0.001, |
| "loss": 1.4018, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.409174919128418, |
| "learning_rate": 0.001, |
| "loss": 1.4122, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.6104872226715088, |
| "learning_rate": 0.001, |
| "loss": 1.4068, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 1.4003249406814575, |
| "learning_rate": 0.001, |
| "loss": 1.4067, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.6747041940689087, |
| "learning_rate": 0.001, |
| "loss": 1.4152, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 2.0438196659088135, |
| "learning_rate": 0.001, |
| "loss": 1.4197, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 2.127326726913452, |
| "learning_rate": 0.001, |
| "loss": 1.4225, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.4178484678268433, |
| "learning_rate": 0.001, |
| "loss": 1.4386, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 1.8293603658676147, |
| "learning_rate": 0.001, |
| "loss": 1.4268, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 2.1161253452301025, |
| "learning_rate": 0.001, |
| "loss": 1.4351, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.7479701042175293, |
| "learning_rate": 0.001, |
| "loss": 1.4261, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 1.317136526107788, |
| "learning_rate": 0.001, |
| "loss": 1.4205, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 1.641223669052124, |
| "learning_rate": 0.001, |
| "loss": 1.4455, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 1.5180416107177734, |
| "learning_rate": 0.001, |
| "loss": 1.4195, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 1.7227802276611328, |
| "learning_rate": 0.001, |
| "loss": 1.3384, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.5802651643753052, |
| "learning_rate": 0.001, |
| "loss": 1.3211, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.1960089206695557, |
| "learning_rate": 0.001, |
| "loss": 1.3314, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 1.4870516061782837, |
| "learning_rate": 0.001, |
| "loss": 1.3412, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.4223541021347046, |
| "learning_rate": 0.001, |
| "loss": 1.3444, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 1.4809181690216064, |
| "learning_rate": 0.001, |
| "loss": 1.3426, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 2.217803478240967, |
| "learning_rate": 0.001, |
| "loss": 1.3414, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.7147786617279053, |
| "learning_rate": 0.001, |
| "loss": 1.3636, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 1.6113975048065186, |
| "learning_rate": 0.001, |
| "loss": 1.3743, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 2.024049997329712, |
| "learning_rate": 0.001, |
| "loss": 1.3468, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 1.9836534261703491, |
| "learning_rate": 0.001, |
| "loss": 1.371, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 1.769474744796753, |
| "learning_rate": 0.001, |
| "loss": 1.3667, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.8485040664672852, |
| "learning_rate": 0.001, |
| "loss": 1.3754, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.2611091136932373, |
| "learning_rate": 0.001, |
| "loss": 1.3839, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.3341859579086304, |
| "learning_rate": 0.001, |
| "loss": 1.3773, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 1.6250958442687988, |
| "learning_rate": 0.001, |
| "loss": 1.384, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.9036827087402344, |
| "learning_rate": 0.001, |
| "loss": 1.3946, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.4791826009750366, |
| "learning_rate": 0.001, |
| "loss": 1.3823, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 2.300971031188965, |
| "learning_rate": 0.001, |
| "loss": 1.3839, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 1.2696810960769653, |
| "learning_rate": 0.001, |
| "loss": 1.4193, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 1.4126332998275757, |
| "learning_rate": 0.001, |
| "loss": 1.4136, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.5814741849899292, |
| "learning_rate": 0.001, |
| "loss": 1.4094, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 1.7776198387145996, |
| "learning_rate": 0.001, |
| "loss": 1.4044, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 1.5066275596618652, |
| "learning_rate": 0.001, |
| "loss": 1.4129, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 2.751493453979492, |
| "learning_rate": 0.001, |
| "loss": 1.4055, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.5740598440170288, |
| "learning_rate": 0.001, |
| "loss": 1.4154, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 1.510005235671997, |
| "learning_rate": 0.001, |
| "loss": 1.4266, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 2.236680746078491, |
| "learning_rate": 0.001, |
| "loss": 1.4052, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 1.301505208015442, |
| "learning_rate": 0.001, |
| "loss": 1.4244, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 2.374619722366333, |
| "learning_rate": 0.001, |
| "loss": 1.4352, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 1.7071536779403687, |
| "learning_rate": 0.001, |
| "loss": 1.4323, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 2.193371295928955, |
| "learning_rate": 0.001, |
| "loss": 1.3099, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 1.6853256225585938, |
| "learning_rate": 0.001, |
| "loss": 1.3084, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 1.849713683128357, |
| "learning_rate": 0.001, |
| "loss": 1.3098, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 1.6212743520736694, |
| "learning_rate": 0.001, |
| "loss": 1.3458, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 2.065988779067993, |
| "learning_rate": 0.001, |
| "loss": 1.3158, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 1.4905744791030884, |
| "learning_rate": 0.001, |
| "loss": 1.3421, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 1.9883614778518677, |
| "learning_rate": 0.001, |
| "loss": 1.3606, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 1.4957332611083984, |
| "learning_rate": 0.001, |
| "loss": 1.3492, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 1.8609826564788818, |
| "learning_rate": 0.001, |
| "loss": 1.3277, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 1.843746304512024, |
| "learning_rate": 0.001, |
| "loss": 1.352, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 2.3093807697296143, |
| "learning_rate": 0.001, |
| "loss": 1.3516, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 1.9718279838562012, |
| "learning_rate": 0.001, |
| "loss": 1.3625, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.7455960512161255, |
| "learning_rate": 0.001, |
| "loss": 1.3772, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 1.5213228464126587, |
| "learning_rate": 0.001, |
| "loss": 1.3873, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 1.6287715435028076, |
| "learning_rate": 0.001, |
| "loss": 1.3853, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 1.6267790794372559, |
| "learning_rate": 0.001, |
| "loss": 1.3773, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 2.083371877670288, |
| "learning_rate": 0.001, |
| "loss": 1.369, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 2.5718905925750732, |
| "learning_rate": 0.001, |
| "loss": 1.3731, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 2.2809901237487793, |
| "learning_rate": 0.001, |
| "loss": 1.3964, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 1.6251013278961182, |
| "learning_rate": 0.001, |
| "loss": 1.3975, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 1.7564525604248047, |
| "learning_rate": 0.001, |
| "loss": 1.3877, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 1.748744010925293, |
| "learning_rate": 0.001, |
| "loss": 1.3938, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 2.089381217956543, |
| "learning_rate": 0.001, |
| "loss": 1.4197, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 2.1797640323638916, |
| "learning_rate": 0.001, |
| "loss": 1.3991, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 1.92826509475708, |
| "learning_rate": 0.001, |
| "loss": 1.4103, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 1.3981081247329712, |
| "learning_rate": 0.001, |
| "loss": 1.4018, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 2.354722738265991, |
| "learning_rate": 0.001, |
| "loss": 1.4124, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 2.201211929321289, |
| "learning_rate": 0.001, |
| "loss": 1.3906, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.7025607824325562, |
| "learning_rate": 0.001, |
| "loss": 1.422, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 1.9557732343673706, |
| "learning_rate": 0.001, |
| "loss": 1.4151, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 1.8797639608383179, |
| "learning_rate": 0.001, |
| "loss": 1.3962, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.7835898399353027, |
| "learning_rate": 0.001, |
| "loss": 1.3011, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 1.837484359741211, |
| "learning_rate": 0.001, |
| "loss": 1.3101, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 1.8803691864013672, |
| "learning_rate": 0.001, |
| "loss": 1.308, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 1.8284261226654053, |
| "learning_rate": 0.001, |
| "loss": 1.3088, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 2.1951427459716797, |
| "learning_rate": 0.001, |
| "loss": 1.3291, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 1.9866607189178467, |
| "learning_rate": 0.001, |
| "loss": 1.3332, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 2.5242843627929688, |
| "learning_rate": 0.001, |
| "loss": 1.353, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 2.158780336380005, |
| "learning_rate": 0.001, |
| "loss": 1.3228, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 2.7524983882904053, |
| "learning_rate": 0.001, |
| "loss": 1.331, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 2.2914531230926514, |
| "learning_rate": 0.001, |
| "loss": 1.3532, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 2.0430986881256104, |
| "learning_rate": 0.001, |
| "loss": 1.3657, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 2.7747621536254883, |
| "learning_rate": 0.001, |
| "loss": 1.3503, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 2.5037589073181152, |
| "learning_rate": 0.001, |
| "loss": 1.3583, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 1.931546926498413, |
| "learning_rate": 0.001, |
| "loss": 1.3666, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 1.8246216773986816, |
| "learning_rate": 0.001, |
| "loss": 1.362, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 2.764183282852173, |
| "learning_rate": 0.001, |
| "loss": 1.3682, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 2.9155993461608887, |
| "learning_rate": 0.001, |
| "loss": 1.3689, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 1.8072984218597412, |
| "learning_rate": 0.001, |
| "loss": 1.3907, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 2.9374847412109375, |
| "learning_rate": 0.001, |
| "loss": 1.3778, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 2.64780592918396, |
| "learning_rate": 0.001, |
| "loss": 1.3905, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 2.061706304550171, |
| "learning_rate": 0.001, |
| "loss": 1.393, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 2.0080013275146484, |
| "learning_rate": 0.001, |
| "loss": 1.3761, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 1.6209617853164673, |
| "learning_rate": 0.001, |
| "loss": 1.3852, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 2.044245481491089, |
| "learning_rate": 0.001, |
| "loss": 1.4122, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 1.3390419483184814, |
| "learning_rate": 0.001, |
| "loss": 1.4145, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 1.7002332210540771, |
| "learning_rate": 0.001, |
| "loss": 1.388, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 3.0040740966796875, |
| "learning_rate": 0.001, |
| "loss": 1.393, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 2.0390803813934326, |
| "learning_rate": 0.001, |
| "loss": 1.3944, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 2.327254295349121, |
| "learning_rate": 0.001, |
| "loss": 1.4002, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 1.9059028625488281, |
| "learning_rate": 0.001, |
| "loss": 1.3971, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.3357656002044678, |
| "learning_rate": 0.001, |
| "loss": 1.4255, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 1.5476328134536743, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.7639280557632446, |
| "learning_rate": 0.001, |
| "loss": 1.3068, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 1.6734331846237183, |
| "learning_rate": 0.001, |
| "loss": 1.3149, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.776329755783081, |
| "learning_rate": 0.001, |
| "loss": 1.2975, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 1.520376443862915, |
| "learning_rate": 0.001, |
| "loss": 1.311, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 2.158876657485962, |
| "learning_rate": 0.001, |
| "loss": 1.3252, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 1.4346355199813843, |
| "learning_rate": 0.001, |
| "loss": 1.3211, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 1.6036536693572998, |
| "learning_rate": 0.001, |
| "loss": 1.3322, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 1.9127318859100342, |
| "learning_rate": 0.001, |
| "loss": 1.3335, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 1.553421974182129, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.2245683670043945, |
| "learning_rate": 0.001, |
| "loss": 1.343, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 1.4779609441757202, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 1.516800045967102, |
| "learning_rate": 0.001, |
| "loss": 1.3462, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.5422505140304565, |
| "learning_rate": 0.001, |
| "loss": 1.3566, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 1.556553602218628, |
| "learning_rate": 0.001, |
| "loss": 1.3554, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 2.1386520862579346, |
| "learning_rate": 0.001, |
| "loss": 1.3652, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.5340510606765747, |
| "learning_rate": 0.001, |
| "loss": 1.3761, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 2.1872169971466064, |
| "learning_rate": 0.001, |
| "loss": 1.3684, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.7372183799743652, |
| "learning_rate": 0.001, |
| "loss": 1.3716, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 1.9826773405075073, |
| "learning_rate": 0.001, |
| "loss": 1.3804, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 1.1110419034957886, |
| "learning_rate": 0.001, |
| "loss": 1.3517, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 1.7787781953811646, |
| "learning_rate": 0.001, |
| "loss": 1.3773, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 2.0240843296051025, |
| "learning_rate": 0.001, |
| "loss": 1.385, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.1772947311401367, |
| "learning_rate": 0.001, |
| "loss": 1.3794, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 1.6726468801498413, |
| "learning_rate": 0.001, |
| "loss": 1.3895, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 2.144679069519043, |
| "learning_rate": 0.001, |
| "loss": 1.3796, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.5072028636932373, |
| "learning_rate": 0.001, |
| "loss": 1.3964, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 1.3276915550231934, |
| "learning_rate": 0.001, |
| "loss": 1.3811, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.5651215314865112, |
| "learning_rate": 0.001, |
| "loss": 1.4048, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 1.139343023300171, |
| "learning_rate": 0.001, |
| "loss": 1.3835, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.4380346536636353, |
| "learning_rate": 0.001, |
| "loss": 1.4131, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.3611998558044434, |
| "learning_rate": 0.001, |
| "loss": 1.2713, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 2.2016654014587402, |
| "learning_rate": 0.001, |
| "loss": 1.2976, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 1.313778042793274, |
| "learning_rate": 0.001, |
| "loss": 1.303, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.6348943710327148, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.3724998235702515, |
| "learning_rate": 0.001, |
| "loss": 1.3346, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.3382467031478882, |
| "learning_rate": 0.001, |
| "loss": 1.3138, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.7528189420700073, |
| "learning_rate": 0.001, |
| "loss": 1.307, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 1.160068154335022, |
| "learning_rate": 0.001, |
| "loss": 1.3214, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.3790607452392578, |
| "learning_rate": 0.001, |
| "loss": 1.3213, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.7774428129196167, |
| "learning_rate": 0.001, |
| "loss": 1.3172, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 1.3554517030715942, |
| "learning_rate": 0.001, |
| "loss": 1.3276, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.5843979120254517, |
| "learning_rate": 0.001, |
| "loss": 1.3305, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 1.7524763345718384, |
| "learning_rate": 0.001, |
| "loss": 1.3386, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 2.0899758338928223, |
| "learning_rate": 0.001, |
| "loss": 1.3593, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 2.218890905380249, |
| "learning_rate": 0.001, |
| "loss": 1.375, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.7657575607299805, |
| "learning_rate": 0.001, |
| "loss": 1.3616, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.7468805313110352, |
| "learning_rate": 0.001, |
| "loss": 1.3635, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 2.189852714538574, |
| "learning_rate": 0.001, |
| "loss": 1.3652, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.7859878540039062, |
| "learning_rate": 0.001, |
| "loss": 1.3527, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.292675495147705, |
| "learning_rate": 0.001, |
| "loss": 1.3743, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.5818181037902832, |
| "learning_rate": 0.001, |
| "loss": 1.3749, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 1.57987380027771, |
| "learning_rate": 0.001, |
| "loss": 1.3835, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.5561144351959229, |
| "learning_rate": 0.001, |
| "loss": 1.3734, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 1.6584949493408203, |
| "learning_rate": 0.001, |
| "loss": 1.3633, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 1.736039161682129, |
| "learning_rate": 0.001, |
| "loss": 1.3672, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.2760789394378662, |
| "learning_rate": 0.001, |
| "loss": 1.3706, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 1.7503100633621216, |
| "learning_rate": 0.001, |
| "loss": 1.3709, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.3824061155319214, |
| "learning_rate": 0.001, |
| "loss": 1.3972, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 1.5360379219055176, |
| "learning_rate": 0.001, |
| "loss": 1.3795, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.843446135520935, |
| "learning_rate": 0.001, |
| "loss": 1.4005, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 1.5242737531661987, |
| "learning_rate": 0.001, |
| "loss": 1.3872, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.4649934768676758, |
| "learning_rate": 0.001, |
| "loss": 1.294, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 1.3466111421585083, |
| "learning_rate": 0.001, |
| "loss": 1.2915, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.0711795091629028, |
| "learning_rate": 0.001, |
| "loss": 1.2955, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 1.2805978059768677, |
| "learning_rate": 0.001, |
| "loss": 1.2906, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.4977800846099854, |
| "learning_rate": 0.001, |
| "loss": 1.3109, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 1.2956316471099854, |
| "learning_rate": 0.001, |
| "loss": 1.2928, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.3268839120864868, |
| "learning_rate": 0.001, |
| "loss": 1.3001, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.3315646648406982, |
| "learning_rate": 0.001, |
| "loss": 1.3077, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.6829092502593994, |
| "learning_rate": 0.001, |
| "loss": 1.3241, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 1.6773277521133423, |
| "learning_rate": 0.001, |
| "loss": 1.3193, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 1.407239317893982, |
| "learning_rate": 0.001, |
| "loss": 1.3409, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.7320756912231445, |
| "learning_rate": 0.001, |
| "loss": 1.3263, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.6086102724075317, |
| "learning_rate": 0.001, |
| "loss": 1.3269, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 1.3016129732131958, |
| "learning_rate": 0.001, |
| "loss": 1.335, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 1.8595035076141357, |
| "learning_rate": 0.001, |
| "loss": 1.345, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 1.5059258937835693, |
| "learning_rate": 0.001, |
| "loss": 1.3563, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 1.7828933000564575, |
| "learning_rate": 0.001, |
| "loss": 1.3489, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.5474156141281128, |
| "learning_rate": 0.001, |
| "loss": 1.3549, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.276524305343628, |
| "learning_rate": 0.001, |
| "loss": 1.3553, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.3445316553115845, |
| "learning_rate": 0.001, |
| "loss": 1.3473, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.4307173490524292, |
| "learning_rate": 0.001, |
| "loss": 1.358, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 1.488200068473816, |
| "learning_rate": 0.001, |
| "loss": 1.354, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.5671435594558716, |
| "learning_rate": 0.001, |
| "loss": 1.3533, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 1.243046760559082, |
| "learning_rate": 0.001, |
| "loss": 1.3584, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.329303503036499, |
| "learning_rate": 0.001, |
| "loss": 1.3644, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.2586477994918823, |
| "learning_rate": 0.001, |
| "loss": 1.3829, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.5427042245864868, |
| "learning_rate": 0.001, |
| "loss": 1.3736, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 1.883329153060913, |
| "learning_rate": 0.001, |
| "loss": 1.3865, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.3526417016983032, |
| "learning_rate": 0.001, |
| "loss": 1.3863, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 1.1791620254516602, |
| "learning_rate": 0.001, |
| "loss": 1.3817, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.4550600051879883, |
| "learning_rate": 0.001, |
| "loss": 1.3615, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 1.37946355342865, |
| "learning_rate": 0.001, |
| "loss": 1.2663, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.4288150072097778, |
| "learning_rate": 0.001, |
| "loss": 1.2903, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 1.9936612844467163, |
| "learning_rate": 0.001, |
| "loss": 1.2918, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.492072343826294, |
| "learning_rate": 0.001, |
| "loss": 1.2894, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.6720198392868042, |
| "learning_rate": 0.001, |
| "loss": 1.2904, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.508172631263733, |
| "learning_rate": 0.001, |
| "loss": 1.3064, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.0777677297592163, |
| "learning_rate": 0.001, |
| "loss": 1.2844, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.7317595481872559, |
| "learning_rate": 0.001, |
| "loss": 1.3043, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.95646333694458, |
| "learning_rate": 0.001, |
| "loss": 1.2934, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.57399320602417, |
| "learning_rate": 0.001, |
| "loss": 1.3161, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.6432753801345825, |
| "learning_rate": 0.001, |
| "loss": 1.318, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 1.5056768655776978, |
| "learning_rate": 0.001, |
| "loss": 1.3235, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.7427054643630981, |
| "learning_rate": 0.001, |
| "loss": 1.3308, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 1.5216237306594849, |
| "learning_rate": 0.001, |
| "loss": 1.3267, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.37506902217865, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 4.7747111320495605, |
| "learning_rate": 0.001, |
| "loss": 1.3388, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.690388798713684, |
| "learning_rate": 0.001, |
| "loss": 1.3483, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.5094951391220093, |
| "learning_rate": 0.001, |
| "loss": 1.332, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.6856282949447632, |
| "learning_rate": 0.001, |
| "loss": 1.3219, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.7640798091888428, |
| "learning_rate": 0.001, |
| "loss": 1.3439, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.9872925281524658, |
| "learning_rate": 0.001, |
| "loss": 1.3587, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 2.0306122303009033, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.7729268074035645, |
| "learning_rate": 0.001, |
| "loss": 1.3727, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.2778888940811157, |
| "learning_rate": 0.001, |
| "loss": 1.3573, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.673628807067871, |
| "learning_rate": 0.001, |
| "loss": 1.3682, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 1.5933386087417603, |
| "learning_rate": 0.001, |
| "loss": 1.3731, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 1.3162115812301636, |
| "learning_rate": 0.001, |
| "loss": 1.373, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 1.569196105003357, |
| "learning_rate": 0.001, |
| "loss": 1.3633, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 1.152018666267395, |
| "learning_rate": 0.001, |
| "loss": 1.3793, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 1.4814441204071045, |
| "learning_rate": 0.001, |
| "loss": 1.3901, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.4163578748703003, |
| "learning_rate": 0.001, |
| "loss": 1.3384, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.1956467628479004, |
| "learning_rate": 0.001, |
| "loss": 1.2723, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 1.5386581420898438, |
| "learning_rate": 0.001, |
| "loss": 1.2844, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 1.6779135465621948, |
| "learning_rate": 0.001, |
| "loss": 1.2884, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.844859004020691, |
| "learning_rate": 0.001, |
| "loss": 1.2919, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.4852981567382812, |
| "learning_rate": 0.001, |
| "loss": 1.2842, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 1.4666519165039062, |
| "learning_rate": 0.001, |
| "loss": 1.2886, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.517770528793335, |
| "learning_rate": 0.001, |
| "loss": 1.2893, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.9516693353652954, |
| "learning_rate": 0.001, |
| "loss": 1.2859, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.8538799285888672, |
| "learning_rate": 0.001, |
| "loss": 1.3131, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.3030380010604858, |
| "learning_rate": 0.001, |
| "loss": 1.3164, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.5888701677322388, |
| "learning_rate": 0.001, |
| "loss": 1.3132, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.2761156558990479, |
| "learning_rate": 0.001, |
| "loss": 1.3071, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 1.4118460416793823, |
| "learning_rate": 0.001, |
| "loss": 1.3191, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 1.9316996335983276, |
| "learning_rate": 0.001, |
| "loss": 1.3, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.3990607261657715, |
| "learning_rate": 0.001, |
| "loss": 1.3264, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 2.499032497406006, |
| "learning_rate": 0.001, |
| "loss": 1.3209, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 1.4380666017532349, |
| "learning_rate": 0.001, |
| "loss": 1.3185, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 2.1750357151031494, |
| "learning_rate": 0.001, |
| "loss": 1.3474, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.3416081666946411, |
| "learning_rate": 0.001, |
| "loss": 1.3386, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 1.6465892791748047, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 2.0284931659698486, |
| "learning_rate": 0.001, |
| "loss": 1.3505, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.6026378870010376, |
| "learning_rate": 0.001, |
| "loss": 1.343, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.9068299531936646, |
| "learning_rate": 0.001, |
| "loss": 1.3451, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 1.76893949508667, |
| "learning_rate": 0.001, |
| "loss": 1.354, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 1.2659997940063477, |
| "learning_rate": 0.001, |
| "loss": 1.3381, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.6921870708465576, |
| "learning_rate": 0.001, |
| "loss": 1.3553, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 1.335923433303833, |
| "learning_rate": 0.001, |
| "loss": 1.3625, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.394056797027588, |
| "learning_rate": 0.001, |
| "loss": 1.3426, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.6350864171981812, |
| "learning_rate": 0.001, |
| "loss": 1.3675, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 1.704758882522583, |
| "learning_rate": 0.001, |
| "loss": 1.3789, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.4936609268188477, |
| "learning_rate": 0.001, |
| "loss": 1.316, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 1.3105096817016602, |
| "learning_rate": 0.001, |
| "loss": 1.249, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 1.5214767456054688, |
| "learning_rate": 0.001, |
| "loss": 1.2599, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 1.9506388902664185, |
| "learning_rate": 0.001, |
| "loss": 1.2607, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.7868854999542236, |
| "learning_rate": 0.001, |
| "loss": 1.2781, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 1.758193016052246, |
| "learning_rate": 0.001, |
| "loss": 1.2745, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 1.6787816286087036, |
| "learning_rate": 0.001, |
| "loss": 1.2754, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 1.638461709022522, |
| "learning_rate": 0.001, |
| "loss": 1.2815, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 1.415128469467163, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.5924972295761108, |
| "learning_rate": 0.001, |
| "loss": 1.2916, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 1.4342584609985352, |
| "learning_rate": 0.001, |
| "loss": 1.3054, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.7077903747558594, |
| "learning_rate": 0.001, |
| "loss": 1.3224, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 1.7451516389846802, |
| "learning_rate": 0.001, |
| "loss": 1.3114, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 1.8831264972686768, |
| "learning_rate": 0.001, |
| "loss": 1.3046, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.3531708717346191, |
| "learning_rate": 0.001, |
| "loss": 1.3241, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 1.7281534671783447, |
| "learning_rate": 0.001, |
| "loss": 1.3101, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 2.001070737838745, |
| "learning_rate": 0.001, |
| "loss": 1.3167, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 2.234375, |
| "learning_rate": 0.001, |
| "loss": 1.3032, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 1.3920390605926514, |
| "learning_rate": 0.001, |
| "loss": 1.3134, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.7844200134277344, |
| "learning_rate": 0.001, |
| "loss": 1.332, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 1.3945051431655884, |
| "learning_rate": 0.001, |
| "loss": 1.3281, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 2.268733024597168, |
| "learning_rate": 0.001, |
| "loss": 1.3415, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.682381510734558, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 1.4384807348251343, |
| "learning_rate": 0.001, |
| "loss": 1.344, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.9348750114440918, |
| "learning_rate": 0.001, |
| "loss": 1.3405, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.895323634147644, |
| "learning_rate": 0.001, |
| "loss": 1.3485, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 1.6878876686096191, |
| "learning_rate": 0.001, |
| "loss": 1.342, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 1.4254131317138672, |
| "learning_rate": 0.001, |
| "loss": 1.3479, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 2.2063348293304443, |
| "learning_rate": 0.001, |
| "loss": 1.3644, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 1.7034088373184204, |
| "learning_rate": 0.001, |
| "loss": 1.3651, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.8378410339355469, |
| "learning_rate": 0.001, |
| "loss": 1.3628, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 1.528680443763733, |
| "learning_rate": 0.001, |
| "loss": 1.2987, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 2.586549997329712, |
| "learning_rate": 0.001, |
| "loss": 1.2445, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 1.8505322933197021, |
| "learning_rate": 0.001, |
| "loss": 1.259, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 2.220876932144165, |
| "learning_rate": 0.001, |
| "loss": 1.2778, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 2.3290762901306152, |
| "learning_rate": 0.001, |
| "loss": 1.2634, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 1.517318844795227, |
| "learning_rate": 0.001, |
| "loss": 1.26, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 1.7899667024612427, |
| "learning_rate": 0.001, |
| "loss": 1.2895, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 1.6911143064498901, |
| "learning_rate": 0.001, |
| "loss": 1.2937, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 2.1663761138916016, |
| "learning_rate": 0.001, |
| "loss": 1.2691, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 1.930943250656128, |
| "learning_rate": 0.001, |
| "loss": 1.2927, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 1.7484216690063477, |
| "learning_rate": 0.001, |
| "loss": 1.2918, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 2.6189985275268555, |
| "learning_rate": 0.001, |
| "loss": 1.2971, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 1.7392945289611816, |
| "learning_rate": 0.001, |
| "loss": 1.3088, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 1.9195913076400757, |
| "learning_rate": 0.001, |
| "loss": 1.3174, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 2.06931209564209, |
| "learning_rate": 0.001, |
| "loss": 1.3164, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 2.035064697265625, |
| "learning_rate": 0.001, |
| "loss": 1.3102, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 1.3531410694122314, |
| "learning_rate": 0.001, |
| "loss": 1.3013, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 2.2259304523468018, |
| "learning_rate": 0.001, |
| "loss": 1.296, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 1.3927483558654785, |
| "learning_rate": 0.001, |
| "loss": 1.3141, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 1.8264931440353394, |
| "learning_rate": 0.001, |
| "loss": 1.3226, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 1.607061743736267, |
| "learning_rate": 0.001, |
| "loss": 1.33, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 1.7336506843566895, |
| "learning_rate": 0.001, |
| "loss": 1.3326, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 2.0042037963867188, |
| "learning_rate": 0.001, |
| "loss": 1.3291, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 1.6352319717407227, |
| "learning_rate": 0.001, |
| "loss": 1.3266, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 1.8481652736663818, |
| "learning_rate": 0.001, |
| "loss": 1.346, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 2.5028653144836426, |
| "learning_rate": 0.001, |
| "loss": 1.3339, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 1.7493265867233276, |
| "learning_rate": 0.001, |
| "loss": 1.3218, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 2.155993700027466, |
| "learning_rate": 0.001, |
| "loss": 1.3362, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 1.4427071809768677, |
| "learning_rate": 0.001, |
| "loss": 1.3435, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 2.04618763923645, |
| "learning_rate": 0.001, |
| "loss": 1.3363, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 2.1985716819763184, |
| "learning_rate": 0.001, |
| "loss": 1.3433, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 1.8995875120162964, |
| "learning_rate": 0.001, |
| "loss": 1.2547, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 1.994150996208191, |
| "learning_rate": 0.001, |
| "loss": 1.2323, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 2.4287238121032715, |
| "learning_rate": 0.001, |
| "loss": 1.2461, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 2.103858232498169, |
| "learning_rate": 0.001, |
| "loss": 1.2589, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 2.1579859256744385, |
| "learning_rate": 0.001, |
| "loss": 1.2667, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 2.698798894882202, |
| "learning_rate": 0.001, |
| "loss": 1.2723, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 1.7385883331298828, |
| "learning_rate": 0.001, |
| "loss": 1.2695, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 2.474719524383545, |
| "learning_rate": 0.001, |
| "loss": 1.2692, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 2.216207265853882, |
| "learning_rate": 0.001, |
| "loss": 1.2677, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 2.280482053756714, |
| "learning_rate": 0.001, |
| "loss": 1.272, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 1.7663888931274414, |
| "learning_rate": 0.001, |
| "loss": 1.2754, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 2.109699249267578, |
| "learning_rate": 0.001, |
| "loss": 1.2942, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 1.941871166229248, |
| "learning_rate": 0.001, |
| "loss": 1.2891, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 2.5241806507110596, |
| "learning_rate": 0.001, |
| "loss": 1.2873, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 2.4268338680267334, |
| "learning_rate": 0.001, |
| "loss": 1.303, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 2.0684986114501953, |
| "learning_rate": 0.001, |
| "loss": 1.2946, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 2.0574164390563965, |
| "learning_rate": 0.001, |
| "loss": 1.3119, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 2.067401647567749, |
| "learning_rate": 0.001, |
| "loss": 1.3042, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 2.7004194259643555, |
| "learning_rate": 0.001, |
| "loss": 1.3117, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 2.0863163471221924, |
| "learning_rate": 0.001, |
| "loss": 1.3333, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 1.922520399093628, |
| "learning_rate": 0.001, |
| "loss": 1.3118, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 2.3876094818115234, |
| "learning_rate": 0.001, |
| "loss": 1.3296, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 2.248257875442505, |
| "learning_rate": 0.001, |
| "loss": 1.3328, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 2.286165237426758, |
| "learning_rate": 0.001, |
| "loss": 1.3248, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 2.5652499198913574, |
| "learning_rate": 0.001, |
| "loss": 1.3373, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 2.686047077178955, |
| "learning_rate": 0.001, |
| "loss": 1.3372, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 1.6270856857299805, |
| "learning_rate": 0.001, |
| "loss": 1.3453, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 2.0927953720092773, |
| "learning_rate": 0.001, |
| "loss": 1.3365, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 2.6588659286499023, |
| "learning_rate": 0.001, |
| "loss": 1.3346, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 1.880811095237732, |
| "learning_rate": 0.001, |
| "loss": 1.32, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 2.332777261734009, |
| "learning_rate": 0.001, |
| "loss": 1.3514, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 1.4552125930786133, |
| "learning_rate": 0.001, |
| "loss": 1.2992, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 1.6631439924240112, |
| "learning_rate": 0.001, |
| "loss": 1.229, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 1.6396737098693848, |
| "learning_rate": 0.001, |
| "loss": 1.2473, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 1.4893487691879272, |
| "learning_rate": 0.001, |
| "loss": 1.251, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 1.5711896419525146, |
| "learning_rate": 0.001, |
| "loss": 1.2428, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 1.5022379159927368, |
| "learning_rate": 0.001, |
| "loss": 1.2523, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 1.4917467832565308, |
| "learning_rate": 0.001, |
| "loss": 1.2816, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 2.1330008506774902, |
| "learning_rate": 0.001, |
| "loss": 1.2446, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 1.9323478937149048, |
| "learning_rate": 0.001, |
| "loss": 1.2773, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.798542857170105, |
| "learning_rate": 0.001, |
| "loss": 1.2836, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.9975454807281494, |
| "learning_rate": 0.001, |
| "loss": 1.2751, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.5190203189849854, |
| "learning_rate": 0.001, |
| "loss": 1.2687, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 2.050550699234009, |
| "learning_rate": 0.001, |
| "loss": 1.2856, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 1.8917027711868286, |
| "learning_rate": 0.001, |
| "loss": 1.2778, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 1.359777808189392, |
| "learning_rate": 0.001, |
| "loss": 1.2847, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.5652052164077759, |
| "learning_rate": 0.001, |
| "loss": 1.301, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 2.3575685024261475, |
| "learning_rate": 0.001, |
| "loss": 1.3116, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 1.5174378156661987, |
| "learning_rate": 0.001, |
| "loss": 1.2913, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.8151758909225464, |
| "learning_rate": 0.001, |
| "loss": 1.2957, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 2.0352110862731934, |
| "learning_rate": 0.001, |
| "loss": 1.3008, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 1.5290433168411255, |
| "learning_rate": 0.001, |
| "loss": 1.3001, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.6776798963546753, |
| "learning_rate": 0.001, |
| "loss": 1.2941, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 1.7184401750564575, |
| "learning_rate": 0.001, |
| "loss": 1.3165, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.907509446144104, |
| "learning_rate": 0.001, |
| "loss": 1.3158, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 2.0751779079437256, |
| "learning_rate": 0.001, |
| "loss": 1.3261, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 2.162783145904541, |
| "learning_rate": 0.001, |
| "loss": 1.3363, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 1.6640735864639282, |
| "learning_rate": 0.001, |
| "loss": 1.3185, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 1.5273408889770508, |
| "learning_rate": 0.001, |
| "loss": 1.3487, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.5014934539794922, |
| "learning_rate": 0.001, |
| "loss": 1.34, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.6896687746047974, |
| "learning_rate": 0.001, |
| "loss": 1.3323, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.7303470373153687, |
| "learning_rate": 0.001, |
| "loss": 1.3495, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 1.8442621231079102, |
| "learning_rate": 0.001, |
| "loss": 1.2774, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.4819287061691284, |
| "learning_rate": 0.001, |
| "loss": 1.2315, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 1.1895514726638794, |
| "learning_rate": 0.001, |
| "loss": 1.2365, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.6040140390396118, |
| "learning_rate": 0.001, |
| "loss": 1.2295, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.6429595947265625, |
| "learning_rate": 0.001, |
| "loss": 1.2461, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 2.7958550453186035, |
| "learning_rate": 0.001, |
| "loss": 1.2478, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.813547968864441, |
| "learning_rate": 0.001, |
| "loss": 1.2632, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 1.5957227945327759, |
| "learning_rate": 0.001, |
| "loss": 1.2692, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 2.054250955581665, |
| "learning_rate": 0.001, |
| "loss": 1.2543, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 1.2453227043151855, |
| "learning_rate": 0.001, |
| "loss": 1.273, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 1.8191041946411133, |
| "learning_rate": 0.001, |
| "loss": 1.2668, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.5118216276168823, |
| "learning_rate": 0.001, |
| "loss": 1.2659, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.0511139631271362, |
| "learning_rate": 0.001, |
| "loss": 1.2665, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 1.2704646587371826, |
| "learning_rate": 0.001, |
| "loss": 1.2857, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 2.2298457622528076, |
| "learning_rate": 0.001, |
| "loss": 1.2797, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 1.5113145112991333, |
| "learning_rate": 0.001, |
| "loss": 1.2943, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.9049075841903687, |
| "learning_rate": 0.001, |
| "loss": 1.287, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.4290504455566406, |
| "learning_rate": 0.001, |
| "loss": 1.2926, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 2.210345983505249, |
| "learning_rate": 0.001, |
| "loss": 1.2926, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.477892518043518, |
| "learning_rate": 0.001, |
| "loss": 1.294, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 1.285657525062561, |
| "learning_rate": 0.001, |
| "loss": 1.3229, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 1.651516079902649, |
| "learning_rate": 0.001, |
| "loss": 1.2973, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.464377760887146, |
| "learning_rate": 0.001, |
| "loss": 1.2994, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.3478065729141235, |
| "learning_rate": 0.001, |
| "loss": 1.319, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 1.2298150062561035, |
| "learning_rate": 0.001, |
| "loss": 1.3226, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 2.020278215408325, |
| "learning_rate": 0.001, |
| "loss": 1.3284, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.2401459217071533, |
| "learning_rate": 0.001, |
| "loss": 1.3085, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.4142014980316162, |
| "learning_rate": 0.001, |
| "loss": 1.3168, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.6408050060272217, |
| "learning_rate": 0.001, |
| "loss": 1.3243, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.4897146224975586, |
| "learning_rate": 0.001, |
| "loss": 1.3336, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 2.0061614513397217, |
| "learning_rate": 0.001, |
| "loss": 1.3167, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.3673760890960693, |
| "learning_rate": 0.001, |
| "loss": 1.2673, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.8199971914291382, |
| "learning_rate": 0.001, |
| "loss": 1.2247, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.2587751150131226, |
| "learning_rate": 0.001, |
| "loss": 1.2225, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 1.8833216428756714, |
| "learning_rate": 0.001, |
| "loss": 1.2309, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 1.4985368251800537, |
| "learning_rate": 0.001, |
| "loss": 1.2471, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.5621936321258545, |
| "learning_rate": 0.001, |
| "loss": 1.2422, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.7414886951446533, |
| "learning_rate": 0.001, |
| "loss": 1.2474, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 1.4623531103134155, |
| "learning_rate": 0.001, |
| "loss": 1.2563, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.414461374282837, |
| "learning_rate": 0.001, |
| "loss": 1.265, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 1.269479513168335, |
| "learning_rate": 0.001, |
| "loss": 1.2619, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.8102675676345825, |
| "learning_rate": 0.001, |
| "loss": 1.2651, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 1.960545539855957, |
| "learning_rate": 0.001, |
| "loss": 1.2483, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.1952399015426636, |
| "learning_rate": 0.001, |
| "loss": 1.2729, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.357187032699585, |
| "learning_rate": 0.001, |
| "loss": 1.2678, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 1.668985366821289, |
| "learning_rate": 0.001, |
| "loss": 1.2721, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 1.272112250328064, |
| "learning_rate": 0.001, |
| "loss": 1.2756, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.2903515100479126, |
| "learning_rate": 0.001, |
| "loss": 1.2804, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 1.7097840309143066, |
| "learning_rate": 0.001, |
| "loss": 1.2845, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.4061191082000732, |
| "learning_rate": 0.001, |
| "loss": 1.2745, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 1.44053053855896, |
| "learning_rate": 0.001, |
| "loss": 1.2889, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 2.1334850788116455, |
| "learning_rate": 0.001, |
| "loss": 1.282, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.7297238111495972, |
| "learning_rate": 0.001, |
| "loss": 1.295, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.1532487869262695, |
| "learning_rate": 0.001, |
| "loss": 1.299, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 2.166877031326294, |
| "learning_rate": 0.001, |
| "loss": 1.3134, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.2987929582595825, |
| "learning_rate": 0.001, |
| "loss": 1.3171, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.4848920106887817, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.5554513931274414, |
| "learning_rate": 0.001, |
| "loss": 1.3108, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 1.750722885131836, |
| "learning_rate": 0.001, |
| "loss": 1.3258, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.9449609518051147, |
| "learning_rate": 0.001, |
| "loss": 1.3133, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.9386721849441528, |
| "learning_rate": 0.001, |
| "loss": 1.3233, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 1.9014012813568115, |
| "learning_rate": 0.001, |
| "loss": 1.3221, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 1.2058799266815186, |
| "learning_rate": 0.001, |
| "loss": 1.236, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.2706985473632812, |
| "learning_rate": 0.001, |
| "loss": 1.2129, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 1.4728243350982666, |
| "learning_rate": 0.001, |
| "loss": 1.2243, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.2134689092636108, |
| "learning_rate": 0.001, |
| "loss": 1.2325, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 1.6454464197158813, |
| "learning_rate": 0.001, |
| "loss": 1.2395, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.9662758111953735, |
| "learning_rate": 0.001, |
| "loss": 1.2388, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 1.4569859504699707, |
| "learning_rate": 0.001, |
| "loss": 1.2291, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.729219675064087, |
| "learning_rate": 0.001, |
| "loss": 1.249, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 1.807348608970642, |
| "learning_rate": 0.001, |
| "loss": 1.2396, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.5521453619003296, |
| "learning_rate": 0.001, |
| "loss": 1.2442, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 2.2156636714935303, |
| "learning_rate": 0.001, |
| "loss": 1.2616, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.3331085443496704, |
| "learning_rate": 0.001, |
| "loss": 1.2644, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.475218415260315, |
| "learning_rate": 0.001, |
| "loss": 1.2595, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.450708031654358, |
| "learning_rate": 0.001, |
| "loss": 1.2726, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.3240978717803955, |
| "learning_rate": 0.001, |
| "loss": 1.2772, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 2.481502056121826, |
| "learning_rate": 0.001, |
| "loss": 1.264, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 2.0001509189605713, |
| "learning_rate": 0.001, |
| "loss": 1.2684, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 1.6823323965072632, |
| "learning_rate": 0.001, |
| "loss": 1.2738, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.9127426147460938, |
| "learning_rate": 0.001, |
| "loss": 1.2745, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 2.290731430053711, |
| "learning_rate": 0.001, |
| "loss": 1.2999, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.918789029121399, |
| "learning_rate": 0.001, |
| "loss": 1.2915, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 1.8735263347625732, |
| "learning_rate": 0.001, |
| "loss": 1.2894, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.90142023563385, |
| "learning_rate": 0.001, |
| "loss": 1.2924, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.792066216468811, |
| "learning_rate": 0.001, |
| "loss": 1.2807, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.2293639183044434, |
| "learning_rate": 0.001, |
| "loss": 1.2857, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 1.5063929557800293, |
| "learning_rate": 0.001, |
| "loss": 1.3013, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.3179506063461304, |
| "learning_rate": 0.001, |
| "loss": 1.2913, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.338300108909607, |
| "learning_rate": 0.001, |
| "loss": 1.3201, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.3150955438613892, |
| "learning_rate": 0.001, |
| "loss": 1.3067, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.5713889598846436, |
| "learning_rate": 0.001, |
| "loss": 1.3337, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 1.7930541038513184, |
| "learning_rate": 0.001, |
| "loss": 1.3121, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 1.610605001449585, |
| "learning_rate": 0.001, |
| "loss": 1.2352, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 2.0419564247131348, |
| "learning_rate": 0.001, |
| "loss": 1.2096, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 2.3417627811431885, |
| "learning_rate": 0.001, |
| "loss": 1.2112, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.5265817642211914, |
| "learning_rate": 0.001, |
| "loss": 1.2217, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 1.7095226049423218, |
| "learning_rate": 0.001, |
| "loss": 1.2166, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.6318248510360718, |
| "learning_rate": 0.001, |
| "loss": 1.2286, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.8089474439620972, |
| "learning_rate": 0.001, |
| "loss": 1.2317, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 1.3798012733459473, |
| "learning_rate": 0.001, |
| "loss": 1.2327, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.7488353252410889, |
| "learning_rate": 0.001, |
| "loss": 1.2438, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 1.488607406616211, |
| "learning_rate": 0.001, |
| "loss": 1.25, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 2.1224374771118164, |
| "learning_rate": 0.001, |
| "loss": 1.2424, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.66317880153656, |
| "learning_rate": 0.001, |
| "loss": 1.2541, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.8994399309158325, |
| "learning_rate": 0.001, |
| "loss": 1.2567, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.2667827606201172, |
| "learning_rate": 0.001, |
| "loss": 1.2605, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 1.685808539390564, |
| "learning_rate": 0.001, |
| "loss": 1.2557, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.3293232917785645, |
| "learning_rate": 0.001, |
| "loss": 1.2535, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 2.5228679180145264, |
| "learning_rate": 0.001, |
| "loss": 1.273, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.449995756149292, |
| "learning_rate": 0.001, |
| "loss": 1.2904, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 1.6519827842712402, |
| "learning_rate": 0.001, |
| "loss": 1.2546, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 1.2943912744522095, |
| "learning_rate": 0.001, |
| "loss": 1.278, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.3893470764160156, |
| "learning_rate": 0.001, |
| "loss": 1.2863, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.8279205560684204, |
| "learning_rate": 0.001, |
| "loss": 1.287, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 1.4569382667541504, |
| "learning_rate": 0.001, |
| "loss": 1.2794, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.452245831489563, |
| "learning_rate": 0.001, |
| "loss": 1.2973, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 1.566039800643921, |
| "learning_rate": 0.001, |
| "loss": 1.2916, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.5842382907867432, |
| "learning_rate": 0.001, |
| "loss": 1.2887, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 1.286839246749878, |
| "learning_rate": 0.001, |
| "loss": 1.2955, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 1.8135418891906738, |
| "learning_rate": 0.001, |
| "loss": 1.2965, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 1.5368353128433228, |
| "learning_rate": 0.001, |
| "loss": 1.2948, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 1.3691718578338623, |
| "learning_rate": 0.001, |
| "loss": 1.3264, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 1.9810742139816284, |
| "learning_rate": 0.001, |
| "loss": 1.2892, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 1.7202798128128052, |
| "learning_rate": 0.001, |
| "loss": 1.2013, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 1.5454028844833374, |
| "learning_rate": 0.001, |
| "loss": 1.205, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.8991540670394897, |
| "learning_rate": 0.001, |
| "loss": 1.2132, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 1.3878988027572632, |
| "learning_rate": 0.001, |
| "loss": 1.2278, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 1.6878595352172852, |
| "learning_rate": 0.001, |
| "loss": 1.2232, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 1.8622406721115112, |
| "learning_rate": 0.001, |
| "loss": 1.2336, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.7215933799743652, |
| "learning_rate": 0.001, |
| "loss": 1.2248, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 1.5063960552215576, |
| "learning_rate": 0.001, |
| "loss": 1.2253, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 1.4453834295272827, |
| "learning_rate": 0.001, |
| "loss": 1.23, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 2.0164635181427, |
| "learning_rate": 0.001, |
| "loss": 1.2498, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 1.4843008518218994, |
| "learning_rate": 0.001, |
| "loss": 1.213, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.3511826992034912, |
| "learning_rate": 0.001, |
| "loss": 1.2365, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 1.81965970993042, |
| "learning_rate": 0.001, |
| "loss": 1.2525, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 1.1546568870544434, |
| "learning_rate": 0.001, |
| "loss": 1.2577, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.674634575843811, |
| "learning_rate": 0.001, |
| "loss": 1.2677, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 1.6290024518966675, |
| "learning_rate": 0.001, |
| "loss": 1.248, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.4020588397979736, |
| "learning_rate": 0.001, |
| "loss": 1.258, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 1.9796793460845947, |
| "learning_rate": 0.001, |
| "loss": 1.266, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 2.3626046180725098, |
| "learning_rate": 0.001, |
| "loss": 1.287, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 1.972294807434082, |
| "learning_rate": 0.001, |
| "loss": 1.2659, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 1.8454577922821045, |
| "learning_rate": 0.001, |
| "loss": 1.2686, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 1.5707216262817383, |
| "learning_rate": 0.001, |
| "loss": 1.2645, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 2.1897661685943604, |
| "learning_rate": 0.001, |
| "loss": 1.2955, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 2.0576090812683105, |
| "learning_rate": 0.001, |
| "loss": 1.2879, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 1.7895479202270508, |
| "learning_rate": 0.001, |
| "loss": 1.2828, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 2.119438886642456, |
| "learning_rate": 0.001, |
| "loss": 1.2859, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 1.8811088800430298, |
| "learning_rate": 0.001, |
| "loss": 1.3123, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.4409716129302979, |
| "learning_rate": 0.001, |
| "loss": 1.2987, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 1.7243212461471558, |
| "learning_rate": 0.001, |
| "loss": 1.2764, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 1.9806982278823853, |
| "learning_rate": 0.001, |
| "loss": 1.2895, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 1.9793446063995361, |
| "learning_rate": 0.001, |
| "loss": 1.2932, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 1.556868553161621, |
| "learning_rate": 0.001, |
| "loss": 1.2066, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 2.289224147796631, |
| "learning_rate": 0.001, |
| "loss": 1.1951, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 2.0421366691589355, |
| "learning_rate": 0.001, |
| "loss": 1.1905, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 1.501111388206482, |
| "learning_rate": 0.001, |
| "loss": 1.2058, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 1.870614767074585, |
| "learning_rate": 0.001, |
| "loss": 1.2213, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 1.9706679582595825, |
| "learning_rate": 0.001, |
| "loss": 1.2093, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 1.7341151237487793, |
| "learning_rate": 0.001, |
| "loss": 1.2193, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 1.816436529159546, |
| "learning_rate": 0.001, |
| "loss": 1.2146, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 1.837285041809082, |
| "learning_rate": 0.001, |
| "loss": 1.2314, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 1.6776964664459229, |
| "learning_rate": 0.001, |
| "loss": 1.222, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 1.6069055795669556, |
| "learning_rate": 0.001, |
| "loss": 1.2429, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 1.5101720094680786, |
| "learning_rate": 0.001, |
| "loss": 1.2366, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 2.5875556468963623, |
| "learning_rate": 0.001, |
| "loss": 1.2479, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 2.130882740020752, |
| "learning_rate": 0.001, |
| "loss": 1.2515, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 1.422727346420288, |
| "learning_rate": 0.001, |
| "loss": 1.2506, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 1.9222614765167236, |
| "learning_rate": 0.001, |
| "loss": 1.2563, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 3.0412681102752686, |
| "learning_rate": 0.001, |
| "loss": 1.2663, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.837545394897461, |
| "learning_rate": 0.001, |
| "loss": 1.2609, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 2.1308412551879883, |
| "learning_rate": 0.001, |
| "loss": 1.2568, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 2.1382863521575928, |
| "learning_rate": 0.001, |
| "loss": 1.2525, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 1.825967788696289, |
| "learning_rate": 0.001, |
| "loss": 1.2605, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 1.8287477493286133, |
| "learning_rate": 0.001, |
| "loss": 1.2894, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 1.9624981880187988, |
| "learning_rate": 0.001, |
| "loss": 1.2688, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 2.4718258380889893, |
| "learning_rate": 0.001, |
| "loss": 1.2777, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 1.8880257606506348, |
| "learning_rate": 0.001, |
| "loss": 1.2586, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.5955743789672852, |
| "learning_rate": 0.001, |
| "loss": 1.2769, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 1.7947994470596313, |
| "learning_rate": 0.001, |
| "loss": 1.2757, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 2.2135133743286133, |
| "learning_rate": 0.001, |
| "loss": 1.2686, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 1.7485551834106445, |
| "learning_rate": 0.001, |
| "loss": 1.2962, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 1.9167695045471191, |
| "learning_rate": 0.001, |
| "loss": 1.2905, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 1.720075249671936, |
| "learning_rate": 0.001, |
| "loss": 1.2713, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 2.292396306991577, |
| "learning_rate": 0.001, |
| "loss": 1.1707, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 1.627952218055725, |
| "learning_rate": 0.001, |
| "loss": 1.1873, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 1.960242509841919, |
| "learning_rate": 0.001, |
| "loss": 1.1937, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.4049196243286133, |
| "learning_rate": 0.001, |
| "loss": 1.2042, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 2.174247980117798, |
| "learning_rate": 0.001, |
| "loss": 1.2041, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 1.7446444034576416, |
| "learning_rate": 0.001, |
| "loss": 1.2162, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 2.2466647624969482, |
| "learning_rate": 0.001, |
| "loss": 1.1843, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 2.1880221366882324, |
| "learning_rate": 0.001, |
| "loss": 1.2089, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 1.840790867805481, |
| "learning_rate": 0.001, |
| "loss": 1.2136, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 1.8245068788528442, |
| "learning_rate": 0.001, |
| "loss": 1.2295, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 2.5724518299102783, |
| "learning_rate": 0.001, |
| "loss": 1.2236, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 1.9020832777023315, |
| "learning_rate": 0.001, |
| "loss": 1.2394, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 1.76996648311615, |
| "learning_rate": 0.001, |
| "loss": 1.2406, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 2.1938960552215576, |
| "learning_rate": 0.001, |
| "loss": 1.2445, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 2.2984790802001953, |
| "learning_rate": 0.001, |
| "loss": 1.2505, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 1.9285473823547363, |
| "learning_rate": 0.001, |
| "loss": 1.266, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 1.770233392715454, |
| "learning_rate": 0.001, |
| "loss": 1.2508, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 1.5901305675506592, |
| "learning_rate": 0.001, |
| "loss": 1.2521, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 2.1101291179656982, |
| "learning_rate": 0.001, |
| "loss": 1.2566, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 2.0092530250549316, |
| "learning_rate": 0.001, |
| "loss": 1.2601, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 2.428415298461914, |
| "learning_rate": 0.001, |
| "loss": 1.272, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 2.0060224533081055, |
| "learning_rate": 0.001, |
| "loss": 1.2866, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 3.634275436401367, |
| "learning_rate": 0.001, |
| "loss": 1.2685, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 1.6629226207733154, |
| "learning_rate": 0.001, |
| "loss": 1.2629, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 2.2785284519195557, |
| "learning_rate": 0.001, |
| "loss": 1.2617, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 2.1297974586486816, |
| "learning_rate": 0.001, |
| "loss": 1.2808, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 1.719080924987793, |
| "learning_rate": 0.001, |
| "loss": 1.2701, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 2.1362926959991455, |
| "learning_rate": 0.001, |
| "loss": 1.2789, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 1.7237249612808228, |
| "learning_rate": 0.001, |
| "loss": 1.2776, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 2.223992347717285, |
| "learning_rate": 0.001, |
| "loss": 1.2811, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 1.4170386791229248, |
| "learning_rate": 0.001, |
| "loss": 1.2828, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 1.6659284830093384, |
| "learning_rate": 0.001, |
| "loss": 1.1881, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 2.675107002258301, |
| "learning_rate": 0.001, |
| "loss": 1.1863, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 1.7558014392852783, |
| "learning_rate": 0.001, |
| "loss": 1.1711, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 1.7978248596191406, |
| "learning_rate": 0.001, |
| "loss": 1.1957, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 1.5442531108856201, |
| "learning_rate": 0.001, |
| "loss": 1.2006, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 1.9399360418319702, |
| "learning_rate": 0.001, |
| "loss": 1.2176, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 1.6368488073349, |
| "learning_rate": 0.001, |
| "loss": 1.2071, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 1.65631103515625, |
| "learning_rate": 0.001, |
| "loss": 1.1956, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 1.7855615615844727, |
| "learning_rate": 0.001, |
| "loss": 1.2152, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 1.879173755645752, |
| "learning_rate": 0.001, |
| "loss": 1.2267, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 2.325134038925171, |
| "learning_rate": 0.001, |
| "loss": 1.2399, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 1.6414111852645874, |
| "learning_rate": 0.001, |
| "loss": 1.2183, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 1.7910301685333252, |
| "learning_rate": 0.001, |
| "loss": 1.2269, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 1.5484957695007324, |
| "learning_rate": 0.001, |
| "loss": 1.2264, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 2.113661527633667, |
| "learning_rate": 0.001, |
| "loss": 1.2232, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 1.4829312562942505, |
| "learning_rate": 0.001, |
| "loss": 1.2486, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 1.8122377395629883, |
| "learning_rate": 0.001, |
| "loss": 1.232, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 1.4297584295272827, |
| "learning_rate": 0.001, |
| "loss": 1.2386, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 2.0205373764038086, |
| "learning_rate": 0.001, |
| "loss": 1.2506, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.655685544013977, |
| "learning_rate": 0.001, |
| "loss": 1.2424, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 2.893658399581909, |
| "learning_rate": 0.001, |
| "loss": 1.2462, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 2.2161877155303955, |
| "learning_rate": 0.001, |
| "loss": 1.2411, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 1.4535630941390991, |
| "learning_rate": 0.001, |
| "loss": 1.2521, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 2.2844653129577637, |
| "learning_rate": 0.001, |
| "loss": 1.2678, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 2.162257432937622, |
| "learning_rate": 0.001, |
| "loss": 1.2596, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 1.8454498052597046, |
| "learning_rate": 0.001, |
| "loss": 1.2757, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 1.4504950046539307, |
| "learning_rate": 0.001, |
| "loss": 1.2718, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 1.8487071990966797, |
| "learning_rate": 0.001, |
| "loss": 1.2746, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 2.1302402019500732, |
| "learning_rate": 0.001, |
| "loss": 1.2723, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 2.300407648086548, |
| "learning_rate": 0.001, |
| "loss": 1.2888, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 1.430111050605774, |
| "learning_rate": 0.001, |
| "loss": 1.2833, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 1.5040006637573242, |
| "learning_rate": 0.001, |
| "loss": 1.177, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 1.4833869934082031, |
| "learning_rate": 0.001, |
| "loss": 1.1778, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.8925707340240479, |
| "learning_rate": 0.001, |
| "loss": 1.1796, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 1.7188044786453247, |
| "learning_rate": 0.001, |
| "loss": 1.1676, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 1.8766733407974243, |
| "learning_rate": 0.001, |
| "loss": 1.1869, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 1.5815516710281372, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 2.0210530757904053, |
| "learning_rate": 0.001, |
| "loss": 1.2048, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 1.6629260778427124, |
| "learning_rate": 0.001, |
| "loss": 1.2358, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 1.2752668857574463, |
| "learning_rate": 0.001, |
| "loss": 1.2006, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 1.6782549619674683, |
| "learning_rate": 0.001, |
| "loss": 1.2229, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 1.7548514604568481, |
| "learning_rate": 0.001, |
| "loss": 1.2279, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 1.534002423286438, |
| "learning_rate": 0.001, |
| "loss": 1.2161, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 1.6671688556671143, |
| "learning_rate": 0.001, |
| "loss": 1.23, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 1.7113094329833984, |
| "learning_rate": 0.001, |
| "loss": 1.2288, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.709190011024475, |
| "learning_rate": 0.001, |
| "loss": 1.2163, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.7736314535140991, |
| "learning_rate": 0.001, |
| "loss": 1.2368, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 1.317912220954895, |
| "learning_rate": 0.001, |
| "loss": 1.2442, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.6481040716171265, |
| "learning_rate": 0.001, |
| "loss": 1.2399, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 1.806247591972351, |
| "learning_rate": 0.001, |
| "loss": 1.2449, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 1.9776191711425781, |
| "learning_rate": 0.001, |
| "loss": 1.2268, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.5968152284622192, |
| "learning_rate": 0.001, |
| "loss": 1.2466, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 1.5977829694747925, |
| "learning_rate": 0.001, |
| "loss": 1.2251, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 1.4744341373443604, |
| "learning_rate": 0.001, |
| "loss": 1.2618, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 1.6004712581634521, |
| "learning_rate": 0.001, |
| "loss": 1.2541, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 1.8422083854675293, |
| "learning_rate": 0.001, |
| "loss": 1.2681, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.2561287879943848, |
| "learning_rate": 0.001, |
| "loss": 1.2552, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.9980125427246094, |
| "learning_rate": 0.001, |
| "loss": 1.265, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 1.6248669624328613, |
| "learning_rate": 0.001, |
| "loss": 1.2585, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 1.5957036018371582, |
| "learning_rate": 0.001, |
| "loss": 1.2756, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 1.9643137454986572, |
| "learning_rate": 0.001, |
| "loss": 1.2729, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.8060412406921387, |
| "learning_rate": 0.001, |
| "loss": 1.2612, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.5058979988098145, |
| "learning_rate": 0.001, |
| "loss": 1.1647, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 1.47739839553833, |
| "learning_rate": 0.001, |
| "loss": 1.1664, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.7648849487304688, |
| "learning_rate": 0.001, |
| "loss": 1.1676, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 2.075975179672241, |
| "learning_rate": 0.001, |
| "loss": 1.1901, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 1.394399642944336, |
| "learning_rate": 0.001, |
| "loss": 1.1891, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 1.4004300832748413, |
| "learning_rate": 0.001, |
| "loss": 1.1836, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 2.235933303833008, |
| "learning_rate": 0.001, |
| "loss": 1.1876, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.395911693572998, |
| "learning_rate": 0.001, |
| "loss": 1.2057, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 1.5776115655899048, |
| "learning_rate": 0.001, |
| "loss": 1.2151, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 2.331512451171875, |
| "learning_rate": 0.001, |
| "loss": 1.204, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 1.41651451587677, |
| "learning_rate": 0.001, |
| "loss": 1.2105, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 1.1959208250045776, |
| "learning_rate": 0.001, |
| "loss": 1.2182, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 1.5867549180984497, |
| "learning_rate": 0.001, |
| "loss": 1.226, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 1.6116405725479126, |
| "learning_rate": 0.001, |
| "loss": 1.2006, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.4707088470458984, |
| "learning_rate": 0.001, |
| "loss": 1.2255, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 1.641430139541626, |
| "learning_rate": 0.001, |
| "loss": 1.2294, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.7087689638137817, |
| "learning_rate": 0.001, |
| "loss": 1.2211, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.8745614290237427, |
| "learning_rate": 0.001, |
| "loss": 1.2417, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 1.6811854839324951, |
| "learning_rate": 0.001, |
| "loss": 1.2286, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 1.4782730340957642, |
| "learning_rate": 0.001, |
| "loss": 1.2382, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 1.927930235862732, |
| "learning_rate": 0.001, |
| "loss": 1.2499, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.6725027561187744, |
| "learning_rate": 0.001, |
| "loss": 1.2356, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.7192740440368652, |
| "learning_rate": 0.001, |
| "loss": 1.2512, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 1.5314967632293701, |
| "learning_rate": 0.001, |
| "loss": 1.2408, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.8034896850585938, |
| "learning_rate": 0.001, |
| "loss": 1.2595, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 1.245100975036621, |
| "learning_rate": 0.001, |
| "loss": 1.2543, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 2.7486352920532227, |
| "learning_rate": 0.001, |
| "loss": 1.2456, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 1.4387123584747314, |
| "learning_rate": 0.001, |
| "loss": 1.2536, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.3550703525543213, |
| "learning_rate": 0.001, |
| "loss": 1.2516, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 1.9709012508392334, |
| "learning_rate": 0.001, |
| "loss": 1.2724, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 1.8654589653015137, |
| "learning_rate": 0.001, |
| "loss": 1.2617, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.6749591827392578, |
| "learning_rate": 0.001, |
| "loss": 1.1557, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.3441767692565918, |
| "learning_rate": 0.001, |
| "loss": 1.1659, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.4837168455123901, |
| "learning_rate": 0.001, |
| "loss": 1.1694, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 1.3991222381591797, |
| "learning_rate": 0.001, |
| "loss": 1.179, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 1.1834875345230103, |
| "learning_rate": 0.001, |
| "loss": 1.1607, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 1.8037092685699463, |
| "learning_rate": 0.001, |
| "loss": 1.1802, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 1.2468910217285156, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 1.94520902633667, |
| "learning_rate": 0.001, |
| "loss": 1.1904, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.4701403379440308, |
| "learning_rate": 0.001, |
| "loss": 1.202, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.37777841091156, |
| "learning_rate": 0.001, |
| "loss": 1.2069, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.7511200904846191, |
| "learning_rate": 0.001, |
| "loss": 1.2119, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 1.3382188081741333, |
| "learning_rate": 0.001, |
| "loss": 1.2133, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 1.5276641845703125, |
| "learning_rate": 0.001, |
| "loss": 1.1879, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 1.6079522371292114, |
| "learning_rate": 0.001, |
| "loss": 1.219, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 1.564125657081604, |
| "learning_rate": 0.001, |
| "loss": 1.2191, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 1.3489891290664673, |
| "learning_rate": 0.001, |
| "loss": 1.2099, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.456994891166687, |
| "learning_rate": 0.001, |
| "loss": 1.2107, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 1.3025860786437988, |
| "learning_rate": 0.001, |
| "loss": 1.2268, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 3.015007972717285, |
| "learning_rate": 0.001, |
| "loss": 1.2385, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.3912714719772339, |
| "learning_rate": 0.001, |
| "loss": 1.2338, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.419293761253357, |
| "learning_rate": 0.001, |
| "loss": 1.2603, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.6812208890914917, |
| "learning_rate": 0.001, |
| "loss": 1.2414, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 1.2488850355148315, |
| "learning_rate": 0.001, |
| "loss": 1.2462, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.5979465246200562, |
| "learning_rate": 0.001, |
| "loss": 1.2491, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.653700351715088, |
| "learning_rate": 0.001, |
| "loss": 1.2597, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 1.933099389076233, |
| "learning_rate": 0.001, |
| "loss": 1.2401, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 1.5535463094711304, |
| "learning_rate": 0.001, |
| "loss": 1.23, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 1.6965819597244263, |
| "learning_rate": 0.001, |
| "loss": 1.2601, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 1.4667446613311768, |
| "learning_rate": 0.001, |
| "loss": 1.2626, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 2.0227420330047607, |
| "learning_rate": 0.001, |
| "loss": 1.2639, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.4765963554382324, |
| "learning_rate": 0.001, |
| "loss": 1.2238, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 1.793649435043335, |
| "learning_rate": 0.001, |
| "loss": 1.1591, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 2.400162696838379, |
| "learning_rate": 0.001, |
| "loss": 1.1521, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.7438671588897705, |
| "learning_rate": 0.001, |
| "loss": 1.1577, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.7920339107513428, |
| "learning_rate": 0.001, |
| "loss": 1.184, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.8441482782363892, |
| "learning_rate": 0.001, |
| "loss": 1.1824, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 1.4053528308868408, |
| "learning_rate": 0.001, |
| "loss": 1.1754, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.9038269519805908, |
| "learning_rate": 0.001, |
| "loss": 1.1905, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.4530909061431885, |
| "learning_rate": 0.001, |
| "loss": 1.1835, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.4575316905975342, |
| "learning_rate": 0.001, |
| "loss": 1.1903, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 1.709861159324646, |
| "learning_rate": 0.001, |
| "loss": 1.198, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 1.7516298294067383, |
| "learning_rate": 0.001, |
| "loss": 1.1878, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 1.1928800344467163, |
| "learning_rate": 0.001, |
| "loss": 1.1845, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 1.709099292755127, |
| "learning_rate": 0.001, |
| "loss": 1.2039, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 1.385904312133789, |
| "learning_rate": 0.001, |
| "loss": 1.2043, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.6170157194137573, |
| "learning_rate": 0.001, |
| "loss": 1.204, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 2.177598476409912, |
| "learning_rate": 0.001, |
| "loss": 1.2059, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.483108639717102, |
| "learning_rate": 0.001, |
| "loss": 1.2129, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 1.579835295677185, |
| "learning_rate": 0.001, |
| "loss": 1.2201, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.5038883686065674, |
| "learning_rate": 0.001, |
| "loss": 1.2275, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 1.4175972938537598, |
| "learning_rate": 0.001, |
| "loss": 1.2286, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.6123201847076416, |
| "learning_rate": 0.001, |
| "loss": 1.2265, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 1.1493910551071167, |
| "learning_rate": 0.001, |
| "loss": 1.2208, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.391905426979065, |
| "learning_rate": 0.001, |
| "loss": 1.2416, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.8348945379257202, |
| "learning_rate": 0.001, |
| "loss": 1.235, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 1.6627116203308105, |
| "learning_rate": 0.001, |
| "loss": 1.2438, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 1.9870775938034058, |
| "learning_rate": 0.001, |
| "loss": 1.2364, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 2.135324478149414, |
| "learning_rate": 0.001, |
| "loss": 1.2473, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 2.056169033050537, |
| "learning_rate": 0.001, |
| "loss": 1.2485, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 1.3009580373764038, |
| "learning_rate": 0.001, |
| "loss": 1.2538, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 1.1325513124465942, |
| "learning_rate": 0.001, |
| "loss": 1.2549, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 1.875545620918274, |
| "learning_rate": 0.001, |
| "loss": 1.2179, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 1.6356204748153687, |
| "learning_rate": 0.001, |
| "loss": 1.1624, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.6611065864562988, |
| "learning_rate": 0.001, |
| "loss": 1.1568, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 1.744854211807251, |
| "learning_rate": 0.001, |
| "loss": 1.1693, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 1.3900748491287231, |
| "learning_rate": 0.001, |
| "loss": 1.1565, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.3018250465393066, |
| "learning_rate": 0.001, |
| "loss": 1.1738, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 1.2930840253829956, |
| "learning_rate": 0.001, |
| "loss": 1.1621, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 1.5405704975128174, |
| "learning_rate": 0.001, |
| "loss": 1.1807, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 2.3899145126342773, |
| "learning_rate": 0.001, |
| "loss": 1.1647, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.6378835439682007, |
| "learning_rate": 0.001, |
| "loss": 1.2026, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 1.6792488098144531, |
| "learning_rate": 0.001, |
| "loss": 1.18, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 2.291990280151367, |
| "learning_rate": 0.001, |
| "loss": 1.1842, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 1.4593669176101685, |
| "learning_rate": 0.001, |
| "loss": 1.2003, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 1.4892886877059937, |
| "learning_rate": 0.001, |
| "loss": 1.1874, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 1.5590368509292603, |
| "learning_rate": 0.001, |
| "loss": 1.1991, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 1.8275721073150635, |
| "learning_rate": 0.001, |
| "loss": 1.1896, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 1.4684890508651733, |
| "learning_rate": 0.001, |
| "loss": 1.2036, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.6872565746307373, |
| "learning_rate": 0.001, |
| "loss": 1.2127, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 1.5722708702087402, |
| "learning_rate": 0.001, |
| "loss": 1.2219, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.6149400472640991, |
| "learning_rate": 0.001, |
| "loss": 1.2185, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 2.0069503784179688, |
| "learning_rate": 0.001, |
| "loss": 1.2172, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 1.6077826023101807, |
| "learning_rate": 0.001, |
| "loss": 1.2148, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 2.0763955116271973, |
| "learning_rate": 0.001, |
| "loss": 1.2339, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 2.2948250770568848, |
| "learning_rate": 0.001, |
| "loss": 1.2174, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 1.7497351169586182, |
| "learning_rate": 0.001, |
| "loss": 1.2169, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 1.3225711584091187, |
| "learning_rate": 0.001, |
| "loss": 1.2359, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 2.303297281265259, |
| "learning_rate": 0.001, |
| "loss": 1.2395, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 1.5867317914962769, |
| "learning_rate": 0.001, |
| "loss": 1.2494, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 2.0131945610046387, |
| "learning_rate": 0.001, |
| "loss": 1.2354, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 2.566061019897461, |
| "learning_rate": 0.001, |
| "loss": 1.244, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 1.5201761722564697, |
| "learning_rate": 0.001, |
| "loss": 1.2486, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 1.5201029777526855, |
| "learning_rate": 0.001, |
| "loss": 1.1924, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 2.2412655353546143, |
| "learning_rate": 0.001, |
| "loss": 1.1479, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 1.3718254566192627, |
| "learning_rate": 0.001, |
| "loss": 1.1611, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 1.3264670372009277, |
| "learning_rate": 0.001, |
| "loss": 1.1573, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 2.397085189819336, |
| "learning_rate": 0.001, |
| "loss": 1.1617, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 1.645229458808899, |
| "learning_rate": 0.001, |
| "loss": 1.1594, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 1.3829667568206787, |
| "learning_rate": 0.001, |
| "loss": 1.1772, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 1.2982192039489746, |
| "learning_rate": 0.001, |
| "loss": 1.166, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 1.889725685119629, |
| "learning_rate": 0.001, |
| "loss": 1.1722, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 1.9517900943756104, |
| "learning_rate": 0.001, |
| "loss": 1.1658, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.5264050960540771, |
| "learning_rate": 0.001, |
| "loss": 1.1812, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 1.7745048999786377, |
| "learning_rate": 0.001, |
| "loss": 1.1828, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.7343498468399048, |
| "learning_rate": 0.001, |
| "loss": 1.1999, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 1.9775893688201904, |
| "learning_rate": 0.001, |
| "loss": 1.1927, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 1.539505124092102, |
| "learning_rate": 0.001, |
| "loss": 1.1905, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 1.4885227680206299, |
| "learning_rate": 0.001, |
| "loss": 1.2113, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 2.354274034500122, |
| "learning_rate": 0.001, |
| "loss": 1.1966, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 1.6391592025756836, |
| "learning_rate": 0.001, |
| "loss": 1.2162, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 1.4249579906463623, |
| "learning_rate": 0.001, |
| "loss": 1.1971, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.3456435203552246, |
| "learning_rate": 0.001, |
| "loss": 1.2086, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.8398938179016113, |
| "learning_rate": 0.001, |
| "loss": 1.2132, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 1.3585916757583618, |
| "learning_rate": 0.001, |
| "loss": 1.2132, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 1.7988615036010742, |
| "learning_rate": 0.001, |
| "loss": 1.2183, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.2848037481307983, |
| "learning_rate": 0.001, |
| "loss": 1.2222, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 1.5872573852539062, |
| "learning_rate": 0.001, |
| "loss": 1.2307, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 2.1241719722747803, |
| "learning_rate": 0.001, |
| "loss": 1.2165, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 1.4638140201568604, |
| "learning_rate": 0.001, |
| "loss": 1.2404, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 1.294054627418518, |
| "learning_rate": 0.001, |
| "loss": 1.2369, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 1.7885204553604126, |
| "learning_rate": 0.001, |
| "loss": 1.2221, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 1.8226639032363892, |
| "learning_rate": 0.001, |
| "loss": 1.2324, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 2.0065526962280273, |
| "learning_rate": 0.001, |
| "loss": 1.2467, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 1.553343653678894, |
| "learning_rate": 0.001, |
| "loss": 1.1705, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 1.574761152267456, |
| "learning_rate": 0.001, |
| "loss": 1.1325, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 2.1614627838134766, |
| "learning_rate": 0.001, |
| "loss": 1.139, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 1.8416796922683716, |
| "learning_rate": 0.001, |
| "loss": 1.1524, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 2.1431214809417725, |
| "learning_rate": 0.001, |
| "loss": 1.1487, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 1.7777167558670044, |
| "learning_rate": 0.001, |
| "loss": 1.1679, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 1.6724756956100464, |
| "learning_rate": 0.001, |
| "loss": 1.1428, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 1.779128074645996, |
| "learning_rate": 0.001, |
| "loss": 1.1642, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 1.6053786277770996, |
| "learning_rate": 0.001, |
| "loss": 1.169, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 1.982901930809021, |
| "learning_rate": 0.001, |
| "loss": 1.172, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 2.143629550933838, |
| "learning_rate": 0.001, |
| "loss": 1.1728, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 1.7679647207260132, |
| "learning_rate": 0.001, |
| "loss": 1.1881, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 2.009580373764038, |
| "learning_rate": 0.001, |
| "loss": 1.1857, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 1.971984624862671, |
| "learning_rate": 0.001, |
| "loss": 1.1823, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 1.5214993953704834, |
| "learning_rate": 0.001, |
| "loss": 1.1773, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 1.8038649559020996, |
| "learning_rate": 0.001, |
| "loss": 1.1941, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 1.5943965911865234, |
| "learning_rate": 0.001, |
| "loss": 1.1726, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 2.3694846630096436, |
| "learning_rate": 0.001, |
| "loss": 1.2078, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 1.6778090000152588, |
| "learning_rate": 0.001, |
| "loss": 1.2191, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 1.5933371782302856, |
| "learning_rate": 0.001, |
| "loss": 1.1945, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 1.8515619039535522, |
| "learning_rate": 0.001, |
| "loss": 1.2358, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 1.7395762205123901, |
| "learning_rate": 0.001, |
| "loss": 1.2278, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 2.1333162784576416, |
| "learning_rate": 0.001, |
| "loss": 1.2081, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 2.038357734680176, |
| "learning_rate": 0.001, |
| "loss": 1.2195, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 1.819784164428711, |
| "learning_rate": 0.001, |
| "loss": 1.2284, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 2.1269073486328125, |
| "learning_rate": 0.001, |
| "loss": 1.2231, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 2.2647945880889893, |
| "learning_rate": 0.001, |
| "loss": 1.2115, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 1.5195850133895874, |
| "learning_rate": 0.001, |
| "loss": 1.2324, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 2.303753137588501, |
| "learning_rate": 0.001, |
| "loss": 1.2391, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 1.922239899635315, |
| "learning_rate": 0.001, |
| "loss": 1.2234, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 1.695598840713501, |
| "learning_rate": 0.001, |
| "loss": 1.2452, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 1.7118265628814697, |
| "learning_rate": 0.001, |
| "loss": 1.1588, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 1.8453078269958496, |
| "learning_rate": 0.001, |
| "loss": 1.1266, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 2.1100013256073, |
| "learning_rate": 0.001, |
| "loss": 1.137, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 1.8456408977508545, |
| "learning_rate": 0.001, |
| "loss": 1.1368, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 1.7554715871810913, |
| "learning_rate": 0.001, |
| "loss": 1.1544, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 1.618524432182312, |
| "learning_rate": 0.001, |
| "loss": 1.1593, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 1.7603201866149902, |
| "learning_rate": 0.001, |
| "loss": 1.1633, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 1.407418966293335, |
| "learning_rate": 0.001, |
| "loss": 1.153, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 2.6295015811920166, |
| "learning_rate": 0.001, |
| "loss": 1.1683, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 1.855170726776123, |
| "learning_rate": 0.001, |
| "loss": 1.1654, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 3.7528069019317627, |
| "learning_rate": 0.001, |
| "loss": 1.1533, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 2.2886791229248047, |
| "learning_rate": 0.001, |
| "loss": 1.1604, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 2.4651310443878174, |
| "learning_rate": 0.001, |
| "loss": 1.1765, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 2.6569302082061768, |
| "learning_rate": 0.001, |
| "loss": 1.184, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 2.0062756538391113, |
| "learning_rate": 0.001, |
| "loss": 1.2011, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 1.9018144607543945, |
| "learning_rate": 0.001, |
| "loss": 1.1914, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 1.7802410125732422, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 1.9713374376296997, |
| "learning_rate": 0.001, |
| "loss": 1.1795, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 2.6365981101989746, |
| "learning_rate": 0.001, |
| "loss": 1.1892, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 2.187910318374634, |
| "learning_rate": 0.001, |
| "loss": 1.2051, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 2.039344072341919, |
| "learning_rate": 0.001, |
| "loss": 1.1984, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 2.032949209213257, |
| "learning_rate": 0.001, |
| "loss": 1.2181, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 2.804405689239502, |
| "learning_rate": 0.001, |
| "loss": 1.1987, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 1.8193217515945435, |
| "learning_rate": 0.001, |
| "loss": 1.1995, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 2.1374568939208984, |
| "learning_rate": 0.001, |
| "loss": 1.2147, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 2.0475242137908936, |
| "learning_rate": 0.001, |
| "loss": 1.2137, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 1.8530298471450806, |
| "learning_rate": 0.001, |
| "loss": 1.2203, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 2.1449034214019775, |
| "learning_rate": 0.001, |
| "loss": 1.2357, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 1.8257231712341309, |
| "learning_rate": 0.001, |
| "loss": 1.2428, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 1.6015028953552246, |
| "learning_rate": 0.001, |
| "loss": 1.2322, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 2.003861665725708, |
| "learning_rate": 0.001, |
| "loss": 1.2222, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 1.8797523975372314, |
| "learning_rate": 0.001, |
| "loss": 1.1834, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 1.5017789602279663, |
| "learning_rate": 0.001, |
| "loss": 1.1172, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 1.3459205627441406, |
| "learning_rate": 0.001, |
| "loss": 1.1426, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 1.5411944389343262, |
| "learning_rate": 0.001, |
| "loss": 1.1388, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 2.0234344005584717, |
| "learning_rate": 0.001, |
| "loss": 1.1572, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 1.6879246234893799, |
| "learning_rate": 0.001, |
| "loss": 1.1414, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 1.4526965618133545, |
| "learning_rate": 0.001, |
| "loss": 1.1476, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 2.114469528198242, |
| "learning_rate": 0.001, |
| "loss": 1.1461, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 2.7266154289245605, |
| "learning_rate": 0.001, |
| "loss": 1.1739, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 2.0473294258117676, |
| "learning_rate": 0.001, |
| "loss": 1.1502, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 1.4588580131530762, |
| "learning_rate": 0.001, |
| "loss": 1.1757, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 1.7957432270050049, |
| "learning_rate": 0.001, |
| "loss": 1.1789, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 1.9788367748260498, |
| "learning_rate": 0.001, |
| "loss": 1.1766, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 1.7873634099960327, |
| "learning_rate": 0.001, |
| "loss": 1.1648, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 1.6952708959579468, |
| "learning_rate": 0.001, |
| "loss": 1.1728, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 1.3903928995132446, |
| "learning_rate": 0.001, |
| "loss": 1.1731, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.5076650381088257, |
| "learning_rate": 0.001, |
| "loss": 1.185, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 2.4555656909942627, |
| "learning_rate": 0.001, |
| "loss": 1.1829, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 2.349330425262451, |
| "learning_rate": 0.001, |
| "loss": 1.1894, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 1.7061920166015625, |
| "learning_rate": 0.001, |
| "loss": 1.1822, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 1.7948163747787476, |
| "learning_rate": 0.001, |
| "loss": 1.1854, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 1.5287566184997559, |
| "learning_rate": 0.001, |
| "loss": 1.1888, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 1.590301513671875, |
| "learning_rate": 0.001, |
| "loss": 1.2022, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 1.5148197412490845, |
| "learning_rate": 0.001, |
| "loss": 1.2101, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.9384305477142334, |
| "learning_rate": 0.001, |
| "loss": 1.2059, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 1.7551034688949585, |
| "learning_rate": 0.001, |
| "loss": 1.2084, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 1.8605059385299683, |
| "learning_rate": 0.001, |
| "loss": 1.2075, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 1.7120189666748047, |
| "learning_rate": 0.001, |
| "loss": 1.2183, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 1.8584399223327637, |
| "learning_rate": 0.001, |
| "loss": 1.2287, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 1.8144450187683105, |
| "learning_rate": 0.001, |
| "loss": 1.2198, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 2.061105966567993, |
| "learning_rate": 0.001, |
| "loss": 1.2181, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 2.702474594116211, |
| "learning_rate": 0.001, |
| "loss": 1.1685, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 1.64632248878479, |
| "learning_rate": 0.001, |
| "loss": 1.1171, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 2.041278839111328, |
| "learning_rate": 0.001, |
| "loss": 1.1365, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 1.4763818979263306, |
| "learning_rate": 0.001, |
| "loss": 1.1433, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 1.36812424659729, |
| "learning_rate": 0.001, |
| "loss": 1.118, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 1.6154983043670654, |
| "learning_rate": 0.001, |
| "loss": 1.1485, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 1.8375099897384644, |
| "learning_rate": 0.001, |
| "loss": 1.1424, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 1.8397036790847778, |
| "learning_rate": 0.001, |
| "loss": 1.1528, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 1.466280460357666, |
| "learning_rate": 0.001, |
| "loss": 1.1516, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 2.9650964736938477, |
| "learning_rate": 0.001, |
| "loss": 1.1675, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 2.0993025302886963, |
| "learning_rate": 0.001, |
| "loss": 1.1409, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 1.6603684425354004, |
| "learning_rate": 0.001, |
| "loss": 1.168, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 1.7989704608917236, |
| "learning_rate": 0.001, |
| "loss": 1.1621, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 1.5147311687469482, |
| "learning_rate": 0.001, |
| "loss": 1.1662, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 1.8816263675689697, |
| "learning_rate": 0.001, |
| "loss": 1.1662, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 1.3142783641815186, |
| "learning_rate": 0.001, |
| "loss": 1.1803, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 2.246133327484131, |
| "learning_rate": 0.001, |
| "loss": 1.1873, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 1.5358392000198364, |
| "learning_rate": 0.001, |
| "loss": 1.1777, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 1.275778889656067, |
| "learning_rate": 0.001, |
| "loss": 1.172, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 1.631397008895874, |
| "learning_rate": 0.001, |
| "loss": 1.1943, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 1.462730884552002, |
| "learning_rate": 0.001, |
| "loss": 1.1937, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 2.0201778411865234, |
| "learning_rate": 0.001, |
| "loss": 1.2112, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 1.8298077583312988, |
| "learning_rate": 0.001, |
| "loss": 1.2012, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 1.727700114250183, |
| "learning_rate": 0.001, |
| "loss": 1.1993, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.6637187004089355, |
| "learning_rate": 0.001, |
| "loss": 1.1984, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 1.911285400390625, |
| "learning_rate": 0.001, |
| "loss": 1.2174, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.6867454051971436, |
| "learning_rate": 0.001, |
| "loss": 1.2025, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 1.5231786966323853, |
| "learning_rate": 0.001, |
| "loss": 1.2163, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 2.033961296081543, |
| "learning_rate": 0.001, |
| "loss": 1.2061, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 2.617823362350464, |
| "learning_rate": 0.001, |
| "loss": 1.2122, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 1.7739336490631104, |
| "learning_rate": 0.001, |
| "loss": 1.2211, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 2.470210552215576, |
| "learning_rate": 0.001, |
| "loss": 1.1605, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.1751980781555176, |
| "learning_rate": 0.001, |
| "loss": 1.1296, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.6907886266708374, |
| "learning_rate": 0.001, |
| "loss": 1.1378, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 2.201791524887085, |
| "learning_rate": 0.001, |
| "loss": 1.1211, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.7307604551315308, |
| "learning_rate": 0.001, |
| "loss": 1.1174, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 1.2891641855239868, |
| "learning_rate": 0.001, |
| "loss": 1.1371, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 1.5916978120803833, |
| "learning_rate": 0.001, |
| "loss": 1.1486, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 1.6448651552200317, |
| "learning_rate": 0.001, |
| "loss": 1.143, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 2.0232772827148438, |
| "learning_rate": 0.001, |
| "loss": 1.1474, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 1.5678558349609375, |
| "learning_rate": 0.001, |
| "loss": 1.1452, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 2.3138344287872314, |
| "learning_rate": 0.001, |
| "loss": 1.1451, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 1.411215901374817, |
| "learning_rate": 0.001, |
| "loss": 1.1632, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 1.6272342205047607, |
| "learning_rate": 0.001, |
| "loss": 1.1544, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 2.5069704055786133, |
| "learning_rate": 0.001, |
| "loss": 1.1573, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.6448183059692383, |
| "learning_rate": 0.001, |
| "loss": 1.167, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.4927773475646973, |
| "learning_rate": 0.001, |
| "loss": 1.1704, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.57712984085083, |
| "learning_rate": 0.001, |
| "loss": 1.1789, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.4393914937973022, |
| "learning_rate": 0.001, |
| "loss": 1.1751, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 1.8276870250701904, |
| "learning_rate": 0.001, |
| "loss": 1.1739, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 1.1675989627838135, |
| "learning_rate": 0.001, |
| "loss": 1.1823, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.2009097337722778, |
| "learning_rate": 0.001, |
| "loss": 1.1933, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.5777475833892822, |
| "learning_rate": 0.001, |
| "loss": 1.1854, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.2625652551651, |
| "learning_rate": 0.001, |
| "loss": 1.1898, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 1.355389952659607, |
| "learning_rate": 0.001, |
| "loss": 1.1894, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.6914863586425781, |
| "learning_rate": 0.001, |
| "loss": 1.2022, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 1.6860562562942505, |
| "learning_rate": 0.001, |
| "loss": 1.2021, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.7306995391845703, |
| "learning_rate": 0.001, |
| "loss": 1.2158, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.8452569246292114, |
| "learning_rate": 0.001, |
| "loss": 1.1822, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 1.5447825193405151, |
| "learning_rate": 0.001, |
| "loss": 1.1996, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 2.2806003093719482, |
| "learning_rate": 0.001, |
| "loss": 1.2088, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.7977348566055298, |
| "learning_rate": 0.001, |
| "loss": 1.221, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.860256314277649, |
| "learning_rate": 0.001, |
| "loss": 1.1416, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 1.2851029634475708, |
| "learning_rate": 0.001, |
| "loss": 1.1051, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 2.5045924186706543, |
| "learning_rate": 0.001, |
| "loss": 1.1224, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 1.9603523015975952, |
| "learning_rate": 0.001, |
| "loss": 1.1228, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 1.8318489789962769, |
| "learning_rate": 0.001, |
| "loss": 1.1256, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.1082842350006104, |
| "learning_rate": 0.001, |
| "loss": 1.142, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 2.170994758605957, |
| "learning_rate": 0.001, |
| "loss": 1.1279, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.8286175727844238, |
| "learning_rate": 0.001, |
| "loss": 1.1416, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.4053666591644287, |
| "learning_rate": 0.001, |
| "loss": 1.1379, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.911604404449463, |
| "learning_rate": 0.001, |
| "loss": 1.1593, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 1.3394492864608765, |
| "learning_rate": 0.001, |
| "loss": 1.1416, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 1.5122565031051636, |
| "learning_rate": 0.001, |
| "loss": 1.1487, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 2.1100003719329834, |
| "learning_rate": 0.001, |
| "loss": 1.1512, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.941153883934021, |
| "learning_rate": 0.001, |
| "loss": 1.1693, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.6588879823684692, |
| "learning_rate": 0.001, |
| "loss": 1.1611, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 1.2245738506317139, |
| "learning_rate": 0.001, |
| "loss": 1.1614, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 1.5878503322601318, |
| "learning_rate": 0.001, |
| "loss": 1.1777, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.4396772384643555, |
| "learning_rate": 0.001, |
| "loss": 1.157, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 1.590366244316101, |
| "learning_rate": 0.001, |
| "loss": 1.1681, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 1.252959966659546, |
| "learning_rate": 0.001, |
| "loss": 1.1765, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 2.057255983352661, |
| "learning_rate": 0.001, |
| "loss": 1.1884, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.8908469676971436, |
| "learning_rate": 0.001, |
| "loss": 1.1843, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.616100549697876, |
| "learning_rate": 0.001, |
| "loss": 1.1835, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 1.4547150135040283, |
| "learning_rate": 0.001, |
| "loss": 1.2027, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 1.4257676601409912, |
| "learning_rate": 0.001, |
| "loss": 1.1795, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.3480945825576782, |
| "learning_rate": 0.001, |
| "loss": 1.2027, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 1.460683822631836, |
| "learning_rate": 0.001, |
| "loss": 1.1955, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 2.127368450164795, |
| "learning_rate": 0.001, |
| "loss": 1.1796, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 2.2763729095458984, |
| "learning_rate": 0.001, |
| "loss": 1.2024, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.617999792098999, |
| "learning_rate": 0.001, |
| "loss": 1.2059, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.829553246498108, |
| "learning_rate": 0.001, |
| "loss": 1.1947, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.306296944618225, |
| "learning_rate": 0.001, |
| "loss": 1.1321, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 1.4006305932998657, |
| "learning_rate": 0.001, |
| "loss": 1.1103, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.3456957340240479, |
| "learning_rate": 0.001, |
| "loss": 1.1028, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.495202898979187, |
| "learning_rate": 0.001, |
| "loss": 1.099, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.624022126197815, |
| "learning_rate": 0.001, |
| "loss": 1.1155, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.707629680633545, |
| "learning_rate": 0.001, |
| "loss": 1.1294, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 1.6954739093780518, |
| "learning_rate": 0.001, |
| "loss": 1.1397, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 1.5452526807785034, |
| "learning_rate": 0.001, |
| "loss": 1.1212, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 2.1428868770599365, |
| "learning_rate": 0.001, |
| "loss": 1.1411, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 1.6986525058746338, |
| "learning_rate": 0.001, |
| "loss": 1.1356, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 1.5793352127075195, |
| "learning_rate": 0.001, |
| "loss": 1.1409, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 1.5635677576065063, |
| "learning_rate": 0.001, |
| "loss": 1.1426, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 1.561034917831421, |
| "learning_rate": 0.001, |
| "loss": 1.1528, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 2.120241403579712, |
| "learning_rate": 0.001, |
| "loss": 1.162, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 2.110903024673462, |
| "learning_rate": 0.001, |
| "loss": 1.1612, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 1.827684998512268, |
| "learning_rate": 0.001, |
| "loss": 1.1689, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 1.3731664419174194, |
| "learning_rate": 0.001, |
| "loss": 1.154, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.240972876548767, |
| "learning_rate": 0.001, |
| "loss": 1.1662, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 1.9408973455429077, |
| "learning_rate": 0.001, |
| "loss": 1.1606, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.7482811212539673, |
| "learning_rate": 0.001, |
| "loss": 1.1665, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 2.163902997970581, |
| "learning_rate": 0.001, |
| "loss": 1.1741, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.2171028852462769, |
| "learning_rate": 0.001, |
| "loss": 1.1902, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 1.5814447402954102, |
| "learning_rate": 0.001, |
| "loss": 1.1836, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.408679723739624, |
| "learning_rate": 0.001, |
| "loss": 1.184, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 1.645927906036377, |
| "learning_rate": 0.001, |
| "loss": 1.1835, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 1.3949730396270752, |
| "learning_rate": 0.001, |
| "loss": 1.1798, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.6577595472335815, |
| "learning_rate": 0.001, |
| "loss": 1.1858, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 1.934249997138977, |
| "learning_rate": 0.001, |
| "loss": 1.2028, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.6780946254730225, |
| "learning_rate": 0.001, |
| "loss": 1.2043, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 1.8218052387237549, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 2.1109933853149414, |
| "learning_rate": 0.001, |
| "loss": 1.1976, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.517188310623169, |
| "learning_rate": 0.001, |
| "loss": 1.119, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 2.3147544860839844, |
| "learning_rate": 0.001, |
| "loss": 1.0996, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 1.553000807762146, |
| "learning_rate": 0.001, |
| "loss": 1.0983, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 1.4100111722946167, |
| "learning_rate": 0.001, |
| "loss": 1.1109, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 2.305436849594116, |
| "learning_rate": 0.001, |
| "loss": 1.1085, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.940406322479248, |
| "learning_rate": 0.001, |
| "loss": 1.122, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.3003724813461304, |
| "learning_rate": 0.001, |
| "loss": 1.117, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.3235238790512085, |
| "learning_rate": 0.001, |
| "loss": 1.1067, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.5034582614898682, |
| "learning_rate": 0.001, |
| "loss": 1.1329, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 1.7820682525634766, |
| "learning_rate": 0.001, |
| "loss": 1.1382, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 1.7481606006622314, |
| "learning_rate": 0.001, |
| "loss": 1.1483, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 1.8273428678512573, |
| "learning_rate": 0.001, |
| "loss": 1.1556, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 1.975685715675354, |
| "learning_rate": 0.001, |
| "loss": 1.1473, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 2.329911231994629, |
| "learning_rate": 0.001, |
| "loss": 1.1651, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 2.8156585693359375, |
| "learning_rate": 0.001, |
| "loss": 1.1491, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 2.167443037033081, |
| "learning_rate": 0.001, |
| "loss": 1.1701, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 2.023167848587036, |
| "learning_rate": 0.001, |
| "loss": 1.1603, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 2.202585458755493, |
| "learning_rate": 0.001, |
| "loss": 1.1602, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 2.8777387142181396, |
| "learning_rate": 0.001, |
| "loss": 1.1638, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 1.5604068040847778, |
| "learning_rate": 0.001, |
| "loss": 1.1625, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 1.617970585823059, |
| "learning_rate": 0.001, |
| "loss": 1.1686, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 1.7877370119094849, |
| "learning_rate": 0.001, |
| "loss": 1.1626, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 2.0654261112213135, |
| "learning_rate": 0.001, |
| "loss": 1.1779, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 2.0315310955047607, |
| "learning_rate": 0.001, |
| "loss": 1.1987, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.6771689653396606, |
| "learning_rate": 0.001, |
| "loss": 1.1664, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 1.3909519910812378, |
| "learning_rate": 0.001, |
| "loss": 1.1754, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 2.052640914916992, |
| "learning_rate": 0.001, |
| "loss": 1.1754, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 1.5628565549850464, |
| "learning_rate": 0.001, |
| "loss": 1.1769, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 1.619795322418213, |
| "learning_rate": 0.001, |
| "loss": 1.1961, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 2.042397975921631, |
| "learning_rate": 0.001, |
| "loss": 1.1972, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 1.8413684368133545, |
| "learning_rate": 0.001, |
| "loss": 1.1885, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 2.001903772354126, |
| "learning_rate": 0.001, |
| "loss": 1.1038, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.5408207178115845, |
| "learning_rate": 0.001, |
| "loss": 1.0963, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.8227003812789917, |
| "learning_rate": 0.001, |
| "loss": 1.0963, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 1.7955845594406128, |
| "learning_rate": 0.001, |
| "loss": 1.0903, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 1.5789191722869873, |
| "learning_rate": 0.001, |
| "loss": 1.1043, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 2.1326937675476074, |
| "learning_rate": 0.001, |
| "loss": 1.1306, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 2.003293514251709, |
| "learning_rate": 0.001, |
| "loss": 1.1107, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 1.4671404361724854, |
| "learning_rate": 0.001, |
| "loss": 1.1183, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 1.8515692949295044, |
| "learning_rate": 0.001, |
| "loss": 1.1388, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 1.925635576248169, |
| "learning_rate": 0.001, |
| "loss": 1.1495, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 1.4091397523880005, |
| "learning_rate": 0.001, |
| "loss": 1.1281, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 2.091721534729004, |
| "learning_rate": 0.001, |
| "loss": 1.1499, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 2.5117223262786865, |
| "learning_rate": 0.001, |
| "loss": 1.1406, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 1.422900676727295, |
| "learning_rate": 0.001, |
| "loss": 1.1439, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 1.3010830879211426, |
| "learning_rate": 0.001, |
| "loss": 1.1704, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 1.8194390535354614, |
| "learning_rate": 0.001, |
| "loss": 1.1524, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 2.241621255874634, |
| "learning_rate": 0.001, |
| "loss": 1.1472, |
| "step": 308000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6949805082257654e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|