| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 99.5475113122172, |
| "eval_steps": 20000, |
| "global_step": 308000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 13.562658309936523, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 4.2119, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 17.95138931274414, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.0516, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 11.31137752532959, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 3.9654, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 17.110918045043945, |
| "learning_rate": 3.99e-05, |
| "loss": 3.7782, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 11.72215461730957, |
| "learning_rate": 4.99e-05, |
| "loss": 3.4622, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 7.6839447021484375, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.0781, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 9.97439193725586, |
| "learning_rate": 6.99e-05, |
| "loss": 2.8028, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 7.437990665435791, |
| "learning_rate": 7.99e-05, |
| "loss": 2.6209, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 5.37725830078125, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 2.4793, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 3.4234330654144287, |
| "learning_rate": 9.99e-05, |
| "loss": 2.4139, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 3.771662473678589, |
| "learning_rate": 0.0001099, |
| "loss": 2.3273, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 3.9465994834899902, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 2.3011, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 3.0348823070526123, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 2.2512, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 2.580744504928589, |
| "learning_rate": 0.0001399, |
| "loss": 2.2338, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 2.7396256923675537, |
| "learning_rate": 0.0001499, |
| "loss": 2.2165, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 2.427551507949829, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 2.1936, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 4.367044448852539, |
| "learning_rate": 0.0001699, |
| "loss": 2.1448, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 3.5235722064971924, |
| "learning_rate": 0.0001799, |
| "loss": 2.1785, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 1.780901551246643, |
| "learning_rate": 0.0001899, |
| "loss": 2.1839, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 1.8098434209823608, |
| "learning_rate": 0.0001999, |
| "loss": 2.1043, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 2.296581268310547, |
| "learning_rate": 0.0002099, |
| "loss": 2.1415, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 2.007957696914673, |
| "learning_rate": 0.0002199, |
| "loss": 2.1096, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 3.1119742393493652, |
| "learning_rate": 0.0002299, |
| "loss": 2.1146, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 1.6894687414169312, |
| "learning_rate": 0.0002399, |
| "loss": 2.0824, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 1.2437957525253296, |
| "learning_rate": 0.0002499, |
| "loss": 2.0671, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.4846837520599365, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 2.1032, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 1.7763832807540894, |
| "learning_rate": 0.0002699, |
| "loss": 2.1034, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 2.0607247352600098, |
| "learning_rate": 0.0002799, |
| "loss": 2.0898, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 1.1315553188323975, |
| "learning_rate": 0.0002899, |
| "loss": 2.0906, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 1.3324776887893677, |
| "learning_rate": 0.0002999, |
| "loss": 2.065, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 1.5390121936798096, |
| "learning_rate": 0.0003099, |
| "loss": 2.0899, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 3.3471102714538574, |
| "learning_rate": 0.0003199, |
| "loss": 1.986, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 2.176851749420166, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 2.0288, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 1.6547337770462036, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 2.0137, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 2.7004611492156982, |
| "learning_rate": 0.0003499, |
| "loss": 2.0361, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 9.764275550842285, |
| "learning_rate": 0.0003599, |
| "loss": 2.0461, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 2.408719062805176, |
| "learning_rate": 0.0003699, |
| "loss": 2.013, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.4798204898834229, |
| "learning_rate": 0.0003799, |
| "loss": 2.0373, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.6942540407180786, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 2.0252, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 1.9354304075241089, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 1.968, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 1.3031123876571655, |
| "learning_rate": 0.0004099, |
| "loss": 1.9987, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 2.0432260036468506, |
| "learning_rate": 0.0004199, |
| "loss": 2.0022, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.3270591497421265, |
| "learning_rate": 0.0004299, |
| "loss": 1.9947, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 2.2205493450164795, |
| "learning_rate": 0.0004399, |
| "loss": 2.0379, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.547240138053894, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 1.9993, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 1.6051744222640991, |
| "learning_rate": 0.0004599, |
| "loss": 1.9934, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 1.6245908737182617, |
| "learning_rate": 0.0004699, |
| "loss": 2.0052, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.2813587188720703, |
| "learning_rate": 0.0004799, |
| "loss": 2.0224, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 1.2587623596191406, |
| "learning_rate": 0.0004899, |
| "loss": 2.0377, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 1.2050402164459229, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 2.0159, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 1.2096720933914185, |
| "learning_rate": 0.0005099, |
| "loss": 1.9913, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.371106743812561, |
| "learning_rate": 0.0005199, |
| "loss": 1.9823, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 1.6344568729400635, |
| "learning_rate": 0.0005299, |
| "loss": 1.9601, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.475213646888733, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 2.0069, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 1.147307276725769, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 1.9979, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 1.5867249965667725, |
| "learning_rate": 0.0005599, |
| "loss": 1.9819, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.2033599615097046, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 1.9972, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 1.5934315919876099, |
| "learning_rate": 0.0005799, |
| "loss": 1.9636, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.7510161399841309, |
| "learning_rate": 0.0005899, |
| "loss": 1.9905, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 1.3442281484603882, |
| "learning_rate": 0.0005999, |
| "loss": 2.0058, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 1.8174034357070923, |
| "learning_rate": 0.0006099, |
| "loss": 1.9725, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 1.3350085020065308, |
| "learning_rate": 0.0006199, |
| "loss": 2.0277, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 1.1999424695968628, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 1.9098, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 1.2381353378295898, |
| "learning_rate": 0.0006399, |
| "loss": 1.9531, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 1.414468765258789, |
| "learning_rate": 0.0006499, |
| "loss": 1.9276, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.050279140472412, |
| "learning_rate": 0.0006599, |
| "loss": 1.9068, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 1.3866891860961914, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 1.9283, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 1.0788445472717285, |
| "learning_rate": 0.0006799, |
| "loss": 1.9478, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 1.032123327255249, |
| "learning_rate": 0.0006899, |
| "loss": 1.9247, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 1.3964852094650269, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 1.9256, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 1.1780961751937866, |
| "learning_rate": 0.0007099, |
| "loss": 1.9308, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 1.3616302013397217, |
| "learning_rate": 0.0007199, |
| "loss": 1.9456, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 1.266806960105896, |
| "learning_rate": 0.0007299, |
| "loss": 1.9409, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 1.1740697622299194, |
| "learning_rate": 0.0007399, |
| "loss": 1.9325, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.1045465469360352, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 1.9336, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 1.4932281970977783, |
| "learning_rate": 0.0007599, |
| "loss": 1.9374, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 1.0262266397476196, |
| "learning_rate": 0.0007699, |
| "loss": 1.9295, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 1.2064828872680664, |
| "learning_rate": 0.0007799, |
| "loss": 1.9178, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 1.2155147790908813, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 1.9659, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 1.2359004020690918, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 1.943, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 1.1473153829574585, |
| "learning_rate": 0.0008099, |
| "loss": 1.9309, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 1.13937246799469, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 1.9133, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 1.46646249294281, |
| "learning_rate": 0.0008299, |
| "loss": 1.9746, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 1.3891866207122803, |
| "learning_rate": 0.0008399, |
| "loss": 1.9353, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 1.0530701875686646, |
| "learning_rate": 0.0008499, |
| "loss": 1.9152, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 1.3477870225906372, |
| "learning_rate": 0.0008599, |
| "loss": 1.9334, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 1.0107545852661133, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 1.9586, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 1.2573806047439575, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 1.9526, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 1.7003774642944336, |
| "learning_rate": 0.0008899, |
| "loss": 1.933, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 0.9627268314361572, |
| "learning_rate": 0.0008999, |
| "loss": 1.9588, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.1297580003738403, |
| "learning_rate": 0.0009099, |
| "loss": 1.961, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 1.230385184288025, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 1.9194, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 0.9041159152984619, |
| "learning_rate": 0.0009299, |
| "loss": 1.9607, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 1.1520296335220337, |
| "learning_rate": 0.0009399, |
| "loss": 1.8419, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 0.8856098651885986, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 1.865, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 1.082818627357483, |
| "learning_rate": 0.0009599, |
| "loss": 1.8975, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 1.1733167171478271, |
| "learning_rate": 0.0009699, |
| "loss": 1.8623, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 1.0028611421585083, |
| "learning_rate": 0.0009799, |
| "loss": 1.8665, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 1.318609356880188, |
| "learning_rate": 0.0009899, |
| "loss": 1.8917, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 1.452332854270935, |
| "learning_rate": 0.0009999, |
| "loss": 1.8757, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 1.2408781051635742, |
| "learning_rate": 0.001, |
| "loss": 1.825, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 0.9759476780891418, |
| "learning_rate": 0.001, |
| "loss": 1.8435, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.2983310222625732, |
| "learning_rate": 0.001, |
| "loss": 1.8448, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 1.7093514204025269, |
| "learning_rate": 0.001, |
| "loss": 1.8535, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 0.990372359752655, |
| "learning_rate": 0.001, |
| "loss": 1.8681, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 30.755449295043945, |
| "learning_rate": 0.001, |
| "loss": 1.8494, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 1.0039843320846558, |
| "learning_rate": 0.001, |
| "loss": 1.8672, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 1.0978604555130005, |
| "learning_rate": 0.001, |
| "loss": 1.8695, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 1.1592726707458496, |
| "learning_rate": 0.001, |
| "loss": 1.8769, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 1.8122249841690063, |
| "learning_rate": 0.001, |
| "loss": 1.8361, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 1.182767391204834, |
| "learning_rate": 0.001, |
| "loss": 1.866, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 1.7454653978347778, |
| "learning_rate": 0.001, |
| "loss": 1.8712, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 1.3222342729568481, |
| "learning_rate": 0.001, |
| "loss": 1.8495, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 1.1197530031204224, |
| "learning_rate": 0.001, |
| "loss": 1.8636, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 1.0089489221572876, |
| "learning_rate": 0.001, |
| "loss": 1.8698, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 1.1160805225372314, |
| "learning_rate": 0.001, |
| "loss": 1.8626, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 1.6491235494613647, |
| "learning_rate": 0.001, |
| "loss": 1.8567, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 1.2563141584396362, |
| "learning_rate": 0.001, |
| "loss": 1.8366, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 1.25520658493042, |
| "learning_rate": 0.001, |
| "loss": 1.8873, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 0.7658872604370117, |
| "learning_rate": 0.001, |
| "loss": 1.8845, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 1.1348446607589722, |
| "learning_rate": 0.001, |
| "loss": 1.8875, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 1.0871976613998413, |
| "learning_rate": 0.001, |
| "loss": 1.8727, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.2896612882614136, |
| "learning_rate": 0.001, |
| "loss": 1.865, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 1.233090877532959, |
| "learning_rate": 0.001, |
| "loss": 1.8577, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 1.2701627016067505, |
| "learning_rate": 0.001, |
| "loss": 1.7695, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 0.9071168899536133, |
| "learning_rate": 0.001, |
| "loss": 1.7788, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 0.851148247718811, |
| "learning_rate": 0.001, |
| "loss": 1.8106, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 1.1509605646133423, |
| "learning_rate": 0.001, |
| "loss": 1.7861, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 1.0977354049682617, |
| "learning_rate": 0.001, |
| "loss": 1.8142, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 1.4111992120742798, |
| "learning_rate": 0.001, |
| "loss": 1.8264, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 0.900395929813385, |
| "learning_rate": 0.001, |
| "loss": 1.8613, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 1.2737363576889038, |
| "learning_rate": 0.001, |
| "loss": 1.7886, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 1.435176968574524, |
| "learning_rate": 0.001, |
| "loss": 1.8192, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 0.7658246755599976, |
| "learning_rate": 0.001, |
| "loss": 1.8316, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 1.1032602787017822, |
| "learning_rate": 0.001, |
| "loss": 1.8161, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 1.1794319152832031, |
| "learning_rate": 0.001, |
| "loss": 1.7846, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 0.8004180192947388, |
| "learning_rate": 0.001, |
| "loss": 1.7987, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 1.0232980251312256, |
| "learning_rate": 0.001, |
| "loss": 1.7836, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 1.3699309825897217, |
| "learning_rate": 0.001, |
| "loss": 1.8018, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 1.0403003692626953, |
| "learning_rate": 0.001, |
| "loss": 1.7962, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 1.176257610321045, |
| "learning_rate": 0.001, |
| "loss": 1.8054, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 0.7099631428718567, |
| "learning_rate": 0.001, |
| "loss": 1.8299, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 0.7507422566413879, |
| "learning_rate": 0.001, |
| "loss": 1.8118, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 0.9232106804847717, |
| "learning_rate": 0.001, |
| "loss": 1.8049, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 1.074141263961792, |
| "learning_rate": 0.001, |
| "loss": 1.8488, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 2.9028780460357666, |
| "learning_rate": 0.001, |
| "loss": 1.8251, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 0.8118910789489746, |
| "learning_rate": 0.001, |
| "loss": 1.8049, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 0.8545799255371094, |
| "learning_rate": 0.001, |
| "loss": 1.8078, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 1.1466169357299805, |
| "learning_rate": 0.001, |
| "loss": 1.7987, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 1.126753568649292, |
| "learning_rate": 0.001, |
| "loss": 1.7949, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.7958624362945557, |
| "learning_rate": 0.001, |
| "loss": 1.7877, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 1.2399948835372925, |
| "learning_rate": 0.001, |
| "loss": 1.8136, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 1.478546380996704, |
| "learning_rate": 0.001, |
| "loss": 1.8386, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 1.4831817150115967, |
| "learning_rate": 0.001, |
| "loss": 1.828, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 1.126725196838379, |
| "learning_rate": 0.001, |
| "loss": 1.7936, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 0.9983710646629333, |
| "learning_rate": 0.001, |
| "loss": 1.6942, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 1.1261931657791138, |
| "learning_rate": 0.001, |
| "loss": 1.7046, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 0.8670969605445862, |
| "learning_rate": 0.001, |
| "loss": 1.7256, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 1.0237650871276855, |
| "learning_rate": 0.001, |
| "loss": 1.7319, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 0.8006519079208374, |
| "learning_rate": 0.001, |
| "loss": 1.7529, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 0.9771319627761841, |
| "learning_rate": 0.001, |
| "loss": 1.7032, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 0.9730778336524963, |
| "learning_rate": 0.001, |
| "loss": 1.7376, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 0.8748031854629517, |
| "learning_rate": 0.001, |
| "loss": 1.7531, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 0.7881172895431519, |
| "learning_rate": 0.001, |
| "loss": 1.7506, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 2.841874599456787, |
| "learning_rate": 0.001, |
| "loss": 1.7433, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 0.8375452756881714, |
| "learning_rate": 0.001, |
| "loss": 1.7232, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 1.30898916721344, |
| "learning_rate": 0.001, |
| "loss": 1.7317, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 0.9335517883300781, |
| "learning_rate": 0.001, |
| "loss": 1.7807, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 1.097780704498291, |
| "learning_rate": 0.001, |
| "loss": 1.7398, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 0.9672789573669434, |
| "learning_rate": 0.001, |
| "loss": 1.7514, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 0.6645662784576416, |
| "learning_rate": 0.001, |
| "loss": 1.7464, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 0.8699595928192139, |
| "learning_rate": 0.001, |
| "loss": 1.7568, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 0.897167980670929, |
| "learning_rate": 0.001, |
| "loss": 1.7206, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 0.8125122785568237, |
| "learning_rate": 0.001, |
| "loss": 1.731, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 1.1313683986663818, |
| "learning_rate": 0.001, |
| "loss": 1.7657, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 0.751796543598175, |
| "learning_rate": 0.001, |
| "loss": 1.7696, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 0.7507619857788086, |
| "learning_rate": 0.001, |
| "loss": 1.7562, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 0.7618012428283691, |
| "learning_rate": 0.001, |
| "loss": 1.7589, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 1.1747350692749023, |
| "learning_rate": 0.001, |
| "loss": 1.7558, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 0.8985292911529541, |
| "learning_rate": 0.001, |
| "loss": 1.7461, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 0.7552205324172974, |
| "learning_rate": 0.001, |
| "loss": 1.7702, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.8019037246704102, |
| "learning_rate": 0.001, |
| "loss": 1.7779, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 0.9410333037376404, |
| "learning_rate": 0.001, |
| "loss": 1.7969, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 1.1927149295806885, |
| "learning_rate": 0.001, |
| "loss": 1.7573, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 1.060541033744812, |
| "learning_rate": 0.001, |
| "loss": 1.7674, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 1.0835535526275635, |
| "learning_rate": 0.001, |
| "loss": 1.6796, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 0.9343781471252441, |
| "learning_rate": 0.001, |
| "loss": 1.6642, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 0.9074199795722961, |
| "learning_rate": 0.001, |
| "loss": 1.6493, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 0.9927520155906677, |
| "learning_rate": 0.001, |
| "loss": 1.6346, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 0.7810798287391663, |
| "learning_rate": 0.001, |
| "loss": 1.6908, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 0.6857879757881165, |
| "learning_rate": 0.001, |
| "loss": 1.6729, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 0.8595390915870667, |
| "learning_rate": 0.001, |
| "loss": 1.6679, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 0.9147341847419739, |
| "learning_rate": 0.001, |
| "loss": 1.7043, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 0.9340370893478394, |
| "learning_rate": 0.001, |
| "loss": 1.7241, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 1.0489479303359985, |
| "learning_rate": 0.001, |
| "loss": 1.6671, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 0.7707657814025879, |
| "learning_rate": 0.001, |
| "loss": 1.6772, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 0.748698353767395, |
| "learning_rate": 0.001, |
| "loss": 1.7033, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 0.7804653644561768, |
| "learning_rate": 0.001, |
| "loss": 1.7091, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 1.2131924629211426, |
| "learning_rate": 0.001, |
| "loss": 1.6997, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.9598875641822815, |
| "learning_rate": 0.001, |
| "loss": 1.676, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 1.1061313152313232, |
| "learning_rate": 0.001, |
| "loss": 1.7048, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 0.8944084644317627, |
| "learning_rate": 0.001, |
| "loss": 1.6876, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 1.101098895072937, |
| "learning_rate": 0.001, |
| "loss": 1.6805, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 0.9273419380187988, |
| "learning_rate": 0.001, |
| "loss": 1.6776, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 1.4434058666229248, |
| "learning_rate": 0.001, |
| "loss": 1.6771, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 0.9152409434318542, |
| "learning_rate": 0.001, |
| "loss": 1.685, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 1.1112756729125977, |
| "learning_rate": 0.001, |
| "loss": 1.6881, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.9489529132843018, |
| "learning_rate": 0.001, |
| "loss": 1.6868, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 0.7932117581367493, |
| "learning_rate": 0.001, |
| "loss": 1.6925, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.893706202507019, |
| "learning_rate": 0.001, |
| "loss": 1.7308, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 1.0104913711547852, |
| "learning_rate": 0.001, |
| "loss": 1.7075, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 0.8653020858764648, |
| "learning_rate": 0.001, |
| "loss": 1.7352, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 0.8224719166755676, |
| "learning_rate": 0.001, |
| "loss": 1.7129, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 1.0622234344482422, |
| "learning_rate": 0.001, |
| "loss": 1.7153, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 1.1205211877822876, |
| "learning_rate": 0.001, |
| "loss": 1.741, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 1.1996233463287354, |
| "learning_rate": 0.001, |
| "loss": 1.7005, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 1.043961763381958, |
| "learning_rate": 0.001, |
| "loss": 1.6296, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 0.9840327501296997, |
| "learning_rate": 0.001, |
| "loss": 1.5904, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 1.1553939580917358, |
| "learning_rate": 0.001, |
| "loss": 1.5837, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 0.9404619336128235, |
| "learning_rate": 0.001, |
| "loss": 1.6182, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.7575173377990723, |
| "learning_rate": 0.001, |
| "loss": 1.6091, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 0.8651929497718811, |
| "learning_rate": 0.001, |
| "loss": 1.6097, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 0.7588925361633301, |
| "learning_rate": 0.001, |
| "loss": 1.6596, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 0.9948194026947021, |
| "learning_rate": 0.001, |
| "loss": 1.6307, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 0.7452443242073059, |
| "learning_rate": 0.001, |
| "loss": 1.6545, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 0.7591370940208435, |
| "learning_rate": 0.001, |
| "loss": 1.6257, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 1.1810401678085327, |
| "learning_rate": 0.001, |
| "loss": 1.6381, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 0.785140335559845, |
| "learning_rate": 0.001, |
| "loss": 1.6453, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 1.0665206909179688, |
| "learning_rate": 0.001, |
| "loss": 1.656, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 1.0718743801116943, |
| "learning_rate": 0.001, |
| "loss": 1.6676, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 0.8834295868873596, |
| "learning_rate": 0.001, |
| "loss": 1.6464, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 0.9542046189308167, |
| "learning_rate": 0.001, |
| "loss": 1.6544, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 0.7912190556526184, |
| "learning_rate": 0.001, |
| "loss": 1.6348, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 46.410430908203125, |
| "learning_rate": 0.001, |
| "loss": 1.6686, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 0.779313325881958, |
| "learning_rate": 0.001, |
| "loss": 1.6581, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 1.3436274528503418, |
| "learning_rate": 0.001, |
| "loss": 1.6553, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 0.8226367831230164, |
| "learning_rate": 0.001, |
| "loss": 1.6682, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.8935831189155579, |
| "learning_rate": 0.001, |
| "loss": 1.6315, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 1.181753158569336, |
| "learning_rate": 0.001, |
| "loss": 1.6274, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 0.674148440361023, |
| "learning_rate": 0.001, |
| "loss": 1.6386, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 1.0500868558883667, |
| "learning_rate": 0.001, |
| "loss": 1.6534, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 0.9021979570388794, |
| "learning_rate": 0.001, |
| "loss": 1.6714, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 1.0396559238433838, |
| "learning_rate": 0.001, |
| "loss": 1.6539, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 0.9892787337303162, |
| "learning_rate": 0.001, |
| "loss": 1.6734, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 0.8489758968353271, |
| "learning_rate": 0.001, |
| "loss": 1.6485, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 1.0946519374847412, |
| "learning_rate": 0.001, |
| "loss": 1.6992, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 0.9034314155578613, |
| "learning_rate": 0.001, |
| "loss": 1.6708, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 0.7086787819862366, |
| "learning_rate": 0.001, |
| "loss": 1.548, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 0.42088550329208374, |
| "learning_rate": 0.001, |
| "loss": 1.5713, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 0.5306487083435059, |
| "learning_rate": 0.001, |
| "loss": 1.568, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 0.7567152976989746, |
| "learning_rate": 0.001, |
| "loss": 1.5902, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 0.8452807664871216, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 0.7313001751899719, |
| "learning_rate": 0.001, |
| "loss": 1.5539, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 0.8463912606239319, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 0.7595904469490051, |
| "learning_rate": 0.001, |
| "loss": 1.5831, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 0.8072571158409119, |
| "learning_rate": 0.001, |
| "loss": 1.5837, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 0.5768963694572449, |
| "learning_rate": 0.001, |
| "loss": 1.589, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 0.7995852828025818, |
| "learning_rate": 0.001, |
| "loss": 1.5792, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 0.8774004578590393, |
| "learning_rate": 0.001, |
| "loss": 1.5867, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.035139560699463, |
| "learning_rate": 0.001, |
| "loss": 1.5771, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 0.6128062605857849, |
| "learning_rate": 0.001, |
| "loss": 1.573, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 0.6861528158187866, |
| "learning_rate": 0.001, |
| "loss": 1.5973, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 0.6741127371788025, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 0.7249501943588257, |
| "learning_rate": 0.001, |
| "loss": 1.6032, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 0.5071559548377991, |
| "learning_rate": 0.001, |
| "loss": 1.6135, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 0.6695942282676697, |
| "learning_rate": 0.001, |
| "loss": 1.6317, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 0.8934532999992371, |
| "learning_rate": 0.001, |
| "loss": 1.6195, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 0.6500742435455322, |
| "learning_rate": 0.001, |
| "loss": 1.5722, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 0.6032618880271912, |
| "learning_rate": 0.001, |
| "loss": 1.5891, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 0.8172997236251831, |
| "learning_rate": 0.001, |
| "loss": 1.6224, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 0.7582260370254517, |
| "learning_rate": 0.001, |
| "loss": 1.6274, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 0.7059329152107239, |
| "learning_rate": 0.001, |
| "loss": 1.6109, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 0.8909659385681152, |
| "learning_rate": 0.001, |
| "loss": 1.6163, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 0.6082348227500916, |
| "learning_rate": 0.001, |
| "loss": 1.6392, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 0.6296877861022949, |
| "learning_rate": 0.001, |
| "loss": 1.6235, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 0.5239854454994202, |
| "learning_rate": 0.001, |
| "loss": 1.5809, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 0.5151014924049377, |
| "learning_rate": 0.001, |
| "loss": 1.6231, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 0.6107050180435181, |
| "learning_rate": 0.001, |
| "loss": 1.6294, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.571207880973816, |
| "learning_rate": 0.001, |
| "loss": 1.5311, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.3317564725875854, |
| "learning_rate": 0.001, |
| "loss": 1.5232, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 0.9770411252975464, |
| "learning_rate": 0.001, |
| "loss": 1.5323, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.5269267559051514, |
| "learning_rate": 0.001, |
| "loss": 1.538, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.4465818405151367, |
| "learning_rate": 0.001, |
| "loss": 1.5356, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 1.4123685359954834, |
| "learning_rate": 0.001, |
| "loss": 1.5354, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.1594544649124146, |
| "learning_rate": 0.001, |
| "loss": 1.4929, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.5278046131134033, |
| "learning_rate": 0.001, |
| "loss": 1.5353, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 1.3489573001861572, |
| "learning_rate": 0.001, |
| "loss": 1.5223, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 0.9945598244667053, |
| "learning_rate": 0.001, |
| "loss": 1.565, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.3939306735992432, |
| "learning_rate": 0.001, |
| "loss": 1.5747, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.0500473976135254, |
| "learning_rate": 0.001, |
| "loss": 1.5677, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.1538209915161133, |
| "learning_rate": 0.001, |
| "loss": 1.5602, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.2732844352722168, |
| "learning_rate": 0.001, |
| "loss": 1.5582, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 1.4475446939468384, |
| "learning_rate": 0.001, |
| "loss": 1.5542, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.5488345623016357, |
| "learning_rate": 0.001, |
| "loss": 1.5589, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 0.9935588836669922, |
| "learning_rate": 0.001, |
| "loss": 1.5645, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 1.3134706020355225, |
| "learning_rate": 0.001, |
| "loss": 1.5479, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.4117892980575562, |
| "learning_rate": 0.001, |
| "loss": 1.5534, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 1.0989189147949219, |
| "learning_rate": 0.001, |
| "loss": 1.5924, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 0.9962360858917236, |
| "learning_rate": 0.001, |
| "loss": 1.5521, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 1.0200791358947754, |
| "learning_rate": 0.001, |
| "loss": 1.5421, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.2421667575836182, |
| "learning_rate": 0.001, |
| "loss": 1.5971, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.2540074586868286, |
| "learning_rate": 0.001, |
| "loss": 1.562, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 1.1804842948913574, |
| "learning_rate": 0.001, |
| "loss": 1.5673, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.2499170303344727, |
| "learning_rate": 0.001, |
| "loss": 1.5783, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.1028128862380981, |
| "learning_rate": 0.001, |
| "loss": 1.5888, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 1.251753330230713, |
| "learning_rate": 0.001, |
| "loss": 1.5749, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.2316412925720215, |
| "learning_rate": 0.001, |
| "loss": 1.5755, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 0.9406437873840332, |
| "learning_rate": 0.001, |
| "loss": 1.5741, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.3652178049087524, |
| "learning_rate": 0.001, |
| "loss": 1.5867, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 1.1299371719360352, |
| "learning_rate": 0.001, |
| "loss": 1.5492, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 1.1812490224838257, |
| "learning_rate": 0.001, |
| "loss": 1.4886, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.179519534111023, |
| "learning_rate": 0.001, |
| "loss": 1.4934, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 1.120004653930664, |
| "learning_rate": 0.001, |
| "loss": 1.4879, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 1.0279499292373657, |
| "learning_rate": 0.001, |
| "loss": 1.4963, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 1.1619842052459717, |
| "learning_rate": 0.001, |
| "loss": 1.5114, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 1.1536353826522827, |
| "learning_rate": 0.001, |
| "loss": 1.4935, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 1.0345031023025513, |
| "learning_rate": 0.001, |
| "loss": 1.506, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 1.0397800207138062, |
| "learning_rate": 0.001, |
| "loss": 1.5026, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 0.8596133589744568, |
| "learning_rate": 0.001, |
| "loss": 1.5256, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 1.1593185663223267, |
| "learning_rate": 0.001, |
| "loss": 1.5055, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 1.0919194221496582, |
| "learning_rate": 0.001, |
| "loss": 1.4956, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 1.1217265129089355, |
| "learning_rate": 0.001, |
| "loss": 1.5337, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 0.9850316643714905, |
| "learning_rate": 0.001, |
| "loss": 1.516, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.0271371603012085, |
| "learning_rate": 0.001, |
| "loss": 1.5346, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 1.0458983182907104, |
| "learning_rate": 0.001, |
| "loss": 1.5167, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 1.0349985361099243, |
| "learning_rate": 0.001, |
| "loss": 1.5203, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.091556191444397, |
| "learning_rate": 0.001, |
| "loss": 1.5097, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.1958649158477783, |
| "learning_rate": 0.001, |
| "loss": 1.5038, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 1.1409047842025757, |
| "learning_rate": 0.001, |
| "loss": 1.4975, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 1.205556035041809, |
| "learning_rate": 0.001, |
| "loss": 1.5596, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 1.0502017736434937, |
| "learning_rate": 0.001, |
| "loss": 1.5111, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.3859450817108154, |
| "learning_rate": 0.001, |
| "loss": 1.508, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 1.0951238870620728, |
| "learning_rate": 0.001, |
| "loss": 1.5417, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 2.344174385070801, |
| "learning_rate": 0.001, |
| "loss": 1.5599, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 0.9748075604438782, |
| "learning_rate": 0.001, |
| "loss": 1.5558, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 0.9509823322296143, |
| "learning_rate": 0.001, |
| "loss": 1.5253, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 0.9078757166862488, |
| "learning_rate": 0.001, |
| "loss": 1.543, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 0.9902774691581726, |
| "learning_rate": 0.001, |
| "loss": 1.5438, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 1.2797644138336182, |
| "learning_rate": 0.001, |
| "loss": 1.5345, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 1.1902170181274414, |
| "learning_rate": 0.001, |
| "loss": 1.5337, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.0825999975204468, |
| "learning_rate": 0.001, |
| "loss": 1.4888, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 1.1632471084594727, |
| "learning_rate": 0.001, |
| "loss": 1.4294, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.212774634361267, |
| "learning_rate": 0.001, |
| "loss": 1.4312, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 1.018050193786621, |
| "learning_rate": 0.001, |
| "loss": 1.4696, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.0512523651123047, |
| "learning_rate": 0.001, |
| "loss": 1.4331, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 0.9138154983520508, |
| "learning_rate": 0.001, |
| "loss": 1.4497, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 1.0956708192825317, |
| "learning_rate": 0.001, |
| "loss": 1.4666, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 1.0326133966445923, |
| "learning_rate": 0.001, |
| "loss": 1.4538, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 0.9127147793769836, |
| "learning_rate": 0.001, |
| "loss": 1.4677, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 0.9023076295852661, |
| "learning_rate": 0.001, |
| "loss": 1.4532, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 1.0806233882904053, |
| "learning_rate": 0.001, |
| "loss": 1.464, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 1.0325735807418823, |
| "learning_rate": 0.001, |
| "loss": 1.4889, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 0.9904654026031494, |
| "learning_rate": 0.001, |
| "loss": 1.4935, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 1.2254970073699951, |
| "learning_rate": 0.001, |
| "loss": 1.491, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 1.5757197141647339, |
| "learning_rate": 0.001, |
| "loss": 1.4954, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 0.9046617746353149, |
| "learning_rate": 0.001, |
| "loss": 1.4765, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.987343430519104, |
| "learning_rate": 0.001, |
| "loss": 1.5013, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 1.0150471925735474, |
| "learning_rate": 0.001, |
| "loss": 1.4783, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 1.2784874439239502, |
| "learning_rate": 0.001, |
| "loss": 1.4966, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 1.0940210819244385, |
| "learning_rate": 0.001, |
| "loss": 1.5001, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 1.2780746221542358, |
| "learning_rate": 0.001, |
| "loss": 1.4962, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 0.9342361688613892, |
| "learning_rate": 0.001, |
| "loss": 1.5119, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 1.034030556678772, |
| "learning_rate": 0.001, |
| "loss": 1.4994, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 1.0301884412765503, |
| "learning_rate": 0.001, |
| "loss": 1.5109, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 1.0798345804214478, |
| "learning_rate": 0.001, |
| "loss": 1.4931, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 0.9824477434158325, |
| "learning_rate": 0.001, |
| "loss": 1.4757, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 0.970503568649292, |
| "learning_rate": 0.001, |
| "loss": 1.5167, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 1.0813010931015015, |
| "learning_rate": 0.001, |
| "loss": 1.497, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 1.0717248916625977, |
| "learning_rate": 0.001, |
| "loss": 1.5072, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 0.9872753024101257, |
| "learning_rate": 0.001, |
| "loss": 1.5011, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 0.9820966124534607, |
| "learning_rate": 0.001, |
| "loss": 1.5042, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 0.968085527420044, |
| "learning_rate": 0.001, |
| "loss": 1.4501, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 0.980858325958252, |
| "learning_rate": 0.001, |
| "loss": 1.4073, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 1.4215143918991089, |
| "learning_rate": 0.001, |
| "loss": 1.4116, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 0.9262951612472534, |
| "learning_rate": 0.001, |
| "loss": 1.4064, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.0522440671920776, |
| "learning_rate": 0.001, |
| "loss": 1.4034, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.0187525749206543, |
| "learning_rate": 0.001, |
| "loss": 1.4318, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 0.9597002863883972, |
| "learning_rate": 0.001, |
| "loss": 1.4324, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 1.0441052913665771, |
| "learning_rate": 0.001, |
| "loss": 1.4142, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 0.8995744585990906, |
| "learning_rate": 0.001, |
| "loss": 1.432, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 1.096145510673523, |
| "learning_rate": 0.001, |
| "loss": 1.4187, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 0.9527760148048401, |
| "learning_rate": 0.001, |
| "loss": 1.4671, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 1.1196210384368896, |
| "learning_rate": 0.001, |
| "loss": 1.4443, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 0.9554662108421326, |
| "learning_rate": 0.001, |
| "loss": 1.4614, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 0.9521270394325256, |
| "learning_rate": 0.001, |
| "loss": 1.4679, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 1.0394660234451294, |
| "learning_rate": 0.001, |
| "loss": 1.4449, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 1.0146692991256714, |
| "learning_rate": 0.001, |
| "loss": 1.4368, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 1.3197181224822998, |
| "learning_rate": 0.001, |
| "loss": 1.4654, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 1.0358250141143799, |
| "learning_rate": 0.001, |
| "loss": 1.4495, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.08975088596344, |
| "learning_rate": 0.001, |
| "loss": 1.4504, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 0.9866904020309448, |
| "learning_rate": 0.001, |
| "loss": 1.435, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 1.085909366607666, |
| "learning_rate": 0.001, |
| "loss": 1.4448, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 0.9458845257759094, |
| "learning_rate": 0.001, |
| "loss": 1.4559, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 1.0192725658416748, |
| "learning_rate": 0.001, |
| "loss": 1.458, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 1.2316535711288452, |
| "learning_rate": 0.001, |
| "loss": 1.4697, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 0.9104325771331787, |
| "learning_rate": 0.001, |
| "loss": 1.5081, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.11668860912323, |
| "learning_rate": 0.001, |
| "loss": 1.4984, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 0.9999051690101624, |
| "learning_rate": 0.001, |
| "loss": 1.4485, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.0887517929077148, |
| "learning_rate": 0.001, |
| "loss": 1.4528, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 1.1782604455947876, |
| "learning_rate": 0.001, |
| "loss": 1.4756, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 0.8981242775917053, |
| "learning_rate": 0.001, |
| "loss": 1.4514, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 0.875098705291748, |
| "learning_rate": 0.001, |
| "loss": 1.4544, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 1.2988208532333374, |
| "learning_rate": 0.001, |
| "loss": 1.3966, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 1.8159008026123047, |
| "learning_rate": 0.001, |
| "loss": 1.3734, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 0.9084588289260864, |
| "learning_rate": 0.001, |
| "loss": 1.369, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 0.8925555944442749, |
| "learning_rate": 0.001, |
| "loss": 1.3684, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 0.8304650187492371, |
| "learning_rate": 0.001, |
| "loss": 1.3887, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 1.0589717626571655, |
| "learning_rate": 0.001, |
| "loss": 1.3834, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 0.9229005575180054, |
| "learning_rate": 0.001, |
| "loss": 1.3972, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 0.9296209216117859, |
| "learning_rate": 0.001, |
| "loss": 1.3909, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 0.8965741991996765, |
| "learning_rate": 0.001, |
| "loss": 1.3834, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 0.9377841353416443, |
| "learning_rate": 0.001, |
| "loss": 1.4088, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 1.0339752435684204, |
| "learning_rate": 0.001, |
| "loss": 1.4098, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 0.8679028749465942, |
| "learning_rate": 0.001, |
| "loss": 1.3976, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 0.9596776366233826, |
| "learning_rate": 0.001, |
| "loss": 1.3852, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 1.042893886566162, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 1.0267068147659302, |
| "learning_rate": 0.001, |
| "loss": 1.4032, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 1.0795466899871826, |
| "learning_rate": 0.001, |
| "loss": 1.4233, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 0.9310310482978821, |
| "learning_rate": 0.001, |
| "loss": 1.4176, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 0.7695964574813843, |
| "learning_rate": 0.001, |
| "loss": 1.4068, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 0.9554638266563416, |
| "learning_rate": 0.001, |
| "loss": 1.4462, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 0.963115394115448, |
| "learning_rate": 0.001, |
| "loss": 1.4159, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 0.9717909693717957, |
| "learning_rate": 0.001, |
| "loss": 1.4081, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 1.1150710582733154, |
| "learning_rate": 0.001, |
| "loss": 1.4377, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 1.0072553157806396, |
| "learning_rate": 0.001, |
| "loss": 1.4576, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 0.9450471997261047, |
| "learning_rate": 0.001, |
| "loss": 1.4349, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 0.8816408514976501, |
| "learning_rate": 0.001, |
| "loss": 1.4237, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 0.9339333772659302, |
| "learning_rate": 0.001, |
| "loss": 1.4434, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 1.1679960489273071, |
| "learning_rate": 0.001, |
| "loss": 1.4531, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 1.0511928796768188, |
| "learning_rate": 0.001, |
| "loss": 1.4101, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 0.8638760447502136, |
| "learning_rate": 0.001, |
| "loss": 1.4366, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 1.1316864490509033, |
| "learning_rate": 0.001, |
| "loss": 1.434, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 1.0850309133529663, |
| "learning_rate": 0.001, |
| "loss": 1.4175, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 1.1530749797821045, |
| "learning_rate": 0.001, |
| "loss": 1.3574, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 1.2573904991149902, |
| "learning_rate": 0.001, |
| "loss": 1.3626, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.0096023082733154, |
| "learning_rate": 0.001, |
| "loss": 1.3593, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.0445042848587036, |
| "learning_rate": 0.001, |
| "loss": 1.337, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 0.976283609867096, |
| "learning_rate": 0.001, |
| "loss": 1.3559, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 0.9800626635551453, |
| "learning_rate": 0.001, |
| "loss": 1.3801, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 1.07820725440979, |
| "learning_rate": 0.001, |
| "loss": 1.3517, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 1.0982788801193237, |
| "learning_rate": 0.001, |
| "loss": 1.3222, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 1.013915777206421, |
| "learning_rate": 0.001, |
| "loss": 1.3454, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 0.9462135434150696, |
| "learning_rate": 0.001, |
| "loss": 1.3472, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 1.1129679679870605, |
| "learning_rate": 0.001, |
| "loss": 1.3625, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 1.1635687351226807, |
| "learning_rate": 0.001, |
| "loss": 1.3752, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 0.9367783665657043, |
| "learning_rate": 0.001, |
| "loss": 1.398, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 4.812443256378174, |
| "learning_rate": 0.001, |
| "loss": 1.3708, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.1130398511886597, |
| "learning_rate": 0.001, |
| "loss": 1.3928, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 1.0559415817260742, |
| "learning_rate": 0.001, |
| "loss": 1.367, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.052778959274292, |
| "learning_rate": 0.001, |
| "loss": 1.3692, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 1.0826404094696045, |
| "learning_rate": 0.001, |
| "loss": 1.3857, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 0.9068247675895691, |
| "learning_rate": 0.001, |
| "loss": 1.3805, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.2357306480407715, |
| "learning_rate": 0.001, |
| "loss": 1.4152, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 0.9457151293754578, |
| "learning_rate": 0.001, |
| "loss": 1.4111, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 0.8796570301055908, |
| "learning_rate": 0.001, |
| "loss": 1.3951, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 0.9057885408401489, |
| "learning_rate": 0.001, |
| "loss": 1.4123, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.1413110494613647, |
| "learning_rate": 0.001, |
| "loss": 1.3942, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 0.8065590262413025, |
| "learning_rate": 0.001, |
| "loss": 1.4036, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 0.9088504314422607, |
| "learning_rate": 0.001, |
| "loss": 1.3785, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 1.1512057781219482, |
| "learning_rate": 0.001, |
| "loss": 1.411, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 0.9676141142845154, |
| "learning_rate": 0.001, |
| "loss": 1.3749, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.2688740491867065, |
| "learning_rate": 0.001, |
| "loss": 1.4197, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 0.9541943669319153, |
| "learning_rate": 0.001, |
| "loss": 1.4337, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 1.5543314218521118, |
| "learning_rate": 0.001, |
| "loss": 1.3808, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.0888712406158447, |
| "learning_rate": 0.001, |
| "loss": 1.3064, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 1.151442527770996, |
| "learning_rate": 0.001, |
| "loss": 1.3064, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.339379072189331, |
| "learning_rate": 0.001, |
| "loss": 1.3393, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.1313862800598145, |
| "learning_rate": 0.001, |
| "loss": 1.313, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 0.9059498906135559, |
| "learning_rate": 0.001, |
| "loss": 1.3322, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 0.9920981526374817, |
| "learning_rate": 0.001, |
| "loss": 1.325, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 0.7398461103439331, |
| "learning_rate": 0.001, |
| "loss": 1.3382, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.0884451866149902, |
| "learning_rate": 0.001, |
| "loss": 1.3054, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.2823668718338013, |
| "learning_rate": 0.001, |
| "loss": 1.3264, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.3318175077438354, |
| "learning_rate": 0.001, |
| "loss": 1.3511, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.1119569540023804, |
| "learning_rate": 0.001, |
| "loss": 1.3449, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 1.1583483219146729, |
| "learning_rate": 0.001, |
| "loss": 1.3145, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.323159098625183, |
| "learning_rate": 0.001, |
| "loss": 1.3609, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.1489546298980713, |
| "learning_rate": 0.001, |
| "loss": 1.3374, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.1882226467132568, |
| "learning_rate": 0.001, |
| "loss": 1.353, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 1.6996192932128906, |
| "learning_rate": 0.001, |
| "loss": 1.3764, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.079559564590454, |
| "learning_rate": 0.001, |
| "loss": 1.3654, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 0.9709523916244507, |
| "learning_rate": 0.001, |
| "loss": 1.361, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.1908799409866333, |
| "learning_rate": 0.001, |
| "loss": 1.3668, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 0.8918905854225159, |
| "learning_rate": 0.001, |
| "loss": 1.3524, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 0.880649983882904, |
| "learning_rate": 0.001, |
| "loss": 1.3517, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.1815990209579468, |
| "learning_rate": 0.001, |
| "loss": 1.349, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.071019172668457, |
| "learning_rate": 0.001, |
| "loss": 1.3596, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 1.1914271116256714, |
| "learning_rate": 0.001, |
| "loss": 1.3663, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 1.4547832012176514, |
| "learning_rate": 0.001, |
| "loss": 1.3611, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 1.1460895538330078, |
| "learning_rate": 0.001, |
| "loss": 1.3605, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 1.155902624130249, |
| "learning_rate": 0.001, |
| "loss": 1.3833, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.1332170963287354, |
| "learning_rate": 0.001, |
| "loss": 1.366, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 1.009746789932251, |
| "learning_rate": 0.001, |
| "loss": 1.3741, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.1160213947296143, |
| "learning_rate": 0.001, |
| "loss": 1.3727, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 0.9431664347648621, |
| "learning_rate": 0.001, |
| "loss": 1.3603, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.071413516998291, |
| "learning_rate": 0.001, |
| "loss": 1.2621, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.035800576210022, |
| "learning_rate": 0.001, |
| "loss": 1.2914, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 1.001190423965454, |
| "learning_rate": 0.001, |
| "loss": 1.2734, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 0.9272027611732483, |
| "learning_rate": 0.001, |
| "loss": 1.2658, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 0.8124894499778748, |
| "learning_rate": 0.001, |
| "loss": 1.2945, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.0019245147705078, |
| "learning_rate": 0.001, |
| "loss": 1.3106, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.071997880935669, |
| "learning_rate": 0.001, |
| "loss": 1.3253, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 0.8945184946060181, |
| "learning_rate": 0.001, |
| "loss": 1.2997, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 0.8286392092704773, |
| "learning_rate": 0.001, |
| "loss": 1.3153, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.137093424797058, |
| "learning_rate": 0.001, |
| "loss": 1.3035, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.379814863204956, |
| "learning_rate": 0.001, |
| "loss": 1.2933, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 0.8075309991836548, |
| "learning_rate": 0.001, |
| "loss": 1.3175, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.3296177387237549, |
| "learning_rate": 0.001, |
| "loss": 1.3143, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.5358154773712158, |
| "learning_rate": 0.001, |
| "loss": 1.3137, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.2911955118179321, |
| "learning_rate": 0.001, |
| "loss": 1.3108, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 0.6920326352119446, |
| "learning_rate": 0.001, |
| "loss": 1.3177, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.075358510017395, |
| "learning_rate": 0.001, |
| "loss": 1.3267, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 0.9144567251205444, |
| "learning_rate": 0.001, |
| "loss": 1.3318, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.1483515501022339, |
| "learning_rate": 0.001, |
| "loss": 1.3312, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.050889253616333, |
| "learning_rate": 0.001, |
| "loss": 1.3428, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.0265402793884277, |
| "learning_rate": 0.001, |
| "loss": 1.3533, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 0.9257956743240356, |
| "learning_rate": 0.001, |
| "loss": 1.3497, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.9063913822174072, |
| "learning_rate": 0.001, |
| "loss": 1.3191, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 1.0440740585327148, |
| "learning_rate": 0.001, |
| "loss": 1.3378, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 1.0758187770843506, |
| "learning_rate": 0.001, |
| "loss": 1.3278, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 1.1654984951019287, |
| "learning_rate": 0.001, |
| "loss": 1.3627, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 0.9538053870201111, |
| "learning_rate": 0.001, |
| "loss": 1.3415, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.0862343311309814, |
| "learning_rate": 0.001, |
| "loss": 1.351, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 1.0913732051849365, |
| "learning_rate": 0.001, |
| "loss": 1.3614, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.0344431400299072, |
| "learning_rate": 0.001, |
| "loss": 1.3385, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.0967899560928345, |
| "learning_rate": 0.001, |
| "loss": 1.3154, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.7565858364105225, |
| "learning_rate": 0.001, |
| "loss": 1.2238, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.8335599899291992, |
| "learning_rate": 0.001, |
| "loss": 1.2362, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.951370358467102, |
| "learning_rate": 0.001, |
| "loss": 1.2447, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.7428096532821655, |
| "learning_rate": 0.001, |
| "loss": 1.273, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 1.5402082204818726, |
| "learning_rate": 0.001, |
| "loss": 1.2413, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 1.3417208194732666, |
| "learning_rate": 0.001, |
| "loss": 1.2456, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.6393389701843262, |
| "learning_rate": 0.001, |
| "loss": 1.2635, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.5430525541305542, |
| "learning_rate": 0.001, |
| "loss": 1.2979, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 1.640655517578125, |
| "learning_rate": 0.001, |
| "loss": 1.2704, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 1.7041141986846924, |
| "learning_rate": 0.001, |
| "loss": 1.3015, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 1.5950192213058472, |
| "learning_rate": 0.001, |
| "loss": 1.2574, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 1.8514941930770874, |
| "learning_rate": 0.001, |
| "loss": 1.2826, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.5289716720581055, |
| "learning_rate": 0.001, |
| "loss": 1.2847, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 1.5072901248931885, |
| "learning_rate": 0.001, |
| "loss": 1.3026, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.9386134147644043, |
| "learning_rate": 0.001, |
| "loss": 1.3189, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.3212685585021973, |
| "learning_rate": 0.001, |
| "loss": 1.2961, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.8733723163604736, |
| "learning_rate": 0.001, |
| "loss": 1.2841, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.5845545530319214, |
| "learning_rate": 0.001, |
| "loss": 1.2776, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.2121365070343018, |
| "learning_rate": 0.001, |
| "loss": 1.3145, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 1.1913347244262695, |
| "learning_rate": 0.001, |
| "loss": 1.3027, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 1.4041755199432373, |
| "learning_rate": 0.001, |
| "loss": 1.3157, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 1.5565708875656128, |
| "learning_rate": 0.001, |
| "loss": 1.3094, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.6354928016662598, |
| "learning_rate": 0.001, |
| "loss": 1.3378, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.461625099182129, |
| "learning_rate": 0.001, |
| "loss": 1.3012, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 1.9901479482650757, |
| "learning_rate": 0.001, |
| "loss": 1.3125, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.8469276428222656, |
| "learning_rate": 0.001, |
| "loss": 1.3086, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.725543737411499, |
| "learning_rate": 0.001, |
| "loss": 1.351, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.799232006072998, |
| "learning_rate": 0.001, |
| "loss": 1.3276, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.447696566581726, |
| "learning_rate": 0.001, |
| "loss": 1.3347, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 1.706591010093689, |
| "learning_rate": 0.001, |
| "loss": 1.3436, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.0814664363861084, |
| "learning_rate": 0.001, |
| "loss": 1.3459, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.1114346981048584, |
| "learning_rate": 0.001, |
| "loss": 1.1934, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 1.211247205734253, |
| "learning_rate": 0.001, |
| "loss": 1.2291, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 1.1112585067749023, |
| "learning_rate": 0.001, |
| "loss": 1.2196, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 1.2427830696105957, |
| "learning_rate": 0.001, |
| "loss": 1.2401, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.1477326154708862, |
| "learning_rate": 0.001, |
| "loss": 1.2263, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 1.2850887775421143, |
| "learning_rate": 0.001, |
| "loss": 1.2521, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.1225168704986572, |
| "learning_rate": 0.001, |
| "loss": 1.2548, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 1.1634271144866943, |
| "learning_rate": 0.001, |
| "loss": 1.2274, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.162834644317627, |
| "learning_rate": 0.001, |
| "loss": 1.2562, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 1.3091973066329956, |
| "learning_rate": 0.001, |
| "loss": 1.2844, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 1.1031546592712402, |
| "learning_rate": 0.001, |
| "loss": 1.2595, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.2217282056808472, |
| "learning_rate": 0.001, |
| "loss": 1.2533, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 1.2840886116027832, |
| "learning_rate": 0.001, |
| "loss": 1.2639, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 0.935218870639801, |
| "learning_rate": 0.001, |
| "loss": 1.2536, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.423843502998352, |
| "learning_rate": 0.001, |
| "loss": 1.2741, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 1.232347011566162, |
| "learning_rate": 0.001, |
| "loss": 1.2671, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 1.2617510557174683, |
| "learning_rate": 0.001, |
| "loss": 1.2726, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.3618507385253906, |
| "learning_rate": 0.001, |
| "loss": 1.2602, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 1.2484835386276245, |
| "learning_rate": 0.001, |
| "loss": 1.2583, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.2037937641143799, |
| "learning_rate": 0.001, |
| "loss": 1.2757, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.1551131010055542, |
| "learning_rate": 0.001, |
| "loss": 1.2876, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.0815056562423706, |
| "learning_rate": 0.001, |
| "loss": 1.3025, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 1.0868639945983887, |
| "learning_rate": 0.001, |
| "loss": 1.3064, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.2222222089767456, |
| "learning_rate": 0.001, |
| "loss": 1.2787, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.2130047082901, |
| "learning_rate": 0.001, |
| "loss": 1.2716, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.2509404420852661, |
| "learning_rate": 0.001, |
| "loss": 1.2875, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.3974509239196777, |
| "learning_rate": 0.001, |
| "loss": 1.3006, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 1.0603617429733276, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.0816590785980225, |
| "learning_rate": 0.001, |
| "loss": 1.2933, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 1.3309355974197388, |
| "learning_rate": 0.001, |
| "loss": 1.2893, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.2768296003341675, |
| "learning_rate": 0.001, |
| "loss": 1.3172, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 1.4616053104400635, |
| "learning_rate": 0.001, |
| "loss": 1.1704, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 1.417331337928772, |
| "learning_rate": 0.001, |
| "loss": 1.1887, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.2152198553085327, |
| "learning_rate": 0.001, |
| "loss": 1.1897, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.1913012266159058, |
| "learning_rate": 0.001, |
| "loss": 1.2236, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 0.9392966628074646, |
| "learning_rate": 0.001, |
| "loss": 1.226, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 1.0119884014129639, |
| "learning_rate": 0.001, |
| "loss": 1.2228, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 1.276890754699707, |
| "learning_rate": 0.001, |
| "loss": 1.2287, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 1.1096450090408325, |
| "learning_rate": 0.001, |
| "loss": 1.2371, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 1.1702587604522705, |
| "learning_rate": 0.001, |
| "loss": 1.2268, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.0416254997253418, |
| "learning_rate": 0.001, |
| "loss": 1.2284, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.093361735343933, |
| "learning_rate": 0.001, |
| "loss": 1.2461, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 1.0348377227783203, |
| "learning_rate": 0.001, |
| "loss": 1.2525, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.9038110971450806, |
| "learning_rate": 0.001, |
| "loss": 1.2481, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.2229783535003662, |
| "learning_rate": 0.001, |
| "loss": 1.2186, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.1477720737457275, |
| "learning_rate": 0.001, |
| "loss": 1.2482, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 1.255200743675232, |
| "learning_rate": 0.001, |
| "loss": 1.2552, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.0543900728225708, |
| "learning_rate": 0.001, |
| "loss": 1.2373, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 1.2552344799041748, |
| "learning_rate": 0.001, |
| "loss": 1.2511, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.2354882955551147, |
| "learning_rate": 0.001, |
| "loss": 1.2509, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.1915310621261597, |
| "learning_rate": 0.001, |
| "loss": 1.2541, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 1.3169018030166626, |
| "learning_rate": 0.001, |
| "loss": 1.246, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 1.34446120262146, |
| "learning_rate": 0.001, |
| "loss": 1.2651, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.141524076461792, |
| "learning_rate": 0.001, |
| "loss": 1.2607, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 1.2178977727890015, |
| "learning_rate": 0.001, |
| "loss": 1.2887, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 0.9515364766120911, |
| "learning_rate": 0.001, |
| "loss": 1.2445, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 1.2744394540786743, |
| "learning_rate": 0.001, |
| "loss": 1.2612, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 1.0756638050079346, |
| "learning_rate": 0.001, |
| "loss": 1.2727, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.0071052312850952, |
| "learning_rate": 0.001, |
| "loss": 1.2548, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 1.349400520324707, |
| "learning_rate": 0.001, |
| "loss": 1.2679, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 3.5351197719573975, |
| "learning_rate": 0.001, |
| "loss": 1.2702, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.235960602760315, |
| "learning_rate": 0.001, |
| "loss": 1.2695, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 1.0976554155349731, |
| "learning_rate": 0.001, |
| "loss": 1.1755, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.267794132232666, |
| "learning_rate": 0.001, |
| "loss": 1.1877, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.089699625968933, |
| "learning_rate": 0.001, |
| "loss": 1.1617, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 1.0829448699951172, |
| "learning_rate": 0.001, |
| "loss": 1.1832, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.2309054136276245, |
| "learning_rate": 0.001, |
| "loss": 1.2009, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.1939396858215332, |
| "learning_rate": 0.001, |
| "loss": 1.1831, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.3018850088119507, |
| "learning_rate": 0.001, |
| "loss": 1.192, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 0.9886336922645569, |
| "learning_rate": 0.001, |
| "loss": 1.2002, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.4016402959823608, |
| "learning_rate": 0.001, |
| "loss": 1.2176, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 1.0506421327590942, |
| "learning_rate": 0.001, |
| "loss": 1.2063, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.0052201747894287, |
| "learning_rate": 0.001, |
| "loss": 1.2195, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.0880728960037231, |
| "learning_rate": 0.001, |
| "loss": 1.2034, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.157288670539856, |
| "learning_rate": 0.001, |
| "loss": 1.2042, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 0.956786036491394, |
| "learning_rate": 0.001, |
| "loss": 1.1999, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 1.3962632417678833, |
| "learning_rate": 0.001, |
| "loss": 1.2276, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.192475438117981, |
| "learning_rate": 0.001, |
| "loss": 1.196, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.085652470588684, |
| "learning_rate": 0.001, |
| "loss": 1.2409, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 1.0996172428131104, |
| "learning_rate": 0.001, |
| "loss": 1.2265, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.1674541234970093, |
| "learning_rate": 0.001, |
| "loss": 1.2195, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.3376907110214233, |
| "learning_rate": 0.001, |
| "loss": 1.2255, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.0617411136627197, |
| "learning_rate": 0.001, |
| "loss": 1.2072, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 0.935806930065155, |
| "learning_rate": 0.001, |
| "loss": 1.2313, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.2648018598556519, |
| "learning_rate": 0.001, |
| "loss": 1.2438, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 1.2684612274169922, |
| "learning_rate": 0.001, |
| "loss": 1.2323, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.0561871528625488, |
| "learning_rate": 0.001, |
| "loss": 1.2478, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 1.1016511917114258, |
| "learning_rate": 0.001, |
| "loss": 1.2459, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.1570508480072021, |
| "learning_rate": 0.001, |
| "loss": 1.272, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 1.2147791385650635, |
| "learning_rate": 0.001, |
| "loss": 1.2439, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 1.2510056495666504, |
| "learning_rate": 0.001, |
| "loss": 1.2652, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 0.9977579116821289, |
| "learning_rate": 0.001, |
| "loss": 1.2608, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.4413050413131714, |
| "learning_rate": 0.001, |
| "loss": 1.2153, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.1104098558425903, |
| "learning_rate": 0.001, |
| "loss": 1.1585, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.1735037565231323, |
| "learning_rate": 0.001, |
| "loss": 1.1601, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 1.2895740270614624, |
| "learning_rate": 0.001, |
| "loss": 1.1557, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.7517699003219604, |
| "learning_rate": 0.001, |
| "loss": 1.1728, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.2219549417495728, |
| "learning_rate": 0.001, |
| "loss": 1.1527, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 1.2364577054977417, |
| "learning_rate": 0.001, |
| "loss": 1.1771, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.2639278173446655, |
| "learning_rate": 0.001, |
| "loss": 1.1716, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.2081694602966309, |
| "learning_rate": 0.001, |
| "loss": 1.1708, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 1.219407558441162, |
| "learning_rate": 0.001, |
| "loss": 1.1861, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.067844271659851, |
| "learning_rate": 0.001, |
| "loss": 1.1979, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 1.2782212495803833, |
| "learning_rate": 0.001, |
| "loss": 1.1796, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 1.3282757997512817, |
| "learning_rate": 0.001, |
| "loss": 1.186, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 1.018190860748291, |
| "learning_rate": 0.001, |
| "loss": 1.242, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 1.1868315935134888, |
| "learning_rate": 0.001, |
| "loss": 1.2631, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 1.3459444046020508, |
| "learning_rate": 0.001, |
| "loss": 1.2075, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 0.9880927801132202, |
| "learning_rate": 0.001, |
| "loss": 1.1989, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.5468779802322388, |
| "learning_rate": 0.001, |
| "loss": 1.2444, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 0.9835416674613953, |
| "learning_rate": 0.001, |
| "loss": 1.2407, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.1654820442199707, |
| "learning_rate": 0.001, |
| "loss": 1.2372, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.2552804946899414, |
| "learning_rate": 0.001, |
| "loss": 1.2291, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 3.475341796875, |
| "learning_rate": 0.001, |
| "loss": 1.2355, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 1.0410614013671875, |
| "learning_rate": 0.001, |
| "loss": 1.234, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 1.168955683708191, |
| "learning_rate": 0.001, |
| "loss": 1.2406, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.7832200527191162, |
| "learning_rate": 0.001, |
| "loss": 1.2689, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 1.3156075477600098, |
| "learning_rate": 0.001, |
| "loss": 1.2598, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 1.005856990814209, |
| "learning_rate": 0.001, |
| "loss": 1.2477, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 0.9797137379646301, |
| "learning_rate": 0.001, |
| "loss": 1.2534, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 0.9590547680854797, |
| "learning_rate": 0.001, |
| "loss": 1.2328, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.1797035932540894, |
| "learning_rate": 0.001, |
| "loss": 1.2347, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.065169334411621, |
| "learning_rate": 0.001, |
| "loss": 1.2465, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 1.0149396657943726, |
| "learning_rate": 0.001, |
| "loss": 1.1883, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 1.1508020162582397, |
| "learning_rate": 0.001, |
| "loss": 1.156, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 3.199270725250244, |
| "learning_rate": 0.001, |
| "loss": 1.1385, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.0568350553512573, |
| "learning_rate": 0.001, |
| "loss": 1.1544, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 5.397552013397217, |
| "learning_rate": 0.001, |
| "loss": 1.149, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.1484830379486084, |
| "learning_rate": 0.001, |
| "loss": 1.1669, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.3436305522918701, |
| "learning_rate": 0.001, |
| "loss": 1.1669, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.3443758487701416, |
| "learning_rate": 0.001, |
| "loss": 1.1538, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.4839012622833252, |
| "learning_rate": 0.001, |
| "loss": 1.1557, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.2252734899520874, |
| "learning_rate": 0.001, |
| "loss": 1.1756, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.0860698223114014, |
| "learning_rate": 0.001, |
| "loss": 1.161, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 0.9651658535003662, |
| "learning_rate": 0.001, |
| "loss": 1.2006, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.0372250080108643, |
| "learning_rate": 0.001, |
| "loss": 1.1829, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 1.0921285152435303, |
| "learning_rate": 0.001, |
| "loss": 1.1988, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 1.0957082509994507, |
| "learning_rate": 0.001, |
| "loss": 1.1702, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 0.9881604909896851, |
| "learning_rate": 0.001, |
| "loss": 1.1969, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 1.0762852430343628, |
| "learning_rate": 0.001, |
| "loss": 1.1866, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.1281818151474, |
| "learning_rate": 0.001, |
| "loss": 1.1812, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 1.1947038173675537, |
| "learning_rate": 0.001, |
| "loss": 1.1999, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 1.0397061109542847, |
| "learning_rate": 0.001, |
| "loss": 1.1993, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 0.9127522110939026, |
| "learning_rate": 0.001, |
| "loss": 1.1945, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.0395044088363647, |
| "learning_rate": 0.001, |
| "loss": 1.2217, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 1.1878328323364258, |
| "learning_rate": 0.001, |
| "loss": 1.211, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.0908920764923096, |
| "learning_rate": 0.001, |
| "loss": 1.1992, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.1181707382202148, |
| "learning_rate": 0.001, |
| "loss": 1.1994, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 0.9277530312538147, |
| "learning_rate": 0.001, |
| "loss": 1.2147, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.4166144132614136, |
| "learning_rate": 0.001, |
| "loss": 1.1931, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.0426658391952515, |
| "learning_rate": 0.001, |
| "loss": 1.2126, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.1179332733154297, |
| "learning_rate": 0.001, |
| "loss": 1.2002, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.1418015956878662, |
| "learning_rate": 0.001, |
| "loss": 1.206, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 1.1119362115859985, |
| "learning_rate": 0.001, |
| "loss": 1.2136, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.1023640632629395, |
| "learning_rate": 0.001, |
| "loss": 1.133, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.1139122247695923, |
| "learning_rate": 0.001, |
| "loss": 1.1339, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.0510680675506592, |
| "learning_rate": 0.001, |
| "loss": 1.0907, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.1501266956329346, |
| "learning_rate": 0.001, |
| "loss": 1.1127, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.1204190254211426, |
| "learning_rate": 0.001, |
| "loss": 1.1262, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.0314933061599731, |
| "learning_rate": 0.001, |
| "loss": 1.1214, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.4279398918151855, |
| "learning_rate": 0.001, |
| "loss": 1.1146, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 0.9070366621017456, |
| "learning_rate": 0.001, |
| "loss": 1.1344, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.1681766510009766, |
| "learning_rate": 0.001, |
| "loss": 1.1446, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.0091142654418945, |
| "learning_rate": 0.001, |
| "loss": 1.1534, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.163992166519165, |
| "learning_rate": 0.001, |
| "loss": 1.1348, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.4471473693847656, |
| "learning_rate": 0.001, |
| "loss": 1.1372, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.072084665298462, |
| "learning_rate": 0.001, |
| "loss": 1.1513, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.074723720550537, |
| "learning_rate": 0.001, |
| "loss": 1.1345, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.3358465433120728, |
| "learning_rate": 0.001, |
| "loss": 1.148, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.1754719018936157, |
| "learning_rate": 0.001, |
| "loss": 1.1531, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.1433322429656982, |
| "learning_rate": 0.001, |
| "loss": 1.1684, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 1.201206922531128, |
| "learning_rate": 0.001, |
| "loss": 1.1648, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.3061528205871582, |
| "learning_rate": 0.001, |
| "loss": 1.177, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.055266261100769, |
| "learning_rate": 0.001, |
| "loss": 1.1715, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.279536247253418, |
| "learning_rate": 0.001, |
| "loss": 1.1822, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.0013713836669922, |
| "learning_rate": 0.001, |
| "loss": 1.182, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.2121005058288574, |
| "learning_rate": 0.001, |
| "loss": 1.1937, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.1226108074188232, |
| "learning_rate": 0.001, |
| "loss": 1.1872, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.16231369972229, |
| "learning_rate": 0.001, |
| "loss": 1.2158, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 0.9862212538719177, |
| "learning_rate": 0.001, |
| "loss": 1.1814, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 1.0801526308059692, |
| "learning_rate": 0.001, |
| "loss": 1.1786, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.2229857444763184, |
| "learning_rate": 0.001, |
| "loss": 1.1822, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.4451088905334473, |
| "learning_rate": 0.001, |
| "loss": 1.1959, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 0.986847460269928, |
| "learning_rate": 0.001, |
| "loss": 1.1885, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.1042050123214722, |
| "learning_rate": 0.001, |
| "loss": 1.1995, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 0.8930547833442688, |
| "learning_rate": 0.001, |
| "loss": 1.1226, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.0590393543243408, |
| "learning_rate": 0.001, |
| "loss": 1.0869, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.775429368019104, |
| "learning_rate": 0.001, |
| "loss": 1.0912, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.147539734840393, |
| "learning_rate": 0.001, |
| "loss": 1.088, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 0.8840130567550659, |
| "learning_rate": 0.001, |
| "loss": 1.1231, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 1.4045928716659546, |
| "learning_rate": 0.001, |
| "loss": 1.1347, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.090484857559204, |
| "learning_rate": 0.001, |
| "loss": 1.0942, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.1950263977050781, |
| "learning_rate": 0.001, |
| "loss": 1.1123, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 2.7916829586029053, |
| "learning_rate": 0.001, |
| "loss": 1.113, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.1619802713394165, |
| "learning_rate": 0.001, |
| "loss": 1.1258, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 0.9073593616485596, |
| "learning_rate": 0.001, |
| "loss": 1.1345, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 0.8575296401977539, |
| "learning_rate": 0.001, |
| "loss": 1.1108, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.386518955230713, |
| "learning_rate": 0.001, |
| "loss": 1.1076, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 76.49897003173828, |
| "learning_rate": 0.001, |
| "loss": 1.1144, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 0.9959169626235962, |
| "learning_rate": 0.001, |
| "loss": 1.1121, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 0.896315336227417, |
| "learning_rate": 0.001, |
| "loss": 1.1449, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 0.8647759556770325, |
| "learning_rate": 0.001, |
| "loss": 1.1313, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.080623984336853, |
| "learning_rate": 0.001, |
| "loss": 1.1644, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.1319677829742432, |
| "learning_rate": 0.001, |
| "loss": 1.1349, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.0553691387176514, |
| "learning_rate": 0.001, |
| "loss": 1.1447, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.051260232925415, |
| "learning_rate": 0.001, |
| "loss": 1.1257, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.0142831802368164, |
| "learning_rate": 0.001, |
| "loss": 1.161, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.0020849704742432, |
| "learning_rate": 0.001, |
| "loss": 1.1579, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 0.996508002281189, |
| "learning_rate": 0.001, |
| "loss": 1.1621, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.1945658922195435, |
| "learning_rate": 0.001, |
| "loss": 1.1346, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 0.9318341612815857, |
| "learning_rate": 0.001, |
| "loss": 1.1534, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.0282987356185913, |
| "learning_rate": 0.001, |
| "loss": 1.1679, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 0.9928200840950012, |
| "learning_rate": 0.001, |
| "loss": 1.1851, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.1055632829666138, |
| "learning_rate": 0.001, |
| "loss": 1.1779, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 0.9744527339935303, |
| "learning_rate": 0.001, |
| "loss": 1.1775, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.0894049406051636, |
| "learning_rate": 0.001, |
| "loss": 1.1786, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 0.2896379232406616, |
| "learning_rate": 0.001, |
| "loss": 1.0473, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 0.6147540211677551, |
| "learning_rate": 0.001, |
| "loss": 1.1036, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 0.5519897937774658, |
| "learning_rate": 0.001, |
| "loss": 1.0645, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 0.30935660004615784, |
| "learning_rate": 0.001, |
| "loss": 1.0736, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 1.2920175790786743, |
| "learning_rate": 0.001, |
| "loss": 1.0879, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 0.775704562664032, |
| "learning_rate": 0.001, |
| "loss": 1.106, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 0.5069411993026733, |
| "learning_rate": 0.001, |
| "loss": 1.096, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 1.0527359247207642, |
| "learning_rate": 0.001, |
| "loss": 1.0937, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 1.1386781930923462, |
| "learning_rate": 0.001, |
| "loss": 1.0713, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 0.48567479848861694, |
| "learning_rate": 0.001, |
| "loss": 1.1076, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 0.3815343379974365, |
| "learning_rate": 0.001, |
| "loss": 1.1034, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 0.11455032974481583, |
| "learning_rate": 0.001, |
| "loss": 1.1048, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 0.6132304072380066, |
| "learning_rate": 0.001, |
| "loss": 1.1235, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 0.564409613609314, |
| "learning_rate": 0.001, |
| "loss": 1.1046, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 0.3146139085292816, |
| "learning_rate": 0.001, |
| "loss": 1.1204, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 0.21083228290081024, |
| "learning_rate": 0.001, |
| "loss": 1.0924, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 0.9692633152008057, |
| "learning_rate": 0.001, |
| "loss": 1.1529, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 0.813957929611206, |
| "learning_rate": 0.001, |
| "loss": 1.1256, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 1.2155555486679077, |
| "learning_rate": 0.001, |
| "loss": 1.1369, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 0.21164648234844208, |
| "learning_rate": 0.001, |
| "loss": 1.1224, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 1.0323749780654907, |
| "learning_rate": 0.001, |
| "loss": 1.1336, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 0.2119108885526657, |
| "learning_rate": 0.001, |
| "loss": 1.1362, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 0.8170724511146545, |
| "learning_rate": 0.001, |
| "loss": 1.125, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 0.5977357625961304, |
| "learning_rate": 0.001, |
| "loss": 1.1174, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 0.6432121396064758, |
| "learning_rate": 0.001, |
| "loss": 1.1451, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 0.4329550862312317, |
| "learning_rate": 0.001, |
| "loss": 1.1333, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 0.66637122631073, |
| "learning_rate": 0.001, |
| "loss": 1.1565, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 0.4904533922672272, |
| "learning_rate": 0.001, |
| "loss": 1.1281, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 0.5748351812362671, |
| "learning_rate": 0.001, |
| "loss": 1.1416, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 0.5132641792297363, |
| "learning_rate": 0.001, |
| "loss": 1.1453, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 0.8233006596565247, |
| "learning_rate": 0.001, |
| "loss": 1.142, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 1.450419306755066, |
| "learning_rate": 0.001, |
| "loss": 1.1022, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 0.9929932951927185, |
| "learning_rate": 0.001, |
| "loss": 1.0498, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.1767561435699463, |
| "learning_rate": 0.001, |
| "loss": 1.0544, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 1.4456899166107178, |
| "learning_rate": 0.001, |
| "loss": 1.0704, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.3941863775253296, |
| "learning_rate": 0.001, |
| "loss": 1.0555, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.3099164962768555, |
| "learning_rate": 0.001, |
| "loss": 1.0503, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.0749870538711548, |
| "learning_rate": 0.001, |
| "loss": 1.0674, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.2630252838134766, |
| "learning_rate": 0.001, |
| "loss": 1.069, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.1559174060821533, |
| "learning_rate": 0.001, |
| "loss": 1.0838, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 1.3933382034301758, |
| "learning_rate": 0.001, |
| "loss": 1.0832, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.12375807762146, |
| "learning_rate": 0.001, |
| "loss": 1.1035, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.1551575660705566, |
| "learning_rate": 0.001, |
| "loss": 1.0778, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.2731724977493286, |
| "learning_rate": 0.001, |
| "loss": 1.0918, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.1205143928527832, |
| "learning_rate": 0.001, |
| "loss": 1.0749, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.176575779914856, |
| "learning_rate": 0.001, |
| "loss": 1.079, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.095289707183838, |
| "learning_rate": 0.001, |
| "loss": 1.0939, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.2680126428604126, |
| "learning_rate": 0.001, |
| "loss": 1.0829, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.43263578414917, |
| "learning_rate": 0.001, |
| "loss": 1.101, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.2441238164901733, |
| "learning_rate": 0.001, |
| "loss": 1.107, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.180927038192749, |
| "learning_rate": 0.001, |
| "loss": 1.0939, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.0788034200668335, |
| "learning_rate": 0.001, |
| "loss": 1.1133, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.4446241855621338, |
| "learning_rate": 0.001, |
| "loss": 1.1139, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.2069514989852905, |
| "learning_rate": 0.001, |
| "loss": 1.1228, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.1585434675216675, |
| "learning_rate": 0.001, |
| "loss": 1.0998, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 1.341698169708252, |
| "learning_rate": 0.001, |
| "loss": 1.1213, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.0288625955581665, |
| "learning_rate": 0.001, |
| "loss": 1.1245, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.1505529880523682, |
| "learning_rate": 0.001, |
| "loss": 1.1668, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.243490219116211, |
| "learning_rate": 0.001, |
| "loss": 1.1389, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.4656898975372314, |
| "learning_rate": 0.001, |
| "loss": 1.1215, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.286278247833252, |
| "learning_rate": 0.001, |
| "loss": 1.1255, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.291344404220581, |
| "learning_rate": 0.001, |
| "loss": 1.1213, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.1009141206741333, |
| "learning_rate": 0.001, |
| "loss": 1.0931, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.21869695186615, |
| "learning_rate": 0.001, |
| "loss": 1.0301, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.084806203842163, |
| "learning_rate": 0.001, |
| "loss": 1.0152, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 1.2538260221481323, |
| "learning_rate": 0.001, |
| "loss": 1.0652, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.1370879411697388, |
| "learning_rate": 0.001, |
| "loss": 1.0567, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.0101512670516968, |
| "learning_rate": 0.001, |
| "loss": 1.0426, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.0626932382583618, |
| "learning_rate": 0.001, |
| "loss": 1.0708, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 1.3760011196136475, |
| "learning_rate": 0.001, |
| "loss": 1.0445, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 6.08766508102417, |
| "learning_rate": 0.001, |
| "loss": 1.0801, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.370469570159912, |
| "learning_rate": 0.001, |
| "loss": 1.0628, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.2071462869644165, |
| "learning_rate": 0.001, |
| "loss": 1.091, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.372673511505127, |
| "learning_rate": 0.001, |
| "loss": 1.0651, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.2071818113327026, |
| "learning_rate": 0.001, |
| "loss": 1.0622, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.0134035348892212, |
| "learning_rate": 0.001, |
| "loss": 1.0684, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.3614178895950317, |
| "learning_rate": 0.001, |
| "loss": 1.0721, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.2039440870285034, |
| "learning_rate": 0.001, |
| "loss": 1.0645, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.2189968824386597, |
| "learning_rate": 0.001, |
| "loss": 1.0833, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.1108815670013428, |
| "learning_rate": 0.001, |
| "loss": 1.0945, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.3825979232788086, |
| "learning_rate": 0.001, |
| "loss": 1.0647, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.2646015882492065, |
| "learning_rate": 0.001, |
| "loss": 1.0858, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.1898399591445923, |
| "learning_rate": 0.001, |
| "loss": 1.081, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.1825064420700073, |
| "learning_rate": 0.001, |
| "loss": 1.0948, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.337872862815857, |
| "learning_rate": 0.001, |
| "loss": 1.0779, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.146846890449524, |
| "learning_rate": 0.001, |
| "loss": 1.0819, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.468003511428833, |
| "learning_rate": 0.001, |
| "loss": 1.0798, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 1.125246286392212, |
| "learning_rate": 0.001, |
| "loss": 1.0924, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 1.1755690574645996, |
| "learning_rate": 0.001, |
| "loss": 1.0965, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.1506797075271606, |
| "learning_rate": 0.001, |
| "loss": 1.1024, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 1.084913730621338, |
| "learning_rate": 0.001, |
| "loss": 1.0961, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.0553350448608398, |
| "learning_rate": 0.001, |
| "loss": 1.1164, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.1251089572906494, |
| "learning_rate": 0.001, |
| "loss": 1.1268, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.3656169176101685, |
| "learning_rate": 0.001, |
| "loss": 1.0753, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.0576167106628418, |
| "learning_rate": 0.001, |
| "loss": 0.9991, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.0733096599578857, |
| "learning_rate": 0.001, |
| "loss": 1.0239, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.265825629234314, |
| "learning_rate": 0.001, |
| "loss": 1.0298, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.655713438987732, |
| "learning_rate": 0.001, |
| "loss": 1.016, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.3085395097732544, |
| "learning_rate": 0.001, |
| "loss": 1.0343, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 1.0143513679504395, |
| "learning_rate": 0.001, |
| "loss": 1.0293, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.4002749919891357, |
| "learning_rate": 0.001, |
| "loss": 1.0416, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.082223892211914, |
| "learning_rate": 0.001, |
| "loss": 1.0368, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.3155796527862549, |
| "learning_rate": 0.001, |
| "loss": 1.051, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.3327791690826416, |
| "learning_rate": 0.001, |
| "loss": 1.0128, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 0.9752927422523499, |
| "learning_rate": 0.001, |
| "loss": 1.0389, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.3138093948364258, |
| "learning_rate": 0.001, |
| "loss": 1.0696, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 0.9144354462623596, |
| "learning_rate": 0.001, |
| "loss": 1.047, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 1.2346453666687012, |
| "learning_rate": 0.001, |
| "loss": 1.0398, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.703855037689209, |
| "learning_rate": 0.001, |
| "loss": 1.0524, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.0008935928344727, |
| "learning_rate": 0.001, |
| "loss": 1.0512, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.1595375537872314, |
| "learning_rate": 0.001, |
| "loss": 1.0562, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 0.9839758276939392, |
| "learning_rate": 0.001, |
| "loss": 1.0767, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 0.9803606271743774, |
| "learning_rate": 0.001, |
| "loss": 1.0523, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.0053520202636719, |
| "learning_rate": 0.001, |
| "loss": 1.069, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.0004795789718628, |
| "learning_rate": 0.001, |
| "loss": 1.0402, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.1498825550079346, |
| "learning_rate": 0.001, |
| "loss": 1.0616, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 1.0905274152755737, |
| "learning_rate": 0.001, |
| "loss": 1.0813, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 0.9474394917488098, |
| "learning_rate": 0.001, |
| "loss": 1.0742, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 1.319617509841919, |
| "learning_rate": 0.001, |
| "loss": 1.0922, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.1449226140975952, |
| "learning_rate": 0.001, |
| "loss": 1.1161, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.1758530139923096, |
| "learning_rate": 0.001, |
| "loss": 1.0891, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.2047131061553955, |
| "learning_rate": 0.001, |
| "loss": 1.0833, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.0821508169174194, |
| "learning_rate": 0.001, |
| "loss": 1.0829, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 1.1592580080032349, |
| "learning_rate": 0.001, |
| "loss": 1.0958, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.1290960311889648, |
| "learning_rate": 0.001, |
| "loss": 1.0154, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 1.4548909664154053, |
| "learning_rate": 0.001, |
| "loss": 0.9883, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.4414570331573486, |
| "learning_rate": 0.001, |
| "loss": 0.9885, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 1.0668739080429077, |
| "learning_rate": 0.001, |
| "loss": 1.0079, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.2436254024505615, |
| "learning_rate": 0.001, |
| "loss": 1.0177, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 1.1421066522598267, |
| "learning_rate": 0.001, |
| "loss": 1.0225, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 0.9146871566772461, |
| "learning_rate": 0.001, |
| "loss": 1.0021, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.0086992979049683, |
| "learning_rate": 0.001, |
| "loss": 1.0366, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 0.9744951128959656, |
| "learning_rate": 0.001, |
| "loss": 1.0344, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.1832053661346436, |
| "learning_rate": 0.001, |
| "loss": 1.0138, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 1.3362852334976196, |
| "learning_rate": 0.001, |
| "loss": 1.0176, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 1.1680186986923218, |
| "learning_rate": 0.001, |
| "loss": 1.0305, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.1634215116500854, |
| "learning_rate": 0.001, |
| "loss": 1.0087, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 1.0064040422439575, |
| "learning_rate": 0.001, |
| "loss": 1.0162, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.3981666564941406, |
| "learning_rate": 0.001, |
| "loss": 1.0381, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.1178312301635742, |
| "learning_rate": 0.001, |
| "loss": 1.0377, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.013622522354126, |
| "learning_rate": 0.001, |
| "loss": 1.0503, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.1112215518951416, |
| "learning_rate": 0.001, |
| "loss": 1.0493, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 1.1293482780456543, |
| "learning_rate": 0.001, |
| "loss": 1.043, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.1543068885803223, |
| "learning_rate": 0.001, |
| "loss": 1.0458, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 0.9590059518814087, |
| "learning_rate": 0.001, |
| "loss": 1.0692, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 0.9611120820045471, |
| "learning_rate": 0.001, |
| "loss": 1.0507, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 1.105459451675415, |
| "learning_rate": 0.001, |
| "loss": 1.0477, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.2844051122665405, |
| "learning_rate": 0.001, |
| "loss": 1.0547, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 1.194689154624939, |
| "learning_rate": 0.001, |
| "loss": 1.0415, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.2303766012191772, |
| "learning_rate": 0.001, |
| "loss": 1.0577, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 1.0361992120742798, |
| "learning_rate": 0.001, |
| "loss": 1.0653, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.1771466732025146, |
| "learning_rate": 0.001, |
| "loss": 1.0595, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.1949156522750854, |
| "learning_rate": 0.001, |
| "loss": 1.061, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.147527813911438, |
| "learning_rate": 0.001, |
| "loss": 1.0707, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.1559338569641113, |
| "learning_rate": 0.001, |
| "loss": 1.0782, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.3547965288162231, |
| "learning_rate": 0.001, |
| "loss": 1.0003, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 1.4437899589538574, |
| "learning_rate": 0.001, |
| "loss": 0.966, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.0247992277145386, |
| "learning_rate": 0.001, |
| "loss": 0.9779, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.0951383113861084, |
| "learning_rate": 0.001, |
| "loss": 0.9782, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.2668837308883667, |
| "learning_rate": 0.001, |
| "loss": 0.977, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.2851572036743164, |
| "learning_rate": 0.001, |
| "loss": 0.9819, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 1.1531295776367188, |
| "learning_rate": 0.001, |
| "loss": 0.9862, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 1.1562260389328003, |
| "learning_rate": 0.001, |
| "loss": 0.9874, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.5142194032669067, |
| "learning_rate": 0.001, |
| "loss": 1.0211, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.2942471504211426, |
| "learning_rate": 0.001, |
| "loss": 0.9893, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 1.273345708847046, |
| "learning_rate": 0.001, |
| "loss": 1.0102, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.251236915588379, |
| "learning_rate": 0.001, |
| "loss": 1.0103, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.4119136333465576, |
| "learning_rate": 0.001, |
| "loss": 0.9897, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.2509639263153076, |
| "learning_rate": 0.001, |
| "loss": 1.0158, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.3147063255310059, |
| "learning_rate": 0.001, |
| "loss": 1.0271, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 1.2182319164276123, |
| "learning_rate": 0.001, |
| "loss": 1.0446, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 3.014864206314087, |
| "learning_rate": 0.001, |
| "loss": 1.0268, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.1657381057739258, |
| "learning_rate": 0.001, |
| "loss": 1.0381, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.2702171802520752, |
| "learning_rate": 0.001, |
| "loss": 1.022, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.3262053728103638, |
| "learning_rate": 0.001, |
| "loss": 1.0328, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.1520822048187256, |
| "learning_rate": 0.001, |
| "loss": 1.0414, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.637629747390747, |
| "learning_rate": 0.001, |
| "loss": 1.0357, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 1.3296183347702026, |
| "learning_rate": 0.001, |
| "loss": 1.0383, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.197227120399475, |
| "learning_rate": 0.001, |
| "loss": 1.0412, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.2573148012161255, |
| "learning_rate": 0.001, |
| "loss": 1.0402, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.1908835172653198, |
| "learning_rate": 0.001, |
| "loss": 1.0467, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.181947946548462, |
| "learning_rate": 0.001, |
| "loss": 1.0488, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.127081036567688, |
| "learning_rate": 0.001, |
| "loss": 1.028, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.8955832719802856, |
| "learning_rate": 0.001, |
| "loss": 1.0532, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 1.178209900856018, |
| "learning_rate": 0.001, |
| "loss": 1.0188, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 1.3171616792678833, |
| "learning_rate": 0.001, |
| "loss": 1.019, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 0.9879239201545715, |
| "learning_rate": 0.001, |
| "loss": 0.9763, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 0.9748243093490601, |
| "learning_rate": 0.001, |
| "loss": 0.9399, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.102300763130188, |
| "learning_rate": 0.001, |
| "loss": 0.9548, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.0693408250808716, |
| "learning_rate": 0.001, |
| "loss": 0.964, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.0911110639572144, |
| "learning_rate": 0.001, |
| "loss": 0.9647, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.2917101383209229, |
| "learning_rate": 0.001, |
| "loss": 0.9822, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.0852818489074707, |
| "learning_rate": 0.001, |
| "loss": 0.9762, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.5321288108825684, |
| "learning_rate": 0.001, |
| "loss": 0.9643, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.1907495260238647, |
| "learning_rate": 0.001, |
| "loss": 0.9786, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.236419439315796, |
| "learning_rate": 0.001, |
| "loss": 1.0093, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.4393523931503296, |
| "learning_rate": 0.001, |
| "loss": 0.9868, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.1615034341812134, |
| "learning_rate": 0.001, |
| "loss": 0.9932, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.4453788995742798, |
| "learning_rate": 0.001, |
| "loss": 0.9853, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 1.1026290655136108, |
| "learning_rate": 0.001, |
| "loss": 0.9995, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.1974660158157349, |
| "learning_rate": 0.001, |
| "loss": 0.9895, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.2898255586624146, |
| "learning_rate": 0.001, |
| "loss": 1.0166, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.1666028499603271, |
| "learning_rate": 0.001, |
| "loss": 1.0055, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.4688661098480225, |
| "learning_rate": 0.001, |
| "loss": 0.9944, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 1.2777717113494873, |
| "learning_rate": 0.001, |
| "loss": 1.0156, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.1962950229644775, |
| "learning_rate": 0.001, |
| "loss": 1.0111, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 1.0366114377975464, |
| "learning_rate": 0.001, |
| "loss": 1.012, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.0451583862304688, |
| "learning_rate": 0.001, |
| "loss": 1.0146, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.020317554473877, |
| "learning_rate": 0.001, |
| "loss": 1.0263, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.1409897804260254, |
| "learning_rate": 0.001, |
| "loss": 1.0167, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 1.5148199796676636, |
| "learning_rate": 0.001, |
| "loss": 1.0359, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.0931096076965332, |
| "learning_rate": 0.001, |
| "loss": 1.0176, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.3822332620620728, |
| "learning_rate": 0.001, |
| "loss": 1.0394, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.0372686386108398, |
| "learning_rate": 0.001, |
| "loss": 1.0125, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.1177948713302612, |
| "learning_rate": 0.001, |
| "loss": 1.0355, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 22.84385108947754, |
| "learning_rate": 0.001, |
| "loss": 1.0112, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.431148648262024, |
| "learning_rate": 0.001, |
| "loss": 1.0157, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.1543240547180176, |
| "learning_rate": 0.001, |
| "loss": 0.9505, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.2091968059539795, |
| "learning_rate": 0.001, |
| "loss": 0.9292, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.3215097188949585, |
| "learning_rate": 0.001, |
| "loss": 0.9619, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.0096403360366821, |
| "learning_rate": 0.001, |
| "loss": 0.9312, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.2567592859268188, |
| "learning_rate": 0.001, |
| "loss": 0.9507, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.2970302104949951, |
| "learning_rate": 0.001, |
| "loss": 0.9646, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.2226184606552124, |
| "learning_rate": 0.001, |
| "loss": 0.9644, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.4690126180648804, |
| "learning_rate": 0.001, |
| "loss": 0.9591, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 5.503747463226318, |
| "learning_rate": 0.001, |
| "loss": 0.9596, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 1.5522727966308594, |
| "learning_rate": 0.001, |
| "loss": 0.9552, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.183016061782837, |
| "learning_rate": 0.001, |
| "loss": 0.9799, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 1.1440593004226685, |
| "learning_rate": 0.001, |
| "loss": 0.9745, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.1371045112609863, |
| "learning_rate": 0.001, |
| "loss": 0.9723, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.2485049962997437, |
| "learning_rate": 0.001, |
| "loss": 0.9701, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.0270694494247437, |
| "learning_rate": 0.001, |
| "loss": 0.9945, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.3093137741088867, |
| "learning_rate": 0.001, |
| "loss": 0.9781, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 0.982565701007843, |
| "learning_rate": 0.001, |
| "loss": 0.9586, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.180829644203186, |
| "learning_rate": 0.001, |
| "loss": 1.0096, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 1.8596967458724976, |
| "learning_rate": 0.001, |
| "loss": 1.0101, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.1227686405181885, |
| "learning_rate": 0.001, |
| "loss": 0.9968, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.3988664150238037, |
| "learning_rate": 0.001, |
| "loss": 0.9934, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.0867431163787842, |
| "learning_rate": 0.001, |
| "loss": 1.0071, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 1.2575784921646118, |
| "learning_rate": 0.001, |
| "loss": 1.0145, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 1.1998168230056763, |
| "learning_rate": 0.001, |
| "loss": 0.9904, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 0.9470672011375427, |
| "learning_rate": 0.001, |
| "loss": 1.0114, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.0151934623718262, |
| "learning_rate": 0.001, |
| "loss": 1.0152, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 1.1765100955963135, |
| "learning_rate": 0.001, |
| "loss": 1.023, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.2009962797164917, |
| "learning_rate": 0.001, |
| "loss": 0.9987, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.1376994848251343, |
| "learning_rate": 0.001, |
| "loss": 0.9883, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 0.9074110984802246, |
| "learning_rate": 0.001, |
| "loss": 1.0078, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 0.8598896861076355, |
| "learning_rate": 0.001, |
| "loss": 0.9777, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 0.7127558588981628, |
| "learning_rate": 0.001, |
| "loss": 0.9143, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 0.7595661878585815, |
| "learning_rate": 0.001, |
| "loss": 0.9234, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 0.8373937010765076, |
| "learning_rate": 0.001, |
| "loss": 0.9316, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 1.197033166885376, |
| "learning_rate": 0.001, |
| "loss": 0.949, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 0.6342483758926392, |
| "learning_rate": 0.001, |
| "loss": 0.9397, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 1.198174238204956, |
| "learning_rate": 0.001, |
| "loss": 0.9458, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 0.8614729642868042, |
| "learning_rate": 0.001, |
| "loss": 0.9565, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 0.9009416699409485, |
| "learning_rate": 0.001, |
| "loss": 0.9174, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.1099083423614502, |
| "learning_rate": 0.001, |
| "loss": 0.9521, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 0.8563976883888245, |
| "learning_rate": 0.001, |
| "loss": 0.9295, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 0.8638460040092468, |
| "learning_rate": 0.001, |
| "loss": 0.9511, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 0.7926732301712036, |
| "learning_rate": 0.001, |
| "loss": 0.9582, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 0.6091955900192261, |
| "learning_rate": 0.001, |
| "loss": 0.948, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 1.9351041316986084, |
| "learning_rate": 0.001, |
| "loss": 0.9555, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 0.9850326776504517, |
| "learning_rate": 0.001, |
| "loss": 0.945, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 1.0357599258422852, |
| "learning_rate": 0.001, |
| "loss": 0.975, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 0.6513636112213135, |
| "learning_rate": 0.001, |
| "loss": 0.9802, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.0478070974349976, |
| "learning_rate": 0.001, |
| "loss": 0.9791, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 0.8873677253723145, |
| "learning_rate": 0.001, |
| "loss": 0.975, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 0.8857697248458862, |
| "learning_rate": 0.001, |
| "loss": 0.9868, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 0.8290280699729919, |
| "learning_rate": 0.001, |
| "loss": 0.9874, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 1.063020944595337, |
| "learning_rate": 0.001, |
| "loss": 0.9847, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 0.8536366820335388, |
| "learning_rate": 0.001, |
| "loss": 0.9826, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 1.7910808324813843, |
| "learning_rate": 0.001, |
| "loss": 1.0049, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 0.9663560390472412, |
| "learning_rate": 0.001, |
| "loss": 0.9915, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 0.8727264404296875, |
| "learning_rate": 0.001, |
| "loss": 1.0047, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 1.2965424060821533, |
| "learning_rate": 0.001, |
| "loss": 0.9907, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.0414011478424072, |
| "learning_rate": 0.001, |
| "loss": 1.0034, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 0.7418168783187866, |
| "learning_rate": 0.001, |
| "loss": 0.9997, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 0.7445783615112305, |
| "learning_rate": 0.001, |
| "loss": 0.9919, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 1.2291535139083862, |
| "learning_rate": 0.001, |
| "loss": 0.9792, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 1.6661193370819092, |
| "learning_rate": 0.001, |
| "loss": 0.9099, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 1.3818449974060059, |
| "learning_rate": 0.001, |
| "loss": 0.9103, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 1.2478028535842896, |
| "learning_rate": 0.001, |
| "loss": 0.9196, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.4901819229125977, |
| "learning_rate": 0.001, |
| "loss": 0.9162, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 1.3518427610397339, |
| "learning_rate": 0.001, |
| "loss": 0.9214, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 1.1966758966445923, |
| "learning_rate": 0.001, |
| "loss": 0.921, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 1.0546095371246338, |
| "learning_rate": 0.001, |
| "loss": 0.9203, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 1.6791573762893677, |
| "learning_rate": 0.001, |
| "loss": 0.9263, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 1.7650243043899536, |
| "learning_rate": 0.001, |
| "loss": 0.9263, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 1.4087214469909668, |
| "learning_rate": 0.001, |
| "loss": 0.9372, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.335076928138733, |
| "learning_rate": 0.001, |
| "loss": 0.9456, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 1.5549242496490479, |
| "learning_rate": 0.001, |
| "loss": 0.9418, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 2.1881766319274902, |
| "learning_rate": 0.001, |
| "loss": 0.9424, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.419062614440918, |
| "learning_rate": 0.001, |
| "loss": 0.9557, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.5608348846435547, |
| "learning_rate": 0.001, |
| "loss": 0.9554, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.1787161827087402, |
| "learning_rate": 0.001, |
| "loss": 0.934, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.2317980527877808, |
| "learning_rate": 0.001, |
| "loss": 0.9724, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.6141093969345093, |
| "learning_rate": 0.001, |
| "loss": 0.9421, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.3813108205795288, |
| "learning_rate": 0.001, |
| "loss": 0.9603, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 1.9711265563964844, |
| "learning_rate": 0.001, |
| "loss": 0.9639, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.3804035186767578, |
| "learning_rate": 0.001, |
| "loss": 0.9641, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 1.8484543561935425, |
| "learning_rate": 0.001, |
| "loss": 0.9766, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.755317211151123, |
| "learning_rate": 0.001, |
| "loss": 0.9875, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 1.6666924953460693, |
| "learning_rate": 0.001, |
| "loss": 0.9852, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 1.394085168838501, |
| "learning_rate": 0.001, |
| "loss": 0.9903, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.8693289756774902, |
| "learning_rate": 0.001, |
| "loss": 0.9803, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 1.5679796934127808, |
| "learning_rate": 0.001, |
| "loss": 0.989, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.6920912265777588, |
| "learning_rate": 0.001, |
| "loss": 0.9789, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.1828668117523193, |
| "learning_rate": 0.001, |
| "loss": 0.9783, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 1.4724658727645874, |
| "learning_rate": 0.001, |
| "loss": 0.9847, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.3865439891815186, |
| "learning_rate": 0.001, |
| "loss": 0.9699, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.377656102180481, |
| "learning_rate": 0.001, |
| "loss": 0.8899, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 1.2798742055892944, |
| "learning_rate": 0.001, |
| "loss": 0.9105, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.2324934005737305, |
| "learning_rate": 0.001, |
| "loss": 0.8821, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 1.3357598781585693, |
| "learning_rate": 0.001, |
| "loss": 0.8884, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.5128265619277954, |
| "learning_rate": 0.001, |
| "loss": 0.9217, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 1.328216314315796, |
| "learning_rate": 0.001, |
| "loss": 0.9182, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.1615173816680908, |
| "learning_rate": 0.001, |
| "loss": 0.8841, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 1.154482364654541, |
| "learning_rate": 0.001, |
| "loss": 0.9154, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 1.2462170124053955, |
| "learning_rate": 0.001, |
| "loss": 0.9134, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 1.3430410623550415, |
| "learning_rate": 0.001, |
| "loss": 0.9442, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.0239200592041016, |
| "learning_rate": 0.001, |
| "loss": 0.9379, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.3149348497390747, |
| "learning_rate": 0.001, |
| "loss": 0.9362, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 1.2154160737991333, |
| "learning_rate": 0.001, |
| "loss": 0.9421, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.1552187204360962, |
| "learning_rate": 0.001, |
| "loss": 0.9314, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.5669306516647339, |
| "learning_rate": 0.001, |
| "loss": 0.9438, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.1329694986343384, |
| "learning_rate": 0.001, |
| "loss": 0.9255, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 1.1270161867141724, |
| "learning_rate": 0.001, |
| "loss": 0.9267, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.4306011199951172, |
| "learning_rate": 0.001, |
| "loss": 0.9465, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.260940670967102, |
| "learning_rate": 0.001, |
| "loss": 0.9495, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.591579794883728, |
| "learning_rate": 0.001, |
| "loss": 0.9466, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.207124948501587, |
| "learning_rate": 0.001, |
| "loss": 0.9448, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.2340648174285889, |
| "learning_rate": 0.001, |
| "loss": 0.9448, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 1.2188169956207275, |
| "learning_rate": 0.001, |
| "loss": 0.962, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.1369333267211914, |
| "learning_rate": 0.001, |
| "loss": 0.9493, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 1.1521050930023193, |
| "learning_rate": 0.001, |
| "loss": 0.9591, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 1.3265663385391235, |
| "learning_rate": 0.001, |
| "loss": 0.962, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.1715490818023682, |
| "learning_rate": 0.001, |
| "loss": 0.9482, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.5694321393966675, |
| "learning_rate": 0.001, |
| "loss": 0.9593, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 1.4879381656646729, |
| "learning_rate": 0.001, |
| "loss": 0.9633, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.109749674797058, |
| "learning_rate": 0.001, |
| "loss": 0.9738, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.32253098487854, |
| "learning_rate": 0.001, |
| "loss": 0.974, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.4396809339523315, |
| "learning_rate": 0.001, |
| "loss": 0.8579, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.4428057670593262, |
| "learning_rate": 0.001, |
| "loss": 0.8854, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 1.3725905418395996, |
| "learning_rate": 0.001, |
| "loss": 0.9104, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.1569488048553467, |
| "learning_rate": 0.001, |
| "loss": 0.8795, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.3119192123413086, |
| "learning_rate": 0.001, |
| "loss": 0.8881, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.108713984489441, |
| "learning_rate": 0.001, |
| "loss": 0.8895, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 1.3103444576263428, |
| "learning_rate": 0.001, |
| "loss": 0.884, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.0280040502548218, |
| "learning_rate": 0.001, |
| "loss": 0.8962, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.3514018058776855, |
| "learning_rate": 0.001, |
| "loss": 0.8936, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 1.2298413515090942, |
| "learning_rate": 0.001, |
| "loss": 0.8838, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.0672255754470825, |
| "learning_rate": 0.001, |
| "loss": 0.9085, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.4790087938308716, |
| "learning_rate": 0.001, |
| "loss": 0.9262, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.1427301168441772, |
| "learning_rate": 0.001, |
| "loss": 0.9084, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.0541187524795532, |
| "learning_rate": 0.001, |
| "loss": 0.9191, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 1.4663690328598022, |
| "learning_rate": 0.001, |
| "loss": 0.9199, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 20.26365089416504, |
| "learning_rate": 0.001, |
| "loss": 0.943, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.370599389076233, |
| "learning_rate": 0.001, |
| "loss": 0.9374, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 1.2954767942428589, |
| "learning_rate": 0.001, |
| "loss": 0.9299, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.1597362756729126, |
| "learning_rate": 0.001, |
| "loss": 0.9112, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.1877658367156982, |
| "learning_rate": 0.001, |
| "loss": 0.9379, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 1.0057965517044067, |
| "learning_rate": 0.001, |
| "loss": 0.9497, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.238929033279419, |
| "learning_rate": 0.001, |
| "loss": 0.9472, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.2580220699310303, |
| "learning_rate": 0.001, |
| "loss": 0.9394, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 1.4125406742095947, |
| "learning_rate": 0.001, |
| "loss": 0.9507, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.0683133602142334, |
| "learning_rate": 0.001, |
| "loss": 0.9365, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.269522786140442, |
| "learning_rate": 0.001, |
| "loss": 0.9508, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.5544761419296265, |
| "learning_rate": 0.001, |
| "loss": 0.9311, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.7500112056732178, |
| "learning_rate": 0.001, |
| "loss": 0.9612, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.395186185836792, |
| "learning_rate": 0.001, |
| "loss": 0.9531, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.5248706340789795, |
| "learning_rate": 0.001, |
| "loss": 0.9398, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.1058337688446045, |
| "learning_rate": 0.001, |
| "loss": 0.9588, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.2561908960342407, |
| "learning_rate": 0.001, |
| "loss": 0.8687, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 1.2578023672103882, |
| "learning_rate": 0.001, |
| "loss": 0.8513, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.7073860168457031, |
| "learning_rate": 0.001, |
| "loss": 0.8689, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 1.2335535287857056, |
| "learning_rate": 0.001, |
| "loss": 0.8638, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.389021396636963, |
| "learning_rate": 0.001, |
| "loss": 0.8734, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 1.0842416286468506, |
| "learning_rate": 0.001, |
| "loss": 0.8801, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.200750708580017, |
| "learning_rate": 0.001, |
| "loss": 0.8774, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.2029190063476562, |
| "learning_rate": 0.001, |
| "loss": 0.893, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.0285815000534058, |
| "learning_rate": 0.001, |
| "loss": 0.9137, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.4431654214859009, |
| "learning_rate": 0.001, |
| "loss": 0.9109, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.1851094961166382, |
| "learning_rate": 0.001, |
| "loss": 0.8865, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.4423298835754395, |
| "learning_rate": 0.001, |
| "loss": 0.9152, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.3550646305084229, |
| "learning_rate": 0.001, |
| "loss": 0.9074, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.1404973268508911, |
| "learning_rate": 0.001, |
| "loss": 0.9146, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.348008632659912, |
| "learning_rate": 0.001, |
| "loss": 0.9181, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.3652067184448242, |
| "learning_rate": 0.001, |
| "loss": 0.9212, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.0638148784637451, |
| "learning_rate": 0.001, |
| "loss": 0.9181, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.0108212232589722, |
| "learning_rate": 0.001, |
| "loss": 0.9068, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 1.4408948421478271, |
| "learning_rate": 0.001, |
| "loss": 0.9113, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.2225804328918457, |
| "learning_rate": 0.001, |
| "loss": 0.9123, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 2.1255993843078613, |
| "learning_rate": 0.001, |
| "loss": 0.9122, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.3163739442825317, |
| "learning_rate": 0.001, |
| "loss": 0.9171, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 1.3287923336029053, |
| "learning_rate": 0.001, |
| "loss": 0.9176, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.0648704767227173, |
| "learning_rate": 0.001, |
| "loss": 0.9277, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.2207541465759277, |
| "learning_rate": 0.001, |
| "loss": 0.9486, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.2167179584503174, |
| "learning_rate": 0.001, |
| "loss": 0.9341, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.1062008142471313, |
| "learning_rate": 0.001, |
| "loss": 0.9155, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.135310173034668, |
| "learning_rate": 0.001, |
| "loss": 0.9465, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.182563304901123, |
| "learning_rate": 0.001, |
| "loss": 0.9311, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.289273977279663, |
| "learning_rate": 0.001, |
| "loss": 0.9395, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 2.1775407791137695, |
| "learning_rate": 0.001, |
| "loss": 0.912, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.0216083526611328, |
| "learning_rate": 0.001, |
| "loss": 0.8395, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.5370399951934814, |
| "learning_rate": 0.001, |
| "loss": 0.8599, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.2524052858352661, |
| "learning_rate": 0.001, |
| "loss": 0.8611, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.2202684879302979, |
| "learning_rate": 0.001, |
| "loss": 0.8669, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.1983212232589722, |
| "learning_rate": 0.001, |
| "loss": 0.859, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 3.6025729179382324, |
| "learning_rate": 0.001, |
| "loss": 0.8675, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.2796443700790405, |
| "learning_rate": 0.001, |
| "loss": 0.8688, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 1.3514477014541626, |
| "learning_rate": 0.001, |
| "loss": 0.8642, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 1.2543727159500122, |
| "learning_rate": 0.001, |
| "loss": 0.892, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 2.0149078369140625, |
| "learning_rate": 0.001, |
| "loss": 0.875, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.5488308668136597, |
| "learning_rate": 0.001, |
| "loss": 0.9035, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 1.1685779094696045, |
| "learning_rate": 0.001, |
| "loss": 0.8959, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.3017526865005493, |
| "learning_rate": 0.001, |
| "loss": 0.8843, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.4337364435195923, |
| "learning_rate": 0.001, |
| "loss": 0.8842, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.4739630222320557, |
| "learning_rate": 0.001, |
| "loss": 0.8781, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.5166560411453247, |
| "learning_rate": 0.001, |
| "loss": 0.9092, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.242997407913208, |
| "learning_rate": 0.001, |
| "loss": 0.8854, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 1.5462573766708374, |
| "learning_rate": 0.001, |
| "loss": 0.9008, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.318841576576233, |
| "learning_rate": 0.001, |
| "loss": 0.9186, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.2831882238388062, |
| "learning_rate": 0.001, |
| "loss": 0.9134, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.7237813472747803, |
| "learning_rate": 0.001, |
| "loss": 0.9139, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.2951987981796265, |
| "learning_rate": 0.001, |
| "loss": 0.9191, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.2112561464309692, |
| "learning_rate": 0.001, |
| "loss": 0.8893, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.510880708694458, |
| "learning_rate": 0.001, |
| "loss": 0.9243, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.351163387298584, |
| "learning_rate": 0.001, |
| "loss": 0.9163, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.3247283697128296, |
| "learning_rate": 0.001, |
| "loss": 0.9262, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.0785566568374634, |
| "learning_rate": 0.001, |
| "loss": 0.9076, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 1.0955142974853516, |
| "learning_rate": 0.001, |
| "loss": 0.9009, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 1.0944831371307373, |
| "learning_rate": 0.001, |
| "loss": 0.9169, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 2.2278664112091064, |
| "learning_rate": 0.001, |
| "loss": 0.9378, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.430474042892456, |
| "learning_rate": 0.001, |
| "loss": 0.8951, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.299006462097168, |
| "learning_rate": 0.001, |
| "loss": 0.82, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.3967899084091187, |
| "learning_rate": 0.001, |
| "loss": 0.8329, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.8231886625289917, |
| "learning_rate": 0.001, |
| "loss": 0.8339, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 2.1227855682373047, |
| "learning_rate": 0.001, |
| "loss": 0.8417, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.2603408098220825, |
| "learning_rate": 0.001, |
| "loss": 0.8658, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.3786275386810303, |
| "learning_rate": 0.001, |
| "loss": 0.8835, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.3917183876037598, |
| "learning_rate": 0.001, |
| "loss": 0.8726, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.4549587965011597, |
| "learning_rate": 0.001, |
| "loss": 0.8656, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.4152559041976929, |
| "learning_rate": 0.001, |
| "loss": 0.8748, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 2.8188393115997314, |
| "learning_rate": 0.001, |
| "loss": 0.8734, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.3342585563659668, |
| "learning_rate": 0.001, |
| "loss": 0.847, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.2291045188903809, |
| "learning_rate": 0.001, |
| "loss": 0.8779, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 1.130303978919983, |
| "learning_rate": 0.001, |
| "loss": 0.8561, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.1709375381469727, |
| "learning_rate": 0.001, |
| "loss": 0.8792, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.5080801248550415, |
| "learning_rate": 0.001, |
| "loss": 0.8826, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.1423321962356567, |
| "learning_rate": 0.001, |
| "loss": 0.9035, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.049439549446106, |
| "learning_rate": 0.001, |
| "loss": 0.8805, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.460243821144104, |
| "learning_rate": 0.001, |
| "loss": 0.9, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.9269883632659912, |
| "learning_rate": 0.001, |
| "loss": 0.9058, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 1.2040832042694092, |
| "learning_rate": 0.001, |
| "loss": 0.8709, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.3963993787765503, |
| "learning_rate": 0.001, |
| "loss": 0.9073, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 1.2941703796386719, |
| "learning_rate": 0.001, |
| "loss": 0.8873, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 1.2239911556243896, |
| "learning_rate": 0.001, |
| "loss": 0.8998, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.0870931148529053, |
| "learning_rate": 0.001, |
| "loss": 0.8863, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.0407912731170654, |
| "learning_rate": 0.001, |
| "loss": 0.8919, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 1.204813838005066, |
| "learning_rate": 0.001, |
| "loss": 0.8963, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 1.4278241395950317, |
| "learning_rate": 0.001, |
| "loss": 0.9084, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.1825990676879883, |
| "learning_rate": 0.001, |
| "loss": 0.9032, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.0813344717025757, |
| "learning_rate": 0.001, |
| "loss": 0.9231, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 1.409714937210083, |
| "learning_rate": 0.001, |
| "loss": 0.9177, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.474061131477356, |
| "learning_rate": 0.001, |
| "loss": 0.8616, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.4782670736312866, |
| "learning_rate": 0.001, |
| "loss": 0.8286, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.4716808795928955, |
| "learning_rate": 0.001, |
| "loss": 0.8311, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.3071645498275757, |
| "learning_rate": 0.001, |
| "loss": 0.8232, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.4670727252960205, |
| "learning_rate": 0.001, |
| "loss": 0.8244, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.10783851146698, |
| "learning_rate": 0.001, |
| "loss": 0.8399, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.0578640699386597, |
| "learning_rate": 0.001, |
| "loss": 0.8455, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 1.236038327217102, |
| "learning_rate": 0.001, |
| "loss": 0.842, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 1.1498534679412842, |
| "learning_rate": 0.001, |
| "loss": 0.8523, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.199738621711731, |
| "learning_rate": 0.001, |
| "loss": 0.8457, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 1.2088783979415894, |
| "learning_rate": 0.001, |
| "loss": 0.8572, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 1.3873686790466309, |
| "learning_rate": 0.001, |
| "loss": 0.8561, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.2533921003341675, |
| "learning_rate": 0.001, |
| "loss": 0.8655, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 1.8301284313201904, |
| "learning_rate": 0.001, |
| "loss": 0.8748, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 1.1949044466018677, |
| "learning_rate": 0.001, |
| "loss": 0.859, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 2.156513214111328, |
| "learning_rate": 0.001, |
| "loss": 0.8567, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 1.2791069746017456, |
| "learning_rate": 0.001, |
| "loss": 0.8792, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.3833438158035278, |
| "learning_rate": 0.001, |
| "loss": 0.8753, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.301124095916748, |
| "learning_rate": 0.001, |
| "loss": 0.8718, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 1.559802770614624, |
| "learning_rate": 0.001, |
| "loss": 0.8723, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 1.1438794136047363, |
| "learning_rate": 0.001, |
| "loss": 0.8669, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 1.6619253158569336, |
| "learning_rate": 0.001, |
| "loss": 0.8853, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.3953354358673096, |
| "learning_rate": 0.001, |
| "loss": 0.8997, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.3383327722549438, |
| "learning_rate": 0.001, |
| "loss": 0.8962, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 1.0945528745651245, |
| "learning_rate": 0.001, |
| "loss": 0.8861, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.4700912237167358, |
| "learning_rate": 0.001, |
| "loss": 0.899, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.1272400617599487, |
| "learning_rate": 0.001, |
| "loss": 0.9029, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 1.222775936126709, |
| "learning_rate": 0.001, |
| "loss": 0.8966, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.9689314365386963, |
| "learning_rate": 0.001, |
| "loss": 0.894, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.0816248655319214, |
| "learning_rate": 0.001, |
| "loss": 0.8955, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 7.644120216369629, |
| "learning_rate": 0.001, |
| "loss": 0.8911, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 1.3644909858703613, |
| "learning_rate": 0.001, |
| "loss": 0.8324, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 1.2451497316360474, |
| "learning_rate": 0.001, |
| "loss": 0.8106, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 2.127196788787842, |
| "learning_rate": 0.001, |
| "loss": 0.8169, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.1432043313980103, |
| "learning_rate": 0.001, |
| "loss": 0.8184, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 1.2829303741455078, |
| "learning_rate": 0.001, |
| "loss": 0.8124, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 0.8562968373298645, |
| "learning_rate": 0.001, |
| "loss": 0.8184, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 0.9684380888938904, |
| "learning_rate": 0.001, |
| "loss": 0.828, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 1.0784997940063477, |
| "learning_rate": 0.001, |
| "loss": 0.836, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 1.046213984489441, |
| "learning_rate": 0.001, |
| "loss": 0.844, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 0.8843748569488525, |
| "learning_rate": 0.001, |
| "loss": 0.84, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 0.7278721332550049, |
| "learning_rate": 0.001, |
| "loss": 0.8598, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 0.9991313815116882, |
| "learning_rate": 0.001, |
| "loss": 0.8365, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 0.8424844145774841, |
| "learning_rate": 0.001, |
| "loss": 0.8411, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 1.4285770654678345, |
| "learning_rate": 0.001, |
| "loss": 0.8432, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 1.2143181562423706, |
| "learning_rate": 0.001, |
| "loss": 0.8543, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 1.2977038621902466, |
| "learning_rate": 0.001, |
| "loss": 0.8526, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 1.0228110551834106, |
| "learning_rate": 0.001, |
| "loss": 0.8542, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 1.2492506504058838, |
| "learning_rate": 0.001, |
| "loss": 0.8704, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.5715211629867554, |
| "learning_rate": 0.001, |
| "loss": 0.8777, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.181208610534668, |
| "learning_rate": 0.001, |
| "loss": 0.8637, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 0.986804187297821, |
| "learning_rate": 0.001, |
| "loss": 0.8951, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 1.0209100246429443, |
| "learning_rate": 0.001, |
| "loss": 0.8649, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 1.1363537311553955, |
| "learning_rate": 0.001, |
| "loss": 0.8669, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 1.1821092367172241, |
| "learning_rate": 0.001, |
| "loss": 0.8749, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 0.9846389293670654, |
| "learning_rate": 0.001, |
| "loss": 0.8755, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 1.3120102882385254, |
| "learning_rate": 0.001, |
| "loss": 0.8847, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.2054803371429443, |
| "learning_rate": 0.001, |
| "loss": 0.8704, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 1.1038309335708618, |
| "learning_rate": 0.001, |
| "loss": 0.8747, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 3.2323157787323, |
| "learning_rate": 0.001, |
| "loss": 0.8803, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.0762603282928467, |
| "learning_rate": 0.001, |
| "loss": 0.8818, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 1.0352551937103271, |
| "learning_rate": 0.001, |
| "loss": 0.8977, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 1.9383422136306763, |
| "learning_rate": 0.001, |
| "loss": 0.8207, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 1.456878900527954, |
| "learning_rate": 0.001, |
| "loss": 0.7909, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 2.3285322189331055, |
| "learning_rate": 0.001, |
| "loss": 0.8194, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 1.8079019784927368, |
| "learning_rate": 0.001, |
| "loss": 0.8073, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 1.3452057838439941, |
| "learning_rate": 0.001, |
| "loss": 0.7993, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 1.915332555770874, |
| "learning_rate": 0.001, |
| "loss": 0.8246, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 1.9111815690994263, |
| "learning_rate": 0.001, |
| "loss": 0.8088, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 1.762703537940979, |
| "learning_rate": 0.001, |
| "loss": 0.8243, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 1.3808751106262207, |
| "learning_rate": 0.001, |
| "loss": 0.8224, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 1.7224359512329102, |
| "learning_rate": 0.001, |
| "loss": 0.8173, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 2.0048210620880127, |
| "learning_rate": 0.001, |
| "loss": 0.833, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 1.6992894411087036, |
| "learning_rate": 0.001, |
| "loss": 0.8386, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 1.8275269269943237, |
| "learning_rate": 0.001, |
| "loss": 0.8428, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 2.066981554031372, |
| "learning_rate": 0.001, |
| "loss": 0.818, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 1.7945083379745483, |
| "learning_rate": 0.001, |
| "loss": 0.8342, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 2.0542707443237305, |
| "learning_rate": 0.001, |
| "loss": 0.8374, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 1.5740739107131958, |
| "learning_rate": 0.001, |
| "loss": 0.8487, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 2.1072022914886475, |
| "learning_rate": 0.001, |
| "loss": 0.8606, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 2.105353593826294, |
| "learning_rate": 0.001, |
| "loss": 0.8614, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 1.8877348899841309, |
| "learning_rate": 0.001, |
| "loss": 0.8343, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 1.740787148475647, |
| "learning_rate": 0.001, |
| "loss": 0.8556, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 1.4763180017471313, |
| "learning_rate": 0.001, |
| "loss": 0.8485, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 1.9027022123336792, |
| "learning_rate": 0.001, |
| "loss": 0.8436, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 1.3983405828475952, |
| "learning_rate": 0.001, |
| "loss": 0.8714, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 2.2473886013031006, |
| "learning_rate": 0.001, |
| "loss": 0.8717, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 1.3791003227233887, |
| "learning_rate": 0.001, |
| "loss": 0.8692, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 1.7160332202911377, |
| "learning_rate": 0.001, |
| "loss": 0.8712, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 1.4543373584747314, |
| "learning_rate": 0.001, |
| "loss": 0.8719, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 1.5968679189682007, |
| "learning_rate": 0.001, |
| "loss": 0.8879, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 1.9997758865356445, |
| "learning_rate": 0.001, |
| "loss": 0.8759, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 1.7506520748138428, |
| "learning_rate": 0.001, |
| "loss": 0.8855, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.527773380279541, |
| "learning_rate": 0.001, |
| "loss": 0.8467, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 1.4628387689590454, |
| "learning_rate": 0.001, |
| "loss": 0.7821, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.3667227029800415, |
| "learning_rate": 0.001, |
| "loss": 0.7919, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.2891846895217896, |
| "learning_rate": 0.001, |
| "loss": 0.8055, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.349412441253662, |
| "learning_rate": 0.001, |
| "loss": 0.7955, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.5655378103256226, |
| "learning_rate": 0.001, |
| "loss": 0.7913, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 1.305897831916809, |
| "learning_rate": 0.001, |
| "loss": 0.8004, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.294546365737915, |
| "learning_rate": 0.001, |
| "loss": 0.8053, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.4070929288864136, |
| "learning_rate": 0.001, |
| "loss": 0.8101, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.0786323547363281, |
| "learning_rate": 0.001, |
| "loss": 0.805, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.484237551689148, |
| "learning_rate": 0.001, |
| "loss": 0.8018, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 1.4975357055664062, |
| "learning_rate": 0.001, |
| "loss": 0.8563, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.1751554012298584, |
| "learning_rate": 0.001, |
| "loss": 0.8259, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.3102048635482788, |
| "learning_rate": 0.001, |
| "loss": 0.796, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 1.3700244426727295, |
| "learning_rate": 0.001, |
| "loss": 0.8382, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.8419667482376099, |
| "learning_rate": 0.001, |
| "loss": 0.8244, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 1.279415249824524, |
| "learning_rate": 0.001, |
| "loss": 0.8414, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.1568162441253662, |
| "learning_rate": 0.001, |
| "loss": 0.8424, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.2953382730484009, |
| "learning_rate": 0.001, |
| "loss": 0.8303, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.2967185974121094, |
| "learning_rate": 0.001, |
| "loss": 0.8394, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.3641395568847656, |
| "learning_rate": 0.001, |
| "loss": 0.8408, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.2159711122512817, |
| "learning_rate": 0.001, |
| "loss": 0.8462, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.5351017713546753, |
| "learning_rate": 0.001, |
| "loss": 0.8459, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.5198405981063843, |
| "learning_rate": 0.001, |
| "loss": 0.8499, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.2137715816497803, |
| "learning_rate": 0.001, |
| "loss": 0.8526, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 1.2001396417617798, |
| "learning_rate": 0.001, |
| "loss": 0.8715, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.7482579946517944, |
| "learning_rate": 0.001, |
| "loss": 0.8561, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.476863980293274, |
| "learning_rate": 0.001, |
| "loss": 0.86, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.6021820306777954, |
| "learning_rate": 0.001, |
| "loss": 0.881, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.5128792524337769, |
| "learning_rate": 0.001, |
| "loss": 0.873, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 1.4070549011230469, |
| "learning_rate": 0.001, |
| "loss": 0.8732, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.2981127500534058, |
| "learning_rate": 0.001, |
| "loss": 0.8268, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.4790092706680298, |
| "learning_rate": 0.001, |
| "loss": 0.7825, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.3770111799240112, |
| "learning_rate": 0.001, |
| "loss": 0.7824, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 9.711091041564941, |
| "learning_rate": 0.001, |
| "loss": 0.7852, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.4254145622253418, |
| "learning_rate": 0.001, |
| "loss": 0.7864, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.2787805795669556, |
| "learning_rate": 0.001, |
| "loss": 0.79, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.3041934967041016, |
| "learning_rate": 0.001, |
| "loss": 0.8038, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.2634992599487305, |
| "learning_rate": 0.001, |
| "loss": 0.8016, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.237308144569397, |
| "learning_rate": 0.001, |
| "loss": 0.8099, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.2396942377090454, |
| "learning_rate": 0.001, |
| "loss": 0.8169, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.2848567962646484, |
| "learning_rate": 0.001, |
| "loss": 0.8059, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.5477381944656372, |
| "learning_rate": 0.001, |
| "loss": 0.8163, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.2960271835327148, |
| "learning_rate": 0.001, |
| "loss": 0.8032, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.273160696029663, |
| "learning_rate": 0.001, |
| "loss": 0.8081, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.191059947013855, |
| "learning_rate": 0.001, |
| "loss": 0.8189, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.1510519981384277, |
| "learning_rate": 0.001, |
| "loss": 0.807, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 1.358547329902649, |
| "learning_rate": 0.001, |
| "loss": 0.8193, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.4065147638320923, |
| "learning_rate": 0.001, |
| "loss": 0.8166, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.5476964712142944, |
| "learning_rate": 0.001, |
| "loss": 0.8333, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.3824409246444702, |
| "learning_rate": 0.001, |
| "loss": 0.8273, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 1.1956334114074707, |
| "learning_rate": 0.001, |
| "loss": 0.8241, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.1970595121383667, |
| "learning_rate": 0.001, |
| "loss": 0.8477, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.162197470664978, |
| "learning_rate": 0.001, |
| "loss": 0.8377, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.2741224765777588, |
| "learning_rate": 0.001, |
| "loss": 0.8509, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.1326532363891602, |
| "learning_rate": 0.001, |
| "loss": 0.8413, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.5357139110565186, |
| "learning_rate": 0.001, |
| "loss": 0.8304, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.355280876159668, |
| "learning_rate": 0.001, |
| "loss": 0.8572, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.6483911275863647, |
| "learning_rate": 0.001, |
| "loss": 0.8489, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 1.3355098962783813, |
| "learning_rate": 0.001, |
| "loss": 0.8559, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.3055047988891602, |
| "learning_rate": 0.001, |
| "loss": 0.8465, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.4414533376693726, |
| "learning_rate": 0.001, |
| "loss": 0.8137, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.346257209777832, |
| "learning_rate": 0.001, |
| "loss": 0.8097, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.8192956447601318, |
| "learning_rate": 0.001, |
| "loss": 0.7631, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.2960363626480103, |
| "learning_rate": 0.001, |
| "loss": 0.7741, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 2.104506731033325, |
| "learning_rate": 0.001, |
| "loss": 0.7705, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.1508853435516357, |
| "learning_rate": 0.001, |
| "loss": 0.7728, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.65923011302948, |
| "learning_rate": 0.001, |
| "loss": 0.7694, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.5621492862701416, |
| "learning_rate": 0.001, |
| "loss": 0.7782, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.388525366783142, |
| "learning_rate": 0.001, |
| "loss": 0.7728, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.4508206844329834, |
| "learning_rate": 0.001, |
| "loss": 0.7853, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.1052316427230835, |
| "learning_rate": 0.001, |
| "loss": 0.7995, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.0586109161376953, |
| "learning_rate": 0.001, |
| "loss": 0.7858, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.1809642314910889, |
| "learning_rate": 0.001, |
| "loss": 0.7982, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.301119327545166, |
| "learning_rate": 0.001, |
| "loss": 0.7995, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.2782262563705444, |
| "learning_rate": 0.001, |
| "loss": 0.8058, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.2796794176101685, |
| "learning_rate": 0.001, |
| "loss": 0.7985, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.1781928539276123, |
| "learning_rate": 0.001, |
| "loss": 0.7911, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.1269559860229492, |
| "learning_rate": 0.001, |
| "loss": 0.8117, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.3490080833435059, |
| "learning_rate": 0.001, |
| "loss": 0.793, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.1441093683242798, |
| "learning_rate": 0.001, |
| "loss": 0.8239, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.4738757610321045, |
| "learning_rate": 0.001, |
| "loss": 0.8282, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.5204062461853027, |
| "learning_rate": 0.001, |
| "loss": 0.8204, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.3744752407073975, |
| "learning_rate": 0.001, |
| "loss": 0.8146, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.4345048666000366, |
| "learning_rate": 0.001, |
| "loss": 0.8251, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.3285446166992188, |
| "learning_rate": 0.001, |
| "loss": 0.8376, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 1.2864034175872803, |
| "learning_rate": 0.001, |
| "loss": 0.84, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.5362489223480225, |
| "learning_rate": 0.001, |
| "loss": 0.8346, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.177847981452942, |
| "learning_rate": 0.001, |
| "loss": 0.8402, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.168405294418335, |
| "learning_rate": 0.001, |
| "loss": 0.8333, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.4306167364120483, |
| "learning_rate": 0.001, |
| "loss": 0.8347, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.3686728477478027, |
| "learning_rate": 0.001, |
| "loss": 0.8238, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.2857394218444824, |
| "learning_rate": 0.001, |
| "loss": 0.8655, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.2607849836349487, |
| "learning_rate": 0.001, |
| "loss": 0.7783, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.5633095502853394, |
| "learning_rate": 0.001, |
| "loss": 0.7446, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 4.660933494567871, |
| "learning_rate": 0.001, |
| "loss": 0.7425, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 1.7380452156066895, |
| "learning_rate": 0.001, |
| "loss": 0.7674, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.2123302221298218, |
| "learning_rate": 0.001, |
| "loss": 0.775, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 1.2319421768188477, |
| "learning_rate": 0.001, |
| "loss": 0.7753, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.5815069675445557, |
| "learning_rate": 0.001, |
| "loss": 0.7775, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.0354673862457275, |
| "learning_rate": 0.001, |
| "loss": 0.7743, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 1.3230652809143066, |
| "learning_rate": 0.001, |
| "loss": 0.7778, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.2702628374099731, |
| "learning_rate": 0.001, |
| "loss": 0.7708, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.2347975969314575, |
| "learning_rate": 0.001, |
| "loss": 0.7792, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.1771498918533325, |
| "learning_rate": 0.001, |
| "loss": 0.7826, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.1414570808410645, |
| "learning_rate": 0.001, |
| "loss": 0.7848, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.1657198667526245, |
| "learning_rate": 0.001, |
| "loss": 0.7975, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.267530083656311, |
| "learning_rate": 0.001, |
| "loss": 0.8029, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 1.1892708539962769, |
| "learning_rate": 0.001, |
| "loss": 0.798, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.89950692653656, |
| "learning_rate": 0.001, |
| "loss": 0.7923, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.305152177810669, |
| "learning_rate": 0.001, |
| "loss": 0.8014, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.5587385892868042, |
| "learning_rate": 0.001, |
| "loss": 0.8099, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.152372121810913, |
| "learning_rate": 0.001, |
| "loss": 0.8062, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 1.1674656867980957, |
| "learning_rate": 0.001, |
| "loss": 0.8167, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.259466290473938, |
| "learning_rate": 0.001, |
| "loss": 0.8222, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 1.4729558229446411, |
| "learning_rate": 0.001, |
| "loss": 0.8043, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.0603488683700562, |
| "learning_rate": 0.001, |
| "loss": 0.8125, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 3.8644630908966064, |
| "learning_rate": 0.001, |
| "loss": 0.8291, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.261315107345581, |
| "learning_rate": 0.001, |
| "loss": 0.8183, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.1630548238754272, |
| "learning_rate": 0.001, |
| "loss": 0.8281, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 1.2682583332061768, |
| "learning_rate": 0.001, |
| "loss": 0.8263, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.2569491863250732, |
| "learning_rate": 0.001, |
| "loss": 0.8012, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.174126148223877, |
| "learning_rate": 0.001, |
| "loss": 0.8269, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.253170371055603, |
| "learning_rate": 0.001, |
| "loss": 0.826, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.2278573513031006, |
| "learning_rate": 0.001, |
| "loss": 0.7577, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 1.2207344770431519, |
| "learning_rate": 0.001, |
| "loss": 0.7376, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 1.3616275787353516, |
| "learning_rate": 0.001, |
| "loss": 0.745, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 1.287418007850647, |
| "learning_rate": 0.001, |
| "loss": 0.7458, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.1178083419799805, |
| "learning_rate": 0.001, |
| "loss": 0.7562, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 1.5619690418243408, |
| "learning_rate": 0.001, |
| "loss": 0.7389, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.4271554946899414, |
| "learning_rate": 0.001, |
| "loss": 0.7723, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 1.7732999324798584, |
| "learning_rate": 0.001, |
| "loss": 0.7538, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.4282053709030151, |
| "learning_rate": 0.001, |
| "loss": 0.7821, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.228203535079956, |
| "learning_rate": 0.001, |
| "loss": 0.7722, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.3013367652893066, |
| "learning_rate": 0.001, |
| "loss": 0.7652, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.2661182880401611, |
| "learning_rate": 0.001, |
| "loss": 0.7777, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.3260958194732666, |
| "learning_rate": 0.001, |
| "loss": 0.7804, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 1.212944507598877, |
| "learning_rate": 0.001, |
| "loss": 0.7679, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 1.4435935020446777, |
| "learning_rate": 0.001, |
| "loss": 0.765, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.4578791856765747, |
| "learning_rate": 0.001, |
| "loss": 0.7912, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.462074637413025, |
| "learning_rate": 0.001, |
| "loss": 0.7974, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.403319001197815, |
| "learning_rate": 0.001, |
| "loss": 0.7922, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 1.2772393226623535, |
| "learning_rate": 0.001, |
| "loss": 0.8032, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.217108964920044, |
| "learning_rate": 0.001, |
| "loss": 0.8016, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 1.2560069561004639, |
| "learning_rate": 0.001, |
| "loss": 0.802, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.161948800086975, |
| "learning_rate": 0.001, |
| "loss": 0.7941, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.2663328647613525, |
| "learning_rate": 0.001, |
| "loss": 0.8108, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.628860592842102, |
| "learning_rate": 0.001, |
| "loss": 0.8044, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.2811692953109741, |
| "learning_rate": 0.001, |
| "loss": 0.8118, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.2758411169052124, |
| "learning_rate": 0.001, |
| "loss": 0.8147, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.3008402585983276, |
| "learning_rate": 0.001, |
| "loss": 0.8128, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.4310789108276367, |
| "learning_rate": 0.001, |
| "loss": 0.8126, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.4643484354019165, |
| "learning_rate": 0.001, |
| "loss": 0.8037, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.2317537069320679, |
| "learning_rate": 0.001, |
| "loss": 0.8193, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 1.2515267133712769, |
| "learning_rate": 0.001, |
| "loss": 0.8045, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.475560188293457, |
| "learning_rate": 0.001, |
| "loss": 0.7236, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 1.2725452184677124, |
| "learning_rate": 0.001, |
| "loss": 0.7288, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.295411467552185, |
| "learning_rate": 0.001, |
| "loss": 0.7375, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.436627984046936, |
| "learning_rate": 0.001, |
| "loss": 0.7454, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.3891310691833496, |
| "learning_rate": 0.001, |
| "loss": 0.744, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.15779447555542, |
| "learning_rate": 0.001, |
| "loss": 0.7465, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.1223961114883423, |
| "learning_rate": 0.001, |
| "loss": 0.7363, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.1627105474472046, |
| "learning_rate": 0.001, |
| "loss": 0.7456, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 1.5019142627716064, |
| "learning_rate": 0.001, |
| "loss": 0.7612, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.1844847202301025, |
| "learning_rate": 0.001, |
| "loss": 0.7567, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 1.4008020162582397, |
| "learning_rate": 0.001, |
| "loss": 0.7587, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.299863338470459, |
| "learning_rate": 0.001, |
| "loss": 0.7637, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 1.1990106105804443, |
| "learning_rate": 0.001, |
| "loss": 0.7528, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.5277669429779053, |
| "learning_rate": 0.001, |
| "loss": 0.7751, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 1.361127495765686, |
| "learning_rate": 0.001, |
| "loss": 0.7523, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.2485038042068481, |
| "learning_rate": 0.001, |
| "loss": 0.7826, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 1.1917405128479004, |
| "learning_rate": 0.001, |
| "loss": 0.7906, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.3229522705078125, |
| "learning_rate": 0.001, |
| "loss": 0.7801, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.2076053619384766, |
| "learning_rate": 0.001, |
| "loss": 0.7907, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 1.409424066543579, |
| "learning_rate": 0.001, |
| "loss": 0.7948, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.1984336376190186, |
| "learning_rate": 0.001, |
| "loss": 0.7876, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 14.626358985900879, |
| "learning_rate": 0.001, |
| "loss": 0.7982, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.6344068050384521, |
| "learning_rate": 0.001, |
| "loss": 0.7986, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.367846965789795, |
| "learning_rate": 0.001, |
| "loss": 0.791, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.1952319145202637, |
| "learning_rate": 0.001, |
| "loss": 0.7965, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 1.4131128787994385, |
| "learning_rate": 0.001, |
| "loss": 0.8009, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 1.2616978883743286, |
| "learning_rate": 0.001, |
| "loss": 0.8031, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.5032106637954712, |
| "learning_rate": 0.001, |
| "loss": 0.8055, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.5496946573257446, |
| "learning_rate": 0.001, |
| "loss": 0.801, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 1.0915486812591553, |
| "learning_rate": 0.001, |
| "loss": 0.8008, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 1.169753074645996, |
| "learning_rate": 0.001, |
| "loss": 0.7731, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.0841970443725586, |
| "learning_rate": 0.001, |
| "loss": 0.7138, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.2232255935668945, |
| "learning_rate": 0.001, |
| "loss": 0.7295, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 1.101244330406189, |
| "learning_rate": 0.001, |
| "loss": 0.7295, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 1.1868951320648193, |
| "learning_rate": 0.001, |
| "loss": 0.7322, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.4374502897262573, |
| "learning_rate": 0.001, |
| "loss": 0.7394, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 1.1606899499893188, |
| "learning_rate": 0.001, |
| "loss": 0.7298, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 1.3472959995269775, |
| "learning_rate": 0.001, |
| "loss": 0.7425, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.0685003995895386, |
| "learning_rate": 0.001, |
| "loss": 0.7441, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 0.9984011650085449, |
| "learning_rate": 0.001, |
| "loss": 0.7484, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.1586939096450806, |
| "learning_rate": 0.001, |
| "loss": 0.7544, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 1.0123095512390137, |
| "learning_rate": 0.001, |
| "loss": 0.771, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.0273991823196411, |
| "learning_rate": 0.001, |
| "loss": 0.7457, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 1.0201725959777832, |
| "learning_rate": 0.001, |
| "loss": 0.7645, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.0155104398727417, |
| "learning_rate": 0.001, |
| "loss": 0.7649, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 1.6812835931777954, |
| "learning_rate": 0.001, |
| "loss": 0.7636, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 1.271406888961792, |
| "learning_rate": 0.001, |
| "loss": 0.7741, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 1.2045003175735474, |
| "learning_rate": 0.001, |
| "loss": 0.769, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 1.3944015502929688, |
| "learning_rate": 0.001, |
| "loss": 0.7857, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.1044825315475464, |
| "learning_rate": 0.001, |
| "loss": 0.7744, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 1.2409112453460693, |
| "learning_rate": 0.001, |
| "loss": 0.7744, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.0952699184417725, |
| "learning_rate": 0.001, |
| "loss": 0.7621, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 1.224043607711792, |
| "learning_rate": 0.001, |
| "loss": 0.7822, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 1.085472822189331, |
| "learning_rate": 0.001, |
| "loss": 0.7815, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.2062139511108398, |
| "learning_rate": 0.001, |
| "loss": 0.7739, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 1.1115362644195557, |
| "learning_rate": 0.001, |
| "loss": 0.775, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.2838987112045288, |
| "learning_rate": 0.001, |
| "loss": 0.7823, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 1.238246202468872, |
| "learning_rate": 0.001, |
| "loss": 0.7866, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.235521912574768, |
| "learning_rate": 0.001, |
| "loss": 0.7761, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 1.0825852155685425, |
| "learning_rate": 0.001, |
| "loss": 0.7827, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 1.4009476900100708, |
| "learning_rate": 0.001, |
| "loss": 0.7995, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.06359585374593735, |
| "learning_rate": 0.001, |
| "loss": 0.7531, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 0.6125797033309937, |
| "learning_rate": 0.001, |
| "loss": 0.6978, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 0.09692316502332687, |
| "learning_rate": 0.001, |
| "loss": 0.7195, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 0.6651096343994141, |
| "learning_rate": 0.001, |
| "loss": 0.7354, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 0.3355295658111572, |
| "learning_rate": 0.001, |
| "loss": 0.7262, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 0.36281466484069824, |
| "learning_rate": 0.001, |
| "loss": 0.7209, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 0.5302878022193909, |
| "learning_rate": 0.001, |
| "loss": 0.7309, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 0.20198680460453033, |
| "learning_rate": 0.001, |
| "loss": 0.7168, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 0.5641872882843018, |
| "learning_rate": 0.001, |
| "loss": 0.7314, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 0.1572069376707077, |
| "learning_rate": 0.001, |
| "loss": 0.7431, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 1.641300082206726, |
| "learning_rate": 0.001, |
| "loss": 0.7391, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 0.5732482671737671, |
| "learning_rate": 0.001, |
| "loss": 0.7487, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 0.4709911644458771, |
| "learning_rate": 0.001, |
| "loss": 0.7426, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 0.3790436387062073, |
| "learning_rate": 0.001, |
| "loss": 0.7581, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 0.8265621662139893, |
| "learning_rate": 0.001, |
| "loss": 0.7376, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 0.5220683813095093, |
| "learning_rate": 0.001, |
| "loss": 0.7474, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 0.22513864934444427, |
| "learning_rate": 0.001, |
| "loss": 0.7492, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 0.5701922178268433, |
| "learning_rate": 0.001, |
| "loss": 0.7517, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 0.5959166288375854, |
| "learning_rate": 0.001, |
| "loss": 0.7521, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 0.5421650409698486, |
| "learning_rate": 0.001, |
| "loss": 0.7633, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 0.604699969291687, |
| "learning_rate": 0.001, |
| "loss": 0.7506, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 0.7056938409805298, |
| "learning_rate": 0.001, |
| "loss": 0.7583, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 0.36048054695129395, |
| "learning_rate": 0.001, |
| "loss": 0.7446, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 1.0495373010635376, |
| "learning_rate": 0.001, |
| "loss": 0.791, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 0.38334110379219055, |
| "learning_rate": 0.001, |
| "loss": 0.7661, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 0.34685152769088745, |
| "learning_rate": 0.001, |
| "loss": 0.7642, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 6.124505043029785, |
| "learning_rate": 0.001, |
| "loss": 0.7831, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 0.3870513439178467, |
| "learning_rate": 0.001, |
| "loss": 0.7879, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 0.3915799558162689, |
| "learning_rate": 0.001, |
| "loss": 0.7751, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 0.4591212868690491, |
| "learning_rate": 0.001, |
| "loss": 0.7986, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 0.3180168867111206, |
| "learning_rate": 0.001, |
| "loss": 0.7874, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.6160231828689575, |
| "learning_rate": 0.001, |
| "loss": 0.804, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.5989055633544922, |
| "learning_rate": 0.001, |
| "loss": 0.6946, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.248809576034546, |
| "learning_rate": 0.001, |
| "loss": 0.6941, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.428173303604126, |
| "learning_rate": 0.001, |
| "loss": 0.717, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 1.5812784433364868, |
| "learning_rate": 0.001, |
| "loss": 0.7142, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.3164476156234741, |
| "learning_rate": 0.001, |
| "loss": 0.6995, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.545201301574707, |
| "learning_rate": 0.001, |
| "loss": 0.7093, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.6235681772232056, |
| "learning_rate": 0.001, |
| "loss": 0.728, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 1.6064990758895874, |
| "learning_rate": 0.001, |
| "loss": 0.7269, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 1.4662895202636719, |
| "learning_rate": 0.001, |
| "loss": 0.741, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 1.4283791780471802, |
| "learning_rate": 0.001, |
| "loss": 0.7171, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.3249914646148682, |
| "learning_rate": 0.001, |
| "loss": 0.7335, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.3508305549621582, |
| "learning_rate": 0.001, |
| "loss": 0.7484, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 1.402336597442627, |
| "learning_rate": 0.001, |
| "loss": 0.7242, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.5387367010116577, |
| "learning_rate": 0.001, |
| "loss": 0.7565, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.280126690864563, |
| "learning_rate": 0.001, |
| "loss": 0.7443, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 1.4007776975631714, |
| "learning_rate": 0.001, |
| "loss": 0.73, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 1.4511324167251587, |
| "learning_rate": 0.001, |
| "loss": 0.7601, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 1.6074110269546509, |
| "learning_rate": 0.001, |
| "loss": 0.7468, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.6182364225387573, |
| "learning_rate": 0.001, |
| "loss": 0.7409, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.3085405826568604, |
| "learning_rate": 0.001, |
| "loss": 0.7545, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 1.2141468524932861, |
| "learning_rate": 0.001, |
| "loss": 0.7507, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.3837734460830688, |
| "learning_rate": 0.001, |
| "loss": 0.7613, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 1.4727295637130737, |
| "learning_rate": 0.001, |
| "loss": 0.7631, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.4606034755706787, |
| "learning_rate": 0.001, |
| "loss": 0.7705, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.729146957397461, |
| "learning_rate": 0.001, |
| "loss": 0.7593, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 1.6483460664749146, |
| "learning_rate": 0.001, |
| "loss": 0.7622, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.164381980895996, |
| "learning_rate": 0.001, |
| "loss": 0.7678, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 1.6500674486160278, |
| "learning_rate": 0.001, |
| "loss": 0.7659, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 1.311309814453125, |
| "learning_rate": 0.001, |
| "loss": 0.7597, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.169773817062378, |
| "learning_rate": 0.001, |
| "loss": 0.7661, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.237779140472412, |
| "learning_rate": 0.001, |
| "loss": 0.7649, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.669989824295044, |
| "learning_rate": 0.001, |
| "loss": 0.697, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.4660130739212036, |
| "learning_rate": 0.001, |
| "loss": 0.7028, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 1.304290533065796, |
| "learning_rate": 0.001, |
| "loss": 0.7115, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.3688267469406128, |
| "learning_rate": 0.001, |
| "loss": 0.7015, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.2158695459365845, |
| "learning_rate": 0.001, |
| "loss": 0.701, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 1.2809381484985352, |
| "learning_rate": 0.001, |
| "loss": 0.703, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.359277367591858, |
| "learning_rate": 0.001, |
| "loss": 0.7179, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.4113330841064453, |
| "learning_rate": 0.001, |
| "loss": 0.7142, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.4368807077407837, |
| "learning_rate": 0.001, |
| "loss": 0.7208, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.1833715438842773, |
| "learning_rate": 0.001, |
| "loss": 0.715, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.5551220178604126, |
| "learning_rate": 0.001, |
| "loss": 0.7246, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.8916435241699219, |
| "learning_rate": 0.001, |
| "loss": 0.7346, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 1.30422842502594, |
| "learning_rate": 0.001, |
| "loss": 0.7308, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.31717050075531, |
| "learning_rate": 0.001, |
| "loss": 0.7259, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.4147814512252808, |
| "learning_rate": 0.001, |
| "loss": 0.742, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 1.0690675973892212, |
| "learning_rate": 0.001, |
| "loss": 0.7292, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.655393123626709, |
| "learning_rate": 0.001, |
| "loss": 0.7393, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.5793589353561401, |
| "learning_rate": 0.001, |
| "loss": 0.7345, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.1870840787887573, |
| "learning_rate": 0.001, |
| "loss": 0.7449, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.2543737888336182, |
| "learning_rate": 0.001, |
| "loss": 0.7454, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 1.7071529626846313, |
| "learning_rate": 0.001, |
| "loss": 0.7386, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.2244914770126343, |
| "learning_rate": 0.001, |
| "loss": 0.7432, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 1.2786270380020142, |
| "learning_rate": 0.001, |
| "loss": 0.7358, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.573927640914917, |
| "learning_rate": 0.001, |
| "loss": 0.7321, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 1.7538331747055054, |
| "learning_rate": 0.001, |
| "loss": 0.7337, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.616270661354065, |
| "learning_rate": 0.001, |
| "loss": 0.7482, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.253475546836853, |
| "learning_rate": 0.001, |
| "loss": 0.7635, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 1.7537295818328857, |
| "learning_rate": 0.001, |
| "loss": 0.751, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.3528518676757812, |
| "learning_rate": 0.001, |
| "loss": 0.7697, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.401299238204956, |
| "learning_rate": 0.001, |
| "loss": 0.7577, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.3519210815429688, |
| "learning_rate": 0.001, |
| "loss": 0.7499, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.4237704277038574, |
| "learning_rate": 0.001, |
| "loss": 0.6907, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.6554957628250122, |
| "learning_rate": 0.001, |
| "loss": 0.6931, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.322135090827942, |
| "learning_rate": 0.001, |
| "loss": 0.6639, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 1.5357197523117065, |
| "learning_rate": 0.001, |
| "loss": 0.6789, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.1936781406402588, |
| "learning_rate": 0.001, |
| "loss": 0.69, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.3940258026123047, |
| "learning_rate": 0.001, |
| "loss": 0.6974, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.251088261604309, |
| "learning_rate": 0.001, |
| "loss": 0.7037, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.4187980890274048, |
| "learning_rate": 0.001, |
| "loss": 0.6951, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.410966396331787, |
| "learning_rate": 0.001, |
| "loss": 0.7132, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.2439744472503662, |
| "learning_rate": 0.001, |
| "loss": 0.6969, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.6391518115997314, |
| "learning_rate": 0.001, |
| "loss": 0.6999, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.2765934467315674, |
| "learning_rate": 0.001, |
| "loss": 0.7341, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 1.314164400100708, |
| "learning_rate": 0.001, |
| "loss": 0.7119, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.3228839635849, |
| "learning_rate": 0.001, |
| "loss": 0.7288, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.2543978691101074, |
| "learning_rate": 0.001, |
| "loss": 0.7222, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 1.3691924810409546, |
| "learning_rate": 0.001, |
| "loss": 0.7289, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.47661292552948, |
| "learning_rate": 0.001, |
| "loss": 0.7187, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.5954946279525757, |
| "learning_rate": 0.001, |
| "loss": 0.7064, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.302594780921936, |
| "learning_rate": 0.001, |
| "loss": 0.7357, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 1.3874233961105347, |
| "learning_rate": 0.001, |
| "loss": 0.7342, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.2953951358795166, |
| "learning_rate": 0.001, |
| "loss": 0.7399, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.5059915781021118, |
| "learning_rate": 0.001, |
| "loss": 0.7226, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.6534998416900635, |
| "learning_rate": 0.001, |
| "loss": 0.7478, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.4183323383331299, |
| "learning_rate": 0.001, |
| "loss": 0.744, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.4309442043304443, |
| "learning_rate": 0.001, |
| "loss": 0.7421, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.4040199518203735, |
| "learning_rate": 0.001, |
| "loss": 0.7511, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 1.5059336423873901, |
| "learning_rate": 0.001, |
| "loss": 0.763, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 1.2663861513137817, |
| "learning_rate": 0.001, |
| "loss": 0.7299, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.3244277238845825, |
| "learning_rate": 0.001, |
| "loss": 0.7438, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.1952475309371948, |
| "learning_rate": 0.001, |
| "loss": 0.7579, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.4403356313705444, |
| "learning_rate": 0.001, |
| "loss": 0.7466, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.3646568059921265, |
| "learning_rate": 0.001, |
| "loss": 0.665, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.3861289024353027, |
| "learning_rate": 0.001, |
| "loss": 0.6708, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 1.2403484582901, |
| "learning_rate": 0.001, |
| "loss": 0.6773, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.4412761926651, |
| "learning_rate": 0.001, |
| "loss": 0.6714, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.5241587162017822, |
| "learning_rate": 0.001, |
| "loss": 0.6872, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.2935175895690918, |
| "learning_rate": 0.001, |
| "loss": 0.6931, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.2425872087478638, |
| "learning_rate": 0.001, |
| "loss": 0.6821, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 1.1844414472579956, |
| "learning_rate": 0.001, |
| "loss": 0.6979, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.5796948671340942, |
| "learning_rate": 0.001, |
| "loss": 0.7203, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 1.386918306350708, |
| "learning_rate": 0.001, |
| "loss": 0.7002, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.1819859743118286, |
| "learning_rate": 0.001, |
| "loss": 0.7158, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 1.2906848192214966, |
| "learning_rate": 0.001, |
| "loss": 0.7115, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.1121623516082764, |
| "learning_rate": 0.001, |
| "loss": 0.6974, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.2486132383346558, |
| "learning_rate": 0.001, |
| "loss": 0.7067, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.1886978149414062, |
| "learning_rate": 0.001, |
| "loss": 0.7072, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.0904563665390015, |
| "learning_rate": 0.001, |
| "loss": 0.7235, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.4579226970672607, |
| "learning_rate": 0.001, |
| "loss": 0.7086, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.0935107469558716, |
| "learning_rate": 0.001, |
| "loss": 0.7086, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.4180753231048584, |
| "learning_rate": 0.001, |
| "loss": 0.7231, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.4720892906188965, |
| "learning_rate": 0.001, |
| "loss": 0.7279, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.350489616394043, |
| "learning_rate": 0.001, |
| "loss": 0.7294, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.259083867073059, |
| "learning_rate": 0.001, |
| "loss": 0.7361, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.6136354207992554, |
| "learning_rate": 0.001, |
| "loss": 0.7294, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.4642295837402344, |
| "learning_rate": 0.001, |
| "loss": 0.7428, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 1.4074591398239136, |
| "learning_rate": 0.001, |
| "loss": 0.7217, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.2914643287658691, |
| "learning_rate": 0.001, |
| "loss": 0.7551, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 1.2137229442596436, |
| "learning_rate": 0.001, |
| "loss": 0.7368, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.177645206451416, |
| "learning_rate": 0.001, |
| "loss": 0.7293, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 1.1865633726119995, |
| "learning_rate": 0.001, |
| "loss": 0.7241, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 1.1472890377044678, |
| "learning_rate": 0.001, |
| "loss": 0.7466, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.3177831172943115, |
| "learning_rate": 0.001, |
| "loss": 0.7035, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 2.511430263519287, |
| "learning_rate": 0.001, |
| "loss": 0.6627, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.3882372379302979, |
| "learning_rate": 0.001, |
| "loss": 0.6713, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.1807273626327515, |
| "learning_rate": 0.001, |
| "loss": 0.6763, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 1.6859544515609741, |
| "learning_rate": 0.001, |
| "loss": 0.6848, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 1.858846664428711, |
| "learning_rate": 0.001, |
| "loss": 0.6797, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.37615168094635, |
| "learning_rate": 0.001, |
| "loss": 0.6874, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.270747184753418, |
| "learning_rate": 0.001, |
| "loss": 0.6824, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 1.7557049989700317, |
| "learning_rate": 0.001, |
| "loss": 0.6876, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.3425112962722778, |
| "learning_rate": 0.001, |
| "loss": 0.674, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.377942681312561, |
| "learning_rate": 0.001, |
| "loss": 0.691, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.1815950870513916, |
| "learning_rate": 0.001, |
| "loss": 0.6899, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.3967550992965698, |
| "learning_rate": 0.001, |
| "loss": 0.6935, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.5351048707962036, |
| "learning_rate": 0.001, |
| "loss": 0.6875, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 1.2867332696914673, |
| "learning_rate": 0.001, |
| "loss": 0.6963, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.6633144617080688, |
| "learning_rate": 0.001, |
| "loss": 0.707, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.3871257305145264, |
| "learning_rate": 0.001, |
| "loss": 0.7089, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.155198097229004, |
| "learning_rate": 0.001, |
| "loss": 0.6981, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 1.0028072595596313, |
| "learning_rate": 0.001, |
| "loss": 0.7082, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 1.5060408115386963, |
| "learning_rate": 0.001, |
| "loss": 0.7144, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 1.0690950155258179, |
| "learning_rate": 0.001, |
| "loss": 0.697, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 1.4234610795974731, |
| "learning_rate": 0.001, |
| "loss": 0.7196, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.2625986337661743, |
| "learning_rate": 0.001, |
| "loss": 0.7008, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.2988468408584595, |
| "learning_rate": 0.001, |
| "loss": 0.7296, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 1.5457489490509033, |
| "learning_rate": 0.001, |
| "loss": 0.7056, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.2999827861785889, |
| "learning_rate": 0.001, |
| "loss": 0.7393, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.3677887916564941, |
| "learning_rate": 0.001, |
| "loss": 0.7106, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.438559889793396, |
| "learning_rate": 0.001, |
| "loss": 0.7275, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 1.15455162525177, |
| "learning_rate": 0.001, |
| "loss": 0.7265, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.3578943014144897, |
| "learning_rate": 0.001, |
| "loss": 0.734, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.8947703838348389, |
| "learning_rate": 0.001, |
| "loss": 0.7342, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 1.7013978958129883, |
| "learning_rate": 0.001, |
| "loss": 0.6952, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 1.2173410654067993, |
| "learning_rate": 0.001, |
| "loss": 0.6489, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.3756160736083984, |
| "learning_rate": 0.001, |
| "loss": 0.6525, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 1.1060224771499634, |
| "learning_rate": 0.001, |
| "loss": 0.6633, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 1.318628191947937, |
| "learning_rate": 0.001, |
| "loss": 0.6619, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.0686438083648682, |
| "learning_rate": 0.001, |
| "loss": 0.6631, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.1766424179077148, |
| "learning_rate": 0.001, |
| "loss": 0.6583, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 1.3714375495910645, |
| "learning_rate": 0.001, |
| "loss": 0.7061, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 1.0865048170089722, |
| "learning_rate": 0.001, |
| "loss": 0.6906, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.3435384035110474, |
| "learning_rate": 0.001, |
| "loss": 0.6932, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.2520074844360352, |
| "learning_rate": 0.001, |
| "loss": 0.6673, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.3908581733703613, |
| "learning_rate": 0.001, |
| "loss": 0.6668, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 1.0985134840011597, |
| "learning_rate": 0.001, |
| "loss": 0.6863, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.246180772781372, |
| "learning_rate": 0.001, |
| "loss": 0.6887, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.1640903949737549, |
| "learning_rate": 0.001, |
| "loss": 0.6972, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.4162952899932861, |
| "learning_rate": 0.001, |
| "loss": 0.7061, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 1.4577606916427612, |
| "learning_rate": 0.001, |
| "loss": 0.6915, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 1.6326485872268677, |
| "learning_rate": 0.001, |
| "loss": 0.69, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 1.6330126523971558, |
| "learning_rate": 0.001, |
| "loss": 0.698, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 1.5364458560943604, |
| "learning_rate": 0.001, |
| "loss": 0.6766, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.5034723281860352, |
| "learning_rate": 0.001, |
| "loss": 0.702, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.1095960140228271, |
| "learning_rate": 0.001, |
| "loss": 0.7051, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 1.692766785621643, |
| "learning_rate": 0.001, |
| "loss": 0.711, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.2210887670516968, |
| "learning_rate": 0.001, |
| "loss": 0.6972, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.6064265966415405, |
| "learning_rate": 0.001, |
| "loss": 0.7076, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.4132839441299438, |
| "learning_rate": 0.001, |
| "loss": 0.7206, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.3864809274673462, |
| "learning_rate": 0.001, |
| "loss": 0.7339, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.4417805671691895, |
| "learning_rate": 0.001, |
| "loss": 0.7262, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.3630573749542236, |
| "learning_rate": 0.001, |
| "loss": 0.7211, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.3025026321411133, |
| "learning_rate": 0.001, |
| "loss": 0.7153, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.1699614524841309, |
| "learning_rate": 0.001, |
| "loss": 0.7156, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.3091782331466675, |
| "learning_rate": 0.001, |
| "loss": 0.6699, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 1.0730043649673462, |
| "learning_rate": 0.001, |
| "loss": 0.6543, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 1.6480448246002197, |
| "learning_rate": 0.001, |
| "loss": 0.6499, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 1.1549612283706665, |
| "learning_rate": 0.001, |
| "loss": 0.6457, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 0.9740864634513855, |
| "learning_rate": 0.001, |
| "loss": 0.6583, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.0623377561569214, |
| "learning_rate": 0.001, |
| "loss": 0.6563, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.1579992771148682, |
| "learning_rate": 0.001, |
| "loss": 0.6723, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 1.4379750490188599, |
| "learning_rate": 0.001, |
| "loss": 0.6672, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 1.191053032875061, |
| "learning_rate": 0.001, |
| "loss": 0.6594, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 1.3507729768753052, |
| "learning_rate": 0.001, |
| "loss": 0.6797, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.240713119506836, |
| "learning_rate": 0.001, |
| "loss": 0.6605, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 1.1125223636627197, |
| "learning_rate": 0.001, |
| "loss": 0.6766, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 1.2132062911987305, |
| "learning_rate": 0.001, |
| "loss": 0.6815, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.2277776002883911, |
| "learning_rate": 0.001, |
| "loss": 0.684, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 1.0552458763122559, |
| "learning_rate": 0.001, |
| "loss": 0.6758, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.1401628255844116, |
| "learning_rate": 0.001, |
| "loss": 0.685, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 1.225094199180603, |
| "learning_rate": 0.001, |
| "loss": 0.6842, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 1.2406818866729736, |
| "learning_rate": 0.001, |
| "loss": 0.695, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 0.9722842574119568, |
| "learning_rate": 0.001, |
| "loss": 0.6877, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 1.3157597780227661, |
| "learning_rate": 0.001, |
| "loss": 0.7033, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.02400803565979, |
| "learning_rate": 0.001, |
| "loss": 0.6946, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.0910509824752808, |
| "learning_rate": 0.001, |
| "loss": 0.7019, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.3276338577270508, |
| "learning_rate": 0.001, |
| "loss": 0.7062, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 1.2191648483276367, |
| "learning_rate": 0.001, |
| "loss": 0.6972, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.43450129032135, |
| "learning_rate": 0.001, |
| "loss": 0.706, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.2280073165893555, |
| "learning_rate": 0.001, |
| "loss": 0.6939, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 1.187094807624817, |
| "learning_rate": 0.001, |
| "loss": 0.6895, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.1127578020095825, |
| "learning_rate": 0.001, |
| "loss": 0.709, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 1.1391856670379639, |
| "learning_rate": 0.001, |
| "loss": 0.7105, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 1.3863335847854614, |
| "learning_rate": 0.001, |
| "loss": 0.721, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 0.9819924235343933, |
| "learning_rate": 0.001, |
| "loss": 0.7063, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 0.818321168422699, |
| "learning_rate": 0.001, |
| "loss": 0.6418, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 0.7625473737716675, |
| "learning_rate": 0.001, |
| "loss": 0.6364, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 0.5951278209686279, |
| "learning_rate": 0.001, |
| "loss": 0.6252, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 0.5998912453651428, |
| "learning_rate": 0.001, |
| "loss": 0.6378, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 1.1031502485275269, |
| "learning_rate": 0.001, |
| "loss": 0.6487, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 0.686004102230072, |
| "learning_rate": 0.001, |
| "loss": 0.6581, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 0.9831950068473816, |
| "learning_rate": 0.001, |
| "loss": 0.6453, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 0.9751262664794922, |
| "learning_rate": 0.001, |
| "loss": 0.6528, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 0.9873791337013245, |
| "learning_rate": 0.001, |
| "loss": 0.6597, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 0.6696386933326721, |
| "learning_rate": 0.001, |
| "loss": 0.6569, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 0.8646842837333679, |
| "learning_rate": 0.001, |
| "loss": 0.6658, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 0.4439694881439209, |
| "learning_rate": 0.001, |
| "loss": 0.6511, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 0.6436874270439148, |
| "learning_rate": 0.001, |
| "loss": 0.6722, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 1.4539772272109985, |
| "learning_rate": 0.001, |
| "loss": 0.6739, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 1.248949408531189, |
| "learning_rate": 0.001, |
| "loss": 0.6787, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 0.9955918788909912, |
| "learning_rate": 0.001, |
| "loss": 0.6826, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 1.4825260639190674, |
| "learning_rate": 0.001, |
| "loss": 0.68, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 1.0058106184005737, |
| "learning_rate": 0.001, |
| "loss": 0.6706, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 1.0586559772491455, |
| "learning_rate": 0.001, |
| "loss": 0.6747, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 0.8397805094718933, |
| "learning_rate": 0.001, |
| "loss": 0.6897, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 0.8589861392974854, |
| "learning_rate": 0.001, |
| "loss": 0.6802, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 0.6333179473876953, |
| "learning_rate": 0.001, |
| "loss": 0.7055, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 0.7798647284507751, |
| "learning_rate": 0.001, |
| "loss": 0.6974, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 1.4674975872039795, |
| "learning_rate": 0.001, |
| "loss": 0.6912, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 0.7736033201217651, |
| "learning_rate": 0.001, |
| "loss": 0.6948, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 0.7464256882667542, |
| "learning_rate": 0.001, |
| "loss": 0.6924, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 0.839506983757019, |
| "learning_rate": 0.001, |
| "loss": 0.6994, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 0.6937113404273987, |
| "learning_rate": 0.001, |
| "loss": 0.6914, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 0.8933976888656616, |
| "learning_rate": 0.001, |
| "loss": 0.715, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 0.8206430673599243, |
| "learning_rate": 0.001, |
| "loss": 0.707, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 0.863739013671875, |
| "learning_rate": 0.001, |
| "loss": 0.7061, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 1.6361074447631836, |
| "learning_rate": 0.001, |
| "loss": 0.6643, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 1.581034779548645, |
| "learning_rate": 0.001, |
| "loss": 0.6275, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 1.680957317352295, |
| "learning_rate": 0.001, |
| "loss": 0.6349, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 1.7037603855133057, |
| "learning_rate": 0.001, |
| "loss": 0.634, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 1.5957320928573608, |
| "learning_rate": 0.001, |
| "loss": 0.6395, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.8327863216400146, |
| "learning_rate": 0.001, |
| "loss": 0.6531, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 1.845932960510254, |
| "learning_rate": 0.001, |
| "loss": 0.6402, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 1.7632895708084106, |
| "learning_rate": 0.001, |
| "loss": 0.6557, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 1.6277376413345337, |
| "learning_rate": 0.001, |
| "loss": 0.6491, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 1.7220267057418823, |
| "learning_rate": 0.001, |
| "loss": 0.6503, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 1.6623505353927612, |
| "learning_rate": 0.001, |
| "loss": 0.6621, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 1.4514002799987793, |
| "learning_rate": 0.001, |
| "loss": 0.6541, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 1.77423095703125, |
| "learning_rate": 0.001, |
| "loss": 0.6583, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 1.649661898612976, |
| "learning_rate": 0.001, |
| "loss": 0.6752, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 1.8120254278182983, |
| "learning_rate": 0.001, |
| "loss": 0.6613, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.68980872631073, |
| "learning_rate": 0.001, |
| "loss": 0.6742, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.6394644975662231, |
| "learning_rate": 0.001, |
| "loss": 0.6718, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 1.7204967737197876, |
| "learning_rate": 0.001, |
| "loss": 0.6731, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.4989378452301025, |
| "learning_rate": 0.001, |
| "loss": 0.6842, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 1.6579501628875732, |
| "learning_rate": 0.001, |
| "loss": 0.6813, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.2888191938400269, |
| "learning_rate": 0.001, |
| "loss": 0.6843, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 1.838328242301941, |
| "learning_rate": 0.001, |
| "loss": 0.6749, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.8516156673431396, |
| "learning_rate": 0.001, |
| "loss": 0.6793, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 1.3773984909057617, |
| "learning_rate": 0.001, |
| "loss": 0.6932, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.4964762926101685, |
| "learning_rate": 0.001, |
| "loss": 0.6755, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 1.436951994895935, |
| "learning_rate": 0.001, |
| "loss": 0.6887, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 1.999688982963562, |
| "learning_rate": 0.001, |
| "loss": 0.6755, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 2.138962507247925, |
| "learning_rate": 0.001, |
| "loss": 0.6728, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.625344157218933, |
| "learning_rate": 0.001, |
| "loss": 0.6871, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 1.556227445602417, |
| "learning_rate": 0.001, |
| "loss": 0.692, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 1.5213489532470703, |
| "learning_rate": 0.001, |
| "loss": 0.6942, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.5821621417999268, |
| "learning_rate": 0.001, |
| "loss": 0.6547, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 1.2159498929977417, |
| "learning_rate": 0.001, |
| "loss": 0.6171, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.3063063621520996, |
| "learning_rate": 0.001, |
| "loss": 0.6401, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 1.501042127609253, |
| "learning_rate": 0.001, |
| "loss": 0.6274, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 1.4862663745880127, |
| "learning_rate": 0.001, |
| "loss": 0.6215, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.3393819332122803, |
| "learning_rate": 0.001, |
| "loss": 0.6366, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.5676498413085938, |
| "learning_rate": 0.001, |
| "loss": 0.6364, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.6434075832366943, |
| "learning_rate": 0.001, |
| "loss": 0.6319, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 1.2878286838531494, |
| "learning_rate": 0.001, |
| "loss": 0.6592, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 1.3613319396972656, |
| "learning_rate": 0.001, |
| "loss": 0.647, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 1.654203176498413, |
| "learning_rate": 0.001, |
| "loss": 0.6508, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 1.6589421033859253, |
| "learning_rate": 0.001, |
| "loss": 0.6401, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.7555818557739258, |
| "learning_rate": 0.001, |
| "loss": 0.6528, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.5702903270721436, |
| "learning_rate": 0.001, |
| "loss": 0.6553, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.3548332452774048, |
| "learning_rate": 0.001, |
| "loss": 0.6411, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.6495980024337769, |
| "learning_rate": 0.001, |
| "loss": 0.6673, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.319981336593628, |
| "learning_rate": 0.001, |
| "loss": 0.6493, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.2345103025436401, |
| "learning_rate": 0.001, |
| "loss": 0.6848, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 1.2018239498138428, |
| "learning_rate": 0.001, |
| "loss": 0.6668, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.442687749862671, |
| "learning_rate": 0.001, |
| "loss": 0.6462, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.4780453443527222, |
| "learning_rate": 0.001, |
| "loss": 0.6695, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 1.3161441087722778, |
| "learning_rate": 0.001, |
| "loss": 0.6759, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.5663788318634033, |
| "learning_rate": 0.001, |
| "loss": 0.6783, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.2903250455856323, |
| "learning_rate": 0.001, |
| "loss": 0.6802, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.2310444116592407, |
| "learning_rate": 0.001, |
| "loss": 0.6798, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.6267993450164795, |
| "learning_rate": 0.001, |
| "loss": 0.6747, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.3514257669448853, |
| "learning_rate": 0.001, |
| "loss": 0.6954, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 1.3700529336929321, |
| "learning_rate": 0.001, |
| "loss": 0.6921, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 1.3592313528060913, |
| "learning_rate": 0.001, |
| "loss": 0.6949, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.6864216327667236, |
| "learning_rate": 0.001, |
| "loss": 0.693, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 1.476345181465149, |
| "learning_rate": 0.001, |
| "loss": 0.679, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.2383800745010376, |
| "learning_rate": 0.001, |
| "loss": 0.6467, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.6404695510864258, |
| "learning_rate": 0.001, |
| "loss": 0.617, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.4813312292099, |
| "learning_rate": 0.001, |
| "loss": 0.612, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 1.823527455329895, |
| "learning_rate": 0.001, |
| "loss": 0.6325, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.2506108283996582, |
| "learning_rate": 0.001, |
| "loss": 0.6095, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.376138687133789, |
| "learning_rate": 0.001, |
| "loss": 0.6224, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 1.1488109827041626, |
| "learning_rate": 0.001, |
| "loss": 0.6249, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.6473678350448608, |
| "learning_rate": 0.001, |
| "loss": 0.6276, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 1.2410812377929688, |
| "learning_rate": 0.001, |
| "loss": 0.6226, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 10.854134559631348, |
| "learning_rate": 0.001, |
| "loss": 0.652, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 2.2806782722473145, |
| "learning_rate": 0.001, |
| "loss": 0.6421, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 1.4156832695007324, |
| "learning_rate": 0.001, |
| "loss": 0.6338, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.337144374847412, |
| "learning_rate": 0.001, |
| "loss": 0.65, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.3378078937530518, |
| "learning_rate": 0.001, |
| "loss": 0.6391, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.5442296266555786, |
| "learning_rate": 0.001, |
| "loss": 0.6588, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.5696725845336914, |
| "learning_rate": 0.001, |
| "loss": 0.6621, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.3986225128173828, |
| "learning_rate": 0.001, |
| "loss": 0.6463, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.4197642803192139, |
| "learning_rate": 0.001, |
| "loss": 0.6643, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.4512290954589844, |
| "learning_rate": 0.001, |
| "loss": 0.6533, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 1.717221975326538, |
| "learning_rate": 0.001, |
| "loss": 0.6638, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.4332921504974365, |
| "learning_rate": 0.001, |
| "loss": 0.6626, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 1.781604290008545, |
| "learning_rate": 0.001, |
| "loss": 0.6637, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.6162261962890625, |
| "learning_rate": 0.001, |
| "loss": 0.6542, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.3693976402282715, |
| "learning_rate": 0.001, |
| "loss": 0.6713, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.339870810508728, |
| "learning_rate": 0.001, |
| "loss": 0.667, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.6282362937927246, |
| "learning_rate": 0.001, |
| "loss": 0.677, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.3749769926071167, |
| "learning_rate": 0.001, |
| "loss": 0.6788, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.6163561344146729, |
| "learning_rate": 0.001, |
| "loss": 0.6856, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 1.7528187036514282, |
| "learning_rate": 0.001, |
| "loss": 0.6811, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.3337574005126953, |
| "learning_rate": 0.001, |
| "loss": 0.6813, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.4761725664138794, |
| "learning_rate": 0.001, |
| "loss": 0.688, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.1570292711257935, |
| "learning_rate": 0.001, |
| "loss": 0.634, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.2674442529678345, |
| "learning_rate": 0.001, |
| "loss": 0.6063, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.3792743682861328, |
| "learning_rate": 0.001, |
| "loss": 0.6035, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.5314244031906128, |
| "learning_rate": 0.001, |
| "loss": 0.6136, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 1.6034588813781738, |
| "learning_rate": 0.001, |
| "loss": 0.6097, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 1.53525972366333, |
| "learning_rate": 0.001, |
| "loss": 0.6319, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.1694210767745972, |
| "learning_rate": 0.001, |
| "loss": 0.6177, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.1147392988204956, |
| "learning_rate": 0.001, |
| "loss": 0.6296, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 2.554081916809082, |
| "learning_rate": 0.001, |
| "loss": 0.6303, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.8635549545288086, |
| "learning_rate": 0.001, |
| "loss": 0.6389, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.2993345260620117, |
| "learning_rate": 0.001, |
| "loss": 0.6266, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.4036643505096436, |
| "learning_rate": 0.001, |
| "loss": 0.6352, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 1.7309150695800781, |
| "learning_rate": 0.001, |
| "loss": 0.6386, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 1.4345393180847168, |
| "learning_rate": 0.001, |
| "loss": 0.6329, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.1931675672531128, |
| "learning_rate": 0.001, |
| "loss": 0.6399, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.2835602760314941, |
| "learning_rate": 0.001, |
| "loss": 0.636, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.2109442949295044, |
| "learning_rate": 0.001, |
| "loss": 0.6593, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.3101767301559448, |
| "learning_rate": 0.001, |
| "loss": 0.6553, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.3763705492019653, |
| "learning_rate": 0.001, |
| "loss": 0.6493, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.2936313152313232, |
| "learning_rate": 0.001, |
| "loss": 0.6668, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.3182551860809326, |
| "learning_rate": 0.001, |
| "loss": 0.6578, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.171347737312317, |
| "learning_rate": 0.001, |
| "loss": 0.6648, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.108046293258667, |
| "learning_rate": 0.001, |
| "loss": 0.6601, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 1.2180066108703613, |
| "learning_rate": 0.001, |
| "loss": 0.662, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 1.196394443511963, |
| "learning_rate": 0.001, |
| "loss": 0.6833, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.340371012687683, |
| "learning_rate": 0.001, |
| "loss": 0.6584, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.432305932044983, |
| "learning_rate": 0.001, |
| "loss": 0.6577, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.041042447090149, |
| "learning_rate": 0.001, |
| "loss": 0.6583, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.2910453081130981, |
| "learning_rate": 0.001, |
| "loss": 0.6918, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.4799001216888428, |
| "learning_rate": 0.001, |
| "loss": 0.6755, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.722066879272461, |
| "learning_rate": 0.001, |
| "loss": 0.6661, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.3788399696350098, |
| "learning_rate": 0.001, |
| "loss": 0.6136, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.6480493545532227, |
| "learning_rate": 0.001, |
| "loss": 0.608, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.5709948539733887, |
| "learning_rate": 0.001, |
| "loss": 0.6036, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.4029523134231567, |
| "learning_rate": 0.001, |
| "loss": 0.6171, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.2338638305664062, |
| "learning_rate": 0.001, |
| "loss": 0.6006, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 1.1037108898162842, |
| "learning_rate": 0.001, |
| "loss": 0.6153, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.4087430238723755, |
| "learning_rate": 0.001, |
| "loss": 0.6155, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.2912625074386597, |
| "learning_rate": 0.001, |
| "loss": 0.6161, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.2002849578857422, |
| "learning_rate": 0.001, |
| "loss": 0.611, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.1420046091079712, |
| "learning_rate": 0.001, |
| "loss": 0.6373, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 1.29826819896698, |
| "learning_rate": 0.001, |
| "loss": 0.6353, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 1.15346360206604, |
| "learning_rate": 0.001, |
| "loss": 0.6186, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.4631860256195068, |
| "learning_rate": 0.001, |
| "loss": 0.633, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.5273650884628296, |
| "learning_rate": 0.001, |
| "loss": 0.6317, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.2980990409851074, |
| "learning_rate": 0.001, |
| "loss": 0.6256, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 4.956090927124023, |
| "learning_rate": 0.001, |
| "loss": 0.6367, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 1.180713176727295, |
| "learning_rate": 0.001, |
| "loss": 0.6315, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 1.9830008745193481, |
| "learning_rate": 0.001, |
| "loss": 0.6323, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.653559923171997, |
| "learning_rate": 0.001, |
| "loss": 0.6339, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.3870145082473755, |
| "learning_rate": 0.001, |
| "loss": 0.6497, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.1964478492736816, |
| "learning_rate": 0.001, |
| "loss": 0.6457, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 1.1363627910614014, |
| "learning_rate": 0.001, |
| "loss": 0.6566, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 1.4076826572418213, |
| "learning_rate": 0.001, |
| "loss": 0.6512, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 1.313567876815796, |
| "learning_rate": 0.001, |
| "loss": 0.6445, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.2769737243652344, |
| "learning_rate": 0.001, |
| "loss": 0.651, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.3601458072662354, |
| "learning_rate": 0.001, |
| "loss": 0.6534, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.2652206420898438, |
| "learning_rate": 0.001, |
| "loss": 0.6746, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.2474119663238525, |
| "learning_rate": 0.001, |
| "loss": 0.6498, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.1776601076126099, |
| "learning_rate": 0.001, |
| "loss": 0.6568, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.3926881551742554, |
| "learning_rate": 0.001, |
| "loss": 0.6834, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.4210342168807983, |
| "learning_rate": 0.001, |
| "loss": 0.6644, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 1.1131622791290283, |
| "learning_rate": 0.001, |
| "loss": 0.5911, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.1499837636947632, |
| "learning_rate": 0.001, |
| "loss": 0.5899, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.542824625968933, |
| "learning_rate": 0.001, |
| "loss": 0.5949, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 0.9843227863311768, |
| "learning_rate": 0.001, |
| "loss": 0.6054, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.3814574480056763, |
| "learning_rate": 0.001, |
| "loss": 0.6014, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 1.4337519407272339, |
| "learning_rate": 0.001, |
| "loss": 0.6009, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.3197928667068481, |
| "learning_rate": 0.001, |
| "loss": 0.6177, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.289289116859436, |
| "learning_rate": 0.001, |
| "loss": 0.6107, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 1.8015815019607544, |
| "learning_rate": 0.001, |
| "loss": 0.6158, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 1.9143645763397217, |
| "learning_rate": 0.001, |
| "loss": 0.6172, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.310492753982544, |
| "learning_rate": 0.001, |
| "loss": 0.6177, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.1836750507354736, |
| "learning_rate": 0.001, |
| "loss": 0.6297, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 1.273499846458435, |
| "learning_rate": 0.001, |
| "loss": 0.6168, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 1.3184856176376343, |
| "learning_rate": 0.001, |
| "loss": 0.6205, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.370647668838501, |
| "learning_rate": 0.001, |
| "loss": 0.6336, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 1.4278833866119385, |
| "learning_rate": 0.001, |
| "loss": 0.6315, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 1.2103545665740967, |
| "learning_rate": 0.001, |
| "loss": 0.6377, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.6035828590393066, |
| "learning_rate": 0.001, |
| "loss": 0.6336, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.029595971107483, |
| "learning_rate": 0.001, |
| "loss": 0.6382, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.3785440921783447, |
| "learning_rate": 0.001, |
| "loss": 0.6392, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 1.218138337135315, |
| "learning_rate": 0.001, |
| "loss": 0.6394, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.326785922050476, |
| "learning_rate": 0.001, |
| "loss": 0.6552, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 1.4117070436477661, |
| "learning_rate": 0.001, |
| "loss": 0.6476, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 1.4691327810287476, |
| "learning_rate": 0.001, |
| "loss": 0.6415, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.4258376359939575, |
| "learning_rate": 0.001, |
| "loss": 0.6622, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 1.3137234449386597, |
| "learning_rate": 0.001, |
| "loss": 0.6443, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 1.0861976146697998, |
| "learning_rate": 0.001, |
| "loss": 0.6563, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.148963212966919, |
| "learning_rate": 0.001, |
| "loss": 0.6483, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 1.3883782625198364, |
| "learning_rate": 0.001, |
| "loss": 0.648, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 1.6844929456710815, |
| "learning_rate": 0.001, |
| "loss": 0.6661, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 1.6211166381835938, |
| "learning_rate": 0.001, |
| "loss": 0.6372, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 1.3715167045593262, |
| "learning_rate": 0.001, |
| "loss": 0.5833, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.0815142393112183, |
| "learning_rate": 0.001, |
| "loss": 0.5804, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.040615439414978, |
| "learning_rate": 0.001, |
| "loss": 0.5859, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 1.36244797706604, |
| "learning_rate": 0.001, |
| "loss": 0.5935, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.1301552057266235, |
| "learning_rate": 0.001, |
| "loss": 0.5946, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 1.00641930103302, |
| "learning_rate": 0.001, |
| "loss": 0.5962, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 1.5438146591186523, |
| "learning_rate": 0.001, |
| "loss": 0.6009, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.7968852519989014, |
| "learning_rate": 0.001, |
| "loss": 0.61, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 1.2997229099273682, |
| "learning_rate": 0.001, |
| "loss": 0.6055, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 1.2967760562896729, |
| "learning_rate": 0.001, |
| "loss": 0.5919, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 1.2664779424667358, |
| "learning_rate": 0.001, |
| "loss": 0.6116, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 1.5638916492462158, |
| "learning_rate": 0.001, |
| "loss": 0.6073, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.400978922843933, |
| "learning_rate": 0.001, |
| "loss": 0.6127, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.990863561630249, |
| "learning_rate": 0.001, |
| "loss": 0.6285, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.062779188156128, |
| "learning_rate": 0.001, |
| "loss": 0.6176, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 1.3369722366333008, |
| "learning_rate": 0.001, |
| "loss": 0.6216, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.756347417831421, |
| "learning_rate": 0.001, |
| "loss": 0.6291, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.065873384475708, |
| "learning_rate": 0.001, |
| "loss": 0.6181, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 1.2089930772781372, |
| "learning_rate": 0.001, |
| "loss": 0.6189, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 1.3150242567062378, |
| "learning_rate": 0.001, |
| "loss": 0.6453, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 1.3111896514892578, |
| "learning_rate": 0.001, |
| "loss": 0.6317, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.1152726411819458, |
| "learning_rate": 0.001, |
| "loss": 0.6405, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 1.3868318796157837, |
| "learning_rate": 0.001, |
| "loss": 0.6381, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 1.0794872045516968, |
| "learning_rate": 0.001, |
| "loss": 0.6367, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 0.9957221746444702, |
| "learning_rate": 0.001, |
| "loss": 0.6376, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.7803281545639038, |
| "learning_rate": 0.001, |
| "loss": 0.6441, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 1.3243619203567505, |
| "learning_rate": 0.001, |
| "loss": 0.6486, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 1.8025758266448975, |
| "learning_rate": 0.001, |
| "loss": 0.6266, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 1.5865435600280762, |
| "learning_rate": 0.001, |
| "loss": 0.6534, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 1.2580561637878418, |
| "learning_rate": 0.001, |
| "loss": 0.6571, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 1.0204856395721436, |
| "learning_rate": 0.001, |
| "loss": 0.6378, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 0.7897757887840271, |
| "learning_rate": 0.001, |
| "loss": 0.5688, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 0.9957269430160522, |
| "learning_rate": 0.001, |
| "loss": 0.5762, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 0.931441605091095, |
| "learning_rate": 0.001, |
| "loss": 0.5709, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 1.0603188276290894, |
| "learning_rate": 0.001, |
| "loss": 0.6035, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 0.9763708114624023, |
| "learning_rate": 0.001, |
| "loss": 0.5772, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 0.8747771382331848, |
| "learning_rate": 0.001, |
| "loss": 0.5889, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 0.968600869178772, |
| "learning_rate": 0.001, |
| "loss": 0.6063, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 0.8659926652908325, |
| "learning_rate": 0.001, |
| "loss": 0.5979, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 0.9334861040115356, |
| "learning_rate": 0.001, |
| "loss": 0.5813, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 1.1221340894699097, |
| "learning_rate": 0.001, |
| "loss": 0.6035, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 1.2959271669387817, |
| "learning_rate": 0.001, |
| "loss": 0.5931, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 0.9807857275009155, |
| "learning_rate": 0.001, |
| "loss": 0.6045, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.0263525247573853, |
| "learning_rate": 0.001, |
| "loss": 0.604, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 0.9026124477386475, |
| "learning_rate": 0.001, |
| "loss": 0.6266, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 1.1074974536895752, |
| "learning_rate": 0.001, |
| "loss": 0.6292, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 0.889342725276947, |
| "learning_rate": 0.001, |
| "loss": 0.6134, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 1.0654977560043335, |
| "learning_rate": 0.001, |
| "loss": 0.6055, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 1.0482105016708374, |
| "learning_rate": 0.001, |
| "loss": 0.6176, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 1.1681993007659912, |
| "learning_rate": 0.001, |
| "loss": 0.6305, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 1.0052601099014282, |
| "learning_rate": 0.001, |
| "loss": 0.6273, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 0.967351496219635, |
| "learning_rate": 0.001, |
| "loss": 0.6226, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 0.9480918049812317, |
| "learning_rate": 0.001, |
| "loss": 0.6232, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 1.2429511547088623, |
| "learning_rate": 0.001, |
| "loss": 0.6479, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 0.8635256290435791, |
| "learning_rate": 0.001, |
| "loss": 0.6254, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 1.0228506326675415, |
| "learning_rate": 0.001, |
| "loss": 0.6335, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 1.1268036365509033, |
| "learning_rate": 0.001, |
| "loss": 0.6306, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 0.8553975224494934, |
| "learning_rate": 0.001, |
| "loss": 0.6328, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 1.1963768005371094, |
| "learning_rate": 0.001, |
| "loss": 0.6224, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.0734686851501465, |
| "learning_rate": 0.001, |
| "loss": 0.6444, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 1.1173734664916992, |
| "learning_rate": 0.001, |
| "loss": 0.6419, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 2.1012940406799316, |
| "learning_rate": 0.001, |
| "loss": 0.6206, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.714339017868042, |
| "learning_rate": 0.001, |
| "loss": 0.5702, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 1.949273705482483, |
| "learning_rate": 0.001, |
| "loss": 0.5714, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 1.5728001594543457, |
| "learning_rate": 0.001, |
| "loss": 0.5665, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 1.9394041299819946, |
| "learning_rate": 0.001, |
| "loss": 0.5756, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 2.8248255252838135, |
| "learning_rate": 0.001, |
| "loss": 0.5865, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 2.1231889724731445, |
| "learning_rate": 0.001, |
| "loss": 0.5867, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 1.68788480758667, |
| "learning_rate": 0.001, |
| "loss": 0.5956, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 2.0351736545562744, |
| "learning_rate": 0.001, |
| "loss": 0.5791, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 39.95174789428711, |
| "learning_rate": 0.001, |
| "loss": 0.5842, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 1.5130423307418823, |
| "learning_rate": 0.001, |
| "loss": 0.6005, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 1.4413126707077026, |
| "learning_rate": 0.001, |
| "loss": 0.6138, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 1.9939687252044678, |
| "learning_rate": 0.001, |
| "loss": 0.5946, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 2.0666182041168213, |
| "learning_rate": 0.001, |
| "loss": 0.5939, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 1.77826726436615, |
| "learning_rate": 0.001, |
| "loss": 0.6037, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 1.637294888496399, |
| "learning_rate": 0.001, |
| "loss": 0.5942, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 1.2931591272354126, |
| "learning_rate": 0.001, |
| "loss": 0.6037, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 1.5712636709213257, |
| "learning_rate": 0.001, |
| "loss": 0.604, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 2.2092108726501465, |
| "learning_rate": 0.001, |
| "loss": 0.6223, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 1.896464228630066, |
| "learning_rate": 0.001, |
| "loss": 0.6131, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 1.501253366470337, |
| "learning_rate": 0.001, |
| "loss": 0.6169, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 2.152764081954956, |
| "learning_rate": 0.001, |
| "loss": 0.6189, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 1.7892820835113525, |
| "learning_rate": 0.001, |
| "loss": 0.626, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 1.704662799835205, |
| "learning_rate": 0.001, |
| "loss": 0.6233, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 1.6158897876739502, |
| "learning_rate": 0.001, |
| "loss": 0.6373, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 2.0007736682891846, |
| "learning_rate": 0.001, |
| "loss": 0.6404, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 2.2105071544647217, |
| "learning_rate": 0.001, |
| "loss": 0.6201, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 2.1892433166503906, |
| "learning_rate": 0.001, |
| "loss": 0.6174, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 2.033268451690674, |
| "learning_rate": 0.001, |
| "loss": 0.6263, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 1.7615630626678467, |
| "learning_rate": 0.001, |
| "loss": 0.6335, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 2.064373254776001, |
| "learning_rate": 0.001, |
| "loss": 0.6224, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.2215957641601562, |
| "learning_rate": 0.001, |
| "loss": 0.6624, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 1.3218648433685303, |
| "learning_rate": 0.001, |
| "loss": 0.5683, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.6484256982803345, |
| "learning_rate": 0.001, |
| "loss": 0.5646, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 2.0356903076171875, |
| "learning_rate": 0.001, |
| "loss": 0.5728, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.6391444206237793, |
| "learning_rate": 0.001, |
| "loss": 0.5682, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 1.3358436822891235, |
| "learning_rate": 0.001, |
| "loss": 0.572, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 1.5110633373260498, |
| "learning_rate": 0.001, |
| "loss": 0.5841, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 1.7556782960891724, |
| "learning_rate": 0.001, |
| "loss": 0.579, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 1.377893090248108, |
| "learning_rate": 0.001, |
| "loss": 0.5891, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 1.45765221118927, |
| "learning_rate": 0.001, |
| "loss": 0.5824, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 1.6839174032211304, |
| "learning_rate": 0.001, |
| "loss": 0.5854, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.3309268951416016, |
| "learning_rate": 0.001, |
| "loss": 0.585, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 1.6310690641403198, |
| "learning_rate": 0.001, |
| "loss": 0.592, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 1.7203037738800049, |
| "learning_rate": 0.001, |
| "loss": 0.5917, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.5274381637573242, |
| "learning_rate": 0.001, |
| "loss": 0.6048, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 1.3097580671310425, |
| "learning_rate": 0.001, |
| "loss": 0.5978, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 1.3245912790298462, |
| "learning_rate": 0.001, |
| "loss": 0.6101, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.7545794248580933, |
| "learning_rate": 0.001, |
| "loss": 0.6098, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 1.5376828908920288, |
| "learning_rate": 0.001, |
| "loss": 0.6023, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.4824035167694092, |
| "learning_rate": 0.001, |
| "loss": 0.6075, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 1.7261457443237305, |
| "learning_rate": 0.001, |
| "loss": 0.62, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 1.4751150608062744, |
| "learning_rate": 0.001, |
| "loss": 0.595, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 1.6435458660125732, |
| "learning_rate": 0.001, |
| "loss": 0.6185, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 1.823643445968628, |
| "learning_rate": 0.001, |
| "loss": 0.6149, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.5763131380081177, |
| "learning_rate": 0.001, |
| "loss": 0.6125, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 2.119630813598633, |
| "learning_rate": 0.001, |
| "loss": 0.6197, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 1.5855299234390259, |
| "learning_rate": 0.001, |
| "loss": 0.6174, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.9166241884231567, |
| "learning_rate": 0.001, |
| "loss": 0.6214, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 1.4405461549758911, |
| "learning_rate": 0.001, |
| "loss": 0.6125, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.537550449371338, |
| "learning_rate": 0.001, |
| "loss": 0.6363, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 1.497914433479309, |
| "learning_rate": 0.001, |
| "loss": 0.6071, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.598833441734314, |
| "learning_rate": 0.001, |
| "loss": 0.6439, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.656296968460083, |
| "learning_rate": 0.001, |
| "loss": 0.5466, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 1.80128812789917, |
| "learning_rate": 0.001, |
| "loss": 0.5598, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 1.3732154369354248, |
| "learning_rate": 0.001, |
| "loss": 0.5704, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.4523626565933228, |
| "learning_rate": 0.001, |
| "loss": 0.5613, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.4673432111740112, |
| "learning_rate": 0.001, |
| "loss": 0.592, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.9327133893966675, |
| "learning_rate": 0.001, |
| "loss": 0.5726, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.9804271459579468, |
| "learning_rate": 0.001, |
| "loss": 0.563, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 1.4549953937530518, |
| "learning_rate": 0.001, |
| "loss": 0.5724, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.4232009649276733, |
| "learning_rate": 0.001, |
| "loss": 0.5779, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.3493443727493286, |
| "learning_rate": 0.001, |
| "loss": 0.5708, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 1.3888554573059082, |
| "learning_rate": 0.001, |
| "loss": 0.5761, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.9216724634170532, |
| "learning_rate": 0.001, |
| "loss": 0.5856, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 1.405328631401062, |
| "learning_rate": 0.001, |
| "loss": 0.5776, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 6.138822078704834, |
| "learning_rate": 0.001, |
| "loss": 0.5921, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 1.3978585004806519, |
| "learning_rate": 0.001, |
| "loss": 0.6057, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.506648302078247, |
| "learning_rate": 0.001, |
| "loss": 0.6013, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.3363564014434814, |
| "learning_rate": 0.001, |
| "loss": 0.6018, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 1.5143280029296875, |
| "learning_rate": 0.001, |
| "loss": 0.6043, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.5944569110870361, |
| "learning_rate": 0.001, |
| "loss": 0.6056, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.5265306234359741, |
| "learning_rate": 0.001, |
| "loss": 0.6095, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.4436867237091064, |
| "learning_rate": 0.001, |
| "loss": 0.6039, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 1.7266173362731934, |
| "learning_rate": 0.001, |
| "loss": 0.6178, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.4980125427246094, |
| "learning_rate": 0.001, |
| "loss": 0.6012, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 1.6730942726135254, |
| "learning_rate": 0.001, |
| "loss": 0.5914, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 1.3241289854049683, |
| "learning_rate": 0.001, |
| "loss": 0.6106, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.7309767007827759, |
| "learning_rate": 0.001, |
| "loss": 0.6057, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 1.775754451751709, |
| "learning_rate": 0.001, |
| "loss": 0.6051, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.5161246061325073, |
| "learning_rate": 0.001, |
| "loss": 0.6182, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 1.2103925943374634, |
| "learning_rate": 0.001, |
| "loss": 0.6103, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.3319581747055054, |
| "learning_rate": 0.001, |
| "loss": 0.6308, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 1.5204486846923828, |
| "learning_rate": 0.001, |
| "loss": 0.6271, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.5178135633468628, |
| "learning_rate": 0.001, |
| "loss": 0.56, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 2.1006743907928467, |
| "learning_rate": 0.001, |
| "loss": 0.5537, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.3505820035934448, |
| "learning_rate": 0.001, |
| "loss": 0.552, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 1.6325558423995972, |
| "learning_rate": 0.001, |
| "loss": 0.5586, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.2984336614608765, |
| "learning_rate": 0.001, |
| "loss": 0.575, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 1.3796709775924683, |
| "learning_rate": 0.001, |
| "loss": 0.5591, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.330457091331482, |
| "learning_rate": 0.001, |
| "loss": 0.5662, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.3719303607940674, |
| "learning_rate": 0.001, |
| "loss": 0.5662, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.4298548698425293, |
| "learning_rate": 0.001, |
| "loss": 0.5764, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 1.5424270629882812, |
| "learning_rate": 0.001, |
| "loss": 0.5796, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 2.652244806289673, |
| "learning_rate": 0.001, |
| "loss": 0.5843, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.5292531251907349, |
| "learning_rate": 0.001, |
| "loss": 0.5734, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.3583437204360962, |
| "learning_rate": 0.001, |
| "loss": 0.5722, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 1.53679358959198, |
| "learning_rate": 0.001, |
| "loss": 0.5855, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 1.4344574213027954, |
| "learning_rate": 0.001, |
| "loss": 0.5878, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 1.6726317405700684, |
| "learning_rate": 0.001, |
| "loss": 0.6018, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 1.8626093864440918, |
| "learning_rate": 0.001, |
| "loss": 0.5872, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.1871416568756104, |
| "learning_rate": 0.001, |
| "loss": 0.5868, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.326026439666748, |
| "learning_rate": 0.001, |
| "loss": 0.5924, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.5485585927963257, |
| "learning_rate": 0.001, |
| "loss": 0.5912, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.8677207231521606, |
| "learning_rate": 0.001, |
| "loss": 0.5959, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 1.7053265571594238, |
| "learning_rate": 0.001, |
| "loss": 0.5952, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.3536996841430664, |
| "learning_rate": 0.001, |
| "loss": 0.5907, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 2.0897066593170166, |
| "learning_rate": 0.001, |
| "loss": 0.6039, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.4968990087509155, |
| "learning_rate": 0.001, |
| "loss": 0.5972, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.3414818048477173, |
| "learning_rate": 0.001, |
| "loss": 0.6101, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.3500386476516724, |
| "learning_rate": 0.001, |
| "loss": 0.6112, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 1.1630859375, |
| "learning_rate": 0.001, |
| "loss": 0.6123, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.5307071208953857, |
| "learning_rate": 0.001, |
| "loss": 0.6161, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 1.4781556129455566, |
| "learning_rate": 0.001, |
| "loss": 0.6133, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.4278903007507324, |
| "learning_rate": 0.001, |
| "loss": 0.5894, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 1.442977786064148, |
| "learning_rate": 0.001, |
| "loss": 0.534, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.3905531167984009, |
| "learning_rate": 0.001, |
| "loss": 0.561, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 1.9299741983413696, |
| "learning_rate": 0.001, |
| "loss": 0.5489, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.498746633529663, |
| "learning_rate": 0.001, |
| "loss": 0.5542, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.8542300462722778, |
| "learning_rate": 0.001, |
| "loss": 0.5478, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.0949512720108032, |
| "learning_rate": 0.001, |
| "loss": 0.5619, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.170140027999878, |
| "learning_rate": 0.001, |
| "loss": 0.5533, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.5471538305282593, |
| "learning_rate": 0.001, |
| "loss": 0.5618, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.3798595666885376, |
| "learning_rate": 0.001, |
| "loss": 0.5587, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.5981390476226807, |
| "learning_rate": 0.001, |
| "loss": 0.5715, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.3824917078018188, |
| "learning_rate": 0.001, |
| "loss": 0.5809, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 1.6975314617156982, |
| "learning_rate": 0.001, |
| "loss": 0.5744, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.794413685798645, |
| "learning_rate": 0.001, |
| "loss": 0.5806, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 1.2590088844299316, |
| "learning_rate": 0.001, |
| "loss": 0.5779, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.3916282653808594, |
| "learning_rate": 0.001, |
| "loss": 0.5704, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 1.3690452575683594, |
| "learning_rate": 0.001, |
| "loss": 0.5878, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.6590235233306885, |
| "learning_rate": 0.001, |
| "loss": 0.5892, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.3054158687591553, |
| "learning_rate": 0.001, |
| "loss": 0.5834, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.5784341096878052, |
| "learning_rate": 0.001, |
| "loss": 0.5712, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.4556595087051392, |
| "learning_rate": 0.001, |
| "loss": 0.5901, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.4017647504806519, |
| "learning_rate": 0.001, |
| "loss": 0.5984, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 1.2876161336898804, |
| "learning_rate": 0.001, |
| "loss": 0.5852, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.0554882287979126, |
| "learning_rate": 0.001, |
| "loss": 0.6103, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.4979000091552734, |
| "learning_rate": 0.001, |
| "loss": 0.6002, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.4148439168930054, |
| "learning_rate": 0.001, |
| "loss": 0.5982, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 1.569904088973999, |
| "learning_rate": 0.001, |
| "loss": 0.6052, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 1.2948265075683594, |
| "learning_rate": 0.001, |
| "loss": 0.6032, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 1.949517011642456, |
| "learning_rate": 0.001, |
| "loss": 0.5991, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 1.2780336141586304, |
| "learning_rate": 0.001, |
| "loss": 0.6034, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 2.284240245819092, |
| "learning_rate": 0.001, |
| "loss": 0.6101, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.7869925498962402, |
| "learning_rate": 0.001, |
| "loss": 0.5696, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.3875548839569092, |
| "learning_rate": 0.001, |
| "loss": 0.545, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 1.3865928649902344, |
| "learning_rate": 0.001, |
| "loss": 0.5482, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 1.2828845977783203, |
| "learning_rate": 0.001, |
| "loss": 0.552, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.516929030418396, |
| "learning_rate": 0.001, |
| "loss": 0.5515, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.5746568441390991, |
| "learning_rate": 0.001, |
| "loss": 0.5452, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 1.2963000535964966, |
| "learning_rate": 0.001, |
| "loss": 0.5619, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.3902140855789185, |
| "learning_rate": 0.001, |
| "loss": 0.5516, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.8248586654663086, |
| "learning_rate": 0.001, |
| "loss": 0.5513, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.8046656847000122, |
| "learning_rate": 0.001, |
| "loss": 0.569, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.6079381704330444, |
| "learning_rate": 0.001, |
| "loss": 0.5721, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.4093081951141357, |
| "learning_rate": 0.001, |
| "loss": 0.5692, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.3620543479919434, |
| "learning_rate": 0.001, |
| "loss": 0.5683, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 1.451627254486084, |
| "learning_rate": 0.001, |
| "loss": 0.565, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 1.3643581867218018, |
| "learning_rate": 0.001, |
| "loss": 0.5579, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.289871096611023, |
| "learning_rate": 0.001, |
| "loss": 0.5703, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 1.699236512184143, |
| "learning_rate": 0.001, |
| "loss": 0.5651, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 1.1216737031936646, |
| "learning_rate": 0.001, |
| "loss": 0.5717, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 1.4215786457061768, |
| "learning_rate": 0.001, |
| "loss": 0.5855, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.5842723846435547, |
| "learning_rate": 0.001, |
| "loss": 0.5796, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 1.7459555864334106, |
| "learning_rate": 0.001, |
| "loss": 0.5807, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 1.1991825103759766, |
| "learning_rate": 0.001, |
| "loss": 0.5856, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.3949995040893555, |
| "learning_rate": 0.001, |
| "loss": 0.5847, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.186496376991272, |
| "learning_rate": 0.001, |
| "loss": 0.5825, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 1.3610190153121948, |
| "learning_rate": 0.001, |
| "loss": 0.5915, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 1.4443519115447998, |
| "learning_rate": 0.001, |
| "loss": 0.5822, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.031691074371338, |
| "learning_rate": 0.001, |
| "loss": 0.5897, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 1.4198780059814453, |
| "learning_rate": 0.001, |
| "loss": 0.6026, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.3856195211410522, |
| "learning_rate": 0.001, |
| "loss": 0.58, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.4383305311203003, |
| "learning_rate": 0.001, |
| "loss": 0.5963, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 1.4304252862930298, |
| "learning_rate": 0.001, |
| "loss": 0.6073, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.6289730072021484, |
| "learning_rate": 0.001, |
| "loss": 0.5603, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 1.623422622680664, |
| "learning_rate": 0.001, |
| "loss": 0.5222, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 1.2384452819824219, |
| "learning_rate": 0.001, |
| "loss": 0.5311, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 1.1145490407943726, |
| "learning_rate": 0.001, |
| "loss": 0.5302, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.1442946195602417, |
| "learning_rate": 0.001, |
| "loss": 0.5457, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 1.4592894315719604, |
| "learning_rate": 0.001, |
| "loss": 0.5369, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 0.9766262173652649, |
| "learning_rate": 0.001, |
| "loss": 0.5423, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 1.052048683166504, |
| "learning_rate": 0.001, |
| "loss": 0.5464, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 1.2609376907348633, |
| "learning_rate": 0.001, |
| "loss": 0.5612, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.1084191799163818, |
| "learning_rate": 0.001, |
| "loss": 0.5519, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 1.3045639991760254, |
| "learning_rate": 0.001, |
| "loss": 0.5564, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.4363510608673096, |
| "learning_rate": 0.001, |
| "loss": 0.5715, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 1.3683849573135376, |
| "learning_rate": 0.001, |
| "loss": 0.5619, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 1.4905529022216797, |
| "learning_rate": 0.001, |
| "loss": 0.5613, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.3460173606872559, |
| "learning_rate": 0.001, |
| "loss": 0.5746, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 1.5590981245040894, |
| "learning_rate": 0.001, |
| "loss": 0.5597, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 1.2848957777023315, |
| "learning_rate": 0.001, |
| "loss": 0.5638, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 1.3358205556869507, |
| "learning_rate": 0.001, |
| "loss": 0.5602, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 1.6583843231201172, |
| "learning_rate": 0.001, |
| "loss": 0.5654, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.0921430587768555, |
| "learning_rate": 0.001, |
| "loss": 0.5805, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 1.0443511009216309, |
| "learning_rate": 0.001, |
| "loss": 0.5708, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 1.2265650033950806, |
| "learning_rate": 0.001, |
| "loss": 0.5858, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.4629805088043213, |
| "learning_rate": 0.001, |
| "loss": 0.5847, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 1.3702300786972046, |
| "learning_rate": 0.001, |
| "loss": 0.5832, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.6281440258026123, |
| "learning_rate": 0.001, |
| "loss": 0.5861, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.4549918174743652, |
| "learning_rate": 0.001, |
| "loss": 0.5823, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 1.3140660524368286, |
| "learning_rate": 0.001, |
| "loss": 0.5759, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 1.5533117055892944, |
| "learning_rate": 0.001, |
| "loss": 0.586, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 1.5729376077651978, |
| "learning_rate": 0.001, |
| "loss": 0.5965, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 1.4772778749465942, |
| "learning_rate": 0.001, |
| "loss": 0.5939, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.5759507417678833, |
| "learning_rate": 0.001, |
| "loss": 0.5919, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 0.9670353531837463, |
| "learning_rate": 0.001, |
| "loss": 0.5414, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 1.0269129276275635, |
| "learning_rate": 0.001, |
| "loss": 0.5299, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 1.191085696220398, |
| "learning_rate": 0.001, |
| "loss": 0.5286, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 1.1801611185073853, |
| "learning_rate": 0.001, |
| "loss": 0.549, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 0.983653724193573, |
| "learning_rate": 0.001, |
| "loss": 0.5277, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 1.0740876197814941, |
| "learning_rate": 0.001, |
| "loss": 0.5339, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 1.0492289066314697, |
| "learning_rate": 0.001, |
| "loss": 0.5553, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 1.4024004936218262, |
| "learning_rate": 0.001, |
| "loss": 0.5512, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 0.8431639671325684, |
| "learning_rate": 0.001, |
| "loss": 0.538, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 0.8813731074333191, |
| "learning_rate": 0.001, |
| "loss": 0.5517, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 1.1411322355270386, |
| "learning_rate": 0.001, |
| "loss": 0.5547, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 1.0830614566802979, |
| "learning_rate": 0.001, |
| "loss": 0.5547, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 1.2333221435546875, |
| "learning_rate": 0.001, |
| "loss": 0.5565, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 1.199706792831421, |
| "learning_rate": 0.001, |
| "loss": 0.5625, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 1.2191143035888672, |
| "learning_rate": 0.001, |
| "loss": 0.5647, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 0.8158851861953735, |
| "learning_rate": 0.001, |
| "loss": 0.5592, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 0.9935479760169983, |
| "learning_rate": 0.001, |
| "loss": 0.558, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 1.1733251810073853, |
| "learning_rate": 0.001, |
| "loss": 0.5539, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 1.2278839349746704, |
| "learning_rate": 0.001, |
| "loss": 0.5626, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 3.4744834899902344, |
| "learning_rate": 0.001, |
| "loss": 0.5678, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 0.8104879856109619, |
| "learning_rate": 0.001, |
| "loss": 0.5721, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 0.9081707000732422, |
| "learning_rate": 0.001, |
| "loss": 0.571, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 1.0820246934890747, |
| "learning_rate": 0.001, |
| "loss": 0.5718, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 1.142275333404541, |
| "learning_rate": 0.001, |
| "loss": 0.5665, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 0.747945249080658, |
| "learning_rate": 0.001, |
| "loss": 0.5796, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 1.0675921440124512, |
| "learning_rate": 0.001, |
| "loss": 0.5735, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 1.234141230583191, |
| "learning_rate": 0.001, |
| "loss": 0.5672, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 1.0947504043579102, |
| "learning_rate": 0.001, |
| "loss": 0.5764, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 1.1369717121124268, |
| "learning_rate": 0.001, |
| "loss": 0.5814, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 1.0353525876998901, |
| "learning_rate": 0.001, |
| "loss": 0.5731, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 1.0561206340789795, |
| "learning_rate": 0.001, |
| "loss": 0.5865, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 1.0930769443511963, |
| "learning_rate": 0.001, |
| "loss": 0.5193, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 0.1771896481513977, |
| "learning_rate": 0.001, |
| "loss": 0.5157, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 1.141357660293579, |
| "learning_rate": 0.001, |
| "loss": 0.5361, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 0.5655999779701233, |
| "learning_rate": 0.001, |
| "loss": 0.5407, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 0.7469679713249207, |
| "learning_rate": 0.001, |
| "loss": 0.5313, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 0.29743582010269165, |
| "learning_rate": 0.001, |
| "loss": 0.5311, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 0.19023145735263824, |
| "learning_rate": 0.001, |
| "loss": 0.5385, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 0.530585765838623, |
| "learning_rate": 0.001, |
| "loss": 0.5263, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 0.21099801361560822, |
| "learning_rate": 0.001, |
| "loss": 0.5281, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 0.460519015789032, |
| "learning_rate": 0.001, |
| "loss": 0.5299, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 0.435160756111145, |
| "learning_rate": 0.001, |
| "loss": 0.5394, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 0.1967736929655075, |
| "learning_rate": 0.001, |
| "loss": 0.5525, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 1.248533844947815, |
| "learning_rate": 0.001, |
| "loss": 0.5491, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 0.20162171125411987, |
| "learning_rate": 0.001, |
| "loss": 0.5436, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 0.5278852581977844, |
| "learning_rate": 0.001, |
| "loss": 0.5521, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 0.3178282380104065, |
| "learning_rate": 0.001, |
| "loss": 0.5498, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 0.2286689579486847, |
| "learning_rate": 0.001, |
| "loss": 0.5596, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 0.18539604544639587, |
| "learning_rate": 0.001, |
| "loss": 0.5556, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 0.07810671627521515, |
| "learning_rate": 0.001, |
| "loss": 0.5644, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 0.5242074131965637, |
| "learning_rate": 0.001, |
| "loss": 0.5799, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 0.18300612270832062, |
| "learning_rate": 0.001, |
| "loss": 0.5625, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 0.3278989791870117, |
| "learning_rate": 0.001, |
| "loss": 0.5732, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 0.913730800151825, |
| "learning_rate": 0.001, |
| "loss": 0.5817, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 0.3457682132720947, |
| "learning_rate": 0.001, |
| "loss": 0.5654, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 0.430754154920578, |
| "learning_rate": 0.001, |
| "loss": 0.5754, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 0.433608740568161, |
| "learning_rate": 0.001, |
| "loss": 0.5754, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 0.19627642631530762, |
| "learning_rate": 0.001, |
| "loss": 0.5867, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 0.11609924584627151, |
| "learning_rate": 0.001, |
| "loss": 0.5757, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 0.7167121171951294, |
| "learning_rate": 0.001, |
| "loss": 0.5651, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 0.4575307369232178, |
| "learning_rate": 0.001, |
| "loss": 0.5579, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 0.15593650937080383, |
| "learning_rate": 0.001, |
| "loss": 0.5828, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 1.413036584854126, |
| "learning_rate": 0.001, |
| "loss": 0.5612, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 1.6599562168121338, |
| "learning_rate": 0.001, |
| "loss": 0.5069, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 1.707543969154358, |
| "learning_rate": 0.001, |
| "loss": 0.5186, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 1.6075223684310913, |
| "learning_rate": 0.001, |
| "loss": 0.5235, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 1.6342514753341675, |
| "learning_rate": 0.001, |
| "loss": 0.5195, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 2.0759618282318115, |
| "learning_rate": 0.001, |
| "loss": 0.5269, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 1.9232611656188965, |
| "learning_rate": 0.001, |
| "loss": 0.5439, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 1.413246989250183, |
| "learning_rate": 0.001, |
| "loss": 0.5142, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 1.647636890411377, |
| "learning_rate": 0.001, |
| "loss": 0.5399, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.4785175323486328, |
| "learning_rate": 0.001, |
| "loss": 0.5389, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.6953210830688477, |
| "learning_rate": 0.001, |
| "loss": 0.5401, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.4326900243759155, |
| "learning_rate": 0.001, |
| "loss": 0.5289, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 1.54912531375885, |
| "learning_rate": 0.001, |
| "loss": 0.5453, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 1.8962860107421875, |
| "learning_rate": 0.001, |
| "loss": 0.5377, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 1.2893810272216797, |
| "learning_rate": 0.001, |
| "loss": 0.5392, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.5049962997436523, |
| "learning_rate": 0.001, |
| "loss": 0.5516, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 1.5820329189300537, |
| "learning_rate": 0.001, |
| "loss": 0.5614, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 1.3666566610336304, |
| "learning_rate": 0.001, |
| "loss": 0.5431, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.2955440282821655, |
| "learning_rate": 0.001, |
| "loss": 0.5587, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 1.5790258646011353, |
| "learning_rate": 0.001, |
| "loss": 0.5448, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 1.5983870029449463, |
| "learning_rate": 0.001, |
| "loss": 0.5588, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.6741951704025269, |
| "learning_rate": 0.001, |
| "loss": 0.5467, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 1.8480662107467651, |
| "learning_rate": 0.001, |
| "loss": 0.5635, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.6002708673477173, |
| "learning_rate": 0.001, |
| "loss": 0.5654, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 1.3160319328308105, |
| "learning_rate": 0.001, |
| "loss": 0.561, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 1.7721960544586182, |
| "learning_rate": 0.001, |
| "loss": 0.5754, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 1.5011405944824219, |
| "learning_rate": 0.001, |
| "loss": 0.5592, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 1.771674633026123, |
| "learning_rate": 0.001, |
| "loss": 0.5749, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.3502850532531738, |
| "learning_rate": 0.001, |
| "loss": 0.5739, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.7754833698272705, |
| "learning_rate": 0.001, |
| "loss": 0.5752, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.694458246231079, |
| "learning_rate": 0.001, |
| "loss": 0.5826, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 1.396079421043396, |
| "learning_rate": 0.001, |
| "loss": 0.5537, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.532308578491211, |
| "learning_rate": 0.001, |
| "loss": 0.5143, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 1.5896692276000977, |
| "learning_rate": 0.001, |
| "loss": 0.5114, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.4605082273483276, |
| "learning_rate": 0.001, |
| "loss": 0.512, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.233646035194397, |
| "learning_rate": 0.001, |
| "loss": 0.515, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 1.7335985898971558, |
| "learning_rate": 0.001, |
| "loss": 0.5205, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.4268879890441895, |
| "learning_rate": 0.001, |
| "loss": 0.5347, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 1.8350880146026611, |
| "learning_rate": 0.001, |
| "loss": 0.5227, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 1.3111212253570557, |
| "learning_rate": 0.001, |
| "loss": 0.512, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 1.2222663164138794, |
| "learning_rate": 0.001, |
| "loss": 0.5264, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 1.6778181791305542, |
| "learning_rate": 0.001, |
| "loss": 0.525, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.714513897895813, |
| "learning_rate": 0.001, |
| "loss": 0.53, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.4410483837127686, |
| "learning_rate": 0.001, |
| "loss": 0.5352, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 1.4495421648025513, |
| "learning_rate": 0.001, |
| "loss": 0.5429, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 1.4672760963439941, |
| "learning_rate": 0.001, |
| "loss": 0.5369, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 1.186777114868164, |
| "learning_rate": 0.001, |
| "loss": 0.5525, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.2865841388702393, |
| "learning_rate": 0.001, |
| "loss": 0.5406, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.964085340499878, |
| "learning_rate": 0.001, |
| "loss": 0.5471, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 1.2692760229110718, |
| "learning_rate": 0.001, |
| "loss": 0.5511, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.427521824836731, |
| "learning_rate": 0.001, |
| "loss": 0.5463, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 1.5266354084014893, |
| "learning_rate": 0.001, |
| "loss": 0.5682, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 1.3747498989105225, |
| "learning_rate": 0.001, |
| "loss": 0.5447, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.8401381969451904, |
| "learning_rate": 0.001, |
| "loss": 0.5509, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.593461513519287, |
| "learning_rate": 0.001, |
| "loss": 0.569, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 1.2960352897644043, |
| "learning_rate": 0.001, |
| "loss": 0.5557, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 1.2029883861541748, |
| "learning_rate": 0.001, |
| "loss": 0.5646, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.672709584236145, |
| "learning_rate": 0.001, |
| "loss": 0.5518, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.5223016738891602, |
| "learning_rate": 0.001, |
| "loss": 0.5597, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.2413277626037598, |
| "learning_rate": 0.001, |
| "loss": 0.5648, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.4993832111358643, |
| "learning_rate": 0.001, |
| "loss": 0.571, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 1.328671932220459, |
| "learning_rate": 0.001, |
| "loss": 0.5648, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.6180659532546997, |
| "learning_rate": 0.001, |
| "loss": 0.5366, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.5706161260604858, |
| "learning_rate": 0.001, |
| "loss": 0.5075, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.5466357469558716, |
| "learning_rate": 0.001, |
| "loss": 0.4995, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 1.4233200550079346, |
| "learning_rate": 0.001, |
| "loss": 0.507, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 3.640650749206543, |
| "learning_rate": 0.001, |
| "loss": 0.5213, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.5065131187438965, |
| "learning_rate": 0.001, |
| "loss": 0.5084, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.3846442699432373, |
| "learning_rate": 0.001, |
| "loss": 0.5188, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 1.592302918434143, |
| "learning_rate": 0.001, |
| "loss": 0.5206, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.4627457857131958, |
| "learning_rate": 0.001, |
| "loss": 0.5306, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 1.4785966873168945, |
| "learning_rate": 0.001, |
| "loss": 0.5259, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.6304181814193726, |
| "learning_rate": 0.001, |
| "loss": 0.5293, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 1.4955110549926758, |
| "learning_rate": 0.001, |
| "loss": 0.5267, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.3782621622085571, |
| "learning_rate": 0.001, |
| "loss": 0.5309, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.5828373432159424, |
| "learning_rate": 0.001, |
| "loss": 0.5314, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 2.112483024597168, |
| "learning_rate": 0.001, |
| "loss": 0.5381, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 1.565582036972046, |
| "learning_rate": 0.001, |
| "loss": 0.5339, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.747968316078186, |
| "learning_rate": 0.001, |
| "loss": 0.5372, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 1.6491109132766724, |
| "learning_rate": 0.001, |
| "loss": 0.5333, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.4201022386550903, |
| "learning_rate": 0.001, |
| "loss": 0.5317, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 1.7670608758926392, |
| "learning_rate": 0.001, |
| "loss": 0.5339, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 1.3140902519226074, |
| "learning_rate": 0.001, |
| "loss": 0.539, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.4319005012512207, |
| "learning_rate": 0.001, |
| "loss": 0.5504, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.4455088376998901, |
| "learning_rate": 0.001, |
| "loss": 0.5456, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 1.309688687324524, |
| "learning_rate": 0.001, |
| "loss": 0.557, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.356952428817749, |
| "learning_rate": 0.001, |
| "loss": 0.5575, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.2032933235168457, |
| "learning_rate": 0.001, |
| "loss": 0.5477, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.314985990524292, |
| "learning_rate": 0.001, |
| "loss": 0.554, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 1.465287685394287, |
| "learning_rate": 0.001, |
| "loss": 0.5621, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.4359557628631592, |
| "learning_rate": 0.001, |
| "loss": 0.5535, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.966088056564331, |
| "learning_rate": 0.001, |
| "loss": 0.5639, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 1.3312709331512451, |
| "learning_rate": 0.001, |
| "loss": 0.5657, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 1.2351542711257935, |
| "learning_rate": 0.001, |
| "loss": 0.5102, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.573776364326477, |
| "learning_rate": 0.001, |
| "loss": 0.4925, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 1.472129225730896, |
| "learning_rate": 0.001, |
| "loss": 0.4985, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.5297859907150269, |
| "learning_rate": 0.001, |
| "loss": 0.5091, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 1.306641697883606, |
| "learning_rate": 0.001, |
| "loss": 0.5116, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.8457226753234863, |
| "learning_rate": 0.001, |
| "loss": 0.5082, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 1.9189332723617554, |
| "learning_rate": 0.001, |
| "loss": 0.5053, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.3402392864227295, |
| "learning_rate": 0.001, |
| "loss": 0.5216, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 1.5878645181655884, |
| "learning_rate": 0.001, |
| "loss": 0.5042, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.192588448524475, |
| "learning_rate": 0.001, |
| "loss": 0.5133, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 1.310906171798706, |
| "learning_rate": 0.001, |
| "loss": 0.522, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.6999276876449585, |
| "learning_rate": 0.001, |
| "loss": 0.5244, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.4118130207061768, |
| "learning_rate": 0.001, |
| "loss": 0.5215, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.5291193723678589, |
| "learning_rate": 0.001, |
| "loss": 0.5291, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.4727531671524048, |
| "learning_rate": 0.001, |
| "loss": 0.5372, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 1.576590657234192, |
| "learning_rate": 0.001, |
| "loss": 0.5243, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 1.39322030544281, |
| "learning_rate": 0.001, |
| "loss": 0.5249, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 1.2230325937271118, |
| "learning_rate": 0.001, |
| "loss": 0.5388, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.5058960914611816, |
| "learning_rate": 0.001, |
| "loss": 0.5271, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 1.4781763553619385, |
| "learning_rate": 0.001, |
| "loss": 0.5506, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.458723783493042, |
| "learning_rate": 0.001, |
| "loss": 0.5491, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 1.423794150352478, |
| "learning_rate": 0.001, |
| "loss": 0.5513, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.9729347229003906, |
| "learning_rate": 0.001, |
| "loss": 0.5496, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.5807349681854248, |
| "learning_rate": 0.001, |
| "loss": 0.5364, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.4756208658218384, |
| "learning_rate": 0.001, |
| "loss": 0.5361, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 2.0747814178466797, |
| "learning_rate": 0.001, |
| "loss": 0.5479, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.1575489044189453, |
| "learning_rate": 0.001, |
| "loss": 0.5413, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.3140865564346313, |
| "learning_rate": 0.001, |
| "loss": 0.561, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.5108518600463867, |
| "learning_rate": 0.001, |
| "loss": 0.5559, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.3009579181671143, |
| "learning_rate": 0.001, |
| "loss": 0.5639, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 1.4407790899276733, |
| "learning_rate": 0.001, |
| "loss": 0.5556, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 4.573031902313232, |
| "learning_rate": 0.001, |
| "loss": 0.5021, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 1.7188622951507568, |
| "learning_rate": 0.001, |
| "loss": 0.4948, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 1.1180098056793213, |
| "learning_rate": 0.001, |
| "loss": 0.4918, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.9229891300201416, |
| "learning_rate": 0.001, |
| "loss": 0.5011, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 1.5299087762832642, |
| "learning_rate": 0.001, |
| "loss": 0.4973, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.6426825523376465, |
| "learning_rate": 0.001, |
| "loss": 0.5023, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.409515142440796, |
| "learning_rate": 0.001, |
| "loss": 0.5165, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 1.6629232168197632, |
| "learning_rate": 0.001, |
| "loss": 0.5056, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.4850993156433105, |
| "learning_rate": 0.001, |
| "loss": 0.5186, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 1.3737995624542236, |
| "learning_rate": 0.001, |
| "loss": 0.52, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 1.550413966178894, |
| "learning_rate": 0.001, |
| "loss": 0.5109, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.4993878602981567, |
| "learning_rate": 0.001, |
| "loss": 0.521, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.1930394172668457, |
| "learning_rate": 0.001, |
| "loss": 0.5255, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.3800441026687622, |
| "learning_rate": 0.001, |
| "loss": 0.525, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 1.4232189655303955, |
| "learning_rate": 0.001, |
| "loss": 0.522, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.3573640584945679, |
| "learning_rate": 0.001, |
| "loss": 0.524, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 1.6436176300048828, |
| "learning_rate": 0.001, |
| "loss": 0.5277, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.6628504991531372, |
| "learning_rate": 0.001, |
| "loss": 0.5448, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 1.403755784034729, |
| "learning_rate": 0.001, |
| "loss": 0.5198, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 1.6308602094650269, |
| "learning_rate": 0.001, |
| "loss": 0.5291, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.2523367404937744, |
| "learning_rate": 0.001, |
| "loss": 0.5306, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.445035457611084, |
| "learning_rate": 0.001, |
| "loss": 0.542, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 1.7217998504638672, |
| "learning_rate": 0.001, |
| "loss": 0.5308, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.1785924434661865, |
| "learning_rate": 0.001, |
| "loss": 0.5486, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 1.978576898574829, |
| "learning_rate": 0.001, |
| "loss": 0.5426, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.4352853298187256, |
| "learning_rate": 0.001, |
| "loss": 0.5462, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 1.4232949018478394, |
| "learning_rate": 0.001, |
| "loss": 0.5443, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 2.1108646392822266, |
| "learning_rate": 0.001, |
| "loss": 0.5451, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 1.9700771570205688, |
| "learning_rate": 0.001, |
| "loss": 0.5406, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 1.5097615718841553, |
| "learning_rate": 0.001, |
| "loss": 0.5569, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 1.2994375228881836, |
| "learning_rate": 0.001, |
| "loss": 0.5285, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 1.177480697631836, |
| "learning_rate": 0.001, |
| "loss": 0.4835, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 1.5064599514007568, |
| "learning_rate": 0.001, |
| "loss": 0.488, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.557698369026184, |
| "learning_rate": 0.001, |
| "loss": 0.49, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 1.3125524520874023, |
| "learning_rate": 0.001, |
| "loss": 0.5003, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 1.453548550605774, |
| "learning_rate": 0.001, |
| "loss": 0.5017, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 1.3550654649734497, |
| "learning_rate": 0.001, |
| "loss": 0.5051, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.2782098054885864, |
| "learning_rate": 0.001, |
| "loss": 0.4999, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 1.5780470371246338, |
| "learning_rate": 0.001, |
| "loss": 0.5014, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 1.6237266063690186, |
| "learning_rate": 0.001, |
| "loss": 0.5, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 1.1814064979553223, |
| "learning_rate": 0.001, |
| "loss": 0.5206, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 1.1381033658981323, |
| "learning_rate": 0.001, |
| "loss": 0.4945, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.3088628053665161, |
| "learning_rate": 0.001, |
| "loss": 0.505, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 1.3342161178588867, |
| "learning_rate": 0.001, |
| "loss": 0.5175, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 1.3151096105575562, |
| "learning_rate": 0.001, |
| "loss": 0.5176, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.4550706148147583, |
| "learning_rate": 0.001, |
| "loss": 0.5305, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 1.0697120428085327, |
| "learning_rate": 0.001, |
| "loss": 0.5091, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.358964443206787, |
| "learning_rate": 0.001, |
| "loss": 0.5143, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 1.603599190711975, |
| "learning_rate": 0.001, |
| "loss": 0.5222, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 1.2112364768981934, |
| "learning_rate": 0.001, |
| "loss": 0.5381, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 1.5382639169692993, |
| "learning_rate": 0.001, |
| "loss": 0.52, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 1.3610308170318604, |
| "learning_rate": 0.001, |
| "loss": 0.5281, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 1.3466453552246094, |
| "learning_rate": 0.001, |
| "loss": 0.5268, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 1.0718330144882202, |
| "learning_rate": 0.001, |
| "loss": 0.539, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 1.501024603843689, |
| "learning_rate": 0.001, |
| "loss": 0.5298, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 1.89411461353302, |
| "learning_rate": 0.001, |
| "loss": 0.5463, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 1.5171462297439575, |
| "learning_rate": 0.001, |
| "loss": 0.5408, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 1.353501796722412, |
| "learning_rate": 0.001, |
| "loss": 0.5524, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.2344218492507935, |
| "learning_rate": 0.001, |
| "loss": 0.5385, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 1.0975984334945679, |
| "learning_rate": 0.001, |
| "loss": 0.5289, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 1.1510794162750244, |
| "learning_rate": 0.001, |
| "loss": 0.5447, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 1.4088082313537598, |
| "learning_rate": 0.001, |
| "loss": 0.5271, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 1.0249762535095215, |
| "learning_rate": 0.001, |
| "loss": 0.4862, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 1.7136085033416748, |
| "learning_rate": 0.001, |
| "loss": 0.4803, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 1.3353995084762573, |
| "learning_rate": 0.001, |
| "loss": 0.4792, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 1.2580580711364746, |
| "learning_rate": 0.001, |
| "loss": 0.4859, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 1.9082170724868774, |
| "learning_rate": 0.001, |
| "loss": 0.4976, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 0.9331086277961731, |
| "learning_rate": 0.001, |
| "loss": 0.4908, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 1.1894890069961548, |
| "learning_rate": 0.001, |
| "loss": 0.5025, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 2.082413673400879, |
| "learning_rate": 0.001, |
| "loss": 0.4951, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 1.085007905960083, |
| "learning_rate": 0.001, |
| "loss": 0.5051, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 4.038010120391846, |
| "learning_rate": 0.001, |
| "loss": 0.4978, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 0.947473406791687, |
| "learning_rate": 0.001, |
| "loss": 0.504, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 0.9688652157783508, |
| "learning_rate": 0.001, |
| "loss": 0.515, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 1.1633764505386353, |
| "learning_rate": 0.001, |
| "loss": 0.516, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 1.1820343732833862, |
| "learning_rate": 0.001, |
| "loss": 0.5087, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 0.9853097796440125, |
| "learning_rate": 0.001, |
| "loss": 0.5236, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 1.0773764848709106, |
| "learning_rate": 0.001, |
| "loss": 0.5222, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 1.3068816661834717, |
| "learning_rate": 0.001, |
| "loss": 0.5314, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.1652721166610718, |
| "learning_rate": 0.001, |
| "loss": 0.5161, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 1.4878023862838745, |
| "learning_rate": 0.001, |
| "loss": 0.5206, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 0.9717551469802856, |
| "learning_rate": 0.001, |
| "loss": 0.5143, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 0.9180939793586731, |
| "learning_rate": 0.001, |
| "loss": 0.5245, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 1.2813619375228882, |
| "learning_rate": 0.001, |
| "loss": 0.5479, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 1.2449421882629395, |
| "learning_rate": 0.001, |
| "loss": 0.5229, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 1.3184751272201538, |
| "learning_rate": 0.001, |
| "loss": 0.5303, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 0.9855522513389587, |
| "learning_rate": 0.001, |
| "loss": 0.5216, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.078248143196106, |
| "learning_rate": 0.001, |
| "loss": 0.5323, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 1.2535841464996338, |
| "learning_rate": 0.001, |
| "loss": 0.5222, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 1.2945135831832886, |
| "learning_rate": 0.001, |
| "loss": 0.5212, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 1.130628228187561, |
| "learning_rate": 0.001, |
| "loss": 0.5398, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 1.1353565454483032, |
| "learning_rate": 0.001, |
| "loss": 0.5363, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 0.877821683883667, |
| "learning_rate": 0.001, |
| "loss": 0.5173, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 1.0781742334365845, |
| "learning_rate": 0.001, |
| "loss": 0.4592, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 0.8791617751121521, |
| "learning_rate": 0.001, |
| "loss": 0.4768, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 0.7136475443840027, |
| "learning_rate": 0.001, |
| "loss": 0.4807, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.038556456565857, |
| "learning_rate": 0.001, |
| "loss": 0.4869, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 0.8827602863311768, |
| "learning_rate": 0.001, |
| "loss": 0.4861, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 0.8706760406494141, |
| "learning_rate": 0.001, |
| "loss": 0.5013, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 0.7551383376121521, |
| "learning_rate": 0.001, |
| "loss": 0.4835, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 0.6588653922080994, |
| "learning_rate": 0.001, |
| "loss": 0.4866, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 0.7341609597206116, |
| "learning_rate": 0.001, |
| "loss": 0.4937, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 0.7104642391204834, |
| "learning_rate": 0.001, |
| "loss": 0.4991, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 1.0989702939987183, |
| "learning_rate": 0.001, |
| "loss": 0.4976, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 0.7935035228729248, |
| "learning_rate": 0.001, |
| "loss": 0.5084, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 0.5782608985900879, |
| "learning_rate": 0.001, |
| "loss": 0.5151, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 0.8032583594322205, |
| "learning_rate": 0.001, |
| "loss": 0.5042, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 0.7582470774650574, |
| "learning_rate": 0.001, |
| "loss": 0.514, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 0.8967429995536804, |
| "learning_rate": 0.001, |
| "loss": 0.5185, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 0.7805576920509338, |
| "learning_rate": 0.001, |
| "loss": 0.5115, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 1.0321794748306274, |
| "learning_rate": 0.001, |
| "loss": 0.5181, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 0.6753956079483032, |
| "learning_rate": 0.001, |
| "loss": 0.514, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 0.8881549835205078, |
| "learning_rate": 0.001, |
| "loss": 0.5151, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 0.6611385941505432, |
| "learning_rate": 0.001, |
| "loss": 0.5282, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 0.7606088519096375, |
| "learning_rate": 0.001, |
| "loss": 0.5317, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 0.8321859836578369, |
| "learning_rate": 0.001, |
| "loss": 0.5248, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 0.7359297275543213, |
| "learning_rate": 0.001, |
| "loss": 0.5207, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 0.6839292049407959, |
| "learning_rate": 0.001, |
| "loss": 0.5175, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 0.5971709489822388, |
| "learning_rate": 0.001, |
| "loss": 0.5355, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 0.5360345244407654, |
| "learning_rate": 0.001, |
| "loss": 0.5233, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 0.551607608795166, |
| "learning_rate": 0.001, |
| "loss": 0.5298, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 0.7951961755752563, |
| "learning_rate": 0.001, |
| "loss": 0.5336, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 0.7808154821395874, |
| "learning_rate": 0.001, |
| "loss": 0.5337, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 1.4088801145553589, |
| "learning_rate": 0.001, |
| "loss": 0.5438, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 1.5833579301834106, |
| "learning_rate": 0.001, |
| "loss": 0.4798, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 1.3921915292739868, |
| "learning_rate": 0.001, |
| "loss": 0.4727, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 1.3180691003799438, |
| "learning_rate": 0.001, |
| "loss": 0.4633, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 1.685084581375122, |
| "learning_rate": 0.001, |
| "loss": 0.4799, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 1.4940513372421265, |
| "learning_rate": 0.001, |
| "loss": 0.4854, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 1.7479921579360962, |
| "learning_rate": 0.001, |
| "loss": 0.4912, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 2.0624806880950928, |
| "learning_rate": 0.001, |
| "loss": 0.4887, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 1.9271317720413208, |
| "learning_rate": 0.001, |
| "loss": 0.4773, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 4.249568939208984, |
| "learning_rate": 0.001, |
| "loss": 0.4867, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 1.7943352460861206, |
| "learning_rate": 0.001, |
| "loss": 0.5005, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 2.092435359954834, |
| "learning_rate": 0.001, |
| "loss": 0.5074, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 1.368276834487915, |
| "learning_rate": 0.001, |
| "loss": 0.4962, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 1.6893104314804077, |
| "learning_rate": 0.001, |
| "loss": 0.503, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 1.8402940034866333, |
| "learning_rate": 0.001, |
| "loss": 0.5013, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 1.8994182348251343, |
| "learning_rate": 0.001, |
| "loss": 0.5002, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 1.7885549068450928, |
| "learning_rate": 0.001, |
| "loss": 0.5087, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 2.087735652923584, |
| "learning_rate": 0.001, |
| "loss": 0.4951, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 1.4527853727340698, |
| "learning_rate": 0.001, |
| "loss": 0.5014, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 1.629301905632019, |
| "learning_rate": 0.001, |
| "loss": 0.5102, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.9578490257263184, |
| "learning_rate": 0.001, |
| "loss": 0.5024, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 2.0275087356567383, |
| "learning_rate": 0.001, |
| "loss": 0.5099, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 1.6318986415863037, |
| "learning_rate": 0.001, |
| "loss": 0.5113, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 1.8131710290908813, |
| "learning_rate": 0.001, |
| "loss": 0.5119, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 1.5382099151611328, |
| "learning_rate": 0.001, |
| "loss": 0.5303, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 2.2519476413726807, |
| "learning_rate": 0.001, |
| "loss": 0.5204, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 1.4845337867736816, |
| "learning_rate": 0.001, |
| "loss": 0.5277, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 1.4768366813659668, |
| "learning_rate": 0.001, |
| "loss": 0.5232, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 1.7839581966400146, |
| "learning_rate": 0.001, |
| "loss": 0.5319, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 1.6787962913513184, |
| "learning_rate": 0.001, |
| "loss": 0.5212, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 1.7544941902160645, |
| "learning_rate": 0.001, |
| "loss": 0.5338, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 1.494774580001831, |
| "learning_rate": 0.001, |
| "loss": 0.545, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 1.3960219621658325, |
| "learning_rate": 0.001, |
| "loss": 0.4712, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 1.553849220275879, |
| "learning_rate": 0.001, |
| "loss": 0.4741, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.8072491884231567, |
| "learning_rate": 0.001, |
| "loss": 0.4699, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 1.5213582515716553, |
| "learning_rate": 0.001, |
| "loss": 0.4671, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 1.9114036560058594, |
| "learning_rate": 0.001, |
| "loss": 0.4783, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 1.4559093713760376, |
| "learning_rate": 0.001, |
| "loss": 0.4804, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 1.2537195682525635, |
| "learning_rate": 0.001, |
| "loss": 0.4838, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 1.655987024307251, |
| "learning_rate": 0.001, |
| "loss": 0.5079, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 1.7664257287979126, |
| "learning_rate": 0.001, |
| "loss": 0.4813, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 1.795916199684143, |
| "learning_rate": 0.001, |
| "loss": 0.4974, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 1.5820717811584473, |
| "learning_rate": 0.001, |
| "loss": 0.5014, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 1.4357985258102417, |
| "learning_rate": 0.001, |
| "loss": 0.496, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 1.449780821800232, |
| "learning_rate": 0.001, |
| "loss": 0.4962, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 1.4229458570480347, |
| "learning_rate": 0.001, |
| "loss": 0.5044, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.443252682685852, |
| "learning_rate": 0.001, |
| "loss": 0.4841, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.1594971418380737, |
| "learning_rate": 0.001, |
| "loss": 0.5087, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 1.7895407676696777, |
| "learning_rate": 0.001, |
| "loss": 0.5107, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.5300350189208984, |
| "learning_rate": 0.001, |
| "loss": 0.5063, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 1.501360297203064, |
| "learning_rate": 0.001, |
| "loss": 0.5023, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 1.5320264101028442, |
| "learning_rate": 0.001, |
| "loss": 0.4965, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.4581434726715088, |
| "learning_rate": 0.001, |
| "loss": 0.5048, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 1.3853094577789307, |
| "learning_rate": 0.001, |
| "loss": 0.4939, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 1.4571151733398438, |
| "learning_rate": 0.001, |
| "loss": 0.5211, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 1.6544904708862305, |
| "learning_rate": 0.001, |
| "loss": 0.51, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 1.4322007894515991, |
| "learning_rate": 0.001, |
| "loss": 0.521, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.7621874809265137, |
| "learning_rate": 0.001, |
| "loss": 0.5152, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.492745041847229, |
| "learning_rate": 0.001, |
| "loss": 0.5191, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 1.6130776405334473, |
| "learning_rate": 0.001, |
| "loss": 0.5192, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 1.2372713088989258, |
| "learning_rate": 0.001, |
| "loss": 0.524, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 1.493804693222046, |
| "learning_rate": 0.001, |
| "loss": 0.5237, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.6226726770401, |
| "learning_rate": 0.001, |
| "loss": 0.5214, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.511785864830017, |
| "learning_rate": 0.001, |
| "loss": 0.4658, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 1.5849074125289917, |
| "learning_rate": 0.001, |
| "loss": 0.4684, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.510827898979187, |
| "learning_rate": 0.001, |
| "loss": 0.4636, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 1.6693753004074097, |
| "learning_rate": 0.001, |
| "loss": 0.4764, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 1.2760614156723022, |
| "learning_rate": 0.001, |
| "loss": 0.4754, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 1.7338852882385254, |
| "learning_rate": 0.001, |
| "loss": 0.4715, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 1.388390302658081, |
| "learning_rate": 0.001, |
| "loss": 0.4788, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.734383225440979, |
| "learning_rate": 0.001, |
| "loss": 0.4847, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 1.4146814346313477, |
| "learning_rate": 0.001, |
| "loss": 0.4865, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 1.3522993326187134, |
| "learning_rate": 0.001, |
| "loss": 0.4895, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 1.469128131866455, |
| "learning_rate": 0.001, |
| "loss": 0.4917, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 1.8461087942123413, |
| "learning_rate": 0.001, |
| "loss": 0.4969, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 1.894525170326233, |
| "learning_rate": 0.001, |
| "loss": 0.4877, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 2.118194341659546, |
| "learning_rate": 0.001, |
| "loss": 0.48, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.5822690725326538, |
| "learning_rate": 0.001, |
| "loss": 0.4902, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 1.3895262479782104, |
| "learning_rate": 0.001, |
| "loss": 0.4972, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.4661526679992676, |
| "learning_rate": 0.001, |
| "loss": 0.4966, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.634231448173523, |
| "learning_rate": 0.001, |
| "loss": 0.5081, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 1.520247459411621, |
| "learning_rate": 0.001, |
| "loss": 0.4971, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 1.878395438194275, |
| "learning_rate": 0.001, |
| "loss": 0.5079, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 1.7085822820663452, |
| "learning_rate": 0.001, |
| "loss": 0.5052, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.3339471817016602, |
| "learning_rate": 0.001, |
| "loss": 0.4948, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.9712716341018677, |
| "learning_rate": 0.001, |
| "loss": 0.5098, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 2.210573434829712, |
| "learning_rate": 0.001, |
| "loss": 0.5105, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.4131146669387817, |
| "learning_rate": 0.001, |
| "loss": 0.5096, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 1.391532063484192, |
| "learning_rate": 0.001, |
| "loss": 0.5151, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 1.9204423427581787, |
| "learning_rate": 0.001, |
| "loss": 0.5061, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 2.0518362522125244, |
| "learning_rate": 0.001, |
| "loss": 0.5058, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.5138390064239502, |
| "learning_rate": 0.001, |
| "loss": 0.5072, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 1.3717081546783447, |
| "learning_rate": 0.001, |
| "loss": 0.5203, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 1.4673140048980713, |
| "learning_rate": 0.001, |
| "loss": 0.5119, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.371171474456787, |
| "learning_rate": 0.001, |
| "loss": 0.4482, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.397260308265686, |
| "learning_rate": 0.001, |
| "loss": 0.4578, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.6206611394882202, |
| "learning_rate": 0.001, |
| "loss": 0.4609, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 1.3245476484298706, |
| "learning_rate": 0.001, |
| "loss": 0.4701, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 1.6914180517196655, |
| "learning_rate": 0.001, |
| "loss": 0.4584, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 2.0249228477478027, |
| "learning_rate": 0.001, |
| "loss": 0.4669, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 1.3308566808700562, |
| "learning_rate": 0.001, |
| "loss": 0.4724, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 1.2931392192840576, |
| "learning_rate": 0.001, |
| "loss": 0.4759, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.290625810623169, |
| "learning_rate": 0.001, |
| "loss": 0.4803, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.3367546796798706, |
| "learning_rate": 0.001, |
| "loss": 0.4847, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.2759326696395874, |
| "learning_rate": 0.001, |
| "loss": 0.4872, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 1.3992459774017334, |
| "learning_rate": 0.001, |
| "loss": 0.4876, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 1.4173572063446045, |
| "learning_rate": 0.001, |
| "loss": 0.4712, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 1.8669795989990234, |
| "learning_rate": 0.001, |
| "loss": 0.4922, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 1.6839419603347778, |
| "learning_rate": 0.001, |
| "loss": 0.495, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 1.2317043542861938, |
| "learning_rate": 0.001, |
| "loss": 0.4771, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.3039339780807495, |
| "learning_rate": 0.001, |
| "loss": 0.4837, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 1.291479229927063, |
| "learning_rate": 0.001, |
| "loss": 0.4902, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 1.3181146383285522, |
| "learning_rate": 0.001, |
| "loss": 0.4975, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.7635709047317505, |
| "learning_rate": 0.001, |
| "loss": 0.4954, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.4242125749588013, |
| "learning_rate": 0.001, |
| "loss": 0.509, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.4490323066711426, |
| "learning_rate": 0.001, |
| "loss": 0.5068, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 1.5760105848312378, |
| "learning_rate": 0.001, |
| "loss": 0.4997, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.3230277299880981, |
| "learning_rate": 0.001, |
| "loss": 0.5041, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.6378422975540161, |
| "learning_rate": 0.001, |
| "loss": 0.5138, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 1.4305038452148438, |
| "learning_rate": 0.001, |
| "loss": 0.5073, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 1.6552515029907227, |
| "learning_rate": 0.001, |
| "loss": 0.4978, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 1.555861473083496, |
| "learning_rate": 0.001, |
| "loss": 0.511, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 1.9281134605407715, |
| "learning_rate": 0.001, |
| "loss": 0.5197, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 2.146212100982666, |
| "learning_rate": 0.001, |
| "loss": 0.5133, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.4571073055267334, |
| "learning_rate": 0.001, |
| "loss": 0.4881, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 1.42752206325531, |
| "learning_rate": 0.001, |
| "loss": 0.4532, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 1.7515978813171387, |
| "learning_rate": 0.001, |
| "loss": 0.4482, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.3939858675003052, |
| "learning_rate": 0.001, |
| "loss": 0.4564, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.2333893775939941, |
| "learning_rate": 0.001, |
| "loss": 0.4716, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.5755928754806519, |
| "learning_rate": 0.001, |
| "loss": 0.4726, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 2.824388265609741, |
| "learning_rate": 0.001, |
| "loss": 0.4669, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.1696257591247559, |
| "learning_rate": 0.001, |
| "loss": 0.4707, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.5280286073684692, |
| "learning_rate": 0.001, |
| "loss": 0.4658, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.493591070175171, |
| "learning_rate": 0.001, |
| "loss": 0.4746, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 1.619083046913147, |
| "learning_rate": 0.001, |
| "loss": 0.4755, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 3.416677236557007, |
| "learning_rate": 0.001, |
| "loss": 0.4689, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 1.3980712890625, |
| "learning_rate": 0.001, |
| "loss": 0.4713, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 1.4884861707687378, |
| "learning_rate": 0.001, |
| "loss": 0.4793, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 1.4330198764801025, |
| "learning_rate": 0.001, |
| "loss": 0.4845, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.7243083715438843, |
| "learning_rate": 0.001, |
| "loss": 0.4794, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 1.2517062425613403, |
| "learning_rate": 0.001, |
| "loss": 0.4865, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.5816173553466797, |
| "learning_rate": 0.001, |
| "loss": 0.4873, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 1.3292566537857056, |
| "learning_rate": 0.001, |
| "loss": 0.4888, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.2126435041427612, |
| "learning_rate": 0.001, |
| "loss": 0.4922, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 1.7256187200546265, |
| "learning_rate": 0.001, |
| "loss": 0.4931, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.8610934019088745, |
| "learning_rate": 0.001, |
| "loss": 0.5008, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 1.6567224264144897, |
| "learning_rate": 0.001, |
| "loss": 0.4895, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.3405829668045044, |
| "learning_rate": 0.001, |
| "loss": 0.5039, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.4917628765106201, |
| "learning_rate": 0.001, |
| "loss": 0.4968, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 1.517630696296692, |
| "learning_rate": 0.001, |
| "loss": 0.4977, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 1.293230652809143, |
| "learning_rate": 0.001, |
| "loss": 0.4992, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 1.6211521625518799, |
| "learning_rate": 0.001, |
| "loss": 0.5038, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 1.3522111177444458, |
| "learning_rate": 0.001, |
| "loss": 0.5073, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 1.6885173320770264, |
| "learning_rate": 0.001, |
| "loss": 0.5128, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 1.1733167171478271, |
| "learning_rate": 0.001, |
| "loss": 0.5152, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 1.6227352619171143, |
| "learning_rate": 0.001, |
| "loss": 0.4826, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 1.5030772686004639, |
| "learning_rate": 0.001, |
| "loss": 0.4584, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.3889706134796143, |
| "learning_rate": 0.001, |
| "loss": 0.4535, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 2.0631189346313477, |
| "learning_rate": 0.001, |
| "loss": 0.4577, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 1.3928669691085815, |
| "learning_rate": 0.001, |
| "loss": 0.4504, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.517021656036377, |
| "learning_rate": 0.001, |
| "loss": 0.4644, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 1.4841409921646118, |
| "learning_rate": 0.001, |
| "loss": 0.4523, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 1.5518596172332764, |
| "learning_rate": 0.001, |
| "loss": 0.4628, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 1.3948451280593872, |
| "learning_rate": 0.001, |
| "loss": 0.4553, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.4886418581008911, |
| "learning_rate": 0.001, |
| "loss": 0.4834, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 1.31479012966156, |
| "learning_rate": 0.001, |
| "loss": 0.4627, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 1.0634099245071411, |
| "learning_rate": 0.001, |
| "loss": 0.4594, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 1.2459696531295776, |
| "learning_rate": 0.001, |
| "loss": 0.479, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 1.0318691730499268, |
| "learning_rate": 0.001, |
| "loss": 0.4676, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 1.7067400217056274, |
| "learning_rate": 0.001, |
| "loss": 0.4723, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 1.4222896099090576, |
| "learning_rate": 0.001, |
| "loss": 0.4702, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 1.3543707132339478, |
| "learning_rate": 0.001, |
| "loss": 0.4773, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.1513952016830444, |
| "learning_rate": 0.001, |
| "loss": 0.4819, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 1.3153300285339355, |
| "learning_rate": 0.001, |
| "loss": 0.4861, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.3194316625595093, |
| "learning_rate": 0.001, |
| "loss": 0.4933, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 1.3721164464950562, |
| "learning_rate": 0.001, |
| "loss": 0.49, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 1.4286446571350098, |
| "learning_rate": 0.001, |
| "loss": 0.4892, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 1.2708386182785034, |
| "learning_rate": 0.001, |
| "loss": 0.4953, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 1.0783087015151978, |
| "learning_rate": 0.001, |
| "loss": 0.4883, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 1.4168130159378052, |
| "learning_rate": 0.001, |
| "loss": 0.4907, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 1.315295934677124, |
| "learning_rate": 0.001, |
| "loss": 0.498, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 1.4645394086837769, |
| "learning_rate": 0.001, |
| "loss": 0.5051, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 1.6038243770599365, |
| "learning_rate": 0.001, |
| "loss": 0.5035, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 1.3226242065429688, |
| "learning_rate": 0.001, |
| "loss": 0.5009, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 1.4041608572006226, |
| "learning_rate": 0.001, |
| "loss": 0.5034, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 1.4587429761886597, |
| "learning_rate": 0.001, |
| "loss": 0.5081, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 1.2513530254364014, |
| "learning_rate": 0.001, |
| "loss": 0.4572, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 1.0929393768310547, |
| "learning_rate": 0.001, |
| "loss": 0.4435, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 1.1157305240631104, |
| "learning_rate": 0.001, |
| "loss": 0.4525, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 1.1245415210723877, |
| "learning_rate": 0.001, |
| "loss": 0.452, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 1.493154764175415, |
| "learning_rate": 0.001, |
| "loss": 0.4605, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 0.9956198334693909, |
| "learning_rate": 0.001, |
| "loss": 0.4653, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 1.3177974224090576, |
| "learning_rate": 0.001, |
| "loss": 0.4651, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 1.2235926389694214, |
| "learning_rate": 0.001, |
| "loss": 0.46, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 1.260908603668213, |
| "learning_rate": 0.001, |
| "loss": 0.4564, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 1.13411545753479, |
| "learning_rate": 0.001, |
| "loss": 0.4617, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.1787828207015991, |
| "learning_rate": 0.001, |
| "loss": 0.4699, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 1.1601965427398682, |
| "learning_rate": 0.001, |
| "loss": 0.4677, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.2311824560165405, |
| "learning_rate": 0.001, |
| "loss": 0.4751, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 1.132232427597046, |
| "learning_rate": 0.001, |
| "loss": 0.472, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 0.876586377620697, |
| "learning_rate": 0.001, |
| "loss": 0.4705, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 1.2030634880065918, |
| "learning_rate": 0.001, |
| "loss": 0.477, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 0.9745743274688721, |
| "learning_rate": 0.001, |
| "loss": 0.4403, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 1.3243677616119385, |
| "learning_rate": 0.001, |
| "loss": 0.4505, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 1.1608860492706299, |
| "learning_rate": 0.001, |
| "loss": 0.4497, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.1231688261032104, |
| "learning_rate": 0.001, |
| "loss": 0.4549, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.5799304246902466, |
| "learning_rate": 0.001, |
| "loss": 0.4487, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 1.247395396232605, |
| "learning_rate": 0.001, |
| "loss": 0.4572, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 1.3597042560577393, |
| "learning_rate": 0.001, |
| "loss": 0.4627, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.445366621017456, |
| "learning_rate": 0.001, |
| "loss": 0.4587, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 1.2129086256027222, |
| "learning_rate": 0.001, |
| "loss": 0.4598, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 1.0306206941604614, |
| "learning_rate": 0.001, |
| "loss": 0.4561, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 1.1839573383331299, |
| "learning_rate": 0.001, |
| "loss": 0.4791, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 0.9946609735488892, |
| "learning_rate": 0.001, |
| "loss": 0.4646, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 1.1721656322479248, |
| "learning_rate": 0.001, |
| "loss": 0.4819, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 1.1597821712493896, |
| "learning_rate": 0.001, |
| "loss": 0.4703, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 1.4225759506225586, |
| "learning_rate": 0.001, |
| "loss": 0.4751, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 9.793893814086914, |
| "learning_rate": 0.001, |
| "loss": 0.434, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 1.1725051403045654, |
| "learning_rate": 0.001, |
| "loss": 0.4464, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 0.8536701202392578, |
| "learning_rate": 0.001, |
| "loss": 0.439, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 0.939294695854187, |
| "learning_rate": 0.001, |
| "loss": 0.4568, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 1.0028142929077148, |
| "learning_rate": 0.001, |
| "loss": 0.452, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 0.818120539188385, |
| "learning_rate": 0.001, |
| "loss": 0.4583, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 0.8494078516960144, |
| "learning_rate": 0.001, |
| "loss": 0.4485, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 0.7314313054084778, |
| "learning_rate": 0.001, |
| "loss": 0.4577, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 0.6735559105873108, |
| "learning_rate": 0.001, |
| "loss": 0.465, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 0.8712020516395569, |
| "learning_rate": 0.001, |
| "loss": 0.4652, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 1.028043270111084, |
| "learning_rate": 0.001, |
| "loss": 0.4593, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 0.8021206855773926, |
| "learning_rate": 0.001, |
| "loss": 0.4723, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 0.8332772850990295, |
| "learning_rate": 0.001, |
| "loss": 0.47, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 0.8631690740585327, |
| "learning_rate": 0.001, |
| "loss": 0.4683, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 0.806066632270813, |
| "learning_rate": 0.001, |
| "loss": 0.47, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 0.9106870889663696, |
| "learning_rate": 0.001, |
| "loss": 0.4687, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 0.8315029740333557, |
| "learning_rate": 0.001, |
| "loss": 0.4618, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 0.8321149945259094, |
| "learning_rate": 0.001, |
| "loss": 0.4793, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 0.6962825059890747, |
| "learning_rate": 0.001, |
| "loss": 0.4825, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 1.0555658340454102, |
| "learning_rate": 0.001, |
| "loss": 0.4724, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 0.9735150337219238, |
| "learning_rate": 0.001, |
| "loss": 0.51, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 1.2564635276794434, |
| "learning_rate": 0.001, |
| "loss": 0.4918, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 0.7897902131080627, |
| "learning_rate": 0.001, |
| "loss": 0.486, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 1.0546153783798218, |
| "learning_rate": 0.001, |
| "loss": 0.4935, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 0.8487304449081421, |
| "learning_rate": 0.001, |
| "loss": 0.4855, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 0.8980190753936768, |
| "learning_rate": 0.001, |
| "loss": 0.4905, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 1.1748449802398682, |
| "learning_rate": 0.001, |
| "loss": 0.4818, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 0.9486263394355774, |
| "learning_rate": 0.001, |
| "loss": 0.4965, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 1.0960006713867188, |
| "learning_rate": 0.001, |
| "loss": 0.502, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 1.006560206413269, |
| "learning_rate": 0.001, |
| "loss": 0.4868, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 1.080833911895752, |
| "learning_rate": 0.001, |
| "loss": 0.502, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 2.0345189571380615, |
| "learning_rate": 0.001, |
| "loss": 0.4498, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 1.6295733451843262, |
| "learning_rate": 0.001, |
| "loss": 0.4319, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 1.82840096950531, |
| "learning_rate": 0.001, |
| "loss": 0.4405, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 3.0892772674560547, |
| "learning_rate": 0.001, |
| "loss": 0.4414, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 2.1895627975463867, |
| "learning_rate": 0.001, |
| "loss": 0.4482, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 1.6839579343795776, |
| "learning_rate": 0.001, |
| "loss": 0.4534, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 2.260793924331665, |
| "learning_rate": 0.001, |
| "loss": 0.453, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 42.66197967529297, |
| "learning_rate": 0.001, |
| "loss": 0.4458, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 1.6944698095321655, |
| "learning_rate": 0.001, |
| "loss": 0.4503, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 1.6756781339645386, |
| "learning_rate": 0.001, |
| "loss": 0.4563, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 1.8374137878417969, |
| "learning_rate": 0.001, |
| "loss": 0.4468, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 2.041393518447876, |
| "learning_rate": 0.001, |
| "loss": 0.4505, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 1.8268532752990723, |
| "learning_rate": 0.001, |
| "loss": 0.4608, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 1.7450902462005615, |
| "learning_rate": 0.001, |
| "loss": 0.4634, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 1.7353445291519165, |
| "learning_rate": 0.001, |
| "loss": 0.4794, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 2.340031385421753, |
| "learning_rate": 0.001, |
| "loss": 0.471, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 1.8648983240127563, |
| "learning_rate": 0.001, |
| "loss": 0.4723, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 2.6980106830596924, |
| "learning_rate": 0.001, |
| "loss": 0.4644, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 1.8055412769317627, |
| "learning_rate": 0.001, |
| "loss": 0.468, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 2.3196511268615723, |
| "learning_rate": 0.001, |
| "loss": 0.4766, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 1.4777075052261353, |
| "learning_rate": 0.001, |
| "loss": 0.4736, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 2.1524486541748047, |
| "learning_rate": 0.001, |
| "loss": 0.4834, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 2.242614507675171, |
| "learning_rate": 0.001, |
| "loss": 0.4772, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 1.7869904041290283, |
| "learning_rate": 0.001, |
| "loss": 0.4739, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 2.129345417022705, |
| "learning_rate": 0.001, |
| "loss": 0.4887, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 1.5137324333190918, |
| "learning_rate": 0.001, |
| "loss": 0.4807, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 2.0685720443725586, |
| "learning_rate": 0.001, |
| "loss": 0.487, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 1.8368549346923828, |
| "learning_rate": 0.001, |
| "loss": 0.5025, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 1.7226216793060303, |
| "learning_rate": 0.001, |
| "loss": 0.4917, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 2.0484440326690674, |
| "learning_rate": 0.001, |
| "loss": 0.4885, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 1.8843029737472534, |
| "learning_rate": 0.001, |
| "loss": 0.4872, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 2.280056953430176, |
| "learning_rate": 0.001, |
| "loss": 0.4684, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 1.6946696043014526, |
| "learning_rate": 0.001, |
| "loss": 0.4274, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 2.300701856613159, |
| "learning_rate": 0.001, |
| "loss": 0.4325, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 1.729552984237671, |
| "learning_rate": 0.001, |
| "loss": 0.4413, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 2.0106143951416016, |
| "learning_rate": 0.001, |
| "loss": 0.4507, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 1.5920679569244385, |
| "learning_rate": 0.001, |
| "loss": 0.4397, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 1.4083905220031738, |
| "learning_rate": 0.001, |
| "loss": 0.4442, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 1.5656646490097046, |
| "learning_rate": 0.001, |
| "loss": 0.444, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 1.4155176877975464, |
| "learning_rate": 0.001, |
| "loss": 0.4594, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 1.781264305114746, |
| "learning_rate": 0.001, |
| "loss": 0.4541, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 1.7372766733169556, |
| "learning_rate": 0.001, |
| "loss": 0.4577, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 1.7941893339157104, |
| "learning_rate": 0.001, |
| "loss": 0.4646, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 1.5748519897460938, |
| "learning_rate": 0.001, |
| "loss": 0.4587, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 2.3406195640563965, |
| "learning_rate": 0.001, |
| "loss": 0.4608, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 1.817068099975586, |
| "learning_rate": 0.001, |
| "loss": 0.4598, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 1.4823451042175293, |
| "learning_rate": 0.001, |
| "loss": 0.4561, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.5961811542510986, |
| "learning_rate": 0.001, |
| "loss": 0.4662, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 1.4638841152191162, |
| "learning_rate": 0.001, |
| "loss": 0.4713, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 2.1063473224639893, |
| "learning_rate": 0.001, |
| "loss": 0.4644, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 1.989016056060791, |
| "learning_rate": 0.001, |
| "loss": 0.4611, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 1.9754928350448608, |
| "learning_rate": 0.001, |
| "loss": 0.4624, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 1.4447102546691895, |
| "learning_rate": 0.001, |
| "loss": 0.4636, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 1.5371551513671875, |
| "learning_rate": 0.001, |
| "loss": 0.4768, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 1.5299981832504272, |
| "learning_rate": 0.001, |
| "loss": 0.4783, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.286318063735962, |
| "learning_rate": 0.001, |
| "loss": 0.4764, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 2.0626156330108643, |
| "learning_rate": 0.001, |
| "loss": 0.4776, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 1.6997871398925781, |
| "learning_rate": 0.001, |
| "loss": 0.4695, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 3.9364075660705566, |
| "learning_rate": 0.001, |
| "loss": 0.4801, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 2.015624523162842, |
| "learning_rate": 0.001, |
| "loss": 0.4962, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 344.4596862792969, |
| "learning_rate": 0.001, |
| "loss": 0.4812, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 1.8616305589675903, |
| "learning_rate": 0.001, |
| "loss": 0.4814, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 1.8940582275390625, |
| "learning_rate": 0.001, |
| "loss": 0.464, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 1.938902735710144, |
| "learning_rate": 0.001, |
| "loss": 0.4213, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 1.4678521156311035, |
| "learning_rate": 0.001, |
| "loss": 0.4337, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 1.5415394306182861, |
| "learning_rate": 0.001, |
| "loss": 0.4445, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 1.8836512565612793, |
| "learning_rate": 0.001, |
| "loss": 0.4274, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 1.2818994522094727, |
| "learning_rate": 0.001, |
| "loss": 0.4448, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 2.0979812145233154, |
| "learning_rate": 0.001, |
| "loss": 0.4396, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 1.6995420455932617, |
| "learning_rate": 0.001, |
| "loss": 0.4488, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 1.7920165061950684, |
| "learning_rate": 0.001, |
| "loss": 0.448, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 1.6216713190078735, |
| "learning_rate": 0.001, |
| "loss": 0.452, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 1.4908044338226318, |
| "learning_rate": 0.001, |
| "loss": 0.4468, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 1.6962876319885254, |
| "learning_rate": 0.001, |
| "loss": 0.45, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 1.460558533668518, |
| "learning_rate": 0.001, |
| "loss": 0.446, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 1.4219996929168701, |
| "learning_rate": 0.001, |
| "loss": 0.4533, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 1.758070468902588, |
| "learning_rate": 0.001, |
| "loss": 0.4511, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 1.518767237663269, |
| "learning_rate": 0.001, |
| "loss": 0.4601, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 1.4330819845199585, |
| "learning_rate": 0.001, |
| "loss": 0.464, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 1.7060296535491943, |
| "learning_rate": 0.001, |
| "loss": 0.4593, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 1.3387072086334229, |
| "learning_rate": 0.001, |
| "loss": 0.4582, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 1.6963450908660889, |
| "learning_rate": 0.001, |
| "loss": 0.4619, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 2.0844035148620605, |
| "learning_rate": 0.001, |
| "loss": 0.4665, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 1.3902792930603027, |
| "learning_rate": 0.001, |
| "loss": 0.4783, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 1.3973846435546875, |
| "learning_rate": 0.001, |
| "loss": 0.4714, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 1.479785442352295, |
| "learning_rate": 0.001, |
| "loss": 0.471, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.6167840957641602, |
| "learning_rate": 0.001, |
| "loss": 0.4683, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 1.5192022323608398, |
| "learning_rate": 0.001, |
| "loss": 0.484, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.438111424446106, |
| "learning_rate": 0.001, |
| "loss": 0.4732, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 1.3876760005950928, |
| "learning_rate": 0.001, |
| "loss": 0.4824, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 1.9509048461914062, |
| "learning_rate": 0.001, |
| "loss": 0.4725, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 1.4013237953186035, |
| "learning_rate": 0.001, |
| "loss": 0.4802, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 1.737876534461975, |
| "learning_rate": 0.001, |
| "loss": 0.4793, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 1.3785439729690552, |
| "learning_rate": 0.001, |
| "loss": 0.4504, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.9236104488372803, |
| "learning_rate": 0.001, |
| "loss": 0.4349, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.619763970375061, |
| "learning_rate": 0.001, |
| "loss": 0.4403, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 1.5868083238601685, |
| "learning_rate": 0.001, |
| "loss": 0.4352, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.8232312202453613, |
| "learning_rate": 0.001, |
| "loss": 0.4315, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 1.5176035165786743, |
| "learning_rate": 0.001, |
| "loss": 0.434, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 1.5614092350006104, |
| "learning_rate": 0.001, |
| "loss": 0.4418, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 1.5651224851608276, |
| "learning_rate": 0.001, |
| "loss": 0.434, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 1.3997880220413208, |
| "learning_rate": 0.001, |
| "loss": 0.4466, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 1.8400006294250488, |
| "learning_rate": 0.001, |
| "loss": 0.4402, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 1.32888662815094, |
| "learning_rate": 0.001, |
| "loss": 0.4404, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 1.6747912168502808, |
| "learning_rate": 0.001, |
| "loss": 0.4522, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 1.6857593059539795, |
| "learning_rate": 0.001, |
| "loss": 0.444, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 1.3832392692565918, |
| "learning_rate": 0.001, |
| "loss": 0.4434, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.914587140083313, |
| "learning_rate": 0.001, |
| "loss": 0.454, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.6839224100112915, |
| "learning_rate": 0.001, |
| "loss": 0.4537, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.4552383422851562, |
| "learning_rate": 0.001, |
| "loss": 0.4591, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.5288134813308716, |
| "learning_rate": 0.001, |
| "loss": 0.4533, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 1.8305370807647705, |
| "learning_rate": 0.001, |
| "loss": 0.4655, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 1.2696152925491333, |
| "learning_rate": 0.001, |
| "loss": 0.4595, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.5350581407546997, |
| "learning_rate": 0.001, |
| "loss": 0.4685, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.1999725103378296, |
| "learning_rate": 0.001, |
| "loss": 0.4595, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.2505929470062256, |
| "learning_rate": 0.001, |
| "loss": 0.4678, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 1.3994829654693604, |
| "learning_rate": 0.001, |
| "loss": 0.4646, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.6253557205200195, |
| "learning_rate": 0.001, |
| "loss": 0.4723, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 1.6554275751113892, |
| "learning_rate": 0.001, |
| "loss": 0.4714, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.4614795446395874, |
| "learning_rate": 0.001, |
| "loss": 0.4793, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.41798996925354, |
| "learning_rate": 0.001, |
| "loss": 0.464, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 1.7458993196487427, |
| "learning_rate": 0.001, |
| "loss": 0.4661, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 2.5169103145599365, |
| "learning_rate": 0.001, |
| "loss": 0.4737, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.4173320531845093, |
| "learning_rate": 0.001, |
| "loss": 0.4853, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.596639633178711, |
| "learning_rate": 0.001, |
| "loss": 0.4336, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 1.4442991018295288, |
| "learning_rate": 0.001, |
| "loss": 0.4185, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 1.3611177206039429, |
| "learning_rate": 0.001, |
| "loss": 0.4274, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 1.1800782680511475, |
| "learning_rate": 0.001, |
| "loss": 0.4365, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 1.8172197341918945, |
| "learning_rate": 0.001, |
| "loss": 0.4372, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.4317479133605957, |
| "learning_rate": 0.001, |
| "loss": 0.4323, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 1.2062143087387085, |
| "learning_rate": 0.001, |
| "loss": 0.4232, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.6967133283615112, |
| "learning_rate": 0.001, |
| "loss": 0.4363, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.3945610523223877, |
| "learning_rate": 0.001, |
| "loss": 0.4354, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.4094641208648682, |
| "learning_rate": 0.001, |
| "loss": 0.4441, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 1.2168558835983276, |
| "learning_rate": 0.001, |
| "loss": 0.4382, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 1.9572267532348633, |
| "learning_rate": 0.001, |
| "loss": 0.4441, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 1.3450571298599243, |
| "learning_rate": 0.001, |
| "loss": 0.439, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.5765080451965332, |
| "learning_rate": 0.001, |
| "loss": 0.4468, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.4704385995864868, |
| "learning_rate": 0.001, |
| "loss": 0.4472, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 1.5044299364089966, |
| "learning_rate": 0.001, |
| "loss": 0.4453, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 1.373509407043457, |
| "learning_rate": 0.001, |
| "loss": 0.4599, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.180299162864685, |
| "learning_rate": 0.001, |
| "loss": 0.4476, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 2.22157621383667, |
| "learning_rate": 0.001, |
| "loss": 0.451, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 1.5506231784820557, |
| "learning_rate": 0.001, |
| "loss": 0.461, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 1.7377060651779175, |
| "learning_rate": 0.001, |
| "loss": 0.4565, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.1429818868637085, |
| "learning_rate": 0.001, |
| "loss": 0.4613, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.4461809396743774, |
| "learning_rate": 0.001, |
| "loss": 0.4636, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 1.6371991634368896, |
| "learning_rate": 0.001, |
| "loss": 0.4725, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 2.10194993019104, |
| "learning_rate": 0.001, |
| "loss": 0.4548, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.3943737745285034, |
| "learning_rate": 0.001, |
| "loss": 0.4732, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 1.4672203063964844, |
| "learning_rate": 0.001, |
| "loss": 0.465, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 1.6431360244750977, |
| "learning_rate": 0.001, |
| "loss": 0.4576, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 1.731074333190918, |
| "learning_rate": 0.001, |
| "loss": 0.4686, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.5479676723480225, |
| "learning_rate": 0.001, |
| "loss": 0.4716, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.5700945854187012, |
| "learning_rate": 0.001, |
| "loss": 0.4677, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.269909381866455, |
| "learning_rate": 0.001, |
| "loss": 0.4298, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 1.5230191946029663, |
| "learning_rate": 0.001, |
| "loss": 0.4174, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.2741731405258179, |
| "learning_rate": 0.001, |
| "loss": 0.4206, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.440308690071106, |
| "learning_rate": 0.001, |
| "loss": 0.4147, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.1169484853744507, |
| "learning_rate": 0.001, |
| "loss": 0.4224, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.6060082912445068, |
| "learning_rate": 0.001, |
| "loss": 0.4322, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 1.4612475633621216, |
| "learning_rate": 0.001, |
| "loss": 0.4353, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 1.2991474866867065, |
| "learning_rate": 0.001, |
| "loss": 0.4241, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 1.3116704225540161, |
| "learning_rate": 0.001, |
| "loss": 0.4366, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 1.2389549016952515, |
| "learning_rate": 0.001, |
| "loss": 0.4384, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 1.2728149890899658, |
| "learning_rate": 0.001, |
| "loss": 0.4372, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 1.3004136085510254, |
| "learning_rate": 0.001, |
| "loss": 0.4373, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 1.227472186088562, |
| "learning_rate": 0.001, |
| "loss": 0.4422, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 1.1583950519561768, |
| "learning_rate": 0.001, |
| "loss": 0.4505, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 1.5739110708236694, |
| "learning_rate": 0.001, |
| "loss": 0.4521, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 1.3445355892181396, |
| "learning_rate": 0.001, |
| "loss": 0.4532, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 1.2330920696258545, |
| "learning_rate": 0.001, |
| "loss": 0.4477, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.5380816459655762, |
| "learning_rate": 0.001, |
| "loss": 0.4513, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 1.2839477062225342, |
| "learning_rate": 0.001, |
| "loss": 0.4455, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.4159269332885742, |
| "learning_rate": 0.001, |
| "loss": 0.4491, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 1.401963472366333, |
| "learning_rate": 0.001, |
| "loss": 0.4503, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.7864422798156738, |
| "learning_rate": 0.001, |
| "loss": 0.4675, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 1.2578034400939941, |
| "learning_rate": 0.001, |
| "loss": 0.4583, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.2712514400482178, |
| "learning_rate": 0.001, |
| "loss": 0.4646, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 1.465354561805725, |
| "learning_rate": 0.001, |
| "loss": 0.4548, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 1.1005356311798096, |
| "learning_rate": 0.001, |
| "loss": 0.4517, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.7632066011428833, |
| "learning_rate": 0.001, |
| "loss": 0.4521, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 1.5967885255813599, |
| "learning_rate": 0.001, |
| "loss": 0.473, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.3136255741119385, |
| "learning_rate": 0.001, |
| "loss": 0.4713, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 1.0595479011535645, |
| "learning_rate": 0.001, |
| "loss": 0.4598, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 1.0992523431777954, |
| "learning_rate": 0.001, |
| "loss": 0.464, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.4200688600540161, |
| "learning_rate": 0.001, |
| "loss": 0.4209, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 1.107372760772705, |
| "learning_rate": 0.001, |
| "loss": 0.418, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 0.9656426310539246, |
| "learning_rate": 0.001, |
| "loss": 0.4113, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 0.9549182057380676, |
| "learning_rate": 0.001, |
| "loss": 0.4301, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 1.4307749271392822, |
| "learning_rate": 0.001, |
| "loss": 0.4327, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.2919740676879883, |
| "learning_rate": 0.001, |
| "loss": 0.4276, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.20173978805542, |
| "learning_rate": 0.001, |
| "loss": 0.4228, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.2167638540267944, |
| "learning_rate": 0.001, |
| "loss": 0.4178, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.0864397287368774, |
| "learning_rate": 0.001, |
| "loss": 0.4273, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 1.4295344352722168, |
| "learning_rate": 0.001, |
| "loss": 0.4379, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 1.2204785346984863, |
| "learning_rate": 0.001, |
| "loss": 0.4396, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 1.4378937482833862, |
| "learning_rate": 0.001, |
| "loss": 0.4384, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 1.0496668815612793, |
| "learning_rate": 0.001, |
| "loss": 0.4438, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 1.357386827468872, |
| "learning_rate": 0.001, |
| "loss": 0.4592, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 1.6617597341537476, |
| "learning_rate": 0.001, |
| "loss": 0.4409, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 1.351397156715393, |
| "learning_rate": 0.001, |
| "loss": 0.4471, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 1.240196704864502, |
| "learning_rate": 0.001, |
| "loss": 0.4375, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 1.221144437789917, |
| "learning_rate": 0.001, |
| "loss": 0.4476, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 1.5508735179901123, |
| "learning_rate": 0.001, |
| "loss": 0.4431, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 1.0978624820709229, |
| "learning_rate": 0.001, |
| "loss": 0.4461, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 1.2614808082580566, |
| "learning_rate": 0.001, |
| "loss": 0.4397, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 1.1213574409484863, |
| "learning_rate": 0.001, |
| "loss": 0.439, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 1.5465046167373657, |
| "learning_rate": 0.001, |
| "loss": 0.4592, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 1.1399887800216675, |
| "learning_rate": 0.001, |
| "loss": 0.4663, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.1044340133666992, |
| "learning_rate": 0.001, |
| "loss": 0.4492, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 1.1779178380966187, |
| "learning_rate": 0.001, |
| "loss": 0.4516, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 1.3684269189834595, |
| "learning_rate": 0.001, |
| "loss": 0.4532, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 1.3492311239242554, |
| "learning_rate": 0.001, |
| "loss": 0.4511, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 1.1889764070510864, |
| "learning_rate": 0.001, |
| "loss": 0.4664, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 0.8841199278831482, |
| "learning_rate": 0.001, |
| "loss": 0.4649, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 0.9290673136711121, |
| "learning_rate": 0.001, |
| "loss": 0.4531, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 1.022675633430481, |
| "learning_rate": 0.001, |
| "loss": 0.42, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.138304352760315, |
| "learning_rate": 0.001, |
| "loss": 0.4125, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.1132817268371582, |
| "learning_rate": 0.001, |
| "loss": 0.4205, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 1.1500136852264404, |
| "learning_rate": 0.001, |
| "loss": 0.4136, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 1.1773744821548462, |
| "learning_rate": 0.001, |
| "loss": 0.4212, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 1.4039117097854614, |
| "learning_rate": 0.001, |
| "loss": 0.4338, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 1.0985853672027588, |
| "learning_rate": 0.001, |
| "loss": 0.4278, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 0.7000985741615295, |
| "learning_rate": 0.001, |
| "loss": 0.424, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 1.5359601974487305, |
| "learning_rate": 0.001, |
| "loss": 0.4353, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 0.8994166254997253, |
| "learning_rate": 0.001, |
| "loss": 0.4382, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 0.724768340587616, |
| "learning_rate": 0.001, |
| "loss": 0.4355, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 0.8465492129325867, |
| "learning_rate": 0.001, |
| "loss": 0.444, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 1.830216884613037, |
| "learning_rate": 0.001, |
| "loss": 0.4336, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 1.0542271137237549, |
| "learning_rate": 0.001, |
| "loss": 0.4336, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 0.8846477270126343, |
| "learning_rate": 0.001, |
| "loss": 0.4516, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 0.8210715055465698, |
| "learning_rate": 0.001, |
| "loss": 0.4412, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 0.9523438215255737, |
| "learning_rate": 0.001, |
| "loss": 0.4341, |
| "step": 308000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.553808005867438e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|