| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 99.5475113122172, |
| "eval_steps": 20000, |
| "global_step": 308000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03232062055591468, |
| "grad_norm": 9.332721710205078, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 4.2426, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06464124111182935, |
| "grad_norm": 8.882152557373047, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 4.0765, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09696186166774402, |
| "grad_norm": 15.724532127380371, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 4.0268, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1292824822236587, |
| "grad_norm": 7.888890266418457, |
| "learning_rate": 3.99e-05, |
| "loss": 3.9902, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16160310277957338, |
| "grad_norm": 10.950591087341309, |
| "learning_rate": 4.99e-05, |
| "loss": 3.9292, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19392372333548805, |
| "grad_norm": 16.338972091674805, |
| "learning_rate": 5.9900000000000006e-05, |
| "loss": 3.8515, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.22624434389140272, |
| "grad_norm": 5.990420818328857, |
| "learning_rate": 6.99e-05, |
| "loss": 3.8284, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2585649644473174, |
| "grad_norm": 9.774703025817871, |
| "learning_rate": 7.99e-05, |
| "loss": 3.7876, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2908855850032321, |
| "grad_norm": 7.742414951324463, |
| "learning_rate": 8.989999999999999e-05, |
| "loss": 3.7472, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32320620555914675, |
| "grad_norm": 4.4023284912109375, |
| "learning_rate": 9.99e-05, |
| "loss": 3.7163, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3555268261150614, |
| "grad_norm": 4.680882453918457, |
| "learning_rate": 0.0001099, |
| "loss": 3.6804, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3878474466709761, |
| "grad_norm": 4.001336574554443, |
| "learning_rate": 0.00011990000000000001, |
| "loss": 3.6462, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.42016806722689076, |
| "grad_norm": 5.962468147277832, |
| "learning_rate": 0.00012989999999999999, |
| "loss": 3.6611, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.45248868778280543, |
| "grad_norm": 3.200063467025757, |
| "learning_rate": 0.0001399, |
| "loss": 3.6355, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4848093083387201, |
| "grad_norm": 4.732622146606445, |
| "learning_rate": 0.0001499, |
| "loss": 3.6127, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5171299288946348, |
| "grad_norm": 3.1570827960968018, |
| "learning_rate": 0.00015989999999999998, |
| "loss": 3.6142, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5494505494505495, |
| "grad_norm": 4.7870073318481445, |
| "learning_rate": 0.0001699, |
| "loss": 3.5934, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5817711700064642, |
| "grad_norm": 3.6958024501800537, |
| "learning_rate": 0.0001799, |
| "loss": 3.5961, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6140917905623788, |
| "grad_norm": 3.7356350421905518, |
| "learning_rate": 0.0001899, |
| "loss": 3.6097, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6464124111182935, |
| "grad_norm": 3.8976237773895264, |
| "learning_rate": 0.0001999, |
| "loss": 3.5555, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6787330316742082, |
| "grad_norm": 3.5063016414642334, |
| "learning_rate": 0.0002099, |
| "loss": 3.5528, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7110536522301228, |
| "grad_norm": 3.2186264991760254, |
| "learning_rate": 0.0002199, |
| "loss": 3.5596, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7433742727860375, |
| "grad_norm": 3.2118473052978516, |
| "learning_rate": 0.0002299, |
| "loss": 3.5283, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7756948933419522, |
| "grad_norm": 3.3671634197235107, |
| "learning_rate": 0.0002399, |
| "loss": 3.5257, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8080155138978669, |
| "grad_norm": 4.391268730163574, |
| "learning_rate": 0.0002499, |
| "loss": 3.5231, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 2.7105960845947266, |
| "learning_rate": 0.00025990000000000003, |
| "loss": 3.522, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8726567550096962, |
| "grad_norm": 3.373960494995117, |
| "learning_rate": 0.0002699, |
| "loss": 3.5354, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9049773755656109, |
| "grad_norm": 2.757404088973999, |
| "learning_rate": 0.0002799, |
| "loss": 3.5267, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9372979961215255, |
| "grad_norm": 4.519193649291992, |
| "learning_rate": 0.0002899, |
| "loss": 3.501, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9696186166774402, |
| "grad_norm": 4.307316780090332, |
| "learning_rate": 0.0002999, |
| "loss": 3.4861, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0019392372333549, |
| "grad_norm": 3.6178064346313477, |
| "learning_rate": 0.0003099, |
| "loss": 3.4992, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0342598577892697, |
| "grad_norm": 1.7168558835983276, |
| "learning_rate": 0.0003199, |
| "loss": 3.4419, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0665804783451842, |
| "grad_norm": 1.5993854999542236, |
| "learning_rate": 0.00032990000000000005, |
| "loss": 3.4434, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.098901098901099, |
| "grad_norm": 1.2065600156784058, |
| "learning_rate": 0.00033989999999999997, |
| "loss": 3.4292, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.1312217194570136, |
| "grad_norm": 1.1300657987594604, |
| "learning_rate": 0.0003499, |
| "loss": 3.4418, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.1635423400129283, |
| "grad_norm": 2.4604320526123047, |
| "learning_rate": 0.0003599, |
| "loss": 3.4454, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.195862960568843, |
| "grad_norm": 5.035538196563721, |
| "learning_rate": 0.0003699, |
| "loss": 3.4292, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.2281835811247577, |
| "grad_norm": 1.4227688312530518, |
| "learning_rate": 0.0003799, |
| "loss": 3.4257, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2605042016806722, |
| "grad_norm": 1.5160913467407227, |
| "learning_rate": 0.00038990000000000004, |
| "loss": 3.4242, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.292824822236587, |
| "grad_norm": 1.1818920373916626, |
| "learning_rate": 0.00039989999999999996, |
| "loss": 3.4004, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.3251454427925016, |
| "grad_norm": 1.411624550819397, |
| "learning_rate": 0.0004099, |
| "loss": 3.4166, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.3574660633484164, |
| "grad_norm": 1.5991268157958984, |
| "learning_rate": 0.0004199, |
| "loss": 3.4149, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.389786683904331, |
| "grad_norm": 1.228127360343933, |
| "learning_rate": 0.0004299, |
| "loss": 3.4229, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.4221073044602457, |
| "grad_norm": 7.947666645050049, |
| "learning_rate": 0.0004399, |
| "loss": 3.4058, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.4544279250161603, |
| "grad_norm": 1.4560375213623047, |
| "learning_rate": 0.00044990000000000004, |
| "loss": 3.3876, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.486748545572075, |
| "grad_norm": 1.2084722518920898, |
| "learning_rate": 0.0004599, |
| "loss": 3.3963, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.5190691661279896, |
| "grad_norm": 1.5284830331802368, |
| "learning_rate": 0.0004699, |
| "loss": 3.4088, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.5513897866839044, |
| "grad_norm": 1.1983979940414429, |
| "learning_rate": 0.0004799, |
| "loss": 3.4061, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5837104072398192, |
| "grad_norm": 1.314408540725708, |
| "learning_rate": 0.0004899, |
| "loss": 3.3987, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.6160310277957337, |
| "grad_norm": 1.5176293849945068, |
| "learning_rate": 0.0004999000000000001, |
| "loss": 3.366, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6483516483516483, |
| "grad_norm": 1.500085711479187, |
| "learning_rate": 0.0005099, |
| "loss": 3.3984, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.680672268907563, |
| "grad_norm": 1.016550898551941, |
| "learning_rate": 0.0005199, |
| "loss": 3.3626, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.7129928894634778, |
| "grad_norm": 13.165894508361816, |
| "learning_rate": 0.0005299, |
| "loss": 3.3609, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7453135100193924, |
| "grad_norm": 1.186579942703247, |
| "learning_rate": 0.0005399000000000001, |
| "loss": 3.3672, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.777634130575307, |
| "grad_norm": 1.2896537780761719, |
| "learning_rate": 0.0005499000000000001, |
| "loss": 3.3825, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.8099547511312217, |
| "grad_norm": 1.2675527334213257, |
| "learning_rate": 0.0005599, |
| "loss": 3.3698, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.8422753716871365, |
| "grad_norm": 1.1798584461212158, |
| "learning_rate": 0.0005698999999999999, |
| "loss": 3.3624, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.874595992243051, |
| "grad_norm": 0.8817252516746521, |
| "learning_rate": 0.0005799, |
| "loss": 3.3503, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.9069166127989656, |
| "grad_norm": 1.2770187854766846, |
| "learning_rate": 0.0005899, |
| "loss": 3.3655, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.9392372333548804, |
| "grad_norm": 1.062826156616211, |
| "learning_rate": 0.0005999, |
| "loss": 3.3595, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.9715578539107952, |
| "grad_norm": 0.97618567943573, |
| "learning_rate": 0.0006099, |
| "loss": 3.365, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.0038784744667097, |
| "grad_norm": 0.8138112425804138, |
| "learning_rate": 0.0006199, |
| "loss": 3.3674, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.0361990950226243, |
| "grad_norm": 0.8098726272583008, |
| "learning_rate": 0.0006299000000000001, |
| "loss": 3.255, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.0685197155785393, |
| "grad_norm": 1.1858711242675781, |
| "learning_rate": 0.0006399, |
| "loss": 3.2813, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.100840336134454, |
| "grad_norm": 0.9740011692047119, |
| "learning_rate": 0.0006499, |
| "loss": 3.2799, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.1331609566903684, |
| "grad_norm": 1.2355788946151733, |
| "learning_rate": 0.0006599, |
| "loss": 3.2678, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.165481577246283, |
| "grad_norm": 5.133415222167969, |
| "learning_rate": 0.0006699000000000001, |
| "loss": 3.2945, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 1.0777193307876587, |
| "learning_rate": 0.0006799, |
| "loss": 3.3022, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.2301228183581125, |
| "grad_norm": 0.9968545436859131, |
| "learning_rate": 0.0006899, |
| "loss": 3.2852, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.262443438914027, |
| "grad_norm": 1.0664645433425903, |
| "learning_rate": 0.0006998999999999999, |
| "loss": 3.2945, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2947640594699417, |
| "grad_norm": 0.9292928576469421, |
| "learning_rate": 0.0007099, |
| "loss": 3.2809, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.3270846800258567, |
| "grad_norm": 0.9592123627662659, |
| "learning_rate": 0.0007199, |
| "loss": 3.2944, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3594053005817712, |
| "grad_norm": 1.028623342514038, |
| "learning_rate": 0.0007299, |
| "loss": 3.2946, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.391725921137686, |
| "grad_norm": 4.865314483642578, |
| "learning_rate": 0.0007399, |
| "loss": 3.296, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.4240465416936003, |
| "grad_norm": 1.256349802017212, |
| "learning_rate": 0.0007499000000000001, |
| "loss": 3.2976, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4563671622495153, |
| "grad_norm": 1.0199131965637207, |
| "learning_rate": 0.0007599, |
| "loss": 3.2802, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.48868778280543, |
| "grad_norm": 1.946007251739502, |
| "learning_rate": 0.0007699, |
| "loss": 3.2834, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.5210084033613445, |
| "grad_norm": 0.9734399914741516, |
| "learning_rate": 0.0007799, |
| "loss": 3.2793, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.553329023917259, |
| "grad_norm": 0.9436636567115784, |
| "learning_rate": 0.0007899000000000001, |
| "loss": 3.29, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.585649644473174, |
| "grad_norm": 0.9262025952339172, |
| "learning_rate": 0.0007999000000000001, |
| "loss": 3.2756, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.6179702650290886, |
| "grad_norm": 1.195101022720337, |
| "learning_rate": 0.0008099, |
| "loss": 3.2799, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.650290885585003, |
| "grad_norm": 0.9717804193496704, |
| "learning_rate": 0.0008198999999999999, |
| "loss": 3.2714, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.682611506140918, |
| "grad_norm": 1.1211719512939453, |
| "learning_rate": 0.0008299, |
| "loss": 3.287, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.7149321266968327, |
| "grad_norm": 1.057012915611267, |
| "learning_rate": 0.0008399, |
| "loss": 3.2752, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.7472527472527473, |
| "grad_norm": 1.0968471765518188, |
| "learning_rate": 0.0008499, |
| "loss": 3.2719, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.779573367808662, |
| "grad_norm": 1.0198901891708374, |
| "learning_rate": 0.0008599, |
| "loss": 3.2522, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.8118939883645764, |
| "grad_norm": 1.330259919166565, |
| "learning_rate": 0.0008699000000000001, |
| "loss": 3.2589, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.8442146089204914, |
| "grad_norm": 0.8509685397148132, |
| "learning_rate": 0.0008799000000000001, |
| "loss": 3.2736, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.876535229476406, |
| "grad_norm": 1.174782633781433, |
| "learning_rate": 0.0008899, |
| "loss": 3.2597, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.9088558500323205, |
| "grad_norm": 1.1556833982467651, |
| "learning_rate": 0.0008999, |
| "loss": 3.2822, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 1.1285648345947266, |
| "learning_rate": 0.0009099, |
| "loss": 3.2897, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.97349709114415, |
| "grad_norm": 0.9292157292366028, |
| "learning_rate": 0.0009199000000000001, |
| "loss": 3.2389, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.0058177117000646, |
| "grad_norm": 0.9960983991622925, |
| "learning_rate": 0.0009299, |
| "loss": 3.2608, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.038138332255979, |
| "grad_norm": 1.0418298244476318, |
| "learning_rate": 0.0009399, |
| "loss": 3.1552, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.070458952811894, |
| "grad_norm": 0.8377931714057922, |
| "learning_rate": 0.0009498999999999999, |
| "loss": 3.1847, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.1027795733678087, |
| "grad_norm": 1.1886683702468872, |
| "learning_rate": 0.0009599, |
| "loss": 3.1789, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.1351001939237233, |
| "grad_norm": 0.9381577968597412, |
| "learning_rate": 0.0009699, |
| "loss": 3.166, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.167420814479638, |
| "grad_norm": 0.9787984490394592, |
| "learning_rate": 0.0009799, |
| "loss": 3.1831, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.199741435035553, |
| "grad_norm": 2.247471332550049, |
| "learning_rate": 0.0009899, |
| "loss": 3.2012, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.2320620555914674, |
| "grad_norm": 0.855204701423645, |
| "learning_rate": 0.0009999, |
| "loss": 3.1842, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.264382676147382, |
| "grad_norm": 1.1390490531921387, |
| "learning_rate": 0.001, |
| "loss": 3.1923, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.2967032967032965, |
| "grad_norm": 1.1109646558761597, |
| "learning_rate": 0.001, |
| "loss": 3.192, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.3290239172592115, |
| "grad_norm": 1.2080135345458984, |
| "learning_rate": 0.001, |
| "loss": 3.1771, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.361344537815126, |
| "grad_norm": 1.106696367263794, |
| "learning_rate": 0.001, |
| "loss": 3.1865, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.3936651583710407, |
| "grad_norm": 1.0035741329193115, |
| "learning_rate": 0.001, |
| "loss": 3.1756, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.425985778926955, |
| "grad_norm": 1.0501046180725098, |
| "learning_rate": 0.001, |
| "loss": 3.1716, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.45830639948287, |
| "grad_norm": 0.8912081122398376, |
| "learning_rate": 0.001, |
| "loss": 3.1657, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.490627020038785, |
| "grad_norm": 1.302748680114746, |
| "learning_rate": 0.001, |
| "loss": 3.1769, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.5229476405946993, |
| "grad_norm": 0.816489577293396, |
| "learning_rate": 0.001, |
| "loss": 3.201, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.555268261150614, |
| "grad_norm": 1.2402598857879639, |
| "learning_rate": 0.001, |
| "loss": 3.1804, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.587588881706529, |
| "grad_norm": 1.3531599044799805, |
| "learning_rate": 0.001, |
| "loss": 3.1921, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.6199095022624435, |
| "grad_norm": 2.2036519050598145, |
| "learning_rate": 0.001, |
| "loss": 3.1774, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.652230122818358, |
| "grad_norm": 1.7961952686309814, |
| "learning_rate": 0.001, |
| "loss": 3.1717, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.684550743374273, |
| "grad_norm": 1.176538348197937, |
| "learning_rate": 0.001, |
| "loss": 3.171, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.7168713639301876, |
| "grad_norm": 1.1532666683197021, |
| "learning_rate": 0.001, |
| "loss": 3.1913, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.749191984486102, |
| "grad_norm": 0.9709277749061584, |
| "learning_rate": 0.001, |
| "loss": 3.1926, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.7815126050420167, |
| "grad_norm": 1.0044294595718384, |
| "learning_rate": 0.001, |
| "loss": 3.19, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.8138332255979313, |
| "grad_norm": 0.8759526610374451, |
| "learning_rate": 0.001, |
| "loss": 3.1719, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.9571463465690613, |
| "learning_rate": 0.001, |
| "loss": 3.1798, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.878474466709761, |
| "grad_norm": 1.0519996881484985, |
| "learning_rate": 0.001, |
| "loss": 3.1891, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.9107950872656754, |
| "grad_norm": 1.0874314308166504, |
| "learning_rate": 0.001, |
| "loss": 3.177, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.9431157078215904, |
| "grad_norm": 1.134121298789978, |
| "learning_rate": 0.001, |
| "loss": 3.1724, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.975436328377505, |
| "grad_norm": 1.093509554862976, |
| "learning_rate": 0.001, |
| "loss": 3.1751, |
| "step": 12300 |
| }, |
| { |
| "epoch": 4.0077569489334195, |
| "grad_norm": 0.838979184627533, |
| "learning_rate": 0.001, |
| "loss": 3.1506, |
| "step": 12400 |
| }, |
| { |
| "epoch": 4.040077569489334, |
| "grad_norm": 0.9417329430580139, |
| "learning_rate": 0.001, |
| "loss": 3.0295, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.072398190045249, |
| "grad_norm": 0.941433310508728, |
| "learning_rate": 0.001, |
| "loss": 3.0514, |
| "step": 12600 |
| }, |
| { |
| "epoch": 4.104718810601163, |
| "grad_norm": 1.1772059202194214, |
| "learning_rate": 0.001, |
| "loss": 3.0639, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.137039431157079, |
| "grad_norm": 0.9115270376205444, |
| "learning_rate": 0.001, |
| "loss": 3.0563, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.169360051712993, |
| "grad_norm": 1.0796817541122437, |
| "learning_rate": 0.001, |
| "loss": 3.07, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.201680672268908, |
| "grad_norm": 0.9457636475563049, |
| "learning_rate": 0.001, |
| "loss": 3.0671, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.234001292824822, |
| "grad_norm": 1.6689468622207642, |
| "learning_rate": 0.001, |
| "loss": 3.0806, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.266321913380737, |
| "grad_norm": 0.8102026581764221, |
| "learning_rate": 0.001, |
| "loss": 3.0564, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.298642533936651, |
| "grad_norm": 0.8251219391822815, |
| "learning_rate": 0.001, |
| "loss": 3.0567, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.330963154492566, |
| "grad_norm": 0.921691358089447, |
| "learning_rate": 0.001, |
| "loss": 3.0732, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.3632837750484805, |
| "grad_norm": 1.216403841972351, |
| "learning_rate": 0.001, |
| "loss": 3.0796, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.395604395604396, |
| "grad_norm": 1.2740323543548584, |
| "learning_rate": 0.001, |
| "loss": 3.0667, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.4279250161603105, |
| "grad_norm": 1.0430225133895874, |
| "learning_rate": 0.001, |
| "loss": 3.0521, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.460245636716225, |
| "grad_norm": 0.8932771682739258, |
| "learning_rate": 0.001, |
| "loss": 3.0704, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.49256625727214, |
| "grad_norm": 1.011426568031311, |
| "learning_rate": 0.001, |
| "loss": 3.0857, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.524886877828054, |
| "grad_norm": 1.2008609771728516, |
| "learning_rate": 0.001, |
| "loss": 3.0533, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.557207498383969, |
| "grad_norm": 0.847837507724762, |
| "learning_rate": 0.001, |
| "loss": 3.0694, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.589528118939883, |
| "grad_norm": 0.951865017414093, |
| "learning_rate": 0.001, |
| "loss": 3.084, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.621848739495798, |
| "grad_norm": 0.972174882888794, |
| "learning_rate": 0.001, |
| "loss": 3.0808, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.654169360051713, |
| "grad_norm": 1.0798579454421997, |
| "learning_rate": 0.001, |
| "loss": 3.0889, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.686489980607628, |
| "grad_norm": 0.7689244747161865, |
| "learning_rate": 0.001, |
| "loss": 3.0891, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.7188106011635425, |
| "grad_norm": 1.4271835088729858, |
| "learning_rate": 0.001, |
| "loss": 3.0655, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.751131221719457, |
| "grad_norm": 1.0069650411605835, |
| "learning_rate": 0.001, |
| "loss": 3.0706, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.783451842275372, |
| "grad_norm": 0.9084206223487854, |
| "learning_rate": 0.001, |
| "loss": 3.057, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.815772462831286, |
| "grad_norm": 1.3670860528945923, |
| "learning_rate": 0.001, |
| "loss": 3.0592, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.848093083387201, |
| "grad_norm": 0.9387325048446655, |
| "learning_rate": 0.001, |
| "loss": 3.0789, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.880413703943116, |
| "grad_norm": 0.8084505200386047, |
| "learning_rate": 0.001, |
| "loss": 3.0631, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.912734324499031, |
| "grad_norm": 1.0331807136535645, |
| "learning_rate": 0.001, |
| "loss": 3.0723, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.945054945054945, |
| "grad_norm": 0.9408292770385742, |
| "learning_rate": 0.001, |
| "loss": 3.0616, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.97737556561086, |
| "grad_norm": 0.9665517807006836, |
| "learning_rate": 0.001, |
| "loss": 3.0801, |
| "step": 15400 |
| }, |
| { |
| "epoch": 5.009696186166774, |
| "grad_norm": 1.1656768321990967, |
| "learning_rate": 0.001, |
| "loss": 3.0375, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.042016806722689, |
| "grad_norm": 0.9300348162651062, |
| "learning_rate": 0.001, |
| "loss": 2.9091, |
| "step": 15600 |
| }, |
| { |
| "epoch": 5.0743374272786035, |
| "grad_norm": 1.0597182512283325, |
| "learning_rate": 0.001, |
| "loss": 2.9504, |
| "step": 15700 |
| }, |
| { |
| "epoch": 5.106658047834518, |
| "grad_norm": 1.2280610799789429, |
| "learning_rate": 0.001, |
| "loss": 2.9492, |
| "step": 15800 |
| }, |
| { |
| "epoch": 5.1389786683904335, |
| "grad_norm": 1.0233289003372192, |
| "learning_rate": 0.001, |
| "loss": 2.9551, |
| "step": 15900 |
| }, |
| { |
| "epoch": 5.171299288946348, |
| "grad_norm": 0.941676676273346, |
| "learning_rate": 0.001, |
| "loss": 2.9642, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.203619909502263, |
| "grad_norm": 0.7993482351303101, |
| "learning_rate": 0.001, |
| "loss": 2.9547, |
| "step": 16100 |
| }, |
| { |
| "epoch": 5.235940530058177, |
| "grad_norm": 0.9896591305732727, |
| "learning_rate": 0.001, |
| "loss": 2.9548, |
| "step": 16200 |
| }, |
| { |
| "epoch": 5.268261150614092, |
| "grad_norm": 0.9227080345153809, |
| "learning_rate": 0.001, |
| "loss": 2.9812, |
| "step": 16300 |
| }, |
| { |
| "epoch": 5.300581771170006, |
| "grad_norm": 1.2044575214385986, |
| "learning_rate": 0.001, |
| "loss": 2.9787, |
| "step": 16400 |
| }, |
| { |
| "epoch": 5.332902391725921, |
| "grad_norm": 1.003462791442871, |
| "learning_rate": 0.001, |
| "loss": 2.9652, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.365223012281835, |
| "grad_norm": 1.0406891107559204, |
| "learning_rate": 0.001, |
| "loss": 2.963, |
| "step": 16600 |
| }, |
| { |
| "epoch": 5.397543632837751, |
| "grad_norm": 1.0937559604644775, |
| "learning_rate": 0.001, |
| "loss": 2.9639, |
| "step": 16700 |
| }, |
| { |
| "epoch": 5.429864253393665, |
| "grad_norm": 0.8680944442749023, |
| "learning_rate": 0.001, |
| "loss": 2.9819, |
| "step": 16800 |
| }, |
| { |
| "epoch": 5.46218487394958, |
| "grad_norm": 0.917489230632782, |
| "learning_rate": 0.001, |
| "loss": 2.9663, |
| "step": 16900 |
| }, |
| { |
| "epoch": 5.4945054945054945, |
| "grad_norm": 0.8815052509307861, |
| "learning_rate": 0.001, |
| "loss": 2.9881, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.526826115061409, |
| "grad_norm": 0.9785053133964539, |
| "learning_rate": 0.001, |
| "loss": 2.9701, |
| "step": 17100 |
| }, |
| { |
| "epoch": 5.559146735617324, |
| "grad_norm": 1.2235257625579834, |
| "learning_rate": 0.001, |
| "loss": 2.9775, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.591467356173238, |
| "grad_norm": 0.8558531403541565, |
| "learning_rate": 0.001, |
| "loss": 2.9492, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.623787976729153, |
| "grad_norm": 1.730175495147705, |
| "learning_rate": 0.001, |
| "loss": 2.9725, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.656108597285068, |
| "grad_norm": 0.9976469278335571, |
| "learning_rate": 0.001, |
| "loss": 2.9795, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.688429217840983, |
| "grad_norm": 1.102630853652954, |
| "learning_rate": 0.001, |
| "loss": 3.0011, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.720749838396897, |
| "grad_norm": 0.8655111789703369, |
| "learning_rate": 0.001, |
| "loss": 2.982, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.753070458952812, |
| "grad_norm": 0.9005181193351746, |
| "learning_rate": 0.001, |
| "loss": 2.9795, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.785391079508726, |
| "grad_norm": 0.91997891664505, |
| "learning_rate": 0.001, |
| "loss": 2.9668, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.817711700064641, |
| "grad_norm": 0.9092044234275818, |
| "learning_rate": 0.001, |
| "loss": 2.9854, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.850032320620556, |
| "grad_norm": 1.1681147813796997, |
| "learning_rate": 0.001, |
| "loss": 2.9888, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 0.88965904712677, |
| "learning_rate": 0.001, |
| "loss": 2.99, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.914673561732386, |
| "grad_norm": 0.915117084980011, |
| "learning_rate": 0.001, |
| "loss": 3.0072, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.9469941822883, |
| "grad_norm": 0.9954575300216675, |
| "learning_rate": 0.001, |
| "loss": 2.9764, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.979314802844215, |
| "grad_norm": 1.087384581565857, |
| "learning_rate": 0.001, |
| "loss": 2.9776, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.011635423400129, |
| "grad_norm": 0.9920886158943176, |
| "learning_rate": 0.001, |
| "loss": 2.9278, |
| "step": 18600 |
| }, |
| { |
| "epoch": 6.043956043956044, |
| "grad_norm": 1.1691513061523438, |
| "learning_rate": 0.001, |
| "loss": 2.842, |
| "step": 18700 |
| }, |
| { |
| "epoch": 6.076276664511958, |
| "grad_norm": 1.0943280458450317, |
| "learning_rate": 0.001, |
| "loss": 2.8453, |
| "step": 18800 |
| }, |
| { |
| "epoch": 6.108597285067873, |
| "grad_norm": 1.4848939180374146, |
| "learning_rate": 0.001, |
| "loss": 2.8522, |
| "step": 18900 |
| }, |
| { |
| "epoch": 6.140917905623788, |
| "grad_norm": 0.8807019591331482, |
| "learning_rate": 0.001, |
| "loss": 2.8703, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.173238526179703, |
| "grad_norm": 1.2009692192077637, |
| "learning_rate": 0.001, |
| "loss": 2.8789, |
| "step": 19100 |
| }, |
| { |
| "epoch": 6.2055591467356175, |
| "grad_norm": 1.0970031023025513, |
| "learning_rate": 0.001, |
| "loss": 2.8751, |
| "step": 19200 |
| }, |
| { |
| "epoch": 6.237879767291532, |
| "grad_norm": 1.1768124103546143, |
| "learning_rate": 0.001, |
| "loss": 2.886, |
| "step": 19300 |
| }, |
| { |
| "epoch": 6.270200387847447, |
| "grad_norm": 0.8989688754081726, |
| "learning_rate": 0.001, |
| "loss": 2.8944, |
| "step": 19400 |
| }, |
| { |
| "epoch": 6.302521008403361, |
| "grad_norm": 1.2694783210754395, |
| "learning_rate": 0.001, |
| "loss": 2.8668, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.334841628959276, |
| "grad_norm": 0.9729022979736328, |
| "learning_rate": 0.001, |
| "loss": 2.8705, |
| "step": 19600 |
| }, |
| { |
| "epoch": 6.36716224951519, |
| "grad_norm": 1.0138781070709229, |
| "learning_rate": 0.001, |
| "loss": 2.8859, |
| "step": 19700 |
| }, |
| { |
| "epoch": 6.399482870071106, |
| "grad_norm": 0.9176075458526611, |
| "learning_rate": 0.001, |
| "loss": 2.9032, |
| "step": 19800 |
| }, |
| { |
| "epoch": 6.43180349062702, |
| "grad_norm": 1.1158503293991089, |
| "learning_rate": 0.001, |
| "loss": 2.8774, |
| "step": 19900 |
| }, |
| { |
| "epoch": 6.464124111182935, |
| "grad_norm": 0.9626113176345825, |
| "learning_rate": 0.001, |
| "loss": 2.8744, |
| "step": 20000 |
| }, |
| { |
| "epoch": 6.496444731738849, |
| "grad_norm": 0.9146256446838379, |
| "learning_rate": 0.001, |
| "loss": 2.8945, |
| "step": 20100 |
| }, |
| { |
| "epoch": 6.528765352294764, |
| "grad_norm": 0.9654421806335449, |
| "learning_rate": 0.001, |
| "loss": 2.8934, |
| "step": 20200 |
| }, |
| { |
| "epoch": 6.5610859728506785, |
| "grad_norm": 0.9559252262115479, |
| "learning_rate": 0.001, |
| "loss": 2.8954, |
| "step": 20300 |
| }, |
| { |
| "epoch": 6.593406593406593, |
| "grad_norm": 0.9600493907928467, |
| "learning_rate": 0.001, |
| "loss": 2.8964, |
| "step": 20400 |
| }, |
| { |
| "epoch": 6.625727213962508, |
| "grad_norm": 1.152198314666748, |
| "learning_rate": 0.001, |
| "loss": 2.8953, |
| "step": 20500 |
| }, |
| { |
| "epoch": 6.658047834518423, |
| "grad_norm": 0.7821874618530273, |
| "learning_rate": 0.001, |
| "loss": 2.8899, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.690368455074338, |
| "grad_norm": 1.2071696519851685, |
| "learning_rate": 0.001, |
| "loss": 2.8832, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.722689075630252, |
| "grad_norm": 0.9243321418762207, |
| "learning_rate": 0.001, |
| "loss": 2.9011, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.755009696186167, |
| "grad_norm": 0.925390899181366, |
| "learning_rate": 0.001, |
| "loss": 2.9143, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.787330316742081, |
| "grad_norm": 0.9191309213638306, |
| "learning_rate": 0.001, |
| "loss": 2.9058, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.819650937297996, |
| "grad_norm": 0.8833218812942505, |
| "learning_rate": 0.001, |
| "loss": 2.8933, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.85197155785391, |
| "grad_norm": 0.9937705397605896, |
| "learning_rate": 0.001, |
| "loss": 2.9203, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.884292178409826, |
| "grad_norm": 1.2760004997253418, |
| "learning_rate": 0.001, |
| "loss": 2.9042, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.91661279896574, |
| "grad_norm": 1.0547213554382324, |
| "learning_rate": 0.001, |
| "loss": 2.9187, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.948933419521655, |
| "grad_norm": 0.957062840461731, |
| "learning_rate": 0.001, |
| "loss": 2.903, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.98125404007757, |
| "grad_norm": 1.1016453504562378, |
| "learning_rate": 0.001, |
| "loss": 2.8896, |
| "step": 21600 |
| }, |
| { |
| "epoch": 7.013574660633484, |
| "grad_norm": 1.3905339241027832, |
| "learning_rate": 0.001, |
| "loss": 2.8379, |
| "step": 21700 |
| }, |
| { |
| "epoch": 7.045895281189399, |
| "grad_norm": 1.1392508745193481, |
| "learning_rate": 0.001, |
| "loss": 2.7692, |
| "step": 21800 |
| }, |
| { |
| "epoch": 7.078215901745313, |
| "grad_norm": 0.9623212814331055, |
| "learning_rate": 0.001, |
| "loss": 2.76, |
| "step": 21900 |
| }, |
| { |
| "epoch": 7.110536522301228, |
| "grad_norm": 1.0762792825698853, |
| "learning_rate": 0.001, |
| "loss": 2.7784, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 1.1317200660705566, |
| "learning_rate": 0.001, |
| "loss": 2.7783, |
| "step": 22100 |
| }, |
| { |
| "epoch": 7.175177763413058, |
| "grad_norm": 1.379044532775879, |
| "learning_rate": 0.001, |
| "loss": 2.7868, |
| "step": 22200 |
| }, |
| { |
| "epoch": 7.207498383968972, |
| "grad_norm": 1.036180019378662, |
| "learning_rate": 0.001, |
| "loss": 2.8014, |
| "step": 22300 |
| }, |
| { |
| "epoch": 7.239819004524887, |
| "grad_norm": 1.326994776725769, |
| "learning_rate": 0.001, |
| "loss": 2.8077, |
| "step": 22400 |
| }, |
| { |
| "epoch": 7.2721396250808015, |
| "grad_norm": 1.378857135772705, |
| "learning_rate": 0.001, |
| "loss": 2.8186, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.304460245636716, |
| "grad_norm": 1.1402287483215332, |
| "learning_rate": 0.001, |
| "loss": 2.8114, |
| "step": 22600 |
| }, |
| { |
| "epoch": 7.336780866192631, |
| "grad_norm": 1.236741304397583, |
| "learning_rate": 0.001, |
| "loss": 2.798, |
| "step": 22700 |
| }, |
| { |
| "epoch": 7.369101486748546, |
| "grad_norm": 0.9529298543930054, |
| "learning_rate": 0.001, |
| "loss": 2.8223, |
| "step": 22800 |
| }, |
| { |
| "epoch": 7.401422107304461, |
| "grad_norm": 1.272033929824829, |
| "learning_rate": 0.001, |
| "loss": 2.8241, |
| "step": 22900 |
| }, |
| { |
| "epoch": 7.433742727860375, |
| "grad_norm": 0.9919891953468323, |
| "learning_rate": 0.001, |
| "loss": 2.8106, |
| "step": 23000 |
| }, |
| { |
| "epoch": 7.46606334841629, |
| "grad_norm": 0.9951006770133972, |
| "learning_rate": 0.001, |
| "loss": 2.8164, |
| "step": 23100 |
| }, |
| { |
| "epoch": 7.498383968972204, |
| "grad_norm": 1.260886549949646, |
| "learning_rate": 0.001, |
| "loss": 2.8238, |
| "step": 23200 |
| }, |
| { |
| "epoch": 7.530704589528119, |
| "grad_norm": 1.0894906520843506, |
| "learning_rate": 0.001, |
| "loss": 2.818, |
| "step": 23300 |
| }, |
| { |
| "epoch": 7.563025210084033, |
| "grad_norm": 1.1154838800430298, |
| "learning_rate": 0.001, |
| "loss": 2.8312, |
| "step": 23400 |
| }, |
| { |
| "epoch": 7.595345830639948, |
| "grad_norm": 1.0110588073730469, |
| "learning_rate": 0.001, |
| "loss": 2.812, |
| "step": 23500 |
| }, |
| { |
| "epoch": 7.6276664511958625, |
| "grad_norm": 0.9715908765792847, |
| "learning_rate": 0.001, |
| "loss": 2.819, |
| "step": 23600 |
| }, |
| { |
| "epoch": 7.659987071751778, |
| "grad_norm": 1.0196453332901, |
| "learning_rate": 0.001, |
| "loss": 2.8195, |
| "step": 23700 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 1.3575221300125122, |
| "learning_rate": 0.001, |
| "loss": 2.8115, |
| "step": 23800 |
| }, |
| { |
| "epoch": 7.724628312863607, |
| "grad_norm": 1.3838183879852295, |
| "learning_rate": 0.001, |
| "loss": 2.8206, |
| "step": 23900 |
| }, |
| { |
| "epoch": 7.756948933419522, |
| "grad_norm": 1.2254596948623657, |
| "learning_rate": 0.001, |
| "loss": 2.8281, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.789269553975436, |
| "grad_norm": 1.2324926853179932, |
| "learning_rate": 0.001, |
| "loss": 2.8426, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.821590174531351, |
| "grad_norm": 1.2173677682876587, |
| "learning_rate": 0.001, |
| "loss": 2.8355, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.853910795087265, |
| "grad_norm": 0.9907102584838867, |
| "learning_rate": 0.001, |
| "loss": 2.8081, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.886231415643181, |
| "grad_norm": 1.141242265701294, |
| "learning_rate": 0.001, |
| "loss": 2.8234, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.918552036199095, |
| "grad_norm": 1.286460518836975, |
| "learning_rate": 0.001, |
| "loss": 2.8324, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.95087265675501, |
| "grad_norm": 0.9198762774467468, |
| "learning_rate": 0.001, |
| "loss": 2.8671, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.983193277310924, |
| "grad_norm": 0.9404067397117615, |
| "learning_rate": 0.001, |
| "loss": 2.8488, |
| "step": 24700 |
| }, |
| { |
| "epoch": 8.015513897866839, |
| "grad_norm": 1.2536792755126953, |
| "learning_rate": 0.001, |
| "loss": 2.7556, |
| "step": 24800 |
| }, |
| { |
| "epoch": 8.047834518422754, |
| "grad_norm": 1.4987441301345825, |
| "learning_rate": 0.001, |
| "loss": 2.7025, |
| "step": 24900 |
| }, |
| { |
| "epoch": 8.080155138978668, |
| "grad_norm": 1.1885977983474731, |
| "learning_rate": 0.001, |
| "loss": 2.7104, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.112475759534583, |
| "grad_norm": 1.5676301717758179, |
| "learning_rate": 0.001, |
| "loss": 2.7184, |
| "step": 25100 |
| }, |
| { |
| "epoch": 8.144796380090497, |
| "grad_norm": 1.3227053880691528, |
| "learning_rate": 0.001, |
| "loss": 2.7353, |
| "step": 25200 |
| }, |
| { |
| "epoch": 8.177117000646412, |
| "grad_norm": 1.1693710088729858, |
| "learning_rate": 0.001, |
| "loss": 2.7109, |
| "step": 25300 |
| }, |
| { |
| "epoch": 8.209437621202326, |
| "grad_norm": 1.5500166416168213, |
| "learning_rate": 0.001, |
| "loss": 2.7279, |
| "step": 25400 |
| }, |
| { |
| "epoch": 8.241758241758241, |
| "grad_norm": 1.6105555295944214, |
| "learning_rate": 0.001, |
| "loss": 2.7238, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.274078862314157, |
| "grad_norm": 1.3008909225463867, |
| "learning_rate": 0.001, |
| "loss": 2.7138, |
| "step": 25600 |
| }, |
| { |
| "epoch": 8.306399482870072, |
| "grad_norm": 1.1506481170654297, |
| "learning_rate": 0.001, |
| "loss": 2.7202, |
| "step": 25700 |
| }, |
| { |
| "epoch": 8.338720103425986, |
| "grad_norm": 1.583932876586914, |
| "learning_rate": 0.001, |
| "loss": 2.7328, |
| "step": 25800 |
| }, |
| { |
| "epoch": 8.371040723981901, |
| "grad_norm": 1.3606271743774414, |
| "learning_rate": 0.001, |
| "loss": 2.7364, |
| "step": 25900 |
| }, |
| { |
| "epoch": 8.403361344537815, |
| "grad_norm": 1.6803429126739502, |
| "learning_rate": 0.001, |
| "loss": 2.7109, |
| "step": 26000 |
| }, |
| { |
| "epoch": 8.43568196509373, |
| "grad_norm": 1.6417889595031738, |
| "learning_rate": 0.001, |
| "loss": 2.7318, |
| "step": 26100 |
| }, |
| { |
| "epoch": 8.468002585649645, |
| "grad_norm": 1.2529147863388062, |
| "learning_rate": 0.001, |
| "loss": 2.7559, |
| "step": 26200 |
| }, |
| { |
| "epoch": 8.50032320620556, |
| "grad_norm": 1.352419137954712, |
| "learning_rate": 0.001, |
| "loss": 2.7578, |
| "step": 26300 |
| }, |
| { |
| "epoch": 8.532643826761474, |
| "grad_norm": 1.5327883958816528, |
| "learning_rate": 0.001, |
| "loss": 2.7497, |
| "step": 26400 |
| }, |
| { |
| "epoch": 8.564964447317388, |
| "grad_norm": 1.2606614828109741, |
| "learning_rate": 0.001, |
| "loss": 2.7531, |
| "step": 26500 |
| }, |
| { |
| "epoch": 8.597285067873303, |
| "grad_norm": 1.340108871459961, |
| "learning_rate": 0.001, |
| "loss": 2.76, |
| "step": 26600 |
| }, |
| { |
| "epoch": 8.629605688429217, |
| "grad_norm": 1.622501015663147, |
| "learning_rate": 0.001, |
| "loss": 2.7671, |
| "step": 26700 |
| }, |
| { |
| "epoch": 8.661926308985132, |
| "grad_norm": 2.9881439208984375, |
| "learning_rate": 0.001, |
| "loss": 2.7507, |
| "step": 26800 |
| }, |
| { |
| "epoch": 8.694246929541046, |
| "grad_norm": 1.4684876203536987, |
| "learning_rate": 0.001, |
| "loss": 2.7754, |
| "step": 26900 |
| }, |
| { |
| "epoch": 8.726567550096961, |
| "grad_norm": 1.279051423072815, |
| "learning_rate": 0.001, |
| "loss": 2.7737, |
| "step": 27000 |
| }, |
| { |
| "epoch": 8.758888170652877, |
| "grad_norm": 1.2881108522415161, |
| "learning_rate": 0.001, |
| "loss": 2.7566, |
| "step": 27100 |
| }, |
| { |
| "epoch": 8.791208791208792, |
| "grad_norm": 1.4721473455429077, |
| "learning_rate": 0.001, |
| "loss": 2.753, |
| "step": 27200 |
| }, |
| { |
| "epoch": 8.823529411764707, |
| "grad_norm": 1.335915207862854, |
| "learning_rate": 0.001, |
| "loss": 2.7699, |
| "step": 27300 |
| }, |
| { |
| "epoch": 8.855850032320621, |
| "grad_norm": 1.3305962085723877, |
| "learning_rate": 0.001, |
| "loss": 2.7792, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.888170652876536, |
| "grad_norm": 1.196067452430725, |
| "learning_rate": 0.001, |
| "loss": 2.7727, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.92049127343245, |
| "grad_norm": 1.2162010669708252, |
| "learning_rate": 0.001, |
| "loss": 2.751, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.952811893988365, |
| "grad_norm": 1.5516213178634644, |
| "learning_rate": 0.001, |
| "loss": 2.7937, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.98513251454428, |
| "grad_norm": 1.2385696172714233, |
| "learning_rate": 0.001, |
| "loss": 2.7732, |
| "step": 27800 |
| }, |
| { |
| "epoch": 9.017453135100194, |
| "grad_norm": 1.991585373878479, |
| "learning_rate": 0.001, |
| "loss": 2.6816, |
| "step": 27900 |
| }, |
| { |
| "epoch": 9.049773755656108, |
| "grad_norm": 1.8597898483276367, |
| "learning_rate": 0.001, |
| "loss": 2.6321, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.082094376212023, |
| "grad_norm": 1.6625946760177612, |
| "learning_rate": 0.001, |
| "loss": 2.6467, |
| "step": 28100 |
| }, |
| { |
| "epoch": 9.114414996767938, |
| "grad_norm": 1.5089329481124878, |
| "learning_rate": 0.001, |
| "loss": 2.6456, |
| "step": 28200 |
| }, |
| { |
| "epoch": 9.146735617323852, |
| "grad_norm": 1.6687277555465698, |
| "learning_rate": 0.001, |
| "loss": 2.652, |
| "step": 28300 |
| }, |
| { |
| "epoch": 9.179056237879767, |
| "grad_norm": 1.1334947347640991, |
| "learning_rate": 0.001, |
| "loss": 2.6543, |
| "step": 28400 |
| }, |
| { |
| "epoch": 9.211376858435681, |
| "grad_norm": 1.7903298139572144, |
| "learning_rate": 0.001, |
| "loss": 2.6396, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.243697478991596, |
| "grad_norm": 1.8491761684417725, |
| "learning_rate": 0.001, |
| "loss": 2.6443, |
| "step": 28600 |
| }, |
| { |
| "epoch": 9.276018099547512, |
| "grad_norm": 1.4492714405059814, |
| "learning_rate": 0.001, |
| "loss": 2.6509, |
| "step": 28700 |
| }, |
| { |
| "epoch": 9.308338720103427, |
| "grad_norm": 1.8237452507019043, |
| "learning_rate": 0.001, |
| "loss": 2.6867, |
| "step": 28800 |
| }, |
| { |
| "epoch": 9.340659340659341, |
| "grad_norm": 1.7002811431884766, |
| "learning_rate": 0.001, |
| "loss": 2.6911, |
| "step": 28900 |
| }, |
| { |
| "epoch": 9.372979961215256, |
| "grad_norm": 1.24556303024292, |
| "learning_rate": 0.001, |
| "loss": 2.6842, |
| "step": 29000 |
| }, |
| { |
| "epoch": 9.40530058177117, |
| "grad_norm": 1.3534449338912964, |
| "learning_rate": 0.001, |
| "loss": 2.6882, |
| "step": 29100 |
| }, |
| { |
| "epoch": 9.437621202327085, |
| "grad_norm": 1.465098261833191, |
| "learning_rate": 0.001, |
| "loss": 2.6845, |
| "step": 29200 |
| }, |
| { |
| "epoch": 9.469941822883, |
| "grad_norm": 1.1282223463058472, |
| "learning_rate": 0.001, |
| "loss": 2.6926, |
| "step": 29300 |
| }, |
| { |
| "epoch": 9.502262443438914, |
| "grad_norm": 1.5469937324523926, |
| "learning_rate": 0.001, |
| "loss": 2.6861, |
| "step": 29400 |
| }, |
| { |
| "epoch": 9.534583063994829, |
| "grad_norm": 1.313873052597046, |
| "learning_rate": 0.001, |
| "loss": 2.6821, |
| "step": 29500 |
| }, |
| { |
| "epoch": 9.566903684550743, |
| "grad_norm": 1.0639135837554932, |
| "learning_rate": 0.001, |
| "loss": 2.6661, |
| "step": 29600 |
| }, |
| { |
| "epoch": 9.599224305106658, |
| "grad_norm": 1.1810053586959839, |
| "learning_rate": 0.001, |
| "loss": 2.6854, |
| "step": 29700 |
| }, |
| { |
| "epoch": 9.631544925662572, |
| "grad_norm": 1.4090721607208252, |
| "learning_rate": 0.001, |
| "loss": 2.7232, |
| "step": 29800 |
| }, |
| { |
| "epoch": 9.663865546218487, |
| "grad_norm": 1.278445839881897, |
| "learning_rate": 0.001, |
| "loss": 2.6675, |
| "step": 29900 |
| }, |
| { |
| "epoch": 9.696186166774401, |
| "grad_norm": 1.2034200429916382, |
| "learning_rate": 0.001, |
| "loss": 2.6747, |
| "step": 30000 |
| }, |
| { |
| "epoch": 9.728506787330316, |
| "grad_norm": 1.2123016119003296, |
| "learning_rate": 0.001, |
| "loss": 2.7185, |
| "step": 30100 |
| }, |
| { |
| "epoch": 9.760827407886232, |
| "grad_norm": 1.6041324138641357, |
| "learning_rate": 0.001, |
| "loss": 2.7031, |
| "step": 30200 |
| }, |
| { |
| "epoch": 9.793148028442147, |
| "grad_norm": 1.3464832305908203, |
| "learning_rate": 0.001, |
| "loss": 2.7295, |
| "step": 30300 |
| }, |
| { |
| "epoch": 9.825468648998061, |
| "grad_norm": 1.8239651918411255, |
| "learning_rate": 0.001, |
| "loss": 2.6939, |
| "step": 30400 |
| }, |
| { |
| "epoch": 9.857789269553976, |
| "grad_norm": 1.3928236961364746, |
| "learning_rate": 0.001, |
| "loss": 2.702, |
| "step": 30500 |
| }, |
| { |
| "epoch": 9.89010989010989, |
| "grad_norm": 1.341913104057312, |
| "learning_rate": 0.001, |
| "loss": 2.699, |
| "step": 30600 |
| }, |
| { |
| "epoch": 9.922430510665805, |
| "grad_norm": 1.3967502117156982, |
| "learning_rate": 0.001, |
| "loss": 2.7218, |
| "step": 30700 |
| }, |
| { |
| "epoch": 9.95475113122172, |
| "grad_norm": 1.2057521343231201, |
| "learning_rate": 0.001, |
| "loss": 2.7233, |
| "step": 30800 |
| }, |
| { |
| "epoch": 9.987071751777634, |
| "grad_norm": 1.4760347604751587, |
| "learning_rate": 0.001, |
| "loss": 2.7322, |
| "step": 30900 |
| }, |
| { |
| "epoch": 10.019392372333549, |
| "grad_norm": 0.9195663332939148, |
| "learning_rate": 0.001, |
| "loss": 2.642, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.051712992889463, |
| "grad_norm": 1.7018245458602905, |
| "learning_rate": 0.001, |
| "loss": 2.5902, |
| "step": 31100 |
| }, |
| { |
| "epoch": 10.084033613445378, |
| "grad_norm": 1.2946157455444336, |
| "learning_rate": 0.001, |
| "loss": 2.5836, |
| "step": 31200 |
| }, |
| { |
| "epoch": 10.116354234001292, |
| "grad_norm": 1.2677333354949951, |
| "learning_rate": 0.001, |
| "loss": 2.5861, |
| "step": 31300 |
| }, |
| { |
| "epoch": 10.148674854557207, |
| "grad_norm": 0.9341103434562683, |
| "learning_rate": 0.001, |
| "loss": 2.5815, |
| "step": 31400 |
| }, |
| { |
| "epoch": 10.180995475113122, |
| "grad_norm": 1.197549819946289, |
| "learning_rate": 0.001, |
| "loss": 2.5988, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.213316095669036, |
| "grad_norm": 0.9701215028762817, |
| "learning_rate": 0.001, |
| "loss": 2.6033, |
| "step": 31600 |
| }, |
| { |
| "epoch": 10.24563671622495, |
| "grad_norm": 1.440954327583313, |
| "learning_rate": 0.001, |
| "loss": 2.6095, |
| "step": 31700 |
| }, |
| { |
| "epoch": 10.277957336780867, |
| "grad_norm": 1.2938240766525269, |
| "learning_rate": 0.001, |
| "loss": 2.6024, |
| "step": 31800 |
| }, |
| { |
| "epoch": 10.310277957336782, |
| "grad_norm": 1.4884780645370483, |
| "learning_rate": 0.001, |
| "loss": 2.6229, |
| "step": 31900 |
| }, |
| { |
| "epoch": 10.342598577892696, |
| "grad_norm": 0.9927781820297241, |
| "learning_rate": 0.001, |
| "loss": 2.5953, |
| "step": 32000 |
| }, |
| { |
| "epoch": 10.37491919844861, |
| "grad_norm": 0.957020103931427, |
| "learning_rate": 0.001, |
| "loss": 2.6005, |
| "step": 32100 |
| }, |
| { |
| "epoch": 10.407239819004525, |
| "grad_norm": 1.0292260646820068, |
| "learning_rate": 0.001, |
| "loss": 2.6422, |
| "step": 32200 |
| }, |
| { |
| "epoch": 10.43956043956044, |
| "grad_norm": 1.205029010772705, |
| "learning_rate": 0.001, |
| "loss": 2.6276, |
| "step": 32300 |
| }, |
| { |
| "epoch": 10.471881060116354, |
| "grad_norm": 1.0172486305236816, |
| "learning_rate": 0.001, |
| "loss": 2.6254, |
| "step": 32400 |
| }, |
| { |
| "epoch": 10.504201680672269, |
| "grad_norm": 0.9256879687309265, |
| "learning_rate": 0.001, |
| "loss": 2.6103, |
| "step": 32500 |
| }, |
| { |
| "epoch": 10.536522301228183, |
| "grad_norm": 1.0289719104766846, |
| "learning_rate": 0.001, |
| "loss": 2.6329, |
| "step": 32600 |
| }, |
| { |
| "epoch": 10.568842921784098, |
| "grad_norm": 1.1163206100463867, |
| "learning_rate": 0.001, |
| "loss": 2.6488, |
| "step": 32700 |
| }, |
| { |
| "epoch": 10.601163542340013, |
| "grad_norm": 1.0654981136322021, |
| "learning_rate": 0.001, |
| "loss": 2.6346, |
| "step": 32800 |
| }, |
| { |
| "epoch": 10.633484162895927, |
| "grad_norm": 0.7902207374572754, |
| "learning_rate": 0.001, |
| "loss": 2.6188, |
| "step": 32900 |
| }, |
| { |
| "epoch": 10.665804783451842, |
| "grad_norm": 1.250271201133728, |
| "learning_rate": 0.001, |
| "loss": 2.6563, |
| "step": 33000 |
| }, |
| { |
| "epoch": 10.698125404007756, |
| "grad_norm": 0.969681978225708, |
| "learning_rate": 0.001, |
| "loss": 2.6384, |
| "step": 33100 |
| }, |
| { |
| "epoch": 10.73044602456367, |
| "grad_norm": 1.1124166250228882, |
| "learning_rate": 0.001, |
| "loss": 2.6356, |
| "step": 33200 |
| }, |
| { |
| "epoch": 10.762766645119587, |
| "grad_norm": 0.918755292892456, |
| "learning_rate": 0.001, |
| "loss": 2.6393, |
| "step": 33300 |
| }, |
| { |
| "epoch": 10.795087265675502, |
| "grad_norm": 0.9233816862106323, |
| "learning_rate": 0.001, |
| "loss": 2.6507, |
| "step": 33400 |
| }, |
| { |
| "epoch": 10.827407886231416, |
| "grad_norm": 1.036242127418518, |
| "learning_rate": 0.001, |
| "loss": 2.6541, |
| "step": 33500 |
| }, |
| { |
| "epoch": 10.85972850678733, |
| "grad_norm": 1.00826895236969, |
| "learning_rate": 0.001, |
| "loss": 2.6496, |
| "step": 33600 |
| }, |
| { |
| "epoch": 10.892049127343245, |
| "grad_norm": 1.0117528438568115, |
| "learning_rate": 0.001, |
| "loss": 2.6466, |
| "step": 33700 |
| }, |
| { |
| "epoch": 10.92436974789916, |
| "grad_norm": 0.9768591523170471, |
| "learning_rate": 0.001, |
| "loss": 2.6448, |
| "step": 33800 |
| }, |
| { |
| "epoch": 10.956690368455074, |
| "grad_norm": 1.2766749858856201, |
| "learning_rate": 0.001, |
| "loss": 2.647, |
| "step": 33900 |
| }, |
| { |
| "epoch": 10.989010989010989, |
| "grad_norm": 1.2299177646636963, |
| "learning_rate": 0.001, |
| "loss": 2.6419, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.021331609566904, |
| "grad_norm": 1.1423105001449585, |
| "learning_rate": 0.001, |
| "loss": 2.5704, |
| "step": 34100 |
| }, |
| { |
| "epoch": 11.053652230122818, |
| "grad_norm": 0.8941395282745361, |
| "learning_rate": 0.001, |
| "loss": 2.4834, |
| "step": 34200 |
| }, |
| { |
| "epoch": 11.085972850678733, |
| "grad_norm": 1.1033960580825806, |
| "learning_rate": 0.001, |
| "loss": 2.513, |
| "step": 34300 |
| }, |
| { |
| "epoch": 11.118293471234647, |
| "grad_norm": 1.0653290748596191, |
| "learning_rate": 0.001, |
| "loss": 2.541, |
| "step": 34400 |
| }, |
| { |
| "epoch": 11.150614091790562, |
| "grad_norm": 1.4494647979736328, |
| "learning_rate": 0.001, |
| "loss": 2.5199, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.182934712346476, |
| "grad_norm": 1.2029805183410645, |
| "learning_rate": 0.001, |
| "loss": 2.5371, |
| "step": 34600 |
| }, |
| { |
| "epoch": 11.215255332902391, |
| "grad_norm": 0.9592697024345398, |
| "learning_rate": 0.001, |
| "loss": 2.5513, |
| "step": 34700 |
| }, |
| { |
| "epoch": 11.247575953458306, |
| "grad_norm": 0.9625367522239685, |
| "learning_rate": 0.001, |
| "loss": 2.5316, |
| "step": 34800 |
| }, |
| { |
| "epoch": 11.279896574014222, |
| "grad_norm": 1.119964361190796, |
| "learning_rate": 0.001, |
| "loss": 2.5531, |
| "step": 34900 |
| }, |
| { |
| "epoch": 11.312217194570136, |
| "grad_norm": 0.9373201131820679, |
| "learning_rate": 0.001, |
| "loss": 2.5427, |
| "step": 35000 |
| }, |
| { |
| "epoch": 11.344537815126051, |
| "grad_norm": 0.9922090172767639, |
| "learning_rate": 0.001, |
| "loss": 2.5593, |
| "step": 35100 |
| }, |
| { |
| "epoch": 11.376858435681966, |
| "grad_norm": 0.949802577495575, |
| "learning_rate": 0.001, |
| "loss": 2.5581, |
| "step": 35200 |
| }, |
| { |
| "epoch": 11.40917905623788, |
| "grad_norm": 1.0595334768295288, |
| "learning_rate": 0.001, |
| "loss": 2.572, |
| "step": 35300 |
| }, |
| { |
| "epoch": 11.441499676793795, |
| "grad_norm": 0.883158802986145, |
| "learning_rate": 0.001, |
| "loss": 2.584, |
| "step": 35400 |
| }, |
| { |
| "epoch": 11.47382029734971, |
| "grad_norm": 0.983586311340332, |
| "learning_rate": 0.001, |
| "loss": 2.5704, |
| "step": 35500 |
| }, |
| { |
| "epoch": 11.506140917905624, |
| "grad_norm": 0.814781129360199, |
| "learning_rate": 0.001, |
| "loss": 2.5732, |
| "step": 35600 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 1.2671406269073486, |
| "learning_rate": 0.001, |
| "loss": 2.5872, |
| "step": 35700 |
| }, |
| { |
| "epoch": 11.570782159017453, |
| "grad_norm": 1.1636486053466797, |
| "learning_rate": 0.001, |
| "loss": 2.5662, |
| "step": 35800 |
| }, |
| { |
| "epoch": 11.603102779573367, |
| "grad_norm": 0.9227073192596436, |
| "learning_rate": 0.001, |
| "loss": 2.5826, |
| "step": 35900 |
| }, |
| { |
| "epoch": 11.635423400129282, |
| "grad_norm": 1.394180178642273, |
| "learning_rate": 0.001, |
| "loss": 2.5915, |
| "step": 36000 |
| }, |
| { |
| "epoch": 11.667744020685197, |
| "grad_norm": 1.2034887075424194, |
| "learning_rate": 0.001, |
| "loss": 2.5843, |
| "step": 36100 |
| }, |
| { |
| "epoch": 11.700064641241111, |
| "grad_norm": 1.2949236631393433, |
| "learning_rate": 0.001, |
| "loss": 2.617, |
| "step": 36200 |
| }, |
| { |
| "epoch": 11.732385261797026, |
| "grad_norm": 0.9753849506378174, |
| "learning_rate": 0.001, |
| "loss": 2.5988, |
| "step": 36300 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 0.8794882893562317, |
| "learning_rate": 0.001, |
| "loss": 2.602, |
| "step": 36400 |
| }, |
| { |
| "epoch": 11.797026502908857, |
| "grad_norm": 0.9771369099617004, |
| "learning_rate": 0.001, |
| "loss": 2.5793, |
| "step": 36500 |
| }, |
| { |
| "epoch": 11.829347123464771, |
| "grad_norm": 1.5032073259353638, |
| "learning_rate": 0.001, |
| "loss": 2.5841, |
| "step": 36600 |
| }, |
| { |
| "epoch": 11.861667744020686, |
| "grad_norm": 1.11965012550354, |
| "learning_rate": 0.001, |
| "loss": 2.5914, |
| "step": 36700 |
| }, |
| { |
| "epoch": 11.8939883645766, |
| "grad_norm": 1.1560014486312866, |
| "learning_rate": 0.001, |
| "loss": 2.5853, |
| "step": 36800 |
| }, |
| { |
| "epoch": 11.926308985132515, |
| "grad_norm": 1.01851224899292, |
| "learning_rate": 0.001, |
| "loss": 2.6131, |
| "step": 36900 |
| }, |
| { |
| "epoch": 11.95862960568843, |
| "grad_norm": 1.0631927251815796, |
| "learning_rate": 0.001, |
| "loss": 2.6137, |
| "step": 37000 |
| }, |
| { |
| "epoch": 11.990950226244344, |
| "grad_norm": 1.0793895721435547, |
| "learning_rate": 0.001, |
| "loss": 2.5941, |
| "step": 37100 |
| }, |
| { |
| "epoch": 12.023270846800258, |
| "grad_norm": 1.1296499967575073, |
| "learning_rate": 0.001, |
| "loss": 2.5119, |
| "step": 37200 |
| }, |
| { |
| "epoch": 12.055591467356173, |
| "grad_norm": 1.054184913635254, |
| "learning_rate": 0.001, |
| "loss": 2.4729, |
| "step": 37300 |
| }, |
| { |
| "epoch": 12.087912087912088, |
| "grad_norm": 21.742403030395508, |
| "learning_rate": 0.001, |
| "loss": 2.4631, |
| "step": 37400 |
| }, |
| { |
| "epoch": 12.120232708468002, |
| "grad_norm": 1.0219130516052246, |
| "learning_rate": 0.001, |
| "loss": 2.4652, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.152553329023917, |
| "grad_norm": 1.1618340015411377, |
| "learning_rate": 0.001, |
| "loss": 2.4674, |
| "step": 37600 |
| }, |
| { |
| "epoch": 12.184873949579831, |
| "grad_norm": 1.190769910812378, |
| "learning_rate": 0.001, |
| "loss": 2.4804, |
| "step": 37700 |
| }, |
| { |
| "epoch": 12.217194570135746, |
| "grad_norm": 1.1756348609924316, |
| "learning_rate": 0.001, |
| "loss": 2.5027, |
| "step": 37800 |
| }, |
| { |
| "epoch": 12.24951519069166, |
| "grad_norm": 0.9078492522239685, |
| "learning_rate": 0.001, |
| "loss": 2.4863, |
| "step": 37900 |
| }, |
| { |
| "epoch": 12.281835811247577, |
| "grad_norm": 0.9652780294418335, |
| "learning_rate": 0.001, |
| "loss": 2.4849, |
| "step": 38000 |
| }, |
| { |
| "epoch": 12.314156431803491, |
| "grad_norm": 1.3750672340393066, |
| "learning_rate": 0.001, |
| "loss": 2.4803, |
| "step": 38100 |
| }, |
| { |
| "epoch": 12.346477052359406, |
| "grad_norm": 1.0233724117279053, |
| "learning_rate": 0.001, |
| "loss": 2.5161, |
| "step": 38200 |
| }, |
| { |
| "epoch": 12.37879767291532, |
| "grad_norm": 1.130647897720337, |
| "learning_rate": 0.001, |
| "loss": 2.5181, |
| "step": 38300 |
| }, |
| { |
| "epoch": 12.411118293471235, |
| "grad_norm": 1.1900297403335571, |
| "learning_rate": 0.001, |
| "loss": 2.5251, |
| "step": 38400 |
| }, |
| { |
| "epoch": 12.44343891402715, |
| "grad_norm": 0.9599136710166931, |
| "learning_rate": 0.001, |
| "loss": 2.5308, |
| "step": 38500 |
| }, |
| { |
| "epoch": 12.475759534583064, |
| "grad_norm": 1.0950437784194946, |
| "learning_rate": 0.001, |
| "loss": 2.528, |
| "step": 38600 |
| }, |
| { |
| "epoch": 12.508080155138979, |
| "grad_norm": 2.040606737136841, |
| "learning_rate": 0.001, |
| "loss": 2.5159, |
| "step": 38700 |
| }, |
| { |
| "epoch": 12.540400775694893, |
| "grad_norm": 0.9455929398536682, |
| "learning_rate": 0.001, |
| "loss": 2.5413, |
| "step": 38800 |
| }, |
| { |
| "epoch": 12.572721396250808, |
| "grad_norm": 1.0487362146377563, |
| "learning_rate": 0.001, |
| "loss": 2.5331, |
| "step": 38900 |
| }, |
| { |
| "epoch": 12.605042016806722, |
| "grad_norm": 1.202513337135315, |
| "learning_rate": 0.001, |
| "loss": 2.5273, |
| "step": 39000 |
| }, |
| { |
| "epoch": 12.637362637362637, |
| "grad_norm": 0.8983702063560486, |
| "learning_rate": 0.001, |
| "loss": 2.5342, |
| "step": 39100 |
| }, |
| { |
| "epoch": 12.669683257918551, |
| "grad_norm": 1.1663144826889038, |
| "learning_rate": 0.001, |
| "loss": 2.5271, |
| "step": 39200 |
| }, |
| { |
| "epoch": 12.702003878474466, |
| "grad_norm": 1.0637140274047852, |
| "learning_rate": 0.001, |
| "loss": 2.5429, |
| "step": 39300 |
| }, |
| { |
| "epoch": 12.73432449903038, |
| "grad_norm": 0.9071537852287292, |
| "learning_rate": 0.001, |
| "loss": 2.5431, |
| "step": 39400 |
| }, |
| { |
| "epoch": 12.766645119586297, |
| "grad_norm": 1.0884722471237183, |
| "learning_rate": 0.001, |
| "loss": 2.5415, |
| "step": 39500 |
| }, |
| { |
| "epoch": 12.798965740142211, |
| "grad_norm": 1.1432896852493286, |
| "learning_rate": 0.001, |
| "loss": 2.5633, |
| "step": 39600 |
| }, |
| { |
| "epoch": 12.831286360698126, |
| "grad_norm": 1.1623923778533936, |
| "learning_rate": 0.001, |
| "loss": 2.5508, |
| "step": 39700 |
| }, |
| { |
| "epoch": 12.86360698125404, |
| "grad_norm": 0.9450523257255554, |
| "learning_rate": 0.001, |
| "loss": 2.5332, |
| "step": 39800 |
| }, |
| { |
| "epoch": 12.895927601809955, |
| "grad_norm": 1.2209385633468628, |
| "learning_rate": 0.001, |
| "loss": 2.538, |
| "step": 39900 |
| }, |
| { |
| "epoch": 12.92824822236587, |
| "grad_norm": 0.8747568726539612, |
| "learning_rate": 0.001, |
| "loss": 2.5379, |
| "step": 40000 |
| }, |
| { |
| "epoch": 12.960568842921784, |
| "grad_norm": 0.8547672629356384, |
| "learning_rate": 0.001, |
| "loss": 2.531, |
| "step": 40100 |
| }, |
| { |
| "epoch": 12.992889463477699, |
| "grad_norm": 1.1148180961608887, |
| "learning_rate": 0.001, |
| "loss": 2.5493, |
| "step": 40200 |
| }, |
| { |
| "epoch": 13.025210084033613, |
| "grad_norm": 1.0299571752548218, |
| "learning_rate": 0.001, |
| "loss": 2.455, |
| "step": 40300 |
| }, |
| { |
| "epoch": 13.057530704589528, |
| "grad_norm": 1.5488170385360718, |
| "learning_rate": 0.001, |
| "loss": 2.4235, |
| "step": 40400 |
| }, |
| { |
| "epoch": 13.089851325145442, |
| "grad_norm": 0.9480970501899719, |
| "learning_rate": 0.001, |
| "loss": 2.4149, |
| "step": 40500 |
| }, |
| { |
| "epoch": 13.122171945701357, |
| "grad_norm": 0.9796513319015503, |
| "learning_rate": 0.001, |
| "loss": 2.4012, |
| "step": 40600 |
| }, |
| { |
| "epoch": 13.154492566257272, |
| "grad_norm": 1.253645658493042, |
| "learning_rate": 0.001, |
| "loss": 2.4345, |
| "step": 40700 |
| }, |
| { |
| "epoch": 13.186813186813186, |
| "grad_norm": 0.9671187996864319, |
| "learning_rate": 0.001, |
| "loss": 2.421, |
| "step": 40800 |
| }, |
| { |
| "epoch": 13.2191338073691, |
| "grad_norm": 1.2620867490768433, |
| "learning_rate": 0.001, |
| "loss": 2.4489, |
| "step": 40900 |
| }, |
| { |
| "epoch": 13.251454427925015, |
| "grad_norm": 1.3267464637756348, |
| "learning_rate": 0.001, |
| "loss": 2.4219, |
| "step": 41000 |
| }, |
| { |
| "epoch": 13.283775048480932, |
| "grad_norm": 0.949113130569458, |
| "learning_rate": 0.001, |
| "loss": 2.4586, |
| "step": 41100 |
| }, |
| { |
| "epoch": 13.316095669036846, |
| "grad_norm": 0.9057651162147522, |
| "learning_rate": 0.001, |
| "loss": 2.4504, |
| "step": 41200 |
| }, |
| { |
| "epoch": 13.34841628959276, |
| "grad_norm": 0.9519304633140564, |
| "learning_rate": 0.001, |
| "loss": 2.4565, |
| "step": 41300 |
| }, |
| { |
| "epoch": 13.380736910148675, |
| "grad_norm": 0.8539422154426575, |
| "learning_rate": 0.001, |
| "loss": 2.4649, |
| "step": 41400 |
| }, |
| { |
| "epoch": 13.41305753070459, |
| "grad_norm": 0.9074021577835083, |
| "learning_rate": 0.001, |
| "loss": 2.4422, |
| "step": 41500 |
| }, |
| { |
| "epoch": 13.445378151260504, |
| "grad_norm": 0.9951125383377075, |
| "learning_rate": 0.001, |
| "loss": 2.474, |
| "step": 41600 |
| }, |
| { |
| "epoch": 13.477698771816419, |
| "grad_norm": 0.884623646736145, |
| "learning_rate": 0.001, |
| "loss": 2.4678, |
| "step": 41700 |
| }, |
| { |
| "epoch": 13.510019392372334, |
| "grad_norm": 1.0569515228271484, |
| "learning_rate": 0.001, |
| "loss": 2.4836, |
| "step": 41800 |
| }, |
| { |
| "epoch": 13.542340012928248, |
| "grad_norm": 1.202636957168579, |
| "learning_rate": 0.001, |
| "loss": 2.4781, |
| "step": 41900 |
| }, |
| { |
| "epoch": 13.574660633484163, |
| "grad_norm": 0.9308040738105774, |
| "learning_rate": 0.001, |
| "loss": 2.4682, |
| "step": 42000 |
| }, |
| { |
| "epoch": 13.606981254040077, |
| "grad_norm": 1.0900559425354004, |
| "learning_rate": 0.001, |
| "loss": 2.504, |
| "step": 42100 |
| }, |
| { |
| "epoch": 13.639301874595992, |
| "grad_norm": 1.135162591934204, |
| "learning_rate": 0.001, |
| "loss": 2.4863, |
| "step": 42200 |
| }, |
| { |
| "epoch": 13.671622495151906, |
| "grad_norm": 1.2860257625579834, |
| "learning_rate": 0.001, |
| "loss": 2.4946, |
| "step": 42300 |
| }, |
| { |
| "epoch": 13.70394311570782, |
| "grad_norm": 1.5642868280410767, |
| "learning_rate": 0.001, |
| "loss": 2.4893, |
| "step": 42400 |
| }, |
| { |
| "epoch": 13.736263736263737, |
| "grad_norm": 1.355553388595581, |
| "learning_rate": 0.001, |
| "loss": 2.4914, |
| "step": 42500 |
| }, |
| { |
| "epoch": 13.768584356819652, |
| "grad_norm": 1.0944535732269287, |
| "learning_rate": 0.001, |
| "loss": 2.4979, |
| "step": 42600 |
| }, |
| { |
| "epoch": 13.800904977375566, |
| "grad_norm": 1.1084762811660767, |
| "learning_rate": 0.001, |
| "loss": 2.4889, |
| "step": 42700 |
| }, |
| { |
| "epoch": 13.83322559793148, |
| "grad_norm": 1.0795499086380005, |
| "learning_rate": 0.001, |
| "loss": 2.5077, |
| "step": 42800 |
| }, |
| { |
| "epoch": 13.865546218487395, |
| "grad_norm": 1.1181540489196777, |
| "learning_rate": 0.001, |
| "loss": 2.5115, |
| "step": 42900 |
| }, |
| { |
| "epoch": 13.89786683904331, |
| "grad_norm": 0.8543340563774109, |
| "learning_rate": 0.001, |
| "loss": 2.4785, |
| "step": 43000 |
| }, |
| { |
| "epoch": 13.930187459599225, |
| "grad_norm": 0.9921061396598816, |
| "learning_rate": 0.001, |
| "loss": 2.5062, |
| "step": 43100 |
| }, |
| { |
| "epoch": 13.96250808015514, |
| "grad_norm": 1.1608710289001465, |
| "learning_rate": 0.001, |
| "loss": 2.5112, |
| "step": 43200 |
| }, |
| { |
| "epoch": 13.994828700711054, |
| "grad_norm": 0.9125173687934875, |
| "learning_rate": 0.001, |
| "loss": 2.4806, |
| "step": 43300 |
| }, |
| { |
| "epoch": 14.027149321266968, |
| "grad_norm": 1.0954087972640991, |
| "learning_rate": 0.001, |
| "loss": 2.3845, |
| "step": 43400 |
| }, |
| { |
| "epoch": 14.059469941822883, |
| "grad_norm": 1.1624791622161865, |
| "learning_rate": 0.001, |
| "loss": 2.3829, |
| "step": 43500 |
| }, |
| { |
| "epoch": 14.091790562378797, |
| "grad_norm": 1.3029100894927979, |
| "learning_rate": 0.001, |
| "loss": 2.3835, |
| "step": 43600 |
| }, |
| { |
| "epoch": 14.124111182934712, |
| "grad_norm": 1.024627923965454, |
| "learning_rate": 0.001, |
| "loss": 2.3614, |
| "step": 43700 |
| }, |
| { |
| "epoch": 14.156431803490626, |
| "grad_norm": 1.298632264137268, |
| "learning_rate": 0.001, |
| "loss": 2.3911, |
| "step": 43800 |
| }, |
| { |
| "epoch": 14.188752424046541, |
| "grad_norm": 1.313515543937683, |
| "learning_rate": 0.001, |
| "loss": 2.4011, |
| "step": 43900 |
| }, |
| { |
| "epoch": 14.221073044602456, |
| "grad_norm": 1.3333510160446167, |
| "learning_rate": 0.001, |
| "loss": 2.3881, |
| "step": 44000 |
| }, |
| { |
| "epoch": 14.25339366515837, |
| "grad_norm": 1.04513680934906, |
| "learning_rate": 0.001, |
| "loss": 2.3704, |
| "step": 44100 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 1.4951848983764648, |
| "learning_rate": 0.001, |
| "loss": 2.386, |
| "step": 44200 |
| }, |
| { |
| "epoch": 14.318034906270201, |
| "grad_norm": 1.0465465784072876, |
| "learning_rate": 0.001, |
| "loss": 2.3972, |
| "step": 44300 |
| }, |
| { |
| "epoch": 14.350355526826116, |
| "grad_norm": 1.1519221067428589, |
| "learning_rate": 0.001, |
| "loss": 2.4056, |
| "step": 44400 |
| }, |
| { |
| "epoch": 14.38267614738203, |
| "grad_norm": 1.315697431564331, |
| "learning_rate": 0.001, |
| "loss": 2.4214, |
| "step": 44500 |
| }, |
| { |
| "epoch": 14.414996767937945, |
| "grad_norm": 1.0481849908828735, |
| "learning_rate": 0.001, |
| "loss": 2.4211, |
| "step": 44600 |
| }, |
| { |
| "epoch": 14.44731738849386, |
| "grad_norm": 1.1554055213928223, |
| "learning_rate": 0.001, |
| "loss": 2.4134, |
| "step": 44700 |
| }, |
| { |
| "epoch": 14.479638009049774, |
| "grad_norm": 1.0574384927749634, |
| "learning_rate": 0.001, |
| "loss": 2.4197, |
| "step": 44800 |
| }, |
| { |
| "epoch": 14.511958629605688, |
| "grad_norm": 1.1386839151382446, |
| "learning_rate": 0.001, |
| "loss": 2.414, |
| "step": 44900 |
| }, |
| { |
| "epoch": 14.544279250161603, |
| "grad_norm": 1.327596664428711, |
| "learning_rate": 0.001, |
| "loss": 2.4182, |
| "step": 45000 |
| }, |
| { |
| "epoch": 14.576599870717518, |
| "grad_norm": 0.963005542755127, |
| "learning_rate": 0.001, |
| "loss": 2.4251, |
| "step": 45100 |
| }, |
| { |
| "epoch": 14.608920491273432, |
| "grad_norm": 1.0058512687683105, |
| "learning_rate": 0.001, |
| "loss": 2.4233, |
| "step": 45200 |
| }, |
| { |
| "epoch": 14.641241111829347, |
| "grad_norm": 1.0360257625579834, |
| "learning_rate": 0.001, |
| "loss": 2.4353, |
| "step": 45300 |
| }, |
| { |
| "epoch": 14.673561732385261, |
| "grad_norm": 1.2501556873321533, |
| "learning_rate": 0.001, |
| "loss": 2.4511, |
| "step": 45400 |
| }, |
| { |
| "epoch": 14.705882352941176, |
| "grad_norm": 1.212724208831787, |
| "learning_rate": 0.001, |
| "loss": 2.4493, |
| "step": 45500 |
| }, |
| { |
| "epoch": 14.738202973497092, |
| "grad_norm": 1.4460214376449585, |
| "learning_rate": 0.001, |
| "loss": 2.4591, |
| "step": 45600 |
| }, |
| { |
| "epoch": 14.770523594053007, |
| "grad_norm": 1.2264606952667236, |
| "learning_rate": 0.001, |
| "loss": 2.431, |
| "step": 45700 |
| }, |
| { |
| "epoch": 14.802844214608921, |
| "grad_norm": 0.9162919521331787, |
| "learning_rate": 0.001, |
| "loss": 2.4497, |
| "step": 45800 |
| }, |
| { |
| "epoch": 14.835164835164836, |
| "grad_norm": 1.2006787061691284, |
| "learning_rate": 0.001, |
| "loss": 2.4343, |
| "step": 45900 |
| }, |
| { |
| "epoch": 14.86748545572075, |
| "grad_norm": 1.3091291189193726, |
| "learning_rate": 0.001, |
| "loss": 2.4543, |
| "step": 46000 |
| }, |
| { |
| "epoch": 14.899806076276665, |
| "grad_norm": 1.2788023948669434, |
| "learning_rate": 0.001, |
| "loss": 2.4441, |
| "step": 46100 |
| }, |
| { |
| "epoch": 14.93212669683258, |
| "grad_norm": 1.4079340696334839, |
| "learning_rate": 0.001, |
| "loss": 2.467, |
| "step": 46200 |
| }, |
| { |
| "epoch": 14.964447317388494, |
| "grad_norm": 1.0543346405029297, |
| "learning_rate": 0.001, |
| "loss": 2.4757, |
| "step": 46300 |
| }, |
| { |
| "epoch": 14.996767937944409, |
| "grad_norm": 1.0508509874343872, |
| "learning_rate": 0.001, |
| "loss": 2.448, |
| "step": 46400 |
| }, |
| { |
| "epoch": 15.029088558500323, |
| "grad_norm": 1.4893016815185547, |
| "learning_rate": 0.001, |
| "loss": 2.3238, |
| "step": 46500 |
| }, |
| { |
| "epoch": 15.061409179056238, |
| "grad_norm": 1.837750792503357, |
| "learning_rate": 0.001, |
| "loss": 2.3131, |
| "step": 46600 |
| }, |
| { |
| "epoch": 15.093729799612152, |
| "grad_norm": 1.2078869342803955, |
| "learning_rate": 0.001, |
| "loss": 2.3283, |
| "step": 46700 |
| }, |
| { |
| "epoch": 15.126050420168067, |
| "grad_norm": 1.6027250289916992, |
| "learning_rate": 0.001, |
| "loss": 2.3327, |
| "step": 46800 |
| }, |
| { |
| "epoch": 15.158371040723981, |
| "grad_norm": 1.5826632976531982, |
| "learning_rate": 0.001, |
| "loss": 2.3439, |
| "step": 46900 |
| }, |
| { |
| "epoch": 15.190691661279896, |
| "grad_norm": 1.4874987602233887, |
| "learning_rate": 0.001, |
| "loss": 2.3284, |
| "step": 47000 |
| }, |
| { |
| "epoch": 15.22301228183581, |
| "grad_norm": 1.3203476667404175, |
| "learning_rate": 0.001, |
| "loss": 2.3384, |
| "step": 47100 |
| }, |
| { |
| "epoch": 15.255332902391725, |
| "grad_norm": 1.3712375164031982, |
| "learning_rate": 0.001, |
| "loss": 2.3232, |
| "step": 47200 |
| }, |
| { |
| "epoch": 15.287653522947641, |
| "grad_norm": 1.7889151573181152, |
| "learning_rate": 0.001, |
| "loss": 2.3565, |
| "step": 47300 |
| }, |
| { |
| "epoch": 15.319974143503556, |
| "grad_norm": 1.271273136138916, |
| "learning_rate": 0.001, |
| "loss": 2.3609, |
| "step": 47400 |
| }, |
| { |
| "epoch": 15.35229476405947, |
| "grad_norm": 1.26175856590271, |
| "learning_rate": 0.001, |
| "loss": 2.3648, |
| "step": 47500 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 1.1784569025039673, |
| "learning_rate": 0.001, |
| "loss": 2.3543, |
| "step": 47600 |
| }, |
| { |
| "epoch": 15.4169360051713, |
| "grad_norm": 1.213889241218567, |
| "learning_rate": 0.001, |
| "loss": 2.3653, |
| "step": 47700 |
| }, |
| { |
| "epoch": 15.449256625727214, |
| "grad_norm": 1.2286897897720337, |
| "learning_rate": 0.001, |
| "loss": 2.3669, |
| "step": 47800 |
| }, |
| { |
| "epoch": 15.481577246283129, |
| "grad_norm": 1.0805023908615112, |
| "learning_rate": 0.001, |
| "loss": 2.3646, |
| "step": 47900 |
| }, |
| { |
| "epoch": 15.513897866839043, |
| "grad_norm": 1.3555302619934082, |
| "learning_rate": 0.001, |
| "loss": 2.4106, |
| "step": 48000 |
| }, |
| { |
| "epoch": 15.546218487394958, |
| "grad_norm": 1.2589572668075562, |
| "learning_rate": 0.001, |
| "loss": 2.3889, |
| "step": 48100 |
| }, |
| { |
| "epoch": 15.578539107950872, |
| "grad_norm": 1.3926182985305786, |
| "learning_rate": 0.001, |
| "loss": 2.3845, |
| "step": 48200 |
| }, |
| { |
| "epoch": 15.610859728506787, |
| "grad_norm": 1.1518105268478394, |
| "learning_rate": 0.001, |
| "loss": 2.3971, |
| "step": 48300 |
| }, |
| { |
| "epoch": 15.643180349062701, |
| "grad_norm": 3.0709455013275146, |
| "learning_rate": 0.001, |
| "loss": 2.3771, |
| "step": 48400 |
| }, |
| { |
| "epoch": 15.675500969618616, |
| "grad_norm": 1.155657172203064, |
| "learning_rate": 0.001, |
| "loss": 2.3964, |
| "step": 48500 |
| }, |
| { |
| "epoch": 15.70782159017453, |
| "grad_norm": 1.1553372144699097, |
| "learning_rate": 0.001, |
| "loss": 2.3915, |
| "step": 48600 |
| }, |
| { |
| "epoch": 15.740142210730447, |
| "grad_norm": 1.320940613746643, |
| "learning_rate": 0.001, |
| "loss": 2.4046, |
| "step": 48700 |
| }, |
| { |
| "epoch": 15.772462831286362, |
| "grad_norm": 1.5498952865600586, |
| "learning_rate": 0.001, |
| "loss": 2.3993, |
| "step": 48800 |
| }, |
| { |
| "epoch": 15.804783451842276, |
| "grad_norm": 1.2019668817520142, |
| "learning_rate": 0.001, |
| "loss": 2.3913, |
| "step": 48900 |
| }, |
| { |
| "epoch": 15.83710407239819, |
| "grad_norm": 1.5592061281204224, |
| "learning_rate": 0.001, |
| "loss": 2.3928, |
| "step": 49000 |
| }, |
| { |
| "epoch": 15.869424692954105, |
| "grad_norm": 1.2563676834106445, |
| "learning_rate": 0.001, |
| "loss": 2.4179, |
| "step": 49100 |
| }, |
| { |
| "epoch": 15.90174531351002, |
| "grad_norm": 1.7604995965957642, |
| "learning_rate": 0.001, |
| "loss": 2.4154, |
| "step": 49200 |
| }, |
| { |
| "epoch": 15.934065934065934, |
| "grad_norm": 2.444636583328247, |
| "learning_rate": 0.001, |
| "loss": 2.4203, |
| "step": 49300 |
| }, |
| { |
| "epoch": 15.966386554621849, |
| "grad_norm": 1.1105613708496094, |
| "learning_rate": 0.001, |
| "loss": 2.4068, |
| "step": 49400 |
| }, |
| { |
| "epoch": 15.998707175177763, |
| "grad_norm": 1.4374933242797852, |
| "learning_rate": 0.001, |
| "loss": 2.4238, |
| "step": 49500 |
| }, |
| { |
| "epoch": 16.031027795733678, |
| "grad_norm": 1.5967682600021362, |
| "learning_rate": 0.001, |
| "loss": 2.265, |
| "step": 49600 |
| }, |
| { |
| "epoch": 16.063348416289593, |
| "grad_norm": 1.267298698425293, |
| "learning_rate": 0.001, |
| "loss": 2.2788, |
| "step": 49700 |
| }, |
| { |
| "epoch": 16.095669036845507, |
| "grad_norm": 1.7938473224639893, |
| "learning_rate": 0.001, |
| "loss": 2.2835, |
| "step": 49800 |
| }, |
| { |
| "epoch": 16.12798965740142, |
| "grad_norm": 1.4659450054168701, |
| "learning_rate": 0.001, |
| "loss": 2.2731, |
| "step": 49900 |
| }, |
| { |
| "epoch": 16.160310277957336, |
| "grad_norm": 1.5971636772155762, |
| "learning_rate": 0.001, |
| "loss": 2.3012, |
| "step": 50000 |
| }, |
| { |
| "epoch": 16.19263089851325, |
| "grad_norm": 1.6608117818832397, |
| "learning_rate": 0.001, |
| "loss": 2.309, |
| "step": 50100 |
| }, |
| { |
| "epoch": 16.224951519069165, |
| "grad_norm": 1.403343915939331, |
| "learning_rate": 0.001, |
| "loss": 2.3204, |
| "step": 50200 |
| }, |
| { |
| "epoch": 16.25727213962508, |
| "grad_norm": 1.548507809638977, |
| "learning_rate": 0.001, |
| "loss": 2.307, |
| "step": 50300 |
| }, |
| { |
| "epoch": 16.289592760180994, |
| "grad_norm": 1.3704252243041992, |
| "learning_rate": 0.001, |
| "loss": 2.3057, |
| "step": 50400 |
| }, |
| { |
| "epoch": 16.32191338073691, |
| "grad_norm": 1.283632755279541, |
| "learning_rate": 0.001, |
| "loss": 2.3067, |
| "step": 50500 |
| }, |
| { |
| "epoch": 16.354234001292824, |
| "grad_norm": 1.857095718383789, |
| "learning_rate": 0.001, |
| "loss": 2.3188, |
| "step": 50600 |
| }, |
| { |
| "epoch": 16.386554621848738, |
| "grad_norm": 1.4724833965301514, |
| "learning_rate": 0.001, |
| "loss": 2.3249, |
| "step": 50700 |
| }, |
| { |
| "epoch": 16.418875242404653, |
| "grad_norm": 1.4150060415267944, |
| "learning_rate": 0.001, |
| "loss": 2.3166, |
| "step": 50800 |
| }, |
| { |
| "epoch": 16.451195862960567, |
| "grad_norm": 1.3506375551223755, |
| "learning_rate": 0.001, |
| "loss": 2.332, |
| "step": 50900 |
| }, |
| { |
| "epoch": 16.483516483516482, |
| "grad_norm": 1.4680278301239014, |
| "learning_rate": 0.001, |
| "loss": 2.3305, |
| "step": 51000 |
| }, |
| { |
| "epoch": 16.5158371040724, |
| "grad_norm": 1.3896517753601074, |
| "learning_rate": 0.001, |
| "loss": 2.3371, |
| "step": 51100 |
| }, |
| { |
| "epoch": 16.548157724628314, |
| "grad_norm": 1.4641127586364746, |
| "learning_rate": 0.001, |
| "loss": 2.3332, |
| "step": 51200 |
| }, |
| { |
| "epoch": 16.58047834518423, |
| "grad_norm": 1.54449462890625, |
| "learning_rate": 0.001, |
| "loss": 2.3543, |
| "step": 51300 |
| }, |
| { |
| "epoch": 16.612798965740144, |
| "grad_norm": 1.260672688484192, |
| "learning_rate": 0.001, |
| "loss": 2.3588, |
| "step": 51400 |
| }, |
| { |
| "epoch": 16.645119586296058, |
| "grad_norm": 1.3852020502090454, |
| "learning_rate": 0.001, |
| "loss": 2.3545, |
| "step": 51500 |
| }, |
| { |
| "epoch": 16.677440206851973, |
| "grad_norm": 1.5494886636734009, |
| "learning_rate": 0.001, |
| "loss": 2.358, |
| "step": 51600 |
| }, |
| { |
| "epoch": 16.709760827407887, |
| "grad_norm": 1.8374618291854858, |
| "learning_rate": 0.001, |
| "loss": 2.3726, |
| "step": 51700 |
| }, |
| { |
| "epoch": 16.742081447963802, |
| "grad_norm": 1.3113868236541748, |
| "learning_rate": 0.001, |
| "loss": 2.3415, |
| "step": 51800 |
| }, |
| { |
| "epoch": 16.774402068519716, |
| "grad_norm": 1.4161752462387085, |
| "learning_rate": 0.001, |
| "loss": 2.3594, |
| "step": 51900 |
| }, |
| { |
| "epoch": 16.80672268907563, |
| "grad_norm": 1.4049732685089111, |
| "learning_rate": 0.001, |
| "loss": 2.3403, |
| "step": 52000 |
| }, |
| { |
| "epoch": 16.839043309631545, |
| "grad_norm": 1.5107018947601318, |
| "learning_rate": 0.001, |
| "loss": 2.3809, |
| "step": 52100 |
| }, |
| { |
| "epoch": 16.87136393018746, |
| "grad_norm": 1.6911265850067139, |
| "learning_rate": 0.001, |
| "loss": 2.3562, |
| "step": 52200 |
| }, |
| { |
| "epoch": 16.903684550743375, |
| "grad_norm": 1.6009875535964966, |
| "learning_rate": 0.001, |
| "loss": 2.3589, |
| "step": 52300 |
| }, |
| { |
| "epoch": 16.93600517129929, |
| "grad_norm": 1.415225863456726, |
| "learning_rate": 0.001, |
| "loss": 2.3723, |
| "step": 52400 |
| }, |
| { |
| "epoch": 16.968325791855204, |
| "grad_norm": 1.5780458450317383, |
| "learning_rate": 0.001, |
| "loss": 2.3631, |
| "step": 52500 |
| }, |
| { |
| "epoch": 17.00064641241112, |
| "grad_norm": 1.3046797513961792, |
| "learning_rate": 0.001, |
| "loss": 2.3591, |
| "step": 52600 |
| }, |
| { |
| "epoch": 17.032967032967033, |
| "grad_norm": 1.631547212600708, |
| "learning_rate": 0.001, |
| "loss": 2.213, |
| "step": 52700 |
| }, |
| { |
| "epoch": 17.065287653522947, |
| "grad_norm": 1.5670453310012817, |
| "learning_rate": 0.001, |
| "loss": 2.231, |
| "step": 52800 |
| }, |
| { |
| "epoch": 17.097608274078862, |
| "grad_norm": 1.5162924528121948, |
| "learning_rate": 0.001, |
| "loss": 2.2282, |
| "step": 52900 |
| }, |
| { |
| "epoch": 17.129928894634777, |
| "grad_norm": 1.8685030937194824, |
| "learning_rate": 0.001, |
| "loss": 2.2464, |
| "step": 53000 |
| }, |
| { |
| "epoch": 17.16224951519069, |
| "grad_norm": 1.8752682209014893, |
| "learning_rate": 0.001, |
| "loss": 2.2316, |
| "step": 53100 |
| }, |
| { |
| "epoch": 17.194570135746606, |
| "grad_norm": 1.5304337739944458, |
| "learning_rate": 0.001, |
| "loss": 2.2437, |
| "step": 53200 |
| }, |
| { |
| "epoch": 17.22689075630252, |
| "grad_norm": 1.8339931964874268, |
| "learning_rate": 0.001, |
| "loss": 2.2524, |
| "step": 53300 |
| }, |
| { |
| "epoch": 17.259211376858435, |
| "grad_norm": 1.6601121425628662, |
| "learning_rate": 0.001, |
| "loss": 2.2821, |
| "step": 53400 |
| }, |
| { |
| "epoch": 17.29153199741435, |
| "grad_norm": 1.037027359008789, |
| "learning_rate": 0.001, |
| "loss": 2.2599, |
| "step": 53500 |
| }, |
| { |
| "epoch": 17.323852617970264, |
| "grad_norm": 1.4101696014404297, |
| "learning_rate": 0.001, |
| "loss": 2.2894, |
| "step": 53600 |
| }, |
| { |
| "epoch": 17.35617323852618, |
| "grad_norm": 1.715714931488037, |
| "learning_rate": 0.001, |
| "loss": 2.2673, |
| "step": 53700 |
| }, |
| { |
| "epoch": 17.388493859082093, |
| "grad_norm": 1.6918067932128906, |
| "learning_rate": 0.001, |
| "loss": 2.277, |
| "step": 53800 |
| }, |
| { |
| "epoch": 17.420814479638008, |
| "grad_norm": 1.513771653175354, |
| "learning_rate": 0.001, |
| "loss": 2.2822, |
| "step": 53900 |
| }, |
| { |
| "epoch": 17.453135100193922, |
| "grad_norm": 2.0623667240142822, |
| "learning_rate": 0.001, |
| "loss": 2.3088, |
| "step": 54000 |
| }, |
| { |
| "epoch": 17.485455720749837, |
| "grad_norm": 1.43783700466156, |
| "learning_rate": 0.001, |
| "loss": 2.2943, |
| "step": 54100 |
| }, |
| { |
| "epoch": 17.517776341305755, |
| "grad_norm": 1.387234091758728, |
| "learning_rate": 0.001, |
| "loss": 2.3021, |
| "step": 54200 |
| }, |
| { |
| "epoch": 17.55009696186167, |
| "grad_norm": 1.8661473989486694, |
| "learning_rate": 0.001, |
| "loss": 2.2701, |
| "step": 54300 |
| }, |
| { |
| "epoch": 17.582417582417584, |
| "grad_norm": 1.76520836353302, |
| "learning_rate": 0.001, |
| "loss": 2.2823, |
| "step": 54400 |
| }, |
| { |
| "epoch": 17.6147382029735, |
| "grad_norm": 1.5826014280319214, |
| "learning_rate": 0.001, |
| "loss": 2.3244, |
| "step": 54500 |
| }, |
| { |
| "epoch": 17.647058823529413, |
| "grad_norm": 1.3721729516983032, |
| "learning_rate": 0.001, |
| "loss": 2.318, |
| "step": 54600 |
| }, |
| { |
| "epoch": 17.679379444085328, |
| "grad_norm": 1.4153558015823364, |
| "learning_rate": 0.001, |
| "loss": 2.3211, |
| "step": 54700 |
| }, |
| { |
| "epoch": 17.711700064641242, |
| "grad_norm": 1.6873489618301392, |
| "learning_rate": 0.001, |
| "loss": 2.3211, |
| "step": 54800 |
| }, |
| { |
| "epoch": 17.744020685197157, |
| "grad_norm": 1.48008131980896, |
| "learning_rate": 0.001, |
| "loss": 2.3298, |
| "step": 54900 |
| }, |
| { |
| "epoch": 17.77634130575307, |
| "grad_norm": 1.2169060707092285, |
| "learning_rate": 0.001, |
| "loss": 2.3117, |
| "step": 55000 |
| }, |
| { |
| "epoch": 17.808661926308986, |
| "grad_norm": 2.0541675090789795, |
| "learning_rate": 0.001, |
| "loss": 2.3168, |
| "step": 55100 |
| }, |
| { |
| "epoch": 17.8409825468649, |
| "grad_norm": 1.6494852304458618, |
| "learning_rate": 0.001, |
| "loss": 2.3136, |
| "step": 55200 |
| }, |
| { |
| "epoch": 17.873303167420815, |
| "grad_norm": 1.9559639692306519, |
| "learning_rate": 0.001, |
| "loss": 2.3385, |
| "step": 55300 |
| }, |
| { |
| "epoch": 17.90562378797673, |
| "grad_norm": 1.883894443511963, |
| "learning_rate": 0.001, |
| "loss": 2.3241, |
| "step": 55400 |
| }, |
| { |
| "epoch": 17.937944408532644, |
| "grad_norm": 1.4204341173171997, |
| "learning_rate": 0.001, |
| "loss": 2.3306, |
| "step": 55500 |
| }, |
| { |
| "epoch": 17.97026502908856, |
| "grad_norm": 1.837131142616272, |
| "learning_rate": 0.001, |
| "loss": 2.3515, |
| "step": 55600 |
| }, |
| { |
| "epoch": 18.002585649644473, |
| "grad_norm": 1.2758315801620483, |
| "learning_rate": 0.001, |
| "loss": 2.3336, |
| "step": 55700 |
| }, |
| { |
| "epoch": 18.034906270200388, |
| "grad_norm": 1.0778571367263794, |
| "learning_rate": 0.001, |
| "loss": 2.1599, |
| "step": 55800 |
| }, |
| { |
| "epoch": 18.067226890756302, |
| "grad_norm": 1.2033774852752686, |
| "learning_rate": 0.001, |
| "loss": 2.1879, |
| "step": 55900 |
| }, |
| { |
| "epoch": 18.099547511312217, |
| "grad_norm": 1.5203527212142944, |
| "learning_rate": 0.001, |
| "loss": 2.1859, |
| "step": 56000 |
| }, |
| { |
| "epoch": 18.13186813186813, |
| "grad_norm": 1.2778196334838867, |
| "learning_rate": 0.001, |
| "loss": 2.2118, |
| "step": 56100 |
| }, |
| { |
| "epoch": 18.164188752424046, |
| "grad_norm": 1.490444302558899, |
| "learning_rate": 0.001, |
| "loss": 2.215, |
| "step": 56200 |
| }, |
| { |
| "epoch": 18.19650937297996, |
| "grad_norm": 1.25520658493042, |
| "learning_rate": 0.001, |
| "loss": 2.2096, |
| "step": 56300 |
| }, |
| { |
| "epoch": 18.228829993535875, |
| "grad_norm": 1.3420361280441284, |
| "learning_rate": 0.001, |
| "loss": 2.2346, |
| "step": 56400 |
| }, |
| { |
| "epoch": 18.26115061409179, |
| "grad_norm": 1.4662959575653076, |
| "learning_rate": 0.001, |
| "loss": 2.2047, |
| "step": 56500 |
| }, |
| { |
| "epoch": 18.293471234647704, |
| "grad_norm": 1.3517006635665894, |
| "learning_rate": 0.001, |
| "loss": 2.2302, |
| "step": 56600 |
| }, |
| { |
| "epoch": 18.32579185520362, |
| "grad_norm": 1.6744149923324585, |
| "learning_rate": 0.001, |
| "loss": 2.2548, |
| "step": 56700 |
| }, |
| { |
| "epoch": 18.358112475759533, |
| "grad_norm": 1.6994774341583252, |
| "learning_rate": 0.001, |
| "loss": 2.2184, |
| "step": 56800 |
| }, |
| { |
| "epoch": 18.390433096315448, |
| "grad_norm": 1.2075378894805908, |
| "learning_rate": 0.001, |
| "loss": 2.2467, |
| "step": 56900 |
| }, |
| { |
| "epoch": 18.422753716871362, |
| "grad_norm": 1.0433144569396973, |
| "learning_rate": 0.001, |
| "loss": 2.2499, |
| "step": 57000 |
| }, |
| { |
| "epoch": 18.455074337427277, |
| "grad_norm": 1.2884716987609863, |
| "learning_rate": 0.001, |
| "loss": 2.2475, |
| "step": 57100 |
| }, |
| { |
| "epoch": 18.48739495798319, |
| "grad_norm": 1.8086559772491455, |
| "learning_rate": 0.001, |
| "loss": 2.2572, |
| "step": 57200 |
| }, |
| { |
| "epoch": 18.51971557853911, |
| "grad_norm": 1.1635278463363647, |
| "learning_rate": 0.001, |
| "loss": 2.2554, |
| "step": 57300 |
| }, |
| { |
| "epoch": 18.552036199095024, |
| "grad_norm": 1.3635642528533936, |
| "learning_rate": 0.001, |
| "loss": 2.2633, |
| "step": 57400 |
| }, |
| { |
| "epoch": 18.58435681965094, |
| "grad_norm": 1.2767882347106934, |
| "learning_rate": 0.001, |
| "loss": 2.2519, |
| "step": 57500 |
| }, |
| { |
| "epoch": 18.616677440206853, |
| "grad_norm": 1.571807861328125, |
| "learning_rate": 0.001, |
| "loss": 2.2582, |
| "step": 57600 |
| }, |
| { |
| "epoch": 18.648998060762768, |
| "grad_norm": 1.5809171199798584, |
| "learning_rate": 0.001, |
| "loss": 2.2612, |
| "step": 57700 |
| }, |
| { |
| "epoch": 18.681318681318682, |
| "grad_norm": 1.2579069137573242, |
| "learning_rate": 0.001, |
| "loss": 2.2713, |
| "step": 57800 |
| }, |
| { |
| "epoch": 18.713639301874597, |
| "grad_norm": 1.2632404565811157, |
| "learning_rate": 0.001, |
| "loss": 2.2876, |
| "step": 57900 |
| }, |
| { |
| "epoch": 18.74595992243051, |
| "grad_norm": 1.0768790245056152, |
| "learning_rate": 0.001, |
| "loss": 2.2794, |
| "step": 58000 |
| }, |
| { |
| "epoch": 18.778280542986426, |
| "grad_norm": 1.4682295322418213, |
| "learning_rate": 0.001, |
| "loss": 2.2766, |
| "step": 58100 |
| }, |
| { |
| "epoch": 18.81060116354234, |
| "grad_norm": 1.269097089767456, |
| "learning_rate": 0.001, |
| "loss": 2.2587, |
| "step": 58200 |
| }, |
| { |
| "epoch": 18.842921784098255, |
| "grad_norm": 1.7296055555343628, |
| "learning_rate": 0.001, |
| "loss": 2.2853, |
| "step": 58300 |
| }, |
| { |
| "epoch": 18.87524240465417, |
| "grad_norm": 1.5035419464111328, |
| "learning_rate": 0.001, |
| "loss": 2.2967, |
| "step": 58400 |
| }, |
| { |
| "epoch": 18.907563025210084, |
| "grad_norm": 1.2617650032043457, |
| "learning_rate": 0.001, |
| "loss": 2.3184, |
| "step": 58500 |
| }, |
| { |
| "epoch": 18.939883645766, |
| "grad_norm": 1.4061576128005981, |
| "learning_rate": 0.001, |
| "loss": 2.2902, |
| "step": 58600 |
| }, |
| { |
| "epoch": 18.972204266321913, |
| "grad_norm": 1.2522116899490356, |
| "learning_rate": 0.001, |
| "loss": 2.2897, |
| "step": 58700 |
| }, |
| { |
| "epoch": 19.004524886877828, |
| "grad_norm": 1.2318428754806519, |
| "learning_rate": 0.001, |
| "loss": 2.295, |
| "step": 58800 |
| }, |
| { |
| "epoch": 19.036845507433743, |
| "grad_norm": 1.2215492725372314, |
| "learning_rate": 0.001, |
| "loss": 2.1301, |
| "step": 58900 |
| }, |
| { |
| "epoch": 19.069166127989657, |
| "grad_norm": 1.204942226409912, |
| "learning_rate": 0.001, |
| "loss": 2.1383, |
| "step": 59000 |
| }, |
| { |
| "epoch": 19.10148674854557, |
| "grad_norm": 1.343122124671936, |
| "learning_rate": 0.001, |
| "loss": 2.1669, |
| "step": 59100 |
| }, |
| { |
| "epoch": 19.133807369101486, |
| "grad_norm": 1.4247043132781982, |
| "learning_rate": 0.001, |
| "loss": 2.17, |
| "step": 59200 |
| }, |
| { |
| "epoch": 19.1661279896574, |
| "grad_norm": 1.212086796760559, |
| "learning_rate": 0.001, |
| "loss": 2.1771, |
| "step": 59300 |
| }, |
| { |
| "epoch": 19.198448610213315, |
| "grad_norm": 0.9887686371803284, |
| "learning_rate": 0.001, |
| "loss": 2.1871, |
| "step": 59400 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 0.9896878600120544, |
| "learning_rate": 0.001, |
| "loss": 2.1768, |
| "step": 59500 |
| }, |
| { |
| "epoch": 19.263089851325145, |
| "grad_norm": 1.0798989534378052, |
| "learning_rate": 0.001, |
| "loss": 2.1963, |
| "step": 59600 |
| }, |
| { |
| "epoch": 19.29541047188106, |
| "grad_norm": 1.0032464265823364, |
| "learning_rate": 0.001, |
| "loss": 2.1917, |
| "step": 59700 |
| }, |
| { |
| "epoch": 19.327731092436974, |
| "grad_norm": 1.21811044216156, |
| "learning_rate": 0.001, |
| "loss": 2.204, |
| "step": 59800 |
| }, |
| { |
| "epoch": 19.360051712992888, |
| "grad_norm": 1.1439648866653442, |
| "learning_rate": 0.001, |
| "loss": 2.2006, |
| "step": 59900 |
| }, |
| { |
| "epoch": 19.392372333548803, |
| "grad_norm": 1.0855740308761597, |
| "learning_rate": 0.001, |
| "loss": 2.2165, |
| "step": 60000 |
| }, |
| { |
| "epoch": 19.424692954104717, |
| "grad_norm": 1.388441562652588, |
| "learning_rate": 0.001, |
| "loss": 2.2109, |
| "step": 60100 |
| }, |
| { |
| "epoch": 19.457013574660632, |
| "grad_norm": 1.4667842388153076, |
| "learning_rate": 0.001, |
| "loss": 2.1972, |
| "step": 60200 |
| }, |
| { |
| "epoch": 19.489334195216546, |
| "grad_norm": 1.7039697170257568, |
| "learning_rate": 0.001, |
| "loss": 2.2221, |
| "step": 60300 |
| }, |
| { |
| "epoch": 19.521654815772465, |
| "grad_norm": 1.1940791606903076, |
| "learning_rate": 0.001, |
| "loss": 2.2167, |
| "step": 60400 |
| }, |
| { |
| "epoch": 19.55397543632838, |
| "grad_norm": 1.150011420249939, |
| "learning_rate": 0.001, |
| "loss": 2.2246, |
| "step": 60500 |
| }, |
| { |
| "epoch": 19.586296056884294, |
| "grad_norm": 1.097654104232788, |
| "learning_rate": 0.001, |
| "loss": 2.2212, |
| "step": 60600 |
| }, |
| { |
| "epoch": 19.618616677440208, |
| "grad_norm": 1.1845519542694092, |
| "learning_rate": 0.001, |
| "loss": 2.2251, |
| "step": 60700 |
| }, |
| { |
| "epoch": 19.650937297996123, |
| "grad_norm": 1.1336361169815063, |
| "learning_rate": 0.001, |
| "loss": 2.234, |
| "step": 60800 |
| }, |
| { |
| "epoch": 19.683257918552037, |
| "grad_norm": 1.1724891662597656, |
| "learning_rate": 0.001, |
| "loss": 2.2089, |
| "step": 60900 |
| }, |
| { |
| "epoch": 19.715578539107952, |
| "grad_norm": 0.9693626165390015, |
| "learning_rate": 0.001, |
| "loss": 2.2348, |
| "step": 61000 |
| }, |
| { |
| "epoch": 19.747899159663866, |
| "grad_norm": 1.1252988576889038, |
| "learning_rate": 0.001, |
| "loss": 2.2443, |
| "step": 61100 |
| }, |
| { |
| "epoch": 19.78021978021978, |
| "grad_norm": 0.9875534772872925, |
| "learning_rate": 0.001, |
| "loss": 2.2378, |
| "step": 61200 |
| }, |
| { |
| "epoch": 19.812540400775696, |
| "grad_norm": 1.3839106559753418, |
| "learning_rate": 0.001, |
| "loss": 2.2329, |
| "step": 61300 |
| }, |
| { |
| "epoch": 19.84486102133161, |
| "grad_norm": 1.5243983268737793, |
| "learning_rate": 0.001, |
| "loss": 2.252, |
| "step": 61400 |
| }, |
| { |
| "epoch": 19.877181641887525, |
| "grad_norm": 1.1300511360168457, |
| "learning_rate": 0.001, |
| "loss": 2.2568, |
| "step": 61500 |
| }, |
| { |
| "epoch": 19.90950226244344, |
| "grad_norm": 1.2548259496688843, |
| "learning_rate": 0.001, |
| "loss": 2.2585, |
| "step": 61600 |
| }, |
| { |
| "epoch": 19.941822882999354, |
| "grad_norm": 1.2727535963058472, |
| "learning_rate": 0.001, |
| "loss": 2.236, |
| "step": 61700 |
| }, |
| { |
| "epoch": 19.97414350355527, |
| "grad_norm": 1.0166510343551636, |
| "learning_rate": 0.001, |
| "loss": 2.2477, |
| "step": 61800 |
| }, |
| { |
| "epoch": 20.006464124111183, |
| "grad_norm": 1.0059797763824463, |
| "learning_rate": 0.001, |
| "loss": 2.2228, |
| "step": 61900 |
| }, |
| { |
| "epoch": 20.038784744667097, |
| "grad_norm": 1.5406187772750854, |
| "learning_rate": 0.001, |
| "loss": 2.1074, |
| "step": 62000 |
| }, |
| { |
| "epoch": 20.071105365223012, |
| "grad_norm": 1.2194257974624634, |
| "learning_rate": 0.001, |
| "loss": 2.128, |
| "step": 62100 |
| }, |
| { |
| "epoch": 20.103425985778927, |
| "grad_norm": 1.000863790512085, |
| "learning_rate": 0.001, |
| "loss": 2.1116, |
| "step": 62200 |
| }, |
| { |
| "epoch": 20.13574660633484, |
| "grad_norm": 1.1983182430267334, |
| "learning_rate": 0.001, |
| "loss": 2.1301, |
| "step": 62300 |
| }, |
| { |
| "epoch": 20.168067226890756, |
| "grad_norm": 1.2805134057998657, |
| "learning_rate": 0.001, |
| "loss": 2.1357, |
| "step": 62400 |
| }, |
| { |
| "epoch": 20.20038784744667, |
| "grad_norm": 1.5315334796905518, |
| "learning_rate": 0.001, |
| "loss": 2.127, |
| "step": 62500 |
| }, |
| { |
| "epoch": 20.232708468002585, |
| "grad_norm": 1.239235520362854, |
| "learning_rate": 0.001, |
| "loss": 2.1294, |
| "step": 62600 |
| }, |
| { |
| "epoch": 20.2650290885585, |
| "grad_norm": 1.214128851890564, |
| "learning_rate": 0.001, |
| "loss": 2.1297, |
| "step": 62700 |
| }, |
| { |
| "epoch": 20.297349709114414, |
| "grad_norm": 1.1846396923065186, |
| "learning_rate": 0.001, |
| "loss": 2.1661, |
| "step": 62800 |
| }, |
| { |
| "epoch": 20.32967032967033, |
| "grad_norm": 1.3728803396224976, |
| "learning_rate": 0.001, |
| "loss": 2.1509, |
| "step": 62900 |
| }, |
| { |
| "epoch": 20.361990950226243, |
| "grad_norm": 1.369428038597107, |
| "learning_rate": 0.001, |
| "loss": 2.1814, |
| "step": 63000 |
| }, |
| { |
| "epoch": 20.394311570782158, |
| "grad_norm": 1.49150812625885, |
| "learning_rate": 0.001, |
| "loss": 2.156, |
| "step": 63100 |
| }, |
| { |
| "epoch": 20.426632191338072, |
| "grad_norm": 1.056602954864502, |
| "learning_rate": 0.001, |
| "loss": 2.1691, |
| "step": 63200 |
| }, |
| { |
| "epoch": 20.458952811893987, |
| "grad_norm": 1.071666955947876, |
| "learning_rate": 0.001, |
| "loss": 2.1601, |
| "step": 63300 |
| }, |
| { |
| "epoch": 20.4912734324499, |
| "grad_norm": 1.1927623748779297, |
| "learning_rate": 0.001, |
| "loss": 2.1771, |
| "step": 63400 |
| }, |
| { |
| "epoch": 20.52359405300582, |
| "grad_norm": 1.1696590185165405, |
| "learning_rate": 0.001, |
| "loss": 2.1663, |
| "step": 63500 |
| }, |
| { |
| "epoch": 20.555914673561734, |
| "grad_norm": 1.454006314277649, |
| "learning_rate": 0.001, |
| "loss": 2.1836, |
| "step": 63600 |
| }, |
| { |
| "epoch": 20.58823529411765, |
| "grad_norm": 1.0862654447555542, |
| "learning_rate": 0.001, |
| "loss": 2.1822, |
| "step": 63700 |
| }, |
| { |
| "epoch": 20.620555914673563, |
| "grad_norm": 1.1355229616165161, |
| "learning_rate": 0.001, |
| "loss": 2.1819, |
| "step": 63800 |
| }, |
| { |
| "epoch": 20.652876535229478, |
| "grad_norm": 1.1009161472320557, |
| "learning_rate": 0.001, |
| "loss": 2.1741, |
| "step": 63900 |
| }, |
| { |
| "epoch": 20.685197155785392, |
| "grad_norm": 1.2176330089569092, |
| "learning_rate": 0.001, |
| "loss": 2.1882, |
| "step": 64000 |
| }, |
| { |
| "epoch": 20.717517776341307, |
| "grad_norm": 1.3134511709213257, |
| "learning_rate": 0.001, |
| "loss": 2.1978, |
| "step": 64100 |
| }, |
| { |
| "epoch": 20.74983839689722, |
| "grad_norm": 1.0069459676742554, |
| "learning_rate": 0.001, |
| "loss": 2.2084, |
| "step": 64200 |
| }, |
| { |
| "epoch": 20.782159017453136, |
| "grad_norm": 1.4432331323623657, |
| "learning_rate": 0.001, |
| "loss": 2.1979, |
| "step": 64300 |
| }, |
| { |
| "epoch": 20.81447963800905, |
| "grad_norm": 1.021673321723938, |
| "learning_rate": 0.001, |
| "loss": 2.2206, |
| "step": 64400 |
| }, |
| { |
| "epoch": 20.846800258564965, |
| "grad_norm": 1.5848689079284668, |
| "learning_rate": 0.001, |
| "loss": 2.2038, |
| "step": 64500 |
| }, |
| { |
| "epoch": 20.87912087912088, |
| "grad_norm": 1.2562905550003052, |
| "learning_rate": 0.001, |
| "loss": 2.2304, |
| "step": 64600 |
| }, |
| { |
| "epoch": 20.911441499676794, |
| "grad_norm": 1.084649920463562, |
| "learning_rate": 0.001, |
| "loss": 2.2199, |
| "step": 64700 |
| }, |
| { |
| "epoch": 20.94376212023271, |
| "grad_norm": 1.2564764022827148, |
| "learning_rate": 0.001, |
| "loss": 2.2264, |
| "step": 64800 |
| }, |
| { |
| "epoch": 20.976082740788623, |
| "grad_norm": 1.4754396677017212, |
| "learning_rate": 0.001, |
| "loss": 2.2299, |
| "step": 64900 |
| }, |
| { |
| "epoch": 21.008403361344538, |
| "grad_norm": 1.1850255727767944, |
| "learning_rate": 0.001, |
| "loss": 2.1639, |
| "step": 65000 |
| }, |
| { |
| "epoch": 21.040723981900452, |
| "grad_norm": 1.1101585626602173, |
| "learning_rate": 0.001, |
| "loss": 2.0785, |
| "step": 65100 |
| }, |
| { |
| "epoch": 21.073044602456367, |
| "grad_norm": 1.2771697044372559, |
| "learning_rate": 0.001, |
| "loss": 2.084, |
| "step": 65200 |
| }, |
| { |
| "epoch": 21.10536522301228, |
| "grad_norm": 1.1103767156600952, |
| "learning_rate": 0.001, |
| "loss": 2.0889, |
| "step": 65300 |
| }, |
| { |
| "epoch": 21.137685843568196, |
| "grad_norm": 1.2200546264648438, |
| "learning_rate": 0.001, |
| "loss": 2.0993, |
| "step": 65400 |
| }, |
| { |
| "epoch": 21.17000646412411, |
| "grad_norm": 1.3447659015655518, |
| "learning_rate": 0.001, |
| "loss": 2.0916, |
| "step": 65500 |
| }, |
| { |
| "epoch": 21.202327084680025, |
| "grad_norm": 2.29350209236145, |
| "learning_rate": 0.001, |
| "loss": 2.1049, |
| "step": 65600 |
| }, |
| { |
| "epoch": 21.23464770523594, |
| "grad_norm": 1.195257306098938, |
| "learning_rate": 0.001, |
| "loss": 2.1069, |
| "step": 65700 |
| }, |
| { |
| "epoch": 21.266968325791854, |
| "grad_norm": 1.0652481317520142, |
| "learning_rate": 0.001, |
| "loss": 2.1048, |
| "step": 65800 |
| }, |
| { |
| "epoch": 21.29928894634777, |
| "grad_norm": 1.1504040956497192, |
| "learning_rate": 0.001, |
| "loss": 2.1239, |
| "step": 65900 |
| }, |
| { |
| "epoch": 21.331609566903683, |
| "grad_norm": 1.2053735256195068, |
| "learning_rate": 0.001, |
| "loss": 2.1326, |
| "step": 66000 |
| }, |
| { |
| "epoch": 21.363930187459598, |
| "grad_norm": 39.07048034667969, |
| "learning_rate": 0.001, |
| "loss": 2.112, |
| "step": 66100 |
| }, |
| { |
| "epoch": 21.396250808015512, |
| "grad_norm": 1.1385326385498047, |
| "learning_rate": 0.001, |
| "loss": 2.1137, |
| "step": 66200 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 1.2207857370376587, |
| "learning_rate": 0.001, |
| "loss": 2.125, |
| "step": 66300 |
| }, |
| { |
| "epoch": 21.46089204912734, |
| "grad_norm": 1.2614213228225708, |
| "learning_rate": 0.001, |
| "loss": 2.1434, |
| "step": 66400 |
| }, |
| { |
| "epoch": 21.49321266968326, |
| "grad_norm": 1.3514631986618042, |
| "learning_rate": 0.001, |
| "loss": 2.1106, |
| "step": 66500 |
| }, |
| { |
| "epoch": 21.525533290239174, |
| "grad_norm": 1.471451759338379, |
| "learning_rate": 0.001, |
| "loss": 2.1038, |
| "step": 66600 |
| }, |
| { |
| "epoch": 21.55785391079509, |
| "grad_norm": 1.3486419916152954, |
| "learning_rate": 0.001, |
| "loss": 2.1398, |
| "step": 66700 |
| }, |
| { |
| "epoch": 21.590174531351003, |
| "grad_norm": 3.350062847137451, |
| "learning_rate": 0.001, |
| "loss": 2.1478, |
| "step": 66800 |
| }, |
| { |
| "epoch": 21.622495151906918, |
| "grad_norm": 1.3389320373535156, |
| "learning_rate": 0.001, |
| "loss": 2.1472, |
| "step": 66900 |
| }, |
| { |
| "epoch": 21.654815772462833, |
| "grad_norm": 1.0626788139343262, |
| "learning_rate": 0.001, |
| "loss": 2.1595, |
| "step": 67000 |
| }, |
| { |
| "epoch": 21.687136393018747, |
| "grad_norm": 1.2367748022079468, |
| "learning_rate": 0.001, |
| "loss": 2.1655, |
| "step": 67100 |
| }, |
| { |
| "epoch": 21.71945701357466, |
| "grad_norm": 1.1122276782989502, |
| "learning_rate": 0.001, |
| "loss": 2.179, |
| "step": 67200 |
| }, |
| { |
| "epoch": 21.751777634130576, |
| "grad_norm": 1.179870367050171, |
| "learning_rate": 0.001, |
| "loss": 2.1743, |
| "step": 67300 |
| }, |
| { |
| "epoch": 21.78409825468649, |
| "grad_norm": 1.1807243824005127, |
| "learning_rate": 0.001, |
| "loss": 2.1917, |
| "step": 67400 |
| }, |
| { |
| "epoch": 21.816418875242405, |
| "grad_norm": 1.0804619789123535, |
| "learning_rate": 0.001, |
| "loss": 2.192, |
| "step": 67500 |
| }, |
| { |
| "epoch": 21.84873949579832, |
| "grad_norm": 1.6039589643478394, |
| "learning_rate": 0.001, |
| "loss": 2.1808, |
| "step": 67600 |
| }, |
| { |
| "epoch": 21.881060116354234, |
| "grad_norm": 1.2812756299972534, |
| "learning_rate": 0.001, |
| "loss": 2.1729, |
| "step": 67700 |
| }, |
| { |
| "epoch": 21.91338073691015, |
| "grad_norm": 1.1737068891525269, |
| "learning_rate": 0.001, |
| "loss": 2.1592, |
| "step": 67800 |
| }, |
| { |
| "epoch": 21.945701357466064, |
| "grad_norm": 1.1612744331359863, |
| "learning_rate": 0.001, |
| "loss": 2.1783, |
| "step": 67900 |
| }, |
| { |
| "epoch": 21.978021978021978, |
| "grad_norm": 1.238431692123413, |
| "learning_rate": 0.001, |
| "loss": 2.1802, |
| "step": 68000 |
| }, |
| { |
| "epoch": 22.010342598577893, |
| "grad_norm": 1.3498260974884033, |
| "learning_rate": 0.001, |
| "loss": 2.1187, |
| "step": 68100 |
| }, |
| { |
| "epoch": 22.042663219133807, |
| "grad_norm": 1.307900309562683, |
| "learning_rate": 0.001, |
| "loss": 2.0347, |
| "step": 68200 |
| }, |
| { |
| "epoch": 22.07498383968972, |
| "grad_norm": 1.265341877937317, |
| "learning_rate": 0.001, |
| "loss": 2.0262, |
| "step": 68300 |
| }, |
| { |
| "epoch": 22.107304460245636, |
| "grad_norm": 1.0917607545852661, |
| "learning_rate": 0.001, |
| "loss": 2.0515, |
| "step": 68400 |
| }, |
| { |
| "epoch": 22.13962508080155, |
| "grad_norm": 1.6194117069244385, |
| "learning_rate": 0.001, |
| "loss": 2.0563, |
| "step": 68500 |
| }, |
| { |
| "epoch": 22.171945701357465, |
| "grad_norm": 1.8932991027832031, |
| "learning_rate": 0.001, |
| "loss": 2.0652, |
| "step": 68600 |
| }, |
| { |
| "epoch": 22.20426632191338, |
| "grad_norm": 1.2356934547424316, |
| "learning_rate": 0.001, |
| "loss": 2.0741, |
| "step": 68700 |
| }, |
| { |
| "epoch": 22.236586942469295, |
| "grad_norm": 1.2971307039260864, |
| "learning_rate": 0.001, |
| "loss": 2.0682, |
| "step": 68800 |
| }, |
| { |
| "epoch": 22.26890756302521, |
| "grad_norm": 1.5780755281448364, |
| "learning_rate": 0.001, |
| "loss": 2.0567, |
| "step": 68900 |
| }, |
| { |
| "epoch": 22.301228183581124, |
| "grad_norm": 1.664420247077942, |
| "learning_rate": 0.001, |
| "loss": 2.0788, |
| "step": 69000 |
| }, |
| { |
| "epoch": 22.33354880413704, |
| "grad_norm": 1.3689608573913574, |
| "learning_rate": 0.001, |
| "loss": 2.0612, |
| "step": 69100 |
| }, |
| { |
| "epoch": 22.365869424692953, |
| "grad_norm": 1.2644816637039185, |
| "learning_rate": 0.001, |
| "loss": 2.1076, |
| "step": 69200 |
| }, |
| { |
| "epoch": 22.398190045248867, |
| "grad_norm": 1.6748441457748413, |
| "learning_rate": 0.001, |
| "loss": 2.1104, |
| "step": 69300 |
| }, |
| { |
| "epoch": 22.430510665804782, |
| "grad_norm": 1.0100698471069336, |
| "learning_rate": 0.001, |
| "loss": 2.1049, |
| "step": 69400 |
| }, |
| { |
| "epoch": 22.462831286360696, |
| "grad_norm": 1.4298042058944702, |
| "learning_rate": 0.001, |
| "loss": 2.1022, |
| "step": 69500 |
| }, |
| { |
| "epoch": 22.49515190691661, |
| "grad_norm": 1.8333765268325806, |
| "learning_rate": 0.001, |
| "loss": 2.12, |
| "step": 69600 |
| }, |
| { |
| "epoch": 22.52747252747253, |
| "grad_norm": 1.4487437009811401, |
| "learning_rate": 0.001, |
| "loss": 2.1007, |
| "step": 69700 |
| }, |
| { |
| "epoch": 22.559793148028444, |
| "grad_norm": 1.6013681888580322, |
| "learning_rate": 0.001, |
| "loss": 2.1079, |
| "step": 69800 |
| }, |
| { |
| "epoch": 22.59211376858436, |
| "grad_norm": 2.3832428455352783, |
| "learning_rate": 0.001, |
| "loss": 2.1165, |
| "step": 69900 |
| }, |
| { |
| "epoch": 22.624434389140273, |
| "grad_norm": 1.600501298904419, |
| "learning_rate": 0.001, |
| "loss": 2.1214, |
| "step": 70000 |
| }, |
| { |
| "epoch": 22.656755009696187, |
| "grad_norm": 1.5591310262680054, |
| "learning_rate": 0.001, |
| "loss": 2.115, |
| "step": 70100 |
| }, |
| { |
| "epoch": 22.689075630252102, |
| "grad_norm": 1.2109787464141846, |
| "learning_rate": 0.001, |
| "loss": 2.1442, |
| "step": 70200 |
| }, |
| { |
| "epoch": 22.721396250808017, |
| "grad_norm": 1.465110421180725, |
| "learning_rate": 0.001, |
| "loss": 2.1413, |
| "step": 70300 |
| }, |
| { |
| "epoch": 22.75371687136393, |
| "grad_norm": 1.2152010202407837, |
| "learning_rate": 0.001, |
| "loss": 2.1242, |
| "step": 70400 |
| }, |
| { |
| "epoch": 22.786037491919846, |
| "grad_norm": 1.4363352060317993, |
| "learning_rate": 0.001, |
| "loss": 2.1379, |
| "step": 70500 |
| }, |
| { |
| "epoch": 22.81835811247576, |
| "grad_norm": 1.399573564529419, |
| "learning_rate": 0.001, |
| "loss": 2.138, |
| "step": 70600 |
| }, |
| { |
| "epoch": 22.850678733031675, |
| "grad_norm": 1.378006100654602, |
| "learning_rate": 0.001, |
| "loss": 2.1285, |
| "step": 70700 |
| }, |
| { |
| "epoch": 22.88299935358759, |
| "grad_norm": 1.274100422859192, |
| "learning_rate": 0.001, |
| "loss": 2.1528, |
| "step": 70800 |
| }, |
| { |
| "epoch": 22.915319974143504, |
| "grad_norm": 1.2786856889724731, |
| "learning_rate": 0.001, |
| "loss": 2.1371, |
| "step": 70900 |
| }, |
| { |
| "epoch": 22.94764059469942, |
| "grad_norm": 1.3367137908935547, |
| "learning_rate": 0.001, |
| "loss": 2.1356, |
| "step": 71000 |
| }, |
| { |
| "epoch": 22.979961215255333, |
| "grad_norm": 1.1747994422912598, |
| "learning_rate": 0.001, |
| "loss": 2.1513, |
| "step": 71100 |
| }, |
| { |
| "epoch": 23.012281835811248, |
| "grad_norm": 1.472936987876892, |
| "learning_rate": 0.001, |
| "loss": 2.0644, |
| "step": 71200 |
| }, |
| { |
| "epoch": 23.044602456367162, |
| "grad_norm": 1.5637643337249756, |
| "learning_rate": 0.001, |
| "loss": 2.0074, |
| "step": 71300 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 1.5909124612808228, |
| "learning_rate": 0.001, |
| "loss": 1.99, |
| "step": 71400 |
| }, |
| { |
| "epoch": 23.10924369747899, |
| "grad_norm": 1.779449462890625, |
| "learning_rate": 0.001, |
| "loss": 2.0129, |
| "step": 71500 |
| }, |
| { |
| "epoch": 23.141564318034906, |
| "grad_norm": 1.4273805618286133, |
| "learning_rate": 0.001, |
| "loss": 2.0336, |
| "step": 71600 |
| }, |
| { |
| "epoch": 23.17388493859082, |
| "grad_norm": 1.4807401895523071, |
| "learning_rate": 0.001, |
| "loss": 2.0258, |
| "step": 71700 |
| }, |
| { |
| "epoch": 23.206205559146735, |
| "grad_norm": 1.5846195220947266, |
| "learning_rate": 0.001, |
| "loss": 2.0398, |
| "step": 71800 |
| }, |
| { |
| "epoch": 23.23852617970265, |
| "grad_norm": 1.2522149085998535, |
| "learning_rate": 0.001, |
| "loss": 2.0358, |
| "step": 71900 |
| }, |
| { |
| "epoch": 23.270846800258564, |
| "grad_norm": 1.8011384010314941, |
| "learning_rate": 0.001, |
| "loss": 2.0368, |
| "step": 72000 |
| }, |
| { |
| "epoch": 23.30316742081448, |
| "grad_norm": 1.339313268661499, |
| "learning_rate": 0.001, |
| "loss": 2.0504, |
| "step": 72100 |
| }, |
| { |
| "epoch": 23.335488041370393, |
| "grad_norm": 1.317734956741333, |
| "learning_rate": 0.001, |
| "loss": 2.0429, |
| "step": 72200 |
| }, |
| { |
| "epoch": 23.367808661926308, |
| "grad_norm": 1.351259708404541, |
| "learning_rate": 0.001, |
| "loss": 2.0514, |
| "step": 72300 |
| }, |
| { |
| "epoch": 23.400129282482222, |
| "grad_norm": 1.3582738637924194, |
| "learning_rate": 0.001, |
| "loss": 2.0577, |
| "step": 72400 |
| }, |
| { |
| "epoch": 23.432449903038137, |
| "grad_norm": 1.813624382019043, |
| "learning_rate": 0.001, |
| "loss": 2.0445, |
| "step": 72500 |
| }, |
| { |
| "epoch": 23.46477052359405, |
| "grad_norm": 1.531417727470398, |
| "learning_rate": 0.001, |
| "loss": 2.0614, |
| "step": 72600 |
| }, |
| { |
| "epoch": 23.49709114414997, |
| "grad_norm": 1.5919642448425293, |
| "learning_rate": 0.001, |
| "loss": 2.0691, |
| "step": 72700 |
| }, |
| { |
| "epoch": 23.529411764705884, |
| "grad_norm": 1.5767107009887695, |
| "learning_rate": 0.001, |
| "loss": 2.0789, |
| "step": 72800 |
| }, |
| { |
| "epoch": 23.5617323852618, |
| "grad_norm": 1.5957386493682861, |
| "learning_rate": 0.001, |
| "loss": 2.067, |
| "step": 72900 |
| }, |
| { |
| "epoch": 23.594053005817713, |
| "grad_norm": 1.8179656267166138, |
| "learning_rate": 0.001, |
| "loss": 2.0862, |
| "step": 73000 |
| }, |
| { |
| "epoch": 23.626373626373628, |
| "grad_norm": 1.5586670637130737, |
| "learning_rate": 0.001, |
| "loss": 2.0848, |
| "step": 73100 |
| }, |
| { |
| "epoch": 23.658694246929542, |
| "grad_norm": 1.4760098457336426, |
| "learning_rate": 0.001, |
| "loss": 2.106, |
| "step": 73200 |
| }, |
| { |
| "epoch": 23.691014867485457, |
| "grad_norm": 1.4071135520935059, |
| "learning_rate": 0.001, |
| "loss": 2.0928, |
| "step": 73300 |
| }, |
| { |
| "epoch": 23.72333548804137, |
| "grad_norm": 1.3541771173477173, |
| "learning_rate": 0.001, |
| "loss": 2.1112, |
| "step": 73400 |
| }, |
| { |
| "epoch": 23.755656108597286, |
| "grad_norm": 1.6055703163146973, |
| "learning_rate": 0.001, |
| "loss": 2.1058, |
| "step": 73500 |
| }, |
| { |
| "epoch": 23.7879767291532, |
| "grad_norm": 1.7289507389068604, |
| "learning_rate": 0.001, |
| "loss": 2.1219, |
| "step": 73600 |
| }, |
| { |
| "epoch": 23.820297349709115, |
| "grad_norm": 1.8029732704162598, |
| "learning_rate": 0.001, |
| "loss": 2.083, |
| "step": 73700 |
| }, |
| { |
| "epoch": 23.85261797026503, |
| "grad_norm": 1.8605992794036865, |
| "learning_rate": 0.001, |
| "loss": 2.103, |
| "step": 73800 |
| }, |
| { |
| "epoch": 23.884938590820944, |
| "grad_norm": 1.1460589170455933, |
| "learning_rate": 0.001, |
| "loss": 2.0971, |
| "step": 73900 |
| }, |
| { |
| "epoch": 23.91725921137686, |
| "grad_norm": 1.4249467849731445, |
| "learning_rate": 0.001, |
| "loss": 2.1331, |
| "step": 74000 |
| }, |
| { |
| "epoch": 23.949579831932773, |
| "grad_norm": 1.4224003553390503, |
| "learning_rate": 0.001, |
| "loss": 2.1221, |
| "step": 74100 |
| }, |
| { |
| "epoch": 23.981900452488688, |
| "grad_norm": 1.3229635953903198, |
| "learning_rate": 0.001, |
| "loss": 2.1182, |
| "step": 74200 |
| }, |
| { |
| "epoch": 24.014221073044602, |
| "grad_norm": 1.3930984735488892, |
| "learning_rate": 0.001, |
| "loss": 2.0503, |
| "step": 74300 |
| }, |
| { |
| "epoch": 24.046541693600517, |
| "grad_norm": 1.4770911931991577, |
| "learning_rate": 0.001, |
| "loss": 1.9697, |
| "step": 74400 |
| }, |
| { |
| "epoch": 24.07886231415643, |
| "grad_norm": 1.6999855041503906, |
| "learning_rate": 0.001, |
| "loss": 1.9748, |
| "step": 74500 |
| }, |
| { |
| "epoch": 24.111182934712346, |
| "grad_norm": 1.63511061668396, |
| "learning_rate": 0.001, |
| "loss": 1.9672, |
| "step": 74600 |
| }, |
| { |
| "epoch": 24.14350355526826, |
| "grad_norm": 1.4599164724349976, |
| "learning_rate": 0.001, |
| "loss": 1.9993, |
| "step": 74700 |
| }, |
| { |
| "epoch": 24.175824175824175, |
| "grad_norm": 1.9511414766311646, |
| "learning_rate": 0.001, |
| "loss": 2.0237, |
| "step": 74800 |
| }, |
| { |
| "epoch": 24.20814479638009, |
| "grad_norm": 1.46794593334198, |
| "learning_rate": 0.001, |
| "loss": 1.985, |
| "step": 74900 |
| }, |
| { |
| "epoch": 24.240465416936004, |
| "grad_norm": 1.2807059288024902, |
| "learning_rate": 0.001, |
| "loss": 2.0034, |
| "step": 75000 |
| }, |
| { |
| "epoch": 24.27278603749192, |
| "grad_norm": 1.7410281896591187, |
| "learning_rate": 0.001, |
| "loss": 2.024, |
| "step": 75100 |
| }, |
| { |
| "epoch": 24.305106658047833, |
| "grad_norm": 1.439113736152649, |
| "learning_rate": 0.001, |
| "loss": 2.0233, |
| "step": 75200 |
| }, |
| { |
| "epoch": 24.337427278603748, |
| "grad_norm": 1.419325351715088, |
| "learning_rate": 0.001, |
| "loss": 2.0295, |
| "step": 75300 |
| }, |
| { |
| "epoch": 24.369747899159663, |
| "grad_norm": 1.564228892326355, |
| "learning_rate": 0.001, |
| "loss": 2.0153, |
| "step": 75400 |
| }, |
| { |
| "epoch": 24.402068519715577, |
| "grad_norm": 1.38504159450531, |
| "learning_rate": 0.001, |
| "loss": 2.0205, |
| "step": 75500 |
| }, |
| { |
| "epoch": 24.43438914027149, |
| "grad_norm": 1.453363060951233, |
| "learning_rate": 0.001, |
| "loss": 2.0271, |
| "step": 75600 |
| }, |
| { |
| "epoch": 24.466709760827406, |
| "grad_norm": 1.6083861589431763, |
| "learning_rate": 0.001, |
| "loss": 2.014, |
| "step": 75700 |
| }, |
| { |
| "epoch": 24.49903038138332, |
| "grad_norm": 1.7092853784561157, |
| "learning_rate": 0.001, |
| "loss": 2.0424, |
| "step": 75800 |
| }, |
| { |
| "epoch": 24.53135100193924, |
| "grad_norm": 1.4002851247787476, |
| "learning_rate": 0.001, |
| "loss": 2.0249, |
| "step": 75900 |
| }, |
| { |
| "epoch": 24.563671622495153, |
| "grad_norm": 1.5491043329238892, |
| "learning_rate": 0.001, |
| "loss": 2.0572, |
| "step": 76000 |
| }, |
| { |
| "epoch": 24.595992243051068, |
| "grad_norm": 1.3030824661254883, |
| "learning_rate": 0.001, |
| "loss": 2.0439, |
| "step": 76100 |
| }, |
| { |
| "epoch": 24.628312863606983, |
| "grad_norm": 1.2916637659072876, |
| "learning_rate": 0.001, |
| "loss": 2.0471, |
| "step": 76200 |
| }, |
| { |
| "epoch": 24.660633484162897, |
| "grad_norm": 1.4240041971206665, |
| "learning_rate": 0.001, |
| "loss": 2.0634, |
| "step": 76300 |
| }, |
| { |
| "epoch": 24.69295410471881, |
| "grad_norm": 1.753969669342041, |
| "learning_rate": 0.001, |
| "loss": 2.0657, |
| "step": 76400 |
| }, |
| { |
| "epoch": 24.725274725274726, |
| "grad_norm": 1.2906912565231323, |
| "learning_rate": 0.001, |
| "loss": 2.0597, |
| "step": 76500 |
| }, |
| { |
| "epoch": 24.75759534583064, |
| "grad_norm": 1.5350031852722168, |
| "learning_rate": 0.001, |
| "loss": 2.0839, |
| "step": 76600 |
| }, |
| { |
| "epoch": 24.789915966386555, |
| "grad_norm": 1.9816159009933472, |
| "learning_rate": 0.001, |
| "loss": 2.057, |
| "step": 76700 |
| }, |
| { |
| "epoch": 24.82223658694247, |
| "grad_norm": 1.9255553483963013, |
| "learning_rate": 0.001, |
| "loss": 2.0699, |
| "step": 76800 |
| }, |
| { |
| "epoch": 24.854557207498384, |
| "grad_norm": 1.4984052181243896, |
| "learning_rate": 0.001, |
| "loss": 2.0713, |
| "step": 76900 |
| }, |
| { |
| "epoch": 24.8868778280543, |
| "grad_norm": 4.322579860687256, |
| "learning_rate": 0.001, |
| "loss": 2.1062, |
| "step": 77000 |
| }, |
| { |
| "epoch": 24.919198448610214, |
| "grad_norm": 1.597294569015503, |
| "learning_rate": 0.001, |
| "loss": 2.0879, |
| "step": 77100 |
| }, |
| { |
| "epoch": 24.951519069166128, |
| "grad_norm": 1.4483535289764404, |
| "learning_rate": 0.001, |
| "loss": 2.0873, |
| "step": 77200 |
| }, |
| { |
| "epoch": 24.983839689722043, |
| "grad_norm": 1.4084569215774536, |
| "learning_rate": 0.001, |
| "loss": 2.0913, |
| "step": 77300 |
| }, |
| { |
| "epoch": 25.016160310277957, |
| "grad_norm": 1.9849538803100586, |
| "learning_rate": 0.001, |
| "loss": 1.9734, |
| "step": 77400 |
| }, |
| { |
| "epoch": 25.048480930833872, |
| "grad_norm": 2.1516408920288086, |
| "learning_rate": 0.001, |
| "loss": 1.9801, |
| "step": 77500 |
| }, |
| { |
| "epoch": 25.080801551389786, |
| "grad_norm": 2.2661306858062744, |
| "learning_rate": 0.001, |
| "loss": 1.937, |
| "step": 77600 |
| }, |
| { |
| "epoch": 25.1131221719457, |
| "grad_norm": 2.510815382003784, |
| "learning_rate": 0.001, |
| "loss": 1.9473, |
| "step": 77700 |
| }, |
| { |
| "epoch": 25.145442792501616, |
| "grad_norm": 2.2111470699310303, |
| "learning_rate": 0.001, |
| "loss": 1.96, |
| "step": 77800 |
| }, |
| { |
| "epoch": 25.17776341305753, |
| "grad_norm": 2.111010789871216, |
| "learning_rate": 0.001, |
| "loss": 1.9912, |
| "step": 77900 |
| }, |
| { |
| "epoch": 25.210084033613445, |
| "grad_norm": 2.753941297531128, |
| "learning_rate": 0.001, |
| "loss": 1.9919, |
| "step": 78000 |
| }, |
| { |
| "epoch": 25.24240465416936, |
| "grad_norm": 2.347527027130127, |
| "learning_rate": 0.001, |
| "loss": 1.9843, |
| "step": 78100 |
| }, |
| { |
| "epoch": 25.274725274725274, |
| "grad_norm": 2.777312755584717, |
| "learning_rate": 0.001, |
| "loss": 1.9692, |
| "step": 78200 |
| }, |
| { |
| "epoch": 25.30704589528119, |
| "grad_norm": 1.7811833620071411, |
| "learning_rate": 0.001, |
| "loss": 2.0007, |
| "step": 78300 |
| }, |
| { |
| "epoch": 25.339366515837103, |
| "grad_norm": 2.1932690143585205, |
| "learning_rate": 0.001, |
| "loss": 2.0012, |
| "step": 78400 |
| }, |
| { |
| "epoch": 25.371687136393017, |
| "grad_norm": 2.195629596710205, |
| "learning_rate": 0.001, |
| "loss": 1.9921, |
| "step": 78500 |
| }, |
| { |
| "epoch": 25.404007756948932, |
| "grad_norm": 2.693999767303467, |
| "learning_rate": 0.001, |
| "loss": 2.0042, |
| "step": 78600 |
| }, |
| { |
| "epoch": 25.436328377504847, |
| "grad_norm": 3.0932207107543945, |
| "learning_rate": 0.001, |
| "loss": 1.9884, |
| "step": 78700 |
| }, |
| { |
| "epoch": 25.46864899806076, |
| "grad_norm": 2.486372232437134, |
| "learning_rate": 0.001, |
| "loss": 2.0082, |
| "step": 78800 |
| }, |
| { |
| "epoch": 25.50096961861668, |
| "grad_norm": 2.309953212738037, |
| "learning_rate": 0.001, |
| "loss": 1.9836, |
| "step": 78900 |
| }, |
| { |
| "epoch": 25.533290239172594, |
| "grad_norm": 2.481170892715454, |
| "learning_rate": 0.001, |
| "loss": 2.0401, |
| "step": 79000 |
| }, |
| { |
| "epoch": 25.56561085972851, |
| "grad_norm": 6.894639492034912, |
| "learning_rate": 0.001, |
| "loss": 2.0187, |
| "step": 79100 |
| }, |
| { |
| "epoch": 25.597931480284423, |
| "grad_norm": 2.154688596725464, |
| "learning_rate": 0.001, |
| "loss": 2.0334, |
| "step": 79200 |
| }, |
| { |
| "epoch": 25.630252100840337, |
| "grad_norm": 2.5269687175750732, |
| "learning_rate": 0.001, |
| "loss": 2.0062, |
| "step": 79300 |
| }, |
| { |
| "epoch": 25.662572721396252, |
| "grad_norm": 2.0479533672332764, |
| "learning_rate": 0.001, |
| "loss": 2.0332, |
| "step": 79400 |
| }, |
| { |
| "epoch": 25.694893341952167, |
| "grad_norm": 2.8446123600006104, |
| "learning_rate": 0.001, |
| "loss": 2.0332, |
| "step": 79500 |
| }, |
| { |
| "epoch": 25.72721396250808, |
| "grad_norm": 1.9865922927856445, |
| "learning_rate": 0.001, |
| "loss": 2.0408, |
| "step": 79600 |
| }, |
| { |
| "epoch": 25.759534583063996, |
| "grad_norm": 2.4870991706848145, |
| "learning_rate": 0.001, |
| "loss": 2.0251, |
| "step": 79700 |
| }, |
| { |
| "epoch": 25.79185520361991, |
| "grad_norm": 2.0632777214050293, |
| "learning_rate": 0.001, |
| "loss": 2.0453, |
| "step": 79800 |
| }, |
| { |
| "epoch": 25.824175824175825, |
| "grad_norm": 2.7556283473968506, |
| "learning_rate": 0.001, |
| "loss": 2.0285, |
| "step": 79900 |
| }, |
| { |
| "epoch": 25.85649644473174, |
| "grad_norm": 2.0263702869415283, |
| "learning_rate": 0.001, |
| "loss": 2.0623, |
| "step": 80000 |
| }, |
| { |
| "epoch": 25.888817065287654, |
| "grad_norm": 3.15863299369812, |
| "learning_rate": 0.001, |
| "loss": 2.0461, |
| "step": 80100 |
| }, |
| { |
| "epoch": 25.92113768584357, |
| "grad_norm": 2.3202335834503174, |
| "learning_rate": 0.001, |
| "loss": 2.0489, |
| "step": 80200 |
| }, |
| { |
| "epoch": 25.953458306399483, |
| "grad_norm": 2.275404691696167, |
| "learning_rate": 0.001, |
| "loss": 2.0559, |
| "step": 80300 |
| }, |
| { |
| "epoch": 25.985778926955398, |
| "grad_norm": 1.9701374769210815, |
| "learning_rate": 0.001, |
| "loss": 2.0521, |
| "step": 80400 |
| }, |
| { |
| "epoch": 26.018099547511312, |
| "grad_norm": 2.0686566829681396, |
| "learning_rate": 0.001, |
| "loss": 1.982, |
| "step": 80500 |
| }, |
| { |
| "epoch": 26.050420168067227, |
| "grad_norm": 1.7882318496704102, |
| "learning_rate": 0.001, |
| "loss": 1.9092, |
| "step": 80600 |
| }, |
| { |
| "epoch": 26.08274078862314, |
| "grad_norm": 1.3887428045272827, |
| "learning_rate": 0.001, |
| "loss": 1.9209, |
| "step": 80700 |
| }, |
| { |
| "epoch": 26.115061409179056, |
| "grad_norm": 1.775455117225647, |
| "learning_rate": 0.001, |
| "loss": 1.935, |
| "step": 80800 |
| }, |
| { |
| "epoch": 26.14738202973497, |
| "grad_norm": 1.9676622152328491, |
| "learning_rate": 0.001, |
| "loss": 1.9301, |
| "step": 80900 |
| }, |
| { |
| "epoch": 26.179702650290885, |
| "grad_norm": 1.7484667301177979, |
| "learning_rate": 0.001, |
| "loss": 1.9365, |
| "step": 81000 |
| }, |
| { |
| "epoch": 26.2120232708468, |
| "grad_norm": 1.333925485610962, |
| "learning_rate": 0.001, |
| "loss": 1.9396, |
| "step": 81100 |
| }, |
| { |
| "epoch": 26.244343891402714, |
| "grad_norm": 1.3024888038635254, |
| "learning_rate": 0.001, |
| "loss": 1.9488, |
| "step": 81200 |
| }, |
| { |
| "epoch": 26.27666451195863, |
| "grad_norm": 1.3314354419708252, |
| "learning_rate": 0.001, |
| "loss": 1.9625, |
| "step": 81300 |
| }, |
| { |
| "epoch": 26.308985132514543, |
| "grad_norm": 2.3453688621520996, |
| "learning_rate": 0.001, |
| "loss": 1.9588, |
| "step": 81400 |
| }, |
| { |
| "epoch": 26.341305753070458, |
| "grad_norm": 1.459130048751831, |
| "learning_rate": 0.001, |
| "loss": 1.9716, |
| "step": 81500 |
| }, |
| { |
| "epoch": 26.373626373626372, |
| "grad_norm": 1.3043862581253052, |
| "learning_rate": 0.001, |
| "loss": 1.9662, |
| "step": 81600 |
| }, |
| { |
| "epoch": 26.405946994182287, |
| "grad_norm": 1.2890630960464478, |
| "learning_rate": 0.001, |
| "loss": 1.9729, |
| "step": 81700 |
| }, |
| { |
| "epoch": 26.4382676147382, |
| "grad_norm": 1.2323054075241089, |
| "learning_rate": 0.001, |
| "loss": 1.9562, |
| "step": 81800 |
| }, |
| { |
| "epoch": 26.470588235294116, |
| "grad_norm": 1.9107179641723633, |
| "learning_rate": 0.001, |
| "loss": 1.9665, |
| "step": 81900 |
| }, |
| { |
| "epoch": 26.50290885585003, |
| "grad_norm": 1.5742831230163574, |
| "learning_rate": 0.001, |
| "loss": 1.9809, |
| "step": 82000 |
| }, |
| { |
| "epoch": 26.53522947640595, |
| "grad_norm": 1.3814857006072998, |
| "learning_rate": 0.001, |
| "loss": 1.975, |
| "step": 82100 |
| }, |
| { |
| "epoch": 26.567550096961863, |
| "grad_norm": 1.3042103052139282, |
| "learning_rate": 0.001, |
| "loss": 1.9745, |
| "step": 82200 |
| }, |
| { |
| "epoch": 26.599870717517778, |
| "grad_norm": 1.6151447296142578, |
| "learning_rate": 0.001, |
| "loss": 1.9872, |
| "step": 82300 |
| }, |
| { |
| "epoch": 26.632191338073692, |
| "grad_norm": 1.6068259477615356, |
| "learning_rate": 0.001, |
| "loss": 1.9933, |
| "step": 82400 |
| }, |
| { |
| "epoch": 26.664511958629607, |
| "grad_norm": 1.3208508491516113, |
| "learning_rate": 0.001, |
| "loss": 2.0022, |
| "step": 82500 |
| }, |
| { |
| "epoch": 26.69683257918552, |
| "grad_norm": 1.5930817127227783, |
| "learning_rate": 0.001, |
| "loss": 1.9939, |
| "step": 82600 |
| }, |
| { |
| "epoch": 26.729153199741436, |
| "grad_norm": 1.5000683069229126, |
| "learning_rate": 0.001, |
| "loss": 2.0076, |
| "step": 82700 |
| }, |
| { |
| "epoch": 26.76147382029735, |
| "grad_norm": 1.692630410194397, |
| "learning_rate": 0.001, |
| "loss": 2.002, |
| "step": 82800 |
| }, |
| { |
| "epoch": 26.793794440853265, |
| "grad_norm": 2.1297543048858643, |
| "learning_rate": 0.001, |
| "loss": 2.024, |
| "step": 82900 |
| }, |
| { |
| "epoch": 26.82611506140918, |
| "grad_norm": 1.2182215452194214, |
| "learning_rate": 0.001, |
| "loss": 2.0362, |
| "step": 83000 |
| }, |
| { |
| "epoch": 26.858435681965094, |
| "grad_norm": 1.3465772867202759, |
| "learning_rate": 0.001, |
| "loss": 2.0525, |
| "step": 83100 |
| }, |
| { |
| "epoch": 26.89075630252101, |
| "grad_norm": 1.9355134963989258, |
| "learning_rate": 0.001, |
| "loss": 2.0377, |
| "step": 83200 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 1.3993531465530396, |
| "learning_rate": 0.001, |
| "loss": 2.0204, |
| "step": 83300 |
| }, |
| { |
| "epoch": 26.955397543632838, |
| "grad_norm": 1.3632115125656128, |
| "learning_rate": 0.001, |
| "loss": 2.0247, |
| "step": 83400 |
| }, |
| { |
| "epoch": 26.987718164188752, |
| "grad_norm": 1.70760977268219, |
| "learning_rate": 0.001, |
| "loss": 2.0275, |
| "step": 83500 |
| }, |
| { |
| "epoch": 27.020038784744667, |
| "grad_norm": 1.203029751777649, |
| "learning_rate": 0.001, |
| "loss": 1.951, |
| "step": 83600 |
| }, |
| { |
| "epoch": 27.05235940530058, |
| "grad_norm": 1.2251530885696411, |
| "learning_rate": 0.001, |
| "loss": 1.8761, |
| "step": 83700 |
| }, |
| { |
| "epoch": 27.084680025856496, |
| "grad_norm": 1.2010034322738647, |
| "learning_rate": 0.001, |
| "loss": 1.8784, |
| "step": 83800 |
| }, |
| { |
| "epoch": 27.11700064641241, |
| "grad_norm": 1.628166913986206, |
| "learning_rate": 0.001, |
| "loss": 1.9073, |
| "step": 83900 |
| }, |
| { |
| "epoch": 27.149321266968325, |
| "grad_norm": 1.467311143875122, |
| "learning_rate": 0.001, |
| "loss": 1.9048, |
| "step": 84000 |
| }, |
| { |
| "epoch": 27.18164188752424, |
| "grad_norm": 1.3212134838104248, |
| "learning_rate": 0.001, |
| "loss": 1.9009, |
| "step": 84100 |
| }, |
| { |
| "epoch": 27.213962508080154, |
| "grad_norm": 1.3070082664489746, |
| "learning_rate": 0.001, |
| "loss": 1.9234, |
| "step": 84200 |
| }, |
| { |
| "epoch": 27.24628312863607, |
| "grad_norm": 1.4710814952850342, |
| "learning_rate": 0.001, |
| "loss": 1.897, |
| "step": 84300 |
| }, |
| { |
| "epoch": 27.278603749191983, |
| "grad_norm": 1.5833498239517212, |
| "learning_rate": 0.001, |
| "loss": 1.9441, |
| "step": 84400 |
| }, |
| { |
| "epoch": 27.310924369747898, |
| "grad_norm": 1.346295952796936, |
| "learning_rate": 0.001, |
| "loss": 1.9243, |
| "step": 84500 |
| }, |
| { |
| "epoch": 27.343244990303813, |
| "grad_norm": 1.4825193881988525, |
| "learning_rate": 0.001, |
| "loss": 1.948, |
| "step": 84600 |
| }, |
| { |
| "epoch": 27.375565610859727, |
| "grad_norm": 1.489837884902954, |
| "learning_rate": 0.001, |
| "loss": 1.933, |
| "step": 84700 |
| }, |
| { |
| "epoch": 27.40788623141564, |
| "grad_norm": 1.3613611459732056, |
| "learning_rate": 0.001, |
| "loss": 1.9417, |
| "step": 84800 |
| }, |
| { |
| "epoch": 27.440206851971556, |
| "grad_norm": 1.3851776123046875, |
| "learning_rate": 0.001, |
| "loss": 1.9535, |
| "step": 84900 |
| }, |
| { |
| "epoch": 27.47252747252747, |
| "grad_norm": 1.228777289390564, |
| "learning_rate": 0.001, |
| "loss": 1.9639, |
| "step": 85000 |
| }, |
| { |
| "epoch": 27.50484809308339, |
| "grad_norm": 1.362752914428711, |
| "learning_rate": 0.001, |
| "loss": 1.9513, |
| "step": 85100 |
| }, |
| { |
| "epoch": 27.537168713639304, |
| "grad_norm": 1.3782377243041992, |
| "learning_rate": 0.001, |
| "loss": 1.9598, |
| "step": 85200 |
| }, |
| { |
| "epoch": 27.569489334195218, |
| "grad_norm": 1.31719970703125, |
| "learning_rate": 0.001, |
| "loss": 1.961, |
| "step": 85300 |
| }, |
| { |
| "epoch": 27.601809954751133, |
| "grad_norm": 1.4611492156982422, |
| "learning_rate": 0.001, |
| "loss": 1.9581, |
| "step": 85400 |
| }, |
| { |
| "epoch": 27.634130575307047, |
| "grad_norm": 1.6541672945022583, |
| "learning_rate": 0.001, |
| "loss": 1.9674, |
| "step": 85500 |
| }, |
| { |
| "epoch": 27.66645119586296, |
| "grad_norm": 1.4168950319290161, |
| "learning_rate": 0.001, |
| "loss": 1.9783, |
| "step": 85600 |
| }, |
| { |
| "epoch": 27.698771816418876, |
| "grad_norm": 1.565339207649231, |
| "learning_rate": 0.001, |
| "loss": 1.9772, |
| "step": 85700 |
| }, |
| { |
| "epoch": 27.73109243697479, |
| "grad_norm": 1.4636658430099487, |
| "learning_rate": 0.001, |
| "loss": 1.9662, |
| "step": 85800 |
| }, |
| { |
| "epoch": 27.763413057530705, |
| "grad_norm": 1.1653151512145996, |
| "learning_rate": 0.001, |
| "loss": 1.9753, |
| "step": 85900 |
| }, |
| { |
| "epoch": 27.79573367808662, |
| "grad_norm": 1.3461968898773193, |
| "learning_rate": 0.001, |
| "loss": 1.9591, |
| "step": 86000 |
| }, |
| { |
| "epoch": 27.828054298642535, |
| "grad_norm": 1.2734227180480957, |
| "learning_rate": 0.001, |
| "loss": 1.975, |
| "step": 86100 |
| }, |
| { |
| "epoch": 27.86037491919845, |
| "grad_norm": 1.3196417093276978, |
| "learning_rate": 0.001, |
| "loss": 1.9847, |
| "step": 86200 |
| }, |
| { |
| "epoch": 27.892695539754364, |
| "grad_norm": 1.2176880836486816, |
| "learning_rate": 0.001, |
| "loss": 1.9904, |
| "step": 86300 |
| }, |
| { |
| "epoch": 27.92501616031028, |
| "grad_norm": 1.1071490049362183, |
| "learning_rate": 0.001, |
| "loss": 1.9906, |
| "step": 86400 |
| }, |
| { |
| "epoch": 27.957336780866193, |
| "grad_norm": 1.5895119905471802, |
| "learning_rate": 0.001, |
| "loss": 2.0066, |
| "step": 86500 |
| }, |
| { |
| "epoch": 27.989657401422107, |
| "grad_norm": 1.3792082071304321, |
| "learning_rate": 0.001, |
| "loss": 1.9941, |
| "step": 86600 |
| }, |
| { |
| "epoch": 28.021978021978022, |
| "grad_norm": 1.2426801919937134, |
| "learning_rate": 0.001, |
| "loss": 1.9242, |
| "step": 86700 |
| }, |
| { |
| "epoch": 28.054298642533936, |
| "grad_norm": 1.7085320949554443, |
| "learning_rate": 0.001, |
| "loss": 1.8433, |
| "step": 86800 |
| }, |
| { |
| "epoch": 28.08661926308985, |
| "grad_norm": 1.3165888786315918, |
| "learning_rate": 0.001, |
| "loss": 1.8714, |
| "step": 86900 |
| }, |
| { |
| "epoch": 28.118939883645766, |
| "grad_norm": 1.4661237001419067, |
| "learning_rate": 0.001, |
| "loss": 1.8749, |
| "step": 87000 |
| }, |
| { |
| "epoch": 28.15126050420168, |
| "grad_norm": 1.6082890033721924, |
| "learning_rate": 0.001, |
| "loss": 1.8612, |
| "step": 87100 |
| }, |
| { |
| "epoch": 28.183581124757595, |
| "grad_norm": 1.3356757164001465, |
| "learning_rate": 0.001, |
| "loss": 1.8897, |
| "step": 87200 |
| }, |
| { |
| "epoch": 28.21590174531351, |
| "grad_norm": 1.557093620300293, |
| "learning_rate": 0.001, |
| "loss": 1.8871, |
| "step": 87300 |
| }, |
| { |
| "epoch": 28.248222365869424, |
| "grad_norm": 1.7916589975357056, |
| "learning_rate": 0.001, |
| "loss": 1.8883, |
| "step": 87400 |
| }, |
| { |
| "epoch": 28.28054298642534, |
| "grad_norm": 1.2493481636047363, |
| "learning_rate": 0.001, |
| "loss": 1.9002, |
| "step": 87500 |
| }, |
| { |
| "epoch": 28.312863606981253, |
| "grad_norm": 1.3864846229553223, |
| "learning_rate": 0.001, |
| "loss": 1.9133, |
| "step": 87600 |
| }, |
| { |
| "epoch": 28.345184227537167, |
| "grad_norm": 1.6483922004699707, |
| "learning_rate": 0.001, |
| "loss": 1.8825, |
| "step": 87700 |
| }, |
| { |
| "epoch": 28.377504848093082, |
| "grad_norm": 1.1912819147109985, |
| "learning_rate": 0.001, |
| "loss": 1.91, |
| "step": 87800 |
| }, |
| { |
| "epoch": 28.409825468648997, |
| "grad_norm": 1.1268420219421387, |
| "learning_rate": 0.001, |
| "loss": 1.9175, |
| "step": 87900 |
| }, |
| { |
| "epoch": 28.44214608920491, |
| "grad_norm": 1.364435076713562, |
| "learning_rate": 0.001, |
| "loss": 1.9084, |
| "step": 88000 |
| }, |
| { |
| "epoch": 28.474466709760826, |
| "grad_norm": 1.3538644313812256, |
| "learning_rate": 0.001, |
| "loss": 1.8976, |
| "step": 88100 |
| }, |
| { |
| "epoch": 28.50678733031674, |
| "grad_norm": 1.308135986328125, |
| "learning_rate": 0.001, |
| "loss": 1.9203, |
| "step": 88200 |
| }, |
| { |
| "epoch": 28.53910795087266, |
| "grad_norm": 1.5055309534072876, |
| "learning_rate": 0.001, |
| "loss": 1.9177, |
| "step": 88300 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 1.4888206720352173, |
| "learning_rate": 0.001, |
| "loss": 1.9213, |
| "step": 88400 |
| }, |
| { |
| "epoch": 28.603749191984488, |
| "grad_norm": 1.112297773361206, |
| "learning_rate": 0.001, |
| "loss": 1.939, |
| "step": 88500 |
| }, |
| { |
| "epoch": 28.636069812540402, |
| "grad_norm": 1.3420555591583252, |
| "learning_rate": 0.001, |
| "loss": 1.9181, |
| "step": 88600 |
| }, |
| { |
| "epoch": 28.668390433096317, |
| "grad_norm": 1.143880009651184, |
| "learning_rate": 0.001, |
| "loss": 1.9568, |
| "step": 88700 |
| }, |
| { |
| "epoch": 28.70071105365223, |
| "grad_norm": 1.6893914937973022, |
| "learning_rate": 0.001, |
| "loss": 1.9262, |
| "step": 88800 |
| }, |
| { |
| "epoch": 28.733031674208146, |
| "grad_norm": 1.6080857515335083, |
| "learning_rate": 0.001, |
| "loss": 1.929, |
| "step": 88900 |
| }, |
| { |
| "epoch": 28.76535229476406, |
| "grad_norm": 1.342633605003357, |
| "learning_rate": 0.001, |
| "loss": 1.9665, |
| "step": 89000 |
| }, |
| { |
| "epoch": 28.797672915319975, |
| "grad_norm": 1.5504629611968994, |
| "learning_rate": 0.001, |
| "loss": 1.9502, |
| "step": 89100 |
| }, |
| { |
| "epoch": 28.82999353587589, |
| "grad_norm": 1.4633890390396118, |
| "learning_rate": 0.001, |
| "loss": 1.9644, |
| "step": 89200 |
| }, |
| { |
| "epoch": 28.862314156431804, |
| "grad_norm": 1.3921465873718262, |
| "learning_rate": 0.001, |
| "loss": 1.9698, |
| "step": 89300 |
| }, |
| { |
| "epoch": 28.89463477698772, |
| "grad_norm": 1.5433467626571655, |
| "learning_rate": 0.001, |
| "loss": 1.9694, |
| "step": 89400 |
| }, |
| { |
| "epoch": 28.926955397543633, |
| "grad_norm": 1.159615397453308, |
| "learning_rate": 0.001, |
| "loss": 1.968, |
| "step": 89500 |
| }, |
| { |
| "epoch": 28.959276018099548, |
| "grad_norm": 1.3793103694915771, |
| "learning_rate": 0.001, |
| "loss": 1.9649, |
| "step": 89600 |
| }, |
| { |
| "epoch": 28.991596638655462, |
| "grad_norm": 2.6745166778564453, |
| "learning_rate": 0.001, |
| "loss": 1.9738, |
| "step": 89700 |
| }, |
| { |
| "epoch": 29.023917259211377, |
| "grad_norm": 1.4499655961990356, |
| "learning_rate": 0.001, |
| "loss": 1.8535, |
| "step": 89800 |
| }, |
| { |
| "epoch": 29.05623787976729, |
| "grad_norm": 1.5100798606872559, |
| "learning_rate": 0.001, |
| "loss": 1.8261, |
| "step": 89900 |
| }, |
| { |
| "epoch": 29.088558500323206, |
| "grad_norm": 1.518397569656372, |
| "learning_rate": 0.001, |
| "loss": 1.8219, |
| "step": 90000 |
| }, |
| { |
| "epoch": 29.12087912087912, |
| "grad_norm": 1.3293129205703735, |
| "learning_rate": 0.001, |
| "loss": 1.8459, |
| "step": 90100 |
| }, |
| { |
| "epoch": 29.153199741435035, |
| "grad_norm": 1.8555089235305786, |
| "learning_rate": 0.001, |
| "loss": 1.8459, |
| "step": 90200 |
| }, |
| { |
| "epoch": 29.18552036199095, |
| "grad_norm": 1.4512817859649658, |
| "learning_rate": 0.001, |
| "loss": 1.8584, |
| "step": 90300 |
| }, |
| { |
| "epoch": 29.217840982546864, |
| "grad_norm": 1.735163688659668, |
| "learning_rate": 0.001, |
| "loss": 1.8434, |
| "step": 90400 |
| }, |
| { |
| "epoch": 29.25016160310278, |
| "grad_norm": 1.3833225965499878, |
| "learning_rate": 0.001, |
| "loss": 1.884, |
| "step": 90500 |
| }, |
| { |
| "epoch": 29.282482223658693, |
| "grad_norm": 1.7664813995361328, |
| "learning_rate": 0.001, |
| "loss": 1.88, |
| "step": 90600 |
| }, |
| { |
| "epoch": 29.314802844214608, |
| "grad_norm": 1.440193772315979, |
| "learning_rate": 0.001, |
| "loss": 1.8567, |
| "step": 90700 |
| }, |
| { |
| "epoch": 29.347123464770522, |
| "grad_norm": 1.239136815071106, |
| "learning_rate": 0.001, |
| "loss": 1.8746, |
| "step": 90800 |
| }, |
| { |
| "epoch": 29.379444085326437, |
| "grad_norm": 1.3825310468673706, |
| "learning_rate": 0.001, |
| "loss": 1.8684, |
| "step": 90900 |
| }, |
| { |
| "epoch": 29.41176470588235, |
| "grad_norm": 1.4752728939056396, |
| "learning_rate": 0.001, |
| "loss": 1.8587, |
| "step": 91000 |
| }, |
| { |
| "epoch": 29.444085326438266, |
| "grad_norm": 1.495429515838623, |
| "learning_rate": 0.001, |
| "loss": 1.8849, |
| "step": 91100 |
| }, |
| { |
| "epoch": 29.47640594699418, |
| "grad_norm": 1.4592987298965454, |
| "learning_rate": 0.001, |
| "loss": 1.8892, |
| "step": 91200 |
| }, |
| { |
| "epoch": 29.5087265675501, |
| "grad_norm": 1.321603536605835, |
| "learning_rate": 0.001, |
| "loss": 1.9044, |
| "step": 91300 |
| }, |
| { |
| "epoch": 29.541047188106013, |
| "grad_norm": 1.2586690187454224, |
| "learning_rate": 0.001, |
| "loss": 1.9141, |
| "step": 91400 |
| }, |
| { |
| "epoch": 29.573367808661928, |
| "grad_norm": 1.3730324506759644, |
| "learning_rate": 0.001, |
| "loss": 1.8934, |
| "step": 91500 |
| }, |
| { |
| "epoch": 29.605688429217842, |
| "grad_norm": 1.6763105392456055, |
| "learning_rate": 0.001, |
| "loss": 1.905, |
| "step": 91600 |
| }, |
| { |
| "epoch": 29.638009049773757, |
| "grad_norm": 1.6392866373062134, |
| "learning_rate": 0.001, |
| "loss": 1.9155, |
| "step": 91700 |
| }, |
| { |
| "epoch": 29.67032967032967, |
| "grad_norm": 1.2820043563842773, |
| "learning_rate": 0.001, |
| "loss": 1.9181, |
| "step": 91800 |
| }, |
| { |
| "epoch": 29.702650290885586, |
| "grad_norm": 1.6354836225509644, |
| "learning_rate": 0.001, |
| "loss": 1.9207, |
| "step": 91900 |
| }, |
| { |
| "epoch": 29.7349709114415, |
| "grad_norm": 1.3978163003921509, |
| "learning_rate": 0.001, |
| "loss": 1.9039, |
| "step": 92000 |
| }, |
| { |
| "epoch": 29.767291531997415, |
| "grad_norm": 1.3554919958114624, |
| "learning_rate": 0.001, |
| "loss": 1.9283, |
| "step": 92100 |
| }, |
| { |
| "epoch": 29.79961215255333, |
| "grad_norm": 1.4941645860671997, |
| "learning_rate": 0.001, |
| "loss": 1.9183, |
| "step": 92200 |
| }, |
| { |
| "epoch": 29.831932773109244, |
| "grad_norm": 1.5266228914260864, |
| "learning_rate": 0.001, |
| "loss": 1.9186, |
| "step": 92300 |
| }, |
| { |
| "epoch": 29.86425339366516, |
| "grad_norm": 1.4845457077026367, |
| "learning_rate": 0.001, |
| "loss": 1.9399, |
| "step": 92400 |
| }, |
| { |
| "epoch": 29.896574014221073, |
| "grad_norm": 1.1934114694595337, |
| "learning_rate": 0.001, |
| "loss": 1.9314, |
| "step": 92500 |
| }, |
| { |
| "epoch": 29.928894634776988, |
| "grad_norm": 1.3535517454147339, |
| "learning_rate": 0.001, |
| "loss": 1.9399, |
| "step": 92600 |
| }, |
| { |
| "epoch": 29.961215255332903, |
| "grad_norm": 1.230025291442871, |
| "learning_rate": 0.001, |
| "loss": 1.9475, |
| "step": 92700 |
| }, |
| { |
| "epoch": 29.993535875888817, |
| "grad_norm": 1.4865684509277344, |
| "learning_rate": 0.001, |
| "loss": 1.9565, |
| "step": 92800 |
| }, |
| { |
| "epoch": 30.02585649644473, |
| "grad_norm": 1.4620200395584106, |
| "learning_rate": 0.001, |
| "loss": 1.828, |
| "step": 92900 |
| }, |
| { |
| "epoch": 30.058177117000646, |
| "grad_norm": 3.006803512573242, |
| "learning_rate": 0.001, |
| "loss": 1.7962, |
| "step": 93000 |
| }, |
| { |
| "epoch": 30.09049773755656, |
| "grad_norm": 1.6281250715255737, |
| "learning_rate": 0.001, |
| "loss": 1.8051, |
| "step": 93100 |
| }, |
| { |
| "epoch": 30.122818358112475, |
| "grad_norm": 1.3403794765472412, |
| "learning_rate": 0.001, |
| "loss": 1.8096, |
| "step": 93200 |
| }, |
| { |
| "epoch": 30.15513897866839, |
| "grad_norm": 1.3069578409194946, |
| "learning_rate": 0.001, |
| "loss": 1.8239, |
| "step": 93300 |
| }, |
| { |
| "epoch": 30.187459599224304, |
| "grad_norm": 1.467483401298523, |
| "learning_rate": 0.001, |
| "loss": 1.8257, |
| "step": 93400 |
| }, |
| { |
| "epoch": 30.21978021978022, |
| "grad_norm": 3.415764570236206, |
| "learning_rate": 0.001, |
| "loss": 1.8274, |
| "step": 93500 |
| }, |
| { |
| "epoch": 30.252100840336134, |
| "grad_norm": 2.0394747257232666, |
| "learning_rate": 0.001, |
| "loss": 1.8256, |
| "step": 93600 |
| }, |
| { |
| "epoch": 30.284421460892048, |
| "grad_norm": 1.498351812362671, |
| "learning_rate": 0.001, |
| "loss": 1.855, |
| "step": 93700 |
| }, |
| { |
| "epoch": 30.316742081447963, |
| "grad_norm": 1.360203742980957, |
| "learning_rate": 0.001, |
| "loss": 1.8346, |
| "step": 93800 |
| }, |
| { |
| "epoch": 30.349062702003877, |
| "grad_norm": 1.4011281728744507, |
| "learning_rate": 0.001, |
| "loss": 1.8497, |
| "step": 93900 |
| }, |
| { |
| "epoch": 30.381383322559792, |
| "grad_norm": 1.6812119483947754, |
| "learning_rate": 0.001, |
| "loss": 1.8551, |
| "step": 94000 |
| }, |
| { |
| "epoch": 30.413703943115706, |
| "grad_norm": 1.4505479335784912, |
| "learning_rate": 0.001, |
| "loss": 1.8439, |
| "step": 94100 |
| }, |
| { |
| "epoch": 30.44602456367162, |
| "grad_norm": 1.6102886199951172, |
| "learning_rate": 0.001, |
| "loss": 1.8579, |
| "step": 94200 |
| }, |
| { |
| "epoch": 30.478345184227535, |
| "grad_norm": 1.5858819484710693, |
| "learning_rate": 0.001, |
| "loss": 1.8701, |
| "step": 94300 |
| }, |
| { |
| "epoch": 30.51066580478345, |
| "grad_norm": 2.008108139038086, |
| "learning_rate": 0.001, |
| "loss": 1.8719, |
| "step": 94400 |
| }, |
| { |
| "epoch": 30.542986425339368, |
| "grad_norm": 1.247879147529602, |
| "learning_rate": 0.001, |
| "loss": 1.8679, |
| "step": 94500 |
| }, |
| { |
| "epoch": 30.575307045895283, |
| "grad_norm": 1.4479765892028809, |
| "learning_rate": 0.001, |
| "loss": 1.8821, |
| "step": 94600 |
| }, |
| { |
| "epoch": 30.607627666451197, |
| "grad_norm": 1.325579047203064, |
| "learning_rate": 0.001, |
| "loss": 1.8679, |
| "step": 94700 |
| }, |
| { |
| "epoch": 30.639948287007112, |
| "grad_norm": 1.6809526681900024, |
| "learning_rate": 0.001, |
| "loss": 1.8784, |
| "step": 94800 |
| }, |
| { |
| "epoch": 30.672268907563026, |
| "grad_norm": 1.5918498039245605, |
| "learning_rate": 0.001, |
| "loss": 1.8975, |
| "step": 94900 |
| }, |
| { |
| "epoch": 30.70458952811894, |
| "grad_norm": 1.6501222848892212, |
| "learning_rate": 0.001, |
| "loss": 1.8868, |
| "step": 95000 |
| }, |
| { |
| "epoch": 30.736910148674855, |
| "grad_norm": 2.2188880443573, |
| "learning_rate": 0.001, |
| "loss": 1.8769, |
| "step": 95100 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.1673107147216797, |
| "learning_rate": 0.001, |
| "loss": 1.9076, |
| "step": 95200 |
| }, |
| { |
| "epoch": 30.801551389786685, |
| "grad_norm": 1.7042407989501953, |
| "learning_rate": 0.001, |
| "loss": 1.8965, |
| "step": 95300 |
| }, |
| { |
| "epoch": 30.8338720103426, |
| "grad_norm": 1.224590539932251, |
| "learning_rate": 0.001, |
| "loss": 1.8954, |
| "step": 95400 |
| }, |
| { |
| "epoch": 30.866192630898514, |
| "grad_norm": 1.4763602018356323, |
| "learning_rate": 0.001, |
| "loss": 1.9105, |
| "step": 95500 |
| }, |
| { |
| "epoch": 30.89851325145443, |
| "grad_norm": 1.6836724281311035, |
| "learning_rate": 0.001, |
| "loss": 1.8827, |
| "step": 95600 |
| }, |
| { |
| "epoch": 30.930833872010343, |
| "grad_norm": 1.3475334644317627, |
| "learning_rate": 0.001, |
| "loss": 1.9173, |
| "step": 95700 |
| }, |
| { |
| "epoch": 30.963154492566257, |
| "grad_norm": 1.347589373588562, |
| "learning_rate": 0.001, |
| "loss": 1.889, |
| "step": 95800 |
| }, |
| { |
| "epoch": 30.995475113122172, |
| "grad_norm": 1.473758339881897, |
| "learning_rate": 0.001, |
| "loss": 1.8965, |
| "step": 95900 |
| }, |
| { |
| "epoch": 31.027795733678087, |
| "grad_norm": 1.728955864906311, |
| "learning_rate": 0.001, |
| "loss": 1.8083, |
| "step": 96000 |
| }, |
| { |
| "epoch": 31.060116354234, |
| "grad_norm": 1.5232839584350586, |
| "learning_rate": 0.001, |
| "loss": 1.7555, |
| "step": 96100 |
| }, |
| { |
| "epoch": 31.092436974789916, |
| "grad_norm": 1.8657755851745605, |
| "learning_rate": 0.001, |
| "loss": 1.7685, |
| "step": 96200 |
| }, |
| { |
| "epoch": 31.12475759534583, |
| "grad_norm": 1.4750165939331055, |
| "learning_rate": 0.001, |
| "loss": 1.7735, |
| "step": 96300 |
| }, |
| { |
| "epoch": 31.157078215901745, |
| "grad_norm": 1.5432738065719604, |
| "learning_rate": 0.001, |
| "loss": 1.7882, |
| "step": 96400 |
| }, |
| { |
| "epoch": 31.18939883645766, |
| "grad_norm": 1.422799825668335, |
| "learning_rate": 0.001, |
| "loss": 1.7952, |
| "step": 96500 |
| }, |
| { |
| "epoch": 31.221719457013574, |
| "grad_norm": 1.6849409341812134, |
| "learning_rate": 0.001, |
| "loss": 1.784, |
| "step": 96600 |
| }, |
| { |
| "epoch": 31.25404007756949, |
| "grad_norm": 1.4621268510818481, |
| "learning_rate": 0.001, |
| "loss": 1.788, |
| "step": 96700 |
| }, |
| { |
| "epoch": 31.286360698125403, |
| "grad_norm": 1.3027772903442383, |
| "learning_rate": 0.001, |
| "loss": 1.8025, |
| "step": 96800 |
| }, |
| { |
| "epoch": 31.318681318681318, |
| "grad_norm": 1.5838264226913452, |
| "learning_rate": 0.001, |
| "loss": 1.8274, |
| "step": 96900 |
| }, |
| { |
| "epoch": 31.351001939237232, |
| "grad_norm": 1.368515968322754, |
| "learning_rate": 0.001, |
| "loss": 1.8323, |
| "step": 97000 |
| }, |
| { |
| "epoch": 31.383322559793147, |
| "grad_norm": 1.6732114553451538, |
| "learning_rate": 0.001, |
| "loss": 1.8276, |
| "step": 97100 |
| }, |
| { |
| "epoch": 31.41564318034906, |
| "grad_norm": 1.4694806337356567, |
| "learning_rate": 0.001, |
| "loss": 1.8157, |
| "step": 97200 |
| }, |
| { |
| "epoch": 31.447963800904976, |
| "grad_norm": 2.3622124195098877, |
| "learning_rate": 0.001, |
| "loss": 1.8327, |
| "step": 97300 |
| }, |
| { |
| "epoch": 31.48028442146089, |
| "grad_norm": 1.6618635654449463, |
| "learning_rate": 0.001, |
| "loss": 1.8298, |
| "step": 97400 |
| }, |
| { |
| "epoch": 31.51260504201681, |
| "grad_norm": 1.743264079093933, |
| "learning_rate": 0.001, |
| "loss": 1.8448, |
| "step": 97500 |
| }, |
| { |
| "epoch": 31.544925662572723, |
| "grad_norm": 1.2837010622024536, |
| "learning_rate": 0.001, |
| "loss": 1.8467, |
| "step": 97600 |
| }, |
| { |
| "epoch": 31.577246283128638, |
| "grad_norm": 1.7191213369369507, |
| "learning_rate": 0.001, |
| "loss": 1.8323, |
| "step": 97700 |
| }, |
| { |
| "epoch": 31.609566903684552, |
| "grad_norm": 2.304013729095459, |
| "learning_rate": 0.001, |
| "loss": 1.8629, |
| "step": 97800 |
| }, |
| { |
| "epoch": 31.641887524240467, |
| "grad_norm": 1.6232678890228271, |
| "learning_rate": 0.001, |
| "loss": 1.8508, |
| "step": 97900 |
| }, |
| { |
| "epoch": 31.67420814479638, |
| "grad_norm": 2.126199722290039, |
| "learning_rate": 0.001, |
| "loss": 1.858, |
| "step": 98000 |
| }, |
| { |
| "epoch": 31.706528765352296, |
| "grad_norm": 1.7926188707351685, |
| "learning_rate": 0.001, |
| "loss": 1.8507, |
| "step": 98100 |
| }, |
| { |
| "epoch": 31.73884938590821, |
| "grad_norm": 1.4954445362091064, |
| "learning_rate": 0.001, |
| "loss": 1.8566, |
| "step": 98200 |
| }, |
| { |
| "epoch": 31.771170006464125, |
| "grad_norm": 1.5035561323165894, |
| "learning_rate": 0.001, |
| "loss": 1.8696, |
| "step": 98300 |
| }, |
| { |
| "epoch": 31.80349062702004, |
| "grad_norm": 1.323290467262268, |
| "learning_rate": 0.001, |
| "loss": 1.8866, |
| "step": 98400 |
| }, |
| { |
| "epoch": 31.835811247575954, |
| "grad_norm": 1.6078685522079468, |
| "learning_rate": 0.001, |
| "loss": 1.872, |
| "step": 98500 |
| }, |
| { |
| "epoch": 31.86813186813187, |
| "grad_norm": 1.5674740076065063, |
| "learning_rate": 0.001, |
| "loss": 1.8907, |
| "step": 98600 |
| }, |
| { |
| "epoch": 31.900452488687783, |
| "grad_norm": 1.6643004417419434, |
| "learning_rate": 0.001, |
| "loss": 1.8766, |
| "step": 98700 |
| }, |
| { |
| "epoch": 31.932773109243698, |
| "grad_norm": 1.5275764465332031, |
| "learning_rate": 0.001, |
| "loss": 1.8751, |
| "step": 98800 |
| }, |
| { |
| "epoch": 31.965093729799612, |
| "grad_norm": 1.471692681312561, |
| "learning_rate": 0.001, |
| "loss": 1.8686, |
| "step": 98900 |
| }, |
| { |
| "epoch": 31.997414350355527, |
| "grad_norm": 1.378650188446045, |
| "learning_rate": 0.001, |
| "loss": 1.8815, |
| "step": 99000 |
| }, |
| { |
| "epoch": 32.02973497091144, |
| "grad_norm": 1.5829144716262817, |
| "learning_rate": 0.001, |
| "loss": 1.7734, |
| "step": 99100 |
| }, |
| { |
| "epoch": 32.062055591467356, |
| "grad_norm": 1.8891956806182861, |
| "learning_rate": 0.001, |
| "loss": 1.7384, |
| "step": 99200 |
| }, |
| { |
| "epoch": 32.09437621202327, |
| "grad_norm": 1.706789493560791, |
| "learning_rate": 0.001, |
| "loss": 1.7708, |
| "step": 99300 |
| }, |
| { |
| "epoch": 32.126696832579185, |
| "grad_norm": 1.6363348960876465, |
| "learning_rate": 0.001, |
| "loss": 1.751, |
| "step": 99400 |
| }, |
| { |
| "epoch": 32.1590174531351, |
| "grad_norm": 1.6457102298736572, |
| "learning_rate": 0.001, |
| "loss": 1.765, |
| "step": 99500 |
| }, |
| { |
| "epoch": 32.191338073691014, |
| "grad_norm": 1.5266406536102295, |
| "learning_rate": 0.001, |
| "loss": 1.7812, |
| "step": 99600 |
| }, |
| { |
| "epoch": 32.22365869424693, |
| "grad_norm": 1.9714754819869995, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 99700 |
| }, |
| { |
| "epoch": 32.25597931480284, |
| "grad_norm": 1.6170477867126465, |
| "learning_rate": 0.001, |
| "loss": 1.7729, |
| "step": 99800 |
| }, |
| { |
| "epoch": 32.28829993535876, |
| "grad_norm": 1.3753221035003662, |
| "learning_rate": 0.001, |
| "loss": 1.7805, |
| "step": 99900 |
| }, |
| { |
| "epoch": 32.32062055591467, |
| "grad_norm": 2.1753334999084473, |
| "learning_rate": 0.001, |
| "loss": 1.7737, |
| "step": 100000 |
| }, |
| { |
| "epoch": 32.35294117647059, |
| "grad_norm": 1.8860663175582886, |
| "learning_rate": 0.001, |
| "loss": 1.7906, |
| "step": 100100 |
| }, |
| { |
| "epoch": 32.3852617970265, |
| "grad_norm": 2.0654399394989014, |
| "learning_rate": 0.001, |
| "loss": 1.7885, |
| "step": 100200 |
| }, |
| { |
| "epoch": 32.417582417582416, |
| "grad_norm": 1.7325553894042969, |
| "learning_rate": 0.001, |
| "loss": 1.8063, |
| "step": 100300 |
| }, |
| { |
| "epoch": 32.44990303813833, |
| "grad_norm": 1.588712215423584, |
| "learning_rate": 0.001, |
| "loss": 1.8004, |
| "step": 100400 |
| }, |
| { |
| "epoch": 32.482223658694245, |
| "grad_norm": 1.6810243129730225, |
| "learning_rate": 0.001, |
| "loss": 1.8152, |
| "step": 100500 |
| }, |
| { |
| "epoch": 32.51454427925016, |
| "grad_norm": 1.6769487857818604, |
| "learning_rate": 0.001, |
| "loss": 1.8212, |
| "step": 100600 |
| }, |
| { |
| "epoch": 32.546864899806074, |
| "grad_norm": 1.9445384740829468, |
| "learning_rate": 0.001, |
| "loss": 1.7917, |
| "step": 100700 |
| }, |
| { |
| "epoch": 32.57918552036199, |
| "grad_norm": 1.6605241298675537, |
| "learning_rate": 0.001, |
| "loss": 1.8362, |
| "step": 100800 |
| }, |
| { |
| "epoch": 32.6115061409179, |
| "grad_norm": 2.058520793914795, |
| "learning_rate": 0.001, |
| "loss": 1.8341, |
| "step": 100900 |
| }, |
| { |
| "epoch": 32.64382676147382, |
| "grad_norm": 1.3105531930923462, |
| "learning_rate": 0.001, |
| "loss": 1.8431, |
| "step": 101000 |
| }, |
| { |
| "epoch": 32.67614738202973, |
| "grad_norm": 1.626055359840393, |
| "learning_rate": 0.001, |
| "loss": 1.824, |
| "step": 101100 |
| }, |
| { |
| "epoch": 32.70846800258565, |
| "grad_norm": 1.5629281997680664, |
| "learning_rate": 0.001, |
| "loss": 1.8388, |
| "step": 101200 |
| }, |
| { |
| "epoch": 32.74078862314156, |
| "grad_norm": 1.8073807954788208, |
| "learning_rate": 0.001, |
| "loss": 1.8373, |
| "step": 101300 |
| }, |
| { |
| "epoch": 32.773109243697476, |
| "grad_norm": 2.0889344215393066, |
| "learning_rate": 0.001, |
| "loss": 1.8304, |
| "step": 101400 |
| }, |
| { |
| "epoch": 32.80542986425339, |
| "grad_norm": 1.771985650062561, |
| "learning_rate": 0.001, |
| "loss": 1.8436, |
| "step": 101500 |
| }, |
| { |
| "epoch": 32.837750484809305, |
| "grad_norm": 1.631714940071106, |
| "learning_rate": 0.001, |
| "loss": 1.8502, |
| "step": 101600 |
| }, |
| { |
| "epoch": 32.87007110536522, |
| "grad_norm": 2.003098487854004, |
| "learning_rate": 0.001, |
| "loss": 1.8608, |
| "step": 101700 |
| }, |
| { |
| "epoch": 32.902391725921134, |
| "grad_norm": 1.7163927555084229, |
| "learning_rate": 0.001, |
| "loss": 1.8409, |
| "step": 101800 |
| }, |
| { |
| "epoch": 32.93471234647705, |
| "grad_norm": 1.5179773569107056, |
| "learning_rate": 0.001, |
| "loss": 1.836, |
| "step": 101900 |
| }, |
| { |
| "epoch": 32.967032967032964, |
| "grad_norm": 1.826611042022705, |
| "learning_rate": 0.001, |
| "loss": 1.8544, |
| "step": 102000 |
| }, |
| { |
| "epoch": 32.999353587588885, |
| "grad_norm": 2.0512094497680664, |
| "learning_rate": 0.001, |
| "loss": 1.8587, |
| "step": 102100 |
| }, |
| { |
| "epoch": 33.0316742081448, |
| "grad_norm": 2.0743322372436523, |
| "learning_rate": 0.001, |
| "loss": 1.6994, |
| "step": 102200 |
| }, |
| { |
| "epoch": 33.063994828700714, |
| "grad_norm": 2.1216299533843994, |
| "learning_rate": 0.001, |
| "loss": 1.7167, |
| "step": 102300 |
| }, |
| { |
| "epoch": 33.09631544925663, |
| "grad_norm": 2.0061864852905273, |
| "learning_rate": 0.001, |
| "loss": 1.7269, |
| "step": 102400 |
| }, |
| { |
| "epoch": 33.12863606981254, |
| "grad_norm": 1.8516168594360352, |
| "learning_rate": 0.001, |
| "loss": 1.7471, |
| "step": 102500 |
| }, |
| { |
| "epoch": 33.16095669036846, |
| "grad_norm": 2.0900888442993164, |
| "learning_rate": 0.001, |
| "loss": 1.7444, |
| "step": 102600 |
| }, |
| { |
| "epoch": 33.19327731092437, |
| "grad_norm": 1.9788792133331299, |
| "learning_rate": 0.001, |
| "loss": 1.7557, |
| "step": 102700 |
| }, |
| { |
| "epoch": 33.22559793148029, |
| "grad_norm": 2.034575939178467, |
| "learning_rate": 0.001, |
| "loss": 1.7563, |
| "step": 102800 |
| }, |
| { |
| "epoch": 33.2579185520362, |
| "grad_norm": 2.135305881500244, |
| "learning_rate": 0.001, |
| "loss": 1.7338, |
| "step": 102900 |
| }, |
| { |
| "epoch": 33.290239172592116, |
| "grad_norm": 1.8343353271484375, |
| "learning_rate": 0.001, |
| "loss": 1.7662, |
| "step": 103000 |
| }, |
| { |
| "epoch": 33.32255979314803, |
| "grad_norm": 2.277712106704712, |
| "learning_rate": 0.001, |
| "loss": 1.7439, |
| "step": 103100 |
| }, |
| { |
| "epoch": 33.354880413703945, |
| "grad_norm": 1.983909010887146, |
| "learning_rate": 0.001, |
| "loss": 1.7626, |
| "step": 103200 |
| }, |
| { |
| "epoch": 33.38720103425986, |
| "grad_norm": 1.6377862691879272, |
| "learning_rate": 0.001, |
| "loss": 1.7789, |
| "step": 103300 |
| }, |
| { |
| "epoch": 33.419521654815775, |
| "grad_norm": 1.8458659648895264, |
| "learning_rate": 0.001, |
| "loss": 1.7728, |
| "step": 103400 |
| }, |
| { |
| "epoch": 33.45184227537169, |
| "grad_norm": 1.9960947036743164, |
| "learning_rate": 0.001, |
| "loss": 1.7659, |
| "step": 103500 |
| }, |
| { |
| "epoch": 33.484162895927604, |
| "grad_norm": 2.0840041637420654, |
| "learning_rate": 0.001, |
| "loss": 1.763, |
| "step": 103600 |
| }, |
| { |
| "epoch": 33.51648351648352, |
| "grad_norm": 2.215972661972046, |
| "learning_rate": 0.001, |
| "loss": 1.7933, |
| "step": 103700 |
| }, |
| { |
| "epoch": 33.54880413703943, |
| "grad_norm": 1.8777416944503784, |
| "learning_rate": 0.001, |
| "loss": 1.7918, |
| "step": 103800 |
| }, |
| { |
| "epoch": 33.58112475759535, |
| "grad_norm": 1.7286779880523682, |
| "learning_rate": 0.001, |
| "loss": 1.792, |
| "step": 103900 |
| }, |
| { |
| "epoch": 33.61344537815126, |
| "grad_norm": 2.0685818195343018, |
| "learning_rate": 0.001, |
| "loss": 1.7941, |
| "step": 104000 |
| }, |
| { |
| "epoch": 33.645765998707176, |
| "grad_norm": 1.928328037261963, |
| "learning_rate": 0.001, |
| "loss": 1.8152, |
| "step": 104100 |
| }, |
| { |
| "epoch": 33.67808661926309, |
| "grad_norm": 1.7642704248428345, |
| "learning_rate": 0.001, |
| "loss": 1.8128, |
| "step": 104200 |
| }, |
| { |
| "epoch": 33.710407239819006, |
| "grad_norm": 2.2432053089141846, |
| "learning_rate": 0.001, |
| "loss": 1.8034, |
| "step": 104300 |
| }, |
| { |
| "epoch": 33.74272786037492, |
| "grad_norm": 2.012679100036621, |
| "learning_rate": 0.001, |
| "loss": 1.8132, |
| "step": 104400 |
| }, |
| { |
| "epoch": 33.775048480930835, |
| "grad_norm": 2.067655086517334, |
| "learning_rate": 0.001, |
| "loss": 1.8267, |
| "step": 104500 |
| }, |
| { |
| "epoch": 33.80736910148675, |
| "grad_norm": 1.9166897535324097, |
| "learning_rate": 0.001, |
| "loss": 1.8191, |
| "step": 104600 |
| }, |
| { |
| "epoch": 33.839689722042664, |
| "grad_norm": 1.8261538743972778, |
| "learning_rate": 0.001, |
| "loss": 1.8396, |
| "step": 104700 |
| }, |
| { |
| "epoch": 33.87201034259858, |
| "grad_norm": 1.6121453046798706, |
| "learning_rate": 0.001, |
| "loss": 1.8192, |
| "step": 104800 |
| }, |
| { |
| "epoch": 33.90433096315449, |
| "grad_norm": 1.7790288925170898, |
| "learning_rate": 0.001, |
| "loss": 1.8441, |
| "step": 104900 |
| }, |
| { |
| "epoch": 33.93665158371041, |
| "grad_norm": 2.033315658569336, |
| "learning_rate": 0.001, |
| "loss": 1.8407, |
| "step": 105000 |
| }, |
| { |
| "epoch": 33.96897220426632, |
| "grad_norm": 1.9757274389266968, |
| "learning_rate": 0.001, |
| "loss": 1.8312, |
| "step": 105100 |
| }, |
| { |
| "epoch": 34.00129282482224, |
| "grad_norm": 1.8282935619354248, |
| "learning_rate": 0.001, |
| "loss": 1.824, |
| "step": 105200 |
| }, |
| { |
| "epoch": 34.03361344537815, |
| "grad_norm": 1.3721950054168701, |
| "learning_rate": 0.001, |
| "loss": 1.6864, |
| "step": 105300 |
| }, |
| { |
| "epoch": 34.065934065934066, |
| "grad_norm": 2.061631679534912, |
| "learning_rate": 0.001, |
| "loss": 1.6903, |
| "step": 105400 |
| }, |
| { |
| "epoch": 34.09825468648998, |
| "grad_norm": 2.0354113578796387, |
| "learning_rate": 0.001, |
| "loss": 1.708, |
| "step": 105500 |
| }, |
| { |
| "epoch": 34.130575307045895, |
| "grad_norm": 1.6836439371109009, |
| "learning_rate": 0.001, |
| "loss": 1.7179, |
| "step": 105600 |
| }, |
| { |
| "epoch": 34.16289592760181, |
| "grad_norm": 2.024641513824463, |
| "learning_rate": 0.001, |
| "loss": 1.7056, |
| "step": 105700 |
| }, |
| { |
| "epoch": 34.195216548157724, |
| "grad_norm": 1.5095860958099365, |
| "learning_rate": 0.001, |
| "loss": 1.705, |
| "step": 105800 |
| }, |
| { |
| "epoch": 34.22753716871364, |
| "grad_norm": 2.405456781387329, |
| "learning_rate": 0.001, |
| "loss": 1.7241, |
| "step": 105900 |
| }, |
| { |
| "epoch": 34.25985778926955, |
| "grad_norm": 1.871866226196289, |
| "learning_rate": 0.001, |
| "loss": 1.7392, |
| "step": 106000 |
| }, |
| { |
| "epoch": 34.29217840982547, |
| "grad_norm": 2.0286736488342285, |
| "learning_rate": 0.001, |
| "loss": 1.7312, |
| "step": 106100 |
| }, |
| { |
| "epoch": 34.32449903038138, |
| "grad_norm": 1.6875593662261963, |
| "learning_rate": 0.001, |
| "loss": 1.747, |
| "step": 106200 |
| }, |
| { |
| "epoch": 34.3568196509373, |
| "grad_norm": 1.6652581691741943, |
| "learning_rate": 0.001, |
| "loss": 1.7525, |
| "step": 106300 |
| }, |
| { |
| "epoch": 34.38914027149321, |
| "grad_norm": 1.8134232759475708, |
| "learning_rate": 0.001, |
| "loss": 1.7527, |
| "step": 106400 |
| }, |
| { |
| "epoch": 34.421460892049126, |
| "grad_norm": 1.7698140144348145, |
| "learning_rate": 0.001, |
| "loss": 1.762, |
| "step": 106500 |
| }, |
| { |
| "epoch": 34.45378151260504, |
| "grad_norm": 1.5082416534423828, |
| "learning_rate": 0.001, |
| "loss": 1.7687, |
| "step": 106600 |
| }, |
| { |
| "epoch": 34.486102133160955, |
| "grad_norm": 1.5613726377487183, |
| "learning_rate": 0.001, |
| "loss": 1.7655, |
| "step": 106700 |
| }, |
| { |
| "epoch": 34.51842275371687, |
| "grad_norm": 1.7272530794143677, |
| "learning_rate": 0.001, |
| "loss": 1.7483, |
| "step": 106800 |
| }, |
| { |
| "epoch": 34.550743374272784, |
| "grad_norm": 1.644972801208496, |
| "learning_rate": 0.001, |
| "loss": 1.7832, |
| "step": 106900 |
| }, |
| { |
| "epoch": 34.5830639948287, |
| "grad_norm": 1.644237756729126, |
| "learning_rate": 0.001, |
| "loss": 1.7609, |
| "step": 107000 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.6201183795928955, |
| "learning_rate": 0.001, |
| "loss": 1.772, |
| "step": 107100 |
| }, |
| { |
| "epoch": 34.64770523594053, |
| "grad_norm": 1.9227070808410645, |
| "learning_rate": 0.001, |
| "loss": 1.7954, |
| "step": 107200 |
| }, |
| { |
| "epoch": 34.68002585649644, |
| "grad_norm": 1.4974156618118286, |
| "learning_rate": 0.001, |
| "loss": 1.7881, |
| "step": 107300 |
| }, |
| { |
| "epoch": 34.71234647705236, |
| "grad_norm": 1.9709665775299072, |
| "learning_rate": 0.001, |
| "loss": 1.7943, |
| "step": 107400 |
| }, |
| { |
| "epoch": 34.74466709760827, |
| "grad_norm": 1.6651779413223267, |
| "learning_rate": 0.001, |
| "loss": 1.803, |
| "step": 107500 |
| }, |
| { |
| "epoch": 34.776987718164186, |
| "grad_norm": 1.9187260866165161, |
| "learning_rate": 0.001, |
| "loss": 1.7989, |
| "step": 107600 |
| }, |
| { |
| "epoch": 34.8093083387201, |
| "grad_norm": 1.8573428392410278, |
| "learning_rate": 0.001, |
| "loss": 1.8145, |
| "step": 107700 |
| }, |
| { |
| "epoch": 34.841628959276015, |
| "grad_norm": 1.4682703018188477, |
| "learning_rate": 0.001, |
| "loss": 1.8096, |
| "step": 107800 |
| }, |
| { |
| "epoch": 34.87394957983193, |
| "grad_norm": 2.2076940536499023, |
| "learning_rate": 0.001, |
| "loss": 1.8052, |
| "step": 107900 |
| }, |
| { |
| "epoch": 34.906270200387844, |
| "grad_norm": 1.7180118560791016, |
| "learning_rate": 0.001, |
| "loss": 1.8129, |
| "step": 108000 |
| }, |
| { |
| "epoch": 34.93859082094376, |
| "grad_norm": 1.665969729423523, |
| "learning_rate": 0.001, |
| "loss": 1.8067, |
| "step": 108100 |
| }, |
| { |
| "epoch": 34.97091144149967, |
| "grad_norm": 5.9310622215271, |
| "learning_rate": 0.001, |
| "loss": 1.8212, |
| "step": 108200 |
| }, |
| { |
| "epoch": 35.003232062055595, |
| "grad_norm": 1.5211377143859863, |
| "learning_rate": 0.001, |
| "loss": 1.8044, |
| "step": 108300 |
| }, |
| { |
| "epoch": 35.03555268261151, |
| "grad_norm": 1.2844172716140747, |
| "learning_rate": 0.001, |
| "loss": 1.6721, |
| "step": 108400 |
| }, |
| { |
| "epoch": 35.067873303167424, |
| "grad_norm": 1.5357627868652344, |
| "learning_rate": 0.001, |
| "loss": 1.6904, |
| "step": 108500 |
| }, |
| { |
| "epoch": 35.10019392372334, |
| "grad_norm": 1.5204764604568481, |
| "learning_rate": 0.001, |
| "loss": 1.6687, |
| "step": 108600 |
| }, |
| { |
| "epoch": 35.13251454427925, |
| "grad_norm": 2.441347599029541, |
| "learning_rate": 0.001, |
| "loss": 1.675, |
| "step": 108700 |
| }, |
| { |
| "epoch": 35.16483516483517, |
| "grad_norm": 1.9407317638397217, |
| "learning_rate": 0.001, |
| "loss": 1.7092, |
| "step": 108800 |
| }, |
| { |
| "epoch": 35.19715578539108, |
| "grad_norm": 1.4935519695281982, |
| "learning_rate": 0.001, |
| "loss": 1.7088, |
| "step": 108900 |
| }, |
| { |
| "epoch": 35.229476405947, |
| "grad_norm": 1.2903261184692383, |
| "learning_rate": 0.001, |
| "loss": 1.6767, |
| "step": 109000 |
| }, |
| { |
| "epoch": 35.26179702650291, |
| "grad_norm": 2.764295816421509, |
| "learning_rate": 0.001, |
| "loss": 1.7007, |
| "step": 109100 |
| }, |
| { |
| "epoch": 35.294117647058826, |
| "grad_norm": 1.714455008506775, |
| "learning_rate": 0.001, |
| "loss": 1.7017, |
| "step": 109200 |
| }, |
| { |
| "epoch": 35.32643826761474, |
| "grad_norm": 1.885419487953186, |
| "learning_rate": 0.001, |
| "loss": 1.7402, |
| "step": 109300 |
| }, |
| { |
| "epoch": 35.358758888170655, |
| "grad_norm": 1.5595377683639526, |
| "learning_rate": 0.001, |
| "loss": 1.7365, |
| "step": 109400 |
| }, |
| { |
| "epoch": 35.39107950872657, |
| "grad_norm": 1.3263633251190186, |
| "learning_rate": 0.001, |
| "loss": 1.7513, |
| "step": 109500 |
| }, |
| { |
| "epoch": 35.423400129282484, |
| "grad_norm": 1.623655080795288, |
| "learning_rate": 0.001, |
| "loss": 1.7408, |
| "step": 109600 |
| }, |
| { |
| "epoch": 35.4557207498384, |
| "grad_norm": 1.5252445936203003, |
| "learning_rate": 0.001, |
| "loss": 1.7426, |
| "step": 109700 |
| }, |
| { |
| "epoch": 35.48804137039431, |
| "grad_norm": 1.5424871444702148, |
| "learning_rate": 0.001, |
| "loss": 1.7424, |
| "step": 109800 |
| }, |
| { |
| "epoch": 35.52036199095023, |
| "grad_norm": 1.604275107383728, |
| "learning_rate": 0.001, |
| "loss": 1.7404, |
| "step": 109900 |
| }, |
| { |
| "epoch": 35.55268261150614, |
| "grad_norm": 1.3040121793746948, |
| "learning_rate": 0.001, |
| "loss": 1.7377, |
| "step": 110000 |
| }, |
| { |
| "epoch": 35.58500323206206, |
| "grad_norm": 1.4881088733673096, |
| "learning_rate": 0.001, |
| "loss": 1.7472, |
| "step": 110100 |
| }, |
| { |
| "epoch": 35.61732385261797, |
| "grad_norm": 1.2945785522460938, |
| "learning_rate": 0.001, |
| "loss": 1.7656, |
| "step": 110200 |
| }, |
| { |
| "epoch": 35.649644473173886, |
| "grad_norm": 1.5668212175369263, |
| "learning_rate": 0.001, |
| "loss": 1.7522, |
| "step": 110300 |
| }, |
| { |
| "epoch": 35.6819650937298, |
| "grad_norm": 1.6249139308929443, |
| "learning_rate": 0.001, |
| "loss": 1.7661, |
| "step": 110400 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 1.5623595714569092, |
| "learning_rate": 0.001, |
| "loss": 1.7587, |
| "step": 110500 |
| }, |
| { |
| "epoch": 35.74660633484163, |
| "grad_norm": 1.6370724439620972, |
| "learning_rate": 0.001, |
| "loss": 1.7873, |
| "step": 110600 |
| }, |
| { |
| "epoch": 35.778926955397544, |
| "grad_norm": 1.387434959411621, |
| "learning_rate": 0.001, |
| "loss": 1.7579, |
| "step": 110700 |
| }, |
| { |
| "epoch": 35.81124757595346, |
| "grad_norm": 1.4653477668762207, |
| "learning_rate": 0.001, |
| "loss": 1.7723, |
| "step": 110800 |
| }, |
| { |
| "epoch": 35.84356819650937, |
| "grad_norm": 1.6415297985076904, |
| "learning_rate": 0.001, |
| "loss": 1.7719, |
| "step": 110900 |
| }, |
| { |
| "epoch": 35.87588881706529, |
| "grad_norm": 1.447180151939392, |
| "learning_rate": 0.001, |
| "loss": 1.7818, |
| "step": 111000 |
| }, |
| { |
| "epoch": 35.9082094376212, |
| "grad_norm": 1.4582403898239136, |
| "learning_rate": 0.001, |
| "loss": 1.7835, |
| "step": 111100 |
| }, |
| { |
| "epoch": 35.94053005817712, |
| "grad_norm": 1.6619702577590942, |
| "learning_rate": 0.001, |
| "loss": 1.7904, |
| "step": 111200 |
| }, |
| { |
| "epoch": 35.97285067873303, |
| "grad_norm": 1.6435052156448364, |
| "learning_rate": 0.001, |
| "loss": 1.7815, |
| "step": 111300 |
| }, |
| { |
| "epoch": 36.005171299288946, |
| "grad_norm": 1.2920403480529785, |
| "learning_rate": 0.001, |
| "loss": 1.7803, |
| "step": 111400 |
| }, |
| { |
| "epoch": 36.03749191984486, |
| "grad_norm": 1.1500316858291626, |
| "learning_rate": 0.001, |
| "loss": 1.6365, |
| "step": 111500 |
| }, |
| { |
| "epoch": 36.069812540400775, |
| "grad_norm": 1.4132637977600098, |
| "learning_rate": 0.001, |
| "loss": 1.6589, |
| "step": 111600 |
| }, |
| { |
| "epoch": 36.10213316095669, |
| "grad_norm": 1.4161231517791748, |
| "learning_rate": 0.001, |
| "loss": 1.6784, |
| "step": 111700 |
| }, |
| { |
| "epoch": 36.134453781512605, |
| "grad_norm": 1.4937621355056763, |
| "learning_rate": 0.001, |
| "loss": 1.6499, |
| "step": 111800 |
| }, |
| { |
| "epoch": 36.16677440206852, |
| "grad_norm": 1.2990803718566895, |
| "learning_rate": 0.001, |
| "loss": 1.6766, |
| "step": 111900 |
| }, |
| { |
| "epoch": 36.199095022624434, |
| "grad_norm": 1.636744737625122, |
| "learning_rate": 0.001, |
| "loss": 1.6621, |
| "step": 112000 |
| }, |
| { |
| "epoch": 36.23141564318035, |
| "grad_norm": 1.783856987953186, |
| "learning_rate": 0.001, |
| "loss": 1.6684, |
| "step": 112100 |
| }, |
| { |
| "epoch": 36.26373626373626, |
| "grad_norm": 1.3172340393066406, |
| "learning_rate": 0.001, |
| "loss": 1.6902, |
| "step": 112200 |
| }, |
| { |
| "epoch": 36.29605688429218, |
| "grad_norm": 1.2710336446762085, |
| "learning_rate": 0.001, |
| "loss": 1.6858, |
| "step": 112300 |
| }, |
| { |
| "epoch": 36.32837750484809, |
| "grad_norm": 2.070700168609619, |
| "learning_rate": 0.001, |
| "loss": 1.6842, |
| "step": 112400 |
| }, |
| { |
| "epoch": 36.36069812540401, |
| "grad_norm": 1.738664984703064, |
| "learning_rate": 0.001, |
| "loss": 1.7021, |
| "step": 112500 |
| }, |
| { |
| "epoch": 36.39301874595992, |
| "grad_norm": 1.4381351470947266, |
| "learning_rate": 0.001, |
| "loss": 1.7221, |
| "step": 112600 |
| }, |
| { |
| "epoch": 36.425339366515836, |
| "grad_norm": 1.3279963731765747, |
| "learning_rate": 0.001, |
| "loss": 1.7187, |
| "step": 112700 |
| }, |
| { |
| "epoch": 36.45765998707175, |
| "grad_norm": 1.7753669023513794, |
| "learning_rate": 0.001, |
| "loss": 1.7147, |
| "step": 112800 |
| }, |
| { |
| "epoch": 36.489980607627665, |
| "grad_norm": 2.194094657897949, |
| "learning_rate": 0.001, |
| "loss": 1.7264, |
| "step": 112900 |
| }, |
| { |
| "epoch": 36.52230122818358, |
| "grad_norm": 1.360891342163086, |
| "learning_rate": 0.001, |
| "loss": 1.7423, |
| "step": 113000 |
| }, |
| { |
| "epoch": 36.554621848739494, |
| "grad_norm": 1.8281059265136719, |
| "learning_rate": 0.001, |
| "loss": 1.7389, |
| "step": 113100 |
| }, |
| { |
| "epoch": 36.58694246929541, |
| "grad_norm": 7.225008964538574, |
| "learning_rate": 0.001, |
| "loss": 1.7317, |
| "step": 113200 |
| }, |
| { |
| "epoch": 36.61926308985132, |
| "grad_norm": 1.3751835823059082, |
| "learning_rate": 0.001, |
| "loss": 1.7178, |
| "step": 113300 |
| }, |
| { |
| "epoch": 36.65158371040724, |
| "grad_norm": 1.3303834199905396, |
| "learning_rate": 0.001, |
| "loss": 1.7376, |
| "step": 113400 |
| }, |
| { |
| "epoch": 36.68390433096315, |
| "grad_norm": 1.3303159475326538, |
| "learning_rate": 0.001, |
| "loss": 1.7494, |
| "step": 113500 |
| }, |
| { |
| "epoch": 36.71622495151907, |
| "grad_norm": 1.3926934003829956, |
| "learning_rate": 0.001, |
| "loss": 1.7545, |
| "step": 113600 |
| }, |
| { |
| "epoch": 36.74854557207498, |
| "grad_norm": 1.3599934577941895, |
| "learning_rate": 0.001, |
| "loss": 1.7438, |
| "step": 113700 |
| }, |
| { |
| "epoch": 36.780866192630896, |
| "grad_norm": 17.112966537475586, |
| "learning_rate": 0.001, |
| "loss": 1.7566, |
| "step": 113800 |
| }, |
| { |
| "epoch": 36.81318681318681, |
| "grad_norm": 1.1081054210662842, |
| "learning_rate": 0.001, |
| "loss": 1.7545, |
| "step": 113900 |
| }, |
| { |
| "epoch": 36.845507433742725, |
| "grad_norm": 1.49489426612854, |
| "learning_rate": 0.001, |
| "loss": 1.7553, |
| "step": 114000 |
| }, |
| { |
| "epoch": 36.87782805429864, |
| "grad_norm": 1.4801154136657715, |
| "learning_rate": 0.001, |
| "loss": 1.739, |
| "step": 114100 |
| }, |
| { |
| "epoch": 36.910148674854554, |
| "grad_norm": 1.4519503116607666, |
| "learning_rate": 0.001, |
| "loss": 1.7696, |
| "step": 114200 |
| }, |
| { |
| "epoch": 36.94246929541047, |
| "grad_norm": 1.424436092376709, |
| "learning_rate": 0.001, |
| "loss": 1.7624, |
| "step": 114300 |
| }, |
| { |
| "epoch": 36.97478991596638, |
| "grad_norm": 1.5529747009277344, |
| "learning_rate": 0.001, |
| "loss": 1.7481, |
| "step": 114400 |
| }, |
| { |
| "epoch": 37.007110536522305, |
| "grad_norm": 1.3554753065109253, |
| "learning_rate": 0.001, |
| "loss": 1.7531, |
| "step": 114500 |
| }, |
| { |
| "epoch": 37.03943115707822, |
| "grad_norm": 1.5860496759414673, |
| "learning_rate": 0.001, |
| "loss": 1.6318, |
| "step": 114600 |
| }, |
| { |
| "epoch": 37.071751777634134, |
| "grad_norm": 2.0900566577911377, |
| "learning_rate": 0.001, |
| "loss": 1.623, |
| "step": 114700 |
| }, |
| { |
| "epoch": 37.10407239819005, |
| "grad_norm": 1.452841877937317, |
| "learning_rate": 0.001, |
| "loss": 1.6301, |
| "step": 114800 |
| }, |
| { |
| "epoch": 37.13639301874596, |
| "grad_norm": 2.630765914916992, |
| "learning_rate": 0.001, |
| "loss": 1.6386, |
| "step": 114900 |
| }, |
| { |
| "epoch": 37.16871363930188, |
| "grad_norm": 1.6086002588272095, |
| "learning_rate": 0.001, |
| "loss": 1.6525, |
| "step": 115000 |
| }, |
| { |
| "epoch": 37.20103425985779, |
| "grad_norm": 1.5856311321258545, |
| "learning_rate": 0.001, |
| "loss": 1.6452, |
| "step": 115100 |
| }, |
| { |
| "epoch": 37.23335488041371, |
| "grad_norm": 1.3888436555862427, |
| "learning_rate": 0.001, |
| "loss": 1.6605, |
| "step": 115200 |
| }, |
| { |
| "epoch": 37.26567550096962, |
| "grad_norm": 1.6696892976760864, |
| "learning_rate": 0.001, |
| "loss": 1.6635, |
| "step": 115300 |
| }, |
| { |
| "epoch": 37.297996121525536, |
| "grad_norm": 1.3410964012145996, |
| "learning_rate": 0.001, |
| "loss": 1.6879, |
| "step": 115400 |
| }, |
| { |
| "epoch": 37.33031674208145, |
| "grad_norm": 1.9728986024856567, |
| "learning_rate": 0.001, |
| "loss": 1.6818, |
| "step": 115500 |
| }, |
| { |
| "epoch": 37.362637362637365, |
| "grad_norm": 1.2899627685546875, |
| "learning_rate": 0.001, |
| "loss": 1.6637, |
| "step": 115600 |
| }, |
| { |
| "epoch": 37.39495798319328, |
| "grad_norm": 1.4178438186645508, |
| "learning_rate": 0.001, |
| "loss": 1.6829, |
| "step": 115700 |
| }, |
| { |
| "epoch": 37.427278603749194, |
| "grad_norm": 1.2930041551589966, |
| "learning_rate": 0.001, |
| "loss": 1.6989, |
| "step": 115800 |
| }, |
| { |
| "epoch": 37.45959922430511, |
| "grad_norm": 1.5607823133468628, |
| "learning_rate": 0.001, |
| "loss": 1.6994, |
| "step": 115900 |
| }, |
| { |
| "epoch": 37.49191984486102, |
| "grad_norm": 1.3192517757415771, |
| "learning_rate": 0.001, |
| "loss": 1.7011, |
| "step": 116000 |
| }, |
| { |
| "epoch": 37.52424046541694, |
| "grad_norm": 1.385068655014038, |
| "learning_rate": 0.001, |
| "loss": 1.715, |
| "step": 116100 |
| }, |
| { |
| "epoch": 37.55656108597285, |
| "grad_norm": 1.4092013835906982, |
| "learning_rate": 0.001, |
| "loss": 1.7085, |
| "step": 116200 |
| }, |
| { |
| "epoch": 37.58888170652877, |
| "grad_norm": 1.5187952518463135, |
| "learning_rate": 0.001, |
| "loss": 1.6988, |
| "step": 116300 |
| }, |
| { |
| "epoch": 37.62120232708468, |
| "grad_norm": 1.5603824853897095, |
| "learning_rate": 0.001, |
| "loss": 1.6981, |
| "step": 116400 |
| }, |
| { |
| "epoch": 37.653522947640596, |
| "grad_norm": 1.3722445964813232, |
| "learning_rate": 0.001, |
| "loss": 1.7052, |
| "step": 116500 |
| }, |
| { |
| "epoch": 37.68584356819651, |
| "grad_norm": 1.2925854921340942, |
| "learning_rate": 0.001, |
| "loss": 1.7185, |
| "step": 116600 |
| }, |
| { |
| "epoch": 37.718164188752425, |
| "grad_norm": 1.8050371408462524, |
| "learning_rate": 0.001, |
| "loss": 1.7142, |
| "step": 116700 |
| }, |
| { |
| "epoch": 37.75048480930834, |
| "grad_norm": 1.6540824174880981, |
| "learning_rate": 0.001, |
| "loss": 1.7186, |
| "step": 116800 |
| }, |
| { |
| "epoch": 37.782805429864254, |
| "grad_norm": 1.7223883867263794, |
| "learning_rate": 0.001, |
| "loss": 1.7336, |
| "step": 116900 |
| }, |
| { |
| "epoch": 37.81512605042017, |
| "grad_norm": 1.4943795204162598, |
| "learning_rate": 0.001, |
| "loss": 1.7443, |
| "step": 117000 |
| }, |
| { |
| "epoch": 37.84744667097608, |
| "grad_norm": 1.3059314489364624, |
| "learning_rate": 0.001, |
| "loss": 1.7356, |
| "step": 117100 |
| }, |
| { |
| "epoch": 37.879767291532, |
| "grad_norm": 1.4005022048950195, |
| "learning_rate": 0.001, |
| "loss": 1.7175, |
| "step": 117200 |
| }, |
| { |
| "epoch": 37.91208791208791, |
| "grad_norm": 1.3799285888671875, |
| "learning_rate": 0.001, |
| "loss": 1.7465, |
| "step": 117300 |
| }, |
| { |
| "epoch": 37.94440853264383, |
| "grad_norm": 1.4844553470611572, |
| "learning_rate": 0.001, |
| "loss": 1.7428, |
| "step": 117400 |
| }, |
| { |
| "epoch": 37.97672915319974, |
| "grad_norm": 1.2771897315979004, |
| "learning_rate": 0.001, |
| "loss": 1.746, |
| "step": 117500 |
| }, |
| { |
| "epoch": 38.009049773755656, |
| "grad_norm": 1.4916632175445557, |
| "learning_rate": 0.001, |
| "loss": 1.7074, |
| "step": 117600 |
| }, |
| { |
| "epoch": 38.04137039431157, |
| "grad_norm": 1.445297360420227, |
| "learning_rate": 0.001, |
| "loss": 1.5948, |
| "step": 117700 |
| }, |
| { |
| "epoch": 38.073691014867485, |
| "grad_norm": 1.513931155204773, |
| "learning_rate": 0.001, |
| "loss": 1.61, |
| "step": 117800 |
| }, |
| { |
| "epoch": 38.1060116354234, |
| "grad_norm": 1.5125148296356201, |
| "learning_rate": 0.001, |
| "loss": 1.6239, |
| "step": 117900 |
| }, |
| { |
| "epoch": 38.138332255979314, |
| "grad_norm": 1.384259819984436, |
| "learning_rate": 0.001, |
| "loss": 1.6232, |
| "step": 118000 |
| }, |
| { |
| "epoch": 38.17065287653523, |
| "grad_norm": 1.5894758701324463, |
| "learning_rate": 0.001, |
| "loss": 1.6157, |
| "step": 118100 |
| }, |
| { |
| "epoch": 38.20297349709114, |
| "grad_norm": 1.6156755685806274, |
| "learning_rate": 0.001, |
| "loss": 1.638, |
| "step": 118200 |
| }, |
| { |
| "epoch": 38.23529411764706, |
| "grad_norm": 1.5429646968841553, |
| "learning_rate": 0.001, |
| "loss": 1.6424, |
| "step": 118300 |
| }, |
| { |
| "epoch": 38.26761473820297, |
| "grad_norm": 2.129305839538574, |
| "learning_rate": 0.001, |
| "loss": 1.6344, |
| "step": 118400 |
| }, |
| { |
| "epoch": 38.29993535875889, |
| "grad_norm": 1.6252347230911255, |
| "learning_rate": 0.001, |
| "loss": 1.6621, |
| "step": 118500 |
| }, |
| { |
| "epoch": 38.3322559793148, |
| "grad_norm": 1.6291354894638062, |
| "learning_rate": 0.001, |
| "loss": 1.6543, |
| "step": 118600 |
| }, |
| { |
| "epoch": 38.364576599870716, |
| "grad_norm": 1.9039782285690308, |
| "learning_rate": 0.001, |
| "loss": 1.6766, |
| "step": 118700 |
| }, |
| { |
| "epoch": 38.39689722042663, |
| "grad_norm": 1.4002729654312134, |
| "learning_rate": 0.001, |
| "loss": 1.6732, |
| "step": 118800 |
| }, |
| { |
| "epoch": 38.429217840982545, |
| "grad_norm": 1.6930391788482666, |
| "learning_rate": 0.001, |
| "loss": 1.6552, |
| "step": 118900 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.3052587509155273, |
| "learning_rate": 0.001, |
| "loss": 1.6699, |
| "step": 119000 |
| }, |
| { |
| "epoch": 38.493859082094374, |
| "grad_norm": 1.9328466653823853, |
| "learning_rate": 0.001, |
| "loss": 1.6617, |
| "step": 119100 |
| }, |
| { |
| "epoch": 38.52617970265029, |
| "grad_norm": 1.5333701372146606, |
| "learning_rate": 0.001, |
| "loss": 1.6919, |
| "step": 119200 |
| }, |
| { |
| "epoch": 38.558500323206204, |
| "grad_norm": 1.3492674827575684, |
| "learning_rate": 0.001, |
| "loss": 1.6642, |
| "step": 119300 |
| }, |
| { |
| "epoch": 38.59082094376212, |
| "grad_norm": 1.2537891864776611, |
| "learning_rate": 0.001, |
| "loss": 1.6928, |
| "step": 119400 |
| }, |
| { |
| "epoch": 38.62314156431803, |
| "grad_norm": 1.5430333614349365, |
| "learning_rate": 0.001, |
| "loss": 1.6925, |
| "step": 119500 |
| }, |
| { |
| "epoch": 38.65546218487395, |
| "grad_norm": 1.3039238452911377, |
| "learning_rate": 0.001, |
| "loss": 1.6964, |
| "step": 119600 |
| }, |
| { |
| "epoch": 38.68778280542986, |
| "grad_norm": 1.9416965246200562, |
| "learning_rate": 0.001, |
| "loss": 1.6892, |
| "step": 119700 |
| }, |
| { |
| "epoch": 38.720103425985776, |
| "grad_norm": 1.5908054113388062, |
| "learning_rate": 0.001, |
| "loss": 1.6994, |
| "step": 119800 |
| }, |
| { |
| "epoch": 38.75242404654169, |
| "grad_norm": 1.4828917980194092, |
| "learning_rate": 0.001, |
| "loss": 1.678, |
| "step": 119900 |
| }, |
| { |
| "epoch": 38.784744667097605, |
| "grad_norm": 1.236195683479309, |
| "learning_rate": 0.001, |
| "loss": 1.7161, |
| "step": 120000 |
| }, |
| { |
| "epoch": 38.81706528765352, |
| "grad_norm": 1.6175165176391602, |
| "learning_rate": 0.001, |
| "loss": 1.7076, |
| "step": 120100 |
| }, |
| { |
| "epoch": 38.849385908209435, |
| "grad_norm": 1.4187142848968506, |
| "learning_rate": 0.001, |
| "loss": 1.7213, |
| "step": 120200 |
| }, |
| { |
| "epoch": 38.88170652876535, |
| "grad_norm": 1.615424633026123, |
| "learning_rate": 0.001, |
| "loss": 1.6994, |
| "step": 120300 |
| }, |
| { |
| "epoch": 38.914027149321264, |
| "grad_norm": 1.3184314966201782, |
| "learning_rate": 0.001, |
| "loss": 1.7017, |
| "step": 120400 |
| }, |
| { |
| "epoch": 38.94634776987718, |
| "grad_norm": 2.0182273387908936, |
| "learning_rate": 0.001, |
| "loss": 1.7112, |
| "step": 120500 |
| }, |
| { |
| "epoch": 38.97866839043309, |
| "grad_norm": 1.53033447265625, |
| "learning_rate": 0.001, |
| "loss": 1.7221, |
| "step": 120600 |
| }, |
| { |
| "epoch": 39.010989010989015, |
| "grad_norm": 1.5735342502593994, |
| "learning_rate": 0.001, |
| "loss": 1.6814, |
| "step": 120700 |
| }, |
| { |
| "epoch": 39.04330963154493, |
| "grad_norm": 1.396195411682129, |
| "learning_rate": 0.001, |
| "loss": 1.5669, |
| "step": 120800 |
| }, |
| { |
| "epoch": 39.075630252100844, |
| "grad_norm": 1.510571002960205, |
| "learning_rate": 0.001, |
| "loss": 1.5803, |
| "step": 120900 |
| }, |
| { |
| "epoch": 39.10795087265676, |
| "grad_norm": 1.533634066581726, |
| "learning_rate": 0.001, |
| "loss": 1.5828, |
| "step": 121000 |
| }, |
| { |
| "epoch": 39.14027149321267, |
| "grad_norm": 1.4806658029556274, |
| "learning_rate": 0.001, |
| "loss": 1.5861, |
| "step": 121100 |
| }, |
| { |
| "epoch": 39.17259211376859, |
| "grad_norm": 1.7448620796203613, |
| "learning_rate": 0.001, |
| "loss": 1.6208, |
| "step": 121200 |
| }, |
| { |
| "epoch": 39.2049127343245, |
| "grad_norm": 1.800340175628662, |
| "learning_rate": 0.001, |
| "loss": 1.6287, |
| "step": 121300 |
| }, |
| { |
| "epoch": 39.237233354880416, |
| "grad_norm": 1.4154140949249268, |
| "learning_rate": 0.001, |
| "loss": 1.6252, |
| "step": 121400 |
| }, |
| { |
| "epoch": 39.26955397543633, |
| "grad_norm": 1.729437232017517, |
| "learning_rate": 0.001, |
| "loss": 1.6358, |
| "step": 121500 |
| }, |
| { |
| "epoch": 39.301874595992246, |
| "grad_norm": 1.3439644575119019, |
| "learning_rate": 0.001, |
| "loss": 1.6278, |
| "step": 121600 |
| }, |
| { |
| "epoch": 39.33419521654816, |
| "grad_norm": 2.249131441116333, |
| "learning_rate": 0.001, |
| "loss": 1.6345, |
| "step": 121700 |
| }, |
| { |
| "epoch": 39.366515837104075, |
| "grad_norm": 1.8861782550811768, |
| "learning_rate": 0.001, |
| "loss": 1.6142, |
| "step": 121800 |
| }, |
| { |
| "epoch": 39.39883645765999, |
| "grad_norm": 1.7923866510391235, |
| "learning_rate": 0.001, |
| "loss": 1.6408, |
| "step": 121900 |
| }, |
| { |
| "epoch": 39.431157078215904, |
| "grad_norm": 1.8089905977249146, |
| "learning_rate": 0.001, |
| "loss": 1.625, |
| "step": 122000 |
| }, |
| { |
| "epoch": 39.46347769877182, |
| "grad_norm": 1.6108099222183228, |
| "learning_rate": 0.001, |
| "loss": 1.6471, |
| "step": 122100 |
| }, |
| { |
| "epoch": 39.49579831932773, |
| "grad_norm": 1.8288142681121826, |
| "learning_rate": 0.001, |
| "loss": 1.6492, |
| "step": 122200 |
| }, |
| { |
| "epoch": 39.52811893988365, |
| "grad_norm": 1.6648753881454468, |
| "learning_rate": 0.001, |
| "loss": 1.6743, |
| "step": 122300 |
| }, |
| { |
| "epoch": 39.56043956043956, |
| "grad_norm": 1.447311282157898, |
| "learning_rate": 0.001, |
| "loss": 1.6649, |
| "step": 122400 |
| }, |
| { |
| "epoch": 39.59276018099548, |
| "grad_norm": 1.5929150581359863, |
| "learning_rate": 0.001, |
| "loss": 1.6768, |
| "step": 122500 |
| }, |
| { |
| "epoch": 39.62508080155139, |
| "grad_norm": 1.5306599140167236, |
| "learning_rate": 0.001, |
| "loss": 1.6823, |
| "step": 122600 |
| }, |
| { |
| "epoch": 39.657401422107306, |
| "grad_norm": 2.1185336112976074, |
| "learning_rate": 0.001, |
| "loss": 1.647, |
| "step": 122700 |
| }, |
| { |
| "epoch": 39.68972204266322, |
| "grad_norm": 1.5616239309310913, |
| "learning_rate": 0.001, |
| "loss": 1.6811, |
| "step": 122800 |
| }, |
| { |
| "epoch": 39.722042663219135, |
| "grad_norm": 1.610378623008728, |
| "learning_rate": 0.001, |
| "loss": 1.682, |
| "step": 122900 |
| }, |
| { |
| "epoch": 39.75436328377505, |
| "grad_norm": 1.679341197013855, |
| "learning_rate": 0.001, |
| "loss": 1.6773, |
| "step": 123000 |
| }, |
| { |
| "epoch": 39.786683904330964, |
| "grad_norm": 1.6334840059280396, |
| "learning_rate": 0.001, |
| "loss": 1.6717, |
| "step": 123100 |
| }, |
| { |
| "epoch": 39.81900452488688, |
| "grad_norm": 1.4154548645019531, |
| "learning_rate": 0.001, |
| "loss": 1.6878, |
| "step": 123200 |
| }, |
| { |
| "epoch": 39.85132514544279, |
| "grad_norm": 1.5215219259262085, |
| "learning_rate": 0.001, |
| "loss": 1.6776, |
| "step": 123300 |
| }, |
| { |
| "epoch": 39.88364576599871, |
| "grad_norm": 1.4763894081115723, |
| "learning_rate": 0.001, |
| "loss": 1.6989, |
| "step": 123400 |
| }, |
| { |
| "epoch": 39.91596638655462, |
| "grad_norm": 1.5995749235153198, |
| "learning_rate": 0.001, |
| "loss": 1.6769, |
| "step": 123500 |
| }, |
| { |
| "epoch": 39.94828700711054, |
| "grad_norm": 1.726880431175232, |
| "learning_rate": 0.001, |
| "loss": 1.7047, |
| "step": 123600 |
| }, |
| { |
| "epoch": 39.98060762766645, |
| "grad_norm": 2.136265754699707, |
| "learning_rate": 0.001, |
| "loss": 1.7003, |
| "step": 123700 |
| }, |
| { |
| "epoch": 40.012928248222366, |
| "grad_norm": 1.5406869649887085, |
| "learning_rate": 0.001, |
| "loss": 1.6333, |
| "step": 123800 |
| }, |
| { |
| "epoch": 40.04524886877828, |
| "grad_norm": 1.669394850730896, |
| "learning_rate": 0.001, |
| "loss": 1.5618, |
| "step": 123900 |
| }, |
| { |
| "epoch": 40.077569489334195, |
| "grad_norm": 1.9656615257263184, |
| "learning_rate": 0.001, |
| "loss": 1.5684, |
| "step": 124000 |
| }, |
| { |
| "epoch": 40.10989010989011, |
| "grad_norm": 1.5017229318618774, |
| "learning_rate": 0.001, |
| "loss": 1.5721, |
| "step": 124100 |
| }, |
| { |
| "epoch": 40.142210730446024, |
| "grad_norm": 1.9601807594299316, |
| "learning_rate": 0.001, |
| "loss": 1.5763, |
| "step": 124200 |
| }, |
| { |
| "epoch": 40.17453135100194, |
| "grad_norm": 1.748874545097351, |
| "learning_rate": 0.001, |
| "loss": 1.5934, |
| "step": 124300 |
| }, |
| { |
| "epoch": 40.20685197155785, |
| "grad_norm": 1.5721076726913452, |
| "learning_rate": 0.001, |
| "loss": 1.5867, |
| "step": 124400 |
| }, |
| { |
| "epoch": 40.23917259211377, |
| "grad_norm": 2.065415620803833, |
| "learning_rate": 0.001, |
| "loss": 1.5856, |
| "step": 124500 |
| }, |
| { |
| "epoch": 40.27149321266968, |
| "grad_norm": 1.926713466644287, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 124600 |
| }, |
| { |
| "epoch": 40.3038138332256, |
| "grad_norm": 1.827108383178711, |
| "learning_rate": 0.001, |
| "loss": 1.5936, |
| "step": 124700 |
| }, |
| { |
| "epoch": 40.33613445378151, |
| "grad_norm": 2.123699426651001, |
| "learning_rate": 0.001, |
| "loss": 1.6068, |
| "step": 124800 |
| }, |
| { |
| "epoch": 40.368455074337426, |
| "grad_norm": 2.119037628173828, |
| "learning_rate": 0.001, |
| "loss": 1.6113, |
| "step": 124900 |
| }, |
| { |
| "epoch": 40.40077569489334, |
| "grad_norm": 1.7764804363250732, |
| "learning_rate": 0.001, |
| "loss": 1.6156, |
| "step": 125000 |
| }, |
| { |
| "epoch": 40.433096315449255, |
| "grad_norm": 1.7340418100357056, |
| "learning_rate": 0.001, |
| "loss": 1.6309, |
| "step": 125100 |
| }, |
| { |
| "epoch": 40.46541693600517, |
| "grad_norm": 2.1750295162200928, |
| "learning_rate": 0.001, |
| "loss": 1.6165, |
| "step": 125200 |
| }, |
| { |
| "epoch": 40.497737556561084, |
| "grad_norm": 1.8627874851226807, |
| "learning_rate": 0.001, |
| "loss": 1.6342, |
| "step": 125300 |
| }, |
| { |
| "epoch": 40.530058177117, |
| "grad_norm": 2.254770517349243, |
| "learning_rate": 0.001, |
| "loss": 1.64, |
| "step": 125400 |
| }, |
| { |
| "epoch": 40.56237879767291, |
| "grad_norm": 1.959664225578308, |
| "learning_rate": 0.001, |
| "loss": 1.6451, |
| "step": 125500 |
| }, |
| { |
| "epoch": 40.59469941822883, |
| "grad_norm": 1.7636991739273071, |
| "learning_rate": 0.001, |
| "loss": 1.6386, |
| "step": 125600 |
| }, |
| { |
| "epoch": 40.62702003878474, |
| "grad_norm": 1.5677493810653687, |
| "learning_rate": 0.001, |
| "loss": 1.6454, |
| "step": 125700 |
| }, |
| { |
| "epoch": 40.65934065934066, |
| "grad_norm": 2.089933156967163, |
| "learning_rate": 0.001, |
| "loss": 1.6412, |
| "step": 125800 |
| }, |
| { |
| "epoch": 40.69166127989657, |
| "grad_norm": 1.8187321424484253, |
| "learning_rate": 0.001, |
| "loss": 1.6429, |
| "step": 125900 |
| }, |
| { |
| "epoch": 40.723981900452486, |
| "grad_norm": 1.580985426902771, |
| "learning_rate": 0.001, |
| "loss": 1.6749, |
| "step": 126000 |
| }, |
| { |
| "epoch": 40.7563025210084, |
| "grad_norm": 1.7752652168273926, |
| "learning_rate": 0.001, |
| "loss": 1.6616, |
| "step": 126100 |
| }, |
| { |
| "epoch": 40.788623141564315, |
| "grad_norm": 2.0827362537384033, |
| "learning_rate": 0.001, |
| "loss": 1.6555, |
| "step": 126200 |
| }, |
| { |
| "epoch": 40.82094376212023, |
| "grad_norm": 1.8399465084075928, |
| "learning_rate": 0.001, |
| "loss": 1.6663, |
| "step": 126300 |
| }, |
| { |
| "epoch": 40.853264382676144, |
| "grad_norm": 1.5653818845748901, |
| "learning_rate": 0.001, |
| "loss": 1.6752, |
| "step": 126400 |
| }, |
| { |
| "epoch": 40.88558500323206, |
| "grad_norm": 2.2567121982574463, |
| "learning_rate": 0.001, |
| "loss": 1.6658, |
| "step": 126500 |
| }, |
| { |
| "epoch": 40.91790562378797, |
| "grad_norm": 1.8558109998703003, |
| "learning_rate": 0.001, |
| "loss": 1.6766, |
| "step": 126600 |
| }, |
| { |
| "epoch": 40.95022624434389, |
| "grad_norm": 1.7659657001495361, |
| "learning_rate": 0.001, |
| "loss": 1.6761, |
| "step": 126700 |
| }, |
| { |
| "epoch": 40.9825468648998, |
| "grad_norm": 1.8757858276367188, |
| "learning_rate": 0.001, |
| "loss": 1.6769, |
| "step": 126800 |
| }, |
| { |
| "epoch": 41.014867485455724, |
| "grad_norm": 2.512212038040161, |
| "learning_rate": 0.001, |
| "loss": 1.6044, |
| "step": 126900 |
| }, |
| { |
| "epoch": 41.04718810601164, |
| "grad_norm": 2.0472733974456787, |
| "learning_rate": 0.001, |
| "loss": 1.541, |
| "step": 127000 |
| }, |
| { |
| "epoch": 41.07950872656755, |
| "grad_norm": 1.9753613471984863, |
| "learning_rate": 0.001, |
| "loss": 1.5501, |
| "step": 127100 |
| }, |
| { |
| "epoch": 41.11182934712347, |
| "grad_norm": 1.7416068315505981, |
| "learning_rate": 0.001, |
| "loss": 1.5391, |
| "step": 127200 |
| }, |
| { |
| "epoch": 41.14414996767938, |
| "grad_norm": 2.020958662033081, |
| "learning_rate": 0.001, |
| "loss": 1.55, |
| "step": 127300 |
| }, |
| { |
| "epoch": 41.1764705882353, |
| "grad_norm": 1.6599868535995483, |
| "learning_rate": 0.001, |
| "loss": 1.5525, |
| "step": 127400 |
| }, |
| { |
| "epoch": 41.20879120879121, |
| "grad_norm": 2.1170713901519775, |
| "learning_rate": 0.001, |
| "loss": 1.5644, |
| "step": 127500 |
| }, |
| { |
| "epoch": 41.241111829347126, |
| "grad_norm": 2.277888298034668, |
| "learning_rate": 0.001, |
| "loss": 1.5721, |
| "step": 127600 |
| }, |
| { |
| "epoch": 41.27343244990304, |
| "grad_norm": 2.1243207454681396, |
| "learning_rate": 0.001, |
| "loss": 1.585, |
| "step": 127700 |
| }, |
| { |
| "epoch": 41.305753070458955, |
| "grad_norm": 1.9793413877487183, |
| "learning_rate": 0.001, |
| "loss": 1.5874, |
| "step": 127800 |
| }, |
| { |
| "epoch": 41.33807369101487, |
| "grad_norm": 2.173837661743164, |
| "learning_rate": 0.001, |
| "loss": 1.6025, |
| "step": 127900 |
| }, |
| { |
| "epoch": 41.370394311570784, |
| "grad_norm": 1.8958253860473633, |
| "learning_rate": 0.001, |
| "loss": 1.5792, |
| "step": 128000 |
| }, |
| { |
| "epoch": 41.4027149321267, |
| "grad_norm": 1.8988944292068481, |
| "learning_rate": 0.001, |
| "loss": 1.5951, |
| "step": 128100 |
| }, |
| { |
| "epoch": 41.43503555268261, |
| "grad_norm": 2.107556104660034, |
| "learning_rate": 0.001, |
| "loss": 1.6001, |
| "step": 128200 |
| }, |
| { |
| "epoch": 41.46735617323853, |
| "grad_norm": 1.8033684492111206, |
| "learning_rate": 0.001, |
| "loss": 1.5962, |
| "step": 128300 |
| }, |
| { |
| "epoch": 41.49967679379444, |
| "grad_norm": 2.004406213760376, |
| "learning_rate": 0.001, |
| "loss": 1.6003, |
| "step": 128400 |
| }, |
| { |
| "epoch": 41.53199741435036, |
| "grad_norm": 1.7807986736297607, |
| "learning_rate": 0.001, |
| "loss": 1.6142, |
| "step": 128500 |
| }, |
| { |
| "epoch": 41.56431803490627, |
| "grad_norm": 2.0727765560150146, |
| "learning_rate": 0.001, |
| "loss": 1.6103, |
| "step": 128600 |
| }, |
| { |
| "epoch": 41.596638655462186, |
| "grad_norm": 1.7568210363388062, |
| "learning_rate": 0.001, |
| "loss": 1.6313, |
| "step": 128700 |
| }, |
| { |
| "epoch": 41.6289592760181, |
| "grad_norm": 1.965510606765747, |
| "learning_rate": 0.001, |
| "loss": 1.6206, |
| "step": 128800 |
| }, |
| { |
| "epoch": 41.661279896574015, |
| "grad_norm": 1.6988037824630737, |
| "learning_rate": 0.001, |
| "loss": 1.6457, |
| "step": 128900 |
| }, |
| { |
| "epoch": 41.69360051712993, |
| "grad_norm": 2.4268720149993896, |
| "learning_rate": 0.001, |
| "loss": 1.6157, |
| "step": 129000 |
| }, |
| { |
| "epoch": 41.725921137685845, |
| "grad_norm": 2.0334534645080566, |
| "learning_rate": 0.001, |
| "loss": 1.6322, |
| "step": 129100 |
| }, |
| { |
| "epoch": 41.75824175824176, |
| "grad_norm": 2.085456371307373, |
| "learning_rate": 0.001, |
| "loss": 1.6448, |
| "step": 129200 |
| }, |
| { |
| "epoch": 41.790562378797674, |
| "grad_norm": 2.056472063064575, |
| "learning_rate": 0.001, |
| "loss": 1.6533, |
| "step": 129300 |
| }, |
| { |
| "epoch": 41.82288299935359, |
| "grad_norm": 2.280672073364258, |
| "learning_rate": 0.001, |
| "loss": 1.6466, |
| "step": 129400 |
| }, |
| { |
| "epoch": 41.8552036199095, |
| "grad_norm": 1.9739775657653809, |
| "learning_rate": 0.001, |
| "loss": 1.647, |
| "step": 129500 |
| }, |
| { |
| "epoch": 41.88752424046542, |
| "grad_norm": 2.5622267723083496, |
| "learning_rate": 0.001, |
| "loss": 1.6521, |
| "step": 129600 |
| }, |
| { |
| "epoch": 41.91984486102133, |
| "grad_norm": 1.893709421157837, |
| "learning_rate": 0.001, |
| "loss": 1.6407, |
| "step": 129700 |
| }, |
| { |
| "epoch": 41.95216548157725, |
| "grad_norm": 1.7965614795684814, |
| "learning_rate": 0.001, |
| "loss": 1.6568, |
| "step": 129800 |
| }, |
| { |
| "epoch": 41.98448610213316, |
| "grad_norm": 2.208484411239624, |
| "learning_rate": 0.001, |
| "loss": 1.6759, |
| "step": 129900 |
| }, |
| { |
| "epoch": 42.016806722689076, |
| "grad_norm": 2.078091859817505, |
| "learning_rate": 0.001, |
| "loss": 1.5652, |
| "step": 130000 |
| }, |
| { |
| "epoch": 42.04912734324499, |
| "grad_norm": 1.9855774641036987, |
| "learning_rate": 0.001, |
| "loss": 1.5017, |
| "step": 130100 |
| }, |
| { |
| "epoch": 42.081447963800905, |
| "grad_norm": 2.0214784145355225, |
| "learning_rate": 0.001, |
| "loss": 1.545, |
| "step": 130200 |
| }, |
| { |
| "epoch": 42.11376858435682, |
| "grad_norm": 2.2106988430023193, |
| "learning_rate": 0.001, |
| "loss": 1.5264, |
| "step": 130300 |
| }, |
| { |
| "epoch": 42.146089204912734, |
| "grad_norm": 2.208866834640503, |
| "learning_rate": 0.001, |
| "loss": 1.5276, |
| "step": 130400 |
| }, |
| { |
| "epoch": 42.17840982546865, |
| "grad_norm": 2.6902999877929688, |
| "learning_rate": 0.001, |
| "loss": 1.5549, |
| "step": 130500 |
| }, |
| { |
| "epoch": 42.21073044602456, |
| "grad_norm": 2.041752576828003, |
| "learning_rate": 0.001, |
| "loss": 1.5361, |
| "step": 130600 |
| }, |
| { |
| "epoch": 42.24305106658048, |
| "grad_norm": 1.764216423034668, |
| "learning_rate": 0.001, |
| "loss": 1.5404, |
| "step": 130700 |
| }, |
| { |
| "epoch": 42.27537168713639, |
| "grad_norm": 2.0946340560913086, |
| "learning_rate": 0.001, |
| "loss": 1.5454, |
| "step": 130800 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 2.1806182861328125, |
| "learning_rate": 0.001, |
| "loss": 1.5488, |
| "step": 130900 |
| }, |
| { |
| "epoch": 42.34001292824822, |
| "grad_norm": 1.8357605934143066, |
| "learning_rate": 0.001, |
| "loss": 1.572, |
| "step": 131000 |
| }, |
| { |
| "epoch": 42.372333548804136, |
| "grad_norm": 2.311324119567871, |
| "learning_rate": 0.001, |
| "loss": 1.5771, |
| "step": 131100 |
| }, |
| { |
| "epoch": 42.40465416936005, |
| "grad_norm": 2.406909465789795, |
| "learning_rate": 0.001, |
| "loss": 1.588, |
| "step": 131200 |
| }, |
| { |
| "epoch": 42.436974789915965, |
| "grad_norm": 2.6602001190185547, |
| "learning_rate": 0.001, |
| "loss": 1.5705, |
| "step": 131300 |
| }, |
| { |
| "epoch": 42.46929541047188, |
| "grad_norm": 2.230372428894043, |
| "learning_rate": 0.001, |
| "loss": 1.5834, |
| "step": 131400 |
| }, |
| { |
| "epoch": 42.501616031027794, |
| "grad_norm": 2.017826557159424, |
| "learning_rate": 0.001, |
| "loss": 1.5873, |
| "step": 131500 |
| }, |
| { |
| "epoch": 42.53393665158371, |
| "grad_norm": 2.3046560287475586, |
| "learning_rate": 0.001, |
| "loss": 1.5883, |
| "step": 131600 |
| }, |
| { |
| "epoch": 42.56625727213962, |
| "grad_norm": 2.286146402359009, |
| "learning_rate": 0.001, |
| "loss": 1.6031, |
| "step": 131700 |
| }, |
| { |
| "epoch": 42.59857789269554, |
| "grad_norm": 2.2403616905212402, |
| "learning_rate": 0.001, |
| "loss": 1.6109, |
| "step": 131800 |
| }, |
| { |
| "epoch": 42.63089851325145, |
| "grad_norm": 2.069786787033081, |
| "learning_rate": 0.001, |
| "loss": 1.6011, |
| "step": 131900 |
| }, |
| { |
| "epoch": 42.66321913380737, |
| "grad_norm": 2.289213180541992, |
| "learning_rate": 0.001, |
| "loss": 1.6092, |
| "step": 132000 |
| }, |
| { |
| "epoch": 42.69553975436328, |
| "grad_norm": 2.493170738220215, |
| "learning_rate": 0.001, |
| "loss": 1.608, |
| "step": 132100 |
| }, |
| { |
| "epoch": 42.727860374919196, |
| "grad_norm": 2.157357931137085, |
| "learning_rate": 0.001, |
| "loss": 1.6024, |
| "step": 132200 |
| }, |
| { |
| "epoch": 42.76018099547511, |
| "grad_norm": 1.8655034303665161, |
| "learning_rate": 0.001, |
| "loss": 1.6296, |
| "step": 132300 |
| }, |
| { |
| "epoch": 42.792501616031025, |
| "grad_norm": 1.8031189441680908, |
| "learning_rate": 0.001, |
| "loss": 1.623, |
| "step": 132400 |
| }, |
| { |
| "epoch": 42.82482223658694, |
| "grad_norm": 1.9016975164413452, |
| "learning_rate": 0.001, |
| "loss": 1.6318, |
| "step": 132500 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 2.0630972385406494, |
| "learning_rate": 0.001, |
| "loss": 1.6342, |
| "step": 132600 |
| }, |
| { |
| "epoch": 42.88946347769877, |
| "grad_norm": 2.427926778793335, |
| "learning_rate": 0.001, |
| "loss": 1.6399, |
| "step": 132700 |
| }, |
| { |
| "epoch": 42.92178409825468, |
| "grad_norm": 2.0053133964538574, |
| "learning_rate": 0.001, |
| "loss": 1.6561, |
| "step": 132800 |
| }, |
| { |
| "epoch": 42.9541047188106, |
| "grad_norm": 2.1016571521759033, |
| "learning_rate": 0.001, |
| "loss": 1.6391, |
| "step": 132900 |
| }, |
| { |
| "epoch": 42.98642533936652, |
| "grad_norm": 2.22855281829834, |
| "learning_rate": 0.001, |
| "loss": 1.6486, |
| "step": 133000 |
| }, |
| { |
| "epoch": 43.018745959922434, |
| "grad_norm": 1.6028803586959839, |
| "learning_rate": 0.001, |
| "loss": 1.5826, |
| "step": 133100 |
| }, |
| { |
| "epoch": 43.05106658047835, |
| "grad_norm": 2.0032293796539307, |
| "learning_rate": 0.001, |
| "loss": 1.4941, |
| "step": 133200 |
| }, |
| { |
| "epoch": 43.08338720103426, |
| "grad_norm": 1.550441861152649, |
| "learning_rate": 0.001, |
| "loss": 1.527, |
| "step": 133300 |
| }, |
| { |
| "epoch": 43.11570782159018, |
| "grad_norm": 1.429332971572876, |
| "learning_rate": 0.001, |
| "loss": 1.5175, |
| "step": 133400 |
| }, |
| { |
| "epoch": 43.14802844214609, |
| "grad_norm": 1.6169044971466064, |
| "learning_rate": 0.001, |
| "loss": 1.5182, |
| "step": 133500 |
| }, |
| { |
| "epoch": 43.18034906270201, |
| "grad_norm": 1.5943845510482788, |
| "learning_rate": 0.001, |
| "loss": 1.5155, |
| "step": 133600 |
| }, |
| { |
| "epoch": 43.21266968325792, |
| "grad_norm": 5.721950531005859, |
| "learning_rate": 0.001, |
| "loss": 1.5268, |
| "step": 133700 |
| }, |
| { |
| "epoch": 43.244990303813836, |
| "grad_norm": 1.472016453742981, |
| "learning_rate": 0.001, |
| "loss": 1.5302, |
| "step": 133800 |
| }, |
| { |
| "epoch": 43.27731092436975, |
| "grad_norm": 1.616565227508545, |
| "learning_rate": 0.001, |
| "loss": 1.5384, |
| "step": 133900 |
| }, |
| { |
| "epoch": 43.309631544925665, |
| "grad_norm": 1.4653263092041016, |
| "learning_rate": 0.001, |
| "loss": 1.5375, |
| "step": 134000 |
| }, |
| { |
| "epoch": 43.34195216548158, |
| "grad_norm": 1.5831880569458008, |
| "learning_rate": 0.001, |
| "loss": 1.5442, |
| "step": 134100 |
| }, |
| { |
| "epoch": 43.374272786037494, |
| "grad_norm": 2.1112189292907715, |
| "learning_rate": 0.001, |
| "loss": 1.5827, |
| "step": 134200 |
| }, |
| { |
| "epoch": 43.40659340659341, |
| "grad_norm": 1.6573975086212158, |
| "learning_rate": 0.001, |
| "loss": 1.5598, |
| "step": 134300 |
| }, |
| { |
| "epoch": 43.43891402714932, |
| "grad_norm": 1.8012890815734863, |
| "learning_rate": 0.001, |
| "loss": 1.5378, |
| "step": 134400 |
| }, |
| { |
| "epoch": 43.47123464770524, |
| "grad_norm": 1.6912004947662354, |
| "learning_rate": 0.001, |
| "loss": 1.5774, |
| "step": 134500 |
| }, |
| { |
| "epoch": 43.50355526826115, |
| "grad_norm": 1.7754735946655273, |
| "learning_rate": 0.001, |
| "loss": 1.5764, |
| "step": 134600 |
| }, |
| { |
| "epoch": 43.53587588881707, |
| "grad_norm": 2.1237239837646484, |
| "learning_rate": 0.001, |
| "loss": 1.576, |
| "step": 134700 |
| }, |
| { |
| "epoch": 43.56819650937298, |
| "grad_norm": 1.9989526271820068, |
| "learning_rate": 0.001, |
| "loss": 1.5701, |
| "step": 134800 |
| }, |
| { |
| "epoch": 43.600517129928896, |
| "grad_norm": 1.6959154605865479, |
| "learning_rate": 0.001, |
| "loss": 1.5828, |
| "step": 134900 |
| }, |
| { |
| "epoch": 43.63283775048481, |
| "grad_norm": 1.4177024364471436, |
| "learning_rate": 0.001, |
| "loss": 1.5929, |
| "step": 135000 |
| }, |
| { |
| "epoch": 43.665158371040725, |
| "grad_norm": 1.2994517087936401, |
| "learning_rate": 0.001, |
| "loss": 1.5947, |
| "step": 135100 |
| }, |
| { |
| "epoch": 43.69747899159664, |
| "grad_norm": 1.7819082736968994, |
| "learning_rate": 0.001, |
| "loss": 1.5897, |
| "step": 135200 |
| }, |
| { |
| "epoch": 43.729799612152554, |
| "grad_norm": 1.472542405128479, |
| "learning_rate": 0.001, |
| "loss": 1.6063, |
| "step": 135300 |
| }, |
| { |
| "epoch": 43.76212023270847, |
| "grad_norm": 1.7978127002716064, |
| "learning_rate": 0.001, |
| "loss": 1.5995, |
| "step": 135400 |
| }, |
| { |
| "epoch": 43.79444085326438, |
| "grad_norm": 1.496368169784546, |
| "learning_rate": 0.001, |
| "loss": 1.6055, |
| "step": 135500 |
| }, |
| { |
| "epoch": 43.8267614738203, |
| "grad_norm": 2.0534775257110596, |
| "learning_rate": 0.001, |
| "loss": 1.624, |
| "step": 135600 |
| }, |
| { |
| "epoch": 43.85908209437621, |
| "grad_norm": 1.7040880918502808, |
| "learning_rate": 0.001, |
| "loss": 1.6129, |
| "step": 135700 |
| }, |
| { |
| "epoch": 43.89140271493213, |
| "grad_norm": 1.5424871444702148, |
| "learning_rate": 0.001, |
| "loss": 1.6121, |
| "step": 135800 |
| }, |
| { |
| "epoch": 43.92372333548804, |
| "grad_norm": 1.6452441215515137, |
| "learning_rate": 0.001, |
| "loss": 1.6367, |
| "step": 135900 |
| }, |
| { |
| "epoch": 43.956043956043956, |
| "grad_norm": 1.9150134325027466, |
| "learning_rate": 0.001, |
| "loss": 1.6305, |
| "step": 136000 |
| }, |
| { |
| "epoch": 43.98836457659987, |
| "grad_norm": 1.769917607307434, |
| "learning_rate": 0.001, |
| "loss": 1.6333, |
| "step": 136100 |
| }, |
| { |
| "epoch": 44.020685197155785, |
| "grad_norm": 1.7169297933578491, |
| "learning_rate": 0.001, |
| "loss": 1.5414, |
| "step": 136200 |
| }, |
| { |
| "epoch": 44.0530058177117, |
| "grad_norm": 1.613829255104065, |
| "learning_rate": 0.001, |
| "loss": 1.494, |
| "step": 136300 |
| }, |
| { |
| "epoch": 44.085326438267614, |
| "grad_norm": 1.3439263105392456, |
| "learning_rate": 0.001, |
| "loss": 1.488, |
| "step": 136400 |
| }, |
| { |
| "epoch": 44.11764705882353, |
| "grad_norm": 1.3409062623977661, |
| "learning_rate": 0.001, |
| "loss": 1.5025, |
| "step": 136500 |
| }, |
| { |
| "epoch": 44.14996767937944, |
| "grad_norm": 1.8013584613800049, |
| "learning_rate": 0.001, |
| "loss": 1.4958, |
| "step": 136600 |
| }, |
| { |
| "epoch": 44.18228829993536, |
| "grad_norm": 1.5000190734863281, |
| "learning_rate": 0.001, |
| "loss": 1.5028, |
| "step": 136700 |
| }, |
| { |
| "epoch": 44.21460892049127, |
| "grad_norm": 1.672287940979004, |
| "learning_rate": 0.001, |
| "loss": 1.5285, |
| "step": 136800 |
| }, |
| { |
| "epoch": 44.24692954104719, |
| "grad_norm": 1.3260489702224731, |
| "learning_rate": 0.001, |
| "loss": 1.5245, |
| "step": 136900 |
| }, |
| { |
| "epoch": 44.2792501616031, |
| "grad_norm": 1.5546982288360596, |
| "learning_rate": 0.001, |
| "loss": 1.5327, |
| "step": 137000 |
| }, |
| { |
| "epoch": 44.311570782159016, |
| "grad_norm": 1.8142194747924805, |
| "learning_rate": 0.001, |
| "loss": 1.5384, |
| "step": 137100 |
| }, |
| { |
| "epoch": 44.34389140271493, |
| "grad_norm": 1.5028103590011597, |
| "learning_rate": 0.001, |
| "loss": 1.5345, |
| "step": 137200 |
| }, |
| { |
| "epoch": 44.376212023270845, |
| "grad_norm": 1.577204704284668, |
| "learning_rate": 0.001, |
| "loss": 1.5437, |
| "step": 137300 |
| }, |
| { |
| "epoch": 44.40853264382676, |
| "grad_norm": 1.8334197998046875, |
| "learning_rate": 0.001, |
| "loss": 1.5337, |
| "step": 137400 |
| }, |
| { |
| "epoch": 44.440853264382675, |
| "grad_norm": 1.5087043046951294, |
| "learning_rate": 0.001, |
| "loss": 1.5548, |
| "step": 137500 |
| }, |
| { |
| "epoch": 44.47317388493859, |
| "grad_norm": 1.5817244052886963, |
| "learning_rate": 0.001, |
| "loss": 1.5527, |
| "step": 137600 |
| }, |
| { |
| "epoch": 44.505494505494504, |
| "grad_norm": 1.6286990642547607, |
| "learning_rate": 0.001, |
| "loss": 1.5375, |
| "step": 137700 |
| }, |
| { |
| "epoch": 44.53781512605042, |
| "grad_norm": 1.6903146505355835, |
| "learning_rate": 0.001, |
| "loss": 1.555, |
| "step": 137800 |
| }, |
| { |
| "epoch": 44.57013574660633, |
| "grad_norm": 1.5768530368804932, |
| "learning_rate": 0.001, |
| "loss": 1.5537, |
| "step": 137900 |
| }, |
| { |
| "epoch": 44.60245636716225, |
| "grad_norm": 1.6440843343734741, |
| "learning_rate": 0.001, |
| "loss": 1.5767, |
| "step": 138000 |
| }, |
| { |
| "epoch": 44.63477698771816, |
| "grad_norm": 1.4411373138427734, |
| "learning_rate": 0.001, |
| "loss": 1.5582, |
| "step": 138100 |
| }, |
| { |
| "epoch": 44.66709760827408, |
| "grad_norm": 6.931090354919434, |
| "learning_rate": 0.001, |
| "loss": 1.5674, |
| "step": 138200 |
| }, |
| { |
| "epoch": 44.69941822882999, |
| "grad_norm": 1.7121952772140503, |
| "learning_rate": 0.001, |
| "loss": 1.5849, |
| "step": 138300 |
| }, |
| { |
| "epoch": 44.731738849385906, |
| "grad_norm": 1.3487331867218018, |
| "learning_rate": 0.001, |
| "loss": 1.581, |
| "step": 138400 |
| }, |
| { |
| "epoch": 44.76405946994182, |
| "grad_norm": 1.4686524868011475, |
| "learning_rate": 0.001, |
| "loss": 1.5855, |
| "step": 138500 |
| }, |
| { |
| "epoch": 44.796380090497735, |
| "grad_norm": 1.3534255027770996, |
| "learning_rate": 0.001, |
| "loss": 1.5878, |
| "step": 138600 |
| }, |
| { |
| "epoch": 44.82870071105365, |
| "grad_norm": 1.4645819664001465, |
| "learning_rate": 0.001, |
| "loss": 1.5831, |
| "step": 138700 |
| }, |
| { |
| "epoch": 44.861021331609564, |
| "grad_norm": 1.9700833559036255, |
| "learning_rate": 0.001, |
| "loss": 1.5986, |
| "step": 138800 |
| }, |
| { |
| "epoch": 44.89334195216548, |
| "grad_norm": 1.4428085088729858, |
| "learning_rate": 0.001, |
| "loss": 1.5975, |
| "step": 138900 |
| }, |
| { |
| "epoch": 44.92566257272139, |
| "grad_norm": 1.724913239479065, |
| "learning_rate": 0.001, |
| "loss": 1.601, |
| "step": 139000 |
| }, |
| { |
| "epoch": 44.95798319327731, |
| "grad_norm": 1.7658933401107788, |
| "learning_rate": 0.001, |
| "loss": 1.5877, |
| "step": 139100 |
| }, |
| { |
| "epoch": 44.99030381383322, |
| "grad_norm": 1.6144723892211914, |
| "learning_rate": 0.001, |
| "loss": 1.566, |
| "step": 139200 |
| }, |
| { |
| "epoch": 45.022624434389144, |
| "grad_norm": 1.3865910768508911, |
| "learning_rate": 0.001, |
| "loss": 1.5231, |
| "step": 139300 |
| }, |
| { |
| "epoch": 45.05494505494506, |
| "grad_norm": 1.631665825843811, |
| "learning_rate": 0.001, |
| "loss": 1.4477, |
| "step": 139400 |
| }, |
| { |
| "epoch": 45.08726567550097, |
| "grad_norm": 1.7368639707565308, |
| "learning_rate": 0.001, |
| "loss": 1.4724, |
| "step": 139500 |
| }, |
| { |
| "epoch": 45.11958629605689, |
| "grad_norm": 1.483976125717163, |
| "learning_rate": 0.001, |
| "loss": 1.4668, |
| "step": 139600 |
| }, |
| { |
| "epoch": 45.1519069166128, |
| "grad_norm": 1.597456932067871, |
| "learning_rate": 0.001, |
| "loss": 1.484, |
| "step": 139700 |
| }, |
| { |
| "epoch": 45.18422753716872, |
| "grad_norm": 1.684712529182434, |
| "learning_rate": 0.001, |
| "loss": 1.4857, |
| "step": 139800 |
| }, |
| { |
| "epoch": 45.21654815772463, |
| "grad_norm": 1.7697254419326782, |
| "learning_rate": 0.001, |
| "loss": 1.4854, |
| "step": 139900 |
| }, |
| { |
| "epoch": 45.248868778280546, |
| "grad_norm": 1.6394789218902588, |
| "learning_rate": 0.001, |
| "loss": 1.494, |
| "step": 140000 |
| }, |
| { |
| "epoch": 45.28118939883646, |
| "grad_norm": 1.8333781957626343, |
| "learning_rate": 0.001, |
| "loss": 1.4974, |
| "step": 140100 |
| }, |
| { |
| "epoch": 45.313510019392375, |
| "grad_norm": 1.4193600416183472, |
| "learning_rate": 0.001, |
| "loss": 1.5108, |
| "step": 140200 |
| }, |
| { |
| "epoch": 45.34583063994829, |
| "grad_norm": 1.4354008436203003, |
| "learning_rate": 0.001, |
| "loss": 1.4979, |
| "step": 140300 |
| }, |
| { |
| "epoch": 45.378151260504204, |
| "grad_norm": 1.3426471948623657, |
| "learning_rate": 0.001, |
| "loss": 1.5283, |
| "step": 140400 |
| }, |
| { |
| "epoch": 45.41047188106012, |
| "grad_norm": 1.817825198173523, |
| "learning_rate": 0.001, |
| "loss": 1.5295, |
| "step": 140500 |
| }, |
| { |
| "epoch": 45.44279250161603, |
| "grad_norm": 1.8174062967300415, |
| "learning_rate": 0.001, |
| "loss": 1.5311, |
| "step": 140600 |
| }, |
| { |
| "epoch": 45.47511312217195, |
| "grad_norm": 1.4800944328308105, |
| "learning_rate": 0.001, |
| "loss": 1.5262, |
| "step": 140700 |
| }, |
| { |
| "epoch": 45.50743374272786, |
| "grad_norm": 1.8175491094589233, |
| "learning_rate": 0.001, |
| "loss": 1.5133, |
| "step": 140800 |
| }, |
| { |
| "epoch": 45.53975436328378, |
| "grad_norm": 1.32296884059906, |
| "learning_rate": 0.001, |
| "loss": 1.5473, |
| "step": 140900 |
| }, |
| { |
| "epoch": 45.57207498383969, |
| "grad_norm": 1.3446507453918457, |
| "learning_rate": 0.001, |
| "loss": 1.5304, |
| "step": 141000 |
| }, |
| { |
| "epoch": 45.604395604395606, |
| "grad_norm": 1.742011547088623, |
| "learning_rate": 0.001, |
| "loss": 1.5464, |
| "step": 141100 |
| }, |
| { |
| "epoch": 45.63671622495152, |
| "grad_norm": 1.6908282041549683, |
| "learning_rate": 0.001, |
| "loss": 1.5599, |
| "step": 141200 |
| }, |
| { |
| "epoch": 45.669036845507435, |
| "grad_norm": 1.6784456968307495, |
| "learning_rate": 0.001, |
| "loss": 1.5543, |
| "step": 141300 |
| }, |
| { |
| "epoch": 45.70135746606335, |
| "grad_norm": 1.5689213275909424, |
| "learning_rate": 0.001, |
| "loss": 1.555, |
| "step": 141400 |
| }, |
| { |
| "epoch": 45.733678086619264, |
| "grad_norm": 1.5936824083328247, |
| "learning_rate": 0.001, |
| "loss": 1.5674, |
| "step": 141500 |
| }, |
| { |
| "epoch": 45.76599870717518, |
| "grad_norm": 1.7053321599960327, |
| "learning_rate": 0.001, |
| "loss": 1.5705, |
| "step": 141600 |
| }, |
| { |
| "epoch": 45.79831932773109, |
| "grad_norm": 2.1247267723083496, |
| "learning_rate": 0.001, |
| "loss": 1.5815, |
| "step": 141700 |
| }, |
| { |
| "epoch": 45.83063994828701, |
| "grad_norm": 1.4352737665176392, |
| "learning_rate": 0.001, |
| "loss": 1.573, |
| "step": 141800 |
| }, |
| { |
| "epoch": 45.86296056884292, |
| "grad_norm": 1.7753167152404785, |
| "learning_rate": 0.001, |
| "loss": 1.5797, |
| "step": 141900 |
| }, |
| { |
| "epoch": 45.89528118939884, |
| "grad_norm": 1.3698890209197998, |
| "learning_rate": 0.001, |
| "loss": 1.5852, |
| "step": 142000 |
| }, |
| { |
| "epoch": 45.92760180995475, |
| "grad_norm": 1.4331963062286377, |
| "learning_rate": 0.001, |
| "loss": 1.5806, |
| "step": 142100 |
| }, |
| { |
| "epoch": 45.959922430510666, |
| "grad_norm": 1.4742848873138428, |
| "learning_rate": 0.001, |
| "loss": 1.5736, |
| "step": 142200 |
| }, |
| { |
| "epoch": 45.99224305106658, |
| "grad_norm": 1.542399287223816, |
| "learning_rate": 0.001, |
| "loss": 1.6072, |
| "step": 142300 |
| }, |
| { |
| "epoch": 46.024563671622495, |
| "grad_norm": 1.7629003524780273, |
| "learning_rate": 0.001, |
| "loss": 1.4912, |
| "step": 142400 |
| }, |
| { |
| "epoch": 46.05688429217841, |
| "grad_norm": 1.528564691543579, |
| "learning_rate": 0.001, |
| "loss": 1.4412, |
| "step": 142500 |
| }, |
| { |
| "epoch": 46.089204912734324, |
| "grad_norm": 1.6338632106781006, |
| "learning_rate": 0.001, |
| "loss": 1.4508, |
| "step": 142600 |
| }, |
| { |
| "epoch": 46.12152553329024, |
| "grad_norm": 2.0615508556365967, |
| "learning_rate": 0.001, |
| "loss": 1.4567, |
| "step": 142700 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.3304144144058228, |
| "learning_rate": 0.001, |
| "loss": 1.4729, |
| "step": 142800 |
| }, |
| { |
| "epoch": 46.18616677440207, |
| "grad_norm": 2.0170974731445312, |
| "learning_rate": 0.001, |
| "loss": 1.4745, |
| "step": 142900 |
| }, |
| { |
| "epoch": 46.21848739495798, |
| "grad_norm": 1.8077945709228516, |
| "learning_rate": 0.001, |
| "loss": 1.4868, |
| "step": 143000 |
| }, |
| { |
| "epoch": 46.2508080155139, |
| "grad_norm": 1.6119381189346313, |
| "learning_rate": 0.001, |
| "loss": 1.4822, |
| "step": 143100 |
| }, |
| { |
| "epoch": 46.28312863606981, |
| "grad_norm": 1.403429388999939, |
| "learning_rate": 0.001, |
| "loss": 1.4943, |
| "step": 143200 |
| }, |
| { |
| "epoch": 46.315449256625726, |
| "grad_norm": 1.3802709579467773, |
| "learning_rate": 0.001, |
| "loss": 1.485, |
| "step": 143300 |
| }, |
| { |
| "epoch": 46.34776987718164, |
| "grad_norm": 1.4897013902664185, |
| "learning_rate": 0.001, |
| "loss": 1.4872, |
| "step": 143400 |
| }, |
| { |
| "epoch": 46.380090497737555, |
| "grad_norm": 1.8272331953048706, |
| "learning_rate": 0.001, |
| "loss": 1.4959, |
| "step": 143500 |
| }, |
| { |
| "epoch": 46.41241111829347, |
| "grad_norm": 1.6205357313156128, |
| "learning_rate": 0.001, |
| "loss": 1.4995, |
| "step": 143600 |
| }, |
| { |
| "epoch": 46.444731738849384, |
| "grad_norm": 1.613732099533081, |
| "learning_rate": 0.001, |
| "loss": 1.5116, |
| "step": 143700 |
| }, |
| { |
| "epoch": 46.4770523594053, |
| "grad_norm": 1.474404215812683, |
| "learning_rate": 0.001, |
| "loss": 1.5179, |
| "step": 143800 |
| }, |
| { |
| "epoch": 46.50937297996121, |
| "grad_norm": 1.4578990936279297, |
| "learning_rate": 0.001, |
| "loss": 1.5057, |
| "step": 143900 |
| }, |
| { |
| "epoch": 46.54169360051713, |
| "grad_norm": 1.424071192741394, |
| "learning_rate": 0.001, |
| "loss": 1.5122, |
| "step": 144000 |
| }, |
| { |
| "epoch": 46.57401422107304, |
| "grad_norm": 1.4065488576889038, |
| "learning_rate": 0.001, |
| "loss": 1.5131, |
| "step": 144100 |
| }, |
| { |
| "epoch": 46.60633484162896, |
| "grad_norm": 1.8913878202438354, |
| "learning_rate": 0.001, |
| "loss": 1.5399, |
| "step": 144200 |
| }, |
| { |
| "epoch": 46.63865546218487, |
| "grad_norm": 1.6994715929031372, |
| "learning_rate": 0.001, |
| "loss": 1.5329, |
| "step": 144300 |
| }, |
| { |
| "epoch": 46.670976082740786, |
| "grad_norm": 2.1696887016296387, |
| "learning_rate": 0.001, |
| "loss": 1.5482, |
| "step": 144400 |
| }, |
| { |
| "epoch": 46.7032967032967, |
| "grad_norm": 1.8181955814361572, |
| "learning_rate": 0.001, |
| "loss": 1.5486, |
| "step": 144500 |
| }, |
| { |
| "epoch": 46.735617323852615, |
| "grad_norm": 1.3913260698318481, |
| "learning_rate": 0.001, |
| "loss": 1.5384, |
| "step": 144600 |
| }, |
| { |
| "epoch": 46.76793794440853, |
| "grad_norm": 1.6030380725860596, |
| "learning_rate": 0.001, |
| "loss": 1.5396, |
| "step": 144700 |
| }, |
| { |
| "epoch": 46.800258564964444, |
| "grad_norm": 2.035700559616089, |
| "learning_rate": 0.001, |
| "loss": 1.5648, |
| "step": 144800 |
| }, |
| { |
| "epoch": 46.83257918552036, |
| "grad_norm": 1.7987518310546875, |
| "learning_rate": 0.001, |
| "loss": 1.5471, |
| "step": 144900 |
| }, |
| { |
| "epoch": 46.864899806076274, |
| "grad_norm": 1.4945619106292725, |
| "learning_rate": 0.001, |
| "loss": 1.5659, |
| "step": 145000 |
| }, |
| { |
| "epoch": 46.89722042663219, |
| "grad_norm": 1.7767857313156128, |
| "learning_rate": 0.001, |
| "loss": 1.5632, |
| "step": 145100 |
| }, |
| { |
| "epoch": 46.9295410471881, |
| "grad_norm": 1.6083945035934448, |
| "learning_rate": 0.001, |
| "loss": 1.5382, |
| "step": 145200 |
| }, |
| { |
| "epoch": 46.96186166774402, |
| "grad_norm": 1.7108503580093384, |
| "learning_rate": 0.001, |
| "loss": 1.57, |
| "step": 145300 |
| }, |
| { |
| "epoch": 46.99418228829994, |
| "grad_norm": 1.4575233459472656, |
| "learning_rate": 0.001, |
| "loss": 1.566, |
| "step": 145400 |
| }, |
| { |
| "epoch": 47.02650290885585, |
| "grad_norm": 1.838165283203125, |
| "learning_rate": 0.001, |
| "loss": 1.4575, |
| "step": 145500 |
| }, |
| { |
| "epoch": 47.05882352941177, |
| "grad_norm": 1.6801042556762695, |
| "learning_rate": 0.001, |
| "loss": 1.4235, |
| "step": 145600 |
| }, |
| { |
| "epoch": 47.09114414996768, |
| "grad_norm": 1.862371802330017, |
| "learning_rate": 0.001, |
| "loss": 1.434, |
| "step": 145700 |
| }, |
| { |
| "epoch": 47.1234647705236, |
| "grad_norm": 2.157015800476074, |
| "learning_rate": 0.001, |
| "loss": 1.4335, |
| "step": 145800 |
| }, |
| { |
| "epoch": 47.15578539107951, |
| "grad_norm": 1.6546690464019775, |
| "learning_rate": 0.001, |
| "loss": 1.4435, |
| "step": 145900 |
| }, |
| { |
| "epoch": 47.188106011635426, |
| "grad_norm": 1.6954889297485352, |
| "learning_rate": 0.001, |
| "loss": 1.4419, |
| "step": 146000 |
| }, |
| { |
| "epoch": 47.22042663219134, |
| "grad_norm": 1.6528239250183105, |
| "learning_rate": 0.001, |
| "loss": 1.468, |
| "step": 146100 |
| }, |
| { |
| "epoch": 47.252747252747255, |
| "grad_norm": 1.9358820915222168, |
| "learning_rate": 0.001, |
| "loss": 1.4415, |
| "step": 146200 |
| }, |
| { |
| "epoch": 47.28506787330317, |
| "grad_norm": 1.525715708732605, |
| "learning_rate": 0.001, |
| "loss": 1.483, |
| "step": 146300 |
| }, |
| { |
| "epoch": 47.317388493859085, |
| "grad_norm": 1.8445520401000977, |
| "learning_rate": 0.001, |
| "loss": 1.4701, |
| "step": 146400 |
| }, |
| { |
| "epoch": 47.349709114415, |
| "grad_norm": 1.9748725891113281, |
| "learning_rate": 0.001, |
| "loss": 1.4815, |
| "step": 146500 |
| }, |
| { |
| "epoch": 47.382029734970914, |
| "grad_norm": 1.626009464263916, |
| "learning_rate": 0.001, |
| "loss": 1.4907, |
| "step": 146600 |
| }, |
| { |
| "epoch": 47.41435035552683, |
| "grad_norm": 1.9791555404663086, |
| "learning_rate": 0.001, |
| "loss": 1.4922, |
| "step": 146700 |
| }, |
| { |
| "epoch": 47.44667097608274, |
| "grad_norm": 2.1587910652160645, |
| "learning_rate": 0.001, |
| "loss": 1.4788, |
| "step": 146800 |
| }, |
| { |
| "epoch": 47.47899159663866, |
| "grad_norm": 1.6786390542984009, |
| "learning_rate": 0.001, |
| "loss": 1.483, |
| "step": 146900 |
| }, |
| { |
| "epoch": 47.51131221719457, |
| "grad_norm": 1.9314137697219849, |
| "learning_rate": 0.001, |
| "loss": 1.5109, |
| "step": 147000 |
| }, |
| { |
| "epoch": 47.543632837750486, |
| "grad_norm": 1.7206977605819702, |
| "learning_rate": 0.001, |
| "loss": 1.5194, |
| "step": 147100 |
| }, |
| { |
| "epoch": 47.5759534583064, |
| "grad_norm": 1.6161469221115112, |
| "learning_rate": 0.001, |
| "loss": 1.5116, |
| "step": 147200 |
| }, |
| { |
| "epoch": 47.608274078862316, |
| "grad_norm": 2.3246424198150635, |
| "learning_rate": 0.001, |
| "loss": 1.5192, |
| "step": 147300 |
| }, |
| { |
| "epoch": 47.64059469941823, |
| "grad_norm": 1.691521167755127, |
| "learning_rate": 0.001, |
| "loss": 1.5237, |
| "step": 147400 |
| }, |
| { |
| "epoch": 47.672915319974145, |
| "grad_norm": 2.0103893280029297, |
| "learning_rate": 0.001, |
| "loss": 1.5214, |
| "step": 147500 |
| }, |
| { |
| "epoch": 47.70523594053006, |
| "grad_norm": 1.7042127847671509, |
| "learning_rate": 0.001, |
| "loss": 1.5186, |
| "step": 147600 |
| }, |
| { |
| "epoch": 47.737556561085974, |
| "grad_norm": 1.4514890909194946, |
| "learning_rate": 0.001, |
| "loss": 1.5237, |
| "step": 147700 |
| }, |
| { |
| "epoch": 47.76987718164189, |
| "grad_norm": 1.584375023841858, |
| "learning_rate": 0.001, |
| "loss": 1.5369, |
| "step": 147800 |
| }, |
| { |
| "epoch": 47.8021978021978, |
| "grad_norm": 1.8270056247711182, |
| "learning_rate": 0.001, |
| "loss": 1.5443, |
| "step": 147900 |
| }, |
| { |
| "epoch": 47.83451842275372, |
| "grad_norm": 1.6497583389282227, |
| "learning_rate": 0.001, |
| "loss": 1.5389, |
| "step": 148000 |
| }, |
| { |
| "epoch": 47.86683904330963, |
| "grad_norm": 1.663865089416504, |
| "learning_rate": 0.001, |
| "loss": 1.5383, |
| "step": 148100 |
| }, |
| { |
| "epoch": 47.89915966386555, |
| "grad_norm": 1.5827676057815552, |
| "learning_rate": 0.001, |
| "loss": 1.5506, |
| "step": 148200 |
| }, |
| { |
| "epoch": 47.93148028442146, |
| "grad_norm": 1.687949776649475, |
| "learning_rate": 0.001, |
| "loss": 1.5282, |
| "step": 148300 |
| }, |
| { |
| "epoch": 47.963800904977376, |
| "grad_norm": 1.9395780563354492, |
| "learning_rate": 0.001, |
| "loss": 1.5506, |
| "step": 148400 |
| }, |
| { |
| "epoch": 47.99612152553329, |
| "grad_norm": 2.5985894203186035, |
| "learning_rate": 0.001, |
| "loss": 1.5313, |
| "step": 148500 |
| }, |
| { |
| "epoch": 48.028442146089205, |
| "grad_norm": 1.8825600147247314, |
| "learning_rate": 0.001, |
| "loss": 1.4213, |
| "step": 148600 |
| }, |
| { |
| "epoch": 48.06076276664512, |
| "grad_norm": 1.617006540298462, |
| "learning_rate": 0.001, |
| "loss": 1.4215, |
| "step": 148700 |
| }, |
| { |
| "epoch": 48.093083387201034, |
| "grad_norm": 1.5653728246688843, |
| "learning_rate": 0.001, |
| "loss": 1.4204, |
| "step": 148800 |
| }, |
| { |
| "epoch": 48.12540400775695, |
| "grad_norm": 1.995313286781311, |
| "learning_rate": 0.001, |
| "loss": 1.4367, |
| "step": 148900 |
| }, |
| { |
| "epoch": 48.15772462831286, |
| "grad_norm": 1.708569049835205, |
| "learning_rate": 0.001, |
| "loss": 1.4358, |
| "step": 149000 |
| }, |
| { |
| "epoch": 48.19004524886878, |
| "grad_norm": 1.6492958068847656, |
| "learning_rate": 0.001, |
| "loss": 1.4422, |
| "step": 149100 |
| }, |
| { |
| "epoch": 48.22236586942469, |
| "grad_norm": 1.8807870149612427, |
| "learning_rate": 0.001, |
| "loss": 1.4331, |
| "step": 149200 |
| }, |
| { |
| "epoch": 48.25468648998061, |
| "grad_norm": 1.88623046875, |
| "learning_rate": 0.001, |
| "loss": 1.4425, |
| "step": 149300 |
| }, |
| { |
| "epoch": 48.28700711053652, |
| "grad_norm": 2.2832539081573486, |
| "learning_rate": 0.001, |
| "loss": 1.4621, |
| "step": 149400 |
| }, |
| { |
| "epoch": 48.319327731092436, |
| "grad_norm": 1.6884719133377075, |
| "learning_rate": 0.001, |
| "loss": 1.4365, |
| "step": 149500 |
| }, |
| { |
| "epoch": 48.35164835164835, |
| "grad_norm": 1.904253602027893, |
| "learning_rate": 0.001, |
| "loss": 1.4675, |
| "step": 149600 |
| }, |
| { |
| "epoch": 48.383968972204265, |
| "grad_norm": 1.6859662532806396, |
| "learning_rate": 0.001, |
| "loss": 1.4725, |
| "step": 149700 |
| }, |
| { |
| "epoch": 48.41628959276018, |
| "grad_norm": 2.050351858139038, |
| "learning_rate": 0.001, |
| "loss": 1.4567, |
| "step": 149800 |
| }, |
| { |
| "epoch": 48.448610213316094, |
| "grad_norm": 1.3949456214904785, |
| "learning_rate": 0.001, |
| "loss": 1.489, |
| "step": 149900 |
| }, |
| { |
| "epoch": 48.48093083387201, |
| "grad_norm": 1.9798871278762817, |
| "learning_rate": 0.001, |
| "loss": 1.4699, |
| "step": 150000 |
| }, |
| { |
| "epoch": 48.51325145442792, |
| "grad_norm": 1.7036690711975098, |
| "learning_rate": 0.001, |
| "loss": 1.4783, |
| "step": 150100 |
| }, |
| { |
| "epoch": 48.54557207498384, |
| "grad_norm": 2.0233912467956543, |
| "learning_rate": 0.001, |
| "loss": 1.4945, |
| "step": 150200 |
| }, |
| { |
| "epoch": 48.57789269553975, |
| "grad_norm": 1.6232671737670898, |
| "learning_rate": 0.001, |
| "loss": 1.4934, |
| "step": 150300 |
| }, |
| { |
| "epoch": 48.61021331609567, |
| "grad_norm": 1.8260281085968018, |
| "learning_rate": 0.001, |
| "loss": 1.5012, |
| "step": 150400 |
| }, |
| { |
| "epoch": 48.64253393665158, |
| "grad_norm": 2.079585552215576, |
| "learning_rate": 0.001, |
| "loss": 1.5057, |
| "step": 150500 |
| }, |
| { |
| "epoch": 48.674854557207496, |
| "grad_norm": 1.9923733472824097, |
| "learning_rate": 0.001, |
| "loss": 1.4999, |
| "step": 150600 |
| }, |
| { |
| "epoch": 48.70717517776341, |
| "grad_norm": 2.004462480545044, |
| "learning_rate": 0.001, |
| "loss": 1.5087, |
| "step": 150700 |
| }, |
| { |
| "epoch": 48.739495798319325, |
| "grad_norm": 1.9867020845413208, |
| "learning_rate": 0.001, |
| "loss": 1.5161, |
| "step": 150800 |
| }, |
| { |
| "epoch": 48.77181641887524, |
| "grad_norm": 1.81278657913208, |
| "learning_rate": 0.001, |
| "loss": 1.5151, |
| "step": 150900 |
| }, |
| { |
| "epoch": 48.804137039431154, |
| "grad_norm": 1.93266761302948, |
| "learning_rate": 0.001, |
| "loss": 1.5136, |
| "step": 151000 |
| }, |
| { |
| "epoch": 48.83645765998707, |
| "grad_norm": 2.0835654735565186, |
| "learning_rate": 0.001, |
| "loss": 1.5254, |
| "step": 151100 |
| }, |
| { |
| "epoch": 48.86877828054298, |
| "grad_norm": 1.9827555418014526, |
| "learning_rate": 0.001, |
| "loss": 1.5334, |
| "step": 151200 |
| }, |
| { |
| "epoch": 48.9010989010989, |
| "grad_norm": 1.724138855934143, |
| "learning_rate": 0.001, |
| "loss": 1.5281, |
| "step": 151300 |
| }, |
| { |
| "epoch": 48.93341952165481, |
| "grad_norm": 1.6550005674362183, |
| "learning_rate": 0.001, |
| "loss": 1.5372, |
| "step": 151400 |
| }, |
| { |
| "epoch": 48.96574014221073, |
| "grad_norm": 2.217447519302368, |
| "learning_rate": 0.001, |
| "loss": 1.5312, |
| "step": 151500 |
| }, |
| { |
| "epoch": 48.99806076276664, |
| "grad_norm": 2.1639389991760254, |
| "learning_rate": 0.001, |
| "loss": 1.5187, |
| "step": 151600 |
| }, |
| { |
| "epoch": 49.03038138332256, |
| "grad_norm": 1.84763503074646, |
| "learning_rate": 0.001, |
| "loss": 1.3963, |
| "step": 151700 |
| }, |
| { |
| "epoch": 49.06270200387848, |
| "grad_norm": 1.755767583847046, |
| "learning_rate": 0.001, |
| "loss": 1.4093, |
| "step": 151800 |
| }, |
| { |
| "epoch": 49.09502262443439, |
| "grad_norm": 2.258392333984375, |
| "learning_rate": 0.001, |
| "loss": 1.4131, |
| "step": 151900 |
| }, |
| { |
| "epoch": 49.12734324499031, |
| "grad_norm": 2.0753369331359863, |
| "learning_rate": 0.001, |
| "loss": 1.4137, |
| "step": 152000 |
| }, |
| { |
| "epoch": 49.15966386554622, |
| "grad_norm": 1.6378613710403442, |
| "learning_rate": 0.001, |
| "loss": 1.424, |
| "step": 152100 |
| }, |
| { |
| "epoch": 49.191984486102136, |
| "grad_norm": 7.034304618835449, |
| "learning_rate": 0.001, |
| "loss": 1.4069, |
| "step": 152200 |
| }, |
| { |
| "epoch": 49.22430510665805, |
| "grad_norm": 2.0201122760772705, |
| "learning_rate": 0.001, |
| "loss": 1.426, |
| "step": 152300 |
| }, |
| { |
| "epoch": 49.256625727213965, |
| "grad_norm": 1.9883705377578735, |
| "learning_rate": 0.001, |
| "loss": 1.4445, |
| "step": 152400 |
| }, |
| { |
| "epoch": 49.28894634776988, |
| "grad_norm": 2.0591297149658203, |
| "learning_rate": 0.001, |
| "loss": 1.43, |
| "step": 152500 |
| }, |
| { |
| "epoch": 49.321266968325794, |
| "grad_norm": 1.975419521331787, |
| "learning_rate": 0.001, |
| "loss": 1.4497, |
| "step": 152600 |
| }, |
| { |
| "epoch": 49.35358758888171, |
| "grad_norm": 2.1251235008239746, |
| "learning_rate": 0.001, |
| "loss": 1.465, |
| "step": 152700 |
| }, |
| { |
| "epoch": 49.38590820943762, |
| "grad_norm": 1.797031283378601, |
| "learning_rate": 0.001, |
| "loss": 1.441, |
| "step": 152800 |
| }, |
| { |
| "epoch": 49.41822882999354, |
| "grad_norm": 2.097074270248413, |
| "learning_rate": 0.001, |
| "loss": 1.4631, |
| "step": 152900 |
| }, |
| { |
| "epoch": 49.45054945054945, |
| "grad_norm": 1.9780620336532593, |
| "learning_rate": 0.001, |
| "loss": 1.4636, |
| "step": 153000 |
| }, |
| { |
| "epoch": 49.48287007110537, |
| "grad_norm": 1.8536884784698486, |
| "learning_rate": 0.001, |
| "loss": 1.4603, |
| "step": 153100 |
| }, |
| { |
| "epoch": 49.51519069166128, |
| "grad_norm": 2.2152280807495117, |
| "learning_rate": 0.001, |
| "loss": 1.4746, |
| "step": 153200 |
| }, |
| { |
| "epoch": 49.547511312217196, |
| "grad_norm": 2.028168201446533, |
| "learning_rate": 0.001, |
| "loss": 1.4809, |
| "step": 153300 |
| }, |
| { |
| "epoch": 49.57983193277311, |
| "grad_norm": 2.026210308074951, |
| "learning_rate": 0.001, |
| "loss": 1.4934, |
| "step": 153400 |
| }, |
| { |
| "epoch": 49.612152553329025, |
| "grad_norm": 1.8274730443954468, |
| "learning_rate": 0.001, |
| "loss": 1.4745, |
| "step": 153500 |
| }, |
| { |
| "epoch": 49.64447317388494, |
| "grad_norm": 1.7476829290390015, |
| "learning_rate": 0.001, |
| "loss": 1.4877, |
| "step": 153600 |
| }, |
| { |
| "epoch": 49.676793794440854, |
| "grad_norm": 1.8586665391921997, |
| "learning_rate": 0.001, |
| "loss": 1.4779, |
| "step": 153700 |
| }, |
| { |
| "epoch": 49.70911441499677, |
| "grad_norm": 2.142073154449463, |
| "learning_rate": 0.001, |
| "loss": 1.4904, |
| "step": 153800 |
| }, |
| { |
| "epoch": 49.74143503555268, |
| "grad_norm": 2.4163622856140137, |
| "learning_rate": 0.001, |
| "loss": 1.4913, |
| "step": 153900 |
| }, |
| { |
| "epoch": 49.7737556561086, |
| "grad_norm": 1.5314381122589111, |
| "learning_rate": 0.001, |
| "loss": 1.4888, |
| "step": 154000 |
| }, |
| { |
| "epoch": 49.80607627666451, |
| "grad_norm": 2.1272311210632324, |
| "learning_rate": 0.001, |
| "loss": 1.4829, |
| "step": 154100 |
| }, |
| { |
| "epoch": 49.83839689722043, |
| "grad_norm": 1.8081731796264648, |
| "learning_rate": 0.001, |
| "loss": 1.4969, |
| "step": 154200 |
| }, |
| { |
| "epoch": 49.87071751777634, |
| "grad_norm": 2.0165419578552246, |
| "learning_rate": 0.001, |
| "loss": 1.5123, |
| "step": 154300 |
| }, |
| { |
| "epoch": 49.903038138332256, |
| "grad_norm": 1.895053744316101, |
| "learning_rate": 0.001, |
| "loss": 1.5118, |
| "step": 154400 |
| }, |
| { |
| "epoch": 49.93535875888817, |
| "grad_norm": 1.836590051651001, |
| "learning_rate": 0.001, |
| "loss": 1.5071, |
| "step": 154500 |
| }, |
| { |
| "epoch": 49.967679379444085, |
| "grad_norm": 2.259945869445801, |
| "learning_rate": 0.001, |
| "loss": 1.5239, |
| "step": 154600 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 2.8617236614227295, |
| "learning_rate": 0.001, |
| "loss": 1.4828, |
| "step": 154700 |
| }, |
| { |
| "epoch": 50.032320620555915, |
| "grad_norm": 2.7500596046447754, |
| "learning_rate": 0.001, |
| "loss": 1.3622, |
| "step": 154800 |
| }, |
| { |
| "epoch": 50.06464124111183, |
| "grad_norm": 2.843418598175049, |
| "learning_rate": 0.001, |
| "loss": 1.389, |
| "step": 154900 |
| }, |
| { |
| "epoch": 50.096961861667744, |
| "grad_norm": 1.911349892616272, |
| "learning_rate": 0.001, |
| "loss": 1.4045, |
| "step": 155000 |
| }, |
| { |
| "epoch": 50.12928248222366, |
| "grad_norm": 2.789196491241455, |
| "learning_rate": 0.001, |
| "loss": 1.4044, |
| "step": 155100 |
| }, |
| { |
| "epoch": 50.16160310277957, |
| "grad_norm": 2.3924665451049805, |
| "learning_rate": 0.001, |
| "loss": 1.4078, |
| "step": 155200 |
| }, |
| { |
| "epoch": 50.19392372333549, |
| "grad_norm": 2.2527916431427, |
| "learning_rate": 0.001, |
| "loss": 1.4179, |
| "step": 155300 |
| }, |
| { |
| "epoch": 50.2262443438914, |
| "grad_norm": 2.869682788848877, |
| "learning_rate": 0.001, |
| "loss": 1.4157, |
| "step": 155400 |
| }, |
| { |
| "epoch": 50.25856496444732, |
| "grad_norm": 3.076284646987915, |
| "learning_rate": 0.001, |
| "loss": 1.4194, |
| "step": 155500 |
| }, |
| { |
| "epoch": 50.29088558500323, |
| "grad_norm": 2.405968427658081, |
| "learning_rate": 0.001, |
| "loss": 1.4324, |
| "step": 155600 |
| }, |
| { |
| "epoch": 50.323206205559146, |
| "grad_norm": 2.4868760108947754, |
| "learning_rate": 0.001, |
| "loss": 1.4284, |
| "step": 155700 |
| }, |
| { |
| "epoch": 50.35552682611506, |
| "grad_norm": 2.351515769958496, |
| "learning_rate": 0.001, |
| "loss": 1.4396, |
| "step": 155800 |
| }, |
| { |
| "epoch": 50.387847446670975, |
| "grad_norm": 2.545591354370117, |
| "learning_rate": 0.001, |
| "loss": 1.4433, |
| "step": 155900 |
| }, |
| { |
| "epoch": 50.42016806722689, |
| "grad_norm": 2.52632212638855, |
| "learning_rate": 0.001, |
| "loss": 1.4548, |
| "step": 156000 |
| }, |
| { |
| "epoch": 50.452488687782804, |
| "grad_norm": 2.8453922271728516, |
| "learning_rate": 0.001, |
| "loss": 1.4365, |
| "step": 156100 |
| }, |
| { |
| "epoch": 50.48480930833872, |
| "grad_norm": 2.562619209289551, |
| "learning_rate": 0.001, |
| "loss": 1.4401, |
| "step": 156200 |
| }, |
| { |
| "epoch": 50.51712992889463, |
| "grad_norm": 2.5165834426879883, |
| "learning_rate": 0.001, |
| "loss": 1.4542, |
| "step": 156300 |
| }, |
| { |
| "epoch": 50.54945054945055, |
| "grad_norm": 2.4320249557495117, |
| "learning_rate": 0.001, |
| "loss": 1.4499, |
| "step": 156400 |
| }, |
| { |
| "epoch": 50.58177117000646, |
| "grad_norm": 2.7280805110931396, |
| "learning_rate": 0.001, |
| "loss": 1.4534, |
| "step": 156500 |
| }, |
| { |
| "epoch": 50.61409179056238, |
| "grad_norm": 2.658902645111084, |
| "learning_rate": 0.001, |
| "loss": 1.4678, |
| "step": 156600 |
| }, |
| { |
| "epoch": 50.64641241111829, |
| "grad_norm": 2.3166139125823975, |
| "learning_rate": 0.001, |
| "loss": 1.4596, |
| "step": 156700 |
| }, |
| { |
| "epoch": 50.678733031674206, |
| "grad_norm": 3.5382041931152344, |
| "learning_rate": 0.001, |
| "loss": 1.4593, |
| "step": 156800 |
| }, |
| { |
| "epoch": 50.71105365223012, |
| "grad_norm": 3.2525131702423096, |
| "learning_rate": 0.001, |
| "loss": 1.462, |
| "step": 156900 |
| }, |
| { |
| "epoch": 50.743374272786035, |
| "grad_norm": 2.6875829696655273, |
| "learning_rate": 0.001, |
| "loss": 1.5004, |
| "step": 157000 |
| }, |
| { |
| "epoch": 50.77569489334195, |
| "grad_norm": 2.6666595935821533, |
| "learning_rate": 0.001, |
| "loss": 1.473, |
| "step": 157100 |
| }, |
| { |
| "epoch": 50.808015513897864, |
| "grad_norm": 2.2740638256073, |
| "learning_rate": 0.001, |
| "loss": 1.4764, |
| "step": 157200 |
| }, |
| { |
| "epoch": 50.84033613445378, |
| "grad_norm": 2.457540273666382, |
| "learning_rate": 0.001, |
| "loss": 1.4869, |
| "step": 157300 |
| }, |
| { |
| "epoch": 50.87265675500969, |
| "grad_norm": 7.748457431793213, |
| "learning_rate": 0.001, |
| "loss": 1.4969, |
| "step": 157400 |
| }, |
| { |
| "epoch": 50.90497737556561, |
| "grad_norm": 2.187288284301758, |
| "learning_rate": 0.001, |
| "loss": 1.4913, |
| "step": 157500 |
| }, |
| { |
| "epoch": 50.93729799612152, |
| "grad_norm": 7.933531761169434, |
| "learning_rate": 0.001, |
| "loss": 1.5105, |
| "step": 157600 |
| }, |
| { |
| "epoch": 50.96961861667744, |
| "grad_norm": 2.370905637741089, |
| "learning_rate": 0.001, |
| "loss": 1.4957, |
| "step": 157700 |
| }, |
| { |
| "epoch": 51.00193923723336, |
| "grad_norm": 1.6068298816680908, |
| "learning_rate": 0.001, |
| "loss": 1.5156, |
| "step": 157800 |
| }, |
| { |
| "epoch": 51.03425985778927, |
| "grad_norm": 1.8159526586532593, |
| "learning_rate": 0.001, |
| "loss": 1.358, |
| "step": 157900 |
| }, |
| { |
| "epoch": 51.06658047834519, |
| "grad_norm": 1.58469820022583, |
| "learning_rate": 0.001, |
| "loss": 1.3614, |
| "step": 158000 |
| }, |
| { |
| "epoch": 51.0989010989011, |
| "grad_norm": 1.645398736000061, |
| "learning_rate": 0.001, |
| "loss": 1.3755, |
| "step": 158100 |
| }, |
| { |
| "epoch": 51.13122171945702, |
| "grad_norm": 2.198871612548828, |
| "learning_rate": 0.001, |
| "loss": 1.3954, |
| "step": 158200 |
| }, |
| { |
| "epoch": 51.16354234001293, |
| "grad_norm": 1.8951984643936157, |
| "learning_rate": 0.001, |
| "loss": 1.3857, |
| "step": 158300 |
| }, |
| { |
| "epoch": 51.195862960568846, |
| "grad_norm": 1.9388495683670044, |
| "learning_rate": 0.001, |
| "loss": 1.3751, |
| "step": 158400 |
| }, |
| { |
| "epoch": 51.22818358112476, |
| "grad_norm": 1.894666075706482, |
| "learning_rate": 0.001, |
| "loss": 1.414, |
| "step": 158500 |
| }, |
| { |
| "epoch": 51.260504201680675, |
| "grad_norm": 2.324024200439453, |
| "learning_rate": 0.001, |
| "loss": 1.4045, |
| "step": 158600 |
| }, |
| { |
| "epoch": 51.29282482223659, |
| "grad_norm": 2.4311294555664062, |
| "learning_rate": 0.001, |
| "loss": 1.4152, |
| "step": 158700 |
| }, |
| { |
| "epoch": 51.325145442792504, |
| "grad_norm": 1.6439367532730103, |
| "learning_rate": 0.001, |
| "loss": 1.4066, |
| "step": 158800 |
| }, |
| { |
| "epoch": 51.35746606334842, |
| "grad_norm": 1.458225131034851, |
| "learning_rate": 0.001, |
| "loss": 1.4157, |
| "step": 158900 |
| }, |
| { |
| "epoch": 51.38978668390433, |
| "grad_norm": 1.7206692695617676, |
| "learning_rate": 0.001, |
| "loss": 1.4376, |
| "step": 159000 |
| }, |
| { |
| "epoch": 51.42210730446025, |
| "grad_norm": 2.1803271770477295, |
| "learning_rate": 0.001, |
| "loss": 1.4167, |
| "step": 159100 |
| }, |
| { |
| "epoch": 51.45442792501616, |
| "grad_norm": 1.9700300693511963, |
| "learning_rate": 0.001, |
| "loss": 1.4532, |
| "step": 159200 |
| }, |
| { |
| "epoch": 51.48674854557208, |
| "grad_norm": 1.7611888647079468, |
| "learning_rate": 0.001, |
| "loss": 1.4344, |
| "step": 159300 |
| }, |
| { |
| "epoch": 51.51906916612799, |
| "grad_norm": 1.614646553993225, |
| "learning_rate": 0.001, |
| "loss": 1.4172, |
| "step": 159400 |
| }, |
| { |
| "epoch": 51.551389786683906, |
| "grad_norm": 1.9561604261398315, |
| "learning_rate": 0.001, |
| "loss": 1.4543, |
| "step": 159500 |
| }, |
| { |
| "epoch": 51.58371040723982, |
| "grad_norm": 2.4098892211914062, |
| "learning_rate": 0.001, |
| "loss": 1.4481, |
| "step": 159600 |
| }, |
| { |
| "epoch": 51.616031027795735, |
| "grad_norm": 1.6354035139083862, |
| "learning_rate": 0.001, |
| "loss": 1.4402, |
| "step": 159700 |
| }, |
| { |
| "epoch": 51.64835164835165, |
| "grad_norm": 1.5231209993362427, |
| "learning_rate": 0.001, |
| "loss": 1.4591, |
| "step": 159800 |
| }, |
| { |
| "epoch": 51.680672268907564, |
| "grad_norm": 1.4801390171051025, |
| "learning_rate": 0.001, |
| "loss": 1.4647, |
| "step": 159900 |
| }, |
| { |
| "epoch": 51.71299288946348, |
| "grad_norm": 1.897993803024292, |
| "learning_rate": 0.001, |
| "loss": 1.4731, |
| "step": 160000 |
| }, |
| { |
| "epoch": 51.74531351001939, |
| "grad_norm": 1.9183740615844727, |
| "learning_rate": 0.001, |
| "loss": 1.4652, |
| "step": 160100 |
| }, |
| { |
| "epoch": 51.77763413057531, |
| "grad_norm": 1.6743416786193848, |
| "learning_rate": 0.001, |
| "loss": 1.468, |
| "step": 160200 |
| }, |
| { |
| "epoch": 51.80995475113122, |
| "grad_norm": 1.5103219747543335, |
| "learning_rate": 0.001, |
| "loss": 1.4753, |
| "step": 160300 |
| }, |
| { |
| "epoch": 51.84227537168714, |
| "grad_norm": 1.8089489936828613, |
| "learning_rate": 0.001, |
| "loss": 1.4642, |
| "step": 160400 |
| }, |
| { |
| "epoch": 51.87459599224305, |
| "grad_norm": 1.889352560043335, |
| "learning_rate": 0.001, |
| "loss": 1.4753, |
| "step": 160500 |
| }, |
| { |
| "epoch": 51.906916612798966, |
| "grad_norm": 1.892152190208435, |
| "learning_rate": 0.001, |
| "loss": 1.4855, |
| "step": 160600 |
| }, |
| { |
| "epoch": 51.93923723335488, |
| "grad_norm": 2.1310675144195557, |
| "learning_rate": 0.001, |
| "loss": 1.4741, |
| "step": 160700 |
| }, |
| { |
| "epoch": 51.971557853910795, |
| "grad_norm": 1.6801360845565796, |
| "learning_rate": 0.001, |
| "loss": 1.4823, |
| "step": 160800 |
| }, |
| { |
| "epoch": 52.00387847446671, |
| "grad_norm": 1.5871704816818237, |
| "learning_rate": 0.001, |
| "loss": 1.4805, |
| "step": 160900 |
| }, |
| { |
| "epoch": 52.036199095022624, |
| "grad_norm": 1.765568494796753, |
| "learning_rate": 0.001, |
| "loss": 1.3575, |
| "step": 161000 |
| }, |
| { |
| "epoch": 52.06851971557854, |
| "grad_norm": 1.6732524633407593, |
| "learning_rate": 0.001, |
| "loss": 1.3625, |
| "step": 161100 |
| }, |
| { |
| "epoch": 52.10084033613445, |
| "grad_norm": 2.0522379875183105, |
| "learning_rate": 0.001, |
| "loss": 1.3777, |
| "step": 161200 |
| }, |
| { |
| "epoch": 52.13316095669037, |
| "grad_norm": 1.4788262844085693, |
| "learning_rate": 0.001, |
| "loss": 1.3684, |
| "step": 161300 |
| }, |
| { |
| "epoch": 52.16548157724628, |
| "grad_norm": 1.94754159450531, |
| "learning_rate": 0.001, |
| "loss": 1.364, |
| "step": 161400 |
| }, |
| { |
| "epoch": 52.1978021978022, |
| "grad_norm": 1.6368705034255981, |
| "learning_rate": 0.001, |
| "loss": 1.3813, |
| "step": 161500 |
| }, |
| { |
| "epoch": 52.23012281835811, |
| "grad_norm": 1.5317388772964478, |
| "learning_rate": 0.001, |
| "loss": 1.392, |
| "step": 161600 |
| }, |
| { |
| "epoch": 52.262443438914026, |
| "grad_norm": 1.593577265739441, |
| "learning_rate": 0.001, |
| "loss": 1.3962, |
| "step": 161700 |
| }, |
| { |
| "epoch": 52.29476405946994, |
| "grad_norm": 1.7241520881652832, |
| "learning_rate": 0.001, |
| "loss": 1.3977, |
| "step": 161800 |
| }, |
| { |
| "epoch": 52.327084680025855, |
| "grad_norm": 1.9638442993164062, |
| "learning_rate": 0.001, |
| "loss": 1.3956, |
| "step": 161900 |
| }, |
| { |
| "epoch": 52.35940530058177, |
| "grad_norm": 1.7215887308120728, |
| "learning_rate": 0.001, |
| "loss": 1.4052, |
| "step": 162000 |
| }, |
| { |
| "epoch": 52.391725921137684, |
| "grad_norm": 1.7366405725479126, |
| "learning_rate": 0.001, |
| "loss": 1.4169, |
| "step": 162100 |
| }, |
| { |
| "epoch": 52.4240465416936, |
| "grad_norm": 2.66810941696167, |
| "learning_rate": 0.001, |
| "loss": 1.4099, |
| "step": 162200 |
| }, |
| { |
| "epoch": 52.456367162249514, |
| "grad_norm": 1.9656466245651245, |
| "learning_rate": 0.001, |
| "loss": 1.4154, |
| "step": 162300 |
| }, |
| { |
| "epoch": 52.48868778280543, |
| "grad_norm": 1.7060424089431763, |
| "learning_rate": 0.001, |
| "loss": 1.4269, |
| "step": 162400 |
| }, |
| { |
| "epoch": 52.52100840336134, |
| "grad_norm": 1.6961543560028076, |
| "learning_rate": 0.001, |
| "loss": 1.4199, |
| "step": 162500 |
| }, |
| { |
| "epoch": 52.55332902391726, |
| "grad_norm": 1.670259952545166, |
| "learning_rate": 0.001, |
| "loss": 1.4225, |
| "step": 162600 |
| }, |
| { |
| "epoch": 52.58564964447317, |
| "grad_norm": 1.4814908504486084, |
| "learning_rate": 0.001, |
| "loss": 1.4246, |
| "step": 162700 |
| }, |
| { |
| "epoch": 52.617970265029086, |
| "grad_norm": 1.7170677185058594, |
| "learning_rate": 0.001, |
| "loss": 1.4303, |
| "step": 162800 |
| }, |
| { |
| "epoch": 52.650290885585, |
| "grad_norm": 1.9022291898727417, |
| "learning_rate": 0.001, |
| "loss": 1.4298, |
| "step": 162900 |
| }, |
| { |
| "epoch": 52.682611506140915, |
| "grad_norm": 1.8609496355056763, |
| "learning_rate": 0.001, |
| "loss": 1.4399, |
| "step": 163000 |
| }, |
| { |
| "epoch": 52.71493212669683, |
| "grad_norm": 1.6735694408416748, |
| "learning_rate": 0.001, |
| "loss": 1.4488, |
| "step": 163100 |
| }, |
| { |
| "epoch": 52.747252747252745, |
| "grad_norm": 1.5999531745910645, |
| "learning_rate": 0.001, |
| "loss": 1.4326, |
| "step": 163200 |
| }, |
| { |
| "epoch": 52.77957336780866, |
| "grad_norm": 1.8553581237792969, |
| "learning_rate": 0.001, |
| "loss": 1.4322, |
| "step": 163300 |
| }, |
| { |
| "epoch": 52.811893988364574, |
| "grad_norm": 1.971063256263733, |
| "learning_rate": 0.001, |
| "loss": 1.4361, |
| "step": 163400 |
| }, |
| { |
| "epoch": 52.84421460892049, |
| "grad_norm": 1.682065725326538, |
| "learning_rate": 0.001, |
| "loss": 1.4516, |
| "step": 163500 |
| }, |
| { |
| "epoch": 52.8765352294764, |
| "grad_norm": 1.498920202255249, |
| "learning_rate": 0.001, |
| "loss": 1.4783, |
| "step": 163600 |
| }, |
| { |
| "epoch": 52.90885585003232, |
| "grad_norm": 2.0326061248779297, |
| "learning_rate": 0.001, |
| "loss": 1.4666, |
| "step": 163700 |
| }, |
| { |
| "epoch": 52.94117647058823, |
| "grad_norm": 1.505751609802246, |
| "learning_rate": 0.001, |
| "loss": 1.4689, |
| "step": 163800 |
| }, |
| { |
| "epoch": 52.97349709114415, |
| "grad_norm": 1.652345895767212, |
| "learning_rate": 0.001, |
| "loss": 1.4698, |
| "step": 163900 |
| }, |
| { |
| "epoch": 53.00581771170007, |
| "grad_norm": 1.6074447631835938, |
| "learning_rate": 0.001, |
| "loss": 1.4543, |
| "step": 164000 |
| }, |
| { |
| "epoch": 53.03813833225598, |
| "grad_norm": 1.6570724248886108, |
| "learning_rate": 0.001, |
| "loss": 1.3409, |
| "step": 164100 |
| }, |
| { |
| "epoch": 53.0704589528119, |
| "grad_norm": 1.9990196228027344, |
| "learning_rate": 0.001, |
| "loss": 1.3491, |
| "step": 164200 |
| }, |
| { |
| "epoch": 53.10277957336781, |
| "grad_norm": 1.4702783823013306, |
| "learning_rate": 0.001, |
| "loss": 1.3252, |
| "step": 164300 |
| }, |
| { |
| "epoch": 53.135100193923726, |
| "grad_norm": 2.1267101764678955, |
| "learning_rate": 0.001, |
| "loss": 1.3418, |
| "step": 164400 |
| }, |
| { |
| "epoch": 53.16742081447964, |
| "grad_norm": 1.68787682056427, |
| "learning_rate": 0.001, |
| "loss": 1.3588, |
| "step": 164500 |
| }, |
| { |
| "epoch": 53.199741435035556, |
| "grad_norm": 1.6803096532821655, |
| "learning_rate": 0.001, |
| "loss": 1.3685, |
| "step": 164600 |
| }, |
| { |
| "epoch": 53.23206205559147, |
| "grad_norm": 1.3000693321228027, |
| "learning_rate": 0.001, |
| "loss": 1.3675, |
| "step": 164700 |
| }, |
| { |
| "epoch": 53.264382676147385, |
| "grad_norm": 1.6280704736709595, |
| "learning_rate": 0.001, |
| "loss": 1.3659, |
| "step": 164800 |
| }, |
| { |
| "epoch": 53.2967032967033, |
| "grad_norm": 1.7576541900634766, |
| "learning_rate": 0.001, |
| "loss": 1.383, |
| "step": 164900 |
| }, |
| { |
| "epoch": 53.329023917259214, |
| "grad_norm": 1.8576921224594116, |
| "learning_rate": 0.001, |
| "loss": 1.373, |
| "step": 165000 |
| }, |
| { |
| "epoch": 53.36134453781513, |
| "grad_norm": 1.5620722770690918, |
| "learning_rate": 0.001, |
| "loss": 1.3778, |
| "step": 165100 |
| }, |
| { |
| "epoch": 53.39366515837104, |
| "grad_norm": 1.6440199613571167, |
| "learning_rate": 0.001, |
| "loss": 1.413, |
| "step": 165200 |
| }, |
| { |
| "epoch": 53.42598577892696, |
| "grad_norm": 2.071763038635254, |
| "learning_rate": 0.001, |
| "loss": 1.3942, |
| "step": 165300 |
| }, |
| { |
| "epoch": 53.45830639948287, |
| "grad_norm": 1.619279384613037, |
| "learning_rate": 0.001, |
| "loss": 1.4029, |
| "step": 165400 |
| }, |
| { |
| "epoch": 53.49062702003879, |
| "grad_norm": 1.8446378707885742, |
| "learning_rate": 0.001, |
| "loss": 1.398, |
| "step": 165500 |
| }, |
| { |
| "epoch": 53.5229476405947, |
| "grad_norm": 6.851656913757324, |
| "learning_rate": 0.001, |
| "loss": 1.406, |
| "step": 165600 |
| }, |
| { |
| "epoch": 53.555268261150616, |
| "grad_norm": 1.4749475717544556, |
| "learning_rate": 0.001, |
| "loss": 1.4049, |
| "step": 165700 |
| }, |
| { |
| "epoch": 53.58758888170653, |
| "grad_norm": 1.752159595489502, |
| "learning_rate": 0.001, |
| "loss": 1.3914, |
| "step": 165800 |
| }, |
| { |
| "epoch": 53.619909502262445, |
| "grad_norm": 1.5363788604736328, |
| "learning_rate": 0.001, |
| "loss": 1.4229, |
| "step": 165900 |
| }, |
| { |
| "epoch": 53.65223012281836, |
| "grad_norm": 1.6976779699325562, |
| "learning_rate": 0.001, |
| "loss": 1.4316, |
| "step": 166000 |
| }, |
| { |
| "epoch": 53.684550743374274, |
| "grad_norm": 1.7210675477981567, |
| "learning_rate": 0.001, |
| "loss": 1.4234, |
| "step": 166100 |
| }, |
| { |
| "epoch": 53.71687136393019, |
| "grad_norm": 1.6245074272155762, |
| "learning_rate": 0.001, |
| "loss": 1.4233, |
| "step": 166200 |
| }, |
| { |
| "epoch": 53.7491919844861, |
| "grad_norm": 1.5693413019180298, |
| "learning_rate": 0.001, |
| "loss": 1.4393, |
| "step": 166300 |
| }, |
| { |
| "epoch": 53.78151260504202, |
| "grad_norm": 1.9812067747116089, |
| "learning_rate": 0.001, |
| "loss": 1.4486, |
| "step": 166400 |
| }, |
| { |
| "epoch": 53.81383322559793, |
| "grad_norm": 1.4747971296310425, |
| "learning_rate": 0.001, |
| "loss": 1.4399, |
| "step": 166500 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 1.851163625717163, |
| "learning_rate": 0.001, |
| "loss": 1.4454, |
| "step": 166600 |
| }, |
| { |
| "epoch": 53.87847446670976, |
| "grad_norm": 2.2305819988250732, |
| "learning_rate": 0.001, |
| "loss": 1.4636, |
| "step": 166700 |
| }, |
| { |
| "epoch": 53.910795087265676, |
| "grad_norm": 1.541678547859192, |
| "learning_rate": 0.001, |
| "loss": 1.4269, |
| "step": 166800 |
| }, |
| { |
| "epoch": 53.94311570782159, |
| "grad_norm": 1.715518593788147, |
| "learning_rate": 0.001, |
| "loss": 1.4537, |
| "step": 166900 |
| }, |
| { |
| "epoch": 53.975436328377505, |
| "grad_norm": 1.4480615854263306, |
| "learning_rate": 0.001, |
| "loss": 1.4655, |
| "step": 167000 |
| }, |
| { |
| "epoch": 54.00775694893342, |
| "grad_norm": 1.692636251449585, |
| "learning_rate": 0.001, |
| "loss": 1.4344, |
| "step": 167100 |
| }, |
| { |
| "epoch": 54.040077569489334, |
| "grad_norm": 1.7430903911590576, |
| "learning_rate": 0.001, |
| "loss": 1.3191, |
| "step": 167200 |
| }, |
| { |
| "epoch": 54.07239819004525, |
| "grad_norm": 1.308061957359314, |
| "learning_rate": 0.001, |
| "loss": 1.3301, |
| "step": 167300 |
| }, |
| { |
| "epoch": 54.10471881060116, |
| "grad_norm": 1.7454525232315063, |
| "learning_rate": 0.001, |
| "loss": 1.3403, |
| "step": 167400 |
| }, |
| { |
| "epoch": 54.13703943115708, |
| "grad_norm": 1.5327422618865967, |
| "learning_rate": 0.001, |
| "loss": 1.3231, |
| "step": 167500 |
| }, |
| { |
| "epoch": 54.16936005171299, |
| "grad_norm": 1.5026171207427979, |
| "learning_rate": 0.001, |
| "loss": 1.3433, |
| "step": 167600 |
| }, |
| { |
| "epoch": 54.20168067226891, |
| "grad_norm": 1.3935140371322632, |
| "learning_rate": 0.001, |
| "loss": 1.3591, |
| "step": 167700 |
| }, |
| { |
| "epoch": 54.23400129282482, |
| "grad_norm": 1.617540717124939, |
| "learning_rate": 0.001, |
| "loss": 1.3346, |
| "step": 167800 |
| }, |
| { |
| "epoch": 54.266321913380736, |
| "grad_norm": 2.285799741744995, |
| "learning_rate": 0.001, |
| "loss": 1.3603, |
| "step": 167900 |
| }, |
| { |
| "epoch": 54.29864253393665, |
| "grad_norm": 1.4892338514328003, |
| "learning_rate": 0.001, |
| "loss": 1.3933, |
| "step": 168000 |
| }, |
| { |
| "epoch": 54.330963154492565, |
| "grad_norm": 1.4886034727096558, |
| "learning_rate": 0.001, |
| "loss": 1.3704, |
| "step": 168100 |
| }, |
| { |
| "epoch": 54.36328377504848, |
| "grad_norm": 1.4371678829193115, |
| "learning_rate": 0.001, |
| "loss": 1.3819, |
| "step": 168200 |
| }, |
| { |
| "epoch": 54.395604395604394, |
| "grad_norm": 2.0230281352996826, |
| "learning_rate": 0.001, |
| "loss": 1.3826, |
| "step": 168300 |
| }, |
| { |
| "epoch": 54.42792501616031, |
| "grad_norm": 1.5713683366775513, |
| "learning_rate": 0.001, |
| "loss": 1.3671, |
| "step": 168400 |
| }, |
| { |
| "epoch": 54.46024563671622, |
| "grad_norm": 1.58280348777771, |
| "learning_rate": 0.001, |
| "loss": 1.3767, |
| "step": 168500 |
| }, |
| { |
| "epoch": 54.49256625727214, |
| "grad_norm": 1.4634507894515991, |
| "learning_rate": 0.001, |
| "loss": 1.3916, |
| "step": 168600 |
| }, |
| { |
| "epoch": 54.52488687782805, |
| "grad_norm": 1.4438565969467163, |
| "learning_rate": 0.001, |
| "loss": 1.4001, |
| "step": 168700 |
| }, |
| { |
| "epoch": 54.55720749838397, |
| "grad_norm": 1.6048426628112793, |
| "learning_rate": 0.001, |
| "loss": 1.3831, |
| "step": 168800 |
| }, |
| { |
| "epoch": 54.58952811893988, |
| "grad_norm": 1.7846475839614868, |
| "learning_rate": 0.001, |
| "loss": 1.3927, |
| "step": 168900 |
| }, |
| { |
| "epoch": 54.621848739495796, |
| "grad_norm": 1.8997995853424072, |
| "learning_rate": 0.001, |
| "loss": 1.4055, |
| "step": 169000 |
| }, |
| { |
| "epoch": 54.65416936005171, |
| "grad_norm": 1.6765133142471313, |
| "learning_rate": 0.001, |
| "loss": 1.4073, |
| "step": 169100 |
| }, |
| { |
| "epoch": 54.686489980607625, |
| "grad_norm": 1.3951281309127808, |
| "learning_rate": 0.001, |
| "loss": 1.4121, |
| "step": 169200 |
| }, |
| { |
| "epoch": 54.71881060116354, |
| "grad_norm": 1.5928879976272583, |
| "learning_rate": 0.001, |
| "loss": 1.425, |
| "step": 169300 |
| }, |
| { |
| "epoch": 54.751131221719454, |
| "grad_norm": 1.5925168991088867, |
| "learning_rate": 0.001, |
| "loss": 1.4258, |
| "step": 169400 |
| }, |
| { |
| "epoch": 54.78345184227537, |
| "grad_norm": 1.8010461330413818, |
| "learning_rate": 0.001, |
| "loss": 1.4312, |
| "step": 169500 |
| }, |
| { |
| "epoch": 54.81577246283128, |
| "grad_norm": 2.023576259613037, |
| "learning_rate": 0.001, |
| "loss": 1.4175, |
| "step": 169600 |
| }, |
| { |
| "epoch": 54.8480930833872, |
| "grad_norm": 1.5003081560134888, |
| "learning_rate": 0.001, |
| "loss": 1.445, |
| "step": 169700 |
| }, |
| { |
| "epoch": 54.88041370394311, |
| "grad_norm": 1.3284757137298584, |
| "learning_rate": 0.001, |
| "loss": 1.4262, |
| "step": 169800 |
| }, |
| { |
| "epoch": 54.91273432449903, |
| "grad_norm": 1.90644371509552, |
| "learning_rate": 0.001, |
| "loss": 1.42, |
| "step": 169900 |
| }, |
| { |
| "epoch": 54.94505494505494, |
| "grad_norm": 2.487614393234253, |
| "learning_rate": 0.001, |
| "loss": 1.4205, |
| "step": 170000 |
| }, |
| { |
| "epoch": 54.977375565610856, |
| "grad_norm": 2.273512601852417, |
| "learning_rate": 0.001, |
| "loss": 1.4401, |
| "step": 170100 |
| }, |
| { |
| "epoch": 55.00969618616678, |
| "grad_norm": 1.6120694875717163, |
| "learning_rate": 0.001, |
| "loss": 1.3946, |
| "step": 170200 |
| }, |
| { |
| "epoch": 55.04201680672269, |
| "grad_norm": 1.6807186603546143, |
| "learning_rate": 0.001, |
| "loss": 1.3055, |
| "step": 170300 |
| }, |
| { |
| "epoch": 55.07433742727861, |
| "grad_norm": 1.9705253839492798, |
| "learning_rate": 0.001, |
| "loss": 1.318, |
| "step": 170400 |
| }, |
| { |
| "epoch": 55.10665804783452, |
| "grad_norm": 1.604475498199463, |
| "learning_rate": 0.001, |
| "loss": 1.3274, |
| "step": 170500 |
| }, |
| { |
| "epoch": 55.138978668390436, |
| "grad_norm": 6.541220664978027, |
| "learning_rate": 0.001, |
| "loss": 1.332, |
| "step": 170600 |
| }, |
| { |
| "epoch": 55.17129928894635, |
| "grad_norm": 2.1754775047302246, |
| "learning_rate": 0.001, |
| "loss": 1.3366, |
| "step": 170700 |
| }, |
| { |
| "epoch": 55.203619909502265, |
| "grad_norm": 1.566157341003418, |
| "learning_rate": 0.001, |
| "loss": 1.3424, |
| "step": 170800 |
| }, |
| { |
| "epoch": 55.23594053005818, |
| "grad_norm": 1.5534968376159668, |
| "learning_rate": 0.001, |
| "loss": 1.3364, |
| "step": 170900 |
| }, |
| { |
| "epoch": 55.268261150614094, |
| "grad_norm": 2.1709282398223877, |
| "learning_rate": 0.001, |
| "loss": 1.3536, |
| "step": 171000 |
| }, |
| { |
| "epoch": 55.30058177117001, |
| "grad_norm": 1.6569381952285767, |
| "learning_rate": 0.001, |
| "loss": 1.3321, |
| "step": 171100 |
| }, |
| { |
| "epoch": 55.33290239172592, |
| "grad_norm": 1.758289098739624, |
| "learning_rate": 0.001, |
| "loss": 1.3522, |
| "step": 171200 |
| }, |
| { |
| "epoch": 55.36522301228184, |
| "grad_norm": 1.7878657579421997, |
| "learning_rate": 0.001, |
| "loss": 1.3564, |
| "step": 171300 |
| }, |
| { |
| "epoch": 55.39754363283775, |
| "grad_norm": 1.8341219425201416, |
| "learning_rate": 0.001, |
| "loss": 1.3519, |
| "step": 171400 |
| }, |
| { |
| "epoch": 55.42986425339367, |
| "grad_norm": 1.522491216659546, |
| "learning_rate": 0.001, |
| "loss": 1.3586, |
| "step": 171500 |
| }, |
| { |
| "epoch": 55.46218487394958, |
| "grad_norm": 2.06044864654541, |
| "learning_rate": 0.001, |
| "loss": 1.3746, |
| "step": 171600 |
| }, |
| { |
| "epoch": 55.494505494505496, |
| "grad_norm": 1.9041239023208618, |
| "learning_rate": 0.001, |
| "loss": 1.3811, |
| "step": 171700 |
| }, |
| { |
| "epoch": 55.52682611506141, |
| "grad_norm": 1.7315250635147095, |
| "learning_rate": 0.001, |
| "loss": 1.3812, |
| "step": 171800 |
| }, |
| { |
| "epoch": 55.559146735617325, |
| "grad_norm": 1.8228223323822021, |
| "learning_rate": 0.001, |
| "loss": 1.3803, |
| "step": 171900 |
| }, |
| { |
| "epoch": 55.59146735617324, |
| "grad_norm": 1.6775150299072266, |
| "learning_rate": 0.001, |
| "loss": 1.395, |
| "step": 172000 |
| }, |
| { |
| "epoch": 55.623787976729155, |
| "grad_norm": 1.7854257822036743, |
| "learning_rate": 0.001, |
| "loss": 1.3913, |
| "step": 172100 |
| }, |
| { |
| "epoch": 55.65610859728507, |
| "grad_norm": 1.8913499116897583, |
| "learning_rate": 0.001, |
| "loss": 1.3857, |
| "step": 172200 |
| }, |
| { |
| "epoch": 55.688429217840984, |
| "grad_norm": 1.7007423639297485, |
| "learning_rate": 0.001, |
| "loss": 1.411, |
| "step": 172300 |
| }, |
| { |
| "epoch": 55.7207498383969, |
| "grad_norm": 1.6376309394836426, |
| "learning_rate": 0.001, |
| "loss": 1.3868, |
| "step": 172400 |
| }, |
| { |
| "epoch": 55.75307045895281, |
| "grad_norm": 1.9880905151367188, |
| "learning_rate": 0.001, |
| "loss": 1.4165, |
| "step": 172500 |
| }, |
| { |
| "epoch": 55.78539107950873, |
| "grad_norm": 1.8876042366027832, |
| "learning_rate": 0.001, |
| "loss": 1.3944, |
| "step": 172600 |
| }, |
| { |
| "epoch": 55.81771170006464, |
| "grad_norm": 1.67818284034729, |
| "learning_rate": 0.001, |
| "loss": 1.4349, |
| "step": 172700 |
| }, |
| { |
| "epoch": 55.85003232062056, |
| "grad_norm": 1.440558671951294, |
| "learning_rate": 0.001, |
| "loss": 1.4127, |
| "step": 172800 |
| }, |
| { |
| "epoch": 55.88235294117647, |
| "grad_norm": 1.8764231204986572, |
| "learning_rate": 0.001, |
| "loss": 1.4169, |
| "step": 172900 |
| }, |
| { |
| "epoch": 55.914673561732386, |
| "grad_norm": 2.102301597595215, |
| "learning_rate": 0.001, |
| "loss": 1.4181, |
| "step": 173000 |
| }, |
| { |
| "epoch": 55.9469941822883, |
| "grad_norm": 1.709457278251648, |
| "learning_rate": 0.001, |
| "loss": 1.4222, |
| "step": 173100 |
| }, |
| { |
| "epoch": 55.979314802844215, |
| "grad_norm": 1.5915135145187378, |
| "learning_rate": 0.001, |
| "loss": 1.4292, |
| "step": 173200 |
| }, |
| { |
| "epoch": 56.01163542340013, |
| "grad_norm": 2.3316972255706787, |
| "learning_rate": 0.001, |
| "loss": 1.378, |
| "step": 173300 |
| }, |
| { |
| "epoch": 56.043956043956044, |
| "grad_norm": 2.285443067550659, |
| "learning_rate": 0.001, |
| "loss": 1.2946, |
| "step": 173400 |
| }, |
| { |
| "epoch": 56.07627666451196, |
| "grad_norm": 1.96236252784729, |
| "learning_rate": 0.001, |
| "loss": 1.302, |
| "step": 173500 |
| }, |
| { |
| "epoch": 56.10859728506787, |
| "grad_norm": 2.4459619522094727, |
| "learning_rate": 0.001, |
| "loss": 1.304, |
| "step": 173600 |
| }, |
| { |
| "epoch": 56.14091790562379, |
| "grad_norm": 2.265803813934326, |
| "learning_rate": 0.001, |
| "loss": 1.3218, |
| "step": 173700 |
| }, |
| { |
| "epoch": 56.1732385261797, |
| "grad_norm": 1.7974573373794556, |
| "learning_rate": 0.001, |
| "loss": 1.3045, |
| "step": 173800 |
| }, |
| { |
| "epoch": 56.20555914673562, |
| "grad_norm": 1.8836841583251953, |
| "learning_rate": 0.001, |
| "loss": 1.3102, |
| "step": 173900 |
| }, |
| { |
| "epoch": 56.23787976729153, |
| "grad_norm": 1.8796806335449219, |
| "learning_rate": 0.001, |
| "loss": 1.3573, |
| "step": 174000 |
| }, |
| { |
| "epoch": 56.270200387847446, |
| "grad_norm": 1.8522303104400635, |
| "learning_rate": 0.001, |
| "loss": 1.3435, |
| "step": 174100 |
| }, |
| { |
| "epoch": 56.30252100840336, |
| "grad_norm": 1.5742342472076416, |
| "learning_rate": 0.001, |
| "loss": 1.3479, |
| "step": 174200 |
| }, |
| { |
| "epoch": 56.334841628959275, |
| "grad_norm": 1.655555248260498, |
| "learning_rate": 0.001, |
| "loss": 1.3261, |
| "step": 174300 |
| }, |
| { |
| "epoch": 56.36716224951519, |
| "grad_norm": 1.8719924688339233, |
| "learning_rate": 0.001, |
| "loss": 1.3298, |
| "step": 174400 |
| }, |
| { |
| "epoch": 56.399482870071104, |
| "grad_norm": 1.8895421028137207, |
| "learning_rate": 0.001, |
| "loss": 1.3586, |
| "step": 174500 |
| }, |
| { |
| "epoch": 56.43180349062702, |
| "grad_norm": 1.98106849193573, |
| "learning_rate": 0.001, |
| "loss": 1.3609, |
| "step": 174600 |
| }, |
| { |
| "epoch": 56.46412411118293, |
| "grad_norm": 1.809706687927246, |
| "learning_rate": 0.001, |
| "loss": 1.3688, |
| "step": 174700 |
| }, |
| { |
| "epoch": 56.49644473173885, |
| "grad_norm": 1.962716817855835, |
| "learning_rate": 0.001, |
| "loss": 1.3798, |
| "step": 174800 |
| }, |
| { |
| "epoch": 56.52876535229476, |
| "grad_norm": 2.347630500793457, |
| "learning_rate": 0.001, |
| "loss": 1.3793, |
| "step": 174900 |
| }, |
| { |
| "epoch": 56.56108597285068, |
| "grad_norm": 1.7523319721221924, |
| "learning_rate": 0.001, |
| "loss": 1.3641, |
| "step": 175000 |
| }, |
| { |
| "epoch": 56.59340659340659, |
| "grad_norm": 2.0193288326263428, |
| "learning_rate": 0.001, |
| "loss": 1.3682, |
| "step": 175100 |
| }, |
| { |
| "epoch": 56.625727213962506, |
| "grad_norm": 1.7714121341705322, |
| "learning_rate": 0.001, |
| "loss": 1.3583, |
| "step": 175200 |
| }, |
| { |
| "epoch": 56.65804783451842, |
| "grad_norm": 1.6798348426818848, |
| "learning_rate": 0.001, |
| "loss": 1.371, |
| "step": 175300 |
| }, |
| { |
| "epoch": 56.690368455074335, |
| "grad_norm": 1.8950109481811523, |
| "learning_rate": 0.001, |
| "loss": 1.3803, |
| "step": 175400 |
| }, |
| { |
| "epoch": 56.72268907563025, |
| "grad_norm": 2.079096555709839, |
| "learning_rate": 0.001, |
| "loss": 1.383, |
| "step": 175500 |
| }, |
| { |
| "epoch": 56.755009696186164, |
| "grad_norm": 1.8894920349121094, |
| "learning_rate": 0.001, |
| "loss": 1.3906, |
| "step": 175600 |
| }, |
| { |
| "epoch": 56.78733031674208, |
| "grad_norm": 1.8664498329162598, |
| "learning_rate": 0.001, |
| "loss": 1.3867, |
| "step": 175700 |
| }, |
| { |
| "epoch": 56.81965093729799, |
| "grad_norm": 1.9517700672149658, |
| "learning_rate": 0.001, |
| "loss": 1.4043, |
| "step": 175800 |
| }, |
| { |
| "epoch": 56.85197155785391, |
| "grad_norm": 1.8154116868972778, |
| "learning_rate": 0.001, |
| "loss": 1.4082, |
| "step": 175900 |
| }, |
| { |
| "epoch": 56.88429217840982, |
| "grad_norm": 1.7526671886444092, |
| "learning_rate": 0.001, |
| "loss": 1.4217, |
| "step": 176000 |
| }, |
| { |
| "epoch": 56.91661279896574, |
| "grad_norm": 1.6760896444320679, |
| "learning_rate": 0.001, |
| "loss": 1.411, |
| "step": 176100 |
| }, |
| { |
| "epoch": 56.94893341952165, |
| "grad_norm": 1.8976603746414185, |
| "learning_rate": 0.001, |
| "loss": 1.4064, |
| "step": 176200 |
| }, |
| { |
| "epoch": 56.981254040077566, |
| "grad_norm": 1.5558828115463257, |
| "learning_rate": 0.001, |
| "loss": 1.4082, |
| "step": 176300 |
| }, |
| { |
| "epoch": 57.01357466063349, |
| "grad_norm": 1.8569399118423462, |
| "learning_rate": 0.001, |
| "loss": 1.3491, |
| "step": 176400 |
| }, |
| { |
| "epoch": 57.0458952811894, |
| "grad_norm": 2.1090731620788574, |
| "learning_rate": 0.001, |
| "loss": 1.2905, |
| "step": 176500 |
| }, |
| { |
| "epoch": 57.07821590174532, |
| "grad_norm": 1.87632417678833, |
| "learning_rate": 0.001, |
| "loss": 1.2937, |
| "step": 176600 |
| }, |
| { |
| "epoch": 57.11053652230123, |
| "grad_norm": 2.033785820007324, |
| "learning_rate": 0.001, |
| "loss": 1.2854, |
| "step": 176700 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 3.1014299392700195, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 176800 |
| }, |
| { |
| "epoch": 57.17517776341306, |
| "grad_norm": 1.5487799644470215, |
| "learning_rate": 0.001, |
| "loss": 1.2949, |
| "step": 176900 |
| }, |
| { |
| "epoch": 57.207498383968975, |
| "grad_norm": 1.8203353881835938, |
| "learning_rate": 0.001, |
| "loss": 1.3163, |
| "step": 177000 |
| }, |
| { |
| "epoch": 57.23981900452489, |
| "grad_norm": 2.1298274993896484, |
| "learning_rate": 0.001, |
| "loss": 1.3148, |
| "step": 177100 |
| }, |
| { |
| "epoch": 57.272139625080804, |
| "grad_norm": 2.3572335243225098, |
| "learning_rate": 0.001, |
| "loss": 1.3131, |
| "step": 177200 |
| }, |
| { |
| "epoch": 57.30446024563672, |
| "grad_norm": 1.9870686531066895, |
| "learning_rate": 0.001, |
| "loss": 1.3295, |
| "step": 177300 |
| }, |
| { |
| "epoch": 57.33678086619263, |
| "grad_norm": 1.9002041816711426, |
| "learning_rate": 0.001, |
| "loss": 1.3113, |
| "step": 177400 |
| }, |
| { |
| "epoch": 57.36910148674855, |
| "grad_norm": 2.0429224967956543, |
| "learning_rate": 0.001, |
| "loss": 1.3369, |
| "step": 177500 |
| }, |
| { |
| "epoch": 57.40142210730446, |
| "grad_norm": 2.353167772293091, |
| "learning_rate": 0.001, |
| "loss": 1.3369, |
| "step": 177600 |
| }, |
| { |
| "epoch": 57.43374272786038, |
| "grad_norm": 1.6245406866073608, |
| "learning_rate": 0.001, |
| "loss": 1.3467, |
| "step": 177700 |
| }, |
| { |
| "epoch": 57.46606334841629, |
| "grad_norm": 1.77859628200531, |
| "learning_rate": 0.001, |
| "loss": 1.3399, |
| "step": 177800 |
| }, |
| { |
| "epoch": 57.498383968972206, |
| "grad_norm": 1.8216441869735718, |
| "learning_rate": 0.001, |
| "loss": 1.3448, |
| "step": 177900 |
| }, |
| { |
| "epoch": 57.53070458952812, |
| "grad_norm": 1.8462305068969727, |
| "learning_rate": 0.001, |
| "loss": 1.3505, |
| "step": 178000 |
| }, |
| { |
| "epoch": 57.563025210084035, |
| "grad_norm": 2.081976890563965, |
| "learning_rate": 0.001, |
| "loss": 1.3565, |
| "step": 178100 |
| }, |
| { |
| "epoch": 57.59534583063995, |
| "grad_norm": 1.996206521987915, |
| "learning_rate": 0.001, |
| "loss": 1.3618, |
| "step": 178200 |
| }, |
| { |
| "epoch": 57.627666451195864, |
| "grad_norm": 2.0252761840820312, |
| "learning_rate": 0.001, |
| "loss": 1.3762, |
| "step": 178300 |
| }, |
| { |
| "epoch": 57.65998707175178, |
| "grad_norm": 1.6856194734573364, |
| "learning_rate": 0.001, |
| "loss": 1.3561, |
| "step": 178400 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 1.955251693725586, |
| "learning_rate": 0.001, |
| "loss": 1.3857, |
| "step": 178500 |
| }, |
| { |
| "epoch": 57.72462831286361, |
| "grad_norm": 1.9201545715332031, |
| "learning_rate": 0.001, |
| "loss": 1.3897, |
| "step": 178600 |
| }, |
| { |
| "epoch": 57.75694893341952, |
| "grad_norm": 2.1210641860961914, |
| "learning_rate": 0.001, |
| "loss": 1.3676, |
| "step": 178700 |
| }, |
| { |
| "epoch": 57.78926955397544, |
| "grad_norm": 1.891424536705017, |
| "learning_rate": 0.001, |
| "loss": 1.3912, |
| "step": 178800 |
| }, |
| { |
| "epoch": 57.82159017453135, |
| "grad_norm": 1.859992504119873, |
| "learning_rate": 0.001, |
| "loss": 1.3864, |
| "step": 178900 |
| }, |
| { |
| "epoch": 57.853910795087266, |
| "grad_norm": 2.3439786434173584, |
| "learning_rate": 0.001, |
| "loss": 1.3738, |
| "step": 179000 |
| }, |
| { |
| "epoch": 57.88623141564318, |
| "grad_norm": 1.835242748260498, |
| "learning_rate": 0.001, |
| "loss": 1.3923, |
| "step": 179100 |
| }, |
| { |
| "epoch": 57.918552036199095, |
| "grad_norm": 2.018841028213501, |
| "learning_rate": 0.001, |
| "loss": 1.3933, |
| "step": 179200 |
| }, |
| { |
| "epoch": 57.95087265675501, |
| "grad_norm": 2.056886672973633, |
| "learning_rate": 0.001, |
| "loss": 1.3979, |
| "step": 179300 |
| }, |
| { |
| "epoch": 57.983193277310924, |
| "grad_norm": 2.031996965408325, |
| "learning_rate": 0.001, |
| "loss": 1.3989, |
| "step": 179400 |
| }, |
| { |
| "epoch": 58.01551389786684, |
| "grad_norm": 1.9441972970962524, |
| "learning_rate": 0.001, |
| "loss": 1.3036, |
| "step": 179500 |
| }, |
| { |
| "epoch": 58.04783451842275, |
| "grad_norm": 2.2586889266967773, |
| "learning_rate": 0.001, |
| "loss": 1.2679, |
| "step": 179600 |
| }, |
| { |
| "epoch": 58.08015513897867, |
| "grad_norm": 2.025006055831909, |
| "learning_rate": 0.001, |
| "loss": 1.2601, |
| "step": 179700 |
| }, |
| { |
| "epoch": 58.11247575953458, |
| "grad_norm": 4.384105682373047, |
| "learning_rate": 0.001, |
| "loss": 1.2748, |
| "step": 179800 |
| }, |
| { |
| "epoch": 58.1447963800905, |
| "grad_norm": 1.9416863918304443, |
| "learning_rate": 0.001, |
| "loss": 1.2984, |
| "step": 179900 |
| }, |
| { |
| "epoch": 58.17711700064641, |
| "grad_norm": 2.0471954345703125, |
| "learning_rate": 0.001, |
| "loss": 1.2953, |
| "step": 180000 |
| }, |
| { |
| "epoch": 58.209437621202326, |
| "grad_norm": 1.9570256471633911, |
| "learning_rate": 0.001, |
| "loss": 1.2878, |
| "step": 180100 |
| }, |
| { |
| "epoch": 58.24175824175824, |
| "grad_norm": 2.0297162532806396, |
| "learning_rate": 0.001, |
| "loss": 1.3079, |
| "step": 180200 |
| }, |
| { |
| "epoch": 58.274078862314155, |
| "grad_norm": 1.9571456909179688, |
| "learning_rate": 0.001, |
| "loss": 1.3165, |
| "step": 180300 |
| }, |
| { |
| "epoch": 58.30639948287007, |
| "grad_norm": 3.118157148361206, |
| "learning_rate": 0.001, |
| "loss": 1.3078, |
| "step": 180400 |
| }, |
| { |
| "epoch": 58.338720103425985, |
| "grad_norm": 2.2558462619781494, |
| "learning_rate": 0.001, |
| "loss": 1.3159, |
| "step": 180500 |
| }, |
| { |
| "epoch": 58.3710407239819, |
| "grad_norm": 2.5575222969055176, |
| "learning_rate": 0.001, |
| "loss": 1.3179, |
| "step": 180600 |
| }, |
| { |
| "epoch": 58.403361344537814, |
| "grad_norm": 2.0723485946655273, |
| "learning_rate": 0.001, |
| "loss": 1.3242, |
| "step": 180700 |
| }, |
| { |
| "epoch": 58.43568196509373, |
| "grad_norm": 2.4277594089508057, |
| "learning_rate": 0.001, |
| "loss": 1.3392, |
| "step": 180800 |
| }, |
| { |
| "epoch": 58.46800258564964, |
| "grad_norm": 2.787843942642212, |
| "learning_rate": 0.001, |
| "loss": 1.3318, |
| "step": 180900 |
| }, |
| { |
| "epoch": 58.50032320620556, |
| "grad_norm": 2.410322666168213, |
| "learning_rate": 0.001, |
| "loss": 1.3433, |
| "step": 181000 |
| }, |
| { |
| "epoch": 58.53264382676147, |
| "grad_norm": 2.142733335494995, |
| "learning_rate": 0.001, |
| "loss": 1.3394, |
| "step": 181100 |
| }, |
| { |
| "epoch": 58.56496444731739, |
| "grad_norm": 2.8335678577423096, |
| "learning_rate": 0.001, |
| "loss": 1.3411, |
| "step": 181200 |
| }, |
| { |
| "epoch": 58.5972850678733, |
| "grad_norm": 2.7325358390808105, |
| "learning_rate": 0.001, |
| "loss": 1.3377, |
| "step": 181300 |
| }, |
| { |
| "epoch": 58.629605688429216, |
| "grad_norm": 2.1823666095733643, |
| "learning_rate": 0.001, |
| "loss": 1.3473, |
| "step": 181400 |
| }, |
| { |
| "epoch": 58.66192630898513, |
| "grad_norm": 2.302861213684082, |
| "learning_rate": 0.001, |
| "loss": 1.36, |
| "step": 181500 |
| }, |
| { |
| "epoch": 58.694246929541045, |
| "grad_norm": 2.170161485671997, |
| "learning_rate": 0.001, |
| "loss": 1.3743, |
| "step": 181600 |
| }, |
| { |
| "epoch": 58.72656755009696, |
| "grad_norm": 2.141266345977783, |
| "learning_rate": 0.001, |
| "loss": 1.3702, |
| "step": 181700 |
| }, |
| { |
| "epoch": 58.758888170652874, |
| "grad_norm": 2.1460530757904053, |
| "learning_rate": 0.001, |
| "loss": 1.3566, |
| "step": 181800 |
| }, |
| { |
| "epoch": 58.79120879120879, |
| "grad_norm": 2.7301716804504395, |
| "learning_rate": 0.001, |
| "loss": 1.3815, |
| "step": 181900 |
| }, |
| { |
| "epoch": 58.8235294117647, |
| "grad_norm": 2.333367109298706, |
| "learning_rate": 0.001, |
| "loss": 1.372, |
| "step": 182000 |
| }, |
| { |
| "epoch": 58.85585003232062, |
| "grad_norm": 2.421165943145752, |
| "learning_rate": 0.001, |
| "loss": 1.3698, |
| "step": 182100 |
| }, |
| { |
| "epoch": 58.88817065287653, |
| "grad_norm": 2.190744638442993, |
| "learning_rate": 0.001, |
| "loss": 1.3695, |
| "step": 182200 |
| }, |
| { |
| "epoch": 58.92049127343245, |
| "grad_norm": 2.4283175468444824, |
| "learning_rate": 0.001, |
| "loss": 1.3902, |
| "step": 182300 |
| }, |
| { |
| "epoch": 58.95281189398836, |
| "grad_norm": 2.749220132827759, |
| "learning_rate": 0.001, |
| "loss": 1.3813, |
| "step": 182400 |
| }, |
| { |
| "epoch": 58.985132514544276, |
| "grad_norm": 1.7633317708969116, |
| "learning_rate": 0.001, |
| "loss": 1.3811, |
| "step": 182500 |
| }, |
| { |
| "epoch": 59.0174531351002, |
| "grad_norm": 2.3028452396392822, |
| "learning_rate": 0.001, |
| "loss": 1.3115, |
| "step": 182600 |
| }, |
| { |
| "epoch": 59.04977375565611, |
| "grad_norm": 1.91004478931427, |
| "learning_rate": 0.001, |
| "loss": 1.2523, |
| "step": 182700 |
| }, |
| { |
| "epoch": 59.08209437621203, |
| "grad_norm": 1.9824846982955933, |
| "learning_rate": 0.001, |
| "loss": 1.2691, |
| "step": 182800 |
| }, |
| { |
| "epoch": 59.11441499676794, |
| "grad_norm": 1.9162917137145996, |
| "learning_rate": 0.001, |
| "loss": 1.2654, |
| "step": 182900 |
| }, |
| { |
| "epoch": 59.146735617323856, |
| "grad_norm": 2.174314022064209, |
| "learning_rate": 0.001, |
| "loss": 1.2729, |
| "step": 183000 |
| }, |
| { |
| "epoch": 59.17905623787977, |
| "grad_norm": 1.950962781906128, |
| "learning_rate": 0.001, |
| "loss": 1.2842, |
| "step": 183100 |
| }, |
| { |
| "epoch": 59.211376858435685, |
| "grad_norm": 2.099749803543091, |
| "learning_rate": 0.001, |
| "loss": 1.2827, |
| "step": 183200 |
| }, |
| { |
| "epoch": 59.2436974789916, |
| "grad_norm": 1.7778706550598145, |
| "learning_rate": 0.001, |
| "loss": 1.2878, |
| "step": 183300 |
| }, |
| { |
| "epoch": 59.276018099547514, |
| "grad_norm": 2.519252300262451, |
| "learning_rate": 0.001, |
| "loss": 1.2948, |
| "step": 183400 |
| }, |
| { |
| "epoch": 59.30833872010343, |
| "grad_norm": 2.304509162902832, |
| "learning_rate": 0.001, |
| "loss": 1.2958, |
| "step": 183500 |
| }, |
| { |
| "epoch": 59.34065934065934, |
| "grad_norm": 2.029158353805542, |
| "learning_rate": 0.001, |
| "loss": 1.3077, |
| "step": 183600 |
| }, |
| { |
| "epoch": 59.37297996121526, |
| "grad_norm": 2.7044732570648193, |
| "learning_rate": 0.001, |
| "loss": 1.3067, |
| "step": 183700 |
| }, |
| { |
| "epoch": 59.40530058177117, |
| "grad_norm": 2.5257177352905273, |
| "learning_rate": 0.001, |
| "loss": 1.3115, |
| "step": 183800 |
| }, |
| { |
| "epoch": 59.43762120232709, |
| "grad_norm": 2.422498941421509, |
| "learning_rate": 0.001, |
| "loss": 1.3333, |
| "step": 183900 |
| }, |
| { |
| "epoch": 59.469941822883, |
| "grad_norm": 2.1336445808410645, |
| "learning_rate": 0.001, |
| "loss": 1.3216, |
| "step": 184000 |
| }, |
| { |
| "epoch": 59.502262443438916, |
| "grad_norm": 1.7418371438980103, |
| "learning_rate": 0.001, |
| "loss": 1.3296, |
| "step": 184100 |
| }, |
| { |
| "epoch": 59.53458306399483, |
| "grad_norm": 1.9461792707443237, |
| "learning_rate": 0.001, |
| "loss": 1.3301, |
| "step": 184200 |
| }, |
| { |
| "epoch": 59.566903684550745, |
| "grad_norm": 1.9766813516616821, |
| "learning_rate": 0.001, |
| "loss": 1.3293, |
| "step": 184300 |
| }, |
| { |
| "epoch": 59.59922430510666, |
| "grad_norm": 1.7881988286972046, |
| "learning_rate": 0.001, |
| "loss": 1.3434, |
| "step": 184400 |
| }, |
| { |
| "epoch": 59.631544925662574, |
| "grad_norm": 2.1620521545410156, |
| "learning_rate": 0.001, |
| "loss": 1.3423, |
| "step": 184500 |
| }, |
| { |
| "epoch": 59.66386554621849, |
| "grad_norm": 1.9429798126220703, |
| "learning_rate": 0.001, |
| "loss": 1.3373, |
| "step": 184600 |
| }, |
| { |
| "epoch": 59.6961861667744, |
| "grad_norm": 1.764739990234375, |
| "learning_rate": 0.001, |
| "loss": 1.3384, |
| "step": 184700 |
| }, |
| { |
| "epoch": 59.72850678733032, |
| "grad_norm": 1.7464152574539185, |
| "learning_rate": 0.001, |
| "loss": 1.3531, |
| "step": 184800 |
| }, |
| { |
| "epoch": 59.76082740788623, |
| "grad_norm": 2.2472636699676514, |
| "learning_rate": 0.001, |
| "loss": 1.359, |
| "step": 184900 |
| }, |
| { |
| "epoch": 59.79314802844215, |
| "grad_norm": 1.8305083513259888, |
| "learning_rate": 0.001, |
| "loss": 1.353, |
| "step": 185000 |
| }, |
| { |
| "epoch": 59.82546864899806, |
| "grad_norm": 1.6903504133224487, |
| "learning_rate": 0.001, |
| "loss": 1.3556, |
| "step": 185100 |
| }, |
| { |
| "epoch": 59.857789269553976, |
| "grad_norm": 2.093635082244873, |
| "learning_rate": 0.001, |
| "loss": 1.3483, |
| "step": 185200 |
| }, |
| { |
| "epoch": 59.89010989010989, |
| "grad_norm": 2.056464910507202, |
| "learning_rate": 0.001, |
| "loss": 1.3502, |
| "step": 185300 |
| }, |
| { |
| "epoch": 59.922430510665805, |
| "grad_norm": 1.7775726318359375, |
| "learning_rate": 0.001, |
| "loss": 1.3602, |
| "step": 185400 |
| }, |
| { |
| "epoch": 59.95475113122172, |
| "grad_norm": 1.6163804531097412, |
| "learning_rate": 0.001, |
| "loss": 1.3618, |
| "step": 185500 |
| }, |
| { |
| "epoch": 59.987071751777634, |
| "grad_norm": 1.7184257507324219, |
| "learning_rate": 0.001, |
| "loss": 1.3704, |
| "step": 185600 |
| }, |
| { |
| "epoch": 60.01939237233355, |
| "grad_norm": 1.7940794229507446, |
| "learning_rate": 0.001, |
| "loss": 1.2951, |
| "step": 185700 |
| }, |
| { |
| "epoch": 60.05171299288946, |
| "grad_norm": 1.667309284210205, |
| "learning_rate": 0.001, |
| "loss": 1.248, |
| "step": 185800 |
| }, |
| { |
| "epoch": 60.08403361344538, |
| "grad_norm": 1.5899906158447266, |
| "learning_rate": 0.001, |
| "loss": 1.2604, |
| "step": 185900 |
| }, |
| { |
| "epoch": 60.11635423400129, |
| "grad_norm": 1.9902898073196411, |
| "learning_rate": 0.001, |
| "loss": 1.2485, |
| "step": 186000 |
| }, |
| { |
| "epoch": 60.14867485455721, |
| "grad_norm": 1.522161841392517, |
| "learning_rate": 0.001, |
| "loss": 1.2438, |
| "step": 186100 |
| }, |
| { |
| "epoch": 60.18099547511312, |
| "grad_norm": 1.5227113962173462, |
| "learning_rate": 0.001, |
| "loss": 1.2651, |
| "step": 186200 |
| }, |
| { |
| "epoch": 60.213316095669036, |
| "grad_norm": 1.6835823059082031, |
| "learning_rate": 0.001, |
| "loss": 1.2652, |
| "step": 186300 |
| }, |
| { |
| "epoch": 60.24563671622495, |
| "grad_norm": 1.817858338356018, |
| "learning_rate": 0.001, |
| "loss": 1.2638, |
| "step": 186400 |
| }, |
| { |
| "epoch": 60.277957336780865, |
| "grad_norm": 1.9655687808990479, |
| "learning_rate": 0.001, |
| "loss": 1.2967, |
| "step": 186500 |
| }, |
| { |
| "epoch": 60.31027795733678, |
| "grad_norm": 2.3373231887817383, |
| "learning_rate": 0.001, |
| "loss": 1.2971, |
| "step": 186600 |
| }, |
| { |
| "epoch": 60.342598577892694, |
| "grad_norm": 2.2942612171173096, |
| "learning_rate": 0.001, |
| "loss": 1.2899, |
| "step": 186700 |
| }, |
| { |
| "epoch": 60.37491919844861, |
| "grad_norm": 2.092560291290283, |
| "learning_rate": 0.001, |
| "loss": 1.2844, |
| "step": 186800 |
| }, |
| { |
| "epoch": 60.40723981900452, |
| "grad_norm": 1.5219141244888306, |
| "learning_rate": 0.001, |
| "loss": 1.2982, |
| "step": 186900 |
| }, |
| { |
| "epoch": 60.43956043956044, |
| "grad_norm": 1.906146764755249, |
| "learning_rate": 0.001, |
| "loss": 1.2941, |
| "step": 187000 |
| }, |
| { |
| "epoch": 60.47188106011635, |
| "grad_norm": 1.6990609169006348, |
| "learning_rate": 0.001, |
| "loss": 1.2908, |
| "step": 187100 |
| }, |
| { |
| "epoch": 60.50420168067227, |
| "grad_norm": 1.9348620176315308, |
| "learning_rate": 0.001, |
| "loss": 1.3104, |
| "step": 187200 |
| }, |
| { |
| "epoch": 60.53652230122818, |
| "grad_norm": 1.878623366355896, |
| "learning_rate": 0.001, |
| "loss": 1.3012, |
| "step": 187300 |
| }, |
| { |
| "epoch": 60.568842921784096, |
| "grad_norm": 1.4890978336334229, |
| "learning_rate": 0.001, |
| "loss": 1.342, |
| "step": 187400 |
| }, |
| { |
| "epoch": 60.60116354234001, |
| "grad_norm": 3.4084646701812744, |
| "learning_rate": 0.001, |
| "loss": 1.3242, |
| "step": 187500 |
| }, |
| { |
| "epoch": 60.633484162895925, |
| "grad_norm": 1.784811019897461, |
| "learning_rate": 0.001, |
| "loss": 1.3028, |
| "step": 187600 |
| }, |
| { |
| "epoch": 60.66580478345184, |
| "grad_norm": 1.9564650058746338, |
| "learning_rate": 0.001, |
| "loss": 1.3375, |
| "step": 187700 |
| }, |
| { |
| "epoch": 60.698125404007754, |
| "grad_norm": 1.9819107055664062, |
| "learning_rate": 0.001, |
| "loss": 1.3309, |
| "step": 187800 |
| }, |
| { |
| "epoch": 60.73044602456367, |
| "grad_norm": 1.938475489616394, |
| "learning_rate": 0.001, |
| "loss": 1.3285, |
| "step": 187900 |
| }, |
| { |
| "epoch": 60.762766645119584, |
| "grad_norm": 1.7497060298919678, |
| "learning_rate": 0.001, |
| "loss": 1.3384, |
| "step": 188000 |
| }, |
| { |
| "epoch": 60.7950872656755, |
| "grad_norm": 1.5079044103622437, |
| "learning_rate": 0.001, |
| "loss": 1.342, |
| "step": 188100 |
| }, |
| { |
| "epoch": 60.82740788623141, |
| "grad_norm": 1.7045460939407349, |
| "learning_rate": 0.001, |
| "loss": 1.3398, |
| "step": 188200 |
| }, |
| { |
| "epoch": 60.85972850678733, |
| "grad_norm": 1.809365153312683, |
| "learning_rate": 0.001, |
| "loss": 1.3565, |
| "step": 188300 |
| }, |
| { |
| "epoch": 60.89204912734324, |
| "grad_norm": 1.9220020771026611, |
| "learning_rate": 0.001, |
| "loss": 1.3592, |
| "step": 188400 |
| }, |
| { |
| "epoch": 60.924369747899156, |
| "grad_norm": 1.7121220827102661, |
| "learning_rate": 0.001, |
| "loss": 1.3586, |
| "step": 188500 |
| }, |
| { |
| "epoch": 60.95669036845507, |
| "grad_norm": 1.5285897254943848, |
| "learning_rate": 0.001, |
| "loss": 1.3651, |
| "step": 188600 |
| }, |
| { |
| "epoch": 60.98901098901099, |
| "grad_norm": 1.6838990449905396, |
| "learning_rate": 0.001, |
| "loss": 1.3529, |
| "step": 188700 |
| }, |
| { |
| "epoch": 61.02133160956691, |
| "grad_norm": 1.7248560190200806, |
| "learning_rate": 0.001, |
| "loss": 1.2819, |
| "step": 188800 |
| }, |
| { |
| "epoch": 61.05365223012282, |
| "grad_norm": 1.699639916419983, |
| "learning_rate": 0.001, |
| "loss": 1.2323, |
| "step": 188900 |
| }, |
| { |
| "epoch": 61.085972850678736, |
| "grad_norm": 1.7705409526824951, |
| "learning_rate": 0.001, |
| "loss": 1.2365, |
| "step": 189000 |
| }, |
| { |
| "epoch": 61.11829347123465, |
| "grad_norm": 1.709913969039917, |
| "learning_rate": 0.001, |
| "loss": 1.257, |
| "step": 189100 |
| }, |
| { |
| "epoch": 61.150614091790565, |
| "grad_norm": 1.7027641534805298, |
| "learning_rate": 0.001, |
| "loss": 1.2317, |
| "step": 189200 |
| }, |
| { |
| "epoch": 61.18293471234648, |
| "grad_norm": 1.5179117918014526, |
| "learning_rate": 0.001, |
| "loss": 1.2539, |
| "step": 189300 |
| }, |
| { |
| "epoch": 61.215255332902395, |
| "grad_norm": 3.146793842315674, |
| "learning_rate": 0.001, |
| "loss": 1.2494, |
| "step": 189400 |
| }, |
| { |
| "epoch": 61.24757595345831, |
| "grad_norm": 1.4820927381515503, |
| "learning_rate": 0.001, |
| "loss": 1.26, |
| "step": 189500 |
| }, |
| { |
| "epoch": 61.279896574014224, |
| "grad_norm": 2.3077597618103027, |
| "learning_rate": 0.001, |
| "loss": 1.2502, |
| "step": 189600 |
| }, |
| { |
| "epoch": 61.31221719457014, |
| "grad_norm": 1.8483630418777466, |
| "learning_rate": 0.001, |
| "loss": 1.2922, |
| "step": 189700 |
| }, |
| { |
| "epoch": 61.34453781512605, |
| "grad_norm": 1.5808696746826172, |
| "learning_rate": 0.001, |
| "loss": 1.2773, |
| "step": 189800 |
| }, |
| { |
| "epoch": 61.37685843568197, |
| "grad_norm": 1.8577014207839966, |
| "learning_rate": 0.001, |
| "loss": 1.2712, |
| "step": 189900 |
| }, |
| { |
| "epoch": 61.40917905623788, |
| "grad_norm": 1.837640643119812, |
| "learning_rate": 0.001, |
| "loss": 1.2924, |
| "step": 190000 |
| }, |
| { |
| "epoch": 61.441499676793796, |
| "grad_norm": 1.4489262104034424, |
| "learning_rate": 0.001, |
| "loss": 1.2856, |
| "step": 190100 |
| }, |
| { |
| "epoch": 61.47382029734971, |
| "grad_norm": 1.756155014038086, |
| "learning_rate": 0.001, |
| "loss": 1.2945, |
| "step": 190200 |
| }, |
| { |
| "epoch": 61.506140917905626, |
| "grad_norm": 1.7968300580978394, |
| "learning_rate": 0.001, |
| "loss": 1.3083, |
| "step": 190300 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 1.3242460489273071, |
| "learning_rate": 0.001, |
| "loss": 1.3004, |
| "step": 190400 |
| }, |
| { |
| "epoch": 61.570782159017455, |
| "grad_norm": 1.5521363019943237, |
| "learning_rate": 0.001, |
| "loss": 1.3086, |
| "step": 190500 |
| }, |
| { |
| "epoch": 61.60310277957337, |
| "grad_norm": 1.6019891500473022, |
| "learning_rate": 0.001, |
| "loss": 1.3037, |
| "step": 190600 |
| }, |
| { |
| "epoch": 61.635423400129284, |
| "grad_norm": 2.068342685699463, |
| "learning_rate": 0.001, |
| "loss": 1.3043, |
| "step": 190700 |
| }, |
| { |
| "epoch": 61.6677440206852, |
| "grad_norm": 1.7331029176712036, |
| "learning_rate": 0.001, |
| "loss": 1.3196, |
| "step": 190800 |
| }, |
| { |
| "epoch": 61.70006464124111, |
| "grad_norm": 2.1270153522491455, |
| "learning_rate": 0.001, |
| "loss": 1.3156, |
| "step": 190900 |
| }, |
| { |
| "epoch": 61.73238526179703, |
| "grad_norm": 1.7159593105316162, |
| "learning_rate": 0.001, |
| "loss": 1.3161, |
| "step": 191000 |
| }, |
| { |
| "epoch": 61.76470588235294, |
| "grad_norm": 1.6671262979507446, |
| "learning_rate": 0.001, |
| "loss": 1.3241, |
| "step": 191100 |
| }, |
| { |
| "epoch": 61.79702650290886, |
| "grad_norm": 1.7520173788070679, |
| "learning_rate": 0.001, |
| "loss": 1.3196, |
| "step": 191200 |
| }, |
| { |
| "epoch": 61.82934712346477, |
| "grad_norm": 1.407358169555664, |
| "learning_rate": 0.001, |
| "loss": 1.3465, |
| "step": 191300 |
| }, |
| { |
| "epoch": 61.861667744020686, |
| "grad_norm": 1.7164902687072754, |
| "learning_rate": 0.001, |
| "loss": 1.3407, |
| "step": 191400 |
| }, |
| { |
| "epoch": 61.8939883645766, |
| "grad_norm": 1.714900016784668, |
| "learning_rate": 0.001, |
| "loss": 1.3496, |
| "step": 191500 |
| }, |
| { |
| "epoch": 61.926308985132515, |
| "grad_norm": 2.160857677459717, |
| "learning_rate": 0.001, |
| "loss": 1.3361, |
| "step": 191600 |
| }, |
| { |
| "epoch": 61.95862960568843, |
| "grad_norm": 1.8158776760101318, |
| "learning_rate": 0.001, |
| "loss": 1.3433, |
| "step": 191700 |
| }, |
| { |
| "epoch": 61.990950226244344, |
| "grad_norm": 1.847709059715271, |
| "learning_rate": 0.001, |
| "loss": 1.3598, |
| "step": 191800 |
| }, |
| { |
| "epoch": 62.02327084680026, |
| "grad_norm": 1.7720732688903809, |
| "learning_rate": 0.001, |
| "loss": 1.2744, |
| "step": 191900 |
| }, |
| { |
| "epoch": 62.05559146735617, |
| "grad_norm": 1.5437538623809814, |
| "learning_rate": 0.001, |
| "loss": 1.2139, |
| "step": 192000 |
| }, |
| { |
| "epoch": 62.08791208791209, |
| "grad_norm": 1.6394867897033691, |
| "learning_rate": 0.001, |
| "loss": 1.2301, |
| "step": 192100 |
| }, |
| { |
| "epoch": 62.120232708468, |
| "grad_norm": 1.5179033279418945, |
| "learning_rate": 0.001, |
| "loss": 1.2352, |
| "step": 192200 |
| }, |
| { |
| "epoch": 62.15255332902392, |
| "grad_norm": 2.0513007640838623, |
| "learning_rate": 0.001, |
| "loss": 1.2292, |
| "step": 192300 |
| }, |
| { |
| "epoch": 62.18487394957983, |
| "grad_norm": 1.76735258102417, |
| "learning_rate": 0.001, |
| "loss": 1.2531, |
| "step": 192400 |
| }, |
| { |
| "epoch": 62.217194570135746, |
| "grad_norm": 1.9428764581680298, |
| "learning_rate": 0.001, |
| "loss": 1.2507, |
| "step": 192500 |
| }, |
| { |
| "epoch": 62.24951519069166, |
| "grad_norm": 1.3896160125732422, |
| "learning_rate": 0.001, |
| "loss": 1.2556, |
| "step": 192600 |
| }, |
| { |
| "epoch": 62.281835811247575, |
| "grad_norm": 1.634589433670044, |
| "learning_rate": 0.001, |
| "loss": 1.2598, |
| "step": 192700 |
| }, |
| { |
| "epoch": 62.31415643180349, |
| "grad_norm": 1.906327486038208, |
| "learning_rate": 0.001, |
| "loss": 1.2587, |
| "step": 192800 |
| }, |
| { |
| "epoch": 62.346477052359404, |
| "grad_norm": 1.7237509489059448, |
| "learning_rate": 0.001, |
| "loss": 1.2544, |
| "step": 192900 |
| }, |
| { |
| "epoch": 62.37879767291532, |
| "grad_norm": 1.5861009359359741, |
| "learning_rate": 0.001, |
| "loss": 1.2693, |
| "step": 193000 |
| }, |
| { |
| "epoch": 62.41111829347123, |
| "grad_norm": 1.6999458074569702, |
| "learning_rate": 0.001, |
| "loss": 1.2815, |
| "step": 193100 |
| }, |
| { |
| "epoch": 62.44343891402715, |
| "grad_norm": 1.9093917608261108, |
| "learning_rate": 0.001, |
| "loss": 1.268, |
| "step": 193200 |
| }, |
| { |
| "epoch": 62.47575953458306, |
| "grad_norm": 1.64690363407135, |
| "learning_rate": 0.001, |
| "loss": 1.2734, |
| "step": 193300 |
| }, |
| { |
| "epoch": 62.50808015513898, |
| "grad_norm": 1.8700169324874878, |
| "learning_rate": 0.001, |
| "loss": 1.2684, |
| "step": 193400 |
| }, |
| { |
| "epoch": 62.54040077569489, |
| "grad_norm": 1.862876534461975, |
| "learning_rate": 0.001, |
| "loss": 1.3038, |
| "step": 193500 |
| }, |
| { |
| "epoch": 62.572721396250806, |
| "grad_norm": 1.5933880805969238, |
| "learning_rate": 0.001, |
| "loss": 1.3032, |
| "step": 193600 |
| }, |
| { |
| "epoch": 62.60504201680672, |
| "grad_norm": 1.569177269935608, |
| "learning_rate": 0.001, |
| "loss": 1.2934, |
| "step": 193700 |
| }, |
| { |
| "epoch": 62.637362637362635, |
| "grad_norm": 1.7045800685882568, |
| "learning_rate": 0.001, |
| "loss": 1.3054, |
| "step": 193800 |
| }, |
| { |
| "epoch": 62.66968325791855, |
| "grad_norm": 1.502102017402649, |
| "learning_rate": 0.001, |
| "loss": 1.2973, |
| "step": 193900 |
| }, |
| { |
| "epoch": 62.702003878474464, |
| "grad_norm": 1.3741238117218018, |
| "learning_rate": 0.001, |
| "loss": 1.3095, |
| "step": 194000 |
| }, |
| { |
| "epoch": 62.73432449903038, |
| "grad_norm": 1.508573293685913, |
| "learning_rate": 0.001, |
| "loss": 1.3103, |
| "step": 194100 |
| }, |
| { |
| "epoch": 62.76664511958629, |
| "grad_norm": 1.6713041067123413, |
| "learning_rate": 0.001, |
| "loss": 1.3066, |
| "step": 194200 |
| }, |
| { |
| "epoch": 62.79896574014221, |
| "grad_norm": 2.0911645889282227, |
| "learning_rate": 0.001, |
| "loss": 1.3418, |
| "step": 194300 |
| }, |
| { |
| "epoch": 62.83128636069812, |
| "grad_norm": 1.6197428703308105, |
| "learning_rate": 0.001, |
| "loss": 1.3179, |
| "step": 194400 |
| }, |
| { |
| "epoch": 62.86360698125404, |
| "grad_norm": 1.4563323259353638, |
| "learning_rate": 0.001, |
| "loss": 1.3078, |
| "step": 194500 |
| }, |
| { |
| "epoch": 62.89592760180995, |
| "grad_norm": 1.447568655014038, |
| "learning_rate": 0.001, |
| "loss": 1.3128, |
| "step": 194600 |
| }, |
| { |
| "epoch": 62.928248222365866, |
| "grad_norm": 1.577903151512146, |
| "learning_rate": 0.001, |
| "loss": 1.3518, |
| "step": 194700 |
| }, |
| { |
| "epoch": 62.96056884292178, |
| "grad_norm": 1.6062977313995361, |
| "learning_rate": 0.001, |
| "loss": 1.3364, |
| "step": 194800 |
| }, |
| { |
| "epoch": 62.992889463477695, |
| "grad_norm": 1.6443058252334595, |
| "learning_rate": 0.001, |
| "loss": 1.3298, |
| "step": 194900 |
| }, |
| { |
| "epoch": 63.02521008403362, |
| "grad_norm": 1.7947728633880615, |
| "learning_rate": 0.001, |
| "loss": 1.2272, |
| "step": 195000 |
| }, |
| { |
| "epoch": 63.05753070458953, |
| "grad_norm": 1.7414778470993042, |
| "learning_rate": 0.001, |
| "loss": 1.2157, |
| "step": 195100 |
| }, |
| { |
| "epoch": 63.089851325145446, |
| "grad_norm": 1.7546195983886719, |
| "learning_rate": 0.001, |
| "loss": 1.2209, |
| "step": 195200 |
| }, |
| { |
| "epoch": 63.12217194570136, |
| "grad_norm": 1.7215334177017212, |
| "learning_rate": 0.001, |
| "loss": 1.2283, |
| "step": 195300 |
| }, |
| { |
| "epoch": 63.154492566257275, |
| "grad_norm": 1.6781865358352661, |
| "learning_rate": 0.001, |
| "loss": 1.2115, |
| "step": 195400 |
| }, |
| { |
| "epoch": 63.18681318681319, |
| "grad_norm": 2.100222110748291, |
| "learning_rate": 0.001, |
| "loss": 1.2348, |
| "step": 195500 |
| }, |
| { |
| "epoch": 63.219133807369104, |
| "grad_norm": 1.6543059349060059, |
| "learning_rate": 0.001, |
| "loss": 1.2364, |
| "step": 195600 |
| }, |
| { |
| "epoch": 63.25145442792502, |
| "grad_norm": 1.8639600276947021, |
| "learning_rate": 0.001, |
| "loss": 1.2418, |
| "step": 195700 |
| }, |
| { |
| "epoch": 63.28377504848093, |
| "grad_norm": 1.4900529384613037, |
| "learning_rate": 0.001, |
| "loss": 1.2321, |
| "step": 195800 |
| }, |
| { |
| "epoch": 63.31609566903685, |
| "grad_norm": 1.7212891578674316, |
| "learning_rate": 0.001, |
| "loss": 1.2556, |
| "step": 195900 |
| }, |
| { |
| "epoch": 63.34841628959276, |
| "grad_norm": 1.827954649925232, |
| "learning_rate": 0.001, |
| "loss": 1.2661, |
| "step": 196000 |
| }, |
| { |
| "epoch": 63.38073691014868, |
| "grad_norm": 2.1511712074279785, |
| "learning_rate": 0.001, |
| "loss": 1.2529, |
| "step": 196100 |
| }, |
| { |
| "epoch": 63.41305753070459, |
| "grad_norm": 1.4950047731399536, |
| "learning_rate": 0.001, |
| "loss": 1.2808, |
| "step": 196200 |
| }, |
| { |
| "epoch": 63.445378151260506, |
| "grad_norm": 1.7102982997894287, |
| "learning_rate": 0.001, |
| "loss": 1.2631, |
| "step": 196300 |
| }, |
| { |
| "epoch": 63.47769877181642, |
| "grad_norm": 1.6856714487075806, |
| "learning_rate": 0.001, |
| "loss": 1.2602, |
| "step": 196400 |
| }, |
| { |
| "epoch": 63.510019392372335, |
| "grad_norm": 1.7770227193832397, |
| "learning_rate": 0.001, |
| "loss": 1.2767, |
| "step": 196500 |
| }, |
| { |
| "epoch": 63.54234001292825, |
| "grad_norm": 2.618396520614624, |
| "learning_rate": 0.001, |
| "loss": 1.2671, |
| "step": 196600 |
| }, |
| { |
| "epoch": 63.574660633484164, |
| "grad_norm": 2.238501787185669, |
| "learning_rate": 0.001, |
| "loss": 1.2707, |
| "step": 196700 |
| }, |
| { |
| "epoch": 63.60698125404008, |
| "grad_norm": 1.4879484176635742, |
| "learning_rate": 0.001, |
| "loss": 1.2657, |
| "step": 196800 |
| }, |
| { |
| "epoch": 63.63930187459599, |
| "grad_norm": 1.6655293703079224, |
| "learning_rate": 0.001, |
| "loss": 1.2964, |
| "step": 196900 |
| }, |
| { |
| "epoch": 63.67162249515191, |
| "grad_norm": 1.5491960048675537, |
| "learning_rate": 0.001, |
| "loss": 1.2974, |
| "step": 197000 |
| }, |
| { |
| "epoch": 63.70394311570782, |
| "grad_norm": 2.067685842514038, |
| "learning_rate": 0.001, |
| "loss": 1.2997, |
| "step": 197100 |
| }, |
| { |
| "epoch": 63.73626373626374, |
| "grad_norm": 1.9535744190216064, |
| "learning_rate": 0.001, |
| "loss": 1.298, |
| "step": 197200 |
| }, |
| { |
| "epoch": 63.76858435681965, |
| "grad_norm": 2.4942731857299805, |
| "learning_rate": 0.001, |
| "loss": 1.2957, |
| "step": 197300 |
| }, |
| { |
| "epoch": 63.800904977375566, |
| "grad_norm": 1.6687902212142944, |
| "learning_rate": 0.001, |
| "loss": 1.2998, |
| "step": 197400 |
| }, |
| { |
| "epoch": 63.83322559793148, |
| "grad_norm": 1.4590330123901367, |
| "learning_rate": 0.001, |
| "loss": 1.2974, |
| "step": 197500 |
| }, |
| { |
| "epoch": 63.865546218487395, |
| "grad_norm": 1.4519684314727783, |
| "learning_rate": 0.001, |
| "loss": 1.3264, |
| "step": 197600 |
| }, |
| { |
| "epoch": 63.89786683904331, |
| "grad_norm": 1.676956057548523, |
| "learning_rate": 0.001, |
| "loss": 1.3151, |
| "step": 197700 |
| }, |
| { |
| "epoch": 63.930187459599225, |
| "grad_norm": 1.7003228664398193, |
| "learning_rate": 0.001, |
| "loss": 1.314, |
| "step": 197800 |
| }, |
| { |
| "epoch": 63.96250808015514, |
| "grad_norm": 1.785687804222107, |
| "learning_rate": 0.001, |
| "loss": 1.3328, |
| "step": 197900 |
| }, |
| { |
| "epoch": 63.994828700711054, |
| "grad_norm": 1.8566793203353882, |
| "learning_rate": 0.001, |
| "loss": 1.3199, |
| "step": 198000 |
| }, |
| { |
| "epoch": 64.02714932126698, |
| "grad_norm": 2.3256516456604004, |
| "learning_rate": 0.001, |
| "loss": 1.2133, |
| "step": 198100 |
| }, |
| { |
| "epoch": 64.05946994182288, |
| "grad_norm": 1.8626536130905151, |
| "learning_rate": 0.001, |
| "loss": 1.1985, |
| "step": 198200 |
| }, |
| { |
| "epoch": 64.0917905623788, |
| "grad_norm": 1.5357648134231567, |
| "learning_rate": 0.001, |
| "loss": 1.2016, |
| "step": 198300 |
| }, |
| { |
| "epoch": 64.12411118293471, |
| "grad_norm": 2.2160654067993164, |
| "learning_rate": 0.001, |
| "loss": 1.2191, |
| "step": 198400 |
| }, |
| { |
| "epoch": 64.15643180349063, |
| "grad_norm": 1.763607144355774, |
| "learning_rate": 0.001, |
| "loss": 1.2095, |
| "step": 198500 |
| }, |
| { |
| "epoch": 64.18875242404654, |
| "grad_norm": 5.209096431732178, |
| "learning_rate": 0.001, |
| "loss": 1.2131, |
| "step": 198600 |
| }, |
| { |
| "epoch": 64.22107304460246, |
| "grad_norm": 1.854581356048584, |
| "learning_rate": 0.001, |
| "loss": 1.2368, |
| "step": 198700 |
| }, |
| { |
| "epoch": 64.25339366515837, |
| "grad_norm": 1.6673282384872437, |
| "learning_rate": 0.001, |
| "loss": 1.2243, |
| "step": 198800 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 81.45235443115234, |
| "learning_rate": 0.001, |
| "loss": 1.2377, |
| "step": 198900 |
| }, |
| { |
| "epoch": 64.3180349062702, |
| "grad_norm": 2.5092577934265137, |
| "learning_rate": 0.001, |
| "loss": 1.2312, |
| "step": 199000 |
| }, |
| { |
| "epoch": 64.35035552682612, |
| "grad_norm": 1.8041945695877075, |
| "learning_rate": 0.001, |
| "loss": 1.2396, |
| "step": 199100 |
| }, |
| { |
| "epoch": 64.38267614738203, |
| "grad_norm": 1.6541348695755005, |
| "learning_rate": 0.001, |
| "loss": 1.2544, |
| "step": 199200 |
| }, |
| { |
| "epoch": 64.41499676793795, |
| "grad_norm": 2.0701494216918945, |
| "learning_rate": 0.001, |
| "loss": 1.2392, |
| "step": 199300 |
| }, |
| { |
| "epoch": 64.44731738849386, |
| "grad_norm": 1.6884028911590576, |
| "learning_rate": 0.001, |
| "loss": 1.2514, |
| "step": 199400 |
| }, |
| { |
| "epoch": 64.47963800904978, |
| "grad_norm": 1.775291085243225, |
| "learning_rate": 0.001, |
| "loss": 1.2639, |
| "step": 199500 |
| }, |
| { |
| "epoch": 64.51195862960569, |
| "grad_norm": 2.3357295989990234, |
| "learning_rate": 0.001, |
| "loss": 1.2685, |
| "step": 199600 |
| }, |
| { |
| "epoch": 64.54427925016161, |
| "grad_norm": 2.315829277038574, |
| "learning_rate": 0.001, |
| "loss": 1.2687, |
| "step": 199700 |
| }, |
| { |
| "epoch": 64.57659987071752, |
| "grad_norm": 1.7337130308151245, |
| "learning_rate": 0.001, |
| "loss": 1.2687, |
| "step": 199800 |
| }, |
| { |
| "epoch": 64.60892049127344, |
| "grad_norm": 1.704189658164978, |
| "learning_rate": 0.001, |
| "loss": 1.2801, |
| "step": 199900 |
| }, |
| { |
| "epoch": 64.64124111182934, |
| "grad_norm": 1.7531574964523315, |
| "learning_rate": 0.001, |
| "loss": 1.2692, |
| "step": 200000 |
| }, |
| { |
| "epoch": 64.67356173238527, |
| "grad_norm": 2.4136340618133545, |
| "learning_rate": 0.001, |
| "loss": 1.2724, |
| "step": 200100 |
| }, |
| { |
| "epoch": 64.70588235294117, |
| "grad_norm": 1.4422646760940552, |
| "learning_rate": 0.001, |
| "loss": 1.2875, |
| "step": 200200 |
| }, |
| { |
| "epoch": 64.7382029734971, |
| "grad_norm": 1.8993738889694214, |
| "learning_rate": 0.001, |
| "loss": 1.2896, |
| "step": 200300 |
| }, |
| { |
| "epoch": 64.770523594053, |
| "grad_norm": 1.7030922174453735, |
| "learning_rate": 0.001, |
| "loss": 1.2888, |
| "step": 200400 |
| }, |
| { |
| "epoch": 64.80284421460892, |
| "grad_norm": 1.6060155630111694, |
| "learning_rate": 0.001, |
| "loss": 1.3016, |
| "step": 200500 |
| }, |
| { |
| "epoch": 64.83516483516483, |
| "grad_norm": 4.930392265319824, |
| "learning_rate": 0.001, |
| "loss": 1.299, |
| "step": 200600 |
| }, |
| { |
| "epoch": 64.86748545572075, |
| "grad_norm": 1.7060980796813965, |
| "learning_rate": 0.001, |
| "loss": 1.3025, |
| "step": 200700 |
| }, |
| { |
| "epoch": 64.89980607627666, |
| "grad_norm": 1.754961609840393, |
| "learning_rate": 0.001, |
| "loss": 1.2893, |
| "step": 200800 |
| }, |
| { |
| "epoch": 64.93212669683258, |
| "grad_norm": 1.7960935831069946, |
| "learning_rate": 0.001, |
| "loss": 1.2953, |
| "step": 200900 |
| }, |
| { |
| "epoch": 64.96444731738849, |
| "grad_norm": 2.0267605781555176, |
| "learning_rate": 0.001, |
| "loss": 1.3143, |
| "step": 201000 |
| }, |
| { |
| "epoch": 64.99676793794441, |
| "grad_norm": 2.2149808406829834, |
| "learning_rate": 0.001, |
| "loss": 1.2921, |
| "step": 201100 |
| }, |
| { |
| "epoch": 65.02908855850032, |
| "grad_norm": 2.4537580013275146, |
| "learning_rate": 0.001, |
| "loss": 1.1977, |
| "step": 201200 |
| }, |
| { |
| "epoch": 65.06140917905624, |
| "grad_norm": 1.6021087169647217, |
| "learning_rate": 0.001, |
| "loss": 1.1736, |
| "step": 201300 |
| }, |
| { |
| "epoch": 65.09372979961215, |
| "grad_norm": 1.9216489791870117, |
| "learning_rate": 0.001, |
| "loss": 1.1835, |
| "step": 201400 |
| }, |
| { |
| "epoch": 65.12605042016807, |
| "grad_norm": 2.218745470046997, |
| "learning_rate": 0.001, |
| "loss": 1.2057, |
| "step": 201500 |
| }, |
| { |
| "epoch": 65.15837104072398, |
| "grad_norm": 1.8575019836425781, |
| "learning_rate": 0.001, |
| "loss": 1.2034, |
| "step": 201600 |
| }, |
| { |
| "epoch": 65.1906916612799, |
| "grad_norm": 2.2216298580169678, |
| "learning_rate": 0.001, |
| "loss": 1.2068, |
| "step": 201700 |
| }, |
| { |
| "epoch": 65.2230122818358, |
| "grad_norm": 1.9984310865402222, |
| "learning_rate": 0.001, |
| "loss": 1.1996, |
| "step": 201800 |
| }, |
| { |
| "epoch": 65.25533290239173, |
| "grad_norm": 1.926300048828125, |
| "learning_rate": 0.001, |
| "loss": 1.2163, |
| "step": 201900 |
| }, |
| { |
| "epoch": 65.28765352294764, |
| "grad_norm": 2.752197504043579, |
| "learning_rate": 0.001, |
| "loss": 1.2257, |
| "step": 202000 |
| }, |
| { |
| "epoch": 65.31997414350356, |
| "grad_norm": 1.9841582775115967, |
| "learning_rate": 0.001, |
| "loss": 1.2055, |
| "step": 202100 |
| }, |
| { |
| "epoch": 65.35229476405947, |
| "grad_norm": 2.0355589389801025, |
| "learning_rate": 0.001, |
| "loss": 1.2373, |
| "step": 202200 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 1.867073893547058, |
| "learning_rate": 0.001, |
| "loss": 1.227, |
| "step": 202300 |
| }, |
| { |
| "epoch": 65.4169360051713, |
| "grad_norm": 1.8634486198425293, |
| "learning_rate": 0.001, |
| "loss": 1.2322, |
| "step": 202400 |
| }, |
| { |
| "epoch": 65.44925662572722, |
| "grad_norm": 1.804103136062622, |
| "learning_rate": 0.001, |
| "loss": 1.2494, |
| "step": 202500 |
| }, |
| { |
| "epoch": 65.48157724628312, |
| "grad_norm": 1.7396485805511475, |
| "learning_rate": 0.001, |
| "loss": 1.2445, |
| "step": 202600 |
| }, |
| { |
| "epoch": 65.51389786683905, |
| "grad_norm": 2.412324905395508, |
| "learning_rate": 0.001, |
| "loss": 1.2476, |
| "step": 202700 |
| }, |
| { |
| "epoch": 65.54621848739495, |
| "grad_norm": 1.8536796569824219, |
| "learning_rate": 0.001, |
| "loss": 1.259, |
| "step": 202800 |
| }, |
| { |
| "epoch": 65.57853910795087, |
| "grad_norm": 1.7683600187301636, |
| "learning_rate": 0.001, |
| "loss": 1.2411, |
| "step": 202900 |
| }, |
| { |
| "epoch": 65.61085972850678, |
| "grad_norm": 1.793859839439392, |
| "learning_rate": 0.001, |
| "loss": 1.2516, |
| "step": 203000 |
| }, |
| { |
| "epoch": 65.6431803490627, |
| "grad_norm": 1.6775269508361816, |
| "learning_rate": 0.001, |
| "loss": 1.2822, |
| "step": 203100 |
| }, |
| { |
| "epoch": 65.67550096961861, |
| "grad_norm": 1.8493618965148926, |
| "learning_rate": 0.001, |
| "loss": 1.2761, |
| "step": 203200 |
| }, |
| { |
| "epoch": 65.70782159017453, |
| "grad_norm": 1.6558525562286377, |
| "learning_rate": 0.001, |
| "loss": 1.2712, |
| "step": 203300 |
| }, |
| { |
| "epoch": 65.74014221073044, |
| "grad_norm": 2.0000391006469727, |
| "learning_rate": 0.001, |
| "loss": 1.2709, |
| "step": 203400 |
| }, |
| { |
| "epoch": 65.77246283128636, |
| "grad_norm": 2.3382959365844727, |
| "learning_rate": 0.001, |
| "loss": 1.2822, |
| "step": 203500 |
| }, |
| { |
| "epoch": 65.80478345184227, |
| "grad_norm": 1.79212486743927, |
| "learning_rate": 0.001, |
| "loss": 1.2702, |
| "step": 203600 |
| }, |
| { |
| "epoch": 65.83710407239819, |
| "grad_norm": 1.6742587089538574, |
| "learning_rate": 0.001, |
| "loss": 1.2835, |
| "step": 203700 |
| }, |
| { |
| "epoch": 65.8694246929541, |
| "grad_norm": 2.517657995223999, |
| "learning_rate": 0.001, |
| "loss": 1.2946, |
| "step": 203800 |
| }, |
| { |
| "epoch": 65.90174531351002, |
| "grad_norm": 1.6825872659683228, |
| "learning_rate": 0.001, |
| "loss": 1.2763, |
| "step": 203900 |
| }, |
| { |
| "epoch": 65.93406593406593, |
| "grad_norm": 2.0730693340301514, |
| "learning_rate": 0.001, |
| "loss": 1.3014, |
| "step": 204000 |
| }, |
| { |
| "epoch": 65.96638655462185, |
| "grad_norm": 2.2603330612182617, |
| "learning_rate": 0.001, |
| "loss": 1.3025, |
| "step": 204100 |
| }, |
| { |
| "epoch": 65.99870717517777, |
| "grad_norm": 2.1212308406829834, |
| "learning_rate": 0.001, |
| "loss": 1.2994, |
| "step": 204200 |
| }, |
| { |
| "epoch": 66.03102779573368, |
| "grad_norm": 2.014443874359131, |
| "learning_rate": 0.001, |
| "loss": 1.1643, |
| "step": 204300 |
| }, |
| { |
| "epoch": 66.0633484162896, |
| "grad_norm": 1.8272758722305298, |
| "learning_rate": 0.001, |
| "loss": 1.1669, |
| "step": 204400 |
| }, |
| { |
| "epoch": 66.0956690368455, |
| "grad_norm": 1.8921802043914795, |
| "learning_rate": 0.001, |
| "loss": 1.1608, |
| "step": 204500 |
| }, |
| { |
| "epoch": 66.12798965740143, |
| "grad_norm": 2.260369062423706, |
| "learning_rate": 0.001, |
| "loss": 1.2049, |
| "step": 204600 |
| }, |
| { |
| "epoch": 66.16031027795734, |
| "grad_norm": 2.3378474712371826, |
| "learning_rate": 0.001, |
| "loss": 1.175, |
| "step": 204700 |
| }, |
| { |
| "epoch": 66.19263089851326, |
| "grad_norm": 1.8673335313796997, |
| "learning_rate": 0.001, |
| "loss": 1.1971, |
| "step": 204800 |
| }, |
| { |
| "epoch": 66.22495151906917, |
| "grad_norm": 2.035820960998535, |
| "learning_rate": 0.001, |
| "loss": 1.2128, |
| "step": 204900 |
| }, |
| { |
| "epoch": 66.25727213962509, |
| "grad_norm": 1.6351029872894287, |
| "learning_rate": 0.001, |
| "loss": 1.2055, |
| "step": 205000 |
| }, |
| { |
| "epoch": 66.289592760181, |
| "grad_norm": 2.6478259563446045, |
| "learning_rate": 0.001, |
| "loss": 1.1864, |
| "step": 205100 |
| }, |
| { |
| "epoch": 66.32191338073692, |
| "grad_norm": 2.325328826904297, |
| "learning_rate": 0.001, |
| "loss": 1.2158, |
| "step": 205200 |
| }, |
| { |
| "epoch": 66.35423400129282, |
| "grad_norm": 1.886999487876892, |
| "learning_rate": 0.001, |
| "loss": 1.2038, |
| "step": 205300 |
| }, |
| { |
| "epoch": 66.38655462184875, |
| "grad_norm": 2.104828119277954, |
| "learning_rate": 0.001, |
| "loss": 1.2265, |
| "step": 205400 |
| }, |
| { |
| "epoch": 66.41887524240465, |
| "grad_norm": 1.7025809288024902, |
| "learning_rate": 0.001, |
| "loss": 1.2375, |
| "step": 205500 |
| }, |
| { |
| "epoch": 66.45119586296057, |
| "grad_norm": 2.26194429397583, |
| "learning_rate": 0.001, |
| "loss": 1.2465, |
| "step": 205600 |
| }, |
| { |
| "epoch": 66.48351648351648, |
| "grad_norm": 2.2679295539855957, |
| "learning_rate": 0.001, |
| "loss": 1.2504, |
| "step": 205700 |
| }, |
| { |
| "epoch": 66.5158371040724, |
| "grad_norm": 2.020747184753418, |
| "learning_rate": 0.001, |
| "loss": 1.2355, |
| "step": 205800 |
| }, |
| { |
| "epoch": 66.54815772462831, |
| "grad_norm": 2.4566147327423096, |
| "learning_rate": 0.001, |
| "loss": 1.2323, |
| "step": 205900 |
| }, |
| { |
| "epoch": 66.58047834518423, |
| "grad_norm": 1.9596573114395142, |
| "learning_rate": 0.001, |
| "loss": 1.2406, |
| "step": 206000 |
| }, |
| { |
| "epoch": 66.61279896574014, |
| "grad_norm": 2.363355875015259, |
| "learning_rate": 0.001, |
| "loss": 1.2559, |
| "step": 206100 |
| }, |
| { |
| "epoch": 66.64511958629606, |
| "grad_norm": 2.451207160949707, |
| "learning_rate": 0.001, |
| "loss": 1.2636, |
| "step": 206200 |
| }, |
| { |
| "epoch": 66.67744020685197, |
| "grad_norm": 1.8735805749893188, |
| "learning_rate": 0.001, |
| "loss": 1.2496, |
| "step": 206300 |
| }, |
| { |
| "epoch": 66.70976082740789, |
| "grad_norm": 2.4483344554901123, |
| "learning_rate": 0.001, |
| "loss": 1.2541, |
| "step": 206400 |
| }, |
| { |
| "epoch": 66.7420814479638, |
| "grad_norm": 2.172673463821411, |
| "learning_rate": 0.001, |
| "loss": 1.2746, |
| "step": 206500 |
| }, |
| { |
| "epoch": 66.77440206851972, |
| "grad_norm": 1.755159616470337, |
| "learning_rate": 0.001, |
| "loss": 1.2705, |
| "step": 206600 |
| }, |
| { |
| "epoch": 66.80672268907563, |
| "grad_norm": 2.2386600971221924, |
| "learning_rate": 0.001, |
| "loss": 1.2813, |
| "step": 206700 |
| }, |
| { |
| "epoch": 66.83904330963155, |
| "grad_norm": 2.0152530670166016, |
| "learning_rate": 0.001, |
| "loss": 1.2673, |
| "step": 206800 |
| }, |
| { |
| "epoch": 66.87136393018746, |
| "grad_norm": 2.021780014038086, |
| "learning_rate": 0.001, |
| "loss": 1.2761, |
| "step": 206900 |
| }, |
| { |
| "epoch": 66.90368455074338, |
| "grad_norm": 1.9706413745880127, |
| "learning_rate": 0.001, |
| "loss": 1.2577, |
| "step": 207000 |
| }, |
| { |
| "epoch": 66.93600517129929, |
| "grad_norm": 1.9766478538513184, |
| "learning_rate": 0.001, |
| "loss": 1.2831, |
| "step": 207100 |
| }, |
| { |
| "epoch": 66.96832579185521, |
| "grad_norm": 2.4492130279541016, |
| "learning_rate": 0.001, |
| "loss": 1.2767, |
| "step": 207200 |
| }, |
| { |
| "epoch": 67.00064641241111, |
| "grad_norm": 3.763784885406494, |
| "learning_rate": 0.001, |
| "loss": 1.2547, |
| "step": 207300 |
| }, |
| { |
| "epoch": 67.03296703296704, |
| "grad_norm": 1.9353259801864624, |
| "learning_rate": 0.001, |
| "loss": 1.1491, |
| "step": 207400 |
| }, |
| { |
| "epoch": 67.06528765352294, |
| "grad_norm": 2.1818177700042725, |
| "learning_rate": 0.001, |
| "loss": 1.1611, |
| "step": 207500 |
| }, |
| { |
| "epoch": 67.09760827407887, |
| "grad_norm": 2.305077314376831, |
| "learning_rate": 0.001, |
| "loss": 1.166, |
| "step": 207600 |
| }, |
| { |
| "epoch": 67.12992889463477, |
| "grad_norm": 3.8952369689941406, |
| "learning_rate": 0.001, |
| "loss": 1.1686, |
| "step": 207700 |
| }, |
| { |
| "epoch": 67.1622495151907, |
| "grad_norm": 2.74645733833313, |
| "learning_rate": 0.001, |
| "loss": 1.1822, |
| "step": 207800 |
| }, |
| { |
| "epoch": 67.1945701357466, |
| "grad_norm": 2.66611909866333, |
| "learning_rate": 0.001, |
| "loss": 1.1769, |
| "step": 207900 |
| }, |
| { |
| "epoch": 67.22689075630252, |
| "grad_norm": 4.485386848449707, |
| "learning_rate": 0.001, |
| "loss": 1.1983, |
| "step": 208000 |
| }, |
| { |
| "epoch": 67.25921137685843, |
| "grad_norm": 2.7096071243286133, |
| "learning_rate": 0.001, |
| "loss": 1.1863, |
| "step": 208100 |
| }, |
| { |
| "epoch": 67.29153199741435, |
| "grad_norm": 2.0292038917541504, |
| "learning_rate": 0.001, |
| "loss": 1.1882, |
| "step": 208200 |
| }, |
| { |
| "epoch": 67.32385261797026, |
| "grad_norm": 2.7626845836639404, |
| "learning_rate": 0.001, |
| "loss": 1.2086, |
| "step": 208300 |
| }, |
| { |
| "epoch": 67.35617323852618, |
| "grad_norm": 2.0532233715057373, |
| "learning_rate": 0.001, |
| "loss": 1.2166, |
| "step": 208400 |
| }, |
| { |
| "epoch": 67.38849385908209, |
| "grad_norm": 1.7796385288238525, |
| "learning_rate": 0.001, |
| "loss": 1.2025, |
| "step": 208500 |
| }, |
| { |
| "epoch": 67.42081447963801, |
| "grad_norm": 2.683412790298462, |
| "learning_rate": 0.001, |
| "loss": 1.2139, |
| "step": 208600 |
| }, |
| { |
| "epoch": 67.45313510019392, |
| "grad_norm": 2.9881527423858643, |
| "learning_rate": 0.001, |
| "loss": 1.2179, |
| "step": 208700 |
| }, |
| { |
| "epoch": 67.48545572074984, |
| "grad_norm": 2.409613609313965, |
| "learning_rate": 0.001, |
| "loss": 1.2216, |
| "step": 208800 |
| }, |
| { |
| "epoch": 67.51777634130575, |
| "grad_norm": 2.4920945167541504, |
| "learning_rate": 0.001, |
| "loss": 1.2171, |
| "step": 208900 |
| }, |
| { |
| "epoch": 67.55009696186167, |
| "grad_norm": 2.262345790863037, |
| "learning_rate": 0.001, |
| "loss": 1.2262, |
| "step": 209000 |
| }, |
| { |
| "epoch": 67.58241758241758, |
| "grad_norm": 2.0987603664398193, |
| "learning_rate": 0.001, |
| "loss": 1.243, |
| "step": 209100 |
| }, |
| { |
| "epoch": 67.6147382029735, |
| "grad_norm": 2.0718164443969727, |
| "learning_rate": 0.001, |
| "loss": 1.2397, |
| "step": 209200 |
| }, |
| { |
| "epoch": 67.6470588235294, |
| "grad_norm": 3.155087947845459, |
| "learning_rate": 0.001, |
| "loss": 1.2412, |
| "step": 209300 |
| }, |
| { |
| "epoch": 67.67937944408533, |
| "grad_norm": 2.312764883041382, |
| "learning_rate": 0.001, |
| "loss": 1.2456, |
| "step": 209400 |
| }, |
| { |
| "epoch": 67.71170006464124, |
| "grad_norm": 2.4429471492767334, |
| "learning_rate": 0.001, |
| "loss": 1.2403, |
| "step": 209500 |
| }, |
| { |
| "epoch": 67.74402068519716, |
| "grad_norm": 2.445016384124756, |
| "learning_rate": 0.001, |
| "loss": 1.2438, |
| "step": 209600 |
| }, |
| { |
| "epoch": 67.77634130575306, |
| "grad_norm": 2.587137460708618, |
| "learning_rate": 0.001, |
| "loss": 1.269, |
| "step": 209700 |
| }, |
| { |
| "epoch": 67.80866192630899, |
| "grad_norm": 3.0269641876220703, |
| "learning_rate": 0.001, |
| "loss": 1.2673, |
| "step": 209800 |
| }, |
| { |
| "epoch": 67.8409825468649, |
| "grad_norm": 3.1837799549102783, |
| "learning_rate": 0.001, |
| "loss": 1.2538, |
| "step": 209900 |
| }, |
| { |
| "epoch": 67.87330316742081, |
| "grad_norm": 3.0893819332122803, |
| "learning_rate": 0.001, |
| "loss": 1.2495, |
| "step": 210000 |
| }, |
| { |
| "epoch": 67.90562378797672, |
| "grad_norm": 2.351608991622925, |
| "learning_rate": 0.001, |
| "loss": 1.2596, |
| "step": 210100 |
| }, |
| { |
| "epoch": 67.93794440853264, |
| "grad_norm": 2.225374221801758, |
| "learning_rate": 0.001, |
| "loss": 1.2566, |
| "step": 210200 |
| }, |
| { |
| "epoch": 67.97026502908855, |
| "grad_norm": 2.7132184505462646, |
| "learning_rate": 0.001, |
| "loss": 1.2556, |
| "step": 210300 |
| }, |
| { |
| "epoch": 68.00258564964447, |
| "grad_norm": 1.5622951984405518, |
| "learning_rate": 0.001, |
| "loss": 1.2869, |
| "step": 210400 |
| }, |
| { |
| "epoch": 68.0349062702004, |
| "grad_norm": 2.333732843399048, |
| "learning_rate": 0.001, |
| "loss": 1.1528, |
| "step": 210500 |
| }, |
| { |
| "epoch": 68.0672268907563, |
| "grad_norm": 1.970744252204895, |
| "learning_rate": 0.001, |
| "loss": 1.1497, |
| "step": 210600 |
| }, |
| { |
| "epoch": 68.09954751131222, |
| "grad_norm": 2.23043155670166, |
| "learning_rate": 0.001, |
| "loss": 1.1657, |
| "step": 210700 |
| }, |
| { |
| "epoch": 68.13186813186813, |
| "grad_norm": 1.8723633289337158, |
| "learning_rate": 0.001, |
| "loss": 1.1477, |
| "step": 210800 |
| }, |
| { |
| "epoch": 68.16418875242405, |
| "grad_norm": 2.50980544090271, |
| "learning_rate": 0.001, |
| "loss": 1.1604, |
| "step": 210900 |
| }, |
| { |
| "epoch": 68.19650937297996, |
| "grad_norm": 2.1375374794006348, |
| "learning_rate": 0.001, |
| "loss": 1.1707, |
| "step": 211000 |
| }, |
| { |
| "epoch": 68.22882999353588, |
| "grad_norm": 1.930857539176941, |
| "learning_rate": 0.001, |
| "loss": 1.1758, |
| "step": 211100 |
| }, |
| { |
| "epoch": 68.26115061409179, |
| "grad_norm": 2.3334290981292725, |
| "learning_rate": 0.001, |
| "loss": 1.1827, |
| "step": 211200 |
| }, |
| { |
| "epoch": 68.29347123464771, |
| "grad_norm": 2.729995012283325, |
| "learning_rate": 0.001, |
| "loss": 1.1833, |
| "step": 211300 |
| }, |
| { |
| "epoch": 68.32579185520362, |
| "grad_norm": 3.255042552947998, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 211400 |
| }, |
| { |
| "epoch": 68.35811247575954, |
| "grad_norm": 1.533887505531311, |
| "learning_rate": 0.001, |
| "loss": 1.1909, |
| "step": 211500 |
| }, |
| { |
| "epoch": 68.39043309631545, |
| "grad_norm": 1.8926416635513306, |
| "learning_rate": 0.001, |
| "loss": 1.2026, |
| "step": 211600 |
| }, |
| { |
| "epoch": 68.42275371687137, |
| "grad_norm": 2.0479161739349365, |
| "learning_rate": 0.001, |
| "loss": 1.2002, |
| "step": 211700 |
| }, |
| { |
| "epoch": 68.45507433742728, |
| "grad_norm": 1.674736499786377, |
| "learning_rate": 0.001, |
| "loss": 1.2203, |
| "step": 211800 |
| }, |
| { |
| "epoch": 68.4873949579832, |
| "grad_norm": 2.498920440673828, |
| "learning_rate": 0.001, |
| "loss": 1.2082, |
| "step": 211900 |
| }, |
| { |
| "epoch": 68.5197155785391, |
| "grad_norm": 2.435779571533203, |
| "learning_rate": 0.001, |
| "loss": 1.226, |
| "step": 212000 |
| }, |
| { |
| "epoch": 68.55203619909503, |
| "grad_norm": 1.8328531980514526, |
| "learning_rate": 0.001, |
| "loss": 1.2292, |
| "step": 212100 |
| }, |
| { |
| "epoch": 68.58435681965094, |
| "grad_norm": 2.3320796489715576, |
| "learning_rate": 0.001, |
| "loss": 1.2184, |
| "step": 212200 |
| }, |
| { |
| "epoch": 68.61667744020686, |
| "grad_norm": 1.9430440664291382, |
| "learning_rate": 0.001, |
| "loss": 1.2313, |
| "step": 212300 |
| }, |
| { |
| "epoch": 68.64899806076276, |
| "grad_norm": 2.1987974643707275, |
| "learning_rate": 0.001, |
| "loss": 1.2309, |
| "step": 212400 |
| }, |
| { |
| "epoch": 68.68131868131869, |
| "grad_norm": 1.669899344444275, |
| "learning_rate": 0.001, |
| "loss": 1.2107, |
| "step": 212500 |
| }, |
| { |
| "epoch": 68.7136393018746, |
| "grad_norm": 2.1277084350585938, |
| "learning_rate": 0.001, |
| "loss": 1.2357, |
| "step": 212600 |
| }, |
| { |
| "epoch": 68.74595992243052, |
| "grad_norm": 1.9171146154403687, |
| "learning_rate": 0.001, |
| "loss": 1.2358, |
| "step": 212700 |
| }, |
| { |
| "epoch": 68.77828054298642, |
| "grad_norm": 1.9358433485031128, |
| "learning_rate": 0.001, |
| "loss": 1.2294, |
| "step": 212800 |
| }, |
| { |
| "epoch": 68.81060116354234, |
| "grad_norm": 1.9799690246582031, |
| "learning_rate": 0.001, |
| "loss": 1.242, |
| "step": 212900 |
| }, |
| { |
| "epoch": 68.84292178409825, |
| "grad_norm": 1.7972420454025269, |
| "learning_rate": 0.001, |
| "loss": 1.2474, |
| "step": 213000 |
| }, |
| { |
| "epoch": 68.87524240465417, |
| "grad_norm": 1.9665274620056152, |
| "learning_rate": 0.001, |
| "loss": 1.2567, |
| "step": 213100 |
| }, |
| { |
| "epoch": 68.90756302521008, |
| "grad_norm": 2.131694793701172, |
| "learning_rate": 0.001, |
| "loss": 1.2475, |
| "step": 213200 |
| }, |
| { |
| "epoch": 68.939883645766, |
| "grad_norm": 1.7594505548477173, |
| "learning_rate": 0.001, |
| "loss": 1.2593, |
| "step": 213300 |
| }, |
| { |
| "epoch": 68.97220426632191, |
| "grad_norm": 5.345921993255615, |
| "learning_rate": 0.001, |
| "loss": 1.2423, |
| "step": 213400 |
| }, |
| { |
| "epoch": 69.00452488687783, |
| "grad_norm": 1.549072504043579, |
| "learning_rate": 0.001, |
| "loss": 1.2657, |
| "step": 213500 |
| }, |
| { |
| "epoch": 69.03684550743374, |
| "grad_norm": 1.8662244081497192, |
| "learning_rate": 0.001, |
| "loss": 1.1174, |
| "step": 213600 |
| }, |
| { |
| "epoch": 69.06916612798966, |
| "grad_norm": 1.5122578144073486, |
| "learning_rate": 0.001, |
| "loss": 1.1488, |
| "step": 213700 |
| }, |
| { |
| "epoch": 69.10148674854557, |
| "grad_norm": 2.3468809127807617, |
| "learning_rate": 0.001, |
| "loss": 1.1498, |
| "step": 213800 |
| }, |
| { |
| "epoch": 69.13380736910149, |
| "grad_norm": 1.8483396768569946, |
| "learning_rate": 0.001, |
| "loss": 1.1517, |
| "step": 213900 |
| }, |
| { |
| "epoch": 69.1661279896574, |
| "grad_norm": 1.6163078546524048, |
| "learning_rate": 0.001, |
| "loss": 1.1765, |
| "step": 214000 |
| }, |
| { |
| "epoch": 69.19844861021332, |
| "grad_norm": 1.5859019756317139, |
| "learning_rate": 0.001, |
| "loss": 1.1647, |
| "step": 214100 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 1.5599806308746338, |
| "learning_rate": 0.001, |
| "loss": 1.1526, |
| "step": 214200 |
| }, |
| { |
| "epoch": 69.26308985132515, |
| "grad_norm": 2.253723382949829, |
| "learning_rate": 0.001, |
| "loss": 1.1671, |
| "step": 214300 |
| }, |
| { |
| "epoch": 69.29541047188106, |
| "grad_norm": 1.7934225797653198, |
| "learning_rate": 0.001, |
| "loss": 1.1669, |
| "step": 214400 |
| }, |
| { |
| "epoch": 69.32773109243698, |
| "grad_norm": 1.9382452964782715, |
| "learning_rate": 0.001, |
| "loss": 1.165, |
| "step": 214500 |
| }, |
| { |
| "epoch": 69.36005171299288, |
| "grad_norm": 2.22196626663208, |
| "learning_rate": 0.001, |
| "loss": 1.1777, |
| "step": 214600 |
| }, |
| { |
| "epoch": 69.3923723335488, |
| "grad_norm": 1.5601698160171509, |
| "learning_rate": 0.001, |
| "loss": 1.1841, |
| "step": 214700 |
| }, |
| { |
| "epoch": 69.42469295410471, |
| "grad_norm": 2.0012974739074707, |
| "learning_rate": 0.001, |
| "loss": 1.1786, |
| "step": 214800 |
| }, |
| { |
| "epoch": 69.45701357466064, |
| "grad_norm": 1.9047846794128418, |
| "learning_rate": 0.001, |
| "loss": 1.2062, |
| "step": 214900 |
| }, |
| { |
| "epoch": 69.48933419521654, |
| "grad_norm": 1.562074899673462, |
| "learning_rate": 0.001, |
| "loss": 1.2138, |
| "step": 215000 |
| }, |
| { |
| "epoch": 69.52165481577246, |
| "grad_norm": 1.7316213846206665, |
| "learning_rate": 0.001, |
| "loss": 1.2113, |
| "step": 215100 |
| }, |
| { |
| "epoch": 69.55397543632837, |
| "grad_norm": 1.642343282699585, |
| "learning_rate": 0.001, |
| "loss": 1.2102, |
| "step": 215200 |
| }, |
| { |
| "epoch": 69.5862960568843, |
| "grad_norm": 1.579736590385437, |
| "learning_rate": 0.001, |
| "loss": 1.2164, |
| "step": 215300 |
| }, |
| { |
| "epoch": 69.6186166774402, |
| "grad_norm": 1.445095181465149, |
| "learning_rate": 0.001, |
| "loss": 1.2076, |
| "step": 215400 |
| }, |
| { |
| "epoch": 69.65093729799612, |
| "grad_norm": 1.5851157903671265, |
| "learning_rate": 0.001, |
| "loss": 1.2212, |
| "step": 215500 |
| }, |
| { |
| "epoch": 69.68325791855203, |
| "grad_norm": 1.7522008419036865, |
| "learning_rate": 0.001, |
| "loss": 1.2242, |
| "step": 215600 |
| }, |
| { |
| "epoch": 69.71557853910795, |
| "grad_norm": 1.8869768381118774, |
| "learning_rate": 0.001, |
| "loss": 1.2357, |
| "step": 215700 |
| }, |
| { |
| "epoch": 69.74789915966386, |
| "grad_norm": 1.9056206941604614, |
| "learning_rate": 0.001, |
| "loss": 1.2266, |
| "step": 215800 |
| }, |
| { |
| "epoch": 69.78021978021978, |
| "grad_norm": 1.9747616052627563, |
| "learning_rate": 0.001, |
| "loss": 1.2128, |
| "step": 215900 |
| }, |
| { |
| "epoch": 69.81254040077569, |
| "grad_norm": 1.853060245513916, |
| "learning_rate": 0.001, |
| "loss": 1.2285, |
| "step": 216000 |
| }, |
| { |
| "epoch": 69.84486102133161, |
| "grad_norm": 1.8886538743972778, |
| "learning_rate": 0.001, |
| "loss": 1.2295, |
| "step": 216100 |
| }, |
| { |
| "epoch": 69.87718164188752, |
| "grad_norm": 2.0137014389038086, |
| "learning_rate": 0.001, |
| "loss": 1.2251, |
| "step": 216200 |
| }, |
| { |
| "epoch": 69.90950226244344, |
| "grad_norm": 1.9366350173950195, |
| "learning_rate": 0.001, |
| "loss": 1.2532, |
| "step": 216300 |
| }, |
| { |
| "epoch": 69.94182288299935, |
| "grad_norm": 2.439671277999878, |
| "learning_rate": 0.001, |
| "loss": 1.2342, |
| "step": 216400 |
| }, |
| { |
| "epoch": 69.97414350355527, |
| "grad_norm": 1.625091552734375, |
| "learning_rate": 0.001, |
| "loss": 1.2523, |
| "step": 216500 |
| }, |
| { |
| "epoch": 70.00646412411119, |
| "grad_norm": 1.5262469053268433, |
| "learning_rate": 0.001, |
| "loss": 1.2416, |
| "step": 216600 |
| }, |
| { |
| "epoch": 70.0387847446671, |
| "grad_norm": 1.6262037754058838, |
| "learning_rate": 0.001, |
| "loss": 1.1363, |
| "step": 216700 |
| }, |
| { |
| "epoch": 70.07110536522302, |
| "grad_norm": 1.6918765306472778, |
| "learning_rate": 0.001, |
| "loss": 1.1288, |
| "step": 216800 |
| }, |
| { |
| "epoch": 70.10342598577893, |
| "grad_norm": 1.7063210010528564, |
| "learning_rate": 0.001, |
| "loss": 1.1313, |
| "step": 216900 |
| }, |
| { |
| "epoch": 70.13574660633485, |
| "grad_norm": 2.047490358352661, |
| "learning_rate": 0.001, |
| "loss": 1.1365, |
| "step": 217000 |
| }, |
| { |
| "epoch": 70.16806722689076, |
| "grad_norm": 1.7240195274353027, |
| "learning_rate": 0.001, |
| "loss": 1.1556, |
| "step": 217100 |
| }, |
| { |
| "epoch": 70.20038784744668, |
| "grad_norm": 2.204714775085449, |
| "learning_rate": 0.001, |
| "loss": 1.1435, |
| "step": 217200 |
| }, |
| { |
| "epoch": 70.23270846800258, |
| "grad_norm": 1.780590295791626, |
| "learning_rate": 0.001, |
| "loss": 1.1467, |
| "step": 217300 |
| }, |
| { |
| "epoch": 70.2650290885585, |
| "grad_norm": 1.6181354522705078, |
| "learning_rate": 0.001, |
| "loss": 1.1562, |
| "step": 217400 |
| }, |
| { |
| "epoch": 70.29734970911441, |
| "grad_norm": 1.7674238681793213, |
| "learning_rate": 0.001, |
| "loss": 1.1726, |
| "step": 217500 |
| }, |
| { |
| "epoch": 70.32967032967034, |
| "grad_norm": 1.9929686784744263, |
| "learning_rate": 0.001, |
| "loss": 1.1684, |
| "step": 217600 |
| }, |
| { |
| "epoch": 70.36199095022624, |
| "grad_norm": 1.562532901763916, |
| "learning_rate": 0.001, |
| "loss": 1.1832, |
| "step": 217700 |
| }, |
| { |
| "epoch": 70.39431157078216, |
| "grad_norm": 1.6133959293365479, |
| "learning_rate": 0.001, |
| "loss": 1.174, |
| "step": 217800 |
| }, |
| { |
| "epoch": 70.42663219133807, |
| "grad_norm": 1.9063694477081299, |
| "learning_rate": 0.001, |
| "loss": 1.1721, |
| "step": 217900 |
| }, |
| { |
| "epoch": 70.458952811894, |
| "grad_norm": 1.6396887302398682, |
| "learning_rate": 0.001, |
| "loss": 1.1818, |
| "step": 218000 |
| }, |
| { |
| "epoch": 70.4912734324499, |
| "grad_norm": 5.434491157531738, |
| "learning_rate": 0.001, |
| "loss": 1.1929, |
| "step": 218100 |
| }, |
| { |
| "epoch": 70.52359405300582, |
| "grad_norm": 2.1285393238067627, |
| "learning_rate": 0.001, |
| "loss": 1.2068, |
| "step": 218200 |
| }, |
| { |
| "epoch": 70.55591467356173, |
| "grad_norm": 2.146338701248169, |
| "learning_rate": 0.001, |
| "loss": 1.1927, |
| "step": 218300 |
| }, |
| { |
| "epoch": 70.58823529411765, |
| "grad_norm": 1.5270682573318481, |
| "learning_rate": 0.001, |
| "loss": 1.196, |
| "step": 218400 |
| }, |
| { |
| "epoch": 70.62055591467356, |
| "grad_norm": 1.8791449069976807, |
| "learning_rate": 0.001, |
| "loss": 1.2008, |
| "step": 218500 |
| }, |
| { |
| "epoch": 70.65287653522948, |
| "grad_norm": 1.5155069828033447, |
| "learning_rate": 0.001, |
| "loss": 1.202, |
| "step": 218600 |
| }, |
| { |
| "epoch": 70.68519715578539, |
| "grad_norm": 1.610196828842163, |
| "learning_rate": 0.001, |
| "loss": 1.2087, |
| "step": 218700 |
| }, |
| { |
| "epoch": 70.71751777634131, |
| "grad_norm": 1.5798680782318115, |
| "learning_rate": 0.001, |
| "loss": 1.1986, |
| "step": 218800 |
| }, |
| { |
| "epoch": 70.74983839689722, |
| "grad_norm": 1.494688868522644, |
| "learning_rate": 0.001, |
| "loss": 1.2055, |
| "step": 218900 |
| }, |
| { |
| "epoch": 70.78215901745314, |
| "grad_norm": 1.8557690382003784, |
| "learning_rate": 0.001, |
| "loss": 1.2105, |
| "step": 219000 |
| }, |
| { |
| "epoch": 70.81447963800905, |
| "grad_norm": 1.6135377883911133, |
| "learning_rate": 0.001, |
| "loss": 1.2151, |
| "step": 219100 |
| }, |
| { |
| "epoch": 70.84680025856497, |
| "grad_norm": 1.7288098335266113, |
| "learning_rate": 0.001, |
| "loss": 1.2289, |
| "step": 219200 |
| }, |
| { |
| "epoch": 70.87912087912088, |
| "grad_norm": 1.838810920715332, |
| "learning_rate": 0.001, |
| "loss": 1.2268, |
| "step": 219300 |
| }, |
| { |
| "epoch": 70.9114414996768, |
| "grad_norm": 2.151858329772949, |
| "learning_rate": 0.001, |
| "loss": 1.2403, |
| "step": 219400 |
| }, |
| { |
| "epoch": 70.9437621202327, |
| "grad_norm": 1.791812539100647, |
| "learning_rate": 0.001, |
| "loss": 1.2395, |
| "step": 219500 |
| }, |
| { |
| "epoch": 70.97608274078863, |
| "grad_norm": 1.4549312591552734, |
| "learning_rate": 0.001, |
| "loss": 1.2375, |
| "step": 219600 |
| }, |
| { |
| "epoch": 71.00840336134453, |
| "grad_norm": 1.5893354415893555, |
| "learning_rate": 0.001, |
| "loss": 1.1986, |
| "step": 219700 |
| }, |
| { |
| "epoch": 71.04072398190046, |
| "grad_norm": 1.598484992980957, |
| "learning_rate": 0.001, |
| "loss": 1.1016, |
| "step": 219800 |
| }, |
| { |
| "epoch": 71.07304460245636, |
| "grad_norm": 1.74235200881958, |
| "learning_rate": 0.001, |
| "loss": 1.1297, |
| "step": 219900 |
| }, |
| { |
| "epoch": 71.10536522301229, |
| "grad_norm": 2.0175647735595703, |
| "learning_rate": 0.001, |
| "loss": 1.1254, |
| "step": 220000 |
| }, |
| { |
| "epoch": 71.13768584356819, |
| "grad_norm": 1.877590537071228, |
| "learning_rate": 0.001, |
| "loss": 1.1301, |
| "step": 220100 |
| }, |
| { |
| "epoch": 71.17000646412411, |
| "grad_norm": 1.6253721714019775, |
| "learning_rate": 0.001, |
| "loss": 1.127, |
| "step": 220200 |
| }, |
| { |
| "epoch": 71.20232708468002, |
| "grad_norm": 1.6335793733596802, |
| "learning_rate": 0.001, |
| "loss": 1.1491, |
| "step": 220300 |
| }, |
| { |
| "epoch": 71.23464770523594, |
| "grad_norm": 1.8717635869979858, |
| "learning_rate": 0.001, |
| "loss": 1.129, |
| "step": 220400 |
| }, |
| { |
| "epoch": 71.26696832579185, |
| "grad_norm": 1.9798500537872314, |
| "learning_rate": 0.001, |
| "loss": 1.1498, |
| "step": 220500 |
| }, |
| { |
| "epoch": 71.29928894634777, |
| "grad_norm": 1.6062580347061157, |
| "learning_rate": 0.001, |
| "loss": 1.149, |
| "step": 220600 |
| }, |
| { |
| "epoch": 71.33160956690368, |
| "grad_norm": 1.7187769412994385, |
| "learning_rate": 0.001, |
| "loss": 1.1589, |
| "step": 220700 |
| }, |
| { |
| "epoch": 71.3639301874596, |
| "grad_norm": 1.6825973987579346, |
| "learning_rate": 0.001, |
| "loss": 1.1705, |
| "step": 220800 |
| }, |
| { |
| "epoch": 71.39625080801551, |
| "grad_norm": 1.5847103595733643, |
| "learning_rate": 0.001, |
| "loss": 1.1657, |
| "step": 220900 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 1.477260947227478, |
| "learning_rate": 0.001, |
| "loss": 1.1714, |
| "step": 221000 |
| }, |
| { |
| "epoch": 71.46089204912734, |
| "grad_norm": 2.0396647453308105, |
| "learning_rate": 0.001, |
| "loss": 1.1745, |
| "step": 221100 |
| }, |
| { |
| "epoch": 71.49321266968326, |
| "grad_norm": 1.5809693336486816, |
| "learning_rate": 0.001, |
| "loss": 1.169, |
| "step": 221200 |
| }, |
| { |
| "epoch": 71.52553329023917, |
| "grad_norm": 1.5389111042022705, |
| "learning_rate": 0.001, |
| "loss": 1.184, |
| "step": 221300 |
| }, |
| { |
| "epoch": 71.55785391079509, |
| "grad_norm": 1.5018457174301147, |
| "learning_rate": 0.001, |
| "loss": 1.1905, |
| "step": 221400 |
| }, |
| { |
| "epoch": 71.590174531351, |
| "grad_norm": 1.8730908632278442, |
| "learning_rate": 0.001, |
| "loss": 1.1808, |
| "step": 221500 |
| }, |
| { |
| "epoch": 71.62249515190692, |
| "grad_norm": 1.9678943157196045, |
| "learning_rate": 0.001, |
| "loss": 1.1696, |
| "step": 221600 |
| }, |
| { |
| "epoch": 71.65481577246283, |
| "grad_norm": 1.6994378566741943, |
| "learning_rate": 0.001, |
| "loss": 1.1976, |
| "step": 221700 |
| }, |
| { |
| "epoch": 71.68713639301875, |
| "grad_norm": 1.820391058921814, |
| "learning_rate": 0.001, |
| "loss": 1.2076, |
| "step": 221800 |
| }, |
| { |
| "epoch": 71.71945701357465, |
| "grad_norm": 1.6418462991714478, |
| "learning_rate": 0.001, |
| "loss": 1.1973, |
| "step": 221900 |
| }, |
| { |
| "epoch": 71.75177763413058, |
| "grad_norm": 1.805640459060669, |
| "learning_rate": 0.001, |
| "loss": 1.2126, |
| "step": 222000 |
| }, |
| { |
| "epoch": 71.78409825468648, |
| "grad_norm": 1.5427037477493286, |
| "learning_rate": 0.001, |
| "loss": 1.2058, |
| "step": 222100 |
| }, |
| { |
| "epoch": 71.8164188752424, |
| "grad_norm": 1.6487582921981812, |
| "learning_rate": 0.001, |
| "loss": 1.2155, |
| "step": 222200 |
| }, |
| { |
| "epoch": 71.84873949579831, |
| "grad_norm": 2.0085551738739014, |
| "learning_rate": 0.001, |
| "loss": 1.2167, |
| "step": 222300 |
| }, |
| { |
| "epoch": 71.88106011635423, |
| "grad_norm": 1.4483178853988647, |
| "learning_rate": 0.001, |
| "loss": 1.2155, |
| "step": 222400 |
| }, |
| { |
| "epoch": 71.91338073691014, |
| "grad_norm": 1.6381824016571045, |
| "learning_rate": 0.001, |
| "loss": 1.2192, |
| "step": 222500 |
| }, |
| { |
| "epoch": 71.94570135746606, |
| "grad_norm": 2.2828452587127686, |
| "learning_rate": 0.001, |
| "loss": 1.227, |
| "step": 222600 |
| }, |
| { |
| "epoch": 71.97802197802197, |
| "grad_norm": 2.1275863647460938, |
| "learning_rate": 0.001, |
| "loss": 1.2355, |
| "step": 222700 |
| }, |
| { |
| "epoch": 72.01034259857789, |
| "grad_norm": 1.8210406303405762, |
| "learning_rate": 0.001, |
| "loss": 1.1774, |
| "step": 222800 |
| }, |
| { |
| "epoch": 72.04266321913381, |
| "grad_norm": 1.9121840000152588, |
| "learning_rate": 0.001, |
| "loss": 1.102, |
| "step": 222900 |
| }, |
| { |
| "epoch": 72.07498383968972, |
| "grad_norm": 1.948397159576416, |
| "learning_rate": 0.001, |
| "loss": 1.1163, |
| "step": 223000 |
| }, |
| { |
| "epoch": 72.10730446024564, |
| "grad_norm": 2.113853693008423, |
| "learning_rate": 0.001, |
| "loss": 1.1221, |
| "step": 223100 |
| }, |
| { |
| "epoch": 72.13962508080155, |
| "grad_norm": 1.9064280986785889, |
| "learning_rate": 0.001, |
| "loss": 1.1297, |
| "step": 223200 |
| }, |
| { |
| "epoch": 72.17194570135747, |
| "grad_norm": 1.8943758010864258, |
| "learning_rate": 0.001, |
| "loss": 1.1214, |
| "step": 223300 |
| }, |
| { |
| "epoch": 72.20426632191338, |
| "grad_norm": 2.0406997203826904, |
| "learning_rate": 0.001, |
| "loss": 1.134, |
| "step": 223400 |
| }, |
| { |
| "epoch": 72.2365869424693, |
| "grad_norm": 1.837704062461853, |
| "learning_rate": 0.001, |
| "loss": 1.1316, |
| "step": 223500 |
| }, |
| { |
| "epoch": 72.26890756302521, |
| "grad_norm": 1.7503560781478882, |
| "learning_rate": 0.001, |
| "loss": 1.1263, |
| "step": 223600 |
| }, |
| { |
| "epoch": 72.30122818358113, |
| "grad_norm": 1.602188229560852, |
| "learning_rate": 0.001, |
| "loss": 1.1543, |
| "step": 223700 |
| }, |
| { |
| "epoch": 72.33354880413704, |
| "grad_norm": 1.8570600748062134, |
| "learning_rate": 0.001, |
| "loss": 1.159, |
| "step": 223800 |
| }, |
| { |
| "epoch": 72.36586942469296, |
| "grad_norm": 1.7517465353012085, |
| "learning_rate": 0.001, |
| "loss": 1.1539, |
| "step": 223900 |
| }, |
| { |
| "epoch": 72.39819004524887, |
| "grad_norm": 1.8670244216918945, |
| "learning_rate": 0.001, |
| "loss": 1.1495, |
| "step": 224000 |
| }, |
| { |
| "epoch": 72.43051066580479, |
| "grad_norm": 1.831101655960083, |
| "learning_rate": 0.001, |
| "loss": 1.1613, |
| "step": 224100 |
| }, |
| { |
| "epoch": 72.4628312863607, |
| "grad_norm": 1.6500862836837769, |
| "learning_rate": 0.001, |
| "loss": 1.1413, |
| "step": 224200 |
| }, |
| { |
| "epoch": 72.49515190691662, |
| "grad_norm": 1.9786334037780762, |
| "learning_rate": 0.001, |
| "loss": 1.1655, |
| "step": 224300 |
| }, |
| { |
| "epoch": 72.52747252747253, |
| "grad_norm": 1.7236011028289795, |
| "learning_rate": 0.001, |
| "loss": 1.1654, |
| "step": 224400 |
| }, |
| { |
| "epoch": 72.55979314802845, |
| "grad_norm": 2.018129587173462, |
| "learning_rate": 0.001, |
| "loss": 1.1609, |
| "step": 224500 |
| }, |
| { |
| "epoch": 72.59211376858435, |
| "grad_norm": 1.9249852895736694, |
| "learning_rate": 0.001, |
| "loss": 1.1889, |
| "step": 224600 |
| }, |
| { |
| "epoch": 72.62443438914028, |
| "grad_norm": 1.736116886138916, |
| "learning_rate": 0.001, |
| "loss": 1.1828, |
| "step": 224700 |
| }, |
| { |
| "epoch": 72.65675500969618, |
| "grad_norm": 1.6699976921081543, |
| "learning_rate": 0.001, |
| "loss": 1.173, |
| "step": 224800 |
| }, |
| { |
| "epoch": 72.6890756302521, |
| "grad_norm": 2.20131778717041, |
| "learning_rate": 0.001, |
| "loss": 1.1921, |
| "step": 224900 |
| }, |
| { |
| "epoch": 72.72139625080801, |
| "grad_norm": 1.9852443933486938, |
| "learning_rate": 0.001, |
| "loss": 1.1904, |
| "step": 225000 |
| }, |
| { |
| "epoch": 72.75371687136393, |
| "grad_norm": 1.9196783304214478, |
| "learning_rate": 0.001, |
| "loss": 1.1867, |
| "step": 225100 |
| }, |
| { |
| "epoch": 72.78603749191984, |
| "grad_norm": 1.7682510614395142, |
| "learning_rate": 0.001, |
| "loss": 1.193, |
| "step": 225200 |
| }, |
| { |
| "epoch": 72.81835811247576, |
| "grad_norm": 2.267354726791382, |
| "learning_rate": 0.001, |
| "loss": 1.1856, |
| "step": 225300 |
| }, |
| { |
| "epoch": 72.85067873303167, |
| "grad_norm": 1.7886394262313843, |
| "learning_rate": 0.001, |
| "loss": 1.2008, |
| "step": 225400 |
| }, |
| { |
| "epoch": 72.88299935358759, |
| "grad_norm": 1.6785390377044678, |
| "learning_rate": 0.001, |
| "loss": 1.2051, |
| "step": 225500 |
| }, |
| { |
| "epoch": 72.9153199741435, |
| "grad_norm": 1.8354374170303345, |
| "learning_rate": 0.001, |
| "loss": 1.1972, |
| "step": 225600 |
| }, |
| { |
| "epoch": 72.94764059469942, |
| "grad_norm": 1.6264238357543945, |
| "learning_rate": 0.001, |
| "loss": 1.2162, |
| "step": 225700 |
| }, |
| { |
| "epoch": 72.97996121525533, |
| "grad_norm": 2.1394410133361816, |
| "learning_rate": 0.001, |
| "loss": 1.223, |
| "step": 225800 |
| }, |
| { |
| "epoch": 73.01228183581125, |
| "grad_norm": 1.795905351638794, |
| "learning_rate": 0.001, |
| "loss": 1.1582, |
| "step": 225900 |
| }, |
| { |
| "epoch": 73.04460245636716, |
| "grad_norm": 2.1484296321868896, |
| "learning_rate": 0.001, |
| "loss": 1.0819, |
| "step": 226000 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 1.72504460811615, |
| "learning_rate": 0.001, |
| "loss": 1.0955, |
| "step": 226100 |
| }, |
| { |
| "epoch": 73.10924369747899, |
| "grad_norm": 1.5780655145645142, |
| "learning_rate": 0.001, |
| "loss": 1.0913, |
| "step": 226200 |
| }, |
| { |
| "epoch": 73.14156431803491, |
| "grad_norm": 1.83950674533844, |
| "learning_rate": 0.001, |
| "loss": 1.1112, |
| "step": 226300 |
| }, |
| { |
| "epoch": 73.17388493859082, |
| "grad_norm": 2.016282320022583, |
| "learning_rate": 0.001, |
| "loss": 1.1083, |
| "step": 226400 |
| }, |
| { |
| "epoch": 73.20620555914674, |
| "grad_norm": 2.395186185836792, |
| "learning_rate": 0.001, |
| "loss": 1.111, |
| "step": 226500 |
| }, |
| { |
| "epoch": 73.23852617970265, |
| "grad_norm": 2.154127836227417, |
| "learning_rate": 0.001, |
| "loss": 1.1132, |
| "step": 226600 |
| }, |
| { |
| "epoch": 73.27084680025857, |
| "grad_norm": 1.6973828077316284, |
| "learning_rate": 0.001, |
| "loss": 1.1365, |
| "step": 226700 |
| }, |
| { |
| "epoch": 73.30316742081448, |
| "grad_norm": 1.861863613128662, |
| "learning_rate": 0.001, |
| "loss": 1.1306, |
| "step": 226800 |
| }, |
| { |
| "epoch": 73.3354880413704, |
| "grad_norm": 2.0509049892425537, |
| "learning_rate": 0.001, |
| "loss": 1.1463, |
| "step": 226900 |
| }, |
| { |
| "epoch": 73.3678086619263, |
| "grad_norm": 1.933553695678711, |
| "learning_rate": 0.001, |
| "loss": 1.161, |
| "step": 227000 |
| }, |
| { |
| "epoch": 73.40012928248223, |
| "grad_norm": 2.0474693775177, |
| "learning_rate": 0.001, |
| "loss": 1.1489, |
| "step": 227100 |
| }, |
| { |
| "epoch": 73.43244990303813, |
| "grad_norm": 2.182870388031006, |
| "learning_rate": 0.001, |
| "loss": 1.1481, |
| "step": 227200 |
| }, |
| { |
| "epoch": 73.46477052359405, |
| "grad_norm": 1.6766440868377686, |
| "learning_rate": 0.001, |
| "loss": 1.1566, |
| "step": 227300 |
| }, |
| { |
| "epoch": 73.49709114414996, |
| "grad_norm": 2.327120542526245, |
| "learning_rate": 0.001, |
| "loss": 1.1424, |
| "step": 227400 |
| }, |
| { |
| "epoch": 73.52941176470588, |
| "grad_norm": 1.8558249473571777, |
| "learning_rate": 0.001, |
| "loss": 1.1572, |
| "step": 227500 |
| }, |
| { |
| "epoch": 73.56173238526179, |
| "grad_norm": 2.0666706562042236, |
| "learning_rate": 0.001, |
| "loss": 1.1534, |
| "step": 227600 |
| }, |
| { |
| "epoch": 73.59405300581771, |
| "grad_norm": 2.563960313796997, |
| "learning_rate": 0.001, |
| "loss": 1.1588, |
| "step": 227700 |
| }, |
| { |
| "epoch": 73.62637362637362, |
| "grad_norm": 1.841058373451233, |
| "learning_rate": 0.001, |
| "loss": 1.174, |
| "step": 227800 |
| }, |
| { |
| "epoch": 73.65869424692954, |
| "grad_norm": 1.7862833738327026, |
| "learning_rate": 0.001, |
| "loss": 1.1698, |
| "step": 227900 |
| }, |
| { |
| "epoch": 73.69101486748545, |
| "grad_norm": 1.935362458229065, |
| "learning_rate": 0.001, |
| "loss": 1.1793, |
| "step": 228000 |
| }, |
| { |
| "epoch": 73.72333548804137, |
| "grad_norm": 1.609352707862854, |
| "learning_rate": 0.001, |
| "loss": 1.1831, |
| "step": 228100 |
| }, |
| { |
| "epoch": 73.75565610859728, |
| "grad_norm": 2.0712058544158936, |
| "learning_rate": 0.001, |
| "loss": 1.18, |
| "step": 228200 |
| }, |
| { |
| "epoch": 73.7879767291532, |
| "grad_norm": 1.8805397748947144, |
| "learning_rate": 0.001, |
| "loss": 1.1897, |
| "step": 228300 |
| }, |
| { |
| "epoch": 73.82029734970911, |
| "grad_norm": 1.8375552892684937, |
| "learning_rate": 0.001, |
| "loss": 1.1886, |
| "step": 228400 |
| }, |
| { |
| "epoch": 73.85261797026503, |
| "grad_norm": 2.0276403427124023, |
| "learning_rate": 0.001, |
| "loss": 1.1811, |
| "step": 228500 |
| }, |
| { |
| "epoch": 73.88493859082094, |
| "grad_norm": 2.1257636547088623, |
| "learning_rate": 0.001, |
| "loss": 1.1942, |
| "step": 228600 |
| }, |
| { |
| "epoch": 73.91725921137686, |
| "grad_norm": 1.6965306997299194, |
| "learning_rate": 0.001, |
| "loss": 1.2039, |
| "step": 228700 |
| }, |
| { |
| "epoch": 73.94957983193277, |
| "grad_norm": 2.1263046264648438, |
| "learning_rate": 0.001, |
| "loss": 1.2031, |
| "step": 228800 |
| }, |
| { |
| "epoch": 73.98190045248869, |
| "grad_norm": 1.8232370615005493, |
| "learning_rate": 0.001, |
| "loss": 1.2104, |
| "step": 228900 |
| }, |
| { |
| "epoch": 74.01422107304461, |
| "grad_norm": 2.033893346786499, |
| "learning_rate": 0.001, |
| "loss": 1.1334, |
| "step": 229000 |
| }, |
| { |
| "epoch": 74.04654169360052, |
| "grad_norm": 1.9348260164260864, |
| "learning_rate": 0.001, |
| "loss": 1.0812, |
| "step": 229100 |
| }, |
| { |
| "epoch": 74.07886231415644, |
| "grad_norm": 2.0578017234802246, |
| "learning_rate": 0.001, |
| "loss": 1.0994, |
| "step": 229200 |
| }, |
| { |
| "epoch": 74.11118293471235, |
| "grad_norm": 2.131514072418213, |
| "learning_rate": 0.001, |
| "loss": 1.1066, |
| "step": 229300 |
| }, |
| { |
| "epoch": 74.14350355526827, |
| "grad_norm": 2.09892201423645, |
| "learning_rate": 0.001, |
| "loss": 1.0978, |
| "step": 229400 |
| }, |
| { |
| "epoch": 74.17582417582418, |
| "grad_norm": 2.2361555099487305, |
| "learning_rate": 0.001, |
| "loss": 1.0948, |
| "step": 229500 |
| }, |
| { |
| "epoch": 74.2081447963801, |
| "grad_norm": 2.1429715156555176, |
| "learning_rate": 0.001, |
| "loss": 1.12, |
| "step": 229600 |
| }, |
| { |
| "epoch": 74.240465416936, |
| "grad_norm": 1.7811630964279175, |
| "learning_rate": 0.001, |
| "loss": 1.1277, |
| "step": 229700 |
| }, |
| { |
| "epoch": 74.27278603749193, |
| "grad_norm": 2.2443344593048096, |
| "learning_rate": 0.001, |
| "loss": 1.1082, |
| "step": 229800 |
| }, |
| { |
| "epoch": 74.30510665804783, |
| "grad_norm": 2.121556520462036, |
| "learning_rate": 0.001, |
| "loss": 1.1229, |
| "step": 229900 |
| }, |
| { |
| "epoch": 74.33742727860376, |
| "grad_norm": 2.5482897758483887, |
| "learning_rate": 0.001, |
| "loss": 1.1368, |
| "step": 230000 |
| }, |
| { |
| "epoch": 74.36974789915966, |
| "grad_norm": 1.9960894584655762, |
| "learning_rate": 0.001, |
| "loss": 1.1245, |
| "step": 230100 |
| }, |
| { |
| "epoch": 74.40206851971558, |
| "grad_norm": 2.089287757873535, |
| "learning_rate": 0.001, |
| "loss": 1.1405, |
| "step": 230200 |
| }, |
| { |
| "epoch": 74.43438914027149, |
| "grad_norm": 1.8604276180267334, |
| "learning_rate": 0.001, |
| "loss": 1.1538, |
| "step": 230300 |
| }, |
| { |
| "epoch": 74.46670976082741, |
| "grad_norm": 1.9729381799697876, |
| "learning_rate": 0.001, |
| "loss": 1.1571, |
| "step": 230400 |
| }, |
| { |
| "epoch": 74.49903038138332, |
| "grad_norm": 1.8836462497711182, |
| "learning_rate": 0.001, |
| "loss": 1.1498, |
| "step": 230500 |
| }, |
| { |
| "epoch": 74.53135100193924, |
| "grad_norm": 1.781795859336853, |
| "learning_rate": 0.001, |
| "loss": 1.1463, |
| "step": 230600 |
| }, |
| { |
| "epoch": 74.56367162249515, |
| "grad_norm": 2.167222023010254, |
| "learning_rate": 0.001, |
| "loss": 1.1387, |
| "step": 230700 |
| }, |
| { |
| "epoch": 74.59599224305107, |
| "grad_norm": 2.2089223861694336, |
| "learning_rate": 0.001, |
| "loss": 1.1499, |
| "step": 230800 |
| }, |
| { |
| "epoch": 74.62831286360698, |
| "grad_norm": 2.309671401977539, |
| "learning_rate": 0.001, |
| "loss": 1.1569, |
| "step": 230900 |
| }, |
| { |
| "epoch": 74.6606334841629, |
| "grad_norm": 1.9005663394927979, |
| "learning_rate": 0.001, |
| "loss": 1.1723, |
| "step": 231000 |
| }, |
| { |
| "epoch": 74.69295410471881, |
| "grad_norm": 1.8752561807632446, |
| "learning_rate": 0.001, |
| "loss": 1.1675, |
| "step": 231100 |
| }, |
| { |
| "epoch": 74.72527472527473, |
| "grad_norm": 2.7028305530548096, |
| "learning_rate": 0.001, |
| "loss": 1.1656, |
| "step": 231200 |
| }, |
| { |
| "epoch": 74.75759534583064, |
| "grad_norm": 2.19155216217041, |
| "learning_rate": 0.001, |
| "loss": 1.1671, |
| "step": 231300 |
| }, |
| { |
| "epoch": 74.78991596638656, |
| "grad_norm": 2.1153290271759033, |
| "learning_rate": 0.001, |
| "loss": 1.1774, |
| "step": 231400 |
| }, |
| { |
| "epoch": 74.82223658694247, |
| "grad_norm": 1.7875677347183228, |
| "learning_rate": 0.001, |
| "loss": 1.1752, |
| "step": 231500 |
| }, |
| { |
| "epoch": 74.85455720749839, |
| "grad_norm": 2.1596906185150146, |
| "learning_rate": 0.001, |
| "loss": 1.1678, |
| "step": 231600 |
| }, |
| { |
| "epoch": 74.8868778280543, |
| "grad_norm": 2.106273889541626, |
| "learning_rate": 0.001, |
| "loss": 1.1814, |
| "step": 231700 |
| }, |
| { |
| "epoch": 74.91919844861022, |
| "grad_norm": 2.4843180179595947, |
| "learning_rate": 0.001, |
| "loss": 1.1823, |
| "step": 231800 |
| }, |
| { |
| "epoch": 74.95151906916612, |
| "grad_norm": 2.45287823677063, |
| "learning_rate": 0.001, |
| "loss": 1.1806, |
| "step": 231900 |
| }, |
| { |
| "epoch": 74.98383968972205, |
| "grad_norm": 2.348428249359131, |
| "learning_rate": 0.001, |
| "loss": 1.1847, |
| "step": 232000 |
| }, |
| { |
| "epoch": 75.01616031027795, |
| "grad_norm": 2.1072657108306885, |
| "learning_rate": 0.001, |
| "loss": 1.1007, |
| "step": 232100 |
| }, |
| { |
| "epoch": 75.04848093083388, |
| "grad_norm": 2.227376699447632, |
| "learning_rate": 0.001, |
| "loss": 1.0656, |
| "step": 232200 |
| }, |
| { |
| "epoch": 75.08080155138978, |
| "grad_norm": 2.6325557231903076, |
| "learning_rate": 0.001, |
| "loss": 1.0806, |
| "step": 232300 |
| }, |
| { |
| "epoch": 75.1131221719457, |
| "grad_norm": 2.4202475547790527, |
| "learning_rate": 0.001, |
| "loss": 1.0943, |
| "step": 232400 |
| }, |
| { |
| "epoch": 75.14544279250161, |
| "grad_norm": 2.6263668537139893, |
| "learning_rate": 0.001, |
| "loss": 1.0991, |
| "step": 232500 |
| }, |
| { |
| "epoch": 75.17776341305753, |
| "grad_norm": 2.652233600616455, |
| "learning_rate": 0.001, |
| "loss": 1.0959, |
| "step": 232600 |
| }, |
| { |
| "epoch": 75.21008403361344, |
| "grad_norm": 2.31381893157959, |
| "learning_rate": 0.001, |
| "loss": 1.1064, |
| "step": 232700 |
| }, |
| { |
| "epoch": 75.24240465416936, |
| "grad_norm": 2.8877060413360596, |
| "learning_rate": 0.001, |
| "loss": 1.101, |
| "step": 232800 |
| }, |
| { |
| "epoch": 75.27472527472527, |
| "grad_norm": 3.5789542198181152, |
| "learning_rate": 0.001, |
| "loss": 1.1082, |
| "step": 232900 |
| }, |
| { |
| "epoch": 75.30704589528119, |
| "grad_norm": 2.622349500656128, |
| "learning_rate": 0.001, |
| "loss": 1.101, |
| "step": 233000 |
| }, |
| { |
| "epoch": 75.3393665158371, |
| "grad_norm": 2.9478626251220703, |
| "learning_rate": 0.001, |
| "loss": 1.1134, |
| "step": 233100 |
| }, |
| { |
| "epoch": 75.37168713639302, |
| "grad_norm": 2.258899688720703, |
| "learning_rate": 0.001, |
| "loss": 1.1272, |
| "step": 233200 |
| }, |
| { |
| "epoch": 75.40400775694893, |
| "grad_norm": 2.8616788387298584, |
| "learning_rate": 0.001, |
| "loss": 1.1265, |
| "step": 233300 |
| }, |
| { |
| "epoch": 75.43632837750485, |
| "grad_norm": 2.5713586807250977, |
| "learning_rate": 0.001, |
| "loss": 1.1222, |
| "step": 233400 |
| }, |
| { |
| "epoch": 75.46864899806076, |
| "grad_norm": 2.9132118225097656, |
| "learning_rate": 0.001, |
| "loss": 1.129, |
| "step": 233500 |
| }, |
| { |
| "epoch": 75.50096961861668, |
| "grad_norm": 2.412076473236084, |
| "learning_rate": 0.001, |
| "loss": 1.1272, |
| "step": 233600 |
| }, |
| { |
| "epoch": 75.53329023917259, |
| "grad_norm": 2.0582900047302246, |
| "learning_rate": 0.001, |
| "loss": 1.1382, |
| "step": 233700 |
| }, |
| { |
| "epoch": 75.56561085972851, |
| "grad_norm": 2.4409642219543457, |
| "learning_rate": 0.001, |
| "loss": 1.1436, |
| "step": 233800 |
| }, |
| { |
| "epoch": 75.59793148028442, |
| "grad_norm": 2.7730462551116943, |
| "learning_rate": 0.001, |
| "loss": 1.1466, |
| "step": 233900 |
| }, |
| { |
| "epoch": 75.63025210084034, |
| "grad_norm": 2.9127862453460693, |
| "learning_rate": 0.001, |
| "loss": 1.1634, |
| "step": 234000 |
| }, |
| { |
| "epoch": 75.66257272139624, |
| "grad_norm": 2.6338980197906494, |
| "learning_rate": 0.001, |
| "loss": 1.1515, |
| "step": 234100 |
| }, |
| { |
| "epoch": 75.69489334195217, |
| "grad_norm": 2.3682878017425537, |
| "learning_rate": 0.001, |
| "loss": 1.1716, |
| "step": 234200 |
| }, |
| { |
| "epoch": 75.72721396250807, |
| "grad_norm": 3.0970637798309326, |
| "learning_rate": 0.001, |
| "loss": 1.1681, |
| "step": 234300 |
| }, |
| { |
| "epoch": 75.759534583064, |
| "grad_norm": 3.246640682220459, |
| "learning_rate": 0.001, |
| "loss": 1.1656, |
| "step": 234400 |
| }, |
| { |
| "epoch": 75.7918552036199, |
| "grad_norm": 2.827653169631958, |
| "learning_rate": 0.001, |
| "loss": 1.1712, |
| "step": 234500 |
| }, |
| { |
| "epoch": 75.82417582417582, |
| "grad_norm": 2.37351393699646, |
| "learning_rate": 0.001, |
| "loss": 1.1767, |
| "step": 234600 |
| }, |
| { |
| "epoch": 75.85649644473173, |
| "grad_norm": 2.936460018157959, |
| "learning_rate": 0.001, |
| "loss": 1.1798, |
| "step": 234700 |
| }, |
| { |
| "epoch": 75.88881706528765, |
| "grad_norm": 2.810807228088379, |
| "learning_rate": 0.001, |
| "loss": 1.1756, |
| "step": 234800 |
| }, |
| { |
| "epoch": 75.92113768584356, |
| "grad_norm": 2.5156216621398926, |
| "learning_rate": 0.001, |
| "loss": 1.1682, |
| "step": 234900 |
| }, |
| { |
| "epoch": 75.95345830639948, |
| "grad_norm": 2.3264551162719727, |
| "learning_rate": 0.001, |
| "loss": 1.1573, |
| "step": 235000 |
| }, |
| { |
| "epoch": 75.98577892695539, |
| "grad_norm": 2.1307079792022705, |
| "learning_rate": 0.001, |
| "loss": 1.189, |
| "step": 235100 |
| }, |
| { |
| "epoch": 76.01809954751131, |
| "grad_norm": 1.8315860033035278, |
| "learning_rate": 0.001, |
| "loss": 1.1347, |
| "step": 235200 |
| }, |
| { |
| "epoch": 76.05042016806723, |
| "grad_norm": 1.5642365217208862, |
| "learning_rate": 0.001, |
| "loss": 1.062, |
| "step": 235300 |
| }, |
| { |
| "epoch": 76.08274078862314, |
| "grad_norm": 2.2328715324401855, |
| "learning_rate": 0.001, |
| "loss": 1.0708, |
| "step": 235400 |
| }, |
| { |
| "epoch": 76.11506140917906, |
| "grad_norm": 1.7951757907867432, |
| "learning_rate": 0.001, |
| "loss": 1.0796, |
| "step": 235500 |
| }, |
| { |
| "epoch": 76.14738202973497, |
| "grad_norm": 1.8427295684814453, |
| "learning_rate": 0.001, |
| "loss": 1.0759, |
| "step": 235600 |
| }, |
| { |
| "epoch": 76.17970265029089, |
| "grad_norm": 1.8815374374389648, |
| "learning_rate": 0.001, |
| "loss": 1.0794, |
| "step": 235700 |
| }, |
| { |
| "epoch": 76.2120232708468, |
| "grad_norm": 2.425978899002075, |
| "learning_rate": 0.001, |
| "loss": 1.1024, |
| "step": 235800 |
| }, |
| { |
| "epoch": 76.24434389140272, |
| "grad_norm": 2.3707029819488525, |
| "learning_rate": 0.001, |
| "loss": 1.0746, |
| "step": 235900 |
| }, |
| { |
| "epoch": 76.27666451195863, |
| "grad_norm": 2.2675940990448, |
| "learning_rate": 0.001, |
| "loss": 1.1096, |
| "step": 236000 |
| }, |
| { |
| "epoch": 76.30898513251455, |
| "grad_norm": 1.627684473991394, |
| "learning_rate": 0.001, |
| "loss": 1.1072, |
| "step": 236100 |
| }, |
| { |
| "epoch": 76.34130575307046, |
| "grad_norm": 1.8927665948867798, |
| "learning_rate": 0.001, |
| "loss": 1.1098, |
| "step": 236200 |
| }, |
| { |
| "epoch": 76.37362637362638, |
| "grad_norm": 1.8863893747329712, |
| "learning_rate": 0.001, |
| "loss": 1.0979, |
| "step": 236300 |
| }, |
| { |
| "epoch": 76.40594699418229, |
| "grad_norm": 2.0543971061706543, |
| "learning_rate": 0.001, |
| "loss": 1.1123, |
| "step": 236400 |
| }, |
| { |
| "epoch": 76.43826761473821, |
| "grad_norm": 2.1276564598083496, |
| "learning_rate": 0.001, |
| "loss": 1.1174, |
| "step": 236500 |
| }, |
| { |
| "epoch": 76.47058823529412, |
| "grad_norm": 2.0694077014923096, |
| "learning_rate": 0.001, |
| "loss": 1.1169, |
| "step": 236600 |
| }, |
| { |
| "epoch": 76.50290885585004, |
| "grad_norm": 1.7249040603637695, |
| "learning_rate": 0.001, |
| "loss": 1.1289, |
| "step": 236700 |
| }, |
| { |
| "epoch": 76.53522947640595, |
| "grad_norm": 2.4833602905273438, |
| "learning_rate": 0.001, |
| "loss": 1.1457, |
| "step": 236800 |
| }, |
| { |
| "epoch": 76.56755009696187, |
| "grad_norm": 1.8441822528839111, |
| "learning_rate": 0.001, |
| "loss": 1.122, |
| "step": 236900 |
| }, |
| { |
| "epoch": 76.59987071751777, |
| "grad_norm": 1.9829216003417969, |
| "learning_rate": 0.001, |
| "loss": 1.134, |
| "step": 237000 |
| }, |
| { |
| "epoch": 76.6321913380737, |
| "grad_norm": 2.1754186153411865, |
| "learning_rate": 0.001, |
| "loss": 1.1358, |
| "step": 237100 |
| }, |
| { |
| "epoch": 76.6645119586296, |
| "grad_norm": 1.6874982118606567, |
| "learning_rate": 0.001, |
| "loss": 1.1385, |
| "step": 237200 |
| }, |
| { |
| "epoch": 76.69683257918552, |
| "grad_norm": 1.662231683731079, |
| "learning_rate": 0.001, |
| "loss": 1.1278, |
| "step": 237300 |
| }, |
| { |
| "epoch": 76.72915319974143, |
| "grad_norm": 1.977017879486084, |
| "learning_rate": 0.001, |
| "loss": 1.1418, |
| "step": 237400 |
| }, |
| { |
| "epoch": 76.76147382029735, |
| "grad_norm": 1.5747963190078735, |
| "learning_rate": 0.001, |
| "loss": 1.1458, |
| "step": 237500 |
| }, |
| { |
| "epoch": 76.79379444085326, |
| "grad_norm": 2.3878796100616455, |
| "learning_rate": 0.001, |
| "loss": 1.1603, |
| "step": 237600 |
| }, |
| { |
| "epoch": 76.82611506140918, |
| "grad_norm": 2.2227580547332764, |
| "learning_rate": 0.001, |
| "loss": 1.174, |
| "step": 237700 |
| }, |
| { |
| "epoch": 76.85843568196509, |
| "grad_norm": 2.162853479385376, |
| "learning_rate": 0.001, |
| "loss": 1.1545, |
| "step": 237800 |
| }, |
| { |
| "epoch": 76.89075630252101, |
| "grad_norm": 1.6438781023025513, |
| "learning_rate": 0.001, |
| "loss": 1.1794, |
| "step": 237900 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 1.6767332553863525, |
| "learning_rate": 0.001, |
| "loss": 1.1709, |
| "step": 238000 |
| }, |
| { |
| "epoch": 76.95539754363284, |
| "grad_norm": 1.6060292720794678, |
| "learning_rate": 0.001, |
| "loss": 1.1717, |
| "step": 238100 |
| }, |
| { |
| "epoch": 76.98771816418875, |
| "grad_norm": 1.7817925214767456, |
| "learning_rate": 0.001, |
| "loss": 1.1818, |
| "step": 238200 |
| }, |
| { |
| "epoch": 77.02003878474467, |
| "grad_norm": 1.6019141674041748, |
| "learning_rate": 0.001, |
| "loss": 1.1144, |
| "step": 238300 |
| }, |
| { |
| "epoch": 77.05235940530058, |
| "grad_norm": 1.992630124092102, |
| "learning_rate": 0.001, |
| "loss": 1.0535, |
| "step": 238400 |
| }, |
| { |
| "epoch": 77.0846800258565, |
| "grad_norm": 1.9400583505630493, |
| "learning_rate": 0.001, |
| "loss": 1.0624, |
| "step": 238500 |
| }, |
| { |
| "epoch": 77.11700064641241, |
| "grad_norm": 1.91287100315094, |
| "learning_rate": 0.001, |
| "loss": 1.0571, |
| "step": 238600 |
| }, |
| { |
| "epoch": 77.14932126696833, |
| "grad_norm": 1.7860596179962158, |
| "learning_rate": 0.001, |
| "loss": 1.0697, |
| "step": 238700 |
| }, |
| { |
| "epoch": 77.18164188752424, |
| "grad_norm": 1.6752883195877075, |
| "learning_rate": 0.001, |
| "loss": 1.0746, |
| "step": 238800 |
| }, |
| { |
| "epoch": 77.21396250808016, |
| "grad_norm": 1.7331321239471436, |
| "learning_rate": 0.001, |
| "loss": 1.0964, |
| "step": 238900 |
| }, |
| { |
| "epoch": 77.24628312863607, |
| "grad_norm": 1.943953275680542, |
| "learning_rate": 0.001, |
| "loss": 1.0953, |
| "step": 239000 |
| }, |
| { |
| "epoch": 77.27860374919199, |
| "grad_norm": 1.7045990228652954, |
| "learning_rate": 0.001, |
| "loss": 1.0782, |
| "step": 239100 |
| }, |
| { |
| "epoch": 77.3109243697479, |
| "grad_norm": 2.1768674850463867, |
| "learning_rate": 0.001, |
| "loss": 1.0918, |
| "step": 239200 |
| }, |
| { |
| "epoch": 77.34324499030382, |
| "grad_norm": 1.8054300546646118, |
| "learning_rate": 0.001, |
| "loss": 1.0982, |
| "step": 239300 |
| }, |
| { |
| "epoch": 77.37556561085972, |
| "grad_norm": 1.6111589670181274, |
| "learning_rate": 0.001, |
| "loss": 1.094, |
| "step": 239400 |
| }, |
| { |
| "epoch": 77.40788623141565, |
| "grad_norm": 1.7453973293304443, |
| "learning_rate": 0.001, |
| "loss": 1.1012, |
| "step": 239500 |
| }, |
| { |
| "epoch": 77.44020685197155, |
| "grad_norm": 2.423070192337036, |
| "learning_rate": 0.001, |
| "loss": 1.1149, |
| "step": 239600 |
| }, |
| { |
| "epoch": 77.47252747252747, |
| "grad_norm": 2.0236239433288574, |
| "learning_rate": 0.001, |
| "loss": 1.1102, |
| "step": 239700 |
| }, |
| { |
| "epoch": 77.50484809308338, |
| "grad_norm": 2.0543761253356934, |
| "learning_rate": 0.001, |
| "loss": 1.1263, |
| "step": 239800 |
| }, |
| { |
| "epoch": 77.5371687136393, |
| "grad_norm": 1.643644094467163, |
| "learning_rate": 0.001, |
| "loss": 1.1168, |
| "step": 239900 |
| }, |
| { |
| "epoch": 77.56948933419521, |
| "grad_norm": 1.8869572877883911, |
| "learning_rate": 0.001, |
| "loss": 1.125, |
| "step": 240000 |
| }, |
| { |
| "epoch": 77.60180995475113, |
| "grad_norm": 1.5091863870620728, |
| "learning_rate": 0.001, |
| "loss": 1.1353, |
| "step": 240100 |
| }, |
| { |
| "epoch": 77.63413057530704, |
| "grad_norm": 1.7741518020629883, |
| "learning_rate": 0.001, |
| "loss": 1.1292, |
| "step": 240200 |
| }, |
| { |
| "epoch": 77.66645119586296, |
| "grad_norm": 1.767402172088623, |
| "learning_rate": 0.001, |
| "loss": 1.1508, |
| "step": 240300 |
| }, |
| { |
| "epoch": 77.69877181641887, |
| "grad_norm": 1.9282313585281372, |
| "learning_rate": 0.001, |
| "loss": 1.1398, |
| "step": 240400 |
| }, |
| { |
| "epoch": 77.73109243697479, |
| "grad_norm": 1.8274956941604614, |
| "learning_rate": 0.001, |
| "loss": 1.1345, |
| "step": 240500 |
| }, |
| { |
| "epoch": 77.7634130575307, |
| "grad_norm": 1.7720264196395874, |
| "learning_rate": 0.001, |
| "loss": 1.1591, |
| "step": 240600 |
| }, |
| { |
| "epoch": 77.79573367808662, |
| "grad_norm": 1.7153974771499634, |
| "learning_rate": 0.001, |
| "loss": 1.1481, |
| "step": 240700 |
| }, |
| { |
| "epoch": 77.82805429864253, |
| "grad_norm": 2.328188180923462, |
| "learning_rate": 0.001, |
| "loss": 1.1558, |
| "step": 240800 |
| }, |
| { |
| "epoch": 77.86037491919845, |
| "grad_norm": 1.966943383216858, |
| "learning_rate": 0.001, |
| "loss": 1.1406, |
| "step": 240900 |
| }, |
| { |
| "epoch": 77.89269553975436, |
| "grad_norm": 1.7131937742233276, |
| "learning_rate": 0.001, |
| "loss": 1.1459, |
| "step": 241000 |
| }, |
| { |
| "epoch": 77.92501616031028, |
| "grad_norm": 1.452742576599121, |
| "learning_rate": 0.001, |
| "loss": 1.1633, |
| "step": 241100 |
| }, |
| { |
| "epoch": 77.95733678086619, |
| "grad_norm": 1.5621833801269531, |
| "learning_rate": 0.001, |
| "loss": 1.1689, |
| "step": 241200 |
| }, |
| { |
| "epoch": 77.98965740142211, |
| "grad_norm": 2.072866916656494, |
| "learning_rate": 0.001, |
| "loss": 1.1554, |
| "step": 241300 |
| }, |
| { |
| "epoch": 78.02197802197803, |
| "grad_norm": 1.7882293462753296, |
| "learning_rate": 0.001, |
| "loss": 1.0927, |
| "step": 241400 |
| }, |
| { |
| "epoch": 78.05429864253394, |
| "grad_norm": 1.8420997858047485, |
| "learning_rate": 0.001, |
| "loss": 1.051, |
| "step": 241500 |
| }, |
| { |
| "epoch": 78.08661926308986, |
| "grad_norm": 1.671029806137085, |
| "learning_rate": 0.001, |
| "loss": 1.0459, |
| "step": 241600 |
| }, |
| { |
| "epoch": 78.11893988364577, |
| "grad_norm": 2.1033594608306885, |
| "learning_rate": 0.001, |
| "loss": 1.0547, |
| "step": 241700 |
| }, |
| { |
| "epoch": 78.15126050420169, |
| "grad_norm": 1.9632328748703003, |
| "learning_rate": 0.001, |
| "loss": 1.0743, |
| "step": 241800 |
| }, |
| { |
| "epoch": 78.1835811247576, |
| "grad_norm": 1.8516623973846436, |
| "learning_rate": 0.001, |
| "loss": 1.0665, |
| "step": 241900 |
| }, |
| { |
| "epoch": 78.21590174531352, |
| "grad_norm": 1.782353162765503, |
| "learning_rate": 0.001, |
| "loss": 1.081, |
| "step": 242000 |
| }, |
| { |
| "epoch": 78.24822236586942, |
| "grad_norm": 2.159865140914917, |
| "learning_rate": 0.001, |
| "loss": 1.0792, |
| "step": 242100 |
| }, |
| { |
| "epoch": 78.28054298642535, |
| "grad_norm": 1.7599161863327026, |
| "learning_rate": 0.001, |
| "loss": 1.0962, |
| "step": 242200 |
| }, |
| { |
| "epoch": 78.31286360698125, |
| "grad_norm": 1.6156737804412842, |
| "learning_rate": 0.001, |
| "loss": 1.0881, |
| "step": 242300 |
| }, |
| { |
| "epoch": 78.34518422753717, |
| "grad_norm": 1.9483507871627808, |
| "learning_rate": 0.001, |
| "loss": 1.093, |
| "step": 242400 |
| }, |
| { |
| "epoch": 78.37750484809308, |
| "grad_norm": 1.5878406763076782, |
| "learning_rate": 0.001, |
| "loss": 1.0838, |
| "step": 242500 |
| }, |
| { |
| "epoch": 78.409825468649, |
| "grad_norm": 1.6766425371170044, |
| "learning_rate": 0.001, |
| "loss": 1.0998, |
| "step": 242600 |
| }, |
| { |
| "epoch": 78.44214608920491, |
| "grad_norm": 1.4642685651779175, |
| "learning_rate": 0.001, |
| "loss": 1.0929, |
| "step": 242700 |
| }, |
| { |
| "epoch": 78.47446670976083, |
| "grad_norm": 1.8012272119522095, |
| "learning_rate": 0.001, |
| "loss": 1.1047, |
| "step": 242800 |
| }, |
| { |
| "epoch": 78.50678733031674, |
| "grad_norm": 1.8630805015563965, |
| "learning_rate": 0.001, |
| "loss": 1.1029, |
| "step": 242900 |
| }, |
| { |
| "epoch": 78.53910795087266, |
| "grad_norm": 1.4753458499908447, |
| "learning_rate": 0.001, |
| "loss": 1.1177, |
| "step": 243000 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 2.500214099884033, |
| "learning_rate": 0.001, |
| "loss": 1.109, |
| "step": 243100 |
| }, |
| { |
| "epoch": 78.60374919198449, |
| "grad_norm": 1.680084466934204, |
| "learning_rate": 0.001, |
| "loss": 1.1114, |
| "step": 243200 |
| }, |
| { |
| "epoch": 78.6360698125404, |
| "grad_norm": 1.6189630031585693, |
| "learning_rate": 0.001, |
| "loss": 1.124, |
| "step": 243300 |
| }, |
| { |
| "epoch": 78.66839043309632, |
| "grad_norm": 1.7440742254257202, |
| "learning_rate": 0.001, |
| "loss": 1.1218, |
| "step": 243400 |
| }, |
| { |
| "epoch": 78.70071105365223, |
| "grad_norm": 1.8845442533493042, |
| "learning_rate": 0.001, |
| "loss": 1.1245, |
| "step": 243500 |
| }, |
| { |
| "epoch": 78.73303167420815, |
| "grad_norm": 1.6810959577560425, |
| "learning_rate": 0.001, |
| "loss": 1.123, |
| "step": 243600 |
| }, |
| { |
| "epoch": 78.76535229476406, |
| "grad_norm": 2.0967421531677246, |
| "learning_rate": 0.001, |
| "loss": 1.1421, |
| "step": 243700 |
| }, |
| { |
| "epoch": 78.79767291531998, |
| "grad_norm": 1.5401570796966553, |
| "learning_rate": 0.001, |
| "loss": 1.1464, |
| "step": 243800 |
| }, |
| { |
| "epoch": 78.82999353587589, |
| "grad_norm": 1.8480286598205566, |
| "learning_rate": 0.001, |
| "loss": 1.1326, |
| "step": 243900 |
| }, |
| { |
| "epoch": 78.86231415643181, |
| "grad_norm": 1.814274787902832, |
| "learning_rate": 0.001, |
| "loss": 1.1383, |
| "step": 244000 |
| }, |
| { |
| "epoch": 78.89463477698771, |
| "grad_norm": 2.2483580112457275, |
| "learning_rate": 0.001, |
| "loss": 1.157, |
| "step": 244100 |
| }, |
| { |
| "epoch": 78.92695539754364, |
| "grad_norm": 1.8789129257202148, |
| "learning_rate": 0.001, |
| "loss": 1.1414, |
| "step": 244200 |
| }, |
| { |
| "epoch": 78.95927601809954, |
| "grad_norm": 1.6194325685501099, |
| "learning_rate": 0.001, |
| "loss": 1.1507, |
| "step": 244300 |
| }, |
| { |
| "epoch": 78.99159663865547, |
| "grad_norm": 1.7908927202224731, |
| "learning_rate": 0.001, |
| "loss": 1.1566, |
| "step": 244400 |
| }, |
| { |
| "epoch": 79.02391725921137, |
| "grad_norm": 2.1163265705108643, |
| "learning_rate": 0.001, |
| "loss": 1.0654, |
| "step": 244500 |
| }, |
| { |
| "epoch": 79.0562378797673, |
| "grad_norm": 1.7553027868270874, |
| "learning_rate": 0.001, |
| "loss": 1.0315, |
| "step": 244600 |
| }, |
| { |
| "epoch": 79.0885585003232, |
| "grad_norm": 1.6223750114440918, |
| "learning_rate": 0.001, |
| "loss": 1.0521, |
| "step": 244700 |
| }, |
| { |
| "epoch": 79.12087912087912, |
| "grad_norm": 1.7326061725616455, |
| "learning_rate": 0.001, |
| "loss": 1.0585, |
| "step": 244800 |
| }, |
| { |
| "epoch": 79.15319974143503, |
| "grad_norm": 2.0212154388427734, |
| "learning_rate": 0.001, |
| "loss": 1.0589, |
| "step": 244900 |
| }, |
| { |
| "epoch": 79.18552036199095, |
| "grad_norm": 1.9795660972595215, |
| "learning_rate": 0.001, |
| "loss": 1.0626, |
| "step": 245000 |
| }, |
| { |
| "epoch": 79.21784098254686, |
| "grad_norm": 2.4282681941986084, |
| "learning_rate": 0.001, |
| "loss": 1.0559, |
| "step": 245100 |
| }, |
| { |
| "epoch": 79.25016160310278, |
| "grad_norm": 1.6875724792480469, |
| "learning_rate": 0.001, |
| "loss": 1.083, |
| "step": 245200 |
| }, |
| { |
| "epoch": 79.28248222365869, |
| "grad_norm": 2.1071879863739014, |
| "learning_rate": 0.001, |
| "loss": 1.0644, |
| "step": 245300 |
| }, |
| { |
| "epoch": 79.31480284421461, |
| "grad_norm": 1.889708161354065, |
| "learning_rate": 0.001, |
| "loss": 1.0704, |
| "step": 245400 |
| }, |
| { |
| "epoch": 79.34712346477052, |
| "grad_norm": 1.9262644052505493, |
| "learning_rate": 0.001, |
| "loss": 1.0836, |
| "step": 245500 |
| }, |
| { |
| "epoch": 79.37944408532644, |
| "grad_norm": 1.769736886024475, |
| "learning_rate": 0.001, |
| "loss": 1.0883, |
| "step": 245600 |
| }, |
| { |
| "epoch": 79.41176470588235, |
| "grad_norm": 1.7323601245880127, |
| "learning_rate": 0.001, |
| "loss": 1.0888, |
| "step": 245700 |
| }, |
| { |
| "epoch": 79.44408532643827, |
| "grad_norm": 1.7969982624053955, |
| "learning_rate": 0.001, |
| "loss": 1.0957, |
| "step": 245800 |
| }, |
| { |
| "epoch": 79.47640594699418, |
| "grad_norm": 1.693835735321045, |
| "learning_rate": 0.001, |
| "loss": 1.0999, |
| "step": 245900 |
| }, |
| { |
| "epoch": 79.5087265675501, |
| "grad_norm": 1.5995159149169922, |
| "learning_rate": 0.001, |
| "loss": 1.0871, |
| "step": 246000 |
| }, |
| { |
| "epoch": 79.541047188106, |
| "grad_norm": 1.8406943082809448, |
| "learning_rate": 0.001, |
| "loss": 1.0955, |
| "step": 246100 |
| }, |
| { |
| "epoch": 79.57336780866193, |
| "grad_norm": 1.8488978147506714, |
| "learning_rate": 0.001, |
| "loss": 1.1048, |
| "step": 246200 |
| }, |
| { |
| "epoch": 79.60568842921784, |
| "grad_norm": 1.6870185136795044, |
| "learning_rate": 0.001, |
| "loss": 1.0973, |
| "step": 246300 |
| }, |
| { |
| "epoch": 79.63800904977376, |
| "grad_norm": 1.5949409008026123, |
| "learning_rate": 0.001, |
| "loss": 1.1263, |
| "step": 246400 |
| }, |
| { |
| "epoch": 79.67032967032966, |
| "grad_norm": 1.8820393085479736, |
| "learning_rate": 0.001, |
| "loss": 1.1124, |
| "step": 246500 |
| }, |
| { |
| "epoch": 79.70265029088559, |
| "grad_norm": 1.714228868484497, |
| "learning_rate": 0.001, |
| "loss": 1.1208, |
| "step": 246600 |
| }, |
| { |
| "epoch": 79.7349709114415, |
| "grad_norm": 1.7019908428192139, |
| "learning_rate": 0.001, |
| "loss": 1.1255, |
| "step": 246700 |
| }, |
| { |
| "epoch": 79.76729153199742, |
| "grad_norm": 1.826647162437439, |
| "learning_rate": 0.001, |
| "loss": 1.1164, |
| "step": 246800 |
| }, |
| { |
| "epoch": 79.79961215255332, |
| "grad_norm": 1.647612452507019, |
| "learning_rate": 0.001, |
| "loss": 1.1187, |
| "step": 246900 |
| }, |
| { |
| "epoch": 79.83193277310924, |
| "grad_norm": 1.8978357315063477, |
| "learning_rate": 0.001, |
| "loss": 1.1302, |
| "step": 247000 |
| }, |
| { |
| "epoch": 79.86425339366515, |
| "grad_norm": 1.7612745761871338, |
| "learning_rate": 0.001, |
| "loss": 1.1271, |
| "step": 247100 |
| }, |
| { |
| "epoch": 79.89657401422107, |
| "grad_norm": 1.9454165697097778, |
| "learning_rate": 0.001, |
| "loss": 1.146, |
| "step": 247200 |
| }, |
| { |
| "epoch": 79.92889463477698, |
| "grad_norm": 1.8270703554153442, |
| "learning_rate": 0.001, |
| "loss": 1.1378, |
| "step": 247300 |
| }, |
| { |
| "epoch": 79.9612152553329, |
| "grad_norm": 1.6154886484146118, |
| "learning_rate": 0.001, |
| "loss": 1.1621, |
| "step": 247400 |
| }, |
| { |
| "epoch": 79.99353587588882, |
| "grad_norm": 2.264509916305542, |
| "learning_rate": 0.001, |
| "loss": 1.1423, |
| "step": 247500 |
| }, |
| { |
| "epoch": 80.02585649644473, |
| "grad_norm": 2.3280816078186035, |
| "learning_rate": 0.001, |
| "loss": 1.0525, |
| "step": 247600 |
| }, |
| { |
| "epoch": 80.05817711700065, |
| "grad_norm": 1.731689453125, |
| "learning_rate": 0.001, |
| "loss": 1.0274, |
| "step": 247700 |
| }, |
| { |
| "epoch": 80.09049773755656, |
| "grad_norm": 2.1244609355926514, |
| "learning_rate": 0.001, |
| "loss": 1.031, |
| "step": 247800 |
| }, |
| { |
| "epoch": 80.12281835811248, |
| "grad_norm": 1.8890272378921509, |
| "learning_rate": 0.001, |
| "loss": 1.0379, |
| "step": 247900 |
| }, |
| { |
| "epoch": 80.15513897866839, |
| "grad_norm": 2.0371882915496826, |
| "learning_rate": 0.001, |
| "loss": 1.0401, |
| "step": 248000 |
| }, |
| { |
| "epoch": 80.18745959922431, |
| "grad_norm": 1.8187429904937744, |
| "learning_rate": 0.001, |
| "loss": 1.0508, |
| "step": 248100 |
| }, |
| { |
| "epoch": 80.21978021978022, |
| "grad_norm": 1.776618480682373, |
| "learning_rate": 0.001, |
| "loss": 1.0607, |
| "step": 248200 |
| }, |
| { |
| "epoch": 80.25210084033614, |
| "grad_norm": 1.967862844467163, |
| "learning_rate": 0.001, |
| "loss": 1.0589, |
| "step": 248300 |
| }, |
| { |
| "epoch": 80.28442146089205, |
| "grad_norm": 1.8256206512451172, |
| "learning_rate": 0.001, |
| "loss": 1.066, |
| "step": 248400 |
| }, |
| { |
| "epoch": 80.31674208144797, |
| "grad_norm": 2.1693742275238037, |
| "learning_rate": 0.001, |
| "loss": 1.0786, |
| "step": 248500 |
| }, |
| { |
| "epoch": 80.34906270200388, |
| "grad_norm": 2.159891128540039, |
| "learning_rate": 0.001, |
| "loss": 1.0698, |
| "step": 248600 |
| }, |
| { |
| "epoch": 80.3813833225598, |
| "grad_norm": 1.8442610502243042, |
| "learning_rate": 0.001, |
| "loss": 1.0768, |
| "step": 248700 |
| }, |
| { |
| "epoch": 80.4137039431157, |
| "grad_norm": 1.802513599395752, |
| "learning_rate": 0.001, |
| "loss": 1.0811, |
| "step": 248800 |
| }, |
| { |
| "epoch": 80.44602456367163, |
| "grad_norm": 1.897873878479004, |
| "learning_rate": 0.001, |
| "loss": 1.0844, |
| "step": 248900 |
| }, |
| { |
| "epoch": 80.47834518422754, |
| "grad_norm": 2.003659248352051, |
| "learning_rate": 0.001, |
| "loss": 1.0747, |
| "step": 249000 |
| }, |
| { |
| "epoch": 80.51066580478346, |
| "grad_norm": 1.620818853378296, |
| "learning_rate": 0.001, |
| "loss": 1.0853, |
| "step": 249100 |
| }, |
| { |
| "epoch": 80.54298642533936, |
| "grad_norm": 1.9488434791564941, |
| "learning_rate": 0.001, |
| "loss": 1.0955, |
| "step": 249200 |
| }, |
| { |
| "epoch": 80.57530704589529, |
| "grad_norm": 1.8911210298538208, |
| "learning_rate": 0.001, |
| "loss": 1.1133, |
| "step": 249300 |
| }, |
| { |
| "epoch": 80.6076276664512, |
| "grad_norm": 2.607034206390381, |
| "learning_rate": 0.001, |
| "loss": 1.0753, |
| "step": 249400 |
| }, |
| { |
| "epoch": 80.63994828700712, |
| "grad_norm": 1.5252186059951782, |
| "learning_rate": 0.001, |
| "loss": 1.0995, |
| "step": 249500 |
| }, |
| { |
| "epoch": 80.67226890756302, |
| "grad_norm": 1.8382611274719238, |
| "learning_rate": 0.001, |
| "loss": 1.1067, |
| "step": 249600 |
| }, |
| { |
| "epoch": 80.70458952811894, |
| "grad_norm": 1.9256694316864014, |
| "learning_rate": 0.001, |
| "loss": 1.113, |
| "step": 249700 |
| }, |
| { |
| "epoch": 80.73691014867485, |
| "grad_norm": 2.985775947570801, |
| "learning_rate": 0.001, |
| "loss": 1.1075, |
| "step": 249800 |
| }, |
| { |
| "epoch": 80.76923076923077, |
| "grad_norm": 1.662009835243225, |
| "learning_rate": 0.001, |
| "loss": 1.1271, |
| "step": 249900 |
| }, |
| { |
| "epoch": 80.80155138978668, |
| "grad_norm": 2.561980724334717, |
| "learning_rate": 0.001, |
| "loss": 1.1167, |
| "step": 250000 |
| }, |
| { |
| "epoch": 80.8338720103426, |
| "grad_norm": 1.9441149234771729, |
| "learning_rate": 0.001, |
| "loss": 1.1162, |
| "step": 250100 |
| }, |
| { |
| "epoch": 80.86619263089851, |
| "grad_norm": 2.120574474334717, |
| "learning_rate": 0.001, |
| "loss": 1.1231, |
| "step": 250200 |
| }, |
| { |
| "epoch": 80.89851325145443, |
| "grad_norm": 1.7940996885299683, |
| "learning_rate": 0.001, |
| "loss": 1.1185, |
| "step": 250300 |
| }, |
| { |
| "epoch": 80.93083387201034, |
| "grad_norm": 2.060868263244629, |
| "learning_rate": 0.001, |
| "loss": 1.1226, |
| "step": 250400 |
| }, |
| { |
| "epoch": 80.96315449256626, |
| "grad_norm": 1.9433925151824951, |
| "learning_rate": 0.001, |
| "loss": 1.1511, |
| "step": 250500 |
| }, |
| { |
| "epoch": 80.99547511312217, |
| "grad_norm": 2.249880075454712, |
| "learning_rate": 0.001, |
| "loss": 1.1191, |
| "step": 250600 |
| }, |
| { |
| "epoch": 81.02779573367809, |
| "grad_norm": 2.3257598876953125, |
| "learning_rate": 0.001, |
| "loss": 1.0245, |
| "step": 250700 |
| }, |
| { |
| "epoch": 81.060116354234, |
| "grad_norm": 2.1970770359039307, |
| "learning_rate": 0.001, |
| "loss": 1.02, |
| "step": 250800 |
| }, |
| { |
| "epoch": 81.09243697478992, |
| "grad_norm": 1.8370674848556519, |
| "learning_rate": 0.001, |
| "loss": 1.0309, |
| "step": 250900 |
| }, |
| { |
| "epoch": 81.12475759534583, |
| "grad_norm": 2.330951452255249, |
| "learning_rate": 0.001, |
| "loss": 1.0438, |
| "step": 251000 |
| }, |
| { |
| "epoch": 81.15707821590175, |
| "grad_norm": 1.8591450452804565, |
| "learning_rate": 0.001, |
| "loss": 1.0488, |
| "step": 251100 |
| }, |
| { |
| "epoch": 81.18939883645766, |
| "grad_norm": 2.0385501384735107, |
| "learning_rate": 0.001, |
| "loss": 1.0499, |
| "step": 251200 |
| }, |
| { |
| "epoch": 81.22171945701358, |
| "grad_norm": 1.6502009630203247, |
| "learning_rate": 0.001, |
| "loss": 1.0435, |
| "step": 251300 |
| }, |
| { |
| "epoch": 81.25404007756948, |
| "grad_norm": 2.523043394088745, |
| "learning_rate": 0.001, |
| "loss": 1.0426, |
| "step": 251400 |
| }, |
| { |
| "epoch": 81.2863606981254, |
| "grad_norm": 2.149442434310913, |
| "learning_rate": 0.001, |
| "loss": 1.0617, |
| "step": 251500 |
| }, |
| { |
| "epoch": 81.31868131868131, |
| "grad_norm": 1.8482860326766968, |
| "learning_rate": 0.001, |
| "loss": 1.0735, |
| "step": 251600 |
| }, |
| { |
| "epoch": 81.35100193923724, |
| "grad_norm": 2.3649866580963135, |
| "learning_rate": 0.001, |
| "loss": 1.0445, |
| "step": 251700 |
| }, |
| { |
| "epoch": 81.38332255979314, |
| "grad_norm": 1.7468416690826416, |
| "learning_rate": 0.001, |
| "loss": 1.0568, |
| "step": 251800 |
| }, |
| { |
| "epoch": 81.41564318034906, |
| "grad_norm": 2.0502419471740723, |
| "learning_rate": 0.001, |
| "loss": 1.0767, |
| "step": 251900 |
| }, |
| { |
| "epoch": 81.44796380090497, |
| "grad_norm": 1.6154065132141113, |
| "learning_rate": 0.001, |
| "loss": 1.0785, |
| "step": 252000 |
| }, |
| { |
| "epoch": 81.4802844214609, |
| "grad_norm": 1.6533535718917847, |
| "learning_rate": 0.001, |
| "loss": 1.0839, |
| "step": 252100 |
| }, |
| { |
| "epoch": 81.5126050420168, |
| "grad_norm": 1.7687009572982788, |
| "learning_rate": 0.001, |
| "loss": 1.0721, |
| "step": 252200 |
| }, |
| { |
| "epoch": 81.54492566257272, |
| "grad_norm": 1.9510821104049683, |
| "learning_rate": 0.001, |
| "loss": 1.0825, |
| "step": 252300 |
| }, |
| { |
| "epoch": 81.57724628312863, |
| "grad_norm": 2.1591577529907227, |
| "learning_rate": 0.001, |
| "loss": 1.0866, |
| "step": 252400 |
| }, |
| { |
| "epoch": 81.60956690368455, |
| "grad_norm": 2.1044604778289795, |
| "learning_rate": 0.001, |
| "loss": 1.1099, |
| "step": 252500 |
| }, |
| { |
| "epoch": 81.64188752424046, |
| "grad_norm": 2.2758374214172363, |
| "learning_rate": 0.001, |
| "loss": 1.0963, |
| "step": 252600 |
| }, |
| { |
| "epoch": 81.67420814479638, |
| "grad_norm": 2.09716534614563, |
| "learning_rate": 0.001, |
| "loss": 1.0998, |
| "step": 252700 |
| }, |
| { |
| "epoch": 81.70652876535229, |
| "grad_norm": 2.193350076675415, |
| "learning_rate": 0.001, |
| "loss": 1.0906, |
| "step": 252800 |
| }, |
| { |
| "epoch": 81.73884938590821, |
| "grad_norm": 1.868490219116211, |
| "learning_rate": 0.001, |
| "loss": 1.1172, |
| "step": 252900 |
| }, |
| { |
| "epoch": 81.77117000646412, |
| "grad_norm": 2.276905059814453, |
| "learning_rate": 0.001, |
| "loss": 1.1076, |
| "step": 253000 |
| }, |
| { |
| "epoch": 81.80349062702004, |
| "grad_norm": 1.9997442960739136, |
| "learning_rate": 0.001, |
| "loss": 1.1122, |
| "step": 253100 |
| }, |
| { |
| "epoch": 81.83581124757595, |
| "grad_norm": 2.066814422607422, |
| "learning_rate": 0.001, |
| "loss": 1.1113, |
| "step": 253200 |
| }, |
| { |
| "epoch": 81.86813186813187, |
| "grad_norm": 2.0630595684051514, |
| "learning_rate": 0.001, |
| "loss": 1.14, |
| "step": 253300 |
| }, |
| { |
| "epoch": 81.90045248868778, |
| "grad_norm": 1.7941197156906128, |
| "learning_rate": 0.001, |
| "loss": 1.1236, |
| "step": 253400 |
| }, |
| { |
| "epoch": 81.9327731092437, |
| "grad_norm": 1.8599138259887695, |
| "learning_rate": 0.001, |
| "loss": 1.1066, |
| "step": 253500 |
| }, |
| { |
| "epoch": 81.9650937297996, |
| "grad_norm": 2.007969379425049, |
| "learning_rate": 0.001, |
| "loss": 1.1161, |
| "step": 253600 |
| }, |
| { |
| "epoch": 81.99741435035553, |
| "grad_norm": 2.2284483909606934, |
| "learning_rate": 0.001, |
| "loss": 1.1156, |
| "step": 253700 |
| }, |
| { |
| "epoch": 82.02973497091145, |
| "grad_norm": 2.0131847858428955, |
| "learning_rate": 0.001, |
| "loss": 1.0191, |
| "step": 253800 |
| }, |
| { |
| "epoch": 82.06205559146736, |
| "grad_norm": 2.0108609199523926, |
| "learning_rate": 0.001, |
| "loss": 1.01, |
| "step": 253900 |
| }, |
| { |
| "epoch": 82.09437621202328, |
| "grad_norm": 2.120751142501831, |
| "learning_rate": 0.001, |
| "loss": 1.0098, |
| "step": 254000 |
| }, |
| { |
| "epoch": 82.12669683257919, |
| "grad_norm": 2.449652671813965, |
| "learning_rate": 0.001, |
| "loss": 1.021, |
| "step": 254100 |
| }, |
| { |
| "epoch": 82.1590174531351, |
| "grad_norm": 2.4260470867156982, |
| "learning_rate": 0.001, |
| "loss": 1.0347, |
| "step": 254200 |
| }, |
| { |
| "epoch": 82.19133807369101, |
| "grad_norm": 2.6759181022644043, |
| "learning_rate": 0.001, |
| "loss": 1.0325, |
| "step": 254300 |
| }, |
| { |
| "epoch": 82.22365869424694, |
| "grad_norm": 2.0493171215057373, |
| "learning_rate": 0.001, |
| "loss": 1.0422, |
| "step": 254400 |
| }, |
| { |
| "epoch": 82.25597931480284, |
| "grad_norm": 2.4781503677368164, |
| "learning_rate": 0.001, |
| "loss": 1.0344, |
| "step": 254500 |
| }, |
| { |
| "epoch": 82.28829993535876, |
| "grad_norm": 3.0941216945648193, |
| "learning_rate": 0.001, |
| "loss": 1.0478, |
| "step": 254600 |
| }, |
| { |
| "epoch": 82.32062055591467, |
| "grad_norm": 2.149822235107422, |
| "learning_rate": 0.001, |
| "loss": 1.0428, |
| "step": 254700 |
| }, |
| { |
| "epoch": 82.3529411764706, |
| "grad_norm": 1.9726589918136597, |
| "learning_rate": 0.001, |
| "loss": 1.0629, |
| "step": 254800 |
| }, |
| { |
| "epoch": 82.3852617970265, |
| "grad_norm": 2.1840827465057373, |
| "learning_rate": 0.001, |
| "loss": 1.0648, |
| "step": 254900 |
| }, |
| { |
| "epoch": 82.41758241758242, |
| "grad_norm": 2.0328707695007324, |
| "learning_rate": 0.001, |
| "loss": 1.0667, |
| "step": 255000 |
| }, |
| { |
| "epoch": 82.44990303813833, |
| "grad_norm": 2.3106980323791504, |
| "learning_rate": 0.001, |
| "loss": 1.0656, |
| "step": 255100 |
| }, |
| { |
| "epoch": 82.48222365869425, |
| "grad_norm": 2.14380145072937, |
| "learning_rate": 0.001, |
| "loss": 1.0788, |
| "step": 255200 |
| }, |
| { |
| "epoch": 82.51454427925016, |
| "grad_norm": 1.6182340383529663, |
| "learning_rate": 0.001, |
| "loss": 1.0802, |
| "step": 255300 |
| }, |
| { |
| "epoch": 82.54686489980608, |
| "grad_norm": 2.0344672203063965, |
| "learning_rate": 0.001, |
| "loss": 1.0871, |
| "step": 255400 |
| }, |
| { |
| "epoch": 82.57918552036199, |
| "grad_norm": 1.7792701721191406, |
| "learning_rate": 0.001, |
| "loss": 1.0804, |
| "step": 255500 |
| }, |
| { |
| "epoch": 82.61150614091791, |
| "grad_norm": 2.1773808002471924, |
| "learning_rate": 0.001, |
| "loss": 1.0792, |
| "step": 255600 |
| }, |
| { |
| "epoch": 82.64382676147382, |
| "grad_norm": 1.9904121160507202, |
| "learning_rate": 0.001, |
| "loss": 1.0807, |
| "step": 255700 |
| }, |
| { |
| "epoch": 82.67614738202974, |
| "grad_norm": 2.2778069972991943, |
| "learning_rate": 0.001, |
| "loss": 1.0802, |
| "step": 255800 |
| }, |
| { |
| "epoch": 82.70846800258565, |
| "grad_norm": 2.273298978805542, |
| "learning_rate": 0.001, |
| "loss": 1.1049, |
| "step": 255900 |
| }, |
| { |
| "epoch": 82.74078862314157, |
| "grad_norm": 2.3957090377807617, |
| "learning_rate": 0.001, |
| "loss": 1.0932, |
| "step": 256000 |
| }, |
| { |
| "epoch": 82.77310924369748, |
| "grad_norm": 1.931657314300537, |
| "learning_rate": 0.001, |
| "loss": 1.1028, |
| "step": 256100 |
| }, |
| { |
| "epoch": 82.8054298642534, |
| "grad_norm": 1.7905445098876953, |
| "learning_rate": 0.001, |
| "loss": 1.0865, |
| "step": 256200 |
| }, |
| { |
| "epoch": 82.8377504848093, |
| "grad_norm": 1.855185627937317, |
| "learning_rate": 0.001, |
| "loss": 1.0979, |
| "step": 256300 |
| }, |
| { |
| "epoch": 82.87007110536523, |
| "grad_norm": 1.7903704643249512, |
| "learning_rate": 0.001, |
| "loss": 1.0997, |
| "step": 256400 |
| }, |
| { |
| "epoch": 82.90239172592113, |
| "grad_norm": 2.024670124053955, |
| "learning_rate": 0.001, |
| "loss": 1.0971, |
| "step": 256500 |
| }, |
| { |
| "epoch": 82.93471234647706, |
| "grad_norm": 2.054471492767334, |
| "learning_rate": 0.001, |
| "loss": 1.1191, |
| "step": 256600 |
| }, |
| { |
| "epoch": 82.96703296703296, |
| "grad_norm": 2.021584987640381, |
| "learning_rate": 0.001, |
| "loss": 1.1027, |
| "step": 256700 |
| }, |
| { |
| "epoch": 82.99935358758889, |
| "grad_norm": 2.977588176727295, |
| "learning_rate": 0.001, |
| "loss": 1.0963, |
| "step": 256800 |
| }, |
| { |
| "epoch": 83.03167420814479, |
| "grad_norm": 2.254401922225952, |
| "learning_rate": 0.001, |
| "loss": 0.9857, |
| "step": 256900 |
| }, |
| { |
| "epoch": 83.06399482870071, |
| "grad_norm": 2.0758557319641113, |
| "learning_rate": 0.001, |
| "loss": 0.9981, |
| "step": 257000 |
| }, |
| { |
| "epoch": 83.09631544925662, |
| "grad_norm": 2.851766586303711, |
| "learning_rate": 0.001, |
| "loss": 1.0049, |
| "step": 257100 |
| }, |
| { |
| "epoch": 83.12863606981254, |
| "grad_norm": 1.9387279748916626, |
| "learning_rate": 0.001, |
| "loss": 1.0158, |
| "step": 257200 |
| }, |
| { |
| "epoch": 83.16095669036845, |
| "grad_norm": 2.2792084217071533, |
| "learning_rate": 0.001, |
| "loss": 1.0109, |
| "step": 257300 |
| }, |
| { |
| "epoch": 83.19327731092437, |
| "grad_norm": 2.8158106803894043, |
| "learning_rate": 0.001, |
| "loss": 1.0325, |
| "step": 257400 |
| }, |
| { |
| "epoch": 83.22559793148028, |
| "grad_norm": 1.8931865692138672, |
| "learning_rate": 0.001, |
| "loss": 1.0084, |
| "step": 257500 |
| }, |
| { |
| "epoch": 83.2579185520362, |
| "grad_norm": 2.0553183555603027, |
| "learning_rate": 0.001, |
| "loss": 1.0223, |
| "step": 257600 |
| }, |
| { |
| "epoch": 83.29023917259211, |
| "grad_norm": 2.032785415649414, |
| "learning_rate": 0.001, |
| "loss": 1.0368, |
| "step": 257700 |
| }, |
| { |
| "epoch": 83.32255979314803, |
| "grad_norm": 2.276414632797241, |
| "learning_rate": 0.001, |
| "loss": 1.0434, |
| "step": 257800 |
| }, |
| { |
| "epoch": 83.35488041370394, |
| "grad_norm": 2.2769389152526855, |
| "learning_rate": 0.001, |
| "loss": 1.0405, |
| "step": 257900 |
| }, |
| { |
| "epoch": 83.38720103425986, |
| "grad_norm": 2.457798719406128, |
| "learning_rate": 0.001, |
| "loss": 1.0536, |
| "step": 258000 |
| }, |
| { |
| "epoch": 83.41952165481577, |
| "grad_norm": 2.427795886993408, |
| "learning_rate": 0.001, |
| "loss": 1.0574, |
| "step": 258100 |
| }, |
| { |
| "epoch": 83.45184227537169, |
| "grad_norm": 2.3485844135284424, |
| "learning_rate": 0.001, |
| "loss": 1.0604, |
| "step": 258200 |
| }, |
| { |
| "epoch": 83.4841628959276, |
| "grad_norm": 2.122995615005493, |
| "learning_rate": 0.001, |
| "loss": 1.0593, |
| "step": 258300 |
| }, |
| { |
| "epoch": 83.51648351648352, |
| "grad_norm": 2.6342482566833496, |
| "learning_rate": 0.001, |
| "loss": 1.0711, |
| "step": 258400 |
| }, |
| { |
| "epoch": 83.54880413703943, |
| "grad_norm": 2.1874687671661377, |
| "learning_rate": 0.001, |
| "loss": 1.063, |
| "step": 258500 |
| }, |
| { |
| "epoch": 83.58112475759535, |
| "grad_norm": 2.3595123291015625, |
| "learning_rate": 0.001, |
| "loss": 1.0756, |
| "step": 258600 |
| }, |
| { |
| "epoch": 83.61344537815125, |
| "grad_norm": 2.185002326965332, |
| "learning_rate": 0.001, |
| "loss": 1.0728, |
| "step": 258700 |
| }, |
| { |
| "epoch": 83.64576599870718, |
| "grad_norm": 2.350257158279419, |
| "learning_rate": 0.001, |
| "loss": 1.0819, |
| "step": 258800 |
| }, |
| { |
| "epoch": 83.67808661926308, |
| "grad_norm": 2.661860227584839, |
| "learning_rate": 0.001, |
| "loss": 1.0873, |
| "step": 258900 |
| }, |
| { |
| "epoch": 83.710407239819, |
| "grad_norm": 2.071593999862671, |
| "learning_rate": 0.001, |
| "loss": 1.0969, |
| "step": 259000 |
| }, |
| { |
| "epoch": 83.74272786037491, |
| "grad_norm": 2.097931146621704, |
| "learning_rate": 0.001, |
| "loss": 1.0821, |
| "step": 259100 |
| }, |
| { |
| "epoch": 83.77504848093083, |
| "grad_norm": 2.636651039123535, |
| "learning_rate": 0.001, |
| "loss": 1.0815, |
| "step": 259200 |
| }, |
| { |
| "epoch": 83.80736910148674, |
| "grad_norm": 2.398634433746338, |
| "learning_rate": 0.001, |
| "loss": 1.0849, |
| "step": 259300 |
| }, |
| { |
| "epoch": 83.83968972204266, |
| "grad_norm": 1.94718599319458, |
| "learning_rate": 0.001, |
| "loss": 1.0987, |
| "step": 259400 |
| }, |
| { |
| "epoch": 83.87201034259857, |
| "grad_norm": 2.3222267627716064, |
| "learning_rate": 0.001, |
| "loss": 1.0896, |
| "step": 259500 |
| }, |
| { |
| "epoch": 83.9043309631545, |
| "grad_norm": 2.0322906970977783, |
| "learning_rate": 0.001, |
| "loss": 1.0968, |
| "step": 259600 |
| }, |
| { |
| "epoch": 83.9366515837104, |
| "grad_norm": 2.293200731277466, |
| "learning_rate": 0.001, |
| "loss": 1.1018, |
| "step": 259700 |
| }, |
| { |
| "epoch": 83.96897220426632, |
| "grad_norm": 2.212113380432129, |
| "learning_rate": 0.001, |
| "loss": 1.1003, |
| "step": 259800 |
| }, |
| { |
| "epoch": 84.00129282482224, |
| "grad_norm": 1.9160456657409668, |
| "learning_rate": 0.001, |
| "loss": 1.1094, |
| "step": 259900 |
| }, |
| { |
| "epoch": 84.03361344537815, |
| "grad_norm": 2.0068440437316895, |
| "learning_rate": 0.001, |
| "loss": 0.9994, |
| "step": 260000 |
| }, |
| { |
| "epoch": 84.06593406593407, |
| "grad_norm": 1.8730653524398804, |
| "learning_rate": 0.001, |
| "loss": 0.9916, |
| "step": 260100 |
| }, |
| { |
| "epoch": 84.09825468648998, |
| "grad_norm": 1.9432926177978516, |
| "learning_rate": 0.001, |
| "loss": 0.9884, |
| "step": 260200 |
| }, |
| { |
| "epoch": 84.1305753070459, |
| "grad_norm": 2.5531952381134033, |
| "learning_rate": 0.001, |
| "loss": 1.0135, |
| "step": 260300 |
| }, |
| { |
| "epoch": 84.16289592760181, |
| "grad_norm": 1.827590823173523, |
| "learning_rate": 0.001, |
| "loss": 1.0184, |
| "step": 260400 |
| }, |
| { |
| "epoch": 84.19521654815773, |
| "grad_norm": 1.953426718711853, |
| "learning_rate": 0.001, |
| "loss": 1.0346, |
| "step": 260500 |
| }, |
| { |
| "epoch": 84.22753716871364, |
| "grad_norm": 1.895742416381836, |
| "learning_rate": 0.001, |
| "loss": 1.0192, |
| "step": 260600 |
| }, |
| { |
| "epoch": 84.25985778926956, |
| "grad_norm": 2.4154608249664307, |
| "learning_rate": 0.001, |
| "loss": 1.0212, |
| "step": 260700 |
| }, |
| { |
| "epoch": 84.29217840982547, |
| "grad_norm": 1.9845346212387085, |
| "learning_rate": 0.001, |
| "loss": 1.0385, |
| "step": 260800 |
| }, |
| { |
| "epoch": 84.32449903038139, |
| "grad_norm": 1.9528859853744507, |
| "learning_rate": 0.001, |
| "loss": 1.038, |
| "step": 260900 |
| }, |
| { |
| "epoch": 84.3568196509373, |
| "grad_norm": 1.5986595153808594, |
| "learning_rate": 0.001, |
| "loss": 1.0472, |
| "step": 261000 |
| }, |
| { |
| "epoch": 84.38914027149322, |
| "grad_norm": 2.879544734954834, |
| "learning_rate": 0.001, |
| "loss": 1.0387, |
| "step": 261100 |
| }, |
| { |
| "epoch": 84.42146089204913, |
| "grad_norm": 2.07099986076355, |
| "learning_rate": 0.001, |
| "loss": 1.0485, |
| "step": 261200 |
| }, |
| { |
| "epoch": 84.45378151260505, |
| "grad_norm": 1.9697378873825073, |
| "learning_rate": 0.001, |
| "loss": 1.046, |
| "step": 261300 |
| }, |
| { |
| "epoch": 84.48610213316095, |
| "grad_norm": 2.495154619216919, |
| "learning_rate": 0.001, |
| "loss": 1.0427, |
| "step": 261400 |
| }, |
| { |
| "epoch": 84.51842275371688, |
| "grad_norm": 2.241727113723755, |
| "learning_rate": 0.001, |
| "loss": 1.0599, |
| "step": 261500 |
| }, |
| { |
| "epoch": 84.55074337427278, |
| "grad_norm": 2.0560214519500732, |
| "learning_rate": 0.001, |
| "loss": 1.0515, |
| "step": 261600 |
| }, |
| { |
| "epoch": 84.5830639948287, |
| "grad_norm": 2.1529815196990967, |
| "learning_rate": 0.001, |
| "loss": 1.0516, |
| "step": 261700 |
| }, |
| { |
| "epoch": 84.61538461538461, |
| "grad_norm": 2.4618332386016846, |
| "learning_rate": 0.001, |
| "loss": 1.0721, |
| "step": 261800 |
| }, |
| { |
| "epoch": 84.64770523594053, |
| "grad_norm": 1.9415512084960938, |
| "learning_rate": 0.001, |
| "loss": 1.0545, |
| "step": 261900 |
| }, |
| { |
| "epoch": 84.68002585649644, |
| "grad_norm": 40.67611312866211, |
| "learning_rate": 0.001, |
| "loss": 1.0695, |
| "step": 262000 |
| }, |
| { |
| "epoch": 84.71234647705236, |
| "grad_norm": 1.830438256263733, |
| "learning_rate": 0.001, |
| "loss": 1.0702, |
| "step": 262100 |
| }, |
| { |
| "epoch": 84.74466709760827, |
| "grad_norm": 2.0758326053619385, |
| "learning_rate": 0.001, |
| "loss": 1.0715, |
| "step": 262200 |
| }, |
| { |
| "epoch": 84.7769877181642, |
| "grad_norm": 1.9768143892288208, |
| "learning_rate": 0.001, |
| "loss": 1.0926, |
| "step": 262300 |
| }, |
| { |
| "epoch": 84.8093083387201, |
| "grad_norm": 2.4921510219573975, |
| "learning_rate": 0.001, |
| "loss": 1.0887, |
| "step": 262400 |
| }, |
| { |
| "epoch": 84.84162895927602, |
| "grad_norm": 1.8019990921020508, |
| "learning_rate": 0.001, |
| "loss": 1.1003, |
| "step": 262500 |
| }, |
| { |
| "epoch": 84.87394957983193, |
| "grad_norm": 1.7619433403015137, |
| "learning_rate": 0.001, |
| "loss": 1.0909, |
| "step": 262600 |
| }, |
| { |
| "epoch": 84.90627020038785, |
| "grad_norm": 2.410701274871826, |
| "learning_rate": 0.001, |
| "loss": 1.094, |
| "step": 262700 |
| }, |
| { |
| "epoch": 84.93859082094376, |
| "grad_norm": 2.021223306655884, |
| "learning_rate": 0.001, |
| "loss": 1.0909, |
| "step": 262800 |
| }, |
| { |
| "epoch": 84.97091144149968, |
| "grad_norm": 10.952349662780762, |
| "learning_rate": 0.001, |
| "loss": 1.1052, |
| "step": 262900 |
| }, |
| { |
| "epoch": 85.00323206205559, |
| "grad_norm": 1.784826636314392, |
| "learning_rate": 0.001, |
| "loss": 1.1001, |
| "step": 263000 |
| }, |
| { |
| "epoch": 85.03555268261151, |
| "grad_norm": 2.587707042694092, |
| "learning_rate": 0.001, |
| "loss": 0.9838, |
| "step": 263100 |
| }, |
| { |
| "epoch": 85.06787330316742, |
| "grad_norm": 2.302337169647217, |
| "learning_rate": 0.001, |
| "loss": 0.9897, |
| "step": 263200 |
| }, |
| { |
| "epoch": 85.10019392372334, |
| "grad_norm": 1.8791141510009766, |
| "learning_rate": 0.001, |
| "loss": 0.9905, |
| "step": 263300 |
| }, |
| { |
| "epoch": 85.13251454427925, |
| "grad_norm": 1.891874074935913, |
| "learning_rate": 0.001, |
| "loss": 0.9889, |
| "step": 263400 |
| }, |
| { |
| "epoch": 85.16483516483517, |
| "grad_norm": 2.0654654502868652, |
| "learning_rate": 0.001, |
| "loss": 0.9969, |
| "step": 263500 |
| }, |
| { |
| "epoch": 85.19715578539108, |
| "grad_norm": 2.999326229095459, |
| "learning_rate": 0.001, |
| "loss": 1.0026, |
| "step": 263600 |
| }, |
| { |
| "epoch": 85.229476405947, |
| "grad_norm": 2.2999064922332764, |
| "learning_rate": 0.001, |
| "loss": 1.0114, |
| "step": 263700 |
| }, |
| { |
| "epoch": 85.2617970265029, |
| "grad_norm": 2.1461431980133057, |
| "learning_rate": 0.001, |
| "loss": 1.0403, |
| "step": 263800 |
| }, |
| { |
| "epoch": 85.29411764705883, |
| "grad_norm": 1.715585708618164, |
| "learning_rate": 0.001, |
| "loss": 1.0147, |
| "step": 263900 |
| }, |
| { |
| "epoch": 85.32643826761473, |
| "grad_norm": 2.0307717323303223, |
| "learning_rate": 0.001, |
| "loss": 1.0311, |
| "step": 264000 |
| }, |
| { |
| "epoch": 85.35875888817066, |
| "grad_norm": 1.924255609512329, |
| "learning_rate": 0.001, |
| "loss": 1.0376, |
| "step": 264100 |
| }, |
| { |
| "epoch": 85.39107950872656, |
| "grad_norm": 3.5688138008117676, |
| "learning_rate": 0.001, |
| "loss": 1.0331, |
| "step": 264200 |
| }, |
| { |
| "epoch": 85.42340012928248, |
| "grad_norm": 1.627633810043335, |
| "learning_rate": 0.001, |
| "loss": 1.0413, |
| "step": 264300 |
| }, |
| { |
| "epoch": 85.45572074983839, |
| "grad_norm": 2.091357946395874, |
| "learning_rate": 0.001, |
| "loss": 1.0438, |
| "step": 264400 |
| }, |
| { |
| "epoch": 85.48804137039431, |
| "grad_norm": 1.7921959161758423, |
| "learning_rate": 0.001, |
| "loss": 1.0332, |
| "step": 264500 |
| }, |
| { |
| "epoch": 85.52036199095022, |
| "grad_norm": 1.9144126176834106, |
| "learning_rate": 0.001, |
| "loss": 1.0537, |
| "step": 264600 |
| }, |
| { |
| "epoch": 85.55268261150614, |
| "grad_norm": 1.995881199836731, |
| "learning_rate": 0.001, |
| "loss": 1.0525, |
| "step": 264700 |
| }, |
| { |
| "epoch": 85.58500323206205, |
| "grad_norm": 1.8649122714996338, |
| "learning_rate": 0.001, |
| "loss": 1.0506, |
| "step": 264800 |
| }, |
| { |
| "epoch": 85.61732385261797, |
| "grad_norm": 1.7572340965270996, |
| "learning_rate": 0.001, |
| "loss": 1.0468, |
| "step": 264900 |
| }, |
| { |
| "epoch": 85.64964447317388, |
| "grad_norm": 2.5630545616149902, |
| "learning_rate": 0.001, |
| "loss": 1.0482, |
| "step": 265000 |
| }, |
| { |
| "epoch": 85.6819650937298, |
| "grad_norm": 1.790952205657959, |
| "learning_rate": 0.001, |
| "loss": 1.0607, |
| "step": 265100 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 1.7042549848556519, |
| "learning_rate": 0.001, |
| "loss": 1.0452, |
| "step": 265200 |
| }, |
| { |
| "epoch": 85.74660633484163, |
| "grad_norm": 2.4065299034118652, |
| "learning_rate": 0.001, |
| "loss": 1.0821, |
| "step": 265300 |
| }, |
| { |
| "epoch": 85.77892695539754, |
| "grad_norm": 1.7829989194869995, |
| "learning_rate": 0.001, |
| "loss": 1.0725, |
| "step": 265400 |
| }, |
| { |
| "epoch": 85.81124757595346, |
| "grad_norm": 2.1033124923706055, |
| "learning_rate": 0.001, |
| "loss": 1.0794, |
| "step": 265500 |
| }, |
| { |
| "epoch": 85.84356819650937, |
| "grad_norm": 1.9515258073806763, |
| "learning_rate": 0.001, |
| "loss": 1.0688, |
| "step": 265600 |
| }, |
| { |
| "epoch": 85.87588881706529, |
| "grad_norm": 1.796294927597046, |
| "learning_rate": 0.001, |
| "loss": 1.0845, |
| "step": 265700 |
| }, |
| { |
| "epoch": 85.9082094376212, |
| "grad_norm": 1.7326546907424927, |
| "learning_rate": 0.001, |
| "loss": 1.0781, |
| "step": 265800 |
| }, |
| { |
| "epoch": 85.94053005817712, |
| "grad_norm": 1.7627480030059814, |
| "learning_rate": 0.001, |
| "loss": 1.0979, |
| "step": 265900 |
| }, |
| { |
| "epoch": 85.97285067873302, |
| "grad_norm": 2.2406697273254395, |
| "learning_rate": 0.001, |
| "loss": 1.0872, |
| "step": 266000 |
| }, |
| { |
| "epoch": 86.00517129928895, |
| "grad_norm": 1.6462572813034058, |
| "learning_rate": 0.001, |
| "loss": 1.08, |
| "step": 266100 |
| }, |
| { |
| "epoch": 86.03749191984487, |
| "grad_norm": 1.6872225999832153, |
| "learning_rate": 0.001, |
| "loss": 0.9723, |
| "step": 266200 |
| }, |
| { |
| "epoch": 86.06981254040078, |
| "grad_norm": 2.2746024131774902, |
| "learning_rate": 0.001, |
| "loss": 0.9815, |
| "step": 266300 |
| }, |
| { |
| "epoch": 86.1021331609567, |
| "grad_norm": 1.6541063785552979, |
| "learning_rate": 0.001, |
| "loss": 0.9831, |
| "step": 266400 |
| }, |
| { |
| "epoch": 86.1344537815126, |
| "grad_norm": 1.9259800910949707, |
| "learning_rate": 0.001, |
| "loss": 0.9992, |
| "step": 266500 |
| }, |
| { |
| "epoch": 86.16677440206853, |
| "grad_norm": 1.8844987154006958, |
| "learning_rate": 0.001, |
| "loss": 0.9949, |
| "step": 266600 |
| }, |
| { |
| "epoch": 86.19909502262443, |
| "grad_norm": 1.5561342239379883, |
| "learning_rate": 0.001, |
| "loss": 0.9989, |
| "step": 266700 |
| }, |
| { |
| "epoch": 86.23141564318036, |
| "grad_norm": 2.4345591068267822, |
| "learning_rate": 0.001, |
| "loss": 0.9958, |
| "step": 266800 |
| }, |
| { |
| "epoch": 86.26373626373626, |
| "grad_norm": 1.8101435899734497, |
| "learning_rate": 0.001, |
| "loss": 1.0139, |
| "step": 266900 |
| }, |
| { |
| "epoch": 86.29605688429218, |
| "grad_norm": 2.0286176204681396, |
| "learning_rate": 0.001, |
| "loss": 1.0251, |
| "step": 267000 |
| }, |
| { |
| "epoch": 86.32837750484809, |
| "grad_norm": 1.9802911281585693, |
| "learning_rate": 0.001, |
| "loss": 1.0139, |
| "step": 267100 |
| }, |
| { |
| "epoch": 86.36069812540401, |
| "grad_norm": 1.6296907663345337, |
| "learning_rate": 0.001, |
| "loss": 1.0182, |
| "step": 267200 |
| }, |
| { |
| "epoch": 86.39301874595992, |
| "grad_norm": 2.130852460861206, |
| "learning_rate": 0.001, |
| "loss": 1.0272, |
| "step": 267300 |
| }, |
| { |
| "epoch": 86.42533936651584, |
| "grad_norm": 1.6675856113433838, |
| "learning_rate": 0.001, |
| "loss": 1.0309, |
| "step": 267400 |
| }, |
| { |
| "epoch": 86.45765998707175, |
| "grad_norm": 1.873880386352539, |
| "learning_rate": 0.001, |
| "loss": 1.02, |
| "step": 267500 |
| }, |
| { |
| "epoch": 86.48998060762767, |
| "grad_norm": 1.6466994285583496, |
| "learning_rate": 0.001, |
| "loss": 1.036, |
| "step": 267600 |
| }, |
| { |
| "epoch": 86.52230122818358, |
| "grad_norm": 2.3659939765930176, |
| "learning_rate": 0.001, |
| "loss": 1.0379, |
| "step": 267700 |
| }, |
| { |
| "epoch": 86.5546218487395, |
| "grad_norm": 1.846489667892456, |
| "learning_rate": 0.001, |
| "loss": 1.0386, |
| "step": 267800 |
| }, |
| { |
| "epoch": 86.58694246929541, |
| "grad_norm": 1.9823304414749146, |
| "learning_rate": 0.001, |
| "loss": 1.0549, |
| "step": 267900 |
| }, |
| { |
| "epoch": 86.61926308985133, |
| "grad_norm": 2.0953738689422607, |
| "learning_rate": 0.001, |
| "loss": 1.0404, |
| "step": 268000 |
| }, |
| { |
| "epoch": 86.65158371040724, |
| "grad_norm": 5.9522705078125, |
| "learning_rate": 0.001, |
| "loss": 1.0526, |
| "step": 268100 |
| }, |
| { |
| "epoch": 86.68390433096316, |
| "grad_norm": 1.8562268018722534, |
| "learning_rate": 0.001, |
| "loss": 1.0652, |
| "step": 268200 |
| }, |
| { |
| "epoch": 86.71622495151907, |
| "grad_norm": 1.8963710069656372, |
| "learning_rate": 0.001, |
| "loss": 1.0499, |
| "step": 268300 |
| }, |
| { |
| "epoch": 86.74854557207499, |
| "grad_norm": 1.8700145483016968, |
| "learning_rate": 0.001, |
| "loss": 1.0587, |
| "step": 268400 |
| }, |
| { |
| "epoch": 86.7808661926309, |
| "grad_norm": 1.7103973627090454, |
| "learning_rate": 0.001, |
| "loss": 1.0584, |
| "step": 268500 |
| }, |
| { |
| "epoch": 86.81318681318682, |
| "grad_norm": 1.864179253578186, |
| "learning_rate": 0.001, |
| "loss": 1.0672, |
| "step": 268600 |
| }, |
| { |
| "epoch": 86.84550743374272, |
| "grad_norm": 2.295189380645752, |
| "learning_rate": 0.001, |
| "loss": 1.0639, |
| "step": 268700 |
| }, |
| { |
| "epoch": 86.87782805429865, |
| "grad_norm": 1.8950257301330566, |
| "learning_rate": 0.001, |
| "loss": 1.0659, |
| "step": 268800 |
| }, |
| { |
| "epoch": 86.91014867485455, |
| "grad_norm": 2.016186237335205, |
| "learning_rate": 0.001, |
| "loss": 1.0682, |
| "step": 268900 |
| }, |
| { |
| "epoch": 86.94246929541048, |
| "grad_norm": 1.8546134233474731, |
| "learning_rate": 0.001, |
| "loss": 1.0724, |
| "step": 269000 |
| }, |
| { |
| "epoch": 86.97478991596638, |
| "grad_norm": 1.4972987174987793, |
| "learning_rate": 0.001, |
| "loss": 1.0884, |
| "step": 269100 |
| }, |
| { |
| "epoch": 87.0071105365223, |
| "grad_norm": 2.2357635498046875, |
| "learning_rate": 0.001, |
| "loss": 1.0689, |
| "step": 269200 |
| }, |
| { |
| "epoch": 87.03943115707821, |
| "grad_norm": 1.957870602607727, |
| "learning_rate": 0.001, |
| "loss": 0.9617, |
| "step": 269300 |
| }, |
| { |
| "epoch": 87.07175177763413, |
| "grad_norm": 1.9988821744918823, |
| "learning_rate": 0.001, |
| "loss": 0.9642, |
| "step": 269400 |
| }, |
| { |
| "epoch": 87.10407239819004, |
| "grad_norm": 1.902817964553833, |
| "learning_rate": 0.001, |
| "loss": 0.9724, |
| "step": 269500 |
| }, |
| { |
| "epoch": 87.13639301874596, |
| "grad_norm": 1.8384301662445068, |
| "learning_rate": 0.001, |
| "loss": 0.9825, |
| "step": 269600 |
| }, |
| { |
| "epoch": 87.16871363930187, |
| "grad_norm": 1.9688271284103394, |
| "learning_rate": 0.001, |
| "loss": 0.9702, |
| "step": 269700 |
| }, |
| { |
| "epoch": 87.20103425985779, |
| "grad_norm": 1.5213277339935303, |
| "learning_rate": 0.001, |
| "loss": 0.9891, |
| "step": 269800 |
| }, |
| { |
| "epoch": 87.2333548804137, |
| "grad_norm": 1.8877270221710205, |
| "learning_rate": 0.001, |
| "loss": 0.9974, |
| "step": 269900 |
| }, |
| { |
| "epoch": 87.26567550096962, |
| "grad_norm": 1.5288257598876953, |
| "learning_rate": 0.001, |
| "loss": 0.999, |
| "step": 270000 |
| }, |
| { |
| "epoch": 87.29799612152553, |
| "grad_norm": 1.6828035116195679, |
| "learning_rate": 0.001, |
| "loss": 1.0073, |
| "step": 270100 |
| }, |
| { |
| "epoch": 87.33031674208145, |
| "grad_norm": 1.9386004209518433, |
| "learning_rate": 0.001, |
| "loss": 1.0163, |
| "step": 270200 |
| }, |
| { |
| "epoch": 87.36263736263736, |
| "grad_norm": 1.8773216009140015, |
| "learning_rate": 0.001, |
| "loss": 1.0245, |
| "step": 270300 |
| }, |
| { |
| "epoch": 87.39495798319328, |
| "grad_norm": 1.8428542613983154, |
| "learning_rate": 0.001, |
| "loss": 1.0207, |
| "step": 270400 |
| }, |
| { |
| "epoch": 87.42727860374919, |
| "grad_norm": 2.0465738773345947, |
| "learning_rate": 0.001, |
| "loss": 1.0034, |
| "step": 270500 |
| }, |
| { |
| "epoch": 87.45959922430511, |
| "grad_norm": 2.624429225921631, |
| "learning_rate": 0.001, |
| "loss": 1.0296, |
| "step": 270600 |
| }, |
| { |
| "epoch": 87.49191984486102, |
| "grad_norm": 2.0403313636779785, |
| "learning_rate": 0.001, |
| "loss": 1.0304, |
| "step": 270700 |
| }, |
| { |
| "epoch": 87.52424046541694, |
| "grad_norm": 1.6845457553863525, |
| "learning_rate": 0.001, |
| "loss": 1.0235, |
| "step": 270800 |
| }, |
| { |
| "epoch": 87.55656108597285, |
| "grad_norm": 1.5582964420318604, |
| "learning_rate": 0.001, |
| "loss": 1.0235, |
| "step": 270900 |
| }, |
| { |
| "epoch": 87.58888170652877, |
| "grad_norm": 2.086839199066162, |
| "learning_rate": 0.001, |
| "loss": 1.0346, |
| "step": 271000 |
| }, |
| { |
| "epoch": 87.62120232708467, |
| "grad_norm": 1.4940403699874878, |
| "learning_rate": 0.001, |
| "loss": 1.0434, |
| "step": 271100 |
| }, |
| { |
| "epoch": 87.6535229476406, |
| "grad_norm": 1.7732195854187012, |
| "learning_rate": 0.001, |
| "loss": 1.0391, |
| "step": 271200 |
| }, |
| { |
| "epoch": 87.6858435681965, |
| "grad_norm": 1.9552558660507202, |
| "learning_rate": 0.001, |
| "loss": 1.0588, |
| "step": 271300 |
| }, |
| { |
| "epoch": 87.71816418875243, |
| "grad_norm": 1.6922935247421265, |
| "learning_rate": 0.001, |
| "loss": 1.0525, |
| "step": 271400 |
| }, |
| { |
| "epoch": 87.75048480930833, |
| "grad_norm": 1.606885552406311, |
| "learning_rate": 0.001, |
| "loss": 1.0609, |
| "step": 271500 |
| }, |
| { |
| "epoch": 87.78280542986425, |
| "grad_norm": 1.7126843929290771, |
| "learning_rate": 0.001, |
| "loss": 1.0544, |
| "step": 271600 |
| }, |
| { |
| "epoch": 87.81512605042016, |
| "grad_norm": 1.9816648960113525, |
| "learning_rate": 0.001, |
| "loss": 1.0636, |
| "step": 271700 |
| }, |
| { |
| "epoch": 87.84744667097608, |
| "grad_norm": 1.776976466178894, |
| "learning_rate": 0.001, |
| "loss": 1.0594, |
| "step": 271800 |
| }, |
| { |
| "epoch": 87.87976729153199, |
| "grad_norm": 2.1870553493499756, |
| "learning_rate": 0.001, |
| "loss": 1.0482, |
| "step": 271900 |
| }, |
| { |
| "epoch": 87.91208791208791, |
| "grad_norm": 1.7203819751739502, |
| "learning_rate": 0.001, |
| "loss": 1.0711, |
| "step": 272000 |
| }, |
| { |
| "epoch": 87.94440853264382, |
| "grad_norm": 1.6706738471984863, |
| "learning_rate": 0.001, |
| "loss": 1.0778, |
| "step": 272100 |
| }, |
| { |
| "epoch": 87.97672915319974, |
| "grad_norm": 2.048996925354004, |
| "learning_rate": 0.001, |
| "loss": 1.071, |
| "step": 272200 |
| }, |
| { |
| "epoch": 88.00904977375566, |
| "grad_norm": 1.791468858718872, |
| "learning_rate": 0.001, |
| "loss": 1.0356, |
| "step": 272300 |
| }, |
| { |
| "epoch": 88.04137039431157, |
| "grad_norm": 2.0778145790100098, |
| "learning_rate": 0.001, |
| "loss": 0.9626, |
| "step": 272400 |
| }, |
| { |
| "epoch": 88.07369101486749, |
| "grad_norm": 2.016916275024414, |
| "learning_rate": 0.001, |
| "loss": 0.9573, |
| "step": 272500 |
| }, |
| { |
| "epoch": 88.1060116354234, |
| "grad_norm": 1.8074690103530884, |
| "learning_rate": 0.001, |
| "loss": 0.9704, |
| "step": 272600 |
| }, |
| { |
| "epoch": 88.13833225597932, |
| "grad_norm": 1.7361093759536743, |
| "learning_rate": 0.001, |
| "loss": 0.9908, |
| "step": 272700 |
| }, |
| { |
| "epoch": 88.17065287653523, |
| "grad_norm": 1.7573654651641846, |
| "learning_rate": 0.001, |
| "loss": 0.9922, |
| "step": 272800 |
| }, |
| { |
| "epoch": 88.20297349709115, |
| "grad_norm": 1.6067391633987427, |
| "learning_rate": 0.001, |
| "loss": 0.9822, |
| "step": 272900 |
| }, |
| { |
| "epoch": 88.23529411764706, |
| "grad_norm": 1.9911582469940186, |
| "learning_rate": 0.001, |
| "loss": 0.9969, |
| "step": 273000 |
| }, |
| { |
| "epoch": 88.26761473820298, |
| "grad_norm": 1.9228503704071045, |
| "learning_rate": 0.001, |
| "loss": 0.9908, |
| "step": 273100 |
| }, |
| { |
| "epoch": 88.29993535875889, |
| "grad_norm": 1.954790711402893, |
| "learning_rate": 0.001, |
| "loss": 0.9979, |
| "step": 273200 |
| }, |
| { |
| "epoch": 88.33225597931481, |
| "grad_norm": 2.02158784866333, |
| "learning_rate": 0.001, |
| "loss": 1.0014, |
| "step": 273300 |
| }, |
| { |
| "epoch": 88.36457659987072, |
| "grad_norm": 1.9888592958450317, |
| "learning_rate": 0.001, |
| "loss": 0.9951, |
| "step": 273400 |
| }, |
| { |
| "epoch": 88.39689722042664, |
| "grad_norm": 1.7298367023468018, |
| "learning_rate": 0.001, |
| "loss": 1.0075, |
| "step": 273500 |
| }, |
| { |
| "epoch": 88.42921784098255, |
| "grad_norm": 1.7865296602249146, |
| "learning_rate": 0.001, |
| "loss": 1.0108, |
| "step": 273600 |
| }, |
| { |
| "epoch": 88.46153846153847, |
| "grad_norm": 2.6155149936676025, |
| "learning_rate": 0.001, |
| "loss": 1.0153, |
| "step": 273700 |
| }, |
| { |
| "epoch": 88.49385908209437, |
| "grad_norm": 1.765749216079712, |
| "learning_rate": 0.001, |
| "loss": 1.0148, |
| "step": 273800 |
| }, |
| { |
| "epoch": 88.5261797026503, |
| "grad_norm": 1.943948745727539, |
| "learning_rate": 0.001, |
| "loss": 1.017, |
| "step": 273900 |
| }, |
| { |
| "epoch": 88.5585003232062, |
| "grad_norm": 1.7162889242172241, |
| "learning_rate": 0.001, |
| "loss": 1.0314, |
| "step": 274000 |
| }, |
| { |
| "epoch": 88.59082094376213, |
| "grad_norm": 1.5523930788040161, |
| "learning_rate": 0.001, |
| "loss": 1.0302, |
| "step": 274100 |
| }, |
| { |
| "epoch": 88.62314156431803, |
| "grad_norm": 1.733382225036621, |
| "learning_rate": 0.001, |
| "loss": 1.0303, |
| "step": 274200 |
| }, |
| { |
| "epoch": 88.65546218487395, |
| "grad_norm": 2.412778615951538, |
| "learning_rate": 0.001, |
| "loss": 1.0385, |
| "step": 274300 |
| }, |
| { |
| "epoch": 88.68778280542986, |
| "grad_norm": 1.9336631298065186, |
| "learning_rate": 0.001, |
| "loss": 1.0359, |
| "step": 274400 |
| }, |
| { |
| "epoch": 88.72010342598578, |
| "grad_norm": 2.2607991695404053, |
| "learning_rate": 0.001, |
| "loss": 1.0412, |
| "step": 274500 |
| }, |
| { |
| "epoch": 88.75242404654169, |
| "grad_norm": 1.6918398141860962, |
| "learning_rate": 0.001, |
| "loss": 1.056, |
| "step": 274600 |
| }, |
| { |
| "epoch": 88.78474466709761, |
| "grad_norm": 1.6877381801605225, |
| "learning_rate": 0.001, |
| "loss": 1.0467, |
| "step": 274700 |
| }, |
| { |
| "epoch": 88.81706528765352, |
| "grad_norm": 1.8707000017166138, |
| "learning_rate": 0.001, |
| "loss": 1.0523, |
| "step": 274800 |
| }, |
| { |
| "epoch": 88.84938590820944, |
| "grad_norm": 1.7763044834136963, |
| "learning_rate": 0.001, |
| "loss": 1.044, |
| "step": 274900 |
| }, |
| { |
| "epoch": 88.88170652876535, |
| "grad_norm": 2.0772578716278076, |
| "learning_rate": 0.001, |
| "loss": 1.0556, |
| "step": 275000 |
| }, |
| { |
| "epoch": 88.91402714932127, |
| "grad_norm": 1.7194854021072388, |
| "learning_rate": 0.001, |
| "loss": 1.0596, |
| "step": 275100 |
| }, |
| { |
| "epoch": 88.94634776987718, |
| "grad_norm": 2.1079659461975098, |
| "learning_rate": 0.001, |
| "loss": 1.0678, |
| "step": 275200 |
| }, |
| { |
| "epoch": 88.9786683904331, |
| "grad_norm": 1.841643214225769, |
| "learning_rate": 0.001, |
| "loss": 1.0617, |
| "step": 275300 |
| }, |
| { |
| "epoch": 89.01098901098901, |
| "grad_norm": 1.7587463855743408, |
| "learning_rate": 0.001, |
| "loss": 1.0222, |
| "step": 275400 |
| }, |
| { |
| "epoch": 89.04330963154493, |
| "grad_norm": 1.8258470296859741, |
| "learning_rate": 0.001, |
| "loss": 0.9631, |
| "step": 275500 |
| }, |
| { |
| "epoch": 89.07563025210084, |
| "grad_norm": 1.7834150791168213, |
| "learning_rate": 0.001, |
| "loss": 0.9544, |
| "step": 275600 |
| }, |
| { |
| "epoch": 89.10795087265676, |
| "grad_norm": 1.8407515287399292, |
| "learning_rate": 0.001, |
| "loss": 0.9701, |
| "step": 275700 |
| }, |
| { |
| "epoch": 89.14027149321267, |
| "grad_norm": 1.9101598262786865, |
| "learning_rate": 0.001, |
| "loss": 0.9584, |
| "step": 275800 |
| }, |
| { |
| "epoch": 89.17259211376859, |
| "grad_norm": 1.7279962301254272, |
| "learning_rate": 0.001, |
| "loss": 0.983, |
| "step": 275900 |
| }, |
| { |
| "epoch": 89.2049127343245, |
| "grad_norm": 1.8472046852111816, |
| "learning_rate": 0.001, |
| "loss": 0.9722, |
| "step": 276000 |
| }, |
| { |
| "epoch": 89.23723335488042, |
| "grad_norm": 1.6440308094024658, |
| "learning_rate": 0.001, |
| "loss": 0.984, |
| "step": 276100 |
| }, |
| { |
| "epoch": 89.26955397543632, |
| "grad_norm": 1.48750901222229, |
| "learning_rate": 0.001, |
| "loss": 0.9765, |
| "step": 276200 |
| }, |
| { |
| "epoch": 89.30187459599225, |
| "grad_norm": 1.7771786451339722, |
| "learning_rate": 0.001, |
| "loss": 1.0027, |
| "step": 276300 |
| }, |
| { |
| "epoch": 89.33419521654815, |
| "grad_norm": 1.5432380437850952, |
| "learning_rate": 0.001, |
| "loss": 0.9903, |
| "step": 276400 |
| }, |
| { |
| "epoch": 89.36651583710407, |
| "grad_norm": 2.1023707389831543, |
| "learning_rate": 0.001, |
| "loss": 0.9946, |
| "step": 276500 |
| }, |
| { |
| "epoch": 89.39883645765998, |
| "grad_norm": 1.942596435546875, |
| "learning_rate": 0.001, |
| "loss": 1.0045, |
| "step": 276600 |
| }, |
| { |
| "epoch": 89.4311570782159, |
| "grad_norm": 2.248884916305542, |
| "learning_rate": 0.001, |
| "loss": 0.9991, |
| "step": 276700 |
| }, |
| { |
| "epoch": 89.46347769877181, |
| "grad_norm": 2.2643485069274902, |
| "learning_rate": 0.001, |
| "loss": 1.0091, |
| "step": 276800 |
| }, |
| { |
| "epoch": 89.49579831932773, |
| "grad_norm": 2.112210273742676, |
| "learning_rate": 0.001, |
| "loss": 1.0035, |
| "step": 276900 |
| }, |
| { |
| "epoch": 89.52811893988364, |
| "grad_norm": 2.1388866901397705, |
| "learning_rate": 0.001, |
| "loss": 1.0105, |
| "step": 277000 |
| }, |
| { |
| "epoch": 89.56043956043956, |
| "grad_norm": 1.910740613937378, |
| "learning_rate": 0.001, |
| "loss": 1.0142, |
| "step": 277100 |
| }, |
| { |
| "epoch": 89.59276018099547, |
| "grad_norm": 2.0235660076141357, |
| "learning_rate": 0.001, |
| "loss": 1.0254, |
| "step": 277200 |
| }, |
| { |
| "epoch": 89.62508080155139, |
| "grad_norm": 1.9163745641708374, |
| "learning_rate": 0.001, |
| "loss": 1.0277, |
| "step": 277300 |
| }, |
| { |
| "epoch": 89.6574014221073, |
| "grad_norm": 1.8493446111679077, |
| "learning_rate": 0.001, |
| "loss": 1.0239, |
| "step": 277400 |
| }, |
| { |
| "epoch": 89.68972204266322, |
| "grad_norm": 2.082435131072998, |
| "learning_rate": 0.001, |
| "loss": 1.0255, |
| "step": 277500 |
| }, |
| { |
| "epoch": 89.72204266321913, |
| "grad_norm": 1.9139171838760376, |
| "learning_rate": 0.001, |
| "loss": 1.0456, |
| "step": 277600 |
| }, |
| { |
| "epoch": 89.75436328377505, |
| "grad_norm": 1.9875322580337524, |
| "learning_rate": 0.001, |
| "loss": 1.034, |
| "step": 277700 |
| }, |
| { |
| "epoch": 89.78668390433096, |
| "grad_norm": 2.1025478839874268, |
| "learning_rate": 0.001, |
| "loss": 1.0329, |
| "step": 277800 |
| }, |
| { |
| "epoch": 89.81900452488688, |
| "grad_norm": 1.9872552156448364, |
| "learning_rate": 0.001, |
| "loss": 1.042, |
| "step": 277900 |
| }, |
| { |
| "epoch": 89.85132514544279, |
| "grad_norm": 2.346891164779663, |
| "learning_rate": 0.001, |
| "loss": 1.0439, |
| "step": 278000 |
| }, |
| { |
| "epoch": 89.88364576599871, |
| "grad_norm": 2.0617330074310303, |
| "learning_rate": 0.001, |
| "loss": 1.0526, |
| "step": 278100 |
| }, |
| { |
| "epoch": 89.91596638655462, |
| "grad_norm": 2.033355474472046, |
| "learning_rate": 0.001, |
| "loss": 1.053, |
| "step": 278200 |
| }, |
| { |
| "epoch": 89.94828700711054, |
| "grad_norm": 1.9766206741333008, |
| "learning_rate": 0.001, |
| "loss": 1.0571, |
| "step": 278300 |
| }, |
| { |
| "epoch": 89.98060762766644, |
| "grad_norm": 1.6863994598388672, |
| "learning_rate": 0.001, |
| "loss": 1.0536, |
| "step": 278400 |
| }, |
| { |
| "epoch": 90.01292824822237, |
| "grad_norm": 2.222926616668701, |
| "learning_rate": 0.001, |
| "loss": 0.9922, |
| "step": 278500 |
| }, |
| { |
| "epoch": 90.04524886877829, |
| "grad_norm": 2.0000972747802734, |
| "learning_rate": 0.001, |
| "loss": 0.9525, |
| "step": 278600 |
| }, |
| { |
| "epoch": 90.0775694893342, |
| "grad_norm": 2.640467643737793, |
| "learning_rate": 0.001, |
| "loss": 0.9637, |
| "step": 278700 |
| }, |
| { |
| "epoch": 90.10989010989012, |
| "grad_norm": 2.6437063217163086, |
| "learning_rate": 0.001, |
| "loss": 0.9518, |
| "step": 278800 |
| }, |
| { |
| "epoch": 90.14221073044602, |
| "grad_norm": 2.0729470252990723, |
| "learning_rate": 0.001, |
| "loss": 0.9644, |
| "step": 278900 |
| }, |
| { |
| "epoch": 90.17453135100195, |
| "grad_norm": 1.665229320526123, |
| "learning_rate": 0.001, |
| "loss": 0.9703, |
| "step": 279000 |
| }, |
| { |
| "epoch": 90.20685197155785, |
| "grad_norm": 2.4748964309692383, |
| "learning_rate": 0.001, |
| "loss": 0.9828, |
| "step": 279100 |
| }, |
| { |
| "epoch": 90.23917259211377, |
| "grad_norm": 2.3275153636932373, |
| "learning_rate": 0.001, |
| "loss": 0.9758, |
| "step": 279200 |
| }, |
| { |
| "epoch": 90.27149321266968, |
| "grad_norm": 2.267796516418457, |
| "learning_rate": 0.001, |
| "loss": 0.9762, |
| "step": 279300 |
| }, |
| { |
| "epoch": 90.3038138332256, |
| "grad_norm": 3.0510411262512207, |
| "learning_rate": 0.001, |
| "loss": 0.9634, |
| "step": 279400 |
| }, |
| { |
| "epoch": 90.33613445378151, |
| "grad_norm": 1.9009895324707031, |
| "learning_rate": 0.001, |
| "loss": 0.9854, |
| "step": 279500 |
| }, |
| { |
| "epoch": 90.36845507433743, |
| "grad_norm": 2.0102601051330566, |
| "learning_rate": 0.001, |
| "loss": 0.9867, |
| "step": 279600 |
| }, |
| { |
| "epoch": 90.40077569489334, |
| "grad_norm": 1.7540924549102783, |
| "learning_rate": 0.001, |
| "loss": 1.0011, |
| "step": 279700 |
| }, |
| { |
| "epoch": 90.43309631544926, |
| "grad_norm": 2.0557961463928223, |
| "learning_rate": 0.001, |
| "loss": 0.9923, |
| "step": 279800 |
| }, |
| { |
| "epoch": 90.46541693600517, |
| "grad_norm": 2.086256980895996, |
| "learning_rate": 0.001, |
| "loss": 0.9925, |
| "step": 279900 |
| }, |
| { |
| "epoch": 90.49773755656109, |
| "grad_norm": 2.0977132320404053, |
| "learning_rate": 0.001, |
| "loss": 1.0084, |
| "step": 280000 |
| }, |
| { |
| "epoch": 90.530058177117, |
| "grad_norm": 2.174241304397583, |
| "learning_rate": 0.001, |
| "loss": 1.0013, |
| "step": 280100 |
| }, |
| { |
| "epoch": 90.56237879767292, |
| "grad_norm": 1.7331504821777344, |
| "learning_rate": 0.001, |
| "loss": 1.0204, |
| "step": 280200 |
| }, |
| { |
| "epoch": 90.59469941822883, |
| "grad_norm": 1.912105679512024, |
| "learning_rate": 0.001, |
| "loss": 1.0077, |
| "step": 280300 |
| }, |
| { |
| "epoch": 90.62702003878475, |
| "grad_norm": 1.9463039636611938, |
| "learning_rate": 0.001, |
| "loss": 1.014, |
| "step": 280400 |
| }, |
| { |
| "epoch": 90.65934065934066, |
| "grad_norm": 1.9718258380889893, |
| "learning_rate": 0.001, |
| "loss": 1.0205, |
| "step": 280500 |
| }, |
| { |
| "epoch": 90.69166127989658, |
| "grad_norm": 2.5784502029418945, |
| "learning_rate": 0.001, |
| "loss": 1.0149, |
| "step": 280600 |
| }, |
| { |
| "epoch": 90.72398190045249, |
| "grad_norm": 1.9825204610824585, |
| "learning_rate": 0.001, |
| "loss": 1.025, |
| "step": 280700 |
| }, |
| { |
| "epoch": 90.75630252100841, |
| "grad_norm": 1.9849514961242676, |
| "learning_rate": 0.001, |
| "loss": 1.0323, |
| "step": 280800 |
| }, |
| { |
| "epoch": 90.78862314156432, |
| "grad_norm": 2.067756414413452, |
| "learning_rate": 0.001, |
| "loss": 1.0378, |
| "step": 280900 |
| }, |
| { |
| "epoch": 90.82094376212024, |
| "grad_norm": 3.0538594722747803, |
| "learning_rate": 0.001, |
| "loss": 1.0244, |
| "step": 281000 |
| }, |
| { |
| "epoch": 90.85326438267614, |
| "grad_norm": 2.0136399269104004, |
| "learning_rate": 0.001, |
| "loss": 1.0411, |
| "step": 281100 |
| }, |
| { |
| "epoch": 90.88558500323207, |
| "grad_norm": 1.8992334604263306, |
| "learning_rate": 0.001, |
| "loss": 1.0451, |
| "step": 281200 |
| }, |
| { |
| "epoch": 90.91790562378797, |
| "grad_norm": 2.309920310974121, |
| "learning_rate": 0.001, |
| "loss": 1.0304, |
| "step": 281300 |
| }, |
| { |
| "epoch": 90.9502262443439, |
| "grad_norm": 2.1204164028167725, |
| "learning_rate": 0.001, |
| "loss": 1.0425, |
| "step": 281400 |
| }, |
| { |
| "epoch": 90.9825468648998, |
| "grad_norm": 2.363699436187744, |
| "learning_rate": 0.001, |
| "loss": 1.0557, |
| "step": 281500 |
| }, |
| { |
| "epoch": 91.01486748545572, |
| "grad_norm": 2.024256706237793, |
| "learning_rate": 0.001, |
| "loss": 0.98, |
| "step": 281600 |
| }, |
| { |
| "epoch": 91.04718810601163, |
| "grad_norm": 1.8037205934524536, |
| "learning_rate": 0.001, |
| "loss": 0.9362, |
| "step": 281700 |
| }, |
| { |
| "epoch": 91.07950872656755, |
| "grad_norm": 2.3736801147460938, |
| "learning_rate": 0.001, |
| "loss": 0.944, |
| "step": 281800 |
| }, |
| { |
| "epoch": 91.11182934712346, |
| "grad_norm": 2.0845656394958496, |
| "learning_rate": 0.001, |
| "loss": 0.9526, |
| "step": 281900 |
| }, |
| { |
| "epoch": 91.14414996767938, |
| "grad_norm": 1.9515149593353271, |
| "learning_rate": 0.001, |
| "loss": 0.9533, |
| "step": 282000 |
| }, |
| { |
| "epoch": 91.17647058823529, |
| "grad_norm": 2.203016757965088, |
| "learning_rate": 0.001, |
| "loss": 0.9585, |
| "step": 282100 |
| }, |
| { |
| "epoch": 91.20879120879121, |
| "grad_norm": 2.299415349960327, |
| "learning_rate": 0.001, |
| "loss": 0.952, |
| "step": 282200 |
| }, |
| { |
| "epoch": 91.24111182934712, |
| "grad_norm": 1.9382708072662354, |
| "learning_rate": 0.001, |
| "loss": 0.9691, |
| "step": 282300 |
| }, |
| { |
| "epoch": 91.27343244990304, |
| "grad_norm": 2.493281602859497, |
| "learning_rate": 0.001, |
| "loss": 0.9665, |
| "step": 282400 |
| }, |
| { |
| "epoch": 91.30575307045895, |
| "grad_norm": 2.7134528160095215, |
| "learning_rate": 0.001, |
| "loss": 0.971, |
| "step": 282500 |
| }, |
| { |
| "epoch": 91.33807369101487, |
| "grad_norm": 2.434889078140259, |
| "learning_rate": 0.001, |
| "loss": 0.973, |
| "step": 282600 |
| }, |
| { |
| "epoch": 91.37039431157078, |
| "grad_norm": 1.6504502296447754, |
| "learning_rate": 0.001, |
| "loss": 0.9882, |
| "step": 282700 |
| }, |
| { |
| "epoch": 91.4027149321267, |
| "grad_norm": 2.516812324523926, |
| "learning_rate": 0.001, |
| "loss": 0.9917, |
| "step": 282800 |
| }, |
| { |
| "epoch": 91.4350355526826, |
| "grad_norm": 2.250521421432495, |
| "learning_rate": 0.001, |
| "loss": 0.9858, |
| "step": 282900 |
| }, |
| { |
| "epoch": 91.46735617323853, |
| "grad_norm": 2.721055746078491, |
| "learning_rate": 0.001, |
| "loss": 0.9903, |
| "step": 283000 |
| }, |
| { |
| "epoch": 91.49967679379444, |
| "grad_norm": 2.0062334537506104, |
| "learning_rate": 0.001, |
| "loss": 1.0004, |
| "step": 283100 |
| }, |
| { |
| "epoch": 91.53199741435036, |
| "grad_norm": 2.329655408859253, |
| "learning_rate": 0.001, |
| "loss": 0.9853, |
| "step": 283200 |
| }, |
| { |
| "epoch": 91.56431803490626, |
| "grad_norm": 1.8753358125686646, |
| "learning_rate": 0.001, |
| "loss": 1.0124, |
| "step": 283300 |
| }, |
| { |
| "epoch": 91.59663865546219, |
| "grad_norm": 2.209588050842285, |
| "learning_rate": 0.001, |
| "loss": 1.0197, |
| "step": 283400 |
| }, |
| { |
| "epoch": 91.6289592760181, |
| "grad_norm": 2.219310760498047, |
| "learning_rate": 0.001, |
| "loss": 1.0008, |
| "step": 283500 |
| }, |
| { |
| "epoch": 91.66127989657402, |
| "grad_norm": 2.1283211708068848, |
| "learning_rate": 0.001, |
| "loss": 1.0384, |
| "step": 283600 |
| }, |
| { |
| "epoch": 91.69360051712992, |
| "grad_norm": 1.7601873874664307, |
| "learning_rate": 0.001, |
| "loss": 1.0298, |
| "step": 283700 |
| }, |
| { |
| "epoch": 91.72592113768584, |
| "grad_norm": 1.8941537141799927, |
| "learning_rate": 0.001, |
| "loss": 1.0165, |
| "step": 283800 |
| }, |
| { |
| "epoch": 91.75824175824175, |
| "grad_norm": 2.771538734436035, |
| "learning_rate": 0.001, |
| "loss": 1.0324, |
| "step": 283900 |
| }, |
| { |
| "epoch": 91.79056237879767, |
| "grad_norm": 2.580246686935425, |
| "learning_rate": 0.001, |
| "loss": 1.0321, |
| "step": 284000 |
| }, |
| { |
| "epoch": 91.82288299935358, |
| "grad_norm": 2.1796884536743164, |
| "learning_rate": 0.001, |
| "loss": 1.0281, |
| "step": 284100 |
| }, |
| { |
| "epoch": 91.8552036199095, |
| "grad_norm": 2.224008798599243, |
| "learning_rate": 0.001, |
| "loss": 1.0264, |
| "step": 284200 |
| }, |
| { |
| "epoch": 91.88752424046541, |
| "grad_norm": 2.6273763179779053, |
| "learning_rate": 0.001, |
| "loss": 1.0378, |
| "step": 284300 |
| }, |
| { |
| "epoch": 91.91984486102133, |
| "grad_norm": 2.327000856399536, |
| "learning_rate": 0.001, |
| "loss": 1.0412, |
| "step": 284400 |
| }, |
| { |
| "epoch": 91.95216548157724, |
| "grad_norm": 2.1067605018615723, |
| "learning_rate": 0.001, |
| "loss": 1.0331, |
| "step": 284500 |
| }, |
| { |
| "epoch": 91.98448610213316, |
| "grad_norm": 2.0482475757598877, |
| "learning_rate": 0.001, |
| "loss": 1.0539, |
| "step": 284600 |
| }, |
| { |
| "epoch": 92.01680672268908, |
| "grad_norm": 2.45538067817688, |
| "learning_rate": 0.001, |
| "loss": 0.9696, |
| "step": 284700 |
| }, |
| { |
| "epoch": 92.04912734324499, |
| "grad_norm": 2.8478100299835205, |
| "learning_rate": 0.001, |
| "loss": 0.9344, |
| "step": 284800 |
| }, |
| { |
| "epoch": 92.08144796380091, |
| "grad_norm": 2.6036441326141357, |
| "learning_rate": 0.001, |
| "loss": 0.9354, |
| "step": 284900 |
| }, |
| { |
| "epoch": 92.11376858435682, |
| "grad_norm": 3.401431083679199, |
| "learning_rate": 0.001, |
| "loss": 0.9437, |
| "step": 285000 |
| }, |
| { |
| "epoch": 92.14608920491274, |
| "grad_norm": 2.8432137966156006, |
| "learning_rate": 0.001, |
| "loss": 0.9512, |
| "step": 285100 |
| }, |
| { |
| "epoch": 92.17840982546865, |
| "grad_norm": 2.483217477798462, |
| "learning_rate": 0.001, |
| "loss": 0.9671, |
| "step": 285200 |
| }, |
| { |
| "epoch": 92.21073044602457, |
| "grad_norm": 2.2827484607696533, |
| "learning_rate": 0.001, |
| "loss": 0.9642, |
| "step": 285300 |
| }, |
| { |
| "epoch": 92.24305106658048, |
| "grad_norm": 1.9338748455047607, |
| "learning_rate": 0.001, |
| "loss": 0.9574, |
| "step": 285400 |
| }, |
| { |
| "epoch": 92.2753716871364, |
| "grad_norm": 3.546093225479126, |
| "learning_rate": 0.001, |
| "loss": 0.9691, |
| "step": 285500 |
| }, |
| { |
| "epoch": 92.3076923076923, |
| "grad_norm": 2.77447247505188, |
| "learning_rate": 0.001, |
| "loss": 0.967, |
| "step": 285600 |
| }, |
| { |
| "epoch": 92.34001292824823, |
| "grad_norm": 2.7795073986053467, |
| "learning_rate": 0.001, |
| "loss": 0.9534, |
| "step": 285700 |
| }, |
| { |
| "epoch": 92.37233354880414, |
| "grad_norm": 3.57477068901062, |
| "learning_rate": 0.001, |
| "loss": 0.9676, |
| "step": 285800 |
| }, |
| { |
| "epoch": 92.40465416936006, |
| "grad_norm": 2.2975080013275146, |
| "learning_rate": 0.001, |
| "loss": 0.979, |
| "step": 285900 |
| }, |
| { |
| "epoch": 92.43697478991596, |
| "grad_norm": 2.965996026992798, |
| "learning_rate": 0.001, |
| "loss": 0.9877, |
| "step": 286000 |
| }, |
| { |
| "epoch": 92.46929541047189, |
| "grad_norm": 2.1376302242279053, |
| "learning_rate": 0.001, |
| "loss": 1.0011, |
| "step": 286100 |
| }, |
| { |
| "epoch": 92.5016160310278, |
| "grad_norm": 2.3904857635498047, |
| "learning_rate": 0.001, |
| "loss": 0.9948, |
| "step": 286200 |
| }, |
| { |
| "epoch": 92.53393665158372, |
| "grad_norm": 2.1248250007629395, |
| "learning_rate": 0.001, |
| "loss": 0.9912, |
| "step": 286300 |
| }, |
| { |
| "epoch": 92.56625727213962, |
| "grad_norm": 3.2705423831939697, |
| "learning_rate": 0.001, |
| "loss": 0.9846, |
| "step": 286400 |
| }, |
| { |
| "epoch": 92.59857789269554, |
| "grad_norm": 2.439633369445801, |
| "learning_rate": 0.001, |
| "loss": 0.9929, |
| "step": 286500 |
| }, |
| { |
| "epoch": 92.63089851325145, |
| "grad_norm": 2.881314277648926, |
| "learning_rate": 0.001, |
| "loss": 1.0093, |
| "step": 286600 |
| }, |
| { |
| "epoch": 92.66321913380737, |
| "grad_norm": 3.5969815254211426, |
| "learning_rate": 0.001, |
| "loss": 0.9981, |
| "step": 286700 |
| }, |
| { |
| "epoch": 92.69553975436328, |
| "grad_norm": 1.9576853513717651, |
| "learning_rate": 0.001, |
| "loss": 1.0196, |
| "step": 286800 |
| }, |
| { |
| "epoch": 92.7278603749192, |
| "grad_norm": 3.114851713180542, |
| "learning_rate": 0.001, |
| "loss": 1.0087, |
| "step": 286900 |
| }, |
| { |
| "epoch": 92.76018099547511, |
| "grad_norm": 2.5850727558135986, |
| "learning_rate": 0.001, |
| "loss": 1.012, |
| "step": 287000 |
| }, |
| { |
| "epoch": 92.79250161603103, |
| "grad_norm": 2.376007080078125, |
| "learning_rate": 0.001, |
| "loss": 1.0238, |
| "step": 287100 |
| }, |
| { |
| "epoch": 92.82482223658694, |
| "grad_norm": 2.491420269012451, |
| "learning_rate": 0.001, |
| "loss": 1.023, |
| "step": 287200 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 2.0345892906188965, |
| "learning_rate": 0.001, |
| "loss": 1.0287, |
| "step": 287300 |
| }, |
| { |
| "epoch": 92.88946347769877, |
| "grad_norm": 4.1525702476501465, |
| "learning_rate": 0.001, |
| "loss": 1.037, |
| "step": 287400 |
| }, |
| { |
| "epoch": 92.92178409825469, |
| "grad_norm": 3.620197057723999, |
| "learning_rate": 0.001, |
| "loss": 1.0469, |
| "step": 287500 |
| }, |
| { |
| "epoch": 92.9541047188106, |
| "grad_norm": 3.1248631477355957, |
| "learning_rate": 0.001, |
| "loss": 1.0341, |
| "step": 287600 |
| }, |
| { |
| "epoch": 92.98642533936652, |
| "grad_norm": 2.6980485916137695, |
| "learning_rate": 0.001, |
| "loss": 1.0328, |
| "step": 287700 |
| }, |
| { |
| "epoch": 93.01874595992243, |
| "grad_norm": 2.4870355129241943, |
| "learning_rate": 0.001, |
| "loss": 0.9789, |
| "step": 287800 |
| }, |
| { |
| "epoch": 93.05106658047835, |
| "grad_norm": 2.198549270629883, |
| "learning_rate": 0.001, |
| "loss": 0.9173, |
| "step": 287900 |
| }, |
| { |
| "epoch": 93.08338720103426, |
| "grad_norm": 1.8441483974456787, |
| "learning_rate": 0.001, |
| "loss": 0.934, |
| "step": 288000 |
| }, |
| { |
| "epoch": 93.11570782159018, |
| "grad_norm": 2.0669450759887695, |
| "learning_rate": 0.001, |
| "loss": 0.9305, |
| "step": 288100 |
| }, |
| { |
| "epoch": 93.14802844214609, |
| "grad_norm": 2.3490703105926514, |
| "learning_rate": 0.001, |
| "loss": 0.9552, |
| "step": 288200 |
| }, |
| { |
| "epoch": 93.180349062702, |
| "grad_norm": 2.179919481277466, |
| "learning_rate": 0.001, |
| "loss": 0.9391, |
| "step": 288300 |
| }, |
| { |
| "epoch": 93.21266968325791, |
| "grad_norm": 1.8578362464904785, |
| "learning_rate": 0.001, |
| "loss": 0.942, |
| "step": 288400 |
| }, |
| { |
| "epoch": 93.24499030381384, |
| "grad_norm": 1.6484123468399048, |
| "learning_rate": 0.001, |
| "loss": 0.9545, |
| "step": 288500 |
| }, |
| { |
| "epoch": 93.27731092436974, |
| "grad_norm": 2.483506441116333, |
| "learning_rate": 0.001, |
| "loss": 0.9774, |
| "step": 288600 |
| }, |
| { |
| "epoch": 93.30963154492567, |
| "grad_norm": 1.8346855640411377, |
| "learning_rate": 0.001, |
| "loss": 0.9638, |
| "step": 288700 |
| }, |
| { |
| "epoch": 93.34195216548157, |
| "grad_norm": 1.6694176197052002, |
| "learning_rate": 0.001, |
| "loss": 0.9704, |
| "step": 288800 |
| }, |
| { |
| "epoch": 93.3742727860375, |
| "grad_norm": 2.2945752143859863, |
| "learning_rate": 0.001, |
| "loss": 0.9813, |
| "step": 288900 |
| }, |
| { |
| "epoch": 93.4065934065934, |
| "grad_norm": 2.2705020904541016, |
| "learning_rate": 0.001, |
| "loss": 0.9744, |
| "step": 289000 |
| }, |
| { |
| "epoch": 93.43891402714932, |
| "grad_norm": 2.5650382041931152, |
| "learning_rate": 0.001, |
| "loss": 0.9751, |
| "step": 289100 |
| }, |
| { |
| "epoch": 93.47123464770523, |
| "grad_norm": 1.7471798658370972, |
| "learning_rate": 0.001, |
| "loss": 0.9724, |
| "step": 289200 |
| }, |
| { |
| "epoch": 93.50355526826115, |
| "grad_norm": 2.5118908882141113, |
| "learning_rate": 0.001, |
| "loss": 0.9774, |
| "step": 289300 |
| }, |
| { |
| "epoch": 93.53587588881706, |
| "grad_norm": 1.7975685596466064, |
| "learning_rate": 0.001, |
| "loss": 0.9868, |
| "step": 289400 |
| }, |
| { |
| "epoch": 93.56819650937298, |
| "grad_norm": 2.0186667442321777, |
| "learning_rate": 0.001, |
| "loss": 0.9923, |
| "step": 289500 |
| }, |
| { |
| "epoch": 93.60051712992889, |
| "grad_norm": 3.608877420425415, |
| "learning_rate": 0.001, |
| "loss": 0.987, |
| "step": 289600 |
| }, |
| { |
| "epoch": 93.63283775048481, |
| "grad_norm": 2.746725559234619, |
| "learning_rate": 0.001, |
| "loss": 0.9888, |
| "step": 289700 |
| }, |
| { |
| "epoch": 93.66515837104072, |
| "grad_norm": 2.6709630489349365, |
| "learning_rate": 0.001, |
| "loss": 0.9949, |
| "step": 289800 |
| }, |
| { |
| "epoch": 93.69747899159664, |
| "grad_norm": 2.15086030960083, |
| "learning_rate": 0.001, |
| "loss": 0.9904, |
| "step": 289900 |
| }, |
| { |
| "epoch": 93.72979961215255, |
| "grad_norm": 2.7408602237701416, |
| "learning_rate": 0.001, |
| "loss": 1.0152, |
| "step": 290000 |
| }, |
| { |
| "epoch": 93.76212023270847, |
| "grad_norm": 2.362180233001709, |
| "learning_rate": 0.001, |
| "loss": 1.0091, |
| "step": 290100 |
| }, |
| { |
| "epoch": 93.79444085326438, |
| "grad_norm": 1.734055519104004, |
| "learning_rate": 0.001, |
| "loss": 1.0105, |
| "step": 290200 |
| }, |
| { |
| "epoch": 93.8267614738203, |
| "grad_norm": 1.879744052886963, |
| "learning_rate": 0.001, |
| "loss": 1.0108, |
| "step": 290300 |
| }, |
| { |
| "epoch": 93.8590820943762, |
| "grad_norm": 2.03096342086792, |
| "learning_rate": 0.001, |
| "loss": 1.0089, |
| "step": 290400 |
| }, |
| { |
| "epoch": 93.89140271493213, |
| "grad_norm": 1.8851549625396729, |
| "learning_rate": 0.001, |
| "loss": 1.0173, |
| "step": 290500 |
| }, |
| { |
| "epoch": 93.92372333548803, |
| "grad_norm": 2.0545690059661865, |
| "learning_rate": 0.001, |
| "loss": 1.0307, |
| "step": 290600 |
| }, |
| { |
| "epoch": 93.95604395604396, |
| "grad_norm": 2.3553688526153564, |
| "learning_rate": 0.001, |
| "loss": 1.023, |
| "step": 290700 |
| }, |
| { |
| "epoch": 93.98836457659988, |
| "grad_norm": 2.5887322425842285, |
| "learning_rate": 0.001, |
| "loss": 1.0195, |
| "step": 290800 |
| }, |
| { |
| "epoch": 94.02068519715579, |
| "grad_norm": 1.7967345714569092, |
| "learning_rate": 0.001, |
| "loss": 0.9646, |
| "step": 290900 |
| }, |
| { |
| "epoch": 94.0530058177117, |
| "grad_norm": 2.0765960216522217, |
| "learning_rate": 0.001, |
| "loss": 0.9075, |
| "step": 291000 |
| }, |
| { |
| "epoch": 94.08532643826761, |
| "grad_norm": 1.8205326795578003, |
| "learning_rate": 0.001, |
| "loss": 0.9234, |
| "step": 291100 |
| }, |
| { |
| "epoch": 94.11764705882354, |
| "grad_norm": 2.5845706462860107, |
| "learning_rate": 0.001, |
| "loss": 0.9408, |
| "step": 291200 |
| }, |
| { |
| "epoch": 94.14996767937944, |
| "grad_norm": 2.3111801147460938, |
| "learning_rate": 0.001, |
| "loss": 0.9202, |
| "step": 291300 |
| }, |
| { |
| "epoch": 94.18228829993537, |
| "grad_norm": 2.1115758419036865, |
| "learning_rate": 0.001, |
| "loss": 0.941, |
| "step": 291400 |
| }, |
| { |
| "epoch": 94.21460892049127, |
| "grad_norm": 1.778016448020935, |
| "learning_rate": 0.001, |
| "loss": 0.9442, |
| "step": 291500 |
| }, |
| { |
| "epoch": 94.2469295410472, |
| "grad_norm": 2.3348562717437744, |
| "learning_rate": 0.001, |
| "loss": 0.9538, |
| "step": 291600 |
| }, |
| { |
| "epoch": 94.2792501616031, |
| "grad_norm": 1.9034233093261719, |
| "learning_rate": 0.001, |
| "loss": 0.9438, |
| "step": 291700 |
| }, |
| { |
| "epoch": 94.31157078215902, |
| "grad_norm": 1.7321242094039917, |
| "learning_rate": 0.001, |
| "loss": 0.9677, |
| "step": 291800 |
| }, |
| { |
| "epoch": 94.34389140271493, |
| "grad_norm": 1.788539171218872, |
| "learning_rate": 0.001, |
| "loss": 0.9492, |
| "step": 291900 |
| }, |
| { |
| "epoch": 94.37621202327085, |
| "grad_norm": 1.9123979806900024, |
| "learning_rate": 0.001, |
| "loss": 0.9652, |
| "step": 292000 |
| }, |
| { |
| "epoch": 94.40853264382676, |
| "grad_norm": 2.391134262084961, |
| "learning_rate": 0.001, |
| "loss": 0.9594, |
| "step": 292100 |
| }, |
| { |
| "epoch": 94.44085326438268, |
| "grad_norm": 1.6947717666625977, |
| "learning_rate": 0.001, |
| "loss": 0.9634, |
| "step": 292200 |
| }, |
| { |
| "epoch": 94.47317388493859, |
| "grad_norm": 3.093050241470337, |
| "learning_rate": 0.001, |
| "loss": 0.9646, |
| "step": 292300 |
| }, |
| { |
| "epoch": 94.50549450549451, |
| "grad_norm": 2.0479140281677246, |
| "learning_rate": 0.001, |
| "loss": 0.9826, |
| "step": 292400 |
| }, |
| { |
| "epoch": 94.53781512605042, |
| "grad_norm": 2.06463623046875, |
| "learning_rate": 0.001, |
| "loss": 0.9834, |
| "step": 292500 |
| }, |
| { |
| "epoch": 94.57013574660634, |
| "grad_norm": 1.81849205493927, |
| "learning_rate": 0.001, |
| "loss": 0.976, |
| "step": 292600 |
| }, |
| { |
| "epoch": 94.60245636716225, |
| "grad_norm": 1.5885215997695923, |
| "learning_rate": 0.001, |
| "loss": 0.9767, |
| "step": 292700 |
| }, |
| { |
| "epoch": 94.63477698771817, |
| "grad_norm": 3.0277457237243652, |
| "learning_rate": 0.001, |
| "loss": 0.985, |
| "step": 292800 |
| }, |
| { |
| "epoch": 94.66709760827408, |
| "grad_norm": 1.899543046951294, |
| "learning_rate": 0.001, |
| "loss": 0.9824, |
| "step": 292900 |
| }, |
| { |
| "epoch": 94.69941822883, |
| "grad_norm": 2.1208620071411133, |
| "learning_rate": 0.001, |
| "loss": 1.0109, |
| "step": 293000 |
| }, |
| { |
| "epoch": 94.7317388493859, |
| "grad_norm": 2.451232671737671, |
| "learning_rate": 0.001, |
| "loss": 0.9952, |
| "step": 293100 |
| }, |
| { |
| "epoch": 94.76405946994183, |
| "grad_norm": 2.232257604598999, |
| "learning_rate": 0.001, |
| "loss": 1.0, |
| "step": 293200 |
| }, |
| { |
| "epoch": 94.79638009049773, |
| "grad_norm": 1.7241202592849731, |
| "learning_rate": 0.001, |
| "loss": 0.9966, |
| "step": 293300 |
| }, |
| { |
| "epoch": 94.82870071105366, |
| "grad_norm": 2.0741376876831055, |
| "learning_rate": 0.001, |
| "loss": 1.0213, |
| "step": 293400 |
| }, |
| { |
| "epoch": 94.86102133160956, |
| "grad_norm": 1.8772655725479126, |
| "learning_rate": 0.001, |
| "loss": 1.0044, |
| "step": 293500 |
| }, |
| { |
| "epoch": 94.89334195216549, |
| "grad_norm": 2.1163671016693115, |
| "learning_rate": 0.001, |
| "loss": 1.0106, |
| "step": 293600 |
| }, |
| { |
| "epoch": 94.9256625727214, |
| "grad_norm": 2.3701653480529785, |
| "learning_rate": 0.001, |
| "loss": 1.0054, |
| "step": 293700 |
| }, |
| { |
| "epoch": 94.95798319327731, |
| "grad_norm": 1.8388867378234863, |
| "learning_rate": 0.001, |
| "loss": 1.0137, |
| "step": 293800 |
| }, |
| { |
| "epoch": 94.99030381383322, |
| "grad_norm": 1.9023023843765259, |
| "learning_rate": 0.001, |
| "loss": 1.0238, |
| "step": 293900 |
| }, |
| { |
| "epoch": 95.02262443438914, |
| "grad_norm": 2.3602747917175293, |
| "learning_rate": 0.001, |
| "loss": 0.9515, |
| "step": 294000 |
| }, |
| { |
| "epoch": 95.05494505494505, |
| "grad_norm": 1.9043653011322021, |
| "learning_rate": 0.001, |
| "loss": 0.9271, |
| "step": 294100 |
| }, |
| { |
| "epoch": 95.08726567550097, |
| "grad_norm": 1.6567951440811157, |
| "learning_rate": 0.001, |
| "loss": 0.9284, |
| "step": 294200 |
| }, |
| { |
| "epoch": 95.11958629605688, |
| "grad_norm": 1.8202804327011108, |
| "learning_rate": 0.001, |
| "loss": 0.9266, |
| "step": 294300 |
| }, |
| { |
| "epoch": 95.1519069166128, |
| "grad_norm": 1.613312005996704, |
| "learning_rate": 0.001, |
| "loss": 0.9181, |
| "step": 294400 |
| }, |
| { |
| "epoch": 95.18422753716871, |
| "grad_norm": 2.1593878269195557, |
| "learning_rate": 0.001, |
| "loss": 0.932, |
| "step": 294500 |
| }, |
| { |
| "epoch": 95.21654815772463, |
| "grad_norm": 1.7870193719863892, |
| "learning_rate": 0.001, |
| "loss": 0.9439, |
| "step": 294600 |
| }, |
| { |
| "epoch": 95.24886877828054, |
| "grad_norm": 1.8779339790344238, |
| "learning_rate": 0.001, |
| "loss": 0.9384, |
| "step": 294700 |
| }, |
| { |
| "epoch": 95.28118939883646, |
| "grad_norm": 1.589490532875061, |
| "learning_rate": 0.001, |
| "loss": 0.9503, |
| "step": 294800 |
| }, |
| { |
| "epoch": 95.31351001939237, |
| "grad_norm": 2.23694109916687, |
| "learning_rate": 0.001, |
| "loss": 0.9386, |
| "step": 294900 |
| }, |
| { |
| "epoch": 95.34583063994829, |
| "grad_norm": 1.9749599695205688, |
| "learning_rate": 0.001, |
| "loss": 0.9478, |
| "step": 295000 |
| }, |
| { |
| "epoch": 95.3781512605042, |
| "grad_norm": 1.9359219074249268, |
| "learning_rate": 0.001, |
| "loss": 0.9549, |
| "step": 295100 |
| }, |
| { |
| "epoch": 95.41047188106012, |
| "grad_norm": 1.7539458274841309, |
| "learning_rate": 0.001, |
| "loss": 0.9532, |
| "step": 295200 |
| }, |
| { |
| "epoch": 95.44279250161603, |
| "grad_norm": 2.1427483558654785, |
| "learning_rate": 0.001, |
| "loss": 0.954, |
| "step": 295300 |
| }, |
| { |
| "epoch": 95.47511312217195, |
| "grad_norm": 1.6027040481567383, |
| "learning_rate": 0.001, |
| "loss": 0.9658, |
| "step": 295400 |
| }, |
| { |
| "epoch": 95.50743374272786, |
| "grad_norm": 1.9406092166900635, |
| "learning_rate": 0.001, |
| "loss": 0.9661, |
| "step": 295500 |
| }, |
| { |
| "epoch": 95.53975436328378, |
| "grad_norm": 1.6757909059524536, |
| "learning_rate": 0.001, |
| "loss": 0.9748, |
| "step": 295600 |
| }, |
| { |
| "epoch": 95.57207498383968, |
| "grad_norm": 1.8995640277862549, |
| "learning_rate": 0.001, |
| "loss": 0.9726, |
| "step": 295700 |
| }, |
| { |
| "epoch": 95.6043956043956, |
| "grad_norm": 1.8258506059646606, |
| "learning_rate": 0.001, |
| "loss": 0.9771, |
| "step": 295800 |
| }, |
| { |
| "epoch": 95.63671622495151, |
| "grad_norm": 2.0489180088043213, |
| "learning_rate": 0.001, |
| "loss": 0.9804, |
| "step": 295900 |
| }, |
| { |
| "epoch": 95.66903684550743, |
| "grad_norm": 1.8056089878082275, |
| "learning_rate": 0.001, |
| "loss": 0.9902, |
| "step": 296000 |
| }, |
| { |
| "epoch": 95.70135746606334, |
| "grad_norm": 1.7540534734725952, |
| "learning_rate": 0.001, |
| "loss": 0.9825, |
| "step": 296100 |
| }, |
| { |
| "epoch": 95.73367808661926, |
| "grad_norm": 1.9533356428146362, |
| "learning_rate": 0.001, |
| "loss": 0.9919, |
| "step": 296200 |
| }, |
| { |
| "epoch": 95.76599870717517, |
| "grad_norm": 2.378761053085327, |
| "learning_rate": 0.001, |
| "loss": 0.99, |
| "step": 296300 |
| }, |
| { |
| "epoch": 95.7983193277311, |
| "grad_norm": 1.724198818206787, |
| "learning_rate": 0.001, |
| "loss": 0.9953, |
| "step": 296400 |
| }, |
| { |
| "epoch": 95.830639948287, |
| "grad_norm": 2.2945659160614014, |
| "learning_rate": 0.001, |
| "loss": 0.9999, |
| "step": 296500 |
| }, |
| { |
| "epoch": 95.86296056884292, |
| "grad_norm": 1.710324764251709, |
| "learning_rate": 0.001, |
| "loss": 1.0072, |
| "step": 296600 |
| }, |
| { |
| "epoch": 95.89528118939883, |
| "grad_norm": 1.5836914777755737, |
| "learning_rate": 0.001, |
| "loss": 0.9861, |
| "step": 296700 |
| }, |
| { |
| "epoch": 95.92760180995475, |
| "grad_norm": 1.6677262783050537, |
| "learning_rate": 0.001, |
| "loss": 0.9965, |
| "step": 296800 |
| }, |
| { |
| "epoch": 95.95992243051066, |
| "grad_norm": 1.6849901676177979, |
| "learning_rate": 0.001, |
| "loss": 1.0093, |
| "step": 296900 |
| }, |
| { |
| "epoch": 95.99224305106658, |
| "grad_norm": 1.8210523128509521, |
| "learning_rate": 0.001, |
| "loss": 1.0217, |
| "step": 297000 |
| }, |
| { |
| "epoch": 96.0245636716225, |
| "grad_norm": 1.721255898475647, |
| "learning_rate": 0.001, |
| "loss": 0.9372, |
| "step": 297100 |
| }, |
| { |
| "epoch": 96.05688429217841, |
| "grad_norm": 1.9859453439712524, |
| "learning_rate": 0.001, |
| "loss": 0.9033, |
| "step": 297200 |
| }, |
| { |
| "epoch": 96.08920491273433, |
| "grad_norm": 1.655301809310913, |
| "learning_rate": 0.001, |
| "loss": 0.9167, |
| "step": 297300 |
| }, |
| { |
| "epoch": 96.12152553329024, |
| "grad_norm": 2.0336883068084717, |
| "learning_rate": 0.001, |
| "loss": 0.9247, |
| "step": 297400 |
| }, |
| { |
| "epoch": 96.15384615384616, |
| "grad_norm": 2.0027976036071777, |
| "learning_rate": 0.001, |
| "loss": 0.9255, |
| "step": 297500 |
| }, |
| { |
| "epoch": 96.18616677440207, |
| "grad_norm": 1.4069693088531494, |
| "learning_rate": 0.001, |
| "loss": 0.9324, |
| "step": 297600 |
| }, |
| { |
| "epoch": 96.21848739495799, |
| "grad_norm": 1.8238356113433838, |
| "learning_rate": 0.001, |
| "loss": 0.9208, |
| "step": 297700 |
| }, |
| { |
| "epoch": 96.2508080155139, |
| "grad_norm": 1.9688968658447266, |
| "learning_rate": 0.001, |
| "loss": 0.9407, |
| "step": 297800 |
| }, |
| { |
| "epoch": 96.28312863606982, |
| "grad_norm": 1.9155817031860352, |
| "learning_rate": 0.001, |
| "loss": 0.939, |
| "step": 297900 |
| }, |
| { |
| "epoch": 96.31544925662573, |
| "grad_norm": 1.7525216341018677, |
| "learning_rate": 0.001, |
| "loss": 0.9424, |
| "step": 298000 |
| }, |
| { |
| "epoch": 96.34776987718165, |
| "grad_norm": 2.573129415512085, |
| "learning_rate": 0.001, |
| "loss": 0.9396, |
| "step": 298100 |
| }, |
| { |
| "epoch": 96.38009049773756, |
| "grad_norm": 2.0800180435180664, |
| "learning_rate": 0.001, |
| "loss": 0.9402, |
| "step": 298200 |
| }, |
| { |
| "epoch": 96.41241111829348, |
| "grad_norm": 1.723068118095398, |
| "learning_rate": 0.001, |
| "loss": 0.9481, |
| "step": 298300 |
| }, |
| { |
| "epoch": 96.44473173884938, |
| "grad_norm": 1.7221217155456543, |
| "learning_rate": 0.001, |
| "loss": 0.9542, |
| "step": 298400 |
| }, |
| { |
| "epoch": 96.4770523594053, |
| "grad_norm": 1.8597713708877563, |
| "learning_rate": 0.001, |
| "loss": 0.9532, |
| "step": 298500 |
| }, |
| { |
| "epoch": 96.50937297996121, |
| "grad_norm": 1.7525596618652344, |
| "learning_rate": 0.001, |
| "loss": 0.9557, |
| "step": 298600 |
| }, |
| { |
| "epoch": 96.54169360051714, |
| "grad_norm": 2.1579041481018066, |
| "learning_rate": 0.001, |
| "loss": 0.9692, |
| "step": 298700 |
| }, |
| { |
| "epoch": 96.57401422107304, |
| "grad_norm": 1.7372926473617554, |
| "learning_rate": 0.001, |
| "loss": 0.9585, |
| "step": 298800 |
| }, |
| { |
| "epoch": 96.60633484162896, |
| "grad_norm": 1.6883844137191772, |
| "learning_rate": 0.001, |
| "loss": 0.9598, |
| "step": 298900 |
| }, |
| { |
| "epoch": 96.63865546218487, |
| "grad_norm": 2.018057346343994, |
| "learning_rate": 0.001, |
| "loss": 0.9729, |
| "step": 299000 |
| }, |
| { |
| "epoch": 96.6709760827408, |
| "grad_norm": 1.6414058208465576, |
| "learning_rate": 0.001, |
| "loss": 0.9783, |
| "step": 299100 |
| }, |
| { |
| "epoch": 96.7032967032967, |
| "grad_norm": 1.5033763647079468, |
| "learning_rate": 0.001, |
| "loss": 0.9791, |
| "step": 299200 |
| }, |
| { |
| "epoch": 96.73561732385262, |
| "grad_norm": 1.5835376977920532, |
| "learning_rate": 0.001, |
| "loss": 0.9804, |
| "step": 299300 |
| }, |
| { |
| "epoch": 96.76793794440853, |
| "grad_norm": 2.2092549800872803, |
| "learning_rate": 0.001, |
| "loss": 1.0009, |
| "step": 299400 |
| }, |
| { |
| "epoch": 96.80025856496445, |
| "grad_norm": 1.7708107233047485, |
| "learning_rate": 0.001, |
| "loss": 0.9765, |
| "step": 299500 |
| }, |
| { |
| "epoch": 96.83257918552036, |
| "grad_norm": 1.8122010231018066, |
| "learning_rate": 0.001, |
| "loss": 1.0012, |
| "step": 299600 |
| }, |
| { |
| "epoch": 96.86489980607628, |
| "grad_norm": 1.8053666353225708, |
| "learning_rate": 0.001, |
| "loss": 0.9871, |
| "step": 299700 |
| }, |
| { |
| "epoch": 96.89722042663219, |
| "grad_norm": 2.1326189041137695, |
| "learning_rate": 0.001, |
| "loss": 0.9737, |
| "step": 299800 |
| }, |
| { |
| "epoch": 96.92954104718811, |
| "grad_norm": 2.2978103160858154, |
| "learning_rate": 0.001, |
| "loss": 0.9991, |
| "step": 299900 |
| }, |
| { |
| "epoch": 96.96186166774402, |
| "grad_norm": 1.8257157802581787, |
| "learning_rate": 0.001, |
| "loss": 1.0042, |
| "step": 300000 |
| }, |
| { |
| "epoch": 96.99418228829994, |
| "grad_norm": 1.9147756099700928, |
| "learning_rate": 0.001, |
| "loss": 0.9982, |
| "step": 300100 |
| }, |
| { |
| "epoch": 97.02650290885585, |
| "grad_norm": 1.7436898946762085, |
| "learning_rate": 0.001, |
| "loss": 0.9217, |
| "step": 300200 |
| }, |
| { |
| "epoch": 97.05882352941177, |
| "grad_norm": 2.3132598400115967, |
| "learning_rate": 0.001, |
| "loss": 0.9009, |
| "step": 300300 |
| }, |
| { |
| "epoch": 97.09114414996768, |
| "grad_norm": 1.7439414262771606, |
| "learning_rate": 0.001, |
| "loss": 0.9058, |
| "step": 300400 |
| }, |
| { |
| "epoch": 97.1234647705236, |
| "grad_norm": 1.6833429336547852, |
| "learning_rate": 0.001, |
| "loss": 0.8999, |
| "step": 300500 |
| }, |
| { |
| "epoch": 97.1557853910795, |
| "grad_norm": 1.65819251537323, |
| "learning_rate": 0.001, |
| "loss": 0.9105, |
| "step": 300600 |
| }, |
| { |
| "epoch": 97.18810601163543, |
| "grad_norm": 1.7139781713485718, |
| "learning_rate": 0.001, |
| "loss": 0.9218, |
| "step": 300700 |
| }, |
| { |
| "epoch": 97.22042663219133, |
| "grad_norm": 2.181692361831665, |
| "learning_rate": 0.001, |
| "loss": 0.9336, |
| "step": 300800 |
| }, |
| { |
| "epoch": 97.25274725274726, |
| "grad_norm": 2.0716326236724854, |
| "learning_rate": 0.001, |
| "loss": 0.9164, |
| "step": 300900 |
| }, |
| { |
| "epoch": 97.28506787330316, |
| "grad_norm": 2.2653331756591797, |
| "learning_rate": 0.001, |
| "loss": 0.9367, |
| "step": 301000 |
| }, |
| { |
| "epoch": 97.31738849385908, |
| "grad_norm": 2.1563384532928467, |
| "learning_rate": 0.001, |
| "loss": 0.9216, |
| "step": 301100 |
| }, |
| { |
| "epoch": 97.34970911441499, |
| "grad_norm": 2.309046983718872, |
| "learning_rate": 0.001, |
| "loss": 0.9338, |
| "step": 301200 |
| }, |
| { |
| "epoch": 97.38202973497091, |
| "grad_norm": 2.024261236190796, |
| "learning_rate": 0.001, |
| "loss": 0.9365, |
| "step": 301300 |
| }, |
| { |
| "epoch": 97.41435035552682, |
| "grad_norm": 2.32181978225708, |
| "learning_rate": 0.001, |
| "loss": 0.9496, |
| "step": 301400 |
| }, |
| { |
| "epoch": 97.44667097608274, |
| "grad_norm": 1.8799872398376465, |
| "learning_rate": 0.001, |
| "loss": 0.9549, |
| "step": 301500 |
| }, |
| { |
| "epoch": 97.47899159663865, |
| "grad_norm": 2.4056990146636963, |
| "learning_rate": 0.001, |
| "loss": 0.954, |
| "step": 301600 |
| }, |
| { |
| "epoch": 97.51131221719457, |
| "grad_norm": 2.1647398471832275, |
| "learning_rate": 0.001, |
| "loss": 0.9651, |
| "step": 301700 |
| }, |
| { |
| "epoch": 97.54363283775048, |
| "grad_norm": 2.175403356552124, |
| "learning_rate": 0.001, |
| "loss": 0.9534, |
| "step": 301800 |
| }, |
| { |
| "epoch": 97.5759534583064, |
| "grad_norm": 1.9020843505859375, |
| "learning_rate": 0.001, |
| "loss": 0.9618, |
| "step": 301900 |
| }, |
| { |
| "epoch": 97.60827407886231, |
| "grad_norm": 2.246063232421875, |
| "learning_rate": 0.001, |
| "loss": 0.9539, |
| "step": 302000 |
| }, |
| { |
| "epoch": 97.64059469941823, |
| "grad_norm": 1.7729310989379883, |
| "learning_rate": 0.001, |
| "loss": 0.9634, |
| "step": 302100 |
| }, |
| { |
| "epoch": 97.67291531997414, |
| "grad_norm": 1.6846141815185547, |
| "learning_rate": 0.001, |
| "loss": 0.9614, |
| "step": 302200 |
| }, |
| { |
| "epoch": 97.70523594053006, |
| "grad_norm": 1.7244802713394165, |
| "learning_rate": 0.001, |
| "loss": 0.9859, |
| "step": 302300 |
| }, |
| { |
| "epoch": 97.73755656108597, |
| "grad_norm": 2.1463193893432617, |
| "learning_rate": 0.001, |
| "loss": 0.9803, |
| "step": 302400 |
| }, |
| { |
| "epoch": 97.76987718164189, |
| "grad_norm": 1.5155985355377197, |
| "learning_rate": 0.001, |
| "loss": 0.9796, |
| "step": 302500 |
| }, |
| { |
| "epoch": 97.8021978021978, |
| "grad_norm": 2.3664512634277344, |
| "learning_rate": 0.001, |
| "loss": 0.9756, |
| "step": 302600 |
| }, |
| { |
| "epoch": 97.83451842275372, |
| "grad_norm": 2.193302631378174, |
| "learning_rate": 0.001, |
| "loss": 0.9786, |
| "step": 302700 |
| }, |
| { |
| "epoch": 97.86683904330962, |
| "grad_norm": 1.757538080215454, |
| "learning_rate": 0.001, |
| "loss": 0.9773, |
| "step": 302800 |
| }, |
| { |
| "epoch": 97.89915966386555, |
| "grad_norm": 2.0512661933898926, |
| "learning_rate": 0.001, |
| "loss": 0.9928, |
| "step": 302900 |
| }, |
| { |
| "epoch": 97.93148028442145, |
| "grad_norm": 1.9938185214996338, |
| "learning_rate": 0.001, |
| "loss": 0.9976, |
| "step": 303000 |
| }, |
| { |
| "epoch": 97.96380090497738, |
| "grad_norm": 2.04455304145813, |
| "learning_rate": 0.001, |
| "loss": 0.9859, |
| "step": 303100 |
| }, |
| { |
| "epoch": 97.99612152553328, |
| "grad_norm": 2.0681285858154297, |
| "learning_rate": 0.001, |
| "loss": 0.994, |
| "step": 303200 |
| }, |
| { |
| "epoch": 98.0284421460892, |
| "grad_norm": 1.7298678159713745, |
| "learning_rate": 0.001, |
| "loss": 0.9041, |
| "step": 303300 |
| }, |
| { |
| "epoch": 98.06076276664513, |
| "grad_norm": 1.8354662656784058, |
| "learning_rate": 0.001, |
| "loss": 0.888, |
| "step": 303400 |
| }, |
| { |
| "epoch": 98.09308338720103, |
| "grad_norm": 1.9386900663375854, |
| "learning_rate": 0.001, |
| "loss": 0.8944, |
| "step": 303500 |
| }, |
| { |
| "epoch": 98.12540400775696, |
| "grad_norm": 2.130673408508301, |
| "learning_rate": 0.001, |
| "loss": 0.9052, |
| "step": 303600 |
| }, |
| { |
| "epoch": 98.15772462831286, |
| "grad_norm": 1.854596734046936, |
| "learning_rate": 0.001, |
| "loss": 0.9047, |
| "step": 303700 |
| }, |
| { |
| "epoch": 98.19004524886878, |
| "grad_norm": 1.988502860069275, |
| "learning_rate": 0.001, |
| "loss": 0.9114, |
| "step": 303800 |
| }, |
| { |
| "epoch": 98.22236586942469, |
| "grad_norm": 1.8438735008239746, |
| "learning_rate": 0.001, |
| "loss": 0.905, |
| "step": 303900 |
| }, |
| { |
| "epoch": 98.25468648998061, |
| "grad_norm": 1.9197919368743896, |
| "learning_rate": 0.001, |
| "loss": 0.9029, |
| "step": 304000 |
| }, |
| { |
| "epoch": 98.28700711053652, |
| "grad_norm": 1.8311904668807983, |
| "learning_rate": 0.001, |
| "loss": 0.9201, |
| "step": 304100 |
| }, |
| { |
| "epoch": 98.31932773109244, |
| "grad_norm": 2.5999064445495605, |
| "learning_rate": 0.001, |
| "loss": 0.93, |
| "step": 304200 |
| }, |
| { |
| "epoch": 98.35164835164835, |
| "grad_norm": 2.2026302814483643, |
| "learning_rate": 0.001, |
| "loss": 0.9327, |
| "step": 304300 |
| }, |
| { |
| "epoch": 98.38396897220427, |
| "grad_norm": 2.3874950408935547, |
| "learning_rate": 0.001, |
| "loss": 0.9415, |
| "step": 304400 |
| }, |
| { |
| "epoch": 98.41628959276018, |
| "grad_norm": 1.6854950189590454, |
| "learning_rate": 0.001, |
| "loss": 0.9401, |
| "step": 304500 |
| }, |
| { |
| "epoch": 98.4486102133161, |
| "grad_norm": 1.915907621383667, |
| "learning_rate": 0.001, |
| "loss": 0.9611, |
| "step": 304600 |
| }, |
| { |
| "epoch": 98.48093083387201, |
| "grad_norm": 2.3232321739196777, |
| "learning_rate": 0.001, |
| "loss": 0.9398, |
| "step": 304700 |
| }, |
| { |
| "epoch": 98.51325145442793, |
| "grad_norm": 1.8774093389511108, |
| "learning_rate": 0.001, |
| "loss": 0.9582, |
| "step": 304800 |
| }, |
| { |
| "epoch": 98.54557207498384, |
| "grad_norm": 2.2585456371307373, |
| "learning_rate": 0.001, |
| "loss": 0.9513, |
| "step": 304900 |
| }, |
| { |
| "epoch": 98.57789269553976, |
| "grad_norm": 1.8954609632492065, |
| "learning_rate": 0.001, |
| "loss": 0.9578, |
| "step": 305000 |
| }, |
| { |
| "epoch": 98.61021331609567, |
| "grad_norm": 2.1864609718322754, |
| "learning_rate": 0.001, |
| "loss": 0.9581, |
| "step": 305100 |
| }, |
| { |
| "epoch": 98.64253393665159, |
| "grad_norm": 2.2927186489105225, |
| "learning_rate": 0.001, |
| "loss": 0.956, |
| "step": 305200 |
| }, |
| { |
| "epoch": 98.6748545572075, |
| "grad_norm": 2.110893964767456, |
| "learning_rate": 0.001, |
| "loss": 0.9598, |
| "step": 305300 |
| }, |
| { |
| "epoch": 98.70717517776342, |
| "grad_norm": 1.7718578577041626, |
| "learning_rate": 0.001, |
| "loss": 0.95, |
| "step": 305400 |
| }, |
| { |
| "epoch": 98.73949579831933, |
| "grad_norm": 1.7689002752304077, |
| "learning_rate": 0.001, |
| "loss": 0.9672, |
| "step": 305500 |
| }, |
| { |
| "epoch": 98.77181641887525, |
| "grad_norm": 2.084601879119873, |
| "learning_rate": 0.001, |
| "loss": 0.9858, |
| "step": 305600 |
| }, |
| { |
| "epoch": 98.80413703943115, |
| "grad_norm": 1.817209243774414, |
| "learning_rate": 0.001, |
| "loss": 0.9709, |
| "step": 305700 |
| }, |
| { |
| "epoch": 98.83645765998708, |
| "grad_norm": 2.194476842880249, |
| "learning_rate": 0.001, |
| "loss": 0.9676, |
| "step": 305800 |
| }, |
| { |
| "epoch": 98.86877828054298, |
| "grad_norm": 2.089932680130005, |
| "learning_rate": 0.001, |
| "loss": 0.9804, |
| "step": 305900 |
| }, |
| { |
| "epoch": 98.9010989010989, |
| "grad_norm": 1.8631789684295654, |
| "learning_rate": 0.001, |
| "loss": 0.9771, |
| "step": 306000 |
| }, |
| { |
| "epoch": 98.93341952165481, |
| "grad_norm": 2.2455215454101562, |
| "learning_rate": 0.001, |
| "loss": 0.9932, |
| "step": 306100 |
| }, |
| { |
| "epoch": 98.96574014221073, |
| "grad_norm": 1.965256690979004, |
| "learning_rate": 0.001, |
| "loss": 0.9878, |
| "step": 306200 |
| }, |
| { |
| "epoch": 98.99806076276664, |
| "grad_norm": 2.0805888175964355, |
| "learning_rate": 0.001, |
| "loss": 0.9783, |
| "step": 306300 |
| }, |
| { |
| "epoch": 99.03038138332256, |
| "grad_norm": 1.8865318298339844, |
| "learning_rate": 0.001, |
| "loss": 0.8933, |
| "step": 306400 |
| }, |
| { |
| "epoch": 99.06270200387847, |
| "grad_norm": 1.988250970840454, |
| "learning_rate": 0.001, |
| "loss": 0.8875, |
| "step": 306500 |
| }, |
| { |
| "epoch": 99.09502262443439, |
| "grad_norm": 1.8929754495620728, |
| "learning_rate": 0.001, |
| "loss": 0.892, |
| "step": 306600 |
| }, |
| { |
| "epoch": 99.1273432449903, |
| "grad_norm": 2.4540634155273438, |
| "learning_rate": 0.001, |
| "loss": 0.886, |
| "step": 306700 |
| }, |
| { |
| "epoch": 99.15966386554622, |
| "grad_norm": 2.009747266769409, |
| "learning_rate": 0.001, |
| "loss": 0.904, |
| "step": 306800 |
| }, |
| { |
| "epoch": 99.19198448610213, |
| "grad_norm": 1.8114582300186157, |
| "learning_rate": 0.001, |
| "loss": 0.9166, |
| "step": 306900 |
| }, |
| { |
| "epoch": 99.22430510665805, |
| "grad_norm": 2.278742790222168, |
| "learning_rate": 0.001, |
| "loss": 0.9117, |
| "step": 307000 |
| }, |
| { |
| "epoch": 99.25662572721396, |
| "grad_norm": 2.1780436038970947, |
| "learning_rate": 0.001, |
| "loss": 0.9065, |
| "step": 307100 |
| }, |
| { |
| "epoch": 99.28894634776988, |
| "grad_norm": 2.099867343902588, |
| "learning_rate": 0.001, |
| "loss": 0.9221, |
| "step": 307200 |
| }, |
| { |
| "epoch": 99.32126696832579, |
| "grad_norm": 2.2217254638671875, |
| "learning_rate": 0.001, |
| "loss": 0.9354, |
| "step": 307300 |
| }, |
| { |
| "epoch": 99.35358758888171, |
| "grad_norm": 1.9928747415542603, |
| "learning_rate": 0.001, |
| "loss": 0.9202, |
| "step": 307400 |
| }, |
| { |
| "epoch": 99.38590820943762, |
| "grad_norm": 1.8501205444335938, |
| "learning_rate": 0.001, |
| "loss": 0.9434, |
| "step": 307500 |
| }, |
| { |
| "epoch": 99.41822882999354, |
| "grad_norm": 2.4966423511505127, |
| "learning_rate": 0.001, |
| "loss": 0.9282, |
| "step": 307600 |
| }, |
| { |
| "epoch": 99.45054945054945, |
| "grad_norm": 2.4920759201049805, |
| "learning_rate": 0.001, |
| "loss": 0.9306, |
| "step": 307700 |
| }, |
| { |
| "epoch": 99.48287007110537, |
| "grad_norm": 2.0279624462127686, |
| "learning_rate": 0.001, |
| "loss": 0.9554, |
| "step": 307800 |
| }, |
| { |
| "epoch": 99.51519069166127, |
| "grad_norm": 2.1771886348724365, |
| "learning_rate": 0.001, |
| "loss": 0.9452, |
| "step": 307900 |
| }, |
| { |
| "epoch": 99.5475113122172, |
| "grad_norm": 2.0133235454559326, |
| "learning_rate": 0.001, |
| "loss": 0.9396, |
| "step": 308000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 309400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 100, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.247081673967452e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|