| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5855562784645413, |
| "eval_steps": 500, |
| "global_step": 1200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00048796356538711777, |
| "grad_norm": 0.4446243345737457, |
| "learning_rate": 0.0001, |
| "loss": 1.8998, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009759271307742355, |
| "grad_norm": 0.443472683429718, |
| "learning_rate": 0.0001, |
| "loss": 2.146, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0014638906961613532, |
| "grad_norm": 0.246729776263237, |
| "learning_rate": 0.0001, |
| "loss": 1.8931, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.001951854261548471, |
| "grad_norm": 0.3018186688423157, |
| "learning_rate": 0.0001, |
| "loss": 1.984, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.002439817826935589, |
| "grad_norm": 0.2850761413574219, |
| "learning_rate": 0.0001, |
| "loss": 1.863, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0029277813923227064, |
| "grad_norm": 0.23705212771892548, |
| "learning_rate": 0.0001, |
| "loss": 1.8384, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0034157449577098243, |
| "grad_norm": 0.24392390251159668, |
| "learning_rate": 0.0001, |
| "loss": 1.8743, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003903708523096942, |
| "grad_norm": 0.24215014278888702, |
| "learning_rate": 0.0001, |
| "loss": 1.8048, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00439167208848406, |
| "grad_norm": 0.22235405445098877, |
| "learning_rate": 0.0001, |
| "loss": 1.8098, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004879635653871178, |
| "grad_norm": 0.1880388706922531, |
| "learning_rate": 0.0001, |
| "loss": 1.7519, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005367599219258295, |
| "grad_norm": 0.2197292149066925, |
| "learning_rate": 0.0001, |
| "loss": 1.905, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.005855562784645413, |
| "grad_norm": 0.20583945512771606, |
| "learning_rate": 0.0001, |
| "loss": 1.8143, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006343526350032531, |
| "grad_norm": 0.20737111568450928, |
| "learning_rate": 0.0001, |
| "loss": 1.8505, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0068314899154196486, |
| "grad_norm": 0.19384053349494934, |
| "learning_rate": 0.0001, |
| "loss": 1.7528, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.007319453480806766, |
| "grad_norm": 0.23753000795841217, |
| "learning_rate": 0.0001, |
| "loss": 1.7206, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007807417046193884, |
| "grad_norm": 0.1946115642786026, |
| "learning_rate": 0.0001, |
| "loss": 1.7562, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.008295380611581003, |
| "grad_norm": 0.18985839188098907, |
| "learning_rate": 0.0001, |
| "loss": 1.6665, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00878334417696812, |
| "grad_norm": 0.20499983429908752, |
| "learning_rate": 0.0001, |
| "loss": 1.9491, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.009271307742355238, |
| "grad_norm": 0.1874532699584961, |
| "learning_rate": 0.0001, |
| "loss": 1.7975, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009759271307742356, |
| "grad_norm": 0.18048429489135742, |
| "learning_rate": 0.0001, |
| "loss": 1.7799, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010247234873129472, |
| "grad_norm": 0.1777779906988144, |
| "learning_rate": 0.0001, |
| "loss": 1.7816, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01073519843851659, |
| "grad_norm": 0.17349651455879211, |
| "learning_rate": 0.0001, |
| "loss": 1.7431, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.011223162003903709, |
| "grad_norm": 0.18479375541210175, |
| "learning_rate": 0.0001, |
| "loss": 1.903, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.011711125569290826, |
| "grad_norm": 0.1918632984161377, |
| "learning_rate": 0.0001, |
| "loss": 1.7957, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012199089134677944, |
| "grad_norm": 0.18239013850688934, |
| "learning_rate": 0.0001, |
| "loss": 1.8039, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012687052700065062, |
| "grad_norm": 0.17392802238464355, |
| "learning_rate": 0.0001, |
| "loss": 1.7022, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.013175016265452179, |
| "grad_norm": 0.1769259124994278, |
| "learning_rate": 0.0001, |
| "loss": 1.7131, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.013662979830839297, |
| "grad_norm": 0.17371872067451477, |
| "learning_rate": 0.0001, |
| "loss": 1.7657, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.014150943396226415, |
| "grad_norm": 0.19897091388702393, |
| "learning_rate": 0.0001, |
| "loss": 1.8791, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.014638906961613532, |
| "grad_norm": 0.17471033334732056, |
| "learning_rate": 0.0001, |
| "loss": 1.8765, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01512687052700065, |
| "grad_norm": 0.17650161683559418, |
| "learning_rate": 0.0001, |
| "loss": 1.8181, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.015614834092387769, |
| "grad_norm": 0.18008925020694733, |
| "learning_rate": 0.0001, |
| "loss": 1.8138, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.016102797657774885, |
| "grad_norm": 0.18406356871128082, |
| "learning_rate": 0.0001, |
| "loss": 1.907, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.016590761223162005, |
| "grad_norm": 0.18869489431381226, |
| "learning_rate": 0.0001, |
| "loss": 1.9043, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.017078724788549122, |
| "grad_norm": 0.18416965007781982, |
| "learning_rate": 0.0001, |
| "loss": 1.7695, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01756668835393624, |
| "grad_norm": 0.18121257424354553, |
| "learning_rate": 0.0001, |
| "loss": 1.8342, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01805465191932336, |
| "grad_norm": 0.18426860868930817, |
| "learning_rate": 0.0001, |
| "loss": 1.818, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.018542615484710475, |
| "grad_norm": 0.18800823390483856, |
| "learning_rate": 0.0001, |
| "loss": 1.8019, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.01903057905009759, |
| "grad_norm": 0.18787121772766113, |
| "learning_rate": 0.0001, |
| "loss": 1.8052, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.01951854261548471, |
| "grad_norm": 0.18341200053691864, |
| "learning_rate": 0.0001, |
| "loss": 1.7288, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.020006506180871828, |
| "grad_norm": 0.18460282683372498, |
| "learning_rate": 0.0001, |
| "loss": 1.9984, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.020494469746258945, |
| "grad_norm": 0.17212441563606262, |
| "learning_rate": 0.0001, |
| "loss": 1.7928, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.020982433311646065, |
| "grad_norm": 0.18548350036144257, |
| "learning_rate": 0.0001, |
| "loss": 1.9719, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02147039687703318, |
| "grad_norm": 0.18035617470741272, |
| "learning_rate": 0.0001, |
| "loss": 1.9265, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.021958360442420298, |
| "grad_norm": 0.16300201416015625, |
| "learning_rate": 0.0001, |
| "loss": 1.6821, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.022446324007807418, |
| "grad_norm": 0.1797887086868286, |
| "learning_rate": 0.0001, |
| "loss": 1.8276, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.022934287573194535, |
| "grad_norm": 0.18614032864570618, |
| "learning_rate": 0.0001, |
| "loss": 1.769, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02342225113858165, |
| "grad_norm": 0.18762686848640442, |
| "learning_rate": 0.0001, |
| "loss": 1.7716, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.02391021470396877, |
| "grad_norm": 0.1779824048280716, |
| "learning_rate": 0.0001, |
| "loss": 1.7047, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.024398178269355888, |
| "grad_norm": 0.1713806688785553, |
| "learning_rate": 0.0001, |
| "loss": 1.7085, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024886141834743004, |
| "grad_norm": 0.17888174951076508, |
| "learning_rate": 0.0001, |
| "loss": 1.8539, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.025374105400130124, |
| "grad_norm": 0.18366138637065887, |
| "learning_rate": 0.0001, |
| "loss": 1.7948, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02586206896551724, |
| "grad_norm": 0.1684766262769699, |
| "learning_rate": 0.0001, |
| "loss": 1.7752, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.026350032530904358, |
| "grad_norm": 0.18316026031970978, |
| "learning_rate": 0.0001, |
| "loss": 1.8153, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.026837996096291478, |
| "grad_norm": 0.1712900847196579, |
| "learning_rate": 0.0001, |
| "loss": 1.8209, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.027325959661678594, |
| "grad_norm": 0.17653001844882965, |
| "learning_rate": 0.0001, |
| "loss": 1.7142, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02781392322706571, |
| "grad_norm": 0.17115001380443573, |
| "learning_rate": 0.0001, |
| "loss": 1.7014, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02830188679245283, |
| "grad_norm": 0.19934123754501343, |
| "learning_rate": 0.0001, |
| "loss": 1.8184, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.028789850357839947, |
| "grad_norm": 0.20567697286605835, |
| "learning_rate": 0.0001, |
| "loss": 1.9174, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.029277813923227064, |
| "grad_norm": 0.17345917224884033, |
| "learning_rate": 0.0001, |
| "loss": 1.7448, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.029765777488614184, |
| "grad_norm": 0.24353067576885223, |
| "learning_rate": 0.0001, |
| "loss": 1.7974, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0302537410540013, |
| "grad_norm": 0.18949398398399353, |
| "learning_rate": 0.0001, |
| "loss": 1.8231, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03074170461938842, |
| "grad_norm": 0.22029712796211243, |
| "learning_rate": 0.0001, |
| "loss": 1.8535, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.031229668184775537, |
| "grad_norm": 0.16962048411369324, |
| "learning_rate": 0.0001, |
| "loss": 1.7686, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03171763175016266, |
| "grad_norm": 0.19039765000343323, |
| "learning_rate": 0.0001, |
| "loss": 1.8303, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03220559531554977, |
| "grad_norm": 0.20166978240013123, |
| "learning_rate": 0.0001, |
| "loss": 1.768, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03269355888093689, |
| "grad_norm": 0.173394113779068, |
| "learning_rate": 0.0001, |
| "loss": 1.8253, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03318152244632401, |
| "grad_norm": 0.19260728359222412, |
| "learning_rate": 0.0001, |
| "loss": 1.7589, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.033669486011711124, |
| "grad_norm": 0.19539032876491547, |
| "learning_rate": 0.0001, |
| "loss": 1.749, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.034157449577098244, |
| "grad_norm": 0.16770870983600616, |
| "learning_rate": 0.0001, |
| "loss": 1.7132, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.034645413142485364, |
| "grad_norm": 0.19755178689956665, |
| "learning_rate": 0.0001, |
| "loss": 1.8323, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03513337670787248, |
| "grad_norm": 0.18038292229175568, |
| "learning_rate": 0.0001, |
| "loss": 1.7599, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0356213402732596, |
| "grad_norm": 0.17995433509349823, |
| "learning_rate": 0.0001, |
| "loss": 1.9183, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03610930383864672, |
| "grad_norm": 0.19222807884216309, |
| "learning_rate": 0.0001, |
| "loss": 1.8642, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.03659726740403383, |
| "grad_norm": 0.16965682804584503, |
| "learning_rate": 0.0001, |
| "loss": 1.7271, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03708523096942095, |
| "grad_norm": 0.17662999033927917, |
| "learning_rate": 0.0001, |
| "loss": 1.8263, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03757319453480807, |
| "grad_norm": 0.1699201613664627, |
| "learning_rate": 0.0001, |
| "loss": 1.6818, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03806115810019518, |
| "grad_norm": 0.17309829592704773, |
| "learning_rate": 0.0001, |
| "loss": 1.7424, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0385491216655823, |
| "grad_norm": 0.18537020683288574, |
| "learning_rate": 0.0001, |
| "loss": 1.7986, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.03903708523096942, |
| "grad_norm": 0.1709861010313034, |
| "learning_rate": 0.0001, |
| "loss": 1.6091, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.039525048796356536, |
| "grad_norm": 0.17050296068191528, |
| "learning_rate": 0.0001, |
| "loss": 1.6904, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.040013012361743656, |
| "grad_norm": 0.17640157043933868, |
| "learning_rate": 0.0001, |
| "loss": 1.7087, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.040500975927130776, |
| "grad_norm": 0.1919400542974472, |
| "learning_rate": 0.0001, |
| "loss": 1.8223, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04098893949251789, |
| "grad_norm": 0.19427765905857086, |
| "learning_rate": 0.0001, |
| "loss": 1.7443, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04147690305790501, |
| "grad_norm": 0.19496281445026398, |
| "learning_rate": 0.0001, |
| "loss": 1.8336, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04196486662329213, |
| "grad_norm": 0.18101565539836884, |
| "learning_rate": 0.0001, |
| "loss": 1.8422, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04245283018867924, |
| "grad_norm": 0.19941496849060059, |
| "learning_rate": 0.0001, |
| "loss": 1.7168, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04294079375406636, |
| "grad_norm": 0.1963973492383957, |
| "learning_rate": 0.0001, |
| "loss": 1.7558, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04342875731945348, |
| "grad_norm": 0.17694450914859772, |
| "learning_rate": 0.0001, |
| "loss": 1.6953, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.043916720884840596, |
| "grad_norm": 0.19362711906433105, |
| "learning_rate": 0.0001, |
| "loss": 1.8165, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.044404684450227716, |
| "grad_norm": 0.1736024022102356, |
| "learning_rate": 0.0001, |
| "loss": 1.777, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.044892648015614836, |
| "grad_norm": 0.17649488151073456, |
| "learning_rate": 0.0001, |
| "loss": 1.7507, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04538061158100195, |
| "grad_norm": 0.2002265304327011, |
| "learning_rate": 0.0001, |
| "loss": 1.8796, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.04586857514638907, |
| "grad_norm": 0.1667991429567337, |
| "learning_rate": 0.0001, |
| "loss": 1.7051, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04635653871177619, |
| "grad_norm": 0.1868171989917755, |
| "learning_rate": 0.0001, |
| "loss": 1.747, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0468445022771633, |
| "grad_norm": 0.18312174081802368, |
| "learning_rate": 0.0001, |
| "loss": 1.7835, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04733246584255042, |
| "grad_norm": 0.1762659102678299, |
| "learning_rate": 0.0001, |
| "loss": 1.6517, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.04782042940793754, |
| "grad_norm": 0.19766494631767273, |
| "learning_rate": 0.0001, |
| "loss": 1.826, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.048308392973324656, |
| "grad_norm": 0.17331789433956146, |
| "learning_rate": 0.0001, |
| "loss": 1.7506, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.048796356538711776, |
| "grad_norm": 0.16851170361042023, |
| "learning_rate": 0.0001, |
| "loss": 1.744, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.049284320104098896, |
| "grad_norm": 0.17572622001171112, |
| "learning_rate": 0.0001, |
| "loss": 1.6986, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04977228366948601, |
| "grad_norm": 0.1850849688053131, |
| "learning_rate": 0.0001, |
| "loss": 1.7895, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.05026024723487313, |
| "grad_norm": 0.18450362980365753, |
| "learning_rate": 0.0001, |
| "loss": 1.8234, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05074821080026025, |
| "grad_norm": 0.1832476705312729, |
| "learning_rate": 0.0001, |
| "loss": 1.7986, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05123617436564736, |
| "grad_norm": 0.1809314638376236, |
| "learning_rate": 0.0001, |
| "loss": 1.7923, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05172413793103448, |
| "grad_norm": 0.17974039912223816, |
| "learning_rate": 0.0001, |
| "loss": 1.7095, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0522121014964216, |
| "grad_norm": 0.16436076164245605, |
| "learning_rate": 0.0001, |
| "loss": 1.6873, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.052700065061808715, |
| "grad_norm": 0.16344858705997467, |
| "learning_rate": 0.0001, |
| "loss": 1.6991, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.053188028627195835, |
| "grad_norm": 0.17950277030467987, |
| "learning_rate": 0.0001, |
| "loss": 1.8591, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.053675992192582955, |
| "grad_norm": 0.18337760865688324, |
| "learning_rate": 0.0001, |
| "loss": 1.784, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05416395575797007, |
| "grad_norm": 0.1895488053560257, |
| "learning_rate": 0.0001, |
| "loss": 1.7853, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05465191932335719, |
| "grad_norm": 0.17522425949573517, |
| "learning_rate": 0.0001, |
| "loss": 1.7127, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05513988288874431, |
| "grad_norm": 0.17943814396858215, |
| "learning_rate": 0.0001, |
| "loss": 1.755, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.05562784645413142, |
| "grad_norm": 0.1815492808818817, |
| "learning_rate": 0.0001, |
| "loss": 1.7687, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05611581001951854, |
| "grad_norm": 0.16954658925533295, |
| "learning_rate": 0.0001, |
| "loss": 1.7562, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05660377358490566, |
| "grad_norm": 0.17870648205280304, |
| "learning_rate": 0.0001, |
| "loss": 1.841, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.057091737150292775, |
| "grad_norm": 0.17044954001903534, |
| "learning_rate": 0.0001, |
| "loss": 1.7118, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.057579700715679895, |
| "grad_norm": 0.17524173855781555, |
| "learning_rate": 0.0001, |
| "loss": 1.6045, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.058067664281067015, |
| "grad_norm": 0.17537613213062286, |
| "learning_rate": 0.0001, |
| "loss": 1.8018, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.05855562784645413, |
| "grad_norm": 0.17819495499134064, |
| "learning_rate": 0.0001, |
| "loss": 1.7723, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05904359141184125, |
| "grad_norm": 0.17807795107364655, |
| "learning_rate": 0.0001, |
| "loss": 1.8558, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.05953155497722837, |
| "grad_norm": 0.1687198132276535, |
| "learning_rate": 0.0001, |
| "loss": 1.7673, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06001951854261549, |
| "grad_norm": 0.17069241404533386, |
| "learning_rate": 0.0001, |
| "loss": 1.7561, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0605074821080026, |
| "grad_norm": 0.1655956506729126, |
| "learning_rate": 0.0001, |
| "loss": 1.6607, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06099544567338972, |
| "grad_norm": 0.1846679002046585, |
| "learning_rate": 0.0001, |
| "loss": 1.8676, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06148340923877684, |
| "grad_norm": 0.17344145476818085, |
| "learning_rate": 0.0001, |
| "loss": 1.7427, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.061971372804163954, |
| "grad_norm": 0.17264996469020844, |
| "learning_rate": 0.0001, |
| "loss": 1.7279, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.062459336369551074, |
| "grad_norm": 0.18628281354904175, |
| "learning_rate": 0.0001, |
| "loss": 1.6708, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0629472999349382, |
| "grad_norm": 0.178174689412117, |
| "learning_rate": 0.0001, |
| "loss": 1.7931, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.06343526350032531, |
| "grad_norm": 0.17690585553646088, |
| "learning_rate": 0.0001, |
| "loss": 1.7647, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06392322706571242, |
| "grad_norm": 0.18117444217205048, |
| "learning_rate": 0.0001, |
| "loss": 1.7376, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06441119063109954, |
| "grad_norm": 0.17523089051246643, |
| "learning_rate": 0.0001, |
| "loss": 1.8403, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06489915419648666, |
| "grad_norm": 0.16988244652748108, |
| "learning_rate": 0.0001, |
| "loss": 1.6958, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06538711776187378, |
| "grad_norm": 0.1890041083097458, |
| "learning_rate": 0.0001, |
| "loss": 1.7388, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.0658750813272609, |
| "grad_norm": 0.1703094244003296, |
| "learning_rate": 0.0001, |
| "loss": 1.6424, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06636304489264802, |
| "grad_norm": 0.17852698266506195, |
| "learning_rate": 0.0001, |
| "loss": 1.7786, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06685100845803513, |
| "grad_norm": 0.17648550868034363, |
| "learning_rate": 0.0001, |
| "loss": 1.7172, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06733897202342225, |
| "grad_norm": 0.18284566700458527, |
| "learning_rate": 0.0001, |
| "loss": 1.7491, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.06782693558880937, |
| "grad_norm": 0.1686737835407257, |
| "learning_rate": 0.0001, |
| "loss": 1.7218, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06831489915419649, |
| "grad_norm": 0.1741771250963211, |
| "learning_rate": 0.0001, |
| "loss": 1.7534, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06880286271958361, |
| "grad_norm": 0.1778876781463623, |
| "learning_rate": 0.0001, |
| "loss": 1.7388, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.06929082628497073, |
| "grad_norm": 0.1860485076904297, |
| "learning_rate": 0.0001, |
| "loss": 1.8109, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06977878985035783, |
| "grad_norm": 0.17966079711914062, |
| "learning_rate": 0.0001, |
| "loss": 1.7171, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07026675341574495, |
| "grad_norm": 0.19341900944709778, |
| "learning_rate": 0.0001, |
| "loss": 1.7911, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07075471698113207, |
| "grad_norm": 0.1968701183795929, |
| "learning_rate": 0.0001, |
| "loss": 1.858, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0712426805465192, |
| "grad_norm": 0.17585061490535736, |
| "learning_rate": 0.0001, |
| "loss": 1.6731, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07173064411190631, |
| "grad_norm": 0.17294664680957794, |
| "learning_rate": 0.0001, |
| "loss": 1.7284, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07221860767729343, |
| "grad_norm": 0.18245872855186462, |
| "learning_rate": 0.0001, |
| "loss": 1.7595, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07270657124268054, |
| "grad_norm": 0.16850219666957855, |
| "learning_rate": 0.0001, |
| "loss": 1.73, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.07319453480806766, |
| "grad_norm": 0.16891759634017944, |
| "learning_rate": 0.0001, |
| "loss": 1.7434, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07368249837345478, |
| "grad_norm": 0.17363204061985016, |
| "learning_rate": 0.0001, |
| "loss": 1.738, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0741704619388419, |
| "grad_norm": 0.16307075321674347, |
| "learning_rate": 0.0001, |
| "loss": 1.6285, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07465842550422902, |
| "grad_norm": 0.1735111027956009, |
| "learning_rate": 0.0001, |
| "loss": 1.5711, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07514638906961614, |
| "grad_norm": 0.18169796466827393, |
| "learning_rate": 0.0001, |
| "loss": 1.7395, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.07563435263500325, |
| "grad_norm": 0.16926725208759308, |
| "learning_rate": 0.0001, |
| "loss": 1.7534, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07612231620039037, |
| "grad_norm": 0.19919319450855255, |
| "learning_rate": 0.0001, |
| "loss": 1.6975, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07661027976577749, |
| "grad_norm": 0.19146177172660828, |
| "learning_rate": 0.0001, |
| "loss": 1.8272, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0770982433311646, |
| "grad_norm": 0.19453231990337372, |
| "learning_rate": 0.0001, |
| "loss": 1.8229, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07758620689655173, |
| "grad_norm": 0.20597495138645172, |
| "learning_rate": 0.0001, |
| "loss": 1.8567, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07807417046193885, |
| "grad_norm": 0.18599432706832886, |
| "learning_rate": 0.0001, |
| "loss": 1.7587, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07856213402732595, |
| "grad_norm": 0.21232162415981293, |
| "learning_rate": 0.0001, |
| "loss": 1.7179, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07905009759271307, |
| "grad_norm": 0.1712743043899536, |
| "learning_rate": 0.0001, |
| "loss": 1.678, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07953806115810019, |
| "grad_norm": 0.18402481079101562, |
| "learning_rate": 0.0001, |
| "loss": 1.7731, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.08002602472348731, |
| "grad_norm": 0.18908202648162842, |
| "learning_rate": 0.0001, |
| "loss": 1.841, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.08051398828887443, |
| "grad_norm": 0.17370882630348206, |
| "learning_rate": 0.0001, |
| "loss": 1.6713, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.08100195185426155, |
| "grad_norm": 0.1881919503211975, |
| "learning_rate": 0.0001, |
| "loss": 1.8285, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08148991541964867, |
| "grad_norm": 0.1770172417163849, |
| "learning_rate": 0.0001, |
| "loss": 1.7292, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08197787898503578, |
| "grad_norm": 0.1822032779455185, |
| "learning_rate": 0.0001, |
| "loss": 1.6977, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0824658425504229, |
| "grad_norm": 0.19020989537239075, |
| "learning_rate": 0.0001, |
| "loss": 1.6964, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.08295380611581002, |
| "grad_norm": 0.17227591574192047, |
| "learning_rate": 0.0001, |
| "loss": 1.703, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08344176968119714, |
| "grad_norm": 0.19228717684745789, |
| "learning_rate": 0.0001, |
| "loss": 1.7247, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08392973324658426, |
| "grad_norm": 0.1909552961587906, |
| "learning_rate": 0.0001, |
| "loss": 1.7973, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.08441769681197138, |
| "grad_norm": 0.18189294636249542, |
| "learning_rate": 0.0001, |
| "loss": 1.7579, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08490566037735849, |
| "grad_norm": 0.19137217104434967, |
| "learning_rate": 0.0001, |
| "loss": 1.7198, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0853936239427456, |
| "grad_norm": 0.18612581491470337, |
| "learning_rate": 0.0001, |
| "loss": 1.7585, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08588158750813273, |
| "grad_norm": 0.1759909838438034, |
| "learning_rate": 0.0001, |
| "loss": 1.6732, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08636955107351985, |
| "grad_norm": 0.18982531130313873, |
| "learning_rate": 0.0001, |
| "loss": 1.8301, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08685751463890697, |
| "grad_norm": 0.16662733256816864, |
| "learning_rate": 0.0001, |
| "loss": 1.6799, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08734547820429409, |
| "grad_norm": 0.17956425249576569, |
| "learning_rate": 0.0001, |
| "loss": 1.671, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.08783344176968119, |
| "grad_norm": 0.18416181206703186, |
| "learning_rate": 0.0001, |
| "loss": 1.7922, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08832140533506831, |
| "grad_norm": 0.16633754968643188, |
| "learning_rate": 0.0001, |
| "loss": 1.7096, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08880936890045543, |
| "grad_norm": 0.19759412109851837, |
| "learning_rate": 0.0001, |
| "loss": 1.8402, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.08929733246584255, |
| "grad_norm": 0.17006362974643707, |
| "learning_rate": 0.0001, |
| "loss": 1.6922, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08978529603122967, |
| "grad_norm": 0.16919896006584167, |
| "learning_rate": 0.0001, |
| "loss": 1.6657, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.09027325959661679, |
| "grad_norm": 0.20307502150535583, |
| "learning_rate": 0.0001, |
| "loss": 1.8772, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0907612231620039, |
| "grad_norm": 0.17572732269763947, |
| "learning_rate": 0.0001, |
| "loss": 1.7666, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09124918672739102, |
| "grad_norm": 0.17327293753623962, |
| "learning_rate": 0.0001, |
| "loss": 1.8206, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09173715029277814, |
| "grad_norm": 0.18354281783103943, |
| "learning_rate": 0.0001, |
| "loss": 1.8013, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09222511385816526, |
| "grad_norm": 0.16821032762527466, |
| "learning_rate": 0.0001, |
| "loss": 1.6893, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09271307742355238, |
| "grad_norm": 0.17506404221057892, |
| "learning_rate": 0.0001, |
| "loss": 1.7657, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0932010409889395, |
| "grad_norm": 0.1758153885602951, |
| "learning_rate": 0.0001, |
| "loss": 1.7095, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0936890045543266, |
| "grad_norm": 0.18787072598934174, |
| "learning_rate": 0.0001, |
| "loss": 1.7312, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09417696811971372, |
| "grad_norm": 0.1803017109632492, |
| "learning_rate": 0.0001, |
| "loss": 1.7521, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.09466493168510084, |
| "grad_norm": 0.18097610771656036, |
| "learning_rate": 0.0001, |
| "loss": 1.6861, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.09515289525048796, |
| "grad_norm": 0.1760302186012268, |
| "learning_rate": 0.0001, |
| "loss": 1.6703, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09564085881587508, |
| "grad_norm": 0.17225316166877747, |
| "learning_rate": 0.0001, |
| "loss": 1.73, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.0961288223812622, |
| "grad_norm": 0.1856345683336258, |
| "learning_rate": 0.0001, |
| "loss": 1.6828, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09661678594664931, |
| "grad_norm": 0.18595090508460999, |
| "learning_rate": 0.0001, |
| "loss": 1.7136, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09710474951203643, |
| "grad_norm": 0.1780211329460144, |
| "learning_rate": 0.0001, |
| "loss": 1.8146, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09759271307742355, |
| "grad_norm": 0.17781271040439606, |
| "learning_rate": 0.0001, |
| "loss": 1.6679, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09808067664281067, |
| "grad_norm": 0.17124401032924652, |
| "learning_rate": 0.0001, |
| "loss": 1.7077, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09856864020819779, |
| "grad_norm": 0.18443076312541962, |
| "learning_rate": 0.0001, |
| "loss": 1.8058, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09905660377358491, |
| "grad_norm": 0.1758834272623062, |
| "learning_rate": 0.0001, |
| "loss": 1.81, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09954456733897202, |
| "grad_norm": 0.17878177762031555, |
| "learning_rate": 0.0001, |
| "loss": 1.7515, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.10003253090435914, |
| "grad_norm": 0.18028298020362854, |
| "learning_rate": 0.0001, |
| "loss": 1.7733, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.10052049446974626, |
| "grad_norm": 0.17935384809970856, |
| "learning_rate": 0.0001, |
| "loss": 1.8011, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10100845803513338, |
| "grad_norm": 0.19665150344371796, |
| "learning_rate": 0.0001, |
| "loss": 1.7667, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1014964216005205, |
| "grad_norm": 0.16669659316539764, |
| "learning_rate": 0.0001, |
| "loss": 1.7046, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10198438516590762, |
| "grad_norm": 0.17783086001873016, |
| "learning_rate": 0.0001, |
| "loss": 1.6424, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.10247234873129472, |
| "grad_norm": 0.1761302351951599, |
| "learning_rate": 0.0001, |
| "loss": 1.726, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10296031229668184, |
| "grad_norm": 0.17417997121810913, |
| "learning_rate": 0.0001, |
| "loss": 1.7181, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10344827586206896, |
| "grad_norm": 0.17537769675254822, |
| "learning_rate": 0.0001, |
| "loss": 1.6876, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.10393623942745608, |
| "grad_norm": 0.16924896836280823, |
| "learning_rate": 0.0001, |
| "loss": 1.768, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.1044242029928432, |
| "grad_norm": 0.20247921347618103, |
| "learning_rate": 0.0001, |
| "loss": 1.9159, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.10491216655823032, |
| "grad_norm": 0.16506172716617584, |
| "learning_rate": 0.0001, |
| "loss": 1.6491, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10540013012361743, |
| "grad_norm": 0.17558075487613678, |
| "learning_rate": 0.0001, |
| "loss": 1.7169, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10588809368900455, |
| "grad_norm": 0.17124514281749725, |
| "learning_rate": 0.0001, |
| "loss": 1.6931, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10637605725439167, |
| "grad_norm": 0.16885621845722198, |
| "learning_rate": 0.0001, |
| "loss": 1.6946, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10686402081977879, |
| "grad_norm": 0.17787247896194458, |
| "learning_rate": 0.0001, |
| "loss": 1.7477, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10735198438516591, |
| "grad_norm": 0.17979493737220764, |
| "learning_rate": 0.0001, |
| "loss": 1.7215, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10783994795055303, |
| "grad_norm": 0.187989741563797, |
| "learning_rate": 0.0001, |
| "loss": 1.6946, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10832791151594014, |
| "grad_norm": 0.18497705459594727, |
| "learning_rate": 0.0001, |
| "loss": 1.7725, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10881587508132726, |
| "grad_norm": 0.1895315796136856, |
| "learning_rate": 0.0001, |
| "loss": 1.7455, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10930383864671438, |
| "grad_norm": 0.17897574603557587, |
| "learning_rate": 0.0001, |
| "loss": 1.7297, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.1097918022121015, |
| "grad_norm": 0.18770314753055573, |
| "learning_rate": 0.0001, |
| "loss": 1.7948, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.11027976577748862, |
| "grad_norm": 0.1812209188938141, |
| "learning_rate": 0.0001, |
| "loss": 1.8229, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.11076772934287574, |
| "grad_norm": 0.17030760645866394, |
| "learning_rate": 0.0001, |
| "loss": 1.6029, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.11125569290826284, |
| "grad_norm": 0.18503767251968384, |
| "learning_rate": 0.0001, |
| "loss": 1.644, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11174365647364996, |
| "grad_norm": 0.17443233728408813, |
| "learning_rate": 0.0001, |
| "loss": 1.7024, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.11223162003903708, |
| "grad_norm": 0.1859743744134903, |
| "learning_rate": 0.0001, |
| "loss": 1.7859, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1127195836044242, |
| "grad_norm": 0.1692182421684265, |
| "learning_rate": 0.0001, |
| "loss": 1.6996, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.11320754716981132, |
| "grad_norm": 0.16695043444633484, |
| "learning_rate": 0.0001, |
| "loss": 1.7185, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11369551073519844, |
| "grad_norm": 0.18184787034988403, |
| "learning_rate": 0.0001, |
| "loss": 1.712, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.11418347430058555, |
| "grad_norm": 0.19107092916965485, |
| "learning_rate": 0.0001, |
| "loss": 1.8902, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.11467143786597267, |
| "grad_norm": 0.1724960058927536, |
| "learning_rate": 0.0001, |
| "loss": 1.7464, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11515940143135979, |
| "grad_norm": 0.17673127353191376, |
| "learning_rate": 0.0001, |
| "loss": 1.785, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.11564736499674691, |
| "grad_norm": 0.18474438786506653, |
| "learning_rate": 0.0001, |
| "loss": 1.8143, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11613532856213403, |
| "grad_norm": 0.17361678183078766, |
| "learning_rate": 0.0001, |
| "loss": 1.7558, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.11662329212752115, |
| "grad_norm": 0.17701455950737, |
| "learning_rate": 0.0001, |
| "loss": 1.5568, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.11711125569290826, |
| "grad_norm": 0.18372413516044617, |
| "learning_rate": 0.0001, |
| "loss": 1.7913, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11759921925829538, |
| "grad_norm": 0.17780154943466187, |
| "learning_rate": 0.0001, |
| "loss": 1.668, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.1180871828236825, |
| "grad_norm": 0.17763271927833557, |
| "learning_rate": 0.0001, |
| "loss": 1.7006, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.11857514638906962, |
| "grad_norm": 0.17323441803455353, |
| "learning_rate": 0.0001, |
| "loss": 1.5985, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11906310995445674, |
| "grad_norm": 0.1981297731399536, |
| "learning_rate": 0.0001, |
| "loss": 1.7938, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11955107351984386, |
| "grad_norm": 0.1856129914522171, |
| "learning_rate": 0.0001, |
| "loss": 1.7469, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.12003903708523098, |
| "grad_norm": 0.17878711223602295, |
| "learning_rate": 0.0001, |
| "loss": 1.7156, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.12052700065061808, |
| "grad_norm": 0.18860337138175964, |
| "learning_rate": 0.0001, |
| "loss": 1.6269, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1210149642160052, |
| "grad_norm": 0.17960023880004883, |
| "learning_rate": 0.0001, |
| "loss": 1.7484, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.12150292778139232, |
| "grad_norm": 0.21390804648399353, |
| "learning_rate": 0.0001, |
| "loss": 1.7815, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.12199089134677944, |
| "grad_norm": 0.18213345110416412, |
| "learning_rate": 0.0001, |
| "loss": 1.8368, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12247885491216656, |
| "grad_norm": 0.19667306542396545, |
| "learning_rate": 0.0001, |
| "loss": 1.7547, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.12296681847755368, |
| "grad_norm": 0.18796378374099731, |
| "learning_rate": 0.0001, |
| "loss": 1.6831, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.12345478204294079, |
| "grad_norm": 0.18432985246181488, |
| "learning_rate": 0.0001, |
| "loss": 1.8219, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12394274560832791, |
| "grad_norm": 0.19263121485710144, |
| "learning_rate": 0.0001, |
| "loss": 1.7033, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.12443070917371503, |
| "grad_norm": 0.19383201003074646, |
| "learning_rate": 0.0001, |
| "loss": 1.723, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.12491867273910215, |
| "grad_norm": 0.17456290125846863, |
| "learning_rate": 0.0001, |
| "loss": 1.7354, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12540663630448926, |
| "grad_norm": 0.2073334902524948, |
| "learning_rate": 0.0001, |
| "loss": 1.7359, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1258945998698764, |
| "grad_norm": 0.1819145232439041, |
| "learning_rate": 0.0001, |
| "loss": 1.661, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1263825634352635, |
| "grad_norm": 0.18823570013046265, |
| "learning_rate": 0.0001, |
| "loss": 1.7093, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.12687052700065063, |
| "grad_norm": 0.2142113894224167, |
| "learning_rate": 0.0001, |
| "loss": 1.7367, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12735849056603774, |
| "grad_norm": 0.17133839428424835, |
| "learning_rate": 0.0001, |
| "loss": 1.7257, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.12784645413142484, |
| "grad_norm": 0.20852066576480865, |
| "learning_rate": 0.0001, |
| "loss": 1.7453, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.12833441769681198, |
| "grad_norm": 0.19172458350658417, |
| "learning_rate": 0.0001, |
| "loss": 1.817, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12882238126219908, |
| "grad_norm": 0.1805960088968277, |
| "learning_rate": 0.0001, |
| "loss": 1.7679, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.12931034482758622, |
| "grad_norm": 0.2055218368768692, |
| "learning_rate": 0.0001, |
| "loss": 1.7874, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12979830839297332, |
| "grad_norm": 0.16831174492835999, |
| "learning_rate": 0.0001, |
| "loss": 1.6342, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.13028627195836046, |
| "grad_norm": 0.17563872039318085, |
| "learning_rate": 0.0001, |
| "loss": 1.7768, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.13077423552374756, |
| "grad_norm": 0.1891409158706665, |
| "learning_rate": 0.0001, |
| "loss": 1.7653, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.13126219908913467, |
| "grad_norm": 0.2160748541355133, |
| "learning_rate": 0.0001, |
| "loss": 1.6957, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.1317501626545218, |
| "grad_norm": 0.16802331805229187, |
| "learning_rate": 0.0001, |
| "loss": 1.6474, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1322381262199089, |
| "grad_norm": 0.21498991549015045, |
| "learning_rate": 0.0001, |
| "loss": 1.7201, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.13272608978529604, |
| "grad_norm": 0.1941365897655487, |
| "learning_rate": 0.0001, |
| "loss": 1.7387, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.13321405335068315, |
| "grad_norm": 0.19020740687847137, |
| "learning_rate": 0.0001, |
| "loss": 1.6985, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.13370201691607025, |
| "grad_norm": 0.18627683818340302, |
| "learning_rate": 0.0001, |
| "loss": 1.7752, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1341899804814574, |
| "grad_norm": 0.1916990429162979, |
| "learning_rate": 0.0001, |
| "loss": 1.7438, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.1346779440468445, |
| "grad_norm": 0.18649545311927795, |
| "learning_rate": 0.0001, |
| "loss": 1.663, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.13516590761223163, |
| "grad_norm": 0.17986956238746643, |
| "learning_rate": 0.0001, |
| "loss": 1.7905, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.13565387117761873, |
| "grad_norm": 0.18601469695568085, |
| "learning_rate": 0.0001, |
| "loss": 1.5608, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.13614183474300587, |
| "grad_norm": 0.19612380862236023, |
| "learning_rate": 0.0001, |
| "loss": 1.7317, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.13662979830839297, |
| "grad_norm": 0.17528840899467468, |
| "learning_rate": 0.0001, |
| "loss": 1.7114, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13711776187378008, |
| "grad_norm": 0.196456179022789, |
| "learning_rate": 0.0001, |
| "loss": 1.674, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.13760572543916721, |
| "grad_norm": 0.18218737840652466, |
| "learning_rate": 0.0001, |
| "loss": 1.6971, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.13809368900455432, |
| "grad_norm": 0.18146923184394836, |
| "learning_rate": 0.0001, |
| "loss": 1.7656, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.13858165256994145, |
| "grad_norm": 0.17707045376300812, |
| "learning_rate": 0.0001, |
| "loss": 1.6322, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.13906961613532856, |
| "grad_norm": 0.18990135192871094, |
| "learning_rate": 0.0001, |
| "loss": 1.7412, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13955757970071567, |
| "grad_norm": 0.17993967235088348, |
| "learning_rate": 0.0001, |
| "loss": 1.6734, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1400455432661028, |
| "grad_norm": 0.20445284247398376, |
| "learning_rate": 0.0001, |
| "loss": 1.9164, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1405335068314899, |
| "grad_norm": 0.18881991505622864, |
| "learning_rate": 0.0001, |
| "loss": 1.8395, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.14102147039687704, |
| "grad_norm": 0.17268231511116028, |
| "learning_rate": 0.0001, |
| "loss": 1.6494, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.14150943396226415, |
| "grad_norm": 0.17375007271766663, |
| "learning_rate": 0.0001, |
| "loss": 1.6968, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14199739752765128, |
| "grad_norm": 0.17844517529010773, |
| "learning_rate": 0.0001, |
| "loss": 1.8686, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1424853610930384, |
| "grad_norm": 0.18538935482501984, |
| "learning_rate": 0.0001, |
| "loss": 1.8035, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.1429733246584255, |
| "grad_norm": 0.18314018845558167, |
| "learning_rate": 0.0001, |
| "loss": 1.8051, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.14346128822381263, |
| "grad_norm": 0.18008261919021606, |
| "learning_rate": 0.0001, |
| "loss": 1.7992, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.14394925178919973, |
| "grad_norm": 0.19243541359901428, |
| "learning_rate": 0.0001, |
| "loss": 1.7394, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.14443721535458687, |
| "grad_norm": 0.18523713946342468, |
| "learning_rate": 0.0001, |
| "loss": 1.7845, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14492517891997397, |
| "grad_norm": 0.1781051605939865, |
| "learning_rate": 0.0001, |
| "loss": 1.6748, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.14541314248536108, |
| "grad_norm": 0.18994836509227753, |
| "learning_rate": 0.0001, |
| "loss": 1.704, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1459011060507482, |
| "grad_norm": 0.17285694181919098, |
| "learning_rate": 0.0001, |
| "loss": 1.7832, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.14638906961613532, |
| "grad_norm": 0.20339974761009216, |
| "learning_rate": 0.0001, |
| "loss": 1.7191, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14687703318152245, |
| "grad_norm": 0.17608943581581116, |
| "learning_rate": 0.0001, |
| "loss": 1.6315, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.14736499674690956, |
| "grad_norm": 0.17653749883174896, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1478529603122967, |
| "grad_norm": 0.1792931854724884, |
| "learning_rate": 0.0001, |
| "loss": 1.7027, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1483409238776838, |
| "grad_norm": 0.18247826397418976, |
| "learning_rate": 0.0001, |
| "loss": 1.7433, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1488288874430709, |
| "grad_norm": 0.1712041050195694, |
| "learning_rate": 0.0001, |
| "loss": 1.6548, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.14931685100845804, |
| "grad_norm": 0.184691920876503, |
| "learning_rate": 0.0001, |
| "loss": 1.7226, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.14980481457384515, |
| "grad_norm": 0.1834600865840912, |
| "learning_rate": 0.0001, |
| "loss": 1.7894, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.15029277813923228, |
| "grad_norm": 0.1753443032503128, |
| "learning_rate": 0.0001, |
| "loss": 1.636, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1507807417046194, |
| "grad_norm": 0.16590848565101624, |
| "learning_rate": 0.0001, |
| "loss": 1.6802, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1512687052700065, |
| "grad_norm": 0.17210128903388977, |
| "learning_rate": 0.0001, |
| "loss": 1.758, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.15175666883539363, |
| "grad_norm": 0.19016823172569275, |
| "learning_rate": 0.0001, |
| "loss": 1.8243, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.15224463240078073, |
| "grad_norm": 0.1756354421377182, |
| "learning_rate": 0.0001, |
| "loss": 1.7666, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.15273259596616787, |
| "grad_norm": 0.19266565144062042, |
| "learning_rate": 0.0001, |
| "loss": 1.7856, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.15322055953155497, |
| "grad_norm": 0.17626765370368958, |
| "learning_rate": 0.0001, |
| "loss": 1.7453, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.1537085230969421, |
| "grad_norm": 0.1796361356973648, |
| "learning_rate": 0.0001, |
| "loss": 1.8428, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1541964866623292, |
| "grad_norm": 0.1971481889486313, |
| "learning_rate": 0.0001, |
| "loss": 1.8298, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.15468445022771632, |
| "grad_norm": 0.17479249835014343, |
| "learning_rate": 0.0001, |
| "loss": 1.7243, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.15517241379310345, |
| "grad_norm": 0.18558745086193085, |
| "learning_rate": 0.0001, |
| "loss": 1.8265, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.15566037735849056, |
| "grad_norm": 0.17821088433265686, |
| "learning_rate": 0.0001, |
| "loss": 1.6735, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1561483409238777, |
| "grad_norm": 0.17939302325248718, |
| "learning_rate": 0.0001, |
| "loss": 1.7158, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1566363044892648, |
| "grad_norm": 0.17538347840309143, |
| "learning_rate": 0.0001, |
| "loss": 1.7467, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1571242680546519, |
| "grad_norm": 0.1796545684337616, |
| "learning_rate": 0.0001, |
| "loss": 1.7148, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.15761223162003904, |
| "grad_norm": 0.19828006625175476, |
| "learning_rate": 0.0001, |
| "loss": 1.8431, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.15810019518542615, |
| "grad_norm": 0.17246133089065552, |
| "learning_rate": 0.0001, |
| "loss": 1.7291, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.15858815875081328, |
| "grad_norm": 0.1835339218378067, |
| "learning_rate": 0.0001, |
| "loss": 1.7319, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.15907612231620039, |
| "grad_norm": 0.18122561275959015, |
| "learning_rate": 0.0001, |
| "loss": 1.7263, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.15956408588158752, |
| "grad_norm": 0.19297321140766144, |
| "learning_rate": 0.0001, |
| "loss": 1.8792, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.16005204944697463, |
| "grad_norm": 0.1762656420469284, |
| "learning_rate": 0.0001, |
| "loss": 1.7495, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.16054001301236173, |
| "grad_norm": 0.17146944999694824, |
| "learning_rate": 0.0001, |
| "loss": 1.7089, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.16102797657774887, |
| "grad_norm": 0.17192597687244415, |
| "learning_rate": 0.0001, |
| "loss": 1.694, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.16151594014313597, |
| "grad_norm": 0.17271386086940765, |
| "learning_rate": 0.0001, |
| "loss": 1.6223, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1620039037085231, |
| "grad_norm": 0.17589011788368225, |
| "learning_rate": 0.0001, |
| "loss": 1.7123, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1624918672739102, |
| "grad_norm": 0.17920418083667755, |
| "learning_rate": 0.0001, |
| "loss": 1.6938, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.16297983083929735, |
| "grad_norm": 0.16645678877830505, |
| "learning_rate": 0.0001, |
| "loss": 1.6704, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.16346779440468445, |
| "grad_norm": 0.1698988974094391, |
| "learning_rate": 0.0001, |
| "loss": 1.7562, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.16395575797007156, |
| "grad_norm": 0.17255748808383942, |
| "learning_rate": 0.0001, |
| "loss": 1.7408, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.1644437215354587, |
| "grad_norm": 0.16908328235149384, |
| "learning_rate": 0.0001, |
| "loss": 1.711, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1649316851008458, |
| "grad_norm": 0.17891424894332886, |
| "learning_rate": 0.0001, |
| "loss": 1.7199, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.16541964866623293, |
| "grad_norm": 0.17500531673431396, |
| "learning_rate": 0.0001, |
| "loss": 1.8027, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.16590761223162004, |
| "grad_norm": 0.1908222734928131, |
| "learning_rate": 0.0001, |
| "loss": 1.7267, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16639557579700714, |
| "grad_norm": 0.16457560658454895, |
| "learning_rate": 0.0001, |
| "loss": 1.6551, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.16688353936239428, |
| "grad_norm": 0.17455148696899414, |
| "learning_rate": 0.0001, |
| "loss": 1.7536, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.16737150292778138, |
| "grad_norm": 0.24865932762622833, |
| "learning_rate": 0.0001, |
| "loss": 1.7038, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.16785946649316852, |
| "grad_norm": 0.16769102215766907, |
| "learning_rate": 0.0001, |
| "loss": 1.6666, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.16834743005855562, |
| "grad_norm": 0.17845629155635834, |
| "learning_rate": 0.0001, |
| "loss": 1.7729, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.16883539362394276, |
| "grad_norm": 0.18893101811408997, |
| "learning_rate": 0.0001, |
| "loss": 1.6953, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.16932335718932987, |
| "grad_norm": 0.17489705979824066, |
| "learning_rate": 0.0001, |
| "loss": 1.6451, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.16981132075471697, |
| "grad_norm": 0.1895252764225006, |
| "learning_rate": 0.0001, |
| "loss": 1.6664, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.1702992843201041, |
| "grad_norm": 0.18796460330486298, |
| "learning_rate": 0.0001, |
| "loss": 1.8179, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1707872478854912, |
| "grad_norm": 0.18239444494247437, |
| "learning_rate": 0.0001, |
| "loss": 1.7895, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.17127521145087835, |
| "grad_norm": 0.18578602373600006, |
| "learning_rate": 0.0001, |
| "loss": 1.7201, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.17176317501626545, |
| "grad_norm": 0.17505811154842377, |
| "learning_rate": 0.0001, |
| "loss": 1.6738, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.17225113858165256, |
| "grad_norm": 0.16880185902118683, |
| "learning_rate": 0.0001, |
| "loss": 1.7064, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1727391021470397, |
| "grad_norm": 0.1847655326128006, |
| "learning_rate": 0.0001, |
| "loss": 1.6227, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1732270657124268, |
| "grad_norm": 0.18033885955810547, |
| "learning_rate": 0.0001, |
| "loss": 1.7613, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.17371502927781393, |
| "grad_norm": 0.2022799551486969, |
| "learning_rate": 0.0001, |
| "loss": 1.6975, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.17420299284320104, |
| "grad_norm": 0.18487118184566498, |
| "learning_rate": 0.0001, |
| "loss": 1.6245, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.17469095640858817, |
| "grad_norm": 0.18200282752513885, |
| "learning_rate": 0.0001, |
| "loss": 1.8013, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.17517891997397528, |
| "grad_norm": 0.16840700805187225, |
| "learning_rate": 0.0001, |
| "loss": 1.6904, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.17566688353936238, |
| "grad_norm": 0.17556121945381165, |
| "learning_rate": 0.0001, |
| "loss": 1.7331, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17615484710474952, |
| "grad_norm": 0.18641792237758636, |
| "learning_rate": 0.0001, |
| "loss": 1.8248, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.17664281067013662, |
| "grad_norm": 0.16753801703453064, |
| "learning_rate": 0.0001, |
| "loss": 1.591, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.17713077423552376, |
| "grad_norm": 0.16265541315078735, |
| "learning_rate": 0.0001, |
| "loss": 1.5814, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.17761873780091086, |
| "grad_norm": 0.17881396412849426, |
| "learning_rate": 0.0001, |
| "loss": 1.8452, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.17810670136629797, |
| "grad_norm": 0.18160590529441833, |
| "learning_rate": 0.0001, |
| "loss": 1.7977, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1785946649316851, |
| "grad_norm": 0.1778435856103897, |
| "learning_rate": 0.0001, |
| "loss": 1.7319, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1790826284970722, |
| "grad_norm": 0.17236903309822083, |
| "learning_rate": 0.0001, |
| "loss": 1.6572, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.17957059206245934, |
| "grad_norm": 0.16980677843093872, |
| "learning_rate": 0.0001, |
| "loss": 1.6814, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.18005855562784645, |
| "grad_norm": 0.17113539576530457, |
| "learning_rate": 0.0001, |
| "loss": 1.5835, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.18054651919323358, |
| "grad_norm": 0.22926300764083862, |
| "learning_rate": 0.0001, |
| "loss": 1.7127, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1810344827586207, |
| "grad_norm": 0.1766396313905716, |
| "learning_rate": 0.0001, |
| "loss": 1.8002, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1815224463240078, |
| "grad_norm": 0.1911155730485916, |
| "learning_rate": 0.0001, |
| "loss": 1.7287, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.18201040988939493, |
| "grad_norm": 0.1996450275182724, |
| "learning_rate": 0.0001, |
| "loss": 1.5601, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.18249837345478204, |
| "grad_norm": 0.17531970143318176, |
| "learning_rate": 0.0001, |
| "loss": 1.674, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.18298633702016917, |
| "grad_norm": 0.19017955660820007, |
| "learning_rate": 0.0001, |
| "loss": 1.8052, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.18347430058555628, |
| "grad_norm": 0.195291206240654, |
| "learning_rate": 0.0001, |
| "loss": 1.6787, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.18396226415094338, |
| "grad_norm": 0.18030132353305817, |
| "learning_rate": 0.0001, |
| "loss": 1.6931, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.18445022771633052, |
| "grad_norm": 0.1725359857082367, |
| "learning_rate": 0.0001, |
| "loss": 1.5814, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.18493819128171762, |
| "grad_norm": 0.18235339224338531, |
| "learning_rate": 0.0001, |
| "loss": 1.7759, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.18542615484710476, |
| "grad_norm": 0.19052359461784363, |
| "learning_rate": 0.0001, |
| "loss": 1.7898, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18591411841249186, |
| "grad_norm": 0.1713322550058365, |
| "learning_rate": 0.0001, |
| "loss": 1.623, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.186402081977879, |
| "grad_norm": 0.19699741899967194, |
| "learning_rate": 0.0001, |
| "loss": 1.7517, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1868900455432661, |
| "grad_norm": 0.17510955035686493, |
| "learning_rate": 0.0001, |
| "loss": 1.7045, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.1873780091086532, |
| "grad_norm": 0.17883911728858948, |
| "learning_rate": 0.0001, |
| "loss": 1.6763, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.18786597267404034, |
| "grad_norm": 0.18562713265419006, |
| "learning_rate": 0.0001, |
| "loss": 1.6603, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.18835393623942745, |
| "grad_norm": 0.18200963735580444, |
| "learning_rate": 0.0001, |
| "loss": 1.7698, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.18884189980481458, |
| "grad_norm": 0.192865788936615, |
| "learning_rate": 0.0001, |
| "loss": 1.8058, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1893298633702017, |
| "grad_norm": 0.17498141527175903, |
| "learning_rate": 0.0001, |
| "loss": 1.657, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1898178269355888, |
| "grad_norm": 0.17550218105316162, |
| "learning_rate": 0.0001, |
| "loss": 1.7638, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.19030579050097593, |
| "grad_norm": 0.19263967871665955, |
| "learning_rate": 0.0001, |
| "loss": 1.7375, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.19079375406636304, |
| "grad_norm": 0.1728338897228241, |
| "learning_rate": 0.0001, |
| "loss": 1.7467, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.19128171763175017, |
| "grad_norm": 0.17929600179195404, |
| "learning_rate": 0.0001, |
| "loss": 1.6489, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.19176968119713728, |
| "grad_norm": 0.18325988948345184, |
| "learning_rate": 0.0001, |
| "loss": 1.8676, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.1922576447625244, |
| "grad_norm": 0.17365989089012146, |
| "learning_rate": 0.0001, |
| "loss": 1.6916, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.19274560832791152, |
| "grad_norm": 0.17361170053482056, |
| "learning_rate": 0.0001, |
| "loss": 1.7118, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.19323357189329862, |
| "grad_norm": 0.181492879986763, |
| "learning_rate": 0.0001, |
| "loss": 1.7197, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.19372153545868576, |
| "grad_norm": 0.19113008677959442, |
| "learning_rate": 0.0001, |
| "loss": 1.788, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.19420949902407286, |
| "grad_norm": 0.173355832695961, |
| "learning_rate": 0.0001, |
| "loss": 1.6866, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.19469746258946, |
| "grad_norm": 0.1797139197587967, |
| "learning_rate": 0.0001, |
| "loss": 1.7505, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.1951854261548471, |
| "grad_norm": 0.18337444961071014, |
| "learning_rate": 0.0001, |
| "loss": 1.7099, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1956733897202342, |
| "grad_norm": 0.17387695610523224, |
| "learning_rate": 0.0001, |
| "loss": 1.737, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.19616135328562134, |
| "grad_norm": 0.1695685237646103, |
| "learning_rate": 0.0001, |
| "loss": 1.6916, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.19664931685100845, |
| "grad_norm": 0.1874959021806717, |
| "learning_rate": 0.0001, |
| "loss": 1.6919, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.19713728041639558, |
| "grad_norm": 0.17886492609977722, |
| "learning_rate": 0.0001, |
| "loss": 1.737, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1976252439817827, |
| "grad_norm": 0.19390465319156647, |
| "learning_rate": 0.0001, |
| "loss": 1.8003, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.19811320754716982, |
| "grad_norm": 0.17292645573616028, |
| "learning_rate": 0.0001, |
| "loss": 1.6714, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.19860117111255693, |
| "grad_norm": 0.16998599469661713, |
| "learning_rate": 0.0001, |
| "loss": 1.7242, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.19908913467794404, |
| "grad_norm": 0.18668459355831146, |
| "learning_rate": 0.0001, |
| "loss": 1.7025, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.19957709824333117, |
| "grad_norm": 0.16807502508163452, |
| "learning_rate": 0.0001, |
| "loss": 1.6738, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.20006506180871828, |
| "grad_norm": 0.1849876344203949, |
| "learning_rate": 0.0001, |
| "loss": 1.8173, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2005530253741054, |
| "grad_norm": 0.18935902416706085, |
| "learning_rate": 0.0001, |
| "loss": 1.7108, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.20104098893949252, |
| "grad_norm": 0.17630939185619354, |
| "learning_rate": 0.0001, |
| "loss": 1.7023, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.20152895250487965, |
| "grad_norm": 0.19990061223506927, |
| "learning_rate": 0.0001, |
| "loss": 1.6862, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.20201691607026676, |
| "grad_norm": 0.18538086116313934, |
| "learning_rate": 0.0001, |
| "loss": 1.796, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.20250487963565386, |
| "grad_norm": 0.18812508881092072, |
| "learning_rate": 0.0001, |
| "loss": 1.7034, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.202992843201041, |
| "grad_norm": 0.19069646298885345, |
| "learning_rate": 0.0001, |
| "loss": 1.7504, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2034808067664281, |
| "grad_norm": 0.17794154584407806, |
| "learning_rate": 0.0001, |
| "loss": 1.6469, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.20396877033181524, |
| "grad_norm": 0.17641998827457428, |
| "learning_rate": 0.0001, |
| "loss": 1.7526, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.20445673389720234, |
| "grad_norm": 0.19693951308727264, |
| "learning_rate": 0.0001, |
| "loss": 1.7007, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.20494469746258945, |
| "grad_norm": 0.1921786069869995, |
| "learning_rate": 0.0001, |
| "loss": 1.7514, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.20543266102797658, |
| "grad_norm": 0.1899469792842865, |
| "learning_rate": 0.0001, |
| "loss": 1.7508, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2059206245933637, |
| "grad_norm": 0.16994713246822357, |
| "learning_rate": 0.0001, |
| "loss": 1.6313, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.20640858815875082, |
| "grad_norm": 0.20480570197105408, |
| "learning_rate": 0.0001, |
| "loss": 1.7714, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.20689655172413793, |
| "grad_norm": 0.20870919525623322, |
| "learning_rate": 0.0001, |
| "loss": 1.7782, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.20738451528952506, |
| "grad_norm": 0.18410471081733704, |
| "learning_rate": 0.0001, |
| "loss": 1.72, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.20787247885491217, |
| "grad_norm": 0.23531974852085114, |
| "learning_rate": 0.0001, |
| "loss": 1.8923, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.20836044242029927, |
| "grad_norm": 0.18552608788013458, |
| "learning_rate": 0.0001, |
| "loss": 1.7272, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2088484059856864, |
| "grad_norm": 0.2085346281528473, |
| "learning_rate": 0.0001, |
| "loss": 1.6953, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.20933636955107351, |
| "grad_norm": 0.1959279626607895, |
| "learning_rate": 0.0001, |
| "loss": 1.6288, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.20982433311646065, |
| "grad_norm": 0.17610879242420197, |
| "learning_rate": 0.0001, |
| "loss": 1.7151, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.21031229668184775, |
| "grad_norm": 0.1928284466266632, |
| "learning_rate": 0.0001, |
| "loss": 1.687, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.21080026024723486, |
| "grad_norm": 0.199452742934227, |
| "learning_rate": 0.0001, |
| "loss": 1.7704, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.211288223812622, |
| "grad_norm": 0.18074338138103485, |
| "learning_rate": 0.0001, |
| "loss": 1.7899, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2117761873780091, |
| "grad_norm": 0.19121356308460236, |
| "learning_rate": 0.0001, |
| "loss": 1.694, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.21226415094339623, |
| "grad_norm": 0.18307030200958252, |
| "learning_rate": 0.0001, |
| "loss": 1.6335, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.21275211450878334, |
| "grad_norm": 0.18400311470031738, |
| "learning_rate": 0.0001, |
| "loss": 1.7526, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.21324007807417047, |
| "grad_norm": 0.1944567859172821, |
| "learning_rate": 0.0001, |
| "loss": 1.7884, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.21372804163955758, |
| "grad_norm": 0.18847782909870148, |
| "learning_rate": 0.0001, |
| "loss": 1.6859, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2142160052049447, |
| "grad_norm": 0.17663119733333588, |
| "learning_rate": 0.0001, |
| "loss": 1.615, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.21470396877033182, |
| "grad_norm": 0.18704909086227417, |
| "learning_rate": 0.0001, |
| "loss": 1.7352, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21519193233571893, |
| "grad_norm": 0.19525641202926636, |
| "learning_rate": 0.0001, |
| "loss": 1.6241, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.21567989590110606, |
| "grad_norm": 0.19030174612998962, |
| "learning_rate": 0.0001, |
| "loss": 1.7425, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.21616785946649317, |
| "grad_norm": 0.18872150778770447, |
| "learning_rate": 0.0001, |
| "loss": 1.7177, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.21665582303188027, |
| "grad_norm": 0.17374157905578613, |
| "learning_rate": 0.0001, |
| "loss": 1.7236, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2171437865972674, |
| "grad_norm": 0.18159011006355286, |
| "learning_rate": 0.0001, |
| "loss": 1.6885, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2176317501626545, |
| "grad_norm": 0.18726180493831635, |
| "learning_rate": 0.0001, |
| "loss": 1.8226, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.21811971372804165, |
| "grad_norm": 0.193464457988739, |
| "learning_rate": 0.0001, |
| "loss": 1.7834, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.21860767729342875, |
| "grad_norm": 0.19700440764427185, |
| "learning_rate": 0.0001, |
| "loss": 1.6766, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2190956408588159, |
| "grad_norm": 0.16808220744132996, |
| "learning_rate": 0.0001, |
| "loss": 1.6773, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.219583604424203, |
| "grad_norm": 0.1885610967874527, |
| "learning_rate": 0.0001, |
| "loss": 1.7195, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2200715679895901, |
| "grad_norm": 0.17235183715820312, |
| "learning_rate": 0.0001, |
| "loss": 1.6651, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.22055953155497723, |
| "grad_norm": 0.17667032778263092, |
| "learning_rate": 0.0001, |
| "loss": 1.647, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.22104749512036434, |
| "grad_norm": 0.17659679055213928, |
| "learning_rate": 0.0001, |
| "loss": 1.8337, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.22153545868575147, |
| "grad_norm": 0.17201969027519226, |
| "learning_rate": 0.0001, |
| "loss": 1.7385, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.22202342225113858, |
| "grad_norm": 0.17937779426574707, |
| "learning_rate": 0.0001, |
| "loss": 1.7864, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2225113858165257, |
| "grad_norm": 0.1681385189294815, |
| "learning_rate": 0.0001, |
| "loss": 1.636, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.22299934938191282, |
| "grad_norm": 0.17030152678489685, |
| "learning_rate": 0.0001, |
| "loss": 1.7613, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.22348731294729993, |
| "grad_norm": 0.18430882692337036, |
| "learning_rate": 0.0001, |
| "loss": 1.7746, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.22397527651268706, |
| "grad_norm": 0.17070208489894867, |
| "learning_rate": 0.0001, |
| "loss": 1.619, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.22446324007807417, |
| "grad_norm": 0.1672583520412445, |
| "learning_rate": 0.0001, |
| "loss": 1.6935, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2249512036434613, |
| "grad_norm": 0.18070879578590393, |
| "learning_rate": 0.0001, |
| "loss": 1.7752, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.2254391672088484, |
| "grad_norm": 0.17931310832500458, |
| "learning_rate": 0.0001, |
| "loss": 1.8331, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2259271307742355, |
| "grad_norm": 0.18687482178211212, |
| "learning_rate": 0.0001, |
| "loss": 1.7745, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.22641509433962265, |
| "grad_norm": 0.18673428893089294, |
| "learning_rate": 0.0001, |
| "loss": 1.8001, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.22690305790500975, |
| "grad_norm": 0.18758326768875122, |
| "learning_rate": 0.0001, |
| "loss": 1.8024, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2273910214703969, |
| "grad_norm": 0.17651711404323578, |
| "learning_rate": 0.0001, |
| "loss": 1.6348, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.227878985035784, |
| "grad_norm": 0.17466424405574799, |
| "learning_rate": 0.0001, |
| "loss": 1.6529, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2283669486011711, |
| "grad_norm": 0.17049545049667358, |
| "learning_rate": 0.0001, |
| "loss": 1.6707, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.22885491216655823, |
| "grad_norm": 0.19238895177841187, |
| "learning_rate": 0.0001, |
| "loss": 1.7262, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.22934287573194534, |
| "grad_norm": 0.183549702167511, |
| "learning_rate": 0.0001, |
| "loss": 1.6949, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22983083929733247, |
| "grad_norm": 0.19222155213356018, |
| "learning_rate": 0.0001, |
| "loss": 1.7727, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.23031880286271958, |
| "grad_norm": 0.18078762292861938, |
| "learning_rate": 0.0001, |
| "loss": 1.8166, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2308067664281067, |
| "grad_norm": 0.17769628763198853, |
| "learning_rate": 0.0001, |
| "loss": 1.7215, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.23129472999349382, |
| "grad_norm": 0.1750006526708603, |
| "learning_rate": 0.0001, |
| "loss": 1.7311, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.23178269355888093, |
| "grad_norm": 0.1803676038980484, |
| "learning_rate": 0.0001, |
| "loss": 1.7596, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.23227065712426806, |
| "grad_norm": 0.18478356301784515, |
| "learning_rate": 0.0001, |
| "loss": 1.7262, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.23275862068965517, |
| "grad_norm": 0.16509763896465302, |
| "learning_rate": 0.0001, |
| "loss": 1.623, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2332465842550423, |
| "grad_norm": 0.19317001104354858, |
| "learning_rate": 0.0001, |
| "loss": 1.6284, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2337345478204294, |
| "grad_norm": 0.18081186711788177, |
| "learning_rate": 0.0001, |
| "loss": 1.6959, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2342225113858165, |
| "grad_norm": 0.18306545913219452, |
| "learning_rate": 0.0001, |
| "loss": 1.7328, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.23471047495120365, |
| "grad_norm": 0.18552261590957642, |
| "learning_rate": 0.0001, |
| "loss": 1.6847, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.23519843851659075, |
| "grad_norm": 0.17930322885513306, |
| "learning_rate": 0.0001, |
| "loss": 1.7678, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.23568640208197789, |
| "grad_norm": 0.17558367550373077, |
| "learning_rate": 0.0001, |
| "loss": 1.6756, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.236174365647365, |
| "grad_norm": 0.18899041414260864, |
| "learning_rate": 0.0001, |
| "loss": 1.7778, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.23666232921275213, |
| "grad_norm": 0.17528998851776123, |
| "learning_rate": 0.0001, |
| "loss": 1.6651, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.23715029277813923, |
| "grad_norm": 0.16732053458690643, |
| "learning_rate": 0.0001, |
| "loss": 1.6796, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.23763825634352634, |
| "grad_norm": 0.1849820613861084, |
| "learning_rate": 0.0001, |
| "loss": 1.737, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.23812621990891347, |
| "grad_norm": 0.1789163500070572, |
| "learning_rate": 0.0001, |
| "loss": 1.6919, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.23861418347430058, |
| "grad_norm": 0.1739804446697235, |
| "learning_rate": 0.0001, |
| "loss": 1.8225, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2391021470396877, |
| "grad_norm": 0.18246984481811523, |
| "learning_rate": 0.0001, |
| "loss": 1.734, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23959011060507482, |
| "grad_norm": 0.17464157938957214, |
| "learning_rate": 0.0001, |
| "loss": 1.7442, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.24007807417046195, |
| "grad_norm": 0.19501306116580963, |
| "learning_rate": 0.0001, |
| "loss": 1.7521, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.24056603773584906, |
| "grad_norm": 0.17958857119083405, |
| "learning_rate": 0.0001, |
| "loss": 1.8191, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.24105400130123616, |
| "grad_norm": 0.18241986632347107, |
| "learning_rate": 0.0001, |
| "loss": 1.7709, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2415419648666233, |
| "grad_norm": 0.18529468774795532, |
| "learning_rate": 0.0001, |
| "loss": 1.6871, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2420299284320104, |
| "grad_norm": 0.18519562482833862, |
| "learning_rate": 0.0001, |
| "loss": 1.7605, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.24251789199739754, |
| "grad_norm": 0.17868764698505402, |
| "learning_rate": 0.0001, |
| "loss": 1.725, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.24300585556278465, |
| "grad_norm": 0.17040537297725677, |
| "learning_rate": 0.0001, |
| "loss": 1.6161, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.24349381912817175, |
| "grad_norm": 0.1820056289434433, |
| "learning_rate": 0.0001, |
| "loss": 1.7249, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.24398178269355889, |
| "grad_norm": 0.1877366453409195, |
| "learning_rate": 0.0001, |
| "loss": 1.6976, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.244469746258946, |
| "grad_norm": 0.1717415153980255, |
| "learning_rate": 0.0001, |
| "loss": 1.6109, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.24495770982433313, |
| "grad_norm": 0.17338915169239044, |
| "learning_rate": 0.0001, |
| "loss": 1.7433, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.24544567338972023, |
| "grad_norm": 0.18489517271518707, |
| "learning_rate": 0.0001, |
| "loss": 1.7283, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.24593363695510737, |
| "grad_norm": 0.17153921723365784, |
| "learning_rate": 0.0001, |
| "loss": 1.7261, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.24642160052049447, |
| "grad_norm": 0.19024662673473358, |
| "learning_rate": 0.0001, |
| "loss": 1.8498, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.24690956408588158, |
| "grad_norm": 0.1675989329814911, |
| "learning_rate": 0.0001, |
| "loss": 1.5903, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2473975276512687, |
| "grad_norm": 0.18422546982765198, |
| "learning_rate": 0.0001, |
| "loss": 1.7294, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.24788549121665582, |
| "grad_norm": 0.17943088710308075, |
| "learning_rate": 0.0001, |
| "loss": 1.6842, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.24837345478204295, |
| "grad_norm": 0.18048308789730072, |
| "learning_rate": 0.0001, |
| "loss": 1.677, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.24886141834743006, |
| "grad_norm": 0.17185211181640625, |
| "learning_rate": 0.0001, |
| "loss": 1.6738, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.24934938191281716, |
| "grad_norm": 0.1717991977930069, |
| "learning_rate": 0.0001, |
| "loss": 1.7077, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2498373454782043, |
| "grad_norm": 0.18661388754844666, |
| "learning_rate": 0.0001, |
| "loss": 1.8163, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2503253090435914, |
| "grad_norm": 0.19672876596450806, |
| "learning_rate": 0.0001, |
| "loss": 1.7733, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2508132726089785, |
| "grad_norm": 0.18052315711975098, |
| "learning_rate": 0.0001, |
| "loss": 1.7242, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.25130123617436567, |
| "grad_norm": 0.17241713404655457, |
| "learning_rate": 0.0001, |
| "loss": 1.6513, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2517891997397528, |
| "grad_norm": 0.1861806958913803, |
| "learning_rate": 0.0001, |
| "loss": 1.7189, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2522771633051399, |
| "grad_norm": 0.17267678678035736, |
| "learning_rate": 0.0001, |
| "loss": 1.5993, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.252765126870527, |
| "grad_norm": 0.16948658227920532, |
| "learning_rate": 0.0001, |
| "loss": 1.5733, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2532530904359141, |
| "grad_norm": 0.18075625598430634, |
| "learning_rate": 0.0001, |
| "loss": 1.7755, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.25374105400130126, |
| "grad_norm": 0.17203836143016815, |
| "learning_rate": 0.0001, |
| "loss": 1.6755, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.25422901756668836, |
| "grad_norm": 0.1631672978401184, |
| "learning_rate": 0.0001, |
| "loss": 1.5949, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.25471698113207547, |
| "grad_norm": 0.1776244342327118, |
| "learning_rate": 0.0001, |
| "loss": 1.7231, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2552049446974626, |
| "grad_norm": 0.18010790646076202, |
| "learning_rate": 0.0001, |
| "loss": 1.7575, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2556929082628497, |
| "grad_norm": 0.16827166080474854, |
| "learning_rate": 0.0001, |
| "loss": 1.6907, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.25618087182823684, |
| "grad_norm": 0.19028151035308838, |
| "learning_rate": 0.0001, |
| "loss": 1.6602, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.25666883539362395, |
| "grad_norm": 0.17831748723983765, |
| "learning_rate": 0.0001, |
| "loss": 1.7746, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.25715679895901106, |
| "grad_norm": 0.19768738746643066, |
| "learning_rate": 0.0001, |
| "loss": 1.7111, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.25764476252439816, |
| "grad_norm": 0.1869453638792038, |
| "learning_rate": 0.0001, |
| "loss": 1.7493, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.25813272608978527, |
| "grad_norm": 0.17493435740470886, |
| "learning_rate": 0.0001, |
| "loss": 1.6401, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.25862068965517243, |
| "grad_norm": 0.1741894632577896, |
| "learning_rate": 0.0001, |
| "loss": 1.6737, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.25910865322055954, |
| "grad_norm": 0.19671699404716492, |
| "learning_rate": 0.0001, |
| "loss": 1.7265, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.25959661678594664, |
| "grad_norm": 0.1766589730978012, |
| "learning_rate": 0.0001, |
| "loss": 1.6655, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.26008458035133375, |
| "grad_norm": 0.17494948208332062, |
| "learning_rate": 0.0001, |
| "loss": 1.6571, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2605725439167209, |
| "grad_norm": 0.20303772389888763, |
| "learning_rate": 0.0001, |
| "loss": 1.7987, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.261060507482108, |
| "grad_norm": 0.18097007274627686, |
| "learning_rate": 0.0001, |
| "loss": 1.6341, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2615484710474951, |
| "grad_norm": 0.20877449214458466, |
| "learning_rate": 0.0001, |
| "loss": 1.7057, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.26203643461288223, |
| "grad_norm": 0.19047099351882935, |
| "learning_rate": 0.0001, |
| "loss": 1.7048, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.26252439817826934, |
| "grad_norm": 0.18251296877861023, |
| "learning_rate": 0.0001, |
| "loss": 1.6979, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2630123617436565, |
| "grad_norm": 0.18078570067882538, |
| "learning_rate": 0.0001, |
| "loss": 1.801, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2635003253090436, |
| "grad_norm": 0.18725551664829254, |
| "learning_rate": 0.0001, |
| "loss": 1.7638, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2639882888744307, |
| "grad_norm": 0.20769141614437103, |
| "learning_rate": 0.0001, |
| "loss": 1.8201, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2644762524398178, |
| "grad_norm": 0.16759508848190308, |
| "learning_rate": 0.0001, |
| "loss": 1.6739, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2649642160052049, |
| "grad_norm": 0.20297077298164368, |
| "learning_rate": 0.0001, |
| "loss": 1.8241, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.2654521795705921, |
| "grad_norm": 0.17038699984550476, |
| "learning_rate": 0.0001, |
| "loss": 1.6566, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2659401431359792, |
| "grad_norm": 0.17414064705371857, |
| "learning_rate": 0.0001, |
| "loss": 1.5866, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2664281067013663, |
| "grad_norm": 0.1856188178062439, |
| "learning_rate": 0.0001, |
| "loss": 1.7166, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2669160702667534, |
| "grad_norm": 0.17565833032131195, |
| "learning_rate": 0.0001, |
| "loss": 1.7206, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.2674040338321405, |
| "grad_norm": 0.18267709016799927, |
| "learning_rate": 0.0001, |
| "loss": 1.6728, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.26789199739752767, |
| "grad_norm": 0.18981780111789703, |
| "learning_rate": 0.0001, |
| "loss": 1.7425, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2683799609629148, |
| "grad_norm": 0.18254795670509338, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2688679245283019, |
| "grad_norm": 0.18846552073955536, |
| "learning_rate": 0.0001, |
| "loss": 1.6572, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.269355888093689, |
| "grad_norm": 0.1776316910982132, |
| "learning_rate": 0.0001, |
| "loss": 1.618, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2698438516590761, |
| "grad_norm": 0.1822226643562317, |
| "learning_rate": 0.0001, |
| "loss": 1.8876, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.27033181522446326, |
| "grad_norm": 0.1873788982629776, |
| "learning_rate": 0.0001, |
| "loss": 1.7301, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.27081977878985036, |
| "grad_norm": 0.19234952330589294, |
| "learning_rate": 0.0001, |
| "loss": 1.7235, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.27130774235523747, |
| "grad_norm": 0.17642012238502502, |
| "learning_rate": 0.0001, |
| "loss": 1.7258, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2717957059206246, |
| "grad_norm": 0.21255896985530853, |
| "learning_rate": 0.0001, |
| "loss": 1.6937, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.27228366948601174, |
| "grad_norm": 0.2181590497493744, |
| "learning_rate": 0.0001, |
| "loss": 1.9076, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.27277163305139884, |
| "grad_norm": 0.16595962643623352, |
| "learning_rate": 0.0001, |
| "loss": 1.5664, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.27325959661678595, |
| "grad_norm": 0.1832776963710785, |
| "learning_rate": 0.0001, |
| "loss": 1.658, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.27374756018217306, |
| "grad_norm": 0.18969666957855225, |
| "learning_rate": 0.0001, |
| "loss": 1.8031, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.27423552374756016, |
| "grad_norm": 0.1813500076532364, |
| "learning_rate": 0.0001, |
| "loss": 1.7209, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2747234873129473, |
| "grad_norm": 0.18055056035518646, |
| "learning_rate": 0.0001, |
| "loss": 1.7658, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.27521145087833443, |
| "grad_norm": 0.17362233996391296, |
| "learning_rate": 0.0001, |
| "loss": 1.7746, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.27569941444372154, |
| "grad_norm": 0.19305916130542755, |
| "learning_rate": 0.0001, |
| "loss": 1.9062, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.27618737800910864, |
| "grad_norm": 0.17458635568618774, |
| "learning_rate": 0.0001, |
| "loss": 1.6339, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.27667534157449575, |
| "grad_norm": 0.18760624527931213, |
| "learning_rate": 0.0001, |
| "loss": 1.6433, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2771633051398829, |
| "grad_norm": 0.17057117819786072, |
| "learning_rate": 0.0001, |
| "loss": 1.6318, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.27765126870527, |
| "grad_norm": 0.17930074036121368, |
| "learning_rate": 0.0001, |
| "loss": 1.7227, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2781392322706571, |
| "grad_norm": 0.17012158036231995, |
| "learning_rate": 0.0001, |
| "loss": 1.6309, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.27862719583604423, |
| "grad_norm": 0.17562495172023773, |
| "learning_rate": 0.0001, |
| "loss": 1.6351, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.27911515940143133, |
| "grad_norm": 0.18494853377342224, |
| "learning_rate": 0.0001, |
| "loss": 1.8355, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2796031229668185, |
| "grad_norm": 0.18261797726154327, |
| "learning_rate": 0.0001, |
| "loss": 1.6015, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2800910865322056, |
| "grad_norm": 0.18148979544639587, |
| "learning_rate": 0.0001, |
| "loss": 1.797, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2805790500975927, |
| "grad_norm": 0.16941653192043304, |
| "learning_rate": 0.0001, |
| "loss": 1.6382, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2810670136629798, |
| "grad_norm": 0.18611697852611542, |
| "learning_rate": 0.0001, |
| "loss": 1.6595, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.281554977228367, |
| "grad_norm": 0.16945675015449524, |
| "learning_rate": 0.0001, |
| "loss": 1.6678, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2820429407937541, |
| "grad_norm": 0.17999336123466492, |
| "learning_rate": 0.0001, |
| "loss": 1.7161, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2825309043591412, |
| "grad_norm": 0.185410276055336, |
| "learning_rate": 0.0001, |
| "loss": 1.6731, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2830188679245283, |
| "grad_norm": 0.1757509708404541, |
| "learning_rate": 0.0001, |
| "loss": 1.7162, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2835068314899154, |
| "grad_norm": 0.1721939593553543, |
| "learning_rate": 0.0001, |
| "loss": 1.6374, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.28399479505530256, |
| "grad_norm": 0.17961697280406952, |
| "learning_rate": 0.0001, |
| "loss": 1.5798, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.28448275862068967, |
| "grad_norm": 0.18612822890281677, |
| "learning_rate": 0.0001, |
| "loss": 1.7694, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2849707221860768, |
| "grad_norm": 0.18089883029460907, |
| "learning_rate": 0.0001, |
| "loss": 1.7426, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2854586857514639, |
| "grad_norm": 0.19402338564395905, |
| "learning_rate": 0.0001, |
| "loss": 1.7604, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.285946649316851, |
| "grad_norm": 0.18208986520767212, |
| "learning_rate": 0.0001, |
| "loss": 1.6998, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.28643461288223815, |
| "grad_norm": 0.19270221889019012, |
| "learning_rate": 0.0001, |
| "loss": 1.6564, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.28692257644762525, |
| "grad_norm": 0.17604075372219086, |
| "learning_rate": 0.0001, |
| "loss": 1.653, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.28741054001301236, |
| "grad_norm": 0.17964652180671692, |
| "learning_rate": 0.0001, |
| "loss": 1.7613, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.28789850357839947, |
| "grad_norm": 0.18317797780036926, |
| "learning_rate": 0.0001, |
| "loss": 1.6621, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2883864671437866, |
| "grad_norm": 0.18271799385547638, |
| "learning_rate": 0.0001, |
| "loss": 1.8067, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.28887443070917374, |
| "grad_norm": 0.19613641500473022, |
| "learning_rate": 0.0001, |
| "loss": 1.8544, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.28936239427456084, |
| "grad_norm": 0.19165842235088348, |
| "learning_rate": 0.0001, |
| "loss": 1.8834, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.28985035783994795, |
| "grad_norm": 0.18238607048988342, |
| "learning_rate": 0.0001, |
| "loss": 1.7776, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.29033832140533505, |
| "grad_norm": 0.16585291922092438, |
| "learning_rate": 0.0001, |
| "loss": 1.5959, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.29082628497072216, |
| "grad_norm": 0.1774480640888214, |
| "learning_rate": 0.0001, |
| "loss": 1.6114, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2913142485361093, |
| "grad_norm": 0.17970281839370728, |
| "learning_rate": 0.0001, |
| "loss": 1.79, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2918022121014964, |
| "grad_norm": 0.18806995451450348, |
| "learning_rate": 0.0001, |
| "loss": 1.7842, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.29229017566688353, |
| "grad_norm": 0.16845998167991638, |
| "learning_rate": 0.0001, |
| "loss": 1.6788, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.29277813923227064, |
| "grad_norm": 0.18506960570812225, |
| "learning_rate": 0.0001, |
| "loss": 1.758, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2932661027976578, |
| "grad_norm": 0.1771155744791031, |
| "learning_rate": 0.0001, |
| "loss": 1.7259, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2937540663630449, |
| "grad_norm": 0.1760523021221161, |
| "learning_rate": 0.0001, |
| "loss": 1.7807, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.294242029928432, |
| "grad_norm": 0.1765487641096115, |
| "learning_rate": 0.0001, |
| "loss": 1.5886, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2947299934938191, |
| "grad_norm": 0.17646710574626923, |
| "learning_rate": 0.0001, |
| "loss": 1.6508, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2952179570592062, |
| "grad_norm": 0.18383362889289856, |
| "learning_rate": 0.0001, |
| "loss": 1.7049, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2957059206245934, |
| "grad_norm": 0.18808609247207642, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.2961938841899805, |
| "grad_norm": 0.18178711831569672, |
| "learning_rate": 0.0001, |
| "loss": 1.7306, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.2966818477553676, |
| "grad_norm": 0.18499815464019775, |
| "learning_rate": 0.0001, |
| "loss": 1.6072, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2971698113207547, |
| "grad_norm": 0.18511821329593658, |
| "learning_rate": 0.0001, |
| "loss": 1.6383, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2976577748861418, |
| "grad_norm": 0.17731331288814545, |
| "learning_rate": 0.0001, |
| "loss": 1.738, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.298145738451529, |
| "grad_norm": 0.19273065030574799, |
| "learning_rate": 0.0001, |
| "loss": 1.6286, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.2986337020169161, |
| "grad_norm": 0.1858029067516327, |
| "learning_rate": 0.0001, |
| "loss": 1.6565, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2991216655823032, |
| "grad_norm": 0.18791264295578003, |
| "learning_rate": 0.0001, |
| "loss": 1.6857, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.2996096291476903, |
| "grad_norm": 0.19478711485862732, |
| "learning_rate": 0.0001, |
| "loss": 1.6655, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3000975927130774, |
| "grad_norm": 0.18538743257522583, |
| "learning_rate": 0.0001, |
| "loss": 1.701, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.30058555627846456, |
| "grad_norm": 0.1899065524339676, |
| "learning_rate": 0.0001, |
| "loss": 1.7014, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.30107351984385167, |
| "grad_norm": 0.19550780951976776, |
| "learning_rate": 0.0001, |
| "loss": 1.8021, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3015614834092388, |
| "grad_norm": 0.1695028841495514, |
| "learning_rate": 0.0001, |
| "loss": 1.6423, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3020494469746259, |
| "grad_norm": 0.18605121970176697, |
| "learning_rate": 0.0001, |
| "loss": 1.7441, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.302537410540013, |
| "grad_norm": 0.20526890456676483, |
| "learning_rate": 0.0001, |
| "loss": 1.7878, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.30302537410540015, |
| "grad_norm": 0.17033647000789642, |
| "learning_rate": 0.0001, |
| "loss": 1.688, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.30351333767078725, |
| "grad_norm": 0.1756584197282791, |
| "learning_rate": 0.0001, |
| "loss": 1.6914, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.30400130123617436, |
| "grad_norm": 0.18451380729675293, |
| "learning_rate": 0.0001, |
| "loss": 1.6135, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.30448926480156147, |
| "grad_norm": 0.17828862369060516, |
| "learning_rate": 0.0001, |
| "loss": 1.677, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3049772283669486, |
| "grad_norm": 0.17056816816329956, |
| "learning_rate": 0.0001, |
| "loss": 1.647, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.30546519193233573, |
| "grad_norm": 0.1786261945962906, |
| "learning_rate": 0.0001, |
| "loss": 1.7212, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.30595315549772284, |
| "grad_norm": 0.1788036823272705, |
| "learning_rate": 0.0001, |
| "loss": 1.6646, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.30644111906310995, |
| "grad_norm": 0.17864547669887543, |
| "learning_rate": 0.0001, |
| "loss": 1.7123, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.30692908262849705, |
| "grad_norm": 0.19462743401527405, |
| "learning_rate": 0.0001, |
| "loss": 1.7975, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3074170461938842, |
| "grad_norm": 0.17800424993038177, |
| "learning_rate": 0.0001, |
| "loss": 1.5499, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3079050097592713, |
| "grad_norm": 0.1856238692998886, |
| "learning_rate": 0.0001, |
| "loss": 1.9104, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3083929733246584, |
| "grad_norm": 0.17673279345035553, |
| "learning_rate": 0.0001, |
| "loss": 1.6382, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.30888093689004553, |
| "grad_norm": 0.18032853305339813, |
| "learning_rate": 0.0001, |
| "loss": 1.7374, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.30936890045543264, |
| "grad_norm": 0.17968174815177917, |
| "learning_rate": 0.0001, |
| "loss": 1.662, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3098568640208198, |
| "grad_norm": 0.1789749562740326, |
| "learning_rate": 0.0001, |
| "loss": 1.6044, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3103448275862069, |
| "grad_norm": 0.175074502825737, |
| "learning_rate": 0.0001, |
| "loss": 1.7047, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.310832791151594, |
| "grad_norm": 0.17318876087665558, |
| "learning_rate": 0.0001, |
| "loss": 1.6148, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3113207547169811, |
| "grad_norm": 0.20739412307739258, |
| "learning_rate": 0.0001, |
| "loss": 1.9162, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3118087182823682, |
| "grad_norm": 0.1787186861038208, |
| "learning_rate": 0.0001, |
| "loss": 1.6657, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3122966818477554, |
| "grad_norm": 0.1855590045452118, |
| "learning_rate": 0.0001, |
| "loss": 1.7058, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3127846454131425, |
| "grad_norm": 0.17939618229866028, |
| "learning_rate": 0.0001, |
| "loss": 1.7663, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3132726089785296, |
| "grad_norm": 0.17440925538539886, |
| "learning_rate": 0.0001, |
| "loss": 1.6337, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3137605725439167, |
| "grad_norm": 0.19695165753364563, |
| "learning_rate": 0.0001, |
| "loss": 1.6048, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3142485361093038, |
| "grad_norm": 0.16877804696559906, |
| "learning_rate": 0.0001, |
| "loss": 1.6677, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.314736499674691, |
| "grad_norm": 0.1742711365222931, |
| "learning_rate": 0.0001, |
| "loss": 1.6459, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3152244632400781, |
| "grad_norm": 0.18073154985904694, |
| "learning_rate": 0.0001, |
| "loss": 1.7392, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3157124268054652, |
| "grad_norm": 0.1714729368686676, |
| "learning_rate": 0.0001, |
| "loss": 1.6981, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3162003903708523, |
| "grad_norm": 0.17316888272762299, |
| "learning_rate": 0.0001, |
| "loss": 1.6746, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.31668835393623945, |
| "grad_norm": 0.1779533475637436, |
| "learning_rate": 0.0001, |
| "loss": 1.7709, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.31717631750162656, |
| "grad_norm": 0.1709679216146469, |
| "learning_rate": 0.0001, |
| "loss": 1.5822, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.31766428106701367, |
| "grad_norm": 0.17804761230945587, |
| "learning_rate": 0.0001, |
| "loss": 1.7638, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.31815224463240077, |
| "grad_norm": 0.18509989976882935, |
| "learning_rate": 0.0001, |
| "loss": 1.8712, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3186402081977879, |
| "grad_norm": 0.1751030832529068, |
| "learning_rate": 0.0001, |
| "loss": 1.7032, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.31912817176317504, |
| "grad_norm": 0.17232050001621246, |
| "learning_rate": 0.0001, |
| "loss": 1.6331, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.31961613532856215, |
| "grad_norm": 0.17198053002357483, |
| "learning_rate": 0.0001, |
| "loss": 1.7067, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.32010409889394925, |
| "grad_norm": 0.1797952950000763, |
| "learning_rate": 0.0001, |
| "loss": 1.687, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.32059206245933636, |
| "grad_norm": 0.1817045360803604, |
| "learning_rate": 0.0001, |
| "loss": 1.7448, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.32108002602472346, |
| "grad_norm": 0.1710105687379837, |
| "learning_rate": 0.0001, |
| "loss": 1.6186, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3215679895901106, |
| "grad_norm": 0.19661752879619598, |
| "learning_rate": 0.0001, |
| "loss": 1.7867, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.32205595315549773, |
| "grad_norm": 0.1723627746105194, |
| "learning_rate": 0.0001, |
| "loss": 1.5887, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.32254391672088484, |
| "grad_norm": 0.21364371478557587, |
| "learning_rate": 0.0001, |
| "loss": 1.8418, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.32303188028627194, |
| "grad_norm": 0.17605622112751007, |
| "learning_rate": 0.0001, |
| "loss": 1.6892, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.32351984385165905, |
| "grad_norm": 0.17851850390434265, |
| "learning_rate": 0.0001, |
| "loss": 1.7639, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3240078074170462, |
| "grad_norm": 0.1816173940896988, |
| "learning_rate": 0.0001, |
| "loss": 1.6567, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3244957709824333, |
| "grad_norm": 0.17529702186584473, |
| "learning_rate": 0.0001, |
| "loss": 1.6945, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3249837345478204, |
| "grad_norm": 0.16997535526752472, |
| "learning_rate": 0.0001, |
| "loss": 1.6833, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.32547169811320753, |
| "grad_norm": 0.18423834443092346, |
| "learning_rate": 0.0001, |
| "loss": 1.7486, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3259596616785947, |
| "grad_norm": 0.18737761676311493, |
| "learning_rate": 0.0001, |
| "loss": 1.7561, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3264476252439818, |
| "grad_norm": 0.17731069028377533, |
| "learning_rate": 0.0001, |
| "loss": 1.5679, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3269355888093689, |
| "grad_norm": 0.197565495967865, |
| "learning_rate": 0.0001, |
| "loss": 1.7457, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.327423552374756, |
| "grad_norm": 0.19319871068000793, |
| "learning_rate": 0.0001, |
| "loss": 1.8458, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3279115159401431, |
| "grad_norm": 0.18049995601177216, |
| "learning_rate": 0.0001, |
| "loss": 1.7076, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3283994795055303, |
| "grad_norm": 0.18907921016216278, |
| "learning_rate": 0.0001, |
| "loss": 1.7031, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3288874430709174, |
| "grad_norm": 0.18252240121364594, |
| "learning_rate": 0.0001, |
| "loss": 1.6304, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3293754066363045, |
| "grad_norm": 0.1798553168773651, |
| "learning_rate": 0.0001, |
| "loss": 1.6504, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3298633702016916, |
| "grad_norm": 0.1712959110736847, |
| "learning_rate": 0.0001, |
| "loss": 1.6827, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3303513337670787, |
| "grad_norm": 0.169499009847641, |
| "learning_rate": 0.0001, |
| "loss": 1.67, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.33083929733246586, |
| "grad_norm": 0.17921562492847443, |
| "learning_rate": 0.0001, |
| "loss": 1.6913, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.33132726089785297, |
| "grad_norm": 0.16730189323425293, |
| "learning_rate": 0.0001, |
| "loss": 1.6585, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3318152244632401, |
| "grad_norm": 0.1731245219707489, |
| "learning_rate": 0.0001, |
| "loss": 1.6891, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3323031880286272, |
| "grad_norm": 0.18989908695220947, |
| "learning_rate": 0.0001, |
| "loss": 1.8335, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3327911515940143, |
| "grad_norm": 0.17079797387123108, |
| "learning_rate": 0.0001, |
| "loss": 1.6074, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.33327911515940145, |
| "grad_norm": 0.1855732947587967, |
| "learning_rate": 0.0001, |
| "loss": 1.8051, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.33376707872478856, |
| "grad_norm": 0.19362801313400269, |
| "learning_rate": 0.0001, |
| "loss": 1.7934, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.33425504229017566, |
| "grad_norm": 0.18407447636127472, |
| "learning_rate": 0.0001, |
| "loss": 1.7676, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.33474300585556277, |
| "grad_norm": 0.17326807975769043, |
| "learning_rate": 0.0001, |
| "loss": 1.6867, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3352309694209499, |
| "grad_norm": 0.18629767000675201, |
| "learning_rate": 0.0001, |
| "loss": 1.7577, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.33571893298633704, |
| "grad_norm": 0.19202108681201935, |
| "learning_rate": 0.0001, |
| "loss": 1.7742, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.33620689655172414, |
| "grad_norm": 0.1923230141401291, |
| "learning_rate": 0.0001, |
| "loss": 1.7646, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.33669486011711125, |
| "grad_norm": 0.1855097860097885, |
| "learning_rate": 0.0001, |
| "loss": 1.7189, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.33718282368249836, |
| "grad_norm": 0.17661595344543457, |
| "learning_rate": 0.0001, |
| "loss": 1.6404, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3376707872478855, |
| "grad_norm": 0.19284093379974365, |
| "learning_rate": 0.0001, |
| "loss": 1.7621, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3381587508132726, |
| "grad_norm": 0.18006063997745514, |
| "learning_rate": 0.0001, |
| "loss": 1.6163, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.33864671437865973, |
| "grad_norm": 0.1881456822156906, |
| "learning_rate": 0.0001, |
| "loss": 1.732, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.33913467794404684, |
| "grad_norm": 0.17196986079216003, |
| "learning_rate": 0.0001, |
| "loss": 1.7099, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.33962264150943394, |
| "grad_norm": 0.186056986451149, |
| "learning_rate": 0.0001, |
| "loss": 1.8247, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3401106050748211, |
| "grad_norm": 0.18548524379730225, |
| "learning_rate": 0.0001, |
| "loss": 1.7185, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3405985686402082, |
| "grad_norm": 0.182390958070755, |
| "learning_rate": 0.0001, |
| "loss": 1.8278, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3410865322055953, |
| "grad_norm": 0.18355803191661835, |
| "learning_rate": 0.0001, |
| "loss": 1.6432, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3415744957709824, |
| "grad_norm": 0.176362544298172, |
| "learning_rate": 0.0001, |
| "loss": 1.71, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.34206245933636953, |
| "grad_norm": 0.1753791868686676, |
| "learning_rate": 0.0001, |
| "loss": 1.7079, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3425504229017567, |
| "grad_norm": 0.17833958566188812, |
| "learning_rate": 0.0001, |
| "loss": 1.6155, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3430383864671438, |
| "grad_norm": 0.18626241385936737, |
| "learning_rate": 0.0001, |
| "loss": 1.8164, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3435263500325309, |
| "grad_norm": 0.18040528893470764, |
| "learning_rate": 0.0001, |
| "loss": 1.7061, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.344014313597918, |
| "grad_norm": 0.18248948454856873, |
| "learning_rate": 0.0001, |
| "loss": 1.7002, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3445022771633051, |
| "grad_norm": 0.18155597150325775, |
| "learning_rate": 0.0001, |
| "loss": 1.7623, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3449902407286923, |
| "grad_norm": 0.18167854845523834, |
| "learning_rate": 0.0001, |
| "loss": 1.7209, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3454782042940794, |
| "grad_norm": 0.18228544294834137, |
| "learning_rate": 0.0001, |
| "loss": 1.7166, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3459661678594665, |
| "grad_norm": 0.1872456818819046, |
| "learning_rate": 0.0001, |
| "loss": 1.8073, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.3464541314248536, |
| "grad_norm": 0.17062440514564514, |
| "learning_rate": 0.0001, |
| "loss": 1.653, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3469420949902407, |
| "grad_norm": 0.17459101974964142, |
| "learning_rate": 0.0001, |
| "loss": 1.6982, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.34743005855562786, |
| "grad_norm": 0.1724562644958496, |
| "learning_rate": 0.0001, |
| "loss": 1.7638, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.34791802212101497, |
| "grad_norm": 0.16791169345378876, |
| "learning_rate": 0.0001, |
| "loss": 1.5451, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3484059856864021, |
| "grad_norm": 0.17250396311283112, |
| "learning_rate": 0.0001, |
| "loss": 1.6266, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3488939492517892, |
| "grad_norm": 0.17893101274967194, |
| "learning_rate": 0.0001, |
| "loss": 1.7786, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.34938191281717634, |
| "grad_norm": 0.1739955097436905, |
| "learning_rate": 0.0001, |
| "loss": 1.6286, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.34986987638256345, |
| "grad_norm": 0.183289036154747, |
| "learning_rate": 0.0001, |
| "loss": 1.7026, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.35035783994795056, |
| "grad_norm": 0.1769326776266098, |
| "learning_rate": 0.0001, |
| "loss": 1.7008, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.35084580351333766, |
| "grad_norm": 0.1857866495847702, |
| "learning_rate": 0.0001, |
| "loss": 1.6844, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.35133376707872477, |
| "grad_norm": 0.18651182949543, |
| "learning_rate": 0.0001, |
| "loss": 1.7033, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.35182173064411193, |
| "grad_norm": 0.18966244161128998, |
| "learning_rate": 0.0001, |
| "loss": 1.7673, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.35230969420949904, |
| "grad_norm": 0.1810387372970581, |
| "learning_rate": 0.0001, |
| "loss": 1.7161, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.35279765777488614, |
| "grad_norm": 0.17334793508052826, |
| "learning_rate": 0.0001, |
| "loss": 1.5957, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.35328562134027325, |
| "grad_norm": 0.18044047057628632, |
| "learning_rate": 0.0001, |
| "loss": 1.6443, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.35377358490566035, |
| "grad_norm": 0.18923179805278778, |
| "learning_rate": 0.0001, |
| "loss": 1.7244, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3542615484710475, |
| "grad_norm": 0.18003158271312714, |
| "learning_rate": 0.0001, |
| "loss": 1.7655, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3547495120364346, |
| "grad_norm": 0.18161289393901825, |
| "learning_rate": 0.0001, |
| "loss": 1.7199, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.35523747560182173, |
| "grad_norm": 0.19969268143177032, |
| "learning_rate": 0.0001, |
| "loss": 1.7138, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.35572543916720883, |
| "grad_norm": 0.1782398670911789, |
| "learning_rate": 0.0001, |
| "loss": 1.6231, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.35621340273259594, |
| "grad_norm": 0.20619311928749084, |
| "learning_rate": 0.0001, |
| "loss": 1.7745, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3567013662979831, |
| "grad_norm": 0.1790829598903656, |
| "learning_rate": 0.0001, |
| "loss": 1.6251, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3571893298633702, |
| "grad_norm": 0.17978286743164062, |
| "learning_rate": 0.0001, |
| "loss": 1.6495, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3576772934287573, |
| "grad_norm": 0.20410868525505066, |
| "learning_rate": 0.0001, |
| "loss": 1.7264, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.3581652569941444, |
| "grad_norm": 0.18116474151611328, |
| "learning_rate": 0.0001, |
| "loss": 1.7379, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3586532205595316, |
| "grad_norm": 0.20212259888648987, |
| "learning_rate": 0.0001, |
| "loss": 1.6964, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3591411841249187, |
| "grad_norm": 0.17794452607631683, |
| "learning_rate": 0.0001, |
| "loss": 1.6666, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3596291476903058, |
| "grad_norm": 0.17267604172229767, |
| "learning_rate": 0.0001, |
| "loss": 1.5783, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.3601171112556929, |
| "grad_norm": 0.21285639703273773, |
| "learning_rate": 0.0001, |
| "loss": 1.7575, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.36060507482108, |
| "grad_norm": 0.1822413057088852, |
| "learning_rate": 0.0001, |
| "loss": 1.7244, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.36109303838646717, |
| "grad_norm": 0.1909700185060501, |
| "learning_rate": 0.0001, |
| "loss": 1.7614, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3615810019518543, |
| "grad_norm": 0.19396358728408813, |
| "learning_rate": 0.0001, |
| "loss": 1.701, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3620689655172414, |
| "grad_norm": 0.18860898911952972, |
| "learning_rate": 0.0001, |
| "loss": 1.7215, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3625569290826285, |
| "grad_norm": 0.1891864836215973, |
| "learning_rate": 0.0001, |
| "loss": 1.7127, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.3630448926480156, |
| "grad_norm": 0.18963932991027832, |
| "learning_rate": 0.0001, |
| "loss": 1.6591, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.36353285621340276, |
| "grad_norm": 0.17823189496994019, |
| "learning_rate": 0.0001, |
| "loss": 1.7356, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.36402081977878986, |
| "grad_norm": 0.19020548462867737, |
| "learning_rate": 0.0001, |
| "loss": 1.7591, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.36450878334417697, |
| "grad_norm": 0.1983988732099533, |
| "learning_rate": 0.0001, |
| "loss": 1.6688, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3649967469095641, |
| "grad_norm": 0.17455948889255524, |
| "learning_rate": 0.0001, |
| "loss": 1.6981, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.3654847104749512, |
| "grad_norm": 0.19214113056659698, |
| "learning_rate": 0.0001, |
| "loss": 1.6858, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.36597267404033834, |
| "grad_norm": 0.19815075397491455, |
| "learning_rate": 0.0001, |
| "loss": 1.7088, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.36646063760572545, |
| "grad_norm": 0.18052172660827637, |
| "learning_rate": 0.0001, |
| "loss": 1.7046, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.36694860117111255, |
| "grad_norm": 0.19308723509311676, |
| "learning_rate": 0.0001, |
| "loss": 1.7145, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.36743656473649966, |
| "grad_norm": 0.20036271214485168, |
| "learning_rate": 0.0001, |
| "loss": 1.6666, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.36792452830188677, |
| "grad_norm": 0.18619637191295624, |
| "learning_rate": 0.0001, |
| "loss": 1.7144, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.36841249186727393, |
| "grad_norm": 0.19576376676559448, |
| "learning_rate": 0.0001, |
| "loss": 1.7653, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.36890045543266103, |
| "grad_norm": 0.18974775075912476, |
| "learning_rate": 0.0001, |
| "loss": 1.7836, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.36938841899804814, |
| "grad_norm": 0.17752085626125336, |
| "learning_rate": 0.0001, |
| "loss": 1.6496, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.36987638256343525, |
| "grad_norm": 0.1844092309474945, |
| "learning_rate": 0.0001, |
| "loss": 1.6863, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3703643461288224, |
| "grad_norm": 0.18102730810642242, |
| "learning_rate": 0.0001, |
| "loss": 1.5805, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.3708523096942095, |
| "grad_norm": 0.1773853898048401, |
| "learning_rate": 0.0001, |
| "loss": 1.7169, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3713402732595966, |
| "grad_norm": 0.17917506396770477, |
| "learning_rate": 0.0001, |
| "loss": 1.705, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3718282368249837, |
| "grad_norm": 0.1869056671857834, |
| "learning_rate": 0.0001, |
| "loss": 1.5653, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.37231620039037083, |
| "grad_norm": 0.1744174063205719, |
| "learning_rate": 0.0001, |
| "loss": 1.7014, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.372804163955758, |
| "grad_norm": 0.18072061240673065, |
| "learning_rate": 0.0001, |
| "loss": 1.6638, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.3732921275211451, |
| "grad_norm": 0.17331485450267792, |
| "learning_rate": 0.0001, |
| "loss": 1.6642, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3737800910865322, |
| "grad_norm": 0.1780969500541687, |
| "learning_rate": 0.0001, |
| "loss": 1.6563, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3742680546519193, |
| "grad_norm": 0.1959829479455948, |
| "learning_rate": 0.0001, |
| "loss": 1.8421, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3747560182173064, |
| "grad_norm": 0.18532420694828033, |
| "learning_rate": 0.0001, |
| "loss": 1.7752, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3752439817826936, |
| "grad_norm": 0.1861323118209839, |
| "learning_rate": 0.0001, |
| "loss": 1.6672, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3757319453480807, |
| "grad_norm": 0.17399415373802185, |
| "learning_rate": 0.0001, |
| "loss": 1.506, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3762199089134678, |
| "grad_norm": 0.1861727237701416, |
| "learning_rate": 0.0001, |
| "loss": 1.7164, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3767078724788549, |
| "grad_norm": 0.17571841180324554, |
| "learning_rate": 0.0001, |
| "loss": 1.6256, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.377195836044242, |
| "grad_norm": 0.1843421310186386, |
| "learning_rate": 0.0001, |
| "loss": 1.7273, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.37768379960962917, |
| "grad_norm": 0.17336313426494598, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3781717631750163, |
| "grad_norm": 0.173604816198349, |
| "learning_rate": 0.0001, |
| "loss": 1.6492, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3786597267404034, |
| "grad_norm": 0.19042102992534637, |
| "learning_rate": 0.0001, |
| "loss": 1.7671, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3791476903057905, |
| "grad_norm": 0.19237715005874634, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.3796356538711776, |
| "grad_norm": 0.1934320628643036, |
| "learning_rate": 0.0001, |
| "loss": 1.7704, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.38012361743656475, |
| "grad_norm": 0.18237414956092834, |
| "learning_rate": 0.0001, |
| "loss": 1.7163, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.38061158100195186, |
| "grad_norm": 0.1750539243221283, |
| "learning_rate": 0.0001, |
| "loss": 1.675, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.38109954456733897, |
| "grad_norm": 0.18425478041172028, |
| "learning_rate": 0.0001, |
| "loss": 1.803, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.38158750813272607, |
| "grad_norm": 0.17386333644390106, |
| "learning_rate": 0.0001, |
| "loss": 1.5968, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.38207547169811323, |
| "grad_norm": 0.1958070695400238, |
| "learning_rate": 0.0001, |
| "loss": 1.7117, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.38256343526350034, |
| "grad_norm": 0.18313884735107422, |
| "learning_rate": 0.0001, |
| "loss": 1.7634, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.38305139882888745, |
| "grad_norm": 0.1904529333114624, |
| "learning_rate": 0.0001, |
| "loss": 1.7944, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.38353936239427455, |
| "grad_norm": 0.18762192130088806, |
| "learning_rate": 0.0001, |
| "loss": 1.6575, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.38402732595966166, |
| "grad_norm": 0.1828492432832718, |
| "learning_rate": 0.0001, |
| "loss": 1.6451, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.3845152895250488, |
| "grad_norm": 0.19027890264987946, |
| "learning_rate": 0.0001, |
| "loss": 1.7919, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3850032530904359, |
| "grad_norm": 0.17186413705348969, |
| "learning_rate": 0.0001, |
| "loss": 1.6794, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.38549121665582303, |
| "grad_norm": 0.1878061145544052, |
| "learning_rate": 0.0001, |
| "loss": 1.6987, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.38597918022121014, |
| "grad_norm": 0.18121576309204102, |
| "learning_rate": 0.0001, |
| "loss": 1.796, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.38646714378659724, |
| "grad_norm": 0.19097453355789185, |
| "learning_rate": 0.0001, |
| "loss": 1.7155, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3869551073519844, |
| "grad_norm": 0.18126630783081055, |
| "learning_rate": 0.0001, |
| "loss": 1.7499, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3874430709173715, |
| "grad_norm": 0.1922173947095871, |
| "learning_rate": 0.0001, |
| "loss": 1.7447, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3879310344827586, |
| "grad_norm": 0.17474421858787537, |
| "learning_rate": 0.0001, |
| "loss": 1.6234, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3884189980481457, |
| "grad_norm": 0.19023337960243225, |
| "learning_rate": 0.0001, |
| "loss": 1.7285, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.38890696161353283, |
| "grad_norm": 0.17856378853321075, |
| "learning_rate": 0.0001, |
| "loss": 1.598, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.38939492517892, |
| "grad_norm": 0.17470918595790863, |
| "learning_rate": 0.0001, |
| "loss": 1.7021, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3898828887443071, |
| "grad_norm": 0.20127350091934204, |
| "learning_rate": 0.0001, |
| "loss": 1.6433, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.3903708523096942, |
| "grad_norm": 0.17676322162151337, |
| "learning_rate": 0.0001, |
| "loss": 1.6967, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3908588158750813, |
| "grad_norm": 0.17519530653953552, |
| "learning_rate": 0.0001, |
| "loss": 1.7357, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3913467794404684, |
| "grad_norm": 0.19061584770679474, |
| "learning_rate": 0.0001, |
| "loss": 1.7182, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3918347430058556, |
| "grad_norm": 0.18246081471443176, |
| "learning_rate": 0.0001, |
| "loss": 1.7688, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.3923227065712427, |
| "grad_norm": 0.20583999156951904, |
| "learning_rate": 0.0001, |
| "loss": 1.8205, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3928106701366298, |
| "grad_norm": 0.18392029404640198, |
| "learning_rate": 0.0001, |
| "loss": 1.7499, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3932986337020169, |
| "grad_norm": 0.18296070396900177, |
| "learning_rate": 0.0001, |
| "loss": 1.7422, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.39378659726740406, |
| "grad_norm": 0.176628977060318, |
| "learning_rate": 0.0001, |
| "loss": 1.6818, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.39427456083279117, |
| "grad_norm": 0.17783887684345245, |
| "learning_rate": 0.0001, |
| "loss": 1.6935, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.39476252439817827, |
| "grad_norm": 0.18225261569023132, |
| "learning_rate": 0.0001, |
| "loss": 1.7117, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3952504879635654, |
| "grad_norm": 0.18413884937763214, |
| "learning_rate": 0.0001, |
| "loss": 1.6266, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3957384515289525, |
| "grad_norm": 0.18847863376140594, |
| "learning_rate": 0.0001, |
| "loss": 1.6942, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.39622641509433965, |
| "grad_norm": 0.177464559674263, |
| "learning_rate": 0.0001, |
| "loss": 1.7731, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.39671437865972675, |
| "grad_norm": 0.18517576158046722, |
| "learning_rate": 0.0001, |
| "loss": 1.709, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.39720234222511386, |
| "grad_norm": 0.18677739799022675, |
| "learning_rate": 0.0001, |
| "loss": 1.709, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.39769030579050096, |
| "grad_norm": 0.1786472350358963, |
| "learning_rate": 0.0001, |
| "loss": 1.6966, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.39817826935588807, |
| "grad_norm": 0.18321356177330017, |
| "learning_rate": 0.0001, |
| "loss": 1.6611, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.39866623292127523, |
| "grad_norm": 0.19883863627910614, |
| "learning_rate": 0.0001, |
| "loss": 1.7824, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.39915419648666234, |
| "grad_norm": 0.18374767899513245, |
| "learning_rate": 0.0001, |
| "loss": 1.8102, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.39964216005204944, |
| "grad_norm": 0.1768617182970047, |
| "learning_rate": 0.0001, |
| "loss": 1.6278, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.40013012361743655, |
| "grad_norm": 0.17839239537715912, |
| "learning_rate": 0.0001, |
| "loss": 1.5887, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.40061808718282366, |
| "grad_norm": 0.18420036137104034, |
| "learning_rate": 0.0001, |
| "loss": 1.7334, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4011060507482108, |
| "grad_norm": 0.18662692606449127, |
| "learning_rate": 0.0001, |
| "loss": 1.7035, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4015940143135979, |
| "grad_norm": 0.1809212863445282, |
| "learning_rate": 0.0001, |
| "loss": 1.6425, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.40208197787898503, |
| "grad_norm": 0.18343691527843475, |
| "learning_rate": 0.0001, |
| "loss": 1.6915, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.40256994144437214, |
| "grad_norm": 0.19546520709991455, |
| "learning_rate": 0.0001, |
| "loss": 1.5398, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.4030579050097593, |
| "grad_norm": 0.18498557806015015, |
| "learning_rate": 0.0001, |
| "loss": 1.76, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4035458685751464, |
| "grad_norm": 0.1787293255329132, |
| "learning_rate": 0.0001, |
| "loss": 1.7072, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4040338321405335, |
| "grad_norm": 0.18626105785369873, |
| "learning_rate": 0.0001, |
| "loss": 1.6154, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4045217957059206, |
| "grad_norm": 0.18181754648685455, |
| "learning_rate": 0.0001, |
| "loss": 1.6343, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4050097592713077, |
| "grad_norm": 0.1738763153553009, |
| "learning_rate": 0.0001, |
| "loss": 1.6003, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4054977228366949, |
| "grad_norm": 0.19205868244171143, |
| "learning_rate": 0.0001, |
| "loss": 1.6516, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.405985686402082, |
| "grad_norm": 0.17389516532421112, |
| "learning_rate": 0.0001, |
| "loss": 1.6675, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4064736499674691, |
| "grad_norm": 0.17901460826396942, |
| "learning_rate": 0.0001, |
| "loss": 1.7835, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4069616135328562, |
| "grad_norm": 0.16918572783470154, |
| "learning_rate": 0.0001, |
| "loss": 1.5688, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4074495770982433, |
| "grad_norm": 0.17327755689620972, |
| "learning_rate": 0.0001, |
| "loss": 1.612, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.40793754066363047, |
| "grad_norm": 0.17260931432247162, |
| "learning_rate": 0.0001, |
| "loss": 1.5631, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4084255042290176, |
| "grad_norm": 0.18616695702075958, |
| "learning_rate": 0.0001, |
| "loss": 1.8026, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4089134677944047, |
| "grad_norm": 0.1833159476518631, |
| "learning_rate": 0.0001, |
| "loss": 1.7407, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.4094014313597918, |
| "grad_norm": 0.17563556134700775, |
| "learning_rate": 0.0001, |
| "loss": 1.6497, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4098893949251789, |
| "grad_norm": 0.1728363335132599, |
| "learning_rate": 0.0001, |
| "loss": 1.7369, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.41037735849056606, |
| "grad_norm": 0.16742554306983948, |
| "learning_rate": 0.0001, |
| "loss": 1.5323, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.41086532205595316, |
| "grad_norm": 0.18149816989898682, |
| "learning_rate": 0.0001, |
| "loss": 1.6658, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.41135328562134027, |
| "grad_norm": 0.1730806678533554, |
| "learning_rate": 0.0001, |
| "loss": 1.6736, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4118412491867274, |
| "grad_norm": 0.19350793957710266, |
| "learning_rate": 0.0001, |
| "loss": 1.7305, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4123292127521145, |
| "grad_norm": 0.17669609189033508, |
| "learning_rate": 0.0001, |
| "loss": 1.7208, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.41281717631750164, |
| "grad_norm": 0.18896430730819702, |
| "learning_rate": 0.0001, |
| "loss": 1.7677, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.41330513988288875, |
| "grad_norm": 0.18296490609645844, |
| "learning_rate": 0.0001, |
| "loss": 1.7551, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.41379310344827586, |
| "grad_norm": 0.18311992287635803, |
| "learning_rate": 0.0001, |
| "loss": 1.6724, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.41428106701366296, |
| "grad_norm": 0.1732887476682663, |
| "learning_rate": 0.0001, |
| "loss": 1.6779, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4147690305790501, |
| "grad_norm": 0.18442484736442566, |
| "learning_rate": 0.0001, |
| "loss": 1.6707, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.41525699414443723, |
| "grad_norm": 0.18358947336673737, |
| "learning_rate": 0.0001, |
| "loss": 1.7059, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.41574495770982434, |
| "grad_norm": 0.17849397659301758, |
| "learning_rate": 0.0001, |
| "loss": 1.6633, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.41623292127521144, |
| "grad_norm": 0.17558790743350983, |
| "learning_rate": 0.0001, |
| "loss": 1.7351, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.41672088484059855, |
| "grad_norm": 0.18554963171482086, |
| "learning_rate": 0.0001, |
| "loss": 1.722, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4172088484059857, |
| "grad_norm": 0.17529337108135223, |
| "learning_rate": 0.0001, |
| "loss": 1.7565, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4176968119713728, |
| "grad_norm": 0.1806408166885376, |
| "learning_rate": 0.0001, |
| "loss": 1.6164, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4181847755367599, |
| "grad_norm": 0.17640672624111176, |
| "learning_rate": 0.0001, |
| "loss": 1.6622, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.41867273910214703, |
| "grad_norm": 0.18511973321437836, |
| "learning_rate": 0.0001, |
| "loss": 1.7708, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.41916070266753414, |
| "grad_norm": 0.17402327060699463, |
| "learning_rate": 0.0001, |
| "loss": 1.5703, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4196486662329213, |
| "grad_norm": 0.1716722548007965, |
| "learning_rate": 0.0001, |
| "loss": 1.6326, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4201366297983084, |
| "grad_norm": 0.18517763912677765, |
| "learning_rate": 0.0001, |
| "loss": 1.638, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.4206245933636955, |
| "grad_norm": 0.18149396777153015, |
| "learning_rate": 0.0001, |
| "loss": 1.772, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.4211125569290826, |
| "grad_norm": 0.1842370480298996, |
| "learning_rate": 0.0001, |
| "loss": 1.7326, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4216005204944697, |
| "grad_norm": 0.1832754909992218, |
| "learning_rate": 0.0001, |
| "loss": 1.571, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4220884840598569, |
| "grad_norm": 0.18610063195228577, |
| "learning_rate": 0.0001, |
| "loss": 1.6853, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.422576447625244, |
| "grad_norm": 0.18227741122245789, |
| "learning_rate": 0.0001, |
| "loss": 1.7299, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4230644111906311, |
| "grad_norm": 0.1710875779390335, |
| "learning_rate": 0.0001, |
| "loss": 1.6311, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4235523747560182, |
| "grad_norm": 0.1772422045469284, |
| "learning_rate": 0.0001, |
| "loss": 1.6997, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4240403383214053, |
| "grad_norm": 0.18706001341342926, |
| "learning_rate": 0.0001, |
| "loss": 1.7453, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.42452830188679247, |
| "grad_norm": 0.18400168418884277, |
| "learning_rate": 0.0001, |
| "loss": 1.7748, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4250162654521796, |
| "grad_norm": 0.1813107579946518, |
| "learning_rate": 0.0001, |
| "loss": 1.6386, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.4255042290175667, |
| "grad_norm": 0.18432138860225677, |
| "learning_rate": 0.0001, |
| "loss": 1.6548, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4259921925829538, |
| "grad_norm": 0.1701667755842209, |
| "learning_rate": 0.0001, |
| "loss": 1.7228, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.42648015614834095, |
| "grad_norm": 0.17490911483764648, |
| "learning_rate": 0.0001, |
| "loss": 1.6574, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.42696811971372806, |
| "grad_norm": 0.1863052397966385, |
| "learning_rate": 0.0001, |
| "loss": 1.6902, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.42745608327911516, |
| "grad_norm": 0.17869678139686584, |
| "learning_rate": 0.0001, |
| "loss": 1.7961, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.42794404684450227, |
| "grad_norm": 0.17393270134925842, |
| "learning_rate": 0.0001, |
| "loss": 1.6968, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4284320104098894, |
| "grad_norm": 0.1801164150238037, |
| "learning_rate": 0.0001, |
| "loss": 1.8419, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.42891997397527654, |
| "grad_norm": 0.17271965742111206, |
| "learning_rate": 0.0001, |
| "loss": 1.6948, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.42940793754066364, |
| "grad_norm": 0.18875744938850403, |
| "learning_rate": 0.0001, |
| "loss": 1.7529, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.42989590110605075, |
| "grad_norm": 0.18350331485271454, |
| "learning_rate": 0.0001, |
| "loss": 1.7162, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.43038386467143785, |
| "grad_norm": 0.18316605687141418, |
| "learning_rate": 0.0001, |
| "loss": 1.7071, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.43087182823682496, |
| "grad_norm": 0.17159631848335266, |
| "learning_rate": 0.0001, |
| "loss": 1.5494, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4313597918022121, |
| "grad_norm": 0.1835523098707199, |
| "learning_rate": 0.0001, |
| "loss": 1.7773, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.43184775536759923, |
| "grad_norm": 0.18305568397045135, |
| "learning_rate": 0.0001, |
| "loss": 1.6616, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.43233571893298633, |
| "grad_norm": 0.18325333297252655, |
| "learning_rate": 0.0001, |
| "loss": 1.71, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.43282368249837344, |
| "grad_norm": 0.16807565093040466, |
| "learning_rate": 0.0001, |
| "loss": 1.5946, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.43331164606376055, |
| "grad_norm": 0.17560525238513947, |
| "learning_rate": 0.0001, |
| "loss": 1.573, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.4337996096291477, |
| "grad_norm": 0.1823277622461319, |
| "learning_rate": 0.0001, |
| "loss": 1.616, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4342875731945348, |
| "grad_norm": 0.17946025729179382, |
| "learning_rate": 0.0001, |
| "loss": 1.5907, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4347755367599219, |
| "grad_norm": 0.18940189480781555, |
| "learning_rate": 0.0001, |
| "loss": 1.6697, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.435263500325309, |
| "grad_norm": 0.17899388074874878, |
| "learning_rate": 0.0001, |
| "loss": 1.6849, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4357514638906962, |
| "grad_norm": 0.1885358840227127, |
| "learning_rate": 0.0001, |
| "loss": 1.6212, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4362394274560833, |
| "grad_norm": 0.1721390187740326, |
| "learning_rate": 0.0001, |
| "loss": 1.6514, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4367273910214704, |
| "grad_norm": 0.19019658863544464, |
| "learning_rate": 0.0001, |
| "loss": 1.7234, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4372153545868575, |
| "grad_norm": 0.17101971805095673, |
| "learning_rate": 0.0001, |
| "loss": 1.6003, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.4377033181522446, |
| "grad_norm": 0.192877396941185, |
| "learning_rate": 0.0001, |
| "loss": 1.8151, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.4381912817176318, |
| "grad_norm": 0.17775356769561768, |
| "learning_rate": 0.0001, |
| "loss": 1.5926, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.4386792452830189, |
| "grad_norm": 0.19545124471187592, |
| "learning_rate": 0.0001, |
| "loss": 1.7123, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.439167208848406, |
| "grad_norm": 0.17418169975280762, |
| "learning_rate": 0.0001, |
| "loss": 1.6774, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4396551724137931, |
| "grad_norm": 0.19206389784812927, |
| "learning_rate": 0.0001, |
| "loss": 1.7278, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4401431359791802, |
| "grad_norm": 0.18674510717391968, |
| "learning_rate": 0.0001, |
| "loss": 1.6049, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.44063109954456736, |
| "grad_norm": 0.18307790160179138, |
| "learning_rate": 0.0001, |
| "loss": 1.6985, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.44111906310995447, |
| "grad_norm": 0.1894843429327011, |
| "learning_rate": 0.0001, |
| "loss": 1.676, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.4416070266753416, |
| "grad_norm": 0.17619220912456512, |
| "learning_rate": 0.0001, |
| "loss": 1.6807, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4420949902407287, |
| "grad_norm": 0.1805913895368576, |
| "learning_rate": 0.0001, |
| "loss": 1.6704, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.4425829538061158, |
| "grad_norm": 0.17293816804885864, |
| "learning_rate": 0.0001, |
| "loss": 1.597, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.44307091737150295, |
| "grad_norm": 0.17609193921089172, |
| "learning_rate": 0.0001, |
| "loss": 1.6562, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.44355888093689005, |
| "grad_norm": 0.17432111501693726, |
| "learning_rate": 0.0001, |
| "loss": 1.594, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.44404684450227716, |
| "grad_norm": 0.17889589071273804, |
| "learning_rate": 0.0001, |
| "loss": 1.8029, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.44453480806766427, |
| "grad_norm": 0.17299845814704895, |
| "learning_rate": 0.0001, |
| "loss": 1.6116, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.4450227716330514, |
| "grad_norm": 0.17839674651622772, |
| "learning_rate": 0.0001, |
| "loss": 1.7055, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.44551073519843853, |
| "grad_norm": 0.1751437783241272, |
| "learning_rate": 0.0001, |
| "loss": 1.6218, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.44599869876382564, |
| "grad_norm": 0.1901925653219223, |
| "learning_rate": 0.0001, |
| "loss": 1.6578, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.44648666232921275, |
| "grad_norm": 0.17236626148223877, |
| "learning_rate": 0.0001, |
| "loss": 1.6951, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.44697462589459985, |
| "grad_norm": 0.17387427389621735, |
| "learning_rate": 0.0001, |
| "loss": 1.5922, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.447462589459987, |
| "grad_norm": 0.1684548258781433, |
| "learning_rate": 0.0001, |
| "loss": 1.5566, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4479505530253741, |
| "grad_norm": 0.18070632219314575, |
| "learning_rate": 0.0001, |
| "loss": 1.6904, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4484385165907612, |
| "grad_norm": 0.1905713975429535, |
| "learning_rate": 0.0001, |
| "loss": 1.8206, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.44892648015614833, |
| "grad_norm": 0.1828422248363495, |
| "learning_rate": 0.0001, |
| "loss": 1.7974, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.44941444372153544, |
| "grad_norm": 0.17595981061458588, |
| "learning_rate": 0.0001, |
| "loss": 1.7308, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4499024072869226, |
| "grad_norm": 0.18210361897945404, |
| "learning_rate": 0.0001, |
| "loss": 1.6915, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4503903708523097, |
| "grad_norm": 0.18826089799404144, |
| "learning_rate": 0.0001, |
| "loss": 1.7588, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.4508783344176968, |
| "grad_norm": 0.17665328085422516, |
| "learning_rate": 0.0001, |
| "loss": 1.6797, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4513662979830839, |
| "grad_norm": 0.17838731408119202, |
| "learning_rate": 0.0001, |
| "loss": 1.6644, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.451854261548471, |
| "grad_norm": 0.18045654892921448, |
| "learning_rate": 0.0001, |
| "loss": 1.689, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.4523422251138582, |
| "grad_norm": 0.18226969242095947, |
| "learning_rate": 0.0001, |
| "loss": 1.8157, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4528301886792453, |
| "grad_norm": 0.17917855083942413, |
| "learning_rate": 0.0001, |
| "loss": 1.7772, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4533181522446324, |
| "grad_norm": 0.1778966784477234, |
| "learning_rate": 0.0001, |
| "loss": 1.6912, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.4538061158100195, |
| "grad_norm": 0.18105091154575348, |
| "learning_rate": 0.0001, |
| "loss": 1.7072, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4542940793754066, |
| "grad_norm": 0.17502936720848083, |
| "learning_rate": 0.0001, |
| "loss": 1.6462, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4547820429407938, |
| "grad_norm": 0.1830134093761444, |
| "learning_rate": 0.0001, |
| "loss": 1.6876, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4552700065061809, |
| "grad_norm": 0.18607327342033386, |
| "learning_rate": 0.0001, |
| "loss": 1.7082, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.455757970071568, |
| "grad_norm": 0.18888945877552032, |
| "learning_rate": 0.0001, |
| "loss": 1.7509, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4562459336369551, |
| "grad_norm": 0.1867811232805252, |
| "learning_rate": 0.0001, |
| "loss": 1.7233, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.4567338972023422, |
| "grad_norm": 0.1898915022611618, |
| "learning_rate": 0.0001, |
| "loss": 1.6237, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.45722186076772936, |
| "grad_norm": 0.1797095388174057, |
| "learning_rate": 0.0001, |
| "loss": 1.7404, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.45770982433311647, |
| "grad_norm": 0.17534306645393372, |
| "learning_rate": 0.0001, |
| "loss": 1.6726, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.4581977878985036, |
| "grad_norm": 0.19073282182216644, |
| "learning_rate": 0.0001, |
| "loss": 1.8081, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.4586857514638907, |
| "grad_norm": 0.1878473460674286, |
| "learning_rate": 0.0001, |
| "loss": 1.6855, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.45917371502927784, |
| "grad_norm": 0.18376657366752625, |
| "learning_rate": 0.0001, |
| "loss": 1.6833, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.45966167859466495, |
| "grad_norm": 0.18948735296726227, |
| "learning_rate": 0.0001, |
| "loss": 1.7525, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.46014964216005205, |
| "grad_norm": 0.18738175928592682, |
| "learning_rate": 0.0001, |
| "loss": 1.752, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.46063760572543916, |
| "grad_norm": 0.1765458881855011, |
| "learning_rate": 0.0001, |
| "loss": 1.696, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.46112556929082626, |
| "grad_norm": 0.18650664389133453, |
| "learning_rate": 0.0001, |
| "loss": 1.7409, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.4616135328562134, |
| "grad_norm": 0.1759469360113144, |
| "learning_rate": 0.0001, |
| "loss": 1.6119, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.46210149642160053, |
| "grad_norm": 0.18343883752822876, |
| "learning_rate": 0.0001, |
| "loss": 1.7091, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.46258945998698764, |
| "grad_norm": 0.1964959353208542, |
| "learning_rate": 0.0001, |
| "loss": 1.7388, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.46307742355237475, |
| "grad_norm": 0.18265226483345032, |
| "learning_rate": 0.0001, |
| "loss": 1.7036, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.46356538711776185, |
| "grad_norm": 0.18132254481315613, |
| "learning_rate": 0.0001, |
| "loss": 1.688, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.464053350683149, |
| "grad_norm": 0.18742497265338898, |
| "learning_rate": 0.0001, |
| "loss": 1.6212, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.4645413142485361, |
| "grad_norm": 0.1776818335056305, |
| "learning_rate": 0.0001, |
| "loss": 1.5739, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4650292778139232, |
| "grad_norm": 0.193990558385849, |
| "learning_rate": 0.0001, |
| "loss": 1.6852, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.46551724137931033, |
| "grad_norm": 0.1853352040052414, |
| "learning_rate": 0.0001, |
| "loss": 1.6057, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.46600520494469744, |
| "grad_norm": 0.2000368982553482, |
| "learning_rate": 0.0001, |
| "loss": 1.7329, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4664931685100846, |
| "grad_norm": 0.20909981429576874, |
| "learning_rate": 0.0001, |
| "loss": 1.687, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4669811320754717, |
| "grad_norm": 0.21065653860569, |
| "learning_rate": 0.0001, |
| "loss": 1.7239, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.4674690956408588, |
| "grad_norm": 0.1819789707660675, |
| "learning_rate": 0.0001, |
| "loss": 1.7258, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.4679570592062459, |
| "grad_norm": 0.20444951951503754, |
| "learning_rate": 0.0001, |
| "loss": 1.679, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.468445022771633, |
| "grad_norm": 0.19722609221935272, |
| "learning_rate": 0.0001, |
| "loss": 1.6114, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4689329863370202, |
| "grad_norm": 0.18290160596370697, |
| "learning_rate": 0.0001, |
| "loss": 1.7676, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.4694209499024073, |
| "grad_norm": 0.20910906791687012, |
| "learning_rate": 0.0001, |
| "loss": 1.6688, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4699089134677944, |
| "grad_norm": 0.2053229659795761, |
| "learning_rate": 0.0001, |
| "loss": 1.7208, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4703968770331815, |
| "grad_norm": 0.18317236006259918, |
| "learning_rate": 0.0001, |
| "loss": 1.6808, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.47088484059856867, |
| "grad_norm": 0.20331262052059174, |
| "learning_rate": 0.0001, |
| "loss": 1.7621, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.47137280416395577, |
| "grad_norm": 0.194210484623909, |
| "learning_rate": 0.0001, |
| "loss": 1.7045, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.4718607677293429, |
| "grad_norm": 0.18274177610874176, |
| "learning_rate": 0.0001, |
| "loss": 1.7462, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.47234873129473, |
| "grad_norm": 0.211595356464386, |
| "learning_rate": 0.0001, |
| "loss": 1.7322, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4728366948601171, |
| "grad_norm": 0.1885220855474472, |
| "learning_rate": 0.0001, |
| "loss": 1.6825, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.47332465842550425, |
| "grad_norm": 0.17875580489635468, |
| "learning_rate": 0.0001, |
| "loss": 1.6192, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.47381262199089136, |
| "grad_norm": 0.1805390864610672, |
| "learning_rate": 0.0001, |
| "loss": 1.6668, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.47430058555627846, |
| "grad_norm": 0.19222760200500488, |
| "learning_rate": 0.0001, |
| "loss": 1.7478, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.47478854912166557, |
| "grad_norm": 0.18637999892234802, |
| "learning_rate": 0.0001, |
| "loss": 1.7773, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.4752765126870527, |
| "grad_norm": 0.18341195583343506, |
| "learning_rate": 0.0001, |
| "loss": 1.7021, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.47576447625243984, |
| "grad_norm": 0.17885076999664307, |
| "learning_rate": 0.0001, |
| "loss": 1.6424, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.47625243981782694, |
| "grad_norm": 0.1952183097600937, |
| "learning_rate": 0.0001, |
| "loss": 1.9142, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.47674040338321405, |
| "grad_norm": 0.18243496119976044, |
| "learning_rate": 0.0001, |
| "loss": 1.6983, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.47722836694860116, |
| "grad_norm": 0.18224705755710602, |
| "learning_rate": 0.0001, |
| "loss": 1.5847, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.47771633051398826, |
| "grad_norm": 0.25170522928237915, |
| "learning_rate": 0.0001, |
| "loss": 1.9113, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.4782042940793754, |
| "grad_norm": 0.18615500628948212, |
| "learning_rate": 0.0001, |
| "loss": 1.7893, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.47869225764476253, |
| "grad_norm": 0.18177960813045502, |
| "learning_rate": 0.0001, |
| "loss": 1.753, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.47918022121014964, |
| "grad_norm": 0.17566373944282532, |
| "learning_rate": 0.0001, |
| "loss": 1.749, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.47966818477553674, |
| "grad_norm": 0.18363641202449799, |
| "learning_rate": 0.0001, |
| "loss": 1.7202, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.4801561483409239, |
| "grad_norm": 0.18019676208496094, |
| "learning_rate": 0.0001, |
| "loss": 1.7756, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.480644111906311, |
| "grad_norm": 0.18838275969028473, |
| "learning_rate": 0.0001, |
| "loss": 1.6533, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.4811320754716981, |
| "grad_norm": 0.17840002477169037, |
| "learning_rate": 0.0001, |
| "loss": 1.6495, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.4816200390370852, |
| "grad_norm": 0.18629398941993713, |
| "learning_rate": 0.0001, |
| "loss": 1.746, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.48210800260247233, |
| "grad_norm": 0.19068728387355804, |
| "learning_rate": 0.0001, |
| "loss": 1.7956, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4825959661678595, |
| "grad_norm": 0.17752403020858765, |
| "learning_rate": 0.0001, |
| "loss": 1.6085, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.4830839297332466, |
| "grad_norm": 0.17869940400123596, |
| "learning_rate": 0.0001, |
| "loss": 1.687, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.4835718932986337, |
| "grad_norm": 0.19462576508522034, |
| "learning_rate": 0.0001, |
| "loss": 1.766, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.4840598568640208, |
| "grad_norm": 0.17635509371757507, |
| "learning_rate": 0.0001, |
| "loss": 1.6512, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.4845478204294079, |
| "grad_norm": 0.18457075953483582, |
| "learning_rate": 0.0001, |
| "loss": 1.6829, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.4850357839947951, |
| "grad_norm": 0.19008415937423706, |
| "learning_rate": 0.0001, |
| "loss": 1.8335, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4855237475601822, |
| "grad_norm": 0.1748104840517044, |
| "learning_rate": 0.0001, |
| "loss": 1.6822, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4860117111255693, |
| "grad_norm": 0.18871375918388367, |
| "learning_rate": 0.0001, |
| "loss": 1.7749, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4864996746909564, |
| "grad_norm": 0.19204716384410858, |
| "learning_rate": 0.0001, |
| "loss": 1.7027, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.4869876382563435, |
| "grad_norm": 0.17363031208515167, |
| "learning_rate": 0.0001, |
| "loss": 1.6329, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.48747560182173066, |
| "grad_norm": 0.18046556413173676, |
| "learning_rate": 0.0001, |
| "loss": 1.6251, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.48796356538711777, |
| "grad_norm": 0.18280474841594696, |
| "learning_rate": 0.0001, |
| "loss": 1.7468, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4884515289525049, |
| "grad_norm": 0.1856307089328766, |
| "learning_rate": 0.0001, |
| "loss": 1.8059, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.488939492517892, |
| "grad_norm": 0.18734587728977203, |
| "learning_rate": 0.0001, |
| "loss": 1.7482, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.4894274560832791, |
| "grad_norm": 0.18201518058776855, |
| "learning_rate": 0.0001, |
| "loss": 1.6618, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.48991541964866625, |
| "grad_norm": 0.18317224085330963, |
| "learning_rate": 0.0001, |
| "loss": 1.6556, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.49040338321405336, |
| "grad_norm": 0.18233336508274078, |
| "learning_rate": 0.0001, |
| "loss": 1.7073, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.49089134677944046, |
| "grad_norm": 0.19454477727413177, |
| "learning_rate": 0.0001, |
| "loss": 1.5993, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.49137931034482757, |
| "grad_norm": 0.1874353140592575, |
| "learning_rate": 0.0001, |
| "loss": 1.6976, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.49186727391021473, |
| "grad_norm": 0.18378609418869019, |
| "learning_rate": 0.0001, |
| "loss": 1.7292, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.49235523747560184, |
| "grad_norm": 0.18301472067832947, |
| "learning_rate": 0.0001, |
| "loss": 1.6702, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.49284320104098894, |
| "grad_norm": 0.18581345677375793, |
| "learning_rate": 0.0001, |
| "loss": 1.769, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.49333116460637605, |
| "grad_norm": 0.18604816496372223, |
| "learning_rate": 0.0001, |
| "loss": 1.7022, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.49381912817176316, |
| "grad_norm": 0.1670636236667633, |
| "learning_rate": 0.0001, |
| "loss": 1.6245, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4943070917371503, |
| "grad_norm": 0.18545298278331757, |
| "learning_rate": 0.0001, |
| "loss": 1.777, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.4947950553025374, |
| "grad_norm": 0.18108947575092316, |
| "learning_rate": 0.0001, |
| "loss": 1.7066, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.49528301886792453, |
| "grad_norm": 0.18042118847370148, |
| "learning_rate": 0.0001, |
| "loss": 1.6393, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.49577098243331164, |
| "grad_norm": 0.19193610548973083, |
| "learning_rate": 0.0001, |
| "loss": 1.8438, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.49625894599869874, |
| "grad_norm": 0.18542861938476562, |
| "learning_rate": 0.0001, |
| "loss": 1.8076, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.4967469095640859, |
| "grad_norm": 0.17646706104278564, |
| "learning_rate": 0.0001, |
| "loss": 1.4699, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.497234873129473, |
| "grad_norm": 0.18862095475196838, |
| "learning_rate": 0.0001, |
| "loss": 1.7165, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4977228366948601, |
| "grad_norm": 0.18618489801883698, |
| "learning_rate": 0.0001, |
| "loss": 1.7683, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4982108002602472, |
| "grad_norm": 0.18750105798244476, |
| "learning_rate": 0.0001, |
| "loss": 1.6681, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.49869876382563433, |
| "grad_norm": 0.1942930370569229, |
| "learning_rate": 0.0001, |
| "loss": 1.6555, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4991867273910215, |
| "grad_norm": 0.18165245652198792, |
| "learning_rate": 0.0001, |
| "loss": 1.7059, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4996746909564086, |
| "grad_norm": 0.18349111080169678, |
| "learning_rate": 0.0001, |
| "loss": 1.7165, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5001626545217958, |
| "grad_norm": 0.17459173500537872, |
| "learning_rate": 0.0001, |
| "loss": 1.6784, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5006506180871828, |
| "grad_norm": 0.19236469268798828, |
| "learning_rate": 0.0001, |
| "loss": 1.6727, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.50113858165257, |
| "grad_norm": 0.18120145797729492, |
| "learning_rate": 0.0001, |
| "loss": 1.7109, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.501626545217957, |
| "grad_norm": 0.18319325149059296, |
| "learning_rate": 0.0001, |
| "loss": 1.6353, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5021145087833442, |
| "grad_norm": 0.1807912439107895, |
| "learning_rate": 0.0001, |
| "loss": 1.6866, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5026024723487313, |
| "grad_norm": 0.1748090237379074, |
| "learning_rate": 0.0001, |
| "loss": 1.6196, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5030904359141184, |
| "grad_norm": 0.1822468489408493, |
| "learning_rate": 0.0001, |
| "loss": 1.7539, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5035783994795056, |
| "grad_norm": 0.18360479176044464, |
| "learning_rate": 0.0001, |
| "loss": 1.6853, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5040663630448926, |
| "grad_norm": 0.18836341798305511, |
| "learning_rate": 0.0001, |
| "loss": 1.6796, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5045543266102798, |
| "grad_norm": 0.18044047057628632, |
| "learning_rate": 0.0001, |
| "loss": 1.6929, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5050422901756669, |
| "grad_norm": 0.18836145102977753, |
| "learning_rate": 0.0001, |
| "loss": 1.8204, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.505530253741054, |
| "grad_norm": 0.1829444319009781, |
| "learning_rate": 0.0001, |
| "loss": 1.7364, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5060182173064411, |
| "grad_norm": 0.1847165822982788, |
| "learning_rate": 0.0001, |
| "loss": 1.6792, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5065061808718282, |
| "grad_norm": 0.17972713708877563, |
| "learning_rate": 0.0001, |
| "loss": 1.5694, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5069941444372154, |
| "grad_norm": 0.1910099983215332, |
| "learning_rate": 0.0001, |
| "loss": 1.6189, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5074821080026025, |
| "grad_norm": 0.18901146948337555, |
| "learning_rate": 0.0001, |
| "loss": 1.7515, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5079700715679896, |
| "grad_norm": 0.18210864067077637, |
| "learning_rate": 0.0001, |
| "loss": 1.729, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5084580351333767, |
| "grad_norm": 0.18417298793792725, |
| "learning_rate": 0.0001, |
| "loss": 1.7392, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5089459986987638, |
| "grad_norm": 0.18548882007598877, |
| "learning_rate": 0.0001, |
| "loss": 1.7452, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5094339622641509, |
| "grad_norm": 0.17644409835338593, |
| "learning_rate": 0.0001, |
| "loss": 1.5658, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5099219258295381, |
| "grad_norm": 0.18809697031974792, |
| "learning_rate": 0.0001, |
| "loss": 1.6806, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5104098893949252, |
| "grad_norm": 0.18309113383293152, |
| "learning_rate": 0.0001, |
| "loss": 1.7068, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5108978529603123, |
| "grad_norm": 0.1873452365398407, |
| "learning_rate": 0.0001, |
| "loss": 1.7401, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5113858165256994, |
| "grad_norm": 0.18118296563625336, |
| "learning_rate": 0.0001, |
| "loss": 1.6853, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5118737800910865, |
| "grad_norm": 0.19551081955432892, |
| "learning_rate": 0.0001, |
| "loss": 1.6851, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5123617436564737, |
| "grad_norm": 0.19051168859004974, |
| "learning_rate": 0.0001, |
| "loss": 1.7153, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5128497072218607, |
| "grad_norm": 0.1723107546567917, |
| "learning_rate": 0.0001, |
| "loss": 1.6446, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5133376707872479, |
| "grad_norm": 0.18448057770729065, |
| "learning_rate": 0.0001, |
| "loss": 1.6798, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.513825634352635, |
| "grad_norm": 0.1888912320137024, |
| "learning_rate": 0.0001, |
| "loss": 1.7696, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5143135979180221, |
| "grad_norm": 0.19481922686100006, |
| "learning_rate": 0.0001, |
| "loss": 1.6657, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5148015614834093, |
| "grad_norm": 0.17614057660102844, |
| "learning_rate": 0.0001, |
| "loss": 1.6758, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5152895250487963, |
| "grad_norm": 0.1752062737941742, |
| "learning_rate": 0.0001, |
| "loss": 1.644, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5157774886141835, |
| "grad_norm": 0.1882951855659485, |
| "learning_rate": 0.0001, |
| "loss": 1.6644, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5162654521795705, |
| "grad_norm": 0.20255088806152344, |
| "learning_rate": 0.0001, |
| "loss": 1.7119, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5167534157449577, |
| "grad_norm": 0.181501105427742, |
| "learning_rate": 0.0001, |
| "loss": 1.662, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.1865651160478592, |
| "learning_rate": 0.0001, |
| "loss": 1.7279, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5177293428757319, |
| "grad_norm": 0.1911836862564087, |
| "learning_rate": 0.0001, |
| "loss": 1.6795, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5182173064411191, |
| "grad_norm": 0.18534213304519653, |
| "learning_rate": 0.0001, |
| "loss": 1.7126, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5187052700065062, |
| "grad_norm": 0.1829744428396225, |
| "learning_rate": 0.0001, |
| "loss": 1.6598, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5191932335718933, |
| "grad_norm": 0.17899416387081146, |
| "learning_rate": 0.0001, |
| "loss": 1.6293, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5196811971372804, |
| "grad_norm": 0.17233431339263916, |
| "learning_rate": 0.0001, |
| "loss": 1.6195, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5201691607026675, |
| "grad_norm": 0.1891251802444458, |
| "learning_rate": 0.0001, |
| "loss": 1.72, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5206571242680547, |
| "grad_norm": 0.19288107752799988, |
| "learning_rate": 0.0001, |
| "loss": 1.8331, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5211450878334418, |
| "grad_norm": 0.18534426391124725, |
| "learning_rate": 0.0001, |
| "loss": 1.6229, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5216330513988289, |
| "grad_norm": 0.19013041257858276, |
| "learning_rate": 0.0001, |
| "loss": 1.7331, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.522121014964216, |
| "grad_norm": 0.18765857815742493, |
| "learning_rate": 0.0001, |
| "loss": 1.6951, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5226089785296031, |
| "grad_norm": 0.17150448262691498, |
| "learning_rate": 0.0001, |
| "loss": 1.6581, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5230969420949902, |
| "grad_norm": 0.20504555106163025, |
| "learning_rate": 0.0001, |
| "loss": 1.7247, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5235849056603774, |
| "grad_norm": 0.17816084623336792, |
| "learning_rate": 0.0001, |
| "loss": 1.5654, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5240728692257645, |
| "grad_norm": 0.1842648684978485, |
| "learning_rate": 0.0001, |
| "loss": 1.601, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5245608327911516, |
| "grad_norm": 0.18370290100574493, |
| "learning_rate": 0.0001, |
| "loss": 1.6369, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5250487963565387, |
| "grad_norm": 0.18270552158355713, |
| "learning_rate": 0.0001, |
| "loss": 1.6541, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5255367599219258, |
| "grad_norm": 0.1808508038520813, |
| "learning_rate": 0.0001, |
| "loss": 1.6598, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.526024723487313, |
| "grad_norm": 0.17794300615787506, |
| "learning_rate": 0.0001, |
| "loss": 1.7294, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5265126870527, |
| "grad_norm": 0.18382461369037628, |
| "learning_rate": 0.0001, |
| "loss": 1.6901, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5270006506180872, |
| "grad_norm": 0.1806422621011734, |
| "learning_rate": 0.0001, |
| "loss": 1.6193, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5274886141834743, |
| "grad_norm": 0.18108539283275604, |
| "learning_rate": 0.0001, |
| "loss": 1.6911, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5279765777488614, |
| "grad_norm": 0.18681305646896362, |
| "learning_rate": 0.0001, |
| "loss": 1.726, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5284645413142486, |
| "grad_norm": 0.18909889459609985, |
| "learning_rate": 0.0001, |
| "loss": 1.6857, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5289525048796356, |
| "grad_norm": 0.18421509861946106, |
| "learning_rate": 0.0001, |
| "loss": 1.6564, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5294404684450228, |
| "grad_norm": 0.18811306357383728, |
| "learning_rate": 0.0001, |
| "loss": 1.7817, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5299284320104098, |
| "grad_norm": 0.17478449642658234, |
| "learning_rate": 0.0001, |
| "loss": 1.681, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.530416395575797, |
| "grad_norm": 0.1789132058620453, |
| "learning_rate": 0.0001, |
| "loss": 1.6906, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5309043591411842, |
| "grad_norm": 0.18358959257602692, |
| "learning_rate": 0.0001, |
| "loss": 1.6347, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5313923227065712, |
| "grad_norm": 0.18565410375595093, |
| "learning_rate": 0.0001, |
| "loss": 1.7078, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5318802862719584, |
| "grad_norm": 0.19210746884346008, |
| "learning_rate": 0.0001, |
| "loss": 1.6195, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5323682498373454, |
| "grad_norm": 0.18205370008945465, |
| "learning_rate": 0.0001, |
| "loss": 1.6541, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.5328562134027326, |
| "grad_norm": 0.19181987643241882, |
| "learning_rate": 0.0001, |
| "loss": 1.8033, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.5333441769681198, |
| "grad_norm": 0.20362940430641174, |
| "learning_rate": 0.0001, |
| "loss": 1.7497, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5338321405335068, |
| "grad_norm": 0.1858234405517578, |
| "learning_rate": 0.0001, |
| "loss": 1.6342, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.534320104098894, |
| "grad_norm": 0.19925346970558167, |
| "learning_rate": 0.0001, |
| "loss": 1.686, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.534808067664281, |
| "grad_norm": 0.19114282727241516, |
| "learning_rate": 0.0001, |
| "loss": 1.7186, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5352960312296682, |
| "grad_norm": 0.1771971732378006, |
| "learning_rate": 0.0001, |
| "loss": 1.776, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5357839947950553, |
| "grad_norm": 0.18942809104919434, |
| "learning_rate": 0.0001, |
| "loss": 1.7179, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5362719583604424, |
| "grad_norm": 0.1868084967136383, |
| "learning_rate": 0.0001, |
| "loss": 1.6454, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5367599219258296, |
| "grad_norm": 0.18689820170402527, |
| "learning_rate": 0.0001, |
| "loss": 1.6196, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5372478854912166, |
| "grad_norm": 0.1820572018623352, |
| "learning_rate": 0.0001, |
| "loss": 1.6673, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5377358490566038, |
| "grad_norm": 0.17870689928531647, |
| "learning_rate": 0.0001, |
| "loss": 1.5968, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5382238126219909, |
| "grad_norm": 0.18118569254875183, |
| "learning_rate": 0.0001, |
| "loss": 1.7227, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.538711776187378, |
| "grad_norm": 0.1880924552679062, |
| "learning_rate": 0.0001, |
| "loss": 1.6108, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.5391997397527651, |
| "grad_norm": 0.18598206341266632, |
| "learning_rate": 0.0001, |
| "loss": 1.6542, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5396877033181522, |
| "grad_norm": 0.1872934103012085, |
| "learning_rate": 0.0001, |
| "loss": 1.737, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5401756668835394, |
| "grad_norm": 0.1890784651041031, |
| "learning_rate": 0.0001, |
| "loss": 1.6661, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5406636304489265, |
| "grad_norm": 0.18039381504058838, |
| "learning_rate": 0.0001, |
| "loss": 1.6276, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5411515940143136, |
| "grad_norm": 0.18550348281860352, |
| "learning_rate": 0.0001, |
| "loss": 1.6828, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5416395575797007, |
| "grad_norm": 0.17449964582920074, |
| "learning_rate": 0.0001, |
| "loss": 1.5034, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5421275211450879, |
| "grad_norm": 0.18202394247055054, |
| "learning_rate": 0.0001, |
| "loss": 1.6561, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5426154847104749, |
| "grad_norm": 0.19365155696868896, |
| "learning_rate": 0.0001, |
| "loss": 1.5, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5431034482758621, |
| "grad_norm": 0.17744717001914978, |
| "learning_rate": 0.0001, |
| "loss": 1.5921, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5435914118412492, |
| "grad_norm": 0.17965885996818542, |
| "learning_rate": 0.0001, |
| "loss": 1.6819, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5440793754066363, |
| "grad_norm": 0.17675574123859406, |
| "learning_rate": 0.0001, |
| "loss": 1.6471, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5445673389720235, |
| "grad_norm": 0.17376431822776794, |
| "learning_rate": 0.0001, |
| "loss": 1.7007, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.5450553025374105, |
| "grad_norm": 0.18188650906085968, |
| "learning_rate": 0.0001, |
| "loss": 1.774, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5455432661027977, |
| "grad_norm": 0.17877081036567688, |
| "learning_rate": 0.0001, |
| "loss": 1.6535, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5460312296681847, |
| "grad_norm": 0.17933769524097443, |
| "learning_rate": 0.0001, |
| "loss": 1.7362, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5465191932335719, |
| "grad_norm": 0.1805192083120346, |
| "learning_rate": 0.0001, |
| "loss": 1.7321, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5470071567989591, |
| "grad_norm": 0.17312046885490417, |
| "learning_rate": 0.0001, |
| "loss": 1.6415, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5474951203643461, |
| "grad_norm": 0.18119437992572784, |
| "learning_rate": 0.0001, |
| "loss": 1.7104, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5479830839297333, |
| "grad_norm": 0.182356595993042, |
| "learning_rate": 0.0001, |
| "loss": 1.6866, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5484710474951203, |
| "grad_norm": 0.1846156120300293, |
| "learning_rate": 0.0001, |
| "loss": 1.6612, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5489590110605075, |
| "grad_norm": 0.17960377037525177, |
| "learning_rate": 0.0001, |
| "loss": 1.6848, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5494469746258946, |
| "grad_norm": 0.17133495211601257, |
| "learning_rate": 0.0001, |
| "loss": 1.5885, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5499349381912817, |
| "grad_norm": 0.18075834214687347, |
| "learning_rate": 0.0001, |
| "loss": 1.7428, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5504229017566689, |
| "grad_norm": 0.18319405615329742, |
| "learning_rate": 0.0001, |
| "loss": 1.5856, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5509108653220559, |
| "grad_norm": 0.17644239962100983, |
| "learning_rate": 0.0001, |
| "loss": 1.6198, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5513988288874431, |
| "grad_norm": 0.18394580483436584, |
| "learning_rate": 0.0001, |
| "loss": 1.6435, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5518867924528302, |
| "grad_norm": 0.1763201355934143, |
| "learning_rate": 0.0001, |
| "loss": 1.6975, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5523747560182173, |
| "grad_norm": 0.16742850840091705, |
| "learning_rate": 0.0001, |
| "loss": 1.5377, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5528627195836044, |
| "grad_norm": 0.1892685890197754, |
| "learning_rate": 0.0001, |
| "loss": 1.6111, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5533506831489915, |
| "grad_norm": 0.18346691131591797, |
| "learning_rate": 0.0001, |
| "loss": 1.6844, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5538386467143787, |
| "grad_norm": 0.1796543449163437, |
| "learning_rate": 0.0001, |
| "loss": 1.7746, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5543266102797658, |
| "grad_norm": 0.18673722445964813, |
| "learning_rate": 0.0001, |
| "loss": 1.6464, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5548145738451529, |
| "grad_norm": 0.17763900756835938, |
| "learning_rate": 0.0001, |
| "loss": 1.6237, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.55530253741054, |
| "grad_norm": 0.17686204612255096, |
| "learning_rate": 0.0001, |
| "loss": 1.5131, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5557905009759271, |
| "grad_norm": 0.18360872566699982, |
| "learning_rate": 0.0001, |
| "loss": 1.6699, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5562784645413142, |
| "grad_norm": 0.1827259063720703, |
| "learning_rate": 0.0001, |
| "loss": 1.746, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5567664281067014, |
| "grad_norm": 0.17962484061717987, |
| "learning_rate": 0.0001, |
| "loss": 1.6284, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5572543916720885, |
| "grad_norm": 0.18114878237247467, |
| "learning_rate": 0.0001, |
| "loss": 1.6737, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5577423552374756, |
| "grad_norm": 0.18968282639980316, |
| "learning_rate": 0.0001, |
| "loss": 1.7798, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5582303188028627, |
| "grad_norm": 0.18505877256393433, |
| "learning_rate": 0.0001, |
| "loss": 1.708, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5587182823682498, |
| "grad_norm": 0.1776040643453598, |
| "learning_rate": 0.0001, |
| "loss": 1.7424, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.559206245933637, |
| "grad_norm": 0.17982693016529083, |
| "learning_rate": 0.0001, |
| "loss": 1.6197, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.559694209499024, |
| "grad_norm": 0.19187504053115845, |
| "learning_rate": 0.0001, |
| "loss": 1.7451, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5601821730644112, |
| "grad_norm": 0.17975229024887085, |
| "learning_rate": 0.0001, |
| "loss": 1.6236, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5606701366297983, |
| "grad_norm": 0.18996664881706238, |
| "learning_rate": 0.0001, |
| "loss": 1.7377, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5611581001951854, |
| "grad_norm": 0.18252383172512054, |
| "learning_rate": 0.0001, |
| "loss": 1.628, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5616460637605726, |
| "grad_norm": 0.18448345363140106, |
| "learning_rate": 0.0001, |
| "loss": 1.7109, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5621340273259596, |
| "grad_norm": 0.17741243541240692, |
| "learning_rate": 0.0001, |
| "loss": 1.6088, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5626219908913468, |
| "grad_norm": 0.19825778901576996, |
| "learning_rate": 0.0001, |
| "loss": 1.5972, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.563109954456734, |
| "grad_norm": 0.18595324456691742, |
| "learning_rate": 0.0001, |
| "loss": 1.672, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.563597918022121, |
| "grad_norm": 0.18176652491092682, |
| "learning_rate": 0.0001, |
| "loss": 1.6216, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5640858815875082, |
| "grad_norm": 0.1950223743915558, |
| "learning_rate": 0.0001, |
| "loss": 1.705, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5645738451528952, |
| "grad_norm": 0.1990990787744522, |
| "learning_rate": 0.0001, |
| "loss": 1.7031, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5650618087182824, |
| "grad_norm": 0.1937246173620224, |
| "learning_rate": 0.0001, |
| "loss": 1.6838, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5655497722836695, |
| "grad_norm": 0.1884077787399292, |
| "learning_rate": 0.0001, |
| "loss": 1.5994, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 0.19293847680091858, |
| "learning_rate": 0.0001, |
| "loss": 1.7657, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5665256994144438, |
| "grad_norm": 0.18362392485141754, |
| "learning_rate": 0.0001, |
| "loss": 1.7443, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5670136629798308, |
| "grad_norm": 0.17800559103488922, |
| "learning_rate": 0.0001, |
| "loss": 1.6433, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.567501626545218, |
| "grad_norm": 0.1774267852306366, |
| "learning_rate": 0.0001, |
| "loss": 1.6468, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5679895901106051, |
| "grad_norm": 0.18834517896175385, |
| "learning_rate": 0.0001, |
| "loss": 1.7715, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5684775536759922, |
| "grad_norm": 0.1841384768486023, |
| "learning_rate": 0.0001, |
| "loss": 1.7604, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5689655172413793, |
| "grad_norm": 0.18285635113716125, |
| "learning_rate": 0.0001, |
| "loss": 1.6941, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5694534808067664, |
| "grad_norm": 0.1796160191297531, |
| "learning_rate": 0.0001, |
| "loss": 1.6835, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5699414443721535, |
| "grad_norm": 0.18359331786632538, |
| "learning_rate": 0.0001, |
| "loss": 1.6658, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5704294079375407, |
| "grad_norm": 0.17833665013313293, |
| "learning_rate": 0.0001, |
| "loss": 1.6455, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.5709173715029278, |
| "grad_norm": 0.17929013073444366, |
| "learning_rate": 0.0001, |
| "loss": 1.5912, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5714053350683149, |
| "grad_norm": 0.18901382386684418, |
| "learning_rate": 0.0001, |
| "loss": 1.7305, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.571893298633702, |
| "grad_norm": 0.18040084838867188, |
| "learning_rate": 0.0001, |
| "loss": 1.6239, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5723812621990891, |
| "grad_norm": 0.1832232028245926, |
| "learning_rate": 0.0001, |
| "loss": 1.6594, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5728692257644763, |
| "grad_norm": 0.1900448203086853, |
| "learning_rate": 0.0001, |
| "loss": 1.7176, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5733571893298633, |
| "grad_norm": 0.1859886199235916, |
| "learning_rate": 0.0001, |
| "loss": 1.6823, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.5738451528952505, |
| "grad_norm": 0.1816965788602829, |
| "learning_rate": 0.0001, |
| "loss": 1.6936, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5743331164606376, |
| "grad_norm": 0.1927751749753952, |
| "learning_rate": 0.0001, |
| "loss": 1.7069, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5748210800260247, |
| "grad_norm": 0.20290379226207733, |
| "learning_rate": 0.0001, |
| "loss": 1.7987, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5753090435914119, |
| "grad_norm": 0.1756032556295395, |
| "learning_rate": 0.0001, |
| "loss": 1.656, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5757970071567989, |
| "grad_norm": 0.19676676392555237, |
| "learning_rate": 0.0001, |
| "loss": 1.8415, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5762849707221861, |
| "grad_norm": 0.18112622201442719, |
| "learning_rate": 0.0001, |
| "loss": 1.6081, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5767729342875731, |
| "grad_norm": 0.20109887421131134, |
| "learning_rate": 0.0001, |
| "loss": 1.7772, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5772608978529603, |
| "grad_norm": 0.191656693816185, |
| "learning_rate": 0.0001, |
| "loss": 1.6869, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5777488614183475, |
| "grad_norm": 0.17886236310005188, |
| "learning_rate": 0.0001, |
| "loss": 1.5931, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5782368249837345, |
| "grad_norm": 0.18148286640644073, |
| "learning_rate": 0.0001, |
| "loss": 1.6056, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5787247885491217, |
| "grad_norm": 0.20596817135810852, |
| "learning_rate": 0.0001, |
| "loss": 1.6129, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5792127521145087, |
| "grad_norm": 0.17900511622428894, |
| "learning_rate": 0.0001, |
| "loss": 1.6487, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.5797007156798959, |
| "grad_norm": 0.1893642693758011, |
| "learning_rate": 0.0001, |
| "loss": 1.7566, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5801886792452831, |
| "grad_norm": 0.19354504346847534, |
| "learning_rate": 0.0001, |
| "loss": 1.6665, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5806766428106701, |
| "grad_norm": 0.18692192435264587, |
| "learning_rate": 0.0001, |
| "loss": 1.7069, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5811646063760573, |
| "grad_norm": 0.204212948679924, |
| "learning_rate": 0.0001, |
| "loss": 1.7943, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5816525699414443, |
| "grad_norm": 0.18666908144950867, |
| "learning_rate": 0.0001, |
| "loss": 1.7031, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5821405335068315, |
| "grad_norm": 0.1859620362520218, |
| "learning_rate": 0.0001, |
| "loss": 1.7443, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.5826284970722186, |
| "grad_norm": 0.1774389147758484, |
| "learning_rate": 0.0001, |
| "loss": 1.6697, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5831164606376057, |
| "grad_norm": 0.17645440995693207, |
| "learning_rate": 0.0001, |
| "loss": 1.7566, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5836044242029929, |
| "grad_norm": 0.17927305400371552, |
| "learning_rate": 0.0001, |
| "loss": 1.5341, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5840923877683799, |
| "grad_norm": 0.19179411232471466, |
| "learning_rate": 0.0001, |
| "loss": 1.706, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5845803513337671, |
| "grad_norm": 0.18921273946762085, |
| "learning_rate": 0.0001, |
| "loss": 1.6651, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5850683148991542, |
| "grad_norm": 0.20988748967647552, |
| "learning_rate": 0.0001, |
| "loss": 1.8307, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.5855562784645413, |
| "grad_norm": 0.1767909973859787, |
| "learning_rate": 0.0001, |
| "loss": 1.7116, |
| "step": 1200 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2049, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.490022511384986e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|