{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 814,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002457002457002457,
      "grad_norm": 12.175590846877855,
      "learning_rate": 0.0,
      "loss": 1.2148,
      "step": 1
    },
    {
      "epoch": 0.004914004914004914,
      "grad_norm": 13.112846331209713,
      "learning_rate": 5e-08,
      "loss": 1.3154,
      "step": 2
    },
    {
      "epoch": 0.007371007371007371,
      "grad_norm": 12.768616794783732,
      "learning_rate": 1e-07,
      "loss": 1.2865,
      "step": 3
    },
    {
      "epoch": 0.009828009828009828,
      "grad_norm": 12.570106090577944,
      "learning_rate": 1.5e-07,
      "loss": 1.2459,
      "step": 4
    },
    {
      "epoch": 0.012285012285012284,
      "grad_norm": 12.026844749086528,
      "learning_rate": 2e-07,
      "loss": 1.2496,
      "step": 5
    },
    {
      "epoch": 0.014742014742014743,
      "grad_norm": 12.166619597781125,
      "learning_rate": 2.5e-07,
      "loss": 1.2179,
      "step": 6
    },
    {
      "epoch": 0.0171990171990172,
      "grad_norm": 12.450604071787023,
      "learning_rate": 3e-07,
      "loss": 1.2451,
      "step": 7
    },
    {
      "epoch": 0.019656019656019656,
      "grad_norm": 11.75536378463392,
      "learning_rate": 3.5e-07,
      "loss": 1.2372,
      "step": 8
    },
    {
      "epoch": 0.022113022113022112,
      "grad_norm": 11.7494451205682,
      "learning_rate": 4e-07,
      "loss": 1.2174,
      "step": 9
    },
    {
      "epoch": 0.02457002457002457,
      "grad_norm": 11.485164591676915,
      "learning_rate": 4.5e-07,
      "loss": 1.1803,
      "step": 10
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 11.41671410777608,
      "learning_rate": 5e-07,
      "loss": 1.1829,
      "step": 11
    },
    {
      "epoch": 0.029484029484029485,
      "grad_norm": 10.737969644794076,
      "learning_rate": 5.5e-07,
      "loss": 1.1912,
      "step": 12
    },
    {
      "epoch": 0.03194103194103194,
      "grad_norm": 12.0315769410197,
      "learning_rate": 6e-07,
      "loss": 1.2516,
      "step": 13
    },
    {
      "epoch": 0.0343980343980344,
      "grad_norm": 11.703183966857443,
      "learning_rate": 6.5e-07,
      "loss": 1.2533,
      "step": 14
    },
    {
      "epoch": 0.036855036855036855,
      "grad_norm": 10.75777994461547,
      "learning_rate": 7e-07,
      "loss": 1.1763,
      "step": 15
    },
    {
      "epoch": 0.03931203931203931,
      "grad_norm": 9.517567846059583,
      "learning_rate": 7.5e-07,
      "loss": 1.1474,
      "step": 16
    },
    {
      "epoch": 0.04176904176904177,
      "grad_norm": 9.825919996727293,
      "learning_rate": 8e-07,
      "loss": 1.1725,
      "step": 17
    },
    {
      "epoch": 0.044226044226044224,
      "grad_norm": 9.370499752545927,
      "learning_rate": 8.499999999999999e-07,
      "loss": 1.2048,
      "step": 18
    },
    {
      "epoch": 0.04668304668304668,
      "grad_norm": 8.582608952468464,
      "learning_rate": 9e-07,
      "loss": 1.1428,
      "step": 19
    },
    {
      "epoch": 0.04914004914004914,
      "grad_norm": 7.4140901371154,
      "learning_rate": 9.499999999999999e-07,
      "loss": 1.1583,
      "step": 20
    },
    {
      "epoch": 0.051597051597051594,
      "grad_norm": 6.890519345341131,
      "learning_rate": 1e-06,
      "loss": 1.1961,
      "step": 21
    },
    {
      "epoch": 0.05405405405405406,
      "grad_norm": 5.47854198998431,
      "learning_rate": 1.05e-06,
      "loss": 1.126,
      "step": 22
    },
    {
      "epoch": 0.056511056511056514,
      "grad_norm": 5.350914917259245,
      "learning_rate": 1.1e-06,
      "loss": 1.1484,
      "step": 23
    },
    {
      "epoch": 0.05896805896805897,
      "grad_norm": 4.114918765551221,
      "learning_rate": 1.1499999999999998e-06,
      "loss": 1.1328,
      "step": 24
    },
    {
      "epoch": 0.06142506142506143,
      "grad_norm": 3.535376327350945,
      "learning_rate": 1.2e-06,
      "loss": 1.0889,
      "step": 25
    },
    {
      "epoch": 0.06388206388206388,
      "grad_norm": 3.2480152784573586,
      "learning_rate": 1.2499999999999999e-06,
      "loss": 1.0437,
      "step": 26
    },
    {
      "epoch": 0.06633906633906633,
      "grad_norm": 3.4795119249436293,
      "learning_rate": 1.3e-06,
      "loss": 1.0966,
      "step": 27
    },
    {
      "epoch": 0.0687960687960688,
      "grad_norm": 2.8566181371095047,
      "learning_rate": 1.35e-06,
      "loss": 1.1234,
      "step": 28
    },
    {
      "epoch": 0.07125307125307126,
      "grad_norm": 3.6578709990785048,
      "learning_rate": 1.4e-06,
      "loss": 1.1265,
      "step": 29
    },
    {
      "epoch": 0.07371007371007371,
      "grad_norm": 3.541143081282278,
      "learning_rate": 1.4499999999999999e-06,
      "loss": 1.0543,
      "step": 30
    },
    {
      "epoch": 0.07616707616707617,
      "grad_norm": 6.531880288467122,
      "learning_rate": 1.5e-06,
      "loss": 1.1109,
      "step": 31
    },
    {
      "epoch": 0.07862407862407862,
      "grad_norm": 6.4715106690759,
      "learning_rate": 1.55e-06,
      "loss": 1.1257,
      "step": 32
    },
    {
      "epoch": 0.08108108108108109,
      "grad_norm": 5.399614723171929,
      "learning_rate": 1.6e-06,
      "loss": 1.0694,
      "step": 33
    },
    {
      "epoch": 0.08353808353808354,
      "grad_norm": 5.447520160063898,
      "learning_rate": 1.6499999999999999e-06,
      "loss": 1.047,
      "step": 34
    },
    {
      "epoch": 0.085995085995086,
      "grad_norm": 15.759399235218536,
      "learning_rate": 1.6999999999999998e-06,
      "loss": 1.0684,
      "step": 35
    },
    {
      "epoch": 0.08845208845208845,
      "grad_norm": 6.138108825882396,
      "learning_rate": 1.75e-06,
      "loss": 1.0568,
      "step": 36
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 28.96303499581736,
      "learning_rate": 1.8e-06,
      "loss": 1.0748,
      "step": 37
    },
    {
      "epoch": 0.09336609336609336,
      "grad_norm": 3.400779377353331,
      "learning_rate": 1.85e-06,
      "loss": 1.0841,
      "step": 38
    },
    {
      "epoch": 0.09582309582309582,
      "grad_norm": 3.2088443423156305,
      "learning_rate": 1.8999999999999998e-06,
      "loss": 1.0637,
      "step": 39
    },
    {
      "epoch": 0.09828009828009827,
      "grad_norm": 3.650839591876822,
      "learning_rate": 1.95e-06,
      "loss": 1.065,
      "step": 40
    },
    {
      "epoch": 0.10073710073710074,
      "grad_norm": 2.48874031529166,
      "learning_rate": 2e-06,
      "loss": 1.0325,
      "step": 41
    },
    {
      "epoch": 0.10319410319410319,
      "grad_norm": 2.4764261500155436,
      "learning_rate": 1.9999917626554467e-06,
      "loss": 1.0484,
      "step": 42
    },
    {
      "epoch": 0.10565110565110565,
      "grad_norm": 2.0092623603336794,
      "learning_rate": 1.9999670507574944e-06,
      "loss": 1.0859,
      "step": 43
    },
    {
      "epoch": 0.10810810810810811,
      "grad_norm": 1.915125996872455,
      "learning_rate": 1.9999258647132643e-06,
      "loss": 1.0423,
      "step": 44
    },
    {
      "epoch": 0.11056511056511056,
      "grad_norm": 1.8459238588876408,
      "learning_rate": 1.9998682052012837e-06,
      "loss": 1.0686,
      "step": 45
    },
    {
      "epoch": 0.11302211302211303,
      "grad_norm": 2.238673168744122,
      "learning_rate": 1.9997940731714745e-06,
      "loss": 1.0601,
      "step": 46
    },
    {
      "epoch": 0.11547911547911548,
      "grad_norm": 4.306515287574078,
      "learning_rate": 1.9997034698451393e-06,
      "loss": 1.0367,
      "step": 47
    },
    {
      "epoch": 0.11793611793611794,
      "grad_norm": 5.689973830908464,
      "learning_rate": 1.9995963967149398e-06,
      "loss": 1.0542,
      "step": 48
    },
    {
      "epoch": 0.12039312039312039,
      "grad_norm": 2.1880376147294522,
      "learning_rate": 1.999472855544872e-06,
      "loss": 1.0635,
      "step": 49
    },
    {
      "epoch": 0.12285012285012285,
      "grad_norm": 3.433335591384566,
      "learning_rate": 1.999332848370239e-06,
      "loss": 1.0323,
      "step": 50
    },
    {
      "epoch": 0.12530712530712532,
      "grad_norm": 1.8169785229927564,
      "learning_rate": 1.9991763774976158e-06,
      "loss": 1.0218,
      "step": 51
    },
    {
      "epoch": 0.12776412776412777,
      "grad_norm": 1.8670733937364128,
      "learning_rate": 1.9990034455048096e-06,
      "loss": 1.0405,
      "step": 52
    },
    {
      "epoch": 0.13022113022113022,
      "grad_norm": 2.439448369982022,
      "learning_rate": 1.998814055240823e-06,
      "loss": 1.0519,
      "step": 53
    },
    {
      "epoch": 0.13267813267813267,
      "grad_norm": 1.5940089652939573,
      "learning_rate": 1.9986082098258006e-06,
      "loss": 1.0359,
      "step": 54
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 1.7404888899145354,
      "learning_rate": 1.9983859126509824e-06,
      "loss": 1.0087,
      "step": 55
    },
    {
      "epoch": 0.1375921375921376,
      "grad_norm": 2.2476917865688026,
      "learning_rate": 1.998147167378645e-06,
      "loss": 1.0138,
      "step": 56
    },
    {
      "epoch": 0.14004914004914004,
      "grad_norm": 1.8002247115537675,
      "learning_rate": 1.9978919779420425e-06,
      "loss": 1.0488,
      "step": 57
    },
    {
      "epoch": 0.14250614250614252,
      "grad_norm": 1.663251752279236,
      "learning_rate": 1.9976203485453413e-06,
      "loss": 0.9809,
      "step": 58
    },
    {
      "epoch": 0.14496314496314497,
      "grad_norm": 1.7653797682084729,
      "learning_rate": 1.9973322836635515e-06,
      "loss": 1.0686,
      "step": 59
    },
    {
      "epoch": 0.14742014742014742,
      "grad_norm": 1.5336222427197481,
      "learning_rate": 1.9970277880424526e-06,
      "loss": 0.986,
      "step": 60
    },
    {
      "epoch": 0.14987714987714987,
      "grad_norm": 1.4421704567479543,
      "learning_rate": 1.9967068666985146e-06,
      "loss": 0.9948,
      "step": 61
    },
    {
      "epoch": 0.15233415233415235,
      "grad_norm": 1.4042358001427657,
      "learning_rate": 1.996369524918818e-06,
      "loss": 0.967,
      "step": 62
    },
    {
      "epoch": 0.1547911547911548,
      "grad_norm": 1.4295825578558987,
      "learning_rate": 1.9960157682609633e-06,
      "loss": 0.9723,
      "step": 63
    },
    {
      "epoch": 0.15724815724815724,
      "grad_norm": 1.6093180146467703,
      "learning_rate": 1.9956456025529804e-06,
      "loss": 0.9737,
      "step": 64
    },
    {
      "epoch": 0.1597051597051597,
      "grad_norm": 1.7273275051707897,
      "learning_rate": 1.9952590338932358e-06,
      "loss": 0.9935,
      "step": 65
    },
    {
      "epoch": 0.16216216216216217,
      "grad_norm": 1.647950655242118,
      "learning_rate": 1.994856068650327e-06,
      "loss": 1.0509,
      "step": 66
    },
    {
      "epoch": 0.16461916461916462,
      "grad_norm": 1.6052330571582885,
      "learning_rate": 1.994436713462982e-06,
      "loss": 1.0295,
      "step": 67
    },
    {
      "epoch": 0.16707616707616707,
      "grad_norm": 1.694905424784479,
      "learning_rate": 1.994000975239946e-06,
      "loss": 1.0256,
      "step": 68
    },
    {
      "epoch": 0.16953316953316952,
      "grad_norm": 1.5039835360361733,
      "learning_rate": 1.9935488611598713e-06,
      "loss": 1.0347,
      "step": 69
    },
    {
      "epoch": 0.171990171990172,
      "grad_norm": 1.734126157202943,
      "learning_rate": 1.9930803786711967e-06,
      "loss": 1.023,
      "step": 70
    },
    {
      "epoch": 0.17444717444717445,
      "grad_norm": 1.5242931763881968,
      "learning_rate": 1.9925955354920264e-06,
      "loss": 1.0406,
      "step": 71
    },
    {
      "epoch": 0.1769041769041769,
      "grad_norm": 1.606370733528827,
      "learning_rate": 1.99209433961e-06,
      "loss": 1.0222,
      "step": 72
    },
    {
      "epoch": 0.17936117936117937,
      "grad_norm": 2.4762050432839318,
      "learning_rate": 1.991576799282164e-06,
      "loss": 1.0278,
      "step": 73
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 1.4747727870105878,
      "learning_rate": 1.9910429230348343e-06,
      "loss": 0.9779,
      "step": 74
    },
    {
      "epoch": 0.18427518427518427,
      "grad_norm": 2.0977776864765167,
      "learning_rate": 1.990492719663457e-06,
      "loss": 1.0235,
      "step": 75
    },
    {
      "epoch": 0.18673218673218672,
      "grad_norm": 1.9728896794221442,
      "learning_rate": 1.9899261982324607e-06,
      "loss": 0.9926,
      "step": 76
    },
    {
      "epoch": 0.1891891891891892,
      "grad_norm": 1.7736978075188574,
      "learning_rate": 1.98934336807511e-06,
      "loss": 1.0354,
      "step": 77
    },
    {
      "epoch": 0.19164619164619165,
      "grad_norm": 2.1716440393079464,
      "learning_rate": 1.988744238793351e-06,
      "loss": 0.977,
      "step": 78
    },
    {
      "epoch": 0.1941031941031941,
      "grad_norm": 1.599345820298087,
      "learning_rate": 1.9881288202576518e-06,
      "loss": 0.9685,
      "step": 79
    },
    {
      "epoch": 0.19656019656019655,
      "grad_norm": 1.4582630785882413,
      "learning_rate": 1.9874971226068415e-06,
      "loss": 0.943,
      "step": 80
    },
    {
      "epoch": 0.19901719901719903,
      "grad_norm": 1.4275025088638074,
      "learning_rate": 1.9868491562479424e-06,
      "loss": 1.0295,
      "step": 81
    },
    {
      "epoch": 0.20147420147420148,
      "grad_norm": 1.609247365977497,
      "learning_rate": 1.9861849318559994e-06,
      "loss": 0.9531,
      "step": 82
    },
    {
      "epoch": 0.20393120393120392,
      "grad_norm": 1.711398906589491,
      "learning_rate": 1.9855044603739028e-06,
      "loss": 1.0302,
      "step": 83
    },
    {
      "epoch": 0.20638820638820637,
      "grad_norm": 1.56099328882278,
      "learning_rate": 1.984807753012208e-06,
      "loss": 0.9684,
      "step": 84
    },
    {
      "epoch": 0.20884520884520885,
      "grad_norm": 1.5428758027146767,
      "learning_rate": 1.9840948212489527e-06,
      "loss": 1.0129,
      "step": 85
    },
    {
      "epoch": 0.2113022113022113,
      "grad_norm": 1.9669978126079752,
      "learning_rate": 1.983365676829466e-06,
      "loss": 0.9976,
      "step": 86
    },
    {
      "epoch": 0.21375921375921375,
      "grad_norm": 1.5144216325008544,
      "learning_rate": 1.9826203317661756e-06,
      "loss": 0.9967,
      "step": 87
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 2.1395022590018367,
      "learning_rate": 1.9818587983384094e-06,
      "loss": 0.9693,
      "step": 88
    },
    {
      "epoch": 0.21867321867321868,
      "grad_norm": 1.5091268331439933,
      "learning_rate": 1.981081089092194e-06,
      "loss": 0.9271,
      "step": 89
    },
    {
      "epoch": 0.22113022113022113,
      "grad_norm": 1.8173026339961775,
      "learning_rate": 1.9802872168400476e-06,
      "loss": 1.0819,
      "step": 90
    },
    {
      "epoch": 0.22358722358722358,
      "grad_norm": 1.5986438761107704,
      "learning_rate": 1.979477194660769e-06,
      "loss": 0.9594,
      "step": 91
    },
    {
      "epoch": 0.22604422604422605,
      "grad_norm": 1.5027188037219825,
      "learning_rate": 1.978651035899221e-06,
      "loss": 0.9964,
      "step": 92
    },
    {
      "epoch": 0.2285012285012285,
      "grad_norm": 1.3856569445993616,
      "learning_rate": 1.977808754166113e-06,
      "loss": 0.9476,
      "step": 93
    },
    {
      "epoch": 0.23095823095823095,
      "grad_norm": 1.445303089718021,
      "learning_rate": 1.976950363337774e-06,
      "loss": 0.9664,
      "step": 94
    },
    {
      "epoch": 0.2334152334152334,
      "grad_norm": 1.9267867671090184,
      "learning_rate": 1.9760758775559273e-06,
      "loss": 1.0446,
      "step": 95
    },
    {
      "epoch": 0.23587223587223588,
      "grad_norm": 1.4687411243380588,
      "learning_rate": 1.975185311227453e-06,
      "loss": 0.9483,
      "step": 96
    },
    {
      "epoch": 0.23832923832923833,
      "grad_norm": 1.5489115105120785,
      "learning_rate": 1.9742786790241546e-06,
      "loss": 1.0073,
      "step": 97
    },
    {
      "epoch": 0.24078624078624078,
      "grad_norm": 2.5463794252310676,
      "learning_rate": 1.9733559958825166e-06,
      "loss": 1.0046,
      "step": 98
    },
    {
      "epoch": 0.24324324324324326,
      "grad_norm": 1.56125807056489,
      "learning_rate": 1.9724172770034566e-06,
      "loss": 0.971,
      "step": 99
    },
    {
      "epoch": 0.2457002457002457,
      "grad_norm": 1.740701138818393,
      "learning_rate": 1.971462537852076e-06,
      "loss": 0.9856,
      "step": 100
    },
    {
      "epoch": 0.24815724815724816,
      "grad_norm": 1.4470402917114091,
      "learning_rate": 1.970491794157405e-06,
      "loss": 0.9897,
      "step": 101
    },
    {
      "epoch": 0.25061425061425063,
      "grad_norm": 1.5706400471340238,
      "learning_rate": 1.9695050619121456e-06,
      "loss": 0.9328,
      "step": 102
    },
    {
      "epoch": 0.25307125307125306,
      "grad_norm": 1.9773708156391945,
      "learning_rate": 1.9685023573724034e-06,
      "loss": 0.9719,
      "step": 103
    },
    {
      "epoch": 0.25552825552825553,
      "grad_norm": 1.9195746558098163,
      "learning_rate": 1.9674836970574253e-06,
      "loss": 0.978,
      "step": 104
    },
    {
      "epoch": 0.257985257985258,
      "grad_norm": 1.8027098570962006,
      "learning_rate": 1.966449097749322e-06,
      "loss": 1.0023,
      "step": 105
    },
    {
      "epoch": 0.26044226044226043,
      "grad_norm": 1.7805523167150648,
      "learning_rate": 1.965398576492796e-06,
      "loss": 0.9768,
      "step": 106
    },
    {
      "epoch": 0.2628992628992629,
      "grad_norm": 1.9751836869745634,
      "learning_rate": 1.9643321505948585e-06,
      "loss": 1.0132,
      "step": 107
    },
    {
      "epoch": 0.26535626535626533,
      "grad_norm": 1.6135714113454362,
      "learning_rate": 1.9632498376245444e-06,
      "loss": 1.0169,
      "step": 108
    },
    {
      "epoch": 0.2678132678132678,
      "grad_norm": 1.6366575829852479,
      "learning_rate": 1.9621516554126237e-06,
      "loss": 0.9618,
      "step": 109
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 1.4439243574845881,
      "learning_rate": 1.9610376220513063e-06,
      "loss": 1.0111,
      "step": 110
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 1.574054550140938,
      "learning_rate": 1.9599077558939466e-06,
      "loss": 0.9979,
      "step": 111
    },
    {
      "epoch": 0.2751842751842752,
      "grad_norm": 1.9768484700868765,
      "learning_rate": 1.958762075554737e-06,
      "loss": 1.0266,
      "step": 112
    },
    {
      "epoch": 0.27764127764127766,
      "grad_norm": 1.6821288693476542,
      "learning_rate": 1.957600599908406e-06,
      "loss": 0.983,
      "step": 113
    },
    {
      "epoch": 0.2800982800982801,
      "grad_norm": 1.6250908693533597,
      "learning_rate": 1.956423348089903e-06,
      "loss": 0.9735,
      "step": 114
    },
    {
      "epoch": 0.28255528255528256,
      "grad_norm": 1.5158186810772534,
      "learning_rate": 1.9552303394940857e-06,
      "loss": 0.9767,
      "step": 115
    },
    {
      "epoch": 0.28501228501228504,
      "grad_norm": 1.7591914283817052,
      "learning_rate": 1.9540215937754004e-06,
      "loss": 0.9515,
      "step": 116
    },
    {
      "epoch": 0.28746928746928746,
      "grad_norm": 1.329980995859771,
      "learning_rate": 1.952797130847557e-06,
      "loss": 0.944,
      "step": 117
    },
    {
      "epoch": 0.28992628992628994,
      "grad_norm": 1.7122579873222803,
      "learning_rate": 1.951556970883201e-06,
      "loss": 1.0098,
      "step": 118
    },
    {
      "epoch": 0.29238329238329236,
      "grad_norm": 1.8427163819779844,
      "learning_rate": 1.9503011343135825e-06,
      "loss": 0.9815,
      "step": 119
    },
    {
      "epoch": 0.29484029484029484,
      "grad_norm": 1.3722808372435795,
      "learning_rate": 1.9490296418282184e-06,
      "loss": 0.9914,
      "step": 120
    },
    {
      "epoch": 0.2972972972972973,
      "grad_norm": 1.2858110734745247,
      "learning_rate": 1.9477425143745523e-06,
      "loss": 0.9727,
      "step": 121
    },
    {
      "epoch": 0.29975429975429974,
      "grad_norm": 1.3478569354201173,
      "learning_rate": 1.9464397731576094e-06,
      "loss": 0.9557,
      "step": 122
    },
    {
      "epoch": 0.3022113022113022,
      "grad_norm": 1.534212309472206,
      "learning_rate": 1.945121439639645e-06,
      "loss": 0.9995,
      "step": 123
    },
    {
      "epoch": 0.3046683046683047,
      "grad_norm": 1.4583668620611645,
      "learning_rate": 1.943787535539795e-06,
      "loss": 0.9244,
      "step": 124
    },
    {
      "epoch": 0.3071253071253071,
      "grad_norm": 1.3048849492711914,
      "learning_rate": 1.9424380828337143e-06,
      "loss": 0.9749,
      "step": 125
    },
    {
      "epoch": 0.3095823095823096,
      "grad_norm": 1.9214890498265684,
      "learning_rate": 1.9410731037532167e-06,
      "loss": 1.0126,
      "step": 126
    },
    {
      "epoch": 0.31203931203931207,
      "grad_norm": 1.492998355230107,
      "learning_rate": 1.9396926207859082e-06,
      "loss": 0.9216,
      "step": 127
    },
    {
      "epoch": 0.3144963144963145,
      "grad_norm": 1.529088663498555,
      "learning_rate": 1.9382966566748166e-06,
      "loss": 1.0266,
      "step": 128
    },
    {
      "epoch": 0.31695331695331697,
      "grad_norm": 1.4961622214991601,
      "learning_rate": 1.9368852344180167e-06,
      "loss": 0.949,
      "step": 129
    },
    {
      "epoch": 0.3194103194103194,
      "grad_norm": 1.5657840764329438,
      "learning_rate": 1.935458377268251e-06,
      "loss": 0.9768,
      "step": 130
    },
    {
      "epoch": 0.32186732186732187,
      "grad_norm": 1.458088079896784,
      "learning_rate": 1.934016108732548e-06,
      "loss": 0.9831,
      "step": 131
    },
    {
      "epoch": 0.32432432432432434,
      "grad_norm": 1.5704249898970843,
      "learning_rate": 1.932558452571833e-06,
      "loss": 0.9845,
      "step": 132
    },
    {
      "epoch": 0.32678132678132676,
      "grad_norm": 1.366875012537187,
      "learning_rate": 1.9310854328005377e-06,
      "loss": 0.9751,
      "step": 133
    },
    {
      "epoch": 0.32923832923832924,
      "grad_norm": 1.6682519282454646,
      "learning_rate": 1.9295970736862064e-06,
      "loss": 0.9278,
      "step": 134
    },
    {
      "epoch": 0.3316953316953317,
      "grad_norm": 1.7010058864881312,
      "learning_rate": 1.928093399749091e-06,
      "loss": 0.9479,
      "step": 135
    },
    {
      "epoch": 0.33415233415233414,
      "grad_norm": 2.774339563092048,
      "learning_rate": 1.926574435761753e-06,
      "loss": 0.9216,
      "step": 136
    },
    {
      "epoch": 0.3366093366093366,
      "grad_norm": 1.5996469164653206,
      "learning_rate": 1.925040206748652e-06,
      "loss": 0.9749,
      "step": 137
    },
    {
      "epoch": 0.33906633906633904,
      "grad_norm": 1.6140642370600065,
      "learning_rate": 1.9234907379857334e-06,
      "loss": 1.0195,
      "step": 138
    },
    {
      "epoch": 0.3415233415233415,
      "grad_norm": 1.6724211149688903,
      "learning_rate": 1.9219260550000143e-06,
      "loss": 1.0086,
      "step": 139
    },
    {
      "epoch": 0.343980343980344,
      "grad_norm": 1.6697054307200352,
      "learning_rate": 1.920346183569159e-06,
      "loss": 1.0177,
      "step": 140
    },
    {
      "epoch": 0.3464373464373464,
      "grad_norm": 1.5507362171936665,
      "learning_rate": 1.91875114972106e-06,
      "loss": 0.9701,
      "step": 141
    },
    {
      "epoch": 0.3488943488943489,
      "grad_norm": 1.572772556059398,
      "learning_rate": 1.9171409797334026e-06,
      "loss": 0.9893,
      "step": 142
    },
    {
      "epoch": 0.35135135135135137,
      "grad_norm": 1.4851773686144312,
      "learning_rate": 1.9155157001332373e-06,
      "loss": 0.9617,
      "step": 143
    },
    {
      "epoch": 0.3538083538083538,
      "grad_norm": 1.3935370189220326,
      "learning_rate": 1.91387533769654e-06,
      "loss": 0.9647,
      "step": 144
    },
    {
      "epoch": 0.35626535626535627,
      "grad_norm": 2.086032038197472,
      "learning_rate": 1.912219919447772e-06,
      "loss": 1.0138,
      "step": 145
    },
    {
      "epoch": 0.35872235872235875,
      "grad_norm": 1.8988521674993861,
      "learning_rate": 1.9105494726594343e-06,
      "loss": 0.9619,
      "step": 146
    },
    {
      "epoch": 0.36117936117936117,
      "grad_norm": 1.7707337422933225,
      "learning_rate": 1.9088640248516184e-06,
      "loss": 0.9689,
      "step": 147
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 10.58019032383696,
      "learning_rate": 1.9071636037915533e-06,
      "loss": 0.9886,
      "step": 148
    },
    {
      "epoch": 0.36609336609336607,
      "grad_norm": 1.6326349036691947,
      "learning_rate": 1.9054482374931466e-06,
      "loss": 0.9928,
      "step": 149
    },
    {
      "epoch": 0.36855036855036855,
      "grad_norm": 1.4602979723959129,
      "learning_rate": 1.9037179542165251e-06,
      "loss": 0.9451,
      "step": 150
    },
    {
      "epoch": 0.371007371007371,
      "grad_norm": 1.5983113709927816,
      "learning_rate": 1.9019727824675682e-06,
      "loss": 0.9488,
      "step": 151
    },
    {
      "epoch": 0.37346437346437344,
      "grad_norm": 1.394284340663908,
      "learning_rate": 1.9002127509974373e-06,
      "loss": 0.9717,
      "step": 152
    },
    {
      "epoch": 0.3759213759213759,
      "grad_norm": 1.856689232868256,
      "learning_rate": 1.8984378888021041e-06,
      "loss": 0.9286,
      "step": 153
    },
    {
      "epoch": 0.3783783783783784,
      "grad_norm": 1.5404154844189102,
      "learning_rate": 1.8966482251218714e-06,
      "loss": 0.9625,
      "step": 154
    },
    {
      "epoch": 0.3808353808353808,
      "grad_norm": 1.5417688714414741,
      "learning_rate": 1.8948437894408916e-06,
      "loss": 0.9314,
      "step": 155
    },
    {
      "epoch": 0.3832923832923833,
      "grad_norm": 1.8457706124864284,
      "learning_rate": 1.8930246114866822e-06,
      "loss": 1.0147,
      "step": 156
    },
    {
      "epoch": 0.3857493857493858,
      "grad_norm": 1.4971057129868792,
      "learning_rate": 1.8911907212296341e-06,
      "loss": 0.9309,
      "step": 157
    },
    {
      "epoch": 0.3882063882063882,
      "grad_norm": 1.6347670467700182,
      "learning_rate": 1.8893421488825188e-06,
      "loss": 0.9621,
      "step": 158
    },
    {
      "epoch": 0.3906633906633907,
      "grad_norm": 1.6324227560172353,
      "learning_rate": 1.8874789248999915e-06,
      "loss": 0.9772,
      "step": 159
    },
    {
      "epoch": 0.3931203931203931,
      "grad_norm": 1.6267750782874413,
      "learning_rate": 1.885601079978088e-06,
      "loss": 0.9861,
      "step": 160
    },
    {
      "epoch": 0.3955773955773956,
      "grad_norm": 1.4406136327704948,
      "learning_rate": 1.8837086450537192e-06,
      "loss": 0.9406,
      "step": 161
    },
    {
      "epoch": 0.39803439803439805,
      "grad_norm": 1.5622857080744104,
      "learning_rate": 1.8818016513041624e-06,
      "loss": 0.94,
      "step": 162
    },
    {
      "epoch": 0.4004914004914005,
      "grad_norm": 1.7312279395333572,
      "learning_rate": 1.8798801301465468e-06,
      "loss": 0.96,
      "step": 163
    },
    {
      "epoch": 0.40294840294840295,
      "grad_norm": 1.5736688729195003,
      "learning_rate": 1.877944113237336e-06,
      "loss": 0.9793,
      "step": 164
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 1.501899237192728,
      "learning_rate": 1.8759936324718066e-06,
      "loss": 0.9178,
      "step": 165
    },
    {
      "epoch": 0.40786240786240785,
      "grad_norm": 1.3483386139829852,
      "learning_rate": 1.874028719983523e-06,
      "loss": 0.9593,
      "step": 166
    },
    {
      "epoch": 0.4103194103194103,
      "grad_norm": 1.5326296765065217,
      "learning_rate": 1.8720494081438077e-06,
      "loss": 0.9794,
      "step": 167
    },
    {
      "epoch": 0.41277641277641275,
      "grad_norm": 1.5759774818522712,
      "learning_rate": 1.870055729561207e-06,
      "loss": 0.9418,
      "step": 168
    },
    {
      "epoch": 0.4152334152334152,
      "grad_norm": 2.475109331033288,
      "learning_rate": 1.8680477170809572e-06,
      "loss": 0.9933,
      "step": 169
    },
    {
      "epoch": 0.4176904176904177,
      "grad_norm": 1.4136668445270784,
      "learning_rate": 1.8660254037844386e-06,
      "loss": 0.9287,
      "step": 170
    },
    {
      "epoch": 0.4201474201474201,
      "grad_norm": 1.7485785437438173,
      "learning_rate": 1.863988822988634e-06,
      "loss": 0.9749,
      "step": 171
    },
    {
      "epoch": 0.4226044226044226,
      "grad_norm": 1.825009421972696,
      "learning_rate": 1.8619380082455796e-06,
      "loss": 0.9637,
      "step": 172
    },
    {
      "epoch": 0.4250614250614251,
      "grad_norm": 1.6686645342967068,
      "learning_rate": 1.85987299334181e-06,
      "loss": 0.9951,
      "step": 173
    },
    {
      "epoch": 0.4275184275184275,
      "grad_norm": 1.9731456114498873,
      "learning_rate": 1.857793812297804e-06,
      "loss": 0.9426,
      "step": 174
    },
    {
      "epoch": 0.42997542997543,
      "grad_norm": 1.5558317172807916,
      "learning_rate": 1.855700499367423e-06,
      "loss": 0.9787,
      "step": 175
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 2.098470725111635,
      "learning_rate": 1.8535930890373463e-06,
      "loss": 0.9936,
      "step": 176
    },
    {
      "epoch": 0.4348894348894349,
      "grad_norm": 1.525785332653322,
      "learning_rate": 1.8514716160265044e-06,
      "loss": 0.9727,
      "step": 177
    },
    {
      "epoch": 0.43734643734643736,
      "grad_norm": 1.4913555879508107,
      "learning_rate": 1.8493361152855056e-06,
      "loss": 0.9748,
      "step": 178
    },
    {
      "epoch": 0.4398034398034398,
      "grad_norm": 1.340195087338962,
      "learning_rate": 1.84718662199606e-06,
      "loss": 0.9706,
      "step": 179
    },
    {
      "epoch": 0.44226044226044225,
      "grad_norm": 2.5585128719755867,
      "learning_rate": 1.8450231715704026e-06,
      "loss": 0.9467,
      "step": 180
    },
    {
      "epoch": 0.44471744471744473,
      "grad_norm": 1.3066387641923265,
      "learning_rate": 1.8428457996507053e-06,
      "loss": 0.9194,
      "step": 181
    },
    {
      "epoch": 0.44717444717444715,
      "grad_norm": 1.6465015301936612,
      "learning_rate": 1.8406545421084938e-06,
      "loss": 1.0045,
      "step": 182
    },
    {
      "epoch": 0.44963144963144963,
      "grad_norm": 1.4122218928192685,
      "learning_rate": 1.8384494350440552e-06,
      "loss": 0.9596,
      "step": 183
    },
    {
      "epoch": 0.4520884520884521,
      "grad_norm": 1.4364915893717223,
      "learning_rate": 1.8362305147858427e-06,
      "loss": 0.9375,
      "step": 184
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 1.5320964201665865,
      "learning_rate": 1.833997817889878e-06,
      "loss": 0.98,
      "step": 185
    },
    {
      "epoch": 0.457002457002457,
      "grad_norm": 1.8963928416673594,
      "learning_rate": 1.8317513811391477e-06,
      "loss": 0.9499,
      "step": 186
    },
    {
      "epoch": 0.4594594594594595,
      "grad_norm": 1.4450513723222074,
      "learning_rate": 1.8294912415429993e-06,
      "loss": 0.9387,
      "step": 187
    },
    {
      "epoch": 0.4619164619164619,
      "grad_norm": 1.4227834836237356,
      "learning_rate": 1.8272174363365298e-06,
      "loss": 0.9856,
      "step": 188
    },
    {
      "epoch": 0.4643734643734644,
      "grad_norm": 1.4225544869881186,
      "learning_rate": 1.8249300029799732e-06,
      "loss": 0.9201,
      "step": 189
    },
    {
      "epoch": 0.4668304668304668,
      "grad_norm": 1.6580547740077536,
      "learning_rate": 1.822628979158083e-06,
      "loss": 0.9746,
      "step": 190
    },
    {
      "epoch": 0.4692874692874693,
      "grad_norm": 1.6225782675239633,
      "learning_rate": 1.8203144027795108e-06,
      "loss": 0.9493,
      "step": 191
    },
    {
      "epoch": 0.47174447174447176,
      "grad_norm": 1.8558149480612793,
      "learning_rate": 1.8179863119761834e-06,
      "loss": 0.9551,
      "step": 192
    },
    {
      "epoch": 0.4742014742014742,
      "grad_norm": 1.5948966144212158,
      "learning_rate": 1.815644745102673e-06,
      "loss": 0.9799,
      "step": 193
    },
| { | |
| "epoch": 0.47665847665847666, | |
| "grad_norm": 1.464419249562776, | |
| "learning_rate": 1.8132897407355654e-06, | |
| "loss": 0.9634, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.47911547911547914, | |
| "grad_norm": 1.6709063152230734, | |
| "learning_rate": 1.8109213376728257e-06, | |
| "loss": 0.977, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.48157248157248156, | |
| "grad_norm": 1.4980373204140025, | |
| "learning_rate": 1.8085395749331579e-06, | |
| "loss": 0.9839, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.48402948402948404, | |
| "grad_norm": 1.4728568491372855, | |
| "learning_rate": 1.8061444917553627e-06, | |
| "loss": 0.9791, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.4864864864864865, | |
| "grad_norm": 1.3529262817353658, | |
| "learning_rate": 1.803736127597691e-06, | |
| "loss": 0.9534, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.48894348894348894, | |
| "grad_norm": 1.4726330094781561, | |
| "learning_rate": 1.8013145221371935e-06, | |
| "loss": 0.9397, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.4914004914004914, | |
| "grad_norm": 1.4468557644109676, | |
| "learning_rate": 1.7988797152690668e-06, | |
| "loss": 0.8993, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.49385749385749383, | |
| "grad_norm": 1.39576948942371, | |
| "learning_rate": 1.796431747105998e-06, | |
| "loss": 0.945, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.4963144963144963, | |
| "grad_norm": 1.4248364851440443, | |
| "learning_rate": 1.793970657977501e-06, | |
| "loss": 0.9669, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4987714987714988, | |
| "grad_norm": 1.4850571599264226, | |
| "learning_rate": 1.791496488429254e-06, | |
| "loss": 0.9572, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5012285012285013, | |
| "grad_norm": 1.4619304758472125, | |
| "learning_rate": 1.7890092792224313e-06, | |
| "loss": 0.9604, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5036855036855037, | |
| "grad_norm": 1.6208674632812228, | |
| "learning_rate": 1.786509071333031e-06, | |
| "loss": 0.9577, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5061425061425061, | |
| "grad_norm": 1.4904987175582993, | |
| "learning_rate": 1.7839959059512013e-06, | |
| "loss": 0.9039, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5085995085995086, | |
| "grad_norm": 1.4636256072722436, | |
| "learning_rate": 1.7814698244805601e-06, | |
| "loss": 0.9481, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5110565110565111, | |
| "grad_norm": 1.3573990171789403, | |
| "learning_rate": 1.7789308685375146e-06, | |
| "loss": 0.9531, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5135135135135135, | |
| "grad_norm": 1.4176202461890393, | |
| "learning_rate": 1.7763790799505745e-06, | |
| "loss": 0.959, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.515970515970516, | |
| "grad_norm": 1.5707837841315286, | |
| "learning_rate": 1.773814500759663e-06, | |
| "loss": 0.9798, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5184275184275184, | |
| "grad_norm": 1.4706701895149787, | |
| "learning_rate": 1.7712371732154256e-06, | |
| "loss": 0.9928, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5208845208845209, | |
| "grad_norm": 2.069991518090132, | |
| "learning_rate": 1.7686471397785318e-06, | |
| "loss": 1.0153, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5233415233415234, | |
| "grad_norm": 1.443867260475285, | |
| "learning_rate": 1.766044443118978e-06, | |
| "loss": 0.9452, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5257985257985258, | |
| "grad_norm": 1.5151736278472239, | |
| "learning_rate": 1.7634291261153817e-06, | |
| "loss": 0.9114, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5282555282555282, | |
| "grad_norm": 1.3847804036493465, | |
| "learning_rate": 1.7608012318542777e-06, | |
| "loss": 0.9144, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5307125307125307, | |
| "grad_norm": 1.415085937475802, | |
| "learning_rate": 1.7581608036294075e-06, | |
| "loss": 0.9726, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5331695331695332, | |
| "grad_norm": 1.558702474871841, | |
| "learning_rate": 1.7555078849410042e-06, | |
| "loss": 0.9243, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5356265356265356, | |
| "grad_norm": 1.826623068304597, | |
| "learning_rate": 1.7528425194950793e-06, | |
| "loss": 1.0106, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.538083538083538, | |
| "grad_norm": 1.5238015749509666, | |
| "learning_rate": 1.7501647512026994e-06, | |
| "loss": 1.0093, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 1.6514187408836531, | |
| "learning_rate": 1.7474746241792646e-06, | |
| "loss": 0.991, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.542997542997543, | |
| "grad_norm": 1.5401870547381438, | |
| "learning_rate": 1.7447721827437819e-06, | |
| "loss": 0.8904, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 1.58085107300811, | |
| "learning_rate": 1.742057471418133e-06, | |
| "loss": 0.907, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.547911547911548, | |
| "grad_norm": 1.4376088351554372, | |
| "learning_rate": 1.7393305349263432e-06, | |
| "loss": 0.9369, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5503685503685504, | |
| "grad_norm": 1.3807320173451696, | |
| "learning_rate": 1.7365914181938437e-06, | |
| "loss": 0.8945, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5528255528255528, | |
| "grad_norm": 1.5104969018841128, | |
| "learning_rate": 1.7338401663467309e-06, | |
| "loss": 0.9537, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5552825552825553, | |
| "grad_norm": 1.4036248703567802, | |
| "learning_rate": 1.7310768247110232e-06, | |
| "loss": 0.9112, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5577395577395577, | |
| "grad_norm": 1.4153113695817048, | |
| "learning_rate": 1.7283014388119158e-06, | |
| "loss": 0.9285, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5601965601965602, | |
| "grad_norm": 1.6827361424168579, | |
| "learning_rate": 1.7255140543730282e-06, | |
| "loss": 0.9368, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5626535626535627, | |
| "grad_norm": 1.52442672792253, | |
| "learning_rate": 1.7227147173156523e-06, | |
| "loss": 0.9838, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5651105651105651, | |
| "grad_norm": 1.8631309960035536, | |
| "learning_rate": 1.7199034737579958e-06, | |
| "loss": 0.9311, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5675675675675675, | |
| "grad_norm": 1.38816407656762, | |
| "learning_rate": 1.7170803700144224e-06, | |
| "loss": 0.945, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5700245700245701, | |
| "grad_norm": 1.5370320107384547, | |
| "learning_rate": 1.7142454525946889e-06, | |
| "loss": 0.955, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5724815724815725, | |
| "grad_norm": 1.5632251869407223, | |
| "learning_rate": 1.7113987682031778e-06, | |
| "loss": 0.9384, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5749385749385749, | |
| "grad_norm": 1.602474983632772, | |
| "learning_rate": 1.70854036373813e-06, | |
| "loss": 0.992, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5773955773955773, | |
| "grad_norm": 1.3375523516298262, | |
| "learning_rate": 1.70567028629087e-06, | |
| "loss": 0.941, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5798525798525799, | |
| "grad_norm": 1.539702405955399, | |
| "learning_rate": 1.7027885831450317e-06, | |
| "loss": 0.9516, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5823095823095823, | |
| "grad_norm": 1.6116292160800432, | |
| "learning_rate": 1.6998953017757784e-06, | |
| "loss": 0.992, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.5847665847665847, | |
| "grad_norm": 2.3507568939715835, | |
| "learning_rate": 1.6969904898490213e-06, | |
| "loss": 0.9165, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5872235872235873, | |
| "grad_norm": 2.847402959001731, | |
| "learning_rate": 1.694074195220634e-06, | |
| "loss": 0.9985, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.5896805896805897, | |
| "grad_norm": 1.5211685594705786, | |
| "learning_rate": 1.6911464659356632e-06, | |
| "loss": 0.9365, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5921375921375921, | |
| "grad_norm": 2.034636443003393, | |
| "learning_rate": 1.688207350227539e-06, | |
| "loss": 0.9094, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.5945945945945946, | |
| "grad_norm": 1.8768465174864484, | |
| "learning_rate": 1.6852568965172791e-06, | |
| "loss": 0.9898, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.597051597051597, | |
| "grad_norm": 1.7384412195063934, | |
| "learning_rate": 1.6822951534126908e-06, | |
| "loss": 0.9955, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5995085995085995, | |
| "grad_norm": 1.4908111245385776, | |
| "learning_rate": 1.6793221697075715e-06, | |
| "loss": 0.9503, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.601965601965602, | |
| "grad_norm": 1.3104233864174921, | |
| "learning_rate": 1.6763379943809026e-06, | |
| "loss": 0.9087, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6044226044226044, | |
| "grad_norm": 1.8378209471753764, | |
| "learning_rate": 1.6733426765960456e-06, | |
| "loss": 0.9295, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6068796068796068, | |
| "grad_norm": 1.5513249549838153, | |
| "learning_rate": 1.6703362656999299e-06, | |
| "loss": 0.9622, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6093366093366094, | |
| "grad_norm": 1.950619315682982, | |
| "learning_rate": 1.6673188112222395e-06, | |
| "loss": 0.9114, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6117936117936118, | |
| "grad_norm": 1.5443899853661804, | |
| "learning_rate": 1.6642903628745993e-06, | |
| "loss": 0.9385, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6142506142506142, | |
| "grad_norm": 1.842691343791668, | |
| "learning_rate": 1.6612509705497541e-06, | |
| "loss": 0.9681, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6167076167076168, | |
| "grad_norm": 1.4579884817325401, | |
| "learning_rate": 1.6582006843207479e-06, | |
| "loss": 0.9967, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6191646191646192, | |
| "grad_norm": 1.602595851064425, | |
| "learning_rate": 1.655139554440098e-06, | |
| "loss": 0.9674, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6216216216216216, | |
| "grad_norm": 1.394530716544746, | |
| "learning_rate": 1.652067631338967e-06, | |
| "loss": 0.907, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6240786240786241, | |
| "grad_norm": 1.6411505351669575, | |
| "learning_rate": 1.6489849656263334e-06, | |
| "loss": 0.9602, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6265356265356266, | |
| "grad_norm": 1.7684281051463475, | |
| "learning_rate": 1.6458916080881565e-06, | |
| "loss": 1.0141, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.628992628992629, | |
| "grad_norm": 1.771327098736445, | |
| "learning_rate": 1.6427876096865393e-06, | |
| "loss": 0.9917, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6314496314496314, | |
| "grad_norm": 1.440535084208374, | |
| "learning_rate": 1.6396730215588912e-06, | |
| "loss": 0.9326, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6339066339066339, | |
| "grad_norm": 1.5197592107480333, | |
| "learning_rate": 1.6365478950170833e-06, | |
| "loss": 0.9475, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 1.706431633980188, | |
| "learning_rate": 1.6334122815466031e-06, | |
| "loss": 0.9351, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6388206388206388, | |
| "grad_norm": 1.323673519490258, | |
| "learning_rate": 1.6302662328057085e-06, | |
| "loss": 0.911, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6412776412776413, | |
| "grad_norm": 1.5394197428392233, | |
| "learning_rate": 1.627109800624574e-06, | |
| "loss": 0.9812, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6437346437346437, | |
| "grad_norm": 1.4717326057521631, | |
| "learning_rate": 1.6239430370044386e-06, | |
| "loss": 0.8713, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6461916461916462, | |
| "grad_norm": 1.5141060178146766, | |
| "learning_rate": 1.6207659941167484e-06, | |
| "loss": 0.934, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6486486486486487, | |
| "grad_norm": 1.606823271595554, | |
| "learning_rate": 1.617578724302297e-06, | |
| "loss": 0.9689, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6511056511056511, | |
| "grad_norm": 1.436173862475149, | |
| "learning_rate": 1.6143812800703643e-06, | |
| "loss": 0.9524, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6535626535626535, | |
| "grad_norm": 1.6794632700616086, | |
| "learning_rate": 1.6111737140978491e-06, | |
| "loss": 0.9377, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6560196560196561, | |
| "grad_norm": 1.481170005860829, | |
| "learning_rate": 1.6079560792284045e-06, | |
| "loss": 0.9718, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6584766584766585, | |
| "grad_norm": 1.5488863504284036, | |
| "learning_rate": 1.604728428471564e-06, | |
| "loss": 0.9597, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6609336609336609, | |
| "grad_norm": 1.5751377061062102, | |
| "learning_rate": 1.6014908150018703e-06, | |
| "loss": 0.9741, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6633906633906634, | |
| "grad_norm": 1.8133205336540266, | |
| "learning_rate": 1.598243292157999e-06, | |
| "loss": 0.9234, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6658476658476659, | |
| "grad_norm": 2.014347908541205, | |
| "learning_rate": 1.5949859134418797e-06, | |
| "loss": 0.976, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6683046683046683, | |
| "grad_norm": 1.3995748329733648, | |
| "learning_rate": 1.5917187325178136e-06, | |
| "loss": 0.9182, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6707616707616708, | |
| "grad_norm": 1.536609373517338, | |
| "learning_rate": 1.5884418032115906e-06, | |
| "loss": 0.8921, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6732186732186732, | |
| "grad_norm": 1.7529235851864882, | |
| "learning_rate": 1.5851551795096026e-06, | |
| "loss": 0.9083, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 1.5263377609284405, | |
| "learning_rate": 1.5818589155579529e-06, | |
| "loss": 0.921, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.6781326781326781, | |
| "grad_norm": 1.841830762323041, | |
| "learning_rate": 1.5785530656615652e-06, | |
| "loss": 0.943, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6805896805896806, | |
| "grad_norm": 1.5135421099319812, | |
| "learning_rate": 1.5752376842832898e-06, | |
| "loss": 0.9315, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.683046683046683, | |
| "grad_norm": 1.492488292559091, | |
| "learning_rate": 1.571912826043003e-06, | |
| "loss": 0.891, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6855036855036855, | |
| "grad_norm": 1.9836354455315945, | |
| "learning_rate": 1.5685785457167113e-06, | |
| "loss": 0.8706, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.687960687960688, | |
| "grad_norm": 1.5134741374320924, | |
| "learning_rate": 1.565234898235646e-06, | |
| "loss": 0.9786, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6904176904176904, | |
| "grad_norm": 1.4859838259301754, | |
| "learning_rate": 1.5618819386853603e-06, | |
| "loss": 0.9595, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.6928746928746928, | |
| "grad_norm": 1.567447965101693, | |
| "learning_rate": 1.5585197223048202e-06, | |
| "loss": 0.9373, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6953316953316954, | |
| "grad_norm": 1.361598883424366, | |
| "learning_rate": 1.5551483044854952e-06, | |
| "loss": 0.9697, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.6977886977886978, | |
| "grad_norm": 1.6640657059750952, | |
| "learning_rate": 1.551767740770446e-06, | |
| "loss": 0.9244, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7002457002457002, | |
| "grad_norm": 1.362536522369026, | |
| "learning_rate": 1.5483780868534083e-06, | |
| "loss": 0.981, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7027027027027027, | |
| "grad_norm": 1.6010684699844948, | |
| "learning_rate": 1.544979398577877e-06, | |
| "loss": 1.0169, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7051597051597052, | |
| "grad_norm": 1.6122664157541595, | |
| "learning_rate": 1.5415717319361845e-06, | |
| "loss": 0.9507, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7076167076167076, | |
| "grad_norm": 1.5044459630841276, | |
| "learning_rate": 1.5381551430685796e-06, | |
| "loss": 0.9431, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7100737100737101, | |
| "grad_norm": 1.464490997097453, | |
| "learning_rate": 1.5347296882623016e-06, | |
| "loss": 0.9726, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7125307125307125, | |
| "grad_norm": 1.8674476124140849, | |
| "learning_rate": 1.5312954239506533e-06, | |
| "loss": 0.9406, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.714987714987715, | |
| "grad_norm": 1.3556749625191538, | |
| "learning_rate": 1.5278524067120718e-06, | |
| "loss": 0.8896, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7174447174447175, | |
| "grad_norm": 1.4118443972410692, | |
| "learning_rate": 1.5244006932691954e-06, | |
| "loss": 0.9722, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7199017199017199, | |
| "grad_norm": 1.6225004312255737, | |
| "learning_rate": 1.5209403404879303e-06, | |
| "loss": 0.929, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7223587223587223, | |
| "grad_norm": 1.6022157008964288, | |
| "learning_rate": 1.5174714053765123e-06, | |
| "loss": 0.9497, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7248157248157249, | |
| "grad_norm": 1.5891302329335815, | |
| "learning_rate": 1.5139939450845699e-06, | |
| "loss": 0.973, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 1.6988844584422054, | |
| "learning_rate": 1.5105080169021789e-06, | |
| "loss": 0.9184, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7297297297297297, | |
| "grad_norm": 1.453985001696748, | |
| "learning_rate": 1.5070136782589234e-06, | |
| "loss": 0.9477, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7321867321867321, | |
| "grad_norm": 1.604839180071105, | |
| "learning_rate": 1.5035109867229457e-06, | |
| "loss": 0.9409, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7346437346437347, | |
| "grad_norm": 1.5773771725228862, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.937, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7371007371007371, | |
| "grad_norm": 1.4272988880705397, | |
| "learning_rate": 1.496480775932501e-06, | |
| "loss": 0.9481, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7395577395577395, | |
| "grad_norm": 1.534183187925811, | |
| "learning_rate": 1.492953372498571e-06, | |
| "loss": 0.9111, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.742014742014742, | |
| "grad_norm": 1.6285985984253928, | |
| "learning_rate": 1.4894178478110854e-06, | |
| "loss": 0.949, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7444717444717445, | |
| "grad_norm": 1.521782496026601, | |
| "learning_rate": 1.4858742601167138e-06, | |
| "loss": 0.9405, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7469287469287469, | |
| "grad_norm": 1.522930763087887, | |
| "learning_rate": 1.4823226677949622e-06, | |
| "loss": 0.949, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7493857493857494, | |
| "grad_norm": 5.354032445168916, | |
| "learning_rate": 1.4787631293572092e-06, | |
| "loss": 0.9708, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7518427518427518, | |
| "grad_norm": 1.199402404810751, | |
| "learning_rate": 1.4751957034457446e-06, | |
| "loss": 0.8895, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7542997542997543, | |
| "grad_norm": 1.4889622591506924, | |
| "learning_rate": 1.4716204488328005e-06, | |
| "loss": 0.9258, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.7567567567567568, | |
| "grad_norm": 1.586929861427342, | |
| "learning_rate": 1.4680374244195858e-06, | |
| "loss": 0.9523, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7592137592137592, | |
| "grad_norm": 1.7542006852572127, | |
| "learning_rate": 1.464446689235314e-06, | |
| "loss": 0.9671, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.7616707616707616, | |
| "grad_norm": 1.906886186477315, | |
| "learning_rate": 1.4608483024362302e-06, | |
| "loss": 0.9479, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7641277641277642, | |
| "grad_norm": 1.3426576295326547, | |
| "learning_rate": 1.4572423233046385e-06, | |
| "loss": 0.9245, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7665847665847666, | |
| "grad_norm": 1.544123812480689, | |
| "learning_rate": 1.453628811247924e-06, | |
| "loss": 0.9833, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.769041769041769, | |
| "grad_norm": 1.3070207560130556, | |
| "learning_rate": 1.4500078257975745e-06, | |
| "loss": 0.9518, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7714987714987716, | |
| "grad_norm": 1.953852840958092, | |
| "learning_rate": 1.4463794266081992e-06, | |
| "loss": 0.8871, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.773955773955774, | |
| "grad_norm": 1.5767416529599687, | |
| "learning_rate": 1.4427436734565473e-06, | |
| "loss": 0.9921, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.7764127764127764, | |
| "grad_norm": 1.5600472966901433, | |
| "learning_rate": 1.4391006262405212e-06, | |
| "loss": 0.9068, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7788697788697788, | |
| "grad_norm": 1.8524989491255999, | |
| "learning_rate": 1.4354503449781913e-06, | |
| "loss": 0.8956, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.7813267813267813, | |
| "grad_norm": 1.4701804028885344, | |
| "learning_rate": 1.4317928898068066e-06, | |
| "loss": 0.9547, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7837837837837838, | |
| "grad_norm": 1.505304585103799, | |
| "learning_rate": 1.4281283209818039e-06, | |
| "loss": 0.9214, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.7862407862407862, | |
| "grad_norm": 1.5458422714438642, | |
| "learning_rate": 1.424456698875815e-06, | |
| "loss": 0.9711, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7886977886977887, | |
| "grad_norm": 1.8126365786822831, | |
| "learning_rate": 1.4207780839776732e-06, | |
| "loss": 0.9638, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.7911547911547911, | |
| "grad_norm": 1.3634824425424106, | |
| "learning_rate": 1.4170925368914151e-06, | |
| "loss": 0.925, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7936117936117936, | |
| "grad_norm": 1.5523412959213532, | |
| "learning_rate": 1.413400118335283e-06, | |
| "loss": 0.9681, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.7960687960687961, | |
| "grad_norm": 1.4281815228481956, | |
| "learning_rate": 1.4097008891407245e-06, | |
| "loss": 0.9469, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7985257985257985, | |
| "grad_norm": 1.755464439866289, | |
| "learning_rate": 1.4059949102513913e-06, | |
| "loss": 0.9614, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.800982800982801, | |
| "grad_norm": 1.3755915444522364, | |
| "learning_rate": 1.4022822427221323e-06, | |
| "loss": 0.9344, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8034398034398035, | |
| "grad_norm": 1.6217750884199984, | |
| "learning_rate": 1.3985629477179915e-06, | |
| "loss": 0.9499, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8058968058968059, | |
| "grad_norm": 1.5965334157340827, | |
| "learning_rate": 1.3948370865131978e-06, | |
| "loss": 0.9433, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8083538083538083, | |
| "grad_norm": 1.8371436403022903, | |
| "learning_rate": 1.3911047204901558e-06, | |
| "loss": 0.9899, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 1.8839520322523513, | |
| "learning_rate": 1.387365911138436e-06, | |
| "loss": 0.9664, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8132678132678133, | |
| "grad_norm": 1.495735998773526, | |
| "learning_rate": 1.3836207200537594e-06, | |
| "loss": 0.9784, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8157248157248157, | |
| "grad_norm": 1.4893770976659941, | |
| "learning_rate": 1.3798692089369855e-06, | |
| "loss": 0.9046, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 1.5373781226372853, | |
| "learning_rate": 1.3761114395930928e-06, | |
| "loss": 0.9461, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8206388206388207, | |
| "grad_norm": 1.497679579290753, | |
| "learning_rate": 1.3723474739301634e-06, | |
| "loss": 0.9109, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8230958230958231, | |
| "grad_norm": 1.6333646657141205, | |
| "learning_rate": 1.3685773739583618e-06, | |
| "loss": 0.893, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8255528255528255, | |
| "grad_norm": 1.4444606139573457, | |
| "learning_rate": 1.3648012017889121e-06, | |
| "loss": 0.9616, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.828009828009828, | |
| "grad_norm": 1.633866194720376, | |
| "learning_rate": 1.3610190196330775e-06, | |
| "loss": 0.9316, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8304668304668305, | |
| "grad_norm": 1.4965633625059045, | |
| "learning_rate": 1.3572308898011327e-06, | |
| "loss": 0.8995, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8329238329238329, | |
| "grad_norm": 1.4593159509482825, | |
| "learning_rate": 1.3534368747013394e-06, | |
| "loss": 0.9416, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8353808353808354, | |
| "grad_norm": 1.5060565758495885, | |
| "learning_rate": 1.3496370368389165e-06, | |
| "loss": 0.9385, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8378378378378378, | |
| "grad_norm": 1.5228727270675948, | |
| "learning_rate": 1.3458314388150113e-06, | |
| "loss": 0.9249, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.8402948402948403, | |
| "grad_norm": 1.6137336621038334, | |
| "learning_rate": 1.3420201433256689e-06, | |
| "loss": 0.9549, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8427518427518428, | |
| "grad_norm": 1.2744326108891244, | |
| "learning_rate": 1.3382032131607966e-06, | |
| "loss": 0.9275, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8452088452088452, | |
| "grad_norm": 1.5480593545772812, | |
| "learning_rate": 1.3343807112031326e-06, | |
| "loss": 0.9342, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8476658476658476, | |
| "grad_norm": 1.6690119423341088, | |
| "learning_rate": 1.3305527004272087e-06, | |
| "loss": 0.9375, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.8501228501228502, | |
| "grad_norm": 1.4907525884430786, | |
| "learning_rate": 1.3267192438983117e-06, | |
| "loss": 0.9183, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8525798525798526, | |
| "grad_norm": 1.487251447150466, | |
| "learning_rate": 1.3228804047714462e-06, | |
| "loss": 0.9572, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.855036855036855, | |
| "grad_norm": 2.022254168777352, | |
| "learning_rate": 1.3190362462902936e-06, | |
| "loss": 0.9431, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8574938574938575, | |
| "grad_norm": 1.570248437527878, | |
| "learning_rate": 1.31518683178617e-06, | |
| "loss": 0.8945, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.85995085995086, | |
| "grad_norm": 1.5017196199325449, | |
| "learning_rate": 1.3113322246769816e-06, | |
| "loss": 0.8841, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8624078624078624, | |
| "grad_norm": 1.5003226831400922, | |
| "learning_rate": 1.3074724884661831e-06, | |
| "loss": 0.9785, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.8648648648648649, | |
| "grad_norm": 1.6237720015743136, | |
| "learning_rate": 1.3036076867417286e-06, | |
| "loss": 0.9942, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8673218673218673, | |
| "grad_norm": 1.586236748920305, | |
| "learning_rate": 1.299737883175024e-06, | |
| "loss": 0.9036, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.8697788697788698, | |
| "grad_norm": 1.6356021082940453, | |
| "learning_rate": 1.2958631415198813e-06, | |
| "loss": 0.9723, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.8722358722358723, | |
| "grad_norm": 1.5448677133530493, | |
| "learning_rate": 1.2919835256114637e-06, | |
| "loss": 0.9391, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.8746928746928747, | |
| "grad_norm": 1.6212696290821345, | |
| "learning_rate": 1.2880990993652377e-06, | |
| "loss": 0.9202, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8771498771498771, | |
| "grad_norm": 1.5176857112402762, | |
| "learning_rate": 1.2842099267759175e-06, | |
| "loss": 0.9037, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.8796068796068796, | |
| "grad_norm": 1.3944467856106335, | |
| "learning_rate": 1.2803160719164126e-06, | |
| "loss": 0.8854, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8820638820638821, | |
| "grad_norm": 1.4377067419239682, | |
| "learning_rate": 1.2764175989367715e-06, | |
| "loss": 0.905, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.8845208845208845, | |
| "grad_norm": 1.6123612736739008, | |
| "learning_rate": 1.272514572063124e-06, | |
| "loss": 0.9881, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8869778869778869, | |
| "grad_norm": 1.589984246633212, | |
| "learning_rate": 1.2686070555966253e-06, | |
| "loss": 0.9529, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.8894348894348895, | |
| "grad_norm": 1.4049932596277341, | |
| "learning_rate": 1.2646951139123933e-06, | |
| "loss": 0.8715, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8918918918918919, | |
| "grad_norm": 1.7376495814908663, | |
| "learning_rate": 1.260778811458452e-06, | |
| "loss": 0.9551, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.8943488943488943, | |
| "grad_norm": 1.6356081133105504, | |
| "learning_rate": 1.2568582127546661e-06, | |
| "loss": 0.9357, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.8968058968058968, | |
| "grad_norm": 1.5620481027616258, | |
| "learning_rate": 1.2529333823916806e-06, | |
| "loss": 0.9724, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.8992628992628993, | |
| "grad_norm": 1.678396238284589, | |
| "learning_rate": 1.2490043850298557e-06, | |
| "loss": 0.9156, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9017199017199017, | |
| "grad_norm": 2.354758429687385, | |
| "learning_rate": 1.2450712853982013e-06, | |
| "loss": 0.8628, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9041769041769042, | |
| "grad_norm": 1.600899191967723, | |
| "learning_rate": 1.2411341482933108e-06, | |
| "loss": 0.9431, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9066339066339066, | |
| "grad_norm": 1.4058440155556784, | |
| "learning_rate": 1.2371930385782943e-06, | |
| "loss": 0.9446, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 1.4613065350370973, | |
| "learning_rate": 1.233248021181709e-06, | |
| "loss": 0.9164, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9115479115479116, | |
| "grad_norm": 1.6113469899034865, | |
| "learning_rate": 1.22929916109649e-06, | |
| "loss": 0.9209, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.914004914004914, | |
| "grad_norm": 1.4540218388463704, | |
| "learning_rate": 1.2253465233788794e-06, | |
| "loss": 0.9865, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9164619164619164, | |
| "grad_norm": 1.4002046410996662, | |
| "learning_rate": 1.2213901731473553e-06, | |
| "loss": 0.9229, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.918918918918919, | |
| "grad_norm": 1.4545211228962136, | |
| "learning_rate": 1.217430175581557e-06, | |
| "loss": 0.9401, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9213759213759214, | |
| "grad_norm": 1.4460543335595146, | |
| "learning_rate": 1.2134665959212135e-06, | |
| "loss": 0.9552, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9238329238329238, | |
| "grad_norm": 1.5791517432252247, | |
| "learning_rate": 1.209499499465068e-06, | |
| "loss": 0.9547, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9262899262899262, | |
| "grad_norm": 1.4523281683604505, | |
| "learning_rate": 1.2055289515698006e-06, | |
| "loss": 0.9669, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9287469287469288, | |
| "grad_norm": 1.8584285735562196, | |
| "learning_rate": 1.2015550176489537e-06, | |
| "loss": 0.9434, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9312039312039312, | |
| "grad_norm": 1.5086682042761248, | |
| "learning_rate": 1.1975777631718531e-06, | |
| "loss": 0.9377, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9336609336609336, | |
| "grad_norm": 1.7455613963553367, | |
| "learning_rate": 1.19359725366253e-06, | |
| "loss": 0.919, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9361179361179361, | |
| "grad_norm": 1.5307030954736855, | |
| "learning_rate": 1.1896135546986409e-06, | |
| "loss": 0.9282, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9385749385749386, | |
| "grad_norm": 1.5159714020228543, | |
| "learning_rate": 1.1856267319103876e-06, | |
| "loss": 0.9765, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.941031941031941, | |
| "grad_norm": 1.5839779729377836, | |
| "learning_rate": 1.1816368509794364e-06, | |
| "loss": 0.9793, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.9434889434889435, | |
| "grad_norm": 1.3472301054596898, | |
| "learning_rate": 1.177643977637835e-06, | |
| "loss": 0.9189, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 1.5340710636652473, | |
| "learning_rate": 1.1736481776669305e-06, | |
| "loss": 0.9663, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.9484029484029484, | |
| "grad_norm": 1.4047565697947708, | |
| "learning_rate": 1.1696495168962846e-06, | |
| "loss": 0.9207, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9508599508599509, | |
| "grad_norm": 1.6035760956637677, | |
| "learning_rate": 1.165648061202591e-06, | |
| "loss": 0.9793, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.9533169533169533, | |
| "grad_norm": 1.4948457768549497, | |
| "learning_rate": 1.1616438765085882e-06, | |
| "loss": 0.9381, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9557739557739557, | |
| "grad_norm": 1.5241351339551144, | |
| "learning_rate": 1.1576370287819734e-06, | |
| "loss": 0.931, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.9582309582309583, | |
| "grad_norm": 1.4995817174577337, | |
| "learning_rate": 1.1536275840343183e-06, | |
| "loss": 0.9594, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9606879606879607, | |
| "grad_norm": 1.8781230050093483, | |
| "learning_rate": 1.149615608319978e-06, | |
| "loss": 0.9536, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.9631449631449631, | |
| "grad_norm": 1.5385341714632899, | |
| "learning_rate": 1.145601167735005e-06, | |
| "loss": 0.9281, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9656019656019657, | |
| "grad_norm": 1.6183085918500024, | |
| "learning_rate": 1.1415843284160597e-06, | |
| "loss": 0.9366, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.9680589680589681, | |
| "grad_norm": 1.5534604794185942, | |
| "learning_rate": 1.137565156539322e-06, | |
| "loss": 0.9316, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.9705159705159705, | |
| "grad_norm": 1.423811752511575, | |
| "learning_rate": 1.133543718319398e-06, | |
| "loss": 0.8927, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.972972972972973, | |
| "grad_norm": 1.4578205842089746, | |
| "learning_rate": 1.1295200800082325e-06, | |
| "loss": 0.951, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.9754299754299754, | |
| "grad_norm": 1.6386231495575438, | |
| "learning_rate": 1.125494307894016e-06, | |
| "loss": 0.8952, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.9778869778869779, | |
| "grad_norm": 1.5075389802303287, | |
| "learning_rate": 1.1214664683000924e-06, | |
| "loss": 0.8966, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.9803439803439803, | |
| "grad_norm": 1.631579827773725, | |
| "learning_rate": 1.1174366275838662e-06, | |
| "loss": 0.9411, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.9828009828009828, | |
| "grad_norm": 1.8249088799770508, | |
| "learning_rate": 1.1134048521357116e-06, | |
| "loss": 0.9232, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9852579852579852, | |
| "grad_norm": 1.4813873308103862, | |
| "learning_rate": 1.1093712083778746e-06, | |
| "loss": 0.9625, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.9877149877149877, | |
| "grad_norm": 1.5996089579510389, | |
| "learning_rate": 1.105335762763382e-06, | |
| "loss": 0.9592, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.9901719901719902, | |
| "grad_norm": 1.4507338601430246, | |
| "learning_rate": 1.1012985817749462e-06, | |
| "loss": 0.9558, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.9926289926289926, | |
| "grad_norm": 1.452275573847242, | |
| "learning_rate": 1.097259731923869e-06, | |
| "loss": 0.9582, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.995085995085995, | |
| "grad_norm": 1.4529835839819856, | |
| "learning_rate": 1.0932192797489458e-06, | |
| "loss": 0.8952, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.9975429975429976, | |
| "grad_norm": 2.622272111272181, | |
| "learning_rate": 1.0891772918153694e-06, | |
| "loss": 0.918, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.5285112861748582, | |
| "learning_rate": 1.0851338347136355e-06, | |
| "loss": 0.877, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0024570024570025, | |
| "grad_norm": 1.4118845101420152, | |
| "learning_rate": 1.0810889750584424e-06, | |
| "loss": 0.8647, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0049140049140048, | |
| "grad_norm": 1.6747623259684568, | |
| "learning_rate": 1.0770427794875948e-06, | |
| "loss": 0.9671, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0073710073710074, | |
| "grad_norm": 1.307864675513283, | |
| "learning_rate": 1.0729953146609075e-06, | |
| "loss": 0.8296, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.00982800982801, | |
| "grad_norm": 1.4553500313995769, | |
| "learning_rate": 1.0689466472591048e-06, | |
| "loss": 0.937, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0122850122850122, | |
| "grad_norm": 1.4315419855901317, | |
| "learning_rate": 1.0648968439827239e-06, | |
| "loss": 0.9042, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0147420147420148, | |
| "grad_norm": 1.5739844573571937, | |
| "learning_rate": 1.0608459715510139e-06, | |
| "loss": 0.9011, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.0171990171990173, | |
| "grad_norm": 1.437687738713176, | |
| "learning_rate": 1.0567940967008395e-06, | |
| "loss": 0.9363, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.0196560196560196, | |
| "grad_norm": 1.534448861262034, | |
| "learning_rate": 1.052741286185579e-06, | |
| "loss": 0.9539, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0221130221130221, | |
| "grad_norm": 1.644806928510893, | |
| "learning_rate": 1.0486876067740252e-06, | |
| "loss": 0.9802, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.0245700245700247, | |
| "grad_norm": 1.5874571507432869, | |
| "learning_rate": 1.0446331252492864e-06, | |
| "loss": 0.8751, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.027027027027027, | |
| "grad_norm": 1.5244813435420985, | |
| "learning_rate": 1.0405779084076856e-06, | |
| "loss": 0.8844, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0294840294840295, | |
| "grad_norm": 1.5721376461339462, | |
| "learning_rate": 1.036522023057659e-06, | |
| "loss": 0.9593, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.031941031941032, | |
| "grad_norm": 3.006639344932307, | |
| "learning_rate": 1.0324655360186567e-06, | |
| "loss": 0.9055, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0343980343980343, | |
| "grad_norm": 1.3782225003410025, | |
| "learning_rate": 1.0284085141200423e-06, | |
| "loss": 0.9183, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.0368550368550369, | |
| "grad_norm": 1.376873801999615, | |
| "learning_rate": 1.0243510241999897e-06, | |
| "loss": 0.8767, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.0393120393120394, | |
| "grad_norm": 1.394373200963149, | |
| "learning_rate": 1.0202931331043839e-06, | |
| "loss": 0.8821, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.0417690417690417, | |
| "grad_norm": 1.571320978169866, | |
| "learning_rate": 1.0162349076857191e-06, | |
| "loss": 0.8959, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.0442260442260443, | |
| "grad_norm": 1.5336886338120022, | |
| "learning_rate": 1.0121764148019975e-06, | |
| "loss": 0.9375, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.0466830466830466, | |
| "grad_norm": 1.4693963607748404, | |
| "learning_rate": 1.0081177213156277e-06, | |
| "loss": 0.8909, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.049140049140049, | |
| "grad_norm": 1.5830327984938106, | |
| "learning_rate": 1.004058894092323e-06, | |
| "loss": 0.9207, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.0515970515970516, | |
| "grad_norm": 1.5170501490428927, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9036, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.054054054054054, | |
| "grad_norm": 1.5692066826348359, | |
| "learning_rate": 9.95941105907677e-07, | |
| "loss": 0.9193, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.0565110565110565, | |
| "grad_norm": 1.4357790543992526, | |
| "learning_rate": 9.918822786843724e-07, | |
| "loss": 0.9124, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.058968058968059, | |
| "grad_norm": 1.423440533928829, | |
| "learning_rate": 9.878235851980024e-07, | |
| "loss": 0.9146, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.0614250614250613, | |
| "grad_norm": 1.590468077725602, | |
| "learning_rate": 9.837650923142808e-07, | |
| "loss": 0.8705, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.0638820638820639, | |
| "grad_norm": 1.461231262115798, | |
| "learning_rate": 9.797068668956163e-07, | |
| "loss": 0.9438, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.0663390663390664, | |
| "grad_norm": 1.42661968764142, | |
| "learning_rate": 9.756489758000105e-07, | |
| "loss": 0.8747, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.0687960687960687, | |
| "grad_norm": 1.4657641576186988, | |
| "learning_rate": 9.715914858799576e-07, | |
| "loss": 0.8939, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.0712530712530712, | |
| "grad_norm": 1.5022324634860065, | |
| "learning_rate": 9.675344639813434e-07, | |
| "loss": 0.922, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.0737100737100738, | |
| "grad_norm": 1.424039088022573, | |
| "learning_rate": 9.63477976942341e-07, | |
| "loss": 0.953, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.076167076167076, | |
| "grad_norm": 1.5965077678963564, | |
| "learning_rate": 9.594220915923147e-07, | |
| "loss": 0.9443, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.0786240786240786, | |
| "grad_norm": 1.6371319904729476, | |
| "learning_rate": 9.553668747507138e-07, | |
| "loss": 0.9228, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.0810810810810811, | |
| "grad_norm": 1.8309314739878833, | |
| "learning_rate": 9.51312393225975e-07, | |
| "loss": 0.8753, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.0835380835380835, | |
| "grad_norm": 1.39319457517673, | |
| "learning_rate": 9.472587138144215e-07, | |
| "loss": 0.8939, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.085995085995086, | |
| "grad_norm": 1.5070197645260877, | |
| "learning_rate": 9.432059032991607e-07, | |
| "loss": 0.9123, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.0884520884520885, | |
| "grad_norm": 1.3753531136081873, | |
| "learning_rate": 9.391540284489861e-07, | |
| "loss": 0.9328, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 1.4036108581529319, | |
| "learning_rate": 9.351031560172764e-07, | |
| "loss": 0.9138, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.0933660933660934, | |
| "grad_norm": 1.460079134056615, | |
| "learning_rate": 9.310533527408951e-07, | |
| "loss": 0.8736, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.095823095823096, | |
| "grad_norm": 2.1567900139463663, | |
| "learning_rate": 9.270046853390924e-07, | |
| "loss": 0.9649, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.0982800982800982, | |
| "grad_norm": 1.5086116320926348, | |
| "learning_rate": 9.229572205124051e-07, | |
| "loss": 0.8839, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1007371007371007, | |
| "grad_norm": 1.4696338643312472, | |
| "learning_rate": 9.189110249415576e-07, | |
| "loss": 0.8971, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1031941031941033, | |
| "grad_norm": 1.5799491675072612, | |
| "learning_rate": 9.148661652863642e-07, | |
| "loss": 0.8839, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.1056511056511056, | |
| "grad_norm": 1.633966629237029, | |
| "learning_rate": 9.108227081846304e-07, | |
| "loss": 0.9053, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1081081081081081, | |
| "grad_norm": 1.5227318714591582, | |
| "learning_rate": 9.067807202510542e-07, | |
| "loss": 0.8734, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1105651105651106, | |
| "grad_norm": 1.6390683121943366, | |
| "learning_rate": 9.027402680761308e-07, | |
| "loss": 0.8389, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.113022113022113, | |
| "grad_norm": 1.8480120093100039, | |
| "learning_rate": 8.987014182250537e-07, | |
| "loss": 0.8977, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1154791154791155, | |
| "grad_norm": 1.6432113495263132, | |
| "learning_rate": 8.94664237236618e-07, | |
| "loss": 0.8924, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.117936117936118, | |
| "grad_norm": 1.7383339935167776, | |
| "learning_rate": 8.906287916221258e-07, | |
| "loss": 0.9181, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.1203931203931203, | |
| "grad_norm": 1.5888604077241295, | |
| "learning_rate": 8.865951478642886e-07, | |
| "loss": 0.9276, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.1228501228501229, | |
| "grad_norm": 1.4607976669981695, | |
| "learning_rate": 8.825633724161335e-07, | |
| "loss": 0.8802, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1253071253071254, | |
| "grad_norm": 1.1981728862199128, | |
| "learning_rate": 8.785335316999078e-07, | |
| "loss": 0.9242, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.1277641277641277, | |
| "grad_norm": 1.5537897318688183, | |
| "learning_rate": 8.745056921059839e-07, | |
| "loss": 0.9001, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.1302211302211302, | |
| "grad_norm": 1.475963647033354, | |
| "learning_rate": 8.704799199917672e-07, | |
| "loss": 0.8775, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.1326781326781328, | |
| "grad_norm": 1.3982302407588918, | |
| "learning_rate": 8.664562816806021e-07, | |
| "loss": 0.8828, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.135135135135135, | |
| "grad_norm": 1.4892642936338247, | |
| "learning_rate": 8.624348434606781e-07, | |
| "loss": 0.9042, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.1375921375921376, | |
| "grad_norm": 1.8141339480004706, | |
| "learning_rate": 8.584156715839401e-07, | |
| "loss": 0.8748, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.1400491400491402, | |
| "grad_norm": 1.641438804529928, | |
| "learning_rate": 8.543988322649953e-07, | |
| "loss": 0.9006, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.1425061425061425, | |
| "grad_norm": 1.5215620001272283, | |
| "learning_rate": 8.503843916800219e-07, | |
| "loss": 0.9418, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.144963144963145, | |
| "grad_norm": 1.5095379213125475, | |
| "learning_rate": 8.463724159656815e-07, | |
| "loss": 0.9072, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.1474201474201475, | |
| "grad_norm": 1.4101920682297036, | |
| "learning_rate": 8.423629712180265e-07, | |
| "loss": 0.8929, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.1498771498771498, | |
| "grad_norm": 1.5017183949322925, | |
| "learning_rate": 8.38356123491412e-07, | |
| "loss": 0.9246, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.1523341523341524, | |
| "grad_norm": 1.4487713518305103, | |
| "learning_rate": 8.34351938797409e-07, | |
| "loss": 0.9978, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.154791154791155, | |
| "grad_norm": 1.9019229859778328, | |
| "learning_rate": 8.303504831037153e-07, | |
| "loss": 0.8963, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.1572481572481572, | |
| "grad_norm": 1.5553306743565323, | |
| "learning_rate": 8.263518223330696e-07, | |
| "loss": 0.9076, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.1597051597051597, | |
| "grad_norm": 1.528301521498108, | |
| "learning_rate": 8.22356022362165e-07, | |
| "loss": 0.9005, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.1621621621621623, | |
| "grad_norm": 1.4246755498283765, | |
| "learning_rate": 8.183631490205635e-07, | |
| "loss": 0.8751, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.1646191646191646, | |
| "grad_norm": 1.4837658920113785, | |
| "learning_rate": 8.143732680896123e-07, | |
| "loss": 0.8911, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.1670761670761671, | |
| "grad_norm": 1.3838573607006248, | |
| "learning_rate": 8.103864453013592e-07, | |
| "loss": 0.8752, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.1695331695331694, | |
| "grad_norm": 1.3956886722399857, | |
| "learning_rate": 8.0640274633747e-07, | |
| "loss": 0.907, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.171990171990172, | |
| "grad_norm": 1.4606205140333972, | |
| "learning_rate": 8.024222368281468e-07, | |
| "loss": 0.964, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.1744471744471745, | |
| "grad_norm": 1.438679071763127, | |
| "learning_rate": 7.984449823510466e-07, | |
| "loss": 0.8499, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.1769041769041768, | |
| "grad_norm": 1.6564632287349166, | |
| "learning_rate": 7.944710484301994e-07, | |
| "loss": 0.892, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.1793611793611793, | |
| "grad_norm": 1.5772645911948133, | |
| "learning_rate": 7.905005005349319e-07, | |
| "loss": 0.8451, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 1.5408140168729385, | |
| "learning_rate": 7.865334040787865e-07, | |
| "loss": 0.9295, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.1842751842751842, | |
| "grad_norm": 1.5386445563903186, | |
| "learning_rate": 7.825698244184431e-07, | |
| "loss": 0.9029, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.1867321867321867, | |
| "grad_norm": 1.3266763848904035, | |
| "learning_rate": 7.786098268526447e-07, | |
| "loss": 0.8801, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.1891891891891893, | |
| "grad_norm": 1.7962161638699905, | |
| "learning_rate": 7.746534766211206e-07, | |
| "loss": 0.8578, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.1916461916461916, | |
| "grad_norm": 1.477358511769343, | |
| "learning_rate": 7.7070083890351e-07, | |
| "loss": 0.9048, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.194103194103194, | |
| "grad_norm": 1.4147723236609893, | |
| "learning_rate": 7.667519788182911e-07, | |
| "loss": 0.8898, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.1965601965601966, | |
| "grad_norm": 1.55983989969961, | |
| "learning_rate": 7.628069614217057e-07, | |
| "loss": 0.9509, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.199017199017199, | |
| "grad_norm": 1.4383185383531423, | |
| "learning_rate": 7.588658517066892e-07, | |
| "loss": 0.9418, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2014742014742015, | |
| "grad_norm": 1.7624942370774765, | |
| "learning_rate": 7.549287146017988e-07, | |
| "loss": 0.8313, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.203931203931204, | |
| "grad_norm": 1.67001710532923, | |
| "learning_rate": 7.509956149701443e-07, | |
| "loss": 0.8549, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2063882063882063, | |
| "grad_norm": 1.6268288582039303, | |
| "learning_rate": 7.470666176083191e-07, | |
| "loss": 0.8649, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.2088452088452089, | |
| "grad_norm": 1.4853773676445685, | |
| "learning_rate": 7.431417872453339e-07, | |
| "loss": 0.8956, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.2113022113022114, | |
| "grad_norm": 1.548578088799907, | |
| "learning_rate": 7.39221188541548e-07, | |
| "loss": 0.923, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2137592137592137, | |
| "grad_norm": 1.710249233454194, | |
| "learning_rate": 7.353048860876063e-07, | |
| "loss": 0.9336, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.2162162162162162, | |
| "grad_norm": 1.5916044293544283, | |
| "learning_rate": 7.31392944403375e-07, | |
| "loss": 0.9304, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2186732186732188, | |
| "grad_norm": 1.5308549158931122, | |
| "learning_rate": 7.274854279368758e-07, | |
| "loss": 0.9349, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.221130221130221, | |
| "grad_norm": 1.5229178156260872, | |
| "learning_rate": 7.235824010632283e-07, | |
| "loss": 0.9153, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.2235872235872236, | |
| "grad_norm": 1.462343882146908, | |
| "learning_rate": 7.196839280835875e-07, | |
| "loss": 0.9376, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.2260442260442261, | |
| "grad_norm": 1.5273674514727056, | |
| "learning_rate": 7.157900732240827e-07, | |
| "loss": 0.9231, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.2285012285012284, | |
| "grad_norm": 1.396230424270659, | |
| "learning_rate": 7.119009006347624e-07, | |
| "loss": 0.8868, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.230958230958231, | |
| "grad_norm": 1.6364859015395088, | |
| "learning_rate": 7.080164743885363e-07, | |
| "loss": 0.9287, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.2334152334152333, | |
| "grad_norm": 1.5048783187743708, | |
| "learning_rate": 7.041368584801186e-07, | |
| "loss": 0.932, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.2358722358722358, | |
| "grad_norm": 1.5929336784240413, | |
| "learning_rate": 7.002621168249758e-07, | |
| "loss": 0.9302, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.2383292383292384, | |
| "grad_norm": 1.5383384042486663, | |
| "learning_rate": 6.963923132582715e-07, | |
| "loss": 0.9059, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.2407862407862407, | |
| "grad_norm": 1.6790422515254697, | |
| "learning_rate": 6.925275115338167e-07, | |
| "loss": 0.8862, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.2432432432432432, | |
| "grad_norm": 1.4423572903428208, | |
| "learning_rate": 6.886677753230183e-07, | |
| "loss": 0.9004, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.2457002457002457, | |
| "grad_norm": 1.5146851789864428, | |
| "learning_rate": 6.848131682138303e-07, | |
| "loss": 0.8873, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.248157248157248, | |
| "grad_norm": 1.7060116939979457, | |
| "learning_rate": 6.809637537097062e-07, | |
| "loss": 0.9047, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.2506142506142506, | |
| "grad_norm": 1.3733517118208705, | |
| "learning_rate": 6.77119595228554e-07, | |
| "loss": 0.8742, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.253071253071253, | |
| "grad_norm": 1.4283683639472458, | |
| "learning_rate": 6.732807561016885e-07, | |
| "loss": 0.8697, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.2555282555282554, | |
| "grad_norm": 1.4832890039194944, | |
| "learning_rate": 6.694472995727914e-07, | |
| "loss": 0.925, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.257985257985258, | |
| "grad_norm": 1.4760580473983451, | |
| "learning_rate": 6.656192887968674e-07, | |
| "loss": 0.8959, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.2604422604422605, | |
| "grad_norm": 1.4517094927145753, | |
| "learning_rate": 6.617967868392036e-07, | |
| "loss": 0.8935, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.2628992628992628, | |
| "grad_norm": 1.5125319950260128, | |
| "learning_rate": 6.579798566743313e-07, | |
| "loss": 0.8556, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.2653562653562653, | |
| "grad_norm": 1.4510038145639286, | |
| "learning_rate": 6.541685611849886e-07, | |
| "loss": 0.9068, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.2678132678132679, | |
| "grad_norm": 1.786431321409276, | |
| "learning_rate": 6.503629631610836e-07, | |
| "loss": 0.8951, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.2702702702702702, | |
| "grad_norm": 4.646846867306252, | |
| "learning_rate": 6.465631252986608e-07, | |
| "loss": 0.8717, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 1.6325223465886023, | |
| "learning_rate": 6.427691101988672e-07, | |
| "loss": 0.9277, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.2751842751842752, | |
| "grad_norm": 1.46996914036538, | |
| "learning_rate": 6.389809803669225e-07, | |
| "loss": 0.8614, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.2776412776412776, | |
| "grad_norm": 1.4901572767377598, | |
| "learning_rate": 6.351987982110879e-07, | |
| "loss": 0.8791, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.28009828009828, | |
| "grad_norm": 1.3956563694145843, | |
| "learning_rate": 6.314226260416383e-07, | |
| "loss": 0.8682, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.2825552825552826, | |
| "grad_norm": 1.527881875257898, | |
| "learning_rate": 6.276525260698363e-07, | |
| "loss": 0.9153, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.285012285012285, | |
| "grad_norm": 1.5192904446038689, | |
| "learning_rate": 6.238885604069074e-07, | |
| "loss": 0.9263, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.2874692874692875, | |
| "grad_norm": 1.5953405324354368, | |
| "learning_rate": 6.201307910630145e-07, | |
| "loss": 0.8788, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.28992628992629, | |
| "grad_norm": 1.4403593350780215, | |
| "learning_rate": 6.163792799462402e-07, | |
| "loss": 0.9414, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.2923832923832923, | |
| "grad_norm": 1.4506987352971705, | |
| "learning_rate": 6.126340888615642e-07, | |
| "loss": 0.9307, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.2948402948402948, | |
| "grad_norm": 1.610182690312099, | |
| "learning_rate": 6.088952795098441e-07, | |
| "loss": 0.9168, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.2972972972972974, | |
| "grad_norm": 1.5978713160514366, | |
| "learning_rate": 6.051629134868019e-07, | |
| "loss": 0.8393, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.2997542997542997, | |
| "grad_norm": 1.3547671161607415, | |
| "learning_rate": 6.014370522820085e-07, | |
| "loss": 0.8972, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.3022113022113022, | |
| "grad_norm": 1.3090769200596557, | |
| "learning_rate": 5.977177572778677e-07, | |
| "loss": 0.8823, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.3046683046683047, | |
| "grad_norm": 1.4761807072270576, | |
| "learning_rate": 5.940050897486089e-07, | |
| "loss": 0.9733, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.307125307125307, | |
| "grad_norm": 1.5243972285729563, | |
| "learning_rate": 5.902991108592754e-07, | |
| "loss": 0.8859, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.3095823095823096, | |
| "grad_norm": 1.764985461344906, | |
| "learning_rate": 5.865998816647171e-07, | |
| "loss": 0.8868, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.3120393120393121, | |
| "grad_norm": 1.6317180984567103, | |
| "learning_rate": 5.829074631085852e-07, | |
| "loss": 0.9361, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.3144963144963144, | |
| "grad_norm": 1.4981668702031359, | |
| "learning_rate": 5.792219160223267e-07, | |
| "loss": 0.8472, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.316953316953317, | |
| "grad_norm": 2.637743576229916, | |
| "learning_rate": 5.755433011241851e-07, | |
| "loss": 0.8736, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.3194103194103195, | |
| "grad_norm": 1.3474590182260624, | |
| "learning_rate": 5.718716790181965e-07, | |
| "loss": 0.8794, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.3218673218673218, | |
| "grad_norm": 1.42787784398219, | |
| "learning_rate": 5.682071101931936e-07, | |
| "loss": 0.8738, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.3243243243243243, | |
| "grad_norm": 1.6067570011968997, | |
| "learning_rate": 5.645496550218089e-07, | |
| "loss": 0.8879, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.3267813267813269, | |
| "grad_norm": 1.5914444312640619, | |
| "learning_rate": 5.60899373759479e-07, | |
| "loss": 0.8754, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.3292383292383292, | |
| "grad_norm": 1.4758896244599573, | |
| "learning_rate": 5.572563265434526e-07, | |
| "loss": 0.9307, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.3316953316953317, | |
| "grad_norm": 1.2626534960911335, | |
| "learning_rate": 5.536205733918007e-07, | |
| "loss": 0.843, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.3341523341523343, | |
| "grad_norm": 1.701842268857865, | |
| "learning_rate": 5.499921742024257e-07, | |
| "loss": 0.9247, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.3366093366093366, | |
| "grad_norm": 1.4205425380782735, | |
| "learning_rate": 5.463711887520759e-07, | |
| "loss": 0.8628, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.339066339066339, | |
| "grad_norm": 1.9140145931124193, | |
| "learning_rate": 5.427576766953614e-07, | |
| "loss": 0.9073, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.3415233415233416, | |
| "grad_norm": 1.7824661674136197, | |
| "learning_rate": 5.391516975637699e-07, | |
| "loss": 0.9133, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.343980343980344, | |
| "grad_norm": 1.5617612083029389, | |
| "learning_rate": 5.355533107646858e-07, | |
| "loss": 0.9531, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.3464373464373465, | |
| "grad_norm": 1.5245643140200376, | |
| "learning_rate": 5.319625755804138e-07, | |
| "loss": 0.8915, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.348894348894349, | |
| "grad_norm": 1.7286911090374188, | |
| "learning_rate": 5.283795511671994e-07, | |
| "loss": 0.863, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.3513513513513513, | |
| "grad_norm": 1.6181660068040473, | |
| "learning_rate": 5.248042965542559e-07, | |
| "loss": 0.8852, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.3538083538083538, | |
| "grad_norm": 1.5075998723303305, | |
| "learning_rate": 5.212368706427912e-07, | |
| "loss": 0.8965, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.3562653562653564, | |
| "grad_norm": 1.5100541752180852, | |
| "learning_rate": 5.176773322050381e-07, | |
| "loss": 0.8829, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.3587223587223587, | |
| "grad_norm": 1.863955084942053, | |
| "learning_rate": 5.141257398832862e-07, | |
| "loss": 0.9475, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.3611793611793612, | |
| "grad_norm": 1.314918195852255, | |
| "learning_rate": 5.105821521889147e-07, | |
| "loss": 0.8643, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 1.8350990255330366, | |
| "learning_rate": 5.070466275014287e-07, | |
| "loss": 0.9441, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.366093366093366, | |
| "grad_norm": 1.5322707436835765, | |
| "learning_rate": 5.03519224067499e-07, | |
| "loss": 0.9033, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.3685503685503686, | |
| "grad_norm": 1.5974243335350318, | |
| "learning_rate": 5.000000000000002e-07, | |
| "loss": 0.8914, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.3710073710073711, | |
| "grad_norm": 1.4435566226659817, | |
| "learning_rate": 4.964890132770543e-07, | |
| "loss": 0.878, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.3734643734643734, | |
| "grad_norm": 1.8477167694746348, | |
| "learning_rate": 4.929863217410767e-07, | |
| "loss": 0.8909, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.375921375921376, | |
| "grad_norm": 1.4801649083461632, | |
| "learning_rate": 4.894919830978212e-07, | |
| "loss": 0.9079, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.3783783783783785, | |
| "grad_norm": 1.6811015996059142, | |
| "learning_rate": 4.860060549154301e-07, | |
| "loss": 0.815, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.3808353808353808, | |
| "grad_norm": 2.3145109947990155, | |
| "learning_rate": 4.825285946234874e-07, | |
| "loss": 0.9305, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.3832923832923834, | |
| "grad_norm": 1.4759252526730806, | |
| "learning_rate": 4.790596595120698e-07, | |
| "loss": 0.8589, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.3857493857493859, | |
| "grad_norm": 1.3378195425536465, | |
| "learning_rate": 4.7559930673080475e-07, | |
| "loss": 0.8659, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.3882063882063882, | |
| "grad_norm": 1.6393333264623262, | |
| "learning_rate": 4.721475932879282e-07, | |
| "loss": 0.8618, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.3906633906633907, | |
| "grad_norm": 1.6916138016252256, | |
| "learning_rate": 4.6870457604934675e-07, | |
| "loss": 0.9028, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.393120393120393, | |
| "grad_norm": 1.4225654726940884, | |
| "learning_rate": 4.6527031173769857e-07, | |
| "loss": 0.8765, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.3955773955773956, | |
| "grad_norm": 1.342379943145193, | |
| "learning_rate": 4.618448569314206e-07, | |
| "loss": 0.8855, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.398034398034398, | |
| "grad_norm": 2.442614220259753, | |
| "learning_rate": 4.584282680638154e-07, | |
| "loss": 0.8804, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.4004914004914004, | |
| "grad_norm": 1.5267874267053692, | |
| "learning_rate": 4.550206014221232e-07, | |
| "loss": 0.9668, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.402948402948403, | |
| "grad_norm": 1.8692131154426705, | |
| "learning_rate": 4.5162191314659194e-07, | |
| "loss": 0.8979, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.4054054054054055, | |
| "grad_norm": 2.022804110507779, | |
| "learning_rate": 4.48232259229554e-07, | |
| "loss": 0.8396, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.4078624078624078, | |
| "grad_norm": 1.444700494646088, | |
| "learning_rate": 4.448516955145047e-07, | |
| "loss": 0.8798, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.4103194103194103, | |
| "grad_norm": 1.5147425793812193, | |
| "learning_rate": 4.414802776951798e-07, | |
| "loss": 0.884, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.4127764127764126, | |
| "grad_norm": 1.4905275337775719, | |
| "learning_rate": 4.3811806131463946e-07, | |
| "loss": 0.8755, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.4152334152334152, | |
| "grad_norm": 1.5203731499114503, | |
| "learning_rate": 4.347651017643539e-07, | |
| "loss": 0.9089, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.4176904176904177, | |
| "grad_norm": 1.4792627181832774, | |
| "learning_rate": 4.314214542832888e-07, | |
| "loss": 0.8995, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.42014742014742, | |
| "grad_norm": 1.498952396682308, | |
| "learning_rate": 4.280871739569971e-07, | |
| "loss": 0.9, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.4226044226044225, | |
| "grad_norm": 1.5993242263021854, | |
| "learning_rate": 4.247623157167102e-07, | |
| "loss": 0.9004, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.425061425061425, | |
| "grad_norm": 1.2909684524904117, | |
| "learning_rate": 4.214469343384346e-07, | |
| "loss": 0.9442, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.4275184275184274, | |
| "grad_norm": 1.6601643291459118, | |
| "learning_rate": 4.181410844420473e-07, | |
| "loss": 0.8924, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.42997542997543, | |
| "grad_norm": 1.5456219866657583, | |
| "learning_rate": 4.148448204903977e-07, | |
| "loss": 0.8785, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.4324324324324325, | |
| "grad_norm": 2.3413636182110906, | |
| "learning_rate": 4.115581967884093e-07, | |
| "loss": 0.8867, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.4348894348894348, | |
| "grad_norm": 1.600188568539889, | |
| "learning_rate": 4.082812674821865e-07, | |
| "loss": 0.9163, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.4373464373464373, | |
| "grad_norm": 1.775885867839352, | |
| "learning_rate": 4.0501408655812044e-07, | |
| "loss": 0.94, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.4398034398034398, | |
| "grad_norm": 1.4853461496779028, | |
| "learning_rate": 4.0175670784200066e-07, | |
| "loss": 0.8828, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.4422604422604421, | |
| "grad_norm": 1.3437874918142632, | |
| "learning_rate": 3.985091849981297e-07, | |
| "loss": 0.906, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.4447174447174447, | |
| "grad_norm": 1.8022847542011342, | |
| "learning_rate": 3.952715715284363e-07, | |
| "loss": 0.9078, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.4471744471744472, | |
| "grad_norm": 1.4544558952772775, | |
| "learning_rate": 3.9204392077159544e-07, | |
| "loss": 0.9007, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.4496314496314495, | |
| "grad_norm": 1.5362982163739145, | |
| "learning_rate": 3.888262859021507e-07, | |
| "loss": 0.9329, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.452088452088452, | |
| "grad_norm": 1.35776640743268, | |
| "learning_rate": 3.856187199296358e-07, | |
| "loss": 0.9142, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 1.552024766985016, | |
| "learning_rate": 3.8242127569770265e-07, | |
| "loss": 0.8624, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.457002457002457, | |
| "grad_norm": 1.6837813084688251, | |
| "learning_rate": 3.792340058832515e-07, | |
| "loss": 0.8515, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.4594594594594594, | |
| "grad_norm": 1.6200568897908438, | |
| "learning_rate": 3.760569629955613e-07, | |
| "loss": 0.9182, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.461916461916462, | |
| "grad_norm": 1.3613611600691327, | |
| "learning_rate": 3.72890199375426e-07, | |
| "loss": 0.9212, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.4643734643734643, | |
| "grad_norm": 1.5082364375002426, | |
| "learning_rate": 3.697337671942913e-07, | |
| "loss": 0.9084, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.4668304668304668, | |
| "grad_norm": 1.6876950054566262, | |
| "learning_rate": 3.6658771845339676e-07, | |
| "loss": 0.9065, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.4692874692874693, | |
| "grad_norm": 1.5733845918989662, | |
| "learning_rate": 3.634521049829169e-07, | |
| "loss": 0.8815, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.4717444717444716, | |
| "grad_norm": 1.8147232098247035, | |
| "learning_rate": 3.603269784411089e-07, | |
| "loss": 0.9133, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.4742014742014742, | |
| "grad_norm": 1.4595323005431404, | |
| "learning_rate": 3.5721239031346063e-07, | |
| "loss": 0.8071, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.4766584766584767, | |
| "grad_norm": 2.769727304993137, | |
| "learning_rate": 3.541083919118438e-07, | |
| "loss": 0.9423, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.479115479115479, | |
| "grad_norm": 1.610693658075675, | |
| "learning_rate": 3.5101503437366675e-07, | |
| "loss": 0.8946, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.4815724815724816, | |
| "grad_norm": 1.6365761390735136, | |
| "learning_rate": 3.479323686610329e-07, | |
| "loss": 0.9079, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.484029484029484, | |
| "grad_norm": 1.5083166362769518, | |
| "learning_rate": 3.448604455599021e-07, | |
| "loss": 0.8975, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.4864864864864864, | |
| "grad_norm": 1.60995686456512, | |
| "learning_rate": 3.4179931567925214e-07, | |
| "loss": 0.9498, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.488943488943489, | |
| "grad_norm": 1.7185280239304535, | |
| "learning_rate": 3.387490294502456e-07, | |
| "loss": 0.906, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.4914004914004915, | |
| "grad_norm": 1.321610762107921, | |
| "learning_rate": 3.357096371254008e-07, | |
| "loss": 0.8499, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.4938574938574938, | |
| "grad_norm": 1.2808013142695502, | |
| "learning_rate": 3.3268118877776064e-07, | |
| "loss": 0.8546, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.4963144963144963, | |
| "grad_norm": 1.3917333892709016, | |
| "learning_rate": 3.296637343000704e-07, | |
| "loss": 0.8776, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.4987714987714988, | |
| "grad_norm": 1.9760211104870224, | |
| "learning_rate": 3.266573234039541e-07, | |
| "loss": 0.8829, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.5012285012285012, | |
| "grad_norm": 1.9349300228421435, | |
| "learning_rate": 3.2366200561909717e-07, | |
| "loss": 0.9721, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.5036855036855037, | |
| "grad_norm": 1.5825092236415208, | |
| "learning_rate": 3.2067783029242865e-07, | |
| "loss": 0.8569, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.5061425061425062, | |
| "grad_norm": 1.7071421937295232, | |
| "learning_rate": 3.1770484658730887e-07, | |
| "loss": 0.9586, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.5085995085995085, | |
| "grad_norm": 1.3114653023022718, | |
| "learning_rate": 3.147431034827208e-07, | |
| "loss": 0.8879, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.511056511056511, | |
| "grad_norm": 1.3660758453242297, | |
| "learning_rate": 3.11792649772461e-07, | |
| "loss": 0.9227, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.5135135135135136, | |
| "grad_norm": 1.5443322628991898, | |
| "learning_rate": 3.08853534064337e-07, | |
| "loss": 0.8756, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.515970515970516, | |
| "grad_norm": 1.6205632640066696, | |
| "learning_rate": 3.0592580477936604e-07, | |
| "loss": 0.8631, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.5184275184275184, | |
| "grad_norm": 1.6750050731142352, | |
| "learning_rate": 3.0300951015097863e-07, | |
| "loss": 0.9308, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.520884520884521, | |
| "grad_norm": 1.7541208454820925, | |
| "learning_rate": 3.001046982242216e-07, | |
| "loss": 0.9097, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.5233415233415233, | |
| "grad_norm": 1.9317379819011606, | |
| "learning_rate": 2.972114168549682e-07, | |
| "loss": 0.9212, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.5257985257985258, | |
| "grad_norm": 1.2650696951039682, | |
| "learning_rate": 2.9432971370913e-07, | |
| "loss": 0.8836, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.5282555282555284, | |
| "grad_norm": 1.4960196905274168, | |
| "learning_rate": 2.914596362618701e-07, | |
| "loss": 0.8946, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.5307125307125307, | |
| "grad_norm": 1.507394873293028, | |
| "learning_rate": 2.8860123179682236e-07, | |
| "loss": 0.9141, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.5331695331695332, | |
| "grad_norm": 1.3335466599194608, | |
| "learning_rate": 2.8575454740531115e-07, | |
| "loss": 0.8667, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.5356265356265357, | |
| "grad_norm": 1.522699880941422, | |
| "learning_rate": 2.829196299855775e-07, | |
| "loss": 0.9901, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.538083538083538, | |
| "grad_norm": 1.4978237170833106, | |
| "learning_rate": 2.800965262420043e-07, | |
| "loss": 0.8367, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.5405405405405406, | |
| "grad_norm": 1.386657482706535, | |
| "learning_rate": 2.7728528268434757e-07, | |
| "loss": 0.8411, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.542997542997543, | |
| "grad_norm": 1.6884446408530738, | |
| "learning_rate": 2.744859456269718e-07, | |
| "loss": 0.942, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 1.5125614736127029, | |
| "learning_rate": 2.716985611880841e-07, | |
| "loss": 0.8738, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.547911547911548, | |
| "grad_norm": 1.469704160523891, | |
| "learning_rate": 2.689231752889768e-07, | |
| "loss": 0.9165, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.5503685503685505, | |
| "grad_norm": 1.4870015368902652, | |
| "learning_rate": 2.661598336532692e-07, | |
| "loss": 0.8944, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.5528255528255528, | |
| "grad_norm": 1.4322815674497984, | |
| "learning_rate": 2.6340858180615645e-07, | |
| "loss": 0.8968, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.5552825552825553, | |
| "grad_norm": 1.3623692296259504, | |
| "learning_rate": 2.6066946507365696e-07, | |
| "loss": 0.8935, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.5577395577395579, | |
| "grad_norm": 1.484336106115407, | |
| "learning_rate": 2.5794252858186706e-07, | |
| "loss": 0.8993, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.5601965601965602, | |
| "grad_norm": 1.4403746721888513, | |
| "learning_rate": 2.552278172562181e-07, | |
| "loss": 0.8937, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.5626535626535627, | |
| "grad_norm": 1.8086294010381407, | |
| "learning_rate": 2.525253758207353e-07, | |
| "loss": 0.939, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.5651105651105652, | |
| "grad_norm": 1.4766926006894956, | |
| "learning_rate": 2.4983524879730066e-07, | |
| "loss": 0.9023, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.5675675675675675, | |
| "grad_norm": 1.3776357069750294, | |
| "learning_rate": 2.471574805049206e-07, | |
| "loss": 0.9233, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.57002457002457, | |
| "grad_norm": 1.6402577176804487, | |
| "learning_rate": 2.444921150589957e-07, | |
| "loss": 0.9319, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.5724815724815726, | |
| "grad_norm": 1.633491894306784, | |
| "learning_rate": 2.418391963705927e-07, | |
| "loss": 0.8818, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.574938574938575, | |
| "grad_norm": 1.5059260563661474, | |
| "learning_rate": 2.391987681457219e-07, | |
| "loss": 0.9138, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.5773955773955772, | |
| "grad_norm": 1.474101178051224, | |
| "learning_rate": 2.3657087388461815e-07, | |
| "loss": 0.8847, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.57985257985258, | |
| "grad_norm": 1.5606071866155247, | |
| "learning_rate": 2.339555568810221e-07, | |
| "loss": 0.9451, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.5823095823095823, | |
| "grad_norm": 1.5959144837669506, | |
| "learning_rate": 2.3135286022146782e-07, | |
| "loss": 0.9202, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.5847665847665846, | |
| "grad_norm": 1.4739665598722158, | |
| "learning_rate": 2.2876282678457438e-07, | |
| "loss": 0.8617, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.5872235872235874, | |
| "grad_norm": 1.324486221068965, | |
| "learning_rate": 2.2618549924033704e-07, | |
| "loss": 0.8702, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.5896805896805897, | |
| "grad_norm": 1.3762365282216713, | |
| "learning_rate": 2.236209200494258e-07, | |
| "loss": 0.9137, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.592137592137592, | |
| "grad_norm": 1.5081639071286088, | |
| "learning_rate": 2.210691314624854e-07, | |
| "loss": 0.8869, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.5945945945945947, | |
| "grad_norm": 1.5818213999124708, | |
| "learning_rate": 2.1853017551943987e-07, | |
| "loss": 0.9312, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.597051597051597, | |
| "grad_norm": 1.7757893297397498, | |
| "learning_rate": 2.160040940487987e-07, | |
| "loss": 0.8902, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.5995085995085994, | |
| "grad_norm": 1.3784061956740852, | |
| "learning_rate": 2.1349092866696906e-07, | |
| "loss": 0.8984, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.6019656019656021, | |
| "grad_norm": 1.4232738419586703, | |
| "learning_rate": 2.1099072077756863e-07, | |
| "loss": 0.8676, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.6044226044226044, | |
| "grad_norm": 3.799736709775996, | |
| "learning_rate": 2.0850351157074597e-07, | |
| "loss": 0.9107, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.6068796068796067, | |
| "grad_norm": 2.8003735792292055, | |
| "learning_rate": 2.060293420224991e-07, | |
| "loss": 0.8836, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.6093366093366095, | |
| "grad_norm": 1.320688739165864, | |
| "learning_rate": 2.0356825289400183e-07, | |
| "loss": 0.8912, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.6117936117936118, | |
| "grad_norm": 1.9634181459213618, | |
| "learning_rate": 2.011202847309329e-07, | |
| "loss": 0.9238, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.6142506142506141, | |
| "grad_norm": 1.5624806988822288, | |
| "learning_rate": 1.9868547786280665e-07, | |
| "loss": 0.8707, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.6167076167076169, | |
| "grad_norm": 1.4107049261598619, | |
| "learning_rate": 1.9626387240230891e-07, | |
| "loss": 0.868, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.6191646191646192, | |
| "grad_norm": 1.492505632847286, | |
| "learning_rate": 1.9385550824463725e-07, | |
| "loss": 0.8894, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.6216216216216215, | |
| "grad_norm": 1.4382690869482055, | |
| "learning_rate": 1.9146042506684223e-07, | |
| "loss": 0.8978, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.6240786240786242, | |
| "grad_norm": 1.627212055293953, | |
| "learning_rate": 1.890786623271743e-07, | |
| "loss": 0.871, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.6265356265356266, | |
| "grad_norm": 1.6843094898216577, | |
| "learning_rate": 1.8671025926443463e-07, | |
| "loss": 0.8952, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.6289926289926289, | |
| "grad_norm": 1.6654696363977666, | |
| "learning_rate": 1.8435525489732718e-07, | |
| "loss": 0.8591, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.6314496314496314, | |
| "grad_norm": 1.5188779880666268, | |
| "learning_rate": 1.8201368802381657e-07, | |
| "loss": 0.9323, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.633906633906634, | |
| "grad_norm": 1.3399754864662434, | |
| "learning_rate": 1.7968559722048903e-07, | |
| "loss": 0.8543, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 1.3715522224138303, | |
| "learning_rate": 1.7737102084191703e-07, | |
| "loss": 0.918, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.6388206388206388, | |
| "grad_norm": 1.6259531621746015, | |
| "learning_rate": 1.7506999702002678e-07, | |
| "loss": 0.8694, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.6412776412776413, | |
| "grad_norm": 2.365813450212486, | |
| "learning_rate": 1.7278256366347032e-07, | |
| "loss": 0.8874, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.6437346437346436, | |
| "grad_norm": 1.7862394193970157, | |
| "learning_rate": 1.7050875845700064e-07, | |
| "loss": 0.8803, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.6461916461916462, | |
| "grad_norm": 1.557725205467593, | |
| "learning_rate": 1.682486188608523e-07, | |
| "loss": 0.8842, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.6486486486486487, | |
| "grad_norm": 1.6097866192991963, | |
| "learning_rate": 1.6600218211012218e-07, | |
| "loss": 0.9428, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.651105651105651, | |
| "grad_norm": 1.4644367524635458, | |
| "learning_rate": 1.6376948521415711e-07, | |
| "loss": 0.9066, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.6535626535626535, | |
| "grad_norm": 1.6645946570893102, | |
| "learning_rate": 1.6155056495594465e-07, | |
| "loss": 0.905, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.656019656019656, | |
| "grad_norm": 1.3397486338053894, | |
| "learning_rate": 1.5934545789150622e-07, | |
| "loss": 0.8785, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.6584766584766584, | |
| "grad_norm": 1.3685689789923863, | |
| "learning_rate": 1.5715420034929473e-07, | |
| "loss": 0.9179, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.660933660933661, | |
| "grad_norm": 1.5445547208764443, | |
| "learning_rate": 1.5497682842959747e-07, | |
| "loss": 0.9187, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.6633906633906634, | |
| "grad_norm": 1.7915250554930906, | |
| "learning_rate": 1.5281337800393968e-07, | |
| "loss": 0.8996, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.6658476658476657, | |
| "grad_norm": 1.425206424154818, | |
| "learning_rate": 1.5066388471449453e-07, | |
| "loss": 0.9244, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.6683046683046683, | |
| "grad_norm": 1.3711937617299463, | |
| "learning_rate": 1.4852838397349542e-07, | |
| "loss": 0.9239, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.6707616707616708, | |
| "grad_norm": 1.4206125252604587, | |
| "learning_rate": 1.4640691096265357e-07, | |
| "loss": 0.9127, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.6732186732186731, | |
| "grad_norm": 1.5824939121397916, | |
| "learning_rate": 1.4429950063257712e-07, | |
| "loss": 0.9243, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.6756756756756757, | |
| "grad_norm": 1.5523209337418313, | |
| "learning_rate": 1.4220618770219604e-07, | |
| "loss": 0.8743, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.6781326781326782, | |
| "grad_norm": 2.449668270011937, | |
| "learning_rate": 1.4012700665818988e-07, | |
| "loss": 0.9022, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.6805896805896805, | |
| "grad_norm": 2.797795087084986, | |
| "learning_rate": 1.3806199175442046e-07, | |
| "loss": 0.9127, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.683046683046683, | |
| "grad_norm": 1.594299579779607, | |
| "learning_rate": 1.3601117701136588e-07, | |
| "loss": 0.8958, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.6855036855036856, | |
| "grad_norm": 1.3744841196118298, | |
| "learning_rate": 1.3397459621556128e-07, | |
| "loss": 0.8847, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.6879606879606879, | |
| "grad_norm": 1.4967014666487075, | |
| "learning_rate": 1.319522829190427e-07, | |
| "loss": 0.9375, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.6904176904176904, | |
| "grad_norm": 1.257226409390929, | |
| "learning_rate": 1.2994427043879275e-07, | |
| "loss": 0.87, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.692874692874693, | |
| "grad_norm": 1.5489462493350459, | |
| "learning_rate": 1.279505918561923e-07, | |
| "loss": 0.8997, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.6953316953316953, | |
| "grad_norm": 1.478167106209186, | |
| "learning_rate": 1.2597128001647695e-07, | |
| "loss": 0.88, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.6977886977886978, | |
| "grad_norm": 1.530749437315356, | |
| "learning_rate": 1.2400636752819337e-07, | |
| "loss": 0.9155, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.7002457002457003, | |
| "grad_norm": 1.312749085771371, | |
| "learning_rate": 1.2205588676266386e-07, | |
| "loss": 0.9148, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.7027027027027026, | |
| "grad_norm": 1.2944478484019626, | |
| "learning_rate": 1.2011986985345313e-07, | |
| "loss": 0.9421, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.7051597051597052, | |
| "grad_norm": 1.9652193892846588, | |
| "learning_rate": 1.1819834869583767e-07, | |
| "loss": 0.8827, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.7076167076167077, | |
| "grad_norm": 1.4459180494396398, | |
| "learning_rate": 1.1629135494628095e-07, | |
| "loss": 0.9373, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.71007371007371, | |
| "grad_norm": 1.4835720268158477, | |
| "learning_rate": 1.1439892002191231e-07, | |
| "loss": 0.8919, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.7125307125307125, | |
| "grad_norm": 1.8195889649889634, | |
| "learning_rate": 1.1252107510000841e-07, | |
| "loss": 0.8234, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.714987714987715, | |
| "grad_norm": 1.3885033939390314, | |
| "learning_rate": 1.1065785111748117e-07, | |
| "loss": 0.8727, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.7174447174447174, | |
| "grad_norm": 1.8847414217621874, | |
| "learning_rate": 1.0880927877036606e-07, | |
| "loss": 0.9219, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.71990171990172, | |
| "grad_norm": 1.4570728152695678, | |
| "learning_rate": 1.0697538851331767e-07, | |
| "loss": 0.8921, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.7223587223587224, | |
| "grad_norm": 1.6297111465619418, | |
| "learning_rate": 1.0515621055910817e-07, | |
| "loss": 0.8523, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.7248157248157248, | |
| "grad_norm": 1.3305859076255666, | |
| "learning_rate": 1.0335177487812862e-07, | |
| "loss": 0.8873, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 1.4781364488325777, | |
| "learning_rate": 1.0156211119789581e-07, | |
| "loss": 0.9512, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.7297297297297298, | |
| "grad_norm": 1.3860356549035335, | |
| "learning_rate": 9.978724900256264e-08, | |
| "loss": 0.816, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.7321867321867321, | |
| "grad_norm": 1.8635382664585312, | |
| "learning_rate": 9.802721753243193e-08, | |
| "loss": 0.8981, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.7346437346437347, | |
| "grad_norm": 1.7250194554756548, | |
| "learning_rate": 9.628204578347466e-08, | |
| "loss": 0.8479, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.7371007371007372, | |
| "grad_norm": 1.697724825854203, | |
| "learning_rate": 9.455176250685337e-08, | |
| "loss": 0.8986, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.7395577395577395, | |
| "grad_norm": 1.6181977387870101, | |
| "learning_rate": 9.283639620844686e-08, | |
| "loss": 0.8641, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.742014742014742, | |
| "grad_norm": 1.318237926606282, | |
| "learning_rate": 9.113597514838134e-08, | |
| "loss": 0.9309, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.7444717444717446, | |
| "grad_norm": 2.254843209174964, | |
| "learning_rate": 8.94505273405658e-08, | |
| "loss": 0.94, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.746928746928747, | |
| "grad_norm": 1.362243307286928, | |
| "learning_rate": 8.778008055222807e-08, | |
| "loss": 0.8757, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.7493857493857494, | |
| "grad_norm": 1.6314398152119172, | |
| "learning_rate": 8.612466230346016e-08, | |
| "loss": 0.8814, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.751842751842752, | |
| "grad_norm": 1.754218537191555, | |
| "learning_rate": 8.448429986676298e-08, | |
| "loss": 0.9166, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.7542997542997543, | |
| "grad_norm": 1.4911185442079653, | |
| "learning_rate": 8.285902026659741e-08, | |
| "loss": 0.9213, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.7567567567567568, | |
| "grad_norm": 1.4620109120308156, | |
| "learning_rate": 8.124885027894013e-08, | |
| "loss": 0.9285, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.7592137592137593, | |
| "grad_norm": 1.5197712931486254, | |
| "learning_rate": 7.965381643084068e-08, | |
| "loss": 0.8578, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.7616707616707616, | |
| "grad_norm": 1.3554180543190764, | |
| "learning_rate": 7.807394499998576e-08, | |
| "loss": 0.8945, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.7641277641277642, | |
| "grad_norm": 1.7074271145927862, | |
| "learning_rate": 7.650926201426634e-08, | |
| "loss": 0.9014, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.7665847665847667, | |
| "grad_norm": 1.7305821717380618, | |
| "learning_rate": 7.495979325134805e-08, | |
| "loss": 0.9427, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.769041769041769, | |
| "grad_norm": 1.6549796883627756, | |
| "learning_rate": 7.342556423824675e-08, | |
| "loss": 0.8929, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.7714987714987716, | |
| "grad_norm": 1.693588021942915, | |
| "learning_rate": 7.19066002509089e-08, | |
| "loss": 0.9149, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.773955773955774, | |
| "grad_norm": 1.3883677751773678, | |
| "learning_rate": 7.040292631379386e-08, | |
| "loss": 0.9379, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.7764127764127764, | |
| "grad_norm": 1.415233971009828, | |
| "learning_rate": 6.891456719946187e-08, | |
| "loss": 0.8786, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.7788697788697787, | |
| "grad_norm": 1.3887984611992066, | |
| "learning_rate": 6.744154742816721e-08, | |
| "loss": 0.8809, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.7813267813267815, | |
| "grad_norm": 1.4599586632095534, | |
| "learning_rate": 6.598389126745207e-08, | |
| "loss": 0.8906, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.7837837837837838, | |
| "grad_norm": 1.3618867212694985, | |
| "learning_rate": 6.454162273174879e-08, | |
| "loss": 0.8875, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.786240786240786, | |
| "grad_norm": 1.616703409825558, | |
| "learning_rate": 6.311476558198337e-08, | |
| "loss": 0.9186, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.7886977886977888, | |
| "grad_norm": 1.5643574404054077, | |
| "learning_rate": 6.170334332518323e-08, | |
| "loss": 0.8191, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.7911547911547911, | |
| "grad_norm": 1.6547417394218757, | |
| "learning_rate": 6.030737921409168e-08, | |
| "loss": 0.9015, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.7936117936117935, | |
| "grad_norm": 1.3258614962104087, | |
| "learning_rate": 5.892689624678326e-08, | |
| "loss": 0.9003, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.7960687960687962, | |
| "grad_norm": 1.4886792779962632, | |
| "learning_rate": 5.756191716628556e-08, | |
| "loss": 0.9197, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.7985257985257985, | |
| "grad_norm": 1.4228121623738594, | |
| "learning_rate": 5.621246446020489e-08, | |
| "loss": 0.9151, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.8009828009828008, | |
| "grad_norm": 1.4694783098206843, | |
| "learning_rate": 5.487856036035488e-08, | |
| "loss": 0.9478, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.8034398034398036, | |
| "grad_norm": 1.6835863076974373, | |
| "learning_rate": 5.356022684239059e-08, | |
| "loss": 0.8852, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.805896805896806, | |
| "grad_norm": 1.468678481999939, | |
| "learning_rate": 5.22574856254474e-08, | |
| "loss": 0.9227, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.8083538083538082, | |
| "grad_norm": 1.664873807912385, | |
| "learning_rate": 5.0970358171781525e-08, | |
| "loss": 0.9709, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.810810810810811, | |
| "grad_norm": 1.4869146515566096, | |
| "learning_rate": 4.969886568641757e-08, | |
| "loss": 0.8627, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.8132678132678133, | |
| "grad_norm": 1.5302504751076207, | |
| "learning_rate": 4.844302911679909e-08, | |
| "loss": 0.8694, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.8157248157248156, | |
| "grad_norm": 1.3703447874730004, | |
| "learning_rate": 4.720286915244309e-08, | |
| "loss": 0.8388, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 1.8909215620115458, | |
| "learning_rate": 4.597840622459936e-08, | |
| "loss": 0.8697, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.8206388206388207, | |
| "grad_norm": 1.8396393900066417, | |
| "learning_rate": 4.476966050591413e-08, | |
| "loss": 0.9238, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.823095823095823, | |
| "grad_norm": 1.4266543724530107, | |
| "learning_rate": 4.3576651910097076e-08, | |
| "loss": 0.8901, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.8255528255528255, | |
| "grad_norm": 1.3873776301248721, | |
| "learning_rate": 4.239940009159415e-08, | |
| "loss": 0.904, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.828009828009828, | |
| "grad_norm": 1.554746782909149, | |
| "learning_rate": 4.1237924445262864e-08, | |
| "loss": 0.8605, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.8304668304668303, | |
| "grad_norm": 1.5101859068421868, | |
| "learning_rate": 4.0092244106053386e-08, | |
| "loss": 0.8981, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.8329238329238329, | |
| "grad_norm": 1.559988660277713, | |
| "learning_rate": 3.896237794869339e-08, | |
| "loss": 0.8617, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.8353808353808354, | |
| "grad_norm": 1.7088654168690152, | |
| "learning_rate": 3.7848344587376294e-08, | |
| "loss": 0.8696, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.8378378378378377, | |
| "grad_norm": 1.6151695281653022, | |
| "learning_rate": 3.675016237545536e-08, | |
| "loss": 0.9057, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.8402948402948403, | |
| "grad_norm": 1.6260854298806076, | |
| "learning_rate": 3.566784940514145e-08, | |
| "loss": 0.8715, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.8427518427518428, | |
| "grad_norm": 1.5483874580947579, | |
| "learning_rate": 3.460142350720396e-08, | |
| "loss": 0.8928, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.845208845208845, | |
| "grad_norm": 1.3856178647115844, | |
| "learning_rate": 3.3550902250677914e-08, | |
| "loss": 0.8898, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.8476658476658476, | |
| "grad_norm": 1.6217388770422025, | |
| "learning_rate": 3.251630294257479e-08, | |
| "loss": 0.8887, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.8501228501228502, | |
| "grad_norm": 1.7461700477068736, | |
| "learning_rate": 3.1497642627596245e-08, | |
| "loss": 0.9144, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.8525798525798525, | |
| "grad_norm": 1.3204466111990083, | |
| "learning_rate": 3.0494938087854306e-08, | |
| "loss": 0.9046, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.855036855036855, | |
| "grad_norm": 1.5311966806895796, | |
| "learning_rate": 2.9508205842594724e-08, | |
| "loss": 0.9614, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.8574938574938575, | |
| "grad_norm": 1.5618296845089266, | |
| "learning_rate": 2.8537462147924184e-08, | |
| "loss": 0.9499, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.8599508599508598, | |
| "grad_norm": 1.506551170833026, | |
| "learning_rate": 2.7582722996543405e-08, | |
| "loss": 0.8963, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.8624078624078624, | |
| "grad_norm": 1.6976856223987622, | |
| "learning_rate": 2.6644004117483355e-08, | |
| "loss": 0.938, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.864864864864865, | |
| "grad_norm": 1.7152873001996989, | |
| "learning_rate": 2.572132097584523e-08, | |
| "loss": 0.9302, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.8673218673218672, | |
| "grad_norm": 1.4747780984909165, | |
| "learning_rate": 2.481468877254722e-08, | |
| "loss": 0.8927, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.8697788697788698, | |
| "grad_norm": 1.6455964184060006, | |
| "learning_rate": 2.3924122444072936e-08, | |
| "loss": 0.92, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.8722358722358723, | |
| "grad_norm": 1.4135913992521028, | |
| "learning_rate": 2.3049636662225702e-08, | |
| "loss": 0.9095, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.8746928746928746, | |
| "grad_norm": 1.3283327210815499, | |
| "learning_rate": 2.2191245833886985e-08, | |
| "loss": 0.923, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.8771498771498771, | |
| "grad_norm": 2.1206540317643316, | |
| "learning_rate": 2.134896410077891e-08, | |
| "loss": 0.8759, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.8796068796068797, | |
| "grad_norm": 1.4686301829717907, | |
| "learning_rate": 2.052280533923101e-08, | |
| "loss": 0.91, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.882063882063882, | |
| "grad_norm": 2.2833412802518835, | |
| "learning_rate": 1.9712783159952063e-08, | |
| "loss": 0.8681, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.8845208845208845, | |
| "grad_norm": 1.5664189962805568, | |
| "learning_rate": 1.8918910907805728e-08, | |
| "loss": 0.8958, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.886977886977887, | |
| "grad_norm": 1.3597898472460088, | |
| "learning_rate": 1.81412016615905e-08, | |
| "loss": 0.8846, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.8894348894348894, | |
| "grad_norm": 1.794391013663922, | |
| "learning_rate": 1.737966823382442e-08, | |
| "loss": 0.8704, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.8918918918918919, | |
| "grad_norm": 1.3876663428773923, | |
| "learning_rate": 1.6634323170533926e-08, | |
| "loss": 0.8826, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.8943488943488944, | |
| "grad_norm": 1.964992745747424, | |
| "learning_rate": 1.5905178751047134e-08, | |
| "loss": 0.9233, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.8968058968058967, | |
| "grad_norm": 1.5474022069405389, | |
| "learning_rate": 1.519224698779198e-08, | |
| "loss": 0.9574, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.8992628992628993, | |
| "grad_norm": 1.367308795693324, | |
| "learning_rate": 1.4495539626097287e-08, | |
| "loss": 0.8783, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.9017199017199018, | |
| "grad_norm": 1.4446894160381547, | |
| "learning_rate": 1.3815068144000352e-08, | |
| "loss": 0.9278, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.904176904176904, | |
| "grad_norm": 1.4909159417858395, | |
| "learning_rate": 1.3150843752057438e-08, | |
| "loss": 0.8681, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.9066339066339066, | |
| "grad_norm": 1.358589936876708, | |
| "learning_rate": 1.2502877393158585e-08, | |
| "loss": 0.8662, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 1.4371079365419295, | |
| "learning_rate": 1.1871179742348413e-08, | |
| "loss": 0.8844, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.9115479115479115, | |
| "grad_norm": 1.5058059815383584, | |
| "learning_rate": 1.1255761206649061e-08, | |
| "loss": 0.8885, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.914004914004914, | |
| "grad_norm": 1.4300739849238244, | |
| "learning_rate": 1.0656631924889748e-08, | |
| "loss": 0.8997, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.9164619164619165, | |
| "grad_norm": 1.5658510974559385, | |
| "learning_rate": 1.0073801767539247e-08, | |
| "loss": 0.8983, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.9189189189189189, | |
| "grad_norm": 1.4965427543973944, | |
| "learning_rate": 9.507280336542911e-09, | |
| "loss": 0.8676, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.9213759213759214, | |
| "grad_norm": 1.36581135793563, | |
| "learning_rate": 8.957076965165234e-09, | |
| "loss": 0.8792, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.923832923832924, | |
| "grad_norm": 1.5156402618603095, | |
| "learning_rate": 8.423200717835976e-09, | |
| "loss": 0.8626, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.9262899262899262, | |
| "grad_norm": 1.3074563333752685, | |
| "learning_rate": 7.905660390000069e-09, | |
| "loss": 0.9315, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.9287469287469288, | |
| "grad_norm": 1.7609221141327198, | |
| "learning_rate": 7.404464507973607e-09, | |
| "loss": 0.8862, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.9312039312039313, | |
| "grad_norm": 1.5426817574716647, | |
| "learning_rate": 6.919621328802972e-09, | |
| "loss": 0.9543, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.9336609336609336, | |
| "grad_norm": 2.3064282757005663, | |
| "learning_rate": 6.4511388401286e-09, | |
| "loss": 0.9583, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.9361179361179361, | |
| "grad_norm": 1.67978354162131, | |
| "learning_rate": 5.999024760054094e-09, | |
| "loss": 0.9364, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.9385749385749387, | |
| "grad_norm": 2.269285024101559, | |
| "learning_rate": 5.563286537018319e-09, | |
| "loss": 0.858, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.941031941031941, | |
| "grad_norm": 1.7349413650625205, | |
| "learning_rate": 5.143931349672837e-09, | |
| "loss": 0.9019, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.9434889434889435, | |
| "grad_norm": 1.652477600280537, | |
| "learning_rate": 4.7409661067642215e-09, | |
| "loss": 0.9325, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.945945945945946, | |
| "grad_norm": 1.354819301610776, | |
| "learning_rate": 4.354397447019375e-09, | |
| "loss": 0.9047, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.9484029484029484, | |
| "grad_norm": 1.615707852796759, | |
| "learning_rate": 3.9842317390369474e-09, | |
| "loss": 0.9033, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.950859950859951, | |
| "grad_norm": 1.4784764684056093, | |
| "learning_rate": 3.630475081181861e-09, | |
| "loss": 0.8419, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.9533169533169534, | |
| "grad_norm": 1.316270670020737, | |
| "learning_rate": 3.2931333014850626e-09, | |
| "loss": 0.8478, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.9557739557739557, | |
| "grad_norm": 1.4172812525176979, | |
| "learning_rate": 2.972211957547377e-09, | |
| "loss": 0.8208, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.9582309582309583, | |
| "grad_norm": 1.6364175608068232, | |
| "learning_rate": 2.667716336448356e-09, | |
| "loss": 0.9062, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.9606879606879608, | |
| "grad_norm": 1.6122614160580477, | |
| "learning_rate": 2.379651454658571e-09, | |
| "loss": 0.8939, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.9631449631449631, | |
| "grad_norm": 1.5173118240679087, | |
| "learning_rate": 2.108022057957348e-09, | |
| "loss": 0.8899, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.9656019656019657, | |
| "grad_norm": 1.4032962894445904, | |
| "learning_rate": 1.8528326213548273e-09, | |
| "loss": 0.9269, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.9680589680589682, | |
| "grad_norm": 1.491232169954221, | |
| "learning_rate": 1.6140873490172457e-09, | |
| "loss": 0.887, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.9705159705159705, | |
| "grad_norm": 1.452706323624227, | |
| "learning_rate": 1.3917901741989923e-09, | |
| "loss": 0.9033, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.972972972972973, | |
| "grad_norm": 2.9039670137491176, | |
| "learning_rate": 1.1859447591769934e-09, | |
| "loss": 0.8912, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.9754299754299756, | |
| "grad_norm": 1.426897627539779, | |
| "learning_rate": 9.96554495190205e-10, | |
| "loss": 0.8626, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.9778869778869779, | |
| "grad_norm": 1.333240805775527, | |
| "learning_rate": 8.236225023844357e-10, | |
| "loss": 0.8773, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.9803439803439802, | |
| "grad_norm": 1.3907079742810688, | |
| "learning_rate": 6.671516297606094e-10, | |
| "loss": 0.9252, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.982800982800983, | |
| "grad_norm": 1.698786893046404, | |
| "learning_rate": 5.271444551276927e-10, | |
| "loss": 0.9504, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.9852579852579852, | |
| "grad_norm": 1.8549325727891992, | |
| "learning_rate": 4.0360328506017226e-10, | |
| "loss": 0.8953, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.9877149877149876, | |
| "grad_norm": 1.4404540052198962, | |
| "learning_rate": 2.965301548606414e-10, | |
| "loss": 0.8946, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.9901719901719903, | |
| "grad_norm": 1.2420916620768656, | |
| "learning_rate": 2.0592682852549338e-10, | |
| "loss": 0.8543, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.9926289926289926, | |
| "grad_norm": 1.5902830913290869, | |
| "learning_rate": 1.317947987163892e-10, | |
| "loss": 0.9287, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.995085995085995, | |
| "grad_norm": 1.7356553731894724, | |
| "learning_rate": 7.41352867354994e-11, | |
| "loss": 0.9318, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.9975429975429977, | |
| "grad_norm": 1.3632994827873626, | |
| "learning_rate": 3.294924250540898e-11, | |
| "loss": 0.9022, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.5450655192747615, | |
| "learning_rate": 8.237344553241321e-12, | |
| "loss": 0.9407, | |
| "step": 814 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 814, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 407, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.602630032143155e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |