| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1901, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005260389268805891, |
| "grad_norm": 4.7434234619140625, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 1.7896, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0010520778537611783, |
| "grad_norm": 4.893940448760986, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 1.8423, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0015781167806417674, |
| "grad_norm": 5.008203029632568, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 1.7775, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0021041557075223566, |
| "grad_norm": 4.682094097137451, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.7132, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0026301946344029457, |
| "grad_norm": 5.076476097106934, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.7946, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003156233561283535, |
| "grad_norm": 5.164911270141602, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 1.7562, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003682272488164124, |
| "grad_norm": 5.532482624053955, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 1.9173, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004208311415044713, |
| "grad_norm": 4.994466304779053, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.8048, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004734350341925302, |
| "grad_norm": 4.728099822998047, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 1.8313, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0052603892688058915, |
| "grad_norm": 4.757445335388184, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.7745, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005786428195686481, |
| "grad_norm": 4.926065444946289, |
| "learning_rate": 5.5e-07, |
| "loss": 1.8448, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00631246712256707, |
| "grad_norm": 4.987133979797363, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.7755, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006838506049447659, |
| "grad_norm": 4.783141613006592, |
| "learning_rate": 6.5e-07, |
| "loss": 1.7815, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007364544976328248, |
| "grad_norm": 4.668217182159424, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 1.754, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.007890583903208837, |
| "grad_norm": 4.673665523529053, |
| "learning_rate": 7.5e-07, |
| "loss": 1.7608, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008416622830089426, |
| "grad_norm": 4.452486991882324, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.7222, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.008942661756970016, |
| "grad_norm": 4.257665157318115, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 1.7556, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009468700683850605, |
| "grad_norm": 4.1270432472229, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 1.7121, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.009994739610731194, |
| "grad_norm": 4.321215629577637, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.7584, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.010520778537611783, |
| "grad_norm": 3.8703970909118652, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.6611, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011046817464492372, |
| "grad_norm": 4.07947301864624, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.7914, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.011572856391372961, |
| "grad_norm": 3.9068686962127686, |
| "learning_rate": 1.1e-06, |
| "loss": 1.7848, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01209889531825355, |
| "grad_norm": 3.7697386741638184, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.6694, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01262493424513414, |
| "grad_norm": 3.795276641845703, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.759, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.013150973172014729, |
| "grad_norm": 3.331472396850586, |
| "learning_rate": 1.25e-06, |
| "loss": 1.7053, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.013677012098895318, |
| "grad_norm": 3.381592035293579, |
| "learning_rate": 1.3e-06, |
| "loss": 1.683, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.014203051025775907, |
| "grad_norm": 3.2494184970855713, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 1.5756, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.014729089952656496, |
| "grad_norm": 3.124213695526123, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.7102, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.015255128879537085, |
| "grad_norm": 2.9148762226104736, |
| "learning_rate": 1.45e-06, |
| "loss": 1.6007, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.015781167806417674, |
| "grad_norm": 2.886734962463379, |
| "learning_rate": 1.5e-06, |
| "loss": 1.7086, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.016307206733298264, |
| "grad_norm": 2.6898605823516846, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 1.5655, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.016833245660178853, |
| "grad_norm": 2.6458981037139893, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.4881, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.017359284587059442, |
| "grad_norm": 2.481387138366699, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 1.5608, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01788532351394003, |
| "grad_norm": 2.743023633956909, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 1.5705, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01841136244082062, |
| "grad_norm": 2.7273406982421875, |
| "learning_rate": 1.75e-06, |
| "loss": 1.5819, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01893740136770121, |
| "grad_norm": 2.7253308296203613, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.5201, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0194634402945818, |
| "grad_norm": 2.8794732093811035, |
| "learning_rate": 1.85e-06, |
| "loss": 1.4743, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.019989479221462388, |
| "grad_norm": 2.767172336578369, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 1.5366, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.020515518148342977, |
| "grad_norm": 2.84169864654541, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 1.5635, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.021041557075223566, |
| "grad_norm": 2.6982147693634033, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.49, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.021567596002104155, |
| "grad_norm": 2.597731590270996, |
| "learning_rate": 2.05e-06, |
| "loss": 1.5189, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.022093634928984744, |
| "grad_norm": 2.4286556243896484, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 1.4439, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.022619673855865333, |
| "grad_norm": 2.6267499923706055, |
| "learning_rate": 2.15e-06, |
| "loss": 1.3522, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.023145712782745922, |
| "grad_norm": 2.2576816082000732, |
| "learning_rate": 2.2e-06, |
| "loss": 1.4713, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02367175170962651, |
| "grad_norm": 2.406381368637085, |
| "learning_rate": 2.25e-06, |
| "loss": 1.47, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0241977906365071, |
| "grad_norm": 2.2341415882110596, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 1.4041, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02472382956338769, |
| "grad_norm": 2.5055644512176514, |
| "learning_rate": 2.35e-06, |
| "loss": 1.4321, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02524986849026828, |
| "grad_norm": 2.2131927013397217, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.3631, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.025775907417148868, |
| "grad_norm": 2.3399457931518555, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 1.4055, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.026301946344029457, |
| "grad_norm": 2.2194554805755615, |
| "learning_rate": 2.5e-06, |
| "loss": 1.3722, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.026827985270910047, |
| "grad_norm": 2.196530342102051, |
| "learning_rate": 2.55e-06, |
| "loss": 1.4126, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.027354024197790636, |
| "grad_norm": 2.401376485824585, |
| "learning_rate": 2.6e-06, |
| "loss": 1.4174, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.027880063124671225, |
| "grad_norm": 2.2509777545928955, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 1.3725, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.028406102051551814, |
| "grad_norm": 2.2538340091705322, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 1.4274, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.028932140978432403, |
| "grad_norm": 2.218494176864624, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.4518, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.029458179905312992, |
| "grad_norm": 2.06544828414917, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.3547, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02998421883219358, |
| "grad_norm": 2.014075994491577, |
| "learning_rate": 2.85e-06, |
| "loss": 1.2274, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03051025775907417, |
| "grad_norm": 2.187418222427368, |
| "learning_rate": 2.9e-06, |
| "loss": 1.3663, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03103629668595476, |
| "grad_norm": 1.993913173675537, |
| "learning_rate": 2.95e-06, |
| "loss": 1.3357, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03156233561283535, |
| "grad_norm": 2.1067426204681396, |
| "learning_rate": 3e-06, |
| "loss": 1.3627, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03208837453971594, |
| "grad_norm": 2.0144565105438232, |
| "learning_rate": 3.05e-06, |
| "loss": 1.394, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03261441346659653, |
| "grad_norm": 2.2240288257598877, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 1.3657, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03314045239347712, |
| "grad_norm": 2.0080718994140625, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 1.2954, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.033666491320357705, |
| "grad_norm": 2.1592211723327637, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 1.363, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0341925302472383, |
| "grad_norm": 2.1390435695648193, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.3329, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.034718569174118884, |
| "grad_norm": 2.309795379638672, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1.3378, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.035244608100999476, |
| "grad_norm": 2.0283970832824707, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 1.2707, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03577064702788006, |
| "grad_norm": 2.3350703716278076, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 1.3149, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.036296685954760655, |
| "grad_norm": 2.1374268531799316, |
| "learning_rate": 3.45e-06, |
| "loss": 1.3181, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03682272488164124, |
| "grad_norm": 2.1340744495391846, |
| "learning_rate": 3.5e-06, |
| "loss": 1.2968, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03734876380852183, |
| "grad_norm": 2.212939500808716, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 1.3285, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03787480273540242, |
| "grad_norm": 2.0891077518463135, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 1.3142, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03840084166228301, |
| "grad_norm": 2.0146496295928955, |
| "learning_rate": 3.65e-06, |
| "loss": 1.2932, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0389268805891636, |
| "grad_norm": 2.2315266132354736, |
| "learning_rate": 3.7e-06, |
| "loss": 1.3515, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.03945291951604419, |
| "grad_norm": 2.0311717987060547, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.2601, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.039978958442924775, |
| "grad_norm": 1.9522899389266968, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.3521, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04050499736980537, |
| "grad_norm": 2.0501742362976074, |
| "learning_rate": 3.85e-06, |
| "loss": 1.3243, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.041031036296685953, |
| "grad_norm": 2.136033535003662, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 1.3373, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.041557075223566546, |
| "grad_norm": 2.328866958618164, |
| "learning_rate": 3.95e-06, |
| "loss": 1.2864, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04208311415044713, |
| "grad_norm": 2.0889344215393066, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.2692, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.042609153077327724, |
| "grad_norm": 2.088667631149292, |
| "learning_rate": 4.05e-06, |
| "loss": 1.2232, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.04313519200420831, |
| "grad_norm": 2.0293898582458496, |
| "learning_rate": 4.1e-06, |
| "loss": 1.2505, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0436612309310889, |
| "grad_norm": 2.240025281906128, |
| "learning_rate": 4.15e-06, |
| "loss": 1.3107, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04418726985796949, |
| "grad_norm": 2.123445987701416, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 1.1674, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04471330878485008, |
| "grad_norm": 2.1865620613098145, |
| "learning_rate": 4.25e-06, |
| "loss": 1.3257, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.04523934771173067, |
| "grad_norm": 2.1336405277252197, |
| "learning_rate": 4.3e-06, |
| "loss": 1.2968, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04576538663861126, |
| "grad_norm": 2.117763042449951, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 1.2294, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.046291425565491845, |
| "grad_norm": 1.9969348907470703, |
| "learning_rate": 4.4e-06, |
| "loss": 1.2621, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04681746449237244, |
| "grad_norm": 2.24861741065979, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 1.2909, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04734350341925302, |
| "grad_norm": 2.08335542678833, |
| "learning_rate": 4.5e-06, |
| "loss": 1.2691, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.047869542346133616, |
| "grad_norm": 2.1306045055389404, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 1.3248, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0483955812730142, |
| "grad_norm": 2.2251298427581787, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 1.2391, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.048921620199894794, |
| "grad_norm": 2.1604959964752197, |
| "learning_rate": 4.65e-06, |
| "loss": 1.2169, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.04944765912677538, |
| "grad_norm": 2.0155038833618164, |
| "learning_rate": 4.7e-06, |
| "loss": 1.2533, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04997369805365597, |
| "grad_norm": 1.9579726457595825, |
| "learning_rate": 4.75e-06, |
| "loss": 1.2228, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.05049973698053656, |
| "grad_norm": 2.129992961883545, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 1.2573, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05102577590741715, |
| "grad_norm": 2.0832459926605225, |
| "learning_rate": 4.85e-06, |
| "loss": 1.241, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.051551814834297736, |
| "grad_norm": 2.278550148010254, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.2565, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.05207785376117833, |
| "grad_norm": 2.0997259616851807, |
| "learning_rate": 4.95e-06, |
| "loss": 1.2445, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.052603892688058915, |
| "grad_norm": 2.127976417541504, |
| "learning_rate": 5e-06, |
| "loss": 1.2605, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05312993161493951, |
| "grad_norm": 2.1200127601623535, |
| "learning_rate": 4.9999999034856715e-06, |
| "loss": 1.3057, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05365597054182009, |
| "grad_norm": 2.456881046295166, |
| "learning_rate": 4.999999613942694e-06, |
| "loss": 1.2741, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.054182009468700686, |
| "grad_norm": 2.189507484436035, |
| "learning_rate": 4.9999991313710884e-06, |
| "loss": 1.2399, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05470804839558127, |
| "grad_norm": 2.258619785308838, |
| "learning_rate": 4.9999984557708936e-06, |
| "loss": 1.2161, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.055234087322461864, |
| "grad_norm": 1.983225703239441, |
| "learning_rate": 4.999997587142161e-06, |
| "loss": 1.2027, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05576012624934245, |
| "grad_norm": 2.1400973796844482, |
| "learning_rate": 4.999996525484957e-06, |
| "loss": 1.2685, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.05628616517622304, |
| "grad_norm": 1.9494950771331787, |
| "learning_rate": 4.999995270799365e-06, |
| "loss": 1.2604, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05681220410310363, |
| "grad_norm": 2.1203386783599854, |
| "learning_rate": 4.9999938230854814e-06, |
| "loss": 1.2345, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05733824302998422, |
| "grad_norm": 2.131884813308716, |
| "learning_rate": 4.999992182343417e-06, |
| "loss": 1.2097, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.057864281956864806, |
| "grad_norm": 2.136289119720459, |
| "learning_rate": 4.9999903485732996e-06, |
| "loss": 1.2617, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0583903208837454, |
| "grad_norm": 2.025071144104004, |
| "learning_rate": 4.9999883217752705e-06, |
| "loss": 1.2004, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.058916359810625984, |
| "grad_norm": 2.513960838317871, |
| "learning_rate": 4.999986101949486e-06, |
| "loss": 1.2399, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05944239873750658, |
| "grad_norm": 2.2483277320861816, |
| "learning_rate": 4.999983689096117e-06, |
| "loss": 1.2265, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.05996843766438716, |
| "grad_norm": 2.0863187313079834, |
| "learning_rate": 4.999981083215352e-06, |
| "loss": 1.1969, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.060494476591267755, |
| "grad_norm": 2.1240596771240234, |
| "learning_rate": 4.99997828430739e-06, |
| "loss": 1.275, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06102051551814834, |
| "grad_norm": 2.3810060024261475, |
| "learning_rate": 4.9999752923724465e-06, |
| "loss": 1.3054, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.061546554445028934, |
| "grad_norm": 2.1266205310821533, |
| "learning_rate": 4.999972107410754e-06, |
| "loss": 1.1933, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.06207259337190952, |
| "grad_norm": 2.039619207382202, |
| "learning_rate": 4.999968729422559e-06, |
| "loss": 1.1886, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0625986322987901, |
| "grad_norm": 2.024503707885742, |
| "learning_rate": 4.999965158408122e-06, |
| "loss": 1.2008, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0631246712256707, |
| "grad_norm": 2.058926582336426, |
| "learning_rate": 4.999961394367717e-06, |
| "loss": 1.1772, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06365071015255129, |
| "grad_norm": 1.989399790763855, |
| "learning_rate": 4.999957437301637e-06, |
| "loss": 1.1869, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06417674907943188, |
| "grad_norm": 2.0462567806243896, |
| "learning_rate": 4.999953287210185e-06, |
| "loss": 1.1944, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06470278800631246, |
| "grad_norm": 2.258549213409424, |
| "learning_rate": 4.999948944093683e-06, |
| "loss": 1.2304, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.06522882693319305, |
| "grad_norm": 2.115344285964966, |
| "learning_rate": 4.999944407952467e-06, |
| "loss": 1.1901, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06575486586007365, |
| "grad_norm": 2.082406997680664, |
| "learning_rate": 4.999939678786886e-06, |
| "loss": 1.2481, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06628090478695424, |
| "grad_norm": 2.5095906257629395, |
| "learning_rate": 4.999934756597305e-06, |
| "loss": 1.2526, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.06680694371383482, |
| "grad_norm": 1.989524483680725, |
| "learning_rate": 4.999929641384105e-06, |
| "loss": 1.2298, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.06733298264071541, |
| "grad_norm": 2.3429722785949707, |
| "learning_rate": 4.999924333147681e-06, |
| "loss": 1.2511, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.067859021567596, |
| "grad_norm": 2.064497232437134, |
| "learning_rate": 4.999918831888441e-06, |
| "loss": 1.2041, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.0683850604944766, |
| "grad_norm": 2.099992513656616, |
| "learning_rate": 4.999913137606813e-06, |
| "loss": 1.2256, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06891109942135717, |
| "grad_norm": 2.188778877258301, |
| "learning_rate": 4.999907250303234e-06, |
| "loss": 1.2009, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06943713834823777, |
| "grad_norm": 2.154895067214966, |
| "learning_rate": 4.999901169978158e-06, |
| "loss": 1.273, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06996317727511836, |
| "grad_norm": 2.457084894180298, |
| "learning_rate": 4.999894896632058e-06, |
| "loss": 1.2003, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07048921620199895, |
| "grad_norm": 2.0455472469329834, |
| "learning_rate": 4.999888430265415e-06, |
| "loss": 1.1909, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07101525512887953, |
| "grad_norm": 2.3690097332000732, |
| "learning_rate": 4.99988177087873e-06, |
| "loss": 1.2414, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.07154129405576012, |
| "grad_norm": 2.0194432735443115, |
| "learning_rate": 4.999874918472516e-06, |
| "loss": 1.2072, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.07206733298264072, |
| "grad_norm": 2.0639989376068115, |
| "learning_rate": 4.999867873047303e-06, |
| "loss": 1.1853, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07259337190952131, |
| "grad_norm": 2.1263129711151123, |
| "learning_rate": 4.999860634603635e-06, |
| "loss": 1.1915, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.07311941083640189, |
| "grad_norm": 1.9768770933151245, |
| "learning_rate": 4.99985320314207e-06, |
| "loss": 1.1623, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07364544976328248, |
| "grad_norm": 2.4466986656188965, |
| "learning_rate": 4.9998455786631835e-06, |
| "loss": 1.2549, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07417148869016307, |
| "grad_norm": 2.482954263687134, |
| "learning_rate": 4.999837761167563e-06, |
| "loss": 1.1503, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.07469752761704367, |
| "grad_norm": 2.1949164867401123, |
| "learning_rate": 4.9998297506558116e-06, |
| "loss": 1.2515, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07522356654392424, |
| "grad_norm": 2.3435401916503906, |
| "learning_rate": 4.9998215471285486e-06, |
| "loss": 1.2231, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07574960547080484, |
| "grad_norm": 2.2442994117736816, |
| "learning_rate": 4.9998131505864064e-06, |
| "loss": 1.2472, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07627564439768543, |
| "grad_norm": 2.4117157459259033, |
| "learning_rate": 4.999804561030036e-06, |
| "loss": 1.2303, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07680168332456602, |
| "grad_norm": 2.263303279876709, |
| "learning_rate": 4.999795778460097e-06, |
| "loss": 1.2435, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0773277222514466, |
| "grad_norm": 2.174962282180786, |
| "learning_rate": 4.99978680287727e-06, |
| "loss": 1.2074, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0778537611783272, |
| "grad_norm": 2.1498875617980957, |
| "learning_rate": 4.999777634282248e-06, |
| "loss": 1.1665, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07837980010520779, |
| "grad_norm": 2.0245747566223145, |
| "learning_rate": 4.999768272675737e-06, |
| "loss": 1.169, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.07890583903208838, |
| "grad_norm": 2.03243350982666, |
| "learning_rate": 4.999758718058462e-06, |
| "loss": 1.2113, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07943187795896896, |
| "grad_norm": 2.104052782058716, |
| "learning_rate": 4.9997489704311586e-06, |
| "loss": 1.1792, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07995791688584955, |
| "grad_norm": 2.16056752204895, |
| "learning_rate": 4.999739029794581e-06, |
| "loss": 1.2183, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08048395581273014, |
| "grad_norm": 2.1418581008911133, |
| "learning_rate": 4.9997288961494975e-06, |
| "loss": 1.2024, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08100999473961074, |
| "grad_norm": 2.235917329788208, |
| "learning_rate": 4.999718569496688e-06, |
| "loss": 1.2234, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08153603366649131, |
| "grad_norm": 2.0039474964141846, |
| "learning_rate": 4.999708049836952e-06, |
| "loss": 1.1164, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08206207259337191, |
| "grad_norm": 2.0888242721557617, |
| "learning_rate": 4.9996973371710995e-06, |
| "loss": 1.1935, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0825881115202525, |
| "grad_norm": 2.245558500289917, |
| "learning_rate": 4.999686431499961e-06, |
| "loss": 1.1438, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08311415044713309, |
| "grad_norm": 2.351905345916748, |
| "learning_rate": 4.999675332824376e-06, |
| "loss": 1.2208, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.08364018937401367, |
| "grad_norm": 2.0418808460235596, |
| "learning_rate": 4.999664041145201e-06, |
| "loss": 1.1537, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08416622830089426, |
| "grad_norm": 2.194399118423462, |
| "learning_rate": 4.99965255646331e-06, |
| "loss": 1.1602, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08469226722777486, |
| "grad_norm": 2.4853098392486572, |
| "learning_rate": 4.999640878779588e-06, |
| "loss": 1.1981, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.08521830615465545, |
| "grad_norm": 2.1702558994293213, |
| "learning_rate": 4.9996290080949386e-06, |
| "loss": 1.1682, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.08574434508153603, |
| "grad_norm": 2.150707960128784, |
| "learning_rate": 4.999616944410276e-06, |
| "loss": 1.2123, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.08627038400841662, |
| "grad_norm": 2.166897773742676, |
| "learning_rate": 4.9996046877265325e-06, |
| "loss": 1.1855, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.08679642293529721, |
| "grad_norm": 2.1538188457489014, |
| "learning_rate": 4.999592238044655e-06, |
| "loss": 1.1797, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0873224618621778, |
| "grad_norm": 2.222170114517212, |
| "learning_rate": 4.999579595365604e-06, |
| "loss": 1.1606, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08784850078905838, |
| "grad_norm": 2.264437437057495, |
| "learning_rate": 4.999566759690356e-06, |
| "loss": 1.1662, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08837453971593898, |
| "grad_norm": 2.2306337356567383, |
| "learning_rate": 4.999553731019903e-06, |
| "loss": 1.1933, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.08890057864281957, |
| "grad_norm": 2.2025609016418457, |
| "learning_rate": 4.9995405093552495e-06, |
| "loss": 1.2241, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.08942661756970016, |
| "grad_norm": 2.3908772468566895, |
| "learning_rate": 4.999527094697418e-06, |
| "loss": 1.1954, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08995265649658074, |
| "grad_norm": 2.1161653995513916, |
| "learning_rate": 4.999513487047442e-06, |
| "loss": 1.2315, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09047869542346133, |
| "grad_norm": 2.0984017848968506, |
| "learning_rate": 4.9994996864063735e-06, |
| "loss": 1.2413, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09100473435034193, |
| "grad_norm": 2.205087900161743, |
| "learning_rate": 4.999485692775279e-06, |
| "loss": 1.2267, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09153077327722252, |
| "grad_norm": 2.224553108215332, |
| "learning_rate": 4.9994715061552365e-06, |
| "loss": 1.1613, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0920568122041031, |
| "grad_norm": 2.191676139831543, |
| "learning_rate": 4.999457126547344e-06, |
| "loss": 1.168, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.09258285113098369, |
| "grad_norm": 2.2432751655578613, |
| "learning_rate": 4.99944255395271e-06, |
| "loss": 1.218, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.09310889005786428, |
| "grad_norm": 2.1327083110809326, |
| "learning_rate": 4.999427788372461e-06, |
| "loss": 1.1994, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.09363492898474488, |
| "grad_norm": 2.146256923675537, |
| "learning_rate": 4.999412829807735e-06, |
| "loss": 1.1387, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.09416096791162545, |
| "grad_norm": 2.377356767654419, |
| "learning_rate": 4.999397678259689e-06, |
| "loss": 1.1901, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09468700683850605, |
| "grad_norm": 2.192535638809204, |
| "learning_rate": 4.999382333729492e-06, |
| "loss": 1.2079, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.09521304576538664, |
| "grad_norm": 2.0958621501922607, |
| "learning_rate": 4.999366796218329e-06, |
| "loss": 1.1663, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.09573908469226723, |
| "grad_norm": 2.1492772102355957, |
| "learning_rate": 4.9993510657274e-06, |
| "loss": 1.1877, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.09626512361914781, |
| "grad_norm": 2.366111993789673, |
| "learning_rate": 4.999335142257919e-06, |
| "loss": 1.1849, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0967911625460284, |
| "grad_norm": 2.144526243209839, |
| "learning_rate": 4.999319025811116e-06, |
| "loss": 1.1739, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.097317201472909, |
| "grad_norm": 2.3407647609710693, |
| "learning_rate": 4.999302716388234e-06, |
| "loss": 1.1987, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.09784324039978959, |
| "grad_norm": 2.3771328926086426, |
| "learning_rate": 4.999286213990534e-06, |
| "loss": 1.2024, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09836927932667017, |
| "grad_norm": 2.2484753131866455, |
| "learning_rate": 4.99926951861929e-06, |
| "loss": 1.2087, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09889531825355076, |
| "grad_norm": 2.276099681854248, |
| "learning_rate": 4.99925263027579e-06, |
| "loss": 1.1696, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09942135718043135, |
| "grad_norm": 2.1576876640319824, |
| "learning_rate": 4.999235548961338e-06, |
| "loss": 1.1404, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09994739610731194, |
| "grad_norm": 2.1412558555603027, |
| "learning_rate": 4.999218274677254e-06, |
| "loss": 1.1279, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.10047343503419252, |
| "grad_norm": 2.1507153511047363, |
| "learning_rate": 4.999200807424871e-06, |
| "loss": 1.1841, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.10099947396107312, |
| "grad_norm": 2.236116886138916, |
| "learning_rate": 4.999183147205538e-06, |
| "loss": 1.208, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.10152551288795371, |
| "grad_norm": 2.1643691062927246, |
| "learning_rate": 4.9991652940206185e-06, |
| "loss": 1.1325, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.1020515518148343, |
| "grad_norm": 2.11639142036438, |
| "learning_rate": 4.999147247871491e-06, |
| "loss": 1.2073, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.10257759074171488, |
| "grad_norm": 1.9682193994522095, |
| "learning_rate": 4.9991290087595475e-06, |
| "loss": 1.1447, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.10310362966859547, |
| "grad_norm": 1.9927830696105957, |
| "learning_rate": 4.9991105766861996e-06, |
| "loss": 1.1694, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.10362966859547607, |
| "grad_norm": 2.0124592781066895, |
| "learning_rate": 4.999091951652867e-06, |
| "loss": 1.152, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.10415570752235666, |
| "grad_norm": 2.1793248653411865, |
| "learning_rate": 4.99907313366099e-06, |
| "loss": 1.228, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.10468174644923724, |
| "grad_norm": 2.1615028381347656, |
| "learning_rate": 4.99905412271202e-06, |
| "loss": 1.2106, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.10520778537611783, |
| "grad_norm": 1.9827650785446167, |
| "learning_rate": 4.999034918807425e-06, |
| "loss": 1.1829, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10573382430299842, |
| "grad_norm": 2.1772680282592773, |
| "learning_rate": 4.999015521948689e-06, |
| "loss": 1.13, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.10625986322987901, |
| "grad_norm": 2.257385492324829, |
| "learning_rate": 4.99899593213731e-06, |
| "loss": 1.2144, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1067859021567596, |
| "grad_norm": 2.104809045791626, |
| "learning_rate": 4.998976149374799e-06, |
| "loss": 1.1715, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.10731194108364019, |
| "grad_norm": 2.116504430770874, |
| "learning_rate": 4.998956173662683e-06, |
| "loss": 1.1442, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.10783798001052078, |
| "grad_norm": 2.2018845081329346, |
| "learning_rate": 4.998936005002507e-06, |
| "loss": 1.1327, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.10836401893740137, |
| "grad_norm": 2.2733311653137207, |
| "learning_rate": 4.998915643395826e-06, |
| "loss": 1.1821, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10889005786428195, |
| "grad_norm": 2.0005805492401123, |
| "learning_rate": 4.998895088844212e-06, |
| "loss": 1.0955, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10941609679116254, |
| "grad_norm": 2.0851638317108154, |
| "learning_rate": 4.998874341349253e-06, |
| "loss": 1.1851, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10994213571804314, |
| "grad_norm": 2.032989501953125, |
| "learning_rate": 4.998853400912552e-06, |
| "loss": 1.1069, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.11046817464492373, |
| "grad_norm": 2.295994520187378, |
| "learning_rate": 4.9988322675357235e-06, |
| "loss": 1.1511, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1109942135718043, |
| "grad_norm": 1.9963881969451904, |
| "learning_rate": 4.9988109412204015e-06, |
| "loss": 1.1497, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1115202524986849, |
| "grad_norm": 2.6223835945129395, |
| "learning_rate": 4.998789421968231e-06, |
| "loss": 1.1692, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.11204629142556549, |
| "grad_norm": 2.1924188137054443, |
| "learning_rate": 4.998767709780873e-06, |
| "loss": 1.1659, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.11257233035244608, |
| "grad_norm": 2.4124836921691895, |
| "learning_rate": 4.998745804660005e-06, |
| "loss": 1.1965, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.11309836927932668, |
| "grad_norm": 2.15348482131958, |
| "learning_rate": 4.99872370660732e-06, |
| "loss": 1.1337, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.11362440820620726, |
| "grad_norm": 2.3462562561035156, |
| "learning_rate": 4.9987014156245215e-06, |
| "loss": 1.1793, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.11415044713308785, |
| "grad_norm": 2.1864969730377197, |
| "learning_rate": 4.998678931713331e-06, |
| "loss": 1.1139, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.11467648605996844, |
| "grad_norm": 2.1411378383636475, |
| "learning_rate": 4.998656254875486e-06, |
| "loss": 1.1582, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.11520252498684903, |
| "grad_norm": 2.2826247215270996, |
| "learning_rate": 4.998633385112737e-06, |
| "loss": 1.1779, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.11572856391372961, |
| "grad_norm": 2.0697169303894043, |
| "learning_rate": 4.998610322426848e-06, |
| "loss": 1.1775, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1162546028406102, |
| "grad_norm": 2.153381824493408, |
| "learning_rate": 4.998587066819602e-06, |
| "loss": 1.2244, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1167806417674908, |
| "grad_norm": 2.151595115661621, |
| "learning_rate": 4.998563618292793e-06, |
| "loss": 1.1562, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.11730668069437139, |
| "grad_norm": 2.1102607250213623, |
| "learning_rate": 4.998539976848233e-06, |
| "loss": 1.1326, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.11783271962125197, |
| "grad_norm": 2.3099205493927, |
| "learning_rate": 4.998516142487746e-06, |
| "loss": 1.1934, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.11835875854813256, |
| "grad_norm": 2.0830485820770264, |
| "learning_rate": 4.998492115213173e-06, |
| "loss": 1.105, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.11888479747501315, |
| "grad_norm": 1.965256929397583, |
| "learning_rate": 4.998467895026369e-06, |
| "loss": 1.1496, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.11941083640189375, |
| "grad_norm": 2.060734272003174, |
| "learning_rate": 4.9984434819292036e-06, |
| "loss": 1.1256, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.11993687532877433, |
| "grad_norm": 2.278106927871704, |
| "learning_rate": 4.998418875923563e-06, |
| "loss": 1.1557, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.12046291425565492, |
| "grad_norm": 2.562490463256836, |
| "learning_rate": 4.998394077011346e-06, |
| "loss": 1.1579, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.12098895318253551, |
| "grad_norm": 2.20798921585083, |
| "learning_rate": 4.998369085194468e-06, |
| "loss": 1.181, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1215149921094161, |
| "grad_norm": 2.3529961109161377, |
| "learning_rate": 4.998343900474858e-06, |
| "loss": 1.1514, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.12204103103629668, |
| "grad_norm": 2.2413651943206787, |
| "learning_rate": 4.998318522854461e-06, |
| "loss": 1.1317, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.12256706996317727, |
| "grad_norm": 2.2179031372070312, |
| "learning_rate": 4.998292952335236e-06, |
| "loss": 1.1784, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.12309310889005787, |
| "grad_norm": 2.2591211795806885, |
| "learning_rate": 4.998267188919158e-06, |
| "loss": 1.1587, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.12361914781693846, |
| "grad_norm": 2.4820573329925537, |
| "learning_rate": 4.998241232608216e-06, |
| "loss": 1.1448, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.12414518674381904, |
| "grad_norm": 2.202066659927368, |
| "learning_rate": 4.998215083404414e-06, |
| "loss": 1.1859, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.12467122567069963, |
| "grad_norm": 2.246918201446533, |
| "learning_rate": 4.9981887413097705e-06, |
| "loss": 1.1778, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.1251972645975802, |
| "grad_norm": 2.166926145553589, |
| "learning_rate": 4.9981622063263205e-06, |
| "loss": 1.16, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.12572330352446082, |
| "grad_norm": 2.2850661277770996, |
| "learning_rate": 4.998135478456112e-06, |
| "loss": 1.1522, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1262493424513414, |
| "grad_norm": 2.1694653034210205, |
| "learning_rate": 4.9981085577012095e-06, |
| "loss": 1.1394, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.126775381378222, |
| "grad_norm": 2.061791181564331, |
| "learning_rate": 4.998081444063691e-06, |
| "loss": 1.1551, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.12730142030510258, |
| "grad_norm": 2.1517114639282227, |
| "learning_rate": 4.998054137545649e-06, |
| "loss": 1.1487, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.12782745923198316, |
| "grad_norm": 2.118903398513794, |
| "learning_rate": 4.9980266381491935e-06, |
| "loss": 1.1871, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.12835349815886377, |
| "grad_norm": 2.271512508392334, |
| "learning_rate": 4.997998945876448e-06, |
| "loss": 1.21, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.12887953708574434, |
| "grad_norm": 2.199542760848999, |
| "learning_rate": 4.997971060729549e-06, |
| "loss": 1.17, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.12940557601262492, |
| "grad_norm": 2.213566303253174, |
| "learning_rate": 4.997942982710651e-06, |
| "loss": 1.1521, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.12993161493950553, |
| "grad_norm": 2.291456699371338, |
| "learning_rate": 4.997914711821921e-06, |
| "loss": 1.1671, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1304576538663861, |
| "grad_norm": 2.017871856689453, |
| "learning_rate": 4.997886248065542e-06, |
| "loss": 1.1522, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.13098369279326671, |
| "grad_norm": 2.1125521659851074, |
| "learning_rate": 4.9978575914437115e-06, |
| "loss": 1.1335, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1315097317201473, |
| "grad_norm": 2.262874126434326, |
| "learning_rate": 4.997828741958643e-06, |
| "loss": 1.1697, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.13203577064702787, |
| "grad_norm": 2.450192451477051, |
| "learning_rate": 4.997799699612563e-06, |
| "loss": 1.1329, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.13256180957390848, |
| "grad_norm": 2.0831351280212402, |
| "learning_rate": 4.997770464407715e-06, |
| "loss": 1.1711, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.13308784850078906, |
| "grad_norm": 2.2078895568847656, |
| "learning_rate": 4.997741036346357e-06, |
| "loss": 1.1998, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.13361388742766964, |
| "grad_norm": 2.175858497619629, |
| "learning_rate": 4.997711415430759e-06, |
| "loss": 1.1083, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.13413992635455024, |
| "grad_norm": 2.203817129135132, |
| "learning_rate": 4.997681601663207e-06, |
| "loss": 1.088, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.13466596528143082, |
| "grad_norm": 2.0065557956695557, |
| "learning_rate": 4.997651595046007e-06, |
| "loss": 1.1584, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.13519200420831143, |
| "grad_norm": 2.299633264541626, |
| "learning_rate": 4.997621395581474e-06, |
| "loss": 1.2102, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.135718043135192, |
| "grad_norm": 2.2972707748413086, |
| "learning_rate": 4.997591003271938e-06, |
| "loss": 1.1821, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.13624408206207259, |
| "grad_norm": 2.399705171585083, |
| "learning_rate": 4.997560418119749e-06, |
| "loss": 1.1325, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.1367701209889532, |
| "grad_norm": 2.2461678981781006, |
| "learning_rate": 4.997529640127266e-06, |
| "loss": 1.2361, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.13729615991583377, |
| "grad_norm": 2.236917495727539, |
| "learning_rate": 4.997498669296865e-06, |
| "loss": 1.1159, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.13782219884271435, |
| "grad_norm": 2.2851338386535645, |
| "learning_rate": 4.99746750563094e-06, |
| "loss": 1.1688, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.13834823776959496, |
| "grad_norm": 2.1499626636505127, |
| "learning_rate": 4.997436149131894e-06, |
| "loss": 1.1478, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.13887427669647553, |
| "grad_norm": 2.0969858169555664, |
| "learning_rate": 4.997404599802151e-06, |
| "loss": 1.1102, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.13940031562335614, |
| "grad_norm": 2.5635933876037598, |
| "learning_rate": 4.997372857644146e-06, |
| "loss": 1.1173, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.13992635455023672, |
| "grad_norm": 2.1076197624206543, |
| "learning_rate": 4.997340922660329e-06, |
| "loss": 1.1321, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1404523934771173, |
| "grad_norm": 2.179189443588257, |
| "learning_rate": 4.997308794853165e-06, |
| "loss": 1.1325, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.1409784324039979, |
| "grad_norm": 2.0838067531585693, |
| "learning_rate": 4.9972764742251375e-06, |
| "loss": 1.1243, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.14150447133087848, |
| "grad_norm": 2.1462979316711426, |
| "learning_rate": 4.9972439607787405e-06, |
| "loss": 1.1251, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.14203051025775906, |
| "grad_norm": 2.144658088684082, |
| "learning_rate": 4.997211254516484e-06, |
| "loss": 1.1879, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.14255654918463967, |
| "grad_norm": 2.118098020553589, |
| "learning_rate": 4.997178355440892e-06, |
| "loss": 1.1635, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.14308258811152025, |
| "grad_norm": 2.284640312194824, |
| "learning_rate": 4.99714526355451e-06, |
| "loss": 1.1181, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.14360862703840085, |
| "grad_norm": 2.2020652294158936, |
| "learning_rate": 4.997111978859886e-06, |
| "loss": 1.1234, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.14413466596528143, |
| "grad_norm": 2.164998769760132, |
| "learning_rate": 4.997078501359595e-06, |
| "loss": 1.1723, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.144660704892162, |
| "grad_norm": 2.1917877197265625, |
| "learning_rate": 4.9970448310562196e-06, |
| "loss": 1.1222, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.14518674381904262, |
| "grad_norm": 2.314770221710205, |
| "learning_rate": 4.99701096795236e-06, |
| "loss": 1.183, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1457127827459232, |
| "grad_norm": 2.217176675796509, |
| "learning_rate": 4.996976912050632e-06, |
| "loss": 1.1509, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.14623882167280378, |
| "grad_norm": 2.253232002258301, |
| "learning_rate": 4.996942663353663e-06, |
| "loss": 1.1733, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.14676486059968438, |
| "grad_norm": 2.091414213180542, |
| "learning_rate": 4.996908221864099e-06, |
| "loss": 1.1479, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.14729089952656496, |
| "grad_norm": 2.391035556793213, |
| "learning_rate": 4.996873587584599e-06, |
| "loss": 1.1646, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.14781693845344557, |
| "grad_norm": 1.941179871559143, |
| "learning_rate": 4.996838760517836e-06, |
| "loss": 1.1362, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.14834297738032615, |
| "grad_norm": 2.3869614601135254, |
| "learning_rate": 4.9968037406665e-06, |
| "loss": 1.1455, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.14886901630720673, |
| "grad_norm": 2.2253477573394775, |
| "learning_rate": 4.9967685280332955e-06, |
| "loss": 1.1934, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.14939505523408733, |
| "grad_norm": 2.235481023788452, |
| "learning_rate": 4.99673312262094e-06, |
| "loss": 1.1457, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1499210941609679, |
| "grad_norm": 2.1756770610809326, |
| "learning_rate": 4.996697524432169e-06, |
| "loss": 1.1874, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.1504471330878485, |
| "grad_norm": 1.9890838861465454, |
| "learning_rate": 4.99666173346973e-06, |
| "loss": 1.1381, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1509731720147291, |
| "grad_norm": 2.032940149307251, |
| "learning_rate": 4.996625749736386e-06, |
| "loss": 1.1408, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.15149921094160967, |
| "grad_norm": 2.38653564453125, |
| "learning_rate": 4.996589573234915e-06, |
| "loss": 1.1137, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.15202524986849028, |
| "grad_norm": 2.5009000301361084, |
| "learning_rate": 4.9965532039681116e-06, |
| "loss": 1.1404, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.15255128879537086, |
| "grad_norm": 2.113600969314575, |
| "learning_rate": 4.996516641938784e-06, |
| "loss": 1.0764, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.15307732772225144, |
| "grad_norm": 2.2645368576049805, |
| "learning_rate": 4.996479887149754e-06, |
| "loss": 1.1499, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.15360336664913204, |
| "grad_norm": 2.015124559402466, |
| "learning_rate": 4.99644293960386e-06, |
| "loss": 1.0487, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.15412940557601262, |
| "grad_norm": 2.121588706970215, |
| "learning_rate": 4.996405799303955e-06, |
| "loss": 1.1119, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1546554445028932, |
| "grad_norm": 2.3707003593444824, |
| "learning_rate": 4.996368466252907e-06, |
| "loss": 1.1797, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1551814834297738, |
| "grad_norm": 2.3027000427246094, |
| "learning_rate": 4.996330940453598e-06, |
| "loss": 1.1228, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1557075223566544, |
| "grad_norm": 2.0909178256988525, |
| "learning_rate": 4.996293221908925e-06, |
| "loss": 1.0932, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.156233561283535, |
| "grad_norm": 2.362823486328125, |
| "learning_rate": 4.996255310621801e-06, |
| "loss": 1.1507, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.15675960021041557, |
| "grad_norm": 2.080667495727539, |
| "learning_rate": 4.996217206595153e-06, |
| "loss": 1.1158, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.15728563913729615, |
| "grad_norm": 2.0508742332458496, |
| "learning_rate": 4.996178909831922e-06, |
| "loss": 1.1326, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.15781167806417676, |
| "grad_norm": 2.1632707118988037, |
| "learning_rate": 4.996140420335068e-06, |
| "loss": 1.0946, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.15833771699105734, |
| "grad_norm": 1.9084789752960205, |
| "learning_rate": 4.996101738107559e-06, |
| "loss": 1.0939, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.15886375591793792, |
| "grad_norm": 1.9817906618118286, |
| "learning_rate": 4.996062863152385e-06, |
| "loss": 1.1013, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.15938979484481852, |
| "grad_norm": 1.9947365522384644, |
| "learning_rate": 4.9960237954725446e-06, |
| "loss": 1.0635, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1599158337716991, |
| "grad_norm": 2.0908870697021484, |
| "learning_rate": 4.995984535071056e-06, |
| "loss": 1.0914, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.1604418726985797, |
| "grad_norm": 2.1920530796051025, |
| "learning_rate": 4.995945081950952e-06, |
| "loss": 1.1816, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.16096791162546029, |
| "grad_norm": 2.250007152557373, |
| "learning_rate": 4.995905436115276e-06, |
| "loss": 1.1543, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.16149395055234086, |
| "grad_norm": 2.3157906532287598, |
| "learning_rate": 4.995865597567091e-06, |
| "loss": 1.1349, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.16201998947922147, |
| "grad_norm": 2.816443681716919, |
| "learning_rate": 4.995825566309471e-06, |
| "loss": 1.1154, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.16254602840610205, |
| "grad_norm": 2.3194282054901123, |
| "learning_rate": 4.995785342345509e-06, |
| "loss": 1.1547, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.16307206733298263, |
| "grad_norm": 2.1249098777770996, |
| "learning_rate": 4.99574492567831e-06, |
| "loss": 1.0995, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.16359810625986324, |
| "grad_norm": 2.100315809249878, |
| "learning_rate": 4.995704316310994e-06, |
| "loss": 1.1662, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.16412414518674381, |
| "grad_norm": 2.1664323806762695, |
| "learning_rate": 4.995663514246697e-06, |
| "loss": 1.1466, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.16465018411362442, |
| "grad_norm": 2.217438220977783, |
| "learning_rate": 4.9956225194885704e-06, |
| "loss": 1.1908, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.165176223040505, |
| "grad_norm": 2.3328514099121094, |
| "learning_rate": 4.995581332039778e-06, |
| "loss": 1.0809, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.16570226196738558, |
| "grad_norm": 2.088467836380005, |
| "learning_rate": 4.9955399519035e-06, |
| "loss": 1.0908, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.16622830089426618, |
| "grad_norm": 2.2554612159729004, |
| "learning_rate": 4.995498379082932e-06, |
| "loss": 1.1702, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.16675433982114676, |
| "grad_norm": 2.2798142433166504, |
| "learning_rate": 4.995456613581284e-06, |
| "loss": 1.107, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.16728037874802734, |
| "grad_norm": 2.4394755363464355, |
| "learning_rate": 4.9954146554017816e-06, |
| "loss": 1.0881, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.16780641767490795, |
| "grad_norm": 2.1176295280456543, |
| "learning_rate": 4.995372504547662e-06, |
| "loss": 1.1177, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.16833245660178853, |
| "grad_norm": 2.141923189163208, |
| "learning_rate": 4.995330161022181e-06, |
| "loss": 1.1321, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.16885849552866913, |
| "grad_norm": 2.273068428039551, |
| "learning_rate": 4.9952876248286086e-06, |
| "loss": 1.1832, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1693845344555497, |
| "grad_norm": 2.267636299133301, |
| "learning_rate": 4.995244895970228e-06, |
| "loss": 1.1058, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1699105733824303, |
| "grad_norm": 2.133772850036621, |
| "learning_rate": 4.99520197445034e-06, |
| "loss": 1.1478, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.1704366123093109, |
| "grad_norm": 2.2782862186431885, |
| "learning_rate": 4.995158860272257e-06, |
| "loss": 1.1074, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.17096265123619148, |
| "grad_norm": 2.544316053390503, |
| "learning_rate": 4.995115553439308e-06, |
| "loss": 1.0583, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.17148869016307206, |
| "grad_norm": 2.2900187969207764, |
| "learning_rate": 4.995072053954838e-06, |
| "loss": 1.1933, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.17201472908995266, |
| "grad_norm": 2.190380811691284, |
| "learning_rate": 4.995028361822206e-06, |
| "loss": 1.135, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.17254076801683324, |
| "grad_norm": 2.4495794773101807, |
| "learning_rate": 4.9949844770447834e-06, |
| "loss": 1.1214, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.17306680694371385, |
| "grad_norm": 2.332644462585449, |
| "learning_rate": 4.994940399625959e-06, |
| "loss": 1.1017, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.17359284587059443, |
| "grad_norm": 2.0709457397460938, |
| "learning_rate": 4.994896129569138e-06, |
| "loss": 1.1073, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.174118884797475, |
| "grad_norm": 2.8817923069000244, |
| "learning_rate": 4.994851666877736e-06, |
| "loss": 1.0758, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1746449237243556, |
| "grad_norm": 2.2557790279388428, |
| "learning_rate": 4.994807011555189e-06, |
| "loss": 1.173, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1751709626512362, |
| "grad_norm": 2.2412662506103516, |
| "learning_rate": 4.994762163604942e-06, |
| "loss": 1.1357, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.17569700157811677, |
| "grad_norm": 2.1749277114868164, |
| "learning_rate": 4.9947171230304595e-06, |
| "loss": 1.0988, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.17622304050499737, |
| "grad_norm": 2.4530062675476074, |
| "learning_rate": 4.994671889835218e-06, |
| "loss": 1.1377, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.17674907943187795, |
| "grad_norm": 2.2602410316467285, |
| "learning_rate": 4.994626464022711e-06, |
| "loss": 1.0799, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.17727511835875856, |
| "grad_norm": 2.0797061920166016, |
| "learning_rate": 4.994580845596446e-06, |
| "loss": 1.1214, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.17780115728563914, |
| "grad_norm": 2.1437630653381348, |
| "learning_rate": 4.994535034559945e-06, |
| "loss": 1.1794, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.17832719621251972, |
| "grad_norm": 2.0809285640716553, |
| "learning_rate": 4.994489030916745e-06, |
| "loss": 1.1331, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.17885323513940032, |
| "grad_norm": 2.31193208694458, |
| "learning_rate": 4.994442834670397e-06, |
| "loss": 1.1425, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1793792740662809, |
| "grad_norm": 2.0348451137542725, |
| "learning_rate": 4.99439644582447e-06, |
| "loss": 1.1149, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.17990531299316148, |
| "grad_norm": 2.2816810607910156, |
| "learning_rate": 4.994349864382544e-06, |
| "loss": 1.1509, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1804313519200421, |
| "grad_norm": 2.08492374420166, |
| "learning_rate": 4.994303090348217e-06, |
| "loss": 1.0854, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.18095739084692267, |
| "grad_norm": 2.0389866828918457, |
| "learning_rate": 4.994256123725098e-06, |
| "loss": 1.1195, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.18148342977380327, |
| "grad_norm": 2.2040510177612305, |
| "learning_rate": 4.9942089645168175e-06, |
| "loss": 1.1112, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.18200946870068385, |
| "grad_norm": 2.058849811553955, |
| "learning_rate": 4.994161612727013e-06, |
| "loss": 1.1462, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.18253550762756443, |
| "grad_norm": 2.2940948009490967, |
| "learning_rate": 4.994114068359343e-06, |
| "loss": 1.2183, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.18306154655444504, |
| "grad_norm": 2.0303874015808105, |
| "learning_rate": 4.9940663314174756e-06, |
| "loss": 1.1136, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.18358758548132562, |
| "grad_norm": 2.208289861679077, |
| "learning_rate": 4.9940184019051e-06, |
| "loss": 1.1507, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1841136244082062, |
| "grad_norm": 2.438228130340576, |
| "learning_rate": 4.993970279825915e-06, |
| "loss": 1.1619, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1846396633350868, |
| "grad_norm": 2.1701645851135254, |
| "learning_rate": 4.993921965183636e-06, |
| "loss": 1.1057, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.18516570226196738, |
| "grad_norm": 2.345054864883423, |
| "learning_rate": 4.9938734579819944e-06, |
| "loss": 1.1758, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.185691741188848, |
| "grad_norm": 2.3761768341064453, |
| "learning_rate": 4.9938247582247345e-06, |
| "loss": 1.1093, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.18621778011572857, |
| "grad_norm": 2.2209126949310303, |
| "learning_rate": 4.993775865915618e-06, |
| "loss": 1.0882, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.18674381904260914, |
| "grad_norm": 2.093406915664673, |
| "learning_rate": 4.993726781058419e-06, |
| "loss": 1.1621, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.18726985796948975, |
| "grad_norm": 2.509725332260132, |
| "learning_rate": 4.993677503656927e-06, |
| "loss": 1.1411, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.18779589689637033, |
| "grad_norm": 2.2245242595672607, |
| "learning_rate": 4.993628033714947e-06, |
| "loss": 1.1042, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1883219358232509, |
| "grad_norm": 1.838408350944519, |
| "learning_rate": 4.9935783712363e-06, |
| "loss": 1.0204, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.18884797475013151, |
| "grad_norm": 2.0559537410736084, |
| "learning_rate": 4.993528516224818e-06, |
| "loss": 1.0681, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1893740136770121, |
| "grad_norm": 2.084890604019165, |
| "learning_rate": 4.993478468684352e-06, |
| "loss": 1.1149, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1899000526038927, |
| "grad_norm": 2.179478168487549, |
| "learning_rate": 4.993428228618767e-06, |
| "loss": 1.1342, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.19042609153077328, |
| "grad_norm": 2.082578182220459, |
| "learning_rate": 4.99337779603194e-06, |
| "loss": 1.1293, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.19095213045765386, |
| "grad_norm": 2.031831979751587, |
| "learning_rate": 4.993327170927766e-06, |
| "loss": 1.0728, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.19147816938453446, |
| "grad_norm": 2.1939597129821777, |
| "learning_rate": 4.993276353310155e-06, |
| "loss": 1.1252, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.19200420831141504, |
| "grad_norm": 2.031350612640381, |
| "learning_rate": 4.9932253431830295e-06, |
| "loss": 1.1039, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.19253024723829562, |
| "grad_norm": 2.3367671966552734, |
| "learning_rate": 4.993174140550327e-06, |
| "loss": 1.1211, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.19305628616517623, |
| "grad_norm": 2.2768945693969727, |
| "learning_rate": 4.993122745416003e-06, |
| "loss": 1.1119, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1935823250920568, |
| "grad_norm": 2.220766544342041, |
| "learning_rate": 4.993071157784025e-06, |
| "loss": 1.1451, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1941083640189374, |
| "grad_norm": 2.3694369792938232, |
| "learning_rate": 4.993019377658376e-06, |
| "loss": 1.1156, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.194634402945818, |
| "grad_norm": 2.245237350463867, |
| "learning_rate": 4.9929674050430535e-06, |
| "loss": 1.1316, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.19516044187269857, |
| "grad_norm": 2.720625400543213, |
| "learning_rate": 4.992915239942071e-06, |
| "loss": 1.1092, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.19568648079957918, |
| "grad_norm": 2.115727424621582, |
| "learning_rate": 4.992862882359457e-06, |
| "loss": 1.1769, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.19621251972645976, |
| "grad_norm": 2.235677480697632, |
| "learning_rate": 4.992810332299253e-06, |
| "loss": 1.1786, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.19673855865334033, |
| "grad_norm": 2.539433002471924, |
| "learning_rate": 4.992757589765516e-06, |
| "loss": 1.1251, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.19726459758022094, |
| "grad_norm": 5.042508602142334, |
| "learning_rate": 4.99270465476232e-06, |
| "loss": 1.0706, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.19779063650710152, |
| "grad_norm": 2.1171703338623047, |
| "learning_rate": 4.9926515272937516e-06, |
| "loss": 1.1287, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.19831667543398213, |
| "grad_norm": 2.4587223529815674, |
| "learning_rate": 4.992598207363912e-06, |
| "loss": 1.053, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1988427143608627, |
| "grad_norm": 2.1502695083618164, |
| "learning_rate": 4.9925446949769184e-06, |
| "loss": 1.0837, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.19936875328774328, |
| "grad_norm": 2.139822483062744, |
| "learning_rate": 4.992490990136903e-06, |
| "loss": 1.1358, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.1998947922146239, |
| "grad_norm": 2.4914610385894775, |
| "learning_rate": 4.992437092848012e-06, |
| "loss": 1.1053, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.20042083114150447, |
| "grad_norm": 2.24576735496521, |
| "learning_rate": 4.992383003114408e-06, |
| "loss": 1.1034, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.20094687006838505, |
| "grad_norm": 2.1979477405548096, |
| "learning_rate": 4.992328720940266e-06, |
| "loss": 1.0839, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.20147290899526565, |
| "grad_norm": 2.1680850982666016, |
| "learning_rate": 4.992274246329778e-06, |
| "loss": 1.1011, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.20199894792214623, |
| "grad_norm": 2.3214027881622314, |
| "learning_rate": 4.9922195792871495e-06, |
| "loss": 1.03, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.20252498684902684, |
| "grad_norm": 2.162393808364868, |
| "learning_rate": 4.9921647198166014e-06, |
| "loss": 1.0466, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.20305102577590742, |
| "grad_norm": 2.184163808822632, |
| "learning_rate": 4.99210966792237e-06, |
| "loss": 1.1379, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.203577064702788, |
| "grad_norm": 2.3308913707733154, |
| "learning_rate": 4.992054423608706e-06, |
| "loss": 1.1751, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2041031036296686, |
| "grad_norm": 2.123298168182373, |
| "learning_rate": 4.991998986879874e-06, |
| "loss": 1.1079, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.20462914255654918, |
| "grad_norm": 2.229844331741333, |
| "learning_rate": 4.991943357740155e-06, |
| "loss": 1.1242, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.20515518148342976, |
| "grad_norm": 2.1815683841705322, |
| "learning_rate": 4.991887536193845e-06, |
| "loss": 1.0949, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.20568122041031037, |
| "grad_norm": 2.4636261463165283, |
| "learning_rate": 4.991831522245253e-06, |
| "loss": 1.1118, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.20620725933719095, |
| "grad_norm": 2.0095014572143555, |
| "learning_rate": 4.991775315898703e-06, |
| "loss": 1.0197, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.20673329826407155, |
| "grad_norm": 2.1244406700134277, |
| "learning_rate": 4.991718917158538e-06, |
| "loss": 1.1081, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.20725933719095213, |
| "grad_norm": 1.9773920774459839, |
| "learning_rate": 4.991662326029109e-06, |
| "loss": 1.0657, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2077853761178327, |
| "grad_norm": 2.204554796218872, |
| "learning_rate": 4.9916055425147874e-06, |
| "loss": 1.1434, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.20831141504471332, |
| "grad_norm": 2.068147659301758, |
| "learning_rate": 4.991548566619957e-06, |
| "loss": 1.1281, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2088374539715939, |
| "grad_norm": 2.1518101692199707, |
| "learning_rate": 4.991491398349017e-06, |
| "loss": 1.0977, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.20936349289847447, |
| "grad_norm": 2.091654062271118, |
| "learning_rate": 4.991434037706382e-06, |
| "loss": 1.1033, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.20988953182535508, |
| "grad_norm": 2.8754067420959473, |
| "learning_rate": 4.9913764846964805e-06, |
| "loss": 1.1237, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.21041557075223566, |
| "grad_norm": 2.2165675163269043, |
| "learning_rate": 4.991318739323757e-06, |
| "loss": 1.1298, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.21094160967911627, |
| "grad_norm": 2.1219065189361572, |
| "learning_rate": 4.991260801592668e-06, |
| "loss": 1.0795, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.21146764860599684, |
| "grad_norm": 2.132737159729004, |
| "learning_rate": 4.9912026715076885e-06, |
| "loss": 1.0546, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.21199368753287742, |
| "grad_norm": 2.228076457977295, |
| "learning_rate": 4.9911443490733075e-06, |
| "loss": 1.1759, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.21251972645975803, |
| "grad_norm": 2.1305177211761475, |
| "learning_rate": 4.991085834294027e-06, |
| "loss": 1.0865, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2130457653866386, |
| "grad_norm": 2.1550936698913574, |
| "learning_rate": 4.991027127174365e-06, |
| "loss": 1.1027, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.2135718043135192, |
| "grad_norm": 2.3489346504211426, |
| "learning_rate": 4.990968227718854e-06, |
| "loss": 1.184, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2140978432403998, |
| "grad_norm": 2.2208189964294434, |
| "learning_rate": 4.9909091359320434e-06, |
| "loss": 1.1476, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.21462388216728037, |
| "grad_norm": 2.230978012084961, |
| "learning_rate": 4.990849851818494e-06, |
| "loss": 1.1125, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.21514992109416098, |
| "grad_norm": 2.294647216796875, |
| "learning_rate": 4.990790375382784e-06, |
| "loss": 1.1526, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.21567596002104156, |
| "grad_norm": 2.160446882247925, |
| "learning_rate": 4.990730706629507e-06, |
| "loss": 1.1569, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.21620199894792214, |
| "grad_norm": 2.1352434158325195, |
| "learning_rate": 4.990670845563268e-06, |
| "loss": 1.049, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.21672803787480274, |
| "grad_norm": 2.0740866661071777, |
| "learning_rate": 4.99061079218869e-06, |
| "loss": 1.104, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.21725407680168332, |
| "grad_norm": 2.302877426147461, |
| "learning_rate": 4.990550546510408e-06, |
| "loss": 1.0942, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2177801157285639, |
| "grad_norm": 2.270836353302002, |
| "learning_rate": 4.990490108533076e-06, |
| "loss": 1.107, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2183061546554445, |
| "grad_norm": 2.05703067779541, |
| "learning_rate": 4.99042947826136e-06, |
| "loss": 1.1284, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.21883219358232509, |
| "grad_norm": 2.3524155616760254, |
| "learning_rate": 4.990368655699941e-06, |
| "loss": 1.068, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2193582325092057, |
| "grad_norm": 2.5300350189208984, |
| "learning_rate": 4.9903076408535145e-06, |
| "loss": 1.0993, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.21988427143608627, |
| "grad_norm": 2.1858162879943848, |
| "learning_rate": 4.990246433726793e-06, |
| "loss": 1.1398, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.22041031036296685, |
| "grad_norm": 1.9856489896774292, |
| "learning_rate": 4.990185034324501e-06, |
| "loss": 1.0671, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.22093634928984746, |
| "grad_norm": 2.177152156829834, |
| "learning_rate": 4.99012344265138e-06, |
| "loss": 1.1673, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.22146238821672803, |
| "grad_norm": 2.128787040710449, |
| "learning_rate": 4.990061658712186e-06, |
| "loss": 1.1629, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.2219884271436086, |
| "grad_norm": 2.1840457916259766, |
| "learning_rate": 4.989999682511688e-06, |
| "loss": 1.0739, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.22251446607048922, |
| "grad_norm": 2.37825608253479, |
| "learning_rate": 4.989937514054673e-06, |
| "loss": 1.1179, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2230405049973698, |
| "grad_norm": 2.2746498584747314, |
| "learning_rate": 4.98987515334594e-06, |
| "loss": 1.1117, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2235665439242504, |
| "grad_norm": 2.441087007522583, |
| "learning_rate": 4.989812600390304e-06, |
| "loss": 1.134, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.22409258285113098, |
| "grad_norm": 1.9548932313919067, |
| "learning_rate": 4.989749855192596e-06, |
| "loss": 1.0962, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.22461862177801156, |
| "grad_norm": 2.382025957107544, |
| "learning_rate": 4.989686917757659e-06, |
| "loss": 1.1233, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.22514466070489217, |
| "grad_norm": 2.1739771366119385, |
| "learning_rate": 4.989623788090353e-06, |
| "loss": 1.0665, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.22567069963177275, |
| "grad_norm": 2.3246262073516846, |
| "learning_rate": 4.989560466195553e-06, |
| "loss": 1.0834, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.22619673855865335, |
| "grad_norm": 2.1649882793426514, |
| "learning_rate": 4.9894969520781475e-06, |
| "loss": 1.1144, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.22672277748553393, |
| "grad_norm": 2.307199001312256, |
| "learning_rate": 4.98943324574304e-06, |
| "loss": 1.2195, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.2272488164124145, |
| "grad_norm": 2.2414958477020264, |
| "learning_rate": 4.989369347195151e-06, |
| "loss": 1.0549, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.22777485533929512, |
| "grad_norm": 2.12762713432312, |
| "learning_rate": 4.989305256439413e-06, |
| "loss": 1.1185, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2283008942661757, |
| "grad_norm": 2.1503520011901855, |
| "learning_rate": 4.989240973480774e-06, |
| "loss": 1.1294, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.22882693319305628, |
| "grad_norm": 2.1283833980560303, |
| "learning_rate": 4.9891764983242e-06, |
| "loss": 1.1154, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.22935297211993688, |
| "grad_norm": 2.239828109741211, |
| "learning_rate": 4.9891118309746666e-06, |
| "loss": 1.073, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.22987901104681746, |
| "grad_norm": 2.396672248840332, |
| "learning_rate": 4.989046971437167e-06, |
| "loss": 1.0916, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.23040504997369807, |
| "grad_norm": 2.1172304153442383, |
| "learning_rate": 4.98898191971671e-06, |
| "loss": 1.1001, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.23093108890057865, |
| "grad_norm": 2.1714346408843994, |
| "learning_rate": 4.98891667581832e-06, |
| "loss": 1.1672, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.23145712782745922, |
| "grad_norm": 2.058523178100586, |
| "learning_rate": 4.98885123974703e-06, |
| "loss": 1.0842, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.23198316675433983, |
| "grad_norm": 2.4147160053253174, |
| "learning_rate": 4.988785611507896e-06, |
| "loss": 1.0755, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2325092056812204, |
| "grad_norm": 2.274296283721924, |
| "learning_rate": 4.988719791105985e-06, |
| "loss": 1.1141, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.233035244608101, |
| "grad_norm": 2.178182363510132, |
| "learning_rate": 4.988653778546379e-06, |
| "loss": 1.212, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2335612835349816, |
| "grad_norm": 2.200793743133545, |
| "learning_rate": 4.988587573834173e-06, |
| "loss": 1.0992, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.23408732246186217, |
| "grad_norm": 1.9726881980895996, |
| "learning_rate": 4.98852117697448e-06, |
| "loss": 1.1165, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.23461336138874278, |
| "grad_norm": 2.1173300743103027, |
| "learning_rate": 4.988454587972428e-06, |
| "loss": 1.1162, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.23513940031562336, |
| "grad_norm": 2.1428768634796143, |
| "learning_rate": 4.9883878068331556e-06, |
| "loss": 1.1343, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.23566543924250394, |
| "grad_norm": 2.00190806388855, |
| "learning_rate": 4.988320833561822e-06, |
| "loss": 1.0873, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.23619147816938454, |
| "grad_norm": 2.2472777366638184, |
| "learning_rate": 4.988253668163596e-06, |
| "loss": 1.1209, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.23671751709626512, |
| "grad_norm": 2.0522475242614746, |
| "learning_rate": 4.988186310643666e-06, |
| "loss": 1.0912, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2372435560231457, |
| "grad_norm": 2.1521215438842773, |
| "learning_rate": 4.98811876100723e-06, |
| "loss": 1.0971, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2377695949500263, |
| "grad_norm": 2.1117734909057617, |
| "learning_rate": 4.988051019259505e-06, |
| "loss": 1.1247, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2382956338769069, |
| "grad_norm": 2.1884706020355225, |
| "learning_rate": 4.987983085405722e-06, |
| "loss": 1.1255, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2388216728037875, |
| "grad_norm": 2.138962984085083, |
| "learning_rate": 4.9879149594511245e-06, |
| "loss": 1.0787, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.23934771173066807, |
| "grad_norm": 2.553452730178833, |
| "learning_rate": 4.987846641400974e-06, |
| "loss": 1.1178, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.23987375065754865, |
| "grad_norm": 2.5340464115142822, |
| "learning_rate": 4.987778131260546e-06, |
| "loss": 1.1577, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.24039978958442926, |
| "grad_norm": 2.2375919818878174, |
| "learning_rate": 4.987709429035128e-06, |
| "loss": 1.0711, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.24092582851130984, |
| "grad_norm": 2.35756254196167, |
| "learning_rate": 4.987640534730027e-06, |
| "loss": 1.1031, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.24145186743819042, |
| "grad_norm": 2.03385591506958, |
| "learning_rate": 4.987571448350561e-06, |
| "loss": 1.0869, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.24197790636507102, |
| "grad_norm": 2.662584066390991, |
| "learning_rate": 4.987502169902065e-06, |
| "loss": 1.0909, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2425039452919516, |
| "grad_norm": 2.2569165229797363, |
| "learning_rate": 4.987432699389888e-06, |
| "loss": 1.1576, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.2430299842188322, |
| "grad_norm": 1.9718097448349, |
| "learning_rate": 4.987363036819393e-06, |
| "loss": 1.0577, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.24355602314571279, |
| "grad_norm": 2.2083537578582764, |
| "learning_rate": 4.987293182195959e-06, |
| "loss": 1.1328, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.24408206207259336, |
| "grad_norm": 2.2045726776123047, |
| "learning_rate": 4.987223135524981e-06, |
| "loss": 1.0908, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.24460810099947397, |
| "grad_norm": 2.213714122772217, |
| "learning_rate": 4.987152896811866e-06, |
| "loss": 1.124, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.24513413992635455, |
| "grad_norm": 4.030746936798096, |
| "learning_rate": 4.987082466062038e-06, |
| "loss": 1.0855, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.24566017885323513, |
| "grad_norm": 2.1142022609710693, |
| "learning_rate": 4.987011843280934e-06, |
| "loss": 1.1305, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.24618621778011573, |
| "grad_norm": 2.1746232509613037, |
| "learning_rate": 4.986941028474009e-06, |
| "loss": 1.0846, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2467122567069963, |
| "grad_norm": 2.038947820663452, |
| "learning_rate": 4.986870021646728e-06, |
| "loss": 1.0907, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.24723829563387692, |
| "grad_norm": 12.261099815368652, |
| "learning_rate": 4.986798822804576e-06, |
| "loss": 1.1012, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2477643345607575, |
| "grad_norm": 2.020077705383301, |
| "learning_rate": 4.986727431953048e-06, |
| "loss": 1.097, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.24829037348763808, |
| "grad_norm": 2.070114850997925, |
| "learning_rate": 4.986655849097658e-06, |
| "loss": 1.175, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.24881641241451868, |
| "grad_norm": 2.0364394187927246, |
| "learning_rate": 4.986584074243932e-06, |
| "loss": 1.0892, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.24934245134139926, |
| "grad_norm": 2.1961004734039307, |
| "learning_rate": 4.986512107397413e-06, |
| "loss": 1.0867, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.24986849026827984, |
| "grad_norm": 3.1488072872161865, |
| "learning_rate": 4.986439948563656e-06, |
| "loss": 1.0276, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2503945291951604, |
| "grad_norm": 2.3070068359375, |
| "learning_rate": 4.986367597748235e-06, |
| "loss": 1.0897, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.25092056812204105, |
| "grad_norm": 2.0328757762908936, |
| "learning_rate": 4.986295054956733e-06, |
| "loss": 1.0573, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.25144660704892163, |
| "grad_norm": 2.4608747959136963, |
| "learning_rate": 4.986222320194754e-06, |
| "loss": 1.1343, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2519726459758022, |
| "grad_norm": 2.249994993209839, |
| "learning_rate": 4.986149393467913e-06, |
| "loss": 1.0771, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2524986849026828, |
| "grad_norm": 2.1573803424835205, |
| "learning_rate": 4.98607627478184e-06, |
| "loss": 1.0795, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.25302472382956337, |
| "grad_norm": 2.6239383220672607, |
| "learning_rate": 4.986002964142182e-06, |
| "loss": 1.0874, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.253550762756444, |
| "grad_norm": 2.0815794467926025, |
| "learning_rate": 4.985929461554597e-06, |
| "loss": 1.0729, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2540768016833246, |
| "grad_norm": 2.156259059906006, |
| "learning_rate": 4.985855767024763e-06, |
| "loss": 1.0912, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.25460284061020516, |
| "grad_norm": 2.4136252403259277, |
| "learning_rate": 4.985781880558369e-06, |
| "loss": 1.1365, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.25512887953708574, |
| "grad_norm": 2.265622854232788, |
| "learning_rate": 4.98570780216112e-06, |
| "loss": 1.1218, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2556549184639663, |
| "grad_norm": 2.1097841262817383, |
| "learning_rate": 4.985633531838735e-06, |
| "loss": 1.1238, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.2561809573908469, |
| "grad_norm": 2.205012083053589, |
| "learning_rate": 4.985559069596949e-06, |
| "loss": 1.0664, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.25670699631772753, |
| "grad_norm": 2.1896169185638428, |
| "learning_rate": 4.9854844154415115e-06, |
| "loss": 1.0374, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2572330352446081, |
| "grad_norm": 2.0652949810028076, |
| "learning_rate": 4.985409569378187e-06, |
| "loss": 1.1016, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2577590741714887, |
| "grad_norm": 2.1278676986694336, |
| "learning_rate": 4.985334531412754e-06, |
| "loss": 1.147, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.25828511309836927, |
| "grad_norm": 2.2769057750701904, |
| "learning_rate": 4.985259301551005e-06, |
| "loss": 1.1389, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.25881115202524985, |
| "grad_norm": 2.0440104007720947, |
| "learning_rate": 4.985183879798751e-06, |
| "loss": 1.0826, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2593371909521305, |
| "grad_norm": 2.4153213500976562, |
| "learning_rate": 4.985108266161815e-06, |
| "loss": 1.105, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.25986322987901106, |
| "grad_norm": 2.3863043785095215, |
| "learning_rate": 4.985032460646033e-06, |
| "loss": 1.1023, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.26038926880589164, |
| "grad_norm": 2.2597336769104004, |
| "learning_rate": 4.98495646325726e-06, |
| "loss": 1.1046, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2609153077327722, |
| "grad_norm": 2.541444778442383, |
| "learning_rate": 4.984880274001364e-06, |
| "loss": 1.1149, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2614413466596528, |
| "grad_norm": 2.3011064529418945, |
| "learning_rate": 4.984803892884227e-06, |
| "loss": 1.0757, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.26196738558653343, |
| "grad_norm": 2.116774797439575, |
| "learning_rate": 4.9847273199117475e-06, |
| "loss": 1.1151, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.262493424513414, |
| "grad_norm": 2.2372357845306396, |
| "learning_rate": 4.984650555089836e-06, |
| "loss": 1.1107, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2630194634402946, |
| "grad_norm": 2.0782155990600586, |
| "learning_rate": 4.984573598424421e-06, |
| "loss": 1.1174, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.26354550236717517, |
| "grad_norm": 2.0625476837158203, |
| "learning_rate": 4.984496449921444e-06, |
| "loss": 1.0965, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.26407154129405574, |
| "grad_norm": 2.142184019088745, |
| "learning_rate": 4.9844191095868615e-06, |
| "loss": 1.0678, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2645975802209363, |
| "grad_norm": 2.1218082904815674, |
| "learning_rate": 4.984341577426646e-06, |
| "loss": 1.0661, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.26512361914781696, |
| "grad_norm": 2.2910757064819336, |
| "learning_rate": 4.984263853446783e-06, |
| "loss": 1.1111, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.26564965807469754, |
| "grad_norm": 2.0604546070098877, |
| "learning_rate": 4.984185937653274e-06, |
| "loss": 1.0614, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2661756970015781, |
| "grad_norm": 2.1210556030273438, |
| "learning_rate": 4.984107830052134e-06, |
| "loss": 1.0925, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.2667017359284587, |
| "grad_norm": 2.535501003265381, |
| "learning_rate": 4.984029530649396e-06, |
| "loss": 1.1238, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2672277748553393, |
| "grad_norm": 2.2978546619415283, |
| "learning_rate": 4.9839510394511035e-06, |
| "loss": 1.1615, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.2677538137822199, |
| "grad_norm": 2.0443382263183594, |
| "learning_rate": 4.983872356463318e-06, |
| "loss": 1.1087, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2682798527091005, |
| "grad_norm": 2.216139316558838, |
| "learning_rate": 4.983793481692114e-06, |
| "loss": 1.1431, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.26880589163598106, |
| "grad_norm": 1.9255571365356445, |
| "learning_rate": 4.983714415143583e-06, |
| "loss": 1.0204, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.26933193056286164, |
| "grad_norm": 2.103969097137451, |
| "learning_rate": 4.9836351568238286e-06, |
| "loss": 1.0855, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2698579694897422, |
| "grad_norm": 2.5458972454071045, |
| "learning_rate": 4.98355570673897e-06, |
| "loss": 1.0747, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.27038400841662286, |
| "grad_norm": 2.023601531982422, |
| "learning_rate": 4.983476064895143e-06, |
| "loss": 1.0471, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.27091004734350344, |
| "grad_norm": 2.0976908206939697, |
| "learning_rate": 4.983396231298496e-06, |
| "loss": 1.0658, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.271436086270384, |
| "grad_norm": 2.4051074981689453, |
| "learning_rate": 4.9833162059551936e-06, |
| "loss": 1.0624, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2719621251972646, |
| "grad_norm": 2.0524230003356934, |
| "learning_rate": 4.983235988871414e-06, |
| "loss": 1.1261, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.27248816412414517, |
| "grad_norm": 2.1440162658691406, |
| "learning_rate": 4.983155580053351e-06, |
| "loss": 0.9893, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.27301420305102575, |
| "grad_norm": 2.1923670768737793, |
| "learning_rate": 4.983074979507213e-06, |
| "loss": 1.1066, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2735402419779064, |
| "grad_norm": 2.2967565059661865, |
| "learning_rate": 4.982994187239225e-06, |
| "loss": 1.1256, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.27406628090478696, |
| "grad_norm": 2.0392587184906006, |
| "learning_rate": 4.982913203255623e-06, |
| "loss": 1.1026, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.27459231983166754, |
| "grad_norm": 2.371121644973755, |
| "learning_rate": 4.9828320275626605e-06, |
| "loss": 1.0607, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2751183587585481, |
| "grad_norm": 2.082239866256714, |
| "learning_rate": 4.982750660166606e-06, |
| "loss": 1.0749, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2756443976854287, |
| "grad_norm": 2.2039687633514404, |
| "learning_rate": 4.98266910107374e-06, |
| "loss": 1.0769, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.27617043661230933, |
| "grad_norm": 2.087859869003296, |
| "learning_rate": 4.9825873502903625e-06, |
| "loss": 1.1575, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2766964755391899, |
| "grad_norm": 2.1991021633148193, |
| "learning_rate": 4.982505407822783e-06, |
| "loss": 1.1149, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2772225144660705, |
| "grad_norm": 2.2656140327453613, |
| "learning_rate": 4.98242327367733e-06, |
| "loss": 1.0948, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.27774855339295107, |
| "grad_norm": 2.1107430458068848, |
| "learning_rate": 4.982340947860344e-06, |
| "loss": 1.0289, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.27827459231983165, |
| "grad_norm": 2.2510344982147217, |
| "learning_rate": 4.982258430378184e-06, |
| "loss": 1.0694, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2788006312467123, |
| "grad_norm": 2.252258062362671, |
| "learning_rate": 4.982175721237218e-06, |
| "loss": 1.0435, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.27932667017359286, |
| "grad_norm": 2.12455677986145, |
| "learning_rate": 4.982092820443834e-06, |
| "loss": 1.0202, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.27985270910047344, |
| "grad_norm": 2.3654651641845703, |
| "learning_rate": 4.982009728004433e-06, |
| "loss": 1.1282, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.280378748027354, |
| "grad_norm": 2.3759138584136963, |
| "learning_rate": 4.981926443925431e-06, |
| "loss": 1.1557, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2809047869542346, |
| "grad_norm": 1.9874821901321411, |
| "learning_rate": 4.981842968213256e-06, |
| "loss": 1.0723, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2814308258811152, |
| "grad_norm": 2.154383897781372, |
| "learning_rate": 4.981759300874356e-06, |
| "loss": 1.0786, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2819568648079958, |
| "grad_norm": 2.1774797439575195, |
| "learning_rate": 4.9816754419151906e-06, |
| "loss": 1.0457, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2824829037348764, |
| "grad_norm": 2.206082820892334, |
| "learning_rate": 4.981591391342233e-06, |
| "loss": 1.0216, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.28300894266175697, |
| "grad_norm": 2.008676528930664, |
| "learning_rate": 4.981507149161975e-06, |
| "loss": 1.0297, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.28353498158863755, |
| "grad_norm": 2.0553462505340576, |
| "learning_rate": 4.981422715380919e-06, |
| "loss": 1.0967, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2840610205155181, |
| "grad_norm": 2.047567844390869, |
| "learning_rate": 4.981338090005586e-06, |
| "loss": 1.0524, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.28458705944239876, |
| "grad_norm": 2.2144312858581543, |
| "learning_rate": 4.981253273042509e-06, |
| "loss": 1.1178, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.28511309836927934, |
| "grad_norm": 2.388124465942383, |
| "learning_rate": 4.981168264498238e-06, |
| "loss": 1.0728, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2856391372961599, |
| "grad_norm": 2.152280807495117, |
| "learning_rate": 4.981083064379335e-06, |
| "loss": 1.1146, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.2861651762230405, |
| "grad_norm": 2.1481564044952393, |
| "learning_rate": 4.98099767269238e-06, |
| "loss": 1.1376, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2866912151499211, |
| "grad_norm": 2.060664415359497, |
| "learning_rate": 4.980912089443966e-06, |
| "loss": 1.0961, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2872172540768017, |
| "grad_norm": 2.032557964324951, |
| "learning_rate": 4.9808263146406985e-06, |
| "loss": 1.1055, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2877432930036823, |
| "grad_norm": 2.0957093238830566, |
| "learning_rate": 4.980740348289204e-06, |
| "loss": 1.0444, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.28826933193056287, |
| "grad_norm": 2.0774853229522705, |
| "learning_rate": 4.980654190396118e-06, |
| "loss": 1.0963, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.28879537085744345, |
| "grad_norm": 2.0808207988739014, |
| "learning_rate": 4.980567840968094e-06, |
| "loss": 1.0634, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.289321409784324, |
| "grad_norm": 2.2924559116363525, |
| "learning_rate": 4.980481300011797e-06, |
| "loss": 1.0805, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2898474487112046, |
| "grad_norm": 2.041088104248047, |
| "learning_rate": 4.980394567533911e-06, |
| "loss": 1.0983, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.29037348763808524, |
| "grad_norm": 2.030073881149292, |
| "learning_rate": 4.980307643541132e-06, |
| "loss": 1.1334, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2908995265649658, |
| "grad_norm": 2.15849232673645, |
| "learning_rate": 4.980220528040172e-06, |
| "loss": 1.0906, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2914255654918464, |
| "grad_norm": 2.094135284423828, |
| "learning_rate": 4.9801332210377574e-06, |
| "loss": 1.0644, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.291951604418727, |
| "grad_norm": 2.193941354751587, |
| "learning_rate": 4.980045722540628e-06, |
| "loss": 1.0819, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.29247764334560755, |
| "grad_norm": 2.2015504837036133, |
| "learning_rate": 4.979958032555542e-06, |
| "loss": 1.0759, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2930036822724882, |
| "grad_norm": 2.1240222454071045, |
| "learning_rate": 4.979870151089267e-06, |
| "loss": 1.1268, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.29352972119936876, |
| "grad_norm": 2.0243959426879883, |
| "learning_rate": 4.9797820781485905e-06, |
| "loss": 1.0449, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.29405576012624934, |
| "grad_norm": 2.2300705909729004, |
| "learning_rate": 4.979693813740313e-06, |
| "loss": 1.0493, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2945817990531299, |
| "grad_norm": 2.1185836791992188, |
| "learning_rate": 4.979605357871249e-06, |
| "loss": 1.0921, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2951078379800105, |
| "grad_norm": 2.091691732406616, |
| "learning_rate": 4.979516710548227e-06, |
| "loss": 1.1025, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.29563387690689114, |
| "grad_norm": 2.1666178703308105, |
| "learning_rate": 4.979427871778094e-06, |
| "loss": 1.1245, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2961599158337717, |
| "grad_norm": 2.6985056400299072, |
| "learning_rate": 4.9793388415677066e-06, |
| "loss": 1.1398, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2966859547606523, |
| "grad_norm": 2.118074655532837, |
| "learning_rate": 4.979249619923942e-06, |
| "loss": 1.0897, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.29721199368753287, |
| "grad_norm": 2.246856927871704, |
| "learning_rate": 4.979160206853687e-06, |
| "loss": 1.0714, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.29773803261441345, |
| "grad_norm": 2.201953887939453, |
| "learning_rate": 4.979070602363846e-06, |
| "loss": 1.1466, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.29826407154129403, |
| "grad_norm": 2.048617362976074, |
| "learning_rate": 4.9789808064613375e-06, |
| "loss": 1.1368, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.29879011046817466, |
| "grad_norm": 2.1507785320281982, |
| "learning_rate": 4.978890819153095e-06, |
| "loss": 1.1499, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.29931614939505524, |
| "grad_norm": 1.9633440971374512, |
| "learning_rate": 4.978800640446066e-06, |
| "loss": 1.0667, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2998421883219358, |
| "grad_norm": 2.1089606285095215, |
| "learning_rate": 4.978710270347214e-06, |
| "loss": 1.0611, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3003682272488164, |
| "grad_norm": 2.170901298522949, |
| "learning_rate": 4.9786197088635145e-06, |
| "loss": 1.1524, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.300894266175697, |
| "grad_norm": 2.165510892868042, |
| "learning_rate": 4.978528956001964e-06, |
| "loss": 1.0987, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3014203051025776, |
| "grad_norm": 2.0415878295898438, |
| "learning_rate": 4.978438011769565e-06, |
| "loss": 1.1582, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3019463440294582, |
| "grad_norm": 2.110260248184204, |
| "learning_rate": 4.978346876173342e-06, |
| "loss": 1.0587, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.30247238295633877, |
| "grad_norm": 2.253488063812256, |
| "learning_rate": 4.9782555492203334e-06, |
| "loss": 1.1038, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.30299842188321935, |
| "grad_norm": 2.0166091918945312, |
| "learning_rate": 4.978164030917587e-06, |
| "loss": 1.0367, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3035244608100999, |
| "grad_norm": 2.2842600345611572, |
| "learning_rate": 4.978072321272171e-06, |
| "loss": 1.0996, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.30405049973698056, |
| "grad_norm": 2.0563907623291016, |
| "learning_rate": 4.977980420291166e-06, |
| "loss": 1.1219, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.30457653866386114, |
| "grad_norm": 2.059800863265991, |
| "learning_rate": 4.977888327981668e-06, |
| "loss": 1.1193, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3051025775907417, |
| "grad_norm": 2.242919921875, |
| "learning_rate": 4.977796044350788e-06, |
| "loss": 1.0701, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3056286165176223, |
| "grad_norm": 1.9749282598495483, |
| "learning_rate": 4.977703569405651e-06, |
| "loss": 1.0771, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3061546554445029, |
| "grad_norm": 2.2251386642456055, |
| "learning_rate": 4.977610903153397e-06, |
| "loss": 1.084, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.30668069437138346, |
| "grad_norm": 2.0289855003356934, |
| "learning_rate": 4.97751804560118e-06, |
| "loss": 1.0732, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3072067332982641, |
| "grad_norm": 2.152841806411743, |
| "learning_rate": 4.977424996756171e-06, |
| "loss": 1.0712, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.30773277222514467, |
| "grad_norm": 2.3243937492370605, |
| "learning_rate": 4.977331756625555e-06, |
| "loss": 1.0197, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.30825881115202525, |
| "grad_norm": 2.293274402618408, |
| "learning_rate": 4.97723832521653e-06, |
| "loss": 1.1121, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3087848500789058, |
| "grad_norm": 2.139958143234253, |
| "learning_rate": 4.97714470253631e-06, |
| "loss": 1.0799, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3093108890057864, |
| "grad_norm": 2.269357442855835, |
| "learning_rate": 4.977050888592123e-06, |
| "loss": 1.0872, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.30983692793266704, |
| "grad_norm": 2.268691301345825, |
| "learning_rate": 4.976956883391215e-06, |
| "loss": 1.1079, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3103629668595476, |
| "grad_norm": 2.127131223678589, |
| "learning_rate": 4.976862686940842e-06, |
| "loss": 1.1217, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3108890057864282, |
| "grad_norm": 2.0126006603240967, |
| "learning_rate": 4.976768299248278e-06, |
| "loss": 1.0719, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3114150447133088, |
| "grad_norm": 1.965903639793396, |
| "learning_rate": 4.97667372032081e-06, |
| "loss": 1.0843, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.31194108364018935, |
| "grad_norm": 2.1280322074890137, |
| "learning_rate": 4.976578950165742e-06, |
| "loss": 1.0676, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.31246712256707, |
| "grad_norm": 2.2355756759643555, |
| "learning_rate": 4.976483988790391e-06, |
| "loss": 1.0855, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.31299316149395057, |
| "grad_norm": 2.153095245361328, |
| "learning_rate": 4.976388836202088e-06, |
| "loss": 1.0357, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.31351920042083115, |
| "grad_norm": 2.023137092590332, |
| "learning_rate": 4.97629349240818e-06, |
| "loss": 1.0381, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3140452393477117, |
| "grad_norm": 2.2524759769439697, |
| "learning_rate": 4.97619795741603e-06, |
| "loss": 1.0911, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3145712782745923, |
| "grad_norm": 2.1904008388519287, |
| "learning_rate": 4.9761022312330135e-06, |
| "loss": 1.047, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3150973172014729, |
| "grad_norm": 2.3166565895080566, |
| "learning_rate": 4.976006313866521e-06, |
| "loss": 1.0663, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3156233561283535, |
| "grad_norm": 2.11413836479187, |
| "learning_rate": 4.975910205323959e-06, |
| "loss": 1.0843, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3161493950552341, |
| "grad_norm": 2.1609344482421875, |
| "learning_rate": 4.975813905612749e-06, |
| "loss": 1.1344, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3166754339821147, |
| "grad_norm": 2.055330276489258, |
| "learning_rate": 4.975717414740326e-06, |
| "loss": 1.0663, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.31720147290899525, |
| "grad_norm": 2.2735755443573, |
| "learning_rate": 4.975620732714139e-06, |
| "loss": 1.1061, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.31772751183587583, |
| "grad_norm": 2.1966300010681152, |
| "learning_rate": 4.975523859541654e-06, |
| "loss": 1.1498, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.31825355076275647, |
| "grad_norm": 2.20951247215271, |
| "learning_rate": 4.975426795230351e-06, |
| "loss": 1.1057, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.31877958968963704, |
| "grad_norm": 2.0706050395965576, |
| "learning_rate": 4.975329539787725e-06, |
| "loss": 1.0906, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3193056286165176, |
| "grad_norm": 2.0394089221954346, |
| "learning_rate": 4.975232093221284e-06, |
| "loss": 1.0514, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3198316675433982, |
| "grad_norm": 2.1639111042022705, |
| "learning_rate": 4.975134455538551e-06, |
| "loss": 1.0787, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3203577064702788, |
| "grad_norm": 2.025575876235962, |
| "learning_rate": 4.975036626747067e-06, |
| "loss": 1.0451, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3208837453971594, |
| "grad_norm": 2.060215950012207, |
| "learning_rate": 4.974938606854384e-06, |
| "loss": 1.0821, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.32140978432404, |
| "grad_norm": 2.265155792236328, |
| "learning_rate": 4.974840395868073e-06, |
| "loss": 1.1341, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.32193582325092057, |
| "grad_norm": 2.22503924369812, |
| "learning_rate": 4.974741993795712e-06, |
| "loss": 1.1643, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.32246186217780115, |
| "grad_norm": 2.11155104637146, |
| "learning_rate": 4.9746434006449034e-06, |
| "loss": 1.0548, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.32298790110468173, |
| "grad_norm": 2.0055696964263916, |
| "learning_rate": 4.974544616423258e-06, |
| "loss": 1.0769, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3235139400315623, |
| "grad_norm": 2.0843770503997803, |
| "learning_rate": 4.974445641138403e-06, |
| "loss": 1.0701, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.32403997895844294, |
| "grad_norm": 2.0580337047576904, |
| "learning_rate": 4.9743464747979785e-06, |
| "loss": 1.0465, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3245660178853235, |
| "grad_norm": 2.3719844818115234, |
| "learning_rate": 4.974247117409645e-06, |
| "loss": 1.1498, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3250920568122041, |
| "grad_norm": 1.9926241636276245, |
| "learning_rate": 4.974147568981072e-06, |
| "loss": 1.081, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3256180957390847, |
| "grad_norm": 2.029318332672119, |
| "learning_rate": 4.974047829519946e-06, |
| "loss": 1.139, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.32614413466596526, |
| "grad_norm": 2.0171804428100586, |
| "learning_rate": 4.973947899033969e-06, |
| "loss": 1.0887, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3266701735928459, |
| "grad_norm": 2.3209071159362793, |
| "learning_rate": 4.973847777530854e-06, |
| "loss": 1.1156, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.32719621251972647, |
| "grad_norm": 2.360849142074585, |
| "learning_rate": 4.973747465018334e-06, |
| "loss": 1.1305, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.32772225144660705, |
| "grad_norm": 2.1828086376190186, |
| "learning_rate": 4.973646961504154e-06, |
| "loss": 1.091, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.32824829037348763, |
| "grad_norm": 1.9628446102142334, |
| "learning_rate": 4.973546266996074e-06, |
| "loss": 1.0932, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3287743293003682, |
| "grad_norm": 2.0040283203125, |
| "learning_rate": 4.973445381501868e-06, |
| "loss": 1.0723, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.32930036822724884, |
| "grad_norm": 2.289292097091675, |
| "learning_rate": 4.973344305029326e-06, |
| "loss": 1.1526, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3298264071541294, |
| "grad_norm": 2.1106910705566406, |
| "learning_rate": 4.973243037586252e-06, |
| "loss": 1.1327, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.33035244608101, |
| "grad_norm": 2.326677083969116, |
| "learning_rate": 4.9731415791804655e-06, |
| "loss": 1.0898, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3308784850078906, |
| "grad_norm": 2.086299180984497, |
| "learning_rate": 4.9730399298198e-06, |
| "loss": 1.0842, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.33140452393477116, |
| "grad_norm": 2.045738935470581, |
| "learning_rate": 4.972938089512104e-06, |
| "loss": 1.0156, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.33193056286165173, |
| "grad_norm": 2.038058280944824, |
| "learning_rate": 4.97283605826524e-06, |
| "loss": 1.0545, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.33245660178853237, |
| "grad_norm": 2.0892717838287354, |
| "learning_rate": 4.972733836087088e-06, |
| "loss": 1.099, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.33298264071541295, |
| "grad_norm": 2.2152934074401855, |
| "learning_rate": 4.972631422985538e-06, |
| "loss": 1.0775, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3335086796422935, |
| "grad_norm": 2.3605494499206543, |
| "learning_rate": 4.9725288189685e-06, |
| "loss": 1.0682, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3340347185691741, |
| "grad_norm": 2.076491117477417, |
| "learning_rate": 4.9724260240438945e-06, |
| "loss": 1.063, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3345607574960547, |
| "grad_norm": 3.2677767276763916, |
| "learning_rate": 4.97232303821966e-06, |
| "loss": 1.1173, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.3350867964229353, |
| "grad_norm": 2.110320568084717, |
| "learning_rate": 4.972219861503746e-06, |
| "loss": 1.0264, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3356128353498159, |
| "grad_norm": 2.101353406906128, |
| "learning_rate": 4.972116493904121e-06, |
| "loss": 1.0806, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3361388742766965, |
| "grad_norm": 2.247091293334961, |
| "learning_rate": 4.972012935428765e-06, |
| "loss": 1.1178, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.33666491320357705, |
| "grad_norm": 2.183757781982422, |
| "learning_rate": 4.971909186085675e-06, |
| "loss": 1.0615, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.33719095213045763, |
| "grad_norm": 2.0801236629486084, |
| "learning_rate": 4.97180524588286e-06, |
| "loss": 1.0441, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.33771699105733827, |
| "grad_norm": 1.9939873218536377, |
| "learning_rate": 4.9717011148283455e-06, |
| "loss": 1.0853, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.33824302998421885, |
| "grad_norm": 2.13399338722229, |
| "learning_rate": 4.971596792930174e-06, |
| "loss": 0.9943, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3387690689110994, |
| "grad_norm": 2.1221766471862793, |
| "learning_rate": 4.971492280196397e-06, |
| "loss": 1.0088, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.33929510783798, |
| "grad_norm": 2.023320436477661, |
| "learning_rate": 4.971387576635087e-06, |
| "loss": 1.0449, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3398211467648606, |
| "grad_norm": 2.1422126293182373, |
| "learning_rate": 4.971282682254327e-06, |
| "loss": 1.0987, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3403471856917412, |
| "grad_norm": 2.136868715286255, |
| "learning_rate": 4.971177597062215e-06, |
| "loss": 1.0983, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3408732246186218, |
| "grad_norm": 2.1036930084228516, |
| "learning_rate": 4.971072321066868e-06, |
| "loss": 1.1284, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3413992635455024, |
| "grad_norm": 2.147191286087036, |
| "learning_rate": 4.970966854276411e-06, |
| "loss": 1.1165, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.34192530247238295, |
| "grad_norm": 2.1734893321990967, |
| "learning_rate": 4.970861196698988e-06, |
| "loss": 1.0834, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.34245134139926353, |
| "grad_norm": 2.038435459136963, |
| "learning_rate": 4.97075534834276e-06, |
| "loss": 1.0193, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3429773803261441, |
| "grad_norm": 2.077822208404541, |
| "learning_rate": 4.970649309215895e-06, |
| "loss": 1.0697, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.34350341925302474, |
| "grad_norm": 2.056907892227173, |
| "learning_rate": 4.970543079326584e-06, |
| "loss": 1.0593, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3440294581799053, |
| "grad_norm": 2.7795369625091553, |
| "learning_rate": 4.9704366586830275e-06, |
| "loss": 1.122, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3445554971067859, |
| "grad_norm": 2.0807559490203857, |
| "learning_rate": 4.970330047293443e-06, |
| "loss": 1.0225, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.3450815360336665, |
| "grad_norm": 2.219024658203125, |
| "learning_rate": 4.970223245166062e-06, |
| "loss": 1.1506, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.34560757496054706, |
| "grad_norm": 2.1809475421905518, |
| "learning_rate": 4.970116252309131e-06, |
| "loss": 1.1094, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3461336138874277, |
| "grad_norm": 2.243777275085449, |
| "learning_rate": 4.970009068730911e-06, |
| "loss": 1.0942, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3466596528143083, |
| "grad_norm": 2.106391191482544, |
| "learning_rate": 4.969901694439677e-06, |
| "loss": 1.0899, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.34718569174118885, |
| "grad_norm": 2.1109979152679443, |
| "learning_rate": 4.96979412944372e-06, |
| "loss": 1.0622, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.34771173066806943, |
| "grad_norm": 2.292466163635254, |
| "learning_rate": 4.969686373751347e-06, |
| "loss": 1.1081, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.34823776959495, |
| "grad_norm": 1.9919096231460571, |
| "learning_rate": 4.9695784273708755e-06, |
| "loss": 1.0774, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.34876380852183064, |
| "grad_norm": 2.2421789169311523, |
| "learning_rate": 4.969470290310641e-06, |
| "loss": 1.0958, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3492898474487112, |
| "grad_norm": 2.069939613342285, |
| "learning_rate": 4.969361962578994e-06, |
| "loss": 1.0758, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3498158863755918, |
| "grad_norm": 2.0892951488494873, |
| "learning_rate": 4.969253444184297e-06, |
| "loss": 1.105, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3503419253024724, |
| "grad_norm": 2.1536753177642822, |
| "learning_rate": 4.969144735134929e-06, |
| "loss": 1.0655, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.35086796422935296, |
| "grad_norm": 2.031996250152588, |
| "learning_rate": 4.969035835439284e-06, |
| "loss": 1.1107, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.35139400315623354, |
| "grad_norm": 2.068693161010742, |
| "learning_rate": 4.9689267451057714e-06, |
| "loss": 1.0293, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.35192004208311417, |
| "grad_norm": 2.1489906311035156, |
| "learning_rate": 4.9688174641428136e-06, |
| "loss": 1.0656, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.35244608100999475, |
| "grad_norm": 2.5132720470428467, |
| "learning_rate": 4.9687079925588475e-06, |
| "loss": 1.0558, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.35297211993687533, |
| "grad_norm": 1.9639642238616943, |
| "learning_rate": 4.968598330362326e-06, |
| "loss": 1.0498, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3534981588637559, |
| "grad_norm": 2.2413175106048584, |
| "learning_rate": 4.968488477561716e-06, |
| "loss": 0.986, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3540241977906365, |
| "grad_norm": 2.0109381675720215, |
| "learning_rate": 4.968378434165501e-06, |
| "loss": 1.1112, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3545502367175171, |
| "grad_norm": 2.1863934993743896, |
| "learning_rate": 4.968268200182175e-06, |
| "loss": 1.0843, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3550762756443977, |
| "grad_norm": 2.262173652648926, |
| "learning_rate": 4.968157775620252e-06, |
| "loss": 1.0938, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3556023145712783, |
| "grad_norm": 2.261918067932129, |
| "learning_rate": 4.968047160488256e-06, |
| "loss": 1.1004, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.35612835349815886, |
| "grad_norm": 2.13324236869812, |
| "learning_rate": 4.967936354794728e-06, |
| "loss": 1.0881, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.35665439242503943, |
| "grad_norm": 2.271207809448242, |
| "learning_rate": 4.967825358548225e-06, |
| "loss": 1.0967, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.35718043135192007, |
| "grad_norm": 2.177339553833008, |
| "learning_rate": 4.967714171757315e-06, |
| "loss": 1.1131, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.35770647027880065, |
| "grad_norm": 2.1329848766326904, |
| "learning_rate": 4.967602794430585e-06, |
| "loss": 1.112, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3582325092056812, |
| "grad_norm": 2.0018250942230225, |
| "learning_rate": 4.967491226576634e-06, |
| "loss": 1.0853, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3587585481325618, |
| "grad_norm": 2.06925106048584, |
| "learning_rate": 4.967379468204075e-06, |
| "loss": 1.1405, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3592845870594424, |
| "grad_norm": 2.0437614917755127, |
| "learning_rate": 4.967267519321538e-06, |
| "loss": 1.1165, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.35981062598632296, |
| "grad_norm": 2.043297290802002, |
| "learning_rate": 4.9671553799376685e-06, |
| "loss": 1.0438, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3603366649132036, |
| "grad_norm": 2.060760259628296, |
| "learning_rate": 4.967043050061121e-06, |
| "loss": 1.0401, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3608627038400842, |
| "grad_norm": 2.3929009437561035, |
| "learning_rate": 4.966930529700572e-06, |
| "loss": 1.0812, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.36138874276696475, |
| "grad_norm": 2.2057461738586426, |
| "learning_rate": 4.966817818864708e-06, |
| "loss": 1.0499, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.36191478169384533, |
| "grad_norm": 2.0358550548553467, |
| "learning_rate": 4.966704917562231e-06, |
| "loss": 1.1603, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3624408206207259, |
| "grad_norm": 2.0840682983398438, |
| "learning_rate": 4.966591825801859e-06, |
| "loss": 1.0967, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.36296685954760655, |
| "grad_norm": 2.0170061588287354, |
| "learning_rate": 4.9664785435923255e-06, |
| "loss": 1.0573, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3634928984744871, |
| "grad_norm": 2.1349408626556396, |
| "learning_rate": 4.966365070942375e-06, |
| "loss": 1.0665, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3640189374013677, |
| "grad_norm": 2.1616368293762207, |
| "learning_rate": 4.966251407860769e-06, |
| "loss": 1.0306, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3645449763282483, |
| "grad_norm": 2.2529335021972656, |
| "learning_rate": 4.966137554356285e-06, |
| "loss": 1.0445, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.36507101525512886, |
| "grad_norm": 2.041102170944214, |
| "learning_rate": 4.966023510437713e-06, |
| "loss": 1.0395, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3655970541820095, |
| "grad_norm": 2.0450620651245117, |
| "learning_rate": 4.9659092761138585e-06, |
| "loss": 1.064, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3661230931088901, |
| "grad_norm": 2.163081407546997, |
| "learning_rate": 4.965794851393541e-06, |
| "loss": 1.0729, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.36664913203577065, |
| "grad_norm": 2.1602089405059814, |
| "learning_rate": 4.965680236285596e-06, |
| "loss": 1.0707, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.36717517096265123, |
| "grad_norm": 2.3263938426971436, |
| "learning_rate": 4.965565430798875e-06, |
| "loss": 1.0146, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3677012098895318, |
| "grad_norm": 2.0192365646362305, |
| "learning_rate": 4.965450434942238e-06, |
| "loss": 1.0751, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3682272488164124, |
| "grad_norm": 2.0557174682617188, |
| "learning_rate": 4.965335248724568e-06, |
| "loss": 1.0749, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.368753287743293, |
| "grad_norm": 2.29679799079895, |
| "learning_rate": 4.965219872154757e-06, |
| "loss": 1.0516, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3692793266701736, |
| "grad_norm": 2.2303829193115234, |
| "learning_rate": 4.965104305241713e-06, |
| "loss": 1.1586, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3698053655970542, |
| "grad_norm": 2.112283706665039, |
| "learning_rate": 4.964988547994361e-06, |
| "loss": 1.0833, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.37033140452393476, |
| "grad_norm": 2.1807613372802734, |
| "learning_rate": 4.9648726004216354e-06, |
| "loss": 1.0786, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.37085744345081534, |
| "grad_norm": 2.0990889072418213, |
| "learning_rate": 4.964756462532492e-06, |
| "loss": 1.0555, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.371383482377696, |
| "grad_norm": 2.2034318447113037, |
| "learning_rate": 4.964640134335896e-06, |
| "loss": 1.0696, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.37190952130457655, |
| "grad_norm": 2.207235813140869, |
| "learning_rate": 4.964523615840831e-06, |
| "loss": 1.0897, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.37243556023145713, |
| "grad_norm": 1.8820483684539795, |
| "learning_rate": 4.964406907056291e-06, |
| "loss": 1.0822, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3729615991583377, |
| "grad_norm": 2.2243785858154297, |
| "learning_rate": 4.964290007991291e-06, |
| "loss": 1.0958, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.3734876380852183, |
| "grad_norm": 2.208770990371704, |
| "learning_rate": 4.964172918654854e-06, |
| "loss": 1.0803, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3740136770120989, |
| "grad_norm": 2.1083521842956543, |
| "learning_rate": 4.96405563905602e-06, |
| "loss": 1.0513, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.3745397159389795, |
| "grad_norm": 2.0161774158477783, |
| "learning_rate": 4.963938169203847e-06, |
| "loss": 1.0775, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3750657548658601, |
| "grad_norm": 2.1578962802886963, |
| "learning_rate": 4.963820509107403e-06, |
| "loss": 1.0695, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.37559179379274066, |
| "grad_norm": 2.1972339153289795, |
| "learning_rate": 4.963702658775774e-06, |
| "loss": 1.0703, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.37611783271962124, |
| "grad_norm": 2.338205575942993, |
| "learning_rate": 4.9635846182180594e-06, |
| "loss": 1.0756, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3766438716465018, |
| "grad_norm": 2.281242847442627, |
| "learning_rate": 4.963466387443372e-06, |
| "loss": 1.1177, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.37716991057338245, |
| "grad_norm": 2.092036724090576, |
| "learning_rate": 4.963347966460841e-06, |
| "loss": 1.1004, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.37769594950026303, |
| "grad_norm": 2.148244857788086, |
| "learning_rate": 4.963229355279611e-06, |
| "loss": 1.1157, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3782219884271436, |
| "grad_norm": 1.9961777925491333, |
| "learning_rate": 4.963110553908838e-06, |
| "loss": 1.0703, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3787480273540242, |
| "grad_norm": 2.299091339111328, |
| "learning_rate": 4.962991562357697e-06, |
| "loss": 1.1265, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.37927406628090476, |
| "grad_norm": 2.1055006980895996, |
| "learning_rate": 4.962872380635374e-06, |
| "loss": 1.0361, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3798001052077854, |
| "grad_norm": 2.1554667949676514, |
| "learning_rate": 4.9627530087510725e-06, |
| "loss": 1.0603, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.380326144134666, |
| "grad_norm": 2.1003949642181396, |
| "learning_rate": 4.962633446714009e-06, |
| "loss": 1.0714, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.38085218306154656, |
| "grad_norm": 2.1850736141204834, |
| "learning_rate": 4.962513694533414e-06, |
| "loss": 1.0795, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.38137822198842714, |
| "grad_norm": 2.0440175533294678, |
| "learning_rate": 4.962393752218535e-06, |
| "loss": 1.0882, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3819042609153077, |
| "grad_norm": 2.2579755783081055, |
| "learning_rate": 4.962273619778632e-06, |
| "loss": 1.1066, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.38243029984218835, |
| "grad_norm": 2.0210318565368652, |
| "learning_rate": 4.962153297222981e-06, |
| "loss": 1.0843, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.3829563387690689, |
| "grad_norm": 2.1218135356903076, |
| "learning_rate": 4.962032784560873e-06, |
| "loss": 1.1039, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.3834823776959495, |
| "grad_norm": 2.2498831748962402, |
| "learning_rate": 4.961912081801612e-06, |
| "loss": 1.0389, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3840084166228301, |
| "grad_norm": 2.6789276599884033, |
| "learning_rate": 4.9617911889545175e-06, |
| "loss": 1.0772, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.38453445554971066, |
| "grad_norm": 1.9847339391708374, |
| "learning_rate": 4.961670106028924e-06, |
| "loss": 1.0804, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.38506049447659124, |
| "grad_norm": 2.048737049102783, |
| "learning_rate": 4.9615488330341814e-06, |
| "loss": 1.1089, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3855865334034719, |
| "grad_norm": 2.2241313457489014, |
| "learning_rate": 4.961427369979652e-06, |
| "loss": 1.0618, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.38611257233035245, |
| "grad_norm": 1.9084025621414185, |
| "learning_rate": 4.961305716874716e-06, |
| "loss": 1.0316, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.38663861125723303, |
| "grad_norm": 2.0064773559570312, |
| "learning_rate": 4.9611838737287646e-06, |
| "loss": 1.0289, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3871646501841136, |
| "grad_norm": 2.386962652206421, |
| "learning_rate": 4.961061840551205e-06, |
| "loss": 1.1488, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3876906891109942, |
| "grad_norm": 2.0626862049102783, |
| "learning_rate": 4.960939617351462e-06, |
| "loss": 1.0793, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.3882167280378748, |
| "grad_norm": 2.1622767448425293, |
| "learning_rate": 4.960817204138971e-06, |
| "loss": 1.0923, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3887427669647554, |
| "grad_norm": 2.049163818359375, |
| "learning_rate": 4.9606946009231834e-06, |
| "loss": 1.0423, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.389268805891636, |
| "grad_norm": 2.0196399688720703, |
| "learning_rate": 4.960571807713568e-06, |
| "loss": 0.9832, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.38979484481851656, |
| "grad_norm": 1.982647180557251, |
| "learning_rate": 4.960448824519602e-06, |
| "loss": 1.0424, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.39032088374539714, |
| "grad_norm": 2.0468926429748535, |
| "learning_rate": 4.960325651350784e-06, |
| "loss": 1.074, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3908469226722778, |
| "grad_norm": 2.402381181716919, |
| "learning_rate": 4.960202288216624e-06, |
| "loss": 1.058, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.39137296159915835, |
| "grad_norm": 2.065232753753662, |
| "learning_rate": 4.960078735126646e-06, |
| "loss": 1.0985, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.39189900052603893, |
| "grad_norm": 2.1949756145477295, |
| "learning_rate": 4.95995499209039e-06, |
| "loss": 1.0791, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3924250394529195, |
| "grad_norm": 2.121232271194458, |
| "learning_rate": 4.959831059117411e-06, |
| "loss": 1.0606, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3929510783798001, |
| "grad_norm": 2.247145652770996, |
| "learning_rate": 4.959706936217278e-06, |
| "loss": 1.0991, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.39347711730668067, |
| "grad_norm": 2.0540339946746826, |
| "learning_rate": 4.9595826233995735e-06, |
| "loss": 1.0835, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.3940031562335613, |
| "grad_norm": 2.173257350921631, |
| "learning_rate": 4.959458120673898e-06, |
| "loss": 1.0588, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.3945291951604419, |
| "grad_norm": 2.1530778408050537, |
| "learning_rate": 4.959333428049862e-06, |
| "loss": 1.0395, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.39505523408732246, |
| "grad_norm": 2.0705490112304688, |
| "learning_rate": 4.959208545537095e-06, |
| "loss": 1.071, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.39558127301420304, |
| "grad_norm": 1.9439338445663452, |
| "learning_rate": 4.95908347314524e-06, |
| "loss": 1.0224, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.3961073119410836, |
| "grad_norm": 2.1683454513549805, |
| "learning_rate": 4.958958210883952e-06, |
| "loss": 1.0745, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.39663335086796425, |
| "grad_norm": 2.2809042930603027, |
| "learning_rate": 4.958832758762903e-06, |
| "loss": 1.0887, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.39715938979484483, |
| "grad_norm": 2.161447048187256, |
| "learning_rate": 4.9587071167917814e-06, |
| "loss": 1.1447, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.3976854287217254, |
| "grad_norm": 2.1375932693481445, |
| "learning_rate": 4.958581284980285e-06, |
| "loss": 1.0295, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.398211467648606, |
| "grad_norm": 2.0431041717529297, |
| "learning_rate": 4.958455263338133e-06, |
| "loss": 1.0567, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.39873750657548657, |
| "grad_norm": 2.0288238525390625, |
| "learning_rate": 4.958329051875053e-06, |
| "loss": 1.0736, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3992635455023672, |
| "grad_norm": 2.146132230758667, |
| "learning_rate": 4.958202650600791e-06, |
| "loss": 1.0744, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.3997895844292478, |
| "grad_norm": 2.1740963459014893, |
| "learning_rate": 4.958076059525107e-06, |
| "loss": 1.0263, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.40031562335612836, |
| "grad_norm": 2.1219875812530518, |
| "learning_rate": 4.957949278657773e-06, |
| "loss": 1.0508, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.40084166228300894, |
| "grad_norm": 2.0742340087890625, |
| "learning_rate": 4.9578223080085815e-06, |
| "loss": 1.0455, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.4013677012098895, |
| "grad_norm": 2.1779415607452393, |
| "learning_rate": 4.957695147587334e-06, |
| "loss": 1.1079, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4018937401367701, |
| "grad_norm": 2.151047706604004, |
| "learning_rate": 4.957567797403848e-06, |
| "loss": 1.0893, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.40241977906365073, |
| "grad_norm": 2.1728570461273193, |
| "learning_rate": 4.9574402574679594e-06, |
| "loss": 1.0726, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.4029458179905313, |
| "grad_norm": 1.982230305671692, |
| "learning_rate": 4.957312527789512e-06, |
| "loss": 1.0629, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4034718569174119, |
| "grad_norm": 1.953464150428772, |
| "learning_rate": 4.95718460837837e-06, |
| "loss": 1.1093, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.40399789584429247, |
| "grad_norm": 1.9718215465545654, |
| "learning_rate": 4.9570564992444116e-06, |
| "loss": 1.1018, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.40452393477117304, |
| "grad_norm": 2.067629337310791, |
| "learning_rate": 4.956928200397526e-06, |
| "loss": 1.0364, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4050499736980537, |
| "grad_norm": 2.1172022819519043, |
| "learning_rate": 4.956799711847619e-06, |
| "loss": 1.0693, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.40557601262493426, |
| "grad_norm": 2.0539615154266357, |
| "learning_rate": 4.956671033604613e-06, |
| "loss": 1.0034, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.40610205155181484, |
| "grad_norm": 1.9780375957489014, |
| "learning_rate": 4.956542165678443e-06, |
| "loss": 1.0515, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.4066280904786954, |
| "grad_norm": 2.0974819660186768, |
| "learning_rate": 4.95641310807906e-06, |
| "loss": 1.0754, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.407154129405576, |
| "grad_norm": 2.1018221378326416, |
| "learning_rate": 4.956283860816427e-06, |
| "loss": 1.1102, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.4076801683324566, |
| "grad_norm": 2.3969085216522217, |
| "learning_rate": 4.9561544239005235e-06, |
| "loss": 1.0455, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.4082062072593372, |
| "grad_norm": 2.2645649909973145, |
| "learning_rate": 4.956024797341345e-06, |
| "loss": 0.9724, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4087322461862178, |
| "grad_norm": 2.3406150341033936, |
| "learning_rate": 4.955894981148898e-06, |
| "loss": 1.1341, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.40925828511309836, |
| "grad_norm": 2.0782880783081055, |
| "learning_rate": 4.955764975333208e-06, |
| "loss": 1.0116, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.40978432403997894, |
| "grad_norm": 2.1269314289093018, |
| "learning_rate": 4.955634779904312e-06, |
| "loss": 1.0967, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.4103103629668595, |
| "grad_norm": 2.198559522628784, |
| "learning_rate": 4.9555043948722625e-06, |
| "loss": 1.0815, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.41083640189374016, |
| "grad_norm": 2.2189719676971436, |
| "learning_rate": 4.9553738202471264e-06, |
| "loss": 1.0559, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.41136244082062073, |
| "grad_norm": 2.2313179969787598, |
| "learning_rate": 4.955243056038986e-06, |
| "loss": 1.046, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4118884797475013, |
| "grad_norm": 1.9563003778457642, |
| "learning_rate": 4.955112102257939e-06, |
| "loss": 1.0735, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4124145186743819, |
| "grad_norm": 1.99479341506958, |
| "learning_rate": 4.954980958914093e-06, |
| "loss": 1.0657, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.41294055760126247, |
| "grad_norm": 2.029634714126587, |
| "learning_rate": 4.954849626017577e-06, |
| "loss": 1.0811, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.4134665965281431, |
| "grad_norm": 2.2947723865509033, |
| "learning_rate": 4.9547181035785314e-06, |
| "loss": 1.0807, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4139926354550237, |
| "grad_norm": 2.0323445796966553, |
| "learning_rate": 4.9545863916071094e-06, |
| "loss": 1.0715, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.41451867438190426, |
| "grad_norm": 2.0068464279174805, |
| "learning_rate": 4.954454490113482e-06, |
| "loss": 1.0447, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.41504471330878484, |
| "grad_norm": 2.132549285888672, |
| "learning_rate": 4.954322399107833e-06, |
| "loss": 1.0454, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4155707522356654, |
| "grad_norm": 2.0086755752563477, |
| "learning_rate": 4.954190118600361e-06, |
| "loss": 1.0724, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.41609679116254605, |
| "grad_norm": 2.1461241245269775, |
| "learning_rate": 4.95405764860128e-06, |
| "loss": 1.0391, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.41662283008942663, |
| "grad_norm": 2.1352107524871826, |
| "learning_rate": 4.953924989120818e-06, |
| "loss": 0.9898, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.4171488690163072, |
| "grad_norm": 2.0694406032562256, |
| "learning_rate": 4.953792140169219e-06, |
| "loss": 1.0819, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4176749079431878, |
| "grad_norm": 2.088433027267456, |
| "learning_rate": 4.953659101756739e-06, |
| "loss": 1.0833, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.41820094687006837, |
| "grad_norm": 2.1760306358337402, |
| "learning_rate": 4.95352587389365e-06, |
| "loss": 1.0535, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.41872698579694895, |
| "grad_norm": 2.2031099796295166, |
| "learning_rate": 4.95339245659024e-06, |
| "loss": 1.0389, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4192530247238296, |
| "grad_norm": 2.247276782989502, |
| "learning_rate": 4.953258849856809e-06, |
| "loss": 1.0839, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.41977906365071016, |
| "grad_norm": 2.24357271194458, |
| "learning_rate": 4.953125053703674e-06, |
| "loss": 1.0666, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.42030510257759074, |
| "grad_norm": 2.240151882171631, |
| "learning_rate": 4.952991068141165e-06, |
| "loss": 1.1009, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4208311415044713, |
| "grad_norm": 2.2172327041625977, |
| "learning_rate": 4.952856893179628e-06, |
| "loss": 1.0928, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4213571804313519, |
| "grad_norm": 2.377336025238037, |
| "learning_rate": 4.952722528829422e-06, |
| "loss": 1.0968, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.42188321935823253, |
| "grad_norm": 2.466841459274292, |
| "learning_rate": 4.9525879751009205e-06, |
| "loss": 1.0631, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4224092582851131, |
| "grad_norm": 2.035644054412842, |
| "learning_rate": 4.952453232004516e-06, |
| "loss": 1.0609, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.4229352972119937, |
| "grad_norm": 2.2472054958343506, |
| "learning_rate": 4.952318299550608e-06, |
| "loss": 1.0613, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.42346133613887427, |
| "grad_norm": 2.175999879837036, |
| "learning_rate": 4.952183177749618e-06, |
| "loss": 1.0954, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.42398737506575485, |
| "grad_norm": 2.206052303314209, |
| "learning_rate": 4.952047866611978e-06, |
| "loss": 1.0965, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4245134139926355, |
| "grad_norm": 1.9550546407699585, |
| "learning_rate": 4.951912366148135e-06, |
| "loss": 1.0835, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.42503945291951606, |
| "grad_norm": 2.194734811782837, |
| "learning_rate": 4.951776676368552e-06, |
| "loss": 1.1179, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.42556549184639664, |
| "grad_norm": 2.094862222671509, |
| "learning_rate": 4.951640797283704e-06, |
| "loss": 1.0634, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4260915307732772, |
| "grad_norm": 1.980043888092041, |
| "learning_rate": 4.951504728904085e-06, |
| "loss": 1.0874, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4266175697001578, |
| "grad_norm": 2.2654919624328613, |
| "learning_rate": 4.9513684712402e-06, |
| "loss": 1.057, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4271436086270384, |
| "grad_norm": 2.197120189666748, |
| "learning_rate": 4.951232024302569e-06, |
| "loss": 1.1114, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.427669647553919, |
| "grad_norm": 2.143324375152588, |
| "learning_rate": 4.9510953881017275e-06, |
| "loss": 1.07, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.4281956864807996, |
| "grad_norm": 2.1920077800750732, |
| "learning_rate": 4.950958562648226e-06, |
| "loss": 1.0373, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.42872172540768017, |
| "grad_norm": 2.0401923656463623, |
| "learning_rate": 4.950821547952629e-06, |
| "loss": 1.1111, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.42924776433456074, |
| "grad_norm": 1.9541674852371216, |
| "learning_rate": 4.950684344025515e-06, |
| "loss": 1.0153, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4297738032614413, |
| "grad_norm": 3.5096704959869385, |
| "learning_rate": 4.9505469508774776e-06, |
| "loss": 1.0435, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.43029984218832196, |
| "grad_norm": 2.0304462909698486, |
| "learning_rate": 4.9504093685191255e-06, |
| "loss": 1.0786, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.43082588111520254, |
| "grad_norm": 2.115224599838257, |
| "learning_rate": 4.950271596961082e-06, |
| "loss": 1.0854, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4313519200420831, |
| "grad_norm": 2.176621913909912, |
| "learning_rate": 4.950133636213984e-06, |
| "loss": 0.9909, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4318779589689637, |
| "grad_norm": 2.2046449184417725, |
| "learning_rate": 4.949995486288484e-06, |
| "loss": 1.0688, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.43240399789584427, |
| "grad_norm": 2.1462888717651367, |
| "learning_rate": 4.949857147195249e-06, |
| "loss": 1.0644, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4329300368227249, |
| "grad_norm": 2.0735347270965576, |
| "learning_rate": 4.94971861894496e-06, |
| "loss": 1.022, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.4334560757496055, |
| "grad_norm": 2.086724042892456, |
| "learning_rate": 4.949579901548312e-06, |
| "loss": 1.02, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.43398211467648606, |
| "grad_norm": 2.078622341156006, |
| "learning_rate": 4.949440995016018e-06, |
| "loss": 1.0653, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.43450815360336664, |
| "grad_norm": 2.1504440307617188, |
| "learning_rate": 4.949301899358801e-06, |
| "loss": 1.0708, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4350341925302472, |
| "grad_norm": 2.2340216636657715, |
| "learning_rate": 4.949162614587401e-06, |
| "loss": 1.0688, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4355602314571278, |
| "grad_norm": 2.2017569541931152, |
| "learning_rate": 4.949023140712574e-06, |
| "loss": 1.0935, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.43608627038400843, |
| "grad_norm": 2.117745876312256, |
| "learning_rate": 4.948883477745088e-06, |
| "loss": 1.0868, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.436612309310889, |
| "grad_norm": 2.0983524322509766, |
| "learning_rate": 4.948743625695726e-06, |
| "loss": 1.0695, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4371383482377696, |
| "grad_norm": 2.205693244934082, |
| "learning_rate": 4.948603584575287e-06, |
| "loss": 1.0541, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.43766438716465017, |
| "grad_norm": 1.9967527389526367, |
| "learning_rate": 4.948463354394583e-06, |
| "loss": 0.9933, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.43819042609153075, |
| "grad_norm": 2.113577127456665, |
| "learning_rate": 4.948322935164442e-06, |
| "loss": 1.0199, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4387164650184114, |
| "grad_norm": 2.0825533866882324, |
| "learning_rate": 4.948182326895705e-06, |
| "loss": 1.0446, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.43924250394529196, |
| "grad_norm": 2.0186421871185303, |
| "learning_rate": 4.94804152959923e-06, |
| "loss": 1.0798, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.43976854287217254, |
| "grad_norm": 2.3025147914886475, |
| "learning_rate": 4.947900543285888e-06, |
| "loss": 0.9977, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4402945817990531, |
| "grad_norm": 2.1662867069244385, |
| "learning_rate": 4.947759367966564e-06, |
| "loss": 1.048, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4408206207259337, |
| "grad_norm": 2.0708656311035156, |
| "learning_rate": 4.947618003652158e-06, |
| "loss": 1.0715, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.44134665965281433, |
| "grad_norm": 2.2494263648986816, |
| "learning_rate": 4.947476450353586e-06, |
| "loss": 1.0901, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4418726985796949, |
| "grad_norm": 2.3319430351257324, |
| "learning_rate": 4.947334708081777e-06, |
| "loss": 1.0308, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.4423987375065755, |
| "grad_norm": 2.134620428085327, |
| "learning_rate": 4.947192776847676e-06, |
| "loss": 1.0459, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.44292477643345607, |
| "grad_norm": 2.075429916381836, |
| "learning_rate": 4.94705065666224e-06, |
| "loss": 1.0733, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.44345081536033665, |
| "grad_norm": 2.173069953918457, |
| "learning_rate": 4.946908347536444e-06, |
| "loss": 1.1092, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4439768542872172, |
| "grad_norm": 2.1481893062591553, |
| "learning_rate": 4.946765849481274e-06, |
| "loss": 1.0822, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.44450289321409786, |
| "grad_norm": 2.247277021408081, |
| "learning_rate": 4.9466231625077354e-06, |
| "loss": 1.0777, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.44502893214097844, |
| "grad_norm": 2.1181042194366455, |
| "learning_rate": 4.946480286626842e-06, |
| "loss": 1.1139, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.445554971067859, |
| "grad_norm": 2.05195951461792, |
| "learning_rate": 4.946337221849628e-06, |
| "loss": 1.0738, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4460810099947396, |
| "grad_norm": 2.122732639312744, |
| "learning_rate": 4.946193968187139e-06, |
| "loss": 1.061, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4466070489216202, |
| "grad_norm": 1.8827515840530396, |
| "learning_rate": 4.946050525650434e-06, |
| "loss": 1.061, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4471330878485008, |
| "grad_norm": 2.3874471187591553, |
| "learning_rate": 4.945906894250591e-06, |
| "loss": 1.0667, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4476591267753814, |
| "grad_norm": 2.274724006652832, |
| "learning_rate": 4.945763073998699e-06, |
| "loss": 1.0559, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.44818516570226197, |
| "grad_norm": 2.2730906009674072, |
| "learning_rate": 4.945619064905861e-06, |
| "loss": 1.0952, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.44871120462914255, |
| "grad_norm": 2.190969944000244, |
| "learning_rate": 4.945474866983199e-06, |
| "loss": 1.0816, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.4492372435560231, |
| "grad_norm": 3.6214282512664795, |
| "learning_rate": 4.945330480241844e-06, |
| "loss": 1.09, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.44976328248290376, |
| "grad_norm": 2.0487356185913086, |
| "learning_rate": 4.945185904692946e-06, |
| "loss": 1.0279, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.45028932140978434, |
| "grad_norm": 2.074282646179199, |
| "learning_rate": 4.945041140347669e-06, |
| "loss": 1.0514, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4508153603366649, |
| "grad_norm": 2.126495838165283, |
| "learning_rate": 4.944896187217187e-06, |
| "loss": 1.0819, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4513413992635455, |
| "grad_norm": 2.0265605449676514, |
| "learning_rate": 4.944751045312695e-06, |
| "loss": 1.0282, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4518674381904261, |
| "grad_norm": 2.0557355880737305, |
| "learning_rate": 4.944605714645399e-06, |
| "loss": 1.1052, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4523934771173067, |
| "grad_norm": 2.026393175125122, |
| "learning_rate": 4.944460195226519e-06, |
| "loss": 0.982, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4529195160441873, |
| "grad_norm": 2.1781463623046875, |
| "learning_rate": 4.9443144870672925e-06, |
| "loss": 1.1251, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.45344555497106787, |
| "grad_norm": 2.053683042526245, |
| "learning_rate": 4.944168590178968e-06, |
| "loss": 1.0766, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.45397159389794844, |
| "grad_norm": 2.1147496700286865, |
| "learning_rate": 4.944022504572811e-06, |
| "loss": 1.0174, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.454497632824829, |
| "grad_norm": 2.06046199798584, |
| "learning_rate": 4.943876230260102e-06, |
| "loss": 1.0836, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4550236717517096, |
| "grad_norm": 2.171419382095337, |
| "learning_rate": 4.9437297672521345e-06, |
| "loss": 1.0695, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.45554971067859024, |
| "grad_norm": 2.064301013946533, |
| "learning_rate": 4.943583115560217e-06, |
| "loss": 1.0147, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4560757496054708, |
| "grad_norm": 2.6638195514678955, |
| "learning_rate": 4.943436275195673e-06, |
| "loss": 1.0565, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4566017885323514, |
| "grad_norm": 3.9418976306915283, |
| "learning_rate": 4.943289246169839e-06, |
| "loss": 1.0768, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.457127827459232, |
| "grad_norm": 2.114297389984131, |
| "learning_rate": 4.943142028494069e-06, |
| "loss": 1.0687, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.45765386638611255, |
| "grad_norm": 2.139803171157837, |
| "learning_rate": 4.942994622179729e-06, |
| "loss": 1.0464, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4581799053129932, |
| "grad_norm": 2.011474370956421, |
| "learning_rate": 4.942847027238201e-06, |
| "loss": 1.0181, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.45870594423987376, |
| "grad_norm": 2.1592113971710205, |
| "learning_rate": 4.94269924368088e-06, |
| "loss": 1.0699, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.45923198316675434, |
| "grad_norm": 2.0230283737182617, |
| "learning_rate": 4.942551271519178e-06, |
| "loss": 1.075, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.4597580220936349, |
| "grad_norm": 2.286768913269043, |
| "learning_rate": 4.942403110764518e-06, |
| "loss": 1.0604, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4602840610205155, |
| "grad_norm": 2.305375337600708, |
| "learning_rate": 4.942254761428343e-06, |
| "loss": 1.0067, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.46081009994739613, |
| "grad_norm": 2.416245698928833, |
| "learning_rate": 4.942106223522104e-06, |
| "loss": 1.1109, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4613361388742767, |
| "grad_norm": 2.1339962482452393, |
| "learning_rate": 4.941957497057272e-06, |
| "loss": 1.0708, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4618621778011573, |
| "grad_norm": 1.9983795881271362, |
| "learning_rate": 4.941808582045329e-06, |
| "loss": 1.0032, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.46238821672803787, |
| "grad_norm": 2.1115024089813232, |
| "learning_rate": 4.9416594784977735e-06, |
| "loss": 1.0272, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.46291425565491845, |
| "grad_norm": 2.2785818576812744, |
| "learning_rate": 4.941510186426118e-06, |
| "loss": 1.0538, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.46344029458179903, |
| "grad_norm": 2.009938955307007, |
| "learning_rate": 4.94136070584189e-06, |
| "loss": 1.0432, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.46396633350867966, |
| "grad_norm": 2.119264841079712, |
| "learning_rate": 4.94121103675663e-06, |
| "loss": 1.063, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.46449237243556024, |
| "grad_norm": 2.267575979232788, |
| "learning_rate": 4.941061179181896e-06, |
| "loss": 1.0698, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4650184113624408, |
| "grad_norm": 2.2345592975616455, |
| "learning_rate": 4.940911133129257e-06, |
| "loss": 1.0898, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4655444502893214, |
| "grad_norm": 2.175180673599243, |
| "learning_rate": 4.940760898610299e-06, |
| "loss": 1.0915, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.466070489216202, |
| "grad_norm": 2.036628246307373, |
| "learning_rate": 4.940610475636621e-06, |
| "loss": 1.0981, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4665965281430826, |
| "grad_norm": 2.193129539489746, |
| "learning_rate": 4.9404598642198386e-06, |
| "loss": 1.1237, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.4671225670699632, |
| "grad_norm": 1.920074462890625, |
| "learning_rate": 4.9403090643715804e-06, |
| "loss": 1.0358, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.46764860599684377, |
| "grad_norm": 2.0745346546173096, |
| "learning_rate": 4.940158076103489e-06, |
| "loss": 1.0487, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.46817464492372435, |
| "grad_norm": 1.9645469188690186, |
| "learning_rate": 4.940006899427225e-06, |
| "loss": 1.0256, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4687006838506049, |
| "grad_norm": 1.9696778059005737, |
| "learning_rate": 4.939855534354458e-06, |
| "loss": 1.0302, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.46922672277748556, |
| "grad_norm": 2.1893057823181152, |
| "learning_rate": 4.939703980896875e-06, |
| "loss": 1.0391, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.46975276170436614, |
| "grad_norm": 2.0537021160125732, |
| "learning_rate": 4.93955223906618e-06, |
| "loss": 1.0498, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4702788006312467, |
| "grad_norm": 2.4528138637542725, |
| "learning_rate": 4.9394003088740875e-06, |
| "loss": 1.0393, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4708048395581273, |
| "grad_norm": 2.2085723876953125, |
| "learning_rate": 4.93924819033233e-06, |
| "loss": 1.0789, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4713308784850079, |
| "grad_norm": 2.0029642581939697, |
| "learning_rate": 4.9390958834526504e-06, |
| "loss": 1.0621, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.47185691741188845, |
| "grad_norm": 2.0400004386901855, |
| "learning_rate": 4.93894338824681e-06, |
| "loss": 1.0426, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.4723829563387691, |
| "grad_norm": 2.3174595832824707, |
| "learning_rate": 4.9387907047265825e-06, |
| "loss": 1.0273, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.47290899526564967, |
| "grad_norm": 1.998889446258545, |
| "learning_rate": 4.938637832903758e-06, |
| "loss": 1.0401, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.47343503419253025, |
| "grad_norm": 2.0847246646881104, |
| "learning_rate": 4.93848477279014e-06, |
| "loss": 1.0677, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4739610731194108, |
| "grad_norm": 2.086249351501465, |
| "learning_rate": 4.938331524397544e-06, |
| "loss": 1.043, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4744871120462914, |
| "grad_norm": 2.1909382343292236, |
| "learning_rate": 4.938178087737805e-06, |
| "loss": 0.9977, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.47501315097317204, |
| "grad_norm": 2.066394567489624, |
| "learning_rate": 4.938024462822769e-06, |
| "loss": 1.044, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.4755391899000526, |
| "grad_norm": 2.1768858432769775, |
| "learning_rate": 4.937870649664299e-06, |
| "loss": 0.9886, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.4760652288269332, |
| "grad_norm": 2.0450236797332764, |
| "learning_rate": 4.937716648274269e-06, |
| "loss": 1.0471, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4765912677538138, |
| "grad_norm": 2.218719720840454, |
| "learning_rate": 4.937562458664571e-06, |
| "loss": 1.0324, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.47711730668069435, |
| "grad_norm": 2.2519423961639404, |
| "learning_rate": 4.937408080847109e-06, |
| "loss": 1.0899, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.477643345607575, |
| "grad_norm": 2.045959234237671, |
| "learning_rate": 4.9372535148338055e-06, |
| "loss": 1.0383, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.47816938453445557, |
| "grad_norm": 2.1137306690216064, |
| "learning_rate": 4.937098760636591e-06, |
| "loss": 1.0223, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.47869542346133614, |
| "grad_norm": 2.2585835456848145, |
| "learning_rate": 4.936943818267418e-06, |
| "loss": 1.027, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4792214623882167, |
| "grad_norm": 2.161625862121582, |
| "learning_rate": 4.936788687738247e-06, |
| "loss": 1.0318, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.4797475013150973, |
| "grad_norm": 2.0743277072906494, |
| "learning_rate": 4.936633369061057e-06, |
| "loss": 1.1014, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4802735402419779, |
| "grad_norm": 2.1271307468414307, |
| "learning_rate": 4.936477862247841e-06, |
| "loss": 1.0403, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.4807995791688585, |
| "grad_norm": 2.0820491313934326, |
| "learning_rate": 4.9363221673106046e-06, |
| "loss": 1.069, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4813256180957391, |
| "grad_norm": 2.0069093704223633, |
| "learning_rate": 4.936166284261369e-06, |
| "loss": 1.0752, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4818516570226197, |
| "grad_norm": 2.2541720867156982, |
| "learning_rate": 4.936010213112172e-06, |
| "loss": 1.0309, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.48237769594950025, |
| "grad_norm": 2.155980110168457, |
| "learning_rate": 4.9358539538750636e-06, |
| "loss": 1.0078, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.48290373487638083, |
| "grad_norm": 2.217339038848877, |
| "learning_rate": 4.935697506562107e-06, |
| "loss": 1.0522, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.48342977380326146, |
| "grad_norm": 1.963270902633667, |
| "learning_rate": 4.935540871185384e-06, |
| "loss": 1.0692, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.48395581273014204, |
| "grad_norm": 1.9923917055130005, |
| "learning_rate": 4.935384047756987e-06, |
| "loss": 1.0926, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.4844818516570226, |
| "grad_norm": 2.177624464035034, |
| "learning_rate": 4.935227036289026e-06, |
| "loss": 1.0727, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4850078905839032, |
| "grad_norm": 2.022496461868286, |
| "learning_rate": 4.935069836793622e-06, |
| "loss": 1.0267, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4855339295107838, |
| "grad_norm": 2.0110666751861572, |
| "learning_rate": 4.9349124492829155e-06, |
| "loss": 1.0911, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.4860599684376644, |
| "grad_norm": 2.1780877113342285, |
| "learning_rate": 4.934754873769057e-06, |
| "loss": 1.0494, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.486586007364545, |
| "grad_norm": 2.0291390419006348, |
| "learning_rate": 4.934597110264212e-06, |
| "loss": 1.0485, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.48711204629142557, |
| "grad_norm": 1.947896957397461, |
| "learning_rate": 4.9344391587805626e-06, |
| "loss": 1.0789, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.48763808521830615, |
| "grad_norm": 1.9520971775054932, |
| "learning_rate": 4.934281019330305e-06, |
| "loss": 1.0644, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.48816412414518673, |
| "grad_norm": 2.0348432064056396, |
| "learning_rate": 4.93412269192565e-06, |
| "loss": 1.0471, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4886901630720673, |
| "grad_norm": 2.214876651763916, |
| "learning_rate": 4.93396417657882e-06, |
| "loss": 1.0921, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.48921620199894794, |
| "grad_norm": 1.9910991191864014, |
| "learning_rate": 4.933805473302057e-06, |
| "loss": 1.0962, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.4897422409258285, |
| "grad_norm": 2.0497536659240723, |
| "learning_rate": 4.933646582107612e-06, |
| "loss": 1.0502, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4902682798527091, |
| "grad_norm": 2.102994203567505, |
| "learning_rate": 4.933487503007756e-06, |
| "loss": 1.0676, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4907943187795897, |
| "grad_norm": 1.885666012763977, |
| "learning_rate": 4.933328236014768e-06, |
| "loss": 1.0005, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.49132035770647026, |
| "grad_norm": 2.1525766849517822, |
| "learning_rate": 4.933168781140949e-06, |
| "loss": 1.0997, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4918463966333509, |
| "grad_norm": 2.0346620082855225, |
| "learning_rate": 4.9330091383986086e-06, |
| "loss": 1.0651, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.49237243556023147, |
| "grad_norm": 2.0436878204345703, |
| "learning_rate": 4.932849307800074e-06, |
| "loss": 1.0539, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.49289847448711205, |
| "grad_norm": 2.1023032665252686, |
| "learning_rate": 4.932689289357686e-06, |
| "loss": 1.0583, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4934245134139926, |
| "grad_norm": 2.0781443119049072, |
| "learning_rate": 4.932529083083798e-06, |
| "loss": 1.0753, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.4939505523408732, |
| "grad_norm": 2.0385992527008057, |
| "learning_rate": 4.932368688990783e-06, |
| "loss": 1.0165, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.49447659126775384, |
| "grad_norm": 2.350186586380005, |
| "learning_rate": 4.932208107091022e-06, |
| "loss": 1.0834, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4950026301946344, |
| "grad_norm": 2.2009286880493164, |
| "learning_rate": 4.932047337396917e-06, |
| "loss": 1.0975, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.495528669121515, |
| "grad_norm": 2.389380931854248, |
| "learning_rate": 4.931886379920878e-06, |
| "loss": 1.0853, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.4960547080483956, |
| "grad_norm": 2.016162157058716, |
| "learning_rate": 4.931725234675334e-06, |
| "loss": 1.039, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.49658074697527615, |
| "grad_norm": 2.116718292236328, |
| "learning_rate": 4.9315639016727286e-06, |
| "loss": 1.0182, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.49710678590215673, |
| "grad_norm": 2.1381125450134277, |
| "learning_rate": 4.931402380925517e-06, |
| "loss": 1.1051, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.49763282482903737, |
| "grad_norm": 2.0954737663269043, |
| "learning_rate": 4.931240672446171e-06, |
| "loss": 1.038, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.49815886375591795, |
| "grad_norm": 2.167865037918091, |
| "learning_rate": 4.931078776247176e-06, |
| "loss": 1.0998, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4986849026827985, |
| "grad_norm": 2.1278021335601807, |
| "learning_rate": 4.930916692341034e-06, |
| "loss": 1.0374, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4992109416096791, |
| "grad_norm": 2.088512420654297, |
| "learning_rate": 4.9307544207402565e-06, |
| "loss": 1.0954, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4997369805365597, |
| "grad_norm": 2.015916109085083, |
| "learning_rate": 4.930591961457375e-06, |
| "loss": 1.0163, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5002630194634403, |
| "grad_norm": 2.0662143230438232, |
| "learning_rate": 4.930429314504933e-06, |
| "loss": 1.0968, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5007890583903208, |
| "grad_norm": 2.0692410469055176, |
| "learning_rate": 4.930266479895488e-06, |
| "loss": 1.0772, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5013150973172015, |
| "grad_norm": 2.0734803676605225, |
| "learning_rate": 4.930103457641613e-06, |
| "loss": 1.1096, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.5018411362440821, |
| "grad_norm": 2.167228937149048, |
| "learning_rate": 4.929940247755896e-06, |
| "loss": 1.0608, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.5023671751709626, |
| "grad_norm": 2.272087574005127, |
| "learning_rate": 4.929776850250937e-06, |
| "loss": 1.0825, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5028932140978433, |
| "grad_norm": 2.0937726497650146, |
| "learning_rate": 4.929613265139354e-06, |
| "loss": 1.0651, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5034192530247238, |
| "grad_norm": 2.168090343475342, |
| "learning_rate": 4.929449492433777e-06, |
| "loss": 1.0821, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5039452919516044, |
| "grad_norm": 2.0708675384521484, |
| "learning_rate": 4.92928553214685e-06, |
| "loss": 1.0655, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5044713308784851, |
| "grad_norm": 2.067678689956665, |
| "learning_rate": 4.929121384291234e-06, |
| "loss": 1.05, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5049973698053656, |
| "grad_norm": 1.9181219339370728, |
| "learning_rate": 4.928957048879602e-06, |
| "loss": 0.9935, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5055234087322462, |
| "grad_norm": 2.217785358428955, |
| "learning_rate": 4.928792525924644e-06, |
| "loss": 0.97, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5060494476591267, |
| "grad_norm": 2.084656238555908, |
| "learning_rate": 4.928627815439062e-06, |
| "loss": 1.0541, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5065754865860074, |
| "grad_norm": 2.035367727279663, |
| "learning_rate": 4.928462917435574e-06, |
| "loss": 1.0694, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.507101525512888, |
| "grad_norm": 2.001654624938965, |
| "learning_rate": 4.928297831926912e-06, |
| "loss": 1.0232, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5076275644397685, |
| "grad_norm": 2.57733154296875, |
| "learning_rate": 4.928132558925822e-06, |
| "loss": 1.0664, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5081536033666492, |
| "grad_norm": 2.1757423877716064, |
| "learning_rate": 4.927967098445066e-06, |
| "loss": 1.1119, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5086796422935297, |
| "grad_norm": 2.089594602584839, |
| "learning_rate": 4.927801450497417e-06, |
| "loss": 1.0212, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5092056812204103, |
| "grad_norm": 2.078519821166992, |
| "learning_rate": 4.927635615095668e-06, |
| "loss": 1.0381, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5097317201472908, |
| "grad_norm": 2.0807132720947266, |
| "learning_rate": 4.927469592252621e-06, |
| "loss": 1.0272, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5102577590741715, |
| "grad_norm": 2.1806020736694336, |
| "learning_rate": 4.927303381981098e-06, |
| "loss": 1.0846, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5107837980010521, |
| "grad_norm": 2.1434948444366455, |
| "learning_rate": 4.927136984293928e-06, |
| "loss": 1.0775, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5113098369279326, |
| "grad_norm": 2.000924825668335, |
| "learning_rate": 4.926970399203962e-06, |
| "loss": 1.0272, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5118358758548133, |
| "grad_norm": 2.1742711067199707, |
| "learning_rate": 4.926803626724062e-06, |
| "loss": 1.0253, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5123619147816938, |
| "grad_norm": 2.1074674129486084, |
| "learning_rate": 4.926636666867103e-06, |
| "loss": 1.0146, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5128879537085744, |
| "grad_norm": 2.1562392711639404, |
| "learning_rate": 4.926469519645976e-06, |
| "loss": 1.0364, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5134139926354551, |
| "grad_norm": 2.4177775382995605, |
| "learning_rate": 4.926302185073591e-06, |
| "loss": 1.0658, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5139400315623356, |
| "grad_norm": 2.322571277618408, |
| "learning_rate": 4.9261346631628635e-06, |
| "loss": 1.0489, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5144660704892162, |
| "grad_norm": 2.0937836170196533, |
| "learning_rate": 4.925966953926729e-06, |
| "loss": 1.0535, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5149921094160967, |
| "grad_norm": 2.065213680267334, |
| "learning_rate": 4.925799057378139e-06, |
| "loss": 1.0097, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5155181483429774, |
| "grad_norm": 2.0844249725341797, |
| "learning_rate": 4.925630973530054e-06, |
| "loss": 1.0719, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.516044187269858, |
| "grad_norm": 2.4148666858673096, |
| "learning_rate": 4.925462702395454e-06, |
| "loss": 1.0906, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5165702261967385, |
| "grad_norm": 2.071423053741455, |
| "learning_rate": 4.925294243987331e-06, |
| "loss": 1.03, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5170962651236192, |
| "grad_norm": 2.2397513389587402, |
| "learning_rate": 4.9251255983186915e-06, |
| "loss": 1.0412, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5176223040504997, |
| "grad_norm": 2.171269655227661, |
| "learning_rate": 4.924956765402557e-06, |
| "loss": 1.1, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5181483429773803, |
| "grad_norm": 2.1705877780914307, |
| "learning_rate": 4.924787745251963e-06, |
| "loss": 1.0534, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.518674381904261, |
| "grad_norm": 2.178514003753662, |
| "learning_rate": 4.924618537879961e-06, |
| "loss": 1.0759, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5192004208311415, |
| "grad_norm": 2.072097063064575, |
| "learning_rate": 4.924449143299614e-06, |
| "loss": 1.0321, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5197264597580221, |
| "grad_norm": 2.124030351638794, |
| "learning_rate": 4.924279561524004e-06, |
| "loss": 1.0465, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5202524986849026, |
| "grad_norm": 2.0927019119262695, |
| "learning_rate": 4.924109792566222e-06, |
| "loss": 1.0716, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5207785376117833, |
| "grad_norm": 2.0673232078552246, |
| "learning_rate": 4.923939836439377e-06, |
| "loss": 1.0628, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5213045765386639, |
| "grad_norm": 2.2538528442382812, |
| "learning_rate": 4.92376969315659e-06, |
| "loss": 1.0687, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5218306154655444, |
| "grad_norm": 2.120530366897583, |
| "learning_rate": 4.923599362731001e-06, |
| "loss": 1.0893, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5223566543924251, |
| "grad_norm": 2.0750699043273926, |
| "learning_rate": 4.92342884517576e-06, |
| "loss": 1.0016, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5228826933193056, |
| "grad_norm": 1.9984569549560547, |
| "learning_rate": 4.923258140504032e-06, |
| "loss": 1.0326, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5234087322461862, |
| "grad_norm": 2.201758861541748, |
| "learning_rate": 4.923087248729e-06, |
| "loss": 1.0413, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5239347711730669, |
| "grad_norm": 2.1322648525238037, |
| "learning_rate": 4.922916169863855e-06, |
| "loss": 1.0505, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5244608100999474, |
| "grad_norm": 2.0557119846343994, |
| "learning_rate": 4.922744903921809e-06, |
| "loss": 0.9761, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.524986849026828, |
| "grad_norm": 2.0989720821380615, |
| "learning_rate": 4.922573450916086e-06, |
| "loss": 1.0436, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.5255128879537085, |
| "grad_norm": 2.152665138244629, |
| "learning_rate": 4.922401810859922e-06, |
| "loss": 1.0567, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5260389268805892, |
| "grad_norm": 1.959796667098999, |
| "learning_rate": 4.922229983766571e-06, |
| "loss": 1.0694, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5265649658074697, |
| "grad_norm": 2.121493101119995, |
| "learning_rate": 4.9220579696493e-06, |
| "loss": 1.1024, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5270910047343503, |
| "grad_norm": 1.9629384279251099, |
| "learning_rate": 4.92188576852139e-06, |
| "loss": 1.0538, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.527617043661231, |
| "grad_norm": 2.396224021911621, |
| "learning_rate": 4.921713380396137e-06, |
| "loss": 1.0711, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5281430825881115, |
| "grad_norm": 2.1571781635284424, |
| "learning_rate": 4.921540805286852e-06, |
| "loss": 1.0663, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5286691215149921, |
| "grad_norm": 2.032282590866089, |
| "learning_rate": 4.921368043206858e-06, |
| "loss": 1.0658, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5291951604418726, |
| "grad_norm": 1.9589232206344604, |
| "learning_rate": 4.921195094169496e-06, |
| "loss": 0.9755, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5297211993687533, |
| "grad_norm": 1.9304051399230957, |
| "learning_rate": 4.92102195818812e-06, |
| "loss": 1.011, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5302472382956339, |
| "grad_norm": 2.306674003601074, |
| "learning_rate": 4.920848635276096e-06, |
| "loss": 1.0626, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5307732772225144, |
| "grad_norm": 2.156906843185425, |
| "learning_rate": 4.920675125446809e-06, |
| "loss": 1.0107, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5312993161493951, |
| "grad_norm": 2.2959272861480713, |
| "learning_rate": 4.9205014287136535e-06, |
| "loss": 1.0527, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5318253550762756, |
| "grad_norm": 2.046900510787964, |
| "learning_rate": 4.9203275450900426e-06, |
| "loss": 1.0154, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.5323513940031562, |
| "grad_norm": 1.9947476387023926, |
| "learning_rate": 4.920153474589401e-06, |
| "loss": 1.0456, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5328774329300369, |
| "grad_norm": 2.3516438007354736, |
| "learning_rate": 4.919979217225169e-06, |
| "loss": 1.0982, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5334034718569174, |
| "grad_norm": 2.5909998416900635, |
| "learning_rate": 4.919804773010802e-06, |
| "loss": 1.0436, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.533929510783798, |
| "grad_norm": 2.5206117630004883, |
| "learning_rate": 4.91963014195977e-06, |
| "loss": 1.0405, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5344555497106785, |
| "grad_norm": 2.21992826461792, |
| "learning_rate": 4.919455324085554e-06, |
| "loss": 1.0594, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5349815886375592, |
| "grad_norm": 2.2773711681365967, |
| "learning_rate": 4.919280319401654e-06, |
| "loss": 1.0501, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5355076275644398, |
| "grad_norm": 2.075962543487549, |
| "learning_rate": 4.919105127921582e-06, |
| "loss": 1.0052, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5360336664913203, |
| "grad_norm": 2.108670473098755, |
| "learning_rate": 4.9189297496588636e-06, |
| "loss": 1.0675, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.536559705418201, |
| "grad_norm": 2.125927209854126, |
| "learning_rate": 4.918754184627041e-06, |
| "loss": 1.0912, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5370857443450815, |
| "grad_norm": 2.1099467277526855, |
| "learning_rate": 4.91857843283967e-06, |
| "loss": 1.0424, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5376117832719621, |
| "grad_norm": 2.0880467891693115, |
| "learning_rate": 4.918402494310319e-06, |
| "loss": 1.061, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5381378221988428, |
| "grad_norm": 2.1544101238250732, |
| "learning_rate": 4.918226369052575e-06, |
| "loss": 1.0608, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5386638611257233, |
| "grad_norm": 2.213214635848999, |
| "learning_rate": 4.918050057080036e-06, |
| "loss": 1.1368, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5391899000526039, |
| "grad_norm": 2.062903642654419, |
| "learning_rate": 4.917873558406315e-06, |
| "loss": 1.0861, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5397159389794844, |
| "grad_norm": 1.9643436670303345, |
| "learning_rate": 4.917696873045039e-06, |
| "loss": 1.0008, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5402419779063651, |
| "grad_norm": 2.276639699935913, |
| "learning_rate": 4.917520001009851e-06, |
| "loss": 0.9812, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5407680168332457, |
| "grad_norm": 2.1487631797790527, |
| "learning_rate": 4.917342942314407e-06, |
| "loss": 1.0603, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5412940557601262, |
| "grad_norm": 2.1040542125701904, |
| "learning_rate": 4.917165696972379e-06, |
| "loss": 1.0425, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5418200946870069, |
| "grad_norm": 2.214475154876709, |
| "learning_rate": 4.916988264997452e-06, |
| "loss": 1.032, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5423461336138874, |
| "grad_norm": 2.154320001602173, |
| "learning_rate": 4.916810646403325e-06, |
| "loss": 1.0371, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.542872172540768, |
| "grad_norm": 2.1565327644348145, |
| "learning_rate": 4.916632841203714e-06, |
| "loss": 1.0866, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5433982114676486, |
| "grad_norm": 2.197402238845825, |
| "learning_rate": 4.916454849412344e-06, |
| "loss": 1.0531, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5439242503945292, |
| "grad_norm": 2.0249993801116943, |
| "learning_rate": 4.916276671042962e-06, |
| "loss": 1.0485, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5444502893214098, |
| "grad_norm": 2.077765703201294, |
| "learning_rate": 4.916098306109323e-06, |
| "loss": 1.0731, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5449763282482903, |
| "grad_norm": 2.0669186115264893, |
| "learning_rate": 4.915919754625199e-06, |
| "loss": 1.0912, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.545502367175171, |
| "grad_norm": 2.160076379776001, |
| "learning_rate": 4.915741016604378e-06, |
| "loss": 1.0523, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5460284061020515, |
| "grad_norm": 1.8992373943328857, |
| "learning_rate": 4.915562092060659e-06, |
| "loss": 1.0185, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5465544450289321, |
| "grad_norm": 2.0712900161743164, |
| "learning_rate": 4.915382981007857e-06, |
| "loss": 1.0581, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5470804839558128, |
| "grad_norm": 2.2600317001342773, |
| "learning_rate": 4.915203683459802e-06, |
| "loss": 1.0154, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5476065228826933, |
| "grad_norm": 2.050366163253784, |
| "learning_rate": 4.915024199430338e-06, |
| "loss": 1.0371, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5481325618095739, |
| "grad_norm": 2.208393096923828, |
| "learning_rate": 4.914844528933322e-06, |
| "loss": 1.0767, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5486586007364544, |
| "grad_norm": 2.1388466358184814, |
| "learning_rate": 4.914664671982629e-06, |
| "loss": 1.1074, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5491846396633351, |
| "grad_norm": 2.253007411956787, |
| "learning_rate": 4.914484628592144e-06, |
| "loss": 1.0455, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5497106785902157, |
| "grad_norm": 2.2380669116973877, |
| "learning_rate": 4.9143043987757684e-06, |
| "loss": 1.0581, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5502367175170962, |
| "grad_norm": 2.136256456375122, |
| "learning_rate": 4.914123982547419e-06, |
| "loss": 1.0588, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5507627564439769, |
| "grad_norm": 2.0044257640838623, |
| "learning_rate": 4.913943379921025e-06, |
| "loss": 0.9918, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5512887953708574, |
| "grad_norm": 2.089315414428711, |
| "learning_rate": 4.913762590910533e-06, |
| "loss": 1.0675, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.551814834297738, |
| "grad_norm": 2.048976182937622, |
| "learning_rate": 4.9135816155298985e-06, |
| "loss": 1.0259, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5523408732246187, |
| "grad_norm": 2.273501396179199, |
| "learning_rate": 4.913400453793098e-06, |
| "loss": 1.0743, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5528669121514992, |
| "grad_norm": 2.0761802196502686, |
| "learning_rate": 4.913219105714117e-06, |
| "loss": 1.0199, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5533929510783798, |
| "grad_norm": 1.9552183151245117, |
| "learning_rate": 4.913037571306961e-06, |
| "loss": 1.0582, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5539189900052603, |
| "grad_norm": 2.0593061447143555, |
| "learning_rate": 4.9128558505856425e-06, |
| "loss": 0.9626, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.554445028932141, |
| "grad_norm": 2.026820659637451, |
| "learning_rate": 4.9126739435641955e-06, |
| "loss": 1.0253, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5549710678590216, |
| "grad_norm": 2.22835111618042, |
| "learning_rate": 4.9124918502566635e-06, |
| "loss": 1.0176, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5554971067859021, |
| "grad_norm": 1.9653559923171997, |
| "learning_rate": 4.9123095706771064e-06, |
| "loss": 0.9886, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5560231457127828, |
| "grad_norm": 2.083310842514038, |
| "learning_rate": 4.912127104839599e-06, |
| "loss": 1.0105, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5565491846396633, |
| "grad_norm": 2.1681482791900635, |
| "learning_rate": 4.91194445275823e-06, |
| "loss": 1.0359, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5570752235665439, |
| "grad_norm": 1.990717887878418, |
| "learning_rate": 4.911761614447101e-06, |
| "loss": 1.0369, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5576012624934246, |
| "grad_norm": 2.159813642501831, |
| "learning_rate": 4.91157858992033e-06, |
| "loss": 1.0341, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5581273014203051, |
| "grad_norm": 1.9474655389785767, |
| "learning_rate": 4.911395379192048e-06, |
| "loss": 1.0432, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5586533403471857, |
| "grad_norm": 2.140634536743164, |
| "learning_rate": 4.911211982276402e-06, |
| "loss": 1.0485, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5591793792740662, |
| "grad_norm": 2.2925636768341064, |
| "learning_rate": 4.911028399187552e-06, |
| "loss": 1.0648, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5597054182009469, |
| "grad_norm": 2.037755250930786, |
| "learning_rate": 4.910844629939672e-06, |
| "loss": 1.0568, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5602314571278275, |
| "grad_norm": 1.997471809387207, |
| "learning_rate": 4.910660674546951e-06, |
| "loss": 1.0109, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.560757496054708, |
| "grad_norm": 2.109219551086426, |
| "learning_rate": 4.910476533023593e-06, |
| "loss": 1.0658, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5612835349815887, |
| "grad_norm": 2.10469388961792, |
| "learning_rate": 4.9102922053838175e-06, |
| "loss": 1.0612, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5618095739084692, |
| "grad_norm": 2.2748658657073975, |
| "learning_rate": 4.9101076916418535e-06, |
| "loss": 1.0422, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5623356128353498, |
| "grad_norm": 2.0472326278686523, |
| "learning_rate": 4.90992299181195e-06, |
| "loss": 1.0339, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5628616517622304, |
| "grad_norm": 2.0694494247436523, |
| "learning_rate": 4.909738105908367e-06, |
| "loss": 1.0083, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.563387690689111, |
| "grad_norm": 2.0032498836517334, |
| "learning_rate": 4.909553033945379e-06, |
| "loss": 0.9889, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5639137296159916, |
| "grad_norm": 2.091393232345581, |
| "learning_rate": 4.909367775937278e-06, |
| "loss": 1.0856, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5644397685428721, |
| "grad_norm": 2.0542173385620117, |
| "learning_rate": 4.909182331898366e-06, |
| "loss": 1.0422, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5649658074697528, |
| "grad_norm": 2.009228467941284, |
| "learning_rate": 4.908996701842962e-06, |
| "loss": 1.0594, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5654918463966333, |
| "grad_norm": 1.9546911716461182, |
| "learning_rate": 4.9088108857853985e-06, |
| "loss": 1.0691, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5660178853235139, |
| "grad_norm": 2.1657440662384033, |
| "learning_rate": 4.908624883740023e-06, |
| "loss": 1.0252, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5665439242503946, |
| "grad_norm": 2.151035785675049, |
| "learning_rate": 4.9084386957211975e-06, |
| "loss": 1.0587, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5670699631772751, |
| "grad_norm": 2.299673557281494, |
| "learning_rate": 4.908252321743296e-06, |
| "loss": 1.0221, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5675960021041557, |
| "grad_norm": 2.0144848823547363, |
| "learning_rate": 4.908065761820711e-06, |
| "loss": 1.0256, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5681220410310363, |
| "grad_norm": 2.172971725463867, |
| "learning_rate": 4.907879015967846e-06, |
| "loss": 1.0231, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5686480799579169, |
| "grad_norm": 2.0427041053771973, |
| "learning_rate": 4.907692084199119e-06, |
| "loss": 1.0433, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5691741188847975, |
| "grad_norm": 2.1561834812164307, |
| "learning_rate": 4.907504966528966e-06, |
| "loss": 1.0478, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.569700157811678, |
| "grad_norm": 2.012385606765747, |
| "learning_rate": 4.907317662971831e-06, |
| "loss": 1.0703, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5702261967385587, |
| "grad_norm": 2.137075424194336, |
| "learning_rate": 4.907130173542179e-06, |
| "loss": 1.0527, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5707522356654392, |
| "grad_norm": 2.039424180984497, |
| "learning_rate": 4.906942498254485e-06, |
| "loss": 0.9969, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5712782745923198, |
| "grad_norm": 2.0207748413085938, |
| "learning_rate": 4.90675463712324e-06, |
| "loss": 1.0157, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5718043135192005, |
| "grad_norm": 2.024454116821289, |
| "learning_rate": 4.906566590162949e-06, |
| "loss": 1.0699, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.572330352446081, |
| "grad_norm": 2.256537675857544, |
| "learning_rate": 4.90637835738813e-06, |
| "loss": 1.1083, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5728563913729616, |
| "grad_norm": 2.099698543548584, |
| "learning_rate": 4.90618993881332e-06, |
| "loss": 1.0242, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5733824302998421, |
| "grad_norm": 2.0367214679718018, |
| "learning_rate": 4.906001334453064e-06, |
| "loss": 1.0088, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5739084692267228, |
| "grad_norm": 1.9988690614700317, |
| "learning_rate": 4.9058125443219245e-06, |
| "loss": 1.044, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.5744345081536034, |
| "grad_norm": 1.9970273971557617, |
| "learning_rate": 4.9056235684344805e-06, |
| "loss": 1.0847, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.5749605470804839, |
| "grad_norm": 2.152602434158325, |
| "learning_rate": 4.905434406805322e-06, |
| "loss": 1.0931, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5754865860073646, |
| "grad_norm": 2.0728707313537598, |
| "learning_rate": 4.905245059449053e-06, |
| "loss": 1.0401, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.5760126249342451, |
| "grad_norm": 1.94095778465271, |
| "learning_rate": 4.9050555263802954e-06, |
| "loss": 1.0262, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5765386638611257, |
| "grad_norm": 2.126347780227661, |
| "learning_rate": 4.904865807613683e-06, |
| "loss": 1.0678, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5770647027880064, |
| "grad_norm": 2.085378646850586, |
| "learning_rate": 4.904675903163864e-06, |
| "loss": 1.0665, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5775907417148869, |
| "grad_norm": 2.2276804447174072, |
| "learning_rate": 4.9044858130454995e-06, |
| "loss": 1.0718, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5781167806417675, |
| "grad_norm": 2.2318899631500244, |
| "learning_rate": 4.904295537273269e-06, |
| "loss": 1.0663, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.578642819568648, |
| "grad_norm": 2.0555522441864014, |
| "learning_rate": 4.904105075861864e-06, |
| "loss": 0.9989, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5791688584955287, |
| "grad_norm": 2.094501256942749, |
| "learning_rate": 4.9039144288259876e-06, |
| "loss": 1.0802, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5796948974224092, |
| "grad_norm": 2.7403769493103027, |
| "learning_rate": 4.903723596180363e-06, |
| "loss": 1.0024, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5802209363492898, |
| "grad_norm": 2.1775436401367188, |
| "learning_rate": 4.9035325779397225e-06, |
| "loss": 1.0234, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5807469752761705, |
| "grad_norm": 2.2489676475524902, |
| "learning_rate": 4.903341374118816e-06, |
| "loss": 1.0188, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.581273014203051, |
| "grad_norm": 2.2214367389678955, |
| "learning_rate": 4.903149984732407e-06, |
| "loss": 1.0835, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5817990531299316, |
| "grad_norm": 2.203273296356201, |
| "learning_rate": 4.902958409795272e-06, |
| "loss": 1.0547, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5823250920568122, |
| "grad_norm": 2.1076622009277344, |
| "learning_rate": 4.902766649322204e-06, |
| "loss": 1.0571, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5828511309836928, |
| "grad_norm": 2.1270394325256348, |
| "learning_rate": 4.902574703328007e-06, |
| "loss": 0.9863, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5833771699105734, |
| "grad_norm": 2.1030006408691406, |
| "learning_rate": 4.902382571827503e-06, |
| "loss": 1.0404, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.583903208837454, |
| "grad_norm": 2.1046831607818604, |
| "learning_rate": 4.9021902548355275e-06, |
| "loss": 1.018, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5844292477643346, |
| "grad_norm": 2.0193376541137695, |
| "learning_rate": 4.901997752366927e-06, |
| "loss": 1.0035, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5849552866912151, |
| "grad_norm": 2.0812923908233643, |
| "learning_rate": 4.9018050644365675e-06, |
| "loss": 0.9928, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5854813256180957, |
| "grad_norm": 2.035750150680542, |
| "learning_rate": 4.901612191059325e-06, |
| "loss": 1.0658, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5860073645449764, |
| "grad_norm": 2.093606948852539, |
| "learning_rate": 4.901419132250093e-06, |
| "loss": 1.0019, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5865334034718569, |
| "grad_norm": 2.4018402099609375, |
| "learning_rate": 4.901225888023776e-06, |
| "loss": 1.0785, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5870594423987375, |
| "grad_norm": 2.1731529235839844, |
| "learning_rate": 4.901032458395296e-06, |
| "loss": 1.0437, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.587585481325618, |
| "grad_norm": 2.085692882537842, |
| "learning_rate": 4.900838843379588e-06, |
| "loss": 1.0122, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5881115202524987, |
| "grad_norm": 2.272787094116211, |
| "learning_rate": 4.900645042991601e-06, |
| "loss": 1.0708, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5886375591793793, |
| "grad_norm": 2.197758913040161, |
| "learning_rate": 4.900451057246298e-06, |
| "loss": 1.037, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5891635981062598, |
| "grad_norm": 2.228980779647827, |
| "learning_rate": 4.900256886158658e-06, |
| "loss": 1.0306, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5896896370331405, |
| "grad_norm": 2.010698080062866, |
| "learning_rate": 4.900062529743672e-06, |
| "loss": 1.0777, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.590215675960021, |
| "grad_norm": 2.0015103816986084, |
| "learning_rate": 4.899867988016348e-06, |
| "loss": 0.9991, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5907417148869016, |
| "grad_norm": 1.9307256937026978, |
| "learning_rate": 4.899673260991706e-06, |
| "loss": 1.0655, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5912677538137823, |
| "grad_norm": 2.339930295944214, |
| "learning_rate": 4.899478348684782e-06, |
| "loss": 1.0177, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5917937927406628, |
| "grad_norm": 2.000337839126587, |
| "learning_rate": 4.899283251110624e-06, |
| "loss": 1.036, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5923198316675434, |
| "grad_norm": 2.0116374492645264, |
| "learning_rate": 4.899087968284297e-06, |
| "loss": 0.9666, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.592845870594424, |
| "grad_norm": 2.27270245552063, |
| "learning_rate": 4.898892500220878e-06, |
| "loss": 1.0526, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5933719095213046, |
| "grad_norm": 2.1844749450683594, |
| "learning_rate": 4.89869684693546e-06, |
| "loss": 1.0606, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5938979484481852, |
| "grad_norm": 2.112031936645508, |
| "learning_rate": 4.898501008443151e-06, |
| "loss": 1.0846, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5944239873750657, |
| "grad_norm": 2.251878499984741, |
| "learning_rate": 4.898304984759069e-06, |
| "loss": 1.023, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5949500263019464, |
| "grad_norm": 2.064732074737549, |
| "learning_rate": 4.898108775898351e-06, |
| "loss": 1.066, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5954760652288269, |
| "grad_norm": 2.10412335395813, |
| "learning_rate": 4.897912381876147e-06, |
| "loss": 1.0476, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5960021041557075, |
| "grad_norm": 2.1343259811401367, |
| "learning_rate": 4.897715802707621e-06, |
| "loss": 1.0264, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5965281430825881, |
| "grad_norm": 2.3453173637390137, |
| "learning_rate": 4.89751903840795e-06, |
| "loss": 1.076, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5970541820094687, |
| "grad_norm": 2.040123462677002, |
| "learning_rate": 4.897322088992326e-06, |
| "loss": 1.0494, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5975802209363493, |
| "grad_norm": 2.070585012435913, |
| "learning_rate": 4.897124954475958e-06, |
| "loss": 1.0904, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5981062598632298, |
| "grad_norm": 2.048081159591675, |
| "learning_rate": 4.896927634874065e-06, |
| "loss": 0.9855, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5986322987901105, |
| "grad_norm": 2.07633113861084, |
| "learning_rate": 4.896730130201883e-06, |
| "loss": 1.0848, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.599158337716991, |
| "grad_norm": 2.233821153640747, |
| "learning_rate": 4.8965324404746624e-06, |
| "loss": 1.0419, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5996843766438716, |
| "grad_norm": 2.1806929111480713, |
| "learning_rate": 4.896334565707666e-06, |
| "loss": 1.0377, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6002104155707523, |
| "grad_norm": 2.056483268737793, |
| "learning_rate": 4.896136505916174e-06, |
| "loss": 1.0269, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6007364544976328, |
| "grad_norm": 1.9446007013320923, |
| "learning_rate": 4.895938261115476e-06, |
| "loss": 0.9958, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.6012624934245134, |
| "grad_norm": 1.9170737266540527, |
| "learning_rate": 4.8957398313208795e-06, |
| "loss": 1.0083, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.601788532351394, |
| "grad_norm": 2.0455801486968994, |
| "learning_rate": 4.895541216547707e-06, |
| "loss": 1.0819, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6023145712782746, |
| "grad_norm": 2.410231828689575, |
| "learning_rate": 4.8953424168112925e-06, |
| "loss": 1.0265, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6028406102051552, |
| "grad_norm": 2.0946412086486816, |
| "learning_rate": 4.895143432126986e-06, |
| "loss": 1.014, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6033666491320357, |
| "grad_norm": 1.9825836420059204, |
| "learning_rate": 4.894944262510152e-06, |
| "loss": 0.9721, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6038926880589164, |
| "grad_norm": 2.1228606700897217, |
| "learning_rate": 4.8947449079761685e-06, |
| "loss": 1.0971, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6044187269857969, |
| "grad_norm": 2.1443943977355957, |
| "learning_rate": 4.894545368540427e-06, |
| "loss": 0.9956, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6049447659126775, |
| "grad_norm": 1.9651165008544922, |
| "learning_rate": 4.894345644218335e-06, |
| "loss": 1.0103, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6054708048395582, |
| "grad_norm": 1.9829816818237305, |
| "learning_rate": 4.8941457350253134e-06, |
| "loss": 1.0425, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6059968437664387, |
| "grad_norm": 2.122873067855835, |
| "learning_rate": 4.893945640976798e-06, |
| "loss": 1.0532, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6065228826933193, |
| "grad_norm": 2.0714738368988037, |
| "learning_rate": 4.8937453620882365e-06, |
| "loss": 1.0307, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6070489216201999, |
| "grad_norm": 1.9049363136291504, |
| "learning_rate": 4.893544898375096e-06, |
| "loss": 0.9805, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6075749605470805, |
| "grad_norm": 2.432041645050049, |
| "learning_rate": 4.893344249852851e-06, |
| "loss": 1.0833, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6081009994739611, |
| "grad_norm": 2.055748224258423, |
| "learning_rate": 4.893143416536997e-06, |
| "loss": 1.0315, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.6086270384008416, |
| "grad_norm": 1.9813153743743896, |
| "learning_rate": 4.892942398443037e-06, |
| "loss": 1.0786, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6091530773277223, |
| "grad_norm": 2.2038941383361816, |
| "learning_rate": 4.892741195586496e-06, |
| "loss": 1.0604, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6096791162546028, |
| "grad_norm": 2.0015673637390137, |
| "learning_rate": 4.892539807982906e-06, |
| "loss": 0.9863, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6102051551814834, |
| "grad_norm": 2.0392401218414307, |
| "learning_rate": 4.892338235647818e-06, |
| "loss": 1.0218, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6107311941083641, |
| "grad_norm": 2.0060133934020996, |
| "learning_rate": 4.892136478596796e-06, |
| "loss": 1.0134, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6112572330352446, |
| "grad_norm": 1.9645148515701294, |
| "learning_rate": 4.8919345368454164e-06, |
| "loss": 1.0206, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6117832719621252, |
| "grad_norm": 1.9299581050872803, |
| "learning_rate": 4.8917324104092725e-06, |
| "loss": 1.0243, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.6123093108890058, |
| "grad_norm": 2.071143388748169, |
| "learning_rate": 4.891530099303971e-06, |
| "loss": 1.0466, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6128353498158864, |
| "grad_norm": 2.122020959854126, |
| "learning_rate": 4.891327603545132e-06, |
| "loss": 1.0886, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6133613887427669, |
| "grad_norm": 2.0861775875091553, |
| "learning_rate": 4.891124923148391e-06, |
| "loss": 1.0481, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6138874276696475, |
| "grad_norm": 2.053553581237793, |
| "learning_rate": 4.890922058129396e-06, |
| "loss": 1.0332, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6144134665965282, |
| "grad_norm": 2.0698556900024414, |
| "learning_rate": 4.890719008503813e-06, |
| "loss": 0.9913, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6149395055234087, |
| "grad_norm": 2.0626866817474365, |
| "learning_rate": 4.890515774287317e-06, |
| "loss": 1.0383, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6154655444502893, |
| "grad_norm": 2.001122236251831, |
| "learning_rate": 4.890312355495602e-06, |
| "loss": 0.997, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6159915833771699, |
| "grad_norm": 2.141261577606201, |
| "learning_rate": 4.890108752144373e-06, |
| "loss": 1.0139, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6165176223040505, |
| "grad_norm": 2.0430335998535156, |
| "learning_rate": 4.8899049642493514e-06, |
| "loss": 1.0177, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6170436612309311, |
| "grad_norm": 2.0376110076904297, |
| "learning_rate": 4.889700991826271e-06, |
| "loss": 1.0306, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6175697001578117, |
| "grad_norm": 2.0546419620513916, |
| "learning_rate": 4.889496834890882e-06, |
| "loss": 1.0379, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6180957390846923, |
| "grad_norm": 2.004117012023926, |
| "learning_rate": 4.889292493458947e-06, |
| "loss": 1.1014, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6186217780115728, |
| "grad_norm": 2.1904101371765137, |
| "learning_rate": 4.889087967546243e-06, |
| "loss": 1.0252, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6191478169384534, |
| "grad_norm": 2.2026965618133545, |
| "learning_rate": 4.8888832571685626e-06, |
| "loss": 1.0309, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6196738558653341, |
| "grad_norm": 1.9925811290740967, |
| "learning_rate": 4.888678362341711e-06, |
| "loss": 1.0157, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6201998947922146, |
| "grad_norm": 2.4098422527313232, |
| "learning_rate": 4.88847328308151e-06, |
| "loss": 0.9825, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6207259337190952, |
| "grad_norm": 1.9352220296859741, |
| "learning_rate": 4.888268019403792e-06, |
| "loss": 1.0235, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6212519726459758, |
| "grad_norm": 1.9798966646194458, |
| "learning_rate": 4.888062571324407e-06, |
| "loss": 1.0124, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6217780115728564, |
| "grad_norm": 1.9737377166748047, |
| "learning_rate": 4.887856938859218e-06, |
| "loss": 1.005, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.622304050499737, |
| "grad_norm": 2.2528250217437744, |
| "learning_rate": 4.887651122024102e-06, |
| "loss": 1.0207, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6228300894266176, |
| "grad_norm": 2.01436185836792, |
| "learning_rate": 4.887445120834949e-06, |
| "loss": 1.0368, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6233561283534982, |
| "grad_norm": 2.0212924480438232, |
| "learning_rate": 4.887238935307667e-06, |
| "loss": 1.0136, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6238821672803787, |
| "grad_norm": 2.080514669418335, |
| "learning_rate": 4.887032565458174e-06, |
| "loss": 1.0012, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6244082062072593, |
| "grad_norm": 2.220168113708496, |
| "learning_rate": 4.886826011302406e-06, |
| "loss": 1.0055, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.62493424513414, |
| "grad_norm": 2.042325258255005, |
| "learning_rate": 4.886619272856309e-06, |
| "loss": 1.0793, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6254602840610205, |
| "grad_norm": 2.0139427185058594, |
| "learning_rate": 4.886412350135848e-06, |
| "loss": 1.0853, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6259863229879011, |
| "grad_norm": 2.072531223297119, |
| "learning_rate": 4.886205243156998e-06, |
| "loss": 1.0611, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6265123619147817, |
| "grad_norm": 2.1070992946624756, |
| "learning_rate": 4.8859979519357505e-06, |
| "loss": 1.0171, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6270384008416623, |
| "grad_norm": 1.9750585556030273, |
| "learning_rate": 4.885790476488111e-06, |
| "loss": 1.01, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.6275644397685429, |
| "grad_norm": 1.9221036434173584, |
| "learning_rate": 4.885582816830099e-06, |
| "loss": 1.0173, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6280904786954234, |
| "grad_norm": 2.0700929164886475, |
| "learning_rate": 4.885374972977748e-06, |
| "loss": 1.0469, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6286165176223041, |
| "grad_norm": 2.1358914375305176, |
| "learning_rate": 4.885166944947106e-06, |
| "loss": 1.0144, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6291425565491846, |
| "grad_norm": 2.0657570362091064, |
| "learning_rate": 4.884958732754236e-06, |
| "loss": 1.0278, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6296685954760652, |
| "grad_norm": 2.050619125366211, |
| "learning_rate": 4.884750336415213e-06, |
| "loss": 1.0401, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6301946344029458, |
| "grad_norm": 2.029069423675537, |
| "learning_rate": 4.884541755946127e-06, |
| "loss": 1.0265, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6307206733298264, |
| "grad_norm": 2.2242050170898438, |
| "learning_rate": 4.884332991363086e-06, |
| "loss": 1.043, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.631246712256707, |
| "grad_norm": 1.9235576391220093, |
| "learning_rate": 4.8841240426822056e-06, |
| "loss": 1.0323, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6317727511835876, |
| "grad_norm": 2.0110039710998535, |
| "learning_rate": 4.88391490991962e-06, |
| "loss": 0.9861, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6322987901104682, |
| "grad_norm": 1.9583542346954346, |
| "learning_rate": 4.883705593091478e-06, |
| "loss": 1.0907, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6328248290373487, |
| "grad_norm": 2.046147346496582, |
| "learning_rate": 4.88349609221394e-06, |
| "loss": 1.0264, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6333508679642293, |
| "grad_norm": 2.072329521179199, |
| "learning_rate": 4.8832864073031826e-06, |
| "loss": 1.0273, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.63387690689111, |
| "grad_norm": 2.163562774658203, |
| "learning_rate": 4.883076538375395e-06, |
| "loss": 0.9729, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6344029458179905, |
| "grad_norm": 2.018745183944702, |
| "learning_rate": 4.8828664854467825e-06, |
| "loss": 1.0349, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.6349289847448711, |
| "grad_norm": 1.9641830921173096, |
| "learning_rate": 4.882656248533562e-06, |
| "loss": 1.0254, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6354550236717517, |
| "grad_norm": 2.189903736114502, |
| "learning_rate": 4.8824458276519676e-06, |
| "loss": 1.0347, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6359810625986323, |
| "grad_norm": 1.9000815153121948, |
| "learning_rate": 4.882235222818245e-06, |
| "loss": 1.0068, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6365071015255129, |
| "grad_norm": 2.008253335952759, |
| "learning_rate": 4.882024434048658e-06, |
| "loss": 0.9951, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6370331404523935, |
| "grad_norm": 2.254880905151367, |
| "learning_rate": 4.881813461359479e-06, |
| "loss": 1.0254, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6375591793792741, |
| "grad_norm": 2.079281806945801, |
| "learning_rate": 4.881602304766999e-06, |
| "loss": 1.0138, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6380852183061546, |
| "grad_norm": 1.9515445232391357, |
| "learning_rate": 4.881390964287521e-06, |
| "loss": 0.9896, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6386112572330352, |
| "grad_norm": 2.118746757507324, |
| "learning_rate": 4.881179439937363e-06, |
| "loss": 1.0554, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6391372961599159, |
| "grad_norm": 1.9809492826461792, |
| "learning_rate": 4.8809677317328574e-06, |
| "loss": 1.0327, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6396633350867964, |
| "grad_norm": 2.0196714401245117, |
| "learning_rate": 4.88075583969035e-06, |
| "loss": 1.0072, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.640189374013677, |
| "grad_norm": 2.075596570968628, |
| "learning_rate": 4.8805437638262024e-06, |
| "loss": 1.0088, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6407154129405576, |
| "grad_norm": 1.919331431388855, |
| "learning_rate": 4.880331504156788e-06, |
| "loss": 0.9561, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6412414518674382, |
| "grad_norm": 2.1209754943847656, |
| "learning_rate": 4.8801190606984974e-06, |
| "loss": 1.0436, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6417674907943188, |
| "grad_norm": 2.1692416667938232, |
| "learning_rate": 4.879906433467731e-06, |
| "loss": 1.0596, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6422935297211994, |
| "grad_norm": 2.127383232116699, |
| "learning_rate": 4.879693622480908e-06, |
| "loss": 1.0527, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.64281956864808, |
| "grad_norm": 2.0686752796173096, |
| "learning_rate": 4.87948062775446e-06, |
| "loss": 1.0161, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.6433456075749605, |
| "grad_norm": 1.9912559986114502, |
| "learning_rate": 4.879267449304831e-06, |
| "loss": 1.0246, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.6438716465018411, |
| "grad_norm": 1.9714523553848267, |
| "learning_rate": 4.879054087148483e-06, |
| "loss": 1.0669, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.6443976854287218, |
| "grad_norm": 2.0122146606445312, |
| "learning_rate": 4.878840541301888e-06, |
| "loss": 1.0383, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6449237243556023, |
| "grad_norm": 2.191110134124756, |
| "learning_rate": 4.878626811781536e-06, |
| "loss": 1.0832, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.6454497632824829, |
| "grad_norm": 2.018800735473633, |
| "learning_rate": 4.8784128986039274e-06, |
| "loss": 1.0588, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6459758022093635, |
| "grad_norm": 2.0812923908233643, |
| "learning_rate": 4.87819880178558e-06, |
| "loss": 1.0221, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.6465018411362441, |
| "grad_norm": 2.110596179962158, |
| "learning_rate": 4.877984521343025e-06, |
| "loss": 1.0252, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.6470278800631246, |
| "grad_norm": 2.2176296710968018, |
| "learning_rate": 4.877770057292806e-06, |
| "loss": 1.0575, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6475539189900053, |
| "grad_norm": 2.0294981002807617, |
| "learning_rate": 4.8775554096514836e-06, |
| "loss": 0.9862, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.6480799579168859, |
| "grad_norm": 2.03635573387146, |
| "learning_rate": 4.8773405784356285e-06, |
| "loss": 1.0229, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6486059968437664, |
| "grad_norm": 2.2391481399536133, |
| "learning_rate": 4.877125563661831e-06, |
| "loss": 1.1258, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.649132035770647, |
| "grad_norm": 2.1449427604675293, |
| "learning_rate": 4.876910365346691e-06, |
| "loss": 1.039, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.6496580746975276, |
| "grad_norm": 2.075510025024414, |
| "learning_rate": 4.876694983506826e-06, |
| "loss": 1.047, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.6501841136244082, |
| "grad_norm": 1.9154462814331055, |
| "learning_rate": 4.876479418158862e-06, |
| "loss": 0.9906, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.6507101525512888, |
| "grad_norm": 2.2096331119537354, |
| "learning_rate": 4.876263669319449e-06, |
| "loss": 1.0843, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.6512361914781694, |
| "grad_norm": 2.0682895183563232, |
| "learning_rate": 4.87604773700524e-06, |
| "loss": 1.0262, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.65176223040505, |
| "grad_norm": 2.0859344005584717, |
| "learning_rate": 4.8758316212329106e-06, |
| "loss": 1.02, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.6522882693319305, |
| "grad_norm": 2.060521364212036, |
| "learning_rate": 4.875615322019146e-06, |
| "loss": 1.0455, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6528143082588111, |
| "grad_norm": 2.049457311630249, |
| "learning_rate": 4.875398839380647e-06, |
| "loss": 1.0763, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6533403471856918, |
| "grad_norm": 2.2475039958953857, |
| "learning_rate": 4.875182173334129e-06, |
| "loss": 1.0599, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.6538663861125723, |
| "grad_norm": 1.9375535249710083, |
| "learning_rate": 4.874965323896321e-06, |
| "loss": 0.9758, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6543924250394529, |
| "grad_norm": 2.0157570838928223, |
| "learning_rate": 4.874748291083967e-06, |
| "loss": 1.0491, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6549184639663335, |
| "grad_norm": 2.1339237689971924, |
| "learning_rate": 4.874531074913823e-06, |
| "loss": 0.9634, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6554445028932141, |
| "grad_norm": 1.946191430091858, |
| "learning_rate": 4.874313675402662e-06, |
| "loss": 1.0407, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6559705418200947, |
| "grad_norm": 1.9623258113861084, |
| "learning_rate": 4.874096092567268e-06, |
| "loss": 1.0662, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6564965807469753, |
| "grad_norm": 2.092224359512329, |
| "learning_rate": 4.873878326424443e-06, |
| "loss": 1.0802, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6570226196738559, |
| "grad_norm": 1.863853931427002, |
| "learning_rate": 4.873660376990999e-06, |
| "loss": 1.0789, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6575486586007364, |
| "grad_norm": 2.146857976913452, |
| "learning_rate": 4.8734422442837655e-06, |
| "loss": 1.0132, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.658074697527617, |
| "grad_norm": 2.022573232650757, |
| "learning_rate": 4.8732239283195844e-06, |
| "loss": 1.0252, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6586007364544977, |
| "grad_norm": 2.160632848739624, |
| "learning_rate": 4.873005429115312e-06, |
| "loss": 1.0235, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6591267753813782, |
| "grad_norm": 2.0909252166748047, |
| "learning_rate": 4.87278674668782e-06, |
| "loss": 1.0671, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6596528143082588, |
| "grad_norm": 1.9689445495605469, |
| "learning_rate": 4.872567881053991e-06, |
| "loss": 1.0323, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6601788532351394, |
| "grad_norm": 2.141439914703369, |
| "learning_rate": 4.872348832230727e-06, |
| "loss": 1.0019, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.66070489216202, |
| "grad_norm": 1.9927963018417358, |
| "learning_rate": 4.872129600234938e-06, |
| "loss": 1.0262, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6612309310889006, |
| "grad_norm": 2.1227667331695557, |
| "learning_rate": 4.871910185083554e-06, |
| "loss": 1.0341, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6617569700157812, |
| "grad_norm": 2.0554583072662354, |
| "learning_rate": 4.871690586793514e-06, |
| "loss": 1.0458, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6622830089426618, |
| "grad_norm": 1.9936654567718506, |
| "learning_rate": 4.871470805381775e-06, |
| "loss": 1.0125, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6628090478695423, |
| "grad_norm": 2.0953080654144287, |
| "learning_rate": 4.871250840865306e-06, |
| "loss": 1.0518, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.663335086796423, |
| "grad_norm": 1.9445053339004517, |
| "learning_rate": 4.871030693261091e-06, |
| "loss": 0.9892, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6638611257233035, |
| "grad_norm": 2.054898500442505, |
| "learning_rate": 4.870810362586127e-06, |
| "loss": 1.0712, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6643871646501841, |
| "grad_norm": 2.158090114593506, |
| "learning_rate": 4.870589848857428e-06, |
| "loss": 0.9874, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6649132035770647, |
| "grad_norm": 2.081550121307373, |
| "learning_rate": 4.870369152092019e-06, |
| "loss": 1.0299, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.6654392425039453, |
| "grad_norm": 1.9839400053024292, |
| "learning_rate": 4.87014827230694e-06, |
| "loss": 0.9997, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.6659652814308259, |
| "grad_norm": 2.0596096515655518, |
| "learning_rate": 4.869927209519246e-06, |
| "loss": 1.0655, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6664913203577064, |
| "grad_norm": 2.3403422832489014, |
| "learning_rate": 4.8697059637460055e-06, |
| "loss": 1.0551, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.667017359284587, |
| "grad_norm": 2.072814702987671, |
| "learning_rate": 4.8694845350043004e-06, |
| "loss": 1.0454, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6675433982114677, |
| "grad_norm": 2.2819271087646484, |
| "learning_rate": 4.86926292331123e-06, |
| "loss": 1.0076, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6680694371383482, |
| "grad_norm": 2.162179708480835, |
| "learning_rate": 4.8690411286839024e-06, |
| "loss": 1.0145, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6685954760652288, |
| "grad_norm": 2.1072568893432617, |
| "learning_rate": 4.868819151139443e-06, |
| "loss": 1.0936, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.6691215149921094, |
| "grad_norm": 2.113056182861328, |
| "learning_rate": 4.868596990694994e-06, |
| "loss": 1.044, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.66964755391899, |
| "grad_norm": 1.9856184720993042, |
| "learning_rate": 4.868374647367705e-06, |
| "loss": 1.0119, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6701735928458706, |
| "grad_norm": 2.013106346130371, |
| "learning_rate": 4.868152121174746e-06, |
| "loss": 1.0913, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6706996317727512, |
| "grad_norm": 1.8831686973571777, |
| "learning_rate": 4.867929412133297e-06, |
| "loss": 1.0077, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.6712256706996318, |
| "grad_norm": 2.035214424133301, |
| "learning_rate": 4.867706520260554e-06, |
| "loss": 0.9683, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.6717517096265123, |
| "grad_norm": 2.0336945056915283, |
| "learning_rate": 4.867483445573727e-06, |
| "loss": 1.0583, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.672277748553393, |
| "grad_norm": 1.9241890907287598, |
| "learning_rate": 4.867260188090041e-06, |
| "loss": 1.0162, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6728037874802736, |
| "grad_norm": 2.122288942337036, |
| "learning_rate": 4.8670367478267335e-06, |
| "loss": 1.0633, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6733298264071541, |
| "grad_norm": 1.964282512664795, |
| "learning_rate": 4.8668131248010555e-06, |
| "loss": 1.0009, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6738558653340347, |
| "grad_norm": 2.075181722640991, |
| "learning_rate": 4.866589319030273e-06, |
| "loss": 1.0535, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6743819042609153, |
| "grad_norm": 2.086574077606201, |
| "learning_rate": 4.866365330531668e-06, |
| "loss": 1.0125, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.6749079431877959, |
| "grad_norm": 2.176712989807129, |
| "learning_rate": 4.866141159322535e-06, |
| "loss": 1.0883, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.6754339821146765, |
| "grad_norm": 2.4133596420288086, |
| "learning_rate": 4.865916805420181e-06, |
| "loss": 1.1115, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.6759600210415571, |
| "grad_norm": 1.9632985591888428, |
| "learning_rate": 4.865692268841931e-06, |
| "loss": 0.9837, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6764860599684377, |
| "grad_norm": 2.320810556411743, |
| "learning_rate": 4.865467549605119e-06, |
| "loss": 1.0307, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6770120988953182, |
| "grad_norm": 2.259291172027588, |
| "learning_rate": 4.865242647727097e-06, |
| "loss": 1.0125, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.6775381378221988, |
| "grad_norm": 2.069227695465088, |
| "learning_rate": 4.8650175632252314e-06, |
| "loss": 1.0348, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.6780641767490795, |
| "grad_norm": 2.093912363052368, |
| "learning_rate": 4.8647922961169e-06, |
| "loss": 1.0628, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.67859021567596, |
| "grad_norm": 2.0842857360839844, |
| "learning_rate": 4.864566846419497e-06, |
| "loss": 1.0296, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6791162546028406, |
| "grad_norm": 2.1448631286621094, |
| "learning_rate": 4.864341214150428e-06, |
| "loss": 1.0344, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6796422935297212, |
| "grad_norm": 2.173478841781616, |
| "learning_rate": 4.864115399327115e-06, |
| "loss": 1.0662, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.6801683324566018, |
| "grad_norm": 2.1156740188598633, |
| "learning_rate": 4.863889401966995e-06, |
| "loss": 1.0568, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.6806943713834824, |
| "grad_norm": 2.0641050338745117, |
| "learning_rate": 4.863663222087515e-06, |
| "loss": 1.0508, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.681220410310363, |
| "grad_norm": 2.050645112991333, |
| "learning_rate": 4.863436859706141e-06, |
| "loss": 1.0198, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6817464492372436, |
| "grad_norm": 1.9624086618423462, |
| "learning_rate": 4.86321031484035e-06, |
| "loss": 1.012, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6822724881641241, |
| "grad_norm": 2.2763307094573975, |
| "learning_rate": 4.8629835875076325e-06, |
| "loss": 1.0208, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6827985270910047, |
| "grad_norm": 1.952094316482544, |
| "learning_rate": 4.862756677725496e-06, |
| "loss": 0.9912, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6833245660178853, |
| "grad_norm": 1.9964386224746704, |
| "learning_rate": 4.862529585511461e-06, |
| "loss": 1.0216, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.6838506049447659, |
| "grad_norm": 2.0915441513061523, |
| "learning_rate": 4.862302310883061e-06, |
| "loss": 1.028, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6843766438716465, |
| "grad_norm": 2.239182233810425, |
| "learning_rate": 4.862074853857843e-06, |
| "loss": 1.1119, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6849026827985271, |
| "grad_norm": 2.120128870010376, |
| "learning_rate": 4.861847214453371e-06, |
| "loss": 1.0811, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6854287217254077, |
| "grad_norm": 1.8495033979415894, |
| "learning_rate": 4.86161939268722e-06, |
| "loss": 0.9559, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.6859547606522882, |
| "grad_norm": 1.9767253398895264, |
| "learning_rate": 4.861391388576982e-06, |
| "loss": 0.9942, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.6864807995791689, |
| "grad_norm": 1.9148463010787964, |
| "learning_rate": 4.8611632021402605e-06, |
| "loss": 1.0152, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.6870068385060495, |
| "grad_norm": 2.036726474761963, |
| "learning_rate": 4.860934833394674e-06, |
| "loss": 1.0692, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.68753287743293, |
| "grad_norm": 2.03383731842041, |
| "learning_rate": 4.860706282357856e-06, |
| "loss": 1.0429, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.6880589163598106, |
| "grad_norm": 1.986863374710083, |
| "learning_rate": 4.860477549047452e-06, |
| "loss": 0.9737, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6885849552866912, |
| "grad_norm": 1.9917157888412476, |
| "learning_rate": 4.860248633481124e-06, |
| "loss": 0.9808, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.6891109942135718, |
| "grad_norm": 1.9868308305740356, |
| "learning_rate": 4.860019535676546e-06, |
| "loss": 1.0001, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6896370331404524, |
| "grad_norm": 1.9900240898132324, |
| "learning_rate": 4.859790255651408e-06, |
| "loss": 1.0561, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.690163072067333, |
| "grad_norm": 1.987703800201416, |
| "learning_rate": 4.859560793423412e-06, |
| "loss": 1.013, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6906891109942136, |
| "grad_norm": 1.9851711988449097, |
| "learning_rate": 4.859331149010276e-06, |
| "loss": 1.0727, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6912151499210941, |
| "grad_norm": 1.9733060598373413, |
| "learning_rate": 4.8591013224297304e-06, |
| "loss": 0.9924, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6917411888479748, |
| "grad_norm": 1.9737035036087036, |
| "learning_rate": 4.85887131369952e-06, |
| "loss": 1.0131, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6922672277748554, |
| "grad_norm": 2.176969528198242, |
| "learning_rate": 4.858641122837407e-06, |
| "loss": 1.0382, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6927932667017359, |
| "grad_norm": 1.951177716255188, |
| "learning_rate": 4.858410749861161e-06, |
| "loss": 1.011, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6933193056286165, |
| "grad_norm": 2.009986639022827, |
| "learning_rate": 4.858180194788572e-06, |
| "loss": 1.0999, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.6938453445554971, |
| "grad_norm": 2.0470845699310303, |
| "learning_rate": 4.857949457637441e-06, |
| "loss": 1.0477, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6943713834823777, |
| "grad_norm": 2.163547992706299, |
| "learning_rate": 4.857718538425582e-06, |
| "loss": 1.0229, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6948974224092583, |
| "grad_norm": 2.0979368686676025, |
| "learning_rate": 4.857487437170827e-06, |
| "loss": 1.0686, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6954234613361389, |
| "grad_norm": 2.0388388633728027, |
| "learning_rate": 4.857256153891017e-06, |
| "loss": 0.991, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6959495002630195, |
| "grad_norm": 2.136115312576294, |
| "learning_rate": 4.8570246886040124e-06, |
| "loss": 1.0249, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6964755391899, |
| "grad_norm": 2.0932974815368652, |
| "learning_rate": 4.8567930413276835e-06, |
| "loss": 1.0649, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6970015781167807, |
| "grad_norm": 2.0559682846069336, |
| "learning_rate": 4.856561212079916e-06, |
| "loss": 0.9931, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6975276170436613, |
| "grad_norm": 1.9723689556121826, |
| "learning_rate": 4.856329200878611e-06, |
| "loss": 0.9628, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6980536559705418, |
| "grad_norm": 2.054049253463745, |
| "learning_rate": 4.8560970077416805e-06, |
| "loss": 1.0322, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6985796948974224, |
| "grad_norm": 2.100574254989624, |
| "learning_rate": 4.855864632687055e-06, |
| "loss": 1.0941, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.699105733824303, |
| "grad_norm": 2.1415367126464844, |
| "learning_rate": 4.8556320757326735e-06, |
| "loss": 1.0341, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6996317727511836, |
| "grad_norm": 1.988004207611084, |
| "learning_rate": 4.855399336896495e-06, |
| "loss": 1.0357, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7001578116780641, |
| "grad_norm": 2.0249714851379395, |
| "learning_rate": 4.855166416196487e-06, |
| "loss": 1.0489, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.7006838506049448, |
| "grad_norm": 1.9197039604187012, |
| "learning_rate": 4.8549333136506356e-06, |
| "loss": 1.0094, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7012098895318254, |
| "grad_norm": 2.153716564178467, |
| "learning_rate": 4.854700029276938e-06, |
| "loss": 1.0613, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7017359284587059, |
| "grad_norm": 1.9626339673995972, |
| "learning_rate": 4.854466563093407e-06, |
| "loss": 1.024, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.7022619673855865, |
| "grad_norm": 2.0288281440734863, |
| "learning_rate": 4.854232915118068e-06, |
| "loss": 0.9778, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7027880063124671, |
| "grad_norm": 1.9677989482879639, |
| "learning_rate": 4.853999085368963e-06, |
| "loss": 0.9802, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.7033140452393477, |
| "grad_norm": 2.054617404937744, |
| "learning_rate": 4.853765073864144e-06, |
| "loss": 0.9523, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7038400841662283, |
| "grad_norm": 2.0509955883026123, |
| "learning_rate": 4.853530880621681e-06, |
| "loss": 1.0324, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7043661230931089, |
| "grad_norm": 2.224724054336548, |
| "learning_rate": 4.853296505659657e-06, |
| "loss": 1.0965, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7048921620199895, |
| "grad_norm": 1.9698208570480347, |
| "learning_rate": 4.8530619489961664e-06, |
| "loss": 1.0486, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.70541820094687, |
| "grad_norm": 2.129383087158203, |
| "learning_rate": 4.85282721064932e-06, |
| "loss": 1.0857, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.7059442398737507, |
| "grad_norm": 2.2943053245544434, |
| "learning_rate": 4.852592290637244e-06, |
| "loss": 1.0628, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7064702788006313, |
| "grad_norm": 2.0792641639709473, |
| "learning_rate": 4.852357188978075e-06, |
| "loss": 1.0604, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.7069963177275118, |
| "grad_norm": 2.0224812030792236, |
| "learning_rate": 4.852121905689968e-06, |
| "loss": 1.0687, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.7075223566543924, |
| "grad_norm": 2.4030919075012207, |
| "learning_rate": 4.851886440791087e-06, |
| "loss": 1.0942, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.708048395581273, |
| "grad_norm": 2.190215826034546, |
| "learning_rate": 4.851650794299614e-06, |
| "loss": 1.0393, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7085744345081536, |
| "grad_norm": 2.1099565029144287, |
| "learning_rate": 4.851414966233743e-06, |
| "loss": 1.0452, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.7091004734350342, |
| "grad_norm": 2.156395673751831, |
| "learning_rate": 4.851178956611682e-06, |
| "loss": 1.0625, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7096265123619148, |
| "grad_norm": 2.1840314865112305, |
| "learning_rate": 4.850942765451655e-06, |
| "loss": 1.0467, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7101525512887954, |
| "grad_norm": 2.0080723762512207, |
| "learning_rate": 4.850706392771899e-06, |
| "loss": 1.0187, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7106785902156759, |
| "grad_norm": 2.1242828369140625, |
| "learning_rate": 4.850469838590664e-06, |
| "loss": 1.0459, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7112046291425566, |
| "grad_norm": 1.9652162790298462, |
| "learning_rate": 4.8502331029262125e-06, |
| "loss": 1.0404, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7117306680694372, |
| "grad_norm": 2.2363545894622803, |
| "learning_rate": 4.849996185796827e-06, |
| "loss": 1.0182, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7122567069963177, |
| "grad_norm": 2.028017044067383, |
| "learning_rate": 4.849759087220798e-06, |
| "loss": 1.0213, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.7127827459231983, |
| "grad_norm": 2.265037775039673, |
| "learning_rate": 4.849521807216432e-06, |
| "loss": 1.0316, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7133087848500789, |
| "grad_norm": 2.083799362182617, |
| "learning_rate": 4.849284345802051e-06, |
| "loss": 1.0133, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7138348237769595, |
| "grad_norm": 1.9307647943496704, |
| "learning_rate": 4.8490467029959895e-06, |
| "loss": 1.0023, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.7143608627038401, |
| "grad_norm": 2.1079766750335693, |
| "learning_rate": 4.848808878816595e-06, |
| "loss": 1.0208, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7148869016307207, |
| "grad_norm": 2.0214877128601074, |
| "learning_rate": 4.8485708732822315e-06, |
| "loss": 0.9904, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7154129405576013, |
| "grad_norm": 2.150768756866455, |
| "learning_rate": 4.848332686411276e-06, |
| "loss": 0.9969, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7159389794844818, |
| "grad_norm": 2.0330607891082764, |
| "learning_rate": 4.8480943182221184e-06, |
| "loss": 0.9865, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7164650184113625, |
| "grad_norm": 1.973970651626587, |
| "learning_rate": 4.847855768733163e-06, |
| "loss": 0.9815, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.716991057338243, |
| "grad_norm": 2.074868679046631, |
| "learning_rate": 4.84761703796283e-06, |
| "loss": 1.0499, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7175170962651236, |
| "grad_norm": 1.9750478267669678, |
| "learning_rate": 4.8473781259295514e-06, |
| "loss": 0.9797, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.7180431351920042, |
| "grad_norm": 1.971375823020935, |
| "learning_rate": 4.847139032651774e-06, |
| "loss": 0.9805, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7185691741188848, |
| "grad_norm": 2.0710880756378174, |
| "learning_rate": 4.846899758147958e-06, |
| "loss": 1.0143, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7190952130457654, |
| "grad_norm": 1.9696688652038574, |
| "learning_rate": 4.8466603024365785e-06, |
| "loss": 0.9869, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7196212519726459, |
| "grad_norm": 2.1022462844848633, |
| "learning_rate": 4.846420665536126e-06, |
| "loss": 1.0048, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7201472908995266, |
| "grad_norm": 2.164783000946045, |
| "learning_rate": 4.8461808474651e-06, |
| "loss": 1.0114, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7206733298264072, |
| "grad_norm": 2.0148744583129883, |
| "learning_rate": 4.845940848242019e-06, |
| "loss": 1.0232, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7211993687532877, |
| "grad_norm": 2.0193605422973633, |
| "learning_rate": 4.845700667885414e-06, |
| "loss": 0.9764, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7217254076801684, |
| "grad_norm": 2.005157232284546, |
| "learning_rate": 4.845460306413829e-06, |
| "loss": 1.0242, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7222514466070489, |
| "grad_norm": 2.128805637359619, |
| "learning_rate": 4.845219763845823e-06, |
| "loss": 0.9964, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.7227774855339295, |
| "grad_norm": 1.9924060106277466, |
| "learning_rate": 4.844979040199968e-06, |
| "loss": 1.0185, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7233035244608101, |
| "grad_norm": 2.2126121520996094, |
| "learning_rate": 4.844738135494851e-06, |
| "loss": 1.0013, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7238295633876907, |
| "grad_norm": 2.0494630336761475, |
| "learning_rate": 4.844497049749073e-06, |
| "loss": 1.0628, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7243556023145713, |
| "grad_norm": 2.4115402698516846, |
| "learning_rate": 4.844255782981249e-06, |
| "loss": 1.0623, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7248816412414518, |
| "grad_norm": 2.062485933303833, |
| "learning_rate": 4.8440143352100054e-06, |
| "loss": 1.0115, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7254076801683325, |
| "grad_norm": 2.995894432067871, |
| "learning_rate": 4.843772706453988e-06, |
| "loss": 1.0805, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7259337190952131, |
| "grad_norm": 1.9974204301834106, |
| "learning_rate": 4.84353089673185e-06, |
| "loss": 1.0221, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7264597580220936, |
| "grad_norm": 2.1927318572998047, |
| "learning_rate": 4.843288906062264e-06, |
| "loss": 1.0273, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7269857969489742, |
| "grad_norm": 2.0213675498962402, |
| "learning_rate": 4.8430467344639136e-06, |
| "loss": 0.968, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7275118358758548, |
| "grad_norm": 2.2534306049346924, |
| "learning_rate": 4.842804381955497e-06, |
| "loss": 1.0457, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.7280378748027354, |
| "grad_norm": 2.003638505935669, |
| "learning_rate": 4.842561848555728e-06, |
| "loss": 1.0471, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.728563913729616, |
| "grad_norm": 2.217237949371338, |
| "learning_rate": 4.842319134283331e-06, |
| "loss": 1.0348, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7290899526564966, |
| "grad_norm": 2.1162800788879395, |
| "learning_rate": 4.842076239157047e-06, |
| "loss": 1.0548, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7296159915833772, |
| "grad_norm": 2.043252944946289, |
| "learning_rate": 4.8418331631956325e-06, |
| "loss": 1.0931, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.7301420305102577, |
| "grad_norm": 2.099283218383789, |
| "learning_rate": 4.841589906417853e-06, |
| "loss": 1.0059, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.7306680694371384, |
| "grad_norm": 1.9934890270233154, |
| "learning_rate": 4.8413464688424904e-06, |
| "loss": 1.0327, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.731194108364019, |
| "grad_norm": 1.868202567100525, |
| "learning_rate": 4.841102850488343e-06, |
| "loss": 0.9622, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7317201472908995, |
| "grad_norm": 1.9592076539993286, |
| "learning_rate": 4.84085905137422e-06, |
| "loss": 1.0413, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.7322461862177801, |
| "grad_norm": 2.0478546619415283, |
| "learning_rate": 4.840615071518946e-06, |
| "loss": 1.0343, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7327722251446607, |
| "grad_norm": 2.4996554851531982, |
| "learning_rate": 4.840370910941358e-06, |
| "loss": 1.1106, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7332982640715413, |
| "grad_norm": 2.0023233890533447, |
| "learning_rate": 4.8401265696603085e-06, |
| "loss": 1.0273, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.7338243029984218, |
| "grad_norm": 2.0366029739379883, |
| "learning_rate": 4.8398820476946625e-06, |
| "loss": 1.0092, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7343503419253025, |
| "grad_norm": 2.2142248153686523, |
| "learning_rate": 4.839637345063302e-06, |
| "loss": 0.9884, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7348763808521831, |
| "grad_norm": 1.9955226182937622, |
| "learning_rate": 4.839392461785119e-06, |
| "loss": 1.054, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7354024197790636, |
| "grad_norm": 2.0607223510742188, |
| "learning_rate": 4.839147397879023e-06, |
| "loss": 0.9826, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7359284587059443, |
| "grad_norm": 2.054483652114868, |
| "learning_rate": 4.8389021533639345e-06, |
| "loss": 1.0738, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7364544976328248, |
| "grad_norm": 2.1066908836364746, |
| "learning_rate": 4.8386567282587886e-06, |
| "loss": 1.0937, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7369805365597054, |
| "grad_norm": 2.018155097961426, |
| "learning_rate": 4.8384111225825355e-06, |
| "loss": 0.9767, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.737506575486586, |
| "grad_norm": 2.152189016342163, |
| "learning_rate": 4.83816533635414e-06, |
| "loss": 1.0062, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7380326144134666, |
| "grad_norm": 1.9946335554122925, |
| "learning_rate": 4.8379193695925785e-06, |
| "loss": 1.0724, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.7385586533403472, |
| "grad_norm": 2.077017307281494, |
| "learning_rate": 4.837673222316843e-06, |
| "loss": 1.0991, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7390846922672277, |
| "grad_norm": 2.0850563049316406, |
| "learning_rate": 4.837426894545938e-06, |
| "loss": 1.0527, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.7396107311941084, |
| "grad_norm": 1.9786406755447388, |
| "learning_rate": 4.837180386298883e-06, |
| "loss": 0.9666, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.740136770120989, |
| "grad_norm": 2.0060155391693115, |
| "learning_rate": 4.836933697594711e-06, |
| "loss": 1.0795, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7406628090478695, |
| "grad_norm": 2.086906909942627, |
| "learning_rate": 4.836686828452471e-06, |
| "loss": 0.9925, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7411888479747502, |
| "grad_norm": 2.0125632286071777, |
| "learning_rate": 4.836439778891223e-06, |
| "loss": 0.9706, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7417148869016307, |
| "grad_norm": 1.8921434879302979, |
| "learning_rate": 4.836192548930041e-06, |
| "loss": 1.0237, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7422409258285113, |
| "grad_norm": 1.9400858879089355, |
| "learning_rate": 4.835945138588015e-06, |
| "loss": 1.0444, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.742766964755392, |
| "grad_norm": 2.083749294281006, |
| "learning_rate": 4.835697547884248e-06, |
| "loss": 1.0136, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.7432930036822725, |
| "grad_norm": 2.0750844478607178, |
| "learning_rate": 4.8354497768378575e-06, |
| "loss": 1.0863, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.7438190426091531, |
| "grad_norm": 2.137214183807373, |
| "learning_rate": 4.835201825467973e-06, |
| "loss": 1.0095, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.7443450815360336, |
| "grad_norm": 2.06549072265625, |
| "learning_rate": 4.834953693793739e-06, |
| "loss": 1.0449, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.7448711204629143, |
| "grad_norm": 2.0396728515625, |
| "learning_rate": 4.834705381834315e-06, |
| "loss": 1.0093, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.7453971593897949, |
| "grad_norm": 1.993697166442871, |
| "learning_rate": 4.834456889608874e-06, |
| "loss": 1.0075, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.7459231983166754, |
| "grad_norm": 2.1017816066741943, |
| "learning_rate": 4.834208217136601e-06, |
| "loss": 1.0687, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.746449237243556, |
| "grad_norm": 2.0740413665771484, |
| "learning_rate": 4.833959364436698e-06, |
| "loss": 0.9777, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.7469752761704366, |
| "grad_norm": 2.0858206748962402, |
| "learning_rate": 4.833710331528377e-06, |
| "loss": 1.044, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.7475013150973172, |
| "grad_norm": 2.33298921585083, |
| "learning_rate": 4.833461118430869e-06, |
| "loss": 1.0602, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.7480273540241978, |
| "grad_norm": 2.1458897590637207, |
| "learning_rate": 4.833211725163414e-06, |
| "loss": 0.9903, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.7485533929510784, |
| "grad_norm": 2.15071177482605, |
| "learning_rate": 4.8329621517452685e-06, |
| "loss": 1.011, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.749079431877959, |
| "grad_norm": 2.0375895500183105, |
| "learning_rate": 4.8327123981957025e-06, |
| "loss": 1.0021, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.7496054708048395, |
| "grad_norm": 1.9808685779571533, |
| "learning_rate": 4.832462464534e-06, |
| "loss": 1.025, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.7501315097317202, |
| "grad_norm": 2.046558380126953, |
| "learning_rate": 4.832212350779459e-06, |
| "loss": 1.0435, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.7506575486586007, |
| "grad_norm": 2.0020248889923096, |
| "learning_rate": 4.831962056951392e-06, |
| "loss": 1.0207, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.7511835875854813, |
| "grad_norm": 1.9901740550994873, |
| "learning_rate": 4.831711583069122e-06, |
| "loss": 1.0505, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.751709626512362, |
| "grad_norm": 2.112236738204956, |
| "learning_rate": 4.83146092915199e-06, |
| "loss": 1.0353, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.7522356654392425, |
| "grad_norm": 2.0244028568267822, |
| "learning_rate": 4.831210095219349e-06, |
| "loss": 1.0169, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.7527617043661231, |
| "grad_norm": 2.298645257949829, |
| "learning_rate": 4.830959081290567e-06, |
| "loss": 1.0498, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.7532877432930036, |
| "grad_norm": 2.1593234539031982, |
| "learning_rate": 4.8307078873850244e-06, |
| "loss": 1.0954, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.7538137822198843, |
| "grad_norm": 1.9387123584747314, |
| "learning_rate": 4.830456513522117e-06, |
| "loss": 0.9784, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.7543398211467649, |
| "grad_norm": 2.1634531021118164, |
| "learning_rate": 4.830204959721253e-06, |
| "loss": 1.0516, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.7548658600736454, |
| "grad_norm": 1.9310704469680786, |
| "learning_rate": 4.829953226001855e-06, |
| "loss": 0.9648, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.7553918990005261, |
| "grad_norm": 2.0547149181365967, |
| "learning_rate": 4.8297013123833605e-06, |
| "loss": 1.0734, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.7559179379274066, |
| "grad_norm": 2.222872734069824, |
| "learning_rate": 4.829449218885219e-06, |
| "loss": 0.9645, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.7564439768542872, |
| "grad_norm": 2.128120183944702, |
| "learning_rate": 4.829196945526897e-06, |
| "loss": 1.046, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.7569700157811678, |
| "grad_norm": 2.0309526920318604, |
| "learning_rate": 4.828944492327872e-06, |
| "loss": 1.0596, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.7574960547080484, |
| "grad_norm": 2.0946176052093506, |
| "learning_rate": 4.828691859307635e-06, |
| "loss": 1.0134, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.758022093634929, |
| "grad_norm": 1.9159823656082153, |
| "learning_rate": 4.828439046485693e-06, |
| "loss": 1.0081, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.7585481325618095, |
| "grad_norm": 2.203627586364746, |
| "learning_rate": 4.828186053881566e-06, |
| "loss": 1.0451, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.7590741714886902, |
| "grad_norm": 2.065521240234375, |
| "learning_rate": 4.8279328815147895e-06, |
| "loss": 1.0289, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.7596002104155708, |
| "grad_norm": 2.1597719192504883, |
| "learning_rate": 4.827679529404909e-06, |
| "loss": 1.0373, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.7601262493424513, |
| "grad_norm": 2.6100237369537354, |
| "learning_rate": 4.827425997571488e-06, |
| "loss": 1.0254, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.760652288269332, |
| "grad_norm": 2.1975550651550293, |
| "learning_rate": 4.8271722860341e-06, |
| "loss": 1.0254, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.7611783271962125, |
| "grad_norm": 2.019261360168457, |
| "learning_rate": 4.826918394812336e-06, |
| "loss": 1.0823, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.7617043661230931, |
| "grad_norm": 1.9351961612701416, |
| "learning_rate": 4.8266643239257996e-06, |
| "loss": 1.0248, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.7622304050499737, |
| "grad_norm": 1.9437129497528076, |
| "learning_rate": 4.826410073394106e-06, |
| "loss": 0.984, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.7627564439768543, |
| "grad_norm": 2.277479887008667, |
| "learning_rate": 4.826155643236889e-06, |
| "loss": 1.0264, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7632824829037349, |
| "grad_norm": 2.2033772468566895, |
| "learning_rate": 4.825901033473791e-06, |
| "loss": 1.0249, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.7638085218306154, |
| "grad_norm": 2.1912593841552734, |
| "learning_rate": 4.825646244124472e-06, |
| "loss": 1.0366, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.7643345607574961, |
| "grad_norm": 2.0046746730804443, |
| "learning_rate": 4.825391275208606e-06, |
| "loss": 1.0411, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.7648605996843767, |
| "grad_norm": 2.0601322650909424, |
| "learning_rate": 4.825136126745877e-06, |
| "loss": 1.052, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.7653866386112572, |
| "grad_norm": 2.148794651031494, |
| "learning_rate": 4.824880798755986e-06, |
| "loss": 1.04, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.7659126775381379, |
| "grad_norm": 2.027374505996704, |
| "learning_rate": 4.824625291258649e-06, |
| "loss": 1.005, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.7664387164650184, |
| "grad_norm": 2.0703351497650146, |
| "learning_rate": 4.824369604273592e-06, |
| "loss": 1.0157, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.766964755391899, |
| "grad_norm": 2.1002986431121826, |
| "learning_rate": 4.8241137378205575e-06, |
| "loss": 1.0355, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.7674907943187795, |
| "grad_norm": 1.9970546960830688, |
| "learning_rate": 4.823857691919302e-06, |
| "loss": 0.9833, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.7680168332456602, |
| "grad_norm": 2.0489771366119385, |
| "learning_rate": 4.823601466589595e-06, |
| "loss": 1.0351, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.7685428721725408, |
| "grad_norm": 2.0190834999084473, |
| "learning_rate": 4.823345061851219e-06, |
| "loss": 1.0406, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.7690689110994213, |
| "grad_norm": 2.0567877292633057, |
| "learning_rate": 4.823088477723973e-06, |
| "loss": 1.0593, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.769594950026302, |
| "grad_norm": 1.883132815361023, |
| "learning_rate": 4.822831714227667e-06, |
| "loss": 1.0055, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.7701209889531825, |
| "grad_norm": 1.9520277976989746, |
| "learning_rate": 4.822574771382127e-06, |
| "loss": 0.9831, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.7706470278800631, |
| "grad_norm": 2.0123813152313232, |
| "learning_rate": 4.822317649207191e-06, |
| "loss": 0.9841, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.7711730668069438, |
| "grad_norm": 2.089940309524536, |
| "learning_rate": 4.8220603477227124e-06, |
| "loss": 1.0121, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.7716991057338243, |
| "grad_norm": 1.9485499858856201, |
| "learning_rate": 4.8218028669485585e-06, |
| "loss": 0.9744, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.7722251446607049, |
| "grad_norm": 2.2764859199523926, |
| "learning_rate": 4.821545206904608e-06, |
| "loss": 1.0018, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.7727511835875854, |
| "grad_norm": 2.039769411087036, |
| "learning_rate": 4.821287367610756e-06, |
| "loss": 1.0256, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.7732772225144661, |
| "grad_norm": 2.0036065578460693, |
| "learning_rate": 4.821029349086911e-06, |
| "loss": 1.0399, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.7738032614413467, |
| "grad_norm": 2.056286573410034, |
| "learning_rate": 4.820771151352996e-06, |
| "loss": 1.0077, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.7743293003682272, |
| "grad_norm": 2.0001938343048096, |
| "learning_rate": 4.820512774428944e-06, |
| "loss": 1.0109, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.7748553392951079, |
| "grad_norm": 2.007289409637451, |
| "learning_rate": 4.820254218334707e-06, |
| "loss": 1.0223, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.7753813782219884, |
| "grad_norm": 2.079768657684326, |
| "learning_rate": 4.8199954830902465e-06, |
| "loss": 1.0565, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.775907417148869, |
| "grad_norm": 2.030198097229004, |
| "learning_rate": 4.819736568715543e-06, |
| "loss": 1.033, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.7764334560757497, |
| "grad_norm": 2.6482961177825928, |
| "learning_rate": 4.819477475230584e-06, |
| "loss": 1.0595, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.7769594950026302, |
| "grad_norm": 2.160472869873047, |
| "learning_rate": 4.8192182026553775e-06, |
| "loss": 1.0214, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.7774855339295108, |
| "grad_norm": 2.1956963539123535, |
| "learning_rate": 4.818958751009941e-06, |
| "loss": 1.0647, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.7780115728563913, |
| "grad_norm": 2.346040725708008, |
| "learning_rate": 4.818699120314306e-06, |
| "loss": 1.0289, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.778537611783272, |
| "grad_norm": 2.049593448638916, |
| "learning_rate": 4.818439310588521e-06, |
| "loss": 1.0188, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7790636507101526, |
| "grad_norm": 1.9567065238952637, |
| "learning_rate": 4.818179321852646e-06, |
| "loss": 1.0645, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.7795896896370331, |
| "grad_norm": 2.0995101928710938, |
| "learning_rate": 4.817919154126753e-06, |
| "loss": 1.0283, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.7801157285639138, |
| "grad_norm": 2.117649555206299, |
| "learning_rate": 4.817658807430933e-06, |
| "loss": 0.9973, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7806417674907943, |
| "grad_norm": 2.058525800704956, |
| "learning_rate": 4.817398281785286e-06, |
| "loss": 1.0278, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.7811678064176749, |
| "grad_norm": 1.9914313554763794, |
| "learning_rate": 4.817137577209927e-06, |
| "loss": 0.9591, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.7816938453445555, |
| "grad_norm": 1.9432276487350464, |
| "learning_rate": 4.816876693724987e-06, |
| "loss": 0.9964, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.7822198842714361, |
| "grad_norm": 2.011399507522583, |
| "learning_rate": 4.816615631350608e-06, |
| "loss": 0.9963, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.7827459231983167, |
| "grad_norm": 1.9606966972351074, |
| "learning_rate": 4.816354390106947e-06, |
| "loss": 0.9756, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7832719621251972, |
| "grad_norm": 2.011887788772583, |
| "learning_rate": 4.816092970014176e-06, |
| "loss": 1.0194, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7837980010520779, |
| "grad_norm": 2.0520918369293213, |
| "learning_rate": 4.815831371092478e-06, |
| "loss": 1.02, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7843240399789585, |
| "grad_norm": 2.018293619155884, |
| "learning_rate": 4.815569593362053e-06, |
| "loss": 1.0289, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.784850078905839, |
| "grad_norm": 2.016738176345825, |
| "learning_rate": 4.815307636843112e-06, |
| "loss": 1.0523, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.7853761178327197, |
| "grad_norm": 2.063619375228882, |
| "learning_rate": 4.815045501555882e-06, |
| "loss": 1.0099, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7859021567596002, |
| "grad_norm": 2.122360944747925, |
| "learning_rate": 4.814783187520602e-06, |
| "loss": 1.0346, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.7864281956864808, |
| "grad_norm": 2.040095329284668, |
| "learning_rate": 4.814520694757526e-06, |
| "loss": 1.0017, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.7869542346133613, |
| "grad_norm": 2.003471612930298, |
| "learning_rate": 4.814258023286922e-06, |
| "loss": 0.975, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.787480273540242, |
| "grad_norm": 1.905517816543579, |
| "learning_rate": 4.81399517312907e-06, |
| "loss": 0.9899, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.7880063124671226, |
| "grad_norm": 2.047112226486206, |
| "learning_rate": 4.813732144304266e-06, |
| "loss": 0.9558, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7885323513940031, |
| "grad_norm": 1.9621355533599854, |
| "learning_rate": 4.8134689368328194e-06, |
| "loss": 1.0668, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.7890583903208838, |
| "grad_norm": 1.9221957921981812, |
| "learning_rate": 4.813205550735052e-06, |
| "loss": 1.0082, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7895844292477643, |
| "grad_norm": 2.002659797668457, |
| "learning_rate": 4.812941986031299e-06, |
| "loss": 1.0192, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7901104681746449, |
| "grad_norm": 2.1077136993408203, |
| "learning_rate": 4.812678242741913e-06, |
| "loss": 1.0316, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.7906365071015256, |
| "grad_norm": 2.0782320499420166, |
| "learning_rate": 4.812414320887256e-06, |
| "loss": 1.058, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.7911625460284061, |
| "grad_norm": 2.049888849258423, |
| "learning_rate": 4.812150220487708e-06, |
| "loss": 1.0033, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.7916885849552867, |
| "grad_norm": 2.025468587875366, |
| "learning_rate": 4.811885941563659e-06, |
| "loss": 1.0066, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7922146238821672, |
| "grad_norm": 2.0612878799438477, |
| "learning_rate": 4.8116214841355145e-06, |
| "loss": 0.9783, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7927406628090479, |
| "grad_norm": 1.9370075464248657, |
| "learning_rate": 4.811356848223693e-06, |
| "loss": 1.0171, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7932667017359285, |
| "grad_norm": 2.069326877593994, |
| "learning_rate": 4.8110920338486285e-06, |
| "loss": 1.0283, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.793792740662809, |
| "grad_norm": 2.076786518096924, |
| "learning_rate": 4.810827041030768e-06, |
| "loss": 0.9942, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.7943187795896897, |
| "grad_norm": 1.8861708641052246, |
| "learning_rate": 4.810561869790571e-06, |
| "loss": 0.9909, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7948448185165702, |
| "grad_norm": 2.064493417739868, |
| "learning_rate": 4.810296520148513e-06, |
| "loss": 1.0302, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7953708574434508, |
| "grad_norm": 2.0212459564208984, |
| "learning_rate": 4.810030992125081e-06, |
| "loss": 0.9912, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7958968963703315, |
| "grad_norm": 2.047384023666382, |
| "learning_rate": 4.809765285740776e-06, |
| "loss": 1.0165, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.796422935297212, |
| "grad_norm": 2.2222740650177, |
| "learning_rate": 4.809499401016115e-06, |
| "loss": 1.0295, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7969489742240926, |
| "grad_norm": 1.9516112804412842, |
| "learning_rate": 4.809233337971627e-06, |
| "loss": 0.9562, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7974750131509731, |
| "grad_norm": 2.0002121925354004, |
| "learning_rate": 4.808967096627855e-06, |
| "loss": 1.0076, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.7980010520778538, |
| "grad_norm": 2.182039260864258, |
| "learning_rate": 4.808700677005357e-06, |
| "loss": 0.9925, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7985270910047344, |
| "grad_norm": 2.0578761100769043, |
| "learning_rate": 4.808434079124701e-06, |
| "loss": 0.9831, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7990531299316149, |
| "grad_norm": 1.8856642246246338, |
| "learning_rate": 4.8081673030064735e-06, |
| "loss": 1.0309, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7995791688584956, |
| "grad_norm": 2.1273880004882812, |
| "learning_rate": 4.807900348671272e-06, |
| "loss": 1.0581, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8001052077853761, |
| "grad_norm": 2.0696675777435303, |
| "learning_rate": 4.8076332161397085e-06, |
| "loss": 1.0402, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.8006312467122567, |
| "grad_norm": 2.034176826477051, |
| "learning_rate": 4.80736590543241e-06, |
| "loss": 1.018, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.8011572856391374, |
| "grad_norm": 1.9405510425567627, |
| "learning_rate": 4.807098416570014e-06, |
| "loss": 1.037, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.8016833245660179, |
| "grad_norm": 2.0185844898223877, |
| "learning_rate": 4.806830749573174e-06, |
| "loss": 1.0817, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.8022093634928985, |
| "grad_norm": 2.0617692470550537, |
| "learning_rate": 4.806562904462559e-06, |
| "loss": 0.989, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.802735402419779, |
| "grad_norm": 2.022000789642334, |
| "learning_rate": 4.806294881258846e-06, |
| "loss": 1.0245, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.8032614413466597, |
| "grad_norm": 2.189361572265625, |
| "learning_rate": 4.806026679982733e-06, |
| "loss": 1.0537, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.8037874802735402, |
| "grad_norm": 2.0094563961029053, |
| "learning_rate": 4.805758300654926e-06, |
| "loss": 1.0437, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.8043135192004208, |
| "grad_norm": 1.8940585851669312, |
| "learning_rate": 4.805489743296148e-06, |
| "loss": 0.9811, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.8048395581273015, |
| "grad_norm": 2.0169241428375244, |
| "learning_rate": 4.805221007927134e-06, |
| "loss": 1.0354, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.805365597054182, |
| "grad_norm": 2.1269545555114746, |
| "learning_rate": 4.804952094568635e-06, |
| "loss": 1.0439, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.8058916359810626, |
| "grad_norm": 1.99850332736969, |
| "learning_rate": 4.804683003241413e-06, |
| "loss": 1.0313, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.8064176749079431, |
| "grad_norm": 2.0577683448791504, |
| "learning_rate": 4.804413733966244e-06, |
| "loss": 1.0319, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.8069437138348238, |
| "grad_norm": 1.993945837020874, |
| "learning_rate": 4.804144286763921e-06, |
| "loss": 1.0571, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.8074697527617044, |
| "grad_norm": 2.00144624710083, |
| "learning_rate": 4.803874661655246e-06, |
| "loss": 1.0136, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.8079957916885849, |
| "grad_norm": 2.114583969116211, |
| "learning_rate": 4.8036048586610394e-06, |
| "loss": 0.9996, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8085218306154656, |
| "grad_norm": 2.019767999649048, |
| "learning_rate": 4.803334877802131e-06, |
| "loss": 0.9812, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.8090478695423461, |
| "grad_norm": 2.1253437995910645, |
| "learning_rate": 4.803064719099368e-06, |
| "loss": 1.041, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.8095739084692267, |
| "grad_norm": 2.055514335632324, |
| "learning_rate": 4.802794382573609e-06, |
| "loss": 0.9733, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.8100999473961074, |
| "grad_norm": 2.0274434089660645, |
| "learning_rate": 4.802523868245727e-06, |
| "loss": 1.0222, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8106259863229879, |
| "grad_norm": 2.1663291454315186, |
| "learning_rate": 4.80225317613661e-06, |
| "loss": 1.0308, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.8111520252498685, |
| "grad_norm": 1.8864918947219849, |
| "learning_rate": 4.801982306267156e-06, |
| "loss": 0.9551, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.811678064176749, |
| "grad_norm": 2.1302011013031006, |
| "learning_rate": 4.801711258658281e-06, |
| "loss": 1.0188, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8122041031036297, |
| "grad_norm": 1.9002829790115356, |
| "learning_rate": 4.801440033330914e-06, |
| "loss": 1.0278, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.8127301420305103, |
| "grad_norm": 2.1114113330841064, |
| "learning_rate": 4.801168630305995e-06, |
| "loss": 1.0616, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8132561809573908, |
| "grad_norm": 1.9383304119110107, |
| "learning_rate": 4.800897049604479e-06, |
| "loss": 0.9977, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8137822198842715, |
| "grad_norm": 1.9206221103668213, |
| "learning_rate": 4.800625291247338e-06, |
| "loss": 0.9758, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.814308258811152, |
| "grad_norm": 1.9258513450622559, |
| "learning_rate": 4.800353355255552e-06, |
| "loss": 0.985, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.8148342977380326, |
| "grad_norm": 1.9767898321151733, |
| "learning_rate": 4.800081241650117e-06, |
| "loss": 0.9802, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.8153603366649133, |
| "grad_norm": 1.9899487495422363, |
| "learning_rate": 4.799808950452047e-06, |
| "loss": 1.0104, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8158863755917938, |
| "grad_norm": 1.9970616102218628, |
| "learning_rate": 4.799536481682362e-06, |
| "loss": 1.0125, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.8164124145186744, |
| "grad_norm": 1.9914542436599731, |
| "learning_rate": 4.799263835362103e-06, |
| "loss": 1.0458, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.8169384534455549, |
| "grad_norm": 2.072939157485962, |
| "learning_rate": 4.798991011512319e-06, |
| "loss": 1.0663, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.8174644923724356, |
| "grad_norm": 1.9783833026885986, |
| "learning_rate": 4.798718010154076e-06, |
| "loss": 1.0281, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8179905312993162, |
| "grad_norm": 2.4431405067443848, |
| "learning_rate": 4.798444831308454e-06, |
| "loss": 1.0667, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8185165702261967, |
| "grad_norm": 2.1270408630371094, |
| "learning_rate": 4.798171474996543e-06, |
| "loss": 1.0217, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8190426091530774, |
| "grad_norm": 2.091042995452881, |
| "learning_rate": 4.797897941239452e-06, |
| "loss": 1.0126, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8195686480799579, |
| "grad_norm": 2.016575336456299, |
| "learning_rate": 4.797624230058299e-06, |
| "loss": 1.0269, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.8200946870068385, |
| "grad_norm": 2.1780738830566406, |
| "learning_rate": 4.797350341474218e-06, |
| "loss": 1.0405, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.820620725933719, |
| "grad_norm": 2.0331525802612305, |
| "learning_rate": 4.797076275508358e-06, |
| "loss": 1.0452, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8211467648605997, |
| "grad_norm": 2.0023865699768066, |
| "learning_rate": 4.796802032181877e-06, |
| "loss": 0.9752, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.8216728037874803, |
| "grad_norm": 2.11030912399292, |
| "learning_rate": 4.796527611515952e-06, |
| "loss": 1.0675, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8221988427143608, |
| "grad_norm": 2.0733113288879395, |
| "learning_rate": 4.7962530135317705e-06, |
| "loss": 1.0511, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8227248816412415, |
| "grad_norm": 2.0920655727386475, |
| "learning_rate": 4.795978238250535e-06, |
| "loss": 1.0797, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.823250920568122, |
| "grad_norm": 2.218693256378174, |
| "learning_rate": 4.795703285693461e-06, |
| "loss": 1.0385, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.8237769594950026, |
| "grad_norm": 1.9661623239517212, |
| "learning_rate": 4.795428155881779e-06, |
| "loss": 1.001, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8243029984218833, |
| "grad_norm": 2.1669209003448486, |
| "learning_rate": 4.795152848836731e-06, |
| "loss": 1.0317, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.8248290373487638, |
| "grad_norm": 1.9323532581329346, |
| "learning_rate": 4.794877364579573e-06, |
| "loss": 1.0182, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8253550762756444, |
| "grad_norm": 1.9551295042037964, |
| "learning_rate": 4.794601703131579e-06, |
| "loss": 1.0048, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8258811152025249, |
| "grad_norm": 1.9809366464614868, |
| "learning_rate": 4.7943258645140285e-06, |
| "loss": 1.0377, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8264071541294056, |
| "grad_norm": 2.0074756145477295, |
| "learning_rate": 4.794049848748224e-06, |
| "loss": 1.0218, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8269331930562862, |
| "grad_norm": 2.0177736282348633, |
| "learning_rate": 4.793773655855474e-06, |
| "loss": 1.0402, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8274592319831667, |
| "grad_norm": 2.0348360538482666, |
| "learning_rate": 4.7934972858571035e-06, |
| "loss": 1.0312, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8279852709100474, |
| "grad_norm": 2.097808599472046, |
| "learning_rate": 4.793220738774455e-06, |
| "loss": 1.0618, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.8285113098369279, |
| "grad_norm": 2.061023473739624, |
| "learning_rate": 4.792944014628877e-06, |
| "loss": 1.0464, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8290373487638085, |
| "grad_norm": 2.1510798931121826, |
| "learning_rate": 4.792667113441738e-06, |
| "loss": 1.0102, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.8295633876906892, |
| "grad_norm": 2.1446409225463867, |
| "learning_rate": 4.7923900352344185e-06, |
| "loss": 1.0577, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.8300894266175697, |
| "grad_norm": 2.2582831382751465, |
| "learning_rate": 4.79211278002831e-06, |
| "loss": 1.1042, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8306154655444503, |
| "grad_norm": 2.0069401264190674, |
| "learning_rate": 4.791835347844821e-06, |
| "loss": 0.9835, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8311415044713308, |
| "grad_norm": 2.0074360370635986, |
| "learning_rate": 4.791557738705372e-06, |
| "loss": 1.0596, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8316675433982115, |
| "grad_norm": 2.2237892150878906, |
| "learning_rate": 4.791279952631399e-06, |
| "loss": 1.0162, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.8321935823250921, |
| "grad_norm": 2.0037453174591064, |
| "learning_rate": 4.791001989644349e-06, |
| "loss": 0.9879, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.8327196212519726, |
| "grad_norm": 1.994869351387024, |
| "learning_rate": 4.790723849765684e-06, |
| "loss": 0.9908, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8332456601788533, |
| "grad_norm": 2.1808955669403076, |
| "learning_rate": 4.790445533016879e-06, |
| "loss": 0.9896, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8337716991057338, |
| "grad_norm": 1.9274131059646606, |
| "learning_rate": 4.790167039419424e-06, |
| "loss": 0.9383, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8342977380326144, |
| "grad_norm": 2.0095322132110596, |
| "learning_rate": 4.789888368994823e-06, |
| "loss": 1.0282, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8348237769594951, |
| "grad_norm": 1.957546353340149, |
| "learning_rate": 4.7896095217645895e-06, |
| "loss": 0.9559, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.8353498158863756, |
| "grad_norm": 2.1231918334960938, |
| "learning_rate": 4.789330497750258e-06, |
| "loss": 1.0414, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.8358758548132562, |
| "grad_norm": 2.0618984699249268, |
| "learning_rate": 4.789051296973368e-06, |
| "loss": 0.9931, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.8364018937401367, |
| "grad_norm": 2.023416042327881, |
| "learning_rate": 4.78877191945548e-06, |
| "loss": 0.963, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8369279326670174, |
| "grad_norm": 2.0902810096740723, |
| "learning_rate": 4.788492365218164e-06, |
| "loss": 1.076, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.8374539715938979, |
| "grad_norm": 1.9094164371490479, |
| "learning_rate": 4.788212634283005e-06, |
| "loss": 0.9444, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.8379800105207785, |
| "grad_norm": 1.9887592792510986, |
| "learning_rate": 4.7879327266716e-06, |
| "loss": 1.0364, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.8385060494476592, |
| "grad_norm": 2.0019707679748535, |
| "learning_rate": 4.787652642405564e-06, |
| "loss": 1.0544, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.8390320883745397, |
| "grad_norm": 2.0776329040527344, |
| "learning_rate": 4.787372381506521e-06, |
| "loss": 0.9949, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.8395581273014203, |
| "grad_norm": 2.0091662406921387, |
| "learning_rate": 4.7870919439961094e-06, |
| "loss": 1.0165, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.8400841662283008, |
| "grad_norm": 2.0458288192749023, |
| "learning_rate": 4.786811329895984e-06, |
| "loss": 1.0341, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.8406102051551815, |
| "grad_norm": 2.0741751194000244, |
| "learning_rate": 4.78653053922781e-06, |
| "loss": 1.0509, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.8411362440820621, |
| "grad_norm": 2.141406774520874, |
| "learning_rate": 4.7862495720132695e-06, |
| "loss": 1.0665, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.8416622830089426, |
| "grad_norm": 2.2400975227355957, |
| "learning_rate": 4.785968428274055e-06, |
| "loss": 0.93, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8421883219358233, |
| "grad_norm": 1.929742455482483, |
| "learning_rate": 4.785687108031875e-06, |
| "loss": 1.0339, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.8427143608627038, |
| "grad_norm": 2.012728452682495, |
| "learning_rate": 4.785405611308448e-06, |
| "loss": 0.9945, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.8432403997895844, |
| "grad_norm": 2.0826306343078613, |
| "learning_rate": 4.785123938125511e-06, |
| "loss": 1.0322, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.8437664387164651, |
| "grad_norm": 2.0303595066070557, |
| "learning_rate": 4.784842088504813e-06, |
| "loss": 1.0304, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.8442924776433456, |
| "grad_norm": 2.0710513591766357, |
| "learning_rate": 4.7845600624681145e-06, |
| "loss": 1.0358, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.8448185165702262, |
| "grad_norm": 2.052515983581543, |
| "learning_rate": 4.784277860037192e-06, |
| "loss": 1.0316, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.8453445554971067, |
| "grad_norm": 2.1331636905670166, |
| "learning_rate": 4.783995481233835e-06, |
| "loss": 1.0139, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.8458705944239874, |
| "grad_norm": 1.9738709926605225, |
| "learning_rate": 4.783712926079846e-06, |
| "loss": 1.034, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.846396633350868, |
| "grad_norm": 2.059412956237793, |
| "learning_rate": 4.78343019459704e-06, |
| "loss": 1.0468, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.8469226722777485, |
| "grad_norm": 2.027773141860962, |
| "learning_rate": 4.783147286807249e-06, |
| "loss": 1.0028, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8474487112046292, |
| "grad_norm": 2.1288933753967285, |
| "learning_rate": 4.782864202732317e-06, |
| "loss": 1.0177, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.8479747501315097, |
| "grad_norm": 2.160947322845459, |
| "learning_rate": 4.7825809423941e-06, |
| "loss": 0.9814, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.8485007890583903, |
| "grad_norm": 2.021970272064209, |
| "learning_rate": 4.782297505814469e-06, |
| "loss": 1.0198, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.849026827985271, |
| "grad_norm": 1.9154043197631836, |
| "learning_rate": 4.7820138930153106e-06, |
| "loss": 1.0044, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.8495528669121515, |
| "grad_norm": 2.0858964920043945, |
| "learning_rate": 4.781730104018521e-06, |
| "loss": 0.9932, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.8500789058390321, |
| "grad_norm": 2.236711025238037, |
| "learning_rate": 4.7814461388460105e-06, |
| "loss": 1.0495, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.8506049447659126, |
| "grad_norm": 2.0810344219207764, |
| "learning_rate": 4.781161997519707e-06, |
| "loss": 1.0617, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.8511309836927933, |
| "grad_norm": 2.224187135696411, |
| "learning_rate": 4.780877680061551e-06, |
| "loss": 0.9911, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.8516570226196739, |
| "grad_norm": 1.8846218585968018, |
| "learning_rate": 4.780593186493491e-06, |
| "loss": 1.0185, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.8521830615465544, |
| "grad_norm": 2.0876333713531494, |
| "learning_rate": 4.780308516837495e-06, |
| "loss": 1.0173, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.8527091004734351, |
| "grad_norm": 1.942492961883545, |
| "learning_rate": 4.780023671115544e-06, |
| "loss": 1.0154, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.8532351394003156, |
| "grad_norm": 1.9483400583267212, |
| "learning_rate": 4.779738649349629e-06, |
| "loss": 1.0492, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.8537611783271962, |
| "grad_norm": 1.8866205215454102, |
| "learning_rate": 4.7794534515617586e-06, |
| "loss": 0.9896, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.8542872172540767, |
| "grad_norm": 2.146117687225342, |
| "learning_rate": 4.779168077773953e-06, |
| "loss": 1.0391, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.8548132561809574, |
| "grad_norm": 2.099858283996582, |
| "learning_rate": 4.778882528008245e-06, |
| "loss": 1.0185, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.855339295107838, |
| "grad_norm": 2.0597662925720215, |
| "learning_rate": 4.7785968022866846e-06, |
| "loss": 1.0373, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.8558653340347185, |
| "grad_norm": 2.0234663486480713, |
| "learning_rate": 4.7783109006313316e-06, |
| "loss": 1.0471, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.8563913729615992, |
| "grad_norm": 1.9113049507141113, |
| "learning_rate": 4.778024823064261e-06, |
| "loss": 1.01, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.8569174118884797, |
| "grad_norm": 2.4924910068511963, |
| "learning_rate": 4.777738569607562e-06, |
| "loss": 1.0267, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.8574434508153603, |
| "grad_norm": 1.9605613946914673, |
| "learning_rate": 4.777452140283336e-06, |
| "loss": 1.0237, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.857969489742241, |
| "grad_norm": 2.1404225826263428, |
| "learning_rate": 4.7771655351136996e-06, |
| "loss": 1.0353, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.8584955286691215, |
| "grad_norm": 2.1174509525299072, |
| "learning_rate": 4.776878754120781e-06, |
| "loss": 1.0517, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.8590215675960021, |
| "grad_norm": 1.895843267440796, |
| "learning_rate": 4.7765917973267226e-06, |
| "loss": 0.9479, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.8595476065228826, |
| "grad_norm": 2.080152988433838, |
| "learning_rate": 4.776304664753682e-06, |
| "loss": 1.0642, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.8600736454497633, |
| "grad_norm": 1.9730490446090698, |
| "learning_rate": 4.776017356423827e-06, |
| "loss": 1.0059, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.8605996843766439, |
| "grad_norm": 2.19085693359375, |
| "learning_rate": 4.775729872359343e-06, |
| "loss": 1.0368, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.8611257233035244, |
| "grad_norm": 2.14911150932312, |
| "learning_rate": 4.775442212582428e-06, |
| "loss": 1.0583, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.8616517622304051, |
| "grad_norm": 1.9603419303894043, |
| "learning_rate": 4.775154377115291e-06, |
| "loss": 1.0336, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.8621778011572856, |
| "grad_norm": 1.9417442083358765, |
| "learning_rate": 4.774866365980156e-06, |
| "loss": 0.9885, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.8627038400841662, |
| "grad_norm": 2.092170000076294, |
| "learning_rate": 4.774578179199261e-06, |
| "loss": 1.0496, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.8632298790110469, |
| "grad_norm": 2.0614163875579834, |
| "learning_rate": 4.774289816794858e-06, |
| "loss": 1.0011, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.8637559179379274, |
| "grad_norm": 2.168977975845337, |
| "learning_rate": 4.774001278789211e-06, |
| "loss": 1.0342, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.864281956864808, |
| "grad_norm": 2.0560708045959473, |
| "learning_rate": 4.773712565204599e-06, |
| "loss": 1.0239, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.8648079957916885, |
| "grad_norm": 1.9980727434158325, |
| "learning_rate": 4.773423676063314e-06, |
| "loss": 1.0312, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.8653340347185692, |
| "grad_norm": 2.0650413036346436, |
| "learning_rate": 4.773134611387661e-06, |
| "loss": 1.0468, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.8658600736454498, |
| "grad_norm": 1.954148530960083, |
| "learning_rate": 4.77284537119996e-06, |
| "loss": 1.0138, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.8663861125723303, |
| "grad_norm": 2.092515468597412, |
| "learning_rate": 4.772555955522543e-06, |
| "loss": 0.987, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.866912151499211, |
| "grad_norm": 2.007941246032715, |
| "learning_rate": 4.772266364377757e-06, |
| "loss": 0.9918, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.8674381904260915, |
| "grad_norm": 1.9608757495880127, |
| "learning_rate": 4.77197659778796e-06, |
| "loss": 1.0502, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.8679642293529721, |
| "grad_norm": 2.0067436695098877, |
| "learning_rate": 4.771686655775527e-06, |
| "loss": 1.0335, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.8684902682798528, |
| "grad_norm": 2.079745292663574, |
| "learning_rate": 4.771396538362845e-06, |
| "loss": 1.043, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.8690163072067333, |
| "grad_norm": 1.9542405605316162, |
| "learning_rate": 4.771106245572313e-06, |
| "loss": 0.984, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.8695423461336139, |
| "grad_norm": 2.028416872024536, |
| "learning_rate": 4.770815777426346e-06, |
| "loss": 0.9933, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.8700683850604944, |
| "grad_norm": 1.9436818361282349, |
| "learning_rate": 4.77052513394737e-06, |
| "loss": 1.0118, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.8705944239873751, |
| "grad_norm": 2.028409004211426, |
| "learning_rate": 4.770234315157828e-06, |
| "loss": 1.0494, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.8711204629142556, |
| "grad_norm": 2.0709540843963623, |
| "learning_rate": 4.769943321080174e-06, |
| "loss": 1.0542, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.8716465018411362, |
| "grad_norm": 2.0256619453430176, |
| "learning_rate": 4.7696521517368755e-06, |
| "loss": 1.0011, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.8721725407680169, |
| "grad_norm": 2.0937297344207764, |
| "learning_rate": 4.769360807150414e-06, |
| "loss": 0.9974, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.8726985796948974, |
| "grad_norm": 2.2346062660217285, |
| "learning_rate": 4.769069287343285e-06, |
| "loss": 1.0128, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.873224618621778, |
| "grad_norm": 2.1082491874694824, |
| "learning_rate": 4.7687775923379975e-06, |
| "loss": 1.0321, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.8737506575486585, |
| "grad_norm": 2.0769453048706055, |
| "learning_rate": 4.768485722157074e-06, |
| "loss": 0.973, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.8742766964755392, |
| "grad_norm": 2.0329558849334717, |
| "learning_rate": 4.768193676823048e-06, |
| "loss": 1.0102, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.8748027354024198, |
| "grad_norm": 2.0758261680603027, |
| "learning_rate": 4.767901456358471e-06, |
| "loss": 1.0125, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.8753287743293003, |
| "grad_norm": 2.12320613861084, |
| "learning_rate": 4.767609060785905e-06, |
| "loss": 1.0294, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.875854813256181, |
| "grad_norm": 1.9771841764450073, |
| "learning_rate": 4.767316490127927e-06, |
| "loss": 0.9886, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.8763808521830615, |
| "grad_norm": 1.9373329877853394, |
| "learning_rate": 4.7670237444071255e-06, |
| "loss": 0.994, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.8769068911099421, |
| "grad_norm": 2.0343801975250244, |
| "learning_rate": 4.766730823646105e-06, |
| "loss": 1.0352, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.8774329300368228, |
| "grad_norm": 2.020343542098999, |
| "learning_rate": 4.766437727867481e-06, |
| "loss": 0.979, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.8779589689637033, |
| "grad_norm": 2.107820510864258, |
| "learning_rate": 4.766144457093886e-06, |
| "loss": 1.0296, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.8784850078905839, |
| "grad_norm": 2.1452198028564453, |
| "learning_rate": 4.765851011347962e-06, |
| "loss": 1.0438, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.8790110468174644, |
| "grad_norm": 2.087686777114868, |
| "learning_rate": 4.7655573906523665e-06, |
| "loss": 0.9788, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.8795370857443451, |
| "grad_norm": 2.083097457885742, |
| "learning_rate": 4.765263595029771e-06, |
| "loss": 0.9921, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.8800631246712257, |
| "grad_norm": 2.0001168251037598, |
| "learning_rate": 4.76496962450286e-06, |
| "loss": 0.9784, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.8805891635981062, |
| "grad_norm": 1.9493898153305054, |
| "learning_rate": 4.7646754790943315e-06, |
| "loss": 1.0145, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.8811152025249869, |
| "grad_norm": 2.140746831893921, |
| "learning_rate": 4.764381158826896e-06, |
| "loss": 1.0286, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.8816412414518674, |
| "grad_norm": 2.0411407947540283, |
| "learning_rate": 4.764086663723278e-06, |
| "loss": 1.0297, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.882167280378748, |
| "grad_norm": 2.164043664932251, |
| "learning_rate": 4.763791993806218e-06, |
| "loss": 1.0246, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.8826933193056287, |
| "grad_norm": 2.0231616497039795, |
| "learning_rate": 4.7634971490984675e-06, |
| "loss": 0.9692, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.8832193582325092, |
| "grad_norm": 2.0884130001068115, |
| "learning_rate": 4.763202129622789e-06, |
| "loss": 1.0441, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.8837453971593898, |
| "grad_norm": 1.959078311920166, |
| "learning_rate": 4.7629069354019654e-06, |
| "loss": 1.0166, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.8842714360862703, |
| "grad_norm": 1.836121916770935, |
| "learning_rate": 4.762611566458786e-06, |
| "loss": 1.0347, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.884797475013151, |
| "grad_norm": 2.099907398223877, |
| "learning_rate": 4.762316022816058e-06, |
| "loss": 1.0309, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.8853235139400316, |
| "grad_norm": 1.941465139389038, |
| "learning_rate": 4.7620203044966004e-06, |
| "loss": 1.0203, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.8858495528669121, |
| "grad_norm": 1.893522024154663, |
| "learning_rate": 4.761724411523247e-06, |
| "loss": 0.9769, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.8863755917937928, |
| "grad_norm": 1.9919662475585938, |
| "learning_rate": 4.7614283439188426e-06, |
| "loss": 1.0116, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.8869016307206733, |
| "grad_norm": 1.9670614004135132, |
| "learning_rate": 4.761132101706249e-06, |
| "loss": 0.9719, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.8874276696475539, |
| "grad_norm": 1.9545384645462036, |
| "learning_rate": 4.760835684908337e-06, |
| "loss": 0.9986, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.8879537085744345, |
| "grad_norm": 1.9402283430099487, |
| "learning_rate": 4.7605390935479946e-06, |
| "loss": 0.9911, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.8884797475013151, |
| "grad_norm": 1.954526424407959, |
| "learning_rate": 4.760242327648122e-06, |
| "loss": 1.0021, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.8890057864281957, |
| "grad_norm": 1.9458253383636475, |
| "learning_rate": 4.759945387231633e-06, |
| "loss": 1.0346, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.8895318253550762, |
| "grad_norm": 1.9583990573883057, |
| "learning_rate": 4.7596482723214565e-06, |
| "loss": 1.0509, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8900578642819569, |
| "grad_norm": 2.0227482318878174, |
| "learning_rate": 4.75935098294053e-06, |
| "loss": 1.0651, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8905839032088374, |
| "grad_norm": 1.977971076965332, |
| "learning_rate": 4.7590535191118096e-06, |
| "loss": 1.0609, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.891109942135718, |
| "grad_norm": 2.0564186573028564, |
| "learning_rate": 4.758755880858262e-06, |
| "loss": 1.0125, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8916359810625987, |
| "grad_norm": 1.9081783294677734, |
| "learning_rate": 4.75845806820287e-06, |
| "loss": 1.007, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8921620199894792, |
| "grad_norm": 2.0456745624542236, |
| "learning_rate": 4.758160081168626e-06, |
| "loss": 1.0116, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8926880589163598, |
| "grad_norm": 1.9237746000289917, |
| "learning_rate": 4.757861919778539e-06, |
| "loss": 1.0023, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.8932140978432404, |
| "grad_norm": 1.9402356147766113, |
| "learning_rate": 4.75756358405563e-06, |
| "loss": 1.0264, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.893740136770121, |
| "grad_norm": 1.9538573026657104, |
| "learning_rate": 4.757265074022935e-06, |
| "loss": 0.9582, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8942661756970016, |
| "grad_norm": 2.09053897857666, |
| "learning_rate": 4.756966389703501e-06, |
| "loss": 1.0245, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8947922146238821, |
| "grad_norm": 2.071685552597046, |
| "learning_rate": 4.756667531120391e-06, |
| "loss": 1.0124, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8953182535507628, |
| "grad_norm": 2.0141103267669678, |
| "learning_rate": 4.75636849829668e-06, |
| "loss": 0.9852, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8958442924776433, |
| "grad_norm": 1.9167203903198242, |
| "learning_rate": 4.756069291255456e-06, |
| "loss": 1.0194, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8963703314045239, |
| "grad_norm": 2.011918067932129, |
| "learning_rate": 4.755769910019823e-06, |
| "loss": 1.0029, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.8968963703314046, |
| "grad_norm": 2.1252031326293945, |
| "learning_rate": 4.755470354612895e-06, |
| "loss": 1.0071, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8974224092582851, |
| "grad_norm": 2.0214016437530518, |
| "learning_rate": 4.755170625057801e-06, |
| "loss": 1.0371, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.8979484481851657, |
| "grad_norm": 2.4289193153381348, |
| "learning_rate": 4.754870721377685e-06, |
| "loss": 1.0581, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.8984744871120462, |
| "grad_norm": 2.1093404293060303, |
| "learning_rate": 4.754570643595702e-06, |
| "loss": 1.0017, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8990005260389269, |
| "grad_norm": 2.0420546531677246, |
| "learning_rate": 4.7542703917350215e-06, |
| "loss": 1.0642, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8995265649658075, |
| "grad_norm": 1.9818446636199951, |
| "learning_rate": 4.753969965818827e-06, |
| "loss": 1.0313, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.900052603892688, |
| "grad_norm": 1.897628664970398, |
| "learning_rate": 4.753669365870313e-06, |
| "loss": 0.9875, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.9005786428195687, |
| "grad_norm": 2.0208487510681152, |
| "learning_rate": 4.753368591912693e-06, |
| "loss": 1.0271, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.9011046817464492, |
| "grad_norm": 1.9346519708633423, |
| "learning_rate": 4.753067643969186e-06, |
| "loss": 1.0352, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.9016307206733298, |
| "grad_norm": 2.0617661476135254, |
| "learning_rate": 4.75276652206303e-06, |
| "loss": 0.9806, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.9021567596002105, |
| "grad_norm": 1.8809938430786133, |
| "learning_rate": 4.752465226217477e-06, |
| "loss": 1.0333, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.902682798527091, |
| "grad_norm": 2.047309398651123, |
| "learning_rate": 4.752163756455789e-06, |
| "loss": 1.0614, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.9032088374539716, |
| "grad_norm": 2.1308083534240723, |
| "learning_rate": 4.751862112801242e-06, |
| "loss": 1.0229, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.9037348763808521, |
| "grad_norm": 2.0333852767944336, |
| "learning_rate": 4.751560295277127e-06, |
| "loss": 1.0077, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.9042609153077328, |
| "grad_norm": 1.9486128091812134, |
| "learning_rate": 4.7512583039067485e-06, |
| "loss": 1.0026, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.9047869542346134, |
| "grad_norm": 2.004258394241333, |
| "learning_rate": 4.750956138713424e-06, |
| "loss": 0.986, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9053129931614939, |
| "grad_norm": 2.5763192176818848, |
| "learning_rate": 4.750653799720483e-06, |
| "loss": 0.979, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.9058390320883746, |
| "grad_norm": 2.1086039543151855, |
| "learning_rate": 4.750351286951269e-06, |
| "loss": 1.0368, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.9063650710152551, |
| "grad_norm": 2.0445361137390137, |
| "learning_rate": 4.750048600429141e-06, |
| "loss": 0.9756, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.9068911099421357, |
| "grad_norm": 1.8900635242462158, |
| "learning_rate": 4.7497457401774694e-06, |
| "loss": 0.8947, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.9074171488690163, |
| "grad_norm": 2.116900682449341, |
| "learning_rate": 4.749442706219638e-06, |
| "loss": 1.0502, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.9079431877958969, |
| "grad_norm": 2.1096391677856445, |
| "learning_rate": 4.749139498579044e-06, |
| "loss": 1.0089, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.9084692267227775, |
| "grad_norm": 2.2117018699645996, |
| "learning_rate": 4.7488361172791005e-06, |
| "loss": 1.056, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.908995265649658, |
| "grad_norm": 2.0012335777282715, |
| "learning_rate": 4.748532562343231e-06, |
| "loss": 0.916, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.9095213045765387, |
| "grad_norm": 1.8673421144485474, |
| "learning_rate": 4.748228833794872e-06, |
| "loss": 0.9844, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.9100473435034192, |
| "grad_norm": 1.9152559041976929, |
| "learning_rate": 4.747924931657477e-06, |
| "loss": 0.9619, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9105733824302998, |
| "grad_norm": 2.107985496520996, |
| "learning_rate": 4.7476208559545104e-06, |
| "loss": 1.017, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.9110994213571805, |
| "grad_norm": 2.162464141845703, |
| "learning_rate": 4.7473166067094474e-06, |
| "loss": 1.0197, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.911625460284061, |
| "grad_norm": 2.085958480834961, |
| "learning_rate": 4.747012183945784e-06, |
| "loss": 1.0166, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.9121514992109416, |
| "grad_norm": 2.0198309421539307, |
| "learning_rate": 4.746707587687022e-06, |
| "loss": 0.9883, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.9126775381378222, |
| "grad_norm": 2.013784646987915, |
| "learning_rate": 4.746402817956681e-06, |
| "loss": 0.9775, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.9132035770647028, |
| "grad_norm": 2.1442627906799316, |
| "learning_rate": 4.746097874778293e-06, |
| "loss": 1.0358, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.9137296159915834, |
| "grad_norm": 2.143627643585205, |
| "learning_rate": 4.745792758175402e-06, |
| "loss": 0.9537, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.914255654918464, |
| "grad_norm": 1.9581515789031982, |
| "learning_rate": 4.745487468171566e-06, |
| "loss": 0.9756, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.9147816938453446, |
| "grad_norm": 1.9869537353515625, |
| "learning_rate": 4.74518200479036e-06, |
| "loss": 0.995, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9153077327722251, |
| "grad_norm": 1.9129465818405151, |
| "learning_rate": 4.744876368055365e-06, |
| "loss": 1.0088, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9158337716991057, |
| "grad_norm": 1.957229733467102, |
| "learning_rate": 4.744570557990183e-06, |
| "loss": 0.9832, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.9163598106259864, |
| "grad_norm": 2.061002492904663, |
| "learning_rate": 4.744264574618425e-06, |
| "loss": 1.0338, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9168858495528669, |
| "grad_norm": 2.0439558029174805, |
| "learning_rate": 4.743958417963715e-06, |
| "loss": 1.0678, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9174118884797475, |
| "grad_norm": 2.0407450199127197, |
| "learning_rate": 4.743652088049695e-06, |
| "loss": 1.0219, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.917937927406628, |
| "grad_norm": 2.2696166038513184, |
| "learning_rate": 4.743345584900014e-06, |
| "loss": 0.9909, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.9184639663335087, |
| "grad_norm": 1.9783145189285278, |
| "learning_rate": 4.74303890853834e-06, |
| "loss": 0.9423, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.9189900052603893, |
| "grad_norm": 2.019179344177246, |
| "learning_rate": 4.74273205898835e-06, |
| "loss": 0.9985, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9195160441872698, |
| "grad_norm": 1.966417670249939, |
| "learning_rate": 4.742425036273737e-06, |
| "loss": 1.0605, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.9200420831141505, |
| "grad_norm": 1.9425163269042969, |
| "learning_rate": 4.742117840418207e-06, |
| "loss": 0.9855, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.920568122041031, |
| "grad_norm": 1.9825159311294556, |
| "learning_rate": 4.741810471445478e-06, |
| "loss": 1.0214, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9210941609679116, |
| "grad_norm": 1.9764158725738525, |
| "learning_rate": 4.741502929379284e-06, |
| "loss": 1.0249, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9216201998947923, |
| "grad_norm": 2.0177724361419678, |
| "learning_rate": 4.74119521424337e-06, |
| "loss": 1.0434, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.9221462388216728, |
| "grad_norm": 2.0949506759643555, |
| "learning_rate": 4.740887326061495e-06, |
| "loss": 1.0331, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9226722777485534, |
| "grad_norm": 1.9468920230865479, |
| "learning_rate": 4.740579264857431e-06, |
| "loss": 0.9212, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.923198316675434, |
| "grad_norm": 2.2116925716400146, |
| "learning_rate": 4.740271030654965e-06, |
| "loss": 1.0241, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9237243556023146, |
| "grad_norm": 1.9227603673934937, |
| "learning_rate": 4.739962623477896e-06, |
| "loss": 0.98, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9242503945291951, |
| "grad_norm": 2.013141632080078, |
| "learning_rate": 4.739654043350036e-06, |
| "loss": 1.0321, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.9247764334560757, |
| "grad_norm": 2.1053218841552734, |
| "learning_rate": 4.739345290295211e-06, |
| "loss": 1.0359, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9253024723829564, |
| "grad_norm": 2.072932243347168, |
| "learning_rate": 4.739036364337261e-06, |
| "loss": 0.9826, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.9258285113098369, |
| "grad_norm": 2.104072093963623, |
| "learning_rate": 4.738727265500037e-06, |
| "loss": 1.0239, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9263545502367175, |
| "grad_norm": 2.0704009532928467, |
| "learning_rate": 4.738417993807407e-06, |
| "loss": 1.0235, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.9268805891635981, |
| "grad_norm": 1.9992990493774414, |
| "learning_rate": 4.738108549283249e-06, |
| "loss": 0.988, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9274066280904787, |
| "grad_norm": 2.150501251220703, |
| "learning_rate": 4.737798931951456e-06, |
| "loss": 1.0574, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.9279326670173593, |
| "grad_norm": 1.906421184539795, |
| "learning_rate": 4.7374891418359345e-06, |
| "loss": 1.0479, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.9284587059442398, |
| "grad_norm": 1.8720351457595825, |
| "learning_rate": 4.737179178960603e-06, |
| "loss": 1.038, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.9289847448711205, |
| "grad_norm": 1.9185991287231445, |
| "learning_rate": 4.736869043349394e-06, |
| "loss": 1.0632, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.929510783798001, |
| "grad_norm": 2.040290594100952, |
| "learning_rate": 4.736558735026255e-06, |
| "loss": 0.9857, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.9300368227248816, |
| "grad_norm": 1.9188529253005981, |
| "learning_rate": 4.7362482540151445e-06, |
| "loss": 1.0115, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.9305628616517623, |
| "grad_norm": 2.092855215072632, |
| "learning_rate": 4.7359376003400345e-06, |
| "loss": 1.0318, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.9310889005786428, |
| "grad_norm": 1.9537826776504517, |
| "learning_rate": 4.735626774024912e-06, |
| "loss": 1.0005, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.9316149395055234, |
| "grad_norm": 1.8022964000701904, |
| "learning_rate": 4.735315775093775e-06, |
| "loss": 0.9696, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.932140978432404, |
| "grad_norm": 2.0534324645996094, |
| "learning_rate": 4.735004603570639e-06, |
| "loss": 1.0647, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.9326670173592846, |
| "grad_norm": 2.082421064376831, |
| "learning_rate": 4.734693259479527e-06, |
| "loss": 1.0168, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.9331930562861652, |
| "grad_norm": 2.2331955432891846, |
| "learning_rate": 4.734381742844481e-06, |
| "loss": 1.0288, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.9337190952130457, |
| "grad_norm": 1.9978649616241455, |
| "learning_rate": 4.73407005368955e-06, |
| "loss": 0.9542, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.9342451341399264, |
| "grad_norm": 2.054856061935425, |
| "learning_rate": 4.733758192038804e-06, |
| "loss": 1.0457, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.9347711730668069, |
| "grad_norm": 2.1446175575256348, |
| "learning_rate": 4.733446157916319e-06, |
| "loss": 1.0767, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.9352972119936875, |
| "grad_norm": 2.149594783782959, |
| "learning_rate": 4.7331339513461905e-06, |
| "loss": 0.9975, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.9358232509205682, |
| "grad_norm": 2.0066800117492676, |
| "learning_rate": 4.732821572352522e-06, |
| "loss": 1.0296, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.9363492898474487, |
| "grad_norm": 2.4036574363708496, |
| "learning_rate": 4.732509020959434e-06, |
| "loss": 0.9726, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.9368753287743293, |
| "grad_norm": 2.0901482105255127, |
| "learning_rate": 4.73219629719106e-06, |
| "loss": 1.0748, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.9374013677012099, |
| "grad_norm": 2.093503713607788, |
| "learning_rate": 4.731883401071543e-06, |
| "loss": 1.0413, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.9379274066280905, |
| "grad_norm": 2.1437647342681885, |
| "learning_rate": 4.731570332625044e-06, |
| "loss": 1.0624, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.9384534455549711, |
| "grad_norm": 2.141866445541382, |
| "learning_rate": 4.731257091875736e-06, |
| "loss": 0.9547, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.9389794844818516, |
| "grad_norm": 2.138530731201172, |
| "learning_rate": 4.730943678847804e-06, |
| "loss": 1.0498, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.9395055234087323, |
| "grad_norm": 2.192941188812256, |
| "learning_rate": 4.730630093565447e-06, |
| "loss": 1.0426, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.9400315623356128, |
| "grad_norm": 1.9256808757781982, |
| "learning_rate": 4.730316336052877e-06, |
| "loss": 0.9864, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.9405576012624934, |
| "grad_norm": 2.1694893836975098, |
| "learning_rate": 4.730002406334321e-06, |
| "loss": 0.9926, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.941083640189374, |
| "grad_norm": 1.9891979694366455, |
| "learning_rate": 4.729688304434017e-06, |
| "loss": 0.9835, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.9416096791162546, |
| "grad_norm": 2.112396240234375, |
| "learning_rate": 4.729374030376217e-06, |
| "loss": 1.0131, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9421357180431352, |
| "grad_norm": 2.049139976501465, |
| "learning_rate": 4.729059584185187e-06, |
| "loss": 1.0176, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.9426617569700158, |
| "grad_norm": 2.259706497192383, |
| "learning_rate": 4.728744965885207e-06, |
| "loss": 1.0566, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.9431877958968964, |
| "grad_norm": 1.9924520254135132, |
| "learning_rate": 4.728430175500567e-06, |
| "loss": 0.9912, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.9437138348237769, |
| "grad_norm": 2.1724114418029785, |
| "learning_rate": 4.728115213055573e-06, |
| "loss": 0.9919, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.9442398737506575, |
| "grad_norm": 2.083853244781494, |
| "learning_rate": 4.7278000785745445e-06, |
| "loss": 1.0368, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.9447659126775382, |
| "grad_norm": 2.089245080947876, |
| "learning_rate": 4.727484772081814e-06, |
| "loss": 1.0471, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.9452919516044187, |
| "grad_norm": 1.9880348443984985, |
| "learning_rate": 4.727169293601725e-06, |
| "loss": 0.9752, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.9458179905312993, |
| "grad_norm": 2.0518887042999268, |
| "learning_rate": 4.7268536431586375e-06, |
| "loss": 0.977, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.9463440294581799, |
| "grad_norm": 2.3292527198791504, |
| "learning_rate": 4.726537820776922e-06, |
| "loss": 0.9696, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.9468700683850605, |
| "grad_norm": 2.093759775161743, |
| "learning_rate": 4.7262218264809656e-06, |
| "loss": 1.028, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9473961073119411, |
| "grad_norm": 1.9579375982284546, |
| "learning_rate": 4.7259056602951644e-06, |
| "loss": 0.9797, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.9479221462388217, |
| "grad_norm": 2.1174583435058594, |
| "learning_rate": 4.725589322243932e-06, |
| "loss": 0.9993, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.9484481851657023, |
| "grad_norm": 2.167732000350952, |
| "learning_rate": 4.725272812351692e-06, |
| "loss": 1.0031, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.9489742240925828, |
| "grad_norm": 2.1166253089904785, |
| "learning_rate": 4.724956130642883e-06, |
| "loss": 1.0029, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.9495002630194634, |
| "grad_norm": 2.0212886333465576, |
| "learning_rate": 4.724639277141957e-06, |
| "loss": 1.0202, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.9500263019463441, |
| "grad_norm": 2.1849446296691895, |
| "learning_rate": 4.7243222518733775e-06, |
| "loss": 0.9847, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.9505523408732246, |
| "grad_norm": 2.019671678543091, |
| "learning_rate": 4.724005054861623e-06, |
| "loss": 1.0141, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.9510783798001052, |
| "grad_norm": 2.0654826164245605, |
| "learning_rate": 4.723687686131186e-06, |
| "loss": 1.0266, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.9516044187269858, |
| "grad_norm": 2.0668342113494873, |
| "learning_rate": 4.7233701457065694e-06, |
| "loss": 1.0249, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.9521304576538664, |
| "grad_norm": 1.9022929668426514, |
| "learning_rate": 4.723052433612292e-06, |
| "loss": 1.0092, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.952656496580747, |
| "grad_norm": 2.0411059856414795, |
| "learning_rate": 4.722734549872884e-06, |
| "loss": 0.9896, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.9531825355076275, |
| "grad_norm": 2.0354626178741455, |
| "learning_rate": 4.722416494512889e-06, |
| "loss": 0.9529, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.9537085744345082, |
| "grad_norm": 1.866688847541809, |
| "learning_rate": 4.722098267556867e-06, |
| "loss": 0.971, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.9542346133613887, |
| "grad_norm": 1.9963386058807373, |
| "learning_rate": 4.721779869029387e-06, |
| "loss": 0.9931, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.9547606522882693, |
| "grad_norm": 1.9810550212860107, |
| "learning_rate": 4.721461298955033e-06, |
| "loss": 1.0335, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.95528669121515, |
| "grad_norm": 2.0094194412231445, |
| "learning_rate": 4.721142557358402e-06, |
| "loss": 1.0248, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.9558127301420305, |
| "grad_norm": 2.110318183898926, |
| "learning_rate": 4.720823644264106e-06, |
| "loss": 0.9726, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.9563387690689111, |
| "grad_norm": 2.051914691925049, |
| "learning_rate": 4.720504559696768e-06, |
| "loss": 1.0205, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.9568648079957917, |
| "grad_norm": 2.0969302654266357, |
| "learning_rate": 4.7201853036810245e-06, |
| "loss": 1.0313, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.9573908469226723, |
| "grad_norm": 2.098721742630005, |
| "learning_rate": 4.719865876241525e-06, |
| "loss": 1.0276, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.9579168858495528, |
| "grad_norm": 1.9741021394729614, |
| "learning_rate": 4.719546277402936e-06, |
| "loss": 1.0142, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.9584429247764334, |
| "grad_norm": 2.1097187995910645, |
| "learning_rate": 4.71922650718993e-06, |
| "loss": 0.9812, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.9589689637033141, |
| "grad_norm": 2.1343348026275635, |
| "learning_rate": 4.718906565627201e-06, |
| "loss": 1.0126, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.9594950026301946, |
| "grad_norm": 2.089698553085327, |
| "learning_rate": 4.71858645273945e-06, |
| "loss": 0.9982, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.9600210415570752, |
| "grad_norm": 2.1942148208618164, |
| "learning_rate": 4.7182661685513925e-06, |
| "loss": 1.0781, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.9605470804839558, |
| "grad_norm": 1.92880380153656, |
| "learning_rate": 4.7179457130877605e-06, |
| "loss": 1.0214, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.9610731194108364, |
| "grad_norm": 2.093219518661499, |
| "learning_rate": 4.717625086373295e-06, |
| "loss": 1.0411, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.961599158337717, |
| "grad_norm": 1.9406787157058716, |
| "learning_rate": 4.7173042884327525e-06, |
| "loss": 1.0296, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.9621251972645976, |
| "grad_norm": 1.9737564325332642, |
| "learning_rate": 4.7169833192909025e-06, |
| "loss": 1.0119, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.9626512361914782, |
| "grad_norm": 1.9281796216964722, |
| "learning_rate": 4.7166621789725276e-06, |
| "loss": 1.0203, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.9631772751183587, |
| "grad_norm": 2.128120183944702, |
| "learning_rate": 4.716340867502424e-06, |
| "loss": 1.087, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.9637033140452393, |
| "grad_norm": 2.1313352584838867, |
| "learning_rate": 4.716019384905399e-06, |
| "loss": 1.0049, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.96422935297212, |
| "grad_norm": 1.882323980331421, |
| "learning_rate": 4.715697731206275e-06, |
| "loss": 1.052, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.9647553918990005, |
| "grad_norm": 1.902729868888855, |
| "learning_rate": 4.71537590642989e-06, |
| "loss": 1.013, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.9652814308258811, |
| "grad_norm": 1.9752705097198486, |
| "learning_rate": 4.715053910601089e-06, |
| "loss": 0.9964, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.9658074697527617, |
| "grad_norm": 2.2092044353485107, |
| "learning_rate": 4.714731743744736e-06, |
| "loss": 1.0142, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.9663335086796423, |
| "grad_norm": 1.9738699197769165, |
| "learning_rate": 4.714409405885706e-06, |
| "loss": 1.0431, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.9668595476065229, |
| "grad_norm": 1.94752836227417, |
| "learning_rate": 4.714086897048886e-06, |
| "loss": 0.9776, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.9673855865334035, |
| "grad_norm": 2.044384717941284, |
| "learning_rate": 4.713764217259178e-06, |
| "loss": 0.9428, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.9679116254602841, |
| "grad_norm": 2.067378520965576, |
| "learning_rate": 4.713441366541497e-06, |
| "loss": 1.0222, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.9684376643871646, |
| "grad_norm": 2.0729427337646484, |
| "learning_rate": 4.71311834492077e-06, |
| "loss": 1.0244, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.9689637033140452, |
| "grad_norm": 1.9986896514892578, |
| "learning_rate": 4.712795152421938e-06, |
| "loss": 1.0246, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.9694897422409259, |
| "grad_norm": 2.134274482727051, |
| "learning_rate": 4.712471789069956e-06, |
| "loss": 1.0317, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.9700157811678064, |
| "grad_norm": 2.116116762161255, |
| "learning_rate": 4.7121482548897896e-06, |
| "loss": 1.0431, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.970541820094687, |
| "grad_norm": 2.146329164505005, |
| "learning_rate": 4.7118245499064205e-06, |
| "loss": 1.0185, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.9710678590215676, |
| "grad_norm": 2.2587080001831055, |
| "learning_rate": 4.711500674144844e-06, |
| "loss": 1.0172, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.9715938979484482, |
| "grad_norm": 2.133565902709961, |
| "learning_rate": 4.7111766276300645e-06, |
| "loss": 1.0887, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.9721199368753288, |
| "grad_norm": 2.4180047512054443, |
| "learning_rate": 4.710852410387103e-06, |
| "loss": 1.0686, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.9726459758022094, |
| "grad_norm": 1.9758679866790771, |
| "learning_rate": 4.7105280224409936e-06, |
| "loss": 0.9851, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.97317201472909, |
| "grad_norm": 2.0190632343292236, |
| "learning_rate": 4.710203463816782e-06, |
| "loss": 0.9967, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.9736980536559705, |
| "grad_norm": 2.0636117458343506, |
| "learning_rate": 4.709878734539527e-06, |
| "loss": 1.0209, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.9742240925828511, |
| "grad_norm": 2.0756478309631348, |
| "learning_rate": 4.709553834634303e-06, |
| "loss": 0.9793, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.9747501315097317, |
| "grad_norm": 1.94191312789917, |
| "learning_rate": 4.709228764126195e-06, |
| "loss": 0.9697, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.9752761704366123, |
| "grad_norm": 2.057345390319824, |
| "learning_rate": 4.708903523040303e-06, |
| "loss": 0.938, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.9758022093634929, |
| "grad_norm": 2.1611337661743164, |
| "learning_rate": 4.7085781114017384e-06, |
| "loss": 1.0464, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.9763282482903735, |
| "grad_norm": 1.9461411237716675, |
| "learning_rate": 4.708252529235627e-06, |
| "loss": 0.9934, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.9768542872172541, |
| "grad_norm": 1.9107236862182617, |
| "learning_rate": 4.707926776567108e-06, |
| "loss": 0.9895, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.9773803261441346, |
| "grad_norm": 2.0953640937805176, |
| "learning_rate": 4.707600853421332e-06, |
| "loss": 1.0009, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.9779063650710152, |
| "grad_norm": 2.126648187637329, |
| "learning_rate": 4.707274759823466e-06, |
| "loss": 0.9801, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.9784324039978959, |
| "grad_norm": 2.0868916511535645, |
| "learning_rate": 4.706948495798687e-06, |
| "loss": 0.9765, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.9789584429247764, |
| "grad_norm": 2.0332181453704834, |
| "learning_rate": 4.706622061372185e-06, |
| "loss": 1.0216, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.979484481851657, |
| "grad_norm": 2.05155348777771, |
| "learning_rate": 4.706295456569167e-06, |
| "loss": 1.0594, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.9800105207785376, |
| "grad_norm": 2.1178739070892334, |
| "learning_rate": 4.7059686814148485e-06, |
| "loss": 1.0463, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.9805365597054182, |
| "grad_norm": 1.9961886405944824, |
| "learning_rate": 4.705641735934462e-06, |
| "loss": 0.9658, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.9810625986322988, |
| "grad_norm": 1.9905188083648682, |
| "learning_rate": 4.705314620153251e-06, |
| "loss": 0.9677, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.9815886375591794, |
| "grad_norm": 1.9200838804244995, |
| "learning_rate": 4.704987334096471e-06, |
| "loss": 1.0011, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.98211467648606, |
| "grad_norm": 2.069359302520752, |
| "learning_rate": 4.704659877789395e-06, |
| "loss": 1.01, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.9826407154129405, |
| "grad_norm": 1.8069074153900146, |
| "learning_rate": 4.704332251257304e-06, |
| "loss": 1.037, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.9831667543398211, |
| "grad_norm": 1.9900349378585815, |
| "learning_rate": 4.704004454525496e-06, |
| "loss": 1.0035, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.9836927932667018, |
| "grad_norm": 1.902032494544983, |
| "learning_rate": 4.70367648761928e-06, |
| "loss": 1.0001, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.9842188321935823, |
| "grad_norm": 2.5718839168548584, |
| "learning_rate": 4.703348350563978e-06, |
| "loss": 1.002, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.9847448711204629, |
| "grad_norm": 1.90852952003479, |
| "learning_rate": 4.703020043384927e-06, |
| "loss": 1.0338, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.9852709100473435, |
| "grad_norm": 2.0179872512817383, |
| "learning_rate": 4.702691566107477e-06, |
| "loss": 0.9724, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.9857969489742241, |
| "grad_norm": 2.0315425395965576, |
| "learning_rate": 4.702362918756988e-06, |
| "loss": 1.0256, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.9863229879011047, |
| "grad_norm": 1.898896336555481, |
| "learning_rate": 4.702034101358837e-06, |
| "loss": 0.9695, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.9868490268279853, |
| "grad_norm": 2.1176962852478027, |
| "learning_rate": 4.701705113938411e-06, |
| "loss": 1.0217, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.9873750657548659, |
| "grad_norm": 1.94914972782135, |
| "learning_rate": 4.701375956521113e-06, |
| "loss": 1.0081, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.9879011046817464, |
| "grad_norm": 1.9665032625198364, |
| "learning_rate": 4.701046629132358e-06, |
| "loss": 1.0174, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.988427143608627, |
| "grad_norm": 2.005793571472168, |
| "learning_rate": 4.700717131797573e-06, |
| "loss": 0.9653, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.9889531825355077, |
| "grad_norm": 2.0769705772399902, |
| "learning_rate": 4.700387464542199e-06, |
| "loss": 1.0142, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.9894792214623882, |
| "grad_norm": 1.9945422410964966, |
| "learning_rate": 4.700057627391689e-06, |
| "loss": 1.0225, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.9900052603892688, |
| "grad_norm": 2.1121349334716797, |
| "learning_rate": 4.699727620371513e-06, |
| "loss": 1.0056, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.9905312993161494, |
| "grad_norm": 2.156942844390869, |
| "learning_rate": 4.699397443507148e-06, |
| "loss": 1.0049, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.99105733824303, |
| "grad_norm": 2.065075159072876, |
| "learning_rate": 4.699067096824091e-06, |
| "loss": 0.9694, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.9915833771699105, |
| "grad_norm": 2.12490177154541, |
| "learning_rate": 4.698736580347845e-06, |
| "loss": 1.0268, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.9921094160967912, |
| "grad_norm": 2.039874792098999, |
| "learning_rate": 4.698405894103932e-06, |
| "loss": 1.0122, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.9926354550236718, |
| "grad_norm": 2.0004734992980957, |
| "learning_rate": 4.698075038117884e-06, |
| "loss": 0.9996, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.9931614939505523, |
| "grad_norm": 1.996697187423706, |
| "learning_rate": 4.697744012415248e-06, |
| "loss": 1.0658, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.9936875328774329, |
| "grad_norm": 1.9783189296722412, |
| "learning_rate": 4.69741281702158e-06, |
| "loss": 0.9799, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.9942135718043135, |
| "grad_norm": 2.054898738861084, |
| "learning_rate": 4.697081451962456e-06, |
| "loss": 1.0302, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.9947396107311941, |
| "grad_norm": 1.953337550163269, |
| "learning_rate": 4.696749917263458e-06, |
| "loss": 0.9634, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.9952656496580747, |
| "grad_norm": 2.6126086711883545, |
| "learning_rate": 4.6964182129501855e-06, |
| "loss": 0.9659, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.9957916885849553, |
| "grad_norm": 1.931026816368103, |
| "learning_rate": 4.69608633904825e-06, |
| "loss": 1.0456, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.9963177275118359, |
| "grad_norm": 1.9246487617492676, |
| "learning_rate": 4.695754295583276e-06, |
| "loss": 1.0057, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.9968437664387164, |
| "grad_norm": 1.9731547832489014, |
| "learning_rate": 4.695422082580901e-06, |
| "loss": 0.9619, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.997369805365597, |
| "grad_norm": 2.1975600719451904, |
| "learning_rate": 4.695089700066776e-06, |
| "loss": 0.9667, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.9978958442924777, |
| "grad_norm": 1.9038164615631104, |
| "learning_rate": 4.6947571480665636e-06, |
| "loss": 0.9564, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.9984218832193582, |
| "grad_norm": 1.9997332096099854, |
| "learning_rate": 4.694424426605942e-06, |
| "loss": 0.9717, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.9989479221462388, |
| "grad_norm": 2.0790839195251465, |
| "learning_rate": 4.6940915357106e-06, |
| "loss": 1.044, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.9994739610731194, |
| "grad_norm": 2.0779690742492676, |
| "learning_rate": 4.693758475406241e-06, |
| "loss": 1.052, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.3423078060150146, |
| "learning_rate": 4.693425245718581e-06, |
| "loss": 0.9887, |
| "step": 1901 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 11406, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 1901, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.801364251367178e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |