| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2884, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00034674063800277393, |
| "grad_norm": 3.4529595375061035, |
| "learning_rate": 0.0, |
| "loss": 0.2571, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0006934812760055479, |
| "grad_norm": 1.7922414541244507, |
| "learning_rate": 1e-06, |
| "loss": 0.2304, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0010402219140083217, |
| "grad_norm": 2.7978198528289795, |
| "learning_rate": 1e-06, |
| "loss": 0.2421, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0013869625520110957, |
| "grad_norm": 1.7440896034240723, |
| "learning_rate": 1e-06, |
| "loss": 0.2437, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0017337031900138697, |
| "grad_norm": 2.0253705978393555, |
| "learning_rate": 1e-06, |
| "loss": 0.2456, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0020804438280166435, |
| "grad_norm": 7.390465259552002, |
| "learning_rate": 1e-06, |
| "loss": 0.285, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0024271844660194173, |
| "grad_norm": 2.6305766105651855, |
| "learning_rate": 1e-06, |
| "loss": 0.2477, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0027739251040221915, |
| "grad_norm": 3.094043254852295, |
| "learning_rate": 1e-06, |
| "loss": 0.2229, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0031206657420249652, |
| "grad_norm": 2.3640098571777344, |
| "learning_rate": 1e-06, |
| "loss": 0.2233, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0034674063800277394, |
| "grad_norm": 2.151405096054077, |
| "learning_rate": 1e-06, |
| "loss": 0.2211, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0038141470180305132, |
| "grad_norm": 5.151231288909912, |
| "learning_rate": 1e-06, |
| "loss": 0.2354, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004160887656033287, |
| "grad_norm": 2.285297155380249, |
| "learning_rate": 1e-06, |
| "loss": 0.2483, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004507628294036061, |
| "grad_norm": 2.348036050796509, |
| "learning_rate": 1e-06, |
| "loss": 0.2419, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0048543689320388345, |
| "grad_norm": 2.321516275405884, |
| "learning_rate": 1e-06, |
| "loss": 0.2659, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005201109570041609, |
| "grad_norm": 2.291966438293457, |
| "learning_rate": 1e-06, |
| "loss": 0.2315, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005547850208044383, |
| "grad_norm": 2.938875198364258, |
| "learning_rate": 1e-06, |
| "loss": 0.2479, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.005894590846047157, |
| "grad_norm": 1.7737529277801514, |
| "learning_rate": 1e-06, |
| "loss": 0.2242, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0062413314840499305, |
| "grad_norm": 2.331916332244873, |
| "learning_rate": 1e-06, |
| "loss": 0.2386, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.006588072122052704, |
| "grad_norm": 2.8218319416046143, |
| "learning_rate": 1e-06, |
| "loss": 0.2655, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.006934812760055479, |
| "grad_norm": 1.9200022220611572, |
| "learning_rate": 1e-06, |
| "loss": 0.2384, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007281553398058253, |
| "grad_norm": 2.6691722869873047, |
| "learning_rate": 1e-06, |
| "loss": 0.2594, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0076282940360610264, |
| "grad_norm": 3.2480461597442627, |
| "learning_rate": 1e-06, |
| "loss": 0.1936, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0079750346740638, |
| "grad_norm": 4.234226226806641, |
| "learning_rate": 1e-06, |
| "loss": 0.2672, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008321775312066574, |
| "grad_norm": 4.796227931976318, |
| "learning_rate": 1e-06, |
| "loss": 0.2711, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.008668515950069348, |
| "grad_norm": 4.371148109436035, |
| "learning_rate": 1e-06, |
| "loss": 0.2256, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.009015256588072122, |
| "grad_norm": 2.540862798690796, |
| "learning_rate": 1e-06, |
| "loss": 0.2418, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009361997226074895, |
| "grad_norm": 1.8659073114395142, |
| "learning_rate": 1e-06, |
| "loss": 0.2439, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.009708737864077669, |
| "grad_norm": 2.3605165481567383, |
| "learning_rate": 1e-06, |
| "loss": 0.2177, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.010055478502080445, |
| "grad_norm": 2.2104761600494385, |
| "learning_rate": 1e-06, |
| "loss": 0.2504, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.010402219140083218, |
| "grad_norm": 2.1602864265441895, |
| "learning_rate": 1e-06, |
| "loss": 0.2372, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.010748959778085992, |
| "grad_norm": 2.1729562282562256, |
| "learning_rate": 1e-06, |
| "loss": 0.2562, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.011095700416088766, |
| "grad_norm": 2.284585475921631, |
| "learning_rate": 1e-06, |
| "loss": 0.2355, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01144244105409154, |
| "grad_norm": 2.268206834793091, |
| "learning_rate": 1e-06, |
| "loss": 0.2454, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.011789181692094313, |
| "grad_norm": 2.5299553871154785, |
| "learning_rate": 1e-06, |
| "loss": 0.2471, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.012135922330097087, |
| "grad_norm": 3.3298118114471436, |
| "learning_rate": 1e-06, |
| "loss": 0.2728, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.012482662968099861, |
| "grad_norm": 4.213827133178711, |
| "learning_rate": 1e-06, |
| "loss": 0.2271, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.012829403606102635, |
| "grad_norm": 4.147632122039795, |
| "learning_rate": 1e-06, |
| "loss": 0.2474, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.013176144244105409, |
| "grad_norm": 3.017467737197876, |
| "learning_rate": 1e-06, |
| "loss": 0.2443, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.013522884882108182, |
| "grad_norm": 3.92279314994812, |
| "learning_rate": 1e-06, |
| "loss": 0.2309, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.013869625520110958, |
| "grad_norm": 2.6705775260925293, |
| "learning_rate": 1e-06, |
| "loss": 0.2122, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.014216366158113732, |
| "grad_norm": 2.2579407691955566, |
| "learning_rate": 1e-06, |
| "loss": 0.2189, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.014563106796116505, |
| "grad_norm": 3.3366434574127197, |
| "learning_rate": 1e-06, |
| "loss": 0.2268, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.014909847434119279, |
| "grad_norm": 1.8895633220672607, |
| "learning_rate": 1e-06, |
| "loss": 0.2219, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.015256588072122053, |
| "grad_norm": 4.174144268035889, |
| "learning_rate": 1e-06, |
| "loss": 0.209, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.015603328710124827, |
| "grad_norm": 2.0545594692230225, |
| "learning_rate": 1e-06, |
| "loss": 0.2464, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0159500693481276, |
| "grad_norm": 6.63257360458374, |
| "learning_rate": 1e-06, |
| "loss": 0.2905, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.016296809986130374, |
| "grad_norm": 3.2842488288879395, |
| "learning_rate": 1e-06, |
| "loss": 0.2674, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.016643550624133148, |
| "grad_norm": 2.0572311878204346, |
| "learning_rate": 1e-06, |
| "loss": 0.237, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01699029126213592, |
| "grad_norm": 1.9547162055969238, |
| "learning_rate": 1e-06, |
| "loss": 0.2268, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.017337031900138695, |
| "grad_norm": 2.775322914123535, |
| "learning_rate": 1e-06, |
| "loss": 0.2315, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01768377253814147, |
| "grad_norm": 1.8670415878295898, |
| "learning_rate": 1e-06, |
| "loss": 0.21, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.018030513176144243, |
| "grad_norm": 2.3691036701202393, |
| "learning_rate": 1e-06, |
| "loss": 0.2465, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.018377253814147017, |
| "grad_norm": 4.806734561920166, |
| "learning_rate": 1e-06, |
| "loss": 0.2437, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01872399445214979, |
| "grad_norm": 4.114093780517578, |
| "learning_rate": 1e-06, |
| "loss": 0.2402, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.019070735090152564, |
| "grad_norm": 2.5198612213134766, |
| "learning_rate": 1e-06, |
| "loss": 0.2409, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 2.066634178161621, |
| "learning_rate": 1e-06, |
| "loss": 0.2435, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.019764216366158115, |
| "grad_norm": 2.1365854740142822, |
| "learning_rate": 1e-06, |
| "loss": 0.2321, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02011095700416089, |
| "grad_norm": 3.2070484161376953, |
| "learning_rate": 1e-06, |
| "loss": 0.2347, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.020457697642163663, |
| "grad_norm": 2.717198133468628, |
| "learning_rate": 1e-06, |
| "loss": 0.1966, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.020804438280166437, |
| "grad_norm": 3.56074857711792, |
| "learning_rate": 1e-06, |
| "loss": 0.2505, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02115117891816921, |
| "grad_norm": 3.655073642730713, |
| "learning_rate": 1e-06, |
| "loss": 0.2467, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.021497919556171984, |
| "grad_norm": 3.183065414428711, |
| "learning_rate": 1e-06, |
| "loss": 0.2201, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.021844660194174758, |
| "grad_norm": 3.160886764526367, |
| "learning_rate": 1e-06, |
| "loss": 0.2187, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.022191400832177532, |
| "grad_norm": 1.6657497882843018, |
| "learning_rate": 1e-06, |
| "loss": 0.2096, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.022538141470180306, |
| "grad_norm": 1.8679834604263306, |
| "learning_rate": 1e-06, |
| "loss": 0.2478, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02288488210818308, |
| "grad_norm": 1.8762993812561035, |
| "learning_rate": 1e-06, |
| "loss": 0.218, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.023231622746185853, |
| "grad_norm": 2.1899123191833496, |
| "learning_rate": 1e-06, |
| "loss": 0.2422, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.023578363384188627, |
| "grad_norm": 2.1151392459869385, |
| "learning_rate": 1e-06, |
| "loss": 0.267, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0239251040221914, |
| "grad_norm": 3.3461737632751465, |
| "learning_rate": 1e-06, |
| "loss": 0.2615, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.024271844660194174, |
| "grad_norm": 1.9115928411483765, |
| "learning_rate": 1e-06, |
| "loss": 0.2385, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.024618585298196948, |
| "grad_norm": 1.9966187477111816, |
| "learning_rate": 1e-06, |
| "loss": 0.2341, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.024965325936199722, |
| "grad_norm": 4.439002990722656, |
| "learning_rate": 1e-06, |
| "loss": 0.2683, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.025312066574202496, |
| "grad_norm": 3.926988124847412, |
| "learning_rate": 1e-06, |
| "loss": 0.2489, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02565880721220527, |
| "grad_norm": 2.2309978008270264, |
| "learning_rate": 1e-06, |
| "loss": 0.2471, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.026005547850208043, |
| "grad_norm": 3.369230031967163, |
| "learning_rate": 1e-06, |
| "loss": 0.2282, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.026352288488210817, |
| "grad_norm": 4.4944047927856445, |
| "learning_rate": 1e-06, |
| "loss": 0.227, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02669902912621359, |
| "grad_norm": 5.281230449676514, |
| "learning_rate": 1e-06, |
| "loss": 0.2358, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.027045769764216365, |
| "grad_norm": 2.726041555404663, |
| "learning_rate": 1e-06, |
| "loss": 0.2569, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.027392510402219142, |
| "grad_norm": 2.2781665325164795, |
| "learning_rate": 1e-06, |
| "loss": 0.2436, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.027739251040221916, |
| "grad_norm": 5.479579925537109, |
| "learning_rate": 1e-06, |
| "loss": 0.2588, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02808599167822469, |
| "grad_norm": 5.336531162261963, |
| "learning_rate": 1e-06, |
| "loss": 0.2315, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.028432732316227463, |
| "grad_norm": 1.9904639720916748, |
| "learning_rate": 1e-06, |
| "loss": 0.249, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.028779472954230237, |
| "grad_norm": 2.493849277496338, |
| "learning_rate": 1e-06, |
| "loss": 0.2586, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.02912621359223301, |
| "grad_norm": 2.0761678218841553, |
| "learning_rate": 1e-06, |
| "loss": 0.2256, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.029472954230235784, |
| "grad_norm": 3.9057183265686035, |
| "learning_rate": 1e-06, |
| "loss": 0.2605, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.029819694868238558, |
| "grad_norm": 1.8108359575271606, |
| "learning_rate": 1e-06, |
| "loss": 0.209, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.030166435506241332, |
| "grad_norm": 3.9006757736206055, |
| "learning_rate": 1e-06, |
| "loss": 0.2302, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.030513176144244106, |
| "grad_norm": 3.529353141784668, |
| "learning_rate": 1e-06, |
| "loss": 0.2497, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03085991678224688, |
| "grad_norm": 1.9021555185317993, |
| "learning_rate": 1e-06, |
| "loss": 0.2343, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.031206657420249653, |
| "grad_norm": 2.0818121433258057, |
| "learning_rate": 1e-06, |
| "loss": 0.2296, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03155339805825243, |
| "grad_norm": 2.9069199562072754, |
| "learning_rate": 1e-06, |
| "loss": 0.2336, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0319001386962552, |
| "grad_norm": 2.5499114990234375, |
| "learning_rate": 1e-06, |
| "loss": 0.2326, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.032246879334257975, |
| "grad_norm": 4.565133571624756, |
| "learning_rate": 1e-06, |
| "loss": 0.2325, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.03259361997226075, |
| "grad_norm": 1.965366005897522, |
| "learning_rate": 1e-06, |
| "loss": 0.2436, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.03294036061026352, |
| "grad_norm": 3.4335970878601074, |
| "learning_rate": 1e-06, |
| "loss": 0.2517, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.033287101248266296, |
| "grad_norm": 2.7447807788848877, |
| "learning_rate": 1e-06, |
| "loss": 0.219, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.03363384188626907, |
| "grad_norm": 1.9556831121444702, |
| "learning_rate": 1e-06, |
| "loss": 0.2071, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03398058252427184, |
| "grad_norm": 2.175020694732666, |
| "learning_rate": 1e-06, |
| "loss": 0.2481, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03432732316227462, |
| "grad_norm": 3.7034220695495605, |
| "learning_rate": 1e-06, |
| "loss": 0.2525, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03467406380027739, |
| "grad_norm": 4.264119625091553, |
| "learning_rate": 1e-06, |
| "loss": 0.21, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.035020804438280165, |
| "grad_norm": 4.019951820373535, |
| "learning_rate": 1e-06, |
| "loss": 0.2495, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.03536754507628294, |
| "grad_norm": 2.4060921669006348, |
| "learning_rate": 1e-06, |
| "loss": 0.2295, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 2.3502883911132812, |
| "learning_rate": 1e-06, |
| "loss": 0.232, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.036061026352288486, |
| "grad_norm": 4.202142715454102, |
| "learning_rate": 1e-06, |
| "loss": 0.2159, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03640776699029126, |
| "grad_norm": 6.274311542510986, |
| "learning_rate": 1e-06, |
| "loss": 0.2825, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.036754507628294034, |
| "grad_norm": 2.756082057952881, |
| "learning_rate": 1e-06, |
| "loss": 0.251, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.03710124826629681, |
| "grad_norm": 1.8495687246322632, |
| "learning_rate": 1e-06, |
| "loss": 0.2371, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03744798890429958, |
| "grad_norm": 2.5674338340759277, |
| "learning_rate": 1e-06, |
| "loss": 0.2157, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.037794729542302355, |
| "grad_norm": 3.304002285003662, |
| "learning_rate": 1e-06, |
| "loss": 0.2552, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03814147018030513, |
| "grad_norm": 4.199469566345215, |
| "learning_rate": 1e-06, |
| "loss": 0.2165, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0384882108183079, |
| "grad_norm": 2.1667439937591553, |
| "learning_rate": 1e-06, |
| "loss": 0.2639, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 2.8193678855895996, |
| "learning_rate": 1e-06, |
| "loss": 0.2174, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.03918169209431346, |
| "grad_norm": 1.8696953058242798, |
| "learning_rate": 1e-06, |
| "loss": 0.2057, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.03952843273231623, |
| "grad_norm": 2.3659722805023193, |
| "learning_rate": 1e-06, |
| "loss": 0.2731, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.039875173370319004, |
| "grad_norm": 1.782591700553894, |
| "learning_rate": 1e-06, |
| "loss": 0.2287, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04022191400832178, |
| "grad_norm": 1.8214987516403198, |
| "learning_rate": 1e-06, |
| "loss": 0.2225, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04056865464632455, |
| "grad_norm": 1.8799686431884766, |
| "learning_rate": 1e-06, |
| "loss": 0.2216, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.040915395284327326, |
| "grad_norm": 2.0166141986846924, |
| "learning_rate": 1e-06, |
| "loss": 0.2429, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0412621359223301, |
| "grad_norm": 5.748719215393066, |
| "learning_rate": 1e-06, |
| "loss": 0.2297, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04160887656033287, |
| "grad_norm": 2.980369806289673, |
| "learning_rate": 1e-06, |
| "loss": 0.2321, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04195561719833565, |
| "grad_norm": 2.343538999557495, |
| "learning_rate": 1e-06, |
| "loss": 0.2129, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04230235783633842, |
| "grad_norm": 1.8937276601791382, |
| "learning_rate": 1e-06, |
| "loss": 0.2297, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.042649098474341195, |
| "grad_norm": 7.0609354972839355, |
| "learning_rate": 1e-06, |
| "loss": 0.2407, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.04299583911234397, |
| "grad_norm": 1.7186024188995361, |
| "learning_rate": 1e-06, |
| "loss": 0.2042, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04334257975034674, |
| "grad_norm": 2.7151777744293213, |
| "learning_rate": 1e-06, |
| "loss": 0.2323, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.043689320388349516, |
| "grad_norm": 2.631126642227173, |
| "learning_rate": 1e-06, |
| "loss": 0.258, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.04403606102635229, |
| "grad_norm": 2.087282657623291, |
| "learning_rate": 1e-06, |
| "loss": 0.2013, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.044382801664355064, |
| "grad_norm": 3.90163254737854, |
| "learning_rate": 1e-06, |
| "loss": 0.21, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.04472954230235784, |
| "grad_norm": 5.360821723937988, |
| "learning_rate": 1e-06, |
| "loss": 0.2395, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04507628294036061, |
| "grad_norm": 2.5944528579711914, |
| "learning_rate": 1e-06, |
| "loss": 0.2365, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.045423023578363385, |
| "grad_norm": 2.6796722412109375, |
| "learning_rate": 1e-06, |
| "loss": 0.2361, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04576976421636616, |
| "grad_norm": 2.1970760822296143, |
| "learning_rate": 1e-06, |
| "loss": 0.2208, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04611650485436893, |
| "grad_norm": 2.5466551780700684, |
| "learning_rate": 1e-06, |
| "loss": 0.2339, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.046463245492371706, |
| "grad_norm": 4.48819637298584, |
| "learning_rate": 1e-06, |
| "loss": 0.2848, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04680998613037448, |
| "grad_norm": 1.9005647897720337, |
| "learning_rate": 1e-06, |
| "loss": 0.21, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.047156726768377254, |
| "grad_norm": 9.14037036895752, |
| "learning_rate": 1e-06, |
| "loss": 0.2805, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04750346740638003, |
| "grad_norm": 1.840706467628479, |
| "learning_rate": 1e-06, |
| "loss": 0.2, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0478502080443828, |
| "grad_norm": 5.4056315422058105, |
| "learning_rate": 1e-06, |
| "loss": 0.2754, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.048196948682385575, |
| "grad_norm": 1.9696849584579468, |
| "learning_rate": 1e-06, |
| "loss": 0.1991, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.04854368932038835, |
| "grad_norm": 1.8743115663528442, |
| "learning_rate": 1e-06, |
| "loss": 0.2051, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04889042995839112, |
| "grad_norm": 2.6441774368286133, |
| "learning_rate": 1e-06, |
| "loss": 0.2093, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.049237170596393896, |
| "grad_norm": 1.965954303741455, |
| "learning_rate": 1e-06, |
| "loss": 0.2259, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.04958391123439667, |
| "grad_norm": 3.0601775646209717, |
| "learning_rate": 1e-06, |
| "loss": 0.2273, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.049930651872399444, |
| "grad_norm": 2.259979009628296, |
| "learning_rate": 1e-06, |
| "loss": 0.2355, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05027739251040222, |
| "grad_norm": 3.249948024749756, |
| "learning_rate": 1e-06, |
| "loss": 0.2634, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05062413314840499, |
| "grad_norm": 4.351866245269775, |
| "learning_rate": 1e-06, |
| "loss": 0.2353, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.050970873786407765, |
| "grad_norm": 1.9071911573410034, |
| "learning_rate": 1e-06, |
| "loss": 0.2508, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05131761442441054, |
| "grad_norm": 4.363613128662109, |
| "learning_rate": 1e-06, |
| "loss": 0.2179, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05166435506241331, |
| "grad_norm": 3.450490713119507, |
| "learning_rate": 1e-06, |
| "loss": 0.2519, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.052011095700416086, |
| "grad_norm": 4.706002712249756, |
| "learning_rate": 1e-06, |
| "loss": 0.2135, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05235783633841886, |
| "grad_norm": 2.632718324661255, |
| "learning_rate": 1e-06, |
| "loss": 0.2218, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.052704576976421634, |
| "grad_norm": 3.9972386360168457, |
| "learning_rate": 1e-06, |
| "loss": 0.2227, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.05305131761442441, |
| "grad_norm": 1.96170175075531, |
| "learning_rate": 1e-06, |
| "loss": 0.2189, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05339805825242718, |
| "grad_norm": 4.996349811553955, |
| "learning_rate": 1e-06, |
| "loss": 0.2225, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.053744798890429955, |
| "grad_norm": 5.351596832275391, |
| "learning_rate": 1e-06, |
| "loss": 0.2043, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05409153952843273, |
| "grad_norm": 2.1169140338897705, |
| "learning_rate": 1e-06, |
| "loss": 0.2483, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0544382801664355, |
| "grad_norm": 2.441293478012085, |
| "learning_rate": 1e-06, |
| "loss": 0.216, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.054785020804438284, |
| "grad_norm": 2.3679656982421875, |
| "learning_rate": 1e-06, |
| "loss": 0.2292, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.05513176144244106, |
| "grad_norm": 5.575881004333496, |
| "learning_rate": 1e-06, |
| "loss": 0.2512, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05547850208044383, |
| "grad_norm": 2.48142671585083, |
| "learning_rate": 1e-06, |
| "loss": 0.2249, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.055825242718446605, |
| "grad_norm": 3.078421115875244, |
| "learning_rate": 1e-06, |
| "loss": 0.2205, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.05617198335644938, |
| "grad_norm": 2.7186150550842285, |
| "learning_rate": 1e-06, |
| "loss": 0.2208, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.05651872399445215, |
| "grad_norm": 2.42346453666687, |
| "learning_rate": 1e-06, |
| "loss": 0.2404, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.056865464632454926, |
| "grad_norm": 2.095087766647339, |
| "learning_rate": 1e-06, |
| "loss": 0.2354, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0572122052704577, |
| "grad_norm": 2.2342190742492676, |
| "learning_rate": 1e-06, |
| "loss": 0.2002, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.057558945908460474, |
| "grad_norm": 1.9590411186218262, |
| "learning_rate": 1e-06, |
| "loss": 0.2073, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.05790568654646325, |
| "grad_norm": 4.556865692138672, |
| "learning_rate": 1e-06, |
| "loss": 0.2742, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 2.7081010341644287, |
| "learning_rate": 1e-06, |
| "loss": 0.2455, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.058599167822468795, |
| "grad_norm": 2.4702224731445312, |
| "learning_rate": 1e-06, |
| "loss": 0.2209, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.05894590846047157, |
| "grad_norm": 2.41284441947937, |
| "learning_rate": 1e-06, |
| "loss": 0.259, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05929264909847434, |
| "grad_norm": 3.244028091430664, |
| "learning_rate": 1e-06, |
| "loss": 0.2358, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.059639389736477116, |
| "grad_norm": 2.7794923782348633, |
| "learning_rate": 1e-06, |
| "loss": 0.1973, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.05998613037447989, |
| "grad_norm": 3.4682586193084717, |
| "learning_rate": 1e-06, |
| "loss": 0.2523, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.060332871012482664, |
| "grad_norm": 2.3559231758117676, |
| "learning_rate": 1e-06, |
| "loss": 0.2529, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06067961165048544, |
| "grad_norm": 3.0808186531066895, |
| "learning_rate": 1e-06, |
| "loss": 0.1952, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06102635228848821, |
| "grad_norm": 2.975972890853882, |
| "learning_rate": 1e-06, |
| "loss": 0.2226, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.061373092926490985, |
| "grad_norm": 2.133094072341919, |
| "learning_rate": 1e-06, |
| "loss": 0.2202, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06171983356449376, |
| "grad_norm": 1.9771744012832642, |
| "learning_rate": 1e-06, |
| "loss": 0.2025, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06206657420249653, |
| "grad_norm": 1.7458380460739136, |
| "learning_rate": 1e-06, |
| "loss": 0.1922, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06241331484049931, |
| "grad_norm": 2.3998160362243652, |
| "learning_rate": 1e-06, |
| "loss": 0.1807, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06276005547850208, |
| "grad_norm": 2.3493542671203613, |
| "learning_rate": 1e-06, |
| "loss": 0.2146, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06310679611650485, |
| "grad_norm": 6.937662124633789, |
| "learning_rate": 1e-06, |
| "loss": 0.2579, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06345353675450763, |
| "grad_norm": 1.6203336715698242, |
| "learning_rate": 1e-06, |
| "loss": 0.198, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0638002773925104, |
| "grad_norm": 1.5239827632904053, |
| "learning_rate": 1e-06, |
| "loss": 0.189, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.06414701803051318, |
| "grad_norm": 3.796680450439453, |
| "learning_rate": 1e-06, |
| "loss": 0.2651, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06449375866851595, |
| "grad_norm": 2.5366694927215576, |
| "learning_rate": 1e-06, |
| "loss": 0.2121, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06484049930651872, |
| "grad_norm": 1.8307462930679321, |
| "learning_rate": 1e-06, |
| "loss": 0.2303, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0651872399445215, |
| "grad_norm": 4.409234523773193, |
| "learning_rate": 1e-06, |
| "loss": 0.2159, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06553398058252427, |
| "grad_norm": 4.105565547943115, |
| "learning_rate": 1e-06, |
| "loss": 0.2293, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06588072122052704, |
| "grad_norm": 4.292323589324951, |
| "learning_rate": 1e-06, |
| "loss": 0.2063, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06622746185852982, |
| "grad_norm": 2.141298294067383, |
| "learning_rate": 1e-06, |
| "loss": 0.1966, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06657420249653259, |
| "grad_norm": 2.1823067665100098, |
| "learning_rate": 1e-06, |
| "loss": 0.2019, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.06692094313453537, |
| "grad_norm": 4.099167823791504, |
| "learning_rate": 1e-06, |
| "loss": 0.237, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.06726768377253814, |
| "grad_norm": 2.6066272258758545, |
| "learning_rate": 1e-06, |
| "loss": 0.2275, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.06761442441054091, |
| "grad_norm": 2.4590089321136475, |
| "learning_rate": 1e-06, |
| "loss": 0.2467, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.06796116504854369, |
| "grad_norm": 3.860490083694458, |
| "learning_rate": 1e-06, |
| "loss": 0.2552, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.06830790568654646, |
| "grad_norm": 2.6223294734954834, |
| "learning_rate": 1e-06, |
| "loss": 0.2669, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.06865464632454923, |
| "grad_norm": 3.189718723297119, |
| "learning_rate": 1e-06, |
| "loss": 0.2096, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.06900138696255201, |
| "grad_norm": 5.834605693817139, |
| "learning_rate": 1e-06, |
| "loss": 0.2272, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.06934812760055478, |
| "grad_norm": 4.342336177825928, |
| "learning_rate": 1e-06, |
| "loss": 0.205, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06969486823855756, |
| "grad_norm": 5.050157070159912, |
| "learning_rate": 1e-06, |
| "loss": 0.2252, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07004160887656033, |
| "grad_norm": 1.8504279851913452, |
| "learning_rate": 1e-06, |
| "loss": 0.1912, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.0703883495145631, |
| "grad_norm": 2.362457275390625, |
| "learning_rate": 1e-06, |
| "loss": 0.2248, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07073509015256588, |
| "grad_norm": 4.938218116760254, |
| "learning_rate": 1e-06, |
| "loss": 0.2119, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07108183079056865, |
| "grad_norm": 4.182404041290283, |
| "learning_rate": 1e-06, |
| "loss": 0.2224, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.9187567234039307, |
| "learning_rate": 1e-06, |
| "loss": 0.2193, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.0717753120665742, |
| "grad_norm": 4.429518699645996, |
| "learning_rate": 1e-06, |
| "loss": 0.2174, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.07212205270457697, |
| "grad_norm": 5.2877044677734375, |
| "learning_rate": 1e-06, |
| "loss": 0.2216, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07246879334257975, |
| "grad_norm": 4.316680431365967, |
| "learning_rate": 1e-06, |
| "loss": 0.254, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07281553398058252, |
| "grad_norm": 2.9150466918945312, |
| "learning_rate": 1e-06, |
| "loss": 0.2083, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0731622746185853, |
| "grad_norm": 1.7052415609359741, |
| "learning_rate": 1e-06, |
| "loss": 0.2126, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07350901525658807, |
| "grad_norm": 3.053804636001587, |
| "learning_rate": 1e-06, |
| "loss": 0.2239, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07385575589459084, |
| "grad_norm": 2.949108123779297, |
| "learning_rate": 1e-06, |
| "loss": 0.2101, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07420249653259361, |
| "grad_norm": 1.8862674236297607, |
| "learning_rate": 1e-06, |
| "loss": 0.2066, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07454923717059639, |
| "grad_norm": 2.371164321899414, |
| "learning_rate": 1e-06, |
| "loss": 0.2532, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07489597780859916, |
| "grad_norm": 2.709270477294922, |
| "learning_rate": 1e-06, |
| "loss": 0.1968, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07524271844660194, |
| "grad_norm": 5.096902847290039, |
| "learning_rate": 1e-06, |
| "loss": 0.2111, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.07558945908460471, |
| "grad_norm": 2.7359046936035156, |
| "learning_rate": 1e-06, |
| "loss": 0.1927, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.07593619972260748, |
| "grad_norm": 1.800238847732544, |
| "learning_rate": 1e-06, |
| "loss": 0.1898, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07628294036061026, |
| "grad_norm": 4.802062511444092, |
| "learning_rate": 1e-06, |
| "loss": 0.221, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07662968099861303, |
| "grad_norm": 2.447291374206543, |
| "learning_rate": 1e-06, |
| "loss": 0.2287, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.0769764216366158, |
| "grad_norm": 7.738630771636963, |
| "learning_rate": 1e-06, |
| "loss": 0.2055, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.07732316227461858, |
| "grad_norm": 2.3890295028686523, |
| "learning_rate": 1e-06, |
| "loss": 0.2246, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 4.384284496307373, |
| "learning_rate": 1e-06, |
| "loss": 0.2348, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.07801664355062413, |
| "grad_norm": 2.8400914669036865, |
| "learning_rate": 1e-06, |
| "loss": 0.2089, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07836338418862691, |
| "grad_norm": 2.7480292320251465, |
| "learning_rate": 1e-06, |
| "loss": 0.2736, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.07871012482662969, |
| "grad_norm": 5.927616596221924, |
| "learning_rate": 1e-06, |
| "loss": 0.2613, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.07905686546463246, |
| "grad_norm": 5.0669846534729, |
| "learning_rate": 1e-06, |
| "loss": 0.24, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.07940360610263524, |
| "grad_norm": 5.679924011230469, |
| "learning_rate": 1e-06, |
| "loss": 0.2188, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.07975034674063801, |
| "grad_norm": 3.200345277786255, |
| "learning_rate": 1e-06, |
| "loss": 0.2018, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08009708737864078, |
| "grad_norm": 3.164886474609375, |
| "learning_rate": 1e-06, |
| "loss": 0.2325, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.08044382801664356, |
| "grad_norm": 3.412248373031616, |
| "learning_rate": 1e-06, |
| "loss": 0.2029, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.08079056865464633, |
| "grad_norm": 4.144454479217529, |
| "learning_rate": 1e-06, |
| "loss": 0.1915, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.0811373092926491, |
| "grad_norm": 2.3590505123138428, |
| "learning_rate": 1e-06, |
| "loss": 0.2341, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08148404993065188, |
| "grad_norm": 1.8131296634674072, |
| "learning_rate": 1e-06, |
| "loss": 0.2044, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08183079056865465, |
| "grad_norm": 3.05785870552063, |
| "learning_rate": 1e-06, |
| "loss": 0.2044, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.08217753120665743, |
| "grad_norm": 3.1392011642456055, |
| "learning_rate": 1e-06, |
| "loss": 0.203, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0825242718446602, |
| "grad_norm": 2.5274972915649414, |
| "learning_rate": 1e-06, |
| "loss": 0.2357, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08287101248266297, |
| "grad_norm": 1.6625158786773682, |
| "learning_rate": 1e-06, |
| "loss": 0.2181, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08321775312066575, |
| "grad_norm": 1.920361876487732, |
| "learning_rate": 1e-06, |
| "loss": 0.2186, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08356449375866852, |
| "grad_norm": 3.1764955520629883, |
| "learning_rate": 1e-06, |
| "loss": 0.1844, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0839112343966713, |
| "grad_norm": 3.478422164916992, |
| "learning_rate": 1e-06, |
| "loss": 0.2248, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.08425797503467407, |
| "grad_norm": 2.3174679279327393, |
| "learning_rate": 1e-06, |
| "loss": 0.2345, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08460471567267684, |
| "grad_norm": 3.9944794178009033, |
| "learning_rate": 1e-06, |
| "loss": 0.1961, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08495145631067962, |
| "grad_norm": 4.681294918060303, |
| "learning_rate": 1e-06, |
| "loss": 0.2107, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08529819694868239, |
| "grad_norm": 1.9410604238510132, |
| "learning_rate": 1e-06, |
| "loss": 0.2098, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.08564493758668516, |
| "grad_norm": 6.453781604766846, |
| "learning_rate": 1e-06, |
| "loss": 0.2285, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08599167822468794, |
| "grad_norm": 1.9601918458938599, |
| "learning_rate": 1e-06, |
| "loss": 0.2293, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.08633841886269071, |
| "grad_norm": 3.6088569164276123, |
| "learning_rate": 1e-06, |
| "loss": 0.2397, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.08668515950069348, |
| "grad_norm": 6.8316826820373535, |
| "learning_rate": 1e-06, |
| "loss": 0.2361, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08703190013869626, |
| "grad_norm": 4.1882147789001465, |
| "learning_rate": 1e-06, |
| "loss": 0.2352, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.08737864077669903, |
| "grad_norm": 2.6391122341156006, |
| "learning_rate": 1e-06, |
| "loss": 0.2207, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0877253814147018, |
| "grad_norm": 1.6957664489746094, |
| "learning_rate": 1e-06, |
| "loss": 0.1932, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.08807212205270458, |
| "grad_norm": 2.1076745986938477, |
| "learning_rate": 1e-06, |
| "loss": 0.2082, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.08841886269070735, |
| "grad_norm": 2.3015823364257812, |
| "learning_rate": 1e-06, |
| "loss": 0.2228, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08876560332871013, |
| "grad_norm": 4.031891822814941, |
| "learning_rate": 1e-06, |
| "loss": 0.2055, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.0891123439667129, |
| "grad_norm": 2.306821346282959, |
| "learning_rate": 1e-06, |
| "loss": 0.2286, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.08945908460471567, |
| "grad_norm": 2.1814613342285156, |
| "learning_rate": 1e-06, |
| "loss": 0.2046, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.08980582524271845, |
| "grad_norm": 5.232451438903809, |
| "learning_rate": 1e-06, |
| "loss": 0.216, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.09015256588072122, |
| "grad_norm": 7.794690132141113, |
| "learning_rate": 1e-06, |
| "loss": 0.1838, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.090499306518724, |
| "grad_norm": 3.107283115386963, |
| "learning_rate": 1e-06, |
| "loss": 0.2261, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.09084604715672677, |
| "grad_norm": 1.7422479391098022, |
| "learning_rate": 1e-06, |
| "loss": 0.2222, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.09119278779472954, |
| "grad_norm": 2.579590320587158, |
| "learning_rate": 1e-06, |
| "loss": 0.2015, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.09153952843273232, |
| "grad_norm": 3.788679838180542, |
| "learning_rate": 1e-06, |
| "loss": 0.2373, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.09188626907073509, |
| "grad_norm": 2.0979104042053223, |
| "learning_rate": 1e-06, |
| "loss": 0.2183, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09223300970873786, |
| "grad_norm": 2.019399642944336, |
| "learning_rate": 1e-06, |
| "loss": 0.2195, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09257975034674064, |
| "grad_norm": 2.5858969688415527, |
| "learning_rate": 1e-06, |
| "loss": 0.234, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09292649098474341, |
| "grad_norm": 2.279453754425049, |
| "learning_rate": 1e-06, |
| "loss": 0.2312, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09327323162274619, |
| "grad_norm": 3.3690133094787598, |
| "learning_rate": 1e-06, |
| "loss": 0.2499, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09361997226074896, |
| "grad_norm": 1.591291904449463, |
| "learning_rate": 1e-06, |
| "loss": 0.1986, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09396671289875173, |
| "grad_norm": 2.8157074451446533, |
| "learning_rate": 1e-06, |
| "loss": 0.2185, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09431345353675451, |
| "grad_norm": 2.9287891387939453, |
| "learning_rate": 1e-06, |
| "loss": 0.2151, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09466019417475728, |
| "grad_norm": 3.26141357421875, |
| "learning_rate": 1e-06, |
| "loss": 0.2295, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09500693481276005, |
| "grad_norm": 1.793872594833374, |
| "learning_rate": 1e-06, |
| "loss": 0.2053, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.09535367545076283, |
| "grad_norm": 6.906111717224121, |
| "learning_rate": 1e-06, |
| "loss": 0.1894, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.0957004160887656, |
| "grad_norm": 5.939563751220703, |
| "learning_rate": 1e-06, |
| "loss": 0.2454, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.09604715672676838, |
| "grad_norm": 4.869255065917969, |
| "learning_rate": 1e-06, |
| "loss": 0.2029, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.09639389736477115, |
| "grad_norm": 2.0359835624694824, |
| "learning_rate": 1e-06, |
| "loss": 0.2193, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.09674063800277392, |
| "grad_norm": 1.732591152191162, |
| "learning_rate": 1e-06, |
| "loss": 0.2223, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 2.112185478210449, |
| "learning_rate": 1e-06, |
| "loss": 0.2261, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09743411927877947, |
| "grad_norm": 1.9228016138076782, |
| "learning_rate": 1e-06, |
| "loss": 0.2168, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.09778085991678225, |
| "grad_norm": 2.5188448429107666, |
| "learning_rate": 1e-06, |
| "loss": 0.2526, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.09812760055478502, |
| "grad_norm": 6.440067291259766, |
| "learning_rate": 1e-06, |
| "loss": 0.1962, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.09847434119278779, |
| "grad_norm": 2.1563827991485596, |
| "learning_rate": 1e-06, |
| "loss": 0.2155, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.09882108183079057, |
| "grad_norm": 2.890427589416504, |
| "learning_rate": 1e-06, |
| "loss": 0.204, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09916782246879334, |
| "grad_norm": 3.064075469970703, |
| "learning_rate": 1e-06, |
| "loss": 0.2124, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.09951456310679611, |
| "grad_norm": 2.013902187347412, |
| "learning_rate": 1e-06, |
| "loss": 0.2011, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.09986130374479889, |
| "grad_norm": 5.711750507354736, |
| "learning_rate": 1e-06, |
| "loss": 0.198, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.10020804438280166, |
| "grad_norm": 4.605215072631836, |
| "learning_rate": 1e-06, |
| "loss": 0.2074, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.10055478502080444, |
| "grad_norm": 3.5413124561309814, |
| "learning_rate": 1e-06, |
| "loss": 0.2516, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10090152565880721, |
| "grad_norm": 2.2203333377838135, |
| "learning_rate": 1e-06, |
| "loss": 0.2353, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.10124826629680998, |
| "grad_norm": 3.4780688285827637, |
| "learning_rate": 1e-06, |
| "loss": 0.2278, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.10159500693481276, |
| "grad_norm": 2.2636001110076904, |
| "learning_rate": 1e-06, |
| "loss": 0.1992, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.10194174757281553, |
| "grad_norm": 4.070486545562744, |
| "learning_rate": 1e-06, |
| "loss": 0.2037, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1022884882108183, |
| "grad_norm": 2.2457258701324463, |
| "learning_rate": 1e-06, |
| "loss": 0.2409, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.10263522884882108, |
| "grad_norm": 2.1032586097717285, |
| "learning_rate": 1e-06, |
| "loss": 0.2105, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10298196948682385, |
| "grad_norm": 2.8881735801696777, |
| "learning_rate": 1e-06, |
| "loss": 0.2115, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.10332871012482663, |
| "grad_norm": 2.166180372238159, |
| "learning_rate": 1e-06, |
| "loss": 0.2231, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.1036754507628294, |
| "grad_norm": 2.0594778060913086, |
| "learning_rate": 1e-06, |
| "loss": 0.2409, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.10402219140083217, |
| "grad_norm": 1.8544009923934937, |
| "learning_rate": 1e-06, |
| "loss": 0.1873, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10436893203883495, |
| "grad_norm": 1.979411005973816, |
| "learning_rate": 1e-06, |
| "loss": 0.2177, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.10471567267683772, |
| "grad_norm": 2.5190136432647705, |
| "learning_rate": 1e-06, |
| "loss": 0.2206, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1050624133148405, |
| "grad_norm": 2.110924482345581, |
| "learning_rate": 1e-06, |
| "loss": 0.2087, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.10540915395284327, |
| "grad_norm": 5.314784526824951, |
| "learning_rate": 1e-06, |
| "loss": 0.1707, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.10575589459084604, |
| "grad_norm": 3.8186211585998535, |
| "learning_rate": 1e-06, |
| "loss": 0.2255, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.10610263522884882, |
| "grad_norm": 2.197397232055664, |
| "learning_rate": 1e-06, |
| "loss": 0.2026, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.10644937586685159, |
| "grad_norm": 3.364658832550049, |
| "learning_rate": 1e-06, |
| "loss": 0.237, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.10679611650485436, |
| "grad_norm": 3.6538567543029785, |
| "learning_rate": 1e-06, |
| "loss": 0.2135, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 2.080467462539673, |
| "learning_rate": 1e-06, |
| "loss": 0.2011, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.10748959778085991, |
| "grad_norm": 2.394052267074585, |
| "learning_rate": 1e-06, |
| "loss": 0.2081, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10783633841886268, |
| "grad_norm": 3.270670175552368, |
| "learning_rate": 1e-06, |
| "loss": 0.2139, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.10818307905686546, |
| "grad_norm": 4.5176682472229, |
| "learning_rate": 1e-06, |
| "loss": 0.2214, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.10852981969486823, |
| "grad_norm": 2.9834976196289062, |
| "learning_rate": 1e-06, |
| "loss": 0.2235, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.108876560332871, |
| "grad_norm": 3.658803701400757, |
| "learning_rate": 1e-06, |
| "loss": 0.1903, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.10922330097087378, |
| "grad_norm": 2.972036600112915, |
| "learning_rate": 1e-06, |
| "loss": 0.2044, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.10957004160887657, |
| "grad_norm": 2.014233350753784, |
| "learning_rate": 1e-06, |
| "loss": 0.2177, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.10991678224687934, |
| "grad_norm": 3.3329989910125732, |
| "learning_rate": 1e-06, |
| "loss": 0.212, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.11026352288488211, |
| "grad_norm": 4.286015510559082, |
| "learning_rate": 1e-06, |
| "loss": 0.2164, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.11061026352288489, |
| "grad_norm": 2.517414093017578, |
| "learning_rate": 1e-06, |
| "loss": 0.2443, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.11095700416088766, |
| "grad_norm": 2.3605127334594727, |
| "learning_rate": 1e-06, |
| "loss": 0.2136, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11130374479889044, |
| "grad_norm": 2.0342857837677, |
| "learning_rate": 1e-06, |
| "loss": 0.2117, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.11165048543689321, |
| "grad_norm": 3.313946485519409, |
| "learning_rate": 1e-06, |
| "loss": 0.2159, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.11199722607489598, |
| "grad_norm": 2.8215742111206055, |
| "learning_rate": 1e-06, |
| "loss": 0.1666, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.11234396671289876, |
| "grad_norm": 3.7066569328308105, |
| "learning_rate": 1e-06, |
| "loss": 0.2115, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.11269070735090153, |
| "grad_norm": 2.824295997619629, |
| "learning_rate": 1e-06, |
| "loss": 0.2068, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1130374479889043, |
| "grad_norm": 3.302109956741333, |
| "learning_rate": 1e-06, |
| "loss": 0.2007, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.11338418862690708, |
| "grad_norm": 1.7297828197479248, |
| "learning_rate": 1e-06, |
| "loss": 0.1796, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.11373092926490985, |
| "grad_norm": 2.3774609565734863, |
| "learning_rate": 1e-06, |
| "loss": 0.197, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11407766990291263, |
| "grad_norm": 3.876222610473633, |
| "learning_rate": 1e-06, |
| "loss": 0.2106, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1144244105409154, |
| "grad_norm": 2.891040086746216, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11477115117891817, |
| "grad_norm": 3.9112064838409424, |
| "learning_rate": 1e-06, |
| "loss": 0.2031, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.11511789181692095, |
| "grad_norm": 1.772921085357666, |
| "learning_rate": 1e-06, |
| "loss": 0.2011, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.11546463245492372, |
| "grad_norm": 2.6547513008117676, |
| "learning_rate": 1e-06, |
| "loss": 0.2123, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.1158113730929265, |
| "grad_norm": 5.290209770202637, |
| "learning_rate": 1e-06, |
| "loss": 0.2275, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.11615811373092927, |
| "grad_norm": 5.404000759124756, |
| "learning_rate": 1e-06, |
| "loss": 0.2422, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 2.187218427658081, |
| "learning_rate": 1e-06, |
| "loss": 0.1854, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.11685159500693482, |
| "grad_norm": 3.028299570083618, |
| "learning_rate": 1e-06, |
| "loss": 0.2331, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.11719833564493759, |
| "grad_norm": 2.9846150875091553, |
| "learning_rate": 1e-06, |
| "loss": 0.2288, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.11754507628294036, |
| "grad_norm": 3.630133867263794, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.11789181692094314, |
| "grad_norm": 6.968530654907227, |
| "learning_rate": 1e-06, |
| "loss": 0.2637, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11823855755894591, |
| "grad_norm": 2.9122650623321533, |
| "learning_rate": 1e-06, |
| "loss": 0.1821, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.11858529819694869, |
| "grad_norm": 2.7790746688842773, |
| "learning_rate": 1e-06, |
| "loss": 0.1917, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.11893203883495146, |
| "grad_norm": 4.776673793792725, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.11927877947295423, |
| "grad_norm": 2.7039010524749756, |
| "learning_rate": 1e-06, |
| "loss": 0.2258, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.119625520110957, |
| "grad_norm": 2.733076333999634, |
| "learning_rate": 1e-06, |
| "loss": 0.1988, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11997226074895978, |
| "grad_norm": 3.1806342601776123, |
| "learning_rate": 1e-06, |
| "loss": 0.2147, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.12031900138696255, |
| "grad_norm": 2.9872305393218994, |
| "learning_rate": 1e-06, |
| "loss": 0.1832, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.12066574202496533, |
| "grad_norm": 6.822136402130127, |
| "learning_rate": 1e-06, |
| "loss": 0.2202, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.1210124826629681, |
| "grad_norm": 2.3304481506347656, |
| "learning_rate": 1e-06, |
| "loss": 0.2352, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.12135922330097088, |
| "grad_norm": 2.4243297576904297, |
| "learning_rate": 1e-06, |
| "loss": 0.2004, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12170596393897365, |
| "grad_norm": 3.050774097442627, |
| "learning_rate": 1e-06, |
| "loss": 0.2103, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.12205270457697642, |
| "grad_norm": 2.177351951599121, |
| "learning_rate": 1e-06, |
| "loss": 0.2416, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1223994452149792, |
| "grad_norm": 4.790650844573975, |
| "learning_rate": 1e-06, |
| "loss": 0.2174, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.12274618585298197, |
| "grad_norm": 1.6365033388137817, |
| "learning_rate": 1e-06, |
| "loss": 0.1874, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.12309292649098474, |
| "grad_norm": 3.9262518882751465, |
| "learning_rate": 1e-06, |
| "loss": 0.2175, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.12343966712898752, |
| "grad_norm": 1.7626748085021973, |
| "learning_rate": 1e-06, |
| "loss": 0.2129, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.12378640776699029, |
| "grad_norm": 1.7612718343734741, |
| "learning_rate": 1e-06, |
| "loss": 0.1816, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.12413314840499307, |
| "grad_norm": 1.9998083114624023, |
| "learning_rate": 1e-06, |
| "loss": 0.1955, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.12447988904299584, |
| "grad_norm": 1.9432895183563232, |
| "learning_rate": 1e-06, |
| "loss": 0.1793, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.12482662968099861, |
| "grad_norm": 2.2494137287139893, |
| "learning_rate": 1e-06, |
| "loss": 0.2182, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1251733703190014, |
| "grad_norm": 2.6575872898101807, |
| "learning_rate": 1e-06, |
| "loss": 0.2126, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.12552011095700416, |
| "grad_norm": 2.1404902935028076, |
| "learning_rate": 1e-06, |
| "loss": 0.1727, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.12586685159500693, |
| "grad_norm": 3.4852030277252197, |
| "learning_rate": 1e-06, |
| "loss": 0.2132, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.1262135922330097, |
| "grad_norm": 1.9237537384033203, |
| "learning_rate": 1e-06, |
| "loss": 0.1882, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.12656033287101248, |
| "grad_norm": 2.6047005653381348, |
| "learning_rate": 1e-06, |
| "loss": 0.2276, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.12690707350901526, |
| "grad_norm": 4.0389838218688965, |
| "learning_rate": 1e-06, |
| "loss": 0.2294, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.12725381414701803, |
| "grad_norm": 2.9058427810668945, |
| "learning_rate": 1e-06, |
| "loss": 0.1704, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1276005547850208, |
| "grad_norm": 2.214973211288452, |
| "learning_rate": 1e-06, |
| "loss": 0.1933, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.12794729542302358, |
| "grad_norm": 2.8029608726501465, |
| "learning_rate": 1e-06, |
| "loss": 0.238, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.12829403606102635, |
| "grad_norm": 3.4244496822357178, |
| "learning_rate": 1e-06, |
| "loss": 0.2404, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12864077669902912, |
| "grad_norm": 3.9695568084716797, |
| "learning_rate": 1e-06, |
| "loss": 0.2299, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1289875173370319, |
| "grad_norm": 2.100534439086914, |
| "learning_rate": 1e-06, |
| "loss": 0.1878, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.12933425797503467, |
| "grad_norm": 1.942069172859192, |
| "learning_rate": 1e-06, |
| "loss": 0.227, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.12968099861303745, |
| "grad_norm": 2.7720251083374023, |
| "learning_rate": 1e-06, |
| "loss": 0.2054, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.13002773925104022, |
| "grad_norm": 1.916839599609375, |
| "learning_rate": 1e-06, |
| "loss": 0.2135, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.130374479889043, |
| "grad_norm": 2.759535312652588, |
| "learning_rate": 1e-06, |
| "loss": 0.2327, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.13072122052704577, |
| "grad_norm": 2.059061288833618, |
| "learning_rate": 1e-06, |
| "loss": 0.199, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.13106796116504854, |
| "grad_norm": 1.892815113067627, |
| "learning_rate": 1e-06, |
| "loss": 0.1972, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.13141470180305131, |
| "grad_norm": 2.945600748062134, |
| "learning_rate": 1e-06, |
| "loss": 0.2046, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.1317614424410541, |
| "grad_norm": 2.75258207321167, |
| "learning_rate": 1e-06, |
| "loss": 0.2171, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13210818307905686, |
| "grad_norm": 3.2966949939727783, |
| "learning_rate": 1e-06, |
| "loss": 0.2423, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.13245492371705964, |
| "grad_norm": 2.6719970703125, |
| "learning_rate": 1e-06, |
| "loss": 0.1777, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1328016643550624, |
| "grad_norm": 3.278883695602417, |
| "learning_rate": 1e-06, |
| "loss": 0.16, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.13314840499306518, |
| "grad_norm": 3.322058916091919, |
| "learning_rate": 1e-06, |
| "loss": 0.1984, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.13349514563106796, |
| "grad_norm": 2.3730714321136475, |
| "learning_rate": 1e-06, |
| "loss": 0.1836, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.13384188626907073, |
| "grad_norm": 1.6652333736419678, |
| "learning_rate": 1e-06, |
| "loss": 0.1896, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.1341886269070735, |
| "grad_norm": 3.528460741043091, |
| "learning_rate": 1e-06, |
| "loss": 0.2363, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.13453536754507628, |
| "grad_norm": 2.009749174118042, |
| "learning_rate": 1e-06, |
| "loss": 0.1814, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.13488210818307905, |
| "grad_norm": 2.015735149383545, |
| "learning_rate": 1e-06, |
| "loss": 0.2067, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.13522884882108183, |
| "grad_norm": 1.9921092987060547, |
| "learning_rate": 1e-06, |
| "loss": 0.1973, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1355755894590846, |
| "grad_norm": 2.4047930240631104, |
| "learning_rate": 1e-06, |
| "loss": 0.1924, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 2.010021924972534, |
| "learning_rate": 1e-06, |
| "loss": 0.1744, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.13626907073509015, |
| "grad_norm": 2.5969536304473877, |
| "learning_rate": 1e-06, |
| "loss": 0.2092, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.13661581137309292, |
| "grad_norm": 3.3786892890930176, |
| "learning_rate": 1e-06, |
| "loss": 0.2044, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1369625520110957, |
| "grad_norm": 1.805240511894226, |
| "learning_rate": 1e-06, |
| "loss": 0.2015, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.13730929264909847, |
| "grad_norm": 1.8784292936325073, |
| "learning_rate": 1e-06, |
| "loss": 0.2059, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.13765603328710124, |
| "grad_norm": 2.3622231483459473, |
| "learning_rate": 1e-06, |
| "loss": 0.1786, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.13800277392510402, |
| "grad_norm": 1.7961145639419556, |
| "learning_rate": 1e-06, |
| "loss": 0.2168, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1383495145631068, |
| "grad_norm": 1.9970123767852783, |
| "learning_rate": 1e-06, |
| "loss": 0.1922, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.13869625520110956, |
| "grad_norm": 5.615191459655762, |
| "learning_rate": 1e-06, |
| "loss": 0.2319, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13904299583911234, |
| "grad_norm": 2.6220815181732178, |
| "learning_rate": 1e-06, |
| "loss": 0.1899, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1393897364771151, |
| "grad_norm": 3.2528562545776367, |
| "learning_rate": 1e-06, |
| "loss": 0.1744, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.13973647711511789, |
| "grad_norm": 2.6345772743225098, |
| "learning_rate": 1e-06, |
| "loss": 0.2226, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.14008321775312066, |
| "grad_norm": 5.005814075469971, |
| "learning_rate": 1e-06, |
| "loss": 0.2174, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.14042995839112343, |
| "grad_norm": 2.4105288982391357, |
| "learning_rate": 1e-06, |
| "loss": 0.2154, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.1407766990291262, |
| "grad_norm": 3.282689332962036, |
| "learning_rate": 1e-06, |
| "loss": 0.246, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.14112343966712898, |
| "grad_norm": 1.84548020362854, |
| "learning_rate": 1e-06, |
| "loss": 0.204, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.14147018030513175, |
| "grad_norm": 3.254951000213623, |
| "learning_rate": 1e-06, |
| "loss": 0.2355, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.14181692094313453, |
| "grad_norm": 2.499875068664551, |
| "learning_rate": 1e-06, |
| "loss": 0.2076, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.1421636615811373, |
| "grad_norm": 1.6304558515548706, |
| "learning_rate": 1e-06, |
| "loss": 0.1925, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14251040221914008, |
| "grad_norm": 2.8476548194885254, |
| "learning_rate": 1e-06, |
| "loss": 0.2329, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 2.403111696243286, |
| "learning_rate": 1e-06, |
| "loss": 0.2016, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.14320388349514562, |
| "grad_norm": 2.050206422805786, |
| "learning_rate": 1e-06, |
| "loss": 0.227, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1435506241331484, |
| "grad_norm": 3.8210604190826416, |
| "learning_rate": 1e-06, |
| "loss": 0.1825, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.14389736477115117, |
| "grad_norm": 2.2794923782348633, |
| "learning_rate": 1e-06, |
| "loss": 0.1822, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.14424410540915394, |
| "grad_norm": 2.152036666870117, |
| "learning_rate": 1e-06, |
| "loss": 0.2087, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.14459084604715672, |
| "grad_norm": 2.4530344009399414, |
| "learning_rate": 1e-06, |
| "loss": 0.2381, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1449375866851595, |
| "grad_norm": 2.0212299823760986, |
| "learning_rate": 1e-06, |
| "loss": 0.2166, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.14528432732316227, |
| "grad_norm": 5.47108268737793, |
| "learning_rate": 1e-06, |
| "loss": 0.1952, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.14563106796116504, |
| "grad_norm": 2.0776853561401367, |
| "learning_rate": 1e-06, |
| "loss": 0.2232, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1459778085991678, |
| "grad_norm": 5.302248477935791, |
| "learning_rate": 1e-06, |
| "loss": 0.1972, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.1463245492371706, |
| "grad_norm": 6.040057182312012, |
| "learning_rate": 1e-06, |
| "loss": 0.236, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.14667128987517336, |
| "grad_norm": 2.0244548320770264, |
| "learning_rate": 1e-06, |
| "loss": 0.189, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.14701803051317613, |
| "grad_norm": 1.9456743001937866, |
| "learning_rate": 1e-06, |
| "loss": 0.2195, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.1473647711511789, |
| "grad_norm": 2.1498379707336426, |
| "learning_rate": 1e-06, |
| "loss": 0.1891, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.14771151178918168, |
| "grad_norm": 2.6949310302734375, |
| "learning_rate": 1e-06, |
| "loss": 0.1933, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.14805825242718446, |
| "grad_norm": 2.2090682983398438, |
| "learning_rate": 1e-06, |
| "loss": 0.2001, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.14840499306518723, |
| "grad_norm": 2.047943592071533, |
| "learning_rate": 1e-06, |
| "loss": 0.2008, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.14875173370319, |
| "grad_norm": 3.49019718170166, |
| "learning_rate": 1e-06, |
| "loss": 0.25, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.14909847434119278, |
| "grad_norm": 2.07293963432312, |
| "learning_rate": 1e-06, |
| "loss": 0.176, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14944521497919555, |
| "grad_norm": 1.9637081623077393, |
| "learning_rate": 1e-06, |
| "loss": 0.1918, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.14979195561719832, |
| "grad_norm": 4.7957658767700195, |
| "learning_rate": 1e-06, |
| "loss": 0.2399, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1501386962552011, |
| "grad_norm": 2.5030736923217773, |
| "learning_rate": 1e-06, |
| "loss": 0.1782, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.15048543689320387, |
| "grad_norm": 1.744457483291626, |
| "learning_rate": 1e-06, |
| "loss": 0.1907, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.15083217753120665, |
| "grad_norm": 2.6001105308532715, |
| "learning_rate": 1e-06, |
| "loss": 0.1946, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.15117891816920942, |
| "grad_norm": 4.785390377044678, |
| "learning_rate": 1e-06, |
| "loss": 0.2152, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.1515256588072122, |
| "grad_norm": 1.5918031930923462, |
| "learning_rate": 1e-06, |
| "loss": 0.1817, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.15187239944521497, |
| "grad_norm": 2.5173332691192627, |
| "learning_rate": 1e-06, |
| "loss": 0.2512, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.15221914008321774, |
| "grad_norm": 2.2436468601226807, |
| "learning_rate": 1e-06, |
| "loss": 0.2083, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.15256588072122051, |
| "grad_norm": 5.61021089553833, |
| "learning_rate": 1e-06, |
| "loss": 0.2143, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1529126213592233, |
| "grad_norm": 3.092174768447876, |
| "learning_rate": 1e-06, |
| "loss": 0.2081, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.15325936199722606, |
| "grad_norm": 1.9928513765335083, |
| "learning_rate": 1e-06, |
| "loss": 0.1816, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.15360610263522884, |
| "grad_norm": 2.1547915935516357, |
| "learning_rate": 1e-06, |
| "loss": 0.1919, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.1539528432732316, |
| "grad_norm": 4.531169414520264, |
| "learning_rate": 1e-06, |
| "loss": 0.2085, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.15429958391123438, |
| "grad_norm": 1.7542948722839355, |
| "learning_rate": 1e-06, |
| "loss": 0.1803, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.15464632454923716, |
| "grad_norm": 2.1614997386932373, |
| "learning_rate": 1e-06, |
| "loss": 0.1946, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.15499306518723993, |
| "grad_norm": 5.018011093139648, |
| "learning_rate": 1e-06, |
| "loss": 0.1974, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 2.907313346862793, |
| "learning_rate": 1e-06, |
| "loss": 0.1916, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.15568654646324548, |
| "grad_norm": 2.7045724391937256, |
| "learning_rate": 1e-06, |
| "loss": 0.1847, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.15603328710124825, |
| "grad_norm": 1.8427867889404297, |
| "learning_rate": 1e-06, |
| "loss": 0.1918, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15638002773925105, |
| "grad_norm": 2.3798131942749023, |
| "learning_rate": 1e-06, |
| "loss": 0.2125, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.15672676837725383, |
| "grad_norm": 3.0421640872955322, |
| "learning_rate": 1e-06, |
| "loss": 0.1729, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.1570735090152566, |
| "grad_norm": 2.487687110900879, |
| "learning_rate": 1e-06, |
| "loss": 0.1791, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.15742024965325938, |
| "grad_norm": 3.9708778858184814, |
| "learning_rate": 1e-06, |
| "loss": 0.1902, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.15776699029126215, |
| "grad_norm": 2.342562437057495, |
| "learning_rate": 1e-06, |
| "loss": 0.1757, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.15811373092926492, |
| "grad_norm": 3.1214346885681152, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.1584604715672677, |
| "grad_norm": 3.499249219894409, |
| "learning_rate": 1e-06, |
| "loss": 0.1947, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.15880721220527047, |
| "grad_norm": 1.9343475103378296, |
| "learning_rate": 1e-06, |
| "loss": 0.1835, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.15915395284327324, |
| "grad_norm": 3.174394130706787, |
| "learning_rate": 1e-06, |
| "loss": 0.2083, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.15950069348127602, |
| "grad_norm": 1.8774772882461548, |
| "learning_rate": 1e-06, |
| "loss": 0.1937, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1598474341192788, |
| "grad_norm": 1.7514275312423706, |
| "learning_rate": 1e-06, |
| "loss": 0.1865, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.16019417475728157, |
| "grad_norm": 2.4523355960845947, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.16054091539528434, |
| "grad_norm": 5.636368274688721, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1608876560332871, |
| "grad_norm": 2.248699426651001, |
| "learning_rate": 1e-06, |
| "loss": 0.2034, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.1612343966712899, |
| "grad_norm": 1.9587477445602417, |
| "learning_rate": 1e-06, |
| "loss": 0.1991, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.16158113730929266, |
| "grad_norm": 1.5505951642990112, |
| "learning_rate": 1e-06, |
| "loss": 0.1662, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.16192787794729543, |
| "grad_norm": 1.778409719467163, |
| "learning_rate": 1e-06, |
| "loss": 0.206, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1622746185852982, |
| "grad_norm": 3.1874828338623047, |
| "learning_rate": 1e-06, |
| "loss": 0.1773, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.16262135922330098, |
| "grad_norm": 3.800231695175171, |
| "learning_rate": 1e-06, |
| "loss": 0.209, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.16296809986130376, |
| "grad_norm": 2.2624082565307617, |
| "learning_rate": 1e-06, |
| "loss": 0.1968, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16331484049930653, |
| "grad_norm": 2.0975418090820312, |
| "learning_rate": 1e-06, |
| "loss": 0.235, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.1636615811373093, |
| "grad_norm": 5.558737277984619, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.16400832177531208, |
| "grad_norm": 3.688441514968872, |
| "learning_rate": 1e-06, |
| "loss": 0.2038, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.16435506241331485, |
| "grad_norm": 1.7553553581237793, |
| "learning_rate": 1e-06, |
| "loss": 0.178, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.16470180305131762, |
| "grad_norm": 4.0227952003479, |
| "learning_rate": 1e-06, |
| "loss": 0.1732, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1650485436893204, |
| "grad_norm": 1.9997745752334595, |
| "learning_rate": 1e-06, |
| "loss": 0.2068, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.16539528432732317, |
| "grad_norm": 3.5058529376983643, |
| "learning_rate": 1e-06, |
| "loss": 0.2066, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.16574202496532595, |
| "grad_norm": 3.745401382446289, |
| "learning_rate": 1e-06, |
| "loss": 0.2022, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.16608876560332872, |
| "grad_norm": 1.898235559463501, |
| "learning_rate": 1e-06, |
| "loss": 0.1787, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.1664355062413315, |
| "grad_norm": 2.2373104095458984, |
| "learning_rate": 1e-06, |
| "loss": 0.202, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16678224687933427, |
| "grad_norm": 3.4441022872924805, |
| "learning_rate": 1e-06, |
| "loss": 0.2223, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.16712898751733704, |
| "grad_norm": 2.0082736015319824, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.16747572815533981, |
| "grad_norm": 7.100375175476074, |
| "learning_rate": 1e-06, |
| "loss": 0.2622, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1678224687933426, |
| "grad_norm": 3.85292649269104, |
| "learning_rate": 1e-06, |
| "loss": 0.2141, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.16816920943134536, |
| "grad_norm": 2.212933301925659, |
| "learning_rate": 1e-06, |
| "loss": 0.2052, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.16851595006934814, |
| "grad_norm": 2.0661942958831787, |
| "learning_rate": 1e-06, |
| "loss": 0.1676, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.1688626907073509, |
| "grad_norm": 4.262602806091309, |
| "learning_rate": 1e-06, |
| "loss": 0.2382, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.16920943134535368, |
| "grad_norm": 2.734023094177246, |
| "learning_rate": 1e-06, |
| "loss": 0.1833, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.16955617198335646, |
| "grad_norm": 1.9461660385131836, |
| "learning_rate": 1e-06, |
| "loss": 0.1833, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.16990291262135923, |
| "grad_norm": 2.6612391471862793, |
| "learning_rate": 1e-06, |
| "loss": 0.1996, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.170249653259362, |
| "grad_norm": 2.2470898628234863, |
| "learning_rate": 1e-06, |
| "loss": 0.1882, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.17059639389736478, |
| "grad_norm": 4.1081743240356445, |
| "learning_rate": 1e-06, |
| "loss": 0.1783, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.17094313453536755, |
| "grad_norm": 2.1462888717651367, |
| "learning_rate": 1e-06, |
| "loss": 0.2158, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.17128987517337033, |
| "grad_norm": 1.8906127214431763, |
| "learning_rate": 1e-06, |
| "loss": 0.196, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.1716366158113731, |
| "grad_norm": 3.4147090911865234, |
| "learning_rate": 1e-06, |
| "loss": 0.2561, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.17198335644937587, |
| "grad_norm": 3.48602032661438, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.17233009708737865, |
| "grad_norm": 1.873856544494629, |
| "learning_rate": 1e-06, |
| "loss": 0.1979, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.17267683772538142, |
| "grad_norm": 3.3756508827209473, |
| "learning_rate": 1e-06, |
| "loss": 0.1932, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.1730235783633842, |
| "grad_norm": 1.7137960195541382, |
| "learning_rate": 1e-06, |
| "loss": 0.1957, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.17337031900138697, |
| "grad_norm": 2.221604347229004, |
| "learning_rate": 1e-06, |
| "loss": 0.2075, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17371705963938974, |
| "grad_norm": 1.7929978370666504, |
| "learning_rate": 1e-06, |
| "loss": 0.179, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.17406380027739252, |
| "grad_norm": 2.439054250717163, |
| "learning_rate": 1e-06, |
| "loss": 0.1962, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.1744105409153953, |
| "grad_norm": 2.1946470737457275, |
| "learning_rate": 1e-06, |
| "loss": 0.1734, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 2.1295015811920166, |
| "learning_rate": 1e-06, |
| "loss": 0.1943, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.17510402219140084, |
| "grad_norm": 3.222607135772705, |
| "learning_rate": 1e-06, |
| "loss": 0.192, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1754507628294036, |
| "grad_norm": 3.0692672729492188, |
| "learning_rate": 1e-06, |
| "loss": 0.1785, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.17579750346740639, |
| "grad_norm": 3.299917459487915, |
| "learning_rate": 1e-06, |
| "loss": 0.1768, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.17614424410540916, |
| "grad_norm": 2.0762133598327637, |
| "learning_rate": 1e-06, |
| "loss": 0.1948, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.17649098474341193, |
| "grad_norm": 2.0623905658721924, |
| "learning_rate": 1e-06, |
| "loss": 0.2167, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.1768377253814147, |
| "grad_norm": 2.0946035385131836, |
| "learning_rate": 1e-06, |
| "loss": 0.2028, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.17718446601941748, |
| "grad_norm": 1.861733078956604, |
| "learning_rate": 1e-06, |
| "loss": 0.2162, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.17753120665742025, |
| "grad_norm": 2.4840567111968994, |
| "learning_rate": 1e-06, |
| "loss": 0.1863, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.17787794729542303, |
| "grad_norm": 1.8076410293579102, |
| "learning_rate": 1e-06, |
| "loss": 0.2008, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.1782246879334258, |
| "grad_norm": 2.526137351989746, |
| "learning_rate": 1e-06, |
| "loss": 0.1904, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 1.856165885925293, |
| "learning_rate": 1e-06, |
| "loss": 0.2123, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.17891816920943135, |
| "grad_norm": 3.9912686347961426, |
| "learning_rate": 1e-06, |
| "loss": 0.1335, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.17926490984743412, |
| "grad_norm": 2.328289270401001, |
| "learning_rate": 1e-06, |
| "loss": 0.2084, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1796116504854369, |
| "grad_norm": 1.7976534366607666, |
| "learning_rate": 1e-06, |
| "loss": 0.1879, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.17995839112343967, |
| "grad_norm": 2.629750967025757, |
| "learning_rate": 1e-06, |
| "loss": 0.2154, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.18030513176144244, |
| "grad_norm": 2.200639009475708, |
| "learning_rate": 1e-06, |
| "loss": 0.1987, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18065187239944522, |
| "grad_norm": 2.636253833770752, |
| "learning_rate": 1e-06, |
| "loss": 0.2092, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.180998613037448, |
| "grad_norm": 2.329237222671509, |
| "learning_rate": 1e-06, |
| "loss": 0.1865, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.18134535367545077, |
| "grad_norm": 3.2682480812072754, |
| "learning_rate": 1e-06, |
| "loss": 0.2204, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.18169209431345354, |
| "grad_norm": 2.117788791656494, |
| "learning_rate": 1e-06, |
| "loss": 0.1871, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.1820388349514563, |
| "grad_norm": 1.9039613008499146, |
| "learning_rate": 1e-06, |
| "loss": 0.1856, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.1823855755894591, |
| "grad_norm": 2.3658318519592285, |
| "learning_rate": 1e-06, |
| "loss": 0.1799, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.18273231622746186, |
| "grad_norm": 3.818437337875366, |
| "learning_rate": 1e-06, |
| "loss": 0.1944, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.18307905686546463, |
| "grad_norm": 2.8354179859161377, |
| "learning_rate": 1e-06, |
| "loss": 0.1592, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.1834257975034674, |
| "grad_norm": 1.872366189956665, |
| "learning_rate": 1e-06, |
| "loss": 0.1782, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.18377253814147018, |
| "grad_norm": 2.1778335571289062, |
| "learning_rate": 1e-06, |
| "loss": 0.1787, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.18411927877947296, |
| "grad_norm": 2.6627323627471924, |
| "learning_rate": 1e-06, |
| "loss": 0.1775, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.18446601941747573, |
| "grad_norm": 2.1031336784362793, |
| "learning_rate": 1e-06, |
| "loss": 0.2051, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.1848127600554785, |
| "grad_norm": 4.2585272789001465, |
| "learning_rate": 1e-06, |
| "loss": 0.2088, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.18515950069348128, |
| "grad_norm": 3.1983723640441895, |
| "learning_rate": 1e-06, |
| "loss": 0.177, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.18550624133148405, |
| "grad_norm": 2.121945381164551, |
| "learning_rate": 1e-06, |
| "loss": 0.212, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.18585298196948682, |
| "grad_norm": 4.327321529388428, |
| "learning_rate": 1e-06, |
| "loss": 0.2301, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.1861997226074896, |
| "grad_norm": 3.546032190322876, |
| "learning_rate": 1e-06, |
| "loss": 0.2165, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.18654646324549237, |
| "grad_norm": 2.3898680210113525, |
| "learning_rate": 1e-06, |
| "loss": 0.1892, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.18689320388349515, |
| "grad_norm": 2.9088211059570312, |
| "learning_rate": 1e-06, |
| "loss": 0.2018, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.18723994452149792, |
| "grad_norm": 2.1512014865875244, |
| "learning_rate": 1e-06, |
| "loss": 0.2037, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1875866851595007, |
| "grad_norm": 2.608860731124878, |
| "learning_rate": 1e-06, |
| "loss": 0.1905, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.18793342579750347, |
| "grad_norm": 2.0679538249969482, |
| "learning_rate": 1e-06, |
| "loss": 0.1969, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.18828016643550624, |
| "grad_norm": 2.928675889968872, |
| "learning_rate": 1e-06, |
| "loss": 0.2332, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.18862690707350901, |
| "grad_norm": 6.560424327850342, |
| "learning_rate": 1e-06, |
| "loss": 0.2278, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.1889736477115118, |
| "grad_norm": 1.9088329076766968, |
| "learning_rate": 1e-06, |
| "loss": 0.2027, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.18932038834951456, |
| "grad_norm": 2.4593074321746826, |
| "learning_rate": 1e-06, |
| "loss": 0.1856, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.18966712898751734, |
| "grad_norm": 4.2560858726501465, |
| "learning_rate": 1e-06, |
| "loss": 0.1764, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.1900138696255201, |
| "grad_norm": 1.7965539693832397, |
| "learning_rate": 1e-06, |
| "loss": 0.1932, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.19036061026352288, |
| "grad_norm": 1.9651305675506592, |
| "learning_rate": 1e-06, |
| "loss": 0.2102, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.19070735090152566, |
| "grad_norm": 1.5734628438949585, |
| "learning_rate": 1e-06, |
| "loss": 0.1769, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.19105409153952843, |
| "grad_norm": 3.6712100505828857, |
| "learning_rate": 1e-06, |
| "loss": 0.2034, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.1914008321775312, |
| "grad_norm": 4.593487739562988, |
| "learning_rate": 1e-06, |
| "loss": 0.1643, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.19174757281553398, |
| "grad_norm": 2.358043909072876, |
| "learning_rate": 1e-06, |
| "loss": 0.1652, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.19209431345353675, |
| "grad_norm": 2.64416241645813, |
| "learning_rate": 1e-06, |
| "loss": 0.239, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.19244105409153953, |
| "grad_norm": 4.924618721008301, |
| "learning_rate": 1e-06, |
| "loss": 0.1842, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.1927877947295423, |
| "grad_norm": 2.018336534500122, |
| "learning_rate": 1e-06, |
| "loss": 0.2024, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.19313453536754507, |
| "grad_norm": 2.0920143127441406, |
| "learning_rate": 1e-06, |
| "loss": 0.2031, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.19348127600554785, |
| "grad_norm": 1.8479957580566406, |
| "learning_rate": 1e-06, |
| "loss": 0.1916, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.19382801664355062, |
| "grad_norm": 5.079397201538086, |
| "learning_rate": 1e-06, |
| "loss": 0.2203, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 4.152866363525391, |
| "learning_rate": 1e-06, |
| "loss": 0.1936, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.19452149791955617, |
| "grad_norm": 1.6684824228286743, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.19486823855755894, |
| "grad_norm": 2.2925472259521484, |
| "learning_rate": 1e-06, |
| "loss": 0.1826, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.19521497919556172, |
| "grad_norm": 1.73776113986969, |
| "learning_rate": 1e-06, |
| "loss": 0.175, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.1955617198335645, |
| "grad_norm": 6.04637336730957, |
| "learning_rate": 1e-06, |
| "loss": 0.2135, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.19590846047156726, |
| "grad_norm": 2.765383005142212, |
| "learning_rate": 1e-06, |
| "loss": 0.1885, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.19625520110957004, |
| "grad_norm": 2.205034017562866, |
| "learning_rate": 1e-06, |
| "loss": 0.1965, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.1966019417475728, |
| "grad_norm": 2.0300064086914062, |
| "learning_rate": 1e-06, |
| "loss": 0.206, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.19694868238557559, |
| "grad_norm": 1.9674218893051147, |
| "learning_rate": 1e-06, |
| "loss": 0.162, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.19729542302357836, |
| "grad_norm": 2.875797986984253, |
| "learning_rate": 1e-06, |
| "loss": 0.2251, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.19764216366158113, |
| "grad_norm": 1.855223536491394, |
| "learning_rate": 1e-06, |
| "loss": 0.2211, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1979889042995839, |
| "grad_norm": 2.8994317054748535, |
| "learning_rate": 1e-06, |
| "loss": 0.206, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.19833564493758668, |
| "grad_norm": 2.2230451107025146, |
| "learning_rate": 1e-06, |
| "loss": 0.1947, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.19868238557558945, |
| "grad_norm": 5.828412055969238, |
| "learning_rate": 1e-06, |
| "loss": 0.21, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.19902912621359223, |
| "grad_norm": 2.926950454711914, |
| "learning_rate": 1e-06, |
| "loss": 0.2018, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.199375866851595, |
| "grad_norm": 3.7960805892944336, |
| "learning_rate": 1e-06, |
| "loss": 0.188, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.19972260748959778, |
| "grad_norm": 1.8869881629943848, |
| "learning_rate": 1e-06, |
| "loss": 0.2025, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.20006934812760055, |
| "grad_norm": 3.9236044883728027, |
| "learning_rate": 1e-06, |
| "loss": 0.1571, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.20041608876560332, |
| "grad_norm": 1.937950849533081, |
| "learning_rate": 1e-06, |
| "loss": 0.1944, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2007628294036061, |
| "grad_norm": 2.139031410217285, |
| "learning_rate": 1e-06, |
| "loss": 0.1991, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.20110957004160887, |
| "grad_norm": 4.336677551269531, |
| "learning_rate": 1e-06, |
| "loss": 0.2159, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.20145631067961164, |
| "grad_norm": 3.819993495941162, |
| "learning_rate": 1e-06, |
| "loss": 0.1833, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.20180305131761442, |
| "grad_norm": 1.7961671352386475, |
| "learning_rate": 1e-06, |
| "loss": 0.1807, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.2021497919556172, |
| "grad_norm": 2.0944950580596924, |
| "learning_rate": 1e-06, |
| "loss": 0.2336, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.20249653259361997, |
| "grad_norm": 2.7267916202545166, |
| "learning_rate": 1e-06, |
| "loss": 0.2282, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.20284327323162274, |
| "grad_norm": 1.8287086486816406, |
| "learning_rate": 1e-06, |
| "loss": 0.1684, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2031900138696255, |
| "grad_norm": 3.0344362258911133, |
| "learning_rate": 1e-06, |
| "loss": 0.1965, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.2035367545076283, |
| "grad_norm": 4.7607340812683105, |
| "learning_rate": 1e-06, |
| "loss": 0.193, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.20388349514563106, |
| "grad_norm": 1.6755648851394653, |
| "learning_rate": 1e-06, |
| "loss": 0.1764, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.20423023578363383, |
| "grad_norm": 2.2113454341888428, |
| "learning_rate": 1e-06, |
| "loss": 0.1614, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2045769764216366, |
| "grad_norm": 2.667480945587158, |
| "learning_rate": 1e-06, |
| "loss": 0.173, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.20492371705963938, |
| "grad_norm": 2.3278677463531494, |
| "learning_rate": 1e-06, |
| "loss": 0.1876, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.20527045769764216, |
| "grad_norm": 3.0954275131225586, |
| "learning_rate": 1e-06, |
| "loss": 0.219, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.20561719833564493, |
| "grad_norm": 1.8866382837295532, |
| "learning_rate": 1e-06, |
| "loss": 0.1709, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2059639389736477, |
| "grad_norm": 2.9739086627960205, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.20631067961165048, |
| "grad_norm": 1.8454285860061646, |
| "learning_rate": 1e-06, |
| "loss": 0.1959, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.20665742024965325, |
| "grad_norm": 1.798344373703003, |
| "learning_rate": 1e-06, |
| "loss": 0.1655, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.20700416088765602, |
| "grad_norm": 3.577150821685791, |
| "learning_rate": 1e-06, |
| "loss": 0.2016, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.2073509015256588, |
| "grad_norm": 1.9027544260025024, |
| "learning_rate": 1e-06, |
| "loss": 0.1939, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.20769764216366157, |
| "grad_norm": 4.212296962738037, |
| "learning_rate": 1e-06, |
| "loss": 0.1663, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.20804438280166435, |
| "grad_norm": 2.9982285499572754, |
| "learning_rate": 1e-06, |
| "loss": 0.2316, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.20839112343966712, |
| "grad_norm": 2.4148337841033936, |
| "learning_rate": 1e-06, |
| "loss": 0.194, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2087378640776699, |
| "grad_norm": 5.278850078582764, |
| "learning_rate": 1e-06, |
| "loss": 0.1835, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.20908460471567267, |
| "grad_norm": 2.83439564704895, |
| "learning_rate": 1e-06, |
| "loss": 0.1839, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.20943134535367544, |
| "grad_norm": 2.2179036140441895, |
| "learning_rate": 1e-06, |
| "loss": 0.2021, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.20977808599167821, |
| "grad_norm": 5.919695854187012, |
| "learning_rate": 1e-06, |
| "loss": 0.2135, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.210124826629681, |
| "grad_norm": 1.5985592603683472, |
| "learning_rate": 1e-06, |
| "loss": 0.1672, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.21047156726768376, |
| "grad_norm": 3.6065750122070312, |
| "learning_rate": 1e-06, |
| "loss": 0.2133, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.21081830790568654, |
| "grad_norm": 3.56968355178833, |
| "learning_rate": 1e-06, |
| "loss": 0.1867, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2111650485436893, |
| "grad_norm": 2.662672758102417, |
| "learning_rate": 1e-06, |
| "loss": 0.1773, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.21151178918169208, |
| "grad_norm": 1.9275373220443726, |
| "learning_rate": 1e-06, |
| "loss": 0.1926, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.21185852981969486, |
| "grad_norm": 4.133257865905762, |
| "learning_rate": 1e-06, |
| "loss": 0.1931, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.21220527045769763, |
| "grad_norm": 1.791725516319275, |
| "learning_rate": 1e-06, |
| "loss": 0.1874, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2125520110957004, |
| "grad_norm": 2.296025514602661, |
| "learning_rate": 1e-06, |
| "loss": 0.1968, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.21289875173370318, |
| "grad_norm": 1.7383009195327759, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.21324549237170595, |
| "grad_norm": 2.899918556213379, |
| "learning_rate": 1e-06, |
| "loss": 0.1843, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 7.615829944610596, |
| "learning_rate": 1e-06, |
| "loss": 0.2117, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2139389736477115, |
| "grad_norm": 1.9387335777282715, |
| "learning_rate": 1e-06, |
| "loss": 0.1798, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 4.627980709075928, |
| "learning_rate": 1e-06, |
| "loss": 0.1829, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.21463245492371705, |
| "grad_norm": 7.2196526527404785, |
| "learning_rate": 1e-06, |
| "loss": 0.2323, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.21497919556171982, |
| "grad_norm": 1.4321414232254028, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2153259361997226, |
| "grad_norm": 1.6646205186843872, |
| "learning_rate": 1e-06, |
| "loss": 0.1922, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.21567267683772537, |
| "grad_norm": 1.867525577545166, |
| "learning_rate": 1e-06, |
| "loss": 0.1995, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.21601941747572814, |
| "grad_norm": 2.072211503982544, |
| "learning_rate": 1e-06, |
| "loss": 0.1848, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.21636615811373092, |
| "grad_norm": 3.1472299098968506, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2167128987517337, |
| "grad_norm": 5.170080184936523, |
| "learning_rate": 1e-06, |
| "loss": 0.1975, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.21705963938973646, |
| "grad_norm": 1.9573615789413452, |
| "learning_rate": 1e-06, |
| "loss": 0.2022, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.21740638002773924, |
| "grad_norm": 4.052754878997803, |
| "learning_rate": 1e-06, |
| "loss": 0.1595, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.217753120665742, |
| "grad_norm": 2.469994306564331, |
| "learning_rate": 1e-06, |
| "loss": 0.2114, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.21809986130374479, |
| "grad_norm": 1.9860141277313232, |
| "learning_rate": 1e-06, |
| "loss": 0.1842, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.21844660194174756, |
| "grad_norm": 3.6322503089904785, |
| "learning_rate": 1e-06, |
| "loss": 0.183, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.21879334257975036, |
| "grad_norm": 3.5005853176116943, |
| "learning_rate": 1e-06, |
| "loss": 0.1921, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.21914008321775313, |
| "grad_norm": 1.7438961267471313, |
| "learning_rate": 1e-06, |
| "loss": 0.1883, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.2194868238557559, |
| "grad_norm": 2.330274820327759, |
| "learning_rate": 1e-06, |
| "loss": 0.1777, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.21983356449375868, |
| "grad_norm": 2.0113935470581055, |
| "learning_rate": 1e-06, |
| "loss": 0.198, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.22018030513176146, |
| "grad_norm": 2.120816946029663, |
| "learning_rate": 1e-06, |
| "loss": 0.195, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.22052704576976423, |
| "grad_norm": 3.88839054107666, |
| "learning_rate": 1e-06, |
| "loss": 0.2156, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.220873786407767, |
| "grad_norm": 2.330038070678711, |
| "learning_rate": 1e-06, |
| "loss": 0.1525, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.22122052704576978, |
| "grad_norm": 2.0380382537841797, |
| "learning_rate": 1e-06, |
| "loss": 0.1892, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.22156726768377255, |
| "grad_norm": 2.988846778869629, |
| "learning_rate": 1e-06, |
| "loss": 0.1784, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.22191400832177532, |
| "grad_norm": 3.193902015686035, |
| "learning_rate": 1e-06, |
| "loss": 0.2036, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2222607489597781, |
| "grad_norm": 3.138087511062622, |
| "learning_rate": 1e-06, |
| "loss": 0.1995, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.22260748959778087, |
| "grad_norm": 3.0959274768829346, |
| "learning_rate": 1e-06, |
| "loss": 0.212, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.22295423023578365, |
| "grad_norm": 3.1513280868530273, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.22330097087378642, |
| "grad_norm": 3.7054238319396973, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2236477115117892, |
| "grad_norm": 1.7885769605636597, |
| "learning_rate": 1e-06, |
| "loss": 0.1764, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.22399445214979197, |
| "grad_norm": 4.171994686126709, |
| "learning_rate": 1e-06, |
| "loss": 0.1813, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.22434119278779474, |
| "grad_norm": 2.313520669937134, |
| "learning_rate": 1e-06, |
| "loss": 0.2098, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.22468793342579751, |
| "grad_norm": 3.154613971710205, |
| "learning_rate": 1e-06, |
| "loss": 0.1776, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2250346740638003, |
| "grad_norm": 5.905630111694336, |
| "learning_rate": 1e-06, |
| "loss": 0.1632, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.22538141470180306, |
| "grad_norm": 3.555774211883545, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.22572815533980584, |
| "grad_norm": 2.0930142402648926, |
| "learning_rate": 1e-06, |
| "loss": 0.1962, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2260748959778086, |
| "grad_norm": 3.322031021118164, |
| "learning_rate": 1e-06, |
| "loss": 0.1887, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.22642163661581138, |
| "grad_norm": 3.3001041412353516, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.22676837725381416, |
| "grad_norm": 2.0050160884857178, |
| "learning_rate": 1e-06, |
| "loss": 0.1696, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.22711511789181693, |
| "grad_norm": 2.4040985107421875, |
| "learning_rate": 1e-06, |
| "loss": 0.1937, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2274618585298197, |
| "grad_norm": 2.627812147140503, |
| "learning_rate": 1e-06, |
| "loss": 0.1667, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.22780859916782248, |
| "grad_norm": 4.34411096572876, |
| "learning_rate": 1e-06, |
| "loss": 0.1815, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.22815533980582525, |
| "grad_norm": 4.466109275817871, |
| "learning_rate": 1e-06, |
| "loss": 0.1798, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.22850208044382803, |
| "grad_norm": 1.537264347076416, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.2288488210818308, |
| "grad_norm": 3.8718326091766357, |
| "learning_rate": 1e-06, |
| "loss": 0.1796, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.22919556171983357, |
| "grad_norm": 3.2782208919525146, |
| "learning_rate": 1e-06, |
| "loss": 0.158, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.22954230235783635, |
| "grad_norm": 1.7151904106140137, |
| "learning_rate": 1e-06, |
| "loss": 0.1769, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.22988904299583912, |
| "grad_norm": 1.8331800699234009, |
| "learning_rate": 1e-06, |
| "loss": 0.1671, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.2302357836338419, |
| "grad_norm": 2.3678629398345947, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.23058252427184467, |
| "grad_norm": 1.5758439302444458, |
| "learning_rate": 1e-06, |
| "loss": 0.1689, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.23092926490984744, |
| "grad_norm": 2.456174850463867, |
| "learning_rate": 1e-06, |
| "loss": 0.1681, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.23127600554785022, |
| "grad_norm": 1.6386404037475586, |
| "learning_rate": 1e-06, |
| "loss": 0.1722, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.231622746185853, |
| "grad_norm": 1.6972070932388306, |
| "learning_rate": 1e-06, |
| "loss": 0.1699, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.23196948682385576, |
| "grad_norm": 2.991753101348877, |
| "learning_rate": 1e-06, |
| "loss": 0.194, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.23231622746185854, |
| "grad_norm": 1.5393069982528687, |
| "learning_rate": 1e-06, |
| "loss": 0.1717, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2326629680998613, |
| "grad_norm": 2.0264406204223633, |
| "learning_rate": 1e-06, |
| "loss": 0.1961, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 4.824713706970215, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.23335644937586686, |
| "grad_norm": 1.823652982711792, |
| "learning_rate": 1e-06, |
| "loss": 0.1913, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.23370319001386963, |
| "grad_norm": 2.0635838508605957, |
| "learning_rate": 1e-06, |
| "loss": 0.1702, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.2340499306518724, |
| "grad_norm": 1.7453879117965698, |
| "learning_rate": 1e-06, |
| "loss": 0.171, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.23439667128987518, |
| "grad_norm": 3.4135522842407227, |
| "learning_rate": 1e-06, |
| "loss": 0.2, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.23474341192787795, |
| "grad_norm": 4.439614295959473, |
| "learning_rate": 1e-06, |
| "loss": 0.1779, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.23509015256588073, |
| "grad_norm": 1.9684357643127441, |
| "learning_rate": 1e-06, |
| "loss": 0.1828, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2354368932038835, |
| "grad_norm": 1.9556607007980347, |
| "learning_rate": 1e-06, |
| "loss": 0.1967, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.23578363384188628, |
| "grad_norm": 3.652463436126709, |
| "learning_rate": 1e-06, |
| "loss": 0.2005, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.23613037447988905, |
| "grad_norm": 1.9398771524429321, |
| "learning_rate": 1e-06, |
| "loss": 0.158, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.23647711511789182, |
| "grad_norm": 3.950073480606079, |
| "learning_rate": 1e-06, |
| "loss": 0.192, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.2368238557558946, |
| "grad_norm": 3.032841205596924, |
| "learning_rate": 1e-06, |
| "loss": 0.2069, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.23717059639389737, |
| "grad_norm": 1.8892643451690674, |
| "learning_rate": 1e-06, |
| "loss": 0.1994, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.23751733703190014, |
| "grad_norm": 2.4548637866973877, |
| "learning_rate": 1e-06, |
| "loss": 0.1818, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.23786407766990292, |
| "grad_norm": 1.8674260377883911, |
| "learning_rate": 1e-06, |
| "loss": 0.1712, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.2382108183079057, |
| "grad_norm": 5.607412338256836, |
| "learning_rate": 1e-06, |
| "loss": 0.205, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.23855755894590847, |
| "grad_norm": 1.955690860748291, |
| "learning_rate": 1e-06, |
| "loss": 0.192, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.23890429958391124, |
| "grad_norm": 3.3477094173431396, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.239251040221914, |
| "grad_norm": 4.284628868103027, |
| "learning_rate": 1e-06, |
| "loss": 0.1592, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2395977808599168, |
| "grad_norm": 2.2527334690093994, |
| "learning_rate": 1e-06, |
| "loss": 0.1675, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.23994452149791956, |
| "grad_norm": 2.17332124710083, |
| "learning_rate": 1e-06, |
| "loss": 0.1988, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.24029126213592233, |
| "grad_norm": 3.59987211227417, |
| "learning_rate": 1e-06, |
| "loss": 0.2038, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.2406380027739251, |
| "grad_norm": 4.105143070220947, |
| "learning_rate": 1e-06, |
| "loss": 0.2028, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.24098474341192788, |
| "grad_norm": 1.9248536825180054, |
| "learning_rate": 1e-06, |
| "loss": 0.2031, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.24133148404993066, |
| "grad_norm": 2.1153578758239746, |
| "learning_rate": 1e-06, |
| "loss": 0.2164, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.24167822468793343, |
| "grad_norm": 3.2150540351867676, |
| "learning_rate": 1e-06, |
| "loss": 0.1985, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.2420249653259362, |
| "grad_norm": 2.063730478286743, |
| "learning_rate": 1e-06, |
| "loss": 0.1914, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.24237170596393898, |
| "grad_norm": 1.997815728187561, |
| "learning_rate": 1e-06, |
| "loss": 0.1956, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.24271844660194175, |
| "grad_norm": 2.1964597702026367, |
| "learning_rate": 1e-06, |
| "loss": 0.1665, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.24306518723994452, |
| "grad_norm": 1.6123956441879272, |
| "learning_rate": 1e-06, |
| "loss": 0.1647, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.2434119278779473, |
| "grad_norm": 4.534806251525879, |
| "learning_rate": 1e-06, |
| "loss": 0.1932, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.24375866851595007, |
| "grad_norm": 3.9443535804748535, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.24410540915395285, |
| "grad_norm": 6.974303245544434, |
| "learning_rate": 1e-06, |
| "loss": 0.1648, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.24445214979195562, |
| "grad_norm": 2.646906614303589, |
| "learning_rate": 1e-06, |
| "loss": 0.2277, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2447988904299584, |
| "grad_norm": 2.9131650924682617, |
| "learning_rate": 1e-06, |
| "loss": 0.1663, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.24514563106796117, |
| "grad_norm": 1.7801005840301514, |
| "learning_rate": 1e-06, |
| "loss": 0.1656, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.24549237170596394, |
| "grad_norm": 2.639249563217163, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.24583911234396671, |
| "grad_norm": 1.4407970905303955, |
| "learning_rate": 1e-06, |
| "loss": 0.1702, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2461858529819695, |
| "grad_norm": 1.4568276405334473, |
| "learning_rate": 1e-06, |
| "loss": 0.1659, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.24653259361997226, |
| "grad_norm": 4.625824928283691, |
| "learning_rate": 1e-06, |
| "loss": 0.2295, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.24687933425797504, |
| "grad_norm": 4.630171775817871, |
| "learning_rate": 1e-06, |
| "loss": 0.1668, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.2472260748959778, |
| "grad_norm": 1.7043591737747192, |
| "learning_rate": 1e-06, |
| "loss": 0.179, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.24757281553398058, |
| "grad_norm": 2.682840585708618, |
| "learning_rate": 1e-06, |
| "loss": 0.1708, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.24791955617198336, |
| "grad_norm": 1.8791319131851196, |
| "learning_rate": 1e-06, |
| "loss": 0.195, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.24826629680998613, |
| "grad_norm": 1.7238434553146362, |
| "learning_rate": 1e-06, |
| "loss": 0.1711, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.2486130374479889, |
| "grad_norm": 3.407539129257202, |
| "learning_rate": 1e-06, |
| "loss": 0.1666, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.24895977808599168, |
| "grad_norm": 5.695379257202148, |
| "learning_rate": 1e-06, |
| "loss": 0.1979, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.24930651872399445, |
| "grad_norm": 4.801501274108887, |
| "learning_rate": 1e-06, |
| "loss": 0.1888, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.24965325936199723, |
| "grad_norm": 4.1193389892578125, |
| "learning_rate": 1e-06, |
| "loss": 0.202, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.602652072906494, |
| "learning_rate": 1e-06, |
| "loss": 0.1926, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2503467406380028, |
| "grad_norm": 2.5733113288879395, |
| "learning_rate": 1e-06, |
| "loss": 0.1964, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.25069348127600555, |
| "grad_norm": 1.5140520334243774, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2510402219140083, |
| "grad_norm": 1.8638113737106323, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2513869625520111, |
| "grad_norm": 1.9770817756652832, |
| "learning_rate": 1e-06, |
| "loss": 0.2094, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.25173370319001387, |
| "grad_norm": 3.411306619644165, |
| "learning_rate": 1e-06, |
| "loss": 0.1674, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.25208044382801664, |
| "grad_norm": 5.473649024963379, |
| "learning_rate": 1e-06, |
| "loss": 0.1843, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 3.618953227996826, |
| "learning_rate": 1e-06, |
| "loss": 0.1691, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.2527739251040222, |
| "grad_norm": 2.7553508281707764, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.25312066574202496, |
| "grad_norm": 3.168771266937256, |
| "learning_rate": 1e-06, |
| "loss": 0.1952, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.25346740638002774, |
| "grad_norm": 2.5667014122009277, |
| "learning_rate": 1e-06, |
| "loss": 0.1685, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.2538141470180305, |
| "grad_norm": 2.024369716644287, |
| "learning_rate": 1e-06, |
| "loss": 0.1782, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.2541608876560333, |
| "grad_norm": 3.3991565704345703, |
| "learning_rate": 1e-06, |
| "loss": 0.1628, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.25450762829403606, |
| "grad_norm": 3.4920365810394287, |
| "learning_rate": 1e-06, |
| "loss": 0.1869, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.25485436893203883, |
| "grad_norm": 2.4127650260925293, |
| "learning_rate": 1e-06, |
| "loss": 0.2054, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2552011095700416, |
| "grad_norm": 2.5283799171447754, |
| "learning_rate": 1e-06, |
| "loss": 0.2128, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.2555478502080444, |
| "grad_norm": 3.1239235401153564, |
| "learning_rate": 1e-06, |
| "loss": 0.189, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.25589459084604715, |
| "grad_norm": 4.53823184967041, |
| "learning_rate": 1e-06, |
| "loss": 0.2216, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.25624133148404993, |
| "grad_norm": 1.9192531108856201, |
| "learning_rate": 1e-06, |
| "loss": 0.1937, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2565880721220527, |
| "grad_norm": 3.241487979888916, |
| "learning_rate": 1e-06, |
| "loss": 0.1744, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2569348127600555, |
| "grad_norm": 4.408164024353027, |
| "learning_rate": 1e-06, |
| "loss": 0.1792, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.25728155339805825, |
| "grad_norm": 2.0512523651123047, |
| "learning_rate": 1e-06, |
| "loss": 0.158, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.257628294036061, |
| "grad_norm": 1.6755858659744263, |
| "learning_rate": 1e-06, |
| "loss": 0.1819, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.2579750346740638, |
| "grad_norm": 1.6671578884124756, |
| "learning_rate": 1e-06, |
| "loss": 0.1634, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.25832177531206657, |
| "grad_norm": 2.0156643390655518, |
| "learning_rate": 1e-06, |
| "loss": 0.169, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.25866851595006934, |
| "grad_norm": 3.1058249473571777, |
| "learning_rate": 1e-06, |
| "loss": 0.1633, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2590152565880721, |
| "grad_norm": 2.4028232097625732, |
| "learning_rate": 1e-06, |
| "loss": 0.1907, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2593619972260749, |
| "grad_norm": 5.4095354080200195, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.25970873786407767, |
| "grad_norm": 2.967435359954834, |
| "learning_rate": 1e-06, |
| "loss": 0.1811, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.26005547850208044, |
| "grad_norm": 2.7029571533203125, |
| "learning_rate": 1e-06, |
| "loss": 0.2192, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2604022191400832, |
| "grad_norm": 2.9570839405059814, |
| "learning_rate": 1e-06, |
| "loss": 0.1605, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.260748959778086, |
| "grad_norm": 2.031963348388672, |
| "learning_rate": 1e-06, |
| "loss": 0.2144, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.26109570041608876, |
| "grad_norm": 1.6963039636611938, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.26144244105409153, |
| "grad_norm": 1.8354779481887817, |
| "learning_rate": 1e-06, |
| "loss": 0.1736, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.2617891816920943, |
| "grad_norm": 2.998814582824707, |
| "learning_rate": 1e-06, |
| "loss": 0.1888, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2621359223300971, |
| "grad_norm": 1.5606882572174072, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.26248266296809986, |
| "grad_norm": 6.14403772354126, |
| "learning_rate": 1e-06, |
| "loss": 0.1722, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.26282940360610263, |
| "grad_norm": 4.8302321434021, |
| "learning_rate": 1e-06, |
| "loss": 0.1674, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2631761442441054, |
| "grad_norm": 6.868436813354492, |
| "learning_rate": 1e-06, |
| "loss": 0.2065, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2635228848821082, |
| "grad_norm": 5.897651672363281, |
| "learning_rate": 1e-06, |
| "loss": 0.1934, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.26386962552011095, |
| "grad_norm": 3.1365790367126465, |
| "learning_rate": 1e-06, |
| "loss": 0.2086, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2642163661581137, |
| "grad_norm": 4.416586875915527, |
| "learning_rate": 1e-06, |
| "loss": 0.1834, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2645631067961165, |
| "grad_norm": 3.5971829891204834, |
| "learning_rate": 1e-06, |
| "loss": 0.1734, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.26490984743411927, |
| "grad_norm": 2.017256259918213, |
| "learning_rate": 1e-06, |
| "loss": 0.2148, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.26525658807212205, |
| "grad_norm": 1.7896546125411987, |
| "learning_rate": 1e-06, |
| "loss": 0.1934, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.2656033287101248, |
| "grad_norm": 2.142129898071289, |
| "learning_rate": 1e-06, |
| "loss": 0.1691, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.2659500693481276, |
| "grad_norm": 2.0466034412384033, |
| "learning_rate": 1e-06, |
| "loss": 0.1889, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.26629680998613037, |
| "grad_norm": 5.9090189933776855, |
| "learning_rate": 1e-06, |
| "loss": 0.181, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.26664355062413314, |
| "grad_norm": 3.580826997756958, |
| "learning_rate": 1e-06, |
| "loss": 0.1827, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.2669902912621359, |
| "grad_norm": 4.003768444061279, |
| "learning_rate": 1e-06, |
| "loss": 0.1917, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2673370319001387, |
| "grad_norm": 1.884700059890747, |
| "learning_rate": 1e-06, |
| "loss": 0.182, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.26768377253814146, |
| "grad_norm": 2.163311243057251, |
| "learning_rate": 1e-06, |
| "loss": 0.2078, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.26803051317614424, |
| "grad_norm": 4.079468250274658, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.268377253814147, |
| "grad_norm": 3.486384391784668, |
| "learning_rate": 1e-06, |
| "loss": 0.1655, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2687239944521498, |
| "grad_norm": 3.0869040489196777, |
| "learning_rate": 1e-06, |
| "loss": 0.1543, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.26907073509015256, |
| "grad_norm": 1.9200345277786255, |
| "learning_rate": 1e-06, |
| "loss": 0.1797, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.26941747572815533, |
| "grad_norm": 2.677326202392578, |
| "learning_rate": 1e-06, |
| "loss": 0.1624, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.2697642163661581, |
| "grad_norm": 4.965575695037842, |
| "learning_rate": 1e-06, |
| "loss": 0.1843, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.2701109570041609, |
| "grad_norm": 3.100609064102173, |
| "learning_rate": 1e-06, |
| "loss": 0.2043, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.27045769764216365, |
| "grad_norm": 3.7525222301483154, |
| "learning_rate": 1e-06, |
| "loss": 0.1853, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2708044382801664, |
| "grad_norm": 3.449674606323242, |
| "learning_rate": 1e-06, |
| "loss": 0.1619, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2711511789181692, |
| "grad_norm": 2.731919050216675, |
| "learning_rate": 1e-06, |
| "loss": 0.2145, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.271497919556172, |
| "grad_norm": 5.897451877593994, |
| "learning_rate": 1e-06, |
| "loss": 0.196, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 3.070488452911377, |
| "learning_rate": 1e-06, |
| "loss": 0.1985, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.2721914008321775, |
| "grad_norm": 2.275068521499634, |
| "learning_rate": 1e-06, |
| "loss": 0.1865, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.2725381414701803, |
| "grad_norm": 2.4451684951782227, |
| "learning_rate": 1e-06, |
| "loss": 0.1983, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.27288488210818307, |
| "grad_norm": 2.198415517807007, |
| "learning_rate": 1e-06, |
| "loss": 0.1975, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.27323162274618584, |
| "grad_norm": 4.776117324829102, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.2735783633841886, |
| "grad_norm": 4.163407325744629, |
| "learning_rate": 1e-06, |
| "loss": 0.1743, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.2739251040221914, |
| "grad_norm": 1.9131619930267334, |
| "learning_rate": 1e-06, |
| "loss": 0.1942, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.27427184466019416, |
| "grad_norm": 2.953847885131836, |
| "learning_rate": 1e-06, |
| "loss": 0.1653, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.27461858529819694, |
| "grad_norm": 1.98551344871521, |
| "learning_rate": 1e-06, |
| "loss": 0.1992, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.2749653259361997, |
| "grad_norm": 5.063632965087891, |
| "learning_rate": 1e-06, |
| "loss": 0.1385, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.2753120665742025, |
| "grad_norm": 3.6445019245147705, |
| "learning_rate": 1e-06, |
| "loss": 0.1653, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.27565880721220526, |
| "grad_norm": 1.9771358966827393, |
| "learning_rate": 1e-06, |
| "loss": 0.2067, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.27600554785020803, |
| "grad_norm": 1.9333562850952148, |
| "learning_rate": 1e-06, |
| "loss": 0.1988, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.2763522884882108, |
| "grad_norm": 1.9832173585891724, |
| "learning_rate": 1e-06, |
| "loss": 0.1646, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.2766990291262136, |
| "grad_norm": 2.332988977432251, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.27704576976421635, |
| "grad_norm": 2.837818145751953, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.27739251040221913, |
| "grad_norm": 3.3492045402526855, |
| "learning_rate": 1e-06, |
| "loss": 0.1635, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2777392510402219, |
| "grad_norm": 1.856004238128662, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.2780859916782247, |
| "grad_norm": 5.804831504821777, |
| "learning_rate": 1e-06, |
| "loss": 0.1833, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.27843273231622745, |
| "grad_norm": 1.7844274044036865, |
| "learning_rate": 1e-06, |
| "loss": 0.1768, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.2787794729542302, |
| "grad_norm": 2.5231499671936035, |
| "learning_rate": 1e-06, |
| "loss": 0.193, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.279126213592233, |
| "grad_norm": 3.219759464263916, |
| "learning_rate": 1e-06, |
| "loss": 0.1958, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.27947295423023577, |
| "grad_norm": 5.376789093017578, |
| "learning_rate": 1e-06, |
| "loss": 0.197, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.27981969486823854, |
| "grad_norm": 2.796621084213257, |
| "learning_rate": 1e-06, |
| "loss": 0.2056, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.2801664355062413, |
| "grad_norm": 1.9890904426574707, |
| "learning_rate": 1e-06, |
| "loss": 0.1505, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.2805131761442441, |
| "grad_norm": 3.3186163902282715, |
| "learning_rate": 1e-06, |
| "loss": 0.1487, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.28085991678224687, |
| "grad_norm": 3.1521759033203125, |
| "learning_rate": 1e-06, |
| "loss": 0.1981, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.28120665742024964, |
| "grad_norm": 1.7996686697006226, |
| "learning_rate": 1e-06, |
| "loss": 0.2118, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.2815533980582524, |
| "grad_norm": 6.712550640106201, |
| "learning_rate": 1e-06, |
| "loss": 0.1817, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.2819001386962552, |
| "grad_norm": 4.741528511047363, |
| "learning_rate": 1e-06, |
| "loss": 0.1332, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.28224687933425796, |
| "grad_norm": 1.8851327896118164, |
| "learning_rate": 1e-06, |
| "loss": 0.2028, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.28259361997226073, |
| "grad_norm": 2.5891757011413574, |
| "learning_rate": 1e-06, |
| "loss": 0.2061, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.2829403606102635, |
| "grad_norm": 2.130458116531372, |
| "learning_rate": 1e-06, |
| "loss": 0.1825, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.2832871012482663, |
| "grad_norm": 2.5108346939086914, |
| "learning_rate": 1e-06, |
| "loss": 0.1774, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.28363384188626906, |
| "grad_norm": 1.7813079357147217, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.28398058252427183, |
| "grad_norm": 1.8457926511764526, |
| "learning_rate": 1e-06, |
| "loss": 0.1468, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.2843273231622746, |
| "grad_norm": 2.4858312606811523, |
| "learning_rate": 1e-06, |
| "loss": 0.1721, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2846740638002774, |
| "grad_norm": 2.8823843002319336, |
| "learning_rate": 1e-06, |
| "loss": 0.1908, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.28502080443828015, |
| "grad_norm": 2.092439889907837, |
| "learning_rate": 1e-06, |
| "loss": 0.2033, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2853675450762829, |
| "grad_norm": 2.2995755672454834, |
| "learning_rate": 1e-06, |
| "loss": 0.1974, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 1.6263433694839478, |
| "learning_rate": 1e-06, |
| "loss": 0.1792, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.28606102635228847, |
| "grad_norm": 3.589700222015381, |
| "learning_rate": 1e-06, |
| "loss": 0.1548, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.28640776699029125, |
| "grad_norm": 2.6296792030334473, |
| "learning_rate": 1e-06, |
| "loss": 0.177, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.286754507628294, |
| "grad_norm": 3.712181568145752, |
| "learning_rate": 1e-06, |
| "loss": 0.1846, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.2871012482662968, |
| "grad_norm": 8.185032844543457, |
| "learning_rate": 1e-06, |
| "loss": 0.2371, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.28744798890429957, |
| "grad_norm": 3.00469708442688, |
| "learning_rate": 1e-06, |
| "loss": 0.1415, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.28779472954230234, |
| "grad_norm": 3.0410897731781006, |
| "learning_rate": 1e-06, |
| "loss": 0.1644, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2881414701803051, |
| "grad_norm": 2.678393840789795, |
| "learning_rate": 1e-06, |
| "loss": 0.2029, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.2884882108183079, |
| "grad_norm": 2.8907225131988525, |
| "learning_rate": 1e-06, |
| "loss": 0.1449, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.28883495145631066, |
| "grad_norm": 1.4863390922546387, |
| "learning_rate": 1e-06, |
| "loss": 0.1468, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.28918169209431344, |
| "grad_norm": 1.769559621810913, |
| "learning_rate": 1e-06, |
| "loss": 0.1932, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.2895284327323162, |
| "grad_norm": 1.6298843622207642, |
| "learning_rate": 1e-06, |
| "loss": 0.1409, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.289875173370319, |
| "grad_norm": 4.213280200958252, |
| "learning_rate": 1e-06, |
| "loss": 0.1718, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.29022191400832176, |
| "grad_norm": 5.342798233032227, |
| "learning_rate": 1e-06, |
| "loss": 0.2073, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.29056865464632453, |
| "grad_norm": 2.607419967651367, |
| "learning_rate": 1e-06, |
| "loss": 0.175, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.2909153952843273, |
| "grad_norm": 3.2880568504333496, |
| "learning_rate": 1e-06, |
| "loss": 0.2236, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 2.370192527770996, |
| "learning_rate": 1e-06, |
| "loss": 0.1806, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.29160887656033285, |
| "grad_norm": 1.8948553800582886, |
| "learning_rate": 1e-06, |
| "loss": 0.1832, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.2919556171983356, |
| "grad_norm": 2.092898368835449, |
| "learning_rate": 1e-06, |
| "loss": 0.1868, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.2923023578363384, |
| "grad_norm": 1.5037286281585693, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.2926490984743412, |
| "grad_norm": 3.512408494949341, |
| "learning_rate": 1e-06, |
| "loss": 0.1883, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.29299583911234395, |
| "grad_norm": 7.901118278503418, |
| "learning_rate": 1e-06, |
| "loss": 0.1555, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.2933425797503467, |
| "grad_norm": 5.720544815063477, |
| "learning_rate": 1e-06, |
| "loss": 0.1626, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.2936893203883495, |
| "grad_norm": 2.572035312652588, |
| "learning_rate": 1e-06, |
| "loss": 0.2006, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.29403606102635227, |
| "grad_norm": 1.5427334308624268, |
| "learning_rate": 1e-06, |
| "loss": 0.1666, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.29438280166435504, |
| "grad_norm": 2.345066547393799, |
| "learning_rate": 1e-06, |
| "loss": 0.1867, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.2947295423023578, |
| "grad_norm": 2.536377429962158, |
| "learning_rate": 1e-06, |
| "loss": 0.1918, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2950762829403606, |
| "grad_norm": 1.5710004568099976, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.29542302357836336, |
| "grad_norm": 1.5936557054519653, |
| "learning_rate": 1e-06, |
| "loss": 0.1569, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.29576976421636614, |
| "grad_norm": 4.063443660736084, |
| "learning_rate": 1e-06, |
| "loss": 0.1922, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.2961165048543689, |
| "grad_norm": 1.6471939086914062, |
| "learning_rate": 1e-06, |
| "loss": 0.1688, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.2964632454923717, |
| "grad_norm": 3.7626683712005615, |
| "learning_rate": 1e-06, |
| "loss": 0.1441, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.29680998613037446, |
| "grad_norm": 3.8700716495513916, |
| "learning_rate": 1e-06, |
| "loss": 0.1727, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.29715672676837723, |
| "grad_norm": 3.989415407180786, |
| "learning_rate": 1e-06, |
| "loss": 0.1821, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.29750346740638, |
| "grad_norm": 5.3771538734436035, |
| "learning_rate": 1e-06, |
| "loss": 0.2133, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.2978502080443828, |
| "grad_norm": 1.7356910705566406, |
| "learning_rate": 1e-06, |
| "loss": 0.1702, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.29819694868238555, |
| "grad_norm": 1.795180320739746, |
| "learning_rate": 1e-06, |
| "loss": 0.1836, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.29854368932038833, |
| "grad_norm": 4.196115970611572, |
| "learning_rate": 1e-06, |
| "loss": 0.173, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.2988904299583911, |
| "grad_norm": 2.782108783721924, |
| "learning_rate": 1e-06, |
| "loss": 0.1905, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.2992371705963939, |
| "grad_norm": 3.027663230895996, |
| "learning_rate": 1e-06, |
| "loss": 0.1974, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.29958391123439665, |
| "grad_norm": 1.9452866315841675, |
| "learning_rate": 1e-06, |
| "loss": 0.1655, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.2999306518723994, |
| "grad_norm": 2.2103707790374756, |
| "learning_rate": 1e-06, |
| "loss": 0.2024, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3002773925104022, |
| "grad_norm": 2.538545846939087, |
| "learning_rate": 1e-06, |
| "loss": 0.1733, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.30062413314840497, |
| "grad_norm": 2.4375507831573486, |
| "learning_rate": 1e-06, |
| "loss": 0.1841, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.30097087378640774, |
| "grad_norm": 2.466561794281006, |
| "learning_rate": 1e-06, |
| "loss": 0.1835, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3013176144244105, |
| "grad_norm": 1.5926258563995361, |
| "learning_rate": 1e-06, |
| "loss": 0.1591, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3016643550624133, |
| "grad_norm": 1.525660514831543, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.30201109570041607, |
| "grad_norm": 1.8583039045333862, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.30235783633841884, |
| "grad_norm": 7.7814531326293945, |
| "learning_rate": 1e-06, |
| "loss": 0.2342, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.3027045769764216, |
| "grad_norm": 1.736899495124817, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3030513176144244, |
| "grad_norm": 2.617612361907959, |
| "learning_rate": 1e-06, |
| "loss": 0.2155, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.30339805825242716, |
| "grad_norm": 4.537561416625977, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.30374479889042993, |
| "grad_norm": 3.117304563522339, |
| "learning_rate": 1e-06, |
| "loss": 0.1786, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3040915395284327, |
| "grad_norm": 1.9936450719833374, |
| "learning_rate": 1e-06, |
| "loss": 0.1963, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3044382801664355, |
| "grad_norm": 2.502594470977783, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.30478502080443826, |
| "grad_norm": 2.9981226921081543, |
| "learning_rate": 1e-06, |
| "loss": 0.1975, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.30513176144244103, |
| "grad_norm": 1.8054769039154053, |
| "learning_rate": 1e-06, |
| "loss": 0.1663, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3054785020804438, |
| "grad_norm": 1.8026496171951294, |
| "learning_rate": 1e-06, |
| "loss": 0.183, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.3058252427184466, |
| "grad_norm": 2.0367887020111084, |
| "learning_rate": 1e-06, |
| "loss": 0.1754, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.30617198335644935, |
| "grad_norm": 1.685020089149475, |
| "learning_rate": 1e-06, |
| "loss": 0.1749, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.3065187239944521, |
| "grad_norm": 2.1757829189300537, |
| "learning_rate": 1e-06, |
| "loss": 0.2091, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3068654646324549, |
| "grad_norm": 5.054877281188965, |
| "learning_rate": 1e-06, |
| "loss": 0.1787, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.30721220527045767, |
| "grad_norm": 1.8964219093322754, |
| "learning_rate": 1e-06, |
| "loss": 0.1853, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.30755894590846045, |
| "grad_norm": 6.154984951019287, |
| "learning_rate": 1e-06, |
| "loss": 0.1642, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3079056865464632, |
| "grad_norm": 3.810920238494873, |
| "learning_rate": 1e-06, |
| "loss": 0.1482, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.308252427184466, |
| "grad_norm": 1.7902008295059204, |
| "learning_rate": 1e-06, |
| "loss": 0.2041, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.30859916782246877, |
| "grad_norm": 2.2116265296936035, |
| "learning_rate": 1e-06, |
| "loss": 0.1945, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.30894590846047154, |
| "grad_norm": 4.08473014831543, |
| "learning_rate": 1e-06, |
| "loss": 0.1814, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.3092926490984743, |
| "grad_norm": 4.12503719329834, |
| "learning_rate": 1e-06, |
| "loss": 0.1697, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3096393897364771, |
| "grad_norm": 2.198702096939087, |
| "learning_rate": 1e-06, |
| "loss": 0.1646, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.30998613037447986, |
| "grad_norm": 2.104871988296509, |
| "learning_rate": 1e-06, |
| "loss": 0.1837, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.31033287101248264, |
| "grad_norm": 1.7803840637207031, |
| "learning_rate": 1e-06, |
| "loss": 0.1687, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 2.5232043266296387, |
| "learning_rate": 1e-06, |
| "loss": 0.1852, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3110263522884882, |
| "grad_norm": 1.5908442735671997, |
| "learning_rate": 1e-06, |
| "loss": 0.161, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.31137309292649096, |
| "grad_norm": 3.704270839691162, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.31171983356449373, |
| "grad_norm": 1.6646925210952759, |
| "learning_rate": 1e-06, |
| "loss": 0.1621, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3120665742024965, |
| "grad_norm": 1.6272737979888916, |
| "learning_rate": 1e-06, |
| "loss": 0.1738, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3124133148404993, |
| "grad_norm": 2.2414157390594482, |
| "learning_rate": 1e-06, |
| "loss": 0.1857, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.3127600554785021, |
| "grad_norm": 4.613629341125488, |
| "learning_rate": 1e-06, |
| "loss": 0.2038, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.3131067961165049, |
| "grad_norm": 2.6360721588134766, |
| "learning_rate": 1e-06, |
| "loss": 0.1689, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.31345353675450766, |
| "grad_norm": 2.4351446628570557, |
| "learning_rate": 1e-06, |
| "loss": 0.1699, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.31380027739251043, |
| "grad_norm": 1.645674228668213, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3141470180305132, |
| "grad_norm": 2.1112935543060303, |
| "learning_rate": 1e-06, |
| "loss": 0.1725, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.314493758668516, |
| "grad_norm": 1.8450239896774292, |
| "learning_rate": 1e-06, |
| "loss": 0.1783, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.31484049930651875, |
| "grad_norm": 3.959017276763916, |
| "learning_rate": 1e-06, |
| "loss": 0.165, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.3151872399445215, |
| "grad_norm": 2.1411192417144775, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3155339805825243, |
| "grad_norm": 2.4546244144439697, |
| "learning_rate": 1e-06, |
| "loss": 0.193, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.31588072122052707, |
| "grad_norm": 4.3430280685424805, |
| "learning_rate": 1e-06, |
| "loss": 0.1721, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.31622746185852985, |
| "grad_norm": 3.988929271697998, |
| "learning_rate": 1e-06, |
| "loss": 0.159, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3165742024965326, |
| "grad_norm": 3.2247719764709473, |
| "learning_rate": 1e-06, |
| "loss": 0.1624, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.3169209431345354, |
| "grad_norm": 1.647119402885437, |
| "learning_rate": 1e-06, |
| "loss": 0.1659, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.31726768377253817, |
| "grad_norm": 3.0910604000091553, |
| "learning_rate": 1e-06, |
| "loss": 0.18, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.31761442441054094, |
| "grad_norm": 1.7037394046783447, |
| "learning_rate": 1e-06, |
| "loss": 0.2035, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.3179611650485437, |
| "grad_norm": 3.9424848556518555, |
| "learning_rate": 1e-06, |
| "loss": 0.1755, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.3183079056865465, |
| "grad_norm": 1.8996752500534058, |
| "learning_rate": 1e-06, |
| "loss": 0.1674, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.31865464632454926, |
| "grad_norm": 1.9568631649017334, |
| "learning_rate": 1e-06, |
| "loss": 0.1741, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.31900138696255204, |
| "grad_norm": 2.990325927734375, |
| "learning_rate": 1e-06, |
| "loss": 0.1907, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3193481276005548, |
| "grad_norm": 1.8824876546859741, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3196948682385576, |
| "grad_norm": 1.7028950452804565, |
| "learning_rate": 1e-06, |
| "loss": 0.1362, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.32004160887656036, |
| "grad_norm": 2.010843515396118, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.32038834951456313, |
| "grad_norm": 1.7268744707107544, |
| "learning_rate": 1e-06, |
| "loss": 0.1853, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3207350901525659, |
| "grad_norm": 2.266728162765503, |
| "learning_rate": 1e-06, |
| "loss": 0.1644, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3210818307905687, |
| "grad_norm": 5.072295188903809, |
| "learning_rate": 1e-06, |
| "loss": 0.1793, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 2.210855484008789, |
| "learning_rate": 1e-06, |
| "loss": 0.1425, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3217753120665742, |
| "grad_norm": 2.0803961753845215, |
| "learning_rate": 1e-06, |
| "loss": 0.1651, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.322122052704577, |
| "grad_norm": 2.160520076751709, |
| "learning_rate": 1e-06, |
| "loss": 0.1596, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3224687933425798, |
| "grad_norm": 1.9977649450302124, |
| "learning_rate": 1e-06, |
| "loss": 0.1876, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.32281553398058255, |
| "grad_norm": 1.5417920351028442, |
| "learning_rate": 1e-06, |
| "loss": 0.1503, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3231622746185853, |
| "grad_norm": 2.099778175354004, |
| "learning_rate": 1e-06, |
| "loss": 0.1836, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.3235090152565881, |
| "grad_norm": 3.9819977283477783, |
| "learning_rate": 1e-06, |
| "loss": 0.1587, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.32385575589459087, |
| "grad_norm": 1.6170867681503296, |
| "learning_rate": 1e-06, |
| "loss": 0.147, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.32420249653259364, |
| "grad_norm": 3.3660576343536377, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3245492371705964, |
| "grad_norm": 2.2826545238494873, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3248959778085992, |
| "grad_norm": 5.107556343078613, |
| "learning_rate": 1e-06, |
| "loss": 0.1964, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.32524271844660196, |
| "grad_norm": 3.9381065368652344, |
| "learning_rate": 1e-06, |
| "loss": 0.2003, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.32558945908460474, |
| "grad_norm": 2.5233023166656494, |
| "learning_rate": 1e-06, |
| "loss": 0.1747, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3259361997226075, |
| "grad_norm": 1.8627601861953735, |
| "learning_rate": 1e-06, |
| "loss": 0.177, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3262829403606103, |
| "grad_norm": 1.7760263681411743, |
| "learning_rate": 1e-06, |
| "loss": 0.1548, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.32662968099861306, |
| "grad_norm": 2.0792856216430664, |
| "learning_rate": 1e-06, |
| "loss": 0.1762, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.32697642163661583, |
| "grad_norm": 4.968944549560547, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3273231622746186, |
| "grad_norm": 2.9295568466186523, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.3276699029126214, |
| "grad_norm": 3.2143867015838623, |
| "learning_rate": 1e-06, |
| "loss": 0.1401, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.32801664355062415, |
| "grad_norm": 4.367483139038086, |
| "learning_rate": 1e-06, |
| "loss": 0.2002, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.32836338418862693, |
| "grad_norm": 2.355031967163086, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3287101248266297, |
| "grad_norm": 2.9974913597106934, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.3290568654646325, |
| "grad_norm": 2.6766953468322754, |
| "learning_rate": 1e-06, |
| "loss": 0.1834, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.32940360610263525, |
| "grad_norm": 3.276123523712158, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.329750346740638, |
| "grad_norm": 1.6151905059814453, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3300970873786408, |
| "grad_norm": 1.8168013095855713, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.33044382801664357, |
| "grad_norm": 4.220736503601074, |
| "learning_rate": 1e-06, |
| "loss": 0.1696, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.33079056865464634, |
| "grad_norm": 2.646458625793457, |
| "learning_rate": 1e-06, |
| "loss": 0.14, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.3311373092926491, |
| "grad_norm": 5.339474201202393, |
| "learning_rate": 1e-06, |
| "loss": 0.2126, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3314840499306519, |
| "grad_norm": 3.148782730102539, |
| "learning_rate": 1e-06, |
| "loss": 0.1692, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.33183079056865467, |
| "grad_norm": 3.1843745708465576, |
| "learning_rate": 1e-06, |
| "loss": 0.1804, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.33217753120665744, |
| "grad_norm": 2.0136914253234863, |
| "learning_rate": 1e-06, |
| "loss": 0.1817, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3325242718446602, |
| "grad_norm": 2.250756025314331, |
| "learning_rate": 1e-06, |
| "loss": 0.1885, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.332871012482663, |
| "grad_norm": 3.832793951034546, |
| "learning_rate": 1e-06, |
| "loss": 0.1617, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.33321775312066576, |
| "grad_norm": 3.097174644470215, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.33356449375866853, |
| "grad_norm": 3.634685516357422, |
| "learning_rate": 1e-06, |
| "loss": 0.1465, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.3339112343966713, |
| "grad_norm": 1.968932032585144, |
| "learning_rate": 1e-06, |
| "loss": 0.1819, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.3342579750346741, |
| "grad_norm": 3.506141424179077, |
| "learning_rate": 1e-06, |
| "loss": 0.1623, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.33460471567267686, |
| "grad_norm": 2.201019048690796, |
| "learning_rate": 1e-06, |
| "loss": 0.1987, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.33495145631067963, |
| "grad_norm": 1.871758222579956, |
| "learning_rate": 1e-06, |
| "loss": 0.1651, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3352981969486824, |
| "grad_norm": 2.852182626724243, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3356449375866852, |
| "grad_norm": 2.830751657485962, |
| "learning_rate": 1e-06, |
| "loss": 0.1934, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.33599167822468795, |
| "grad_norm": 2.9230191707611084, |
| "learning_rate": 1e-06, |
| "loss": 0.1689, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3363384188626907, |
| "grad_norm": 3.4389359951019287, |
| "learning_rate": 1e-06, |
| "loss": 0.1606, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3366851595006935, |
| "grad_norm": 3.4921634197235107, |
| "learning_rate": 1e-06, |
| "loss": 0.1891, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.33703190013869627, |
| "grad_norm": 2.9009199142456055, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.33737864077669905, |
| "grad_norm": 1.8419466018676758, |
| "learning_rate": 1e-06, |
| "loss": 0.173, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3377253814147018, |
| "grad_norm": 4.729164123535156, |
| "learning_rate": 1e-06, |
| "loss": 0.1863, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3380721220527046, |
| "grad_norm": 4.153334617614746, |
| "learning_rate": 1e-06, |
| "loss": 0.189, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.33841886269070737, |
| "grad_norm": 2.406278371810913, |
| "learning_rate": 1e-06, |
| "loss": 0.1726, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.33876560332871014, |
| "grad_norm": 1.854493498802185, |
| "learning_rate": 1e-06, |
| "loss": 0.1859, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3391123439667129, |
| "grad_norm": 1.7517307996749878, |
| "learning_rate": 1e-06, |
| "loss": 0.1849, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.3394590846047157, |
| "grad_norm": 3.5556588172912598, |
| "learning_rate": 1e-06, |
| "loss": 0.1691, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.33980582524271846, |
| "grad_norm": 3.863245725631714, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.34015256588072124, |
| "grad_norm": 4.660958766937256, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.340499306518724, |
| "grad_norm": 2.8175435066223145, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3408460471567268, |
| "grad_norm": 3.012770891189575, |
| "learning_rate": 1e-06, |
| "loss": 0.1876, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.34119278779472956, |
| "grad_norm": 3.963909387588501, |
| "learning_rate": 1e-06, |
| "loss": 0.1512, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.34153952843273233, |
| "grad_norm": 1.6655025482177734, |
| "learning_rate": 1e-06, |
| "loss": 0.1429, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3418862690707351, |
| "grad_norm": 2.783829689025879, |
| "learning_rate": 1e-06, |
| "loss": 0.2169, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3422330097087379, |
| "grad_norm": 1.5330981016159058, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.34257975034674065, |
| "grad_norm": 1.6944447755813599, |
| "learning_rate": 1e-06, |
| "loss": 0.1762, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.3429264909847434, |
| "grad_norm": 2.4283335208892822, |
| "learning_rate": 1e-06, |
| "loss": 0.1577, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3432732316227462, |
| "grad_norm": 2.7645480632781982, |
| "learning_rate": 1e-06, |
| "loss": 0.1697, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.343619972260749, |
| "grad_norm": 4.196599960327148, |
| "learning_rate": 1e-06, |
| "loss": 0.1777, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.34396671289875175, |
| "grad_norm": 1.866174340248108, |
| "learning_rate": 1e-06, |
| "loss": 0.1461, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3443134535367545, |
| "grad_norm": 2.0059845447540283, |
| "learning_rate": 1e-06, |
| "loss": 0.182, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3446601941747573, |
| "grad_norm": 1.9645408391952515, |
| "learning_rate": 1e-06, |
| "loss": 0.1908, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.34500693481276007, |
| "grad_norm": 2.1238350868225098, |
| "learning_rate": 1e-06, |
| "loss": 0.2052, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.34535367545076284, |
| "grad_norm": 1.8180639743804932, |
| "learning_rate": 1e-06, |
| "loss": 0.1684, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.3457004160887656, |
| "grad_norm": 2.0264620780944824, |
| "learning_rate": 1e-06, |
| "loss": 0.1827, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.3460471567267684, |
| "grad_norm": 2.302675724029541, |
| "learning_rate": 1e-06, |
| "loss": 0.1999, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.34639389736477116, |
| "grad_norm": 2.18147873878479, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.34674063800277394, |
| "grad_norm": 7.13607931137085, |
| "learning_rate": 1e-06, |
| "loss": 0.1692, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3470873786407767, |
| "grad_norm": 3.1698849201202393, |
| "learning_rate": 1e-06, |
| "loss": 0.1861, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3474341192787795, |
| "grad_norm": 2.142284393310547, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.34778085991678226, |
| "grad_norm": 2.648524284362793, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.34812760055478503, |
| "grad_norm": 1.6139206886291504, |
| "learning_rate": 1e-06, |
| "loss": 0.1659, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.3484743411927878, |
| "grad_norm": 1.9868088960647583, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3488210818307906, |
| "grad_norm": 1.7184799909591675, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.34916782246879335, |
| "grad_norm": 1.670831561088562, |
| "learning_rate": 1e-06, |
| "loss": 0.1624, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.34951456310679613, |
| "grad_norm": 1.932572364807129, |
| "learning_rate": 1e-06, |
| "loss": 0.1428, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3498613037447989, |
| "grad_norm": 3.854353189468384, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.3502080443828017, |
| "grad_norm": 1.7538644075393677, |
| "learning_rate": 1e-06, |
| "loss": 0.1491, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.35055478502080445, |
| "grad_norm": 1.8496301174163818, |
| "learning_rate": 1e-06, |
| "loss": 0.1756, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3509015256588072, |
| "grad_norm": 4.00302267074585, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.35124826629681, |
| "grad_norm": 1.931908369064331, |
| "learning_rate": 1e-06, |
| "loss": 0.1675, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.35159500693481277, |
| "grad_norm": 1.933568000793457, |
| "learning_rate": 1e-06, |
| "loss": 0.1556, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.35194174757281554, |
| "grad_norm": 1.5976754426956177, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3522884882108183, |
| "grad_norm": 1.9067904949188232, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3526352288488211, |
| "grad_norm": 2.5264508724212646, |
| "learning_rate": 1e-06, |
| "loss": 0.1512, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.35298196948682387, |
| "grad_norm": 3.613884925842285, |
| "learning_rate": 1e-06, |
| "loss": 0.1775, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.35332871012482664, |
| "grad_norm": 2.958832263946533, |
| "learning_rate": 1e-06, |
| "loss": 0.1838, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.3536754507628294, |
| "grad_norm": 1.8982412815093994, |
| "learning_rate": 1e-06, |
| "loss": 0.1416, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3540221914008322, |
| "grad_norm": 2.6603124141693115, |
| "learning_rate": 1e-06, |
| "loss": 0.1839, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.35436893203883496, |
| "grad_norm": 3.2685227394104004, |
| "learning_rate": 1e-06, |
| "loss": 0.1893, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.35471567267683773, |
| "grad_norm": 2.6243014335632324, |
| "learning_rate": 1e-06, |
| "loss": 0.1805, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3550624133148405, |
| "grad_norm": 3.69974946975708, |
| "learning_rate": 1e-06, |
| "loss": 0.199, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.3554091539528433, |
| "grad_norm": 1.906920313835144, |
| "learning_rate": 1e-06, |
| "loss": 0.1526, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.35575589459084606, |
| "grad_norm": 4.538094520568848, |
| "learning_rate": 1e-06, |
| "loss": 0.2041, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.35610263522884883, |
| "grad_norm": 1.9397320747375488, |
| "learning_rate": 1e-06, |
| "loss": 0.1619, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.3564493758668516, |
| "grad_norm": 3.0926973819732666, |
| "learning_rate": 1e-06, |
| "loss": 0.193, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.3567961165048544, |
| "grad_norm": 6.100024223327637, |
| "learning_rate": 1e-06, |
| "loss": 0.1759, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 1.7386844158172607, |
| "learning_rate": 1e-06, |
| "loss": 0.1573, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3574895977808599, |
| "grad_norm": 2.905871629714966, |
| "learning_rate": 1e-06, |
| "loss": 0.1522, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.3578363384188627, |
| "grad_norm": 2.167205333709717, |
| "learning_rate": 1e-06, |
| "loss": 0.1628, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.35818307905686547, |
| "grad_norm": 2.418584108352661, |
| "learning_rate": 1e-06, |
| "loss": 0.1755, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.35852981969486825, |
| "grad_norm": 2.9872934818267822, |
| "learning_rate": 1e-06, |
| "loss": 0.174, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.358876560332871, |
| "grad_norm": 3.5263776779174805, |
| "learning_rate": 1e-06, |
| "loss": 0.1802, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3592233009708738, |
| "grad_norm": 3.891584634780884, |
| "learning_rate": 1e-06, |
| "loss": 0.1996, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.35957004160887657, |
| "grad_norm": 2.081064224243164, |
| "learning_rate": 1e-06, |
| "loss": 0.1578, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.35991678224687934, |
| "grad_norm": 2.100369930267334, |
| "learning_rate": 1e-06, |
| "loss": 0.1572, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.3602635228848821, |
| "grad_norm": 2.148822069168091, |
| "learning_rate": 1e-06, |
| "loss": 0.1788, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.3606102635228849, |
| "grad_norm": 2.7405498027801514, |
| "learning_rate": 1e-06, |
| "loss": 0.1863, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.36095700416088766, |
| "grad_norm": 4.158656597137451, |
| "learning_rate": 1e-06, |
| "loss": 0.1699, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.36130374479889044, |
| "grad_norm": 2.858583688735962, |
| "learning_rate": 1e-06, |
| "loss": 0.1855, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.3616504854368932, |
| "grad_norm": 5.225622177124023, |
| "learning_rate": 1e-06, |
| "loss": 0.1613, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.361997226074896, |
| "grad_norm": 2.721017599105835, |
| "learning_rate": 1e-06, |
| "loss": 0.1532, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.36234396671289876, |
| "grad_norm": 3.189161539077759, |
| "learning_rate": 1e-06, |
| "loss": 0.1966, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.36269070735090153, |
| "grad_norm": 2.8146345615386963, |
| "learning_rate": 1e-06, |
| "loss": 0.1786, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.3630374479889043, |
| "grad_norm": 3.777596950531006, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.3633841886269071, |
| "grad_norm": 2.2806339263916016, |
| "learning_rate": 1e-06, |
| "loss": 0.1447, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.36373092926490985, |
| "grad_norm": 2.7215936183929443, |
| "learning_rate": 1e-06, |
| "loss": 0.187, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.3640776699029126, |
| "grad_norm": 3.6170296669006348, |
| "learning_rate": 1e-06, |
| "loss": 0.192, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3644244105409154, |
| "grad_norm": 2.5755667686462402, |
| "learning_rate": 1e-06, |
| "loss": 0.1592, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.3647711511789182, |
| "grad_norm": 2.1443443298339844, |
| "learning_rate": 1e-06, |
| "loss": 0.1704, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.36511789181692095, |
| "grad_norm": 2.3216934204101562, |
| "learning_rate": 1e-06, |
| "loss": 0.1787, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.3654646324549237, |
| "grad_norm": 2.0402567386627197, |
| "learning_rate": 1e-06, |
| "loss": 0.1474, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.3658113730929265, |
| "grad_norm": 2.2533066272735596, |
| "learning_rate": 1e-06, |
| "loss": 0.2121, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.36615811373092927, |
| "grad_norm": 2.638406753540039, |
| "learning_rate": 1e-06, |
| "loss": 0.165, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.36650485436893204, |
| "grad_norm": 1.7857331037521362, |
| "learning_rate": 1e-06, |
| "loss": 0.1653, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.3668515950069348, |
| "grad_norm": 4.700150966644287, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.3671983356449376, |
| "grad_norm": 1.8558968305587769, |
| "learning_rate": 1e-06, |
| "loss": 0.1674, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.36754507628294036, |
| "grad_norm": 2.047917604446411, |
| "learning_rate": 1e-06, |
| "loss": 0.1757, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.36789181692094314, |
| "grad_norm": 2.157421588897705, |
| "learning_rate": 1e-06, |
| "loss": 0.1655, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.3682385575589459, |
| "grad_norm": 4.857705593109131, |
| "learning_rate": 1e-06, |
| "loss": 0.2087, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.3685852981969487, |
| "grad_norm": 2.143380641937256, |
| "learning_rate": 1e-06, |
| "loss": 0.1595, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.36893203883495146, |
| "grad_norm": 3.1953229904174805, |
| "learning_rate": 1e-06, |
| "loss": 0.1592, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.36927877947295423, |
| "grad_norm": 1.5903429985046387, |
| "learning_rate": 1e-06, |
| "loss": 0.1705, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.369625520110957, |
| "grad_norm": 2.1189424991607666, |
| "learning_rate": 1e-06, |
| "loss": 0.1611, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.3699722607489598, |
| "grad_norm": 2.089324712753296, |
| "learning_rate": 1e-06, |
| "loss": 0.1625, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.37031900138696255, |
| "grad_norm": 2.1971182823181152, |
| "learning_rate": 1e-06, |
| "loss": 0.2072, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.37066574202496533, |
| "grad_norm": 3.347195625305176, |
| "learning_rate": 1e-06, |
| "loss": 0.1455, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.3710124826629681, |
| "grad_norm": 3.526571035385132, |
| "learning_rate": 1e-06, |
| "loss": 0.154, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.3713592233009709, |
| "grad_norm": 3.1888411045074463, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.37170596393897365, |
| "grad_norm": 2.727715253829956, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.3720527045769764, |
| "grad_norm": 2.437049150466919, |
| "learning_rate": 1e-06, |
| "loss": 0.1637, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.3723994452149792, |
| "grad_norm": 4.141301155090332, |
| "learning_rate": 1e-06, |
| "loss": 0.192, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.37274618585298197, |
| "grad_norm": 3.7499258518218994, |
| "learning_rate": 1e-06, |
| "loss": 0.1791, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.37309292649098474, |
| "grad_norm": 1.5035871267318726, |
| "learning_rate": 1e-06, |
| "loss": 0.1553, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.3734396671289875, |
| "grad_norm": 1.849098563194275, |
| "learning_rate": 1e-06, |
| "loss": 0.1851, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.3737864077669903, |
| "grad_norm": 3.0276803970336914, |
| "learning_rate": 1e-06, |
| "loss": 0.1682, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.37413314840499307, |
| "grad_norm": 3.6269848346710205, |
| "learning_rate": 1e-06, |
| "loss": 0.1584, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.37447988904299584, |
| "grad_norm": 1.632621169090271, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3748266296809986, |
| "grad_norm": 4.8789472579956055, |
| "learning_rate": 1e-06, |
| "loss": 0.1871, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.3751733703190014, |
| "grad_norm": 2.575338363647461, |
| "learning_rate": 1e-06, |
| "loss": 0.1781, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.37552011095700416, |
| "grad_norm": 5.345282077789307, |
| "learning_rate": 1e-06, |
| "loss": 0.1923, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.37586685159500693, |
| "grad_norm": 1.7190324068069458, |
| "learning_rate": 1e-06, |
| "loss": 0.155, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.3762135922330097, |
| "grad_norm": 2.7497055530548096, |
| "learning_rate": 1e-06, |
| "loss": 0.1667, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3765603328710125, |
| "grad_norm": 2.812009334564209, |
| "learning_rate": 1e-06, |
| "loss": 0.2117, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.37690707350901526, |
| "grad_norm": 1.6642462015151978, |
| "learning_rate": 1e-06, |
| "loss": 0.1749, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.37725381414701803, |
| "grad_norm": 1.9300440549850464, |
| "learning_rate": 1e-06, |
| "loss": 0.1683, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.3776005547850208, |
| "grad_norm": 3.1472578048706055, |
| "learning_rate": 1e-06, |
| "loss": 0.2015, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.3779472954230236, |
| "grad_norm": 2.2062807083129883, |
| "learning_rate": 1e-06, |
| "loss": 0.1713, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.37829403606102635, |
| "grad_norm": 2.400383472442627, |
| "learning_rate": 1e-06, |
| "loss": 0.2022, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.3786407766990291, |
| "grad_norm": 1.869706153869629, |
| "learning_rate": 1e-06, |
| "loss": 0.1863, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.3789875173370319, |
| "grad_norm": 4.637445449829102, |
| "learning_rate": 1e-06, |
| "loss": 0.1642, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.37933425797503467, |
| "grad_norm": 3.4108481407165527, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.37968099861303745, |
| "grad_norm": 2.6168668270111084, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.3800277392510402, |
| "grad_norm": 2.7562978267669678, |
| "learning_rate": 1e-06, |
| "loss": 0.2129, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.380374479889043, |
| "grad_norm": 2.557950019836426, |
| "learning_rate": 1e-06, |
| "loss": 0.1644, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.38072122052704577, |
| "grad_norm": 4.20210075378418, |
| "learning_rate": 1e-06, |
| "loss": 0.177, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.38106796116504854, |
| "grad_norm": 2.3209750652313232, |
| "learning_rate": 1e-06, |
| "loss": 0.1768, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.3814147018030513, |
| "grad_norm": 2.318815231323242, |
| "learning_rate": 1e-06, |
| "loss": 0.2034, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3817614424410541, |
| "grad_norm": 4.568731307983398, |
| "learning_rate": 1e-06, |
| "loss": 0.171, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.38210818307905686, |
| "grad_norm": 1.882871150970459, |
| "learning_rate": 1e-06, |
| "loss": 0.1263, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.38245492371705964, |
| "grad_norm": 2.3090946674346924, |
| "learning_rate": 1e-06, |
| "loss": 0.2043, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.3828016643550624, |
| "grad_norm": 2.3740408420562744, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.3831484049930652, |
| "grad_norm": 3.6114842891693115, |
| "learning_rate": 1e-06, |
| "loss": 0.1848, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.38349514563106796, |
| "grad_norm": 1.9591243267059326, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.38384188626907073, |
| "grad_norm": 1.7068411111831665, |
| "learning_rate": 1e-06, |
| "loss": 0.1449, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.3841886269070735, |
| "grad_norm": 1.9355899095535278, |
| "learning_rate": 1e-06, |
| "loss": 0.1902, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.3845353675450763, |
| "grad_norm": 6.148166179656982, |
| "learning_rate": 1e-06, |
| "loss": 0.1864, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.38488210818307905, |
| "grad_norm": 3.4833147525787354, |
| "learning_rate": 1e-06, |
| "loss": 0.1521, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3852288488210818, |
| "grad_norm": 5.457380294799805, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.3855755894590846, |
| "grad_norm": 2.54488468170166, |
| "learning_rate": 1e-06, |
| "loss": 0.187, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.3859223300970874, |
| "grad_norm": 1.795543909072876, |
| "learning_rate": 1e-06, |
| "loss": 0.2005, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.38626907073509015, |
| "grad_norm": 2.521865129470825, |
| "learning_rate": 1e-06, |
| "loss": 0.2035, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.3866158113730929, |
| "grad_norm": 2.5379583835601807, |
| "learning_rate": 1e-06, |
| "loss": 0.184, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.3869625520110957, |
| "grad_norm": 1.7145198583602905, |
| "learning_rate": 1e-06, |
| "loss": 0.1709, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.38730929264909847, |
| "grad_norm": 3.1036429405212402, |
| "learning_rate": 1e-06, |
| "loss": 0.2025, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.38765603328710124, |
| "grad_norm": 2.6453635692596436, |
| "learning_rate": 1e-06, |
| "loss": 0.1301, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.388002773925104, |
| "grad_norm": 2.3852477073669434, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 1.9018865823745728, |
| "learning_rate": 1e-06, |
| "loss": 0.1618, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.38869625520110956, |
| "grad_norm": 2.8111538887023926, |
| "learning_rate": 1e-06, |
| "loss": 0.1624, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.38904299583911234, |
| "grad_norm": 2.226997137069702, |
| "learning_rate": 1e-06, |
| "loss": 0.1671, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.3893897364771151, |
| "grad_norm": 2.0597381591796875, |
| "learning_rate": 1e-06, |
| "loss": 0.1604, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.3897364771151179, |
| "grad_norm": 5.358310699462891, |
| "learning_rate": 1e-06, |
| "loss": 0.2211, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.39008321775312066, |
| "grad_norm": 2.202601909637451, |
| "learning_rate": 1e-06, |
| "loss": 0.1544, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.39042995839112343, |
| "grad_norm": 1.6089842319488525, |
| "learning_rate": 1e-06, |
| "loss": 0.1582, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.3907766990291262, |
| "grad_norm": 2.50658917427063, |
| "learning_rate": 1e-06, |
| "loss": 0.1792, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.391123439667129, |
| "grad_norm": 1.6953543424606323, |
| "learning_rate": 1e-06, |
| "loss": 0.1624, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.39147018030513175, |
| "grad_norm": 1.8754124641418457, |
| "learning_rate": 1e-06, |
| "loss": 0.1586, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.39181692094313453, |
| "grad_norm": 4.671680450439453, |
| "learning_rate": 1e-06, |
| "loss": 0.1969, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3921636615811373, |
| "grad_norm": 2.1152446269989014, |
| "learning_rate": 1e-06, |
| "loss": 0.1634, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.3925104022191401, |
| "grad_norm": 4.22470235824585, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 1.6116793155670166, |
| "learning_rate": 1e-06, |
| "loss": 0.1672, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.3932038834951456, |
| "grad_norm": 1.9633543491363525, |
| "learning_rate": 1e-06, |
| "loss": 0.1529, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.3935506241331484, |
| "grad_norm": 2.0340752601623535, |
| "learning_rate": 1e-06, |
| "loss": 0.1708, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.39389736477115117, |
| "grad_norm": 2.2635862827301025, |
| "learning_rate": 1e-06, |
| "loss": 0.1787, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.39424410540915394, |
| "grad_norm": 2.268153667449951, |
| "learning_rate": 1e-06, |
| "loss": 0.1992, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.3945908460471567, |
| "grad_norm": 1.7230418920516968, |
| "learning_rate": 1e-06, |
| "loss": 0.1372, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.3949375866851595, |
| "grad_norm": 1.6801517009735107, |
| "learning_rate": 1e-06, |
| "loss": 0.1428, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.39528432732316227, |
| "grad_norm": 2.6721580028533936, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.39563106796116504, |
| "grad_norm": 2.748892307281494, |
| "learning_rate": 1e-06, |
| "loss": 0.1554, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.3959778085991678, |
| "grad_norm": 2.1693079471588135, |
| "learning_rate": 1e-06, |
| "loss": 0.1856, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.3963245492371706, |
| "grad_norm": 3.472423553466797, |
| "learning_rate": 1e-06, |
| "loss": 0.1748, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.39667128987517336, |
| "grad_norm": 3.2652645111083984, |
| "learning_rate": 1e-06, |
| "loss": 0.1756, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.39701803051317613, |
| "grad_norm": 1.8527421951293945, |
| "learning_rate": 1e-06, |
| "loss": 0.1743, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.3973647711511789, |
| "grad_norm": 1.7605152130126953, |
| "learning_rate": 1e-06, |
| "loss": 0.1524, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.3977115117891817, |
| "grad_norm": 1.6430308818817139, |
| "learning_rate": 1e-06, |
| "loss": 0.1571, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.39805825242718446, |
| "grad_norm": 1.800592064857483, |
| "learning_rate": 1e-06, |
| "loss": 0.1962, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.39840499306518723, |
| "grad_norm": 1.925662875175476, |
| "learning_rate": 1e-06, |
| "loss": 0.1923, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.39875173370319, |
| "grad_norm": 2.714669942855835, |
| "learning_rate": 1e-06, |
| "loss": 0.1562, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3990984743411928, |
| "grad_norm": 1.951225996017456, |
| "learning_rate": 1e-06, |
| "loss": 0.16, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.39944521497919555, |
| "grad_norm": 3.028454542160034, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.3997919556171983, |
| "grad_norm": 5.070191860198975, |
| "learning_rate": 1e-06, |
| "loss": 0.1875, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.4001386962552011, |
| "grad_norm": 1.9278351068496704, |
| "learning_rate": 1e-06, |
| "loss": 0.1608, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.40048543689320387, |
| "grad_norm": 2.457918643951416, |
| "learning_rate": 1e-06, |
| "loss": 0.2081, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.40083217753120665, |
| "grad_norm": 2.267364501953125, |
| "learning_rate": 1e-06, |
| "loss": 0.147, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4011789181692094, |
| "grad_norm": 2.4611542224884033, |
| "learning_rate": 1e-06, |
| "loss": 0.1763, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.4015256588072122, |
| "grad_norm": 1.9886502027511597, |
| "learning_rate": 1e-06, |
| "loss": 0.183, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.40187239944521497, |
| "grad_norm": 2.998033285140991, |
| "learning_rate": 1e-06, |
| "loss": 0.1725, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.40221914008321774, |
| "grad_norm": 5.4129319190979, |
| "learning_rate": 1e-06, |
| "loss": 0.2027, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4025658807212205, |
| "grad_norm": 3.6759653091430664, |
| "learning_rate": 1e-06, |
| "loss": 0.1682, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4029126213592233, |
| "grad_norm": 2.2087016105651855, |
| "learning_rate": 1e-06, |
| "loss": 0.1633, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.40325936199722606, |
| "grad_norm": 3.113842725753784, |
| "learning_rate": 1e-06, |
| "loss": 0.1823, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.40360610263522884, |
| "grad_norm": 1.9368499517440796, |
| "learning_rate": 1e-06, |
| "loss": 0.1677, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4039528432732316, |
| "grad_norm": 3.0826315879821777, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4042995839112344, |
| "grad_norm": 4.97300386428833, |
| "learning_rate": 1e-06, |
| "loss": 0.1753, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.40464632454923716, |
| "grad_norm": 4.992250442504883, |
| "learning_rate": 1e-06, |
| "loss": 0.1532, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.40499306518723993, |
| "grad_norm": 7.608388423919678, |
| "learning_rate": 1e-06, |
| "loss": 0.1693, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.4053398058252427, |
| "grad_norm": 2.3100359439849854, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.4056865464632455, |
| "grad_norm": 2.648092746734619, |
| "learning_rate": 1e-06, |
| "loss": 0.1489, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.40603328710124825, |
| "grad_norm": 1.7646437883377075, |
| "learning_rate": 1e-06, |
| "loss": 0.1578, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.406380027739251, |
| "grad_norm": 2.959559679031372, |
| "learning_rate": 1e-06, |
| "loss": 0.1574, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.4067267683772538, |
| "grad_norm": 2.033961057662964, |
| "learning_rate": 1e-06, |
| "loss": 0.1493, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4070735090152566, |
| "grad_norm": 1.6733283996582031, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.40742024965325935, |
| "grad_norm": 2.8953778743743896, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.4077669902912621, |
| "grad_norm": 2.2271323204040527, |
| "learning_rate": 1e-06, |
| "loss": 0.1749, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.4081137309292649, |
| "grad_norm": 1.6847202777862549, |
| "learning_rate": 1e-06, |
| "loss": 0.1325, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.40846047156726767, |
| "grad_norm": 5.325228214263916, |
| "learning_rate": 1e-06, |
| "loss": 0.2158, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.40880721220527044, |
| "grad_norm": 2.9201323986053467, |
| "learning_rate": 1e-06, |
| "loss": 0.2057, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.4091539528432732, |
| "grad_norm": 3.783559560775757, |
| "learning_rate": 1e-06, |
| "loss": 0.1741, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.409500693481276, |
| "grad_norm": 2.83846378326416, |
| "learning_rate": 1e-06, |
| "loss": 0.1893, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.40984743411927876, |
| "grad_norm": 3.131998300552368, |
| "learning_rate": 1e-06, |
| "loss": 0.1753, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.41019417475728154, |
| "grad_norm": 3.4768800735473633, |
| "learning_rate": 1e-06, |
| "loss": 0.1638, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4105409153952843, |
| "grad_norm": 3.8055405616760254, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.4108876560332871, |
| "grad_norm": 3.457226514816284, |
| "learning_rate": 1e-06, |
| "loss": 0.1789, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.41123439667128986, |
| "grad_norm": 2.19962477684021, |
| "learning_rate": 1e-06, |
| "loss": 0.1832, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.41158113730929263, |
| "grad_norm": 4.11622428894043, |
| "learning_rate": 1e-06, |
| "loss": 0.1351, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.4119278779472954, |
| "grad_norm": 2.369816541671753, |
| "learning_rate": 1e-06, |
| "loss": 0.1943, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4122746185852982, |
| "grad_norm": 6.653234958648682, |
| "learning_rate": 1e-06, |
| "loss": 0.1635, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.41262135922330095, |
| "grad_norm": 7.1791887283325195, |
| "learning_rate": 1e-06, |
| "loss": 0.1677, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.41296809986130373, |
| "grad_norm": 3.2327146530151367, |
| "learning_rate": 1e-06, |
| "loss": 0.1546, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4133148404993065, |
| "grad_norm": 3.696610927581787, |
| "learning_rate": 1e-06, |
| "loss": 0.1586, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.4136615811373093, |
| "grad_norm": 2.113956928253174, |
| "learning_rate": 1e-06, |
| "loss": 0.1764, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.41400832177531205, |
| "grad_norm": 3.988048553466797, |
| "learning_rate": 1e-06, |
| "loss": 0.1883, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4143550624133148, |
| "grad_norm": 2.866166353225708, |
| "learning_rate": 1e-06, |
| "loss": 0.1633, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.4147018030513176, |
| "grad_norm": 2.072659492492676, |
| "learning_rate": 1e-06, |
| "loss": 0.1714, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.41504854368932037, |
| "grad_norm": 1.681881308555603, |
| "learning_rate": 1e-06, |
| "loss": 0.1443, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.41539528432732314, |
| "grad_norm": 1.793905258178711, |
| "learning_rate": 1e-06, |
| "loss": 0.1258, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4157420249653259, |
| "grad_norm": 5.42755126953125, |
| "learning_rate": 1e-06, |
| "loss": 0.2034, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4160887656033287, |
| "grad_norm": 2.7761476039886475, |
| "learning_rate": 1e-06, |
| "loss": 0.1226, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.41643550624133147, |
| "grad_norm": 3.929356575012207, |
| "learning_rate": 1e-06, |
| "loss": 0.1742, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.41678224687933424, |
| "grad_norm": 1.8514819145202637, |
| "learning_rate": 1e-06, |
| "loss": 0.1527, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.417128987517337, |
| "grad_norm": 1.546459436416626, |
| "learning_rate": 1e-06, |
| "loss": 0.1307, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.4174757281553398, |
| "grad_norm": 2.12691330909729, |
| "learning_rate": 1e-06, |
| "loss": 0.2007, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.41782246879334256, |
| "grad_norm": 5.694792747497559, |
| "learning_rate": 1e-06, |
| "loss": 0.1964, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.41816920943134533, |
| "grad_norm": 5.02055549621582, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.4185159500693481, |
| "grad_norm": 2.231020927429199, |
| "learning_rate": 1e-06, |
| "loss": 0.1956, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.4188626907073509, |
| "grad_norm": 1.8750131130218506, |
| "learning_rate": 1e-06, |
| "loss": 0.1278, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.41920943134535366, |
| "grad_norm": 2.0554685592651367, |
| "learning_rate": 1e-06, |
| "loss": 0.1586, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.41955617198335643, |
| "grad_norm": 1.9559741020202637, |
| "learning_rate": 1e-06, |
| "loss": 0.1896, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4199029126213592, |
| "grad_norm": 4.545790672302246, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.420249653259362, |
| "grad_norm": 4.00286340713501, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.42059639389736475, |
| "grad_norm": 5.119708061218262, |
| "learning_rate": 1e-06, |
| "loss": 0.1608, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4209431345353675, |
| "grad_norm": 1.9498828649520874, |
| "learning_rate": 1e-06, |
| "loss": 0.1715, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4212898751733703, |
| "grad_norm": 3.187913417816162, |
| "learning_rate": 1e-06, |
| "loss": 0.1756, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.42163661581137307, |
| "grad_norm": 2.6275475025177, |
| "learning_rate": 1e-06, |
| "loss": 0.1494, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.42198335644937585, |
| "grad_norm": 4.876014232635498, |
| "learning_rate": 1e-06, |
| "loss": 0.2037, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4223300970873786, |
| "grad_norm": 1.6075856685638428, |
| "learning_rate": 1e-06, |
| "loss": 0.1337, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4226768377253814, |
| "grad_norm": 2.4928295612335205, |
| "learning_rate": 1e-06, |
| "loss": 0.1601, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.42302357836338417, |
| "grad_norm": 2.0485427379608154, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.42337031900138694, |
| "grad_norm": 2.0547242164611816, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4237170596393897, |
| "grad_norm": 1.4388387203216553, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4240638002773925, |
| "grad_norm": 3.0887644290924072, |
| "learning_rate": 1e-06, |
| "loss": 0.185, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.42441054091539526, |
| "grad_norm": 2.907358169555664, |
| "learning_rate": 1e-06, |
| "loss": 0.1759, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.42475728155339804, |
| "grad_norm": 1.9200416803359985, |
| "learning_rate": 1e-06, |
| "loss": 0.1861, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4251040221914008, |
| "grad_norm": 3.701133966445923, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.4254507628294036, |
| "grad_norm": 1.728384256362915, |
| "learning_rate": 1e-06, |
| "loss": 0.1704, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.42579750346740636, |
| "grad_norm": 2.6459665298461914, |
| "learning_rate": 1e-06, |
| "loss": 0.1521, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.42614424410540913, |
| "grad_norm": 3.3006246089935303, |
| "learning_rate": 1e-06, |
| "loss": 0.1552, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.4264909847434119, |
| "grad_norm": 1.900184154510498, |
| "learning_rate": 1e-06, |
| "loss": 0.1576, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4268377253814147, |
| "grad_norm": 1.7001935243606567, |
| "learning_rate": 1e-06, |
| "loss": 0.1413, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.42718446601941745, |
| "grad_norm": 2.1855411529541016, |
| "learning_rate": 1e-06, |
| "loss": 0.1584, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4275312066574202, |
| "grad_norm": 1.9435559511184692, |
| "learning_rate": 1e-06, |
| "loss": 0.1849, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.427877947295423, |
| "grad_norm": 1.7659322023391724, |
| "learning_rate": 1e-06, |
| "loss": 0.145, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4282246879334258, |
| "grad_norm": 2.6840810775756836, |
| "learning_rate": 1e-06, |
| "loss": 0.1511, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.843663215637207, |
| "learning_rate": 1e-06, |
| "loss": 0.1653, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4289181692094313, |
| "grad_norm": 1.7395163774490356, |
| "learning_rate": 1e-06, |
| "loss": 0.167, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4292649098474341, |
| "grad_norm": 5.5851664543151855, |
| "learning_rate": 1e-06, |
| "loss": 0.1967, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.42961165048543687, |
| "grad_norm": 3.6831417083740234, |
| "learning_rate": 1e-06, |
| "loss": 0.167, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.42995839112343964, |
| "grad_norm": 2.4813308715820312, |
| "learning_rate": 1e-06, |
| "loss": 0.1743, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4303051317614424, |
| "grad_norm": 1.4829434156417847, |
| "learning_rate": 1e-06, |
| "loss": 0.1298, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4306518723994452, |
| "grad_norm": 6.014243125915527, |
| "learning_rate": 1e-06, |
| "loss": 0.1911, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.43099861303744796, |
| "grad_norm": 3.8425610065460205, |
| "learning_rate": 1e-06, |
| "loss": 0.1447, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.43134535367545074, |
| "grad_norm": 2.8617472648620605, |
| "learning_rate": 1e-06, |
| "loss": 0.1752, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4316920943134535, |
| "grad_norm": 1.48086678981781, |
| "learning_rate": 1e-06, |
| "loss": 0.1398, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4320388349514563, |
| "grad_norm": 1.9533336162567139, |
| "learning_rate": 1e-06, |
| "loss": 0.1919, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.43238557558945906, |
| "grad_norm": 2.6993250846862793, |
| "learning_rate": 1e-06, |
| "loss": 0.1761, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.43273231622746183, |
| "grad_norm": 2.825752019882202, |
| "learning_rate": 1e-06, |
| "loss": 0.1489, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4330790568654646, |
| "grad_norm": 2.8645107746124268, |
| "learning_rate": 1e-06, |
| "loss": 0.1537, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4334257975034674, |
| "grad_norm": 1.9935320615768433, |
| "learning_rate": 1e-06, |
| "loss": 0.1725, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.43377253814147015, |
| "grad_norm": 3.51171612739563, |
| "learning_rate": 1e-06, |
| "loss": 0.1773, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.43411927877947293, |
| "grad_norm": 1.9769134521484375, |
| "learning_rate": 1e-06, |
| "loss": 0.1814, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4344660194174757, |
| "grad_norm": 2.077732801437378, |
| "learning_rate": 1e-06, |
| "loss": 0.1867, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4348127600554785, |
| "grad_norm": 1.734952688217163, |
| "learning_rate": 1e-06, |
| "loss": 0.1546, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.43515950069348125, |
| "grad_norm": 1.789803385734558, |
| "learning_rate": 1e-06, |
| "loss": 0.1589, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.435506241331484, |
| "grad_norm": 2.2564432621002197, |
| "learning_rate": 1e-06, |
| "loss": 0.2033, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4358529819694868, |
| "grad_norm": 2.3573691844940186, |
| "learning_rate": 1e-06, |
| "loss": 0.2015, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.43619972260748957, |
| "grad_norm": 1.9252046346664429, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.43654646324549234, |
| "grad_norm": 1.6210649013519287, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.4368932038834951, |
| "grad_norm": 2.1824262142181396, |
| "learning_rate": 1e-06, |
| "loss": 0.1634, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4372399445214979, |
| "grad_norm": 1.7464162111282349, |
| "learning_rate": 1e-06, |
| "loss": 0.1703, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4375866851595007, |
| "grad_norm": 2.9391233921051025, |
| "learning_rate": 1e-06, |
| "loss": 0.212, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.4379334257975035, |
| "grad_norm": 5.7660722732543945, |
| "learning_rate": 1e-06, |
| "loss": 0.1808, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.43828016643550627, |
| "grad_norm": 1.7276206016540527, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.43862690707350904, |
| "grad_norm": 1.5537587404251099, |
| "learning_rate": 1e-06, |
| "loss": 0.1627, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4389736477115118, |
| "grad_norm": 2.254763126373291, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4393203883495146, |
| "grad_norm": 1.903600811958313, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.43966712898751736, |
| "grad_norm": 2.922935962677002, |
| "learning_rate": 1e-06, |
| "loss": 0.1979, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.44001386962552014, |
| "grad_norm": 4.926792621612549, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.4403606102635229, |
| "grad_norm": 2.8258774280548096, |
| "learning_rate": 1e-06, |
| "loss": 0.1578, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4407073509015257, |
| "grad_norm": 3.5319342613220215, |
| "learning_rate": 1e-06, |
| "loss": 0.1085, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.44105409153952846, |
| "grad_norm": 1.9517085552215576, |
| "learning_rate": 1e-06, |
| "loss": 0.1877, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.44140083217753123, |
| "grad_norm": 1.7757554054260254, |
| "learning_rate": 1e-06, |
| "loss": 0.1774, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.441747572815534, |
| "grad_norm": 2.927002191543579, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.4420943134535368, |
| "grad_norm": 2.922168493270874, |
| "learning_rate": 1e-06, |
| "loss": 0.1405, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.44244105409153955, |
| "grad_norm": 1.9187227487564087, |
| "learning_rate": 1e-06, |
| "loss": 0.1424, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.44278779472954233, |
| "grad_norm": 3.046382427215576, |
| "learning_rate": 1e-06, |
| "loss": 0.1587, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.4431345353675451, |
| "grad_norm": 2.2099106311798096, |
| "learning_rate": 1e-06, |
| "loss": 0.1518, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.4434812760055479, |
| "grad_norm": 3.655850887298584, |
| "learning_rate": 1e-06, |
| "loss": 0.1733, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.44382801664355065, |
| "grad_norm": 3.842125415802002, |
| "learning_rate": 1e-06, |
| "loss": 0.1927, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4441747572815534, |
| "grad_norm": 1.9096598625183105, |
| "learning_rate": 1e-06, |
| "loss": 0.128, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.4445214979195562, |
| "grad_norm": 3.1731748580932617, |
| "learning_rate": 1e-06, |
| "loss": 0.1563, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.44486823855755897, |
| "grad_norm": 2.3779473304748535, |
| "learning_rate": 1e-06, |
| "loss": 0.171, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.44521497919556174, |
| "grad_norm": 4.150517463684082, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.4455617198335645, |
| "grad_norm": 3.774120807647705, |
| "learning_rate": 1e-06, |
| "loss": 0.1926, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4459084604715673, |
| "grad_norm": 3.833528995513916, |
| "learning_rate": 1e-06, |
| "loss": 0.1771, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.44625520110957007, |
| "grad_norm": 3.3106303215026855, |
| "learning_rate": 1e-06, |
| "loss": 0.1918, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.44660194174757284, |
| "grad_norm": 3.2806928157806396, |
| "learning_rate": 1e-06, |
| "loss": 0.147, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.4469486823855756, |
| "grad_norm": 2.7862508296966553, |
| "learning_rate": 1e-06, |
| "loss": 0.1768, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.4472954230235784, |
| "grad_norm": 2.7099242210388184, |
| "learning_rate": 1e-06, |
| "loss": 0.2095, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.44764216366158116, |
| "grad_norm": 3.24961256980896, |
| "learning_rate": 1e-06, |
| "loss": 0.171, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.44798890429958393, |
| "grad_norm": 3.220797061920166, |
| "learning_rate": 1e-06, |
| "loss": 0.1641, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.4483356449375867, |
| "grad_norm": 4.3876237869262695, |
| "learning_rate": 1e-06, |
| "loss": 0.1526, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.4486823855755895, |
| "grad_norm": 2.9639668464660645, |
| "learning_rate": 1e-06, |
| "loss": 0.1649, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.44902912621359226, |
| "grad_norm": 2.0333526134490967, |
| "learning_rate": 1e-06, |
| "loss": 0.1648, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.44937586685159503, |
| "grad_norm": 3.781501531600952, |
| "learning_rate": 1e-06, |
| "loss": 0.1571, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.4497226074895978, |
| "grad_norm": 1.5544551610946655, |
| "learning_rate": 1e-06, |
| "loss": 0.1496, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.4500693481276006, |
| "grad_norm": 5.4716596603393555, |
| "learning_rate": 1e-06, |
| "loss": 0.199, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.45041608876560335, |
| "grad_norm": 1.998415470123291, |
| "learning_rate": 1e-06, |
| "loss": 0.1236, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.4507628294036061, |
| "grad_norm": 5.753319263458252, |
| "learning_rate": 1e-06, |
| "loss": 0.2119, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4511095700416089, |
| "grad_norm": 3.596370220184326, |
| "learning_rate": 1e-06, |
| "loss": 0.1901, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.45145631067961167, |
| "grad_norm": 3.2227611541748047, |
| "learning_rate": 1e-06, |
| "loss": 0.1738, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.45180305131761445, |
| "grad_norm": 3.6115000247955322, |
| "learning_rate": 1e-06, |
| "loss": 0.2224, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.4521497919556172, |
| "grad_norm": 3.304997682571411, |
| "learning_rate": 1e-06, |
| "loss": 0.1662, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.45249653259362, |
| "grad_norm": 2.065157175064087, |
| "learning_rate": 1e-06, |
| "loss": 0.1958, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.45284327323162277, |
| "grad_norm": 3.3705663681030273, |
| "learning_rate": 1e-06, |
| "loss": 0.133, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.45319001386962554, |
| "grad_norm": 3.053819417953491, |
| "learning_rate": 1e-06, |
| "loss": 0.1409, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.4535367545076283, |
| "grad_norm": 2.586345911026001, |
| "learning_rate": 1e-06, |
| "loss": 0.1698, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.4538834951456311, |
| "grad_norm": 1.9915062189102173, |
| "learning_rate": 1e-06, |
| "loss": 0.1884, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.45423023578363386, |
| "grad_norm": 2.1440911293029785, |
| "learning_rate": 1e-06, |
| "loss": 0.1587, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.45457697642163664, |
| "grad_norm": 2.2233524322509766, |
| "learning_rate": 1e-06, |
| "loss": 0.1383, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.4549237170596394, |
| "grad_norm": 1.7290481328964233, |
| "learning_rate": 1e-06, |
| "loss": 0.1783, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.4552704576976422, |
| "grad_norm": 1.7241450548171997, |
| "learning_rate": 1e-06, |
| "loss": 0.1505, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.45561719833564496, |
| "grad_norm": 2.3768575191497803, |
| "learning_rate": 1e-06, |
| "loss": 0.1672, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.45596393897364773, |
| "grad_norm": 2.1226019859313965, |
| "learning_rate": 1e-06, |
| "loss": 0.1497, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.4563106796116505, |
| "grad_norm": 1.9038270711898804, |
| "learning_rate": 1e-06, |
| "loss": 0.1664, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4566574202496533, |
| "grad_norm": 1.9552680253982544, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.45700416088765605, |
| "grad_norm": 1.8354042768478394, |
| "learning_rate": 1e-06, |
| "loss": 0.1628, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.4573509015256588, |
| "grad_norm": 2.84702467918396, |
| "learning_rate": 1e-06, |
| "loss": 0.2017, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.4576976421636616, |
| "grad_norm": 2.1125497817993164, |
| "learning_rate": 1e-06, |
| "loss": 0.1753, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4580443828016644, |
| "grad_norm": 1.6211894750595093, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.45839112343966715, |
| "grad_norm": 3.268822431564331, |
| "learning_rate": 1e-06, |
| "loss": 0.1463, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.4587378640776699, |
| "grad_norm": 1.8956842422485352, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.4590846047156727, |
| "grad_norm": 3.552382230758667, |
| "learning_rate": 1e-06, |
| "loss": 0.1357, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.45943134535367547, |
| "grad_norm": 1.8421423435211182, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.45977808599167824, |
| "grad_norm": 4.14149808883667, |
| "learning_rate": 1e-06, |
| "loss": 0.1504, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.460124826629681, |
| "grad_norm": 1.7335238456726074, |
| "learning_rate": 1e-06, |
| "loss": 0.1548, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.4604715672676838, |
| "grad_norm": 4.671210289001465, |
| "learning_rate": 1e-06, |
| "loss": 0.2263, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.46081830790568656, |
| "grad_norm": 2.813931941986084, |
| "learning_rate": 1e-06, |
| "loss": 0.1668, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.46116504854368934, |
| "grad_norm": 2.9509613513946533, |
| "learning_rate": 1e-06, |
| "loss": 0.1484, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4615117891816921, |
| "grad_norm": 2.949826240539551, |
| "learning_rate": 1e-06, |
| "loss": 0.1827, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.4618585298196949, |
| "grad_norm": 1.8698536157608032, |
| "learning_rate": 1e-06, |
| "loss": 0.168, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.46220527045769766, |
| "grad_norm": 3.174593210220337, |
| "learning_rate": 1e-06, |
| "loss": 0.2109, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.46255201109570043, |
| "grad_norm": 1.977563738822937, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.4628987517337032, |
| "grad_norm": 1.8068509101867676, |
| "learning_rate": 1e-06, |
| "loss": 0.1172, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.463245492371706, |
| "grad_norm": 2.076296329498291, |
| "learning_rate": 1e-06, |
| "loss": 0.1608, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.46359223300970875, |
| "grad_norm": 2.3091671466827393, |
| "learning_rate": 1e-06, |
| "loss": 0.1665, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.46393897364771153, |
| "grad_norm": 1.894162654876709, |
| "learning_rate": 1e-06, |
| "loss": 0.1522, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 1.876305341720581, |
| "learning_rate": 1e-06, |
| "loss": 0.1444, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.4646324549237171, |
| "grad_norm": 2.8168094158172607, |
| "learning_rate": 1e-06, |
| "loss": 0.1686, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.46497919556171985, |
| "grad_norm": 2.988266706466675, |
| "learning_rate": 1e-06, |
| "loss": 0.1545, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.4653259361997226, |
| "grad_norm": 1.7541871070861816, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.4656726768377254, |
| "grad_norm": 4.07562255859375, |
| "learning_rate": 1e-06, |
| "loss": 0.1447, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 4.45770263671875, |
| "learning_rate": 1e-06, |
| "loss": 0.1639, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.46636615811373094, |
| "grad_norm": 1.9045706987380981, |
| "learning_rate": 1e-06, |
| "loss": 0.1765, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.4667128987517337, |
| "grad_norm": 1.927842378616333, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.4670596393897365, |
| "grad_norm": 7.443863868713379, |
| "learning_rate": 1e-06, |
| "loss": 0.1642, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.46740638002773927, |
| "grad_norm": 2.3793625831604004, |
| "learning_rate": 1e-06, |
| "loss": 0.1906, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.46775312066574204, |
| "grad_norm": 2.352898120880127, |
| "learning_rate": 1e-06, |
| "loss": 0.1796, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.4680998613037448, |
| "grad_norm": 3.9866578578948975, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4684466019417476, |
| "grad_norm": 1.6399085521697998, |
| "learning_rate": 1e-06, |
| "loss": 0.1484, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.46879334257975036, |
| "grad_norm": 1.9889776706695557, |
| "learning_rate": 1e-06, |
| "loss": 0.174, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.46914008321775313, |
| "grad_norm": 2.0575404167175293, |
| "learning_rate": 1e-06, |
| "loss": 0.1832, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.4694868238557559, |
| "grad_norm": 2.2382092475891113, |
| "learning_rate": 1e-06, |
| "loss": 0.1551, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.4698335644937587, |
| "grad_norm": 2.102724552154541, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.47018030513176146, |
| "grad_norm": 2.692265033721924, |
| "learning_rate": 1e-06, |
| "loss": 0.1921, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.47052704576976423, |
| "grad_norm": 1.9280555248260498, |
| "learning_rate": 1e-06, |
| "loss": 0.1678, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.470873786407767, |
| "grad_norm": 1.9627320766448975, |
| "learning_rate": 1e-06, |
| "loss": 0.1591, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.4712205270457698, |
| "grad_norm": 1.7975282669067383, |
| "learning_rate": 1e-06, |
| "loss": 0.1312, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.47156726768377255, |
| "grad_norm": 3.5527241230010986, |
| "learning_rate": 1e-06, |
| "loss": 0.1574, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4719140083217753, |
| "grad_norm": 3.669248104095459, |
| "learning_rate": 1e-06, |
| "loss": 0.2004, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.4722607489597781, |
| "grad_norm": 1.9283638000488281, |
| "learning_rate": 1e-06, |
| "loss": 0.1776, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.47260748959778087, |
| "grad_norm": 1.8212238550186157, |
| "learning_rate": 1e-06, |
| "loss": 0.1401, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.47295423023578365, |
| "grad_norm": 3.3076553344726562, |
| "learning_rate": 1e-06, |
| "loss": 0.1734, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.4733009708737864, |
| "grad_norm": 2.9192631244659424, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.4736477115117892, |
| "grad_norm": 5.290735244750977, |
| "learning_rate": 1e-06, |
| "loss": 0.18, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.47399445214979197, |
| "grad_norm": 3.645962953567505, |
| "learning_rate": 1e-06, |
| "loss": 0.1608, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.47434119278779474, |
| "grad_norm": 4.024303913116455, |
| "learning_rate": 1e-06, |
| "loss": 0.1925, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.4746879334257975, |
| "grad_norm": 2.006380081176758, |
| "learning_rate": 1e-06, |
| "loss": 0.1215, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.4750346740638003, |
| "grad_norm": 3.3509087562561035, |
| "learning_rate": 1e-06, |
| "loss": 0.1481, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.47538141470180306, |
| "grad_norm": 1.8464128971099854, |
| "learning_rate": 1e-06, |
| "loss": 0.1433, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.47572815533980584, |
| "grad_norm": 2.2263832092285156, |
| "learning_rate": 1e-06, |
| "loss": 0.1968, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.4760748959778086, |
| "grad_norm": 2.3892805576324463, |
| "learning_rate": 1e-06, |
| "loss": 0.1736, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.4764216366158114, |
| "grad_norm": 2.452542781829834, |
| "learning_rate": 1e-06, |
| "loss": 0.1808, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.47676837725381416, |
| "grad_norm": 2.9188780784606934, |
| "learning_rate": 1e-06, |
| "loss": 0.1254, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.47711511789181693, |
| "grad_norm": 1.651590347290039, |
| "learning_rate": 1e-06, |
| "loss": 0.1679, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.4774618585298197, |
| "grad_norm": 1.9459658861160278, |
| "learning_rate": 1e-06, |
| "loss": 0.1622, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.4778085991678225, |
| "grad_norm": 2.0248286724090576, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.47815533980582525, |
| "grad_norm": 4.144098281860352, |
| "learning_rate": 1e-06, |
| "loss": 0.1347, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.478502080443828, |
| "grad_norm": 1.46219801902771, |
| "learning_rate": 1e-06, |
| "loss": 0.1468, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4788488210818308, |
| "grad_norm": 2.1017038822174072, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.4791955617198336, |
| "grad_norm": 2.1770832538604736, |
| "learning_rate": 1e-06, |
| "loss": 0.1829, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.47954230235783635, |
| "grad_norm": 3.3750691413879395, |
| "learning_rate": 1e-06, |
| "loss": 0.1604, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.4798890429958391, |
| "grad_norm": 2.4006078243255615, |
| "learning_rate": 1e-06, |
| "loss": 0.1437, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.4802357836338419, |
| "grad_norm": 2.0251758098602295, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.48058252427184467, |
| "grad_norm": 1.4575427770614624, |
| "learning_rate": 1e-06, |
| "loss": 0.144, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.48092926490984744, |
| "grad_norm": 2.5154523849487305, |
| "learning_rate": 1e-06, |
| "loss": 0.1608, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.4812760055478502, |
| "grad_norm": 2.151294231414795, |
| "learning_rate": 1e-06, |
| "loss": 0.1779, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.481622746185853, |
| "grad_norm": 2.023602247238159, |
| "learning_rate": 1e-06, |
| "loss": 0.1693, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.48196948682385576, |
| "grad_norm": 2.615605115890503, |
| "learning_rate": 1e-06, |
| "loss": 0.1406, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.48231622746185854, |
| "grad_norm": 1.9372549057006836, |
| "learning_rate": 1e-06, |
| "loss": 0.1482, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.4826629680998613, |
| "grad_norm": 2.0867562294006348, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.4830097087378641, |
| "grad_norm": 2.231527090072632, |
| "learning_rate": 1e-06, |
| "loss": 0.1585, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.48335644937586686, |
| "grad_norm": 1.7819068431854248, |
| "learning_rate": 1e-06, |
| "loss": 0.1544, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.48370319001386963, |
| "grad_norm": 2.0249552726745605, |
| "learning_rate": 1e-06, |
| "loss": 0.1983, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.4840499306518724, |
| "grad_norm": 2.4923202991485596, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.4843966712898752, |
| "grad_norm": 1.6963531970977783, |
| "learning_rate": 1e-06, |
| "loss": 0.1464, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.48474341192787795, |
| "grad_norm": 2.0951385498046875, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.48509015256588073, |
| "grad_norm": 3.5178959369659424, |
| "learning_rate": 1e-06, |
| "loss": 0.1273, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 2.0446174144744873, |
| "learning_rate": 1e-06, |
| "loss": 0.1815, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4857836338418863, |
| "grad_norm": 4.695765972137451, |
| "learning_rate": 1e-06, |
| "loss": 0.1558, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.48613037447988905, |
| "grad_norm": 2.182650089263916, |
| "learning_rate": 1e-06, |
| "loss": 0.1541, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.4864771151178918, |
| "grad_norm": 2.1337432861328125, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.4868238557558946, |
| "grad_norm": 2.5917625427246094, |
| "learning_rate": 1e-06, |
| "loss": 0.1789, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.48717059639389737, |
| "grad_norm": 1.9805132150650024, |
| "learning_rate": 1e-06, |
| "loss": 0.1449, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.48751733703190014, |
| "grad_norm": 2.243947982788086, |
| "learning_rate": 1e-06, |
| "loss": 0.1448, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.4878640776699029, |
| "grad_norm": 1.755910873413086, |
| "learning_rate": 1e-06, |
| "loss": 0.1519, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.4882108183079057, |
| "grad_norm": 1.5781508684158325, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.48855755894590847, |
| "grad_norm": 1.7904160022735596, |
| "learning_rate": 1e-06, |
| "loss": 0.1246, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.48890429958391124, |
| "grad_norm": 3.9564223289489746, |
| "learning_rate": 1e-06, |
| "loss": 0.1654, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.489251040221914, |
| "grad_norm": 1.6851087808609009, |
| "learning_rate": 1e-06, |
| "loss": 0.1368, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.4895977808599168, |
| "grad_norm": 3.7889657020568848, |
| "learning_rate": 1e-06, |
| "loss": 0.1632, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.48994452149791956, |
| "grad_norm": 4.374680995941162, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.49029126213592233, |
| "grad_norm": 3.0737457275390625, |
| "learning_rate": 1e-06, |
| "loss": 0.1501, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.4906380027739251, |
| "grad_norm": 2.6151061058044434, |
| "learning_rate": 1e-06, |
| "loss": 0.1595, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.4909847434119279, |
| "grad_norm": 2.651824951171875, |
| "learning_rate": 1e-06, |
| "loss": 0.1469, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.49133148404993066, |
| "grad_norm": 2.8488190174102783, |
| "learning_rate": 1e-06, |
| "loss": 0.1519, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.49167822468793343, |
| "grad_norm": 2.576077699661255, |
| "learning_rate": 1e-06, |
| "loss": 0.153, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.4920249653259362, |
| "grad_norm": 1.594548225402832, |
| "learning_rate": 1e-06, |
| "loss": 0.1542, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.492371705963939, |
| "grad_norm": 2.900624990463257, |
| "learning_rate": 1e-06, |
| "loss": 0.1542, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.49271844660194175, |
| "grad_norm": 3.2703654766082764, |
| "learning_rate": 1e-06, |
| "loss": 0.1285, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.4930651872399445, |
| "grad_norm": 1.6843549013137817, |
| "learning_rate": 1e-06, |
| "loss": 0.1513, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.4934119278779473, |
| "grad_norm": 1.9763399362564087, |
| "learning_rate": 1e-06, |
| "loss": 0.1504, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.49375866851595007, |
| "grad_norm": 1.569488763809204, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.49410540915395285, |
| "grad_norm": 2.2819178104400635, |
| "learning_rate": 1e-06, |
| "loss": 0.1346, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.4944521497919556, |
| "grad_norm": 1.7116472721099854, |
| "learning_rate": 1e-06, |
| "loss": 0.1288, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.4947988904299584, |
| "grad_norm": 2.4184670448303223, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.49514563106796117, |
| "grad_norm": 2.8264670372009277, |
| "learning_rate": 1e-06, |
| "loss": 0.1521, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.49549237170596394, |
| "grad_norm": 1.987285852432251, |
| "learning_rate": 1e-06, |
| "loss": 0.1617, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.4958391123439667, |
| "grad_norm": 2.5394463539123535, |
| "learning_rate": 1e-06, |
| "loss": 0.1418, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.4961858529819695, |
| "grad_norm": 2.5165560245513916, |
| "learning_rate": 1e-06, |
| "loss": 0.1605, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.49653259361997226, |
| "grad_norm": 2.378039836883545, |
| "learning_rate": 1e-06, |
| "loss": 0.1866, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.49687933425797504, |
| "grad_norm": 2.0523364543914795, |
| "learning_rate": 1e-06, |
| "loss": 0.1298, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.4972260748959778, |
| "grad_norm": 3.141749382019043, |
| "learning_rate": 1e-06, |
| "loss": 0.1296, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.4975728155339806, |
| "grad_norm": 2.1089577674865723, |
| "learning_rate": 1e-06, |
| "loss": 0.135, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.49791955617198336, |
| "grad_norm": 1.6610194444656372, |
| "learning_rate": 1e-06, |
| "loss": 0.1194, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.49826629680998613, |
| "grad_norm": 3.3861851692199707, |
| "learning_rate": 1e-06, |
| "loss": 0.1649, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.4986130374479889, |
| "grad_norm": 2.972479820251465, |
| "learning_rate": 1e-06, |
| "loss": 0.1565, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.4989597780859917, |
| "grad_norm": 4.443920612335205, |
| "learning_rate": 1e-06, |
| "loss": 0.1518, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.49930651872399445, |
| "grad_norm": 3.384650707244873, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.4996532593619972, |
| "grad_norm": 2.9029901027679443, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.911129117012024, |
| "learning_rate": 1e-06, |
| "loss": 0.1748, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5003467406380028, |
| "grad_norm": 3.056776285171509, |
| "learning_rate": 1e-06, |
| "loss": 0.1394, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5006934812760055, |
| "grad_norm": 2.821357011795044, |
| "learning_rate": 1e-06, |
| "loss": 0.1946, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5010402219140083, |
| "grad_norm": 3.346324920654297, |
| "learning_rate": 1e-06, |
| "loss": 0.1793, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5013869625520111, |
| "grad_norm": 3.7908129692077637, |
| "learning_rate": 1e-06, |
| "loss": 0.1635, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5017337031900139, |
| "grad_norm": 1.877949595451355, |
| "learning_rate": 1e-06, |
| "loss": 0.1678, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5020804438280166, |
| "grad_norm": 1.9401321411132812, |
| "learning_rate": 1e-06, |
| "loss": 0.1575, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5024271844660194, |
| "grad_norm": 1.9344282150268555, |
| "learning_rate": 1e-06, |
| "loss": 0.1646, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5027739251040222, |
| "grad_norm": 2.282867193222046, |
| "learning_rate": 1e-06, |
| "loss": 0.1888, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.503120665742025, |
| "grad_norm": 3.797800064086914, |
| "learning_rate": 1e-06, |
| "loss": 0.1256, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5034674063800277, |
| "grad_norm": 3.1414899826049805, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5038141470180305, |
| "grad_norm": 1.9722864627838135, |
| "learning_rate": 1e-06, |
| "loss": 0.1408, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5041608876560333, |
| "grad_norm": 1.5774776935577393, |
| "learning_rate": 1e-06, |
| "loss": 0.1137, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5045076282940361, |
| "grad_norm": 3.0094971656799316, |
| "learning_rate": 1e-06, |
| "loss": 0.1554, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5048543689320388, |
| "grad_norm": 3.39162015914917, |
| "learning_rate": 1e-06, |
| "loss": 0.1611, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5052011095700416, |
| "grad_norm": 2.489313840866089, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5055478502080444, |
| "grad_norm": 4.133990287780762, |
| "learning_rate": 1e-06, |
| "loss": 0.1775, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5058945908460472, |
| "grad_norm": 2.7449488639831543, |
| "learning_rate": 1e-06, |
| "loss": 0.1495, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5062413314840499, |
| "grad_norm": 4.382802486419678, |
| "learning_rate": 1e-06, |
| "loss": 0.133, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5065880721220527, |
| "grad_norm": 2.110130786895752, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5069348127600555, |
| "grad_norm": 2.527834415435791, |
| "learning_rate": 1e-06, |
| "loss": 0.1757, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5072815533980582, |
| "grad_norm": 2.9167022705078125, |
| "learning_rate": 1e-06, |
| "loss": 0.1686, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.507628294036061, |
| "grad_norm": 2.2591302394866943, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5079750346740638, |
| "grad_norm": 4.148419380187988, |
| "learning_rate": 1e-06, |
| "loss": 0.1391, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5083217753120666, |
| "grad_norm": 4.044282913208008, |
| "learning_rate": 1e-06, |
| "loss": 0.1642, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5086685159500693, |
| "grad_norm": 1.8170924186706543, |
| "learning_rate": 1e-06, |
| "loss": 0.1218, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5090152565880721, |
| "grad_norm": 2.241746664047241, |
| "learning_rate": 1e-06, |
| "loss": 0.1643, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5093619972260749, |
| "grad_norm": 3.467628240585327, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5097087378640777, |
| "grad_norm": 3.2897260189056396, |
| "learning_rate": 1e-06, |
| "loss": 0.145, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5100554785020804, |
| "grad_norm": 1.7362409830093384, |
| "learning_rate": 1e-06, |
| "loss": 0.1376, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5104022191400832, |
| "grad_norm": 2.2274043560028076, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.510748959778086, |
| "grad_norm": 1.8008100986480713, |
| "learning_rate": 1e-06, |
| "loss": 0.1359, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5110957004160888, |
| "grad_norm": 1.5842705965042114, |
| "learning_rate": 1e-06, |
| "loss": 0.1264, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5114424410540915, |
| "grad_norm": 3.425924301147461, |
| "learning_rate": 1e-06, |
| "loss": 0.1281, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5117891816920943, |
| "grad_norm": 2.418201208114624, |
| "learning_rate": 1e-06, |
| "loss": 0.1695, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5121359223300971, |
| "grad_norm": 2.0753753185272217, |
| "learning_rate": 1e-06, |
| "loss": 0.1415, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5124826629680999, |
| "grad_norm": 2.378450632095337, |
| "learning_rate": 1e-06, |
| "loss": 0.2064, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5128294036061026, |
| "grad_norm": 2.1089658737182617, |
| "learning_rate": 1e-06, |
| "loss": 0.1471, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5131761442441054, |
| "grad_norm": 2.442574977874756, |
| "learning_rate": 1e-06, |
| "loss": 0.175, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5135228848821082, |
| "grad_norm": 2.503943681716919, |
| "learning_rate": 1e-06, |
| "loss": 0.1389, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.513869625520111, |
| "grad_norm": 1.4603307247161865, |
| "learning_rate": 1e-06, |
| "loss": 0.1318, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5142163661581137, |
| "grad_norm": 2.270416498184204, |
| "learning_rate": 1e-06, |
| "loss": 0.1572, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5145631067961165, |
| "grad_norm": 2.438401699066162, |
| "learning_rate": 1e-06, |
| "loss": 0.1337, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5149098474341193, |
| "grad_norm": 2.581489086151123, |
| "learning_rate": 1e-06, |
| "loss": 0.1554, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.515256588072122, |
| "grad_norm": 2.019855260848999, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5156033287101248, |
| "grad_norm": 4.10310173034668, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5159500693481276, |
| "grad_norm": 1.7705767154693604, |
| "learning_rate": 1e-06, |
| "loss": 0.1464, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5162968099861304, |
| "grad_norm": 2.2230114936828613, |
| "learning_rate": 1e-06, |
| "loss": 0.1376, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5166435506241331, |
| "grad_norm": 6.302447319030762, |
| "learning_rate": 1e-06, |
| "loss": 0.1822, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5169902912621359, |
| "grad_norm": 3.1601343154907227, |
| "learning_rate": 1e-06, |
| "loss": 0.1539, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5173370319001387, |
| "grad_norm": 2.8884189128875732, |
| "learning_rate": 1e-06, |
| "loss": 0.1727, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5176837725381415, |
| "grad_norm": 2.00549578666687, |
| "learning_rate": 1e-06, |
| "loss": 0.135, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5180305131761442, |
| "grad_norm": 3.5855584144592285, |
| "learning_rate": 1e-06, |
| "loss": 0.2021, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.518377253814147, |
| "grad_norm": 4.1129069328308105, |
| "learning_rate": 1e-06, |
| "loss": 0.1407, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5187239944521498, |
| "grad_norm": 2.877685546875, |
| "learning_rate": 1e-06, |
| "loss": 0.195, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5190707350901526, |
| "grad_norm": 2.007178544998169, |
| "learning_rate": 1e-06, |
| "loss": 0.1874, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5194174757281553, |
| "grad_norm": 1.9459450244903564, |
| "learning_rate": 1e-06, |
| "loss": 0.1465, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5197642163661581, |
| "grad_norm": 3.181014060974121, |
| "learning_rate": 1e-06, |
| "loss": 0.1346, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5201109570041609, |
| "grad_norm": 2.48248553276062, |
| "learning_rate": 1e-06, |
| "loss": 0.1531, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5204576976421637, |
| "grad_norm": 2.8141188621520996, |
| "learning_rate": 1e-06, |
| "loss": 0.1957, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5208044382801664, |
| "grad_norm": 1.9651800394058228, |
| "learning_rate": 1e-06, |
| "loss": 0.1665, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5211511789181692, |
| "grad_norm": 1.5798146724700928, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.521497919556172, |
| "grad_norm": 1.6036591529846191, |
| "learning_rate": 1e-06, |
| "loss": 0.1268, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5218446601941747, |
| "grad_norm": 2.003415107727051, |
| "learning_rate": 1e-06, |
| "loss": 0.1337, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5221914008321775, |
| "grad_norm": 3.0233986377716064, |
| "learning_rate": 1e-06, |
| "loss": 0.2012, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5225381414701803, |
| "grad_norm": 1.6863069534301758, |
| "learning_rate": 1e-06, |
| "loss": 0.1401, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5228848821081831, |
| "grad_norm": 4.177762985229492, |
| "learning_rate": 1e-06, |
| "loss": 0.1805, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5232316227461858, |
| "grad_norm": 1.8863307237625122, |
| "learning_rate": 1e-06, |
| "loss": 0.1557, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5235783633841886, |
| "grad_norm": 4.922123432159424, |
| "learning_rate": 1e-06, |
| "loss": 0.211, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5239251040221914, |
| "grad_norm": 2.77559232711792, |
| "learning_rate": 1e-06, |
| "loss": 0.1621, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5242718446601942, |
| "grad_norm": 1.7231181859970093, |
| "learning_rate": 1e-06, |
| "loss": 0.1443, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5246185852981969, |
| "grad_norm": 2.0931448936462402, |
| "learning_rate": 1e-06, |
| "loss": 0.1776, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5249653259361997, |
| "grad_norm": 2.012320041656494, |
| "learning_rate": 1e-06, |
| "loss": 0.1765, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5253120665742025, |
| "grad_norm": 1.9464123249053955, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5256588072122053, |
| "grad_norm": 4.585052013397217, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.526005547850208, |
| "grad_norm": 2.2083330154418945, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5263522884882108, |
| "grad_norm": 2.2266805171966553, |
| "learning_rate": 1e-06, |
| "loss": 0.1477, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5266990291262136, |
| "grad_norm": 2.1281206607818604, |
| "learning_rate": 1e-06, |
| "loss": 0.137, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5270457697642164, |
| "grad_norm": 2.8681931495666504, |
| "learning_rate": 1e-06, |
| "loss": 0.1794, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5273925104022191, |
| "grad_norm": 2.800225257873535, |
| "learning_rate": 1e-06, |
| "loss": 0.1278, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5277392510402219, |
| "grad_norm": 4.79030704498291, |
| "learning_rate": 1e-06, |
| "loss": 0.2031, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5280859916782247, |
| "grad_norm": 1.678977370262146, |
| "learning_rate": 1e-06, |
| "loss": 0.1378, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5284327323162274, |
| "grad_norm": 2.589163064956665, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5287794729542302, |
| "grad_norm": 2.825577974319458, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.529126213592233, |
| "grad_norm": 2.0116469860076904, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5294729542302358, |
| "grad_norm": 1.699302077293396, |
| "learning_rate": 1e-06, |
| "loss": 0.1558, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5298196948682385, |
| "grad_norm": 2.1007838249206543, |
| "learning_rate": 1e-06, |
| "loss": 0.1772, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5301664355062413, |
| "grad_norm": 2.103917360305786, |
| "learning_rate": 1e-06, |
| "loss": 0.1601, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5305131761442441, |
| "grad_norm": 3.3394370079040527, |
| "learning_rate": 1e-06, |
| "loss": 0.1823, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5308599167822469, |
| "grad_norm": 2.7562904357910156, |
| "learning_rate": 1e-06, |
| "loss": 0.1509, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5312066574202496, |
| "grad_norm": 2.3599746227264404, |
| "learning_rate": 1e-06, |
| "loss": 0.1563, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5315533980582524, |
| "grad_norm": 2.014448404312134, |
| "learning_rate": 1e-06, |
| "loss": 0.1273, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5319001386962552, |
| "grad_norm": 1.5994056463241577, |
| "learning_rate": 1e-06, |
| "loss": 0.1494, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.532246879334258, |
| "grad_norm": 1.998543620109558, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5325936199722607, |
| "grad_norm": 1.7799046039581299, |
| "learning_rate": 1e-06, |
| "loss": 0.1466, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5329403606102635, |
| "grad_norm": 2.455554723739624, |
| "learning_rate": 1e-06, |
| "loss": 0.1201, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5332871012482663, |
| "grad_norm": 1.9349944591522217, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5336338418862691, |
| "grad_norm": 2.089660406112671, |
| "learning_rate": 1e-06, |
| "loss": 0.1631, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5339805825242718, |
| "grad_norm": 2.356295108795166, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5343273231622746, |
| "grad_norm": 2.6225013732910156, |
| "learning_rate": 1e-06, |
| "loss": 0.1634, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5346740638002774, |
| "grad_norm": 4.054690361022949, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.5350208044382802, |
| "grad_norm": 2.222895860671997, |
| "learning_rate": 1e-06, |
| "loss": 0.1719, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5353675450762829, |
| "grad_norm": 1.8733757734298706, |
| "learning_rate": 1e-06, |
| "loss": 0.1387, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 3.893399715423584, |
| "learning_rate": 1e-06, |
| "loss": 0.1212, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5360610263522885, |
| "grad_norm": 2.2785661220550537, |
| "learning_rate": 1e-06, |
| "loss": 0.1573, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5364077669902912, |
| "grad_norm": 2.716165065765381, |
| "learning_rate": 1e-06, |
| "loss": 0.1809, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.536754507628294, |
| "grad_norm": 2.239154100418091, |
| "learning_rate": 1e-06, |
| "loss": 0.1484, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5371012482662968, |
| "grad_norm": 2.2777199745178223, |
| "learning_rate": 1e-06, |
| "loss": 0.1776, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5374479889042996, |
| "grad_norm": 3.0242958068847656, |
| "learning_rate": 1e-06, |
| "loss": 0.1308, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5377947295423023, |
| "grad_norm": 2.226205587387085, |
| "learning_rate": 1e-06, |
| "loss": 0.1737, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5381414701803051, |
| "grad_norm": 2.850829601287842, |
| "learning_rate": 1e-06, |
| "loss": 0.1871, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5384882108183079, |
| "grad_norm": 3.371608257293701, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5388349514563107, |
| "grad_norm": 2.1789321899414062, |
| "learning_rate": 1e-06, |
| "loss": 0.1589, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5391816920943134, |
| "grad_norm": 1.6266393661499023, |
| "learning_rate": 1e-06, |
| "loss": 0.1507, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5395284327323162, |
| "grad_norm": 1.8946685791015625, |
| "learning_rate": 1e-06, |
| "loss": 0.1556, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.539875173370319, |
| "grad_norm": 3.5388271808624268, |
| "learning_rate": 1e-06, |
| "loss": 0.1408, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5402219140083218, |
| "grad_norm": 3.0828049182891846, |
| "learning_rate": 1e-06, |
| "loss": 0.1673, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5405686546463245, |
| "grad_norm": 2.063697576522827, |
| "learning_rate": 1e-06, |
| "loss": 0.1394, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5409153952843273, |
| "grad_norm": 2.2390551567077637, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5412621359223301, |
| "grad_norm": 4.824660778045654, |
| "learning_rate": 1e-06, |
| "loss": 0.1882, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5416088765603329, |
| "grad_norm": 1.9755252599716187, |
| "learning_rate": 1e-06, |
| "loss": 0.1711, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5419556171983356, |
| "grad_norm": 1.8800153732299805, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5423023578363384, |
| "grad_norm": 3.3188271522521973, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.5426490984743412, |
| "grad_norm": 1.953864574432373, |
| "learning_rate": 1e-06, |
| "loss": 0.1517, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.542995839112344, |
| "grad_norm": 2.689492702484131, |
| "learning_rate": 1e-06, |
| "loss": 0.1671, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5433425797503467, |
| "grad_norm": 2.0836610794067383, |
| "learning_rate": 1e-06, |
| "loss": 0.1683, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.5436893203883495, |
| "grad_norm": 2.045745849609375, |
| "learning_rate": 1e-06, |
| "loss": 0.1545, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5440360610263523, |
| "grad_norm": 4.5482401847839355, |
| "learning_rate": 1e-06, |
| "loss": 0.1518, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.544382801664355, |
| "grad_norm": 2.2771973609924316, |
| "learning_rate": 1e-06, |
| "loss": 0.1972, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5447295423023578, |
| "grad_norm": 2.4855430126190186, |
| "learning_rate": 1e-06, |
| "loss": 0.1717, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5450762829403606, |
| "grad_norm": 1.9103244543075562, |
| "learning_rate": 1e-06, |
| "loss": 0.1662, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5454230235783634, |
| "grad_norm": 2.2929623126983643, |
| "learning_rate": 1e-06, |
| "loss": 0.1974, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5457697642163661, |
| "grad_norm": 2.0059826374053955, |
| "learning_rate": 1e-06, |
| "loss": 0.1622, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5461165048543689, |
| "grad_norm": 2.1234233379364014, |
| "learning_rate": 1e-06, |
| "loss": 0.178, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5464632454923717, |
| "grad_norm": 4.070279598236084, |
| "learning_rate": 1e-06, |
| "loss": 0.215, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5468099861303745, |
| "grad_norm": 2.5643842220306396, |
| "learning_rate": 1e-06, |
| "loss": 0.159, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5471567267683772, |
| "grad_norm": 1.6535149812698364, |
| "learning_rate": 1e-06, |
| "loss": 0.146, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.54750346740638, |
| "grad_norm": 2.068358898162842, |
| "learning_rate": 1e-06, |
| "loss": 0.1691, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5478502080443828, |
| "grad_norm": 2.275562286376953, |
| "learning_rate": 1e-06, |
| "loss": 0.1564, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5481969486823856, |
| "grad_norm": 2.2625136375427246, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.5485436893203883, |
| "grad_norm": 2.553527593612671, |
| "learning_rate": 1e-06, |
| "loss": 0.1339, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5488904299583911, |
| "grad_norm": 2.405510425567627, |
| "learning_rate": 1e-06, |
| "loss": 0.1518, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.5492371705963939, |
| "grad_norm": 2.205770969390869, |
| "learning_rate": 1e-06, |
| "loss": 0.1656, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5495839112343966, |
| "grad_norm": 2.961263418197632, |
| "learning_rate": 1e-06, |
| "loss": 0.1549, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.5499306518723994, |
| "grad_norm": 2.239316701889038, |
| "learning_rate": 1e-06, |
| "loss": 0.1637, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5502773925104022, |
| "grad_norm": 2.7622323036193848, |
| "learning_rate": 1e-06, |
| "loss": 0.1726, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.550624133148405, |
| "grad_norm": 2.3954594135284424, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5509708737864077, |
| "grad_norm": 2.528770923614502, |
| "learning_rate": 1e-06, |
| "loss": 0.1328, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5513176144244105, |
| "grad_norm": 4.014219760894775, |
| "learning_rate": 1e-06, |
| "loss": 0.1167, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5516643550624133, |
| "grad_norm": 1.444198727607727, |
| "learning_rate": 1e-06, |
| "loss": 0.1387, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5520110957004161, |
| "grad_norm": 1.6705647706985474, |
| "learning_rate": 1e-06, |
| "loss": 0.1104, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.5523578363384188, |
| "grad_norm": 2.243098735809326, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.5527045769764216, |
| "grad_norm": 2.3548195362091064, |
| "learning_rate": 1e-06, |
| "loss": 0.1432, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5530513176144244, |
| "grad_norm": 1.7457523345947266, |
| "learning_rate": 1e-06, |
| "loss": 0.131, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5533980582524272, |
| "grad_norm": 2.6490869522094727, |
| "learning_rate": 1e-06, |
| "loss": 0.1571, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.5537447988904299, |
| "grad_norm": 3.868406057357788, |
| "learning_rate": 1e-06, |
| "loss": 0.1713, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5540915395284327, |
| "grad_norm": 3.2075355052948, |
| "learning_rate": 1e-06, |
| "loss": 0.1467, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.5544382801664355, |
| "grad_norm": 2.0262351036071777, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.5547850208044383, |
| "grad_norm": 2.029439926147461, |
| "learning_rate": 1e-06, |
| "loss": 0.1578, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.555131761442441, |
| "grad_norm": 1.949021339416504, |
| "learning_rate": 1e-06, |
| "loss": 0.1524, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5554785020804438, |
| "grad_norm": 4.777602195739746, |
| "learning_rate": 1e-06, |
| "loss": 0.1553, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5558252427184466, |
| "grad_norm": 2.1241836547851562, |
| "learning_rate": 1e-06, |
| "loss": 0.1868, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.5561719833564494, |
| "grad_norm": 2.2839200496673584, |
| "learning_rate": 1e-06, |
| "loss": 0.151, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5565187239944521, |
| "grad_norm": 2.7008562088012695, |
| "learning_rate": 1e-06, |
| "loss": 0.132, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5568654646324549, |
| "grad_norm": 2.1138343811035156, |
| "learning_rate": 1e-06, |
| "loss": 0.1349, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.5572122052704577, |
| "grad_norm": 2.0788066387176514, |
| "learning_rate": 1e-06, |
| "loss": 0.1803, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5575589459084604, |
| "grad_norm": 2.323345899581909, |
| "learning_rate": 1e-06, |
| "loss": 0.1749, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5579056865464632, |
| "grad_norm": 3.4080355167388916, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.558252427184466, |
| "grad_norm": 2.0123281478881836, |
| "learning_rate": 1e-06, |
| "loss": 0.1345, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5585991678224688, |
| "grad_norm": 2.1220619678497314, |
| "learning_rate": 1e-06, |
| "loss": 0.1823, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.5589459084604715, |
| "grad_norm": 2.4971022605895996, |
| "learning_rate": 1e-06, |
| "loss": 0.1591, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5592926490984743, |
| "grad_norm": 1.527266025543213, |
| "learning_rate": 1e-06, |
| "loss": 0.1459, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.5596393897364771, |
| "grad_norm": 2.1191792488098145, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5599861303744799, |
| "grad_norm": 1.978819489479065, |
| "learning_rate": 1e-06, |
| "loss": 0.1459, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.5603328710124826, |
| "grad_norm": 2.3482275009155273, |
| "learning_rate": 1e-06, |
| "loss": 0.1784, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5606796116504854, |
| "grad_norm": 1.5257140398025513, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5610263522884882, |
| "grad_norm": 3.646221399307251, |
| "learning_rate": 1e-06, |
| "loss": 0.1756, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.561373092926491, |
| "grad_norm": 1.7928833961486816, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5617198335644937, |
| "grad_norm": 2.0367002487182617, |
| "learning_rate": 1e-06, |
| "loss": 0.1382, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5620665742024965, |
| "grad_norm": 1.9482024908065796, |
| "learning_rate": 1e-06, |
| "loss": 0.1573, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5624133148404993, |
| "grad_norm": 2.336115598678589, |
| "learning_rate": 1e-06, |
| "loss": 0.147, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.562760055478502, |
| "grad_norm": 4.602109909057617, |
| "learning_rate": 1e-06, |
| "loss": 0.1847, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5631067961165048, |
| "grad_norm": 2.374413251876831, |
| "learning_rate": 1e-06, |
| "loss": 0.1668, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.5634535367545076, |
| "grad_norm": 3.371469497680664, |
| "learning_rate": 1e-06, |
| "loss": 0.1654, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.5638002773925104, |
| "grad_norm": 1.608858585357666, |
| "learning_rate": 1e-06, |
| "loss": 0.1259, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.5641470180305131, |
| "grad_norm": 1.8223319053649902, |
| "learning_rate": 1e-06, |
| "loss": 0.1599, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.5644937586685159, |
| "grad_norm": 3.2663540840148926, |
| "learning_rate": 1e-06, |
| "loss": 0.1525, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.5648404993065187, |
| "grad_norm": 2.5820138454437256, |
| "learning_rate": 1e-06, |
| "loss": 0.1748, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.5651872399445215, |
| "grad_norm": 2.0734665393829346, |
| "learning_rate": 1e-06, |
| "loss": 0.1456, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5655339805825242, |
| "grad_norm": 2.0002732276916504, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.565880721220527, |
| "grad_norm": 2.328211545944214, |
| "learning_rate": 1e-06, |
| "loss": 0.1603, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.5662274618585298, |
| "grad_norm": 3.090592861175537, |
| "learning_rate": 1e-06, |
| "loss": 0.1467, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.5665742024965326, |
| "grad_norm": 1.7330372333526611, |
| "learning_rate": 1e-06, |
| "loss": 0.1289, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.5669209431345353, |
| "grad_norm": 1.6095702648162842, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.5672676837725381, |
| "grad_norm": 2.5205414295196533, |
| "learning_rate": 1e-06, |
| "loss": 0.1817, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.5676144244105409, |
| "grad_norm": 2.5302205085754395, |
| "learning_rate": 1e-06, |
| "loss": 0.1515, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.5679611650485437, |
| "grad_norm": 2.0151143074035645, |
| "learning_rate": 1e-06, |
| "loss": 0.1376, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.5683079056865464, |
| "grad_norm": 4.463467121124268, |
| "learning_rate": 1e-06, |
| "loss": 0.1865, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.5686546463245492, |
| "grad_norm": 2.7108094692230225, |
| "learning_rate": 1e-06, |
| "loss": 0.1527, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.569001386962552, |
| "grad_norm": 3.0464956760406494, |
| "learning_rate": 1e-06, |
| "loss": 0.1671, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.5693481276005548, |
| "grad_norm": 2.9522790908813477, |
| "learning_rate": 1e-06, |
| "loss": 0.1863, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.5696948682385575, |
| "grad_norm": 2.414750099182129, |
| "learning_rate": 1e-06, |
| "loss": 0.1537, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.5700416088765603, |
| "grad_norm": 1.9789800643920898, |
| "learning_rate": 1e-06, |
| "loss": 0.1405, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.5703883495145631, |
| "grad_norm": 2.12092661857605, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.5707350901525658, |
| "grad_norm": 2.106600046157837, |
| "learning_rate": 1e-06, |
| "loss": 0.1769, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.5710818307905686, |
| "grad_norm": 1.6850626468658447, |
| "learning_rate": 1e-06, |
| "loss": 0.1415, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 2.36781907081604, |
| "learning_rate": 1e-06, |
| "loss": 0.1574, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.5717753120665742, |
| "grad_norm": 3.573758840560913, |
| "learning_rate": 1e-06, |
| "loss": 0.1754, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.5721220527045769, |
| "grad_norm": 2.212064743041992, |
| "learning_rate": 1e-06, |
| "loss": 0.1398, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5724687933425797, |
| "grad_norm": 3.7138683795928955, |
| "learning_rate": 1e-06, |
| "loss": 0.1486, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.5728155339805825, |
| "grad_norm": 2.328507900238037, |
| "learning_rate": 1e-06, |
| "loss": 0.181, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.5731622746185853, |
| "grad_norm": 2.0306859016418457, |
| "learning_rate": 1e-06, |
| "loss": 0.1678, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.573509015256588, |
| "grad_norm": 2.6543784141540527, |
| "learning_rate": 1e-06, |
| "loss": 0.1798, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.5738557558945908, |
| "grad_norm": 2.0634257793426514, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.5742024965325936, |
| "grad_norm": 3.299668788909912, |
| "learning_rate": 1e-06, |
| "loss": 0.1498, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.5745492371705964, |
| "grad_norm": 1.8128036260604858, |
| "learning_rate": 1e-06, |
| "loss": 0.1252, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.5748959778085991, |
| "grad_norm": 2.365511894226074, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.5752427184466019, |
| "grad_norm": 2.4852473735809326, |
| "learning_rate": 1e-06, |
| "loss": 0.1702, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.5755894590846047, |
| "grad_norm": 2.0349810123443604, |
| "learning_rate": 1e-06, |
| "loss": 0.1515, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5759361997226075, |
| "grad_norm": 1.8448779582977295, |
| "learning_rate": 1e-06, |
| "loss": 0.1553, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.5762829403606102, |
| "grad_norm": 3.345630645751953, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.576629680998613, |
| "grad_norm": 2.9367878437042236, |
| "learning_rate": 1e-06, |
| "loss": 0.1637, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.5769764216366158, |
| "grad_norm": 1.9184025526046753, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.5773231622746186, |
| "grad_norm": 3.0208446979522705, |
| "learning_rate": 1e-06, |
| "loss": 0.1319, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.5776699029126213, |
| "grad_norm": 1.9824481010437012, |
| "learning_rate": 1e-06, |
| "loss": 0.1285, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.5780166435506241, |
| "grad_norm": 1.768882393836975, |
| "learning_rate": 1e-06, |
| "loss": 0.1589, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.5783633841886269, |
| "grad_norm": 5.792402267456055, |
| "learning_rate": 1e-06, |
| "loss": 0.1643, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.5787101248266296, |
| "grad_norm": 2.2808361053466797, |
| "learning_rate": 1e-06, |
| "loss": 0.1925, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.5790568654646324, |
| "grad_norm": 1.661110520362854, |
| "learning_rate": 1e-06, |
| "loss": 0.1383, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5794036061026352, |
| "grad_norm": 2.764539957046509, |
| "learning_rate": 1e-06, |
| "loss": 0.1346, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.579750346740638, |
| "grad_norm": 2.591721296310425, |
| "learning_rate": 1e-06, |
| "loss": 0.1683, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.5800970873786407, |
| "grad_norm": 1.6234064102172852, |
| "learning_rate": 1e-06, |
| "loss": 0.1638, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.5804438280166435, |
| "grad_norm": 3.9577250480651855, |
| "learning_rate": 1e-06, |
| "loss": 0.1731, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.5807905686546463, |
| "grad_norm": 3.799262762069702, |
| "learning_rate": 1e-06, |
| "loss": 0.1556, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.5811373092926491, |
| "grad_norm": 2.9457106590270996, |
| "learning_rate": 1e-06, |
| "loss": 0.1611, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.5814840499306518, |
| "grad_norm": 2.1177031993865967, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.5818307905686546, |
| "grad_norm": 1.5257512331008911, |
| "learning_rate": 1e-06, |
| "loss": 0.132, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.5821775312066574, |
| "grad_norm": 2.6540706157684326, |
| "learning_rate": 1e-06, |
| "loss": 0.1648, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 2.4282069206237793, |
| "learning_rate": 1e-06, |
| "loss": 0.1462, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5828710124826629, |
| "grad_norm": 1.7106441259384155, |
| "learning_rate": 1e-06, |
| "loss": 0.1528, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.5832177531206657, |
| "grad_norm": 2.3406167030334473, |
| "learning_rate": 1e-06, |
| "loss": 0.1909, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.5835644937586685, |
| "grad_norm": 2.504379987716675, |
| "learning_rate": 1e-06, |
| "loss": 0.1444, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.5839112343966713, |
| "grad_norm": 4.548439979553223, |
| "learning_rate": 1e-06, |
| "loss": 0.1736, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.584257975034674, |
| "grad_norm": 2.725050449371338, |
| "learning_rate": 1e-06, |
| "loss": 0.1406, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.5846047156726768, |
| "grad_norm": 2.026104211807251, |
| "learning_rate": 1e-06, |
| "loss": 0.1531, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.5849514563106796, |
| "grad_norm": 2.1058292388916016, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.5852981969486823, |
| "grad_norm": 2.1308064460754395, |
| "learning_rate": 1e-06, |
| "loss": 0.1529, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.5856449375866851, |
| "grad_norm": 4.247373104095459, |
| "learning_rate": 1e-06, |
| "loss": 0.1962, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.5859916782246879, |
| "grad_norm": 4.272634029388428, |
| "learning_rate": 1e-06, |
| "loss": 0.1319, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.5863384188626907, |
| "grad_norm": 1.9161146879196167, |
| "learning_rate": 1e-06, |
| "loss": 0.1794, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.5866851595006934, |
| "grad_norm": 3.2819652557373047, |
| "learning_rate": 1e-06, |
| "loss": 0.1388, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.5870319001386962, |
| "grad_norm": 2.6425933837890625, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.587378640776699, |
| "grad_norm": 2.2003285884857178, |
| "learning_rate": 1e-06, |
| "loss": 0.1315, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.5877253814147018, |
| "grad_norm": 2.436519145965576, |
| "learning_rate": 1e-06, |
| "loss": 0.1888, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.5880721220527045, |
| "grad_norm": 2.4235095977783203, |
| "learning_rate": 1e-06, |
| "loss": 0.1574, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.5884188626907073, |
| "grad_norm": 2.7736899852752686, |
| "learning_rate": 1e-06, |
| "loss": 0.1324, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.5887656033287101, |
| "grad_norm": 6.031193256378174, |
| "learning_rate": 1e-06, |
| "loss": 0.1635, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.5891123439667129, |
| "grad_norm": 1.7690778970718384, |
| "learning_rate": 1e-06, |
| "loss": 0.1351, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.5894590846047156, |
| "grad_norm": 1.9819053411483765, |
| "learning_rate": 1e-06, |
| "loss": 0.1649, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5898058252427184, |
| "grad_norm": 2.2417149543762207, |
| "learning_rate": 1e-06, |
| "loss": 0.1551, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.5901525658807212, |
| "grad_norm": 1.7193907499313354, |
| "learning_rate": 1e-06, |
| "loss": 0.1373, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.590499306518724, |
| "grad_norm": 2.16274356842041, |
| "learning_rate": 1e-06, |
| "loss": 0.1629, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.5908460471567267, |
| "grad_norm": 2.157911539077759, |
| "learning_rate": 1e-06, |
| "loss": 0.1289, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.5911927877947295, |
| "grad_norm": 3.0244007110595703, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.5915395284327323, |
| "grad_norm": 1.9083997011184692, |
| "learning_rate": 1e-06, |
| "loss": 0.1736, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.591886269070735, |
| "grad_norm": 2.4870333671569824, |
| "learning_rate": 1e-06, |
| "loss": 0.1541, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.5922330097087378, |
| "grad_norm": 2.714510202407837, |
| "learning_rate": 1e-06, |
| "loss": 0.1561, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.5925797503467406, |
| "grad_norm": 1.7150081396102905, |
| "learning_rate": 1e-06, |
| "loss": 0.1437, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.5929264909847434, |
| "grad_norm": 2.5376031398773193, |
| "learning_rate": 1e-06, |
| "loss": 0.1588, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5932732316227461, |
| "grad_norm": 2.173816680908203, |
| "learning_rate": 1e-06, |
| "loss": 0.1668, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.5936199722607489, |
| "grad_norm": 3.2527916431427, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.5939667128987517, |
| "grad_norm": 2.0413970947265625, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.5943134535367545, |
| "grad_norm": 2.2500665187835693, |
| "learning_rate": 1e-06, |
| "loss": 0.1744, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.5946601941747572, |
| "grad_norm": 2.931854248046875, |
| "learning_rate": 1e-06, |
| "loss": 0.1729, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.59500693481276, |
| "grad_norm": 1.800407886505127, |
| "learning_rate": 1e-06, |
| "loss": 0.148, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.5953536754507628, |
| "grad_norm": 3.381312370300293, |
| "learning_rate": 1e-06, |
| "loss": 0.1358, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.5957004160887656, |
| "grad_norm": 3.778513193130493, |
| "learning_rate": 1e-06, |
| "loss": 0.156, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.5960471567267683, |
| "grad_norm": 1.6460663080215454, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.5963938973647711, |
| "grad_norm": 2.169105052947998, |
| "learning_rate": 1e-06, |
| "loss": 0.1712, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5967406380027739, |
| "grad_norm": 1.761500358581543, |
| "learning_rate": 1e-06, |
| "loss": 0.1442, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.5970873786407767, |
| "grad_norm": 1.7620950937271118, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.5974341192787794, |
| "grad_norm": 2.3129122257232666, |
| "learning_rate": 1e-06, |
| "loss": 0.1765, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.5977808599167822, |
| "grad_norm": 2.858874559402466, |
| "learning_rate": 1e-06, |
| "loss": 0.1467, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.598127600554785, |
| "grad_norm": 3.7320244312286377, |
| "learning_rate": 1e-06, |
| "loss": 0.1796, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.5984743411927878, |
| "grad_norm": 3.407935380935669, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.5988210818307905, |
| "grad_norm": 2.2188820838928223, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.5991678224687933, |
| "grad_norm": 1.947733759880066, |
| "learning_rate": 1e-06, |
| "loss": 0.1716, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.5995145631067961, |
| "grad_norm": 2.274256467819214, |
| "learning_rate": 1e-06, |
| "loss": 0.1519, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.5998613037447988, |
| "grad_norm": 1.881277084350586, |
| "learning_rate": 1e-06, |
| "loss": 0.127, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6002080443828016, |
| "grad_norm": 2.522675037384033, |
| "learning_rate": 1e-06, |
| "loss": 0.1806, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6005547850208044, |
| "grad_norm": 3.52107834815979, |
| "learning_rate": 1e-06, |
| "loss": 0.1581, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.6009015256588072, |
| "grad_norm": 2.1973533630371094, |
| "learning_rate": 1e-06, |
| "loss": 0.126, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6012482662968099, |
| "grad_norm": 2.215819835662842, |
| "learning_rate": 1e-06, |
| "loss": 0.1454, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6015950069348127, |
| "grad_norm": 1.8062198162078857, |
| "learning_rate": 1e-06, |
| "loss": 0.1583, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6019417475728155, |
| "grad_norm": 2.090193510055542, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6022884882108183, |
| "grad_norm": 1.7331854104995728, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.602635228848821, |
| "grad_norm": 2.4180166721343994, |
| "learning_rate": 1e-06, |
| "loss": 0.1384, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6029819694868238, |
| "grad_norm": 3.263031482696533, |
| "learning_rate": 1e-06, |
| "loss": 0.1324, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6033287101248266, |
| "grad_norm": 2.341269016265869, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6036754507628294, |
| "grad_norm": 3.2346274852752686, |
| "learning_rate": 1e-06, |
| "loss": 0.1382, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6040221914008321, |
| "grad_norm": 1.9541387557983398, |
| "learning_rate": 1e-06, |
| "loss": 0.1673, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6043689320388349, |
| "grad_norm": 3.1931421756744385, |
| "learning_rate": 1e-06, |
| "loss": 0.1437, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.6047156726768377, |
| "grad_norm": 1.9747329950332642, |
| "learning_rate": 1e-06, |
| "loss": 0.1346, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6050624133148405, |
| "grad_norm": 3.360968589782715, |
| "learning_rate": 1e-06, |
| "loss": 0.1584, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6054091539528432, |
| "grad_norm": 2.160388469696045, |
| "learning_rate": 1e-06, |
| "loss": 0.132, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.605755894590846, |
| "grad_norm": 4.136070728302002, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6061026352288488, |
| "grad_norm": 2.1393015384674072, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.6064493758668515, |
| "grad_norm": 2.9097952842712402, |
| "learning_rate": 1e-06, |
| "loss": 0.1343, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6067961165048543, |
| "grad_norm": 1.9220006465911865, |
| "learning_rate": 1e-06, |
| "loss": 0.1446, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 2.2562577724456787, |
| "learning_rate": 1e-06, |
| "loss": 0.1751, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6074895977808599, |
| "grad_norm": 2.5833182334899902, |
| "learning_rate": 1e-06, |
| "loss": 0.1606, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6078363384188626, |
| "grad_norm": 6.893828392028809, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6081830790568654, |
| "grad_norm": 6.116687774658203, |
| "learning_rate": 1e-06, |
| "loss": 0.1175, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6085298196948682, |
| "grad_norm": 1.879164218902588, |
| "learning_rate": 1e-06, |
| "loss": 0.1381, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.608876560332871, |
| "grad_norm": 3.298027753829956, |
| "learning_rate": 1e-06, |
| "loss": 0.1726, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6092233009708737, |
| "grad_norm": 2.1097750663757324, |
| "learning_rate": 1e-06, |
| "loss": 0.165, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6095700416088765, |
| "grad_norm": 4.932060241699219, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6099167822468793, |
| "grad_norm": 1.9408305883407593, |
| "learning_rate": 1e-06, |
| "loss": 0.1661, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6102635228848821, |
| "grad_norm": 1.8838698863983154, |
| "learning_rate": 1e-06, |
| "loss": 0.1617, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6106102635228848, |
| "grad_norm": 2.995274782180786, |
| "learning_rate": 1e-06, |
| "loss": 0.1361, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6109570041608876, |
| "grad_norm": 2.9905359745025635, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6113037447988904, |
| "grad_norm": 2.9674386978149414, |
| "learning_rate": 1e-06, |
| "loss": 0.1503, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6116504854368932, |
| "grad_norm": 2.390683889389038, |
| "learning_rate": 1e-06, |
| "loss": 0.1585, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6119972260748959, |
| "grad_norm": 2.8863577842712402, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6123439667128987, |
| "grad_norm": 3.183380365371704, |
| "learning_rate": 1e-06, |
| "loss": 0.1292, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6126907073509015, |
| "grad_norm": 2.080923318862915, |
| "learning_rate": 1e-06, |
| "loss": 0.1166, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6130374479889042, |
| "grad_norm": 2.8261148929595947, |
| "learning_rate": 1e-06, |
| "loss": 0.159, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.613384188626907, |
| "grad_norm": 2.0629355907440186, |
| "learning_rate": 1e-06, |
| "loss": 0.1433, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6137309292649098, |
| "grad_norm": 1.8249051570892334, |
| "learning_rate": 1e-06, |
| "loss": 0.1448, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6140776699029126, |
| "grad_norm": 2.489915132522583, |
| "learning_rate": 1e-06, |
| "loss": 0.1975, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6144244105409153, |
| "grad_norm": 2.171525716781616, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6147711511789181, |
| "grad_norm": 3.3242056369781494, |
| "learning_rate": 1e-06, |
| "loss": 0.1337, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6151178918169209, |
| "grad_norm": 3.56597900390625, |
| "learning_rate": 1e-06, |
| "loss": 0.1278, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6154646324549237, |
| "grad_norm": 1.8567633628845215, |
| "learning_rate": 1e-06, |
| "loss": 0.1546, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6158113730929264, |
| "grad_norm": 2.3509087562561035, |
| "learning_rate": 1e-06, |
| "loss": 0.1677, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6161581137309292, |
| "grad_norm": 2.3500568866729736, |
| "learning_rate": 1e-06, |
| "loss": 0.1573, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.616504854368932, |
| "grad_norm": 1.687183141708374, |
| "learning_rate": 1e-06, |
| "loss": 0.1384, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6168515950069348, |
| "grad_norm": 2.0229380130767822, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6171983356449375, |
| "grad_norm": 2.1885972023010254, |
| "learning_rate": 1e-06, |
| "loss": 0.1656, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6175450762829403, |
| "grad_norm": 4.203535556793213, |
| "learning_rate": 1e-06, |
| "loss": 0.1725, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.6178918169209431, |
| "grad_norm": 2.7054553031921387, |
| "learning_rate": 1e-06, |
| "loss": 0.1316, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6182385575589459, |
| "grad_norm": 2.386046886444092, |
| "learning_rate": 1e-06, |
| "loss": 0.1086, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6185852981969486, |
| "grad_norm": 2.22220516204834, |
| "learning_rate": 1e-06, |
| "loss": 0.199, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6189320388349514, |
| "grad_norm": 2.0740716457366943, |
| "learning_rate": 1e-06, |
| "loss": 0.1167, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6192787794729542, |
| "grad_norm": 3.928887128829956, |
| "learning_rate": 1e-06, |
| "loss": 0.1659, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.619625520110957, |
| "grad_norm": 3.2319023609161377, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6199722607489597, |
| "grad_norm": 2.2219529151916504, |
| "learning_rate": 1e-06, |
| "loss": 0.1689, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6203190013869625, |
| "grad_norm": 1.9555144309997559, |
| "learning_rate": 1e-06, |
| "loss": 0.1572, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6206657420249653, |
| "grad_norm": 1.8324187994003296, |
| "learning_rate": 1e-06, |
| "loss": 0.1549, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.621012482662968, |
| "grad_norm": 4.268178462982178, |
| "learning_rate": 1e-06, |
| "loss": 0.1676, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 2.28480863571167, |
| "learning_rate": 1e-06, |
| "loss": 0.1555, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6217059639389736, |
| "grad_norm": 2.970492362976074, |
| "learning_rate": 1e-06, |
| "loss": 0.1369, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6220527045769764, |
| "grad_norm": 2.2355594635009766, |
| "learning_rate": 1e-06, |
| "loss": 0.2016, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6223994452149791, |
| "grad_norm": 2.1921377182006836, |
| "learning_rate": 1e-06, |
| "loss": 0.1664, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6227461858529819, |
| "grad_norm": 2.3036246299743652, |
| "learning_rate": 1e-06, |
| "loss": 0.1617, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.6230929264909847, |
| "grad_norm": 2.2555198669433594, |
| "learning_rate": 1e-06, |
| "loss": 0.1693, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6234396671289875, |
| "grad_norm": 2.416224718093872, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6237864077669902, |
| "grad_norm": 1.9002783298492432, |
| "learning_rate": 1e-06, |
| "loss": 0.1618, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.624133148404993, |
| "grad_norm": 1.8858561515808105, |
| "learning_rate": 1e-06, |
| "loss": 0.1885, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6244798890429958, |
| "grad_norm": 3.2046573162078857, |
| "learning_rate": 1e-06, |
| "loss": 0.1588, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6248266296809986, |
| "grad_norm": 3.2242980003356934, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6251733703190014, |
| "grad_norm": 4.321735858917236, |
| "learning_rate": 1e-06, |
| "loss": 0.1745, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6255201109570042, |
| "grad_norm": 5.258564472198486, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.625866851595007, |
| "grad_norm": 2.0180282592773438, |
| "learning_rate": 1e-06, |
| "loss": 0.1327, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6262135922330098, |
| "grad_norm": 2.150374412536621, |
| "learning_rate": 1e-06, |
| "loss": 0.129, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6265603328710125, |
| "grad_norm": 2.426352024078369, |
| "learning_rate": 1e-06, |
| "loss": 0.155, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6269070735090153, |
| "grad_norm": 3.8777828216552734, |
| "learning_rate": 1e-06, |
| "loss": 0.134, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6272538141470181, |
| "grad_norm": 2.8608689308166504, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6276005547850209, |
| "grad_norm": 2.592209577560425, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6279472954230236, |
| "grad_norm": 1.9494845867156982, |
| "learning_rate": 1e-06, |
| "loss": 0.1771, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6282940360610264, |
| "grad_norm": 1.7008180618286133, |
| "learning_rate": 1e-06, |
| "loss": 0.1429, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6286407766990292, |
| "grad_norm": 1.7186037302017212, |
| "learning_rate": 1e-06, |
| "loss": 0.161, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.628987517337032, |
| "grad_norm": 2.3491291999816895, |
| "learning_rate": 1e-06, |
| "loss": 0.1767, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6293342579750347, |
| "grad_norm": 2.4897589683532715, |
| "learning_rate": 1e-06, |
| "loss": 0.1528, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6296809986130375, |
| "grad_norm": 1.9620110988616943, |
| "learning_rate": 1e-06, |
| "loss": 0.1481, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6300277392510403, |
| "grad_norm": 2.08915114402771, |
| "learning_rate": 1e-06, |
| "loss": 0.1465, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.630374479889043, |
| "grad_norm": 3.368116617202759, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6307212205270458, |
| "grad_norm": 2.13322377204895, |
| "learning_rate": 1e-06, |
| "loss": 0.153, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6310679611650486, |
| "grad_norm": 2.479769229888916, |
| "learning_rate": 1e-06, |
| "loss": 0.1646, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6314147018030514, |
| "grad_norm": 3.7209994792938232, |
| "learning_rate": 1e-06, |
| "loss": 0.1795, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6317614424410541, |
| "grad_norm": 4.989049434661865, |
| "learning_rate": 1e-06, |
| "loss": 0.1582, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6321081830790569, |
| "grad_norm": 2.99127459526062, |
| "learning_rate": 1e-06, |
| "loss": 0.1149, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.6324549237170597, |
| "grad_norm": 3.3212974071502686, |
| "learning_rate": 1e-06, |
| "loss": 0.1359, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6328016643550625, |
| "grad_norm": 2.3676962852478027, |
| "learning_rate": 1e-06, |
| "loss": 0.1448, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6331484049930652, |
| "grad_norm": 2.0515060424804688, |
| "learning_rate": 1e-06, |
| "loss": 0.1557, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.633495145631068, |
| "grad_norm": 3.021026372909546, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6338418862690708, |
| "grad_norm": 2.328517198562622, |
| "learning_rate": 1e-06, |
| "loss": 0.1265, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6341886269070736, |
| "grad_norm": 1.7838841676712036, |
| "learning_rate": 1e-06, |
| "loss": 0.1188, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6345353675450763, |
| "grad_norm": 2.873117685317993, |
| "learning_rate": 1e-06, |
| "loss": 0.1251, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6348821081830791, |
| "grad_norm": 2.515712022781372, |
| "learning_rate": 1e-06, |
| "loss": 0.1258, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6352288488210819, |
| "grad_norm": 4.897085189819336, |
| "learning_rate": 1e-06, |
| "loss": 0.1528, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6355755894590847, |
| "grad_norm": 2.860135793685913, |
| "learning_rate": 1e-06, |
| "loss": 0.1385, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.6359223300970874, |
| "grad_norm": 2.492570400238037, |
| "learning_rate": 1e-06, |
| "loss": 0.1761, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6362690707350902, |
| "grad_norm": 1.7522510290145874, |
| "learning_rate": 1e-06, |
| "loss": 0.1309, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.636615811373093, |
| "grad_norm": 2.5122199058532715, |
| "learning_rate": 1e-06, |
| "loss": 0.1502, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.6369625520110958, |
| "grad_norm": 1.8004939556121826, |
| "learning_rate": 1e-06, |
| "loss": 0.1482, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6373092926490985, |
| "grad_norm": 2.451542854309082, |
| "learning_rate": 1e-06, |
| "loss": 0.1656, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6376560332871013, |
| "grad_norm": 3.232908248901367, |
| "learning_rate": 1e-06, |
| "loss": 0.1625, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.6380027739251041, |
| "grad_norm": 2.172776460647583, |
| "learning_rate": 1e-06, |
| "loss": 0.1713, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6383495145631068, |
| "grad_norm": 2.3084630966186523, |
| "learning_rate": 1e-06, |
| "loss": 0.1474, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6386962552011096, |
| "grad_norm": 1.8688488006591797, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6390429958391124, |
| "grad_norm": 2.7231826782226562, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6393897364771152, |
| "grad_norm": 2.4542200565338135, |
| "learning_rate": 1e-06, |
| "loss": 0.1505, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.6397364771151179, |
| "grad_norm": 2.8579893112182617, |
| "learning_rate": 1e-06, |
| "loss": 0.1565, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.6400832177531207, |
| "grad_norm": 3.261667251586914, |
| "learning_rate": 1e-06, |
| "loss": 0.1216, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.6404299583911235, |
| "grad_norm": 2.358067035675049, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.6407766990291263, |
| "grad_norm": 3.6049678325653076, |
| "learning_rate": 1e-06, |
| "loss": 0.1488, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.641123439667129, |
| "grad_norm": 2.246796131134033, |
| "learning_rate": 1e-06, |
| "loss": 0.1601, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6414701803051318, |
| "grad_norm": 1.8105849027633667, |
| "learning_rate": 1e-06, |
| "loss": 0.1093, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6418169209431346, |
| "grad_norm": 1.771470546722412, |
| "learning_rate": 1e-06, |
| "loss": 0.1511, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.6421636615811374, |
| "grad_norm": 2.7330245971679688, |
| "learning_rate": 1e-06, |
| "loss": 0.176, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.6425104022191401, |
| "grad_norm": 4.204245567321777, |
| "learning_rate": 1e-06, |
| "loss": 0.1684, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 2.937845468521118, |
| "learning_rate": 1e-06, |
| "loss": 0.1684, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6432038834951457, |
| "grad_norm": 3.5551681518554688, |
| "learning_rate": 1e-06, |
| "loss": 0.1421, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6435506241331485, |
| "grad_norm": 3.067307472229004, |
| "learning_rate": 1e-06, |
| "loss": 0.1413, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6438973647711512, |
| "grad_norm": 2.2362422943115234, |
| "learning_rate": 1e-06, |
| "loss": 0.1781, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.644244105409154, |
| "grad_norm": 2.397939443588257, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.6445908460471568, |
| "grad_norm": 2.447730779647827, |
| "learning_rate": 1e-06, |
| "loss": 0.1727, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6449375866851595, |
| "grad_norm": 2.1447441577911377, |
| "learning_rate": 1e-06, |
| "loss": 0.1688, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6452843273231623, |
| "grad_norm": 2.5228350162506104, |
| "learning_rate": 1e-06, |
| "loss": 0.1489, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6456310679611651, |
| "grad_norm": 3.115823268890381, |
| "learning_rate": 1e-06, |
| "loss": 0.1276, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.6459778085991679, |
| "grad_norm": 1.9809269905090332, |
| "learning_rate": 1e-06, |
| "loss": 0.1688, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6463245492371706, |
| "grad_norm": 1.765738606452942, |
| "learning_rate": 1e-06, |
| "loss": 0.1619, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6466712898751734, |
| "grad_norm": 2.3409016132354736, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6470180305131762, |
| "grad_norm": 2.63494873046875, |
| "learning_rate": 1e-06, |
| "loss": 0.155, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.647364771151179, |
| "grad_norm": 2.3334195613861084, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6477115117891817, |
| "grad_norm": 2.024077892303467, |
| "learning_rate": 1e-06, |
| "loss": 0.1419, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6480582524271845, |
| "grad_norm": 2.0084269046783447, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6484049930651873, |
| "grad_norm": 2.7949881553649902, |
| "learning_rate": 1e-06, |
| "loss": 0.2076, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6487517337031901, |
| "grad_norm": 4.183343887329102, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6490984743411928, |
| "grad_norm": 3.4971086978912354, |
| "learning_rate": 1e-06, |
| "loss": 0.1486, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6494452149791956, |
| "grad_norm": 2.022676944732666, |
| "learning_rate": 1e-06, |
| "loss": 0.1332, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6497919556171984, |
| "grad_norm": 3.9578323364257812, |
| "learning_rate": 1e-06, |
| "loss": 0.1666, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6501386962552012, |
| "grad_norm": 2.3028218746185303, |
| "learning_rate": 1e-06, |
| "loss": 0.1662, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6504854368932039, |
| "grad_norm": 2.505436897277832, |
| "learning_rate": 1e-06, |
| "loss": 0.1291, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6508321775312067, |
| "grad_norm": 3.1190056800842285, |
| "learning_rate": 1e-06, |
| "loss": 0.1731, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6511789181692095, |
| "grad_norm": 2.1968798637390137, |
| "learning_rate": 1e-06, |
| "loss": 0.1481, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.6515256588072122, |
| "grad_norm": 2.4867115020751953, |
| "learning_rate": 1e-06, |
| "loss": 0.1194, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.651872399445215, |
| "grad_norm": 2.8347103595733643, |
| "learning_rate": 1e-06, |
| "loss": 0.1717, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6522191400832178, |
| "grad_norm": 3.8689327239990234, |
| "learning_rate": 1e-06, |
| "loss": 0.1226, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.6525658807212206, |
| "grad_norm": 2.115302801132202, |
| "learning_rate": 1e-06, |
| "loss": 0.1491, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6529126213592233, |
| "grad_norm": 3.8725671768188477, |
| "learning_rate": 1e-06, |
| "loss": 0.1623, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.6532593619972261, |
| "grad_norm": 2.957167863845825, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6536061026352289, |
| "grad_norm": 2.131582021713257, |
| "learning_rate": 1e-06, |
| "loss": 0.1722, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6539528432732317, |
| "grad_norm": 2.6788721084594727, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6542995839112344, |
| "grad_norm": 2.4989116191864014, |
| "learning_rate": 1e-06, |
| "loss": 0.1345, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6546463245492372, |
| "grad_norm": 2.2083873748779297, |
| "learning_rate": 1e-06, |
| "loss": 0.1272, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.65499306518724, |
| "grad_norm": 3.461536407470703, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6553398058252428, |
| "grad_norm": 1.8483829498291016, |
| "learning_rate": 1e-06, |
| "loss": 0.1281, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6556865464632455, |
| "grad_norm": 5.3467302322387695, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.6560332871012483, |
| "grad_norm": 3.6457359790802, |
| "learning_rate": 1e-06, |
| "loss": 0.1626, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.6563800277392511, |
| "grad_norm": 3.4049503803253174, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6567267683772539, |
| "grad_norm": 5.646260738372803, |
| "learning_rate": 1e-06, |
| "loss": 0.1739, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.6570735090152566, |
| "grad_norm": 3.8253393173217773, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6574202496532594, |
| "grad_norm": 3.993398904800415, |
| "learning_rate": 1e-06, |
| "loss": 0.1651, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.6577669902912622, |
| "grad_norm": 4.906624794006348, |
| "learning_rate": 1e-06, |
| "loss": 0.109, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.658113730929265, |
| "grad_norm": 2.4523298740386963, |
| "learning_rate": 1e-06, |
| "loss": 0.1374, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.6584604715672677, |
| "grad_norm": 4.38106632232666, |
| "learning_rate": 1e-06, |
| "loss": 0.1601, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.6588072122052705, |
| "grad_norm": 2.361201047897339, |
| "learning_rate": 1e-06, |
| "loss": 0.1647, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6591539528432733, |
| "grad_norm": 1.7961761951446533, |
| "learning_rate": 1e-06, |
| "loss": 0.1503, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.659500693481276, |
| "grad_norm": 4.200663089752197, |
| "learning_rate": 1e-06, |
| "loss": 0.1362, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.6598474341192788, |
| "grad_norm": 2.141098976135254, |
| "learning_rate": 1e-06, |
| "loss": 0.1287, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.6601941747572816, |
| "grad_norm": 3.659383773803711, |
| "learning_rate": 1e-06, |
| "loss": 0.1679, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.6605409153952844, |
| "grad_norm": 2.159740447998047, |
| "learning_rate": 1e-06, |
| "loss": 0.151, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.6608876560332871, |
| "grad_norm": 2.612746238708496, |
| "learning_rate": 1e-06, |
| "loss": 0.1331, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.6612343966712899, |
| "grad_norm": 2.067161798477173, |
| "learning_rate": 1e-06, |
| "loss": 0.1375, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.6615811373092927, |
| "grad_norm": 2.2887136936187744, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.6619278779472955, |
| "grad_norm": 3.688062906265259, |
| "learning_rate": 1e-06, |
| "loss": 0.1943, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.6622746185852982, |
| "grad_norm": 2.4100046157836914, |
| "learning_rate": 1e-06, |
| "loss": 0.1798, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.662621359223301, |
| "grad_norm": 1.7269797325134277, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.6629680998613038, |
| "grad_norm": 1.8225007057189941, |
| "learning_rate": 1e-06, |
| "loss": 0.1252, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.6633148404993066, |
| "grad_norm": 2.511077404022217, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.6636615811373093, |
| "grad_norm": 4.242014408111572, |
| "learning_rate": 1e-06, |
| "loss": 0.1833, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.6640083217753121, |
| "grad_norm": 2.4541306495666504, |
| "learning_rate": 1e-06, |
| "loss": 0.1639, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.6643550624133149, |
| "grad_norm": 4.093621730804443, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.6647018030513177, |
| "grad_norm": 1.6138478517532349, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.6650485436893204, |
| "grad_norm": 2.1072335243225098, |
| "learning_rate": 1e-06, |
| "loss": 0.1502, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.6653952843273232, |
| "grad_norm": 4.632135391235352, |
| "learning_rate": 1e-06, |
| "loss": 0.1454, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.665742024965326, |
| "grad_norm": 2.38916015625, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6660887656033287, |
| "grad_norm": 1.7111142873764038, |
| "learning_rate": 1e-06, |
| "loss": 0.1491, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.6664355062413315, |
| "grad_norm": 1.6361645460128784, |
| "learning_rate": 1e-06, |
| "loss": 0.1181, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.6667822468793343, |
| "grad_norm": 2.845128297805786, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.6671289875173371, |
| "grad_norm": 3.6008825302124023, |
| "learning_rate": 1e-06, |
| "loss": 0.1773, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.6674757281553398, |
| "grad_norm": 2.7898175716400146, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.6678224687933426, |
| "grad_norm": 2.1968812942504883, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.6681692094313454, |
| "grad_norm": 3.2181034088134766, |
| "learning_rate": 1e-06, |
| "loss": 0.1637, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.6685159500693482, |
| "grad_norm": 3.3751060962677, |
| "learning_rate": 1e-06, |
| "loss": 0.1967, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.6688626907073509, |
| "grad_norm": 4.066105842590332, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.6692094313453537, |
| "grad_norm": 3.0509390830993652, |
| "learning_rate": 1e-06, |
| "loss": 0.2023, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6695561719833565, |
| "grad_norm": 2.0706655979156494, |
| "learning_rate": 1e-06, |
| "loss": 0.1549, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.6699029126213593, |
| "grad_norm": 2.6645240783691406, |
| "learning_rate": 1e-06, |
| "loss": 0.1451, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.670249653259362, |
| "grad_norm": 2.8021857738494873, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.6705963938973648, |
| "grad_norm": 2.668379068374634, |
| "learning_rate": 1e-06, |
| "loss": 0.1606, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.6709431345353676, |
| "grad_norm": 2.5608723163604736, |
| "learning_rate": 1e-06, |
| "loss": 0.1425, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.6712898751733704, |
| "grad_norm": 2.992034435272217, |
| "learning_rate": 1e-06, |
| "loss": 0.1378, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.6716366158113731, |
| "grad_norm": 1.993120789527893, |
| "learning_rate": 1e-06, |
| "loss": 0.1504, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.6719833564493759, |
| "grad_norm": 1.990553379058838, |
| "learning_rate": 1e-06, |
| "loss": 0.1648, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.6723300970873787, |
| "grad_norm": 3.997328758239746, |
| "learning_rate": 1e-06, |
| "loss": 0.1319, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.6726768377253814, |
| "grad_norm": 3.1100356578826904, |
| "learning_rate": 1e-06, |
| "loss": 0.1824, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6730235783633842, |
| "grad_norm": 1.7388405799865723, |
| "learning_rate": 1e-06, |
| "loss": 0.1388, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.673370319001387, |
| "grad_norm": 2.0104892253875732, |
| "learning_rate": 1e-06, |
| "loss": 0.1537, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.6737170596393898, |
| "grad_norm": 4.981960296630859, |
| "learning_rate": 1e-06, |
| "loss": 0.1823, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.6740638002773925, |
| "grad_norm": 1.5539988279342651, |
| "learning_rate": 1e-06, |
| "loss": 0.1507, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.6744105409153953, |
| "grad_norm": 3.975370168685913, |
| "learning_rate": 1e-06, |
| "loss": 0.1869, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.6747572815533981, |
| "grad_norm": 2.3512701988220215, |
| "learning_rate": 1e-06, |
| "loss": 0.1363, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.6751040221914009, |
| "grad_norm": 1.7181259393692017, |
| "learning_rate": 1e-06, |
| "loss": 0.1472, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.6754507628294036, |
| "grad_norm": 1.7866528034210205, |
| "learning_rate": 1e-06, |
| "loss": 0.1358, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.6757975034674064, |
| "grad_norm": 2.635709524154663, |
| "learning_rate": 1e-06, |
| "loss": 0.1659, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.6761442441054092, |
| "grad_norm": 1.9648034572601318, |
| "learning_rate": 1e-06, |
| "loss": 0.1443, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.676490984743412, |
| "grad_norm": 2.1203794479370117, |
| "learning_rate": 1e-06, |
| "loss": 0.1283, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.6768377253814147, |
| "grad_norm": 2.54052996635437, |
| "learning_rate": 1e-06, |
| "loss": 0.1447, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.6771844660194175, |
| "grad_norm": 2.9203107357025146, |
| "learning_rate": 1e-06, |
| "loss": 0.1362, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.6775312066574203, |
| "grad_norm": 2.1829326152801514, |
| "learning_rate": 1e-06, |
| "loss": 0.1267, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.6778779472954231, |
| "grad_norm": 2.608271837234497, |
| "learning_rate": 1e-06, |
| "loss": 0.1673, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.6782246879334258, |
| "grad_norm": 2.003397226333618, |
| "learning_rate": 1e-06, |
| "loss": 0.1687, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 2.5364773273468018, |
| "learning_rate": 1e-06, |
| "loss": 0.1331, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.6789181692094314, |
| "grad_norm": 2.5657753944396973, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.6792649098474342, |
| "grad_norm": 2.2307422161102295, |
| "learning_rate": 1e-06, |
| "loss": 0.16, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 2.0911974906921387, |
| "learning_rate": 1e-06, |
| "loss": 0.1338, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6799583911234397, |
| "grad_norm": 3.5447793006896973, |
| "learning_rate": 1e-06, |
| "loss": 0.1949, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.6803051317614425, |
| "grad_norm": 2.7882168292999268, |
| "learning_rate": 1e-06, |
| "loss": 0.1912, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.6806518723994452, |
| "grad_norm": 1.985961675643921, |
| "learning_rate": 1e-06, |
| "loss": 0.1228, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.680998613037448, |
| "grad_norm": 4.209602355957031, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.6813453536754508, |
| "grad_norm": 2.3590171337127686, |
| "learning_rate": 1e-06, |
| "loss": 0.1246, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.6816920943134536, |
| "grad_norm": 2.680187225341797, |
| "learning_rate": 1e-06, |
| "loss": 0.1812, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.6820388349514563, |
| "grad_norm": 2.99859881401062, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.6823855755894591, |
| "grad_norm": 2.140812635421753, |
| "learning_rate": 1e-06, |
| "loss": 0.1315, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.6827323162274619, |
| "grad_norm": 2.1156227588653564, |
| "learning_rate": 1e-06, |
| "loss": 0.1576, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.6830790568654647, |
| "grad_norm": 2.218289613723755, |
| "learning_rate": 1e-06, |
| "loss": 0.1378, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6834257975034674, |
| "grad_norm": 2.0559825897216797, |
| "learning_rate": 1e-06, |
| "loss": 0.158, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.6837725381414702, |
| "grad_norm": 2.555392026901245, |
| "learning_rate": 1e-06, |
| "loss": 0.1377, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.684119278779473, |
| "grad_norm": 2.068892240524292, |
| "learning_rate": 1e-06, |
| "loss": 0.134, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.6844660194174758, |
| "grad_norm": 1.8548657894134521, |
| "learning_rate": 1e-06, |
| "loss": 0.1438, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.6848127600554785, |
| "grad_norm": 5.5719499588012695, |
| "learning_rate": 1e-06, |
| "loss": 0.1765, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.6851595006934813, |
| "grad_norm": 3.041820526123047, |
| "learning_rate": 1e-06, |
| "loss": 0.129, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.6855062413314841, |
| "grad_norm": 7.963346004486084, |
| "learning_rate": 1e-06, |
| "loss": 0.2188, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.6858529819694869, |
| "grad_norm": 2.143159866333008, |
| "learning_rate": 1e-06, |
| "loss": 0.1669, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.6861997226074896, |
| "grad_norm": 2.7842423915863037, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.6865464632454924, |
| "grad_norm": 2.4829189777374268, |
| "learning_rate": 1e-06, |
| "loss": 0.1609, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6868932038834952, |
| "grad_norm": 2.9691684246063232, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.687239944521498, |
| "grad_norm": 1.885637640953064, |
| "learning_rate": 1e-06, |
| "loss": 0.1251, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.6875866851595007, |
| "grad_norm": 3.081054210662842, |
| "learning_rate": 1e-06, |
| "loss": 0.1216, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.6879334257975035, |
| "grad_norm": 2.235093832015991, |
| "learning_rate": 1e-06, |
| "loss": 0.1424, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.6882801664355063, |
| "grad_norm": 2.051729679107666, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.688626907073509, |
| "grad_norm": 3.0905375480651855, |
| "learning_rate": 1e-06, |
| "loss": 0.1706, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.6889736477115118, |
| "grad_norm": 2.348816156387329, |
| "learning_rate": 1e-06, |
| "loss": 0.1609, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.6893203883495146, |
| "grad_norm": 3.2591960430145264, |
| "learning_rate": 1e-06, |
| "loss": 0.189, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.6896671289875174, |
| "grad_norm": 5.137045860290527, |
| "learning_rate": 1e-06, |
| "loss": 0.1329, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.6900138696255201, |
| "grad_norm": 3.611876964569092, |
| "learning_rate": 1e-06, |
| "loss": 0.13, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6903606102635229, |
| "grad_norm": 3.3672244548797607, |
| "learning_rate": 1e-06, |
| "loss": 0.1583, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.6907073509015257, |
| "grad_norm": 2.499140501022339, |
| "learning_rate": 1e-06, |
| "loss": 0.12, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.6910540915395285, |
| "grad_norm": 2.993394136428833, |
| "learning_rate": 1e-06, |
| "loss": 0.1329, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.6914008321775312, |
| "grad_norm": 3.3767669200897217, |
| "learning_rate": 1e-06, |
| "loss": 0.1607, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.691747572815534, |
| "grad_norm": 4.418931484222412, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.6920943134535368, |
| "grad_norm": 2.544867753982544, |
| "learning_rate": 1e-06, |
| "loss": 0.1642, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.6924410540915396, |
| "grad_norm": 4.367650508880615, |
| "learning_rate": 1e-06, |
| "loss": 0.161, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.6927877947295423, |
| "grad_norm": 2.103868007659912, |
| "learning_rate": 1e-06, |
| "loss": 0.1351, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.6931345353675451, |
| "grad_norm": 2.2191128730773926, |
| "learning_rate": 1e-06, |
| "loss": 0.1622, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.6934812760055479, |
| "grad_norm": 2.239701747894287, |
| "learning_rate": 1e-06, |
| "loss": 0.1547, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6938280166435506, |
| "grad_norm": 2.166487216949463, |
| "learning_rate": 1e-06, |
| "loss": 0.1431, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.6941747572815534, |
| "grad_norm": 2.3545658588409424, |
| "learning_rate": 1e-06, |
| "loss": 0.1271, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.6945214979195562, |
| "grad_norm": 2.040910005569458, |
| "learning_rate": 1e-06, |
| "loss": 0.1502, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.694868238557559, |
| "grad_norm": 2.179579973220825, |
| "learning_rate": 1e-06, |
| "loss": 0.1385, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.6952149791955617, |
| "grad_norm": 2.0149500370025635, |
| "learning_rate": 1e-06, |
| "loss": 0.1396, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.6955617198335645, |
| "grad_norm": 3.165092706680298, |
| "learning_rate": 1e-06, |
| "loss": 0.1217, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.6959084604715673, |
| "grad_norm": 2.45447039604187, |
| "learning_rate": 1e-06, |
| "loss": 0.1455, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.6962552011095701, |
| "grad_norm": 4.395773410797119, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.6966019417475728, |
| "grad_norm": 4.389420986175537, |
| "learning_rate": 1e-06, |
| "loss": 0.1294, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.6969486823855756, |
| "grad_norm": 1.8730595111846924, |
| "learning_rate": 1e-06, |
| "loss": 0.1371, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6972954230235784, |
| "grad_norm": 2.394237518310547, |
| "learning_rate": 1e-06, |
| "loss": 0.1588, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.6976421636615812, |
| "grad_norm": 2.2952566146850586, |
| "learning_rate": 1e-06, |
| "loss": 0.1362, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.6979889042995839, |
| "grad_norm": 3.177366018295288, |
| "learning_rate": 1e-06, |
| "loss": 0.1169, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.6983356449375867, |
| "grad_norm": 2.6569299697875977, |
| "learning_rate": 1e-06, |
| "loss": 0.1503, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.6986823855755895, |
| "grad_norm": 2.000425338745117, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.6990291262135923, |
| "grad_norm": 3.407623529434204, |
| "learning_rate": 1e-06, |
| "loss": 0.1704, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.699375866851595, |
| "grad_norm": 2.73795747756958, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.6997226074895978, |
| "grad_norm": 3.8470706939697266, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7000693481276006, |
| "grad_norm": 2.0070230960845947, |
| "learning_rate": 1e-06, |
| "loss": 0.1629, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7004160887656034, |
| "grad_norm": 2.150434970855713, |
| "learning_rate": 1e-06, |
| "loss": 0.1228, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7007628294036061, |
| "grad_norm": 1.901470422744751, |
| "learning_rate": 1e-06, |
| "loss": 0.114, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7011095700416089, |
| "grad_norm": 2.0084474086761475, |
| "learning_rate": 1e-06, |
| "loss": 0.1431, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7014563106796117, |
| "grad_norm": 2.262711763381958, |
| "learning_rate": 1e-06, |
| "loss": 0.1298, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7018030513176144, |
| "grad_norm": 2.019993782043457, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7021497919556172, |
| "grad_norm": 3.4103362560272217, |
| "learning_rate": 1e-06, |
| "loss": 0.1283, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.70249653259362, |
| "grad_norm": 2.4157567024230957, |
| "learning_rate": 1e-06, |
| "loss": 0.1739, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7028432732316228, |
| "grad_norm": 2.405488967895508, |
| "learning_rate": 1e-06, |
| "loss": 0.1439, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7031900138696255, |
| "grad_norm": 1.8161323070526123, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7035367545076283, |
| "grad_norm": 4.244541168212891, |
| "learning_rate": 1e-06, |
| "loss": 0.1481, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7038834951456311, |
| "grad_norm": 2.1027145385742188, |
| "learning_rate": 1e-06, |
| "loss": 0.1376, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7042302357836339, |
| "grad_norm": 1.911086916923523, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7045769764216366, |
| "grad_norm": 3.033745527267456, |
| "learning_rate": 1e-06, |
| "loss": 0.1313, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7049237170596394, |
| "grad_norm": 2.4053955078125, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7052704576976422, |
| "grad_norm": 1.796697735786438, |
| "learning_rate": 1e-06, |
| "loss": 0.1207, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.705617198335645, |
| "grad_norm": 2.3117001056671143, |
| "learning_rate": 1e-06, |
| "loss": 0.1391, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7059639389736477, |
| "grad_norm": 3.2871193885803223, |
| "learning_rate": 1e-06, |
| "loss": 0.169, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.7063106796116505, |
| "grad_norm": 2.3646295070648193, |
| "learning_rate": 1e-06, |
| "loss": 0.1827, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7066574202496533, |
| "grad_norm": 1.7376772165298462, |
| "learning_rate": 1e-06, |
| "loss": 0.1375, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.707004160887656, |
| "grad_norm": 2.200528383255005, |
| "learning_rate": 1e-06, |
| "loss": 0.1417, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7073509015256588, |
| "grad_norm": 2.742612838745117, |
| "learning_rate": 1e-06, |
| "loss": 0.156, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7076976421636616, |
| "grad_norm": 2.012239694595337, |
| "learning_rate": 1e-06, |
| "loss": 0.1768, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.7080443828016644, |
| "grad_norm": 1.6217942237854004, |
| "learning_rate": 1e-06, |
| "loss": 0.1093, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.7083911234396671, |
| "grad_norm": 3.5539634227752686, |
| "learning_rate": 1e-06, |
| "loss": 0.1688, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7087378640776699, |
| "grad_norm": 2.4913277626037598, |
| "learning_rate": 1e-06, |
| "loss": 0.1956, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7090846047156727, |
| "grad_norm": 1.9440995454788208, |
| "learning_rate": 1e-06, |
| "loss": 0.1648, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7094313453536755, |
| "grad_norm": 2.7142701148986816, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.7097780859916782, |
| "grad_norm": 2.7249319553375244, |
| "learning_rate": 1e-06, |
| "loss": 0.1664, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.710124826629681, |
| "grad_norm": 2.3271358013153076, |
| "learning_rate": 1e-06, |
| "loss": 0.1705, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7104715672676838, |
| "grad_norm": 1.7891216278076172, |
| "learning_rate": 1e-06, |
| "loss": 0.1293, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7108183079056866, |
| "grad_norm": 2.274380922317505, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7111650485436893, |
| "grad_norm": 2.742744207382202, |
| "learning_rate": 1e-06, |
| "loss": 0.1763, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.7115117891816921, |
| "grad_norm": 1.602330207824707, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7118585298196949, |
| "grad_norm": 1.677738904953003, |
| "learning_rate": 1e-06, |
| "loss": 0.1205, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.7122052704576977, |
| "grad_norm": 2.515608549118042, |
| "learning_rate": 1e-06, |
| "loss": 0.1495, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7125520110957004, |
| "grad_norm": 2.719072103500366, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7128987517337032, |
| "grad_norm": 2.3802108764648438, |
| "learning_rate": 1e-06, |
| "loss": 0.1374, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.713245492371706, |
| "grad_norm": 3.3662099838256836, |
| "learning_rate": 1e-06, |
| "loss": 0.1593, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.7135922330097088, |
| "grad_norm": 2.0012762546539307, |
| "learning_rate": 1e-06, |
| "loss": 0.1675, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.7139389736477115, |
| "grad_norm": 1.8083027601242065, |
| "learning_rate": 1e-06, |
| "loss": 0.1215, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 3.830106735229492, |
| "learning_rate": 1e-06, |
| "loss": 0.1446, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7146324549237171, |
| "grad_norm": 2.1997570991516113, |
| "learning_rate": 1e-06, |
| "loss": 0.1455, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.7149791955617198, |
| "grad_norm": 1.8245985507965088, |
| "learning_rate": 1e-06, |
| "loss": 0.1576, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.7153259361997226, |
| "grad_norm": 2.8333470821380615, |
| "learning_rate": 1e-06, |
| "loss": 0.1414, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.7156726768377254, |
| "grad_norm": 1.8098995685577393, |
| "learning_rate": 1e-06, |
| "loss": 0.1155, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.7160194174757282, |
| "grad_norm": 2.2318859100341797, |
| "learning_rate": 1e-06, |
| "loss": 0.145, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7163661581137309, |
| "grad_norm": 3.0203652381896973, |
| "learning_rate": 1e-06, |
| "loss": 0.1889, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.7167128987517337, |
| "grad_norm": 3.068648099899292, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7170596393897365, |
| "grad_norm": 2.7916111946105957, |
| "learning_rate": 1e-06, |
| "loss": 0.156, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7174063800277393, |
| "grad_norm": 1.9404290914535522, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.717753120665742, |
| "grad_norm": 2.287555694580078, |
| "learning_rate": 1e-06, |
| "loss": 0.121, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7180998613037448, |
| "grad_norm": 2.1457648277282715, |
| "learning_rate": 1e-06, |
| "loss": 0.1232, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7184466019417476, |
| "grad_norm": 4.4433441162109375, |
| "learning_rate": 1e-06, |
| "loss": 0.1315, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7187933425797504, |
| "grad_norm": 3.2125916481018066, |
| "learning_rate": 1e-06, |
| "loss": 0.1623, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.7191400832177531, |
| "grad_norm": 2.4481723308563232, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7194868238557559, |
| "grad_norm": 2.894010543823242, |
| "learning_rate": 1e-06, |
| "loss": 0.1623, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7198335644937587, |
| "grad_norm": 2.1684398651123047, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.7201803051317615, |
| "grad_norm": 2.481157064437866, |
| "learning_rate": 1e-06, |
| "loss": 0.1576, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7205270457697642, |
| "grad_norm": 2.519806385040283, |
| "learning_rate": 1e-06, |
| "loss": 0.1507, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.720873786407767, |
| "grad_norm": 2.813413381576538, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7212205270457698, |
| "grad_norm": 2.9213216304779053, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7215672676837726, |
| "grad_norm": 2.8391432762145996, |
| "learning_rate": 1e-06, |
| "loss": 0.1652, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7219140083217753, |
| "grad_norm": 2.9582345485687256, |
| "learning_rate": 1e-06, |
| "loss": 0.1511, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.7222607489597781, |
| "grad_norm": 2.236300230026245, |
| "learning_rate": 1e-06, |
| "loss": 0.1272, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7226074895977809, |
| "grad_norm": 2.000270366668701, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7229542302357836, |
| "grad_norm": 2.317187547683716, |
| "learning_rate": 1e-06, |
| "loss": 0.16, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7233009708737864, |
| "grad_norm": 2.866093397140503, |
| "learning_rate": 1e-06, |
| "loss": 0.1539, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.7236477115117892, |
| "grad_norm": 3.5143120288848877, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.723994452149792, |
| "grad_norm": 3.5509307384490967, |
| "learning_rate": 1e-06, |
| "loss": 0.1357, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.7243411927877947, |
| "grad_norm": 1.9065073728561401, |
| "learning_rate": 1e-06, |
| "loss": 0.1227, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.7246879334257975, |
| "grad_norm": 2.9716525077819824, |
| "learning_rate": 1e-06, |
| "loss": 0.1538, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7250346740638003, |
| "grad_norm": 2.858944892883301, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.7253814147018031, |
| "grad_norm": 4.433228969573975, |
| "learning_rate": 1e-06, |
| "loss": 0.1487, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.7257281553398058, |
| "grad_norm": 2.223188877105713, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7260748959778086, |
| "grad_norm": 2.040834665298462, |
| "learning_rate": 1e-06, |
| "loss": 0.1542, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7264216366158114, |
| "grad_norm": 2.6200249195098877, |
| "learning_rate": 1e-06, |
| "loss": 0.1551, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.7267683772538142, |
| "grad_norm": 2.9047865867614746, |
| "learning_rate": 1e-06, |
| "loss": 0.1368, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.7271151178918169, |
| "grad_norm": 2.617482900619507, |
| "learning_rate": 1e-06, |
| "loss": 0.1762, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.7274618585298197, |
| "grad_norm": 2.3887550830841064, |
| "learning_rate": 1e-06, |
| "loss": 0.1652, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.7278085991678225, |
| "grad_norm": 2.3276662826538086, |
| "learning_rate": 1e-06, |
| "loss": 0.1696, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7281553398058253, |
| "grad_norm": 2.4688990116119385, |
| "learning_rate": 1e-06, |
| "loss": 0.1865, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.728502080443828, |
| "grad_norm": 3.6115245819091797, |
| "learning_rate": 1e-06, |
| "loss": 0.1667, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7288488210818308, |
| "grad_norm": 4.399360179901123, |
| "learning_rate": 1e-06, |
| "loss": 0.1383, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7291955617198336, |
| "grad_norm": 5.1829729080200195, |
| "learning_rate": 1e-06, |
| "loss": 0.1576, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.7295423023578363, |
| "grad_norm": 4.441248416900635, |
| "learning_rate": 1e-06, |
| "loss": 0.139, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7298890429958391, |
| "grad_norm": 3.7266438007354736, |
| "learning_rate": 1e-06, |
| "loss": 0.1497, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.7302357836338419, |
| "grad_norm": 2.000267505645752, |
| "learning_rate": 1e-06, |
| "loss": 0.1643, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7305825242718447, |
| "grad_norm": 2.909482955932617, |
| "learning_rate": 1e-06, |
| "loss": 0.1409, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.7309292649098474, |
| "grad_norm": 3.0315585136413574, |
| "learning_rate": 1e-06, |
| "loss": 0.1822, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.7312760055478502, |
| "grad_norm": 2.9591143131256104, |
| "learning_rate": 1e-06, |
| "loss": 0.1946, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.731622746185853, |
| "grad_norm": 3.1562037467956543, |
| "learning_rate": 1e-06, |
| "loss": 0.1889, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7319694868238558, |
| "grad_norm": 3.8917124271392822, |
| "learning_rate": 1e-06, |
| "loss": 0.1746, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.7323162274618585, |
| "grad_norm": 2.4440743923187256, |
| "learning_rate": 1e-06, |
| "loss": 0.1837, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.7326629680998613, |
| "grad_norm": 2.5348663330078125, |
| "learning_rate": 1e-06, |
| "loss": 0.1625, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7330097087378641, |
| "grad_norm": 1.7352176904678345, |
| "learning_rate": 1e-06, |
| "loss": 0.1518, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.7333564493758669, |
| "grad_norm": 2.3291268348693848, |
| "learning_rate": 1e-06, |
| "loss": 0.1341, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7337031900138696, |
| "grad_norm": 3.349982738494873, |
| "learning_rate": 1e-06, |
| "loss": 0.166, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7340499306518724, |
| "grad_norm": 2.1478614807128906, |
| "learning_rate": 1e-06, |
| "loss": 0.161, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7343966712898752, |
| "grad_norm": 2.126044750213623, |
| "learning_rate": 1e-06, |
| "loss": 0.1441, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.734743411927878, |
| "grad_norm": 2.5404157638549805, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7350901525658807, |
| "grad_norm": 2.651552677154541, |
| "learning_rate": 1e-06, |
| "loss": 0.1599, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7354368932038835, |
| "grad_norm": 4.563192367553711, |
| "learning_rate": 1e-06, |
| "loss": 0.124, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.7357836338418863, |
| "grad_norm": 1.917719841003418, |
| "learning_rate": 1e-06, |
| "loss": 0.165, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.736130374479889, |
| "grad_norm": 3.211930274963379, |
| "learning_rate": 1e-06, |
| "loss": 0.1625, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7364771151178918, |
| "grad_norm": 2.4502320289611816, |
| "learning_rate": 1e-06, |
| "loss": 0.1318, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7368238557558946, |
| "grad_norm": 2.0151150226593018, |
| "learning_rate": 1e-06, |
| "loss": 0.1832, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7371705963938974, |
| "grad_norm": 3.2877564430236816, |
| "learning_rate": 1e-06, |
| "loss": 0.149, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.7375173370319001, |
| "grad_norm": 2.4384498596191406, |
| "learning_rate": 1e-06, |
| "loss": 0.1717, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7378640776699029, |
| "grad_norm": 1.685181736946106, |
| "learning_rate": 1e-06, |
| "loss": 0.1361, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.7382108183079057, |
| "grad_norm": 1.982335090637207, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.7385575589459085, |
| "grad_norm": 1.7395703792572021, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7389042995839112, |
| "grad_norm": 4.207993507385254, |
| "learning_rate": 1e-06, |
| "loss": 0.1749, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.739251040221914, |
| "grad_norm": 2.950206995010376, |
| "learning_rate": 1e-06, |
| "loss": 0.1748, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.7395977808599168, |
| "grad_norm": 2.3146812915802, |
| "learning_rate": 1e-06, |
| "loss": 0.1379, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7399445214979196, |
| "grad_norm": 3.0385754108428955, |
| "learning_rate": 1e-06, |
| "loss": 0.149, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.7402912621359223, |
| "grad_norm": 2.4939308166503906, |
| "learning_rate": 1e-06, |
| "loss": 0.1621, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.7406380027739251, |
| "grad_norm": 3.453312397003174, |
| "learning_rate": 1e-06, |
| "loss": 0.1817, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.7409847434119279, |
| "grad_norm": 1.8451257944107056, |
| "learning_rate": 1e-06, |
| "loss": 0.1169, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7413314840499307, |
| "grad_norm": 1.7449294328689575, |
| "learning_rate": 1e-06, |
| "loss": 0.1307, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7416782246879334, |
| "grad_norm": 2.023353338241577, |
| "learning_rate": 1e-06, |
| "loss": 0.1229, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7420249653259362, |
| "grad_norm": 2.990633726119995, |
| "learning_rate": 1e-06, |
| "loss": 0.1469, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.742371705963939, |
| "grad_norm": 2.25154709815979, |
| "learning_rate": 1e-06, |
| "loss": 0.1447, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.7427184466019418, |
| "grad_norm": 2.082965135574341, |
| "learning_rate": 1e-06, |
| "loss": 0.1315, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.7430651872399445, |
| "grad_norm": 2.342172145843506, |
| "learning_rate": 1e-06, |
| "loss": 0.1613, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7434119278779473, |
| "grad_norm": 1.9159072637557983, |
| "learning_rate": 1e-06, |
| "loss": 0.128, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.7437586685159501, |
| "grad_norm": 2.7071194648742676, |
| "learning_rate": 1e-06, |
| "loss": 0.1023, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7441054091539528, |
| "grad_norm": 1.955583095550537, |
| "learning_rate": 1e-06, |
| "loss": 0.1694, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7444521497919556, |
| "grad_norm": 1.6116520166397095, |
| "learning_rate": 1e-06, |
| "loss": 0.1452, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.7447988904299584, |
| "grad_norm": 2.063347101211548, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7451456310679612, |
| "grad_norm": 2.245347023010254, |
| "learning_rate": 1e-06, |
| "loss": 0.1437, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7454923717059639, |
| "grad_norm": 2.005086660385132, |
| "learning_rate": 1e-06, |
| "loss": 0.1031, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7458391123439667, |
| "grad_norm": 3.6264290809631348, |
| "learning_rate": 1e-06, |
| "loss": 0.1509, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7461858529819695, |
| "grad_norm": 3.764726161956787, |
| "learning_rate": 1e-06, |
| "loss": 0.1373, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7465325936199723, |
| "grad_norm": 5.636833667755127, |
| "learning_rate": 1e-06, |
| "loss": 0.1851, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.746879334257975, |
| "grad_norm": 3.568516969680786, |
| "learning_rate": 1e-06, |
| "loss": 0.1337, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7472260748959778, |
| "grad_norm": 1.9982422590255737, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.7475728155339806, |
| "grad_norm": 2.262208938598633, |
| "learning_rate": 1e-06, |
| "loss": 0.1318, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7479195561719834, |
| "grad_norm": 2.338360071182251, |
| "learning_rate": 1e-06, |
| "loss": 0.1532, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7482662968099861, |
| "grad_norm": 2.570775032043457, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.7486130374479889, |
| "grad_norm": 2.5094552040100098, |
| "learning_rate": 1e-06, |
| "loss": 0.1252, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7489597780859917, |
| "grad_norm": 3.4843361377716064, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7493065187239945, |
| "grad_norm": 2.467364549636841, |
| "learning_rate": 1e-06, |
| "loss": 0.1446, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7496532593619972, |
| "grad_norm": 2.1688084602355957, |
| "learning_rate": 1e-06, |
| "loss": 0.146, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.409573793411255, |
| "learning_rate": 1e-06, |
| "loss": 0.1702, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7503467406380028, |
| "grad_norm": 3.1284704208374023, |
| "learning_rate": 1e-06, |
| "loss": 0.1131, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7506934812760055, |
| "grad_norm": 2.3599703311920166, |
| "learning_rate": 1e-06, |
| "loss": 0.1569, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7510402219140083, |
| "grad_norm": 2.9270598888397217, |
| "learning_rate": 1e-06, |
| "loss": 0.1723, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.7513869625520111, |
| "grad_norm": 3.9771506786346436, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.7517337031900139, |
| "grad_norm": 2.2273499965667725, |
| "learning_rate": 1e-06, |
| "loss": 0.1313, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.7520804438280166, |
| "grad_norm": 2.1485869884490967, |
| "learning_rate": 1e-06, |
| "loss": 0.1242, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.7524271844660194, |
| "grad_norm": 3.504756212234497, |
| "learning_rate": 1e-06, |
| "loss": 0.1515, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7527739251040222, |
| "grad_norm": 3.7918713092803955, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.753120665742025, |
| "grad_norm": 3.0239686965942383, |
| "learning_rate": 1e-06, |
| "loss": 0.1645, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7534674063800277, |
| "grad_norm": 2.892434597015381, |
| "learning_rate": 1e-06, |
| "loss": 0.133, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.7538141470180305, |
| "grad_norm": 3.0266504287719727, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7541608876560333, |
| "grad_norm": 2.232647180557251, |
| "learning_rate": 1e-06, |
| "loss": 0.1101, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7545076282940361, |
| "grad_norm": 2.5927672386169434, |
| "learning_rate": 1e-06, |
| "loss": 0.1474, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.7548543689320388, |
| "grad_norm": 4.152268409729004, |
| "learning_rate": 1e-06, |
| "loss": 0.1451, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7552011095700416, |
| "grad_norm": 2.1622960567474365, |
| "learning_rate": 1e-06, |
| "loss": 0.1412, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7555478502080444, |
| "grad_norm": 3.911947250366211, |
| "learning_rate": 1e-06, |
| "loss": 0.1432, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.7558945908460472, |
| "grad_norm": 3.2365176677703857, |
| "learning_rate": 1e-06, |
| "loss": 0.1284, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7562413314840499, |
| "grad_norm": 2.945903778076172, |
| "learning_rate": 1e-06, |
| "loss": 0.1461, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7565880721220527, |
| "grad_norm": 3.0714845657348633, |
| "learning_rate": 1e-06, |
| "loss": 0.1857, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7569348127600555, |
| "grad_norm": 2.2900753021240234, |
| "learning_rate": 1e-06, |
| "loss": 0.1419, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7572815533980582, |
| "grad_norm": 2.031371593475342, |
| "learning_rate": 1e-06, |
| "loss": 0.1216, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.757628294036061, |
| "grad_norm": 2.7789697647094727, |
| "learning_rate": 1e-06, |
| "loss": 0.1275, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7579750346740638, |
| "grad_norm": 2.324058771133423, |
| "learning_rate": 1e-06, |
| "loss": 0.1307, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7583217753120666, |
| "grad_norm": 3.2857003211975098, |
| "learning_rate": 1e-06, |
| "loss": 0.1331, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7586685159500693, |
| "grad_norm": 3.0428361892700195, |
| "learning_rate": 1e-06, |
| "loss": 0.1491, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7590152565880721, |
| "grad_norm": 2.756199598312378, |
| "learning_rate": 1e-06, |
| "loss": 0.115, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.7593619972260749, |
| "grad_norm": 2.3134708404541016, |
| "learning_rate": 1e-06, |
| "loss": 0.1233, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7597087378640777, |
| "grad_norm": 2.598783016204834, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7600554785020804, |
| "grad_norm": 3.0439116954803467, |
| "learning_rate": 1e-06, |
| "loss": 0.1508, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.7604022191400832, |
| "grad_norm": 2.078136682510376, |
| "learning_rate": 1e-06, |
| "loss": 0.1192, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.760748959778086, |
| "grad_norm": 2.642509937286377, |
| "learning_rate": 1e-06, |
| "loss": 0.1577, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.7610957004160888, |
| "grad_norm": 2.7496683597564697, |
| "learning_rate": 1e-06, |
| "loss": 0.1658, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7614424410540915, |
| "grad_norm": 2.2078957557678223, |
| "learning_rate": 1e-06, |
| "loss": 0.1016, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7617891816920943, |
| "grad_norm": 1.7725173234939575, |
| "learning_rate": 1e-06, |
| "loss": 0.1178, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7621359223300971, |
| "grad_norm": 2.821902275085449, |
| "learning_rate": 1e-06, |
| "loss": 0.1306, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7624826629680999, |
| "grad_norm": 1.9677958488464355, |
| "learning_rate": 1e-06, |
| "loss": 0.1499, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7628294036061026, |
| "grad_norm": 1.9492521286010742, |
| "learning_rate": 1e-06, |
| "loss": 0.1408, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7631761442441054, |
| "grad_norm": 2.5732200145721436, |
| "learning_rate": 1e-06, |
| "loss": 0.1239, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.7635228848821082, |
| "grad_norm": 5.270609378814697, |
| "learning_rate": 1e-06, |
| "loss": 0.1715, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.763869625520111, |
| "grad_norm": 2.55741286277771, |
| "learning_rate": 1e-06, |
| "loss": 0.1183, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.7642163661581137, |
| "grad_norm": 2.0879695415496826, |
| "learning_rate": 1e-06, |
| "loss": 0.1571, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.7645631067961165, |
| "grad_norm": 3.306610584259033, |
| "learning_rate": 1e-06, |
| "loss": 0.1819, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.7649098474341193, |
| "grad_norm": 5.571447849273682, |
| "learning_rate": 1e-06, |
| "loss": 0.1288, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.765256588072122, |
| "grad_norm": 2.29826283454895, |
| "learning_rate": 1e-06, |
| "loss": 0.1707, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.7656033287101248, |
| "grad_norm": 2.8895623683929443, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.7659500693481276, |
| "grad_norm": 2.1935150623321533, |
| "learning_rate": 1e-06, |
| "loss": 0.148, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.7662968099861304, |
| "grad_norm": 2.136641502380371, |
| "learning_rate": 1e-06, |
| "loss": 0.1317, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7666435506241331, |
| "grad_norm": 2.338508367538452, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.7669902912621359, |
| "grad_norm": 1.9527918100357056, |
| "learning_rate": 1e-06, |
| "loss": 0.129, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.7673370319001387, |
| "grad_norm": 3.5570404529571533, |
| "learning_rate": 1e-06, |
| "loss": 0.1631, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.7676837725381415, |
| "grad_norm": 1.9036303758621216, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.7680305131761442, |
| "grad_norm": 3.279881238937378, |
| "learning_rate": 1e-06, |
| "loss": 0.1561, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.768377253814147, |
| "grad_norm": 3.6818034648895264, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.7687239944521498, |
| "grad_norm": 2.868618965148926, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.7690707350901526, |
| "grad_norm": 2.422971725463867, |
| "learning_rate": 1e-06, |
| "loss": 0.1465, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.7694174757281553, |
| "grad_norm": 3.9464166164398193, |
| "learning_rate": 1e-06, |
| "loss": 0.1541, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.7697642163661581, |
| "grad_norm": 2.302083969116211, |
| "learning_rate": 1e-06, |
| "loss": 0.1585, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7701109570041609, |
| "grad_norm": 2.771343946456909, |
| "learning_rate": 1e-06, |
| "loss": 0.127, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.7704576976421637, |
| "grad_norm": 2.0313382148742676, |
| "learning_rate": 1e-06, |
| "loss": 0.1635, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.7708044382801664, |
| "grad_norm": 3.974736213684082, |
| "learning_rate": 1e-06, |
| "loss": 0.1493, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.7711511789181692, |
| "grad_norm": 2.2979185581207275, |
| "learning_rate": 1e-06, |
| "loss": 0.1205, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.771497919556172, |
| "grad_norm": 2.6536171436309814, |
| "learning_rate": 1e-06, |
| "loss": 0.1431, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.7718446601941747, |
| "grad_norm": 2.917592763900757, |
| "learning_rate": 1e-06, |
| "loss": 0.1473, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.7721914008321775, |
| "grad_norm": 2.4181203842163086, |
| "learning_rate": 1e-06, |
| "loss": 0.1502, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.7725381414701803, |
| "grad_norm": 2.419362783432007, |
| "learning_rate": 1e-06, |
| "loss": 0.1183, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.7728848821081831, |
| "grad_norm": 2.5661659240722656, |
| "learning_rate": 1e-06, |
| "loss": 0.1402, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.7732316227461858, |
| "grad_norm": 1.9720306396484375, |
| "learning_rate": 1e-06, |
| "loss": 0.1253, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7735783633841886, |
| "grad_norm": 3.7642719745635986, |
| "learning_rate": 1e-06, |
| "loss": 0.1552, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.7739251040221914, |
| "grad_norm": 8.331310272216797, |
| "learning_rate": 1e-06, |
| "loss": 0.2239, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.7742718446601942, |
| "grad_norm": 2.8353569507598877, |
| "learning_rate": 1e-06, |
| "loss": 0.1348, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.7746185852981969, |
| "grad_norm": 3.4421985149383545, |
| "learning_rate": 1e-06, |
| "loss": 0.1813, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.7749653259361997, |
| "grad_norm": 3.0186569690704346, |
| "learning_rate": 1e-06, |
| "loss": 0.1741, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.7753120665742025, |
| "grad_norm": 1.918044924736023, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.7756588072122053, |
| "grad_norm": 2.8227319717407227, |
| "learning_rate": 1e-06, |
| "loss": 0.141, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.776005547850208, |
| "grad_norm": 1.9563891887664795, |
| "learning_rate": 1e-06, |
| "loss": 0.1403, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.7763522884882108, |
| "grad_norm": 3.1032557487487793, |
| "learning_rate": 1e-06, |
| "loss": 0.1455, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 2.261861801147461, |
| "learning_rate": 1e-06, |
| "loss": 0.1397, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7770457697642164, |
| "grad_norm": 3.733213186264038, |
| "learning_rate": 1e-06, |
| "loss": 0.167, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.7773925104022191, |
| "grad_norm": 2.189244270324707, |
| "learning_rate": 1e-06, |
| "loss": 0.1243, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.7777392510402219, |
| "grad_norm": 3.7104716300964355, |
| "learning_rate": 1e-06, |
| "loss": 0.1295, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.7780859916782247, |
| "grad_norm": 1.9497910737991333, |
| "learning_rate": 1e-06, |
| "loss": 0.1408, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.7784327323162274, |
| "grad_norm": 3.3845322132110596, |
| "learning_rate": 1e-06, |
| "loss": 0.1478, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.7787794729542302, |
| "grad_norm": 1.5930759906768799, |
| "learning_rate": 1e-06, |
| "loss": 0.1126, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.779126213592233, |
| "grad_norm": 2.993636131286621, |
| "learning_rate": 1e-06, |
| "loss": 0.1232, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.7794729542302358, |
| "grad_norm": 2.2873573303222656, |
| "learning_rate": 1e-06, |
| "loss": 0.1456, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.7798196948682385, |
| "grad_norm": 3.0485949516296387, |
| "learning_rate": 1e-06, |
| "loss": 0.1277, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.7801664355062413, |
| "grad_norm": 3.5445034503936768, |
| "learning_rate": 1e-06, |
| "loss": 0.145, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7805131761442441, |
| "grad_norm": 2.8255326747894287, |
| "learning_rate": 1e-06, |
| "loss": 0.1321, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.7808599167822469, |
| "grad_norm": 2.1605257987976074, |
| "learning_rate": 1e-06, |
| "loss": 0.1152, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.7812066574202496, |
| "grad_norm": 2.541396379470825, |
| "learning_rate": 1e-06, |
| "loss": 0.1488, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.7815533980582524, |
| "grad_norm": 2.67930006980896, |
| "learning_rate": 1e-06, |
| "loss": 0.1334, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.7819001386962552, |
| "grad_norm": 2.717165470123291, |
| "learning_rate": 1e-06, |
| "loss": 0.1487, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.782246879334258, |
| "grad_norm": 2.5122649669647217, |
| "learning_rate": 1e-06, |
| "loss": 0.128, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.7825936199722607, |
| "grad_norm": 3.2565906047821045, |
| "learning_rate": 1e-06, |
| "loss": 0.1541, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.7829403606102635, |
| "grad_norm": 2.227794647216797, |
| "learning_rate": 1e-06, |
| "loss": 0.1562, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.7832871012482663, |
| "grad_norm": 1.8468554019927979, |
| "learning_rate": 1e-06, |
| "loss": 0.1258, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.7836338418862691, |
| "grad_norm": 2.6450536251068115, |
| "learning_rate": 1e-06, |
| "loss": 0.1218, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7839805825242718, |
| "grad_norm": 3.5764429569244385, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.7843273231622746, |
| "grad_norm": 3.0315048694610596, |
| "learning_rate": 1e-06, |
| "loss": 0.1651, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.7846740638002774, |
| "grad_norm": 3.2928550243377686, |
| "learning_rate": 1e-06, |
| "loss": 0.1461, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.7850208044382802, |
| "grad_norm": 2.096841812133789, |
| "learning_rate": 1e-06, |
| "loss": 0.1297, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.7853675450762829, |
| "grad_norm": 2.753180980682373, |
| "learning_rate": 1e-06, |
| "loss": 0.1545, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 3.1257476806640625, |
| "learning_rate": 1e-06, |
| "loss": 0.1371, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.7860610263522885, |
| "grad_norm": 3.747185707092285, |
| "learning_rate": 1e-06, |
| "loss": 0.148, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.7864077669902912, |
| "grad_norm": 2.7665867805480957, |
| "learning_rate": 1e-06, |
| "loss": 0.1316, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.786754507628294, |
| "grad_norm": 4.6021318435668945, |
| "learning_rate": 1e-06, |
| "loss": 0.1613, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.7871012482662968, |
| "grad_norm": 3.030388116836548, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7874479889042996, |
| "grad_norm": 2.6754000186920166, |
| "learning_rate": 1e-06, |
| "loss": 0.1391, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.7877947295423023, |
| "grad_norm": 3.4339709281921387, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.7881414701803051, |
| "grad_norm": 2.5777359008789062, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.7884882108183079, |
| "grad_norm": 3.1862616539001465, |
| "learning_rate": 1e-06, |
| "loss": 0.175, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.7888349514563107, |
| "grad_norm": 2.623753786087036, |
| "learning_rate": 1e-06, |
| "loss": 0.1374, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.7891816920943134, |
| "grad_norm": 2.4776697158813477, |
| "learning_rate": 1e-06, |
| "loss": 0.1435, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.7895284327323162, |
| "grad_norm": 2.2111032009124756, |
| "learning_rate": 1e-06, |
| "loss": 0.1139, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.789875173370319, |
| "grad_norm": 2.097119092941284, |
| "learning_rate": 1e-06, |
| "loss": 0.1246, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.7902219140083218, |
| "grad_norm": 2.417785882949829, |
| "learning_rate": 1e-06, |
| "loss": 0.1433, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.7905686546463245, |
| "grad_norm": 3.1172475814819336, |
| "learning_rate": 1e-06, |
| "loss": 0.1155, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7909153952843273, |
| "grad_norm": 2.320932626724243, |
| "learning_rate": 1e-06, |
| "loss": 0.1368, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.7912621359223301, |
| "grad_norm": 1.8932015895843506, |
| "learning_rate": 1e-06, |
| "loss": 0.1325, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.7916088765603329, |
| "grad_norm": 2.369478225708008, |
| "learning_rate": 1e-06, |
| "loss": 0.1814, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.7919556171983356, |
| "grad_norm": 1.7178212404251099, |
| "learning_rate": 1e-06, |
| "loss": 0.1213, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.7923023578363384, |
| "grad_norm": 2.493131399154663, |
| "learning_rate": 1e-06, |
| "loss": 0.1378, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.7926490984743412, |
| "grad_norm": 2.1028831005096436, |
| "learning_rate": 1e-06, |
| "loss": 0.137, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.792995839112344, |
| "grad_norm": 3.674328088760376, |
| "learning_rate": 1e-06, |
| "loss": 0.156, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.7933425797503467, |
| "grad_norm": 2.515397310256958, |
| "learning_rate": 1e-06, |
| "loss": 0.1278, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.7936893203883495, |
| "grad_norm": 2.559921979904175, |
| "learning_rate": 1e-06, |
| "loss": 0.1363, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.7940360610263523, |
| "grad_norm": 3.420501232147217, |
| "learning_rate": 1e-06, |
| "loss": 0.1326, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.794382801664355, |
| "grad_norm": 1.9830430746078491, |
| "learning_rate": 1e-06, |
| "loss": 0.1242, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.7947295423023578, |
| "grad_norm": 1.8026798963546753, |
| "learning_rate": 1e-06, |
| "loss": 0.116, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.7950762829403606, |
| "grad_norm": 2.667938232421875, |
| "learning_rate": 1e-06, |
| "loss": 0.14, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.7954230235783634, |
| "grad_norm": 3.140871524810791, |
| "learning_rate": 1e-06, |
| "loss": 0.137, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.7957697642163661, |
| "grad_norm": 3.2242789268493652, |
| "learning_rate": 1e-06, |
| "loss": 0.1559, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.7961165048543689, |
| "grad_norm": 2.4139838218688965, |
| "learning_rate": 1e-06, |
| "loss": 0.1286, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.7964632454923717, |
| "grad_norm": 1.957815408706665, |
| "learning_rate": 1e-06, |
| "loss": 0.1292, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.7968099861303745, |
| "grad_norm": 4.165436267852783, |
| "learning_rate": 1e-06, |
| "loss": 0.1342, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.7971567267683772, |
| "grad_norm": 2.260488748550415, |
| "learning_rate": 1e-06, |
| "loss": 0.126, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.79750346740638, |
| "grad_norm": 2.7062175273895264, |
| "learning_rate": 1e-06, |
| "loss": 0.1285, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7978502080443828, |
| "grad_norm": 3.5681710243225098, |
| "learning_rate": 1e-06, |
| "loss": 0.1115, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.7981969486823856, |
| "grad_norm": 4.171554088592529, |
| "learning_rate": 1e-06, |
| "loss": 0.12, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.7985436893203883, |
| "grad_norm": 1.9496839046478271, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.7988904299583911, |
| "grad_norm": 2.0774519443511963, |
| "learning_rate": 1e-06, |
| "loss": 0.1353, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.7992371705963939, |
| "grad_norm": 2.0676655769348145, |
| "learning_rate": 1e-06, |
| "loss": 0.1294, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.7995839112343966, |
| "grad_norm": 2.6793768405914307, |
| "learning_rate": 1e-06, |
| "loss": 0.1349, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.7999306518723994, |
| "grad_norm": 1.8901208639144897, |
| "learning_rate": 1e-06, |
| "loss": 0.1318, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.8002773925104022, |
| "grad_norm": 2.2167181968688965, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.800624133148405, |
| "grad_norm": 3.0722689628601074, |
| "learning_rate": 1e-06, |
| "loss": 0.1565, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.8009708737864077, |
| "grad_norm": 2.4346120357513428, |
| "learning_rate": 1e-06, |
| "loss": 0.1498, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.8013176144244105, |
| "grad_norm": 1.9527393579483032, |
| "learning_rate": 1e-06, |
| "loss": 0.1459, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.8016643550624133, |
| "grad_norm": 2.2069756984710693, |
| "learning_rate": 1e-06, |
| "loss": 0.166, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.8020110957004161, |
| "grad_norm": 3.2333059310913086, |
| "learning_rate": 1e-06, |
| "loss": 0.1786, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.8023578363384188, |
| "grad_norm": 3.749083995819092, |
| "learning_rate": 1e-06, |
| "loss": 0.1594, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.8027045769764216, |
| "grad_norm": 2.32126522064209, |
| "learning_rate": 1e-06, |
| "loss": 0.1161, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.8030513176144244, |
| "grad_norm": 3.9740304946899414, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.8033980582524272, |
| "grad_norm": 2.4023282527923584, |
| "learning_rate": 1e-06, |
| "loss": 0.1319, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.8037447988904299, |
| "grad_norm": 2.1061739921569824, |
| "learning_rate": 1e-06, |
| "loss": 0.1239, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.8040915395284327, |
| "grad_norm": 2.2139999866485596, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.8044382801664355, |
| "grad_norm": 2.9679105281829834, |
| "learning_rate": 1e-06, |
| "loss": 0.1466, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8047850208044383, |
| "grad_norm": 2.461606740951538, |
| "learning_rate": 1e-06, |
| "loss": 0.1505, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.805131761442441, |
| "grad_norm": 2.9136569499969482, |
| "learning_rate": 1e-06, |
| "loss": 0.1626, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.8054785020804438, |
| "grad_norm": 1.9064357280731201, |
| "learning_rate": 1e-06, |
| "loss": 0.138, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.8058252427184466, |
| "grad_norm": 2.3706328868865967, |
| "learning_rate": 1e-06, |
| "loss": 0.1311, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.8061719833564494, |
| "grad_norm": 3.0558860301971436, |
| "learning_rate": 1e-06, |
| "loss": 0.1171, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.8065187239944521, |
| "grad_norm": 4.301748275756836, |
| "learning_rate": 1e-06, |
| "loss": 0.1781, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.8068654646324549, |
| "grad_norm": 3.325167655944824, |
| "learning_rate": 1e-06, |
| "loss": 0.1443, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.8072122052704577, |
| "grad_norm": 2.0337326526641846, |
| "learning_rate": 1e-06, |
| "loss": 0.1317, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.8075589459084604, |
| "grad_norm": 2.0139780044555664, |
| "learning_rate": 1e-06, |
| "loss": 0.163, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.8079056865464632, |
| "grad_norm": 2.4566049575805664, |
| "learning_rate": 1e-06, |
| "loss": 0.1758, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.808252427184466, |
| "grad_norm": 2.1362626552581787, |
| "learning_rate": 1e-06, |
| "loss": 0.1435, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.8085991678224688, |
| "grad_norm": 1.9229339361190796, |
| "learning_rate": 1e-06, |
| "loss": 0.1444, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.8089459084604715, |
| "grad_norm": 3.5861167907714844, |
| "learning_rate": 1e-06, |
| "loss": 0.1486, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.8092926490984743, |
| "grad_norm": 1.9148346185684204, |
| "learning_rate": 1e-06, |
| "loss": 0.1355, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.8096393897364771, |
| "grad_norm": 2.1705756187438965, |
| "learning_rate": 1e-06, |
| "loss": 0.1402, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.8099861303744799, |
| "grad_norm": 1.75710928440094, |
| "learning_rate": 1e-06, |
| "loss": 0.1188, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.8103328710124826, |
| "grad_norm": 1.7081013917922974, |
| "learning_rate": 1e-06, |
| "loss": 0.1314, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.8106796116504854, |
| "grad_norm": 3.3994858264923096, |
| "learning_rate": 1e-06, |
| "loss": 0.1017, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.8110263522884882, |
| "grad_norm": 3.6731457710266113, |
| "learning_rate": 1e-06, |
| "loss": 0.1161, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.811373092926491, |
| "grad_norm": 2.8746140003204346, |
| "learning_rate": 1e-06, |
| "loss": 0.1453, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8117198335644937, |
| "grad_norm": 2.074737310409546, |
| "learning_rate": 1e-06, |
| "loss": 0.1477, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.8120665742024965, |
| "grad_norm": 3.902578592300415, |
| "learning_rate": 1e-06, |
| "loss": 0.1372, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.8124133148404993, |
| "grad_norm": 2.1763041019439697, |
| "learning_rate": 1e-06, |
| "loss": 0.1246, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.812760055478502, |
| "grad_norm": 3.487217903137207, |
| "learning_rate": 1e-06, |
| "loss": 0.1074, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.8131067961165048, |
| "grad_norm": 1.8151427507400513, |
| "learning_rate": 1e-06, |
| "loss": 0.0811, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.8134535367545076, |
| "grad_norm": 2.7768118381500244, |
| "learning_rate": 1e-06, |
| "loss": 0.17, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.8138002773925104, |
| "grad_norm": 2.1280786991119385, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.8141470180305131, |
| "grad_norm": 3.410444974899292, |
| "learning_rate": 1e-06, |
| "loss": 0.1596, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.8144937586685159, |
| "grad_norm": 1.9879850149154663, |
| "learning_rate": 1e-06, |
| "loss": 0.1277, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.8148404993065187, |
| "grad_norm": 2.854005813598633, |
| "learning_rate": 1e-06, |
| "loss": 0.1358, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.8151872399445215, |
| "grad_norm": 2.091987133026123, |
| "learning_rate": 1e-06, |
| "loss": 0.1321, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.8155339805825242, |
| "grad_norm": 2.376237392425537, |
| "learning_rate": 1e-06, |
| "loss": 0.1499, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.815880721220527, |
| "grad_norm": 2.1092143058776855, |
| "learning_rate": 1e-06, |
| "loss": 0.1543, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.8162274618585298, |
| "grad_norm": 2.8994827270507812, |
| "learning_rate": 1e-06, |
| "loss": 0.1463, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.8165742024965326, |
| "grad_norm": 2.607811689376831, |
| "learning_rate": 1e-06, |
| "loss": 0.1171, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.8169209431345353, |
| "grad_norm": 3.0912883281707764, |
| "learning_rate": 1e-06, |
| "loss": 0.1444, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.8172676837725381, |
| "grad_norm": 2.976580858230591, |
| "learning_rate": 1e-06, |
| "loss": 0.1761, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.8176144244105409, |
| "grad_norm": 2.745866060256958, |
| "learning_rate": 1e-06, |
| "loss": 0.1282, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.8179611650485437, |
| "grad_norm": 2.7969820499420166, |
| "learning_rate": 1e-06, |
| "loss": 0.117, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.8183079056865464, |
| "grad_norm": 2.0547497272491455, |
| "learning_rate": 1e-06, |
| "loss": 0.1322, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.8186546463245492, |
| "grad_norm": 2.9290833473205566, |
| "learning_rate": 1e-06, |
| "loss": 0.18, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.819001386962552, |
| "grad_norm": 2.636199474334717, |
| "learning_rate": 1e-06, |
| "loss": 0.1636, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.8193481276005548, |
| "grad_norm": 2.2920427322387695, |
| "learning_rate": 1e-06, |
| "loss": 0.1362, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.8196948682385575, |
| "grad_norm": 2.04429030418396, |
| "learning_rate": 1e-06, |
| "loss": 0.1486, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.8200416088765603, |
| "grad_norm": 2.0373737812042236, |
| "learning_rate": 1e-06, |
| "loss": 0.1257, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.8203883495145631, |
| "grad_norm": 2.8232688903808594, |
| "learning_rate": 1e-06, |
| "loss": 0.1549, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.8207350901525658, |
| "grad_norm": 2.8030526638031006, |
| "learning_rate": 1e-06, |
| "loss": 0.1429, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.8210818307905686, |
| "grad_norm": 3.216235399246216, |
| "learning_rate": 1e-06, |
| "loss": 0.1781, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 2.1101338863372803, |
| "learning_rate": 1e-06, |
| "loss": 0.1353, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.8217753120665742, |
| "grad_norm": 3.3085856437683105, |
| "learning_rate": 1e-06, |
| "loss": 0.1526, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.8221220527045769, |
| "grad_norm": 2.0516085624694824, |
| "learning_rate": 1e-06, |
| "loss": 0.148, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.8224687933425797, |
| "grad_norm": 2.2691268920898438, |
| "learning_rate": 1e-06, |
| "loss": 0.1306, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.8228155339805825, |
| "grad_norm": 2.2179465293884277, |
| "learning_rate": 1e-06, |
| "loss": 0.1412, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.8231622746185853, |
| "grad_norm": 2.9574997425079346, |
| "learning_rate": 1e-06, |
| "loss": 0.1928, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.823509015256588, |
| "grad_norm": 2.3397562503814697, |
| "learning_rate": 1e-06, |
| "loss": 0.1478, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.8238557558945908, |
| "grad_norm": 2.1710567474365234, |
| "learning_rate": 1e-06, |
| "loss": 0.1619, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.8242024965325936, |
| "grad_norm": 2.402174472808838, |
| "learning_rate": 1e-06, |
| "loss": 0.1495, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.8245492371705964, |
| "grad_norm": 2.147606372833252, |
| "learning_rate": 1e-06, |
| "loss": 0.1261, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.8248959778085991, |
| "grad_norm": 2.674955129623413, |
| "learning_rate": 1e-06, |
| "loss": 0.1174, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.8252427184466019, |
| "grad_norm": 3.098621129989624, |
| "learning_rate": 1e-06, |
| "loss": 0.1686, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.8255894590846047, |
| "grad_norm": 2.411407232284546, |
| "learning_rate": 1e-06, |
| "loss": 0.1175, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.8259361997226075, |
| "grad_norm": 3.399182081222534, |
| "learning_rate": 1e-06, |
| "loss": 0.1345, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.8262829403606102, |
| "grad_norm": 3.7523751258850098, |
| "learning_rate": 1e-06, |
| "loss": 0.139, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.826629680998613, |
| "grad_norm": 2.860729217529297, |
| "learning_rate": 1e-06, |
| "loss": 0.1782, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.8269764216366158, |
| "grad_norm": 4.691393852233887, |
| "learning_rate": 1e-06, |
| "loss": 0.1536, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.8273231622746186, |
| "grad_norm": 2.227851152420044, |
| "learning_rate": 1e-06, |
| "loss": 0.1156, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.8276699029126213, |
| "grad_norm": 2.429328680038452, |
| "learning_rate": 1e-06, |
| "loss": 0.1305, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.8280166435506241, |
| "grad_norm": 1.7986654043197632, |
| "learning_rate": 1e-06, |
| "loss": 0.1271, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.8283633841886269, |
| "grad_norm": 2.666243314743042, |
| "learning_rate": 1e-06, |
| "loss": 0.1173, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.8287101248266296, |
| "grad_norm": 3.4500293731689453, |
| "learning_rate": 1e-06, |
| "loss": 0.1979, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.8290568654646324, |
| "grad_norm": 2.057880401611328, |
| "learning_rate": 1e-06, |
| "loss": 0.1349, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.8294036061026352, |
| "grad_norm": 2.378908634185791, |
| "learning_rate": 1e-06, |
| "loss": 0.129, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.829750346740638, |
| "grad_norm": 1.690531849861145, |
| "learning_rate": 1e-06, |
| "loss": 0.1249, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.8300970873786407, |
| "grad_norm": 3.056180953979492, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.8304438280166435, |
| "grad_norm": 4.274582862854004, |
| "learning_rate": 1e-06, |
| "loss": 0.1123, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.8307905686546463, |
| "grad_norm": 2.3732125759124756, |
| "learning_rate": 1e-06, |
| "loss": 0.1626, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.8311373092926491, |
| "grad_norm": 2.0551693439483643, |
| "learning_rate": 1e-06, |
| "loss": 0.1381, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.8314840499306518, |
| "grad_norm": 3.1651957035064697, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.8318307905686546, |
| "grad_norm": 1.9696643352508545, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.8321775312066574, |
| "grad_norm": 2.9673922061920166, |
| "learning_rate": 1e-06, |
| "loss": 0.1699, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8325242718446602, |
| "grad_norm": 2.1757307052612305, |
| "learning_rate": 1e-06, |
| "loss": 0.1287, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.8328710124826629, |
| "grad_norm": 2.689530611038208, |
| "learning_rate": 1e-06, |
| "loss": 0.1407, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.8332177531206657, |
| "grad_norm": 3.808122396469116, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.8335644937586685, |
| "grad_norm": 2.106861114501953, |
| "learning_rate": 1e-06, |
| "loss": 0.1375, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.8339112343966713, |
| "grad_norm": 3.0327420234680176, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.834257975034674, |
| "grad_norm": 2.0406875610351562, |
| "learning_rate": 1e-06, |
| "loss": 0.1349, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.8346047156726768, |
| "grad_norm": 5.959853172302246, |
| "learning_rate": 1e-06, |
| "loss": 0.1603, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.8349514563106796, |
| "grad_norm": 2.409048080444336, |
| "learning_rate": 1e-06, |
| "loss": 0.1401, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.8352981969486823, |
| "grad_norm": 2.135956287384033, |
| "learning_rate": 1e-06, |
| "loss": 0.1406, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.8356449375866851, |
| "grad_norm": 2.256753921508789, |
| "learning_rate": 1e-06, |
| "loss": 0.1348, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8359916782246879, |
| "grad_norm": 2.43808913230896, |
| "learning_rate": 1e-06, |
| "loss": 0.1495, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.8363384188626907, |
| "grad_norm": 2.6997463703155518, |
| "learning_rate": 1e-06, |
| "loss": 0.1227, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.8366851595006934, |
| "grad_norm": 3.3730976581573486, |
| "learning_rate": 1e-06, |
| "loss": 0.1212, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.8370319001386962, |
| "grad_norm": 3.1687965393066406, |
| "learning_rate": 1e-06, |
| "loss": 0.1268, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.837378640776699, |
| "grad_norm": 2.4090065956115723, |
| "learning_rate": 1e-06, |
| "loss": 0.1449, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.8377253814147018, |
| "grad_norm": 2.7174880504608154, |
| "learning_rate": 1e-06, |
| "loss": 0.1413, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.8380721220527045, |
| "grad_norm": 2.5149476528167725, |
| "learning_rate": 1e-06, |
| "loss": 0.1568, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.8384188626907073, |
| "grad_norm": 4.081730842590332, |
| "learning_rate": 1e-06, |
| "loss": 0.1159, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.8387656033287101, |
| "grad_norm": 2.8811569213867188, |
| "learning_rate": 1e-06, |
| "loss": 0.1729, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.8391123439667129, |
| "grad_norm": 2.758857011795044, |
| "learning_rate": 1e-06, |
| "loss": 0.1391, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.8394590846047156, |
| "grad_norm": 2.5798659324645996, |
| "learning_rate": 1e-06, |
| "loss": 0.1211, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.8398058252427184, |
| "grad_norm": 5.093081951141357, |
| "learning_rate": 1e-06, |
| "loss": 0.1562, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.8401525658807212, |
| "grad_norm": 2.7829928398132324, |
| "learning_rate": 1e-06, |
| "loss": 0.116, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.840499306518724, |
| "grad_norm": 5.886650562286377, |
| "learning_rate": 1e-06, |
| "loss": 0.1584, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.8408460471567267, |
| "grad_norm": 2.695272207260132, |
| "learning_rate": 1e-06, |
| "loss": 0.1519, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.8411927877947295, |
| "grad_norm": 1.8741930723190308, |
| "learning_rate": 1e-06, |
| "loss": 0.1183, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.8415395284327323, |
| "grad_norm": 2.3263349533081055, |
| "learning_rate": 1e-06, |
| "loss": 0.1567, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.841886269070735, |
| "grad_norm": 2.279158353805542, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.8422330097087378, |
| "grad_norm": 2.275637626647949, |
| "learning_rate": 1e-06, |
| "loss": 0.1325, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.8425797503467406, |
| "grad_norm": 2.705381393432617, |
| "learning_rate": 1e-06, |
| "loss": 0.1842, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.8429264909847434, |
| "grad_norm": 2.2983760833740234, |
| "learning_rate": 1e-06, |
| "loss": 0.1288, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.8432732316227461, |
| "grad_norm": 2.5210258960723877, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.8436199722607489, |
| "grad_norm": 2.4116249084472656, |
| "learning_rate": 1e-06, |
| "loss": 0.1377, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.8439667128987517, |
| "grad_norm": 3.0181736946105957, |
| "learning_rate": 1e-06, |
| "loss": 0.1206, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.8443134535367545, |
| "grad_norm": 2.6214163303375244, |
| "learning_rate": 1e-06, |
| "loss": 0.1807, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.8446601941747572, |
| "grad_norm": 2.952951669692993, |
| "learning_rate": 1e-06, |
| "loss": 0.1392, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.84500693481276, |
| "grad_norm": 2.5941317081451416, |
| "learning_rate": 1e-06, |
| "loss": 0.1359, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.8453536754507628, |
| "grad_norm": 3.742173433303833, |
| "learning_rate": 1e-06, |
| "loss": 0.1257, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.8457004160887656, |
| "grad_norm": 2.2225148677825928, |
| "learning_rate": 1e-06, |
| "loss": 0.1341, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.8460471567267683, |
| "grad_norm": 2.1798903942108154, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8463938973647711, |
| "grad_norm": 2.250204563140869, |
| "learning_rate": 1e-06, |
| "loss": 0.1274, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.8467406380027739, |
| "grad_norm": 2.7729578018188477, |
| "learning_rate": 1e-06, |
| "loss": 0.1432, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.8470873786407767, |
| "grad_norm": 2.45955491065979, |
| "learning_rate": 1e-06, |
| "loss": 0.1196, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.8474341192787794, |
| "grad_norm": 2.97157621383667, |
| "learning_rate": 1e-06, |
| "loss": 0.1218, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.8477808599167822, |
| "grad_norm": 2.3688995838165283, |
| "learning_rate": 1e-06, |
| "loss": 0.1171, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.848127600554785, |
| "grad_norm": 2.0294275283813477, |
| "learning_rate": 1e-06, |
| "loss": 0.1428, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.8484743411927878, |
| "grad_norm": 2.387620687484741, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.8488210818307905, |
| "grad_norm": 2.6711483001708984, |
| "learning_rate": 1e-06, |
| "loss": 0.1353, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.8491678224687933, |
| "grad_norm": 1.8898664712905884, |
| "learning_rate": 1e-06, |
| "loss": 0.1174, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.8495145631067961, |
| "grad_norm": 3.391063928604126, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8498613037447988, |
| "grad_norm": 2.1252994537353516, |
| "learning_rate": 1e-06, |
| "loss": 0.1212, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.8502080443828016, |
| "grad_norm": 2.0090253353118896, |
| "learning_rate": 1e-06, |
| "loss": 0.1171, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.8505547850208044, |
| "grad_norm": 2.170214891433716, |
| "learning_rate": 1e-06, |
| "loss": 0.1487, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.8509015256588072, |
| "grad_norm": 2.922431468963623, |
| "learning_rate": 1e-06, |
| "loss": 0.1148, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.8512482662968099, |
| "grad_norm": 2.2969465255737305, |
| "learning_rate": 1e-06, |
| "loss": 0.1417, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.8515950069348127, |
| "grad_norm": 2.0030033588409424, |
| "learning_rate": 1e-06, |
| "loss": 0.1146, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.8519417475728155, |
| "grad_norm": 2.1828982830047607, |
| "learning_rate": 1e-06, |
| "loss": 0.1204, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.8522884882108183, |
| "grad_norm": 1.8292583227157593, |
| "learning_rate": 1e-06, |
| "loss": 0.1263, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.852635228848821, |
| "grad_norm": 2.6252031326293945, |
| "learning_rate": 1e-06, |
| "loss": 0.1443, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.8529819694868238, |
| "grad_norm": 2.30623459815979, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.8533287101248266, |
| "grad_norm": 2.8300130367279053, |
| "learning_rate": 1e-06, |
| "loss": 0.1644, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.8536754507628294, |
| "grad_norm": 2.417668104171753, |
| "learning_rate": 1e-06, |
| "loss": 0.1432, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.8540221914008321, |
| "grad_norm": 2.5126116275787354, |
| "learning_rate": 1e-06, |
| "loss": 0.1228, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.8543689320388349, |
| "grad_norm": 2.0431835651397705, |
| "learning_rate": 1e-06, |
| "loss": 0.135, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.8547156726768377, |
| "grad_norm": 3.008413076400757, |
| "learning_rate": 1e-06, |
| "loss": 0.1218, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.8550624133148405, |
| "grad_norm": 2.4672775268554688, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.8554091539528432, |
| "grad_norm": 3.271653652191162, |
| "learning_rate": 1e-06, |
| "loss": 0.1474, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.855755894590846, |
| "grad_norm": 2.1060640811920166, |
| "learning_rate": 1e-06, |
| "loss": 0.112, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.8561026352288488, |
| "grad_norm": 1.9942084550857544, |
| "learning_rate": 1e-06, |
| "loss": 0.1288, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.8564493758668515, |
| "grad_norm": 2.6707653999328613, |
| "learning_rate": 1e-06, |
| "loss": 0.1613, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8567961165048543, |
| "grad_norm": 3.239352226257324, |
| "learning_rate": 1e-06, |
| "loss": 0.1444, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.7429757118225098, |
| "learning_rate": 1e-06, |
| "loss": 0.1516, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.8574895977808599, |
| "grad_norm": 2.1264328956604004, |
| "learning_rate": 1e-06, |
| "loss": 0.1657, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.8578363384188626, |
| "grad_norm": 2.4109182357788086, |
| "learning_rate": 1e-06, |
| "loss": 0.1331, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.8581830790568654, |
| "grad_norm": 2.9945809841156006, |
| "learning_rate": 1e-06, |
| "loss": 0.1349, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.8585298196948682, |
| "grad_norm": 2.7945027351379395, |
| "learning_rate": 1e-06, |
| "loss": 0.151, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.858876560332871, |
| "grad_norm": 1.5665547847747803, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.8592233009708737, |
| "grad_norm": 2.601025342941284, |
| "learning_rate": 1e-06, |
| "loss": 0.1207, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.8595700416088765, |
| "grad_norm": 3.027022123336792, |
| "learning_rate": 1e-06, |
| "loss": 0.1592, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.8599167822468793, |
| "grad_norm": 2.1945347785949707, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8602635228848821, |
| "grad_norm": 2.1606740951538086, |
| "learning_rate": 1e-06, |
| "loss": 0.1387, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.8606102635228848, |
| "grad_norm": 3.8076674938201904, |
| "learning_rate": 1e-06, |
| "loss": 0.1128, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.8609570041608876, |
| "grad_norm": 3.2100863456726074, |
| "learning_rate": 1e-06, |
| "loss": 0.1345, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.8613037447988904, |
| "grad_norm": 2.945937156677246, |
| "learning_rate": 1e-06, |
| "loss": 0.1253, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.8616504854368932, |
| "grad_norm": 2.1155993938446045, |
| "learning_rate": 1e-06, |
| "loss": 0.1366, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8619972260748959, |
| "grad_norm": 2.137275457382202, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.8623439667128987, |
| "grad_norm": 3.105685234069824, |
| "learning_rate": 1e-06, |
| "loss": 0.1242, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.8626907073509015, |
| "grad_norm": 2.3001792430877686, |
| "learning_rate": 1e-06, |
| "loss": 0.1322, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.8630374479889042, |
| "grad_norm": 4.752837657928467, |
| "learning_rate": 1e-06, |
| "loss": 0.1471, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.863384188626907, |
| "grad_norm": 4.404865264892578, |
| "learning_rate": 1e-06, |
| "loss": 0.1753, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8637309292649098, |
| "grad_norm": 2.0198276042938232, |
| "learning_rate": 1e-06, |
| "loss": 0.1351, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.8640776699029126, |
| "grad_norm": 4.447784900665283, |
| "learning_rate": 1e-06, |
| "loss": 0.1736, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.8644244105409153, |
| "grad_norm": 2.295395612716675, |
| "learning_rate": 1e-06, |
| "loss": 0.1327, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.8647711511789181, |
| "grad_norm": 1.9984354972839355, |
| "learning_rate": 1e-06, |
| "loss": 0.1164, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.8651178918169209, |
| "grad_norm": 2.847456693649292, |
| "learning_rate": 1e-06, |
| "loss": 0.1441, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8654646324549237, |
| "grad_norm": 3.244466543197632, |
| "learning_rate": 1e-06, |
| "loss": 0.1434, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.8658113730929264, |
| "grad_norm": 2.110191583633423, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.8661581137309292, |
| "grad_norm": 2.554386615753174, |
| "learning_rate": 1e-06, |
| "loss": 0.1522, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.866504854368932, |
| "grad_norm": 3.446542501449585, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.8668515950069348, |
| "grad_norm": 2.2236502170562744, |
| "learning_rate": 1e-06, |
| "loss": 0.1384, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8671983356449375, |
| "grad_norm": 2.7683444023132324, |
| "learning_rate": 1e-06, |
| "loss": 0.1283, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.8675450762829403, |
| "grad_norm": 3.002000093460083, |
| "learning_rate": 1e-06, |
| "loss": 0.1742, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.8678918169209431, |
| "grad_norm": 5.195772647857666, |
| "learning_rate": 1e-06, |
| "loss": 0.1753, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.8682385575589459, |
| "grad_norm": 3.325066566467285, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.8685852981969486, |
| "grad_norm": 2.8764262199401855, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.8689320388349514, |
| "grad_norm": 4.183605670928955, |
| "learning_rate": 1e-06, |
| "loss": 0.173, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.8692787794729542, |
| "grad_norm": 2.656033515930176, |
| "learning_rate": 1e-06, |
| "loss": 0.1767, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.869625520110957, |
| "grad_norm": 2.7101523876190186, |
| "learning_rate": 1e-06, |
| "loss": 0.1274, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.8699722607489597, |
| "grad_norm": 2.912912368774414, |
| "learning_rate": 1e-06, |
| "loss": 0.1603, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.8703190013869625, |
| "grad_norm": 2.7677292823791504, |
| "learning_rate": 1e-06, |
| "loss": 0.1403, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8706657420249653, |
| "grad_norm": 7.316020965576172, |
| "learning_rate": 1e-06, |
| "loss": 0.1854, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.871012482662968, |
| "grad_norm": 2.9252498149871826, |
| "learning_rate": 1e-06, |
| "loss": 0.1385, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.8713592233009708, |
| "grad_norm": 2.6173603534698486, |
| "learning_rate": 1e-06, |
| "loss": 0.1405, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.8717059639389736, |
| "grad_norm": 3.6852455139160156, |
| "learning_rate": 1e-06, |
| "loss": 0.1621, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.8720527045769764, |
| "grad_norm": 3.2382307052612305, |
| "learning_rate": 1e-06, |
| "loss": 0.0994, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.8723994452149791, |
| "grad_norm": 2.688394784927368, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.8727461858529819, |
| "grad_norm": 3.035778522491455, |
| "learning_rate": 1e-06, |
| "loss": 0.1563, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.8730929264909847, |
| "grad_norm": 2.3126778602600098, |
| "learning_rate": 1e-06, |
| "loss": 0.1394, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.8734396671289875, |
| "grad_norm": 2.3858559131622314, |
| "learning_rate": 1e-06, |
| "loss": 0.1215, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 3.0119643211364746, |
| "learning_rate": 1e-06, |
| "loss": 0.1157, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.874133148404993, |
| "grad_norm": 2.6011157035827637, |
| "learning_rate": 1e-06, |
| "loss": 0.1646, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.8744798890429958, |
| "grad_norm": 2.6988086700439453, |
| "learning_rate": 1e-06, |
| "loss": 0.1424, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.8748266296809986, |
| "grad_norm": 2.830876588821411, |
| "learning_rate": 1e-06, |
| "loss": 0.1378, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.8751733703190014, |
| "grad_norm": 1.7022074460983276, |
| "learning_rate": 1e-06, |
| "loss": 0.1197, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.8755201109570042, |
| "grad_norm": 2.1738150119781494, |
| "learning_rate": 1e-06, |
| "loss": 0.123, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.875866851595007, |
| "grad_norm": 3.662080764770508, |
| "learning_rate": 1e-06, |
| "loss": 0.1436, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.8762135922330098, |
| "grad_norm": 1.7804447412490845, |
| "learning_rate": 1e-06, |
| "loss": 0.0925, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.8765603328710125, |
| "grad_norm": 2.030134916305542, |
| "learning_rate": 1e-06, |
| "loss": 0.1294, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.8769070735090153, |
| "grad_norm": 1.968540072441101, |
| "learning_rate": 1e-06, |
| "loss": 0.1247, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.8772538141470181, |
| "grad_norm": 3.905348539352417, |
| "learning_rate": 1e-06, |
| "loss": 0.1248, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8776005547850209, |
| "grad_norm": 2.988689661026001, |
| "learning_rate": 1e-06, |
| "loss": 0.1408, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.8779472954230236, |
| "grad_norm": 2.6646010875701904, |
| "learning_rate": 1e-06, |
| "loss": 0.137, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.8782940360610264, |
| "grad_norm": 3.523129940032959, |
| "learning_rate": 1e-06, |
| "loss": 0.1305, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.8786407766990292, |
| "grad_norm": 7.069967269897461, |
| "learning_rate": 1e-06, |
| "loss": 0.151, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.878987517337032, |
| "grad_norm": 2.2202308177948, |
| "learning_rate": 1e-06, |
| "loss": 0.1281, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.8793342579750347, |
| "grad_norm": 4.117976188659668, |
| "learning_rate": 1e-06, |
| "loss": 0.1423, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.8796809986130375, |
| "grad_norm": 2.210732936859131, |
| "learning_rate": 1e-06, |
| "loss": 0.1138, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.8800277392510403, |
| "grad_norm": 2.42452335357666, |
| "learning_rate": 1e-06, |
| "loss": 0.1292, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.880374479889043, |
| "grad_norm": 4.303539276123047, |
| "learning_rate": 1e-06, |
| "loss": 0.1631, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.8807212205270458, |
| "grad_norm": 3.3438968658447266, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.8810679611650486, |
| "grad_norm": 3.1318681240081787, |
| "learning_rate": 1e-06, |
| "loss": 0.1806, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.8814147018030514, |
| "grad_norm": 2.541802167892456, |
| "learning_rate": 1e-06, |
| "loss": 0.132, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.8817614424410541, |
| "grad_norm": 3.502878427505493, |
| "learning_rate": 1e-06, |
| "loss": 0.127, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.8821081830790569, |
| "grad_norm": 5.988008499145508, |
| "learning_rate": 1e-06, |
| "loss": 0.1334, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.8824549237170597, |
| "grad_norm": 3.4589040279388428, |
| "learning_rate": 1e-06, |
| "loss": 0.1198, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.8828016643550625, |
| "grad_norm": 4.258084774017334, |
| "learning_rate": 1e-06, |
| "loss": 0.1556, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.8831484049930652, |
| "grad_norm": 2.9290645122528076, |
| "learning_rate": 1e-06, |
| "loss": 0.1383, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.883495145631068, |
| "grad_norm": 5.24904727935791, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.8838418862690708, |
| "grad_norm": 2.883603572845459, |
| "learning_rate": 1e-06, |
| "loss": 0.1282, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.8841886269070736, |
| "grad_norm": 2.8196918964385986, |
| "learning_rate": 1e-06, |
| "loss": 0.1478, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8845353675450763, |
| "grad_norm": 7.683722972869873, |
| "learning_rate": 1e-06, |
| "loss": 0.2069, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.8848821081830791, |
| "grad_norm": 3.080634832382202, |
| "learning_rate": 1e-06, |
| "loss": 0.1451, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.8852288488210819, |
| "grad_norm": 2.3532392978668213, |
| "learning_rate": 1e-06, |
| "loss": 0.1343, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.8855755894590847, |
| "grad_norm": 2.798870325088501, |
| "learning_rate": 1e-06, |
| "loss": 0.1315, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.8859223300970874, |
| "grad_norm": 2.4134509563446045, |
| "learning_rate": 1e-06, |
| "loss": 0.1507, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.8862690707350902, |
| "grad_norm": 3.6513829231262207, |
| "learning_rate": 1e-06, |
| "loss": 0.1658, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.886615811373093, |
| "grad_norm": 4.6206817626953125, |
| "learning_rate": 1e-06, |
| "loss": 0.1489, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.8869625520110958, |
| "grad_norm": 4.091174125671387, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.8873092926490985, |
| "grad_norm": 1.8325977325439453, |
| "learning_rate": 1e-06, |
| "loss": 0.1384, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.8876560332871013, |
| "grad_norm": 2.203538656234741, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.8880027739251041, |
| "grad_norm": 3.418199300765991, |
| "learning_rate": 1e-06, |
| "loss": 0.11, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.8883495145631068, |
| "grad_norm": 3.521099805831909, |
| "learning_rate": 1e-06, |
| "loss": 0.1088, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.8886962552011096, |
| "grad_norm": 5.303309917449951, |
| "learning_rate": 1e-06, |
| "loss": 0.1504, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.8890429958391124, |
| "grad_norm": 2.706254243850708, |
| "learning_rate": 1e-06, |
| "loss": 0.1522, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.8893897364771152, |
| "grad_norm": 3.3216984272003174, |
| "learning_rate": 1e-06, |
| "loss": 0.1324, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.8897364771151179, |
| "grad_norm": 3.598803997039795, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.8900832177531207, |
| "grad_norm": 2.6814308166503906, |
| "learning_rate": 1e-06, |
| "loss": 0.1539, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.8904299583911235, |
| "grad_norm": 2.5854735374450684, |
| "learning_rate": 1e-06, |
| "loss": 0.1025, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.8907766990291263, |
| "grad_norm": 2.8812289237976074, |
| "learning_rate": 1e-06, |
| "loss": 0.1313, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.891123439667129, |
| "grad_norm": 3.4628384113311768, |
| "learning_rate": 1e-06, |
| "loss": 0.1494, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8914701803051318, |
| "grad_norm": 2.1708106994628906, |
| "learning_rate": 1e-06, |
| "loss": 0.1214, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.8918169209431346, |
| "grad_norm": 2.2976863384246826, |
| "learning_rate": 1e-06, |
| "loss": 0.1184, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.8921636615811374, |
| "grad_norm": 2.7640933990478516, |
| "learning_rate": 1e-06, |
| "loss": 0.1234, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.8925104022191401, |
| "grad_norm": 3.4392740726470947, |
| "learning_rate": 1e-06, |
| "loss": 0.1495, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 3.151865005493164, |
| "learning_rate": 1e-06, |
| "loss": 0.1204, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.8932038834951457, |
| "grad_norm": 2.665188789367676, |
| "learning_rate": 1e-06, |
| "loss": 0.1312, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.8935506241331485, |
| "grad_norm": 2.2830886840820312, |
| "learning_rate": 1e-06, |
| "loss": 0.1069, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.8938973647711512, |
| "grad_norm": 3.063302755355835, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.894244105409154, |
| "grad_norm": 2.6693129539489746, |
| "learning_rate": 1e-06, |
| "loss": 0.1242, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.8945908460471568, |
| "grad_norm": 3.1677896976470947, |
| "learning_rate": 1e-06, |
| "loss": 0.127, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8949375866851595, |
| "grad_norm": 3.562784194946289, |
| "learning_rate": 1e-06, |
| "loss": 0.1502, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.8952843273231623, |
| "grad_norm": 2.2113330364227295, |
| "learning_rate": 1e-06, |
| "loss": 0.1146, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.8956310679611651, |
| "grad_norm": 3.4925997257232666, |
| "learning_rate": 1e-06, |
| "loss": 0.133, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.8959778085991679, |
| "grad_norm": 2.26218843460083, |
| "learning_rate": 1e-06, |
| "loss": 0.14, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.8963245492371706, |
| "grad_norm": 2.437492847442627, |
| "learning_rate": 1e-06, |
| "loss": 0.1292, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.8966712898751734, |
| "grad_norm": 3.4927961826324463, |
| "learning_rate": 1e-06, |
| "loss": 0.1195, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.8970180305131762, |
| "grad_norm": 4.102070331573486, |
| "learning_rate": 1e-06, |
| "loss": 0.1356, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.897364771151179, |
| "grad_norm": 5.60887336730957, |
| "learning_rate": 1e-06, |
| "loss": 0.1479, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.8977115117891817, |
| "grad_norm": 2.31508731842041, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.8980582524271845, |
| "grad_norm": 2.7380573749542236, |
| "learning_rate": 1e-06, |
| "loss": 0.1463, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.8984049930651873, |
| "grad_norm": 1.9896982908248901, |
| "learning_rate": 1e-06, |
| "loss": 0.1162, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.8987517337031901, |
| "grad_norm": 2.7056241035461426, |
| "learning_rate": 1e-06, |
| "loss": 0.1591, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.8990984743411928, |
| "grad_norm": 1.9622806310653687, |
| "learning_rate": 1e-06, |
| "loss": 0.1138, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.8994452149791956, |
| "grad_norm": 3.957826614379883, |
| "learning_rate": 1e-06, |
| "loss": 0.1561, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.8997919556171984, |
| "grad_norm": 2.8110544681549072, |
| "learning_rate": 1e-06, |
| "loss": 0.1379, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.9001386962552012, |
| "grad_norm": 3.7810873985290527, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.9004854368932039, |
| "grad_norm": 2.166959762573242, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.9008321775312067, |
| "grad_norm": 2.021667718887329, |
| "learning_rate": 1e-06, |
| "loss": 0.1501, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.9011789181692095, |
| "grad_norm": 3.390395402908325, |
| "learning_rate": 1e-06, |
| "loss": 0.1468, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.9015256588072122, |
| "grad_norm": 2.1694798469543457, |
| "learning_rate": 1e-06, |
| "loss": 0.1283, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.901872399445215, |
| "grad_norm": 3.0354669094085693, |
| "learning_rate": 1e-06, |
| "loss": 0.1327, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.9022191400832178, |
| "grad_norm": 3.5150482654571533, |
| "learning_rate": 1e-06, |
| "loss": 0.1492, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.9025658807212206, |
| "grad_norm": 2.1176624298095703, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.9029126213592233, |
| "grad_norm": 2.6938793659210205, |
| "learning_rate": 1e-06, |
| "loss": 0.1662, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.9032593619972261, |
| "grad_norm": 3.2767770290374756, |
| "learning_rate": 1e-06, |
| "loss": 0.1132, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.9036061026352289, |
| "grad_norm": 3.4244790077209473, |
| "learning_rate": 1e-06, |
| "loss": 0.152, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.9039528432732317, |
| "grad_norm": 2.3270678520202637, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.9042995839112344, |
| "grad_norm": 2.2908077239990234, |
| "learning_rate": 1e-06, |
| "loss": 0.1561, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.9046463245492372, |
| "grad_norm": 1.976806879043579, |
| "learning_rate": 1e-06, |
| "loss": 0.1219, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.90499306518724, |
| "grad_norm": 2.578418731689453, |
| "learning_rate": 1e-06, |
| "loss": 0.1454, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.9053398058252428, |
| "grad_norm": 2.3116960525512695, |
| "learning_rate": 1e-06, |
| "loss": 0.1499, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.9056865464632455, |
| "grad_norm": 3.2435667514801025, |
| "learning_rate": 1e-06, |
| "loss": 0.1213, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.9060332871012483, |
| "grad_norm": 1.842995524406433, |
| "learning_rate": 1e-06, |
| "loss": 0.1074, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.9063800277392511, |
| "grad_norm": 2.7041714191436768, |
| "learning_rate": 1e-06, |
| "loss": 0.1504, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.9067267683772539, |
| "grad_norm": 3.233328104019165, |
| "learning_rate": 1e-06, |
| "loss": 0.1284, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.9070735090152566, |
| "grad_norm": 2.0158610343933105, |
| "learning_rate": 1e-06, |
| "loss": 0.1097, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.9074202496532594, |
| "grad_norm": 2.651226043701172, |
| "learning_rate": 1e-06, |
| "loss": 0.1276, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.9077669902912622, |
| "grad_norm": 3.511500835418701, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.908113730929265, |
| "grad_norm": 2.868360996246338, |
| "learning_rate": 1e-06, |
| "loss": 0.1482, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.9084604715672677, |
| "grad_norm": 5.283854961395264, |
| "learning_rate": 1e-06, |
| "loss": 0.1529, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.9088072122052705, |
| "grad_norm": 2.312080144882202, |
| "learning_rate": 1e-06, |
| "loss": 0.1197, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.9091539528432733, |
| "grad_norm": 2.4903831481933594, |
| "learning_rate": 1e-06, |
| "loss": 0.1488, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.909500693481276, |
| "grad_norm": 3.8020405769348145, |
| "learning_rate": 1e-06, |
| "loss": 0.1167, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.9098474341192788, |
| "grad_norm": 4.8802666664123535, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.9101941747572816, |
| "grad_norm": 2.2983484268188477, |
| "learning_rate": 1e-06, |
| "loss": 0.1255, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.9105409153952844, |
| "grad_norm": 2.7895936965942383, |
| "learning_rate": 1e-06, |
| "loss": 0.1464, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.9108876560332871, |
| "grad_norm": 3.0631260871887207, |
| "learning_rate": 1e-06, |
| "loss": 0.1165, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.9112343966712899, |
| "grad_norm": 5.066497802734375, |
| "learning_rate": 1e-06, |
| "loss": 0.1336, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.9115811373092927, |
| "grad_norm": 2.471651792526245, |
| "learning_rate": 1e-06, |
| "loss": 0.1418, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.9119278779472955, |
| "grad_norm": 2.3465819358825684, |
| "learning_rate": 1e-06, |
| "loss": 0.1309, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.9122746185852982, |
| "grad_norm": 2.3085386753082275, |
| "learning_rate": 1e-06, |
| "loss": 0.1099, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.912621359223301, |
| "grad_norm": 6.42535400390625, |
| "learning_rate": 1e-06, |
| "loss": 0.1657, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.9129680998613038, |
| "grad_norm": 2.8673324584960938, |
| "learning_rate": 1e-06, |
| "loss": 0.1509, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.9133148404993066, |
| "grad_norm": 2.016261339187622, |
| "learning_rate": 1e-06, |
| "loss": 0.1464, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.9136615811373093, |
| "grad_norm": 2.854090690612793, |
| "learning_rate": 1e-06, |
| "loss": 0.1341, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.9140083217753121, |
| "grad_norm": 2.522303342819214, |
| "learning_rate": 1e-06, |
| "loss": 0.1272, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.9143550624133149, |
| "grad_norm": 2.4291818141937256, |
| "learning_rate": 1e-06, |
| "loss": 0.1476, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.9147018030513177, |
| "grad_norm": 1.8062353134155273, |
| "learning_rate": 1e-06, |
| "loss": 0.1041, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.9150485436893204, |
| "grad_norm": 2.9814038276672363, |
| "learning_rate": 1e-06, |
| "loss": 0.1324, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.9153952843273232, |
| "grad_norm": 2.3664748668670654, |
| "learning_rate": 1e-06, |
| "loss": 0.1533, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.915742024965326, |
| "grad_norm": 2.347658634185791, |
| "learning_rate": 1e-06, |
| "loss": 0.1217, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.9160887656033287, |
| "grad_norm": 3.068976640701294, |
| "learning_rate": 1e-06, |
| "loss": 0.1383, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.9164355062413315, |
| "grad_norm": 2.92535400390625, |
| "learning_rate": 1e-06, |
| "loss": 0.1459, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.9167822468793343, |
| "grad_norm": 2.56888747215271, |
| "learning_rate": 1e-06, |
| "loss": 0.1308, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.9171289875173371, |
| "grad_norm": 2.389007806777954, |
| "learning_rate": 1e-06, |
| "loss": 0.1628, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.9174757281553398, |
| "grad_norm": 2.4458770751953125, |
| "learning_rate": 1e-06, |
| "loss": 0.1135, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.9178224687933426, |
| "grad_norm": 2.0837292671203613, |
| "learning_rate": 1e-06, |
| "loss": 0.1154, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.9181692094313454, |
| "grad_norm": 5.257343769073486, |
| "learning_rate": 1e-06, |
| "loss": 0.1318, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.9185159500693482, |
| "grad_norm": 4.321478366851807, |
| "learning_rate": 1e-06, |
| "loss": 0.1418, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.9188626907073509, |
| "grad_norm": 2.9317991733551025, |
| "learning_rate": 1e-06, |
| "loss": 0.1491, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.9192094313453537, |
| "grad_norm": 3.1797657012939453, |
| "learning_rate": 1e-06, |
| "loss": 0.1566, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.9195561719833565, |
| "grad_norm": 2.146343231201172, |
| "learning_rate": 1e-06, |
| "loss": 0.1431, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.9199029126213593, |
| "grad_norm": 2.9985647201538086, |
| "learning_rate": 1e-06, |
| "loss": 0.1404, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.920249653259362, |
| "grad_norm": 1.452409267425537, |
| "learning_rate": 1e-06, |
| "loss": 0.0951, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.9205963938973648, |
| "grad_norm": 3.1620914936065674, |
| "learning_rate": 1e-06, |
| "loss": 0.1582, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.9209431345353676, |
| "grad_norm": 4.072134971618652, |
| "learning_rate": 1e-06, |
| "loss": 0.1415, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.9212898751733704, |
| "grad_norm": 2.393535852432251, |
| "learning_rate": 1e-06, |
| "loss": 0.1214, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.9216366158113731, |
| "grad_norm": 1.983525037765503, |
| "learning_rate": 1e-06, |
| "loss": 0.1196, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.9219833564493759, |
| "grad_norm": 2.7609565258026123, |
| "learning_rate": 1e-06, |
| "loss": 0.1005, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.9223300970873787, |
| "grad_norm": 2.900486707687378, |
| "learning_rate": 1e-06, |
| "loss": 0.1112, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.9226768377253814, |
| "grad_norm": 2.1307523250579834, |
| "learning_rate": 1e-06, |
| "loss": 0.114, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.9230235783633842, |
| "grad_norm": 3.795403242111206, |
| "learning_rate": 1e-06, |
| "loss": 0.1341, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.923370319001387, |
| "grad_norm": 2.027860403060913, |
| "learning_rate": 1e-06, |
| "loss": 0.1359, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.9237170596393898, |
| "grad_norm": 1.9218776226043701, |
| "learning_rate": 1e-06, |
| "loss": 0.1206, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.9240638002773925, |
| "grad_norm": 2.235786199569702, |
| "learning_rate": 1e-06, |
| "loss": 0.1399, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.9244105409153953, |
| "grad_norm": 2.2658088207244873, |
| "learning_rate": 1e-06, |
| "loss": 0.1469, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.9247572815533981, |
| "grad_norm": 2.2411727905273438, |
| "learning_rate": 1e-06, |
| "loss": 0.1334, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.9251040221914009, |
| "grad_norm": 2.2818808555603027, |
| "learning_rate": 1e-06, |
| "loss": 0.1323, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.9254507628294036, |
| "grad_norm": 2.430490255355835, |
| "learning_rate": 1e-06, |
| "loss": 0.129, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.9257975034674064, |
| "grad_norm": 2.8982584476470947, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.9261442441054092, |
| "grad_norm": 3.195540428161621, |
| "learning_rate": 1e-06, |
| "loss": 0.1309, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.926490984743412, |
| "grad_norm": 2.102155923843384, |
| "learning_rate": 1e-06, |
| "loss": 0.1392, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.9268377253814147, |
| "grad_norm": 2.8941521644592285, |
| "learning_rate": 1e-06, |
| "loss": 0.1402, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.9271844660194175, |
| "grad_norm": 1.982313632965088, |
| "learning_rate": 1e-06, |
| "loss": 0.1215, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.9275312066574203, |
| "grad_norm": 3.4784913063049316, |
| "learning_rate": 1e-06, |
| "loss": 0.1597, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.9278779472954231, |
| "grad_norm": 2.5161221027374268, |
| "learning_rate": 1e-06, |
| "loss": 0.1721, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.9282246879334258, |
| "grad_norm": 2.5937342643737793, |
| "learning_rate": 1e-06, |
| "loss": 0.1466, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 2.7261319160461426, |
| "learning_rate": 1e-06, |
| "loss": 0.1446, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.9289181692094314, |
| "grad_norm": 2.406301498413086, |
| "learning_rate": 1e-06, |
| "loss": 0.143, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.9292649098474342, |
| "grad_norm": 3.284876823425293, |
| "learning_rate": 1e-06, |
| "loss": 0.1859, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.9296116504854369, |
| "grad_norm": 2.8308169841766357, |
| "learning_rate": 1e-06, |
| "loss": 0.1483, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.9299583911234397, |
| "grad_norm": 2.309736490249634, |
| "learning_rate": 1e-06, |
| "loss": 0.1341, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.9303051317614425, |
| "grad_norm": 2.3416709899902344, |
| "learning_rate": 1e-06, |
| "loss": 0.09, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.9306518723994452, |
| "grad_norm": 2.0100162029266357, |
| "learning_rate": 1e-06, |
| "loss": 0.1185, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.930998613037448, |
| "grad_norm": 2.5902180671691895, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.9313453536754508, |
| "grad_norm": 4.1081767082214355, |
| "learning_rate": 1e-06, |
| "loss": 0.1468, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.9316920943134536, |
| "grad_norm": 2.6473231315612793, |
| "learning_rate": 1e-06, |
| "loss": 0.1258, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 2.6274592876434326, |
| "learning_rate": 1e-06, |
| "loss": 0.1216, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.9323855755894591, |
| "grad_norm": 2.321573495864868, |
| "learning_rate": 1e-06, |
| "loss": 0.1156, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.9327323162274619, |
| "grad_norm": 2.83542799949646, |
| "learning_rate": 1e-06, |
| "loss": 0.113, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.9330790568654647, |
| "grad_norm": 3.1709511280059814, |
| "learning_rate": 1e-06, |
| "loss": 0.1644, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.9334257975034674, |
| "grad_norm": 1.8596209287643433, |
| "learning_rate": 1e-06, |
| "loss": 0.126, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.9337725381414702, |
| "grad_norm": 2.7414655685424805, |
| "learning_rate": 1e-06, |
| "loss": 0.1305, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.934119278779473, |
| "grad_norm": 3.0172135829925537, |
| "learning_rate": 1e-06, |
| "loss": 0.1463, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.9344660194174758, |
| "grad_norm": 2.8152413368225098, |
| "learning_rate": 1e-06, |
| "loss": 0.1396, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.9348127600554785, |
| "grad_norm": 2.0890872478485107, |
| "learning_rate": 1e-06, |
| "loss": 0.1034, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.9351595006934813, |
| "grad_norm": 2.5807907581329346, |
| "learning_rate": 1e-06, |
| "loss": 0.1488, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.9355062413314841, |
| "grad_norm": 2.900175094604492, |
| "learning_rate": 1e-06, |
| "loss": 0.1427, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.9358529819694869, |
| "grad_norm": 2.0167062282562256, |
| "learning_rate": 1e-06, |
| "loss": 0.1071, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.9361997226074896, |
| "grad_norm": 2.26310658454895, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9365464632454924, |
| "grad_norm": 2.191679000854492, |
| "learning_rate": 1e-06, |
| "loss": 0.1299, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.9368932038834952, |
| "grad_norm": 2.8167102336883545, |
| "learning_rate": 1e-06, |
| "loss": 0.1317, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.937239944521498, |
| "grad_norm": 2.700887441635132, |
| "learning_rate": 1e-06, |
| "loss": 0.1726, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.9375866851595007, |
| "grad_norm": 2.511220932006836, |
| "learning_rate": 1e-06, |
| "loss": 0.1579, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.9379334257975035, |
| "grad_norm": 3.294839859008789, |
| "learning_rate": 1e-06, |
| "loss": 0.142, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.9382801664355063, |
| "grad_norm": 2.9182393550872803, |
| "learning_rate": 1e-06, |
| "loss": 0.1242, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.938626907073509, |
| "grad_norm": 1.9845600128173828, |
| "learning_rate": 1e-06, |
| "loss": 0.1103, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.9389736477115118, |
| "grad_norm": 3.3602540493011475, |
| "learning_rate": 1e-06, |
| "loss": 0.1374, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.9393203883495146, |
| "grad_norm": 2.03686261177063, |
| "learning_rate": 1e-06, |
| "loss": 0.1203, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.9396671289875174, |
| "grad_norm": 2.6273975372314453, |
| "learning_rate": 1e-06, |
| "loss": 0.1296, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.9400138696255201, |
| "grad_norm": 2.5899906158447266, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.9403606102635229, |
| "grad_norm": 2.5806844234466553, |
| "learning_rate": 1e-06, |
| "loss": 0.1371, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.9407073509015257, |
| "grad_norm": 2.3415279388427734, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.9410540915395285, |
| "grad_norm": 3.5045316219329834, |
| "learning_rate": 1e-06, |
| "loss": 0.1382, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.9414008321775312, |
| "grad_norm": 2.0843143463134766, |
| "learning_rate": 1e-06, |
| "loss": 0.1078, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.941747572815534, |
| "grad_norm": 2.7572216987609863, |
| "learning_rate": 1e-06, |
| "loss": 0.1407, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.9420943134535368, |
| "grad_norm": 2.4735355377197266, |
| "learning_rate": 1e-06, |
| "loss": 0.1305, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.9424410540915396, |
| "grad_norm": 2.1515815258026123, |
| "learning_rate": 1e-06, |
| "loss": 0.1231, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.9427877947295423, |
| "grad_norm": 3.494328737258911, |
| "learning_rate": 1e-06, |
| "loss": 0.1017, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.9431345353675451, |
| "grad_norm": 2.391047239303589, |
| "learning_rate": 1e-06, |
| "loss": 0.125, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.9434812760055479, |
| "grad_norm": 1.7509092092514038, |
| "learning_rate": 1e-06, |
| "loss": 0.1099, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.9438280166435506, |
| "grad_norm": 2.7417235374450684, |
| "learning_rate": 1e-06, |
| "loss": 0.1167, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.9441747572815534, |
| "grad_norm": 2.740318536758423, |
| "learning_rate": 1e-06, |
| "loss": 0.1291, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.9445214979195562, |
| "grad_norm": 3.362915515899658, |
| "learning_rate": 1e-06, |
| "loss": 0.1361, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.944868238557559, |
| "grad_norm": 2.27724552154541, |
| "learning_rate": 1e-06, |
| "loss": 0.1218, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.9452149791955617, |
| "grad_norm": 2.4609215259552, |
| "learning_rate": 1e-06, |
| "loss": 0.1397, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.9455617198335645, |
| "grad_norm": 3.4139716625213623, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.9459084604715673, |
| "grad_norm": 2.5708959102630615, |
| "learning_rate": 1e-06, |
| "loss": 0.1173, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.9462552011095701, |
| "grad_norm": 2.1245861053466797, |
| "learning_rate": 1e-06, |
| "loss": 0.1325, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.9466019417475728, |
| "grad_norm": 3.1224300861358643, |
| "learning_rate": 1e-06, |
| "loss": 0.1674, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.9469486823855756, |
| "grad_norm": 2.9298243522644043, |
| "learning_rate": 1e-06, |
| "loss": 0.1628, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.9472954230235784, |
| "grad_norm": 2.5283334255218506, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.9476421636615812, |
| "grad_norm": 1.95565927028656, |
| "learning_rate": 1e-06, |
| "loss": 0.1154, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.9479889042995839, |
| "grad_norm": 2.800057888031006, |
| "learning_rate": 1e-06, |
| "loss": 0.1088, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.9483356449375867, |
| "grad_norm": 2.9999020099639893, |
| "learning_rate": 1e-06, |
| "loss": 0.1263, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.9486823855755895, |
| "grad_norm": 2.755236864089966, |
| "learning_rate": 1e-06, |
| "loss": 0.1282, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.9490291262135923, |
| "grad_norm": 2.161933422088623, |
| "learning_rate": 1e-06, |
| "loss": 0.1441, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.949375866851595, |
| "grad_norm": 2.8767547607421875, |
| "learning_rate": 1e-06, |
| "loss": 0.1047, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.9497226074895978, |
| "grad_norm": 2.172853946685791, |
| "learning_rate": 1e-06, |
| "loss": 0.1201, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.9500693481276006, |
| "grad_norm": 2.792728900909424, |
| "learning_rate": 1e-06, |
| "loss": 0.1136, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.9504160887656034, |
| "grad_norm": 2.2738940715789795, |
| "learning_rate": 1e-06, |
| "loss": 0.0994, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.9507628294036061, |
| "grad_norm": 3.087293863296509, |
| "learning_rate": 1e-06, |
| "loss": 0.1417, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.9511095700416089, |
| "grad_norm": 2.7275638580322266, |
| "learning_rate": 1e-06, |
| "loss": 0.1498, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.9514563106796117, |
| "grad_norm": 3.0619494915008545, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.9518030513176144, |
| "grad_norm": 2.5986380577087402, |
| "learning_rate": 1e-06, |
| "loss": 0.1363, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.9521497919556172, |
| "grad_norm": 3.417404890060425, |
| "learning_rate": 1e-06, |
| "loss": 0.1448, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.95249653259362, |
| "grad_norm": 4.566203594207764, |
| "learning_rate": 1e-06, |
| "loss": 0.1653, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.9528432732316228, |
| "grad_norm": 3.027130126953125, |
| "learning_rate": 1e-06, |
| "loss": 0.1543, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.9531900138696255, |
| "grad_norm": 3.81862211227417, |
| "learning_rate": 1e-06, |
| "loss": 0.1651, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.9535367545076283, |
| "grad_norm": 2.1442902088165283, |
| "learning_rate": 1e-06, |
| "loss": 0.1094, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.9538834951456311, |
| "grad_norm": 2.5918867588043213, |
| "learning_rate": 1e-06, |
| "loss": 0.1348, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.9542302357836339, |
| "grad_norm": 2.379676103591919, |
| "learning_rate": 1e-06, |
| "loss": 0.1376, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.9545769764216366, |
| "grad_norm": 2.339801549911499, |
| "learning_rate": 1e-06, |
| "loss": 0.1239, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.9549237170596394, |
| "grad_norm": 3.523301839828491, |
| "learning_rate": 1e-06, |
| "loss": 0.1241, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.9552704576976422, |
| "grad_norm": 5.052760124206543, |
| "learning_rate": 1e-06, |
| "loss": 0.132, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.955617198335645, |
| "grad_norm": 2.946012020111084, |
| "learning_rate": 1e-06, |
| "loss": 0.1742, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.9559639389736477, |
| "grad_norm": 3.70878529548645, |
| "learning_rate": 1e-06, |
| "loss": 0.1526, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.9563106796116505, |
| "grad_norm": 2.7530357837677, |
| "learning_rate": 1e-06, |
| "loss": 0.1757, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.9566574202496533, |
| "grad_norm": 5.642550945281982, |
| "learning_rate": 1e-06, |
| "loss": 0.1614, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.957004160887656, |
| "grad_norm": 3.5328824520111084, |
| "learning_rate": 1e-06, |
| "loss": 0.1212, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9573509015256588, |
| "grad_norm": 2.6719205379486084, |
| "learning_rate": 1e-06, |
| "loss": 0.136, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.9576976421636616, |
| "grad_norm": 3.643240213394165, |
| "learning_rate": 1e-06, |
| "loss": 0.1279, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.9580443828016644, |
| "grad_norm": 3.318526029586792, |
| "learning_rate": 1e-06, |
| "loss": 0.1327, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.9583911234396671, |
| "grad_norm": 2.845430850982666, |
| "learning_rate": 1e-06, |
| "loss": 0.1291, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.9587378640776699, |
| "grad_norm": 2.6309032440185547, |
| "learning_rate": 1e-06, |
| "loss": 0.1295, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.9590846047156727, |
| "grad_norm": 2.3172414302825928, |
| "learning_rate": 1e-06, |
| "loss": 0.1494, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.9594313453536755, |
| "grad_norm": 2.4579062461853027, |
| "learning_rate": 1e-06, |
| "loss": 0.1252, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.9597780859916782, |
| "grad_norm": 2.277562379837036, |
| "learning_rate": 1e-06, |
| "loss": 0.127, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.960124826629681, |
| "grad_norm": 2.561744451522827, |
| "learning_rate": 1e-06, |
| "loss": 0.1183, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.9604715672676838, |
| "grad_norm": 2.368710517883301, |
| "learning_rate": 1e-06, |
| "loss": 0.1209, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.9608183079056866, |
| "grad_norm": 2.4350576400756836, |
| "learning_rate": 1e-06, |
| "loss": 0.1615, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.9611650485436893, |
| "grad_norm": 2.933718204498291, |
| "learning_rate": 1e-06, |
| "loss": 0.1183, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.9615117891816921, |
| "grad_norm": 2.6752936840057373, |
| "learning_rate": 1e-06, |
| "loss": 0.1351, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.9618585298196949, |
| "grad_norm": 2.8626444339752197, |
| "learning_rate": 1e-06, |
| "loss": 0.1445, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.9622052704576977, |
| "grad_norm": 2.7919156551361084, |
| "learning_rate": 1e-06, |
| "loss": 0.1015, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.9625520110957004, |
| "grad_norm": 2.687023878097534, |
| "learning_rate": 1e-06, |
| "loss": 0.1398, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.9628987517337032, |
| "grad_norm": 3.4772815704345703, |
| "learning_rate": 1e-06, |
| "loss": 0.1276, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.963245492371706, |
| "grad_norm": 3.7592716217041016, |
| "learning_rate": 1e-06, |
| "loss": 0.1196, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.9635922330097088, |
| "grad_norm": 2.2832236289978027, |
| "learning_rate": 1e-06, |
| "loss": 0.1149, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.9639389736477115, |
| "grad_norm": 2.2533531188964844, |
| "learning_rate": 1e-06, |
| "loss": 0.1361, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 2.458859920501709, |
| "learning_rate": 1e-06, |
| "loss": 0.1248, |
| "step": 2781 |
| }, |
| { |
| "epoch": 0.9646324549237171, |
| "grad_norm": 2.3881120681762695, |
| "learning_rate": 1e-06, |
| "loss": 0.1365, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.9649791955617198, |
| "grad_norm": 2.8706626892089844, |
| "learning_rate": 1e-06, |
| "loss": 0.1324, |
| "step": 2783 |
| }, |
| { |
| "epoch": 0.9653259361997226, |
| "grad_norm": 2.1797640323638916, |
| "learning_rate": 1e-06, |
| "loss": 0.1178, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.9656726768377254, |
| "grad_norm": 3.3081295490264893, |
| "learning_rate": 1e-06, |
| "loss": 0.1058, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.9660194174757282, |
| "grad_norm": 4.193579196929932, |
| "learning_rate": 1e-06, |
| "loss": 0.1522, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.9663661581137309, |
| "grad_norm": 2.6109237670898438, |
| "learning_rate": 1e-06, |
| "loss": 0.1091, |
| "step": 2787 |
| }, |
| { |
| "epoch": 0.9667128987517337, |
| "grad_norm": 2.3282434940338135, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.9670596393897365, |
| "grad_norm": 2.578936815261841, |
| "learning_rate": 1e-06, |
| "loss": 0.1161, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.9674063800277393, |
| "grad_norm": 3.2605860233306885, |
| "learning_rate": 1e-06, |
| "loss": 0.1535, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.967753120665742, |
| "grad_norm": 2.45969295501709, |
| "learning_rate": 1e-06, |
| "loss": 0.1199, |
| "step": 2791 |
| }, |
| { |
| "epoch": 0.9680998613037448, |
| "grad_norm": 3.5829577445983887, |
| "learning_rate": 1e-06, |
| "loss": 0.1001, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.9684466019417476, |
| "grad_norm": 2.413280963897705, |
| "learning_rate": 1e-06, |
| "loss": 0.1261, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.9687933425797504, |
| "grad_norm": 2.541856050491333, |
| "learning_rate": 1e-06, |
| "loss": 0.106, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.9691400832177531, |
| "grad_norm": 1.7989451885223389, |
| "learning_rate": 1e-06, |
| "loss": 0.1139, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.9694868238557559, |
| "grad_norm": 5.392777442932129, |
| "learning_rate": 1e-06, |
| "loss": 0.1379, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.9698335644937587, |
| "grad_norm": 2.561255931854248, |
| "learning_rate": 1e-06, |
| "loss": 0.1301, |
| "step": 2797 |
| }, |
| { |
| "epoch": 0.9701803051317615, |
| "grad_norm": 2.3186416625976562, |
| "learning_rate": 1e-06, |
| "loss": 0.1049, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.9705270457697642, |
| "grad_norm": 2.255427122116089, |
| "learning_rate": 1e-06, |
| "loss": 0.1084, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 3.2925469875335693, |
| "learning_rate": 1e-06, |
| "loss": 0.1509, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9712205270457698, |
| "grad_norm": 2.2866995334625244, |
| "learning_rate": 1e-06, |
| "loss": 0.1221, |
| "step": 2801 |
| }, |
| { |
| "epoch": 0.9715672676837726, |
| "grad_norm": 2.409914016723633, |
| "learning_rate": 1e-06, |
| "loss": 0.1279, |
| "step": 2802 |
| }, |
| { |
| "epoch": 0.9719140083217753, |
| "grad_norm": 2.746307849884033, |
| "learning_rate": 1e-06, |
| "loss": 0.1119, |
| "step": 2803 |
| }, |
| { |
| "epoch": 0.9722607489597781, |
| "grad_norm": 2.264927387237549, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2804 |
| }, |
| { |
| "epoch": 0.9726074895977809, |
| "grad_norm": 1.9397131204605103, |
| "learning_rate": 1e-06, |
| "loss": 0.0929, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.9729542302357836, |
| "grad_norm": 4.742498397827148, |
| "learning_rate": 1e-06, |
| "loss": 0.1577, |
| "step": 2806 |
| }, |
| { |
| "epoch": 0.9733009708737864, |
| "grad_norm": 3.283451795578003, |
| "learning_rate": 1e-06, |
| "loss": 0.114, |
| "step": 2807 |
| }, |
| { |
| "epoch": 0.9736477115117892, |
| "grad_norm": 2.6818947792053223, |
| "learning_rate": 1e-06, |
| "loss": 0.1446, |
| "step": 2808 |
| }, |
| { |
| "epoch": 0.973994452149792, |
| "grad_norm": 4.0017476081848145, |
| "learning_rate": 1e-06, |
| "loss": 0.1241, |
| "step": 2809 |
| }, |
| { |
| "epoch": 0.9743411927877947, |
| "grad_norm": 2.343931198120117, |
| "learning_rate": 1e-06, |
| "loss": 0.1157, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9746879334257975, |
| "grad_norm": 2.369596004486084, |
| "learning_rate": 1e-06, |
| "loss": 0.1451, |
| "step": 2811 |
| }, |
| { |
| "epoch": 0.9750346740638003, |
| "grad_norm": 2.386434316635132, |
| "learning_rate": 1e-06, |
| "loss": 0.1205, |
| "step": 2812 |
| }, |
| { |
| "epoch": 0.9753814147018031, |
| "grad_norm": 1.8188420534133911, |
| "learning_rate": 1e-06, |
| "loss": 0.0877, |
| "step": 2813 |
| }, |
| { |
| "epoch": 0.9757281553398058, |
| "grad_norm": 2.927330493927002, |
| "learning_rate": 1e-06, |
| "loss": 0.1493, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.9760748959778086, |
| "grad_norm": 2.0784151554107666, |
| "learning_rate": 1e-06, |
| "loss": 0.1269, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.9764216366158114, |
| "grad_norm": 2.062995433807373, |
| "learning_rate": 1e-06, |
| "loss": 0.1287, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.9767683772538142, |
| "grad_norm": 2.0508055686950684, |
| "learning_rate": 1e-06, |
| "loss": 0.0901, |
| "step": 2817 |
| }, |
| { |
| "epoch": 0.9771151178918169, |
| "grad_norm": 3.7478532791137695, |
| "learning_rate": 1e-06, |
| "loss": 0.1304, |
| "step": 2818 |
| }, |
| { |
| "epoch": 0.9774618585298197, |
| "grad_norm": 2.1847145557403564, |
| "learning_rate": 1e-06, |
| "loss": 0.1275, |
| "step": 2819 |
| }, |
| { |
| "epoch": 0.9778085991678225, |
| "grad_norm": 2.580650568008423, |
| "learning_rate": 1e-06, |
| "loss": 0.157, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9781553398058253, |
| "grad_norm": 2.9201722145080566, |
| "learning_rate": 1e-06, |
| "loss": 0.1486, |
| "step": 2821 |
| }, |
| { |
| "epoch": 0.978502080443828, |
| "grad_norm": 5.508189678192139, |
| "learning_rate": 1e-06, |
| "loss": 0.1458, |
| "step": 2822 |
| }, |
| { |
| "epoch": 0.9788488210818308, |
| "grad_norm": 3.1913814544677734, |
| "learning_rate": 1e-06, |
| "loss": 0.1162, |
| "step": 2823 |
| }, |
| { |
| "epoch": 0.9791955617198336, |
| "grad_norm": 4.063955783843994, |
| "learning_rate": 1e-06, |
| "loss": 0.159, |
| "step": 2824 |
| }, |
| { |
| "epoch": 0.9795423023578363, |
| "grad_norm": 2.553622245788574, |
| "learning_rate": 1e-06, |
| "loss": 0.1343, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.9798890429958391, |
| "grad_norm": 2.749812364578247, |
| "learning_rate": 1e-06, |
| "loss": 0.1393, |
| "step": 2826 |
| }, |
| { |
| "epoch": 0.9802357836338419, |
| "grad_norm": 3.2445449829101562, |
| "learning_rate": 1e-06, |
| "loss": 0.1138, |
| "step": 2827 |
| }, |
| { |
| "epoch": 0.9805825242718447, |
| "grad_norm": 2.1383461952209473, |
| "learning_rate": 1e-06, |
| "loss": 0.1053, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.9809292649098474, |
| "grad_norm": 3.216644763946533, |
| "learning_rate": 1e-06, |
| "loss": 0.0934, |
| "step": 2829 |
| }, |
| { |
| "epoch": 0.9812760055478502, |
| "grad_norm": 2.851259231567383, |
| "learning_rate": 1e-06, |
| "loss": 0.1455, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.981622746185853, |
| "grad_norm": 3.037447452545166, |
| "learning_rate": 1e-06, |
| "loss": 0.1392, |
| "step": 2831 |
| }, |
| { |
| "epoch": 0.9819694868238558, |
| "grad_norm": 3.316592216491699, |
| "learning_rate": 1e-06, |
| "loss": 0.1772, |
| "step": 2832 |
| }, |
| { |
| "epoch": 0.9823162274618585, |
| "grad_norm": 2.238222599029541, |
| "learning_rate": 1e-06, |
| "loss": 0.1162, |
| "step": 2833 |
| }, |
| { |
| "epoch": 0.9826629680998613, |
| "grad_norm": 2.7036538124084473, |
| "learning_rate": 1e-06, |
| "loss": 0.1116, |
| "step": 2834 |
| }, |
| { |
| "epoch": 0.9830097087378641, |
| "grad_norm": 2.6380455493927, |
| "learning_rate": 1e-06, |
| "loss": 0.0934, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.9833564493758669, |
| "grad_norm": 2.5659232139587402, |
| "learning_rate": 1e-06, |
| "loss": 0.1418, |
| "step": 2836 |
| }, |
| { |
| "epoch": 0.9837031900138696, |
| "grad_norm": 3.315857172012329, |
| "learning_rate": 1e-06, |
| "loss": 0.16, |
| "step": 2837 |
| }, |
| { |
| "epoch": 0.9840499306518724, |
| "grad_norm": 2.2236742973327637, |
| "learning_rate": 1e-06, |
| "loss": 0.1433, |
| "step": 2838 |
| }, |
| { |
| "epoch": 0.9843966712898752, |
| "grad_norm": 2.794739246368408, |
| "learning_rate": 1e-06, |
| "loss": 0.1276, |
| "step": 2839 |
| }, |
| { |
| "epoch": 0.984743411927878, |
| "grad_norm": 1.8826152086257935, |
| "learning_rate": 1e-06, |
| "loss": 0.1023, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.9850901525658807, |
| "grad_norm": 2.971917152404785, |
| "learning_rate": 1e-06, |
| "loss": 0.1238, |
| "step": 2841 |
| }, |
| { |
| "epoch": 0.9854368932038835, |
| "grad_norm": 3.801140069961548, |
| "learning_rate": 1e-06, |
| "loss": 0.1215, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.9857836338418863, |
| "grad_norm": 5.8529276847839355, |
| "learning_rate": 1e-06, |
| "loss": 0.1557, |
| "step": 2843 |
| }, |
| { |
| "epoch": 0.986130374479889, |
| "grad_norm": 3.4524965286254883, |
| "learning_rate": 1e-06, |
| "loss": 0.1246, |
| "step": 2844 |
| }, |
| { |
| "epoch": 0.9864771151178918, |
| "grad_norm": 2.8966329097747803, |
| "learning_rate": 1e-06, |
| "loss": 0.1083, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.9868238557558946, |
| "grad_norm": 3.0084590911865234, |
| "learning_rate": 1e-06, |
| "loss": 0.1523, |
| "step": 2846 |
| }, |
| { |
| "epoch": 0.9871705963938974, |
| "grad_norm": 2.85929536819458, |
| "learning_rate": 1e-06, |
| "loss": 0.1506, |
| "step": 2847 |
| }, |
| { |
| "epoch": 0.9875173370319001, |
| "grad_norm": 3.1562740802764893, |
| "learning_rate": 1e-06, |
| "loss": 0.1013, |
| "step": 2848 |
| }, |
| { |
| "epoch": 0.9878640776699029, |
| "grad_norm": 2.7616796493530273, |
| "learning_rate": 1e-06, |
| "loss": 0.139, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.9882108183079057, |
| "grad_norm": 2.781879186630249, |
| "learning_rate": 1e-06, |
| "loss": 0.164, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9885575589459085, |
| "grad_norm": 2.50597882270813, |
| "learning_rate": 1e-06, |
| "loss": 0.1022, |
| "step": 2851 |
| }, |
| { |
| "epoch": 0.9889042995839112, |
| "grad_norm": 4.0737833976745605, |
| "learning_rate": 1e-06, |
| "loss": 0.1652, |
| "step": 2852 |
| }, |
| { |
| "epoch": 0.989251040221914, |
| "grad_norm": 3.9901700019836426, |
| "learning_rate": 1e-06, |
| "loss": 0.1411, |
| "step": 2853 |
| }, |
| { |
| "epoch": 0.9895977808599168, |
| "grad_norm": 2.159637928009033, |
| "learning_rate": 1e-06, |
| "loss": 0.1058, |
| "step": 2854 |
| }, |
| { |
| "epoch": 0.9899445214979196, |
| "grad_norm": 3.102248430252075, |
| "learning_rate": 1e-06, |
| "loss": 0.136, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.9902912621359223, |
| "grad_norm": 2.2245311737060547, |
| "learning_rate": 1e-06, |
| "loss": 0.1498, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.9906380027739251, |
| "grad_norm": 3.013869524002075, |
| "learning_rate": 1e-06, |
| "loss": 0.1276, |
| "step": 2857 |
| }, |
| { |
| "epoch": 0.9909847434119279, |
| "grad_norm": 2.9369828701019287, |
| "learning_rate": 1e-06, |
| "loss": 0.1353, |
| "step": 2858 |
| }, |
| { |
| "epoch": 0.9913314840499307, |
| "grad_norm": 2.4606969356536865, |
| "learning_rate": 1e-06, |
| "loss": 0.1251, |
| "step": 2859 |
| }, |
| { |
| "epoch": 0.9916782246879334, |
| "grad_norm": 2.1041226387023926, |
| "learning_rate": 1e-06, |
| "loss": 0.1168, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9920249653259362, |
| "grad_norm": 2.6059367656707764, |
| "learning_rate": 1e-06, |
| "loss": 0.1419, |
| "step": 2861 |
| }, |
| { |
| "epoch": 0.992371705963939, |
| "grad_norm": 2.68571138381958, |
| "learning_rate": 1e-06, |
| "loss": 0.147, |
| "step": 2862 |
| }, |
| { |
| "epoch": 0.9927184466019418, |
| "grad_norm": 2.2065820693969727, |
| "learning_rate": 1e-06, |
| "loss": 0.1141, |
| "step": 2863 |
| }, |
| { |
| "epoch": 0.9930651872399445, |
| "grad_norm": 2.7700083255767822, |
| "learning_rate": 1e-06, |
| "loss": 0.1321, |
| "step": 2864 |
| }, |
| { |
| "epoch": 0.9934119278779473, |
| "grad_norm": 1.815712571144104, |
| "learning_rate": 1e-06, |
| "loss": 0.104, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.9937586685159501, |
| "grad_norm": 2.593801259994507, |
| "learning_rate": 1e-06, |
| "loss": 0.1425, |
| "step": 2866 |
| }, |
| { |
| "epoch": 0.9941054091539528, |
| "grad_norm": 2.9171457290649414, |
| "learning_rate": 1e-06, |
| "loss": 0.1342, |
| "step": 2867 |
| }, |
| { |
| "epoch": 0.9944521497919556, |
| "grad_norm": 4.014968395233154, |
| "learning_rate": 1e-06, |
| "loss": 0.1385, |
| "step": 2868 |
| }, |
| { |
| "epoch": 0.9947988904299584, |
| "grad_norm": 2.8752143383026123, |
| "learning_rate": 1e-06, |
| "loss": 0.1386, |
| "step": 2869 |
| }, |
| { |
| "epoch": 0.9951456310679612, |
| "grad_norm": 3.8755412101745605, |
| "learning_rate": 1e-06, |
| "loss": 0.1815, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.9954923717059639, |
| "grad_norm": 2.6354281902313232, |
| "learning_rate": 1e-06, |
| "loss": 0.1115, |
| "step": 2871 |
| }, |
| { |
| "epoch": 0.9958391123439667, |
| "grad_norm": 2.4390392303466797, |
| "learning_rate": 1e-06, |
| "loss": 0.1543, |
| "step": 2872 |
| }, |
| { |
| "epoch": 0.9961858529819695, |
| "grad_norm": 2.396690607070923, |
| "learning_rate": 1e-06, |
| "loss": 0.1096, |
| "step": 2873 |
| }, |
| { |
| "epoch": 0.9965325936199723, |
| "grad_norm": 2.379737615585327, |
| "learning_rate": 1e-06, |
| "loss": 0.1426, |
| "step": 2874 |
| }, |
| { |
| "epoch": 0.996879334257975, |
| "grad_norm": 3.5744974613189697, |
| "learning_rate": 1e-06, |
| "loss": 0.1485, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.9972260748959778, |
| "grad_norm": 2.513230323791504, |
| "learning_rate": 1e-06, |
| "loss": 0.1598, |
| "step": 2876 |
| }, |
| { |
| "epoch": 0.9975728155339806, |
| "grad_norm": 2.260457992553711, |
| "learning_rate": 1e-06, |
| "loss": 0.1226, |
| "step": 2877 |
| }, |
| { |
| "epoch": 0.9979195561719834, |
| "grad_norm": 2.7690110206604004, |
| "learning_rate": 1e-06, |
| "loss": 0.1366, |
| "step": 2878 |
| }, |
| { |
| "epoch": 0.9982662968099861, |
| "grad_norm": 2.5981392860412598, |
| "learning_rate": 1e-06, |
| "loss": 0.1333, |
| "step": 2879 |
| }, |
| { |
| "epoch": 0.9986130374479889, |
| "grad_norm": 3.166869878768921, |
| "learning_rate": 1e-06, |
| "loss": 0.1061, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9989597780859917, |
| "grad_norm": 1.9656025171279907, |
| "learning_rate": 1e-06, |
| "loss": 0.12, |
| "step": 2881 |
| }, |
| { |
| "epoch": 0.9993065187239945, |
| "grad_norm": 2.704719066619873, |
| "learning_rate": 1e-06, |
| "loss": 0.124, |
| "step": 2882 |
| }, |
| { |
| "epoch": 0.9996532593619972, |
| "grad_norm": 2.3944320678710938, |
| "learning_rate": 1e-06, |
| "loss": 0.1216, |
| "step": 2883 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.362536907196045, |
| "learning_rate": 1e-06, |
| "loss": 0.1326, |
| "step": 2884 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2884, |
| "total_flos": 9.774378103106175e+18, |
| "train_loss": 0.16740735881443908, |
| "train_runtime": 15269.0756, |
| "train_samples_per_second": 24.172, |
| "train_steps_per_second": 0.189 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2884, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.774378103106175e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|