| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6598482349059717, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00021994941163532388, |
| "grad_norm": 0.7960259914398193, |
| "learning_rate": 8e-05, |
| "loss": 2.2745, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00043989882327064776, |
| "grad_norm": 0.3960127830505371, |
| "learning_rate": 8e-05, |
| "loss": 1.9155, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0006598482349059716, |
| "grad_norm": 0.3869185745716095, |
| "learning_rate": 8e-05, |
| "loss": 1.8754, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0008797976465412955, |
| "grad_norm": 0.33234134316444397, |
| "learning_rate": 8e-05, |
| "loss": 1.9126, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0010997470581766194, |
| "grad_norm": 0.3670472502708435, |
| "learning_rate": 8e-05, |
| "loss": 1.9912, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0013196964698119432, |
| "grad_norm": 0.32942938804626465, |
| "learning_rate": 8e-05, |
| "loss": 1.8746, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.001539645881447267, |
| "grad_norm": 0.3588086664676666, |
| "learning_rate": 8e-05, |
| "loss": 1.9545, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.001759595293082591, |
| "grad_norm": 0.33002492785453796, |
| "learning_rate": 8e-05, |
| "loss": 1.8781, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.001979544704717915, |
| "grad_norm": 0.3024381101131439, |
| "learning_rate": 8e-05, |
| "loss": 1.8859, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.002199494116353239, |
| "grad_norm": 0.3224199712276459, |
| "learning_rate": 8e-05, |
| "loss": 1.829, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0024194435279885624, |
| "grad_norm": 0.31481102108955383, |
| "learning_rate": 8e-05, |
| "loss": 1.817, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0026393929396238865, |
| "grad_norm": 0.3078259825706482, |
| "learning_rate": 8e-05, |
| "loss": 1.9631, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0028593423512592105, |
| "grad_norm": 0.3141743540763855, |
| "learning_rate": 8e-05, |
| "loss": 1.8879, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.003079291762894534, |
| "grad_norm": 0.29574745893478394, |
| "learning_rate": 8e-05, |
| "loss": 1.799, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.003299241174529858, |
| "grad_norm": 0.3095031976699829, |
| "learning_rate": 8e-05, |
| "loss": 1.8741, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.003519190586165182, |
| "grad_norm": 0.28804337978363037, |
| "learning_rate": 8e-05, |
| "loss": 1.9931, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0037391399978005057, |
| "grad_norm": 0.25137585401535034, |
| "learning_rate": 8e-05, |
| "loss": 1.6762, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00395908940943583, |
| "grad_norm": 0.28565698862075806, |
| "learning_rate": 8e-05, |
| "loss": 1.8489, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.004179038821071153, |
| "grad_norm": 0.2877500355243683, |
| "learning_rate": 8e-05, |
| "loss": 1.8871, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.004398988232706478, |
| "grad_norm": 0.28803154826164246, |
| "learning_rate": 8e-05, |
| "loss": 1.6956, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004618937644341801, |
| "grad_norm": 0.32161301374435425, |
| "learning_rate": 8e-05, |
| "loss": 1.8243, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.004838887055977125, |
| "grad_norm": 0.2959391176700592, |
| "learning_rate": 8e-05, |
| "loss": 1.8991, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.005058836467612449, |
| "grad_norm": 0.3021189868450165, |
| "learning_rate": 8e-05, |
| "loss": 1.9975, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.005278785879247773, |
| "grad_norm": 0.2793104946613312, |
| "learning_rate": 8e-05, |
| "loss": 1.8792, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0054987352908830965, |
| "grad_norm": 0.2658381760120392, |
| "learning_rate": 8e-05, |
| "loss": 1.6467, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.005718684702518421, |
| "grad_norm": 0.2793010175228119, |
| "learning_rate": 8e-05, |
| "loss": 1.7479, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0059386341141537445, |
| "grad_norm": 0.2800044119358063, |
| "learning_rate": 8e-05, |
| "loss": 1.7885, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.006158583525789068, |
| "grad_norm": 0.2864585220813751, |
| "learning_rate": 8e-05, |
| "loss": 1.9257, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.006378532937424393, |
| "grad_norm": 0.301496684551239, |
| "learning_rate": 8e-05, |
| "loss": 1.8586, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.006598482349059716, |
| "grad_norm": 0.2858293354511261, |
| "learning_rate": 8e-05, |
| "loss": 1.8541, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.00681843176069504, |
| "grad_norm": 0.31271278858184814, |
| "learning_rate": 8e-05, |
| "loss": 1.8774, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.007038381172330364, |
| "grad_norm": 0.30428266525268555, |
| "learning_rate": 8e-05, |
| "loss": 1.8464, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.007258330583965688, |
| "grad_norm": 0.26637139916419983, |
| "learning_rate": 8e-05, |
| "loss": 1.7896, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.007478279995601011, |
| "grad_norm": 0.2802716791629791, |
| "learning_rate": 8e-05, |
| "loss": 1.9534, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.007698229407236336, |
| "grad_norm": 0.35580113530158997, |
| "learning_rate": 8e-05, |
| "loss": 1.8236, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.00791817881887166, |
| "grad_norm": 0.2794848382472992, |
| "learning_rate": 8e-05, |
| "loss": 1.8089, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.008138128230506983, |
| "grad_norm": 0.27942711114883423, |
| "learning_rate": 8e-05, |
| "loss": 1.7725, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.008358077642142307, |
| "grad_norm": 0.2882610857486725, |
| "learning_rate": 8e-05, |
| "loss": 1.8279, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.008578027053777632, |
| "grad_norm": 0.29375842213630676, |
| "learning_rate": 8e-05, |
| "loss": 2.0123, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.008797976465412955, |
| "grad_norm": 0.26120567321777344, |
| "learning_rate": 8e-05, |
| "loss": 1.6731, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.009017925877048279, |
| "grad_norm": 0.25272971391677856, |
| "learning_rate": 8e-05, |
| "loss": 1.5723, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.009237875288683603, |
| "grad_norm": 0.30548569560050964, |
| "learning_rate": 8e-05, |
| "loss": 1.9269, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.009457824700318926, |
| "grad_norm": 0.2714739441871643, |
| "learning_rate": 8e-05, |
| "loss": 1.6715, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.00967777411195425, |
| "grad_norm": 0.3086313009262085, |
| "learning_rate": 8e-05, |
| "loss": 1.8903, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.009897723523589575, |
| "grad_norm": 0.28676554560661316, |
| "learning_rate": 8e-05, |
| "loss": 1.8257, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.010117672935224899, |
| "grad_norm": 0.2898331880569458, |
| "learning_rate": 8e-05, |
| "loss": 1.822, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.010337622346860222, |
| "grad_norm": 0.2887754440307617, |
| "learning_rate": 8e-05, |
| "loss": 1.7629, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.010557571758495546, |
| "grad_norm": 0.28026437759399414, |
| "learning_rate": 8e-05, |
| "loss": 1.8874, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01077752117013087, |
| "grad_norm": 0.29256439208984375, |
| "learning_rate": 8e-05, |
| "loss": 1.9169, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.010997470581766193, |
| "grad_norm": 0.29388460516929626, |
| "learning_rate": 8e-05, |
| "loss": 1.8341, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.011217419993401518, |
| "grad_norm": 0.29456326365470886, |
| "learning_rate": 8e-05, |
| "loss": 1.7088, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.011437369405036842, |
| "grad_norm": 0.2810533046722412, |
| "learning_rate": 8e-05, |
| "loss": 1.8564, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.011657318816672166, |
| "grad_norm": 0.3049224019050598, |
| "learning_rate": 8e-05, |
| "loss": 2.0114, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.011877268228307489, |
| "grad_norm": 0.347817987203598, |
| "learning_rate": 8e-05, |
| "loss": 1.6936, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.012097217639942813, |
| "grad_norm": 0.28999242186546326, |
| "learning_rate": 8e-05, |
| "loss": 1.5852, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.012317167051578136, |
| "grad_norm": 0.32856103777885437, |
| "learning_rate": 8e-05, |
| "loss": 1.8249, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.012537116463213462, |
| "grad_norm": 0.3450610339641571, |
| "learning_rate": 8e-05, |
| "loss": 1.9309, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.012757065874848785, |
| "grad_norm": 0.27445971965789795, |
| "learning_rate": 8e-05, |
| "loss": 1.8153, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.012977015286484109, |
| "grad_norm": 0.28595077991485596, |
| "learning_rate": 8e-05, |
| "loss": 1.8061, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.013196964698119432, |
| "grad_norm": 0.2909082770347595, |
| "learning_rate": 8e-05, |
| "loss": 1.7793, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.013416914109754756, |
| "grad_norm": 0.28822049498558044, |
| "learning_rate": 8e-05, |
| "loss": 1.7218, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.01363686352139008, |
| "grad_norm": 0.29159948229789734, |
| "learning_rate": 8e-05, |
| "loss": 1.7985, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.013856812933025405, |
| "grad_norm": 0.29802417755126953, |
| "learning_rate": 8e-05, |
| "loss": 1.8903, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.014076762344660728, |
| "grad_norm": 0.29128944873809814, |
| "learning_rate": 8e-05, |
| "loss": 1.7928, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.014296711756296052, |
| "grad_norm": 0.3093227446079254, |
| "learning_rate": 8e-05, |
| "loss": 1.8409, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.014516661167931376, |
| "grad_norm": 0.2688956558704376, |
| "learning_rate": 8e-05, |
| "loss": 1.6136, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0147366105795667, |
| "grad_norm": 0.316579133272171, |
| "learning_rate": 8e-05, |
| "loss": 1.8153, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.014956559991202023, |
| "grad_norm": 0.30234795808792114, |
| "learning_rate": 8e-05, |
| "loss": 1.7311, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.015176509402837348, |
| "grad_norm": 0.2790556848049164, |
| "learning_rate": 8e-05, |
| "loss": 1.7714, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.015396458814472672, |
| "grad_norm": 0.29012972116470337, |
| "learning_rate": 8e-05, |
| "loss": 1.7528, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.015616408226107995, |
| "grad_norm": 0.28507527709007263, |
| "learning_rate": 8e-05, |
| "loss": 1.6452, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.01583635763774332, |
| "grad_norm": 0.28862133622169495, |
| "learning_rate": 8e-05, |
| "loss": 1.7473, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.016056307049378642, |
| "grad_norm": 0.2726048231124878, |
| "learning_rate": 8e-05, |
| "loss": 1.7519, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.016276256461013966, |
| "grad_norm": 0.26808786392211914, |
| "learning_rate": 8e-05, |
| "loss": 1.6332, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.01649620587264929, |
| "grad_norm": 0.32144519686698914, |
| "learning_rate": 8e-05, |
| "loss": 1.7115, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.016716155284284613, |
| "grad_norm": 0.26930421590805054, |
| "learning_rate": 8e-05, |
| "loss": 1.7854, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.016936104695919937, |
| "grad_norm": 0.29462486505508423, |
| "learning_rate": 8e-05, |
| "loss": 1.6919, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.017156054107555264, |
| "grad_norm": 0.2780003249645233, |
| "learning_rate": 8e-05, |
| "loss": 1.6355, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.017376003519190587, |
| "grad_norm": 0.29219016432762146, |
| "learning_rate": 8e-05, |
| "loss": 1.883, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.01759595293082591, |
| "grad_norm": 0.2893241047859192, |
| "learning_rate": 8e-05, |
| "loss": 1.8548, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.017815902342461234, |
| "grad_norm": 0.283512145280838, |
| "learning_rate": 8e-05, |
| "loss": 1.79, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.018035851754096558, |
| "grad_norm": 0.2679024040699005, |
| "learning_rate": 8e-05, |
| "loss": 1.5866, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.01825580116573188, |
| "grad_norm": 0.2892123758792877, |
| "learning_rate": 8e-05, |
| "loss": 1.9033, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.018475750577367205, |
| "grad_norm": 0.2680201530456543, |
| "learning_rate": 8e-05, |
| "loss": 1.8557, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.01869569998900253, |
| "grad_norm": 0.30922645330429077, |
| "learning_rate": 8e-05, |
| "loss": 1.8885, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.018915649400637852, |
| "grad_norm": 0.2735271751880646, |
| "learning_rate": 8e-05, |
| "loss": 1.8765, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.019135598812273176, |
| "grad_norm": 0.28639712929725647, |
| "learning_rate": 8e-05, |
| "loss": 1.9429, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0193555482239085, |
| "grad_norm": 0.28437235951423645, |
| "learning_rate": 8e-05, |
| "loss": 1.8405, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.019575497635543827, |
| "grad_norm": 0.276517778635025, |
| "learning_rate": 8e-05, |
| "loss": 1.7496, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.01979544704717915, |
| "grad_norm": 0.273404598236084, |
| "learning_rate": 8e-05, |
| "loss": 1.704, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.020015396458814474, |
| "grad_norm": 0.2707740366458893, |
| "learning_rate": 8e-05, |
| "loss": 1.8274, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.020235345870449797, |
| "grad_norm": 0.26880595088005066, |
| "learning_rate": 8e-05, |
| "loss": 1.7695, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.02045529528208512, |
| "grad_norm": 0.28712528944015503, |
| "learning_rate": 8e-05, |
| "loss": 1.9436, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.020675244693720445, |
| "grad_norm": 0.26633599400520325, |
| "learning_rate": 8e-05, |
| "loss": 1.7877, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.020895194105355768, |
| "grad_norm": 0.2843431532382965, |
| "learning_rate": 8e-05, |
| "loss": 1.8389, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.02111514351699109, |
| "grad_norm": 0.2597465515136719, |
| "learning_rate": 8e-05, |
| "loss": 1.7047, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.021335092928626415, |
| "grad_norm": 0.2804902493953705, |
| "learning_rate": 8e-05, |
| "loss": 1.9375, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.02155504234026174, |
| "grad_norm": 0.2825285792350769, |
| "learning_rate": 8e-05, |
| "loss": 1.8348, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.021774991751897062, |
| "grad_norm": 0.26459112763404846, |
| "learning_rate": 8e-05, |
| "loss": 1.7416, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.021994941163532386, |
| "grad_norm": 0.28523096442222595, |
| "learning_rate": 8e-05, |
| "loss": 1.9202, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.022214890575167713, |
| "grad_norm": 0.2679818570613861, |
| "learning_rate": 8e-05, |
| "loss": 1.6741, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.022434839986803037, |
| "grad_norm": 0.2798464894294739, |
| "learning_rate": 8e-05, |
| "loss": 1.6622, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.02265478939843836, |
| "grad_norm": 0.2826269567012787, |
| "learning_rate": 8e-05, |
| "loss": 1.7577, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.022874738810073684, |
| "grad_norm": 0.3859495222568512, |
| "learning_rate": 8e-05, |
| "loss": 1.9705, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.023094688221709007, |
| "grad_norm": 0.2766650319099426, |
| "learning_rate": 8e-05, |
| "loss": 1.7706, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.02331463763334433, |
| "grad_norm": 0.2804067134857178, |
| "learning_rate": 8e-05, |
| "loss": 1.8007, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.023534587044979655, |
| "grad_norm": 0.27818629145622253, |
| "learning_rate": 8e-05, |
| "loss": 1.7913, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.023754536456614978, |
| "grad_norm": 0.2697458267211914, |
| "learning_rate": 8e-05, |
| "loss": 1.8458, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.023974485868250302, |
| "grad_norm": 0.28805410861968994, |
| "learning_rate": 8e-05, |
| "loss": 1.7543, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.024194435279885625, |
| "grad_norm": 0.28452396392822266, |
| "learning_rate": 8e-05, |
| "loss": 1.8499, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02441438469152095, |
| "grad_norm": 0.2837978005409241, |
| "learning_rate": 8e-05, |
| "loss": 1.797, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.024634334103156273, |
| "grad_norm": 0.2965853810310364, |
| "learning_rate": 8e-05, |
| "loss": 1.7988, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0248542835147916, |
| "grad_norm": 0.28529393672943115, |
| "learning_rate": 8e-05, |
| "loss": 1.7886, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.025074232926426923, |
| "grad_norm": 0.285199910402298, |
| "learning_rate": 8e-05, |
| "loss": 1.9112, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.025294182338062247, |
| "grad_norm": 0.286316454410553, |
| "learning_rate": 8e-05, |
| "loss": 1.6735, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.02551413174969757, |
| "grad_norm": 0.2648874819278717, |
| "learning_rate": 8e-05, |
| "loss": 1.5333, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.025734081161332894, |
| "grad_norm": 0.2834017276763916, |
| "learning_rate": 8e-05, |
| "loss": 1.7524, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.025954030572968217, |
| "grad_norm": 0.27846938371658325, |
| "learning_rate": 8e-05, |
| "loss": 1.8448, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.02617397998460354, |
| "grad_norm": 0.3278025984764099, |
| "learning_rate": 8e-05, |
| "loss": 1.9158, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.026393929396238865, |
| "grad_norm": 0.30259498953819275, |
| "learning_rate": 8e-05, |
| "loss": 1.7897, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.026613878807874188, |
| "grad_norm": 0.27566099166870117, |
| "learning_rate": 8e-05, |
| "loss": 1.682, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.026833828219509512, |
| "grad_norm": 0.2959173321723938, |
| "learning_rate": 8e-05, |
| "loss": 1.9032, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.027053777631144835, |
| "grad_norm": 0.29449525475502014, |
| "learning_rate": 8e-05, |
| "loss": 1.6174, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.02727372704278016, |
| "grad_norm": 0.3012568950653076, |
| "learning_rate": 8e-05, |
| "loss": 1.6817, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.027493676454415486, |
| "grad_norm": 0.29086676239967346, |
| "learning_rate": 8e-05, |
| "loss": 1.833, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.02771362586605081, |
| "grad_norm": 0.2756067216396332, |
| "learning_rate": 8e-05, |
| "loss": 1.7807, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.027933575277686133, |
| "grad_norm": 0.3420695662498474, |
| "learning_rate": 8e-05, |
| "loss": 1.8652, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.028153524689321457, |
| "grad_norm": 0.2899749279022217, |
| "learning_rate": 8e-05, |
| "loss": 1.7199, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.02837347410095678, |
| "grad_norm": 0.274718701839447, |
| "learning_rate": 8e-05, |
| "loss": 1.7322, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.028593423512592104, |
| "grad_norm": 0.3784034848213196, |
| "learning_rate": 8e-05, |
| "loss": 1.8917, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028813372924227428, |
| "grad_norm": 0.2814437448978424, |
| "learning_rate": 8e-05, |
| "loss": 1.726, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.02903332233586275, |
| "grad_norm": 0.287701815366745, |
| "learning_rate": 8e-05, |
| "loss": 1.8166, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.029253271747498075, |
| "grad_norm": 0.28487101197242737, |
| "learning_rate": 8e-05, |
| "loss": 1.7183, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0294732211591334, |
| "grad_norm": 0.27141597867012024, |
| "learning_rate": 8e-05, |
| "loss": 1.7436, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.029693170570768722, |
| "grad_norm": 0.2708652913570404, |
| "learning_rate": 8e-05, |
| "loss": 1.8116, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.029913119982404045, |
| "grad_norm": 0.2789991796016693, |
| "learning_rate": 8e-05, |
| "loss": 1.7942, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.030133069394039372, |
| "grad_norm": 0.3053725063800812, |
| "learning_rate": 8e-05, |
| "loss": 1.8508, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.030353018805674696, |
| "grad_norm": 0.30432772636413574, |
| "learning_rate": 8e-05, |
| "loss": 1.8129, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.03057296821731002, |
| "grad_norm": 0.2873070240020752, |
| "learning_rate": 8e-05, |
| "loss": 1.8713, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.030792917628945343, |
| "grad_norm": 0.2777135968208313, |
| "learning_rate": 8e-05, |
| "loss": 1.7065, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.031012867040580667, |
| "grad_norm": 0.29774004220962524, |
| "learning_rate": 8e-05, |
| "loss": 1.6471, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.03123281645221599, |
| "grad_norm": 0.2803782522678375, |
| "learning_rate": 8e-05, |
| "loss": 1.6992, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.03145276586385132, |
| "grad_norm": 0.2777007818222046, |
| "learning_rate": 8e-05, |
| "loss": 1.8398, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.03167271527548664, |
| "grad_norm": 0.26938894391059875, |
| "learning_rate": 8e-05, |
| "loss": 1.6082, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.031892664687121965, |
| "grad_norm": 0.2934747338294983, |
| "learning_rate": 8e-05, |
| "loss": 1.6929, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.032112614098757285, |
| "grad_norm": 0.2687772214412689, |
| "learning_rate": 8e-05, |
| "loss": 1.6472, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.03233256351039261, |
| "grad_norm": 0.2758256793022156, |
| "learning_rate": 8e-05, |
| "loss": 1.7128, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.03255251292202793, |
| "grad_norm": 0.26065707206726074, |
| "learning_rate": 8e-05, |
| "loss": 1.7108, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.03277246233366326, |
| "grad_norm": 0.31668898463249207, |
| "learning_rate": 8e-05, |
| "loss": 1.9365, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.03299241174529858, |
| "grad_norm": 0.2915947437286377, |
| "learning_rate": 8e-05, |
| "loss": 1.855, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.033212361156933906, |
| "grad_norm": 0.2741534113883972, |
| "learning_rate": 8e-05, |
| "loss": 1.735, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.033432310568569226, |
| "grad_norm": 0.300800085067749, |
| "learning_rate": 8e-05, |
| "loss": 1.7161, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.03365225998020455, |
| "grad_norm": 0.26691076159477234, |
| "learning_rate": 8e-05, |
| "loss": 1.665, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.03387220939183987, |
| "grad_norm": 0.2605098485946655, |
| "learning_rate": 8e-05, |
| "loss": 1.7288, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0340921588034752, |
| "grad_norm": 0.2728619873523712, |
| "learning_rate": 8e-05, |
| "loss": 1.7237, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.03431210821511053, |
| "grad_norm": 0.29627877473831177, |
| "learning_rate": 8e-05, |
| "loss": 1.8024, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.03453205762674585, |
| "grad_norm": 0.27106964588165283, |
| "learning_rate": 8e-05, |
| "loss": 1.8166, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.034752007038381175, |
| "grad_norm": 0.26806893944740295, |
| "learning_rate": 8e-05, |
| "loss": 1.7061, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.034971956450016495, |
| "grad_norm": 0.2509767413139343, |
| "learning_rate": 8e-05, |
| "loss": 1.6897, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.03519190586165182, |
| "grad_norm": 0.34342750906944275, |
| "learning_rate": 8e-05, |
| "loss": 1.7151, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03541185527328714, |
| "grad_norm": 0.27948594093322754, |
| "learning_rate": 8e-05, |
| "loss": 1.6574, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.03563180468492247, |
| "grad_norm": 0.28651687502861023, |
| "learning_rate": 8e-05, |
| "loss": 1.839, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.03585175409655779, |
| "grad_norm": 0.2787701189517975, |
| "learning_rate": 8e-05, |
| "loss": 1.8146, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.036071703508193116, |
| "grad_norm": 0.2596721351146698, |
| "learning_rate": 8e-05, |
| "loss": 1.6088, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.036291652919828436, |
| "grad_norm": 0.2630285322666168, |
| "learning_rate": 8e-05, |
| "loss": 1.6941, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.03651160233146376, |
| "grad_norm": 0.30072465538978577, |
| "learning_rate": 8e-05, |
| "loss": 1.8684, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.03673155174309909, |
| "grad_norm": 0.2789234519004822, |
| "learning_rate": 8e-05, |
| "loss": 1.9136, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.03695150115473441, |
| "grad_norm": 0.25597283244132996, |
| "learning_rate": 8e-05, |
| "loss": 1.669, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.03717145056636974, |
| "grad_norm": 0.30354219675064087, |
| "learning_rate": 8e-05, |
| "loss": 1.7845, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.03739139997800506, |
| "grad_norm": 0.26998043060302734, |
| "learning_rate": 8e-05, |
| "loss": 1.6626, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.037611349389640385, |
| "grad_norm": 0.27418825030326843, |
| "learning_rate": 8e-05, |
| "loss": 1.6444, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.037831298801275705, |
| "grad_norm": 0.2858507037162781, |
| "learning_rate": 8e-05, |
| "loss": 1.8584, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.03805124821291103, |
| "grad_norm": 0.26513633131980896, |
| "learning_rate": 8e-05, |
| "loss": 1.7107, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.03827119762454635, |
| "grad_norm": 0.3162567913532257, |
| "learning_rate": 8e-05, |
| "loss": 1.7153, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.03849114703618168, |
| "grad_norm": 0.28961601853370667, |
| "learning_rate": 8e-05, |
| "loss": 1.8455, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.038711096447817, |
| "grad_norm": 0.29676249623298645, |
| "learning_rate": 8e-05, |
| "loss": 1.9303, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.038931045859452326, |
| "grad_norm": 0.2863664925098419, |
| "learning_rate": 8e-05, |
| "loss": 1.6975, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.03915099527108765, |
| "grad_norm": 0.2715422213077545, |
| "learning_rate": 8e-05, |
| "loss": 1.5472, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.03937094468272297, |
| "grad_norm": 0.2740415036678314, |
| "learning_rate": 8e-05, |
| "loss": 1.7113, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0395908940943583, |
| "grad_norm": 0.29612302780151367, |
| "learning_rate": 8e-05, |
| "loss": 1.8689, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.03981084350599362, |
| "grad_norm": 0.26745903491973877, |
| "learning_rate": 8e-05, |
| "loss": 1.6076, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.04003079291762895, |
| "grad_norm": 0.296695739030838, |
| "learning_rate": 8e-05, |
| "loss": 1.846, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.04025074232926427, |
| "grad_norm": 0.27626705169677734, |
| "learning_rate": 8e-05, |
| "loss": 1.8103, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.040470691740899595, |
| "grad_norm": 0.2597677409648895, |
| "learning_rate": 8e-05, |
| "loss": 1.6432, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.040690641152534915, |
| "grad_norm": 0.2738899290561676, |
| "learning_rate": 8e-05, |
| "loss": 1.8351, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04091059056417024, |
| "grad_norm": 0.2683742344379425, |
| "learning_rate": 8e-05, |
| "loss": 1.6453, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.04113053997580556, |
| "grad_norm": 0.28722816705703735, |
| "learning_rate": 8e-05, |
| "loss": 1.7685, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.04135048938744089, |
| "grad_norm": 0.2851015627384186, |
| "learning_rate": 8e-05, |
| "loss": 1.8464, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.04157043879907621, |
| "grad_norm": 0.2630920112133026, |
| "learning_rate": 8e-05, |
| "loss": 1.7176, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.041790388210711536, |
| "grad_norm": 0.2678779661655426, |
| "learning_rate": 8e-05, |
| "loss": 1.671, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04201033762234686, |
| "grad_norm": 0.27810946106910706, |
| "learning_rate": 8e-05, |
| "loss": 1.6467, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.04223028703398218, |
| "grad_norm": 0.2831014394760132, |
| "learning_rate": 8e-05, |
| "loss": 1.8784, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.04245023644561751, |
| "grad_norm": 0.2643384635448456, |
| "learning_rate": 8e-05, |
| "loss": 1.6239, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.04267018585725283, |
| "grad_norm": 0.27143070101737976, |
| "learning_rate": 8e-05, |
| "loss": 1.8012, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.04289013526888816, |
| "grad_norm": 0.28524088859558105, |
| "learning_rate": 8e-05, |
| "loss": 1.7534, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.04311008468052348, |
| "grad_norm": 0.27226153016090393, |
| "learning_rate": 8e-05, |
| "loss": 1.847, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.043330034092158805, |
| "grad_norm": 0.27042534947395325, |
| "learning_rate": 8e-05, |
| "loss": 1.698, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.043549983503794125, |
| "grad_norm": 0.2673223912715912, |
| "learning_rate": 8e-05, |
| "loss": 1.7825, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.04376993291542945, |
| "grad_norm": 0.26485180854797363, |
| "learning_rate": 8e-05, |
| "loss": 1.7755, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.04398988232706477, |
| "grad_norm": 0.26945164799690247, |
| "learning_rate": 8e-05, |
| "loss": 1.8612, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0442098317387001, |
| "grad_norm": 0.30337756872177124, |
| "learning_rate": 8e-05, |
| "loss": 1.8556, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.044429781150335426, |
| "grad_norm": 0.26593855023384094, |
| "learning_rate": 8e-05, |
| "loss": 1.7633, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.044649730561970746, |
| "grad_norm": 0.26703208684921265, |
| "learning_rate": 8e-05, |
| "loss": 1.7787, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.04486967997360607, |
| "grad_norm": 0.2799319922924042, |
| "learning_rate": 8e-05, |
| "loss": 1.8946, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.04508962938524139, |
| "grad_norm": 0.261406809091568, |
| "learning_rate": 8e-05, |
| "loss": 1.714, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.04530957879687672, |
| "grad_norm": 0.30923140048980713, |
| "learning_rate": 8e-05, |
| "loss": 1.9953, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.04552952820851204, |
| "grad_norm": 0.28189903497695923, |
| "learning_rate": 8e-05, |
| "loss": 1.8068, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.04574947762014737, |
| "grad_norm": 0.28659504652023315, |
| "learning_rate": 8e-05, |
| "loss": 1.7961, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.04596942703178269, |
| "grad_norm": 0.27828094363212585, |
| "learning_rate": 8e-05, |
| "loss": 1.6398, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.046189376443418015, |
| "grad_norm": 0.2826248109340668, |
| "learning_rate": 8e-05, |
| "loss": 1.8442, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.046409325855053335, |
| "grad_norm": 0.2596709430217743, |
| "learning_rate": 8e-05, |
| "loss": 1.7269, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.04662927526668866, |
| "grad_norm": 0.26883357763290405, |
| "learning_rate": 8e-05, |
| "loss": 1.7396, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.04684922467832398, |
| "grad_norm": 0.2834852933883667, |
| "learning_rate": 8e-05, |
| "loss": 1.6992, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.04706917408995931, |
| "grad_norm": 0.30232125520706177, |
| "learning_rate": 8e-05, |
| "loss": 1.8216, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.047289123501594636, |
| "grad_norm": 0.2887151539325714, |
| "learning_rate": 8e-05, |
| "loss": 1.5633, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.047509072913229956, |
| "grad_norm": 0.27171874046325684, |
| "learning_rate": 8e-05, |
| "loss": 1.8272, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.04772902232486528, |
| "grad_norm": 0.35441088676452637, |
| "learning_rate": 8e-05, |
| "loss": 1.8308, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.047948971736500604, |
| "grad_norm": 0.28351160883903503, |
| "learning_rate": 8e-05, |
| "loss": 1.8697, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.04816892114813593, |
| "grad_norm": 0.26361364126205444, |
| "learning_rate": 8e-05, |
| "loss": 1.7044, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.04838887055977125, |
| "grad_norm": 0.2720041871070862, |
| "learning_rate": 8e-05, |
| "loss": 1.7718, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04860881997140658, |
| "grad_norm": 0.28131023049354553, |
| "learning_rate": 8e-05, |
| "loss": 1.8066, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.0488287693830419, |
| "grad_norm": 0.2640543580055237, |
| "learning_rate": 8e-05, |
| "loss": 1.69, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.049048718794677225, |
| "grad_norm": 0.26101046800613403, |
| "learning_rate": 8e-05, |
| "loss": 1.6372, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.049268668206312545, |
| "grad_norm": 0.3021651804447174, |
| "learning_rate": 8e-05, |
| "loss": 1.8528, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.04948861761794787, |
| "grad_norm": 0.2655261158943176, |
| "learning_rate": 8e-05, |
| "loss": 1.7406, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0497085670295832, |
| "grad_norm": 0.2873914837837219, |
| "learning_rate": 8e-05, |
| "loss": 1.7643, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.04992851644121852, |
| "grad_norm": 0.31813880801200867, |
| "learning_rate": 8e-05, |
| "loss": 1.8645, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.050148465852853846, |
| "grad_norm": 0.2996014654636383, |
| "learning_rate": 8e-05, |
| "loss": 1.6685, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.050368415264489166, |
| "grad_norm": 0.2837509512901306, |
| "learning_rate": 8e-05, |
| "loss": 1.9227, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.05058836467612449, |
| "grad_norm": 0.29532885551452637, |
| "learning_rate": 8e-05, |
| "loss": 1.9073, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.050808314087759814, |
| "grad_norm": 0.285295307636261, |
| "learning_rate": 8e-05, |
| "loss": 1.8248, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.05102826349939514, |
| "grad_norm": 0.26331770420074463, |
| "learning_rate": 8e-05, |
| "loss": 1.7146, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.05124821291103046, |
| "grad_norm": 0.24956567585468292, |
| "learning_rate": 8e-05, |
| "loss": 1.5574, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.05146816232266579, |
| "grad_norm": 0.27515965700149536, |
| "learning_rate": 8e-05, |
| "loss": 1.7854, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.05168811173430111, |
| "grad_norm": 0.28268730640411377, |
| "learning_rate": 8e-05, |
| "loss": 1.8294, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.051908061145936435, |
| "grad_norm": 0.25420427322387695, |
| "learning_rate": 8e-05, |
| "loss": 1.6735, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.052128010557571755, |
| "grad_norm": 0.2869463860988617, |
| "learning_rate": 8e-05, |
| "loss": 1.808, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.05234795996920708, |
| "grad_norm": 0.2574792206287384, |
| "learning_rate": 8e-05, |
| "loss": 1.7563, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.05256790938084241, |
| "grad_norm": 0.26652273535728455, |
| "learning_rate": 8e-05, |
| "loss": 1.743, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.05278785879247773, |
| "grad_norm": 0.2956235408782959, |
| "learning_rate": 8e-05, |
| "loss": 1.9169, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.053007808204113056, |
| "grad_norm": 0.274142861366272, |
| "learning_rate": 8e-05, |
| "loss": 1.8321, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.053227757615748376, |
| "grad_norm": 0.27525436878204346, |
| "learning_rate": 8e-05, |
| "loss": 1.8206, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.053447707027383703, |
| "grad_norm": 0.26323091983795166, |
| "learning_rate": 8e-05, |
| "loss": 1.7574, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.053667656439019024, |
| "grad_norm": 0.28554126620292664, |
| "learning_rate": 8e-05, |
| "loss": 1.9293, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.05388760585065435, |
| "grad_norm": 0.2651476562023163, |
| "learning_rate": 8e-05, |
| "loss": 1.808, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.05410755526228967, |
| "grad_norm": 0.27941837906837463, |
| "learning_rate": 8e-05, |
| "loss": 1.7838, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.054327504673925, |
| "grad_norm": 0.26575711369514465, |
| "learning_rate": 8e-05, |
| "loss": 1.6117, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.05454745408556032, |
| "grad_norm": 0.2620556354522705, |
| "learning_rate": 8e-05, |
| "loss": 1.7703, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.054767403497195645, |
| "grad_norm": 0.2782936990261078, |
| "learning_rate": 8e-05, |
| "loss": 1.753, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.05498735290883097, |
| "grad_norm": 0.28347843885421753, |
| "learning_rate": 8e-05, |
| "loss": 1.8365, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05520730232046629, |
| "grad_norm": 0.2740314304828644, |
| "learning_rate": 8e-05, |
| "loss": 1.7448, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.05542725173210162, |
| "grad_norm": 0.2779199779033661, |
| "learning_rate": 8e-05, |
| "loss": 1.8025, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.05564720114373694, |
| "grad_norm": 0.27700838446617126, |
| "learning_rate": 8e-05, |
| "loss": 1.6368, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.055867150555372266, |
| "grad_norm": 0.2753797173500061, |
| "learning_rate": 8e-05, |
| "loss": 1.7058, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.056087099967007586, |
| "grad_norm": 0.2677604556083679, |
| "learning_rate": 8e-05, |
| "loss": 1.772, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.056307049378642914, |
| "grad_norm": 0.291358083486557, |
| "learning_rate": 8e-05, |
| "loss": 1.7229, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.056526998790278234, |
| "grad_norm": 0.2605611979961395, |
| "learning_rate": 8e-05, |
| "loss": 1.6654, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.05674694820191356, |
| "grad_norm": 0.2726796865463257, |
| "learning_rate": 8e-05, |
| "loss": 1.8524, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.05696689761354888, |
| "grad_norm": 0.2769307494163513, |
| "learning_rate": 8e-05, |
| "loss": 1.913, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.05718684702518421, |
| "grad_norm": 0.27163514494895935, |
| "learning_rate": 8e-05, |
| "loss": 1.7076, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.057406796436819535, |
| "grad_norm": 0.27037522196769714, |
| "learning_rate": 8e-05, |
| "loss": 1.7461, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.057626745848454855, |
| "grad_norm": 0.2570153772830963, |
| "learning_rate": 8e-05, |
| "loss": 1.6714, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.05784669526009018, |
| "grad_norm": 0.2802227735519409, |
| "learning_rate": 8e-05, |
| "loss": 1.6782, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0580666446717255, |
| "grad_norm": 0.293969064950943, |
| "learning_rate": 8e-05, |
| "loss": 1.6253, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.05828659408336083, |
| "grad_norm": 0.28199446201324463, |
| "learning_rate": 8e-05, |
| "loss": 1.791, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05850654349499615, |
| "grad_norm": 0.3037835657596588, |
| "learning_rate": 8e-05, |
| "loss": 1.8553, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.058726492906631476, |
| "grad_norm": 0.2814860939979553, |
| "learning_rate": 8e-05, |
| "loss": 1.7237, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.0589464423182668, |
| "grad_norm": 0.29769864678382874, |
| "learning_rate": 8e-05, |
| "loss": 1.8635, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.059166391729902124, |
| "grad_norm": 0.26650169491767883, |
| "learning_rate": 8e-05, |
| "loss": 1.8173, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.059386341141537444, |
| "grad_norm": 0.29682958126068115, |
| "learning_rate": 8e-05, |
| "loss": 1.6548, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.05960629055317277, |
| "grad_norm": 0.2702498137950897, |
| "learning_rate": 8e-05, |
| "loss": 1.6022, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.05982623996480809, |
| "grad_norm": 0.2940424680709839, |
| "learning_rate": 8e-05, |
| "loss": 1.7955, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.06004618937644342, |
| "grad_norm": 0.2655317485332489, |
| "learning_rate": 8e-05, |
| "loss": 1.786, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.060266138788078745, |
| "grad_norm": 0.28093400597572327, |
| "learning_rate": 8e-05, |
| "loss": 1.9798, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.060486088199714065, |
| "grad_norm": 0.2635514736175537, |
| "learning_rate": 8e-05, |
| "loss": 1.6737, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.06070603761134939, |
| "grad_norm": 0.2648226320743561, |
| "learning_rate": 8e-05, |
| "loss": 1.8771, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.06092598702298471, |
| "grad_norm": 0.2934603691101074, |
| "learning_rate": 8e-05, |
| "loss": 1.4751, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.06114593643462004, |
| "grad_norm": 0.26369500160217285, |
| "learning_rate": 8e-05, |
| "loss": 1.7832, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.06136588584625536, |
| "grad_norm": 0.26159989833831787, |
| "learning_rate": 8e-05, |
| "loss": 1.7276, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.061585835257890686, |
| "grad_norm": 0.2826705873012543, |
| "learning_rate": 8e-05, |
| "loss": 1.8767, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06180578466952601, |
| "grad_norm": 0.2911459505558014, |
| "learning_rate": 8e-05, |
| "loss": 1.7795, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.062025734081161334, |
| "grad_norm": 0.27846869826316833, |
| "learning_rate": 8e-05, |
| "loss": 1.838, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.062245683492796654, |
| "grad_norm": 0.33195585012435913, |
| "learning_rate": 8e-05, |
| "loss": 1.8576, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.06246563290443198, |
| "grad_norm": 0.26306337118148804, |
| "learning_rate": 8e-05, |
| "loss": 1.7202, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.0626855823160673, |
| "grad_norm": 0.2703022360801697, |
| "learning_rate": 8e-05, |
| "loss": 1.6962, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.06290553172770263, |
| "grad_norm": 0.2754605710506439, |
| "learning_rate": 8e-05, |
| "loss": 1.6468, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.06312548113933796, |
| "grad_norm": 0.2995694577693939, |
| "learning_rate": 8e-05, |
| "loss": 1.9298, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.06334543055097328, |
| "grad_norm": 0.27501800656318665, |
| "learning_rate": 8e-05, |
| "loss": 1.8152, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0635653799626086, |
| "grad_norm": 0.2668202519416809, |
| "learning_rate": 8e-05, |
| "loss": 1.8809, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.06378532937424393, |
| "grad_norm": 0.26209571957588196, |
| "learning_rate": 8e-05, |
| "loss": 1.4927, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06400527878587925, |
| "grad_norm": 0.35276591777801514, |
| "learning_rate": 8e-05, |
| "loss": 1.9654, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.06422522819751457, |
| "grad_norm": 0.26070040464401245, |
| "learning_rate": 8e-05, |
| "loss": 1.7332, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.06444517760914989, |
| "grad_norm": 0.26518604159355164, |
| "learning_rate": 8e-05, |
| "loss": 1.6867, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.06466512702078522, |
| "grad_norm": 0.28992095589637756, |
| "learning_rate": 8e-05, |
| "loss": 1.7498, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.06488507643242054, |
| "grad_norm": 0.27465108036994934, |
| "learning_rate": 8e-05, |
| "loss": 1.6095, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.06510502584405586, |
| "grad_norm": 0.2841359078884125, |
| "learning_rate": 8e-05, |
| "loss": 1.6869, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.0653249752556912, |
| "grad_norm": 0.28873759508132935, |
| "learning_rate": 8e-05, |
| "loss": 1.7954, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.06554492466732652, |
| "grad_norm": 0.2542605698108673, |
| "learning_rate": 8e-05, |
| "loss": 1.6075, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.06576487407896184, |
| "grad_norm": 0.270823210477829, |
| "learning_rate": 8e-05, |
| "loss": 1.7238, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.06598482349059716, |
| "grad_norm": 0.2610267102718353, |
| "learning_rate": 8e-05, |
| "loss": 1.697, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06620477290223249, |
| "grad_norm": 0.28088685870170593, |
| "learning_rate": 8e-05, |
| "loss": 1.6806, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.06642472231386781, |
| "grad_norm": 0.2656930088996887, |
| "learning_rate": 8e-05, |
| "loss": 1.8744, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.06664467172550313, |
| "grad_norm": 0.2721637189388275, |
| "learning_rate": 8e-05, |
| "loss": 1.6903, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.06686462113713845, |
| "grad_norm": 0.2612883746623993, |
| "learning_rate": 8e-05, |
| "loss": 1.7444, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.06708457054877379, |
| "grad_norm": 0.2533530592918396, |
| "learning_rate": 8e-05, |
| "loss": 1.6427, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.0673045199604091, |
| "grad_norm": 0.27200043201446533, |
| "learning_rate": 8e-05, |
| "loss": 1.769, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.06752446937204443, |
| "grad_norm": 0.2626403272151947, |
| "learning_rate": 8e-05, |
| "loss": 1.64, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.06774441878367975, |
| "grad_norm": 0.3720408082008362, |
| "learning_rate": 8e-05, |
| "loss": 1.9055, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.06796436819531508, |
| "grad_norm": 0.2745527923107147, |
| "learning_rate": 8e-05, |
| "loss": 1.7844, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0681843176069504, |
| "grad_norm": 0.2568323612213135, |
| "learning_rate": 8e-05, |
| "loss": 1.6728, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06840426701858572, |
| "grad_norm": 0.2704140543937683, |
| "learning_rate": 8e-05, |
| "loss": 1.7685, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.06862421643022105, |
| "grad_norm": 0.27828502655029297, |
| "learning_rate": 8e-05, |
| "loss": 1.7957, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.06884416584185638, |
| "grad_norm": 0.2951858341693878, |
| "learning_rate": 8e-05, |
| "loss": 1.7709, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.0690641152534917, |
| "grad_norm": 0.2756475806236267, |
| "learning_rate": 8e-05, |
| "loss": 1.6348, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.06928406466512702, |
| "grad_norm": 0.2913607954978943, |
| "learning_rate": 8e-05, |
| "loss": 1.7888, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.06950401407676235, |
| "grad_norm": 0.2798636853694916, |
| "learning_rate": 8e-05, |
| "loss": 1.7806, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.06972396348839767, |
| "grad_norm": 0.27596554160118103, |
| "learning_rate": 8e-05, |
| "loss": 1.7458, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.06994391290003299, |
| "grad_norm": 0.26655322313308716, |
| "learning_rate": 8e-05, |
| "loss": 1.5985, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.07016386231166831, |
| "grad_norm": 0.2731332778930664, |
| "learning_rate": 8e-05, |
| "loss": 1.5995, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.07038381172330364, |
| "grad_norm": 0.2769210934638977, |
| "learning_rate": 8e-05, |
| "loss": 1.6748, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07060376113493896, |
| "grad_norm": 0.290889173746109, |
| "learning_rate": 8e-05, |
| "loss": 1.9427, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.07082371054657428, |
| "grad_norm": 0.2911258339881897, |
| "learning_rate": 8e-05, |
| "loss": 1.7723, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.07104365995820962, |
| "grad_norm": 0.301992267370224, |
| "learning_rate": 8e-05, |
| "loss": 1.7772, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.07126360936984494, |
| "grad_norm": 0.3023516535758972, |
| "learning_rate": 8e-05, |
| "loss": 1.8363, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.07148355878148026, |
| "grad_norm": 0.3058542013168335, |
| "learning_rate": 8e-05, |
| "loss": 1.8762, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.07170350819311558, |
| "grad_norm": 0.3215092718601227, |
| "learning_rate": 8e-05, |
| "loss": 1.7265, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.07192345760475091, |
| "grad_norm": 0.2762998342514038, |
| "learning_rate": 8e-05, |
| "loss": 1.6361, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.07214340701638623, |
| "grad_norm": 0.258635014295578, |
| "learning_rate": 8e-05, |
| "loss": 1.7031, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.07236335642802155, |
| "grad_norm": 0.27160710096359253, |
| "learning_rate": 8e-05, |
| "loss": 1.6759, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.07258330583965687, |
| "grad_norm": 0.31089314818382263, |
| "learning_rate": 8e-05, |
| "loss": 1.8141, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0728032552512922, |
| "grad_norm": 0.3026575744152069, |
| "learning_rate": 8e-05, |
| "loss": 1.9513, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.07302320466292753, |
| "grad_norm": 0.2692122161388397, |
| "learning_rate": 8e-05, |
| "loss": 1.8277, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.07324315407456285, |
| "grad_norm": 0.27460286021232605, |
| "learning_rate": 8e-05, |
| "loss": 1.6426, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.07346310348619818, |
| "grad_norm": 0.2557325065135956, |
| "learning_rate": 8e-05, |
| "loss": 1.6418, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.0736830528978335, |
| "grad_norm": 0.28074318170547485, |
| "learning_rate": 8e-05, |
| "loss": 1.79, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.07390300230946882, |
| "grad_norm": 0.28538671135902405, |
| "learning_rate": 8e-05, |
| "loss": 1.7363, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.07412295172110414, |
| "grad_norm": 0.27379995584487915, |
| "learning_rate": 8e-05, |
| "loss": 1.7881, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.07434290113273948, |
| "grad_norm": 0.2628316283226013, |
| "learning_rate": 8e-05, |
| "loss": 1.745, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.0745628505443748, |
| "grad_norm": 0.2573058009147644, |
| "learning_rate": 8e-05, |
| "loss": 1.7997, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.07478279995601012, |
| "grad_norm": 0.31905651092529297, |
| "learning_rate": 8e-05, |
| "loss": 1.8125, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07500274936764544, |
| "grad_norm": 0.2501446604728699, |
| "learning_rate": 8e-05, |
| "loss": 1.557, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.07522269877928077, |
| "grad_norm": 0.26969289779663086, |
| "learning_rate": 8e-05, |
| "loss": 1.7819, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.07544264819091609, |
| "grad_norm": 0.28457415103912354, |
| "learning_rate": 8e-05, |
| "loss": 1.7682, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.07566259760255141, |
| "grad_norm": 0.27833452820777893, |
| "learning_rate": 8e-05, |
| "loss": 1.8436, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.07588254701418674, |
| "grad_norm": 0.2574867010116577, |
| "learning_rate": 8e-05, |
| "loss": 1.7196, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.07610249642582206, |
| "grad_norm": 0.30035245418548584, |
| "learning_rate": 8e-05, |
| "loss": 1.7159, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.07632244583745738, |
| "grad_norm": 0.284169465303421, |
| "learning_rate": 8e-05, |
| "loss": 1.7238, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0765423952490927, |
| "grad_norm": 0.257168173789978, |
| "learning_rate": 8e-05, |
| "loss": 1.8531, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.07676234466072804, |
| "grad_norm": 0.2611413300037384, |
| "learning_rate": 8e-05, |
| "loss": 1.7753, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.07698229407236336, |
| "grad_norm": 0.26592132449150085, |
| "learning_rate": 8e-05, |
| "loss": 1.7557, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07720224348399868, |
| "grad_norm": 0.27427396178245544, |
| "learning_rate": 8e-05, |
| "loss": 1.8699, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.077422192895634, |
| "grad_norm": 0.27014485001564026, |
| "learning_rate": 8e-05, |
| "loss": 1.816, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.07764214230726933, |
| "grad_norm": 0.27720019221305847, |
| "learning_rate": 8e-05, |
| "loss": 1.9601, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.07786209171890465, |
| "grad_norm": 0.3222314417362213, |
| "learning_rate": 8e-05, |
| "loss": 1.6726, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.07808204113053997, |
| "grad_norm": 0.2675410211086273, |
| "learning_rate": 8e-05, |
| "loss": 1.7113, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.0783019905421753, |
| "grad_norm": 0.2902251183986664, |
| "learning_rate": 8e-05, |
| "loss": 1.7734, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.07852193995381063, |
| "grad_norm": 0.2985514998435974, |
| "learning_rate": 8e-05, |
| "loss": 1.9182, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.07874188936544595, |
| "grad_norm": 0.30351343750953674, |
| "learning_rate": 8e-05, |
| "loss": 1.7795, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.07896183877708127, |
| "grad_norm": 0.2885829210281372, |
| "learning_rate": 8e-05, |
| "loss": 1.8054, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.0791817881887166, |
| "grad_norm": 0.273366242647171, |
| "learning_rate": 8e-05, |
| "loss": 1.7903, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07940173760035192, |
| "grad_norm": 0.2959200441837311, |
| "learning_rate": 8e-05, |
| "loss": 1.9163, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.07962168701198724, |
| "grad_norm": 0.2587856948375702, |
| "learning_rate": 8e-05, |
| "loss": 1.5969, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.07984163642362256, |
| "grad_norm": 0.27777665853500366, |
| "learning_rate": 8e-05, |
| "loss": 1.8769, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.0800615858352579, |
| "grad_norm": 0.2635156512260437, |
| "learning_rate": 8e-05, |
| "loss": 1.8236, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.08028153524689322, |
| "grad_norm": 0.26534774899482727, |
| "learning_rate": 8e-05, |
| "loss": 1.6824, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.08050148465852854, |
| "grad_norm": 0.26372772455215454, |
| "learning_rate": 8e-05, |
| "loss": 1.517, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.08072143407016386, |
| "grad_norm": 0.2707895338535309, |
| "learning_rate": 8e-05, |
| "loss": 1.6757, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.08094138348179919, |
| "grad_norm": 0.2712070345878601, |
| "learning_rate": 8e-05, |
| "loss": 1.7261, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.08116133289343451, |
| "grad_norm": 0.2870525121688843, |
| "learning_rate": 8e-05, |
| "loss": 1.6337, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.08138128230506983, |
| "grad_norm": 0.30548396706581116, |
| "learning_rate": 8e-05, |
| "loss": 1.8733, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08160123171670516, |
| "grad_norm": 0.2853962182998657, |
| "learning_rate": 8e-05, |
| "loss": 1.7938, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.08182118112834048, |
| "grad_norm": 0.2716579735279083, |
| "learning_rate": 8e-05, |
| "loss": 1.6733, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.0820411305399758, |
| "grad_norm": 0.3110131025314331, |
| "learning_rate": 8e-05, |
| "loss": 1.8554, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.08226107995161112, |
| "grad_norm": 0.28003835678100586, |
| "learning_rate": 8e-05, |
| "loss": 1.8032, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.08248102936324646, |
| "grad_norm": 0.28504347801208496, |
| "learning_rate": 8e-05, |
| "loss": 1.942, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.08270097877488178, |
| "grad_norm": 0.2593232989311218, |
| "learning_rate": 8e-05, |
| "loss": 1.4993, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.0829209281865171, |
| "grad_norm": 0.35680094361305237, |
| "learning_rate": 8e-05, |
| "loss": 1.8997, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.08314087759815242, |
| "grad_norm": 0.2747777998447418, |
| "learning_rate": 8e-05, |
| "loss": 1.7364, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.08336082700978775, |
| "grad_norm": 0.26816287636756897, |
| "learning_rate": 8e-05, |
| "loss": 1.7011, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.08358077642142307, |
| "grad_norm": 0.31877851486206055, |
| "learning_rate": 8e-05, |
| "loss": 1.6131, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08380072583305839, |
| "grad_norm": 0.2845601737499237, |
| "learning_rate": 8e-05, |
| "loss": 1.6544, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.08402067524469373, |
| "grad_norm": 0.27758803963661194, |
| "learning_rate": 8e-05, |
| "loss": 1.8891, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.08424062465632905, |
| "grad_norm": 0.2832657992839813, |
| "learning_rate": 8e-05, |
| "loss": 1.7505, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.08446057406796437, |
| "grad_norm": 0.2901705801486969, |
| "learning_rate": 8e-05, |
| "loss": 1.7501, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.08468052347959969, |
| "grad_norm": 0.31189531087875366, |
| "learning_rate": 8e-05, |
| "loss": 1.8132, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.08490047289123502, |
| "grad_norm": 0.27582603693008423, |
| "learning_rate": 8e-05, |
| "loss": 1.7693, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.08512042230287034, |
| "grad_norm": 0.3030100464820862, |
| "learning_rate": 8e-05, |
| "loss": 1.7327, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.08534037171450566, |
| "grad_norm": 0.26879045367240906, |
| "learning_rate": 8e-05, |
| "loss": 1.6614, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.08556032112614098, |
| "grad_norm": 0.29507508873939514, |
| "learning_rate": 8e-05, |
| "loss": 1.9483, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.08578027053777632, |
| "grad_norm": 0.27386122941970825, |
| "learning_rate": 8e-05, |
| "loss": 1.8974, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08600021994941164, |
| "grad_norm": 0.27103161811828613, |
| "learning_rate": 8e-05, |
| "loss": 1.7579, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.08622016936104696, |
| "grad_norm": 0.3045141100883484, |
| "learning_rate": 8e-05, |
| "loss": 1.8175, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.08644011877268229, |
| "grad_norm": 0.29032695293426514, |
| "learning_rate": 8e-05, |
| "loss": 1.7493, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.08666006818431761, |
| "grad_norm": 0.27853158116340637, |
| "learning_rate": 8e-05, |
| "loss": 1.7297, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.08688001759595293, |
| "grad_norm": 0.3007650375366211, |
| "learning_rate": 8e-05, |
| "loss": 1.6736, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.08709996700758825, |
| "grad_norm": 0.28009670972824097, |
| "learning_rate": 8e-05, |
| "loss": 1.9539, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.08731991641922358, |
| "grad_norm": 0.2512955665588379, |
| "learning_rate": 8e-05, |
| "loss": 1.6362, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.0875398658308589, |
| "grad_norm": 0.297489732503891, |
| "learning_rate": 8e-05, |
| "loss": 1.9097, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.08775981524249422, |
| "grad_norm": 0.2735532522201538, |
| "learning_rate": 8e-05, |
| "loss": 1.8348, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.08797976465412954, |
| "grad_norm": 0.2559053897857666, |
| "learning_rate": 8e-05, |
| "loss": 1.685, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08819971406576488, |
| "grad_norm": 0.27982097864151, |
| "learning_rate": 8e-05, |
| "loss": 1.6801, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.0884196634774002, |
| "grad_norm": 0.26066988706588745, |
| "learning_rate": 8e-05, |
| "loss": 1.7732, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.08863961288903552, |
| "grad_norm": 0.26763463020324707, |
| "learning_rate": 8e-05, |
| "loss": 1.7214, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.08885956230067085, |
| "grad_norm": 0.2795925736427307, |
| "learning_rate": 8e-05, |
| "loss": 1.8387, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.08907951171230617, |
| "grad_norm": 0.266305148601532, |
| "learning_rate": 8e-05, |
| "loss": 1.6515, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.08929946112394149, |
| "grad_norm": 0.27049583196640015, |
| "learning_rate": 8e-05, |
| "loss": 1.7824, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.08951941053557681, |
| "grad_norm": 0.2959458529949188, |
| "learning_rate": 8e-05, |
| "loss": 1.8766, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.08973935994721215, |
| "grad_norm": 0.28563347458839417, |
| "learning_rate": 8e-05, |
| "loss": 1.8618, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.08995930935884747, |
| "grad_norm": 0.2840110659599304, |
| "learning_rate": 8e-05, |
| "loss": 1.6834, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.09017925877048279, |
| "grad_norm": 0.25303247570991516, |
| "learning_rate": 8e-05, |
| "loss": 1.6477, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09039920818211811, |
| "grad_norm": 0.27236899733543396, |
| "learning_rate": 8e-05, |
| "loss": 1.7004, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.09061915759375344, |
| "grad_norm": 0.2795659899711609, |
| "learning_rate": 8e-05, |
| "loss": 1.7492, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.09083910700538876, |
| "grad_norm": 0.26019132137298584, |
| "learning_rate": 8e-05, |
| "loss": 1.691, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.09105905641702408, |
| "grad_norm": 0.26624274253845215, |
| "learning_rate": 8e-05, |
| "loss": 1.7001, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.09127900582865942, |
| "grad_norm": 0.2661585509777069, |
| "learning_rate": 8e-05, |
| "loss": 1.6762, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.09149895524029474, |
| "grad_norm": 0.2719002068042755, |
| "learning_rate": 8e-05, |
| "loss": 1.6915, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.09171890465193006, |
| "grad_norm": 0.24670244753360748, |
| "learning_rate": 8e-05, |
| "loss": 1.5598, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.09193885406356538, |
| "grad_norm": 0.2550405263900757, |
| "learning_rate": 8e-05, |
| "loss": 1.4817, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.09215880347520071, |
| "grad_norm": 0.26272761821746826, |
| "learning_rate": 8e-05, |
| "loss": 1.7016, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.09237875288683603, |
| "grad_norm": 0.2673632502555847, |
| "learning_rate": 8e-05, |
| "loss": 1.7626, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09259870229847135, |
| "grad_norm": 0.25949448347091675, |
| "learning_rate": 8e-05, |
| "loss": 1.6273, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.09281865171010667, |
| "grad_norm": 0.27953028678894043, |
| "learning_rate": 8e-05, |
| "loss": 1.8843, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.093038601121742, |
| "grad_norm": 0.2534630298614502, |
| "learning_rate": 8e-05, |
| "loss": 1.7305, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.09325855053337732, |
| "grad_norm": 0.2573072910308838, |
| "learning_rate": 8e-05, |
| "loss": 1.6397, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.09347849994501264, |
| "grad_norm": 0.2604135572910309, |
| "learning_rate": 8e-05, |
| "loss": 1.6696, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.09369844935664796, |
| "grad_norm": 0.25805628299713135, |
| "learning_rate": 8e-05, |
| "loss": 1.6441, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.0939183987682833, |
| "grad_norm": 0.2935563027858734, |
| "learning_rate": 8e-05, |
| "loss": 1.6475, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.09413834817991862, |
| "grad_norm": 0.25222933292388916, |
| "learning_rate": 8e-05, |
| "loss": 1.727, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.09435829759155394, |
| "grad_norm": 0.2593076527118683, |
| "learning_rate": 8e-05, |
| "loss": 1.7066, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.09457824700318927, |
| "grad_norm": 0.25259336829185486, |
| "learning_rate": 8e-05, |
| "loss": 1.6821, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09479819641482459, |
| "grad_norm": 0.2512541115283966, |
| "learning_rate": 8e-05, |
| "loss": 1.5923, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.09501814582645991, |
| "grad_norm": 0.2711183726787567, |
| "learning_rate": 8e-05, |
| "loss": 1.755, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.09523809523809523, |
| "grad_norm": 0.2782961130142212, |
| "learning_rate": 8e-05, |
| "loss": 1.8914, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.09545804464973057, |
| "grad_norm": 0.25964146852493286, |
| "learning_rate": 8e-05, |
| "loss": 1.6588, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.09567799406136589, |
| "grad_norm": 0.27077510952949524, |
| "learning_rate": 8e-05, |
| "loss": 1.753, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.09589794347300121, |
| "grad_norm": 0.2923937141895294, |
| "learning_rate": 8e-05, |
| "loss": 1.8218, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.09611789288463653, |
| "grad_norm": 0.2513190805912018, |
| "learning_rate": 8e-05, |
| "loss": 1.6232, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.09633784229627186, |
| "grad_norm": 0.28531181812286377, |
| "learning_rate": 8e-05, |
| "loss": 1.7199, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.09655779170790718, |
| "grad_norm": 0.302020400762558, |
| "learning_rate": 8e-05, |
| "loss": 1.8359, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.0967777411195425, |
| "grad_norm": 0.28001338243484497, |
| "learning_rate": 8e-05, |
| "loss": 1.8434, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09699769053117784, |
| "grad_norm": 0.2990663945674896, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.09721763994281316, |
| "grad_norm": 0.266197144985199, |
| "learning_rate": 8e-05, |
| "loss": 1.6195, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.09743758935444848, |
| "grad_norm": 0.28108519315719604, |
| "learning_rate": 8e-05, |
| "loss": 1.8108, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.0976575387660838, |
| "grad_norm": 0.26744788885116577, |
| "learning_rate": 8e-05, |
| "loss": 1.6497, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.09787748817771913, |
| "grad_norm": 0.28030574321746826, |
| "learning_rate": 8e-05, |
| "loss": 1.8143, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.09809743758935445, |
| "grad_norm": 0.27872079610824585, |
| "learning_rate": 8e-05, |
| "loss": 1.6319, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.09831738700098977, |
| "grad_norm": 0.2816067039966583, |
| "learning_rate": 8e-05, |
| "loss": 1.8385, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.09853733641262509, |
| "grad_norm": 0.25677627325057983, |
| "learning_rate": 8e-05, |
| "loss": 1.6885, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.09875728582426042, |
| "grad_norm": 0.276569128036499, |
| "learning_rate": 8e-05, |
| "loss": 1.7652, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.09897723523589574, |
| "grad_norm": 0.2765633463859558, |
| "learning_rate": 8e-05, |
| "loss": 1.7763, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09919718464753106, |
| "grad_norm": 0.27050015330314636, |
| "learning_rate": 8e-05, |
| "loss": 1.6459, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.0994171340591664, |
| "grad_norm": 0.2552846372127533, |
| "learning_rate": 8e-05, |
| "loss": 1.6877, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.09963708347080172, |
| "grad_norm": 0.2653469741344452, |
| "learning_rate": 8e-05, |
| "loss": 1.6536, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.09985703288243704, |
| "grad_norm": 0.28801941871643066, |
| "learning_rate": 8e-05, |
| "loss": 1.7643, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.10007698229407236, |
| "grad_norm": 0.2930269241333008, |
| "learning_rate": 8e-05, |
| "loss": 1.7766, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.10029693170570769, |
| "grad_norm": 0.2718334496021271, |
| "learning_rate": 8e-05, |
| "loss": 1.7347, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.10051688111734301, |
| "grad_norm": 0.2807629704475403, |
| "learning_rate": 8e-05, |
| "loss": 1.7245, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.10073683052897833, |
| "grad_norm": 0.2801489531993866, |
| "learning_rate": 8e-05, |
| "loss": 1.7854, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.10095677994061365, |
| "grad_norm": 0.2616996765136719, |
| "learning_rate": 8e-05, |
| "loss": 1.6179, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.10117672935224899, |
| "grad_norm": 0.2626480758190155, |
| "learning_rate": 8e-05, |
| "loss": 1.7475, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.10139667876388431, |
| "grad_norm": 0.27338841557502747, |
| "learning_rate": 8e-05, |
| "loss": 1.8972, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.10161662817551963, |
| "grad_norm": 0.2695038616657257, |
| "learning_rate": 8e-05, |
| "loss": 1.7279, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.10183657758715496, |
| "grad_norm": 0.25614050030708313, |
| "learning_rate": 8e-05, |
| "loss": 1.6, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.10205652699879028, |
| "grad_norm": 0.2722180187702179, |
| "learning_rate": 8e-05, |
| "loss": 1.9241, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.1022764764104256, |
| "grad_norm": 0.2580203115940094, |
| "learning_rate": 8e-05, |
| "loss": 1.693, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.10249642582206092, |
| "grad_norm": 0.2848857641220093, |
| "learning_rate": 8e-05, |
| "loss": 1.9072, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.10271637523369626, |
| "grad_norm": 0.2783052325248718, |
| "learning_rate": 8e-05, |
| "loss": 1.9102, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.10293632464533158, |
| "grad_norm": 0.279695987701416, |
| "learning_rate": 8e-05, |
| "loss": 1.7491, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.1031562740569669, |
| "grad_norm": 0.2493034154176712, |
| "learning_rate": 8e-05, |
| "loss": 1.6789, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.10337622346860222, |
| "grad_norm": 0.2751196622848511, |
| "learning_rate": 8e-05, |
| "loss": 1.8132, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10359617288023755, |
| "grad_norm": 0.2739677131175995, |
| "learning_rate": 8e-05, |
| "loss": 1.7945, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.10381612229187287, |
| "grad_norm": 0.30357351899147034, |
| "learning_rate": 8e-05, |
| "loss": 1.9113, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.10403607170350819, |
| "grad_norm": 0.2646970748901367, |
| "learning_rate": 8e-05, |
| "loss": 1.811, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.10425602111514351, |
| "grad_norm": 0.2626940608024597, |
| "learning_rate": 8e-05, |
| "loss": 1.6911, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.10447597052677884, |
| "grad_norm": 0.2613508701324463, |
| "learning_rate": 8e-05, |
| "loss": 1.7209, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.10469591993841416, |
| "grad_norm": 0.2609264552593231, |
| "learning_rate": 8e-05, |
| "loss": 1.6303, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.10491586935004948, |
| "grad_norm": 0.2549975514411926, |
| "learning_rate": 8e-05, |
| "loss": 1.7769, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.10513581876168482, |
| "grad_norm": 0.2742570638656616, |
| "learning_rate": 8e-05, |
| "loss": 1.8101, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.10535576817332014, |
| "grad_norm": 0.267070472240448, |
| "learning_rate": 8e-05, |
| "loss": 1.787, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.10557571758495546, |
| "grad_norm": 0.2735085189342499, |
| "learning_rate": 8e-05, |
| "loss": 1.8112, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10579566699659078, |
| "grad_norm": 0.260111540555954, |
| "learning_rate": 8e-05, |
| "loss": 1.6926, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.10601561640822611, |
| "grad_norm": 0.26309284567832947, |
| "learning_rate": 8e-05, |
| "loss": 1.778, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.10623556581986143, |
| "grad_norm": 0.2658458948135376, |
| "learning_rate": 8e-05, |
| "loss": 1.7179, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.10645551523149675, |
| "grad_norm": 0.27498647570610046, |
| "learning_rate": 8e-05, |
| "loss": 1.6689, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.10667546464313207, |
| "grad_norm": 0.2658367156982422, |
| "learning_rate": 8e-05, |
| "loss": 1.6786, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.10689541405476741, |
| "grad_norm": 0.26023292541503906, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.10711536346640273, |
| "grad_norm": 0.25749459862709045, |
| "learning_rate": 8e-05, |
| "loss": 1.6614, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.10733531287803805, |
| "grad_norm": 0.26305267214775085, |
| "learning_rate": 8e-05, |
| "loss": 1.6838, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.10755526228967338, |
| "grad_norm": 0.25277695059776306, |
| "learning_rate": 8e-05, |
| "loss": 1.6975, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.1077752117013087, |
| "grad_norm": 0.2584420144557953, |
| "learning_rate": 8e-05, |
| "loss": 1.7434, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.10799516111294402, |
| "grad_norm": 0.28107360005378723, |
| "learning_rate": 8e-05, |
| "loss": 1.8037, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.10821511052457934, |
| "grad_norm": 0.553341269493103, |
| "learning_rate": 8e-05, |
| "loss": 1.8896, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.10843505993621468, |
| "grad_norm": 0.2718677222728729, |
| "learning_rate": 8e-05, |
| "loss": 1.6646, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.10865500934785, |
| "grad_norm": 0.27301734685897827, |
| "learning_rate": 8e-05, |
| "loss": 1.6663, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.10887495875948532, |
| "grad_norm": 0.26952439546585083, |
| "learning_rate": 8e-05, |
| "loss": 1.7228, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.10909490817112064, |
| "grad_norm": 0.3017599582672119, |
| "learning_rate": 8e-05, |
| "loss": 1.7936, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.10931485758275597, |
| "grad_norm": 0.2676602303981781, |
| "learning_rate": 8e-05, |
| "loss": 1.7861, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.10953480699439129, |
| "grad_norm": 0.27192267775535583, |
| "learning_rate": 8e-05, |
| "loss": 1.8032, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.10975475640602661, |
| "grad_norm": 0.2807183861732483, |
| "learning_rate": 8e-05, |
| "loss": 1.6331, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.10997470581766194, |
| "grad_norm": 0.2652963399887085, |
| "learning_rate": 8e-05, |
| "loss": 1.6231, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11019465522929726, |
| "grad_norm": 0.26010751724243164, |
| "learning_rate": 8e-05, |
| "loss": 1.729, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.11041460464093258, |
| "grad_norm": 0.29573148488998413, |
| "learning_rate": 8e-05, |
| "loss": 1.8082, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.1106345540525679, |
| "grad_norm": 0.28008025884628296, |
| "learning_rate": 8e-05, |
| "loss": 1.6829, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.11085450346420324, |
| "grad_norm": 0.3029135763645172, |
| "learning_rate": 8e-05, |
| "loss": 1.7699, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.11107445287583856, |
| "grad_norm": 0.2821674346923828, |
| "learning_rate": 8e-05, |
| "loss": 1.7337, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.11129440228747388, |
| "grad_norm": 0.274880975484848, |
| "learning_rate": 8e-05, |
| "loss": 1.7973, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1115143516991092, |
| "grad_norm": 0.28885796666145325, |
| "learning_rate": 8e-05, |
| "loss": 1.7756, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.11173430111074453, |
| "grad_norm": 0.2744079530239105, |
| "learning_rate": 8e-05, |
| "loss": 1.7991, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.11195425052237985, |
| "grad_norm": 0.2645000219345093, |
| "learning_rate": 8e-05, |
| "loss": 1.6566, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.11217419993401517, |
| "grad_norm": 0.2640466094017029, |
| "learning_rate": 8e-05, |
| "loss": 1.6649, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11239414934565051, |
| "grad_norm": 0.2965867817401886, |
| "learning_rate": 8e-05, |
| "loss": 1.7733, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.11261409875728583, |
| "grad_norm": 0.2533203661441803, |
| "learning_rate": 8e-05, |
| "loss": 1.7194, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.11283404816892115, |
| "grad_norm": 0.261994868516922, |
| "learning_rate": 8e-05, |
| "loss": 1.7387, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.11305399758055647, |
| "grad_norm": 0.2868165969848633, |
| "learning_rate": 8e-05, |
| "loss": 1.7444, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.1132739469921918, |
| "grad_norm": 0.2836281657218933, |
| "learning_rate": 8e-05, |
| "loss": 1.6507, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.11349389640382712, |
| "grad_norm": 0.28675276041030884, |
| "learning_rate": 8e-05, |
| "loss": 1.7054, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.11371384581546244, |
| "grad_norm": 0.2745465040206909, |
| "learning_rate": 8e-05, |
| "loss": 1.77, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.11393379522709776, |
| "grad_norm": 0.27250972390174866, |
| "learning_rate": 8e-05, |
| "loss": 1.9102, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1141537446387331, |
| "grad_norm": 0.2781262695789337, |
| "learning_rate": 8e-05, |
| "loss": 1.8126, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.11437369405036842, |
| "grad_norm": 0.2691183388233185, |
| "learning_rate": 8e-05, |
| "loss": 1.5978, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11459364346200374, |
| "grad_norm": 0.29496780037879944, |
| "learning_rate": 8e-05, |
| "loss": 1.9214, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.11481359287363907, |
| "grad_norm": 0.27725401520729065, |
| "learning_rate": 8e-05, |
| "loss": 1.8722, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.11503354228527439, |
| "grad_norm": 0.28819364309310913, |
| "learning_rate": 8e-05, |
| "loss": 1.6739, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.11525349169690971, |
| "grad_norm": 0.278857946395874, |
| "learning_rate": 8e-05, |
| "loss": 1.8137, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.11547344110854503, |
| "grad_norm": 0.26911258697509766, |
| "learning_rate": 8e-05, |
| "loss": 1.7123, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11569339052018036, |
| "grad_norm": 0.2656850814819336, |
| "learning_rate": 8e-05, |
| "loss": 1.8124, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.11591333993181568, |
| "grad_norm": 0.26521819829940796, |
| "learning_rate": 8e-05, |
| "loss": 1.8188, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.116133289343451, |
| "grad_norm": 0.2821720540523529, |
| "learning_rate": 8e-05, |
| "loss": 1.7607, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.11635323875508632, |
| "grad_norm": 0.294612854719162, |
| "learning_rate": 8e-05, |
| "loss": 1.8142, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.11657318816672166, |
| "grad_norm": 0.29858094453811646, |
| "learning_rate": 8e-05, |
| "loss": 1.8795, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11679313757835698, |
| "grad_norm": 0.2726878821849823, |
| "learning_rate": 8e-05, |
| "loss": 1.7988, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.1170130869899923, |
| "grad_norm": 0.2651258111000061, |
| "learning_rate": 8e-05, |
| "loss": 1.8106, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.11723303640162762, |
| "grad_norm": 0.2681291997432709, |
| "learning_rate": 8e-05, |
| "loss": 1.6692, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.11745298581326295, |
| "grad_norm": 0.2641060948371887, |
| "learning_rate": 8e-05, |
| "loss": 1.6479, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.11767293522489827, |
| "grad_norm": 0.2850191593170166, |
| "learning_rate": 8e-05, |
| "loss": 1.7337, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.1178928846365336, |
| "grad_norm": 0.2718667685985565, |
| "learning_rate": 8e-05, |
| "loss": 1.7069, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.11811283404816893, |
| "grad_norm": 0.27950581908226013, |
| "learning_rate": 8e-05, |
| "loss": 1.83, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.11833278345980425, |
| "grad_norm": 0.26720213890075684, |
| "learning_rate": 8e-05, |
| "loss": 1.6787, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.11855273287143957, |
| "grad_norm": 0.25440508127212524, |
| "learning_rate": 8e-05, |
| "loss": 1.5966, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.11877268228307489, |
| "grad_norm": 0.2716729938983917, |
| "learning_rate": 8e-05, |
| "loss": 1.793, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11899263169471022, |
| "grad_norm": 0.26204821467399597, |
| "learning_rate": 8e-05, |
| "loss": 1.5882, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.11921258110634554, |
| "grad_norm": 0.2756775915622711, |
| "learning_rate": 8e-05, |
| "loss": 1.7529, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.11943253051798086, |
| "grad_norm": 0.27235740423202515, |
| "learning_rate": 8e-05, |
| "loss": 1.7607, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.11965247992961618, |
| "grad_norm": 0.27712538838386536, |
| "learning_rate": 8e-05, |
| "loss": 1.7504, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.11987242934125152, |
| "grad_norm": 0.27800193428993225, |
| "learning_rate": 8e-05, |
| "loss": 1.7421, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.12009237875288684, |
| "grad_norm": 0.27911701798439026, |
| "learning_rate": 8e-05, |
| "loss": 1.6683, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.12031232816452216, |
| "grad_norm": 0.27643364667892456, |
| "learning_rate": 8e-05, |
| "loss": 1.6393, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.12053227757615749, |
| "grad_norm": 0.25785166025161743, |
| "learning_rate": 8e-05, |
| "loss": 1.641, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.12075222698779281, |
| "grad_norm": 0.2791956067085266, |
| "learning_rate": 8e-05, |
| "loss": 1.7585, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.12097217639942813, |
| "grad_norm": 0.28245967626571655, |
| "learning_rate": 8e-05, |
| "loss": 1.8716, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.12119212581106345, |
| "grad_norm": 0.27160346508026123, |
| "learning_rate": 8e-05, |
| "loss": 1.7023, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.12141207522269878, |
| "grad_norm": 0.2670506536960602, |
| "learning_rate": 8e-05, |
| "loss": 1.5844, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.1216320246343341, |
| "grad_norm": 0.2762441337108612, |
| "learning_rate": 8e-05, |
| "loss": 1.7286, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.12185197404596942, |
| "grad_norm": 0.29608720541000366, |
| "learning_rate": 8e-05, |
| "loss": 1.7875, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.12207192345760474, |
| "grad_norm": 0.2847777307033539, |
| "learning_rate": 8e-05, |
| "loss": 1.7388, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.12229187286924008, |
| "grad_norm": 0.2769443988800049, |
| "learning_rate": 8e-05, |
| "loss": 1.8129, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.1225118222808754, |
| "grad_norm": 0.27490487694740295, |
| "learning_rate": 8e-05, |
| "loss": 1.678, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.12273177169251072, |
| "grad_norm": 0.2851822078227997, |
| "learning_rate": 8e-05, |
| "loss": 1.8268, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.12295172110414605, |
| "grad_norm": 0.31336653232574463, |
| "learning_rate": 8e-05, |
| "loss": 1.8247, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.12317167051578137, |
| "grad_norm": 0.26455923914909363, |
| "learning_rate": 8e-05, |
| "loss": 1.5548, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12339161992741669, |
| "grad_norm": 0.2750054597854614, |
| "learning_rate": 8e-05, |
| "loss": 1.7912, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.12361156933905201, |
| "grad_norm": 0.28016433119773865, |
| "learning_rate": 8e-05, |
| "loss": 1.7367, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.12383151875068735, |
| "grad_norm": 0.30594533681869507, |
| "learning_rate": 8e-05, |
| "loss": 1.7959, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.12405146816232267, |
| "grad_norm": 0.2753421664237976, |
| "learning_rate": 8e-05, |
| "loss": 1.6714, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.12427141757395799, |
| "grad_norm": 0.3309609889984131, |
| "learning_rate": 8e-05, |
| "loss": 1.7632, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.12449136698559331, |
| "grad_norm": 0.3116569221019745, |
| "learning_rate": 8e-05, |
| "loss": 1.8312, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.12471131639722864, |
| "grad_norm": 0.27756184339523315, |
| "learning_rate": 8e-05, |
| "loss": 1.6622, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.12493126580886396, |
| "grad_norm": 0.2740349769592285, |
| "learning_rate": 8e-05, |
| "loss": 1.7015, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.1251512152204993, |
| "grad_norm": 0.2696126401424408, |
| "learning_rate": 8e-05, |
| "loss": 1.6063, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.1253711646321346, |
| "grad_norm": 0.29191461205482483, |
| "learning_rate": 8e-05, |
| "loss": 1.8429, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12559111404376994, |
| "grad_norm": 0.2984013855457306, |
| "learning_rate": 8e-05, |
| "loss": 1.8194, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.12581106345540527, |
| "grad_norm": 0.27315613627433777, |
| "learning_rate": 8e-05, |
| "loss": 1.7027, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.12603101286704058, |
| "grad_norm": 0.28547149896621704, |
| "learning_rate": 8e-05, |
| "loss": 1.694, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.1262509622786759, |
| "grad_norm": 0.26458805799484253, |
| "learning_rate": 8e-05, |
| "loss": 1.7978, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.12647091169031122, |
| "grad_norm": 0.29676830768585205, |
| "learning_rate": 8e-05, |
| "loss": 1.8295, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.12669086110194655, |
| "grad_norm": 0.28077611327171326, |
| "learning_rate": 8e-05, |
| "loss": 1.7711, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.12691081051358188, |
| "grad_norm": 0.256736159324646, |
| "learning_rate": 8e-05, |
| "loss": 1.5371, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.1271307599252172, |
| "grad_norm": 0.2888578474521637, |
| "learning_rate": 8e-05, |
| "loss": 1.7532, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.12735070933685252, |
| "grad_norm": 0.29349133372306824, |
| "learning_rate": 8e-05, |
| "loss": 1.856, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.12757065874848786, |
| "grad_norm": 0.2626110911369324, |
| "learning_rate": 8e-05, |
| "loss": 1.5482, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12779060816012316, |
| "grad_norm": 0.2715248167514801, |
| "learning_rate": 8e-05, |
| "loss": 1.7003, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.1280105575717585, |
| "grad_norm": 0.2800534963607788, |
| "learning_rate": 8e-05, |
| "loss": 1.7065, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.12823050698339383, |
| "grad_norm": 0.3190186619758606, |
| "learning_rate": 8e-05, |
| "loss": 1.8099, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.12845045639502914, |
| "grad_norm": 0.2689470648765564, |
| "learning_rate": 8e-05, |
| "loss": 1.7824, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.12867040580666447, |
| "grad_norm": 0.2715473473072052, |
| "learning_rate": 8e-05, |
| "loss": 1.7721, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.12889035521829978, |
| "grad_norm": 0.27956798672676086, |
| "learning_rate": 8e-05, |
| "loss": 1.7888, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.1291103046299351, |
| "grad_norm": 0.2842330038547516, |
| "learning_rate": 8e-05, |
| "loss": 1.7131, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.12933025404157045, |
| "grad_norm": 0.2888692021369934, |
| "learning_rate": 8e-05, |
| "loss": 1.7509, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.12955020345320575, |
| "grad_norm": 0.27673423290252686, |
| "learning_rate": 8e-05, |
| "loss": 1.7235, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.1297701528648411, |
| "grad_norm": 0.26007330417633057, |
| "learning_rate": 8e-05, |
| "loss": 1.7157, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12999010227647642, |
| "grad_norm": 0.27521616220474243, |
| "learning_rate": 8e-05, |
| "loss": 1.7519, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.13021005168811173, |
| "grad_norm": 0.2753496766090393, |
| "learning_rate": 8e-05, |
| "loss": 1.6956, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.13043000109974706, |
| "grad_norm": 0.25559505820274353, |
| "learning_rate": 8e-05, |
| "loss": 1.522, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.1306499505113824, |
| "grad_norm": 0.26815375685691833, |
| "learning_rate": 8e-05, |
| "loss": 1.7658, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.1308698999230177, |
| "grad_norm": 0.26870042085647583, |
| "learning_rate": 8e-05, |
| "loss": 1.779, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.13108984933465304, |
| "grad_norm": 0.27346327900886536, |
| "learning_rate": 8e-05, |
| "loss": 1.7397, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.13130979874628834, |
| "grad_norm": 0.26674172282218933, |
| "learning_rate": 8e-05, |
| "loss": 1.906, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.13152974815792368, |
| "grad_norm": 0.266916960477829, |
| "learning_rate": 8e-05, |
| "loss": 1.6896, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.131749697569559, |
| "grad_norm": 0.2620035707950592, |
| "learning_rate": 8e-05, |
| "loss": 1.8032, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.13196964698119432, |
| "grad_norm": 0.2721168100833893, |
| "learning_rate": 8e-05, |
| "loss": 1.7992, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.13218959639282965, |
| "grad_norm": 0.2902929186820984, |
| "learning_rate": 8e-05, |
| "loss": 1.8392, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.13240954580446498, |
| "grad_norm": 0.267459899187088, |
| "learning_rate": 8e-05, |
| "loss": 1.8469, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.1326294952161003, |
| "grad_norm": 0.25643131136894226, |
| "learning_rate": 8e-05, |
| "loss": 1.5562, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.13284944462773562, |
| "grad_norm": 0.2919185757637024, |
| "learning_rate": 8e-05, |
| "loss": 1.7108, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.13306939403937096, |
| "grad_norm": 0.2631925046443939, |
| "learning_rate": 8e-05, |
| "loss": 1.4344, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.13328934345100626, |
| "grad_norm": 0.2710738182067871, |
| "learning_rate": 8e-05, |
| "loss": 1.6774, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.1335092928626416, |
| "grad_norm": 0.2641798257827759, |
| "learning_rate": 8e-05, |
| "loss": 1.8032, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.1337292422742769, |
| "grad_norm": 0.2571311891078949, |
| "learning_rate": 8e-05, |
| "loss": 1.63, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.13394919168591224, |
| "grad_norm": 0.24528057873249054, |
| "learning_rate": 8e-05, |
| "loss": 1.4576, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.13416914109754757, |
| "grad_norm": 0.270641028881073, |
| "learning_rate": 8e-05, |
| "loss": 1.7896, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.13438909050918288, |
| "grad_norm": 0.2723008990287781, |
| "learning_rate": 8e-05, |
| "loss": 1.7894, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.1346090399208182, |
| "grad_norm": 0.26487669348716736, |
| "learning_rate": 8e-05, |
| "loss": 1.7646, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.13482898933245355, |
| "grad_norm": 0.26771143078804016, |
| "learning_rate": 8e-05, |
| "loss": 1.8015, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.13504893874408885, |
| "grad_norm": 0.2585919499397278, |
| "learning_rate": 8e-05, |
| "loss": 1.6487, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.1352688881557242, |
| "grad_norm": 0.28161996603012085, |
| "learning_rate": 8e-05, |
| "loss": 1.7813, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.1354888375673595, |
| "grad_norm": 0.25246456265449524, |
| "learning_rate": 8e-05, |
| "loss": 1.5549, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.13570878697899483, |
| "grad_norm": 0.2803630530834198, |
| "learning_rate": 8e-05, |
| "loss": 1.7545, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.13592873639063016, |
| "grad_norm": 0.2587769031524658, |
| "learning_rate": 8e-05, |
| "loss": 1.6755, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.13614868580226547, |
| "grad_norm": 0.2890148162841797, |
| "learning_rate": 8e-05, |
| "loss": 1.9753, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.1363686352139008, |
| "grad_norm": 0.2924948036670685, |
| "learning_rate": 8e-05, |
| "loss": 1.7611, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.13658858462553614, |
| "grad_norm": 0.2594594359397888, |
| "learning_rate": 8e-05, |
| "loss": 1.6945, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.13680853403717144, |
| "grad_norm": 0.2853068709373474, |
| "learning_rate": 8e-05, |
| "loss": 1.8637, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.13702848344880678, |
| "grad_norm": 0.2696111798286438, |
| "learning_rate": 8e-05, |
| "loss": 1.7777, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.1372484328604421, |
| "grad_norm": 0.3137861490249634, |
| "learning_rate": 8e-05, |
| "loss": 1.8799, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.13746838227207742, |
| "grad_norm": 0.25645750761032104, |
| "learning_rate": 8e-05, |
| "loss": 1.5023, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.13768833168371275, |
| "grad_norm": 0.29853489995002747, |
| "learning_rate": 8e-05, |
| "loss": 1.9131, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.13790828109534806, |
| "grad_norm": 0.2653225362300873, |
| "learning_rate": 8e-05, |
| "loss": 1.6835, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.1381282305069834, |
| "grad_norm": 0.26686328649520874, |
| "learning_rate": 8e-05, |
| "loss": 1.7667, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.13834817991861872, |
| "grad_norm": 0.26114073395729065, |
| "learning_rate": 8e-05, |
| "loss": 1.6925, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.13856812933025403, |
| "grad_norm": 0.2520682215690613, |
| "learning_rate": 8e-05, |
| "loss": 1.6065, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.13878807874188936, |
| "grad_norm": 0.2676456868648529, |
| "learning_rate": 8e-05, |
| "loss": 1.7353, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.1390080281535247, |
| "grad_norm": 0.2525452673435211, |
| "learning_rate": 8e-05, |
| "loss": 1.5993, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.13922797756516, |
| "grad_norm": 0.25620371103286743, |
| "learning_rate": 8e-05, |
| "loss": 1.7188, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.13944792697679534, |
| "grad_norm": 0.4071904420852661, |
| "learning_rate": 8e-05, |
| "loss": 1.9348, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.13966787638843067, |
| "grad_norm": 0.2656376361846924, |
| "learning_rate": 8e-05, |
| "loss": 1.7833, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.13988782580006598, |
| "grad_norm": 0.25558993220329285, |
| "learning_rate": 8e-05, |
| "loss": 1.715, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.1401077752117013, |
| "grad_norm": 0.28318601846694946, |
| "learning_rate": 8e-05, |
| "loss": 1.8012, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.14032772462333662, |
| "grad_norm": 0.2558564245700836, |
| "learning_rate": 8e-05, |
| "loss": 1.5802, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.14054767403497195, |
| "grad_norm": 0.26874974370002747, |
| "learning_rate": 8e-05, |
| "loss": 1.7884, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.1407676234466073, |
| "grad_norm": 0.2960795760154724, |
| "learning_rate": 8e-05, |
| "loss": 1.7884, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1409875728582426, |
| "grad_norm": 0.3098964989185333, |
| "learning_rate": 8e-05, |
| "loss": 1.8844, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.14120752226987793, |
| "grad_norm": 0.2819165885448456, |
| "learning_rate": 8e-05, |
| "loss": 1.7111, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.14142747168151326, |
| "grad_norm": 0.26352617144584656, |
| "learning_rate": 8e-05, |
| "loss": 1.7337, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.14164742109314857, |
| "grad_norm": 0.2622654139995575, |
| "learning_rate": 8e-05, |
| "loss": 1.7284, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.1418673705047839, |
| "grad_norm": 0.2793010473251343, |
| "learning_rate": 8e-05, |
| "loss": 1.8534, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.14208731991641924, |
| "grad_norm": 0.27972397208213806, |
| "learning_rate": 8e-05, |
| "loss": 1.7658, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.14230726932805454, |
| "grad_norm": 0.25940972566604614, |
| "learning_rate": 8e-05, |
| "loss": 1.6676, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.14252721873968988, |
| "grad_norm": 0.29578897356987, |
| "learning_rate": 8e-05, |
| "loss": 1.8002, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.14274716815132518, |
| "grad_norm": 0.2577681541442871, |
| "learning_rate": 8e-05, |
| "loss": 1.6154, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.14296711756296052, |
| "grad_norm": 0.2615002989768982, |
| "learning_rate": 8e-05, |
| "loss": 1.7539, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14318706697459585, |
| "grad_norm": 0.26044437289237976, |
| "learning_rate": 8e-05, |
| "loss": 1.5284, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.14340701638623116, |
| "grad_norm": 0.28386443853378296, |
| "learning_rate": 8e-05, |
| "loss": 1.7188, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.1436269657978665, |
| "grad_norm": 0.2579086124897003, |
| "learning_rate": 8e-05, |
| "loss": 1.6758, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.14384691520950182, |
| "grad_norm": 0.263192743062973, |
| "learning_rate": 8e-05, |
| "loss": 1.7013, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.14406686462113713, |
| "grad_norm": 0.26551106572151184, |
| "learning_rate": 8e-05, |
| "loss": 1.7314, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.14428681403277246, |
| "grad_norm": 0.26143091917037964, |
| "learning_rate": 8e-05, |
| "loss": 1.7041, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.1445067634444078, |
| "grad_norm": 0.26432663202285767, |
| "learning_rate": 8e-05, |
| "loss": 1.601, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.1447267128560431, |
| "grad_norm": 0.2831920087337494, |
| "learning_rate": 8e-05, |
| "loss": 1.8573, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.14494666226767844, |
| "grad_norm": 0.3045855462551117, |
| "learning_rate": 8e-05, |
| "loss": 1.7853, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.14516661167931375, |
| "grad_norm": 0.28249257802963257, |
| "learning_rate": 8e-05, |
| "loss": 1.7525, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.14538656109094908, |
| "grad_norm": 0.27501189708709717, |
| "learning_rate": 8e-05, |
| "loss": 1.6939, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.1456065105025844, |
| "grad_norm": 0.28419750928878784, |
| "learning_rate": 8e-05, |
| "loss": 1.837, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.14582645991421972, |
| "grad_norm": 0.28872454166412354, |
| "learning_rate": 8e-05, |
| "loss": 1.623, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.14604640932585505, |
| "grad_norm": 0.2926316559314728, |
| "learning_rate": 8e-05, |
| "loss": 1.7438, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.1462663587374904, |
| "grad_norm": 0.2716543972492218, |
| "learning_rate": 8e-05, |
| "loss": 1.8925, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.1464863081491257, |
| "grad_norm": 0.2707289159297943, |
| "learning_rate": 8e-05, |
| "loss": 1.8218, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.14670625756076103, |
| "grad_norm": 0.2609579265117645, |
| "learning_rate": 8e-05, |
| "loss": 1.4612, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.14692620697239636, |
| "grad_norm": 0.2958548367023468, |
| "learning_rate": 8e-05, |
| "loss": 1.6191, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.14714615638403167, |
| "grad_norm": 0.2585492730140686, |
| "learning_rate": 8e-05, |
| "loss": 1.7161, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.147366105795667, |
| "grad_norm": 0.2637808322906494, |
| "learning_rate": 8e-05, |
| "loss": 1.6534, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.1475860552073023, |
| "grad_norm": 0.2885671854019165, |
| "learning_rate": 8e-05, |
| "loss": 1.7663, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.14780600461893764, |
| "grad_norm": 0.27028244733810425, |
| "learning_rate": 8e-05, |
| "loss": 1.7718, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.14802595403057298, |
| "grad_norm": 0.27723586559295654, |
| "learning_rate": 8e-05, |
| "loss": 1.7762, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.14824590344220828, |
| "grad_norm": 0.26336848735809326, |
| "learning_rate": 8e-05, |
| "loss": 1.6114, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.14846585285384362, |
| "grad_norm": 0.26031750440597534, |
| "learning_rate": 8e-05, |
| "loss": 1.7259, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.14868580226547895, |
| "grad_norm": 0.30176040530204773, |
| "learning_rate": 8e-05, |
| "loss": 1.7007, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.14890575167711426, |
| "grad_norm": 0.25952771306037903, |
| "learning_rate": 8e-05, |
| "loss": 1.6573, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.1491257010887496, |
| "grad_norm": 0.2727009356021881, |
| "learning_rate": 8e-05, |
| "loss": 1.7725, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.14934565050038492, |
| "grad_norm": 0.26398420333862305, |
| "learning_rate": 8e-05, |
| "loss": 1.7245, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.14956559991202023, |
| "grad_norm": 0.273967981338501, |
| "learning_rate": 8e-05, |
| "loss": 1.7231, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.14978554932365556, |
| "grad_norm": 0.27241724729537964, |
| "learning_rate": 8e-05, |
| "loss": 1.6896, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.15000549873529087, |
| "grad_norm": 0.26996085047721863, |
| "learning_rate": 8e-05, |
| "loss": 1.6767, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.1502254481469262, |
| "grad_norm": 0.27165672183036804, |
| "learning_rate": 8e-05, |
| "loss": 1.7747, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.15044539755856154, |
| "grad_norm": 0.26840028166770935, |
| "learning_rate": 8e-05, |
| "loss": 1.7616, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.15066534697019685, |
| "grad_norm": 0.27101555466651917, |
| "learning_rate": 8e-05, |
| "loss": 1.622, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.15088529638183218, |
| "grad_norm": 0.2691043019294739, |
| "learning_rate": 8e-05, |
| "loss": 1.7514, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.1511052457934675, |
| "grad_norm": 0.2926357090473175, |
| "learning_rate": 8e-05, |
| "loss": 1.6953, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.15132519520510282, |
| "grad_norm": 0.2730226516723633, |
| "learning_rate": 8e-05, |
| "loss": 1.6286, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.15154514461673815, |
| "grad_norm": 0.2618841826915741, |
| "learning_rate": 8e-05, |
| "loss": 1.7194, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.1517650940283735, |
| "grad_norm": 0.2584119737148285, |
| "learning_rate": 8e-05, |
| "loss": 1.6032, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1519850434400088, |
| "grad_norm": 0.26063093543052673, |
| "learning_rate": 8e-05, |
| "loss": 1.63, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.15220499285164413, |
| "grad_norm": 0.267938494682312, |
| "learning_rate": 8e-05, |
| "loss": 1.7087, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.15242494226327943, |
| "grad_norm": 0.2709169089794159, |
| "learning_rate": 8e-05, |
| "loss": 1.6663, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.15264489167491477, |
| "grad_norm": 0.3015836775302887, |
| "learning_rate": 8e-05, |
| "loss": 1.6797, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.1528648410865501, |
| "grad_norm": 0.27824944257736206, |
| "learning_rate": 8e-05, |
| "loss": 1.7972, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1530847904981854, |
| "grad_norm": 0.31089073419570923, |
| "learning_rate": 8e-05, |
| "loss": 1.7352, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.15330473990982074, |
| "grad_norm": 0.2804546654224396, |
| "learning_rate": 8e-05, |
| "loss": 1.6898, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.15352468932145608, |
| "grad_norm": 0.2804514765739441, |
| "learning_rate": 8e-05, |
| "loss": 1.8409, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.15374463873309138, |
| "grad_norm": 0.31666815280914307, |
| "learning_rate": 8e-05, |
| "loss": 1.6569, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.15396458814472672, |
| "grad_norm": 0.2846215069293976, |
| "learning_rate": 8e-05, |
| "loss": 1.8081, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15418453755636205, |
| "grad_norm": 0.2656068801879883, |
| "learning_rate": 8e-05, |
| "loss": 1.5747, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.15440448696799736, |
| "grad_norm": 0.2633317708969116, |
| "learning_rate": 8e-05, |
| "loss": 1.6027, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.1546244363796327, |
| "grad_norm": 0.2669740319252014, |
| "learning_rate": 8e-05, |
| "loss": 1.6964, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.154844385791268, |
| "grad_norm": 0.2878497540950775, |
| "learning_rate": 8e-05, |
| "loss": 1.677, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.15506433520290333, |
| "grad_norm": 0.2624325156211853, |
| "learning_rate": 8e-05, |
| "loss": 1.6247, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.15528428461453866, |
| "grad_norm": 0.2894291579723358, |
| "learning_rate": 8e-05, |
| "loss": 1.7271, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.15550423402617397, |
| "grad_norm": 0.2924456298351288, |
| "learning_rate": 8e-05, |
| "loss": 1.7475, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.1557241834378093, |
| "grad_norm": 0.2519112229347229, |
| "learning_rate": 8e-05, |
| "loss": 1.6306, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.15594413284944464, |
| "grad_norm": 0.2831405699253082, |
| "learning_rate": 8e-05, |
| "loss": 1.7571, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.15616408226107995, |
| "grad_norm": 0.2804257273674011, |
| "learning_rate": 8e-05, |
| "loss": 1.6721, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.15638403167271528, |
| "grad_norm": 0.27130362391471863, |
| "learning_rate": 8e-05, |
| "loss": 1.7451, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.1566039810843506, |
| "grad_norm": 0.27843937277793884, |
| "learning_rate": 8e-05, |
| "loss": 1.7187, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.15682393049598592, |
| "grad_norm": 0.26205387711524963, |
| "learning_rate": 8e-05, |
| "loss": 1.7667, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.15704387990762125, |
| "grad_norm": 0.25978967547416687, |
| "learning_rate": 8e-05, |
| "loss": 1.6595, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.15726382931925656, |
| "grad_norm": 0.26331478357315063, |
| "learning_rate": 8e-05, |
| "loss": 1.8067, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.1574837787308919, |
| "grad_norm": 0.26023924350738525, |
| "learning_rate": 8e-05, |
| "loss": 1.8533, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.15770372814252723, |
| "grad_norm": 0.27147844433784485, |
| "learning_rate": 8e-05, |
| "loss": 1.6309, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.15792367755416253, |
| "grad_norm": 0.286035418510437, |
| "learning_rate": 8e-05, |
| "loss": 1.72, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.15814362696579787, |
| "grad_norm": 0.3167229890823364, |
| "learning_rate": 8e-05, |
| "loss": 1.9007, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.1583635763774332, |
| "grad_norm": 0.283975750207901, |
| "learning_rate": 8e-05, |
| "loss": 1.6662, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1585835257890685, |
| "grad_norm": 0.2812137007713318, |
| "learning_rate": 8e-05, |
| "loss": 1.7651, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.15880347520070384, |
| "grad_norm": 0.2737642526626587, |
| "learning_rate": 8e-05, |
| "loss": 1.7679, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.15902342461233915, |
| "grad_norm": 0.30812978744506836, |
| "learning_rate": 8e-05, |
| "loss": 1.8408, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.15924337402397448, |
| "grad_norm": 0.27026352286338806, |
| "learning_rate": 8e-05, |
| "loss": 1.7362, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.15946332343560982, |
| "grad_norm": 0.2788861393928528, |
| "learning_rate": 8e-05, |
| "loss": 1.8371, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.15968327284724512, |
| "grad_norm": 0.2623996138572693, |
| "learning_rate": 8e-05, |
| "loss": 1.5855, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.15990322225888046, |
| "grad_norm": 0.2764820158481598, |
| "learning_rate": 8e-05, |
| "loss": 1.8185, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.1601231716705158, |
| "grad_norm": 0.27394816279411316, |
| "learning_rate": 8e-05, |
| "loss": 1.641, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.1603431210821511, |
| "grad_norm": 0.2726307511329651, |
| "learning_rate": 8e-05, |
| "loss": 1.6128, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.16056307049378643, |
| "grad_norm": 0.28221258521080017, |
| "learning_rate": 8e-05, |
| "loss": 1.8413, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.16078301990542176, |
| "grad_norm": 0.2649543881416321, |
| "learning_rate": 8e-05, |
| "loss": 1.5707, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.16100296931705707, |
| "grad_norm": 0.2659435570240021, |
| "learning_rate": 8e-05, |
| "loss": 1.6761, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.1612229187286924, |
| "grad_norm": 0.3131570518016815, |
| "learning_rate": 8e-05, |
| "loss": 1.9439, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.1614428681403277, |
| "grad_norm": 0.263069748878479, |
| "learning_rate": 8e-05, |
| "loss": 1.7069, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.16166281755196305, |
| "grad_norm": 0.2708505392074585, |
| "learning_rate": 8e-05, |
| "loss": 1.8031, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.16188276696359838, |
| "grad_norm": 0.26446613669395447, |
| "learning_rate": 8e-05, |
| "loss": 1.6419, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.16210271637523369, |
| "grad_norm": 0.27720367908477783, |
| "learning_rate": 8e-05, |
| "loss": 1.8291, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.16232266578686902, |
| "grad_norm": 0.25950226187705994, |
| "learning_rate": 8e-05, |
| "loss": 1.7498, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.16254261519850435, |
| "grad_norm": 0.25445327162742615, |
| "learning_rate": 8e-05, |
| "loss": 1.6804, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.16276256461013966, |
| "grad_norm": 0.2868766784667969, |
| "learning_rate": 8e-05, |
| "loss": 1.8058, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.162982514021775, |
| "grad_norm": 0.2775559425354004, |
| "learning_rate": 8e-05, |
| "loss": 1.7971, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.16320246343341033, |
| "grad_norm": 0.2822381556034088, |
| "learning_rate": 8e-05, |
| "loss": 1.7294, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.16342241284504563, |
| "grad_norm": 0.26617857813835144, |
| "learning_rate": 8e-05, |
| "loss": 1.8011, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.16364236225668097, |
| "grad_norm": 0.25615090131759644, |
| "learning_rate": 8e-05, |
| "loss": 1.6328, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.16386231166831627, |
| "grad_norm": 0.25831338763237, |
| "learning_rate": 8e-05, |
| "loss": 1.6174, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1640822610799516, |
| "grad_norm": 0.2707291543483734, |
| "learning_rate": 8e-05, |
| "loss": 1.8217, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.16430221049158694, |
| "grad_norm": 0.3028862774372101, |
| "learning_rate": 8e-05, |
| "loss": 1.5852, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.16452215990322225, |
| "grad_norm": 0.26598575711250305, |
| "learning_rate": 8e-05, |
| "loss": 1.7213, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.16474210931485758, |
| "grad_norm": 0.27408871054649353, |
| "learning_rate": 8e-05, |
| "loss": 1.7109, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.16496205872649292, |
| "grad_norm": 0.27065837383270264, |
| "learning_rate": 8e-05, |
| "loss": 1.6696, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16518200813812822, |
| "grad_norm": 0.2721879184246063, |
| "learning_rate": 8e-05, |
| "loss": 1.7055, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.16540195754976356, |
| "grad_norm": 0.29569125175476074, |
| "learning_rate": 8e-05, |
| "loss": 1.5921, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.1656219069613989, |
| "grad_norm": 0.28580978512763977, |
| "learning_rate": 8e-05, |
| "loss": 1.7518, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.1658418563730342, |
| "grad_norm": 0.2869469225406647, |
| "learning_rate": 8e-05, |
| "loss": 1.8164, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.16606180578466953, |
| "grad_norm": 0.2796071171760559, |
| "learning_rate": 8e-05, |
| "loss": 1.8325, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.16628175519630484, |
| "grad_norm": 0.27365031838417053, |
| "learning_rate": 8e-05, |
| "loss": 1.7287, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.16650170460794017, |
| "grad_norm": 0.2524491846561432, |
| "learning_rate": 8e-05, |
| "loss": 1.5379, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.1667216540195755, |
| "grad_norm": 0.259860634803772, |
| "learning_rate": 8e-05, |
| "loss": 1.5204, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.1669416034312108, |
| "grad_norm": 0.2714100182056427, |
| "learning_rate": 8e-05, |
| "loss": 1.7245, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.16716155284284615, |
| "grad_norm": 0.2729417383670807, |
| "learning_rate": 8e-05, |
| "loss": 1.6889, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.16738150225448148, |
| "grad_norm": 0.2753896415233612, |
| "learning_rate": 8e-05, |
| "loss": 1.7345, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.16760145166611679, |
| "grad_norm": 0.2830727994441986, |
| "learning_rate": 8e-05, |
| "loss": 1.6884, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.16782140107775212, |
| "grad_norm": 0.27818116545677185, |
| "learning_rate": 8e-05, |
| "loss": 1.7819, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.16804135048938745, |
| "grad_norm": 0.2601570785045624, |
| "learning_rate": 8e-05, |
| "loss": 1.6323, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.16826129990102276, |
| "grad_norm": 0.2638706564903259, |
| "learning_rate": 8e-05, |
| "loss": 1.5957, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.1684812493126581, |
| "grad_norm": 0.2798631489276886, |
| "learning_rate": 8e-05, |
| "loss": 1.7946, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.1687011987242934, |
| "grad_norm": 0.2975100874900818, |
| "learning_rate": 8e-05, |
| "loss": 1.871, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.16892114813592873, |
| "grad_norm": 0.28308364748954773, |
| "learning_rate": 8e-05, |
| "loss": 1.7184, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.16914109754756407, |
| "grad_norm": 0.2594911456108093, |
| "learning_rate": 8e-05, |
| "loss": 1.5867, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.16936104695919937, |
| "grad_norm": 0.27594470977783203, |
| "learning_rate": 8e-05, |
| "loss": 1.7722, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.1695809963708347, |
| "grad_norm": 0.2783298194408417, |
| "learning_rate": 8e-05, |
| "loss": 1.7891, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.16980094578247004, |
| "grad_norm": 0.2863733172416687, |
| "learning_rate": 8e-05, |
| "loss": 1.6274, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.17002089519410535, |
| "grad_norm": 0.27953147888183594, |
| "learning_rate": 8e-05, |
| "loss": 1.7287, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.17024084460574068, |
| "grad_norm": 0.2736772894859314, |
| "learning_rate": 8e-05, |
| "loss": 1.6802, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.17046079401737602, |
| "grad_norm": 0.27663713693618774, |
| "learning_rate": 8e-05, |
| "loss": 1.6607, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.17068074342901132, |
| "grad_norm": 0.3064086437225342, |
| "learning_rate": 8e-05, |
| "loss": 1.8244, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.17090069284064666, |
| "grad_norm": 0.29848581552505493, |
| "learning_rate": 8e-05, |
| "loss": 1.7702, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.17112064225228196, |
| "grad_norm": 0.3101220726966858, |
| "learning_rate": 8e-05, |
| "loss": 1.7714, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.1713405916639173, |
| "grad_norm": 0.2754581868648529, |
| "learning_rate": 8e-05, |
| "loss": 1.6367, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.17156054107555263, |
| "grad_norm": 0.2706362307071686, |
| "learning_rate": 8e-05, |
| "loss": 1.6236, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.17178049048718794, |
| "grad_norm": 0.29135438799858093, |
| "learning_rate": 8e-05, |
| "loss": 1.8478, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.17200043989882327, |
| "grad_norm": 0.2751868963241577, |
| "learning_rate": 8e-05, |
| "loss": 1.751, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.1722203893104586, |
| "grad_norm": 0.2871004045009613, |
| "learning_rate": 8e-05, |
| "loss": 1.6793, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.1724403387220939, |
| "grad_norm": 0.31024861335754395, |
| "learning_rate": 8e-05, |
| "loss": 1.7419, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.17266028813372924, |
| "grad_norm": 0.2917722165584564, |
| "learning_rate": 8e-05, |
| "loss": 1.8913, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.17288023754536458, |
| "grad_norm": 0.25443291664123535, |
| "learning_rate": 8e-05, |
| "loss": 1.6991, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.17310018695699989, |
| "grad_norm": 0.2827921211719513, |
| "learning_rate": 8e-05, |
| "loss": 1.8408, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.17332013636863522, |
| "grad_norm": 0.26190435886383057, |
| "learning_rate": 8e-05, |
| "loss": 1.6841, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.17354008578027053, |
| "grad_norm": 0.31557098031044006, |
| "learning_rate": 8e-05, |
| "loss": 1.8838, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.17376003519190586, |
| "grad_norm": 0.27622002363204956, |
| "learning_rate": 8e-05, |
| "loss": 1.6558, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.1739799846035412, |
| "grad_norm": 0.3161294758319855, |
| "learning_rate": 8e-05, |
| "loss": 1.5771, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.1741999340151765, |
| "grad_norm": 0.3014603555202484, |
| "learning_rate": 8e-05, |
| "loss": 1.7742, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.17441988342681183, |
| "grad_norm": 0.24996457993984222, |
| "learning_rate": 8e-05, |
| "loss": 1.5667, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.17463983283844717, |
| "grad_norm": 0.29180648922920227, |
| "learning_rate": 8e-05, |
| "loss": 1.7703, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.17485978225008247, |
| "grad_norm": 0.26707547903060913, |
| "learning_rate": 8e-05, |
| "loss": 1.7964, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.1750797316617178, |
| "grad_norm": 0.24924349784851074, |
| "learning_rate": 8e-05, |
| "loss": 1.6619, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.17529968107335314, |
| "grad_norm": 0.29872292280197144, |
| "learning_rate": 8e-05, |
| "loss": 1.8561, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.17551963048498845, |
| "grad_norm": 0.2770175337791443, |
| "learning_rate": 8e-05, |
| "loss": 1.6352, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.17573957989662378, |
| "grad_norm": 0.26890453696250916, |
| "learning_rate": 8e-05, |
| "loss": 1.885, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.1759595293082591, |
| "grad_norm": 0.2830483317375183, |
| "learning_rate": 8e-05, |
| "loss": 1.6029, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17617947871989442, |
| "grad_norm": 0.27421921491622925, |
| "learning_rate": 8e-05, |
| "loss": 1.6845, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.17639942813152976, |
| "grad_norm": 0.29273220896720886, |
| "learning_rate": 8e-05, |
| "loss": 1.8135, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.17661937754316506, |
| "grad_norm": 0.2675575315952301, |
| "learning_rate": 8e-05, |
| "loss": 1.571, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.1768393269548004, |
| "grad_norm": 0.2821138799190521, |
| "learning_rate": 8e-05, |
| "loss": 1.9244, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.17705927636643573, |
| "grad_norm": 0.28082311153411865, |
| "learning_rate": 8e-05, |
| "loss": 1.7395, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.17727922577807104, |
| "grad_norm": 0.27897313237190247, |
| "learning_rate": 8e-05, |
| "loss": 1.6347, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.17749917518970637, |
| "grad_norm": 0.27358707785606384, |
| "learning_rate": 8e-05, |
| "loss": 1.7643, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.1777191246013417, |
| "grad_norm": 0.284059077501297, |
| "learning_rate": 8e-05, |
| "loss": 1.5789, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.177939074012977, |
| "grad_norm": 0.26125824451446533, |
| "learning_rate": 8e-05, |
| "loss": 1.7029, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.17815902342461234, |
| "grad_norm": 0.26438888907432556, |
| "learning_rate": 8e-05, |
| "loss": 1.6424, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.17837897283624765, |
| "grad_norm": 0.2746163010597229, |
| "learning_rate": 8e-05, |
| "loss": 1.7992, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.17859892224788299, |
| "grad_norm": 0.27717527747154236, |
| "learning_rate": 8e-05, |
| "loss": 1.7603, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.17881887165951832, |
| "grad_norm": 0.28336596488952637, |
| "learning_rate": 8e-05, |
| "loss": 1.7133, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.17903882107115363, |
| "grad_norm": 0.2701306939125061, |
| "learning_rate": 8e-05, |
| "loss": 1.7724, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.17925877048278896, |
| "grad_norm": 0.2807336449623108, |
| "learning_rate": 8e-05, |
| "loss": 1.7832, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.1794787198944243, |
| "grad_norm": 0.2847912907600403, |
| "learning_rate": 8e-05, |
| "loss": 1.7042, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.1796986693060596, |
| "grad_norm": 0.2836345434188843, |
| "learning_rate": 8e-05, |
| "loss": 1.8506, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.17991861871769493, |
| "grad_norm": 0.30620551109313965, |
| "learning_rate": 8e-05, |
| "loss": 1.7695, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.18013856812933027, |
| "grad_norm": 0.2698993980884552, |
| "learning_rate": 8e-05, |
| "loss": 1.6388, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.18035851754096557, |
| "grad_norm": 0.2937266528606415, |
| "learning_rate": 8e-05, |
| "loss": 1.8648, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1805784669526009, |
| "grad_norm": 0.2661988139152527, |
| "learning_rate": 8e-05, |
| "loss": 1.7563, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.18079841636423621, |
| "grad_norm": 0.2944018840789795, |
| "learning_rate": 8e-05, |
| "loss": 1.882, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.18101836577587155, |
| "grad_norm": 0.2774435579776764, |
| "learning_rate": 8e-05, |
| "loss": 1.8117, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.18123831518750688, |
| "grad_norm": 0.27865204215049744, |
| "learning_rate": 8e-05, |
| "loss": 1.8815, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.1814582645991422, |
| "grad_norm": 0.26444011926651, |
| "learning_rate": 8e-05, |
| "loss": 1.5844, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.18167821401077752, |
| "grad_norm": 0.27044716477394104, |
| "learning_rate": 8e-05, |
| "loss": 1.7403, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.18189816342241286, |
| "grad_norm": 0.28727805614471436, |
| "learning_rate": 8e-05, |
| "loss": 1.8556, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.18211811283404816, |
| "grad_norm": 0.26131972670555115, |
| "learning_rate": 8e-05, |
| "loss": 1.7727, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.1823380622456835, |
| "grad_norm": 0.269638329744339, |
| "learning_rate": 8e-05, |
| "loss": 1.6795, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.18255801165731883, |
| "grad_norm": 0.2671653628349304, |
| "learning_rate": 8e-05, |
| "loss": 1.6811, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.18277796106895414, |
| "grad_norm": 0.2659014165401459, |
| "learning_rate": 8e-05, |
| "loss": 1.7166, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.18299791048058947, |
| "grad_norm": 0.2719801962375641, |
| "learning_rate": 8e-05, |
| "loss": 1.6938, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.18321785989222478, |
| "grad_norm": 0.3272366225719452, |
| "learning_rate": 8e-05, |
| "loss": 1.8213, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.1834378093038601, |
| "grad_norm": 0.2635113000869751, |
| "learning_rate": 8e-05, |
| "loss": 1.6291, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.18365775871549544, |
| "grad_norm": 0.29401281476020813, |
| "learning_rate": 8e-05, |
| "loss": 1.8234, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.18387770812713075, |
| "grad_norm": 0.29188451170921326, |
| "learning_rate": 8e-05, |
| "loss": 1.7359, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.18409765753876609, |
| "grad_norm": 0.2688080072402954, |
| "learning_rate": 8e-05, |
| "loss": 1.7088, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.18431760695040142, |
| "grad_norm": 0.27907344698905945, |
| "learning_rate": 8e-05, |
| "loss": 1.6762, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.18453755636203673, |
| "grad_norm": 0.2875908315181732, |
| "learning_rate": 8e-05, |
| "loss": 1.7612, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.18475750577367206, |
| "grad_norm": 0.2683177888393402, |
| "learning_rate": 8e-05, |
| "loss": 1.5965, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.18497745518530737, |
| "grad_norm": 0.29948660731315613, |
| "learning_rate": 8e-05, |
| "loss": 1.7358, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.1851974045969427, |
| "grad_norm": 0.28153204917907715, |
| "learning_rate": 8e-05, |
| "loss": 1.8089, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.18541735400857803, |
| "grad_norm": 0.29185283184051514, |
| "learning_rate": 8e-05, |
| "loss": 1.8357, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.18563730342021334, |
| "grad_norm": 0.27565860748291016, |
| "learning_rate": 8e-05, |
| "loss": 1.9565, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.18585725283184867, |
| "grad_norm": 0.2811479866504669, |
| "learning_rate": 8e-05, |
| "loss": 1.8493, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.186077202243484, |
| "grad_norm": 0.271893173456192, |
| "learning_rate": 8e-05, |
| "loss": 1.7622, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.18629715165511931, |
| "grad_norm": 0.26383113861083984, |
| "learning_rate": 8e-05, |
| "loss": 1.7392, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.18651710106675465, |
| "grad_norm": 0.2863881289958954, |
| "learning_rate": 8e-05, |
| "loss": 1.7367, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.18673705047838998, |
| "grad_norm": 0.28036433458328247, |
| "learning_rate": 8e-05, |
| "loss": 1.6587, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.1869569998900253, |
| "grad_norm": 0.2938581705093384, |
| "learning_rate": 8e-05, |
| "loss": 1.7411, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.18717694930166062, |
| "grad_norm": 0.27487799525260925, |
| "learning_rate": 8e-05, |
| "loss": 1.8054, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.18739689871329593, |
| "grad_norm": 0.2693670690059662, |
| "learning_rate": 8e-05, |
| "loss": 1.7361, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.18761684812493126, |
| "grad_norm": 0.2999705970287323, |
| "learning_rate": 8e-05, |
| "loss": 1.909, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.1878367975365666, |
| "grad_norm": 0.28235265612602234, |
| "learning_rate": 8e-05, |
| "loss": 1.8611, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.1880567469482019, |
| "grad_norm": 0.28417298197746277, |
| "learning_rate": 8e-05, |
| "loss": 1.683, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.18827669635983724, |
| "grad_norm": 0.2697356045246124, |
| "learning_rate": 8e-05, |
| "loss": 1.7138, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.18849664577147257, |
| "grad_norm": 0.26900357007980347, |
| "learning_rate": 8e-05, |
| "loss": 1.5579, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.18871659518310788, |
| "grad_norm": 0.259941041469574, |
| "learning_rate": 8e-05, |
| "loss": 1.7106, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.1889365445947432, |
| "grad_norm": 0.26958781480789185, |
| "learning_rate": 8e-05, |
| "loss": 1.6454, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.18915649400637854, |
| "grad_norm": 0.26425305008888245, |
| "learning_rate": 8e-05, |
| "loss": 1.6408, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.18937644341801385, |
| "grad_norm": 0.26996907591819763, |
| "learning_rate": 8e-05, |
| "loss": 1.6949, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.18959639282964919, |
| "grad_norm": 0.25882837176322937, |
| "learning_rate": 8e-05, |
| "loss": 1.6142, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.1898163422412845, |
| "grad_norm": 0.28000783920288086, |
| "learning_rate": 8e-05, |
| "loss": 1.8007, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.19003629165291983, |
| "grad_norm": 0.2744222581386566, |
| "learning_rate": 8e-05, |
| "loss": 1.6604, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.19025624106455516, |
| "grad_norm": 0.2791576683521271, |
| "learning_rate": 8e-05, |
| "loss": 1.7061, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.19047619047619047, |
| "grad_norm": 0.27878084778785706, |
| "learning_rate": 8e-05, |
| "loss": 1.8604, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.1906961398878258, |
| "grad_norm": 0.3818608820438385, |
| "learning_rate": 8e-05, |
| "loss": 1.8616, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.19091608929946113, |
| "grad_norm": 0.27952665090560913, |
| "learning_rate": 8e-05, |
| "loss": 1.7616, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.19113603871109644, |
| "grad_norm": 0.2711832523345947, |
| "learning_rate": 8e-05, |
| "loss": 1.7974, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.19135598812273177, |
| "grad_norm": 0.2572176456451416, |
| "learning_rate": 8e-05, |
| "loss": 1.584, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.1915759375343671, |
| "grad_norm": 0.2847760319709778, |
| "learning_rate": 8e-05, |
| "loss": 1.8598, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.19179588694600241, |
| "grad_norm": 0.29798731207847595, |
| "learning_rate": 8e-05, |
| "loss": 1.6689, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.19201583635763775, |
| "grad_norm": 0.2674097716808319, |
| "learning_rate": 8e-05, |
| "loss": 1.6694, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.19223578576927305, |
| "grad_norm": 0.27707335352897644, |
| "learning_rate": 8e-05, |
| "loss": 1.7251, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.1924557351809084, |
| "grad_norm": 0.2801666259765625, |
| "learning_rate": 8e-05, |
| "loss": 1.7251, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.19267568459254372, |
| "grad_norm": 0.2656191885471344, |
| "learning_rate": 8e-05, |
| "loss": 1.6232, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.19289563400417903, |
| "grad_norm": 0.2588733732700348, |
| "learning_rate": 8e-05, |
| "loss": 1.7595, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.19311558341581436, |
| "grad_norm": 0.2999958097934723, |
| "learning_rate": 8e-05, |
| "loss": 1.9095, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.1933355328274497, |
| "grad_norm": 0.27143120765686035, |
| "learning_rate": 8e-05, |
| "loss": 1.6698, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.193555482239085, |
| "grad_norm": 0.29155731201171875, |
| "learning_rate": 8e-05, |
| "loss": 1.6437, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.19377543165072034, |
| "grad_norm": 0.26307716965675354, |
| "learning_rate": 8e-05, |
| "loss": 1.6161, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.19399538106235567, |
| "grad_norm": 0.27041196823120117, |
| "learning_rate": 8e-05, |
| "loss": 1.5374, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.19421533047399098, |
| "grad_norm": 0.2752692699432373, |
| "learning_rate": 8e-05, |
| "loss": 1.6543, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.1944352798856263, |
| "grad_norm": 0.2883388102054596, |
| "learning_rate": 8e-05, |
| "loss": 1.7503, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.19465522929726162, |
| "grad_norm": 0.27332282066345215, |
| "learning_rate": 8e-05, |
| "loss": 1.8456, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.19487517870889695, |
| "grad_norm": 0.26226627826690674, |
| "learning_rate": 8e-05, |
| "loss": 1.2577, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.19509512812053229, |
| "grad_norm": 0.2709749639034271, |
| "learning_rate": 8e-05, |
| "loss": 1.7854, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.1953150775321676, |
| "grad_norm": 0.28380879759788513, |
| "learning_rate": 8e-05, |
| "loss": 1.7151, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.19553502694380293, |
| "grad_norm": 0.2702254354953766, |
| "learning_rate": 8e-05, |
| "loss": 1.6779, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.19575497635543826, |
| "grad_norm": 0.2620486617088318, |
| "learning_rate": 8e-05, |
| "loss": 1.7166, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.19597492576707357, |
| "grad_norm": 0.27195873856544495, |
| "learning_rate": 8e-05, |
| "loss": 1.7263, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.1961948751787089, |
| "grad_norm": 0.2719867527484894, |
| "learning_rate": 8e-05, |
| "loss": 1.6982, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.19641482459034423, |
| "grad_norm": 0.27889111638069153, |
| "learning_rate": 8e-05, |
| "loss": 1.7726, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.19663477400197954, |
| "grad_norm": 0.2745397686958313, |
| "learning_rate": 8e-05, |
| "loss": 1.7737, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.19685472341361487, |
| "grad_norm": 0.2698670029640198, |
| "learning_rate": 8e-05, |
| "loss": 1.7149, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.19707467282525018, |
| "grad_norm": 0.27113667130470276, |
| "learning_rate": 8e-05, |
| "loss": 1.7887, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.19729462223688551, |
| "grad_norm": 0.2772979140281677, |
| "learning_rate": 8e-05, |
| "loss": 1.8163, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.19751457164852085, |
| "grad_norm": 0.2757657766342163, |
| "learning_rate": 8e-05, |
| "loss": 1.636, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.19773452106015615, |
| "grad_norm": 0.26945242285728455, |
| "learning_rate": 8e-05, |
| "loss": 1.7639, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.1979544704717915, |
| "grad_norm": 0.27328991889953613, |
| "learning_rate": 8e-05, |
| "loss": 1.8421, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.19817441988342682, |
| "grad_norm": 0.2721468210220337, |
| "learning_rate": 8e-05, |
| "loss": 1.6926, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.19839436929506213, |
| "grad_norm": 0.2633766233921051, |
| "learning_rate": 8e-05, |
| "loss": 1.7629, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.19861431870669746, |
| "grad_norm": 0.26183879375457764, |
| "learning_rate": 8e-05, |
| "loss": 1.7149, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.1988342681183328, |
| "grad_norm": 0.2837960422039032, |
| "learning_rate": 8e-05, |
| "loss": 1.7923, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.1990542175299681, |
| "grad_norm": 0.30745571851730347, |
| "learning_rate": 8e-05, |
| "loss": 1.8315, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.19927416694160344, |
| "grad_norm": 0.2734341323375702, |
| "learning_rate": 8e-05, |
| "loss": 1.7424, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.19949411635323874, |
| "grad_norm": 0.2613460123538971, |
| "learning_rate": 8e-05, |
| "loss": 1.6445, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.19971406576487408, |
| "grad_norm": 0.27867522835731506, |
| "learning_rate": 8e-05, |
| "loss": 1.7075, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.1999340151765094, |
| "grad_norm": 0.269789457321167, |
| "learning_rate": 8e-05, |
| "loss": 1.6801, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.20015396458814472, |
| "grad_norm": 0.2684427797794342, |
| "learning_rate": 8e-05, |
| "loss": 1.6862, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.20037391399978005, |
| "grad_norm": 0.2929883897304535, |
| "learning_rate": 8e-05, |
| "loss": 1.7972, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.20059386341141539, |
| "grad_norm": 0.2757764756679535, |
| "learning_rate": 8e-05, |
| "loss": 1.6198, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.2008138128230507, |
| "grad_norm": 0.28071129322052, |
| "learning_rate": 8e-05, |
| "loss": 1.8268, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.20103376223468603, |
| "grad_norm": 0.2964448928833008, |
| "learning_rate": 8e-05, |
| "loss": 1.7806, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.20125371164632136, |
| "grad_norm": 0.2682490050792694, |
| "learning_rate": 8e-05, |
| "loss": 1.7016, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.20147366105795667, |
| "grad_norm": 0.2838338613510132, |
| "learning_rate": 8e-05, |
| "loss": 1.7402, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.201693610469592, |
| "grad_norm": 0.27621790766716003, |
| "learning_rate": 8e-05, |
| "loss": 1.7442, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.2019135598812273, |
| "grad_norm": 0.29265734553337097, |
| "learning_rate": 8e-05, |
| "loss": 1.6924, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.20213350929286264, |
| "grad_norm": 0.27404630184173584, |
| "learning_rate": 8e-05, |
| "loss": 1.7312, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.20235345870449797, |
| "grad_norm": 0.2742730975151062, |
| "learning_rate": 8e-05, |
| "loss": 1.8645, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.20257340811613328, |
| "grad_norm": 0.28536343574523926, |
| "learning_rate": 8e-05, |
| "loss": 1.717, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.20279335752776861, |
| "grad_norm": 0.28739288449287415, |
| "learning_rate": 8e-05, |
| "loss": 1.8356, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.20301330693940395, |
| "grad_norm": 0.2650564908981323, |
| "learning_rate": 8e-05, |
| "loss": 1.717, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.20323325635103925, |
| "grad_norm": 0.28638410568237305, |
| "learning_rate": 8e-05, |
| "loss": 1.8822, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.2034532057626746, |
| "grad_norm": 0.25474488735198975, |
| "learning_rate": 8e-05, |
| "loss": 1.5533, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.20367315517430992, |
| "grad_norm": 0.2719588279724121, |
| "learning_rate": 8e-05, |
| "loss": 1.7887, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.20389310458594523, |
| "grad_norm": 0.2572193741798401, |
| "learning_rate": 8e-05, |
| "loss": 1.5735, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.20411305399758056, |
| "grad_norm": 0.2975933253765106, |
| "learning_rate": 8e-05, |
| "loss": 1.7875, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.20433300340921587, |
| "grad_norm": 0.2562117874622345, |
| "learning_rate": 8e-05, |
| "loss": 1.5977, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.2045529528208512, |
| "grad_norm": 0.2524821162223816, |
| "learning_rate": 8e-05, |
| "loss": 1.6356, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.20477290223248654, |
| "grad_norm": 0.2621130347251892, |
| "learning_rate": 8e-05, |
| "loss": 1.6082, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.20499285164412184, |
| "grad_norm": 0.27930355072021484, |
| "learning_rate": 8e-05, |
| "loss": 1.7618, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.20521280105575718, |
| "grad_norm": 0.29147934913635254, |
| "learning_rate": 8e-05, |
| "loss": 1.8223, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.2054327504673925, |
| "grad_norm": 0.2584928870201111, |
| "learning_rate": 8e-05, |
| "loss": 1.6916, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.20565269987902782, |
| "grad_norm": 0.27299705147743225, |
| "learning_rate": 8e-05, |
| "loss": 1.5535, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.20587264929066315, |
| "grad_norm": 0.2682443857192993, |
| "learning_rate": 8e-05, |
| "loss": 1.7119, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.20609259870229849, |
| "grad_norm": 0.29716598987579346, |
| "learning_rate": 8e-05, |
| "loss": 2.0561, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.2063125481139338, |
| "grad_norm": 0.27801281213760376, |
| "learning_rate": 8e-05, |
| "loss": 1.7605, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.20653249752556913, |
| "grad_norm": 0.26767662167549133, |
| "learning_rate": 8e-05, |
| "loss": 1.6462, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.20675244693720443, |
| "grad_norm": 0.27354639768600464, |
| "learning_rate": 8e-05, |
| "loss": 1.7241, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.20697239634883977, |
| "grad_norm": 0.2684631049633026, |
| "learning_rate": 8e-05, |
| "loss": 1.8066, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.2071923457604751, |
| "grad_norm": 0.27846816182136536, |
| "learning_rate": 8e-05, |
| "loss": 1.7553, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.2074122951721104, |
| "grad_norm": 0.2820284366607666, |
| "learning_rate": 8e-05, |
| "loss": 1.8302, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.20763224458374574, |
| "grad_norm": 0.28080835938453674, |
| "learning_rate": 8e-05, |
| "loss": 1.7544, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.20785219399538107, |
| "grad_norm": 0.28095102310180664, |
| "learning_rate": 8e-05, |
| "loss": 1.7271, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.20807214340701638, |
| "grad_norm": 0.27856144309043884, |
| "learning_rate": 8e-05, |
| "loss": 1.8441, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.20829209281865171, |
| "grad_norm": 0.27816230058670044, |
| "learning_rate": 8e-05, |
| "loss": 1.981, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.20851204223028702, |
| "grad_norm": 0.2954215705394745, |
| "learning_rate": 8e-05, |
| "loss": 1.8001, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.20873199164192235, |
| "grad_norm": 0.24413350224494934, |
| "learning_rate": 8e-05, |
| "loss": 1.5436, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.2089519410535577, |
| "grad_norm": 0.2849874198436737, |
| "learning_rate": 8e-05, |
| "loss": 1.7027, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.209171890465193, |
| "grad_norm": 0.2710252106189728, |
| "learning_rate": 8e-05, |
| "loss": 1.7222, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.20939183987682833, |
| "grad_norm": 0.2557348608970642, |
| "learning_rate": 8e-05, |
| "loss": 1.6469, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.20961178928846366, |
| "grad_norm": 0.2688618004322052, |
| "learning_rate": 8e-05, |
| "loss": 1.6471, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.20983173870009897, |
| "grad_norm": 0.28641626238822937, |
| "learning_rate": 8e-05, |
| "loss": 1.8796, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.2100516881117343, |
| "grad_norm": 0.2582222521305084, |
| "learning_rate": 8e-05, |
| "loss": 1.5961, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.21027163752336964, |
| "grad_norm": 0.2615504562854767, |
| "learning_rate": 8e-05, |
| "loss": 1.6668, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.21049158693500494, |
| "grad_norm": 0.2669670879840851, |
| "learning_rate": 8e-05, |
| "loss": 1.663, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.21071153634664028, |
| "grad_norm": 0.2649092972278595, |
| "learning_rate": 8e-05, |
| "loss": 1.5377, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.21093148575827558, |
| "grad_norm": 0.2936461865901947, |
| "learning_rate": 8e-05, |
| "loss": 1.5659, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.21115143516991092, |
| "grad_norm": 0.2878846824169159, |
| "learning_rate": 8e-05, |
| "loss": 1.8567, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.21137138458154625, |
| "grad_norm": 0.2928799092769623, |
| "learning_rate": 8e-05, |
| "loss": 1.8423, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.21159133399318156, |
| "grad_norm": 0.2641200125217438, |
| "learning_rate": 8e-05, |
| "loss": 1.6403, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.2118112834048169, |
| "grad_norm": 0.26553985476493835, |
| "learning_rate": 8e-05, |
| "loss": 1.7436, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.21203123281645223, |
| "grad_norm": 0.25616276264190674, |
| "learning_rate": 8e-05, |
| "loss": 1.5959, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.21225118222808753, |
| "grad_norm": 0.29729175567626953, |
| "learning_rate": 8e-05, |
| "loss": 1.7164, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.21247113163972287, |
| "grad_norm": 0.2739759683609009, |
| "learning_rate": 8e-05, |
| "loss": 1.7797, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.2126910810513582, |
| "grad_norm": 0.2686353921890259, |
| "learning_rate": 8e-05, |
| "loss": 1.6974, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.2129110304629935, |
| "grad_norm": 0.261820912361145, |
| "learning_rate": 8e-05, |
| "loss": 1.6864, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.21313097987462884, |
| "grad_norm": 0.26877105236053467, |
| "learning_rate": 8e-05, |
| "loss": 1.6164, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.21335092928626415, |
| "grad_norm": 0.2555043399333954, |
| "learning_rate": 8e-05, |
| "loss": 1.6898, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.21357087869789948, |
| "grad_norm": 0.28584909439086914, |
| "learning_rate": 8e-05, |
| "loss": 1.9125, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.21379082810953481, |
| "grad_norm": 0.2830945551395416, |
| "learning_rate": 8e-05, |
| "loss": 1.6416, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.21401077752117012, |
| "grad_norm": 0.27979904413223267, |
| "learning_rate": 8e-05, |
| "loss": 1.8355, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.21423072693280545, |
| "grad_norm": 0.2672286033630371, |
| "learning_rate": 8e-05, |
| "loss": 1.685, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.2144506763444408, |
| "grad_norm": 0.26699069142341614, |
| "learning_rate": 8e-05, |
| "loss": 1.5951, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.2146706257560761, |
| "grad_norm": 0.2720418870449066, |
| "learning_rate": 8e-05, |
| "loss": 1.7558, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.21489057516771143, |
| "grad_norm": 0.26792463660240173, |
| "learning_rate": 8e-05, |
| "loss": 1.7407, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.21511052457934676, |
| "grad_norm": 0.2763652503490448, |
| "learning_rate": 8e-05, |
| "loss": 1.7525, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.21533047399098207, |
| "grad_norm": 0.2952554225921631, |
| "learning_rate": 8e-05, |
| "loss": 1.6535, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.2155504234026174, |
| "grad_norm": 0.24981874227523804, |
| "learning_rate": 8e-05, |
| "loss": 1.6055, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.2157703728142527, |
| "grad_norm": 0.29071807861328125, |
| "learning_rate": 8e-05, |
| "loss": 1.7461, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.21599032222588804, |
| "grad_norm": 0.26875782012939453, |
| "learning_rate": 8e-05, |
| "loss": 1.5809, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.21621027163752338, |
| "grad_norm": 0.2519072890281677, |
| "learning_rate": 8e-05, |
| "loss": 1.7001, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.21643022104915868, |
| "grad_norm": 0.2748781144618988, |
| "learning_rate": 8e-05, |
| "loss": 1.8367, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.21665017046079402, |
| "grad_norm": 0.274047315120697, |
| "learning_rate": 8e-05, |
| "loss": 1.7698, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.21687011987242935, |
| "grad_norm": 0.2614712119102478, |
| "learning_rate": 8e-05, |
| "loss": 1.5411, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.21709006928406466, |
| "grad_norm": 0.2714536190032959, |
| "learning_rate": 8e-05, |
| "loss": 1.6058, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.2173100186957, |
| "grad_norm": 0.28763729333877563, |
| "learning_rate": 8e-05, |
| "loss": 1.6711, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.21752996810733533, |
| "grad_norm": 0.26780402660369873, |
| "learning_rate": 8e-05, |
| "loss": 1.549, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.21774991751897063, |
| "grad_norm": 0.28782159090042114, |
| "learning_rate": 8e-05, |
| "loss": 1.8305, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.21796986693060597, |
| "grad_norm": 0.2859013080596924, |
| "learning_rate": 8e-05, |
| "loss": 1.7794, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.21818981634224127, |
| "grad_norm": 0.2893369197845459, |
| "learning_rate": 8e-05, |
| "loss": 1.6284, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.2184097657538766, |
| "grad_norm": 0.2809627652168274, |
| "learning_rate": 8e-05, |
| "loss": 1.6401, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.21862971516551194, |
| "grad_norm": 0.2700895667076111, |
| "learning_rate": 8e-05, |
| "loss": 1.7153, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.21884966457714725, |
| "grad_norm": 0.26506903767585754, |
| "learning_rate": 8e-05, |
| "loss": 1.4688, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.21906961398878258, |
| "grad_norm": 0.28202024102211, |
| "learning_rate": 8e-05, |
| "loss": 1.7009, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.21928956340041791, |
| "grad_norm": 0.2625409960746765, |
| "learning_rate": 8e-05, |
| "loss": 1.6491, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.21950951281205322, |
| "grad_norm": 0.29967787861824036, |
| "learning_rate": 8e-05, |
| "loss": 1.7231, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.21972946222368855, |
| "grad_norm": 0.2992357909679413, |
| "learning_rate": 8e-05, |
| "loss": 1.7028, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.2199494116353239, |
| "grad_norm": 0.28712475299835205, |
| "learning_rate": 8e-05, |
| "loss": 1.763, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2201693610469592, |
| "grad_norm": 0.26186901330947876, |
| "learning_rate": 8e-05, |
| "loss": 1.5695, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.22038931045859453, |
| "grad_norm": 0.2897952198982239, |
| "learning_rate": 8e-05, |
| "loss": 1.6303, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.22060925987022983, |
| "grad_norm": 0.2761494815349579, |
| "learning_rate": 8e-05, |
| "loss": 1.7448, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.22082920928186517, |
| "grad_norm": 0.2604154944419861, |
| "learning_rate": 8e-05, |
| "loss": 1.53, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.2210491586935005, |
| "grad_norm": 0.2897418737411499, |
| "learning_rate": 8e-05, |
| "loss": 1.7639, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.2212691081051358, |
| "grad_norm": 0.28289687633514404, |
| "learning_rate": 8e-05, |
| "loss": 1.7202, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.22148905751677114, |
| "grad_norm": 0.26917099952697754, |
| "learning_rate": 8e-05, |
| "loss": 1.7183, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.22170900692840648, |
| "grad_norm": 0.26708024740219116, |
| "learning_rate": 8e-05, |
| "loss": 1.636, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.22192895634004178, |
| "grad_norm": 0.2759459316730499, |
| "learning_rate": 8e-05, |
| "loss": 1.6537, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.22214890575167712, |
| "grad_norm": 0.3040393590927124, |
| "learning_rate": 8e-05, |
| "loss": 1.7849, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.22236885516331245, |
| "grad_norm": 0.2729750871658325, |
| "learning_rate": 8e-05, |
| "loss": 1.8199, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.22258880457494776, |
| "grad_norm": 0.28002965450286865, |
| "learning_rate": 8e-05, |
| "loss": 1.8286, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.2228087539865831, |
| "grad_norm": 0.27389100193977356, |
| "learning_rate": 8e-05, |
| "loss": 1.6472, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.2230287033982184, |
| "grad_norm": 0.2610195279121399, |
| "learning_rate": 8e-05, |
| "loss": 1.6096, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.22324865280985373, |
| "grad_norm": 0.2683162987232208, |
| "learning_rate": 8e-05, |
| "loss": 1.6477, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.22346860222148907, |
| "grad_norm": 0.26524773240089417, |
| "learning_rate": 8e-05, |
| "loss": 1.6224, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.22368855163312437, |
| "grad_norm": 0.26295366883277893, |
| "learning_rate": 8e-05, |
| "loss": 1.7316, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.2239085010447597, |
| "grad_norm": 0.2837565243244171, |
| "learning_rate": 8e-05, |
| "loss": 1.9452, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.22412845045639504, |
| "grad_norm": 0.28365132212638855, |
| "learning_rate": 8e-05, |
| "loss": 1.6577, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.22434839986803035, |
| "grad_norm": 0.2736522853374481, |
| "learning_rate": 8e-05, |
| "loss": 1.6644, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.22456834927966568, |
| "grad_norm": 0.2878374755382538, |
| "learning_rate": 8e-05, |
| "loss": 1.5844, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.22478829869130101, |
| "grad_norm": 0.28223422169685364, |
| "learning_rate": 8e-05, |
| "loss": 1.881, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.22500824810293632, |
| "grad_norm": 0.26408734917640686, |
| "learning_rate": 8e-05, |
| "loss": 1.6201, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.22522819751457165, |
| "grad_norm": 0.28506824374198914, |
| "learning_rate": 8e-05, |
| "loss": 1.8146, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.22544814692620696, |
| "grad_norm": 0.2808188796043396, |
| "learning_rate": 8e-05, |
| "loss": 1.8394, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2256680963378423, |
| "grad_norm": 0.2950645387172699, |
| "learning_rate": 8e-05, |
| "loss": 1.7993, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.22588804574947763, |
| "grad_norm": 0.27935850620269775, |
| "learning_rate": 8e-05, |
| "loss": 1.6506, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.22610799516111293, |
| "grad_norm": 0.2576957643032074, |
| "learning_rate": 8e-05, |
| "loss": 1.6987, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.22632794457274827, |
| "grad_norm": 0.2719384729862213, |
| "learning_rate": 8e-05, |
| "loss": 1.6407, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.2265478939843836, |
| "grad_norm": 0.25457167625427246, |
| "learning_rate": 8e-05, |
| "loss": 1.6877, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2267678433960189, |
| "grad_norm": 0.2758035659790039, |
| "learning_rate": 8e-05, |
| "loss": 1.6739, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.22698779280765424, |
| "grad_norm": 0.27135321497917175, |
| "learning_rate": 8e-05, |
| "loss": 1.7124, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.22720774221928958, |
| "grad_norm": 0.2675740420818329, |
| "learning_rate": 8e-05, |
| "loss": 1.7857, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.22742769163092488, |
| "grad_norm": 0.28627943992614746, |
| "learning_rate": 8e-05, |
| "loss": 1.7012, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.22764764104256022, |
| "grad_norm": 0.2710109353065491, |
| "learning_rate": 8e-05, |
| "loss": 1.6463, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.22786759045419552, |
| "grad_norm": 0.27190473675727844, |
| "learning_rate": 8e-05, |
| "loss": 1.7288, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.22808753986583086, |
| "grad_norm": 0.2503564953804016, |
| "learning_rate": 8e-05, |
| "loss": 1.566, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.2283074892774662, |
| "grad_norm": 0.26503992080688477, |
| "learning_rate": 8e-05, |
| "loss": 1.7034, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.2285274386891015, |
| "grad_norm": 0.29445260763168335, |
| "learning_rate": 8e-05, |
| "loss": 1.6739, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.22874738810073683, |
| "grad_norm": 0.25705471634864807, |
| "learning_rate": 8e-05, |
| "loss": 1.6503, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.22896733751237217, |
| "grad_norm": 0.27109014987945557, |
| "learning_rate": 8e-05, |
| "loss": 1.8045, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.22918728692400747, |
| "grad_norm": 0.2972055673599243, |
| "learning_rate": 8e-05, |
| "loss": 1.6439, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.2294072363356428, |
| "grad_norm": 0.27126485109329224, |
| "learning_rate": 8e-05, |
| "loss": 1.672, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.22962718574727814, |
| "grad_norm": 0.2731145918369293, |
| "learning_rate": 8e-05, |
| "loss": 1.7795, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.22984713515891345, |
| "grad_norm": 0.2768365442752838, |
| "learning_rate": 8e-05, |
| "loss": 1.7145, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.23006708457054878, |
| "grad_norm": 0.2606940269470215, |
| "learning_rate": 8e-05, |
| "loss": 1.6169, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.2302870339821841, |
| "grad_norm": 0.2898729741573334, |
| "learning_rate": 8e-05, |
| "loss": 1.7315, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.23050698339381942, |
| "grad_norm": 0.2772413194179535, |
| "learning_rate": 8e-05, |
| "loss": 1.8632, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.23072693280545475, |
| "grad_norm": 0.25808605551719666, |
| "learning_rate": 8e-05, |
| "loss": 1.6626, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.23094688221709006, |
| "grad_norm": 0.2727161645889282, |
| "learning_rate": 8e-05, |
| "loss": 1.7848, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2311668316287254, |
| "grad_norm": 0.25677087903022766, |
| "learning_rate": 8e-05, |
| "loss": 1.6168, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.23138678104036073, |
| "grad_norm": 0.2761050760746002, |
| "learning_rate": 8e-05, |
| "loss": 1.8615, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.23160673045199603, |
| "grad_norm": 0.2862778604030609, |
| "learning_rate": 8e-05, |
| "loss": 1.8728, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.23182667986363137, |
| "grad_norm": 0.27526941895484924, |
| "learning_rate": 8e-05, |
| "loss": 1.7627, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.23204662927526667, |
| "grad_norm": 0.2932235896587372, |
| "learning_rate": 8e-05, |
| "loss": 1.8539, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.232266578686902, |
| "grad_norm": 0.2770839035511017, |
| "learning_rate": 8e-05, |
| "loss": 1.7393, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.23248652809853734, |
| "grad_norm": 0.2741580307483673, |
| "learning_rate": 8e-05, |
| "loss": 1.6076, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.23270647751017265, |
| "grad_norm": 0.2788783311843872, |
| "learning_rate": 8e-05, |
| "loss": 1.7615, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.23292642692180798, |
| "grad_norm": 0.28565406799316406, |
| "learning_rate": 8e-05, |
| "loss": 1.6266, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.23314637633344332, |
| "grad_norm": 0.26543545722961426, |
| "learning_rate": 8e-05, |
| "loss": 1.7192, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.23336632574507862, |
| "grad_norm": 0.2770478129386902, |
| "learning_rate": 8e-05, |
| "loss": 1.8056, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.23358627515671396, |
| "grad_norm": 0.27805015444755554, |
| "learning_rate": 8e-05, |
| "loss": 1.6735, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.2338062245683493, |
| "grad_norm": 0.309862345457077, |
| "learning_rate": 8e-05, |
| "loss": 1.7235, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.2340261739799846, |
| "grad_norm": 0.27140697836875916, |
| "learning_rate": 8e-05, |
| "loss": 1.6883, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.23424612339161993, |
| "grad_norm": 0.3052090108394623, |
| "learning_rate": 8e-05, |
| "loss": 1.8792, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.23446607280325524, |
| "grad_norm": 0.2995065450668335, |
| "learning_rate": 8e-05, |
| "loss": 1.6632, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.23468602221489057, |
| "grad_norm": 0.2782532870769501, |
| "learning_rate": 8e-05, |
| "loss": 1.7395, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.2349059716265259, |
| "grad_norm": 0.28436902165412903, |
| "learning_rate": 8e-05, |
| "loss": 1.8416, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.2351259210381612, |
| "grad_norm": 0.2740377187728882, |
| "learning_rate": 8e-05, |
| "loss": 1.9026, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.23534587044979655, |
| "grad_norm": 0.2978285849094391, |
| "learning_rate": 8e-05, |
| "loss": 1.7277, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.23556581986143188, |
| "grad_norm": 0.27265986800193787, |
| "learning_rate": 8e-05, |
| "loss": 1.7376, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.2357857692730672, |
| "grad_norm": 0.24915599822998047, |
| "learning_rate": 8e-05, |
| "loss": 1.6151, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.23600571868470252, |
| "grad_norm": 0.28203171491622925, |
| "learning_rate": 8e-05, |
| "loss": 1.7713, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.23622566809633785, |
| "grad_norm": 0.278793066740036, |
| "learning_rate": 8e-05, |
| "loss": 1.6717, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.23644561750797316, |
| "grad_norm": 0.2760609984397888, |
| "learning_rate": 8e-05, |
| "loss": 1.5866, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.2366655669196085, |
| "grad_norm": 0.2726036012172699, |
| "learning_rate": 8e-05, |
| "loss": 1.6774, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.2368855163312438, |
| "grad_norm": 0.27443891763687134, |
| "learning_rate": 8e-05, |
| "loss": 1.7615, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.23710546574287913, |
| "grad_norm": 0.2818880081176758, |
| "learning_rate": 8e-05, |
| "loss": 1.7433, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.23732541515451447, |
| "grad_norm": 0.2646252512931824, |
| "learning_rate": 8e-05, |
| "loss": 1.5498, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.23754536456614977, |
| "grad_norm": 0.2964784502983093, |
| "learning_rate": 8e-05, |
| "loss": 1.6162, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2377653139777851, |
| "grad_norm": 0.3044411242008209, |
| "learning_rate": 8e-05, |
| "loss": 1.7395, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.23798526338942044, |
| "grad_norm": 0.28679221868515015, |
| "learning_rate": 8e-05, |
| "loss": 1.8126, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.23820521280105575, |
| "grad_norm": 0.26326417922973633, |
| "learning_rate": 8e-05, |
| "loss": 1.6451, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.23842516221269108, |
| "grad_norm": 0.28527480363845825, |
| "learning_rate": 8e-05, |
| "loss": 1.8442, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.23864511162432642, |
| "grad_norm": 0.28897759318351746, |
| "learning_rate": 8e-05, |
| "loss": 1.8224, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.23886506103596172, |
| "grad_norm": 0.2955721616744995, |
| "learning_rate": 8e-05, |
| "loss": 1.7304, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.23908501044759706, |
| "grad_norm": 0.26267075538635254, |
| "learning_rate": 8e-05, |
| "loss": 1.67, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.23930495985923236, |
| "grad_norm": 0.27105912566185, |
| "learning_rate": 8e-05, |
| "loss": 1.7461, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.2395249092708677, |
| "grad_norm": 0.26483941078186035, |
| "learning_rate": 8e-05, |
| "loss": 1.6215, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.23974485868250303, |
| "grad_norm": 0.2804373800754547, |
| "learning_rate": 8e-05, |
| "loss": 1.6618, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.23996480809413834, |
| "grad_norm": 0.26146185398101807, |
| "learning_rate": 8e-05, |
| "loss": 1.6641, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.24018475750577367, |
| "grad_norm": 0.2839837372303009, |
| "learning_rate": 8e-05, |
| "loss": 1.5898, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.240404706917409, |
| "grad_norm": 0.26833322644233704, |
| "learning_rate": 8e-05, |
| "loss": 1.8341, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.2406246563290443, |
| "grad_norm": 0.2779574394226074, |
| "learning_rate": 8e-05, |
| "loss": 1.7142, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.24084460574067965, |
| "grad_norm": 0.2821759879589081, |
| "learning_rate": 8e-05, |
| "loss": 1.7261, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.24106455515231498, |
| "grad_norm": 0.2849150002002716, |
| "learning_rate": 8e-05, |
| "loss": 1.6834, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.24128450456395029, |
| "grad_norm": 0.277148574590683, |
| "learning_rate": 8e-05, |
| "loss": 1.5617, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.24150445397558562, |
| "grad_norm": 0.28307756781578064, |
| "learning_rate": 8e-05, |
| "loss": 1.8104, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.24172440338722093, |
| "grad_norm": 0.28540289402008057, |
| "learning_rate": 8e-05, |
| "loss": 1.7331, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.24194435279885626, |
| "grad_norm": 0.277544766664505, |
| "learning_rate": 8e-05, |
| "loss": 1.762, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2421643022104916, |
| "grad_norm": 0.259435772895813, |
| "learning_rate": 8e-05, |
| "loss": 1.5474, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.2423842516221269, |
| "grad_norm": 0.2759372591972351, |
| "learning_rate": 8e-05, |
| "loss": 1.6535, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.24260420103376223, |
| "grad_norm": 0.27163347601890564, |
| "learning_rate": 8e-05, |
| "loss": 1.6035, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.24282415044539757, |
| "grad_norm": 0.26722922921180725, |
| "learning_rate": 8e-05, |
| "loss": 1.7607, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.24304409985703287, |
| "grad_norm": 0.2925039529800415, |
| "learning_rate": 8e-05, |
| "loss": 1.6441, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.2432640492686682, |
| "grad_norm": 0.271672785282135, |
| "learning_rate": 8e-05, |
| "loss": 1.658, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.24348399868030354, |
| "grad_norm": 0.2827896773815155, |
| "learning_rate": 8e-05, |
| "loss": 1.6258, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.24370394809193885, |
| "grad_norm": 0.2732497751712799, |
| "learning_rate": 8e-05, |
| "loss": 1.6379, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.24392389750357418, |
| "grad_norm": 0.28081193566322327, |
| "learning_rate": 8e-05, |
| "loss": 1.7901, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.2441438469152095, |
| "grad_norm": 0.2799675762653351, |
| "learning_rate": 8e-05, |
| "loss": 1.8323, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.24436379632684482, |
| "grad_norm": 0.2677648961544037, |
| "learning_rate": 8e-05, |
| "loss": 1.7372, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.24458374573848016, |
| "grad_norm": 0.2644648551940918, |
| "learning_rate": 8e-05, |
| "loss": 1.6594, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.24480369515011546, |
| "grad_norm": 0.2704750895500183, |
| "learning_rate": 8e-05, |
| "loss": 1.706, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.2450236445617508, |
| "grad_norm": 0.2762587368488312, |
| "learning_rate": 8e-05, |
| "loss": 1.7445, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.24524359397338613, |
| "grad_norm": 0.2578018307685852, |
| "learning_rate": 8e-05, |
| "loss": 1.5707, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.24546354338502144, |
| "grad_norm": 0.2892129719257355, |
| "learning_rate": 8e-05, |
| "loss": 1.805, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.24568349279665677, |
| "grad_norm": 0.2868081033229828, |
| "learning_rate": 8e-05, |
| "loss": 1.7756, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.2459034422082921, |
| "grad_norm": 0.2820534110069275, |
| "learning_rate": 8e-05, |
| "loss": 1.7826, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.2461233916199274, |
| "grad_norm": 0.2824958264827728, |
| "learning_rate": 8e-05, |
| "loss": 1.6752, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.24634334103156275, |
| "grad_norm": 0.2782610356807709, |
| "learning_rate": 8e-05, |
| "loss": 1.7536, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.24656329044319805, |
| "grad_norm": 0.27147912979125977, |
| "learning_rate": 8e-05, |
| "loss": 1.6783, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.24678323985483339, |
| "grad_norm": 0.2740795612335205, |
| "learning_rate": 8e-05, |
| "loss": 1.7702, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.24700318926646872, |
| "grad_norm": 0.2922619879245758, |
| "learning_rate": 8e-05, |
| "loss": 1.8204, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.24722313867810403, |
| "grad_norm": 0.2872619926929474, |
| "learning_rate": 8e-05, |
| "loss": 1.714, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.24744308808973936, |
| "grad_norm": 0.27333369851112366, |
| "learning_rate": 8e-05, |
| "loss": 1.6575, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.2476630375013747, |
| "grad_norm": 0.28192320466041565, |
| "learning_rate": 8e-05, |
| "loss": 1.7221, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.24788298691301, |
| "grad_norm": 0.26607248187065125, |
| "learning_rate": 8e-05, |
| "loss": 1.7262, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.24810293632464533, |
| "grad_norm": 0.279690682888031, |
| "learning_rate": 8e-05, |
| "loss": 1.7004, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.24832288573628067, |
| "grad_norm": 0.27289190888404846, |
| "learning_rate": 8e-05, |
| "loss": 1.6916, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.24854283514791597, |
| "grad_norm": 0.27388349175453186, |
| "learning_rate": 8e-05, |
| "loss": 1.6656, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2487627845595513, |
| "grad_norm": 0.2912501096725464, |
| "learning_rate": 8e-05, |
| "loss": 1.8086, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.24898273397118661, |
| "grad_norm": 0.2999799847602844, |
| "learning_rate": 8e-05, |
| "loss": 1.7659, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.24920268338282195, |
| "grad_norm": 0.262207955121994, |
| "learning_rate": 8e-05, |
| "loss": 1.6581, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.24942263279445728, |
| "grad_norm": 0.2571624517440796, |
| "learning_rate": 8e-05, |
| "loss": 1.6509, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.2496425822060926, |
| "grad_norm": 0.26213690638542175, |
| "learning_rate": 8e-05, |
| "loss": 1.6044, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.24986253161772792, |
| "grad_norm": 0.2870398461818695, |
| "learning_rate": 8e-05, |
| "loss": 1.6678, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.25008248102936326, |
| "grad_norm": 0.2672583758831024, |
| "learning_rate": 8e-05, |
| "loss": 1.6563, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.2503024304409986, |
| "grad_norm": 0.29864680767059326, |
| "learning_rate": 8e-05, |
| "loss": 1.858, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.25052237985263387, |
| "grad_norm": 0.3096907436847687, |
| "learning_rate": 8e-05, |
| "loss": 1.7731, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.2507423292642692, |
| "grad_norm": 0.2668014466762543, |
| "learning_rate": 8e-05, |
| "loss": 1.6173, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.25096227867590454, |
| "grad_norm": 0.275074303150177, |
| "learning_rate": 8e-05, |
| "loss": 1.704, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.25118222808753987, |
| "grad_norm": 0.29657119512557983, |
| "learning_rate": 8e-05, |
| "loss": 1.9789, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.2514021774991752, |
| "grad_norm": 0.26117807626724243, |
| "learning_rate": 8e-05, |
| "loss": 1.6815, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.25162212691081054, |
| "grad_norm": 0.2738019824028015, |
| "learning_rate": 8e-05, |
| "loss": 1.7031, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.2518420763224458, |
| "grad_norm": 0.27922967076301575, |
| "learning_rate": 8e-05, |
| "loss": 1.7914, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.25206202573408115, |
| "grad_norm": 0.2876172661781311, |
| "learning_rate": 8e-05, |
| "loss": 1.721, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.2522819751457165, |
| "grad_norm": 0.28017961978912354, |
| "learning_rate": 8e-05, |
| "loss": 1.6731, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.2525019245573518, |
| "grad_norm": 0.2898389399051666, |
| "learning_rate": 8e-05, |
| "loss": 1.8749, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.25272187396898715, |
| "grad_norm": 0.2742408812046051, |
| "learning_rate": 8e-05, |
| "loss": 1.6811, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.25294182338062243, |
| "grad_norm": 0.2806207835674286, |
| "learning_rate": 8e-05, |
| "loss": 1.7082, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.25316177279225777, |
| "grad_norm": 0.27871328592300415, |
| "learning_rate": 8e-05, |
| "loss": 1.7142, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.2533817222038931, |
| "grad_norm": 0.2792799472808838, |
| "learning_rate": 8e-05, |
| "loss": 1.5703, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.25360167161552843, |
| "grad_norm": 0.27358901500701904, |
| "learning_rate": 8e-05, |
| "loss": 1.7576, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.25382162102716377, |
| "grad_norm": 0.26983192563056946, |
| "learning_rate": 8e-05, |
| "loss": 1.6646, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.2540415704387991, |
| "grad_norm": 0.2711959183216095, |
| "learning_rate": 8e-05, |
| "loss": 1.7698, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2542615198504344, |
| "grad_norm": 0.28412333130836487, |
| "learning_rate": 8e-05, |
| "loss": 1.7446, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.2544814692620697, |
| "grad_norm": 0.2698575258255005, |
| "learning_rate": 8e-05, |
| "loss": 1.6861, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.25470141867370505, |
| "grad_norm": 0.2806732952594757, |
| "learning_rate": 8e-05, |
| "loss": 1.7308, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.2549213680853404, |
| "grad_norm": 0.2715948522090912, |
| "learning_rate": 8e-05, |
| "loss": 1.852, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.2551413174969757, |
| "grad_norm": 0.33048170804977417, |
| "learning_rate": 8e-05, |
| "loss": 1.881, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.255361266908611, |
| "grad_norm": 0.27907994389533997, |
| "learning_rate": 8e-05, |
| "loss": 1.6501, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.25558121632024633, |
| "grad_norm": 0.2747988998889923, |
| "learning_rate": 8e-05, |
| "loss": 1.7265, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.25580116573188166, |
| "grad_norm": 0.28321677446365356, |
| "learning_rate": 8e-05, |
| "loss": 1.8602, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.256021115143517, |
| "grad_norm": 0.2695465683937073, |
| "learning_rate": 8e-05, |
| "loss": 1.6091, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.25624106455515233, |
| "grad_norm": 0.272135466337204, |
| "learning_rate": 8e-05, |
| "loss": 1.6236, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.25646101396678767, |
| "grad_norm": 0.2715020775794983, |
| "learning_rate": 8e-05, |
| "loss": 1.674, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.25668096337842294, |
| "grad_norm": 0.2879820764064789, |
| "learning_rate": 8e-05, |
| "loss": 1.8393, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.2569009127900583, |
| "grad_norm": 0.2616657018661499, |
| "learning_rate": 8e-05, |
| "loss": 1.6391, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.2571208622016936, |
| "grad_norm": 0.2558441460132599, |
| "learning_rate": 8e-05, |
| "loss": 1.606, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.25734081161332895, |
| "grad_norm": 0.26944512128829956, |
| "learning_rate": 8e-05, |
| "loss": 1.7288, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2575607610249643, |
| "grad_norm": 0.26958367228507996, |
| "learning_rate": 8e-05, |
| "loss": 1.6233, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.25778071043659956, |
| "grad_norm": 0.29003527760505676, |
| "learning_rate": 8e-05, |
| "loss": 1.777, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.2580006598482349, |
| "grad_norm": 0.2677457630634308, |
| "learning_rate": 8e-05, |
| "loss": 1.6835, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.2582206092598702, |
| "grad_norm": 0.28062689304351807, |
| "learning_rate": 8e-05, |
| "loss": 1.726, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.25844055867150556, |
| "grad_norm": 0.26764920353889465, |
| "learning_rate": 8e-05, |
| "loss": 1.6575, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.2586605080831409, |
| "grad_norm": 0.28183332085609436, |
| "learning_rate": 8e-05, |
| "loss": 1.784, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.25888045749477623, |
| "grad_norm": 0.25718390941619873, |
| "learning_rate": 8e-05, |
| "loss": 1.6317, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.2591004069064115, |
| "grad_norm": 0.25523149967193604, |
| "learning_rate": 8e-05, |
| "loss": 1.5634, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.25932035631804684, |
| "grad_norm": 0.2539874315261841, |
| "learning_rate": 8e-05, |
| "loss": 1.5878, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.2595403057296822, |
| "grad_norm": 0.2868393659591675, |
| "learning_rate": 8e-05, |
| "loss": 1.7301, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2597602551413175, |
| "grad_norm": 0.27819645404815674, |
| "learning_rate": 8e-05, |
| "loss": 1.6895, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.25998020455295284, |
| "grad_norm": 0.27499255537986755, |
| "learning_rate": 8e-05, |
| "loss": 1.7462, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.2602001539645881, |
| "grad_norm": 0.2858695685863495, |
| "learning_rate": 8e-05, |
| "loss": 1.8199, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.26042010337622346, |
| "grad_norm": 0.2646760642528534, |
| "learning_rate": 8e-05, |
| "loss": 1.6597, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.2606400527878588, |
| "grad_norm": 0.2831268310546875, |
| "learning_rate": 8e-05, |
| "loss": 1.8383, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.2608600021994941, |
| "grad_norm": 0.2593746483325958, |
| "learning_rate": 8e-05, |
| "loss": 1.6115, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.26107995161112946, |
| "grad_norm": 0.26519641280174255, |
| "learning_rate": 8e-05, |
| "loss": 1.5959, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.2612999010227648, |
| "grad_norm": 0.2733252942562103, |
| "learning_rate": 8e-05, |
| "loss": 1.6318, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.26151985043440007, |
| "grad_norm": 0.27299511432647705, |
| "learning_rate": 8e-05, |
| "loss": 1.7313, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.2617397998460354, |
| "grad_norm": 0.2684955894947052, |
| "learning_rate": 8e-05, |
| "loss": 1.5826, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.26195974925767074, |
| "grad_norm": 0.2747553586959839, |
| "learning_rate": 8e-05, |
| "loss": 1.7008, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.26217969866930607, |
| "grad_norm": 0.26033639907836914, |
| "learning_rate": 8e-05, |
| "loss": 1.5571, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.2623996480809414, |
| "grad_norm": 0.2640804350376129, |
| "learning_rate": 8e-05, |
| "loss": 1.5317, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.2626195974925767, |
| "grad_norm": 0.27063700556755066, |
| "learning_rate": 8e-05, |
| "loss": 1.5501, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.262839546904212, |
| "grad_norm": 0.2677111029624939, |
| "learning_rate": 8e-05, |
| "loss": 1.5894, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.26305949631584735, |
| "grad_norm": 0.28144168853759766, |
| "learning_rate": 8e-05, |
| "loss": 1.7496, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.2632794457274827, |
| "grad_norm": 0.2602388858795166, |
| "learning_rate": 8e-05, |
| "loss": 1.571, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.263499395139118, |
| "grad_norm": 0.2941505014896393, |
| "learning_rate": 8e-05, |
| "loss": 1.6692, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.26371934455075335, |
| "grad_norm": 0.264433354139328, |
| "learning_rate": 8e-05, |
| "loss": 1.6922, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.26393929396238863, |
| "grad_norm": 0.25587090849876404, |
| "learning_rate": 8e-05, |
| "loss": 1.5599, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26415924337402397, |
| "grad_norm": 0.3012869358062744, |
| "learning_rate": 8e-05, |
| "loss": 1.9195, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.2643791927856593, |
| "grad_norm": 0.2762719392776489, |
| "learning_rate": 8e-05, |
| "loss": 1.898, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.26459914219729463, |
| "grad_norm": 0.2701188325881958, |
| "learning_rate": 8e-05, |
| "loss": 1.7312, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.26481909160892997, |
| "grad_norm": 0.29665982723236084, |
| "learning_rate": 8e-05, |
| "loss": 1.8089, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.26503904102056525, |
| "grad_norm": 0.26700517535209656, |
| "learning_rate": 8e-05, |
| "loss": 1.8401, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.2652589904322006, |
| "grad_norm": 0.2828493118286133, |
| "learning_rate": 8e-05, |
| "loss": 1.8622, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.2654789398438359, |
| "grad_norm": 0.2746271789073944, |
| "learning_rate": 8e-05, |
| "loss": 1.6521, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.26569888925547125, |
| "grad_norm": 0.2882270812988281, |
| "learning_rate": 8e-05, |
| "loss": 1.7168, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.2659188386671066, |
| "grad_norm": 0.29784512519836426, |
| "learning_rate": 8e-05, |
| "loss": 1.6968, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.2661387880787419, |
| "grad_norm": 0.2807427942752838, |
| "learning_rate": 8e-05, |
| "loss": 1.6004, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2663587374903772, |
| "grad_norm": 0.2956424951553345, |
| "learning_rate": 8e-05, |
| "loss": 1.8325, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.26657868690201253, |
| "grad_norm": 0.2647739350795746, |
| "learning_rate": 8e-05, |
| "loss": 1.6391, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.26679863631364786, |
| "grad_norm": 0.2955171465873718, |
| "learning_rate": 8e-05, |
| "loss": 1.7893, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.2670185857252832, |
| "grad_norm": 0.27241894602775574, |
| "learning_rate": 8e-05, |
| "loss": 1.781, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.26723853513691853, |
| "grad_norm": 0.2841251492500305, |
| "learning_rate": 8e-05, |
| "loss": 1.8612, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.2674584845485538, |
| "grad_norm": 0.327891081571579, |
| "learning_rate": 8e-05, |
| "loss": 1.844, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.26767843396018914, |
| "grad_norm": 0.26434099674224854, |
| "learning_rate": 8e-05, |
| "loss": 1.6325, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.2678983833718245, |
| "grad_norm": 0.2868417799472809, |
| "learning_rate": 8e-05, |
| "loss": 1.7087, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.2681183327834598, |
| "grad_norm": 0.27408069372177124, |
| "learning_rate": 8e-05, |
| "loss": 1.6006, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.26833828219509515, |
| "grad_norm": 0.2697390019893646, |
| "learning_rate": 8e-05, |
| "loss": 1.6833, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2685582316067304, |
| "grad_norm": 0.27598559856414795, |
| "learning_rate": 8e-05, |
| "loss": 1.7192, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.26877818101836576, |
| "grad_norm": 0.26871007680892944, |
| "learning_rate": 8e-05, |
| "loss": 1.6301, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.2689981304300011, |
| "grad_norm": 0.2739337980747223, |
| "learning_rate": 8e-05, |
| "loss": 1.6828, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.2692180798416364, |
| "grad_norm": 0.286530464887619, |
| "learning_rate": 8e-05, |
| "loss": 1.6484, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.26943802925327176, |
| "grad_norm": 0.27509886026382446, |
| "learning_rate": 8e-05, |
| "loss": 1.6647, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.2696579786649071, |
| "grad_norm": 0.2916969358921051, |
| "learning_rate": 8e-05, |
| "loss": 1.7908, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.2698779280765424, |
| "grad_norm": 0.26566174626350403, |
| "learning_rate": 8e-05, |
| "loss": 1.6075, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.2700978774881777, |
| "grad_norm": 0.27648022770881653, |
| "learning_rate": 8e-05, |
| "loss": 1.7536, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.27031782689981304, |
| "grad_norm": 0.27313023805618286, |
| "learning_rate": 8e-05, |
| "loss": 1.6978, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.2705377763114484, |
| "grad_norm": 0.2755061388015747, |
| "learning_rate": 8e-05, |
| "loss": 1.7196, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2707577257230837, |
| "grad_norm": 0.25907769799232483, |
| "learning_rate": 8e-05, |
| "loss": 1.5518, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.270977675134719, |
| "grad_norm": 0.26485681533813477, |
| "learning_rate": 8e-05, |
| "loss": 1.5053, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.2711976245463543, |
| "grad_norm": 0.27980178594589233, |
| "learning_rate": 8e-05, |
| "loss": 1.7824, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.27141757395798966, |
| "grad_norm": 0.2750954329967499, |
| "learning_rate": 8e-05, |
| "loss": 1.6973, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.271637523369625, |
| "grad_norm": 0.27367594838142395, |
| "learning_rate": 8e-05, |
| "loss": 1.6691, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.2718574727812603, |
| "grad_norm": 0.27089521288871765, |
| "learning_rate": 8e-05, |
| "loss": 1.7532, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.27207742219289566, |
| "grad_norm": 0.30656641721725464, |
| "learning_rate": 8e-05, |
| "loss": 1.8411, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.27229737160453094, |
| "grad_norm": 0.25732672214508057, |
| "learning_rate": 8e-05, |
| "loss": 1.5599, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.27251732101616627, |
| "grad_norm": 0.2643807828426361, |
| "learning_rate": 8e-05, |
| "loss": 1.6654, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.2727372704278016, |
| "grad_norm": 0.2703326344490051, |
| "learning_rate": 8e-05, |
| "loss": 1.594, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.27295721983943694, |
| "grad_norm": 0.27907243371009827, |
| "learning_rate": 8e-05, |
| "loss": 1.7531, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.27317716925107227, |
| "grad_norm": 0.2482902854681015, |
| "learning_rate": 8e-05, |
| "loss": 1.3586, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.27339711866270755, |
| "grad_norm": 0.2879469394683838, |
| "learning_rate": 8e-05, |
| "loss": 1.76, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.2736170680743429, |
| "grad_norm": 0.26334571838378906, |
| "learning_rate": 8e-05, |
| "loss": 1.536, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.2738370174859782, |
| "grad_norm": 0.27328065037727356, |
| "learning_rate": 8e-05, |
| "loss": 1.7199, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.27405696689761355, |
| "grad_norm": 0.27392926812171936, |
| "learning_rate": 8e-05, |
| "loss": 1.7731, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.2742769163092489, |
| "grad_norm": 0.29755476117134094, |
| "learning_rate": 8e-05, |
| "loss": 1.7184, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.2744968657208842, |
| "grad_norm": 0.29554107785224915, |
| "learning_rate": 8e-05, |
| "loss": 1.7442, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.2747168151325195, |
| "grad_norm": 0.2562367618083954, |
| "learning_rate": 8e-05, |
| "loss": 1.63, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.27493676454415483, |
| "grad_norm": 0.27746453881263733, |
| "learning_rate": 8e-05, |
| "loss": 1.7396, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.27515671395579017, |
| "grad_norm": 0.2747843265533447, |
| "learning_rate": 8e-05, |
| "loss": 1.6628, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.2753766633674255, |
| "grad_norm": 0.2650463581085205, |
| "learning_rate": 8e-05, |
| "loss": 1.6409, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.27559661277906083, |
| "grad_norm": 0.30537328124046326, |
| "learning_rate": 8e-05, |
| "loss": 1.4927, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.2758165621906961, |
| "grad_norm": 0.26015424728393555, |
| "learning_rate": 8e-05, |
| "loss": 1.718, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.27603651160233145, |
| "grad_norm": 0.2512992322444916, |
| "learning_rate": 8e-05, |
| "loss": 1.4757, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.2762564610139668, |
| "grad_norm": 0.28478461503982544, |
| "learning_rate": 8e-05, |
| "loss": 1.9081, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.2764764104256021, |
| "grad_norm": 0.28490516543388367, |
| "learning_rate": 8e-05, |
| "loss": 1.8495, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.27669635983723745, |
| "grad_norm": 0.2758481204509735, |
| "learning_rate": 8e-05, |
| "loss": 1.767, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.2769163092488728, |
| "grad_norm": 0.28743213415145874, |
| "learning_rate": 8e-05, |
| "loss": 1.6548, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.27713625866050806, |
| "grad_norm": 0.2738385796546936, |
| "learning_rate": 8e-05, |
| "loss": 1.5616, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2773562080721434, |
| "grad_norm": 0.27758583426475525, |
| "learning_rate": 8e-05, |
| "loss": 1.7793, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.27757615748377873, |
| "grad_norm": 0.2830480635166168, |
| "learning_rate": 8e-05, |
| "loss": 1.8048, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.27779610689541406, |
| "grad_norm": 0.296036034822464, |
| "learning_rate": 8e-05, |
| "loss": 1.7844, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.2780160563070494, |
| "grad_norm": 0.28651297092437744, |
| "learning_rate": 8e-05, |
| "loss": 1.7239, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.2782360057186847, |
| "grad_norm": 0.2826116979122162, |
| "learning_rate": 8e-05, |
| "loss": 1.8415, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.27845595513032, |
| "grad_norm": 0.27445724606513977, |
| "learning_rate": 8e-05, |
| "loss": 1.6738, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.27867590454195534, |
| "grad_norm": 0.28153640031814575, |
| "learning_rate": 8e-05, |
| "loss": 1.6519, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.2788958539535907, |
| "grad_norm": 0.27389946579933167, |
| "learning_rate": 8e-05, |
| "loss": 1.681, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.279115803365226, |
| "grad_norm": 0.2639203667640686, |
| "learning_rate": 8e-05, |
| "loss": 1.6398, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.27933575277686135, |
| "grad_norm": 0.2787509560585022, |
| "learning_rate": 8e-05, |
| "loss": 1.7199, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.2795557021884966, |
| "grad_norm": 0.28468430042266846, |
| "learning_rate": 8e-05, |
| "loss": 1.8668, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.27977565160013196, |
| "grad_norm": 0.2907005250453949, |
| "learning_rate": 8e-05, |
| "loss": 1.9328, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.2799956010117673, |
| "grad_norm": 0.2607463300228119, |
| "learning_rate": 8e-05, |
| "loss": 1.5958, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.2802155504234026, |
| "grad_norm": 0.2695181965827942, |
| "learning_rate": 8e-05, |
| "loss": 1.6708, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.28043549983503796, |
| "grad_norm": 0.28671538829803467, |
| "learning_rate": 8e-05, |
| "loss": 1.7736, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.28065544924667324, |
| "grad_norm": 0.3246489465236664, |
| "learning_rate": 8e-05, |
| "loss": 1.8145, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.2808753986583086, |
| "grad_norm": 0.2879314720630646, |
| "learning_rate": 8e-05, |
| "loss": 1.782, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.2810953480699439, |
| "grad_norm": 0.27141574025154114, |
| "learning_rate": 8e-05, |
| "loss": 1.8069, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.28131529748157924, |
| "grad_norm": 0.2893892228603363, |
| "learning_rate": 8e-05, |
| "loss": 1.7893, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.2815352468932146, |
| "grad_norm": 0.2985538840293884, |
| "learning_rate": 8e-05, |
| "loss": 1.7804, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.2817551963048499, |
| "grad_norm": 0.2664276957511902, |
| "learning_rate": 8e-05, |
| "loss": 1.6785, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.2819751457164852, |
| "grad_norm": 0.3002198040485382, |
| "learning_rate": 8e-05, |
| "loss": 1.6109, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.2821950951281205, |
| "grad_norm": 0.27687907218933105, |
| "learning_rate": 8e-05, |
| "loss": 1.6322, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.28241504453975586, |
| "grad_norm": 0.28822144865989685, |
| "learning_rate": 8e-05, |
| "loss": 1.6785, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.2826349939513912, |
| "grad_norm": 0.2801685333251953, |
| "learning_rate": 8e-05, |
| "loss": 1.69, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.2828549433630265, |
| "grad_norm": 0.27876734733581543, |
| "learning_rate": 8e-05, |
| "loss": 1.6442, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.2830748927746618, |
| "grad_norm": 0.2990095317363739, |
| "learning_rate": 8e-05, |
| "loss": 1.7439, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.28329484218629714, |
| "grad_norm": 0.2710682451725006, |
| "learning_rate": 8e-05, |
| "loss": 1.6908, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.28351479159793247, |
| "grad_norm": 0.2922731935977936, |
| "learning_rate": 8e-05, |
| "loss": 1.8361, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.2837347410095678, |
| "grad_norm": 0.2638223171234131, |
| "learning_rate": 8e-05, |
| "loss": 1.6233, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.28395469042120314, |
| "grad_norm": 0.27564552426338196, |
| "learning_rate": 8e-05, |
| "loss": 1.7624, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.28417463983283847, |
| "grad_norm": 0.28238940238952637, |
| "learning_rate": 8e-05, |
| "loss": 1.8649, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.28439458924447375, |
| "grad_norm": 0.27798035740852356, |
| "learning_rate": 8e-05, |
| "loss": 1.7877, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.2846145386561091, |
| "grad_norm": 0.29618534445762634, |
| "learning_rate": 8e-05, |
| "loss": 1.816, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.2848344880677444, |
| "grad_norm": 0.27669045329093933, |
| "learning_rate": 8e-05, |
| "loss": 1.7014, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.28505443747937975, |
| "grad_norm": 0.27973508834838867, |
| "learning_rate": 8e-05, |
| "loss": 1.7491, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.2852743868910151, |
| "grad_norm": 0.28833356499671936, |
| "learning_rate": 8e-05, |
| "loss": 1.6948, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.28549433630265036, |
| "grad_norm": 0.2751030921936035, |
| "learning_rate": 8e-05, |
| "loss": 1.6846, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.2766781449317932, |
| "learning_rate": 8e-05, |
| "loss": 1.5442, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.28593423512592103, |
| "grad_norm": 0.29664894938468933, |
| "learning_rate": 8e-05, |
| "loss": 1.6884, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.28615418453755637, |
| "grad_norm": 0.2771795392036438, |
| "learning_rate": 8e-05, |
| "loss": 1.6479, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.2863741339491917, |
| "grad_norm": 0.2623322904109955, |
| "learning_rate": 8e-05, |
| "loss": 1.5803, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.28659408336082703, |
| "grad_norm": 0.2821153998374939, |
| "learning_rate": 8e-05, |
| "loss": 1.7758, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.2868140327724623, |
| "grad_norm": 0.29058384895324707, |
| "learning_rate": 8e-05, |
| "loss": 1.7244, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.28703398218409765, |
| "grad_norm": 0.2811940312385559, |
| "learning_rate": 8e-05, |
| "loss": 1.6708, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.287253931595733, |
| "grad_norm": 0.2773367762565613, |
| "learning_rate": 8e-05, |
| "loss": 1.7857, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.2874738810073683, |
| "grad_norm": 0.2689999043941498, |
| "learning_rate": 8e-05, |
| "loss": 1.7432, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.28769383041900365, |
| "grad_norm": 0.26896870136260986, |
| "learning_rate": 8e-05, |
| "loss": 1.6389, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.2879137798306389, |
| "grad_norm": 0.2981964349746704, |
| "learning_rate": 8e-05, |
| "loss": 1.8771, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.28813372924227426, |
| "grad_norm": 0.2872856855392456, |
| "learning_rate": 8e-05, |
| "loss": 1.785, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2883536786539096, |
| "grad_norm": 0.3186649680137634, |
| "learning_rate": 8e-05, |
| "loss": 1.9051, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.28857362806554493, |
| "grad_norm": 0.2802119255065918, |
| "learning_rate": 8e-05, |
| "loss": 1.6532, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.28879357747718026, |
| "grad_norm": 0.2864134907722473, |
| "learning_rate": 8e-05, |
| "loss": 1.7373, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.2890135268888156, |
| "grad_norm": 0.2739737331867218, |
| "learning_rate": 8e-05, |
| "loss": 1.5365, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.2892334763004509, |
| "grad_norm": 0.2707555294036865, |
| "learning_rate": 8e-05, |
| "loss": 1.6516, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.2894534257120862, |
| "grad_norm": 0.2895212173461914, |
| "learning_rate": 8e-05, |
| "loss": 1.5634, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.28967337512372154, |
| "grad_norm": 0.26424047350883484, |
| "learning_rate": 8e-05, |
| "loss": 1.6543, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.2898933245353569, |
| "grad_norm": 0.26237159967422485, |
| "learning_rate": 8e-05, |
| "loss": 1.6814, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.2901132739469922, |
| "grad_norm": 0.27964159846305847, |
| "learning_rate": 8e-05, |
| "loss": 1.7505, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.2903332233586275, |
| "grad_norm": 0.27128270268440247, |
| "learning_rate": 8e-05, |
| "loss": 1.7229, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.2905531727702628, |
| "grad_norm": 0.3012688159942627, |
| "learning_rate": 8e-05, |
| "loss": 1.6851, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.29077312218189816, |
| "grad_norm": 0.2725695073604584, |
| "learning_rate": 8e-05, |
| "loss": 1.6552, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.2909930715935335, |
| "grad_norm": 0.2855455279350281, |
| "learning_rate": 8e-05, |
| "loss": 1.7779, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.2912130210051688, |
| "grad_norm": 0.2906174659729004, |
| "learning_rate": 8e-05, |
| "loss": 1.8209, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.29143297041680416, |
| "grad_norm": 0.26015472412109375, |
| "learning_rate": 8e-05, |
| "loss": 1.5403, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.29165291982843944, |
| "grad_norm": 0.29065820574760437, |
| "learning_rate": 8e-05, |
| "loss": 1.8499, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.2918728692400748, |
| "grad_norm": 0.28715917468070984, |
| "learning_rate": 8e-05, |
| "loss": 1.854, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.2920928186517101, |
| "grad_norm": 0.26932859420776367, |
| "learning_rate": 8e-05, |
| "loss": 1.6254, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.29231276806334544, |
| "grad_norm": 0.2757404148578644, |
| "learning_rate": 8e-05, |
| "loss": 1.6077, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.2925327174749808, |
| "grad_norm": 0.26532551646232605, |
| "learning_rate": 8e-05, |
| "loss": 1.6551, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.29275266688661605, |
| "grad_norm": 0.2754289209842682, |
| "learning_rate": 8e-05, |
| "loss": 1.7276, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.2929726162982514, |
| "grad_norm": 0.290568470954895, |
| "learning_rate": 8e-05, |
| "loss": 1.7622, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.2931925657098867, |
| "grad_norm": 0.3045903742313385, |
| "learning_rate": 8e-05, |
| "loss": 1.7937, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.29341251512152206, |
| "grad_norm": 0.2594483196735382, |
| "learning_rate": 8e-05, |
| "loss": 1.6163, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.2936324645331574, |
| "grad_norm": 0.3054102957248688, |
| "learning_rate": 8e-05, |
| "loss": 1.7767, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.2938524139447927, |
| "grad_norm": 0.27347666025161743, |
| "learning_rate": 8e-05, |
| "loss": 1.682, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.294072363356428, |
| "grad_norm": 0.2639494836330414, |
| "learning_rate": 8e-05, |
| "loss": 1.4616, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.29429231276806334, |
| "grad_norm": 0.2842942178249359, |
| "learning_rate": 8e-05, |
| "loss": 1.7625, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.29451226217969867, |
| "grad_norm": 0.2895960509777069, |
| "learning_rate": 8e-05, |
| "loss": 1.7127, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.294732211591334, |
| "grad_norm": 0.2836678624153137, |
| "learning_rate": 8e-05, |
| "loss": 1.7765, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.29495216100296934, |
| "grad_norm": 0.26315444707870483, |
| "learning_rate": 8e-05, |
| "loss": 1.6592, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.2951721104146046, |
| "grad_norm": 0.2601313591003418, |
| "learning_rate": 8e-05, |
| "loss": 1.5803, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.29539205982623995, |
| "grad_norm": 0.28084784746170044, |
| "learning_rate": 8e-05, |
| "loss": 1.6172, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.2956120092378753, |
| "grad_norm": 0.27707698941230774, |
| "learning_rate": 8e-05, |
| "loss": 1.6774, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.2958319586495106, |
| "grad_norm": 0.28750407695770264, |
| "learning_rate": 8e-05, |
| "loss": 1.7775, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.29605190806114595, |
| "grad_norm": 0.27315664291381836, |
| "learning_rate": 8e-05, |
| "loss": 1.6578, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.2962718574727813, |
| "grad_norm": 0.26131486892700195, |
| "learning_rate": 8e-05, |
| "loss": 1.6429, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.29649180688441656, |
| "grad_norm": 0.27198976278305054, |
| "learning_rate": 8e-05, |
| "loss": 1.6594, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.2967117562960519, |
| "grad_norm": 0.2785218060016632, |
| "learning_rate": 8e-05, |
| "loss": 1.6959, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.29693170570768723, |
| "grad_norm": 0.26987215876579285, |
| "learning_rate": 8e-05, |
| "loss": 1.6561, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.29715165511932257, |
| "grad_norm": 0.2634013295173645, |
| "learning_rate": 8e-05, |
| "loss": 1.6817, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.2973716045309579, |
| "grad_norm": 0.2584557831287384, |
| "learning_rate": 8e-05, |
| "loss": 1.5104, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.2975915539425932, |
| "grad_norm": 0.28787991404533386, |
| "learning_rate": 8e-05, |
| "loss": 1.8217, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.2978115033542285, |
| "grad_norm": 0.5047094225883484, |
| "learning_rate": 8e-05, |
| "loss": 1.7733, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.29803145276586385, |
| "grad_norm": 0.26776471734046936, |
| "learning_rate": 8e-05, |
| "loss": 1.6961, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.2982514021774992, |
| "grad_norm": 0.30351778864860535, |
| "learning_rate": 8e-05, |
| "loss": 1.7104, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.2984713515891345, |
| "grad_norm": 0.27889010310173035, |
| "learning_rate": 8e-05, |
| "loss": 1.7276, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.29869130100076985, |
| "grad_norm": 0.2656184136867523, |
| "learning_rate": 8e-05, |
| "loss": 1.7438, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.2989112504124051, |
| "grad_norm": 0.27338340878486633, |
| "learning_rate": 8e-05, |
| "loss": 1.7526, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.29913119982404046, |
| "grad_norm": 0.3266398310661316, |
| "learning_rate": 8e-05, |
| "loss": 1.8091, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.2993511492356758, |
| "grad_norm": 0.309469997882843, |
| "learning_rate": 8e-05, |
| "loss": 2.0485, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.29957109864731113, |
| "grad_norm": 0.2768929600715637, |
| "learning_rate": 8e-05, |
| "loss": 1.7977, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.29979104805894646, |
| "grad_norm": 0.27685433626174927, |
| "learning_rate": 8e-05, |
| "loss": 1.5712, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.30001099747058174, |
| "grad_norm": 0.26404622197151184, |
| "learning_rate": 8e-05, |
| "loss": 1.6639, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.3002309468822171, |
| "grad_norm": 0.2719237208366394, |
| "learning_rate": 8e-05, |
| "loss": 1.788, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.3004508962938524, |
| "grad_norm": 0.27983394265174866, |
| "learning_rate": 8e-05, |
| "loss": 1.7361, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.30067084570548774, |
| "grad_norm": 0.2673875689506531, |
| "learning_rate": 8e-05, |
| "loss": 1.6288, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.3008907951171231, |
| "grad_norm": 0.2850426435470581, |
| "learning_rate": 8e-05, |
| "loss": 1.8328, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.3011107445287584, |
| "grad_norm": 0.2577967345714569, |
| "learning_rate": 8e-05, |
| "loss": 1.6267, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.3013306939403937, |
| "grad_norm": 0.276094913482666, |
| "learning_rate": 8e-05, |
| "loss": 1.7673, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.301550643352029, |
| "grad_norm": 0.2834344208240509, |
| "learning_rate": 8e-05, |
| "loss": 1.6692, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.30177059276366436, |
| "grad_norm": 0.2617560029029846, |
| "learning_rate": 8e-05, |
| "loss": 1.7734, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.3019905421752997, |
| "grad_norm": 0.27122870087623596, |
| "learning_rate": 8e-05, |
| "loss": 1.6988, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.302210491586935, |
| "grad_norm": 0.26526594161987305, |
| "learning_rate": 8e-05, |
| "loss": 1.7459, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.3024304409985703, |
| "grad_norm": 0.2893051207065582, |
| "learning_rate": 8e-05, |
| "loss": 1.8214, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.30265039041020564, |
| "grad_norm": 0.2735356092453003, |
| "learning_rate": 8e-05, |
| "loss": 1.8437, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.302870339821841, |
| "grad_norm": 0.2743459939956665, |
| "learning_rate": 8e-05, |
| "loss": 1.8365, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.3030902892334763, |
| "grad_norm": 0.28047019243240356, |
| "learning_rate": 8e-05, |
| "loss": 1.6143, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.30331023864511164, |
| "grad_norm": 0.268197238445282, |
| "learning_rate": 8e-05, |
| "loss": 1.591, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.303530188056747, |
| "grad_norm": 0.2890843451023102, |
| "learning_rate": 8e-05, |
| "loss": 1.7757, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.30375013746838225, |
| "grad_norm": 0.2765072286128998, |
| "learning_rate": 8e-05, |
| "loss": 1.6363, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.3039700868800176, |
| "grad_norm": 0.290147602558136, |
| "learning_rate": 8e-05, |
| "loss": 1.7615, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.3041900362916529, |
| "grad_norm": 0.2721220850944519, |
| "learning_rate": 8e-05, |
| "loss": 1.7101, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.30440998570328826, |
| "grad_norm": 0.27125662565231323, |
| "learning_rate": 8e-05, |
| "loss": 1.7291, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.3046299351149236, |
| "grad_norm": 0.2594304084777832, |
| "learning_rate": 8e-05, |
| "loss": 1.6754, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.30484988452655887, |
| "grad_norm": 0.28582707047462463, |
| "learning_rate": 8e-05, |
| "loss": 1.6808, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.3050698339381942, |
| "grad_norm": 0.2853895425796509, |
| "learning_rate": 8e-05, |
| "loss": 1.779, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.30528978334982954, |
| "grad_norm": 0.2580530345439911, |
| "learning_rate": 8e-05, |
| "loss": 1.6316, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.30550973276146487, |
| "grad_norm": 0.2793220281600952, |
| "learning_rate": 8e-05, |
| "loss": 1.7326, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.3057296821731002, |
| "grad_norm": 0.2672085165977478, |
| "learning_rate": 8e-05, |
| "loss": 1.6544, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.30594963158473554, |
| "grad_norm": 0.27718111872673035, |
| "learning_rate": 8e-05, |
| "loss": 1.6307, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.3061695809963708, |
| "grad_norm": 0.29295554757118225, |
| "learning_rate": 8e-05, |
| "loss": 1.502, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.30638953040800615, |
| "grad_norm": 0.2840512990951538, |
| "learning_rate": 8e-05, |
| "loss": 1.6326, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.3066094798196415, |
| "grad_norm": 0.2897029519081116, |
| "learning_rate": 8e-05, |
| "loss": 1.7543, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.3068294292312768, |
| "grad_norm": 0.28060710430145264, |
| "learning_rate": 8e-05, |
| "loss": 1.7227, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.30704937864291215, |
| "grad_norm": 0.27874305844306946, |
| "learning_rate": 8e-05, |
| "loss": 1.6639, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.30726932805454743, |
| "grad_norm": 0.2679193615913391, |
| "learning_rate": 8e-05, |
| "loss": 1.7226, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.30748927746618276, |
| "grad_norm": 0.2769779562950134, |
| "learning_rate": 8e-05, |
| "loss": 1.6384, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.3077092268778181, |
| "grad_norm": 0.26620879769325256, |
| "learning_rate": 8e-05, |
| "loss": 1.7134, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.30792917628945343, |
| "grad_norm": 0.277423620223999, |
| "learning_rate": 8e-05, |
| "loss": 1.7376, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.30814912570108877, |
| "grad_norm": 0.2629416882991791, |
| "learning_rate": 8e-05, |
| "loss": 1.598, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.3083690751127241, |
| "grad_norm": 0.2844812572002411, |
| "learning_rate": 8e-05, |
| "loss": 1.7067, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.3085890245243594, |
| "grad_norm": 0.2731526494026184, |
| "learning_rate": 8e-05, |
| "loss": 1.8571, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.3088089739359947, |
| "grad_norm": 0.287438303232193, |
| "learning_rate": 8e-05, |
| "loss": 1.7612, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.30902892334763005, |
| "grad_norm": 0.266718327999115, |
| "learning_rate": 8e-05, |
| "loss": 1.6106, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.3092488727592654, |
| "grad_norm": 0.28080686926841736, |
| "learning_rate": 8e-05, |
| "loss": 1.8281, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.3094688221709007, |
| "grad_norm": 0.27558308839797974, |
| "learning_rate": 8e-05, |
| "loss": 1.8677, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.309688771582536, |
| "grad_norm": 0.2798183262348175, |
| "learning_rate": 8e-05, |
| "loss": 1.7867, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.3099087209941713, |
| "grad_norm": 0.25823187828063965, |
| "learning_rate": 8e-05, |
| "loss": 1.6743, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.31012867040580666, |
| "grad_norm": 0.27356335520744324, |
| "learning_rate": 8e-05, |
| "loss": 1.7039, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.310348619817442, |
| "grad_norm": 0.2842661440372467, |
| "learning_rate": 8e-05, |
| "loss": 1.7046, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.31056856922907733, |
| "grad_norm": 0.2561197876930237, |
| "learning_rate": 8e-05, |
| "loss": 1.4887, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.31078851864071266, |
| "grad_norm": 0.2851184904575348, |
| "learning_rate": 8e-05, |
| "loss": 1.7074, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.31100846805234794, |
| "grad_norm": 0.2655506432056427, |
| "learning_rate": 8e-05, |
| "loss": 1.6049, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.3112284174639833, |
| "grad_norm": 0.26412099599838257, |
| "learning_rate": 8e-05, |
| "loss": 1.6052, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.3114483668756186, |
| "grad_norm": 0.3026227056980133, |
| "learning_rate": 8e-05, |
| "loss": 1.7085, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.31166831628725394, |
| "grad_norm": 0.28821703791618347, |
| "learning_rate": 8e-05, |
| "loss": 1.7573, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.3118882656988893, |
| "grad_norm": 0.26806455850601196, |
| "learning_rate": 8e-05, |
| "loss": 1.7136, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.31210821511052456, |
| "grad_norm": 0.28336799144744873, |
| "learning_rate": 8e-05, |
| "loss": 1.8445, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.3123281645221599, |
| "grad_norm": 0.2772139012813568, |
| "learning_rate": 8e-05, |
| "loss": 1.692, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3125481139337952, |
| "grad_norm": 0.2815256714820862, |
| "learning_rate": 8e-05, |
| "loss": 1.77, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.31276806334543056, |
| "grad_norm": 0.4029920697212219, |
| "learning_rate": 8e-05, |
| "loss": 1.8103, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.3129880127570659, |
| "grad_norm": 0.2677610218524933, |
| "learning_rate": 8e-05, |
| "loss": 1.5898, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.3132079621687012, |
| "grad_norm": 0.2605397701263428, |
| "learning_rate": 8e-05, |
| "loss": 1.5735, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.3134279115803365, |
| "grad_norm": 0.2831586003303528, |
| "learning_rate": 8e-05, |
| "loss": 1.6641, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.31364786099197184, |
| "grad_norm": 0.2746485471725464, |
| "learning_rate": 8e-05, |
| "loss": 1.618, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.3138678104036072, |
| "grad_norm": 0.283342182636261, |
| "learning_rate": 8e-05, |
| "loss": 1.6963, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.3140877598152425, |
| "grad_norm": 0.27635300159454346, |
| "learning_rate": 8e-05, |
| "loss": 1.6911, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.31430770922687784, |
| "grad_norm": 0.2719132900238037, |
| "learning_rate": 8e-05, |
| "loss": 1.7063, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.3145276586385131, |
| "grad_norm": 0.27162256836891174, |
| "learning_rate": 8e-05, |
| "loss": 1.6397, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.31474760805014845, |
| "grad_norm": 0.2934938073158264, |
| "learning_rate": 8e-05, |
| "loss": 1.7555, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.3149675574617838, |
| "grad_norm": 0.3060123920440674, |
| "learning_rate": 8e-05, |
| "loss": 1.642, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.3151875068734191, |
| "grad_norm": 0.280846506357193, |
| "learning_rate": 8e-05, |
| "loss": 1.6805, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.31540745628505445, |
| "grad_norm": 0.2768997550010681, |
| "learning_rate": 8e-05, |
| "loss": 1.7359, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.3156274056966898, |
| "grad_norm": 0.29172810912132263, |
| "learning_rate": 8e-05, |
| "loss": 1.821, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.31584735510832507, |
| "grad_norm": 0.30742648243904114, |
| "learning_rate": 8e-05, |
| "loss": 1.8198, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.3160673045199604, |
| "grad_norm": 0.2889997065067291, |
| "learning_rate": 8e-05, |
| "loss": 1.6733, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.31628725393159574, |
| "grad_norm": 0.2859675884246826, |
| "learning_rate": 8e-05, |
| "loss": 1.7655, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.31650720334323107, |
| "grad_norm": 0.2926831543445587, |
| "learning_rate": 8e-05, |
| "loss": 1.7871, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.3167271527548664, |
| "grad_norm": 0.28924524784088135, |
| "learning_rate": 8e-05, |
| "loss": 1.665, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3169471021665017, |
| "grad_norm": 0.2940097749233246, |
| "learning_rate": 8e-05, |
| "loss": 1.8364, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.317167051578137, |
| "grad_norm": 0.2923974096775055, |
| "learning_rate": 8e-05, |
| "loss": 1.8071, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.31738700098977235, |
| "grad_norm": 0.28991878032684326, |
| "learning_rate": 8e-05, |
| "loss": 1.7445, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.3176069504014077, |
| "grad_norm": 0.283600777387619, |
| "learning_rate": 8e-05, |
| "loss": 1.8043, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.317826899813043, |
| "grad_norm": 0.3082323372364044, |
| "learning_rate": 8e-05, |
| "loss": 1.7858, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.3180468492246783, |
| "grad_norm": 0.28433462977409363, |
| "learning_rate": 8e-05, |
| "loss": 1.6911, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.31826679863631363, |
| "grad_norm": 0.27776578068733215, |
| "learning_rate": 8e-05, |
| "loss": 1.7212, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.31848674804794896, |
| "grad_norm": 0.29395151138305664, |
| "learning_rate": 8e-05, |
| "loss": 1.7221, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.3187066974595843, |
| "grad_norm": 0.27507245540618896, |
| "learning_rate": 8e-05, |
| "loss": 1.7358, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.31892664687121963, |
| "grad_norm": 0.25614190101623535, |
| "learning_rate": 8e-05, |
| "loss": 1.5138, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.31914659628285497, |
| "grad_norm": 0.2908024489879608, |
| "learning_rate": 8e-05, |
| "loss": 1.756, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.31936654569449024, |
| "grad_norm": 0.2729463577270508, |
| "learning_rate": 8e-05, |
| "loss": 1.5542, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.3195864951061256, |
| "grad_norm": 0.27094194293022156, |
| "learning_rate": 8e-05, |
| "loss": 1.5917, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.3198064445177609, |
| "grad_norm": 0.28125494718551636, |
| "learning_rate": 8e-05, |
| "loss": 1.6584, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.32002639392939625, |
| "grad_norm": 0.29033198952674866, |
| "learning_rate": 8e-05, |
| "loss": 1.7332, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.3202463433410316, |
| "grad_norm": 0.26570284366607666, |
| "learning_rate": 8e-05, |
| "loss": 1.6159, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.32046629275266686, |
| "grad_norm": 0.307412713766098, |
| "learning_rate": 8e-05, |
| "loss": 1.7351, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.3206862421643022, |
| "grad_norm": 0.29387474060058594, |
| "learning_rate": 8e-05, |
| "loss": 1.9386, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.3209061915759375, |
| "grad_norm": 0.26545315980911255, |
| "learning_rate": 8e-05, |
| "loss": 1.6343, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.32112614098757286, |
| "grad_norm": 0.279238224029541, |
| "learning_rate": 8e-05, |
| "loss": 1.6245, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3213460903992082, |
| "grad_norm": 0.2766862213611603, |
| "learning_rate": 8e-05, |
| "loss": 1.7135, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.32156603981084353, |
| "grad_norm": 0.2705351412296295, |
| "learning_rate": 8e-05, |
| "loss": 1.6526, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.3217859892224788, |
| "grad_norm": 0.27870967984199524, |
| "learning_rate": 8e-05, |
| "loss": 1.6512, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.32200593863411414, |
| "grad_norm": 0.284407377243042, |
| "learning_rate": 8e-05, |
| "loss": 1.755, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.3222258880457495, |
| "grad_norm": 0.2897641062736511, |
| "learning_rate": 8e-05, |
| "loss": 1.8383, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.3224458374573848, |
| "grad_norm": 0.2667568624019623, |
| "learning_rate": 8e-05, |
| "loss": 1.6989, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.32266578686902014, |
| "grad_norm": 0.26580294966697693, |
| "learning_rate": 8e-05, |
| "loss": 1.5895, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.3228857362806554, |
| "grad_norm": 0.26188549399375916, |
| "learning_rate": 8e-05, |
| "loss": 1.5799, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.32310568569229076, |
| "grad_norm": 0.27703747153282166, |
| "learning_rate": 8e-05, |
| "loss": 1.8306, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.3233256351039261, |
| "grad_norm": 0.27643802762031555, |
| "learning_rate": 8e-05, |
| "loss": 1.6864, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.3235455845155614, |
| "grad_norm": 0.27216553688049316, |
| "learning_rate": 8e-05, |
| "loss": 1.6006, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.32376553392719676, |
| "grad_norm": 0.2984940707683563, |
| "learning_rate": 8e-05, |
| "loss": 1.7548, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.3239854833388321, |
| "grad_norm": 0.30579298734664917, |
| "learning_rate": 8e-05, |
| "loss": 1.8307, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.32420543275046737, |
| "grad_norm": 0.27524709701538086, |
| "learning_rate": 8e-05, |
| "loss": 1.6134, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.3244253821621027, |
| "grad_norm": 0.2788650393486023, |
| "learning_rate": 8e-05, |
| "loss": 1.8194, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.32464533157373804, |
| "grad_norm": 0.28263744711875916, |
| "learning_rate": 8e-05, |
| "loss": 1.7633, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.3248652809853734, |
| "grad_norm": 0.30234408378601074, |
| "learning_rate": 8e-05, |
| "loss": 1.6057, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.3250852303970087, |
| "grad_norm": 0.2820134162902832, |
| "learning_rate": 8e-05, |
| "loss": 1.6913, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.325305179808644, |
| "grad_norm": 0.28929245471954346, |
| "learning_rate": 8e-05, |
| "loss": 1.9538, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.3255251292202793, |
| "grad_norm": 0.26399463415145874, |
| "learning_rate": 8e-05, |
| "loss": 1.7309, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.32574507863191465, |
| "grad_norm": 0.2722630202770233, |
| "learning_rate": 8e-05, |
| "loss": 1.5595, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.32596502804355, |
| "grad_norm": 0.2759261727333069, |
| "learning_rate": 8e-05, |
| "loss": 1.6272, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.3261849774551853, |
| "grad_norm": 0.28047022223472595, |
| "learning_rate": 8e-05, |
| "loss": 1.6933, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.32640492686682065, |
| "grad_norm": 0.2835995554924011, |
| "learning_rate": 8e-05, |
| "loss": 1.7165, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.32662487627845593, |
| "grad_norm": 0.28965097665786743, |
| "learning_rate": 8e-05, |
| "loss": 1.6399, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.32684482569009127, |
| "grad_norm": 0.2729817032814026, |
| "learning_rate": 8e-05, |
| "loss": 1.7397, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.3270647751017266, |
| "grad_norm": 0.26809874176979065, |
| "learning_rate": 8e-05, |
| "loss": 1.6954, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.32728472451336194, |
| "grad_norm": 0.29766684770584106, |
| "learning_rate": 8e-05, |
| "loss": 1.6947, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.32750467392499727, |
| "grad_norm": 0.27032670378685, |
| "learning_rate": 8e-05, |
| "loss": 1.8036, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.32772462333663255, |
| "grad_norm": 0.2694716453552246, |
| "learning_rate": 8e-05, |
| "loss": 1.6856, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3279445727482679, |
| "grad_norm": 0.27968841791152954, |
| "learning_rate": 8e-05, |
| "loss": 1.7466, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.3281645221599032, |
| "grad_norm": 0.2956348955631256, |
| "learning_rate": 8e-05, |
| "loss": 1.833, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.32838447157153855, |
| "grad_norm": 0.27069491147994995, |
| "learning_rate": 8e-05, |
| "loss": 1.715, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.3286044209831739, |
| "grad_norm": 0.26747795939445496, |
| "learning_rate": 8e-05, |
| "loss": 1.6663, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.3288243703948092, |
| "grad_norm": 0.2619915008544922, |
| "learning_rate": 8e-05, |
| "loss": 1.6503, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.3290443198064445, |
| "grad_norm": 0.2720276117324829, |
| "learning_rate": 8e-05, |
| "loss": 1.7174, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.32926426921807983, |
| "grad_norm": 0.26874253153800964, |
| "learning_rate": 8e-05, |
| "loss": 1.6503, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.32948421862971516, |
| "grad_norm": 0.28397336602211, |
| "learning_rate": 8e-05, |
| "loss": 1.67, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.3297041680413505, |
| "grad_norm": 0.2544403076171875, |
| "learning_rate": 8e-05, |
| "loss": 1.5153, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.32992411745298583, |
| "grad_norm": 0.2819180488586426, |
| "learning_rate": 8e-05, |
| "loss": 1.6704, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3301440668646211, |
| "grad_norm": 0.28150951862335205, |
| "learning_rate": 8e-05, |
| "loss": 1.8451, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.33036401627625644, |
| "grad_norm": 0.27396339178085327, |
| "learning_rate": 8e-05, |
| "loss": 1.7631, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.3305839656878918, |
| "grad_norm": 0.2954351007938385, |
| "learning_rate": 8e-05, |
| "loss": 1.8101, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.3308039150995271, |
| "grad_norm": 0.27129319310188293, |
| "learning_rate": 8e-05, |
| "loss": 1.6484, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.33102386451116245, |
| "grad_norm": 0.27612754702568054, |
| "learning_rate": 8e-05, |
| "loss": 1.6178, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.3312438139227978, |
| "grad_norm": 0.26097655296325684, |
| "learning_rate": 8e-05, |
| "loss": 1.5781, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.33146376333443306, |
| "grad_norm": 0.2704753577709198, |
| "learning_rate": 8e-05, |
| "loss": 1.6919, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.3316837127460684, |
| "grad_norm": 0.26866593956947327, |
| "learning_rate": 8e-05, |
| "loss": 1.6795, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.3319036621577037, |
| "grad_norm": 0.31797948479652405, |
| "learning_rate": 8e-05, |
| "loss": 1.7511, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.33212361156933906, |
| "grad_norm": 0.29456841945648193, |
| "learning_rate": 8e-05, |
| "loss": 1.7041, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3323435609809744, |
| "grad_norm": 0.28345033526420593, |
| "learning_rate": 8e-05, |
| "loss": 1.7499, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.3325635103926097, |
| "grad_norm": 0.28679129481315613, |
| "learning_rate": 8e-05, |
| "loss": 1.8304, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.332783459804245, |
| "grad_norm": 0.2799399793148041, |
| "learning_rate": 8e-05, |
| "loss": 1.6461, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.33300340921588034, |
| "grad_norm": 0.3234422206878662, |
| "learning_rate": 8e-05, |
| "loss": 1.5622, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.3332233586275157, |
| "grad_norm": 0.27786344289779663, |
| "learning_rate": 8e-05, |
| "loss": 1.6718, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.333443308039151, |
| "grad_norm": 0.27040839195251465, |
| "learning_rate": 8e-05, |
| "loss": 1.7428, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.33366325745078634, |
| "grad_norm": 0.2837252616882324, |
| "learning_rate": 8e-05, |
| "loss": 1.6929, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.3338832068624216, |
| "grad_norm": 0.27352792024612427, |
| "learning_rate": 8e-05, |
| "loss": 1.7804, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.33410315627405696, |
| "grad_norm": 0.27237218618392944, |
| "learning_rate": 8e-05, |
| "loss": 1.7652, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.3343231056856923, |
| "grad_norm": 0.3166270852088928, |
| "learning_rate": 8e-05, |
| "loss": 1.6363, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.3345430550973276, |
| "grad_norm": 0.2650817930698395, |
| "learning_rate": 8e-05, |
| "loss": 1.6954, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.33476300450896296, |
| "grad_norm": 0.2907481789588928, |
| "learning_rate": 8e-05, |
| "loss": 1.809, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.33498295392059824, |
| "grad_norm": 0.2754502296447754, |
| "learning_rate": 8e-05, |
| "loss": 1.8143, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.33520290333223357, |
| "grad_norm": 0.2890012264251709, |
| "learning_rate": 8e-05, |
| "loss": 1.6603, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.3354228527438689, |
| "grad_norm": 0.271720826625824, |
| "learning_rate": 8e-05, |
| "loss": 1.7186, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.33564280215550424, |
| "grad_norm": 0.2845331132411957, |
| "learning_rate": 8e-05, |
| "loss": 1.7739, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.3358627515671396, |
| "grad_norm": 0.2787776291370392, |
| "learning_rate": 8e-05, |
| "loss": 1.6146, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.3360827009787749, |
| "grad_norm": 0.2612919211387634, |
| "learning_rate": 8e-05, |
| "loss": 1.5575, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.3363026503904102, |
| "grad_norm": 0.279220849275589, |
| "learning_rate": 8e-05, |
| "loss": 1.7661, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.3365225998020455, |
| "grad_norm": 0.2812168300151825, |
| "learning_rate": 8e-05, |
| "loss": 1.7011, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.33674254921368085, |
| "grad_norm": 0.28216826915740967, |
| "learning_rate": 8e-05, |
| "loss": 1.7856, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.3369624986253162, |
| "grad_norm": 0.279895156621933, |
| "learning_rate": 8e-05, |
| "loss": 1.6793, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.3371824480369515, |
| "grad_norm": 0.2694056034088135, |
| "learning_rate": 8e-05, |
| "loss": 1.6289, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.3374023974485868, |
| "grad_norm": 0.2692592740058899, |
| "learning_rate": 8e-05, |
| "loss": 1.5595, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.33762234686022213, |
| "grad_norm": 0.32149383425712585, |
| "learning_rate": 8e-05, |
| "loss": 1.6667, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.33784229627185747, |
| "grad_norm": 0.28884437680244446, |
| "learning_rate": 8e-05, |
| "loss": 1.7836, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.3380622456834928, |
| "grad_norm": 0.276017963886261, |
| "learning_rate": 8e-05, |
| "loss": 1.712, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.33828219509512814, |
| "grad_norm": 0.26901450753211975, |
| "learning_rate": 8e-05, |
| "loss": 1.6442, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.33850214450676347, |
| "grad_norm": 0.29827412962913513, |
| "learning_rate": 8e-05, |
| "loss": 1.7619, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.33872209391839875, |
| "grad_norm": 0.2763231098651886, |
| "learning_rate": 8e-05, |
| "loss": 1.6344, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3389420433300341, |
| "grad_norm": 0.26493677496910095, |
| "learning_rate": 8e-05, |
| "loss": 1.6964, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.3391619927416694, |
| "grad_norm": 0.2956371605396271, |
| "learning_rate": 8e-05, |
| "loss": 1.7328, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.33938194215330475, |
| "grad_norm": 0.2845339775085449, |
| "learning_rate": 8e-05, |
| "loss": 1.6477, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.3396018915649401, |
| "grad_norm": 0.29501214623451233, |
| "learning_rate": 8e-05, |
| "loss": 1.7951, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.33982184097657536, |
| "grad_norm": 0.2859644591808319, |
| "learning_rate": 8e-05, |
| "loss": 1.6607, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.3400417903882107, |
| "grad_norm": 0.2733168303966522, |
| "learning_rate": 8e-05, |
| "loss": 1.6397, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.34026173979984603, |
| "grad_norm": 0.2580598294734955, |
| "learning_rate": 8e-05, |
| "loss": 1.5692, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.34048168921148136, |
| "grad_norm": 0.3042803406715393, |
| "learning_rate": 8e-05, |
| "loss": 1.7063, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.3407016386231167, |
| "grad_norm": 0.2833859324455261, |
| "learning_rate": 8e-05, |
| "loss": 1.7531, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.34092158803475203, |
| "grad_norm": 0.259620726108551, |
| "learning_rate": 8e-05, |
| "loss": 1.6179, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3411415374463873, |
| "grad_norm": 0.268355131149292, |
| "learning_rate": 8e-05, |
| "loss": 1.6009, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.34136148685802264, |
| "grad_norm": 0.2858780324459076, |
| "learning_rate": 8e-05, |
| "loss": 1.7033, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.341581436269658, |
| "grad_norm": 0.2777354121208191, |
| "learning_rate": 8e-05, |
| "loss": 1.7615, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.3418013856812933, |
| "grad_norm": 0.27899524569511414, |
| "learning_rate": 8e-05, |
| "loss": 1.6684, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.34202133509292865, |
| "grad_norm": 0.3156200349330902, |
| "learning_rate": 8e-05, |
| "loss": 1.5658, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.3422412845045639, |
| "grad_norm": 0.27549582719802856, |
| "learning_rate": 8e-05, |
| "loss": 1.692, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.34246123391619926, |
| "grad_norm": 0.27770310640335083, |
| "learning_rate": 8e-05, |
| "loss": 1.5891, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.3426811833278346, |
| "grad_norm": 0.28138646483421326, |
| "learning_rate": 8e-05, |
| "loss": 1.5949, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.3429011327394699, |
| "grad_norm": 0.2790684998035431, |
| "learning_rate": 8e-05, |
| "loss": 1.6371, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.34312108215110526, |
| "grad_norm": 0.303230345249176, |
| "learning_rate": 8e-05, |
| "loss": 1.7416, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.3433410315627406, |
| "grad_norm": 0.26891767978668213, |
| "learning_rate": 8e-05, |
| "loss": 1.8044, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.3435609809743759, |
| "grad_norm": 0.2734631896018982, |
| "learning_rate": 8e-05, |
| "loss": 1.7171, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.3437809303860112, |
| "grad_norm": 0.29556018114089966, |
| "learning_rate": 8e-05, |
| "loss": 1.9085, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.34400087979764654, |
| "grad_norm": 0.26478004455566406, |
| "learning_rate": 8e-05, |
| "loss": 1.6153, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.3442208292092819, |
| "grad_norm": 0.27655404806137085, |
| "learning_rate": 8e-05, |
| "loss": 1.7384, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.3444407786209172, |
| "grad_norm": 0.2902698218822479, |
| "learning_rate": 8e-05, |
| "loss": 1.6589, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.3446607280325525, |
| "grad_norm": 0.2857147455215454, |
| "learning_rate": 8e-05, |
| "loss": 1.6598, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.3448806774441878, |
| "grad_norm": 0.28339943289756775, |
| "learning_rate": 8e-05, |
| "loss": 1.7356, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.34510062685582316, |
| "grad_norm": 0.29340776801109314, |
| "learning_rate": 8e-05, |
| "loss": 1.8316, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.3453205762674585, |
| "grad_norm": 0.26669397950172424, |
| "learning_rate": 8e-05, |
| "loss": 1.6803, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.3455405256790938, |
| "grad_norm": 0.28508248925209045, |
| "learning_rate": 8e-05, |
| "loss": 1.7702, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.34576047509072916, |
| "grad_norm": 0.25610047578811646, |
| "learning_rate": 8e-05, |
| "loss": 1.6343, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.34598042450236444, |
| "grad_norm": 0.2758273482322693, |
| "learning_rate": 8e-05, |
| "loss": 1.7875, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.34620037391399977, |
| "grad_norm": 0.2674688398838043, |
| "learning_rate": 8e-05, |
| "loss": 1.6804, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.3464203233256351, |
| "grad_norm": 0.2796163558959961, |
| "learning_rate": 8e-05, |
| "loss": 1.6135, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.34664027273727044, |
| "grad_norm": 0.26260775327682495, |
| "learning_rate": 8e-05, |
| "loss": 1.6752, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.3468602221489058, |
| "grad_norm": 0.2897137403488159, |
| "learning_rate": 8e-05, |
| "loss": 1.6743, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.34708017156054105, |
| "grad_norm": 0.27681732177734375, |
| "learning_rate": 8e-05, |
| "loss": 1.6436, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.3473001209721764, |
| "grad_norm": 0.2694265842437744, |
| "learning_rate": 8e-05, |
| "loss": 1.6343, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.3475200703838117, |
| "grad_norm": 0.28179508447647095, |
| "learning_rate": 8e-05, |
| "loss": 1.676, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.34774001979544705, |
| "grad_norm": 0.29600057005882263, |
| "learning_rate": 8e-05, |
| "loss": 1.786, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.3479599692070824, |
| "grad_norm": 0.28932616114616394, |
| "learning_rate": 8e-05, |
| "loss": 1.7151, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.3481799186187177, |
| "grad_norm": 0.2912417948246002, |
| "learning_rate": 8e-05, |
| "loss": 1.7788, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.348399868030353, |
| "grad_norm": 0.2844431698322296, |
| "learning_rate": 8e-05, |
| "loss": 1.5585, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.34861981744198833, |
| "grad_norm": 0.2916630804538727, |
| "learning_rate": 8e-05, |
| "loss": 1.7484, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.34883976685362367, |
| "grad_norm": 0.2785089612007141, |
| "learning_rate": 8e-05, |
| "loss": 1.61, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.349059716265259, |
| "grad_norm": 0.2777422368526459, |
| "learning_rate": 8e-05, |
| "loss": 1.7183, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.34927966567689434, |
| "grad_norm": 0.28772565722465515, |
| "learning_rate": 8e-05, |
| "loss": 1.7161, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.3494996150885296, |
| "grad_norm": 0.28452831506729126, |
| "learning_rate": 8e-05, |
| "loss": 1.8004, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.34971956450016495, |
| "grad_norm": 0.2837449014186859, |
| "learning_rate": 8e-05, |
| "loss": 1.7992, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3499395139118003, |
| "grad_norm": 0.2874920666217804, |
| "learning_rate": 8e-05, |
| "loss": 1.6408, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.3501594633234356, |
| "grad_norm": 0.26615065336227417, |
| "learning_rate": 8e-05, |
| "loss": 1.658, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.35037941273507095, |
| "grad_norm": 0.27493569254875183, |
| "learning_rate": 8e-05, |
| "loss": 1.6843, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.3505993621467063, |
| "grad_norm": 0.291886568069458, |
| "learning_rate": 8e-05, |
| "loss": 1.7683, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.35081931155834156, |
| "grad_norm": 0.2868814468383789, |
| "learning_rate": 8e-05, |
| "loss": 1.7825, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.3510392609699769, |
| "grad_norm": 0.30988067388534546, |
| "learning_rate": 8e-05, |
| "loss": 1.7511, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.35125921038161223, |
| "grad_norm": 0.2746553122997284, |
| "learning_rate": 8e-05, |
| "loss": 1.6298, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.35147915979324756, |
| "grad_norm": 0.3013536036014557, |
| "learning_rate": 8e-05, |
| "loss": 1.7883, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.3516991092048829, |
| "grad_norm": 0.2906748056411743, |
| "learning_rate": 8e-05, |
| "loss": 1.5819, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.3519190586165182, |
| "grad_norm": 0.28082364797592163, |
| "learning_rate": 8e-05, |
| "loss": 1.7525, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3521390080281535, |
| "grad_norm": 0.28713324666023254, |
| "learning_rate": 8e-05, |
| "loss": 1.79, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.35235895743978884, |
| "grad_norm": 0.2819896638393402, |
| "learning_rate": 8e-05, |
| "loss": 1.6514, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.3525789068514242, |
| "grad_norm": 0.27669310569763184, |
| "learning_rate": 8e-05, |
| "loss": 1.5888, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.3527988562630595, |
| "grad_norm": 0.2873641848564148, |
| "learning_rate": 8e-05, |
| "loss": 1.8206, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.35301880567469485, |
| "grad_norm": 0.28426647186279297, |
| "learning_rate": 8e-05, |
| "loss": 1.7736, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.3532387550863301, |
| "grad_norm": 0.2733590602874756, |
| "learning_rate": 8e-05, |
| "loss": 1.6653, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.35345870449796546, |
| "grad_norm": 0.26751479506492615, |
| "learning_rate": 8e-05, |
| "loss": 1.5841, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.3536786539096008, |
| "grad_norm": 0.2767663598060608, |
| "learning_rate": 8e-05, |
| "loss": 1.6859, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.3538986033212361, |
| "grad_norm": 0.28359255194664, |
| "learning_rate": 8e-05, |
| "loss": 1.8799, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.35411855273287146, |
| "grad_norm": 0.27551594376564026, |
| "learning_rate": 8e-05, |
| "loss": 1.6429, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.35433850214450674, |
| "grad_norm": 0.26260972023010254, |
| "learning_rate": 8e-05, |
| "loss": 1.6068, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.3545584515561421, |
| "grad_norm": 0.2778937518596649, |
| "learning_rate": 8e-05, |
| "loss": 1.8057, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.3547784009677774, |
| "grad_norm": 0.27607765793800354, |
| "learning_rate": 8e-05, |
| "loss": 1.7439, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.35499835037941274, |
| "grad_norm": 0.2628287076950073, |
| "learning_rate": 8e-05, |
| "loss": 1.6916, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.3552182997910481, |
| "grad_norm": 0.2767592966556549, |
| "learning_rate": 8e-05, |
| "loss": 1.7185, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.3554382492026834, |
| "grad_norm": 0.2666943669319153, |
| "learning_rate": 8e-05, |
| "loss": 1.7351, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.3556581986143187, |
| "grad_norm": 0.28780093789100647, |
| "learning_rate": 8e-05, |
| "loss": 1.757, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.355878148025954, |
| "grad_norm": 0.30761584639549255, |
| "learning_rate": 8e-05, |
| "loss": 1.8096, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.35609809743758936, |
| "grad_norm": 0.2926090359687805, |
| "learning_rate": 8e-05, |
| "loss": 1.8609, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.3563180468492247, |
| "grad_norm": 0.27546852827072144, |
| "learning_rate": 8e-05, |
| "loss": 1.6422, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.35653799626086, |
| "grad_norm": 0.28559309244155884, |
| "learning_rate": 8e-05, |
| "loss": 1.8225, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.3567579456724953, |
| "grad_norm": 0.2804494798183441, |
| "learning_rate": 8e-05, |
| "loss": 1.9108, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.35697789508413064, |
| "grad_norm": 0.2643645703792572, |
| "learning_rate": 8e-05, |
| "loss": 1.5462, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.35719784449576597, |
| "grad_norm": 0.2888531982898712, |
| "learning_rate": 8e-05, |
| "loss": 1.701, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.3574177939074013, |
| "grad_norm": 0.28601035475730896, |
| "learning_rate": 8e-05, |
| "loss": 1.628, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.35763774331903664, |
| "grad_norm": 0.2877524197101593, |
| "learning_rate": 8e-05, |
| "loss": 1.8403, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.357857692730672, |
| "grad_norm": 0.2658945918083191, |
| "learning_rate": 8e-05, |
| "loss": 1.4552, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.35807764214230725, |
| "grad_norm": 0.2911885976791382, |
| "learning_rate": 8e-05, |
| "loss": 1.7753, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.3582975915539426, |
| "grad_norm": 0.29072439670562744, |
| "learning_rate": 8e-05, |
| "loss": 1.7229, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.3585175409655779, |
| "grad_norm": 0.29961150884628296, |
| "learning_rate": 8e-05, |
| "loss": 1.7694, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.35873749037721325, |
| "grad_norm": 0.2760653793811798, |
| "learning_rate": 8e-05, |
| "loss": 1.719, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.3589574397888486, |
| "grad_norm": 0.2739832103252411, |
| "learning_rate": 8e-05, |
| "loss": 1.7367, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.35917738920048387, |
| "grad_norm": 0.2669771611690521, |
| "learning_rate": 8e-05, |
| "loss": 1.5306, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.3593973386121192, |
| "grad_norm": 0.2744583189487457, |
| "learning_rate": 8e-05, |
| "loss": 1.5713, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.35961728802375453, |
| "grad_norm": 0.2943086326122284, |
| "learning_rate": 8e-05, |
| "loss": 1.6569, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.35983723743538987, |
| "grad_norm": 0.2873243987560272, |
| "learning_rate": 8e-05, |
| "loss": 1.6864, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.3600571868470252, |
| "grad_norm": 0.27217867970466614, |
| "learning_rate": 8e-05, |
| "loss": 1.7519, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.36027713625866054, |
| "grad_norm": 0.28656938672065735, |
| "learning_rate": 8e-05, |
| "loss": 1.7892, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.3604970856702958, |
| "grad_norm": 0.2876884937286377, |
| "learning_rate": 8e-05, |
| "loss": 1.6709, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.36071703508193115, |
| "grad_norm": 0.2873481512069702, |
| "learning_rate": 8e-05, |
| "loss": 1.8336, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3609369844935665, |
| "grad_norm": 0.28285419940948486, |
| "learning_rate": 8e-05, |
| "loss": 1.5887, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.3611569339052018, |
| "grad_norm": 0.2624582052230835, |
| "learning_rate": 8e-05, |
| "loss": 1.6248, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.36137688331683715, |
| "grad_norm": 0.2794424891471863, |
| "learning_rate": 8e-05, |
| "loss": 1.7191, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.36159683272847243, |
| "grad_norm": 0.2890479862689972, |
| "learning_rate": 8e-05, |
| "loss": 1.905, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.36181678214010776, |
| "grad_norm": 0.28444570302963257, |
| "learning_rate": 8e-05, |
| "loss": 1.6948, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.3620367315517431, |
| "grad_norm": 0.27037203311920166, |
| "learning_rate": 8e-05, |
| "loss": 1.6245, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.36225668096337843, |
| "grad_norm": 0.2864437699317932, |
| "learning_rate": 8e-05, |
| "loss": 1.688, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.36247663037501376, |
| "grad_norm": 0.27912065386772156, |
| "learning_rate": 8e-05, |
| "loss": 1.6056, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.3626965797866491, |
| "grad_norm": 0.26467230916023254, |
| "learning_rate": 8e-05, |
| "loss": 1.5786, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.3629165291982844, |
| "grad_norm": 0.2793690264225006, |
| "learning_rate": 8e-05, |
| "loss": 1.6003, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.3631364786099197, |
| "grad_norm": 0.288629949092865, |
| "learning_rate": 8e-05, |
| "loss": 1.6752, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.36335642802155504, |
| "grad_norm": 0.283195823431015, |
| "learning_rate": 8e-05, |
| "loss": 1.6854, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.3635763774331904, |
| "grad_norm": 0.2929665446281433, |
| "learning_rate": 8e-05, |
| "loss": 1.7191, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.3637963268448257, |
| "grad_norm": 0.28676289319992065, |
| "learning_rate": 8e-05, |
| "loss": 1.6959, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.364016276256461, |
| "grad_norm": 0.264635294675827, |
| "learning_rate": 8e-05, |
| "loss": 1.6232, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.3642362256680963, |
| "grad_norm": 0.2763380706310272, |
| "learning_rate": 8e-05, |
| "loss": 1.7631, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.36445617507973166, |
| "grad_norm": 0.2624233365058899, |
| "learning_rate": 8e-05, |
| "loss": 1.6635, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.364676124491367, |
| "grad_norm": 0.2564058303833008, |
| "learning_rate": 8e-05, |
| "loss": 1.4745, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.3648960739030023, |
| "grad_norm": 0.2966236174106598, |
| "learning_rate": 8e-05, |
| "loss": 1.6892, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.36511602331463766, |
| "grad_norm": 0.30588555335998535, |
| "learning_rate": 8e-05, |
| "loss": 1.6884, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.36533597272627294, |
| "grad_norm": 0.2692076861858368, |
| "learning_rate": 8e-05, |
| "loss": 1.7158, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.3655559221379083, |
| "grad_norm": 0.29388558864593506, |
| "learning_rate": 8e-05, |
| "loss": 1.7133, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.3657758715495436, |
| "grad_norm": 0.28685635328292847, |
| "learning_rate": 8e-05, |
| "loss": 1.7444, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.36599582096117894, |
| "grad_norm": 0.2885795831680298, |
| "learning_rate": 8e-05, |
| "loss": 1.7537, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.3662157703728143, |
| "grad_norm": 0.3066631853580475, |
| "learning_rate": 8e-05, |
| "loss": 1.7843, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.36643571978444955, |
| "grad_norm": 0.31112298369407654, |
| "learning_rate": 8e-05, |
| "loss": 1.4934, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.3666556691960849, |
| "grad_norm": 0.2751656472682953, |
| "learning_rate": 8e-05, |
| "loss": 1.7463, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.3668756186077202, |
| "grad_norm": 0.2834889590740204, |
| "learning_rate": 8e-05, |
| "loss": 1.7757, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.36709556801935556, |
| "grad_norm": 0.2778145968914032, |
| "learning_rate": 8e-05, |
| "loss": 1.7423, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.3673155174309909, |
| "grad_norm": 0.32161521911621094, |
| "learning_rate": 8e-05, |
| "loss": 1.8312, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.36753546684262617, |
| "grad_norm": 0.27995115518569946, |
| "learning_rate": 8e-05, |
| "loss": 1.7694, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.3677554162542615, |
| "grad_norm": 0.27701541781425476, |
| "learning_rate": 8e-05, |
| "loss": 1.8054, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.36797536566589684, |
| "grad_norm": 0.2757355570793152, |
| "learning_rate": 8e-05, |
| "loss": 1.7036, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.36819531507753217, |
| "grad_norm": 0.27305907011032104, |
| "learning_rate": 8e-05, |
| "loss": 1.6627, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.3684152644891675, |
| "grad_norm": 0.299679696559906, |
| "learning_rate": 8e-05, |
| "loss": 1.7552, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.36863521390080284, |
| "grad_norm": 0.2728777825832367, |
| "learning_rate": 8e-05, |
| "loss": 1.7649, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.3688551633124381, |
| "grad_norm": 0.26330089569091797, |
| "learning_rate": 8e-05, |
| "loss": 1.5887, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.36907511272407345, |
| "grad_norm": 0.2850317060947418, |
| "learning_rate": 8e-05, |
| "loss": 1.6255, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.3692950621357088, |
| "grad_norm": 0.2784862220287323, |
| "learning_rate": 8e-05, |
| "loss": 1.7123, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.3695150115473441, |
| "grad_norm": 0.284298300743103, |
| "learning_rate": 8e-05, |
| "loss": 1.5809, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.36973496095897945, |
| "grad_norm": 0.2725334167480469, |
| "learning_rate": 8e-05, |
| "loss": 1.7037, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.36995491037061473, |
| "grad_norm": 0.2760758399963379, |
| "learning_rate": 8e-05, |
| "loss": 1.6827, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.37017485978225007, |
| "grad_norm": 0.2661541700363159, |
| "learning_rate": 8e-05, |
| "loss": 1.7042, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.3703948091938854, |
| "grad_norm": 0.27737516164779663, |
| "learning_rate": 8e-05, |
| "loss": 1.7689, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.37061475860552073, |
| "grad_norm": 0.2607424259185791, |
| "learning_rate": 8e-05, |
| "loss": 1.6356, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.37083470801715607, |
| "grad_norm": 0.2802969217300415, |
| "learning_rate": 8e-05, |
| "loss": 1.7004, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.3710546574287914, |
| "grad_norm": 0.2660817801952362, |
| "learning_rate": 8e-05, |
| "loss": 1.5539, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.3712746068404267, |
| "grad_norm": 0.27867192029953003, |
| "learning_rate": 8e-05, |
| "loss": 1.6531, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.371494556252062, |
| "grad_norm": 0.27857083082199097, |
| "learning_rate": 8e-05, |
| "loss": 1.8023, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.37171450566369735, |
| "grad_norm": 0.2689161002635956, |
| "learning_rate": 8e-05, |
| "loss": 1.7601, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3719344550753327, |
| "grad_norm": 0.297826886177063, |
| "learning_rate": 8e-05, |
| "loss": 1.7627, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.372154404486968, |
| "grad_norm": 0.2592705190181732, |
| "learning_rate": 8e-05, |
| "loss": 1.7132, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.3723743538986033, |
| "grad_norm": 0.28288522362709045, |
| "learning_rate": 8e-05, |
| "loss": 1.7604, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.37259430331023863, |
| "grad_norm": 0.30823859572410583, |
| "learning_rate": 8e-05, |
| "loss": 1.8563, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.37281425272187396, |
| "grad_norm": 0.27835527062416077, |
| "learning_rate": 8e-05, |
| "loss": 1.6816, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.3730342021335093, |
| "grad_norm": 0.2626672089099884, |
| "learning_rate": 8e-05, |
| "loss": 1.6185, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.37325415154514463, |
| "grad_norm": 0.2489227056503296, |
| "learning_rate": 8e-05, |
| "loss": 1.6119, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.37347410095677996, |
| "grad_norm": 0.28637897968292236, |
| "learning_rate": 8e-05, |
| "loss": 1.6695, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.37369405036841524, |
| "grad_norm": 0.27077022194862366, |
| "learning_rate": 8e-05, |
| "loss": 1.6095, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.3739139997800506, |
| "grad_norm": 0.32049357891082764, |
| "learning_rate": 8e-05, |
| "loss": 1.875, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3741339491916859, |
| "grad_norm": 0.2890382707118988, |
| "learning_rate": 8e-05, |
| "loss": 1.7129, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.37435389860332124, |
| "grad_norm": 0.2785224914550781, |
| "learning_rate": 8e-05, |
| "loss": 1.7162, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.3745738480149566, |
| "grad_norm": 0.2685299217700958, |
| "learning_rate": 8e-05, |
| "loss": 1.7358, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.37479379742659186, |
| "grad_norm": 0.2840120494365692, |
| "learning_rate": 8e-05, |
| "loss": 1.9123, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.3750137468382272, |
| "grad_norm": 0.27426856756210327, |
| "learning_rate": 8e-05, |
| "loss": 1.7144, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3752336962498625, |
| "grad_norm": 0.2707318663597107, |
| "learning_rate": 8e-05, |
| "loss": 1.6961, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.37545364566149786, |
| "grad_norm": 0.3059745728969574, |
| "learning_rate": 8e-05, |
| "loss": 1.7491, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.3756735950731332, |
| "grad_norm": 0.27109962701797485, |
| "learning_rate": 8e-05, |
| "loss": 1.6515, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.3758935444847685, |
| "grad_norm": 0.26874709129333496, |
| "learning_rate": 8e-05, |
| "loss": 1.7119, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.3761134938964038, |
| "grad_norm": 0.27959340810775757, |
| "learning_rate": 8e-05, |
| "loss": 1.5449, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.37633344330803914, |
| "grad_norm": 0.284386545419693, |
| "learning_rate": 8e-05, |
| "loss": 1.8336, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.3765533927196745, |
| "grad_norm": 0.27861231565475464, |
| "learning_rate": 8e-05, |
| "loss": 1.7547, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.3767733421313098, |
| "grad_norm": 0.26845625042915344, |
| "learning_rate": 8e-05, |
| "loss": 1.6838, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.37699329154294514, |
| "grad_norm": 0.31240981817245483, |
| "learning_rate": 8e-05, |
| "loss": 1.7489, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.3772132409545804, |
| "grad_norm": 0.2878013253211975, |
| "learning_rate": 8e-05, |
| "loss": 1.7533, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.37743319036621575, |
| "grad_norm": 0.27676892280578613, |
| "learning_rate": 8e-05, |
| "loss": 1.6218, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.3776531397778511, |
| "grad_norm": 0.2782065272331238, |
| "learning_rate": 8e-05, |
| "loss": 1.6311, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.3778730891894864, |
| "grad_norm": 0.2829797863960266, |
| "learning_rate": 8e-05, |
| "loss": 1.5863, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.37809303860112176, |
| "grad_norm": 0.2851261794567108, |
| "learning_rate": 8e-05, |
| "loss": 1.8365, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.3783129880127571, |
| "grad_norm": 0.2844488322734833, |
| "learning_rate": 8e-05, |
| "loss": 1.7765, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.37853293742439237, |
| "grad_norm": 0.2976120412349701, |
| "learning_rate": 8e-05, |
| "loss": 1.7334, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.3787528868360277, |
| "grad_norm": 0.27947840094566345, |
| "learning_rate": 8e-05, |
| "loss": 1.6641, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.37897283624766304, |
| "grad_norm": 0.2986278831958771, |
| "learning_rate": 8e-05, |
| "loss": 1.8201, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.37919278565929837, |
| "grad_norm": 0.26200374960899353, |
| "learning_rate": 8e-05, |
| "loss": 1.5835, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.3794127350709337, |
| "grad_norm": 0.2846388816833496, |
| "learning_rate": 8e-05, |
| "loss": 1.7863, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.379632684482569, |
| "grad_norm": 0.2809320390224457, |
| "learning_rate": 8e-05, |
| "loss": 1.6667, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.3798526338942043, |
| "grad_norm": 0.28523099422454834, |
| "learning_rate": 8e-05, |
| "loss": 1.6647, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.38007258330583965, |
| "grad_norm": 0.2719436287879944, |
| "learning_rate": 8e-05, |
| "loss": 1.6, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.380292532717475, |
| "grad_norm": 0.2762429118156433, |
| "learning_rate": 8e-05, |
| "loss": 1.6888, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.3805124821291103, |
| "grad_norm": 0.30161863565444946, |
| "learning_rate": 8e-05, |
| "loss": 1.6659, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.38073243154074565, |
| "grad_norm": 0.27962687611579895, |
| "learning_rate": 8e-05, |
| "loss": 1.629, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 0.27580323815345764, |
| "learning_rate": 8e-05, |
| "loss": 1.689, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.38117233036401627, |
| "grad_norm": 0.2676113545894623, |
| "learning_rate": 8e-05, |
| "loss": 1.7195, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.3813922797756516, |
| "grad_norm": 0.27840152382850647, |
| "learning_rate": 8e-05, |
| "loss": 1.6433, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.38161222918728693, |
| "grad_norm": 0.27100005745887756, |
| "learning_rate": 8e-05, |
| "loss": 1.6517, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.38183217859892227, |
| "grad_norm": 0.2874828577041626, |
| "learning_rate": 8e-05, |
| "loss": 1.9139, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.38205212801055755, |
| "grad_norm": 0.2685931324958801, |
| "learning_rate": 8e-05, |
| "loss": 1.7373, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.3822720774221929, |
| "grad_norm": 0.2895548641681671, |
| "learning_rate": 8e-05, |
| "loss": 1.7828, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.3824920268338282, |
| "grad_norm": 0.29109206795692444, |
| "learning_rate": 8e-05, |
| "loss": 1.6347, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.38271197624546355, |
| "grad_norm": 0.2804923951625824, |
| "learning_rate": 8e-05, |
| "loss": 1.5978, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.3829319256570989, |
| "grad_norm": 0.2829732894897461, |
| "learning_rate": 8e-05, |
| "loss": 1.6271, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.3831518750687342, |
| "grad_norm": 0.28979840874671936, |
| "learning_rate": 8e-05, |
| "loss": 1.7244, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.3833718244803695, |
| "grad_norm": 0.30159792304039, |
| "learning_rate": 8e-05, |
| "loss": 1.8074, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.38359177389200483, |
| "grad_norm": 0.28228580951690674, |
| "learning_rate": 8e-05, |
| "loss": 1.6669, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.38381172330364016, |
| "grad_norm": 0.27950945496559143, |
| "learning_rate": 8e-05, |
| "loss": 1.6583, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.3840316727152755, |
| "grad_norm": 0.2708896994590759, |
| "learning_rate": 8e-05, |
| "loss": 1.5793, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.38425162212691083, |
| "grad_norm": 0.27368029952049255, |
| "learning_rate": 8e-05, |
| "loss": 1.6371, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.3844715715385461, |
| "grad_norm": 0.27621379494667053, |
| "learning_rate": 8e-05, |
| "loss": 1.5737, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.38469152095018144, |
| "grad_norm": 0.27143922448158264, |
| "learning_rate": 8e-05, |
| "loss": 1.7289, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.3849114703618168, |
| "grad_norm": 0.28887274861335754, |
| "learning_rate": 8e-05, |
| "loss": 1.7262, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.3851314197734521, |
| "grad_norm": 0.26516541838645935, |
| "learning_rate": 8e-05, |
| "loss": 1.6358, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.38535136918508744, |
| "grad_norm": 0.31475701928138733, |
| "learning_rate": 8e-05, |
| "loss": 1.8599, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.3855713185967228, |
| "grad_norm": 0.27711552381515503, |
| "learning_rate": 8e-05, |
| "loss": 1.632, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.38579126800835806, |
| "grad_norm": 0.27542901039123535, |
| "learning_rate": 8e-05, |
| "loss": 1.6741, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.3860112174199934, |
| "grad_norm": 0.2941054701805115, |
| "learning_rate": 8e-05, |
| "loss": 1.6396, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.3862311668316287, |
| "grad_norm": 0.27836698293685913, |
| "learning_rate": 8e-05, |
| "loss": 1.5689, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.38645111624326406, |
| "grad_norm": 0.29147645831108093, |
| "learning_rate": 8e-05, |
| "loss": 1.7523, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.3866710656548994, |
| "grad_norm": 0.30084285140037537, |
| "learning_rate": 8e-05, |
| "loss": 1.6932, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.38689101506653467, |
| "grad_norm": 0.2850727140903473, |
| "learning_rate": 8e-05, |
| "loss": 1.7276, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.38711096447817, |
| "grad_norm": 0.27011391520500183, |
| "learning_rate": 8e-05, |
| "loss": 1.6635, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.38733091388980534, |
| "grad_norm": 0.28682348132133484, |
| "learning_rate": 8e-05, |
| "loss": 1.7642, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.3875508633014407, |
| "grad_norm": 0.27676117420196533, |
| "learning_rate": 8e-05, |
| "loss": 1.7406, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.387770812713076, |
| "grad_norm": 0.2654523551464081, |
| "learning_rate": 8e-05, |
| "loss": 1.6484, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.38799076212471134, |
| "grad_norm": 0.28026026487350464, |
| "learning_rate": 8e-05, |
| "loss": 1.6714, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.3882107115363466, |
| "grad_norm": 0.3003789782524109, |
| "learning_rate": 8e-05, |
| "loss": 1.8121, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.38843066094798195, |
| "grad_norm": 0.35523107647895813, |
| "learning_rate": 8e-05, |
| "loss": 1.9299, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.3886506103596173, |
| "grad_norm": 0.26844245195388794, |
| "learning_rate": 8e-05, |
| "loss": 1.6358, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.3888705597712526, |
| "grad_norm": 0.27308356761932373, |
| "learning_rate": 8e-05, |
| "loss": 1.6104, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.38909050918288796, |
| "grad_norm": 0.2775373160839081, |
| "learning_rate": 8e-05, |
| "loss": 1.5679, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.38931045859452323, |
| "grad_norm": 0.29753705859184265, |
| "learning_rate": 8e-05, |
| "loss": 1.7678, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.38953040800615857, |
| "grad_norm": 0.2798722982406616, |
| "learning_rate": 8e-05, |
| "loss": 1.7034, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.3897503574177939, |
| "grad_norm": 0.2842818796634674, |
| "learning_rate": 8e-05, |
| "loss": 1.727, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.38997030682942924, |
| "grad_norm": 0.27555832266807556, |
| "learning_rate": 8e-05, |
| "loss": 1.7438, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.39019025624106457, |
| "grad_norm": 0.2824547588825226, |
| "learning_rate": 8e-05, |
| "loss": 1.5733, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.3904102056526999, |
| "grad_norm": 0.2658035159111023, |
| "learning_rate": 8e-05, |
| "loss": 1.5997, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.3906301550643352, |
| "grad_norm": 0.27601394057273865, |
| "learning_rate": 8e-05, |
| "loss": 1.7025, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.3908501044759705, |
| "grad_norm": 0.2990022897720337, |
| "learning_rate": 8e-05, |
| "loss": 1.8278, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.39107005388760585, |
| "grad_norm": 0.29378873109817505, |
| "learning_rate": 8e-05, |
| "loss": 1.7549, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.3912900032992412, |
| "grad_norm": 0.29202136397361755, |
| "learning_rate": 8e-05, |
| "loss": 1.6602, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.3915099527108765, |
| "grad_norm": 0.28191903233528137, |
| "learning_rate": 8e-05, |
| "loss": 1.6618, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.3917299021225118, |
| "grad_norm": 0.26916682720184326, |
| "learning_rate": 8e-05, |
| "loss": 1.6523, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.39194985153414713, |
| "grad_norm": 0.2886850833892822, |
| "learning_rate": 8e-05, |
| "loss": 1.5752, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.39216980094578247, |
| "grad_norm": 0.2749246656894684, |
| "learning_rate": 8e-05, |
| "loss": 1.6731, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.3923897503574178, |
| "grad_norm": 0.28945374488830566, |
| "learning_rate": 8e-05, |
| "loss": 1.7404, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.39260969976905313, |
| "grad_norm": 0.27297836542129517, |
| "learning_rate": 8e-05, |
| "loss": 1.7176, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.39282964918068847, |
| "grad_norm": 0.2738782465457916, |
| "learning_rate": 8e-05, |
| "loss": 1.6844, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.39304959859232375, |
| "grad_norm": 0.2897050082683563, |
| "learning_rate": 8e-05, |
| "loss": 1.6522, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.3932695480039591, |
| "grad_norm": 0.31031668186187744, |
| "learning_rate": 8e-05, |
| "loss": 1.8216, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.3934894974155944, |
| "grad_norm": 0.2869516909122467, |
| "learning_rate": 8e-05, |
| "loss": 1.7598, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.39370944682722975, |
| "grad_norm": 0.3080596625804901, |
| "learning_rate": 8e-05, |
| "loss": 1.7645, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3939293962388651, |
| "grad_norm": 0.27992716431617737, |
| "learning_rate": 8e-05, |
| "loss": 1.8373, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.39414934565050036, |
| "grad_norm": 0.2761777341365814, |
| "learning_rate": 8e-05, |
| "loss": 1.6767, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.3943692950621357, |
| "grad_norm": 0.30193084478378296, |
| "learning_rate": 8e-05, |
| "loss": 1.7394, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.39458924447377103, |
| "grad_norm": 0.29375529289245605, |
| "learning_rate": 8e-05, |
| "loss": 1.761, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.39480919388540636, |
| "grad_norm": 0.32190364599227905, |
| "learning_rate": 8e-05, |
| "loss": 1.7045, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.3950291432970417, |
| "grad_norm": 0.27505311369895935, |
| "learning_rate": 8e-05, |
| "loss": 1.6678, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.39524909270867703, |
| "grad_norm": 0.28678107261657715, |
| "learning_rate": 8e-05, |
| "loss": 1.7155, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.3954690421203123, |
| "grad_norm": 0.28372088074684143, |
| "learning_rate": 8e-05, |
| "loss": 1.8833, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.39568899153194764, |
| "grad_norm": 0.27803388237953186, |
| "learning_rate": 8e-05, |
| "loss": 1.7125, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.395908940943583, |
| "grad_norm": 0.278728187084198, |
| "learning_rate": 8e-05, |
| "loss": 1.7019, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3961288903552183, |
| "grad_norm": 0.29563671350479126, |
| "learning_rate": 8e-05, |
| "loss": 1.6588, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.39634883976685364, |
| "grad_norm": 0.35585105419158936, |
| "learning_rate": 8e-05, |
| "loss": 1.8864, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.3965687891784889, |
| "grad_norm": 0.27399691939353943, |
| "learning_rate": 8e-05, |
| "loss": 1.6222, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.39678873859012426, |
| "grad_norm": 0.2557234764099121, |
| "learning_rate": 8e-05, |
| "loss": 1.4835, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.3970086880017596, |
| "grad_norm": 0.2929818332195282, |
| "learning_rate": 8e-05, |
| "loss": 1.7737, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.3972286374133949, |
| "grad_norm": 0.279729425907135, |
| "learning_rate": 8e-05, |
| "loss": 1.6073, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.39744858682503026, |
| "grad_norm": 0.2622847259044647, |
| "learning_rate": 8e-05, |
| "loss": 1.5368, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.3976685362366656, |
| "grad_norm": 0.2704038619995117, |
| "learning_rate": 8e-05, |
| "loss": 1.5809, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.39788848564830087, |
| "grad_norm": 0.2785516679286957, |
| "learning_rate": 8e-05, |
| "loss": 1.6416, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.3981084350599362, |
| "grad_norm": 0.29611852765083313, |
| "learning_rate": 8e-05, |
| "loss": 1.7186, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.39832838447157154, |
| "grad_norm": 0.28127896785736084, |
| "learning_rate": 8e-05, |
| "loss": 1.6248, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.3985483338832069, |
| "grad_norm": 0.2746615707874298, |
| "learning_rate": 8e-05, |
| "loss": 1.7029, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.3987682832948422, |
| "grad_norm": 0.2650880515575409, |
| "learning_rate": 8e-05, |
| "loss": 1.7828, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.3989882327064775, |
| "grad_norm": 0.28278401494026184, |
| "learning_rate": 8e-05, |
| "loss": 1.6383, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.3992081821181128, |
| "grad_norm": 0.2749755382537842, |
| "learning_rate": 8e-05, |
| "loss": 1.7551, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.39942813152974815, |
| "grad_norm": 0.26788878440856934, |
| "learning_rate": 8e-05, |
| "loss": 1.5206, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.3996480809413835, |
| "grad_norm": 0.28166842460632324, |
| "learning_rate": 8e-05, |
| "loss": 1.7134, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.3998680303530188, |
| "grad_norm": 0.2781674563884735, |
| "learning_rate": 8e-05, |
| "loss": 1.7864, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.40008797976465416, |
| "grad_norm": 0.2810186445713043, |
| "learning_rate": 8e-05, |
| "loss": 1.7017, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.40030792917628943, |
| "grad_norm": 0.2872167229652405, |
| "learning_rate": 8e-05, |
| "loss": 1.563, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.40052787858792477, |
| "grad_norm": 0.2864447832107544, |
| "learning_rate": 8e-05, |
| "loss": 1.8261, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.4007478279995601, |
| "grad_norm": 0.2633639872074127, |
| "learning_rate": 8e-05, |
| "loss": 1.5864, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.40096777741119544, |
| "grad_norm": 0.3556326925754547, |
| "learning_rate": 8e-05, |
| "loss": 1.5781, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.40118772682283077, |
| "grad_norm": 0.2832813560962677, |
| "learning_rate": 8e-05, |
| "loss": 1.7145, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.40140767623446605, |
| "grad_norm": 0.2862699627876282, |
| "learning_rate": 8e-05, |
| "loss": 1.7753, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.4016276256461014, |
| "grad_norm": 0.3274460732936859, |
| "learning_rate": 8e-05, |
| "loss": 1.7979, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.4018475750577367, |
| "grad_norm": 0.277118444442749, |
| "learning_rate": 8e-05, |
| "loss": 1.6089, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.40206752446937205, |
| "grad_norm": 0.278337687253952, |
| "learning_rate": 8e-05, |
| "loss": 1.5788, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.4022874738810074, |
| "grad_norm": 0.28072914481163025, |
| "learning_rate": 8e-05, |
| "loss": 1.6735, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.4025074232926427, |
| "grad_norm": 0.2815505564212799, |
| "learning_rate": 8e-05, |
| "loss": 1.7593, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.402727372704278, |
| "grad_norm": 0.2957006096839905, |
| "learning_rate": 8e-05, |
| "loss": 1.865, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.40294732211591333, |
| "grad_norm": 0.3079582452774048, |
| "learning_rate": 8e-05, |
| "loss": 1.7421, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.40316727152754867, |
| "grad_norm": 0.2924387454986572, |
| "learning_rate": 8e-05, |
| "loss": 1.7462, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.403387220939184, |
| "grad_norm": 0.28879454731941223, |
| "learning_rate": 8e-05, |
| "loss": 1.7433, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.40360717035081933, |
| "grad_norm": 0.27446237206459045, |
| "learning_rate": 8e-05, |
| "loss": 1.5869, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.4038271197624546, |
| "grad_norm": 0.3164878487586975, |
| "learning_rate": 8e-05, |
| "loss": 1.7505, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.40404706917408995, |
| "grad_norm": 0.25979530811309814, |
| "learning_rate": 8e-05, |
| "loss": 1.6001, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.4042670185857253, |
| "grad_norm": 0.30625709891319275, |
| "learning_rate": 8e-05, |
| "loss": 1.7907, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.4044869679973606, |
| "grad_norm": 0.27351540327072144, |
| "learning_rate": 8e-05, |
| "loss": 1.5835, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.40470691740899595, |
| "grad_norm": 0.302372545003891, |
| "learning_rate": 8e-05, |
| "loss": 1.7821, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4049268668206313, |
| "grad_norm": 0.2910183370113373, |
| "learning_rate": 8e-05, |
| "loss": 1.7993, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.40514681623226656, |
| "grad_norm": 0.2934883236885071, |
| "learning_rate": 8e-05, |
| "loss": 1.5928, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.4053667656439019, |
| "grad_norm": 0.2586327791213989, |
| "learning_rate": 8e-05, |
| "loss": 1.5714, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.40558671505553723, |
| "grad_norm": 0.27952027320861816, |
| "learning_rate": 8e-05, |
| "loss": 1.8168, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.40580666446717256, |
| "grad_norm": 0.2987437844276428, |
| "learning_rate": 8e-05, |
| "loss": 1.555, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.4060266138788079, |
| "grad_norm": 0.29165002703666687, |
| "learning_rate": 8e-05, |
| "loss": 1.6865, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.4062465632904432, |
| "grad_norm": 0.2825503945350647, |
| "learning_rate": 8e-05, |
| "loss": 1.8814, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.4064665127020785, |
| "grad_norm": 0.27995482087135315, |
| "learning_rate": 8e-05, |
| "loss": 1.6897, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.40668646211371384, |
| "grad_norm": 0.2735064923763275, |
| "learning_rate": 8e-05, |
| "loss": 1.7279, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.4069064115253492, |
| "grad_norm": 0.2850511074066162, |
| "learning_rate": 8e-05, |
| "loss": 1.6459, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4071263609369845, |
| "grad_norm": 0.3000599145889282, |
| "learning_rate": 8e-05, |
| "loss": 1.7301, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.40734631034861984, |
| "grad_norm": 0.2768002152442932, |
| "learning_rate": 8e-05, |
| "loss": 1.5748, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.4075662597602551, |
| "grad_norm": 0.26737141609191895, |
| "learning_rate": 8e-05, |
| "loss": 1.5895, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.40778620917189046, |
| "grad_norm": 0.26408424973487854, |
| "learning_rate": 8e-05, |
| "loss": 1.5611, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.4080061585835258, |
| "grad_norm": 0.2646276354789734, |
| "learning_rate": 8e-05, |
| "loss": 1.5865, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.4082261079951611, |
| "grad_norm": 0.27871212363243103, |
| "learning_rate": 8e-05, |
| "loss": 1.8202, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.40844605740679646, |
| "grad_norm": 0.3234533965587616, |
| "learning_rate": 8e-05, |
| "loss": 1.8213, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.40866600681843174, |
| "grad_norm": 0.2705099284648895, |
| "learning_rate": 8e-05, |
| "loss": 1.6637, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.40888595623006707, |
| "grad_norm": 0.28647711873054504, |
| "learning_rate": 8e-05, |
| "loss": 1.7396, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.4091059056417024, |
| "grad_norm": 0.2812083959579468, |
| "learning_rate": 8e-05, |
| "loss": 1.5663, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.40932585505333774, |
| "grad_norm": 0.2818193733692169, |
| "learning_rate": 8e-05, |
| "loss": 1.6073, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.4095458044649731, |
| "grad_norm": 0.29906994104385376, |
| "learning_rate": 8e-05, |
| "loss": 1.7061, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.4097657538766084, |
| "grad_norm": 0.27941465377807617, |
| "learning_rate": 8e-05, |
| "loss": 1.7282, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.4099857032882437, |
| "grad_norm": 0.27629899978637695, |
| "learning_rate": 8e-05, |
| "loss": 1.6879, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.410205652699879, |
| "grad_norm": 0.2792319059371948, |
| "learning_rate": 8e-05, |
| "loss": 1.7196, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.41042560211151435, |
| "grad_norm": 0.2763090431690216, |
| "learning_rate": 8e-05, |
| "loss": 1.673, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.4106455515231497, |
| "grad_norm": 0.2930999994277954, |
| "learning_rate": 8e-05, |
| "loss": 1.6919, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.410865500934785, |
| "grad_norm": 0.2748461365699768, |
| "learning_rate": 8e-05, |
| "loss": 1.7553, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.4110854503464203, |
| "grad_norm": 0.2742187976837158, |
| "learning_rate": 8e-05, |
| "loss": 1.6786, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.41130539975805563, |
| "grad_norm": 0.3050731420516968, |
| "learning_rate": 8e-05, |
| "loss": 1.4902, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.41152534916969097, |
| "grad_norm": 0.29456627368927, |
| "learning_rate": 8e-05, |
| "loss": 1.758, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.4117452985813263, |
| "grad_norm": 0.2844219505786896, |
| "learning_rate": 8e-05, |
| "loss": 1.6206, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.41196524799296164, |
| "grad_norm": 0.28889915347099304, |
| "learning_rate": 8e-05, |
| "loss": 1.6907, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.41218519740459697, |
| "grad_norm": 0.27245181798934937, |
| "learning_rate": 8e-05, |
| "loss": 1.6749, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.41240514681623225, |
| "grad_norm": 0.2927252948284149, |
| "learning_rate": 8e-05, |
| "loss": 1.6382, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.4126250962278676, |
| "grad_norm": 0.27153030037879944, |
| "learning_rate": 8e-05, |
| "loss": 1.6011, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.4128450456395029, |
| "grad_norm": 0.2807110846042633, |
| "learning_rate": 8e-05, |
| "loss": 1.7126, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.41306499505113825, |
| "grad_norm": 0.27375784516334534, |
| "learning_rate": 8e-05, |
| "loss": 1.7443, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.4132849444627736, |
| "grad_norm": 0.27330929040908813, |
| "learning_rate": 8e-05, |
| "loss": 1.6305, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.41350489387440886, |
| "grad_norm": 0.27126336097717285, |
| "learning_rate": 8e-05, |
| "loss": 1.6688, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4137248432860442, |
| "grad_norm": 0.2768147885799408, |
| "learning_rate": 8e-05, |
| "loss": 1.7274, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.41394479269767953, |
| "grad_norm": 0.2686031460762024, |
| "learning_rate": 8e-05, |
| "loss": 1.6445, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.41416474210931487, |
| "grad_norm": 0.27737778425216675, |
| "learning_rate": 8e-05, |
| "loss": 1.5226, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.4143846915209502, |
| "grad_norm": 0.2761901319026947, |
| "learning_rate": 8e-05, |
| "loss": 1.6884, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.41460464093258553, |
| "grad_norm": 0.28609856963157654, |
| "learning_rate": 8e-05, |
| "loss": 1.7719, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.4148245903442208, |
| "grad_norm": 0.2904943525791168, |
| "learning_rate": 8e-05, |
| "loss": 1.6979, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.41504453975585615, |
| "grad_norm": 0.3016435503959656, |
| "learning_rate": 8e-05, |
| "loss": 1.7912, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.4152644891674915, |
| "grad_norm": 0.27562782168388367, |
| "learning_rate": 8e-05, |
| "loss": 1.5822, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.4154844385791268, |
| "grad_norm": 0.2841348648071289, |
| "learning_rate": 8e-05, |
| "loss": 1.7524, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.41570438799076215, |
| "grad_norm": 0.26393935084342957, |
| "learning_rate": 8e-05, |
| "loss": 1.6219, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.4159243374023974, |
| "grad_norm": 0.2792678773403168, |
| "learning_rate": 8e-05, |
| "loss": 1.7243, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.41614428681403276, |
| "grad_norm": 0.291425496339798, |
| "learning_rate": 8e-05, |
| "loss": 1.7499, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.4163642362256681, |
| "grad_norm": 0.2737634778022766, |
| "learning_rate": 8e-05, |
| "loss": 1.6565, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.41658418563730343, |
| "grad_norm": 0.26807767152786255, |
| "learning_rate": 8e-05, |
| "loss": 1.6149, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.41680413504893876, |
| "grad_norm": 0.28826507925987244, |
| "learning_rate": 8e-05, |
| "loss": 1.6857, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.41702408446057404, |
| "grad_norm": 0.27604466676712036, |
| "learning_rate": 8e-05, |
| "loss": 1.7689, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.4172440338722094, |
| "grad_norm": 0.27355703711509705, |
| "learning_rate": 8e-05, |
| "loss": 1.7254, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.4174639832838447, |
| "grad_norm": 0.26692044734954834, |
| "learning_rate": 8e-05, |
| "loss": 1.6372, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.41768393269548004, |
| "grad_norm": 0.27527916431427, |
| "learning_rate": 8e-05, |
| "loss": 1.6913, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.4179038821071154, |
| "grad_norm": 0.26881837844848633, |
| "learning_rate": 8e-05, |
| "loss": 1.6663, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4181238315187507, |
| "grad_norm": 0.27977946400642395, |
| "learning_rate": 8e-05, |
| "loss": 1.8117, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.418343780930386, |
| "grad_norm": 0.2958911955356598, |
| "learning_rate": 8e-05, |
| "loss": 1.6603, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.4185637303420213, |
| "grad_norm": 0.2845151424407959, |
| "learning_rate": 8e-05, |
| "loss": 1.7517, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.41878367975365666, |
| "grad_norm": 0.2804581820964813, |
| "learning_rate": 8e-05, |
| "loss": 1.765, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.419003629165292, |
| "grad_norm": 0.29568520188331604, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4192235785769273, |
| "grad_norm": 0.303100049495697, |
| "learning_rate": 8e-05, |
| "loss": 1.77, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.4194435279885626, |
| "grad_norm": 0.26847636699676514, |
| "learning_rate": 8e-05, |
| "loss": 1.6964, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.41966347740019794, |
| "grad_norm": 0.2791590094566345, |
| "learning_rate": 8e-05, |
| "loss": 1.5912, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.41988342681183327, |
| "grad_norm": 0.2687268555164337, |
| "learning_rate": 8e-05, |
| "loss": 1.6163, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.4201033762234686, |
| "grad_norm": 0.29087433218955994, |
| "learning_rate": 8e-05, |
| "loss": 1.6588, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.42032332563510394, |
| "grad_norm": 0.29639971256256104, |
| "learning_rate": 8e-05, |
| "loss": 1.6911, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.4205432750467393, |
| "grad_norm": 0.27669841051101685, |
| "learning_rate": 8e-05, |
| "loss": 1.7078, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.42076322445837455, |
| "grad_norm": 0.2851327955722809, |
| "learning_rate": 8e-05, |
| "loss": 1.7217, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.4209831738700099, |
| "grad_norm": 0.27069011330604553, |
| "learning_rate": 8e-05, |
| "loss": 1.7076, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.4212031232816452, |
| "grad_norm": 0.26195240020751953, |
| "learning_rate": 8e-05, |
| "loss": 1.6647, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.42142307269328055, |
| "grad_norm": 0.3046209216117859, |
| "learning_rate": 8e-05, |
| "loss": 1.5303, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.4216430221049159, |
| "grad_norm": 0.29437899589538574, |
| "learning_rate": 8e-05, |
| "loss": 1.6589, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.42186297151655117, |
| "grad_norm": 0.2954728603363037, |
| "learning_rate": 8e-05, |
| "loss": 1.777, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.4220829209281865, |
| "grad_norm": 0.2612738609313965, |
| "learning_rate": 8e-05, |
| "loss": 1.5668, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.42230287033982183, |
| "grad_norm": 0.3015122413635254, |
| "learning_rate": 8e-05, |
| "loss": 1.7861, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.42252281975145717, |
| "grad_norm": 0.3785838484764099, |
| "learning_rate": 8e-05, |
| "loss": 1.8979, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.4227427691630925, |
| "grad_norm": 0.2849038541316986, |
| "learning_rate": 8e-05, |
| "loss": 1.6891, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.42296271857472784, |
| "grad_norm": 0.278728723526001, |
| "learning_rate": 8e-05, |
| "loss": 1.7891, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.4231826679863631, |
| "grad_norm": 0.27032172679901123, |
| "learning_rate": 8e-05, |
| "loss": 1.6963, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.42340261739799845, |
| "grad_norm": 0.2731832265853882, |
| "learning_rate": 8e-05, |
| "loss": 1.632, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.4236225668096338, |
| "grad_norm": 0.30378425121307373, |
| "learning_rate": 8e-05, |
| "loss": 1.7823, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.4238425162212691, |
| "grad_norm": 0.27693971991539, |
| "learning_rate": 8e-05, |
| "loss": 1.483, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.42406246563290445, |
| "grad_norm": 0.2719477415084839, |
| "learning_rate": 8e-05, |
| "loss": 1.6708, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.42428241504453973, |
| "grad_norm": 0.26625335216522217, |
| "learning_rate": 8e-05, |
| "loss": 1.4946, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.42450236445617506, |
| "grad_norm": 0.2843473553657532, |
| "learning_rate": 8e-05, |
| "loss": 1.722, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.4247223138678104, |
| "grad_norm": 0.3453083336353302, |
| "learning_rate": 8e-05, |
| "loss": 1.7238, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.42494226327944573, |
| "grad_norm": 0.25626078248023987, |
| "learning_rate": 8e-05, |
| "loss": 1.4706, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.42516221269108107, |
| "grad_norm": 0.2908123731613159, |
| "learning_rate": 8e-05, |
| "loss": 1.7105, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.4253821621027164, |
| "grad_norm": 0.33517104387283325, |
| "learning_rate": 8e-05, |
| "loss": 1.8023, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.4256021115143517, |
| "grad_norm": 0.28047069907188416, |
| "learning_rate": 8e-05, |
| "loss": 1.6266, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.425822060925987, |
| "grad_norm": 0.2778942584991455, |
| "learning_rate": 8e-05, |
| "loss": 1.6866, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.42604201033762235, |
| "grad_norm": 0.3038877248764038, |
| "learning_rate": 8e-05, |
| "loss": 1.6075, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.4262619597492577, |
| "grad_norm": 0.2814297378063202, |
| "learning_rate": 8e-05, |
| "loss": 1.5939, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.426481909160893, |
| "grad_norm": 0.27854403853416443, |
| "learning_rate": 8e-05, |
| "loss": 1.5943, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.4267018585725283, |
| "grad_norm": 0.2924019694328308, |
| "learning_rate": 8e-05, |
| "loss": 1.8193, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4269218079841636, |
| "grad_norm": 0.2862766683101654, |
| "learning_rate": 8e-05, |
| "loss": 1.6065, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.42714175739579896, |
| "grad_norm": 0.2696346342563629, |
| "learning_rate": 8e-05, |
| "loss": 1.5343, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.4273617068074343, |
| "grad_norm": 0.2578338384628296, |
| "learning_rate": 8e-05, |
| "loss": 1.5786, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.42758165621906963, |
| "grad_norm": 0.28594937920570374, |
| "learning_rate": 8e-05, |
| "loss": 1.725, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.42780160563070496, |
| "grad_norm": 0.2808282971382141, |
| "learning_rate": 8e-05, |
| "loss": 1.7951, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.42802155504234024, |
| "grad_norm": 0.32533401250839233, |
| "learning_rate": 8e-05, |
| "loss": 1.9645, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.4282415044539756, |
| "grad_norm": 0.2737642228603363, |
| "learning_rate": 8e-05, |
| "loss": 1.6243, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.4284614538656109, |
| "grad_norm": 0.2885657250881195, |
| "learning_rate": 8e-05, |
| "loss": 1.7338, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.42868140327724624, |
| "grad_norm": 0.2788100242614746, |
| "learning_rate": 8e-05, |
| "loss": 1.76, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.4289013526888816, |
| "grad_norm": 0.2899073362350464, |
| "learning_rate": 8e-05, |
| "loss": 1.7739, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.42912130210051685, |
| "grad_norm": 0.2874782681465149, |
| "learning_rate": 8e-05, |
| "loss": 1.8283, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.4293412515121522, |
| "grad_norm": 0.2757413685321808, |
| "learning_rate": 8e-05, |
| "loss": 1.641, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.4295612009237875, |
| "grad_norm": 0.2811121940612793, |
| "learning_rate": 8e-05, |
| "loss": 1.7231, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.42978115033542286, |
| "grad_norm": 0.3400493860244751, |
| "learning_rate": 8e-05, |
| "loss": 1.8431, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.4300010997470582, |
| "grad_norm": 0.29006627202033997, |
| "learning_rate": 8e-05, |
| "loss": 1.7438, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.4302210491586935, |
| "grad_norm": 0.30233392119407654, |
| "learning_rate": 8e-05, |
| "loss": 1.6603, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.4304409985703288, |
| "grad_norm": 0.2921263873577118, |
| "learning_rate": 8e-05, |
| "loss": 1.6604, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.43066094798196414, |
| "grad_norm": 0.27695250511169434, |
| "learning_rate": 8e-05, |
| "loss": 1.7536, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.43088089739359947, |
| "grad_norm": 0.2827337980270386, |
| "learning_rate": 8e-05, |
| "loss": 1.6324, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.4311008468052348, |
| "grad_norm": 0.27993375062942505, |
| "learning_rate": 8e-05, |
| "loss": 1.7168, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.43132079621687014, |
| "grad_norm": 0.2801220417022705, |
| "learning_rate": 8e-05, |
| "loss": 1.705, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.4315407456285054, |
| "grad_norm": 0.27520567178726196, |
| "learning_rate": 8e-05, |
| "loss": 1.664, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.43176069504014075, |
| "grad_norm": 0.26910632848739624, |
| "learning_rate": 8e-05, |
| "loss": 1.3616, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.4319806444517761, |
| "grad_norm": 0.27770352363586426, |
| "learning_rate": 8e-05, |
| "loss": 1.6689, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.4322005938634114, |
| "grad_norm": 0.27606719732284546, |
| "learning_rate": 8e-05, |
| "loss": 1.6644, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.43242054327504675, |
| "grad_norm": 0.27787330746650696, |
| "learning_rate": 8e-05, |
| "loss": 1.8854, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.4326404926866821, |
| "grad_norm": 0.26479870080947876, |
| "learning_rate": 8e-05, |
| "loss": 1.5904, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.43286044209831737, |
| "grad_norm": 0.27598053216934204, |
| "learning_rate": 8e-05, |
| "loss": 1.5666, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.4330803915099527, |
| "grad_norm": 0.27461937069892883, |
| "learning_rate": 8e-05, |
| "loss": 1.5487, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.43330034092158803, |
| "grad_norm": 0.2928270399570465, |
| "learning_rate": 8e-05, |
| "loss": 1.8173, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.43352029033322337, |
| "grad_norm": 0.30754199624061584, |
| "learning_rate": 8e-05, |
| "loss": 1.6762, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.4337402397448587, |
| "grad_norm": 0.2676936089992523, |
| "learning_rate": 8e-05, |
| "loss": 1.7314, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.433960189156494, |
| "grad_norm": 0.2919710576534271, |
| "learning_rate": 8e-05, |
| "loss": 1.8586, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.4341801385681293, |
| "grad_norm": 0.28165963292121887, |
| "learning_rate": 8e-05, |
| "loss": 1.7943, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.43440008797976465, |
| "grad_norm": 0.2700537443161011, |
| "learning_rate": 8e-05, |
| "loss": 1.613, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.4346200373914, |
| "grad_norm": 0.26830658316612244, |
| "learning_rate": 8e-05, |
| "loss": 1.5854, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.4348399868030353, |
| "grad_norm": 0.28799256682395935, |
| "learning_rate": 8e-05, |
| "loss": 1.8246, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.43505993621467065, |
| "grad_norm": 0.27226150035858154, |
| "learning_rate": 8e-05, |
| "loss": 1.6252, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.43527988562630593, |
| "grad_norm": 0.2646162807941437, |
| "learning_rate": 8e-05, |
| "loss": 1.5699, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.43549983503794126, |
| "grad_norm": 0.27331140637397766, |
| "learning_rate": 8e-05, |
| "loss": 1.6893, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4357197844495766, |
| "grad_norm": 0.26996269822120667, |
| "learning_rate": 8e-05, |
| "loss": 1.6004, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.43593973386121193, |
| "grad_norm": 0.29484307765960693, |
| "learning_rate": 8e-05, |
| "loss": 1.551, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.43615968327284727, |
| "grad_norm": 0.28224268555641174, |
| "learning_rate": 8e-05, |
| "loss": 1.6898, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.43637963268448254, |
| "grad_norm": 0.26172178983688354, |
| "learning_rate": 8e-05, |
| "loss": 1.4375, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.4365995820961179, |
| "grad_norm": 0.2603735029697418, |
| "learning_rate": 8e-05, |
| "loss": 1.4528, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.4368195315077532, |
| "grad_norm": 0.30643707513809204, |
| "learning_rate": 8e-05, |
| "loss": 1.688, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.43703948091938855, |
| "grad_norm": 0.2951216995716095, |
| "learning_rate": 8e-05, |
| "loss": 1.7769, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.4372594303310239, |
| "grad_norm": 0.2939329445362091, |
| "learning_rate": 8e-05, |
| "loss": 1.8161, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.4374793797426592, |
| "grad_norm": 0.27539846301078796, |
| "learning_rate": 8e-05, |
| "loss": 1.6019, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.4376993291542945, |
| "grad_norm": 0.2770693898200989, |
| "learning_rate": 8e-05, |
| "loss": 1.5972, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4379192785659298, |
| "grad_norm": 0.2832552492618561, |
| "learning_rate": 8e-05, |
| "loss": 1.7467, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.43813922797756516, |
| "grad_norm": 0.2983148992061615, |
| "learning_rate": 8e-05, |
| "loss": 1.7181, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.4383591773892005, |
| "grad_norm": 0.2829340994358063, |
| "learning_rate": 8e-05, |
| "loss": 1.5984, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.43857912680083583, |
| "grad_norm": 0.2857687473297119, |
| "learning_rate": 8e-05, |
| "loss": 1.6471, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.4387990762124711, |
| "grad_norm": 0.2669824957847595, |
| "learning_rate": 8e-05, |
| "loss": 1.6215, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.43901902562410644, |
| "grad_norm": 0.28832894563674927, |
| "learning_rate": 8e-05, |
| "loss": 1.6884, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.4392389750357418, |
| "grad_norm": 0.2919970154762268, |
| "learning_rate": 8e-05, |
| "loss": 1.7462, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.4394589244473771, |
| "grad_norm": 0.2998509109020233, |
| "learning_rate": 8e-05, |
| "loss": 1.7219, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.43967887385901244, |
| "grad_norm": 0.2780647575855255, |
| "learning_rate": 8e-05, |
| "loss": 1.8219, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.4398988232706478, |
| "grad_norm": 0.2833268940448761, |
| "learning_rate": 8e-05, |
| "loss": 1.6873, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.44011877268228305, |
| "grad_norm": 0.2802470624446869, |
| "learning_rate": 8e-05, |
| "loss": 1.7249, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.4403387220939184, |
| "grad_norm": 0.2767699658870697, |
| "learning_rate": 8e-05, |
| "loss": 1.6789, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.4405586715055537, |
| "grad_norm": 0.28534451127052307, |
| "learning_rate": 8e-05, |
| "loss": 1.6269, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.44077862091718906, |
| "grad_norm": 0.28716540336608887, |
| "learning_rate": 8e-05, |
| "loss": 1.8097, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.4409985703288244, |
| "grad_norm": 0.27516409754753113, |
| "learning_rate": 8e-05, |
| "loss": 1.837, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.44121851974045967, |
| "grad_norm": 0.2910866439342499, |
| "learning_rate": 8e-05, |
| "loss": 1.61, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.441438469152095, |
| "grad_norm": 0.2851128578186035, |
| "learning_rate": 8e-05, |
| "loss": 1.6939, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.44165841856373034, |
| "grad_norm": 0.2709331214427948, |
| "learning_rate": 8e-05, |
| "loss": 1.6352, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.44187836797536567, |
| "grad_norm": 0.28077712655067444, |
| "learning_rate": 8e-05, |
| "loss": 1.6119, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.442098317387001, |
| "grad_norm": 0.2804681956768036, |
| "learning_rate": 8e-05, |
| "loss": 1.706, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.44231826679863634, |
| "grad_norm": 0.28305575251579285, |
| "learning_rate": 8e-05, |
| "loss": 1.7501, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.4425382162102716, |
| "grad_norm": 0.30372944474220276, |
| "learning_rate": 8e-05, |
| "loss": 1.69, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.44275816562190695, |
| "grad_norm": 0.2695739269256592, |
| "learning_rate": 8e-05, |
| "loss": 1.4976, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.4429781150335423, |
| "grad_norm": 0.27175822854042053, |
| "learning_rate": 8e-05, |
| "loss": 1.6941, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.4431980644451776, |
| "grad_norm": 0.2786177396774292, |
| "learning_rate": 8e-05, |
| "loss": 1.719, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.44341801385681295, |
| "grad_norm": 0.26625001430511475, |
| "learning_rate": 8e-05, |
| "loss": 1.6197, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.44363796326844823, |
| "grad_norm": 0.29516807198524475, |
| "learning_rate": 8e-05, |
| "loss": 1.7305, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.44385791268008357, |
| "grad_norm": 0.39562076330184937, |
| "learning_rate": 8e-05, |
| "loss": 1.783, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.4440778620917189, |
| "grad_norm": 0.27659425139427185, |
| "learning_rate": 8e-05, |
| "loss": 1.6948, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.44429781150335423, |
| "grad_norm": 0.2787366211414337, |
| "learning_rate": 8e-05, |
| "loss": 1.6237, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.44451776091498957, |
| "grad_norm": 0.27939459681510925, |
| "learning_rate": 8e-05, |
| "loss": 1.7628, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.4447377103266249, |
| "grad_norm": 0.27395081520080566, |
| "learning_rate": 8e-05, |
| "loss": 1.5619, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.4449576597382602, |
| "grad_norm": 0.28255096077919006, |
| "learning_rate": 8e-05, |
| "loss": 1.7556, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.4451776091498955, |
| "grad_norm": 0.2922489643096924, |
| "learning_rate": 8e-05, |
| "loss": 1.624, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.44539755856153085, |
| "grad_norm": 0.4039583206176758, |
| "learning_rate": 8e-05, |
| "loss": 1.507, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.4456175079731662, |
| "grad_norm": 0.28025928139686584, |
| "learning_rate": 8e-05, |
| "loss": 1.8057, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.4458374573848015, |
| "grad_norm": 0.2777588963508606, |
| "learning_rate": 8e-05, |
| "loss": 1.6104, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.4460574067964368, |
| "grad_norm": 0.2689501941204071, |
| "learning_rate": 8e-05, |
| "loss": 1.5339, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.44627735620807213, |
| "grad_norm": 0.28021785616874695, |
| "learning_rate": 8e-05, |
| "loss": 1.7819, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.44649730561970746, |
| "grad_norm": 0.26980918645858765, |
| "learning_rate": 8e-05, |
| "loss": 1.5995, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4467172550313428, |
| "grad_norm": 0.293047696352005, |
| "learning_rate": 8e-05, |
| "loss": 1.8571, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.44693720444297813, |
| "grad_norm": 0.2841939330101013, |
| "learning_rate": 8e-05, |
| "loss": 1.5794, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.44715715385461346, |
| "grad_norm": 0.2845712900161743, |
| "learning_rate": 8e-05, |
| "loss": 1.6635, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.44737710326624874, |
| "grad_norm": 0.27919885516166687, |
| "learning_rate": 8e-05, |
| "loss": 1.7094, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.4475970526778841, |
| "grad_norm": 0.30076712369918823, |
| "learning_rate": 8e-05, |
| "loss": 1.6989, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.4478170020895194, |
| "grad_norm": 0.2666080892086029, |
| "learning_rate": 8e-05, |
| "loss": 1.689, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.44803695150115475, |
| "grad_norm": 0.29133087396621704, |
| "learning_rate": 8e-05, |
| "loss": 1.8559, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.4482569009127901, |
| "grad_norm": 0.28610843420028687, |
| "learning_rate": 8e-05, |
| "loss": 1.6556, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.44847685032442536, |
| "grad_norm": 0.26724278926849365, |
| "learning_rate": 8e-05, |
| "loss": 1.6104, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.4486967997360607, |
| "grad_norm": 0.2724173665046692, |
| "learning_rate": 8e-05, |
| "loss": 1.645, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.448916749147696, |
| "grad_norm": 0.26570823788642883, |
| "learning_rate": 8e-05, |
| "loss": 1.4611, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.44913669855933136, |
| "grad_norm": 0.29498788714408875, |
| "learning_rate": 8e-05, |
| "loss": 1.8787, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.4493566479709667, |
| "grad_norm": 0.28211459517478943, |
| "learning_rate": 8e-05, |
| "loss": 1.6062, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.44957659738260203, |
| "grad_norm": 0.3148192763328552, |
| "learning_rate": 8e-05, |
| "loss": 1.8734, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.4497965467942373, |
| "grad_norm": 0.27721115946769714, |
| "learning_rate": 8e-05, |
| "loss": 1.7274, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.45001649620587264, |
| "grad_norm": 0.29178541898727417, |
| "learning_rate": 8e-05, |
| "loss": 1.7568, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.450236445617508, |
| "grad_norm": 0.27845948934555054, |
| "learning_rate": 8e-05, |
| "loss": 1.6615, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.4504563950291433, |
| "grad_norm": 0.2741856873035431, |
| "learning_rate": 8e-05, |
| "loss": 1.5667, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.45067634444077864, |
| "grad_norm": 0.28572753071784973, |
| "learning_rate": 8e-05, |
| "loss": 1.7115, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.4508962938524139, |
| "grad_norm": 0.2769505977630615, |
| "learning_rate": 8e-05, |
| "loss": 1.6297, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.45111624326404925, |
| "grad_norm": 0.27633893489837646, |
| "learning_rate": 8e-05, |
| "loss": 1.6455, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.4513361926756846, |
| "grad_norm": 0.28455862402915955, |
| "learning_rate": 8e-05, |
| "loss": 1.7496, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.4515561420873199, |
| "grad_norm": 0.2920532524585724, |
| "learning_rate": 8e-05, |
| "loss": 1.7671, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.45177609149895526, |
| "grad_norm": 0.27528077363967896, |
| "learning_rate": 8e-05, |
| "loss": 1.6535, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.4519960409105906, |
| "grad_norm": 0.2949519157409668, |
| "learning_rate": 8e-05, |
| "loss": 1.6796, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.45221599032222587, |
| "grad_norm": 0.2740989923477173, |
| "learning_rate": 8e-05, |
| "loss": 1.4341, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.4524359397338612, |
| "grad_norm": 0.31732696294784546, |
| "learning_rate": 8e-05, |
| "loss": 1.717, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.45265588914549654, |
| "grad_norm": 0.2747776210308075, |
| "learning_rate": 8e-05, |
| "loss": 1.7162, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.45287583855713187, |
| "grad_norm": 0.3037000596523285, |
| "learning_rate": 8e-05, |
| "loss": 1.6399, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.4530957879687672, |
| "grad_norm": 0.29499107599258423, |
| "learning_rate": 8e-05, |
| "loss": 1.6306, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.4533157373804025, |
| "grad_norm": 0.2890235483646393, |
| "learning_rate": 8e-05, |
| "loss": 1.8048, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.4535356867920378, |
| "grad_norm": 0.28515708446502686, |
| "learning_rate": 8e-05, |
| "loss": 1.843, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.45375563620367315, |
| "grad_norm": 0.28525930643081665, |
| "learning_rate": 8e-05, |
| "loss": 1.6568, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.4539755856153085, |
| "grad_norm": 0.27117711305618286, |
| "learning_rate": 8e-05, |
| "loss": 1.6605, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.4541955350269438, |
| "grad_norm": 0.2771861255168915, |
| "learning_rate": 8e-05, |
| "loss": 1.7553, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.45441548443857915, |
| "grad_norm": 0.29263827204704285, |
| "learning_rate": 8e-05, |
| "loss": 1.7919, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.45463543385021443, |
| "grad_norm": 0.2785603702068329, |
| "learning_rate": 8e-05, |
| "loss": 1.6686, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.45485538326184977, |
| "grad_norm": 0.2752209007740021, |
| "learning_rate": 8e-05, |
| "loss": 1.5984, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.4550753326734851, |
| "grad_norm": 0.279784232378006, |
| "learning_rate": 8e-05, |
| "loss": 1.6903, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.45529528208512043, |
| "grad_norm": 0.2957722246646881, |
| "learning_rate": 8e-05, |
| "loss": 1.8176, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.45551523149675577, |
| "grad_norm": 0.2798726260662079, |
| "learning_rate": 8e-05, |
| "loss": 1.6637, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.45573518090839105, |
| "grad_norm": 0.26509538292884827, |
| "learning_rate": 8e-05, |
| "loss": 1.5034, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.4559551303200264, |
| "grad_norm": 0.2984442114830017, |
| "learning_rate": 8e-05, |
| "loss": 1.8225, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.4561750797316617, |
| "grad_norm": 0.28242239356040955, |
| "learning_rate": 8e-05, |
| "loss": 1.6707, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.45639502914329705, |
| "grad_norm": 0.2722650468349457, |
| "learning_rate": 8e-05, |
| "loss": 1.5723, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.4566149785549324, |
| "grad_norm": 0.25942909717559814, |
| "learning_rate": 8e-05, |
| "loss": 1.5417, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.4568349279665677, |
| "grad_norm": 0.2782632112503052, |
| "learning_rate": 8e-05, |
| "loss": 1.5857, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.457054877378203, |
| "grad_norm": 0.28298354148864746, |
| "learning_rate": 8e-05, |
| "loss": 1.6124, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.45727482678983833, |
| "grad_norm": 0.2920227348804474, |
| "learning_rate": 8e-05, |
| "loss": 1.6247, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.45749477620147366, |
| "grad_norm": 0.30804532766342163, |
| "learning_rate": 8e-05, |
| "loss": 1.8689, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.457714725613109, |
| "grad_norm": 0.2759280204772949, |
| "learning_rate": 8e-05, |
| "loss": 1.6366, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.45793467502474433, |
| "grad_norm": 0.27967414259910583, |
| "learning_rate": 8e-05, |
| "loss": 1.6391, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.4581546244363796, |
| "grad_norm": 0.30624908208847046, |
| "learning_rate": 8e-05, |
| "loss": 1.8188, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.45837457384801494, |
| "grad_norm": 0.2747632563114166, |
| "learning_rate": 8e-05, |
| "loss": 1.6394, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.4585945232596503, |
| "grad_norm": 0.29921606183052063, |
| "learning_rate": 8e-05, |
| "loss": 1.6264, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4588144726712856, |
| "grad_norm": 0.27374643087387085, |
| "learning_rate": 8e-05, |
| "loss": 1.5955, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.45903442208292095, |
| "grad_norm": 0.2804218530654907, |
| "learning_rate": 8e-05, |
| "loss": 1.6936, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.4592543714945563, |
| "grad_norm": 0.288095623254776, |
| "learning_rate": 8e-05, |
| "loss": 1.658, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.45947432090619156, |
| "grad_norm": 0.2622469961643219, |
| "learning_rate": 8e-05, |
| "loss": 1.5468, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.4596942703178269, |
| "grad_norm": 0.298968106508255, |
| "learning_rate": 8e-05, |
| "loss": 1.8415, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4599142197294622, |
| "grad_norm": 0.27127620577812195, |
| "learning_rate": 8e-05, |
| "loss": 1.5154, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.46013416914109756, |
| "grad_norm": 0.3025810122489929, |
| "learning_rate": 8e-05, |
| "loss": 1.7251, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.4603541185527329, |
| "grad_norm": 0.2805241346359253, |
| "learning_rate": 8e-05, |
| "loss": 1.786, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.4605740679643682, |
| "grad_norm": 0.28292620182037354, |
| "learning_rate": 8e-05, |
| "loss": 1.6205, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.4607940173760035, |
| "grad_norm": 0.27890294790267944, |
| "learning_rate": 8e-05, |
| "loss": 1.6182, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.46101396678763884, |
| "grad_norm": 0.2704887390136719, |
| "learning_rate": 8e-05, |
| "loss": 1.643, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.4612339161992742, |
| "grad_norm": 0.27034714818000793, |
| "learning_rate": 8e-05, |
| "loss": 1.6918, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.4614538656109095, |
| "grad_norm": 0.2763729691505432, |
| "learning_rate": 8e-05, |
| "loss": 1.6218, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.46167381502254484, |
| "grad_norm": 0.28457143902778625, |
| "learning_rate": 8e-05, |
| "loss": 1.5602, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.4618937644341801, |
| "grad_norm": 0.3102862536907196, |
| "learning_rate": 8e-05, |
| "loss": 1.6946, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.46211371384581545, |
| "grad_norm": 0.2817099690437317, |
| "learning_rate": 8e-05, |
| "loss": 1.69, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.4623336632574508, |
| "grad_norm": 0.2816404104232788, |
| "learning_rate": 8e-05, |
| "loss": 1.6651, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.4625536126690861, |
| "grad_norm": 0.2756252884864807, |
| "learning_rate": 8e-05, |
| "loss": 1.6935, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.46277356208072146, |
| "grad_norm": 0.28443071246147156, |
| "learning_rate": 8e-05, |
| "loss": 1.8278, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.46299351149235674, |
| "grad_norm": 0.2955114543437958, |
| "learning_rate": 8e-05, |
| "loss": 1.7654, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.46321346090399207, |
| "grad_norm": 0.30527764558792114, |
| "learning_rate": 8e-05, |
| "loss": 1.921, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.4634334103156274, |
| "grad_norm": 0.28985050320625305, |
| "learning_rate": 8e-05, |
| "loss": 1.7637, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.46365335972726274, |
| "grad_norm": 0.2904118299484253, |
| "learning_rate": 8e-05, |
| "loss": 1.7645, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.46387330913889807, |
| "grad_norm": 0.3137964606285095, |
| "learning_rate": 8e-05, |
| "loss": 1.8301, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.46409325855053335, |
| "grad_norm": 0.2634297013282776, |
| "learning_rate": 8e-05, |
| "loss": 1.5916, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.4643132079621687, |
| "grad_norm": 0.26435586810112, |
| "learning_rate": 8e-05, |
| "loss": 1.5805, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.464533157373804, |
| "grad_norm": 0.2845149040222168, |
| "learning_rate": 8e-05, |
| "loss": 1.6894, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.46475310678543935, |
| "grad_norm": 0.3034592568874359, |
| "learning_rate": 8e-05, |
| "loss": 1.7122, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.4649730561970747, |
| "grad_norm": 0.2862027585506439, |
| "learning_rate": 8e-05, |
| "loss": 1.5886, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.46519300560871, |
| "grad_norm": 0.2709789574146271, |
| "learning_rate": 8e-05, |
| "loss": 1.6112, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.4654129550203453, |
| "grad_norm": 0.3048953115940094, |
| "learning_rate": 8e-05, |
| "loss": 1.718, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.46563290443198063, |
| "grad_norm": 0.295149564743042, |
| "learning_rate": 8e-05, |
| "loss": 1.7339, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.46585285384361597, |
| "grad_norm": 0.27533626556396484, |
| "learning_rate": 8e-05, |
| "loss": 1.6143, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.4660728032552513, |
| "grad_norm": 0.30383235216140747, |
| "learning_rate": 8e-05, |
| "loss": 1.7154, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.46629275266688663, |
| "grad_norm": 0.2834450304508209, |
| "learning_rate": 8e-05, |
| "loss": 1.6892, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.4665127020785219, |
| "grad_norm": 0.27407264709472656, |
| "learning_rate": 8e-05, |
| "loss": 1.7138, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.46673265149015725, |
| "grad_norm": 0.28417688608169556, |
| "learning_rate": 8e-05, |
| "loss": 1.7416, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.4669526009017926, |
| "grad_norm": 0.2915797233581543, |
| "learning_rate": 8e-05, |
| "loss": 1.7104, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.4671725503134279, |
| "grad_norm": 0.29155269265174866, |
| "learning_rate": 8e-05, |
| "loss": 1.7374, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.46739249972506325, |
| "grad_norm": 0.27683204412460327, |
| "learning_rate": 8e-05, |
| "loss": 1.5669, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.4676124491366986, |
| "grad_norm": 0.2835148870944977, |
| "learning_rate": 8e-05, |
| "loss": 1.7426, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.46783239854833386, |
| "grad_norm": 0.27906641364097595, |
| "learning_rate": 8e-05, |
| "loss": 1.6297, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.4680523479599692, |
| "grad_norm": 0.28407955169677734, |
| "learning_rate": 8e-05, |
| "loss": 1.7351, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.46827229737160453, |
| "grad_norm": 0.2793600261211395, |
| "learning_rate": 8e-05, |
| "loss": 1.5786, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.46849224678323986, |
| "grad_norm": 0.2806802988052368, |
| "learning_rate": 8e-05, |
| "loss": 1.7466, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4687121961948752, |
| "grad_norm": 0.30251967906951904, |
| "learning_rate": 8e-05, |
| "loss": 1.6703, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.4689321456065105, |
| "grad_norm": 0.275473415851593, |
| "learning_rate": 8e-05, |
| "loss": 1.5881, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.4691520950181458, |
| "grad_norm": 0.28925517201423645, |
| "learning_rate": 8e-05, |
| "loss": 1.7369, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.46937204442978114, |
| "grad_norm": 0.26768866181373596, |
| "learning_rate": 8e-05, |
| "loss": 1.5469, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.4695919938414165, |
| "grad_norm": 0.272969514131546, |
| "learning_rate": 8e-05, |
| "loss": 1.6235, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.4698119432530518, |
| "grad_norm": 0.39006346464157104, |
| "learning_rate": 8e-05, |
| "loss": 1.6943, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.47003189266468715, |
| "grad_norm": 0.2898694574832916, |
| "learning_rate": 8e-05, |
| "loss": 1.7196, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.4702518420763224, |
| "grad_norm": 0.28824204206466675, |
| "learning_rate": 8e-05, |
| "loss": 1.6807, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.47047179148795776, |
| "grad_norm": 0.3024749755859375, |
| "learning_rate": 8e-05, |
| "loss": 1.7435, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.4706917408995931, |
| "grad_norm": 0.2894933521747589, |
| "learning_rate": 8e-05, |
| "loss": 1.8047, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.4709116903112284, |
| "grad_norm": 0.2900967299938202, |
| "learning_rate": 8e-05, |
| "loss": 1.6536, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.47113163972286376, |
| "grad_norm": 0.2727701961994171, |
| "learning_rate": 8e-05, |
| "loss": 1.6187, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.47135158913449904, |
| "grad_norm": 0.2630798816680908, |
| "learning_rate": 8e-05, |
| "loss": 1.5558, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.4715715385461344, |
| "grad_norm": 0.2755641043186188, |
| "learning_rate": 8e-05, |
| "loss": 1.6247, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.4717914879577697, |
| "grad_norm": 0.26855289936065674, |
| "learning_rate": 8e-05, |
| "loss": 1.6908, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.47201143736940504, |
| "grad_norm": 0.26333558559417725, |
| "learning_rate": 8e-05, |
| "loss": 1.6467, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.4722313867810404, |
| "grad_norm": 0.2696126103401184, |
| "learning_rate": 8e-05, |
| "loss": 1.646, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.4724513361926757, |
| "grad_norm": 0.2838461101055145, |
| "learning_rate": 8e-05, |
| "loss": 1.7269, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.472671285604311, |
| "grad_norm": 0.27359622716903687, |
| "learning_rate": 8e-05, |
| "loss": 1.6578, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.4728912350159463, |
| "grad_norm": 0.28489992022514343, |
| "learning_rate": 8e-05, |
| "loss": 1.7451, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.47311118442758165, |
| "grad_norm": 0.30069005489349365, |
| "learning_rate": 8e-05, |
| "loss": 1.7789, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.473331133839217, |
| "grad_norm": 0.2787550091743469, |
| "learning_rate": 8e-05, |
| "loss": 1.8532, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.4735510832508523, |
| "grad_norm": 0.28521937131881714, |
| "learning_rate": 8e-05, |
| "loss": 1.6202, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.4737710326624876, |
| "grad_norm": 0.27512073516845703, |
| "learning_rate": 8e-05, |
| "loss": 1.6982, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.47399098207412294, |
| "grad_norm": 0.28500398993492126, |
| "learning_rate": 8e-05, |
| "loss": 1.7262, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.47421093148575827, |
| "grad_norm": 0.2889910340309143, |
| "learning_rate": 8e-05, |
| "loss": 1.7001, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.4744308808973936, |
| "grad_norm": 0.2868637144565582, |
| "learning_rate": 8e-05, |
| "loss": 1.6569, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.47465083030902894, |
| "grad_norm": 0.27974042296409607, |
| "learning_rate": 8e-05, |
| "loss": 1.7308, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.47487077972066427, |
| "grad_norm": 0.2812412977218628, |
| "learning_rate": 8e-05, |
| "loss": 1.6586, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.47509072913229955, |
| "grad_norm": 0.27973487973213196, |
| "learning_rate": 8e-05, |
| "loss": 1.7688, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4753106785439349, |
| "grad_norm": 0.2852223515510559, |
| "learning_rate": 8e-05, |
| "loss": 1.7773, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.4755306279555702, |
| "grad_norm": 0.2702232301235199, |
| "learning_rate": 8e-05, |
| "loss": 1.5842, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.47575057736720555, |
| "grad_norm": 0.26885986328125, |
| "learning_rate": 8e-05, |
| "loss": 1.6179, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.4759705267788409, |
| "grad_norm": 0.26561740040779114, |
| "learning_rate": 8e-05, |
| "loss": 1.6105, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.3136744499206543, |
| "learning_rate": 8e-05, |
| "loss": 1.7263, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.4764104256021115, |
| "grad_norm": 0.29980844259262085, |
| "learning_rate": 8e-05, |
| "loss": 1.6428, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.47663037501374683, |
| "grad_norm": 0.2782588303089142, |
| "learning_rate": 8e-05, |
| "loss": 1.5905, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.47685032442538217, |
| "grad_norm": 0.3134911358356476, |
| "learning_rate": 8e-05, |
| "loss": 1.7437, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.4770702738370175, |
| "grad_norm": 0.28305792808532715, |
| "learning_rate": 8e-05, |
| "loss": 1.6187, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.47729022324865283, |
| "grad_norm": 0.2741806209087372, |
| "learning_rate": 8e-05, |
| "loss": 1.7017, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.4775101726602881, |
| "grad_norm": 0.2861132323741913, |
| "learning_rate": 8e-05, |
| "loss": 1.7167, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.47773012207192345, |
| "grad_norm": 0.2796178162097931, |
| "learning_rate": 8e-05, |
| "loss": 1.7747, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.4779500714835588, |
| "grad_norm": 0.3019583821296692, |
| "learning_rate": 8e-05, |
| "loss": 1.9244, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.4781700208951941, |
| "grad_norm": 0.2874825894832611, |
| "learning_rate": 8e-05, |
| "loss": 1.676, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.47838997030682945, |
| "grad_norm": 0.2864963412284851, |
| "learning_rate": 8e-05, |
| "loss": 1.8289, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.4786099197184647, |
| "grad_norm": 0.3347536623477936, |
| "learning_rate": 8e-05, |
| "loss": 1.8204, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.47882986913010006, |
| "grad_norm": 0.2859993577003479, |
| "learning_rate": 8e-05, |
| "loss": 1.5472, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.4790498185417354, |
| "grad_norm": 0.2972160875797272, |
| "learning_rate": 8e-05, |
| "loss": 1.6577, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.47926976795337073, |
| "grad_norm": 0.26402032375335693, |
| "learning_rate": 8e-05, |
| "loss": 1.4936, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.47948971736500606, |
| "grad_norm": 0.28069886565208435, |
| "learning_rate": 8e-05, |
| "loss": 1.8158, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4797096667766414, |
| "grad_norm": 0.2630525529384613, |
| "learning_rate": 8e-05, |
| "loss": 1.5814, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.4799296161882767, |
| "grad_norm": 0.2999349534511566, |
| "learning_rate": 8e-05, |
| "loss": 1.7879, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.480149565599912, |
| "grad_norm": 0.28722846508026123, |
| "learning_rate": 8e-05, |
| "loss": 1.7143, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.48036951501154734, |
| "grad_norm": 0.2746049165725708, |
| "learning_rate": 8e-05, |
| "loss": 1.5197, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.4805894644231827, |
| "grad_norm": 0.2719694972038269, |
| "learning_rate": 8e-05, |
| "loss": 1.6642, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.480809413834818, |
| "grad_norm": 0.286636084318161, |
| "learning_rate": 8e-05, |
| "loss": 1.6885, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.4810293632464533, |
| "grad_norm": 0.3076469600200653, |
| "learning_rate": 8e-05, |
| "loss": 1.7681, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.4812493126580886, |
| "grad_norm": 0.3075680434703827, |
| "learning_rate": 8e-05, |
| "loss": 1.8007, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.48146926206972396, |
| "grad_norm": 0.2793465852737427, |
| "learning_rate": 8e-05, |
| "loss": 1.6552, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.4816892114813593, |
| "grad_norm": 0.2886781692504883, |
| "learning_rate": 8e-05, |
| "loss": 1.7827, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4819091608929946, |
| "grad_norm": 0.27432283759117126, |
| "learning_rate": 8e-05, |
| "loss": 1.7724, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.48212911030462996, |
| "grad_norm": 0.26780393719673157, |
| "learning_rate": 8e-05, |
| "loss": 1.4613, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.48234905971626524, |
| "grad_norm": 0.27178165316581726, |
| "learning_rate": 8e-05, |
| "loss": 1.6439, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.48256900912790057, |
| "grad_norm": 0.27651992440223694, |
| "learning_rate": 8e-05, |
| "loss": 1.6672, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.4827889585395359, |
| "grad_norm": 0.26919931173324585, |
| "learning_rate": 8e-05, |
| "loss": 1.594, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.48300890795117124, |
| "grad_norm": 0.267678439617157, |
| "learning_rate": 8e-05, |
| "loss": 1.5852, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.4832288573628066, |
| "grad_norm": 0.2895921766757965, |
| "learning_rate": 8e-05, |
| "loss": 1.6456, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.48344880677444185, |
| "grad_norm": 0.27052855491638184, |
| "learning_rate": 8e-05, |
| "loss": 1.5393, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.4836687561860772, |
| "grad_norm": 0.2793048024177551, |
| "learning_rate": 8e-05, |
| "loss": 1.7006, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.4838887055977125, |
| "grad_norm": 0.2838841676712036, |
| "learning_rate": 8e-05, |
| "loss": 1.6547, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.48410865500934785, |
| "grad_norm": 0.29814717173576355, |
| "learning_rate": 8e-05, |
| "loss": 1.7392, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.4843286044209832, |
| "grad_norm": 0.28823426365852356, |
| "learning_rate": 8e-05, |
| "loss": 1.6434, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.4845485538326185, |
| "grad_norm": 0.2645476460456848, |
| "learning_rate": 8e-05, |
| "loss": 1.5663, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.4847685032442538, |
| "grad_norm": 0.3046651780605316, |
| "learning_rate": 8e-05, |
| "loss": 1.5941, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.48498845265588914, |
| "grad_norm": 0.28958046436309814, |
| "learning_rate": 8e-05, |
| "loss": 1.7348, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.48520840206752447, |
| "grad_norm": 0.25703537464141846, |
| "learning_rate": 8e-05, |
| "loss": 1.5712, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.4854283514791598, |
| "grad_norm": 0.2969980537891388, |
| "learning_rate": 8e-05, |
| "loss": 1.7966, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.48564830089079514, |
| "grad_norm": 0.27638381719589233, |
| "learning_rate": 8e-05, |
| "loss": 1.6478, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.4858682503024304, |
| "grad_norm": 0.283682644367218, |
| "learning_rate": 8e-05, |
| "loss": 1.6524, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.48608819971406575, |
| "grad_norm": 0.2837259769439697, |
| "learning_rate": 8e-05, |
| "loss": 1.7302, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.4863081491257011, |
| "grad_norm": 0.3042410612106323, |
| "learning_rate": 8e-05, |
| "loss": 1.5969, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.4865280985373364, |
| "grad_norm": 0.2819627821445465, |
| "learning_rate": 8e-05, |
| "loss": 1.5621, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.48674804794897175, |
| "grad_norm": 0.28049173951148987, |
| "learning_rate": 8e-05, |
| "loss": 1.6831, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.4869679973606071, |
| "grad_norm": 0.29762500524520874, |
| "learning_rate": 8e-05, |
| "loss": 1.7883, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.48718794677224236, |
| "grad_norm": 0.3022189140319824, |
| "learning_rate": 8e-05, |
| "loss": 1.5744, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.4874078961838777, |
| "grad_norm": 0.28249025344848633, |
| "learning_rate": 8e-05, |
| "loss": 1.6514, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.48762784559551303, |
| "grad_norm": 0.3398612141609192, |
| "learning_rate": 8e-05, |
| "loss": 1.4215, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.48784779500714837, |
| "grad_norm": 0.28481197357177734, |
| "learning_rate": 8e-05, |
| "loss": 1.7065, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.4880677444187837, |
| "grad_norm": 0.3076950013637543, |
| "learning_rate": 8e-05, |
| "loss": 1.7665, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.488287693830419, |
| "grad_norm": 0.28533896803855896, |
| "learning_rate": 8e-05, |
| "loss": 1.6156, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4885076432420543, |
| "grad_norm": 0.2940129041671753, |
| "learning_rate": 8e-05, |
| "loss": 1.6957, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.48872759265368965, |
| "grad_norm": 0.30342915654182434, |
| "learning_rate": 8e-05, |
| "loss": 1.6014, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.488947542065325, |
| "grad_norm": 0.2851787507534027, |
| "learning_rate": 8e-05, |
| "loss": 1.8816, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.4891674914769603, |
| "grad_norm": 0.3813328146934509, |
| "learning_rate": 8e-05, |
| "loss": 1.6608, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.48938744088859565, |
| "grad_norm": 0.329373300075531, |
| "learning_rate": 8e-05, |
| "loss": 1.7455, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.4896073903002309, |
| "grad_norm": 0.2751460373401642, |
| "learning_rate": 8e-05, |
| "loss": 1.6854, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.48982733971186626, |
| "grad_norm": 0.283033549785614, |
| "learning_rate": 8e-05, |
| "loss": 1.7018, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.4900472891235016, |
| "grad_norm": 0.2869894504547119, |
| "learning_rate": 8e-05, |
| "loss": 1.7062, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.49026723853513693, |
| "grad_norm": 0.2895123064517975, |
| "learning_rate": 8e-05, |
| "loss": 1.625, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.49048718794677226, |
| "grad_norm": 0.28288763761520386, |
| "learning_rate": 8e-05, |
| "loss": 1.6901, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.49070713735840754, |
| "grad_norm": 0.28054291009902954, |
| "learning_rate": 8e-05, |
| "loss": 1.7782, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.4909270867700429, |
| "grad_norm": 0.2727196216583252, |
| "learning_rate": 8e-05, |
| "loss": 1.531, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.4911470361816782, |
| "grad_norm": 0.28192493319511414, |
| "learning_rate": 8e-05, |
| "loss": 1.7144, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.49136698559331354, |
| "grad_norm": 0.28133469820022583, |
| "learning_rate": 8e-05, |
| "loss": 1.6448, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.4915869350049489, |
| "grad_norm": 0.3175356686115265, |
| "learning_rate": 8e-05, |
| "loss": 1.648, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4918068844165842, |
| "grad_norm": 0.2990395128726959, |
| "learning_rate": 8e-05, |
| "loss": 1.6871, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.4920268338282195, |
| "grad_norm": 0.3121372163295746, |
| "learning_rate": 8e-05, |
| "loss": 1.7671, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.4922467832398548, |
| "grad_norm": 0.2772499918937683, |
| "learning_rate": 8e-05, |
| "loss": 1.674, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.49246673265149016, |
| "grad_norm": 0.2990477383136749, |
| "learning_rate": 8e-05, |
| "loss": 1.8202, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.4926866820631255, |
| "grad_norm": 0.28028279542922974, |
| "learning_rate": 8e-05, |
| "loss": 1.7255, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.4929066314747608, |
| "grad_norm": 0.28883370757102966, |
| "learning_rate": 8e-05, |
| "loss": 1.6102, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.4931265808863961, |
| "grad_norm": 0.2775261700153351, |
| "learning_rate": 8e-05, |
| "loss": 1.8572, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.49334653029803144, |
| "grad_norm": 0.2821192443370819, |
| "learning_rate": 8e-05, |
| "loss": 1.8068, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.49356647970966677, |
| "grad_norm": 0.29555544257164, |
| "learning_rate": 8e-05, |
| "loss": 1.6875, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.4937864291213021, |
| "grad_norm": 0.28827783465385437, |
| "learning_rate": 8e-05, |
| "loss": 1.7123, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.49400637853293744, |
| "grad_norm": 0.27672290802001953, |
| "learning_rate": 8e-05, |
| "loss": 1.4739, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.4942263279445728, |
| "grad_norm": 0.27185946702957153, |
| "learning_rate": 8e-05, |
| "loss": 1.7748, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.49444627735620805, |
| "grad_norm": 0.2972213327884674, |
| "learning_rate": 8e-05, |
| "loss": 1.7122, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.4946662267678434, |
| "grad_norm": 0.30817538499832153, |
| "learning_rate": 8e-05, |
| "loss": 1.926, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.4948861761794787, |
| "grad_norm": 0.2821509838104248, |
| "learning_rate": 8e-05, |
| "loss": 1.7608, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.49510612559111405, |
| "grad_norm": 0.29807963967323303, |
| "learning_rate": 8e-05, |
| "loss": 1.8233, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.4953260750027494, |
| "grad_norm": 0.29549431800842285, |
| "learning_rate": 8e-05, |
| "loss": 1.7963, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.49554602441438467, |
| "grad_norm": 0.3025868535041809, |
| "learning_rate": 8e-05, |
| "loss": 1.7597, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.49576597382602, |
| "grad_norm": 0.30950862169265747, |
| "learning_rate": 8e-05, |
| "loss": 1.8901, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.49598592323765534, |
| "grad_norm": 0.3357299864292145, |
| "learning_rate": 8e-05, |
| "loss": 1.7692, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.49620587264929067, |
| "grad_norm": 0.2873973548412323, |
| "learning_rate": 8e-05, |
| "loss": 1.6934, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.496425822060926, |
| "grad_norm": 0.2997465431690216, |
| "learning_rate": 8e-05, |
| "loss": 1.5939, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.49664577147256134, |
| "grad_norm": 0.269217312335968, |
| "learning_rate": 8e-05, |
| "loss": 1.5687, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.4968657208841966, |
| "grad_norm": 0.27386826276779175, |
| "learning_rate": 8e-05, |
| "loss": 1.6858, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.49708567029583195, |
| "grad_norm": 0.2911466658115387, |
| "learning_rate": 8e-05, |
| "loss": 1.701, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.4973056197074673, |
| "grad_norm": 0.2837962508201599, |
| "learning_rate": 8e-05, |
| "loss": 1.6886, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.4975255691191026, |
| "grad_norm": 0.3071229159832001, |
| "learning_rate": 8e-05, |
| "loss": 1.73, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.49774551853073795, |
| "grad_norm": 0.303252249956131, |
| "learning_rate": 8e-05, |
| "loss": 1.7693, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.49796546794237323, |
| "grad_norm": 0.2802221179008484, |
| "learning_rate": 8e-05, |
| "loss": 1.6394, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.49818541735400856, |
| "grad_norm": 0.28856000304222107, |
| "learning_rate": 8e-05, |
| "loss": 1.6035, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.4984053667656439, |
| "grad_norm": 0.28943875432014465, |
| "learning_rate": 8e-05, |
| "loss": 1.7989, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.49862531617727923, |
| "grad_norm": 0.26969149708747864, |
| "learning_rate": 8e-05, |
| "loss": 1.6607, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.49884526558891457, |
| "grad_norm": 0.311819851398468, |
| "learning_rate": 8e-05, |
| "loss": 1.7912, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.4990652150005499, |
| "grad_norm": 0.296274334192276, |
| "learning_rate": 8e-05, |
| "loss": 1.6953, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.4992851644121852, |
| "grad_norm": 0.26551195979118347, |
| "learning_rate": 8e-05, |
| "loss": 1.3549, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.4995051138238205, |
| "grad_norm": 0.28540030121803284, |
| "learning_rate": 8e-05, |
| "loss": 1.7142, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.49972506323545585, |
| "grad_norm": 0.2834233045578003, |
| "learning_rate": 8e-05, |
| "loss": 1.7293, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.4999450126470912, |
| "grad_norm": 0.34650975465774536, |
| "learning_rate": 8e-05, |
| "loss": 1.7652, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.5001649620587265, |
| "grad_norm": 0.2988453507423401, |
| "learning_rate": 8e-05, |
| "loss": 1.8895, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.5003849114703618, |
| "grad_norm": 0.2912983000278473, |
| "learning_rate": 8e-05, |
| "loss": 1.6431, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.5006048608819972, |
| "grad_norm": 0.28406545519828796, |
| "learning_rate": 8e-05, |
| "loss": 1.7805, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.5008248102936325, |
| "grad_norm": 0.2748315632343292, |
| "learning_rate": 8e-05, |
| "loss": 1.5514, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.5010447597052677, |
| "grad_norm": 0.3016912341117859, |
| "learning_rate": 8e-05, |
| "loss": 1.6271, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.5012647091169031, |
| "grad_norm": 0.3006996512413025, |
| "learning_rate": 8e-05, |
| "loss": 1.7195, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.5014846585285384, |
| "grad_norm": 0.27598950266838074, |
| "learning_rate": 8e-05, |
| "loss": 1.4975, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5017046079401738, |
| "grad_norm": 0.2810399830341339, |
| "learning_rate": 8e-05, |
| "loss": 1.6498, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.5019245573518091, |
| "grad_norm": 0.3018679916858673, |
| "learning_rate": 8e-05, |
| "loss": 1.6711, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.5021445067634444, |
| "grad_norm": 0.2889658510684967, |
| "learning_rate": 8e-05, |
| "loss": 1.7318, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.5023644561750797, |
| "grad_norm": 0.28475597500801086, |
| "learning_rate": 8e-05, |
| "loss": 1.7573, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.502584405586715, |
| "grad_norm": 0.3001987338066101, |
| "learning_rate": 8e-05, |
| "loss": 1.8463, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.5028043549983504, |
| "grad_norm": 0.273773193359375, |
| "learning_rate": 8e-05, |
| "loss": 1.6465, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.5030243044099857, |
| "grad_norm": 0.34727615118026733, |
| "learning_rate": 8e-05, |
| "loss": 1.9431, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.5032442538216211, |
| "grad_norm": 0.2818615138530731, |
| "learning_rate": 8e-05, |
| "loss": 1.6113, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.5034642032332564, |
| "grad_norm": 0.26619333028793335, |
| "learning_rate": 8e-05, |
| "loss": 1.5167, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.5036841526448916, |
| "grad_norm": 0.2748722434043884, |
| "learning_rate": 8e-05, |
| "loss": 1.6384, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.503904102056527, |
| "grad_norm": 0.29873546957969666, |
| "learning_rate": 8e-05, |
| "loss": 1.7309, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.5041240514681623, |
| "grad_norm": 0.28378361463546753, |
| "learning_rate": 8e-05, |
| "loss": 1.6788, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.5043440008797977, |
| "grad_norm": 0.28786107897758484, |
| "learning_rate": 8e-05, |
| "loss": 1.7281, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.504563950291433, |
| "grad_norm": 0.2831546366214752, |
| "learning_rate": 8e-05, |
| "loss": 1.7644, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.5047838997030683, |
| "grad_norm": 0.28964316844940186, |
| "learning_rate": 8e-05, |
| "loss": 1.6409, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.5050038491147036, |
| "grad_norm": 0.2778918743133545, |
| "learning_rate": 8e-05, |
| "loss": 1.6078, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.5052237985263389, |
| "grad_norm": 0.2749491035938263, |
| "learning_rate": 8e-05, |
| "loss": 1.668, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.5054437479379743, |
| "grad_norm": 0.2856389880180359, |
| "learning_rate": 8e-05, |
| "loss": 1.7052, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.5056636973496096, |
| "grad_norm": 0.28082379698753357, |
| "learning_rate": 8e-05, |
| "loss": 1.6627, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.5058836467612449, |
| "grad_norm": 0.27894240617752075, |
| "learning_rate": 8e-05, |
| "loss": 1.6408, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5061035961728803, |
| "grad_norm": 0.31712663173675537, |
| "learning_rate": 8e-05, |
| "loss": 1.6148, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.5063235455845155, |
| "grad_norm": 0.28600454330444336, |
| "learning_rate": 8e-05, |
| "loss": 1.7695, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.5065434949961509, |
| "grad_norm": 0.3285694718360901, |
| "learning_rate": 8e-05, |
| "loss": 1.7882, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.5067634444077862, |
| "grad_norm": 0.27823877334594727, |
| "learning_rate": 8e-05, |
| "loss": 1.4648, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.5069833938194215, |
| "grad_norm": 0.3157597482204437, |
| "learning_rate": 8e-05, |
| "loss": 1.8441, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.5072033432310569, |
| "grad_norm": 0.2913108170032501, |
| "learning_rate": 8e-05, |
| "loss": 1.7584, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.5074232926426921, |
| "grad_norm": 0.28753626346588135, |
| "learning_rate": 8e-05, |
| "loss": 1.7045, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.5076432420543275, |
| "grad_norm": 0.2981377840042114, |
| "learning_rate": 8e-05, |
| "loss": 1.54, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.5078631914659628, |
| "grad_norm": 0.2911180853843689, |
| "learning_rate": 8e-05, |
| "loss": 1.8266, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.5080831408775982, |
| "grad_norm": 0.2862488031387329, |
| "learning_rate": 8e-05, |
| "loss": 1.7503, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5083030902892335, |
| "grad_norm": 0.3015568256378174, |
| "learning_rate": 8e-05, |
| "loss": 1.8209, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.5085230397008688, |
| "grad_norm": 0.27230823040008545, |
| "learning_rate": 8e-05, |
| "loss": 1.7977, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.5087429891125042, |
| "grad_norm": 0.27871981263160706, |
| "learning_rate": 8e-05, |
| "loss": 1.5971, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.5089629385241394, |
| "grad_norm": 0.2884436249732971, |
| "learning_rate": 8e-05, |
| "loss": 1.7726, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.5091828879357748, |
| "grad_norm": 0.2778714895248413, |
| "learning_rate": 8e-05, |
| "loss": 1.7104, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.5094028373474101, |
| "grad_norm": 0.30202385783195496, |
| "learning_rate": 8e-05, |
| "loss": 1.658, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.5096227867590454, |
| "grad_norm": 0.2785106599330902, |
| "learning_rate": 8e-05, |
| "loss": 1.717, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.5098427361706808, |
| "grad_norm": 0.28846096992492676, |
| "learning_rate": 8e-05, |
| "loss": 1.7456, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.510062685582316, |
| "grad_norm": 0.27753984928131104, |
| "learning_rate": 8e-05, |
| "loss": 1.6569, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.5102826349939514, |
| "grad_norm": 0.2834784686565399, |
| "learning_rate": 8e-05, |
| "loss": 1.6348, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5105025844055867, |
| "grad_norm": 0.27789169549942017, |
| "learning_rate": 8e-05, |
| "loss": 1.5586, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.510722533817222, |
| "grad_norm": 0.28466710448265076, |
| "learning_rate": 8e-05, |
| "loss": 1.6793, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.5109424832288574, |
| "grad_norm": 0.2759189009666443, |
| "learning_rate": 8e-05, |
| "loss": 1.5311, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.5111624326404927, |
| "grad_norm": 0.2931334674358368, |
| "learning_rate": 8e-05, |
| "loss": 1.7258, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.511382382052128, |
| "grad_norm": 0.2740546464920044, |
| "learning_rate": 8e-05, |
| "loss": 1.558, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.5116023314637633, |
| "grad_norm": 0.29584407806396484, |
| "learning_rate": 8e-05, |
| "loss": 1.7777, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.5118222808753986, |
| "grad_norm": 0.2948019504547119, |
| "learning_rate": 8e-05, |
| "loss": 1.7722, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.512042230287034, |
| "grad_norm": 0.27409225702285767, |
| "learning_rate": 8e-05, |
| "loss": 1.6524, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.5122621796986693, |
| "grad_norm": 0.26278048753738403, |
| "learning_rate": 8e-05, |
| "loss": 1.5945, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.5124821291103047, |
| "grad_norm": 0.29483261704444885, |
| "learning_rate": 8e-05, |
| "loss": 1.8132, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5127020785219399, |
| "grad_norm": 0.27037349343299866, |
| "learning_rate": 8e-05, |
| "loss": 1.5657, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.5129220279335753, |
| "grad_norm": 0.2826361060142517, |
| "learning_rate": 8e-05, |
| "loss": 1.6955, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.5131419773452106, |
| "grad_norm": 0.2957696318626404, |
| "learning_rate": 8e-05, |
| "loss": 1.814, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.5133619267568459, |
| "grad_norm": 0.2752826511859894, |
| "learning_rate": 8e-05, |
| "loss": 1.636, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.5135818761684813, |
| "grad_norm": 0.28523313999176025, |
| "learning_rate": 8e-05, |
| "loss": 1.5444, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.5138018255801166, |
| "grad_norm": 0.286304235458374, |
| "learning_rate": 8e-05, |
| "loss": 1.4665, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.514021774991752, |
| "grad_norm": 0.28738734126091003, |
| "learning_rate": 8e-05, |
| "loss": 1.6802, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.5142417244033872, |
| "grad_norm": 0.2669237554073334, |
| "learning_rate": 8e-05, |
| "loss": 1.6011, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.5144616738150225, |
| "grad_norm": 0.274325430393219, |
| "learning_rate": 8e-05, |
| "loss": 1.6248, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.5146816232266579, |
| "grad_norm": 0.2798522710800171, |
| "learning_rate": 8e-05, |
| "loss": 1.7636, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5149015726382932, |
| "grad_norm": 0.27266305685043335, |
| "learning_rate": 8e-05, |
| "loss": 1.6434, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.5151215220499286, |
| "grad_norm": 0.2740791440010071, |
| "learning_rate": 8e-05, |
| "loss": 1.7084, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.5153414714615638, |
| "grad_norm": 0.28098320960998535, |
| "learning_rate": 8e-05, |
| "loss": 1.6356, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.5155614208731991, |
| "grad_norm": 0.2760515809059143, |
| "learning_rate": 8e-05, |
| "loss": 1.661, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.5157813702848345, |
| "grad_norm": 0.27894532680511475, |
| "learning_rate": 8e-05, |
| "loss": 1.6794, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.5160013196964698, |
| "grad_norm": 0.2972679138183594, |
| "learning_rate": 8e-05, |
| "loss": 1.6943, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.5162212691081052, |
| "grad_norm": 0.3100125789642334, |
| "learning_rate": 8e-05, |
| "loss": 1.7214, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.5164412185197405, |
| "grad_norm": 0.2743578255176544, |
| "learning_rate": 8e-05, |
| "loss": 1.7021, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.5166611679313757, |
| "grad_norm": 0.29266777634620667, |
| "learning_rate": 8e-05, |
| "loss": 1.7585, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.5168811173430111, |
| "grad_norm": 0.2791600227355957, |
| "learning_rate": 8e-05, |
| "loss": 1.7012, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5171010667546464, |
| "grad_norm": 0.28535401821136475, |
| "learning_rate": 8e-05, |
| "loss": 1.6695, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.5173210161662818, |
| "grad_norm": 0.2860865592956543, |
| "learning_rate": 8e-05, |
| "loss": 1.7419, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.5175409655779171, |
| "grad_norm": 0.27693790197372437, |
| "learning_rate": 8e-05, |
| "loss": 1.5459, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.5177609149895525, |
| "grad_norm": 0.2858433723449707, |
| "learning_rate": 8e-05, |
| "loss": 1.799, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.5179808644011877, |
| "grad_norm": 0.30761632323265076, |
| "learning_rate": 8e-05, |
| "loss": 1.5971, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.518200813812823, |
| "grad_norm": 0.2943046987056732, |
| "learning_rate": 8e-05, |
| "loss": 1.7399, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.5184207632244584, |
| "grad_norm": 0.2748922109603882, |
| "learning_rate": 8e-05, |
| "loss": 1.7202, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.5186407126360937, |
| "grad_norm": 0.2818071246147156, |
| "learning_rate": 8e-05, |
| "loss": 1.5918, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.5188606620477291, |
| "grad_norm": 0.28235137462615967, |
| "learning_rate": 8e-05, |
| "loss": 1.5728, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.5190806114593643, |
| "grad_norm": 0.27833959460258484, |
| "learning_rate": 8e-05, |
| "loss": 1.5966, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5193005608709996, |
| "grad_norm": 0.2731468677520752, |
| "learning_rate": 8e-05, |
| "loss": 1.6518, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.519520510282635, |
| "grad_norm": 0.2777821719646454, |
| "learning_rate": 8e-05, |
| "loss": 1.6885, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.5197404596942703, |
| "grad_norm": 0.2685951590538025, |
| "learning_rate": 8e-05, |
| "loss": 1.6315, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.5199604091059057, |
| "grad_norm": 0.3087875545024872, |
| "learning_rate": 8e-05, |
| "loss": 1.7525, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.520180358517541, |
| "grad_norm": 0.2693195044994354, |
| "learning_rate": 8e-05, |
| "loss": 1.5993, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.5204003079291762, |
| "grad_norm": 0.2832968235015869, |
| "learning_rate": 8e-05, |
| "loss": 1.5798, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.5206202573408116, |
| "grad_norm": 0.2873738706111908, |
| "learning_rate": 8e-05, |
| "loss": 1.7373, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.5208402067524469, |
| "grad_norm": 0.2888682782649994, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.5210601561640823, |
| "grad_norm": 0.2788809537887573, |
| "learning_rate": 8e-05, |
| "loss": 1.5928, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.5212801055757176, |
| "grad_norm": 0.28021880984306335, |
| "learning_rate": 8e-05, |
| "loss": 1.6692, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5215000549873529, |
| "grad_norm": 0.3041718006134033, |
| "learning_rate": 8e-05, |
| "loss": 1.7378, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.5217200043989882, |
| "grad_norm": 0.2774748206138611, |
| "learning_rate": 8e-05, |
| "loss": 1.5802, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.5219399538106235, |
| "grad_norm": 0.2876451015472412, |
| "learning_rate": 8e-05, |
| "loss": 1.8057, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.5221599032222589, |
| "grad_norm": 0.2740166485309601, |
| "learning_rate": 8e-05, |
| "loss": 1.6694, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.5223798526338942, |
| "grad_norm": 0.288555771112442, |
| "learning_rate": 8e-05, |
| "loss": 1.792, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.5225998020455296, |
| "grad_norm": 0.2777664065361023, |
| "learning_rate": 8e-05, |
| "loss": 1.6781, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.5228197514571649, |
| "grad_norm": 0.27108079195022583, |
| "learning_rate": 8e-05, |
| "loss": 1.4881, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.5230397008688001, |
| "grad_norm": 0.2909669578075409, |
| "learning_rate": 8e-05, |
| "loss": 1.7174, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.5232596502804355, |
| "grad_norm": 0.2978494167327881, |
| "learning_rate": 8e-05, |
| "loss": 1.8641, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.5234795996920708, |
| "grad_norm": 0.2649437487125397, |
| "learning_rate": 8e-05, |
| "loss": 1.473, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5236995491037062, |
| "grad_norm": 0.28939372301101685, |
| "learning_rate": 8e-05, |
| "loss": 1.5567, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.5239194985153415, |
| "grad_norm": 0.2740820646286011, |
| "learning_rate": 8e-05, |
| "loss": 1.6321, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.5241394479269768, |
| "grad_norm": 0.28426122665405273, |
| "learning_rate": 8e-05, |
| "loss": 1.5952, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.5243593973386121, |
| "grad_norm": 0.28176257014274597, |
| "learning_rate": 8e-05, |
| "loss": 1.6231, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.5245793467502474, |
| "grad_norm": 0.29681360721588135, |
| "learning_rate": 8e-05, |
| "loss": 1.8203, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.5247992961618828, |
| "grad_norm": 0.272658109664917, |
| "learning_rate": 8e-05, |
| "loss": 1.6942, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.5250192455735181, |
| "grad_norm": 0.27786141633987427, |
| "learning_rate": 8e-05, |
| "loss": 1.6081, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.5252391949851534, |
| "grad_norm": 0.2938309609889984, |
| "learning_rate": 8e-05, |
| "loss": 1.7454, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.5254591443967888, |
| "grad_norm": 0.2710343599319458, |
| "learning_rate": 8e-05, |
| "loss": 1.6391, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.525679093808424, |
| "grad_norm": 0.2757870554924011, |
| "learning_rate": 8e-05, |
| "loss": 1.6526, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5258990432200594, |
| "grad_norm": 0.2581859827041626, |
| "learning_rate": 8e-05, |
| "loss": 1.4344, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.5261189926316947, |
| "grad_norm": 0.2732166647911072, |
| "learning_rate": 8e-05, |
| "loss": 1.5987, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.52633894204333, |
| "grad_norm": 0.2859753370285034, |
| "learning_rate": 8e-05, |
| "loss": 1.6654, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.5265588914549654, |
| "grad_norm": 0.2680748701095581, |
| "learning_rate": 8e-05, |
| "loss": 1.5764, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.5267788408666007, |
| "grad_norm": 0.2866816818714142, |
| "learning_rate": 8e-05, |
| "loss": 1.7725, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.526998790278236, |
| "grad_norm": 0.27792397141456604, |
| "learning_rate": 8e-05, |
| "loss": 1.5932, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.5272187396898713, |
| "grad_norm": 0.26985982060432434, |
| "learning_rate": 8e-05, |
| "loss": 1.5955, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.5274386891015067, |
| "grad_norm": 0.28183454275131226, |
| "learning_rate": 8e-05, |
| "loss": 1.553, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.527658638513142, |
| "grad_norm": 0.29282763600349426, |
| "learning_rate": 8e-05, |
| "loss": 1.7858, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.5278785879247773, |
| "grad_norm": 0.30619367957115173, |
| "learning_rate": 8e-05, |
| "loss": 1.718, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5280985373364127, |
| "grad_norm": 0.26707130670547485, |
| "learning_rate": 8e-05, |
| "loss": 1.5899, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.5283184867480479, |
| "grad_norm": 0.3182383179664612, |
| "learning_rate": 8e-05, |
| "loss": 1.7268, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.5285384361596833, |
| "grad_norm": 0.3178313374519348, |
| "learning_rate": 8e-05, |
| "loss": 1.7282, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.5287583855713186, |
| "grad_norm": 0.26504799723625183, |
| "learning_rate": 8e-05, |
| "loss": 1.6046, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.5289783349829539, |
| "grad_norm": 0.2749512195587158, |
| "learning_rate": 8e-05, |
| "loss": 1.6251, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.5291982843945893, |
| "grad_norm": 0.27312803268432617, |
| "learning_rate": 8e-05, |
| "loss": 1.746, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.5294182338062245, |
| "grad_norm": 0.26339027285575867, |
| "learning_rate": 8e-05, |
| "loss": 1.5214, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.5296381832178599, |
| "grad_norm": 0.28254935145378113, |
| "learning_rate": 8e-05, |
| "loss": 1.629, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.5298581326294952, |
| "grad_norm": 0.2761283218860626, |
| "learning_rate": 8e-05, |
| "loss": 1.7202, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.5300780820411305, |
| "grad_norm": 0.27570095658302307, |
| "learning_rate": 8e-05, |
| "loss": 1.7042, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5302980314527659, |
| "grad_norm": 0.2886349856853485, |
| "learning_rate": 8e-05, |
| "loss": 1.7923, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.5305179808644012, |
| "grad_norm": 0.29611504077911377, |
| "learning_rate": 8e-05, |
| "loss": 1.7219, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.5307379302760366, |
| "grad_norm": 0.28122174739837646, |
| "learning_rate": 8e-05, |
| "loss": 1.6888, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.5309578796876718, |
| "grad_norm": 0.2690391540527344, |
| "learning_rate": 8e-05, |
| "loss": 1.2745, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.5311778290993071, |
| "grad_norm": 0.2676471173763275, |
| "learning_rate": 8e-05, |
| "loss": 1.6422, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.5313977785109425, |
| "grad_norm": 0.2947712540626526, |
| "learning_rate": 8e-05, |
| "loss": 1.654, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.5316177279225778, |
| "grad_norm": 0.27766644954681396, |
| "learning_rate": 8e-05, |
| "loss": 1.6208, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.5318376773342132, |
| "grad_norm": 0.28579944372177124, |
| "learning_rate": 8e-05, |
| "loss": 1.8171, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.5320576267458484, |
| "grad_norm": 0.2734217345714569, |
| "learning_rate": 8e-05, |
| "loss": 1.658, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.5322775761574838, |
| "grad_norm": 0.28343021869659424, |
| "learning_rate": 8e-05, |
| "loss": 1.6291, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5324975255691191, |
| "grad_norm": 0.2881801128387451, |
| "learning_rate": 8e-05, |
| "loss": 1.5212, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.5327174749807544, |
| "grad_norm": 0.27267688512802124, |
| "learning_rate": 8e-05, |
| "loss": 1.6599, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.5329374243923898, |
| "grad_norm": 0.29100489616394043, |
| "learning_rate": 8e-05, |
| "loss": 1.6636, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.5331573738040251, |
| "grad_norm": 0.301812082529068, |
| "learning_rate": 8e-05, |
| "loss": 1.9097, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.5333773232156604, |
| "grad_norm": 0.2864093482494354, |
| "learning_rate": 8e-05, |
| "loss": 1.6535, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.5335972726272957, |
| "grad_norm": 0.28721320629119873, |
| "learning_rate": 8e-05, |
| "loss": 1.7307, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.533817222038931, |
| "grad_norm": 0.3100323975086212, |
| "learning_rate": 8e-05, |
| "loss": 1.7155, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.5340371714505664, |
| "grad_norm": 0.2595236301422119, |
| "learning_rate": 8e-05, |
| "loss": 1.4525, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.5342571208622017, |
| "grad_norm": 0.27981269359588623, |
| "learning_rate": 8e-05, |
| "loss": 1.6821, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.5344770702738371, |
| "grad_norm": 0.28523892164230347, |
| "learning_rate": 8e-05, |
| "loss": 1.6213, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5346970196854723, |
| "grad_norm": 0.2951820194721222, |
| "learning_rate": 8e-05, |
| "loss": 1.7798, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.5349169690971076, |
| "grad_norm": 0.27744752168655396, |
| "learning_rate": 8e-05, |
| "loss": 1.664, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.535136918508743, |
| "grad_norm": 0.2700327932834625, |
| "learning_rate": 8e-05, |
| "loss": 1.6476, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.5353568679203783, |
| "grad_norm": 0.3043116331100464, |
| "learning_rate": 8e-05, |
| "loss": 1.8377, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.5355768173320137, |
| "grad_norm": 0.2886519730091095, |
| "learning_rate": 8e-05, |
| "loss": 1.7098, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.535796766743649, |
| "grad_norm": 0.28121626377105713, |
| "learning_rate": 8e-05, |
| "loss": 1.5902, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.5360167161552842, |
| "grad_norm": 0.28657859563827515, |
| "learning_rate": 8e-05, |
| "loss": 1.6769, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.5362366655669196, |
| "grad_norm": 0.3111754059791565, |
| "learning_rate": 8e-05, |
| "loss": 1.8352, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.5364566149785549, |
| "grad_norm": 0.27172762155532837, |
| "learning_rate": 8e-05, |
| "loss": 1.5897, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.5366765643901903, |
| "grad_norm": 0.28469017148017883, |
| "learning_rate": 8e-05, |
| "loss": 1.6976, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5368965138018256, |
| "grad_norm": 0.29801180958747864, |
| "learning_rate": 8e-05, |
| "loss": 1.7811, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.5371164632134608, |
| "grad_norm": 0.2860267758369446, |
| "learning_rate": 8e-05, |
| "loss": 1.7026, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.5373364126250962, |
| "grad_norm": 0.3069910705089569, |
| "learning_rate": 8e-05, |
| "loss": 1.823, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.5375563620367315, |
| "grad_norm": 0.29847028851509094, |
| "learning_rate": 8e-05, |
| "loss": 1.6314, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.5377763114483669, |
| "grad_norm": 0.2970685660839081, |
| "learning_rate": 8e-05, |
| "loss": 1.8591, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.5379962608600022, |
| "grad_norm": 0.28767916560173035, |
| "learning_rate": 8e-05, |
| "loss": 1.6244, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.5382162102716376, |
| "grad_norm": 0.2858954966068268, |
| "learning_rate": 8e-05, |
| "loss": 1.6002, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.5384361596832729, |
| "grad_norm": 0.25083082914352417, |
| "learning_rate": 8e-05, |
| "loss": 1.4772, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.5386561090949081, |
| "grad_norm": 0.28772327303886414, |
| "learning_rate": 8e-05, |
| "loss": 1.7885, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.5388760585065435, |
| "grad_norm": 0.300503671169281, |
| "learning_rate": 8e-05, |
| "loss": 1.8074, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5390960079181788, |
| "grad_norm": 0.29243797063827515, |
| "learning_rate": 8e-05, |
| "loss": 1.7033, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.5393159573298142, |
| "grad_norm": 0.28921830654144287, |
| "learning_rate": 8e-05, |
| "loss": 1.734, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.5395359067414495, |
| "grad_norm": 0.2754501700401306, |
| "learning_rate": 8e-05, |
| "loss": 1.577, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.5397558561530847, |
| "grad_norm": 0.26824522018432617, |
| "learning_rate": 8e-05, |
| "loss": 1.6434, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.5399758055647201, |
| "grad_norm": 0.26851388812065125, |
| "learning_rate": 8e-05, |
| "loss": 1.6706, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.5401957549763554, |
| "grad_norm": 0.2697846293449402, |
| "learning_rate": 8e-05, |
| "loss": 1.6052, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.5404157043879908, |
| "grad_norm": 0.27774059772491455, |
| "learning_rate": 8e-05, |
| "loss": 1.663, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.5406356537996261, |
| "grad_norm": 0.2799103558063507, |
| "learning_rate": 8e-05, |
| "loss": 1.9412, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.5408556032112614, |
| "grad_norm": 0.2874007523059845, |
| "learning_rate": 8e-05, |
| "loss": 1.6366, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.5410755526228967, |
| "grad_norm": 0.29054176807403564, |
| "learning_rate": 8e-05, |
| "loss": 1.7546, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.541295502034532, |
| "grad_norm": 0.29359421133995056, |
| "learning_rate": 8e-05, |
| "loss": 1.8274, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.5415154514461674, |
| "grad_norm": 0.29589033126831055, |
| "learning_rate": 8e-05, |
| "loss": 1.8619, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.5417354008578027, |
| "grad_norm": 0.2997150421142578, |
| "learning_rate": 8e-05, |
| "loss": 1.7685, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.541955350269438, |
| "grad_norm": 0.2759319543838501, |
| "learning_rate": 8e-05, |
| "loss": 1.5652, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.5421752996810734, |
| "grad_norm": 0.27741214632987976, |
| "learning_rate": 8e-05, |
| "loss": 1.6247, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.5423952490927086, |
| "grad_norm": 0.29365673661231995, |
| "learning_rate": 8e-05, |
| "loss": 1.7768, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.542615198504344, |
| "grad_norm": 0.2897026836872101, |
| "learning_rate": 8e-05, |
| "loss": 1.7435, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.5428351479159793, |
| "grad_norm": 0.2963312566280365, |
| "learning_rate": 8e-05, |
| "loss": 1.7792, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.5430550973276147, |
| "grad_norm": 0.3142043948173523, |
| "learning_rate": 8e-05, |
| "loss": 1.8348, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.54327504673925, |
| "grad_norm": 0.28869184851646423, |
| "learning_rate": 8e-05, |
| "loss": 1.6916, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5434949961508853, |
| "grad_norm": 0.27220281958580017, |
| "learning_rate": 8e-05, |
| "loss": 1.5963, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.5437149455625206, |
| "grad_norm": 0.3002524971961975, |
| "learning_rate": 8e-05, |
| "loss": 1.7516, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.5439348949741559, |
| "grad_norm": 0.27016308903694153, |
| "learning_rate": 8e-05, |
| "loss": 1.5655, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.5441548443857913, |
| "grad_norm": 0.2886146903038025, |
| "learning_rate": 8e-05, |
| "loss": 1.6563, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.5443747937974266, |
| "grad_norm": 0.2743261158466339, |
| "learning_rate": 8e-05, |
| "loss": 1.7916, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.5445947432090619, |
| "grad_norm": 0.27933475375175476, |
| "learning_rate": 8e-05, |
| "loss": 1.7397, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.5448146926206973, |
| "grad_norm": 0.2805885672569275, |
| "learning_rate": 8e-05, |
| "loss": 1.6608, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.5450346420323325, |
| "grad_norm": 0.26985716819763184, |
| "learning_rate": 8e-05, |
| "loss": 1.6551, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.5452545914439679, |
| "grad_norm": 0.2778765857219696, |
| "learning_rate": 8e-05, |
| "loss": 1.6792, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.5454745408556032, |
| "grad_norm": 0.27623313665390015, |
| "learning_rate": 8e-05, |
| "loss": 1.6867, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5456944902672385, |
| "grad_norm": 0.27185389399528503, |
| "learning_rate": 8e-05, |
| "loss": 1.6184, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.5459144396788739, |
| "grad_norm": 0.29302138090133667, |
| "learning_rate": 8e-05, |
| "loss": 1.7075, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.5461343890905092, |
| "grad_norm": 0.26639193296432495, |
| "learning_rate": 8e-05, |
| "loss": 1.6138, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.5463543385021445, |
| "grad_norm": 0.28048211336135864, |
| "learning_rate": 8e-05, |
| "loss": 1.6672, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.5465742879137798, |
| "grad_norm": 0.2844570577144623, |
| "learning_rate": 8e-05, |
| "loss": 1.6888, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.5467942373254151, |
| "grad_norm": 0.2801128923892975, |
| "learning_rate": 8e-05, |
| "loss": 1.8506, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.5470141867370505, |
| "grad_norm": 0.2718241810798645, |
| "learning_rate": 8e-05, |
| "loss": 1.6105, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.5472341361486858, |
| "grad_norm": 0.28759825229644775, |
| "learning_rate": 8e-05, |
| "loss": 1.7449, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.5474540855603212, |
| "grad_norm": 0.29218876361846924, |
| "learning_rate": 8e-05, |
| "loss": 1.8732, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.5476740349719564, |
| "grad_norm": 0.29760751128196716, |
| "learning_rate": 8e-05, |
| "loss": 1.7804, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5478939843835918, |
| "grad_norm": 0.28636956214904785, |
| "learning_rate": 8e-05, |
| "loss": 1.6994, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.5481139337952271, |
| "grad_norm": 0.2892046570777893, |
| "learning_rate": 8e-05, |
| "loss": 1.759, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.5483338832068624, |
| "grad_norm": 0.280556857585907, |
| "learning_rate": 8e-05, |
| "loss": 1.8084, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.5485538326184978, |
| "grad_norm": 0.2733471691608429, |
| "learning_rate": 8e-05, |
| "loss": 1.7293, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.548773782030133, |
| "grad_norm": 0.2813643515110016, |
| "learning_rate": 8e-05, |
| "loss": 1.6178, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.5489937314417684, |
| "grad_norm": 0.27255943417549133, |
| "learning_rate": 8e-05, |
| "loss": 1.6827, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.5492136808534037, |
| "grad_norm": 0.2690375745296478, |
| "learning_rate": 8e-05, |
| "loss": 1.684, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.549433630265039, |
| "grad_norm": 0.30036401748657227, |
| "learning_rate": 8e-05, |
| "loss": 1.8676, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.5496535796766744, |
| "grad_norm": 0.27924251556396484, |
| "learning_rate": 8e-05, |
| "loss": 1.6619, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.5498735290883097, |
| "grad_norm": 0.2792947590351105, |
| "learning_rate": 8e-05, |
| "loss": 1.6121, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.550093478499945, |
| "grad_norm": 0.27976930141448975, |
| "learning_rate": 8e-05, |
| "loss": 1.5815, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.5503134279115803, |
| "grad_norm": 0.28429850935935974, |
| "learning_rate": 8e-05, |
| "loss": 1.6713, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.5505333773232156, |
| "grad_norm": 0.2669944763183594, |
| "learning_rate": 8e-05, |
| "loss": 1.5065, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.550753326734851, |
| "grad_norm": 0.2846994400024414, |
| "learning_rate": 8e-05, |
| "loss": 1.7238, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.5509732761464863, |
| "grad_norm": 0.27598071098327637, |
| "learning_rate": 8e-05, |
| "loss": 1.5364, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.5511932255581217, |
| "grad_norm": 0.27275460958480835, |
| "learning_rate": 8e-05, |
| "loss": 1.6171, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.551413174969757, |
| "grad_norm": 0.2846895456314087, |
| "learning_rate": 8e-05, |
| "loss": 1.7082, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.5516331243813922, |
| "grad_norm": 0.3010547161102295, |
| "learning_rate": 8e-05, |
| "loss": 1.7946, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.5518530737930276, |
| "grad_norm": 0.28405773639678955, |
| "learning_rate": 8e-05, |
| "loss": 1.6063, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.5520730232046629, |
| "grad_norm": 0.2855536639690399, |
| "learning_rate": 8e-05, |
| "loss": 1.6138, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5522929726162983, |
| "grad_norm": 0.2949456572532654, |
| "learning_rate": 8e-05, |
| "loss": 1.7734, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.5525129220279336, |
| "grad_norm": 0.31665512919425964, |
| "learning_rate": 8e-05, |
| "loss": 1.7163, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.552732871439569, |
| "grad_norm": 0.2881389260292053, |
| "learning_rate": 8e-05, |
| "loss": 1.5617, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.5529528208512042, |
| "grad_norm": 0.26758721470832825, |
| "learning_rate": 8e-05, |
| "loss": 1.6714, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.5531727702628395, |
| "grad_norm": 0.29549580812454224, |
| "learning_rate": 8e-05, |
| "loss": 1.6516, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.5533927196744749, |
| "grad_norm": 0.2811340391635895, |
| "learning_rate": 8e-05, |
| "loss": 1.6026, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.5536126690861102, |
| "grad_norm": 0.2837204039096832, |
| "learning_rate": 8e-05, |
| "loss": 1.8413, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.5538326184977456, |
| "grad_norm": 0.276216983795166, |
| "learning_rate": 8e-05, |
| "loss": 1.6671, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.5540525679093808, |
| "grad_norm": 0.2781767249107361, |
| "learning_rate": 8e-05, |
| "loss": 1.5886, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.5542725173210161, |
| "grad_norm": 0.2830861210823059, |
| "learning_rate": 8e-05, |
| "loss": 1.6097, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.5544924667326515, |
| "grad_norm": 0.2746805250644684, |
| "learning_rate": 8e-05, |
| "loss": 1.7153, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.5547124161442868, |
| "grad_norm": 0.2781994640827179, |
| "learning_rate": 8e-05, |
| "loss": 1.6597, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.5549323655559222, |
| "grad_norm": 0.2919979393482208, |
| "learning_rate": 8e-05, |
| "loss": 1.715, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.5551523149675575, |
| "grad_norm": 0.27563661336898804, |
| "learning_rate": 8e-05, |
| "loss": 1.6129, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.5553722643791927, |
| "grad_norm": 0.3070942163467407, |
| "learning_rate": 8e-05, |
| "loss": 1.875, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.5555922137908281, |
| "grad_norm": 0.278039813041687, |
| "learning_rate": 8e-05, |
| "loss": 1.6894, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.5558121632024634, |
| "grad_norm": 0.2709571421146393, |
| "learning_rate": 8e-05, |
| "loss": 1.5268, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.5560321126140988, |
| "grad_norm": 0.27659523487091064, |
| "learning_rate": 8e-05, |
| "loss": 1.5817, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.5562520620257341, |
| "grad_norm": 0.33376970887184143, |
| "learning_rate": 8e-05, |
| "loss": 1.6275, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.5564720114373694, |
| "grad_norm": 0.28663134574890137, |
| "learning_rate": 8e-05, |
| "loss": 1.6497, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5566919608490047, |
| "grad_norm": 0.27400556206703186, |
| "learning_rate": 8e-05, |
| "loss": 1.6768, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.55691191026064, |
| "grad_norm": 0.3359694182872772, |
| "learning_rate": 8e-05, |
| "loss": 1.7628, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.5571318596722754, |
| "grad_norm": 0.3009445071220398, |
| "learning_rate": 8e-05, |
| "loss": 1.6368, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.5573518090839107, |
| "grad_norm": 0.2951606512069702, |
| "learning_rate": 8e-05, |
| "loss": 1.6468, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.5575717584955461, |
| "grad_norm": 0.298835426568985, |
| "learning_rate": 8e-05, |
| "loss": 1.679, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.5577917079071814, |
| "grad_norm": 0.29196399450302124, |
| "learning_rate": 8e-05, |
| "loss": 1.6865, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.5580116573188166, |
| "grad_norm": 0.3057127296924591, |
| "learning_rate": 8e-05, |
| "loss": 1.7979, |
| "step": 2537 |
| }, |
| { |
| "epoch": 0.558231606730452, |
| "grad_norm": 0.3170565664768219, |
| "learning_rate": 8e-05, |
| "loss": 1.7815, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.5584515561420873, |
| "grad_norm": 0.28287273645401, |
| "learning_rate": 8e-05, |
| "loss": 1.7151, |
| "step": 2539 |
| }, |
| { |
| "epoch": 0.5586715055537227, |
| "grad_norm": 0.30313780903816223, |
| "learning_rate": 8e-05, |
| "loss": 1.6501, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.558891454965358, |
| "grad_norm": 0.28195586800575256, |
| "learning_rate": 8e-05, |
| "loss": 1.7281, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.5591114043769932, |
| "grad_norm": 0.2734014391899109, |
| "learning_rate": 8e-05, |
| "loss": 1.6504, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.5593313537886286, |
| "grad_norm": 0.28178513050079346, |
| "learning_rate": 8e-05, |
| "loss": 1.5946, |
| "step": 2543 |
| }, |
| { |
| "epoch": 0.5595513032002639, |
| "grad_norm": 0.2800062894821167, |
| "learning_rate": 8e-05, |
| "loss": 1.7498, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.5597712526118993, |
| "grad_norm": 0.28368762135505676, |
| "learning_rate": 8e-05, |
| "loss": 1.6732, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.5599912020235346, |
| "grad_norm": 0.3069396913051605, |
| "learning_rate": 8e-05, |
| "loss": 1.7647, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.5602111514351699, |
| "grad_norm": 0.27336394786834717, |
| "learning_rate": 8e-05, |
| "loss": 1.4557, |
| "step": 2547 |
| }, |
| { |
| "epoch": 0.5604311008468053, |
| "grad_norm": 0.28363245725631714, |
| "learning_rate": 8e-05, |
| "loss": 1.7301, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.5606510502584405, |
| "grad_norm": 0.3097067177295685, |
| "learning_rate": 8e-05, |
| "loss": 1.7322, |
| "step": 2549 |
| }, |
| { |
| "epoch": 0.5608709996700759, |
| "grad_norm": 0.28125154972076416, |
| "learning_rate": 8e-05, |
| "loss": 1.5551, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.5610909490817112, |
| "grad_norm": 0.3111821413040161, |
| "learning_rate": 8e-05, |
| "loss": 1.8965, |
| "step": 2551 |
| }, |
| { |
| "epoch": 0.5613108984933465, |
| "grad_norm": 0.2920529842376709, |
| "learning_rate": 8e-05, |
| "loss": 1.6835, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.5615308479049819, |
| "grad_norm": 0.27278631925582886, |
| "learning_rate": 8e-05, |
| "loss": 1.7175, |
| "step": 2553 |
| }, |
| { |
| "epoch": 0.5617507973166171, |
| "grad_norm": 0.2742355763912201, |
| "learning_rate": 8e-05, |
| "loss": 1.6745, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.5619707467282525, |
| "grad_norm": 0.2675003707408905, |
| "learning_rate": 8e-05, |
| "loss": 1.5771, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.5621906961398878, |
| "grad_norm": 0.2805350422859192, |
| "learning_rate": 8e-05, |
| "loss": 1.7503, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.5624106455515232, |
| "grad_norm": 0.27205830812454224, |
| "learning_rate": 8e-05, |
| "loss": 1.6591, |
| "step": 2557 |
| }, |
| { |
| "epoch": 0.5626305949631585, |
| "grad_norm": 0.26984983682632446, |
| "learning_rate": 8e-05, |
| "loss": 1.6351, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.5628505443747938, |
| "grad_norm": 0.3067481517791748, |
| "learning_rate": 8e-05, |
| "loss": 1.5304, |
| "step": 2559 |
| }, |
| { |
| "epoch": 0.5630704937864291, |
| "grad_norm": 0.28945624828338623, |
| "learning_rate": 8e-05, |
| "loss": 1.7099, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5632904431980644, |
| "grad_norm": 0.269144743680954, |
| "learning_rate": 8e-05, |
| "loss": 1.557, |
| "step": 2561 |
| }, |
| { |
| "epoch": 0.5635103926096998, |
| "grad_norm": 0.329520583152771, |
| "learning_rate": 8e-05, |
| "loss": 1.7867, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.5637303420213351, |
| "grad_norm": 0.35944700241088867, |
| "learning_rate": 8e-05, |
| "loss": 1.8146, |
| "step": 2563 |
| }, |
| { |
| "epoch": 0.5639502914329704, |
| "grad_norm": 0.30693116784095764, |
| "learning_rate": 8e-05, |
| "loss": 1.7709, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.5641702408446058, |
| "grad_norm": 0.31814631819725037, |
| "learning_rate": 8e-05, |
| "loss": 1.8679, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.564390190256241, |
| "grad_norm": 0.2988479435443878, |
| "learning_rate": 8e-05, |
| "loss": 1.7675, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.5646101396678764, |
| "grad_norm": 0.2955850064754486, |
| "learning_rate": 8e-05, |
| "loss": 1.7082, |
| "step": 2567 |
| }, |
| { |
| "epoch": 0.5648300890795117, |
| "grad_norm": 0.27773404121398926, |
| "learning_rate": 8e-05, |
| "loss": 1.5378, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.565050038491147, |
| "grad_norm": 0.2847524583339691, |
| "learning_rate": 8e-05, |
| "loss": 1.792, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.5652699879027824, |
| "grad_norm": 0.29024967551231384, |
| "learning_rate": 8e-05, |
| "loss": 1.6185, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5654899373144177, |
| "grad_norm": 0.27534323930740356, |
| "learning_rate": 8e-05, |
| "loss": 1.7044, |
| "step": 2571 |
| }, |
| { |
| "epoch": 0.565709886726053, |
| "grad_norm": 0.28059902787208557, |
| "learning_rate": 8e-05, |
| "loss": 1.7199, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.5659298361376883, |
| "grad_norm": 0.29140958189964294, |
| "learning_rate": 8e-05, |
| "loss": 1.6534, |
| "step": 2573 |
| }, |
| { |
| "epoch": 0.5661497855493236, |
| "grad_norm": 0.303821861743927, |
| "learning_rate": 8e-05, |
| "loss": 1.7163, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.566369734960959, |
| "grad_norm": 0.3073093295097351, |
| "learning_rate": 8e-05, |
| "loss": 1.7885, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.5665896843725943, |
| "grad_norm": 0.2976214289665222, |
| "learning_rate": 8e-05, |
| "loss": 1.7059, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.5668096337842297, |
| "grad_norm": 0.3081284761428833, |
| "learning_rate": 8e-05, |
| "loss": 1.5529, |
| "step": 2577 |
| }, |
| { |
| "epoch": 0.5670295831958649, |
| "grad_norm": 0.2893354594707489, |
| "learning_rate": 8e-05, |
| "loss": 1.8564, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.5672495326075003, |
| "grad_norm": 0.2904176115989685, |
| "learning_rate": 8e-05, |
| "loss": 1.6903, |
| "step": 2579 |
| }, |
| { |
| "epoch": 0.5674694820191356, |
| "grad_norm": 0.2907819449901581, |
| "learning_rate": 8e-05, |
| "loss": 1.5663, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5676894314307709, |
| "grad_norm": 0.27938172221183777, |
| "learning_rate": 8e-05, |
| "loss": 1.6031, |
| "step": 2581 |
| }, |
| { |
| "epoch": 0.5679093808424063, |
| "grad_norm": 0.28864786028862, |
| "learning_rate": 8e-05, |
| "loss": 1.666, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.5681293302540416, |
| "grad_norm": 0.29587891697883606, |
| "learning_rate": 8e-05, |
| "loss": 1.7545, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.5683492796656769, |
| "grad_norm": 0.26541203260421753, |
| "learning_rate": 8e-05, |
| "loss": 1.6059, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.5685692290773122, |
| "grad_norm": 0.2819576561450958, |
| "learning_rate": 8e-05, |
| "loss": 1.7227, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.5687891784889475, |
| "grad_norm": 0.2920463979244232, |
| "learning_rate": 8e-05, |
| "loss": 1.7553, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.5690091279005829, |
| "grad_norm": 0.29490089416503906, |
| "learning_rate": 8e-05, |
| "loss": 1.7117, |
| "step": 2587 |
| }, |
| { |
| "epoch": 0.5692290773122182, |
| "grad_norm": 0.29847970604896545, |
| "learning_rate": 8e-05, |
| "loss": 1.8931, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.5694490267238536, |
| "grad_norm": 0.28575995564460754, |
| "learning_rate": 8e-05, |
| "loss": 1.595, |
| "step": 2589 |
| }, |
| { |
| "epoch": 0.5696689761354888, |
| "grad_norm": 0.28053271770477295, |
| "learning_rate": 8e-05, |
| "loss": 1.6089, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5698889255471241, |
| "grad_norm": 0.27538979053497314, |
| "learning_rate": 8e-05, |
| "loss": 1.7808, |
| "step": 2591 |
| }, |
| { |
| "epoch": 0.5701088749587595, |
| "grad_norm": 0.2819748520851135, |
| "learning_rate": 8e-05, |
| "loss": 1.7355, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.5703288243703948, |
| "grad_norm": 0.3023085594177246, |
| "learning_rate": 8e-05, |
| "loss": 1.759, |
| "step": 2593 |
| }, |
| { |
| "epoch": 0.5705487737820302, |
| "grad_norm": 0.28369995951652527, |
| "learning_rate": 8e-05, |
| "loss": 1.7796, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.5707687231936655, |
| "grad_norm": 0.27062156796455383, |
| "learning_rate": 8e-05, |
| "loss": 1.6206, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.5709886726053007, |
| "grad_norm": 0.2928752303123474, |
| "learning_rate": 8e-05, |
| "loss": 1.7578, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.5712086220169361, |
| "grad_norm": 0.28366369009017944, |
| "learning_rate": 8e-05, |
| "loss": 1.6367, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.2794798016548157, |
| "learning_rate": 8e-05, |
| "loss": 1.7006, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.5716485208402068, |
| "grad_norm": 0.278814435005188, |
| "learning_rate": 8e-05, |
| "loss": 1.6883, |
| "step": 2599 |
| }, |
| { |
| "epoch": 0.5718684702518421, |
| "grad_norm": 0.28789058327674866, |
| "learning_rate": 8e-05, |
| "loss": 1.76, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5720884196634775, |
| "grad_norm": 0.289120614528656, |
| "learning_rate": 8e-05, |
| "loss": 1.5449, |
| "step": 2601 |
| }, |
| { |
| "epoch": 0.5723083690751127, |
| "grad_norm": 0.27491265535354614, |
| "learning_rate": 8e-05, |
| "loss": 1.6287, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.572528318486748, |
| "grad_norm": 0.2837536931037903, |
| "learning_rate": 8e-05, |
| "loss": 1.6618, |
| "step": 2603 |
| }, |
| { |
| "epoch": 0.5727482678983834, |
| "grad_norm": 0.27386194467544556, |
| "learning_rate": 8e-05, |
| "loss": 1.5815, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.5729682173100187, |
| "grad_norm": 0.2818918228149414, |
| "learning_rate": 8e-05, |
| "loss": 1.511, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.5731881667216541, |
| "grad_norm": 0.29329514503479004, |
| "learning_rate": 8e-05, |
| "loss": 1.5494, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.5734081161332893, |
| "grad_norm": 0.29942408204078674, |
| "learning_rate": 8e-05, |
| "loss": 1.8049, |
| "step": 2607 |
| }, |
| { |
| "epoch": 0.5736280655449246, |
| "grad_norm": 0.30527159571647644, |
| "learning_rate": 8e-05, |
| "loss": 1.8735, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.57384801495656, |
| "grad_norm": 0.2842453122138977, |
| "learning_rate": 8e-05, |
| "loss": 1.7124, |
| "step": 2609 |
| }, |
| { |
| "epoch": 0.5740679643681953, |
| "grad_norm": 0.3305295407772064, |
| "learning_rate": 8e-05, |
| "loss": 1.6555, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5742879137798307, |
| "grad_norm": 0.28134140372276306, |
| "learning_rate": 8e-05, |
| "loss": 1.639, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.574507863191466, |
| "grad_norm": 0.2862444818019867, |
| "learning_rate": 8e-05, |
| "loss": 1.7949, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.5747278126031012, |
| "grad_norm": 0.3089071214199066, |
| "learning_rate": 8e-05, |
| "loss": 1.7782, |
| "step": 2613 |
| }, |
| { |
| "epoch": 0.5749477620147366, |
| "grad_norm": 0.3113284111022949, |
| "learning_rate": 8e-05, |
| "loss": 1.5588, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.5751677114263719, |
| "grad_norm": 0.2865052819252014, |
| "learning_rate": 8e-05, |
| "loss": 1.6919, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5753876608380073, |
| "grad_norm": 0.26997220516204834, |
| "learning_rate": 8e-05, |
| "loss": 1.6853, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.5756076102496426, |
| "grad_norm": 0.3056239187717438, |
| "learning_rate": 8e-05, |
| "loss": 1.7971, |
| "step": 2617 |
| }, |
| { |
| "epoch": 0.5758275596612779, |
| "grad_norm": 0.3041035234928131, |
| "learning_rate": 8e-05, |
| "loss": 1.7889, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.5760475090729132, |
| "grad_norm": 0.2829764187335968, |
| "learning_rate": 8e-05, |
| "loss": 1.6371, |
| "step": 2619 |
| }, |
| { |
| "epoch": 0.5762674584845485, |
| "grad_norm": 0.29050111770629883, |
| "learning_rate": 8e-05, |
| "loss": 1.8909, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5764874078961839, |
| "grad_norm": 0.29888811707496643, |
| "learning_rate": 8e-05, |
| "loss": 1.6974, |
| "step": 2621 |
| }, |
| { |
| "epoch": 0.5767073573078192, |
| "grad_norm": 0.3193587362766266, |
| "learning_rate": 8e-05, |
| "loss": 1.7083, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.5769273067194546, |
| "grad_norm": 0.2855699360370636, |
| "learning_rate": 8e-05, |
| "loss": 1.6106, |
| "step": 2623 |
| }, |
| { |
| "epoch": 0.5771472561310899, |
| "grad_norm": 0.29608815908432007, |
| "learning_rate": 8e-05, |
| "loss": 1.7813, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.5773672055427251, |
| "grad_norm": 0.2846873700618744, |
| "learning_rate": 8e-05, |
| "loss": 1.6164, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.5775871549543605, |
| "grad_norm": 0.3074873983860016, |
| "learning_rate": 8e-05, |
| "loss": 1.7278, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.5778071043659958, |
| "grad_norm": 0.3016159236431122, |
| "learning_rate": 8e-05, |
| "loss": 1.6286, |
| "step": 2627 |
| }, |
| { |
| "epoch": 0.5780270537776312, |
| "grad_norm": 0.28926798701286316, |
| "learning_rate": 8e-05, |
| "loss": 1.746, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.5782470031892665, |
| "grad_norm": 0.3222711682319641, |
| "learning_rate": 8e-05, |
| "loss": 1.8108, |
| "step": 2629 |
| }, |
| { |
| "epoch": 0.5784669526009018, |
| "grad_norm": 0.30052945017814636, |
| "learning_rate": 8e-05, |
| "loss": 1.7374, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5786869020125371, |
| "grad_norm": 0.2880706787109375, |
| "learning_rate": 8e-05, |
| "loss": 1.627, |
| "step": 2631 |
| }, |
| { |
| "epoch": 0.5789068514241724, |
| "grad_norm": 0.30028629302978516, |
| "learning_rate": 8e-05, |
| "loss": 1.8345, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.5791268008358078, |
| "grad_norm": 0.3164263665676117, |
| "learning_rate": 8e-05, |
| "loss": 1.9713, |
| "step": 2633 |
| }, |
| { |
| "epoch": 0.5793467502474431, |
| "grad_norm": 0.294114887714386, |
| "learning_rate": 8e-05, |
| "loss": 1.6083, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.5795666996590784, |
| "grad_norm": 0.31809002161026, |
| "learning_rate": 8e-05, |
| "loss": 1.794, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5797866490707138, |
| "grad_norm": 0.3005049526691437, |
| "learning_rate": 8e-05, |
| "loss": 1.469, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.580006598482349, |
| "grad_norm": 0.2874310314655304, |
| "learning_rate": 8e-05, |
| "loss": 1.7345, |
| "step": 2637 |
| }, |
| { |
| "epoch": 0.5802265478939844, |
| "grad_norm": 0.295523077249527, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.5804464973056197, |
| "grad_norm": 0.29120928049087524, |
| "learning_rate": 8e-05, |
| "loss": 1.7736, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.580666446717255, |
| "grad_norm": 0.2916790246963501, |
| "learning_rate": 8e-05, |
| "loss": 1.74, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5808863961288904, |
| "grad_norm": 0.285230427980423, |
| "learning_rate": 8e-05, |
| "loss": 1.7685, |
| "step": 2641 |
| }, |
| { |
| "epoch": 0.5811063455405256, |
| "grad_norm": 0.2743189334869385, |
| "learning_rate": 8e-05, |
| "loss": 1.6751, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.581326294952161, |
| "grad_norm": 0.2997332811355591, |
| "learning_rate": 8e-05, |
| "loss": 1.5959, |
| "step": 2643 |
| }, |
| { |
| "epoch": 0.5815462443637963, |
| "grad_norm": 0.28394201397895813, |
| "learning_rate": 8e-05, |
| "loss": 1.6288, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.5817661937754317, |
| "grad_norm": 0.2787470519542694, |
| "learning_rate": 8e-05, |
| "loss": 1.7496, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.581986143187067, |
| "grad_norm": 0.2853599488735199, |
| "learning_rate": 8e-05, |
| "loss": 1.6439, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.5822060925987023, |
| "grad_norm": 0.2939299941062927, |
| "learning_rate": 8e-05, |
| "loss": 1.7293, |
| "step": 2647 |
| }, |
| { |
| "epoch": 0.5824260420103377, |
| "grad_norm": 0.27831408381462097, |
| "learning_rate": 8e-05, |
| "loss": 1.6748, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.5826459914219729, |
| "grad_norm": 0.296762615442276, |
| "learning_rate": 8e-05, |
| "loss": 1.6735, |
| "step": 2649 |
| }, |
| { |
| "epoch": 0.5828659408336083, |
| "grad_norm": 0.27961719036102295, |
| "learning_rate": 8e-05, |
| "loss": 1.6837, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5830858902452436, |
| "grad_norm": 0.27915704250335693, |
| "learning_rate": 8e-05, |
| "loss": 1.6745, |
| "step": 2651 |
| }, |
| { |
| "epoch": 0.5833058396568789, |
| "grad_norm": 0.273799329996109, |
| "learning_rate": 8e-05, |
| "loss": 1.5609, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.5835257890685143, |
| "grad_norm": 0.287383109331131, |
| "learning_rate": 8e-05, |
| "loss": 1.7569, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.5837457384801495, |
| "grad_norm": 0.27745500206947327, |
| "learning_rate": 8e-05, |
| "loss": 1.6093, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.5839656878917849, |
| "grad_norm": 0.2954557240009308, |
| "learning_rate": 8e-05, |
| "loss": 1.788, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.5841856373034202, |
| "grad_norm": 0.28464850783348083, |
| "learning_rate": 8e-05, |
| "loss": 1.7079, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.5844055867150555, |
| "grad_norm": 0.27475497126579285, |
| "learning_rate": 8e-05, |
| "loss": 1.6137, |
| "step": 2657 |
| }, |
| { |
| "epoch": 0.5846255361266909, |
| "grad_norm": 0.27928462624549866, |
| "learning_rate": 8e-05, |
| "loss": 1.5776, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.5848454855383262, |
| "grad_norm": 0.2889251708984375, |
| "learning_rate": 8e-05, |
| "loss": 1.7871, |
| "step": 2659 |
| }, |
| { |
| "epoch": 0.5850654349499615, |
| "grad_norm": 0.29489466547966003, |
| "learning_rate": 8e-05, |
| "loss": 1.7299, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5852853843615968, |
| "grad_norm": 0.27761825919151306, |
| "learning_rate": 8e-05, |
| "loss": 1.6772, |
| "step": 2661 |
| }, |
| { |
| "epoch": 0.5855053337732321, |
| "grad_norm": 0.2886674702167511, |
| "learning_rate": 8e-05, |
| "loss": 1.6718, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.5857252831848675, |
| "grad_norm": 0.2736080586910248, |
| "learning_rate": 8e-05, |
| "loss": 1.5834, |
| "step": 2663 |
| }, |
| { |
| "epoch": 0.5859452325965028, |
| "grad_norm": 0.29493847489356995, |
| "learning_rate": 8e-05, |
| "loss": 1.7123, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.5861651820081382, |
| "grad_norm": 0.2919282615184784, |
| "learning_rate": 8e-05, |
| "loss": 1.7192, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5863851314197734, |
| "grad_norm": 0.2883647680282593, |
| "learning_rate": 8e-05, |
| "loss": 1.6271, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.5866050808314087, |
| "grad_norm": 0.2852446734905243, |
| "learning_rate": 8e-05, |
| "loss": 1.7295, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.5868250302430441, |
| "grad_norm": 0.3113778531551361, |
| "learning_rate": 8e-05, |
| "loss": 1.6605, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.5870449796546794, |
| "grad_norm": 0.2629379630088806, |
| "learning_rate": 8e-05, |
| "loss": 1.3457, |
| "step": 2669 |
| }, |
| { |
| "epoch": 0.5872649290663148, |
| "grad_norm": 0.28648287057876587, |
| "learning_rate": 8e-05, |
| "loss": 1.7137, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5874848784779501, |
| "grad_norm": 0.30140426754951477, |
| "learning_rate": 8e-05, |
| "loss": 1.7612, |
| "step": 2671 |
| }, |
| { |
| "epoch": 0.5877048278895854, |
| "grad_norm": 0.29059261083602905, |
| "learning_rate": 8e-05, |
| "loss": 1.8526, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.5879247773012207, |
| "grad_norm": 0.2913878560066223, |
| "learning_rate": 8e-05, |
| "loss": 1.7214, |
| "step": 2673 |
| }, |
| { |
| "epoch": 0.588144726712856, |
| "grad_norm": 0.3046487271785736, |
| "learning_rate": 8e-05, |
| "loss": 1.8342, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.5883646761244914, |
| "grad_norm": 0.2699670195579529, |
| "learning_rate": 8e-05, |
| "loss": 1.6057, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.5885846255361267, |
| "grad_norm": 0.2722747027873993, |
| "learning_rate": 8e-05, |
| "loss": 1.5067, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.5888045749477621, |
| "grad_norm": 0.27758973836898804, |
| "learning_rate": 8e-05, |
| "loss": 1.613, |
| "step": 2677 |
| }, |
| { |
| "epoch": 0.5890245243593973, |
| "grad_norm": 0.30234992504119873, |
| "learning_rate": 8e-05, |
| "loss": 1.7266, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.5892444737710326, |
| "grad_norm": 0.3146234452724457, |
| "learning_rate": 8e-05, |
| "loss": 1.6588, |
| "step": 2679 |
| }, |
| { |
| "epoch": 0.589464423182668, |
| "grad_norm": 0.2867683470249176, |
| "learning_rate": 8e-05, |
| "loss": 1.6726, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.5896843725943033, |
| "grad_norm": 0.28295040130615234, |
| "learning_rate": 8e-05, |
| "loss": 1.7338, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.5899043220059387, |
| "grad_norm": 0.28655725717544556, |
| "learning_rate": 8e-05, |
| "loss": 1.6791, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.590124271417574, |
| "grad_norm": 0.2897862493991852, |
| "learning_rate": 8e-05, |
| "loss": 1.7127, |
| "step": 2683 |
| }, |
| { |
| "epoch": 0.5903442208292092, |
| "grad_norm": 0.278427392244339, |
| "learning_rate": 8e-05, |
| "loss": 1.7166, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.5905641702408446, |
| "grad_norm": 0.28383758664131165, |
| "learning_rate": 8e-05, |
| "loss": 1.8498, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5907841196524799, |
| "grad_norm": 0.2690020501613617, |
| "learning_rate": 8e-05, |
| "loss": 1.5887, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.5910040690641153, |
| "grad_norm": 0.2910546362400055, |
| "learning_rate": 8e-05, |
| "loss": 1.7525, |
| "step": 2687 |
| }, |
| { |
| "epoch": 0.5912240184757506, |
| "grad_norm": 0.2932651937007904, |
| "learning_rate": 8e-05, |
| "loss": 1.6113, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.5914439678873858, |
| "grad_norm": 0.275622695684433, |
| "learning_rate": 8e-05, |
| "loss": 1.6322, |
| "step": 2689 |
| }, |
| { |
| "epoch": 0.5916639172990212, |
| "grad_norm": 0.2838039696216583, |
| "learning_rate": 8e-05, |
| "loss": 1.8021, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5918838667106565, |
| "grad_norm": 0.290005087852478, |
| "learning_rate": 8e-05, |
| "loss": 1.6378, |
| "step": 2691 |
| }, |
| { |
| "epoch": 0.5921038161222919, |
| "grad_norm": 0.2730334401130676, |
| "learning_rate": 8e-05, |
| "loss": 1.6665, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.5923237655339272, |
| "grad_norm": 0.27828192710876465, |
| "learning_rate": 8e-05, |
| "loss": 1.6905, |
| "step": 2693 |
| }, |
| { |
| "epoch": 0.5925437149455626, |
| "grad_norm": 0.26481491327285767, |
| "learning_rate": 8e-05, |
| "loss": 1.5056, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.5927636643571979, |
| "grad_norm": 0.2684583365917206, |
| "learning_rate": 8e-05, |
| "loss": 1.4181, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5929836137688331, |
| "grad_norm": 0.2848527431488037, |
| "learning_rate": 8e-05, |
| "loss": 1.6243, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.5932035631804685, |
| "grad_norm": 0.2943567931652069, |
| "learning_rate": 8e-05, |
| "loss": 1.7296, |
| "step": 2697 |
| }, |
| { |
| "epoch": 0.5934235125921038, |
| "grad_norm": 0.2790435552597046, |
| "learning_rate": 8e-05, |
| "loss": 1.611, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.5936434620037392, |
| "grad_norm": 0.3020678460597992, |
| "learning_rate": 8e-05, |
| "loss": 1.8619, |
| "step": 2699 |
| }, |
| { |
| "epoch": 0.5938634114153745, |
| "grad_norm": 0.2809624969959259, |
| "learning_rate": 8e-05, |
| "loss": 1.7253, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5940833608270097, |
| "grad_norm": 0.2655926048755646, |
| "learning_rate": 8e-05, |
| "loss": 1.5514, |
| "step": 2701 |
| }, |
| { |
| "epoch": 0.5943033102386451, |
| "grad_norm": 0.28663522005081177, |
| "learning_rate": 8e-05, |
| "loss": 1.6708, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.5945232596502804, |
| "grad_norm": 0.28419819474220276, |
| "learning_rate": 8e-05, |
| "loss": 1.6551, |
| "step": 2703 |
| }, |
| { |
| "epoch": 0.5947432090619158, |
| "grad_norm": 0.29084041714668274, |
| "learning_rate": 8e-05, |
| "loss": 1.7509, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.5949631584735511, |
| "grad_norm": 0.27892929315567017, |
| "learning_rate": 8e-05, |
| "loss": 1.6507, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.5951831078851864, |
| "grad_norm": 0.29692748188972473, |
| "learning_rate": 8e-05, |
| "loss": 1.8667, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.5954030572968217, |
| "grad_norm": 0.2867085933685303, |
| "learning_rate": 8e-05, |
| "loss": 1.6157, |
| "step": 2707 |
| }, |
| { |
| "epoch": 0.595623006708457, |
| "grad_norm": 0.29867735505104065, |
| "learning_rate": 8e-05, |
| "loss": 1.7813, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.5958429561200924, |
| "grad_norm": 0.28061944246292114, |
| "learning_rate": 8e-05, |
| "loss": 1.6157, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.5960629055317277, |
| "grad_norm": 0.2807196080684662, |
| "learning_rate": 8e-05, |
| "loss": 1.5745, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.596282854943363, |
| "grad_norm": 0.2854728698730469, |
| "learning_rate": 8e-05, |
| "loss": 1.78, |
| "step": 2711 |
| }, |
| { |
| "epoch": 0.5965028043549984, |
| "grad_norm": 0.2980540990829468, |
| "learning_rate": 8e-05, |
| "loss": 1.8421, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.5967227537666336, |
| "grad_norm": 0.2892910838127136, |
| "learning_rate": 8e-05, |
| "loss": 1.6555, |
| "step": 2713 |
| }, |
| { |
| "epoch": 0.596942703178269, |
| "grad_norm": 0.2773078680038452, |
| "learning_rate": 8e-05, |
| "loss": 1.609, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.5971626525899043, |
| "grad_norm": 0.29283806681632996, |
| "learning_rate": 8e-05, |
| "loss": 1.7398, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.5973826020015397, |
| "grad_norm": 0.2872734069824219, |
| "learning_rate": 8e-05, |
| "loss": 1.6864, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.597602551413175, |
| "grad_norm": 0.26770031452178955, |
| "learning_rate": 8e-05, |
| "loss": 1.5877, |
| "step": 2717 |
| }, |
| { |
| "epoch": 0.5978225008248103, |
| "grad_norm": 0.2958748936653137, |
| "learning_rate": 8e-05, |
| "loss": 1.6565, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.5980424502364456, |
| "grad_norm": 0.30203044414520264, |
| "learning_rate": 8e-05, |
| "loss": 1.7878, |
| "step": 2719 |
| }, |
| { |
| "epoch": 0.5982623996480809, |
| "grad_norm": 0.29320842027664185, |
| "learning_rate": 8e-05, |
| "loss": 1.5706, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.5984823490597163, |
| "grad_norm": 0.29835638403892517, |
| "learning_rate": 8e-05, |
| "loss": 1.7271, |
| "step": 2721 |
| }, |
| { |
| "epoch": 0.5987022984713516, |
| "grad_norm": 0.36251741647720337, |
| "learning_rate": 8e-05, |
| "loss": 1.5832, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.5989222478829869, |
| "grad_norm": 0.28875645995140076, |
| "learning_rate": 8e-05, |
| "loss": 1.8148, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.5991421972946223, |
| "grad_norm": 0.27607399225234985, |
| "learning_rate": 8e-05, |
| "loss": 1.6024, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.5993621467062575, |
| "grad_norm": 0.290351539850235, |
| "learning_rate": 8e-05, |
| "loss": 1.7287, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.5995820961178929, |
| "grad_norm": 0.28432413935661316, |
| "learning_rate": 8e-05, |
| "loss": 1.8603, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.5998020455295282, |
| "grad_norm": 0.2780609130859375, |
| "learning_rate": 8e-05, |
| "loss": 1.6839, |
| "step": 2727 |
| }, |
| { |
| "epoch": 0.6000219949411635, |
| "grad_norm": 0.31952062249183655, |
| "learning_rate": 8e-05, |
| "loss": 1.585, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.6002419443527989, |
| "grad_norm": 0.2631243169307709, |
| "learning_rate": 8e-05, |
| "loss": 1.6074, |
| "step": 2729 |
| }, |
| { |
| "epoch": 0.6004618937644342, |
| "grad_norm": 0.28518691658973694, |
| "learning_rate": 8e-05, |
| "loss": 1.6944, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.6006818431760695, |
| "grad_norm": 0.29021504521369934, |
| "learning_rate": 8e-05, |
| "loss": 1.5919, |
| "step": 2731 |
| }, |
| { |
| "epoch": 0.6009017925877048, |
| "grad_norm": 0.2772546410560608, |
| "learning_rate": 8e-05, |
| "loss": 1.6372, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.6011217419993401, |
| "grad_norm": 0.27938538789749146, |
| "learning_rate": 8e-05, |
| "loss": 1.7311, |
| "step": 2733 |
| }, |
| { |
| "epoch": 0.6013416914109755, |
| "grad_norm": 0.2936658561229706, |
| "learning_rate": 8e-05, |
| "loss": 1.695, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.6015616408226108, |
| "grad_norm": 0.2893039286136627, |
| "learning_rate": 8e-05, |
| "loss": 1.6837, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.6017815902342462, |
| "grad_norm": 0.28634974360466003, |
| "learning_rate": 8e-05, |
| "loss": 1.6155, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.6020015396458814, |
| "grad_norm": 0.2868409752845764, |
| "learning_rate": 8e-05, |
| "loss": 1.5901, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.6022214890575168, |
| "grad_norm": 0.28888818621635437, |
| "learning_rate": 8e-05, |
| "loss": 1.6951, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.6024414384691521, |
| "grad_norm": 0.2881872355937958, |
| "learning_rate": 8e-05, |
| "loss": 1.7648, |
| "step": 2739 |
| }, |
| { |
| "epoch": 0.6026613878807874, |
| "grad_norm": 0.29601114988327026, |
| "learning_rate": 8e-05, |
| "loss": 1.6499, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6028813372924228, |
| "grad_norm": 0.28861135244369507, |
| "learning_rate": 8e-05, |
| "loss": 1.6896, |
| "step": 2741 |
| }, |
| { |
| "epoch": 0.603101286704058, |
| "grad_norm": 0.30852892994880676, |
| "learning_rate": 8e-05, |
| "loss": 1.8539, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.6033212361156934, |
| "grad_norm": 0.2659029960632324, |
| "learning_rate": 8e-05, |
| "loss": 1.5581, |
| "step": 2743 |
| }, |
| { |
| "epoch": 0.6035411855273287, |
| "grad_norm": 0.2938629686832428, |
| "learning_rate": 8e-05, |
| "loss": 1.6893, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.603761134938964, |
| "grad_norm": 0.3215024769306183, |
| "learning_rate": 8e-05, |
| "loss": 1.809, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.6039810843505994, |
| "grad_norm": 0.3320122957229614, |
| "learning_rate": 8e-05, |
| "loss": 1.7137, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.6042010337622347, |
| "grad_norm": 0.2901141047477722, |
| "learning_rate": 8e-05, |
| "loss": 1.7675, |
| "step": 2747 |
| }, |
| { |
| "epoch": 0.60442098317387, |
| "grad_norm": 0.2905901074409485, |
| "learning_rate": 8e-05, |
| "loss": 1.7454, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.6046409325855053, |
| "grad_norm": 0.2820628583431244, |
| "learning_rate": 8e-05, |
| "loss": 1.7143, |
| "step": 2749 |
| }, |
| { |
| "epoch": 0.6048608819971406, |
| "grad_norm": 0.29754188656806946, |
| "learning_rate": 8e-05, |
| "loss": 1.6957, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.605080831408776, |
| "grad_norm": 0.28644484281539917, |
| "learning_rate": 8e-05, |
| "loss": 1.864, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.6053007808204113, |
| "grad_norm": 0.2816253900527954, |
| "learning_rate": 8e-05, |
| "loss": 1.6956, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.6055207302320467, |
| "grad_norm": 0.27785420417785645, |
| "learning_rate": 8e-05, |
| "loss": 1.6618, |
| "step": 2753 |
| }, |
| { |
| "epoch": 0.605740679643682, |
| "grad_norm": 0.2993432283401489, |
| "learning_rate": 8e-05, |
| "loss": 1.6782, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.6059606290553172, |
| "grad_norm": 0.2837073802947998, |
| "learning_rate": 8e-05, |
| "loss": 1.6096, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.6061805784669526, |
| "grad_norm": 0.2930501401424408, |
| "learning_rate": 8e-05, |
| "loss": 1.7036, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.6064005278785879, |
| "grad_norm": 0.2830953299999237, |
| "learning_rate": 8e-05, |
| "loss": 1.7393, |
| "step": 2757 |
| }, |
| { |
| "epoch": 0.6066204772902233, |
| "grad_norm": 0.3069010078907013, |
| "learning_rate": 8e-05, |
| "loss": 1.5636, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.6068404267018586, |
| "grad_norm": 0.2761766314506531, |
| "learning_rate": 8e-05, |
| "loss": 1.7233, |
| "step": 2759 |
| }, |
| { |
| "epoch": 0.607060376113494, |
| "grad_norm": 0.28254058957099915, |
| "learning_rate": 8e-05, |
| "loss": 1.7132, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6072803255251292, |
| "grad_norm": 0.27911651134490967, |
| "learning_rate": 8e-05, |
| "loss": 1.7782, |
| "step": 2761 |
| }, |
| { |
| "epoch": 0.6075002749367645, |
| "grad_norm": 0.2875358462333679, |
| "learning_rate": 8e-05, |
| "loss": 1.6974, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.6077202243483999, |
| "grad_norm": 0.28940457105636597, |
| "learning_rate": 8e-05, |
| "loss": 1.6274, |
| "step": 2763 |
| }, |
| { |
| "epoch": 0.6079401737600352, |
| "grad_norm": 0.27163782715797424, |
| "learning_rate": 8e-05, |
| "loss": 1.6507, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.6081601231716706, |
| "grad_norm": 0.2914412021636963, |
| "learning_rate": 8e-05, |
| "loss": 1.6826, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.6083800725833058, |
| "grad_norm": 0.31414681673049927, |
| "learning_rate": 8e-05, |
| "loss": 1.8466, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.6086000219949411, |
| "grad_norm": 0.3015105426311493, |
| "learning_rate": 8e-05, |
| "loss": 1.599, |
| "step": 2767 |
| }, |
| { |
| "epoch": 0.6088199714065765, |
| "grad_norm": 0.27743127942085266, |
| "learning_rate": 8e-05, |
| "loss": 1.5278, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.6090399208182118, |
| "grad_norm": 0.2868049442768097, |
| "learning_rate": 8e-05, |
| "loss": 1.7008, |
| "step": 2769 |
| }, |
| { |
| "epoch": 0.6092598702298472, |
| "grad_norm": 0.2832272946834564, |
| "learning_rate": 8e-05, |
| "loss": 1.6368, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6094798196414825, |
| "grad_norm": 0.28054770827293396, |
| "learning_rate": 8e-05, |
| "loss": 1.6475, |
| "step": 2771 |
| }, |
| { |
| "epoch": 0.6096997690531177, |
| "grad_norm": 0.28185421228408813, |
| "learning_rate": 8e-05, |
| "loss": 1.7731, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.6099197184647531, |
| "grad_norm": 0.2819845676422119, |
| "learning_rate": 8e-05, |
| "loss": 1.6824, |
| "step": 2773 |
| }, |
| { |
| "epoch": 0.6101396678763884, |
| "grad_norm": 0.2764539420604706, |
| "learning_rate": 8e-05, |
| "loss": 1.7001, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.6103596172880238, |
| "grad_norm": 0.30475977063179016, |
| "learning_rate": 8e-05, |
| "loss": 1.8297, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.6105795666996591, |
| "grad_norm": 0.2848237454891205, |
| "learning_rate": 8e-05, |
| "loss": 1.7453, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.6107995161112943, |
| "grad_norm": 0.28268033266067505, |
| "learning_rate": 8e-05, |
| "loss": 1.5241, |
| "step": 2777 |
| }, |
| { |
| "epoch": 0.6110194655229297, |
| "grad_norm": 0.27673062682151794, |
| "learning_rate": 8e-05, |
| "loss": 1.5273, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.611239414934565, |
| "grad_norm": 0.28202882409095764, |
| "learning_rate": 8e-05, |
| "loss": 1.5769, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.6114593643462004, |
| "grad_norm": 0.28480303287506104, |
| "learning_rate": 8e-05, |
| "loss": 1.7456, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6116793137578357, |
| "grad_norm": 0.3028055727481842, |
| "learning_rate": 8e-05, |
| "loss": 1.6652, |
| "step": 2781 |
| }, |
| { |
| "epoch": 0.6118992631694711, |
| "grad_norm": 0.28677237033843994, |
| "learning_rate": 8e-05, |
| "loss": 1.6877, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.6121192125811064, |
| "grad_norm": 0.3057413399219513, |
| "learning_rate": 8e-05, |
| "loss": 1.9811, |
| "step": 2783 |
| }, |
| { |
| "epoch": 0.6123391619927416, |
| "grad_norm": 0.2802276313304901, |
| "learning_rate": 8e-05, |
| "loss": 1.5965, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.612559111404377, |
| "grad_norm": 0.27934229373931885, |
| "learning_rate": 8e-05, |
| "loss": 1.5959, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.6127790608160123, |
| "grad_norm": 0.2864493429660797, |
| "learning_rate": 8e-05, |
| "loss": 1.6289, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.6129990102276477, |
| "grad_norm": 0.26668915152549744, |
| "learning_rate": 8e-05, |
| "loss": 1.4584, |
| "step": 2787 |
| }, |
| { |
| "epoch": 0.613218959639283, |
| "grad_norm": 0.28092291951179504, |
| "learning_rate": 8e-05, |
| "loss": 1.6641, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.6134389090509182, |
| "grad_norm": 0.2933676242828369, |
| "learning_rate": 8e-05, |
| "loss": 1.9453, |
| "step": 2789 |
| }, |
| { |
| "epoch": 0.6136588584625536, |
| "grad_norm": 0.31618431210517883, |
| "learning_rate": 8e-05, |
| "loss": 1.594, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6138788078741889, |
| "grad_norm": 0.28090760111808777, |
| "learning_rate": 8e-05, |
| "loss": 1.7531, |
| "step": 2791 |
| }, |
| { |
| "epoch": 0.6140987572858243, |
| "grad_norm": 0.3137405216693878, |
| "learning_rate": 8e-05, |
| "loss": 1.7243, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.6143187066974596, |
| "grad_norm": 0.2949986755847931, |
| "learning_rate": 8e-05, |
| "loss": 1.7581, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.6145386561090949, |
| "grad_norm": 0.28396037220954895, |
| "learning_rate": 8e-05, |
| "loss": 1.6995, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.6147586055207303, |
| "grad_norm": 0.26976051926612854, |
| "learning_rate": 8e-05, |
| "loss": 1.654, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.6149785549323655, |
| "grad_norm": 0.27323633432388306, |
| "learning_rate": 8e-05, |
| "loss": 1.6944, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.6151985043440009, |
| "grad_norm": 0.29849350452423096, |
| "learning_rate": 8e-05, |
| "loss": 1.6127, |
| "step": 2797 |
| }, |
| { |
| "epoch": 0.6154184537556362, |
| "grad_norm": 0.28575918078422546, |
| "learning_rate": 8e-05, |
| "loss": 1.7579, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.6156384031672715, |
| "grad_norm": 0.26723456382751465, |
| "learning_rate": 8e-05, |
| "loss": 1.5461, |
| "step": 2799 |
| }, |
| { |
| "epoch": 0.6158583525789069, |
| "grad_norm": 0.29076528549194336, |
| "learning_rate": 8e-05, |
| "loss": 1.7001, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6160783019905421, |
| "grad_norm": 0.27913492918014526, |
| "learning_rate": 8e-05, |
| "loss": 1.3878, |
| "step": 2801 |
| }, |
| { |
| "epoch": 0.6162982514021775, |
| "grad_norm": 0.2841816246509552, |
| "learning_rate": 8e-05, |
| "loss": 1.6871, |
| "step": 2802 |
| }, |
| { |
| "epoch": 0.6165182008138128, |
| "grad_norm": 0.26845458149909973, |
| "learning_rate": 8e-05, |
| "loss": 1.5518, |
| "step": 2803 |
| }, |
| { |
| "epoch": 0.6167381502254482, |
| "grad_norm": 0.30308809876441956, |
| "learning_rate": 8e-05, |
| "loss": 1.7735, |
| "step": 2804 |
| }, |
| { |
| "epoch": 0.6169580996370835, |
| "grad_norm": 0.2812938690185547, |
| "learning_rate": 8e-05, |
| "loss": 1.7043, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.6171780490487188, |
| "grad_norm": 0.27101054787635803, |
| "learning_rate": 8e-05, |
| "loss": 1.6156, |
| "step": 2806 |
| }, |
| { |
| "epoch": 0.6173979984603541, |
| "grad_norm": 0.2900649607181549, |
| "learning_rate": 8e-05, |
| "loss": 1.7119, |
| "step": 2807 |
| }, |
| { |
| "epoch": 0.6176179478719894, |
| "grad_norm": 0.3011523187160492, |
| "learning_rate": 8e-05, |
| "loss": 1.7085, |
| "step": 2808 |
| }, |
| { |
| "epoch": 0.6178378972836248, |
| "grad_norm": 0.2845047116279602, |
| "learning_rate": 8e-05, |
| "loss": 1.6691, |
| "step": 2809 |
| }, |
| { |
| "epoch": 0.6180578466952601, |
| "grad_norm": 0.31060662865638733, |
| "learning_rate": 8e-05, |
| "loss": 1.831, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6182777961068954, |
| "grad_norm": 0.27987706661224365, |
| "learning_rate": 8e-05, |
| "loss": 1.4881, |
| "step": 2811 |
| }, |
| { |
| "epoch": 0.6184977455185308, |
| "grad_norm": 0.3197080194950104, |
| "learning_rate": 8e-05, |
| "loss": 1.7112, |
| "step": 2812 |
| }, |
| { |
| "epoch": 0.618717694930166, |
| "grad_norm": 0.31402066349983215, |
| "learning_rate": 8e-05, |
| "loss": 1.6535, |
| "step": 2813 |
| }, |
| { |
| "epoch": 0.6189376443418014, |
| "grad_norm": 0.303529292345047, |
| "learning_rate": 8e-05, |
| "loss": 1.6563, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.6191575937534367, |
| "grad_norm": 0.26674556732177734, |
| "learning_rate": 8e-05, |
| "loss": 1.5202, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.619377543165072, |
| "grad_norm": 0.30466997623443604, |
| "learning_rate": 8e-05, |
| "loss": 1.6014, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.6195974925767074, |
| "grad_norm": 0.2991195619106293, |
| "learning_rate": 8e-05, |
| "loss": 1.765, |
| "step": 2817 |
| }, |
| { |
| "epoch": 0.6198174419883427, |
| "grad_norm": 0.30000337958335876, |
| "learning_rate": 8e-05, |
| "loss": 1.7794, |
| "step": 2818 |
| }, |
| { |
| "epoch": 0.620037391399978, |
| "grad_norm": 0.29237842559814453, |
| "learning_rate": 8e-05, |
| "loss": 1.8753, |
| "step": 2819 |
| }, |
| { |
| "epoch": 0.6202573408116133, |
| "grad_norm": 0.2896344065666199, |
| "learning_rate": 8e-05, |
| "loss": 1.6137, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6204772902232486, |
| "grad_norm": 0.34269601106643677, |
| "learning_rate": 8e-05, |
| "loss": 1.7693, |
| "step": 2821 |
| }, |
| { |
| "epoch": 0.620697239634884, |
| "grad_norm": 0.30044153332710266, |
| "learning_rate": 8e-05, |
| "loss": 1.7286, |
| "step": 2822 |
| }, |
| { |
| "epoch": 0.6209171890465193, |
| "grad_norm": 0.2616185247898102, |
| "learning_rate": 8e-05, |
| "loss": 1.5373, |
| "step": 2823 |
| }, |
| { |
| "epoch": 0.6211371384581547, |
| "grad_norm": 0.3217238485813141, |
| "learning_rate": 8e-05, |
| "loss": 1.7681, |
| "step": 2824 |
| }, |
| { |
| "epoch": 0.6213570878697899, |
| "grad_norm": 0.284446120262146, |
| "learning_rate": 8e-05, |
| "loss": 1.5597, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.6215770372814253, |
| "grad_norm": 0.28698036074638367, |
| "learning_rate": 8e-05, |
| "loss": 1.6844, |
| "step": 2826 |
| }, |
| { |
| "epoch": 0.6217969866930606, |
| "grad_norm": 0.2828524708747864, |
| "learning_rate": 8e-05, |
| "loss": 1.7142, |
| "step": 2827 |
| }, |
| { |
| "epoch": 0.6220169361046959, |
| "grad_norm": 0.3004125952720642, |
| "learning_rate": 8e-05, |
| "loss": 1.6812, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.6222368855163313, |
| "grad_norm": 0.30438825488090515, |
| "learning_rate": 8e-05, |
| "loss": 1.6696, |
| "step": 2829 |
| }, |
| { |
| "epoch": 0.6224568349279666, |
| "grad_norm": 0.2654431164264679, |
| "learning_rate": 8e-05, |
| "loss": 1.5656, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6226767843396019, |
| "grad_norm": 0.28561410307884216, |
| "learning_rate": 8e-05, |
| "loss": 1.5924, |
| "step": 2831 |
| }, |
| { |
| "epoch": 0.6228967337512372, |
| "grad_norm": 0.29075953364372253, |
| "learning_rate": 8e-05, |
| "loss": 1.6026, |
| "step": 2832 |
| }, |
| { |
| "epoch": 0.6231166831628725, |
| "grad_norm": 0.3002355098724365, |
| "learning_rate": 8e-05, |
| "loss": 1.7783, |
| "step": 2833 |
| }, |
| { |
| "epoch": 0.6233366325745079, |
| "grad_norm": 0.2757151424884796, |
| "learning_rate": 8e-05, |
| "loss": 1.6404, |
| "step": 2834 |
| }, |
| { |
| "epoch": 0.6235565819861432, |
| "grad_norm": 0.28108781576156616, |
| "learning_rate": 8e-05, |
| "loss": 1.6727, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.6237765313977786, |
| "grad_norm": 0.31818297505378723, |
| "learning_rate": 8e-05, |
| "loss": 1.8859, |
| "step": 2836 |
| }, |
| { |
| "epoch": 0.6239964808094138, |
| "grad_norm": 0.30283015966415405, |
| "learning_rate": 8e-05, |
| "loss": 1.6967, |
| "step": 2837 |
| }, |
| { |
| "epoch": 0.6242164302210491, |
| "grad_norm": 0.28991082310676575, |
| "learning_rate": 8e-05, |
| "loss": 1.7564, |
| "step": 2838 |
| }, |
| { |
| "epoch": 0.6244363796326845, |
| "grad_norm": 0.27985113859176636, |
| "learning_rate": 8e-05, |
| "loss": 1.645, |
| "step": 2839 |
| }, |
| { |
| "epoch": 0.6246563290443198, |
| "grad_norm": 0.28599318861961365, |
| "learning_rate": 8e-05, |
| "loss": 1.7069, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6248762784559552, |
| "grad_norm": 0.291962593793869, |
| "learning_rate": 8e-05, |
| "loss": 1.7303, |
| "step": 2841 |
| }, |
| { |
| "epoch": 0.6250962278675904, |
| "grad_norm": 0.2977605164051056, |
| "learning_rate": 8e-05, |
| "loss": 1.6251, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.6253161772792257, |
| "grad_norm": 0.280979186296463, |
| "learning_rate": 8e-05, |
| "loss": 1.6132, |
| "step": 2843 |
| }, |
| { |
| "epoch": 0.6255361266908611, |
| "grad_norm": 0.30565154552459717, |
| "learning_rate": 8e-05, |
| "loss": 1.8351, |
| "step": 2844 |
| }, |
| { |
| "epoch": 0.6257560761024964, |
| "grad_norm": 0.2870398759841919, |
| "learning_rate": 8e-05, |
| "loss": 1.7169, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.6259760255141318, |
| "grad_norm": 0.2740568518638611, |
| "learning_rate": 8e-05, |
| "loss": 1.6666, |
| "step": 2846 |
| }, |
| { |
| "epoch": 0.6261959749257671, |
| "grad_norm": 0.27255693078041077, |
| "learning_rate": 8e-05, |
| "loss": 1.6112, |
| "step": 2847 |
| }, |
| { |
| "epoch": 0.6264159243374025, |
| "grad_norm": 0.2785317003726959, |
| "learning_rate": 8e-05, |
| "loss": 1.6532, |
| "step": 2848 |
| }, |
| { |
| "epoch": 0.6266358737490377, |
| "grad_norm": 0.2979902923107147, |
| "learning_rate": 8e-05, |
| "loss": 1.7981, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.626855823160673, |
| "grad_norm": 0.29625701904296875, |
| "learning_rate": 8e-05, |
| "loss": 1.7212, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6270757725723084, |
| "grad_norm": 0.2768239676952362, |
| "learning_rate": 8e-05, |
| "loss": 1.6358, |
| "step": 2851 |
| }, |
| { |
| "epoch": 0.6272957219839437, |
| "grad_norm": 0.2931036055088043, |
| "learning_rate": 8e-05, |
| "loss": 1.7728, |
| "step": 2852 |
| }, |
| { |
| "epoch": 0.6275156713955791, |
| "grad_norm": 0.2883271872997284, |
| "learning_rate": 8e-05, |
| "loss": 1.6403, |
| "step": 2853 |
| }, |
| { |
| "epoch": 0.6277356208072143, |
| "grad_norm": 0.31137219071388245, |
| "learning_rate": 8e-05, |
| "loss": 1.6454, |
| "step": 2854 |
| }, |
| { |
| "epoch": 0.6279555702188496, |
| "grad_norm": 0.3026840388774872, |
| "learning_rate": 8e-05, |
| "loss": 1.6534, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.628175519630485, |
| "grad_norm": 0.2950657606124878, |
| "learning_rate": 8e-05, |
| "loss": 1.7298, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.6283954690421203, |
| "grad_norm": 0.29347553849220276, |
| "learning_rate": 8e-05, |
| "loss": 1.58, |
| "step": 2857 |
| }, |
| { |
| "epoch": 0.6286154184537557, |
| "grad_norm": 0.28075262904167175, |
| "learning_rate": 8e-05, |
| "loss": 1.6262, |
| "step": 2858 |
| }, |
| { |
| "epoch": 0.628835367865391, |
| "grad_norm": 0.26556506752967834, |
| "learning_rate": 8e-05, |
| "loss": 1.5666, |
| "step": 2859 |
| }, |
| { |
| "epoch": 0.6290553172770262, |
| "grad_norm": 0.28918468952178955, |
| "learning_rate": 8e-05, |
| "loss": 1.6918, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6292752666886616, |
| "grad_norm": 0.2816839814186096, |
| "learning_rate": 8e-05, |
| "loss": 1.6387, |
| "step": 2861 |
| }, |
| { |
| "epoch": 0.6294952161002969, |
| "grad_norm": 0.2819633185863495, |
| "learning_rate": 8e-05, |
| "loss": 1.6945, |
| "step": 2862 |
| }, |
| { |
| "epoch": 0.6297151655119323, |
| "grad_norm": 0.2847195863723755, |
| "learning_rate": 8e-05, |
| "loss": 1.744, |
| "step": 2863 |
| }, |
| { |
| "epoch": 0.6299351149235676, |
| "grad_norm": 0.2706061899662018, |
| "learning_rate": 8e-05, |
| "loss": 1.6246, |
| "step": 2864 |
| }, |
| { |
| "epoch": 0.6301550643352029, |
| "grad_norm": 0.281125545501709, |
| "learning_rate": 8e-05, |
| "loss": 1.49, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.6303750137468382, |
| "grad_norm": 0.2861780822277069, |
| "learning_rate": 8e-05, |
| "loss": 1.7375, |
| "step": 2866 |
| }, |
| { |
| "epoch": 0.6305949631584735, |
| "grad_norm": 0.2654918134212494, |
| "learning_rate": 8e-05, |
| "loss": 1.5997, |
| "step": 2867 |
| }, |
| { |
| "epoch": 0.6308149125701089, |
| "grad_norm": 0.29169219732284546, |
| "learning_rate": 8e-05, |
| "loss": 1.8921, |
| "step": 2868 |
| }, |
| { |
| "epoch": 0.6310348619817442, |
| "grad_norm": 0.2858426570892334, |
| "learning_rate": 8e-05, |
| "loss": 1.8006, |
| "step": 2869 |
| }, |
| { |
| "epoch": 0.6312548113933796, |
| "grad_norm": 0.2712969183921814, |
| "learning_rate": 8e-05, |
| "loss": 1.5761, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6314747608050149, |
| "grad_norm": 0.28961536288261414, |
| "learning_rate": 8e-05, |
| "loss": 1.618, |
| "step": 2871 |
| }, |
| { |
| "epoch": 0.6316947102166501, |
| "grad_norm": 0.2879860997200012, |
| "learning_rate": 8e-05, |
| "loss": 1.6116, |
| "step": 2872 |
| }, |
| { |
| "epoch": 0.6319146596282855, |
| "grad_norm": 0.3009500801563263, |
| "learning_rate": 8e-05, |
| "loss": 1.8099, |
| "step": 2873 |
| }, |
| { |
| "epoch": 0.6321346090399208, |
| "grad_norm": 0.3012961149215698, |
| "learning_rate": 8e-05, |
| "loss": 1.8123, |
| "step": 2874 |
| }, |
| { |
| "epoch": 0.6323545584515562, |
| "grad_norm": 0.27382341027259827, |
| "learning_rate": 8e-05, |
| "loss": 1.5486, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.6325745078631915, |
| "grad_norm": 0.45538756251335144, |
| "learning_rate": 8e-05, |
| "loss": 1.7382, |
| "step": 2876 |
| }, |
| { |
| "epoch": 0.6327944572748267, |
| "grad_norm": 0.27454543113708496, |
| "learning_rate": 8e-05, |
| "loss": 1.6488, |
| "step": 2877 |
| }, |
| { |
| "epoch": 0.6330144066864621, |
| "grad_norm": 0.28111204504966736, |
| "learning_rate": 8e-05, |
| "loss": 1.7262, |
| "step": 2878 |
| }, |
| { |
| "epoch": 0.6332343560980974, |
| "grad_norm": 0.2855817675590515, |
| "learning_rate": 8e-05, |
| "loss": 1.6841, |
| "step": 2879 |
| }, |
| { |
| "epoch": 0.6334543055097328, |
| "grad_norm": 0.3017145097255707, |
| "learning_rate": 8e-05, |
| "loss": 1.759, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6336742549213681, |
| "grad_norm": 0.27578651905059814, |
| "learning_rate": 8e-05, |
| "loss": 1.5341, |
| "step": 2881 |
| }, |
| { |
| "epoch": 0.6338942043330034, |
| "grad_norm": 0.28522011637687683, |
| "learning_rate": 8e-05, |
| "loss": 1.5042, |
| "step": 2882 |
| }, |
| { |
| "epoch": 0.6341141537446388, |
| "grad_norm": 0.28013676404953003, |
| "learning_rate": 8e-05, |
| "loss": 1.591, |
| "step": 2883 |
| }, |
| { |
| "epoch": 0.634334103156274, |
| "grad_norm": 0.30440640449523926, |
| "learning_rate": 8e-05, |
| "loss": 1.6698, |
| "step": 2884 |
| }, |
| { |
| "epoch": 0.6345540525679094, |
| "grad_norm": 0.28555527329444885, |
| "learning_rate": 8e-05, |
| "loss": 1.6199, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.6347740019795447, |
| "grad_norm": 0.31451916694641113, |
| "learning_rate": 8e-05, |
| "loss": 1.6717, |
| "step": 2886 |
| }, |
| { |
| "epoch": 0.63499395139118, |
| "grad_norm": 0.3116842806339264, |
| "learning_rate": 8e-05, |
| "loss": 1.7043, |
| "step": 2887 |
| }, |
| { |
| "epoch": 0.6352139008028154, |
| "grad_norm": 0.30441299080848694, |
| "learning_rate": 8e-05, |
| "loss": 1.6953, |
| "step": 2888 |
| }, |
| { |
| "epoch": 0.6354338502144506, |
| "grad_norm": 0.2890806496143341, |
| "learning_rate": 8e-05, |
| "loss": 1.7363, |
| "step": 2889 |
| }, |
| { |
| "epoch": 0.635653799626086, |
| "grad_norm": 0.2715187072753906, |
| "learning_rate": 8e-05, |
| "loss": 1.5412, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6358737490377213, |
| "grad_norm": 0.32213905453681946, |
| "learning_rate": 8e-05, |
| "loss": 1.7735, |
| "step": 2891 |
| }, |
| { |
| "epoch": 0.6360936984493566, |
| "grad_norm": 0.28850191831588745, |
| "learning_rate": 8e-05, |
| "loss": 1.5156, |
| "step": 2892 |
| }, |
| { |
| "epoch": 0.636313647860992, |
| "grad_norm": 0.2934744358062744, |
| "learning_rate": 8e-05, |
| "loss": 1.6822, |
| "step": 2893 |
| }, |
| { |
| "epoch": 0.6365335972726273, |
| "grad_norm": 0.29068851470947266, |
| "learning_rate": 8e-05, |
| "loss": 1.7723, |
| "step": 2894 |
| }, |
| { |
| "epoch": 0.6367535466842627, |
| "grad_norm": 0.28490251302719116, |
| "learning_rate": 8e-05, |
| "loss": 1.7666, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.6369734960958979, |
| "grad_norm": 0.28677448630332947, |
| "learning_rate": 8e-05, |
| "loss": 1.6792, |
| "step": 2896 |
| }, |
| { |
| "epoch": 0.6371934455075333, |
| "grad_norm": 0.29424387216567993, |
| "learning_rate": 8e-05, |
| "loss": 1.6224, |
| "step": 2897 |
| }, |
| { |
| "epoch": 0.6374133949191686, |
| "grad_norm": 0.2872456908226013, |
| "learning_rate": 8e-05, |
| "loss": 1.6677, |
| "step": 2898 |
| }, |
| { |
| "epoch": 0.6376333443308039, |
| "grad_norm": 0.26886799931526184, |
| "learning_rate": 8e-05, |
| "loss": 1.5086, |
| "step": 2899 |
| }, |
| { |
| "epoch": 0.6378532937424393, |
| "grad_norm": 0.2737233638763428, |
| "learning_rate": 8e-05, |
| "loss": 1.6832, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6380732431540745, |
| "grad_norm": 0.2912994623184204, |
| "learning_rate": 8e-05, |
| "loss": 1.5945, |
| "step": 2901 |
| }, |
| { |
| "epoch": 0.6382931925657099, |
| "grad_norm": 0.2800372540950775, |
| "learning_rate": 8e-05, |
| "loss": 1.5661, |
| "step": 2902 |
| }, |
| { |
| "epoch": 0.6385131419773452, |
| "grad_norm": 0.3248150050640106, |
| "learning_rate": 8e-05, |
| "loss": 1.6098, |
| "step": 2903 |
| }, |
| { |
| "epoch": 0.6387330913889805, |
| "grad_norm": 0.27953609824180603, |
| "learning_rate": 8e-05, |
| "loss": 1.7255, |
| "step": 2904 |
| }, |
| { |
| "epoch": 0.6389530408006159, |
| "grad_norm": 0.276395708322525, |
| "learning_rate": 8e-05, |
| "loss": 1.6782, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.6391729902122512, |
| "grad_norm": 0.2754693031311035, |
| "learning_rate": 8e-05, |
| "loss": 1.5135, |
| "step": 2906 |
| }, |
| { |
| "epoch": 0.6393929396238865, |
| "grad_norm": 0.2756873369216919, |
| "learning_rate": 8e-05, |
| "loss": 1.6208, |
| "step": 2907 |
| }, |
| { |
| "epoch": 0.6396128890355218, |
| "grad_norm": 0.3032161593437195, |
| "learning_rate": 8e-05, |
| "loss": 1.7675, |
| "step": 2908 |
| }, |
| { |
| "epoch": 0.6398328384471571, |
| "grad_norm": 0.2915925085544586, |
| "learning_rate": 8e-05, |
| "loss": 1.6899, |
| "step": 2909 |
| }, |
| { |
| "epoch": 0.6400527878587925, |
| "grad_norm": 0.28415006399154663, |
| "learning_rate": 8e-05, |
| "loss": 1.7218, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6402727372704278, |
| "grad_norm": 0.3023785352706909, |
| "learning_rate": 8e-05, |
| "loss": 1.7959, |
| "step": 2911 |
| }, |
| { |
| "epoch": 0.6404926866820632, |
| "grad_norm": 0.2656283974647522, |
| "learning_rate": 8e-05, |
| "loss": 1.6287, |
| "step": 2912 |
| }, |
| { |
| "epoch": 0.6407126360936984, |
| "grad_norm": 0.2835081219673157, |
| "learning_rate": 8e-05, |
| "loss": 1.6597, |
| "step": 2913 |
| }, |
| { |
| "epoch": 0.6409325855053337, |
| "grad_norm": 0.2756771743297577, |
| "learning_rate": 8e-05, |
| "loss": 1.625, |
| "step": 2914 |
| }, |
| { |
| "epoch": 0.6411525349169691, |
| "grad_norm": 0.283149778842926, |
| "learning_rate": 8e-05, |
| "loss": 1.662, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.6413724843286044, |
| "grad_norm": 0.28902921080589294, |
| "learning_rate": 8e-05, |
| "loss": 1.4862, |
| "step": 2916 |
| }, |
| { |
| "epoch": 0.6415924337402398, |
| "grad_norm": 0.28932076692581177, |
| "learning_rate": 8e-05, |
| "loss": 1.6109, |
| "step": 2917 |
| }, |
| { |
| "epoch": 0.641812383151875, |
| "grad_norm": 0.30964934825897217, |
| "learning_rate": 8e-05, |
| "loss": 1.7021, |
| "step": 2918 |
| }, |
| { |
| "epoch": 0.6420323325635104, |
| "grad_norm": 0.28258854150772095, |
| "learning_rate": 8e-05, |
| "loss": 1.6536, |
| "step": 2919 |
| }, |
| { |
| "epoch": 0.6422522819751457, |
| "grad_norm": 0.2939313054084778, |
| "learning_rate": 8e-05, |
| "loss": 1.6299, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.642472231386781, |
| "grad_norm": 0.31722190976142883, |
| "learning_rate": 8e-05, |
| "loss": 1.6789, |
| "step": 2921 |
| }, |
| { |
| "epoch": 0.6426921807984164, |
| "grad_norm": 0.29024428129196167, |
| "learning_rate": 8e-05, |
| "loss": 1.7429, |
| "step": 2922 |
| }, |
| { |
| "epoch": 0.6429121302100517, |
| "grad_norm": 0.2716485559940338, |
| "learning_rate": 8e-05, |
| "loss": 1.619, |
| "step": 2923 |
| }, |
| { |
| "epoch": 0.6431320796216871, |
| "grad_norm": 0.2986311614513397, |
| "learning_rate": 8e-05, |
| "loss": 1.7574, |
| "step": 2924 |
| }, |
| { |
| "epoch": 0.6433520290333223, |
| "grad_norm": 0.29542550444602966, |
| "learning_rate": 8e-05, |
| "loss": 1.7322, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.6435719784449576, |
| "grad_norm": 0.27215078473091125, |
| "learning_rate": 8e-05, |
| "loss": 1.5544, |
| "step": 2926 |
| }, |
| { |
| "epoch": 0.643791927856593, |
| "grad_norm": 0.29105404019355774, |
| "learning_rate": 8e-05, |
| "loss": 1.7499, |
| "step": 2927 |
| }, |
| { |
| "epoch": 0.6440118772682283, |
| "grad_norm": 0.2990782558917999, |
| "learning_rate": 8e-05, |
| "loss": 1.7987, |
| "step": 2928 |
| }, |
| { |
| "epoch": 0.6442318266798637, |
| "grad_norm": 0.27296003699302673, |
| "learning_rate": 8e-05, |
| "loss": 1.6271, |
| "step": 2929 |
| }, |
| { |
| "epoch": 0.644451776091499, |
| "grad_norm": 0.27194517850875854, |
| "learning_rate": 8e-05, |
| "loss": 1.5178, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6446717255031342, |
| "grad_norm": 0.2720150053501129, |
| "learning_rate": 8e-05, |
| "loss": 1.5665, |
| "step": 2931 |
| }, |
| { |
| "epoch": 0.6448916749147696, |
| "grad_norm": 0.2825513780117035, |
| "learning_rate": 8e-05, |
| "loss": 1.6276, |
| "step": 2932 |
| }, |
| { |
| "epoch": 0.6451116243264049, |
| "grad_norm": 0.2869420349597931, |
| "learning_rate": 8e-05, |
| "loss": 1.5306, |
| "step": 2933 |
| }, |
| { |
| "epoch": 0.6453315737380403, |
| "grad_norm": 0.2829979360103607, |
| "learning_rate": 8e-05, |
| "loss": 1.7132, |
| "step": 2934 |
| }, |
| { |
| "epoch": 0.6455515231496756, |
| "grad_norm": 0.28047260642051697, |
| "learning_rate": 8e-05, |
| "loss": 1.5932, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.6457714725613108, |
| "grad_norm": 0.287765234708786, |
| "learning_rate": 8e-05, |
| "loss": 1.6029, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.6459914219729462, |
| "grad_norm": 0.2858487665653229, |
| "learning_rate": 8e-05, |
| "loss": 1.5959, |
| "step": 2937 |
| }, |
| { |
| "epoch": 0.6462113713845815, |
| "grad_norm": 0.29041311144828796, |
| "learning_rate": 8e-05, |
| "loss": 1.6348, |
| "step": 2938 |
| }, |
| { |
| "epoch": 0.6464313207962169, |
| "grad_norm": 0.2873425781726837, |
| "learning_rate": 8e-05, |
| "loss": 1.6514, |
| "step": 2939 |
| }, |
| { |
| "epoch": 0.6466512702078522, |
| "grad_norm": 0.2767978310585022, |
| "learning_rate": 8e-05, |
| "loss": 1.5471, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6468712196194876, |
| "grad_norm": 0.27061501145362854, |
| "learning_rate": 8e-05, |
| "loss": 1.5439, |
| "step": 2941 |
| }, |
| { |
| "epoch": 0.6470911690311228, |
| "grad_norm": 0.2724677324295044, |
| "learning_rate": 8e-05, |
| "loss": 1.6808, |
| "step": 2942 |
| }, |
| { |
| "epoch": 0.6473111184427581, |
| "grad_norm": 0.2804121971130371, |
| "learning_rate": 8e-05, |
| "loss": 1.7923, |
| "step": 2943 |
| }, |
| { |
| "epoch": 0.6475310678543935, |
| "grad_norm": 0.2881599962711334, |
| "learning_rate": 8e-05, |
| "loss": 1.745, |
| "step": 2944 |
| }, |
| { |
| "epoch": 0.6477510172660288, |
| "grad_norm": 0.3064921200275421, |
| "learning_rate": 8e-05, |
| "loss": 1.7387, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.6479709666776642, |
| "grad_norm": 0.2994825541973114, |
| "learning_rate": 8e-05, |
| "loss": 1.7143, |
| "step": 2946 |
| }, |
| { |
| "epoch": 0.6481909160892995, |
| "grad_norm": 0.29315468668937683, |
| "learning_rate": 8e-05, |
| "loss": 1.7712, |
| "step": 2947 |
| }, |
| { |
| "epoch": 0.6484108655009347, |
| "grad_norm": 0.2923111915588379, |
| "learning_rate": 8e-05, |
| "loss": 1.6861, |
| "step": 2948 |
| }, |
| { |
| "epoch": 0.6486308149125701, |
| "grad_norm": 0.2861957848072052, |
| "learning_rate": 8e-05, |
| "loss": 1.5951, |
| "step": 2949 |
| }, |
| { |
| "epoch": 0.6488507643242054, |
| "grad_norm": 0.2978787422180176, |
| "learning_rate": 8e-05, |
| "loss": 1.6617, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6490707137358408, |
| "grad_norm": 0.28596314787864685, |
| "learning_rate": 8e-05, |
| "loss": 1.7623, |
| "step": 2951 |
| }, |
| { |
| "epoch": 0.6492906631474761, |
| "grad_norm": 0.29754844307899475, |
| "learning_rate": 8e-05, |
| "loss": 1.6497, |
| "step": 2952 |
| }, |
| { |
| "epoch": 0.6495106125591114, |
| "grad_norm": 0.2931132912635803, |
| "learning_rate": 8e-05, |
| "loss": 1.5972, |
| "step": 2953 |
| }, |
| { |
| "epoch": 0.6497305619707467, |
| "grad_norm": 0.2667228877544403, |
| "learning_rate": 8e-05, |
| "loss": 1.5598, |
| "step": 2954 |
| }, |
| { |
| "epoch": 0.649950511382382, |
| "grad_norm": 0.2866271436214447, |
| "learning_rate": 8e-05, |
| "loss": 1.5908, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.6501704607940174, |
| "grad_norm": 0.28429698944091797, |
| "learning_rate": 8e-05, |
| "loss": 1.6582, |
| "step": 2956 |
| }, |
| { |
| "epoch": 0.6503904102056527, |
| "grad_norm": 0.27636975049972534, |
| "learning_rate": 8e-05, |
| "loss": 1.6778, |
| "step": 2957 |
| }, |
| { |
| "epoch": 0.650610359617288, |
| "grad_norm": 0.3005516529083252, |
| "learning_rate": 8e-05, |
| "loss": 1.7191, |
| "step": 2958 |
| }, |
| { |
| "epoch": 0.6508303090289234, |
| "grad_norm": 0.28478094935417175, |
| "learning_rate": 8e-05, |
| "loss": 1.5651, |
| "step": 2959 |
| }, |
| { |
| "epoch": 0.6510502584405586, |
| "grad_norm": 0.2879832088947296, |
| "learning_rate": 8e-05, |
| "loss": 1.7055, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.651270207852194, |
| "grad_norm": 0.2899249196052551, |
| "learning_rate": 8e-05, |
| "loss": 1.6682, |
| "step": 2961 |
| }, |
| { |
| "epoch": 0.6514901572638293, |
| "grad_norm": 0.26806798577308655, |
| "learning_rate": 8e-05, |
| "loss": 1.6962, |
| "step": 2962 |
| }, |
| { |
| "epoch": 0.6517101066754647, |
| "grad_norm": 0.2929481565952301, |
| "learning_rate": 8e-05, |
| "loss": 1.6388, |
| "step": 2963 |
| }, |
| { |
| "epoch": 0.6519300560871, |
| "grad_norm": 0.2920469641685486, |
| "learning_rate": 8e-05, |
| "loss": 1.8584, |
| "step": 2964 |
| }, |
| { |
| "epoch": 0.6521500054987353, |
| "grad_norm": 0.285696417093277, |
| "learning_rate": 8e-05, |
| "loss": 1.6491, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.6523699549103706, |
| "grad_norm": 0.2991807162761688, |
| "learning_rate": 8e-05, |
| "loss": 1.6565, |
| "step": 2966 |
| }, |
| { |
| "epoch": 0.6525899043220059, |
| "grad_norm": 0.27987217903137207, |
| "learning_rate": 8e-05, |
| "loss": 1.6274, |
| "step": 2967 |
| }, |
| { |
| "epoch": 0.6528098537336413, |
| "grad_norm": 0.2810576856136322, |
| "learning_rate": 8e-05, |
| "loss": 1.5294, |
| "step": 2968 |
| }, |
| { |
| "epoch": 0.6530298031452766, |
| "grad_norm": 0.2755715847015381, |
| "learning_rate": 8e-05, |
| "loss": 1.6319, |
| "step": 2969 |
| }, |
| { |
| "epoch": 0.6532497525569119, |
| "grad_norm": 0.3041331171989441, |
| "learning_rate": 8e-05, |
| "loss": 1.5308, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.6534697019685473, |
| "grad_norm": 0.2858032286167145, |
| "learning_rate": 8e-05, |
| "loss": 1.6557, |
| "step": 2971 |
| }, |
| { |
| "epoch": 0.6536896513801825, |
| "grad_norm": 0.3001968562602997, |
| "learning_rate": 8e-05, |
| "loss": 1.6841, |
| "step": 2972 |
| }, |
| { |
| "epoch": 0.6539096007918179, |
| "grad_norm": 0.29567384719848633, |
| "learning_rate": 8e-05, |
| "loss": 1.6329, |
| "step": 2973 |
| }, |
| { |
| "epoch": 0.6541295502034532, |
| "grad_norm": 0.29874905943870544, |
| "learning_rate": 8e-05, |
| "loss": 1.7178, |
| "step": 2974 |
| }, |
| { |
| "epoch": 0.6543494996150885, |
| "grad_norm": 0.34721627831459045, |
| "learning_rate": 8e-05, |
| "loss": 1.8192, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.6545694490267239, |
| "grad_norm": 0.2965874671936035, |
| "learning_rate": 8e-05, |
| "loss": 1.8156, |
| "step": 2976 |
| }, |
| { |
| "epoch": 0.6547893984383591, |
| "grad_norm": 0.2710880637168884, |
| "learning_rate": 8e-05, |
| "loss": 1.5101, |
| "step": 2977 |
| }, |
| { |
| "epoch": 0.6550093478499945, |
| "grad_norm": 0.2852049171924591, |
| "learning_rate": 8e-05, |
| "loss": 1.5835, |
| "step": 2978 |
| }, |
| { |
| "epoch": 0.6552292972616298, |
| "grad_norm": 0.2898092567920685, |
| "learning_rate": 8e-05, |
| "loss": 1.599, |
| "step": 2979 |
| }, |
| { |
| "epoch": 0.6554492466732651, |
| "grad_norm": 0.2880117893218994, |
| "learning_rate": 8e-05, |
| "loss": 1.5904, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6556691960849005, |
| "grad_norm": 0.2850951850414276, |
| "learning_rate": 8e-05, |
| "loss": 1.5551, |
| "step": 2981 |
| }, |
| { |
| "epoch": 0.6558891454965358, |
| "grad_norm": 0.292883038520813, |
| "learning_rate": 8e-05, |
| "loss": 1.5587, |
| "step": 2982 |
| }, |
| { |
| "epoch": 0.6561090949081712, |
| "grad_norm": 0.3050660490989685, |
| "learning_rate": 8e-05, |
| "loss": 1.7904, |
| "step": 2983 |
| }, |
| { |
| "epoch": 0.6563290443198064, |
| "grad_norm": 0.31059566140174866, |
| "learning_rate": 8e-05, |
| "loss": 1.7716, |
| "step": 2984 |
| }, |
| { |
| "epoch": 0.6565489937314418, |
| "grad_norm": 0.33118265867233276, |
| "learning_rate": 8e-05, |
| "loss": 1.7878, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.6567689431430771, |
| "grad_norm": 0.27835318446159363, |
| "learning_rate": 8e-05, |
| "loss": 1.7069, |
| "step": 2986 |
| }, |
| { |
| "epoch": 0.6569888925547124, |
| "grad_norm": 0.33706921339035034, |
| "learning_rate": 8e-05, |
| "loss": 1.7135, |
| "step": 2987 |
| }, |
| { |
| "epoch": 0.6572088419663478, |
| "grad_norm": 0.3115323781967163, |
| "learning_rate": 8e-05, |
| "loss": 1.7396, |
| "step": 2988 |
| }, |
| { |
| "epoch": 0.657428791377983, |
| "grad_norm": 0.2827862501144409, |
| "learning_rate": 8e-05, |
| "loss": 1.6889, |
| "step": 2989 |
| }, |
| { |
| "epoch": 0.6576487407896184, |
| "grad_norm": 0.29057440161705017, |
| "learning_rate": 8e-05, |
| "loss": 1.7208, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6578686902012537, |
| "grad_norm": 0.2977316081523895, |
| "learning_rate": 8e-05, |
| "loss": 1.7192, |
| "step": 2991 |
| }, |
| { |
| "epoch": 0.658088639612889, |
| "grad_norm": 0.296475887298584, |
| "learning_rate": 8e-05, |
| "loss": 1.6815, |
| "step": 2992 |
| }, |
| { |
| "epoch": 0.6583085890245244, |
| "grad_norm": 0.29579752683639526, |
| "learning_rate": 8e-05, |
| "loss": 1.781, |
| "step": 2993 |
| }, |
| { |
| "epoch": 0.6585285384361597, |
| "grad_norm": 0.2853552997112274, |
| "learning_rate": 8e-05, |
| "loss": 1.69, |
| "step": 2994 |
| }, |
| { |
| "epoch": 0.658748487847795, |
| "grad_norm": 0.2831558883190155, |
| "learning_rate": 8e-05, |
| "loss": 1.7118, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.6589684372594303, |
| "grad_norm": 0.30975469946861267, |
| "learning_rate": 8e-05, |
| "loss": 1.7153, |
| "step": 2996 |
| }, |
| { |
| "epoch": 0.6591883866710656, |
| "grad_norm": 0.287047803401947, |
| "learning_rate": 8e-05, |
| "loss": 1.7241, |
| "step": 2997 |
| }, |
| { |
| "epoch": 0.659408336082701, |
| "grad_norm": 0.2812976837158203, |
| "learning_rate": 8e-05, |
| "loss": 1.6536, |
| "step": 2998 |
| }, |
| { |
| "epoch": 0.6596282854943363, |
| "grad_norm": 0.2794138491153717, |
| "learning_rate": 8e-05, |
| "loss": 1.6176, |
| "step": 2999 |
| }, |
| { |
| "epoch": 0.6598482349059717, |
| "grad_norm": 0.2949649691581726, |
| "learning_rate": 8e-05, |
| "loss": 1.6499, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4546, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.65692592029696e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|