{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.16478536705940514,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 8.239268352970256e-05,
      "grad_norm": 370.92846474567483,
      "learning_rate": 0.0,
      "loss": 1.9502,
      "step": 1
    },
    {
      "epoch": 0.00016478536705940512,
      "grad_norm": 478.9745480395951,
      "learning_rate": 1.3717421124828532e-08,
      "loss": 1.7099,
      "step": 2
    },
    {
      "epoch": 0.0002471780505891077,
      "grad_norm": 816.010339911805,
      "learning_rate": 2.7434842249657065e-08,
      "loss": 1.9407,
      "step": 3
    },
    {
      "epoch": 0.00032957073411881023,
      "grad_norm": 472.8469994501748,
      "learning_rate": 4.1152263374485605e-08,
      "loss": 2.0365,
      "step": 4
    },
    {
      "epoch": 0.00041196341764851283,
      "grad_norm": 578.3746838755396,
      "learning_rate": 5.486968449931413e-08,
      "loss": 1.9464,
      "step": 5
    },
    {
      "epoch": 0.0004943561011782154,
      "grad_norm": 404.71316071877214,
      "learning_rate": 6.858710562414266e-08,
      "loss": 1.6153,
      "step": 6
    },
    {
      "epoch": 0.000576748784707918,
      "grad_norm": 336.6982918558558,
      "learning_rate": 8.230452674897121e-08,
      "loss": 1.5113,
      "step": 7
    },
    {
      "epoch": 0.0006591414682376205,
      "grad_norm": 341.52944273145397,
      "learning_rate": 9.602194787379974e-08,
      "loss": 1.8882,
      "step": 8
    },
    {
      "epoch": 0.0007415341517673231,
      "grad_norm": 391.4084771497659,
      "learning_rate": 1.0973936899862826e-07,
      "loss": 2.0696,
      "step": 9
    },
    {
      "epoch": 0.0008239268352970257,
      "grad_norm": 422.9036219570121,
      "learning_rate": 1.234567901234568e-07,
      "loss": 2.0605,
      "step": 10
    },
    {
      "epoch": 0.0009063195188267282,
      "grad_norm": 797.3739771528897,
      "learning_rate": 1.3717421124828532e-07,
      "loss": 2.4847,
      "step": 11
    },
    {
      "epoch": 0.0009887122023564308,
      "grad_norm": 375.3289213197193,
      "learning_rate": 1.5089163237311387e-07,
      "loss": 1.9973,
      "step": 12
    },
    {
      "epoch": 0.0010711048858861334,
      "grad_norm": 392.38417270652195,
      "learning_rate": 1.6460905349794242e-07,
      "loss": 1.8486,
      "step": 13
    },
    {
      "epoch": 0.001153497569415836,
      "grad_norm": 299.91546496064956,
      "learning_rate": 1.7832647462277092e-07,
      "loss": 2.0903,
      "step": 14
    },
    {
      "epoch": 0.0012358902529455383,
      "grad_norm": 323.60559376717487,
      "learning_rate": 1.9204389574759947e-07,
      "loss": 2.199,
      "step": 15
    },
    {
      "epoch": 0.001318282936475241,
      "grad_norm": 448.30929677447216,
      "learning_rate": 2.05761316872428e-07,
      "loss": 1.9866,
      "step": 16
    },
    {
      "epoch": 0.0014006756200049435,
      "grad_norm": 496.01105968376754,
      "learning_rate": 2.1947873799725652e-07,
      "loss": 1.7145,
      "step": 17
    },
    {
      "epoch": 0.0014830683035346461,
      "grad_norm": 254.87258601806334,
      "learning_rate": 2.3319615912208507e-07,
      "loss": 1.4035,
      "step": 18
    },
    {
      "epoch": 0.0015654609870643487,
      "grad_norm": 196.08575158272188,
      "learning_rate": 2.469135802469136e-07,
      "loss": 1.5703,
      "step": 19
    },
    {
      "epoch": 0.0016478536705940513,
      "grad_norm": 181.93540410857392,
      "learning_rate": 2.606310013717421e-07,
      "loss": 1.2346,
      "step": 20
    },
    {
      "epoch": 0.0017302463541237537,
      "grad_norm": 292.3097054026038,
      "learning_rate": 2.7434842249657064e-07,
      "loss": 1.6449,
      "step": 21
    },
    {
      "epoch": 0.0018126390376534563,
      "grad_norm": 189.7490456066442,
      "learning_rate": 2.880658436213992e-07,
      "loss": 1.3064,
      "step": 22
    },
    {
      "epoch": 0.001895031721183159,
      "grad_norm": 154.94758342589185,
      "learning_rate": 3.0178326474622774e-07,
      "loss": 0.8725,
      "step": 23
    },
    {
      "epoch": 0.0019774244047128615,
      "grad_norm": 175.30016139724518,
      "learning_rate": 3.1550068587105627e-07,
      "loss": 1.0209,
      "step": 24
    },
    {
      "epoch": 0.002059817088242564,
      "grad_norm": 138.98861988367486,
      "learning_rate": 3.2921810699588484e-07,
      "loss": 0.819,
      "step": 25
    },
    {
      "epoch": 0.0021422097717722667,
      "grad_norm": 171.4439871730333,
      "learning_rate": 3.4293552812071337e-07,
      "loss": 0.9231,
      "step": 26
    },
    {
      "epoch": 0.0022246024553019693,
      "grad_norm": 198.66126017668074,
      "learning_rate": 3.5665294924554184e-07,
      "loss": 1.1098,
      "step": 27
    },
    {
      "epoch": 0.002306995138831672,
      "grad_norm": 128.99561291431132,
      "learning_rate": 3.7037037037037036e-07,
      "loss": 0.6259,
      "step": 28
    },
    {
      "epoch": 0.0023893878223613745,
      "grad_norm": 139.7562027108659,
      "learning_rate": 3.8408779149519894e-07,
      "loss": 0.9026,
      "step": 29
    },
    {
      "epoch": 0.0024717805058910767,
      "grad_norm": 8493.779228229192,
      "learning_rate": 3.9780521262002746e-07,
      "loss": 7.6096,
      "step": 30
    },
    {
      "epoch": 0.0025541731894207793,
      "grad_norm": 96.52109090080035,
      "learning_rate": 4.11522633744856e-07,
      "loss": 0.9848,
      "step": 31
    },
    {
      "epoch": 0.002636565872950482,
      "grad_norm": 46.277401372799,
      "learning_rate": 4.252400548696845e-07,
      "loss": 0.7812,
      "step": 32
    },
    {
      "epoch": 0.0027189585564801845,
      "grad_norm": 90.33996022715957,
      "learning_rate": 4.3895747599451304e-07,
      "loss": 0.9017,
      "step": 33
    },
    {
      "epoch": 0.002801351240009887,
      "grad_norm": 34.651039695496756,
      "learning_rate": 4.526748971193416e-07,
      "loss": 0.5284,
      "step": 34
    },
    {
      "epoch": 0.0028837439235395897,
      "grad_norm": 57.438589709256526,
      "learning_rate": 4.6639231824417014e-07,
      "loss": 0.6959,
      "step": 35
    },
    {
      "epoch": 0.0029661366070692923,
      "grad_norm": 88.03798435445572,
      "learning_rate": 4.801097393689986e-07,
      "loss": 0.6558,
      "step": 36
    },
    {
      "epoch": 0.003048529290598995,
      "grad_norm": 168.93972734994549,
      "learning_rate": 4.938271604938272e-07,
      "loss": 1.0815,
      "step": 37
    },
    {
      "epoch": 0.0031309219741286975,
      "grad_norm": 46.38654936308973,
      "learning_rate": 5.075445816186558e-07,
      "loss": 0.6026,
      "step": 38
    },
    {
      "epoch": 0.0032133146576584,
      "grad_norm": 44.94814475982433,
      "learning_rate": 5.212620027434842e-07,
      "loss": 0.7385,
      "step": 39
    },
    {
      "epoch": 0.0032957073411881027,
      "grad_norm": 165.7034467276052,
      "learning_rate": 5.349794238683128e-07,
      "loss": 0.6596,
      "step": 40
    },
    {
      "epoch": 0.0033781000247178053,
      "grad_norm": 97.98006118196572,
      "learning_rate": 5.486968449931413e-07,
      "loss": 1.02,
      "step": 41
    },
    {
      "epoch": 0.0034604927082475074,
      "grad_norm": 69.93402365846087,
      "learning_rate": 5.624142661179699e-07,
      "loss": 0.674,
      "step": 42
    },
    {
      "epoch": 0.00354288539177721,
      "grad_norm": 1163.1480162660114,
      "learning_rate": 5.761316872427984e-07,
      "loss": 3.2917,
      "step": 43
    },
    {
      "epoch": 0.0036252780753069126,
      "grad_norm": 74.37426736775474,
      "learning_rate": 5.898491083676269e-07,
      "loss": 0.7122,
      "step": 44
    },
    {
      "epoch": 0.0037076707588366152,
      "grad_norm": 91.55829350532069,
      "learning_rate": 6.035665294924555e-07,
      "loss": 0.8284,
      "step": 45
    },
    {
      "epoch": 0.003790063442366318,
      "grad_norm": 46.12057854425198,
      "learning_rate": 6.17283950617284e-07,
      "loss": 0.8129,
      "step": 46
    },
    {
      "epoch": 0.0038724561258960204,
      "grad_norm": 65.2084871860374,
      "learning_rate": 6.310013717421125e-07,
      "loss": 0.7338,
      "step": 47
    },
    {
      "epoch": 0.003954848809425723,
      "grad_norm": 76.89157130197114,
      "learning_rate": 6.44718792866941e-07,
      "loss": 0.3108,
      "step": 48
    },
    {
      "epoch": 0.004037241492955425,
      "grad_norm": 73.53979766200762,
      "learning_rate": 6.584362139917697e-07,
      "loss": 0.508,
      "step": 49
    },
    {
      "epoch": 0.004119634176485128,
      "grad_norm": 94.68664560631554,
      "learning_rate": 6.721536351165982e-07,
      "loss": 0.9084,
      "step": 50
    },
    {
      "epoch": 0.00420202686001483,
      "grad_norm": 146.08418676025838,
      "learning_rate": 6.858710562414267e-07,
      "loss": 0.8076,
      "step": 51
    },
    {
      "epoch": 0.004284419543544533,
      "grad_norm": 98.79171062611543,
      "learning_rate": 6.995884773662552e-07,
      "loss": 0.5388,
      "step": 52
    },
    {
      "epoch": 0.004366812227074236,
      "grad_norm": 63.9354584989466,
      "learning_rate": 7.133058984910837e-07,
      "loss": 0.6614,
      "step": 53
    },
    {
      "epoch": 0.004449204910603939,
      "grad_norm": 28.560826747239517,
      "learning_rate": 7.270233196159123e-07,
      "loss": 0.221,
      "step": 54
    },
    {
      "epoch": 0.004531597594133641,
      "grad_norm": 119.28245305633594,
      "learning_rate": 7.407407407407407e-07,
      "loss": 0.7162,
      "step": 55
    },
    {
      "epoch": 0.004613990277663344,
      "grad_norm": 106.52974721356492,
      "learning_rate": 7.544581618655693e-07,
      "loss": 0.7543,
      "step": 56
    },
    {
      "epoch": 0.004696382961193046,
      "grad_norm": 216.2323272199254,
      "learning_rate": 7.681755829903979e-07,
      "loss": 0.612,
      "step": 57
    },
    {
      "epoch": 0.004778775644722749,
      "grad_norm": 81.76961781882962,
      "learning_rate": 7.818930041152265e-07,
      "loss": 0.9122,
      "step": 58
    },
    {
      "epoch": 0.004861168328252451,
      "grad_norm": 81.51218859422868,
      "learning_rate": 7.956104252400549e-07,
      "loss": 0.4165,
      "step": 59
    },
    {
      "epoch": 0.004943561011782153,
      "grad_norm": 45.85355666847451,
      "learning_rate": 8.093278463648835e-07,
      "loss": 0.3937,
      "step": 60
    },
    {
      "epoch": 0.005025953695311856,
      "grad_norm": 113.09646005752293,
      "learning_rate": 8.23045267489712e-07,
      "loss": 0.7733,
      "step": 61
    },
    {
      "epoch": 0.0051083463788415585,
      "grad_norm": 42.137339872436065,
      "learning_rate": 8.367626886145406e-07,
      "loss": 0.5397,
      "step": 62
    },
    {
      "epoch": 0.005190739062371262,
      "grad_norm": 99.72516559451445,
      "learning_rate": 8.50480109739369e-07,
      "loss": 0.7653,
      "step": 63
    },
    {
      "epoch": 0.005273131745900964,
      "grad_norm": 37.868334658532945,
      "learning_rate": 8.641975308641976e-07,
      "loss": 0.4084,
      "step": 64
    },
    {
      "epoch": 0.005355524429430667,
      "grad_norm": 253.35802935100432,
      "learning_rate": 8.779149519890261e-07,
      "loss": 0.6051,
      "step": 65
    },
    {
      "epoch": 0.005437917112960369,
      "grad_norm": 29.821485219757093,
      "learning_rate": 8.916323731138548e-07,
      "loss": 0.4946,
      "step": 66
    },
    {
      "epoch": 0.005520309796490072,
      "grad_norm": 26.807855345732474,
      "learning_rate": 9.053497942386832e-07,
      "loss": 0.3619,
      "step": 67
    },
    {
      "epoch": 0.005602702480019774,
      "grad_norm": 45.035127254975706,
      "learning_rate": 9.190672153635118e-07,
      "loss": 0.5831,
      "step": 68
    },
    {
      "epoch": 0.005685095163549477,
      "grad_norm": 44.226686731133306,
      "learning_rate": 9.327846364883403e-07,
      "loss": 0.6431,
      "step": 69
    },
    {
      "epoch": 0.005767487847079179,
      "grad_norm": 60.33898555308833,
      "learning_rate": 9.465020576131687e-07,
      "loss": 0.4899,
      "step": 70
    },
    {
      "epoch": 0.005849880530608882,
      "grad_norm": 35.25027498428163,
      "learning_rate": 9.602194787379972e-07,
      "loss": 0.3887,
      "step": 71
    },
    {
      "epoch": 0.0059322732141385845,
      "grad_norm": 73.25174042894214,
      "learning_rate": 9.73936899862826e-07,
      "loss": 0.6252,
      "step": 72
    },
    {
      "epoch": 0.006014665897668287,
      "grad_norm": 52.590662239348354,
      "learning_rate": 9.876543209876544e-07,
      "loss": 0.3872,
      "step": 73
    },
    {
      "epoch": 0.00609705858119799,
      "grad_norm": 29.726473681600194,
      "learning_rate": 1.001371742112483e-06,
      "loss": 0.4349,
      "step": 74
    },
    {
      "epoch": 0.006179451264727692,
      "grad_norm": 26.072142658169017,
      "learning_rate": 1.0150891632373115e-06,
      "loss": 0.6492,
      "step": 75
    },
    {
      "epoch": 0.006261843948257395,
      "grad_norm": 38.68492891617437,
      "learning_rate": 1.02880658436214e-06,
      "loss": 0.8022,
      "step": 76
    },
    {
      "epoch": 0.006344236631787097,
      "grad_norm": 120.40708886210712,
      "learning_rate": 1.0425240054869685e-06,
      "loss": 0.5611,
      "step": 77
    },
    {
      "epoch": 0.0064266293153168,
      "grad_norm": 20.221000748278993,
      "learning_rate": 1.0562414266117972e-06,
      "loss": 0.2969,
      "step": 78
    },
    {
      "epoch": 0.006509021998846502,
      "grad_norm": 137.6805973085389,
      "learning_rate": 1.0699588477366256e-06,
      "loss": 0.648,
      "step": 79
    },
    {
      "epoch": 0.006591414682376205,
      "grad_norm": 40.1096379523084,
      "learning_rate": 1.083676268861454e-06,
      "loss": 0.8103,
      "step": 80
    },
    {
      "epoch": 0.0066738073659059075,
      "grad_norm": 32.096473511201374,
      "learning_rate": 1.0973936899862826e-06,
      "loss": 0.6186,
      "step": 81
    },
    {
      "epoch": 0.0067562000494356105,
      "grad_norm": 24.075343816904766,
      "learning_rate": 1.111111111111111e-06,
      "loss": 0.3877,
      "step": 82
    },
    {
      "epoch": 0.006838592732965313,
      "grad_norm": 48.42109801664082,
      "learning_rate": 1.1248285322359397e-06,
      "loss": 0.447,
      "step": 83
    },
    {
      "epoch": 0.006920985416495015,
      "grad_norm": 42.65831233770232,
      "learning_rate": 1.1385459533607684e-06,
      "loss": 0.7162,
      "step": 84
    },
    {
      "epoch": 0.007003378100024718,
      "grad_norm": 71.20273416415172,
      "learning_rate": 1.1522633744855969e-06,
      "loss": 0.6573,
      "step": 85
    },
    {
      "epoch": 0.00708577078355442,
      "grad_norm": 70.73981135151499,
      "learning_rate": 1.1659807956104253e-06,
      "loss": 0.3774,
      "step": 86
    },
    {
      "epoch": 0.007168163467084123,
      "grad_norm": 17.02358862648308,
      "learning_rate": 1.1796982167352538e-06,
      "loss": 0.4372,
      "step": 87
    },
    {
      "epoch": 0.007250556150613825,
      "grad_norm": 38.56110621340388,
      "learning_rate": 1.1934156378600823e-06,
      "loss": 0.5007,
      "step": 88
    },
    {
      "epoch": 0.007332948834143528,
      "grad_norm": 21.689880371993823,
      "learning_rate": 1.207133058984911e-06,
      "loss": 0.4363,
      "step": 89
    },
    {
      "epoch": 0.0074153415176732304,
      "grad_norm": 53.876169409804625,
      "learning_rate": 1.2208504801097394e-06,
      "loss": 0.4091,
      "step": 90
    },
    {
      "epoch": 0.0074977342012029335,
      "grad_norm": 48.147837297588566,
      "learning_rate": 1.234567901234568e-06,
      "loss": 0.6979,
      "step": 91
    },
    {
      "epoch": 0.007580126884732636,
      "grad_norm": 20.467449188390766,
      "learning_rate": 1.2482853223593966e-06,
      "loss": 0.4081,
      "step": 92
    },
    {
      "epoch": 0.007662519568262339,
      "grad_norm": 23.825819702066855,
      "learning_rate": 1.262002743484225e-06,
      "loss": 0.5095,
      "step": 93
    },
    {
      "epoch": 0.007744912251792041,
      "grad_norm": 49.54875914048349,
      "learning_rate": 1.2757201646090535e-06,
      "loss": 0.8153,
      "step": 94
    },
    {
      "epoch": 0.007827304935321744,
      "grad_norm": 36.71859670716872,
      "learning_rate": 1.289437585733882e-06,
      "loss": 0.4975,
      "step": 95
    },
    {
      "epoch": 0.007909697618851446,
      "grad_norm": 52.89761869922755,
      "learning_rate": 1.3031550068587107e-06,
      "loss": 0.6777,
      "step": 96
    },
    {
      "epoch": 0.007992090302381148,
      "grad_norm": 262.046184232095,
      "learning_rate": 1.3168724279835394e-06,
      "loss": 0.5125,
      "step": 97
    },
    {
      "epoch": 0.00807448298591085,
      "grad_norm": 23.8518705316023,
      "learning_rate": 1.3305898491083676e-06,
      "loss": 0.5802,
      "step": 98
    },
    {
      "epoch": 0.008156875669440554,
      "grad_norm": 24.43774608417277,
      "learning_rate": 1.3443072702331963e-06,
      "loss": 0.4466,
      "step": 99
    },
    {
      "epoch": 0.008239268352970256,
      "grad_norm": 27.243336976835526,
      "learning_rate": 1.3580246913580248e-06,
      "loss": 0.606,
      "step": 100
    },
    {
      "epoch": 0.008321661036499959,
      "grad_norm": 18.838152665368614,
      "learning_rate": 1.3717421124828535e-06,
      "loss": 0.4605,
      "step": 101
    },
    {
      "epoch": 0.00840405372002966,
      "grad_norm": 26.949888572345216,
      "learning_rate": 1.3854595336076817e-06,
      "loss": 0.4532,
      "step": 102
    },
    {
      "epoch": 0.008486446403559365,
      "grad_norm": 21.572594872057856,
      "learning_rate": 1.3991769547325104e-06,
      "loss": 0.4991,
      "step": 103
    },
    {
      "epoch": 0.008568839087089067,
      "grad_norm": 28.33027763947139,
      "learning_rate": 1.412894375857339e-06,
      "loss": 0.5669,
      "step": 104
    },
    {
      "epoch": 0.008651231770618769,
      "grad_norm": 31.09867407487906,
      "learning_rate": 1.4266117969821674e-06,
      "loss": 0.5158,
      "step": 105
    },
    {
      "epoch": 0.008733624454148471,
      "grad_norm": 47.502117851757255,
      "learning_rate": 1.440329218106996e-06,
      "loss": 0.6078,
      "step": 106
    },
    {
      "epoch": 0.008816017137678173,
      "grad_norm": 30.0902294117928,
      "learning_rate": 1.4540466392318245e-06,
      "loss": 0.6867,
      "step": 107
    },
    {
      "epoch": 0.008898409821207877,
      "grad_norm": 16.049485540251304,
      "learning_rate": 1.4677640603566532e-06,
      "loss": 0.5267,
      "step": 108
    },
    {
      "epoch": 0.00898080250473758,
      "grad_norm": 30.186256751846674,
      "learning_rate": 1.4814814814814815e-06,
      "loss": 0.6437,
      "step": 109
    },
    {
      "epoch": 0.009063195188267282,
      "grad_norm": 23.921754142654017,
      "learning_rate": 1.4951989026063101e-06,
      "loss": 0.7187,
      "step": 110
    },
    {
      "epoch": 0.009145587871796984,
      "grad_norm": 40.13689702977842,
      "learning_rate": 1.5089163237311386e-06,
      "loss": 0.4591,
      "step": 111
    },
    {
      "epoch": 0.009227980555326688,
      "grad_norm": 21.792212279571824,
      "learning_rate": 1.5226337448559673e-06,
      "loss": 0.4377,
      "step": 112
    },
    {
      "epoch": 0.00931037323885639,
      "grad_norm": 12.609083806149128,
      "learning_rate": 1.5363511659807958e-06,
      "loss": 0.4975,
      "step": 113
    },
    {
      "epoch": 0.009392765922386092,
      "grad_norm": 19.801853097766696,
      "learning_rate": 1.5500685871056242e-06,
      "loss": 0.4519,
      "step": 114
    },
    {
      "epoch": 0.009475158605915794,
      "grad_norm": 44.527628785852514,
      "learning_rate": 1.563786008230453e-06,
      "loss": 0.5393,
      "step": 115
    },
    {
      "epoch": 0.009557551289445498,
      "grad_norm": 17.968320630306675,
      "learning_rate": 1.5775034293552812e-06,
      "loss": 0.6014,
      "step": 116
    },
    {
      "epoch": 0.0096399439729752,
      "grad_norm": 23.423995548663576,
      "learning_rate": 1.5912208504801099e-06,
      "loss": 0.4331,
      "step": 117
    },
    {
      "epoch": 0.009722336656504902,
      "grad_norm": 18.98686296805731,
      "learning_rate": 1.6049382716049383e-06,
      "loss": 0.5621,
      "step": 118
    },
    {
      "epoch": 0.009804729340034605,
      "grad_norm": 13.635326129289362,
      "learning_rate": 1.618655692729767e-06,
      "loss": 0.2893,
      "step": 119
    },
    {
      "epoch": 0.009887122023564307,
      "grad_norm": 29.502202435441244,
      "learning_rate": 1.6323731138545953e-06,
      "loss": 0.5988,
      "step": 120
    },
    {
      "epoch": 0.00996951470709401,
      "grad_norm": 26.759044629536252,
      "learning_rate": 1.646090534979424e-06,
      "loss": 0.6966,
      "step": 121
    },
    {
      "epoch": 0.010051907390623713,
      "grad_norm": 16.944673727591262,
      "learning_rate": 1.6598079561042526e-06,
      "loss": 0.6288,
      "step": 122
    },
    {
      "epoch": 0.010134300074153415,
      "grad_norm": 22.18252955446083,
      "learning_rate": 1.6735253772290811e-06,
      "loss": 0.6527,
      "step": 123
    },
    {
      "epoch": 0.010216692757683117,
      "grad_norm": 14.663608441939818,
      "learning_rate": 1.6872427983539098e-06,
      "loss": 0.4992,
      "step": 124
    },
    {
      "epoch": 0.010299085441212821,
      "grad_norm": 27.846664256554586,
      "learning_rate": 1.700960219478738e-06,
      "loss": 0.6578,
      "step": 125
    },
    {
      "epoch": 0.010381478124742523,
      "grad_norm": 48.120411539456136,
      "learning_rate": 1.7146776406035667e-06,
      "loss": 0.7731,
      "step": 126
    },
    {
      "epoch": 0.010463870808272225,
      "grad_norm": 29.505384191045792,
      "learning_rate": 1.7283950617283952e-06,
      "loss": 0.4631,
      "step": 127
    },
    {
      "epoch": 0.010546263491801927,
      "grad_norm": 27.267562026668486,
      "learning_rate": 1.7421124828532237e-06,
      "loss": 0.7196,
      "step": 128
    },
    {
      "epoch": 0.010628656175331631,
      "grad_norm": 16.00289092345597,
      "learning_rate": 1.7558299039780521e-06,
      "loss": 0.5238,
      "step": 129
    },
    {
      "epoch": 0.010711048858861334,
      "grad_norm": 20.034041777867913,
      "learning_rate": 1.7695473251028808e-06,
      "loss": 0.48,
      "step": 130
    },
    {
      "epoch": 0.010793441542391036,
      "grad_norm": 16.125317675567455,
      "learning_rate": 1.7832647462277095e-06,
      "loss": 0.6135,
      "step": 131
    },
    {
      "epoch": 0.010875834225920738,
      "grad_norm": 198.72635885269693,
      "learning_rate": 1.7969821673525378e-06,
      "loss": 1.8359,
      "step": 132
    },
    {
      "epoch": 0.01095822690945044,
      "grad_norm": 11.49829810544229,
      "learning_rate": 1.8106995884773665e-06,
      "loss": 0.4496,
      "step": 133
    },
    {
      "epoch": 0.011040619592980144,
      "grad_norm": 54.65603884199396,
      "learning_rate": 1.824417009602195e-06,
      "loss": 0.6657,
      "step": 134
    },
    {
      "epoch": 0.011123012276509846,
      "grad_norm": 23.069821716903398,
      "learning_rate": 1.8381344307270236e-06,
      "loss": 0.5426,
      "step": 135
    },
    {
      "epoch": 0.011205404960039548,
      "grad_norm": 13.204812144916009,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.612,
      "step": 136
    },
    {
      "epoch": 0.01128779764356925,
      "grad_norm": 13.956836795334933,
      "learning_rate": 1.8655692729766806e-06,
      "loss": 0.532,
      "step": 137
    },
    {
      "epoch": 0.011370190327098954,
      "grad_norm": 42.68872796386726,
      "learning_rate": 1.879286694101509e-06,
      "loss": 0.7006,
      "step": 138
    },
    {
      "epoch": 0.011452583010628656,
      "grad_norm": 16.612308273214413,
      "learning_rate": 1.8930041152263375e-06,
      "loss": 0.5123,
      "step": 139
    },
    {
      "epoch": 0.011534975694158359,
      "grad_norm": 18.144654907032912,
      "learning_rate": 1.9067215363511662e-06,
      "loss": 0.3141,
      "step": 140
    },
    {
      "epoch": 0.01161736837768806,
      "grad_norm": 16.814344046499077,
      "learning_rate": 1.9204389574759944e-06,
      "loss": 0.6542,
      "step": 141
    },
    {
      "epoch": 0.011699761061217765,
      "grad_norm": 21.160095993478766,
      "learning_rate": 1.9341563786008233e-06,
      "loss": 0.5819,
      "step": 142
    },
    {
      "epoch": 0.011782153744747467,
      "grad_norm": 21.297656919271585,
      "learning_rate": 1.947873799725652e-06,
      "loss": 0.5107,
      "step": 143
    },
    {
      "epoch": 0.011864546428277169,
      "grad_norm": 15.566851005374614,
      "learning_rate": 1.9615912208504803e-06,
      "loss": 0.6187,
      "step": 144
    },
    {
      "epoch": 0.011946939111806871,
      "grad_norm": 16.02129799647006,
      "learning_rate": 1.9753086419753087e-06,
      "loss": 0.4715,
      "step": 145
    },
    {
      "epoch": 0.012029331795336573,
      "grad_norm": 11.717994264174337,
      "learning_rate": 1.9890260631001372e-06,
      "loss": 0.4021,
      "step": 146
    },
    {
      "epoch": 0.012111724478866277,
      "grad_norm": 21.22813881358679,
      "learning_rate": 2.002743484224966e-06,
      "loss": 0.2599,
      "step": 147
    },
    {
      "epoch": 0.01219411716239598,
      "grad_norm": 15.200100122381537,
      "learning_rate": 2.0164609053497946e-06,
      "loss": 0.45,
      "step": 148
    },
    {
      "epoch": 0.012276509845925682,
      "grad_norm": 24.89750355075059,
      "learning_rate": 2.030178326474623e-06,
      "loss": 0.772,
      "step": 149
    },
    {
      "epoch": 0.012358902529455384,
      "grad_norm": 16.304820858469412,
      "learning_rate": 2.0438957475994515e-06,
      "loss": 0.446,
      "step": 150
    },
    {
      "epoch": 0.012441295212985088,
      "grad_norm": 20.608928374910505,
      "learning_rate": 2.05761316872428e-06,
      "loss": 0.5507,
      "step": 151
    },
    {
      "epoch": 0.01252368789651479,
      "grad_norm": 10.483108607114513,
      "learning_rate": 2.0713305898491085e-06,
      "loss": 0.4834,
      "step": 152
    },
    {
      "epoch": 0.012606080580044492,
      "grad_norm": 12.697803561984879,
      "learning_rate": 2.085048010973937e-06,
      "loss": 0.5253,
      "step": 153
    },
    {
      "epoch": 0.012688473263574194,
      "grad_norm": 24.461540625272452,
      "learning_rate": 2.0987654320987654e-06,
      "loss": 0.6982,
      "step": 154
    },
    {
      "epoch": 0.012770865947103896,
      "grad_norm": 17.323695037238057,
      "learning_rate": 2.1124828532235943e-06,
      "loss": 0.6608,
      "step": 155
    },
    {
      "epoch": 0.0128532586306336,
      "grad_norm": 19.1728467069908,
      "learning_rate": 2.1262002743484228e-06,
      "loss": 0.6158,
      "step": 156
    },
    {
      "epoch": 0.012935651314163302,
      "grad_norm": 14.335840726971144,
      "learning_rate": 2.1399176954732512e-06,
      "loss": 0.6844,
      "step": 157
    },
    {
      "epoch": 0.013018043997693005,
      "grad_norm": 20.095242492343232,
      "learning_rate": 2.1536351165980797e-06,
      "loss": 0.497,
      "step": 158
    },
    {
      "epoch": 0.013100436681222707,
      "grad_norm": 10.114501664370549,
      "learning_rate": 2.167352537722908e-06,
      "loss": 0.5262,
      "step": 159
    },
    {
      "epoch": 0.01318282936475241,
      "grad_norm": 13.305214604549445,
      "learning_rate": 2.1810699588477367e-06,
      "loss": 0.5619,
      "step": 160
    },
    {
      "epoch": 0.013265222048282113,
      "grad_norm": 19.721782800895156,
      "learning_rate": 2.194787379972565e-06,
      "loss": 0.5357,
      "step": 161
    },
    {
      "epoch": 0.013347614731811815,
      "grad_norm": 19.7228102937409,
      "learning_rate": 2.208504801097394e-06,
      "loss": 0.4225,
      "step": 162
    },
    {
      "epoch": 0.013430007415341517,
      "grad_norm": 240.83778830697852,
      "learning_rate": 2.222222222222222e-06,
      "loss": 2.4384,
      "step": 163
    },
    {
      "epoch": 0.013512400098871221,
      "grad_norm": 11.380285250812992,
      "learning_rate": 2.235939643347051e-06,
      "loss": 0.6533,
      "step": 164
    },
    {
      "epoch": 0.013594792782400923,
      "grad_norm": 9.94152540099469,
      "learning_rate": 2.2496570644718794e-06,
      "loss": 0.5497,
      "step": 165
    },
    {
      "epoch": 0.013677185465930625,
      "grad_norm": 12.090836450223387,
      "learning_rate": 2.263374485596708e-06,
      "loss": 0.4756,
      "step": 166
    },
    {
      "epoch": 0.013759578149460328,
      "grad_norm": 14.813308219199923,
      "learning_rate": 2.277091906721537e-06,
      "loss": 0.5355,
      "step": 167
    },
    {
      "epoch": 0.01384197083299003,
      "grad_norm": 13.192872206591804,
      "learning_rate": 2.290809327846365e-06,
      "loss": 0.5875,
      "step": 168
    },
    {
      "epoch": 0.013924363516519734,
      "grad_norm": 16.210695640291387,
      "learning_rate": 2.3045267489711937e-06,
      "loss": 0.5253,
      "step": 169
    },
    {
      "epoch": 0.014006756200049436,
      "grad_norm": 12.039792190252744,
      "learning_rate": 2.3182441700960222e-06,
      "loss": 0.4517,
      "step": 170
    },
    {
      "epoch": 0.014089148883579138,
      "grad_norm": 23.04062666474093,
      "learning_rate": 2.3319615912208507e-06,
      "loss": 0.4083,
      "step": 171
    },
    {
      "epoch": 0.01417154156710884,
      "grad_norm": 19.979153089914988,
      "learning_rate": 2.345679012345679e-06,
      "loss": 0.6887,
      "step": 172
    },
    {
      "epoch": 0.014253934250638544,
      "grad_norm": 21.895537557735427,
      "learning_rate": 2.3593964334705076e-06,
      "loss": 0.8094,
      "step": 173
    },
    {
      "epoch": 0.014336326934168246,
      "grad_norm": 31.47401830070671,
      "learning_rate": 2.3731138545953365e-06,
      "loss": 0.7431,
      "step": 174
    },
    {
      "epoch": 0.014418719617697948,
      "grad_norm": 12.750465460746202,
      "learning_rate": 2.3868312757201646e-06,
      "loss": 0.6583,
      "step": 175
    },
    {
      "epoch": 0.01450111230122765,
      "grad_norm": 13.307184351874149,
      "learning_rate": 2.4005486968449935e-06,
      "loss": 0.6077,
      "step": 176
    },
    {
      "epoch": 0.014583504984757354,
      "grad_norm": 10.435374769314452,
      "learning_rate": 2.414266117969822e-06,
      "loss": 0.5739,
      "step": 177
    },
    {
      "epoch": 0.014665897668287057,
      "grad_norm": 15.566819292000186,
      "learning_rate": 2.4279835390946504e-06,
      "loss": 0.644,
      "step": 178
    },
    {
      "epoch": 0.014748290351816759,
      "grad_norm": 12.814513858300232,
      "learning_rate": 2.441700960219479e-06,
      "loss": 0.5858,
      "step": 179
    },
    {
      "epoch": 0.014830683035346461,
      "grad_norm": 12.12622273494356,
      "learning_rate": 2.4554183813443074e-06,
      "loss": 0.5202,
      "step": 180
    },
    {
      "epoch": 0.014913075718876163,
      "grad_norm": 16.96998648395035,
      "learning_rate": 2.469135802469136e-06,
      "loss": 0.3457,
      "step": 181
    },
    {
      "epoch": 0.014995468402405867,
      "grad_norm": 13.91986946254961,
      "learning_rate": 2.4828532235939647e-06,
      "loss": 0.5681,
      "step": 182
    },
    {
      "epoch": 0.015077861085935569,
      "grad_norm": 12.486810040618805,
      "learning_rate": 2.496570644718793e-06,
      "loss": 0.5125,
      "step": 183
    },
    {
      "epoch": 0.015160253769465271,
      "grad_norm": 10.303008103171251,
      "learning_rate": 2.5102880658436217e-06,
      "loss": 0.6385,
      "step": 184
    },
    {
      "epoch": 0.015242646452994973,
      "grad_norm": 13.183010022460554,
      "learning_rate": 2.52400548696845e-06,
      "loss": 0.3552,
      "step": 185
    },
    {
      "epoch": 0.015325039136524677,
      "grad_norm": 10.107898578134508,
      "learning_rate": 2.5377229080932786e-06,
      "loss": 0.4341,
      "step": 186
    },
    {
      "epoch": 0.01540743182005438,
      "grad_norm": 8.570843302612268,
      "learning_rate": 2.551440329218107e-06,
      "loss": 0.4343,
      "step": 187
    },
    {
      "epoch": 0.015489824503584082,
      "grad_norm": 17.3196847847868,
      "learning_rate": 2.565157750342936e-06,
      "loss": 0.6971,
      "step": 188
    },
    {
      "epoch": 0.015572217187113784,
      "grad_norm": 11.86766768913693,
      "learning_rate": 2.578875171467764e-06,
      "loss": 0.5436,
      "step": 189
    },
    {
      "epoch": 0.015654609870643488,
      "grad_norm": 10.49550664029216,
      "learning_rate": 2.5925925925925925e-06,
      "loss": 0.342,
      "step": 190
    },
    {
      "epoch": 0.01573700255417319,
      "grad_norm": 9.038437970250417,
      "learning_rate": 2.6063100137174214e-06,
      "loss": 0.3151,
      "step": 191
    },
    {
      "epoch": 0.015819395237702892,
      "grad_norm": 15.678199292955869,
      "learning_rate": 2.62002743484225e-06,
      "loss": 0.7268,
      "step": 192
    },
    {
      "epoch": 0.015901787921232594,
      "grad_norm": 13.800404804526247,
      "learning_rate": 2.6337448559670788e-06,
      "loss": 0.5118,
      "step": 193
    },
    {
      "epoch": 0.015984180604762296,
      "grad_norm": 74.10559559217063,
      "learning_rate": 2.647462277091907e-06,
      "loss": 0.7444,
      "step": 194
    },
    {
      "epoch": 0.016066573288292,
      "grad_norm": 12.20315952893777,
      "learning_rate": 2.6611796982167353e-06,
      "loss": 0.4277,
      "step": 195
    },
    {
      "epoch": 0.0161489659718217,
      "grad_norm": 10.05719320789487,
      "learning_rate": 2.674897119341564e-06,
      "loss": 0.4664,
      "step": 196
    },
    {
      "epoch": 0.016231358655351406,
      "grad_norm": 42.082856786319546,
      "learning_rate": 2.6886145404663926e-06,
      "loss": 0.3969,
      "step": 197
    },
    {
      "epoch": 0.01631375133888111,
      "grad_norm": 15.787631693690875,
      "learning_rate": 2.7023319615912207e-06,
      "loss": 0.7307,
      "step": 198
    },
    {
      "epoch": 0.01639614402241081,
      "grad_norm": 8.901740684680457,
      "learning_rate": 2.7160493827160496e-06,
      "loss": 0.5109,
      "step": 199
    },
    {
      "epoch": 0.016478536705940513,
      "grad_norm": 28.934834071007202,
      "learning_rate": 2.729766803840878e-06,
      "loss": 0.4942,
      "step": 200
    },
    {
      "epoch": 0.016560929389470215,
      "grad_norm": 18.793354020178867,
      "learning_rate": 2.743484224965707e-06,
      "loss": 0.5592,
      "step": 201
    },
    {
      "epoch": 0.016643322072999917,
      "grad_norm": 13.60338783501572,
      "learning_rate": 2.7572016460905354e-06,
      "loss": 0.6025,
      "step": 202
    },
    {
      "epoch": 0.01672571475652962,
      "grad_norm": 8.038968073425716,
      "learning_rate": 2.7709190672153635e-06,
      "loss": 0.5211,
      "step": 203
    },
    {
      "epoch": 0.01680810744005932,
      "grad_norm": 11.559001618222288,
      "learning_rate": 2.7846364883401924e-06,
      "loss": 0.5185,
      "step": 204
    },
    {
      "epoch": 0.016890500123589024,
      "grad_norm": 10.70606495183075,
      "learning_rate": 2.798353909465021e-06,
      "loss": 0.5378,
      "step": 205
    },
    {
      "epoch": 0.01697289280711873,
      "grad_norm": 15.724659491801045,
      "learning_rate": 2.8120713305898493e-06,
      "loss": 0.3996,
      "step": 206
    },
    {
      "epoch": 0.01705528549064843,
      "grad_norm": 15.632077558092512,
      "learning_rate": 2.825788751714678e-06,
      "loss": 0.5294,
      "step": 207
    },
    {
      "epoch": 0.017137678174178134,
      "grad_norm": 15.35567010238041,
      "learning_rate": 2.8395061728395062e-06,
      "loss": 0.5789,
      "step": 208
    },
    {
      "epoch": 0.017220070857707836,
      "grad_norm": 12.247079248152177,
      "learning_rate": 2.8532235939643347e-06,
      "loss": 0.4783,
      "step": 209
    },
    {
      "epoch": 0.017302463541237538,
      "grad_norm": 13.787412538148317,
      "learning_rate": 2.8669410150891636e-06,
      "loss": 0.6358,
      "step": 210
    },
    {
      "epoch": 0.01738485622476724,
      "grad_norm": 10.388866874954653,
      "learning_rate": 2.880658436213992e-06,
      "loss": 0.4617,
      "step": 211
    },
    {
      "epoch": 0.017467248908296942,
      "grad_norm": 10.149440548768066,
      "learning_rate": 2.89437585733882e-06,
      "loss": 0.372,
      "step": 212
    },
    {
      "epoch": 0.017549641591826644,
      "grad_norm": 12.782054026030952,
      "learning_rate": 2.908093278463649e-06,
      "loss": 0.5502,
      "step": 213
    },
    {
      "epoch": 0.017632034275356347,
      "grad_norm": 8.980692409189274,
      "learning_rate": 2.9218106995884775e-06,
      "loss": 0.5311,
      "step": 214
    },
    {
      "epoch": 0.017714426958886052,
      "grad_norm": 12.126638458623237,
      "learning_rate": 2.9355281207133064e-06,
      "loss": 0.6599,
      "step": 215
    },
    {
      "epoch": 0.017796819642415754,
      "grad_norm": 10.503433095750024,
      "learning_rate": 2.949245541838135e-06,
      "loss": 0.4327,
      "step": 216
    },
    {
      "epoch": 0.017879212325945457,
      "grad_norm": 12.219841823090144,
      "learning_rate": 2.962962962962963e-06,
      "loss": 0.7044,
      "step": 217
    },
    {
      "epoch": 0.01796160500947516,
      "grad_norm": 18.87320464359166,
      "learning_rate": 2.976680384087792e-06,
      "loss": 0.7467,
      "step": 218
    },
    {
      "epoch": 0.01804399769300486,
      "grad_norm": 289.359254982659,
      "learning_rate": 2.9903978052126203e-06,
      "loss": 3.0217,
      "step": 219
    },
    {
      "epoch": 0.018126390376534563,
      "grad_norm": 7.733672679042532,
      "learning_rate": 3.004115226337449e-06,
      "loss": 0.4293,
      "step": 220
    },
    {
      "epoch": 0.018208783060064265,
      "grad_norm": 16.61269730251294,
      "learning_rate": 3.0178326474622772e-06,
      "loss": 0.6614,
      "step": 221
    },
    {
      "epoch": 0.018291175743593967,
      "grad_norm": 8.31112554516155,
      "learning_rate": 3.0315500685871057e-06,
      "loss": 0.5271,
      "step": 222
    },
    {
      "epoch": 0.018373568427123673,
      "grad_norm": 13.664445288630535,
      "learning_rate": 3.0452674897119346e-06,
      "loss": 0.7018,
      "step": 223
    },
    {
      "epoch": 0.018455961110653375,
      "grad_norm": 10.005927544238816,
      "learning_rate": 3.058984910836763e-06,
      "loss": 0.5524,
      "step": 224
    },
    {
      "epoch": 0.018538353794183077,
      "grad_norm": 15.446861208215383,
      "learning_rate": 3.0727023319615915e-06,
      "loss": 0.6341,
      "step": 225
    },
    {
      "epoch": 0.01862074647771278,
      "grad_norm": 16.079846485759564,
      "learning_rate": 3.08641975308642e-06,
      "loss": 0.8794,
      "step": 226
    },
    {
      "epoch": 0.018703139161242482,
      "grad_norm": 10.175892407022696,
      "learning_rate": 3.1001371742112485e-06,
      "loss": 0.6755,
      "step": 227
    },
    {
      "epoch": 0.018785531844772184,
      "grad_norm": 56.127455072454026,
      "learning_rate": 3.113854595336077e-06,
      "loss": 0.3321,
      "step": 228
    },
    {
      "epoch": 0.018867924528301886,
      "grad_norm": 12.930244631445957,
      "learning_rate": 3.127572016460906e-06,
      "loss": 0.4804,
      "step": 229
    },
    {
      "epoch": 0.018950317211831588,
      "grad_norm": 12.529583269551953,
      "learning_rate": 3.141289437585734e-06,
      "loss": 0.5312,
      "step": 230
    },
    {
      "epoch": 0.01903270989536129,
      "grad_norm": 9.78335819090374,
      "learning_rate": 3.1550068587105624e-06,
      "loss": 0.6044,
      "step": 231
    },
    {
      "epoch": 0.019115102578890996,
      "grad_norm": 9.981952585751747,
      "learning_rate": 3.1687242798353912e-06,
      "loss": 0.7049,
      "step": 232
    },
    {
      "epoch": 0.019197495262420698,
      "grad_norm": 17.065859536580135,
      "learning_rate": 3.1824417009602197e-06,
      "loss": 0.6509,
      "step": 233
    },
    {
      "epoch": 0.0192798879459504,
      "grad_norm": 10.93465261939953,
      "learning_rate": 3.1961591220850486e-06,
      "loss": 0.7744,
      "step": 234
    },
    {
      "epoch": 0.019362280629480103,
      "grad_norm": 9.705094089624701,
      "learning_rate": 3.2098765432098767e-06,
      "loss": 0.6289,
      "step": 235
    },
    {
      "epoch": 0.019444673313009805,
      "grad_norm": 11.03377155515954,
      "learning_rate": 3.223593964334705e-06,
      "loss": 0.4788,
      "step": 236
    },
    {
      "epoch": 0.019527065996539507,
      "grad_norm": 9.129123781076657,
      "learning_rate": 3.237311385459534e-06,
      "loss": 0.5489,
      "step": 237
    },
    {
      "epoch": 0.01960945868006921,
      "grad_norm": 8.697937237915472,
      "learning_rate": 3.2510288065843625e-06,
      "loss": 0.5861,
      "step": 238
    },
    {
      "epoch": 0.01969185136359891,
      "grad_norm": 8.870677568511018,
      "learning_rate": 3.2647462277091905e-06,
      "loss": 0.5946,
      "step": 239
    },
    {
      "epoch": 0.019774244047128613,
      "grad_norm": 23.189212971761012,
      "learning_rate": 3.2784636488340194e-06,
      "loss": 0.4848,
      "step": 240
    },
    {
      "epoch": 0.01985663673065832,
      "grad_norm": 8.803471237780029,
      "learning_rate": 3.292181069958848e-06,
      "loss": 0.3981,
      "step": 241
    },
    {
      "epoch": 0.01993902941418802,
      "grad_norm": 9.276823579497725,
      "learning_rate": 3.305898491083677e-06,
      "loss": 0.3854,
      "step": 242
    },
    {
      "epoch": 0.020021422097717723,
      "grad_norm": 15.048560056515383,
      "learning_rate": 3.3196159122085053e-06,
      "loss": 0.737,
      "step": 243
    },
    {
      "epoch": 0.020103814781247425,
      "grad_norm": 8.848106416589038,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.2963,
      "step": 244
    },
    {
      "epoch": 0.020186207464777128,
      "grad_norm": 7.313536463258056,
      "learning_rate": 3.3470507544581622e-06,
      "loss": 0.2768,
      "step": 245
    },
    {
      "epoch": 0.02026860014830683,
      "grad_norm": 12.219856537027805,
      "learning_rate": 3.3607681755829907e-06,
      "loss": 0.4306,
      "step": 246
    },
    {
      "epoch": 0.020350992831836532,
      "grad_norm": 10.00453954068384,
      "learning_rate": 3.3744855967078196e-06,
      "loss": 0.2573,
      "step": 247
    },
    {
      "epoch": 0.020433385515366234,
      "grad_norm": 16.88134336345359,
      "learning_rate": 3.3882030178326476e-06,
      "loss": 0.2629,
      "step": 248
    },
    {
      "epoch": 0.020515778198895936,
      "grad_norm": 12.88958428233626,
      "learning_rate": 3.401920438957476e-06,
      "loss": 0.4504,
      "step": 249
    },
    {
      "epoch": 0.020598170882425642,
      "grad_norm": 19.916078497234878,
      "learning_rate": 3.415637860082305e-06,
      "loss": 0.7189,
      "step": 250
    },
    {
      "epoch": 0.020680563565955344,
      "grad_norm": 12.379454068967135,
      "learning_rate": 3.4293552812071335e-06,
      "loss": 0.5047,
      "step": 251
    },
    {
      "epoch": 0.020762956249485046,
      "grad_norm": 7.363299478552216,
      "learning_rate": 3.443072702331962e-06,
      "loss": 0.308,
      "step": 252
    },
    {
      "epoch": 0.02084534893301475,
      "grad_norm": 14.9221134616295,
      "learning_rate": 3.4567901234567904e-06,
      "loss": 0.5358,
      "step": 253
    },
    {
      "epoch": 0.02092774161654445,
      "grad_norm": 75.3026629510051,
      "learning_rate": 3.470507544581619e-06,
      "loss": 1.3456,
      "step": 254
    },
    {
      "epoch": 0.021010134300074153,
      "grad_norm": 13.144224277254176,
      "learning_rate": 3.4842249657064474e-06,
      "loss": 0.7256,
      "step": 255
    },
    {
      "epoch": 0.021092526983603855,
      "grad_norm": 8.29902926659797,
      "learning_rate": 3.4979423868312762e-06,
      "loss": 0.479,
      "step": 256
    },
    {
      "epoch": 0.021174919667133557,
      "grad_norm": 36.142615394611894,
      "learning_rate": 3.5116598079561043e-06,
      "loss": 0.5905,
      "step": 257
    },
    {
      "epoch": 0.021257312350663263,
      "grad_norm": 18.894771750217856,
      "learning_rate": 3.5253772290809328e-06,
      "loss": 0.5372,
      "step": 258
    },
    {
      "epoch": 0.021339705034192965,
      "grad_norm": 8.876306670885448,
      "learning_rate": 3.5390946502057617e-06,
      "loss": 0.507,
      "step": 259
    },
    {
      "epoch": 0.021422097717722667,
      "grad_norm": 12.322148718207554,
      "learning_rate": 3.55281207133059e-06,
      "loss": 0.5313,
      "step": 260
    },
    {
      "epoch": 0.02150449040125237,
      "grad_norm": 9.429328982690008,
      "learning_rate": 3.566529492455419e-06,
      "loss": 0.6338,
      "step": 261
    },
    {
      "epoch": 0.02158688308478207,
      "grad_norm": 8.439579932933407,
      "learning_rate": 3.580246913580247e-06,
      "loss": 0.64,
      "step": 262
    },
    {
      "epoch": 0.021669275768311774,
      "grad_norm": 7.027927606341166,
      "learning_rate": 3.5939643347050755e-06,
      "loss": 0.694,
      "step": 263
    },
    {
      "epoch": 0.021751668451841476,
      "grad_norm": 18.461422860766792,
      "learning_rate": 3.6076817558299044e-06,
      "loss": 0.8644,
      "step": 264
    },
    {
      "epoch": 0.021834061135371178,
      "grad_norm": 7.312928184572379,
      "learning_rate": 3.621399176954733e-06,
      "loss": 0.5441,
      "step": 265
    },
    {
      "epoch": 0.02191645381890088,
      "grad_norm": 10.896094086625412,
      "learning_rate": 3.635116598079561e-06,
      "loss": 0.3631,
      "step": 266
    },
    {
      "epoch": 0.021998846502430586,
      "grad_norm": 12.46612913960122,
      "learning_rate": 3.64883401920439e-06,
      "loss": 0.6946,
      "step": 267
    },
    {
      "epoch": 0.022081239185960288,
      "grad_norm": 8.87385816825834,
      "learning_rate": 3.6625514403292183e-06,
      "loss": 0.6441,
      "step": 268
    },
    {
      "epoch": 0.02216363186948999,
      "grad_norm": 7.66230536481842,
      "learning_rate": 3.6762688614540472e-06,
      "loss": 0.4107,
      "step": 269
    },
    {
      "epoch": 0.022246024553019692,
      "grad_norm": 20.729872546438557,
      "learning_rate": 3.6899862825788757e-06,
      "loss": 0.575,
      "step": 270
    },
    {
      "epoch": 0.022328417236549394,
      "grad_norm": 8.579634165930928,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.2851,
      "step": 271
    },
    {
      "epoch": 0.022410809920079097,
      "grad_norm": 8.32603360510797,
      "learning_rate": 3.7174211248285326e-06,
      "loss": 0.549,
      "step": 272
    },
    {
      "epoch": 0.0224932026036088,
      "grad_norm": 7.119710330257647,
      "learning_rate": 3.731138545953361e-06,
      "loss": 0.6911,
      "step": 273
    },
    {
      "epoch": 0.0225755952871385,
      "grad_norm": 12.334011335320461,
      "learning_rate": 3.7448559670781896e-06,
      "loss": 0.2933,
      "step": 274
    },
    {
      "epoch": 0.022657987970668203,
      "grad_norm": 8.819773878214544,
      "learning_rate": 3.758573388203018e-06,
      "loss": 0.5093,
      "step": 275
    },
    {
      "epoch": 0.02274038065419791,
      "grad_norm": 7.853520271881538,
      "learning_rate": 3.7722908093278465e-06,
      "loss": 0.4793,
      "step": 276
    },
    {
      "epoch": 0.02282277333772761,
      "grad_norm": 12.938012573178968,
      "learning_rate": 3.786008230452675e-06,
      "loss": 0.7915,
      "step": 277
    },
    {
      "epoch": 0.022905166021257313,
      "grad_norm": 7.351505350233861,
      "learning_rate": 3.799725651577504e-06,
      "loss": 0.4733,
      "step": 278
    },
    {
      "epoch": 0.022987558704787015,
      "grad_norm": 7.3673534060195855,
      "learning_rate": 3.8134430727023324e-06,
      "loss": 0.41,
      "step": 279
    },
    {
      "epoch": 0.023069951388316717,
      "grad_norm": 9.289270184893226,
      "learning_rate": 3.827160493827161e-06,
      "loss": 0.4778,
      "step": 280
    },
    {
      "epoch": 0.02315234407184642,
      "grad_norm": 6.28612952057349,
      "learning_rate": 3.840877914951989e-06,
      "loss": 0.406,
      "step": 281
    },
    {
      "epoch": 0.02323473675537612,
      "grad_norm": 5.946903311816241,
      "learning_rate": 3.854595336076818e-06,
      "loss": 0.4755,
      "step": 282
    },
    {
      "epoch": 0.023317129438905824,
      "grad_norm": 6.834284548380493,
      "learning_rate": 3.868312757201647e-06,
      "loss": 0.5116,
      "step": 283
    },
    {
      "epoch": 0.02339952212243553,
      "grad_norm": 11.774766376537002,
      "learning_rate": 3.882030178326475e-06,
      "loss": 0.3654,
      "step": 284
    },
    {
      "epoch": 0.02348191480596523,
      "grad_norm": 10.79007061340619,
      "learning_rate": 3.895747599451304e-06,
      "loss": 0.3865,
      "step": 285
    },
    {
      "epoch": 0.023564307489494934,
      "grad_norm": 8.26845323051975,
      "learning_rate": 3.909465020576132e-06,
      "loss": 0.6248,
      "step": 286
    },
    {
      "epoch": 0.023646700173024636,
      "grad_norm": 49.81368555681276,
      "learning_rate": 3.9231824417009605e-06,
      "loss": 0.6136,
      "step": 287
    },
    {
      "epoch": 0.023729092856554338,
      "grad_norm": 13.035734882105107,
      "learning_rate": 3.9368998628257894e-06,
      "loss": 0.7522,
      "step": 288
    },
    {
      "epoch": 0.02381148554008404,
      "grad_norm": 10.878498977499254,
      "learning_rate": 3.9506172839506175e-06,
      "loss": 0.6219,
      "step": 289
    },
    {
      "epoch": 0.023893878223613742,
      "grad_norm": 11.982100652311225,
      "learning_rate": 3.964334705075446e-06,
      "loss": 0.6203,
      "step": 290
    },
    {
      "epoch": 0.023976270907143445,
      "grad_norm": 9.985093354905656,
      "learning_rate": 3.9780521262002744e-06,
      "loss": 0.6091,
      "step": 291
    },
    {
      "epoch": 0.024058663590673147,
      "grad_norm": 10.79524941182704,
      "learning_rate": 3.991769547325103e-06,
      "loss": 0.511,
      "step": 292
    },
    {
      "epoch": 0.024141056274202852,
      "grad_norm": 8.306861975711865,
      "learning_rate": 4.005486968449932e-06,
      "loss": 0.6458,
      "step": 293
    },
    {
      "epoch": 0.024223448957732555,
      "grad_norm": 8.445514735869802,
      "learning_rate": 4.01920438957476e-06,
      "loss": 0.4246,
      "step": 294
    },
    {
      "epoch": 0.024305841641262257,
      "grad_norm": 10.946877499366206,
      "learning_rate": 4.032921810699589e-06,
      "loss": 0.4455,
      "step": 295
    },
    {
      "epoch": 0.02438823432479196,
      "grad_norm": 9.95334826152276,
      "learning_rate": 4.046639231824417e-06,
      "loss": 0.7388,
      "step": 296
    },
    {
      "epoch": 0.02447062700832166,
      "grad_norm": 6.182291262554031,
      "learning_rate": 4.060356652949246e-06,
      "loss": 0.255,
      "step": 297
    },
    {
      "epoch": 0.024553019691851363,
      "grad_norm": 6.230437793654582,
      "learning_rate": 4.074074074074074e-06,
      "loss": 0.317,
      "step": 298
    },
    {
      "epoch": 0.024635412375381065,
      "grad_norm": 37.925863944916884,
      "learning_rate": 4.087791495198903e-06,
      "loss": 0.6706,
      "step": 299
    },
    {
      "epoch": 0.024717805058910768,
      "grad_norm": 9.275615694704983,
      "learning_rate": 4.101508916323731e-06,
      "loss": 0.7037,
      "step": 300
    },
    {
      "epoch": 0.02480019774244047,
      "grad_norm": 8.011699796447031,
      "learning_rate": 4.11522633744856e-06,
      "loss": 0.5876,
      "step": 301
    },
    {
      "epoch": 0.024882590425970175,
      "grad_norm": 6.150375331005549,
      "learning_rate": 4.128943758573389e-06,
      "loss": 0.5513,
      "step": 302
    },
    {
      "epoch": 0.024964983109499878,
      "grad_norm": 9.66910213632119,
      "learning_rate": 4.142661179698217e-06,
      "loss": 0.7654,
      "step": 303
    },
    {
      "epoch": 0.02504737579302958,
      "grad_norm": 7.492282251093323,
      "learning_rate": 4.156378600823046e-06,
      "loss": 0.5467,
      "step": 304
    },
    {
      "epoch": 0.025129768476559282,
      "grad_norm": 8.84736509671235,
      "learning_rate": 4.170096021947874e-06,
      "loss": 0.6802,
      "step": 305
    },
    {
      "epoch": 0.025212161160088984,
      "grad_norm": 11.147679298984809,
      "learning_rate": 4.183813443072703e-06,
      "loss": 0.3183,
      "step": 306
    },
    {
      "epoch": 0.025294553843618686,
      "grad_norm": 6.052642841781604,
      "learning_rate": 4.197530864197531e-06,
      "loss": 0.2843,
      "step": 307
    },
    {
      "epoch": 0.02537694652714839,
      "grad_norm": 5.853639145291225,
      "learning_rate": 4.21124828532236e-06,
      "loss": 0.6171,
      "step": 308
    },
    {
      "epoch": 0.02545933921067809,
      "grad_norm": 7.664697260789782,
      "learning_rate": 4.224965706447189e-06,
      "loss": 0.5526,
      "step": 309
    },
    {
      "epoch": 0.025541731894207793,
      "grad_norm": 8.309328255797821,
      "learning_rate": 4.238683127572017e-06,
      "loss": 0.3675,
      "step": 310
    },
    {
      "epoch": 0.0256241245777375,
      "grad_norm": 6.048028087236039,
      "learning_rate": 4.2524005486968456e-06,
      "loss": 0.3439,
      "step": 311
    },
    {
      "epoch": 0.0257065172612672,
      "grad_norm": 7.684783842069482,
      "learning_rate": 4.266117969821674e-06,
      "loss": 0.671,
      "step": 312
    },
    {
      "epoch": 0.025788909944796903,
      "grad_norm": 14.534992720713337,
      "learning_rate": 4.2798353909465025e-06,
      "loss": 0.7617,
      "step": 313
    },
    {
      "epoch": 0.025871302628326605,
      "grad_norm": 8.615671182896788,
      "learning_rate": 4.293552812071331e-06,
      "loss": 0.6877,
      "step": 314
    },
    {
      "epoch": 0.025953695311856307,
      "grad_norm": 7.575816304061312,
      "learning_rate": 4.3072702331961594e-06,
      "loss": 0.6431,
      "step": 315
    },
    {
      "epoch": 0.02603608799538601,
      "grad_norm": 8.768345528296296,
      "learning_rate": 4.3209876543209875e-06,
      "loss": 0.4367,
      "step": 316
    },
    {
      "epoch": 0.02611848067891571,
      "grad_norm": 7.718681777042332,
      "learning_rate": 4.334705075445816e-06,
      "loss": 0.3518,
      "step": 317
    },
    {
      "epoch": 0.026200873362445413,
      "grad_norm": 15.692592743928309,
      "learning_rate": 4.348422496570645e-06,
      "loss": 0.8163,
      "step": 318
    },
    {
      "epoch": 0.02628326604597512,
      "grad_norm": 8.46510288305704,
      "learning_rate": 4.362139917695473e-06,
      "loss": 0.6321,
      "step": 319
    },
    {
      "epoch": 0.02636565872950482,
      "grad_norm": 6.317038560782752,
      "learning_rate": 4.375857338820302e-06,
      "loss": 0.4448,
      "step": 320
    },
    {
      "epoch": 0.026448051413034523,
      "grad_norm": 17.22722159022544,
      "learning_rate": 4.38957475994513e-06,
      "loss": 0.6319,
      "step": 321
    },
    {
      "epoch": 0.026530444096564226,
      "grad_norm": 35.34200236087575,
      "learning_rate": 4.403292181069959e-06,
      "loss": 0.6193,
      "step": 322
    },
    {
      "epoch": 0.026612836780093928,
      "grad_norm": 11.559769027360641,
      "learning_rate": 4.417009602194788e-06,
      "loss": 0.6904,
      "step": 323
    },
    {
      "epoch": 0.02669522946362363,
      "grad_norm": 7.249712253160824,
      "learning_rate": 4.430727023319616e-06,
      "loss": 0.4605,
      "step": 324
    },
    {
      "epoch": 0.026777622147153332,
      "grad_norm": 5.9808851831275,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.3127,
      "step": 325
    },
    {
      "epoch": 0.026860014830683034,
      "grad_norm": 6.4015006508429995,
      "learning_rate": 4.458161865569273e-06,
      "loss": 0.6494,
      "step": 326
    },
    {
      "epoch": 0.026942407514212736,
      "grad_norm": 7.388398333528457,
      "learning_rate": 4.471879286694102e-06,
      "loss": 0.4854,
      "step": 327
    },
    {
      "epoch": 0.027024800197742442,
      "grad_norm": 5.459089110209384,
      "learning_rate": 4.485596707818931e-06,
      "loss": 0.463,
      "step": 328
    },
    {
      "epoch": 0.027107192881272144,
      "grad_norm": 7.377223891634756,
      "learning_rate": 4.499314128943759e-06,
      "loss": 0.6919,
      "step": 329
    },
    {
      "epoch": 0.027189585564801846,
      "grad_norm": 6.3768264626554805,
      "learning_rate": 4.513031550068587e-06,
      "loss": 0.6361,
      "step": 330
    },
    {
      "epoch": 0.02727197824833155,
      "grad_norm": 6.99781725288093,
      "learning_rate": 4.526748971193416e-06,
      "loss": 0.6743,
      "step": 331
    },
    {
      "epoch": 0.02735437093186125,
      "grad_norm": 5.9726845052369075,
      "learning_rate": 4.540466392318245e-06,
      "loss": 0.3525,
      "step": 332
    },
    {
      "epoch": 0.027436763615390953,
      "grad_norm": 4.715507305833575,
      "learning_rate": 4.554183813443074e-06,
      "loss": 0.365,
      "step": 333
    },
    {
      "epoch": 0.027519156298920655,
      "grad_norm": 5.786778668319323,
      "learning_rate": 4.567901234567902e-06,
      "loss": 0.3823,
      "step": 334
    },
    {
      "epoch": 0.027601548982450357,
      "grad_norm": 7.779154962035555,
      "learning_rate": 4.58161865569273e-06,
      "loss": 0.5933,
      "step": 335
    },
    {
      "epoch": 0.02768394166598006,
      "grad_norm": 12.355416669442642,
      "learning_rate": 4.595336076817559e-06,
      "loss": 0.6039,
      "step": 336
    },
    {
      "epoch": 0.027766334349509765,
      "grad_norm": 5.078241648282684,
      "learning_rate": 4.6090534979423875e-06,
      "loss": 0.3775,
      "step": 337
    },
    {
      "epoch": 0.027848727033039467,
      "grad_norm": 5.863453886765012,
      "learning_rate": 4.622770919067216e-06,
      "loss": 0.2638,
      "step": 338
    },
    {
      "epoch": 0.02793111971656917,
      "grad_norm": 12.580103906062597,
      "learning_rate": 4.6364883401920444e-06,
      "loss": 0.7607,
      "step": 339
    },
    {
      "epoch": 0.02801351240009887,
      "grad_norm": 6.6784963620823525,
      "learning_rate": 4.6502057613168725e-06,
      "loss": 0.4435,
      "step": 340
    },
    {
      "epoch": 0.028095905083628574,
      "grad_norm": 7.038657857694269,
      "learning_rate": 4.663923182441701e-06,
      "loss": 0.6857,
      "step": 341
    },
    {
      "epoch": 0.028178297767158276,
      "grad_norm": 14.379372920193825,
      "learning_rate": 4.67764060356653e-06,
      "loss": 0.8087,
      "step": 342
    },
    {
      "epoch": 0.028260690450687978,
      "grad_norm": 9.837898915303215,
      "learning_rate": 4.691358024691358e-06,
      "loss": 0.5434,
      "step": 343
    },
    {
      "epoch": 0.02834308313421768,
      "grad_norm": 6.687023299218438,
      "learning_rate": 4.705075445816187e-06,
      "loss": 0.3655,
      "step": 344
    },
    {
      "epoch": 0.028425475817747382,
      "grad_norm": 5.726375583817776,
      "learning_rate": 4.718792866941015e-06,
      "loss": 0.5816,
      "step": 345
    },
    {
      "epoch": 0.028507868501277088,
      "grad_norm": 6.8852392247210945,
      "learning_rate": 4.732510288065844e-06,
      "loss": 0.5028,
      "step": 346
    },
    {
      "epoch": 0.02859026118480679,
      "grad_norm": 5.798984025320741,
      "learning_rate": 4.746227709190673e-06,
      "loss": 0.5701,
      "step": 347
    },
    {
      "epoch": 0.028672653868336492,
      "grad_norm": 10.24662425737303,
      "learning_rate": 4.759945130315501e-06,
      "loss": 0.6865,
      "step": 348
    },
    {
      "epoch": 0.028755046551866194,
      "grad_norm": 6.089865434146081,
      "learning_rate": 4.773662551440329e-06,
      "loss": 0.6887,
      "step": 349
    },
    {
      "epoch": 0.028837439235395897,
      "grad_norm": 6.365563959115913,
      "learning_rate": 4.787379972565158e-06,
      "loss": 0.6076,
      "step": 350
    },
    {
      "epoch": 0.0289198319189256,
      "grad_norm": 5.916203223471868,
      "learning_rate": 4.801097393689987e-06,
      "loss": 0.6597,
      "step": 351
    },
    {
      "epoch": 0.0290022246024553,
      "grad_norm": 4.599031509093365,
      "learning_rate": 4.814814814814815e-06,
      "loss": 0.395,
      "step": 352
    },
    {
      "epoch": 0.029084617285985003,
      "grad_norm": 6.6869153491637485,
      "learning_rate": 4.828532235939644e-06,
      "loss": 0.548,
      "step": 353
    },
    {
      "epoch": 0.02916700996951471,
      "grad_norm": 4.333465973974785,
      "learning_rate": 4.842249657064472e-06,
      "loss": 0.5945,
      "step": 354
    },
    {
      "epoch": 0.02924940265304441,
      "grad_norm": 6.808396103175679,
      "learning_rate": 4.855967078189301e-06,
      "loss": 0.5964,
      "step": 355
    },
    {
      "epoch": 0.029331795336574113,
      "grad_norm": 9.755100603471288,
      "learning_rate": 4.86968449931413e-06,
      "loss": 0.6287,
      "step": 356
    },
    {
      "epoch": 0.029414188020103815,
      "grad_norm": 5.80905261329336,
      "learning_rate": 4.883401920438958e-06,
      "loss": 0.4366,
      "step": 357
    },
    {
      "epoch": 0.029496580703633517,
      "grad_norm": 5.5904604000702545,
      "learning_rate": 4.897119341563787e-06,
      "loss": 0.5093,
      "step": 358
    },
    {
      "epoch": 0.02957897338716322,
      "grad_norm": 7.090237952729793,
      "learning_rate": 4.910836762688615e-06,
      "loss": 0.4749,
      "step": 359
    },
    {
      "epoch": 0.029661366070692922,
      "grad_norm": 6.9638534563545695,
      "learning_rate": 4.924554183813444e-06,
      "loss": 0.6429,
      "step": 360
    },
    {
      "epoch": 0.029743758754222624,
      "grad_norm": 4.026899519732204,
      "learning_rate": 4.938271604938272e-06,
      "loss": 0.4878,
      "step": 361
    },
    {
      "epoch": 0.029826151437752326,
      "grad_norm": 7.3582470565677065,
      "learning_rate": 4.9519890260631005e-06,
      "loss": 0.5546,
      "step": 362
    },
    {
      "epoch": 0.029908544121282032,
      "grad_norm": 5.356724068201691,
      "learning_rate": 4.9657064471879294e-06,
      "loss": 0.345,
      "step": 363
    },
    {
      "epoch": 0.029990936804811734,
      "grad_norm": 5.364214773411196,
      "learning_rate": 4.9794238683127575e-06,
      "loss": 0.3246,
      "step": 364
    },
    {
      "epoch": 0.030073329488341436,
      "grad_norm": 8.332851292756342,
      "learning_rate": 4.993141289437586e-06,
      "loss": 0.6733,
      "step": 365
    },
    {
      "epoch": 0.030155722171871138,
      "grad_norm": 9.765581947528334,
      "learning_rate": 5.0068587105624144e-06,
      "loss": 0.4704,
      "step": 366
    },
    {
      "epoch": 0.03023811485540084,
      "grad_norm": 5.498161157247211,
| "learning_rate": 5.020576131687243e-06, |
| "loss": 0.2827, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.030320507538930543, |
| "grad_norm": 43.68184948115611, |
| "learning_rate": 5.034293552812071e-06, |
| "loss": 0.3449, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.030402900222460245, |
| "grad_norm": 6.219439174212411, |
| "learning_rate": 5.0480109739369e-06, |
| "loss": 0.3369, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.030485292905989947, |
| "grad_norm": 5.267942442082339, |
| "learning_rate": 5.061728395061729e-06, |
| "loss": 0.4036, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.03056768558951965, |
| "grad_norm": 4.7582676290183, |
| "learning_rate": 5.075445816186557e-06, |
| "loss": 0.2824, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.030650078273049355, |
| "grad_norm": 8.46553998751592, |
| "learning_rate": 5.089163237311386e-06, |
| "loss": 0.5826, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.030732470956579057, |
| "grad_norm": 9.33378948895317, |
| "learning_rate": 5.102880658436214e-06, |
| "loss": 0.6385, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.03081486364010876, |
| "grad_norm": 10.627769537470304, |
| "learning_rate": 5.116598079561042e-06, |
| "loss": 0.6659, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.03089725632363846, |
| "grad_norm": 6.414932231735032, |
| "learning_rate": 5.130315500685872e-06, |
| "loss": 0.4209, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.030979649007168163, |
| "grad_norm": 5.200756588061871, |
| "learning_rate": 5.1440329218107e-06, |
| "loss": 0.3344, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.031062041690697866, |
| "grad_norm": 8.066096499546177, |
| "learning_rate": 5.157750342935528e-06, |
| "loss": 0.5315, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.031144434374227568, |
| "grad_norm": 7.5104265708571125, |
| "learning_rate": 5.171467764060357e-06, |
| "loss": 0.5785, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.03122682705775727, |
| "grad_norm": 7.326972182415438, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 0.6173, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.031309219741286975, |
| "grad_norm": 7.064196146503117, |
| "learning_rate": 5.198902606310015e-06, |
| "loss": 0.4073, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.031391612424816674, |
| "grad_norm": 10.55313726796645, |
| "learning_rate": 5.212620027434843e-06, |
| "loss": 0.5923, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.03147400510834638, |
| "grad_norm": 9.355498638304265, |
| "learning_rate": 5.226337448559671e-06, |
| "loss": 0.3621, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.03155639779187608, |
| "grad_norm": 6.985263971534283, |
| "learning_rate": 5.2400548696845e-06, |
| "loss": 0.5704, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.031638790475405784, |
| "grad_norm": 12.680293995262826, |
| "learning_rate": 5.253772290809328e-06, |
| "loss": 0.6467, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.03172118315893549, |
| "grad_norm": 8.65674983418551, |
| "learning_rate": 5.2674897119341575e-06, |
| "loss": 0.5878, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.03180357584246519, |
| "grad_norm": 6.341472071022504, |
| "learning_rate": 5.2812071330589856e-06, |
| "loss": 0.4152, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.031885968525994894, |
| "grad_norm": 6.869168720162001, |
| "learning_rate": 5.294924554183814e-06, |
| "loss": 0.4097, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.03196836120952459, |
| "grad_norm": 14.190249862283896, |
| "learning_rate": 5.3086419753086425e-06, |
| "loss": 0.8292, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.0320507538930543, |
| "grad_norm": 8.275823508063906, |
| "learning_rate": 5.3223593964334705e-06, |
| "loss": 0.5986, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.032133146576584, |
| "grad_norm": 6.358393504534048, |
| "learning_rate": 5.3360768175583e-06, |
| "loss": 0.4381, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0322155392601137, |
| "grad_norm": 7.516552473352774, |
| "learning_rate": 5.349794238683128e-06, |
| "loss": 0.6122, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0322979319436434, |
| "grad_norm": 8.856630051592028, |
| "learning_rate": 5.363511659807956e-06, |
| "loss": 0.5578, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.03238032462717311, |
| "grad_norm": 6.892129730394783, |
| "learning_rate": 5.377229080932785e-06, |
| "loss": 0.3553, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.03246271731070281, |
| "grad_norm": 21.05820599044793, |
| "learning_rate": 5.390946502057613e-06, |
| "loss": 0.7038, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.03254510999423251, |
| "grad_norm": 8.434869352368931, |
| "learning_rate": 5.404663923182441e-06, |
| "loss": 0.6087, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.03262750267776222, |
| "grad_norm": 7.561351421213893, |
| "learning_rate": 5.418381344307271e-06, |
| "loss": 0.5764, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.032709895361291916, |
| "grad_norm": 8.211243703767535, |
| "learning_rate": 5.432098765432099e-06, |
| "loss": 0.4147, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.03279228804482162, |
| "grad_norm": 6.985876038283628, |
| "learning_rate": 5.445816186556928e-06, |
| "loss": 0.6712, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.03287468072835132, |
| "grad_norm": 7.796492055840742, |
| "learning_rate": 5.459533607681756e-06, |
| "loss": 0.5813, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.032957073411881026, |
| "grad_norm": 103.54807755066446, |
| "learning_rate": 5.473251028806584e-06, |
| "loss": 2.5632, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.033039466095410724, |
| "grad_norm": 5.515831392953944, |
| "learning_rate": 5.486968449931414e-06, |
| "loss": 0.4869, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.03312185877894043, |
| "grad_norm": 7.030021195326967, |
| "learning_rate": 5.500685871056242e-06, |
| "loss": 0.5261, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.033204251462470136, |
| "grad_norm": 19.87615017638583, |
| "learning_rate": 5.514403292181071e-06, |
| "loss": 0.6917, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.033286644145999834, |
| "grad_norm": 7.081294911924975, |
| "learning_rate": 5.528120713305899e-06, |
| "loss": 0.6978, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.03336903682952954, |
| "grad_norm": 11.979085035070433, |
| "learning_rate": 5.541838134430727e-06, |
| "loss": 0.8872, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.03345142951305924, |
| "grad_norm": 8.195578186353957, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.6859, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.033533822196588944, |
| "grad_norm": 8.47406281800443, |
| "learning_rate": 5.569272976680385e-06, |
| "loss": 0.8004, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.03361621488011864, |
| "grad_norm": 7.986033367143951, |
| "learning_rate": 5.582990397805214e-06, |
| "loss": 0.6459, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.03369860756364835, |
| "grad_norm": 56.85009393490188, |
| "learning_rate": 5.596707818930042e-06, |
| "loss": 0.4587, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.03378100024717805, |
| "grad_norm": 7.34821171599197, |
| "learning_rate": 5.61042524005487e-06, |
| "loss": 0.3975, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03386339293070775, |
| "grad_norm": 10.977592364670041, |
| "learning_rate": 5.624142661179699e-06, |
| "loss": 0.6514, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.03394578561423746, |
| "grad_norm": 6.281256114995981, |
| "learning_rate": 5.6378600823045275e-06, |
| "loss": 0.5191, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.03402817829776716, |
| "grad_norm": 8.676725239026284, |
| "learning_rate": 5.651577503429356e-06, |
| "loss": 0.6749, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.03411057098129686, |
| "grad_norm": 5.271388347433627, |
| "learning_rate": 5.6652949245541844e-06, |
| "loss": 0.3394, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.03419296366482656, |
| "grad_norm": 8.62394479818176, |
| "learning_rate": 5.6790123456790125e-06, |
| "loss": 0.6579, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.03427535634835627, |
| "grad_norm": 8.725171620314873, |
| "learning_rate": 5.692729766803841e-06, |
| "loss": 0.5777, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.034357749031885966, |
| "grad_norm": 103.08679101276228, |
| "learning_rate": 5.7064471879286694e-06, |
| "loss": 1.5575, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.03444014171541567, |
| "grad_norm": 5.761947609743853, |
| "learning_rate": 5.720164609053498e-06, |
| "loss": 0.5843, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.03452253439894537, |
| "grad_norm": 7.898907683412111, |
| "learning_rate": 5.733882030178327e-06, |
| "loss": 0.4965, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.034604927082475076, |
| "grad_norm": 8.409149079491211, |
| "learning_rate": 5.747599451303155e-06, |
| "loss": 0.5187, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03468731976600478, |
| "grad_norm": 7.164102449901402, |
| "learning_rate": 5.761316872427984e-06, |
| "loss": 0.6237, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.03476971244953448, |
| "grad_norm": 6.78383472471162, |
| "learning_rate": 5.775034293552812e-06, |
| "loss": 0.47, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.034852105133064186, |
| "grad_norm": 8.352679629190035, |
| "learning_rate": 5.78875171467764e-06, |
| "loss": 0.6486, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.034934497816593885, |
| "grad_norm": 10.944499686428724, |
| "learning_rate": 5.80246913580247e-06, |
| "loss": 0.6787, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.03501689050012359, |
| "grad_norm": 7.169250883656542, |
| "learning_rate": 5.816186556927298e-06, |
| "loss": 0.6341, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.03509928318365329, |
| "grad_norm": 8.444427076931605, |
| "learning_rate": 5.829903978052127e-06, |
| "loss": 0.3875, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.035181675867182995, |
| "grad_norm": 5.943310209215223, |
| "learning_rate": 5.843621399176955e-06, |
| "loss": 0.5543, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.03526406855071269, |
| "grad_norm": 9.00551355023434, |
| "learning_rate": 5.857338820301783e-06, |
| "loss": 0.4304, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.0353464612342424, |
| "grad_norm": 5.450933353978881, |
| "learning_rate": 5.871056241426613e-06, |
| "loss": 0.439, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.035428853917772105, |
| "grad_norm": 5.508996916628157, |
| "learning_rate": 5.884773662551441e-06, |
| "loss": 0.3802, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0355112466013018, |
| "grad_norm": 5.247685983735031, |
| "learning_rate": 5.89849108367627e-06, |
| "loss": 0.2684, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.03559363928483151, |
| "grad_norm": 5.397506286271877, |
| "learning_rate": 5.912208504801098e-06, |
| "loss": 0.341, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.03567603196836121, |
| "grad_norm": 5.27131135952353, |
| "learning_rate": 5.925925925925926e-06, |
| "loss": 0.2659, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.03575842465189091, |
| "grad_norm": 480.0659176486544, |
| "learning_rate": 5.9396433470507556e-06, |
| "loss": 0.638, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.03584081733542061, |
| "grad_norm": 13.678674149772142, |
| "learning_rate": 5.953360768175584e-06, |
| "loss": 0.7309, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.03592321001895032, |
| "grad_norm": 10.780807692308517, |
| "learning_rate": 5.967078189300412e-06, |
| "loss": 0.7032, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.03600560270248002, |
| "grad_norm": 13.02329032201945, |
| "learning_rate": 5.9807956104252405e-06, |
| "loss": 0.5507, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.03608799538600972, |
| "grad_norm": 6.048832451175678, |
| "learning_rate": 5.994513031550069e-06, |
| "loss": 0.4034, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.03617038806953943, |
| "grad_norm": 6.96057463234594, |
| "learning_rate": 6.008230452674898e-06, |
| "loss": 0.4898, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.036252780753069126, |
| "grad_norm": 9.398283406999298, |
| "learning_rate": 6.021947873799726e-06, |
| "loss": 0.645, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.03633517343659883, |
| "grad_norm": 8.87917346131629, |
| "learning_rate": 6.0356652949245544e-06, |
| "loss": 0.6331, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.03641756612012853, |
| "grad_norm": 7.58770542825193, |
| "learning_rate": 6.049382716049383e-06, |
| "loss": 0.496, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.036499958803658236, |
| "grad_norm": 7.319287387022868, |
| "learning_rate": 6.063100137174211e-06, |
| "loss": 0.4107, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.036582351487187935, |
| "grad_norm": 6.047688078633024, |
| "learning_rate": 6.076817558299041e-06, |
| "loss": 0.5706, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.03666474417071764, |
| "grad_norm": 4.016360698698718, |
| "learning_rate": 6.090534979423869e-06, |
| "loss": 0.4581, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.036747136854247346, |
| "grad_norm": 4.965075908435688, |
| "learning_rate": 6.104252400548697e-06, |
| "loss": 0.4266, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.036829529537777045, |
| "grad_norm": 6.216596173925147, |
| "learning_rate": 6.117969821673526e-06, |
| "loss": 0.4522, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.03691192222130675, |
| "grad_norm": 4.842662632695187, |
| "learning_rate": 6.131687242798354e-06, |
| "loss": 0.4335, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.03699431490483645, |
| "grad_norm": 4.9752196790209515, |
| "learning_rate": 6.145404663923183e-06, |
| "loss": 0.557, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.037076707588366155, |
| "grad_norm": 5.735323380928238, |
| "learning_rate": 6.159122085048012e-06, |
| "loss": 0.4763, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.037159100271895854, |
| "grad_norm": 5.77466848724865, |
| "learning_rate": 6.17283950617284e-06, |
| "loss": 0.4934, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.03724149295542556, |
| "grad_norm": 9.112092371493077, |
| "learning_rate": 6.186556927297669e-06, |
| "loss": 0.7463, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.03732388563895526, |
| "grad_norm": 5.7102176201346, |
| "learning_rate": 6.200274348422497e-06, |
| "loss": 0.5117, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.037406278322484963, |
| "grad_norm": 78.79906885419953, |
| "learning_rate": 6.213991769547325e-06, |
| "loss": 0.4034, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.03748867100601467, |
| "grad_norm": 6.538900045540373, |
| "learning_rate": 6.227709190672154e-06, |
| "loss": 0.5889, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.03757106368954437, |
| "grad_norm": 6.303771338902271, |
| "learning_rate": 6.241426611796983e-06, |
| "loss": 0.4705, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.03765345637307407, |
| "grad_norm": 6.123584679010139, |
| "learning_rate": 6.255144032921812e-06, |
| "loss": 0.4935, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.03773584905660377, |
| "grad_norm": 6.632670832269488, |
| "learning_rate": 6.26886145404664e-06, |
| "loss": 0.4198, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.03781824174013348, |
| "grad_norm": 5.546218761188075, |
| "learning_rate": 6.282578875171468e-06, |
| "loss": 0.4775, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.037900634423663176, |
| "grad_norm": 7.410628500383756, |
| "learning_rate": 6.296296296296297e-06, |
| "loss": 0.5848, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03798302710719288, |
| "grad_norm": 13.29983237450669, |
| "learning_rate": 6.310013717421125e-06, |
| "loss": 0.5264, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.03806541979072258, |
| "grad_norm": 6.248701868467927, |
| "learning_rate": 6.3237311385459544e-06, |
| "loss": 0.4628, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.038147812474252286, |
| "grad_norm": 5.555557826856918, |
| "learning_rate": 6.3374485596707825e-06, |
| "loss": 0.4553, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.03823020515778199, |
| "grad_norm": 9.184422285800041, |
| "learning_rate": 6.3511659807956105e-06, |
| "loss": 0.8573, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.03831259784131169, |
| "grad_norm": 5.747050476310712, |
| "learning_rate": 6.3648834019204394e-06, |
| "loss": 0.482, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.038394990524841396, |
| "grad_norm": 4.763610826797223, |
| "learning_rate": 6.3786008230452675e-06, |
| "loss": 0.5323, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.038477383208371095, |
| "grad_norm": 5.592179783867718, |
| "learning_rate": 6.392318244170097e-06, |
| "loss": 0.4239, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.0385597758919008, |
| "grad_norm": 6.874653123972646, |
| "learning_rate": 6.406035665294925e-06, |
| "loss": 0.6832, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.0386421685754305, |
| "grad_norm": 8.90284342707074, |
| "learning_rate": 6.419753086419753e-06, |
| "loss": 0.7136, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.038724561258960205, |
| "grad_norm": 4.321479415948776, |
| "learning_rate": 6.433470507544582e-06, |
| "loss": 0.3613, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.038806953942489904, |
| "grad_norm": 6.212842061197888, |
| "learning_rate": 6.44718792866941e-06, |
| "loss": 0.5118, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.03888934662601961, |
| "grad_norm": 6.620819776714046, |
| "learning_rate": 6.460905349794238e-06, |
| "loss": 0.6616, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.038971739309549315, |
| "grad_norm": 4.727885284155705, |
| "learning_rate": 6.474622770919068e-06, |
| "loss": 0.5001, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.039054131993079014, |
| "grad_norm": 5.762977515077748, |
| "learning_rate": 6.488340192043896e-06, |
| "loss": 0.496, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.03913652467660872, |
| "grad_norm": 4.548105976567005, |
| "learning_rate": 6.502057613168725e-06, |
| "loss": 0.4174, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.03921891736013842, |
| "grad_norm": 10.758680715013131, |
| "learning_rate": 6.515775034293553e-06, |
| "loss": 1.0219, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.039301310043668124, |
| "grad_norm": 4.833629523221548, |
| "learning_rate": 6.529492455418381e-06, |
| "loss": 0.4714, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.03938370272719782, |
| "grad_norm": 6.168003173970242, |
| "learning_rate": 6.543209876543211e-06, |
| "loss": 0.5518, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.03946609541072753, |
| "grad_norm": 6.781568533057639, |
| "learning_rate": 6.556927297668039e-06, |
| "loss": 0.6373, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.03954848809425723, |
| "grad_norm": 7.807648679748847, |
| "learning_rate": 6.570644718792868e-06, |
| "loss": 0.6692, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03963088077778693, |
| "grad_norm": 5.261036851137123, |
| "learning_rate": 6.584362139917696e-06, |
| "loss": 0.4972, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.03971327346131664, |
| "grad_norm": 8.82287432796905, |
| "learning_rate": 6.598079561042524e-06, |
| "loss": 0.5782, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.03979566614484634, |
| "grad_norm": 4.74653534153213, |
| "learning_rate": 6.611796982167354e-06, |
| "loss": 0.4713, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.03987805882837604, |
| "grad_norm": 6.115095781407729, |
| "learning_rate": 6.625514403292182e-06, |
| "loss": 0.4716, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.03996045151190574, |
| "grad_norm": 9.137058349523514, |
| "learning_rate": 6.6392318244170106e-06, |
| "loss": 0.5758, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.04004284419543545, |
| "grad_norm": 6.208461928327539, |
| "learning_rate": 6.652949245541839e-06, |
| "loss": 0.6577, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.040125236878965145, |
| "grad_norm": 7.110169283978004, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.5648, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.04020762956249485, |
| "grad_norm": 5.955335908180957, |
| "learning_rate": 6.680384087791496e-06, |
| "loss": 0.6219, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.04029002224602455, |
| "grad_norm": 11.624360168848643, |
| "learning_rate": 6.6941015089163244e-06, |
| "loss": 0.5879, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.040372414929554255, |
| "grad_norm": 6.045762234406257, |
| "learning_rate": 6.707818930041153e-06, |
| "loss": 0.5048, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.04045480761308396, |
| "grad_norm": 7.274352306633563, |
| "learning_rate": 6.721536351165981e-06, |
| "loss": 0.7685, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.04053720029661366, |
| "grad_norm": 5.5877312296120465, |
| "learning_rate": 6.7352537722908094e-06, |
| "loss": 0.4779, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.040619592980143365, |
| "grad_norm": 190.3033861867491, |
| "learning_rate": 6.748971193415639e-06, |
| "loss": 2.688, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.040701985663673064, |
| "grad_norm": 10.422219949359063, |
| "learning_rate": 6.762688614540467e-06, |
| "loss": 0.6075, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.04078437834720277, |
| "grad_norm": 5.781767462518796, |
| "learning_rate": 6.776406035665295e-06, |
| "loss": 0.4923, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.04086677103073247, |
| "grad_norm": 7.096850817273703, |
| "learning_rate": 6.790123456790124e-06, |
| "loss": 0.3914, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.040949163714262174, |
| "grad_norm": 6.715044383416897, |
| "learning_rate": 6.803840877914952e-06, |
| "loss": 0.6559, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.04103155639779187, |
| "grad_norm": 5.869148124670727, |
| "learning_rate": 6.817558299039781e-06, |
| "loss": 0.6119, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.04111394908132158, |
| "grad_norm": 6.817431946634836, |
| "learning_rate": 6.83127572016461e-06, |
| "loss": 0.7011, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.041196341764851284, |
| "grad_norm": 6.726271933368034, |
| "learning_rate": 6.844993141289438e-06, |
| "loss": 0.5462, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04127873444838098, |
| "grad_norm": 5.340136233900829, |
| "learning_rate": 6.858710562414267e-06, |
| "loss": 0.4988, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.04136112713191069, |
| "grad_norm": 6.910491082536719, |
| "learning_rate": 6.872427983539095e-06, |
| "loss": 0.3951, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.04144351981544039, |
| "grad_norm": 5.788747410824949, |
| "learning_rate": 6.886145404663924e-06, |
| "loss": 0.5783, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.04152591249897009, |
| "grad_norm": 4.802638214101094, |
| "learning_rate": 6.899862825788752e-06, |
| "loss": 0.2877, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.04160830518249979, |
| "grad_norm": 4.484566887630588, |
| "learning_rate": 6.913580246913581e-06, |
| "loss": 0.3811, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.0416906978660295, |
| "grad_norm": 6.051035690893021, |
| "learning_rate": 6.92729766803841e-06, |
| "loss": 0.4715, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.0417730905495592, |
| "grad_norm": 6.986842742202583, |
| "learning_rate": 6.941015089163238e-06, |
| "loss": 0.3989, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0418554832330889, |
| "grad_norm": 3.822985062741508, |
| "learning_rate": 6.954732510288067e-06, |
| "loss": 0.3269, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.04193787591661861, |
| "grad_norm": 6.271803341046997, |
| "learning_rate": 6.968449931412895e-06, |
| "loss": 0.5455, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.042020268600148306, |
| "grad_norm": 4.324558170988178, |
| "learning_rate": 6.982167352537723e-06, |
| "loss": 0.3514, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.04210266128367801, |
| "grad_norm": 7.0738043807784265, |
| "learning_rate": 6.9958847736625525e-06, |
| "loss": 0.4582, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.04218505396720771, |
| "grad_norm": 4.303336598762236, |
| "learning_rate": 7.0096021947873805e-06, |
| "loss": 0.2196, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.042267446650737416, |
| "grad_norm": 6.729727246828568, |
| "learning_rate": 7.023319615912209e-06, |
| "loss": 0.5618, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.042349839334267114, |
| "grad_norm": 5.868167847006668, |
| "learning_rate": 7.0370370370370375e-06, |
| "loss": 0.5361, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.04243223201779682, |
| "grad_norm": 8.605825638540875, |
| "learning_rate": 7.0507544581618655e-06, |
| "loss": 0.6892, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.042514624701326525, |
| "grad_norm": 5.431792569281863, |
| "learning_rate": 7.064471879286695e-06, |
| "loss": 0.4384, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.042597017384856224, |
| "grad_norm": 6.984146114234522, |
| "learning_rate": 7.078189300411523e-06, |
| "loss": 0.483, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.04267941006838593, |
| "grad_norm": 9.3422311974361, |
| "learning_rate": 7.091906721536351e-06, |
| "loss": 0.5842, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.04276180275191563, |
| "grad_norm": 5.732842769025313, |
| "learning_rate": 7.10562414266118e-06, |
| "loss": 0.3972, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.042844195435445334, |
| "grad_norm": 8.65862913312267, |
| "learning_rate": 7.119341563786008e-06, |
| "loss": 0.6456, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.04292658811897503, |
| "grad_norm": 6.864043755425524, |
| "learning_rate": 7.133058984910838e-06, |
| "loss": 0.5415, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.04300898080250474, |
| "grad_norm": 7.980923425776543, |
| "learning_rate": 7.146776406035666e-06, |
| "loss": 0.6034, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.04309137348603444, |
| "grad_norm": 6.747289314662945, |
| "learning_rate": 7.160493827160494e-06, |
| "loss": 0.5615, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.04317376616956414, |
| "grad_norm": 5.139243463936733, |
| "learning_rate": 7.174211248285323e-06, |
| "loss": 0.4397, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.04325615885309385, |
| "grad_norm": 4.121695404994115, |
| "learning_rate": 7.187928669410151e-06, |
| "loss": 0.576, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.04333855153662355, |
| "grad_norm": 4.650257020504757, |
| "learning_rate": 7.201646090534981e-06, |
| "loss": 0.3738, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.04342094422015325, |
| "grad_norm": 5.611100034997747, |
| "learning_rate": 7.215363511659809e-06, |
| "loss": 0.5172, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.04350333690368295, |
| "grad_norm": 7.677372417584333, |
| "learning_rate": 7.229080932784637e-06, |
| "loss": 0.6671, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.04358572958721266, |
| "grad_norm": 6.6136776618758875, |
| "learning_rate": 7.242798353909466e-06, |
| "loss": 0.7559, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.043668122270742356, |
| "grad_norm": 4.792630608864752, |
| "learning_rate": 7.256515775034294e-06, |
| "loss": 0.3489, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.04375051495427206, |
| "grad_norm": 6.35062782260829, |
| "learning_rate": 7.270233196159122e-06, |
| "loss": 0.6693, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.04383290763780176, |
| "grad_norm": 5.605832169231131, |
| "learning_rate": 7.283950617283952e-06, |
| "loss": 0.6839, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.043915300321331466, |
| "grad_norm": 7.340777524703994, |
| "learning_rate": 7.29766803840878e-06, |
| "loss": 0.4743, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.04399769300486117, |
| "grad_norm": 5.736671549282368, |
| "learning_rate": 7.311385459533609e-06, |
| "loss": 0.5146, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.04408008568839087, |
| "grad_norm": 5.290429644156163, |
| "learning_rate": 7.325102880658437e-06, |
| "loss": 0.4368, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.044162478371920576, |
| "grad_norm": 5.60282614307985, |
| "learning_rate": 7.338820301783265e-06, |
| "loss": 0.5957, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.044244871055450274, |
| "grad_norm": 5.166334947065374, |
| "learning_rate": 7.3525377229080944e-06, |
| "loss": 0.3518, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.04432726373897998, |
| "grad_norm": 4.03828573750434, |
| "learning_rate": 7.3662551440329225e-06, |
| "loss": 0.4423, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.04440965642250968, |
| "grad_norm": 5.668301363403015, |
| "learning_rate": 7.379972565157751e-06, |
| "loss": 0.5681, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.044492049106039384, |
| "grad_norm": 5.359958652089056, |
| "learning_rate": 7.3936899862825794e-06, |
| "loss": 0.2408, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.04457444178956908, |
| "grad_norm": 7.013014929960838, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.5804, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.04465683447309879, |
| "grad_norm": 4.355083270145565, |
| "learning_rate": 7.421124828532237e-06, |
| "loss": 0.3215, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.044739227156628494, |
| "grad_norm": 4.950584901024228, |
| "learning_rate": 7.434842249657065e-06, |
| "loss": 0.4059, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.04482161984015819, |
| "grad_norm": 7.881681417974008, |
| "learning_rate": 7.448559670781894e-06, |
| "loss": 0.6569, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.0449040125236879, |
| "grad_norm": 8.797495038662335, |
| "learning_rate": 7.462277091906722e-06, |
| "loss": 0.6289, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.0449864052072176, |
| "grad_norm": 5.34524571582464, |
| "learning_rate": 7.47599451303155e-06, |
| "loss": 0.4069, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.0450687978907473, |
| "grad_norm": 6.158525652435086, |
| "learning_rate": 7.489711934156379e-06, |
| "loss": 0.2462, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.045151190574277, |
| "grad_norm": 4.744502082646233, |
| "learning_rate": 7.503429355281208e-06, |
| "loss": 0.3248, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.04523358325780671, |
| "grad_norm": 5.917171401303492, |
| "learning_rate": 7.517146776406036e-06, |
| "loss": 0.5406, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.045315975941336406, |
| "grad_norm": 12.458906212002898, |
| "learning_rate": 7.530864197530865e-06, |
| "loss": 0.7253, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04539836862486611, |
| "grad_norm": 7.531914989608172, |
| "learning_rate": 7.544581618655693e-06, |
| "loss": 0.5998, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.04548076130839582, |
| "grad_norm": 4.0280860261800004, |
| "learning_rate": 7.558299039780522e-06, |
| "loss": 0.4643, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.045563153991925516, |
| "grad_norm": 6.015033091452991, |
| "learning_rate": 7.57201646090535e-06, |
| "loss": 0.4256, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.04564554667545522, |
| "grad_norm": 7.13456394416836, |
| "learning_rate": 7.585733882030179e-06, |
| "loss": 0.7066, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.04572793935898492, |
| "grad_norm": 9.287192614752263, |
| "learning_rate": 7.599451303155008e-06, |
| "loss": 0.7301, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.045810332042514626, |
| "grad_norm": 6.284874505694774, |
| "learning_rate": 7.613168724279836e-06, |
| "loss": 0.433, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.045892724726044325, |
| "grad_norm": 5.947339381004487, |
| "learning_rate": 7.626886145404665e-06, |
| "loss": 0.4314, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.04597511740957403, |
| "grad_norm": 5.263524060407473, |
| "learning_rate": 7.640603566529494e-06, |
| "loss": 0.4686, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.04605751009310373, |
| "grad_norm": 6.4191613503614775, |
| "learning_rate": 7.654320987654322e-06, |
| "loss": 0.3771, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.046139902776633435, |
| "grad_norm": 4.786426740476273, |
| "learning_rate": 7.66803840877915e-06, |
| "loss": 0.5297, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.04622229546016314, |
| "grad_norm": 5.969988084382956, |
| "learning_rate": 7.681755829903978e-06, |
| "loss": 0.538, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.04630468814369284, |
| "grad_norm": 7.308885572221812, |
| "learning_rate": 7.695473251028807e-06, |
| "loss": 0.5234, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.046387080827222545, |
| "grad_norm": 5.97558760032448, |
| "learning_rate": 7.709190672153636e-06, |
| "loss": 0.4623, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.04646947351075224, |
| "grad_norm": 6.703370454236969, |
| "learning_rate": 7.722908093278464e-06, |
| "loss": 0.4321, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.04655186619428195, |
| "grad_norm": 5.449290012281466, |
| "learning_rate": 7.736625514403293e-06, |
| "loss": 0.4356, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.04663425887781165, |
| "grad_norm": 7.6918159276005245, |
| "learning_rate": 7.750342935528121e-06, |
| "loss": 0.7377, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.04671665156134135, |
| "grad_norm": 6.70070388013919, |
| "learning_rate": 7.76406035665295e-06, |
| "loss": 0.4832, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.04679904424487106, |
| "grad_norm": 7.054794661079072, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.5, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.04688143692840076, |
| "grad_norm": 6.480167958787211, |
| "learning_rate": 7.791495198902607e-06, |
| "loss": 0.6175, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.04696382961193046, |
| "grad_norm": 7.506915605641678, |
| "learning_rate": 7.805212620027435e-06, |
| "loss": 0.6497, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.04704622229546016, |
| "grad_norm": 5.494983006094659, |
| "learning_rate": 7.818930041152263e-06, |
| "loss": 0.4892, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.04712861497898987, |
| "grad_norm": 4.3290727230186175, |
| "learning_rate": 7.832647462277091e-06, |
| "loss": 0.3603, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.047211007662519566, |
| "grad_norm": 5.4053345835693545, |
| "learning_rate": 7.846364883401921e-06, |
| "loss": 0.4964, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.04729340034604927, |
| "grad_norm": 6.8723464860852275, |
| "learning_rate": 7.860082304526749e-06, |
| "loss": 0.5672, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.04737579302957897, |
| "grad_norm": 6.656820969576108, |
| "learning_rate": 7.873799725651579e-06, |
| "loss": 0.645, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.047458185713108676, |
| "grad_norm": 5.514831433681132, |
| "learning_rate": 7.887517146776407e-06, |
| "loss": 0.4784, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.04754057839663838, |
| "grad_norm": 4.686188017061002, |
| "learning_rate": 7.901234567901235e-06, |
| "loss": 0.3503, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.04762297108016808, |
| "grad_norm": 4.387746808622081, |
| "learning_rate": 7.914951989026065e-06, |
| "loss": 0.4085, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.047705363763697786, |
| "grad_norm": 5.842284742709113, |
| "learning_rate": 7.928669410150893e-06, |
| "loss": 0.6309, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.047787756447227485, |
| "grad_norm": 3.9959223903308883, |
| "learning_rate": 7.94238683127572e-06, |
| "loss": 0.4597, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.04787014913075719, |
| "grad_norm": 5.369023494162627, |
| "learning_rate": 7.956104252400549e-06, |
| "loss": 0.3751, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.04795254181428689, |
| "grad_norm": 5.2283340567591345, |
| "learning_rate": 7.969821673525377e-06, |
| "loss": 0.5132, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.048034934497816595, |
| "grad_norm": 5.349903681875905, |
| "learning_rate": 7.983539094650207e-06, |
| "loss": 0.3902, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.048117327181346294, |
| "grad_norm": 6.025785140359736, |
| "learning_rate": 7.997256515775035e-06, |
| "loss": 0.4025, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.048199719864876, |
| "grad_norm": 11.461654268608273, |
| "learning_rate": 8.010973936899864e-06, |
| "loss": 0.679, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.048282112548405705, |
| "grad_norm": 6.778197127568214, |
| "learning_rate": 8.024691358024692e-06, |
| "loss": 0.6373, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.048364505231935404, |
| "grad_norm": 6.682544812430659, |
| "learning_rate": 8.03840877914952e-06, |
| "loss": 0.6251, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.04844689791546511, |
| "grad_norm": 4.574480748574406, |
| "learning_rate": 8.052126200274349e-06, |
| "loss": 0.3578, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.04852929059899481, |
| "grad_norm": 5.884448857284855, |
| "learning_rate": 8.065843621399178e-06, |
| "loss": 0.4917, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.048611683282524513, |
| "grad_norm": 7.305232846822941, |
| "learning_rate": 8.079561042524006e-06, |
| "loss": 0.606, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.04869407596605421, |
| "grad_norm": 5.674679345815404, |
| "learning_rate": 8.093278463648834e-06, |
| "loss": 0.6098, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.04877646864958392, |
| "grad_norm": 5.8357949188415805, |
| "learning_rate": 8.106995884773662e-06, |
| "loss": 0.4771, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.048858861333113617, |
| "grad_norm": 6.271507937692957, |
| "learning_rate": 8.120713305898492e-06, |
| "loss": 0.4724, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.04894125401664332, |
| "grad_norm": 6.878289052557951, |
| "learning_rate": 8.13443072702332e-06, |
| "loss": 0.6355, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.04902364670017303, |
| "grad_norm": 6.77176191369003, |
| "learning_rate": 8.148148148148148e-06, |
| "loss": 0.5007, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.049106039383702726, |
| "grad_norm": 5.259636242937289, |
| "learning_rate": 8.161865569272978e-06, |
| "loss": 0.4991, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.04918843206723243, |
| "grad_norm": 6.83053596050915, |
| "learning_rate": 8.175582990397806e-06, |
| "loss": 0.4805, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.04927082475076213, |
| "grad_norm": 8.169736862097166, |
| "learning_rate": 8.189300411522634e-06, |
| "loss": 0.5456, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.049353217434291836, |
| "grad_norm": 7.689359450945521, |
| "learning_rate": 8.203017832647462e-06, |
| "loss": 0.6017, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.049435610117821535, |
| "grad_norm": 5.228139769258261, |
| "learning_rate": 8.21673525377229e-06, |
| "loss": 0.5305, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04951800280135124, |
| "grad_norm": 8.65116366410673, |
| "learning_rate": 8.23045267489712e-06, |
| "loss": 0.5437, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.04960039548488094, |
| "grad_norm": 11.17746374283472, |
| "learning_rate": 8.244170096021948e-06, |
| "loss": 0.7125, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.049682788168410645, |
| "grad_norm": 7.301491118538198, |
| "learning_rate": 8.257887517146778e-06, |
| "loss": 0.5577, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.04976518085194035, |
| "grad_norm": 4.970573337029868, |
| "learning_rate": 8.271604938271606e-06, |
| "loss": 0.5968, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.04984757353547005, |
| "grad_norm": 5.587652792417023, |
| "learning_rate": 8.285322359396434e-06, |
| "loss": 0.6005, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.049929966218999755, |
| "grad_norm": 5.549916285590465, |
| "learning_rate": 8.299039780521264e-06, |
| "loss": 0.3548, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.050012358902529454, |
| "grad_norm": 8.10309244938579, |
| "learning_rate": 8.312757201646092e-06, |
| "loss": 0.5691, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.05009475158605916, |
| "grad_norm": 5.8031246153733935, |
| "learning_rate": 8.32647462277092e-06, |
| "loss": 0.5858, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.05017714426958886, |
| "grad_norm": 4.633845595880233, |
| "learning_rate": 8.340192043895748e-06, |
| "loss": 0.6119, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.050259536953118564, |
| "grad_norm": 4.538100964584221, |
| "learning_rate": 8.353909465020576e-06, |
| "loss": 0.4402, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.05034192963664826, |
| "grad_norm": 5.323060646938032, |
| "learning_rate": 8.367626886145406e-06, |
| "loss": 0.634, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.05042432232017797, |
| "grad_norm": 8.019191719629505, |
| "learning_rate": 8.381344307270234e-06, |
| "loss": 0.5162, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.050506715003707674, |
| "grad_norm": 8.490592176269905, |
| "learning_rate": 8.395061728395062e-06, |
| "loss": 0.7862, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.05058910768723737, |
| "grad_norm": 6.715022235925982, |
| "learning_rate": 8.408779149519891e-06, |
| "loss": 0.693, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.05067150037076708, |
| "grad_norm": 6.662593981470133, |
| "learning_rate": 8.42249657064472e-06, |
| "loss": 0.7293, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.05075389305429678, |
| "grad_norm": 6.601584476220066, |
| "learning_rate": 8.43621399176955e-06, |
| "loss": 0.8338, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.05083628573782648, |
| "grad_norm": 4.392372074940804, |
| "learning_rate": 8.449931412894377e-06, |
| "loss": 0.362, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.05091867842135618, |
| "grad_norm": 67.48725205124786, |
| "learning_rate": 8.463648834019205e-06, |
| "loss": 2.8128, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.05100107110488589, |
| "grad_norm": 7.506512019819455, |
| "learning_rate": 8.477366255144033e-06, |
| "loss": 0.5441, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.051083463788415585, |
| "grad_norm": 6.880593438312231, |
| "learning_rate": 8.491083676268861e-06, |
| "loss": 0.5519, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.05116585647194529, |
| "grad_norm": 5.888795257341883, |
| "learning_rate": 8.504801097393691e-06, |
| "loss": 0.5516, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.051248249155475, |
| "grad_norm": 6.252602124665069, |
| "learning_rate": 8.518518518518519e-06, |
| "loss": 0.6957, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.051330641839004695, |
| "grad_norm": 5.4090002095589975, |
| "learning_rate": 8.532235939643347e-06, |
| "loss": 0.5359, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.0514130345225344, |
| "grad_norm": 10.890995083855032, |
| "learning_rate": 8.545953360768177e-06, |
| "loss": 0.709, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.0514954272060641, |
| "grad_norm": 5.34598899270713, |
| "learning_rate": 8.559670781893005e-06, |
| "loss": 0.7751, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.051577819889593805, |
| "grad_norm": 4.45040160733867, |
| "learning_rate": 8.573388203017833e-06, |
| "loss": 0.2535, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.051660212573123504, |
| "grad_norm": 5.585882232730492, |
| "learning_rate": 8.587105624142663e-06, |
| "loss": 0.6672, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.05174260525665321, |
| "grad_norm": 5.864022504893711, |
| "learning_rate": 8.60082304526749e-06, |
| "loss": 0.7359, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.05182499794018291, |
| "grad_norm": 4.476550092270306, |
| "learning_rate": 8.614540466392319e-06, |
| "loss": 0.4212, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.051907390623712614, |
| "grad_norm": 4.761925495673636, |
| "learning_rate": 8.628257887517147e-06, |
| "loss": 0.5568, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.05198978330724232, |
| "grad_norm": 4.598522719894157, |
| "learning_rate": 8.641975308641975e-06, |
| "loss": 0.2614, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.05207217599077202, |
| "grad_norm": 5.5021749646336175, |
| "learning_rate": 8.655692729766805e-06, |
| "loss": 0.6382, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.052154568674301724, |
| "grad_norm": 6.517012701844157, |
| "learning_rate": 8.669410150891633e-06, |
| "loss": 0.5804, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.05223696135783142, |
| "grad_norm": 6.805425239578879, |
| "learning_rate": 8.683127572016463e-06, |
| "loss": 0.5015, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.05231935404136113, |
| "grad_norm": 4.6738708514525715, |
| "learning_rate": 8.69684499314129e-06, |
| "loss": 0.3992, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.05240174672489083, |
| "grad_norm": 8.234844782748597, |
| "learning_rate": 8.710562414266119e-06, |
| "loss": 0.7507, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.05248413940842053, |
| "grad_norm": 6.698687047110895, |
| "learning_rate": 8.724279835390947e-06, |
| "loss": 0.6197, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.05256653209195024, |
| "grad_norm": 4.1168902182520615, |
| "learning_rate": 8.737997256515776e-06, |
| "loss": 0.3694, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.05264892477547994, |
| "grad_norm": 4.351008788296417, |
| "learning_rate": 8.751714677640604e-06, |
| "loss": 0.4905, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.05273131745900964, |
| "grad_norm": 9.457012453724198, |
| "learning_rate": 8.765432098765432e-06, |
| "loss": 0.7262, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.05281371014253934, |
| "grad_norm": 4.28519533402721, |
| "learning_rate": 8.77914951989026e-06, |
| "loss": 0.4703, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.05289610282606905, |
| "grad_norm": 7.396862648357201, |
| "learning_rate": 8.79286694101509e-06, |
| "loss": 0.4252, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.052978495509598746, |
| "grad_norm": 4.898822144574726, |
| "learning_rate": 8.806584362139918e-06, |
| "loss": 0.4963, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.05306088819312845, |
| "grad_norm": 5.754512361115338, |
| "learning_rate": 8.820301783264746e-06, |
| "loss": 0.7162, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.05314328087665815, |
| "grad_norm": 3.6053519506068605, |
| "learning_rate": 8.834019204389576e-06, |
| "loss": 0.2067, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.053225673560187856, |
| "grad_norm": 13.554388572711437, |
| "learning_rate": 8.847736625514404e-06, |
| "loss": 0.7916, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.05330806624371756, |
| "grad_norm": 9.04741748435677, |
| "learning_rate": 8.861454046639232e-06, |
| "loss": 0.6137, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.05339045892724726, |
| "grad_norm": 5.876495893543201, |
| "learning_rate": 8.87517146776406e-06, |
| "loss": 0.5187, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.053472851610776966, |
| "grad_norm": 16.50292992475323, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.5344, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.053555244294306664, |
| "grad_norm": 16.27884445947484, |
| "learning_rate": 8.902606310013718e-06, |
| "loss": 0.8425, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.05363763697783637, |
| "grad_norm": 6.05812949734961, |
| "learning_rate": 8.916323731138546e-06, |
| "loss": 0.5581, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.05372002966136607, |
| "grad_norm": 4.631703021154219, |
| "learning_rate": 8.930041152263376e-06, |
| "loss": 0.6053, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.053802422344895774, |
| "grad_norm": 5.0840641963520925, |
| "learning_rate": 8.943758573388204e-06, |
| "loss": 0.4591, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.05388481502842547, |
| "grad_norm": 11.145274102530228, |
| "learning_rate": 8.957475994513032e-06, |
| "loss": 0.4096, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.05396720771195518, |
| "grad_norm": 5.150949637450351, |
| "learning_rate": 8.971193415637862e-06, |
| "loss": 0.6682, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.054049600395484884, |
| "grad_norm": 6.053088680153872, |
| "learning_rate": 8.98491083676269e-06, |
| "loss": 0.6298, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.05413199307901458, |
| "grad_norm": 4.576638977362141, |
| "learning_rate": 8.998628257887518e-06, |
| "loss": 0.5385, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.05421438576254429, |
| "grad_norm": 5.279899070079792, |
| "learning_rate": 9.012345679012346e-06, |
| "loss": 0.5517, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.05429677844607399, |
| "grad_norm": 9.816689344331193, |
| "learning_rate": 9.026063100137174e-06, |
| "loss": 0.528, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.05437917112960369, |
| "grad_norm": 5.344268401980664, |
| "learning_rate": 9.039780521262004e-06, |
| "loss": 0.6586, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.05446156381313339, |
| "grad_norm": 4.907761624647467, |
| "learning_rate": 9.053497942386832e-06, |
| "loss": 0.416, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.0545439564966631, |
| "grad_norm": 8.816994387823925, |
| "learning_rate": 9.067215363511661e-06, |
| "loss": 0.8245, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.054626349180192796, |
| "grad_norm": 6.742675433781916, |
| "learning_rate": 9.08093278463649e-06, |
| "loss": 0.4017, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.0547087418637225, |
| "grad_norm": 5.140173007369312, |
| "learning_rate": 9.094650205761317e-06, |
| "loss": 0.5631, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.05479113454725221, |
| "grad_norm": 6.004556037601133, |
| "learning_rate": 9.108367626886147e-06, |
| "loss": 0.5043, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.054873527230781906, |
| "grad_norm": 11.766169776927814, |
| "learning_rate": 9.122085048010975e-06, |
| "loss": 0.5823, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.05495591991431161, |
| "grad_norm": 3.9038912525359484, |
| "learning_rate": 9.135802469135803e-06, |
| "loss": 0.4185, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.05503831259784131, |
| "grad_norm": 5.764415526037728, |
| "learning_rate": 9.149519890260631e-06, |
| "loss": 0.7135, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.055120705281371016, |
| "grad_norm": 5.200914706950143, |
| "learning_rate": 9.16323731138546e-06, |
| "loss": 0.5353, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.055203097964900714, |
| "grad_norm": 5.012014991774245, |
| "learning_rate": 9.17695473251029e-06, |
| "loss": 0.372, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.05528549064843042, |
| "grad_norm": 2.8951621516844677, |
| "learning_rate": 9.190672153635117e-06, |
| "loss": 0.2485, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.05536788333196012, |
| "grad_norm": 5.756434032002608, |
| "learning_rate": 9.204389574759945e-06, |
| "loss": 0.5129, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.055450276015489824, |
| "grad_norm": 5.5513950603318785, |
| "learning_rate": 9.218106995884775e-06, |
| "loss": 0.5475, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.05553266869901953, |
| "grad_norm": 7.155824300287789, |
| "learning_rate": 9.231824417009603e-06, |
| "loss": 0.6587, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.05561506138254923, |
| "grad_norm": 6.693719659190916, |
| "learning_rate": 9.245541838134433e-06, |
| "loss": 0.6653, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.055697454066078934, |
| "grad_norm": 5.899028184857873, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 0.5912, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.05577984674960863, |
| "grad_norm": 4.996123098753804, |
| "learning_rate": 9.272976680384089e-06, |
| "loss": 0.4981, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.05586223943313834, |
| "grad_norm": 7.59709581355784, |
| "learning_rate": 9.286694101508917e-06, |
| "loss": 0.6263, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.05594463211666804, |
| "grad_norm": 7.995633663002308, |
| "learning_rate": 9.300411522633745e-06, |
| "loss": 0.6424, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.05602702480019774, |
| "grad_norm": 4.669826110736232, |
| "learning_rate": 9.314128943758575e-06, |
| "loss": 0.4041, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.05610941748372744, |
| "grad_norm": 6.343393787810919, |
| "learning_rate": 9.327846364883403e-06, |
| "loss": 0.7479, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.05619181016725715, |
| "grad_norm": 96.70000934708833, |
| "learning_rate": 9.34156378600823e-06, |
| "loss": 2.6674, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.05627420285078685, |
| "grad_norm": 5.214445104499895, |
| "learning_rate": 9.35528120713306e-06, |
| "loss": 0.3759, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.05635659553431655, |
| "grad_norm": 6.426486685956931, |
| "learning_rate": 9.368998628257889e-06, |
| "loss": 0.7181, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.05643898821784626, |
| "grad_norm": 7.141014135224707, |
| "learning_rate": 9.382716049382717e-06, |
| "loss": 0.7562, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.056521380901375956, |
| "grad_norm": 5.988619932398916, |
| "learning_rate": 9.396433470507545e-06, |
| "loss": 0.3709, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.05660377358490566, |
| "grad_norm": 5.891136898704754, |
| "learning_rate": 9.410150891632374e-06, |
| "loss": 0.6162, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.05668616626843536, |
| "grad_norm": 6.170436173120623, |
| "learning_rate": 9.423868312757202e-06, |
| "loss": 0.576, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.056768558951965066, |
| "grad_norm": 6.151013921299717, |
| "learning_rate": 9.43758573388203e-06, |
| "loss": 0.478, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.056850951635494765, |
| "grad_norm": 6.117432660032868, |
| "learning_rate": 9.451303155006859e-06, |
| "loss": 0.5562, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.05693334431902447, |
| "grad_norm": 6.9566546187232206, |
| "learning_rate": 9.465020576131688e-06, |
| "loss": 0.7103, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.057015737002554176, |
| "grad_norm": 7.5413525683464435, |
| "learning_rate": 9.478737997256516e-06, |
| "loss": 0.5024, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.057098129686083875, |
| "grad_norm": 6.783803405617549, |
| "learning_rate": 9.492455418381346e-06, |
| "loss": 0.6286, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.05718052236961358, |
| "grad_norm": 7.745792551245552, |
| "learning_rate": 9.506172839506174e-06, |
| "loss": 0.5794, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.05726291505314328, |
| "grad_norm": 5.774054351127429, |
| "learning_rate": 9.519890260631002e-06, |
| "loss": 0.5072, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.057345307736672985, |
| "grad_norm": 5.0098435672277555, |
| "learning_rate": 9.53360768175583e-06, |
| "loss": 0.5214, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.05742770042020268, |
| "grad_norm": 6.134234294504796, |
| "learning_rate": 9.547325102880658e-06, |
| "loss": 0.6675, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.05751009310373239, |
| "grad_norm": 8.689201856152978, |
| "learning_rate": 9.561042524005488e-06, |
| "loss": 0.5794, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.05759248578726209, |
| "grad_norm": 5.8119206456550145, |
| "learning_rate": 9.574759945130316e-06, |
| "loss": 0.3022, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.05767487847079179, |
| "grad_norm": 32.92612650154318, |
| "learning_rate": 9.588477366255144e-06, |
| "loss": 0.527, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0577572711543215, |
| "grad_norm": 8.182146639732006, |
| "learning_rate": 9.602194787379974e-06, |
| "loss": 0.6942, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.0578396638378512, |
| "grad_norm": 4.748298256564357, |
| "learning_rate": 9.615912208504802e-06, |
| "loss": 0.3809, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.0579220565213809, |
| "grad_norm": 7.767690253299567, |
| "learning_rate": 9.62962962962963e-06, |
| "loss": 0.7925, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.0580044492049106, |
| "grad_norm": 6.152146994551039, |
| "learning_rate": 9.64334705075446e-06, |
| "loss": 0.6161, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.05808684188844031, |
| "grad_norm": 5.059103423212747, |
| "learning_rate": 9.657064471879288e-06, |
| "loss": 0.32, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.058169234571970006, |
| "grad_norm": 5.104441529492062, |
| "learning_rate": 9.670781893004116e-06, |
| "loss": 0.4551, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.05825162725549971, |
| "grad_norm": 10.994238478560392, |
| "learning_rate": 9.684499314128944e-06, |
| "loss": 0.7538, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.05833401993902942, |
| "grad_norm": 4.968918212244643, |
| "learning_rate": 9.698216735253772e-06, |
| "loss": 0.5932, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.058416412622559116, |
| "grad_norm": 6.423172055805545, |
| "learning_rate": 9.711934156378602e-06, |
| "loss": 0.7786, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.05849880530608882, |
| "grad_norm": 3.880934215636923, |
| "learning_rate": 9.72565157750343e-06, |
| "loss": 0.4232, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.05858119798961852, |
| "grad_norm": 8.269601261803912, |
| "learning_rate": 9.73936899862826e-06, |
| "loss": 0.6481, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.058663590673148226, |
| "grad_norm": 3.8961399197860658, |
| "learning_rate": 9.753086419753087e-06, |
| "loss": 0.3662, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.058745983356677925, |
| "grad_norm": 6.851544898008151, |
| "learning_rate": 9.766803840877916e-06, |
| "loss": 0.6113, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.05882837604020763, |
| "grad_norm": 4.163553582824758, |
| "learning_rate": 9.780521262002745e-06, |
| "loss": 0.4059, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.05891076872373733, |
| "grad_norm": 8.045649533095332, |
| "learning_rate": 9.794238683127573e-06, |
| "loss": 0.5178, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.058993161407267035, |
| "grad_norm": 8.105313201818435, |
| "learning_rate": 9.807956104252401e-06, |
| "loss": 0.6752, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.05907555409079674, |
| "grad_norm": 7.225672961161458, |
| "learning_rate": 9.82167352537723e-06, |
| "loss": 0.557, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.05915794677432644, |
| "grad_norm": 4.4835661046768776, |
| "learning_rate": 9.835390946502057e-06, |
| "loss": 0.4336, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.059240339457856145, |
| "grad_norm": 4.824952188625617, |
| "learning_rate": 9.849108367626887e-06, |
| "loss": 0.4902, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.059322732141385844, |
| "grad_norm": 4.503287721058772, |
| "learning_rate": 9.862825788751715e-06, |
| "loss": 0.4704, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05940512482491555, |
| "grad_norm": 3.7716661123547413, |
| "learning_rate": 9.876543209876543e-06, |
| "loss": 0.2055, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.05948751750844525, |
| "grad_norm": 5.833818295505862, |
| "learning_rate": 9.890260631001373e-06, |
| "loss": 0.3876, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.059569910191974954, |
| "grad_norm": 8.755791086817371, |
| "learning_rate": 9.903978052126201e-06, |
| "loss": 0.687, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.05965230287550465, |
| "grad_norm": 6.667121450588804, |
| "learning_rate": 9.91769547325103e-06, |
| "loss": 0.4723, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.05973469555903436, |
| "grad_norm": 4.706421774706928, |
| "learning_rate": 9.931412894375859e-06, |
| "loss": 0.5538, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.059817088242564063, |
| "grad_norm": 10.070112083827578, |
| "learning_rate": 9.945130315500687e-06, |
| "loss": 0.8218, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.05989948092609376, |
| "grad_norm": 5.252163320718281, |
| "learning_rate": 9.958847736625515e-06, |
| "loss": 0.4948, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.05998187360962347, |
| "grad_norm": 9.412813237828644, |
| "learning_rate": 9.972565157750343e-06, |
| "loss": 0.81, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.060064266293153167, |
| "grad_norm": 4.587877285973218, |
| "learning_rate": 9.986282578875173e-06, |
| "loss": 0.4304, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.06014665897668287, |
| "grad_norm": 4.75570214128782, |
| "learning_rate": 1e-05, |
| "loss": 0.4285, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.06022905166021257, |
| "grad_norm": 4.53172116436025, |
| "learning_rate": 9.999999955491562e-06, |
| "loss": 0.4013, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.060311444343742276, |
| "grad_norm": 4.01036872409321, |
| "learning_rate": 9.999999821966245e-06, |
| "loss": 0.4827, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.060393837027271975, |
| "grad_norm": 5.750887446275034, |
| "learning_rate": 9.999999599424054e-06, |
| "loss": 0.554, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.06047622971080168, |
| "grad_norm": 11.992520749521988, |
| "learning_rate": 9.99999928786499e-06, |
| "loss": 0.5623, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.060558622394331386, |
| "grad_norm": 8.169748148911982, |
| "learning_rate": 9.999998887289063e-06, |
| "loss": 0.5052, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.060641015077861085, |
| "grad_norm": 4.563084100494972, |
| "learning_rate": 9.999998397696277e-06, |
| "loss": 0.2568, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.06072340776139079, |
| "grad_norm": 5.198391042090528, |
| "learning_rate": 9.999997819086641e-06, |
| "loss": 0.4664, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.06080580044492049, |
| "grad_norm": 5.384798403619332, |
| "learning_rate": 9.999997151460166e-06, |
| "loss": 0.4522, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.060888193128450195, |
| "grad_norm": 4.41206122981705, |
| "learning_rate": 9.999996394816863e-06, |
| "loss": 0.36, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.060970585811979894, |
| "grad_norm": 11.803173169655384, |
| "learning_rate": 9.999995549156746e-06, |
| "loss": 0.4347, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0610529784955096, |
| "grad_norm": 7.305968155022703, |
| "learning_rate": 9.999994614479829e-06, |
| "loss": 0.6298, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.0611353711790393, |
| "grad_norm": 6.627646463372147, |
| "learning_rate": 9.999993590786133e-06, |
| "loss": 0.4627, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.061217763862569004, |
| "grad_norm": 7.961247066335053, |
| "learning_rate": 9.999992478075669e-06, |
| "loss": 0.7048, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.06130015654609871, |
| "grad_norm": 11.004530222939419, |
| "learning_rate": 9.999991276348463e-06, |
| "loss": 0.3497, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.06138254922962841, |
| "grad_norm": 7.087534685277768, |
| "learning_rate": 9.999989985604533e-06, |
| "loss": 0.7451, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.061464941913158114, |
| "grad_norm": 4.0530498589819155, |
| "learning_rate": 9.999988605843905e-06, |
| "loss": 0.3691, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.06154733459668781, |
| "grad_norm": 3.7550933352714186, |
| "learning_rate": 9.9999871370666e-06, |
| "loss": 0.2569, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.06162972728021752, |
| "grad_norm": 5.5935186429304, |
| "learning_rate": 9.999985579272646e-06, |
| "loss": 0.687, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.06171211996374722, |
| "grad_norm": 5.254856304496337, |
| "learning_rate": 9.99998393246207e-06, |
| "loss": 0.4639, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.06179451264727692, |
| "grad_norm": 7.5048343045294965, |
| "learning_rate": 9.999982196634904e-06, |
| "loss": 0.7888, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.06187690533080662, |
| "grad_norm": 6.480623845749662, |
| "learning_rate": 9.999980371791175e-06, |
| "loss": 0.7256, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.06195929801433633, |
| "grad_norm": 3.675842921010652, |
| "learning_rate": 9.999978457930918e-06, |
| "loss": 0.4443, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.06204169069786603, |
| "grad_norm": 16.26561159606985, |
| "learning_rate": 9.999976455054165e-06, |
| "loss": 0.7932, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.06212408338139573, |
| "grad_norm": 5.493234318965289, |
| "learning_rate": 9.999974363160954e-06, |
| "loss": 0.3184, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.06220647606492544, |
| "grad_norm": 5.1041491695509436, |
| "learning_rate": 9.999972182251323e-06, |
| "loss": 0.6043, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.062288868748455135, |
| "grad_norm": 4.851912554069924, |
| "learning_rate": 9.999969912325307e-06, |
| "loss": 0.5401, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.06237126143198484, |
| "grad_norm": 5.751575373973642, |
| "learning_rate": 9.999967553382947e-06, |
| "loss": 0.641, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.06245365411551454, |
| "grad_norm": 9.081514003066161, |
| "learning_rate": 9.999965105424289e-06, |
| "loss": 0.6134, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.06253604679904425, |
| "grad_norm": 4.811807174517819, |
| "learning_rate": 9.999962568449374e-06, |
| "loss": 0.4439, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.06261843948257395, |
| "grad_norm": 4.62379971606797, |
| "learning_rate": 9.999959942458246e-06, |
| "loss": 0.6629, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.06270083216610366, |
| "grad_norm": 11.03505749827588, |
| "learning_rate": 9.999957227450953e-06, |
| "loss": 0.6224, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.06278322484963335, |
| "grad_norm": 25.986656518689404, |
| "learning_rate": 9.999954423427545e-06, |
| "loss": 0.3417, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.06286561753316305, |
| "grad_norm": 7.11476454390553, |
| "learning_rate": 9.99995153038807e-06, |
| "loss": 0.6036, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.06294801021669276, |
| "grad_norm": 7.766009203845259, |
| "learning_rate": 9.999948548332579e-06, |
| "loss": 0.5608, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.06303040290022247, |
| "grad_norm": 5.920541614447556, |
| "learning_rate": 9.999945477261124e-06, |
| "loss": 0.5298, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.06311279558375216, |
| "grad_norm": 12.67177199319788, |
| "learning_rate": 9.999942317173764e-06, |
| "loss": 0.7621, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.06319518826728186, |
| "grad_norm": 14.925189851609852, |
| "learning_rate": 9.999939068070552e-06, |
| "loss": 0.6965, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.06327758095081157, |
| "grad_norm": 3.6293587522698427, |
| "learning_rate": 9.999935729951547e-06, |
| "loss": 0.4481, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.06335997363434127, |
| "grad_norm": 4.466378171099753, |
| "learning_rate": 9.999932302816808e-06, |
| "loss": 0.5852, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.06344236631787098, |
| "grad_norm": 5.132249154356962, |
| "learning_rate": 9.999928786666395e-06, |
| "loss": 0.3901, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.06352475900140067, |
| "grad_norm": 4.746744082094216, |
| "learning_rate": 9.999925181500372e-06, |
| "loss": 0.4565, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.06360715168493038, |
| "grad_norm": 5.8813150292186505, |
| "learning_rate": 9.999921487318805e-06, |
| "loss": 0.3263, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.06368954436846008, |
| "grad_norm": 4.410894729084581, |
| "learning_rate": 9.999917704121756e-06, |
| "loss": 0.345, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.06377193705198979, |
| "grad_norm": 6.405832396274801, |
| "learning_rate": 9.999913831909292e-06, |
| "loss": 0.8081, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.06385432973551948, |
| "grad_norm": 6.920285535512553, |
| "learning_rate": 9.999909870681486e-06, |
| "loss": 0.6784, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.06393672241904919, |
| "grad_norm": 5.579593067307145, |
| "learning_rate": 9.999905820438407e-06, |
| "loss": 0.578, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.06401911510257889, |
| "grad_norm": 6.074551579414587, |
| "learning_rate": 9.999901681180123e-06, |
| "loss": 0.5795, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.0641015077861086, |
| "grad_norm": 7.013231380533223, |
| "learning_rate": 9.999897452906715e-06, |
| "loss": 0.437, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.0641839004696383, |
| "grad_norm": 5.4113592934672985, |
| "learning_rate": 9.999893135618255e-06, |
| "loss": 0.5025, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.064266293153168, |
| "grad_norm": 6.6071206609748865, |
| "learning_rate": 9.999888729314817e-06, |
| "loss": 0.7329, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0643486858366977, |
| "grad_norm": 5.440902339138829, |
| "learning_rate": 9.999884233996482e-06, |
| "loss": 0.4127, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.0644310785202274, |
| "grad_norm": 4.994301771597056, |
| "learning_rate": 9.999879649663332e-06, |
| "loss": 0.5911, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.06451347120375711, |
| "grad_norm": 6.0543541357228055, |
| "learning_rate": 9.999874976315443e-06, |
| "loss": 0.5825, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.0645958638872868, |
| "grad_norm": 4.3097161136235655, |
| "learning_rate": 9.999870213952904e-06, |
| "loss": 0.5132, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.06467825657081651, |
| "grad_norm": 6.888363932085567, |
| "learning_rate": 9.999865362575799e-06, |
| "loss": 0.7543, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.06476064925434621, |
| "grad_norm": 4.724388546371243, |
| "learning_rate": 9.999860422184209e-06, |
| "loss": 0.5942, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.06484304193787592, |
| "grad_norm": 7.4935192383230955, |
| "learning_rate": 9.999855392778228e-06, |
| "loss": 0.4375, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.06492543462140563, |
| "grad_norm": 6.498332631896668, |
| "learning_rate": 9.999850274357943e-06, |
| "loss": 0.6782, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.06500782730493532, |
| "grad_norm": 5.187935409845064, |
| "learning_rate": 9.999845066923445e-06, |
| "loss": 0.5646, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.06509021998846502, |
| "grad_norm": 5.9252659829991075, |
| "learning_rate": 9.999839770474827e-06, |
| "loss": 0.4834, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.06517261267199473, |
| "grad_norm": 4.848728708034422, |
| "learning_rate": 9.999834385012184e-06, |
| "loss": 0.4574, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.06525500535552443, |
| "grad_norm": 4.1703778115278665, |
| "learning_rate": 9.999828910535612e-06, |
| "loss": 0.5278, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.06533739803905413, |
| "grad_norm": 5.464196351317177, |
| "learning_rate": 9.999823347045206e-06, |
| "loss": 0.481, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.06541979072258383, |
| "grad_norm": 9.661214620692148, |
| "learning_rate": 9.999817694541067e-06, |
| "loss": 0.7433, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.06550218340611354, |
| "grad_norm": 6.375475870240334, |
| "learning_rate": 9.999811953023297e-06, |
| "loss": 0.6541, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.06558457608964324, |
| "grad_norm": 5.1053662811437865, |
| "learning_rate": 9.999806122491998e-06, |
| "loss": 0.4034, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.06566696877317295, |
| "grad_norm": 4.706843481268382, |
| "learning_rate": 9.99980020294727e-06, |
| "loss": 0.5359, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.06574936145670264, |
| "grad_norm": 5.942544872541858, |
| "learning_rate": 9.99979419438922e-06, |
| "loss": 0.5096, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.06583175414023235, |
| "grad_norm": 7.443525959047362, |
| "learning_rate": 9.999788096817957e-06, |
| "loss": 0.3826, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.06591414682376205, |
| "grad_norm": 5.603804250629685, |
| "learning_rate": 9.999781910233589e-06, |
| "loss": 0.5671, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.06599653950729176, |
| "grad_norm": 4.5063553175865705, |
| "learning_rate": 9.999775634636226e-06, |
| "loss": 0.4009, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.06607893219082145, |
| "grad_norm": 8.076520038006072, |
| "learning_rate": 9.999769270025978e-06, |
| "loss": 0.8698, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.06616132487435115, |
| "grad_norm": 5.500455378105405, |
| "learning_rate": 9.99976281640296e-06, |
| "loss": 0.4873, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.06624371755788086, |
| "grad_norm": 3.348900655594007, |
| "learning_rate": 9.999756273767288e-06, |
| "loss": 0.4912, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.06632611024141057, |
| "grad_norm": 4.699563891054634, |
| "learning_rate": 9.999749642119075e-06, |
| "loss": 0.4622, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.06640850292494027, |
| "grad_norm": 6.752287413544824, |
| "learning_rate": 9.99974292145844e-06, |
| "loss": 0.7243, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.06649089560846996, |
| "grad_norm": 7.99053258176144, |
| "learning_rate": 9.999736111785507e-06, |
| "loss": 0.6806, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.06657328829199967, |
| "grad_norm": 4.006803923880306, |
| "learning_rate": 9.99972921310039e-06, |
| "loss": 0.3127, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.06665568097552937, |
| "grad_norm": 5.684928104174344, |
| "learning_rate": 9.99972222540322e-06, |
| "loss": 0.3958, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.06673807365905908, |
| "grad_norm": 6.2655870002623715, |
| "learning_rate": 9.999715148694114e-06, |
| "loss": 0.4125, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.06682046634258877, |
| "grad_norm": 4.858773634051298, |
| "learning_rate": 9.999707982973203e-06, |
| "loss": 0.4663, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.06690285902611848, |
| "grad_norm": 4.4468583578233645, |
| "learning_rate": 9.999700728240612e-06, |
| "loss": 0.4221, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.06698525170964818, |
| "grad_norm": 5.30420324889732, |
| "learning_rate": 9.999693384496469e-06, |
| "loss": 0.6381, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.06706764439317789, |
| "grad_norm": 5.03373160052799, |
| "learning_rate": 9.99968595174091e-06, |
| "loss": 0.5375, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.0671500370767076, |
| "grad_norm": 4.342016897255091, |
| "learning_rate": 9.999678429974063e-06, |
| "loss": 0.5741, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.06723242976023729, |
| "grad_norm": 4.020245998757659, |
| "learning_rate": 9.999670819196061e-06, |
| "loss": 0.5157, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.06731482244376699, |
| "grad_norm": 5.037402626653137, |
| "learning_rate": 9.999663119407043e-06, |
| "loss": 0.5872, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.0673972151272967, |
| "grad_norm": 5.351119050045274, |
| "learning_rate": 9.999655330607143e-06, |
| "loss": 0.4749, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.0674796078108264, |
| "grad_norm": 3.9429223358828294, |
| "learning_rate": 9.999647452796502e-06, |
| "loss": 0.4117, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0675620004943561, |
| "grad_norm": 5.59405888819271, |
| "learning_rate": 9.99963948597526e-06, |
| "loss": 0.314, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0676443931778858, |
| "grad_norm": 4.687434371571609, |
| "learning_rate": 9.999631430143558e-06, |
| "loss": 0.5861, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.0677267858614155, |
| "grad_norm": 4.995276130163719, |
| "learning_rate": 9.999623285301538e-06, |
| "loss": 0.4674, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.06780917854494521, |
| "grad_norm": 4.533079712780735, |
| "learning_rate": 9.999615051449348e-06, |
| "loss": 0.5473, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.06789157122847492, |
| "grad_norm": 4.406552313322744, |
| "learning_rate": 9.999606728587134e-06, |
| "loss": 0.6765, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.06797396391200461, |
| "grad_norm": 5.688948880327145, |
| "learning_rate": 9.999598316715043e-06, |
| "loss": 0.5709, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.06805635659553431, |
| "grad_norm": 4.327277968958632, |
| "learning_rate": 9.999589815833224e-06, |
| "loss": 0.3639, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.06813874927906402, |
| "grad_norm": 5.27747946166832, |
| "learning_rate": 9.999581225941829e-06, |
| "loss": 0.3616, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.06822114196259373, |
| "grad_norm": 4.249209100815019, |
| "learning_rate": 9.999572547041013e-06, |
| "loss": 0.4393, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.06830353464612342, |
| "grad_norm": 7.1321873495194525, |
| "learning_rate": 9.999563779130928e-06, |
| "loss": 0.7852, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.06838592732965312, |
| "grad_norm": 5.422129659924177, |
| "learning_rate": 9.999554922211732e-06, |
| "loss": 0.4847, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.06846832001318283, |
| "grad_norm": 5.212554077950715, |
| "learning_rate": 9.99954597628358e-06, |
| "loss": 0.5653, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.06855071269671253, |
| "grad_norm": 4.785517487664035, |
| "learning_rate": 9.999536941346635e-06, |
| "loss": 0.3497, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.06863310538024224, |
| "grad_norm": 3.9488684791068165, |
| "learning_rate": 9.999527817401053e-06, |
| "loss": 0.4563, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.06871549806377193, |
| "grad_norm": 5.63061411017133, |
| "learning_rate": 9.999518604447003e-06, |
| "loss": 0.5806, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.06879789074730164, |
| "grad_norm": 4.343401510593881, |
| "learning_rate": 9.999509302484642e-06, |
| "loss": 0.5283, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.06888028343083134, |
| "grad_norm": 4.23058174413733, |
| "learning_rate": 9.99949991151414e-06, |
| "loss": 0.558, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.06896267611436105, |
| "grad_norm": 5.669493224488694, |
| "learning_rate": 9.999490431535664e-06, |
| "loss": 0.5318, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.06904506879789074, |
| "grad_norm": 4.17773496395434, |
| "learning_rate": 9.999480862549383e-06, |
| "loss": 0.531, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.06912746148142045, |
| "grad_norm": 5.0993501898509574, |
| "learning_rate": 9.999471204555464e-06, |
| "loss": 0.5367, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.06920985416495015, |
| "grad_norm": 6.258867241760913, |
| "learning_rate": 9.99946145755408e-06, |
| "loss": 0.625, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.06929224684847986, |
| "grad_norm": 6.14568981769412, |
| "learning_rate": 9.999451621545408e-06, |
| "loss": 0.6203, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.06937463953200956, |
| "grad_norm": 4.50524135844046, |
| "learning_rate": 9.99944169652962e-06, |
| "loss": 0.4648, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.06945703221553925, |
| "grad_norm": 4.759545744558735, |
| "learning_rate": 9.999431682506893e-06, |
| "loss": 0.3331, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.06953942489906896, |
| "grad_norm": 4.161271349934926, |
| "learning_rate": 9.999421579477406e-06, |
| "loss": 0.3352, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.06962181758259867, |
| "grad_norm": 4.557594062709752, |
| "learning_rate": 9.99941138744134e-06, |
| "loss": 0.6043, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.06970421026612837, |
| "grad_norm": 5.655003710422895, |
| "learning_rate": 9.999401106398874e-06, |
| "loss": 0.5817, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.06978660294965806, |
| "grad_norm": 4.925240291130296, |
| "learning_rate": 9.999390736350192e-06, |
| "loss": 0.5053, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.06986899563318777, |
| "grad_norm": 5.566654097661635, |
| "learning_rate": 9.99938027729548e-06, |
| "loss": 0.5705, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.06995138831671747, |
| "grad_norm": 4.473152490217241, |
| "learning_rate": 9.999369729234923e-06, |
| "loss": 0.3942, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.07003378100024718, |
| "grad_norm": 3.0533580021318665, |
| "learning_rate": 9.999359092168707e-06, |
| "loss": 0.3085, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.07011617368377689, |
| "grad_norm": 3.8516009324085747, |
| "learning_rate": 9.999348366097024e-06, |
| "loss": 0.3975, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.07019856636730658, |
| "grad_norm": 5.43210019067299, |
| "learning_rate": 9.999337551020062e-06, |
| "loss": 0.5565, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.07028095905083628, |
| "grad_norm": 5.9263640825101405, |
| "learning_rate": 9.999326646938019e-06, |
| "loss": 0.486, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.07036335173436599, |
| "grad_norm": 4.914603335651239, |
| "learning_rate": 9.999315653851085e-06, |
| "loss": 0.5884, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.0704457444178957, |
| "grad_norm": 4.624934035035607, |
| "learning_rate": 9.999304571759456e-06, |
| "loss": 0.446, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.07052813710142539, |
| "grad_norm": 4.290804101316048, |
| "learning_rate": 9.99929340066333e-06, |
| "loss": 0.501, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.07061052978495509, |
| "grad_norm": 9.34261485450524, |
| "learning_rate": 9.999282140562905e-06, |
| "loss": 0.7741, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.0706929224684848, |
| "grad_norm": 4.590850263399005, |
| "learning_rate": 9.999270791458383e-06, |
| "loss": 0.4548, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.0707753151520145, |
| "grad_norm": 4.7695609921791355, |
| "learning_rate": 9.999259353349964e-06, |
| "loss": 0.53, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.07085770783554421, |
| "grad_norm": 3.904977095403514, |
| "learning_rate": 9.999247826237854e-06, |
| "loss": 0.3604, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0709401005190739, |
| "grad_norm": 8.682031296595586, |
| "learning_rate": 9.999236210122256e-06, |
| "loss": 0.78, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.0710224932026036, |
| "grad_norm": 4.5193801163329494, |
| "learning_rate": 9.999224505003379e-06, |
| "loss": 0.5151, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.07110488588613331, |
| "grad_norm": 6.453604475727589, |
| "learning_rate": 9.999212710881429e-06, |
| "loss": 0.7898, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.07118727856966302, |
| "grad_norm": 5.0929895898718165, |
| "learning_rate": 9.99920082775662e-06, |
| "loss": 0.3436, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.07126967125319271, |
| "grad_norm": 3.977322671420692, |
| "learning_rate": 9.999188855629159e-06, |
| "loss": 0.6677, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.07135206393672242, |
| "grad_norm": 4.011549270668664, |
| "learning_rate": 9.99917679449926e-06, |
| "loss": 0.5224, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.07143445662025212, |
| "grad_norm": 4.242067648981852, |
| "learning_rate": 9.999164644367139e-06, |
| "loss": 0.6407, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.07151684930378183, |
| "grad_norm": 4.6287285335817225, |
| "learning_rate": 9.999152405233013e-06, |
| "loss": 0.4401, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.07159924198731153, |
| "grad_norm": 7.160675102549129, |
| "learning_rate": 9.999140077097096e-06, |
| "loss": 0.6419, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.07168163467084122, |
| "grad_norm": 5.074238907861252, |
| "learning_rate": 9.999127659959613e-06, |
| "loss": 0.6444, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.07176402735437093, |
| "grad_norm": 3.6382794617465253, |
| "learning_rate": 9.999115153820782e-06, |
| "loss": 0.4528, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.07184642003790064, |
| "grad_norm": 4.599057975224609, |
| "learning_rate": 9.999102558680827e-06, |
| "loss": 0.531, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.07192881272143034, |
| "grad_norm": 4.5872619129769046, |
| "learning_rate": 9.999089874539968e-06, |
| "loss": 0.4827, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.07201120540496005, |
| "grad_norm": 3.689648865108007, |
| "learning_rate": 9.999077101398437e-06, |
| "loss": 0.492, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.07209359808848974, |
| "grad_norm": 5.8761872625125715, |
| "learning_rate": 9.999064239256459e-06, |
| "loss": 0.543, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.07217599077201944, |
| "grad_norm": 5.870430873318824, |
| "learning_rate": 9.99905128811426e-06, |
| "loss": 0.5407, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.07225838345554915, |
| "grad_norm": 6.386341937119981, |
| "learning_rate": 9.999038247972076e-06, |
| "loss": 0.6123, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.07234077613907886, |
| "grad_norm": 5.280967236144987, |
| "learning_rate": 9.999025118830134e-06, |
| "loss": 0.4707, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.07242316882260855, |
| "grad_norm": 5.724340686807843, |
| "learning_rate": 9.999011900688672e-06, |
| "loss": 0.6817, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.07250556150613825, |
| "grad_norm": 5.777864365455918, |
| "learning_rate": 9.998998593547923e-06, |
| "loss": 0.7443, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.07258795418966796, |
| "grad_norm": 4.481030640410297, |
| "learning_rate": 9.998985197408122e-06, |
| "loss": 0.4052, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.07267034687319766, |
| "grad_norm": 5.7539373865432895, |
| "learning_rate": 9.998971712269512e-06, |
| "loss": 0.6414, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.07275273955672737, |
| "grad_norm": 5.074248646089831, |
| "learning_rate": 9.99895813813233e-06, |
| "loss": 0.6889, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.07283513224025706, |
| "grad_norm": 6.08389615807529, |
| "learning_rate": 9.998944474996817e-06, |
| "loss": 0.6358, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.07291752492378677, |
| "grad_norm": 7.007593747793593, |
| "learning_rate": 9.99893072286322e-06, |
| "loss": 0.723, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.07299991760731647, |
| "grad_norm": 5.295613649313166, |
| "learning_rate": 9.998916881731781e-06, |
| "loss": 0.5226, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.07308231029084618, |
| "grad_norm": 5.1499745635077225, |
| "learning_rate": 9.998902951602746e-06, |
| "loss": 0.6138, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.07316470297437587, |
| "grad_norm": 4.404865550062876, |
| "learning_rate": 9.998888932476365e-06, |
| "loss": 0.4733, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.07324709565790558, |
| "grad_norm": 5.914838603616755, |
| "learning_rate": 9.998874824352887e-06, |
| "loss": 0.5345, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.07332948834143528, |
| "grad_norm": 9.339300610542013, |
| "learning_rate": 9.99886062723256e-06, |
| "loss": 0.7452, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.07341188102496499, |
| "grad_norm": 5.575845113308426, |
| "learning_rate": 9.998846341115642e-06, |
| "loss": 0.5513, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.07349427370849469, |
| "grad_norm": 4.403803637718023, |
| "learning_rate": 9.998831966002385e-06, |
| "loss": 0.4927, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.07357666639202438, |
| "grad_norm": 4.716127059524325, |
| "learning_rate": 9.998817501893044e-06, |
| "loss": 0.5894, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.07365905907555409, |
| "grad_norm": 4.162456660043013, |
| "learning_rate": 9.998802948787878e-06, |
| "loss": 0.5348, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.0737414517590838, |
| "grad_norm": 7.865128926097237, |
| "learning_rate": 9.998788306687144e-06, |
| "loss": 0.7982, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.0738238444426135, |
| "grad_norm": 5.062814936226218, |
| "learning_rate": 9.998773575591105e-06, |
| "loss": 0.5444, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.07390623712614319, |
| "grad_norm": 5.0766827309307345, |
| "learning_rate": 9.998758755500022e-06, |
| "loss": 0.5396, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.0739886298096729, |
| "grad_norm": 5.588620720256569, |
| "learning_rate": 9.998743846414158e-06, |
| "loss": 0.607, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.0740710224932026, |
| "grad_norm": 4.508870984621243, |
| "learning_rate": 9.998728848333781e-06, |
| "loss": 0.5817, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.07415341517673231, |
| "grad_norm": 6.255880304807757, |
| "learning_rate": 9.998713761259157e-06, |
| "loss": 0.624, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.07423580786026202, |
| "grad_norm": 6.448010541448679, |
| "learning_rate": 9.998698585190554e-06, |
| "loss": 0.592, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.07431820054379171, |
| "grad_norm": 6.581360057122206, |
| "learning_rate": 9.998683320128242e-06, |
| "loss": 0.7091, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.07440059322732141, |
| "grad_norm": 7.178574221854368, |
| "learning_rate": 9.998667966072492e-06, |
| "loss": 0.514, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.07448298591085112, |
| "grad_norm": 5.640474989894486, |
| "learning_rate": 9.998652523023582e-06, |
| "loss": 0.5192, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.07456537859438082, |
| "grad_norm": 4.859242002152594, |
| "learning_rate": 9.99863699098178e-06, |
| "loss": 0.393, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.07464777127791052, |
| "grad_norm": 4.7363551715969905, |
| "learning_rate": 9.998621369947368e-06, |
| "loss": 0.5423, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.07473016396144022, |
| "grad_norm": 7.11873781156602, |
| "learning_rate": 9.998605659920621e-06, |
| "loss": 0.6214, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.07481255664496993, |
| "grad_norm": 4.642272372196515, |
| "learning_rate": 9.99858986090182e-06, |
| "loss": 0.3049, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.07489494932849963, |
| "grad_norm": 4.975074076132109, |
| "learning_rate": 9.998573972891246e-06, |
| "loss": 0.5958, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.07497734201202934, |
| "grad_norm": 6.247010066207867, |
| "learning_rate": 9.998557995889183e-06, |
| "loss": 0.5501, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.07505973469555903, |
| "grad_norm": 6.758308077386494, |
| "learning_rate": 9.998541929895912e-06, |
| "loss": 0.4744, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.07514212737908874, |
| "grad_norm": 6.321946117889499, |
| "learning_rate": 9.998525774911723e-06, |
| "loss": 0.4651, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.07522452006261844, |
| "grad_norm": 5.910428930462289, |
| "learning_rate": 9.998509530936901e-06, |
| "loss": 0.5662, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.07530691274614815, |
| "grad_norm": 4.4469553646271525, |
| "learning_rate": 9.998493197971737e-06, |
| "loss": 0.529, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.07538930542967784, |
| "grad_norm": 6.494702959527984, |
| "learning_rate": 9.998476776016521e-06, |
| "loss": 0.5877, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.07547169811320754, |
| "grad_norm": 7.669316446508771, |
| "learning_rate": 9.998460265071546e-06, |
| "loss": 0.6812, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.07555409079673725, |
| "grad_norm": 14.38156671027216, |
| "learning_rate": 9.998443665137104e-06, |
| "loss": 0.8116, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.07563648348026696, |
| "grad_norm": 8.288458207815854, |
| "learning_rate": 9.998426976213493e-06, |
| "loss": 0.656, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.07571887616379666, |
| "grad_norm": 3.703380187983116, |
| "learning_rate": 9.998410198301007e-06, |
| "loss": 0.3688, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.07580126884732635, |
| "grad_norm": 5.446454718623625, |
| "learning_rate": 9.99839333139995e-06, |
| "loss": 0.5218, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.07588366153085606, |
| "grad_norm": 5.7339819465370825, |
| "learning_rate": 9.998376375510617e-06, |
| "loss": 0.4874, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.07596605421438576, |
| "grad_norm": 5.321192426624907, |
| "learning_rate": 9.99835933063331e-06, |
| "loss": 0.3845, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.07604844689791547, |
| "grad_norm": 5.797682110550929, |
| "learning_rate": 9.998342196768337e-06, |
| "loss": 0.4487, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.07613083958144516, |
| "grad_norm": 6.324776101604922, |
| "learning_rate": 9.998324973915999e-06, |
| "loss": 0.5774, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.07621323226497487, |
| "grad_norm": 6.886486065521574, |
| "learning_rate": 9.998307662076604e-06, |
| "loss": 0.5918, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.07629562494850457, |
| "grad_norm": 4.598957573831131, |
| "learning_rate": 9.998290261250461e-06, |
| "loss": 0.5424, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.07637801763203428, |
| "grad_norm": 5.054899919494791, |
| "learning_rate": 9.998272771437878e-06, |
| "loss": 0.3453, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.07646041031556398, |
| "grad_norm": 4.915345392459135, |
| "learning_rate": 9.998255192639167e-06, |
| "loss": 0.5505, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.07654280299909368, |
| "grad_norm": 13.147418222703912, |
| "learning_rate": 9.998237524854643e-06, |
| "loss": 0.7975, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.07662519568262338, |
| "grad_norm": 4.249042164087028, |
| "learning_rate": 9.998219768084619e-06, |
| "loss": 0.5132, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.07670758836615309, |
| "grad_norm": 4.6864159906629865, |
| "learning_rate": 9.998201922329409e-06, |
| "loss": 0.5093, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.07678998104968279, |
| "grad_norm": 4.704532568350368, |
| "learning_rate": 9.998183987589332e-06, |
| "loss": 0.5385, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.07687237373321248, |
| "grad_norm": 4.250326737192859, |
| "learning_rate": 9.99816596386471e-06, |
| "loss": 0.5081, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.07695476641674219, |
| "grad_norm": 3.2609943143186393, |
| "learning_rate": 9.998147851155862e-06, |
| "loss": 0.3386, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.0770371591002719, |
| "grad_norm": 4.686119380610933, |
| "learning_rate": 9.998129649463108e-06, |
| "loss": 0.4959, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.0771195517838016, |
| "grad_norm": 8.093847783734134, |
| "learning_rate": 9.998111358786777e-06, |
| "loss": 0.6091, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.07720194446733131, |
| "grad_norm": 4.128988601766982, |
| "learning_rate": 9.998092979127191e-06, |
| "loss": 0.3564, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.077284337150861, |
| "grad_norm": 12.520048545236683, |
| "learning_rate": 9.998074510484679e-06, |
| "loss": 0.8451, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.0773667298343907, |
| "grad_norm": 5.0581596409425025, |
| "learning_rate": 9.998055952859567e-06, |
| "loss": 0.4862, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.07744912251792041, |
| "grad_norm": 6.315311219118197, |
| "learning_rate": 9.998037306252188e-06, |
| "loss": 0.5742, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.07753151520145012, |
| "grad_norm": 5.819386724118808, |
| "learning_rate": 9.998018570662875e-06, |
| "loss": 0.6873, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.07761390788497981, |
| "grad_norm": 6.606119564610502, |
| "learning_rate": 9.99799974609196e-06, |
| "loss": 0.6462, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.07769630056850951, |
| "grad_norm": 4.9064741570302965, |
| "learning_rate": 9.997980832539775e-06, |
| "loss": 0.4563, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.07777869325203922, |
| "grad_norm": 6.403026987978093, |
| "learning_rate": 9.997961830006663e-06, |
| "loss": 0.5919, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.07786108593556892, |
| "grad_norm": 3.995002622383316, |
| "learning_rate": 9.997942738492959e-06, |
| "loss": 0.6035, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.07794347861909863, |
| "grad_norm": 5.979508594311581, |
| "learning_rate": 9.997923557999001e-06, |
| "loss": 0.6438, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.07802587130262832, |
| "grad_norm": 12.403971641606825, |
| "learning_rate": 9.997904288525133e-06, |
| "loss": 0.7055, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.07810826398615803, |
| "grad_norm": 5.065356925074259, |
| "learning_rate": 9.997884930071698e-06, |
| "loss": 0.489, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.07819065666968773, |
| "grad_norm": 6.739162552025105, |
| "learning_rate": 9.99786548263904e-06, |
| "loss": 0.6978, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.07827304935321744, |
| "grad_norm": 5.869680200446658, |
| "learning_rate": 9.997845946227506e-06, |
| "loss": 0.6373, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.07835544203674713, |
| "grad_norm": 5.498434870964948, |
| "learning_rate": 9.997826320837445e-06, |
| "loss": 0.5772, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.07843783472027684, |
| "grad_norm": 6.304272494085451, |
| "learning_rate": 9.997806606469201e-06, |
| "loss": 0.3892, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.07852022740380654, |
| "grad_norm": 6.170055796070485, |
| "learning_rate": 9.997786803123131e-06, |
| "loss": 0.5676, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.07860262008733625, |
| "grad_norm": 4.355001196596673, |
| "learning_rate": 9.997766910799585e-06, |
| "loss": 0.3932, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.07868501277086595, |
| "grad_norm": 4.587353573095941, |
| "learning_rate": 9.997746929498915e-06, |
| "loss": 0.4951, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.07876740545439564, |
| "grad_norm": 4.7560661272306115, |
| "learning_rate": 9.997726859221482e-06, |
| "loss": 0.3379, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.07884979813792535, |
| "grad_norm": 3.875204186498522, |
| "learning_rate": 9.997706699967638e-06, |
| "loss": 0.4937, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.07893219082145506, |
| "grad_norm": 4.5436308343446665, |
| "learning_rate": 9.997686451737745e-06, |
| "loss": 0.4664, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.07901458350498476, |
| "grad_norm": 5.085715886421841, |
| "learning_rate": 9.997666114532166e-06, |
| "loss": 0.3532, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.07909697618851445, |
| "grad_norm": 5.736037074111639, |
| "learning_rate": 9.997645688351256e-06, |
| "loss": 0.6229, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.07917936887204416, |
| "grad_norm": 4.6641539689143885, |
| "learning_rate": 9.997625173195384e-06, |
| "loss": 0.5927, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.07926176155557386, |
| "grad_norm": 6.986142276021053, |
| "learning_rate": 9.997604569064913e-06, |
| "loss": 0.6011, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.07934415423910357, |
| "grad_norm": 4.936390533586499, |
| "learning_rate": 9.99758387596021e-06, |
| "loss": 0.6004, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.07942654692263328, |
| "grad_norm": 25.174262625674295, |
| "learning_rate": 9.997563093881647e-06, |
| "loss": 1.0481, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.07950893960616297, |
| "grad_norm": 5.911910212113453, |
| "learning_rate": 9.997542222829588e-06, |
| "loss": 0.5712, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.07959133228969267, |
| "grad_norm": 5.683154041671146, |
| "learning_rate": 9.997521262804408e-06, |
| "loss": 0.5152, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.07967372497322238, |
| "grad_norm": 6.247591970007456, |
| "learning_rate": 9.997500213806481e-06, |
| "loss": 0.589, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.07975611765675208, |
| "grad_norm": 6.037728527699037, |
| "learning_rate": 9.997479075836179e-06, |
| "loss": 0.4865, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.07983851034028178, |
| "grad_norm": 4.848668306571336, |
| "learning_rate": 9.997457848893881e-06, |
| "loss": 0.5364, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.07992090302381148, |
| "grad_norm": 5.738086820942736, |
| "learning_rate": 9.997436532979963e-06, |
| "loss": 0.4475, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.08000329570734119, |
| "grad_norm": 7.7003958590754245, |
| "learning_rate": 9.997415128094805e-06, |
| "loss": 0.6297, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.0800856883908709, |
| "grad_norm": 4.6301976482339375, |
| "learning_rate": 9.997393634238789e-06, |
| "loss": 0.4354, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.0801680810744006, |
| "grad_norm": 5.051471456165753, |
| "learning_rate": 9.997372051412296e-06, |
| "loss": 0.5246, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.08025047375793029, |
| "grad_norm": 6.840214215762055, |
| "learning_rate": 9.997350379615712e-06, |
| "loss": 0.6289, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.08033286644146, |
| "grad_norm": 5.025951954709692, |
| "learning_rate": 9.997328618849422e-06, |
| "loss": 0.6347, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.0804152591249897, |
| "grad_norm": 4.175068481847607, |
| "learning_rate": 9.997306769113812e-06, |
| "loss": 0.4474, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.08049765180851941, |
| "grad_norm": 6.49606794332559, |
| "learning_rate": 9.997284830409275e-06, |
| "loss": 0.7058, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.0805800444920491, |
| "grad_norm": 4.355993261479888, |
| "learning_rate": 9.997262802736197e-06, |
| "loss": 0.3175, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.0806624371755788, |
| "grad_norm": 4.173278705106478, |
| "learning_rate": 9.997240686094974e-06, |
| "loss": 0.4082, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.08074482985910851, |
| "grad_norm": 3.6798094983336638, |
| "learning_rate": 9.997218480485994e-06, |
| "loss": 0.266, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.08082722254263822, |
| "grad_norm": 6.875883284171418, |
| "learning_rate": 9.997196185909662e-06, |
| "loss": 0.397, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.08090961522616792, |
| "grad_norm": 4.7547519593139125, |
| "learning_rate": 9.997173802366365e-06, |
| "loss": 0.6101, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.08099200790969761, |
| "grad_norm": 7.068828139356332, |
| "learning_rate": 9.997151329856508e-06, |
| "loss": 0.753, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.08107440059322732, |
| "grad_norm": 5.32212084183463, |
| "learning_rate": 9.997128768380486e-06, |
| "loss": 0.5187, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.08115679327675702, |
| "grad_norm": 4.63635990769238, |
| "learning_rate": 9.997106117938704e-06, |
| "loss": 0.5448, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.08123918596028673, |
| "grad_norm": 5.156911960898855, |
| "learning_rate": 9.997083378531567e-06, |
| "loss": 0.6237, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.08132157864381642, |
| "grad_norm": 66.02610969540054, |
| "learning_rate": 9.997060550159477e-06, |
| "loss": 2.7918, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.08140397132734613, |
| "grad_norm": 4.63314347325949, |
| "learning_rate": 9.997037632822839e-06, |
| "loss": 0.4784, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.08148636401087583, |
| "grad_norm": 17.125886475254017, |
| "learning_rate": 9.997014626522064e-06, |
| "loss": 0.5182, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.08156875669440554, |
| "grad_norm": 5.112815655421804, |
| "learning_rate": 9.99699153125756e-06, |
| "loss": 0.5992, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.08165114937793524, |
| "grad_norm": 4.634165444142863, |
| "learning_rate": 9.996968347029739e-06, |
| "loss": 0.5552, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.08173354206146494, |
| "grad_norm": 4.500448623996411, |
| "learning_rate": 9.996945073839015e-06, |
| "loss": 0.5293, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.08181593474499464, |
| "grad_norm": 4.985902758629872, |
| "learning_rate": 9.996921711685798e-06, |
| "loss": 0.5077, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.08189832742852435, |
| "grad_norm": 11.160741895344973, |
| "learning_rate": 9.99689826057051e-06, |
| "loss": 0.6087, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.08198072011205405, |
| "grad_norm": 5.767479903001831, |
| "learning_rate": 9.996874720493563e-06, |
| "loss": 0.5006, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.08206311279558375, |
| "grad_norm": 4.604269350433775, |
| "learning_rate": 9.996851091455379e-06, |
| "loss": 0.4231, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.08214550547911345, |
| "grad_norm": 6.964228664965358, |
| "learning_rate": 9.996827373456379e-06, |
| "loss": 0.7993, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.08222789816264316, |
| "grad_norm": 4.503845133301578, |
| "learning_rate": 9.996803566496982e-06, |
| "loss": 0.574, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.08231029084617286, |
| "grad_norm": 5.102706241374167, |
| "learning_rate": 9.996779670577615e-06, |
| "loss": 0.523, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.08239268352970257, |
| "grad_norm": 4.718633665300077, |
| "learning_rate": 9.996755685698703e-06, |
| "loss": 0.4039, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08247507621323226, |
| "grad_norm": 31.817192653927222, |
| "learning_rate": 9.996731611860674e-06, |
| "loss": 0.4298, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.08255746889676197, |
| "grad_norm": 21.27663857241567, |
| "learning_rate": 9.996707449063952e-06, |
| "loss": 0.2222, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.08263986158029167, |
| "grad_norm": 4.401888936601498, |
| "learning_rate": 9.996683197308973e-06, |
| "loss": 0.4995, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.08272225426382138, |
| "grad_norm": 5.508952355584309, |
| "learning_rate": 9.996658856596165e-06, |
| "loss": 0.5681, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.08280464694735108, |
| "grad_norm": 4.992828446636793, |
| "learning_rate": 9.996634426925962e-06, |
| "loss": 0.4845, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.08288703963088077, |
| "grad_norm": 7.4045853788823015, |
| "learning_rate": 9.9966099082988e-06, |
| "loss": 0.6107, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.08296943231441048, |
| "grad_norm": 7.359292979226814, |
| "learning_rate": 9.996585300715117e-06, |
| "loss": 0.7944, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.08305182499794019, |
| "grad_norm": 5.680106009666031, |
| "learning_rate": 9.996560604175344e-06, |
| "loss": 0.4504, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.08313421768146989, |
| "grad_norm": 4.403376579772996, |
| "learning_rate": 9.99653581867993e-06, |
| "loss": 0.5481, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.08321661036499958, |
| "grad_norm": 10.223711349213156, |
| "learning_rate": 9.99651094422931e-06, |
| "loss": 0.9755, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.08329900304852929, |
| "grad_norm": 4.683680775421051, |
| "learning_rate": 9.99648598082393e-06, |
| "loss": 0.6508, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.083381395732059, |
| "grad_norm": 6.840856586449906, |
| "learning_rate": 9.99646092846423e-06, |
| "loss": 0.6494, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.0834637884155887, |
| "grad_norm": 5.564536440133124, |
| "learning_rate": 9.996435787150663e-06, |
| "loss": 0.6494, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.0835461810991184, |
| "grad_norm": 8.55630832138283, |
| "learning_rate": 9.996410556883672e-06, |
| "loss": 0.5978, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.0836285737826481, |
| "grad_norm": 4.505682298137676, |
| "learning_rate": 9.996385237663706e-06, |
| "loss": 0.4981, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.0837109664661778, |
| "grad_norm": 6.026491102637283, |
| "learning_rate": 9.996359829491218e-06, |
| "loss": 0.6929, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.08379335914970751, |
| "grad_norm": 5.23031637253995, |
| "learning_rate": 9.996334332366658e-06, |
| "loss": 0.468, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.08387575183323721, |
| "grad_norm": 6.02650829355012, |
| "learning_rate": 9.996308746290482e-06, |
| "loss": 0.6166, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.0839581445167669, |
| "grad_norm": 4.1819439593996535, |
| "learning_rate": 9.996283071263145e-06, |
| "loss": 0.4417, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.08404053720029661, |
| "grad_norm": 5.489497484777263, |
| "learning_rate": 9.996257307285102e-06, |
| "loss": 0.441, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.08412292988382632, |
| "grad_norm": 6.1845291497461545, |
| "learning_rate": 9.996231454356814e-06, |
| "loss": 0.8055, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.08420532256735602, |
| "grad_norm": 6.335171357274994, |
| "learning_rate": 9.996205512478741e-06, |
| "loss": 0.7177, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.08428771525088573, |
| "grad_norm": 5.77099800958352, |
| "learning_rate": 9.996179481651345e-06, |
| "loss": 0.6201, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.08437010793441542, |
| "grad_norm": 5.277909161458072, |
| "learning_rate": 9.996153361875086e-06, |
| "loss": 0.5087, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.08445250061794513, |
| "grad_norm": 4.995606457759668, |
| "learning_rate": 9.996127153150436e-06, |
| "loss": 0.4032, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.08453489330147483, |
| "grad_norm": 4.431456907318335, |
| "learning_rate": 9.996100855477856e-06, |
| "loss": 0.3881, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.08461728598500454, |
| "grad_norm": 3.511167845084898, |
| "learning_rate": 9.996074468857815e-06, |
| "loss": 0.4317, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.08469967866853423, |
| "grad_norm": 4.355083337852244, |
| "learning_rate": 9.996047993290784e-06, |
| "loss": 0.481, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.08478207135206393, |
| "grad_norm": 4.7477919241040425, |
| "learning_rate": 9.996021428777234e-06, |
| "loss": 0.4123, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.08486446403559364, |
| "grad_norm": 4.845625383441968, |
| "learning_rate": 9.99599477531764e-06, |
| "loss": 0.7018, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.08494685671912335, |
| "grad_norm": 5.263537268769247, |
| "learning_rate": 9.995968032912471e-06, |
| "loss": 0.5584, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.08502924940265305, |
| "grad_norm": 4.316936086223206, |
| "learning_rate": 9.995941201562207e-06, |
| "loss": 0.5342, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.08511164208618274, |
| "grad_norm": 4.862616503432274, |
| "learning_rate": 9.995914281267326e-06, |
| "loss": 0.5874, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.08519403476971245, |
| "grad_norm": 6.347242300001709, |
| "learning_rate": 9.995887272028307e-06, |
| "loss": 0.6603, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.08527642745324215, |
| "grad_norm": 4.525346388262154, |
| "learning_rate": 9.995860173845629e-06, |
| "loss": 0.441, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.08535882013677186, |
| "grad_norm": 4.606242184878409, |
| "learning_rate": 9.995832986719776e-06, |
| "loss": 0.5658, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.08544121282030155, |
| "grad_norm": 5.55541041688934, |
| "learning_rate": 9.995805710651233e-06, |
| "loss": 0.5883, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.08552360550383126, |
| "grad_norm": 4.805999702498609, |
| "learning_rate": 9.995778345640481e-06, |
| "loss": 0.5197, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.08560599818736096, |
| "grad_norm": 5.6071103942756535, |
| "learning_rate": 9.995750891688013e-06, |
| "loss": 0.4935, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.08568839087089067, |
| "grad_norm": 50.57617337717754, |
| "learning_rate": 9.995723348794315e-06, |
| "loss": 2.4806, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.08577078355442037, |
| "grad_norm": 6.25280098712178, |
| "learning_rate": 9.995695716959877e-06, |
| "loss": 0.6218, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.08585317623795007, |
| "grad_norm": 3.9373446441524025, |
| "learning_rate": 9.995667996185193e-06, |
| "loss": 0.5496, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.08593556892147977, |
| "grad_norm": 4.343696783721304, |
| "learning_rate": 9.995640186470755e-06, |
| "loss": 0.4876, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.08601796160500948, |
| "grad_norm": 5.0506015991461, |
| "learning_rate": 9.995612287817056e-06, |
| "loss": 0.5382, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.08610035428853918, |
| "grad_norm": 3.3472528865228015, |
| "learning_rate": 9.995584300224597e-06, |
| "loss": 0.4219, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.08618274697206887, |
| "grad_norm": 3.6590082207213666, |
| "learning_rate": 9.995556223693874e-06, |
| "loss": 0.4519, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.08626513965559858, |
| "grad_norm": 3.3110168095564414, |
| "learning_rate": 9.995528058225386e-06, |
| "loss": 0.3475, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.08634753233912829, |
| "grad_norm": 5.863164197441982, |
| "learning_rate": 9.995499803819637e-06, |
| "loss": 0.6212, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.08642992502265799, |
| "grad_norm": 5.518382693746084, |
| "learning_rate": 9.995471460477127e-06, |
| "loss": 0.5021, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.0865123177061877, |
| "grad_norm": 6.110381483524074, |
| "learning_rate": 9.995443028198362e-06, |
| "loss": 0.4432, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.08659471038971739, |
| "grad_norm": 4.892003499667656, |
| "learning_rate": 9.99541450698385e-06, |
| "loss": 0.3957, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.0866771030732471, |
| "grad_norm": 7.104164940402655, |
| "learning_rate": 9.995385896834095e-06, |
| "loss": 0.676, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.0867594957567768, |
| "grad_norm": 11.570101337811126, |
| "learning_rate": 9.995357197749611e-06, |
| "loss": 0.5474, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.0868418884403065, |
| "grad_norm": 4.624106631340867, |
| "learning_rate": 9.995328409730905e-06, |
| "loss": 0.506, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.0869242811238362, |
| "grad_norm": 5.743273453010476, |
| "learning_rate": 9.99529953277849e-06, |
| "loss": 0.3698, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.0870066738073659, |
| "grad_norm": 8.029930627600796, |
| "learning_rate": 9.995270566892884e-06, |
| "loss": 0.5471, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.08708906649089561, |
| "grad_norm": 5.194894530300615, |
| "learning_rate": 9.995241512074596e-06, |
| "loss": 0.6335, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.08717145917442531, |
| "grad_norm": 6.84466633274184, |
| "learning_rate": 9.995212368324147e-06, |
| "loss": 0.5793, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.08725385185795502, |
| "grad_norm": 7.268033072915504, |
| "learning_rate": 9.99518313564206e-06, |
| "loss": 0.653, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.08733624454148471, |
| "grad_norm": 4.021612447179571, |
| "learning_rate": 9.995153814028846e-06, |
| "loss": 0.4125, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.08741863722501442, |
| "grad_norm": 5.826262293101434, |
| "learning_rate": 9.995124403485036e-06, |
| "loss": 0.5812, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.08750102990854412, |
| "grad_norm": 4.557140038560946, |
| "learning_rate": 9.995094904011148e-06, |
| "loss": 0.3531, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.08758342259207383, |
| "grad_norm": 7.238981062698408, |
| "learning_rate": 9.99506531560771e-06, |
| "loss": 0.7622, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.08766581527560352, |
| "grad_norm": 4.21469929038344, |
| "learning_rate": 9.995035638275248e-06, |
| "loss": 0.6258, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.08774820795913323, |
| "grad_norm": 3.6655616059927345, |
| "learning_rate": 9.995005872014289e-06, |
| "loss": 0.3423, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.08783060064266293, |
| "grad_norm": 5.412977135346199, |
| "learning_rate": 9.994976016825367e-06, |
| "loss": 0.6841, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.08791299332619264, |
| "grad_norm": 5.379025435070761, |
| "learning_rate": 9.994946072709007e-06, |
| "loss": 0.6847, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.08799538600972234, |
| "grad_norm": 4.472227140989464, |
| "learning_rate": 9.994916039665748e-06, |
| "loss": 0.4647, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.08807777869325203, |
| "grad_norm": 5.060338129572461, |
| "learning_rate": 9.994885917696122e-06, |
| "loss": 0.6175, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.08816017137678174, |
| "grad_norm": 4.715322598305957, |
| "learning_rate": 9.994855706800666e-06, |
| "loss": 0.4338, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.08824256406031145, |
| "grad_norm": 4.281526810708517, |
| "learning_rate": 9.994825406979918e-06, |
| "loss": 0.5457, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.08832495674384115, |
| "grad_norm": 3.4716224269919476, |
| "learning_rate": 9.994795018234416e-06, |
| "loss": 0.4955, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.08840734942737084, |
| "grad_norm": 5.027771756957362, |
| "learning_rate": 9.994764540564702e-06, |
| "loss": 0.6585, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.08848974211090055, |
| "grad_norm": 4.724781093913095, |
| "learning_rate": 9.99473397397132e-06, |
| "loss": 0.4618, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.08857213479443025, |
| "grad_norm": 6.9846657667334915, |
| "learning_rate": 9.99470331845481e-06, |
| "loss": 0.5641, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.08865452747795996, |
| "grad_norm": 5.484701110827303, |
| "learning_rate": 9.994672574015724e-06, |
| "loss": 0.542, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.08873692016148967, |
| "grad_norm": 7.037433703723267, |
| "learning_rate": 9.994641740654604e-06, |
| "loss": 0.4367, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.08881931284501936, |
| "grad_norm": 5.776582968652833, |
| "learning_rate": 9.994610818372002e-06, |
| "loss": 0.5423, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.08890170552854906, |
| "grad_norm": 6.745245173679915, |
| "learning_rate": 9.994579807168468e-06, |
| "loss": 0.698, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.08898409821207877, |
| "grad_norm": 5.473254655410174, |
| "learning_rate": 9.994548707044551e-06, |
| "loss": 0.4812, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.08906649089560847, |
| "grad_norm": 4.628085367767044, |
| "learning_rate": 9.994517518000809e-06, |
| "loss": 0.4693, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.08914888357913817, |
| "grad_norm": 7.525465774535276, |
| "learning_rate": 9.994486240037794e-06, |
| "loss": 0.6911, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.08923127626266787, |
| "grad_norm": 5.893129420637143, |
| "learning_rate": 9.994454873156068e-06, |
| "loss": 0.6289, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.08931366894619758, |
| "grad_norm": 5.50149802924285, |
| "learning_rate": 9.994423417356183e-06, |
| "loss": 0.6096, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.08939606162972728, |
| "grad_norm": 4.773359114172165, |
| "learning_rate": 9.994391872638702e-06, |
| "loss": 0.4555, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.08947845431325699, |
| "grad_norm": 7.212055616353917, |
| "learning_rate": 9.994360239004186e-06, |
| "loss": 0.7443, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.08956084699678668, |
| "grad_norm": 7.876765760130183, |
| "learning_rate": 9.9943285164532e-06, |
| "loss": 0.7077, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.08964323968031639, |
| "grad_norm": 7.317645647763081, |
| "learning_rate": 9.994296704986306e-06, |
| "loss": 0.6041, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.08972563236384609, |
| "grad_norm": 4.530023086536175, |
| "learning_rate": 9.994264804604073e-06, |
| "loss": 0.4931, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.0898080250473758, |
| "grad_norm": 4.2526214869784065, |
| "learning_rate": 9.994232815307065e-06, |
| "loss": 0.2995, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.08989041773090549, |
| "grad_norm": 3.505242547342838, |
| "learning_rate": 9.994200737095857e-06, |
| "loss": 0.4473, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.0899728104144352, |
| "grad_norm": 4.429638177463622, |
| "learning_rate": 9.994168569971017e-06, |
| "loss": 0.5841, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.0900552030979649, |
| "grad_norm": 5.82955819981545, |
| "learning_rate": 9.994136313933117e-06, |
| "loss": 0.4789, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.0901375957814946, |
| "grad_norm": 13.672647133825611, |
| "learning_rate": 9.994103968982733e-06, |
| "loss": 0.8772, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.09021998846502431, |
| "grad_norm": 4.710965238730624, |
| "learning_rate": 9.994071535120439e-06, |
| "loss": 0.4686, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.090302381148554, |
| "grad_norm": 3.9920741559548367, |
| "learning_rate": 9.994039012346814e-06, |
| "loss": 0.4907, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.09038477383208371, |
| "grad_norm": 5.7078667377661825, |
| "learning_rate": 9.994006400662436e-06, |
| "loss": 0.617, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.09046716651561341, |
| "grad_norm": 3.8186640801772125, |
| "learning_rate": 9.993973700067888e-06, |
| "loss": 0.4375, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.09054955919914312, |
| "grad_norm": 5.873776628166568, |
| "learning_rate": 9.99394091056375e-06, |
| "loss": 0.5642, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.09063195188267281, |
| "grad_norm": 6.100201545489032, |
| "learning_rate": 9.993908032150604e-06, |
| "loss": 0.603, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.09071434456620252, |
| "grad_norm": 5.704701398966367, |
| "learning_rate": 9.99387506482904e-06, |
| "loss": 0.4836, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.09079673724973222, |
| "grad_norm": 3.5476812408346934, |
| "learning_rate": 9.99384200859964e-06, |
| "loss": 0.2808, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.09087912993326193, |
| "grad_norm": 4.151988355820425, |
| "learning_rate": 9.993808863462995e-06, |
| "loss": 0.381, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.09096152261679163, |
| "grad_norm": 4.313112079524609, |
| "learning_rate": 9.993775629419696e-06, |
| "loss": 0.3598, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.09104391530032133, |
| "grad_norm": 3.5113027125878085, |
| "learning_rate": 9.993742306470332e-06, |
| "loss": 0.2947, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.09112630798385103, |
| "grad_norm": 5.254248735680192, |
| "learning_rate": 9.993708894615502e-06, |
| "loss": 0.3881, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.09120870066738074, |
| "grad_norm": 7.426494059694848, |
| "learning_rate": 9.993675393855793e-06, |
| "loss": 0.812, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.09129109335091044, |
| "grad_norm": 8.860170716007438, |
| "learning_rate": 9.993641804191805e-06, |
| "loss": 0.7974, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.09137348603444014, |
| "grad_norm": 8.101723704520365, |
| "learning_rate": 9.99360812562414e-06, |
| "loss": 0.5309, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.09145587871796984, |
| "grad_norm": 42.341845463398435, |
| "learning_rate": 9.99357435815339e-06, |
| "loss": 1.8562, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.09153827140149955, |
| "grad_norm": 6.81287903015811, |
| "learning_rate": 9.993540501780161e-06, |
| "loss": 0.6941, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.09162066408502925, |
| "grad_norm": 5.635578780831226, |
| "learning_rate": 9.993506556505054e-06, |
| "loss": 0.4578, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.09170305676855896, |
| "grad_norm": 5.211604855452364, |
| "learning_rate": 9.993472522328676e-06, |
| "loss": 0.6778, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.09178544945208865, |
| "grad_norm": 5.1777549097773665, |
| "learning_rate": 9.99343839925163e-06, |
| "loss": 0.5578, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.09186784213561835, |
| "grad_norm": 4.842543684963219, |
| "learning_rate": 9.993404187274522e-06, |
| "loss": 0.5595, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.09195023481914806, |
| "grad_norm": 6.42302616275195, |
| "learning_rate": 9.993369886397967e-06, |
| "loss": 0.7556, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.09203262750267777, |
| "grad_norm": 6.211592177133203, |
| "learning_rate": 9.99333549662257e-06, |
| "loss": 0.5456, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.09211502018620746, |
| "grad_norm": 4.630213204114295, |
| "learning_rate": 9.993301017948946e-06, |
| "loss": 0.4993, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.09219741286973716, |
| "grad_norm": 7.009132877346605, |
| "learning_rate": 9.99326645037771e-06, |
| "loss": 0.8535, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.09227980555326687, |
| "grad_norm": 4.964465276218794, |
| "learning_rate": 9.993231793909474e-06, |
| "loss": 0.4111, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.09236219823679657, |
| "grad_norm": 5.696307168046908, |
| "learning_rate": 9.993197048544857e-06, |
| "loss": 0.5841, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.09244459092032628, |
| "grad_norm": 5.527989894111306, |
| "learning_rate": 9.993162214284478e-06, |
| "loss": 0.5463, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.09252698360385597, |
| "grad_norm": 5.171245280928823, |
| "learning_rate": 9.993127291128956e-06, |
| "loss": 0.6916, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.09260937628738568, |
| "grad_norm": 5.320160130999334, |
| "learning_rate": 9.993092279078914e-06, |
| "loss": 0.406, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.09269176897091538, |
| "grad_norm": 5.163510317928108, |
| "learning_rate": 9.993057178134973e-06, |
| "loss": 0.6965, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.09277416165444509, |
| "grad_norm": 6.282384783932995, |
| "learning_rate": 9.99302198829776e-06, |
| "loss": 0.6963, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.09285655433797478, |
| "grad_norm": 6.815231221580075, |
| "learning_rate": 9.992986709567902e-06, |
| "loss": 0.6793, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.09293894702150449, |
| "grad_norm": 4.643382322672108, |
| "learning_rate": 9.992951341946025e-06, |
| "loss": 0.3584, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.09302133970503419, |
| "grad_norm": 17.845535518840695, |
| "learning_rate": 9.992915885432759e-06, |
| "loss": 0.781, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.0931037323885639, |
| "grad_norm": 4.085175770281815, |
| "learning_rate": 9.992880340028736e-06, |
| "loss": 0.2735, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.0931861250720936, |
| "grad_norm": 4.864690311245262, |
| "learning_rate": 9.992844705734591e-06, |
| "loss": 0.547, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.0932685177556233, |
| "grad_norm": 5.03732878733345, |
| "learning_rate": 9.992808982550955e-06, |
| "loss": 0.5577, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.093350910439153, |
| "grad_norm": 8.201592365589455, |
| "learning_rate": 9.992773170478465e-06, |
| "loss": 0.7697, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.0934333031226827, |
| "grad_norm": 5.9794222226843114, |
| "learning_rate": 9.992737269517759e-06, |
| "loss": 0.6587, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.09351569580621241, |
| "grad_norm": 4.465989480795388, |
| "learning_rate": 9.992701279669477e-06, |
| "loss": 0.5631, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.09359808848974212, |
| "grad_norm": 4.85574702436748, |
| "learning_rate": 9.992665200934258e-06, |
| "loss": 0.4923, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.09368048117327181, |
| "grad_norm": 5.903537966151732, |
| "learning_rate": 9.992629033312744e-06, |
| "loss": 0.5924, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.09376287385680152, |
| "grad_norm": 5.9804211363312065, |
| "learning_rate": 9.99259277680558e-06, |
| "loss": 0.6413, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.09384526654033122, |
| "grad_norm": 6.216811540759033, |
| "learning_rate": 9.992556431413412e-06, |
| "loss": 0.4857, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.09392765922386093, |
| "grad_norm": 3.992746573683693, |
| "learning_rate": 9.992519997136887e-06, |
| "loss": 0.5609, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.09401005190739062, |
| "grad_norm": 6.133646477579303, |
| "learning_rate": 9.992483473976652e-06, |
| "loss": 0.7192, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.09409244459092032, |
| "grad_norm": 3.996697571383684, |
| "learning_rate": 9.992446861933358e-06, |
| "loss": 0.5403, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.09417483727445003, |
| "grad_norm": 4.876746898177986, |
| "learning_rate": 9.992410161007658e-06, |
| "loss": 0.6047, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.09425722995797974, |
| "grad_norm": 7.113186432470744, |
| "learning_rate": 9.992373371200206e-06, |
| "loss": 0.6164, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.09433962264150944, |
| "grad_norm": 4.5979462744336494, |
| "learning_rate": 9.992336492511653e-06, |
| "loss": 0.5623, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.09442201532503913, |
| "grad_norm": 5.0662774129679935, |
| "learning_rate": 9.992299524942658e-06, |
| "loss": 0.5222, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.09450440800856884, |
| "grad_norm": 6.317547083033332, |
| "learning_rate": 9.992262468493883e-06, |
| "loss": 0.7313, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.09458680069209854, |
| "grad_norm": 6.083162361809943, |
| "learning_rate": 9.99222532316598e-06, |
| "loss": 0.7805, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.09466919337562825, |
| "grad_norm": 6.645090805437031, |
| "learning_rate": 9.992188088959616e-06, |
| "loss": 0.5836, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.09475158605915794, |
| "grad_norm": 4.61742230222751, |
| "learning_rate": 9.992150765875452e-06, |
| "loss": 0.3845, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.09483397874268765, |
| "grad_norm": 4.9888454907973, |
| "learning_rate": 9.992113353914153e-06, |
| "loss": 0.5926, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.09491637142621735, |
| "grad_norm": 4.584756235692825, |
| "learning_rate": 9.992075853076385e-06, |
| "loss": 0.3355, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.09499876410974706, |
| "grad_norm": 3.6417513821813583, |
| "learning_rate": 9.992038263362815e-06, |
| "loss": 0.4314, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.09508115679327676, |
| "grad_norm": 4.554586660083454, |
| "learning_rate": 9.992000584774113e-06, |
| "loss": 0.5483, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.09516354947680646, |
| "grad_norm": 4.795571278019529, |
| "learning_rate": 9.991962817310947e-06, |
| "loss": 0.7088, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.09524594216033616, |
| "grad_norm": 5.551121573962564, |
| "learning_rate": 9.991924960973995e-06, |
| "loss": 0.6027, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.09532833484386587, |
| "grad_norm": 5.793039171212162, |
| "learning_rate": 9.991887015763926e-06, |
| "loss": 0.5796, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.09541072752739557, |
| "grad_norm": 3.914653681105744, |
| "learning_rate": 9.991848981681417e-06, |
| "loss": 0.5456, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.09549312021092526, |
| "grad_norm": 4.822015558280715, |
| "learning_rate": 9.991810858727147e-06, |
| "loss": 0.4228, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.09557551289445497, |
| "grad_norm": 6.430460772568001, |
| "learning_rate": 9.991772646901793e-06, |
| "loss": 0.511, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.09565790557798468, |
| "grad_norm": 5.31544358134059, |
| "learning_rate": 9.991734346206034e-06, |
| "loss": 0.4908, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.09574029826151438, |
| "grad_norm": 4.579164539846181, |
| "learning_rate": 9.991695956640555e-06, |
| "loss": 0.5216, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.09582269094504409, |
| "grad_norm": 10.980434026023534, |
| "learning_rate": 9.991657478206037e-06, |
| "loss": 0.4225, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.09590508362857378, |
| "grad_norm": 11.232153491350044, |
| "learning_rate": 9.991618910903165e-06, |
| "loss": 0.4346, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.09598747631210348, |
| "grad_norm": 5.016670131834788, |
| "learning_rate": 9.99158025473263e-06, |
| "loss": 0.4156, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.09606986899563319, |
| "grad_norm": 5.823029951418563, |
| "learning_rate": 9.991541509695113e-06, |
| "loss": 0.6179, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.0961522616791629, |
| "grad_norm": 6.14466976320633, |
| "learning_rate": 9.991502675791308e-06, |
| "loss": 0.5943, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.09623465436269259, |
| "grad_norm": 5.663257166183292, |
| "learning_rate": 9.991463753021907e-06, |
| "loss": 0.4774, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.09631704704622229, |
| "grad_norm": 4.613753681467516, |
| "learning_rate": 9.991424741387601e-06, |
| "loss": 0.3702, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.096399439729752, |
| "grad_norm": 6.32248078526071, |
| "learning_rate": 9.991385640889087e-06, |
| "loss": 0.548, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0964818324132817, |
| "grad_norm": 6.541355663315621, |
| "learning_rate": 9.991346451527058e-06, |
| "loss": 0.6273, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.09656422509681141, |
| "grad_norm": 5.283297075822956, |
| "learning_rate": 9.991307173302212e-06, |
| "loss": 0.3891, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.0966466177803411, |
| "grad_norm": 4.233220917693166, |
| "learning_rate": 9.991267806215251e-06, |
| "loss": 0.3118, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.09672901046387081, |
| "grad_norm": 4.413307234382051, |
| "learning_rate": 9.991228350266875e-06, |
| "loss": 0.5013, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.09681140314740051, |
| "grad_norm": 3.56821873368059, |
| "learning_rate": 9.991188805457784e-06, |
| "loss": 0.4205, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.09689379583093022, |
| "grad_norm": 6.493304664428958, |
| "learning_rate": 9.991149171788686e-06, |
| "loss": 0.6007, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.09697618851445991, |
| "grad_norm": 4.839910460452555, |
| "learning_rate": 9.991109449260283e-06, |
| "loss": 0.511, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.09705858119798962, |
| "grad_norm": 6.988660129796937, |
| "learning_rate": 9.991069637873282e-06, |
| "loss": 0.8373, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.09714097388151932, |
| "grad_norm": 7.174369580906187, |
| "learning_rate": 9.991029737628397e-06, |
| "loss": 0.4762, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.09722336656504903, |
| "grad_norm": 5.530363323174134, |
| "learning_rate": 9.990989748526334e-06, |
| "loss": 0.4079, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.09730575924857873, |
| "grad_norm": 7.764029045054272, |
| "learning_rate": 9.990949670567804e-06, |
| "loss": 0.5609, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.09738815193210842, |
| "grad_norm": 4.6082841469746025, |
| "learning_rate": 9.990909503753524e-06, |
| "loss": 0.5465, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.09747054461563813, |
| "grad_norm": 4.287818673511596, |
| "learning_rate": 9.990869248084205e-06, |
| "loss": 0.3848, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.09755293729916784, |
| "grad_norm": 5.323729474167395, |
| "learning_rate": 9.990828903560568e-06, |
| "loss": 0.5052, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.09763532998269754, |
| "grad_norm": 3.5424616789602856, |
| "learning_rate": 9.990788470183328e-06, |
| "loss": 0.2952, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.09771772266622723, |
| "grad_norm": 3.9137775832429584, |
| "learning_rate": 9.990747947953207e-06, |
| "loss": 0.2791, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.09780011534975694, |
| "grad_norm": 5.032302762992251, |
| "learning_rate": 9.990707336870925e-06, |
| "loss": 0.3739, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.09788250803328664, |
| "grad_norm": 5.581163892106261, |
| "learning_rate": 9.990666636937207e-06, |
| "loss": 0.5531, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.09796490071681635, |
| "grad_norm": 6.884687269301154, |
| "learning_rate": 9.990625848152775e-06, |
| "loss": 0.7531, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.09804729340034606, |
| "grad_norm": 5.374496441514754, |
| "learning_rate": 9.990584970518355e-06, |
| "loss": 0.6825, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.09812968608387575, |
| "grad_norm": 4.491076319002902, |
| "learning_rate": 9.99054400403468e-06, |
| "loss": 0.5471, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.09821207876740545, |
| "grad_norm": 4.568092241493959, |
| "learning_rate": 9.990502948702472e-06, |
| "loss": 0.2779, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.09829447145093516, |
| "grad_norm": 5.707272721328885, |
| "learning_rate": 9.990461804522466e-06, |
| "loss": 0.6366, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.09837686413446486, |
| "grad_norm": 4.826130217944562, |
| "learning_rate": 9.990420571495394e-06, |
| "loss": 0.605, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.09845925681799456, |
| "grad_norm": 4.33472383368196, |
| "learning_rate": 9.990379249621991e-06, |
| "loss": 0.6158, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.09854164950152426, |
| "grad_norm": 4.997331262544171, |
| "learning_rate": 9.990337838902992e-06, |
| "loss": 0.5247, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.09862404218505397, |
| "grad_norm": 3.544784718142565, |
| "learning_rate": 9.990296339339131e-06, |
| "loss": 0.5761, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.09870643486858367, |
| "grad_norm": 4.904002550116714, |
| "learning_rate": 9.990254750931153e-06, |
| "loss": 0.4465, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.09878882755211338, |
| "grad_norm": 4.097219050092533, |
| "learning_rate": 9.990213073679793e-06, |
| "loss": 0.5315, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.09887122023564307, |
| "grad_norm": 5.885850480185024, |
| "learning_rate": 9.990171307585797e-06, |
| "loss": 0.4493, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.09895361291917278, |
| "grad_norm": 4.33215452297953, |
| "learning_rate": 9.990129452649906e-06, |
| "loss": 0.4882, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.09903600560270248, |
| "grad_norm": 4.268587794277847, |
| "learning_rate": 9.990087508872865e-06, |
| "loss": 0.444, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.09911839828623219, |
| "grad_norm": 42.319887791095226, |
| "learning_rate": 9.990045476255422e-06, |
| "loss": 1.8771, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.09920079096976188, |
| "grad_norm": 40.893472301948556, |
| "learning_rate": 9.990003354798326e-06, |
| "loss": 1.546, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.09928318365329158, |
| "grad_norm": 4.301738471538322, |
| "learning_rate": 9.989961144502324e-06, |
| "loss": 0.6113, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.09936557633682129, |
| "grad_norm": 4.904331603406959, |
| "learning_rate": 9.98991884536817e-06, |
| "loss": 0.504, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.099447969020351, |
| "grad_norm": 4.841374601844491, |
| "learning_rate": 9.989876457396616e-06, |
| "loss": 0.6375, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.0995303617038807, |
| "grad_norm": 3.6457744187919996, |
| "learning_rate": 9.989833980588419e-06, |
| "loss": 0.4475, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.0996127543874104, |
| "grad_norm": 6.177090683739293, |
| "learning_rate": 9.989791414944332e-06, |
| "loss": 0.4527, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.0996951470709401, |
| "grad_norm": 6.948933775306833, |
| "learning_rate": 9.989748760465114e-06, |
| "loss": 0.4229, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.0997775397544698, |
| "grad_norm": 4.081918060719312, |
| "learning_rate": 9.989706017151526e-06, |
| "loss": 0.4226, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.09985993243799951, |
| "grad_norm": 5.241837641447958, |
| "learning_rate": 9.989663185004326e-06, |
| "loss": 0.6111, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.0999423251215292, |
| "grad_norm": 4.874711378457223, |
| "learning_rate": 9.989620264024278e-06, |
| "loss": 0.5264, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.10002471780505891, |
| "grad_norm": 6.576308785350509, |
| "learning_rate": 9.989577254212147e-06, |
| "loss": 0.7179, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.10010711048858861, |
| "grad_norm": 5.81751849230087, |
| "learning_rate": 9.989534155568696e-06, |
| "loss": 0.4763, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.10018950317211832, |
| "grad_norm": 5.144482743322481, |
| "learning_rate": 9.989490968094695e-06, |
| "loss": 0.5334, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.10027189585564802, |
| "grad_norm": 4.558820107084972, |
| "learning_rate": 9.989447691790912e-06, |
| "loss": 0.4786, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.10035428853917772, |
| "grad_norm": 5.042904719669194, |
| "learning_rate": 9.98940432665812e-06, |
| "loss": 0.351, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.10043668122270742, |
| "grad_norm": 7.9762498299247095, |
| "learning_rate": 9.989360872697085e-06, |
| "loss": 0.0755, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.10051907390623713, |
| "grad_norm": 5.90945797643557, |
| "learning_rate": 9.989317329908585e-06, |
| "loss": 0.5389, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.10060146658976683, |
| "grad_norm": 6.595876901546739, |
| "learning_rate": 9.989273698293396e-06, |
| "loss": 0.5458, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.10068385927329652, |
| "grad_norm": 3.5037946342073076, |
| "learning_rate": 9.989229977852292e-06, |
| "loss": 0.3967, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.10076625195682623, |
| "grad_norm": 4.8746671742155145, |
| "learning_rate": 9.989186168586054e-06, |
| "loss": 0.536, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.10084864464035594, |
| "grad_norm": 6.885826091957547, |
| "learning_rate": 9.989142270495458e-06, |
| "loss": 0.7177, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.10093103732388564, |
| "grad_norm": 4.581417536969941, |
| "learning_rate": 9.98909828358129e-06, |
| "loss": 0.4632, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.10101343000741535, |
| "grad_norm": 6.322372039008234, |
| "learning_rate": 9.989054207844331e-06, |
| "loss": 0.6098, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.10109582269094504, |
| "grad_norm": 5.273630448320928, |
| "learning_rate": 9.989010043285365e-06, |
| "loss": 0.5149, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.10117821537447474, |
| "grad_norm": 7.791366040723516, |
| "learning_rate": 9.988965789905179e-06, |
| "loss": 0.693, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.10126060805800445, |
| "grad_norm": 9.738396758859928, |
| "learning_rate": 9.988921447704563e-06, |
| "loss": 0.5488, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.10134300074153416, |
| "grad_norm": 5.146859630461013, |
| "learning_rate": 9.988877016684302e-06, |
| "loss": 0.5047, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.10142539342506385, |
| "grad_norm": 5.257519730963288, |
| "learning_rate": 9.98883249684519e-06, |
| "loss": 0.6279, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.10150778610859355, |
| "grad_norm": 6.374293874468573, |
| "learning_rate": 9.988787888188021e-06, |
| "loss": 0.5565, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.10159017879212326, |
| "grad_norm": 3.579113688809698, |
| "learning_rate": 9.988743190713585e-06, |
| "loss": 0.3567, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.10167257147565296, |
| "grad_norm": 5.160088719699968, |
| "learning_rate": 9.988698404422682e-06, |
| "loss": 0.512, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.10175496415918267, |
| "grad_norm": 6.862175012633274, |
| "learning_rate": 9.988653529316106e-06, |
| "loss": 0.4836, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.10183735684271236, |
| "grad_norm": 7.94291959200385, |
| "learning_rate": 9.988608565394658e-06, |
| "loss": 0.6196, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.10191974952624207, |
| "grad_norm": 8.071509431185923, |
| "learning_rate": 9.988563512659137e-06, |
| "loss": 0.7937, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.10200214220977177, |
| "grad_norm": 4.085112773207024, |
| "learning_rate": 9.988518371110346e-06, |
| "loss": 0.5843, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.10208453489330148, |
| "grad_norm": 3.4369756535780036, |
| "learning_rate": 9.988473140749089e-06, |
| "loss": 0.4593, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.10216692757683117, |
| "grad_norm": 4.233546401366111, |
| "learning_rate": 9.98842782157617e-06, |
| "loss": 0.1937, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.10224932026036088, |
| "grad_norm": 5.0020721675763165, |
| "learning_rate": 9.988382413592398e-06, |
| "loss": 0.3163, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.10233171294389058, |
| "grad_norm": 6.741423995217005, |
| "learning_rate": 9.98833691679858e-06, |
| "loss": 0.4598, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.10241410562742029, |
| "grad_norm": 5.601574362819235, |
| "learning_rate": 9.988291331195525e-06, |
| "loss": 0.468, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.10249649831095, |
| "grad_norm": 5.251186632818674, |
| "learning_rate": 9.988245656784045e-06, |
| "loss": 0.4222, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.10257889099447969, |
| "grad_norm": 8.378822533378939, |
| "learning_rate": 9.988199893564956e-06, |
| "loss": 0.8973, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.10266128367800939, |
| "grad_norm": 5.961577135188696, |
| "learning_rate": 9.98815404153907e-06, |
| "loss": 0.5762, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.1027436763615391, |
| "grad_norm": 9.099142670765637, |
| "learning_rate": 9.988108100707203e-06, |
| "loss": 0.7662, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.1028260690450688, |
| "grad_norm": 6.092979434479812, |
| "learning_rate": 9.988062071070174e-06, |
| "loss": 0.6146, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.1029084617285985, |
| "grad_norm": 5.159988340065593, |
| "learning_rate": 9.988015952628802e-06, |
| "loss": 0.4235, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.1029908544121282, |
| "grad_norm": 5.5843012218596995, |
| "learning_rate": 9.987969745383908e-06, |
| "loss": 0.6002, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.1030732470956579, |
| "grad_norm": 5.194975705721119, |
| "learning_rate": 9.987923449336316e-06, |
| "loss": 0.3804, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.10315563977918761, |
| "grad_norm": 4.495661995779881, |
| "learning_rate": 9.98787706448685e-06, |
| "loss": 0.5055, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.10323803246271732, |
| "grad_norm": 4.23602600968263, |
| "learning_rate": 9.987830590836335e-06, |
| "loss": 0.5776, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.10332042514624701, |
| "grad_norm": 6.237589314146654, |
| "learning_rate": 9.987784028385596e-06, |
| "loss": 0.4792, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.10340281782977671, |
| "grad_norm": 7.598718878609416, |
| "learning_rate": 9.987737377135464e-06, |
| "loss": 0.8099, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.10348521051330642, |
| "grad_norm": 3.2818516957200283, |
| "learning_rate": 9.987690637086772e-06, |
| "loss": 0.3107, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.10356760319683612, |
| "grad_norm": 5.155044529125087, |
| "learning_rate": 9.987643808240351e-06, |
| "loss": 0.4354, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.10364999588036582, |
| "grad_norm": 4.263048262690633, |
| "learning_rate": 9.98759689059703e-06, |
| "loss": 0.3961, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.10373238856389552, |
| "grad_norm": 5.420585016670734, |
| "learning_rate": 9.987549884157652e-06, |
| "loss": 0.5856, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.10381478124742523, |
| "grad_norm": 5.035305806630145, |
| "learning_rate": 9.987502788923047e-06, |
| "loss": 0.5991, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.10389717393095493, |
| "grad_norm": 5.210172313300331, |
| "learning_rate": 9.987455604894059e-06, |
| "loss": 0.6802, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.10397956661448464, |
| "grad_norm": 5.208839152242212, |
| "learning_rate": 9.987408332071522e-06, |
| "loss": 0.5894, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.10406195929801433, |
| "grad_norm": 5.007424953872612, |
| "learning_rate": 9.987360970456284e-06, |
| "loss": 0.6866, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.10414435198154404, |
| "grad_norm": 5.492166942733279, |
| "learning_rate": 9.987313520049184e-06, |
| "loss": 0.5856, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.10422674466507374, |
| "grad_norm": 44.58199918343172, |
| "learning_rate": 9.987265980851069e-06, |
| "loss": 1.9599, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.10430913734860345, |
| "grad_norm": 4.5829858248139175, |
| "learning_rate": 9.987218352862781e-06, |
| "loss": 0.5187, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.10439153003213314, |
| "grad_norm": 5.025959968933363, |
| "learning_rate": 9.987170636085175e-06, |
| "loss": 0.5232, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.10447392271566285, |
| "grad_norm": 33.25337475336521, |
| "learning_rate": 9.987122830519096e-06, |
| "loss": 0.675, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.10455631539919255, |
| "grad_norm": 6.627024708916473, |
| "learning_rate": 9.987074936165394e-06, |
| "loss": 0.6327, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.10463870808272226, |
| "grad_norm": 14.391260890255582, |
| "learning_rate": 9.987026953024927e-06, |
| "loss": 0.171, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.10472110076625196, |
| "grad_norm": 4.702108978609021, |
| "learning_rate": 9.986978881098543e-06, |
| "loss": 0.3207, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.10480349344978165, |
| "grad_norm": 6.034579770929771, |
| "learning_rate": 9.986930720387103e-06, |
| "loss": 0.4834, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.10488588613331136, |
| "grad_norm": 5.096157625239223, |
| "learning_rate": 9.986882470891458e-06, |
| "loss": 0.3464, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.10496827881684107, |
| "grad_norm": 6.817695716429012, |
| "learning_rate": 9.986834132612475e-06, |
| "loss": 0.6021, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.10505067150037077, |
| "grad_norm": 5.953244971338033, |
| "learning_rate": 9.98678570555101e-06, |
| "loss": 0.682, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.10513306418390048, |
| "grad_norm": 5.0509725079955965, |
| "learning_rate": 9.986737189707924e-06, |
| "loss": 0.6976, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.10521545686743017, |
| "grad_norm": 5.45215491215228, |
| "learning_rate": 9.986688585084086e-06, |
| "loss": 0.5298, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.10529784955095987, |
| "grad_norm": 4.412292451733061, |
| "learning_rate": 9.986639891680356e-06, |
| "loss": 0.3362, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.10538024223448958, |
| "grad_norm": 5.221866379996899, |
| "learning_rate": 9.986591109497601e-06, |
| "loss": 0.5397, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.10546263491801929, |
| "grad_norm": 5.480429366404604, |
| "learning_rate": 9.986542238536694e-06, |
| "loss": 0.4179, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.10554502760154898, |
| "grad_norm": 4.6929338481975, |
| "learning_rate": 9.986493278798502e-06, |
| "loss": 0.3414, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.10562742028507868, |
| "grad_norm": 5.128214656502921, |
| "learning_rate": 9.986444230283896e-06, |
| "loss": 0.3893, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.10570981296860839, |
| "grad_norm": 6.237929917953249, |
| "learning_rate": 9.986395092993751e-06, |
| "loss": 0.479, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.1057922056521381, |
| "grad_norm": 7.2745793133813015, |
| "learning_rate": 9.98634586692894e-06, |
| "loss": 0.6739, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.1058745983356678, |
| "grad_norm": 4.95514436276389, |
| "learning_rate": 9.986296552090343e-06, |
| "loss": 0.3535, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.10595699101919749, |
| "grad_norm": 6.709823023172423, |
| "learning_rate": 9.986247148478834e-06, |
| "loss": 0.4273, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.1060393837027272, |
| "grad_norm": 7.894794011841938, |
| "learning_rate": 9.986197656095293e-06, |
| "loss": 0.5231, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.1061217763862569, |
| "grad_norm": 6.771758510510305, |
| "learning_rate": 9.986148074940602e-06, |
| "loss": 0.6098, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.10620416906978661, |
| "grad_norm": 8.620140427216912, |
| "learning_rate": 9.986098405015646e-06, |
| "loss": 0.6816, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.1062865617533163, |
| "grad_norm": 7.649934951424651, |
| "learning_rate": 9.986048646321306e-06, |
| "loss": 0.6417, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.106368954436846, |
| "grad_norm": 5.89217219407464, |
| "learning_rate": 9.98599879885847e-06, |
| "loss": 0.436, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.10645134712037571, |
| "grad_norm": 4.562164944360262, |
| "learning_rate": 9.985948862628023e-06, |
| "loss": 0.4035, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.10653373980390542, |
| "grad_norm": 10.224434742049766, |
| "learning_rate": 9.985898837630856e-06, |
| "loss": 0.7638, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.10661613248743512, |
| "grad_norm": 6.688388151927518, |
| "learning_rate": 9.98584872386786e-06, |
| "loss": 0.6856, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.10669852517096481, |
| "grad_norm": 5.548007921469972, |
| "learning_rate": 9.985798521339924e-06, |
| "loss": 0.4693, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.10678091785449452, |
| "grad_norm": 6.389072481221684, |
| "learning_rate": 9.985748230047944e-06, |
| "loss": 0.7325, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.10686331053802423, |
| "grad_norm": 4.857740386437905, |
| "learning_rate": 9.985697849992818e-06, |
| "loss": 0.4256, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.10694570322155393, |
| "grad_norm": 4.244141671261363, |
| "learning_rate": 9.98564738117544e-06, |
| "loss": 0.4349, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.10702809590508362, |
| "grad_norm": 8.506166832118577, |
| "learning_rate": 9.985596823596708e-06, |
| "loss": 0.6764, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.10711048858861333, |
| "grad_norm": 35.49665995961184, |
| "learning_rate": 9.985546177257523e-06, |
| "loss": 0.8473, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.10719288127214303, |
| "grad_norm": 4.860038932251913, |
| "learning_rate": 9.985495442158785e-06, |
| "loss": 0.4825, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.10727527395567274, |
| "grad_norm": 6.230406301466884, |
| "learning_rate": 9.985444618301401e-06, |
| "loss": 0.65, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.10735766663920245, |
| "grad_norm": 4.218360186395634, |
| "learning_rate": 9.985393705686274e-06, |
| "loss": 0.3347, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.10744005932273214, |
| "grad_norm": 14.47184287987901, |
| "learning_rate": 9.985342704314308e-06, |
| "loss": 1.0207, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.10752245200626184, |
| "grad_norm": 6.850987039774206, |
| "learning_rate": 9.985291614186417e-06, |
| "loss": 0.7262, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.10760484468979155, |
| "grad_norm": 5.9020975240354385, |
| "learning_rate": 9.985240435303505e-06, |
| "loss": 0.4397, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.10768723737332125, |
| "grad_norm": 4.756886284647011, |
| "learning_rate": 9.985189167666484e-06, |
| "loss": 0.3688, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.10776963005685095, |
| "grad_norm": 5.157047719389633, |
| "learning_rate": 9.985137811276268e-06, |
| "loss": 0.6059, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.10785202274038065, |
| "grad_norm": 6.27032589224842, |
| "learning_rate": 9.985086366133771e-06, |
| "loss": 0.7094, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.10793441542391036, |
| "grad_norm": 4.952356354117285, |
| "learning_rate": 9.985034832239908e-06, |
| "loss": 0.4373, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.10801680810744006, |
| "grad_norm": 6.469389577291621, |
| "learning_rate": 9.984983209595598e-06, |
| "loss": 0.5434, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.10809920079096977, |
| "grad_norm": 4.993310174827445, |
| "learning_rate": 9.98493149820176e-06, |
| "loss": 0.5393, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.10818159347449946, |
| "grad_norm": 3.6178325149897046, |
| "learning_rate": 9.984879698059314e-06, |
| "loss": 0.2839, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.10826398615802917, |
| "grad_norm": 4.710143431767484, |
| "learning_rate": 9.98482780916918e-06, |
| "loss": 0.5721, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.10834637884155887, |
| "grad_norm": 7.01427162272288, |
| "learning_rate": 9.984775831532288e-06, |
| "loss": 0.6726, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.10842877152508858, |
| "grad_norm": 6.672994509695413, |
| "learning_rate": 9.984723765149555e-06, |
| "loss": 0.6024, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.10851116420861827, |
| "grad_norm": 5.677032485840668, |
| "learning_rate": 9.984671610021916e-06, |
| "loss": 0.6153, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.10859355689214797, |
| "grad_norm": 5.458697212060859, |
| "learning_rate": 9.984619366150294e-06, |
| "loss": 0.599, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.10867594957567768, |
| "grad_norm": 4.616675469079038, |
| "learning_rate": 9.98456703353562e-06, |
| "loss": 0.5492, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.10875834225920739, |
| "grad_norm": 6.386675749076506, |
| "learning_rate": 9.98451461217883e-06, |
| "loss": 0.6014, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.10884073494273709, |
| "grad_norm": 7.024628115810297, |
| "learning_rate": 9.984462102080852e-06, |
| "loss": 0.778, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.10892312762626678, |
| "grad_norm": 5.033824012329933, |
| "learning_rate": 9.984409503242623e-06, |
| "loss": 0.4687, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.10900552030979649, |
| "grad_norm": 4.962648224329656, |
| "learning_rate": 9.98435681566508e-06, |
| "loss": 0.5488, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.1090879129933262, |
| "grad_norm": 7.920473586250384, |
| "learning_rate": 9.984304039349159e-06, |
| "loss": 0.6991, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.1091703056768559, |
| "grad_norm": 5.738981101778399, |
| "learning_rate": 9.9842511742958e-06, |
| "loss": 0.5916, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.10925269836038559, |
| "grad_norm": 4.679325132639655, |
| "learning_rate": 9.984198220505947e-06, |
| "loss": 0.5934, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.1093350910439153, |
| "grad_norm": 3.9674401675344346, |
| "learning_rate": 9.984145177980541e-06, |
| "loss": 0.5008, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.109417483727445, |
| "grad_norm": 5.381770819469537, |
| "learning_rate": 9.984092046720526e-06, |
| "loss": 0.4177, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.10949987641097471, |
| "grad_norm": 5.770846813479589, |
| "learning_rate": 9.984038826726847e-06, |
| "loss": 0.4167, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.10958226909450441, |
| "grad_norm": 6.865983869702629, |
| "learning_rate": 9.983985518000455e-06, |
| "loss": 0.7061, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1096646617780341, |
| "grad_norm": 3.9275472528501925, |
| "learning_rate": 9.983932120542294e-06, |
| "loss": 0.2918, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.10974705446156381, |
| "grad_norm": 6.07045204922866, |
| "learning_rate": 9.983878634353317e-06, |
| "loss": 0.6954, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.10982944714509352, |
| "grad_norm": 4.141327747866078, |
| "learning_rate": 9.983825059434478e-06, |
| "loss": 0.3842, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.10991183982862322, |
| "grad_norm": 5.786533535647971, |
| "learning_rate": 9.98377139578673e-06, |
| "loss": 0.5725, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.10999423251215291, |
| "grad_norm": 6.8133544098549415, |
| "learning_rate": 9.983717643411027e-06, |
| "loss": 0.7385, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.11007662519568262, |
| "grad_norm": 6.85720202285886, |
| "learning_rate": 9.983663802308326e-06, |
| "loss": 0.5718, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.11015901787921233, |
| "grad_norm": 6.136362509792475, |
| "learning_rate": 9.983609872479587e-06, |
| "loss": 0.3897, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.11024141056274203, |
| "grad_norm": 4.797859015857666, |
| "learning_rate": 9.98355585392577e-06, |
| "loss": 0.6325, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.11032380324627174, |
| "grad_norm": 8.316767655871656, |
| "learning_rate": 9.983501746647835e-06, |
| "loss": 0.6866, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.11040619592980143, |
| "grad_norm": 9.033213998462069, |
| "learning_rate": 9.983447550646748e-06, |
| "loss": 0.6542, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.11048858861333113, |
| "grad_norm": 33.85428238105279, |
| "learning_rate": 9.98339326592347e-06, |
| "loss": 0.9791, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.11057098129686084, |
| "grad_norm": 6.407367839369825, |
| "learning_rate": 9.98333889247897e-06, |
| "loss": 0.6029, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.11065337398039055, |
| "grad_norm": 20.144367498349798, |
| "learning_rate": 9.983284430314217e-06, |
| "loss": 0.5827, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.11073576666392024, |
| "grad_norm": 4.6534351475822655, |
| "learning_rate": 9.98322987943018e-06, |
| "loss": 0.4664, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.11081815934744994, |
| "grad_norm": 8.15101971157244, |
| "learning_rate": 9.983175239827829e-06, |
| "loss": 0.7332, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.11090055203097965, |
| "grad_norm": 4.753437093306661, |
| "learning_rate": 9.983120511508136e-06, |
| "loss": 0.5571, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.11098294471450935, |
| "grad_norm": 11.681875120750966, |
| "learning_rate": 9.983065694472078e-06, |
| "loss": 0.4647, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.11106533739803906, |
| "grad_norm": 12.117460717638025, |
| "learning_rate": 9.983010788720629e-06, |
| "loss": 0.538, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.11114773008156875, |
| "grad_norm": 6.829274575800984, |
| "learning_rate": 9.982955794254768e-06, |
| "loss": 0.7028, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.11123012276509846, |
| "grad_norm": 13.602827030669436, |
| "learning_rate": 9.982900711075473e-06, |
| "loss": 0.7064, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.11131251544862816, |
| "grad_norm": 5.8175552794909775, |
| "learning_rate": 9.982845539183724e-06, |
| "loss": 0.6018, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.11139490813215787, |
| "grad_norm": 8.31044623036391, |
| "learning_rate": 9.982790278580505e-06, |
| "loss": 0.3879, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.11147730081568756, |
| "grad_norm": 6.099537144897278, |
| "learning_rate": 9.982734929266799e-06, |
| "loss": 0.5985, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.11155969349921727, |
| "grad_norm": 5.0871649102753675, |
| "learning_rate": 9.98267949124359e-06, |
| "loss": 0.6366, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.11164208618274697, |
| "grad_norm": 7.594378062700149, |
| "learning_rate": 9.982623964511868e-06, |
| "loss": 0.3574, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.11172447886627668, |
| "grad_norm": 6.421114608398679, |
| "learning_rate": 9.982568349072619e-06, |
| "loss": 0.424, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.11180687154980638, |
| "grad_norm": 4.43829490618891, |
| "learning_rate": 9.982512644926835e-06, |
| "loss": 0.5444, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.11188926423333607, |
| "grad_norm": 5.484026046951222, |
| "learning_rate": 9.982456852075505e-06, |
| "loss": 0.4623, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.11197165691686578, |
| "grad_norm": 4.551122319765745, |
| "learning_rate": 9.982400970519625e-06, |
| "loss": 0.482, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.11205404960039549, |
| "grad_norm": 5.123980967275811, |
| "learning_rate": 9.982345000260189e-06, |
| "loss": 0.6261, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.11213644228392519, |
| "grad_norm": 7.0859474884143845, |
| "learning_rate": 9.982288941298193e-06, |
| "loss": 0.482, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.11221883496745488, |
| "grad_norm": 5.062990740013323, |
| "learning_rate": 9.982232793634637e-06, |
| "loss": 0.4078, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.11230122765098459, |
| "grad_norm": 6.563773730859556, |
| "learning_rate": 9.982176557270518e-06, |
| "loss": 0.6887, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.1123836203345143, |
| "grad_norm": 3.9679727896989507, |
| "learning_rate": 9.982120232206837e-06, |
| "loss": 0.5059, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.112466013018044, |
| "grad_norm": 4.109093720360083, |
| "learning_rate": 9.9820638184446e-06, |
| "loss": 0.3438, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.1125484057015737, |
| "grad_norm": 5.911123344422388, |
| "learning_rate": 9.98200731598481e-06, |
| "loss": 0.5155, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.1126307983851034, |
| "grad_norm": 3.88318872578385, |
| "learning_rate": 9.98195072482847e-06, |
| "loss": 0.5121, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.1127131910686331, |
| "grad_norm": 5.233996486233231, |
| "learning_rate": 9.98189404497659e-06, |
| "loss": 0.6168, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.11279558375216281, |
| "grad_norm": 4.602934758003588, |
| "learning_rate": 9.981837276430181e-06, |
| "loss": 0.3514, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.11287797643569251, |
| "grad_norm": 6.096124850039538, |
| "learning_rate": 9.98178041919025e-06, |
| "loss": 0.7239, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1129603691192222, |
| "grad_norm": 3.74675189974131, |
| "learning_rate": 9.981723473257812e-06, |
| "loss": 0.2741, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.11304276180275191, |
| "grad_norm": 4.56350112320027, |
| "learning_rate": 9.981666438633877e-06, |
| "loss": 0.4282, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.11312515448628162, |
| "grad_norm": 14.508417238617053, |
| "learning_rate": 9.981609315319467e-06, |
| "loss": 0.6197, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.11320754716981132, |
| "grad_norm": 3.0941627955941367, |
| "learning_rate": 9.981552103315593e-06, |
| "loss": 0.2163, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.11328993985334103, |
| "grad_norm": 4.946676977341611, |
| "learning_rate": 9.981494802623275e-06, |
| "loss": 0.387, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.11337233253687072, |
| "grad_norm": 7.823751960171278, |
| "learning_rate": 9.981437413243535e-06, |
| "loss": 0.6005, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.11345472522040043, |
| "grad_norm": 4.525532982557422, |
| "learning_rate": 9.981379935177393e-06, |
| "loss": 0.5959, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.11353711790393013, |
| "grad_norm": 6.635732248640508, |
| "learning_rate": 9.981322368425873e-06, |
| "loss": 0.5028, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.11361951058745984, |
| "grad_norm": 7.813709567215677, |
| "learning_rate": 9.98126471299e-06, |
| "loss": 0.5495, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.11370190327098953, |
| "grad_norm": 4.630427176502277, |
| "learning_rate": 9.981206968870798e-06, |
| "loss": 0.5631, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.11378429595451924, |
| "grad_norm": 33.40325879735037, |
| "learning_rate": 9.9811491360693e-06, |
| "loss": 1.1373, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.11386668863804894, |
| "grad_norm": 4.6566891571505575, |
| "learning_rate": 9.981091214586533e-06, |
| "loss": 0.4544, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.11394908132157865, |
| "grad_norm": 5.069079604968983, |
| "learning_rate": 9.981033204423526e-06, |
| "loss": 0.5782, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.11403147400510835, |
| "grad_norm": 4.434254165048741, |
| "learning_rate": 9.980975105581315e-06, |
| "loss": 0.5051, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.11411386668863804, |
| "grad_norm": 7.74689606386784, |
| "learning_rate": 9.980916918060932e-06, |
| "loss": 0.5908, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.11419625937216775, |
| "grad_norm": 4.577069084019305, |
| "learning_rate": 9.980858641863415e-06, |
| "loss": 0.5266, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.11427865205569745, |
| "grad_norm": 3.7822421197101046, |
| "learning_rate": 9.980800276989802e-06, |
| "loss": 0.5155, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.11436104473922716, |
| "grad_norm": 6.354305643899875, |
| "learning_rate": 9.98074182344113e-06, |
| "loss": 0.5797, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.11444343742275685, |
| "grad_norm": 5.1062797154471085, |
| "learning_rate": 9.980683281218438e-06, |
| "loss": 0.3497, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.11452583010628656, |
| "grad_norm": 4.339769494165447, |
| "learning_rate": 9.980624650322772e-06, |
| "loss": 0.5299, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.11460822278981626, |
| "grad_norm": 7.573051401034229, |
| "learning_rate": 9.980565930755174e-06, |
| "loss": 0.681, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.11469061547334597, |
| "grad_norm": 14.775312547328694, |
| "learning_rate": 9.980507122516692e-06, |
| "loss": 0.7312, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.11477300815687567, |
| "grad_norm": 4.248998579788246, |
| "learning_rate": 9.980448225608369e-06, |
| "loss": 0.5819, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.11485540084040537, |
| "grad_norm": 4.820974256874798, |
| "learning_rate": 9.980389240031256e-06, |
| "loss": 0.3988, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.11493779352393507, |
| "grad_norm": 5.3611932668737134, |
| "learning_rate": 9.980330165786403e-06, |
| "loss": 0.553, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.11502018620746478, |
| "grad_norm": 4.803109353772144, |
| "learning_rate": 9.98027100287486e-06, |
| "loss": 0.4254, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.11510257889099448, |
| "grad_norm": 6.166043944368681, |
| "learning_rate": 9.980211751297682e-06, |
| "loss": 0.6435, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.11518497157452418, |
| "grad_norm": 6.321449333735212, |
| "learning_rate": 9.980152411055923e-06, |
| "loss": 0.5901, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.11526736425805388, |
| "grad_norm": 3.9058938395757736, |
| "learning_rate": 9.980092982150641e-06, |
| "loss": 0.4481, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.11534975694158359, |
| "grad_norm": 7.060663282013449, |
| "learning_rate": 9.980033464582892e-06, |
| "loss": 0.7435, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.11543214962511329, |
| "grad_norm": 5.044696423020422, |
| "learning_rate": 9.979973858353738e-06, |
| "loss": 0.4583, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.115514542308643, |
| "grad_norm": 4.010481581383983, |
| "learning_rate": 9.979914163464237e-06, |
| "loss": 0.4307, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.11559693499217269, |
| "grad_norm": 4.715002567821283, |
| "learning_rate": 9.979854379915454e-06, |
| "loss": 0.4067, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.1156793276757024, |
| "grad_norm": 5.095282676387148, |
| "learning_rate": 9.979794507708453e-06, |
| "loss": 0.5319, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.1157617203592321, |
| "grad_norm": 5.434641667200585, |
| "learning_rate": 9.979734546844301e-06, |
| "loss": 0.5371, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.1158441130427618, |
| "grad_norm": 5.864807571595335, |
| "learning_rate": 9.979674497324063e-06, |
| "loss": 0.7502, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.11592650572629151, |
| "grad_norm": 3.931855774749118, |
| "learning_rate": 9.979614359148809e-06, |
| "loss": 0.4857, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.1160088984098212, |
| "grad_norm": 4.415958936160234, |
| "learning_rate": 9.97955413231961e-06, |
| "loss": 0.4143, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.11609129109335091, |
| "grad_norm": 5.477681605181166, |
| "learning_rate": 9.97949381683754e-06, |
| "loss": 0.5853, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.11617368377688062, |
| "grad_norm": 5.394465919333391, |
| "learning_rate": 9.97943341270367e-06, |
| "loss": 0.6864, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.11625607646041032, |
| "grad_norm": 7.0540860642905825, |
| "learning_rate": 9.979372919919077e-06, |
| "loss": 0.5353, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.11633846914394001, |
| "grad_norm": 4.457918745669958, |
| "learning_rate": 9.979312338484837e-06, |
| "loss": 0.5332, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.11642086182746972, |
| "grad_norm": 4.318862068781239, |
| "learning_rate": 9.979251668402027e-06, |
| "loss": 0.5383, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.11650325451099942, |
| "grad_norm": 4.15910796680108, |
| "learning_rate": 9.979190909671732e-06, |
| "loss": 0.3852, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.11658564719452913, |
| "grad_norm": 4.409347363114283, |
| "learning_rate": 9.97913006229503e-06, |
| "loss": 0.5274, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.11666803987805884, |
| "grad_norm": 3.860140816734429, |
| "learning_rate": 9.979069126273006e-06, |
| "loss": 0.3016, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.11675043256158853, |
| "grad_norm": 4.05973495912906, |
| "learning_rate": 9.979008101606743e-06, |
| "loss": 0.5707, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.11683282524511823, |
| "grad_norm": 5.335313848877938, |
| "learning_rate": 9.978946988297329e-06, |
| "loss": 0.4813, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.11691521792864794, |
| "grad_norm": 5.075262879615677, |
| "learning_rate": 9.978885786345851e-06, |
| "loss": 0.463, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.11699761061217764, |
| "grad_norm": 5.215466147778947, |
| "learning_rate": 9.978824495753399e-06, |
| "loss": 0.5398, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.11708000329570734, |
| "grad_norm": 10.408226861855583, |
| "learning_rate": 9.978763116521065e-06, |
| "loss": 0.7895, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.11716239597923704, |
| "grad_norm": 3.486562350465299, |
| "learning_rate": 9.97870164864994e-06, |
| "loss": 0.5399, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.11724478866276675, |
| "grad_norm": 3.263975256020736, |
| "learning_rate": 9.97864009214112e-06, |
| "loss": 0.2916, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.11732718134629645, |
| "grad_norm": 3.687714872193806, |
| "learning_rate": 9.9785784469957e-06, |
| "loss": 0.304, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.11740957402982616, |
| "grad_norm": 5.509829107986992, |
| "learning_rate": 9.978516713214779e-06, |
| "loss": 0.5531, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.11749196671335585, |
| "grad_norm": 3.5921556435301474, |
| "learning_rate": 9.978454890799453e-06, |
| "loss": 0.3016, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.11757435939688556, |
| "grad_norm": 4.173814176850804, |
| "learning_rate": 9.978392979750825e-06, |
| "loss": 0.4396, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.11765675208041526, |
| "grad_norm": 33.23788817879558, |
| "learning_rate": 9.978330980069996e-06, |
| "loss": 1.1389, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.11773914476394497, |
| "grad_norm": 5.852780126730159, |
| "learning_rate": 9.978268891758072e-06, |
| "loss": 0.6514, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.11782153744747466, |
| "grad_norm": 4.939407725307317, |
| "learning_rate": 9.978206714816156e-06, |
| "loss": 0.7286, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.11790393013100436, |
| "grad_norm": 5.03926308808164, |
| "learning_rate": 9.978144449245357e-06, |
| "loss": 0.3388, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.11798632281453407, |
| "grad_norm": 6.049230382777763, |
| "learning_rate": 9.978082095046781e-06, |
| "loss": 0.4871, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.11806871549806378, |
| "grad_norm": 3.489939110522026, |
| "learning_rate": 9.978019652221543e-06, |
| "loss": 0.2893, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.11815110818159348, |
| "grad_norm": 5.028102433756064, |
| "learning_rate": 9.977957120770748e-06, |
| "loss": 0.5076, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.11823350086512317, |
| "grad_norm": 5.274453037216691, |
| "learning_rate": 9.977894500695512e-06, |
| "loss": 0.3798, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.11831589354865288, |
| "grad_norm": 4.751680688931771, |
| "learning_rate": 9.977831791996952e-06, |
| "loss": 0.4783, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.11839828623218258, |
| "grad_norm": 3.5749837950342274, |
| "learning_rate": 9.977768994676181e-06, |
| "loss": 0.3682, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.11848067891571229, |
| "grad_norm": 3.671226493661603, |
| "learning_rate": 9.97770610873432e-06, |
| "loss": 0.347, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.11856307159924198, |
| "grad_norm": 6.196939033251727, |
| "learning_rate": 9.977643134172487e-06, |
| "loss": 0.5274, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.11864546428277169, |
| "grad_norm": 5.2089347723562085, |
| "learning_rate": 9.977580070991804e-06, |
| "loss": 0.5175, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.11872785696630139, |
| "grad_norm": 6.334013108356391, |
| "learning_rate": 9.977516919193393e-06, |
| "loss": 0.6194, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.1188102496498311, |
| "grad_norm": 7.306839104821746, |
| "learning_rate": 9.977453678778379e-06, |
| "loss": 0.7709, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.1188926423333608, |
| "grad_norm": 4.519396435491379, |
| "learning_rate": 9.977390349747886e-06, |
| "loss": 0.4418, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.1189750350168905, |
| "grad_norm": 4.698209909244438, |
| "learning_rate": 9.977326932103044e-06, |
| "loss": 0.535, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.1190574277004202, |
| "grad_norm": 4.8932208776361446, |
| "learning_rate": 9.977263425844981e-06, |
| "loss": 0.5942, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.11913982038394991, |
| "grad_norm": 5.026667743592898, |
| "learning_rate": 9.977199830974826e-06, |
| "loss": 0.547, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.11922221306747961, |
| "grad_norm": 5.23253291298616, |
| "learning_rate": 9.977136147493715e-06, |
| "loss": 0.5333, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.1193046057510093, |
| "grad_norm": 4.719053095707587, |
| "learning_rate": 9.97707237540278e-06, |
| "loss": 0.5265, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.11938699843453901, |
| "grad_norm": 4.625540723780489, |
| "learning_rate": 9.977008514703153e-06, |
| "loss": 0.5827, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.11946939111806872, |
| "grad_norm": 3.856805995552182, |
| "learning_rate": 9.976944565395976e-06, |
| "loss": 0.6053, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.11955178380159842, |
| "grad_norm": 3.330853507113743, |
| "learning_rate": 9.976880527482385e-06, |
| "loss": 0.5254, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.11963417648512813, |
| "grad_norm": 4.459173017435613, |
| "learning_rate": 9.97681640096352e-06, |
| "loss": 0.4728, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.11971656916865782, |
| "grad_norm": 7.018979668301613, |
| "learning_rate": 9.976752185840524e-06, |
| "loss": 0.6592, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.11979896185218752, |
| "grad_norm": 4.695286668122802, |
| "learning_rate": 9.976687882114538e-06, |
| "loss": 0.2927, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.11988135453571723, |
| "grad_norm": 6.372078077902412, |
| "learning_rate": 9.976623489786708e-06, |
| "loss": 0.7203, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.11996374721924694, |
| "grad_norm": 13.058182473303905, |
| "learning_rate": 9.976559008858182e-06, |
| "loss": 0.8304, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.12004613990277663, |
| "grad_norm": 3.750428817875247, |
| "learning_rate": 9.976494439330106e-06, |
| "loss": 0.4544, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.12012853258630633, |
| "grad_norm": 5.714795325590294, |
| "learning_rate": 9.976429781203631e-06, |
| "loss": 0.5764, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.12021092526983604, |
| "grad_norm": 6.249017860839444, |
| "learning_rate": 9.976365034479907e-06, |
| "loss": 0.6097, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.12029331795336574, |
| "grad_norm": 4.366534697481048, |
| "learning_rate": 9.976300199160087e-06, |
| "loss": 0.5872, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.12037571063689545, |
| "grad_norm": 8.297730150263703, |
| "learning_rate": 9.976235275245325e-06, |
| "loss": 0.6445, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.12045810332042514, |
| "grad_norm": 3.0791550000816437, |
| "learning_rate": 9.976170262736777e-06, |
| "loss": 0.3265, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.12054049600395485, |
| "grad_norm": 4.587962090590767, |
| "learning_rate": 9.9761051616356e-06, |
| "loss": 0.4655, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.12062288868748455, |
| "grad_norm": 4.083352587701113, |
| "learning_rate": 9.976039971942955e-06, |
| "loss": 0.5081, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.12070528137101426, |
| "grad_norm": 19.36776574624167, |
| "learning_rate": 9.97597469366e-06, |
| "loss": 0.8676, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.12078767405454395, |
| "grad_norm": 13.102581672872773, |
| "learning_rate": 9.975909326787898e-06, |
| "loss": 0.6868, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.12087006673807366, |
| "grad_norm": 4.961145426331136, |
| "learning_rate": 9.975843871327815e-06, |
| "loss": 0.5178, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.12095245942160336, |
| "grad_norm": 3.5357157810853512, |
| "learning_rate": 9.975778327280914e-06, |
| "loss": 0.2661, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.12103485210513307, |
| "grad_norm": 3.0027106887596036, |
| "learning_rate": 9.97571269464836e-06, |
| "loss": 0.2429, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.12111724478866277, |
| "grad_norm": 4.596992981690704, |
| "learning_rate": 9.975646973431326e-06, |
| "loss": 0.4726, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.12119963747219246, |
| "grad_norm": 6.690419035435042, |
| "learning_rate": 9.975581163630981e-06, |
| "loss": 0.7149, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.12128203015572217, |
| "grad_norm": 5.306103849360317, |
| "learning_rate": 9.975515265248493e-06, |
| "loss": 0.5719, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.12136442283925188, |
| "grad_norm": 5.095282694288974, |
| "learning_rate": 9.975449278285038e-06, |
| "loss": 0.5793, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.12144681552278158, |
| "grad_norm": 5.007439354366816, |
| "learning_rate": 9.975383202741793e-06, |
| "loss": 0.5795, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.12152920820631127, |
| "grad_norm": 4.86506910122601, |
| "learning_rate": 9.97531703861993e-06, |
| "loss": 0.4545, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.12161160088984098, |
| "grad_norm": 12.362020895190154, |
| "learning_rate": 9.975250785920629e-06, |
| "loss": 0.6358, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.12169399357337068, |
| "grad_norm": 5.088274293351666, |
| "learning_rate": 9.97518444464507e-06, |
| "loss": 0.4832, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.12177638625690039, |
| "grad_norm": 4.852830296274398, |
| "learning_rate": 9.975118014794431e-06, |
| "loss": 0.4588, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.1218587789404301, |
| "grad_norm": 5.076993779434122, |
| "learning_rate": 9.975051496369899e-06, |
| "loss": 0.5489, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.12194117162395979, |
| "grad_norm": 4.492297833507457, |
| "learning_rate": 9.974984889372658e-06, |
| "loss": 0.3727, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1220235643074895, |
| "grad_norm": 4.969531949366711, |
| "learning_rate": 9.97491819380389e-06, |
| "loss": 0.6192, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.1221059569910192, |
| "grad_norm": 5.5171550464058985, |
| "learning_rate": 9.974851409664786e-06, |
| "loss": 0.5168, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.1221883496745489, |
| "grad_norm": 4.549692286167438, |
| "learning_rate": 9.974784536956533e-06, |
| "loss": 0.5461, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.1222707423580786, |
| "grad_norm": 4.638601246264793, |
| "learning_rate": 9.974717575680321e-06, |
| "loss": 0.5586, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.1223531350416083, |
| "grad_norm": 3.9934881662408617, |
| "learning_rate": 9.974650525837345e-06, |
| "loss": 0.4364, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.12243552772513801, |
| "grad_norm": 8.227489553944196, |
| "learning_rate": 9.974583387428797e-06, |
| "loss": 0.8104, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.12251792040866771, |
| "grad_norm": 6.644787549148226, |
| "learning_rate": 9.974516160455872e-06, |
| "loss": 0.4537, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.12260031309219742, |
| "grad_norm": 4.325870512007751, |
| "learning_rate": 9.974448844919766e-06, |
| "loss": 0.4874, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.12268270577572711, |
| "grad_norm": 6.081726074936586, |
| "learning_rate": 9.97438144082168e-06, |
| "loss": 0.3875, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.12276509845925682, |
| "grad_norm": 4.548854894101285, |
| "learning_rate": 9.974313948162812e-06, |
| "loss": 0.5696, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.12284749114278652, |
| "grad_norm": 9.255791845151686, |
| "learning_rate": 9.974246366944364e-06, |
| "loss": 0.9999, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.12292988382631623, |
| "grad_norm": 6.508244913389245, |
| "learning_rate": 9.97417869716754e-06, |
| "loss": 0.6256, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.12301227650984592, |
| "grad_norm": 2.9004728725960067, |
| "learning_rate": 9.974110938833545e-06, |
| "loss": 0.2222, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.12309466919337562, |
| "grad_norm": 6.840818959886781, |
| "learning_rate": 9.974043091943584e-06, |
| "loss": 0.6488, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.12317706187690533, |
| "grad_norm": 4.564889329777813, |
| "learning_rate": 9.973975156498866e-06, |
| "loss": 0.4834, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.12325945456043504, |
| "grad_norm": 6.124262779948301, |
| "learning_rate": 9.973907132500597e-06, |
| "loss": 0.6345, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.12334184724396474, |
| "grad_norm": 4.143149551754576, |
| "learning_rate": 9.973839019949994e-06, |
| "loss": 0.5449, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.12342423992749443, |
| "grad_norm": 4.692433215382461, |
| "learning_rate": 9.973770818848265e-06, |
| "loss": 0.381, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.12350663261102414, |
| "grad_norm": 4.558902674629272, |
| "learning_rate": 9.973702529196627e-06, |
| "loss": 0.4342, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.12358902529455384, |
| "grad_norm": 4.544800628848942, |
| "learning_rate": 9.973634150996291e-06, |
| "loss": 0.3499, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.12367141797808355, |
| "grad_norm": 5.059581470168767, |
| "learning_rate": 9.973565684248483e-06, |
| "loss": 0.5135, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.12375381066161324, |
| "grad_norm": 4.258353140119297, |
| "learning_rate": 9.973497128954414e-06, |
| "loss": 0.269, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.12383620334514295, |
| "grad_norm": 7.132848659419424, |
| "learning_rate": 9.973428485115308e-06, |
| "loss": 0.6726, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.12391859602867265, |
| "grad_norm": 3.5983862118163277, |
| "learning_rate": 9.973359752732386e-06, |
| "loss": 0.4669, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.12400098871220236, |
| "grad_norm": 3.448183524414553, |
| "learning_rate": 9.973290931806874e-06, |
| "loss": 0.1703, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.12408338139573206, |
| "grad_norm": 3.0366884309895794, |
| "learning_rate": 9.973222022339992e-06, |
| "loss": 0.2643, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.12416577407926176, |
| "grad_norm": 4.8670408538461745, |
| "learning_rate": 9.973153024332974e-06, |
| "loss": 0.2684, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.12424816676279146, |
| "grad_norm": 5.9026783007837205, |
| "learning_rate": 9.973083937787042e-06, |
| "loss": 0.5869, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.12433055944632117, |
| "grad_norm": 6.025970687624214, |
| "learning_rate": 9.973014762703429e-06, |
| "loss": 0.4191, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.12441295212985087, |
| "grad_norm": 6.51904193753321, |
| "learning_rate": 9.972945499083366e-06, |
| "loss": 0.7139, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.12449534481338057, |
| "grad_norm": 6.684992245083261, |
| "learning_rate": 9.972876146928088e-06, |
| "loss": 0.6404, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.12457773749691027, |
| "grad_norm": 5.001971574274414, |
| "learning_rate": 9.972806706238826e-06, |
| "loss": 0.4946, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.12466013018043998, |
| "grad_norm": 6.7337019014227355, |
| "learning_rate": 9.97273717701682e-06, |
| "loss": 0.5832, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.12474252286396968, |
| "grad_norm": 5.220718061812807, |
| "learning_rate": 9.972667559263305e-06, |
| "loss": 0.5482, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.12482491554749939, |
| "grad_norm": 5.150466794347032, |
| "learning_rate": 9.97259785297952e-06, |
| "loss": 0.6143, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.12490730823102908, |
| "grad_norm": 6.537060800514755, |
| "learning_rate": 9.972528058166711e-06, |
| "loss": 0.6394, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.12498970091455879, |
| "grad_norm": 4.955150662710675, |
| "learning_rate": 9.972458174826115e-06, |
| "loss": 0.5091, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.1250720935980885, |
| "grad_norm": 4.842000273038944, |
| "learning_rate": 9.972388202958977e-06, |
| "loss": 0.4601, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.12515448628161818, |
| "grad_norm": 4.184733000664414, |
| "learning_rate": 9.972318142566547e-06, |
| "loss": 0.4797, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.1252368789651479, |
| "grad_norm": 5.585543182543029, |
| "learning_rate": 9.972247993650067e-06, |
| "loss": 0.6223, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.1253192716486776, |
| "grad_norm": 5.879739754393376, |
| "learning_rate": 9.97217775621079e-06, |
| "loss": 0.628, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.1254016643322073, |
| "grad_norm": 11.002403812555526, |
| "learning_rate": 9.972107430249963e-06, |
| "loss": 0.553, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.125484057015737, |
| "grad_norm": 6.203040642309966, |
| "learning_rate": 9.972037015768841e-06, |
| "loss": 0.7279, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.1255664496992667, |
| "grad_norm": 7.577934526871955, |
| "learning_rate": 9.971966512768677e-06, |
| "loss": 0.538, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.12564884238279642, |
| "grad_norm": 4.019605674039673, |
| "learning_rate": 9.971895921250723e-06, |
| "loss": 0.5441, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.1257312350663261, |
| "grad_norm": 5.764658658433788, |
| "learning_rate": 9.97182524121624e-06, |
| "loss": 0.6654, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.1258136277498558, |
| "grad_norm": 4.945527867426451, |
| "learning_rate": 9.971754472666484e-06, |
| "loss": 0.514, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.12589602043338552, |
| "grad_norm": 6.0441278677454005, |
| "learning_rate": 9.971683615602716e-06, |
| "loss": 0.4181, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.1259784131169152, |
| "grad_norm": 14.25711273437809, |
| "learning_rate": 9.971612670026196e-06, |
| "loss": 0.6254, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.12606080580044493, |
| "grad_norm": 4.700273765258455, |
| "learning_rate": 9.97154163593819e-06, |
| "loss": 0.6229, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.12614319848397462, |
| "grad_norm": 3.3864821654511945, |
| "learning_rate": 9.97147051333996e-06, |
| "loss": 0.399, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.12622559116750431, |
| "grad_norm": 5.12783193960563, |
| "learning_rate": 9.971399302232772e-06, |
| "loss": 0.5427, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.12630798385103403, |
| "grad_norm": 4.931238238343592, |
| "learning_rate": 9.971328002617895e-06, |
| "loss": 0.2851, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.12639037653456373, |
| "grad_norm": 5.285172145335731, |
| "learning_rate": 9.971256614496598e-06, |
| "loss": 0.5647, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.12647276921809344, |
| "grad_norm": 4.744357889613329, |
| "learning_rate": 9.971185137870155e-06, |
| "loss": 0.5237, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.12655516190162314, |
| "grad_norm": 8.652911164552046, |
| "learning_rate": 9.971113572739832e-06, |
| "loss": 0.8313, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.12663755458515283, |
| "grad_norm": 5.869655307998774, |
| "learning_rate": 9.971041919106908e-06, |
| "loss": 0.5802, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.12671994726868255, |
| "grad_norm": 6.3300754330457, |
| "learning_rate": 9.970970176972658e-06, |
| "loss": 0.6885, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.12680233995221224, |
| "grad_norm": 7.1758070025576055, |
| "learning_rate": 9.970898346338358e-06, |
| "loss": 0.6389, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.12688473263574196, |
| "grad_norm": 12.961913535688998, |
| "learning_rate": 9.970826427205287e-06, |
| "loss": 0.6393, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.12696712531927165, |
| "grad_norm": 5.401262177099866, |
| "learning_rate": 9.970754419574728e-06, |
| "loss": 0.5455, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.12704951800280134, |
| "grad_norm": 3.3340002399513, |
| "learning_rate": 9.970682323447959e-06, |
| "loss": 0.2345, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.12713191068633106, |
| "grad_norm": 8.894631262296059, |
| "learning_rate": 9.970610138826267e-06, |
| "loss": 0.7767, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.12721430336986075, |
| "grad_norm": 4.952958216270172, |
| "learning_rate": 9.970537865710934e-06, |
| "loss": 0.2786, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.12729669605339045, |
| "grad_norm": 2.6515561410684194, |
| "learning_rate": 9.970465504103249e-06, |
| "loss": 0.2038, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.12737908873692017, |
| "grad_norm": 5.298892635444798, |
| "learning_rate": 9.9703930540045e-06, |
| "loss": 0.4559, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.12746148142044986, |
| "grad_norm": 4.197621921991316, |
| "learning_rate": 9.970320515415974e-06, |
| "loss": 0.4502, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.12754387410397958, |
| "grad_norm": 4.800228256482724, |
| "learning_rate": 9.970247888338966e-06, |
| "loss": 0.5957, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.12762626678750927, |
| "grad_norm": 4.815816035104436, |
| "learning_rate": 9.970175172774768e-06, |
| "loss": 0.5874, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.12770865947103896, |
| "grad_norm": 6.545724686124551, |
| "learning_rate": 9.970102368724675e-06, |
| "loss": 0.5925, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.12779105215456868, |
| "grad_norm": 5.121538774261143, |
| "learning_rate": 9.970029476189984e-06, |
| "loss": 0.5063, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.12787344483809837, |
| "grad_norm": 5.7759067014268135, |
| "learning_rate": 9.969956495171989e-06, |
| "loss": 0.6866, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.1279558375216281, |
| "grad_norm": 6.588677492190531, |
| "learning_rate": 9.96988342567199e-06, |
| "loss": 0.7341, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.12803823020515778, |
| "grad_norm": 6.843692520636973, |
| "learning_rate": 9.969810267691293e-06, |
| "loss": 0.7034, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.12812062288868747, |
| "grad_norm": 5.436070981898065, |
| "learning_rate": 9.969737021231196e-06, |
| "loss": 0.4046, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.1282030155722172, |
| "grad_norm": 15.426453647795194, |
| "learning_rate": 9.969663686293003e-06, |
| "loss": 0.8425, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.12828540825574689, |
| "grad_norm": 6.468281649010783, |
| "learning_rate": 9.969590262878021e-06, |
| "loss": 0.5969, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.1283678009392766, |
| "grad_norm": 8.361371730974474, |
| "learning_rate": 9.969516750987558e-06, |
| "loss": 0.6787, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.1284501936228063, |
| "grad_norm": 5.3302943640173925, |
| "learning_rate": 9.969443150622921e-06, |
| "loss": 0.5459, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.128532586306336, |
| "grad_norm": 4.633429138093913, |
| "learning_rate": 9.96936946178542e-06, |
| "loss": 0.649, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.1286149789898657, |
| "grad_norm": 8.13100570319315, |
| "learning_rate": 9.96929568447637e-06, |
| "loss": 0.7892, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.1286973716733954, |
| "grad_norm": 5.039540157664388, |
| "learning_rate": 9.96922181869708e-06, |
| "loss": 0.5216, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.1287797643569251, |
| "grad_norm": 3.6431682961361633, |
| "learning_rate": 9.969147864448867e-06, |
| "loss": 0.2595, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.1288621570404548, |
| "grad_norm": 5.4887751624743375, |
| "learning_rate": 9.96907382173305e-06, |
| "loss": 0.5876, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.1289445497239845, |
| "grad_norm": 4.129305542648151, |
| "learning_rate": 9.968999690550945e-06, |
| "loss": 0.567, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.12902694240751422, |
| "grad_norm": 4.6202359784395695, |
| "learning_rate": 9.96892547090387e-06, |
| "loss": 0.3541, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.12910933509104391, |
| "grad_norm": 5.530337251130461, |
| "learning_rate": 9.968851162793149e-06, |
| "loss": 0.4995, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.1291917277745736, |
| "grad_norm": 6.098832238315356, |
| "learning_rate": 9.968776766220105e-06, |
| "loss": 0.6462, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.12927412045810333, |
| "grad_norm": 36.110655337933196, |
| "learning_rate": 9.968702281186062e-06, |
| "loss": 1.8144, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.12935651314163302, |
| "grad_norm": 3.73141968324195, |
| "learning_rate": 9.968627707692345e-06, |
| "loss": 0.4626, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.12943890582516274, |
| "grad_norm": 5.556066968723632, |
| "learning_rate": 9.968553045740283e-06, |
| "loss": 0.4893, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.12952129850869243, |
| "grad_norm": 3.5534727840085303, |
| "learning_rate": 9.968478295331206e-06, |
| "loss": 0.3499, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.12960369119222212, |
| "grad_norm": 5.954589534858549, |
| "learning_rate": 9.96840345646644e-06, |
| "loss": 0.7069, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.12968608387575184, |
| "grad_norm": 5.504329435904227, |
| "learning_rate": 9.968328529147324e-06, |
| "loss": 0.5345, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.12976847655928153, |
| "grad_norm": 4.780089942398109, |
| "learning_rate": 9.968253513375187e-06, |
| "loss": 0.6211, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.12985086924281125, |
| "grad_norm": 3.6400688935977183, |
| "learning_rate": 9.968178409151368e-06, |
| "loss": 0.4675, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.12993326192634094, |
| "grad_norm": 5.161844362296579, |
| "learning_rate": 9.968103216477203e-06, |
| "loss": 0.5463, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.13001565460987063, |
| "grad_norm": 6.01338401708055, |
| "learning_rate": 9.968027935354029e-06, |
| "loss": 0.5191, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.13009804729340035, |
| "grad_norm": 4.717446966537538, |
| "learning_rate": 9.967952565783188e-06, |
| "loss": 0.5651, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.13018043997693005, |
| "grad_norm": 5.2385252642930125, |
| "learning_rate": 9.96787710776602e-06, |
| "loss": 0.5444, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.13026283266045974, |
| "grad_norm": 4.091715325329443, |
| "learning_rate": 9.967801561303871e-06, |
| "loss": 0.4193, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.13034522534398946, |
| "grad_norm": 7.045107352433902, |
| "learning_rate": 9.967725926398086e-06, |
| "loss": 0.4062, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.13042761802751915, |
| "grad_norm": 4.770986545998961, |
| "learning_rate": 9.967650203050007e-06, |
| "loss": 0.5442, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.13051001071104887, |
| "grad_norm": 4.436604203751775, |
| "learning_rate": 9.967574391260988e-06, |
| "loss": 0.4965, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.13059240339457856, |
| "grad_norm": 5.81892153360883, |
| "learning_rate": 9.967498491032376e-06, |
| "loss": 0.5432, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.13067479607810825, |
| "grad_norm": 6.625865132029466, |
| "learning_rate": 9.967422502365523e-06, |
| "loss": 0.6075, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.13075718876163797, |
| "grad_norm": 5.357243836445354, |
| "learning_rate": 9.96734642526178e-06, |
| "loss": 0.5635, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.13083958144516766, |
| "grad_norm": 7.170092927283846, |
| "learning_rate": 9.9672702597225e-06, |
| "loss": 0.5262, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.13092197412869738, |
| "grad_norm": 5.58870075455088, |
| "learning_rate": 9.967194005749045e-06, |
| "loss": 0.5163, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.13100436681222707, |
| "grad_norm": 3.3901516521405113, |
| "learning_rate": 9.96711766334277e-06, |
| "loss": 0.3337, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.13108675949575677, |
| "grad_norm": 6.177430383120361, |
| "learning_rate": 9.967041232505032e-06, |
| "loss": 0.6221, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.13116915217928649, |
| "grad_norm": 3.409414315356522, |
| "learning_rate": 9.966964713237193e-06, |
| "loss": 0.4258, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.13125154486281618, |
| "grad_norm": 5.796187847217588, |
| "learning_rate": 9.966888105540615e-06, |
| "loss": 0.6415, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.1313339375463459, |
| "grad_norm": 4.297779577006258, |
| "learning_rate": 9.966811409416664e-06, |
| "loss": 0.353, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.1314163302298756, |
| "grad_norm": 8.019855525742495, |
| "learning_rate": 9.966734624866702e-06, |
| "loss": 0.6986, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.13149872291340528, |
| "grad_norm": 4.454829131276674, |
| "learning_rate": 9.966657751892099e-06, |
| "loss": 0.44, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.131581115596935, |
| "grad_norm": 5.627067380885521, |
| "learning_rate": 9.966580790494222e-06, |
| "loss": 0.5673, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.1316635082804647, |
| "grad_norm": 16.272684626405372, |
| "learning_rate": 9.96650374067444e-06, |
| "loss": 0.6624, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.13174590096399438, |
| "grad_norm": 4.793323279968309, |
| "learning_rate": 9.966426602434128e-06, |
| "loss": 0.543, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.1318282936475241, |
| "grad_norm": 5.433792865346555, |
| "learning_rate": 9.966349375774658e-06, |
| "loss": 0.5756, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1319106863310538, |
| "grad_norm": 6.05169655830317, |
| "learning_rate": 9.966272060697403e-06, |
| "loss": 0.5257, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.13199307901458351, |
| "grad_norm": 28.334925324741096, |
| "learning_rate": 9.966194657203743e-06, |
| "loss": 0.7121, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.1320754716981132, |
| "grad_norm": 4.439485716182711, |
| "learning_rate": 9.966117165295053e-06, |
| "loss": 0.4213, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.1321578643816429, |
| "grad_norm": 4.634956482246697, |
| "learning_rate": 9.966039584972713e-06, |
| "loss": 0.5792, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.13224025706517262, |
| "grad_norm": 8.01022976032216, |
| "learning_rate": 9.965961916238105e-06, |
| "loss": 0.8657, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.1323226497487023, |
| "grad_norm": 4.263056023595695, |
| "learning_rate": 9.965884159092613e-06, |
| "loss": 0.5201, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.13240504243223203, |
| "grad_norm": 5.493539454273528, |
| "learning_rate": 9.965806313537618e-06, |
| "loss": 0.551, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.13248743511576172, |
| "grad_norm": 5.696550440916956, |
| "learning_rate": 9.965728379574508e-06, |
| "loss": 0.6679, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.1325698277992914, |
| "grad_norm": 5.757476810730032, |
| "learning_rate": 9.965650357204673e-06, |
| "loss": 0.347, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.13265222048282113, |
| "grad_norm": 3.9736532348314686, |
| "learning_rate": 9.965572246429498e-06, |
| "loss": 0.4657, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.13273461316635082, |
| "grad_norm": 5.014191491777652, |
| "learning_rate": 9.965494047250374e-06, |
| "loss": 0.5738, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.13281700584988054, |
| "grad_norm": 4.378594249950214, |
| "learning_rate": 9.965415759668696e-06, |
| "loss": 0.5816, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.13289939853341023, |
| "grad_norm": 4.370547607797723, |
| "learning_rate": 9.965337383685854e-06, |
| "loss": 0.4593, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.13298179121693993, |
| "grad_norm": 2.9688769844150844, |
| "learning_rate": 9.965258919303246e-06, |
| "loss": 0.4406, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.13306418390046965, |
| "grad_norm": 5.547068937775387, |
| "learning_rate": 9.965180366522269e-06, |
| "loss": 0.6537, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.13314657658399934, |
| "grad_norm": 6.145359120990056, |
| "learning_rate": 9.96510172534432e-06, |
| "loss": 0.5867, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.13322896926752906, |
| "grad_norm": 5.312644770705092, |
| "learning_rate": 9.9650229957708e-06, |
| "loss": 0.6586, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.13331136195105875, |
| "grad_norm": 5.562860466575434, |
| "learning_rate": 9.96494417780311e-06, |
| "loss": 0.5398, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.13339375463458844, |
| "grad_norm": 4.158707385172828, |
| "learning_rate": 9.964865271442656e-06, |
| "loss": 0.3144, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.13347614731811816, |
| "grad_norm": 4.431346560539185, |
| "learning_rate": 9.964786276690839e-06, |
| "loss": 0.4856, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.13355854000164785, |
| "grad_norm": 5.449051878207818, |
| "learning_rate": 9.964707193549069e-06, |
| "loss": 0.5363, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.13364093268517754, |
| "grad_norm": 6.425628126296071, |
| "learning_rate": 9.964628022018748e-06, |
| "loss": 0.7224, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.13372332536870726, |
| "grad_norm": 7.144634256191189, |
| "learning_rate": 9.964548762101293e-06, |
| "loss": 0.7207, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.13380571805223695, |
| "grad_norm": 5.800859450448251, |
| "learning_rate": 9.96446941379811e-06, |
| "loss": 0.6978, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.13388811073576667, |
| "grad_norm": 5.499598864277338, |
| "learning_rate": 9.964389977110613e-06, |
| "loss": 0.4624, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.13397050341929637, |
| "grad_norm": 4.9609394948407255, |
| "learning_rate": 9.964310452040216e-06, |
| "loss": 0.555, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.13405289610282606, |
| "grad_norm": 5.42629026019687, |
| "learning_rate": 9.964230838588336e-06, |
| "loss": 0.4247, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.13413528878635578, |
| "grad_norm": 4.866848300258573, |
| "learning_rate": 9.964151136756391e-06, |
| "loss": 0.5655, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.13421768146988547, |
| "grad_norm": 4.615561791061624, |
| "learning_rate": 9.964071346545796e-06, |
| "loss": 0.58, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.1343000741534152, |
| "grad_norm": 6.71642950132474, |
| "learning_rate": 9.963991467957977e-06, |
| "loss": 0.7631, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.13438246683694488, |
| "grad_norm": 4.315299600438297, |
| "learning_rate": 9.963911500994352e-06, |
| "loss": 0.5401, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.13446485952047457, |
| "grad_norm": 4.648644316335041, |
| "learning_rate": 9.963831445656345e-06, |
| "loss": 0.5922, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.1345472522040043, |
| "grad_norm": 3.5272454240753266, |
| "learning_rate": 9.96375130194538e-06, |
| "loss": 0.4294, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.13462964488753398, |
| "grad_norm": 4.157793884858299, |
| "learning_rate": 9.963671069862891e-06, |
| "loss": 0.3727, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.1347120375710637, |
| "grad_norm": 5.325914533957815, |
| "learning_rate": 9.9635907494103e-06, |
| "loss": 0.5745, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.1347944302545934, |
| "grad_norm": 5.069398707440809, |
| "learning_rate": 9.963510340589037e-06, |
| "loss": 0.706, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.1348768229381231, |
| "grad_norm": 28.812450237360544, |
| "learning_rate": 9.963429843400536e-06, |
| "loss": 0.7662, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.1349592156216528, |
| "grad_norm": 3.5444323522446624, |
| "learning_rate": 9.963349257846227e-06, |
| "loss": 0.524, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.1350416083051825, |
| "grad_norm": 3.5539689124915856, |
| "learning_rate": 9.963268583927549e-06, |
| "loss": 0.2982, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.1351240009887122, |
| "grad_norm": 5.938623235390135, |
| "learning_rate": 9.963187821645934e-06, |
| "loss": 0.7121, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.1352063936722419, |
| "grad_norm": 6.0354335604105565, |
| "learning_rate": 9.963106971002825e-06, |
| "loss": 0.4654, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.1352887863557716, |
| "grad_norm": 4.70397412329452, |
| "learning_rate": 9.963026031999657e-06, |
| "loss": 0.4274, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.13537117903930132, |
| "grad_norm": 10.030510740341603, |
| "learning_rate": 9.96294500463787e-06, |
| "loss": 0.946, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.135453571722831, |
| "grad_norm": 3.6329870853318207, |
| "learning_rate": 9.96286388891891e-06, |
| "loss": 0.3884, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.1355359644063607, |
| "grad_norm": 21.36243576092974, |
| "learning_rate": 9.962782684844222e-06, |
| "loss": 0.352, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.13561835708989042, |
| "grad_norm": 4.951295627734071, |
| "learning_rate": 9.962701392415248e-06, |
| "loss": 0.4897, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.13570074977342012, |
| "grad_norm": 8.046321604277672, |
| "learning_rate": 9.962620011633437e-06, |
| "loss": 0.535, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.13578314245694983, |
| "grad_norm": 6.575825142302053, |
| "learning_rate": 9.962538542500237e-06, |
| "loss": 0.309, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.13586553514047953, |
| "grad_norm": 6.94532275093602, |
| "learning_rate": 9.9624569850171e-06, |
| "loss": 0.6461, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.13594792782400922, |
| "grad_norm": 4.7357288915786855, |
| "learning_rate": 9.962375339185477e-06, |
| "loss": 0.4357, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.13603032050753894, |
| "grad_norm": 6.597949073770168, |
| "learning_rate": 9.962293605006824e-06, |
| "loss": 0.6975, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.13611271319106863, |
| "grad_norm": 4.15014398746701, |
| "learning_rate": 9.962211782482592e-06, |
| "loss": 0.3552, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.13619510587459835, |
| "grad_norm": 4.809601849964069, |
| "learning_rate": 9.962129871614238e-06, |
| "loss": 0.6046, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.13627749855812804, |
| "grad_norm": 4.127491331508017, |
| "learning_rate": 9.962047872403225e-06, |
| "loss": 0.4958, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.13635989124165773, |
| "grad_norm": 4.711346309289919, |
| "learning_rate": 9.961965784851008e-06, |
| "loss": 0.5391, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.13644228392518745, |
| "grad_norm": 5.386352678608556, |
| "learning_rate": 9.96188360895905e-06, |
| "loss": 0.5528, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.13652467660871714, |
| "grad_norm": 3.7171000317960674, |
| "learning_rate": 9.961801344728814e-06, |
| "loss": 0.3146, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.13660706929224684, |
| "grad_norm": 8.89069958620607, |
| "learning_rate": 9.961718992161766e-06, |
| "loss": 0.7587, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.13668946197577655, |
| "grad_norm": 4.697622743571729, |
| "learning_rate": 9.961636551259372e-06, |
| "loss": 0.5835, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.13677185465930625, |
| "grad_norm": 4.830696241299008, |
| "learning_rate": 9.961554022023096e-06, |
| "loss": 0.4971, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.13685424734283597, |
| "grad_norm": 6.104707702622647, |
| "learning_rate": 9.961471404454412e-06, |
| "loss": 0.7071, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.13693664002636566, |
| "grad_norm": 4.34172790300022, |
| "learning_rate": 9.961388698554788e-06, |
| "loss": 0.5556, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.13701903270989535, |
| "grad_norm": 4.298900363667948, |
| "learning_rate": 9.961305904325698e-06, |
| "loss": 0.5294, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.13710142539342507, |
| "grad_norm": 4.954490101460579, |
| "learning_rate": 9.961223021768616e-06, |
| "loss": 0.6465, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.13718381807695476, |
| "grad_norm": 3.726228481637675, |
| "learning_rate": 9.961140050885014e-06, |
| "loss": 0.631, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.13726621076048448, |
| "grad_norm": 4.358895727873771, |
| "learning_rate": 9.961056991676374e-06, |
| "loss": 0.3122, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.13734860344401417, |
| "grad_norm": 3.7045601167381945, |
| "learning_rate": 9.960973844144173e-06, |
| "loss": 0.4447, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.13743099612754386, |
| "grad_norm": 4.571967455353021, |
| "learning_rate": 9.960890608289892e-06, |
| "loss": 0.4863, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.13751338881107358, |
| "grad_norm": 5.0825669590129205, |
| "learning_rate": 9.96080728411501e-06, |
| "loss": 0.6621, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.13759578149460328, |
| "grad_norm": 5.845767088881154, |
| "learning_rate": 9.960723871621015e-06, |
| "loss": 0.7493, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.137678174178133, |
| "grad_norm": 13.232880223960649, |
| "learning_rate": 9.960640370809386e-06, |
| "loss": 0.8361, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.1377605668616627, |
| "grad_norm": 3.393345594707139, |
| "learning_rate": 9.960556781681617e-06, |
| "loss": 0.4943, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.13784295954519238, |
| "grad_norm": 4.815367039541761, |
| "learning_rate": 9.960473104239188e-06, |
| "loss": 0.5859, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.1379253522287221, |
| "grad_norm": 25.934815306172943, |
| "learning_rate": 9.960389338483595e-06, |
| "loss": 0.5897, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.1380077449122518, |
| "grad_norm": 24.48503833988488, |
| "learning_rate": 9.960305484416329e-06, |
| "loss": 0.4866, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.13809013759578148, |
| "grad_norm": 4.687506824918209, |
| "learning_rate": 9.96022154203888e-06, |
| "loss": 0.6256, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.1381725302793112, |
| "grad_norm": 4.850750355427269, |
| "learning_rate": 9.960137511352743e-06, |
| "loss": 0.6337, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.1382549229628409, |
| "grad_norm": 3.8668299030247524, |
| "learning_rate": 9.960053392359415e-06, |
| "loss": 0.469, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.1383373156463706, |
| "grad_norm": 6.853909013979112, |
| "learning_rate": 9.959969185060393e-06, |
| "loss": 0.6324, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.1384197083299003, |
| "grad_norm": 6.736126149335651, |
| "learning_rate": 9.959884889457176e-06, |
| "loss": 0.562, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.13850210101343, |
| "grad_norm": 3.2727359868331862, |
| "learning_rate": 9.959800505551266e-06, |
| "loss": 0.3456, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.13858449369695972, |
| "grad_norm": 5.234071812547031, |
| "learning_rate": 9.959716033344164e-06, |
| "loss": 0.5409, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.1386668863804894, |
| "grad_norm": 6.251936957368742, |
| "learning_rate": 9.959631472837376e-06, |
| "loss": 0.5655, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.13874927906401913, |
| "grad_norm": 5.288184313366442, |
| "learning_rate": 9.959546824032404e-06, |
| "loss": 0.5368, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.13883167174754882, |
| "grad_norm": 6.679955478940586, |
| "learning_rate": 9.959462086930757e-06, |
| "loss": 0.6028, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.1389140644310785, |
| "grad_norm": 5.000778668824838, |
| "learning_rate": 9.959377261533945e-06, |
| "loss": 0.5867, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.13899645711460823, |
| "grad_norm": 5.120889487517192, |
| "learning_rate": 9.959292347843476e-06, |
| "loss": 0.5128, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.13907884979813792, |
| "grad_norm": 6.017331185338668, |
| "learning_rate": 9.959207345860863e-06, |
| "loss": 0.8164, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.13916124248166764, |
| "grad_norm": 4.242760533926133, |
| "learning_rate": 9.959122255587617e-06, |
| "loss": 0.4745, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.13924363516519733, |
| "grad_norm": 6.532137559532413, |
| "learning_rate": 9.959037077025256e-06, |
| "loss": 0.6932, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.13932602784872702, |
| "grad_norm": 5.970476933345833, |
| "learning_rate": 9.958951810175294e-06, |
| "loss": 0.5707, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.13940842053225674, |
| "grad_norm": 4.783279541756054, |
| "learning_rate": 9.958866455039253e-06, |
| "loss": 0.4375, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.13949081321578644, |
| "grad_norm": 5.1671883616236025, |
| "learning_rate": 9.958781011618648e-06, |
| "loss": 0.5305, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.13957320589931613, |
| "grad_norm": 4.396863523163892, |
| "learning_rate": 9.958695479915002e-06, |
| "loss": 0.4693, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.13965559858284585, |
| "grad_norm": 6.13192250177938, |
| "learning_rate": 9.958609859929836e-06, |
| "loss": 0.7002, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.13973799126637554, |
| "grad_norm": 4.437325147612999, |
| "learning_rate": 9.958524151664677e-06, |
| "loss": 0.3917, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.13982038394990526, |
| "grad_norm": 5.070019626853925, |
| "learning_rate": 9.958438355121052e-06, |
| "loss": 0.4742, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.13990277663343495, |
| "grad_norm": 6.083396673742178, |
| "learning_rate": 9.958352470300485e-06, |
| "loss": 0.621, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.13998516931696464, |
| "grad_norm": 6.384051434280243, |
| "learning_rate": 9.958266497204506e-06, |
| "loss": 0.5026, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.14006756200049436, |
| "grad_norm": 4.010479307928655, |
| "learning_rate": 9.958180435834646e-06, |
| "loss": 0.4158, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.14014995468402405, |
| "grad_norm": 4.719623857149445, |
| "learning_rate": 9.958094286192437e-06, |
| "loss": 0.4985, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.14023234736755377, |
| "grad_norm": 5.426958394750245, |
| "learning_rate": 9.958008048279413e-06, |
| "loss": 0.5531, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.14031474005108346, |
| "grad_norm": 5.08856362765558, |
| "learning_rate": 9.95792172209711e-06, |
| "loss": 0.3943, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.14039713273461316, |
| "grad_norm": 5.638213422297256, |
| "learning_rate": 9.957835307647063e-06, |
| "loss": 0.6932, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.14047952541814288, |
| "grad_norm": 6.63185919475462, |
| "learning_rate": 9.957748804930813e-06, |
| "loss": 0.6148, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.14056191810167257, |
| "grad_norm": 7.754221383064333, |
| "learning_rate": 9.9576622139499e-06, |
| "loss": 0.7738, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.1406443107852023, |
| "grad_norm": 5.15021907426077, |
| "learning_rate": 9.957575534705861e-06, |
| "loss": 0.3595, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.14072670346873198, |
| "grad_norm": 7.00492373956156, |
| "learning_rate": 9.957488767200246e-06, |
| "loss": 0.6592, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.14080909615226167, |
| "grad_norm": 6.8725037736438885, |
| "learning_rate": 9.957401911434594e-06, |
| "loss": 0.6738, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.1408914888357914, |
| "grad_norm": 5.474394546552511, |
| "learning_rate": 9.957314967410455e-06, |
| "loss": 0.4472, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.14097388151932108, |
| "grad_norm": 6.395917661111817, |
| "learning_rate": 9.957227935129374e-06, |
| "loss": 0.781, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.14105627420285077, |
| "grad_norm": 5.013972013359134, |
| "learning_rate": 9.957140814592901e-06, |
| "loss": 0.5114, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.1411386668863805, |
| "grad_norm": 5.229649358585399, |
| "learning_rate": 9.95705360580259e-06, |
| "loss": 0.7029, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.14122105956991018, |
| "grad_norm": 4.092719529026001, |
| "learning_rate": 9.956966308759993e-06, |
| "loss": 0.3894, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.1413034522534399, |
| "grad_norm": 5.2989880902352136, |
| "learning_rate": 9.95687892346666e-06, |
| "loss": 0.6119, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.1413858449369696, |
| "grad_norm": 3.6082549788649962, |
| "learning_rate": 9.95679144992415e-06, |
| "loss": 0.2587, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.1414682376204993, |
| "grad_norm": 4.328630549185103, |
| "learning_rate": 9.95670388813402e-06, |
| "loss": 0.4679, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.141550630304029, |
| "grad_norm": 6.666267680234917, |
| "learning_rate": 9.95661623809783e-06, |
| "loss": 0.6764, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.1416330229875587, |
| "grad_norm": 6.030588485073613, |
| "learning_rate": 9.956528499817137e-06, |
| "loss": 0.5958, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.14171541567108842, |
| "grad_norm": 3.697734271168265, |
| "learning_rate": 9.956440673293508e-06, |
| "loss": 0.3724, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1417978083546181, |
| "grad_norm": 5.078205684000341, |
| "learning_rate": 9.956352758528501e-06, |
| "loss": 0.4152, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.1418802010381478, |
| "grad_norm": 5.998477976751115, |
| "learning_rate": 9.956264755523687e-06, |
| "loss": 0.4393, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.14196259372167752, |
| "grad_norm": 4.1617349318422105, |
| "learning_rate": 9.956176664280628e-06, |
| "loss": 0.5035, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.1420449864052072, |
| "grad_norm": 5.193159222144419, |
| "learning_rate": 9.956088484800895e-06, |
| "loss": 0.5345, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.14212737908873693, |
| "grad_norm": 4.085571835755461, |
| "learning_rate": 9.956000217086055e-06, |
| "loss": 0.5145, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.14220977177226662, |
| "grad_norm": 8.724102472644716, |
| "learning_rate": 9.955911861137683e-06, |
| "loss": 0.7727, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.14229216445579632, |
| "grad_norm": 5.455750786296661, |
| "learning_rate": 9.95582341695735e-06, |
| "loss": 0.519, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.14237455713932604, |
| "grad_norm": 7.3612630042618, |
| "learning_rate": 9.955734884546632e-06, |
| "loss": 0.9181, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.14245694982285573, |
| "grad_norm": 6.860122301504194, |
| "learning_rate": 9.955646263907103e-06, |
| "loss": 0.7269, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.14253934250638542, |
| "grad_norm": 5.246035437947141, |
| "learning_rate": 9.955557555040344e-06, |
| "loss": 0.6018, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.14262173518991514, |
| "grad_norm": 4.620747795169099, |
| "learning_rate": 9.95546875794793e-06, |
| "loss": 0.4273, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.14270412787344483, |
| "grad_norm": 4.879100737888411, |
| "learning_rate": 9.955379872631447e-06, |
| "loss": 0.4053, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.14278652055697455, |
| "grad_norm": 5.1781106148154965, |
| "learning_rate": 9.955290899092473e-06, |
| "loss": 0.5273, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.14286891324050424, |
| "grad_norm": 5.28098812734887, |
| "learning_rate": 9.955201837332592e-06, |
| "loss": 0.4492, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.14295130592403393, |
| "grad_norm": 4.588098614679162, |
| "learning_rate": 9.955112687353395e-06, |
| "loss": 0.6444, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.14303369860756365, |
| "grad_norm": 6.807890987436337, |
| "learning_rate": 9.955023449156464e-06, |
| "loss": 0.8301, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.14311609129109334, |
| "grad_norm": 6.19317980765747, |
| "learning_rate": 9.95493412274339e-06, |
| "loss": 0.644, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.14319848397462306, |
| "grad_norm": 4.804293638065614, |
| "learning_rate": 9.954844708115761e-06, |
| "loss": 0.3949, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.14328087665815276, |
| "grad_norm": 5.446810889833297, |
| "learning_rate": 9.95475520527517e-06, |
| "loss": 0.4823, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.14336326934168245, |
| "grad_norm": 5.587157787333849, |
| "learning_rate": 9.954665614223212e-06, |
| "loss": 0.3342, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.14344566202521217, |
| "grad_norm": 6.671984779337379, |
| "learning_rate": 9.954575934961482e-06, |
| "loss": 0.4438, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.14352805470874186, |
| "grad_norm": 6.720067281421165, |
| "learning_rate": 9.954486167491574e-06, |
| "loss": 0.6546, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.14361044739227158, |
| "grad_norm": 4.310942822266015, |
| "learning_rate": 9.954396311815088e-06, |
| "loss": 0.3683, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.14369284007580127, |
| "grad_norm": 6.662276529673106, |
| "learning_rate": 9.954306367933623e-06, |
| "loss": 0.7405, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.14377523275933096, |
| "grad_norm": 5.668799637679965, |
| "learning_rate": 9.954216335848781e-06, |
| "loss": 0.5108, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.14385762544286068, |
| "grad_norm": 11.70486674091479, |
| "learning_rate": 9.954126215562165e-06, |
| "loss": 0.5593, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.14394001812639037, |
| "grad_norm": 4.7414430921442285, |
| "learning_rate": 9.954036007075378e-06, |
| "loss": 0.6503, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.1440224108099201, |
| "grad_norm": 5.383455910246553, |
| "learning_rate": 9.953945710390029e-06, |
| "loss": 0.534, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.14410480349344978, |
| "grad_norm": 5.207132564825904, |
| "learning_rate": 9.953855325507723e-06, |
| "loss": 0.6014, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.14418719617697948, |
| "grad_norm": 4.522652655624019, |
| "learning_rate": 9.95376485243007e-06, |
| "loss": 0.561, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.1442695888605092, |
| "grad_norm": 3.575322962748732, |
| "learning_rate": 9.95367429115868e-06, |
| "loss": 0.3608, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.1443519815440389, |
| "grad_norm": 3.799989212310609, |
| "learning_rate": 9.953583641695163e-06, |
| "loss": 0.5892, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.14443437422756858, |
| "grad_norm": 4.154410303606598, |
| "learning_rate": 9.95349290404114e-06, |
| "loss": 0.6148, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.1445167669110983, |
| "grad_norm": 4.526476783100888, |
| "learning_rate": 9.95340207819822e-06, |
| "loss": 0.5512, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.144599159594628, |
| "grad_norm": 5.470982965204849, |
| "learning_rate": 9.953311164168023e-06, |
| "loss": 0.6535, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.1446815522781577, |
| "grad_norm": 6.153122788881867, |
| "learning_rate": 9.953220161952165e-06, |
| "loss": 0.5768, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.1447639449616874, |
| "grad_norm": 3.888887680481176, |
| "learning_rate": 9.95312907155227e-06, |
| "loss": 0.3174, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.1448463376452171, |
| "grad_norm": 5.669629872152485, |
| "learning_rate": 9.953037892969957e-06, |
| "loss": 0.6727, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.1449287303287468, |
| "grad_norm": 35.771339221273166, |
| "learning_rate": 9.952946626206848e-06, |
| "loss": 1.7314, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.1450111230122765, |
| "grad_norm": 4.3289004927992725, |
| "learning_rate": 9.952855271264573e-06, |
| "loss": 0.5573, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.14509351569580622, |
| "grad_norm": 3.799289594497197, |
| "learning_rate": 9.952763828144752e-06, |
| "loss": 0.3963, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.14517590837933592, |
| "grad_norm": 5.288650375462546, |
| "learning_rate": 9.952672296849017e-06, |
| "loss": 0.475, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.1452583010628656, |
| "grad_norm": 3.9972186574876734, |
| "learning_rate": 9.952580677378998e-06, |
| "loss": 0.5127, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.14534069374639533, |
| "grad_norm": 4.3332751493193475, |
| "learning_rate": 9.952488969736324e-06, |
| "loss": 0.5247, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.14542308642992502, |
| "grad_norm": 4.195892689128413, |
| "learning_rate": 9.952397173922629e-06, |
| "loss": 0.3199, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.14550547911345474, |
| "grad_norm": 5.722985806247223, |
| "learning_rate": 9.952305289939545e-06, |
| "loss": 0.6056, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.14558787179698443, |
| "grad_norm": 6.368312651719165, |
| "learning_rate": 9.952213317788713e-06, |
| "loss": 0.5713, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.14567026448051412, |
| "grad_norm": 4.5910410814721585, |
| "learning_rate": 9.952121257471765e-06, |
| "loss": 0.5231, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.14575265716404384, |
| "grad_norm": 4.149070439095053, |
| "learning_rate": 9.952029108990341e-06, |
| "loss": 0.4691, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.14583504984757353, |
| "grad_norm": 6.698020175906564, |
| "learning_rate": 9.951936872346084e-06, |
| "loss": 0.6816, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.14591744253110323, |
| "grad_norm": 4.448335960350891, |
| "learning_rate": 9.951844547540634e-06, |
| "loss": 0.5778, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.14599983521463294, |
| "grad_norm": 5.697992048620652, |
| "learning_rate": 9.951752134575636e-06, |
| "loss": 0.5035, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.14608222789816264, |
| "grad_norm": 6.626396310224672, |
| "learning_rate": 9.951659633452735e-06, |
| "loss": 0.6606, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.14616462058169236, |
| "grad_norm": 4.046239182291733, |
| "learning_rate": 9.951567044173577e-06, |
| "loss": 0.3849, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.14624701326522205, |
| "grad_norm": 3.688760782701429, |
| "learning_rate": 9.951474366739811e-06, |
| "loss": 0.4589, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.14632940594875174, |
| "grad_norm": 4.790126428067029, |
| "learning_rate": 9.951381601153087e-06, |
| "loss": 0.6115, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.14641179863228146, |
| "grad_norm": 6.205649281121326, |
| "learning_rate": 9.951288747415055e-06, |
| "loss": 0.5969, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.14649419131581115, |
| "grad_norm": 3.3187533632736397, |
| "learning_rate": 9.95119580552737e-06, |
| "loss": 0.5073, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.14657658399934087, |
| "grad_norm": 3.258398469094614, |
| "learning_rate": 9.95110277549169e-06, |
| "loss": 0.2559, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.14665897668287056, |
| "grad_norm": 4.3933466019017375, |
| "learning_rate": 9.951009657309664e-06, |
| "loss": 0.4237, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.14674136936640025, |
| "grad_norm": 5.039755273960905, |
| "learning_rate": 9.950916450982954e-06, |
| "loss": 0.5285, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.14682376204992997, |
| "grad_norm": 5.052820158208282, |
| "learning_rate": 9.95082315651322e-06, |
| "loss": 0.5216, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.14690615473345967, |
| "grad_norm": 7.221433965049104, |
| "learning_rate": 9.950729773902119e-06, |
| "loss": 0.7875, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.14698854741698938, |
| "grad_norm": 4.022936489503606, |
| "learning_rate": 9.950636303151318e-06, |
| "loss": 0.3333, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.14707094010051908, |
| "grad_norm": 4.202488842137441, |
| "learning_rate": 9.950542744262478e-06, |
| "loss": 0.3781, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.14715333278404877, |
| "grad_norm": 7.63848412977035, |
| "learning_rate": 9.950449097237268e-06, |
| "loss": 0.6791, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.1472357254675785, |
| "grad_norm": 4.718688788366161, |
| "learning_rate": 9.950355362077351e-06, |
| "loss": 0.5764, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.14731811815110818, |
| "grad_norm": 7.447234767381768, |
| "learning_rate": 9.950261538784399e-06, |
| "loss": 0.4469, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.14740051083463787, |
| "grad_norm": 5.095202612388618, |
| "learning_rate": 9.950167627360078e-06, |
| "loss": 0.6372, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.1474829035181676, |
| "grad_norm": 7.204391060200845, |
| "learning_rate": 9.950073627806068e-06, |
| "loss": 0.4491, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.14756529620169728, |
| "grad_norm": 5.269735640638179, |
| "learning_rate": 9.949979540124036e-06, |
| "loss": 0.7009, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.147647688885227, |
| "grad_norm": 5.921433725601925, |
| "learning_rate": 9.949885364315659e-06, |
| "loss": 0.3743, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.1477300815687567, |
| "grad_norm": 4.473723831399909, |
| "learning_rate": 9.949791100382613e-06, |
| "loss": 0.5765, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.14781247425228639, |
| "grad_norm": 4.15202235170927, |
| "learning_rate": 9.949696748326576e-06, |
| "loss": 0.4384, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.1478948669358161, |
| "grad_norm": 5.1563131436767335, |
| "learning_rate": 9.94960230814923e-06, |
| "loss": 0.4914, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.1479772596193458, |
| "grad_norm": 7.2266024173875, |
| "learning_rate": 9.949507779852255e-06, |
| "loss": 0.8423, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.14805965230287552, |
| "grad_norm": 3.6509897775167817, |
| "learning_rate": 9.949413163437334e-06, |
| "loss": 0.2087, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.1481420449864052, |
| "grad_norm": 4.574456787530481, |
| "learning_rate": 9.94931845890615e-06, |
| "loss": 0.4738, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.1482244376699349, |
| "grad_norm": 5.648889619706296, |
| "learning_rate": 9.949223666260391e-06, |
| "loss": 0.5997, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.14830683035346462, |
| "grad_norm": 5.202785968386703, |
| "learning_rate": 9.949128785501744e-06, |
| "loss": 0.3894, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.1483892230369943, |
| "grad_norm": 10.997911457737233, |
| "learning_rate": 9.949033816631897e-06, |
| "loss": 0.6417, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.14847161572052403, |
| "grad_norm": 4.6037373374277015, |
| "learning_rate": 9.948938759652545e-06, |
| "loss": 0.4054, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.14855400840405372, |
| "grad_norm": 6.562807171790634, |
| "learning_rate": 9.948843614565373e-06, |
| "loss": 0.5643, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.14863640108758341, |
| "grad_norm": 4.45858174996238, |
| "learning_rate": 9.948748381372081e-06, |
| "loss": 0.4779, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.14871879377111313, |
| "grad_norm": 5.1171416882597915, |
| "learning_rate": 9.948653060074365e-06, |
| "loss": 0.6325, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.14880118645464283, |
| "grad_norm": 5.1648545519748765, |
| "learning_rate": 9.948557650673917e-06, |
| "loss": 0.6289, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.14888357913817252, |
| "grad_norm": 3.3249976860584125, |
| "learning_rate": 9.94846215317244e-06, |
| "loss": 0.5099, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.14896597182170224, |
| "grad_norm": 4.210265252795672, |
| "learning_rate": 9.94836656757163e-06, |
| "loss": 0.3661, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.14904836450523193, |
| "grad_norm": 5.856159591100659, |
| "learning_rate": 9.948270893873194e-06, |
| "loss": 0.6683, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.14913075718876165, |
| "grad_norm": 5.665378850590255, |
| "learning_rate": 9.94817513207883e-06, |
| "loss": 0.4274, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.14921314987229134, |
| "grad_norm": 3.2139105645087644, |
| "learning_rate": 9.948079282190246e-06, |
| "loss": 0.3374, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.14929554255582103, |
| "grad_norm": 4.41472261330769, |
| "learning_rate": 9.947983344209149e-06, |
| "loss": 0.5235, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.14937793523935075, |
| "grad_norm": 5.0108760558021235, |
| "learning_rate": 9.947887318137246e-06, |
| "loss": 0.4894, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.14946032792288044, |
| "grad_norm": 4.076125168503218, |
| "learning_rate": 9.947791203976246e-06, |
| "loss": 0.486, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.14954272060641016, |
| "grad_norm": 4.9262153367813175, |
| "learning_rate": 9.94769500172786e-06, |
| "loss": 0.4339, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.14962511328993985, |
| "grad_norm": 4.823936819797354, |
| "learning_rate": 9.947598711393803e-06, |
| "loss": 0.5129, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.14970750597346955, |
| "grad_norm": 7.422686319765741, |
| "learning_rate": 9.947502332975785e-06, |
| "loss": 0.724, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.14978989865699927, |
| "grad_norm": 5.069308352660441, |
| "learning_rate": 9.947405866475526e-06, |
| "loss": 0.3606, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.14987229134052896, |
| "grad_norm": 4.817079842052196, |
| "learning_rate": 9.947309311894741e-06, |
| "loss": 0.6129, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.14995468402405868, |
| "grad_norm": 4.409288579092636, |
| "learning_rate": 9.947212669235151e-06, |
| "loss": 0.3029, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.15003707670758837, |
| "grad_norm": 4.088287504533524, |
| "learning_rate": 9.947115938498475e-06, |
| "loss": 0.3747, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.15011946939111806, |
| "grad_norm": 4.526831599614096, |
| "learning_rate": 9.947019119686437e-06, |
| "loss": 0.5938, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.15020186207464778, |
| "grad_norm": 5.180661766219478, |
| "learning_rate": 9.946922212800758e-06, |
| "loss": 0.5274, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.15028425475817747, |
| "grad_norm": 4.075075934628573, |
| "learning_rate": 9.946825217843165e-06, |
| "loss": 0.5151, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.15036664744170716, |
| "grad_norm": 4.371753027926906, |
| "learning_rate": 9.946728134815384e-06, |
| "loss": 0.3841, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.15044904012523688, |
| "grad_norm": 7.004245095318552, |
| "learning_rate": 9.946630963719143e-06, |
| "loss": 0.7213, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.15053143280876657, |
| "grad_norm": 4.695580060254283, |
| "learning_rate": 9.946533704556174e-06, |
| "loss": 0.4254, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.1506138254922963, |
| "grad_norm": 6.379558882024517, |
| "learning_rate": 9.946436357328208e-06, |
| "loss": 0.4716, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.15069621817582599, |
| "grad_norm": 30.181991180317187, |
| "learning_rate": 9.946338922036977e-06, |
| "loss": 1.3403, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.15077861085935568, |
| "grad_norm": 4.760372710692016, |
| "learning_rate": 9.946241398684216e-06, |
| "loss": 0.6688, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1508610035428854, |
| "grad_norm": 29.77514513346913, |
| "learning_rate": 9.94614378727166e-06, |
| "loss": 1.0769, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.1509433962264151, |
| "grad_norm": 5.446400107754182, |
| "learning_rate": 9.946046087801052e-06, |
| "loss": 0.4928, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.1510257889099448, |
| "grad_norm": 4.18231130411727, |
| "learning_rate": 9.945948300274124e-06, |
| "loss": 0.5164, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.1511081815934745, |
| "grad_norm": 9.784576516645163, |
| "learning_rate": 9.945850424692622e-06, |
| "loss": 0.7336, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.1511905742770042, |
| "grad_norm": 4.280452136926324, |
| "learning_rate": 9.945752461058286e-06, |
| "loss": 0.5356, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.1512729669605339, |
| "grad_norm": 4.514588928723341, |
| "learning_rate": 9.945654409372861e-06, |
| "loss": 0.3138, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.1513553596440636, |
| "grad_norm": 4.977510975280522, |
| "learning_rate": 9.945556269638095e-06, |
| "loss": 0.6125, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.15143775232759332, |
| "grad_norm": 3.7210768337208595, |
| "learning_rate": 9.945458041855732e-06, |
| "loss": 0.6259, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.15152014501112301, |
| "grad_norm": 3.836298613785647, |
| "learning_rate": 9.94535972602752e-06, |
| "loss": 0.4432, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.1516025376946527, |
| "grad_norm": 8.066856187714102, |
| "learning_rate": 9.945261322155213e-06, |
| "loss": 0.8116, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.15168493037818243, |
| "grad_norm": 4.468142883297208, |
| "learning_rate": 9.94516283024056e-06, |
| "loss": 0.4872, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.15176732306171212, |
| "grad_norm": 5.892106263712281, |
| "learning_rate": 9.945064250285318e-06, |
| "loss": 0.7393, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.1518497157452418, |
| "grad_norm": 5.748773653446861, |
| "learning_rate": 9.944965582291236e-06, |
| "loss": 0.491, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.15193210842877153, |
| "grad_norm": 6.267061350789159, |
| "learning_rate": 9.944866826260076e-06, |
| "loss": 0.5588, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.15201450111230122, |
| "grad_norm": 5.56231366013553, |
| "learning_rate": 9.944767982193595e-06, |
| "loss": 0.392, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.15209689379583094, |
| "grad_norm": 5.383200889814545, |
| "learning_rate": 9.944669050093552e-06, |
| "loss": 0.6692, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.15217928647936063, |
| "grad_norm": 6.870277257184773, |
| "learning_rate": 9.944570029961706e-06, |
| "loss": 0.7725, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.15226167916289032, |
| "grad_norm": 4.559517814413792, |
| "learning_rate": 9.944470921799825e-06, |
| "loss": 0.6481, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.15234407184642004, |
| "grad_norm": 4.74898634616392, |
| "learning_rate": 9.944371725609671e-06, |
| "loss": 0.5648, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.15242646452994973, |
| "grad_norm": 4.860513869955498, |
| "learning_rate": 9.944272441393008e-06, |
| "loss": 0.6931, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.15250885721347945, |
| "grad_norm": 4.3722520697227205, |
| "learning_rate": 9.944173069151609e-06, |
| "loss": 0.3393, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.15259124989700915, |
| "grad_norm": 5.0443736184423065, |
| "learning_rate": 9.944073608887235e-06, |
| "loss": 0.5772, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.15267364258053884, |
| "grad_norm": 8.549939972371906, |
| "learning_rate": 9.943974060601664e-06, |
| "loss": 0.5043, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.15275603526406856, |
| "grad_norm": 4.757709984298188, |
| "learning_rate": 9.943874424296666e-06, |
| "loss": 0.6423, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.15283842794759825, |
| "grad_norm": 5.4002386456791305, |
| "learning_rate": 9.943774699974014e-06, |
| "loss": 0.5686, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.15292082063112797, |
| "grad_norm": 4.215127806823176, |
| "learning_rate": 9.943674887635483e-06, |
| "loss": 0.5367, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.15300321331465766, |
| "grad_norm": 4.274941922810144, |
| "learning_rate": 9.943574987282853e-06, |
| "loss": 0.5136, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.15308560599818735, |
| "grad_norm": 5.007470901292181, |
| "learning_rate": 9.943474998917899e-06, |
| "loss": 0.4348, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.15316799868171707, |
| "grad_norm": 4.406761750552184, |
| "learning_rate": 9.943374922542403e-06, |
| "loss": 0.5084, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.15325039136524676, |
| "grad_norm": 5.414163225734359, |
| "learning_rate": 9.943274758158146e-06, |
| "loss": 0.545, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.15333278404877645, |
| "grad_norm": 4.477228911899572, |
| "learning_rate": 9.943174505766912e-06, |
| "loss": 0.5127, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.15341517673230617, |
| "grad_norm": 5.4286296996844134, |
| "learning_rate": 9.943074165370486e-06, |
| "loss": 0.5424, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.15349756941583587, |
| "grad_norm": 7.8227006152958625, |
| "learning_rate": 9.94297373697065e-06, |
| "loss": 0.6181, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.15357996209936559, |
| "grad_norm": 4.58280474696728, |
| "learning_rate": 9.942873220569201e-06, |
| "loss": 0.5705, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.15366235478289528, |
| "grad_norm": 5.000809086425357, |
| "learning_rate": 9.942772616167921e-06, |
| "loss": 0.5671, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.15374474746642497, |
| "grad_norm": 4.052269078249802, |
| "learning_rate": 9.942671923768604e-06, |
| "loss": 0.5368, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.1538271401499547, |
| "grad_norm": 4.354386031164391, |
| "learning_rate": 9.942571143373041e-06, |
| "loss": 0.4506, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.15390953283348438, |
| "grad_norm": 5.323874342321902, |
| "learning_rate": 9.942470274983029e-06, |
| "loss": 0.5499, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.1539919255170141, |
| "grad_norm": 4.166290067353925, |
| "learning_rate": 9.94236931860036e-06, |
| "loss": 0.57, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.1540743182005438, |
| "grad_norm": 4.663093189419887, |
| "learning_rate": 9.942268274226836e-06, |
| "loss": 0.6149, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.15415671088407348, |
| "grad_norm": 5.011905916587221, |
| "learning_rate": 9.942167141864252e-06, |
| "loss": 0.5146, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.1542391035676032, |
| "grad_norm": 3.684085210483375, |
| "learning_rate": 9.94206592151441e-06, |
| "loss": 0.471, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.1543214962511329, |
| "grad_norm": 4.529894364792616, |
| "learning_rate": 9.941964613179113e-06, |
| "loss": 0.6402, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.15440388893466261, |
| "grad_norm": 5.379119541477112, |
| "learning_rate": 9.941863216860161e-06, |
| "loss": 0.4688, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.1544862816181923, |
| "grad_norm": 4.711252577285658, |
| "learning_rate": 9.941761732559365e-06, |
| "loss": 0.4731, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.154568674301722, |
| "grad_norm": 4.627102248023416, |
| "learning_rate": 9.941660160278526e-06, |
| "loss": 0.5882, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.15465106698525172, |
| "grad_norm": 4.784269516867435, |
| "learning_rate": 9.941558500019458e-06, |
| "loss": 0.5784, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.1547334596687814, |
| "grad_norm": 3.964863589286543, |
| "learning_rate": 9.941456751783965e-06, |
| "loss": 0.4781, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.15481585235231113, |
| "grad_norm": 3.7942935218449954, |
| "learning_rate": 9.941354915573863e-06, |
| "loss": 0.3864, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.15489824503584082, |
| "grad_norm": 3.7341181608097385, |
| "learning_rate": 9.941252991390961e-06, |
| "loss": 0.4249, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1549806377193705, |
| "grad_norm": 5.551866143123865, |
| "learning_rate": 9.941150979237078e-06, |
| "loss": 0.5836, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.15506303040290023, |
| "grad_norm": 9.280745802972426, |
| "learning_rate": 9.941048879114025e-06, |
| "loss": 0.7968, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.15514542308642992, |
| "grad_norm": 4.957929728130742, |
| "learning_rate": 9.940946691023625e-06, |
| "loss": 0.4156, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.15522781576995962, |
| "grad_norm": 5.612906720964241, |
| "learning_rate": 9.940844414967697e-06, |
| "loss": 0.5885, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.15531020845348933, |
| "grad_norm": 4.208810615796374, |
| "learning_rate": 9.940742050948057e-06, |
| "loss": 0.3961, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.15539260113701903, |
| "grad_norm": 23.92972768953157, |
| "learning_rate": 9.94063959896653e-06, |
| "loss": 0.6231, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.15547499382054875, |
| "grad_norm": 5.76934083591682, |
| "learning_rate": 9.940537059024942e-06, |
| "loss": 0.4364, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.15555738650407844, |
| "grad_norm": 5.707451138870683, |
| "learning_rate": 9.940434431125117e-06, |
| "loss": 0.5047, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.15563977918760813, |
| "grad_norm": 5.776218724756825, |
| "learning_rate": 9.940331715268883e-06, |
| "loss": 0.5968, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.15572217187113785, |
| "grad_norm": 4.954848575236138, |
| "learning_rate": 9.940228911458065e-06, |
| "loss": 0.5645, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.15580456455466754, |
| "grad_norm": 4.99108420622021, |
| "learning_rate": 9.940126019694498e-06, |
| "loss": 0.6215, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.15588695723819726, |
| "grad_norm": 4.922687738818404, |
| "learning_rate": 9.940023039980012e-06, |
| "loss": 0.4969, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.15596934992172695, |
| "grad_norm": 6.857403736754451, |
| "learning_rate": 9.939919972316437e-06, |
| "loss": 0.7445, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.15605174260525664, |
| "grad_norm": 5.0582162606668115, |
| "learning_rate": 9.939816816705615e-06, |
| "loss": 0.4787, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.15613413528878636, |
| "grad_norm": 4.756494625315935, |
| "learning_rate": 9.939713573149377e-06, |
| "loss": 0.6097, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.15621652797231605, |
| "grad_norm": 6.419763789825375, |
| "learning_rate": 9.939610241649561e-06, |
| "loss": 0.4783, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.15629892065584577, |
| "grad_norm": 4.5978040753369935, |
| "learning_rate": 9.93950682220801e-06, |
| "loss": 0.4438, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.15638131333937547, |
| "grad_norm": 4.462028232430049, |
| "learning_rate": 9.939403314826563e-06, |
| "loss": 0.3942, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.15646370602290516, |
| "grad_norm": 5.337997568156949, |
| "learning_rate": 9.939299719507065e-06, |
| "loss": 0.5616, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.15654609870643488, |
| "grad_norm": 3.60824355878785, |
| "learning_rate": 9.939196036251357e-06, |
| "loss": 0.4209, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.15662849138996457, |
| "grad_norm": 3.9143893571730146, |
| "learning_rate": 9.939092265061288e-06, |
| "loss": 0.3278, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.15671088407349426, |
| "grad_norm": 4.439793896511223, |
| "learning_rate": 9.938988405938703e-06, |
| "loss": 0.5322, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.15679327675702398, |
| "grad_norm": 4.36845403354661, |
| "learning_rate": 9.938884458885454e-06, |
| "loss": 0.5716, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.15687566944055367, |
| "grad_norm": 6.284830972083645, |
| "learning_rate": 9.938780423903387e-06, |
| "loss": 0.6454, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.1569580621240834, |
| "grad_norm": 6.495990019493336, |
| "learning_rate": 9.938676300994358e-06, |
| "loss": 0.5363, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.15704045480761308, |
| "grad_norm": 5.215129230889581, |
| "learning_rate": 9.938572090160222e-06, |
| "loss": 0.3534, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.15712284749114278, |
| "grad_norm": 5.0984361516420735, |
| "learning_rate": 9.938467791402828e-06, |
| "loss": 0.3469, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.1572052401746725, |
| "grad_norm": 35.6236332120135, |
| "learning_rate": 9.938363404724038e-06, |
| "loss": 1.4843, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.1572876328582022, |
| "grad_norm": 7.458138808494354, |
| "learning_rate": 9.93825893012571e-06, |
| "loss": 0.6091, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.1573700255417319, |
| "grad_norm": 6.398287378682458, |
| "learning_rate": 9.938154367609705e-06, |
| "loss": 0.5134, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1574524182252616, |
| "grad_norm": 3.5343424335631433, |
| "learning_rate": 9.93804971717788e-06, |
| "loss": 0.286, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.1575348109087913, |
| "grad_norm": 4.504459000128934, |
| "learning_rate": 9.937944978832103e-06, |
| "loss": 0.4624, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.157617203592321, |
| "grad_norm": 7.108577599603217, |
| "learning_rate": 9.937840152574235e-06, |
| "loss": 0.7483, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.1576995962758507, |
| "grad_norm": 3.6174046857082898, |
| "learning_rate": 9.937735238406146e-06, |
| "loss": 0.4525, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.15778198895938042, |
| "grad_norm": 6.389978345222535, |
| "learning_rate": 9.9376302363297e-06, |
| "loss": 0.7896, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.1578643816429101, |
| "grad_norm": 6.548196848276821, |
| "learning_rate": 9.937525146346767e-06, |
| "loss": 0.6367, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.1579467743264398, |
| "grad_norm": 3.655411742497471, |
| "learning_rate": 9.937419968459221e-06, |
| "loss": 0.5116, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.15802916700996952, |
| "grad_norm": 4.929959679569643, |
| "learning_rate": 9.937314702668933e-06, |
| "loss": 0.4611, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.15811155969349922, |
| "grad_norm": 6.432318019871717, |
| "learning_rate": 9.937209348977776e-06, |
| "loss": 0.5361, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.1581939523770289, |
| "grad_norm": 4.031201714847671, |
| "learning_rate": 9.937103907387626e-06, |
| "loss": 0.3384, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.15827634506055863, |
| "grad_norm": 3.9206730902636253, |
| "learning_rate": 9.936998377900362e-06, |
| "loss": 0.5143, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.15835873774408832, |
| "grad_norm": 5.579814626803759, |
| "learning_rate": 9.93689276051786e-06, |
| "loss": 0.6045, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.15844113042761804, |
| "grad_norm": 3.917683732272094, |
| "learning_rate": 9.936787055242002e-06, |
| "loss": 0.2376, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.15852352311114773, |
| "grad_norm": 4.78423472401811, |
| "learning_rate": 9.93668126207467e-06, |
| "loss": 0.4524, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.15860591579467742, |
| "grad_norm": 3.2708331638186716, |
| "learning_rate": 9.936575381017746e-06, |
| "loss": 0.3629, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.15868830847820714, |
| "grad_norm": 23.158576456528266, |
| "learning_rate": 9.936469412073117e-06, |
| "loss": 0.4387, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.15877070116173683, |
| "grad_norm": 5.907761415620391, |
| "learning_rate": 9.936363355242668e-06, |
| "loss": 0.5724, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.15885309384526655, |
| "grad_norm": 4.421376470888521, |
| "learning_rate": 9.93625721052829e-06, |
| "loss": 0.6202, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.15893548652879624, |
| "grad_norm": 5.122603282277841, |
| "learning_rate": 9.936150977931869e-06, |
| "loss": 0.4856, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.15901787921232594, |
| "grad_norm": 5.697392483109681, |
| "learning_rate": 9.936044657455298e-06, |
| "loss": 0.7097, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.15910027189585565, |
| "grad_norm": 8.995800779777136, |
| "learning_rate": 9.93593824910047e-06, |
| "loss": 0.7263, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.15918266457938535, |
| "grad_norm": 3.015815073457665, |
| "learning_rate": 9.935831752869278e-06, |
| "loss": 0.4475, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.15926505726291507, |
| "grad_norm": 5.3090549745473075, |
| "learning_rate": 9.93572516876362e-06, |
| "loss": 0.5545, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.15934744994644476, |
| "grad_norm": 12.995406107077027, |
| "learning_rate": 9.935618496785396e-06, |
| "loss": 0.1749, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.15942984262997445, |
| "grad_norm": 4.825072935595757, |
| "learning_rate": 9.935511736936498e-06, |
| "loss": 0.4887, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.15951223531350417, |
| "grad_norm": 4.746919626773541, |
| "learning_rate": 9.935404889218831e-06, |
| "loss": 0.5112, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.15959462799703386, |
| "grad_norm": 6.689116278696351, |
| "learning_rate": 9.935297953634298e-06, |
| "loss": 0.7103, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.15967702068056355, |
| "grad_norm": 6.502278304742261, |
| "learning_rate": 9.935190930184802e-06, |
| "loss": 0.7145, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.15975941336409327, |
| "grad_norm": 5.451164256483429, |
| "learning_rate": 9.935083818872247e-06, |
| "loss": 0.4737, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.15984180604762296, |
| "grad_norm": 3.0051587922433067, |
| "learning_rate": 9.93497661969854e-06, |
| "loss": 0.2742, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.15992419873115268, |
| "grad_norm": 5.427591565725511, |
| "learning_rate": 9.934869332665592e-06, |
| "loss": 0.5207, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.16000659141468238, |
| "grad_norm": 5.4367848489833115, |
| "learning_rate": 9.934761957775312e-06, |
| "loss": 0.5983, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.16008898409821207, |
| "grad_norm": 7.154739577060193, |
| "learning_rate": 9.93465449502961e-06, |
| "loss": 0.6409, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.1601713767817418, |
| "grad_norm": 5.038406869203767, |
| "learning_rate": 9.934546944430402e-06, |
| "loss": 0.4853, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.16025376946527148, |
| "grad_norm": 5.650856613574145, |
| "learning_rate": 9.934439305979598e-06, |
| "loss": 0.6453, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.1603361621488012, |
| "grad_norm": 15.099928104720231, |
| "learning_rate": 9.934331579679119e-06, |
| "loss": 0.7712, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.1604185548323309, |
| "grad_norm": 5.331849451324917, |
| "learning_rate": 9.934223765530883e-06, |
| "loss": 0.5346, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.16050094751586058, |
| "grad_norm": 6.11974330303117, |
| "learning_rate": 9.934115863536806e-06, |
| "loss": 0.6636, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.1605833401993903, |
| "grad_norm": 5.259099308839429, |
| "learning_rate": 9.934007873698813e-06, |
| "loss": 0.4581, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.16066573288292, |
| "grad_norm": 4.763348066034409, |
| "learning_rate": 9.933899796018821e-06, |
| "loss": 0.7946, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.1607481255664497, |
| "grad_norm": 4.328616169486818, |
| "learning_rate": 9.933791630498761e-06, |
| "loss": 0.5788, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.1608305182499794, |
| "grad_norm": 5.067172957788821, |
| "learning_rate": 9.933683377140552e-06, |
| "loss": 0.6345, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.1609129109335091, |
| "grad_norm": 4.119522578831503, |
| "learning_rate": 9.933575035946128e-06, |
| "loss": 0.4333, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.16099530361703882, |
| "grad_norm": 4.532665725421111, |
| "learning_rate": 9.933466606917412e-06, |
| "loss": 0.4846, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.1610776963005685, |
| "grad_norm": 6.677989132375096, |
| "learning_rate": 9.933358090056337e-06, |
| "loss": 0.6531, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.1611600889840982, |
| "grad_norm": 5.737777079771404, |
| "learning_rate": 9.933249485364836e-06, |
| "loss": 0.551, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.16124248166762792, |
| "grad_norm": 5.303362303054575, |
| "learning_rate": 9.93314079284484e-06, |
| "loss": 0.5989, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.1613248743511576, |
| "grad_norm": 4.539301099456485, |
| "learning_rate": 9.933032012498287e-06, |
| "loss": 0.5385, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.16140726703468733, |
| "grad_norm": 7.619380498183769, |
| "learning_rate": 9.932923144327112e-06, |
| "loss": 0.5917, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.16148965971821702, |
| "grad_norm": 6.2711200766201225, |
| "learning_rate": 9.932814188333252e-06, |
| "loss": 0.6814, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.1615720524017467, |
| "grad_norm": 4.925280195591163, |
| "learning_rate": 9.932705144518648e-06, |
| "loss": 0.3257, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.16165444508527643, |
| "grad_norm": 4.900465005853736, |
| "learning_rate": 9.932596012885243e-06, |
| "loss": 0.6121, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.16173683776880612, |
| "grad_norm": 5.597180202998693, |
| "learning_rate": 9.932486793434976e-06, |
| "loss": 0.4417, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.16181923045233584, |
| "grad_norm": 3.786036286281869, |
| "learning_rate": 9.932377486169795e-06, |
| "loss": 0.4473, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.16190162313586554, |
| "grad_norm": 5.333467663504172, |
| "learning_rate": 9.932268091091647e-06, |
| "loss": 0.5273, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.16198401581939523, |
| "grad_norm": 3.114851131702418, |
| "learning_rate": 9.932158608202473e-06, |
| "loss": 0.2613, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.16206640850292495, |
| "grad_norm": 4.933213429654218, |
| "learning_rate": 9.932049037504228e-06, |
| "loss": 0.5545, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.16214880118645464, |
| "grad_norm": 5.732455463753953, |
| "learning_rate": 9.931939378998862e-06, |
| "loss": 0.5004, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.16223119386998436, |
| "grad_norm": 3.3159988830987936, |
| "learning_rate": 9.931829632688327e-06, |
| "loss": 0.2362, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.16231358655351405, |
| "grad_norm": 6.801154604066474, |
| "learning_rate": 9.931719798574577e-06, |
| "loss": 0.725, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.16239597923704374, |
| "grad_norm": 28.52359146325827, |
| "learning_rate": 9.931609876659567e-06, |
| "loss": 0.8996, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.16247837192057346, |
| "grad_norm": 7.673010563827642, |
| "learning_rate": 9.931499866945254e-06, |
| "loss": 0.6011, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.16256076460410315, |
| "grad_norm": 2.9164586276644355, |
| "learning_rate": 9.931389769433595e-06, |
| "loss": 0.257, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.16264315728763284, |
| "grad_norm": 3.4481637153537736, |
| "learning_rate": 9.931279584126552e-06, |
| "loss": 0.2657, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.16272554997116256, |
| "grad_norm": 5.503134114237322, |
| "learning_rate": 9.931169311026086e-06, |
| "loss": 0.478, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.16280794265469226, |
| "grad_norm": 7.31936675050305, |
| "learning_rate": 9.93105895013416e-06, |
| "loss": 0.7626, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.16289033533822198, |
| "grad_norm": 3.325018882378549, |
| "learning_rate": 9.930948501452739e-06, |
| "loss": 0.2196, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.16297272802175167, |
| "grad_norm": 4.015246823982774, |
| "learning_rate": 9.93083796498379e-06, |
| "loss": 0.3479, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.16305512070528136, |
| "grad_norm": 4.097019784111488, |
| "learning_rate": 9.930727340729283e-06, |
| "loss": 0.3443, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.16313751338881108, |
| "grad_norm": 4.548498573960237, |
| "learning_rate": 9.930616628691182e-06, |
| "loss": 0.4211, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.16321990607234077, |
| "grad_norm": 6.9065001345198045, |
| "learning_rate": 9.930505828871461e-06, |
| "loss": 0.6502, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.1633022987558705, |
| "grad_norm": 4.716154264543286, |
| "learning_rate": 9.930394941272094e-06, |
| "loss": 0.6323, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.16338469143940018, |
| "grad_norm": 5.198255317143385, |
| "learning_rate": 9.930283965895054e-06, |
| "loss": 0.5158, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.16346708412292987, |
| "grad_norm": 3.8635971124237267, |
| "learning_rate": 9.930172902742316e-06, |
| "loss": 0.393, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.1635494768064596, |
| "grad_norm": 4.73938319426752, |
| "learning_rate": 9.930061751815858e-06, |
| "loss": 0.53, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.16363186948998928, |
| "grad_norm": 5.053290616839663, |
| "learning_rate": 9.929950513117658e-06, |
| "loss": 0.6883, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.163714262173519, |
| "grad_norm": 4.256070865568776, |
| "learning_rate": 9.929839186649698e-06, |
| "loss": 0.4755, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.1637966548570487, |
| "grad_norm": 5.726661518698401, |
| "learning_rate": 9.929727772413959e-06, |
| "loss": 0.6225, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.1638790475405784, |
| "grad_norm": 6.136396113547857, |
| "learning_rate": 9.929616270412425e-06, |
| "loss": 0.5515, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.1639614402241081, |
| "grad_norm": 5.803352132593352, |
| "learning_rate": 9.92950468064708e-06, |
| "loss": 0.6151, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.1640438329076378, |
| "grad_norm": 5.500294699026293, |
| "learning_rate": 9.929393003119911e-06, |
| "loss": 0.645, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.1641262255911675, |
| "grad_norm": 5.126741967427504, |
| "learning_rate": 9.929281237832909e-06, |
| "loss": 0.4252, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.1642086182746972, |
| "grad_norm": 5.715611583303511, |
| "learning_rate": 9.92916938478806e-06, |
| "loss": 0.6065, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.1642910109582269, |
| "grad_norm": 4.5030944875150505, |
| "learning_rate": 9.929057443987356e-06, |
| "loss": 0.4517, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.16437340364175662, |
| "grad_norm": 4.036448430622849, |
| "learning_rate": 9.928945415432792e-06, |
| "loss": 0.3826, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.1644557963252863, |
| "grad_norm": 4.77432743749195, |
| "learning_rate": 9.92883329912636e-06, |
| "loss": 0.4729, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.164538189008816, |
| "grad_norm": 4.24213889175515, |
| "learning_rate": 9.92872109507006e-06, |
| "loss": 0.6217, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.16462058169234572, |
| "grad_norm": 6.054404874550711, |
| "learning_rate": 9.928608803265884e-06, |
| "loss": 0.5828, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.16470297437587542, |
| "grad_norm": 5.412374428435805, |
| "learning_rate": 9.928496423715835e-06, |
| "loss": 0.6126, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.16478536705940514, |
| "grad_norm": 6.090978296501382, |
| "learning_rate": 9.928383956421914e-06, |
| "loss": 0.6151, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 24274, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7648832073984.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |