{
  "best_metric": 0.20408163265306123,
  "best_model_checkpoint": "/mnt/home/cskokgibbs/ceph/GLM-Prior-exp/mouse-experiments/no_gene_tf_label_overlaps/mDC/prior_network/checkpoint-2832",
  "epoch": 2.9977492387130944,
  "eval_steps": 472,
  "global_step": 2832,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010591817820733483,
      "grad_norm": 3.6720354557037354,
      "learning_rate": 9.998940677966102e-06,
      "loss": 0.3896,
      "step": 1
    },
    {
      "epoch": 0.0021183635641466966,
      "grad_norm": 2.9420688152313232,
      "learning_rate": 9.997881355932204e-06,
      "loss": 0.3246,
      "step": 2
    },
    {
      "epoch": 0.003177545346220045,
      "grad_norm": 2.360626459121704,
      "learning_rate": 9.996822033898305e-06,
      "loss": 0.2778,
      "step": 3
    },
    {
      "epoch": 0.004236727128293393,
      "grad_norm": 1.8121588230133057,
      "learning_rate": 9.995762711864408e-06,
      "loss": 0.2364,
      "step": 4
    },
    {
      "epoch": 0.005295908910366741,
      "grad_norm": 1.351688265800476,
      "learning_rate": 9.99470338983051e-06,
      "loss": 0.2099,
      "step": 5
    },
    {
      "epoch": 0.00635509069244009,
      "grad_norm": 0.9306207299232483,
      "learning_rate": 9.993644067796611e-06,
      "loss": 0.1893,
      "step": 6
    },
    {
      "epoch": 0.0074142724745134385,
      "grad_norm": 0.5691781044006348,
      "learning_rate": 9.992584745762712e-06,
      "loss": 0.1759,
      "step": 7
    },
    {
      "epoch": 0.008473454256586787,
      "grad_norm": 0.274517685174942,
      "learning_rate": 9.991525423728814e-06,
      "loss": 0.1693,
      "step": 8
    },
    {
      "epoch": 0.009532636038660135,
      "grad_norm": 0.08555290102958679,
      "learning_rate": 9.990466101694915e-06,
      "loss": 0.167,
      "step": 9
    },
    {
      "epoch": 0.010591817820733483,
      "grad_norm": 0.1536484807729721,
      "learning_rate": 9.989406779661017e-06,
      "loss": 0.168,
      "step": 10
    },
    {
      "epoch": 0.011650999602806831,
      "grad_norm": 0.24952290952205658,
      "learning_rate": 9.98834745762712e-06,
      "loss": 0.1688,
      "step": 11
    },
    {
      "epoch": 0.01271018138488018,
      "grad_norm": 0.30534666776657104,
      "learning_rate": 9.987288135593221e-06,
      "loss": 0.1706,
      "step": 12
    },
    {
      "epoch": 0.013769363166953529,
      "grad_norm": 0.32023343443870544,
      "learning_rate": 9.986228813559323e-06,
      "loss": 0.1698,
      "step": 13
    },
    {
      "epoch": 0.014828544949026877,
      "grad_norm": 0.2840920686721802,
      "learning_rate": 9.985169491525426e-06,
      "loss": 0.1688,
      "step": 14
    },
    {
      "epoch": 0.015887726731100223,
      "grad_norm": 0.2063327431678772,
      "learning_rate": 9.984110169491527e-06,
      "loss": 0.1678,
      "step": 15
    },
    {
      "epoch": 0.016946908513173573,
      "grad_norm": 0.09346629679203033,
      "learning_rate": 9.983050847457628e-06,
      "loss": 0.167,
      "step": 16
    },
    {
      "epoch": 0.018006090295246923,
      "grad_norm": 0.09686527401208878,
      "learning_rate": 9.98199152542373e-06,
      "loss": 0.1671,
      "step": 17
    },
    {
      "epoch": 0.01906527207732027,
      "grad_norm": 0.1982012391090393,
      "learning_rate": 9.980932203389831e-06,
      "loss": 0.1678,
      "step": 18
    },
    {
      "epoch": 0.02012445385939362,
      "grad_norm": 0.251574844121933,
      "learning_rate": 9.979872881355933e-06,
      "loss": 0.1678,
      "step": 19
    },
    {
      "epoch": 0.021183635641466966,
      "grad_norm": 0.21433360874652863,
      "learning_rate": 9.978813559322034e-06,
      "loss": 0.1671,
      "step": 20
    },
    {
      "epoch": 0.022242817423540315,
      "grad_norm": 0.14189667999744415,
      "learning_rate": 9.977754237288137e-06,
      "loss": 0.1664,
      "step": 21
    },
    {
      "epoch": 0.023301999205613662,
      "grad_norm": 0.07194820791482925,
      "learning_rate": 9.976694915254239e-06,
      "loss": 0.1664,
      "step": 22
    },
    {
      "epoch": 0.02436118098768701,
      "grad_norm": 0.04847427085042,
      "learning_rate": 9.97563559322034e-06,
      "loss": 0.1655,
      "step": 23
    },
    {
      "epoch": 0.02542036276976036,
      "grad_norm": 0.08483133465051651,
      "learning_rate": 9.974576271186441e-06,
      "loss": 0.1657,
      "step": 24
    },
    {
      "epoch": 0.026479544551833708,
      "grad_norm": 0.11271404474973679,
      "learning_rate": 9.973516949152543e-06,
      "loss": 0.1661,
      "step": 25
    },
    {
      "epoch": 0.027538726333907058,
      "grad_norm": 0.1302892565727234,
      "learning_rate": 9.972457627118644e-06,
      "loss": 0.1659,
      "step": 26
    },
    {
      "epoch": 0.028597908115980404,
      "grad_norm": 0.12819303572177887,
      "learning_rate": 9.971398305084746e-06,
      "loss": 0.1657,
      "step": 27
    },
    {
      "epoch": 0.029657089898053754,
      "grad_norm": 0.10965385288000107,
      "learning_rate": 9.970338983050847e-06,
      "loss": 0.1658,
      "step": 28
    },
    {
      "epoch": 0.0307162716801271,
      "grad_norm": 0.07511338591575623,
      "learning_rate": 9.96927966101695e-06,
      "loss": 0.1652,
      "step": 29
    },
    {
      "epoch": 0.03177545346220045,
      "grad_norm": 0.04806717112660408,
      "learning_rate": 9.968220338983052e-06,
      "loss": 0.1647,
      "step": 30
    },
    {
      "epoch": 0.0328346352442738,
      "grad_norm": 0.06465736776590347,
      "learning_rate": 9.967161016949153e-06,
      "loss": 0.1644,
      "step": 31
    },
    {
      "epoch": 0.033893817026347146,
      "grad_norm": 0.10100921988487244,
      "learning_rate": 9.966101694915256e-06,
      "loss": 0.1652,
      "step": 32
    },
    {
      "epoch": 0.034952998808420496,
      "grad_norm": 0.11376876384019852,
      "learning_rate": 9.965042372881358e-06,
      "loss": 0.1642,
      "step": 33
    },
    {
      "epoch": 0.036012180590493846,
      "grad_norm": 0.09480928629636765,
      "learning_rate": 9.963983050847459e-06,
      "loss": 0.1638,
      "step": 34
    },
    {
      "epoch": 0.03707136237256719,
      "grad_norm": 0.07092452049255371,
      "learning_rate": 9.96292372881356e-06,
      "loss": 0.1636,
      "step": 35
    },
    {
      "epoch": 0.03813054415464054,
      "grad_norm": 0.049816809594631195,
      "learning_rate": 9.961864406779662e-06,
      "loss": 0.1639,
      "step": 36
    },
    {
      "epoch": 0.03918972593671389,
      "grad_norm": 0.0559360608458519,
      "learning_rate": 9.960805084745763e-06,
      "loss": 0.1634,
      "step": 37
    },
    {
      "epoch": 0.04024890771878724,
      "grad_norm": 0.06750863790512085,
      "learning_rate": 9.959745762711866e-06,
      "loss": 0.1632,
      "step": 38
    },
    {
      "epoch": 0.04130808950086059,
      "grad_norm": 0.09359966218471527,
      "learning_rate": 9.958686440677968e-06,
      "loss": 0.162,
      "step": 39
    },
    {
      "epoch": 0.04236727128293393,
      "grad_norm": 0.10011648386716843,
      "learning_rate": 9.957627118644069e-06,
      "loss": 0.1621,
      "step": 40
    },
    {
      "epoch": 0.04342645306500728,
      "grad_norm": 0.06335251033306122,
      "learning_rate": 9.95656779661017e-06,
      "loss": 0.1626,
      "step": 41
    },
    {
      "epoch": 0.04448563484708063,
      "grad_norm": 0.0592561773955822,
      "learning_rate": 9.955508474576272e-06,
      "loss": 0.1619,
      "step": 42
    },
    {
      "epoch": 0.04554481662915398,
      "grad_norm": 0.08311517536640167,
      "learning_rate": 9.954449152542373e-06,
      "loss": 0.163,
      "step": 43
    },
    {
      "epoch": 0.046603998411227324,
      "grad_norm": 0.10263793170452118,
      "learning_rate": 9.953389830508475e-06,
      "loss": 0.1625,
      "step": 44
    },
    {
      "epoch": 0.047663180193300674,
      "grad_norm": 0.10545139759778976,
      "learning_rate": 9.952330508474576e-06,
      "loss": 0.1618,
      "step": 45
    },
    {
      "epoch": 0.04872236197537402,
      "grad_norm": 0.0653265044093132,
      "learning_rate": 9.95127118644068e-06,
      "loss": 0.1606,
      "step": 46
    },
    {
      "epoch": 0.04978154375744737,
      "grad_norm": 0.0890883207321167,
      "learning_rate": 9.95021186440678e-06,
      "loss": 0.1611,
      "step": 47
    },
    {
      "epoch": 0.05084072553952072,
      "grad_norm": 0.08528076112270355,
      "learning_rate": 9.949152542372882e-06,
      "loss": 0.1612,
      "step": 48
    },
    {
      "epoch": 0.051899907321594066,
      "grad_norm": 0.0719696655869484,
      "learning_rate": 9.948093220338983e-06,
      "loss": 0.1601,
      "step": 49
    },
    {
      "epoch": 0.052959089103667416,
      "grad_norm": 0.09042180329561234,
      "learning_rate": 9.947033898305085e-06,
      "loss": 0.1604,
      "step": 50
    },
    {
      "epoch": 0.054018270885740766,
      "grad_norm": 0.08847080916166306,
      "learning_rate": 9.945974576271186e-06,
      "loss": 0.1578,
      "step": 51
    },
    {
      "epoch": 0.055077452667814115,
      "grad_norm": 0.10733941942453384,
      "learning_rate": 9.944915254237288e-06,
      "loss": 0.16,
      "step": 52
    },
    {
      "epoch": 0.056136634449887465,
      "grad_norm": 0.09904839843511581,
      "learning_rate": 9.94385593220339e-06,
      "loss": 0.1591,
      "step": 53
    },
    {
      "epoch": 0.05719581623196081,
      "grad_norm": 0.11285366863012314,
      "learning_rate": 9.942796610169492e-06,
      "loss": 0.1592,
      "step": 54
    },
    {
      "epoch": 0.05825499801403416,
      "grad_norm": 0.11618144810199738,
      "learning_rate": 9.941737288135594e-06,
      "loss": 0.1583,
      "step": 55
    },
    {
      "epoch": 0.05931417979610751,
      "grad_norm": 0.10348143428564072,
      "learning_rate": 9.940677966101697e-06,
      "loss": 0.1605,
      "step": 56
    },
    {
      "epoch": 0.06037336157818086,
      "grad_norm": 0.11249449104070663,
      "learning_rate": 9.939618644067798e-06,
      "loss": 0.1586,
      "step": 57
    },
    {
      "epoch": 0.0614325433602542,
      "grad_norm": 0.0900573581457138,
      "learning_rate": 9.9385593220339e-06,
      "loss": 0.1593,
      "step": 58
    },
    {
      "epoch": 0.06249172514232755,
      "grad_norm": 0.10308244079351425,
      "learning_rate": 9.937500000000001e-06,
      "loss": 0.1587,
      "step": 59
    },
    {
      "epoch": 0.0635509069244009,
      "grad_norm": 0.11713043600320816,
      "learning_rate": 9.936440677966102e-06,
      "loss": 0.1558,
      "step": 60
    },
    {
      "epoch": 0.06461008870647425,
      "grad_norm": 0.13361400365829468,
      "learning_rate": 9.935381355932204e-06,
      "loss": 0.1579,
      "step": 61
    },
    {
      "epoch": 0.0656692704885476,
      "grad_norm": 0.11023016273975372,
      "learning_rate": 9.934322033898305e-06,
      "loss": 0.1564,
      "step": 62
    },
    {
      "epoch": 0.06672845227062095,
      "grad_norm": 0.10673161596059799,
      "learning_rate": 9.933262711864408e-06,
      "loss": 0.156,
      "step": 63
    },
    {
      "epoch": 0.06778763405269429,
      "grad_norm": 0.12919782102108002,
      "learning_rate": 9.93220338983051e-06,
      "loss": 0.155,
      "step": 64
    },
    {
      "epoch": 0.06884681583476764,
      "grad_norm": 0.10146741569042206,
      "learning_rate": 9.931144067796611e-06,
      "loss": 0.1564,
      "step": 65
    },
    {
      "epoch": 0.06990599761684099,
      "grad_norm": 0.10749489068984985,
      "learning_rate": 9.930084745762713e-06,
      "loss": 0.1561,
      "step": 66
    },
    {
      "epoch": 0.07096517939891434,
      "grad_norm": 0.1070689857006073,
      "learning_rate": 9.929025423728814e-06,
      "loss": 0.1542,
      "step": 67
    },
    {
      "epoch": 0.07202436118098769,
      "grad_norm": 0.11663827300071716,
      "learning_rate": 9.927966101694915e-06,
      "loss": 0.1544,
      "step": 68
    },
    {
      "epoch": 0.07308354296306104,
      "grad_norm": 0.10666878521442413,
      "learning_rate": 9.926906779661017e-06,
      "loss": 0.155,
      "step": 69
    },
    {
      "epoch": 0.07414272474513438,
      "grad_norm": 0.12409048527479172,
      "learning_rate": 9.92584745762712e-06,
      "loss": 0.1533,
      "step": 70
    },
    {
      "epoch": 0.07520190652720773,
      "grad_norm": 0.14574819803237915,
      "learning_rate": 9.924788135593221e-06,
      "loss": 0.1536,
      "step": 71
    },
    {
      "epoch": 0.07626108830928108,
      "grad_norm": 0.15090584754943848,
      "learning_rate": 9.923728813559323e-06,
      "loss": 0.1538,
      "step": 72
    },
    {
      "epoch": 0.07732027009135443,
      "grad_norm": 0.1653479039669037,
      "learning_rate": 9.922669491525424e-06,
      "loss": 0.1526,
      "step": 73
    },
    {
      "epoch": 0.07837945187342778,
      "grad_norm": 0.17721739411354065,
      "learning_rate": 9.921610169491527e-06,
      "loss": 0.1527,
      "step": 74
    },
    {
      "epoch": 0.07943863365550112,
      "grad_norm": 0.15513458847999573,
      "learning_rate": 9.920550847457629e-06,
      "loss": 0.1555,
      "step": 75
    },
    {
      "epoch": 0.08049781543757448,
      "grad_norm": 0.149288609623909,
      "learning_rate": 9.91949152542373e-06,
      "loss": 0.153,
      "step": 76
    },
    {
      "epoch": 0.08155699721964782,
      "grad_norm": 0.14205437898635864,
      "learning_rate": 9.918432203389831e-06,
      "loss": 0.1519,
      "step": 77
    },
    {
      "epoch": 0.08261617900172118,
      "grad_norm": 0.14078938961029053,
      "learning_rate": 9.917372881355933e-06,
      "loss": 0.1488,
      "step": 78
    },
    {
      "epoch": 0.08367536078379452,
      "grad_norm": 0.18282672762870789,
      "learning_rate": 9.916313559322034e-06,
      "loss": 0.1517,
      "step": 79
    },
    {
      "epoch": 0.08473454256586786,
      "grad_norm": 0.1841512769460678,
      "learning_rate": 9.915254237288137e-06,
      "loss": 0.1511,
      "step": 80
    },
    {
      "epoch": 0.08579372434794122,
      "grad_norm": 0.14817704260349274,
      "learning_rate": 9.914194915254239e-06,
      "loss": 0.1528,
      "step": 81
    },
    {
      "epoch": 0.08685290613001456,
      "grad_norm": 0.1767425686120987,
      "learning_rate": 9.91313559322034e-06,
      "loss": 0.15,
      "step": 82
    },
    {
      "epoch": 0.08791208791208792,
      "grad_norm": 0.20340079069137573,
      "learning_rate": 9.912076271186442e-06,
      "loss": 0.146,
      "step": 83
    },
    {
      "epoch": 0.08897126969416126,
      "grad_norm": 0.19862839579582214,
      "learning_rate": 9.911016949152543e-06,
      "loss": 0.1492,
      "step": 84
    },
    {
      "epoch": 0.0900304514762346,
      "grad_norm": 0.2320733219385147,
      "learning_rate": 9.909957627118644e-06,
      "loss": 0.1501,
      "step": 85
    },
    {
      "epoch": 0.09108963325830796,
      "grad_norm": 0.229786679148674,
      "learning_rate": 9.908898305084746e-06,
      "loss": 0.1503,
      "step": 86
    },
    {
      "epoch": 0.0921488150403813,
      "grad_norm": 0.17259404063224792,
      "learning_rate": 9.907838983050849e-06,
      "loss": 0.1497,
      "step": 87
    },
    {
      "epoch": 0.09320799682245465,
      "grad_norm": 0.20388969779014587,
      "learning_rate": 9.90677966101695e-06,
      "loss": 0.151,
      "step": 88
    },
    {
      "epoch": 0.094267178604528,
      "grad_norm": 0.19713272154331207,
      "learning_rate": 9.905720338983052e-06,
      "loss": 0.1507,
      "step": 89
    },
    {
      "epoch": 0.09532636038660135,
      "grad_norm": 0.19459038972854614,
      "learning_rate": 9.904661016949153e-06,
      "loss": 0.1478,
      "step": 90
    },
    {
      "epoch": 0.0963855421686747,
      "grad_norm": 0.19698107242584229,
      "learning_rate": 9.903601694915255e-06,
      "loss": 0.15,
      "step": 91
    },
    {
      "epoch": 0.09744472395074805,
      "grad_norm": 0.16646313667297363,
      "learning_rate": 9.902542372881356e-06,
      "loss": 0.1512,
      "step": 92
    },
    {
      "epoch": 0.09850390573282139,
      "grad_norm": 0.15648747980594635,
      "learning_rate": 9.901483050847457e-06,
      "loss": 0.1482,
      "step": 93
    },
    {
      "epoch": 0.09956308751489475,
      "grad_norm": 0.2563489079475403,
      "learning_rate": 9.900423728813559e-06,
      "loss": 0.1487,
      "step": 94
    },
    {
      "epoch": 0.10062226929696809,
      "grad_norm": 0.21389760076999664,
      "learning_rate": 9.899364406779662e-06,
      "loss": 0.146,
      "step": 95
    },
    {
      "epoch": 0.10168145107904145,
      "grad_norm": 0.23555664718151093,
      "learning_rate": 9.898305084745763e-06,
      "loss": 0.1498,
      "step": 96
    },
    {
      "epoch": 0.10274063286111479,
      "grad_norm": 0.21392253041267395,
      "learning_rate": 9.897245762711866e-06,
      "loss": 0.1487,
      "step": 97
    },
    {
      "epoch": 0.10379981464318813,
      "grad_norm": 0.2690548896789551,
      "learning_rate": 9.896186440677968e-06,
      "loss": 0.1479,
      "step": 98
    },
    {
      "epoch": 0.10485899642526149,
      "grad_norm": 0.2211674153804779,
      "learning_rate": 9.89512711864407e-06,
      "loss": 0.1457,
      "step": 99
    },
    {
      "epoch": 0.10591817820733483,
      "grad_norm": 0.2760964632034302,
      "learning_rate": 9.89406779661017e-06,
      "loss": 0.1483,
      "step": 100
    },
    {
      "epoch": 0.10697735998940819,
      "grad_norm": 0.22649763524532318,
      "learning_rate": 9.893008474576272e-06,
      "loss": 0.1466,
      "step": 101
    },
    {
      "epoch": 0.10803654177148153,
      "grad_norm": 0.2080029994249344,
      "learning_rate": 9.891949152542374e-06,
      "loss": 0.1461,
      "step": 102
    },
    {
      "epoch": 0.10909572355355487,
      "grad_norm": 0.31648021936416626,
      "learning_rate": 9.890889830508475e-06,
      "loss": 0.1435,
      "step": 103
    },
    {
      "epoch": 0.11015490533562823,
      "grad_norm": 0.26235735416412354,
      "learning_rate": 9.889830508474576e-06,
      "loss": 0.1415,
      "step": 104
    },
    {
      "epoch": 0.11121408711770157,
      "grad_norm": 0.32502713799476624,
      "learning_rate": 9.88877118644068e-06,
      "loss": 0.146,
      "step": 105
    },
    {
      "epoch": 0.11227326889977493,
      "grad_norm": 0.28630173206329346,
      "learning_rate": 9.887711864406781e-06,
      "loss": 0.1483,
      "step": 106
    },
    {
      "epoch": 0.11333245068184827,
      "grad_norm": 0.21586979925632477,
      "learning_rate": 9.886652542372882e-06,
      "loss": 0.1447,
      "step": 107
    },
    {
      "epoch": 0.11439163246392162,
      "grad_norm": 0.328690767288208,
      "learning_rate": 9.885593220338984e-06,
      "loss": 0.1414,
      "step": 108
    },
    {
      "epoch": 0.11545081424599497,
      "grad_norm": 0.20366932451725006,
      "learning_rate": 9.884533898305085e-06,
      "loss": 0.1436,
      "step": 109
    },
    {
      "epoch": 0.11650999602806832,
      "grad_norm": 0.26369708776474,
      "learning_rate": 9.883474576271186e-06,
      "loss": 0.1445,
      "step": 110
    },
    {
      "epoch": 0.11756917781014166,
      "grad_norm": 0.31383395195007324,
      "learning_rate": 9.882415254237288e-06,
      "loss": 0.1424,
      "step": 111
    },
    {
      "epoch": 0.11862835959221502,
      "grad_norm": 0.251658171415329,
      "learning_rate": 9.881355932203391e-06,
      "loss": 0.1417,
      "step": 112
    },
    {
      "epoch": 0.11968754137428836,
      "grad_norm": 0.3859596252441406,
      "learning_rate": 9.880296610169492e-06,
      "loss": 0.1425,
      "step": 113
    },
    {
      "epoch": 0.12074672315636172,
      "grad_norm": 0.3530981242656708,
      "learning_rate": 9.879237288135594e-06,
      "loss": 0.1424,
      "step": 114
    },
    {
      "epoch": 0.12180590493843506,
      "grad_norm": 0.45179763436317444,
      "learning_rate": 9.878177966101695e-06,
      "loss": 0.1427,
      "step": 115
    },
    {
      "epoch": 0.1228650867205084,
      "grad_norm": 0.47741344571113586,
      "learning_rate": 9.877118644067798e-06,
      "loss": 0.1423,
      "step": 116
    },
    {
      "epoch": 0.12392426850258176,
      "grad_norm": 0.31315502524375916,
      "learning_rate": 9.8760593220339e-06,
      "loss": 0.1394,
      "step": 117
    },
    {
      "epoch": 0.1249834502846551,
      "grad_norm": 0.4611978530883789,
      "learning_rate": 9.875000000000001e-06,
      "loss": 0.1443,
      "step": 118
    },
    {
      "epoch": 0.12604263206672844,
      "grad_norm": 0.2832074463367462,
      "learning_rate": 9.873940677966103e-06,
      "loss": 0.1426,
      "step": 119
    },
    {
      "epoch": 0.1271018138488018,
      "grad_norm": 0.4847926199436188,
      "learning_rate": 9.872881355932204e-06,
      "loss": 0.1439,
      "step": 120
    },
    {
      "epoch": 0.12816099563087516,
      "grad_norm": 0.4041496515274048,
      "learning_rate": 9.871822033898305e-06,
      "loss": 0.1422,
      "step": 121
    },
    {
      "epoch": 0.1292201774129485,
      "grad_norm": 0.27493688464164734,
      "learning_rate": 9.870762711864409e-06,
      "loss": 0.1415,
      "step": 122
    },
    {
      "epoch": 0.13027935919502184,
      "grad_norm": 0.4400162994861603,
      "learning_rate": 9.86970338983051e-06,
      "loss": 0.1401,
      "step": 123
    },
    {
      "epoch": 0.1313385409770952,
      "grad_norm": 0.4497718811035156,
      "learning_rate": 9.868644067796611e-06,
      "loss": 0.1421,
      "step": 124
    },
    {
      "epoch": 0.13239772275916853,
      "grad_norm": 0.25661447644233704,
      "learning_rate": 9.867584745762713e-06,
      "loss": 0.138,
      "step": 125
    },
    {
      "epoch": 0.1334569045412419,
      "grad_norm": 0.29652678966522217,
      "learning_rate": 9.866525423728814e-06,
      "loss": 0.136,
      "step": 126
    },
    {
      "epoch": 0.13451608632331524,
      "grad_norm": 0.33211785554885864,
      "learning_rate": 9.865466101694916e-06,
      "loss": 0.1404,
      "step": 127
    },
    {
      "epoch": 0.13557526810538859,
      "grad_norm": 0.4695075452327728,
      "learning_rate": 9.864406779661017e-06,
      "loss": 0.1416,
      "step": 128
    },
    {
      "epoch": 0.13663444988746193,
      "grad_norm": 0.29075145721435547,
      "learning_rate": 9.86334745762712e-06,
      "loss": 0.1375,
      "step": 129
    },
    {
      "epoch": 0.13769363166953527,
      "grad_norm": 0.4561719596385956,
      "learning_rate": 9.862288135593221e-06,
      "loss": 0.1409,
      "step": 130
    },
    {
      "epoch": 0.13875281345160864,
      "grad_norm": 0.3260703980922699,
      "learning_rate": 9.861228813559323e-06,
      "loss": 0.1358,
      "step": 131
    },
    {
      "epoch": 0.13981199523368198,
      "grad_norm": 0.6748884320259094,
      "learning_rate": 9.860169491525424e-06,
      "loss": 0.1408,
      "step": 132
    },
    {
      "epoch": 0.14087117701575533,
      "grad_norm": 0.26813751459121704,
      "learning_rate": 9.859110169491526e-06,
      "loss": 0.1384,
      "step": 133
    },
    {
      "epoch": 0.14193035879782867,
      "grad_norm": 0.34277114272117615,
      "learning_rate": 9.858050847457627e-06,
      "loss": 0.1379,
      "step": 134
    },
    {
      "epoch": 0.142989540579902,
      "grad_norm": 0.3976724147796631,
      "learning_rate": 9.856991525423729e-06,
      "loss": 0.1409,
      "step": 135
    },
    {
      "epoch": 0.14404872236197538,
      "grad_norm": 0.2871648669242859,
      "learning_rate": 9.855932203389832e-06,
      "loss": 0.137,
      "step": 136
    },
    {
      "epoch": 0.14510790414404873,
      "grad_norm": 0.48152652382850647,
      "learning_rate": 9.854872881355933e-06,
      "loss": 0.1373,
      "step": 137
    },
    {
      "epoch": 0.14616708592612207,
      "grad_norm": 0.32819676399230957,
      "learning_rate": 9.853813559322034e-06,
      "loss": 0.1322,
      "step": 138
    },
    {
      "epoch": 0.1472262677081954,
      "grad_norm": 0.34722280502319336,
      "learning_rate": 9.852754237288138e-06,
      "loss": 0.1369,
      "step": 139
    },
    {
      "epoch": 0.14828544949026876,
      "grad_norm": 0.47408348321914673,
      "learning_rate": 9.851694915254239e-06,
      "loss": 0.1377,
      "step": 140
    },
    {
      "epoch": 0.14934463127234213,
      "grad_norm": 0.3838208317756653,
      "learning_rate": 9.85063559322034e-06,
      "loss": 0.1389,
      "step": 141
    },
    {
      "epoch": 0.15040381305441547,
      "grad_norm": 0.39561015367507935,
      "learning_rate": 9.849576271186442e-06,
      "loss": 0.1377,
      "step": 142
    },
    {
      "epoch": 0.1514629948364888,
      "grad_norm": 0.7962619066238403,
      "learning_rate": 9.848516949152543e-06,
      "loss": 0.1373,
      "step": 143
    },
    {
      "epoch": 0.15252217661856216,
      "grad_norm": 0.3441040813922882,
      "learning_rate": 9.847457627118645e-06,
      "loss": 0.1349,
      "step": 144
    },
    {
      "epoch": 0.1535813584006355,
      "grad_norm": 0.47094985842704773,
      "learning_rate": 9.846398305084746e-06,
      "loss": 0.1361,
      "step": 145
    },
    {
      "epoch": 0.15464054018270887,
      "grad_norm": 0.6072841882705688,
      "learning_rate": 9.845338983050849e-06,
      "loss": 0.1322,
      "step": 146
    },
    {
      "epoch": 0.1556997219647822,
      "grad_norm": 0.3189762830734253,
      "learning_rate": 9.84427966101695e-06,
      "loss": 0.1342,
      "step": 147
    },
    {
      "epoch": 0.15675890374685555,
      "grad_norm": 0.9441516995429993,
      "learning_rate": 9.843220338983052e-06,
      "loss": 0.1373,
      "step": 148
    },
    {
      "epoch": 0.1578180855289289,
      "grad_norm": 0.634678065776825,
      "learning_rate": 9.842161016949153e-06,
      "loss": 0.1372,
      "step": 149
    },
    {
      "epoch": 0.15887726731100224,
      "grad_norm": 0.6699573993682861,
      "learning_rate": 9.841101694915255e-06,
      "loss": 0.1389,
      "step": 150
    },
    {
      "epoch": 0.1599364490930756,
      "grad_norm": 0.8605893850326538,
      "learning_rate": 9.840042372881356e-06,
      "loss": 0.1362,
      "step": 151
    },
    {
      "epoch": 0.16099563087514895,
      "grad_norm": 0.3965975344181061,
      "learning_rate": 9.838983050847458e-06,
      "loss": 0.1315,
      "step": 152
    },
    {
      "epoch": 0.1620548126572223,
      "grad_norm": 1.6671831607818604,
      "learning_rate": 9.837923728813559e-06,
      "loss": 0.1449,
      "step": 153
    },
    {
      "epoch": 0.16311399443929564,
      "grad_norm": 1.1250773668289185,
      "learning_rate": 9.836864406779662e-06,
      "loss": 0.1388,
      "step": 154
    },
    {
      "epoch": 0.16417317622136898,
      "grad_norm": 0.45315343141555786,
      "learning_rate": 9.835805084745764e-06,
      "loss": 0.1368,
      "step": 155
    },
    {
      "epoch": 0.16523235800344235,
      "grad_norm": 0.6775908470153809,
      "learning_rate": 9.834745762711865e-06,
      "loss": 0.1387,
      "step": 156
    },
    {
      "epoch": 0.1662915397855157,
      "grad_norm": 0.595596194267273,
      "learning_rate": 9.833686440677966e-06,
      "loss": 0.1403,
      "step": 157
    },
    {
      "epoch": 0.16735072156758904,
      "grad_norm": 0.39271387457847595,
      "learning_rate": 9.832627118644068e-06,
      "loss": 0.1377,
      "step": 158
    },
    {
      "epoch": 0.16840990334966238,
      "grad_norm": 0.6346777677536011,
      "learning_rate": 9.831567796610171e-06,
      "loss": 0.1405,
      "step": 159
    },
    {
      "epoch": 0.16946908513173572,
      "grad_norm": 0.854393482208252,
      "learning_rate": 9.830508474576272e-06,
      "loss": 0.1395,
      "step": 160
    },
    {
      "epoch": 0.1705282669138091,
      "grad_norm": 0.3409649431705475,
      "learning_rate": 9.829449152542374e-06,
      "loss": 0.1351,
      "step": 161
    },
    {
      "epoch": 0.17158744869588244,
      "grad_norm": 0.5128641724586487,
      "learning_rate": 9.828389830508475e-06,
      "loss": 0.1384,
      "step": 162
    },
    {
      "epoch": 0.17264663047795578,
      "grad_norm": 0.6055179834365845,
      "learning_rate": 9.827330508474578e-06,
      "loss": 0.1356,
      "step": 163
    },
    {
      "epoch": 0.17370581226002912,
      "grad_norm": 0.3544069230556488,
      "learning_rate": 9.82627118644068e-06,
      "loss": 0.1302,
      "step": 164
    },
    {
      "epoch": 0.17476499404210247,
      "grad_norm": 0.4915960729122162,
      "learning_rate": 9.825211864406781e-06,
      "loss": 0.1351,
      "step": 165
    },
    {
      "epoch": 0.17582417582417584,
      "grad_norm": 0.7743620872497559,
      "learning_rate": 9.824152542372882e-06,
      "loss": 0.1345,
      "step": 166
    },
    {
      "epoch": 0.17688335760624918,
      "grad_norm": 0.5948208570480347,
      "learning_rate": 9.823093220338984e-06,
      "loss": 0.1372,
      "step": 167
    },
    {
      "epoch": 0.17794253938832252,
      "grad_norm": 0.37300121784210205,
      "learning_rate": 9.822033898305085e-06,
      "loss": 0.1335,
      "step": 168
    },
    {
      "epoch": 0.17900172117039587,
      "grad_norm": 0.5812225341796875,
      "learning_rate": 9.820974576271187e-06,
      "loss": 0.1355,
      "step": 169
    },
    {
      "epoch": 0.1800609029524692,
      "grad_norm": 0.6437628269195557,
      "learning_rate": 9.819915254237288e-06,
      "loss": 0.1352,
      "step": 170
    },
    {
      "epoch": 0.18112008473454255,
      "grad_norm": 0.3454924523830414,
      "learning_rate": 9.818855932203391e-06,
      "loss": 0.1355,
      "step": 171
    },
    {
      "epoch": 0.18217926651661592,
      "grad_norm": 0.656635582447052,
      "learning_rate": 9.817796610169493e-06,
      "loss": 0.1349,
      "step": 172
    },
    {
      "epoch": 0.18323844829868927,
      "grad_norm": 0.6227939128875732,
      "learning_rate": 9.816737288135594e-06,
      "loss": 0.1335,
      "step": 173
    },
    {
      "epoch": 0.1842976300807626,
      "grad_norm": 0.3746066689491272,
      "learning_rate": 9.815677966101695e-06,
      "loss": 0.1329,
      "step": 174
    },
    {
      "epoch": 0.18535681186283595,
      "grad_norm": 0.41495418548583984,
      "learning_rate": 9.814618644067797e-06,
      "loss": 0.131,
      "step": 175
    },
    {
      "epoch": 0.1864159936449093,
      "grad_norm": 0.3814808130264282,
      "learning_rate": 9.813559322033898e-06,
      "loss": 0.1304,
      "step": 176
    },
    {
      "epoch": 0.18747517542698267,
      "grad_norm": 0.46599066257476807,
      "learning_rate": 9.8125e-06,
      "loss": 0.13,
      "step": 177
    },
    {
      "epoch": 0.188534357209056,
      "grad_norm": 0.4790410101413727,
      "learning_rate": 9.811440677966103e-06,
      "loss": 0.1276,
      "step": 178
    },
    {
      "epoch": 0.18959353899112935,
      "grad_norm": 0.44499969482421875,
      "learning_rate": 9.810381355932204e-06,
      "loss": 0.1277,
      "step": 179
    },
    {
      "epoch": 0.1906527207732027,
      "grad_norm": 0.4200044870376587,
      "learning_rate": 9.809322033898306e-06,
      "loss": 0.1261,
      "step": 180
    },
    {
      "epoch": 0.19171190255527604,
      "grad_norm": 0.6261917352676392,
      "learning_rate": 9.808262711864409e-06,
      "loss": 0.1338,
      "step": 181
    },
    {
      "epoch": 0.1927710843373494,
      "grad_norm": 0.5882927179336548,
      "learning_rate": 9.80720338983051e-06,
      "loss": 0.1304,
      "step": 182
    },
    {
      "epoch": 0.19383026611942275,
      "grad_norm": 0.36154767870903015,
      "learning_rate": 9.806144067796612e-06,
      "loss": 0.1306,
      "step": 183
    },
    {
      "epoch": 0.1948894479014961,
      "grad_norm": 0.5511684417724609,
      "learning_rate": 9.805084745762713e-06,
      "loss": 0.1287,
      "step": 184
    },
    {
      "epoch": 0.19594862968356944,
      "grad_norm": 0.45902204513549805,
      "learning_rate": 9.804025423728814e-06,
      "loss": 0.1282,
      "step": 185
    },
    {
      "epoch": 0.19700781146564278,
      "grad_norm": 0.42389318346977234,
      "learning_rate": 9.802966101694916e-06,
      "loss": 0.1276,
      "step": 186
    },
    {
      "epoch": 0.19806699324771615,
      "grad_norm": 1.0849132537841797,
      "learning_rate": 9.801906779661017e-06,
      "loss": 0.1324,
      "step": 187
    },
    {
      "epoch": 0.1991261750297895,
      "grad_norm": 0.4857287108898163,
      "learning_rate": 9.80084745762712e-06,
      "loss": 0.1322,
      "step": 188
    },
    {
      "epoch": 0.20018535681186284,
      "grad_norm": 0.45011675357818604,
      "learning_rate": 9.799788135593222e-06,
      "loss": 0.1324,
      "step": 189
    },
    {
      "epoch": 0.20124453859393618,
      "grad_norm": 0.6487869024276733,
      "learning_rate": 9.798728813559323e-06,
      "loss": 0.1348,
      "step": 190
    },
    {
      "epoch": 0.20230372037600952,
      "grad_norm": 0.5920835137367249,
      "learning_rate": 9.797669491525424e-06,
      "loss": 0.1332,
      "step": 191
    },
    {
      "epoch": 0.2033629021580829,
      "grad_norm": 0.4455447793006897,
      "learning_rate": 9.796610169491526e-06,
      "loss": 0.1284,
      "step": 192
    },
    {
      "epoch": 0.20442208394015624,
      "grad_norm": 0.5541309118270874,
      "learning_rate": 9.795550847457627e-06,
      "loss": 0.1256,
      "step": 193
    },
    {
      "epoch": 0.20548126572222958,
      "grad_norm": 1.2032992839813232,
      "learning_rate": 9.794491525423729e-06,
      "loss": 0.1315,
      "step": 194
    },
    {
      "epoch": 0.20654044750430292,
      "grad_norm": 0.47795382142066956,
      "learning_rate": 9.793432203389832e-06,
      "loss": 0.1254,
      "step": 195
    },
    {
      "epoch": 0.20759962928637626,
      "grad_norm": 0.5737263560295105,
      "learning_rate": 9.792372881355933e-06,
      "loss": 0.1248,
      "step": 196
    },
    {
      "epoch": 0.20865881106844963,
      "grad_norm": 0.6147955656051636,
      "learning_rate": 9.791313559322035e-06,
      "loss": 0.1274,
      "step": 197
    },
    {
      "epoch": 0.20971799285052298,
      "grad_norm": 0.49131709337234497,
      "learning_rate": 9.790254237288136e-06,
      "loss": 0.1304,
      "step": 198
    },
    {
      "epoch": 0.21077717463259632,
      "grad_norm": 0.32255011796951294,
      "learning_rate": 9.789194915254237e-06,
      "loss": 0.1227,
      "step": 199
    },
    {
      "epoch": 0.21183635641466966,
      "grad_norm": 0.8382896780967712,
      "learning_rate": 9.788135593220339e-06,
      "loss": 0.1305,
      "step": 200
    },
    {
      "epoch": 0.212895538196743,
      "grad_norm": 0.6731181144714355,
      "learning_rate": 9.787076271186442e-06,
      "loss": 0.1279,
      "step": 201
    },
    {
      "epoch": 0.21395471997881638,
      "grad_norm": 0.34601402282714844,
      "learning_rate": 9.786016949152543e-06,
      "loss": 0.1265,
      "step": 202
    },
    {
      "epoch": 0.21501390176088972,
      "grad_norm": 0.5068625807762146,
      "learning_rate": 9.784957627118645e-06,
      "loss": 0.1233,
      "step": 203
    },
    {
      "epoch": 0.21607308354296306,
      "grad_norm": 0.5763030648231506,
      "learning_rate": 9.783898305084746e-06,
      "loss": 0.1235,
      "step": 204
    },
    {
      "epoch": 0.2171322653250364,
      "grad_norm": 0.46622368693351746,
      "learning_rate": 9.78283898305085e-06,
      "loss": 0.1243,
      "step": 205
    },
    {
      "epoch": 0.21819144710710975,
      "grad_norm": 0.6546030044555664,
      "learning_rate": 9.78177966101695e-06,
      "loss": 0.1272,
      "step": 206
    },
    {
      "epoch": 0.21925062888918312,
      "grad_norm": 0.9954103827476501,
      "learning_rate": 9.780720338983052e-06,
      "loss": 0.1266,
      "step": 207
    },
    {
      "epoch": 0.22030981067125646,
      "grad_norm": 0.3762997090816498,
      "learning_rate": 9.779661016949154e-06,
      "loss": 0.1237,
      "step": 208
    },
    {
      "epoch": 0.2213689924533298,
      "grad_norm": 0.4721704423427582,
      "learning_rate": 9.778601694915255e-06,
      "loss": 0.1248,
      "step": 209
    },
    {
      "epoch": 0.22242817423540315,
      "grad_norm": 0.4108166992664337,
      "learning_rate": 9.777542372881356e-06,
      "loss": 0.1226,
      "step": 210
    },
    {
      "epoch": 0.2234873560174765,
      "grad_norm": 0.4768858551979065,
      "learning_rate": 9.776483050847458e-06,
      "loss": 0.1238,
      "step": 211
    },
    {
      "epoch": 0.22454653779954986,
      "grad_norm": 0.46884432435035706,
      "learning_rate": 9.775423728813561e-06,
      "loss": 0.1232,
      "step": 212
    },
    {
      "epoch": 0.2256057195816232,
      "grad_norm": 0.49014735221862793,
      "learning_rate": 9.774364406779662e-06,
      "loss": 0.1257,
      "step": 213
    },
    {
      "epoch": 0.22666490136369655,
      "grad_norm": 0.8501893877983093,
      "learning_rate": 9.773305084745764e-06,
      "loss": 0.1277,
      "step": 214
    },
    {
      "epoch": 0.2277240831457699,
      "grad_norm": 0.6959905624389648,
      "learning_rate": 9.772245762711865e-06,
      "loss": 0.1261,
      "step": 215
    },
    {
      "epoch": 0.22878326492784323,
      "grad_norm": 0.5984706282615662,
      "learning_rate": 9.771186440677967e-06,
      "loss": 0.1299,
      "step": 216
    },
    {
      "epoch": 0.22984244670991658,
      "grad_norm": 0.574116587638855,
      "learning_rate": 9.770127118644068e-06,
      "loss": 0.1266,
      "step": 217
    },
    {
      "epoch": 0.23090162849198995,
      "grad_norm": 0.6310490369796753,
      "learning_rate": 9.76906779661017e-06,
      "loss": 0.1318,
      "step": 218
    },
    {
      "epoch": 0.2319608102740633,
      "grad_norm": 0.61598801612854,
      "learning_rate": 9.76800847457627e-06,
      "loss": 0.1247,
      "step": 219
    },
    {
      "epoch": 0.23301999205613663,
      "grad_norm": 0.6924020648002625,
      "learning_rate": 9.766949152542374e-06,
      "loss": 0.1255,
      "step": 220
    },
    {
      "epoch": 0.23407917383820998,
      "grad_norm": 0.8936102986335754,
      "learning_rate": 9.765889830508475e-06,
      "loss": 0.1275,
      "step": 221
    },
    {
      "epoch": 0.23513835562028332,
      "grad_norm": 0.6406232118606567,
      "learning_rate": 9.764830508474578e-06,
      "loss": 0.1276,
      "step": 222
    },
    {
      "epoch": 0.2361975374023567,
      "grad_norm": 0.6062114834785461,
      "learning_rate": 9.76377118644068e-06,
      "loss": 0.1249,
      "step": 223
    },
    {
      "epoch": 0.23725671918443003,
      "grad_norm": 0.5603080987930298,
      "learning_rate": 9.762711864406781e-06,
      "loss": 0.1265,
      "step": 224
    },
    {
      "epoch": 0.23831590096650337,
      "grad_norm": 0.6400798559188843,
      "learning_rate": 9.761652542372883e-06,
      "loss": 0.1285,
      "step": 225
    },
    {
      "epoch": 0.23937508274857672,
      "grad_norm": 0.3875858187675476,
      "learning_rate": 9.760593220338984e-06,
      "loss": 0.1206,
      "step": 226
    },
    {
      "epoch": 0.24043426453065006,
      "grad_norm": 0.45703446865081787,
      "learning_rate": 9.759533898305085e-06,
      "loss": 0.1218,
      "step": 227
    },
    {
      "epoch": 0.24149344631272343,
      "grad_norm": 1.033738613128662,
      "learning_rate": 9.758474576271187e-06,
      "loss": 0.1252,
      "step": 228
    },
    {
      "epoch": 0.24255262809479677,
      "grad_norm": 0.8972013592720032,
      "learning_rate": 9.757415254237288e-06,
      "loss": 0.125,
      "step": 229
    },
    {
      "epoch": 0.24361180987687012,
      "grad_norm": 0.4154466986656189,
      "learning_rate": 9.756355932203391e-06,
      "loss": 0.1241,
      "step": 230
    },
    {
      "epoch": 0.24467099165894346,
      "grad_norm": 0.6829087734222412,
      "learning_rate": 9.755296610169493e-06,
      "loss": 0.1271,
      "step": 231
    },
    {
      "epoch": 0.2457301734410168,
      "grad_norm": 0.6784948706626892,
      "learning_rate": 9.754237288135594e-06,
      "loss": 0.1251,
      "step": 232
    },
    {
      "epoch": 0.24678935522309017,
      "grad_norm": 0.6223576664924622,
      "learning_rate": 9.753177966101696e-06,
      "loss": 0.1177,
      "step": 233
    },
    {
      "epoch": 0.24784853700516352,
      "grad_norm": 0.3889577388763428,
      "learning_rate": 9.752118644067797e-06,
      "loss": 0.1188,
      "step": 234
    },
    {
      "epoch": 0.24890771878723686,
      "grad_norm": 1.6076977252960205,
      "learning_rate": 9.751059322033898e-06,
      "loss": 0.127,
      "step": 235
    },
    {
      "epoch": 0.2499669005693102,
      "grad_norm": 1.3593311309814453,
      "learning_rate": 9.75e-06,
      "loss": 0.1275,
      "step": 236
    },
    {
      "epoch": 0.2510260823513836,
      "grad_norm": 0.4061569571495056,
      "learning_rate": 9.748940677966103e-06,
      "loss": 0.1145,
      "step": 237
    },
    {
      "epoch": 0.2520852641334569,
      "grad_norm": 0.49842679500579834,
      "learning_rate": 9.747881355932204e-06,
      "loss": 0.1229,
      "step": 238
    },
    {
      "epoch": 0.25314444591553026,
      "grad_norm": 0.6197004914283752,
      "learning_rate": 9.746822033898306e-06,
      "loss": 0.1238,
      "step": 239
    },
    {
      "epoch": 0.2542036276976036,
      "grad_norm": 0.6113892793655396,
      "learning_rate": 9.745762711864407e-06,
      "loss": 0.1223,
      "step": 240
    },
    {
      "epoch": 0.25526280947967694,
      "grad_norm": 0.5286921858787537,
      "learning_rate": 9.744703389830509e-06,
      "loss": 0.1232,
      "step": 241
    },
    {
      "epoch": 0.2563219912617503,
      "grad_norm": 1.1775904893875122,
      "learning_rate": 9.74364406779661e-06,
      "loss": 0.1297,
      "step": 242
    },
    {
      "epoch": 0.25738117304382363,
      "grad_norm": 1.4024102687835693,
      "learning_rate": 9.742584745762713e-06,
      "loss": 0.1312,
      "step": 243
    },
    {
      "epoch": 0.258440354825897,
      "grad_norm": 0.5369304418563843,
      "learning_rate": 9.741525423728814e-06,
      "loss": 0.12,
      "step": 244
    },
    {
      "epoch": 0.2594995366079703,
      "grad_norm": 0.6096199750900269,
      "learning_rate": 9.740466101694916e-06,
      "loss": 0.1237,
      "step": 245
    },
    {
      "epoch": 0.2605587183900437,
      "grad_norm": 0.5490643978118896,
      "learning_rate": 9.739406779661017e-06,
      "loss": 0.125,
      "step": 246
    },
    {
      "epoch": 0.26161790017211706,
      "grad_norm": 0.458108514547348,
      "learning_rate": 9.73834745762712e-06,
      "loss": 0.1226,
      "step": 247
    },
    {
      "epoch": 0.2626770819541904,
      "grad_norm": 0.7059030532836914,
      "learning_rate": 9.737288135593222e-06,
      "loss": 0.1214,
      "step": 248
    },
    {
      "epoch": 0.26373626373626374,
      "grad_norm": 0.5895451903343201,
      "learning_rate": 9.736228813559323e-06,
      "loss": 0.1162,
      "step": 249
    },
    {
      "epoch": 0.26479544551833706,
      "grad_norm": 1.0854839086532593,
      "learning_rate": 9.735169491525425e-06,
      "loss": 0.1257,
      "step": 250
    },
    {
      "epoch": 0.26585462730041043,
      "grad_norm": 0.3744925856590271,
      "learning_rate": 9.734110169491526e-06,
      "loss": 0.1193,
      "step": 251
    },
    {
      "epoch": 0.2669138090824838,
      "grad_norm": 0.49556052684783936,
      "learning_rate": 9.733050847457627e-06,
      "loss": 0.1194,
      "step": 252
    },
    {
      "epoch": 0.2679729908645571,
      "grad_norm": 0.6708354949951172,
      "learning_rate": 9.731991525423729e-06,
      "loss": 0.1232,
      "step": 253
    },
    {
      "epoch": 0.2690321726466305,
      "grad_norm": 0.6407080292701721,
      "learning_rate": 9.730932203389832e-06,
      "loss": 0.1152,
      "step": 254
    },
    {
      "epoch": 0.2700913544287038,
      "grad_norm": 0.48788413405418396,
      "learning_rate": 9.729872881355933e-06,
      "loss": 0.1187,
      "step": 255
    },
    {
      "epoch": 0.27115053621077717,
      "grad_norm": 0.8405669331550598,
      "learning_rate": 9.728813559322035e-06,
      "loss": 0.1193,
      "step": 256
    },
    {
      "epoch": 0.27220971799285054,
      "grad_norm": 1.2621759176254272,
      "learning_rate": 9.727754237288136e-06,
      "loss": 0.1222,
      "step": 257
    },
    {
      "epoch": 0.27326889977492386,
      "grad_norm": 0.521096408367157,
      "learning_rate": 9.726694915254238e-06,
      "loss": 0.1164,
      "step": 258
    },
    {
      "epoch": 0.2743280815569972,
      "grad_norm": 0.5464190244674683,
      "learning_rate": 9.725635593220339e-06,
      "loss": 0.123,
      "step": 259
    },
    {
      "epoch": 0.27538726333907054,
      "grad_norm": 0.6365671157836914,
      "learning_rate": 9.72457627118644e-06,
      "loss": 0.117,
      "step": 260
    },
    {
      "epoch": 0.2764464451211439,
      "grad_norm": 0.5571804046630859,
      "learning_rate": 9.723516949152544e-06,
      "loss": 0.1248,
      "step": 261
    },
    {
      "epoch": 0.2775056269032173,
      "grad_norm": 0.49087226390838623,
      "learning_rate": 9.722457627118645e-06,
      "loss": 0.1209,
      "step": 262
    },
    {
      "epoch": 0.2785648086852906,
      "grad_norm": 0.8053379654884338,
      "learning_rate": 9.721398305084746e-06,
      "loss": 0.1151,
      "step": 263
    },
    {
      "epoch": 0.27962399046736397,
      "grad_norm": 1.0285648107528687,
      "learning_rate": 9.72033898305085e-06,
      "loss": 0.1207,
      "step": 264
    },
    {
      "epoch": 0.2806831722494373,
      "grad_norm": 0.5413007736206055,
      "learning_rate": 9.719279661016951e-06,
      "loss": 0.1151,
      "step": 265
    },
    {
      "epoch": 0.28174235403151066,
      "grad_norm": 0.5470491647720337,
      "learning_rate": 9.718220338983052e-06,
      "loss": 0.1198,
      "step": 266
    },
    {
      "epoch": 0.282801535813584,
      "grad_norm": 0.6262738108634949,
      "learning_rate": 9.717161016949154e-06,
      "loss": 0.1191,
      "step": 267
    },
    {
      "epoch": 0.28386071759565734,
      "grad_norm": 0.591308057308197,
      "learning_rate": 9.716101694915255e-06,
      "loss": 0.1205,
      "step": 268
    },
    {
      "epoch": 0.2849198993777307,
      "grad_norm": 0.5280201435089111,
      "learning_rate": 9.715042372881357e-06,
      "loss": 0.1176,
      "step": 269
    },
    {
      "epoch": 0.285979081159804,
      "grad_norm": 0.9757447838783264,
      "learning_rate": 9.713983050847458e-06,
      "loss": 0.1221,
      "step": 270
    },
    {
      "epoch": 0.2870382629418774,
      "grad_norm": 0.5570871829986572,
      "learning_rate": 9.712923728813561e-06,
      "loss": 0.1154,
      "step": 271
    },
    {
      "epoch": 0.28809744472395077,
      "grad_norm": 0.7283833026885986,
      "learning_rate": 9.711864406779662e-06,
      "loss": 0.1161,
      "step": 272
    },
    {
      "epoch": 0.2891566265060241,
      "grad_norm": 0.6055071353912354,
      "learning_rate": 9.710805084745764e-06,
      "loss": 0.1161,
      "step": 273
    },
    {
      "epoch": 0.29021580828809745,
      "grad_norm": 0.6126815676689148,
      "learning_rate": 9.709745762711865e-06,
      "loss": 0.12,
      "step": 274
    },
    {
      "epoch": 0.29127499007017077,
      "grad_norm": 0.5102630853652954,
      "learning_rate": 9.708686440677967e-06,
      "loss": 0.1176,
      "step": 275
    },
    {
      "epoch": 0.29233417185224414,
      "grad_norm": 0.6075724959373474,
      "learning_rate": 9.707627118644068e-06,
      "loss": 0.1154,
      "step": 276
    },
    {
      "epoch": 0.2933933536343175,
      "grad_norm": 0.6419102549552917,
      "learning_rate": 9.70656779661017e-06,
      "loss": 0.117,
      "step": 277
    },
    {
      "epoch": 0.2944525354163908,
      "grad_norm": 0.6876809000968933,
      "learning_rate": 9.705508474576271e-06,
      "loss": 0.1185,
      "step": 278
    },
    {
      "epoch": 0.2955117171984642,
      "grad_norm": 0.46148595213890076,
      "learning_rate": 9.704449152542374e-06,
      "loss": 0.1181,
      "step": 279
    },
    {
      "epoch": 0.2965708989805375,
      "grad_norm": 0.4858454167842865,
      "learning_rate": 9.703389830508475e-06,
      "loss": 0.1131,
      "step": 280
    },
    {
      "epoch": 0.2976300807626109,
      "grad_norm": 0.6200568675994873,
      "learning_rate": 9.702330508474577e-06,
      "loss": 0.1183,
      "step": 281
    },
    {
      "epoch": 0.29868926254468425,
      "grad_norm": 0.6180820465087891,
      "learning_rate": 9.701271186440678e-06,
      "loss": 0.1157,
      "step": 282
    },
    {
      "epoch": 0.29974844432675757,
      "grad_norm": 0.4233243465423584,
      "learning_rate": 9.70021186440678e-06,
      "loss": 0.1109,
      "step": 283
    },
    {
      "epoch": 0.30080762610883094,
      "grad_norm": 0.8406049013137817,
      "learning_rate": 9.699152542372881e-06,
      "loss": 0.11,
      "step": 284
    },
    {
      "epoch": 0.30186680789090425,
      "grad_norm": 1.2740683555603027,
      "learning_rate": 9.698093220338984e-06,
      "loss": 0.1238,
      "step": 285
    },
    {
      "epoch": 0.3029259896729776,
      "grad_norm": 0.7582654356956482,
      "learning_rate": 9.697033898305086e-06,
      "loss": 0.1141,
      "step": 286
    },
    {
      "epoch": 0.303985171455051,
      "grad_norm": 0.5228947401046753,
      "learning_rate": 9.695974576271187e-06,
      "loss": 0.117,
      "step": 287
    },
    {
      "epoch": 0.3050443532371243,
      "grad_norm": 0.6550778150558472,
      "learning_rate": 9.69491525423729e-06,
      "loss": 0.1191,
      "step": 288
    },
    {
      "epoch": 0.3061035350191977,
      "grad_norm": 0.8009293675422668,
      "learning_rate": 9.693855932203392e-06,
      "loss": 0.122,
      "step": 289
    },
    {
      "epoch": 0.307162716801271,
      "grad_norm": 0.5879274606704712,
      "learning_rate": 9.692796610169493e-06,
      "loss": 0.1182,
      "step": 290
    },
    {
      "epoch": 0.30822189858334437,
      "grad_norm": 0.49802204966545105,
      "learning_rate": 9.691737288135594e-06,
      "loss": 0.118,
      "step": 291
    },
    {
      "epoch": 0.30928108036541774,
      "grad_norm": 0.8825365900993347,
      "learning_rate": 9.690677966101696e-06,
      "loss": 0.1187,
      "step": 292
    },
    {
      "epoch": 0.31034026214749105,
      "grad_norm": 1.3932257890701294,
      "learning_rate": 9.689618644067797e-06,
      "loss": 0.1225,
      "step": 293
    },
    {
      "epoch": 0.3113994439295644,
      "grad_norm": 0.6308886408805847,
      "learning_rate": 9.688559322033899e-06,
      "loss": 0.1112,
      "step": 294
    },
    {
      "epoch": 0.31245862571163774,
      "grad_norm": 0.5725395083427429,
      "learning_rate": 9.6875e-06,
      "loss": 0.1133,
      "step": 295
    },
    {
      "epoch": 0.3135178074937111,
      "grad_norm": 0.6921817064285278,
      "learning_rate": 9.686440677966103e-06,
      "loss": 0.1158,
      "step": 296
    },
    {
      "epoch": 0.3145769892757845,
      "grad_norm": 0.5869089961051941,
      "learning_rate": 9.685381355932205e-06,
      "loss": 0.1153,
      "step": 297
    },
    {
      "epoch": 0.3156361710578578,
      "grad_norm": 0.6975012421607971,
      "learning_rate": 9.684322033898306e-06,
      "loss": 0.1163,
      "step": 298
    },
    {
      "epoch": 0.31669535283993117,
      "grad_norm": 0.9162189960479736,
      "learning_rate": 9.683262711864407e-06,
      "loss": 0.1161,
      "step": 299
    },
    {
      "epoch": 0.3177545346220045,
      "grad_norm": 0.5748836398124695,
      "learning_rate": 9.682203389830509e-06,
      "loss": 0.1187,
      "step": 300
    },
    {
      "epoch": 0.31881371640407785,
      "grad_norm": 0.554711103439331,
      "learning_rate": 9.68114406779661e-06,
      "loss": 0.1188,
      "step": 301
    },
    {
      "epoch": 0.3198728981861512,
      "grad_norm": 0.6140995025634766,
      "learning_rate": 9.680084745762712e-06,
      "loss": 0.1157,
      "step": 302
    },
    {
      "epoch": 0.32093207996822454,
      "grad_norm": 0.5292292237281799,
      "learning_rate": 9.679025423728815e-06,
      "loss": 0.1134,
      "step": 303
    },
    {
      "epoch": 0.3219912617502979,
      "grad_norm": 0.4888879358768463,
      "learning_rate": 9.677966101694916e-06,
      "loss": 0.116,
      "step": 304
    },
    {
      "epoch": 0.3230504435323712,
      "grad_norm": 0.5932160019874573,
      "learning_rate": 9.676906779661017e-06,
      "loss": 0.1167,
      "step": 305
    },
    {
      "epoch": 0.3241096253144446,
      "grad_norm": 0.5574305057525635,
      "learning_rate": 9.67584745762712e-06,
      "loss": 0.1201,
      "step": 306
    },
    {
      "epoch": 0.32516880709651796,
      "grad_norm": 1.0020751953125,
      "learning_rate": 9.674788135593222e-06,
      "loss": 0.1196,
      "step": 307
    },
    {
      "epoch": 0.3262279888785913,
      "grad_norm": 0.5251221656799316,
      "learning_rate": 9.673728813559323e-06,
      "loss": 0.1189,
      "step": 308
    },
    {
      "epoch": 0.32728717066066465,
      "grad_norm": 0.5894964933395386,
      "learning_rate": 9.672669491525425e-06,
      "loss": 0.1154,
      "step": 309
    },
    {
      "epoch": 0.32834635244273797,
      "grad_norm": 0.765792191028595,
      "learning_rate": 9.671610169491526e-06,
      "loss": 0.116,
      "step": 310
    },
    {
      "epoch": 0.32940553422481134,
      "grad_norm": 0.5536498427391052,
      "learning_rate": 9.670550847457628e-06,
      "loss": 0.1161,
      "step": 311
    },
    {
      "epoch": 0.3304647160068847,
      "grad_norm": 0.4105791449546814,
      "learning_rate": 9.669491525423729e-06,
      "loss": 0.1127,
      "step": 312
    },
    {
      "epoch": 0.331523897788958,
      "grad_norm": 1.1248149871826172,
      "learning_rate": 9.668432203389832e-06,
      "loss": 0.1197,
      "step": 313
    },
    {
      "epoch": 0.3325830795710314,
      "grad_norm": 1.1392658948898315,
      "learning_rate": 9.667372881355934e-06,
      "loss": 0.1169,
      "step": 314
    },
    {
      "epoch": 0.3336422613531047,
      "grad_norm": 0.37222108244895935,
      "learning_rate": 9.666313559322035e-06,
      "loss": 0.1148,
      "step": 315
    },
    {
      "epoch": 0.3347014431351781,
      "grad_norm": 0.48842084407806396,
      "learning_rate": 9.665254237288136e-06,
      "loss": 0.1154,
      "step": 316
    },
    {
      "epoch": 0.33576062491725145,
      "grad_norm": 0.5393485426902771,
      "learning_rate": 9.664194915254238e-06,
      "loss": 0.1186,
      "step": 317
    },
    {
      "epoch": 0.33681980669932476,
      "grad_norm": 0.556902289390564,
      "learning_rate": 9.66313559322034e-06,
      "loss": 0.1205,
      "step": 318
    },
    {
      "epoch": 0.33787898848139813,
      "grad_norm": 0.49229300022125244,
      "learning_rate": 9.66207627118644e-06,
      "loss": 0.1172,
      "step": 319
    },
    {
      "epoch": 0.33893817026347145,
      "grad_norm": 0.5494566559791565,
      "learning_rate": 9.661016949152544e-06,
      "loss": 0.1169,
      "step": 320
    },
    {
      "epoch": 0.3399973520455448,
      "grad_norm": 1.6041209697723389,
      "learning_rate": 9.659957627118645e-06,
      "loss": 0.1222,
      "step": 321
    },
    {
      "epoch": 0.3410565338276182,
      "grad_norm": 1.3501816987991333,
      "learning_rate": 9.658898305084747e-06,
      "loss": 0.12,
      "step": 322
    },
    {
      "epoch": 0.3421157156096915,
      "grad_norm": 0.6245529651641846,
      "learning_rate": 9.657838983050848e-06,
      "loss": 0.1149,
      "step": 323
    },
    {
      "epoch": 0.3431748973917649,
      "grad_norm": 0.4942384958267212,
      "learning_rate": 9.65677966101695e-06,
      "loss": 0.1179,
      "step": 324
    },
    {
      "epoch": 0.3442340791738382,
      "grad_norm": 0.5967822074890137,
      "learning_rate": 9.65572033898305e-06,
      "loss": 0.1183,
      "step": 325
    },
    {
      "epoch": 0.34529326095591156,
      "grad_norm": 0.5735800266265869,
      "learning_rate": 9.654661016949152e-06,
      "loss": 0.1187,
      "step": 326
    },
    {
      "epoch": 0.34635244273798493,
      "grad_norm": 0.4783755838871002,
      "learning_rate": 9.653601694915255e-06,
      "loss": 0.1198,
      "step": 327
    },
    {
      "epoch": 0.34741162452005825,
      "grad_norm": 1.1743202209472656,
      "learning_rate": 9.652542372881357e-06,
      "loss": 0.1192,
      "step": 328
    },
    {
      "epoch": 0.3484708063021316,
      "grad_norm": 1.20207679271698,
      "learning_rate": 9.651483050847458e-06,
      "loss": 0.1199,
      "step": 329
    },
    {
      "epoch": 0.34952998808420493,
      "grad_norm": 0.6972348093986511,
      "learning_rate": 9.650423728813561e-06,
      "loss": 0.1176,
      "step": 330
    },
    {
      "epoch": 0.3505891698662783,
      "grad_norm": 0.4509320557117462,
      "learning_rate": 9.649364406779663e-06,
      "loss": 0.1165,
      "step": 331
    },
    {
      "epoch": 0.3516483516483517,
      "grad_norm": 0.5181882381439209,
      "learning_rate": 9.648305084745764e-06,
      "loss": 0.1183,
      "step": 332
    },
    {
      "epoch": 0.352707533430425,
      "grad_norm": 0.5092886686325073,
      "learning_rate": 9.647245762711865e-06,
      "loss": 0.1156,
      "step": 333
    },
    {
      "epoch": 0.35376671521249836,
      "grad_norm": 0.4488472044467926,
      "learning_rate": 9.646186440677967e-06,
      "loss": 0.1174,
      "step": 334
    },
    {
      "epoch": 0.3548258969945717,
      "grad_norm": 0.4209890365600586,
      "learning_rate": 9.645127118644068e-06,
      "loss": 0.1139,
      "step": 335
    },
    {
      "epoch": 0.35588507877664505,
      "grad_norm": 0.888431966304779,
      "learning_rate": 9.64406779661017e-06,
      "loss": 0.1137,
      "step": 336
    },
    {
      "epoch": 0.35694426055871836,
      "grad_norm": 0.5200555324554443,
      "learning_rate": 9.643008474576273e-06,
      "loss": 0.1088,
      "step": 337
    },
    {
      "epoch": 0.35800344234079173,
      "grad_norm": 0.3198027014732361,
      "learning_rate": 9.641949152542374e-06,
      "loss": 0.1082,
      "step": 338
    },
    {
      "epoch": 0.3590626241228651,
      "grad_norm": 0.31940215826034546,
      "learning_rate": 9.640889830508476e-06,
      "loss": 0.1104,
      "step": 339
    },
    {
      "epoch": 0.3601218059049384,
      "grad_norm": 0.3348517417907715,
      "learning_rate": 9.639830508474577e-06,
      "loss": 0.1102,
      "step": 340
    },
    {
      "epoch": 0.3611809876870118,
      "grad_norm": 0.4220709502696991,
      "learning_rate": 9.638771186440678e-06,
      "loss": 0.1137,
      "step": 341
    },
    {
      "epoch": 0.3622401694690851,
      "grad_norm": 0.5341797471046448,
      "learning_rate": 9.63771186440678e-06,
      "loss": 0.1136,
      "step": 342
    },
    {
      "epoch": 0.3632993512511585,
      "grad_norm": 0.3537216782569885,
      "learning_rate": 9.636652542372881e-06,
      "loss": 0.1115,
      "step": 343
    },
    {
      "epoch": 0.36435853303323185,
      "grad_norm": 1.5838582515716553,
      "learning_rate": 9.635593220338983e-06,
      "loss": 0.1162,
      "step": 344
    },
    {
      "epoch": 0.36541771481530516,
      "grad_norm": 0.6268835067749023,
      "learning_rate": 9.634533898305086e-06,
      "loss": 0.1127,
      "step": 345
    },
    {
      "epoch": 0.36647689659737853,
      "grad_norm": 0.5873063206672668,
      "learning_rate": 9.633474576271187e-06,
      "loss": 0.107,
      "step": 346
    },
    {
      "epoch": 0.36753607837945185,
      "grad_norm": 0.6088775992393494,
      "learning_rate": 9.632415254237289e-06,
      "loss": 0.1118,
      "step": 347
    },
    {
      "epoch": 0.3685952601615252,
      "grad_norm": 0.41898828744888306,
      "learning_rate": 9.631355932203392e-06,
      "loss": 0.1156,
      "step": 348
    },
    {
      "epoch": 0.3696544419435986,
      "grad_norm": 0.5519065856933594,
      "learning_rate": 9.630296610169493e-06,
      "loss": 0.1145,
      "step": 349
    },
    {
      "epoch": 0.3707136237256719,
      "grad_norm": 0.8987408876419067,
      "learning_rate": 9.629237288135595e-06,
      "loss": 0.1182,
      "step": 350
    },
    {
      "epoch": 0.3717728055077453,
      "grad_norm": 0.7230226993560791,
      "learning_rate": 9.628177966101696e-06,
      "loss": 0.1214,
      "step": 351
    },
    {
      "epoch": 0.3728319872898186,
      "grad_norm": 0.4950161576271057,
      "learning_rate": 9.627118644067797e-06,
      "loss": 0.1124,
      "step": 352
    },
    {
      "epoch": 0.37389116907189196,
      "grad_norm": 0.4516774117946625,
      "learning_rate": 9.626059322033899e-06,
      "loss": 0.1096,
      "step": 353
    },
    {
      "epoch": 0.37495035085396533,
      "grad_norm": 0.4239518940448761,
      "learning_rate": 9.625e-06,
      "loss": 0.1112,
      "step": 354
    },
    {
      "epoch": 0.37600953263603865,
      "grad_norm": 0.35730719566345215,
      "learning_rate": 9.623940677966103e-06,
      "loss": 0.1075,
      "step": 355
    },
    {
      "epoch": 0.377068714418112,
      "grad_norm": 1.1237397193908691,
      "learning_rate": 9.622881355932205e-06,
      "loss": 0.1123,
      "step": 356
    },
    {
      "epoch": 0.37812789620018533,
      "grad_norm": 0.8006128668785095,
      "learning_rate": 9.621822033898306e-06,
      "loss": 0.113,
      "step": 357
    },
    {
      "epoch": 0.3791870779822587,
      "grad_norm": 0.537632942199707,
      "learning_rate": 9.620762711864408e-06,
      "loss": 0.1125,
      "step": 358
    },
    {
      "epoch": 0.3802462597643321,
      "grad_norm": 0.44080790877342224,
      "learning_rate": 9.619703389830509e-06,
      "loss": 0.1113,
      "step": 359
    },
    {
      "epoch": 0.3813054415464054,
      "grad_norm": 0.4815836250782013,
      "learning_rate": 9.61864406779661e-06,
      "loss": 0.1136,
      "step": 360
    },
    {
      "epoch": 0.38236462332847876,
      "grad_norm": 0.4750889539718628,
      "learning_rate": 9.617584745762712e-06,
      "loss": 0.1143,
      "step": 361
    },
    {
      "epoch": 0.3834238051105521,
      "grad_norm": 0.4339998662471771,
      "learning_rate": 9.616525423728815e-06,
      "loss": 0.1107,
      "step": 362
    },
    {
      "epoch": 0.38448298689262544,
      "grad_norm": 0.45202603936195374,
      "learning_rate": 9.615466101694916e-06,
      "loss": 0.1082,
      "step": 363
    },
    {
      "epoch": 0.3855421686746988,
      "grad_norm": 0.8911429643630981,
      "learning_rate": 9.614406779661018e-06,
      "loss": 0.113,
      "step": 364
    },
    {
      "epoch": 0.38660135045677213,
      "grad_norm": 0.7170503735542297,
      "learning_rate": 9.613347457627119e-06,
      "loss": 0.1097,
      "step": 365
    },
    {
      "epoch": 0.3876605322388455,
      "grad_norm": 0.42108699679374695,
      "learning_rate": 9.61228813559322e-06,
      "loss": 0.1126,
      "step": 366
    },
    {
      "epoch": 0.3887197140209188,
      "grad_norm": 0.4251386225223541,
      "learning_rate": 9.611228813559322e-06,
      "loss": 0.1116,
      "step": 367
    },
    {
      "epoch": 0.3897788958029922,
      "grad_norm": 0.49028900265693665,
      "learning_rate": 9.610169491525423e-06,
| "loss": 0.1134, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.39083807758506556, |
| "grad_norm": 0.44346433877944946, |
| "learning_rate": 9.609110169491526e-06, |
| "loss": 0.1095, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3918972593671389, |
| "grad_norm": 0.3316284418106079, |
| "learning_rate": 9.608050847457628e-06, |
| "loss": 0.1127, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.39295644114921224, |
| "grad_norm": 0.5099537968635559, |
| "learning_rate": 9.60699152542373e-06, |
| "loss": 0.1081, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.39401562293128556, |
| "grad_norm": 0.30008023977279663, |
| "learning_rate": 9.605932203389832e-06, |
| "loss": 0.1075, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.39507480471335893, |
| "grad_norm": 0.5581114888191223, |
| "learning_rate": 9.604872881355934e-06, |
| "loss": 0.1099, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3961339864954323, |
| "grad_norm": 0.5502544641494751, |
| "learning_rate": 9.603813559322035e-06, |
| "loss": 0.1061, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3971931682775056, |
| "grad_norm": 0.33419784903526306, |
| "learning_rate": 9.602754237288137e-06, |
| "loss": 0.1105, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.398252350059579, |
| "grad_norm": 0.45939692854881287, |
| "learning_rate": 9.601694915254238e-06, |
| "loss": 0.1075, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3993115318416523, |
| "grad_norm": 0.45434147119522095, |
| "learning_rate": 9.60063559322034e-06, |
| "loss": 0.113, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.40037071362372567, |
| "grad_norm": 0.5118069648742676, |
| "learning_rate": 9.59957627118644e-06, |
| "loss": 0.1134, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.40142989540579904, |
| "grad_norm": 0.4450303316116333, |
| "learning_rate": 9.598516949152544e-06, |
| "loss": 0.1101, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.40248907718787236, |
| "grad_norm": 0.6912146210670471, |
| "learning_rate": 9.597457627118645e-06, |
| "loss": 0.1131, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.40354825896994573, |
| "grad_norm": 0.4503055810928345, |
| "learning_rate": 9.596398305084747e-06, |
| "loss": 0.1146, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.40460744075201904, |
| "grad_norm": 0.46015700697898865, |
| "learning_rate": 9.595338983050848e-06, |
| "loss": 0.1059, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.4056666225340924, |
| "grad_norm": 0.48054563999176025, |
| "learning_rate": 9.59427966101695e-06, |
| "loss": 0.1079, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.4067258043161658, |
| "grad_norm": 0.32423877716064453, |
| "learning_rate": 9.593220338983051e-06, |
| "loss": 0.1105, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4077849860982391, |
| "grad_norm": 0.5704624056816101, |
| "learning_rate": 9.592161016949152e-06, |
| "loss": 0.1089, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.40884416788031247, |
| "grad_norm": 0.8289555907249451, |
| "learning_rate": 9.591101694915255e-06, |
| "loss": 0.1126, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4099033496623858, |
| "grad_norm": 0.4352301359176636, |
| "learning_rate": 9.590042372881357e-06, |
| "loss": 0.1074, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.41096253144445916, |
| "grad_norm": 0.3718428313732147, |
| "learning_rate": 9.588983050847458e-06, |
| "loss": 0.1077, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.4120217132265325, |
| "grad_norm": 0.49498993158340454, |
| "learning_rate": 9.58792372881356e-06, |
| "loss": 0.1086, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.41308089500860584, |
| "grad_norm": 0.3933676481246948, |
| "learning_rate": 9.586864406779663e-06, |
| "loss": 0.1102, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.4141400767906792, |
| "grad_norm": 0.2939629554748535, |
| "learning_rate": 9.585805084745764e-06, |
| "loss": 0.1044, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.4151992585727525, |
| "grad_norm": 0.5933998823165894, |
| "learning_rate": 9.584745762711866e-06, |
| "loss": 0.1078, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4162584403548259, |
| "grad_norm": 0.6409616470336914, |
| "learning_rate": 9.583686440677967e-06, |
| "loss": 0.1115, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.41731762213689927, |
| "grad_norm": 0.5755448341369629, |
| "learning_rate": 9.582627118644068e-06, |
| "loss": 0.1114, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4183768039189726, |
| "grad_norm": 0.4892319440841675, |
| "learning_rate": 9.58156779661017e-06, |
| "loss": 0.1103, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.41943598570104595, |
| "grad_norm": 0.5357713103294373, |
| "learning_rate": 9.580508474576273e-06, |
| "loss": 0.1082, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.42049516748311927, |
| "grad_norm": 0.4296826720237732, |
| "learning_rate": 9.579449152542374e-06, |
| "loss": 0.1051, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.42155434926519264, |
| "grad_norm": 0.31759247183799744, |
| "learning_rate": 9.578389830508476e-06, |
| "loss": 0.1061, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.422613531047266, |
| "grad_norm": 0.6764819622039795, |
| "learning_rate": 9.577330508474577e-06, |
| "loss": 0.1051, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.4236727128293393, |
| "grad_norm": 0.8692288994789124, |
| "learning_rate": 9.576271186440679e-06, |
| "loss": 0.1089, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4247318946114127, |
| "grad_norm": 0.2702364921569824, |
| "learning_rate": 9.57521186440678e-06, |
| "loss": 0.1056, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.425791076393486, |
| "grad_norm": 0.35407984256744385, |
| "learning_rate": 9.574152542372881e-06, |
| "loss": 0.1104, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4268502581755594, |
| "grad_norm": 0.36321505904197693, |
| "learning_rate": 9.573093220338983e-06, |
| "loss": 0.1019, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.42790943995763275, |
| "grad_norm": 0.42492276430130005, |
| "learning_rate": 9.572033898305086e-06, |
| "loss": 0.1082, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.42896862173970607, |
| "grad_norm": 0.6944525241851807, |
| "learning_rate": 9.570974576271187e-06, |
| "loss": 0.1092, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.43002780352177944, |
| "grad_norm": 0.3112322986125946, |
| "learning_rate": 9.569915254237289e-06, |
| "loss": 0.1041, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.43108698530385275, |
| "grad_norm": 0.3997170031070709, |
| "learning_rate": 9.56885593220339e-06, |
| "loss": 0.1057, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.4321461670859261, |
| "grad_norm": 0.5635967254638672, |
| "learning_rate": 9.567796610169492e-06, |
| "loss": 0.1078, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4332053488679995, |
| "grad_norm": 0.7858012914657593, |
| "learning_rate": 9.566737288135593e-06, |
| "loss": 0.1077, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.4342645306500728, |
| "grad_norm": 1.2377523183822632, |
| "learning_rate": 9.565677966101694e-06, |
| "loss": 0.1046, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4353237124321462, |
| "grad_norm": 0.4631684124469757, |
| "learning_rate": 9.564618644067798e-06, |
| "loss": 0.1097, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4363828942142195, |
| "grad_norm": 0.4029408097267151, |
| "learning_rate": 9.563559322033899e-06, |
| "loss": 0.1075, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.43744207599629287, |
| "grad_norm": 0.977272093296051, |
| "learning_rate": 9.562500000000002e-06, |
| "loss": 0.1108, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.43850125777836624, |
| "grad_norm": 0.7255805134773254, |
| "learning_rate": 9.561440677966103e-06, |
| "loss": 0.1108, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.43956043956043955, |
| "grad_norm": 0.43987271189689636, |
| "learning_rate": 9.560381355932205e-06, |
| "loss": 0.1066, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.4406196213425129, |
| "grad_norm": 0.49002915620803833, |
| "learning_rate": 9.559322033898306e-06, |
| "loss": 0.1081, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.44167880312458624, |
| "grad_norm": 0.49192023277282715, |
| "learning_rate": 9.558262711864408e-06, |
| "loss": 0.1077, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4427379849066596, |
| "grad_norm": 0.489654004573822, |
| "learning_rate": 9.557203389830509e-06, |
| "loss": 0.1092, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.443797166688733, |
| "grad_norm": 0.7146844863891602, |
| "learning_rate": 9.55614406779661e-06, |
| "loss": 0.1062, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4448563484708063, |
| "grad_norm": 0.6408959031105042, |
| "learning_rate": 9.555084745762712e-06, |
| "loss": 0.1092, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.44591553025287967, |
| "grad_norm": 0.7169507145881653, |
| "learning_rate": 9.554025423728815e-06, |
| "loss": 0.1102, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.446974712034953, |
| "grad_norm": 0.6536822319030762, |
| "learning_rate": 9.552966101694916e-06, |
| "loss": 0.1117, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.44803389381702635, |
| "grad_norm": 0.5074172019958496, |
| "learning_rate": 9.551906779661018e-06, |
| "loss": 0.1141, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4490930755990997, |
| "grad_norm": 0.5577737092971802, |
| "learning_rate": 9.55084745762712e-06, |
| "loss": 0.113, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.45015225738117304, |
| "grad_norm": 0.43877550959587097, |
| "learning_rate": 9.54978813559322e-06, |
| "loss": 0.1088, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.4512114391632464, |
| "grad_norm": 0.4872395396232605, |
| "learning_rate": 9.548728813559322e-06, |
| "loss": 0.1058, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4522706209453197, |
| "grad_norm": 0.5919830799102783, |
| "learning_rate": 9.547669491525423e-06, |
| "loss": 0.1055, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.4533298027273931, |
| "grad_norm": 0.711881160736084, |
| "learning_rate": 9.546610169491527e-06, |
| "loss": 0.1096, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.45438898450946646, |
| "grad_norm": 0.571658194065094, |
| "learning_rate": 9.545550847457628e-06, |
| "loss": 0.11, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4554481662915398, |
| "grad_norm": 0.4957394301891327, |
| "learning_rate": 9.54449152542373e-06, |
| "loss": 0.1092, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.45650734807361315, |
| "grad_norm": 0.473998099565506, |
| "learning_rate": 9.54343220338983e-06, |
| "loss": 0.1075, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.45756652985568647, |
| "grad_norm": 0.5281217098236084, |
| "learning_rate": 9.542372881355934e-06, |
| "loss": 0.1045, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.45862571163775984, |
| "grad_norm": 0.4293951690196991, |
| "learning_rate": 9.541313559322035e-06, |
| "loss": 0.1034, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.45968489341983315, |
| "grad_norm": 0.4172426760196686, |
| "learning_rate": 9.540254237288137e-06, |
| "loss": 0.106, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4607440752019065, |
| "grad_norm": 0.4815566837787628, |
| "learning_rate": 9.539194915254238e-06, |
| "loss": 0.1046, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.4618032569839799, |
| "grad_norm": 0.9885016679763794, |
| "learning_rate": 9.53813559322034e-06, |
| "loss": 0.1067, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4628624387660532, |
| "grad_norm": 0.5439627170562744, |
| "learning_rate": 9.537076271186441e-06, |
| "loss": 0.1033, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.4639216205481266, |
| "grad_norm": 0.47244539856910706, |
| "learning_rate": 9.536016949152544e-06, |
| "loss": 0.106, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4649808023301999, |
| "grad_norm": 0.3577944338321686, |
| "learning_rate": 9.534957627118645e-06, |
| "loss": 0.1035, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.46603998411227326, |
| "grad_norm": 0.37519779801368713, |
| "learning_rate": 9.533898305084747e-06, |
| "loss": 0.1052, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.46709916589434664, |
| "grad_norm": 0.5341219902038574, |
| "learning_rate": 9.532838983050848e-06, |
| "loss": 0.104, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.46815834767641995, |
| "grad_norm": 0.5906185507774353, |
| "learning_rate": 9.53177966101695e-06, |
| "loss": 0.1093, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4692175294584933, |
| "grad_norm": 0.7719901204109192, |
| "learning_rate": 9.530720338983051e-06, |
| "loss": 0.1042, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.47027671124056664, |
| "grad_norm": 0.43853050470352173, |
| "learning_rate": 9.529661016949153e-06, |
| "loss": 0.1033, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.47133589302264, |
| "grad_norm": 0.41483408212661743, |
| "learning_rate": 9.528601694915256e-06, |
| "loss": 0.1037, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4723950748047134, |
| "grad_norm": 0.38403433561325073, |
| "learning_rate": 9.527542372881357e-06, |
| "loss": 0.0989, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4734542565867867, |
| "grad_norm": 0.41726627945899963, |
| "learning_rate": 9.526483050847458e-06, |
| "loss": 0.1047, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.47451343836886006, |
| "grad_norm": 0.6198919415473938, |
| "learning_rate": 9.52542372881356e-06, |
| "loss": 0.1052, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4755726201509334, |
| "grad_norm": 0.37252992391586304, |
| "learning_rate": 9.524364406779661e-06, |
| "loss": 0.105, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.47663180193300675, |
| "grad_norm": 0.48047640919685364, |
| "learning_rate": 9.523305084745763e-06, |
| "loss": 0.1053, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4776909837150801, |
| "grad_norm": 0.4455186724662781, |
| "learning_rate": 9.522245762711864e-06, |
| "loss": 0.1037, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.47875016549715343, |
| "grad_norm": 0.5429503917694092, |
| "learning_rate": 9.521186440677967e-06, |
| "loss": 0.1018, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.4798093472792268, |
| "grad_norm": 0.3205244243144989, |
| "learning_rate": 9.520127118644069e-06, |
| "loss": 0.1006, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.4808685290613001, |
| "grad_norm": 0.7264949083328247, |
| "learning_rate": 9.51906779661017e-06, |
| "loss": 0.0994, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.3654707968235016, |
| "learning_rate": 9.518008474576273e-06, |
| "loss": 0.1006, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.48298689262544686, |
| "grad_norm": 0.419162780046463, |
| "learning_rate": 9.516949152542375e-06, |
| "loss": 0.1024, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4840460744075202, |
| "grad_norm": 0.3804956078529358, |
| "learning_rate": 9.515889830508476e-06, |
| "loss": 0.1027, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.48510525618959355, |
| "grad_norm": 0.44833818078041077, |
| "learning_rate": 9.514830508474577e-06, |
| "loss": 0.1013, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.48616443797166686, |
| "grad_norm": 0.5254035592079163, |
| "learning_rate": 9.513771186440679e-06, |
| "loss": 0.1039, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.48722361975374023, |
| "grad_norm": 0.399044394493103, |
| "learning_rate": 9.51271186440678e-06, |
| "loss": 0.1075, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4882828015358136, |
| "grad_norm": 0.38109609484672546, |
| "learning_rate": 9.511652542372882e-06, |
| "loss": 0.0988, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.4893419833178869, |
| "grad_norm": 0.5939087271690369, |
| "learning_rate": 9.510593220338985e-06, |
| "loss": 0.1009, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.4904011650999603, |
| "grad_norm": 0.407850056886673, |
| "learning_rate": 9.509533898305086e-06, |
| "loss": 0.1005, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.4914603468820336, |
| "grad_norm": 0.523597240447998, |
| "learning_rate": 9.508474576271188e-06, |
| "loss": 0.102, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.492519528664107, |
| "grad_norm": 0.7457444071769714, |
| "learning_rate": 9.507415254237289e-06, |
| "loss": 0.1037, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.49357871044618035, |
| "grad_norm": 0.4090770483016968, |
| "learning_rate": 9.50635593220339e-06, |
| "loss": 0.1057, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.49463789222825366, |
| "grad_norm": 0.3972409665584564, |
| "learning_rate": 9.505296610169492e-06, |
| "loss": 0.1028, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.49569707401032703, |
| "grad_norm": 0.38050827383995056, |
| "learning_rate": 9.504237288135593e-06, |
| "loss": 0.1049, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.49675625579240035, |
| "grad_norm": 0.3014926612377167, |
| "learning_rate": 9.503177966101695e-06, |
| "loss": 0.1012, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.4978154375744737, |
| "grad_norm": 0.42940622568130493, |
| "learning_rate": 9.502118644067798e-06, |
| "loss": 0.1065, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4988746193565471, |
| "grad_norm": 0.2840025722980499, |
| "learning_rate": 9.501059322033899e-06, |
| "loss": 0.103, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.4999338011386204, |
| "grad_norm": 0.3117368817329407, |
| "learning_rate": 9.5e-06, |
| "loss": 0.1035, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4999338011386204, |
| "eval_accuracy": 0.9803, |
| "eval_best_f1_from_thresholding": 0.15450643776824033, |
| "eval_loss": 0.13234694302082062, |
| "eval_matthews_corrcoef": 0.14806398220854253, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9900338948752972, |
| "eval_negative_class_precision": 0.992393509127789, |
| "eval_negative_class_recall": 0.9876854749167255, |
| "eval_positive_class_f1": 0.15450643776824036, |
| "eval_positive_class_precision": 0.12857142857142856, |
| "eval_positive_class_recall": 0.1935483870967742, |
| "eval_roc_auc": 0.8230006805224067, |
| "eval_runtime": 20.7498, |
| "eval_samples_per_second": 481.933, |
| "eval_steps_per_second": 7.566, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5009929829206937, |
| "grad_norm": 1.087823748588562, |
| "learning_rate": 9.498940677966102e-06, |
| "loss": 0.1062, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5020521647027671, |
| "grad_norm": 1.0358397960662842, |
| "learning_rate": 9.497881355932203e-06, |
| "loss": 0.1086, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5031113464848405, |
| "grad_norm": 0.5633784532546997, |
| "learning_rate": 9.496822033898306e-06, |
| "loss": 0.1031, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5041705282669138, |
| "grad_norm": 0.5209754705429077, |
| "learning_rate": 9.495762711864408e-06, |
| "loss": 0.1047, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5052297100489872, |
| "grad_norm": 0.5898464322090149, |
| "learning_rate": 9.49470338983051e-06, |
| "loss": 0.1075, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5062888918310605, |
| "grad_norm": 0.49827930331230164, |
| "learning_rate": 9.49364406779661e-06, |
| "loss": 0.104, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5073480736131338, |
| "grad_norm": 0.4471459686756134, |
| "learning_rate": 9.492584745762712e-06, |
| "loss": 0.1048, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5084072553952071, |
| "grad_norm": 0.3593554198741913, |
| "learning_rate": 9.491525423728815e-06, |
| "loss": 0.1025, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5094664371772806, |
| "grad_norm": 0.5244583487510681, |
| "learning_rate": 9.490466101694917e-06, |
| "loss": 0.1067, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5105256189593539, |
| "grad_norm": 1.4169367551803589, |
| "learning_rate": 9.489406779661018e-06, |
| "loss": 0.106, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5115848007414272, |
| "grad_norm": 1.2409381866455078, |
| "learning_rate": 9.48834745762712e-06, |
| "loss": 0.1051, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5126439825235006, |
| "grad_norm": 0.39561715722084045, |
| "learning_rate": 9.48728813559322e-06, |
| "loss": 0.1036, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.513703164305574, |
| "grad_norm": 0.5099272131919861, |
| "learning_rate": 9.486228813559322e-06, |
| "loss": 0.1032, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5147623460876473, |
| "grad_norm": 0.5082338452339172, |
| "learning_rate": 9.485169491525424e-06, |
| "loss": 0.1039, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5158215278697207, |
| "grad_norm": 0.46735939383506775, |
| "learning_rate": 9.484110169491527e-06, |
| "loss": 0.1, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.516880709651794, |
| "grad_norm": 0.4456905126571655, |
| "learning_rate": 9.483050847457628e-06, |
| "loss": 0.1025, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5179398914338673, |
| "grad_norm": 0.5637014508247375, |
| "learning_rate": 9.48199152542373e-06, |
| "loss": 0.0996, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5189990732159406, |
| "grad_norm": 1.9828752279281616, |
| "learning_rate": 9.480932203389831e-06, |
| "loss": 0.1019, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5200582549980141, |
| "grad_norm": 0.8592916131019592, |
| "learning_rate": 9.479872881355932e-06, |
| "loss": 0.103, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5211174367800874, |
| "grad_norm": 0.8089073896408081, |
| "learning_rate": 9.478813559322034e-06, |
| "loss": 0.1034, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5221766185621607, |
| "grad_norm": 0.36452987790107727, |
| "learning_rate": 9.477754237288135e-06, |
| "loss": 0.1049, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5232358003442341, |
| "grad_norm": 0.4801510274410248, |
| "learning_rate": 9.476694915254238e-06, |
| "loss": 0.1092, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5242949821263074, |
| "grad_norm": 0.4750489592552185, |
| "learning_rate": 9.47563559322034e-06, |
| "loss": 0.1038, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5253541639083807, |
| "grad_norm": 0.46922165155410767, |
| "learning_rate": 9.474576271186441e-06, |
| "loss": 0.1057, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5264133456904542, |
| "grad_norm": 0.34764742851257324, |
| "learning_rate": 9.473516949152544e-06, |
| "loss": 0.0998, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5274725274725275, |
| "grad_norm": 0.5010620951652527, |
| "learning_rate": 9.472457627118646e-06, |
| "loss": 0.1021, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5285317092546008, |
| "grad_norm": 0.8562049865722656, |
| "learning_rate": 9.471398305084747e-06, |
| "loss": 0.1017, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5295908910366741, |
| "grad_norm": 0.8053882122039795, |
| "learning_rate": 9.470338983050848e-06, |
| "loss": 0.1029, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5306500728187475, |
| "grad_norm": 0.55223548412323, |
| "learning_rate": 9.46927966101695e-06, |
| "loss": 0.1075, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5317092546008209, |
| "grad_norm": 0.5718464255332947, |
| "learning_rate": 9.468220338983051e-06, |
| "loss": 0.1007, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5327684363828942, |
| "grad_norm": 0.42768341302871704, |
| "learning_rate": 9.467161016949153e-06, |
| "loss": 0.1018, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5338276181649676, |
| "grad_norm": 0.6023754477500916, |
| "learning_rate": 9.466101694915256e-06, |
| "loss": 0.1071, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5348867999470409, |
| "grad_norm": 0.42522376775741577, |
| "learning_rate": 9.465042372881357e-06, |
| "loss": 0.103, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5359459817291142, |
| "grad_norm": 0.316423624753952, |
| "learning_rate": 9.463983050847459e-06, |
| "loss": 0.0995, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5370051635111877, |
| "grad_norm": 0.8633352518081665, |
| "learning_rate": 9.46292372881356e-06, |
| "loss": 0.103, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.538064345293261, |
| "grad_norm": 0.4316701889038086, |
| "learning_rate": 9.461864406779661e-06, |
| "loss": 0.0972, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5391235270753343, |
| "grad_norm": 0.3259594142436981, |
| "learning_rate": 9.460805084745763e-06, |
| "loss": 0.0972, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5401827088574076, |
| "grad_norm": 0.6332740187644958, |
| "learning_rate": 9.459745762711864e-06, |
| "loss": 0.1005, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.541241890639481, |
| "grad_norm": 0.2722731828689575, |
| "learning_rate": 9.458686440677967e-06, |
| "loss": 0.0972, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5423010724215543, |
| "grad_norm": 0.5190332531929016, |
| "learning_rate": 9.457627118644069e-06, |
| "loss": 0.1052, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5433602542036277, |
| "grad_norm": 0.5916107296943665, |
| "learning_rate": 9.45656779661017e-06, |
| "loss": 0.1066, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5444194359857011, |
| "grad_norm": 0.4169541895389557, |
| "learning_rate": 9.455508474576272e-06, |
| "loss": 0.099, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5454786177677744, |
| "grad_norm": 0.6240091919898987, |
| "learning_rate": 9.454449152542373e-06, |
| "loss": 0.1006, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5465377995498477, |
| "grad_norm": 0.38033053278923035, |
| "learning_rate": 9.453389830508474e-06, |
| "loss": 0.102, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5475969813319211, |
| "grad_norm": 0.31779545545578003, |
| "learning_rate": 9.452330508474578e-06, |
| "loss": 0.097, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5486561631139945, |
| "grad_norm": 0.6237276196479797, |
| "learning_rate": 9.451271186440679e-06, |
| "loss": 0.1013, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5497153448960678, |
| "grad_norm": 1.495983600616455, |
| "learning_rate": 9.45021186440678e-06, |
| "loss": 0.1101, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5507745266781411, |
| "grad_norm": 0.47101595997810364, |
| "learning_rate": 9.449152542372882e-06, |
| "loss": 0.1034, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5518337084602145, |
| "grad_norm": 0.29455289244651794, |
| "learning_rate": 9.448093220338985e-06, |
| "loss": 0.1003, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5528928902422878, |
| "grad_norm": 0.38901370763778687, |
| "learning_rate": 9.447033898305086e-06, |
| "loss": 0.1029, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5539520720243611, |
| "grad_norm": 0.47442498803138733, |
| "learning_rate": 9.445974576271188e-06, |
| "loss": 0.1052, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5550112538064346, |
| "grad_norm": 0.4412713646888733, |
| "learning_rate": 9.444915254237289e-06, |
| "loss": 0.1006, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5560704355885079, |
| "grad_norm": 0.33992066979408264, |
| "learning_rate": 9.44385593220339e-06, |
| "loss": 0.0973, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5571296173705812, |
| "grad_norm": 0.5417588353157043, |
| "learning_rate": 9.442796610169492e-06, |
| "loss": 0.1028, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5581887991526546, |
| "grad_norm": 0.8810229301452637, |
| "learning_rate": 9.441737288135593e-06, |
| "loss": 0.1031, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.5592479809347279, |
| "grad_norm": 0.8370358347892761, |
| "learning_rate": 9.440677966101696e-06, |
| "loss": 0.1009, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5603071627168013, |
| "grad_norm": 1.0851960182189941, |
| "learning_rate": 9.439618644067798e-06, |
| "loss": 0.0996, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5613663444988746, |
| "grad_norm": 0.32936275005340576, |
| "learning_rate": 9.4385593220339e-06, |
| "loss": 0.1002, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.562425526280948, |
| "grad_norm": 0.6002232432365417, |
| "learning_rate": 9.4375e-06, |
| "loss": 0.1062, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5634847080630213, |
| "grad_norm": 0.5921926498413086, |
| "learning_rate": 9.436440677966102e-06, |
| "loss": 0.1056, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5645438898450946, |
| "grad_norm": 0.45451927185058594, |
| "learning_rate": 9.435381355932204e-06, |
| "loss": 0.1037, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.565603071627168, |
| "grad_norm": 0.5011440515518188, |
| "learning_rate": 9.434322033898305e-06, |
| "loss": 0.1035, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5666622534092414, |
| "grad_norm": 0.526629626750946, |
| "learning_rate": 9.433262711864406e-06, |
| "loss": 0.1001, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5677214351913147, |
| "grad_norm": 0.7653958797454834, |
| "learning_rate": 9.43220338983051e-06, |
| "loss": 0.1008, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5687806169733881, |
| "grad_norm": 1.3027257919311523, |
| "learning_rate": 9.431144067796611e-06, |
| "loss": 0.1039, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.5698397987554614, |
| "grad_norm": 0.5976331830024719, |
| "learning_rate": 9.430084745762714e-06, |
| "loss": 0.0998, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5708989805375347, |
| "grad_norm": 0.29188409447669983, |
| "learning_rate": 9.429025423728815e-06, |
| "loss": 0.0997, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.571958162319608, |
| "grad_norm": 0.4244849979877472, |
| "learning_rate": 9.427966101694917e-06, |
| "loss": 0.1062, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5730173441016815, |
| "grad_norm": 0.4817642569541931, |
| "learning_rate": 9.426906779661018e-06, |
| "loss": 0.1035, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.5740765258837548, |
| "grad_norm": 0.5244449973106384, |
| "learning_rate": 9.42584745762712e-06, |
| "loss": 0.1046, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5751357076658281, |
| "grad_norm": 0.5034027695655823, |
| "learning_rate": 9.424788135593221e-06, |
| "loss": 0.1054, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.5761948894479015, |
| "grad_norm": 0.4198877215385437, |
| "learning_rate": 9.423728813559322e-06, |
| "loss": 0.0991, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5772540712299749, |
| "grad_norm": 0.6624194979667664, |
| "learning_rate": 9.422669491525424e-06, |
| "loss": 0.0983, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.5783132530120482, |
| "grad_norm": 0.3857661783695221, |
| "learning_rate": 9.421610169491527e-06, |
| "loss": 0.1005, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5793724347941216, |
| "grad_norm": 0.6397581100463867, |
| "learning_rate": 9.420550847457628e-06, |
| "loss": 0.1007, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.5804316165761949, |
| "grad_norm": 0.7347425818443298, |
| "learning_rate": 9.41949152542373e-06, |
| "loss": 0.1012, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5814907983582682, |
| "grad_norm": 0.3354703187942505, |
| "learning_rate": 9.418432203389831e-06, |
| "loss": 0.1046, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.5825499801403415, |
| "grad_norm": 0.5054658055305481, |
| "learning_rate": 9.417372881355933e-06, |
| "loss": 0.1045, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.583609161922415, |
| "grad_norm": 0.4023343324661255, |
| "learning_rate": 9.416313559322034e-06, |
| "loss": 0.1042, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5846683437044883, |
| "grad_norm": 0.38671228289604187, |
| "learning_rate": 9.415254237288135e-06, |
| "loss": 0.0987, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5857275254865616, |
| "grad_norm": 0.27861088514328003, |
| "learning_rate": 9.414194915254239e-06, |
| "loss": 0.0989, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.586786707268635, |
| "grad_norm": 0.7123656868934631, |
| "learning_rate": 9.41313559322034e-06, |
| "loss": 0.1009, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5878458890507083, |
| "grad_norm": 1.1902016401290894, |
| "learning_rate": 9.412076271186441e-06, |
| "loss": 0.1051, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.5889050708327817, |
| "grad_norm": 1.7251577377319336, |
| "learning_rate": 9.411016949152543e-06, |
| "loss": 0.1072, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5899642526148551, |
| "grad_norm": 0.29798540472984314, |
| "learning_rate": 9.409957627118644e-06, |
| "loss": 0.1017, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.5910234343969284, |
| "grad_norm": 0.418284147977829, |
| "learning_rate": 9.408898305084746e-06, |
| "loss": 0.0978, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.5920826161790017, |
| "grad_norm": 0.49750545620918274, |
| "learning_rate": 9.407838983050849e-06, |
| "loss": 0.1067, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.593141797961075, |
| "grad_norm": 0.47228100895881653, |
| "learning_rate": 9.40677966101695e-06, |
| "loss": 0.1042, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5942009797431484, |
| "grad_norm": 0.6839432716369629, |
| "learning_rate": 9.405720338983051e-06, |
| "loss": 0.1018, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.5952601615252218, |
| "grad_norm": 0.4121408760547638, |
| "learning_rate": 9.404661016949153e-06, |
| "loss": 0.0983, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.5963193433072951, |
| "grad_norm": 0.36210134625434875, |
| "learning_rate": 9.403601694915256e-06, |
| "loss": 0.102, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.5973785250893685, |
| "grad_norm": 0.392327219247818, |
| "learning_rate": 9.402542372881357e-06, |
| "loss": 0.1017, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.5984377068714418, |
| "grad_norm": 1.075141429901123, |
| "learning_rate": 9.401483050847459e-06, |
| "loss": 0.1046, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5994968886535151, |
| "grad_norm": 0.6502388715744019, |
| "learning_rate": 9.40042372881356e-06, |
| "loss": 0.1027, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6005560704355886, |
| "grad_norm": 0.41776108741760254, |
| "learning_rate": 9.399364406779662e-06, |
| "loss": 0.1007, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6016152522176619, |
| "grad_norm": 0.45106184482574463, |
| "learning_rate": 9.398305084745763e-06, |
| "loss": 0.1037, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6026744339997352, |
| "grad_norm": 0.5499406456947327, |
| "learning_rate": 9.397245762711864e-06, |
| "loss": 0.105, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6037336157818085, |
| "grad_norm": 0.43302425742149353, |
| "learning_rate": 9.396186440677968e-06, |
| "loss": 0.1009, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6047927975638819, |
| "grad_norm": 0.3462725281715393, |
| "learning_rate": 9.395127118644069e-06, |
| "loss": 0.0988, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6058519793459552, |
| "grad_norm": 0.29506900906562805, |
| "learning_rate": 9.39406779661017e-06, |
| "loss": 0.0994, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6069111611280286, |
| "grad_norm": 1.0929666757583618, |
| "learning_rate": 9.393008474576272e-06, |
| "loss": 0.1017, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.607970342910102, |
| "grad_norm": 1.0449674129486084, |
| "learning_rate": 9.391949152542373e-06, |
| "loss": 0.1034, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6090295246921753, |
| "grad_norm": 1.248158574104309, |
| "learning_rate": 9.390889830508475e-06, |
| "loss": 0.0996, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6100887064742486, |
| "grad_norm": 0.38209083676338196, |
| "learning_rate": 9.389830508474576e-06, |
| "loss": 0.1029, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6111478882563219, |
| "grad_norm": 0.4725791811943054, |
| "learning_rate": 9.388771186440679e-06, |
| "loss": 0.0998, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6122070700383954, |
| "grad_norm": 0.5096263289451599, |
| "learning_rate": 9.38771186440678e-06, |
| "loss": 0.1012, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6132662518204687, |
| "grad_norm": 0.3964233100414276, |
| "learning_rate": 9.386652542372882e-06, |
| "loss": 0.1032, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.614325433602542, |
| "grad_norm": 0.31194186210632324, |
| "learning_rate": 9.385593220338985e-06, |
| "loss": 0.0973, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.3112730383872986, |
| "learning_rate": 9.384533898305086e-06, |
| "loss": 0.0969, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6164437971666887, |
| "grad_norm": 0.7090293765068054, |
| "learning_rate": 9.383474576271188e-06, |
| "loss": 0.099, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.617502978948762, |
| "grad_norm": 2.3716042041778564, |
| "learning_rate": 9.38241525423729e-06, |
| "loss": 0.1165, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6185621607308355, |
| "grad_norm": 1.2725728750228882, |
| "learning_rate": 9.38135593220339e-06, |
| "loss": 0.1047, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6196213425129088, |
| "grad_norm": 0.8259700536727905, |
| "learning_rate": 9.380296610169492e-06, |
| "loss": 0.1069, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6206805242949821, |
| "grad_norm": 0.5297931432723999, |
| "learning_rate": 9.379237288135594e-06, |
| "loss": 0.1029, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6217397060770554, |
| "grad_norm": 0.658423900604248, |
| "learning_rate": 9.378177966101697e-06, |
| "loss": 0.1086, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6227988878591288, |
| "grad_norm": 0.6752994656562805, |
| "learning_rate": 9.377118644067798e-06, |
| "loss": 0.1063, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6238580696412022, |
| "grad_norm": 0.45060423016548157, |
| "learning_rate": 9.3760593220339e-06, |
| "loss": 0.1021, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6249172514232755, |
| "grad_norm": 0.394235223531723, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 0.1042, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6259764332053489, |
| "grad_norm": 0.8248805403709412, |
| "learning_rate": 9.373940677966102e-06, |
| "loss": 0.1032, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6270356149874222, |
| "grad_norm": 1.546491265296936, |
| "learning_rate": 9.372881355932204e-06, |
| "loss": 0.1065, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6280947967694955, |
| "grad_norm": 1.0976604223251343, |
| "learning_rate": 9.371822033898305e-06, |
| "loss": 0.1049, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.629153978551569, |
| "grad_norm": 0.31807151436805725, |
| "learning_rate": 9.370762711864407e-06, |
| "loss": 0.096, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6302131603336423, |
| "grad_norm": 0.33156925439834595, |
| "learning_rate": 9.36970338983051e-06, |
| "loss": 0.1021, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6312723421157156, |
| "grad_norm": 0.5377479195594788, |
| "learning_rate": 9.368644067796611e-06, |
| "loss": 0.1046, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6323315238977889, |
| "grad_norm": 0.8017779588699341, |
| "learning_rate": 9.367584745762712e-06, |
| "loss": 0.1052, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6333907056798623, |
| "grad_norm": 0.6710719466209412, |
| "learning_rate": 9.366525423728814e-06, |
| "loss": 0.1054, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6344498874619356, |
| "grad_norm": 0.5041128993034363, |
| "learning_rate": 9.365466101694915e-06, |
| "loss": 0.1065, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.635509069244009, |
| "grad_norm": 0.48250842094421387, |
| "learning_rate": 9.364406779661017e-06, |
| "loss": 0.1021, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6365682510260824, |
| "grad_norm": 0.3540663421154022, |
| "learning_rate": 9.36334745762712e-06, |
| "loss": 0.0984, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6376274328081557, |
| "grad_norm": 0.758277177810669, |
| "learning_rate": 9.362288135593221e-06, |
| "loss": 0.1028, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.638686614590229, |
| "grad_norm": 1.088519811630249, |
| "learning_rate": 9.361228813559323e-06, |
| "loss": 0.1025, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6397457963723024, |
| "grad_norm": 0.6219644546508789, |
| "learning_rate": 9.360169491525426e-06, |
| "loss": 0.1012, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6408049781543758, |
| "grad_norm": 0.5811134576797485, |
| "learning_rate": 9.359110169491527e-06, |
| "loss": 0.1008, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6418641599364491, |
| "grad_norm": 0.4735073149204254, |
| "learning_rate": 9.358050847457629e-06, |
| "loss": 0.107, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6429233417185224, |
| "grad_norm": 0.42501819133758545, |
| "learning_rate": 9.35699152542373e-06, |
| "loss": 0.1035, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6439825235005958, |
| "grad_norm": 0.5019701719284058, |
| "learning_rate": 9.355932203389831e-06, |
| "loss": 0.1002, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6450417052826691, |
| "grad_norm": 0.29166609048843384, |
| "learning_rate": 9.354872881355933e-06, |
| "loss": 0.1006, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6461008870647424, |
| "grad_norm": 0.8186270594596863, |
| "learning_rate": 9.353813559322034e-06, |
| "loss": 0.1038, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6471600688468159, |
| "grad_norm": 0.9089385867118835, |
| "learning_rate": 9.352754237288136e-06, |
| "loss": 0.1045, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6482192506288892, |
| "grad_norm": 0.54569011926651, |
| "learning_rate": 9.351694915254239e-06, |
| "loss": 0.1012, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6492784324109625, |
| "grad_norm": 0.6908009052276611, |
| "learning_rate": 9.35063559322034e-06, |
| "loss": 0.1025, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.6503376141930359, |
| "grad_norm": 0.5601445436477661, |
| "learning_rate": 9.349576271186442e-06, |
| "loss": 0.1054, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6513967959751092, |
| "grad_norm": 0.5484585165977478, |
| "learning_rate": 9.348516949152543e-06, |
| "loss": 0.1025, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6524559777571826, |
| "grad_norm": 0.3607555627822876, |
| "learning_rate": 9.347457627118644e-06, |
| "loss": 0.1029, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6535151595392559, |
| "grad_norm": 0.8862431049346924, |
| "learning_rate": 9.346398305084746e-06, |
| "loss": 0.1002, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6545743413213293, |
| "grad_norm": 0.34155333042144775, |
| "learning_rate": 9.345338983050847e-06, |
| "loss": 0.104, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6556335231034026, |
| "grad_norm": 0.33383896946907043, |
| "learning_rate": 9.34427966101695e-06, |
| "loss": 0.1045, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6566927048854759, |
| "grad_norm": 0.7721969485282898, |
| "learning_rate": 9.343220338983052e-06, |
| "loss": 0.1041, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6577518866675494, |
| "grad_norm": 0.341325581073761, |
| "learning_rate": 9.342161016949153e-06, |
| "loss": 0.1061, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.6588110684496227, |
| "grad_norm": 0.4059706926345825, |
| "learning_rate": 9.341101694915256e-06, |
| "loss": 0.1019, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.659870250231696, |
| "grad_norm": 0.425484836101532, |
| "learning_rate": 9.340042372881358e-06, |
| "loss": 0.1028, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.6609294320137694, |
| "grad_norm": 0.3428255617618561, |
| "learning_rate": 9.338983050847459e-06, |
| "loss": 0.0983, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6619886137958427, |
| "grad_norm": 0.43302783370018005, |
| "learning_rate": 9.33792372881356e-06, |
| "loss": 0.1041, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.663047795577916, |
| "grad_norm": 0.4151793122291565, |
| "learning_rate": 9.336864406779662e-06, |
| "loss": 0.1053, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6641069773599894, |
| "grad_norm": 0.3322106599807739, |
| "learning_rate": 9.335805084745763e-06, |
| "loss": 0.0977, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.6651661591420628, |
| "grad_norm": 0.3826170265674591, |
| "learning_rate": 9.334745762711865e-06, |
| "loss": 0.0987, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6662253409241361, |
| "grad_norm": 0.45868292450904846, |
| "learning_rate": 9.333686440677968e-06, |
| "loss": 0.0975, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.6672845227062094, |
| "grad_norm": 0.9810293912887573, |
| "learning_rate": 9.33262711864407e-06, |
| "loss": 0.1012, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6683437044882828, |
| "grad_norm": 0.6601435542106628, |
| "learning_rate": 9.33156779661017e-06, |
| "loss": 0.0984, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6694028862703562, |
| "grad_norm": 0.35631003975868225, |
| "learning_rate": 9.330508474576272e-06, |
| "loss": 0.1006, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6704620680524295, |
| "grad_norm": 0.5122131705284119, |
| "learning_rate": 9.329449152542373e-06, |
| "loss": 0.0995, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.6715212498345029, |
| "grad_norm": 0.4161342680454254, |
| "learning_rate": 9.328389830508475e-06, |
| "loss": 0.1002, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6725804316165762, |
| "grad_norm": 0.39421340823173523, |
| "learning_rate": 9.327330508474576e-06, |
| "loss": 0.099, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.6736396133986495, |
| "grad_norm": 0.3188948631286621, |
| "learning_rate": 9.32627118644068e-06, |
| "loss": 0.096, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6746987951807228, |
| "grad_norm": 0.25863417983055115, |
| "learning_rate": 9.32521186440678e-06, |
| "loss": 0.0998, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.6757579769627963, |
| "grad_norm": 0.3487548232078552, |
| "learning_rate": 9.324152542372882e-06, |
| "loss": 0.098, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6768171587448696, |
| "grad_norm": 1.3317673206329346, |
| "learning_rate": 9.323093220338984e-06, |
| "loss": 0.1035, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.6778763405269429, |
| "grad_norm": 0.9507758021354675, |
| "learning_rate": 9.322033898305085e-06, |
| "loss": 0.1063, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6789355223090163, |
| "grad_norm": 0.49101585149765015, |
| "learning_rate": 9.320974576271186e-06, |
| "loss": 0.0989, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.6799947040910896, |
| "grad_norm": 0.3187989294528961, |
| "learning_rate": 9.319915254237288e-06, |
| "loss": 0.0992, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.681053885873163, |
| "grad_norm": 0.3617320656776428, |
| "learning_rate": 9.318855932203391e-06, |
| "loss": 0.0989, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.6821130676552364, |
| "grad_norm": 0.34317803382873535, |
| "learning_rate": 9.317796610169492e-06, |
| "loss": 0.097, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6831722494373097, |
| "grad_norm": 0.3169088363647461, |
| "learning_rate": 9.316737288135594e-06, |
| "loss": 0.1021, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.684231431219383, |
| "grad_norm": 0.2917908728122711, |
| "learning_rate": 9.315677966101697e-06, |
| "loss": 0.096, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6852906130014563, |
| "grad_norm": 0.4770563840866089, |
| "learning_rate": 9.314618644067798e-06, |
| "loss": 0.0973, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6863497947835298, |
| "grad_norm": 1.0763368606567383, |
| "learning_rate": 9.3135593220339e-06, |
| "loss": 0.1054, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6874089765656031, |
| "grad_norm": 1.1050769090652466, |
| "learning_rate": 9.312500000000001e-06, |
| "loss": 0.1025, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.6884681583476764, |
| "grad_norm": 0.4197370111942291, |
| "learning_rate": 9.311440677966102e-06, |
| "loss": 0.1016, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6895273401297498, |
| "grad_norm": 0.29724442958831787, |
| "learning_rate": 9.310381355932204e-06, |
| "loss": 0.104, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.6905865219118231, |
| "grad_norm": 0.3933386504650116, |
| "learning_rate": 9.309322033898305e-06, |
| "loss": 0.1046, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.6916457036938964, |
| "grad_norm": 0.410281240940094, |
| "learning_rate": 9.308262711864408e-06, |
| "loss": 0.1016, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.6927048854759699, |
| "grad_norm": 0.6148337125778198, |
| "learning_rate": 9.30720338983051e-06, |
| "loss": 0.1007, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.6937640672580432, |
| "grad_norm": 0.41912174224853516, |
| "learning_rate": 9.306144067796611e-06, |
| "loss": 0.0959, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.6948232490401165, |
| "grad_norm": 0.3907654583454132, |
| "learning_rate": 9.305084745762713e-06, |
| "loss": 0.0986, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.6958824308221898, |
| "grad_norm": 0.24811263382434845, |
| "learning_rate": 9.304025423728814e-06, |
| "loss": 0.0963, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.6969416126042632, |
| "grad_norm": 0.4646623134613037, |
| "learning_rate": 9.302966101694915e-06, |
| "loss": 0.0973, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.6980007943863366, |
| "grad_norm": 0.873497486114502, |
| "learning_rate": 9.301906779661017e-06, |
| "loss": 0.0996, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.6990599761684099, |
| "grad_norm": 0.5654221773147583, |
| "learning_rate": 9.300847457627118e-06, |
| "loss": 0.1005, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7001191579504833, |
| "grad_norm": 0.3629545271396637, |
| "learning_rate": 9.299788135593221e-06, |
| "loss": 0.0984, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7011783397325566, |
| "grad_norm": 0.3742941617965698, |
| "learning_rate": 9.298728813559323e-06, |
| "loss": 0.0957, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7022375215146299, |
| "grad_norm": 0.42546311020851135, |
| "learning_rate": 9.297669491525424e-06, |
| "loss": 0.1005, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7032967032967034, |
| "grad_norm": 0.42576131224632263, |
| "learning_rate": 9.296610169491527e-06, |
| "loss": 0.1009, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7043558850787767, |
| "grad_norm": 0.3426741659641266, |
| "learning_rate": 9.295550847457629e-06, |
| "loss": 0.1016, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.70541506686085, |
| "grad_norm": 0.5747078657150269, |
| "learning_rate": 9.29449152542373e-06, |
| "loss": 0.0988, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7064742486429233, |
| "grad_norm": 0.8558834791183472, |
| "learning_rate": 9.293432203389832e-06, |
| "loss": 0.0997, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7075334304249967, |
| "grad_norm": 0.42733004689216614, |
| "learning_rate": 9.292372881355933e-06, |
| "loss": 0.0977, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.70859261220707, |
| "grad_norm": 0.4254518449306488, |
| "learning_rate": 9.291313559322034e-06, |
| "loss": 0.1014, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7096517939891434, |
| "grad_norm": 0.4694596529006958, |
| "learning_rate": 9.290254237288136e-06, |
| "loss": 0.0976, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7107109757712168, |
| "grad_norm": 0.4888492226600647, |
| "learning_rate": 9.289194915254239e-06, |
| "loss": 0.1005, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7117701575532901, |
| "grad_norm": 0.34919607639312744, |
| "learning_rate": 9.28813559322034e-06, |
| "loss": 0.1037, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7128293393353634, |
| "grad_norm": 0.37741026282310486, |
| "learning_rate": 9.287076271186442e-06, |
| "loss": 0.0982, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7138885211174367, |
| "grad_norm": 0.3407898247241974, |
| "learning_rate": 9.286016949152543e-06, |
| "loss": 0.0969, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7149477028995102, |
| "grad_norm": 0.4599168300628662, |
| "learning_rate": 9.284957627118645e-06, |
| "loss": 0.0985, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7160068846815835, |
| "grad_norm": 1.6454333066940308, |
| "learning_rate": 9.283898305084746e-06, |
| "loss": 0.1072, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7170660664636568, |
| "grad_norm": 0.4284563362598419, |
| "learning_rate": 9.282838983050847e-06, |
| "loss": 0.1034, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7181252482457302, |
| "grad_norm": 0.4040115475654602, |
| "learning_rate": 9.28177966101695e-06, |
| "loss": 0.1005, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7191844300278035, |
| "grad_norm": 0.31793713569641113, |
| "learning_rate": 9.280720338983052e-06, |
| "loss": 0.1022, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7202436118098768, |
| "grad_norm": 0.314280241727829, |
| "learning_rate": 9.279661016949153e-06, |
| "loss": 0.0973, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7213027935919503, |
| "grad_norm": 0.3845478296279907, |
| "learning_rate": 9.278601694915255e-06, |
| "loss": 0.1012, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7223619753740236, |
| "grad_norm": 0.5738639235496521, |
| "learning_rate": 9.277542372881356e-06, |
| "loss": 0.1017, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7234211571560969, |
| "grad_norm": 0.5146239995956421, |
| "learning_rate": 9.276483050847457e-06, |
| "loss": 0.0998, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7244803389381702, |
| "grad_norm": 0.31752634048461914, |
| "learning_rate": 9.275423728813559e-06, |
| "loss": 0.099, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7255395207202436, |
| "grad_norm": 1.2720450162887573, |
| "learning_rate": 9.274364406779662e-06, |
| "loss": 0.102, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.726598702502317, |
| "grad_norm": 0.5716597437858582, |
| "learning_rate": 9.273305084745763e-06, |
| "loss": 0.0986, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7276578842843903, |
| "grad_norm": 0.8185603022575378, |
| "learning_rate": 9.272245762711865e-06, |
| "loss": 0.0992, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7287170660664637, |
| "grad_norm": 0.6696334481239319, |
| "learning_rate": 9.271186440677968e-06, |
| "loss": 0.0968, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.729776247848537, |
| "grad_norm": 0.31820279359817505, |
| "learning_rate": 9.27012711864407e-06, |
| "loss": 0.0961, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7308354296306103, |
| "grad_norm": 0.38118937611579895, |
| "learning_rate": 9.26906779661017e-06, |
| "loss": 0.0995, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7318946114126837, |
| "grad_norm": 0.3723813593387604, |
| "learning_rate": 9.268008474576272e-06, |
| "loss": 0.0943, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7329537931947571, |
| "grad_norm": 0.2856021225452423, |
| "learning_rate": 9.266949152542374e-06, |
| "loss": 0.0944, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7340129749768304, |
| "grad_norm": 0.28382545709609985, |
| "learning_rate": 9.265889830508475e-06, |
| "loss": 0.0965, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7350721567589037, |
| "grad_norm": 0.40742388367652893, |
| "learning_rate": 9.264830508474576e-06, |
| "loss": 0.0937, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7361313385409771, |
| "grad_norm": 1.0674604177474976, |
| "learning_rate": 9.26377118644068e-06, |
| "loss": 0.1037, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7371905203230504, |
| "grad_norm": 0.8326146006584167, |
| "learning_rate": 9.262711864406781e-06, |
| "loss": 0.1003, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7382497021051238, |
| "grad_norm": 0.3166416585445404, |
| "learning_rate": 9.261652542372882e-06, |
| "loss": 0.0946, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7393088838871972, |
| "grad_norm": 0.3471001088619232, |
| "learning_rate": 9.260593220338984e-06, |
| "loss": 0.1024, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7403680656692705, |
| "grad_norm": 0.48021578788757324, |
| "learning_rate": 9.259533898305085e-06, |
| "loss": 0.1033, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7414272474513438, |
| "grad_norm": 0.40537741780281067, |
| "learning_rate": 9.258474576271187e-06, |
| "loss": 0.0966, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7424864292334172, |
| "grad_norm": 0.32999980449676514, |
| "learning_rate": 9.257415254237288e-06, |
| "loss": 0.1037, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.7435456110154905, |
| "grad_norm": 0.31000056862831116, |
| "learning_rate": 9.256355932203391e-06, |
| "loss": 0.1036, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7446047927975639, |
| "grad_norm": 0.931529700756073, |
| "learning_rate": 9.255296610169492e-06, |
| "loss": 0.0996, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7456639745796372, |
| "grad_norm": 0.9548348188400269, |
| "learning_rate": 9.254237288135594e-06, |
| "loss": 0.0993, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7467231563617106, |
| "grad_norm": 0.4264669120311737, |
| "learning_rate": 9.253177966101695e-06, |
| "loss": 0.1025, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7477823381437839, |
| "grad_norm": 0.483395516872406, |
| "learning_rate": 9.252118644067798e-06, |
| "loss": 0.0998, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7488415199258572, |
| "grad_norm": 0.8699389696121216, |
| "learning_rate": 9.2510593220339e-06, |
| "loss": 0.0972, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7499007017079307, |
| "grad_norm": 0.6093174815177917, |
| "learning_rate": 9.250000000000001e-06, |
| "loss": 0.1021, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.750959883490004, |
| "grad_norm": 0.3400423526763916, |
| "learning_rate": 9.248940677966103e-06, |
| "loss": 0.0965, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.7520190652720773, |
| "grad_norm": 0.2628816068172455, |
| "learning_rate": 9.247881355932204e-06, |
| "loss": 0.0942, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.7530782470541507, |
| "grad_norm": 0.5596092343330383, |
| "learning_rate": 9.246822033898305e-06, |
| "loss": 0.0985, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.754137428836224, |
| "grad_norm": 0.3505241870880127, |
| "learning_rate": 9.245762711864409e-06, |
| "loss": 0.0977, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.7551966106182973, |
| "grad_norm": 1.2891738414764404, |
| "learning_rate": 9.24470338983051e-06, |
| "loss": 0.1018, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.7562557924003707, |
| "grad_norm": 0.7353067994117737, |
| "learning_rate": 9.243644067796611e-06, |
| "loss": 0.0998, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.7573149741824441, |
| "grad_norm": 0.5030686259269714, |
| "learning_rate": 9.242584745762713e-06, |
| "loss": 0.0948, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.7583741559645174, |
| "grad_norm": 0.3368113338947296, |
| "learning_rate": 9.241525423728814e-06, |
| "loss": 0.0965, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7594333377465907, |
| "grad_norm": 0.42246052622795105, |
| "learning_rate": 9.240466101694916e-06, |
| "loss": 0.0975, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.7604925195286641, |
| "grad_norm": 0.4807589650154114, |
| "learning_rate": 9.239406779661017e-06, |
| "loss": 0.0996, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7615517013107375, |
| "grad_norm": 0.9803975224494934, |
| "learning_rate": 9.238347457627118e-06, |
| "loss": 0.102, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.7626108830928108, |
| "grad_norm": 0.35242709517478943, |
| "learning_rate": 9.237288135593222e-06, |
| "loss": 0.1007, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7636700648748842, |
| "grad_norm": 0.42026689648628235, |
| "learning_rate": 9.236228813559323e-06, |
| "loss": 0.095, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.7647292466569575, |
| "grad_norm": 0.9179818034172058, |
| "learning_rate": 9.235169491525424e-06, |
| "loss": 0.0978, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7657884284390308, |
| "grad_norm": 0.645462155342102, |
| "learning_rate": 9.234110169491526e-06, |
| "loss": 0.0983, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.7668476102211041, |
| "grad_norm": 1.0488462448120117, |
| "learning_rate": 9.233050847457627e-06, |
| "loss": 0.0978, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7679067920031776, |
| "grad_norm": 0.37248262763023376, |
| "learning_rate": 9.231991525423729e-06, |
| "loss": 0.1027, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.7689659737852509, |
| "grad_norm": 0.4381054937839508, |
| "learning_rate": 9.23093220338983e-06, |
| "loss": 0.101, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7700251555673242, |
| "grad_norm": 0.428743839263916, |
| "learning_rate": 9.229872881355933e-06, |
| "loss": 0.0997, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7710843373493976, |
| "grad_norm": 0.455432653427124, |
| "learning_rate": 9.228813559322035e-06, |
| "loss": 0.1007, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.772143519131471, |
| "grad_norm": 0.4211903512477875, |
| "learning_rate": 9.227754237288138e-06, |
| "loss": 0.1019, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.7732027009135443, |
| "grad_norm": 0.3063182830810547, |
| "learning_rate": 9.226694915254239e-06, |
| "loss": 0.098, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7742618826956177, |
| "grad_norm": 0.37543249130249023, |
| "learning_rate": 9.22563559322034e-06, |
| "loss": 0.0957, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.775321064477691, |
| "grad_norm": 1.2880802154541016, |
| "learning_rate": 9.224576271186442e-06, |
| "loss": 0.1037, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7763802462597643, |
| "grad_norm": 0.6766379475593567, |
| "learning_rate": 9.223516949152543e-06, |
| "loss": 0.0988, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.7774394280418376, |
| "grad_norm": 0.5379982590675354, |
| "learning_rate": 9.222457627118645e-06, |
| "loss": 0.0977, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7784986098239111, |
| "grad_norm": 0.35098257660865784, |
| "learning_rate": 9.221398305084746e-06, |
| "loss": 0.1021, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.7795577916059844, |
| "grad_norm": 0.3439309298992157, |
| "learning_rate": 9.220338983050847e-06, |
| "loss": 0.0996, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7806169733880577, |
| "grad_norm": 0.38784995675086975, |
| "learning_rate": 9.21927966101695e-06, |
| "loss": 0.0969, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.7816761551701311, |
| "grad_norm": 0.3558436930179596, |
| "learning_rate": 9.218220338983052e-06, |
| "loss": 0.0995, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7827353369522044, |
| "grad_norm": 0.31726735830307007, |
| "learning_rate": 9.217161016949153e-06, |
| "loss": 0.0968, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.7837945187342777, |
| "grad_norm": 0.3207642138004303, |
| "learning_rate": 9.216101694915255e-06, |
| "loss": 0.0983, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7848537005163512, |
| "grad_norm": 0.3509203791618347, |
| "learning_rate": 9.215042372881356e-06, |
| "loss": 0.0951, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.7859128822984245, |
| "grad_norm": 0.8775836229324341, |
| "learning_rate": 9.213983050847458e-06, |
| "loss": 0.1061, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7869720640804978, |
| "grad_norm": 1.3842896223068237, |
| "learning_rate": 9.212923728813559e-06, |
| "loss": 0.1037, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7880312458625711, |
| "grad_norm": 0.8937103152275085, |
| "learning_rate": 9.211864406779662e-06, |
| "loss": 0.0983, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.7890904276446445, |
| "grad_norm": 1.2839686870574951, |
| "learning_rate": 9.210805084745764e-06, |
| "loss": 0.1005, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.7901496094267179, |
| "grad_norm": 0.5457701683044434, |
| "learning_rate": 9.209745762711865e-06, |
| "loss": 0.0983, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.7912087912087912, |
| "grad_norm": 0.533908486366272, |
| "learning_rate": 9.208686440677966e-06, |
| "loss": 0.1009, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.7922679729908646, |
| "grad_norm": 0.4222644865512848, |
| "learning_rate": 9.207627118644068e-06, |
| "loss": 0.0978, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.7933271547729379, |
| "grad_norm": 0.3377843499183655, |
| "learning_rate": 9.206567796610171e-06, |
| "loss": 0.0966, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.7943863365550112, |
| "grad_norm": 0.3032763600349426, |
| "learning_rate": 9.205508474576272e-06, |
| "loss": 0.0983, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7954455183370847, |
| "grad_norm": 0.8250672817230225, |
| "learning_rate": 9.204449152542374e-06, |
| "loss": 0.1021, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.796504700119158, |
| "grad_norm": 0.9752795100212097, |
| "learning_rate": 9.203389830508475e-06, |
| "loss": 0.1019, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.7975638819012313, |
| "grad_norm": 0.6035350561141968, |
| "learning_rate": 9.202330508474577e-06, |
| "loss": 0.0981, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.7986230636833046, |
| "grad_norm": 0.2572724521160126, |
| "learning_rate": 9.20127118644068e-06, |
| "loss": 0.097, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.799682245465378, |
| "grad_norm": 0.34515053033828735, |
| "learning_rate": 9.200211864406781e-06, |
| "loss": 0.1, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8007414272474513, |
| "grad_norm": 0.4827374219894409, |
| "learning_rate": 9.199152542372882e-06, |
| "loss": 0.0979, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8018006090295247, |
| "grad_norm": 0.3313664197921753, |
| "learning_rate": 9.198093220338984e-06, |
| "loss": 0.0985, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8028597908115981, |
| "grad_norm": 0.8202570080757141, |
| "learning_rate": 9.197033898305085e-06, |
| "loss": 0.1025, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8039189725936714, |
| "grad_norm": 0.29763662815093994, |
| "learning_rate": 9.195974576271187e-06, |
| "loss": 0.099, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8049781543757447, |
| "grad_norm": 0.4088769853115082, |
| "learning_rate": 9.194915254237288e-06, |
| "loss": 0.0978, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8060373361578181, |
| "grad_norm": 0.31369948387145996, |
| "learning_rate": 9.193855932203391e-06, |
| "loss": 0.0996, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8070965179398915, |
| "grad_norm": 0.5770434737205505, |
| "learning_rate": 9.192796610169493e-06, |
| "loss": 0.1017, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8081556997219648, |
| "grad_norm": 1.0593013763427734, |
| "learning_rate": 9.191737288135594e-06, |
| "loss": 0.1015, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8092148815040381, |
| "grad_norm": 0.385418564081192, |
| "learning_rate": 9.190677966101695e-06, |
| "loss": 0.0963, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8102740632861115, |
| "grad_norm": 0.2897985279560089, |
| "learning_rate": 9.189618644067797e-06, |
| "loss": 0.0987, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8113332450681848, |
| "grad_norm": 0.2844506800174713, |
| "learning_rate": 9.188559322033898e-06, |
| "loss": 0.0977, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8123924268502581, |
| "grad_norm": 0.33510622382164, |
| "learning_rate": 9.1875e-06, |
| "loss": 0.0952, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8134516086323316, |
| "grad_norm": 0.37175965309143066, |
| "learning_rate": 9.186440677966101e-06, |
| "loss": 0.096, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8145107904144049, |
| "grad_norm": 0.38538381457328796, |
| "learning_rate": 9.185381355932204e-06, |
| "loss": 0.098, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8155699721964782, |
| "grad_norm": 1.1351072788238525, |
| "learning_rate": 9.184322033898306e-06, |
| "loss": 0.0953, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8166291539785515, |
| "grad_norm": 0.7433465123176575, |
| "learning_rate": 9.183262711864409e-06, |
| "loss": 0.0979, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8176883357606249, |
| "grad_norm": 0.5828851461410522, |
| "learning_rate": 9.18220338983051e-06, |
| "loss": 0.096, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8187475175426983, |
| "grad_norm": 0.350429505109787, |
| "learning_rate": 9.181144067796612e-06, |
| "loss": 0.0944, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8198066993247716, |
| "grad_norm": 0.5436673760414124, |
| "learning_rate": 9.180084745762713e-06, |
| "loss": 0.093, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.820865881106845, |
| "grad_norm": 0.3649758994579315, |
| "learning_rate": 9.179025423728814e-06, |
| "loss": 0.0964, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8219250628889183, |
| "grad_norm": 0.3699047863483429, |
| "learning_rate": 9.177966101694916e-06, |
| "loss": 0.1, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8229842446709916, |
| "grad_norm": 0.5588839054107666, |
| "learning_rate": 9.176906779661017e-06, |
| "loss": 0.0982, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.824043426453065, |
| "grad_norm": 0.28516167402267456, |
| "learning_rate": 9.17584745762712e-06, |
| "loss": 0.0948, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8251026082351384, |
| "grad_norm": 0.5804077386856079, |
| "learning_rate": 9.174788135593222e-06, |
| "loss": 0.0968, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8261617900172117, |
| "grad_norm": 0.5543787479400635, |
| "learning_rate": 9.173728813559323e-06, |
| "loss": 0.0951, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.827220971799285, |
| "grad_norm": 0.8272714018821716, |
| "learning_rate": 9.172669491525425e-06, |
| "loss": 0.0981, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8282801535813584, |
| "grad_norm": 0.8714408278465271, |
| "learning_rate": 9.171610169491526e-06, |
| "loss": 0.096, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8293393353634317, |
| "grad_norm": 0.34228700399398804, |
| "learning_rate": 9.170550847457627e-06, |
| "loss": 0.101, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.830398517145505, |
| "grad_norm": 0.6962174773216248, |
| "learning_rate": 9.169491525423729e-06, |
| "loss": 0.098, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.8314576989275785, |
| "grad_norm": 0.35371822118759155, |
| "learning_rate": 9.16843220338983e-06, |
| "loss": 0.1023, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8325168807096518, |
| "grad_norm": 0.36805784702301025, |
| "learning_rate": 9.167372881355933e-06, |
| "loss": 0.0958, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8335760624917251, |
| "grad_norm": 0.2960814833641052, |
| "learning_rate": 9.166313559322035e-06, |
| "loss": 0.0987, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8346352442737985, |
| "grad_norm": 0.3210856020450592, |
| "learning_rate": 9.165254237288136e-06, |
| "loss": 0.0978, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8356944260558719, |
| "grad_norm": 0.6966099143028259, |
| "learning_rate": 9.164194915254238e-06, |
| "loss": 0.0986, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8367536078379452, |
| "grad_norm": 0.86994469165802, |
| "learning_rate": 9.163135593220339e-06, |
| "loss": 0.1009, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8378127896200185, |
| "grad_norm": 1.0499882698059082, |
| "learning_rate": 9.162076271186442e-06, |
| "loss": 0.1001, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.8388719714020919, |
| "grad_norm": 0.25604158639907837, |
| "learning_rate": 9.161016949152543e-06, |
| "loss": 0.0968, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8399311531841652, |
| "grad_norm": 0.2237943410873413, |
| "learning_rate": 9.159957627118645e-06, |
| "loss": 0.0974, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8409903349662385, |
| "grad_norm": 0.2961702048778534, |
| "learning_rate": 9.158898305084746e-06, |
| "loss": 0.0963, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.842049516748312, |
| "grad_norm": 0.5092357397079468, |
| "learning_rate": 9.157838983050848e-06, |
| "loss": 0.0986, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8431086985303853, |
| "grad_norm": 0.8456157445907593, |
| "learning_rate": 9.15677966101695e-06, |
| "loss": 0.0989, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8441678803124586, |
| "grad_norm": 0.25902438163757324, |
| "learning_rate": 9.155720338983052e-06, |
| "loss": 0.0971, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.845227062094532, |
| "grad_norm": 0.43051254749298096, |
| "learning_rate": 9.154661016949154e-06, |
| "loss": 0.094, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.8462862438766053, |
| "grad_norm": 1.1956896781921387, |
| "learning_rate": 9.153601694915255e-06, |
| "loss": 0.0955, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.8473454256586787, |
| "grad_norm": 0.40283483266830444, |
| "learning_rate": 9.152542372881356e-06, |
| "loss": 0.0948, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.848404607440752, |
| "grad_norm": 1.097756266593933, |
| "learning_rate": 9.151483050847458e-06, |
| "loss": 0.1013, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.8494637892228254, |
| "grad_norm": 0.4723232686519623, |
| "learning_rate": 9.15042372881356e-06, |
| "loss": 0.0934, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.8505229710048987, |
| "grad_norm": 0.35449033975601196, |
| "learning_rate": 9.149364406779662e-06, |
| "loss": 0.0944, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.851582152786972, |
| "grad_norm": 0.33567023277282715, |
| "learning_rate": 9.148305084745764e-06, |
| "loss": 0.0987, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.8526413345690455, |
| "grad_norm": 0.3942662179470062, |
| "learning_rate": 9.147245762711865e-06, |
| "loss": 0.0972, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.8537005163511188, |
| "grad_norm": 0.3090916872024536, |
| "learning_rate": 9.146186440677967e-06, |
| "loss": 0.0962, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.8547596981331921, |
| "grad_norm": 0.7566470503807068, |
| "learning_rate": 9.145127118644068e-06, |
| "loss": 0.101, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.8558188799152655, |
| "grad_norm": 0.3098606467247009, |
| "learning_rate": 9.14406779661017e-06, |
| "loss": 0.098, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.8568780616973388, |
| "grad_norm": 0.6233766078948975, |
| "learning_rate": 9.14300847457627e-06, |
| "loss": 0.0964, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.8579372434794121, |
| "grad_norm": 0.5337977409362793, |
| "learning_rate": 9.141949152542374e-06, |
| "loss": 0.1023, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.8589964252614855, |
| "grad_norm": 0.33733704686164856, |
| "learning_rate": 9.140889830508475e-06, |
| "loss": 0.0966, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.8600556070435589, |
| "grad_norm": 0.25345900654792786, |
| "learning_rate": 9.139830508474577e-06, |
| "loss": 0.0981, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.8611147888256322, |
| "grad_norm": 0.42174944281578064, |
| "learning_rate": 9.13877118644068e-06, |
| "loss": 0.0975, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.8621739706077055, |
| "grad_norm": 0.5487157702445984, |
| "learning_rate": 9.137711864406781e-06, |
| "loss": 0.0967, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.8632331523897789, |
| "grad_norm": 0.2961113154888153, |
| "learning_rate": 9.136652542372883e-06, |
| "loss": 0.0982, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.8642923341718523, |
| "grad_norm": 0.26182422041893005, |
| "learning_rate": 9.135593220338984e-06, |
| "loss": 0.0957, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.8653515159539256, |
| "grad_norm": 0.2555879056453705, |
| "learning_rate": 9.134533898305085e-06, |
| "loss": 0.0891, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.866410697735999, |
| "grad_norm": 0.6295573711395264, |
| "learning_rate": 9.133474576271187e-06, |
| "loss": 0.0967, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.8674698795180723, |
| "grad_norm": 0.2758654057979584, |
| "learning_rate": 9.132415254237288e-06, |
| "loss": 0.0965, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.8685290613001456, |
| "grad_norm": 1.2126902341842651, |
| "learning_rate": 9.131355932203391e-06, |
| "loss": 0.0979, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8695882430822189, |
| "grad_norm": 0.30555427074432373, |
| "learning_rate": 9.130296610169493e-06, |
| "loss": 0.0942, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.8706474248642924, |
| "grad_norm": 0.3945145308971405, |
| "learning_rate": 9.129237288135594e-06, |
| "loss": 0.0963, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8717066066463657, |
| "grad_norm": 0.2948249876499176, |
| "learning_rate": 9.128177966101696e-06, |
| "loss": 0.0987, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.872765788428439, |
| "grad_norm": 0.32726436853408813, |
| "learning_rate": 9.127118644067797e-06, |
| "loss": 0.0956, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8738249702105124, |
| "grad_norm": 0.5176602602005005, |
| "learning_rate": 9.126059322033898e-06, |
| "loss": 0.0992, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.8748841519925857, |
| "grad_norm": 0.2725953459739685, |
| "learning_rate": 9.125e-06, |
| "loss": 0.0971, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.875943333774659, |
| "grad_norm": 0.275778591632843, |
| "learning_rate": 9.123940677966103e-06, |
| "loss": 0.0946, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.8770025155567325, |
| "grad_norm": 0.6902645826339722, |
| "learning_rate": 9.122881355932204e-06, |
| "loss": 0.0975, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8780616973388058, |
| "grad_norm": 0.5743526220321655, |
| "learning_rate": 9.121822033898306e-06, |
| "loss": 0.0978, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.8791208791208791, |
| "grad_norm": 0.3797874450683594, |
| "learning_rate": 9.120762711864407e-06, |
| "loss": 0.0947, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8801800609029524, |
| "grad_norm": 0.30438700318336487, |
| "learning_rate": 9.119703389830509e-06, |
| "loss": 0.0972, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8812392426850258, |
| "grad_norm": 0.25724926590919495, |
| "learning_rate": 9.11864406779661e-06, |
| "loss": 0.0945, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.8822984244670992, |
| "grad_norm": 0.3248598277568817, |
| "learning_rate": 9.117584745762713e-06, |
| "loss": 0.0938, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.8833576062491725, |
| "grad_norm": 0.40238186717033386, |
| "learning_rate": 9.116525423728815e-06, |
| "loss": 0.0943, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8844167880312459, |
| "grad_norm": 0.4883701801300049, |
| "learning_rate": 9.115466101694916e-06, |
| "loss": 0.0982, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.8854759698133192, |
| "grad_norm": 0.46604031324386597, |
| "learning_rate": 9.114406779661017e-06, |
| "loss": 0.0968, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.8865351515953925, |
| "grad_norm": 0.32364338636398315, |
| "learning_rate": 9.11334745762712e-06, |
| "loss": 0.0978, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.887594333377466, |
| "grad_norm": 0.2980561852455139, |
| "learning_rate": 9.112288135593222e-06, |
| "loss": 0.0939, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.8886535151595393, |
| "grad_norm": 0.2934180796146393, |
| "learning_rate": 9.111228813559323e-06, |
| "loss": 0.0943, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.8897126969416126, |
| "grad_norm": 0.3933320939540863, |
| "learning_rate": 9.110169491525425e-06, |
| "loss": 0.092, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8907718787236859, |
| "grad_norm": 0.5394145250320435, |
| "learning_rate": 9.109110169491526e-06, |
| "loss": 0.0966, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.8918310605057593, |
| "grad_norm": 0.4660944938659668, |
| "learning_rate": 9.108050847457628e-06, |
| "loss": 0.0971, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.8928902422878326, |
| "grad_norm": 0.2673965096473694, |
| "learning_rate": 9.106991525423729e-06, |
| "loss": 0.0956, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.893949424069906, |
| "grad_norm": 0.9778403043746948, |
| "learning_rate": 9.10593220338983e-06, |
| "loss": 0.0956, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.8950086058519794, |
| "grad_norm": 0.5092254877090454, |
| "learning_rate": 9.104872881355933e-06, |
| "loss": 0.0954, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.8960677876340527, |
| "grad_norm": 0.2559935450553894, |
| "learning_rate": 9.103813559322035e-06, |
| "loss": 0.0928, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.897126969416126, |
| "grad_norm": 0.2735789716243744, |
| "learning_rate": 9.102754237288136e-06, |
| "loss": 0.0941, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.8981861511981994, |
| "grad_norm": 0.6607424020767212, |
| "learning_rate": 9.101694915254238e-06, |
| "loss": 0.0934, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.8992453329802728, |
| "grad_norm": 0.5593097805976868, |
| "learning_rate": 9.100635593220339e-06, |
| "loss": 0.0956, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9003045147623461, |
| "grad_norm": 0.7994453310966492, |
| "learning_rate": 9.09957627118644e-06, |
| "loss": 0.0962, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9013636965444194, |
| "grad_norm": 0.29829198122024536, |
| "learning_rate": 9.098516949152542e-06, |
| "loss": 0.0971, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9024228783264928, |
| "grad_norm": 0.3302725851535797, |
| "learning_rate": 9.097457627118645e-06, |
| "loss": 0.0945, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9034820601085661, |
| "grad_norm": 0.3341391682624817, |
| "learning_rate": 9.096398305084746e-06, |
| "loss": 0.0933, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9045412418906394, |
| "grad_norm": 0.45615267753601074, |
| "learning_rate": 9.09533898305085e-06, |
| "loss": 0.0945, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9056004236727129, |
| "grad_norm": 0.3367563784122467, |
| "learning_rate": 9.094279661016951e-06, |
| "loss": 0.096, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9066596054547862, |
| "grad_norm": 0.4675491154193878, |
| "learning_rate": 9.093220338983052e-06, |
| "loss": 0.0921, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9077187872368595, |
| "grad_norm": 1.0834640264511108, |
| "learning_rate": 9.092161016949154e-06, |
| "loss": 0.0967, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9087779690189329, |
| "grad_norm": 0.336375892162323, |
| "learning_rate": 9.091101694915255e-06, |
| "loss": 0.0976, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9098371508010062, |
| "grad_norm": 0.5236802101135254, |
| "learning_rate": 9.090042372881357e-06, |
| "loss": 0.0926, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.9108963325830796, |
| "grad_norm": 0.30341917276382446, |
| "learning_rate": 9.088983050847458e-06, |
| "loss": 0.0929, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9119555143651529, |
| "grad_norm": 0.33530858159065247, |
| "learning_rate": 9.08792372881356e-06, |
| "loss": 0.0966, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9130146961472263, |
| "grad_norm": 0.39219993352890015, |
| "learning_rate": 9.086864406779663e-06, |
| "loss": 0.0964, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9140738779292996, |
| "grad_norm": 1.3592567443847656, |
| "learning_rate": 9.085805084745764e-06, |
| "loss": 0.1003, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9151330597113729, |
| "grad_norm": 0.28577157855033875, |
| "learning_rate": 9.084745762711865e-06, |
| "loss": 0.1003, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9161922414934464, |
| "grad_norm": 0.2658151686191559, |
| "learning_rate": 9.083686440677967e-06, |
| "loss": 0.0993, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9172514232755197, |
| "grad_norm": 0.26816198229789734, |
| "learning_rate": 9.082627118644068e-06, |
| "loss": 0.0915, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.918310605057593, |
| "grad_norm": 0.5962366461753845, |
| "learning_rate": 9.08156779661017e-06, |
| "loss": 0.0972, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9193697868396663, |
| "grad_norm": 0.2852391302585602, |
| "learning_rate": 9.080508474576271e-06, |
| "loss": 0.0958, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9204289686217397, |
| "grad_norm": 0.44219645857810974, |
| "learning_rate": 9.079449152542374e-06, |
| "loss": 0.0978, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.921488150403813, |
| "grad_norm": 0.27444911003112793, |
| "learning_rate": 9.078389830508476e-06, |
| "loss": 0.0942, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9225473321858864, |
| "grad_norm": 0.5224287509918213, |
| "learning_rate": 9.077330508474577e-06, |
| "loss": 0.0943, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9236065139679598, |
| "grad_norm": 0.2701222896575928, |
| "learning_rate": 9.076271186440678e-06, |
| "loss": 0.0936, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9246656957500331, |
| "grad_norm": 0.3342016637325287, |
| "learning_rate": 9.07521186440678e-06, |
| "loss": 0.0894, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.9257248775321064, |
| "grad_norm": 0.30203700065612793, |
| "learning_rate": 9.074152542372881e-06, |
| "loss": 0.0958, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9267840593141798, |
| "grad_norm": 0.5597049593925476, |
| "learning_rate": 9.073093220338984e-06, |
| "loss": 0.0937, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9278432410962532, |
| "grad_norm": 0.3891024589538574, |
| "learning_rate": 9.072033898305086e-06, |
| "loss": 0.0924, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9289024228783265, |
| "grad_norm": 0.8529596924781799, |
| "learning_rate": 9.070974576271187e-06, |
| "loss": 0.0927, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.9299616046603998, |
| "grad_norm": 0.24214965105056763, |
| "learning_rate": 9.069915254237288e-06, |
| "loss": 0.0918, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9310207864424732, |
| "grad_norm": 0.26121532917022705, |
| "learning_rate": 9.068855932203392e-06, |
| "loss": 0.0972, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9320799682245465, |
| "grad_norm": 0.2927854359149933, |
| "learning_rate": 9.067796610169493e-06, |
| "loss": 0.0932, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9331391500066198, |
| "grad_norm": 0.30440691113471985, |
| "learning_rate": 9.066737288135594e-06, |
| "loss": 0.0943, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9341983317886933, |
| "grad_norm": 0.2894492745399475, |
| "learning_rate": 9.065677966101696e-06, |
| "loss": 0.0919, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9352575135707666, |
| "grad_norm": 0.28730008006095886, |
| "learning_rate": 9.064618644067797e-06, |
| "loss": 0.0924, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9363166953528399, |
| "grad_norm": 1.8249741792678833, |
| "learning_rate": 9.063559322033899e-06, |
| "loss": 0.0961, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.9373758771349133, |
| "grad_norm": 0.44120654463768005, |
| "learning_rate": 9.0625e-06, |
| "loss": 0.0938, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9384350589169866, |
| "grad_norm": 0.29483693838119507, |
| "learning_rate": 9.061440677966103e-06, |
| "loss": 0.0936, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.93949424069906, |
| "grad_norm": 0.33580589294433594, |
| "learning_rate": 9.060381355932205e-06, |
| "loss": 0.0967, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9405534224811333, |
| "grad_norm": 0.3942689597606659, |
| "learning_rate": 9.059322033898306e-06, |
| "loss": 0.0941, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.9416126042632067, |
| "grad_norm": 0.43941256403923035, |
| "learning_rate": 9.058262711864407e-06, |
| "loss": 0.0996, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.94267178604528, |
| "grad_norm": 0.2815316319465637, |
| "learning_rate": 9.057203389830509e-06, |
| "loss": 0.0962, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.9437309678273533, |
| "grad_norm": 0.3286428451538086, |
| "learning_rate": 9.05614406779661e-06, |
| "loss": 0.0939, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.9447901496094268, |
| "grad_norm": 0.2403406798839569, |
| "learning_rate": 9.055084745762712e-06, |
| "loss": 0.095, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.9458493313915001, |
| "grad_norm": 0.7640528082847595, |
| "learning_rate": 9.054025423728813e-06, |
| "loss": 0.0909, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.9469085131735734, |
| "grad_norm": 0.756924033164978, |
| "learning_rate": 9.052966101694916e-06, |
| "loss": 0.0945, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.9479676949556468, |
| "grad_norm": 0.3299170732498169, |
| "learning_rate": 9.051906779661018e-06, |
| "loss": 0.0936, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.9490268767377201, |
| "grad_norm": 0.2867504060268402, |
| "learning_rate": 9.05084745762712e-06, |
| "loss": 0.0927, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.9500860585197934, |
| "grad_norm": 0.7162857055664062, |
| "learning_rate": 9.049788135593222e-06, |
| "loss": 0.0898, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.9511452403018668, |
| "grad_norm": 0.2733360230922699, |
| "learning_rate": 9.048728813559323e-06, |
| "loss": 0.092, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.9522044220839402, |
| "grad_norm": 0.991868257522583, |
| "learning_rate": 9.047669491525425e-06, |
| "loss": 0.098, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.9532636038660135, |
| "grad_norm": 0.34536120295524597, |
| "learning_rate": 9.046610169491526e-06, |
| "loss": 0.0917, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.9543227856480868, |
| "grad_norm": 0.29699528217315674, |
| "learning_rate": 9.045550847457628e-06, |
| "loss": 0.0953, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.9553819674301602, |
| "grad_norm": 0.25606414675712585, |
| "learning_rate": 9.044491525423729e-06, |
| "loss": 0.0928, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.9564411492122336, |
| "grad_norm": 0.6420966982841492, |
| "learning_rate": 9.043432203389832e-06, |
| "loss": 0.0951, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.9575003309943069, |
| "grad_norm": 1.2128124237060547, |
| "learning_rate": 9.042372881355934e-06, |
| "loss": 0.0908, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.9585595127763803, |
| "grad_norm": 0.25835537910461426, |
| "learning_rate": 9.041313559322035e-06, |
| "loss": 0.0981, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.9596186945584536, |
| "grad_norm": 0.25690484046936035, |
| "learning_rate": 9.040254237288136e-06, |
| "loss": 0.0925, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.9606778763405269, |
| "grad_norm": 0.327921986579895, |
| "learning_rate": 9.039194915254238e-06, |
| "loss": 0.0898, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.9617370581226002, |
| "grad_norm": 0.3588384985923767, |
| "learning_rate": 9.03813559322034e-06, |
| "loss": 0.0904, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.9627962399046737, |
| "grad_norm": 0.373099148273468, |
| "learning_rate": 9.03707627118644e-06, |
| "loss": 0.0907, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.6619918346405029, |
| "learning_rate": 9.036016949152542e-06, |
| "loss": 0.0945, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.9649146034688203, |
| "grad_norm": 0.3509720265865326, |
| "learning_rate": 9.034957627118645e-06, |
| "loss": 0.0939, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.9659737852508937, |
| "grad_norm": 0.3237987756729126, |
| "learning_rate": 9.033898305084747e-06, |
| "loss": 0.0901, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.967032967032967, |
| "grad_norm": 0.6936929225921631, |
| "learning_rate": 9.032838983050848e-06, |
| "loss": 0.0984, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.9680921488150404, |
| "grad_norm": 0.38148897886276245, |
| "learning_rate": 9.03177966101695e-06, |
| "loss": 0.0939, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.9691513305971138, |
| "grad_norm": 0.4383428990840912, |
| "learning_rate": 9.03072033898305e-06, |
| "loss": 0.0934, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.9702105123791871, |
| "grad_norm": 0.5336405634880066, |
| "learning_rate": 9.029661016949152e-06, |
| "loss": 0.0937, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.9712696941612604, |
| "grad_norm": 0.3162240982055664, |
| "learning_rate": 9.028601694915255e-06, |
| "loss": 0.0952, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.9723288759433337, |
| "grad_norm": 0.2739526927471161, |
| "learning_rate": 9.027542372881357e-06, |
| "loss": 0.0925, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.9733880577254072, |
| "grad_norm": 0.3420490622520447, |
| "learning_rate": 9.026483050847458e-06, |
| "loss": 0.0928, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.9744472395074805, |
| "grad_norm": 0.3407108187675476, |
| "learning_rate": 9.02542372881356e-06, |
| "loss": 0.0912, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.9755064212895538, |
| "grad_norm": 0.3736214339733124, |
| "learning_rate": 9.024364406779663e-06, |
| "loss": 0.0936, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.9765656030716272, |
| "grad_norm": 0.4933827519416809, |
| "learning_rate": 9.023305084745764e-06, |
| "loss": 0.0979, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9776247848537005, |
| "grad_norm": 0.5475727915763855, |
| "learning_rate": 9.022245762711866e-06, |
| "loss": 0.0956, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.9786839666357738, |
| "grad_norm": 0.29296788573265076, |
| "learning_rate": 9.021186440677967e-06, |
| "loss": 0.0946, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9797431484178473, |
| "grad_norm": 0.29160603880882263, |
| "learning_rate": 9.020127118644068e-06, |
| "loss": 0.0935, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.9808023301999206, |
| "grad_norm": 0.3513566553592682, |
| "learning_rate": 9.01906779661017e-06, |
| "loss": 0.0895, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9818615119819939, |
| "grad_norm": 0.49719667434692383, |
| "learning_rate": 9.018008474576271e-06, |
| "loss": 0.0907, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.9829206937640672, |
| "grad_norm": 0.7228937149047852, |
| "learning_rate": 9.016949152542374e-06, |
| "loss": 0.0924, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.9839798755461406, |
| "grad_norm": 0.9527651071548462, |
| "learning_rate": 9.015889830508476e-06, |
| "loss": 0.0979, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.985039057328214, |
| "grad_norm": 0.2832454741001129, |
| "learning_rate": 9.014830508474577e-06, |
| "loss": 0.0937, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9860982391102873, |
| "grad_norm": 0.8853733539581299, |
| "learning_rate": 9.013771186440679e-06, |
| "loss": 0.0943, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.9871574208923607, |
| "grad_norm": 0.5969071984291077, |
| "learning_rate": 9.01271186440678e-06, |
| "loss": 0.0961, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.988216602674434, |
| "grad_norm": 0.6646391749382019, |
| "learning_rate": 9.011652542372881e-06, |
| "loss": 0.0894, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.9892757844565073, |
| "grad_norm": 0.3108821511268616, |
| "learning_rate": 9.010593220338983e-06, |
| "loss": 0.0941, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.9903349662385807, |
| "grad_norm": 0.3083324432373047, |
| "learning_rate": 9.009533898305086e-06, |
| "loss": 0.091, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.9913941480206541, |
| "grad_norm": 0.33282625675201416, |
| "learning_rate": 9.008474576271187e-06, |
| "loss": 0.0925, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.9924533298027274, |
| "grad_norm": 0.9061787128448486, |
| "learning_rate": 9.007415254237289e-06, |
| "loss": 0.0933, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.9935125115848007, |
| "grad_norm": 0.33517178893089294, |
| "learning_rate": 9.006355932203392e-06, |
| "loss": 0.0956, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.9945716933668741, |
| "grad_norm": 0.6129999160766602, |
| "learning_rate": 9.005296610169493e-06, |
| "loss": 0.0901, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.9956308751489474, |
| "grad_norm": 0.38508912920951843, |
| "learning_rate": 9.004237288135595e-06, |
| "loss": 0.0924, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9966900569310208, |
| "grad_norm": 1.1568593978881836, |
| "learning_rate": 9.003177966101696e-06, |
| "loss": 0.0973, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.9977492387130942, |
| "grad_norm": 0.3170551061630249, |
| "learning_rate": 9.002118644067797e-06, |
| "loss": 0.0966, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.9988084204951675, |
| "grad_norm": 0.3123176395893097, |
| "learning_rate": 9.001059322033899e-06, |
| "loss": 0.0923, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.9998676022772408, |
| "grad_norm": 0.38239729404449463, |
| "learning_rate": 9e-06, |
| "loss": 0.0991, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.9998676022772408, |
| "eval_accuracy": 0.9741, |
| "eval_best_f1_from_thresholding": 0.15081967213114753, |
| "eval_loss": 0.14503583312034607, |
| "eval_matthews_corrcoef": 0.15208233188029333, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9868494541761869, |
| "eval_negative_class_precision": 0.9928483857785043, |
| "eval_negative_class_recall": 0.9809225799939437, |
| "eval_positive_class_f1": 0.15081967213114755, |
| "eval_positive_class_precision": 0.10849056603773585, |
| "eval_positive_class_recall": 0.24731182795698925, |
| "eval_roc_auc": 0.8164722239407131, |
| "eval_runtime": 20.7251, |
| "eval_samples_per_second": 482.507, |
| "eval_steps_per_second": 7.575, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.08045551180839539, |
| "learning_rate": 8.998940677966103e-06, |
| "loss": 0.0124, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.0010591817820733, |
| "grad_norm": 0.32438716292381287, |
| "learning_rate": 8.997881355932205e-06, |
| "loss": 0.093, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.0021183635641466, |
| "grad_norm": 0.34341174364089966, |
| "learning_rate": 8.996822033898306e-06, |
| "loss": 0.0952, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.00317754534622, |
| "grad_norm": 0.30472978949546814, |
| "learning_rate": 8.995762711864408e-06, |
| "loss": 0.0923, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.0042367271282935, |
| "grad_norm": 0.8406989574432373, |
| "learning_rate": 8.994703389830509e-06, |
| "loss": 0.0964, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.0052959089103668, |
| "grad_norm": 0.338326632976532, |
| "learning_rate": 8.99364406779661e-06, |
| "loss": 0.088, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0063550906924401, |
| "grad_norm": 0.47067689895629883, |
| "learning_rate": 8.992584745762712e-06, |
| "loss": 0.0976, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.0074142724745134, |
| "grad_norm": 0.40675660967826843, |
| "learning_rate": 8.991525423728815e-06, |
| "loss": 0.0906, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.0084734542565867, |
| "grad_norm": 0.3261379301548004, |
| "learning_rate": 8.990466101694916e-06, |
| "loss": 0.095, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.00953263603866, |
| "grad_norm": 0.31926316022872925, |
| "learning_rate": 8.989406779661018e-06, |
| "loss": 0.0948, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0105918178207334, |
| "grad_norm": 0.603122889995575, |
| "learning_rate": 8.988347457627119e-06, |
| "loss": 0.0925, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.011650999602807, |
| "grad_norm": 0.26193147897720337, |
| "learning_rate": 8.98728813559322e-06, |
| "loss": 0.0906, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.0127101813848802, |
| "grad_norm": 1.3907825946807861, |
| "learning_rate": 8.986228813559322e-06, |
| "loss": 0.0973, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.0137693631669535, |
| "grad_norm": 0.30133160948753357, |
| "learning_rate": 8.985169491525423e-06, |
| "loss": 0.0936, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0148285449490269, |
| "grad_norm": 0.40129804611206055, |
| "learning_rate": 8.984110169491526e-06, |
| "loss": 0.0921, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.0158877267311002, |
| "grad_norm": 0.6812584400177002, |
| "learning_rate": 8.983050847457628e-06, |
| "loss": 0.0941, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0169469085131735, |
| "grad_norm": 0.680328905582428, |
| "learning_rate": 8.98199152542373e-06, |
| "loss": 0.0929, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.018006090295247, |
| "grad_norm": 0.433712899684906, |
| "learning_rate": 8.980932203389832e-06, |
| "loss": 0.0978, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.0190652720773203, |
| "grad_norm": 0.44195568561553955, |
| "learning_rate": 8.979872881355934e-06, |
| "loss": 0.0977, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.0201244538593937, |
| "grad_norm": 0.324758380651474, |
| "learning_rate": 8.978813559322035e-06, |
| "loss": 0.0997, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.021183635641467, |
| "grad_norm": 0.32022592425346375, |
| "learning_rate": 8.977754237288137e-06, |
| "loss": 0.0953, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.0222428174235403, |
| "grad_norm": 0.38843539357185364, |
| "learning_rate": 8.976694915254238e-06, |
| "loss": 0.0981, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.0233019992056136, |
| "grad_norm": 0.36759236454963684, |
| "learning_rate": 8.97563559322034e-06, |
| "loss": 0.096, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.024361180987687, |
| "grad_norm": 0.44051095843315125, |
| "learning_rate": 8.974576271186441e-06, |
| "loss": 0.098, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.0254203627697605, |
| "grad_norm": 0.3786701560020447, |
| "learning_rate": 8.973516949152544e-06, |
| "loss": 0.0939, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.0264795445518338, |
| "grad_norm": 1.195036768913269, |
| "learning_rate": 8.972457627118645e-06, |
| "loss": 0.0972, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.027538726333907, |
| "grad_norm": 0.2937662601470947, |
| "learning_rate": 8.971398305084747e-06, |
| "loss": 0.0953, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.0285979081159804, |
| "grad_norm": 0.5330064296722412, |
| "learning_rate": 8.970338983050848e-06, |
| "loss": 0.0974, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.0296570898980537, |
| "grad_norm": 0.2991742789745331, |
| "learning_rate": 8.96927966101695e-06, |
| "loss": 0.0945, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.030716271680127, |
| "grad_norm": 0.34784451127052307, |
| "learning_rate": 8.968220338983051e-06, |
| "loss": 0.0907, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.0317754534622003, |
| "grad_norm": 0.3017341196537018, |
| "learning_rate": 8.967161016949152e-06, |
| "loss": 0.0932, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.0328346352442739, |
| "grad_norm": 0.27960410714149475, |
| "learning_rate": 8.966101694915254e-06, |
| "loss": 0.0872, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.0338938170263472, |
| "grad_norm": 0.950781524181366, |
| "learning_rate": 8.965042372881357e-06, |
| "loss": 0.0942, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.0349529988084205, |
| "grad_norm": 1.0935500860214233, |
| "learning_rate": 8.963983050847458e-06, |
| "loss": 0.0947, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.0360121805904938, |
| "grad_norm": 1.0450522899627686, |
| "learning_rate": 8.96292372881356e-06, |
| "loss": 0.0934, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.0370713623725671, |
| "grad_norm": 0.31627514958381653, |
| "learning_rate": 8.961864406779663e-06, |
| "loss": 0.0943, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.0381305441546405, |
| "grad_norm": 0.33799999952316284, |
| "learning_rate": 8.960805084745764e-06, |
| "loss": 0.097, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.039189725936714, |
| "grad_norm": 0.4043204188346863, |
| "learning_rate": 8.959745762711866e-06, |
| "loss": 0.0882, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.0402489077187873, |
| "grad_norm": 0.7949614524841309, |
| "learning_rate": 8.958686440677967e-06, |
| "loss": 0.0928, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.0413080895008606, |
| "grad_norm": 0.4171614944934845, |
| "learning_rate": 8.957627118644069e-06, |
| "loss": 0.0954, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.042367271282934, |
| "grad_norm": 0.34418338537216187, |
| "learning_rate": 8.95656779661017e-06, |
| "loss": 0.0912, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.0434264530650073, |
| "grad_norm": 0.4631114602088928, |
| "learning_rate": 8.955508474576271e-06, |
| "loss": 0.0956, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.0444856348470806, |
| "grad_norm": 1.044867992401123, |
| "learning_rate": 8.954449152542374e-06, |
| "loss": 0.0932, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.0455448166291539, |
| "grad_norm": 0.6983899474143982, |
| "learning_rate": 8.953389830508476e-06, |
| "loss": 0.0996, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.0466039984112274, |
| "grad_norm": 0.5399383306503296, |
| "learning_rate": 8.952330508474577e-06, |
| "loss": 0.0953, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.0476631801933007, |
| "grad_norm": 0.30072763562202454, |
| "learning_rate": 8.951271186440679e-06, |
| "loss": 0.0946, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.048722361975374, |
| "grad_norm": 0.7012078762054443, |
| "learning_rate": 8.95021186440678e-06, |
| "loss": 0.0963, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.0497815437574474, |
| "grad_norm": 0.3555310070514679, |
| "learning_rate": 8.949152542372881e-06, |
| "loss": 0.0968, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.0508407255395207, |
| "grad_norm": 0.40114033222198486, |
| "learning_rate": 8.948093220338983e-06, |
| "loss": 0.0968, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.051899907321594, |
| "grad_norm": 0.3639932870864868, |
| "learning_rate": 8.947033898305086e-06, |
| "loss": 0.0947, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.0529590891036673, |
| "grad_norm": 0.3300527036190033, |
| "learning_rate": 8.945974576271187e-06, |
| "loss": 0.0971, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.0540182708857408, |
| "grad_norm": 0.3679925203323364, |
| "learning_rate": 8.944915254237289e-06, |
| "loss": 0.0952, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.0550774526678142, |
| "grad_norm": 1.186897873878479, |
| "learning_rate": 8.94385593220339e-06, |
| "loss": 0.0963, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.0561366344498875, |
| "grad_norm": 0.6484549045562744, |
| "learning_rate": 8.942796610169492e-06, |
| "loss": 0.0934, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.0571958162319608, |
| "grad_norm": 0.5084095597267151, |
| "learning_rate": 8.941737288135593e-06, |
| "loss": 0.0928, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.058254998014034, |
| "grad_norm": 0.5164026021957397, |
| "learning_rate": 8.940677966101694e-06, |
| "loss": 0.0967, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0593141797961074, |
| "grad_norm": 0.2954999804496765, |
| "learning_rate": 8.939618644067798e-06, |
| "loss": 0.0963, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.060373361578181, |
| "grad_norm": 0.2980453372001648, |
| "learning_rate": 8.938559322033899e-06, |
| "loss": 0.0951, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.0614325433602543, |
| "grad_norm": 0.3077160120010376, |
| "learning_rate": 8.9375e-06, |
| "loss": 0.0943, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.0624917251423276, |
| "grad_norm": 0.2690112292766571, |
| "learning_rate": 8.936440677966104e-06, |
| "loss": 0.0941, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.063550906924401, |
| "grad_norm": 0.9549407362937927, |
| "learning_rate": 8.935381355932205e-06, |
| "loss": 0.0951, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.0646100887064742, |
| "grad_norm": 1.4373801946640015, |
| "learning_rate": 8.934322033898306e-06, |
| "loss": 0.0955, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.0656692704885475, |
| "grad_norm": 0.8598366379737854, |
| "learning_rate": 8.933262711864408e-06, |
| "loss": 0.0922, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.0667284522706209, |
| "grad_norm": 0.3050272464752197, |
| "learning_rate": 8.932203389830509e-06, |
| "loss": 0.0913, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.0677876340526944, |
| "grad_norm": 0.40523460507392883, |
| "learning_rate": 8.93114406779661e-06, |
| "loss": 0.0959, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.0688468158347677, |
| "grad_norm": 0.35312050580978394, |
| "learning_rate": 8.930084745762712e-06, |
| "loss": 0.0935, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.069905997616841, |
| "grad_norm": 0.9593795537948608, |
| "learning_rate": 8.929025423728815e-06, |
| "loss": 0.0994, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.0709651793989143, |
| "grad_norm": 0.45267030596733093, |
| "learning_rate": 8.927966101694916e-06, |
| "loss": 0.0943, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.0720243611809877, |
| "grad_norm": 1.2854154109954834, |
| "learning_rate": 8.926906779661018e-06, |
| "loss": 0.1023, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.073083542963061, |
| "grad_norm": 0.3618859350681305, |
| "learning_rate": 8.92584745762712e-06, |
| "loss": 0.0934, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.0741427247451343, |
| "grad_norm": 0.513184666633606, |
| "learning_rate": 8.92478813559322e-06, |
| "loss": 0.0966, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.0752019065272078, |
| "grad_norm": 0.4073401689529419, |
| "learning_rate": 8.923728813559322e-06, |
| "loss": 0.0948, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.0762610883092811, |
| "grad_norm": 0.5936095714569092, |
| "learning_rate": 8.922669491525424e-06, |
| "loss": 0.0929, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.0773202700913544, |
| "grad_norm": 1.5205022096633911, |
| "learning_rate": 8.921610169491527e-06, |
| "loss": 0.1004, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.0783794518734278, |
| "grad_norm": 0.30164584517478943, |
| "learning_rate": 8.920550847457628e-06, |
| "loss": 0.0918, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.079438633655501, |
| "grad_norm": 0.37064287066459656, |
| "learning_rate": 8.91949152542373e-06, |
| "loss": 0.0982, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.0804978154375744, |
| "grad_norm": 0.32274115085601807, |
| "learning_rate": 8.918432203389831e-06, |
| "loss": 0.0951, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.0815569972196477, |
| "grad_norm": 0.29008156061172485, |
| "learning_rate": 8.917372881355934e-06, |
| "loss": 0.0976, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.0826161790017212, |
| "grad_norm": 0.2828814685344696, |
| "learning_rate": 8.916313559322035e-06, |
| "loss": 0.0927, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.0836753607837946, |
| "grad_norm": 0.5621195435523987, |
| "learning_rate": 8.915254237288137e-06, |
| "loss": 0.0966, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.0847345425658679, |
| "grad_norm": 0.4781251847743988, |
| "learning_rate": 8.914194915254238e-06, |
| "loss": 0.0968, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.0857937243479412, |
| "grad_norm": 0.7829591035842896, |
| "learning_rate": 8.91313559322034e-06, |
| "loss": 0.0983, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.0868529061300145, |
| "grad_norm": 0.5441563725471497, |
| "learning_rate": 8.912076271186441e-06, |
| "loss": 0.0926, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.0879120879120878, |
| "grad_norm": 0.24053499102592468, |
| "learning_rate": 8.911016949152544e-06, |
| "loss": 0.0889, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.0889712696941614, |
| "grad_norm": 0.28083938360214233, |
| "learning_rate": 8.909957627118646e-06, |
| "loss": 0.0918, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.0900304514762347, |
| "grad_norm": 0.28952857851982117, |
| "learning_rate": 8.908898305084747e-06, |
| "loss": 0.09, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.091089633258308, |
| "grad_norm": 0.5241445899009705, |
| "learning_rate": 8.907838983050848e-06, |
| "loss": 0.0936, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.0921488150403813, |
| "grad_norm": 0.361904501914978, |
| "learning_rate": 8.90677966101695e-06, |
| "loss": 0.0979, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.0932079968224546, |
| "grad_norm": 0.2844066917896271, |
| "learning_rate": 8.905720338983051e-06, |
| "loss": 0.0922, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.094267178604528, |
| "grad_norm": 1.230337142944336, |
| "learning_rate": 8.904661016949153e-06, |
| "loss": 0.0984, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.0953263603866012, |
| "grad_norm": 0.9222045540809631, |
| "learning_rate": 8.903601694915254e-06, |
| "loss": 0.0895, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.0963855421686748, |
| "grad_norm": 0.4679190218448639, |
| "learning_rate": 8.902542372881357e-06, |
| "loss": 0.0933, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.097444723950748, |
| "grad_norm": 0.48615601658821106, |
| "learning_rate": 8.901483050847459e-06, |
| "loss": 0.1018, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.0985039057328214, |
| "grad_norm": 1.2758482694625854, |
| "learning_rate": 8.90042372881356e-06, |
| "loss": 0.0948, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.0995630875148947, |
| "grad_norm": 0.9381634593009949, |
| "learning_rate": 8.899364406779661e-06, |
| "loss": 0.0984, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.100622269296968, |
| "grad_norm": 2.216055393218994, |
| "learning_rate": 8.898305084745763e-06, |
| "loss": 0.0971, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1016814510790414, |
| "grad_norm": 1.2314603328704834, |
| "learning_rate": 8.897245762711864e-06, |
| "loss": 0.0961, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.102740632861115, |
| "grad_norm": 0.4274296164512634, |
| "learning_rate": 8.896186440677966e-06, |
| "loss": 0.0937, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.1037998146431882, |
| "grad_norm": 0.3972962498664856, |
| "learning_rate": 8.895127118644069e-06, |
| "loss": 0.0966, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.1048589964252615, |
| "grad_norm": 0.32898983359336853, |
| "learning_rate": 8.89406779661017e-06, |
| "loss": 0.099, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.1059181782073348, |
| "grad_norm": 0.5144562721252441, |
| "learning_rate": 8.893008474576273e-06, |
| "loss": 0.0963, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.1069773599894082, |
| "grad_norm": 0.9342681169509888, |
| "learning_rate": 8.891949152542375e-06, |
| "loss": 0.0961, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.1080365417714815, |
| "grad_norm": 0.9471574425697327, |
| "learning_rate": 8.890889830508476e-06, |
| "loss": 0.0961, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.1090957235535548, |
| "grad_norm": 0.2984775900840759, |
| "learning_rate": 8.889830508474577e-06, |
| "loss": 0.0939, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.1101549053356283, |
| "grad_norm": 0.28858470916748047, |
| "learning_rate": 8.888771186440679e-06, |
| "loss": 0.0908, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.1112140871177016, |
| "grad_norm": 0.3262318968772888, |
| "learning_rate": 8.88771186440678e-06, |
| "loss": 0.0955, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.112273268899775, |
| "grad_norm": 0.4065167307853699, |
| "learning_rate": 8.886652542372882e-06, |
| "loss": 0.0981, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.1133324506818483, |
| "grad_norm": 0.4788059592247009, |
| "learning_rate": 8.885593220338983e-06, |
| "loss": 0.0949, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.1143916324639216, |
| "grad_norm": 0.7517910599708557, |
| "learning_rate": 8.884533898305086e-06, |
| "loss": 0.0925, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.115450814245995, |
| "grad_norm": 0.36450427770614624, |
| "learning_rate": 8.883474576271188e-06, |
| "loss": 0.0925, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.1165099960280682, |
| "grad_norm": 0.83025723695755, |
| "learning_rate": 8.882415254237289e-06, |
| "loss": 0.0981, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.1175691778101418, |
| "grad_norm": 0.5474830269813538, |
| "learning_rate": 8.88135593220339e-06, |
| "loss": 0.0944, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.118628359592215, |
| "grad_norm": 0.3668607175350189, |
| "learning_rate": 8.880296610169492e-06, |
| "loss": 0.0949, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.1196875413742884, |
| "grad_norm": 0.2962619960308075, |
| "learning_rate": 8.879237288135593e-06, |
| "loss": 0.0961, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.1207467231563617, |
| "grad_norm": 0.306918740272522, |
| "learning_rate": 8.878177966101695e-06, |
| "loss": 0.0953, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.121805904938435, |
| "grad_norm": 0.4895283579826355, |
| "learning_rate": 8.877118644067798e-06, |
| "loss": 0.0922, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1228650867205083, |
| "grad_norm": 0.3019005060195923, |
| "learning_rate": 8.8760593220339e-06, |
| "loss": 0.0903, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.1239242685025816, |
| "grad_norm": 0.5289521217346191, |
| "learning_rate": 8.875e-06, |
| "loss": 0.0942, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.1249834502846552, |
| "grad_norm": 0.2790992259979248, |
| "learning_rate": 8.873940677966102e-06, |
| "loss": 0.0961, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.1260426320667285, |
| "grad_norm": 0.6811845302581787, |
| "learning_rate": 8.872881355932203e-06, |
| "loss": 0.0973, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.1271018138488018, |
| "grad_norm": 0.4769812822341919, |
| "learning_rate": 8.871822033898307e-06, |
| "loss": 0.093, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.1281609956308751, |
| "grad_norm": 0.6008502244949341, |
| "learning_rate": 8.870762711864408e-06, |
| "loss": 0.0992, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.1292201774129484, |
| "grad_norm": 0.292278915643692, |
| "learning_rate": 8.86970338983051e-06, |
| "loss": 0.0953, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.1302793591950218, |
| "grad_norm": 0.34272485971450806, |
| "learning_rate": 8.86864406779661e-06, |
| "loss": 0.0909, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.131338540977095, |
| "grad_norm": 0.3241511583328247, |
| "learning_rate": 8.867584745762712e-06, |
| "loss": 0.0896, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.1323977227591686, |
| "grad_norm": 0.7826002836227417, |
| "learning_rate": 8.866525423728815e-06, |
| "loss": 0.0955, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.133456904541242, |
| "grad_norm": 0.7677907943725586, |
| "learning_rate": 8.865466101694917e-06, |
| "loss": 0.0938, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.1345160863233152, |
| "grad_norm": 0.30883848667144775, |
| "learning_rate": 8.864406779661018e-06, |
| "loss": 0.094, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.1355752681053886, |
| "grad_norm": 0.2945443093776703, |
| "learning_rate": 8.86334745762712e-06, |
| "loss": 0.0935, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.1366344498874619, |
| "grad_norm": 0.38556718826293945, |
| "learning_rate": 8.862288135593221e-06, |
| "loss": 0.0941, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.1376936316695352, |
| "grad_norm": 1.0006963014602661, |
| "learning_rate": 8.861228813559322e-06, |
| "loss": 0.0937, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.1387528134516087, |
| "grad_norm": 0.6400341391563416, |
| "learning_rate": 8.860169491525424e-06, |
| "loss": 0.0965, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.139811995233682, |
| "grad_norm": 0.33833760023117065, |
| "learning_rate": 8.859110169491527e-06, |
| "loss": 0.0917, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.1408711770157554, |
| "grad_norm": 0.34555482864379883, |
| "learning_rate": 8.858050847457628e-06, |
| "loss": 0.0941, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.1419303587978287, |
| "grad_norm": 0.2939344644546509, |
| "learning_rate": 8.85699152542373e-06, |
| "loss": 0.0917, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.142989540579902, |
| "grad_norm": 0.3510951101779938, |
| "learning_rate": 8.855932203389831e-06, |
| "loss": 0.0935, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.1440487223619753, |
| "grad_norm": 0.40609949827194214, |
| "learning_rate": 8.854872881355932e-06, |
| "loss": 0.0965, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.1451079041440488, |
| "grad_norm": 1.6574863195419312, |
| "learning_rate": 8.853813559322034e-06, |
| "loss": 0.0985, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.1461670859261222, |
| "grad_norm": 1.687386155128479, |
| "learning_rate": 8.852754237288135e-06, |
| "loss": 0.0942, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.1472262677081955, |
| "grad_norm": 0.9652918577194214, |
| "learning_rate": 8.851694915254237e-06, |
| "loss": 0.0969, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.1482854494902688, |
| "grad_norm": 0.447201669216156, |
| "learning_rate": 8.85063559322034e-06, |
| "loss": 0.0946, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.149344631272342, |
| "grad_norm": 0.2875814735889435, |
| "learning_rate": 8.849576271186441e-06, |
| "loss": 0.0944, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.1504038130544154, |
| "grad_norm": 0.3672226667404175, |
| "learning_rate": 8.848516949152544e-06, |
| "loss": 0.0967, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.1514629948364887, |
| "grad_norm": 0.8760420680046082, |
| "learning_rate": 8.847457627118646e-06, |
| "loss": 0.0937, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.1525221766185623, |
| "grad_norm": 0.4764424264431, |
| "learning_rate": 8.846398305084747e-06, |
| "loss": 0.0958, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.1535813584006356, |
| "grad_norm": 0.45019716024398804, |
| "learning_rate": 8.845338983050849e-06, |
| "loss": 0.097, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.154640540182709, |
| "grad_norm": 0.6289698481559753, |
| "learning_rate": 8.84427966101695e-06, |
| "loss": 0.0948, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.1556997219647822, |
| "grad_norm": 0.30257681012153625, |
| "learning_rate": 8.843220338983051e-06, |
| "loss": 0.0947, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.1567589037468555, |
| "grad_norm": 0.26092973351478577, |
| "learning_rate": 8.842161016949153e-06, |
| "loss": 0.0976, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.1578180855289288, |
| "grad_norm": 0.35534968972206116, |
| "learning_rate": 8.841101694915256e-06, |
| "loss": 0.0949, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.1588772673110022, |
| "grad_norm": 0.5100005269050598, |
| "learning_rate": 8.840042372881357e-06, |
| "loss": 0.0971, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.1599364490930757, |
| "grad_norm": 0.2607521712779999, |
| "learning_rate": 8.838983050847459e-06, |
| "loss": 0.0907, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.160995630875149, |
| "grad_norm": 0.33206820487976074, |
| "learning_rate": 8.83792372881356e-06, |
| "loss": 0.0914, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.1620548126572223, |
| "grad_norm": 0.39160943031311035, |
| "learning_rate": 8.836864406779662e-06, |
| "loss": 0.0953, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.1631139944392956, |
| "grad_norm": 0.29793456196784973, |
| "learning_rate": 8.835805084745763e-06, |
| "loss": 0.0876, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.164173176221369, |
| "grad_norm": 0.2818388044834137, |
| "learning_rate": 8.834745762711864e-06, |
| "loss": 0.0937, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.1652323580034423, |
| "grad_norm": 0.31199049949645996, |
| "learning_rate": 8.833686440677966e-06, |
| "loss": 0.0901, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.1662915397855156, |
| "grad_norm": 0.2392428070306778, |
| "learning_rate": 8.832627118644069e-06, |
| "loss": 0.0917, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.1673507215675891, |
| "grad_norm": 0.6399659514427185, |
| "learning_rate": 8.83156779661017e-06, |
| "loss": 0.0953, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.1684099033496624, |
| "grad_norm": 1.0445541143417358, |
| "learning_rate": 8.830508474576272e-06, |
| "loss": 0.0955, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.1694690851317358, |
| "grad_norm": 0.34966060519218445, |
| "learning_rate": 8.829449152542373e-06, |
| "loss": 0.0928, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.170528266913809, |
| "grad_norm": 0.3811902701854706, |
| "learning_rate": 8.828389830508475e-06, |
| "loss": 0.0926, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.1715874486958824, |
| "grad_norm": 0.2676902413368225, |
| "learning_rate": 8.827330508474578e-06, |
| "loss": 0.0915, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.1726466304779557, |
| "grad_norm": 0.2894545793533325, |
| "learning_rate": 8.826271186440679e-06, |
| "loss": 0.0924, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.173705812260029, |
| "grad_norm": 0.26010259985923767, |
| "learning_rate": 8.82521186440678e-06, |
| "loss": 0.0924, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.1747649940421026, |
| "grad_norm": 0.41155922412872314, |
| "learning_rate": 8.824152542372882e-06, |
| "loss": 0.0944, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.1758241758241759, |
| "grad_norm": 0.4124395549297333, |
| "learning_rate": 8.823093220338983e-06, |
| "loss": 0.0938, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.1768833576062492, |
| "grad_norm": 1.0543724298477173, |
| "learning_rate": 8.822033898305086e-06, |
| "loss": 0.0987, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.1779425393883225, |
| "grad_norm": 0.40370675921440125, |
| "learning_rate": 8.820974576271188e-06, |
| "loss": 0.0974, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.1790017211703958, |
| "grad_norm": 0.3587372303009033, |
| "learning_rate": 8.81991525423729e-06, |
| "loss": 0.0965, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.1800609029524691, |
| "grad_norm": 0.2818925082683563, |
| "learning_rate": 8.81885593220339e-06, |
| "loss": 0.0922, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.1811200847345424, |
| "grad_norm": 0.2897863984107971, |
| "learning_rate": 8.817796610169492e-06, |
| "loss": 0.0971, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.182179266516616, |
| "grad_norm": 0.6749652028083801, |
| "learning_rate": 8.816737288135593e-06, |
| "loss": 0.0962, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.1832384482986893, |
| "grad_norm": 0.2877092659473419, |
| "learning_rate": 8.815677966101695e-06, |
| "loss": 0.0965, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.1842976300807626, |
| "grad_norm": 0.2991100549697876, |
| "learning_rate": 8.814618644067798e-06, |
| "loss": 0.0974, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.185356811862836, |
| "grad_norm": 0.27069613337516785, |
| "learning_rate": 8.8135593220339e-06, |
| "loss": 0.0934, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.1864159936449092, |
| "grad_norm": 0.42415690422058105, |
| "learning_rate": 8.8125e-06, |
| "loss": 0.0931, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.1874751754269828, |
| "grad_norm": 0.4270313084125519, |
| "learning_rate": 8.811440677966102e-06, |
| "loss": 0.0906, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.188534357209056, |
| "grad_norm": 0.24667994678020477, |
| "learning_rate": 8.810381355932204e-06, |
| "loss": 0.0929, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.1895935389911294, |
| "grad_norm": 0.7036119103431702, |
| "learning_rate": 8.809322033898305e-06, |
| "loss": 0.0949, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.1906527207732027, |
| "grad_norm": 0.3452354371547699, |
| "learning_rate": 8.808262711864406e-06, |
| "loss": 0.0915, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.191711902555276, |
| "grad_norm": 0.48815253376960754, |
| "learning_rate": 8.80720338983051e-06, |
| "loss": 0.0941, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.1927710843373494, |
| "grad_norm": 0.33038586378097534, |
| "learning_rate": 8.806144067796611e-06, |
| "loss": 0.0915, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.1938302661194227, |
| "grad_norm": 0.30816060304641724, |
| "learning_rate": 8.805084745762712e-06, |
| "loss": 0.0921, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.1948894479014962, |
| "grad_norm": 0.2555977702140808, |
| "learning_rate": 8.804025423728815e-06, |
| "loss": 0.0917, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.1959486296835695, |
| "grad_norm": 0.2907012403011322, |
| "learning_rate": 8.802966101694917e-06, |
| "loss": 0.0922, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.1970078114656428, |
| "grad_norm": 0.6297186613082886, |
| "learning_rate": 8.801906779661018e-06, |
| "loss": 0.0919, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.1980669932477161, |
| "grad_norm": 0.650320291519165, |
| "learning_rate": 8.80084745762712e-06, |
| "loss": 0.0908, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.1991261750297895, |
| "grad_norm": 0.5117915272712708, |
| "learning_rate": 8.799788135593221e-06, |
| "loss": 0.0937, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.2001853568118628, |
| "grad_norm": 0.2948932647705078, |
| "learning_rate": 8.798728813559322e-06, |
| "loss": 0.0914, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.201244538593936, |
| "grad_norm": 0.33919399976730347, |
| "learning_rate": 8.797669491525424e-06, |
| "loss": 0.0917, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.2023037203760096, |
| "grad_norm": 0.3182590901851654, |
| "learning_rate": 8.796610169491527e-06, |
| "loss": 0.0922, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.203362902158083, |
| "grad_norm": 0.26661911606788635, |
| "learning_rate": 8.795550847457628e-06, |
| "loss": 0.093, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.2044220839401563, |
| "grad_norm": 0.26091158390045166, |
| "learning_rate": 8.79449152542373e-06, |
| "loss": 0.0901, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.2054812657222296, |
| "grad_norm": 0.3877406716346741, |
| "learning_rate": 8.793432203389831e-06, |
| "loss": 0.0892, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.206540447504303, |
| "grad_norm": 0.8551795482635498, |
| "learning_rate": 8.792372881355933e-06, |
| "loss": 0.0949, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2075996292863762, |
| "grad_norm": 0.5187364816665649, |
| "learning_rate": 8.791313559322034e-06, |
| "loss": 0.09, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.2086588110684495, |
| "grad_norm": 0.4340435266494751, |
| "learning_rate": 8.790254237288135e-06, |
| "loss": 0.09, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.209717992850523, |
| "grad_norm": 0.2178150713443756, |
| "learning_rate": 8.789194915254239e-06, |
| "loss": 0.0935, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.2107771746325964, |
| "grad_norm": 0.34359389543533325, |
| "learning_rate": 8.78813559322034e-06, |
| "loss": 0.0906, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.2118363564146697, |
| "grad_norm": 0.2701970636844635, |
| "learning_rate": 8.787076271186441e-06, |
| "loss": 0.0934, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.212895538196743, |
| "grad_norm": 1.226974368095398, |
| "learning_rate": 8.786016949152543e-06, |
| "loss": 0.0951, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.2139547199788163, |
| "grad_norm": 0.2872730493545532, |
| "learning_rate": 8.784957627118644e-06, |
| "loss": 0.0912, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.2150139017608896, |
| "grad_norm": 0.31621283292770386, |
| "learning_rate": 8.783898305084746e-06, |
| "loss": 0.0874, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.216073083542963, |
| "grad_norm": 0.8155948519706726, |
| "learning_rate": 8.782838983050849e-06, |
| "loss": 0.0917, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.2171322653250365, |
| "grad_norm": 0.29345694184303284, |
| "learning_rate": 8.78177966101695e-06, |
| "loss": 0.0909, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2181914471071098, |
| "grad_norm": 0.2968461811542511, |
| "learning_rate": 8.780720338983052e-06, |
| "loss": 0.0959, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.2192506288891831, |
| "grad_norm": 0.611682116985321, |
| "learning_rate": 8.779661016949153e-06, |
| "loss": 0.0886, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.2203098106712564, |
| "grad_norm": 0.6184428334236145, |
| "learning_rate": 8.778601694915256e-06, |
| "loss": 0.0901, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.2213689924533297, |
| "grad_norm": 0.496878057718277, |
| "learning_rate": 8.777542372881357e-06, |
| "loss": 0.0943, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.222428174235403, |
| "grad_norm": 0.30651700496673584, |
| "learning_rate": 8.776483050847459e-06, |
| "loss": 0.0912, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.2234873560174764, |
| "grad_norm": 0.3181779384613037, |
| "learning_rate": 8.77542372881356e-06, |
| "loss": 0.0911, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.22454653779955, |
| "grad_norm": 0.26597824692726135, |
| "learning_rate": 8.774364406779662e-06, |
| "loss": 0.0919, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.2256057195816232, |
| "grad_norm": 0.35603615641593933, |
| "learning_rate": 8.773305084745763e-06, |
| "loss": 0.092, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.2266649013636965, |
| "grad_norm": 0.28663283586502075, |
| "learning_rate": 8.772245762711865e-06, |
| "loss": 0.0893, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.2277240831457699, |
| "grad_norm": 0.3351318836212158, |
| "learning_rate": 8.771186440677966e-06, |
| "loss": 0.0897, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.2287832649278432, |
| "grad_norm": 0.3456285297870636, |
| "learning_rate": 8.770127118644069e-06, |
| "loss": 0.0938, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.2298424467099165, |
| "grad_norm": 0.9174132347106934, |
| "learning_rate": 8.76906779661017e-06, |
| "loss": 0.0882, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.23090162849199, |
| "grad_norm": 0.3719140589237213, |
| "learning_rate": 8.768008474576272e-06, |
| "loss": 0.092, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.2319608102740633, |
| "grad_norm": 0.26492729783058167, |
| "learning_rate": 8.766949152542373e-06, |
| "loss": 0.0891, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.2330199920561367, |
| "grad_norm": 0.3745848834514618, |
| "learning_rate": 8.765889830508475e-06, |
| "loss": 0.0919, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.23407917383821, |
| "grad_norm": 0.2910005450248718, |
| "learning_rate": 8.764830508474576e-06, |
| "loss": 0.0917, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.2351383556202833, |
| "grad_norm": 0.27463725209236145, |
| "learning_rate": 8.763771186440678e-06, |
| "loss": 0.0905, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.2361975374023566, |
| "grad_norm": 0.3829032778739929, |
| "learning_rate": 8.76271186440678e-06, |
| "loss": 0.086, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.2372567191844301, |
| "grad_norm": 0.3017561137676239, |
| "learning_rate": 8.761652542372882e-06, |
| "loss": 0.0905, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.2383159009665035, |
| "grad_norm": 0.299236923456192, |
| "learning_rate": 8.760593220338985e-06, |
| "loss": 0.0913, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.2393750827485768, |
| "grad_norm": 1.07209312915802, |
| "learning_rate": 8.759533898305087e-06, |
| "loss": 0.0902, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.24043426453065, |
| "grad_norm": 0.7281382083892822, |
| "learning_rate": 8.758474576271188e-06, |
| "loss": 0.0941, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.2414934463127234, |
| "grad_norm": 0.37550926208496094, |
| "learning_rate": 8.75741525423729e-06, |
| "loss": 0.0899, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.2425526280947967, |
| "grad_norm": 0.3048110902309418, |
| "learning_rate": 8.75635593220339e-06, |
| "loss": 0.0917, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.24361180987687, |
| "grad_norm": 1.2708523273468018, |
| "learning_rate": 8.755296610169492e-06, |
| "loss": 0.0927, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.2446709916589436, |
| "grad_norm": 0.30342382192611694, |
| "learning_rate": 8.754237288135594e-06, |
| "loss": 0.0954, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.2457301734410169, |
| "grad_norm": 0.258834570646286, |
| "learning_rate": 8.753177966101695e-06, |
| "loss": 0.0868, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.2467893552230902, |
| "grad_norm": 0.3479873836040497, |
| "learning_rate": 8.752118644067798e-06, |
| "loss": 0.087, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.2478485370051635, |
| "grad_norm": 0.6660997867584229, |
| "learning_rate": 8.7510593220339e-06, |
| "loss": 0.093, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.2489077187872368, |
| "grad_norm": 0.5038926601409912, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 0.0898, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.2499669005693101, |
| "grad_norm": 0.49546748399734497, |
| "learning_rate": 8.748940677966102e-06, |
| "loss": 0.0973, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.2510260823513835, |
| "grad_norm": 0.7215166091918945, |
| "learning_rate": 8.747881355932204e-06, |
| "loss": 0.0882, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.252085264133457, |
| "grad_norm": 0.5412211418151855, |
| "learning_rate": 8.746822033898305e-06, |
| "loss": 0.0898, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.2531444459155303, |
| "grad_norm": 0.26789650321006775, |
| "learning_rate": 8.745762711864407e-06, |
| "loss": 0.0872, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.2542036276976036, |
| "grad_norm": 0.2784571349620819, |
| "learning_rate": 8.74470338983051e-06, |
| "loss": 0.0919, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.255262809479677, |
| "grad_norm": 0.28960517048835754, |
| "learning_rate": 8.743644067796611e-06, |
| "loss": 0.0932, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.2563219912617503, |
| "grad_norm": 0.4026484489440918, |
| "learning_rate": 8.742584745762712e-06, |
| "loss": 0.0916, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.2573811730438236, |
| "grad_norm": 0.4276771545410156, |
| "learning_rate": 8.741525423728814e-06, |
| "loss": 0.0933, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.258440354825897, |
| "grad_norm": 0.34631893038749695, |
| "learning_rate": 8.740466101694915e-06, |
| "loss": 0.0868, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.2594995366079704, |
| "grad_norm": 0.43336084485054016, |
| "learning_rate": 8.739406779661017e-06, |
| "loss": 0.0906, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.2605587183900437, |
| "grad_norm": 0.6585732698440552, |
| "learning_rate": 8.73834745762712e-06, |
| "loss": 0.0932, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.261617900172117, |
| "grad_norm": 0.2695441246032715, |
| "learning_rate": 8.737288135593221e-06, |
| "loss": 0.0918, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.2626770819541904, |
| "grad_norm": 0.30017632246017456, |
| "learning_rate": 8.736228813559323e-06, |
| "loss": 0.0886, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.2637362637362637, |
| "grad_norm": 0.2944658100605011, |
| "learning_rate": 8.735169491525424e-06, |
| "loss": 0.0861, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.264795445518337, |
| "grad_norm": 0.37323957681655884, |
| "learning_rate": 8.734110169491527e-06, |
| "loss": 0.0885, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.2658546273004103, |
| "grad_norm": 0.301430881023407, |
| "learning_rate": 8.733050847457629e-06, |
| "loss": 0.0899, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.2669138090824839, |
| "grad_norm": 0.7957718372344971, |
| "learning_rate": 8.73199152542373e-06, |
| "loss": 0.0906, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.2679729908645572, |
| "grad_norm": 0.39433908462524414, |
| "learning_rate": 8.730932203389831e-06, |
| "loss": 0.0934, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.2690321726466305, |
| "grad_norm": 0.6956431269645691, |
| "learning_rate": 8.729872881355933e-06, |
| "loss": 0.0907, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.2700913544287038, |
| "grad_norm": 0.6170002222061157, |
| "learning_rate": 8.728813559322034e-06, |
| "loss": 0.0884, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.2711505362107771, |
| "grad_norm": 0.5037360787391663, |
| "learning_rate": 8.727754237288136e-06, |
| "loss": 0.0919, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.2722097179928507, |
| "grad_norm": 0.5087904930114746, |
| "learning_rate": 8.726694915254239e-06, |
| "loss": 0.0908, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.2732688997749237, |
| "grad_norm": 0.3599529266357422, |
| "learning_rate": 8.72563559322034e-06, |
| "loss": 0.0907, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.2743280815569973, |
| "grad_norm": 0.36797240376472473, |
| "learning_rate": 8.724576271186442e-06, |
| "loss": 0.0913, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.2753872633390706, |
| "grad_norm": 0.46812427043914795, |
| "learning_rate": 8.723516949152543e-06, |
| "loss": 0.0884, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.276446445121144, |
| "grad_norm": 0.2462358921766281, |
| "learning_rate": 8.722457627118644e-06, |
| "loss": 0.0958, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.2775056269032172, |
| "grad_norm": 0.6306796669960022, |
| "learning_rate": 8.721398305084746e-06, |
| "loss": 0.0945, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.2785648086852905, |
| "grad_norm": 0.40346699953079224, |
| "learning_rate": 8.720338983050847e-06, |
| "loss": 0.0862, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.279623990467364, |
| "grad_norm": 0.2961828410625458, |
| "learning_rate": 8.719279661016949e-06, |
| "loss": 0.0896, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.2806831722494372, |
| "grad_norm": 0.3986334502696991, |
| "learning_rate": 8.718220338983052e-06, |
| "loss": 0.0923, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.2817423540315107, |
| "grad_norm": 0.411968857049942, |
| "learning_rate": 8.717161016949153e-06, |
| "loss": 0.0914, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.282801535813584, |
| "grad_norm": 0.321750670671463, |
| "learning_rate": 8.716101694915256e-06, |
| "loss": 0.0963, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.2838607175956573, |
| "grad_norm": 0.3059873878955841, |
| "learning_rate": 8.715042372881358e-06, |
| "loss": 0.0939, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.2849198993777307, |
| "grad_norm": 0.25494441390037537, |
| "learning_rate": 8.713983050847459e-06, |
| "loss": 0.0891, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.285979081159804, |
| "grad_norm": 0.6753748059272766, |
| "learning_rate": 8.71292372881356e-06, |
| "loss": 0.0892, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.2870382629418775, |
| "grad_norm": 0.5489036440849304, |
| "learning_rate": 8.711864406779662e-06, |
| "loss": 0.0901, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.2880974447239508, |
| "grad_norm": 0.4101668894290924, |
| "learning_rate": 8.710805084745763e-06, |
| "loss": 0.0864, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.2891566265060241, |
| "grad_norm": 0.516925036907196, |
| "learning_rate": 8.709745762711865e-06, |
| "loss": 0.0876, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.2902158082880975, |
| "grad_norm": 0.8851956725120544, |
| "learning_rate": 8.708686440677968e-06, |
| "loss": 0.0911, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.2912749900701708, |
| "grad_norm": 0.2738077640533447, |
| "learning_rate": 8.70762711864407e-06, |
| "loss": 0.092, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.292334171852244, |
| "grad_norm": 0.34898704290390015, |
| "learning_rate": 8.70656779661017e-06, |
| "loss": 0.0917, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.2933933536343174, |
| "grad_norm": 0.35455602407455444, |
| "learning_rate": 8.705508474576272e-06, |
| "loss": 0.0906, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.294452535416391, |
| "grad_norm": 0.2889910340309143, |
| "learning_rate": 8.704449152542373e-06, |
| "loss": 0.0898, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.2955117171984643, |
| "grad_norm": 0.7005313634872437, |
| "learning_rate": 8.703389830508475e-06, |
| "loss": 0.0914, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.2965708989805376, |
| "grad_norm": 0.8933342695236206, |
| "learning_rate": 8.702330508474576e-06, |
| "loss": 0.0908, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.2976300807626109, |
| "grad_norm": 0.34126585721969604, |
| "learning_rate": 8.701271186440678e-06, |
| "loss": 0.0944, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.2986892625446842, |
| "grad_norm": 0.2783612906932831, |
| "learning_rate": 8.70021186440678e-06, |
| "loss": 0.0918, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.2997484443267575, |
| "grad_norm": 1.59508216381073, |
| "learning_rate": 8.699152542372882e-06, |
| "loss": 0.0938, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.3008076261088308, |
| "grad_norm": 0.2347707450389862, |
| "learning_rate": 8.698093220338984e-06, |
| "loss": 0.0896, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.3018668078909044, |
| "grad_norm": 0.5518621206283569, |
| "learning_rate": 8.697033898305085e-06, |
| "loss": 0.0963, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3029259896729777, |
| "grad_norm": 0.2808842957019806, |
| "learning_rate": 8.695974576271186e-06, |
| "loss": 0.0919, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.303985171455051, |
| "grad_norm": 0.2591249942779541, |
| "learning_rate": 8.694915254237288e-06, |
| "loss": 0.0931, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.3050443532371243, |
| "grad_norm": 0.5613085627555847, |
| "learning_rate": 8.693855932203391e-06, |
| "loss": 0.0915, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.3061035350191976, |
| "grad_norm": 1.0077388286590576, |
| "learning_rate": 8.692796610169492e-06, |
| "loss": 0.0926, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.307162716801271, |
| "grad_norm": 0.42165160179138184, |
| "learning_rate": 8.691737288135594e-06, |
| "loss": 0.0892, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.3082218985833443, |
| "grad_norm": 0.4207666218280792, |
| "learning_rate": 8.690677966101695e-06, |
| "loss": 0.0959, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.3092810803654178, |
| "grad_norm": 0.37785521149635315, |
| "learning_rate": 8.689618644067798e-06, |
| "loss": 0.0936, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.310340262147491, |
| "grad_norm": 0.35617318749427795, |
| "learning_rate": 8.6885593220339e-06, |
| "loss": 0.0888, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.3113994439295644, |
| "grad_norm": 0.7670419216156006, |
| "learning_rate": 8.687500000000001e-06, |
| "loss": 0.0896, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.3124586257116377, |
| "grad_norm": 0.2375904768705368, |
| "learning_rate": 8.686440677966103e-06, |
| "loss": 0.089, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.313517807493711, |
| "grad_norm": 0.43427926301956177, |
| "learning_rate": 8.685381355932204e-06, |
| "loss": 0.0901, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.3145769892757846, |
| "grad_norm": 0.74751877784729, |
| "learning_rate": 8.684322033898305e-06, |
| "loss": 0.0934, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.3156361710578577, |
| "grad_norm": 0.2564150094985962, |
| "learning_rate": 8.683262711864407e-06, |
| "loss": 0.0908, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.3166953528399312, |
| "grad_norm": 0.4206532835960388, |
| "learning_rate": 8.68220338983051e-06, |
| "loss": 0.0905, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.3177545346220045, |
| "grad_norm": 0.6286159753799438, |
| "learning_rate": 8.681144067796611e-06, |
| "loss": 0.0939, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.3188137164040779, |
| "grad_norm": 1.2643135786056519, |
| "learning_rate": 8.680084745762713e-06, |
| "loss": 0.0929, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.3198728981861512, |
| "grad_norm": 0.7013075351715088, |
| "learning_rate": 8.679025423728814e-06, |
| "loss": 0.0867, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.3209320799682245, |
| "grad_norm": 0.5261450409889221, |
| "learning_rate": 8.677966101694915e-06, |
| "loss": 0.0895, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.321991261750298, |
| "grad_norm": 0.32555660605430603, |
| "learning_rate": 8.676906779661017e-06, |
| "loss": 0.0921, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.3230504435323711, |
| "grad_norm": 0.33761289715766907, |
| "learning_rate": 8.675847457627118e-06, |
| "loss": 0.0913, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.3241096253144446, |
| "grad_norm": 0.40158653259277344, |
| "learning_rate": 8.674788135593221e-06, |
| "loss": 0.0959, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.325168807096518, |
| "grad_norm": 0.3522323966026306, |
| "learning_rate": 8.673728813559323e-06, |
| "loss": 0.0907, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.3262279888785913, |
| "grad_norm": 0.5156732201576233, |
| "learning_rate": 8.672669491525424e-06, |
| "loss": 0.0921, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.3272871706606646, |
| "grad_norm": 1.196157455444336, |
| "learning_rate": 8.671610169491527e-06, |
| "loss": 0.091, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.328346352442738, |
| "grad_norm": 0.3785947263240814, |
| "learning_rate": 8.670550847457629e-06, |
| "loss": 0.0909, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.3294055342248114, |
| "grad_norm": 0.41375061869621277, |
| "learning_rate": 8.66949152542373e-06, |
| "loss": 0.093, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.3304647160068848, |
| "grad_norm": 0.46027684211730957, |
| "learning_rate": 8.668432203389832e-06, |
| "loss": 0.0888, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.331523897788958, |
| "grad_norm": 0.29217252135276794, |
| "learning_rate": 8.667372881355933e-06, |
| "loss": 0.0933, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.3325830795710314, |
| "grad_norm": 0.3266601264476776, |
| "learning_rate": 8.666313559322034e-06, |
| "loss": 0.0938, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.3336422613531047, |
| "grad_norm": 0.22877094149589539, |
| "learning_rate": 8.665254237288136e-06, |
| "loss": 0.0907, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.334701443135178, |
| "grad_norm": 1.2058866024017334, |
| "learning_rate": 8.664194915254239e-06, |
| "loss": 0.0923, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.3357606249172513, |
| "grad_norm": 0.33053499460220337, |
| "learning_rate": 8.66313559322034e-06, |
| "loss": 0.0941, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.3368198066993249, |
| "grad_norm": 0.2770586609840393, |
| "learning_rate": 8.662076271186442e-06, |
| "loss": 0.0933, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.3378789884813982, |
| "grad_norm": 0.4117499887943268, |
| "learning_rate": 8.661016949152543e-06, |
| "loss": 0.094, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.3389381702634715, |
| "grad_norm": 0.4217805862426758, |
| "learning_rate": 8.659957627118645e-06, |
| "loss": 0.0926, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.3399973520455448, |
| "grad_norm": 0.4632180631160736, |
| "learning_rate": 8.658898305084746e-06, |
| "loss": 0.0917, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.3410565338276181, |
| "grad_norm": 0.279499351978302, |
| "learning_rate": 8.657838983050847e-06, |
| "loss": 0.0909, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.3421157156096915, |
| "grad_norm": 0.4228940010070801, |
| "learning_rate": 8.65677966101695e-06, |
| "loss": 0.0906, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.3431748973917648, |
| "grad_norm": 0.2591783106327057, |
| "learning_rate": 8.655720338983052e-06, |
| "loss": 0.0906, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.3442340791738383, |
| "grad_norm": 0.4122743606567383, |
| "learning_rate": 8.654661016949153e-06, |
| "loss": 0.0876, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.3452932609559116, |
| "grad_norm": 0.2966249883174896, |
| "learning_rate": 8.653601694915255e-06, |
| "loss": 0.0897, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.346352442737985, |
| "grad_norm": 0.3015781044960022, |
| "learning_rate": 8.652542372881356e-06, |
| "loss": 0.0945, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.3474116245200582, |
| "grad_norm": 0.38431188464164734, |
| "learning_rate": 8.651483050847458e-06, |
| "loss": 0.0922, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.3484708063021316, |
| "grad_norm": 0.9707023501396179, |
| "learning_rate": 8.650423728813559e-06, |
| "loss": 0.0921, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.3495299880842049, |
| "grad_norm": 0.5032562613487244, |
| "learning_rate": 8.649364406779662e-06, |
| "loss": 0.0927, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.3505891698662782, |
| "grad_norm": 0.2696177363395691, |
| "learning_rate": 8.648305084745763e-06, |
| "loss": 0.0909, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.3516483516483517, |
| "grad_norm": 0.4014403522014618, |
| "learning_rate": 8.647245762711865e-06, |
| "loss": 0.0889, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.352707533430425, |
| "grad_norm": 0.2904941439628601, |
| "learning_rate": 8.646186440677968e-06, |
| "loss": 0.0912, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.3537667152124984, |
| "grad_norm": 0.3719266951084137, |
| "learning_rate": 8.64512711864407e-06, |
| "loss": 0.093, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.3548258969945717, |
| "grad_norm": 0.917984664440155, |
| "learning_rate": 8.64406779661017e-06, |
| "loss": 0.0917, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.355885078776645, |
| "grad_norm": 0.8555766344070435, |
| "learning_rate": 8.643008474576272e-06, |
| "loss": 0.0915, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.3569442605587183, |
| "grad_norm": 0.2927437722682953, |
| "learning_rate": 8.641949152542374e-06, |
| "loss": 0.0876, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.3580034423407916, |
| "grad_norm": 1.2234313488006592, |
| "learning_rate": 8.640889830508475e-06, |
| "loss": 0.09, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.3590626241228652, |
| "grad_norm": 0.2912628948688507, |
| "learning_rate": 8.639830508474576e-06, |
| "loss": 0.0923, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.3601218059049385, |
| "grad_norm": 0.2900404930114746, |
| "learning_rate": 8.638771186440678e-06, |
| "loss": 0.0911, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.3611809876870118, |
| "grad_norm": 0.2967686653137207, |
| "learning_rate": 8.637711864406781e-06, |
| "loss": 0.0958, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.362240169469085, |
| "grad_norm": 0.3014371991157532, |
| "learning_rate": 8.636652542372882e-06, |
| "loss": 0.0931, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.3632993512511584, |
| "grad_norm": 0.4530743360519409, |
| "learning_rate": 8.635593220338984e-06, |
| "loss": 0.0911, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.364358533033232, |
| "grad_norm": 0.6225001811981201, |
| "learning_rate": 8.634533898305085e-06, |
| "loss": 0.089, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.365417714815305, |
| "grad_norm": 0.5223209261894226, |
| "learning_rate": 8.633474576271187e-06, |
| "loss": 0.0902, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.3664768965973786, |
| "grad_norm": 0.23648612201213837, |
| "learning_rate": 8.632415254237288e-06, |
| "loss": 0.0866, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.367536078379452, |
| "grad_norm": 0.3055684268474579, |
| "learning_rate": 8.63135593220339e-06, |
| "loss": 0.0899, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.3685952601615252, |
| "grad_norm": 0.2635006904602051, |
| "learning_rate": 8.630296610169493e-06, |
| "loss": 0.0925, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.3696544419435985, |
| "grad_norm": 0.2705018222332001, |
| "learning_rate": 8.629237288135594e-06, |
| "loss": 0.0928, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.3707136237256718, |
| "grad_norm": 0.33698102831840515, |
| "learning_rate": 8.628177966101695e-06, |
| "loss": 0.0912, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.3717728055077454, |
| "grad_norm": 0.35814690589904785, |
| "learning_rate": 8.627118644067798e-06, |
| "loss": 0.0953, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.3728319872898185, |
| "grad_norm": 0.29399535059928894, |
| "learning_rate": 8.6260593220339e-06, |
| "loss": 0.0906, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.373891169071892, |
| "grad_norm": 0.5228216052055359, |
| "learning_rate": 8.625000000000001e-06, |
| "loss": 0.0906, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.3749503508539653, |
| "grad_norm": 0.8515452742576599, |
| "learning_rate": 8.623940677966103e-06, |
| "loss": 0.0907, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.3760095326360386, |
| "grad_norm": 0.28903132677078247, |
| "learning_rate": 8.622881355932204e-06, |
| "loss": 0.0847, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.377068714418112, |
| "grad_norm": 0.26791977882385254, |
| "learning_rate": 8.621822033898306e-06, |
| "loss": 0.0888, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.3781278962001853, |
| "grad_norm": 0.8018907308578491, |
| "learning_rate": 8.620762711864407e-06, |
| "loss": 0.0899, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.3791870779822588, |
| "grad_norm": 0.48832303285598755, |
| "learning_rate": 8.61970338983051e-06, |
| "loss": 0.0886, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.3802462597643321, |
| "grad_norm": 0.3044288754463196, |
| "learning_rate": 8.618644067796611e-06, |
| "loss": 0.0897, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.3813054415464054, |
| "grad_norm": 0.306273490190506, |
| "learning_rate": 8.617584745762713e-06, |
| "loss": 0.093, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.3823646233284788, |
| "grad_norm": 0.38597342371940613, |
| "learning_rate": 8.616525423728814e-06, |
| "loss": 0.092, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.383423805110552, |
| "grad_norm": 0.5759603381156921, |
| "learning_rate": 8.615466101694916e-06, |
| "loss": 0.0908, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.3844829868926254, |
| "grad_norm": 0.2969980537891388, |
| "learning_rate": 8.614406779661017e-06, |
| "loss": 0.0904, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.3855421686746987, |
| "grad_norm": 0.6405715942382812, |
| "learning_rate": 8.613347457627118e-06, |
| "loss": 0.0931, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.3866013504567722, |
| "grad_norm": 0.47998374700546265, |
| "learning_rate": 8.612288135593222e-06, |
| "loss": 0.0907, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.3876605322388456, |
| "grad_norm": 0.8584996461868286, |
| "learning_rate": 8.611228813559323e-06, |
| "loss": 0.0918, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.3887197140209189, |
| "grad_norm": 0.3902839422225952, |
| "learning_rate": 8.610169491525424e-06, |
| "loss": 0.09, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.3897788958029922, |
| "grad_norm": 0.9900773763656616, |
| "learning_rate": 8.609110169491526e-06, |
| "loss": 0.0952, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.3908380775850655, |
| "grad_norm": 0.388141393661499, |
| "learning_rate": 8.608050847457627e-06, |
| "loss": 0.09, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.3918972593671388, |
| "grad_norm": 0.33699607849121094, |
| "learning_rate": 8.606991525423729e-06, |
| "loss": 0.0937, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.3929564411492121, |
| "grad_norm": 0.5017436146736145, |
| "learning_rate": 8.60593220338983e-06, |
| "loss": 0.0897, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.3940156229312857, |
| "grad_norm": 0.385785847902298, |
| "learning_rate": 8.604872881355933e-06, |
| "loss": 0.093, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.395074804713359, |
| "grad_norm": 0.8142003417015076, |
| "learning_rate": 8.603813559322035e-06, |
| "loss": 0.0951, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.3961339864954323, |
| "grad_norm": 0.9631067514419556, |
| "learning_rate": 8.602754237288136e-06, |
| "loss": 0.0892, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.3971931682775056, |
| "grad_norm": 0.3529389798641205, |
| "learning_rate": 8.601694915254239e-06, |
| "loss": 0.0927, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.398252350059579, |
| "grad_norm": 0.444048136472702, |
| "learning_rate": 8.60063559322034e-06, |
| "loss": 0.0914, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.3993115318416522, |
| "grad_norm": 0.29218658804893494, |
| "learning_rate": 8.599576271186442e-06, |
| "loss": 0.0912, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.4003707136237256, |
| "grad_norm": 0.34913161396980286, |
| "learning_rate": 8.598516949152543e-06, |
| "loss": 0.0922, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.401429895405799, |
| "grad_norm": 0.286085844039917, |
| "learning_rate": 8.597457627118645e-06, |
| "loss": 0.0909, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.4024890771878724, |
| "grad_norm": 0.2745821475982666, |
| "learning_rate": 8.596398305084746e-06, |
| "loss": 0.0892, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4035482589699457, |
| "grad_norm": 0.3126218020915985, |
| "learning_rate": 8.595338983050848e-06, |
| "loss": 0.0929, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.404607440752019, |
| "grad_norm": 0.6711746454238892, |
| "learning_rate": 8.59427966101695e-06, |
| "loss": 0.0865, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.4056666225340924, |
| "grad_norm": 0.7869289517402649, |
| "learning_rate": 8.593220338983052e-06, |
| "loss": 0.0921, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.406725804316166, |
| "grad_norm": 0.4443584084510803, |
| "learning_rate": 8.592161016949153e-06, |
| "loss": 0.0916, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.407784986098239, |
| "grad_norm": 0.33459603786468506, |
| "learning_rate": 8.591101694915255e-06, |
| "loss": 0.089, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4088441678803125, |
| "grad_norm": 0.6564658284187317, |
| "learning_rate": 8.590042372881356e-06, |
| "loss": 0.0947, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.4099033496623858, |
| "grad_norm": 0.37691906094551086, |
| "learning_rate": 8.588983050847458e-06, |
| "loss": 0.0903, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.4109625314444592, |
| "grad_norm": 0.4946768581867218, |
| "learning_rate": 8.587923728813559e-06, |
| "loss": 0.0909, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.4120217132265325, |
| "grad_norm": 0.29537561535835266, |
| "learning_rate": 8.58686440677966e-06, |
| "loss": 0.0914, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.4130808950086058, |
| "grad_norm": 0.35921117663383484, |
| "learning_rate": 8.585805084745764e-06, |
| "loss": 0.0961, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.4141400767906793, |
| "grad_norm": 0.37264665961265564, |
| "learning_rate": 8.584745762711865e-06, |
| "loss": 0.0888, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.4151992585727524, |
| "grad_norm": 1.4332431554794312, |
| "learning_rate": 8.583686440677966e-06, |
| "loss": 0.0932, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.416258440354826, |
| "grad_norm": 0.8218671083450317, |
| "learning_rate": 8.582627118644068e-06, |
| "loss": 0.0912, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.4173176221368993, |
| "grad_norm": 0.5757306814193726, |
| "learning_rate": 8.581567796610171e-06, |
| "loss": 0.0935, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.4183768039189726, |
| "grad_norm": 0.3226252794265747, |
| "learning_rate": 8.580508474576272e-06, |
| "loss": 0.0905, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.419435985701046, |
| "grad_norm": 0.24196362495422363, |
| "learning_rate": 8.579449152542374e-06, |
| "loss": 0.0871, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.4204951674831192, |
| "grad_norm": 0.4133007526397705, |
| "learning_rate": 8.578389830508475e-06, |
| "loss": 0.0892, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.4215543492651928, |
| "grad_norm": 0.6345924139022827, |
| "learning_rate": 8.577330508474577e-06, |
| "loss": 0.0921, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.422613531047266, |
| "grad_norm": 0.9568214416503906, |
| "learning_rate": 8.57627118644068e-06, |
| "loss": 0.0897, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.4236727128293394, |
| "grad_norm": 0.3031831383705139, |
| "learning_rate": 8.575211864406781e-06, |
| "loss": 0.0911, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.4247318946114127, |
| "grad_norm": 0.31946277618408203, |
| "learning_rate": 8.574152542372883e-06, |
| "loss": 0.0924, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.425791076393486, |
| "grad_norm": 0.2845727503299713, |
| "learning_rate": 8.573093220338984e-06, |
| "loss": 0.0942, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.4268502581755593, |
| "grad_norm": 1.0196945667266846, |
| "learning_rate": 8.572033898305085e-06, |
| "loss": 0.088, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.4279094399576326, |
| "grad_norm": 0.3057892620563507, |
| "learning_rate": 8.570974576271187e-06, |
| "loss": 0.0908, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.4289686217397062, |
| "grad_norm": 0.26774507761001587, |
| "learning_rate": 8.569915254237288e-06, |
| "loss": 0.0907, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.4300278035217795, |
| "grad_norm": 0.3148769736289978, |
| "learning_rate": 8.56885593220339e-06, |
| "loss": 0.0921, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.4310869853038528, |
| "grad_norm": 0.24930188059806824, |
| "learning_rate": 8.567796610169493e-06, |
| "loss": 0.0898, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.4321461670859261, |
| "grad_norm": 0.2720463275909424, |
| "learning_rate": 8.566737288135594e-06, |
| "loss": 0.0923, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.4332053488679994, |
| "grad_norm": 0.2629014253616333, |
| "learning_rate": 8.565677966101696e-06, |
| "loss": 0.0919, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.4342645306500728, |
| "grad_norm": 0.2753286361694336, |
| "learning_rate": 8.564618644067797e-06, |
| "loss": 0.0883, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.435323712432146, |
| "grad_norm": 0.8989904522895813, |
| "learning_rate": 8.563559322033898e-06, |
| "loss": 0.0933, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.4363828942142196, |
| "grad_norm": 0.30378690361976624, |
| "learning_rate": 8.5625e-06, |
| "loss": 0.0921, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.437442075996293, |
| "grad_norm": 0.3080299198627472, |
| "learning_rate": 8.561440677966101e-06, |
| "loss": 0.0905, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.4385012577783662, |
| "grad_norm": 0.3442351520061493, |
| "learning_rate": 8.560381355932204e-06, |
| "loss": 0.0916, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.4395604395604396, |
| "grad_norm": 0.35361289978027344, |
| "learning_rate": 8.559322033898306e-06, |
| "loss": 0.0879, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.4406196213425129, |
| "grad_norm": 0.48620983958244324, |
| "learning_rate": 8.558262711864407e-06, |
| "loss": 0.0905, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.4416788031245862, |
| "grad_norm": 0.40295881032943726, |
| "learning_rate": 8.55720338983051e-06, |
| "loss": 0.0897, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.4427379849066595, |
| "grad_norm": 0.24092480540275574, |
| "learning_rate": 8.556144067796612e-06, |
| "loss": 0.0898, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.443797166688733, |
| "grad_norm": 0.6216761469841003, |
| "learning_rate": 8.555084745762713e-06, |
| "loss": 0.0888, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.4448563484708064, |
| "grad_norm": 0.7443450689315796, |
| "learning_rate": 8.554025423728814e-06, |
| "loss": 0.0918, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.4459155302528797, |
| "grad_norm": 0.401972234249115, |
| "learning_rate": 8.552966101694916e-06, |
| "loss": 0.0915, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.446974712034953, |
| "grad_norm": 0.2976281940937042, |
| "learning_rate": 8.551906779661017e-06, |
| "loss": 0.091, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.4480338938170263, |
| "grad_norm": 0.4022362232208252, |
| "learning_rate": 8.550847457627119e-06, |
| "loss": 0.0934, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.4490930755990998, |
| "grad_norm": 0.4490336775779724, |
| "learning_rate": 8.549788135593222e-06, |
| "loss": 0.0921, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.450152257381173, |
| "grad_norm": 0.8399879932403564, |
| "learning_rate": 8.548728813559323e-06, |
| "loss": 0.0929, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.4512114391632465, |
| "grad_norm": 0.3975447416305542, |
| "learning_rate": 8.547669491525425e-06, |
| "loss": 0.0874, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.4522706209453198, |
| "grad_norm": 0.297023743391037, |
| "learning_rate": 8.546610169491526e-06, |
| "loss": 0.0889, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.453329802727393, |
| "grad_norm": 0.3174283504486084, |
| "learning_rate": 8.545550847457627e-06, |
| "loss": 0.0911, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.4543889845094664, |
| "grad_norm": 0.3391566276550293, |
| "learning_rate": 8.544491525423729e-06, |
| "loss": 0.0913, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.4554481662915397, |
| "grad_norm": 0.5378783941268921, |
| "learning_rate": 8.54343220338983e-06, |
| "loss": 0.0919, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.4565073480736133, |
| "grad_norm": 0.5963971018791199, |
| "learning_rate": 8.542372881355933e-06, |
| "loss": 0.0935, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.4575665298556864, |
| "grad_norm": 0.4562707841396332, |
| "learning_rate": 8.541313559322035e-06, |
| "loss": 0.0879, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.45862571163776, |
| "grad_norm": 0.28086161613464355, |
| "learning_rate": 8.540254237288136e-06, |
| "loss": 0.0881, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.4596848934198332, |
| "grad_norm": 0.21108803153038025, |
| "learning_rate": 8.539194915254238e-06, |
| "loss": 0.0903, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.4607440752019065, |
| "grad_norm": 0.26527541875839233, |
| "learning_rate": 8.538135593220339e-06, |
| "loss": 0.0897, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.4618032569839798, |
| "grad_norm": 0.26970967650413513, |
| "learning_rate": 8.537076271186442e-06, |
| "loss": 0.0899, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.4628624387660532, |
| "grad_norm": 0.30094999074935913, |
| "learning_rate": 8.536016949152543e-06, |
| "loss": 0.0878, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.4639216205481267, |
| "grad_norm": 1.1830754280090332, |
| "learning_rate": 8.534957627118645e-06, |
| "loss": 0.093, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.4649808023301998, |
| "grad_norm": 0.3395049273967743, |
| "learning_rate": 8.533898305084746e-06, |
| "loss": 0.0888, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.4660399841122733, |
| "grad_norm": 0.2931113541126251, |
| "learning_rate": 8.532838983050848e-06, |
| "loss": 0.0905, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.4670991658943466, |
| "grad_norm": 0.3630742132663727, |
| "learning_rate": 8.53177966101695e-06, |
| "loss": 0.0882, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.46815834767642, |
| "grad_norm": 0.46798133850097656, |
| "learning_rate": 8.530720338983052e-06, |
| "loss": 0.0915, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.4692175294584933, |
| "grad_norm": 0.27987757325172424, |
| "learning_rate": 8.529661016949154e-06, |
| "loss": 0.0915, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.4702767112405666, |
| "grad_norm": 0.33298471570014954, |
| "learning_rate": 8.528601694915255e-06, |
| "loss": 0.0892, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.4713358930226401, |
| "grad_norm": 0.2661607265472412, |
| "learning_rate": 8.527542372881356e-06, |
| "loss": 0.0893, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.4723950748047134, |
| "grad_norm": 0.38440874218940735, |
| "learning_rate": 8.526483050847458e-06, |
| "loss": 0.0875, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.4734542565867867, |
| "grad_norm": 0.4289654791355133, |
| "learning_rate": 8.52542372881356e-06, |
| "loss": 0.0903, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.47451343836886, |
| "grad_norm": 0.6656467318534851, |
| "learning_rate": 8.524364406779662e-06, |
| "loss": 0.0893, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.4755726201509334, |
| "grad_norm": 0.4115320146083832, |
| "learning_rate": 8.523305084745764e-06, |
| "loss": 0.0904, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.4766318019330067, |
| "grad_norm": 1.2950462102890015, |
| "learning_rate": 8.522245762711865e-06, |
| "loss": 0.0899, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.47769098371508, |
| "grad_norm": 0.5561743974685669, |
| "learning_rate": 8.521186440677967e-06, |
| "loss": 0.0923, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.4787501654971535, |
| "grad_norm": 0.29928943514823914, |
| "learning_rate": 8.520127118644068e-06, |
| "loss": 0.088, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.4798093472792269, |
| "grad_norm": 0.5597212910652161, |
| "learning_rate": 8.51906779661017e-06, |
| "loss": 0.0902, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.4808685290613002, |
| "grad_norm": 0.341529905796051, |
| "learning_rate": 8.518008474576271e-06, |
| "loss": 0.0869, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.4819277108433735, |
| "grad_norm": 0.2716520130634308, |
| "learning_rate": 8.516949152542372e-06, |
| "loss": 0.0917, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.4829868926254468, |
| "grad_norm": 0.2677081823348999, |
| "learning_rate": 8.515889830508475e-06, |
| "loss": 0.0881, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.4840460744075201, |
| "grad_norm": 0.27061936259269714, |
| "learning_rate": 8.514830508474577e-06, |
| "loss": 0.0901, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.4851052561895934, |
| "grad_norm": 0.6803617477416992, |
| "learning_rate": 8.51377118644068e-06, |
| "loss": 0.0885, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.486164437971667, |
| "grad_norm": 0.7752925157546997, |
| "learning_rate": 8.512711864406781e-06, |
| "loss": 0.09, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.4872236197537403, |
| "grad_norm": 1.0663254261016846, |
| "learning_rate": 8.511652542372883e-06, |
| "loss": 0.0927, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.4882828015358136, |
| "grad_norm": 1.3076659440994263, |
| "learning_rate": 8.510593220338984e-06, |
| "loss": 0.09, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.489341983317887, |
| "grad_norm": 0.2659742832183838, |
| "learning_rate": 8.509533898305086e-06, |
| "loss": 0.0907, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.4904011650999602, |
| "grad_norm": 0.34570935368537903, |
| "learning_rate": 8.508474576271187e-06, |
| "loss": 0.0896, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.4914603468820335, |
| "grad_norm": 0.34614187479019165, |
| "learning_rate": 8.507415254237288e-06, |
| "loss": 0.0911, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.4925195286641069, |
| "grad_norm": 0.24298426508903503, |
| "learning_rate": 8.50635593220339e-06, |
| "loss": 0.0917, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.4935787104461804, |
| "grad_norm": 0.49938711524009705, |
| "learning_rate": 8.505296610169493e-06, |
| "loss": 0.0958, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.4946378922282537, |
| "grad_norm": 0.9254084825515747, |
| "learning_rate": 8.504237288135594e-06, |
| "loss": 0.0909, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.495697074010327, |
| "grad_norm": 0.430133193731308, |
| "learning_rate": 8.503177966101696e-06, |
| "loss": 0.0917, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.4967562557924003, |
| "grad_norm": 1.0322993993759155, |
| "learning_rate": 8.502118644067797e-06, |
| "loss": 0.0922, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.4978154375744737, |
| "grad_norm": 0.6187616586685181, |
| "learning_rate": 8.501059322033899e-06, |
| "loss": 0.0951, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.4988746193565472, |
| "grad_norm": 0.33860644698143005, |
| "learning_rate": 8.5e-06, |
| "loss": 0.0933, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.4988746193565472, |
| "eval_accuracy": 0.968, |
| "eval_best_f1_from_thresholding": 0.1443850267379679, |
| "eval_loss": 0.13970085978507996, |
| "eval_matthews_corrcoef": 0.1537363369231449, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9836950983389381, |
| "eval_negative_class_precision": 0.9932091778989608, |
| "eval_negative_class_recall": 0.9743615625315434, |
| "eval_positive_class_f1": 0.1443850267379679, |
| "eval_positive_class_precision": 0.09608540925266904, |
| "eval_positive_class_recall": 0.2903225806451613, |
| "eval_roc_auc": 0.8216168430923719, |
| "eval_runtime": 20.7317, |
| "eval_samples_per_second": 482.353, |
| "eval_steps_per_second": 7.573, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.4999338011386203, |
| "grad_norm": 0.47634679079055786, |
| "learning_rate": 8.498940677966101e-06, |
| "loss": 0.0952, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.5009929829206938, |
| "grad_norm": 0.40228286385536194, |
| "learning_rate": 8.497881355932204e-06, |
| "loss": 0.0916, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.5020521647027671, |
| "grad_norm": 0.2725696563720703, |
| "learning_rate": 8.496822033898306e-06, |
| "loss": 0.0962, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.5031113464848405, |
| "grad_norm": 0.504587709903717, |
| "learning_rate": 8.495762711864407e-06, |
| "loss": 0.0919, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.5041705282669138, |
| "grad_norm": 0.424791544675827, |
| "learning_rate": 8.494703389830509e-06, |
| "loss": 0.0892, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.505229710048987, |
| "grad_norm": 0.5593955516815186, |
| "learning_rate": 8.49364406779661e-06, |
| "loss": 0.0897, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.5062888918310606, |
| "grad_norm": 0.7979548573493958, |
| "learning_rate": 8.492584745762713e-06, |
| "loss": 0.0889, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.5073480736131337, |
| "grad_norm": 0.6873000264167786, |
| "learning_rate": 8.491525423728815e-06, |
| "loss": 0.0909, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.5084072553952073, |
| "grad_norm": 0.43569207191467285, |
| "learning_rate": 8.490466101694916e-06, |
| "loss": 0.0899, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.5094664371772806, |
| "grad_norm": 0.3459659516811371, |
| "learning_rate": 8.489406779661017e-06, |
| "loss": 0.0934, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.5105256189593539, |
| "grad_norm": 0.7253245115280151, |
| "learning_rate": 8.488347457627119e-06, |
| "loss": 0.091, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.5115848007414272, |
| "grad_norm": 0.3521025776863098, |
| "learning_rate": 8.487288135593222e-06, |
| "loss": 0.0901, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.5126439825235005, |
| "grad_norm": 0.31998488306999207, |
| "learning_rate": 8.486228813559323e-06, |
| "loss": 0.0911, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.513703164305574, |
| "grad_norm": 0.29415151476860046, |
| "learning_rate": 8.485169491525425e-06, |
| "loss": 0.0899, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.5147623460876471, |
| "grad_norm": 0.7465116381645203, |
| "learning_rate": 8.484110169491526e-06, |
| "loss": 0.0893, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.5158215278697207, |
| "grad_norm": 0.37793874740600586, |
| "learning_rate": 8.483050847457628e-06, |
| "loss": 0.0872, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.516880709651794, |
| "grad_norm": 0.6085448265075684, |
| "learning_rate": 8.481991525423729e-06, |
| "loss": 0.0898, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.5179398914338673, |
| "grad_norm": 0.9050545692443848, |
| "learning_rate": 8.48093220338983e-06, |
| "loss": 0.0907, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.5189990732159406, |
| "grad_norm": 0.495643675327301, |
| "learning_rate": 8.479872881355934e-06, |
| "loss": 0.0891, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.520058254998014, |
| "grad_norm": 0.361016184091568, |
| "learning_rate": 8.478813559322035e-06, |
| "loss": 0.0888, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.5211174367800875, |
| "grad_norm": 0.3698117733001709, |
| "learning_rate": 8.477754237288136e-06, |
| "loss": 0.0919, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.5221766185621606, |
| "grad_norm": 0.29097628593444824, |
| "learning_rate": 8.476694915254238e-06, |
| "loss": 0.0924, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.5232358003442341, |
| "grad_norm": 0.42036259174346924, |
| "learning_rate": 8.47563559322034e-06, |
| "loss": 0.0934, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.5242949821263074, |
| "grad_norm": 0.24552021920681, |
| "learning_rate": 8.47457627118644e-06, |
| "loss": 0.0879, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.5253541639083807, |
| "grad_norm": 0.6841972470283508, |
| "learning_rate": 8.473516949152542e-06, |
| "loss": 0.0906, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.5264133456904543, |
| "grad_norm": 0.9200479388237, |
| "learning_rate": 8.472457627118645e-06, |
| "loss": 0.0885, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.5274725274725274, |
| "grad_norm": 0.5507006049156189, |
| "learning_rate": 8.471398305084746e-06, |
| "loss": 0.0908, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.528531709254601, |
| "grad_norm": 1.2419291734695435, |
| "learning_rate": 8.470338983050848e-06, |
| "loss": 0.0913, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.529590891036674, |
| "grad_norm": 0.3634277880191803, |
| "learning_rate": 8.469279661016951e-06, |
| "loss": 0.0896, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.5306500728187475, |
| "grad_norm": 0.5093683004379272, |
| "learning_rate": 8.468220338983052e-06, |
| "loss": 0.0939, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.5317092546008209, |
| "grad_norm": 0.3977075219154358, |
| "learning_rate": 8.467161016949154e-06, |
| "loss": 0.0893, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.5327684363828942, |
| "grad_norm": 0.6540356278419495, |
| "learning_rate": 8.466101694915255e-06, |
| "loss": 0.0903, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.5338276181649677, |
| "grad_norm": 0.3206644356250763, |
| "learning_rate": 8.465042372881357e-06, |
| "loss": 0.0908, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.5348867999470408, |
| "grad_norm": 0.5804445147514343, |
| "learning_rate": 8.463983050847458e-06, |
| "loss": 0.091, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.5359459817291143, |
| "grad_norm": 0.3737938404083252, |
| "learning_rate": 8.46292372881356e-06, |
| "loss": 0.0881, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.5370051635111877, |
| "grad_norm": 0.9696879982948303, |
| "learning_rate": 8.461864406779663e-06, |
| "loss": 0.0898, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.538064345293261, |
| "grad_norm": 0.3338160812854767, |
| "learning_rate": 8.460805084745764e-06, |
| "loss": 0.0871, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.5391235270753343, |
| "grad_norm": 0.5255388021469116, |
| "learning_rate": 8.459745762711865e-06, |
| "loss": 0.0868, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.5401827088574076, |
| "grad_norm": 0.3634493947029114, |
| "learning_rate": 8.458686440677967e-06, |
| "loss": 0.0884, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.5412418906394811, |
| "grad_norm": 0.2899482250213623, |
| "learning_rate": 8.457627118644068e-06, |
| "loss": 0.0855, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.5423010724215542, |
| "grad_norm": 0.4005317986011505, |
| "learning_rate": 8.45656779661017e-06, |
| "loss": 0.0912, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.5433602542036278, |
| "grad_norm": 0.35378047823905945, |
| "learning_rate": 8.455508474576271e-06, |
| "loss": 0.0897, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.544419435985701, |
| "grad_norm": 0.25792017579078674, |
| "learning_rate": 8.454449152542374e-06, |
| "loss": 0.0866, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.5454786177677744, |
| "grad_norm": 0.36935243010520935, |
| "learning_rate": 8.453389830508476e-06, |
| "loss": 0.0867, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.5465377995498477, |
| "grad_norm": 0.5194677114486694, |
| "learning_rate": 8.452330508474577e-06, |
| "loss": 0.0903, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.547596981331921, |
| "grad_norm": 0.4349111318588257, |
| "learning_rate": 8.451271186440678e-06, |
| "loss": 0.087, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.5486561631139946, |
| "grad_norm": 0.24651400744915009, |
| "learning_rate": 8.45021186440678e-06, |
| "loss": 0.0911, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.5497153448960677, |
| "grad_norm": 0.7304729223251343, |
| "learning_rate": 8.449152542372881e-06, |
| "loss": 0.096, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.5507745266781412, |
| "grad_norm": 0.7694931626319885, |
| "learning_rate": 8.448093220338984e-06, |
| "loss": 0.094, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.5518337084602145, |
| "grad_norm": 0.3536149561405182, |
| "learning_rate": 8.447033898305086e-06, |
| "loss": 0.0889, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.5528928902422878, |
| "grad_norm": 0.28751471638679504, |
| "learning_rate": 8.445974576271187e-06, |
| "loss": 0.0899, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.5539520720243611, |
| "grad_norm": 0.2782193720340729, |
| "learning_rate": 8.444915254237289e-06, |
| "loss": 0.0909, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.5550112538064345, |
| "grad_norm": 0.2645580470561981, |
| "learning_rate": 8.443855932203392e-06, |
| "loss": 0.0899, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.556070435588508, |
| "grad_norm": 0.2474951297044754, |
| "learning_rate": 8.442796610169493e-06, |
| "loss": 0.0874, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.557129617370581, |
| "grad_norm": 0.3451857566833496, |
| "learning_rate": 8.441737288135594e-06, |
| "loss": 0.092, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.5581887991526546, |
| "grad_norm": 1.3307249546051025, |
| "learning_rate": 8.440677966101696e-06, |
| "loss": 0.091, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.559247980934728, |
| "grad_norm": 0.5494109988212585, |
| "learning_rate": 8.439618644067797e-06, |
| "loss": 0.0883, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.5603071627168013, |
| "grad_norm": 0.6073052883148193, |
| "learning_rate": 8.438559322033899e-06, |
| "loss": 0.0874, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.5613663444988746, |
| "grad_norm": 0.2551688253879547, |
| "learning_rate": 8.4375e-06, |
| "loss": 0.0878, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.5624255262809479, |
| "grad_norm": 0.3065250813961029, |
| "learning_rate": 8.436440677966102e-06, |
| "loss": 0.087, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.5634847080630214, |
| "grad_norm": 0.472655713558197, |
| "learning_rate": 8.435381355932205e-06, |
| "loss": 0.0888, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.5645438898450945, |
| "grad_norm": 0.30595093965530396, |
| "learning_rate": 8.434322033898306e-06, |
| "loss": 0.0903, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.565603071627168, |
| "grad_norm": 0.5333871841430664, |
| "learning_rate": 8.433262711864407e-06, |
| "loss": 0.0912, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.5666622534092414, |
| "grad_norm": 0.34483611583709717, |
| "learning_rate": 8.432203389830509e-06, |
| "loss": 0.0891, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.5677214351913147, |
| "grad_norm": 0.3512003421783447, |
| "learning_rate": 8.43114406779661e-06, |
| "loss": 0.087, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.5687806169733882, |
| "grad_norm": 0.5785057544708252, |
| "learning_rate": 8.430084745762712e-06, |
| "loss": 0.0883, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.5698397987554613, |
| "grad_norm": 0.41664919257164, |
| "learning_rate": 8.429025423728813e-06, |
| "loss": 0.087, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.5708989805375349, |
| "grad_norm": 0.44375720620155334, |
| "learning_rate": 8.427966101694916e-06, |
| "loss": 0.0886, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.571958162319608, |
| "grad_norm": 0.24387286603450775, |
| "learning_rate": 8.426906779661018e-06, |
| "loss": 0.0922, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.5730173441016815, |
| "grad_norm": 0.48554837703704834, |
| "learning_rate": 8.42584745762712e-06, |
| "loss": 0.0905, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.5740765258837548, |
| "grad_norm": 0.38902896642684937, |
| "learning_rate": 8.424788135593222e-06, |
| "loss": 0.0913, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.575135707665828, |
| "grad_norm": 0.2936050593852997, |
| "learning_rate": 8.423728813559324e-06, |
| "loss": 0.0934, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.5761948894479016, |
| "grad_norm": 1.3971738815307617, |
| "learning_rate": 8.422669491525425e-06, |
| "loss": 0.0912, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.5772540712299747, |
| "grad_norm": 0.5207201838493347, |
| "learning_rate": 8.421610169491526e-06, |
| "loss": 0.0894, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.5783132530120483, |
| "grad_norm": 0.21842218935489655, |
| "learning_rate": 8.420550847457628e-06, |
| "loss": 0.0908, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.5793724347941216, |
| "grad_norm": 0.2736961543560028, |
| "learning_rate": 8.41949152542373e-06, |
| "loss": 0.0898, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.580431616576195, |
| "grad_norm": 1.9446067810058594, |
| "learning_rate": 8.41843220338983e-06, |
| "loss": 0.0896, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.5814907983582682, |
| "grad_norm": 0.3114362359046936, |
| "learning_rate": 8.417372881355934e-06, |
| "loss": 0.0931, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.5825499801403415, |
| "grad_norm": 0.23505762219429016, |
| "learning_rate": 8.416313559322035e-06, |
| "loss": 0.0896, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.583609161922415, |
| "grad_norm": 0.2370254397392273, |
| "learning_rate": 8.415254237288137e-06, |
| "loss": 0.0908, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.5846683437044882, |
| "grad_norm": 0.3030937910079956, |
| "learning_rate": 8.414194915254238e-06, |
| "loss": 0.0879, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.5857275254865617, |
| "grad_norm": 0.29918500781059265, |
| "learning_rate": 8.41313559322034e-06, |
| "loss": 0.0881, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.586786707268635, |
| "grad_norm": 0.627144455909729, |
| "learning_rate": 8.41207627118644e-06, |
| "loss": 0.0924, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.5878458890507083, |
| "grad_norm": 0.2884758412837982, |
| "learning_rate": 8.411016949152542e-06, |
| "loss": 0.0876, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.5889050708327817, |
| "grad_norm": 0.586335301399231, |
| "learning_rate": 8.409957627118645e-06, |
| "loss": 0.089, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.589964252614855, |
| "grad_norm": 0.25162607431411743, |
| "learning_rate": 8.408898305084747e-06, |
| "loss": 0.09, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.5910234343969285, |
| "grad_norm": 0.28372955322265625, |
| "learning_rate": 8.407838983050848e-06, |
| "loss": 0.0857, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.5920826161790016, |
| "grad_norm": 0.6223670244216919, |
| "learning_rate": 8.40677966101695e-06, |
| "loss": 0.0945, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.5931417979610751, |
| "grad_norm": 1.0832895040512085, |
| "learning_rate": 8.405720338983051e-06, |
| "loss": 0.088, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.5942009797431484, |
| "grad_norm": 0.2921046316623688, |
| "learning_rate": 8.404661016949152e-06, |
| "loss": 0.0884, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.5952601615252218, |
| "grad_norm": 0.20038729906082153, |
| "learning_rate": 8.403601694915255e-06, |
| "loss": 0.0848, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.596319343307295, |
| "grad_norm": 0.2741376757621765, |
| "learning_rate": 8.402542372881357e-06, |
| "loss": 0.0887, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.5973785250893684, |
| "grad_norm": 0.25497081875801086, |
| "learning_rate": 8.401483050847458e-06, |
| "loss": 0.0892, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.598437706871442, |
| "grad_norm": 0.22426116466522217, |
| "learning_rate": 8.40042372881356e-06, |
| "loss": 0.0912, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.599496888653515, |
| "grad_norm": 0.3952200412750244, |
| "learning_rate": 8.399364406779663e-06, |
| "loss": 0.0879, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.6005560704355886, |
| "grad_norm": 0.497310072183609, |
| "learning_rate": 8.398305084745764e-06, |
| "loss": 0.0896, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.6016152522176619, |
| "grad_norm": 0.5498274564743042, |
| "learning_rate": 8.397245762711866e-06, |
| "loss": 0.0915, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.6026744339997352, |
| "grad_norm": 0.28143325448036194, |
| "learning_rate": 8.396186440677967e-06, |
| "loss": 0.089, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.6037336157818085, |
| "grad_norm": 0.23657409846782684, |
| "learning_rate": 8.395127118644068e-06, |
| "loss": 0.0901, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.6047927975638818, |
| "grad_norm": 0.3381804823875427, |
| "learning_rate": 8.39406779661017e-06, |
| "loss": 0.0912, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.6058519793459554, |
| "grad_norm": 0.21086886525154114, |
| "learning_rate": 8.393008474576271e-06, |
| "loss": 0.0905, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.6069111611280285, |
| "grad_norm": 0.461028516292572, |
| "learning_rate": 8.391949152542374e-06, |
| "loss": 0.0893, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.607970342910102, |
| "grad_norm": 0.22690759599208832, |
| "learning_rate": 8.390889830508476e-06, |
| "loss": 0.0898, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.6090295246921753, |
| "grad_norm": 0.6278162598609924, |
| "learning_rate": 8.389830508474577e-06, |
| "loss": 0.09, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.6100887064742486, |
| "grad_norm": 0.30574408173561096, |
| "learning_rate": 8.388771186440679e-06, |
| "loss": 0.091, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.611147888256322, |
| "grad_norm": 0.25087496638298035, |
| "learning_rate": 8.38771186440678e-06, |
| "loss": 0.0841, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.6122070700383953, |
| "grad_norm": 0.9940456748008728, |
| "learning_rate": 8.386652542372881e-06, |
| "loss": 0.0855, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.6132662518204688, |
| "grad_norm": 0.22347316145896912, |
| "learning_rate": 8.385593220338983e-06, |
| "loss": 0.0888, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.6143254336025419, |
| "grad_norm": 0.3078644871711731, |
| "learning_rate": 8.384533898305084e-06, |
| "loss": 0.0867, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.2520007789134979, |
| "learning_rate": 8.383474576271187e-06, |
| "loss": 0.0863, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.6164437971666887, |
| "grad_norm": 0.3745609223842621, |
| "learning_rate": 8.382415254237289e-06, |
| "loss": 0.0879, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.617502978948762, |
| "grad_norm": 0.9579833149909973, |
| "learning_rate": 8.381355932203392e-06, |
| "loss": 0.095, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.6185621607308356, |
| "grad_norm": 0.8180900812149048, |
| "learning_rate": 8.380296610169493e-06, |
| "loss": 0.089, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.6196213425129087, |
| "grad_norm": 0.25263839960098267, |
| "learning_rate": 8.379237288135595e-06, |
| "loss": 0.0932, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.6206805242949822, |
| "grad_norm": 1.0263060331344604, |
| "learning_rate": 8.378177966101696e-06, |
| "loss": 0.0896, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.6217397060770553, |
| "grad_norm": 1.3182510137557983, |
| "learning_rate": 8.377118644067797e-06, |
| "loss": 0.093, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.6227988878591288, |
| "grad_norm": 0.6498829126358032, |
| "learning_rate": 8.376059322033899e-06, |
| "loss": 0.0909, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.6238580696412022, |
| "grad_norm": 0.34288349747657776, |
| "learning_rate": 8.375e-06, |
| "loss": 0.0911, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.6249172514232755, |
| "grad_norm": 0.627138078212738, |
| "learning_rate": 8.373940677966103e-06, |
| "loss": 0.0929, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.625976433205349, |
| "grad_norm": 0.3161716163158417, |
| "learning_rate": 8.372881355932205e-06, |
| "loss": 0.0901, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.627035614987422, |
| "grad_norm": 0.9007494449615479, |
| "learning_rate": 8.371822033898306e-06, |
| "loss": 0.0922, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.6280947967694956, |
| "grad_norm": 0.908442497253418, |
| "learning_rate": 8.370762711864408e-06, |
| "loss": 0.0906, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.629153978551569, |
| "grad_norm": 0.2966644763946533, |
| "learning_rate": 8.369703389830509e-06, |
| "loss": 0.0841, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.6302131603336423, |
| "grad_norm": 0.5009570121765137, |
| "learning_rate": 8.36864406779661e-06, |
| "loss": 0.0916, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.6312723421157156, |
| "grad_norm": 0.6445338129997253, |
| "learning_rate": 8.367584745762712e-06, |
| "loss": 0.0931, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.632331523897789, |
| "grad_norm": 0.3887215256690979, |
| "learning_rate": 8.366525423728813e-06, |
| "loss": 0.0887, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.6333907056798624, |
| "grad_norm": 0.7631832361221313, |
| "learning_rate": 8.365466101694916e-06, |
| "loss": 0.0934, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.6344498874619355, |
| "grad_norm": 0.2936621606349945, |
| "learning_rate": 8.364406779661018e-06, |
| "loss": 0.0952, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.635509069244009, |
| "grad_norm": 0.66786128282547, |
| "learning_rate": 8.36334745762712e-06, |
| "loss": 0.094, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.6365682510260824, |
| "grad_norm": 0.2722414433956146, |
| "learning_rate": 8.36228813559322e-06, |
| "loss": 0.0889, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.6376274328081557, |
| "grad_norm": 0.2932286560535431, |
| "learning_rate": 8.361228813559322e-06, |
| "loss": 0.0907, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.638686614590229, |
| "grad_norm": 0.2571784257888794, |
| "learning_rate": 8.360169491525423e-06, |
| "loss": 0.0888, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.6397457963723023, |
| "grad_norm": 0.48270049691200256, |
| "learning_rate": 8.359110169491527e-06, |
| "loss": 0.0881, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.6408049781543759, |
| "grad_norm": 0.5231984257698059, |
| "learning_rate": 8.358050847457628e-06, |
| "loss": 0.0903, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.641864159936449, |
| "grad_norm": 0.3783744275569916, |
| "learning_rate": 8.35699152542373e-06, |
| "loss": 0.0944, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.6429233417185225, |
| "grad_norm": 0.565405547618866, |
| "learning_rate": 8.35593220338983e-06, |
| "loss": 0.0904, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.6439825235005958, |
| "grad_norm": 0.739701509475708, |
| "learning_rate": 8.354872881355934e-06, |
| "loss": 0.0906, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.6450417052826691, |
| "grad_norm": 0.32626545429229736, |
| "learning_rate": 8.353813559322035e-06, |
| "loss": 0.089, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.6461008870647424, |
| "grad_norm": 0.32011836767196655, |
| "learning_rate": 8.352754237288137e-06, |
| "loss": 0.0932, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.6471600688468158, |
| "grad_norm": 0.5030165910720825, |
| "learning_rate": 8.351694915254238e-06, |
| "loss": 0.0894, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.6482192506288893, |
| "grad_norm": 0.5868280529975891, |
| "learning_rate": 8.35063559322034e-06, |
| "loss": 0.0892, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.6492784324109624, |
| "grad_norm": 0.5712450742721558, |
| "learning_rate": 8.349576271186441e-06, |
| "loss": 0.0905, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.650337614193036, |
| "grad_norm": 0.5775148272514343, |
| "learning_rate": 8.348516949152542e-06, |
| "loss": 0.0918, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.6513967959751092, |
| "grad_norm": 0.33030012249946594, |
| "learning_rate": 8.347457627118645e-06, |
| "loss": 0.0905, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.6524559777571826, |
| "grad_norm": 0.9324721097946167, |
| "learning_rate": 8.346398305084747e-06, |
| "loss": 0.0921, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.6535151595392559, |
| "grad_norm": 0.2637194097042084, |
| "learning_rate": 8.345338983050848e-06, |
| "loss": 0.087, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.6545743413213292, |
| "grad_norm": 1.0360710620880127, |
| "learning_rate": 8.34427966101695e-06, |
| "loss": 0.0916, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.6556335231034027, |
| "grad_norm": 0.3216269910335541, |
| "learning_rate": 8.343220338983051e-06, |
| "loss": 0.0923, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.6566927048854758, |
| "grad_norm": 0.30418795347213745, |
| "learning_rate": 8.342161016949152e-06, |
| "loss": 0.0895, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.6577518866675494, |
| "grad_norm": 0.3769773840904236, |
| "learning_rate": 8.341101694915254e-06, |
| "loss": 0.095, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.6588110684496227, |
| "grad_norm": 0.22719967365264893, |
| "learning_rate": 8.340042372881357e-06, |
| "loss": 0.0911, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.659870250231696, |
| "grad_norm": 0.3400397300720215, |
| "learning_rate": 8.338983050847458e-06, |
| "loss": 0.0908, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.6609294320137695, |
| "grad_norm": 1.1307333707809448, |
| "learning_rate": 8.33792372881356e-06, |
| "loss": 0.0892, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.6619886137958426, |
| "grad_norm": 0.2560805380344391, |
| "learning_rate": 8.336864406779663e-06, |
| "loss": 0.0892, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.6630477955779162, |
| "grad_norm": 0.7875747084617615, |
| "learning_rate": 8.335805084745764e-06, |
| "loss": 0.0933, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.6641069773599892, |
| "grad_norm": 0.311423122882843, |
| "learning_rate": 8.334745762711866e-06, |
| "loss": 0.0876, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.6651661591420628, |
| "grad_norm": 0.570884644985199, |
| "learning_rate": 8.333686440677967e-06, |
| "loss": 0.091, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.666225340924136, |
| "grad_norm": 0.5333699584007263, |
| "learning_rate": 8.332627118644069e-06, |
| "loss": 0.0874, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.6672845227062094, |
| "grad_norm": 0.6104627847671509, |
| "learning_rate": 8.33156779661017e-06, |
| "loss": 0.0904, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.668343704488283, |
| "grad_norm": 0.2895054519176483, |
| "learning_rate": 8.330508474576271e-06, |
| "loss": 0.0893, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.669402886270356, |
| "grad_norm": 0.2963944673538208, |
| "learning_rate": 8.329449152542374e-06, |
| "loss": 0.0914, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.6704620680524296, |
| "grad_norm": 0.3085760772228241, |
| "learning_rate": 8.328389830508476e-06, |
| "loss": 0.0891, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.671521249834503, |
| "grad_norm": 0.26071521639823914, |
| "learning_rate": 8.327330508474577e-06, |
| "loss": 0.0875, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.6725804316165762, |
| "grad_norm": 0.2093966007232666, |
| "learning_rate": 8.326271186440679e-06, |
| "loss": 0.088, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.6736396133986495, |
| "grad_norm": 0.2903918921947479, |
| "learning_rate": 8.32521186440678e-06, |
| "loss": 0.0871, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.6746987951807228, |
| "grad_norm": 0.3491870164871216, |
| "learning_rate": 8.324152542372882e-06, |
| "loss": 0.0917, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.6757579769627964, |
| "grad_norm": 0.45493122935295105, |
| "learning_rate": 8.323093220338983e-06, |
| "loss": 0.0918, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.6768171587448695, |
| "grad_norm": 0.7867631912231445, |
| "learning_rate": 8.322033898305086e-06, |
| "loss": 0.089, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.677876340526943, |
| "grad_norm": 1.0455607175827026, |
| "learning_rate": 8.320974576271187e-06, |
| "loss": 0.0918, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.6789355223090163, |
| "grad_norm": 0.3224550187587738, |
| "learning_rate": 8.319915254237289e-06, |
| "loss": 0.0896, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.6799947040910896, |
| "grad_norm": 0.30370235443115234, |
| "learning_rate": 8.31885593220339e-06, |
| "loss": 0.089, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.681053885873163, |
| "grad_norm": 0.2551031708717346, |
| "learning_rate": 8.317796610169492e-06, |
| "loss": 0.0905, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.6821130676552363, |
| "grad_norm": 0.3214464783668518, |
| "learning_rate": 8.316737288135593e-06, |
| "loss": 0.087, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.6831722494373098, |
| "grad_norm": 0.2692156136035919, |
| "learning_rate": 8.315677966101695e-06, |
| "loss": 0.0911, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.684231431219383, |
| "grad_norm": 0.8112454414367676, |
| "learning_rate": 8.314618644067798e-06, |
| "loss": 0.0883, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.6852906130014564, |
| "grad_norm": 0.2618613541126251, |
| "learning_rate": 8.313559322033899e-06, |
| "loss": 0.0867, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.6863497947835298, |
| "grad_norm": 0.323166161775589, |
| "learning_rate": 8.3125e-06, |
| "loss": 0.0924, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.687408976565603, |
| "grad_norm": 1.049118161201477, |
| "learning_rate": 8.311440677966104e-06, |
| "loss": 0.0934, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.6884681583476764, |
| "grad_norm": 0.3779328763484955, |
| "learning_rate": 8.310381355932205e-06, |
| "loss": 0.0921, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.6895273401297497, |
| "grad_norm": 0.3923911452293396, |
| "learning_rate": 8.309322033898306e-06, |
| "loss": 0.0919, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.6905865219118232, |
| "grad_norm": 0.28012555837631226, |
| "learning_rate": 8.308262711864408e-06, |
| "loss": 0.0919, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.6916457036938963, |
| "grad_norm": 0.7604759931564331, |
| "learning_rate": 8.30720338983051e-06, |
| "loss": 0.0914, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.6927048854759699, |
| "grad_norm": 0.36507344245910645, |
| "learning_rate": 8.30614406779661e-06, |
| "loss": 0.0885, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.6937640672580432, |
| "grad_norm": 0.32517632842063904, |
| "learning_rate": 8.305084745762712e-06, |
| "loss": 0.0835, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.6948232490401165, |
| "grad_norm": 0.25675442814826965, |
| "learning_rate": 8.304025423728813e-06, |
| "loss": 0.0885, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.6958824308221898, |
| "grad_norm": 0.2421637326478958, |
| "learning_rate": 8.302966101694917e-06, |
| "loss": 0.0881, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.6969416126042631, |
| "grad_norm": 0.5827629566192627, |
| "learning_rate": 8.301906779661018e-06, |
| "loss": 0.0886, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.6980007943863367, |
| "grad_norm": 0.5402868986129761, |
| "learning_rate": 8.30084745762712e-06, |
| "loss": 0.0899, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.6990599761684098, |
| "grad_norm": 0.6650782823562622, |
| "learning_rate": 8.29978813559322e-06, |
| "loss": 0.0921, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.7001191579504833, |
| "grad_norm": 0.29905128479003906, |
| "learning_rate": 8.298728813559322e-06, |
| "loss": 0.0866, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.7011783397325566, |
| "grad_norm": 0.2324621081352234, |
| "learning_rate": 8.297669491525424e-06, |
| "loss": 0.0854, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.70223752151463, |
| "grad_norm": 0.28160741925239563, |
| "learning_rate": 8.296610169491525e-06, |
| "loss": 0.0905, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.7032967032967035, |
| "grad_norm": 0.7123225331306458, |
| "learning_rate": 8.295550847457628e-06, |
| "loss": 0.0891, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.7043558850787766, |
| "grad_norm": 0.9756201505661011, |
| "learning_rate": 8.29449152542373e-06, |
| "loss": 0.0912, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.70541506686085, |
| "grad_norm": 0.6386804580688477, |
| "learning_rate": 8.293432203389831e-06, |
| "loss": 0.0906, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.7064742486429232, |
| "grad_norm": 0.34498628973960876, |
| "learning_rate": 8.292372881355934e-06, |
| "loss": 0.0882, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.7075334304249967, |
| "grad_norm": 0.6193973422050476, |
| "learning_rate": 8.291313559322035e-06, |
| "loss": 0.0879, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.70859261220707, |
| "grad_norm": 0.27502185106277466, |
| "learning_rate": 8.290254237288137e-06, |
| "loss": 0.0898, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.7096517939891434, |
| "grad_norm": 0.7583441138267517, |
| "learning_rate": 8.289194915254238e-06, |
| "loss": 0.0873, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.710710975771217, |
| "grad_norm": 0.29789406061172485, |
| "learning_rate": 8.28813559322034e-06, |
| "loss": 0.0903, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.71177015755329, |
| "grad_norm": 0.2680496573448181, |
| "learning_rate": 8.287076271186441e-06, |
| "loss": 0.091, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.7128293393353635, |
| "grad_norm": 0.22052001953125, |
| "learning_rate": 8.286016949152543e-06, |
| "loss": 0.0891, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.7138885211174366, |
| "grad_norm": 0.4834091365337372, |
| "learning_rate": 8.284957627118646e-06, |
| "loss": 0.0889, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.7149477028995102, |
| "grad_norm": 0.23187105357646942, |
| "learning_rate": 8.283898305084747e-06, |
| "loss": 0.0869, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.7160068846815835, |
| "grad_norm": 0.23044142127037048, |
| "learning_rate": 8.282838983050848e-06, |
| "loss": 0.0929, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.7170660664636568, |
| "grad_norm": 0.3926228880882263, |
| "learning_rate": 8.28177966101695e-06, |
| "loss": 0.0941, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.7181252482457303, |
| "grad_norm": 0.35786905884742737, |
| "learning_rate": 8.280720338983051e-06, |
| "loss": 0.092, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.7191844300278034, |
| "grad_norm": 0.3090403079986572, |
| "learning_rate": 8.279661016949153e-06, |
| "loss": 0.0933, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.720243611809877, |
| "grad_norm": 0.26617270708084106, |
| "learning_rate": 8.278601694915254e-06, |
| "loss": 0.0869, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.7213027935919503, |
| "grad_norm": 0.302048921585083, |
| "learning_rate": 8.277542372881357e-06, |
| "loss": 0.0914, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.7223619753740236, |
| "grad_norm": 0.27975502610206604, |
| "learning_rate": 8.276483050847459e-06, |
| "loss": 0.0911, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.723421157156097, |
| "grad_norm": 0.46345677971839905, |
| "learning_rate": 8.27542372881356e-06, |
| "loss": 0.0883, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.7244803389381702, |
| "grad_norm": 0.7359839081764221, |
| "learning_rate": 8.274364406779661e-06, |
| "loss": 0.0903, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.7255395207202437, |
| "grad_norm": 0.466911643743515, |
| "learning_rate": 8.273305084745763e-06, |
| "loss": 0.092, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.7265987025023168, |
| "grad_norm": 0.2327670305967331, |
| "learning_rate": 8.272245762711864e-06, |
| "loss": 0.0876, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.7276578842843904, |
| "grad_norm": 0.23270419239997864, |
| "learning_rate": 8.271186440677966e-06, |
| "loss": 0.089, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.7287170660664637, |
| "grad_norm": 0.32935333251953125, |
| "learning_rate": 8.270127118644069e-06, |
| "loss": 0.0866, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.729776247848537, |
| "grad_norm": 0.2330481857061386, |
| "learning_rate": 8.26906779661017e-06, |
| "loss": 0.0864, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.7308354296306103, |
| "grad_norm": 0.2925349175930023, |
| "learning_rate": 8.268008474576272e-06, |
| "loss": 0.0886, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.7318946114126836, |
| "grad_norm": 0.27524423599243164, |
| "learning_rate": 8.266949152542375e-06, |
| "loss": 0.0842, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.7329537931947572, |
| "grad_norm": 1.3679767847061157, |
| "learning_rate": 8.265889830508476e-06, |
| "loss": 0.0874, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.7340129749768303, |
| "grad_norm": 0.30476149916648865, |
| "learning_rate": 8.264830508474577e-06, |
| "loss": 0.0895, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.7350721567589038, |
| "grad_norm": 0.7838143706321716, |
| "learning_rate": 8.263771186440679e-06, |
| "loss": 0.0853, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.7361313385409771, |
| "grad_norm": 0.7583155035972595, |
| "learning_rate": 8.26271186440678e-06, |
| "loss": 0.0904, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.7371905203230504, |
| "grad_norm": 0.3275405168533325, |
| "learning_rate": 8.261652542372882e-06, |
| "loss": 0.0871, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.7382497021051238, |
| "grad_norm": 0.3539300262928009, |
| "learning_rate": 8.260593220338983e-06, |
| "loss": 0.087, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.739308883887197, |
| "grad_norm": 0.4087120294570923, |
| "learning_rate": 8.259533898305086e-06, |
| "loss": 0.0906, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.7403680656692706, |
| "grad_norm": 0.22603043913841248, |
| "learning_rate": 8.258474576271188e-06, |
| "loss": 0.0916, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.7414272474513437, |
| "grad_norm": 0.2526402771472931, |
| "learning_rate": 8.257415254237289e-06, |
| "loss": 0.0868, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.7424864292334172, |
| "grad_norm": 0.7923634052276611, |
| "learning_rate": 8.25635593220339e-06, |
| "loss": 0.0942, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.7435456110154905, |
| "grad_norm": 0.8598949313163757, |
| "learning_rate": 8.255296610169492e-06, |
| "loss": 0.0943, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.7446047927975639, |
| "grad_norm": 0.24176791310310364, |
| "learning_rate": 8.254237288135593e-06, |
| "loss": 0.0874, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.7456639745796372, |
| "grad_norm": 0.5251477360725403, |
| "learning_rate": 8.253177966101695e-06, |
| "loss": 0.0881, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.7467231563617105, |
| "grad_norm": 0.2412240356206894, |
| "learning_rate": 8.252118644067796e-06, |
| "loss": 0.0895, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.747782338143784, |
| "grad_norm": 0.3063221275806427, |
| "learning_rate": 8.2510593220339e-06, |
| "loss": 0.087, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.7488415199258571, |
| "grad_norm": 0.6908991932868958, |
| "learning_rate": 8.25e-06, |
| "loss": 0.0886, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.7499007017079307, |
| "grad_norm": 0.2941277027130127, |
| "learning_rate": 8.248940677966102e-06, |
| "loss": 0.0927, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.750959883490004, |
| "grad_norm": 0.8127859234809875, |
| "learning_rate": 8.247881355932203e-06, |
| "loss": 0.0892, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.7520190652720773, |
| "grad_norm": 0.8596274256706238, |
| "learning_rate": 8.246822033898307e-06, |
| "loss": 0.0898, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.7530782470541508, |
| "grad_norm": 0.47415459156036377, |
| "learning_rate": 8.245762711864408e-06, |
| "loss": 0.0917, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.754137428836224, |
| "grad_norm": 0.2681109607219696, |
| "learning_rate": 8.24470338983051e-06, |
| "loss": 0.0912, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.7551966106182975, |
| "grad_norm": 0.260294109582901, |
| "learning_rate": 8.24364406779661e-06, |
| "loss": 0.0874, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.7562557924003706, |
| "grad_norm": 0.24355289340019226, |
| "learning_rate": 8.242584745762712e-06, |
| "loss": 0.0905, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.757314974182444, |
| "grad_norm": 0.24448110163211823, |
| "learning_rate": 8.241525423728815e-06, |
| "loss": 0.0857, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.7583741559645174, |
| "grad_norm": 0.2583911418914795, |
| "learning_rate": 8.240466101694917e-06, |
| "loss": 0.0878, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.7594333377465907, |
| "grad_norm": 0.6160121560096741, |
| "learning_rate": 8.239406779661018e-06, |
| "loss": 0.0871, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.7604925195286643, |
| "grad_norm": 0.29204174876213074, |
| "learning_rate": 8.23834745762712e-06, |
| "loss": 0.0892, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.7615517013107374, |
| "grad_norm": 0.26087382435798645, |
| "learning_rate": 8.237288135593221e-06, |
| "loss": 0.0926, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.7626108830928109, |
| "grad_norm": 0.6562139987945557, |
| "learning_rate": 8.236228813559322e-06, |
| "loss": 0.0919, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.7636700648748842, |
| "grad_norm": 0.747133195400238, |
| "learning_rate": 8.235169491525424e-06, |
| "loss": 0.0869, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.7647292466569575, |
| "grad_norm": 0.4086461663246155, |
| "learning_rate": 8.234110169491525e-06, |
| "loss": 0.0898, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.7657884284390308, |
| "grad_norm": 0.2578766644001007, |
| "learning_rate": 8.233050847457628e-06, |
| "loss": 0.0888, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.7668476102211041, |
| "grad_norm": 0.27525123953819275, |
| "learning_rate": 8.23199152542373e-06, |
| "loss": 0.0876, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.7679067920031777, |
| "grad_norm": 0.38917550444602966, |
| "learning_rate": 8.230932203389831e-06, |
| "loss": 0.0923, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.7689659737852508, |
| "grad_norm": 0.2889343500137329, |
| "learning_rate": 8.229872881355933e-06, |
| "loss": 0.089, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.7700251555673243, |
| "grad_norm": 0.32560211420059204, |
| "learning_rate": 8.228813559322034e-06, |
| "loss": 0.0865, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.7710843373493976, |
| "grad_norm": 0.7226275205612183, |
| "learning_rate": 8.227754237288135e-06, |
| "loss": 0.0889, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.772143519131471, |
| "grad_norm": 0.7984499335289001, |
| "learning_rate": 8.226694915254237e-06, |
| "loss": 0.0891, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.7732027009135443, |
| "grad_norm": 0.24345754086971283, |
| "learning_rate": 8.22563559322034e-06, |
| "loss": 0.0896, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.7742618826956176, |
| "grad_norm": 0.2448401004076004, |
| "learning_rate": 8.224576271186441e-06, |
| "loss": 0.0856, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.7753210644776911, |
| "grad_norm": 0.25796836614608765, |
| "learning_rate": 8.223516949152543e-06, |
| "loss": 0.09, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.7763802462597642, |
| "grad_norm": 0.3721190392971039, |
| "learning_rate": 8.222457627118646e-06, |
| "loss": 0.0888, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.7774394280418377, |
| "grad_norm": 0.3655620813369751, |
| "learning_rate": 8.221398305084747e-06, |
| "loss": 0.0868, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.778498609823911, |
| "grad_norm": 0.29531556367874146, |
| "learning_rate": 8.220338983050849e-06, |
| "loss": 0.0928, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.7795577916059844, |
| "grad_norm": 1.9438749551773071, |
| "learning_rate": 8.21927966101695e-06, |
| "loss": 0.0913, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.7806169733880577, |
| "grad_norm": 0.3475892245769501, |
| "learning_rate": 8.218220338983051e-06, |
| "loss": 0.0876, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.781676155170131, |
| "grad_norm": 0.7077595591545105, |
| "learning_rate": 8.217161016949153e-06, |
| "loss": 0.0919, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.7827353369522045, |
| "grad_norm": 0.49300310015678406, |
| "learning_rate": 8.216101694915254e-06, |
| "loss": 0.0902, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.7837945187342776, |
| "grad_norm": 0.34648701548576355, |
| "learning_rate": 8.215042372881357e-06, |
| "loss": 0.0927, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.7848537005163512, |
| "grad_norm": 0.3139582872390747, |
| "learning_rate": 8.213983050847459e-06, |
| "loss": 0.0876, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.7859128822984245, |
| "grad_norm": 0.4565608501434326, |
| "learning_rate": 8.21292372881356e-06, |
| "loss": 0.0934, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.7869720640804978, |
| "grad_norm": 0.26290515065193176, |
| "learning_rate": 8.211864406779662e-06, |
| "loss": 0.0885, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.7880312458625711, |
| "grad_norm": 0.2464137226343155, |
| "learning_rate": 8.210805084745763e-06, |
| "loss": 0.0877, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.7890904276446444, |
| "grad_norm": 0.22745825350284576, |
| "learning_rate": 8.209745762711864e-06, |
| "loss": 0.0889, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.790149609426718, |
| "grad_norm": 0.32300448417663574, |
| "learning_rate": 8.208686440677966e-06, |
| "loss": 0.0852, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.791208791208791, |
| "grad_norm": 0.2802238166332245, |
| "learning_rate": 8.207627118644069e-06, |
| "loss": 0.0871, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.7922679729908646, |
| "grad_norm": 0.6324068307876587, |
| "learning_rate": 8.20656779661017e-06, |
| "loss": 0.0877, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.793327154772938, |
| "grad_norm": 0.5701524615287781, |
| "learning_rate": 8.205508474576272e-06, |
| "loss": 0.0869, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.7943863365550112, |
| "grad_norm": 0.8544675707817078, |
| "learning_rate": 8.204449152542373e-06, |
| "loss": 0.0925, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.7954455183370848, |
| "grad_norm": 0.3276161253452301, |
| "learning_rate": 8.203389830508475e-06, |
| "loss": 0.0898, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.7965047001191579, |
| "grad_norm": 0.31532466411590576, |
| "learning_rate": 8.202330508474578e-06, |
| "loss": 0.0903, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.7975638819012314, |
| "grad_norm": 0.361342191696167, |
| "learning_rate": 8.201271186440679e-06, |
| "loss": 0.0869, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.7986230636833045, |
| "grad_norm": 0.3243643045425415, |
| "learning_rate": 8.20021186440678e-06, |
| "loss": 0.0882, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.799682245465378, |
| "grad_norm": 0.3016441762447357, |
| "learning_rate": 8.199152542372882e-06, |
| "loss": 0.0892, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.8007414272474513, |
| "grad_norm": 0.2679581642150879, |
| "learning_rate": 8.198093220338983e-06, |
| "loss": 0.0874, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.8018006090295247, |
| "grad_norm": 0.29905226826667786, |
| "learning_rate": 8.197033898305086e-06, |
| "loss": 0.0907, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.8028597908115982, |
| "grad_norm": 1.1050947904586792, |
| "learning_rate": 8.195974576271188e-06, |
| "loss": 0.0952, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.8039189725936713, |
| "grad_norm": 0.38703450560569763, |
| "learning_rate": 8.19491525423729e-06, |
| "loss": 0.0895, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.8049781543757448, |
| "grad_norm": 0.22092828154563904, |
| "learning_rate": 8.19385593220339e-06, |
| "loss": 0.089, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.8060373361578181, |
| "grad_norm": 0.23202571272850037, |
| "learning_rate": 8.192796610169492e-06, |
| "loss": 0.0908, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.8070965179398915, |
| "grad_norm": 0.7841505408287048, |
| "learning_rate": 8.191737288135593e-06, |
| "loss": 0.0905, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.8081556997219648, |
| "grad_norm": 0.5167518258094788, |
| "learning_rate": 8.190677966101695e-06, |
| "loss": 0.0887, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.809214881504038, |
| "grad_norm": 0.4019578993320465, |
| "learning_rate": 8.189618644067798e-06, |
| "loss": 0.0882, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.8102740632861116, |
| "grad_norm": 0.7770986557006836, |
| "learning_rate": 8.1885593220339e-06, |
| "loss": 0.0913, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.8113332450681847, |
| "grad_norm": 0.39623284339904785, |
| "learning_rate": 8.1875e-06, |
| "loss": 0.0892, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.8123924268502583, |
| "grad_norm": 0.22474542260169983, |
| "learning_rate": 8.186440677966102e-06, |
| "loss": 0.0854, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.8134516086323316, |
| "grad_norm": 0.30103063583374023, |
| "learning_rate": 8.185381355932204e-06, |
| "loss": 0.0876, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.8145107904144049, |
| "grad_norm": 0.21331194043159485, |
| "learning_rate": 8.184322033898305e-06, |
| "loss": 0.09, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.8155699721964782, |
| "grad_norm": 0.22102200984954834, |
| "learning_rate": 8.183262711864406e-06, |
| "loss": 0.0841, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.8166291539785515, |
| "grad_norm": 0.2268272191286087, |
| "learning_rate": 8.182203389830508e-06, |
| "loss": 0.0879, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.817688335760625, |
| "grad_norm": 0.28173744678497314, |
| "learning_rate": 8.181144067796611e-06, |
| "loss": 0.087, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.8187475175426981, |
| "grad_norm": 1.1784483194351196, |
| "learning_rate": 8.180084745762712e-06, |
| "loss": 0.0876, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.8198066993247717, |
| "grad_norm": 0.25271815061569214, |
| "learning_rate": 8.179025423728815e-06, |
| "loss": 0.0832, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.820865881106845, |
| "grad_norm": 0.3612355589866638, |
| "learning_rate": 8.177966101694917e-06, |
| "loss": 0.0871, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.8219250628889183, |
| "grad_norm": 0.5274227261543274, |
| "learning_rate": 8.176906779661018e-06, |
| "loss": 0.0893, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.8229842446709916, |
| "grad_norm": 0.4560360610485077, |
| "learning_rate": 8.17584745762712e-06, |
| "loss": 0.0883, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.824043426453065, |
| "grad_norm": 0.9054328203201294, |
| "learning_rate": 8.174788135593221e-06, |
| "loss": 0.0876, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.8251026082351385, |
| "grad_norm": 0.28052154183387756, |
| "learning_rate": 8.173728813559323e-06, |
| "loss": 0.0854, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.8261617900172116, |
| "grad_norm": 0.3094460666179657, |
| "learning_rate": 8.172669491525424e-06, |
| "loss": 0.0868, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.827220971799285, |
| "grad_norm": 0.2433973103761673, |
| "learning_rate": 8.171610169491525e-06, |
| "loss": 0.0888, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.8282801535813584, |
| "grad_norm": 0.2780088782310486, |
| "learning_rate": 8.170550847457628e-06, |
| "loss": 0.0844, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.8293393353634317, |
| "grad_norm": 0.3073442578315735, |
| "learning_rate": 8.16949152542373e-06, |
| "loss": 0.0901, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.830398517145505, |
| "grad_norm": 0.3006991147994995, |
| "learning_rate": 8.168432203389831e-06, |
| "loss": 0.0868, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.8314576989275784, |
| "grad_norm": 0.243610218167305, |
| "learning_rate": 8.167372881355933e-06, |
| "loss": 0.0917, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.832516880709652, |
| "grad_norm": 0.7395919561386108, |
| "learning_rate": 8.166313559322034e-06, |
| "loss": 0.0882, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.833576062491725, |
| "grad_norm": 0.343936562538147, |
| "learning_rate": 8.165254237288136e-06, |
| "loss": 0.089, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.8346352442737985, |
| "grad_norm": 0.2729974091053009, |
| "learning_rate": 8.164194915254237e-06, |
| "loss": 0.0882, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.8356944260558719, |
| "grad_norm": 0.28195783495903015, |
| "learning_rate": 8.16313559322034e-06, |
| "loss": 0.0883, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.8367536078379452, |
| "grad_norm": 0.46009188890457153, |
| "learning_rate": 8.162076271186441e-06, |
| "loss": 0.0919, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.8378127896200185, |
| "grad_norm": 0.2789437770843506, |
| "learning_rate": 8.161016949152543e-06, |
| "loss": 0.0915, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.8388719714020918, |
| "grad_norm": 1.0787837505340576, |
| "learning_rate": 8.159957627118644e-06, |
| "loss": 0.0901, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.8399311531841653, |
| "grad_norm": 0.22357052564620972, |
| "learning_rate": 8.158898305084746e-06, |
| "loss": 0.09, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.8409903349662384, |
| "grad_norm": 0.3277105987071991, |
| "learning_rate": 8.157838983050849e-06, |
| "loss": 0.088, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.842049516748312, |
| "grad_norm": 0.22346952557563782, |
| "learning_rate": 8.15677966101695e-06, |
| "loss": 0.0889, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.8431086985303853, |
| "grad_norm": 0.22219225764274597, |
| "learning_rate": 8.155720338983052e-06, |
| "loss": 0.088, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.8441678803124586, |
| "grad_norm": 0.260712206363678, |
| "learning_rate": 8.154661016949153e-06, |
| "loss": 0.0881, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.8452270620945321, |
| "grad_norm": 0.2360645830631256, |
| "learning_rate": 8.153601694915254e-06, |
| "loss": 0.0854, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.8462862438766052, |
| "grad_norm": 0.46701470017433167, |
| "learning_rate": 8.152542372881358e-06, |
| "loss": 0.089, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.8473454256586788, |
| "grad_norm": 0.27775806188583374, |
| "learning_rate": 8.151483050847459e-06, |
| "loss": 0.0859, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.8484046074407519, |
| "grad_norm": 0.7500215172767639, |
| "learning_rate": 8.15042372881356e-06, |
| "loss": 0.0891, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.8494637892228254, |
| "grad_norm": 0.4179665446281433, |
| "learning_rate": 8.149364406779662e-06, |
| "loss": 0.0854, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.8505229710048987, |
| "grad_norm": 0.42210525274276733, |
| "learning_rate": 8.148305084745763e-06, |
| "loss": 0.085, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.851582152786972, |
| "grad_norm": 0.7828628420829773, |
| "learning_rate": 8.147245762711865e-06, |
| "loss": 0.0916, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.8526413345690456, |
| "grad_norm": 0.3319457769393921, |
| "learning_rate": 8.146186440677966e-06, |
| "loss": 0.0869, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.8537005163511187, |
| "grad_norm": 0.4556196331977844, |
| "learning_rate": 8.145127118644069e-06, |
| "loss": 0.0887, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.8547596981331922, |
| "grad_norm": 1.2392021417617798, |
| "learning_rate": 8.14406779661017e-06, |
| "loss": 0.0912, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.8558188799152655, |
| "grad_norm": 0.38705164194107056, |
| "learning_rate": 8.143008474576272e-06, |
| "loss": 0.0885, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.8568780616973388, |
| "grad_norm": 0.2887822985649109, |
| "learning_rate": 8.141949152542373e-06, |
| "loss": 0.0872, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.8579372434794121, |
| "grad_norm": 0.46790027618408203, |
| "learning_rate": 8.140889830508475e-06, |
| "loss": 0.091, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.8589964252614855, |
| "grad_norm": 0.31695792078971863, |
| "learning_rate": 8.139830508474576e-06, |
| "loss": 0.089, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.860055607043559, |
| "grad_norm": 0.368253231048584, |
| "learning_rate": 8.138771186440678e-06, |
| "loss": 0.0912, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.861114788825632, |
| "grad_norm": 0.6835266351699829, |
| "learning_rate": 8.13771186440678e-06, |
| "loss": 0.0862, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.8621739706077056, |
| "grad_norm": 0.2873135507106781, |
| "learning_rate": 8.136652542372882e-06, |
| "loss": 0.086, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.863233152389779, |
| "grad_norm": 0.8042873740196228, |
| "learning_rate": 8.135593220338983e-06, |
| "loss": 0.0922, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.8642923341718523, |
| "grad_norm": 0.7736334800720215, |
| "learning_rate": 8.134533898305087e-06, |
| "loss": 0.0913, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.8653515159539256, |
| "grad_norm": 0.3275861442089081, |
| "learning_rate": 8.133474576271188e-06, |
| "loss": 0.0838, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.8664106977359989, |
| "grad_norm": 0.28888413310050964, |
| "learning_rate": 8.13241525423729e-06, |
| "loss": 0.0895, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.8674698795180724, |
| "grad_norm": 0.38396742939949036, |
| "learning_rate": 8.13135593220339e-06, |
| "loss": 0.0904, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.8685290613001455, |
| "grad_norm": 0.33555108308792114, |
| "learning_rate": 8.130296610169492e-06, |
| "loss": 0.0876, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.869588243082219, |
| "grad_norm": 0.26808494329452515, |
| "learning_rate": 8.129237288135594e-06, |
| "loss": 0.0858, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.8706474248642924, |
| "grad_norm": 0.30014657974243164, |
| "learning_rate": 8.128177966101695e-06, |
| "loss": 0.0874, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.8717066066463657, |
| "grad_norm": 0.2523983418941498, |
| "learning_rate": 8.127118644067798e-06, |
| "loss": 0.0885, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.872765788428439, |
| "grad_norm": 0.44090861082077026, |
| "learning_rate": 8.1260593220339e-06, |
| "loss": 0.0887, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.8738249702105123, |
| "grad_norm": 0.3840296268463135, |
| "learning_rate": 8.125000000000001e-06, |
| "loss": 0.0908, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.8748841519925858, |
| "grad_norm": 0.2714099586009979, |
| "learning_rate": 8.123940677966102e-06, |
| "loss": 0.0902, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.875943333774659, |
| "grad_norm": 0.20345446467399597, |
| "learning_rate": 8.122881355932204e-06, |
| "loss": 0.0874, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.8770025155567325, |
| "grad_norm": 0.29075461626052856, |
| "learning_rate": 8.121822033898305e-06, |
| "loss": 0.0876, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.8780616973388058, |
| "grad_norm": 0.2817675769329071, |
| "learning_rate": 8.120762711864407e-06, |
| "loss": 0.0891, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.879120879120879, |
| "grad_norm": 0.4427769184112549, |
| "learning_rate": 8.119703389830508e-06, |
| "loss": 0.0877, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.8801800609029524, |
| "grad_norm": 1.1793564558029175, |
| "learning_rate": 8.118644067796611e-06, |
| "loss": 0.093, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.8812392426850257, |
| "grad_norm": 0.35909417271614075, |
| "learning_rate": 8.117584745762713e-06, |
| "loss": 0.0876, |
| "step": 1777 |
| }, |
| { |
| "epoch": 1.8822984244670993, |
| "grad_norm": 0.500996470451355, |
| "learning_rate": 8.116525423728814e-06, |
| "loss": 0.0881, |
| "step": 1778 |
| }, |
| { |
| "epoch": 1.8833576062491724, |
| "grad_norm": 0.47844716906547546, |
| "learning_rate": 8.115466101694915e-06, |
| "loss": 0.0868, |
| "step": 1779 |
| }, |
| { |
| "epoch": 1.884416788031246, |
| "grad_norm": 0.32108375430107117, |
| "learning_rate": 8.114406779661017e-06, |
| "loss": 0.09, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.8854759698133192, |
| "grad_norm": 0.5594972968101501, |
| "learning_rate": 8.11334745762712e-06, |
| "loss": 0.0881, |
| "step": 1781 |
| }, |
| { |
| "epoch": 1.8865351515953925, |
| "grad_norm": 0.33978521823883057, |
| "learning_rate": 8.112288135593221e-06, |
| "loss": 0.0893, |
| "step": 1782 |
| }, |
| { |
| "epoch": 1.887594333377466, |
| "grad_norm": 0.41193121671676636, |
| "learning_rate": 8.111228813559323e-06, |
| "loss": 0.087, |
| "step": 1783 |
| }, |
| { |
| "epoch": 1.8886535151595392, |
| "grad_norm": 0.35275036096572876, |
| "learning_rate": 8.110169491525424e-06, |
| "loss": 0.0862, |
| "step": 1784 |
| }, |
| { |
| "epoch": 1.8897126969416127, |
| "grad_norm": 0.32912677526474, |
| "learning_rate": 8.109110169491527e-06, |
| "loss": 0.0843, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.8907718787236858, |
| "grad_norm": 0.7398490309715271, |
| "learning_rate": 8.108050847457629e-06, |
| "loss": 0.0875, |
| "step": 1786 |
| }, |
| { |
| "epoch": 1.8918310605057593, |
| "grad_norm": 0.3203893303871155, |
| "learning_rate": 8.10699152542373e-06, |
| "loss": 0.0895, |
| "step": 1787 |
| }, |
| { |
| "epoch": 1.8928902422878326, |
| "grad_norm": 0.33273789286613464, |
| "learning_rate": 8.105932203389831e-06, |
| "loss": 0.0895, |
| "step": 1788 |
| }, |
| { |
| "epoch": 1.893949424069906, |
| "grad_norm": 0.6345044374465942, |
| "learning_rate": 8.104872881355933e-06, |
| "loss": 0.0883, |
| "step": 1789 |
| }, |
| { |
| "epoch": 1.8950086058519795, |
| "grad_norm": 0.6323248147964478, |
| "learning_rate": 8.103813559322034e-06, |
| "loss": 0.0886, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.8960677876340526, |
| "grad_norm": 1.5318913459777832, |
| "learning_rate": 8.102754237288136e-06, |
| "loss": 0.09, |
| "step": 1791 |
| }, |
| { |
| "epoch": 1.8971269694161261, |
| "grad_norm": 0.4862882196903229, |
| "learning_rate": 8.101694915254237e-06, |
| "loss": 0.089, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.8981861511981994, |
| "grad_norm": 0.26938316226005554, |
| "learning_rate": 8.10063559322034e-06, |
| "loss": 0.0864, |
| "step": 1793 |
| }, |
| { |
| "epoch": 1.8992453329802728, |
| "grad_norm": 0.3050606846809387, |
| "learning_rate": 8.099576271186442e-06, |
| "loss": 0.0882, |
| "step": 1794 |
| }, |
| { |
| "epoch": 1.900304514762346, |
| "grad_norm": 0.2837347090244293, |
| "learning_rate": 8.098516949152543e-06, |
| "loss": 0.0873, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.9013636965444194, |
| "grad_norm": 0.23105625808238983, |
| "learning_rate": 8.097457627118644e-06, |
| "loss": 0.0886, |
| "step": 1796 |
| }, |
| { |
| "epoch": 1.902422878326493, |
| "grad_norm": 0.3702641725540161, |
| "learning_rate": 8.096398305084746e-06, |
| "loss": 0.0863, |
| "step": 1797 |
| }, |
| { |
| "epoch": 1.903482060108566, |
| "grad_norm": 0.5003481507301331, |
| "learning_rate": 8.095338983050847e-06, |
| "loss": 0.0879, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.9045412418906396, |
| "grad_norm": 0.3741811215877533, |
| "learning_rate": 8.094279661016949e-06, |
| "loss": 0.0892, |
| "step": 1799 |
| }, |
| { |
| "epoch": 1.9056004236727129, |
| "grad_norm": 0.7591339945793152, |
| "learning_rate": 8.093220338983052e-06, |
| "loss": 0.0903, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.9066596054547862, |
| "grad_norm": 1.3595631122589111, |
| "learning_rate": 8.092161016949153e-06, |
| "loss": 0.0863, |
| "step": 1801 |
| }, |
| { |
| "epoch": 1.9077187872368595, |
| "grad_norm": 0.33199191093444824, |
| "learning_rate": 8.091101694915255e-06, |
| "loss": 0.0874, |
| "step": 1802 |
| }, |
| { |
| "epoch": 1.9087779690189328, |
| "grad_norm": 0.293661504983902, |
| "learning_rate": 8.090042372881358e-06, |
| "loss": 0.0902, |
| "step": 1803 |
| }, |
| { |
| "epoch": 1.9098371508010064, |
| "grad_norm": 0.29978156089782715, |
| "learning_rate": 8.088983050847459e-06, |
| "loss": 0.084, |
| "step": 1804 |
| }, |
| { |
| "epoch": 1.9108963325830794, |
| "grad_norm": 0.3597835600376129, |
| "learning_rate": 8.08792372881356e-06, |
| "loss": 0.0861, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.911955514365153, |
| "grad_norm": 0.5429406762123108, |
| "learning_rate": 8.086864406779662e-06, |
| "loss": 0.0918, |
| "step": 1806 |
| }, |
| { |
| "epoch": 1.9130146961472263, |
| "grad_norm": 0.5267550945281982, |
| "learning_rate": 8.085805084745763e-06, |
| "loss": 0.0878, |
| "step": 1807 |
| }, |
| { |
| "epoch": 1.9140738779292996, |
| "grad_norm": 0.4029728174209595, |
| "learning_rate": 8.084745762711865e-06, |
| "loss": 0.0904, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.915133059711373, |
| "grad_norm": 0.40597569942474365, |
| "learning_rate": 8.083686440677966e-06, |
| "loss": 0.0937, |
| "step": 1809 |
| }, |
| { |
| "epoch": 1.9161922414934462, |
| "grad_norm": 0.2881167531013489, |
| "learning_rate": 8.08262711864407e-06, |
| "loss": 0.092, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.9172514232755198, |
| "grad_norm": 0.9564570188522339, |
| "learning_rate": 8.08156779661017e-06, |
| "loss": 0.0843, |
| "step": 1811 |
| }, |
| { |
| "epoch": 1.9183106050575929, |
| "grad_norm": 0.24643473327159882, |
| "learning_rate": 8.080508474576272e-06, |
| "loss": 0.0879, |
| "step": 1812 |
| }, |
| { |
| "epoch": 1.9193697868396664, |
| "grad_norm": 0.7339272499084473, |
| "learning_rate": 8.079449152542374e-06, |
| "loss": 0.0911, |
| "step": 1813 |
| }, |
| { |
| "epoch": 1.9204289686217397, |
| "grad_norm": 0.3867321312427521, |
| "learning_rate": 8.078389830508475e-06, |
| "loss": 0.0921, |
| "step": 1814 |
| }, |
| { |
| "epoch": 1.921488150403813, |
| "grad_norm": 0.5025569796562195, |
| "learning_rate": 8.077330508474576e-06, |
| "loss": 0.0875, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.9225473321858864, |
| "grad_norm": 1.2964099645614624, |
| "learning_rate": 8.076271186440678e-06, |
| "loss": 0.0893, |
| "step": 1816 |
| }, |
| { |
| "epoch": 1.9236065139679597, |
| "grad_norm": 0.40289801359176636, |
| "learning_rate": 8.07521186440678e-06, |
| "loss": 0.0876, |
| "step": 1817 |
| }, |
| { |
| "epoch": 1.9246656957500332, |
| "grad_norm": 0.9181727766990662, |
| "learning_rate": 8.074152542372882e-06, |
| "loss": 0.0847, |
| "step": 1818 |
| }, |
| { |
| "epoch": 1.9257248775321063, |
| "grad_norm": 0.3142136335372925, |
| "learning_rate": 8.073093220338984e-06, |
| "loss": 0.0901, |
| "step": 1819 |
| }, |
| { |
| "epoch": 1.9267840593141798, |
| "grad_norm": 0.5350835919380188, |
| "learning_rate": 8.072033898305085e-06, |
| "loss": 0.0887, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.9278432410962532, |
| "grad_norm": 0.37053418159484863, |
| "learning_rate": 8.070974576271186e-06, |
| "loss": 0.0899, |
| "step": 1821 |
| }, |
| { |
| "epoch": 1.9289024228783265, |
| "grad_norm": 0.7820659279823303, |
| "learning_rate": 8.069915254237288e-06, |
| "loss": 0.0856, |
| "step": 1822 |
| }, |
| { |
| "epoch": 1.9299616046603998, |
| "grad_norm": 0.3069775700569153, |
| "learning_rate": 8.068855932203391e-06, |
| "loss": 0.0854, |
| "step": 1823 |
| }, |
| { |
| "epoch": 1.931020786442473, |
| "grad_norm": 0.4941248893737793, |
| "learning_rate": 8.067796610169492e-06, |
| "loss": 0.0908, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.9320799682245466, |
| "grad_norm": 0.5605942606925964, |
| "learning_rate": 8.066737288135594e-06, |
| "loss": 0.0885, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.9331391500066197, |
| "grad_norm": 0.8052049279212952, |
| "learning_rate": 8.065677966101695e-06, |
| "loss": 0.0889, |
| "step": 1826 |
| }, |
| { |
| "epoch": 1.9341983317886933, |
| "grad_norm": 0.2667480409145355, |
| "learning_rate": 8.064618644067798e-06, |
| "loss": 0.0888, |
| "step": 1827 |
| }, |
| { |
| "epoch": 1.9352575135707666, |
| "grad_norm": 0.29833167791366577, |
| "learning_rate": 8.0635593220339e-06, |
| "loss": 0.0876, |
| "step": 1828 |
| }, |
| { |
| "epoch": 1.93631669535284, |
| "grad_norm": 0.23164193332195282, |
| "learning_rate": 8.062500000000001e-06, |
| "loss": 0.0879, |
| "step": 1829 |
| }, |
| { |
| "epoch": 1.9373758771349134, |
| "grad_norm": 0.6006713509559631, |
| "learning_rate": 8.061440677966103e-06, |
| "loss": 0.0891, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.9384350589169865, |
| "grad_norm": 0.5523638725280762, |
| "learning_rate": 8.060381355932204e-06, |
| "loss": 0.088, |
| "step": 1831 |
| }, |
| { |
| "epoch": 1.93949424069906, |
| "grad_norm": 0.512315571308136, |
| "learning_rate": 8.059322033898305e-06, |
| "loss": 0.0909, |
| "step": 1832 |
| }, |
| { |
| "epoch": 1.9405534224811332, |
| "grad_norm": 0.28503870964050293, |
| "learning_rate": 8.058262711864407e-06, |
| "loss": 0.0872, |
| "step": 1833 |
| }, |
| { |
| "epoch": 1.9416126042632067, |
| "grad_norm": 0.26608702540397644, |
| "learning_rate": 8.05720338983051e-06, |
| "loss": 0.0908, |
| "step": 1834 |
| }, |
| { |
| "epoch": 1.94267178604528, |
| "grad_norm": 0.5513134002685547, |
| "learning_rate": 8.056144067796611e-06, |
| "loss": 0.0909, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.9437309678273533, |
| "grad_norm": 0.30664971470832825, |
| "learning_rate": 8.055084745762713e-06, |
| "loss": 0.0867, |
| "step": 1836 |
| }, |
| { |
| "epoch": 1.9447901496094269, |
| "grad_norm": 0.2737899720668793, |
| "learning_rate": 8.054025423728814e-06, |
| "loss": 0.0897, |
| "step": 1837 |
| }, |
| { |
| "epoch": 1.9458493313915, |
| "grad_norm": 0.8876556158065796, |
| "learning_rate": 8.052966101694916e-06, |
| "loss": 0.0839, |
| "step": 1838 |
| }, |
| { |
| "epoch": 1.9469085131735735, |
| "grad_norm": 0.5481213331222534, |
| "learning_rate": 8.051906779661017e-06, |
| "loss": 0.0871, |
| "step": 1839 |
| }, |
| { |
| "epoch": 1.9479676949556468, |
| "grad_norm": 0.36651283502578735, |
| "learning_rate": 8.050847457627118e-06, |
| "loss": 0.0887, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.9490268767377201, |
| "grad_norm": 0.6602075099945068, |
| "learning_rate": 8.04978813559322e-06, |
| "loss": 0.0874, |
| "step": 1841 |
| }, |
| { |
| "epoch": 1.9500860585197934, |
| "grad_norm": 0.5537017583847046, |
| "learning_rate": 8.048728813559323e-06, |
| "loss": 0.0864, |
| "step": 1842 |
| }, |
| { |
| "epoch": 1.9511452403018668, |
| "grad_norm": 0.8584939241409302, |
| "learning_rate": 8.047669491525424e-06, |
| "loss": 0.0877, |
| "step": 1843 |
| }, |
| { |
| "epoch": 1.9522044220839403, |
| "grad_norm": 0.3135605454444885, |
| "learning_rate": 8.046610169491527e-06, |
| "loss": 0.0885, |
| "step": 1844 |
| }, |
| { |
| "epoch": 1.9532636038660134, |
| "grad_norm": 0.26804184913635254, |
| "learning_rate": 8.045550847457629e-06, |
| "loss": 0.0853, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.954322785648087, |
| "grad_norm": 0.8978933095932007, |
| "learning_rate": 8.04449152542373e-06, |
| "loss": 0.0899, |
| "step": 1846 |
| }, |
| { |
| "epoch": 1.9553819674301602, |
| "grad_norm": 0.5151166319847107, |
| "learning_rate": 8.043432203389832e-06, |
| "loss": 0.088, |
| "step": 1847 |
| }, |
| { |
| "epoch": 1.9564411492122336, |
| "grad_norm": 0.8717884421348572, |
| "learning_rate": 8.042372881355933e-06, |
| "loss": 0.0888, |
| "step": 1848 |
| }, |
| { |
| "epoch": 1.9575003309943069, |
| "grad_norm": 0.37827199697494507, |
| "learning_rate": 8.041313559322034e-06, |
| "loss": 0.0846, |
| "step": 1849 |
| }, |
| { |
| "epoch": 1.9585595127763802, |
| "grad_norm": 0.3214173913002014, |
| "learning_rate": 8.040254237288136e-06, |
| "loss": 0.0926, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.9596186945584537, |
| "grad_norm": 0.41147252917289734, |
| "learning_rate": 8.039194915254237e-06, |
| "loss": 0.0886, |
| "step": 1851 |
| }, |
| { |
| "epoch": 1.9606778763405268, |
| "grad_norm": 0.24331054091453552, |
| "learning_rate": 8.03813559322034e-06, |
| "loss": 0.0836, |
| "step": 1852 |
| }, |
| { |
| "epoch": 1.9617370581226004, |
| "grad_norm": 0.2882451117038727, |
| "learning_rate": 8.037076271186442e-06, |
| "loss": 0.0842, |
| "step": 1853 |
| }, |
| { |
| "epoch": 1.9627962399046737, |
| "grad_norm": 0.41089218854904175, |
| "learning_rate": 8.036016949152543e-06, |
| "loss": 0.0854, |
| "step": 1854 |
| }, |
| { |
| "epoch": 1.963855421686747, |
| "grad_norm": 0.5555206537246704, |
| "learning_rate": 8.034957627118645e-06, |
| "loss": 0.0892, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.9649146034688203, |
| "grad_norm": 0.8138719201087952, |
| "learning_rate": 8.033898305084746e-06, |
| "loss": 0.0899, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.9659737852508936, |
| "grad_norm": 1.2507460117340088, |
| "learning_rate": 8.032838983050847e-06, |
| "loss": 0.089, |
| "step": 1857 |
| }, |
| { |
| "epoch": 1.9670329670329672, |
| "grad_norm": 0.4007565677165985, |
| "learning_rate": 8.031779661016949e-06, |
| "loss": 0.0923, |
| "step": 1858 |
| }, |
| { |
| "epoch": 1.9680921488150402, |
| "grad_norm": 0.31860870122909546, |
| "learning_rate": 8.030720338983052e-06, |
| "loss": 0.0891, |
| "step": 1859 |
| }, |
| { |
| "epoch": 1.9691513305971138, |
| "grad_norm": 0.8498591184616089, |
| "learning_rate": 8.029661016949153e-06, |
| "loss": 0.0909, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.970210512379187, |
| "grad_norm": 0.24927860498428345, |
| "learning_rate": 8.028601694915255e-06, |
| "loss": 0.0903, |
| "step": 1861 |
| }, |
| { |
| "epoch": 1.9712696941612604, |
| "grad_norm": 0.25672590732574463, |
| "learning_rate": 8.027542372881356e-06, |
| "loss": 0.0889, |
| "step": 1862 |
| }, |
| { |
| "epoch": 1.9723288759433337, |
| "grad_norm": 0.3672144412994385, |
| "learning_rate": 8.026483050847458e-06, |
| "loss": 0.0861, |
| "step": 1863 |
| }, |
| { |
| "epoch": 1.973388057725407, |
| "grad_norm": 0.9591440558433533, |
| "learning_rate": 8.025423728813559e-06, |
| "loss": 0.0873, |
| "step": 1864 |
| }, |
| { |
| "epoch": 1.9744472395074806, |
| "grad_norm": 0.2935021221637726, |
| "learning_rate": 8.024364406779662e-06, |
| "loss": 0.0866, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.9755064212895537, |
| "grad_norm": 0.37600111961364746, |
| "learning_rate": 8.023305084745764e-06, |
| "loss": 0.0873, |
| "step": 1866 |
| }, |
| { |
| "epoch": 1.9765656030716272, |
| "grad_norm": 0.4699721336364746, |
| "learning_rate": 8.022245762711865e-06, |
| "loss": 0.0924, |
| "step": 1867 |
| }, |
| { |
| "epoch": 1.9776247848537005, |
| "grad_norm": 0.37236911058425903, |
| "learning_rate": 8.021186440677966e-06, |
| "loss": 0.0894, |
| "step": 1868 |
| }, |
| { |
| "epoch": 1.9786839666357738, |
| "grad_norm": 0.2830633521080017, |
| "learning_rate": 8.02012711864407e-06, |
| "loss": 0.0892, |
| "step": 1869 |
| }, |
| { |
| "epoch": 1.9797431484178474, |
| "grad_norm": 0.6510921716690063, |
| "learning_rate": 8.019067796610171e-06, |
| "loss": 0.0898, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.9808023301999205, |
| "grad_norm": 1.2568674087524414, |
| "learning_rate": 8.018008474576272e-06, |
| "loss": 0.089, |
| "step": 1871 |
| }, |
| { |
| "epoch": 1.981861511981994, |
| "grad_norm": 0.8623310923576355, |
| "learning_rate": 8.016949152542374e-06, |
| "loss": 0.0863, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.982920693764067, |
| "grad_norm": 0.3959273099899292, |
| "learning_rate": 8.015889830508475e-06, |
| "loss": 0.0881, |
| "step": 1873 |
| }, |
| { |
| "epoch": 1.9839798755461406, |
| "grad_norm": 0.43136197328567505, |
| "learning_rate": 8.014830508474576e-06, |
| "loss": 0.0936, |
| "step": 1874 |
| }, |
| { |
| "epoch": 1.985039057328214, |
| "grad_norm": 0.26500093936920166, |
| "learning_rate": 8.013771186440678e-06, |
| "loss": 0.0891, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.9860982391102873, |
| "grad_norm": 1.2360635995864868, |
| "learning_rate": 8.012711864406781e-06, |
| "loss": 0.0906, |
| "step": 1876 |
| }, |
| { |
| "epoch": 1.9871574208923608, |
| "grad_norm": 0.2535933554172516, |
| "learning_rate": 8.011652542372882e-06, |
| "loss": 0.0881, |
| "step": 1877 |
| }, |
| { |
| "epoch": 1.988216602674434, |
| "grad_norm": 0.252047061920166, |
| "learning_rate": 8.010593220338984e-06, |
| "loss": 0.0828, |
| "step": 1878 |
| }, |
| { |
| "epoch": 1.9892757844565074, |
| "grad_norm": 0.8809779286384583, |
| "learning_rate": 8.009533898305085e-06, |
| "loss": 0.0903, |
| "step": 1879 |
| }, |
| { |
| "epoch": 1.9903349662385807, |
| "grad_norm": 0.5204029083251953, |
| "learning_rate": 8.008474576271187e-06, |
| "loss": 0.0872, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.991394148020654, |
| "grad_norm": 0.24637530744075775, |
| "learning_rate": 8.007415254237288e-06, |
| "loss": 0.0877, |
| "step": 1881 |
| }, |
| { |
| "epoch": 1.9924533298027274, |
| "grad_norm": 0.4711182117462158, |
| "learning_rate": 8.00635593220339e-06, |
| "loss": 0.0886, |
| "step": 1882 |
| }, |
| { |
| "epoch": 1.9935125115848007, |
| "grad_norm": 0.3140796422958374, |
| "learning_rate": 8.005296610169493e-06, |
| "loss": 0.0904, |
| "step": 1883 |
| }, |
| { |
| "epoch": 1.9945716933668742, |
| "grad_norm": 0.4233724772930145, |
| "learning_rate": 8.004237288135594e-06, |
| "loss": 0.0848, |
| "step": 1884 |
| }, |
| { |
| "epoch": 1.9956308751489473, |
| "grad_norm": 1.4377028942108154, |
| "learning_rate": 8.003177966101695e-06, |
| "loss": 0.0899, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.9966900569310209, |
| "grad_norm": 0.9122888445854187, |
| "learning_rate": 8.002118644067799e-06, |
| "loss": 0.0902, |
| "step": 1886 |
| }, |
| { |
| "epoch": 1.9977492387130942, |
| "grad_norm": 0.27381008863449097, |
| "learning_rate": 8.0010593220339e-06, |
| "loss": 0.0922, |
| "step": 1887 |
| }, |
| { |
| "epoch": 1.9988084204951675, |
| "grad_norm": 0.4732683300971985, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0864, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.9988084204951675, |
| "eval_accuracy": 0.9814, |
| "eval_best_f1_from_thresholding": 0.15454545454545454, |
| "eval_loss": 0.13597536087036133, |
| "eval_matthews_corrcoef": 0.14717610184892235, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9905965621840243, |
| "eval_negative_class_precision": 0.9923022384280361, |
| "eval_negative_class_recall": 0.9888967396790148, |
| "eval_positive_class_f1": 0.15454545454545454, |
| "eval_positive_class_precision": 0.13385826771653545, |
| "eval_positive_class_recall": 0.1827956989247312, |
| "eval_roc_auc": 0.8065064237190821, |
| "eval_runtime": 20.7354, |
| "eval_samples_per_second": 482.267, |
| "eval_steps_per_second": 7.572, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.9998676022772408, |
| "grad_norm": 1.7929794788360596, |
| "learning_rate": 7.998940677966103e-06, |
| "loss": 0.0934, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.11227487027645111, |
| "learning_rate": 7.997881355932204e-06, |
| "loss": 0.0118, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.0010591817820735, |
| "grad_norm": 0.2703258693218231, |
| "learning_rate": 7.996822033898306e-06, |
| "loss": 0.0879, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.0021183635641466, |
| "grad_norm": 0.26941847801208496, |
| "learning_rate": 7.995762711864407e-06, |
| "loss": 0.0886, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.00317754534622, |
| "grad_norm": 0.29611021280288696, |
| "learning_rate": 7.99470338983051e-06, |
| "loss": 0.0879, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.0042367271282933, |
| "grad_norm": 0.3016314208507538, |
| "learning_rate": 7.993644067796611e-06, |
| "loss": 0.087, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.005295908910367, |
| "grad_norm": 0.27448010444641113, |
| "learning_rate": 7.992584745762713e-06, |
| "loss": 0.0823, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.00635509069244, |
| "grad_norm": 0.6257076263427734, |
| "learning_rate": 7.991525423728814e-06, |
| "loss": 0.0911, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.0074142724745134, |
| "grad_norm": 0.3046128451824188, |
| "learning_rate": 7.990466101694916e-06, |
| "loss": 0.0869, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.008473454256587, |
| "grad_norm": 0.3856375813484192, |
| "learning_rate": 7.989406779661017e-06, |
| "loss": 0.0901, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.00953263603866, |
| "grad_norm": 0.3367128074169159, |
| "learning_rate": 7.988347457627119e-06, |
| "loss": 0.0882, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.0105918178207336, |
| "grad_norm": 0.2662423551082611, |
| "learning_rate": 7.987288135593222e-06, |
| "loss": 0.0863, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.0116509996028067, |
| "grad_norm": 0.4654860496520996, |
| "learning_rate": 7.986228813559323e-06, |
| "loss": 0.0919, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.0127101813848802, |
| "grad_norm": 0.9134454727172852, |
| "learning_rate": 7.985169491525424e-06, |
| "loss": 0.0941, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.0137693631669533, |
| "grad_norm": 0.2735442817211151, |
| "learning_rate": 7.984110169491526e-06, |
| "loss": 0.0894, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.014828544949027, |
| "grad_norm": 0.31145164370536804, |
| "learning_rate": 7.983050847457627e-06, |
| "loss": 0.0863, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.0158877267311004, |
| "grad_norm": 0.2529715597629547, |
| "learning_rate": 7.981991525423729e-06, |
| "loss": 0.0873, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.0169469085131735, |
| "grad_norm": 0.25238585472106934, |
| "learning_rate": 7.98093220338983e-06, |
| "loss": 0.0882, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.018006090295247, |
| "grad_norm": 0.26991987228393555, |
| "learning_rate": 7.979872881355933e-06, |
| "loss": 0.0905, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.01906527207732, |
| "grad_norm": 0.6508270502090454, |
| "learning_rate": 7.978813559322035e-06, |
| "loss": 0.0919, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.0201244538593937, |
| "grad_norm": 0.7955009341239929, |
| "learning_rate": 7.977754237288136e-06, |
| "loss": 0.0934, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.0211836356414667, |
| "grad_norm": 0.7356244325637817, |
| "learning_rate": 7.976694915254239e-06, |
| "loss": 0.0895, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.0222428174235403, |
| "grad_norm": 0.556024968624115, |
| "learning_rate": 7.97563559322034e-06, |
| "loss": 0.0915, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.023301999205614, |
| "grad_norm": 0.35753247141838074, |
| "learning_rate": 7.974576271186442e-06, |
| "loss": 0.09, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.024361180987687, |
| "grad_norm": 0.27930715680122375, |
| "learning_rate": 7.973516949152543e-06, |
| "loss": 0.0913, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.0254203627697605, |
| "grad_norm": 0.7513614296913147, |
| "learning_rate": 7.972457627118645e-06, |
| "loss": 0.0894, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.0264795445518335, |
| "grad_norm": 0.3672800064086914, |
| "learning_rate": 7.971398305084746e-06, |
| "loss": 0.086, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.027538726333907, |
| "grad_norm": 0.8558787703514099, |
| "learning_rate": 7.970338983050848e-06, |
| "loss": 0.0907, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.0285979081159806, |
| "grad_norm": 0.8640574216842651, |
| "learning_rate": 7.969279661016949e-06, |
| "loss": 0.0906, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.0296570898980537, |
| "grad_norm": 0.3139023184776306, |
| "learning_rate": 7.968220338983052e-06, |
| "loss": 0.0882, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.0307162716801272, |
| "grad_norm": 0.30195289850234985, |
| "learning_rate": 7.967161016949154e-06, |
| "loss": 0.0854, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.0317754534622003, |
| "grad_norm": 0.2863333225250244, |
| "learning_rate": 7.966101694915255e-06, |
| "loss": 0.0896, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.032834635244274, |
| "grad_norm": 0.42541301250457764, |
| "learning_rate": 7.965042372881356e-06, |
| "loss": 0.0837, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.033893817026347, |
| "grad_norm": 0.2463466227054596, |
| "learning_rate": 7.963983050847458e-06, |
| "loss": 0.0869, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.0349529988084205, |
| "grad_norm": 0.256345272064209, |
| "learning_rate": 7.96292372881356e-06, |
| "loss": 0.0875, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.036012180590494, |
| "grad_norm": 0.3295067250728607, |
| "learning_rate": 7.96186440677966e-06, |
| "loss": 0.0877, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.037071362372567, |
| "grad_norm": 0.6613496541976929, |
| "learning_rate": 7.960805084745764e-06, |
| "loss": 0.0884, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.0381305441546407, |
| "grad_norm": 0.3493386209011078, |
| "learning_rate": 7.959745762711865e-06, |
| "loss": 0.0905, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.0391897259367138, |
| "grad_norm": 0.7373542189598083, |
| "learning_rate": 7.958686440677967e-06, |
| "loss": 0.0854, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.0402489077187873, |
| "grad_norm": 0.28745999932289124, |
| "learning_rate": 7.957627118644068e-06, |
| "loss": 0.0895, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.0413080895008604, |
| "grad_norm": 0.6971897482872009, |
| "learning_rate": 7.956567796610171e-06, |
| "loss": 0.0895, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.042367271282934, |
| "grad_norm": 0.8651426434516907, |
| "learning_rate": 7.955508474576272e-06, |
| "loss": 0.0871, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.0434264530650075, |
| "grad_norm": 0.2681843936443329, |
| "learning_rate": 7.954449152542374e-06, |
| "loss": 0.0891, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.0444856348470806, |
| "grad_norm": 0.27684885263442993, |
| "learning_rate": 7.953389830508475e-06, |
| "loss": 0.0859, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.045544816629154, |
| "grad_norm": 0.2875300347805023, |
| "learning_rate": 7.952330508474577e-06, |
| "loss": 0.0904, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.046603998411227, |
| "grad_norm": 0.5383615493774414, |
| "learning_rate": 7.951271186440678e-06, |
| "loss": 0.0873, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.0476631801933007, |
| "grad_norm": 0.2454068958759308, |
| "learning_rate": 7.950211864406781e-06, |
| "loss": 0.0883, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.048722361975374, |
| "grad_norm": 0.29819706082344055, |
| "learning_rate": 7.949152542372883e-06, |
| "loss": 0.088, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.0497815437574474, |
| "grad_norm": 0.3372708261013031, |
| "learning_rate": 7.948093220338984e-06, |
| "loss": 0.0889, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.050840725539521, |
| "grad_norm": 0.39405205845832825, |
| "learning_rate": 7.947033898305085e-06, |
| "loss": 0.0906, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.051899907321594, |
| "grad_norm": 0.3588440418243408, |
| "learning_rate": 7.945974576271187e-06, |
| "loss": 0.0864, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.0529590891036675, |
| "grad_norm": 0.3285020589828491, |
| "learning_rate": 7.944915254237288e-06, |
| "loss": 0.0889, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.0540182708857406, |
| "grad_norm": 1.0305137634277344, |
| "learning_rate": 7.94385593220339e-06, |
| "loss": 0.0886, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.055077452667814, |
| "grad_norm": 0.6089313626289368, |
| "learning_rate": 7.942796610169493e-06, |
| "loss": 0.0904, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.0561366344498873, |
| "grad_norm": 0.23426276445388794, |
| "learning_rate": 7.941737288135594e-06, |
| "loss": 0.0853, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.057195816231961, |
| "grad_norm": 0.28794458508491516, |
| "learning_rate": 7.940677966101696e-06, |
| "loss": 0.0847, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.0582549980140343, |
| "grad_norm": 0.435823529958725, |
| "learning_rate": 7.939618644067797e-06, |
| "loss": 0.0905, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.0593141797961074, |
| "grad_norm": 0.5630863904953003, |
| "learning_rate": 7.938559322033898e-06, |
| "loss": 0.0896, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.060373361578181, |
| "grad_norm": 0.24650096893310547, |
| "learning_rate": 7.9375e-06, |
| "loss": 0.0888, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.061432543360254, |
| "grad_norm": 0.2687147259712219, |
| "learning_rate": 7.936440677966101e-06, |
| "loss": 0.0888, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.0624917251423276, |
| "grad_norm": 0.36030933260917664, |
| "learning_rate": 7.935381355932204e-06, |
| "loss": 0.0871, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.0635509069244007, |
| "grad_norm": 0.5701349973678589, |
| "learning_rate": 7.934322033898306e-06, |
| "loss": 0.09, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.0646100887064742, |
| "grad_norm": 0.8344652652740479, |
| "learning_rate": 7.933262711864407e-06, |
| "loss": 0.0883, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.0656692704885478, |
| "grad_norm": 0.4540218114852905, |
| "learning_rate": 7.93220338983051e-06, |
| "loss": 0.0856, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.066728452270621, |
| "grad_norm": 0.20542527735233307, |
| "learning_rate": 7.931144067796612e-06, |
| "loss": 0.0852, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.0677876340526944, |
| "grad_norm": 0.6437293887138367, |
| "learning_rate": 7.930084745762713e-06, |
| "loss": 0.0927, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.0688468158347675, |
| "grad_norm": 0.33412033319473267, |
| "learning_rate": 7.929025423728814e-06, |
| "loss": 0.0867, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.069905997616841, |
| "grad_norm": 0.4800800681114197, |
| "learning_rate": 7.927966101694916e-06, |
| "loss": 0.0899, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.0709651793989146, |
| "grad_norm": 0.6857233047485352, |
| "learning_rate": 7.926906779661017e-06, |
| "loss": 0.0891, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.0720243611809877, |
| "grad_norm": 0.7694417834281921, |
| "learning_rate": 7.925847457627119e-06, |
| "loss": 0.0855, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.073083542963061, |
| "grad_norm": 0.3634096086025238, |
| "learning_rate": 7.924788135593222e-06, |
| "loss": 0.0877, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.0741427247451343, |
| "grad_norm": 0.38182634115219116, |
| "learning_rate": 7.923728813559323e-06, |
| "loss": 0.089, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.075201906527208, |
| "grad_norm": 0.3368748724460602, |
| "learning_rate": 7.922669491525425e-06, |
| "loss": 0.0886, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.076261088309281, |
| "grad_norm": 0.5356863737106323, |
| "learning_rate": 7.921610169491526e-06, |
| "loss": 0.0882, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.0773202700913544, |
| "grad_norm": 0.5707106590270996, |
| "learning_rate": 7.920550847457627e-06, |
| "loss": 0.0881, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.078379451873428, |
| "grad_norm": 0.32848355174064636, |
| "learning_rate": 7.919491525423729e-06, |
| "loss": 0.0867, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.079438633655501, |
| "grad_norm": 0.24699488282203674, |
| "learning_rate": 7.91843220338983e-06, |
| "loss": 0.0917, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.0804978154375746, |
| "grad_norm": 0.422572523355484, |
| "learning_rate": 7.917372881355932e-06, |
| "loss": 0.089, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.0815569972196477, |
| "grad_norm": 0.4969422221183777, |
| "learning_rate": 7.916313559322035e-06, |
| "loss": 0.0908, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.0826161790017212, |
| "grad_norm": 0.25381964445114136, |
| "learning_rate": 7.915254237288136e-06, |
| "loss": 0.0852, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.0836753607837943, |
| "grad_norm": 0.35738474130630493, |
| "learning_rate": 7.914194915254238e-06, |
| "loss": 0.089, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.084734542565868, |
| "grad_norm": 0.7216671109199524, |
| "learning_rate": 7.913135593220339e-06, |
| "loss": 0.0896, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.0857937243479414, |
| "grad_norm": 0.20853260159492493, |
| "learning_rate": 7.912076271186442e-06, |
| "loss": 0.0896, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.0868529061300145, |
| "grad_norm": 0.2022741138935089, |
| "learning_rate": 7.911016949152544e-06, |
| "loss": 0.0847, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.087912087912088, |
| "grad_norm": 0.857926607131958, |
| "learning_rate": 7.909957627118645e-06, |
| "loss": 0.084, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.088971269694161, |
| "grad_norm": 0.9053319096565247, |
| "learning_rate": 7.908898305084746e-06, |
| "loss": 0.0871, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.0900304514762347, |
| "grad_norm": 0.21877585351467133, |
| "learning_rate": 7.907838983050848e-06, |
| "loss": 0.084, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.0910896332583078, |
| "grad_norm": 0.831366777420044, |
| "learning_rate": 7.906779661016951e-06, |
| "loss": 0.0879, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.0921488150403813, |
| "grad_norm": 0.595077633857727, |
| "learning_rate": 7.905720338983052e-06, |
| "loss": 0.0916, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.093207996822455, |
| "grad_norm": 0.31096351146698, |
| "learning_rate": 7.904661016949154e-06, |
| "loss": 0.089, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.094267178604528, |
| "grad_norm": 0.30939215421676636, |
| "learning_rate": 7.903601694915255e-06, |
| "loss": 0.0923, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.0953263603866015, |
| "grad_norm": 0.5221177935600281, |
| "learning_rate": 7.902542372881357e-06, |
| "loss": 0.0857, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.0963855421686746, |
| "grad_norm": 0.5102083683013916, |
| "learning_rate": 7.901483050847458e-06, |
| "loss": 0.0922, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.097444723950748, |
| "grad_norm": 0.2757764160633087, |
| "learning_rate": 7.90042372881356e-06, |
| "loss": 0.0915, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.098503905732821, |
| "grad_norm": 0.39589911699295044, |
| "learning_rate": 7.89936440677966e-06, |
| "loss": 0.0868, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.0995630875148947, |
| "grad_norm": 0.3275192677974701, |
| "learning_rate": 7.898305084745764e-06, |
| "loss": 0.0885, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.1006222692969683, |
| "grad_norm": 1.0052108764648438, |
| "learning_rate": 7.897245762711865e-06, |
| "loss": 0.0904, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.1016814510790414, |
| "grad_norm": 0.4538261890411377, |
| "learning_rate": 7.896186440677967e-06, |
| "loss": 0.0882, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.102740632861115, |
| "grad_norm": 0.4319491982460022, |
| "learning_rate": 7.895127118644068e-06, |
| "loss": 0.0855, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.103799814643188, |
| "grad_norm": 0.2973875105381012, |
| "learning_rate": 7.89406779661017e-06, |
| "loss": 0.089, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.1048589964252615, |
| "grad_norm": 0.2854253649711609, |
| "learning_rate": 7.893008474576271e-06, |
| "loss": 0.0928, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.1059181782073346, |
| "grad_norm": 0.36707934737205505, |
| "learning_rate": 7.891949152542372e-06, |
| "loss": 0.09, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.106977359989408, |
| "grad_norm": 0.3122307360172272, |
| "learning_rate": 7.890889830508475e-06, |
| "loss": 0.0882, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.1080365417714817, |
| "grad_norm": 0.6345651149749756, |
| "learning_rate": 7.889830508474577e-06, |
| "loss": 0.0884, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.109095723553555, |
| "grad_norm": 0.660128653049469, |
| "learning_rate": 7.888771186440678e-06, |
| "loss": 0.0886, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.1101549053356283, |
| "grad_norm": 1.1793729066848755, |
| "learning_rate": 7.887711864406781e-06, |
| "loss": 0.088, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.1112140871177014, |
| "grad_norm": 0.2564319968223572, |
| "learning_rate": 7.886652542372883e-06, |
| "loss": 0.0889, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.112273268899775, |
| "grad_norm": 0.3774588108062744, |
| "learning_rate": 7.885593220338984e-06, |
| "loss": 0.0902, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.113332450681848, |
| "grad_norm": 0.26271554827690125, |
| "learning_rate": 7.884533898305086e-06, |
| "loss": 0.088, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.1143916324639216, |
| "grad_norm": 0.2500177025794983, |
| "learning_rate": 7.883474576271187e-06, |
| "loss": 0.0878, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.115450814245995, |
| "grad_norm": 0.4386039972305298, |
| "learning_rate": 7.882415254237288e-06, |
| "loss": 0.0849, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.116509996028068, |
| "grad_norm": 0.3402063846588135, |
| "learning_rate": 7.88135593220339e-06, |
| "loss": 0.0896, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.1175691778101418, |
| "grad_norm": 0.5039743185043335, |
| "learning_rate": 7.880296610169493e-06, |
| "loss": 0.0895, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.118628359592215, |
| "grad_norm": 0.8807342052459717, |
| "learning_rate": 7.879237288135594e-06, |
| "loss": 0.0863, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.1196875413742884, |
| "grad_norm": 0.3534659445285797, |
| "learning_rate": 7.878177966101696e-06, |
| "loss": 0.0894, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.120746723156362, |
| "grad_norm": 0.23499086499214172, |
| "learning_rate": 7.877118644067797e-06, |
| "loss": 0.0865, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.121805904938435, |
| "grad_norm": 0.23860704898834229, |
| "learning_rate": 7.876059322033899e-06, |
| "loss": 0.0842, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.1228650867205086, |
| "grad_norm": 0.25318294763565063, |
| "learning_rate": 7.875e-06, |
| "loss": 0.0847, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.1239242685025816, |
| "grad_norm": 0.22246617078781128, |
| "learning_rate": 7.873940677966101e-06, |
| "loss": 0.0886, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.124983450284655, |
| "grad_norm": 0.5180307030677795, |
| "learning_rate": 7.872881355932205e-06, |
| "loss": 0.0897, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.1260426320667283, |
| "grad_norm": 0.3671906292438507, |
| "learning_rate": 7.871822033898306e-06, |
| "loss": 0.0899, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.127101813848802, |
| "grad_norm": 0.31801825761795044, |
| "learning_rate": 7.870762711864407e-06, |
| "loss": 0.0876, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.1281609956308754, |
| "grad_norm": 0.3843144178390503, |
| "learning_rate": 7.869703389830509e-06, |
| "loss": 0.0898, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.1292201774129484, |
| "grad_norm": 0.3329516053199768, |
| "learning_rate": 7.86864406779661e-06, |
| "loss": 0.0888, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.130279359195022, |
| "grad_norm": 0.31800010800361633, |
| "learning_rate": 7.867584745762713e-06, |
| "loss": 0.0845, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.131338540977095, |
| "grad_norm": 0.43881750106811523, |
| "learning_rate": 7.866525423728815e-06, |
| "loss": 0.0837, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.1323977227591686, |
| "grad_norm": 0.7285757064819336, |
| "learning_rate": 7.865466101694916e-06, |
| "loss": 0.0882, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.1334569045412417, |
| "grad_norm": 0.304055392742157, |
| "learning_rate": 7.864406779661017e-06, |
| "loss": 0.0871, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.1345160863233152, |
| "grad_norm": 0.45109742879867554, |
| "learning_rate": 7.863347457627119e-06, |
| "loss": 0.0891, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.135575268105389, |
| "grad_norm": 0.9246112704277039, |
| "learning_rate": 7.862288135593222e-06, |
| "loss": 0.0898, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.136634449887462, |
| "grad_norm": 0.37992846965789795, |
| "learning_rate": 7.861228813559323e-06, |
| "loss": 0.0882, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.1376936316695354, |
| "grad_norm": 0.22593331336975098, |
| "learning_rate": 7.860169491525425e-06, |
| "loss": 0.0843, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.1387528134516085, |
| "grad_norm": 0.569980800151825, |
| "learning_rate": 7.859110169491526e-06, |
| "loss": 0.0885, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.139811995233682, |
| "grad_norm": 0.24157559871673584, |
| "learning_rate": 7.858050847457628e-06, |
| "loss": 0.0844, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.140871177015755, |
| "grad_norm": 0.26660817861557007, |
| "learning_rate": 7.856991525423729e-06, |
| "loss": 0.0893, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.1419303587978287, |
| "grad_norm": 0.3377159535884857, |
| "learning_rate": 7.85593220338983e-06, |
| "loss": 0.086, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.142989540579902, |
| "grad_norm": 0.46858081221580505, |
| "learning_rate": 7.854872881355934e-06, |
| "loss": 0.0884, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.1440487223619753, |
| "grad_norm": 0.4171918034553528, |
| "learning_rate": 7.853813559322035e-06, |
| "loss": 0.0893, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.145107904144049, |
| "grad_norm": 0.9166372418403625, |
| "learning_rate": 7.852754237288136e-06, |
| "loss": 0.0879, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.146167085926122, |
| "grad_norm": 0.2570573389530182, |
| "learning_rate": 7.851694915254238e-06, |
| "loss": 0.0845, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.1472262677081955, |
| "grad_norm": 0.437513142824173, |
| "learning_rate": 7.85063559322034e-06, |
| "loss": 0.0885, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.1482854494902686, |
| "grad_norm": 0.1894887387752533, |
| "learning_rate": 7.84957627118644e-06, |
| "loss": 0.0873, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.149344631272342, |
| "grad_norm": 0.20361056923866272, |
| "learning_rate": 7.848516949152542e-06, |
| "loss": 0.0857, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.1504038130544156, |
| "grad_norm": 0.2676304876804352, |
| "learning_rate": 7.847457627118643e-06, |
| "loss": 0.0868, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.1514629948364887, |
| "grad_norm": 0.2386782020330429, |
| "learning_rate": 7.846398305084747e-06, |
| "loss": 0.0854, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.1525221766185623, |
| "grad_norm": 0.35165345668792725, |
| "learning_rate": 7.845338983050848e-06, |
| "loss": 0.0868, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.1535813584006354, |
| "grad_norm": 0.3289099335670471, |
| "learning_rate": 7.844279661016951e-06, |
| "loss": 0.0873, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.154640540182709, |
| "grad_norm": 1.0715082883834839, |
| "learning_rate": 7.843220338983052e-06, |
| "loss": 0.0857, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.1556997219647824, |
| "grad_norm": 0.19499650597572327, |
| "learning_rate": 7.842161016949154e-06, |
| "loss": 0.0897, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.1567589037468555, |
| "grad_norm": 0.2263382226228714, |
| "learning_rate": 7.841101694915255e-06, |
| "loss": 0.0903, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.157818085528929, |
| "grad_norm": 0.2175901234149933, |
| "learning_rate": 7.840042372881357e-06, |
| "loss": 0.087, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.158877267311002, |
| "grad_norm": 0.6341164112091064, |
| "learning_rate": 7.838983050847458e-06, |
| "loss": 0.0896, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.1599364490930757, |
| "grad_norm": 0.3150041997432709, |
| "learning_rate": 7.83792372881356e-06, |
| "loss": 0.0841, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.160995630875149, |
| "grad_norm": 0.2422228902578354, |
| "learning_rate": 7.836864406779661e-06, |
| "loss": 0.0829, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.1620548126572223, |
| "grad_norm": 0.20333003997802734, |
| "learning_rate": 7.835805084745764e-06, |
| "loss": 0.0868, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.1631139944392954, |
| "grad_norm": 0.2158234864473343, |
| "learning_rate": 7.834745762711865e-06, |
| "loss": 0.0826, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.164173176221369, |
| "grad_norm": 0.3346843421459198, |
| "learning_rate": 7.833686440677967e-06, |
| "loss": 0.0887, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.1652323580034425, |
| "grad_norm": 0.2267426997423172, |
| "learning_rate": 7.832627118644068e-06, |
| "loss": 0.085, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.1662915397855156, |
| "grad_norm": 0.3703182339668274, |
| "learning_rate": 7.83156779661017e-06, |
| "loss": 0.0865, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.167350721567589, |
| "grad_norm": 0.5824213027954102, |
| "learning_rate": 7.830508474576271e-06, |
| "loss": 0.0885, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.168409903349662, |
| "grad_norm": 0.7234041690826416, |
| "learning_rate": 7.829449152542373e-06, |
| "loss": 0.0859, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.1694690851317358, |
| "grad_norm": 0.4590138792991638, |
| "learning_rate": 7.828389830508476e-06, |
| "loss": 0.0875, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.1705282669138093, |
| "grad_norm": 0.22016145288944244, |
| "learning_rate": 7.827330508474577e-06, |
| "loss": 0.0859, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.1715874486958824, |
| "grad_norm": 0.19176195561885834, |
| "learning_rate": 7.826271186440678e-06, |
| "loss": 0.0851, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.172646630477956, |
| "grad_norm": 0.2171255648136139, |
| "learning_rate": 7.82521186440678e-06, |
| "loss": 0.0856, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.173705812260029, |
| "grad_norm": 0.21707554161548615, |
| "learning_rate": 7.824152542372881e-06, |
| "loss": 0.0855, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.1747649940421026, |
| "grad_norm": 0.1986992061138153, |
| "learning_rate": 7.823093220338984e-06, |
| "loss": 0.0878, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.1758241758241756, |
| "grad_norm": 0.52188640832901, |
| "learning_rate": 7.822033898305086e-06, |
| "loss": 0.0882, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.176883357606249, |
| "grad_norm": 0.39829766750335693, |
| "learning_rate": 7.820974576271187e-06, |
| "loss": 0.0883, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.1779425393883227, |
| "grad_norm": 0.6201052069664001, |
| "learning_rate": 7.819915254237289e-06, |
| "loss": 0.0898, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.179001721170396, |
| "grad_norm": 0.237601175904274, |
| "learning_rate": 7.81885593220339e-06, |
| "loss": 0.0888, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.1800609029524693, |
| "grad_norm": 0.22507217526435852, |
| "learning_rate": 7.817796610169493e-06, |
| "loss": 0.085, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.1811200847345424, |
| "grad_norm": 0.51198810338974, |
| "learning_rate": 7.816737288135595e-06, |
| "loss": 0.0908, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.182179266516616, |
| "grad_norm": 0.24880242347717285, |
| "learning_rate": 7.815677966101696e-06, |
| "loss": 0.088, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.183238448298689, |
| "grad_norm": 0.6522811651229858, |
| "learning_rate": 7.814618644067797e-06, |
| "loss": 0.0905, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.1842976300807626, |
| "grad_norm": 0.259032666683197, |
| "learning_rate": 7.813559322033899e-06, |
| "loss": 0.09, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.185356811862836, |
| "grad_norm": 0.5633755326271057, |
| "learning_rate": 7.8125e-06, |
| "loss": 0.0882, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.1864159936449092, |
| "grad_norm": 0.2784501016139984, |
| "learning_rate": 7.811440677966102e-06, |
| "loss": 0.0841, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.1874751754269828, |
| "grad_norm": 0.2742295265197754, |
| "learning_rate": 7.810381355932205e-06, |
| "loss": 0.0851, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.188534357209056, |
| "grad_norm": 0.18370847404003143, |
| "learning_rate": 7.809322033898306e-06, |
| "loss": 0.0871, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.1895935389911294, |
| "grad_norm": 0.29191604256629944, |
| "learning_rate": 7.808262711864407e-06, |
| "loss": 0.0864, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.1906527207732025, |
| "grad_norm": 0.2865682542324066, |
| "learning_rate": 7.807203389830509e-06, |
| "loss": 0.0862, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.191711902555276, |
| "grad_norm": 0.2612156867980957, |
| "learning_rate": 7.80614406779661e-06, |
| "loss": 0.0887, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.1927710843373496, |
| "grad_norm": 0.8060826063156128, |
| "learning_rate": 7.805084745762712e-06, |
| "loss": 0.0861, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.1938302661194227, |
| "grad_norm": 0.2059231549501419, |
| "learning_rate": 7.804025423728813e-06, |
| "loss": 0.0878, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.194889447901496, |
| "grad_norm": 0.21191243827342987, |
| "learning_rate": 7.802966101694916e-06, |
| "loss": 0.0864, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.1959486296835693, |
| "grad_norm": 0.2840358316898346, |
| "learning_rate": 7.801906779661018e-06, |
| "loss": 0.0872, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.197007811465643, |
| "grad_norm": 0.25956588983535767, |
| "learning_rate": 7.800847457627119e-06, |
| "loss": 0.0863, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.198066993247716, |
| "grad_norm": 0.197649285197258, |
| "learning_rate": 7.799788135593222e-06, |
| "loss": 0.085, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.1991261750297895, |
| "grad_norm": 0.2957169711589813, |
| "learning_rate": 7.798728813559324e-06, |
| "loss": 0.0859, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.200185356811863, |
| "grad_norm": 0.2329864650964737, |
| "learning_rate": 7.797669491525425e-06, |
| "loss": 0.0861, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.201244538593936, |
| "grad_norm": 0.19923458993434906, |
| "learning_rate": 7.796610169491526e-06, |
| "loss": 0.0848, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.2023037203760096, |
| "grad_norm": 0.6433698534965515, |
| "learning_rate": 7.795550847457628e-06, |
| "loss": 0.0862, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.2033629021580827, |
| "grad_norm": 0.507807731628418, |
| "learning_rate": 7.79449152542373e-06, |
| "loss": 0.088, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.2044220839401563, |
| "grad_norm": 0.2583516538143158, |
| "learning_rate": 7.79343220338983e-06, |
| "loss": 0.085, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.20548126572223, |
| "grad_norm": 1.012139081954956, |
| "learning_rate": 7.792372881355934e-06, |
| "loss": 0.0847, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.206540447504303, |
| "grad_norm": 0.6931725144386292, |
| "learning_rate": 7.791313559322035e-06, |
| "loss": 0.0884, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.2075996292863764, |
| "grad_norm": 0.22996462881565094, |
| "learning_rate": 7.790254237288137e-06, |
| "loss": 0.0852, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.2086588110684495, |
| "grad_norm": 0.2905077338218689, |
| "learning_rate": 7.789194915254238e-06, |
| "loss": 0.0845, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.209717992850523, |
| "grad_norm": 0.22558225691318512, |
| "learning_rate": 7.78813559322034e-06, |
| "loss": 0.0896, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.210777174632596, |
| "grad_norm": 0.291515052318573, |
| "learning_rate": 7.78707627118644e-06, |
| "loss": 0.0854, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.2118363564146697, |
| "grad_norm": 0.20197857916355133, |
| "learning_rate": 7.786016949152542e-06, |
| "loss": 0.0885, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.212895538196743, |
| "grad_norm": 0.6940252780914307, |
| "learning_rate": 7.784957627118644e-06, |
| "loss": 0.0895, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.2139547199788163, |
| "grad_norm": 0.2460675984621048, |
| "learning_rate": 7.783898305084747e-06, |
| "loss": 0.0857, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.21501390176089, |
| "grad_norm": 0.2917913794517517, |
| "learning_rate": 7.782838983050848e-06, |
| "loss": 0.0818, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.216073083542963, |
| "grad_norm": 0.21613304316997528, |
| "learning_rate": 7.78177966101695e-06, |
| "loss": 0.0866, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.2171322653250365, |
| "grad_norm": 0.25949186086654663, |
| "learning_rate": 7.780720338983051e-06, |
| "loss": 0.0865, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.2181914471071096, |
| "grad_norm": 0.3771454393863678, |
| "learning_rate": 7.779661016949152e-06, |
| "loss": 0.0918, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.219250628889183, |
| "grad_norm": 0.2963436245918274, |
| "learning_rate": 7.778601694915255e-06, |
| "loss": 0.0833, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.2203098106712567, |
| "grad_norm": 0.27688950300216675, |
| "learning_rate": 7.777542372881357e-06, |
| "loss": 0.0852, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.2213689924533297, |
| "grad_norm": 0.3003609776496887, |
| "learning_rate": 7.776483050847458e-06, |
| "loss": 0.0876, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.2224281742354033, |
| "grad_norm": 0.368274450302124, |
| "learning_rate": 7.77542372881356e-06, |
| "loss": 0.0872, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.2234873560174764, |
| "grad_norm": 0.7609259486198425, |
| "learning_rate": 7.774364406779663e-06, |
| "loss": 0.085, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.22454653779955, |
| "grad_norm": 0.23514187335968018, |
| "learning_rate": 7.773305084745764e-06, |
| "loss": 0.087, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.225605719581623, |
| "grad_norm": 1.0117790699005127, |
| "learning_rate": 7.772245762711866e-06, |
| "loss": 0.0891, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.2266649013636965, |
| "grad_norm": 0.2696327567100525, |
| "learning_rate": 7.771186440677967e-06, |
| "loss": 0.0859, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.22772408314577, |
| "grad_norm": 0.24088266491889954, |
| "learning_rate": 7.770127118644068e-06, |
| "loss": 0.0863, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.228783264927843, |
| "grad_norm": 0.3361911475658417, |
| "learning_rate": 7.76906779661017e-06, |
| "loss": 0.0882, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.2298424467099167, |
| "grad_norm": 0.28318047523498535, |
| "learning_rate": 7.768008474576271e-06, |
| "loss": 0.084, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.23090162849199, |
| "grad_norm": 0.27443477511405945, |
| "learning_rate": 7.766949152542373e-06, |
| "loss": 0.0858, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.2319608102740633, |
| "grad_norm": 0.382974773645401, |
| "learning_rate": 7.765889830508476e-06, |
| "loss": 0.0851, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.2330199920561364, |
| "grad_norm": 0.5474820137023926, |
| "learning_rate": 7.764830508474577e-06, |
| "loss": 0.087, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.23407917383821, |
| "grad_norm": 0.6709517240524292, |
| "learning_rate": 7.763771186440679e-06, |
| "loss": 0.0864, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.2351383556202835, |
| "grad_norm": 0.4770525097846985, |
| "learning_rate": 7.76271186440678e-06, |
| "loss": 0.0867, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.2361975374023566, |
| "grad_norm": 1.351270318031311, |
| "learning_rate": 7.761652542372881e-06, |
| "loss": 0.0854, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.23725671918443, |
| "grad_norm": 0.26051637530326843, |
| "learning_rate": 7.760593220338983e-06, |
| "loss": 0.0875, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.2383159009665032, |
| "grad_norm": 0.4052681028842926, |
| "learning_rate": 7.759533898305084e-06, |
| "loss": 0.0872, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.2393750827485768, |
| "grad_norm": 0.23296624422073364, |
| "learning_rate": 7.758474576271187e-06, |
| "loss": 0.0848, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.24043426453065, |
| "grad_norm": 0.31752628087997437, |
| "learning_rate": 7.757415254237289e-06, |
| "loss": 0.0896, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.2414934463127234, |
| "grad_norm": 0.24866719543933868, |
| "learning_rate": 7.75635593220339e-06, |
| "loss": 0.0873, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.242552628094797, |
| "grad_norm": 0.17599520087242126, |
| "learning_rate": 7.755296610169493e-06, |
| "loss": 0.0885, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.24361180987687, |
| "grad_norm": 1.016655445098877, |
| "learning_rate": 7.754237288135595e-06, |
| "loss": 0.0859, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.2446709916589436, |
| "grad_norm": 0.2759738266468048, |
| "learning_rate": 7.753177966101696e-06, |
| "loss": 0.0897, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.2457301734410167, |
| "grad_norm": 0.2551572918891907, |
| "learning_rate": 7.752118644067798e-06, |
| "loss": 0.0829, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.24678935522309, |
| "grad_norm": 0.7468996047973633, |
| "learning_rate": 7.751059322033899e-06, |
| "loss": 0.0834, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.2478485370051633, |
| "grad_norm": 0.46414947509765625, |
| "learning_rate": 7.75e-06, |
| "loss": 0.0875, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.248907718787237, |
| "grad_norm": 0.6280553340911865, |
| "learning_rate": 7.748940677966102e-06, |
| "loss": 0.0859, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.2499669005693104, |
| "grad_norm": 0.22918131947517395, |
| "learning_rate": 7.747881355932205e-06, |
| "loss": 0.0905, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.2510260823513835, |
| "grad_norm": 0.5105732679367065, |
| "learning_rate": 7.746822033898306e-06, |
| "loss": 0.0827, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.252085264133457, |
| "grad_norm": 0.44134438037872314, |
| "learning_rate": 7.745762711864408e-06, |
| "loss": 0.0833, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.25314444591553, |
| "grad_norm": 0.6763642430305481, |
| "learning_rate": 7.744703389830509e-06, |
| "loss": 0.0835, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.2542036276976036, |
| "grad_norm": 0.35161060094833374, |
| "learning_rate": 7.74364406779661e-06, |
| "loss": 0.0884, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.255262809479677, |
| "grad_norm": 0.2552676200866699, |
| "learning_rate": 7.742584745762712e-06, |
| "loss": 0.0894, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.2563219912617503, |
| "grad_norm": 0.30027422308921814, |
| "learning_rate": 7.741525423728813e-06, |
| "loss": 0.0877, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.257381173043824, |
| "grad_norm": 0.28334829211235046, |
| "learning_rate": 7.740466101694916e-06, |
| "loss": 0.0889, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.258440354825897, |
| "grad_norm": 0.342746764421463, |
| "learning_rate": 7.739406779661018e-06, |
| "loss": 0.0843, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.2594995366079704, |
| "grad_norm": 0.5293981432914734, |
| "learning_rate": 7.73834745762712e-06, |
| "loss": 0.0862, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.2605587183900435, |
| "grad_norm": 0.3510816693305969, |
| "learning_rate": 7.73728813559322e-06, |
| "loss": 0.0866, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.261617900172117, |
| "grad_norm": 0.16086283326148987, |
| "learning_rate": 7.736228813559322e-06, |
| "loss": 0.0875, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.26267708195419, |
| "grad_norm": 0.19258259236812592, |
| "learning_rate": 7.735169491525423e-06, |
| "loss": 0.0825, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.2637362637362637, |
| "grad_norm": 0.2791202664375305, |
| "learning_rate": 7.734110169491527e-06, |
| "loss": 0.0833, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.2647954455183372, |
| "grad_norm": 1.3526684045791626, |
| "learning_rate": 7.733050847457628e-06, |
| "loss": 0.0876, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.2658546273004103, |
| "grad_norm": 0.3024195432662964, |
| "learning_rate": 7.73199152542373e-06, |
| "loss": 0.0854, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.266913809082484, |
| "grad_norm": 0.8857383728027344, |
| "learning_rate": 7.73093220338983e-06, |
| "loss": 0.0857, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.267972990864557, |
| "grad_norm": 0.25719326734542847, |
| "learning_rate": 7.729872881355934e-06, |
| "loss": 0.0896, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.2690321726466305, |
| "grad_norm": 0.21778137981891632, |
| "learning_rate": 7.728813559322035e-06, |
| "loss": 0.0869, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.270091354428704, |
| "grad_norm": 0.7866823077201843, |
| "learning_rate": 7.727754237288137e-06, |
| "loss": 0.0861, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.271150536210777, |
| "grad_norm": 0.27548232674598694, |
| "learning_rate": 7.726694915254238e-06, |
| "loss": 0.0857, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.2722097179928507, |
| "grad_norm": 0.26298612356185913, |
| "learning_rate": 7.72563559322034e-06, |
| "loss": 0.0865, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.2732688997749237, |
| "grad_norm": 0.24503065645694733, |
| "learning_rate": 7.724576271186441e-06, |
| "loss": 0.0867, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.2743280815569973, |
| "grad_norm": 0.2263982743024826, |
| "learning_rate": 7.723516949152542e-06, |
| "loss": 0.087, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.2753872633390704, |
| "grad_norm": 0.23325785994529724, |
| "learning_rate": 7.722457627118645e-06, |
| "loss": 0.0841, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.276446445121144, |
| "grad_norm": 0.6658560037612915, |
| "learning_rate": 7.721398305084747e-06, |
| "loss": 0.0929, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.2775056269032175, |
| "grad_norm": 1.4839017391204834, |
| "learning_rate": 7.720338983050848e-06, |
| "loss": 0.0915, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.2785648086852905, |
| "grad_norm": 0.28016865253448486, |
| "learning_rate": 7.71927966101695e-06, |
| "loss": 0.0825, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.279623990467364, |
| "grad_norm": 0.7911934852600098, |
| "learning_rate": 7.718220338983051e-06, |
| "loss": 0.0868, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.280683172249437, |
| "grad_norm": 0.5834576487541199, |
| "learning_rate": 7.717161016949153e-06, |
| "loss": 0.0884, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.2817423540315107, |
| "grad_norm": 0.3871059715747833, |
| "learning_rate": 7.716101694915254e-06, |
| "loss": 0.0871, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.282801535813584, |
| "grad_norm": 0.2939569652080536, |
| "learning_rate": 7.715042372881355e-06, |
| "loss": 0.0912, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.2838607175956573, |
| "grad_norm": 0.27009499073028564, |
| "learning_rate": 7.713983050847458e-06, |
| "loss": 0.0867, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.284919899377731, |
| "grad_norm": 0.4996664226055145, |
| "learning_rate": 7.71292372881356e-06, |
| "loss": 0.0844, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.285979081159804, |
| "grad_norm": 0.347398042678833, |
| "learning_rate": 7.711864406779663e-06, |
| "loss": 0.0826, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.2870382629418775, |
| "grad_norm": 0.7798426151275635, |
| "learning_rate": 7.710805084745764e-06, |
| "loss": 0.0864, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.2880974447239506, |
| "grad_norm": 0.5361624956130981, |
| "learning_rate": 7.709745762711866e-06, |
| "loss": 0.0836, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.289156626506024, |
| "grad_norm": 0.18656525015830994, |
| "learning_rate": 7.708686440677967e-06, |
| "loss": 0.0829, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.2902158082880977, |
| "grad_norm": 1.1615647077560425, |
| "learning_rate": 7.707627118644069e-06, |
| "loss": 0.0873, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.2912749900701708, |
| "grad_norm": 0.1994059979915619, |
| "learning_rate": 7.70656779661017e-06, |
| "loss": 0.0861, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.2923341718522443, |
| "grad_norm": 0.9500737190246582, |
| "learning_rate": 7.705508474576271e-06, |
| "loss": 0.0888, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.2933933536343174, |
| "grad_norm": 0.6159976124763489, |
| "learning_rate": 7.704449152542373e-06, |
| "loss": 0.0876, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.294452535416391, |
| "grad_norm": 0.23081451654434204, |
| "learning_rate": 7.703389830508476e-06, |
| "loss": 0.0866, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.295511717198464, |
| "grad_norm": 0.3230683505535126, |
| "learning_rate": 7.702330508474577e-06, |
| "loss": 0.0872, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.2965708989805376, |
| "grad_norm": 0.2588438391685486, |
| "learning_rate": 7.701271186440679e-06, |
| "loss": 0.0863, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.2976300807626107, |
| "grad_norm": 0.3278768062591553, |
| "learning_rate": 7.70021186440678e-06, |
| "loss": 0.0913, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.298689262544684, |
| "grad_norm": 0.7292188405990601, |
| "learning_rate": 7.699152542372882e-06, |
| "loss": 0.0895, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.2997484443267577, |
| "grad_norm": 0.7343543767929077, |
| "learning_rate": 7.698093220338983e-06, |
| "loss": 0.0872, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.300807626108831, |
| "grad_norm": 0.5069963932037354, |
| "learning_rate": 7.697033898305084e-06, |
| "loss": 0.0866, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.3018668078909044, |
| "grad_norm": 0.2616865932941437, |
| "learning_rate": 7.695974576271188e-06, |
| "loss": 0.0909, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.3029259896729775, |
| "grad_norm": 0.40990307927131653, |
| "learning_rate": 7.694915254237289e-06, |
| "loss": 0.0875, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.303985171455051, |
| "grad_norm": 0.3008265197277069, |
| "learning_rate": 7.69385593220339e-06, |
| "loss": 0.0894, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.3050443532371245, |
| "grad_norm": 0.5138524174690247, |
| "learning_rate": 7.692796610169492e-06, |
| "loss": 0.0873, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.3061035350191976, |
| "grad_norm": 1.9138803482055664, |
| "learning_rate": 7.691737288135593e-06, |
| "loss": 0.0898, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.307162716801271, |
| "grad_norm": 0.2680315375328064, |
| "learning_rate": 7.690677966101695e-06, |
| "loss": 0.0843, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.3082218985833443, |
| "grad_norm": 0.7578197121620178, |
| "learning_rate": 7.689618644067798e-06, |
| "loss": 0.0935, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.309281080365418, |
| "grad_norm": 0.49096474051475525, |
| "learning_rate": 7.688559322033899e-06, |
| "loss": 0.0893, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.310340262147491, |
| "grad_norm": 1.2972736358642578, |
| "learning_rate": 7.6875e-06, |
| "loss": 0.0869, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.3113994439295644, |
| "grad_norm": 0.7533348798751831, |
| "learning_rate": 7.686440677966102e-06, |
| "loss": 0.0861, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.3124586257116375, |
| "grad_norm": 0.31361818313598633, |
| "learning_rate": 7.685381355932205e-06, |
| "loss": 0.0871, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.313517807493711, |
| "grad_norm": 0.5364953875541687, |
| "learning_rate": 7.684322033898306e-06, |
| "loss": 0.0843, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.3145769892757846, |
| "grad_norm": 0.31093549728393555, |
| "learning_rate": 7.683262711864408e-06, |
| "loss": 0.0878, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.3156361710578577, |
| "grad_norm": 0.2939999997615814, |
| "learning_rate": 7.68220338983051e-06, |
| "loss": 0.0876, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.316695352839931, |
| "grad_norm": 0.2430151104927063, |
| "learning_rate": 7.68114406779661e-06, |
| "loss": 0.0865, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.3177545346220043, |
| "grad_norm": 0.33244743943214417, |
| "learning_rate": 7.680084745762712e-06, |
| "loss": 0.0895, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.318813716404078, |
| "grad_norm": 0.7019136548042297, |
| "learning_rate": 7.679025423728813e-06, |
| "loss": 0.0881, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.3198728981861514, |
| "grad_norm": 0.7489039301872253, |
| "learning_rate": 7.677966101694917e-06, |
| "loss": 0.0823, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.3209320799682245, |
| "grad_norm": 0.3986199200153351, |
| "learning_rate": 7.676906779661018e-06, |
| "loss": 0.0875, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.321991261750298, |
| "grad_norm": 0.3629228472709656, |
| "learning_rate": 7.67584745762712e-06, |
| "loss": 0.0884, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.323050443532371, |
| "grad_norm": 0.9728587865829468, |
| "learning_rate": 7.67478813559322e-06, |
| "loss": 0.0877, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.3241096253144446, |
| "grad_norm": 0.5270370841026306, |
| "learning_rate": 7.673728813559322e-06, |
| "loss": 0.0912, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.325168807096518, |
| "grad_norm": 0.4018361270427704, |
| "learning_rate": 7.672669491525424e-06, |
| "loss": 0.0885, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.3262279888785913, |
| "grad_norm": 0.5126635432243347, |
| "learning_rate": 7.671610169491525e-06, |
| "loss": 0.089, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.327287170660665, |
| "grad_norm": 0.25716903805732727, |
| "learning_rate": 7.670550847457628e-06, |
| "loss": 0.0836, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.328346352442738, |
| "grad_norm": 0.25198864936828613, |
| "learning_rate": 7.66949152542373e-06, |
| "loss": 0.0861, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.3294055342248114, |
| "grad_norm": 0.31258684396743774, |
| "learning_rate": 7.668432203389831e-06, |
| "loss": 0.0885, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.3304647160068845, |
| "grad_norm": 0.5213435292243958, |
| "learning_rate": 7.667372881355934e-06, |
| "loss": 0.0839, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.331523897788958, |
| "grad_norm": 0.31534504890441895, |
| "learning_rate": 7.666313559322036e-06, |
| "loss": 0.0885, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.332583079571031, |
| "grad_norm": 1.2737720012664795, |
| "learning_rate": 7.665254237288137e-06, |
| "loss": 0.0919, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.3336422613531047, |
| "grad_norm": 0.47025302052497864, |
| "learning_rate": 7.664194915254238e-06, |
| "loss": 0.088, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.3347014431351782, |
| "grad_norm": 0.6608093976974487, |
| "learning_rate": 7.66313559322034e-06, |
| "loss": 0.0869, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.3357606249172513, |
| "grad_norm": 0.7504128217697144, |
| "learning_rate": 7.662076271186441e-06, |
| "loss": 0.0907, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.336819806699325, |
| "grad_norm": 0.4346690773963928, |
| "learning_rate": 7.661016949152543e-06, |
| "loss": 0.0905, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.337878988481398, |
| "grad_norm": 0.36322346329689026, |
| "learning_rate": 7.659957627118646e-06, |
| "loss": 0.0902, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.3389381702634715, |
| "grad_norm": 0.32090482115745544, |
| "learning_rate": 7.658898305084747e-06, |
| "loss": 0.0883, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.339997352045545, |
| "grad_norm": 0.7748063206672668, |
| "learning_rate": 7.657838983050848e-06, |
| "loss": 0.0872, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.341056533827618, |
| "grad_norm": 0.22548404335975647, |
| "learning_rate": 7.65677966101695e-06, |
| "loss": 0.0868, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.3421157156096917, |
| "grad_norm": 1.2027844190597534, |
| "learning_rate": 7.655720338983051e-06, |
| "loss": 0.0867, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.3431748973917648, |
| "grad_norm": 0.38570746779441833, |
| "learning_rate": 7.654661016949153e-06, |
| "loss": 0.0858, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.3442340791738383, |
| "grad_norm": 0.2694943845272064, |
| "learning_rate": 7.653601694915254e-06, |
| "loss": 0.0841, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.3452932609559114, |
| "grad_norm": 0.4928399324417114, |
| "learning_rate": 7.652542372881356e-06, |
| "loss": 0.0871, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.346352442737985, |
| "grad_norm": 0.8780227899551392, |
| "learning_rate": 7.651483050847459e-06, |
| "loss": 0.0905, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.347411624520058, |
| "grad_norm": 0.6822829246520996, |
| "learning_rate": 7.65042372881356e-06, |
| "loss": 0.0885, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.3484708063021316, |
| "grad_norm": 0.5823767781257629, |
| "learning_rate": 7.649364406779661e-06, |
| "loss": 0.0869, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.349529988084205, |
| "grad_norm": 0.2178659588098526, |
| "learning_rate": 7.648305084745763e-06, |
| "loss": 0.0892, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.350589169866278, |
| "grad_norm": 0.8088253736495972, |
| "learning_rate": 7.647245762711864e-06, |
| "loss": 0.0884, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.3516483516483517, |
| "grad_norm": 0.3263354003429413, |
| "learning_rate": 7.646186440677966e-06, |
| "loss": 0.0863, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.352707533430425, |
| "grad_norm": 0.26838818192481995, |
| "learning_rate": 7.645127118644067e-06, |
| "loss": 0.0884, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.3537667152124984, |
| "grad_norm": 0.5517171621322632, |
| "learning_rate": 7.64406779661017e-06, |
| "loss": 0.0913, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.354825896994572, |
| "grad_norm": 0.3242451548576355, |
| "learning_rate": 7.643008474576272e-06, |
| "loss": 0.0887, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.355885078776645, |
| "grad_norm": 0.23916837573051453, |
| "learning_rate": 7.641949152542375e-06, |
| "loss": 0.0852, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.3569442605587185, |
| "grad_norm": 0.25891250371932983, |
| "learning_rate": 7.640889830508476e-06, |
| "loss": 0.0857, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.3580034423407916, |
| "grad_norm": 0.3916108012199402, |
| "learning_rate": 7.639830508474578e-06, |
| "loss": 0.0858, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.359062624122865, |
| "grad_norm": 0.38781145215034485, |
| "learning_rate": 7.638771186440679e-06, |
| "loss": 0.0896, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.3601218059049383, |
| "grad_norm": 0.2440766841173172, |
| "learning_rate": 7.63771186440678e-06, |
| "loss": 0.0871, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.361180987687012, |
| "grad_norm": 0.6661586165428162, |
| "learning_rate": 7.636652542372882e-06, |
| "loss": 0.0921, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.362240169469085, |
| "grad_norm": 0.435925155878067, |
| "learning_rate": 7.635593220338983e-06, |
| "loss": 0.0885, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.3632993512511584, |
| "grad_norm": 0.4927040934562683, |
| "learning_rate": 7.634533898305085e-06, |
| "loss": 0.0866, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.364358533033232, |
| "grad_norm": 0.29542070627212524, |
| "learning_rate": 7.633474576271188e-06, |
| "loss": 0.084, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.365417714815305, |
| "grad_norm": 0.2592524588108063, |
| "learning_rate": 7.632415254237289e-06, |
| "loss": 0.0854, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.3664768965973786, |
| "grad_norm": 0.7580803632736206, |
| "learning_rate": 7.63135593220339e-06, |
| "loss": 0.0832, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.3675360783794517, |
| "grad_norm": 1.193358302116394, |
| "learning_rate": 7.630296610169492e-06, |
| "loss": 0.088, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.368595260161525, |
| "grad_norm": 0.6735422015190125, |
| "learning_rate": 7.629237288135593e-06, |
| "loss": 0.0887, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.3696544419435988, |
| "grad_norm": 0.27006396651268005, |
| "learning_rate": 7.628177966101696e-06, |
| "loss": 0.0886, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.370713623725672, |
| "grad_norm": 0.27838370203971863, |
| "learning_rate": 7.627118644067797e-06, |
| "loss": 0.087, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.3717728055077454, |
| "grad_norm": 0.22530795633792877, |
| "learning_rate": 7.6260593220338984e-06, |
| "loss": 0.0904, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.3728319872898185, |
| "grad_norm": 0.38037195801734924, |
| "learning_rate": 7.625e-06, |
| "loss": 0.0879, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.373891169071892, |
| "grad_norm": 0.2998390197753906, |
| "learning_rate": 7.623940677966102e-06, |
| "loss": 0.0854, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.3749503508539656, |
| "grad_norm": 1.02617609500885, |
| "learning_rate": 7.6228813559322035e-06, |
| "loss": 0.087, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.3760095326360386, |
| "grad_norm": 0.7997170686721802, |
| "learning_rate": 7.621822033898307e-06, |
| "loss": 0.0848, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.377068714418112, |
| "grad_norm": 0.26829835772514343, |
| "learning_rate": 7.620762711864408e-06, |
| "loss": 0.0851, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.3781278962001853, |
| "grad_norm": 0.27473142743110657, |
| "learning_rate": 7.6197033898305094e-06, |
| "loss": 0.0853, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.379187077982259, |
| "grad_norm": 0.5847030878067017, |
| "learning_rate": 7.618644067796611e-06, |
| "loss": 0.0866, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.380246259764332, |
| "grad_norm": 0.6349015235900879, |
| "learning_rate": 7.617584745762713e-06, |
| "loss": 0.0874, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.3813054415464054, |
| "grad_norm": 0.41820037364959717, |
| "learning_rate": 7.6165254237288145e-06, |
| "loss": 0.091, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.3823646233284785, |
| "grad_norm": 1.3365269899368286, |
| "learning_rate": 7.615466101694916e-06, |
| "loss": 0.088, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.383423805110552, |
| "grad_norm": 0.322917103767395, |
| "learning_rate": 7.614406779661017e-06, |
| "loss": 0.0875, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.3844829868926256, |
| "grad_norm": 0.24395416676998138, |
| "learning_rate": 7.61334745762712e-06, |
| "loss": 0.0869, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.3855421686746987, |
| "grad_norm": 0.3095790445804596, |
| "learning_rate": 7.612288135593221e-06, |
| "loss": 0.0903, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.3866013504567722, |
| "grad_norm": 0.9672554731369019, |
| "learning_rate": 7.611228813559322e-06, |
| "loss": 0.0875, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.3876605322388453, |
| "grad_norm": 0.662190318107605, |
| "learning_rate": 7.610169491525425e-06, |
| "loss": 0.0875, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.388719714020919, |
| "grad_norm": 0.28213566541671753, |
| "learning_rate": 7.609110169491526e-06, |
| "loss": 0.0876, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.3897788958029924, |
| "grad_norm": 0.302276074886322, |
| "learning_rate": 7.6080508474576275e-06, |
| "loss": 0.0893, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.3908380775850655, |
| "grad_norm": 0.44363200664520264, |
| "learning_rate": 7.606991525423729e-06, |
| "loss": 0.085, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.391897259367139, |
| "grad_norm": 0.7232718467712402, |
| "learning_rate": 7.605932203389831e-06, |
| "loss": 0.0914, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.392956441149212, |
| "grad_norm": 0.5587173700332642, |
| "learning_rate": 7.6048728813559326e-06, |
| "loss": 0.0851, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.3940156229312857, |
| "grad_norm": 0.46465861797332764, |
| "learning_rate": 7.603813559322034e-06, |
| "loss": 0.0881, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.3950748047133588, |
| "grad_norm": 0.29479849338531494, |
| "learning_rate": 7.602754237288135e-06, |
| "loss": 0.0885, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.3961339864954323, |
| "grad_norm": 0.443615585565567, |
| "learning_rate": 7.601694915254238e-06, |
| "loss": 0.0862, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.3971931682775054, |
| "grad_norm": 0.280912309885025, |
| "learning_rate": 7.600635593220339e-06, |
| "loss": 0.0887, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.398252350059579, |
| "grad_norm": 0.33202967047691345, |
| "learning_rate": 7.599576271186442e-06, |
| "loss": 0.0847, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.3993115318416525, |
| "grad_norm": 0.2298818826675415, |
| "learning_rate": 7.598516949152544e-06, |
| "loss": 0.0865, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.4003707136237256, |
| "grad_norm": 0.8562094569206238, |
| "learning_rate": 7.597457627118645e-06, |
| "loss": 0.0881, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.401429895405799, |
| "grad_norm": 0.30260929465293884, |
| "learning_rate": 7.596398305084746e-06, |
| "loss": 0.0859, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.402489077187872, |
| "grad_norm": 0.7718713879585266, |
| "learning_rate": 7.595338983050849e-06, |
| "loss": 0.0867, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.4035482589699457, |
| "grad_norm": 0.23501431941986084, |
| "learning_rate": 7.59427966101695e-06, |
| "loss": 0.0869, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.4046074407520193, |
| "grad_norm": 0.357307493686676, |
| "learning_rate": 7.5932203389830515e-06, |
| "loss": 0.0824, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.4056666225340924, |
| "grad_norm": 0.25347843766212463, |
| "learning_rate": 7.592161016949153e-06, |
| "loss": 0.0868, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.406725804316166, |
| "grad_norm": 0.64441978931427, |
| "learning_rate": 7.591101694915255e-06, |
| "loss": 0.0876, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.407784986098239, |
| "grad_norm": 0.8823823928833008, |
| "learning_rate": 7.5900423728813566e-06, |
| "loss": 0.0871, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.4088441678803125, |
| "grad_norm": 0.2525102198123932, |
| "learning_rate": 7.588983050847458e-06, |
| "loss": 0.0901, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.4099033496623856, |
| "grad_norm": 0.45447444915771484, |
| "learning_rate": 7.58792372881356e-06, |
| "loss": 0.086, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.410962531444459, |
| "grad_norm": 0.28946614265441895, |
| "learning_rate": 7.586864406779662e-06, |
| "loss": 0.0866, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.4120217132265327, |
| "grad_norm": 0.3613869547843933, |
| "learning_rate": 7.585805084745763e-06, |
| "loss": 0.0862, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.413080895008606, |
| "grad_norm": 0.21505282819271088, |
| "learning_rate": 7.5847457627118645e-06, |
| "loss": 0.0907, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.4141400767906793, |
| "grad_norm": 0.5027592182159424, |
| "learning_rate": 7.583686440677967e-06, |
| "loss": 0.0842, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.4151992585727524, |
| "grad_norm": 0.5195924043655396, |
| "learning_rate": 7.582627118644068e-06, |
| "loss": 0.0868, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.416258440354826, |
| "grad_norm": 0.4060683250427246, |
| "learning_rate": 7.5815677966101695e-06, |
| "loss": 0.088, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.417317622136899, |
| "grad_norm": 0.982610821723938, |
| "learning_rate": 7.580508474576271e-06, |
| "loss": 0.0905, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.4183768039189726, |
| "grad_norm": 0.8710994124412537, |
| "learning_rate": 7.579449152542373e-06, |
| "loss": 0.0874, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.419435985701046, |
| "grad_norm": 0.23645208775997162, |
| "learning_rate": 7.578389830508475e-06, |
| "loss": 0.0845, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.420495167483119, |
| "grad_norm": 0.287193238735199, |
| "learning_rate": 7.577330508474578e-06, |
| "loss": 0.0846, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.4215543492651928, |
| "grad_norm": 0.2834354341030121, |
| "learning_rate": 7.576271186440679e-06, |
| "loss": 0.0875, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.422613531047266, |
| "grad_norm": 0.2335229218006134, |
| "learning_rate": 7.5752118644067805e-06, |
| "loss": 0.0836, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.4236727128293394, |
| "grad_norm": 0.4091585576534271, |
| "learning_rate": 7.574152542372882e-06, |
| "loss": 0.0887, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.424731894611413, |
| "grad_norm": 0.2867496907711029, |
| "learning_rate": 7.573093220338984e-06, |
| "loss": 0.0884, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.425791076393486, |
| "grad_norm": 0.3807348906993866, |
| "learning_rate": 7.572033898305086e-06, |
| "loss": 0.09, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.4268502581755595, |
| "grad_norm": 0.4710707664489746, |
| "learning_rate": 7.570974576271187e-06, |
| "loss": 0.0835, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.4279094399576326, |
| "grad_norm": 0.9628831744194031, |
| "learning_rate": 7.569915254237289e-06, |
| "loss": 0.0874, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.428968621739706, |
| "grad_norm": 0.4457227885723114, |
| "learning_rate": 7.568855932203391e-06, |
| "loss": 0.086, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.4300278035217793, |
| "grad_norm": 0.2667595148086548, |
| "learning_rate": 7.567796610169492e-06, |
| "loss": 0.0868, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.431086985303853, |
| "grad_norm": 0.3075510263442993, |
| "learning_rate": 7.5667372881355935e-06, |
| "loss": 0.0865, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.432146167085926, |
| "grad_norm": 0.26120322942733765, |
| "learning_rate": 7.565677966101696e-06, |
| "loss": 0.0888, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.4332053488679994, |
| "grad_norm": 0.3398880660533905, |
| "learning_rate": 7.564618644067797e-06, |
| "loss": 0.0896, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.434264530650073, |
| "grad_norm": 0.34000998735427856, |
| "learning_rate": 7.563559322033899e-06, |
| "loss": 0.0848, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.435323712432146, |
| "grad_norm": 0.2651556432247162, |
| "learning_rate": 7.5625e-06, |
| "loss": 0.0899, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.4363828942142196, |
| "grad_norm": 0.28574419021606445, |
| "learning_rate": 7.561440677966102e-06, |
| "loss": 0.09, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.4374420759962927, |
| "grad_norm": 0.25516659021377563, |
| "learning_rate": 7.560381355932204e-06, |
| "loss": 0.0877, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.4385012577783662, |
| "grad_norm": 0.2714408040046692, |
| "learning_rate": 7.559322033898305e-06, |
| "loss": 0.0884, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.4395604395604398, |
| "grad_norm": 0.4835186004638672, |
| "learning_rate": 7.558262711864407e-06, |
| "loss": 0.0841, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.440619621342513, |
| "grad_norm": 0.21308913826942444, |
| "learning_rate": 7.557203389830509e-06, |
| "loss": 0.0872, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.4416788031245864, |
| "grad_norm": 0.6488706469535828, |
| "learning_rate": 7.55614406779661e-06, |
| "loss": 0.0858, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.4427379849066595, |
| "grad_norm": 0.27812522649765015, |
| "learning_rate": 7.555084745762713e-06, |
| "loss": 0.0882, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.443797166688733, |
| "grad_norm": 0.25572165846824646, |
| "learning_rate": 7.554025423728815e-06, |
| "loss": 0.0855, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.444856348470806, |
| "grad_norm": 0.26096436381340027, |
| "learning_rate": 7.552966101694916e-06, |
| "loss": 0.0867, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.4459155302528797, |
| "grad_norm": 0.43788591027259827, |
| "learning_rate": 7.5519067796610175e-06, |
| "loss": 0.0866, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.4469747120349528, |
| "grad_norm": 0.5280694365501404, |
| "learning_rate": 7.55084745762712e-06, |
| "loss": 0.0861, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.4480338938170263, |
| "grad_norm": 0.21385736763477325, |
| "learning_rate": 7.549788135593221e-06, |
| "loss": 0.0877, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.4490930755991, |
| "grad_norm": 0.31411486864089966, |
| "learning_rate": 7.548728813559323e-06, |
| "loss": 0.0876, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.450152257381173, |
| "grad_norm": 0.3917924463748932, |
| "learning_rate": 7.547669491525425e-06, |
| "loss": 0.0896, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.4512114391632465, |
| "grad_norm": 0.7999858856201172, |
| "learning_rate": 7.546610169491526e-06, |
| "loss": 0.0843, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.4522706209453196, |
| "grad_norm": 0.30375435948371887, |
| "learning_rate": 7.545550847457628e-06, |
| "loss": 0.0852, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.453329802727393, |
| "grad_norm": 0.25230348110198975, |
| "learning_rate": 7.544491525423729e-06, |
| "loss": 0.0853, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.4543889845094666, |
| "grad_norm": 0.5028313398361206, |
| "learning_rate": 7.543432203389831e-06, |
| "loss": 0.0873, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.4554481662915397, |
| "grad_norm": 0.2625860273838043, |
| "learning_rate": 7.542372881355933e-06, |
| "loss": 0.0858, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.4565073480736133, |
| "grad_norm": 0.4730969965457916, |
| "learning_rate": 7.541313559322034e-06, |
| "loss": 0.0896, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.4575665298556864, |
| "grad_norm": 0.2755214273929596, |
| "learning_rate": 7.5402542372881356e-06, |
| "loss": 0.0848, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.45862571163776, |
| "grad_norm": 0.22363467514514923, |
| "learning_rate": 7.539194915254238e-06, |
| "loss": 0.0833, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.459684893419833, |
| "grad_norm": 0.2618216574192047, |
| "learning_rate": 7.538135593220339e-06, |
| "loss": 0.0864, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.4607440752019065, |
| "grad_norm": 0.3937819004058838, |
| "learning_rate": 7.537076271186441e-06, |
| "loss": 0.0863, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.46180325698398, |
| "grad_norm": 0.6888829469680786, |
| "learning_rate": 7.536016949152543e-06, |
| "loss": 0.0859, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.462862438766053, |
| "grad_norm": 0.6449849009513855, |
| "learning_rate": 7.534957627118644e-06, |
| "loss": 0.0841, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.4639216205481267, |
| "grad_norm": 0.3087742030620575, |
| "learning_rate": 7.533898305084746e-06, |
| "loss": 0.0858, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.4649808023302, |
| "grad_norm": 0.2380569726228714, |
| "learning_rate": 7.532838983050849e-06, |
| "loss": 0.0849, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.4660399841122733, |
| "grad_norm": 0.18616469204425812, |
| "learning_rate": 7.53177966101695e-06, |
| "loss": 0.0887, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.4670991658943464, |
| "grad_norm": 0.29187682271003723, |
| "learning_rate": 7.530720338983052e-06, |
| "loss": 0.0848, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.46815834767642, |
| "grad_norm": 0.25846749544143677, |
| "learning_rate": 7.529661016949154e-06, |
| "loss": 0.0888, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.4692175294584935, |
| "grad_norm": 0.17183120548725128, |
| "learning_rate": 7.528601694915255e-06, |
| "loss": 0.0878, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.4702767112405666, |
| "grad_norm": 0.5974867939949036, |
| "learning_rate": 7.527542372881357e-06, |
| "loss": 0.0882, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.47133589302264, |
| "grad_norm": 0.3511717915534973, |
| "learning_rate": 7.526483050847458e-06, |
| "loss": 0.0883, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.472395074804713, |
| "grad_norm": 0.29475048184394836, |
| "learning_rate": 7.52542372881356e-06, |
| "loss": 0.0841, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.4734542565867867, |
| "grad_norm": 1.7622301578521729, |
| "learning_rate": 7.524364406779662e-06, |
| "loss": 0.0909, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.4745134383688603, |
| "grad_norm": 0.23583351075649261, |
| "learning_rate": 7.523305084745763e-06, |
| "loss": 0.0836, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.4755726201509334, |
| "grad_norm": 0.878502607345581, |
| "learning_rate": 7.522245762711865e-06, |
| "loss": 0.0884, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.476631801933007, |
| "grad_norm": 0.23254196345806122, |
| "learning_rate": 7.521186440677967e-06, |
| "loss": 0.0853, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.47769098371508, |
| "grad_norm": 0.23312729597091675, |
| "learning_rate": 7.520127118644068e-06, |
| "loss": 0.0886, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.4787501654971535, |
| "grad_norm": 0.2530806064605713, |
| "learning_rate": 7.51906779661017e-06, |
| "loss": 0.0838, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.4798093472792266, |
| "grad_norm": 0.22649787366390228, |
| "learning_rate": 7.518008474576272e-06, |
| "loss": 0.0877, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.4808685290613, |
| "grad_norm": 0.27939778566360474, |
| "learning_rate": 7.516949152542373e-06, |
| "loss": 0.0843, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.4819277108433733, |
| "grad_norm": 0.8074503540992737, |
| "learning_rate": 7.515889830508475e-06, |
| "loss": 0.0878, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.482986892625447, |
| "grad_norm": 0.37326523661613464, |
| "learning_rate": 7.514830508474576e-06, |
| "loss": 0.0838, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.4840460744075203, |
| "grad_norm": 0.2246612310409546, |
| "learning_rate": 7.5137711864406785e-06, |
| "loss": 0.0857, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.4851052561895934, |
| "grad_norm": 0.26936617493629456, |
| "learning_rate": 7.51271186440678e-06, |
| "loss": 0.0856, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.486164437971667, |
| "grad_norm": 0.33371925354003906, |
| "learning_rate": 7.511652542372881e-06, |
| "loss": 0.0865, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.48722361975374, |
| "grad_norm": 0.25461632013320923, |
| "learning_rate": 7.510593220338984e-06, |
| "loss": 0.0876, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.4882828015358136, |
| "grad_norm": 0.27395927906036377, |
| "learning_rate": 7.509533898305086e-06, |
| "loss": 0.0836, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.489341983317887, |
| "grad_norm": 0.36620545387268066, |
| "learning_rate": 7.508474576271187e-06, |
| "loss": 0.0857, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.4904011650999602, |
| "grad_norm": 0.4027053415775299, |
| "learning_rate": 7.5074152542372895e-06, |
| "loss": 0.0848, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.4914603468820338, |
| "grad_norm": 0.8063977360725403, |
| "learning_rate": 7.506355932203391e-06, |
| "loss": 0.0876, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.492519528664107, |
| "grad_norm": 0.2470046877861023, |
| "learning_rate": 7.505296610169492e-06, |
| "loss": 0.0882, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.4935787104461804, |
| "grad_norm": 0.2624497413635254, |
| "learning_rate": 7.504237288135594e-06, |
| "loss": 0.0886, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.4946378922282535, |
| "grad_norm": 0.34627389907836914, |
| "learning_rate": 7.503177966101696e-06, |
| "loss": 0.0845, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.495697074010327, |
| "grad_norm": 0.29364633560180664, |
| "learning_rate": 7.502118644067797e-06, |
| "loss": 0.088, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.4967562557924, |
| "grad_norm": 1.2924612760543823, |
| "learning_rate": 7.501059322033899e-06, |
| "loss": 0.0882, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.4978154375744737, |
| "grad_norm": 0.27166759967803955, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.0906, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.4978154375744737, |
| "eval_accuracy": 0.9768, |
| "eval_best_f1_from_thresholding": 0.15328467153284675, |
| "eval_loss": 0.1330053061246872, |
| "eval_matthews_corrcoef": 0.15095498585011305, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9882388725539897, |
| "eval_negative_class_precision": 0.9926672777268561, |
| "eval_negative_class_recall": 0.9838498031694761, |
| "eval_positive_class_f1": 0.15328467153284672, |
| "eval_positive_class_precision": 0.11602209944751381, |
| "eval_positive_class_recall": 0.22580645161290322, |
| "eval_roc_auc": 0.7900881423040731, |
| "eval_runtime": 20.7623, |
| "eval_samples_per_second": 481.642, |
| "eval_steps_per_second": 7.562, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.498874619356547, |
| "grad_norm": 0.32740044593811035, |
| "learning_rate": 7.4989406779661024e-06, |
| "loss": 0.0891, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.4999338011386203, |
| "grad_norm": 0.24599646031856537, |
| "learning_rate": 7.497881355932204e-06, |
| "loss": 0.0901, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.500992982920694, |
| "grad_norm": 0.4903382658958435, |
| "learning_rate": 7.496822033898305e-06, |
| "loss": 0.0868, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.502052164702767, |
| "grad_norm": 0.26813480257987976, |
| "learning_rate": 7.4957627118644075e-06, |
| "loss": 0.0918, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.5031113464848405, |
| "grad_norm": 0.2489306479692459, |
| "learning_rate": 7.494703389830509e-06, |
| "loss": 0.087, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.504170528266914, |
| "grad_norm": 0.43046361207962036, |
| "learning_rate": 7.49364406779661e-06, |
| "loss": 0.0842, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.505229710048987, |
| "grad_norm": 0.9100356101989746, |
| "learning_rate": 7.492584745762712e-06, |
| "loss": 0.0868, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.5062888918310606, |
| "grad_norm": 0.44035395979881287, |
| "learning_rate": 7.491525423728814e-06, |
| "loss": 0.0847, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.5073480736131337, |
| "grad_norm": 1.0612967014312744, |
| "learning_rate": 7.4904661016949154e-06, |
| "loss": 0.0859, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.5084072553952073, |
| "grad_norm": 0.2517843544483185, |
| "learning_rate": 7.489406779661017e-06, |
| "loss": 0.0858, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.509466437177281, |
| "grad_norm": 0.2550938129425049, |
| "learning_rate": 7.48834745762712e-06, |
| "loss": 0.0905, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.510525618959354, |
| "grad_norm": 0.48128417134284973, |
| "learning_rate": 7.487288135593221e-06, |
| "loss": 0.0878, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.511584800741427, |
| "grad_norm": 0.2762486934661865, |
| "learning_rate": 7.486228813559323e-06, |
| "loss": 0.085, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.5126439825235005, |
| "grad_norm": 0.25758475065231323, |
| "learning_rate": 7.485169491525425e-06, |
| "loss": 0.0852, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.513703164305574, |
| "grad_norm": 0.5497387647628784, |
| "learning_rate": 7.4841101694915264e-06, |
| "loss": 0.0851, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.514762346087647, |
| "grad_norm": 0.6831100583076477, |
| "learning_rate": 7.483050847457628e-06, |
| "loss": 0.0874, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.5158215278697207, |
| "grad_norm": 0.559902012348175, |
| "learning_rate": 7.481991525423729e-06, |
| "loss": 0.0833, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.516880709651794, |
| "grad_norm": 0.2921912968158722, |
| "learning_rate": 7.4809322033898315e-06, |
| "loss": 0.0851, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.5179398914338673, |
| "grad_norm": 0.21021868288516998, |
| "learning_rate": 7.479872881355933e-06, |
| "loss": 0.0846, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.518999073215941, |
| "grad_norm": 0.1999545395374298, |
| "learning_rate": 7.478813559322034e-06, |
| "loss": 0.0831, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.520058254998014, |
| "grad_norm": 0.7873519659042358, |
| "learning_rate": 7.477754237288137e-06, |
| "loss": 0.0852, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.5211174367800875, |
| "grad_norm": 0.2738882899284363, |
| "learning_rate": 7.476694915254238e-06, |
| "loss": 0.0868, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.5221766185621606, |
| "grad_norm": 0.30528733134269714, |
| "learning_rate": 7.475635593220339e-06, |
| "loss": 0.0887, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.523235800344234, |
| "grad_norm": 0.27182379364967346, |
| "learning_rate": 7.474576271186441e-06, |
| "loss": 0.0893, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.5242949821263077, |
| "grad_norm": 0.9655314683914185, |
| "learning_rate": 7.473516949152543e-06, |
| "loss": 0.0874, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.5253541639083807, |
| "grad_norm": 0.2309257537126541, |
| "learning_rate": 7.4724576271186445e-06, |
| "loss": 0.0878, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.5264133456904543, |
| "grad_norm": 0.4812738597393036, |
| "learning_rate": 7.471398305084746e-06, |
| "loss": 0.0826, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.5274725274725274, |
| "grad_norm": 0.19819387793540955, |
| "learning_rate": 7.470338983050847e-06, |
| "loss": 0.0868, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.528531709254601, |
| "grad_norm": 0.24691465497016907, |
| "learning_rate": 7.4692796610169496e-06, |
| "loss": 0.0857, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.529590891036674, |
| "grad_norm": 0.29209673404693604, |
| "learning_rate": 7.468220338983051e-06, |
| "loss": 0.0853, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.5306500728187475, |
| "grad_norm": 0.43177977204322815, |
| "learning_rate": 7.467161016949152e-06, |
| "loss": 0.0879, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.5317092546008206, |
| "grad_norm": 0.6204429864883423, |
| "learning_rate": 7.4661016949152555e-06, |
| "loss": 0.0838, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.532768436382894, |
| "grad_norm": 0.23086082935333252, |
| "learning_rate": 7.465042372881357e-06, |
| "loss": 0.0838, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.5338276181649677, |
| "grad_norm": 0.2399570345878601, |
| "learning_rate": 7.463983050847458e-06, |
| "loss": 0.0865, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.534886799947041, |
| "grad_norm": 0.25693604350090027, |
| "learning_rate": 7.462923728813561e-06, |
| "loss": 0.0852, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.5359459817291143, |
| "grad_norm": 0.26127418875694275, |
| "learning_rate": 7.461864406779662e-06, |
| "loss": 0.0842, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.5370051635111874, |
| "grad_norm": 0.2086644321680069, |
| "learning_rate": 7.460805084745763e-06, |
| "loss": 0.0855, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.538064345293261, |
| "grad_norm": 0.6140705347061157, |
| "learning_rate": 7.459745762711866e-06, |
| "loss": 0.0846, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.5391235270753345, |
| "grad_norm": 0.7806175947189331, |
| "learning_rate": 7.458686440677967e-06, |
| "loss": 0.085, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.5401827088574076, |
| "grad_norm": 0.2025279402732849, |
| "learning_rate": 7.4576271186440685e-06, |
| "loss": 0.0844, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.541241890639481, |
| "grad_norm": 0.24406304955482483, |
| "learning_rate": 7.45656779661017e-06, |
| "loss": 0.0815, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.5423010724215542, |
| "grad_norm": 0.26805198192596436, |
| "learning_rate": 7.455508474576272e-06, |
| "loss": 0.088, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.5433602542036278, |
| "grad_norm": 0.23744802176952362, |
| "learning_rate": 7.4544491525423736e-06, |
| "loss": 0.085, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.5444194359857013, |
| "grad_norm": 0.3496606647968292, |
| "learning_rate": 7.453389830508475e-06, |
| "loss": 0.0832, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.5454786177677744, |
| "grad_norm": 0.4525336027145386, |
| "learning_rate": 7.452330508474576e-06, |
| "loss": 0.0832, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.5465377995498475, |
| "grad_norm": 0.2229275107383728, |
| "learning_rate": 7.451271186440679e-06, |
| "loss": 0.0859, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.547596981331921, |
| "grad_norm": 0.33702296018600464, |
| "learning_rate": 7.45021186440678e-06, |
| "loss": 0.0835, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.5486561631139946, |
| "grad_norm": 0.7689473032951355, |
| "learning_rate": 7.4491525423728815e-06, |
| "loss": 0.0884, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.5497153448960677, |
| "grad_norm": 0.812541663646698, |
| "learning_rate": 7.448093220338984e-06, |
| "loss": 0.0907, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.550774526678141, |
| "grad_norm": 0.9435730576515198, |
| "learning_rate": 7.447033898305085e-06, |
| "loss": 0.0909, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.5518337084602143, |
| "grad_norm": 0.2227533459663391, |
| "learning_rate": 7.4459745762711865e-06, |
| "loss": 0.0862, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.552892890242288, |
| "grad_norm": 0.3291822373867035, |
| "learning_rate": 7.444915254237288e-06, |
| "loss": 0.0878, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.5539520720243614, |
| "grad_norm": 0.23760034143924713, |
| "learning_rate": 7.443855932203391e-06, |
| "loss": 0.0873, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.5550112538064345, |
| "grad_norm": 0.30737602710723877, |
| "learning_rate": 7.4427966101694925e-06, |
| "loss": 0.0862, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.556070435588508, |
| "grad_norm": 0.1957489252090454, |
| "learning_rate": 7.441737288135594e-06, |
| "loss": 0.0835, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.557129617370581, |
| "grad_norm": 0.22747154533863068, |
| "learning_rate": 7.440677966101696e-06, |
| "loss": 0.0881, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.5581887991526546, |
| "grad_norm": 0.5635335445404053, |
| "learning_rate": 7.4396186440677975e-06, |
| "loss": 0.0873, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.559247980934728, |
| "grad_norm": 0.23749151825904846, |
| "learning_rate": 7.438559322033899e-06, |
| "loss": 0.0852, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.5603071627168013, |
| "grad_norm": 0.17558777332305908, |
| "learning_rate": 7.437500000000001e-06, |
| "loss": 0.0836, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.5613663444988743, |
| "grad_norm": 0.28608277440071106, |
| "learning_rate": 7.436440677966103e-06, |
| "loss": 0.0847, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.562425526280948, |
| "grad_norm": 0.5463235974311829, |
| "learning_rate": 7.435381355932204e-06, |
| "loss": 0.0852, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.5634847080630214, |
| "grad_norm": 0.46325045824050903, |
| "learning_rate": 7.4343220338983054e-06, |
| "loss": 0.0843, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.5645438898450945, |
| "grad_norm": 0.41990897059440613, |
| "learning_rate": 7.433262711864408e-06, |
| "loss": 0.0865, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.565603071627168, |
| "grad_norm": 0.34436190128326416, |
| "learning_rate": 7.432203389830509e-06, |
| "loss": 0.0874, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.566662253409241, |
| "grad_norm": 0.5995191335678101, |
| "learning_rate": 7.4311440677966105e-06, |
| "loss": 0.0867, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.5677214351913147, |
| "grad_norm": 0.2410784661769867, |
| "learning_rate": 7.430084745762712e-06, |
| "loss": 0.0848, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.568780616973388, |
| "grad_norm": 0.4424084722995758, |
| "learning_rate": 7.429025423728814e-06, |
| "loss": 0.0859, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.5698397987554613, |
| "grad_norm": 0.2659587860107422, |
| "learning_rate": 7.427966101694916e-06, |
| "loss": 0.0853, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.570898980537535, |
| "grad_norm": 0.20544077455997467, |
| "learning_rate": 7.426906779661017e-06, |
| "loss": 0.0835, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.571958162319608, |
| "grad_norm": 0.2531146705150604, |
| "learning_rate": 7.425847457627119e-06, |
| "loss": 0.089, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.5730173441016815, |
| "grad_norm": 0.2648871839046478, |
| "learning_rate": 7.424788135593221e-06, |
| "loss": 0.0857, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.574076525883755, |
| "grad_norm": 0.328542023897171, |
| "learning_rate": 7.423728813559322e-06, |
| "loss": 0.0872, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.575135707665828, |
| "grad_norm": 0.38928744196891785, |
| "learning_rate": 7.4226694915254235e-06, |
| "loss": 0.0893, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.5761948894479016, |
| "grad_norm": 0.23467789590358734, |
| "learning_rate": 7.421610169491527e-06, |
| "loss": 0.0839, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.5772540712299747, |
| "grad_norm": 0.5394375324249268, |
| "learning_rate": 7.420550847457628e-06, |
| "loss": 0.0849, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.5783132530120483, |
| "grad_norm": 0.5349376201629639, |
| "learning_rate": 7.41949152542373e-06, |
| "loss": 0.0877, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.579372434794122, |
| "grad_norm": 0.2408323585987091, |
| "learning_rate": 7.418432203389832e-06, |
| "loss": 0.0853, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.580431616576195, |
| "grad_norm": 0.26893261075019836, |
| "learning_rate": 7.417372881355933e-06, |
| "loss": 0.0859, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.581490798358268, |
| "grad_norm": 0.2837241291999817, |
| "learning_rate": 7.4163135593220345e-06, |
| "loss": 0.0909, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.5825499801403415, |
| "grad_norm": 0.5301665663719177, |
| "learning_rate": 7.415254237288137e-06, |
| "loss": 0.0881, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.583609161922415, |
| "grad_norm": 0.21783919632434845, |
| "learning_rate": 7.414194915254238e-06, |
| "loss": 0.0867, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.584668343704488, |
| "grad_norm": 0.29005125164985657, |
| "learning_rate": 7.41313559322034e-06, |
| "loss": 0.0848, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.5857275254865617, |
| "grad_norm": 0.5109828114509583, |
| "learning_rate": 7.412076271186441e-06, |
| "loss": 0.086, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.586786707268635, |
| "grad_norm": 0.35774463415145874, |
| "learning_rate": 7.411016949152543e-06, |
| "loss": 0.0879, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.5878458890507083, |
| "grad_norm": 0.18126487731933594, |
| "learning_rate": 7.409957627118645e-06, |
| "loss": 0.0839, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.588905070832782, |
| "grad_norm": 0.6130411028862, |
| "learning_rate": 7.408898305084746e-06, |
| "loss": 0.0861, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.589964252614855, |
| "grad_norm": 0.2746926248073578, |
| "learning_rate": 7.407838983050848e-06, |
| "loss": 0.0862, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.5910234343969285, |
| "grad_norm": 0.21978271007537842, |
| "learning_rate": 7.40677966101695e-06, |
| "loss": 0.0824, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.5920826161790016, |
| "grad_norm": 0.260647714138031, |
| "learning_rate": 7.405720338983051e-06, |
| "loss": 0.0885, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.593141797961075, |
| "grad_norm": 0.23659560084342957, |
| "learning_rate": 7.4046610169491526e-06, |
| "loss": 0.0834, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.5942009797431487, |
| "grad_norm": 0.1942400336265564, |
| "learning_rate": 7.403601694915255e-06, |
| "loss": 0.0837, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.5952601615252218, |
| "grad_norm": 0.21580971777439117, |
| "learning_rate": 7.402542372881356e-06, |
| "loss": 0.0825, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.596319343307295, |
| "grad_norm": 0.5914468765258789, |
| "learning_rate": 7.401483050847458e-06, |
| "loss": 0.0859, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.5973785250893684, |
| "grad_norm": 0.29213789105415344, |
| "learning_rate": 7.400423728813559e-06, |
| "loss": 0.0857, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.598437706871442, |
| "grad_norm": 0.41852453351020813, |
| "learning_rate": 7.399364406779662e-06, |
| "loss": 0.0878, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.599496888653515, |
| "grad_norm": 0.2311413437128067, |
| "learning_rate": 7.3983050847457636e-06, |
| "loss": 0.0834, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.6005560704355886, |
| "grad_norm": 0.9100884199142456, |
| "learning_rate": 7.397245762711866e-06, |
| "loss": 0.0844, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.6016152522176617, |
| "grad_norm": 0.6438109874725342, |
| "learning_rate": 7.396186440677967e-06, |
| "loss": 0.087, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.602674433999735, |
| "grad_norm": 0.20899845659732819, |
| "learning_rate": 7.395127118644069e-06, |
| "loss": 0.0845, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.6037336157818087, |
| "grad_norm": 0.22641488909721375, |
| "learning_rate": 7.39406779661017e-06, |
| "loss": 0.0864, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.604792797563882, |
| "grad_norm": 0.15944629907608032, |
| "learning_rate": 7.393008474576272e-06, |
| "loss": 0.0856, |
| "step": 2461 |
| }, |
| { |
| "epoch": 2.6058519793459554, |
| "grad_norm": 0.28860077261924744, |
| "learning_rate": 7.391949152542374e-06, |
| "loss": 0.0873, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.6069111611280285, |
| "grad_norm": 0.23296071588993073, |
| "learning_rate": 7.390889830508475e-06, |
| "loss": 0.0844, |
| "step": 2463 |
| }, |
| { |
| "epoch": 2.607970342910102, |
| "grad_norm": 0.3206871449947357, |
| "learning_rate": 7.3898305084745766e-06, |
| "loss": 0.0851, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.6090295246921755, |
| "grad_norm": 0.2582883834838867, |
| "learning_rate": 7.388771186440679e-06, |
| "loss": 0.0858, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.6100887064742486, |
| "grad_norm": 0.45466917753219604, |
| "learning_rate": 7.38771186440678e-06, |
| "loss": 0.088, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.6111478882563217, |
| "grad_norm": 0.2250165045261383, |
| "learning_rate": 7.386652542372882e-06, |
| "loss": 0.0812, |
| "step": 2467 |
| }, |
| { |
| "epoch": 2.6122070700383953, |
| "grad_norm": 0.2370089888572693, |
| "learning_rate": 7.385593220338984e-06, |
| "loss": 0.0825, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.613266251820469, |
| "grad_norm": 0.9961854219436646, |
| "learning_rate": 7.384533898305085e-06, |
| "loss": 0.0862, |
| "step": 2469 |
| }, |
| { |
| "epoch": 2.614325433602542, |
| "grad_norm": 0.25502580404281616, |
| "learning_rate": 7.383474576271187e-06, |
| "loss": 0.0841, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 0.19646094739437103, |
| "learning_rate": 7.382415254237288e-06, |
| "loss": 0.0838, |
| "step": 2471 |
| }, |
| { |
| "epoch": 2.6164437971666885, |
| "grad_norm": 0.2757315933704376, |
| "learning_rate": 7.38135593220339e-06, |
| "loss": 0.0842, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.617502978948762, |
| "grad_norm": 0.24290607869625092, |
| "learning_rate": 7.380296610169492e-06, |
| "loss": 0.0883, |
| "step": 2473 |
| }, |
| { |
| "epoch": 2.6185621607308356, |
| "grad_norm": 0.20332032442092896, |
| "learning_rate": 7.379237288135593e-06, |
| "loss": 0.0848, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.6196213425129087, |
| "grad_norm": 0.4000178575515747, |
| "learning_rate": 7.378177966101695e-06, |
| "loss": 0.0903, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.620680524294982, |
| "grad_norm": 1.3280612230300903, |
| "learning_rate": 7.377118644067798e-06, |
| "loss": 0.085, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.6217397060770553, |
| "grad_norm": 0.34385353326797485, |
| "learning_rate": 7.376059322033899e-06, |
| "loss": 0.0878, |
| "step": 2477 |
| }, |
| { |
| "epoch": 2.622798887859129, |
| "grad_norm": 0.37220701575279236, |
| "learning_rate": 7.375000000000001e-06, |
| "loss": 0.0861, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.6238580696412024, |
| "grad_norm": 0.22052189707756042, |
| "learning_rate": 7.373940677966103e-06, |
| "loss": 0.0859, |
| "step": 2479 |
| }, |
| { |
| "epoch": 2.6249172514232755, |
| "grad_norm": 0.1742621809244156, |
| "learning_rate": 7.372881355932204e-06, |
| "loss": 0.0876, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.625976433205349, |
| "grad_norm": 0.2315528690814972, |
| "learning_rate": 7.371822033898306e-06, |
| "loss": 0.0843, |
| "step": 2481 |
| }, |
| { |
| "epoch": 2.627035614987422, |
| "grad_norm": 0.744558572769165, |
| "learning_rate": 7.370762711864408e-06, |
| "loss": 0.0838, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.6280947967694956, |
| "grad_norm": 0.2049311101436615, |
| "learning_rate": 7.369703389830509e-06, |
| "loss": 0.0832, |
| "step": 2483 |
| }, |
| { |
| "epoch": 2.629153978551569, |
| "grad_norm": 0.6654312610626221, |
| "learning_rate": 7.368644067796611e-06, |
| "loss": 0.0807, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.6302131603336423, |
| "grad_norm": 0.20644965767860413, |
| "learning_rate": 7.367584745762713e-06, |
| "loss": 0.0872, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.6312723421157154, |
| "grad_norm": 0.19637905061244965, |
| "learning_rate": 7.366525423728814e-06, |
| "loss": 0.0871, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.632331523897789, |
| "grad_norm": 0.24023404717445374, |
| "learning_rate": 7.365466101694916e-06, |
| "loss": 0.085, |
| "step": 2487 |
| }, |
| { |
| "epoch": 2.6333907056798624, |
| "grad_norm": 0.24043144285678864, |
| "learning_rate": 7.364406779661017e-06, |
| "loss": 0.086, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.6344498874619355, |
| "grad_norm": 0.3961732089519501, |
| "learning_rate": 7.3633474576271194e-06, |
| "loss": 0.0893, |
| "step": 2489 |
| }, |
| { |
| "epoch": 2.635509069244009, |
| "grad_norm": 0.5193547010421753, |
| "learning_rate": 7.362288135593221e-06, |
| "loss": 0.0875, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.636568251026082, |
| "grad_norm": 0.24608950316905975, |
| "learning_rate": 7.361228813559322e-06, |
| "loss": 0.0848, |
| "step": 2491 |
| }, |
| { |
| "epoch": 2.6376274328081557, |
| "grad_norm": 0.6626792550086975, |
| "learning_rate": 7.360169491525424e-06, |
| "loss": 0.0853, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.6386866145902292, |
| "grad_norm": 0.19503068923950195, |
| "learning_rate": 7.359110169491526e-06, |
| "loss": 0.085, |
| "step": 2493 |
| }, |
| { |
| "epoch": 2.6397457963723023, |
| "grad_norm": 0.22753697633743286, |
| "learning_rate": 7.358050847457627e-06, |
| "loss": 0.0827, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.640804978154376, |
| "grad_norm": 0.19155411422252655, |
| "learning_rate": 7.356991525423729e-06, |
| "loss": 0.0851, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.641864159936449, |
| "grad_norm": 0.2226436585187912, |
| "learning_rate": 7.355932203389831e-06, |
| "loss": 0.0898, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.6429233417185225, |
| "grad_norm": 0.20181259512901306, |
| "learning_rate": 7.354872881355933e-06, |
| "loss": 0.0856, |
| "step": 2497 |
| }, |
| { |
| "epoch": 2.643982523500596, |
| "grad_norm": 0.3549362123012543, |
| "learning_rate": 7.353813559322035e-06, |
| "loss": 0.0829, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.645041705282669, |
| "grad_norm": 0.21644608676433563, |
| "learning_rate": 7.352754237288137e-06, |
| "loss": 0.0862, |
| "step": 2499 |
| }, |
| { |
| "epoch": 2.6461008870647422, |
| "grad_norm": 0.37708693742752075, |
| "learning_rate": 7.351694915254238e-06, |
| "loss": 0.0878, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.6471600688468158, |
| "grad_norm": 0.2566758692264557, |
| "learning_rate": 7.35063559322034e-06, |
| "loss": 0.0843, |
| "step": 2501 |
| }, |
| { |
| "epoch": 2.6482192506288893, |
| "grad_norm": 0.5458624958992004, |
| "learning_rate": 7.349576271186441e-06, |
| "loss": 0.0861, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.6492784324109624, |
| "grad_norm": 0.20609797537326813, |
| "learning_rate": 7.3485169491525434e-06, |
| "loss": 0.085, |
| "step": 2503 |
| }, |
| { |
| "epoch": 2.650337614193036, |
| "grad_norm": 0.22142818570137024, |
| "learning_rate": 7.347457627118645e-06, |
| "loss": 0.0874, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.651396795975109, |
| "grad_norm": 0.1624692678451538, |
| "learning_rate": 7.346398305084746e-06, |
| "loss": 0.0862, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.6524559777571826, |
| "grad_norm": 0.20099957287311554, |
| "learning_rate": 7.3453389830508485e-06, |
| "loss": 0.0873, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.653515159539256, |
| "grad_norm": 0.16464291512966156, |
| "learning_rate": 7.34427966101695e-06, |
| "loss": 0.0828, |
| "step": 2507 |
| }, |
| { |
| "epoch": 2.654574341321329, |
| "grad_norm": 0.2808842658996582, |
| "learning_rate": 7.343220338983051e-06, |
| "loss": 0.0854, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.6556335231034027, |
| "grad_norm": 0.2627069056034088, |
| "learning_rate": 7.342161016949153e-06, |
| "loss": 0.0874, |
| "step": 2509 |
| }, |
| { |
| "epoch": 2.656692704885476, |
| "grad_norm": 0.17322279512882233, |
| "learning_rate": 7.341101694915255e-06, |
| "loss": 0.0845, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.6577518866675494, |
| "grad_norm": 0.24847866594791412, |
| "learning_rate": 7.340042372881356e-06, |
| "loss": 0.0863, |
| "step": 2511 |
| }, |
| { |
| "epoch": 2.658811068449623, |
| "grad_norm": 0.2386065572500229, |
| "learning_rate": 7.338983050847458e-06, |
| "loss": 0.0853, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.659870250231696, |
| "grad_norm": 0.8196850419044495, |
| "learning_rate": 7.337923728813559e-06, |
| "loss": 0.0871, |
| "step": 2513 |
| }, |
| { |
| "epoch": 2.6609294320137695, |
| "grad_norm": 0.24189303815364838, |
| "learning_rate": 7.3368644067796615e-06, |
| "loss": 0.0829, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.6619886137958426, |
| "grad_norm": 0.5739706754684448, |
| "learning_rate": 7.335805084745763e-06, |
| "loss": 0.0859, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.663047795577916, |
| "grad_norm": 0.2830088138580322, |
| "learning_rate": 7.334745762711864e-06, |
| "loss": 0.0881, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.6641069773599892, |
| "grad_norm": 0.25852301716804504, |
| "learning_rate": 7.3336864406779666e-06, |
| "loss": 0.083, |
| "step": 2517 |
| }, |
| { |
| "epoch": 2.665166159142063, |
| "grad_norm": 0.24714624881744385, |
| "learning_rate": 7.332627118644068e-06, |
| "loss": 0.0857, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.666225340924136, |
| "grad_norm": 0.1826709359884262, |
| "learning_rate": 7.33156779661017e-06, |
| "loss": 0.0827, |
| "step": 2519 |
| }, |
| { |
| "epoch": 2.6672845227062094, |
| "grad_norm": 0.26431363821029663, |
| "learning_rate": 7.3305084745762725e-06, |
| "loss": 0.0854, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.668343704488283, |
| "grad_norm": 0.6349174380302429, |
| "learning_rate": 7.329449152542374e-06, |
| "loss": 0.0842, |
| "step": 2521 |
| }, |
| { |
| "epoch": 2.669402886270356, |
| "grad_norm": 0.3105546832084656, |
| "learning_rate": 7.328389830508475e-06, |
| "loss": 0.0872, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.6704620680524296, |
| "grad_norm": 0.2025269865989685, |
| "learning_rate": 7.3273305084745776e-06, |
| "loss": 0.0828, |
| "step": 2523 |
| }, |
| { |
| "epoch": 2.6715212498345027, |
| "grad_norm": 0.21857735514640808, |
| "learning_rate": 7.326271186440679e-06, |
| "loss": 0.0839, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.672580431616576, |
| "grad_norm": 0.1912987232208252, |
| "learning_rate": 7.32521186440678e-06, |
| "loss": 0.0866, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.6736396133986498, |
| "grad_norm": 0.22973424196243286, |
| "learning_rate": 7.324152542372882e-06, |
| "loss": 0.0834, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.674698795180723, |
| "grad_norm": 0.22732165455818176, |
| "learning_rate": 7.323093220338984e-06, |
| "loss": 0.0888, |
| "step": 2527 |
| }, |
| { |
| "epoch": 2.6757579769627964, |
| "grad_norm": 0.2239425927400589, |
| "learning_rate": 7.3220338983050855e-06, |
| "loss": 0.087, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.6768171587448695, |
| "grad_norm": 1.158964991569519, |
| "learning_rate": 7.320974576271187e-06, |
| "loss": 0.0859, |
| "step": 2529 |
| }, |
| { |
| "epoch": 2.677876340526943, |
| "grad_norm": 0.5004084706306458, |
| "learning_rate": 7.319915254237288e-06, |
| "loss": 0.088, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.6789355223090165, |
| "grad_norm": 0.17939257621765137, |
| "learning_rate": 7.3188559322033906e-06, |
| "loss": 0.0846, |
| "step": 2531 |
| }, |
| { |
| "epoch": 2.6799947040910896, |
| "grad_norm": 0.2022828608751297, |
| "learning_rate": 7.317796610169492e-06, |
| "loss": 0.0848, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.6810538858731627, |
| "grad_norm": 0.2316572368144989, |
| "learning_rate": 7.316737288135593e-06, |
| "loss": 0.0865, |
| "step": 2533 |
| }, |
| { |
| "epoch": 2.6821130676552363, |
| "grad_norm": 0.178863525390625, |
| "learning_rate": 7.315677966101696e-06, |
| "loss": 0.0816, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.68317224943731, |
| "grad_norm": 0.22590215504169464, |
| "learning_rate": 7.314618644067797e-06, |
| "loss": 0.0873, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.684231431219383, |
| "grad_norm": 0.24172040820121765, |
| "learning_rate": 7.3135593220338985e-06, |
| "loss": 0.0823, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.6852906130014564, |
| "grad_norm": 0.2226477861404419, |
| "learning_rate": 7.3125e-06, |
| "loss": 0.0819, |
| "step": 2537 |
| }, |
| { |
| "epoch": 2.6863497947835295, |
| "grad_norm": 0.19185325503349304, |
| "learning_rate": 7.311440677966102e-06, |
| "loss": 0.0884, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.687408976565603, |
| "grad_norm": 0.8066223859786987, |
| "learning_rate": 7.3103813559322035e-06, |
| "loss": 0.0885, |
| "step": 2539 |
| }, |
| { |
| "epoch": 2.6884681583476766, |
| "grad_norm": 0.2656841576099396, |
| "learning_rate": 7.309322033898306e-06, |
| "loss": 0.0867, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.6895273401297497, |
| "grad_norm": 0.37627339363098145, |
| "learning_rate": 7.308262711864408e-06, |
| "loss": 0.0886, |
| "step": 2541 |
| }, |
| { |
| "epoch": 2.6905865219118232, |
| "grad_norm": 0.6449163556098938, |
| "learning_rate": 7.3072033898305095e-06, |
| "loss": 0.0885, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.6916457036938963, |
| "grad_norm": 0.21589618921279907, |
| "learning_rate": 7.306144067796611e-06, |
| "loss": 0.0862, |
| "step": 2543 |
| }, |
| { |
| "epoch": 2.69270488547597, |
| "grad_norm": 0.24904780089855194, |
| "learning_rate": 7.305084745762713e-06, |
| "loss": 0.0825, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.6937640672580434, |
| "grad_norm": 0.20243799686431885, |
| "learning_rate": 7.3040254237288145e-06, |
| "loss": 0.0799, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.6948232490401165, |
| "grad_norm": 0.2245987057685852, |
| "learning_rate": 7.302966101694916e-06, |
| "loss": 0.0848, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.6958824308221896, |
| "grad_norm": 0.1902856081724167, |
| "learning_rate": 7.301906779661017e-06, |
| "loss": 0.0848, |
| "step": 2547 |
| }, |
| { |
| "epoch": 2.696941612604263, |
| "grad_norm": 0.2722117304801941, |
| "learning_rate": 7.30084745762712e-06, |
| "loss": 0.0846, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.6980007943863367, |
| "grad_norm": 0.20510976016521454, |
| "learning_rate": 7.299788135593221e-06, |
| "loss": 0.0856, |
| "step": 2549 |
| }, |
| { |
| "epoch": 2.6990599761684098, |
| "grad_norm": 0.142478808760643, |
| "learning_rate": 7.2987288135593224e-06, |
| "loss": 0.0872, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.7001191579504833, |
| "grad_norm": 0.20077407360076904, |
| "learning_rate": 7.297669491525424e-06, |
| "loss": 0.083, |
| "step": 2551 |
| }, |
| { |
| "epoch": 2.7011783397325564, |
| "grad_norm": 0.5384523868560791, |
| "learning_rate": 7.296610169491526e-06, |
| "loss": 0.0822, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.70223752151463, |
| "grad_norm": 0.4859084486961365, |
| "learning_rate": 7.2955508474576275e-06, |
| "loss": 0.0875, |
| "step": 2553 |
| }, |
| { |
| "epoch": 2.7032967032967035, |
| "grad_norm": 0.21412122249603271, |
| "learning_rate": 7.294491525423729e-06, |
| "loss": 0.0851, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.7043558850787766, |
| "grad_norm": 0.2351921945810318, |
| "learning_rate": 7.293432203389831e-06, |
| "loss": 0.0864, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.70541506686085, |
| "grad_norm": 0.20572692155838013, |
| "learning_rate": 7.292372881355933e-06, |
| "loss": 0.0861, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.706474248642923, |
| "grad_norm": 0.18501295149326324, |
| "learning_rate": 7.291313559322034e-06, |
| "loss": 0.0835, |
| "step": 2557 |
| }, |
| { |
| "epoch": 2.7075334304249967, |
| "grad_norm": 0.36258664727211, |
| "learning_rate": 7.290254237288135e-06, |
| "loss": 0.0838, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.7085926122070703, |
| "grad_norm": 0.19975177943706512, |
| "learning_rate": 7.289194915254238e-06, |
| "loss": 0.0855, |
| "step": 2559 |
| }, |
| { |
| "epoch": 2.7096517939891434, |
| "grad_norm": 0.35506847500801086, |
| "learning_rate": 7.288135593220339e-06, |
| "loss": 0.0819, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.710710975771217, |
| "grad_norm": 0.24622154235839844, |
| "learning_rate": 7.287076271186442e-06, |
| "loss": 0.0849, |
| "step": 2561 |
| }, |
| { |
| "epoch": 2.71177015755329, |
| "grad_norm": 0.4321694076061249, |
| "learning_rate": 7.286016949152544e-06, |
| "loss": 0.0879, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.7128293393353635, |
| "grad_norm": 0.2831239402294159, |
| "learning_rate": 7.284957627118645e-06, |
| "loss": 0.0864, |
| "step": 2563 |
| }, |
| { |
| "epoch": 2.7138885211174366, |
| "grad_norm": 0.160257950425148, |
| "learning_rate": 7.2838983050847464e-06, |
| "loss": 0.0844, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.71494770289951, |
| "grad_norm": 0.19795700907707214, |
| "learning_rate": 7.282838983050849e-06, |
| "loss": 0.0835, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.7160068846815832, |
| "grad_norm": 0.2295176237821579, |
| "learning_rate": 7.28177966101695e-06, |
| "loss": 0.0891, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.717066066463657, |
| "grad_norm": 0.19950707256793976, |
| "learning_rate": 7.2807203389830515e-06, |
| "loss": 0.0881, |
| "step": 2567 |
| }, |
| { |
| "epoch": 2.7181252482457303, |
| "grad_norm": 0.16419531404972076, |
| "learning_rate": 7.279661016949153e-06, |
| "loss": 0.0856, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.7191844300278034, |
| "grad_norm": 0.25177082419395447, |
| "learning_rate": 7.278601694915255e-06, |
| "loss": 0.0872, |
| "step": 2569 |
| }, |
| { |
| "epoch": 2.720243611809877, |
| "grad_norm": 0.2026664912700653, |
| "learning_rate": 7.277542372881357e-06, |
| "loss": 0.0821, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.72130279359195, |
| "grad_norm": 0.5405267477035522, |
| "learning_rate": 7.276483050847458e-06, |
| "loss": 0.0885, |
| "step": 2571 |
| }, |
| { |
| "epoch": 2.7223619753740236, |
| "grad_norm": 0.6258319616317749, |
| "learning_rate": 7.27542372881356e-06, |
| "loss": 0.0861, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.723421157156097, |
| "grad_norm": 0.19594408571720123, |
| "learning_rate": 7.274364406779662e-06, |
| "loss": 0.0848, |
| "step": 2573 |
| }, |
| { |
| "epoch": 2.72448033893817, |
| "grad_norm": 0.6023727655410767, |
| "learning_rate": 7.273305084745763e-06, |
| "loss": 0.0867, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.7255395207202437, |
| "grad_norm": 0.28104084730148315, |
| "learning_rate": 7.2722457627118645e-06, |
| "loss": 0.0871, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.726598702502317, |
| "grad_norm": 0.17260709404945374, |
| "learning_rate": 7.271186440677967e-06, |
| "loss": 0.0837, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.7276578842843904, |
| "grad_norm": 0.17434747517108917, |
| "learning_rate": 7.270127118644068e-06, |
| "loss": 0.086, |
| "step": 2577 |
| }, |
| { |
| "epoch": 2.728717066066464, |
| "grad_norm": 0.23676802217960358, |
| "learning_rate": 7.2690677966101696e-06, |
| "loss": 0.0824, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.729776247848537, |
| "grad_norm": 0.3234706223011017, |
| "learning_rate": 7.268008474576271e-06, |
| "loss": 0.0826, |
| "step": 2579 |
| }, |
| { |
| "epoch": 2.73083542963061, |
| "grad_norm": 0.26905763149261475, |
| "learning_rate": 7.266949152542373e-06, |
| "loss": 0.0846, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.7318946114126836, |
| "grad_norm": 0.21624106168746948, |
| "learning_rate": 7.265889830508475e-06, |
| "loss": 0.0815, |
| "step": 2581 |
| }, |
| { |
| "epoch": 2.732953793194757, |
| "grad_norm": 0.1897197663784027, |
| "learning_rate": 7.264830508474578e-06, |
| "loss": 0.0826, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.7340129749768303, |
| "grad_norm": 0.43036940693855286, |
| "learning_rate": 7.263771186440679e-06, |
| "loss": 0.0865, |
| "step": 2583 |
| }, |
| { |
| "epoch": 2.735072156758904, |
| "grad_norm": 1.1944246292114258, |
| "learning_rate": 7.2627118644067806e-06, |
| "loss": 0.0836, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.736131338540977, |
| "grad_norm": 0.4790208637714386, |
| "learning_rate": 7.261652542372882e-06, |
| "loss": 0.0857, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.7371905203230504, |
| "grad_norm": 0.21819289028644562, |
| "learning_rate": 7.260593220338984e-06, |
| "loss": 0.0845, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.738249702105124, |
| "grad_norm": 0.2567351162433624, |
| "learning_rate": 7.259533898305086e-06, |
| "loss": 0.0824, |
| "step": 2587 |
| }, |
| { |
| "epoch": 2.739308883887197, |
| "grad_norm": 0.3220031261444092, |
| "learning_rate": 7.258474576271187e-06, |
| "loss": 0.0859, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.7403680656692706, |
| "grad_norm": 0.583760678768158, |
| "learning_rate": 7.2574152542372885e-06, |
| "loss": 0.088, |
| "step": 2589 |
| }, |
| { |
| "epoch": 2.7414272474513437, |
| "grad_norm": 0.38349083065986633, |
| "learning_rate": 7.256355932203391e-06, |
| "loss": 0.0816, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.7424864292334172, |
| "grad_norm": 0.6811165809631348, |
| "learning_rate": 7.255296610169492e-06, |
| "loss": 0.0888, |
| "step": 2591 |
| }, |
| { |
| "epoch": 2.7435456110154908, |
| "grad_norm": 0.26285967230796814, |
| "learning_rate": 7.2542372881355936e-06, |
| "loss": 0.0886, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.744604792797564, |
| "grad_norm": 0.24635250866413116, |
| "learning_rate": 7.253177966101696e-06, |
| "loss": 0.0837, |
| "step": 2593 |
| }, |
| { |
| "epoch": 2.745663974579637, |
| "grad_norm": 0.1973286271095276, |
| "learning_rate": 7.252118644067797e-06, |
| "loss": 0.0827, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.7467231563617105, |
| "grad_norm": 0.3781270682811737, |
| "learning_rate": 7.251059322033899e-06, |
| "loss": 0.0864, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.747782338143784, |
| "grad_norm": 0.30764245986938477, |
| "learning_rate": 7.25e-06, |
| "loss": 0.0833, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.748841519925857, |
| "grad_norm": 0.2728866934776306, |
| "learning_rate": 7.248940677966102e-06, |
| "loss": 0.083, |
| "step": 2597 |
| }, |
| { |
| "epoch": 2.7499007017079307, |
| "grad_norm": 0.21047860383987427, |
| "learning_rate": 7.247881355932204e-06, |
| "loss": 0.0892, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.7509598834900038, |
| "grad_norm": 0.2374969869852066, |
| "learning_rate": 7.246822033898305e-06, |
| "loss": 0.0837, |
| "step": 2599 |
| }, |
| { |
| "epoch": 2.7520190652720773, |
| "grad_norm": 0.5180090665817261, |
| "learning_rate": 7.2457627118644065e-06, |
| "loss": 0.0848, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.753078247054151, |
| "grad_norm": 0.5831081867218018, |
| "learning_rate": 7.244703389830509e-06, |
| "loss": 0.0867, |
| "step": 2601 |
| }, |
| { |
| "epoch": 2.754137428836224, |
| "grad_norm": 1.2132313251495361, |
| "learning_rate": 7.24364406779661e-06, |
| "loss": 0.0879, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.7551966106182975, |
| "grad_norm": 0.22215701639652252, |
| "learning_rate": 7.242584745762713e-06, |
| "loss": 0.0839, |
| "step": 2603 |
| }, |
| { |
| "epoch": 2.7562557924003706, |
| "grad_norm": 0.276680588722229, |
| "learning_rate": 7.241525423728815e-06, |
| "loss": 0.0871, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.757314974182444, |
| "grad_norm": 0.27246686816215515, |
| "learning_rate": 7.240466101694916e-06, |
| "loss": 0.0811, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.7583741559645176, |
| "grad_norm": 0.2052409052848816, |
| "learning_rate": 7.2394067796610175e-06, |
| "loss": 0.0843, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.7594333377465907, |
| "grad_norm": 1.1106832027435303, |
| "learning_rate": 7.23834745762712e-06, |
| "loss": 0.0844, |
| "step": 2607 |
| }, |
| { |
| "epoch": 2.7604925195286643, |
| "grad_norm": 0.23822973668575287, |
| "learning_rate": 7.237288135593221e-06, |
| "loss": 0.0852, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.7615517013107374, |
| "grad_norm": 0.2477940320968628, |
| "learning_rate": 7.236228813559323e-06, |
| "loss": 0.0892, |
| "step": 2609 |
| }, |
| { |
| "epoch": 2.762610883092811, |
| "grad_norm": 0.2688133418560028, |
| "learning_rate": 7.235169491525425e-06, |
| "loss": 0.0859, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.7636700648748844, |
| "grad_norm": 0.6514645218849182, |
| "learning_rate": 7.234110169491526e-06, |
| "loss": 0.0841, |
| "step": 2611 |
| }, |
| { |
| "epoch": 2.7647292466569575, |
| "grad_norm": 2.231745719909668, |
| "learning_rate": 7.233050847457628e-06, |
| "loss": 0.088, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.7657884284390306, |
| "grad_norm": 0.3024356961250305, |
| "learning_rate": 7.231991525423729e-06, |
| "loss": 0.0864, |
| "step": 2613 |
| }, |
| { |
| "epoch": 2.766847610221104, |
| "grad_norm": 0.21650326251983643, |
| "learning_rate": 7.230932203389831e-06, |
| "loss": 0.0838, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.7679067920031777, |
| "grad_norm": 1.1713981628417969, |
| "learning_rate": 7.229872881355933e-06, |
| "loss": 0.0899, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.768965973785251, |
| "grad_norm": 0.24282382428646088, |
| "learning_rate": 7.228813559322034e-06, |
| "loss": 0.0864, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.7700251555673243, |
| "grad_norm": 0.2057073414325714, |
| "learning_rate": 7.227754237288136e-06, |
| "loss": 0.0833, |
| "step": 2617 |
| }, |
| { |
| "epoch": 2.7710843373493974, |
| "grad_norm": 0.18496139347553253, |
| "learning_rate": 7.226694915254238e-06, |
| "loss": 0.0857, |
| "step": 2618 |
| }, |
| { |
| "epoch": 2.772143519131471, |
| "grad_norm": 0.6129516363143921, |
| "learning_rate": 7.225635593220339e-06, |
| "loss": 0.0849, |
| "step": 2619 |
| }, |
| { |
| "epoch": 2.7732027009135445, |
| "grad_norm": 0.2906854748725891, |
| "learning_rate": 7.224576271186441e-06, |
| "loss": 0.0855, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.7742618826956176, |
| "grad_norm": 0.3260900676250458, |
| "learning_rate": 7.223516949152543e-06, |
| "loss": 0.0829, |
| "step": 2621 |
| }, |
| { |
| "epoch": 2.775321064477691, |
| "grad_norm": 0.6778892278671265, |
| "learning_rate": 7.222457627118644e-06, |
| "loss": 0.0874, |
| "step": 2622 |
| }, |
| { |
| "epoch": 2.776380246259764, |
| "grad_norm": 0.4057232439517975, |
| "learning_rate": 7.221398305084746e-06, |
| "loss": 0.0848, |
| "step": 2623 |
| }, |
| { |
| "epoch": 2.7774394280418377, |
| "grad_norm": 0.31907692551612854, |
| "learning_rate": 7.220338983050849e-06, |
| "loss": 0.0834, |
| "step": 2624 |
| }, |
| { |
| "epoch": 2.7784986098239113, |
| "grad_norm": 0.44375336170196533, |
| "learning_rate": 7.21927966101695e-06, |
| "loss": 0.0892, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.7795577916059844, |
| "grad_norm": 0.6996042728424072, |
| "learning_rate": 7.218220338983052e-06, |
| "loss": 0.0885, |
| "step": 2626 |
| }, |
| { |
| "epoch": 2.7806169733880575, |
| "grad_norm": 0.8565188050270081, |
| "learning_rate": 7.217161016949153e-06, |
| "loss": 0.0867, |
| "step": 2627 |
| }, |
| { |
| "epoch": 2.781676155170131, |
| "grad_norm": 0.6427931189537048, |
| "learning_rate": 7.216101694915255e-06, |
| "loss": 0.0881, |
| "step": 2628 |
| }, |
| { |
| "epoch": 2.7827353369522045, |
| "grad_norm": 0.6029223203659058, |
| "learning_rate": 7.215042372881357e-06, |
| "loss": 0.0867, |
| "step": 2629 |
| }, |
| { |
| "epoch": 2.7837945187342776, |
| "grad_norm": 0.24458833038806915, |
| "learning_rate": 7.213983050847458e-06, |
| "loss": 0.0895, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.784853700516351, |
| "grad_norm": 0.22011908888816833, |
| "learning_rate": 7.2129237288135604e-06, |
| "loss": 0.0848, |
| "step": 2631 |
| }, |
| { |
| "epoch": 2.7859128822984243, |
| "grad_norm": 0.38336876034736633, |
| "learning_rate": 7.211864406779662e-06, |
| "loss": 0.0912, |
| "step": 2632 |
| }, |
| { |
| "epoch": 2.786972064080498, |
| "grad_norm": 0.8342489004135132, |
| "learning_rate": 7.210805084745763e-06, |
| "loss": 0.0886, |
| "step": 2633 |
| }, |
| { |
| "epoch": 2.7880312458625713, |
| "grad_norm": 0.7525489926338196, |
| "learning_rate": 7.209745762711865e-06, |
| "loss": 0.0866, |
| "step": 2634 |
| }, |
| { |
| "epoch": 2.7890904276446444, |
| "grad_norm": 0.5347561836242676, |
| "learning_rate": 7.208686440677967e-06, |
| "loss": 0.0876, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.790149609426718, |
| "grad_norm": 0.29635554552078247, |
| "learning_rate": 7.207627118644068e-06, |
| "loss": 0.0828, |
| "step": 2636 |
| }, |
| { |
| "epoch": 2.791208791208791, |
| "grad_norm": 0.29534098505973816, |
| "learning_rate": 7.20656779661017e-06, |
| "loss": 0.0852, |
| "step": 2637 |
| }, |
| { |
| "epoch": 2.7922679729908646, |
| "grad_norm": 2.8918049335479736, |
| "learning_rate": 7.205508474576271e-06, |
| "loss": 0.0884, |
| "step": 2638 |
| }, |
| { |
| "epoch": 2.793327154772938, |
| "grad_norm": 0.2642069160938263, |
| "learning_rate": 7.204449152542373e-06, |
| "loss": 0.0834, |
| "step": 2639 |
| }, |
| { |
| "epoch": 2.7943863365550112, |
| "grad_norm": 0.30829092860221863, |
| "learning_rate": 7.203389830508475e-06, |
| "loss": 0.0858, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.7954455183370848, |
| "grad_norm": 0.2687780559062958, |
| "learning_rate": 7.202330508474576e-06, |
| "loss": 0.087, |
| "step": 2641 |
| }, |
| { |
| "epoch": 2.796504700119158, |
| "grad_norm": 1.3224658966064453, |
| "learning_rate": 7.2012711864406785e-06, |
| "loss": 0.0923, |
| "step": 2642 |
| }, |
| { |
| "epoch": 2.7975638819012314, |
| "grad_norm": 0.8700881600379944, |
| "learning_rate": 7.20021186440678e-06, |
| "loss": 0.0844, |
| "step": 2643 |
| }, |
| { |
| "epoch": 2.7986230636833045, |
| "grad_norm": 0.8457787036895752, |
| "learning_rate": 7.199152542372881e-06, |
| "loss": 0.0864, |
| "step": 2644 |
| }, |
| { |
| "epoch": 2.799682245465378, |
| "grad_norm": 0.287913978099823, |
| "learning_rate": 7.198093220338984e-06, |
| "loss": 0.0864, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.800741427247451, |
| "grad_norm": 0.6349884271621704, |
| "learning_rate": 7.197033898305086e-06, |
| "loss": 0.085, |
| "step": 2646 |
| }, |
| { |
| "epoch": 2.8018006090295247, |
| "grad_norm": 0.22546806931495667, |
| "learning_rate": 7.195974576271187e-06, |
| "loss": 0.0879, |
| "step": 2647 |
| }, |
| { |
| "epoch": 2.802859790811598, |
| "grad_norm": 0.3513873219490051, |
| "learning_rate": 7.1949152542372895e-06, |
| "loss": 0.09, |
| "step": 2648 |
| }, |
| { |
| "epoch": 2.8039189725936713, |
| "grad_norm": 0.6868070363998413, |
| "learning_rate": 7.193855932203391e-06, |
| "loss": 0.0902, |
| "step": 2649 |
| }, |
| { |
| "epoch": 2.804978154375745, |
| "grad_norm": 0.9908745288848877, |
| "learning_rate": 7.192796610169492e-06, |
| "loss": 0.0889, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.806037336157818, |
| "grad_norm": 0.30235615372657776, |
| "learning_rate": 7.191737288135594e-06, |
| "loss": 0.0892, |
| "step": 2651 |
| }, |
| { |
| "epoch": 2.8070965179398915, |
| "grad_norm": 0.34018200635910034, |
| "learning_rate": 7.190677966101696e-06, |
| "loss": 0.0875, |
| "step": 2652 |
| }, |
| { |
| "epoch": 2.808155699721965, |
| "grad_norm": 0.3707370162010193, |
| "learning_rate": 7.189618644067797e-06, |
| "loss": 0.0858, |
| "step": 2653 |
| }, |
| { |
| "epoch": 2.809214881504038, |
| "grad_norm": 1.7327641248703003, |
| "learning_rate": 7.188559322033899e-06, |
| "loss": 0.0897, |
| "step": 2654 |
| }, |
| { |
| "epoch": 2.8102740632861116, |
| "grad_norm": 1.689785122871399, |
| "learning_rate": 7.1875e-06, |
| "loss": 0.0936, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.8113332450681847, |
| "grad_norm": 0.3976902365684509, |
| "learning_rate": 7.1864406779661025e-06, |
| "loss": 0.087, |
| "step": 2656 |
| }, |
| { |
| "epoch": 2.8123924268502583, |
| "grad_norm": 0.2942507565021515, |
| "learning_rate": 7.185381355932204e-06, |
| "loss": 0.0837, |
| "step": 2657 |
| }, |
| { |
| "epoch": 2.813451608632332, |
| "grad_norm": 1.63322913646698, |
| "learning_rate": 7.184322033898305e-06, |
| "loss": 0.0885, |
| "step": 2658 |
| }, |
| { |
| "epoch": 2.814510790414405, |
| "grad_norm": 0.4471192955970764, |
| "learning_rate": 7.1832627118644076e-06, |
| "loss": 0.0893, |
| "step": 2659 |
| }, |
| { |
| "epoch": 2.815569972196478, |
| "grad_norm": 0.3946458697319031, |
| "learning_rate": 7.182203389830509e-06, |
| "loss": 0.0835, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.8166291539785515, |
| "grad_norm": 0.5461068153381348, |
| "learning_rate": 7.18114406779661e-06, |
| "loss": 0.089, |
| "step": 2661 |
| }, |
| { |
| "epoch": 2.817688335760625, |
| "grad_norm": 0.24396973848342896, |
| "learning_rate": 7.180084745762712e-06, |
| "loss": 0.0863, |
| "step": 2662 |
| }, |
| { |
| "epoch": 2.818747517542698, |
| "grad_norm": 0.3486658036708832, |
| "learning_rate": 7.179025423728814e-06, |
| "loss": 0.0865, |
| "step": 2663 |
| }, |
| { |
| "epoch": 2.8198066993247717, |
| "grad_norm": 0.7767401933670044, |
| "learning_rate": 7.1779661016949155e-06, |
| "loss": 0.0818, |
| "step": 2664 |
| }, |
| { |
| "epoch": 2.8208658811068448, |
| "grad_norm": 0.565762996673584, |
| "learning_rate": 7.176906779661017e-06, |
| "loss": 0.0871, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.8219250628889183, |
| "grad_norm": 0.28572696447372437, |
| "learning_rate": 7.17584745762712e-06, |
| "loss": 0.0857, |
| "step": 2666 |
| }, |
| { |
| "epoch": 2.822984244670992, |
| "grad_norm": 0.5122365951538086, |
| "learning_rate": 7.174788135593221e-06, |
| "loss": 0.086, |
| "step": 2667 |
| }, |
| { |
| "epoch": 2.824043426453065, |
| "grad_norm": 0.3288061022758484, |
| "learning_rate": 7.173728813559323e-06, |
| "loss": 0.084, |
| "step": 2668 |
| }, |
| { |
| "epoch": 2.8251026082351385, |
| "grad_norm": 0.7172766327857971, |
| "learning_rate": 7.172669491525425e-06, |
| "loss": 0.083, |
| "step": 2669 |
| }, |
| { |
| "epoch": 2.8261617900172116, |
| "grad_norm": 1.8225845098495483, |
| "learning_rate": 7.1716101694915265e-06, |
| "loss": 0.0852, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.827220971799285, |
| "grad_norm": 0.24879427254199982, |
| "learning_rate": 7.170550847457628e-06, |
| "loss": 0.0858, |
| "step": 2671 |
| }, |
| { |
| "epoch": 2.8282801535813586, |
| "grad_norm": 0.9555388689041138, |
| "learning_rate": 7.169491525423729e-06, |
| "loss": 0.0853, |
| "step": 2672 |
| }, |
| { |
| "epoch": 2.8293393353634317, |
| "grad_norm": 0.2729604244232178, |
| "learning_rate": 7.1684322033898315e-06, |
| "loss": 0.0869, |
| "step": 2673 |
| }, |
| { |
| "epoch": 2.830398517145505, |
| "grad_norm": 0.9544708728790283, |
| "learning_rate": 7.167372881355933e-06, |
| "loss": 0.0859, |
| "step": 2674 |
| }, |
| { |
| "epoch": 2.8314576989275784, |
| "grad_norm": 0.5761927962303162, |
| "learning_rate": 7.166313559322034e-06, |
| "loss": 0.0907, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.832516880709652, |
| "grad_norm": 0.8042075037956238, |
| "learning_rate": 7.165254237288136e-06, |
| "loss": 0.0879, |
| "step": 2676 |
| }, |
| { |
| "epoch": 2.833576062491725, |
| "grad_norm": 0.42558467388153076, |
| "learning_rate": 7.164194915254238e-06, |
| "loss": 0.087, |
| "step": 2677 |
| }, |
| { |
| "epoch": 2.8346352442737985, |
| "grad_norm": 0.2884480655193329, |
| "learning_rate": 7.1631355932203394e-06, |
| "loss": 0.0878, |
| "step": 2678 |
| }, |
| { |
| "epoch": 2.8356944260558716, |
| "grad_norm": 0.39285093545913696, |
| "learning_rate": 7.162076271186441e-06, |
| "loss": 0.0873, |
| "step": 2679 |
| }, |
| { |
| "epoch": 2.836753607837945, |
| "grad_norm": 0.57151859998703, |
| "learning_rate": 7.161016949152543e-06, |
| "loss": 0.0894, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.8378127896200187, |
| "grad_norm": 0.35518619418144226, |
| "learning_rate": 7.1599576271186445e-06, |
| "loss": 0.0909, |
| "step": 2681 |
| }, |
| { |
| "epoch": 2.838871971402092, |
| "grad_norm": 0.9182879328727722, |
| "learning_rate": 7.158898305084746e-06, |
| "loss": 0.0894, |
| "step": 2682 |
| }, |
| { |
| "epoch": 2.8399311531841653, |
| "grad_norm": 0.6388287544250488, |
| "learning_rate": 7.157838983050847e-06, |
| "loss": 0.089, |
| "step": 2683 |
| }, |
| { |
| "epoch": 2.8409903349662384, |
| "grad_norm": 0.32887426018714905, |
| "learning_rate": 7.15677966101695e-06, |
| "loss": 0.0872, |
| "step": 2684 |
| }, |
| { |
| "epoch": 2.842049516748312, |
| "grad_norm": 0.2990522086620331, |
| "learning_rate": 7.155720338983051e-06, |
| "loss": 0.0883, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.8431086985303855, |
| "grad_norm": 1.1725788116455078, |
| "learning_rate": 7.154661016949152e-06, |
| "loss": 0.0874, |
| "step": 2686 |
| }, |
| { |
| "epoch": 2.8441678803124586, |
| "grad_norm": 0.35581767559051514, |
| "learning_rate": 7.1536016949152555e-06, |
| "loss": 0.0873, |
| "step": 2687 |
| }, |
| { |
| "epoch": 2.845227062094532, |
| "grad_norm": 0.308718740940094, |
| "learning_rate": 7.152542372881357e-06, |
| "loss": 0.083, |
| "step": 2688 |
| }, |
| { |
| "epoch": 2.8462862438766052, |
| "grad_norm": 0.42534202337265015, |
| "learning_rate": 7.151483050847458e-06, |
| "loss": 0.0855, |
| "step": 2689 |
| }, |
| { |
| "epoch": 2.8473454256586788, |
| "grad_norm": 0.3667755722999573, |
| "learning_rate": 7.150423728813561e-06, |
| "loss": 0.0841, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.848404607440752, |
| "grad_norm": 0.9837805032730103, |
| "learning_rate": 7.149364406779662e-06, |
| "loss": 0.0885, |
| "step": 2691 |
| }, |
| { |
| "epoch": 2.8494637892228254, |
| "grad_norm": 0.8265017867088318, |
| "learning_rate": 7.1483050847457634e-06, |
| "loss": 0.0858, |
| "step": 2692 |
| }, |
| { |
| "epoch": 2.8505229710048985, |
| "grad_norm": 0.2696377635002136, |
| "learning_rate": 7.147245762711865e-06, |
| "loss": 0.0828, |
| "step": 2693 |
| }, |
| { |
| "epoch": 2.851582152786972, |
| "grad_norm": 0.7192027568817139, |
| "learning_rate": 7.146186440677967e-06, |
| "loss": 0.0912, |
| "step": 2694 |
| }, |
| { |
| "epoch": 2.8526413345690456, |
| "grad_norm": 0.24310527741909027, |
| "learning_rate": 7.1451271186440685e-06, |
| "loss": 0.0853, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.8537005163511187, |
| "grad_norm": 0.29763269424438477, |
| "learning_rate": 7.14406779661017e-06, |
| "loss": 0.0861, |
| "step": 2696 |
| }, |
| { |
| "epoch": 2.854759698133192, |
| "grad_norm": 0.28719812631607056, |
| "learning_rate": 7.143008474576272e-06, |
| "loss": 0.0891, |
| "step": 2697 |
| }, |
| { |
| "epoch": 2.8558188799152653, |
| "grad_norm": 0.5448430776596069, |
| "learning_rate": 7.141949152542374e-06, |
| "loss": 0.0884, |
| "step": 2698 |
| }, |
| { |
| "epoch": 2.856878061697339, |
| "grad_norm": 0.4081977903842926, |
| "learning_rate": 7.140889830508475e-06, |
| "loss": 0.0854, |
| "step": 2699 |
| }, |
| { |
| "epoch": 2.8579372434794124, |
| "grad_norm": 0.35061120986938477, |
| "learning_rate": 7.139830508474576e-06, |
| "loss": 0.0885, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.8589964252614855, |
| "grad_norm": 0.3310065269470215, |
| "learning_rate": 7.138771186440679e-06, |
| "loss": 0.087, |
| "step": 2701 |
| }, |
| { |
| "epoch": 2.860055607043559, |
| "grad_norm": 0.28929078578948975, |
| "learning_rate": 7.13771186440678e-06, |
| "loss": 0.0892, |
| "step": 2702 |
| }, |
| { |
| "epoch": 2.861114788825632, |
| "grad_norm": 0.3346499800682068, |
| "learning_rate": 7.1366525423728815e-06, |
| "loss": 0.0851, |
| "step": 2703 |
| }, |
| { |
| "epoch": 2.8621739706077056, |
| "grad_norm": 0.4067153036594391, |
| "learning_rate": 7.135593220338983e-06, |
| "loss": 0.0848, |
| "step": 2704 |
| }, |
| { |
| "epoch": 2.863233152389779, |
| "grad_norm": 0.20712974667549133, |
| "learning_rate": 7.134533898305085e-06, |
| "loss": 0.0881, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.8642923341718523, |
| "grad_norm": 0.21080565452575684, |
| "learning_rate": 7.1334745762711866e-06, |
| "loss": 0.0862, |
| "step": 2706 |
| }, |
| { |
| "epoch": 2.8653515159539253, |
| "grad_norm": 0.18893392384052277, |
| "learning_rate": 7.132415254237288e-06, |
| "loss": 0.0802, |
| "step": 2707 |
| }, |
| { |
| "epoch": 2.866410697735999, |
| "grad_norm": 0.21575677394866943, |
| "learning_rate": 7.131355932203391e-06, |
| "loss": 0.086, |
| "step": 2708 |
| }, |
| { |
| "epoch": 2.8674698795180724, |
| "grad_norm": 0.3201296925544739, |
| "learning_rate": 7.1302966101694925e-06, |
| "loss": 0.0865, |
| "step": 2709 |
| }, |
| { |
| "epoch": 2.8685290613001455, |
| "grad_norm": 0.24422788619995117, |
| "learning_rate": 7.129237288135594e-06, |
| "loss": 0.0829, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.869588243082219, |
| "grad_norm": 0.3589276075363159, |
| "learning_rate": 7.128177966101696e-06, |
| "loss": 0.0846, |
| "step": 2711 |
| }, |
| { |
| "epoch": 2.870647424864292, |
| "grad_norm": 0.41220414638519287, |
| "learning_rate": 7.1271186440677976e-06, |
| "loss": 0.0852, |
| "step": 2712 |
| }, |
| { |
| "epoch": 2.8717066066463657, |
| "grad_norm": 0.3045378029346466, |
| "learning_rate": 7.126059322033899e-06, |
| "loss": 0.0868, |
| "step": 2713 |
| }, |
| { |
| "epoch": 2.872765788428439, |
| "grad_norm": 0.2798013389110565, |
| "learning_rate": 7.125e-06, |
| "loss": 0.0847, |
| "step": 2714 |
| }, |
| { |
| "epoch": 2.8738249702105123, |
| "grad_norm": 0.7903490662574768, |
| "learning_rate": 7.123940677966103e-06, |
| "loss": 0.0884, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.874884151992586, |
| "grad_norm": 0.23004823923110962, |
| "learning_rate": 7.122881355932204e-06, |
| "loss": 0.089, |
| "step": 2716 |
| }, |
| { |
| "epoch": 2.875943333774659, |
| "grad_norm": 0.21731291711330414, |
| "learning_rate": 7.1218220338983055e-06, |
| "loss": 0.0849, |
| "step": 2717 |
| }, |
| { |
| "epoch": 2.8770025155567325, |
| "grad_norm": 0.2031378597021103, |
| "learning_rate": 7.120762711864408e-06, |
| "loss": 0.0857, |
| "step": 2718 |
| }, |
| { |
| "epoch": 2.878061697338806, |
| "grad_norm": 0.18993328511714935, |
| "learning_rate": 7.119703389830509e-06, |
| "loss": 0.0864, |
| "step": 2719 |
| }, |
| { |
| "epoch": 2.879120879120879, |
| "grad_norm": 0.3438829183578491, |
| "learning_rate": 7.1186440677966106e-06, |
| "loss": 0.0849, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.880180060902952, |
| "grad_norm": 0.7168260216712952, |
| "learning_rate": 7.117584745762712e-06, |
| "loss": 0.0862, |
| "step": 2721 |
| }, |
| { |
| "epoch": 2.8812392426850257, |
| "grad_norm": 0.32097071409225464, |
| "learning_rate": 7.116525423728814e-06, |
| "loss": 0.0854, |
| "step": 2722 |
| }, |
| { |
| "epoch": 2.8822984244670993, |
| "grad_norm": 0.25940510630607605, |
| "learning_rate": 7.115466101694916e-06, |
| "loss": 0.0849, |
| "step": 2723 |
| }, |
| { |
| "epoch": 2.8833576062491724, |
| "grad_norm": 0.1674995869398117, |
| "learning_rate": 7.114406779661017e-06, |
| "loss": 0.0843, |
| "step": 2724 |
| }, |
| { |
| "epoch": 2.884416788031246, |
| "grad_norm": 0.7793422937393188, |
| "learning_rate": 7.1133474576271185e-06, |
| "loss": 0.0873, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.885475969813319, |
| "grad_norm": 0.4437791109085083, |
| "learning_rate": 7.112288135593221e-06, |
| "loss": 0.0852, |
| "step": 2726 |
| }, |
| { |
| "epoch": 2.8865351515953925, |
| "grad_norm": 0.20173686742782593, |
| "learning_rate": 7.111228813559322e-06, |
| "loss": 0.0868, |
| "step": 2727 |
| }, |
| { |
| "epoch": 2.887594333377466, |
| "grad_norm": 0.25035056471824646, |
| "learning_rate": 7.1101694915254235e-06, |
| "loss": 0.082, |
| "step": 2728 |
| }, |
| { |
| "epoch": 2.888653515159539, |
| "grad_norm": 0.5662713646888733, |
| "learning_rate": 7.109110169491527e-06, |
| "loss": 0.0843, |
| "step": 2729 |
| }, |
| { |
| "epoch": 2.8897126969416127, |
| "grad_norm": 0.26108983159065247, |
| "learning_rate": 7.108050847457628e-06, |
| "loss": 0.0821, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.890771878723686, |
| "grad_norm": 0.37050431966781616, |
| "learning_rate": 7.1069915254237295e-06, |
| "loss": 0.0842, |
| "step": 2731 |
| }, |
| { |
| "epoch": 2.8918310605057593, |
| "grad_norm": 0.2342778444290161, |
| "learning_rate": 7.105932203389832e-06, |
| "loss": 0.0866, |
| "step": 2732 |
| }, |
| { |
| "epoch": 2.892890242287833, |
| "grad_norm": 0.47755783796310425, |
| "learning_rate": 7.104872881355933e-06, |
| "loss": 0.0867, |
| "step": 2733 |
| }, |
| { |
| "epoch": 2.893949424069906, |
| "grad_norm": 0.17551547288894653, |
| "learning_rate": 7.1038135593220345e-06, |
| "loss": 0.0862, |
| "step": 2734 |
| }, |
| { |
| "epoch": 2.8950086058519795, |
| "grad_norm": 0.19548620283603668, |
| "learning_rate": 7.102754237288137e-06, |
| "loss": 0.0865, |
| "step": 2735 |
| }, |
| { |
| "epoch": 2.8960677876340526, |
| "grad_norm": 0.21691496670246124, |
| "learning_rate": 7.101694915254238e-06, |
| "loss": 0.0856, |
| "step": 2736 |
| }, |
| { |
| "epoch": 2.897126969416126, |
| "grad_norm": 0.4669831693172455, |
| "learning_rate": 7.10063559322034e-06, |
| "loss": 0.0857, |
| "step": 2737 |
| }, |
| { |
| "epoch": 2.8981861511981997, |
| "grad_norm": 0.31361842155456543, |
| "learning_rate": 7.099576271186441e-06, |
| "loss": 0.0839, |
| "step": 2738 |
| }, |
| { |
| "epoch": 2.8992453329802728, |
| "grad_norm": 0.2279847115278244, |
| "learning_rate": 7.098516949152543e-06, |
| "loss": 0.0846, |
| "step": 2739 |
| }, |
| { |
| "epoch": 2.900304514762346, |
| "grad_norm": 0.2264779955148697, |
| "learning_rate": 7.097457627118645e-06, |
| "loss": 0.0835, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.9013636965444194, |
| "grad_norm": 0.2287377566099167, |
| "learning_rate": 7.096398305084746e-06, |
| "loss": 0.086, |
| "step": 2741 |
| }, |
| { |
| "epoch": 2.902422878326493, |
| "grad_norm": 0.2082810401916504, |
| "learning_rate": 7.0953389830508475e-06, |
| "loss": 0.0838, |
| "step": 2742 |
| }, |
| { |
| "epoch": 2.903482060108566, |
| "grad_norm": 0.1720813512802124, |
| "learning_rate": 7.09427966101695e-06, |
| "loss": 0.0851, |
| "step": 2743 |
| }, |
| { |
| "epoch": 2.9045412418906396, |
| "grad_norm": 0.223737433552742, |
| "learning_rate": 7.093220338983051e-06, |
| "loss": 0.0867, |
| "step": 2744 |
| }, |
| { |
| "epoch": 2.9056004236727127, |
| "grad_norm": 0.28665691614151, |
| "learning_rate": 7.092161016949153e-06, |
| "loss": 0.0872, |
| "step": 2745 |
| }, |
| { |
| "epoch": 2.906659605454786, |
| "grad_norm": 0.25986793637275696, |
| "learning_rate": 7.091101694915255e-06, |
| "loss": 0.0835, |
| "step": 2746 |
| }, |
| { |
| "epoch": 2.9077187872368597, |
| "grad_norm": 0.28776562213897705, |
| "learning_rate": 7.090042372881356e-06, |
| "loss": 0.0843, |
| "step": 2747 |
| }, |
| { |
| "epoch": 2.908777969018933, |
| "grad_norm": 0.6488966941833496, |
| "learning_rate": 7.088983050847458e-06, |
| "loss": 0.088, |
| "step": 2748 |
| }, |
| { |
| "epoch": 2.9098371508010064, |
| "grad_norm": 0.21989387273788452, |
| "learning_rate": 7.087923728813559e-06, |
| "loss": 0.0811, |
| "step": 2749 |
| }, |
| { |
| "epoch": 2.9108963325830794, |
| "grad_norm": 0.6076532006263733, |
| "learning_rate": 7.086864406779662e-06, |
| "loss": 0.0831, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.911955514365153, |
| "grad_norm": 0.4277835488319397, |
| "learning_rate": 7.085805084745764e-06, |
| "loss": 0.0866, |
| "step": 2751 |
| }, |
| { |
| "epoch": 2.9130146961472265, |
| "grad_norm": 0.23475177586078644, |
| "learning_rate": 7.084745762711865e-06, |
| "loss": 0.0841, |
| "step": 2752 |
| }, |
| { |
| "epoch": 2.9140738779292996, |
| "grad_norm": 0.3141447901725769, |
| "learning_rate": 7.083686440677967e-06, |
| "loss": 0.0869, |
| "step": 2753 |
| }, |
| { |
| "epoch": 2.9151330597113727, |
| "grad_norm": 0.20060321688652039, |
| "learning_rate": 7.082627118644069e-06, |
| "loss": 0.0907, |
| "step": 2754 |
| }, |
| { |
| "epoch": 2.9161922414934462, |
| "grad_norm": 0.1911022663116455, |
| "learning_rate": 7.08156779661017e-06, |
| "loss": 0.0892, |
| "step": 2755 |
| }, |
| { |
| "epoch": 2.91725142327552, |
| "grad_norm": 0.7516857385635376, |
| "learning_rate": 7.080508474576272e-06, |
| "loss": 0.0818, |
| "step": 2756 |
| }, |
| { |
| "epoch": 2.918310605057593, |
| "grad_norm": 0.2007245123386383, |
| "learning_rate": 7.079449152542374e-06, |
| "loss": 0.0863, |
| "step": 2757 |
| }, |
| { |
| "epoch": 2.9193697868396664, |
| "grad_norm": 0.3953344225883484, |
| "learning_rate": 7.078389830508475e-06, |
| "loss": 0.0874, |
| "step": 2758 |
| }, |
| { |
| "epoch": 2.9204289686217395, |
| "grad_norm": 0.19172687828540802, |
| "learning_rate": 7.077330508474577e-06, |
| "loss": 0.0893, |
| "step": 2759 |
| }, |
| { |
| "epoch": 2.921488150403813, |
| "grad_norm": 0.5571119785308838, |
| "learning_rate": 7.076271186440679e-06, |
| "loss": 0.0847, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.9225473321858866, |
| "grad_norm": 0.30791357159614563, |
| "learning_rate": 7.07521186440678e-06, |
| "loss": 0.0867, |
| "step": 2761 |
| }, |
| { |
| "epoch": 2.9236065139679597, |
| "grad_norm": 0.2687133252620697, |
| "learning_rate": 7.074152542372882e-06, |
| "loss": 0.0836, |
| "step": 2762 |
| }, |
| { |
| "epoch": 2.924665695750033, |
| "grad_norm": 0.2431352734565735, |
| "learning_rate": 7.073093220338983e-06, |
| "loss": 0.0804, |
| "step": 2763 |
| }, |
| { |
| "epoch": 2.9257248775321063, |
| "grad_norm": 0.18212385475635529, |
| "learning_rate": 7.072033898305085e-06, |
| "loss": 0.0851, |
| "step": 2764 |
| }, |
| { |
| "epoch": 2.92678405931418, |
| "grad_norm": 0.22017893195152283, |
| "learning_rate": 7.070974576271187e-06, |
| "loss": 0.0842, |
| "step": 2765 |
| }, |
| { |
| "epoch": 2.9278432410962534, |
| "grad_norm": 0.34417974948883057, |
| "learning_rate": 7.069915254237288e-06, |
| "loss": 0.0822, |
| "step": 2766 |
| }, |
| { |
| "epoch": 2.9289024228783265, |
| "grad_norm": 0.26310163736343384, |
| "learning_rate": 7.06885593220339e-06, |
| "loss": 0.0808, |
| "step": 2767 |
| }, |
| { |
| "epoch": 2.9299616046603996, |
| "grad_norm": 0.19449247419834137, |
| "learning_rate": 7.067796610169492e-06, |
| "loss": 0.083, |
| "step": 2768 |
| }, |
| { |
| "epoch": 2.931020786442473, |
| "grad_norm": 0.19786173105239868, |
| "learning_rate": 7.066737288135593e-06, |
| "loss": 0.0871, |
| "step": 2769 |
| }, |
| { |
| "epoch": 2.9320799682245466, |
| "grad_norm": 0.19906456768512726, |
| "learning_rate": 7.065677966101695e-06, |
| "loss": 0.0846, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.9331391500066197, |
| "grad_norm": 0.18128810822963715, |
| "learning_rate": 7.064618644067798e-06, |
| "loss": 0.0857, |
| "step": 2771 |
| }, |
| { |
| "epoch": 2.9341983317886933, |
| "grad_norm": 0.5061360597610474, |
| "learning_rate": 7.063559322033899e-06, |
| "loss": 0.0858, |
| "step": 2772 |
| }, |
| { |
| "epoch": 2.9352575135707664, |
| "grad_norm": 0.8924757242202759, |
| "learning_rate": 7.062500000000001e-06, |
| "loss": 0.0853, |
| "step": 2773 |
| }, |
| { |
| "epoch": 2.93631669535284, |
| "grad_norm": 0.25123855471611023, |
| "learning_rate": 7.061440677966103e-06, |
| "loss": 0.0846, |
| "step": 2774 |
| }, |
| { |
| "epoch": 2.9373758771349134, |
| "grad_norm": 0.2065630555152893, |
| "learning_rate": 7.060381355932204e-06, |
| "loss": 0.0836, |
| "step": 2775 |
| }, |
| { |
| "epoch": 2.9384350589169865, |
| "grad_norm": 0.25200194120407104, |
| "learning_rate": 7.059322033898306e-06, |
| "loss": 0.0836, |
| "step": 2776 |
| }, |
| { |
| "epoch": 2.93949424069906, |
| "grad_norm": 0.22266460955142975, |
| "learning_rate": 7.058262711864408e-06, |
| "loss": 0.0864, |
| "step": 2777 |
| }, |
| { |
| "epoch": 2.940553422481133, |
| "grad_norm": 0.3489341139793396, |
| "learning_rate": 7.057203389830509e-06, |
| "loss": 0.085, |
| "step": 2778 |
| }, |
| { |
| "epoch": 2.9416126042632067, |
| "grad_norm": 0.3063749372959137, |
| "learning_rate": 7.056144067796611e-06, |
| "loss": 0.0877, |
| "step": 2779 |
| }, |
| { |
| "epoch": 2.9426717860452802, |
| "grad_norm": 0.23221157491207123, |
| "learning_rate": 7.055084745762712e-06, |
| "loss": 0.0884, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.9437309678273533, |
| "grad_norm": 0.248238205909729, |
| "learning_rate": 7.054025423728814e-06, |
| "loss": 0.0828, |
| "step": 2781 |
| }, |
| { |
| "epoch": 2.944790149609427, |
| "grad_norm": 0.20400157570838928, |
| "learning_rate": 7.052966101694916e-06, |
| "loss": 0.0866, |
| "step": 2782 |
| }, |
| { |
| "epoch": 2.9458493313915, |
| "grad_norm": 1.1154899597167969, |
| "learning_rate": 7.051906779661017e-06, |
| "loss": 0.0817, |
| "step": 2783 |
| }, |
| { |
| "epoch": 2.9469085131735735, |
| "grad_norm": 0.17604859173297882, |
| "learning_rate": 7.0508474576271195e-06, |
| "loss": 0.0835, |
| "step": 2784 |
| }, |
| { |
| "epoch": 2.947967694955647, |
| "grad_norm": 0.2628978490829468, |
| "learning_rate": 7.049788135593221e-06, |
| "loss": 0.0831, |
| "step": 2785 |
| }, |
| { |
| "epoch": 2.94902687673772, |
| "grad_norm": 0.36698588728904724, |
| "learning_rate": 7.048728813559322e-06, |
| "loss": 0.0853, |
| "step": 2786 |
| }, |
| { |
| "epoch": 2.950086058519793, |
| "grad_norm": 0.21428033709526062, |
| "learning_rate": 7.047669491525424e-06, |
| "loss": 0.0818, |
| "step": 2787 |
| }, |
| { |
| "epoch": 2.9511452403018668, |
| "grad_norm": 0.2346959263086319, |
| "learning_rate": 7.046610169491526e-06, |
| "loss": 0.0841, |
| "step": 2788 |
| }, |
| { |
| "epoch": 2.9522044220839403, |
| "grad_norm": 0.17298530042171478, |
| "learning_rate": 7.045550847457627e-06, |
| "loss": 0.085, |
| "step": 2789 |
| }, |
| { |
| "epoch": 2.9532636038660134, |
| "grad_norm": 0.2308352142572403, |
| "learning_rate": 7.044491525423729e-06, |
| "loss": 0.0816, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.954322785648087, |
| "grad_norm": 0.6685366034507751, |
| "learning_rate": 7.04343220338983e-06, |
| "loss": 0.0851, |
| "step": 2791 |
| }, |
| { |
| "epoch": 2.95538196743016, |
| "grad_norm": 0.7078801989555359, |
| "learning_rate": 7.042372881355933e-06, |
| "loss": 0.0836, |
| "step": 2792 |
| }, |
| { |
| "epoch": 2.9564411492122336, |
| "grad_norm": 0.3219440281391144, |
| "learning_rate": 7.041313559322035e-06, |
| "loss": 0.0847, |
| "step": 2793 |
| }, |
| { |
| "epoch": 2.957500330994307, |
| "grad_norm": 0.47597241401672363, |
| "learning_rate": 7.040254237288137e-06, |
| "loss": 0.0822, |
| "step": 2794 |
| }, |
| { |
| "epoch": 2.95855951277638, |
| "grad_norm": 0.26617932319641113, |
| "learning_rate": 7.039194915254238e-06, |
| "loss": 0.0879, |
| "step": 2795 |
| }, |
| { |
| "epoch": 2.9596186945584537, |
| "grad_norm": 0.1694178730249405, |
| "learning_rate": 7.03813559322034e-06, |
| "loss": 0.0842, |
| "step": 2796 |
| }, |
| { |
| "epoch": 2.960677876340527, |
| "grad_norm": 0.568081259727478, |
| "learning_rate": 7.037076271186441e-06, |
| "loss": 0.0825, |
| "step": 2797 |
| }, |
| { |
| "epoch": 2.9617370581226004, |
| "grad_norm": 0.31791046261787415, |
| "learning_rate": 7.0360169491525435e-06, |
| "loss": 0.0831, |
| "step": 2798 |
| }, |
| { |
| "epoch": 2.962796239904674, |
| "grad_norm": 0.44022175669670105, |
| "learning_rate": 7.034957627118645e-06, |
| "loss": 0.0832, |
| "step": 2799 |
| }, |
| { |
| "epoch": 2.963855421686747, |
| "grad_norm": 0.33043715357780457, |
| "learning_rate": 7.033898305084746e-06, |
| "loss": 0.0855, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.96491460346882, |
| "grad_norm": 0.1991463303565979, |
| "learning_rate": 7.032838983050848e-06, |
| "loss": 0.084, |
| "step": 2801 |
| }, |
| { |
| "epoch": 2.9659737852508936, |
| "grad_norm": 0.2289704978466034, |
| "learning_rate": 7.03177966101695e-06, |
| "loss": 0.0832, |
| "step": 2802 |
| }, |
| { |
| "epoch": 2.967032967032967, |
| "grad_norm": 0.7853215336799622, |
| "learning_rate": 7.030720338983051e-06, |
| "loss": 0.0877, |
| "step": 2803 |
| }, |
| { |
| "epoch": 2.9680921488150402, |
| "grad_norm": 0.1657303422689438, |
| "learning_rate": 7.029661016949153e-06, |
| "loss": 0.0847, |
| "step": 2804 |
| }, |
| { |
| "epoch": 2.969151330597114, |
| "grad_norm": 0.2773841619491577, |
| "learning_rate": 7.028601694915255e-06, |
| "loss": 0.0849, |
| "step": 2805 |
| }, |
| { |
| "epoch": 2.970210512379187, |
| "grad_norm": 0.22976569831371307, |
| "learning_rate": 7.0275423728813564e-06, |
| "loss": 0.0852, |
| "step": 2806 |
| }, |
| { |
| "epoch": 2.9712696941612604, |
| "grad_norm": 1.9576987028121948, |
| "learning_rate": 7.026483050847458e-06, |
| "loss": 0.0868, |
| "step": 2807 |
| }, |
| { |
| "epoch": 2.972328875943334, |
| "grad_norm": 0.8150646686553955, |
| "learning_rate": 7.025423728813559e-06, |
| "loss": 0.0839, |
| "step": 2808 |
| }, |
| { |
| "epoch": 2.973388057725407, |
| "grad_norm": 0.18944455683231354, |
| "learning_rate": 7.0243644067796615e-06, |
| "loss": 0.0835, |
| "step": 2809 |
| }, |
| { |
| "epoch": 2.9744472395074806, |
| "grad_norm": 0.20776700973510742, |
| "learning_rate": 7.023305084745763e-06, |
| "loss": 0.0848, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.9755064212895537, |
| "grad_norm": 0.2295432835817337, |
| "learning_rate": 7.022245762711864e-06, |
| "loss": 0.086, |
| "step": 2811 |
| }, |
| { |
| "epoch": 2.976565603071627, |
| "grad_norm": 0.3069179058074951, |
| "learning_rate": 7.021186440677967e-06, |
| "loss": 0.0884, |
| "step": 2812 |
| }, |
| { |
| "epoch": 2.9776247848537007, |
| "grad_norm": 0.29059523344039917, |
| "learning_rate": 7.020127118644068e-06, |
| "loss": 0.0862, |
| "step": 2813 |
| }, |
| { |
| "epoch": 2.978683966635774, |
| "grad_norm": 0.528268039226532, |
| "learning_rate": 7.01906779661017e-06, |
| "loss": 0.0857, |
| "step": 2814 |
| }, |
| { |
| "epoch": 2.9797431484178474, |
| "grad_norm": 0.5123224854469299, |
| "learning_rate": 7.0180084745762725e-06, |
| "loss": 0.0858, |
| "step": 2815 |
| }, |
| { |
| "epoch": 2.9808023301999205, |
| "grad_norm": 0.5296421051025391, |
| "learning_rate": 7.016949152542374e-06, |
| "loss": 0.0813, |
| "step": 2816 |
| }, |
| { |
| "epoch": 2.981861511981994, |
| "grad_norm": 0.366913378238678, |
| "learning_rate": 7.015889830508475e-06, |
| "loss": 0.0821, |
| "step": 2817 |
| }, |
| { |
| "epoch": 2.982920693764067, |
| "grad_norm": 1.0378891229629517, |
| "learning_rate": 7.014830508474577e-06, |
| "loss": 0.0842, |
| "step": 2818 |
| }, |
| { |
| "epoch": 2.9839798755461406, |
| "grad_norm": 0.5042109489440918, |
| "learning_rate": 7.013771186440679e-06, |
| "loss": 0.0879, |
| "step": 2819 |
| }, |
| { |
| "epoch": 2.9850390573282137, |
| "grad_norm": 0.23699359595775604, |
| "learning_rate": 7.0127118644067804e-06, |
| "loss": 0.0856, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.9860982391102873, |
| "grad_norm": 0.24904802441596985, |
| "learning_rate": 7.011652542372882e-06, |
| "loss": 0.0865, |
| "step": 2821 |
| }, |
| { |
| "epoch": 2.987157420892361, |
| "grad_norm": 0.1702445149421692, |
| "learning_rate": 7.010593220338984e-06, |
| "loss": 0.0856, |
| "step": 2822 |
| }, |
| { |
| "epoch": 2.988216602674434, |
| "grad_norm": 0.22881048917770386, |
| "learning_rate": 7.0095338983050855e-06, |
| "loss": 0.0812, |
| "step": 2823 |
| }, |
| { |
| "epoch": 2.9892757844565074, |
| "grad_norm": 0.2885756194591522, |
| "learning_rate": 7.008474576271187e-06, |
| "loss": 0.0854, |
| "step": 2824 |
| }, |
| { |
| "epoch": 2.9903349662385805, |
| "grad_norm": 0.20414619147777557, |
| "learning_rate": 7.007415254237288e-06, |
| "loss": 0.0836, |
| "step": 2825 |
| }, |
| { |
| "epoch": 2.991394148020654, |
| "grad_norm": 0.29608118534088135, |
| "learning_rate": 7.006355932203391e-06, |
| "loss": 0.0852, |
| "step": 2826 |
| }, |
| { |
| "epoch": 2.9924533298027276, |
| "grad_norm": 0.23209474980831146, |
| "learning_rate": 7.005296610169492e-06, |
| "loss": 0.0856, |
| "step": 2827 |
| }, |
| { |
| "epoch": 2.9935125115848007, |
| "grad_norm": 0.46132296323776245, |
| "learning_rate": 7.004237288135593e-06, |
| "loss": 0.0867, |
| "step": 2828 |
| }, |
| { |
| "epoch": 2.9945716933668742, |
| "grad_norm": 0.3404957950115204, |
| "learning_rate": 7.003177966101695e-06, |
| "loss": 0.0802, |
| "step": 2829 |
| }, |
| { |
| "epoch": 2.9956308751489473, |
| "grad_norm": 0.22683490812778473, |
| "learning_rate": 7.002118644067797e-06, |
| "loss": 0.0851, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.996690056931021, |
| "grad_norm": 0.5285332202911377, |
| "learning_rate": 7.0010593220338985e-06, |
| "loss": 0.0848, |
| "step": 2831 |
| }, |
| { |
| "epoch": 2.9977492387130944, |
| "grad_norm": 0.19259807467460632, |
| "learning_rate": 7e-06, |
| "loss": 0.0876, |
| "step": 2832 |
| }, |
| { |
| "epoch": 2.9977492387130944, |
| "eval_accuracy": 0.9844, |
| "eval_best_f1_from_thresholding": 0.20408163265306123, |
| "eval_loss": 0.13191111385822296, |
| "eval_matthews_corrcoef": 0.1964861252164667, |
| "eval_model_preparation_time": 0.0033, |
| "eval_negative_class_f1": 0.9921228034740457, |
| "eval_negative_class_precision": 0.9926240274830757, |
| "eval_negative_class_recall": 0.9916220853941657, |
| "eval_positive_class_f1": 0.20408163265306123, |
| "eval_positive_class_precision": 0.1941747572815534, |
| "eval_positive_class_recall": 0.21505376344086022, |
| "eval_roc_auc": 0.7971723045831611, |
| "eval_runtime": 20.7342, |
| "eval_samples_per_second": 482.295, |
| "eval_steps_per_second": 7.572, |
| "step": 2832 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 9440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 944, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.0462165606190285e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
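For reference, the record above follows the `trainer_state.json` schema that Hugging Face's `Trainer` writes into each checkpoint directory. A minimal sketch of how one might inspect it, assuming the JSON has been saved locally as `trainer_state.json` (the filename follows the library's convention, but the local path here is an assumption):

```python
import json

# Load the trainer state shown above (path is an assumption; the Hugging Face
# Trainer writes this file as trainer_state.json inside each checkpoint dir).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training entries carry a "loss" key; the end-of-epoch evaluation
# entries carry "eval_loss" (plus the eval_* metrics) instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"logged steps: {len(train_logs)} of {state['max_steps']} max")
print(f"final train loss: {train_logs[-1]['loss']}")
print(f"final eval loss:  {eval_logs[-1]['eval_loss']}")
print(f"best metric:      {state['best_metric']}")
```

On this file, the sketch would report 2832 logged steps out of 9440, a final training loss of 0.0876 against an eval loss of roughly 0.1319, and the best metric of about 0.2041 (the positive-class F1), reflecting that training stopped after epoch 3 of the configured 10.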