mDC-prior-knowledge / checkpoint-2832 /trainer_state.json
cskokgibbs's picture
Upload all model files
9f83573 verified
{
"best_metric": 0.20408163265306123,
"best_model_checkpoint": "/mnt/home/cskokgibbs/ceph/GLM-Prior-exp/mouse-experiments/no_gene_tf_label_overlaps/mDC/prior_network/checkpoint-2832",
"epoch": 2.9977492387130944,
"eval_steps": 472,
"global_step": 2832,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010591817820733483,
"grad_norm": 3.6720354557037354,
"learning_rate": 9.998940677966102e-06,
"loss": 0.3896,
"step": 1
},
{
"epoch": 0.0021183635641466966,
"grad_norm": 2.9420688152313232,
"learning_rate": 9.997881355932204e-06,
"loss": 0.3246,
"step": 2
},
{
"epoch": 0.003177545346220045,
"grad_norm": 2.360626459121704,
"learning_rate": 9.996822033898305e-06,
"loss": 0.2778,
"step": 3
},
{
"epoch": 0.004236727128293393,
"grad_norm": 1.8121588230133057,
"learning_rate": 9.995762711864408e-06,
"loss": 0.2364,
"step": 4
},
{
"epoch": 0.005295908910366741,
"grad_norm": 1.351688265800476,
"learning_rate": 9.99470338983051e-06,
"loss": 0.2099,
"step": 5
},
{
"epoch": 0.00635509069244009,
"grad_norm": 0.9306207299232483,
"learning_rate": 9.993644067796611e-06,
"loss": 0.1893,
"step": 6
},
{
"epoch": 0.0074142724745134385,
"grad_norm": 0.5691781044006348,
"learning_rate": 9.992584745762712e-06,
"loss": 0.1759,
"step": 7
},
{
"epoch": 0.008473454256586787,
"grad_norm": 0.274517685174942,
"learning_rate": 9.991525423728814e-06,
"loss": 0.1693,
"step": 8
},
{
"epoch": 0.009532636038660135,
"grad_norm": 0.08555290102958679,
"learning_rate": 9.990466101694915e-06,
"loss": 0.167,
"step": 9
},
{
"epoch": 0.010591817820733483,
"grad_norm": 0.1536484807729721,
"learning_rate": 9.989406779661017e-06,
"loss": 0.168,
"step": 10
},
{
"epoch": 0.011650999602806831,
"grad_norm": 0.24952290952205658,
"learning_rate": 9.98834745762712e-06,
"loss": 0.1688,
"step": 11
},
{
"epoch": 0.01271018138488018,
"grad_norm": 0.30534666776657104,
"learning_rate": 9.987288135593221e-06,
"loss": 0.1706,
"step": 12
},
{
"epoch": 0.013769363166953529,
"grad_norm": 0.32023343443870544,
"learning_rate": 9.986228813559323e-06,
"loss": 0.1698,
"step": 13
},
{
"epoch": 0.014828544949026877,
"grad_norm": 0.2840920686721802,
"learning_rate": 9.985169491525426e-06,
"loss": 0.1688,
"step": 14
},
{
"epoch": 0.015887726731100223,
"grad_norm": 0.2063327431678772,
"learning_rate": 9.984110169491527e-06,
"loss": 0.1678,
"step": 15
},
{
"epoch": 0.016946908513173573,
"grad_norm": 0.09346629679203033,
"learning_rate": 9.983050847457628e-06,
"loss": 0.167,
"step": 16
},
{
"epoch": 0.018006090295246923,
"grad_norm": 0.09686527401208878,
"learning_rate": 9.98199152542373e-06,
"loss": 0.1671,
"step": 17
},
{
"epoch": 0.01906527207732027,
"grad_norm": 0.1982012391090393,
"learning_rate": 9.980932203389831e-06,
"loss": 0.1678,
"step": 18
},
{
"epoch": 0.02012445385939362,
"grad_norm": 0.251574844121933,
"learning_rate": 9.979872881355933e-06,
"loss": 0.1678,
"step": 19
},
{
"epoch": 0.021183635641466966,
"grad_norm": 0.21433360874652863,
"learning_rate": 9.978813559322034e-06,
"loss": 0.1671,
"step": 20
},
{
"epoch": 0.022242817423540315,
"grad_norm": 0.14189667999744415,
"learning_rate": 9.977754237288137e-06,
"loss": 0.1664,
"step": 21
},
{
"epoch": 0.023301999205613662,
"grad_norm": 0.07194820791482925,
"learning_rate": 9.976694915254239e-06,
"loss": 0.1664,
"step": 22
},
{
"epoch": 0.02436118098768701,
"grad_norm": 0.04847427085042,
"learning_rate": 9.97563559322034e-06,
"loss": 0.1655,
"step": 23
},
{
"epoch": 0.02542036276976036,
"grad_norm": 0.08483133465051651,
"learning_rate": 9.974576271186441e-06,
"loss": 0.1657,
"step": 24
},
{
"epoch": 0.026479544551833708,
"grad_norm": 0.11271404474973679,
"learning_rate": 9.973516949152543e-06,
"loss": 0.1661,
"step": 25
},
{
"epoch": 0.027538726333907058,
"grad_norm": 0.1302892565727234,
"learning_rate": 9.972457627118644e-06,
"loss": 0.1659,
"step": 26
},
{
"epoch": 0.028597908115980404,
"grad_norm": 0.12819303572177887,
"learning_rate": 9.971398305084746e-06,
"loss": 0.1657,
"step": 27
},
{
"epoch": 0.029657089898053754,
"grad_norm": 0.10965385288000107,
"learning_rate": 9.970338983050847e-06,
"loss": 0.1658,
"step": 28
},
{
"epoch": 0.0307162716801271,
"grad_norm": 0.07511338591575623,
"learning_rate": 9.96927966101695e-06,
"loss": 0.1652,
"step": 29
},
{
"epoch": 0.03177545346220045,
"grad_norm": 0.04806717112660408,
"learning_rate": 9.968220338983052e-06,
"loss": 0.1647,
"step": 30
},
{
"epoch": 0.0328346352442738,
"grad_norm": 0.06465736776590347,
"learning_rate": 9.967161016949153e-06,
"loss": 0.1644,
"step": 31
},
{
"epoch": 0.033893817026347146,
"grad_norm": 0.10100921988487244,
"learning_rate": 9.966101694915256e-06,
"loss": 0.1652,
"step": 32
},
{
"epoch": 0.034952998808420496,
"grad_norm": 0.11376876384019852,
"learning_rate": 9.965042372881358e-06,
"loss": 0.1642,
"step": 33
},
{
"epoch": 0.036012180590493846,
"grad_norm": 0.09480928629636765,
"learning_rate": 9.963983050847459e-06,
"loss": 0.1638,
"step": 34
},
{
"epoch": 0.03707136237256719,
"grad_norm": 0.07092452049255371,
"learning_rate": 9.96292372881356e-06,
"loss": 0.1636,
"step": 35
},
{
"epoch": 0.03813054415464054,
"grad_norm": 0.049816809594631195,
"learning_rate": 9.961864406779662e-06,
"loss": 0.1639,
"step": 36
},
{
"epoch": 0.03918972593671389,
"grad_norm": 0.0559360608458519,
"learning_rate": 9.960805084745763e-06,
"loss": 0.1634,
"step": 37
},
{
"epoch": 0.04024890771878724,
"grad_norm": 0.06750863790512085,
"learning_rate": 9.959745762711866e-06,
"loss": 0.1632,
"step": 38
},
{
"epoch": 0.04130808950086059,
"grad_norm": 0.09359966218471527,
"learning_rate": 9.958686440677968e-06,
"loss": 0.162,
"step": 39
},
{
"epoch": 0.04236727128293393,
"grad_norm": 0.10011648386716843,
"learning_rate": 9.957627118644069e-06,
"loss": 0.1621,
"step": 40
},
{
"epoch": 0.04342645306500728,
"grad_norm": 0.06335251033306122,
"learning_rate": 9.95656779661017e-06,
"loss": 0.1626,
"step": 41
},
{
"epoch": 0.04448563484708063,
"grad_norm": 0.0592561773955822,
"learning_rate": 9.955508474576272e-06,
"loss": 0.1619,
"step": 42
},
{
"epoch": 0.04554481662915398,
"grad_norm": 0.08311517536640167,
"learning_rate": 9.954449152542373e-06,
"loss": 0.163,
"step": 43
},
{
"epoch": 0.046603998411227324,
"grad_norm": 0.10263793170452118,
"learning_rate": 9.953389830508475e-06,
"loss": 0.1625,
"step": 44
},
{
"epoch": 0.047663180193300674,
"grad_norm": 0.10545139759778976,
"learning_rate": 9.952330508474576e-06,
"loss": 0.1618,
"step": 45
},
{
"epoch": 0.04872236197537402,
"grad_norm": 0.0653265044093132,
"learning_rate": 9.95127118644068e-06,
"loss": 0.1606,
"step": 46
},
{
"epoch": 0.04978154375744737,
"grad_norm": 0.0890883207321167,
"learning_rate": 9.95021186440678e-06,
"loss": 0.1611,
"step": 47
},
{
"epoch": 0.05084072553952072,
"grad_norm": 0.08528076112270355,
"learning_rate": 9.949152542372882e-06,
"loss": 0.1612,
"step": 48
},
{
"epoch": 0.051899907321594066,
"grad_norm": 0.0719696655869484,
"learning_rate": 9.948093220338983e-06,
"loss": 0.1601,
"step": 49
},
{
"epoch": 0.052959089103667416,
"grad_norm": 0.09042180329561234,
"learning_rate": 9.947033898305085e-06,
"loss": 0.1604,
"step": 50
},
{
"epoch": 0.054018270885740766,
"grad_norm": 0.08847080916166306,
"learning_rate": 9.945974576271186e-06,
"loss": 0.1578,
"step": 51
},
{
"epoch": 0.055077452667814115,
"grad_norm": 0.10733941942453384,
"learning_rate": 9.944915254237288e-06,
"loss": 0.16,
"step": 52
},
{
"epoch": 0.056136634449887465,
"grad_norm": 0.09904839843511581,
"learning_rate": 9.94385593220339e-06,
"loss": 0.1591,
"step": 53
},
{
"epoch": 0.05719581623196081,
"grad_norm": 0.11285366863012314,
"learning_rate": 9.942796610169492e-06,
"loss": 0.1592,
"step": 54
},
{
"epoch": 0.05825499801403416,
"grad_norm": 0.11618144810199738,
"learning_rate": 9.941737288135594e-06,
"loss": 0.1583,
"step": 55
},
{
"epoch": 0.05931417979610751,
"grad_norm": 0.10348143428564072,
"learning_rate": 9.940677966101697e-06,
"loss": 0.1605,
"step": 56
},
{
"epoch": 0.06037336157818086,
"grad_norm": 0.11249449104070663,
"learning_rate": 9.939618644067798e-06,
"loss": 0.1586,
"step": 57
},
{
"epoch": 0.0614325433602542,
"grad_norm": 0.0900573581457138,
"learning_rate": 9.9385593220339e-06,
"loss": 0.1593,
"step": 58
},
{
"epoch": 0.06249172514232755,
"grad_norm": 0.10308244079351425,
"learning_rate": 9.937500000000001e-06,
"loss": 0.1587,
"step": 59
},
{
"epoch": 0.0635509069244009,
"grad_norm": 0.11713043600320816,
"learning_rate": 9.936440677966102e-06,
"loss": 0.1558,
"step": 60
},
{
"epoch": 0.06461008870647425,
"grad_norm": 0.13361400365829468,
"learning_rate": 9.935381355932204e-06,
"loss": 0.1579,
"step": 61
},
{
"epoch": 0.0656692704885476,
"grad_norm": 0.11023016273975372,
"learning_rate": 9.934322033898305e-06,
"loss": 0.1564,
"step": 62
},
{
"epoch": 0.06672845227062095,
"grad_norm": 0.10673161596059799,
"learning_rate": 9.933262711864408e-06,
"loss": 0.156,
"step": 63
},
{
"epoch": 0.06778763405269429,
"grad_norm": 0.12919782102108002,
"learning_rate": 9.93220338983051e-06,
"loss": 0.155,
"step": 64
},
{
"epoch": 0.06884681583476764,
"grad_norm": 0.10146741569042206,
"learning_rate": 9.931144067796611e-06,
"loss": 0.1564,
"step": 65
},
{
"epoch": 0.06990599761684099,
"grad_norm": 0.10749489068984985,
"learning_rate": 9.930084745762713e-06,
"loss": 0.1561,
"step": 66
},
{
"epoch": 0.07096517939891434,
"grad_norm": 0.1070689857006073,
"learning_rate": 9.929025423728814e-06,
"loss": 0.1542,
"step": 67
},
{
"epoch": 0.07202436118098769,
"grad_norm": 0.11663827300071716,
"learning_rate": 9.927966101694915e-06,
"loss": 0.1544,
"step": 68
},
{
"epoch": 0.07308354296306104,
"grad_norm": 0.10666878521442413,
"learning_rate": 9.926906779661017e-06,
"loss": 0.155,
"step": 69
},
{
"epoch": 0.07414272474513438,
"grad_norm": 0.12409048527479172,
"learning_rate": 9.92584745762712e-06,
"loss": 0.1533,
"step": 70
},
{
"epoch": 0.07520190652720773,
"grad_norm": 0.14574819803237915,
"learning_rate": 9.924788135593221e-06,
"loss": 0.1536,
"step": 71
},
{
"epoch": 0.07626108830928108,
"grad_norm": 0.15090584754943848,
"learning_rate": 9.923728813559323e-06,
"loss": 0.1538,
"step": 72
},
{
"epoch": 0.07732027009135443,
"grad_norm": 0.1653479039669037,
"learning_rate": 9.922669491525424e-06,
"loss": 0.1526,
"step": 73
},
{
"epoch": 0.07837945187342778,
"grad_norm": 0.17721739411354065,
"learning_rate": 9.921610169491527e-06,
"loss": 0.1527,
"step": 74
},
{
"epoch": 0.07943863365550112,
"grad_norm": 0.15513458847999573,
"learning_rate": 9.920550847457629e-06,
"loss": 0.1555,
"step": 75
},
{
"epoch": 0.08049781543757448,
"grad_norm": 0.149288609623909,
"learning_rate": 9.91949152542373e-06,
"loss": 0.153,
"step": 76
},
{
"epoch": 0.08155699721964782,
"grad_norm": 0.14205437898635864,
"learning_rate": 9.918432203389831e-06,
"loss": 0.1519,
"step": 77
},
{
"epoch": 0.08261617900172118,
"grad_norm": 0.14078938961029053,
"learning_rate": 9.917372881355933e-06,
"loss": 0.1488,
"step": 78
},
{
"epoch": 0.08367536078379452,
"grad_norm": 0.18282672762870789,
"learning_rate": 9.916313559322034e-06,
"loss": 0.1517,
"step": 79
},
{
"epoch": 0.08473454256586786,
"grad_norm": 0.1841512769460678,
"learning_rate": 9.915254237288137e-06,
"loss": 0.1511,
"step": 80
},
{
"epoch": 0.08579372434794122,
"grad_norm": 0.14817704260349274,
"learning_rate": 9.914194915254239e-06,
"loss": 0.1528,
"step": 81
},
{
"epoch": 0.08685290613001456,
"grad_norm": 0.1767425686120987,
"learning_rate": 9.91313559322034e-06,
"loss": 0.15,
"step": 82
},
{
"epoch": 0.08791208791208792,
"grad_norm": 0.20340079069137573,
"learning_rate": 9.912076271186442e-06,
"loss": 0.146,
"step": 83
},
{
"epoch": 0.08897126969416126,
"grad_norm": 0.19862839579582214,
"learning_rate": 9.911016949152543e-06,
"loss": 0.1492,
"step": 84
},
{
"epoch": 0.0900304514762346,
"grad_norm": 0.2320733219385147,
"learning_rate": 9.909957627118644e-06,
"loss": 0.1501,
"step": 85
},
{
"epoch": 0.09108963325830796,
"grad_norm": 0.229786679148674,
"learning_rate": 9.908898305084746e-06,
"loss": 0.1503,
"step": 86
},
{
"epoch": 0.0921488150403813,
"grad_norm": 0.17259404063224792,
"learning_rate": 9.907838983050849e-06,
"loss": 0.1497,
"step": 87
},
{
"epoch": 0.09320799682245465,
"grad_norm": 0.20388969779014587,
"learning_rate": 9.90677966101695e-06,
"loss": 0.151,
"step": 88
},
{
"epoch": 0.094267178604528,
"grad_norm": 0.19713272154331207,
"learning_rate": 9.905720338983052e-06,
"loss": 0.1507,
"step": 89
},
{
"epoch": 0.09532636038660135,
"grad_norm": 0.19459038972854614,
"learning_rate": 9.904661016949153e-06,
"loss": 0.1478,
"step": 90
},
{
"epoch": 0.0963855421686747,
"grad_norm": 0.19698107242584229,
"learning_rate": 9.903601694915255e-06,
"loss": 0.15,
"step": 91
},
{
"epoch": 0.09744472395074805,
"grad_norm": 0.16646313667297363,
"learning_rate": 9.902542372881356e-06,
"loss": 0.1512,
"step": 92
},
{
"epoch": 0.09850390573282139,
"grad_norm": 0.15648747980594635,
"learning_rate": 9.901483050847457e-06,
"loss": 0.1482,
"step": 93
},
{
"epoch": 0.09956308751489475,
"grad_norm": 0.2563489079475403,
"learning_rate": 9.900423728813559e-06,
"loss": 0.1487,
"step": 94
},
{
"epoch": 0.10062226929696809,
"grad_norm": 0.21389760076999664,
"learning_rate": 9.899364406779662e-06,
"loss": 0.146,
"step": 95
},
{
"epoch": 0.10168145107904145,
"grad_norm": 0.23555664718151093,
"learning_rate": 9.898305084745763e-06,
"loss": 0.1498,
"step": 96
},
{
"epoch": 0.10274063286111479,
"grad_norm": 0.21392253041267395,
"learning_rate": 9.897245762711866e-06,
"loss": 0.1487,
"step": 97
},
{
"epoch": 0.10379981464318813,
"grad_norm": 0.2690548896789551,
"learning_rate": 9.896186440677968e-06,
"loss": 0.1479,
"step": 98
},
{
"epoch": 0.10485899642526149,
"grad_norm": 0.2211674153804779,
"learning_rate": 9.89512711864407e-06,
"loss": 0.1457,
"step": 99
},
{
"epoch": 0.10591817820733483,
"grad_norm": 0.2760964632034302,
"learning_rate": 9.89406779661017e-06,
"loss": 0.1483,
"step": 100
},
{
"epoch": 0.10697735998940819,
"grad_norm": 0.22649763524532318,
"learning_rate": 9.893008474576272e-06,
"loss": 0.1466,
"step": 101
},
{
"epoch": 0.10803654177148153,
"grad_norm": 0.2080029994249344,
"learning_rate": 9.891949152542374e-06,
"loss": 0.1461,
"step": 102
},
{
"epoch": 0.10909572355355487,
"grad_norm": 0.31648021936416626,
"learning_rate": 9.890889830508475e-06,
"loss": 0.1435,
"step": 103
},
{
"epoch": 0.11015490533562823,
"grad_norm": 0.26235735416412354,
"learning_rate": 9.889830508474576e-06,
"loss": 0.1415,
"step": 104
},
{
"epoch": 0.11121408711770157,
"grad_norm": 0.32502713799476624,
"learning_rate": 9.88877118644068e-06,
"loss": 0.146,
"step": 105
},
{
"epoch": 0.11227326889977493,
"grad_norm": 0.28630173206329346,
"learning_rate": 9.887711864406781e-06,
"loss": 0.1483,
"step": 106
},
{
"epoch": 0.11333245068184827,
"grad_norm": 0.21586979925632477,
"learning_rate": 9.886652542372882e-06,
"loss": 0.1447,
"step": 107
},
{
"epoch": 0.11439163246392162,
"grad_norm": 0.328690767288208,
"learning_rate": 9.885593220338984e-06,
"loss": 0.1414,
"step": 108
},
{
"epoch": 0.11545081424599497,
"grad_norm": 0.20366932451725006,
"learning_rate": 9.884533898305085e-06,
"loss": 0.1436,
"step": 109
},
{
"epoch": 0.11650999602806832,
"grad_norm": 0.26369708776474,
"learning_rate": 9.883474576271186e-06,
"loss": 0.1445,
"step": 110
},
{
"epoch": 0.11756917781014166,
"grad_norm": 0.31383395195007324,
"learning_rate": 9.882415254237288e-06,
"loss": 0.1424,
"step": 111
},
{
"epoch": 0.11862835959221502,
"grad_norm": 0.251658171415329,
"learning_rate": 9.881355932203391e-06,
"loss": 0.1417,
"step": 112
},
{
"epoch": 0.11968754137428836,
"grad_norm": 0.3859596252441406,
"learning_rate": 9.880296610169492e-06,
"loss": 0.1425,
"step": 113
},
{
"epoch": 0.12074672315636172,
"grad_norm": 0.3530981242656708,
"learning_rate": 9.879237288135594e-06,
"loss": 0.1424,
"step": 114
},
{
"epoch": 0.12180590493843506,
"grad_norm": 0.45179763436317444,
"learning_rate": 9.878177966101695e-06,
"loss": 0.1427,
"step": 115
},
{
"epoch": 0.1228650867205084,
"grad_norm": 0.47741344571113586,
"learning_rate": 9.877118644067798e-06,
"loss": 0.1423,
"step": 116
},
{
"epoch": 0.12392426850258176,
"grad_norm": 0.31315502524375916,
"learning_rate": 9.8760593220339e-06,
"loss": 0.1394,
"step": 117
},
{
"epoch": 0.1249834502846551,
"grad_norm": 0.4611978530883789,
"learning_rate": 9.875000000000001e-06,
"loss": 0.1443,
"step": 118
},
{
"epoch": 0.12604263206672844,
"grad_norm": 0.2832074463367462,
"learning_rate": 9.873940677966103e-06,
"loss": 0.1426,
"step": 119
},
{
"epoch": 0.1271018138488018,
"grad_norm": 0.4847926199436188,
"learning_rate": 9.872881355932204e-06,
"loss": 0.1439,
"step": 120
},
{
"epoch": 0.12816099563087516,
"grad_norm": 0.4041496515274048,
"learning_rate": 9.871822033898305e-06,
"loss": 0.1422,
"step": 121
},
{
"epoch": 0.1292201774129485,
"grad_norm": 0.27493688464164734,
"learning_rate": 9.870762711864409e-06,
"loss": 0.1415,
"step": 122
},
{
"epoch": 0.13027935919502184,
"grad_norm": 0.4400162994861603,
"learning_rate": 9.86970338983051e-06,
"loss": 0.1401,
"step": 123
},
{
"epoch": 0.1313385409770952,
"grad_norm": 0.4497718811035156,
"learning_rate": 9.868644067796611e-06,
"loss": 0.1421,
"step": 124
},
{
"epoch": 0.13239772275916853,
"grad_norm": 0.25661447644233704,
"learning_rate": 9.867584745762713e-06,
"loss": 0.138,
"step": 125
},
{
"epoch": 0.1334569045412419,
"grad_norm": 0.29652678966522217,
"learning_rate": 9.866525423728814e-06,
"loss": 0.136,
"step": 126
},
{
"epoch": 0.13451608632331524,
"grad_norm": 0.33211785554885864,
"learning_rate": 9.865466101694916e-06,
"loss": 0.1404,
"step": 127
},
{
"epoch": 0.13557526810538859,
"grad_norm": 0.4695075452327728,
"learning_rate": 9.864406779661017e-06,
"loss": 0.1416,
"step": 128
},
{
"epoch": 0.13663444988746193,
"grad_norm": 0.29075145721435547,
"learning_rate": 9.86334745762712e-06,
"loss": 0.1375,
"step": 129
},
{
"epoch": 0.13769363166953527,
"grad_norm": 0.4561719596385956,
"learning_rate": 9.862288135593221e-06,
"loss": 0.1409,
"step": 130
},
{
"epoch": 0.13875281345160864,
"grad_norm": 0.3260703980922699,
"learning_rate": 9.861228813559323e-06,
"loss": 0.1358,
"step": 131
},
{
"epoch": 0.13981199523368198,
"grad_norm": 0.6748884320259094,
"learning_rate": 9.860169491525424e-06,
"loss": 0.1408,
"step": 132
},
{
"epoch": 0.14087117701575533,
"grad_norm": 0.26813751459121704,
"learning_rate": 9.859110169491526e-06,
"loss": 0.1384,
"step": 133
},
{
"epoch": 0.14193035879782867,
"grad_norm": 0.34277114272117615,
"learning_rate": 9.858050847457627e-06,
"loss": 0.1379,
"step": 134
},
{
"epoch": 0.142989540579902,
"grad_norm": 0.3976724147796631,
"learning_rate": 9.856991525423729e-06,
"loss": 0.1409,
"step": 135
},
{
"epoch": 0.14404872236197538,
"grad_norm": 0.2871648669242859,
"learning_rate": 9.855932203389832e-06,
"loss": 0.137,
"step": 136
},
{
"epoch": 0.14510790414404873,
"grad_norm": 0.48152652382850647,
"learning_rate": 9.854872881355933e-06,
"loss": 0.1373,
"step": 137
},
{
"epoch": 0.14616708592612207,
"grad_norm": 0.32819676399230957,
"learning_rate": 9.853813559322034e-06,
"loss": 0.1322,
"step": 138
},
{
"epoch": 0.1472262677081954,
"grad_norm": 0.34722280502319336,
"learning_rate": 9.852754237288138e-06,
"loss": 0.1369,
"step": 139
},
{
"epoch": 0.14828544949026876,
"grad_norm": 0.47408348321914673,
"learning_rate": 9.851694915254239e-06,
"loss": 0.1377,
"step": 140
},
{
"epoch": 0.14934463127234213,
"grad_norm": 0.3838208317756653,
"learning_rate": 9.85063559322034e-06,
"loss": 0.1389,
"step": 141
},
{
"epoch": 0.15040381305441547,
"grad_norm": 0.39561015367507935,
"learning_rate": 9.849576271186442e-06,
"loss": 0.1377,
"step": 142
},
{
"epoch": 0.1514629948364888,
"grad_norm": 0.7962619066238403,
"learning_rate": 9.848516949152543e-06,
"loss": 0.1373,
"step": 143
},
{
"epoch": 0.15252217661856216,
"grad_norm": 0.3441040813922882,
"learning_rate": 9.847457627118645e-06,
"loss": 0.1349,
"step": 144
},
{
"epoch": 0.1535813584006355,
"grad_norm": 0.47094985842704773,
"learning_rate": 9.846398305084746e-06,
"loss": 0.1361,
"step": 145
},
{
"epoch": 0.15464054018270887,
"grad_norm": 0.6072841882705688,
"learning_rate": 9.845338983050849e-06,
"loss": 0.1322,
"step": 146
},
{
"epoch": 0.1556997219647822,
"grad_norm": 0.3189762830734253,
"learning_rate": 9.84427966101695e-06,
"loss": 0.1342,
"step": 147
},
{
"epoch": 0.15675890374685555,
"grad_norm": 0.9441516995429993,
"learning_rate": 9.843220338983052e-06,
"loss": 0.1373,
"step": 148
},
{
"epoch": 0.1578180855289289,
"grad_norm": 0.634678065776825,
"learning_rate": 9.842161016949153e-06,
"loss": 0.1372,
"step": 149
},
{
"epoch": 0.15887726731100224,
"grad_norm": 0.6699573993682861,
"learning_rate": 9.841101694915255e-06,
"loss": 0.1389,
"step": 150
},
{
"epoch": 0.1599364490930756,
"grad_norm": 0.8605893850326538,
"learning_rate": 9.840042372881356e-06,
"loss": 0.1362,
"step": 151
},
{
"epoch": 0.16099563087514895,
"grad_norm": 0.3965975344181061,
"learning_rate": 9.838983050847458e-06,
"loss": 0.1315,
"step": 152
},
{
"epoch": 0.1620548126572223,
"grad_norm": 1.6671831607818604,
"learning_rate": 9.837923728813559e-06,
"loss": 0.1449,
"step": 153
},
{
"epoch": 0.16311399443929564,
"grad_norm": 1.1250773668289185,
"learning_rate": 9.836864406779662e-06,
"loss": 0.1388,
"step": 154
},
{
"epoch": 0.16417317622136898,
"grad_norm": 0.45315343141555786,
"learning_rate": 9.835805084745764e-06,
"loss": 0.1368,
"step": 155
},
{
"epoch": 0.16523235800344235,
"grad_norm": 0.6775908470153809,
"learning_rate": 9.834745762711865e-06,
"loss": 0.1387,
"step": 156
},
{
"epoch": 0.1662915397855157,
"grad_norm": 0.595596194267273,
"learning_rate": 9.833686440677966e-06,
"loss": 0.1403,
"step": 157
},
{
"epoch": 0.16735072156758904,
"grad_norm": 0.39271387457847595,
"learning_rate": 9.832627118644068e-06,
"loss": 0.1377,
"step": 158
},
{
"epoch": 0.16840990334966238,
"grad_norm": 0.6346777677536011,
"learning_rate": 9.831567796610171e-06,
"loss": 0.1405,
"step": 159
},
{
"epoch": 0.16946908513173572,
"grad_norm": 0.854393482208252,
"learning_rate": 9.830508474576272e-06,
"loss": 0.1395,
"step": 160
},
{
"epoch": 0.1705282669138091,
"grad_norm": 0.3409649431705475,
"learning_rate": 9.829449152542374e-06,
"loss": 0.1351,
"step": 161
},
{
"epoch": 0.17158744869588244,
"grad_norm": 0.5128641724586487,
"learning_rate": 9.828389830508475e-06,
"loss": 0.1384,
"step": 162
},
{
"epoch": 0.17264663047795578,
"grad_norm": 0.6055179834365845,
"learning_rate": 9.827330508474578e-06,
"loss": 0.1356,
"step": 163
},
{
"epoch": 0.17370581226002912,
"grad_norm": 0.3544069230556488,
"learning_rate": 9.82627118644068e-06,
"loss": 0.1302,
"step": 164
},
{
"epoch": 0.17476499404210247,
"grad_norm": 0.4915960729122162,
"learning_rate": 9.825211864406781e-06,
"loss": 0.1351,
"step": 165
},
{
"epoch": 0.17582417582417584,
"grad_norm": 0.7743620872497559,
"learning_rate": 9.824152542372882e-06,
"loss": 0.1345,
"step": 166
},
{
"epoch": 0.17688335760624918,
"grad_norm": 0.5948208570480347,
"learning_rate": 9.823093220338984e-06,
"loss": 0.1372,
"step": 167
},
{
"epoch": 0.17794253938832252,
"grad_norm": 0.37300121784210205,
"learning_rate": 9.822033898305085e-06,
"loss": 0.1335,
"step": 168
},
{
"epoch": 0.17900172117039587,
"grad_norm": 0.5812225341796875,
"learning_rate": 9.820974576271187e-06,
"loss": 0.1355,
"step": 169
},
{
"epoch": 0.1800609029524692,
"grad_norm": 0.6437628269195557,
"learning_rate": 9.819915254237288e-06,
"loss": 0.1352,
"step": 170
},
{
"epoch": 0.18112008473454255,
"grad_norm": 0.3454924523830414,
"learning_rate": 9.818855932203391e-06,
"loss": 0.1355,
"step": 171
},
{
"epoch": 0.18217926651661592,
"grad_norm": 0.656635582447052,
"learning_rate": 9.817796610169493e-06,
"loss": 0.1349,
"step": 172
},
{
"epoch": 0.18323844829868927,
"grad_norm": 0.6227939128875732,
"learning_rate": 9.816737288135594e-06,
"loss": 0.1335,
"step": 173
},
{
"epoch": 0.1842976300807626,
"grad_norm": 0.3746066689491272,
"learning_rate": 9.815677966101695e-06,
"loss": 0.1329,
"step": 174
},
{
"epoch": 0.18535681186283595,
"grad_norm": 0.41495418548583984,
"learning_rate": 9.814618644067797e-06,
"loss": 0.131,
"step": 175
},
{
"epoch": 0.1864159936449093,
"grad_norm": 0.3814808130264282,
"learning_rate": 9.813559322033898e-06,
"loss": 0.1304,
"step": 176
},
{
"epoch": 0.18747517542698267,
"grad_norm": 0.46599066257476807,
"learning_rate": 9.8125e-06,
"loss": 0.13,
"step": 177
},
{
"epoch": 0.188534357209056,
"grad_norm": 0.4790410101413727,
"learning_rate": 9.811440677966103e-06,
"loss": 0.1276,
"step": 178
},
{
"epoch": 0.18959353899112935,
"grad_norm": 0.44499969482421875,
"learning_rate": 9.810381355932204e-06,
"loss": 0.1277,
"step": 179
},
{
"epoch": 0.1906527207732027,
"grad_norm": 0.4200044870376587,
"learning_rate": 9.809322033898306e-06,
"loss": 0.1261,
"step": 180
},
{
"epoch": 0.19171190255527604,
"grad_norm": 0.6261917352676392,
"learning_rate": 9.808262711864409e-06,
"loss": 0.1338,
"step": 181
},
{
"epoch": 0.1927710843373494,
"grad_norm": 0.5882927179336548,
"learning_rate": 9.80720338983051e-06,
"loss": 0.1304,
"step": 182
},
{
"epoch": 0.19383026611942275,
"grad_norm": 0.36154767870903015,
"learning_rate": 9.806144067796612e-06,
"loss": 0.1306,
"step": 183
},
{
"epoch": 0.1948894479014961,
"grad_norm": 0.5511684417724609,
"learning_rate": 9.805084745762713e-06,
"loss": 0.1287,
"step": 184
},
{
"epoch": 0.19594862968356944,
"grad_norm": 0.45902204513549805,
"learning_rate": 9.804025423728814e-06,
"loss": 0.1282,
"step": 185
},
{
"epoch": 0.19700781146564278,
"grad_norm": 0.42389318346977234,
"learning_rate": 9.802966101694916e-06,
"loss": 0.1276,
"step": 186
},
{
"epoch": 0.19806699324771615,
"grad_norm": 1.0849132537841797,
"learning_rate": 9.801906779661017e-06,
"loss": 0.1324,
"step": 187
},
{
"epoch": 0.1991261750297895,
"grad_norm": 0.4857287108898163,
"learning_rate": 9.80084745762712e-06,
"loss": 0.1322,
"step": 188
},
{
"epoch": 0.20018535681186284,
"grad_norm": 0.45011675357818604,
"learning_rate": 9.799788135593222e-06,
"loss": 0.1324,
"step": 189
},
{
"epoch": 0.20124453859393618,
"grad_norm": 0.6487869024276733,
"learning_rate": 9.798728813559323e-06,
"loss": 0.1348,
"step": 190
},
{
"epoch": 0.20230372037600952,
"grad_norm": 0.5920835137367249,
"learning_rate": 9.797669491525424e-06,
"loss": 0.1332,
"step": 191
},
{
"epoch": 0.2033629021580829,
"grad_norm": 0.4455447793006897,
"learning_rate": 9.796610169491526e-06,
"loss": 0.1284,
"step": 192
},
{
"epoch": 0.20442208394015624,
"grad_norm": 0.5541309118270874,
"learning_rate": 9.795550847457627e-06,
"loss": 0.1256,
"step": 193
},
{
"epoch": 0.20548126572222958,
"grad_norm": 1.2032992839813232,
"learning_rate": 9.794491525423729e-06,
"loss": 0.1315,
"step": 194
},
{
"epoch": 0.20654044750430292,
"grad_norm": 0.47795382142066956,
"learning_rate": 9.793432203389832e-06,
"loss": 0.1254,
"step": 195
},
{
"epoch": 0.20759962928637626,
"grad_norm": 0.5737263560295105,
"learning_rate": 9.792372881355933e-06,
"loss": 0.1248,
"step": 196
},
{
"epoch": 0.20865881106844963,
"grad_norm": 0.6147955656051636,
"learning_rate": 9.791313559322035e-06,
"loss": 0.1274,
"step": 197
},
{
"epoch": 0.20971799285052298,
"grad_norm": 0.49131709337234497,
"learning_rate": 9.790254237288136e-06,
"loss": 0.1304,
"step": 198
},
{
"epoch": 0.21077717463259632,
"grad_norm": 0.32255011796951294,
"learning_rate": 9.789194915254237e-06,
"loss": 0.1227,
"step": 199
},
{
"epoch": 0.21183635641466966,
"grad_norm": 0.8382896780967712,
"learning_rate": 9.788135593220339e-06,
"loss": 0.1305,
"step": 200
},
{
"epoch": 0.212895538196743,
"grad_norm": 0.6731181144714355,
"learning_rate": 9.787076271186442e-06,
"loss": 0.1279,
"step": 201
},
{
"epoch": 0.21395471997881638,
"grad_norm": 0.34601402282714844,
"learning_rate": 9.786016949152543e-06,
"loss": 0.1265,
"step": 202
},
{
"epoch": 0.21501390176088972,
"grad_norm": 0.5068625807762146,
"learning_rate": 9.784957627118645e-06,
"loss": 0.1233,
"step": 203
},
{
"epoch": 0.21607308354296306,
"grad_norm": 0.5763030648231506,
"learning_rate": 9.783898305084746e-06,
"loss": 0.1235,
"step": 204
},
{
"epoch": 0.2171322653250364,
"grad_norm": 0.46622368693351746,
"learning_rate": 9.78283898305085e-06,
"loss": 0.1243,
"step": 205
},
{
"epoch": 0.21819144710710975,
"grad_norm": 0.6546030044555664,
"learning_rate": 9.78177966101695e-06,
"loss": 0.1272,
"step": 206
},
{
"epoch": 0.21925062888918312,
"grad_norm": 0.9954103827476501,
"learning_rate": 9.780720338983052e-06,
"loss": 0.1266,
"step": 207
},
{
"epoch": 0.22030981067125646,
"grad_norm": 0.3762997090816498,
"learning_rate": 9.779661016949154e-06,
"loss": 0.1237,
"step": 208
},
{
"epoch": 0.2213689924533298,
"grad_norm": 0.4721704423427582,
"learning_rate": 9.778601694915255e-06,
"loss": 0.1248,
"step": 209
},
{
"epoch": 0.22242817423540315,
"grad_norm": 0.4108166992664337,
"learning_rate": 9.777542372881356e-06,
"loss": 0.1226,
"step": 210
},
{
"epoch": 0.2234873560174765,
"grad_norm": 0.4768858551979065,
"learning_rate": 9.776483050847458e-06,
"loss": 0.1238,
"step": 211
},
{
"epoch": 0.22454653779954986,
"grad_norm": 0.46884432435035706,
"learning_rate": 9.775423728813561e-06,
"loss": 0.1232,
"step": 212
},
{
"epoch": 0.2256057195816232,
"grad_norm": 0.49014735221862793,
"learning_rate": 9.774364406779662e-06,
"loss": 0.1257,
"step": 213
},
{
"epoch": 0.22666490136369655,
"grad_norm": 0.8501893877983093,
"learning_rate": 9.773305084745764e-06,
"loss": 0.1277,
"step": 214
},
{
"epoch": 0.2277240831457699,
"grad_norm": 0.6959905624389648,
"learning_rate": 9.772245762711865e-06,
"loss": 0.1261,
"step": 215
},
{
"epoch": 0.22878326492784323,
"grad_norm": 0.5984706282615662,
"learning_rate": 9.771186440677967e-06,
"loss": 0.1299,
"step": 216
},
{
"epoch": 0.22984244670991658,
"grad_norm": 0.574116587638855,
"learning_rate": 9.770127118644068e-06,
"loss": 0.1266,
"step": 217
},
{
"epoch": 0.23090162849198995,
"grad_norm": 0.6310490369796753,
"learning_rate": 9.76906779661017e-06,
"loss": 0.1318,
"step": 218
},
{
"epoch": 0.2319608102740633,
"grad_norm": 0.61598801612854,
"learning_rate": 9.76800847457627e-06,
"loss": 0.1247,
"step": 219
},
{
"epoch": 0.23301999205613663,
"grad_norm": 0.6924020648002625,
"learning_rate": 9.766949152542374e-06,
"loss": 0.1255,
"step": 220
},
{
"epoch": 0.23407917383820998,
"grad_norm": 0.8936102986335754,
"learning_rate": 9.765889830508475e-06,
"loss": 0.1275,
"step": 221
},
{
"epoch": 0.23513835562028332,
"grad_norm": 0.6406232118606567,
"learning_rate": 9.764830508474578e-06,
"loss": 0.1276,
"step": 222
},
{
"epoch": 0.2361975374023567,
"grad_norm": 0.6062114834785461,
"learning_rate": 9.76377118644068e-06,
"loss": 0.1249,
"step": 223
},
{
"epoch": 0.23725671918443003,
"grad_norm": 0.5603080987930298,
"learning_rate": 9.762711864406781e-06,
"loss": 0.1265,
"step": 224
},
{
"epoch": 0.23831590096650337,
"grad_norm": 0.6400798559188843,
"learning_rate": 9.761652542372883e-06,
"loss": 0.1285,
"step": 225
},
{
"epoch": 0.23937508274857672,
"grad_norm": 0.3875858187675476,
"learning_rate": 9.760593220338984e-06,
"loss": 0.1206,
"step": 226
},
{
"epoch": 0.24043426453065006,
"grad_norm": 0.45703446865081787,
"learning_rate": 9.759533898305085e-06,
"loss": 0.1218,
"step": 227
},
{
"epoch": 0.24149344631272343,
"grad_norm": 1.033738613128662,
"learning_rate": 9.758474576271187e-06,
"loss": 0.1252,
"step": 228
},
{
"epoch": 0.24255262809479677,
"grad_norm": 0.8972013592720032,
"learning_rate": 9.757415254237288e-06,
"loss": 0.125,
"step": 229
},
{
"epoch": 0.24361180987687012,
"grad_norm": 0.4154466986656189,
"learning_rate": 9.756355932203391e-06,
"loss": 0.1241,
"step": 230
},
{
"epoch": 0.24467099165894346,
"grad_norm": 0.6829087734222412,
"learning_rate": 9.755296610169493e-06,
"loss": 0.1271,
"step": 231
},
{
"epoch": 0.2457301734410168,
"grad_norm": 0.6784948706626892,
"learning_rate": 9.754237288135594e-06,
"loss": 0.1251,
"step": 232
},
{
"epoch": 0.24678935522309017,
"grad_norm": 0.6223576664924622,
"learning_rate": 9.753177966101696e-06,
"loss": 0.1177,
"step": 233
},
{
"epoch": 0.24784853700516352,
"grad_norm": 0.3889577388763428,
"learning_rate": 9.752118644067797e-06,
"loss": 0.1188,
"step": 234
},
{
"epoch": 0.24890771878723686,
"grad_norm": 1.6076977252960205,
"learning_rate": 9.751059322033898e-06,
"loss": 0.127,
"step": 235
},
{
"epoch": 0.2499669005693102,
"grad_norm": 1.3593311309814453,
"learning_rate": 9.75e-06,
"loss": 0.1275,
"step": 236
},
{
"epoch": 0.2510260823513836,
"grad_norm": 0.4061569571495056,
"learning_rate": 9.748940677966103e-06,
"loss": 0.1145,
"step": 237
},
{
"epoch": 0.2520852641334569,
"grad_norm": 0.49842679500579834,
"learning_rate": 9.747881355932204e-06,
"loss": 0.1229,
"step": 238
},
{
"epoch": 0.25314444591553026,
"grad_norm": 0.6197004914283752,
"learning_rate": 9.746822033898306e-06,
"loss": 0.1238,
"step": 239
},
{
"epoch": 0.2542036276976036,
"grad_norm": 0.6113892793655396,
"learning_rate": 9.745762711864407e-06,
"loss": 0.1223,
"step": 240
},
{
"epoch": 0.25526280947967694,
"grad_norm": 0.5286921858787537,
"learning_rate": 9.744703389830509e-06,
"loss": 0.1232,
"step": 241
},
{
"epoch": 0.2563219912617503,
"grad_norm": 1.1775904893875122,
"learning_rate": 9.74364406779661e-06,
"loss": 0.1297,
"step": 242
},
{
"epoch": 0.25738117304382363,
"grad_norm": 1.4024102687835693,
"learning_rate": 9.742584745762713e-06,
"loss": 0.1312,
"step": 243
},
{
"epoch": 0.258440354825897,
"grad_norm": 0.5369304418563843,
"learning_rate": 9.741525423728814e-06,
"loss": 0.12,
"step": 244
},
{
"epoch": 0.2594995366079703,
"grad_norm": 0.6096199750900269,
"learning_rate": 9.740466101694916e-06,
"loss": 0.1237,
"step": 245
},
{
"epoch": 0.2605587183900437,
"grad_norm": 0.5490643978118896,
"learning_rate": 9.739406779661017e-06,
"loss": 0.125,
"step": 246
},
{
"epoch": 0.26161790017211706,
"grad_norm": 0.458108514547348,
"learning_rate": 9.73834745762712e-06,
"loss": 0.1226,
"step": 247
},
{
"epoch": 0.2626770819541904,
"grad_norm": 0.7059030532836914,
"learning_rate": 9.737288135593222e-06,
"loss": 0.1214,
"step": 248
},
{
"epoch": 0.26373626373626374,
"grad_norm": 0.5895451903343201,
"learning_rate": 9.736228813559323e-06,
"loss": 0.1162,
"step": 249
},
{
"epoch": 0.26479544551833706,
"grad_norm": 1.0854839086532593,
"learning_rate": 9.735169491525425e-06,
"loss": 0.1257,
"step": 250
},
{
"epoch": 0.26585462730041043,
"grad_norm": 0.3744925856590271,
"learning_rate": 9.734110169491526e-06,
"loss": 0.1193,
"step": 251
},
{
"epoch": 0.2669138090824838,
"grad_norm": 0.49556052684783936,
"learning_rate": 9.733050847457627e-06,
"loss": 0.1194,
"step": 252
},
{
"epoch": 0.2679729908645571,
"grad_norm": 0.6708354949951172,
"learning_rate": 9.731991525423729e-06,
"loss": 0.1232,
"step": 253
},
{
"epoch": 0.2690321726466305,
"grad_norm": 0.6407080292701721,
"learning_rate": 9.730932203389832e-06,
"loss": 0.1152,
"step": 254
},
{
"epoch": 0.2700913544287038,
"grad_norm": 0.48788413405418396,
"learning_rate": 9.729872881355933e-06,
"loss": 0.1187,
"step": 255
},
{
"epoch": 0.27115053621077717,
"grad_norm": 0.8405669331550598,
"learning_rate": 9.728813559322035e-06,
"loss": 0.1193,
"step": 256
},
{
"epoch": 0.27220971799285054,
"grad_norm": 1.2621759176254272,
"learning_rate": 9.727754237288136e-06,
"loss": 0.1222,
"step": 257
},
{
"epoch": 0.27326889977492386,
"grad_norm": 0.521096408367157,
"learning_rate": 9.726694915254238e-06,
"loss": 0.1164,
"step": 258
},
{
"epoch": 0.2743280815569972,
"grad_norm": 0.5464190244674683,
"learning_rate": 9.725635593220339e-06,
"loss": 0.123,
"step": 259
},
{
"epoch": 0.27538726333907054,
"grad_norm": 0.6365671157836914,
"learning_rate": 9.72457627118644e-06,
"loss": 0.117,
"step": 260
},
{
"epoch": 0.2764464451211439,
"grad_norm": 0.5571804046630859,
"learning_rate": 9.723516949152544e-06,
"loss": 0.1248,
"step": 261
},
{
"epoch": 0.2775056269032173,
"grad_norm": 0.49087226390838623,
"learning_rate": 9.722457627118645e-06,
"loss": 0.1209,
"step": 262
},
{
"epoch": 0.2785648086852906,
"grad_norm": 0.8053379654884338,
"learning_rate": 9.721398305084746e-06,
"loss": 0.1151,
"step": 263
},
{
"epoch": 0.27962399046736397,
"grad_norm": 1.0285648107528687,
"learning_rate": 9.72033898305085e-06,
"loss": 0.1207,
"step": 264
},
{
"epoch": 0.2806831722494373,
"grad_norm": 0.5413007736206055,
"learning_rate": 9.719279661016951e-06,
"loss": 0.1151,
"step": 265
},
{
"epoch": 0.28174235403151066,
"grad_norm": 0.5470491647720337,
"learning_rate": 9.718220338983052e-06,
"loss": 0.1198,
"step": 266
},
{
"epoch": 0.282801535813584,
"grad_norm": 0.6262738108634949,
"learning_rate": 9.717161016949154e-06,
"loss": 0.1191,
"step": 267
},
{
"epoch": 0.28386071759565734,
"grad_norm": 0.591308057308197,
"learning_rate": 9.716101694915255e-06,
"loss": 0.1205,
"step": 268
},
{
"epoch": 0.2849198993777307,
"grad_norm": 0.5280201435089111,
"learning_rate": 9.715042372881357e-06,
"loss": 0.1176,
"step": 269
},
{
"epoch": 0.285979081159804,
"grad_norm": 0.9757447838783264,
"learning_rate": 9.713983050847458e-06,
"loss": 0.1221,
"step": 270
},
{
"epoch": 0.2870382629418774,
"grad_norm": 0.5570871829986572,
"learning_rate": 9.712923728813561e-06,
"loss": 0.1154,
"step": 271
},
{
"epoch": 0.28809744472395077,
"grad_norm": 0.7283833026885986,
"learning_rate": 9.711864406779662e-06,
"loss": 0.1161,
"step": 272
},
{
"epoch": 0.2891566265060241,
"grad_norm": 0.6055071353912354,
"learning_rate": 9.710805084745764e-06,
"loss": 0.1161,
"step": 273
},
{
"epoch": 0.29021580828809745,
"grad_norm": 0.6126815676689148,
"learning_rate": 9.709745762711865e-06,
"loss": 0.12,
"step": 274
},
{
"epoch": 0.29127499007017077,
"grad_norm": 0.5102630853652954,
"learning_rate": 9.708686440677967e-06,
"loss": 0.1176,
"step": 275
},
{
"epoch": 0.29233417185224414,
"grad_norm": 0.6075724959373474,
"learning_rate": 9.707627118644068e-06,
"loss": 0.1154,
"step": 276
},
{
"epoch": 0.2933933536343175,
"grad_norm": 0.6419102549552917,
"learning_rate": 9.70656779661017e-06,
"loss": 0.117,
"step": 277
},
{
"epoch": 0.2944525354163908,
"grad_norm": 0.6876809000968933,
"learning_rate": 9.705508474576271e-06,
"loss": 0.1185,
"step": 278
},
{
"epoch": 0.2955117171984642,
"grad_norm": 0.46148595213890076,
"learning_rate": 9.704449152542374e-06,
"loss": 0.1181,
"step": 279
},
{
"epoch": 0.2965708989805375,
"grad_norm": 0.4858454167842865,
"learning_rate": 9.703389830508475e-06,
"loss": 0.1131,
"step": 280
},
{
"epoch": 0.2976300807626109,
"grad_norm": 0.6200568675994873,
"learning_rate": 9.702330508474577e-06,
"loss": 0.1183,
"step": 281
},
{
"epoch": 0.29868926254468425,
"grad_norm": 0.6180820465087891,
"learning_rate": 9.701271186440678e-06,
"loss": 0.1157,
"step": 282
},
{
"epoch": 0.29974844432675757,
"grad_norm": 0.4233243465423584,
"learning_rate": 9.70021186440678e-06,
"loss": 0.1109,
"step": 283
},
{
"epoch": 0.30080762610883094,
"grad_norm": 0.8406049013137817,
"learning_rate": 9.699152542372881e-06,
"loss": 0.11,
"step": 284
},
{
"epoch": 0.30186680789090425,
"grad_norm": 1.2740683555603027,
"learning_rate": 9.698093220338984e-06,
"loss": 0.1238,
"step": 285
},
{
"epoch": 0.3029259896729776,
"grad_norm": 0.7582654356956482,
"learning_rate": 9.697033898305086e-06,
"loss": 0.1141,
"step": 286
},
{
"epoch": 0.303985171455051,
"grad_norm": 0.5228947401046753,
"learning_rate": 9.695974576271187e-06,
"loss": 0.117,
"step": 287
},
{
"epoch": 0.3050443532371243,
"grad_norm": 0.6550778150558472,
"learning_rate": 9.69491525423729e-06,
"loss": 0.1191,
"step": 288
},
{
"epoch": 0.3061035350191977,
"grad_norm": 0.8009293675422668,
"learning_rate": 9.693855932203392e-06,
"loss": 0.122,
"step": 289
},
{
"epoch": 0.307162716801271,
"grad_norm": 0.5879274606704712,
"learning_rate": 9.692796610169493e-06,
"loss": 0.1182,
"step": 290
},
{
"epoch": 0.30822189858334437,
"grad_norm": 0.49802204966545105,
"learning_rate": 9.691737288135594e-06,
"loss": 0.118,
"step": 291
},
{
"epoch": 0.30928108036541774,
"grad_norm": 0.8825365900993347,
"learning_rate": 9.690677966101696e-06,
"loss": 0.1187,
"step": 292
},
{
"epoch": 0.31034026214749105,
"grad_norm": 1.3932257890701294,
"learning_rate": 9.689618644067797e-06,
"loss": 0.1225,
"step": 293
},
{
"epoch": 0.3113994439295644,
"grad_norm": 0.6308886408805847,
"learning_rate": 9.688559322033899e-06,
"loss": 0.1112,
"step": 294
},
{
"epoch": 0.31245862571163774,
"grad_norm": 0.5725395083427429,
"learning_rate": 9.6875e-06,
"loss": 0.1133,
"step": 295
},
{
"epoch": 0.3135178074937111,
"grad_norm": 0.6921817064285278,
"learning_rate": 9.686440677966103e-06,
"loss": 0.1158,
"step": 296
},
{
"epoch": 0.3145769892757845,
"grad_norm": 0.5869089961051941,
"learning_rate": 9.685381355932205e-06,
"loss": 0.1153,
"step": 297
},
{
"epoch": 0.3156361710578578,
"grad_norm": 0.6975012421607971,
"learning_rate": 9.684322033898306e-06,
"loss": 0.1163,
"step": 298
},
{
"epoch": 0.31669535283993117,
"grad_norm": 0.9162189960479736,
"learning_rate": 9.683262711864407e-06,
"loss": 0.1161,
"step": 299
},
{
"epoch": 0.3177545346220045,
"grad_norm": 0.5748836398124695,
"learning_rate": 9.682203389830509e-06,
"loss": 0.1187,
"step": 300
},
{
"epoch": 0.31881371640407785,
"grad_norm": 0.554711103439331,
"learning_rate": 9.68114406779661e-06,
"loss": 0.1188,
"step": 301
},
{
"epoch": 0.3198728981861512,
"grad_norm": 0.6140995025634766,
"learning_rate": 9.680084745762712e-06,
"loss": 0.1157,
"step": 302
},
{
"epoch": 0.32093207996822454,
"grad_norm": 0.5292292237281799,
"learning_rate": 9.679025423728815e-06,
"loss": 0.1134,
"step": 303
},
{
"epoch": 0.3219912617502979,
"grad_norm": 0.4888879358768463,
"learning_rate": 9.677966101694916e-06,
"loss": 0.116,
"step": 304
},
{
"epoch": 0.3230504435323712,
"grad_norm": 0.5932160019874573,
"learning_rate": 9.676906779661017e-06,
"loss": 0.1167,
"step": 305
},
{
"epoch": 0.3241096253144446,
"grad_norm": 0.5574305057525635,
"learning_rate": 9.67584745762712e-06,
"loss": 0.1201,
"step": 306
},
{
"epoch": 0.32516880709651796,
"grad_norm": 1.0020751953125,
"learning_rate": 9.674788135593222e-06,
"loss": 0.1196,
"step": 307
},
{
"epoch": 0.3262279888785913,
"grad_norm": 0.5251221656799316,
"learning_rate": 9.673728813559323e-06,
"loss": 0.1189,
"step": 308
},
{
"epoch": 0.32728717066066465,
"grad_norm": 0.5894964933395386,
"learning_rate": 9.672669491525425e-06,
"loss": 0.1154,
"step": 309
},
{
"epoch": 0.32834635244273797,
"grad_norm": 0.765792191028595,
"learning_rate": 9.671610169491526e-06,
"loss": 0.116,
"step": 310
},
{
"epoch": 0.32940553422481134,
"grad_norm": 0.5536498427391052,
"learning_rate": 9.670550847457628e-06,
"loss": 0.1161,
"step": 311
},
{
"epoch": 0.3304647160068847,
"grad_norm": 0.4105791449546814,
"learning_rate": 9.669491525423729e-06,
"loss": 0.1127,
"step": 312
},
{
"epoch": 0.331523897788958,
"grad_norm": 1.1248149871826172,
"learning_rate": 9.668432203389832e-06,
"loss": 0.1197,
"step": 313
},
{
"epoch": 0.3325830795710314,
"grad_norm": 1.1392658948898315,
"learning_rate": 9.667372881355934e-06,
"loss": 0.1169,
"step": 314
},
{
"epoch": 0.3336422613531047,
"grad_norm": 0.37222108244895935,
"learning_rate": 9.666313559322035e-06,
"loss": 0.1148,
"step": 315
},
{
"epoch": 0.3347014431351781,
"grad_norm": 0.48842084407806396,
"learning_rate": 9.665254237288136e-06,
"loss": 0.1154,
"step": 316
},
{
"epoch": 0.33576062491725145,
"grad_norm": 0.5393485426902771,
"learning_rate": 9.664194915254238e-06,
"loss": 0.1186,
"step": 317
},
{
"epoch": 0.33681980669932476,
"grad_norm": 0.556902289390564,
"learning_rate": 9.66313559322034e-06,
"loss": 0.1205,
"step": 318
},
{
"epoch": 0.33787898848139813,
"grad_norm": 0.49229300022125244,
"learning_rate": 9.66207627118644e-06,
"loss": 0.1172,
"step": 319
},
{
"epoch": 0.33893817026347145,
"grad_norm": 0.5494566559791565,
"learning_rate": 9.661016949152544e-06,
"loss": 0.1169,
"step": 320
},
{
"epoch": 0.3399973520455448,
"grad_norm": 1.6041209697723389,
"learning_rate": 9.659957627118645e-06,
"loss": 0.1222,
"step": 321
},
{
"epoch": 0.3410565338276182,
"grad_norm": 1.3501816987991333,
"learning_rate": 9.658898305084747e-06,
"loss": 0.12,
"step": 322
},
{
"epoch": 0.3421157156096915,
"grad_norm": 0.6245529651641846,
"learning_rate": 9.657838983050848e-06,
"loss": 0.1149,
"step": 323
},
{
"epoch": 0.3431748973917649,
"grad_norm": 0.4942384958267212,
"learning_rate": 9.65677966101695e-06,
"loss": 0.1179,
"step": 324
},
{
"epoch": 0.3442340791738382,
"grad_norm": 0.5967822074890137,
"learning_rate": 9.65572033898305e-06,
"loss": 0.1183,
"step": 325
},
{
"epoch": 0.34529326095591156,
"grad_norm": 0.5735800266265869,
"learning_rate": 9.654661016949152e-06,
"loss": 0.1187,
"step": 326
},
{
"epoch": 0.34635244273798493,
"grad_norm": 0.4783755838871002,
"learning_rate": 9.653601694915255e-06,
"loss": 0.1198,
"step": 327
},
{
"epoch": 0.34741162452005825,
"grad_norm": 1.1743202209472656,
"learning_rate": 9.652542372881357e-06,
"loss": 0.1192,
"step": 328
},
{
"epoch": 0.3484708063021316,
"grad_norm": 1.20207679271698,
"learning_rate": 9.651483050847458e-06,
"loss": 0.1199,
"step": 329
},
{
"epoch": 0.34952998808420493,
"grad_norm": 0.6972348093986511,
"learning_rate": 9.650423728813561e-06,
"loss": 0.1176,
"step": 330
},
{
"epoch": 0.3505891698662783,
"grad_norm": 0.4509320557117462,
"learning_rate": 9.649364406779663e-06,
"loss": 0.1165,
"step": 331
},
{
"epoch": 0.3516483516483517,
"grad_norm": 0.5181882381439209,
"learning_rate": 9.648305084745764e-06,
"loss": 0.1183,
"step": 332
},
{
"epoch": 0.352707533430425,
"grad_norm": 0.5092886686325073,
"learning_rate": 9.647245762711865e-06,
"loss": 0.1156,
"step": 333
},
{
"epoch": 0.35376671521249836,
"grad_norm": 0.4488472044467926,
"learning_rate": 9.646186440677967e-06,
"loss": 0.1174,
"step": 334
},
{
"epoch": 0.3548258969945717,
"grad_norm": 0.4209890365600586,
"learning_rate": 9.645127118644068e-06,
"loss": 0.1139,
"step": 335
},
{
"epoch": 0.35588507877664505,
"grad_norm": 0.888431966304779,
"learning_rate": 9.64406779661017e-06,
"loss": 0.1137,
"step": 336
},
{
"epoch": 0.35694426055871836,
"grad_norm": 0.5200555324554443,
"learning_rate": 9.643008474576273e-06,
"loss": 0.1088,
"step": 337
},
{
"epoch": 0.35800344234079173,
"grad_norm": 0.3198027014732361,
"learning_rate": 9.641949152542374e-06,
"loss": 0.1082,
"step": 338
},
{
"epoch": 0.3590626241228651,
"grad_norm": 0.31940215826034546,
"learning_rate": 9.640889830508476e-06,
"loss": 0.1104,
"step": 339
},
{
"epoch": 0.3601218059049384,
"grad_norm": 0.3348517417907715,
"learning_rate": 9.639830508474577e-06,
"loss": 0.1102,
"step": 340
},
{
"epoch": 0.3611809876870118,
"grad_norm": 0.4220709502696991,
"learning_rate": 9.638771186440678e-06,
"loss": 0.1137,
"step": 341
},
{
"epoch": 0.3622401694690851,
"grad_norm": 0.5341797471046448,
"learning_rate": 9.63771186440678e-06,
"loss": 0.1136,
"step": 342
},
{
"epoch": 0.3632993512511585,
"grad_norm": 0.3537216782569885,
"learning_rate": 9.636652542372881e-06,
"loss": 0.1115,
"step": 343
},
{
"epoch": 0.36435853303323185,
"grad_norm": 1.5838582515716553,
"learning_rate": 9.635593220338983e-06,
"loss": 0.1162,
"step": 344
},
{
"epoch": 0.36541771481530516,
"grad_norm": 0.6268835067749023,
"learning_rate": 9.634533898305086e-06,
"loss": 0.1127,
"step": 345
},
{
"epoch": 0.36647689659737853,
"grad_norm": 0.5873063206672668,
"learning_rate": 9.633474576271187e-06,
"loss": 0.107,
"step": 346
},
{
"epoch": 0.36753607837945185,
"grad_norm": 0.6088775992393494,
"learning_rate": 9.632415254237289e-06,
"loss": 0.1118,
"step": 347
},
{
"epoch": 0.3685952601615252,
"grad_norm": 0.41898828744888306,
"learning_rate": 9.631355932203392e-06,
"loss": 0.1156,
"step": 348
},
{
"epoch": 0.3696544419435986,
"grad_norm": 0.5519065856933594,
"learning_rate": 9.630296610169493e-06,
"loss": 0.1145,
"step": 349
},
{
"epoch": 0.3707136237256719,
"grad_norm": 0.8987408876419067,
"learning_rate": 9.629237288135595e-06,
"loss": 0.1182,
"step": 350
},
{
"epoch": 0.3717728055077453,
"grad_norm": 0.7230226993560791,
"learning_rate": 9.628177966101696e-06,
"loss": 0.1214,
"step": 351
},
{
"epoch": 0.3728319872898186,
"grad_norm": 0.4950161576271057,
"learning_rate": 9.627118644067797e-06,
"loss": 0.1124,
"step": 352
},
{
"epoch": 0.37389116907189196,
"grad_norm": 0.4516774117946625,
"learning_rate": 9.626059322033899e-06,
"loss": 0.1096,
"step": 353
},
{
"epoch": 0.37495035085396533,
"grad_norm": 0.4239518940448761,
"learning_rate": 9.625e-06,
"loss": 0.1112,
"step": 354
},
{
"epoch": 0.37600953263603865,
"grad_norm": 0.35730719566345215,
"learning_rate": 9.623940677966103e-06,
"loss": 0.1075,
"step": 355
},
{
"epoch": 0.377068714418112,
"grad_norm": 1.1237397193908691,
"learning_rate": 9.622881355932205e-06,
"loss": 0.1123,
"step": 356
},
{
"epoch": 0.37812789620018533,
"grad_norm": 0.8006128668785095,
"learning_rate": 9.621822033898306e-06,
"loss": 0.113,
"step": 357
},
{
"epoch": 0.3791870779822587,
"grad_norm": 0.537632942199707,
"learning_rate": 9.620762711864408e-06,
"loss": 0.1125,
"step": 358
},
{
"epoch": 0.3802462597643321,
"grad_norm": 0.44080790877342224,
"learning_rate": 9.619703389830509e-06,
"loss": 0.1113,
"step": 359
},
{
"epoch": 0.3813054415464054,
"grad_norm": 0.4815836250782013,
"learning_rate": 9.61864406779661e-06,
"loss": 0.1136,
"step": 360
},
{
"epoch": 0.38236462332847876,
"grad_norm": 0.4750889539718628,
"learning_rate": 9.617584745762712e-06,
"loss": 0.1143,
"step": 361
},
{
"epoch": 0.3834238051105521,
"grad_norm": 0.4339998662471771,
"learning_rate": 9.616525423728815e-06,
"loss": 0.1107,
"step": 362
},
{
"epoch": 0.38448298689262544,
"grad_norm": 0.45202603936195374,
"learning_rate": 9.615466101694916e-06,
"loss": 0.1082,
"step": 363
},
{
"epoch": 0.3855421686746988,
"grad_norm": 0.8911429643630981,
"learning_rate": 9.614406779661018e-06,
"loss": 0.113,
"step": 364
},
{
"epoch": 0.38660135045677213,
"grad_norm": 0.7170503735542297,
"learning_rate": 9.613347457627119e-06,
"loss": 0.1097,
"step": 365
},
{
"epoch": 0.3876605322388455,
"grad_norm": 0.42108699679374695,
"learning_rate": 9.61228813559322e-06,
"loss": 0.1126,
"step": 366
},
{
"epoch": 0.3887197140209188,
"grad_norm": 0.4251386225223541,
"learning_rate": 9.611228813559322e-06,
"loss": 0.1116,
"step": 367
},
{
"epoch": 0.3897788958029922,
"grad_norm": 0.49028900265693665,
"learning_rate": 9.610169491525423e-06,
"loss": 0.1134,
"step": 368
},
{
"epoch": 0.39083807758506556,
"grad_norm": 0.44346433877944946,
"learning_rate": 9.609110169491526e-06,
"loss": 0.1095,
"step": 369
},
{
"epoch": 0.3918972593671389,
"grad_norm": 0.3316284418106079,
"learning_rate": 9.608050847457628e-06,
"loss": 0.1127,
"step": 370
},
{
"epoch": 0.39295644114921224,
"grad_norm": 0.5099537968635559,
"learning_rate": 9.60699152542373e-06,
"loss": 0.1081,
"step": 371
},
{
"epoch": 0.39401562293128556,
"grad_norm": 0.30008023977279663,
"learning_rate": 9.605932203389832e-06,
"loss": 0.1075,
"step": 372
},
{
"epoch": 0.39507480471335893,
"grad_norm": 0.5581114888191223,
"learning_rate": 9.604872881355934e-06,
"loss": 0.1099,
"step": 373
},
{
"epoch": 0.3961339864954323,
"grad_norm": 0.5502544641494751,
"learning_rate": 9.603813559322035e-06,
"loss": 0.1061,
"step": 374
},
{
"epoch": 0.3971931682775056,
"grad_norm": 0.33419784903526306,
"learning_rate": 9.602754237288137e-06,
"loss": 0.1105,
"step": 375
},
{
"epoch": 0.398252350059579,
"grad_norm": 0.45939692854881287,
"learning_rate": 9.601694915254238e-06,
"loss": 0.1075,
"step": 376
},
{
"epoch": 0.3993115318416523,
"grad_norm": 0.45434147119522095,
"learning_rate": 9.60063559322034e-06,
"loss": 0.113,
"step": 377
},
{
"epoch": 0.40037071362372567,
"grad_norm": 0.5118069648742676,
"learning_rate": 9.59957627118644e-06,
"loss": 0.1134,
"step": 378
},
{
"epoch": 0.40142989540579904,
"grad_norm": 0.4450303316116333,
"learning_rate": 9.598516949152544e-06,
"loss": 0.1101,
"step": 379
},
{
"epoch": 0.40248907718787236,
"grad_norm": 0.6912146210670471,
"learning_rate": 9.597457627118645e-06,
"loss": 0.1131,
"step": 380
},
{
"epoch": 0.40354825896994573,
"grad_norm": 0.4503055810928345,
"learning_rate": 9.596398305084747e-06,
"loss": 0.1146,
"step": 381
},
{
"epoch": 0.40460744075201904,
"grad_norm": 0.46015700697898865,
"learning_rate": 9.595338983050848e-06,
"loss": 0.1059,
"step": 382
},
{
"epoch": 0.4056666225340924,
"grad_norm": 0.48054563999176025,
"learning_rate": 9.59427966101695e-06,
"loss": 0.1079,
"step": 383
},
{
"epoch": 0.4067258043161658,
"grad_norm": 0.32423877716064453,
"learning_rate": 9.593220338983051e-06,
"loss": 0.1105,
"step": 384
},
{
"epoch": 0.4077849860982391,
"grad_norm": 0.5704624056816101,
"learning_rate": 9.592161016949152e-06,
"loss": 0.1089,
"step": 385
},
{
"epoch": 0.40884416788031247,
"grad_norm": 0.8289555907249451,
"learning_rate": 9.591101694915255e-06,
"loss": 0.1126,
"step": 386
},
{
"epoch": 0.4099033496623858,
"grad_norm": 0.4352301359176636,
"learning_rate": 9.590042372881357e-06,
"loss": 0.1074,
"step": 387
},
{
"epoch": 0.41096253144445916,
"grad_norm": 0.3718428313732147,
"learning_rate": 9.588983050847458e-06,
"loss": 0.1077,
"step": 388
},
{
"epoch": 0.4120217132265325,
"grad_norm": 0.49498993158340454,
"learning_rate": 9.58792372881356e-06,
"loss": 0.1086,
"step": 389
},
{
"epoch": 0.41308089500860584,
"grad_norm": 0.3933676481246948,
"learning_rate": 9.586864406779663e-06,
"loss": 0.1102,
"step": 390
},
{
"epoch": 0.4141400767906792,
"grad_norm": 0.2939629554748535,
"learning_rate": 9.585805084745764e-06,
"loss": 0.1044,
"step": 391
},
{
"epoch": 0.4151992585727525,
"grad_norm": 0.5933998823165894,
"learning_rate": 9.584745762711866e-06,
"loss": 0.1078,
"step": 392
},
{
"epoch": 0.4162584403548259,
"grad_norm": 0.6409616470336914,
"learning_rate": 9.583686440677967e-06,
"loss": 0.1115,
"step": 393
},
{
"epoch": 0.41731762213689927,
"grad_norm": 0.5755448341369629,
"learning_rate": 9.582627118644068e-06,
"loss": 0.1114,
"step": 394
},
{
"epoch": 0.4183768039189726,
"grad_norm": 0.4892319440841675,
"learning_rate": 9.58156779661017e-06,
"loss": 0.1103,
"step": 395
},
{
"epoch": 0.41943598570104595,
"grad_norm": 0.5357713103294373,
"learning_rate": 9.580508474576273e-06,
"loss": 0.1082,
"step": 396
},
{
"epoch": 0.42049516748311927,
"grad_norm": 0.4296826720237732,
"learning_rate": 9.579449152542374e-06,
"loss": 0.1051,
"step": 397
},
{
"epoch": 0.42155434926519264,
"grad_norm": 0.31759247183799744,
"learning_rate": 9.578389830508476e-06,
"loss": 0.1061,
"step": 398
},
{
"epoch": 0.422613531047266,
"grad_norm": 0.6764819622039795,
"learning_rate": 9.577330508474577e-06,
"loss": 0.1051,
"step": 399
},
{
"epoch": 0.4236727128293393,
"grad_norm": 0.8692288994789124,
"learning_rate": 9.576271186440679e-06,
"loss": 0.1089,
"step": 400
},
{
"epoch": 0.4247318946114127,
"grad_norm": 0.2702364921569824,
"learning_rate": 9.57521186440678e-06,
"loss": 0.1056,
"step": 401
},
{
"epoch": 0.425791076393486,
"grad_norm": 0.35407984256744385,
"learning_rate": 9.574152542372881e-06,
"loss": 0.1104,
"step": 402
},
{
"epoch": 0.4268502581755594,
"grad_norm": 0.36321505904197693,
"learning_rate": 9.573093220338983e-06,
"loss": 0.1019,
"step": 403
},
{
"epoch": 0.42790943995763275,
"grad_norm": 0.42492276430130005,
"learning_rate": 9.572033898305086e-06,
"loss": 0.1082,
"step": 404
},
{
"epoch": 0.42896862173970607,
"grad_norm": 0.6944525241851807,
"learning_rate": 9.570974576271187e-06,
"loss": 0.1092,
"step": 405
},
{
"epoch": 0.43002780352177944,
"grad_norm": 0.3112322986125946,
"learning_rate": 9.569915254237289e-06,
"loss": 0.1041,
"step": 406
},
{
"epoch": 0.43108698530385275,
"grad_norm": 0.3997170031070709,
"learning_rate": 9.56885593220339e-06,
"loss": 0.1057,
"step": 407
},
{
"epoch": 0.4321461670859261,
"grad_norm": 0.5635967254638672,
"learning_rate": 9.567796610169492e-06,
"loss": 0.1078,
"step": 408
},
{
"epoch": 0.4332053488679995,
"grad_norm": 0.7858012914657593,
"learning_rate": 9.566737288135593e-06,
"loss": 0.1077,
"step": 409
},
{
"epoch": 0.4342645306500728,
"grad_norm": 1.2377523183822632,
"learning_rate": 9.565677966101694e-06,
"loss": 0.1046,
"step": 410
},
{
"epoch": 0.4353237124321462,
"grad_norm": 0.4631684124469757,
"learning_rate": 9.564618644067798e-06,
"loss": 0.1097,
"step": 411
},
{
"epoch": 0.4363828942142195,
"grad_norm": 0.4029408097267151,
"learning_rate": 9.563559322033899e-06,
"loss": 0.1075,
"step": 412
},
{
"epoch": 0.43744207599629287,
"grad_norm": 0.977272093296051,
"learning_rate": 9.562500000000002e-06,
"loss": 0.1108,
"step": 413
},
{
"epoch": 0.43850125777836624,
"grad_norm": 0.7255805134773254,
"learning_rate": 9.561440677966103e-06,
"loss": 0.1108,
"step": 414
},
{
"epoch": 0.43956043956043955,
"grad_norm": 0.43987271189689636,
"learning_rate": 9.560381355932205e-06,
"loss": 0.1066,
"step": 415
},
{
"epoch": 0.4406196213425129,
"grad_norm": 0.49002915620803833,
"learning_rate": 9.559322033898306e-06,
"loss": 0.1081,
"step": 416
},
{
"epoch": 0.44167880312458624,
"grad_norm": 0.49192023277282715,
"learning_rate": 9.558262711864408e-06,
"loss": 0.1077,
"step": 417
},
{
"epoch": 0.4427379849066596,
"grad_norm": 0.489654004573822,
"learning_rate": 9.557203389830509e-06,
"loss": 0.1092,
"step": 418
},
{
"epoch": 0.443797166688733,
"grad_norm": 0.7146844863891602,
"learning_rate": 9.55614406779661e-06,
"loss": 0.1062,
"step": 419
},
{
"epoch": 0.4448563484708063,
"grad_norm": 0.6408959031105042,
"learning_rate": 9.555084745762712e-06,
"loss": 0.1092,
"step": 420
},
{
"epoch": 0.44591553025287967,
"grad_norm": 0.7169507145881653,
"learning_rate": 9.554025423728815e-06,
"loss": 0.1102,
"step": 421
},
{
"epoch": 0.446974712034953,
"grad_norm": 0.6536822319030762,
"learning_rate": 9.552966101694916e-06,
"loss": 0.1117,
"step": 422
},
{
"epoch": 0.44803389381702635,
"grad_norm": 0.5074172019958496,
"learning_rate": 9.551906779661018e-06,
"loss": 0.1141,
"step": 423
},
{
"epoch": 0.4490930755990997,
"grad_norm": 0.5577737092971802,
"learning_rate": 9.55084745762712e-06,
"loss": 0.113,
"step": 424
},
{
"epoch": 0.45015225738117304,
"grad_norm": 0.43877550959587097,
"learning_rate": 9.54978813559322e-06,
"loss": 0.1088,
"step": 425
},
{
"epoch": 0.4512114391632464,
"grad_norm": 0.4872395396232605,
"learning_rate": 9.548728813559322e-06,
"loss": 0.1058,
"step": 426
},
{
"epoch": 0.4522706209453197,
"grad_norm": 0.5919830799102783,
"learning_rate": 9.547669491525423e-06,
"loss": 0.1055,
"step": 427
},
{
"epoch": 0.4533298027273931,
"grad_norm": 0.711881160736084,
"learning_rate": 9.546610169491527e-06,
"loss": 0.1096,
"step": 428
},
{
"epoch": 0.45438898450946646,
"grad_norm": 0.571658194065094,
"learning_rate": 9.545550847457628e-06,
"loss": 0.11,
"step": 429
},
{
"epoch": 0.4554481662915398,
"grad_norm": 0.4957394301891327,
"learning_rate": 9.54449152542373e-06,
"loss": 0.1092,
"step": 430
},
{
"epoch": 0.45650734807361315,
"grad_norm": 0.473998099565506,
"learning_rate": 9.54343220338983e-06,
"loss": 0.1075,
"step": 431
},
{
"epoch": 0.45756652985568647,
"grad_norm": 0.5281217098236084,
"learning_rate": 9.542372881355934e-06,
"loss": 0.1045,
"step": 432
},
{
"epoch": 0.45862571163775984,
"grad_norm": 0.4293951690196991,
"learning_rate": 9.541313559322035e-06,
"loss": 0.1034,
"step": 433
},
{
"epoch": 0.45968489341983315,
"grad_norm": 0.4172426760196686,
"learning_rate": 9.540254237288137e-06,
"loss": 0.106,
"step": 434
},
{
"epoch": 0.4607440752019065,
"grad_norm": 0.4815566837787628,
"learning_rate": 9.539194915254238e-06,
"loss": 0.1046,
"step": 435
},
{
"epoch": 0.4618032569839799,
"grad_norm": 0.9885016679763794,
"learning_rate": 9.53813559322034e-06,
"loss": 0.1067,
"step": 436
},
{
"epoch": 0.4628624387660532,
"grad_norm": 0.5439627170562744,
"learning_rate": 9.537076271186441e-06,
"loss": 0.1033,
"step": 437
},
{
"epoch": 0.4639216205481266,
"grad_norm": 0.47244539856910706,
"learning_rate": 9.536016949152544e-06,
"loss": 0.106,
"step": 438
},
{
"epoch": 0.4649808023301999,
"grad_norm": 0.3577944338321686,
"learning_rate": 9.534957627118645e-06,
"loss": 0.1035,
"step": 439
},
{
"epoch": 0.46603998411227326,
"grad_norm": 0.37519779801368713,
"learning_rate": 9.533898305084747e-06,
"loss": 0.1052,
"step": 440
},
{
"epoch": 0.46709916589434664,
"grad_norm": 0.5341219902038574,
"learning_rate": 9.532838983050848e-06,
"loss": 0.104,
"step": 441
},
{
"epoch": 0.46815834767641995,
"grad_norm": 0.5906185507774353,
"learning_rate": 9.53177966101695e-06,
"loss": 0.1093,
"step": 442
},
{
"epoch": 0.4692175294584933,
"grad_norm": 0.7719901204109192,
"learning_rate": 9.530720338983051e-06,
"loss": 0.1042,
"step": 443
},
{
"epoch": 0.47027671124056664,
"grad_norm": 0.43853050470352173,
"learning_rate": 9.529661016949153e-06,
"loss": 0.1033,
"step": 444
},
{
"epoch": 0.47133589302264,
"grad_norm": 0.41483408212661743,
"learning_rate": 9.528601694915256e-06,
"loss": 0.1037,
"step": 445
},
{
"epoch": 0.4723950748047134,
"grad_norm": 0.38403433561325073,
"learning_rate": 9.527542372881357e-06,
"loss": 0.0989,
"step": 446
},
{
"epoch": 0.4734542565867867,
"grad_norm": 0.41726627945899963,
"learning_rate": 9.526483050847458e-06,
"loss": 0.1047,
"step": 447
},
{
"epoch": 0.47451343836886006,
"grad_norm": 0.6198919415473938,
"learning_rate": 9.52542372881356e-06,
"loss": 0.1052,
"step": 448
},
{
"epoch": 0.4755726201509334,
"grad_norm": 0.37252992391586304,
"learning_rate": 9.524364406779661e-06,
"loss": 0.105,
"step": 449
},
{
"epoch": 0.47663180193300675,
"grad_norm": 0.48047640919685364,
"learning_rate": 9.523305084745763e-06,
"loss": 0.1053,
"step": 450
},
{
"epoch": 0.4776909837150801,
"grad_norm": 0.4455186724662781,
"learning_rate": 9.522245762711864e-06,
"loss": 0.1037,
"step": 451
},
{
"epoch": 0.47875016549715343,
"grad_norm": 0.5429503917694092,
"learning_rate": 9.521186440677967e-06,
"loss": 0.1018,
"step": 452
},
{
"epoch": 0.4798093472792268,
"grad_norm": 0.3205244243144989,
"learning_rate": 9.520127118644069e-06,
"loss": 0.1006,
"step": 453
},
{
"epoch": 0.4808685290613001,
"grad_norm": 0.7264949083328247,
"learning_rate": 9.51906779661017e-06,
"loss": 0.0994,
"step": 454
},
{
"epoch": 0.4819277108433735,
"grad_norm": 0.3654707968235016,
"learning_rate": 9.518008474576273e-06,
"loss": 0.1006,
"step": 455
},
{
"epoch": 0.48298689262544686,
"grad_norm": 0.419162780046463,
"learning_rate": 9.516949152542375e-06,
"loss": 0.1024,
"step": 456
},
{
"epoch": 0.4840460744075202,
"grad_norm": 0.3804956078529358,
"learning_rate": 9.515889830508476e-06,
"loss": 0.1027,
"step": 457
},
{
"epoch": 0.48510525618959355,
"grad_norm": 0.44833818078041077,
"learning_rate": 9.514830508474577e-06,
"loss": 0.1013,
"step": 458
},
{
"epoch": 0.48616443797166686,
"grad_norm": 0.5254035592079163,
"learning_rate": 9.513771186440679e-06,
"loss": 0.1039,
"step": 459
},
{
"epoch": 0.48722361975374023,
"grad_norm": 0.399044394493103,
"learning_rate": 9.51271186440678e-06,
"loss": 0.1075,
"step": 460
},
{
"epoch": 0.4882828015358136,
"grad_norm": 0.38109609484672546,
"learning_rate": 9.511652542372882e-06,
"loss": 0.0988,
"step": 461
},
{
"epoch": 0.4893419833178869,
"grad_norm": 0.5939087271690369,
"learning_rate": 9.510593220338985e-06,
"loss": 0.1009,
"step": 462
},
{
"epoch": 0.4904011650999603,
"grad_norm": 0.407850056886673,
"learning_rate": 9.509533898305086e-06,
"loss": 0.1005,
"step": 463
},
{
"epoch": 0.4914603468820336,
"grad_norm": 0.523597240447998,
"learning_rate": 9.508474576271188e-06,
"loss": 0.102,
"step": 464
},
{
"epoch": 0.492519528664107,
"grad_norm": 0.7457444071769714,
"learning_rate": 9.507415254237289e-06,
"loss": 0.1037,
"step": 465
},
{
"epoch": 0.49357871044618035,
"grad_norm": 0.4090770483016968,
"learning_rate": 9.50635593220339e-06,
"loss": 0.1057,
"step": 466
},
{
"epoch": 0.49463789222825366,
"grad_norm": 0.3972409665584564,
"learning_rate": 9.505296610169492e-06,
"loss": 0.1028,
"step": 467
},
{
"epoch": 0.49569707401032703,
"grad_norm": 0.38050827383995056,
"learning_rate": 9.504237288135593e-06,
"loss": 0.1049,
"step": 468
},
{
"epoch": 0.49675625579240035,
"grad_norm": 0.3014926612377167,
"learning_rate": 9.503177966101695e-06,
"loss": 0.1012,
"step": 469
},
{
"epoch": 0.4978154375744737,
"grad_norm": 0.42940622568130493,
"learning_rate": 9.502118644067798e-06,
"loss": 0.1065,
"step": 470
},
{
"epoch": 0.4988746193565471,
"grad_norm": 0.2840025722980499,
"learning_rate": 9.501059322033899e-06,
"loss": 0.103,
"step": 471
},
{
"epoch": 0.4999338011386204,
"grad_norm": 0.3117368817329407,
"learning_rate": 9.5e-06,
"loss": 0.1035,
"step": 472
},
{
"epoch": 0.4999338011386204,
"eval_accuracy": 0.9803,
"eval_best_f1_from_thresholding": 0.15450643776824033,
"eval_loss": 0.13234694302082062,
"eval_matthews_corrcoef": 0.14806398220854253,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9900338948752972,
"eval_negative_class_precision": 0.992393509127789,
"eval_negative_class_recall": 0.9876854749167255,
"eval_positive_class_f1": 0.15450643776824036,
"eval_positive_class_precision": 0.12857142857142856,
"eval_positive_class_recall": 0.1935483870967742,
"eval_roc_auc": 0.8230006805224067,
"eval_runtime": 20.7498,
"eval_samples_per_second": 481.933,
"eval_steps_per_second": 7.566,
"step": 472
},
{
"epoch": 0.5009929829206937,
"grad_norm": 1.087823748588562,
"learning_rate": 9.498940677966102e-06,
"loss": 0.1062,
"step": 473
},
{
"epoch": 0.5020521647027671,
"grad_norm": 1.0358397960662842,
"learning_rate": 9.497881355932203e-06,
"loss": 0.1086,
"step": 474
},
{
"epoch": 0.5031113464848405,
"grad_norm": 0.5633784532546997,
"learning_rate": 9.496822033898306e-06,
"loss": 0.1031,
"step": 475
},
{
"epoch": 0.5041705282669138,
"grad_norm": 0.5209754705429077,
"learning_rate": 9.495762711864408e-06,
"loss": 0.1047,
"step": 476
},
{
"epoch": 0.5052297100489872,
"grad_norm": 0.5898464322090149,
"learning_rate": 9.49470338983051e-06,
"loss": 0.1075,
"step": 477
},
{
"epoch": 0.5062888918310605,
"grad_norm": 0.49827930331230164,
"learning_rate": 9.49364406779661e-06,
"loss": 0.104,
"step": 478
},
{
"epoch": 0.5073480736131338,
"grad_norm": 0.4471459686756134,
"learning_rate": 9.492584745762712e-06,
"loss": 0.1048,
"step": 479
},
{
"epoch": 0.5084072553952071,
"grad_norm": 0.3593554198741913,
"learning_rate": 9.491525423728815e-06,
"loss": 0.1025,
"step": 480
},
{
"epoch": 0.5094664371772806,
"grad_norm": 0.5244583487510681,
"learning_rate": 9.490466101694917e-06,
"loss": 0.1067,
"step": 481
},
{
"epoch": 0.5105256189593539,
"grad_norm": 1.4169367551803589,
"learning_rate": 9.489406779661018e-06,
"loss": 0.106,
"step": 482
},
{
"epoch": 0.5115848007414272,
"grad_norm": 1.2409381866455078,
"learning_rate": 9.48834745762712e-06,
"loss": 0.1051,
"step": 483
},
{
"epoch": 0.5126439825235006,
"grad_norm": 0.39561715722084045,
"learning_rate": 9.48728813559322e-06,
"loss": 0.1036,
"step": 484
},
{
"epoch": 0.513703164305574,
"grad_norm": 0.5099272131919861,
"learning_rate": 9.486228813559322e-06,
"loss": 0.1032,
"step": 485
},
{
"epoch": 0.5147623460876473,
"grad_norm": 0.5082338452339172,
"learning_rate": 9.485169491525424e-06,
"loss": 0.1039,
"step": 486
},
{
"epoch": 0.5158215278697207,
"grad_norm": 0.46735939383506775,
"learning_rate": 9.484110169491527e-06,
"loss": 0.1,
"step": 487
},
{
"epoch": 0.516880709651794,
"grad_norm": 0.4456905126571655,
"learning_rate": 9.483050847457628e-06,
"loss": 0.1025,
"step": 488
},
{
"epoch": 0.5179398914338673,
"grad_norm": 0.5637014508247375,
"learning_rate": 9.48199152542373e-06,
"loss": 0.0996,
"step": 489
},
{
"epoch": 0.5189990732159406,
"grad_norm": 1.9828752279281616,
"learning_rate": 9.480932203389831e-06,
"loss": 0.1019,
"step": 490
},
{
"epoch": 0.5200582549980141,
"grad_norm": 0.8592916131019592,
"learning_rate": 9.479872881355932e-06,
"loss": 0.103,
"step": 491
},
{
"epoch": 0.5211174367800874,
"grad_norm": 0.8089073896408081,
"learning_rate": 9.478813559322034e-06,
"loss": 0.1034,
"step": 492
},
{
"epoch": 0.5221766185621607,
"grad_norm": 0.36452987790107727,
"learning_rate": 9.477754237288135e-06,
"loss": 0.1049,
"step": 493
},
{
"epoch": 0.5232358003442341,
"grad_norm": 0.4801510274410248,
"learning_rate": 9.476694915254238e-06,
"loss": 0.1092,
"step": 494
},
{
"epoch": 0.5242949821263074,
"grad_norm": 0.4750489592552185,
"learning_rate": 9.47563559322034e-06,
"loss": 0.1038,
"step": 495
},
{
"epoch": 0.5253541639083807,
"grad_norm": 0.46922165155410767,
"learning_rate": 9.474576271186441e-06,
"loss": 0.1057,
"step": 496
},
{
"epoch": 0.5264133456904542,
"grad_norm": 0.34764742851257324,
"learning_rate": 9.473516949152544e-06,
"loss": 0.0998,
"step": 497
},
{
"epoch": 0.5274725274725275,
"grad_norm": 0.5010620951652527,
"learning_rate": 9.472457627118646e-06,
"loss": 0.1021,
"step": 498
},
{
"epoch": 0.5285317092546008,
"grad_norm": 0.8562049865722656,
"learning_rate": 9.471398305084747e-06,
"loss": 0.1017,
"step": 499
},
{
"epoch": 0.5295908910366741,
"grad_norm": 0.8053882122039795,
"learning_rate": 9.470338983050848e-06,
"loss": 0.1029,
"step": 500
},
{
"epoch": 0.5306500728187475,
"grad_norm": 0.55223548412323,
"learning_rate": 9.46927966101695e-06,
"loss": 0.1075,
"step": 501
},
{
"epoch": 0.5317092546008209,
"grad_norm": 0.5718464255332947,
"learning_rate": 9.468220338983051e-06,
"loss": 0.1007,
"step": 502
},
{
"epoch": 0.5327684363828942,
"grad_norm": 0.42768341302871704,
"learning_rate": 9.467161016949153e-06,
"loss": 0.1018,
"step": 503
},
{
"epoch": 0.5338276181649676,
"grad_norm": 0.6023754477500916,
"learning_rate": 9.466101694915256e-06,
"loss": 0.1071,
"step": 504
},
{
"epoch": 0.5348867999470409,
"grad_norm": 0.42522376775741577,
"learning_rate": 9.465042372881357e-06,
"loss": 0.103,
"step": 505
},
{
"epoch": 0.5359459817291142,
"grad_norm": 0.316423624753952,
"learning_rate": 9.463983050847459e-06,
"loss": 0.0995,
"step": 506
},
{
"epoch": 0.5370051635111877,
"grad_norm": 0.8633352518081665,
"learning_rate": 9.46292372881356e-06,
"loss": 0.103,
"step": 507
},
{
"epoch": 0.538064345293261,
"grad_norm": 0.4316701889038086,
"learning_rate": 9.461864406779661e-06,
"loss": 0.0972,
"step": 508
},
{
"epoch": 0.5391235270753343,
"grad_norm": 0.3259594142436981,
"learning_rate": 9.460805084745763e-06,
"loss": 0.0972,
"step": 509
},
{
"epoch": 0.5401827088574076,
"grad_norm": 0.6332740187644958,
"learning_rate": 9.459745762711864e-06,
"loss": 0.1005,
"step": 510
},
{
"epoch": 0.541241890639481,
"grad_norm": 0.2722731828689575,
"learning_rate": 9.458686440677967e-06,
"loss": 0.0972,
"step": 511
},
{
"epoch": 0.5423010724215543,
"grad_norm": 0.5190332531929016,
"learning_rate": 9.457627118644069e-06,
"loss": 0.1052,
"step": 512
},
{
"epoch": 0.5433602542036277,
"grad_norm": 0.5916107296943665,
"learning_rate": 9.45656779661017e-06,
"loss": 0.1066,
"step": 513
},
{
"epoch": 0.5444194359857011,
"grad_norm": 0.4169541895389557,
"learning_rate": 9.455508474576272e-06,
"loss": 0.099,
"step": 514
},
{
"epoch": 0.5454786177677744,
"grad_norm": 0.6240091919898987,
"learning_rate": 9.454449152542373e-06,
"loss": 0.1006,
"step": 515
},
{
"epoch": 0.5465377995498477,
"grad_norm": 0.38033053278923035,
"learning_rate": 9.453389830508474e-06,
"loss": 0.102,
"step": 516
},
{
"epoch": 0.5475969813319211,
"grad_norm": 0.31779545545578003,
"learning_rate": 9.452330508474578e-06,
"loss": 0.097,
"step": 517
},
{
"epoch": 0.5486561631139945,
"grad_norm": 0.6237276196479797,
"learning_rate": 9.451271186440679e-06,
"loss": 0.1013,
"step": 518
},
{
"epoch": 0.5497153448960678,
"grad_norm": 1.495983600616455,
"learning_rate": 9.45021186440678e-06,
"loss": 0.1101,
"step": 519
},
{
"epoch": 0.5507745266781411,
"grad_norm": 0.47101595997810364,
"learning_rate": 9.449152542372882e-06,
"loss": 0.1034,
"step": 520
},
{
"epoch": 0.5518337084602145,
"grad_norm": 0.29455289244651794,
"learning_rate": 9.448093220338985e-06,
"loss": 0.1003,
"step": 521
},
{
"epoch": 0.5528928902422878,
"grad_norm": 0.38901370763778687,
"learning_rate": 9.447033898305086e-06,
"loss": 0.1029,
"step": 522
},
{
"epoch": 0.5539520720243611,
"grad_norm": 0.47442498803138733,
"learning_rate": 9.445974576271188e-06,
"loss": 0.1052,
"step": 523
},
{
"epoch": 0.5550112538064346,
"grad_norm": 0.4412713646888733,
"learning_rate": 9.444915254237289e-06,
"loss": 0.1006,
"step": 524
},
{
"epoch": 0.5560704355885079,
"grad_norm": 0.33992066979408264,
"learning_rate": 9.44385593220339e-06,
"loss": 0.0973,
"step": 525
},
{
"epoch": 0.5571296173705812,
"grad_norm": 0.5417588353157043,
"learning_rate": 9.442796610169492e-06,
"loss": 0.1028,
"step": 526
},
{
"epoch": 0.5581887991526546,
"grad_norm": 0.8810229301452637,
"learning_rate": 9.441737288135593e-06,
"loss": 0.1031,
"step": 527
},
{
"epoch": 0.5592479809347279,
"grad_norm": 0.8370358347892761,
"learning_rate": 9.440677966101696e-06,
"loss": 0.1009,
"step": 528
},
{
"epoch": 0.5603071627168013,
"grad_norm": 1.0851960182189941,
"learning_rate": 9.439618644067798e-06,
"loss": 0.0996,
"step": 529
},
{
"epoch": 0.5613663444988746,
"grad_norm": 0.32936275005340576,
"learning_rate": 9.4385593220339e-06,
"loss": 0.1002,
"step": 530
},
{
"epoch": 0.562425526280948,
"grad_norm": 0.6002232432365417,
"learning_rate": 9.4375e-06,
"loss": 0.1062,
"step": 531
},
{
"epoch": 0.5634847080630213,
"grad_norm": 0.5921926498413086,
"learning_rate": 9.436440677966102e-06,
"loss": 0.1056,
"step": 532
},
{
"epoch": 0.5645438898450946,
"grad_norm": 0.45451927185058594,
"learning_rate": 9.435381355932204e-06,
"loss": 0.1037,
"step": 533
},
{
"epoch": 0.565603071627168,
"grad_norm": 0.5011440515518188,
"learning_rate": 9.434322033898305e-06,
"loss": 0.1035,
"step": 534
},
{
"epoch": 0.5666622534092414,
"grad_norm": 0.526629626750946,
"learning_rate": 9.433262711864406e-06,
"loss": 0.1001,
"step": 535
},
{
"epoch": 0.5677214351913147,
"grad_norm": 0.7653958797454834,
"learning_rate": 9.43220338983051e-06,
"loss": 0.1008,
"step": 536
},
{
"epoch": 0.5687806169733881,
"grad_norm": 1.3027257919311523,
"learning_rate": 9.431144067796611e-06,
"loss": 0.1039,
"step": 537
},
{
"epoch": 0.5698397987554614,
"grad_norm": 0.5976331830024719,
"learning_rate": 9.430084745762714e-06,
"loss": 0.0998,
"step": 538
},
{
"epoch": 0.5708989805375347,
"grad_norm": 0.29188409447669983,
"learning_rate": 9.429025423728815e-06,
"loss": 0.0997,
"step": 539
},
{
"epoch": 0.571958162319608,
"grad_norm": 0.4244849979877472,
"learning_rate": 9.427966101694917e-06,
"loss": 0.1062,
"step": 540
},
{
"epoch": 0.5730173441016815,
"grad_norm": 0.4817642569541931,
"learning_rate": 9.426906779661018e-06,
"loss": 0.1035,
"step": 541
},
{
"epoch": 0.5740765258837548,
"grad_norm": 0.5244449973106384,
"learning_rate": 9.42584745762712e-06,
"loss": 0.1046,
"step": 542
},
{
"epoch": 0.5751357076658281,
"grad_norm": 0.5034027695655823,
"learning_rate": 9.424788135593221e-06,
"loss": 0.1054,
"step": 543
},
{
"epoch": 0.5761948894479015,
"grad_norm": 0.4198877215385437,
"learning_rate": 9.423728813559322e-06,
"loss": 0.0991,
"step": 544
},
{
"epoch": 0.5772540712299749,
"grad_norm": 0.6624194979667664,
"learning_rate": 9.422669491525424e-06,
"loss": 0.0983,
"step": 545
},
{
"epoch": 0.5783132530120482,
"grad_norm": 0.3857661783695221,
"learning_rate": 9.421610169491527e-06,
"loss": 0.1005,
"step": 546
},
{
"epoch": 0.5793724347941216,
"grad_norm": 0.6397581100463867,
"learning_rate": 9.420550847457628e-06,
"loss": 0.1007,
"step": 547
},
{
"epoch": 0.5804316165761949,
"grad_norm": 0.7347425818443298,
"learning_rate": 9.41949152542373e-06,
"loss": 0.1012,
"step": 548
},
{
"epoch": 0.5814907983582682,
"grad_norm": 0.3354703187942505,
"learning_rate": 9.418432203389831e-06,
"loss": 0.1046,
"step": 549
},
{
"epoch": 0.5825499801403415,
"grad_norm": 0.5054658055305481,
"learning_rate": 9.417372881355933e-06,
"loss": 0.1045,
"step": 550
},
{
"epoch": 0.583609161922415,
"grad_norm": 0.4023343324661255,
"learning_rate": 9.416313559322034e-06,
"loss": 0.1042,
"step": 551
},
{
"epoch": 0.5846683437044883,
"grad_norm": 0.38671228289604187,
"learning_rate": 9.415254237288135e-06,
"loss": 0.0987,
"step": 552
},
{
"epoch": 0.5857275254865616,
"grad_norm": 0.27861088514328003,
"learning_rate": 9.414194915254239e-06,
"loss": 0.0989,
"step": 553
},
{
"epoch": 0.586786707268635,
"grad_norm": 0.7123656868934631,
"learning_rate": 9.41313559322034e-06,
"loss": 0.1009,
"step": 554
},
{
"epoch": 0.5878458890507083,
"grad_norm": 1.1902016401290894,
"learning_rate": 9.412076271186441e-06,
"loss": 0.1051,
"step": 555
},
{
"epoch": 0.5889050708327817,
"grad_norm": 1.7251577377319336,
"learning_rate": 9.411016949152543e-06,
"loss": 0.1072,
"step": 556
},
{
"epoch": 0.5899642526148551,
"grad_norm": 0.29798540472984314,
"learning_rate": 9.409957627118644e-06,
"loss": 0.1017,
"step": 557
},
{
"epoch": 0.5910234343969284,
"grad_norm": 0.418284147977829,
"learning_rate": 9.408898305084746e-06,
"loss": 0.0978,
"step": 558
},
{
"epoch": 0.5920826161790017,
"grad_norm": 0.49750545620918274,
"learning_rate": 9.407838983050849e-06,
"loss": 0.1067,
"step": 559
},
{
"epoch": 0.593141797961075,
"grad_norm": 0.47228100895881653,
"learning_rate": 9.40677966101695e-06,
"loss": 0.1042,
"step": 560
},
{
"epoch": 0.5942009797431484,
"grad_norm": 0.6839432716369629,
"learning_rate": 9.405720338983051e-06,
"loss": 0.1018,
"step": 561
},
{
"epoch": 0.5952601615252218,
"grad_norm": 0.4121408760547638,
"learning_rate": 9.404661016949153e-06,
"loss": 0.0983,
"step": 562
},
{
"epoch": 0.5963193433072951,
"grad_norm": 0.36210134625434875,
"learning_rate": 9.403601694915256e-06,
"loss": 0.102,
"step": 563
},
{
"epoch": 0.5973785250893685,
"grad_norm": 0.392327219247818,
"learning_rate": 9.402542372881357e-06,
"loss": 0.1017,
"step": 564
},
{
"epoch": 0.5984377068714418,
"grad_norm": 1.075141429901123,
"learning_rate": 9.401483050847459e-06,
"loss": 0.1046,
"step": 565
},
{
"epoch": 0.5994968886535151,
"grad_norm": 0.6502388715744019,
"learning_rate": 9.40042372881356e-06,
"loss": 0.1027,
"step": 566
},
{
"epoch": 0.6005560704355886,
"grad_norm": 0.41776108741760254,
"learning_rate": 9.399364406779662e-06,
"loss": 0.1007,
"step": 567
},
{
"epoch": 0.6016152522176619,
"grad_norm": 0.45106184482574463,
"learning_rate": 9.398305084745763e-06,
"loss": 0.1037,
"step": 568
},
{
"epoch": 0.6026744339997352,
"grad_norm": 0.5499406456947327,
"learning_rate": 9.397245762711864e-06,
"loss": 0.105,
"step": 569
},
{
"epoch": 0.6037336157818085,
"grad_norm": 0.43302425742149353,
"learning_rate": 9.396186440677968e-06,
"loss": 0.1009,
"step": 570
},
{
"epoch": 0.6047927975638819,
"grad_norm": 0.3462725281715393,
"learning_rate": 9.395127118644069e-06,
"loss": 0.0988,
"step": 571
},
{
"epoch": 0.6058519793459552,
"grad_norm": 0.29506900906562805,
"learning_rate": 9.39406779661017e-06,
"loss": 0.0994,
"step": 572
},
{
"epoch": 0.6069111611280286,
"grad_norm": 1.0929666757583618,
"learning_rate": 9.393008474576272e-06,
"loss": 0.1017,
"step": 573
},
{
"epoch": 0.607970342910102,
"grad_norm": 1.0449674129486084,
"learning_rate": 9.391949152542373e-06,
"loss": 0.1034,
"step": 574
},
{
"epoch": 0.6090295246921753,
"grad_norm": 1.248158574104309,
"learning_rate": 9.390889830508475e-06,
"loss": 0.0996,
"step": 575
},
{
"epoch": 0.6100887064742486,
"grad_norm": 0.38209083676338196,
"learning_rate": 9.389830508474576e-06,
"loss": 0.1029,
"step": 576
},
{
"epoch": 0.6111478882563219,
"grad_norm": 0.4725791811943054,
"learning_rate": 9.388771186440679e-06,
"loss": 0.0998,
"step": 577
},
{
"epoch": 0.6122070700383954,
"grad_norm": 0.5096263289451599,
"learning_rate": 9.38771186440678e-06,
"loss": 0.1012,
"step": 578
},
{
"epoch": 0.6132662518204687,
"grad_norm": 0.3964233100414276,
"learning_rate": 9.386652542372882e-06,
"loss": 0.1032,
"step": 579
},
{
"epoch": 0.614325433602542,
"grad_norm": 0.31194186210632324,
"learning_rate": 9.385593220338985e-06,
"loss": 0.0973,
"step": 580
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.3112730383872986,
"learning_rate": 9.384533898305086e-06,
"loss": 0.0969,
"step": 581
},
{
"epoch": 0.6164437971666887,
"grad_norm": 0.7090293765068054,
"learning_rate": 9.383474576271188e-06,
"loss": 0.099,
"step": 582
},
{
"epoch": 0.617502978948762,
"grad_norm": 2.3716042041778564,
"learning_rate": 9.38241525423729e-06,
"loss": 0.1165,
"step": 583
},
{
"epoch": 0.6185621607308355,
"grad_norm": 1.2725728750228882,
"learning_rate": 9.38135593220339e-06,
"loss": 0.1047,
"step": 584
},
{
"epoch": 0.6196213425129088,
"grad_norm": 0.8259700536727905,
"learning_rate": 9.380296610169492e-06,
"loss": 0.1069,
"step": 585
},
{
"epoch": 0.6206805242949821,
"grad_norm": 0.5297931432723999,
"learning_rate": 9.379237288135594e-06,
"loss": 0.1029,
"step": 586
},
{
"epoch": 0.6217397060770554,
"grad_norm": 0.658423900604248,
"learning_rate": 9.378177966101697e-06,
"loss": 0.1086,
"step": 587
},
{
"epoch": 0.6227988878591288,
"grad_norm": 0.6752994656562805,
"learning_rate": 9.377118644067798e-06,
"loss": 0.1063,
"step": 588
},
{
"epoch": 0.6238580696412022,
"grad_norm": 0.45060423016548157,
"learning_rate": 9.3760593220339e-06,
"loss": 0.1021,
"step": 589
},
{
"epoch": 0.6249172514232755,
"grad_norm": 0.394235223531723,
"learning_rate": 9.375000000000001e-06,
"loss": 0.1042,
"step": 590
},
{
"epoch": 0.6259764332053489,
"grad_norm": 0.8248805403709412,
"learning_rate": 9.373940677966102e-06,
"loss": 0.1032,
"step": 591
},
{
"epoch": 0.6270356149874222,
"grad_norm": 1.546491265296936,
"learning_rate": 9.372881355932204e-06,
"loss": 0.1065,
"step": 592
},
{
"epoch": 0.6280947967694955,
"grad_norm": 1.0976604223251343,
"learning_rate": 9.371822033898305e-06,
"loss": 0.1049,
"step": 593
},
{
"epoch": 0.629153978551569,
"grad_norm": 0.31807151436805725,
"learning_rate": 9.370762711864407e-06,
"loss": 0.096,
"step": 594
},
{
"epoch": 0.6302131603336423,
"grad_norm": 0.33156925439834595,
"learning_rate": 9.36970338983051e-06,
"loss": 0.1021,
"step": 595
},
{
"epoch": 0.6312723421157156,
"grad_norm": 0.5377479195594788,
"learning_rate": 9.368644067796611e-06,
"loss": 0.1046,
"step": 596
},
{
"epoch": 0.6323315238977889,
"grad_norm": 0.8017779588699341,
"learning_rate": 9.367584745762712e-06,
"loss": 0.1052,
"step": 597
},
{
"epoch": 0.6333907056798623,
"grad_norm": 0.6710719466209412,
"learning_rate": 9.366525423728814e-06,
"loss": 0.1054,
"step": 598
},
{
"epoch": 0.6344498874619356,
"grad_norm": 0.5041128993034363,
"learning_rate": 9.365466101694915e-06,
"loss": 0.1065,
"step": 599
},
{
"epoch": 0.635509069244009,
"grad_norm": 0.48250842094421387,
"learning_rate": 9.364406779661017e-06,
"loss": 0.1021,
"step": 600
},
{
"epoch": 0.6365682510260824,
"grad_norm": 0.3540663421154022,
"learning_rate": 9.36334745762712e-06,
"loss": 0.0984,
"step": 601
},
{
"epoch": 0.6376274328081557,
"grad_norm": 0.758277177810669,
"learning_rate": 9.362288135593221e-06,
"loss": 0.1028,
"step": 602
},
{
"epoch": 0.638686614590229,
"grad_norm": 1.088519811630249,
"learning_rate": 9.361228813559323e-06,
"loss": 0.1025,
"step": 603
},
{
"epoch": 0.6397457963723024,
"grad_norm": 0.6219644546508789,
"learning_rate": 9.360169491525426e-06,
"loss": 0.1012,
"step": 604
},
{
"epoch": 0.6408049781543758,
"grad_norm": 0.5811134576797485,
"learning_rate": 9.359110169491527e-06,
"loss": 0.1008,
"step": 605
},
{
"epoch": 0.6418641599364491,
"grad_norm": 0.4735073149204254,
"learning_rate": 9.358050847457629e-06,
"loss": 0.107,
"step": 606
},
{
"epoch": 0.6429233417185224,
"grad_norm": 0.42501819133758545,
"learning_rate": 9.35699152542373e-06,
"loss": 0.1035,
"step": 607
},
{
"epoch": 0.6439825235005958,
"grad_norm": 0.5019701719284058,
"learning_rate": 9.355932203389831e-06,
"loss": 0.1002,
"step": 608
},
{
"epoch": 0.6450417052826691,
"grad_norm": 0.29166609048843384,
"learning_rate": 9.354872881355933e-06,
"loss": 0.1006,
"step": 609
},
{
"epoch": 0.6461008870647424,
"grad_norm": 0.8186270594596863,
"learning_rate": 9.353813559322034e-06,
"loss": 0.1038,
"step": 610
},
{
"epoch": 0.6471600688468159,
"grad_norm": 0.9089385867118835,
"learning_rate": 9.352754237288136e-06,
"loss": 0.1045,
"step": 611
},
{
"epoch": 0.6482192506288892,
"grad_norm": 0.54569011926651,
"learning_rate": 9.351694915254239e-06,
"loss": 0.1012,
"step": 612
},
{
"epoch": 0.6492784324109625,
"grad_norm": 0.6908009052276611,
"learning_rate": 9.35063559322034e-06,
"loss": 0.1025,
"step": 613
},
{
"epoch": 0.6503376141930359,
"grad_norm": 0.5601445436477661,
"learning_rate": 9.349576271186442e-06,
"loss": 0.1054,
"step": 614
},
{
"epoch": 0.6513967959751092,
"grad_norm": 0.5484585165977478,
"learning_rate": 9.348516949152543e-06,
"loss": 0.1025,
"step": 615
},
{
"epoch": 0.6524559777571826,
"grad_norm": 0.3607555627822876,
"learning_rate": 9.347457627118644e-06,
"loss": 0.1029,
"step": 616
},
{
"epoch": 0.6535151595392559,
"grad_norm": 0.8862431049346924,
"learning_rate": 9.346398305084746e-06,
"loss": 0.1002,
"step": 617
},
{
"epoch": 0.6545743413213293,
"grad_norm": 0.34155333042144775,
"learning_rate": 9.345338983050847e-06,
"loss": 0.104,
"step": 618
},
{
"epoch": 0.6556335231034026,
"grad_norm": 0.33383896946907043,
"learning_rate": 9.34427966101695e-06,
"loss": 0.1045,
"step": 619
},
{
"epoch": 0.6566927048854759,
"grad_norm": 0.7721969485282898,
"learning_rate": 9.343220338983052e-06,
"loss": 0.1041,
"step": 620
},
{
"epoch": 0.6577518866675494,
"grad_norm": 0.341325581073761,
"learning_rate": 9.342161016949153e-06,
"loss": 0.1061,
"step": 621
},
{
"epoch": 0.6588110684496227,
"grad_norm": 0.4059706926345825,
"learning_rate": 9.341101694915256e-06,
"loss": 0.1019,
"step": 622
},
{
"epoch": 0.659870250231696,
"grad_norm": 0.425484836101532,
"learning_rate": 9.340042372881358e-06,
"loss": 0.1028,
"step": 623
},
{
"epoch": 0.6609294320137694,
"grad_norm": 0.3428255617618561,
"learning_rate": 9.338983050847459e-06,
"loss": 0.0983,
"step": 624
},
{
"epoch": 0.6619886137958427,
"grad_norm": 0.43302783370018005,
"learning_rate": 9.33792372881356e-06,
"loss": 0.1041,
"step": 625
},
{
"epoch": 0.663047795577916,
"grad_norm": 0.4151793122291565,
"learning_rate": 9.336864406779662e-06,
"loss": 0.1053,
"step": 626
},
{
"epoch": 0.6641069773599894,
"grad_norm": 0.3322106599807739,
"learning_rate": 9.335805084745763e-06,
"loss": 0.0977,
"step": 627
},
{
"epoch": 0.6651661591420628,
"grad_norm": 0.3826170265674591,
"learning_rate": 9.334745762711865e-06,
"loss": 0.0987,
"step": 628
},
{
"epoch": 0.6662253409241361,
"grad_norm": 0.45868292450904846,
"learning_rate": 9.333686440677968e-06,
"loss": 0.0975,
"step": 629
},
{
"epoch": 0.6672845227062094,
"grad_norm": 0.9810293912887573,
"learning_rate": 9.33262711864407e-06,
"loss": 0.1012,
"step": 630
},
{
"epoch": 0.6683437044882828,
"grad_norm": 0.6601435542106628,
"learning_rate": 9.33156779661017e-06,
"loss": 0.0984,
"step": 631
},
{
"epoch": 0.6694028862703562,
"grad_norm": 0.35631003975868225,
"learning_rate": 9.330508474576272e-06,
"loss": 0.1006,
"step": 632
},
{
"epoch": 0.6704620680524295,
"grad_norm": 0.5122131705284119,
"learning_rate": 9.329449152542373e-06,
"loss": 0.0995,
"step": 633
},
{
"epoch": 0.6715212498345029,
"grad_norm": 0.4161342680454254,
"learning_rate": 9.328389830508475e-06,
"loss": 0.1002,
"step": 634
},
{
"epoch": 0.6725804316165762,
"grad_norm": 0.39421340823173523,
"learning_rate": 9.327330508474576e-06,
"loss": 0.099,
"step": 635
},
{
"epoch": 0.6736396133986495,
"grad_norm": 0.3188948631286621,
"learning_rate": 9.32627118644068e-06,
"loss": 0.096,
"step": 636
},
{
"epoch": 0.6746987951807228,
"grad_norm": 0.25863417983055115,
"learning_rate": 9.32521186440678e-06,
"loss": 0.0998,
"step": 637
},
{
"epoch": 0.6757579769627963,
"grad_norm": 0.3487548232078552,
"learning_rate": 9.324152542372882e-06,
"loss": 0.098,
"step": 638
},
{
"epoch": 0.6768171587448696,
"grad_norm": 1.3317673206329346,
"learning_rate": 9.323093220338984e-06,
"loss": 0.1035,
"step": 639
},
{
"epoch": 0.6778763405269429,
"grad_norm": 0.9507758021354675,
"learning_rate": 9.322033898305085e-06,
"loss": 0.1063,
"step": 640
},
{
"epoch": 0.6789355223090163,
"grad_norm": 0.49101585149765015,
"learning_rate": 9.320974576271186e-06,
"loss": 0.0989,
"step": 641
},
{
"epoch": 0.6799947040910896,
"grad_norm": 0.3187989294528961,
"learning_rate": 9.319915254237288e-06,
"loss": 0.0992,
"step": 642
},
{
"epoch": 0.681053885873163,
"grad_norm": 0.3617320656776428,
"learning_rate": 9.318855932203391e-06,
"loss": 0.0989,
"step": 643
},
{
"epoch": 0.6821130676552364,
"grad_norm": 0.34317803382873535,
"learning_rate": 9.317796610169492e-06,
"loss": 0.097,
"step": 644
},
{
"epoch": 0.6831722494373097,
"grad_norm": 0.3169088363647461,
"learning_rate": 9.316737288135594e-06,
"loss": 0.1021,
"step": 645
},
{
"epoch": 0.684231431219383,
"grad_norm": 0.2917908728122711,
"learning_rate": 9.315677966101697e-06,
"loss": 0.096,
"step": 646
},
{
"epoch": 0.6852906130014563,
"grad_norm": 0.4770563840866089,
"learning_rate": 9.314618644067798e-06,
"loss": 0.0973,
"step": 647
},
{
"epoch": 0.6863497947835298,
"grad_norm": 1.0763368606567383,
"learning_rate": 9.3135593220339e-06,
"loss": 0.1054,
"step": 648
},
{
"epoch": 0.6874089765656031,
"grad_norm": 1.1050769090652466,
"learning_rate": 9.312500000000001e-06,
"loss": 0.1025,
"step": 649
},
{
"epoch": 0.6884681583476764,
"grad_norm": 0.4197370111942291,
"learning_rate": 9.311440677966102e-06,
"loss": 0.1016,
"step": 650
},
{
"epoch": 0.6895273401297498,
"grad_norm": 0.29724442958831787,
"learning_rate": 9.310381355932204e-06,
"loss": 0.104,
"step": 651
},
{
"epoch": 0.6905865219118231,
"grad_norm": 0.3933386504650116,
"learning_rate": 9.309322033898305e-06,
"loss": 0.1046,
"step": 652
},
{
"epoch": 0.6916457036938964,
"grad_norm": 0.410281240940094,
"learning_rate": 9.308262711864408e-06,
"loss": 0.1016,
"step": 653
},
{
"epoch": 0.6927048854759699,
"grad_norm": 0.6148337125778198,
"learning_rate": 9.30720338983051e-06,
"loss": 0.1007,
"step": 654
},
{
"epoch": 0.6937640672580432,
"grad_norm": 0.41912174224853516,
"learning_rate": 9.306144067796611e-06,
"loss": 0.0959,
"step": 655
},
{
"epoch": 0.6948232490401165,
"grad_norm": 0.3907654583454132,
"learning_rate": 9.305084745762713e-06,
"loss": 0.0986,
"step": 656
},
{
"epoch": 0.6958824308221898,
"grad_norm": 0.24811263382434845,
"learning_rate": 9.304025423728814e-06,
"loss": 0.0963,
"step": 657
},
{
"epoch": 0.6969416126042632,
"grad_norm": 0.4646623134613037,
"learning_rate": 9.302966101694915e-06,
"loss": 0.0973,
"step": 658
},
{
"epoch": 0.6980007943863366,
"grad_norm": 0.873497486114502,
"learning_rate": 9.301906779661017e-06,
"loss": 0.0996,
"step": 659
},
{
"epoch": 0.6990599761684099,
"grad_norm": 0.5654221773147583,
"learning_rate": 9.300847457627118e-06,
"loss": 0.1005,
"step": 660
},
{
"epoch": 0.7001191579504833,
"grad_norm": 0.3629545271396637,
"learning_rate": 9.299788135593221e-06,
"loss": 0.0984,
"step": 661
},
{
"epoch": 0.7011783397325566,
"grad_norm": 0.3742941617965698,
"learning_rate": 9.298728813559323e-06,
"loss": 0.0957,
"step": 662
},
{
"epoch": 0.7022375215146299,
"grad_norm": 0.42546311020851135,
"learning_rate": 9.297669491525424e-06,
"loss": 0.1005,
"step": 663
},
{
"epoch": 0.7032967032967034,
"grad_norm": 0.42576131224632263,
"learning_rate": 9.296610169491527e-06,
"loss": 0.1009,
"step": 664
},
{
"epoch": 0.7043558850787767,
"grad_norm": 0.3426741659641266,
"learning_rate": 9.295550847457629e-06,
"loss": 0.1016,
"step": 665
},
{
"epoch": 0.70541506686085,
"grad_norm": 0.5747078657150269,
"learning_rate": 9.29449152542373e-06,
"loss": 0.0988,
"step": 666
},
{
"epoch": 0.7064742486429233,
"grad_norm": 0.8558834791183472,
"learning_rate": 9.293432203389832e-06,
"loss": 0.0997,
"step": 667
},
{
"epoch": 0.7075334304249967,
"grad_norm": 0.42733004689216614,
"learning_rate": 9.292372881355933e-06,
"loss": 0.0977,
"step": 668
},
{
"epoch": 0.70859261220707,
"grad_norm": 0.4254518449306488,
"learning_rate": 9.291313559322034e-06,
"loss": 0.1014,
"step": 669
},
{
"epoch": 0.7096517939891434,
"grad_norm": 0.4694596529006958,
"learning_rate": 9.290254237288136e-06,
"loss": 0.0976,
"step": 670
},
{
"epoch": 0.7107109757712168,
"grad_norm": 0.4888492226600647,
"learning_rate": 9.289194915254239e-06,
"loss": 0.1005,
"step": 671
},
{
"epoch": 0.7117701575532901,
"grad_norm": 0.34919607639312744,
"learning_rate": 9.28813559322034e-06,
"loss": 0.1037,
"step": 672
},
{
"epoch": 0.7128293393353634,
"grad_norm": 0.37741026282310486,
"learning_rate": 9.287076271186442e-06,
"loss": 0.0982,
"step": 673
},
{
"epoch": 0.7138885211174367,
"grad_norm": 0.3407898247241974,
"learning_rate": 9.286016949152543e-06,
"loss": 0.0969,
"step": 674
},
{
"epoch": 0.7149477028995102,
"grad_norm": 0.4599168300628662,
"learning_rate": 9.284957627118645e-06,
"loss": 0.0985,
"step": 675
},
{
"epoch": 0.7160068846815835,
"grad_norm": 1.6454333066940308,
"learning_rate": 9.283898305084746e-06,
"loss": 0.1072,
"step": 676
},
{
"epoch": 0.7170660664636568,
"grad_norm": 0.4284563362598419,
"learning_rate": 9.282838983050847e-06,
"loss": 0.1034,
"step": 677
},
{
"epoch": 0.7181252482457302,
"grad_norm": 0.4040115475654602,
"learning_rate": 9.28177966101695e-06,
"loss": 0.1005,
"step": 678
},
{
"epoch": 0.7191844300278035,
"grad_norm": 0.31793713569641113,
"learning_rate": 9.280720338983052e-06,
"loss": 0.1022,
"step": 679
},
{
"epoch": 0.7202436118098768,
"grad_norm": 0.314280241727829,
"learning_rate": 9.279661016949153e-06,
"loss": 0.0973,
"step": 680
},
{
"epoch": 0.7213027935919503,
"grad_norm": 0.3845478296279907,
"learning_rate": 9.278601694915255e-06,
"loss": 0.1012,
"step": 681
},
{
"epoch": 0.7223619753740236,
"grad_norm": 0.5738639235496521,
"learning_rate": 9.277542372881356e-06,
"loss": 0.1017,
"step": 682
},
{
"epoch": 0.7234211571560969,
"grad_norm": 0.5146239995956421,
"learning_rate": 9.276483050847457e-06,
"loss": 0.0998,
"step": 683
},
{
"epoch": 0.7244803389381702,
"grad_norm": 0.31752634048461914,
"learning_rate": 9.275423728813559e-06,
"loss": 0.099,
"step": 684
},
{
"epoch": 0.7255395207202436,
"grad_norm": 1.2720450162887573,
"learning_rate": 9.274364406779662e-06,
"loss": 0.102,
"step": 685
},
{
"epoch": 0.726598702502317,
"grad_norm": 0.5716597437858582,
"learning_rate": 9.273305084745763e-06,
"loss": 0.0986,
"step": 686
},
{
"epoch": 0.7276578842843903,
"grad_norm": 0.8185603022575378,
"learning_rate": 9.272245762711865e-06,
"loss": 0.0992,
"step": 687
},
{
"epoch": 0.7287170660664637,
"grad_norm": 0.6696334481239319,
"learning_rate": 9.271186440677968e-06,
"loss": 0.0968,
"step": 688
},
{
"epoch": 0.729776247848537,
"grad_norm": 0.31820279359817505,
"learning_rate": 9.27012711864407e-06,
"loss": 0.0961,
"step": 689
},
{
"epoch": 0.7308354296306103,
"grad_norm": 0.38118937611579895,
"learning_rate": 9.26906779661017e-06,
"loss": 0.0995,
"step": 690
},
{
"epoch": 0.7318946114126837,
"grad_norm": 0.3723813593387604,
"learning_rate": 9.268008474576272e-06,
"loss": 0.0943,
"step": 691
},
{
"epoch": 0.7329537931947571,
"grad_norm": 0.2856021225452423,
"learning_rate": 9.266949152542374e-06,
"loss": 0.0944,
"step": 692
},
{
"epoch": 0.7340129749768304,
"grad_norm": 0.28382545709609985,
"learning_rate": 9.265889830508475e-06,
"loss": 0.0965,
"step": 693
},
{
"epoch": 0.7350721567589037,
"grad_norm": 0.40742388367652893,
"learning_rate": 9.264830508474576e-06,
"loss": 0.0937,
"step": 694
},
{
"epoch": 0.7361313385409771,
"grad_norm": 1.0674604177474976,
"learning_rate": 9.26377118644068e-06,
"loss": 0.1037,
"step": 695
},
{
"epoch": 0.7371905203230504,
"grad_norm": 0.8326146006584167,
"learning_rate": 9.262711864406781e-06,
"loss": 0.1003,
"step": 696
},
{
"epoch": 0.7382497021051238,
"grad_norm": 0.3166416585445404,
"learning_rate": 9.261652542372882e-06,
"loss": 0.0946,
"step": 697
},
{
"epoch": 0.7393088838871972,
"grad_norm": 0.3471001088619232,
"learning_rate": 9.260593220338984e-06,
"loss": 0.1024,
"step": 698
},
{
"epoch": 0.7403680656692705,
"grad_norm": 0.48021578788757324,
"learning_rate": 9.259533898305085e-06,
"loss": 0.1033,
"step": 699
},
{
"epoch": 0.7414272474513438,
"grad_norm": 0.40537741780281067,
"learning_rate": 9.258474576271187e-06,
"loss": 0.0966,
"step": 700
},
{
"epoch": 0.7424864292334172,
"grad_norm": 0.32999980449676514,
"learning_rate": 9.257415254237288e-06,
"loss": 0.1037,
"step": 701
},
{
"epoch": 0.7435456110154905,
"grad_norm": 0.31000056862831116,
"learning_rate": 9.256355932203391e-06,
"loss": 0.1036,
"step": 702
},
{
"epoch": 0.7446047927975639,
"grad_norm": 0.931529700756073,
"learning_rate": 9.255296610169492e-06,
"loss": 0.0996,
"step": 703
},
{
"epoch": 0.7456639745796372,
"grad_norm": 0.9548348188400269,
"learning_rate": 9.254237288135594e-06,
"loss": 0.0993,
"step": 704
},
{
"epoch": 0.7467231563617106,
"grad_norm": 0.4264669120311737,
"learning_rate": 9.253177966101695e-06,
"loss": 0.1025,
"step": 705
},
{
"epoch": 0.7477823381437839,
"grad_norm": 0.483395516872406,
"learning_rate": 9.252118644067798e-06,
"loss": 0.0998,
"step": 706
},
{
"epoch": 0.7488415199258572,
"grad_norm": 0.8699389696121216,
"learning_rate": 9.2510593220339e-06,
"loss": 0.0972,
"step": 707
},
{
"epoch": 0.7499007017079307,
"grad_norm": 0.6093174815177917,
"learning_rate": 9.250000000000001e-06,
"loss": 0.1021,
"step": 708
},
{
"epoch": 0.750959883490004,
"grad_norm": 0.3400423526763916,
"learning_rate": 9.248940677966103e-06,
"loss": 0.0965,
"step": 709
},
{
"epoch": 0.7520190652720773,
"grad_norm": 0.2628816068172455,
"learning_rate": 9.247881355932204e-06,
"loss": 0.0942,
"step": 710
},
{
"epoch": 0.7530782470541507,
"grad_norm": 0.5596092343330383,
"learning_rate": 9.246822033898305e-06,
"loss": 0.0985,
"step": 711
},
{
"epoch": 0.754137428836224,
"grad_norm": 0.3505241870880127,
"learning_rate": 9.245762711864409e-06,
"loss": 0.0977,
"step": 712
},
{
"epoch": 0.7551966106182973,
"grad_norm": 1.2891738414764404,
"learning_rate": 9.24470338983051e-06,
"loss": 0.1018,
"step": 713
},
{
"epoch": 0.7562557924003707,
"grad_norm": 0.7353067994117737,
"learning_rate": 9.243644067796611e-06,
"loss": 0.0998,
"step": 714
},
{
"epoch": 0.7573149741824441,
"grad_norm": 0.5030686259269714,
"learning_rate": 9.242584745762713e-06,
"loss": 0.0948,
"step": 715
},
{
"epoch": 0.7583741559645174,
"grad_norm": 0.3368113338947296,
"learning_rate": 9.241525423728814e-06,
"loss": 0.0965,
"step": 716
},
{
"epoch": 0.7594333377465907,
"grad_norm": 0.42246052622795105,
"learning_rate": 9.240466101694916e-06,
"loss": 0.0975,
"step": 717
},
{
"epoch": 0.7604925195286641,
"grad_norm": 0.4807589650154114,
"learning_rate": 9.239406779661017e-06,
"loss": 0.0996,
"step": 718
},
{
"epoch": 0.7615517013107375,
"grad_norm": 0.9803975224494934,
"learning_rate": 9.238347457627118e-06,
"loss": 0.102,
"step": 719
},
{
"epoch": 0.7626108830928108,
"grad_norm": 0.35242709517478943,
"learning_rate": 9.237288135593222e-06,
"loss": 0.1007,
"step": 720
},
{
"epoch": 0.7636700648748842,
"grad_norm": 0.42026689648628235,
"learning_rate": 9.236228813559323e-06,
"loss": 0.095,
"step": 721
},
{
"epoch": 0.7647292466569575,
"grad_norm": 0.9179818034172058,
"learning_rate": 9.235169491525424e-06,
"loss": 0.0978,
"step": 722
},
{
"epoch": 0.7657884284390308,
"grad_norm": 0.645462155342102,
"learning_rate": 9.234110169491526e-06,
"loss": 0.0983,
"step": 723
},
{
"epoch": 0.7668476102211041,
"grad_norm": 1.0488462448120117,
"learning_rate": 9.233050847457627e-06,
"loss": 0.0978,
"step": 724
},
{
"epoch": 0.7679067920031776,
"grad_norm": 0.37248262763023376,
"learning_rate": 9.231991525423729e-06,
"loss": 0.1027,
"step": 725
},
{
"epoch": 0.7689659737852509,
"grad_norm": 0.4381054937839508,
"learning_rate": 9.23093220338983e-06,
"loss": 0.101,
"step": 726
},
{
"epoch": 0.7700251555673242,
"grad_norm": 0.428743839263916,
"learning_rate": 9.229872881355933e-06,
"loss": 0.0997,
"step": 727
},
{
"epoch": 0.7710843373493976,
"grad_norm": 0.455432653427124,
"learning_rate": 9.228813559322035e-06,
"loss": 0.1007,
"step": 728
},
{
"epoch": 0.772143519131471,
"grad_norm": 0.4211903512477875,
"learning_rate": 9.227754237288138e-06,
"loss": 0.1019,
"step": 729
},
{
"epoch": 0.7732027009135443,
"grad_norm": 0.3063182830810547,
"learning_rate": 9.226694915254239e-06,
"loss": 0.098,
"step": 730
},
{
"epoch": 0.7742618826956177,
"grad_norm": 0.37543249130249023,
"learning_rate": 9.22563559322034e-06,
"loss": 0.0957,
"step": 731
},
{
"epoch": 0.775321064477691,
"grad_norm": 1.2880802154541016,
"learning_rate": 9.224576271186442e-06,
"loss": 0.1037,
"step": 732
},
{
"epoch": 0.7763802462597643,
"grad_norm": 0.6766379475593567,
"learning_rate": 9.223516949152543e-06,
"loss": 0.0988,
"step": 733
},
{
"epoch": 0.7774394280418376,
"grad_norm": 0.5379982590675354,
"learning_rate": 9.222457627118645e-06,
"loss": 0.0977,
"step": 734
},
{
"epoch": 0.7784986098239111,
"grad_norm": 0.35098257660865784,
"learning_rate": 9.221398305084746e-06,
"loss": 0.1021,
"step": 735
},
{
"epoch": 0.7795577916059844,
"grad_norm": 0.3439309298992157,
"learning_rate": 9.220338983050847e-06,
"loss": 0.0996,
"step": 736
},
{
"epoch": 0.7806169733880577,
"grad_norm": 0.38784995675086975,
"learning_rate": 9.21927966101695e-06,
"loss": 0.0969,
"step": 737
},
{
"epoch": 0.7816761551701311,
"grad_norm": 0.3558436930179596,
"learning_rate": 9.218220338983052e-06,
"loss": 0.0995,
"step": 738
},
{
"epoch": 0.7827353369522044,
"grad_norm": 0.31726735830307007,
"learning_rate": 9.217161016949153e-06,
"loss": 0.0968,
"step": 739
},
{
"epoch": 0.7837945187342777,
"grad_norm": 0.3207642138004303,
"learning_rate": 9.216101694915255e-06,
"loss": 0.0983,
"step": 740
},
{
"epoch": 0.7848537005163512,
"grad_norm": 0.3509203791618347,
"learning_rate": 9.215042372881356e-06,
"loss": 0.0951,
"step": 741
},
{
"epoch": 0.7859128822984245,
"grad_norm": 0.8775836229324341,
"learning_rate": 9.213983050847458e-06,
"loss": 0.1061,
"step": 742
},
{
"epoch": 0.7869720640804978,
"grad_norm": 1.3842896223068237,
"learning_rate": 9.212923728813559e-06,
"loss": 0.1037,
"step": 743
},
{
"epoch": 0.7880312458625711,
"grad_norm": 0.8937103152275085,
"learning_rate": 9.211864406779662e-06,
"loss": 0.0983,
"step": 744
},
{
"epoch": 0.7890904276446445,
"grad_norm": 1.2839686870574951,
"learning_rate": 9.210805084745764e-06,
"loss": 0.1005,
"step": 745
},
{
"epoch": 0.7901496094267179,
"grad_norm": 0.5457701683044434,
"learning_rate": 9.209745762711865e-06,
"loss": 0.0983,
"step": 746
},
{
"epoch": 0.7912087912087912,
"grad_norm": 0.533908486366272,
"learning_rate": 9.208686440677966e-06,
"loss": 0.1009,
"step": 747
},
{
"epoch": 0.7922679729908646,
"grad_norm": 0.4222644865512848,
"learning_rate": 9.207627118644068e-06,
"loss": 0.0978,
"step": 748
},
{
"epoch": 0.7933271547729379,
"grad_norm": 0.3377843499183655,
"learning_rate": 9.206567796610171e-06,
"loss": 0.0966,
"step": 749
},
{
"epoch": 0.7943863365550112,
"grad_norm": 0.3032763600349426,
"learning_rate": 9.205508474576272e-06,
"loss": 0.0983,
"step": 750
},
{
"epoch": 0.7954455183370847,
"grad_norm": 0.8250672817230225,
"learning_rate": 9.204449152542374e-06,
"loss": 0.1021,
"step": 751
},
{
"epoch": 0.796504700119158,
"grad_norm": 0.9752795100212097,
"learning_rate": 9.203389830508475e-06,
"loss": 0.1019,
"step": 752
},
{
"epoch": 0.7975638819012313,
"grad_norm": 0.6035350561141968,
"learning_rate": 9.202330508474577e-06,
"loss": 0.0981,
"step": 753
},
{
"epoch": 0.7986230636833046,
"grad_norm": 0.2572724521160126,
"learning_rate": 9.20127118644068e-06,
"loss": 0.097,
"step": 754
},
{
"epoch": 0.799682245465378,
"grad_norm": 0.34515053033828735,
"learning_rate": 9.200211864406781e-06,
"loss": 0.1,
"step": 755
},
{
"epoch": 0.8007414272474513,
"grad_norm": 0.4827374219894409,
"learning_rate": 9.199152542372882e-06,
"loss": 0.0979,
"step": 756
},
{
"epoch": 0.8018006090295247,
"grad_norm": 0.3313664197921753,
"learning_rate": 9.198093220338984e-06,
"loss": 0.0985,
"step": 757
},
{
"epoch": 0.8028597908115981,
"grad_norm": 0.8202570080757141,
"learning_rate": 9.197033898305085e-06,
"loss": 0.1025,
"step": 758
},
{
"epoch": 0.8039189725936714,
"grad_norm": 0.29763662815093994,
"learning_rate": 9.195974576271187e-06,
"loss": 0.099,
"step": 759
},
{
"epoch": 0.8049781543757447,
"grad_norm": 0.4088769853115082,
"learning_rate": 9.194915254237288e-06,
"loss": 0.0978,
"step": 760
},
{
"epoch": 0.8060373361578181,
"grad_norm": 0.31369948387145996,
"learning_rate": 9.193855932203391e-06,
"loss": 0.0996,
"step": 761
},
{
"epoch": 0.8070965179398915,
"grad_norm": 0.5770434737205505,
"learning_rate": 9.192796610169493e-06,
"loss": 0.1017,
"step": 762
},
{
"epoch": 0.8081556997219648,
"grad_norm": 1.0593013763427734,
"learning_rate": 9.191737288135594e-06,
"loss": 0.1015,
"step": 763
},
{
"epoch": 0.8092148815040381,
"grad_norm": 0.385418564081192,
"learning_rate": 9.190677966101695e-06,
"loss": 0.0963,
"step": 764
},
{
"epoch": 0.8102740632861115,
"grad_norm": 0.2897985279560089,
"learning_rate": 9.189618644067797e-06,
"loss": 0.0987,
"step": 765
},
{
"epoch": 0.8113332450681848,
"grad_norm": 0.2844506800174713,
"learning_rate": 9.188559322033898e-06,
"loss": 0.0977,
"step": 766
},
{
"epoch": 0.8123924268502581,
"grad_norm": 0.33510622382164,
"learning_rate": 9.1875e-06,
"loss": 0.0952,
"step": 767
},
{
"epoch": 0.8134516086323316,
"grad_norm": 0.37175965309143066,
"learning_rate": 9.186440677966101e-06,
"loss": 0.096,
"step": 768
},
{
"epoch": 0.8145107904144049,
"grad_norm": 0.38538381457328796,
"learning_rate": 9.185381355932204e-06,
"loss": 0.098,
"step": 769
},
{
"epoch": 0.8155699721964782,
"grad_norm": 1.1351072788238525,
"learning_rate": 9.184322033898306e-06,
"loss": 0.0953,
"step": 770
},
{
"epoch": 0.8166291539785515,
"grad_norm": 0.7433465123176575,
"learning_rate": 9.183262711864409e-06,
"loss": 0.0979,
"step": 771
},
{
"epoch": 0.8176883357606249,
"grad_norm": 0.5828851461410522,
"learning_rate": 9.18220338983051e-06,
"loss": 0.096,
"step": 772
},
{
"epoch": 0.8187475175426983,
"grad_norm": 0.350429505109787,
"learning_rate": 9.181144067796612e-06,
"loss": 0.0944,
"step": 773
},
{
"epoch": 0.8198066993247716,
"grad_norm": 0.5436673760414124,
"learning_rate": 9.180084745762713e-06,
"loss": 0.093,
"step": 774
},
{
"epoch": 0.820865881106845,
"grad_norm": 0.3649758994579315,
"learning_rate": 9.179025423728814e-06,
"loss": 0.0964,
"step": 775
},
{
"epoch": 0.8219250628889183,
"grad_norm": 0.3699047863483429,
"learning_rate": 9.177966101694916e-06,
"loss": 0.1,
"step": 776
},
{
"epoch": 0.8229842446709916,
"grad_norm": 0.5588839054107666,
"learning_rate": 9.176906779661017e-06,
"loss": 0.0982,
"step": 777
},
{
"epoch": 0.824043426453065,
"grad_norm": 0.28516167402267456,
"learning_rate": 9.17584745762712e-06,
"loss": 0.0948,
"step": 778
},
{
"epoch": 0.8251026082351384,
"grad_norm": 0.5804077386856079,
"learning_rate": 9.174788135593222e-06,
"loss": 0.0968,
"step": 779
},
{
"epoch": 0.8261617900172117,
"grad_norm": 0.5543787479400635,
"learning_rate": 9.173728813559323e-06,
"loss": 0.0951,
"step": 780
},
{
"epoch": 0.827220971799285,
"grad_norm": 0.8272714018821716,
"learning_rate": 9.172669491525425e-06,
"loss": 0.0981,
"step": 781
},
{
"epoch": 0.8282801535813584,
"grad_norm": 0.8714408278465271,
"learning_rate": 9.171610169491526e-06,
"loss": 0.096,
"step": 782
},
{
"epoch": 0.8293393353634317,
"grad_norm": 0.34228700399398804,
"learning_rate": 9.170550847457627e-06,
"loss": 0.101,
"step": 783
},
{
"epoch": 0.830398517145505,
"grad_norm": 0.6962174773216248,
"learning_rate": 9.169491525423729e-06,
"loss": 0.098,
"step": 784
},
{
"epoch": 0.8314576989275785,
"grad_norm": 0.35371822118759155,
"learning_rate": 9.16843220338983e-06,
"loss": 0.1023,
"step": 785
},
{
"epoch": 0.8325168807096518,
"grad_norm": 0.36805784702301025,
"learning_rate": 9.167372881355933e-06,
"loss": 0.0958,
"step": 786
},
{
"epoch": 0.8335760624917251,
"grad_norm": 0.2960814833641052,
"learning_rate": 9.166313559322035e-06,
"loss": 0.0987,
"step": 787
},
{
"epoch": 0.8346352442737985,
"grad_norm": 0.3210856020450592,
"learning_rate": 9.165254237288136e-06,
"loss": 0.0978,
"step": 788
},
{
"epoch": 0.8356944260558719,
"grad_norm": 0.6966099143028259,
"learning_rate": 9.164194915254238e-06,
"loss": 0.0986,
"step": 789
},
{
"epoch": 0.8367536078379452,
"grad_norm": 0.86994469165802,
"learning_rate": 9.163135593220339e-06,
"loss": 0.1009,
"step": 790
},
{
"epoch": 0.8378127896200185,
"grad_norm": 1.0499882698059082,
"learning_rate": 9.162076271186442e-06,
"loss": 0.1001,
"step": 791
},
{
"epoch": 0.8388719714020919,
"grad_norm": 0.25604158639907837,
"learning_rate": 9.161016949152543e-06,
"loss": 0.0968,
"step": 792
},
{
"epoch": 0.8399311531841652,
"grad_norm": 0.2237943410873413,
"learning_rate": 9.159957627118645e-06,
"loss": 0.0974,
"step": 793
},
{
"epoch": 0.8409903349662385,
"grad_norm": 0.2961702048778534,
"learning_rate": 9.158898305084746e-06,
"loss": 0.0963,
"step": 794
},
{
"epoch": 0.842049516748312,
"grad_norm": 0.5092357397079468,
"learning_rate": 9.157838983050848e-06,
"loss": 0.0986,
"step": 795
},
{
"epoch": 0.8431086985303853,
"grad_norm": 0.8456157445907593,
"learning_rate": 9.15677966101695e-06,
"loss": 0.0989,
"step": 796
},
{
"epoch": 0.8441678803124586,
"grad_norm": 0.25902438163757324,
"learning_rate": 9.155720338983052e-06,
"loss": 0.0971,
"step": 797
},
{
"epoch": 0.845227062094532,
"grad_norm": 0.43051254749298096,
"learning_rate": 9.154661016949154e-06,
"loss": 0.094,
"step": 798
},
{
"epoch": 0.8462862438766053,
"grad_norm": 1.1956896781921387,
"learning_rate": 9.153601694915255e-06,
"loss": 0.0955,
"step": 799
},
{
"epoch": 0.8473454256586787,
"grad_norm": 0.40283483266830444,
"learning_rate": 9.152542372881356e-06,
"loss": 0.0948,
"step": 800
},
{
"epoch": 0.848404607440752,
"grad_norm": 1.097756266593933,
"learning_rate": 9.151483050847458e-06,
"loss": 0.1013,
"step": 801
},
{
"epoch": 0.8494637892228254,
"grad_norm": 0.4723232686519623,
"learning_rate": 9.15042372881356e-06,
"loss": 0.0934,
"step": 802
},
{
"epoch": 0.8505229710048987,
"grad_norm": 0.35449033975601196,
"learning_rate": 9.149364406779662e-06,
"loss": 0.0944,
"step": 803
},
{
"epoch": 0.851582152786972,
"grad_norm": 0.33567023277282715,
"learning_rate": 9.148305084745764e-06,
"loss": 0.0987,
"step": 804
},
{
"epoch": 0.8526413345690455,
"grad_norm": 0.3942662179470062,
"learning_rate": 9.147245762711865e-06,
"loss": 0.0972,
"step": 805
},
{
"epoch": 0.8537005163511188,
"grad_norm": 0.3090916872024536,
"learning_rate": 9.146186440677967e-06,
"loss": 0.0962,
"step": 806
},
{
"epoch": 0.8547596981331921,
"grad_norm": 0.7566470503807068,
"learning_rate": 9.145127118644068e-06,
"loss": 0.101,
"step": 807
},
{
"epoch": 0.8558188799152655,
"grad_norm": 0.3098606467247009,
"learning_rate": 9.14406779661017e-06,
"loss": 0.098,
"step": 808
},
{
"epoch": 0.8568780616973388,
"grad_norm": 0.6233766078948975,
"learning_rate": 9.14300847457627e-06,
"loss": 0.0964,
"step": 809
},
{
"epoch": 0.8579372434794121,
"grad_norm": 0.5337977409362793,
"learning_rate": 9.141949152542374e-06,
"loss": 0.1023,
"step": 810
},
{
"epoch": 0.8589964252614855,
"grad_norm": 0.33733704686164856,
"learning_rate": 9.140889830508475e-06,
"loss": 0.0966,
"step": 811
},
{
"epoch": 0.8600556070435589,
"grad_norm": 0.25345900654792786,
"learning_rate": 9.139830508474577e-06,
"loss": 0.0981,
"step": 812
},
{
"epoch": 0.8611147888256322,
"grad_norm": 0.42174944281578064,
"learning_rate": 9.13877118644068e-06,
"loss": 0.0975,
"step": 813
},
{
"epoch": 0.8621739706077055,
"grad_norm": 0.5487157702445984,
"learning_rate": 9.137711864406781e-06,
"loss": 0.0967,
"step": 814
},
{
"epoch": 0.8632331523897789,
"grad_norm": 0.2961113154888153,
"learning_rate": 9.136652542372883e-06,
"loss": 0.0982,
"step": 815
},
{
"epoch": 0.8642923341718523,
"grad_norm": 0.26182422041893005,
"learning_rate": 9.135593220338984e-06,
"loss": 0.0957,
"step": 816
},
{
"epoch": 0.8653515159539256,
"grad_norm": 0.2555879056453705,
"learning_rate": 9.134533898305085e-06,
"loss": 0.0891,
"step": 817
},
{
"epoch": 0.866410697735999,
"grad_norm": 0.6295573711395264,
"learning_rate": 9.133474576271187e-06,
"loss": 0.0967,
"step": 818
},
{
"epoch": 0.8674698795180723,
"grad_norm": 0.2758654057979584,
"learning_rate": 9.132415254237288e-06,
"loss": 0.0965,
"step": 819
},
{
"epoch": 0.8685290613001456,
"grad_norm": 1.2126902341842651,
"learning_rate": 9.131355932203391e-06,
"loss": 0.0979,
"step": 820
},
{
"epoch": 0.8695882430822189,
"grad_norm": 0.30555427074432373,
"learning_rate": 9.130296610169493e-06,
"loss": 0.0942,
"step": 821
},
{
"epoch": 0.8706474248642924,
"grad_norm": 0.3945145308971405,
"learning_rate": 9.129237288135594e-06,
"loss": 0.0963,
"step": 822
},
{
"epoch": 0.8717066066463657,
"grad_norm": 0.2948249876499176,
"learning_rate": 9.128177966101696e-06,
"loss": 0.0987,
"step": 823
},
{
"epoch": 0.872765788428439,
"grad_norm": 0.32726436853408813,
"learning_rate": 9.127118644067797e-06,
"loss": 0.0956,
"step": 824
},
{
"epoch": 0.8738249702105124,
"grad_norm": 0.5176602602005005,
"learning_rate": 9.126059322033898e-06,
"loss": 0.0992,
"step": 825
},
{
"epoch": 0.8748841519925857,
"grad_norm": 0.2725953459739685,
"learning_rate": 9.125e-06,
"loss": 0.0971,
"step": 826
},
{
"epoch": 0.875943333774659,
"grad_norm": 0.275778591632843,
"learning_rate": 9.123940677966103e-06,
"loss": 0.0946,
"step": 827
},
{
"epoch": 0.8770025155567325,
"grad_norm": 0.6902645826339722,
"learning_rate": 9.122881355932204e-06,
"loss": 0.0975,
"step": 828
},
{
"epoch": 0.8780616973388058,
"grad_norm": 0.5743526220321655,
"learning_rate": 9.121822033898306e-06,
"loss": 0.0978,
"step": 829
},
{
"epoch": 0.8791208791208791,
"grad_norm": 0.3797874450683594,
"learning_rate": 9.120762711864407e-06,
"loss": 0.0947,
"step": 830
},
{
"epoch": 0.8801800609029524,
"grad_norm": 0.30438700318336487,
"learning_rate": 9.119703389830509e-06,
"loss": 0.0972,
"step": 831
},
{
"epoch": 0.8812392426850258,
"grad_norm": 0.25724926590919495,
"learning_rate": 9.11864406779661e-06,
"loss": 0.0945,
"step": 832
},
{
"epoch": 0.8822984244670992,
"grad_norm": 0.3248598277568817,
"learning_rate": 9.117584745762713e-06,
"loss": 0.0938,
"step": 833
},
{
"epoch": 0.8833576062491725,
"grad_norm": 0.40238186717033386,
"learning_rate": 9.116525423728815e-06,
"loss": 0.0943,
"step": 834
},
{
"epoch": 0.8844167880312459,
"grad_norm": 0.4883701801300049,
"learning_rate": 9.115466101694916e-06,
"loss": 0.0982,
"step": 835
},
{
"epoch": 0.8854759698133192,
"grad_norm": 0.46604031324386597,
"learning_rate": 9.114406779661017e-06,
"loss": 0.0968,
"step": 836
},
{
"epoch": 0.8865351515953925,
"grad_norm": 0.32364338636398315,
"learning_rate": 9.11334745762712e-06,
"loss": 0.0978,
"step": 837
},
{
"epoch": 0.887594333377466,
"grad_norm": 0.2980561852455139,
"learning_rate": 9.112288135593222e-06,
"loss": 0.0939,
"step": 838
},
{
"epoch": 0.8886535151595393,
"grad_norm": 0.2934180796146393,
"learning_rate": 9.111228813559323e-06,
"loss": 0.0943,
"step": 839
},
{
"epoch": 0.8897126969416126,
"grad_norm": 0.3933320939540863,
"learning_rate": 9.110169491525425e-06,
"loss": 0.092,
"step": 840
},
{
"epoch": 0.8907718787236859,
"grad_norm": 0.5394145250320435,
"learning_rate": 9.109110169491526e-06,
"loss": 0.0966,
"step": 841
},
{
"epoch": 0.8918310605057593,
"grad_norm": 0.4660944938659668,
"learning_rate": 9.108050847457628e-06,
"loss": 0.0971,
"step": 842
},
{
"epoch": 0.8928902422878326,
"grad_norm": 0.2673965096473694,
"learning_rate": 9.106991525423729e-06,
"loss": 0.0956,
"step": 843
},
{
"epoch": 0.893949424069906,
"grad_norm": 0.9778403043746948,
"learning_rate": 9.10593220338983e-06,
"loss": 0.0956,
"step": 844
},
{
"epoch": 0.8950086058519794,
"grad_norm": 0.5092254877090454,
"learning_rate": 9.104872881355933e-06,
"loss": 0.0954,
"step": 845
},
{
"epoch": 0.8960677876340527,
"grad_norm": 0.2559935450553894,
"learning_rate": 9.103813559322035e-06,
"loss": 0.0928,
"step": 846
},
{
"epoch": 0.897126969416126,
"grad_norm": 0.2735789716243744,
"learning_rate": 9.102754237288136e-06,
"loss": 0.0941,
"step": 847
},
{
"epoch": 0.8981861511981994,
"grad_norm": 0.6607424020767212,
"learning_rate": 9.101694915254238e-06,
"loss": 0.0934,
"step": 848
},
{
"epoch": 0.8992453329802728,
"grad_norm": 0.5593097805976868,
"learning_rate": 9.100635593220339e-06,
"loss": 0.0956,
"step": 849
},
{
"epoch": 0.9003045147623461,
"grad_norm": 0.7994453310966492,
"learning_rate": 9.09957627118644e-06,
"loss": 0.0962,
"step": 850
},
{
"epoch": 0.9013636965444194,
"grad_norm": 0.29829198122024536,
"learning_rate": 9.098516949152542e-06,
"loss": 0.0971,
"step": 851
},
{
"epoch": 0.9024228783264928,
"grad_norm": 0.3302725851535797,
"learning_rate": 9.097457627118645e-06,
"loss": 0.0945,
"step": 852
},
{
"epoch": 0.9034820601085661,
"grad_norm": 0.3341391682624817,
"learning_rate": 9.096398305084746e-06,
"loss": 0.0933,
"step": 853
},
{
"epoch": 0.9045412418906394,
"grad_norm": 0.45615267753601074,
"learning_rate": 9.09533898305085e-06,
"loss": 0.0945,
"step": 854
},
{
"epoch": 0.9056004236727129,
"grad_norm": 0.3367563784122467,
"learning_rate": 9.094279661016951e-06,
"loss": 0.096,
"step": 855
},
{
"epoch": 0.9066596054547862,
"grad_norm": 0.4675491154193878,
"learning_rate": 9.093220338983052e-06,
"loss": 0.0921,
"step": 856
},
{
"epoch": 0.9077187872368595,
"grad_norm": 1.0834640264511108,
"learning_rate": 9.092161016949154e-06,
"loss": 0.0967,
"step": 857
},
{
"epoch": 0.9087779690189329,
"grad_norm": 0.336375892162323,
"learning_rate": 9.091101694915255e-06,
"loss": 0.0976,
"step": 858
},
{
"epoch": 0.9098371508010062,
"grad_norm": 0.5236802101135254,
"learning_rate": 9.090042372881357e-06,
"loss": 0.0926,
"step": 859
},
{
"epoch": 0.9108963325830796,
"grad_norm": 0.30341917276382446,
"learning_rate": 9.088983050847458e-06,
"loss": 0.0929,
"step": 860
},
{
"epoch": 0.9119555143651529,
"grad_norm": 0.33530858159065247,
"learning_rate": 9.08792372881356e-06,
"loss": 0.0966,
"step": 861
},
{
"epoch": 0.9130146961472263,
"grad_norm": 0.39219993352890015,
"learning_rate": 9.086864406779663e-06,
"loss": 0.0964,
"step": 862
},
{
"epoch": 0.9140738779292996,
"grad_norm": 1.3592567443847656,
"learning_rate": 9.085805084745764e-06,
"loss": 0.1003,
"step": 863
},
{
"epoch": 0.9151330597113729,
"grad_norm": 0.28577157855033875,
"learning_rate": 9.084745762711865e-06,
"loss": 0.1003,
"step": 864
},
{
"epoch": 0.9161922414934464,
"grad_norm": 0.2658151686191559,
"learning_rate": 9.083686440677967e-06,
"loss": 0.0993,
"step": 865
},
{
"epoch": 0.9172514232755197,
"grad_norm": 0.26816198229789734,
"learning_rate": 9.082627118644068e-06,
"loss": 0.0915,
"step": 866
},
{
"epoch": 0.918310605057593,
"grad_norm": 0.5962366461753845,
"learning_rate": 9.08156779661017e-06,
"loss": 0.0972,
"step": 867
},
{
"epoch": 0.9193697868396663,
"grad_norm": 0.2852391302585602,
"learning_rate": 9.080508474576271e-06,
"loss": 0.0958,
"step": 868
},
{
"epoch": 0.9204289686217397,
"grad_norm": 0.44219645857810974,
"learning_rate": 9.079449152542374e-06,
"loss": 0.0978,
"step": 869
},
{
"epoch": 0.921488150403813,
"grad_norm": 0.27444911003112793,
"learning_rate": 9.078389830508476e-06,
"loss": 0.0942,
"step": 870
},
{
"epoch": 0.9225473321858864,
"grad_norm": 0.5224287509918213,
"learning_rate": 9.077330508474577e-06,
"loss": 0.0943,
"step": 871
},
{
"epoch": 0.9236065139679598,
"grad_norm": 0.2701222896575928,
"learning_rate": 9.076271186440678e-06,
"loss": 0.0936,
"step": 872
},
{
"epoch": 0.9246656957500331,
"grad_norm": 0.3342016637325287,
"learning_rate": 9.07521186440678e-06,
"loss": 0.0894,
"step": 873
},
{
"epoch": 0.9257248775321064,
"grad_norm": 0.30203700065612793,
"learning_rate": 9.074152542372881e-06,
"loss": 0.0958,
"step": 874
},
{
"epoch": 0.9267840593141798,
"grad_norm": 0.5597049593925476,
"learning_rate": 9.073093220338984e-06,
"loss": 0.0937,
"step": 875
},
{
"epoch": 0.9278432410962532,
"grad_norm": 0.3891024589538574,
"learning_rate": 9.072033898305086e-06,
"loss": 0.0924,
"step": 876
},
{
"epoch": 0.9289024228783265,
"grad_norm": 0.8529596924781799,
"learning_rate": 9.070974576271187e-06,
"loss": 0.0927,
"step": 877
},
{
"epoch": 0.9299616046603998,
"grad_norm": 0.24214965105056763,
"learning_rate": 9.069915254237288e-06,
"loss": 0.0918,
"step": 878
},
{
"epoch": 0.9310207864424732,
"grad_norm": 0.26121532917022705,
"learning_rate": 9.068855932203392e-06,
"loss": 0.0972,
"step": 879
},
{
"epoch": 0.9320799682245465,
"grad_norm": 0.2927854359149933,
"learning_rate": 9.067796610169493e-06,
"loss": 0.0932,
"step": 880
},
{
"epoch": 0.9331391500066198,
"grad_norm": 0.30440691113471985,
"learning_rate": 9.066737288135594e-06,
"loss": 0.0943,
"step": 881
},
{
"epoch": 0.9341983317886933,
"grad_norm": 0.2894492745399475,
"learning_rate": 9.065677966101696e-06,
"loss": 0.0919,
"step": 882
},
{
"epoch": 0.9352575135707666,
"grad_norm": 0.28730008006095886,
"learning_rate": 9.064618644067797e-06,
"loss": 0.0924,
"step": 883
},
{
"epoch": 0.9363166953528399,
"grad_norm": 1.8249741792678833,
"learning_rate": 9.063559322033899e-06,
"loss": 0.0961,
"step": 884
},
{
"epoch": 0.9373758771349133,
"grad_norm": 0.44120654463768005,
"learning_rate": 9.0625e-06,
"loss": 0.0938,
"step": 885
},
{
"epoch": 0.9384350589169866,
"grad_norm": 0.29483693838119507,
"learning_rate": 9.061440677966103e-06,
"loss": 0.0936,
"step": 886
},
{
"epoch": 0.93949424069906,
"grad_norm": 0.33580589294433594,
"learning_rate": 9.060381355932205e-06,
"loss": 0.0967,
"step": 887
},
{
"epoch": 0.9405534224811333,
"grad_norm": 0.3942689597606659,
"learning_rate": 9.059322033898306e-06,
"loss": 0.0941,
"step": 888
},
{
"epoch": 0.9416126042632067,
"grad_norm": 0.43941256403923035,
"learning_rate": 9.058262711864407e-06,
"loss": 0.0996,
"step": 889
},
{
"epoch": 0.94267178604528,
"grad_norm": 0.2815316319465637,
"learning_rate": 9.057203389830509e-06,
"loss": 0.0962,
"step": 890
},
{
"epoch": 0.9437309678273533,
"grad_norm": 0.3286428451538086,
"learning_rate": 9.05614406779661e-06,
"loss": 0.0939,
"step": 891
},
{
"epoch": 0.9447901496094268,
"grad_norm": 0.2403406798839569,
"learning_rate": 9.055084745762712e-06,
"loss": 0.095,
"step": 892
},
{
"epoch": 0.9458493313915001,
"grad_norm": 0.7640528082847595,
"learning_rate": 9.054025423728813e-06,
"loss": 0.0909,
"step": 893
},
{
"epoch": 0.9469085131735734,
"grad_norm": 0.756924033164978,
"learning_rate": 9.052966101694916e-06,
"loss": 0.0945,
"step": 894
},
{
"epoch": 0.9479676949556468,
"grad_norm": 0.3299170732498169,
"learning_rate": 9.051906779661018e-06,
"loss": 0.0936,
"step": 895
},
{
"epoch": 0.9490268767377201,
"grad_norm": 0.2867504060268402,
"learning_rate": 9.05084745762712e-06,
"loss": 0.0927,
"step": 896
},
{
"epoch": 0.9500860585197934,
"grad_norm": 0.7162857055664062,
"learning_rate": 9.049788135593222e-06,
"loss": 0.0898,
"step": 897
},
{
"epoch": 0.9511452403018668,
"grad_norm": 0.2733360230922699,
"learning_rate": 9.048728813559323e-06,
"loss": 0.092,
"step": 898
},
{
"epoch": 0.9522044220839402,
"grad_norm": 0.991868257522583,
"learning_rate": 9.047669491525425e-06,
"loss": 0.098,
"step": 899
},
{
"epoch": 0.9532636038660135,
"grad_norm": 0.34536120295524597,
"learning_rate": 9.046610169491526e-06,
"loss": 0.0917,
"step": 900
},
{
"epoch": 0.9543227856480868,
"grad_norm": 0.29699528217315674,
"learning_rate": 9.045550847457628e-06,
"loss": 0.0953,
"step": 901
},
{
"epoch": 0.9553819674301602,
"grad_norm": 0.25606414675712585,
"learning_rate": 9.044491525423729e-06,
"loss": 0.0928,
"step": 902
},
{
"epoch": 0.9564411492122336,
"grad_norm": 0.6420966982841492,
"learning_rate": 9.043432203389832e-06,
"loss": 0.0951,
"step": 903
},
{
"epoch": 0.9575003309943069,
"grad_norm": 1.2128124237060547,
"learning_rate": 9.042372881355934e-06,
"loss": 0.0908,
"step": 904
},
{
"epoch": 0.9585595127763803,
"grad_norm": 0.25835537910461426,
"learning_rate": 9.041313559322035e-06,
"loss": 0.0981,
"step": 905
},
{
"epoch": 0.9596186945584536,
"grad_norm": 0.25690484046936035,
"learning_rate": 9.040254237288136e-06,
"loss": 0.0925,
"step": 906
},
{
"epoch": 0.9606778763405269,
"grad_norm": 0.327921986579895,
"learning_rate": 9.039194915254238e-06,
"loss": 0.0898,
"step": 907
},
{
"epoch": 0.9617370581226002,
"grad_norm": 0.3588384985923767,
"learning_rate": 9.03813559322034e-06,
"loss": 0.0904,
"step": 908
},
{
"epoch": 0.9627962399046737,
"grad_norm": 0.373099148273468,
"learning_rate": 9.03707627118644e-06,
"loss": 0.0907,
"step": 909
},
{
"epoch": 0.963855421686747,
"grad_norm": 0.6619918346405029,
"learning_rate": 9.036016949152542e-06,
"loss": 0.0945,
"step": 910
},
{
"epoch": 0.9649146034688203,
"grad_norm": 0.3509720265865326,
"learning_rate": 9.034957627118645e-06,
"loss": 0.0939,
"step": 911
},
{
"epoch": 0.9659737852508937,
"grad_norm": 0.3237987756729126,
"learning_rate": 9.033898305084747e-06,
"loss": 0.0901,
"step": 912
},
{
"epoch": 0.967032967032967,
"grad_norm": 0.6936929225921631,
"learning_rate": 9.032838983050848e-06,
"loss": 0.0984,
"step": 913
},
{
"epoch": 0.9680921488150404,
"grad_norm": 0.38148897886276245,
"learning_rate": 9.03177966101695e-06,
"loss": 0.0939,
"step": 914
},
{
"epoch": 0.9691513305971138,
"grad_norm": 0.4383428990840912,
"learning_rate": 9.03072033898305e-06,
"loss": 0.0934,
"step": 915
},
{
"epoch": 0.9702105123791871,
"grad_norm": 0.5336405634880066,
"learning_rate": 9.029661016949152e-06,
"loss": 0.0937,
"step": 916
},
{
"epoch": 0.9712696941612604,
"grad_norm": 0.3162240982055664,
"learning_rate": 9.028601694915255e-06,
"loss": 0.0952,
"step": 917
},
{
"epoch": 0.9723288759433337,
"grad_norm": 0.2739526927471161,
"learning_rate": 9.027542372881357e-06,
"loss": 0.0925,
"step": 918
},
{
"epoch": 0.9733880577254072,
"grad_norm": 0.3420490622520447,
"learning_rate": 9.026483050847458e-06,
"loss": 0.0928,
"step": 919
},
{
"epoch": 0.9744472395074805,
"grad_norm": 0.3407108187675476,
"learning_rate": 9.02542372881356e-06,
"loss": 0.0912,
"step": 920
},
{
"epoch": 0.9755064212895538,
"grad_norm": 0.3736214339733124,
"learning_rate": 9.024364406779663e-06,
"loss": 0.0936,
"step": 921
},
{
"epoch": 0.9765656030716272,
"grad_norm": 0.4933827519416809,
"learning_rate": 9.023305084745764e-06,
"loss": 0.0979,
"step": 922
},
{
"epoch": 0.9776247848537005,
"grad_norm": 0.5475727915763855,
"learning_rate": 9.022245762711866e-06,
"loss": 0.0956,
"step": 923
},
{
"epoch": 0.9786839666357738,
"grad_norm": 0.29296788573265076,
"learning_rate": 9.021186440677967e-06,
"loss": 0.0946,
"step": 924
},
{
"epoch": 0.9797431484178473,
"grad_norm": 0.29160603880882263,
"learning_rate": 9.020127118644068e-06,
"loss": 0.0935,
"step": 925
},
{
"epoch": 0.9808023301999206,
"grad_norm": 0.3513566553592682,
"learning_rate": 9.01906779661017e-06,
"loss": 0.0895,
"step": 926
},
{
"epoch": 0.9818615119819939,
"grad_norm": 0.49719667434692383,
"learning_rate": 9.018008474576271e-06,
"loss": 0.0907,
"step": 927
},
{
"epoch": 0.9829206937640672,
"grad_norm": 0.7228937149047852,
"learning_rate": 9.016949152542374e-06,
"loss": 0.0924,
"step": 928
},
{
"epoch": 0.9839798755461406,
"grad_norm": 0.9527651071548462,
"learning_rate": 9.015889830508476e-06,
"loss": 0.0979,
"step": 929
},
{
"epoch": 0.985039057328214,
"grad_norm": 0.2832454741001129,
"learning_rate": 9.014830508474577e-06,
"loss": 0.0937,
"step": 930
},
{
"epoch": 0.9860982391102873,
"grad_norm": 0.8853733539581299,
"learning_rate": 9.013771186440679e-06,
"loss": 0.0943,
"step": 931
},
{
"epoch": 0.9871574208923607,
"grad_norm": 0.5969071984291077,
"learning_rate": 9.01271186440678e-06,
"loss": 0.0961,
"step": 932
},
{
"epoch": 0.988216602674434,
"grad_norm": 0.6646391749382019,
"learning_rate": 9.011652542372881e-06,
"loss": 0.0894,
"step": 933
},
{
"epoch": 0.9892757844565073,
"grad_norm": 0.3108821511268616,
"learning_rate": 9.010593220338983e-06,
"loss": 0.0941,
"step": 934
},
{
"epoch": 0.9903349662385807,
"grad_norm": 0.3083324432373047,
"learning_rate": 9.009533898305086e-06,
"loss": 0.091,
"step": 935
},
{
"epoch": 0.9913941480206541,
"grad_norm": 0.33282625675201416,
"learning_rate": 9.008474576271187e-06,
"loss": 0.0925,
"step": 936
},
{
"epoch": 0.9924533298027274,
"grad_norm": 0.9061787128448486,
"learning_rate": 9.007415254237289e-06,
"loss": 0.0933,
"step": 937
},
{
"epoch": 0.9935125115848007,
"grad_norm": 0.33517178893089294,
"learning_rate": 9.006355932203392e-06,
"loss": 0.0956,
"step": 938
},
{
"epoch": 0.9945716933668741,
"grad_norm": 0.6129999160766602,
"learning_rate": 9.005296610169493e-06,
"loss": 0.0901,
"step": 939
},
{
"epoch": 0.9956308751489474,
"grad_norm": 0.38508912920951843,
"learning_rate": 9.004237288135595e-06,
"loss": 0.0924,
"step": 940
},
{
"epoch": 0.9966900569310208,
"grad_norm": 1.1568593978881836,
"learning_rate": 9.003177966101696e-06,
"loss": 0.0973,
"step": 941
},
{
"epoch": 0.9977492387130942,
"grad_norm": 0.3170551061630249,
"learning_rate": 9.002118644067797e-06,
"loss": 0.0966,
"step": 942
},
{
"epoch": 0.9988084204951675,
"grad_norm": 0.3123176395893097,
"learning_rate": 9.001059322033899e-06,
"loss": 0.0923,
"step": 943
},
{
"epoch": 0.9998676022772408,
"grad_norm": 0.38239729404449463,
"learning_rate": 9e-06,
"loss": 0.0991,
"step": 944
},
{
"epoch": 0.9998676022772408,
"eval_accuracy": 0.9741,
"eval_best_f1_from_thresholding": 0.15081967213114753,
"eval_loss": 0.14503583312034607,
"eval_matthews_corrcoef": 0.15208233188029333,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9868494541761869,
"eval_negative_class_precision": 0.9928483857785043,
"eval_negative_class_recall": 0.9809225799939437,
"eval_positive_class_f1": 0.15081967213114755,
"eval_positive_class_precision": 0.10849056603773585,
"eval_positive_class_recall": 0.24731182795698925,
"eval_roc_auc": 0.8164722239407131,
"eval_runtime": 20.7251,
"eval_samples_per_second": 482.507,
"eval_steps_per_second": 7.575,
"step": 944
},
{
"epoch": 1.0,
"grad_norm": 0.08045551180839539,
"learning_rate": 8.998940677966103e-06,
"loss": 0.0124,
"step": 945
},
{
"epoch": 1.0010591817820733,
"grad_norm": 0.32438716292381287,
"learning_rate": 8.997881355932205e-06,
"loss": 0.093,
"step": 946
},
{
"epoch": 1.0021183635641466,
"grad_norm": 0.34341174364089966,
"learning_rate": 8.996822033898306e-06,
"loss": 0.0952,
"step": 947
},
{
"epoch": 1.00317754534622,
"grad_norm": 0.30472978949546814,
"learning_rate": 8.995762711864408e-06,
"loss": 0.0923,
"step": 948
},
{
"epoch": 1.0042367271282935,
"grad_norm": 0.8406989574432373,
"learning_rate": 8.994703389830509e-06,
"loss": 0.0964,
"step": 949
},
{
"epoch": 1.0052959089103668,
"grad_norm": 0.338326632976532,
"learning_rate": 8.99364406779661e-06,
"loss": 0.088,
"step": 950
},
{
"epoch": 1.0063550906924401,
"grad_norm": 0.47067689895629883,
"learning_rate": 8.992584745762712e-06,
"loss": 0.0976,
"step": 951
},
{
"epoch": 1.0074142724745134,
"grad_norm": 0.40675660967826843,
"learning_rate": 8.991525423728815e-06,
"loss": 0.0906,
"step": 952
},
{
"epoch": 1.0084734542565867,
"grad_norm": 0.3261379301548004,
"learning_rate": 8.990466101694916e-06,
"loss": 0.095,
"step": 953
},
{
"epoch": 1.00953263603866,
"grad_norm": 0.31926316022872925,
"learning_rate": 8.989406779661018e-06,
"loss": 0.0948,
"step": 954
},
{
"epoch": 1.0105918178207334,
"grad_norm": 0.603122889995575,
"learning_rate": 8.988347457627119e-06,
"loss": 0.0925,
"step": 955
},
{
"epoch": 1.011650999602807,
"grad_norm": 0.26193147897720337,
"learning_rate": 8.98728813559322e-06,
"loss": 0.0906,
"step": 956
},
{
"epoch": 1.0127101813848802,
"grad_norm": 1.3907825946807861,
"learning_rate": 8.986228813559322e-06,
"loss": 0.0973,
"step": 957
},
{
"epoch": 1.0137693631669535,
"grad_norm": 0.30133160948753357,
"learning_rate": 8.985169491525423e-06,
"loss": 0.0936,
"step": 958
},
{
"epoch": 1.0148285449490269,
"grad_norm": 0.40129804611206055,
"learning_rate": 8.984110169491526e-06,
"loss": 0.0921,
"step": 959
},
{
"epoch": 1.0158877267311002,
"grad_norm": 0.6812584400177002,
"learning_rate": 8.983050847457628e-06,
"loss": 0.0941,
"step": 960
},
{
"epoch": 1.0169469085131735,
"grad_norm": 0.680328905582428,
"learning_rate": 8.98199152542373e-06,
"loss": 0.0929,
"step": 961
},
{
"epoch": 1.018006090295247,
"grad_norm": 0.433712899684906,
"learning_rate": 8.980932203389832e-06,
"loss": 0.0978,
"step": 962
},
{
"epoch": 1.0190652720773203,
"grad_norm": 0.44195568561553955,
"learning_rate": 8.979872881355934e-06,
"loss": 0.0977,
"step": 963
},
{
"epoch": 1.0201244538593937,
"grad_norm": 0.324758380651474,
"learning_rate": 8.978813559322035e-06,
"loss": 0.0997,
"step": 964
},
{
"epoch": 1.021183635641467,
"grad_norm": 0.32022592425346375,
"learning_rate": 8.977754237288137e-06,
"loss": 0.0953,
"step": 965
},
{
"epoch": 1.0222428174235403,
"grad_norm": 0.38843539357185364,
"learning_rate": 8.976694915254238e-06,
"loss": 0.0981,
"step": 966
},
{
"epoch": 1.0233019992056136,
"grad_norm": 0.36759236454963684,
"learning_rate": 8.97563559322034e-06,
"loss": 0.096,
"step": 967
},
{
"epoch": 1.024361180987687,
"grad_norm": 0.44051095843315125,
"learning_rate": 8.974576271186441e-06,
"loss": 0.098,
"step": 968
},
{
"epoch": 1.0254203627697605,
"grad_norm": 0.3786701560020447,
"learning_rate": 8.973516949152544e-06,
"loss": 0.0939,
"step": 969
},
{
"epoch": 1.0264795445518338,
"grad_norm": 1.195036768913269,
"learning_rate": 8.972457627118645e-06,
"loss": 0.0972,
"step": 970
},
{
"epoch": 1.027538726333907,
"grad_norm": 0.2937662601470947,
"learning_rate": 8.971398305084747e-06,
"loss": 0.0953,
"step": 971
},
{
"epoch": 1.0285979081159804,
"grad_norm": 0.5330064296722412,
"learning_rate": 8.970338983050848e-06,
"loss": 0.0974,
"step": 972
},
{
"epoch": 1.0296570898980537,
"grad_norm": 0.2991742789745331,
"learning_rate": 8.96927966101695e-06,
"loss": 0.0945,
"step": 973
},
{
"epoch": 1.030716271680127,
"grad_norm": 0.34784451127052307,
"learning_rate": 8.968220338983051e-06,
"loss": 0.0907,
"step": 974
},
{
"epoch": 1.0317754534622003,
"grad_norm": 0.3017341196537018,
"learning_rate": 8.967161016949152e-06,
"loss": 0.0932,
"step": 975
},
{
"epoch": 1.0328346352442739,
"grad_norm": 0.27960410714149475,
"learning_rate": 8.966101694915254e-06,
"loss": 0.0872,
"step": 976
},
{
"epoch": 1.0338938170263472,
"grad_norm": 0.950781524181366,
"learning_rate": 8.965042372881357e-06,
"loss": 0.0942,
"step": 977
},
{
"epoch": 1.0349529988084205,
"grad_norm": 1.0935500860214233,
"learning_rate": 8.963983050847458e-06,
"loss": 0.0947,
"step": 978
},
{
"epoch": 1.0360121805904938,
"grad_norm": 1.0450522899627686,
"learning_rate": 8.96292372881356e-06,
"loss": 0.0934,
"step": 979
},
{
"epoch": 1.0370713623725671,
"grad_norm": 0.31627514958381653,
"learning_rate": 8.961864406779663e-06,
"loss": 0.0943,
"step": 980
},
{
"epoch": 1.0381305441546405,
"grad_norm": 0.33799999952316284,
"learning_rate": 8.960805084745764e-06,
"loss": 0.097,
"step": 981
},
{
"epoch": 1.039189725936714,
"grad_norm": 0.4043204188346863,
"learning_rate": 8.959745762711866e-06,
"loss": 0.0882,
"step": 982
},
{
"epoch": 1.0402489077187873,
"grad_norm": 0.7949614524841309,
"learning_rate": 8.958686440677967e-06,
"loss": 0.0928,
"step": 983
},
{
"epoch": 1.0413080895008606,
"grad_norm": 0.4171614944934845,
"learning_rate": 8.957627118644069e-06,
"loss": 0.0954,
"step": 984
},
{
"epoch": 1.042367271282934,
"grad_norm": 0.34418338537216187,
"learning_rate": 8.95656779661017e-06,
"loss": 0.0912,
"step": 985
},
{
"epoch": 1.0434264530650073,
"grad_norm": 0.4631114602088928,
"learning_rate": 8.955508474576271e-06,
"loss": 0.0956,
"step": 986
},
{
"epoch": 1.0444856348470806,
"grad_norm": 1.044867992401123,
"learning_rate": 8.954449152542374e-06,
"loss": 0.0932,
"step": 987
},
{
"epoch": 1.0455448166291539,
"grad_norm": 0.6983899474143982,
"learning_rate": 8.953389830508476e-06,
"loss": 0.0996,
"step": 988
},
{
"epoch": 1.0466039984112274,
"grad_norm": 0.5399383306503296,
"learning_rate": 8.952330508474577e-06,
"loss": 0.0953,
"step": 989
},
{
"epoch": 1.0476631801933007,
"grad_norm": 0.30072763562202454,
"learning_rate": 8.951271186440679e-06,
"loss": 0.0946,
"step": 990
},
{
"epoch": 1.048722361975374,
"grad_norm": 0.7012078762054443,
"learning_rate": 8.95021186440678e-06,
"loss": 0.0963,
"step": 991
},
{
"epoch": 1.0497815437574474,
"grad_norm": 0.3555310070514679,
"learning_rate": 8.949152542372881e-06,
"loss": 0.0968,
"step": 992
},
{
"epoch": 1.0508407255395207,
"grad_norm": 0.40114033222198486,
"learning_rate": 8.948093220338983e-06,
"loss": 0.0968,
"step": 993
},
{
"epoch": 1.051899907321594,
"grad_norm": 0.3639932870864868,
"learning_rate": 8.947033898305086e-06,
"loss": 0.0947,
"step": 994
},
{
"epoch": 1.0529590891036673,
"grad_norm": 0.3300527036190033,
"learning_rate": 8.945974576271187e-06,
"loss": 0.0971,
"step": 995
},
{
"epoch": 1.0540182708857408,
"grad_norm": 0.3679925203323364,
"learning_rate": 8.944915254237289e-06,
"loss": 0.0952,
"step": 996
},
{
"epoch": 1.0550774526678142,
"grad_norm": 1.186897873878479,
"learning_rate": 8.94385593220339e-06,
"loss": 0.0963,
"step": 997
},
{
"epoch": 1.0561366344498875,
"grad_norm": 0.6484549045562744,
"learning_rate": 8.942796610169492e-06,
"loss": 0.0934,
"step": 998
},
{
"epoch": 1.0571958162319608,
"grad_norm": 0.5084095597267151,
"learning_rate": 8.941737288135593e-06,
"loss": 0.0928,
"step": 999
},
{
"epoch": 1.058254998014034,
"grad_norm": 0.5164026021957397,
"learning_rate": 8.940677966101694e-06,
"loss": 0.0967,
"step": 1000
},
{
"epoch": 1.0593141797961074,
"grad_norm": 0.2954999804496765,
"learning_rate": 8.939618644067798e-06,
"loss": 0.0963,
"step": 1001
},
{
"epoch": 1.060373361578181,
"grad_norm": 0.2980453372001648,
"learning_rate": 8.938559322033899e-06,
"loss": 0.0951,
"step": 1002
},
{
"epoch": 1.0614325433602543,
"grad_norm": 0.3077160120010376,
"learning_rate": 8.9375e-06,
"loss": 0.0943,
"step": 1003
},
{
"epoch": 1.0624917251423276,
"grad_norm": 0.2690112292766571,
"learning_rate": 8.936440677966104e-06,
"loss": 0.0941,
"step": 1004
},
{
"epoch": 1.063550906924401,
"grad_norm": 0.9549407362937927,
"learning_rate": 8.935381355932205e-06,
"loss": 0.0951,
"step": 1005
},
{
"epoch": 1.0646100887064742,
"grad_norm": 1.4373801946640015,
"learning_rate": 8.934322033898306e-06,
"loss": 0.0955,
"step": 1006
},
{
"epoch": 1.0656692704885475,
"grad_norm": 0.8598366379737854,
"learning_rate": 8.933262711864408e-06,
"loss": 0.0922,
"step": 1007
},
{
"epoch": 1.0667284522706209,
"grad_norm": 0.3050272464752197,
"learning_rate": 8.932203389830509e-06,
"loss": 0.0913,
"step": 1008
},
{
"epoch": 1.0677876340526944,
"grad_norm": 0.40523460507392883,
"learning_rate": 8.93114406779661e-06,
"loss": 0.0959,
"step": 1009
},
{
"epoch": 1.0688468158347677,
"grad_norm": 0.35312050580978394,
"learning_rate": 8.930084745762712e-06,
"loss": 0.0935,
"step": 1010
},
{
"epoch": 1.069905997616841,
"grad_norm": 0.9593795537948608,
"learning_rate": 8.929025423728815e-06,
"loss": 0.0994,
"step": 1011
},
{
"epoch": 1.0709651793989143,
"grad_norm": 0.45267030596733093,
"learning_rate": 8.927966101694916e-06,
"loss": 0.0943,
"step": 1012
},
{
"epoch": 1.0720243611809877,
"grad_norm": 1.2854154109954834,
"learning_rate": 8.926906779661018e-06,
"loss": 0.1023,
"step": 1013
},
{
"epoch": 1.073083542963061,
"grad_norm": 0.3618859350681305,
"learning_rate": 8.92584745762712e-06,
"loss": 0.0934,
"step": 1014
},
{
"epoch": 1.0741427247451343,
"grad_norm": 0.513184666633606,
"learning_rate": 8.92478813559322e-06,
"loss": 0.0966,
"step": 1015
},
{
"epoch": 1.0752019065272078,
"grad_norm": 0.4073401689529419,
"learning_rate": 8.923728813559322e-06,
"loss": 0.0948,
"step": 1016
},
{
"epoch": 1.0762610883092811,
"grad_norm": 0.5936095714569092,
"learning_rate": 8.922669491525424e-06,
"loss": 0.0929,
"step": 1017
},
{
"epoch": 1.0773202700913544,
"grad_norm": 1.5205022096633911,
"learning_rate": 8.921610169491527e-06,
"loss": 0.1004,
"step": 1018
},
{
"epoch": 1.0783794518734278,
"grad_norm": 0.30164584517478943,
"learning_rate": 8.920550847457628e-06,
"loss": 0.0918,
"step": 1019
},
{
"epoch": 1.079438633655501,
"grad_norm": 0.37064287066459656,
"learning_rate": 8.91949152542373e-06,
"loss": 0.0982,
"step": 1020
},
{
"epoch": 1.0804978154375744,
"grad_norm": 0.32274115085601807,
"learning_rate": 8.918432203389831e-06,
"loss": 0.0951,
"step": 1021
},
{
"epoch": 1.0815569972196477,
"grad_norm": 0.29008156061172485,
"learning_rate": 8.917372881355934e-06,
"loss": 0.0976,
"step": 1022
},
{
"epoch": 1.0826161790017212,
"grad_norm": 0.2828814685344696,
"learning_rate": 8.916313559322035e-06,
"loss": 0.0927,
"step": 1023
},
{
"epoch": 1.0836753607837946,
"grad_norm": 0.5621195435523987,
"learning_rate": 8.915254237288137e-06,
"loss": 0.0966,
"step": 1024
},
{
"epoch": 1.0847345425658679,
"grad_norm": 0.4781251847743988,
"learning_rate": 8.914194915254238e-06,
"loss": 0.0968,
"step": 1025
},
{
"epoch": 1.0857937243479412,
"grad_norm": 0.7829591035842896,
"learning_rate": 8.91313559322034e-06,
"loss": 0.0983,
"step": 1026
},
{
"epoch": 1.0868529061300145,
"grad_norm": 0.5441563725471497,
"learning_rate": 8.912076271186441e-06,
"loss": 0.0926,
"step": 1027
},
{
"epoch": 1.0879120879120878,
"grad_norm": 0.24053499102592468,
"learning_rate": 8.911016949152544e-06,
"loss": 0.0889,
"step": 1028
},
{
"epoch": 1.0889712696941614,
"grad_norm": 0.28083938360214233,
"learning_rate": 8.909957627118646e-06,
"loss": 0.0918,
"step": 1029
},
{
"epoch": 1.0900304514762347,
"grad_norm": 0.28952857851982117,
"learning_rate": 8.908898305084747e-06,
"loss": 0.09,
"step": 1030
},
{
"epoch": 1.091089633258308,
"grad_norm": 0.5241445899009705,
"learning_rate": 8.907838983050848e-06,
"loss": 0.0936,
"step": 1031
},
{
"epoch": 1.0921488150403813,
"grad_norm": 0.361904501914978,
"learning_rate": 8.90677966101695e-06,
"loss": 0.0979,
"step": 1032
},
{
"epoch": 1.0932079968224546,
"grad_norm": 0.2844066917896271,
"learning_rate": 8.905720338983051e-06,
"loss": 0.0922,
"step": 1033
},
{
"epoch": 1.094267178604528,
"grad_norm": 1.230337142944336,
"learning_rate": 8.904661016949153e-06,
"loss": 0.0984,
"step": 1034
},
{
"epoch": 1.0953263603866012,
"grad_norm": 0.9222045540809631,
"learning_rate": 8.903601694915254e-06,
"loss": 0.0895,
"step": 1035
},
{
"epoch": 1.0963855421686748,
"grad_norm": 0.4679190218448639,
"learning_rate": 8.902542372881357e-06,
"loss": 0.0933,
"step": 1036
},
{
"epoch": 1.097444723950748,
"grad_norm": 0.48615601658821106,
"learning_rate": 8.901483050847459e-06,
"loss": 0.1018,
"step": 1037
},
{
"epoch": 1.0985039057328214,
"grad_norm": 1.2758482694625854,
"learning_rate": 8.90042372881356e-06,
"loss": 0.0948,
"step": 1038
},
{
"epoch": 1.0995630875148947,
"grad_norm": 0.9381634593009949,
"learning_rate": 8.899364406779661e-06,
"loss": 0.0984,
"step": 1039
},
{
"epoch": 1.100622269296968,
"grad_norm": 2.216055393218994,
"learning_rate": 8.898305084745763e-06,
"loss": 0.0971,
"step": 1040
},
{
"epoch": 1.1016814510790414,
"grad_norm": 1.2314603328704834,
"learning_rate": 8.897245762711864e-06,
"loss": 0.0961,
"step": 1041
},
{
"epoch": 1.102740632861115,
"grad_norm": 0.4274296164512634,
"learning_rate": 8.896186440677966e-06,
"loss": 0.0937,
"step": 1042
},
{
"epoch": 1.1037998146431882,
"grad_norm": 0.3972962498664856,
"learning_rate": 8.895127118644069e-06,
"loss": 0.0966,
"step": 1043
},
{
"epoch": 1.1048589964252615,
"grad_norm": 0.32898983359336853,
"learning_rate": 8.89406779661017e-06,
"loss": 0.099,
"step": 1044
},
{
"epoch": 1.1059181782073348,
"grad_norm": 0.5144562721252441,
"learning_rate": 8.893008474576273e-06,
"loss": 0.0963,
"step": 1045
},
{
"epoch": 1.1069773599894082,
"grad_norm": 0.9342681169509888,
"learning_rate": 8.891949152542375e-06,
"loss": 0.0961,
"step": 1046
},
{
"epoch": 1.1080365417714815,
"grad_norm": 0.9471574425697327,
"learning_rate": 8.890889830508476e-06,
"loss": 0.0961,
"step": 1047
},
{
"epoch": 1.1090957235535548,
"grad_norm": 0.2984775900840759,
"learning_rate": 8.889830508474577e-06,
"loss": 0.0939,
"step": 1048
},
{
"epoch": 1.1101549053356283,
"grad_norm": 0.28858470916748047,
"learning_rate": 8.888771186440679e-06,
"loss": 0.0908,
"step": 1049
},
{
"epoch": 1.1112140871177016,
"grad_norm": 0.3262318968772888,
"learning_rate": 8.88771186440678e-06,
"loss": 0.0955,
"step": 1050
},
{
"epoch": 1.112273268899775,
"grad_norm": 0.4065167307853699,
"learning_rate": 8.886652542372882e-06,
"loss": 0.0981,
"step": 1051
},
{
"epoch": 1.1133324506818483,
"grad_norm": 0.4788059592247009,
"learning_rate": 8.885593220338983e-06,
"loss": 0.0949,
"step": 1052
},
{
"epoch": 1.1143916324639216,
"grad_norm": 0.7517910599708557,
"learning_rate": 8.884533898305086e-06,
"loss": 0.0925,
"step": 1053
},
{
"epoch": 1.115450814245995,
"grad_norm": 0.36450427770614624,
"learning_rate": 8.883474576271188e-06,
"loss": 0.0925,
"step": 1054
},
{
"epoch": 1.1165099960280682,
"grad_norm": 0.83025723695755,
"learning_rate": 8.882415254237289e-06,
"loss": 0.0981,
"step": 1055
},
{
"epoch": 1.1175691778101418,
"grad_norm": 0.5474830269813538,
"learning_rate": 8.88135593220339e-06,
"loss": 0.0944,
"step": 1056
},
{
"epoch": 1.118628359592215,
"grad_norm": 0.3668607175350189,
"learning_rate": 8.880296610169492e-06,
"loss": 0.0949,
"step": 1057
},
{
"epoch": 1.1196875413742884,
"grad_norm": 0.2962619960308075,
"learning_rate": 8.879237288135593e-06,
"loss": 0.0961,
"step": 1058
},
{
"epoch": 1.1207467231563617,
"grad_norm": 0.306918740272522,
"learning_rate": 8.878177966101695e-06,
"loss": 0.0953,
"step": 1059
},
{
"epoch": 1.121805904938435,
"grad_norm": 0.4895283579826355,
"learning_rate": 8.877118644067798e-06,
"loss": 0.0922,
"step": 1060
},
{
"epoch": 1.1228650867205083,
"grad_norm": 0.3019005060195923,
"learning_rate": 8.8760593220339e-06,
"loss": 0.0903,
"step": 1061
},
{
"epoch": 1.1239242685025816,
"grad_norm": 0.5289521217346191,
"learning_rate": 8.875e-06,
"loss": 0.0942,
"step": 1062
},
{
"epoch": 1.1249834502846552,
"grad_norm": 0.2790992259979248,
"learning_rate": 8.873940677966102e-06,
"loss": 0.0961,
"step": 1063
},
{
"epoch": 1.1260426320667285,
"grad_norm": 0.6811845302581787,
"learning_rate": 8.872881355932203e-06,
"loss": 0.0973,
"step": 1064
},
{
"epoch": 1.1271018138488018,
"grad_norm": 0.4769812822341919,
"learning_rate": 8.871822033898307e-06,
"loss": 0.093,
"step": 1065
},
{
"epoch": 1.1281609956308751,
"grad_norm": 0.6008502244949341,
"learning_rate": 8.870762711864408e-06,
"loss": 0.0992,
"step": 1066
},
{
"epoch": 1.1292201774129484,
"grad_norm": 0.292278915643692,
"learning_rate": 8.86970338983051e-06,
"loss": 0.0953,
"step": 1067
},
{
"epoch": 1.1302793591950218,
"grad_norm": 0.34272485971450806,
"learning_rate": 8.86864406779661e-06,
"loss": 0.0909,
"step": 1068
},
{
"epoch": 1.131338540977095,
"grad_norm": 0.3241511583328247,
"learning_rate": 8.867584745762712e-06,
"loss": 0.0896,
"step": 1069
},
{
"epoch": 1.1323977227591686,
"grad_norm": 0.7826002836227417,
"learning_rate": 8.866525423728815e-06,
"loss": 0.0955,
"step": 1070
},
{
"epoch": 1.133456904541242,
"grad_norm": 0.7677907943725586,
"learning_rate": 8.865466101694917e-06,
"loss": 0.0938,
"step": 1071
},
{
"epoch": 1.1345160863233152,
"grad_norm": 0.30883848667144775,
"learning_rate": 8.864406779661018e-06,
"loss": 0.094,
"step": 1072
},
{
"epoch": 1.1355752681053886,
"grad_norm": 0.2945443093776703,
"learning_rate": 8.86334745762712e-06,
"loss": 0.0935,
"step": 1073
},
{
"epoch": 1.1366344498874619,
"grad_norm": 0.38556718826293945,
"learning_rate": 8.862288135593221e-06,
"loss": 0.0941,
"step": 1074
},
{
"epoch": 1.1376936316695352,
"grad_norm": 1.0006963014602661,
"learning_rate": 8.861228813559322e-06,
"loss": 0.0937,
"step": 1075
},
{
"epoch": 1.1387528134516087,
"grad_norm": 0.6400341391563416,
"learning_rate": 8.860169491525424e-06,
"loss": 0.0965,
"step": 1076
},
{
"epoch": 1.139811995233682,
"grad_norm": 0.33833760023117065,
"learning_rate": 8.859110169491527e-06,
"loss": 0.0917,
"step": 1077
},
{
"epoch": 1.1408711770157554,
"grad_norm": 0.34555482864379883,
"learning_rate": 8.858050847457628e-06,
"loss": 0.0941,
"step": 1078
},
{
"epoch": 1.1419303587978287,
"grad_norm": 0.2939344644546509,
"learning_rate": 8.85699152542373e-06,
"loss": 0.0917,
"step": 1079
},
{
"epoch": 1.142989540579902,
"grad_norm": 0.3510951101779938,
"learning_rate": 8.855932203389831e-06,
"loss": 0.0935,
"step": 1080
},
{
"epoch": 1.1440487223619753,
"grad_norm": 0.40609949827194214,
"learning_rate": 8.854872881355932e-06,
"loss": 0.0965,
"step": 1081
},
{
"epoch": 1.1451079041440488,
"grad_norm": 1.6574863195419312,
"learning_rate": 8.853813559322034e-06,
"loss": 0.0985,
"step": 1082
},
{
"epoch": 1.1461670859261222,
"grad_norm": 1.687386155128479,
"learning_rate": 8.852754237288135e-06,
"loss": 0.0942,
"step": 1083
},
{
"epoch": 1.1472262677081955,
"grad_norm": 0.9652918577194214,
"learning_rate": 8.851694915254237e-06,
"loss": 0.0969,
"step": 1084
},
{
"epoch": 1.1482854494902688,
"grad_norm": 0.447201669216156,
"learning_rate": 8.85063559322034e-06,
"loss": 0.0946,
"step": 1085
},
{
"epoch": 1.149344631272342,
"grad_norm": 0.2875814735889435,
"learning_rate": 8.849576271186441e-06,
"loss": 0.0944,
"step": 1086
},
{
"epoch": 1.1504038130544154,
"grad_norm": 0.3672226667404175,
"learning_rate": 8.848516949152544e-06,
"loss": 0.0967,
"step": 1087
},
{
"epoch": 1.1514629948364887,
"grad_norm": 0.8760420680046082,
"learning_rate": 8.847457627118646e-06,
"loss": 0.0937,
"step": 1088
},
{
"epoch": 1.1525221766185623,
"grad_norm": 0.4764424264431,
"learning_rate": 8.846398305084747e-06,
"loss": 0.0958,
"step": 1089
},
{
"epoch": 1.1535813584006356,
"grad_norm": 0.45019716024398804,
"learning_rate": 8.845338983050849e-06,
"loss": 0.097,
"step": 1090
},
{
"epoch": 1.154640540182709,
"grad_norm": 0.6289698481559753,
"learning_rate": 8.84427966101695e-06,
"loss": 0.0948,
"step": 1091
},
{
"epoch": 1.1556997219647822,
"grad_norm": 0.30257681012153625,
"learning_rate": 8.843220338983051e-06,
"loss": 0.0947,
"step": 1092
},
{
"epoch": 1.1567589037468555,
"grad_norm": 0.26092973351478577,
"learning_rate": 8.842161016949153e-06,
"loss": 0.0976,
"step": 1093
},
{
"epoch": 1.1578180855289288,
"grad_norm": 0.35534968972206116,
"learning_rate": 8.841101694915256e-06,
"loss": 0.0949,
"step": 1094
},
{
"epoch": 1.1588772673110022,
"grad_norm": 0.5100005269050598,
"learning_rate": 8.840042372881357e-06,
"loss": 0.0971,
"step": 1095
},
{
"epoch": 1.1599364490930757,
"grad_norm": 0.2607521712779999,
"learning_rate": 8.838983050847459e-06,
"loss": 0.0907,
"step": 1096
},
{
"epoch": 1.160995630875149,
"grad_norm": 0.33206820487976074,
"learning_rate": 8.83792372881356e-06,
"loss": 0.0914,
"step": 1097
},
{
"epoch": 1.1620548126572223,
"grad_norm": 0.39160943031311035,
"learning_rate": 8.836864406779662e-06,
"loss": 0.0953,
"step": 1098
},
{
"epoch": 1.1631139944392956,
"grad_norm": 0.29793456196784973,
"learning_rate": 8.835805084745763e-06,
"loss": 0.0876,
"step": 1099
},
{
"epoch": 1.164173176221369,
"grad_norm": 0.2818388044834137,
"learning_rate": 8.834745762711864e-06,
"loss": 0.0937,
"step": 1100
},
{
"epoch": 1.1652323580034423,
"grad_norm": 0.31199049949645996,
"learning_rate": 8.833686440677966e-06,
"loss": 0.0901,
"step": 1101
},
{
"epoch": 1.1662915397855156,
"grad_norm": 0.2392428070306778,
"learning_rate": 8.832627118644069e-06,
"loss": 0.0917,
"step": 1102
},
{
"epoch": 1.1673507215675891,
"grad_norm": 0.6399659514427185,
"learning_rate": 8.83156779661017e-06,
"loss": 0.0953,
"step": 1103
},
{
"epoch": 1.1684099033496624,
"grad_norm": 1.0445541143417358,
"learning_rate": 8.830508474576272e-06,
"loss": 0.0955,
"step": 1104
},
{
"epoch": 1.1694690851317358,
"grad_norm": 0.34966060519218445,
"learning_rate": 8.829449152542373e-06,
"loss": 0.0928,
"step": 1105
},
{
"epoch": 1.170528266913809,
"grad_norm": 0.3811902701854706,
"learning_rate": 8.828389830508475e-06,
"loss": 0.0926,
"step": 1106
},
{
"epoch": 1.1715874486958824,
"grad_norm": 0.2676902413368225,
"learning_rate": 8.827330508474578e-06,
"loss": 0.0915,
"step": 1107
},
{
"epoch": 1.1726466304779557,
"grad_norm": 0.2894545793533325,
"learning_rate": 8.826271186440679e-06,
"loss": 0.0924,
"step": 1108
},
{
"epoch": 1.173705812260029,
"grad_norm": 0.26010259985923767,
"learning_rate": 8.82521186440678e-06,
"loss": 0.0924,
"step": 1109
},
{
"epoch": 1.1747649940421026,
"grad_norm": 0.41155922412872314,
"learning_rate": 8.824152542372882e-06,
"loss": 0.0944,
"step": 1110
},
{
"epoch": 1.1758241758241759,
"grad_norm": 0.4124395549297333,
"learning_rate": 8.823093220338983e-06,
"loss": 0.0938,
"step": 1111
},
{
"epoch": 1.1768833576062492,
"grad_norm": 1.0543724298477173,
"learning_rate": 8.822033898305086e-06,
"loss": 0.0987,
"step": 1112
},
{
"epoch": 1.1779425393883225,
"grad_norm": 0.40370675921440125,
"learning_rate": 8.820974576271188e-06,
"loss": 0.0974,
"step": 1113
},
{
"epoch": 1.1790017211703958,
"grad_norm": 0.3587372303009033,
"learning_rate": 8.81991525423729e-06,
"loss": 0.0965,
"step": 1114
},
{
"epoch": 1.1800609029524691,
"grad_norm": 0.2818925082683563,
"learning_rate": 8.81885593220339e-06,
"loss": 0.0922,
"step": 1115
},
{
"epoch": 1.1811200847345424,
"grad_norm": 0.2897863984107971,
"learning_rate": 8.817796610169492e-06,
"loss": 0.0971,
"step": 1116
},
{
"epoch": 1.182179266516616,
"grad_norm": 0.6749652028083801,
"learning_rate": 8.816737288135593e-06,
"loss": 0.0962,
"step": 1117
},
{
"epoch": 1.1832384482986893,
"grad_norm": 0.2877092659473419,
"learning_rate": 8.815677966101695e-06,
"loss": 0.0965,
"step": 1118
},
{
"epoch": 1.1842976300807626,
"grad_norm": 0.2991100549697876,
"learning_rate": 8.814618644067798e-06,
"loss": 0.0974,
"step": 1119
},
{
"epoch": 1.185356811862836,
"grad_norm": 0.27069613337516785,
"learning_rate": 8.8135593220339e-06,
"loss": 0.0934,
"step": 1120
},
{
"epoch": 1.1864159936449092,
"grad_norm": 0.42415690422058105,
"learning_rate": 8.8125e-06,
"loss": 0.0931,
"step": 1121
},
{
"epoch": 1.1874751754269828,
"grad_norm": 0.4270313084125519,
"learning_rate": 8.811440677966102e-06,
"loss": 0.0906,
"step": 1122
},
{
"epoch": 1.188534357209056,
"grad_norm": 0.24667994678020477,
"learning_rate": 8.810381355932204e-06,
"loss": 0.0929,
"step": 1123
},
{
"epoch": 1.1895935389911294,
"grad_norm": 0.7036119103431702,
"learning_rate": 8.809322033898305e-06,
"loss": 0.0949,
"step": 1124
},
{
"epoch": 1.1906527207732027,
"grad_norm": 0.3452354371547699,
"learning_rate": 8.808262711864406e-06,
"loss": 0.0915,
"step": 1125
},
{
"epoch": 1.191711902555276,
"grad_norm": 0.48815253376960754,
"learning_rate": 8.80720338983051e-06,
"loss": 0.0941,
"step": 1126
},
{
"epoch": 1.1927710843373494,
"grad_norm": 0.33038586378097534,
"learning_rate": 8.806144067796611e-06,
"loss": 0.0915,
"step": 1127
},
{
"epoch": 1.1938302661194227,
"grad_norm": 0.30816060304641724,
"learning_rate": 8.805084745762712e-06,
"loss": 0.0921,
"step": 1128
},
{
"epoch": 1.1948894479014962,
"grad_norm": 0.2555977702140808,
"learning_rate": 8.804025423728815e-06,
"loss": 0.0917,
"step": 1129
},
{
"epoch": 1.1959486296835695,
"grad_norm": 0.2907012403011322,
"learning_rate": 8.802966101694917e-06,
"loss": 0.0922,
"step": 1130
},
{
"epoch": 1.1970078114656428,
"grad_norm": 0.6297186613082886,
"learning_rate": 8.801906779661018e-06,
"loss": 0.0919,
"step": 1131
},
{
"epoch": 1.1980669932477161,
"grad_norm": 0.650320291519165,
"learning_rate": 8.80084745762712e-06,
"loss": 0.0908,
"step": 1132
},
{
"epoch": 1.1991261750297895,
"grad_norm": 0.5117915272712708,
"learning_rate": 8.799788135593221e-06,
"loss": 0.0937,
"step": 1133
},
{
"epoch": 1.2001853568118628,
"grad_norm": 0.2948932647705078,
"learning_rate": 8.798728813559322e-06,
"loss": 0.0914,
"step": 1134
},
{
"epoch": 1.201244538593936,
"grad_norm": 0.33919399976730347,
"learning_rate": 8.797669491525424e-06,
"loss": 0.0917,
"step": 1135
},
{
"epoch": 1.2023037203760096,
"grad_norm": 0.3182590901851654,
"learning_rate": 8.796610169491527e-06,
"loss": 0.0922,
"step": 1136
},
{
"epoch": 1.203362902158083,
"grad_norm": 0.26661911606788635,
"learning_rate": 8.795550847457628e-06,
"loss": 0.093,
"step": 1137
},
{
"epoch": 1.2044220839401563,
"grad_norm": 0.26091158390045166,
"learning_rate": 8.79449152542373e-06,
"loss": 0.0901,
"step": 1138
},
{
"epoch": 1.2054812657222296,
"grad_norm": 0.3877406716346741,
"learning_rate": 8.793432203389831e-06,
"loss": 0.0892,
"step": 1139
},
{
"epoch": 1.206540447504303,
"grad_norm": 0.8551795482635498,
"learning_rate": 8.792372881355933e-06,
"loss": 0.0949,
"step": 1140
},
{
"epoch": 1.2075996292863762,
"grad_norm": 0.5187364816665649,
"learning_rate": 8.791313559322034e-06,
"loss": 0.09,
"step": 1141
},
{
"epoch": 1.2086588110684495,
"grad_norm": 0.4340435266494751,
"learning_rate": 8.790254237288135e-06,
"loss": 0.09,
"step": 1142
},
{
"epoch": 1.209717992850523,
"grad_norm": 0.2178150713443756,
"learning_rate": 8.789194915254239e-06,
"loss": 0.0935,
"step": 1143
},
{
"epoch": 1.2107771746325964,
"grad_norm": 0.34359389543533325,
"learning_rate": 8.78813559322034e-06,
"loss": 0.0906,
"step": 1144
},
{
"epoch": 1.2118363564146697,
"grad_norm": 0.2701970636844635,
"learning_rate": 8.787076271186441e-06,
"loss": 0.0934,
"step": 1145
},
{
"epoch": 1.212895538196743,
"grad_norm": 1.226974368095398,
"learning_rate": 8.786016949152543e-06,
"loss": 0.0951,
"step": 1146
},
{
"epoch": 1.2139547199788163,
"grad_norm": 0.2872730493545532,
"learning_rate": 8.784957627118644e-06,
"loss": 0.0912,
"step": 1147
},
{
"epoch": 1.2150139017608896,
"grad_norm": 0.31621283292770386,
"learning_rate": 8.783898305084746e-06,
"loss": 0.0874,
"step": 1148
},
{
"epoch": 1.216073083542963,
"grad_norm": 0.8155948519706726,
"learning_rate": 8.782838983050849e-06,
"loss": 0.0917,
"step": 1149
},
{
"epoch": 1.2171322653250365,
"grad_norm": 0.29345694184303284,
"learning_rate": 8.78177966101695e-06,
"loss": 0.0909,
"step": 1150
},
{
"epoch": 1.2181914471071098,
"grad_norm": 0.2968461811542511,
"learning_rate": 8.780720338983052e-06,
"loss": 0.0959,
"step": 1151
},
{
"epoch": 1.2192506288891831,
"grad_norm": 0.611682116985321,
"learning_rate": 8.779661016949153e-06,
"loss": 0.0886,
"step": 1152
},
{
"epoch": 1.2203098106712564,
"grad_norm": 0.6184428334236145,
"learning_rate": 8.778601694915256e-06,
"loss": 0.0901,
"step": 1153
},
{
"epoch": 1.2213689924533297,
"grad_norm": 0.496878057718277,
"learning_rate": 8.777542372881357e-06,
"loss": 0.0943,
"step": 1154
},
{
"epoch": 1.222428174235403,
"grad_norm": 0.30651700496673584,
"learning_rate": 8.776483050847459e-06,
"loss": 0.0912,
"step": 1155
},
{
"epoch": 1.2234873560174764,
"grad_norm": 0.3181779384613037,
"learning_rate": 8.77542372881356e-06,
"loss": 0.0911,
"step": 1156
},
{
"epoch": 1.22454653779955,
"grad_norm": 0.26597824692726135,
"learning_rate": 8.774364406779662e-06,
"loss": 0.0919,
"step": 1157
},
{
"epoch": 1.2256057195816232,
"grad_norm": 0.35603615641593933,
"learning_rate": 8.773305084745763e-06,
"loss": 0.092,
"step": 1158
},
{
"epoch": 1.2266649013636965,
"grad_norm": 0.28663283586502075,
"learning_rate": 8.772245762711865e-06,
"loss": 0.0893,
"step": 1159
},
{
"epoch": 1.2277240831457699,
"grad_norm": 0.3351318836212158,
"learning_rate": 8.771186440677966e-06,
"loss": 0.0897,
"step": 1160
},
{
"epoch": 1.2287832649278432,
"grad_norm": 0.3456285297870636,
"learning_rate": 8.770127118644069e-06,
"loss": 0.0938,
"step": 1161
},
{
"epoch": 1.2298424467099165,
"grad_norm": 0.9174132347106934,
"learning_rate": 8.76906779661017e-06,
"loss": 0.0882,
"step": 1162
},
{
"epoch": 1.23090162849199,
"grad_norm": 0.3719140589237213,
"learning_rate": 8.768008474576272e-06,
"loss": 0.092,
"step": 1163
},
{
"epoch": 1.2319608102740633,
"grad_norm": 0.26492729783058167,
"learning_rate": 8.766949152542373e-06,
"loss": 0.0891,
"step": 1164
},
{
"epoch": 1.2330199920561367,
"grad_norm": 0.3745848834514618,
"learning_rate": 8.765889830508475e-06,
"loss": 0.0919,
"step": 1165
},
{
"epoch": 1.23407917383821,
"grad_norm": 0.2910005450248718,
"learning_rate": 8.764830508474576e-06,
"loss": 0.0917,
"step": 1166
},
{
"epoch": 1.2351383556202833,
"grad_norm": 0.27463725209236145,
"learning_rate": 8.763771186440678e-06,
"loss": 0.0905,
"step": 1167
},
{
"epoch": 1.2361975374023566,
"grad_norm": 0.3829032778739929,
"learning_rate": 8.76271186440678e-06,
"loss": 0.086,
"step": 1168
},
{
"epoch": 1.2372567191844301,
"grad_norm": 0.3017561137676239,
"learning_rate": 8.761652542372882e-06,
"loss": 0.0905,
"step": 1169
},
{
"epoch": 1.2383159009665035,
"grad_norm": 0.299236923456192,
"learning_rate": 8.760593220338985e-06,
"loss": 0.0913,
"step": 1170
},
{
"epoch": 1.2393750827485768,
"grad_norm": 1.07209312915802,
"learning_rate": 8.759533898305087e-06,
"loss": 0.0902,
"step": 1171
},
{
"epoch": 1.24043426453065,
"grad_norm": 0.7281382083892822,
"learning_rate": 8.758474576271188e-06,
"loss": 0.0941,
"step": 1172
},
{
"epoch": 1.2414934463127234,
"grad_norm": 0.37550926208496094,
"learning_rate": 8.75741525423729e-06,
"loss": 0.0899,
"step": 1173
},
{
"epoch": 1.2425526280947967,
"grad_norm": 0.3048110902309418,
"learning_rate": 8.75635593220339e-06,
"loss": 0.0917,
"step": 1174
},
{
"epoch": 1.24361180987687,
"grad_norm": 1.2708523273468018,
"learning_rate": 8.755296610169492e-06,
"loss": 0.0927,
"step": 1175
},
{
"epoch": 1.2446709916589436,
"grad_norm": 0.30342382192611694,
"learning_rate": 8.754237288135594e-06,
"loss": 0.0954,
"step": 1176
},
{
"epoch": 1.2457301734410169,
"grad_norm": 0.258834570646286,
"learning_rate": 8.753177966101695e-06,
"loss": 0.0868,
"step": 1177
},
{
"epoch": 1.2467893552230902,
"grad_norm": 0.3479873836040497,
"learning_rate": 8.752118644067798e-06,
"loss": 0.087,
"step": 1178
},
{
"epoch": 1.2478485370051635,
"grad_norm": 0.6660997867584229,
"learning_rate": 8.7510593220339e-06,
"loss": 0.093,
"step": 1179
},
{
"epoch": 1.2489077187872368,
"grad_norm": 0.5038926601409912,
"learning_rate": 8.750000000000001e-06,
"loss": 0.0898,
"step": 1180
},
{
"epoch": 1.2499669005693101,
"grad_norm": 0.49546748399734497,
"learning_rate": 8.748940677966102e-06,
"loss": 0.0973,
"step": 1181
},
{
"epoch": 1.2510260823513835,
"grad_norm": 0.7215166091918945,
"learning_rate": 8.747881355932204e-06,
"loss": 0.0882,
"step": 1182
},
{
"epoch": 1.252085264133457,
"grad_norm": 0.5412211418151855,
"learning_rate": 8.746822033898305e-06,
"loss": 0.0898,
"step": 1183
},
{
"epoch": 1.2531444459155303,
"grad_norm": 0.26789650321006775,
"learning_rate": 8.745762711864407e-06,
"loss": 0.0872,
"step": 1184
},
{
"epoch": 1.2542036276976036,
"grad_norm": 0.2784571349620819,
"learning_rate": 8.74470338983051e-06,
"loss": 0.0919,
"step": 1185
},
{
"epoch": 1.255262809479677,
"grad_norm": 0.28960517048835754,
"learning_rate": 8.743644067796611e-06,
"loss": 0.0932,
"step": 1186
},
{
"epoch": 1.2563219912617503,
"grad_norm": 0.4026484489440918,
"learning_rate": 8.742584745762712e-06,
"loss": 0.0916,
"step": 1187
},
{
"epoch": 1.2573811730438236,
"grad_norm": 0.4276771545410156,
"learning_rate": 8.741525423728814e-06,
"loss": 0.0933,
"step": 1188
},
{
"epoch": 1.258440354825897,
"grad_norm": 0.34631893038749695,
"learning_rate": 8.740466101694915e-06,
"loss": 0.0868,
"step": 1189
},
{
"epoch": 1.2594995366079704,
"grad_norm": 0.43336084485054016,
"learning_rate": 8.739406779661017e-06,
"loss": 0.0906,
"step": 1190
},
{
"epoch": 1.2605587183900437,
"grad_norm": 0.6585732698440552,
"learning_rate": 8.73834745762712e-06,
"loss": 0.0932,
"step": 1191
},
{
"epoch": 1.261617900172117,
"grad_norm": 0.2695441246032715,
"learning_rate": 8.737288135593221e-06,
"loss": 0.0918,
"step": 1192
},
{
"epoch": 1.2626770819541904,
"grad_norm": 0.30017632246017456,
"learning_rate": 8.736228813559323e-06,
"loss": 0.0886,
"step": 1193
},
{
"epoch": 1.2637362637362637,
"grad_norm": 0.2944658100605011,
"learning_rate": 8.735169491525424e-06,
"loss": 0.0861,
"step": 1194
},
{
"epoch": 1.264795445518337,
"grad_norm": 0.37323957681655884,
"learning_rate": 8.734110169491527e-06,
"loss": 0.0885,
"step": 1195
},
{
"epoch": 1.2658546273004103,
"grad_norm": 0.301430881023407,
"learning_rate": 8.733050847457629e-06,
"loss": 0.0899,
"step": 1196
},
{
"epoch": 1.2669138090824839,
"grad_norm": 0.7957718372344971,
"learning_rate": 8.73199152542373e-06,
"loss": 0.0906,
"step": 1197
},
{
"epoch": 1.2679729908645572,
"grad_norm": 0.39433908462524414,
"learning_rate": 8.730932203389831e-06,
"loss": 0.0934,
"step": 1198
},
{
"epoch": 1.2690321726466305,
"grad_norm": 0.6956431269645691,
"learning_rate": 8.729872881355933e-06,
"loss": 0.0907,
"step": 1199
},
{
"epoch": 1.2700913544287038,
"grad_norm": 0.6170002222061157,
"learning_rate": 8.728813559322034e-06,
"loss": 0.0884,
"step": 1200
},
{
"epoch": 1.2711505362107771,
"grad_norm": 0.5037360787391663,
"learning_rate": 8.727754237288136e-06,
"loss": 0.0919,
"step": 1201
},
{
"epoch": 1.2722097179928507,
"grad_norm": 0.5087904930114746,
"learning_rate": 8.726694915254239e-06,
"loss": 0.0908,
"step": 1202
},
{
"epoch": 1.2732688997749237,
"grad_norm": 0.3599529266357422,
"learning_rate": 8.72563559322034e-06,
"loss": 0.0907,
"step": 1203
},
{
"epoch": 1.2743280815569973,
"grad_norm": 0.36797240376472473,
"learning_rate": 8.724576271186442e-06,
"loss": 0.0913,
"step": 1204
},
{
"epoch": 1.2753872633390706,
"grad_norm": 0.46812427043914795,
"learning_rate": 8.723516949152543e-06,
"loss": 0.0884,
"step": 1205
},
{
"epoch": 1.276446445121144,
"grad_norm": 0.2462358921766281,
"learning_rate": 8.722457627118644e-06,
"loss": 0.0958,
"step": 1206
},
{
"epoch": 1.2775056269032172,
"grad_norm": 0.6306796669960022,
"learning_rate": 8.721398305084746e-06,
"loss": 0.0945,
"step": 1207
},
{
"epoch": 1.2785648086852905,
"grad_norm": 0.40346699953079224,
"learning_rate": 8.720338983050847e-06,
"loss": 0.0862,
"step": 1208
},
{
"epoch": 1.279623990467364,
"grad_norm": 0.2961828410625458,
"learning_rate": 8.719279661016949e-06,
"loss": 0.0896,
"step": 1209
},
{
"epoch": 1.2806831722494372,
"grad_norm": 0.3986334502696991,
"learning_rate": 8.718220338983052e-06,
"loss": 0.0923,
"step": 1210
},
{
"epoch": 1.2817423540315107,
"grad_norm": 0.411968857049942,
"learning_rate": 8.717161016949153e-06,
"loss": 0.0914,
"step": 1211
},
{
"epoch": 1.282801535813584,
"grad_norm": 0.321750670671463,
"learning_rate": 8.716101694915256e-06,
"loss": 0.0963,
"step": 1212
},
{
"epoch": 1.2838607175956573,
"grad_norm": 0.3059873878955841,
"learning_rate": 8.715042372881358e-06,
"loss": 0.0939,
"step": 1213
},
{
"epoch": 1.2849198993777307,
"grad_norm": 0.25494441390037537,
"learning_rate": 8.713983050847459e-06,
"loss": 0.0891,
"step": 1214
},
{
"epoch": 1.285979081159804,
"grad_norm": 0.6753748059272766,
"learning_rate": 8.71292372881356e-06,
"loss": 0.0892,
"step": 1215
},
{
"epoch": 1.2870382629418775,
"grad_norm": 0.5489036440849304,
"learning_rate": 8.711864406779662e-06,
"loss": 0.0901,
"step": 1216
},
{
"epoch": 1.2880974447239508,
"grad_norm": 0.4101668894290924,
"learning_rate": 8.710805084745763e-06,
"loss": 0.0864,
"step": 1217
},
{
"epoch": 1.2891566265060241,
"grad_norm": 0.516925036907196,
"learning_rate": 8.709745762711865e-06,
"loss": 0.0876,
"step": 1218
},
{
"epoch": 1.2902158082880975,
"grad_norm": 0.8851956725120544,
"learning_rate": 8.708686440677968e-06,
"loss": 0.0911,
"step": 1219
},
{
"epoch": 1.2912749900701708,
"grad_norm": 0.2738077640533447,
"learning_rate": 8.70762711864407e-06,
"loss": 0.092,
"step": 1220
},
{
"epoch": 1.292334171852244,
"grad_norm": 0.34898704290390015,
"learning_rate": 8.70656779661017e-06,
"loss": 0.0917,
"step": 1221
},
{
"epoch": 1.2933933536343174,
"grad_norm": 0.35455602407455444,
"learning_rate": 8.705508474576272e-06,
"loss": 0.0906,
"step": 1222
},
{
"epoch": 1.294452535416391,
"grad_norm": 0.2889910340309143,
"learning_rate": 8.704449152542373e-06,
"loss": 0.0898,
"step": 1223
},
{
"epoch": 1.2955117171984643,
"grad_norm": 0.7005313634872437,
"learning_rate": 8.703389830508475e-06,
"loss": 0.0914,
"step": 1224
},
{
"epoch": 1.2965708989805376,
"grad_norm": 0.8933342695236206,
"learning_rate": 8.702330508474576e-06,
"loss": 0.0908,
"step": 1225
},
{
"epoch": 1.2976300807626109,
"grad_norm": 0.34126585721969604,
"learning_rate": 8.701271186440678e-06,
"loss": 0.0944,
"step": 1226
},
{
"epoch": 1.2986892625446842,
"grad_norm": 0.2783612906932831,
"learning_rate": 8.70021186440678e-06,
"loss": 0.0918,
"step": 1227
},
{
"epoch": 1.2997484443267575,
"grad_norm": 1.59508216381073,
"learning_rate": 8.699152542372882e-06,
"loss": 0.0938,
"step": 1228
},
{
"epoch": 1.3008076261088308,
"grad_norm": 0.2347707450389862,
"learning_rate": 8.698093220338984e-06,
"loss": 0.0896,
"step": 1229
},
{
"epoch": 1.3018668078909044,
"grad_norm": 0.5518621206283569,
"learning_rate": 8.697033898305085e-06,
"loss": 0.0963,
"step": 1230
},
{
"epoch": 1.3029259896729777,
"grad_norm": 0.2808842957019806,
"learning_rate": 8.695974576271186e-06,
"loss": 0.0919,
"step": 1231
},
{
"epoch": 1.303985171455051,
"grad_norm": 0.2591249942779541,
"learning_rate": 8.694915254237288e-06,
"loss": 0.0931,
"step": 1232
},
{
"epoch": 1.3050443532371243,
"grad_norm": 0.5613085627555847,
"learning_rate": 8.693855932203391e-06,
"loss": 0.0915,
"step": 1233
},
{
"epoch": 1.3061035350191976,
"grad_norm": 1.0077388286590576,
"learning_rate": 8.692796610169492e-06,
"loss": 0.0926,
"step": 1234
},
{
"epoch": 1.307162716801271,
"grad_norm": 0.42165160179138184,
"learning_rate": 8.691737288135594e-06,
"loss": 0.0892,
"step": 1235
},
{
"epoch": 1.3082218985833443,
"grad_norm": 0.4207666218280792,
"learning_rate": 8.690677966101695e-06,
"loss": 0.0959,
"step": 1236
},
{
"epoch": 1.3092810803654178,
"grad_norm": 0.37785521149635315,
"learning_rate": 8.689618644067798e-06,
"loss": 0.0936,
"step": 1237
},
{
"epoch": 1.310340262147491,
"grad_norm": 0.35617318749427795,
"learning_rate": 8.6885593220339e-06,
"loss": 0.0888,
"step": 1238
},
{
"epoch": 1.3113994439295644,
"grad_norm": 0.7670419216156006,
"learning_rate": 8.687500000000001e-06,
"loss": 0.0896,
"step": 1239
},
{
"epoch": 1.3124586257116377,
"grad_norm": 0.2375904768705368,
"learning_rate": 8.686440677966103e-06,
"loss": 0.089,
"step": 1240
},
{
"epoch": 1.313517807493711,
"grad_norm": 0.43427926301956177,
"learning_rate": 8.685381355932204e-06,
"loss": 0.0901,
"step": 1241
},
{
"epoch": 1.3145769892757846,
"grad_norm": 0.74751877784729,
"learning_rate": 8.684322033898305e-06,
"loss": 0.0934,
"step": 1242
},
{
"epoch": 1.3156361710578577,
"grad_norm": 0.2564150094985962,
"learning_rate": 8.683262711864407e-06,
"loss": 0.0908,
"step": 1243
},
{
"epoch": 1.3166953528399312,
"grad_norm": 0.4206532835960388,
"learning_rate": 8.68220338983051e-06,
"loss": 0.0905,
"step": 1244
},
{
"epoch": 1.3177545346220045,
"grad_norm": 0.6286159753799438,
"learning_rate": 8.681144067796611e-06,
"loss": 0.0939,
"step": 1245
},
{
"epoch": 1.3188137164040779,
"grad_norm": 1.2643135786056519,
"learning_rate": 8.680084745762713e-06,
"loss": 0.0929,
"step": 1246
},
{
"epoch": 1.3198728981861512,
"grad_norm": 0.7013075351715088,
"learning_rate": 8.679025423728814e-06,
"loss": 0.0867,
"step": 1247
},
{
"epoch": 1.3209320799682245,
"grad_norm": 0.5261450409889221,
"learning_rate": 8.677966101694915e-06,
"loss": 0.0895,
"step": 1248
},
{
"epoch": 1.321991261750298,
"grad_norm": 0.32555660605430603,
"learning_rate": 8.676906779661017e-06,
"loss": 0.0921,
"step": 1249
},
{
"epoch": 1.3230504435323711,
"grad_norm": 0.33761289715766907,
"learning_rate": 8.675847457627118e-06,
"loss": 0.0913,
"step": 1250
},
{
"epoch": 1.3241096253144446,
"grad_norm": 0.40158653259277344,
"learning_rate": 8.674788135593221e-06,
"loss": 0.0959,
"step": 1251
},
{
"epoch": 1.325168807096518,
"grad_norm": 0.3522323966026306,
"learning_rate": 8.673728813559323e-06,
"loss": 0.0907,
"step": 1252
},
{
"epoch": 1.3262279888785913,
"grad_norm": 0.5156732201576233,
"learning_rate": 8.672669491525424e-06,
"loss": 0.0921,
"step": 1253
},
{
"epoch": 1.3272871706606646,
"grad_norm": 1.196157455444336,
"learning_rate": 8.671610169491527e-06,
"loss": 0.091,
"step": 1254
},
{
"epoch": 1.328346352442738,
"grad_norm": 0.3785947263240814,
"learning_rate": 8.670550847457629e-06,
"loss": 0.0909,
"step": 1255
},
{
"epoch": 1.3294055342248114,
"grad_norm": 0.41375061869621277,
"learning_rate": 8.66949152542373e-06,
"loss": 0.093,
"step": 1256
},
{
"epoch": 1.3304647160068848,
"grad_norm": 0.46027684211730957,
"learning_rate": 8.668432203389832e-06,
"loss": 0.0888,
"step": 1257
},
{
"epoch": 1.331523897788958,
"grad_norm": 0.29217252135276794,
"learning_rate": 8.667372881355933e-06,
"loss": 0.0933,
"step": 1258
},
{
"epoch": 1.3325830795710314,
"grad_norm": 0.3266601264476776,
"learning_rate": 8.666313559322034e-06,
"loss": 0.0938,
"step": 1259
},
{
"epoch": 1.3336422613531047,
"grad_norm": 0.22877094149589539,
"learning_rate": 8.665254237288136e-06,
"loss": 0.0907,
"step": 1260
},
{
"epoch": 1.334701443135178,
"grad_norm": 1.2058866024017334,
"learning_rate": 8.664194915254239e-06,
"loss": 0.0923,
"step": 1261
},
{
"epoch": 1.3357606249172513,
"grad_norm": 0.33053499460220337,
"learning_rate": 8.66313559322034e-06,
"loss": 0.0941,
"step": 1262
},
{
"epoch": 1.3368198066993249,
"grad_norm": 0.2770586609840393,
"learning_rate": 8.662076271186442e-06,
"loss": 0.0933,
"step": 1263
},
{
"epoch": 1.3378789884813982,
"grad_norm": 0.4117499887943268,
"learning_rate": 8.661016949152543e-06,
"loss": 0.094,
"step": 1264
},
{
"epoch": 1.3389381702634715,
"grad_norm": 0.4217805862426758,
"learning_rate": 8.659957627118645e-06,
"loss": 0.0926,
"step": 1265
},
{
"epoch": 1.3399973520455448,
"grad_norm": 0.4632180631160736,
"learning_rate": 8.658898305084746e-06,
"loss": 0.0917,
"step": 1266
},
{
"epoch": 1.3410565338276181,
"grad_norm": 0.279499351978302,
"learning_rate": 8.657838983050847e-06,
"loss": 0.0909,
"step": 1267
},
{
"epoch": 1.3421157156096915,
"grad_norm": 0.4228940010070801,
"learning_rate": 8.65677966101695e-06,
"loss": 0.0906,
"step": 1268
},
{
"epoch": 1.3431748973917648,
"grad_norm": 0.2591783106327057,
"learning_rate": 8.655720338983052e-06,
"loss": 0.0906,
"step": 1269
},
{
"epoch": 1.3442340791738383,
"grad_norm": 0.4122743606567383,
"learning_rate": 8.654661016949153e-06,
"loss": 0.0876,
"step": 1270
},
{
"epoch": 1.3452932609559116,
"grad_norm": 0.2966249883174896,
"learning_rate": 8.653601694915255e-06,
"loss": 0.0897,
"step": 1271
},
{
"epoch": 1.346352442737985,
"grad_norm": 0.3015781044960022,
"learning_rate": 8.652542372881356e-06,
"loss": 0.0945,
"step": 1272
},
{
"epoch": 1.3474116245200582,
"grad_norm": 0.38431188464164734,
"learning_rate": 8.651483050847458e-06,
"loss": 0.0922,
"step": 1273
},
{
"epoch": 1.3484708063021316,
"grad_norm": 0.9707023501396179,
"learning_rate": 8.650423728813559e-06,
"loss": 0.0921,
"step": 1274
},
{
"epoch": 1.3495299880842049,
"grad_norm": 0.5032562613487244,
"learning_rate": 8.649364406779662e-06,
"loss": 0.0927,
"step": 1275
},
{
"epoch": 1.3505891698662782,
"grad_norm": 0.2696177363395691,
"learning_rate": 8.648305084745763e-06,
"loss": 0.0909,
"step": 1276
},
{
"epoch": 1.3516483516483517,
"grad_norm": 0.4014403522014618,
"learning_rate": 8.647245762711865e-06,
"loss": 0.0889,
"step": 1277
},
{
"epoch": 1.352707533430425,
"grad_norm": 0.2904941439628601,
"learning_rate": 8.646186440677968e-06,
"loss": 0.0912,
"step": 1278
},
{
"epoch": 1.3537667152124984,
"grad_norm": 0.3719266951084137,
"learning_rate": 8.64512711864407e-06,
"loss": 0.093,
"step": 1279
},
{
"epoch": 1.3548258969945717,
"grad_norm": 0.917984664440155,
"learning_rate": 8.64406779661017e-06,
"loss": 0.0917,
"step": 1280
},
{
"epoch": 1.355885078776645,
"grad_norm": 0.8555766344070435,
"learning_rate": 8.643008474576272e-06,
"loss": 0.0915,
"step": 1281
},
{
"epoch": 1.3569442605587183,
"grad_norm": 0.2927437722682953,
"learning_rate": 8.641949152542374e-06,
"loss": 0.0876,
"step": 1282
},
{
"epoch": 1.3580034423407916,
"grad_norm": 1.2234313488006592,
"learning_rate": 8.640889830508475e-06,
"loss": 0.09,
"step": 1283
},
{
"epoch": 1.3590626241228652,
"grad_norm": 0.2912628948688507,
"learning_rate": 8.639830508474576e-06,
"loss": 0.0923,
"step": 1284
},
{
"epoch": 1.3601218059049385,
"grad_norm": 0.2900404930114746,
"learning_rate": 8.638771186440678e-06,
"loss": 0.0911,
"step": 1285
},
{
"epoch": 1.3611809876870118,
"grad_norm": 0.2967686653137207,
"learning_rate": 8.637711864406781e-06,
"loss": 0.0958,
"step": 1286
},
{
"epoch": 1.362240169469085,
"grad_norm": 0.3014371991157532,
"learning_rate": 8.636652542372882e-06,
"loss": 0.0931,
"step": 1287
},
{
"epoch": 1.3632993512511584,
"grad_norm": 0.4530743360519409,
"learning_rate": 8.635593220338984e-06,
"loss": 0.0911,
"step": 1288
},
{
"epoch": 1.364358533033232,
"grad_norm": 0.6225001811981201,
"learning_rate": 8.634533898305085e-06,
"loss": 0.089,
"step": 1289
},
{
"epoch": 1.365417714815305,
"grad_norm": 0.5223209261894226,
"learning_rate": 8.633474576271187e-06,
"loss": 0.0902,
"step": 1290
},
{
"epoch": 1.3664768965973786,
"grad_norm": 0.23648612201213837,
"learning_rate": 8.632415254237288e-06,
"loss": 0.0866,
"step": 1291
},
{
"epoch": 1.367536078379452,
"grad_norm": 0.3055684268474579,
"learning_rate": 8.63135593220339e-06,
"loss": 0.0899,
"step": 1292
},
{
"epoch": 1.3685952601615252,
"grad_norm": 0.2635006904602051,
"learning_rate": 8.630296610169493e-06,
"loss": 0.0925,
"step": 1293
},
{
"epoch": 1.3696544419435985,
"grad_norm": 0.2705018222332001,
"learning_rate": 8.629237288135594e-06,
"loss": 0.0928,
"step": 1294
},
{
"epoch": 1.3707136237256718,
"grad_norm": 0.33698102831840515,
"learning_rate": 8.628177966101695e-06,
"loss": 0.0912,
"step": 1295
},
{
"epoch": 1.3717728055077454,
"grad_norm": 0.35814690589904785,
"learning_rate": 8.627118644067798e-06,
"loss": 0.0953,
"step": 1296
},
{
"epoch": 1.3728319872898185,
"grad_norm": 0.29399535059928894,
"learning_rate": 8.6260593220339e-06,
"loss": 0.0906,
"step": 1297
},
{
"epoch": 1.373891169071892,
"grad_norm": 0.5228216052055359,
"learning_rate": 8.625000000000001e-06,
"loss": 0.0906,
"step": 1298
},
{
"epoch": 1.3749503508539653,
"grad_norm": 0.8515452742576599,
"learning_rate": 8.623940677966103e-06,
"loss": 0.0907,
"step": 1299
},
{
"epoch": 1.3760095326360386,
"grad_norm": 0.28903132677078247,
"learning_rate": 8.622881355932204e-06,
"loss": 0.0847,
"step": 1300
},
{
"epoch": 1.377068714418112,
"grad_norm": 0.26791977882385254,
"learning_rate": 8.621822033898306e-06,
"loss": 0.0888,
"step": 1301
},
{
"epoch": 1.3781278962001853,
"grad_norm": 0.8018907308578491,
"learning_rate": 8.620762711864407e-06,
"loss": 0.0899,
"step": 1302
},
{
"epoch": 1.3791870779822588,
"grad_norm": 0.48832303285598755,
"learning_rate": 8.61970338983051e-06,
"loss": 0.0886,
"step": 1303
},
{
"epoch": 1.3802462597643321,
"grad_norm": 0.3044288754463196,
"learning_rate": 8.618644067796611e-06,
"loss": 0.0897,
"step": 1304
},
{
"epoch": 1.3813054415464054,
"grad_norm": 0.306273490190506,
"learning_rate": 8.617584745762713e-06,
"loss": 0.093,
"step": 1305
},
{
"epoch": 1.3823646233284788,
"grad_norm": 0.38597342371940613,
"learning_rate": 8.616525423728814e-06,
"loss": 0.092,
"step": 1306
},
{
"epoch": 1.383423805110552,
"grad_norm": 0.5759603381156921,
"learning_rate": 8.615466101694916e-06,
"loss": 0.0908,
"step": 1307
},
{
"epoch": 1.3844829868926254,
"grad_norm": 0.2969980537891388,
"learning_rate": 8.614406779661017e-06,
"loss": 0.0904,
"step": 1308
},
{
"epoch": 1.3855421686746987,
"grad_norm": 0.6405715942382812,
"learning_rate": 8.613347457627118e-06,
"loss": 0.0931,
"step": 1309
},
{
"epoch": 1.3866013504567722,
"grad_norm": 0.47998374700546265,
"learning_rate": 8.612288135593222e-06,
"loss": 0.0907,
"step": 1310
},
{
"epoch": 1.3876605322388456,
"grad_norm": 0.8584996461868286,
"learning_rate": 8.611228813559323e-06,
"loss": 0.0918,
"step": 1311
},
{
"epoch": 1.3887197140209189,
"grad_norm": 0.3902839422225952,
"learning_rate": 8.610169491525424e-06,
"loss": 0.09,
"step": 1312
},
{
"epoch": 1.3897788958029922,
"grad_norm": 0.9900773763656616,
"learning_rate": 8.609110169491526e-06,
"loss": 0.0952,
"step": 1313
},
{
"epoch": 1.3908380775850655,
"grad_norm": 0.388141393661499,
"learning_rate": 8.608050847457627e-06,
"loss": 0.09,
"step": 1314
},
{
"epoch": 1.3918972593671388,
"grad_norm": 0.33699607849121094,
"learning_rate": 8.606991525423729e-06,
"loss": 0.0937,
"step": 1315
},
{
"epoch": 1.3929564411492121,
"grad_norm": 0.5017436146736145,
"learning_rate": 8.60593220338983e-06,
"loss": 0.0897,
"step": 1316
},
{
"epoch": 1.3940156229312857,
"grad_norm": 0.385785847902298,
"learning_rate": 8.604872881355933e-06,
"loss": 0.093,
"step": 1317
},
{
"epoch": 1.395074804713359,
"grad_norm": 0.8142003417015076,
"learning_rate": 8.603813559322035e-06,
"loss": 0.0951,
"step": 1318
},
{
"epoch": 1.3961339864954323,
"grad_norm": 0.9631067514419556,
"learning_rate": 8.602754237288136e-06,
"loss": 0.0892,
"step": 1319
},
{
"epoch": 1.3971931682775056,
"grad_norm": 0.3529389798641205,
"learning_rate": 8.601694915254239e-06,
"loss": 0.0927,
"step": 1320
},
{
"epoch": 1.398252350059579,
"grad_norm": 0.444048136472702,
"learning_rate": 8.60063559322034e-06,
"loss": 0.0914,
"step": 1321
},
{
"epoch": 1.3993115318416522,
"grad_norm": 0.29218658804893494,
"learning_rate": 8.599576271186442e-06,
"loss": 0.0912,
"step": 1322
},
{
"epoch": 1.4003707136237256,
"grad_norm": 0.34913161396980286,
"learning_rate": 8.598516949152543e-06,
"loss": 0.0922,
"step": 1323
},
{
"epoch": 1.401429895405799,
"grad_norm": 0.286085844039917,
"learning_rate": 8.597457627118645e-06,
"loss": 0.0909,
"step": 1324
},
{
"epoch": 1.4024890771878724,
"grad_norm": 0.2745821475982666,
"learning_rate": 8.596398305084746e-06,
"loss": 0.0892,
"step": 1325
},
{
"epoch": 1.4035482589699457,
"grad_norm": 0.3126218020915985,
"learning_rate": 8.595338983050848e-06,
"loss": 0.0929,
"step": 1326
},
{
"epoch": 1.404607440752019,
"grad_norm": 0.6711746454238892,
"learning_rate": 8.59427966101695e-06,
"loss": 0.0865,
"step": 1327
},
{
"epoch": 1.4056666225340924,
"grad_norm": 0.7869289517402649,
"learning_rate": 8.593220338983052e-06,
"loss": 0.0921,
"step": 1328
},
{
"epoch": 1.406725804316166,
"grad_norm": 0.4443584084510803,
"learning_rate": 8.592161016949153e-06,
"loss": 0.0916,
"step": 1329
},
{
"epoch": 1.407784986098239,
"grad_norm": 0.33459603786468506,
"learning_rate": 8.591101694915255e-06,
"loss": 0.089,
"step": 1330
},
{
"epoch": 1.4088441678803125,
"grad_norm": 0.6564658284187317,
"learning_rate": 8.590042372881356e-06,
"loss": 0.0947,
"step": 1331
},
{
"epoch": 1.4099033496623858,
"grad_norm": 0.37691906094551086,
"learning_rate": 8.588983050847458e-06,
"loss": 0.0903,
"step": 1332
},
{
"epoch": 1.4109625314444592,
"grad_norm": 0.4946768581867218,
"learning_rate": 8.587923728813559e-06,
"loss": 0.0909,
"step": 1333
},
{
"epoch": 1.4120217132265325,
"grad_norm": 0.29537561535835266,
"learning_rate": 8.58686440677966e-06,
"loss": 0.0914,
"step": 1334
},
{
"epoch": 1.4130808950086058,
"grad_norm": 0.35921117663383484,
"learning_rate": 8.585805084745764e-06,
"loss": 0.0961,
"step": 1335
},
{
"epoch": 1.4141400767906793,
"grad_norm": 0.37264665961265564,
"learning_rate": 8.584745762711865e-06,
"loss": 0.0888,
"step": 1336
},
{
"epoch": 1.4151992585727524,
"grad_norm": 1.4332431554794312,
"learning_rate": 8.583686440677966e-06,
"loss": 0.0932,
"step": 1337
},
{
"epoch": 1.416258440354826,
"grad_norm": 0.8218671083450317,
"learning_rate": 8.582627118644068e-06,
"loss": 0.0912,
"step": 1338
},
{
"epoch": 1.4173176221368993,
"grad_norm": 0.5757306814193726,
"learning_rate": 8.581567796610171e-06,
"loss": 0.0935,
"step": 1339
},
{
"epoch": 1.4183768039189726,
"grad_norm": 0.3226252794265747,
"learning_rate": 8.580508474576272e-06,
"loss": 0.0905,
"step": 1340
},
{
"epoch": 1.419435985701046,
"grad_norm": 0.24196362495422363,
"learning_rate": 8.579449152542374e-06,
"loss": 0.0871,
"step": 1341
},
{
"epoch": 1.4204951674831192,
"grad_norm": 0.4133007526397705,
"learning_rate": 8.578389830508475e-06,
"loss": 0.0892,
"step": 1342
},
{
"epoch": 1.4215543492651928,
"grad_norm": 0.6345924139022827,
"learning_rate": 8.577330508474577e-06,
"loss": 0.0921,
"step": 1343
},
{
"epoch": 1.422613531047266,
"grad_norm": 0.9568214416503906,
"learning_rate": 8.57627118644068e-06,
"loss": 0.0897,
"step": 1344
},
{
"epoch": 1.4236727128293394,
"grad_norm": 0.3031831383705139,
"learning_rate": 8.575211864406781e-06,
"loss": 0.0911,
"step": 1345
},
{
"epoch": 1.4247318946114127,
"grad_norm": 0.31946277618408203,
"learning_rate": 8.574152542372883e-06,
"loss": 0.0924,
"step": 1346
},
{
"epoch": 1.425791076393486,
"grad_norm": 0.2845727503299713,
"learning_rate": 8.573093220338984e-06,
"loss": 0.0942,
"step": 1347
},
{
"epoch": 1.4268502581755593,
"grad_norm": 1.0196945667266846,
"learning_rate": 8.572033898305085e-06,
"loss": 0.088,
"step": 1348
},
{
"epoch": 1.4279094399576326,
"grad_norm": 0.3057892620563507,
"learning_rate": 8.570974576271187e-06,
"loss": 0.0908,
"step": 1349
},
{
"epoch": 1.4289686217397062,
"grad_norm": 0.26774507761001587,
"learning_rate": 8.569915254237288e-06,
"loss": 0.0907,
"step": 1350
},
{
"epoch": 1.4300278035217795,
"grad_norm": 0.3148769736289978,
"learning_rate": 8.56885593220339e-06,
"loss": 0.0921,
"step": 1351
},
{
"epoch": 1.4310869853038528,
"grad_norm": 0.24930188059806824,
"learning_rate": 8.567796610169493e-06,
"loss": 0.0898,
"step": 1352
},
{
"epoch": 1.4321461670859261,
"grad_norm": 0.2720463275909424,
"learning_rate": 8.566737288135594e-06,
"loss": 0.0923,
"step": 1353
},
{
"epoch": 1.4332053488679994,
"grad_norm": 0.2629014253616333,
"learning_rate": 8.565677966101696e-06,
"loss": 0.0919,
"step": 1354
},
{
"epoch": 1.4342645306500728,
"grad_norm": 0.2753286361694336,
"learning_rate": 8.564618644067797e-06,
"loss": 0.0883,
"step": 1355
},
{
"epoch": 1.435323712432146,
"grad_norm": 0.8989904522895813,
"learning_rate": 8.563559322033898e-06,
"loss": 0.0933,
"step": 1356
},
{
"epoch": 1.4363828942142196,
"grad_norm": 0.30378690361976624,
"learning_rate": 8.5625e-06,
"loss": 0.0921,
"step": 1357
},
{
"epoch": 1.437442075996293,
"grad_norm": 0.3080299198627472,
"learning_rate": 8.561440677966101e-06,
"loss": 0.0905,
"step": 1358
},
{
"epoch": 1.4385012577783662,
"grad_norm": 0.3442351520061493,
"learning_rate": 8.560381355932204e-06,
"loss": 0.0916,
"step": 1359
},
{
"epoch": 1.4395604395604396,
"grad_norm": 0.35361289978027344,
"learning_rate": 8.559322033898306e-06,
"loss": 0.0879,
"step": 1360
},
{
"epoch": 1.4406196213425129,
"grad_norm": 0.48620983958244324,
"learning_rate": 8.558262711864407e-06,
"loss": 0.0905,
"step": 1361
},
{
"epoch": 1.4416788031245862,
"grad_norm": 0.40295881032943726,
"learning_rate": 8.55720338983051e-06,
"loss": 0.0897,
"step": 1362
},
{
"epoch": 1.4427379849066595,
"grad_norm": 0.24092480540275574,
"learning_rate": 8.556144067796612e-06,
"loss": 0.0898,
"step": 1363
},
{
"epoch": 1.443797166688733,
"grad_norm": 0.6216761469841003,
"learning_rate": 8.555084745762713e-06,
"loss": 0.0888,
"step": 1364
},
{
"epoch": 1.4448563484708064,
"grad_norm": 0.7443450689315796,
"learning_rate": 8.554025423728814e-06,
"loss": 0.0918,
"step": 1365
},
{
"epoch": 1.4459155302528797,
"grad_norm": 0.401972234249115,
"learning_rate": 8.552966101694916e-06,
"loss": 0.0915,
"step": 1366
},
{
"epoch": 1.446974712034953,
"grad_norm": 0.2976281940937042,
"learning_rate": 8.551906779661017e-06,
"loss": 0.091,
"step": 1367
},
{
"epoch": 1.4480338938170263,
"grad_norm": 0.4022362232208252,
"learning_rate": 8.550847457627119e-06,
"loss": 0.0934,
"step": 1368
},
{
"epoch": 1.4490930755990998,
"grad_norm": 0.4490336775779724,
"learning_rate": 8.549788135593222e-06,
"loss": 0.0921,
"step": 1369
},
{
"epoch": 1.450152257381173,
"grad_norm": 0.8399879932403564,
"learning_rate": 8.548728813559323e-06,
"loss": 0.0929,
"step": 1370
},
{
"epoch": 1.4512114391632465,
"grad_norm": 0.3975447416305542,
"learning_rate": 8.547669491525425e-06,
"loss": 0.0874,
"step": 1371
},
{
"epoch": 1.4522706209453198,
"grad_norm": 0.297023743391037,
"learning_rate": 8.546610169491526e-06,
"loss": 0.0889,
"step": 1372
},
{
"epoch": 1.453329802727393,
"grad_norm": 0.3174283504486084,
"learning_rate": 8.545550847457627e-06,
"loss": 0.0911,
"step": 1373
},
{
"epoch": 1.4543889845094664,
"grad_norm": 0.3391566276550293,
"learning_rate": 8.544491525423729e-06,
"loss": 0.0913,
"step": 1374
},
{
"epoch": 1.4554481662915397,
"grad_norm": 0.5378783941268921,
"learning_rate": 8.54343220338983e-06,
"loss": 0.0919,
"step": 1375
},
{
"epoch": 1.4565073480736133,
"grad_norm": 0.5963971018791199,
"learning_rate": 8.542372881355933e-06,
"loss": 0.0935,
"step": 1376
},
{
"epoch": 1.4575665298556864,
"grad_norm": 0.4562707841396332,
"learning_rate": 8.541313559322035e-06,
"loss": 0.0879,
"step": 1377
},
{
"epoch": 1.45862571163776,
"grad_norm": 0.28086161613464355,
"learning_rate": 8.540254237288136e-06,
"loss": 0.0881,
"step": 1378
},
{
"epoch": 1.4596848934198332,
"grad_norm": 0.21108803153038025,
"learning_rate": 8.539194915254238e-06,
"loss": 0.0903,
"step": 1379
},
{
"epoch": 1.4607440752019065,
"grad_norm": 0.26527541875839233,
"learning_rate": 8.538135593220339e-06,
"loss": 0.0897,
"step": 1380
},
{
"epoch": 1.4618032569839798,
"grad_norm": 0.26970967650413513,
"learning_rate": 8.537076271186442e-06,
"loss": 0.0899,
"step": 1381
},
{
"epoch": 1.4628624387660532,
"grad_norm": 0.30094999074935913,
"learning_rate": 8.536016949152543e-06,
"loss": 0.0878,
"step": 1382
},
{
"epoch": 1.4639216205481267,
"grad_norm": 1.1830754280090332,
"learning_rate": 8.534957627118645e-06,
"loss": 0.093,
"step": 1383
},
{
"epoch": 1.4649808023301998,
"grad_norm": 0.3395049273967743,
"learning_rate": 8.533898305084746e-06,
"loss": 0.0888,
"step": 1384
},
{
"epoch": 1.4660399841122733,
"grad_norm": 0.2931113541126251,
"learning_rate": 8.532838983050848e-06,
"loss": 0.0905,
"step": 1385
},
{
"epoch": 1.4670991658943466,
"grad_norm": 0.3630742132663727,
"learning_rate": 8.53177966101695e-06,
"loss": 0.0882,
"step": 1386
},
{
"epoch": 1.46815834767642,
"grad_norm": 0.46798133850097656,
"learning_rate": 8.530720338983052e-06,
"loss": 0.0915,
"step": 1387
},
{
"epoch": 1.4692175294584933,
"grad_norm": 0.27987757325172424,
"learning_rate": 8.529661016949154e-06,
"loss": 0.0915,
"step": 1388
},
{
"epoch": 1.4702767112405666,
"grad_norm": 0.33298471570014954,
"learning_rate": 8.528601694915255e-06,
"loss": 0.0892,
"step": 1389
},
{
"epoch": 1.4713358930226401,
"grad_norm": 0.2661607265472412,
"learning_rate": 8.527542372881356e-06,
"loss": 0.0893,
"step": 1390
},
{
"epoch": 1.4723950748047134,
"grad_norm": 0.38440874218940735,
"learning_rate": 8.526483050847458e-06,
"loss": 0.0875,
"step": 1391
},
{
"epoch": 1.4734542565867867,
"grad_norm": 0.4289654791355133,
"learning_rate": 8.52542372881356e-06,
"loss": 0.0903,
"step": 1392
},
{
"epoch": 1.47451343836886,
"grad_norm": 0.6656467318534851,
"learning_rate": 8.524364406779662e-06,
"loss": 0.0893,
"step": 1393
},
{
"epoch": 1.4755726201509334,
"grad_norm": 0.4115320146083832,
"learning_rate": 8.523305084745764e-06,
"loss": 0.0904,
"step": 1394
},
{
"epoch": 1.4766318019330067,
"grad_norm": 1.2950462102890015,
"learning_rate": 8.522245762711865e-06,
"loss": 0.0899,
"step": 1395
},
{
"epoch": 1.47769098371508,
"grad_norm": 0.5561743974685669,
"learning_rate": 8.521186440677967e-06,
"loss": 0.0923,
"step": 1396
},
{
"epoch": 1.4787501654971535,
"grad_norm": 0.29928943514823914,
"learning_rate": 8.520127118644068e-06,
"loss": 0.088,
"step": 1397
},
{
"epoch": 1.4798093472792269,
"grad_norm": 0.5597212910652161,
"learning_rate": 8.51906779661017e-06,
"loss": 0.0902,
"step": 1398
},
{
"epoch": 1.4808685290613002,
"grad_norm": 0.341529905796051,
"learning_rate": 8.518008474576271e-06,
"loss": 0.0869,
"step": 1399
},
{
"epoch": 1.4819277108433735,
"grad_norm": 0.2716520130634308,
"learning_rate": 8.516949152542372e-06,
"loss": 0.0917,
"step": 1400
},
{
"epoch": 1.4829868926254468,
"grad_norm": 0.2677081823348999,
"learning_rate": 8.515889830508475e-06,
"loss": 0.0881,
"step": 1401
},
{
"epoch": 1.4840460744075201,
"grad_norm": 0.27061936259269714,
"learning_rate": 8.514830508474577e-06,
"loss": 0.0901,
"step": 1402
},
{
"epoch": 1.4851052561895934,
"grad_norm": 0.6803617477416992,
"learning_rate": 8.51377118644068e-06,
"loss": 0.0885,
"step": 1403
},
{
"epoch": 1.486164437971667,
"grad_norm": 0.7752925157546997,
"learning_rate": 8.512711864406781e-06,
"loss": 0.09,
"step": 1404
},
{
"epoch": 1.4872236197537403,
"grad_norm": 1.0663254261016846,
"learning_rate": 8.511652542372883e-06,
"loss": 0.0927,
"step": 1405
},
{
"epoch": 1.4882828015358136,
"grad_norm": 1.3076659440994263,
"learning_rate": 8.510593220338984e-06,
"loss": 0.09,
"step": 1406
},
{
"epoch": 1.489341983317887,
"grad_norm": 0.2659742832183838,
"learning_rate": 8.509533898305086e-06,
"loss": 0.0907,
"step": 1407
},
{
"epoch": 1.4904011650999602,
"grad_norm": 0.34570935368537903,
"learning_rate": 8.508474576271187e-06,
"loss": 0.0896,
"step": 1408
},
{
"epoch": 1.4914603468820335,
"grad_norm": 0.34614187479019165,
"learning_rate": 8.507415254237288e-06,
"loss": 0.0911,
"step": 1409
},
{
"epoch": 1.4925195286641069,
"grad_norm": 0.24298426508903503,
"learning_rate": 8.50635593220339e-06,
"loss": 0.0917,
"step": 1410
},
{
"epoch": 1.4935787104461804,
"grad_norm": 0.49938711524009705,
"learning_rate": 8.505296610169493e-06,
"loss": 0.0958,
"step": 1411
},
{
"epoch": 1.4946378922282537,
"grad_norm": 0.9254084825515747,
"learning_rate": 8.504237288135594e-06,
"loss": 0.0909,
"step": 1412
},
{
"epoch": 1.495697074010327,
"grad_norm": 0.430133193731308,
"learning_rate": 8.503177966101696e-06,
"loss": 0.0917,
"step": 1413
},
{
"epoch": 1.4967562557924003,
"grad_norm": 1.0322993993759155,
"learning_rate": 8.502118644067797e-06,
"loss": 0.0922,
"step": 1414
},
{
"epoch": 1.4978154375744737,
"grad_norm": 0.6187616586685181,
"learning_rate": 8.501059322033899e-06,
"loss": 0.0951,
"step": 1415
},
{
"epoch": 1.4988746193565472,
"grad_norm": 0.33860644698143005,
"learning_rate": 8.5e-06,
"loss": 0.0933,
"step": 1416
},
{
"epoch": 1.4988746193565472,
"eval_accuracy": 0.968,
"eval_best_f1_from_thresholding": 0.1443850267379679,
"eval_loss": 0.13970085978507996,
"eval_matthews_corrcoef": 0.1537363369231449,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9836950983389381,
"eval_negative_class_precision": 0.9932091778989608,
"eval_negative_class_recall": 0.9743615625315434,
"eval_positive_class_f1": 0.1443850267379679,
"eval_positive_class_precision": 0.09608540925266904,
"eval_positive_class_recall": 0.2903225806451613,
"eval_roc_auc": 0.8216168430923719,
"eval_runtime": 20.7317,
"eval_samples_per_second": 482.353,
"eval_steps_per_second": 7.573,
"step": 1416
},
{
"epoch": 1.4999338011386203,
"grad_norm": 0.47634679079055786,
"learning_rate": 8.498940677966101e-06,
"loss": 0.0952,
"step": 1417
},
{
"epoch": 1.5009929829206938,
"grad_norm": 0.40228286385536194,
"learning_rate": 8.497881355932204e-06,
"loss": 0.0916,
"step": 1418
},
{
"epoch": 1.5020521647027671,
"grad_norm": 0.2725696563720703,
"learning_rate": 8.496822033898306e-06,
"loss": 0.0962,
"step": 1419
},
{
"epoch": 1.5031113464848405,
"grad_norm": 0.504587709903717,
"learning_rate": 8.495762711864407e-06,
"loss": 0.0919,
"step": 1420
},
{
"epoch": 1.5041705282669138,
"grad_norm": 0.424791544675827,
"learning_rate": 8.494703389830509e-06,
"loss": 0.0892,
"step": 1421
},
{
"epoch": 1.505229710048987,
"grad_norm": 0.5593955516815186,
"learning_rate": 8.49364406779661e-06,
"loss": 0.0897,
"step": 1422
},
{
"epoch": 1.5062888918310606,
"grad_norm": 0.7979548573493958,
"learning_rate": 8.492584745762713e-06,
"loss": 0.0889,
"step": 1423
},
{
"epoch": 1.5073480736131337,
"grad_norm": 0.6873000264167786,
"learning_rate": 8.491525423728815e-06,
"loss": 0.0909,
"step": 1424
},
{
"epoch": 1.5084072553952073,
"grad_norm": 0.43569207191467285,
"learning_rate": 8.490466101694916e-06,
"loss": 0.0899,
"step": 1425
},
{
"epoch": 1.5094664371772806,
"grad_norm": 0.3459659516811371,
"learning_rate": 8.489406779661017e-06,
"loss": 0.0934,
"step": 1426
},
{
"epoch": 1.5105256189593539,
"grad_norm": 0.7253245115280151,
"learning_rate": 8.488347457627119e-06,
"loss": 0.091,
"step": 1427
},
{
"epoch": 1.5115848007414272,
"grad_norm": 0.3521025776863098,
"learning_rate": 8.487288135593222e-06,
"loss": 0.0901,
"step": 1428
},
{
"epoch": 1.5126439825235005,
"grad_norm": 0.31998488306999207,
"learning_rate": 8.486228813559323e-06,
"loss": 0.0911,
"step": 1429
},
{
"epoch": 1.513703164305574,
"grad_norm": 0.29415151476860046,
"learning_rate": 8.485169491525425e-06,
"loss": 0.0899,
"step": 1430
},
{
"epoch": 1.5147623460876471,
"grad_norm": 0.7465116381645203,
"learning_rate": 8.484110169491526e-06,
"loss": 0.0893,
"step": 1431
},
{
"epoch": 1.5158215278697207,
"grad_norm": 0.37793874740600586,
"learning_rate": 8.483050847457628e-06,
"loss": 0.0872,
"step": 1432
},
{
"epoch": 1.516880709651794,
"grad_norm": 0.6085448265075684,
"learning_rate": 8.481991525423729e-06,
"loss": 0.0898,
"step": 1433
},
{
"epoch": 1.5179398914338673,
"grad_norm": 0.9050545692443848,
"learning_rate": 8.48093220338983e-06,
"loss": 0.0907,
"step": 1434
},
{
"epoch": 1.5189990732159406,
"grad_norm": 0.495643675327301,
"learning_rate": 8.479872881355934e-06,
"loss": 0.0891,
"step": 1435
},
{
"epoch": 1.520058254998014,
"grad_norm": 0.361016184091568,
"learning_rate": 8.478813559322035e-06,
"loss": 0.0888,
"step": 1436
},
{
"epoch": 1.5211174367800875,
"grad_norm": 0.3698117733001709,
"learning_rate": 8.477754237288136e-06,
"loss": 0.0919,
"step": 1437
},
{
"epoch": 1.5221766185621606,
"grad_norm": 0.29097628593444824,
"learning_rate": 8.476694915254238e-06,
"loss": 0.0924,
"step": 1438
},
{
"epoch": 1.5232358003442341,
"grad_norm": 0.42036259174346924,
"learning_rate": 8.47563559322034e-06,
"loss": 0.0934,
"step": 1439
},
{
"epoch": 1.5242949821263074,
"grad_norm": 0.24552021920681,
"learning_rate": 8.47457627118644e-06,
"loss": 0.0879,
"step": 1440
},
{
"epoch": 1.5253541639083807,
"grad_norm": 0.6841972470283508,
"learning_rate": 8.473516949152542e-06,
"loss": 0.0906,
"step": 1441
},
{
"epoch": 1.5264133456904543,
"grad_norm": 0.9200479388237,
"learning_rate": 8.472457627118645e-06,
"loss": 0.0885,
"step": 1442
},
{
"epoch": 1.5274725274725274,
"grad_norm": 0.5507006049156189,
"learning_rate": 8.471398305084746e-06,
"loss": 0.0908,
"step": 1443
},
{
"epoch": 1.528531709254601,
"grad_norm": 1.2419291734695435,
"learning_rate": 8.470338983050848e-06,
"loss": 0.0913,
"step": 1444
},
{
"epoch": 1.529590891036674,
"grad_norm": 0.3634277880191803,
"learning_rate": 8.469279661016951e-06,
"loss": 0.0896,
"step": 1445
},
{
"epoch": 1.5306500728187475,
"grad_norm": 0.5093683004379272,
"learning_rate": 8.468220338983052e-06,
"loss": 0.0939,
"step": 1446
},
{
"epoch": 1.5317092546008209,
"grad_norm": 0.3977075219154358,
"learning_rate": 8.467161016949154e-06,
"loss": 0.0893,
"step": 1447
},
{
"epoch": 1.5327684363828942,
"grad_norm": 0.6540356278419495,
"learning_rate": 8.466101694915255e-06,
"loss": 0.0903,
"step": 1448
},
{
"epoch": 1.5338276181649677,
"grad_norm": 0.3206644356250763,
"learning_rate": 8.465042372881357e-06,
"loss": 0.0908,
"step": 1449
},
{
"epoch": 1.5348867999470408,
"grad_norm": 0.5804445147514343,
"learning_rate": 8.463983050847458e-06,
"loss": 0.091,
"step": 1450
},
{
"epoch": 1.5359459817291143,
"grad_norm": 0.3737938404083252,
"learning_rate": 8.46292372881356e-06,
"loss": 0.0881,
"step": 1451
},
{
"epoch": 1.5370051635111877,
"grad_norm": 0.9696879982948303,
"learning_rate": 8.461864406779663e-06,
"loss": 0.0898,
"step": 1452
},
{
"epoch": 1.538064345293261,
"grad_norm": 0.3338160812854767,
"learning_rate": 8.460805084745764e-06,
"loss": 0.0871,
"step": 1453
},
{
"epoch": 1.5391235270753343,
"grad_norm": 0.5255388021469116,
"learning_rate": 8.459745762711865e-06,
"loss": 0.0868,
"step": 1454
},
{
"epoch": 1.5401827088574076,
"grad_norm": 0.3634493947029114,
"learning_rate": 8.458686440677967e-06,
"loss": 0.0884,
"step": 1455
},
{
"epoch": 1.5412418906394811,
"grad_norm": 0.2899482250213623,
"learning_rate": 8.457627118644068e-06,
"loss": 0.0855,
"step": 1456
},
{
"epoch": 1.5423010724215542,
"grad_norm": 0.4005317986011505,
"learning_rate": 8.45656779661017e-06,
"loss": 0.0912,
"step": 1457
},
{
"epoch": 1.5433602542036278,
"grad_norm": 0.35378047823905945,
"learning_rate": 8.455508474576271e-06,
"loss": 0.0897,
"step": 1458
},
{
"epoch": 1.544419435985701,
"grad_norm": 0.25792017579078674,
"learning_rate": 8.454449152542374e-06,
"loss": 0.0866,
"step": 1459
},
{
"epoch": 1.5454786177677744,
"grad_norm": 0.36935243010520935,
"learning_rate": 8.453389830508476e-06,
"loss": 0.0867,
"step": 1460
},
{
"epoch": 1.5465377995498477,
"grad_norm": 0.5194677114486694,
"learning_rate": 8.452330508474577e-06,
"loss": 0.0903,
"step": 1461
},
{
"epoch": 1.547596981331921,
"grad_norm": 0.4349111318588257,
"learning_rate": 8.451271186440678e-06,
"loss": 0.087,
"step": 1462
},
{
"epoch": 1.5486561631139946,
"grad_norm": 0.24651400744915009,
"learning_rate": 8.45021186440678e-06,
"loss": 0.0911,
"step": 1463
},
{
"epoch": 1.5497153448960677,
"grad_norm": 0.7304729223251343,
"learning_rate": 8.449152542372881e-06,
"loss": 0.096,
"step": 1464
},
{
"epoch": 1.5507745266781412,
"grad_norm": 0.7694931626319885,
"learning_rate": 8.448093220338984e-06,
"loss": 0.094,
"step": 1465
},
{
"epoch": 1.5518337084602145,
"grad_norm": 0.3536149561405182,
"learning_rate": 8.447033898305086e-06,
"loss": 0.0889,
"step": 1466
},
{
"epoch": 1.5528928902422878,
"grad_norm": 0.28751471638679504,
"learning_rate": 8.445974576271187e-06,
"loss": 0.0899,
"step": 1467
},
{
"epoch": 1.5539520720243611,
"grad_norm": 0.2782193720340729,
"learning_rate": 8.444915254237289e-06,
"loss": 0.0909,
"step": 1468
},
{
"epoch": 1.5550112538064345,
"grad_norm": 0.2645580470561981,
"learning_rate": 8.443855932203392e-06,
"loss": 0.0899,
"step": 1469
},
{
"epoch": 1.556070435588508,
"grad_norm": 0.2474951297044754,
"learning_rate": 8.442796610169493e-06,
"loss": 0.0874,
"step": 1470
},
{
"epoch": 1.557129617370581,
"grad_norm": 0.3451857566833496,
"learning_rate": 8.441737288135594e-06,
"loss": 0.092,
"step": 1471
},
{
"epoch": 1.5581887991526546,
"grad_norm": 1.3307249546051025,
"learning_rate": 8.440677966101696e-06,
"loss": 0.091,
"step": 1472
},
{
"epoch": 1.559247980934728,
"grad_norm": 0.5494109988212585,
"learning_rate": 8.439618644067797e-06,
"loss": 0.0883,
"step": 1473
},
{
"epoch": 1.5603071627168013,
"grad_norm": 0.6073052883148193,
"learning_rate": 8.438559322033899e-06,
"loss": 0.0874,
"step": 1474
},
{
"epoch": 1.5613663444988746,
"grad_norm": 0.2551688253879547,
"learning_rate": 8.4375e-06,
"loss": 0.0878,
"step": 1475
},
{
"epoch": 1.5624255262809479,
"grad_norm": 0.3065250813961029,
"learning_rate": 8.436440677966102e-06,
"loss": 0.087,
"step": 1476
},
{
"epoch": 1.5634847080630214,
"grad_norm": 0.472655713558197,
"learning_rate": 8.435381355932205e-06,
"loss": 0.0888,
"step": 1477
},
{
"epoch": 1.5645438898450945,
"grad_norm": 0.30595093965530396,
"learning_rate": 8.434322033898306e-06,
"loss": 0.0903,
"step": 1478
},
{
"epoch": 1.565603071627168,
"grad_norm": 0.5333871841430664,
"learning_rate": 8.433262711864407e-06,
"loss": 0.0912,
"step": 1479
},
{
"epoch": 1.5666622534092414,
"grad_norm": 0.34483611583709717,
"learning_rate": 8.432203389830509e-06,
"loss": 0.0891,
"step": 1480
},
{
"epoch": 1.5677214351913147,
"grad_norm": 0.3512003421783447,
"learning_rate": 8.43114406779661e-06,
"loss": 0.087,
"step": 1481
},
{
"epoch": 1.5687806169733882,
"grad_norm": 0.5785057544708252,
"learning_rate": 8.430084745762712e-06,
"loss": 0.0883,
"step": 1482
},
{
"epoch": 1.5698397987554613,
"grad_norm": 0.41664919257164,
"learning_rate": 8.429025423728813e-06,
"loss": 0.087,
"step": 1483
},
{
"epoch": 1.5708989805375349,
"grad_norm": 0.44375720620155334,
"learning_rate": 8.427966101694916e-06,
"loss": 0.0886,
"step": 1484
},
{
"epoch": 1.571958162319608,
"grad_norm": 0.24387286603450775,
"learning_rate": 8.426906779661018e-06,
"loss": 0.0922,
"step": 1485
},
{
"epoch": 1.5730173441016815,
"grad_norm": 0.48554837703704834,
"learning_rate": 8.42584745762712e-06,
"loss": 0.0905,
"step": 1486
},
{
"epoch": 1.5740765258837548,
"grad_norm": 0.38902896642684937,
"learning_rate": 8.424788135593222e-06,
"loss": 0.0913,
"step": 1487
},
{
"epoch": 1.575135707665828,
"grad_norm": 0.2936050593852997,
"learning_rate": 8.423728813559324e-06,
"loss": 0.0934,
"step": 1488
},
{
"epoch": 1.5761948894479016,
"grad_norm": 1.3971738815307617,
"learning_rate": 8.422669491525425e-06,
"loss": 0.0912,
"step": 1489
},
{
"epoch": 1.5772540712299747,
"grad_norm": 0.5207201838493347,
"learning_rate": 8.421610169491526e-06,
"loss": 0.0894,
"step": 1490
},
{
"epoch": 1.5783132530120483,
"grad_norm": 0.21842218935489655,
"learning_rate": 8.420550847457628e-06,
"loss": 0.0908,
"step": 1491
},
{
"epoch": 1.5793724347941216,
"grad_norm": 0.2736961543560028,
"learning_rate": 8.41949152542373e-06,
"loss": 0.0898,
"step": 1492
},
{
"epoch": 1.580431616576195,
"grad_norm": 1.9446067810058594,
"learning_rate": 8.41843220338983e-06,
"loss": 0.0896,
"step": 1493
},
{
"epoch": 1.5814907983582682,
"grad_norm": 0.3114362359046936,
"learning_rate": 8.417372881355934e-06,
"loss": 0.0931,
"step": 1494
},
{
"epoch": 1.5825499801403415,
"grad_norm": 0.23505762219429016,
"learning_rate": 8.416313559322035e-06,
"loss": 0.0896,
"step": 1495
},
{
"epoch": 1.583609161922415,
"grad_norm": 0.2370254397392273,
"learning_rate": 8.415254237288137e-06,
"loss": 0.0908,
"step": 1496
},
{
"epoch": 1.5846683437044882,
"grad_norm": 0.3030937910079956,
"learning_rate": 8.414194915254238e-06,
"loss": 0.0879,
"step": 1497
},
{
"epoch": 1.5857275254865617,
"grad_norm": 0.29918500781059265,
"learning_rate": 8.41313559322034e-06,
"loss": 0.0881,
"step": 1498
},
{
"epoch": 1.586786707268635,
"grad_norm": 0.627144455909729,
"learning_rate": 8.41207627118644e-06,
"loss": 0.0924,
"step": 1499
},
{
"epoch": 1.5878458890507083,
"grad_norm": 0.2884758412837982,
"learning_rate": 8.411016949152542e-06,
"loss": 0.0876,
"step": 1500
},
{
"epoch": 1.5889050708327817,
"grad_norm": 0.586335301399231,
"learning_rate": 8.409957627118645e-06,
"loss": 0.089,
"step": 1501
},
{
"epoch": 1.589964252614855,
"grad_norm": 0.25162607431411743,
"learning_rate": 8.408898305084747e-06,
"loss": 0.09,
"step": 1502
},
{
"epoch": 1.5910234343969285,
"grad_norm": 0.28372955322265625,
"learning_rate": 8.407838983050848e-06,
"loss": 0.0857,
"step": 1503
},
{
"epoch": 1.5920826161790016,
"grad_norm": 0.6223670244216919,
"learning_rate": 8.40677966101695e-06,
"loss": 0.0945,
"step": 1504
},
{
"epoch": 1.5931417979610751,
"grad_norm": 1.0832895040512085,
"learning_rate": 8.405720338983051e-06,
"loss": 0.088,
"step": 1505
},
{
"epoch": 1.5942009797431484,
"grad_norm": 0.2921046316623688,
"learning_rate": 8.404661016949152e-06,
"loss": 0.0884,
"step": 1506
},
{
"epoch": 1.5952601615252218,
"grad_norm": 0.20038729906082153,
"learning_rate": 8.403601694915255e-06,
"loss": 0.0848,
"step": 1507
},
{
"epoch": 1.596319343307295,
"grad_norm": 0.2741376757621765,
"learning_rate": 8.402542372881357e-06,
"loss": 0.0887,
"step": 1508
},
{
"epoch": 1.5973785250893684,
"grad_norm": 0.25497081875801086,
"learning_rate": 8.401483050847458e-06,
"loss": 0.0892,
"step": 1509
},
{
"epoch": 1.598437706871442,
"grad_norm": 0.22426116466522217,
"learning_rate": 8.40042372881356e-06,
"loss": 0.0912,
"step": 1510
},
{
"epoch": 1.599496888653515,
"grad_norm": 0.3952200412750244,
"learning_rate": 8.399364406779663e-06,
"loss": 0.0879,
"step": 1511
},
{
"epoch": 1.6005560704355886,
"grad_norm": 0.497310072183609,
"learning_rate": 8.398305084745764e-06,
"loss": 0.0896,
"step": 1512
},
{
"epoch": 1.6016152522176619,
"grad_norm": 0.5498274564743042,
"learning_rate": 8.397245762711866e-06,
"loss": 0.0915,
"step": 1513
},
{
"epoch": 1.6026744339997352,
"grad_norm": 0.28143325448036194,
"learning_rate": 8.396186440677967e-06,
"loss": 0.089,
"step": 1514
},
{
"epoch": 1.6037336157818085,
"grad_norm": 0.23657409846782684,
"learning_rate": 8.395127118644068e-06,
"loss": 0.0901,
"step": 1515
},
{
"epoch": 1.6047927975638818,
"grad_norm": 0.3381804823875427,
"learning_rate": 8.39406779661017e-06,
"loss": 0.0912,
"step": 1516
},
{
"epoch": 1.6058519793459554,
"grad_norm": 0.21086886525154114,
"learning_rate": 8.393008474576271e-06,
"loss": 0.0905,
"step": 1517
},
{
"epoch": 1.6069111611280285,
"grad_norm": 0.461028516292572,
"learning_rate": 8.391949152542374e-06,
"loss": 0.0893,
"step": 1518
},
{
"epoch": 1.607970342910102,
"grad_norm": 0.22690759599208832,
"learning_rate": 8.390889830508476e-06,
"loss": 0.0898,
"step": 1519
},
{
"epoch": 1.6090295246921753,
"grad_norm": 0.6278162598609924,
"learning_rate": 8.389830508474577e-06,
"loss": 0.09,
"step": 1520
},
{
"epoch": 1.6100887064742486,
"grad_norm": 0.30574408173561096,
"learning_rate": 8.388771186440679e-06,
"loss": 0.091,
"step": 1521
},
{
"epoch": 1.611147888256322,
"grad_norm": 0.25087496638298035,
"learning_rate": 8.38771186440678e-06,
"loss": 0.0841,
"step": 1522
},
{
"epoch": 1.6122070700383953,
"grad_norm": 0.9940456748008728,
"learning_rate": 8.386652542372881e-06,
"loss": 0.0855,
"step": 1523
},
{
"epoch": 1.6132662518204688,
"grad_norm": 0.22347316145896912,
"learning_rate": 8.385593220338983e-06,
"loss": 0.0888,
"step": 1524
},
{
"epoch": 1.6143254336025419,
"grad_norm": 0.3078644871711731,
"learning_rate": 8.384533898305084e-06,
"loss": 0.0867,
"step": 1525
},
{
"epoch": 1.6153846153846154,
"grad_norm": 0.2520007789134979,
"learning_rate": 8.383474576271187e-06,
"loss": 0.0863,
"step": 1526
},
{
"epoch": 1.6164437971666887,
"grad_norm": 0.3745609223842621,
"learning_rate": 8.382415254237289e-06,
"loss": 0.0879,
"step": 1527
},
{
"epoch": 1.617502978948762,
"grad_norm": 0.9579833149909973,
"learning_rate": 8.381355932203392e-06,
"loss": 0.095,
"step": 1528
},
{
"epoch": 1.6185621607308356,
"grad_norm": 0.8180900812149048,
"learning_rate": 8.380296610169493e-06,
"loss": 0.089,
"step": 1529
},
{
"epoch": 1.6196213425129087,
"grad_norm": 0.25263839960098267,
"learning_rate": 8.379237288135595e-06,
"loss": 0.0932,
"step": 1530
},
{
"epoch": 1.6206805242949822,
"grad_norm": 1.0263060331344604,
"learning_rate": 8.378177966101696e-06,
"loss": 0.0896,
"step": 1531
},
{
"epoch": 1.6217397060770553,
"grad_norm": 1.3182510137557983,
"learning_rate": 8.377118644067797e-06,
"loss": 0.093,
"step": 1532
},
{
"epoch": 1.6227988878591288,
"grad_norm": 0.6498829126358032,
"learning_rate": 8.376059322033899e-06,
"loss": 0.0909,
"step": 1533
},
{
"epoch": 1.6238580696412022,
"grad_norm": 0.34288349747657776,
"learning_rate": 8.375e-06,
"loss": 0.0911,
"step": 1534
},
{
"epoch": 1.6249172514232755,
"grad_norm": 0.627138078212738,
"learning_rate": 8.373940677966103e-06,
"loss": 0.0929,
"step": 1535
},
{
"epoch": 1.625976433205349,
"grad_norm": 0.3161716163158417,
"learning_rate": 8.372881355932205e-06,
"loss": 0.0901,
"step": 1536
},
{
"epoch": 1.627035614987422,
"grad_norm": 0.9007494449615479,
"learning_rate": 8.371822033898306e-06,
"loss": 0.0922,
"step": 1537
},
{
"epoch": 1.6280947967694956,
"grad_norm": 0.908442497253418,
"learning_rate": 8.370762711864408e-06,
"loss": 0.0906,
"step": 1538
},
{
"epoch": 1.629153978551569,
"grad_norm": 0.2966644763946533,
"learning_rate": 8.369703389830509e-06,
"loss": 0.0841,
"step": 1539
},
{
"epoch": 1.6302131603336423,
"grad_norm": 0.5009570121765137,
"learning_rate": 8.36864406779661e-06,
"loss": 0.0916,
"step": 1540
},
{
"epoch": 1.6312723421157156,
"grad_norm": 0.6445338129997253,
"learning_rate": 8.367584745762712e-06,
"loss": 0.0931,
"step": 1541
},
{
"epoch": 1.632331523897789,
"grad_norm": 0.3887215256690979,
"learning_rate": 8.366525423728813e-06,
"loss": 0.0887,
"step": 1542
},
{
"epoch": 1.6333907056798624,
"grad_norm": 0.7631832361221313,
"learning_rate": 8.365466101694916e-06,
"loss": 0.0934,
"step": 1543
},
{
"epoch": 1.6344498874619355,
"grad_norm": 0.2936621606349945,
"learning_rate": 8.364406779661018e-06,
"loss": 0.0952,
"step": 1544
},
{
"epoch": 1.635509069244009,
"grad_norm": 0.66786128282547,
"learning_rate": 8.36334745762712e-06,
"loss": 0.094,
"step": 1545
},
{
"epoch": 1.6365682510260824,
"grad_norm": 0.2722414433956146,
"learning_rate": 8.36228813559322e-06,
"loss": 0.0889,
"step": 1546
},
{
"epoch": 1.6376274328081557,
"grad_norm": 0.2932286560535431,
"learning_rate": 8.361228813559322e-06,
"loss": 0.0907,
"step": 1547
},
{
"epoch": 1.638686614590229,
"grad_norm": 0.2571784257888794,
"learning_rate": 8.360169491525423e-06,
"loss": 0.0888,
"step": 1548
},
{
"epoch": 1.6397457963723023,
"grad_norm": 0.48270049691200256,
"learning_rate": 8.359110169491527e-06,
"loss": 0.0881,
"step": 1549
},
{
"epoch": 1.6408049781543759,
"grad_norm": 0.5231984257698059,
"learning_rate": 8.358050847457628e-06,
"loss": 0.0903,
"step": 1550
},
{
"epoch": 1.641864159936449,
"grad_norm": 0.3783744275569916,
"learning_rate": 8.35699152542373e-06,
"loss": 0.0944,
"step": 1551
},
{
"epoch": 1.6429233417185225,
"grad_norm": 0.565405547618866,
"learning_rate": 8.35593220338983e-06,
"loss": 0.0904,
"step": 1552
},
{
"epoch": 1.6439825235005958,
"grad_norm": 0.739701509475708,
"learning_rate": 8.354872881355934e-06,
"loss": 0.0906,
"step": 1553
},
{
"epoch": 1.6450417052826691,
"grad_norm": 0.32626545429229736,
"learning_rate": 8.353813559322035e-06,
"loss": 0.089,
"step": 1554
},
{
"epoch": 1.6461008870647424,
"grad_norm": 0.32011836767196655,
"learning_rate": 8.352754237288137e-06,
"loss": 0.0932,
"step": 1555
},
{
"epoch": 1.6471600688468158,
"grad_norm": 0.5030165910720825,
"learning_rate": 8.351694915254238e-06,
"loss": 0.0894,
"step": 1556
},
{
"epoch": 1.6482192506288893,
"grad_norm": 0.5868280529975891,
"learning_rate": 8.35063559322034e-06,
"loss": 0.0892,
"step": 1557
},
{
"epoch": 1.6492784324109624,
"grad_norm": 0.5712450742721558,
"learning_rate": 8.349576271186441e-06,
"loss": 0.0905,
"step": 1558
},
{
"epoch": 1.650337614193036,
"grad_norm": 0.5775148272514343,
"learning_rate": 8.348516949152542e-06,
"loss": 0.0918,
"step": 1559
},
{
"epoch": 1.6513967959751092,
"grad_norm": 0.33030012249946594,
"learning_rate": 8.347457627118645e-06,
"loss": 0.0905,
"step": 1560
},
{
"epoch": 1.6524559777571826,
"grad_norm": 0.9324721097946167,
"learning_rate": 8.346398305084747e-06,
"loss": 0.0921,
"step": 1561
},
{
"epoch": 1.6535151595392559,
"grad_norm": 0.2637194097042084,
"learning_rate": 8.345338983050848e-06,
"loss": 0.087,
"step": 1562
},
{
"epoch": 1.6545743413213292,
"grad_norm": 1.0360710620880127,
"learning_rate": 8.34427966101695e-06,
"loss": 0.0916,
"step": 1563
},
{
"epoch": 1.6556335231034027,
"grad_norm": 0.3216269910335541,
"learning_rate": 8.343220338983051e-06,
"loss": 0.0923,
"step": 1564
},
{
"epoch": 1.6566927048854758,
"grad_norm": 0.30418795347213745,
"learning_rate": 8.342161016949152e-06,
"loss": 0.0895,
"step": 1565
},
{
"epoch": 1.6577518866675494,
"grad_norm": 0.3769773840904236,
"learning_rate": 8.341101694915254e-06,
"loss": 0.095,
"step": 1566
},
{
"epoch": 1.6588110684496227,
"grad_norm": 0.22719967365264893,
"learning_rate": 8.340042372881357e-06,
"loss": 0.0911,
"step": 1567
},
{
"epoch": 1.659870250231696,
"grad_norm": 0.3400397300720215,
"learning_rate": 8.338983050847458e-06,
"loss": 0.0908,
"step": 1568
},
{
"epoch": 1.6609294320137695,
"grad_norm": 1.1307333707809448,
"learning_rate": 8.33792372881356e-06,
"loss": 0.0892,
"step": 1569
},
{
"epoch": 1.6619886137958426,
"grad_norm": 0.2560805380344391,
"learning_rate": 8.336864406779663e-06,
"loss": 0.0892,
"step": 1570
},
{
"epoch": 1.6630477955779162,
"grad_norm": 0.7875747084617615,
"learning_rate": 8.335805084745764e-06,
"loss": 0.0933,
"step": 1571
},
{
"epoch": 1.6641069773599892,
"grad_norm": 0.311423122882843,
"learning_rate": 8.334745762711866e-06,
"loss": 0.0876,
"step": 1572
},
{
"epoch": 1.6651661591420628,
"grad_norm": 0.570884644985199,
"learning_rate": 8.333686440677967e-06,
"loss": 0.091,
"step": 1573
},
{
"epoch": 1.666225340924136,
"grad_norm": 0.5333699584007263,
"learning_rate": 8.332627118644069e-06,
"loss": 0.0874,
"step": 1574
},
{
"epoch": 1.6672845227062094,
"grad_norm": 0.6104627847671509,
"learning_rate": 8.33156779661017e-06,
"loss": 0.0904,
"step": 1575
},
{
"epoch": 1.668343704488283,
"grad_norm": 0.2895054519176483,
"learning_rate": 8.330508474576271e-06,
"loss": 0.0893,
"step": 1576
},
{
"epoch": 1.669402886270356,
"grad_norm": 0.2963944673538208,
"learning_rate": 8.329449152542374e-06,
"loss": 0.0914,
"step": 1577
},
{
"epoch": 1.6704620680524296,
"grad_norm": 0.3085760772228241,
"learning_rate": 8.328389830508476e-06,
"loss": 0.0891,
"step": 1578
},
{
"epoch": 1.671521249834503,
"grad_norm": 0.26071521639823914,
"learning_rate": 8.327330508474577e-06,
"loss": 0.0875,
"step": 1579
},
{
"epoch": 1.6725804316165762,
"grad_norm": 0.2093966007232666,
"learning_rate": 8.326271186440679e-06,
"loss": 0.088,
"step": 1580
},
{
"epoch": 1.6736396133986495,
"grad_norm": 0.2903918921947479,
"learning_rate": 8.32521186440678e-06,
"loss": 0.0871,
"step": 1581
},
{
"epoch": 1.6746987951807228,
"grad_norm": 0.3491870164871216,
"learning_rate": 8.324152542372882e-06,
"loss": 0.0917,
"step": 1582
},
{
"epoch": 1.6757579769627964,
"grad_norm": 0.45493122935295105,
"learning_rate": 8.323093220338983e-06,
"loss": 0.0918,
"step": 1583
},
{
"epoch": 1.6768171587448695,
"grad_norm": 0.7867631912231445,
"learning_rate": 8.322033898305086e-06,
"loss": 0.089,
"step": 1584
},
{
"epoch": 1.677876340526943,
"grad_norm": 1.0455607175827026,
"learning_rate": 8.320974576271187e-06,
"loss": 0.0918,
"step": 1585
},
{
"epoch": 1.6789355223090163,
"grad_norm": 0.3224550187587738,
"learning_rate": 8.319915254237289e-06,
"loss": 0.0896,
"step": 1586
},
{
"epoch": 1.6799947040910896,
"grad_norm": 0.30370235443115234,
"learning_rate": 8.31885593220339e-06,
"loss": 0.089,
"step": 1587
},
{
"epoch": 1.681053885873163,
"grad_norm": 0.2551031708717346,
"learning_rate": 8.317796610169492e-06,
"loss": 0.0905,
"step": 1588
},
{
"epoch": 1.6821130676552363,
"grad_norm": 0.3214464783668518,
"learning_rate": 8.316737288135593e-06,
"loss": 0.087,
"step": 1589
},
{
"epoch": 1.6831722494373098,
"grad_norm": 0.2692156136035919,
"learning_rate": 8.315677966101695e-06,
"loss": 0.0911,
"step": 1590
},
{
"epoch": 1.684231431219383,
"grad_norm": 0.8112454414367676,
"learning_rate": 8.314618644067798e-06,
"loss": 0.0883,
"step": 1591
},
{
"epoch": 1.6852906130014564,
"grad_norm": 0.2618613541126251,
"learning_rate": 8.313559322033899e-06,
"loss": 0.0867,
"step": 1592
},
{
"epoch": 1.6863497947835298,
"grad_norm": 0.323166161775589,
"learning_rate": 8.3125e-06,
"loss": 0.0924,
"step": 1593
},
{
"epoch": 1.687408976565603,
"grad_norm": 1.049118161201477,
"learning_rate": 8.311440677966104e-06,
"loss": 0.0934,
"step": 1594
},
{
"epoch": 1.6884681583476764,
"grad_norm": 0.3779328763484955,
"learning_rate": 8.310381355932205e-06,
"loss": 0.0921,
"step": 1595
},
{
"epoch": 1.6895273401297497,
"grad_norm": 0.3923911452293396,
"learning_rate": 8.309322033898306e-06,
"loss": 0.0919,
"step": 1596
},
{
"epoch": 1.6905865219118232,
"grad_norm": 0.28012555837631226,
"learning_rate": 8.308262711864408e-06,
"loss": 0.0919,
"step": 1597
},
{
"epoch": 1.6916457036938963,
"grad_norm": 0.7604759931564331,
"learning_rate": 8.30720338983051e-06,
"loss": 0.0914,
"step": 1598
},
{
"epoch": 1.6927048854759699,
"grad_norm": 0.36507344245910645,
"learning_rate": 8.30614406779661e-06,
"loss": 0.0885,
"step": 1599
},
{
"epoch": 1.6937640672580432,
"grad_norm": 0.32517632842063904,
"learning_rate": 8.305084745762712e-06,
"loss": 0.0835,
"step": 1600
},
{
"epoch": 1.6948232490401165,
"grad_norm": 0.25675442814826965,
"learning_rate": 8.304025423728813e-06,
"loss": 0.0885,
"step": 1601
},
{
"epoch": 1.6958824308221898,
"grad_norm": 0.2421637326478958,
"learning_rate": 8.302966101694917e-06,
"loss": 0.0881,
"step": 1602
},
{
"epoch": 1.6969416126042631,
"grad_norm": 0.5827629566192627,
"learning_rate": 8.301906779661018e-06,
"loss": 0.0886,
"step": 1603
},
{
"epoch": 1.6980007943863367,
"grad_norm": 0.5402868986129761,
"learning_rate": 8.30084745762712e-06,
"loss": 0.0899,
"step": 1604
},
{
"epoch": 1.6990599761684098,
"grad_norm": 0.6650782823562622,
"learning_rate": 8.29978813559322e-06,
"loss": 0.0921,
"step": 1605
},
{
"epoch": 1.7001191579504833,
"grad_norm": 0.29905128479003906,
"learning_rate": 8.298728813559322e-06,
"loss": 0.0866,
"step": 1606
},
{
"epoch": 1.7011783397325566,
"grad_norm": 0.2324621081352234,
"learning_rate": 8.297669491525424e-06,
"loss": 0.0854,
"step": 1607
},
{
"epoch": 1.70223752151463,
"grad_norm": 0.28160741925239563,
"learning_rate": 8.296610169491525e-06,
"loss": 0.0905,
"step": 1608
},
{
"epoch": 1.7032967032967035,
"grad_norm": 0.7123225331306458,
"learning_rate": 8.295550847457628e-06,
"loss": 0.0891,
"step": 1609
},
{
"epoch": 1.7043558850787766,
"grad_norm": 0.9756201505661011,
"learning_rate": 8.29449152542373e-06,
"loss": 0.0912,
"step": 1610
},
{
"epoch": 1.70541506686085,
"grad_norm": 0.6386804580688477,
"learning_rate": 8.293432203389831e-06,
"loss": 0.0906,
"step": 1611
},
{
"epoch": 1.7064742486429232,
"grad_norm": 0.34498628973960876,
"learning_rate": 8.292372881355934e-06,
"loss": 0.0882,
"step": 1612
},
{
"epoch": 1.7075334304249967,
"grad_norm": 0.6193973422050476,
"learning_rate": 8.291313559322035e-06,
"loss": 0.0879,
"step": 1613
},
{
"epoch": 1.70859261220707,
"grad_norm": 0.27502185106277466,
"learning_rate": 8.290254237288137e-06,
"loss": 0.0898,
"step": 1614
},
{
"epoch": 1.7096517939891434,
"grad_norm": 0.7583441138267517,
"learning_rate": 8.289194915254238e-06,
"loss": 0.0873,
"step": 1615
},
{
"epoch": 1.710710975771217,
"grad_norm": 0.29789406061172485,
"learning_rate": 8.28813559322034e-06,
"loss": 0.0903,
"step": 1616
},
{
"epoch": 1.71177015755329,
"grad_norm": 0.2680496573448181,
"learning_rate": 8.287076271186441e-06,
"loss": 0.091,
"step": 1617
},
{
"epoch": 1.7128293393353635,
"grad_norm": 0.22052001953125,
"learning_rate": 8.286016949152543e-06,
"loss": 0.0891,
"step": 1618
},
{
"epoch": 1.7138885211174366,
"grad_norm": 0.4834091365337372,
"learning_rate": 8.284957627118646e-06,
"loss": 0.0889,
"step": 1619
},
{
"epoch": 1.7149477028995102,
"grad_norm": 0.23187105357646942,
"learning_rate": 8.283898305084747e-06,
"loss": 0.0869,
"step": 1620
},
{
"epoch": 1.7160068846815835,
"grad_norm": 0.23044142127037048,
"learning_rate": 8.282838983050848e-06,
"loss": 0.0929,
"step": 1621
},
{
"epoch": 1.7170660664636568,
"grad_norm": 0.3926228880882263,
"learning_rate": 8.28177966101695e-06,
"loss": 0.0941,
"step": 1622
},
{
"epoch": 1.7181252482457303,
"grad_norm": 0.35786905884742737,
"learning_rate": 8.280720338983051e-06,
"loss": 0.092,
"step": 1623
},
{
"epoch": 1.7191844300278034,
"grad_norm": 0.3090403079986572,
"learning_rate": 8.279661016949153e-06,
"loss": 0.0933,
"step": 1624
},
{
"epoch": 1.720243611809877,
"grad_norm": 0.26617270708084106,
"learning_rate": 8.278601694915254e-06,
"loss": 0.0869,
"step": 1625
},
{
"epoch": 1.7213027935919503,
"grad_norm": 0.302048921585083,
"learning_rate": 8.277542372881357e-06,
"loss": 0.0914,
"step": 1626
},
{
"epoch": 1.7223619753740236,
"grad_norm": 0.27975502610206604,
"learning_rate": 8.276483050847459e-06,
"loss": 0.0911,
"step": 1627
},
{
"epoch": 1.723421157156097,
"grad_norm": 0.46345677971839905,
"learning_rate": 8.27542372881356e-06,
"loss": 0.0883,
"step": 1628
},
{
"epoch": 1.7244803389381702,
"grad_norm": 0.7359839081764221,
"learning_rate": 8.274364406779661e-06,
"loss": 0.0903,
"step": 1629
},
{
"epoch": 1.7255395207202437,
"grad_norm": 0.466911643743515,
"learning_rate": 8.273305084745763e-06,
"loss": 0.092,
"step": 1630
},
{
"epoch": 1.7265987025023168,
"grad_norm": 0.2327670305967331,
"learning_rate": 8.272245762711864e-06,
"loss": 0.0876,
"step": 1631
},
{
"epoch": 1.7276578842843904,
"grad_norm": 0.23270419239997864,
"learning_rate": 8.271186440677966e-06,
"loss": 0.089,
"step": 1632
},
{
"epoch": 1.7287170660664637,
"grad_norm": 0.32935333251953125,
"learning_rate": 8.270127118644069e-06,
"loss": 0.0866,
"step": 1633
},
{
"epoch": 1.729776247848537,
"grad_norm": 0.2330481857061386,
"learning_rate": 8.26906779661017e-06,
"loss": 0.0864,
"step": 1634
},
{
"epoch": 1.7308354296306103,
"grad_norm": 0.2925349175930023,
"learning_rate": 8.268008474576272e-06,
"loss": 0.0886,
"step": 1635
},
{
"epoch": 1.7318946114126836,
"grad_norm": 0.27524423599243164,
"learning_rate": 8.266949152542375e-06,
"loss": 0.0842,
"step": 1636
},
{
"epoch": 1.7329537931947572,
"grad_norm": 1.3679767847061157,
"learning_rate": 8.265889830508476e-06,
"loss": 0.0874,
"step": 1637
},
{
"epoch": 1.7340129749768303,
"grad_norm": 0.30476149916648865,
"learning_rate": 8.264830508474577e-06,
"loss": 0.0895,
"step": 1638
},
{
"epoch": 1.7350721567589038,
"grad_norm": 0.7838143706321716,
"learning_rate": 8.263771186440679e-06,
"loss": 0.0853,
"step": 1639
},
{
"epoch": 1.7361313385409771,
"grad_norm": 0.7583155035972595,
"learning_rate": 8.26271186440678e-06,
"loss": 0.0904,
"step": 1640
},
{
"epoch": 1.7371905203230504,
"grad_norm": 0.3275405168533325,
"learning_rate": 8.261652542372882e-06,
"loss": 0.0871,
"step": 1641
},
{
"epoch": 1.7382497021051238,
"grad_norm": 0.3539300262928009,
"learning_rate": 8.260593220338983e-06,
"loss": 0.087,
"step": 1642
},
{
"epoch": 1.739308883887197,
"grad_norm": 0.4087120294570923,
"learning_rate": 8.259533898305086e-06,
"loss": 0.0906,
"step": 1643
},
{
"epoch": 1.7403680656692706,
"grad_norm": 0.22603043913841248,
"learning_rate": 8.258474576271188e-06,
"loss": 0.0916,
"step": 1644
},
{
"epoch": 1.7414272474513437,
"grad_norm": 0.2526402771472931,
"learning_rate": 8.257415254237289e-06,
"loss": 0.0868,
"step": 1645
},
{
"epoch": 1.7424864292334172,
"grad_norm": 0.7923634052276611,
"learning_rate": 8.25635593220339e-06,
"loss": 0.0942,
"step": 1646
},
{
"epoch": 1.7435456110154905,
"grad_norm": 0.8598949313163757,
"learning_rate": 8.255296610169492e-06,
"loss": 0.0943,
"step": 1647
},
{
"epoch": 1.7446047927975639,
"grad_norm": 0.24176791310310364,
"learning_rate": 8.254237288135593e-06,
"loss": 0.0874,
"step": 1648
},
{
"epoch": 1.7456639745796372,
"grad_norm": 0.5251477360725403,
"learning_rate": 8.253177966101695e-06,
"loss": 0.0881,
"step": 1649
},
{
"epoch": 1.7467231563617105,
"grad_norm": 0.2412240356206894,
"learning_rate": 8.252118644067796e-06,
"loss": 0.0895,
"step": 1650
},
{
"epoch": 1.747782338143784,
"grad_norm": 0.3063221275806427,
"learning_rate": 8.2510593220339e-06,
"loss": 0.087,
"step": 1651
},
{
"epoch": 1.7488415199258571,
"grad_norm": 0.6908991932868958,
"learning_rate": 8.25e-06,
"loss": 0.0886,
"step": 1652
},
{
"epoch": 1.7499007017079307,
"grad_norm": 0.2941277027130127,
"learning_rate": 8.248940677966102e-06,
"loss": 0.0927,
"step": 1653
},
{
"epoch": 1.750959883490004,
"grad_norm": 0.8127859234809875,
"learning_rate": 8.247881355932203e-06,
"loss": 0.0892,
"step": 1654
},
{
"epoch": 1.7520190652720773,
"grad_norm": 0.8596274256706238,
"learning_rate": 8.246822033898307e-06,
"loss": 0.0898,
"step": 1655
},
{
"epoch": 1.7530782470541508,
"grad_norm": 0.47415459156036377,
"learning_rate": 8.245762711864408e-06,
"loss": 0.0917,
"step": 1656
},
{
"epoch": 1.754137428836224,
"grad_norm": 0.2681109607219696,
"learning_rate": 8.24470338983051e-06,
"loss": 0.0912,
"step": 1657
},
{
"epoch": 1.7551966106182975,
"grad_norm": 0.260294109582901,
"learning_rate": 8.24364406779661e-06,
"loss": 0.0874,
"step": 1658
},
{
"epoch": 1.7562557924003706,
"grad_norm": 0.24355289340019226,
"learning_rate": 8.242584745762712e-06,
"loss": 0.0905,
"step": 1659
},
{
"epoch": 1.757314974182444,
"grad_norm": 0.24448110163211823,
"learning_rate": 8.241525423728815e-06,
"loss": 0.0857,
"step": 1660
},
{
"epoch": 1.7583741559645174,
"grad_norm": 0.2583911418914795,
"learning_rate": 8.240466101694917e-06,
"loss": 0.0878,
"step": 1661
},
{
"epoch": 1.7594333377465907,
"grad_norm": 0.6160121560096741,
"learning_rate": 8.239406779661018e-06,
"loss": 0.0871,
"step": 1662
},
{
"epoch": 1.7604925195286643,
"grad_norm": 0.29204174876213074,
"learning_rate": 8.23834745762712e-06,
"loss": 0.0892,
"step": 1663
},
{
"epoch": 1.7615517013107374,
"grad_norm": 0.26087382435798645,
"learning_rate": 8.237288135593221e-06,
"loss": 0.0926,
"step": 1664
},
{
"epoch": 1.7626108830928109,
"grad_norm": 0.6562139987945557,
"learning_rate": 8.236228813559322e-06,
"loss": 0.0919,
"step": 1665
},
{
"epoch": 1.7636700648748842,
"grad_norm": 0.747133195400238,
"learning_rate": 8.235169491525424e-06,
"loss": 0.0869,
"step": 1666
},
{
"epoch": 1.7647292466569575,
"grad_norm": 0.4086461663246155,
"learning_rate": 8.234110169491525e-06,
"loss": 0.0898,
"step": 1667
},
{
"epoch": 1.7657884284390308,
"grad_norm": 0.2578766644001007,
"learning_rate": 8.233050847457628e-06,
"loss": 0.0888,
"step": 1668
},
{
"epoch": 1.7668476102211041,
"grad_norm": 0.27525123953819275,
"learning_rate": 8.23199152542373e-06,
"loss": 0.0876,
"step": 1669
},
{
"epoch": 1.7679067920031777,
"grad_norm": 0.38917550444602966,
"learning_rate": 8.230932203389831e-06,
"loss": 0.0923,
"step": 1670
},
{
"epoch": 1.7689659737852508,
"grad_norm": 0.2889343500137329,
"learning_rate": 8.229872881355933e-06,
"loss": 0.089,
"step": 1671
},
{
"epoch": 1.7700251555673243,
"grad_norm": 0.32560211420059204,
"learning_rate": 8.228813559322034e-06,
"loss": 0.0865,
"step": 1672
},
{
"epoch": 1.7710843373493976,
"grad_norm": 0.7226275205612183,
"learning_rate": 8.227754237288135e-06,
"loss": 0.0889,
"step": 1673
},
{
"epoch": 1.772143519131471,
"grad_norm": 0.7984499335289001,
"learning_rate": 8.226694915254237e-06,
"loss": 0.0891,
"step": 1674
},
{
"epoch": 1.7732027009135443,
"grad_norm": 0.24345754086971283,
"learning_rate": 8.22563559322034e-06,
"loss": 0.0896,
"step": 1675
},
{
"epoch": 1.7742618826956176,
"grad_norm": 0.2448401004076004,
"learning_rate": 8.224576271186441e-06,
"loss": 0.0856,
"step": 1676
},
{
"epoch": 1.7753210644776911,
"grad_norm": 0.25796836614608765,
"learning_rate": 8.223516949152543e-06,
"loss": 0.09,
"step": 1677
},
{
"epoch": 1.7763802462597642,
"grad_norm": 0.3721190392971039,
"learning_rate": 8.222457627118646e-06,
"loss": 0.0888,
"step": 1678
},
{
"epoch": 1.7774394280418377,
"grad_norm": 0.3655620813369751,
"learning_rate": 8.221398305084747e-06,
"loss": 0.0868,
"step": 1679
},
{
"epoch": 1.778498609823911,
"grad_norm": 0.29531556367874146,
"learning_rate": 8.220338983050849e-06,
"loss": 0.0928,
"step": 1680
},
{
"epoch": 1.7795577916059844,
"grad_norm": 1.9438749551773071,
"learning_rate": 8.21927966101695e-06,
"loss": 0.0913,
"step": 1681
},
{
"epoch": 1.7806169733880577,
"grad_norm": 0.3475892245769501,
"learning_rate": 8.218220338983051e-06,
"loss": 0.0876,
"step": 1682
},
{
"epoch": 1.781676155170131,
"grad_norm": 0.7077595591545105,
"learning_rate": 8.217161016949153e-06,
"loss": 0.0919,
"step": 1683
},
{
"epoch": 1.7827353369522045,
"grad_norm": 0.49300310015678406,
"learning_rate": 8.216101694915254e-06,
"loss": 0.0902,
"step": 1684
},
{
"epoch": 1.7837945187342776,
"grad_norm": 0.34648701548576355,
"learning_rate": 8.215042372881357e-06,
"loss": 0.0927,
"step": 1685
},
{
"epoch": 1.7848537005163512,
"grad_norm": 0.3139582872390747,
"learning_rate": 8.213983050847459e-06,
"loss": 0.0876,
"step": 1686
},
{
"epoch": 1.7859128822984245,
"grad_norm": 0.4565608501434326,
"learning_rate": 8.21292372881356e-06,
"loss": 0.0934,
"step": 1687
},
{
"epoch": 1.7869720640804978,
"grad_norm": 0.26290515065193176,
"learning_rate": 8.211864406779662e-06,
"loss": 0.0885,
"step": 1688
},
{
"epoch": 1.7880312458625711,
"grad_norm": 0.2464137226343155,
"learning_rate": 8.210805084745763e-06,
"loss": 0.0877,
"step": 1689
},
{
"epoch": 1.7890904276446444,
"grad_norm": 0.22745825350284576,
"learning_rate": 8.209745762711864e-06,
"loss": 0.0889,
"step": 1690
},
{
"epoch": 1.790149609426718,
"grad_norm": 0.32300448417663574,
"learning_rate": 8.208686440677966e-06,
"loss": 0.0852,
"step": 1691
},
{
"epoch": 1.791208791208791,
"grad_norm": 0.2802238166332245,
"learning_rate": 8.207627118644069e-06,
"loss": 0.0871,
"step": 1692
},
{
"epoch": 1.7922679729908646,
"grad_norm": 0.6324068307876587,
"learning_rate": 8.20656779661017e-06,
"loss": 0.0877,
"step": 1693
},
{
"epoch": 1.793327154772938,
"grad_norm": 0.5701524615287781,
"learning_rate": 8.205508474576272e-06,
"loss": 0.0869,
"step": 1694
},
{
"epoch": 1.7943863365550112,
"grad_norm": 0.8544675707817078,
"learning_rate": 8.204449152542373e-06,
"loss": 0.0925,
"step": 1695
},
{
"epoch": 1.7954455183370848,
"grad_norm": 0.3276161253452301,
"learning_rate": 8.203389830508475e-06,
"loss": 0.0898,
"step": 1696
},
{
"epoch": 1.7965047001191579,
"grad_norm": 0.31532466411590576,
"learning_rate": 8.202330508474578e-06,
"loss": 0.0903,
"step": 1697
},
{
"epoch": 1.7975638819012314,
"grad_norm": 0.361342191696167,
"learning_rate": 8.201271186440679e-06,
"loss": 0.0869,
"step": 1698
},
{
"epoch": 1.7986230636833045,
"grad_norm": 0.3243643045425415,
"learning_rate": 8.20021186440678e-06,
"loss": 0.0882,
"step": 1699
},
{
"epoch": 1.799682245465378,
"grad_norm": 0.3016441762447357,
"learning_rate": 8.199152542372882e-06,
"loss": 0.0892,
"step": 1700
},
{
"epoch": 1.8007414272474513,
"grad_norm": 0.2679581642150879,
"learning_rate": 8.198093220338983e-06,
"loss": 0.0874,
"step": 1701
},
{
"epoch": 1.8018006090295247,
"grad_norm": 0.29905226826667786,
"learning_rate": 8.197033898305086e-06,
"loss": 0.0907,
"step": 1702
},
{
"epoch": 1.8028597908115982,
"grad_norm": 1.1050947904586792,
"learning_rate": 8.195974576271188e-06,
"loss": 0.0952,
"step": 1703
},
{
"epoch": 1.8039189725936713,
"grad_norm": 0.38703450560569763,
"learning_rate": 8.19491525423729e-06,
"loss": 0.0895,
"step": 1704
},
{
"epoch": 1.8049781543757448,
"grad_norm": 0.22092828154563904,
"learning_rate": 8.19385593220339e-06,
"loss": 0.089,
"step": 1705
},
{
"epoch": 1.8060373361578181,
"grad_norm": 0.23202571272850037,
"learning_rate": 8.192796610169492e-06,
"loss": 0.0908,
"step": 1706
},
{
"epoch": 1.8070965179398915,
"grad_norm": 0.7841505408287048,
"learning_rate": 8.191737288135593e-06,
"loss": 0.0905,
"step": 1707
},
{
"epoch": 1.8081556997219648,
"grad_norm": 0.5167518258094788,
"learning_rate": 8.190677966101695e-06,
"loss": 0.0887,
"step": 1708
},
{
"epoch": 1.809214881504038,
"grad_norm": 0.4019578993320465,
"learning_rate": 8.189618644067798e-06,
"loss": 0.0882,
"step": 1709
},
{
"epoch": 1.8102740632861116,
"grad_norm": 0.7770986557006836,
"learning_rate": 8.1885593220339e-06,
"loss": 0.0913,
"step": 1710
},
{
"epoch": 1.8113332450681847,
"grad_norm": 0.39623284339904785,
"learning_rate": 8.1875e-06,
"loss": 0.0892,
"step": 1711
},
{
"epoch": 1.8123924268502583,
"grad_norm": 0.22474542260169983,
"learning_rate": 8.186440677966102e-06,
"loss": 0.0854,
"step": 1712
},
{
"epoch": 1.8134516086323316,
"grad_norm": 0.30103063583374023,
"learning_rate": 8.185381355932204e-06,
"loss": 0.0876,
"step": 1713
},
{
"epoch": 1.8145107904144049,
"grad_norm": 0.21331194043159485,
"learning_rate": 8.184322033898305e-06,
"loss": 0.09,
"step": 1714
},
{
"epoch": 1.8155699721964782,
"grad_norm": 0.22102200984954834,
"learning_rate": 8.183262711864406e-06,
"loss": 0.0841,
"step": 1715
},
{
"epoch": 1.8166291539785515,
"grad_norm": 0.2268272191286087,
"learning_rate": 8.182203389830508e-06,
"loss": 0.0879,
"step": 1716
},
{
"epoch": 1.817688335760625,
"grad_norm": 0.28173744678497314,
"learning_rate": 8.181144067796611e-06,
"loss": 0.087,
"step": 1717
},
{
"epoch": 1.8187475175426981,
"grad_norm": 1.1784483194351196,
"learning_rate": 8.180084745762712e-06,
"loss": 0.0876,
"step": 1718
},
{
"epoch": 1.8198066993247717,
"grad_norm": 0.25271815061569214,
"learning_rate": 8.179025423728815e-06,
"loss": 0.0832,
"step": 1719
},
{
"epoch": 1.820865881106845,
"grad_norm": 0.3612355589866638,
"learning_rate": 8.177966101694917e-06,
"loss": 0.0871,
"step": 1720
},
{
"epoch": 1.8219250628889183,
"grad_norm": 0.5274227261543274,
"learning_rate": 8.176906779661018e-06,
"loss": 0.0893,
"step": 1721
},
{
"epoch": 1.8229842446709916,
"grad_norm": 0.4560360610485077,
"learning_rate": 8.17584745762712e-06,
"loss": 0.0883,
"step": 1722
},
{
"epoch": 1.824043426453065,
"grad_norm": 0.9054328203201294,
"learning_rate": 8.174788135593221e-06,
"loss": 0.0876,
"step": 1723
},
{
"epoch": 1.8251026082351385,
"grad_norm": 0.28052154183387756,
"learning_rate": 8.173728813559323e-06,
"loss": 0.0854,
"step": 1724
},
{
"epoch": 1.8261617900172116,
"grad_norm": 0.3094460666179657,
"learning_rate": 8.172669491525424e-06,
"loss": 0.0868,
"step": 1725
},
{
"epoch": 1.827220971799285,
"grad_norm": 0.2433973103761673,
"learning_rate": 8.171610169491525e-06,
"loss": 0.0888,
"step": 1726
},
{
"epoch": 1.8282801535813584,
"grad_norm": 0.2780088782310486,
"learning_rate": 8.170550847457628e-06,
"loss": 0.0844,
"step": 1727
},
{
"epoch": 1.8293393353634317,
"grad_norm": 0.3073442578315735,
"learning_rate": 8.16949152542373e-06,
"loss": 0.0901,
"step": 1728
},
{
"epoch": 1.830398517145505,
"grad_norm": 0.3006991147994995,
"learning_rate": 8.168432203389831e-06,
"loss": 0.0868,
"step": 1729
},
{
"epoch": 1.8314576989275784,
"grad_norm": 0.243610218167305,
"learning_rate": 8.167372881355933e-06,
"loss": 0.0917,
"step": 1730
},
{
"epoch": 1.832516880709652,
"grad_norm": 0.7395919561386108,
"learning_rate": 8.166313559322034e-06,
"loss": 0.0882,
"step": 1731
},
{
"epoch": 1.833576062491725,
"grad_norm": 0.343936562538147,
"learning_rate": 8.165254237288136e-06,
"loss": 0.089,
"step": 1732
},
{
"epoch": 1.8346352442737985,
"grad_norm": 0.2729974091053009,
"learning_rate": 8.164194915254237e-06,
"loss": 0.0882,
"step": 1733
},
{
"epoch": 1.8356944260558719,
"grad_norm": 0.28195783495903015,
"learning_rate": 8.16313559322034e-06,
"loss": 0.0883,
"step": 1734
},
{
"epoch": 1.8367536078379452,
"grad_norm": 0.46009188890457153,
"learning_rate": 8.162076271186441e-06,
"loss": 0.0919,
"step": 1735
},
{
"epoch": 1.8378127896200185,
"grad_norm": 0.2789437770843506,
"learning_rate": 8.161016949152543e-06,
"loss": 0.0915,
"step": 1736
},
{
"epoch": 1.8388719714020918,
"grad_norm": 1.0787837505340576,
"learning_rate": 8.159957627118644e-06,
"loss": 0.0901,
"step": 1737
},
{
"epoch": 1.8399311531841653,
"grad_norm": 0.22357052564620972,
"learning_rate": 8.158898305084746e-06,
"loss": 0.09,
"step": 1738
},
{
"epoch": 1.8409903349662384,
"grad_norm": 0.3277105987071991,
"learning_rate": 8.157838983050849e-06,
"loss": 0.088,
"step": 1739
},
{
"epoch": 1.842049516748312,
"grad_norm": 0.22346952557563782,
"learning_rate": 8.15677966101695e-06,
"loss": 0.0889,
"step": 1740
},
{
"epoch": 1.8431086985303853,
"grad_norm": 0.22219225764274597,
"learning_rate": 8.155720338983052e-06,
"loss": 0.088,
"step": 1741
},
{
"epoch": 1.8441678803124586,
"grad_norm": 0.260712206363678,
"learning_rate": 8.154661016949153e-06,
"loss": 0.0881,
"step": 1742
},
{
"epoch": 1.8452270620945321,
"grad_norm": 0.2360645830631256,
"learning_rate": 8.153601694915254e-06,
"loss": 0.0854,
"step": 1743
},
{
"epoch": 1.8462862438766052,
"grad_norm": 0.46701470017433167,
"learning_rate": 8.152542372881358e-06,
"loss": 0.089,
"step": 1744
},
{
"epoch": 1.8473454256586788,
"grad_norm": 0.27775806188583374,
"learning_rate": 8.151483050847459e-06,
"loss": 0.0859,
"step": 1745
},
{
"epoch": 1.8484046074407519,
"grad_norm": 0.7500215172767639,
"learning_rate": 8.15042372881356e-06,
"loss": 0.0891,
"step": 1746
},
{
"epoch": 1.8494637892228254,
"grad_norm": 0.4179665446281433,
"learning_rate": 8.149364406779662e-06,
"loss": 0.0854,
"step": 1747
},
{
"epoch": 1.8505229710048987,
"grad_norm": 0.42210525274276733,
"learning_rate": 8.148305084745763e-06,
"loss": 0.085,
"step": 1748
},
{
"epoch": 1.851582152786972,
"grad_norm": 0.7828628420829773,
"learning_rate": 8.147245762711865e-06,
"loss": 0.0916,
"step": 1749
},
{
"epoch": 1.8526413345690456,
"grad_norm": 0.3319457769393921,
"learning_rate": 8.146186440677966e-06,
"loss": 0.0869,
"step": 1750
},
{
"epoch": 1.8537005163511187,
"grad_norm": 0.4556196331977844,
"learning_rate": 8.145127118644069e-06,
"loss": 0.0887,
"step": 1751
},
{
"epoch": 1.8547596981331922,
"grad_norm": 1.2392021417617798,
"learning_rate": 8.14406779661017e-06,
"loss": 0.0912,
"step": 1752
},
{
"epoch": 1.8558188799152655,
"grad_norm": 0.38705164194107056,
"learning_rate": 8.143008474576272e-06,
"loss": 0.0885,
"step": 1753
},
{
"epoch": 1.8568780616973388,
"grad_norm": 0.2887822985649109,
"learning_rate": 8.141949152542373e-06,
"loss": 0.0872,
"step": 1754
},
{
"epoch": 1.8579372434794121,
"grad_norm": 0.46790027618408203,
"learning_rate": 8.140889830508475e-06,
"loss": 0.091,
"step": 1755
},
{
"epoch": 1.8589964252614855,
"grad_norm": 0.31695792078971863,
"learning_rate": 8.139830508474576e-06,
"loss": 0.089,
"step": 1756
},
{
"epoch": 1.860055607043559,
"grad_norm": 0.368253231048584,
"learning_rate": 8.138771186440678e-06,
"loss": 0.0912,
"step": 1757
},
{
"epoch": 1.861114788825632,
"grad_norm": 0.6835266351699829,
"learning_rate": 8.13771186440678e-06,
"loss": 0.0862,
"step": 1758
},
{
"epoch": 1.8621739706077056,
"grad_norm": 0.2873135507106781,
"learning_rate": 8.136652542372882e-06,
"loss": 0.086,
"step": 1759
},
{
"epoch": 1.863233152389779,
"grad_norm": 0.8042873740196228,
"learning_rate": 8.135593220338983e-06,
"loss": 0.0922,
"step": 1760
},
{
"epoch": 1.8642923341718523,
"grad_norm": 0.7736334800720215,
"learning_rate": 8.134533898305087e-06,
"loss": 0.0913,
"step": 1761
},
{
"epoch": 1.8653515159539256,
"grad_norm": 0.3275861442089081,
"learning_rate": 8.133474576271188e-06,
"loss": 0.0838,
"step": 1762
},
{
"epoch": 1.8664106977359989,
"grad_norm": 0.28888413310050964,
"learning_rate": 8.13241525423729e-06,
"loss": 0.0895,
"step": 1763
},
{
"epoch": 1.8674698795180724,
"grad_norm": 0.38396742939949036,
"learning_rate": 8.13135593220339e-06,
"loss": 0.0904,
"step": 1764
},
{
"epoch": 1.8685290613001455,
"grad_norm": 0.33555108308792114,
"learning_rate": 8.130296610169492e-06,
"loss": 0.0876,
"step": 1765
},
{
"epoch": 1.869588243082219,
"grad_norm": 0.26808494329452515,
"learning_rate": 8.129237288135594e-06,
"loss": 0.0858,
"step": 1766
},
{
"epoch": 1.8706474248642924,
"grad_norm": 0.30014657974243164,
"learning_rate": 8.128177966101695e-06,
"loss": 0.0874,
"step": 1767
},
{
"epoch": 1.8717066066463657,
"grad_norm": 0.2523983418941498,
"learning_rate": 8.127118644067798e-06,
"loss": 0.0885,
"step": 1768
},
{
"epoch": 1.872765788428439,
"grad_norm": 0.44090861082077026,
"learning_rate": 8.1260593220339e-06,
"loss": 0.0887,
"step": 1769
},
{
"epoch": 1.8738249702105123,
"grad_norm": 0.3840296268463135,
"learning_rate": 8.125000000000001e-06,
"loss": 0.0908,
"step": 1770
},
{
"epoch": 1.8748841519925858,
"grad_norm": 0.2714099586009979,
"learning_rate": 8.123940677966102e-06,
"loss": 0.0902,
"step": 1771
},
{
"epoch": 1.875943333774659,
"grad_norm": 0.20345446467399597,
"learning_rate": 8.122881355932204e-06,
"loss": 0.0874,
"step": 1772
},
{
"epoch": 1.8770025155567325,
"grad_norm": 0.29075461626052856,
"learning_rate": 8.121822033898305e-06,
"loss": 0.0876,
"step": 1773
},
{
"epoch": 1.8780616973388058,
"grad_norm": 0.2817675769329071,
"learning_rate": 8.120762711864407e-06,
"loss": 0.0891,
"step": 1774
},
{
"epoch": 1.879120879120879,
"grad_norm": 0.4427769184112549,
"learning_rate": 8.119703389830508e-06,
"loss": 0.0877,
"step": 1775
},
{
"epoch": 1.8801800609029524,
"grad_norm": 1.1793564558029175,
"learning_rate": 8.118644067796611e-06,
"loss": 0.093,
"step": 1776
},
{
"epoch": 1.8812392426850257,
"grad_norm": 0.35909417271614075,
"learning_rate": 8.117584745762713e-06,
"loss": 0.0876,
"step": 1777
},
{
"epoch": 1.8822984244670993,
"grad_norm": 0.500996470451355,
"learning_rate": 8.116525423728814e-06,
"loss": 0.0881,
"step": 1778
},
{
"epoch": 1.8833576062491724,
"grad_norm": 0.47844716906547546,
"learning_rate": 8.115466101694915e-06,
"loss": 0.0868,
"step": 1779
},
{
"epoch": 1.884416788031246,
"grad_norm": 0.32108375430107117,
"learning_rate": 8.114406779661017e-06,
"loss": 0.09,
"step": 1780
},
{
"epoch": 1.8854759698133192,
"grad_norm": 0.5594972968101501,
"learning_rate": 8.11334745762712e-06,
"loss": 0.0881,
"step": 1781
},
{
"epoch": 1.8865351515953925,
"grad_norm": 0.33978521823883057,
"learning_rate": 8.112288135593221e-06,
"loss": 0.0893,
"step": 1782
},
{
"epoch": 1.887594333377466,
"grad_norm": 0.41193121671676636,
"learning_rate": 8.111228813559323e-06,
"loss": 0.087,
"step": 1783
},
{
"epoch": 1.8886535151595392,
"grad_norm": 0.35275036096572876,
"learning_rate": 8.110169491525424e-06,
"loss": 0.0862,
"step": 1784
},
{
"epoch": 1.8897126969416127,
"grad_norm": 0.32912677526474,
"learning_rate": 8.109110169491527e-06,
"loss": 0.0843,
"step": 1785
},
{
"epoch": 1.8907718787236858,
"grad_norm": 0.7398490309715271,
"learning_rate": 8.108050847457629e-06,
"loss": 0.0875,
"step": 1786
},
{
"epoch": 1.8918310605057593,
"grad_norm": 0.3203893303871155,
"learning_rate": 8.10699152542373e-06,
"loss": 0.0895,
"step": 1787
},
{
"epoch": 1.8928902422878326,
"grad_norm": 0.33273789286613464,
"learning_rate": 8.105932203389831e-06,
"loss": 0.0895,
"step": 1788
},
{
"epoch": 1.893949424069906,
"grad_norm": 0.6345044374465942,
"learning_rate": 8.104872881355933e-06,
"loss": 0.0883,
"step": 1789
},
{
"epoch": 1.8950086058519795,
"grad_norm": 0.6323248147964478,
"learning_rate": 8.103813559322034e-06,
"loss": 0.0886,
"step": 1790
},
{
"epoch": 1.8960677876340526,
"grad_norm": 1.5318913459777832,
"learning_rate": 8.102754237288136e-06,
"loss": 0.09,
"step": 1791
},
{
"epoch": 1.8971269694161261,
"grad_norm": 0.4862882196903229,
"learning_rate": 8.101694915254237e-06,
"loss": 0.089,
"step": 1792
},
{
"epoch": 1.8981861511981994,
"grad_norm": 0.26938316226005554,
"learning_rate": 8.10063559322034e-06,
"loss": 0.0864,
"step": 1793
},
{
"epoch": 1.8992453329802728,
"grad_norm": 0.3050606846809387,
"learning_rate": 8.099576271186442e-06,
"loss": 0.0882,
"step": 1794
},
{
"epoch": 1.900304514762346,
"grad_norm": 0.2837347090244293,
"learning_rate": 8.098516949152543e-06,
"loss": 0.0873,
"step": 1795
},
{
"epoch": 1.9013636965444194,
"grad_norm": 0.23105625808238983,
"learning_rate": 8.097457627118644e-06,
"loss": 0.0886,
"step": 1796
},
{
"epoch": 1.902422878326493,
"grad_norm": 0.3702641725540161,
"learning_rate": 8.096398305084746e-06,
"loss": 0.0863,
"step": 1797
},
{
"epoch": 1.903482060108566,
"grad_norm": 0.5003481507301331,
"learning_rate": 8.095338983050847e-06,
"loss": 0.0879,
"step": 1798
},
{
"epoch": 1.9045412418906396,
"grad_norm": 0.3741811215877533,
"learning_rate": 8.094279661016949e-06,
"loss": 0.0892,
"step": 1799
},
{
"epoch": 1.9056004236727129,
"grad_norm": 0.7591339945793152,
"learning_rate": 8.093220338983052e-06,
"loss": 0.0903,
"step": 1800
},
{
"epoch": 1.9066596054547862,
"grad_norm": 1.3595631122589111,
"learning_rate": 8.092161016949153e-06,
"loss": 0.0863,
"step": 1801
},
{
"epoch": 1.9077187872368595,
"grad_norm": 0.33199191093444824,
"learning_rate": 8.091101694915255e-06,
"loss": 0.0874,
"step": 1802
},
{
"epoch": 1.9087779690189328,
"grad_norm": 0.293661504983902,
"learning_rate": 8.090042372881358e-06,
"loss": 0.0902,
"step": 1803
},
{
"epoch": 1.9098371508010064,
"grad_norm": 0.29978156089782715,
"learning_rate": 8.088983050847459e-06,
"loss": 0.084,
"step": 1804
},
{
"epoch": 1.9108963325830794,
"grad_norm": 0.3597835600376129,
"learning_rate": 8.08792372881356e-06,
"loss": 0.0861,
"step": 1805
},
{
"epoch": 1.911955514365153,
"grad_norm": 0.5429406762123108,
"learning_rate": 8.086864406779662e-06,
"loss": 0.0918,
"step": 1806
},
{
"epoch": 1.9130146961472263,
"grad_norm": 0.5267550945281982,
"learning_rate": 8.085805084745763e-06,
"loss": 0.0878,
"step": 1807
},
{
"epoch": 1.9140738779292996,
"grad_norm": 0.4029728174209595,
"learning_rate": 8.084745762711865e-06,
"loss": 0.0904,
"step": 1808
},
{
"epoch": 1.915133059711373,
"grad_norm": 0.40597569942474365,
"learning_rate": 8.083686440677966e-06,
"loss": 0.0937,
"step": 1809
},
{
"epoch": 1.9161922414934462,
"grad_norm": 0.2881167531013489,
"learning_rate": 8.08262711864407e-06,
"loss": 0.092,
"step": 1810
},
{
"epoch": 1.9172514232755198,
"grad_norm": 0.9564570188522339,
"learning_rate": 8.08156779661017e-06,
"loss": 0.0843,
"step": 1811
},
{
"epoch": 1.9183106050575929,
"grad_norm": 0.24643473327159882,
"learning_rate": 8.080508474576272e-06,
"loss": 0.0879,
"step": 1812
},
{
"epoch": 1.9193697868396664,
"grad_norm": 0.7339272499084473,
"learning_rate": 8.079449152542374e-06,
"loss": 0.0911,
"step": 1813
},
{
"epoch": 1.9204289686217397,
"grad_norm": 0.3867321312427521,
"learning_rate": 8.078389830508475e-06,
"loss": 0.0921,
"step": 1814
},
{
"epoch": 1.921488150403813,
"grad_norm": 0.5025569796562195,
"learning_rate": 8.077330508474576e-06,
"loss": 0.0875,
"step": 1815
},
{
"epoch": 1.9225473321858864,
"grad_norm": 1.2964099645614624,
"learning_rate": 8.076271186440678e-06,
"loss": 0.0893,
"step": 1816
},
{
"epoch": 1.9236065139679597,
"grad_norm": 0.40289801359176636,
"learning_rate": 8.07521186440678e-06,
"loss": 0.0876,
"step": 1817
},
{
"epoch": 1.9246656957500332,
"grad_norm": 0.9181727766990662,
"learning_rate": 8.074152542372882e-06,
"loss": 0.0847,
"step": 1818
},
{
"epoch": 1.9257248775321063,
"grad_norm": 0.3142136335372925,
"learning_rate": 8.073093220338984e-06,
"loss": 0.0901,
"step": 1819
},
{
"epoch": 1.9267840593141798,
"grad_norm": 0.5350835919380188,
"learning_rate": 8.072033898305085e-06,
"loss": 0.0887,
"step": 1820
},
{
"epoch": 1.9278432410962532,
"grad_norm": 0.37053418159484863,
"learning_rate": 8.070974576271186e-06,
"loss": 0.0899,
"step": 1821
},
{
"epoch": 1.9289024228783265,
"grad_norm": 0.7820659279823303,
"learning_rate": 8.069915254237288e-06,
"loss": 0.0856,
"step": 1822
},
{
"epoch": 1.9299616046603998,
"grad_norm": 0.3069775700569153,
"learning_rate": 8.068855932203391e-06,
"loss": 0.0854,
"step": 1823
},
{
"epoch": 1.931020786442473,
"grad_norm": 0.4941248893737793,
"learning_rate": 8.067796610169492e-06,
"loss": 0.0908,
"step": 1824
},
{
"epoch": 1.9320799682245466,
"grad_norm": 0.5605942606925964,
"learning_rate": 8.066737288135594e-06,
"loss": 0.0885,
"step": 1825
},
{
"epoch": 1.9331391500066197,
"grad_norm": 0.8052049279212952,
"learning_rate": 8.065677966101695e-06,
"loss": 0.0889,
"step": 1826
},
{
"epoch": 1.9341983317886933,
"grad_norm": 0.2667480409145355,
"learning_rate": 8.064618644067798e-06,
"loss": 0.0888,
"step": 1827
},
{
"epoch": 1.9352575135707666,
"grad_norm": 0.29833167791366577,
"learning_rate": 8.0635593220339e-06,
"loss": 0.0876,
"step": 1828
},
{
"epoch": 1.93631669535284,
"grad_norm": 0.23164193332195282,
"learning_rate": 8.062500000000001e-06,
"loss": 0.0879,
"step": 1829
},
{
"epoch": 1.9373758771349134,
"grad_norm": 0.6006713509559631,
"learning_rate": 8.061440677966103e-06,
"loss": 0.0891,
"step": 1830
},
{
"epoch": 1.9384350589169865,
"grad_norm": 0.5523638725280762,
"learning_rate": 8.060381355932204e-06,
"loss": 0.088,
"step": 1831
},
{
"epoch": 1.93949424069906,
"grad_norm": 0.512315571308136,
"learning_rate": 8.059322033898305e-06,
"loss": 0.0909,
"step": 1832
},
{
"epoch": 1.9405534224811332,
"grad_norm": 0.28503870964050293,
"learning_rate": 8.058262711864407e-06,
"loss": 0.0872,
"step": 1833
},
{
"epoch": 1.9416126042632067,
"grad_norm": 0.26608702540397644,
"learning_rate": 8.05720338983051e-06,
"loss": 0.0908,
"step": 1834
},
{
"epoch": 1.94267178604528,
"grad_norm": 0.5513134002685547,
"learning_rate": 8.056144067796611e-06,
"loss": 0.0909,
"step": 1835
},
{
"epoch": 1.9437309678273533,
"grad_norm": 0.30664971470832825,
"learning_rate": 8.055084745762713e-06,
"loss": 0.0867,
"step": 1836
},
{
"epoch": 1.9447901496094269,
"grad_norm": 0.2737899720668793,
"learning_rate": 8.054025423728814e-06,
"loss": 0.0897,
"step": 1837
},
{
"epoch": 1.9458493313915,
"grad_norm": 0.8876556158065796,
"learning_rate": 8.052966101694916e-06,
"loss": 0.0839,
"step": 1838
},
{
"epoch": 1.9469085131735735,
"grad_norm": 0.5481213331222534,
"learning_rate": 8.051906779661017e-06,
"loss": 0.0871,
"step": 1839
},
{
"epoch": 1.9479676949556468,
"grad_norm": 0.36651283502578735,
"learning_rate": 8.050847457627118e-06,
"loss": 0.0887,
"step": 1840
},
{
"epoch": 1.9490268767377201,
"grad_norm": 0.6602075099945068,
"learning_rate": 8.04978813559322e-06,
"loss": 0.0874,
"step": 1841
},
{
"epoch": 1.9500860585197934,
"grad_norm": 0.5537017583847046,
"learning_rate": 8.048728813559323e-06,
"loss": 0.0864,
"step": 1842
},
{
"epoch": 1.9511452403018668,
"grad_norm": 0.8584939241409302,
"learning_rate": 8.047669491525424e-06,
"loss": 0.0877,
"step": 1843
},
{
"epoch": 1.9522044220839403,
"grad_norm": 0.3135605454444885,
"learning_rate": 8.046610169491527e-06,
"loss": 0.0885,
"step": 1844
},
{
"epoch": 1.9532636038660134,
"grad_norm": 0.26804184913635254,
"learning_rate": 8.045550847457629e-06,
"loss": 0.0853,
"step": 1845
},
{
"epoch": 1.954322785648087,
"grad_norm": 0.8978933095932007,
"learning_rate": 8.04449152542373e-06,
"loss": 0.0899,
"step": 1846
},
{
"epoch": 1.9553819674301602,
"grad_norm": 0.5151166319847107,
"learning_rate": 8.043432203389832e-06,
"loss": 0.088,
"step": 1847
},
{
"epoch": 1.9564411492122336,
"grad_norm": 0.8717884421348572,
"learning_rate": 8.042372881355933e-06,
"loss": 0.0888,
"step": 1848
},
{
"epoch": 1.9575003309943069,
"grad_norm": 0.37827199697494507,
"learning_rate": 8.041313559322034e-06,
"loss": 0.0846,
"step": 1849
},
{
"epoch": 1.9585595127763802,
"grad_norm": 0.3214173913002014,
"learning_rate": 8.040254237288136e-06,
"loss": 0.0926,
"step": 1850
},
{
"epoch": 1.9596186945584537,
"grad_norm": 0.41147252917289734,
"learning_rate": 8.039194915254237e-06,
"loss": 0.0886,
"step": 1851
},
{
"epoch": 1.9606778763405268,
"grad_norm": 0.24331054091453552,
"learning_rate": 8.03813559322034e-06,
"loss": 0.0836,
"step": 1852
},
{
"epoch": 1.9617370581226004,
"grad_norm": 0.2882451117038727,
"learning_rate": 8.037076271186442e-06,
"loss": 0.0842,
"step": 1853
},
{
"epoch": 1.9627962399046737,
"grad_norm": 0.41089218854904175,
"learning_rate": 8.036016949152543e-06,
"loss": 0.0854,
"step": 1854
},
{
"epoch": 1.963855421686747,
"grad_norm": 0.5555206537246704,
"learning_rate": 8.034957627118645e-06,
"loss": 0.0892,
"step": 1855
},
{
"epoch": 1.9649146034688203,
"grad_norm": 0.8138719201087952,
"learning_rate": 8.033898305084746e-06,
"loss": 0.0899,
"step": 1856
},
{
"epoch": 1.9659737852508936,
"grad_norm": 1.2507460117340088,
"learning_rate": 8.032838983050847e-06,
"loss": 0.089,
"step": 1857
},
{
"epoch": 1.9670329670329672,
"grad_norm": 0.4007565677165985,
"learning_rate": 8.031779661016949e-06,
"loss": 0.0923,
"step": 1858
},
{
"epoch": 1.9680921488150402,
"grad_norm": 0.31860870122909546,
"learning_rate": 8.030720338983052e-06,
"loss": 0.0891,
"step": 1859
},
{
"epoch": 1.9691513305971138,
"grad_norm": 0.8498591184616089,
"learning_rate": 8.029661016949153e-06,
"loss": 0.0909,
"step": 1860
},
{
"epoch": 1.970210512379187,
"grad_norm": 0.24927860498428345,
"learning_rate": 8.028601694915255e-06,
"loss": 0.0903,
"step": 1861
},
{
"epoch": 1.9712696941612604,
"grad_norm": 0.25672590732574463,
"learning_rate": 8.027542372881356e-06,
"loss": 0.0889,
"step": 1862
},
{
"epoch": 1.9723288759433337,
"grad_norm": 0.3672144412994385,
"learning_rate": 8.026483050847458e-06,
"loss": 0.0861,
"step": 1863
},
{
"epoch": 1.973388057725407,
"grad_norm": 0.9591440558433533,
"learning_rate": 8.025423728813559e-06,
"loss": 0.0873,
"step": 1864
},
{
"epoch": 1.9744472395074806,
"grad_norm": 0.2935021221637726,
"learning_rate": 8.024364406779662e-06,
"loss": 0.0866,
"step": 1865
},
{
"epoch": 1.9755064212895537,
"grad_norm": 0.37600111961364746,
"learning_rate": 8.023305084745764e-06,
"loss": 0.0873,
"step": 1866
},
{
"epoch": 1.9765656030716272,
"grad_norm": 0.4699721336364746,
"learning_rate": 8.022245762711865e-06,
"loss": 0.0924,
"step": 1867
},
{
"epoch": 1.9776247848537005,
"grad_norm": 0.37236911058425903,
"learning_rate": 8.021186440677966e-06,
"loss": 0.0894,
"step": 1868
},
{
"epoch": 1.9786839666357738,
"grad_norm": 0.2830633521080017,
"learning_rate": 8.02012711864407e-06,
"loss": 0.0892,
"step": 1869
},
{
"epoch": 1.9797431484178474,
"grad_norm": 0.6510921716690063,
"learning_rate": 8.019067796610171e-06,
"loss": 0.0898,
"step": 1870
},
{
"epoch": 1.9808023301999205,
"grad_norm": 1.2568674087524414,
"learning_rate": 8.018008474576272e-06,
"loss": 0.089,
"step": 1871
},
{
"epoch": 1.981861511981994,
"grad_norm": 0.8623310923576355,
"learning_rate": 8.016949152542374e-06,
"loss": 0.0863,
"step": 1872
},
{
"epoch": 1.982920693764067,
"grad_norm": 0.3959273099899292,
"learning_rate": 8.015889830508475e-06,
"loss": 0.0881,
"step": 1873
},
{
"epoch": 1.9839798755461406,
"grad_norm": 0.43136197328567505,
"learning_rate": 8.014830508474576e-06,
"loss": 0.0936,
"step": 1874
},
{
"epoch": 1.985039057328214,
"grad_norm": 0.26500093936920166,
"learning_rate": 8.013771186440678e-06,
"loss": 0.0891,
"step": 1875
},
{
"epoch": 1.9860982391102873,
"grad_norm": 1.2360635995864868,
"learning_rate": 8.012711864406781e-06,
"loss": 0.0906,
"step": 1876
},
{
"epoch": 1.9871574208923608,
"grad_norm": 0.2535933554172516,
"learning_rate": 8.011652542372882e-06,
"loss": 0.0881,
"step": 1877
},
{
"epoch": 1.988216602674434,
"grad_norm": 0.252047061920166,
"learning_rate": 8.010593220338984e-06,
"loss": 0.0828,
"step": 1878
},
{
"epoch": 1.9892757844565074,
"grad_norm": 0.8809779286384583,
"learning_rate": 8.009533898305085e-06,
"loss": 0.0903,
"step": 1879
},
{
"epoch": 1.9903349662385807,
"grad_norm": 0.5204029083251953,
"learning_rate": 8.008474576271187e-06,
"loss": 0.0872,
"step": 1880
},
{
"epoch": 1.991394148020654,
"grad_norm": 0.24637530744075775,
"learning_rate": 8.007415254237288e-06,
"loss": 0.0877,
"step": 1881
},
{
"epoch": 1.9924533298027274,
"grad_norm": 0.4711182117462158,
"learning_rate": 8.00635593220339e-06,
"loss": 0.0886,
"step": 1882
},
{
"epoch": 1.9935125115848007,
"grad_norm": 0.3140796422958374,
"learning_rate": 8.005296610169493e-06,
"loss": 0.0904,
"step": 1883
},
{
"epoch": 1.9945716933668742,
"grad_norm": 0.4233724772930145,
"learning_rate": 8.004237288135594e-06,
"loss": 0.0848,
"step": 1884
},
{
"epoch": 1.9956308751489473,
"grad_norm": 1.4377028942108154,
"learning_rate": 8.003177966101695e-06,
"loss": 0.0899,
"step": 1885
},
{
"epoch": 1.9966900569310209,
"grad_norm": 0.9122888445854187,
"learning_rate": 8.002118644067799e-06,
"loss": 0.0902,
"step": 1886
},
{
"epoch": 1.9977492387130942,
"grad_norm": 0.27381008863449097,
"learning_rate": 8.0010593220339e-06,
"loss": 0.0922,
"step": 1887
},
{
"epoch": 1.9988084204951675,
"grad_norm": 0.4732683300971985,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0864,
"step": 1888
},
{
"epoch": 1.9988084204951675,
"eval_accuracy": 0.9814,
"eval_best_f1_from_thresholding": 0.15454545454545454,
"eval_loss": 0.13597536087036133,
"eval_matthews_corrcoef": 0.14717610184892235,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9905965621840243,
"eval_negative_class_precision": 0.9923022384280361,
"eval_negative_class_recall": 0.9888967396790148,
"eval_positive_class_f1": 0.15454545454545454,
"eval_positive_class_precision": 0.13385826771653545,
"eval_positive_class_recall": 0.1827956989247312,
"eval_roc_auc": 0.8065064237190821,
"eval_runtime": 20.7354,
"eval_samples_per_second": 482.267,
"eval_steps_per_second": 7.572,
"step": 1888
},
{
"epoch": 1.9998676022772408,
"grad_norm": 1.7929794788360596,
"learning_rate": 7.998940677966103e-06,
"loss": 0.0934,
"step": 1889
},
{
"epoch": 2.0,
"grad_norm": 0.11227487027645111,
"learning_rate": 7.997881355932204e-06,
"loss": 0.0118,
"step": 1890
},
{
"epoch": 2.0010591817820735,
"grad_norm": 0.2703258693218231,
"learning_rate": 7.996822033898306e-06,
"loss": 0.0879,
"step": 1891
},
{
"epoch": 2.0021183635641466,
"grad_norm": 0.26941847801208496,
"learning_rate": 7.995762711864407e-06,
"loss": 0.0886,
"step": 1892
},
{
"epoch": 2.00317754534622,
"grad_norm": 0.29611021280288696,
"learning_rate": 7.99470338983051e-06,
"loss": 0.0879,
"step": 1893
},
{
"epoch": 2.0042367271282933,
"grad_norm": 0.3016314208507538,
"learning_rate": 7.993644067796611e-06,
"loss": 0.087,
"step": 1894
},
{
"epoch": 2.005295908910367,
"grad_norm": 0.27448010444641113,
"learning_rate": 7.992584745762713e-06,
"loss": 0.0823,
"step": 1895
},
{
"epoch": 2.00635509069244,
"grad_norm": 0.6257076263427734,
"learning_rate": 7.991525423728814e-06,
"loss": 0.0911,
"step": 1896
},
{
"epoch": 2.0074142724745134,
"grad_norm": 0.3046128451824188,
"learning_rate": 7.990466101694916e-06,
"loss": 0.0869,
"step": 1897
},
{
"epoch": 2.008473454256587,
"grad_norm": 0.3856375813484192,
"learning_rate": 7.989406779661017e-06,
"loss": 0.0901,
"step": 1898
},
{
"epoch": 2.00953263603866,
"grad_norm": 0.3367128074169159,
"learning_rate": 7.988347457627119e-06,
"loss": 0.0882,
"step": 1899
},
{
"epoch": 2.0105918178207336,
"grad_norm": 0.2662423551082611,
"learning_rate": 7.987288135593222e-06,
"loss": 0.0863,
"step": 1900
},
{
"epoch": 2.0116509996028067,
"grad_norm": 0.4654860496520996,
"learning_rate": 7.986228813559323e-06,
"loss": 0.0919,
"step": 1901
},
{
"epoch": 2.0127101813848802,
"grad_norm": 0.9134454727172852,
"learning_rate": 7.985169491525424e-06,
"loss": 0.0941,
"step": 1902
},
{
"epoch": 2.0137693631669533,
"grad_norm": 0.2735442817211151,
"learning_rate": 7.984110169491526e-06,
"loss": 0.0894,
"step": 1903
},
{
"epoch": 2.014828544949027,
"grad_norm": 0.31145164370536804,
"learning_rate": 7.983050847457627e-06,
"loss": 0.0863,
"step": 1904
},
{
"epoch": 2.0158877267311004,
"grad_norm": 0.2529715597629547,
"learning_rate": 7.981991525423729e-06,
"loss": 0.0873,
"step": 1905
},
{
"epoch": 2.0169469085131735,
"grad_norm": 0.25238585472106934,
"learning_rate": 7.98093220338983e-06,
"loss": 0.0882,
"step": 1906
},
{
"epoch": 2.018006090295247,
"grad_norm": 0.26991987228393555,
"learning_rate": 7.979872881355933e-06,
"loss": 0.0905,
"step": 1907
},
{
"epoch": 2.01906527207732,
"grad_norm": 0.6508270502090454,
"learning_rate": 7.978813559322035e-06,
"loss": 0.0919,
"step": 1908
},
{
"epoch": 2.0201244538593937,
"grad_norm": 0.7955009341239929,
"learning_rate": 7.977754237288136e-06,
"loss": 0.0934,
"step": 1909
},
{
"epoch": 2.0211836356414667,
"grad_norm": 0.7356244325637817,
"learning_rate": 7.976694915254239e-06,
"loss": 0.0895,
"step": 1910
},
{
"epoch": 2.0222428174235403,
"grad_norm": 0.556024968624115,
"learning_rate": 7.97563559322034e-06,
"loss": 0.0915,
"step": 1911
},
{
"epoch": 2.023301999205614,
"grad_norm": 0.35753247141838074,
"learning_rate": 7.974576271186442e-06,
"loss": 0.09,
"step": 1912
},
{
"epoch": 2.024361180987687,
"grad_norm": 0.27930715680122375,
"learning_rate": 7.973516949152543e-06,
"loss": 0.0913,
"step": 1913
},
{
"epoch": 2.0254203627697605,
"grad_norm": 0.7513614296913147,
"learning_rate": 7.972457627118645e-06,
"loss": 0.0894,
"step": 1914
},
{
"epoch": 2.0264795445518335,
"grad_norm": 0.3672800064086914,
"learning_rate": 7.971398305084746e-06,
"loss": 0.086,
"step": 1915
},
{
"epoch": 2.027538726333907,
"grad_norm": 0.8558787703514099,
"learning_rate": 7.970338983050848e-06,
"loss": 0.0907,
"step": 1916
},
{
"epoch": 2.0285979081159806,
"grad_norm": 0.8640574216842651,
"learning_rate": 7.969279661016949e-06,
"loss": 0.0906,
"step": 1917
},
{
"epoch": 2.0296570898980537,
"grad_norm": 0.3139023184776306,
"learning_rate": 7.968220338983052e-06,
"loss": 0.0882,
"step": 1918
},
{
"epoch": 2.0307162716801272,
"grad_norm": 0.30195289850234985,
"learning_rate": 7.967161016949154e-06,
"loss": 0.0854,
"step": 1919
},
{
"epoch": 2.0317754534622003,
"grad_norm": 0.2863333225250244,
"learning_rate": 7.966101694915255e-06,
"loss": 0.0896,
"step": 1920
},
{
"epoch": 2.032834635244274,
"grad_norm": 0.42541301250457764,
"learning_rate": 7.965042372881356e-06,
"loss": 0.0837,
"step": 1921
},
{
"epoch": 2.033893817026347,
"grad_norm": 0.2463466227054596,
"learning_rate": 7.963983050847458e-06,
"loss": 0.0869,
"step": 1922
},
{
"epoch": 2.0349529988084205,
"grad_norm": 0.256345272064209,
"learning_rate": 7.96292372881356e-06,
"loss": 0.0875,
"step": 1923
},
{
"epoch": 2.036012180590494,
"grad_norm": 0.3295067250728607,
"learning_rate": 7.96186440677966e-06,
"loss": 0.0877,
"step": 1924
},
{
"epoch": 2.037071362372567,
"grad_norm": 0.6613496541976929,
"learning_rate": 7.960805084745764e-06,
"loss": 0.0884,
"step": 1925
},
{
"epoch": 2.0381305441546407,
"grad_norm": 0.3493386209011078,
"learning_rate": 7.959745762711865e-06,
"loss": 0.0905,
"step": 1926
},
{
"epoch": 2.0391897259367138,
"grad_norm": 0.7373542189598083,
"learning_rate": 7.958686440677967e-06,
"loss": 0.0854,
"step": 1927
},
{
"epoch": 2.0402489077187873,
"grad_norm": 0.28745999932289124,
"learning_rate": 7.957627118644068e-06,
"loss": 0.0895,
"step": 1928
},
{
"epoch": 2.0413080895008604,
"grad_norm": 0.6971897482872009,
"learning_rate": 7.956567796610171e-06,
"loss": 0.0895,
"step": 1929
},
{
"epoch": 2.042367271282934,
"grad_norm": 0.8651426434516907,
"learning_rate": 7.955508474576272e-06,
"loss": 0.0871,
"step": 1930
},
{
"epoch": 2.0434264530650075,
"grad_norm": 0.2681843936443329,
"learning_rate": 7.954449152542374e-06,
"loss": 0.0891,
"step": 1931
},
{
"epoch": 2.0444856348470806,
"grad_norm": 0.27684885263442993,
"learning_rate": 7.953389830508475e-06,
"loss": 0.0859,
"step": 1932
},
{
"epoch": 2.045544816629154,
"grad_norm": 0.2875300347805023,
"learning_rate": 7.952330508474577e-06,
"loss": 0.0904,
"step": 1933
},
{
"epoch": 2.046603998411227,
"grad_norm": 0.5383615493774414,
"learning_rate": 7.951271186440678e-06,
"loss": 0.0873,
"step": 1934
},
{
"epoch": 2.0476631801933007,
"grad_norm": 0.2454068958759308,
"learning_rate": 7.950211864406781e-06,
"loss": 0.0883,
"step": 1935
},
{
"epoch": 2.048722361975374,
"grad_norm": 0.29819706082344055,
"learning_rate": 7.949152542372883e-06,
"loss": 0.088,
"step": 1936
},
{
"epoch": 2.0497815437574474,
"grad_norm": 0.3372708261013031,
"learning_rate": 7.948093220338984e-06,
"loss": 0.0889,
"step": 1937
},
{
"epoch": 2.050840725539521,
"grad_norm": 0.39405205845832825,
"learning_rate": 7.947033898305085e-06,
"loss": 0.0906,
"step": 1938
},
{
"epoch": 2.051899907321594,
"grad_norm": 0.3588440418243408,
"learning_rate": 7.945974576271187e-06,
"loss": 0.0864,
"step": 1939
},
{
"epoch": 2.0529590891036675,
"grad_norm": 0.3285020589828491,
"learning_rate": 7.944915254237288e-06,
"loss": 0.0889,
"step": 1940
},
{
"epoch": 2.0540182708857406,
"grad_norm": 1.0305137634277344,
"learning_rate": 7.94385593220339e-06,
"loss": 0.0886,
"step": 1941
},
{
"epoch": 2.055077452667814,
"grad_norm": 0.6089313626289368,
"learning_rate": 7.942796610169493e-06,
"loss": 0.0904,
"step": 1942
},
{
"epoch": 2.0561366344498873,
"grad_norm": 0.23426276445388794,
"learning_rate": 7.941737288135594e-06,
"loss": 0.0853,
"step": 1943
},
{
"epoch": 2.057195816231961,
"grad_norm": 0.28794458508491516,
"learning_rate": 7.940677966101696e-06,
"loss": 0.0847,
"step": 1944
},
{
"epoch": 2.0582549980140343,
"grad_norm": 0.435823529958725,
"learning_rate": 7.939618644067797e-06,
"loss": 0.0905,
"step": 1945
},
{
"epoch": 2.0593141797961074,
"grad_norm": 0.5630863904953003,
"learning_rate": 7.938559322033898e-06,
"loss": 0.0896,
"step": 1946
},
{
"epoch": 2.060373361578181,
"grad_norm": 0.24650096893310547,
"learning_rate": 7.9375e-06,
"loss": 0.0888,
"step": 1947
},
{
"epoch": 2.061432543360254,
"grad_norm": 0.2687147259712219,
"learning_rate": 7.936440677966101e-06,
"loss": 0.0888,
"step": 1948
},
{
"epoch": 2.0624917251423276,
"grad_norm": 0.36030933260917664,
"learning_rate": 7.935381355932204e-06,
"loss": 0.0871,
"step": 1949
},
{
"epoch": 2.0635509069244007,
"grad_norm": 0.5701349973678589,
"learning_rate": 7.934322033898306e-06,
"loss": 0.09,
"step": 1950
},
{
"epoch": 2.0646100887064742,
"grad_norm": 0.8344652652740479,
"learning_rate": 7.933262711864407e-06,
"loss": 0.0883,
"step": 1951
},
{
"epoch": 2.0656692704885478,
"grad_norm": 0.4540218114852905,
"learning_rate": 7.93220338983051e-06,
"loss": 0.0856,
"step": 1952
},
{
"epoch": 2.066728452270621,
"grad_norm": 0.20542527735233307,
"learning_rate": 7.931144067796612e-06,
"loss": 0.0852,
"step": 1953
},
{
"epoch": 2.0677876340526944,
"grad_norm": 0.6437293887138367,
"learning_rate": 7.930084745762713e-06,
"loss": 0.0927,
"step": 1954
},
{
"epoch": 2.0688468158347675,
"grad_norm": 0.33412033319473267,
"learning_rate": 7.929025423728814e-06,
"loss": 0.0867,
"step": 1955
},
{
"epoch": 2.069905997616841,
"grad_norm": 0.4800800681114197,
"learning_rate": 7.927966101694916e-06,
"loss": 0.0899,
"step": 1956
},
{
"epoch": 2.0709651793989146,
"grad_norm": 0.6857233047485352,
"learning_rate": 7.926906779661017e-06,
"loss": 0.0891,
"step": 1957
},
{
"epoch": 2.0720243611809877,
"grad_norm": 0.7694417834281921,
"learning_rate": 7.925847457627119e-06,
"loss": 0.0855,
"step": 1958
},
{
"epoch": 2.073083542963061,
"grad_norm": 0.3634096086025238,
"learning_rate": 7.924788135593222e-06,
"loss": 0.0877,
"step": 1959
},
{
"epoch": 2.0741427247451343,
"grad_norm": 0.38182634115219116,
"learning_rate": 7.923728813559323e-06,
"loss": 0.089,
"step": 1960
},
{
"epoch": 2.075201906527208,
"grad_norm": 0.3368748724460602,
"learning_rate": 7.922669491525425e-06,
"loss": 0.0886,
"step": 1961
},
{
"epoch": 2.076261088309281,
"grad_norm": 0.5356863737106323,
"learning_rate": 7.921610169491526e-06,
"loss": 0.0882,
"step": 1962
},
{
"epoch": 2.0773202700913544,
"grad_norm": 0.5707106590270996,
"learning_rate": 7.920550847457627e-06,
"loss": 0.0881,
"step": 1963
},
{
"epoch": 2.078379451873428,
"grad_norm": 0.32848355174064636,
"learning_rate": 7.919491525423729e-06,
"loss": 0.0867,
"step": 1964
},
{
"epoch": 2.079438633655501,
"grad_norm": 0.24699488282203674,
"learning_rate": 7.91843220338983e-06,
"loss": 0.0917,
"step": 1965
},
{
"epoch": 2.0804978154375746,
"grad_norm": 0.422572523355484,
"learning_rate": 7.917372881355932e-06,
"loss": 0.089,
"step": 1966
},
{
"epoch": 2.0815569972196477,
"grad_norm": 0.4969422221183777,
"learning_rate": 7.916313559322035e-06,
"loss": 0.0908,
"step": 1967
},
{
"epoch": 2.0826161790017212,
"grad_norm": 0.25381964445114136,
"learning_rate": 7.915254237288136e-06,
"loss": 0.0852,
"step": 1968
},
{
"epoch": 2.0836753607837943,
"grad_norm": 0.35738474130630493,
"learning_rate": 7.914194915254238e-06,
"loss": 0.089,
"step": 1969
},
{
"epoch": 2.084734542565868,
"grad_norm": 0.7216671109199524,
"learning_rate": 7.913135593220339e-06,
"loss": 0.0896,
"step": 1970
},
{
"epoch": 2.0857937243479414,
"grad_norm": 0.20853260159492493,
"learning_rate": 7.912076271186442e-06,
"loss": 0.0896,
"step": 1971
},
{
"epoch": 2.0868529061300145,
"grad_norm": 0.2022741138935089,
"learning_rate": 7.911016949152544e-06,
"loss": 0.0847,
"step": 1972
},
{
"epoch": 2.087912087912088,
"grad_norm": 0.857926607131958,
"learning_rate": 7.909957627118645e-06,
"loss": 0.084,
"step": 1973
},
{
"epoch": 2.088971269694161,
"grad_norm": 0.9053319096565247,
"learning_rate": 7.908898305084746e-06,
"loss": 0.0871,
"step": 1974
},
{
"epoch": 2.0900304514762347,
"grad_norm": 0.21877585351467133,
"learning_rate": 7.907838983050848e-06,
"loss": 0.084,
"step": 1975
},
{
"epoch": 2.0910896332583078,
"grad_norm": 0.831366777420044,
"learning_rate": 7.906779661016951e-06,
"loss": 0.0879,
"step": 1976
},
{
"epoch": 2.0921488150403813,
"grad_norm": 0.595077633857727,
"learning_rate": 7.905720338983052e-06,
"loss": 0.0916,
"step": 1977
},
{
"epoch": 2.093207996822455,
"grad_norm": 0.31096351146698,
"learning_rate": 7.904661016949154e-06,
"loss": 0.089,
"step": 1978
},
{
"epoch": 2.094267178604528,
"grad_norm": 0.30939215421676636,
"learning_rate": 7.903601694915255e-06,
"loss": 0.0923,
"step": 1979
},
{
"epoch": 2.0953263603866015,
"grad_norm": 0.5221177935600281,
"learning_rate": 7.902542372881357e-06,
"loss": 0.0857,
"step": 1980
},
{
"epoch": 2.0963855421686746,
"grad_norm": 0.5102083683013916,
"learning_rate": 7.901483050847458e-06,
"loss": 0.0922,
"step": 1981
},
{
"epoch": 2.097444723950748,
"grad_norm": 0.2757764160633087,
"learning_rate": 7.90042372881356e-06,
"loss": 0.0915,
"step": 1982
},
{
"epoch": 2.098503905732821,
"grad_norm": 0.39589911699295044,
"learning_rate": 7.89936440677966e-06,
"loss": 0.0868,
"step": 1983
},
{
"epoch": 2.0995630875148947,
"grad_norm": 0.3275192677974701,
"learning_rate": 7.898305084745764e-06,
"loss": 0.0885,
"step": 1984
},
{
"epoch": 2.1006222692969683,
"grad_norm": 1.0052108764648438,
"learning_rate": 7.897245762711865e-06,
"loss": 0.0904,
"step": 1985
},
{
"epoch": 2.1016814510790414,
"grad_norm": 0.4538261890411377,
"learning_rate": 7.896186440677967e-06,
"loss": 0.0882,
"step": 1986
},
{
"epoch": 2.102740632861115,
"grad_norm": 0.4319491982460022,
"learning_rate": 7.895127118644068e-06,
"loss": 0.0855,
"step": 1987
},
{
"epoch": 2.103799814643188,
"grad_norm": 0.2973875105381012,
"learning_rate": 7.89406779661017e-06,
"loss": 0.089,
"step": 1988
},
{
"epoch": 2.1048589964252615,
"grad_norm": 0.2854253649711609,
"learning_rate": 7.893008474576271e-06,
"loss": 0.0928,
"step": 1989
},
{
"epoch": 2.1059181782073346,
"grad_norm": 0.36707934737205505,
"learning_rate": 7.891949152542372e-06,
"loss": 0.09,
"step": 1990
},
{
"epoch": 2.106977359989408,
"grad_norm": 0.3122307360172272,
"learning_rate": 7.890889830508475e-06,
"loss": 0.0882,
"step": 1991
},
{
"epoch": 2.1080365417714817,
"grad_norm": 0.6345651149749756,
"learning_rate": 7.889830508474577e-06,
"loss": 0.0884,
"step": 1992
},
{
"epoch": 2.109095723553555,
"grad_norm": 0.660128653049469,
"learning_rate": 7.888771186440678e-06,
"loss": 0.0886,
"step": 1993
},
{
"epoch": 2.1101549053356283,
"grad_norm": 1.1793729066848755,
"learning_rate": 7.887711864406781e-06,
"loss": 0.088,
"step": 1994
},
{
"epoch": 2.1112140871177014,
"grad_norm": 0.2564319968223572,
"learning_rate": 7.886652542372883e-06,
"loss": 0.0889,
"step": 1995
},
{
"epoch": 2.112273268899775,
"grad_norm": 0.3774588108062744,
"learning_rate": 7.885593220338984e-06,
"loss": 0.0902,
"step": 1996
},
{
"epoch": 2.113332450681848,
"grad_norm": 0.26271554827690125,
"learning_rate": 7.884533898305086e-06,
"loss": 0.088,
"step": 1997
},
{
"epoch": 2.1143916324639216,
"grad_norm": 0.2500177025794983,
"learning_rate": 7.883474576271187e-06,
"loss": 0.0878,
"step": 1998
},
{
"epoch": 2.115450814245995,
"grad_norm": 0.4386039972305298,
"learning_rate": 7.882415254237288e-06,
"loss": 0.0849,
"step": 1999
},
{
"epoch": 2.116509996028068,
"grad_norm": 0.3402063846588135,
"learning_rate": 7.88135593220339e-06,
"loss": 0.0896,
"step": 2000
},
{
"epoch": 2.1175691778101418,
"grad_norm": 0.5039743185043335,
"learning_rate": 7.880296610169493e-06,
"loss": 0.0895,
"step": 2001
},
{
"epoch": 2.118628359592215,
"grad_norm": 0.8807342052459717,
"learning_rate": 7.879237288135594e-06,
"loss": 0.0863,
"step": 2002
},
{
"epoch": 2.1196875413742884,
"grad_norm": 0.3534659445285797,
"learning_rate": 7.878177966101696e-06,
"loss": 0.0894,
"step": 2003
},
{
"epoch": 2.120746723156362,
"grad_norm": 0.23499086499214172,
"learning_rate": 7.877118644067797e-06,
"loss": 0.0865,
"step": 2004
},
{
"epoch": 2.121805904938435,
"grad_norm": 0.23860704898834229,
"learning_rate": 7.876059322033899e-06,
"loss": 0.0842,
"step": 2005
},
{
"epoch": 2.1228650867205086,
"grad_norm": 0.25318294763565063,
"learning_rate": 7.875e-06,
"loss": 0.0847,
"step": 2006
},
{
"epoch": 2.1239242685025816,
"grad_norm": 0.22246617078781128,
"learning_rate": 7.873940677966101e-06,
"loss": 0.0886,
"step": 2007
},
{
"epoch": 2.124983450284655,
"grad_norm": 0.5180307030677795,
"learning_rate": 7.872881355932205e-06,
"loss": 0.0897,
"step": 2008
},
{
"epoch": 2.1260426320667283,
"grad_norm": 0.3671906292438507,
"learning_rate": 7.871822033898306e-06,
"loss": 0.0899,
"step": 2009
},
{
"epoch": 2.127101813848802,
"grad_norm": 0.31801825761795044,
"learning_rate": 7.870762711864407e-06,
"loss": 0.0876,
"step": 2010
},
{
"epoch": 2.1281609956308754,
"grad_norm": 0.3843144178390503,
"learning_rate": 7.869703389830509e-06,
"loss": 0.0898,
"step": 2011
},
{
"epoch": 2.1292201774129484,
"grad_norm": 0.3329516053199768,
"learning_rate": 7.86864406779661e-06,
"loss": 0.0888,
"step": 2012
},
{
"epoch": 2.130279359195022,
"grad_norm": 0.31800010800361633,
"learning_rate": 7.867584745762713e-06,
"loss": 0.0845,
"step": 2013
},
{
"epoch": 2.131338540977095,
"grad_norm": 0.43881750106811523,
"learning_rate": 7.866525423728815e-06,
"loss": 0.0837,
"step": 2014
},
{
"epoch": 2.1323977227591686,
"grad_norm": 0.7285757064819336,
"learning_rate": 7.865466101694916e-06,
"loss": 0.0882,
"step": 2015
},
{
"epoch": 2.1334569045412417,
"grad_norm": 0.304055392742157,
"learning_rate": 7.864406779661017e-06,
"loss": 0.0871,
"step": 2016
},
{
"epoch": 2.1345160863233152,
"grad_norm": 0.45109742879867554,
"learning_rate": 7.863347457627119e-06,
"loss": 0.0891,
"step": 2017
},
{
"epoch": 2.135575268105389,
"grad_norm": 0.9246112704277039,
"learning_rate": 7.862288135593222e-06,
"loss": 0.0898,
"step": 2018
},
{
"epoch": 2.136634449887462,
"grad_norm": 0.37992846965789795,
"learning_rate": 7.861228813559323e-06,
"loss": 0.0882,
"step": 2019
},
{
"epoch": 2.1376936316695354,
"grad_norm": 0.22593331336975098,
"learning_rate": 7.860169491525425e-06,
"loss": 0.0843,
"step": 2020
},
{
"epoch": 2.1387528134516085,
"grad_norm": 0.569980800151825,
"learning_rate": 7.859110169491526e-06,
"loss": 0.0885,
"step": 2021
},
{
"epoch": 2.139811995233682,
"grad_norm": 0.24157559871673584,
"learning_rate": 7.858050847457628e-06,
"loss": 0.0844,
"step": 2022
},
{
"epoch": 2.140871177015755,
"grad_norm": 0.26660817861557007,
"learning_rate": 7.856991525423729e-06,
"loss": 0.0893,
"step": 2023
},
{
"epoch": 2.1419303587978287,
"grad_norm": 0.3377159535884857,
"learning_rate": 7.85593220338983e-06,
"loss": 0.086,
"step": 2024
},
{
"epoch": 2.142989540579902,
"grad_norm": 0.46858081221580505,
"learning_rate": 7.854872881355934e-06,
"loss": 0.0884,
"step": 2025
},
{
"epoch": 2.1440487223619753,
"grad_norm": 0.4171918034553528,
"learning_rate": 7.853813559322035e-06,
"loss": 0.0893,
"step": 2026
},
{
"epoch": 2.145107904144049,
"grad_norm": 0.9166372418403625,
"learning_rate": 7.852754237288136e-06,
"loss": 0.0879,
"step": 2027
},
{
"epoch": 2.146167085926122,
"grad_norm": 0.2570573389530182,
"learning_rate": 7.851694915254238e-06,
"loss": 0.0845,
"step": 2028
},
{
"epoch": 2.1472262677081955,
"grad_norm": 0.437513142824173,
"learning_rate": 7.85063559322034e-06,
"loss": 0.0885,
"step": 2029
},
{
"epoch": 2.1482854494902686,
"grad_norm": 0.1894887387752533,
"learning_rate": 7.84957627118644e-06,
"loss": 0.0873,
"step": 2030
},
{
"epoch": 2.149344631272342,
"grad_norm": 0.20361056923866272,
"learning_rate": 7.848516949152542e-06,
"loss": 0.0857,
"step": 2031
},
{
"epoch": 2.1504038130544156,
"grad_norm": 0.2676304876804352,
"learning_rate": 7.847457627118643e-06,
"loss": 0.0868,
"step": 2032
},
{
"epoch": 2.1514629948364887,
"grad_norm": 0.2386782020330429,
"learning_rate": 7.846398305084747e-06,
"loss": 0.0854,
"step": 2033
},
{
"epoch": 2.1525221766185623,
"grad_norm": 0.35165345668792725,
"learning_rate": 7.845338983050848e-06,
"loss": 0.0868,
"step": 2034
},
{
"epoch": 2.1535813584006354,
"grad_norm": 0.3289099335670471,
"learning_rate": 7.844279661016951e-06,
"loss": 0.0873,
"step": 2035
},
{
"epoch": 2.154640540182709,
"grad_norm": 1.0715082883834839,
"learning_rate": 7.843220338983052e-06,
"loss": 0.0857,
"step": 2036
},
{
"epoch": 2.1556997219647824,
"grad_norm": 0.19499650597572327,
"learning_rate": 7.842161016949154e-06,
"loss": 0.0897,
"step": 2037
},
{
"epoch": 2.1567589037468555,
"grad_norm": 0.2263382226228714,
"learning_rate": 7.841101694915255e-06,
"loss": 0.0903,
"step": 2038
},
{
"epoch": 2.157818085528929,
"grad_norm": 0.2175901234149933,
"learning_rate": 7.840042372881357e-06,
"loss": 0.087,
"step": 2039
},
{
"epoch": 2.158877267311002,
"grad_norm": 0.6341164112091064,
"learning_rate": 7.838983050847458e-06,
"loss": 0.0896,
"step": 2040
},
{
"epoch": 2.1599364490930757,
"grad_norm": 0.3150041997432709,
"learning_rate": 7.83792372881356e-06,
"loss": 0.0841,
"step": 2041
},
{
"epoch": 2.160995630875149,
"grad_norm": 0.2422228902578354,
"learning_rate": 7.836864406779661e-06,
"loss": 0.0829,
"step": 2042
},
{
"epoch": 2.1620548126572223,
"grad_norm": 0.20333003997802734,
"learning_rate": 7.835805084745764e-06,
"loss": 0.0868,
"step": 2043
},
{
"epoch": 2.1631139944392954,
"grad_norm": 0.2158234864473343,
"learning_rate": 7.834745762711865e-06,
"loss": 0.0826,
"step": 2044
},
{
"epoch": 2.164173176221369,
"grad_norm": 0.3346843421459198,
"learning_rate": 7.833686440677967e-06,
"loss": 0.0887,
"step": 2045
},
{
"epoch": 2.1652323580034425,
"grad_norm": 0.2267426997423172,
"learning_rate": 7.832627118644068e-06,
"loss": 0.085,
"step": 2046
},
{
"epoch": 2.1662915397855156,
"grad_norm": 0.3703182339668274,
"learning_rate": 7.83156779661017e-06,
"loss": 0.0865,
"step": 2047
},
{
"epoch": 2.167350721567589,
"grad_norm": 0.5824213027954102,
"learning_rate": 7.830508474576271e-06,
"loss": 0.0885,
"step": 2048
},
{
"epoch": 2.168409903349662,
"grad_norm": 0.7234041690826416,
"learning_rate": 7.829449152542373e-06,
"loss": 0.0859,
"step": 2049
},
{
"epoch": 2.1694690851317358,
"grad_norm": 0.4590138792991638,
"learning_rate": 7.828389830508476e-06,
"loss": 0.0875,
"step": 2050
},
{
"epoch": 2.1705282669138093,
"grad_norm": 0.22016145288944244,
"learning_rate": 7.827330508474577e-06,
"loss": 0.0859,
"step": 2051
},
{
"epoch": 2.1715874486958824,
"grad_norm": 0.19176195561885834,
"learning_rate": 7.826271186440678e-06,
"loss": 0.0851,
"step": 2052
},
{
"epoch": 2.172646630477956,
"grad_norm": 0.2171255648136139,
"learning_rate": 7.82521186440678e-06,
"loss": 0.0856,
"step": 2053
},
{
"epoch": 2.173705812260029,
"grad_norm": 0.21707554161548615,
"learning_rate": 7.824152542372881e-06,
"loss": 0.0855,
"step": 2054
},
{
"epoch": 2.1747649940421026,
"grad_norm": 0.1986992061138153,
"learning_rate": 7.823093220338984e-06,
"loss": 0.0878,
"step": 2055
},
{
"epoch": 2.1758241758241756,
"grad_norm": 0.52188640832901,
"learning_rate": 7.822033898305086e-06,
"loss": 0.0882,
"step": 2056
},
{
"epoch": 2.176883357606249,
"grad_norm": 0.39829766750335693,
"learning_rate": 7.820974576271187e-06,
"loss": 0.0883,
"step": 2057
},
{
"epoch": 2.1779425393883227,
"grad_norm": 0.6201052069664001,
"learning_rate": 7.819915254237289e-06,
"loss": 0.0898,
"step": 2058
},
{
"epoch": 2.179001721170396,
"grad_norm": 0.237601175904274,
"learning_rate": 7.81885593220339e-06,
"loss": 0.0888,
"step": 2059
},
{
"epoch": 2.1800609029524693,
"grad_norm": 0.22507217526435852,
"learning_rate": 7.817796610169493e-06,
"loss": 0.085,
"step": 2060
},
{
"epoch": 2.1811200847345424,
"grad_norm": 0.51198810338974,
"learning_rate": 7.816737288135595e-06,
"loss": 0.0908,
"step": 2061
},
{
"epoch": 2.182179266516616,
"grad_norm": 0.24880242347717285,
"learning_rate": 7.815677966101696e-06,
"loss": 0.088,
"step": 2062
},
{
"epoch": 2.183238448298689,
"grad_norm": 0.6522811651229858,
"learning_rate": 7.814618644067797e-06,
"loss": 0.0905,
"step": 2063
},
{
"epoch": 2.1842976300807626,
"grad_norm": 0.259032666683197,
"learning_rate": 7.813559322033899e-06,
"loss": 0.09,
"step": 2064
},
{
"epoch": 2.185356811862836,
"grad_norm": 0.5633755326271057,
"learning_rate": 7.8125e-06,
"loss": 0.0882,
"step": 2065
},
{
"epoch": 2.1864159936449092,
"grad_norm": 0.2784501016139984,
"learning_rate": 7.811440677966102e-06,
"loss": 0.0841,
"step": 2066
},
{
"epoch": 2.1874751754269828,
"grad_norm": 0.2742295265197754,
"learning_rate": 7.810381355932205e-06,
"loss": 0.0851,
"step": 2067
},
{
"epoch": 2.188534357209056,
"grad_norm": 0.18370847404003143,
"learning_rate": 7.809322033898306e-06,
"loss": 0.0871,
"step": 2068
},
{
"epoch": 2.1895935389911294,
"grad_norm": 0.29191604256629944,
"learning_rate": 7.808262711864407e-06,
"loss": 0.0864,
"step": 2069
},
{
"epoch": 2.1906527207732025,
"grad_norm": 0.2865682542324066,
"learning_rate": 7.807203389830509e-06,
"loss": 0.0862,
"step": 2070
},
{
"epoch": 2.191711902555276,
"grad_norm": 0.2612156867980957,
"learning_rate": 7.80614406779661e-06,
"loss": 0.0887,
"step": 2071
},
{
"epoch": 2.1927710843373496,
"grad_norm": 0.8060826063156128,
"learning_rate": 7.805084745762712e-06,
"loss": 0.0861,
"step": 2072
},
{
"epoch": 2.1938302661194227,
"grad_norm": 0.2059231549501419,
"learning_rate": 7.804025423728813e-06,
"loss": 0.0878,
"step": 2073
},
{
"epoch": 2.194889447901496,
"grad_norm": 0.21191243827342987,
"learning_rate": 7.802966101694916e-06,
"loss": 0.0864,
"step": 2074
},
{
"epoch": 2.1959486296835693,
"grad_norm": 0.2840358316898346,
"learning_rate": 7.801906779661018e-06,
"loss": 0.0872,
"step": 2075
},
{
"epoch": 2.197007811465643,
"grad_norm": 0.25956588983535767,
"learning_rate": 7.800847457627119e-06,
"loss": 0.0863,
"step": 2076
},
{
"epoch": 2.198066993247716,
"grad_norm": 0.197649285197258,
"learning_rate": 7.799788135593222e-06,
"loss": 0.085,
"step": 2077
},
{
"epoch": 2.1991261750297895,
"grad_norm": 0.2957169711589813,
"learning_rate": 7.798728813559324e-06,
"loss": 0.0859,
"step": 2078
},
{
"epoch": 2.200185356811863,
"grad_norm": 0.2329864650964737,
"learning_rate": 7.797669491525425e-06,
"loss": 0.0861,
"step": 2079
},
{
"epoch": 2.201244538593936,
"grad_norm": 0.19923458993434906,
"learning_rate": 7.796610169491526e-06,
"loss": 0.0848,
"step": 2080
},
{
"epoch": 2.2023037203760096,
"grad_norm": 0.6433698534965515,
"learning_rate": 7.795550847457628e-06,
"loss": 0.0862,
"step": 2081
},
{
"epoch": 2.2033629021580827,
"grad_norm": 0.507807731628418,
"learning_rate": 7.79449152542373e-06,
"loss": 0.088,
"step": 2082
},
{
"epoch": 2.2044220839401563,
"grad_norm": 0.2583516538143158,
"learning_rate": 7.79343220338983e-06,
"loss": 0.085,
"step": 2083
},
{
"epoch": 2.20548126572223,
"grad_norm": 1.012139081954956,
"learning_rate": 7.792372881355934e-06,
"loss": 0.0847,
"step": 2084
},
{
"epoch": 2.206540447504303,
"grad_norm": 0.6931725144386292,
"learning_rate": 7.791313559322035e-06,
"loss": 0.0884,
"step": 2085
},
{
"epoch": 2.2075996292863764,
"grad_norm": 0.22996462881565094,
"learning_rate": 7.790254237288137e-06,
"loss": 0.0852,
"step": 2086
},
{
"epoch": 2.2086588110684495,
"grad_norm": 0.2905077338218689,
"learning_rate": 7.789194915254238e-06,
"loss": 0.0845,
"step": 2087
},
{
"epoch": 2.209717992850523,
"grad_norm": 0.22558225691318512,
"learning_rate": 7.78813559322034e-06,
"loss": 0.0896,
"step": 2088
},
{
"epoch": 2.210777174632596,
"grad_norm": 0.291515052318573,
"learning_rate": 7.78707627118644e-06,
"loss": 0.0854,
"step": 2089
},
{
"epoch": 2.2118363564146697,
"grad_norm": 0.20197857916355133,
"learning_rate": 7.786016949152542e-06,
"loss": 0.0885,
"step": 2090
},
{
"epoch": 2.212895538196743,
"grad_norm": 0.6940252780914307,
"learning_rate": 7.784957627118644e-06,
"loss": 0.0895,
"step": 2091
},
{
"epoch": 2.2139547199788163,
"grad_norm": 0.2460675984621048,
"learning_rate": 7.783898305084747e-06,
"loss": 0.0857,
"step": 2092
},
{
"epoch": 2.21501390176089,
"grad_norm": 0.2917913794517517,
"learning_rate": 7.782838983050848e-06,
"loss": 0.0818,
"step": 2093
},
{
"epoch": 2.216073083542963,
"grad_norm": 0.21613304316997528,
"learning_rate": 7.78177966101695e-06,
"loss": 0.0866,
"step": 2094
},
{
"epoch": 2.2171322653250365,
"grad_norm": 0.25949186086654663,
"learning_rate": 7.780720338983051e-06,
"loss": 0.0865,
"step": 2095
},
{
"epoch": 2.2181914471071096,
"grad_norm": 0.3771454393863678,
"learning_rate": 7.779661016949152e-06,
"loss": 0.0918,
"step": 2096
},
{
"epoch": 2.219250628889183,
"grad_norm": 0.2963436245918274,
"learning_rate": 7.778601694915255e-06,
"loss": 0.0833,
"step": 2097
},
{
"epoch": 2.2203098106712567,
"grad_norm": 0.27688950300216675,
"learning_rate": 7.777542372881357e-06,
"loss": 0.0852,
"step": 2098
},
{
"epoch": 2.2213689924533297,
"grad_norm": 0.3003609776496887,
"learning_rate": 7.776483050847458e-06,
"loss": 0.0876,
"step": 2099
},
{
"epoch": 2.2224281742354033,
"grad_norm": 0.368274450302124,
"learning_rate": 7.77542372881356e-06,
"loss": 0.0872,
"step": 2100
},
{
"epoch": 2.2234873560174764,
"grad_norm": 0.7609259486198425,
"learning_rate": 7.774364406779663e-06,
"loss": 0.085,
"step": 2101
},
{
"epoch": 2.22454653779955,
"grad_norm": 0.23514187335968018,
"learning_rate": 7.773305084745764e-06,
"loss": 0.087,
"step": 2102
},
{
"epoch": 2.225605719581623,
"grad_norm": 1.0117790699005127,
"learning_rate": 7.772245762711866e-06,
"loss": 0.0891,
"step": 2103
},
{
"epoch": 2.2266649013636965,
"grad_norm": 0.2696327567100525,
"learning_rate": 7.771186440677967e-06,
"loss": 0.0859,
"step": 2104
},
{
"epoch": 2.22772408314577,
"grad_norm": 0.24088266491889954,
"learning_rate": 7.770127118644068e-06,
"loss": 0.0863,
"step": 2105
},
{
"epoch": 2.228783264927843,
"grad_norm": 0.3361911475658417,
"learning_rate": 7.76906779661017e-06,
"loss": 0.0882,
"step": 2106
},
{
"epoch": 2.2298424467099167,
"grad_norm": 0.28318047523498535,
"learning_rate": 7.768008474576271e-06,
"loss": 0.084,
"step": 2107
},
{
"epoch": 2.23090162849199,
"grad_norm": 0.27443477511405945,
"learning_rate": 7.766949152542373e-06,
"loss": 0.0858,
"step": 2108
},
{
"epoch": 2.2319608102740633,
"grad_norm": 0.382974773645401,
"learning_rate": 7.765889830508476e-06,
"loss": 0.0851,
"step": 2109
},
{
"epoch": 2.2330199920561364,
"grad_norm": 0.5474820137023926,
"learning_rate": 7.764830508474577e-06,
"loss": 0.087,
"step": 2110
},
{
"epoch": 2.23407917383821,
"grad_norm": 0.6709517240524292,
"learning_rate": 7.763771186440679e-06,
"loss": 0.0864,
"step": 2111
},
{
"epoch": 2.2351383556202835,
"grad_norm": 0.4770525097846985,
"learning_rate": 7.76271186440678e-06,
"loss": 0.0867,
"step": 2112
},
{
"epoch": 2.2361975374023566,
"grad_norm": 1.351270318031311,
"learning_rate": 7.761652542372881e-06,
"loss": 0.0854,
"step": 2113
},
{
"epoch": 2.23725671918443,
"grad_norm": 0.26051637530326843,
"learning_rate": 7.760593220338983e-06,
"loss": 0.0875,
"step": 2114
},
{
"epoch": 2.2383159009665032,
"grad_norm": 0.4052681028842926,
"learning_rate": 7.759533898305084e-06,
"loss": 0.0872,
"step": 2115
},
{
"epoch": 2.2393750827485768,
"grad_norm": 0.23296624422073364,
"learning_rate": 7.758474576271187e-06,
"loss": 0.0848,
"step": 2116
},
{
"epoch": 2.24043426453065,
"grad_norm": 0.31752628087997437,
"learning_rate": 7.757415254237289e-06,
"loss": 0.0896,
"step": 2117
},
{
"epoch": 2.2414934463127234,
"grad_norm": 0.24866719543933868,
"learning_rate": 7.75635593220339e-06,
"loss": 0.0873,
"step": 2118
},
{
"epoch": 2.242552628094797,
"grad_norm": 0.17599520087242126,
"learning_rate": 7.755296610169493e-06,
"loss": 0.0885,
"step": 2119
},
{
"epoch": 2.24361180987687,
"grad_norm": 1.016655445098877,
"learning_rate": 7.754237288135595e-06,
"loss": 0.0859,
"step": 2120
},
{
"epoch": 2.2446709916589436,
"grad_norm": 0.2759738266468048,
"learning_rate": 7.753177966101696e-06,
"loss": 0.0897,
"step": 2121
},
{
"epoch": 2.2457301734410167,
"grad_norm": 0.2551572918891907,
"learning_rate": 7.752118644067798e-06,
"loss": 0.0829,
"step": 2122
},
{
"epoch": 2.24678935522309,
"grad_norm": 0.7468996047973633,
"learning_rate": 7.751059322033899e-06,
"loss": 0.0834,
"step": 2123
},
{
"epoch": 2.2478485370051633,
"grad_norm": 0.46414947509765625,
"learning_rate": 7.75e-06,
"loss": 0.0875,
"step": 2124
},
{
"epoch": 2.248907718787237,
"grad_norm": 0.6280553340911865,
"learning_rate": 7.748940677966102e-06,
"loss": 0.0859,
"step": 2125
},
{
"epoch": 2.2499669005693104,
"grad_norm": 0.22918131947517395,
"learning_rate": 7.747881355932205e-06,
"loss": 0.0905,
"step": 2126
},
{
"epoch": 2.2510260823513835,
"grad_norm": 0.5105732679367065,
"learning_rate": 7.746822033898306e-06,
"loss": 0.0827,
"step": 2127
},
{
"epoch": 2.252085264133457,
"grad_norm": 0.44134438037872314,
"learning_rate": 7.745762711864408e-06,
"loss": 0.0833,
"step": 2128
},
{
"epoch": 2.25314444591553,
"grad_norm": 0.6763642430305481,
"learning_rate": 7.744703389830509e-06,
"loss": 0.0835,
"step": 2129
},
{
"epoch": 2.2542036276976036,
"grad_norm": 0.35161060094833374,
"learning_rate": 7.74364406779661e-06,
"loss": 0.0884,
"step": 2130
},
{
"epoch": 2.255262809479677,
"grad_norm": 0.2552676200866699,
"learning_rate": 7.742584745762712e-06,
"loss": 0.0894,
"step": 2131
},
{
"epoch": 2.2563219912617503,
"grad_norm": 0.30027422308921814,
"learning_rate": 7.741525423728813e-06,
"loss": 0.0877,
"step": 2132
},
{
"epoch": 2.257381173043824,
"grad_norm": 0.28334829211235046,
"learning_rate": 7.740466101694916e-06,
"loss": 0.0889,
"step": 2133
},
{
"epoch": 2.258440354825897,
"grad_norm": 0.342746764421463,
"learning_rate": 7.739406779661018e-06,
"loss": 0.0843,
"step": 2134
},
{
"epoch": 2.2594995366079704,
"grad_norm": 0.5293981432914734,
"learning_rate": 7.73834745762712e-06,
"loss": 0.0862,
"step": 2135
},
{
"epoch": 2.2605587183900435,
"grad_norm": 0.3510816693305969,
"learning_rate": 7.73728813559322e-06,
"loss": 0.0866,
"step": 2136
},
{
"epoch": 2.261617900172117,
"grad_norm": 0.16086283326148987,
"learning_rate": 7.736228813559322e-06,
"loss": 0.0875,
"step": 2137
},
{
"epoch": 2.26267708195419,
"grad_norm": 0.19258259236812592,
"learning_rate": 7.735169491525423e-06,
"loss": 0.0825,
"step": 2138
},
{
"epoch": 2.2637362637362637,
"grad_norm": 0.2791202664375305,
"learning_rate": 7.734110169491527e-06,
"loss": 0.0833,
"step": 2139
},
{
"epoch": 2.2647954455183372,
"grad_norm": 1.3526684045791626,
"learning_rate": 7.733050847457628e-06,
"loss": 0.0876,
"step": 2140
},
{
"epoch": 2.2658546273004103,
"grad_norm": 0.3024195432662964,
"learning_rate": 7.73199152542373e-06,
"loss": 0.0854,
"step": 2141
},
{
"epoch": 2.266913809082484,
"grad_norm": 0.8857383728027344,
"learning_rate": 7.73093220338983e-06,
"loss": 0.0857,
"step": 2142
},
{
"epoch": 2.267972990864557,
"grad_norm": 0.25719326734542847,
"learning_rate": 7.729872881355934e-06,
"loss": 0.0896,
"step": 2143
},
{
"epoch": 2.2690321726466305,
"grad_norm": 0.21778137981891632,
"learning_rate": 7.728813559322035e-06,
"loss": 0.0869,
"step": 2144
},
{
"epoch": 2.270091354428704,
"grad_norm": 0.7866823077201843,
"learning_rate": 7.727754237288137e-06,
"loss": 0.0861,
"step": 2145
},
{
"epoch": 2.271150536210777,
"grad_norm": 0.27548232674598694,
"learning_rate": 7.726694915254238e-06,
"loss": 0.0857,
"step": 2146
},
{
"epoch": 2.2722097179928507,
"grad_norm": 0.26298612356185913,
"learning_rate": 7.72563559322034e-06,
"loss": 0.0865,
"step": 2147
},
{
"epoch": 2.2732688997749237,
"grad_norm": 0.24503065645694733,
"learning_rate": 7.724576271186441e-06,
"loss": 0.0867,
"step": 2148
},
{
"epoch": 2.2743280815569973,
"grad_norm": 0.2263982743024826,
"learning_rate": 7.723516949152542e-06,
"loss": 0.087,
"step": 2149
},
{
"epoch": 2.2753872633390704,
"grad_norm": 0.23325785994529724,
"learning_rate": 7.722457627118645e-06,
"loss": 0.0841,
"step": 2150
},
{
"epoch": 2.276446445121144,
"grad_norm": 0.6658560037612915,
"learning_rate": 7.721398305084747e-06,
"loss": 0.0929,
"step": 2151
},
{
"epoch": 2.2775056269032175,
"grad_norm": 1.4839017391204834,
"learning_rate": 7.720338983050848e-06,
"loss": 0.0915,
"step": 2152
},
{
"epoch": 2.2785648086852905,
"grad_norm": 0.28016865253448486,
"learning_rate": 7.71927966101695e-06,
"loss": 0.0825,
"step": 2153
},
{
"epoch": 2.279623990467364,
"grad_norm": 0.7911934852600098,
"learning_rate": 7.718220338983051e-06,
"loss": 0.0868,
"step": 2154
},
{
"epoch": 2.280683172249437,
"grad_norm": 0.5834576487541199,
"learning_rate": 7.717161016949153e-06,
"loss": 0.0884,
"step": 2155
},
{
"epoch": 2.2817423540315107,
"grad_norm": 0.3871059715747833,
"learning_rate": 7.716101694915254e-06,
"loss": 0.0871,
"step": 2156
},
{
"epoch": 2.282801535813584,
"grad_norm": 0.2939569652080536,
"learning_rate": 7.715042372881355e-06,
"loss": 0.0912,
"step": 2157
},
{
"epoch": 2.2838607175956573,
"grad_norm": 0.27009499073028564,
"learning_rate": 7.713983050847458e-06,
"loss": 0.0867,
"step": 2158
},
{
"epoch": 2.284919899377731,
"grad_norm": 0.4996664226055145,
"learning_rate": 7.71292372881356e-06,
"loss": 0.0844,
"step": 2159
},
{
"epoch": 2.285979081159804,
"grad_norm": 0.347398042678833,
"learning_rate": 7.711864406779663e-06,
"loss": 0.0826,
"step": 2160
},
{
"epoch": 2.2870382629418775,
"grad_norm": 0.7798426151275635,
"learning_rate": 7.710805084745764e-06,
"loss": 0.0864,
"step": 2161
},
{
"epoch": 2.2880974447239506,
"grad_norm": 0.5361624956130981,
"learning_rate": 7.709745762711866e-06,
"loss": 0.0836,
"step": 2162
},
{
"epoch": 2.289156626506024,
"grad_norm": 0.18656525015830994,
"learning_rate": 7.708686440677967e-06,
"loss": 0.0829,
"step": 2163
},
{
"epoch": 2.2902158082880977,
"grad_norm": 1.1615647077560425,
"learning_rate": 7.707627118644069e-06,
"loss": 0.0873,
"step": 2164
},
{
"epoch": 2.2912749900701708,
"grad_norm": 0.1994059979915619,
"learning_rate": 7.70656779661017e-06,
"loss": 0.0861,
"step": 2165
},
{
"epoch": 2.2923341718522443,
"grad_norm": 0.9500737190246582,
"learning_rate": 7.705508474576271e-06,
"loss": 0.0888,
"step": 2166
},
{
"epoch": 2.2933933536343174,
"grad_norm": 0.6159976124763489,
"learning_rate": 7.704449152542373e-06,
"loss": 0.0876,
"step": 2167
},
{
"epoch": 2.294452535416391,
"grad_norm": 0.23081451654434204,
"learning_rate": 7.703389830508476e-06,
"loss": 0.0866,
"step": 2168
},
{
"epoch": 2.295511717198464,
"grad_norm": 0.3230683505535126,
"learning_rate": 7.702330508474577e-06,
"loss": 0.0872,
"step": 2169
},
{
"epoch": 2.2965708989805376,
"grad_norm": 0.2588438391685486,
"learning_rate": 7.701271186440679e-06,
"loss": 0.0863,
"step": 2170
},
{
"epoch": 2.2976300807626107,
"grad_norm": 0.3278768062591553,
"learning_rate": 7.70021186440678e-06,
"loss": 0.0913,
"step": 2171
},
{
"epoch": 2.298689262544684,
"grad_norm": 0.7292188405990601,
"learning_rate": 7.699152542372882e-06,
"loss": 0.0895,
"step": 2172
},
{
"epoch": 2.2997484443267577,
"grad_norm": 0.7343543767929077,
"learning_rate": 7.698093220338983e-06,
"loss": 0.0872,
"step": 2173
},
{
"epoch": 2.300807626108831,
"grad_norm": 0.5069963932037354,
"learning_rate": 7.697033898305084e-06,
"loss": 0.0866,
"step": 2174
},
{
"epoch": 2.3018668078909044,
"grad_norm": 0.2616865932941437,
"learning_rate": 7.695974576271188e-06,
"loss": 0.0909,
"step": 2175
},
{
"epoch": 2.3029259896729775,
"grad_norm": 0.40990307927131653,
"learning_rate": 7.694915254237289e-06,
"loss": 0.0875,
"step": 2176
},
{
"epoch": 2.303985171455051,
"grad_norm": 0.3008265197277069,
"learning_rate": 7.69385593220339e-06,
"loss": 0.0894,
"step": 2177
},
{
"epoch": 2.3050443532371245,
"grad_norm": 0.5138524174690247,
"learning_rate": 7.692796610169492e-06,
"loss": 0.0873,
"step": 2178
},
{
"epoch": 2.3061035350191976,
"grad_norm": 1.9138803482055664,
"learning_rate": 7.691737288135593e-06,
"loss": 0.0898,
"step": 2179
},
{
"epoch": 2.307162716801271,
"grad_norm": 0.2680315375328064,
"learning_rate": 7.690677966101695e-06,
"loss": 0.0843,
"step": 2180
},
{
"epoch": 2.3082218985833443,
"grad_norm": 0.7578197121620178,
"learning_rate": 7.689618644067798e-06,
"loss": 0.0935,
"step": 2181
},
{
"epoch": 2.309281080365418,
"grad_norm": 0.49096474051475525,
"learning_rate": 7.688559322033899e-06,
"loss": 0.0893,
"step": 2182
},
{
"epoch": 2.310340262147491,
"grad_norm": 1.2972736358642578,
"learning_rate": 7.6875e-06,
"loss": 0.0869,
"step": 2183
},
{
"epoch": 2.3113994439295644,
"grad_norm": 0.7533348798751831,
"learning_rate": 7.686440677966102e-06,
"loss": 0.0861,
"step": 2184
},
{
"epoch": 2.3124586257116375,
"grad_norm": 0.31361818313598633,
"learning_rate": 7.685381355932205e-06,
"loss": 0.0871,
"step": 2185
},
{
"epoch": 2.313517807493711,
"grad_norm": 0.5364953875541687,
"learning_rate": 7.684322033898306e-06,
"loss": 0.0843,
"step": 2186
},
{
"epoch": 2.3145769892757846,
"grad_norm": 0.31093549728393555,
"learning_rate": 7.683262711864408e-06,
"loss": 0.0878,
"step": 2187
},
{
"epoch": 2.3156361710578577,
"grad_norm": 0.2939999997615814,
"learning_rate": 7.68220338983051e-06,
"loss": 0.0876,
"step": 2188
},
{
"epoch": 2.316695352839931,
"grad_norm": 0.2430151104927063,
"learning_rate": 7.68114406779661e-06,
"loss": 0.0865,
"step": 2189
},
{
"epoch": 2.3177545346220043,
"grad_norm": 0.33244743943214417,
"learning_rate": 7.680084745762712e-06,
"loss": 0.0895,
"step": 2190
},
{
"epoch": 2.318813716404078,
"grad_norm": 0.7019136548042297,
"learning_rate": 7.679025423728813e-06,
"loss": 0.0881,
"step": 2191
},
{
"epoch": 2.3198728981861514,
"grad_norm": 0.7489039301872253,
"learning_rate": 7.677966101694917e-06,
"loss": 0.0823,
"step": 2192
},
{
"epoch": 2.3209320799682245,
"grad_norm": 0.3986199200153351,
"learning_rate": 7.676906779661018e-06,
"loss": 0.0875,
"step": 2193
},
{
"epoch": 2.321991261750298,
"grad_norm": 0.3629228472709656,
"learning_rate": 7.67584745762712e-06,
"loss": 0.0884,
"step": 2194
},
{
"epoch": 2.323050443532371,
"grad_norm": 0.9728587865829468,
"learning_rate": 7.67478813559322e-06,
"loss": 0.0877,
"step": 2195
},
{
"epoch": 2.3241096253144446,
"grad_norm": 0.5270370841026306,
"learning_rate": 7.673728813559322e-06,
"loss": 0.0912,
"step": 2196
},
{
"epoch": 2.325168807096518,
"grad_norm": 0.4018361270427704,
"learning_rate": 7.672669491525424e-06,
"loss": 0.0885,
"step": 2197
},
{
"epoch": 2.3262279888785913,
"grad_norm": 0.5126635432243347,
"learning_rate": 7.671610169491525e-06,
"loss": 0.089,
"step": 2198
},
{
"epoch": 2.327287170660665,
"grad_norm": 0.25716903805732727,
"learning_rate": 7.670550847457628e-06,
"loss": 0.0836,
"step": 2199
},
{
"epoch": 2.328346352442738,
"grad_norm": 0.25198864936828613,
"learning_rate": 7.66949152542373e-06,
"loss": 0.0861,
"step": 2200
},
{
"epoch": 2.3294055342248114,
"grad_norm": 0.31258684396743774,
"learning_rate": 7.668432203389831e-06,
"loss": 0.0885,
"step": 2201
},
{
"epoch": 2.3304647160068845,
"grad_norm": 0.5213435292243958,
"learning_rate": 7.667372881355934e-06,
"loss": 0.0839,
"step": 2202
},
{
"epoch": 2.331523897788958,
"grad_norm": 0.31534504890441895,
"learning_rate": 7.666313559322036e-06,
"loss": 0.0885,
"step": 2203
},
{
"epoch": 2.332583079571031,
"grad_norm": 1.2737720012664795,
"learning_rate": 7.665254237288137e-06,
"loss": 0.0919,
"step": 2204
},
{
"epoch": 2.3336422613531047,
"grad_norm": 0.47025302052497864,
"learning_rate": 7.664194915254238e-06,
"loss": 0.088,
"step": 2205
},
{
"epoch": 2.3347014431351782,
"grad_norm": 0.6608093976974487,
"learning_rate": 7.66313559322034e-06,
"loss": 0.0869,
"step": 2206
},
{
"epoch": 2.3357606249172513,
"grad_norm": 0.7504128217697144,
"learning_rate": 7.662076271186441e-06,
"loss": 0.0907,
"step": 2207
},
{
"epoch": 2.336819806699325,
"grad_norm": 0.4346690773963928,
"learning_rate": 7.661016949152543e-06,
"loss": 0.0905,
"step": 2208
},
{
"epoch": 2.337878988481398,
"grad_norm": 0.36322346329689026,
"learning_rate": 7.659957627118646e-06,
"loss": 0.0902,
"step": 2209
},
{
"epoch": 2.3389381702634715,
"grad_norm": 0.32090482115745544,
"learning_rate": 7.658898305084747e-06,
"loss": 0.0883,
"step": 2210
},
{
"epoch": 2.339997352045545,
"grad_norm": 0.7748063206672668,
"learning_rate": 7.657838983050848e-06,
"loss": 0.0872,
"step": 2211
},
{
"epoch": 2.341056533827618,
"grad_norm": 0.22548404335975647,
"learning_rate": 7.65677966101695e-06,
"loss": 0.0868,
"step": 2212
},
{
"epoch": 2.3421157156096917,
"grad_norm": 1.2027844190597534,
"learning_rate": 7.655720338983051e-06,
"loss": 0.0867,
"step": 2213
},
{
"epoch": 2.3431748973917648,
"grad_norm": 0.38570746779441833,
"learning_rate": 7.654661016949153e-06,
"loss": 0.0858,
"step": 2214
},
{
"epoch": 2.3442340791738383,
"grad_norm": 0.2694943845272064,
"learning_rate": 7.653601694915254e-06,
"loss": 0.0841,
"step": 2215
},
{
"epoch": 2.3452932609559114,
"grad_norm": 0.4928399324417114,
"learning_rate": 7.652542372881356e-06,
"loss": 0.0871,
"step": 2216
},
{
"epoch": 2.346352442737985,
"grad_norm": 0.8780227899551392,
"learning_rate": 7.651483050847459e-06,
"loss": 0.0905,
"step": 2217
},
{
"epoch": 2.347411624520058,
"grad_norm": 0.6822829246520996,
"learning_rate": 7.65042372881356e-06,
"loss": 0.0885,
"step": 2218
},
{
"epoch": 2.3484708063021316,
"grad_norm": 0.5823767781257629,
"learning_rate": 7.649364406779661e-06,
"loss": 0.0869,
"step": 2219
},
{
"epoch": 2.349529988084205,
"grad_norm": 0.2178659588098526,
"learning_rate": 7.648305084745763e-06,
"loss": 0.0892,
"step": 2220
},
{
"epoch": 2.350589169866278,
"grad_norm": 0.8088253736495972,
"learning_rate": 7.647245762711864e-06,
"loss": 0.0884,
"step": 2221
},
{
"epoch": 2.3516483516483517,
"grad_norm": 0.3263354003429413,
"learning_rate": 7.646186440677966e-06,
"loss": 0.0863,
"step": 2222
},
{
"epoch": 2.352707533430425,
"grad_norm": 0.26838818192481995,
"learning_rate": 7.645127118644067e-06,
"loss": 0.0884,
"step": 2223
},
{
"epoch": 2.3537667152124984,
"grad_norm": 0.5517171621322632,
"learning_rate": 7.64406779661017e-06,
"loss": 0.0913,
"step": 2224
},
{
"epoch": 2.354825896994572,
"grad_norm": 0.3242451548576355,
"learning_rate": 7.643008474576272e-06,
"loss": 0.0887,
"step": 2225
},
{
"epoch": 2.355885078776645,
"grad_norm": 0.23916837573051453,
"learning_rate": 7.641949152542375e-06,
"loss": 0.0852,
"step": 2226
},
{
"epoch": 2.3569442605587185,
"grad_norm": 0.25891250371932983,
"learning_rate": 7.640889830508476e-06,
"loss": 0.0857,
"step": 2227
},
{
"epoch": 2.3580034423407916,
"grad_norm": 0.3916108012199402,
"learning_rate": 7.639830508474578e-06,
"loss": 0.0858,
"step": 2228
},
{
"epoch": 2.359062624122865,
"grad_norm": 0.38781145215034485,
"learning_rate": 7.638771186440679e-06,
"loss": 0.0896,
"step": 2229
},
{
"epoch": 2.3601218059049383,
"grad_norm": 0.2440766841173172,
"learning_rate": 7.63771186440678e-06,
"loss": 0.0871,
"step": 2230
},
{
"epoch": 2.361180987687012,
"grad_norm": 0.6661586165428162,
"learning_rate": 7.636652542372882e-06,
"loss": 0.0921,
"step": 2231
},
{
"epoch": 2.362240169469085,
"grad_norm": 0.435925155878067,
"learning_rate": 7.635593220338983e-06,
"loss": 0.0885,
"step": 2232
},
{
"epoch": 2.3632993512511584,
"grad_norm": 0.4927040934562683,
"learning_rate": 7.634533898305085e-06,
"loss": 0.0866,
"step": 2233
},
{
"epoch": 2.364358533033232,
"grad_norm": 0.29542070627212524,
"learning_rate": 7.633474576271188e-06,
"loss": 0.084,
"step": 2234
},
{
"epoch": 2.365417714815305,
"grad_norm": 0.2592524588108063,
"learning_rate": 7.632415254237289e-06,
"loss": 0.0854,
"step": 2235
},
{
"epoch": 2.3664768965973786,
"grad_norm": 0.7580803632736206,
"learning_rate": 7.63135593220339e-06,
"loss": 0.0832,
"step": 2236
},
{
"epoch": 2.3675360783794517,
"grad_norm": 1.193358302116394,
"learning_rate": 7.630296610169492e-06,
"loss": 0.088,
"step": 2237
},
{
"epoch": 2.368595260161525,
"grad_norm": 0.6735422015190125,
"learning_rate": 7.629237288135593e-06,
"loss": 0.0887,
"step": 2238
},
{
"epoch": 2.3696544419435988,
"grad_norm": 0.27006396651268005,
"learning_rate": 7.628177966101696e-06,
"loss": 0.0886,
"step": 2239
},
{
"epoch": 2.370713623725672,
"grad_norm": 0.27838370203971863,
"learning_rate": 7.627118644067797e-06,
"loss": 0.087,
"step": 2240
},
{
"epoch": 2.3717728055077454,
"grad_norm": 0.22530795633792877,
"learning_rate": 7.6260593220338984e-06,
"loss": 0.0904,
"step": 2241
},
{
"epoch": 2.3728319872898185,
"grad_norm": 0.38037195801734924,
"learning_rate": 7.625e-06,
"loss": 0.0879,
"step": 2242
},
{
"epoch": 2.373891169071892,
"grad_norm": 0.2998390197753906,
"learning_rate": 7.623940677966102e-06,
"loss": 0.0854,
"step": 2243
},
{
"epoch": 2.3749503508539656,
"grad_norm": 1.02617609500885,
"learning_rate": 7.6228813559322035e-06,
"loss": 0.087,
"step": 2244
},
{
"epoch": 2.3760095326360386,
"grad_norm": 0.7997170686721802,
"learning_rate": 7.621822033898307e-06,
"loss": 0.0848,
"step": 2245
},
{
"epoch": 2.377068714418112,
"grad_norm": 0.26829835772514343,
"learning_rate": 7.620762711864408e-06,
"loss": 0.0851,
"step": 2246
},
{
"epoch": 2.3781278962001853,
"grad_norm": 0.27473142743110657,
"learning_rate": 7.6197033898305094e-06,
"loss": 0.0853,
"step": 2247
},
{
"epoch": 2.379187077982259,
"grad_norm": 0.5847030878067017,
"learning_rate": 7.618644067796611e-06,
"loss": 0.0866,
"step": 2248
},
{
"epoch": 2.380246259764332,
"grad_norm": 0.6349015235900879,
"learning_rate": 7.617584745762713e-06,
"loss": 0.0874,
"step": 2249
},
{
"epoch": 2.3813054415464054,
"grad_norm": 0.41820037364959717,
"learning_rate": 7.6165254237288145e-06,
"loss": 0.091,
"step": 2250
},
{
"epoch": 2.3823646233284785,
"grad_norm": 1.3365269899368286,
"learning_rate": 7.615466101694916e-06,
"loss": 0.088,
"step": 2251
},
{
"epoch": 2.383423805110552,
"grad_norm": 0.322917103767395,
"learning_rate": 7.614406779661017e-06,
"loss": 0.0875,
"step": 2252
},
{
"epoch": 2.3844829868926256,
"grad_norm": 0.24395416676998138,
"learning_rate": 7.61334745762712e-06,
"loss": 0.0869,
"step": 2253
},
{
"epoch": 2.3855421686746987,
"grad_norm": 0.3095790445804596,
"learning_rate": 7.612288135593221e-06,
"loss": 0.0903,
"step": 2254
},
{
"epoch": 2.3866013504567722,
"grad_norm": 0.9672554731369019,
"learning_rate": 7.611228813559322e-06,
"loss": 0.0875,
"step": 2255
},
{
"epoch": 2.3876605322388453,
"grad_norm": 0.662190318107605,
"learning_rate": 7.610169491525425e-06,
"loss": 0.0875,
"step": 2256
},
{
"epoch": 2.388719714020919,
"grad_norm": 0.28213566541671753,
"learning_rate": 7.609110169491526e-06,
"loss": 0.0876,
"step": 2257
},
{
"epoch": 2.3897788958029924,
"grad_norm": 0.302276074886322,
"learning_rate": 7.6080508474576275e-06,
"loss": 0.0893,
"step": 2258
},
{
"epoch": 2.3908380775850655,
"grad_norm": 0.44363200664520264,
"learning_rate": 7.606991525423729e-06,
"loss": 0.085,
"step": 2259
},
{
"epoch": 2.391897259367139,
"grad_norm": 0.7232718467712402,
"learning_rate": 7.605932203389831e-06,
"loss": 0.0914,
"step": 2260
},
{
"epoch": 2.392956441149212,
"grad_norm": 0.5587173700332642,
"learning_rate": 7.6048728813559326e-06,
"loss": 0.0851,
"step": 2261
},
{
"epoch": 2.3940156229312857,
"grad_norm": 0.46465861797332764,
"learning_rate": 7.603813559322034e-06,
"loss": 0.0881,
"step": 2262
},
{
"epoch": 2.3950748047133588,
"grad_norm": 0.29479849338531494,
"learning_rate": 7.602754237288135e-06,
"loss": 0.0885,
"step": 2263
},
{
"epoch": 2.3961339864954323,
"grad_norm": 0.443615585565567,
"learning_rate": 7.601694915254238e-06,
"loss": 0.0862,
"step": 2264
},
{
"epoch": 2.3971931682775054,
"grad_norm": 0.280912309885025,
"learning_rate": 7.600635593220339e-06,
"loss": 0.0887,
"step": 2265
},
{
"epoch": 2.398252350059579,
"grad_norm": 0.33202967047691345,
"learning_rate": 7.599576271186442e-06,
"loss": 0.0847,
"step": 2266
},
{
"epoch": 2.3993115318416525,
"grad_norm": 0.2298818826675415,
"learning_rate": 7.598516949152544e-06,
"loss": 0.0865,
"step": 2267
},
{
"epoch": 2.4003707136237256,
"grad_norm": 0.8562094569206238,
"learning_rate": 7.597457627118645e-06,
"loss": 0.0881,
"step": 2268
},
{
"epoch": 2.401429895405799,
"grad_norm": 0.30260929465293884,
"learning_rate": 7.596398305084746e-06,
"loss": 0.0859,
"step": 2269
},
{
"epoch": 2.402489077187872,
"grad_norm": 0.7718713879585266,
"learning_rate": 7.595338983050849e-06,
"loss": 0.0867,
"step": 2270
},
{
"epoch": 2.4035482589699457,
"grad_norm": 0.23501431941986084,
"learning_rate": 7.59427966101695e-06,
"loss": 0.0869,
"step": 2271
},
{
"epoch": 2.4046074407520193,
"grad_norm": 0.357307493686676,
"learning_rate": 7.5932203389830515e-06,
"loss": 0.0824,
"step": 2272
},
{
"epoch": 2.4056666225340924,
"grad_norm": 0.25347843766212463,
"learning_rate": 7.592161016949153e-06,
"loss": 0.0868,
"step": 2273
},
{
"epoch": 2.406725804316166,
"grad_norm": 0.64441978931427,
"learning_rate": 7.591101694915255e-06,
"loss": 0.0876,
"step": 2274
},
{
"epoch": 2.407784986098239,
"grad_norm": 0.8823823928833008,
"learning_rate": 7.5900423728813566e-06,
"loss": 0.0871,
"step": 2275
},
{
"epoch": 2.4088441678803125,
"grad_norm": 0.2525102198123932,
"learning_rate": 7.588983050847458e-06,
"loss": 0.0901,
"step": 2276
},
{
"epoch": 2.4099033496623856,
"grad_norm": 0.45447444915771484,
"learning_rate": 7.58792372881356e-06,
"loss": 0.086,
"step": 2277
},
{
"epoch": 2.410962531444459,
"grad_norm": 0.28946614265441895,
"learning_rate": 7.586864406779662e-06,
"loss": 0.0866,
"step": 2278
},
{
"epoch": 2.4120217132265327,
"grad_norm": 0.3613869547843933,
"learning_rate": 7.585805084745763e-06,
"loss": 0.0862,
"step": 2279
},
{
"epoch": 2.413080895008606,
"grad_norm": 0.21505282819271088,
"learning_rate": 7.5847457627118645e-06,
"loss": 0.0907,
"step": 2280
},
{
"epoch": 2.4141400767906793,
"grad_norm": 0.5027592182159424,
"learning_rate": 7.583686440677967e-06,
"loss": 0.0842,
"step": 2281
},
{
"epoch": 2.4151992585727524,
"grad_norm": 0.5195924043655396,
"learning_rate": 7.582627118644068e-06,
"loss": 0.0868,
"step": 2282
},
{
"epoch": 2.416258440354826,
"grad_norm": 0.4060683250427246,
"learning_rate": 7.5815677966101695e-06,
"loss": 0.088,
"step": 2283
},
{
"epoch": 2.417317622136899,
"grad_norm": 0.982610821723938,
"learning_rate": 7.580508474576271e-06,
"loss": 0.0905,
"step": 2284
},
{
"epoch": 2.4183768039189726,
"grad_norm": 0.8710994124412537,
"learning_rate": 7.579449152542373e-06,
"loss": 0.0874,
"step": 2285
},
{
"epoch": 2.419435985701046,
"grad_norm": 0.23645208775997162,
"learning_rate": 7.578389830508475e-06,
"loss": 0.0845,
"step": 2286
},
{
"epoch": 2.420495167483119,
"grad_norm": 0.287193238735199,
"learning_rate": 7.577330508474578e-06,
"loss": 0.0846,
"step": 2287
},
{
"epoch": 2.4215543492651928,
"grad_norm": 0.2834354341030121,
"learning_rate": 7.576271186440679e-06,
"loss": 0.0875,
"step": 2288
},
{
"epoch": 2.422613531047266,
"grad_norm": 0.2335229218006134,
"learning_rate": 7.5752118644067805e-06,
"loss": 0.0836,
"step": 2289
},
{
"epoch": 2.4236727128293394,
"grad_norm": 0.4091585576534271,
"learning_rate": 7.574152542372882e-06,
"loss": 0.0887,
"step": 2290
},
{
"epoch": 2.424731894611413,
"grad_norm": 0.2867496907711029,
"learning_rate": 7.573093220338984e-06,
"loss": 0.0884,
"step": 2291
},
{
"epoch": 2.425791076393486,
"grad_norm": 0.3807348906993866,
"learning_rate": 7.572033898305086e-06,
"loss": 0.09,
"step": 2292
},
{
"epoch": 2.4268502581755595,
"grad_norm": 0.4710707664489746,
"learning_rate": 7.570974576271187e-06,
"loss": 0.0835,
"step": 2293
},
{
"epoch": 2.4279094399576326,
"grad_norm": 0.9628831744194031,
"learning_rate": 7.569915254237289e-06,
"loss": 0.0874,
"step": 2294
},
{
"epoch": 2.428968621739706,
"grad_norm": 0.4457227885723114,
"learning_rate": 7.568855932203391e-06,
"loss": 0.086,
"step": 2295
},
{
"epoch": 2.4300278035217793,
"grad_norm": 0.2667595148086548,
"learning_rate": 7.567796610169492e-06,
"loss": 0.0868,
"step": 2296
},
{
"epoch": 2.431086985303853,
"grad_norm": 0.3075510263442993,
"learning_rate": 7.5667372881355935e-06,
"loss": 0.0865,
"step": 2297
},
{
"epoch": 2.432146167085926,
"grad_norm": 0.26120322942733765,
"learning_rate": 7.565677966101696e-06,
"loss": 0.0888,
"step": 2298
},
{
"epoch": 2.4332053488679994,
"grad_norm": 0.3398880660533905,
"learning_rate": 7.564618644067797e-06,
"loss": 0.0896,
"step": 2299
},
{
"epoch": 2.434264530650073,
"grad_norm": 0.34000998735427856,
"learning_rate": 7.563559322033899e-06,
"loss": 0.0848,
"step": 2300
},
{
"epoch": 2.435323712432146,
"grad_norm": 0.2651556432247162,
"learning_rate": 7.5625e-06,
"loss": 0.0899,
"step": 2301
},
{
"epoch": 2.4363828942142196,
"grad_norm": 0.28574419021606445,
"learning_rate": 7.561440677966102e-06,
"loss": 0.09,
"step": 2302
},
{
"epoch": 2.4374420759962927,
"grad_norm": 0.25516659021377563,
"learning_rate": 7.560381355932204e-06,
"loss": 0.0877,
"step": 2303
},
{
"epoch": 2.4385012577783662,
"grad_norm": 0.2714408040046692,
"learning_rate": 7.559322033898305e-06,
"loss": 0.0884,
"step": 2304
},
{
"epoch": 2.4395604395604398,
"grad_norm": 0.4835186004638672,
"learning_rate": 7.558262711864407e-06,
"loss": 0.0841,
"step": 2305
},
{
"epoch": 2.440619621342513,
"grad_norm": 0.21308913826942444,
"learning_rate": 7.557203389830509e-06,
"loss": 0.0872,
"step": 2306
},
{
"epoch": 2.4416788031245864,
"grad_norm": 0.6488706469535828,
"learning_rate": 7.55614406779661e-06,
"loss": 0.0858,
"step": 2307
},
{
"epoch": 2.4427379849066595,
"grad_norm": 0.27812522649765015,
"learning_rate": 7.555084745762713e-06,
"loss": 0.0882,
"step": 2308
},
{
"epoch": 2.443797166688733,
"grad_norm": 0.25572165846824646,
"learning_rate": 7.554025423728815e-06,
"loss": 0.0855,
"step": 2309
},
{
"epoch": 2.444856348470806,
"grad_norm": 0.26096436381340027,
"learning_rate": 7.552966101694916e-06,
"loss": 0.0867,
"step": 2310
},
{
"epoch": 2.4459155302528797,
"grad_norm": 0.43788591027259827,
"learning_rate": 7.5519067796610175e-06,
"loss": 0.0866,
"step": 2311
},
{
"epoch": 2.4469747120349528,
"grad_norm": 0.5280694365501404,
"learning_rate": 7.55084745762712e-06,
"loss": 0.0861,
"step": 2312
},
{
"epoch": 2.4480338938170263,
"grad_norm": 0.21385736763477325,
"learning_rate": 7.549788135593221e-06,
"loss": 0.0877,
"step": 2313
},
{
"epoch": 2.4490930755991,
"grad_norm": 0.31411486864089966,
"learning_rate": 7.548728813559323e-06,
"loss": 0.0876,
"step": 2314
},
{
"epoch": 2.450152257381173,
"grad_norm": 0.3917924463748932,
"learning_rate": 7.547669491525425e-06,
"loss": 0.0896,
"step": 2315
},
{
"epoch": 2.4512114391632465,
"grad_norm": 0.7999858856201172,
"learning_rate": 7.546610169491526e-06,
"loss": 0.0843,
"step": 2316
},
{
"epoch": 2.4522706209453196,
"grad_norm": 0.30375435948371887,
"learning_rate": 7.545550847457628e-06,
"loss": 0.0852,
"step": 2317
},
{
"epoch": 2.453329802727393,
"grad_norm": 0.25230348110198975,
"learning_rate": 7.544491525423729e-06,
"loss": 0.0853,
"step": 2318
},
{
"epoch": 2.4543889845094666,
"grad_norm": 0.5028313398361206,
"learning_rate": 7.543432203389831e-06,
"loss": 0.0873,
"step": 2319
},
{
"epoch": 2.4554481662915397,
"grad_norm": 0.2625860273838043,
"learning_rate": 7.542372881355933e-06,
"loss": 0.0858,
"step": 2320
},
{
"epoch": 2.4565073480736133,
"grad_norm": 0.4730969965457916,
"learning_rate": 7.541313559322034e-06,
"loss": 0.0896,
"step": 2321
},
{
"epoch": 2.4575665298556864,
"grad_norm": 0.2755214273929596,
"learning_rate": 7.5402542372881356e-06,
"loss": 0.0848,
"step": 2322
},
{
"epoch": 2.45862571163776,
"grad_norm": 0.22363467514514923,
"learning_rate": 7.539194915254238e-06,
"loss": 0.0833,
"step": 2323
},
{
"epoch": 2.459684893419833,
"grad_norm": 0.2618216574192047,
"learning_rate": 7.538135593220339e-06,
"loss": 0.0864,
"step": 2324
},
{
"epoch": 2.4607440752019065,
"grad_norm": 0.3937819004058838,
"learning_rate": 7.537076271186441e-06,
"loss": 0.0863,
"step": 2325
},
{
"epoch": 2.46180325698398,
"grad_norm": 0.6888829469680786,
"learning_rate": 7.536016949152543e-06,
"loss": 0.0859,
"step": 2326
},
{
"epoch": 2.462862438766053,
"grad_norm": 0.6449849009513855,
"learning_rate": 7.534957627118644e-06,
"loss": 0.0841,
"step": 2327
},
{
"epoch": 2.4639216205481267,
"grad_norm": 0.3087742030620575,
"learning_rate": 7.533898305084746e-06,
"loss": 0.0858,
"step": 2328
},
{
"epoch": 2.4649808023302,
"grad_norm": 0.2380569726228714,
"learning_rate": 7.532838983050849e-06,
"loss": 0.0849,
"step": 2329
},
{
"epoch": 2.4660399841122733,
"grad_norm": 0.18616469204425812,
"learning_rate": 7.53177966101695e-06,
"loss": 0.0887,
"step": 2330
},
{
"epoch": 2.4670991658943464,
"grad_norm": 0.29187682271003723,
"learning_rate": 7.530720338983052e-06,
"loss": 0.0848,
"step": 2331
},
{
"epoch": 2.46815834767642,
"grad_norm": 0.25846749544143677,
"learning_rate": 7.529661016949154e-06,
"loss": 0.0888,
"step": 2332
},
{
"epoch": 2.4692175294584935,
"grad_norm": 0.17183120548725128,
"learning_rate": 7.528601694915255e-06,
"loss": 0.0878,
"step": 2333
},
{
"epoch": 2.4702767112405666,
"grad_norm": 0.5974867939949036,
"learning_rate": 7.527542372881357e-06,
"loss": 0.0882,
"step": 2334
},
{
"epoch": 2.47133589302264,
"grad_norm": 0.3511717915534973,
"learning_rate": 7.526483050847458e-06,
"loss": 0.0883,
"step": 2335
},
{
"epoch": 2.472395074804713,
"grad_norm": 0.29475048184394836,
"learning_rate": 7.52542372881356e-06,
"loss": 0.0841,
"step": 2336
},
{
"epoch": 2.4734542565867867,
"grad_norm": 1.7622301578521729,
"learning_rate": 7.524364406779662e-06,
"loss": 0.0909,
"step": 2337
},
{
"epoch": 2.4745134383688603,
"grad_norm": 0.23583351075649261,
"learning_rate": 7.523305084745763e-06,
"loss": 0.0836,
"step": 2338
},
{
"epoch": 2.4755726201509334,
"grad_norm": 0.878502607345581,
"learning_rate": 7.522245762711865e-06,
"loss": 0.0884,
"step": 2339
},
{
"epoch": 2.476631801933007,
"grad_norm": 0.23254196345806122,
"learning_rate": 7.521186440677967e-06,
"loss": 0.0853,
"step": 2340
},
{
"epoch": 2.47769098371508,
"grad_norm": 0.23312729597091675,
"learning_rate": 7.520127118644068e-06,
"loss": 0.0886,
"step": 2341
},
{
"epoch": 2.4787501654971535,
"grad_norm": 0.2530806064605713,
"learning_rate": 7.51906779661017e-06,
"loss": 0.0838,
"step": 2342
},
{
"epoch": 2.4798093472792266,
"grad_norm": 0.22649787366390228,
"learning_rate": 7.518008474576272e-06,
"loss": 0.0877,
"step": 2343
},
{
"epoch": 2.4808685290613,
"grad_norm": 0.27939778566360474,
"learning_rate": 7.516949152542373e-06,
"loss": 0.0843,
"step": 2344
},
{
"epoch": 2.4819277108433733,
"grad_norm": 0.8074503540992737,
"learning_rate": 7.515889830508475e-06,
"loss": 0.0878,
"step": 2345
},
{
"epoch": 2.482986892625447,
"grad_norm": 0.37326523661613464,
"learning_rate": 7.514830508474576e-06,
"loss": 0.0838,
"step": 2346
},
{
"epoch": 2.4840460744075203,
"grad_norm": 0.2246612310409546,
"learning_rate": 7.5137711864406785e-06,
"loss": 0.0857,
"step": 2347
},
{
"epoch": 2.4851052561895934,
"grad_norm": 0.26936617493629456,
"learning_rate": 7.51271186440678e-06,
"loss": 0.0856,
"step": 2348
},
{
"epoch": 2.486164437971667,
"grad_norm": 0.33371925354003906,
"learning_rate": 7.511652542372881e-06,
"loss": 0.0865,
"step": 2349
},
{
"epoch": 2.48722361975374,
"grad_norm": 0.25461632013320923,
"learning_rate": 7.510593220338984e-06,
"loss": 0.0876,
"step": 2350
},
{
"epoch": 2.4882828015358136,
"grad_norm": 0.27395927906036377,
"learning_rate": 7.509533898305086e-06,
"loss": 0.0836,
"step": 2351
},
{
"epoch": 2.489341983317887,
"grad_norm": 0.36620545387268066,
"learning_rate": 7.508474576271187e-06,
"loss": 0.0857,
"step": 2352
},
{
"epoch": 2.4904011650999602,
"grad_norm": 0.4027053415775299,
"learning_rate": 7.5074152542372895e-06,
"loss": 0.0848,
"step": 2353
},
{
"epoch": 2.4914603468820338,
"grad_norm": 0.8063977360725403,
"learning_rate": 7.506355932203391e-06,
"loss": 0.0876,
"step": 2354
},
{
"epoch": 2.492519528664107,
"grad_norm": 0.2470046877861023,
"learning_rate": 7.505296610169492e-06,
"loss": 0.0882,
"step": 2355
},
{
"epoch": 2.4935787104461804,
"grad_norm": 0.2624497413635254,
"learning_rate": 7.504237288135594e-06,
"loss": 0.0886,
"step": 2356
},
{
"epoch": 2.4946378922282535,
"grad_norm": 0.34627389907836914,
"learning_rate": 7.503177966101696e-06,
"loss": 0.0845,
"step": 2357
},
{
"epoch": 2.495697074010327,
"grad_norm": 0.29364633560180664,
"learning_rate": 7.502118644067797e-06,
"loss": 0.088,
"step": 2358
},
{
"epoch": 2.4967562557924,
"grad_norm": 1.2924612760543823,
"learning_rate": 7.501059322033899e-06,
"loss": 0.0882,
"step": 2359
},
{
"epoch": 2.4978154375744737,
"grad_norm": 0.27166759967803955,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0906,
"step": 2360
},
{
"epoch": 2.4978154375744737,
"eval_accuracy": 0.9768,
"eval_best_f1_from_thresholding": 0.15328467153284675,
"eval_loss": 0.1330053061246872,
"eval_matthews_corrcoef": 0.15095498585011305,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9882388725539897,
"eval_negative_class_precision": 0.9926672777268561,
"eval_negative_class_recall": 0.9838498031694761,
"eval_positive_class_f1": 0.15328467153284672,
"eval_positive_class_precision": 0.11602209944751381,
"eval_positive_class_recall": 0.22580645161290322,
"eval_roc_auc": 0.7900881423040731,
"eval_runtime": 20.7623,
"eval_samples_per_second": 481.642,
"eval_steps_per_second": 7.562,
"step": 2360
},
{
"epoch": 2.498874619356547,
"grad_norm": 0.32740044593811035,
"learning_rate": 7.4989406779661024e-06,
"loss": 0.0891,
"step": 2361
},
{
"epoch": 2.4999338011386203,
"grad_norm": 0.24599646031856537,
"learning_rate": 7.497881355932204e-06,
"loss": 0.0901,
"step": 2362
},
{
"epoch": 2.500992982920694,
"grad_norm": 0.4903382658958435,
"learning_rate": 7.496822033898305e-06,
"loss": 0.0868,
"step": 2363
},
{
"epoch": 2.502052164702767,
"grad_norm": 0.26813480257987976,
"learning_rate": 7.4957627118644075e-06,
"loss": 0.0918,
"step": 2364
},
{
"epoch": 2.5031113464848405,
"grad_norm": 0.2489306479692459,
"learning_rate": 7.494703389830509e-06,
"loss": 0.087,
"step": 2365
},
{
"epoch": 2.504170528266914,
"grad_norm": 0.43046361207962036,
"learning_rate": 7.49364406779661e-06,
"loss": 0.0842,
"step": 2366
},
{
"epoch": 2.505229710048987,
"grad_norm": 0.9100356101989746,
"learning_rate": 7.492584745762712e-06,
"loss": 0.0868,
"step": 2367
},
{
"epoch": 2.5062888918310606,
"grad_norm": 0.44035395979881287,
"learning_rate": 7.491525423728814e-06,
"loss": 0.0847,
"step": 2368
},
{
"epoch": 2.5073480736131337,
"grad_norm": 1.0612967014312744,
"learning_rate": 7.4904661016949154e-06,
"loss": 0.0859,
"step": 2369
},
{
"epoch": 2.5084072553952073,
"grad_norm": 0.2517843544483185,
"learning_rate": 7.489406779661017e-06,
"loss": 0.0858,
"step": 2370
},
{
"epoch": 2.509466437177281,
"grad_norm": 0.2550938129425049,
"learning_rate": 7.48834745762712e-06,
"loss": 0.0905,
"step": 2371
},
{
"epoch": 2.510525618959354,
"grad_norm": 0.48128417134284973,
"learning_rate": 7.487288135593221e-06,
"loss": 0.0878,
"step": 2372
},
{
"epoch": 2.511584800741427,
"grad_norm": 0.2762486934661865,
"learning_rate": 7.486228813559323e-06,
"loss": 0.085,
"step": 2373
},
{
"epoch": 2.5126439825235005,
"grad_norm": 0.25758475065231323,
"learning_rate": 7.485169491525425e-06,
"loss": 0.0852,
"step": 2374
},
{
"epoch": 2.513703164305574,
"grad_norm": 0.5497387647628784,
"learning_rate": 7.4841101694915264e-06,
"loss": 0.0851,
"step": 2375
},
{
"epoch": 2.514762346087647,
"grad_norm": 0.6831100583076477,
"learning_rate": 7.483050847457628e-06,
"loss": 0.0874,
"step": 2376
},
{
"epoch": 2.5158215278697207,
"grad_norm": 0.559902012348175,
"learning_rate": 7.481991525423729e-06,
"loss": 0.0833,
"step": 2377
},
{
"epoch": 2.516880709651794,
"grad_norm": 0.2921912968158722,
"learning_rate": 7.4809322033898315e-06,
"loss": 0.0851,
"step": 2378
},
{
"epoch": 2.5179398914338673,
"grad_norm": 0.21021868288516998,
"learning_rate": 7.479872881355933e-06,
"loss": 0.0846,
"step": 2379
},
{
"epoch": 2.518999073215941,
"grad_norm": 0.1999545395374298,
"learning_rate": 7.478813559322034e-06,
"loss": 0.0831,
"step": 2380
},
{
"epoch": 2.520058254998014,
"grad_norm": 0.7873519659042358,
"learning_rate": 7.477754237288137e-06,
"loss": 0.0852,
"step": 2381
},
{
"epoch": 2.5211174367800875,
"grad_norm": 0.2738882899284363,
"learning_rate": 7.476694915254238e-06,
"loss": 0.0868,
"step": 2382
},
{
"epoch": 2.5221766185621606,
"grad_norm": 0.30528733134269714,
"learning_rate": 7.475635593220339e-06,
"loss": 0.0887,
"step": 2383
},
{
"epoch": 2.523235800344234,
"grad_norm": 0.27182379364967346,
"learning_rate": 7.474576271186441e-06,
"loss": 0.0893,
"step": 2384
},
{
"epoch": 2.5242949821263077,
"grad_norm": 0.9655314683914185,
"learning_rate": 7.473516949152543e-06,
"loss": 0.0874,
"step": 2385
},
{
"epoch": 2.5253541639083807,
"grad_norm": 0.2309257537126541,
"learning_rate": 7.4724576271186445e-06,
"loss": 0.0878,
"step": 2386
},
{
"epoch": 2.5264133456904543,
"grad_norm": 0.4812738597393036,
"learning_rate": 7.471398305084746e-06,
"loss": 0.0826,
"step": 2387
},
{
"epoch": 2.5274725274725274,
"grad_norm": 0.19819387793540955,
"learning_rate": 7.470338983050847e-06,
"loss": 0.0868,
"step": 2388
},
{
"epoch": 2.528531709254601,
"grad_norm": 0.24691465497016907,
"learning_rate": 7.4692796610169496e-06,
"loss": 0.0857,
"step": 2389
},
{
"epoch": 2.529590891036674,
"grad_norm": 0.29209673404693604,
"learning_rate": 7.468220338983051e-06,
"loss": 0.0853,
"step": 2390
},
{
"epoch": 2.5306500728187475,
"grad_norm": 0.43177977204322815,
"learning_rate": 7.467161016949152e-06,
"loss": 0.0879,
"step": 2391
},
{
"epoch": 2.5317092546008206,
"grad_norm": 0.6204429864883423,
"learning_rate": 7.4661016949152555e-06,
"loss": 0.0838,
"step": 2392
},
{
"epoch": 2.532768436382894,
"grad_norm": 0.23086082935333252,
"learning_rate": 7.465042372881357e-06,
"loss": 0.0838,
"step": 2393
},
{
"epoch": 2.5338276181649677,
"grad_norm": 0.2399570345878601,
"learning_rate": 7.463983050847458e-06,
"loss": 0.0865,
"step": 2394
},
{
"epoch": 2.534886799947041,
"grad_norm": 0.25693604350090027,
"learning_rate": 7.462923728813561e-06,
"loss": 0.0852,
"step": 2395
},
{
"epoch": 2.5359459817291143,
"grad_norm": 0.26127418875694275,
"learning_rate": 7.461864406779662e-06,
"loss": 0.0842,
"step": 2396
},
{
"epoch": 2.5370051635111874,
"grad_norm": 0.2086644321680069,
"learning_rate": 7.460805084745763e-06,
"loss": 0.0855,
"step": 2397
},
{
"epoch": 2.538064345293261,
"grad_norm": 0.6140705347061157,
"learning_rate": 7.459745762711866e-06,
"loss": 0.0846,
"step": 2398
},
{
"epoch": 2.5391235270753345,
"grad_norm": 0.7806175947189331,
"learning_rate": 7.458686440677967e-06,
"loss": 0.085,
"step": 2399
},
{
"epoch": 2.5401827088574076,
"grad_norm": 0.2025279402732849,
"learning_rate": 7.4576271186440685e-06,
"loss": 0.0844,
"step": 2400
},
{
"epoch": 2.541241890639481,
"grad_norm": 0.24406304955482483,
"learning_rate": 7.45656779661017e-06,
"loss": 0.0815,
"step": 2401
},
{
"epoch": 2.5423010724215542,
"grad_norm": 0.26805198192596436,
"learning_rate": 7.455508474576272e-06,
"loss": 0.088,
"step": 2402
},
{
"epoch": 2.5433602542036278,
"grad_norm": 0.23744802176952362,
"learning_rate": 7.4544491525423736e-06,
"loss": 0.085,
"step": 2403
},
{
"epoch": 2.5444194359857013,
"grad_norm": 0.3496606647968292,
"learning_rate": 7.453389830508475e-06,
"loss": 0.0832,
"step": 2404
},
{
"epoch": 2.5454786177677744,
"grad_norm": 0.4525336027145386,
"learning_rate": 7.452330508474576e-06,
"loss": 0.0832,
"step": 2405
},
{
"epoch": 2.5465377995498475,
"grad_norm": 0.2229275107383728,
"learning_rate": 7.451271186440679e-06,
"loss": 0.0859,
"step": 2406
},
{
"epoch": 2.547596981331921,
"grad_norm": 0.33702296018600464,
"learning_rate": 7.45021186440678e-06,
"loss": 0.0835,
"step": 2407
},
{
"epoch": 2.5486561631139946,
"grad_norm": 0.7689473032951355,
"learning_rate": 7.4491525423728815e-06,
"loss": 0.0884,
"step": 2408
},
{
"epoch": 2.5497153448960677,
"grad_norm": 0.812541663646698,
"learning_rate": 7.448093220338984e-06,
"loss": 0.0907,
"step": 2409
},
{
"epoch": 2.550774526678141,
"grad_norm": 0.9435730576515198,
"learning_rate": 7.447033898305085e-06,
"loss": 0.0909,
"step": 2410
},
{
"epoch": 2.5518337084602143,
"grad_norm": 0.2227533459663391,
"learning_rate": 7.4459745762711865e-06,
"loss": 0.0862,
"step": 2411
},
{
"epoch": 2.552892890242288,
"grad_norm": 0.3291822373867035,
"learning_rate": 7.444915254237288e-06,
"loss": 0.0878,
"step": 2412
},
{
"epoch": 2.5539520720243614,
"grad_norm": 0.23760034143924713,
"learning_rate": 7.443855932203391e-06,
"loss": 0.0873,
"step": 2413
},
{
"epoch": 2.5550112538064345,
"grad_norm": 0.30737602710723877,
"learning_rate": 7.4427966101694925e-06,
"loss": 0.0862,
"step": 2414
},
{
"epoch": 2.556070435588508,
"grad_norm": 0.1957489252090454,
"learning_rate": 7.441737288135594e-06,
"loss": 0.0835,
"step": 2415
},
{
"epoch": 2.557129617370581,
"grad_norm": 0.22747154533863068,
"learning_rate": 7.440677966101696e-06,
"loss": 0.0881,
"step": 2416
},
{
"epoch": 2.5581887991526546,
"grad_norm": 0.5635335445404053,
"learning_rate": 7.4396186440677975e-06,
"loss": 0.0873,
"step": 2417
},
{
"epoch": 2.559247980934728,
"grad_norm": 0.23749151825904846,
"learning_rate": 7.438559322033899e-06,
"loss": 0.0852,
"step": 2418
},
{
"epoch": 2.5603071627168013,
"grad_norm": 0.17558777332305908,
"learning_rate": 7.437500000000001e-06,
"loss": 0.0836,
"step": 2419
},
{
"epoch": 2.5613663444988743,
"grad_norm": 0.28608277440071106,
"learning_rate": 7.436440677966103e-06,
"loss": 0.0847,
"step": 2420
},
{
"epoch": 2.562425526280948,
"grad_norm": 0.5463235974311829,
"learning_rate": 7.435381355932204e-06,
"loss": 0.0852,
"step": 2421
},
{
"epoch": 2.5634847080630214,
"grad_norm": 0.46325045824050903,
"learning_rate": 7.4343220338983054e-06,
"loss": 0.0843,
"step": 2422
},
{
"epoch": 2.5645438898450945,
"grad_norm": 0.41990897059440613,
"learning_rate": 7.433262711864408e-06,
"loss": 0.0865,
"step": 2423
},
{
"epoch": 2.565603071627168,
"grad_norm": 0.34436190128326416,
"learning_rate": 7.432203389830509e-06,
"loss": 0.0874,
"step": 2424
},
{
"epoch": 2.566662253409241,
"grad_norm": 0.5995191335678101,
"learning_rate": 7.4311440677966105e-06,
"loss": 0.0867,
"step": 2425
},
{
"epoch": 2.5677214351913147,
"grad_norm": 0.2410784661769867,
"learning_rate": 7.430084745762712e-06,
"loss": 0.0848,
"step": 2426
},
{
"epoch": 2.568780616973388,
"grad_norm": 0.4424084722995758,
"learning_rate": 7.429025423728814e-06,
"loss": 0.0859,
"step": 2427
},
{
"epoch": 2.5698397987554613,
"grad_norm": 0.2659587860107422,
"learning_rate": 7.427966101694916e-06,
"loss": 0.0853,
"step": 2428
},
{
"epoch": 2.570898980537535,
"grad_norm": 0.20544077455997467,
"learning_rate": 7.426906779661017e-06,
"loss": 0.0835,
"step": 2429
},
{
"epoch": 2.571958162319608,
"grad_norm": 0.2531146705150604,
"learning_rate": 7.425847457627119e-06,
"loss": 0.089,
"step": 2430
},
{
"epoch": 2.5730173441016815,
"grad_norm": 0.2648871839046478,
"learning_rate": 7.424788135593221e-06,
"loss": 0.0857,
"step": 2431
},
{
"epoch": 2.574076525883755,
"grad_norm": 0.328542023897171,
"learning_rate": 7.423728813559322e-06,
"loss": 0.0872,
"step": 2432
},
{
"epoch": 2.575135707665828,
"grad_norm": 0.38928744196891785,
"learning_rate": 7.4226694915254235e-06,
"loss": 0.0893,
"step": 2433
},
{
"epoch": 2.5761948894479016,
"grad_norm": 0.23467789590358734,
"learning_rate": 7.421610169491527e-06,
"loss": 0.0839,
"step": 2434
},
{
"epoch": 2.5772540712299747,
"grad_norm": 0.5394375324249268,
"learning_rate": 7.420550847457628e-06,
"loss": 0.0849,
"step": 2435
},
{
"epoch": 2.5783132530120483,
"grad_norm": 0.5349376201629639,
"learning_rate": 7.41949152542373e-06,
"loss": 0.0877,
"step": 2436
},
{
"epoch": 2.579372434794122,
"grad_norm": 0.2408323585987091,
"learning_rate": 7.418432203389832e-06,
"loss": 0.0853,
"step": 2437
},
{
"epoch": 2.580431616576195,
"grad_norm": 0.26893261075019836,
"learning_rate": 7.417372881355933e-06,
"loss": 0.0859,
"step": 2438
},
{
"epoch": 2.581490798358268,
"grad_norm": 0.2837241291999817,
"learning_rate": 7.4163135593220345e-06,
"loss": 0.0909,
"step": 2439
},
{
"epoch": 2.5825499801403415,
"grad_norm": 0.5301665663719177,
"learning_rate": 7.415254237288137e-06,
"loss": 0.0881,
"step": 2440
},
{
"epoch": 2.583609161922415,
"grad_norm": 0.21783919632434845,
"learning_rate": 7.414194915254238e-06,
"loss": 0.0867,
"step": 2441
},
{
"epoch": 2.584668343704488,
"grad_norm": 0.29005125164985657,
"learning_rate": 7.41313559322034e-06,
"loss": 0.0848,
"step": 2442
},
{
"epoch": 2.5857275254865617,
"grad_norm": 0.5109828114509583,
"learning_rate": 7.412076271186441e-06,
"loss": 0.086,
"step": 2443
},
{
"epoch": 2.586786707268635,
"grad_norm": 0.35774463415145874,
"learning_rate": 7.411016949152543e-06,
"loss": 0.0879,
"step": 2444
},
{
"epoch": 2.5878458890507083,
"grad_norm": 0.18126487731933594,
"learning_rate": 7.409957627118645e-06,
"loss": 0.0839,
"step": 2445
},
{
"epoch": 2.588905070832782,
"grad_norm": 0.6130411028862,
"learning_rate": 7.408898305084746e-06,
"loss": 0.0861,
"step": 2446
},
{
"epoch": 2.589964252614855,
"grad_norm": 0.2746926248073578,
"learning_rate": 7.407838983050848e-06,
"loss": 0.0862,
"step": 2447
},
{
"epoch": 2.5910234343969285,
"grad_norm": 0.21978271007537842,
"learning_rate": 7.40677966101695e-06,
"loss": 0.0824,
"step": 2448
},
{
"epoch": 2.5920826161790016,
"grad_norm": 0.260647714138031,
"learning_rate": 7.405720338983051e-06,
"loss": 0.0885,
"step": 2449
},
{
"epoch": 2.593141797961075,
"grad_norm": 0.23659560084342957,
"learning_rate": 7.4046610169491526e-06,
"loss": 0.0834,
"step": 2450
},
{
"epoch": 2.5942009797431487,
"grad_norm": 0.1942400336265564,
"learning_rate": 7.403601694915255e-06,
"loss": 0.0837,
"step": 2451
},
{
"epoch": 2.5952601615252218,
"grad_norm": 0.21580971777439117,
"learning_rate": 7.402542372881356e-06,
"loss": 0.0825,
"step": 2452
},
{
"epoch": 2.596319343307295,
"grad_norm": 0.5914468765258789,
"learning_rate": 7.401483050847458e-06,
"loss": 0.0859,
"step": 2453
},
{
"epoch": 2.5973785250893684,
"grad_norm": 0.29213789105415344,
"learning_rate": 7.400423728813559e-06,
"loss": 0.0857,
"step": 2454
},
{
"epoch": 2.598437706871442,
"grad_norm": 0.41852453351020813,
"learning_rate": 7.399364406779662e-06,
"loss": 0.0878,
"step": 2455
},
{
"epoch": 2.599496888653515,
"grad_norm": 0.2311413437128067,
"learning_rate": 7.3983050847457636e-06,
"loss": 0.0834,
"step": 2456
},
{
"epoch": 2.6005560704355886,
"grad_norm": 0.9100884199142456,
"learning_rate": 7.397245762711866e-06,
"loss": 0.0844,
"step": 2457
},
{
"epoch": 2.6016152522176617,
"grad_norm": 0.6438109874725342,
"learning_rate": 7.396186440677967e-06,
"loss": 0.087,
"step": 2458
},
{
"epoch": 2.602674433999735,
"grad_norm": 0.20899845659732819,
"learning_rate": 7.395127118644069e-06,
"loss": 0.0845,
"step": 2459
},
{
"epoch": 2.6037336157818087,
"grad_norm": 0.22641488909721375,
"learning_rate": 7.39406779661017e-06,
"loss": 0.0864,
"step": 2460
},
{
"epoch": 2.604792797563882,
"grad_norm": 0.15944629907608032,
"learning_rate": 7.393008474576272e-06,
"loss": 0.0856,
"step": 2461
},
{
"epoch": 2.6058519793459554,
"grad_norm": 0.28860077261924744,
"learning_rate": 7.391949152542374e-06,
"loss": 0.0873,
"step": 2462
},
{
"epoch": 2.6069111611280285,
"grad_norm": 0.23296071588993073,
"learning_rate": 7.390889830508475e-06,
"loss": 0.0844,
"step": 2463
},
{
"epoch": 2.607970342910102,
"grad_norm": 0.3206871449947357,
"learning_rate": 7.3898305084745766e-06,
"loss": 0.0851,
"step": 2464
},
{
"epoch": 2.6090295246921755,
"grad_norm": 0.2582883834838867,
"learning_rate": 7.388771186440679e-06,
"loss": 0.0858,
"step": 2465
},
{
"epoch": 2.6100887064742486,
"grad_norm": 0.45466917753219604,
"learning_rate": 7.38771186440678e-06,
"loss": 0.088,
"step": 2466
},
{
"epoch": 2.6111478882563217,
"grad_norm": 0.2250165045261383,
"learning_rate": 7.386652542372882e-06,
"loss": 0.0812,
"step": 2467
},
{
"epoch": 2.6122070700383953,
"grad_norm": 0.2370089888572693,
"learning_rate": 7.385593220338984e-06,
"loss": 0.0825,
"step": 2468
},
{
"epoch": 2.613266251820469,
"grad_norm": 0.9961854219436646,
"learning_rate": 7.384533898305085e-06,
"loss": 0.0862,
"step": 2469
},
{
"epoch": 2.614325433602542,
"grad_norm": 0.25502580404281616,
"learning_rate": 7.383474576271187e-06,
"loss": 0.0841,
"step": 2470
},
{
"epoch": 2.6153846153846154,
"grad_norm": 0.19646094739437103,
"learning_rate": 7.382415254237288e-06,
"loss": 0.0838,
"step": 2471
},
{
"epoch": 2.6164437971666885,
"grad_norm": 0.2757315933704376,
"learning_rate": 7.38135593220339e-06,
"loss": 0.0842,
"step": 2472
},
{
"epoch": 2.617502978948762,
"grad_norm": 0.24290607869625092,
"learning_rate": 7.380296610169492e-06,
"loss": 0.0883,
"step": 2473
},
{
"epoch": 2.6185621607308356,
"grad_norm": 0.20332032442092896,
"learning_rate": 7.379237288135593e-06,
"loss": 0.0848,
"step": 2474
},
{
"epoch": 2.6196213425129087,
"grad_norm": 0.4000178575515747,
"learning_rate": 7.378177966101695e-06,
"loss": 0.0903,
"step": 2475
},
{
"epoch": 2.620680524294982,
"grad_norm": 1.3280612230300903,
"learning_rate": 7.377118644067798e-06,
"loss": 0.085,
"step": 2476
},
{
"epoch": 2.6217397060770553,
"grad_norm": 0.34385353326797485,
"learning_rate": 7.376059322033899e-06,
"loss": 0.0878,
"step": 2477
},
{
"epoch": 2.622798887859129,
"grad_norm": 0.37220701575279236,
"learning_rate": 7.375000000000001e-06,
"loss": 0.0861,
"step": 2478
},
{
"epoch": 2.6238580696412024,
"grad_norm": 0.22052189707756042,
"learning_rate": 7.373940677966103e-06,
"loss": 0.0859,
"step": 2479
},
{
"epoch": 2.6249172514232755,
"grad_norm": 0.1742621809244156,
"learning_rate": 7.372881355932204e-06,
"loss": 0.0876,
"step": 2480
},
{
"epoch": 2.625976433205349,
"grad_norm": 0.2315528690814972,
"learning_rate": 7.371822033898306e-06,
"loss": 0.0843,
"step": 2481
},
{
"epoch": 2.627035614987422,
"grad_norm": 0.744558572769165,
"learning_rate": 7.370762711864408e-06,
"loss": 0.0838,
"step": 2482
},
{
"epoch": 2.6280947967694956,
"grad_norm": 0.2049311101436615,
"learning_rate": 7.369703389830509e-06,
"loss": 0.0832,
"step": 2483
},
{
"epoch": 2.629153978551569,
"grad_norm": 0.6654312610626221,
"learning_rate": 7.368644067796611e-06,
"loss": 0.0807,
"step": 2484
},
{
"epoch": 2.6302131603336423,
"grad_norm": 0.20644965767860413,
"learning_rate": 7.367584745762713e-06,
"loss": 0.0872,
"step": 2485
},
{
"epoch": 2.6312723421157154,
"grad_norm": 0.19637905061244965,
"learning_rate": 7.366525423728814e-06,
"loss": 0.0871,
"step": 2486
},
{
"epoch": 2.632331523897789,
"grad_norm": 0.24023404717445374,
"learning_rate": 7.365466101694916e-06,
"loss": 0.085,
"step": 2487
},
{
"epoch": 2.6333907056798624,
"grad_norm": 0.24043144285678864,
"learning_rate": 7.364406779661017e-06,
"loss": 0.086,
"step": 2488
},
{
"epoch": 2.6344498874619355,
"grad_norm": 0.3961732089519501,
"learning_rate": 7.3633474576271194e-06,
"loss": 0.0893,
"step": 2489
},
{
"epoch": 2.635509069244009,
"grad_norm": 0.5193547010421753,
"learning_rate": 7.362288135593221e-06,
"loss": 0.0875,
"step": 2490
},
{
"epoch": 2.636568251026082,
"grad_norm": 0.24608950316905975,
"learning_rate": 7.361228813559322e-06,
"loss": 0.0848,
"step": 2491
},
{
"epoch": 2.6376274328081557,
"grad_norm": 0.6626792550086975,
"learning_rate": 7.360169491525424e-06,
"loss": 0.0853,
"step": 2492
},
{
"epoch": 2.6386866145902292,
"grad_norm": 0.19503068923950195,
"learning_rate": 7.359110169491526e-06,
"loss": 0.085,
"step": 2493
},
{
"epoch": 2.6397457963723023,
"grad_norm": 0.22753697633743286,
"learning_rate": 7.358050847457627e-06,
"loss": 0.0827,
"step": 2494
},
{
"epoch": 2.640804978154376,
"grad_norm": 0.19155411422252655,
"learning_rate": 7.356991525423729e-06,
"loss": 0.0851,
"step": 2495
},
{
"epoch": 2.641864159936449,
"grad_norm": 0.2226436585187912,
"learning_rate": 7.355932203389831e-06,
"loss": 0.0898,
"step": 2496
},
{
"epoch": 2.6429233417185225,
"grad_norm": 0.20181259512901306,
"learning_rate": 7.354872881355933e-06,
"loss": 0.0856,
"step": 2497
},
{
"epoch": 2.643982523500596,
"grad_norm": 0.3549362123012543,
"learning_rate": 7.353813559322035e-06,
"loss": 0.0829,
"step": 2498
},
{
"epoch": 2.645041705282669,
"grad_norm": 0.21644608676433563,
"learning_rate": 7.352754237288137e-06,
"loss": 0.0862,
"step": 2499
},
{
"epoch": 2.6461008870647422,
"grad_norm": 0.37708693742752075,
"learning_rate": 7.351694915254238e-06,
"loss": 0.0878,
"step": 2500
},
{
"epoch": 2.6471600688468158,
"grad_norm": 0.2566758692264557,
"learning_rate": 7.35063559322034e-06,
"loss": 0.0843,
"step": 2501
},
{
"epoch": 2.6482192506288893,
"grad_norm": 0.5458624958992004,
"learning_rate": 7.349576271186441e-06,
"loss": 0.0861,
"step": 2502
},
{
"epoch": 2.6492784324109624,
"grad_norm": 0.20609797537326813,
"learning_rate": 7.3485169491525434e-06,
"loss": 0.085,
"step": 2503
},
{
"epoch": 2.650337614193036,
"grad_norm": 0.22142818570137024,
"learning_rate": 7.347457627118645e-06,
"loss": 0.0874,
"step": 2504
},
{
"epoch": 2.651396795975109,
"grad_norm": 0.1624692678451538,
"learning_rate": 7.346398305084746e-06,
"loss": 0.0862,
"step": 2505
},
{
"epoch": 2.6524559777571826,
"grad_norm": 0.20099957287311554,
"learning_rate": 7.3453389830508485e-06,
"loss": 0.0873,
"step": 2506
},
{
"epoch": 2.653515159539256,
"grad_norm": 0.16464291512966156,
"learning_rate": 7.34427966101695e-06,
"loss": 0.0828,
"step": 2507
},
{
"epoch": 2.654574341321329,
"grad_norm": 0.2808842658996582,
"learning_rate": 7.343220338983051e-06,
"loss": 0.0854,
"step": 2508
},
{
"epoch": 2.6556335231034027,
"grad_norm": 0.2627069056034088,
"learning_rate": 7.342161016949153e-06,
"loss": 0.0874,
"step": 2509
},
{
"epoch": 2.656692704885476,
"grad_norm": 0.17322279512882233,
"learning_rate": 7.341101694915255e-06,
"loss": 0.0845,
"step": 2510
},
{
"epoch": 2.6577518866675494,
"grad_norm": 0.24847866594791412,
"learning_rate": 7.340042372881356e-06,
"loss": 0.0863,
"step": 2511
},
{
"epoch": 2.658811068449623,
"grad_norm": 0.2386065572500229,
"learning_rate": 7.338983050847458e-06,
"loss": 0.0853,
"step": 2512
},
{
"epoch": 2.659870250231696,
"grad_norm": 0.8196850419044495,
"learning_rate": 7.337923728813559e-06,
"loss": 0.0871,
"step": 2513
},
{
"epoch": 2.6609294320137695,
"grad_norm": 0.24189303815364838,
"learning_rate": 7.3368644067796615e-06,
"loss": 0.0829,
"step": 2514
},
{
"epoch": 2.6619886137958426,
"grad_norm": 0.5739706754684448,
"learning_rate": 7.335805084745763e-06,
"loss": 0.0859,
"step": 2515
},
{
"epoch": 2.663047795577916,
"grad_norm": 0.2830088138580322,
"learning_rate": 7.334745762711864e-06,
"loss": 0.0881,
"step": 2516
},
{
"epoch": 2.6641069773599892,
"grad_norm": 0.25852301716804504,
"learning_rate": 7.3336864406779666e-06,
"loss": 0.083,
"step": 2517
},
{
"epoch": 2.665166159142063,
"grad_norm": 0.24714624881744385,
"learning_rate": 7.332627118644068e-06,
"loss": 0.0857,
"step": 2518
},
{
"epoch": 2.666225340924136,
"grad_norm": 0.1826709359884262,
"learning_rate": 7.33156779661017e-06,
"loss": 0.0827,
"step": 2519
},
{
"epoch": 2.6672845227062094,
"grad_norm": 0.26431363821029663,
"learning_rate": 7.3305084745762725e-06,
"loss": 0.0854,
"step": 2520
},
{
"epoch": 2.668343704488283,
"grad_norm": 0.6349174380302429,
"learning_rate": 7.329449152542374e-06,
"loss": 0.0842,
"step": 2521
},
{
"epoch": 2.669402886270356,
"grad_norm": 0.3105546832084656,
"learning_rate": 7.328389830508475e-06,
"loss": 0.0872,
"step": 2522
},
{
"epoch": 2.6704620680524296,
"grad_norm": 0.2025269865989685,
"learning_rate": 7.3273305084745776e-06,
"loss": 0.0828,
"step": 2523
},
{
"epoch": 2.6715212498345027,
"grad_norm": 0.21857735514640808,
"learning_rate": 7.326271186440679e-06,
"loss": 0.0839,
"step": 2524
},
{
"epoch": 2.672580431616576,
"grad_norm": 0.1912987232208252,
"learning_rate": 7.32521186440678e-06,
"loss": 0.0866,
"step": 2525
},
{
"epoch": 2.6736396133986498,
"grad_norm": 0.22973424196243286,
"learning_rate": 7.324152542372882e-06,
"loss": 0.0834,
"step": 2526
},
{
"epoch": 2.674698795180723,
"grad_norm": 0.22732165455818176,
"learning_rate": 7.323093220338984e-06,
"loss": 0.0888,
"step": 2527
},
{
"epoch": 2.6757579769627964,
"grad_norm": 0.2239425927400589,
"learning_rate": 7.3220338983050855e-06,
"loss": 0.087,
"step": 2528
},
{
"epoch": 2.6768171587448695,
"grad_norm": 1.158964991569519,
"learning_rate": 7.320974576271187e-06,
"loss": 0.0859,
"step": 2529
},
{
"epoch": 2.677876340526943,
"grad_norm": 0.5004084706306458,
"learning_rate": 7.319915254237288e-06,
"loss": 0.088,
"step": 2530
},
{
"epoch": 2.6789355223090165,
"grad_norm": 0.17939257621765137,
"learning_rate": 7.3188559322033906e-06,
"loss": 0.0846,
"step": 2531
},
{
"epoch": 2.6799947040910896,
"grad_norm": 0.2022828608751297,
"learning_rate": 7.317796610169492e-06,
"loss": 0.0848,
"step": 2532
},
{
"epoch": 2.6810538858731627,
"grad_norm": 0.2316572368144989,
"learning_rate": 7.316737288135593e-06,
"loss": 0.0865,
"step": 2533
},
{
"epoch": 2.6821130676552363,
"grad_norm": 0.178863525390625,
"learning_rate": 7.315677966101696e-06,
"loss": 0.0816,
"step": 2534
},
{
"epoch": 2.68317224943731,
"grad_norm": 0.22590215504169464,
"learning_rate": 7.314618644067797e-06,
"loss": 0.0873,
"step": 2535
},
{
"epoch": 2.684231431219383,
"grad_norm": 0.24172040820121765,
"learning_rate": 7.3135593220338985e-06,
"loss": 0.0823,
"step": 2536
},
{
"epoch": 2.6852906130014564,
"grad_norm": 0.2226477861404419,
"learning_rate": 7.3125e-06,
"loss": 0.0819,
"step": 2537
},
{
"epoch": 2.6863497947835295,
"grad_norm": 0.19185325503349304,
"learning_rate": 7.311440677966102e-06,
"loss": 0.0884,
"step": 2538
},
{
"epoch": 2.687408976565603,
"grad_norm": 0.8066223859786987,
"learning_rate": 7.3103813559322035e-06,
"loss": 0.0885,
"step": 2539
},
{
"epoch": 2.6884681583476766,
"grad_norm": 0.2656841576099396,
"learning_rate": 7.309322033898306e-06,
"loss": 0.0867,
"step": 2540
},
{
"epoch": 2.6895273401297497,
"grad_norm": 0.37627339363098145,
"learning_rate": 7.308262711864408e-06,
"loss": 0.0886,
"step": 2541
},
{
"epoch": 2.6905865219118232,
"grad_norm": 0.6449163556098938,
"learning_rate": 7.3072033898305095e-06,
"loss": 0.0885,
"step": 2542
},
{
"epoch": 2.6916457036938963,
"grad_norm": 0.21589618921279907,
"learning_rate": 7.306144067796611e-06,
"loss": 0.0862,
"step": 2543
},
{
"epoch": 2.69270488547597,
"grad_norm": 0.24904780089855194,
"learning_rate": 7.305084745762713e-06,
"loss": 0.0825,
"step": 2544
},
{
"epoch": 2.6937640672580434,
"grad_norm": 0.20243799686431885,
"learning_rate": 7.3040254237288145e-06,
"loss": 0.0799,
"step": 2545
},
{
"epoch": 2.6948232490401165,
"grad_norm": 0.2245987057685852,
"learning_rate": 7.302966101694916e-06,
"loss": 0.0848,
"step": 2546
},
{
"epoch": 2.6958824308221896,
"grad_norm": 0.1902856081724167,
"learning_rate": 7.301906779661017e-06,
"loss": 0.0848,
"step": 2547
},
{
"epoch": 2.696941612604263,
"grad_norm": 0.2722117304801941,
"learning_rate": 7.30084745762712e-06,
"loss": 0.0846,
"step": 2548
},
{
"epoch": 2.6980007943863367,
"grad_norm": 0.20510976016521454,
"learning_rate": 7.299788135593221e-06,
"loss": 0.0856,
"step": 2549
},
{
"epoch": 2.6990599761684098,
"grad_norm": 0.142478808760643,
"learning_rate": 7.2987288135593224e-06,
"loss": 0.0872,
"step": 2550
},
{
"epoch": 2.7001191579504833,
"grad_norm": 0.20077407360076904,
"learning_rate": 7.297669491525424e-06,
"loss": 0.083,
"step": 2551
},
{
"epoch": 2.7011783397325564,
"grad_norm": 0.5384523868560791,
"learning_rate": 7.296610169491526e-06,
"loss": 0.0822,
"step": 2552
},
{
"epoch": 2.70223752151463,
"grad_norm": 0.4859084486961365,
"learning_rate": 7.2955508474576275e-06,
"loss": 0.0875,
"step": 2553
},
{
"epoch": 2.7032967032967035,
"grad_norm": 0.21412122249603271,
"learning_rate": 7.294491525423729e-06,
"loss": 0.0851,
"step": 2554
},
{
"epoch": 2.7043558850787766,
"grad_norm": 0.2351921945810318,
"learning_rate": 7.293432203389831e-06,
"loss": 0.0864,
"step": 2555
},
{
"epoch": 2.70541506686085,
"grad_norm": 0.20572692155838013,
"learning_rate": 7.292372881355933e-06,
"loss": 0.0861,
"step": 2556
},
{
"epoch": 2.706474248642923,
"grad_norm": 0.18501295149326324,
"learning_rate": 7.291313559322034e-06,
"loss": 0.0835,
"step": 2557
},
{
"epoch": 2.7075334304249967,
"grad_norm": 0.36258664727211,
"learning_rate": 7.290254237288135e-06,
"loss": 0.0838,
"step": 2558
},
{
"epoch": 2.7085926122070703,
"grad_norm": 0.19975177943706512,
"learning_rate": 7.289194915254238e-06,
"loss": 0.0855,
"step": 2559
},
{
"epoch": 2.7096517939891434,
"grad_norm": 0.35506847500801086,
"learning_rate": 7.288135593220339e-06,
"loss": 0.0819,
"step": 2560
},
{
"epoch": 2.710710975771217,
"grad_norm": 0.24622154235839844,
"learning_rate": 7.287076271186442e-06,
"loss": 0.0849,
"step": 2561
},
{
"epoch": 2.71177015755329,
"grad_norm": 0.4321694076061249,
"learning_rate": 7.286016949152544e-06,
"loss": 0.0879,
"step": 2562
},
{
"epoch": 2.7128293393353635,
"grad_norm": 0.2831239402294159,
"learning_rate": 7.284957627118645e-06,
"loss": 0.0864,
"step": 2563
},
{
"epoch": 2.7138885211174366,
"grad_norm": 0.160257950425148,
"learning_rate": 7.2838983050847464e-06,
"loss": 0.0844,
"step": 2564
},
{
"epoch": 2.71494770289951,
"grad_norm": 0.19795700907707214,
"learning_rate": 7.282838983050849e-06,
"loss": 0.0835,
"step": 2565
},
{
"epoch": 2.7160068846815832,
"grad_norm": 0.2295176237821579,
"learning_rate": 7.28177966101695e-06,
"loss": 0.0891,
"step": 2566
},
{
"epoch": 2.717066066463657,
"grad_norm": 0.19950707256793976,
"learning_rate": 7.2807203389830515e-06,
"loss": 0.0881,
"step": 2567
},
{
"epoch": 2.7181252482457303,
"grad_norm": 0.16419531404972076,
"learning_rate": 7.279661016949153e-06,
"loss": 0.0856,
"step": 2568
},
{
"epoch": 2.7191844300278034,
"grad_norm": 0.25177082419395447,
"learning_rate": 7.278601694915255e-06,
"loss": 0.0872,
"step": 2569
},
{
"epoch": 2.720243611809877,
"grad_norm": 0.2026664912700653,
"learning_rate": 7.277542372881357e-06,
"loss": 0.0821,
"step": 2570
},
{
"epoch": 2.72130279359195,
"grad_norm": 0.5405267477035522,
"learning_rate": 7.276483050847458e-06,
"loss": 0.0885,
"step": 2571
},
{
"epoch": 2.7223619753740236,
"grad_norm": 0.6258319616317749,
"learning_rate": 7.27542372881356e-06,
"loss": 0.0861,
"step": 2572
},
{
"epoch": 2.723421157156097,
"grad_norm": 0.19594408571720123,
"learning_rate": 7.274364406779662e-06,
"loss": 0.0848,
"step": 2573
},
{
"epoch": 2.72448033893817,
"grad_norm": 0.6023727655410767,
"learning_rate": 7.273305084745763e-06,
"loss": 0.0867,
"step": 2574
},
{
"epoch": 2.7255395207202437,
"grad_norm": 0.28104084730148315,
"learning_rate": 7.2722457627118645e-06,
"loss": 0.0871,
"step": 2575
},
{
"epoch": 2.726598702502317,
"grad_norm": 0.17260709404945374,
"learning_rate": 7.271186440677967e-06,
"loss": 0.0837,
"step": 2576
},
{
"epoch": 2.7276578842843904,
"grad_norm": 0.17434747517108917,
"learning_rate": 7.270127118644068e-06,
"loss": 0.086,
"step": 2577
},
{
"epoch": 2.728717066066464,
"grad_norm": 0.23676802217960358,
"learning_rate": 7.2690677966101696e-06,
"loss": 0.0824,
"step": 2578
},
{
"epoch": 2.729776247848537,
"grad_norm": 0.3234706223011017,
"learning_rate": 7.268008474576271e-06,
"loss": 0.0826,
"step": 2579
},
{
"epoch": 2.73083542963061,
"grad_norm": 0.26905763149261475,
"learning_rate": 7.266949152542373e-06,
"loss": 0.0846,
"step": 2580
},
{
"epoch": 2.7318946114126836,
"grad_norm": 0.21624106168746948,
"learning_rate": 7.265889830508475e-06,
"loss": 0.0815,
"step": 2581
},
{
"epoch": 2.732953793194757,
"grad_norm": 0.1897197663784027,
"learning_rate": 7.264830508474578e-06,
"loss": 0.0826,
"step": 2582
},
{
"epoch": 2.7340129749768303,
"grad_norm": 0.43036940693855286,
"learning_rate": 7.263771186440679e-06,
"loss": 0.0865,
"step": 2583
},
{
"epoch": 2.735072156758904,
"grad_norm": 1.1944246292114258,
"learning_rate": 7.2627118644067806e-06,
"loss": 0.0836,
"step": 2584
},
{
"epoch": 2.736131338540977,
"grad_norm": 0.4790208637714386,
"learning_rate": 7.261652542372882e-06,
"loss": 0.0857,
"step": 2585
},
{
"epoch": 2.7371905203230504,
"grad_norm": 0.21819289028644562,
"learning_rate": 7.260593220338984e-06,
"loss": 0.0845,
"step": 2586
},
{
"epoch": 2.738249702105124,
"grad_norm": 0.2567351162433624,
"learning_rate": 7.259533898305086e-06,
"loss": 0.0824,
"step": 2587
},
{
"epoch": 2.739308883887197,
"grad_norm": 0.3220031261444092,
"learning_rate": 7.258474576271187e-06,
"loss": 0.0859,
"step": 2588
},
{
"epoch": 2.7403680656692706,
"grad_norm": 0.583760678768158,
"learning_rate": 7.2574152542372885e-06,
"loss": 0.088,
"step": 2589
},
{
"epoch": 2.7414272474513437,
"grad_norm": 0.38349083065986633,
"learning_rate": 7.256355932203391e-06,
"loss": 0.0816,
"step": 2590
},
{
"epoch": 2.7424864292334172,
"grad_norm": 0.6811165809631348,
"learning_rate": 7.255296610169492e-06,
"loss": 0.0888,
"step": 2591
},
{
"epoch": 2.7435456110154908,
"grad_norm": 0.26285967230796814,
"learning_rate": 7.2542372881355936e-06,
"loss": 0.0886,
"step": 2592
},
{
"epoch": 2.744604792797564,
"grad_norm": 0.24635250866413116,
"learning_rate": 7.253177966101696e-06,
"loss": 0.0837,
"step": 2593
},
{
"epoch": 2.745663974579637,
"grad_norm": 0.1973286271095276,
"learning_rate": 7.252118644067797e-06,
"loss": 0.0827,
"step": 2594
},
{
"epoch": 2.7467231563617105,
"grad_norm": 0.3781270682811737,
"learning_rate": 7.251059322033899e-06,
"loss": 0.0864,
"step": 2595
},
{
"epoch": 2.747782338143784,
"grad_norm": 0.30764245986938477,
"learning_rate": 7.25e-06,
"loss": 0.0833,
"step": 2596
},
{
"epoch": 2.748841519925857,
"grad_norm": 0.2728866934776306,
"learning_rate": 7.248940677966102e-06,
"loss": 0.083,
"step": 2597
},
{
"epoch": 2.7499007017079307,
"grad_norm": 0.21047860383987427,
"learning_rate": 7.247881355932204e-06,
"loss": 0.0892,
"step": 2598
},
{
"epoch": 2.7509598834900038,
"grad_norm": 0.2374969869852066,
"learning_rate": 7.246822033898305e-06,
"loss": 0.0837,
"step": 2599
},
{
"epoch": 2.7520190652720773,
"grad_norm": 0.5180090665817261,
"learning_rate": 7.2457627118644065e-06,
"loss": 0.0848,
"step": 2600
},
{
"epoch": 2.753078247054151,
"grad_norm": 0.5831081867218018,
"learning_rate": 7.244703389830509e-06,
"loss": 0.0867,
"step": 2601
},
{
"epoch": 2.754137428836224,
"grad_norm": 1.2132313251495361,
"learning_rate": 7.24364406779661e-06,
"loss": 0.0879,
"step": 2602
},
{
"epoch": 2.7551966106182975,
"grad_norm": 0.22215701639652252,
"learning_rate": 7.242584745762713e-06,
"loss": 0.0839,
"step": 2603
},
{
"epoch": 2.7562557924003706,
"grad_norm": 0.276680588722229,
"learning_rate": 7.241525423728815e-06,
"loss": 0.0871,
"step": 2604
},
{
"epoch": 2.757314974182444,
"grad_norm": 0.27246686816215515,
"learning_rate": 7.240466101694916e-06,
"loss": 0.0811,
"step": 2605
},
{
"epoch": 2.7583741559645176,
"grad_norm": 0.2052409052848816,
"learning_rate": 7.2394067796610175e-06,
"loss": 0.0843,
"step": 2606
},
{
"epoch": 2.7594333377465907,
"grad_norm": 1.1106832027435303,
"learning_rate": 7.23834745762712e-06,
"loss": 0.0844,
"step": 2607
},
{
"epoch": 2.7604925195286643,
"grad_norm": 0.23822973668575287,
"learning_rate": 7.237288135593221e-06,
"loss": 0.0852,
"step": 2608
},
{
"epoch": 2.7615517013107374,
"grad_norm": 0.2477940320968628,
"learning_rate": 7.236228813559323e-06,
"loss": 0.0892,
"step": 2609
},
{
"epoch": 2.762610883092811,
"grad_norm": 0.2688133418560028,
"learning_rate": 7.235169491525425e-06,
"loss": 0.0859,
"step": 2610
},
{
"epoch": 2.7636700648748844,
"grad_norm": 0.6514645218849182,
"learning_rate": 7.234110169491526e-06,
"loss": 0.0841,
"step": 2611
},
{
"epoch": 2.7647292466569575,
"grad_norm": 2.231745719909668,
"learning_rate": 7.233050847457628e-06,
"loss": 0.088,
"step": 2612
},
{
"epoch": 2.7657884284390306,
"grad_norm": 0.3024356961250305,
"learning_rate": 7.231991525423729e-06,
"loss": 0.0864,
"step": 2613
},
{
"epoch": 2.766847610221104,
"grad_norm": 0.21650326251983643,
"learning_rate": 7.230932203389831e-06,
"loss": 0.0838,
"step": 2614
},
{
"epoch": 2.7679067920031777,
"grad_norm": 1.1713981628417969,
"learning_rate": 7.229872881355933e-06,
"loss": 0.0899,
"step": 2615
},
{
"epoch": 2.768965973785251,
"grad_norm": 0.24282382428646088,
"learning_rate": 7.228813559322034e-06,
"loss": 0.0864,
"step": 2616
},
{
"epoch": 2.7700251555673243,
"grad_norm": 0.2057073414325714,
"learning_rate": 7.227754237288136e-06,
"loss": 0.0833,
"step": 2617
},
{
"epoch": 2.7710843373493974,
"grad_norm": 0.18496139347553253,
"learning_rate": 7.226694915254238e-06,
"loss": 0.0857,
"step": 2618
},
{
"epoch": 2.772143519131471,
"grad_norm": 0.6129516363143921,
"learning_rate": 7.225635593220339e-06,
"loss": 0.0849,
"step": 2619
},
{
"epoch": 2.7732027009135445,
"grad_norm": 0.2906854748725891,
"learning_rate": 7.224576271186441e-06,
"loss": 0.0855,
"step": 2620
},
{
"epoch": 2.7742618826956176,
"grad_norm": 0.3260900676250458,
"learning_rate": 7.223516949152543e-06,
"loss": 0.0829,
"step": 2621
},
{
"epoch": 2.775321064477691,
"grad_norm": 0.6778892278671265,
"learning_rate": 7.222457627118644e-06,
"loss": 0.0874,
"step": 2622
},
{
"epoch": 2.776380246259764,
"grad_norm": 0.4057232439517975,
"learning_rate": 7.221398305084746e-06,
"loss": 0.0848,
"step": 2623
},
{
"epoch": 2.7774394280418377,
"grad_norm": 0.31907692551612854,
"learning_rate": 7.220338983050849e-06,
"loss": 0.0834,
"step": 2624
},
{
"epoch": 2.7784986098239113,
"grad_norm": 0.44375336170196533,
"learning_rate": 7.21927966101695e-06,
"loss": 0.0892,
"step": 2625
},
{
"epoch": 2.7795577916059844,
"grad_norm": 0.6996042728424072,
"learning_rate": 7.218220338983052e-06,
"loss": 0.0885,
"step": 2626
},
{
"epoch": 2.7806169733880575,
"grad_norm": 0.8565188050270081,
"learning_rate": 7.217161016949153e-06,
"loss": 0.0867,
"step": 2627
},
{
"epoch": 2.781676155170131,
"grad_norm": 0.6427931189537048,
"learning_rate": 7.216101694915255e-06,
"loss": 0.0881,
"step": 2628
},
{
"epoch": 2.7827353369522045,
"grad_norm": 0.6029223203659058,
"learning_rate": 7.215042372881357e-06,
"loss": 0.0867,
"step": 2629
},
{
"epoch": 2.7837945187342776,
"grad_norm": 0.24458833038806915,
"learning_rate": 7.213983050847458e-06,
"loss": 0.0895,
"step": 2630
},
{
"epoch": 2.784853700516351,
"grad_norm": 0.22011908888816833,
"learning_rate": 7.2129237288135604e-06,
"loss": 0.0848,
"step": 2631
},
{
"epoch": 2.7859128822984243,
"grad_norm": 0.38336876034736633,
"learning_rate": 7.211864406779662e-06,
"loss": 0.0912,
"step": 2632
},
{
"epoch": 2.786972064080498,
"grad_norm": 0.8342489004135132,
"learning_rate": 7.210805084745763e-06,
"loss": 0.0886,
"step": 2633
},
{
"epoch": 2.7880312458625713,
"grad_norm": 0.7525489926338196,
"learning_rate": 7.209745762711865e-06,
"loss": 0.0866,
"step": 2634
},
{
"epoch": 2.7890904276446444,
"grad_norm": 0.5347561836242676,
"learning_rate": 7.208686440677967e-06,
"loss": 0.0876,
"step": 2635
},
{
"epoch": 2.790149609426718,
"grad_norm": 0.29635554552078247,
"learning_rate": 7.207627118644068e-06,
"loss": 0.0828,
"step": 2636
},
{
"epoch": 2.791208791208791,
"grad_norm": 0.29534098505973816,
"learning_rate": 7.20656779661017e-06,
"loss": 0.0852,
"step": 2637
},
{
"epoch": 2.7922679729908646,
"grad_norm": 2.8918049335479736,
"learning_rate": 7.205508474576271e-06,
"loss": 0.0884,
"step": 2638
},
{
"epoch": 2.793327154772938,
"grad_norm": 0.2642069160938263,
"learning_rate": 7.204449152542373e-06,
"loss": 0.0834,
"step": 2639
},
{
"epoch": 2.7943863365550112,
"grad_norm": 0.30829092860221863,
"learning_rate": 7.203389830508475e-06,
"loss": 0.0858,
"step": 2640
},
{
"epoch": 2.7954455183370848,
"grad_norm": 0.2687780559062958,
"learning_rate": 7.202330508474576e-06,
"loss": 0.087,
"step": 2641
},
{
"epoch": 2.796504700119158,
"grad_norm": 1.3224658966064453,
"learning_rate": 7.2012711864406785e-06,
"loss": 0.0923,
"step": 2642
},
{
"epoch": 2.7975638819012314,
"grad_norm": 0.8700881600379944,
"learning_rate": 7.20021186440678e-06,
"loss": 0.0844,
"step": 2643
},
{
"epoch": 2.7986230636833045,
"grad_norm": 0.8457787036895752,
"learning_rate": 7.199152542372881e-06,
"loss": 0.0864,
"step": 2644
},
{
"epoch": 2.799682245465378,
"grad_norm": 0.287913978099823,
"learning_rate": 7.198093220338984e-06,
"loss": 0.0864,
"step": 2645
},
{
"epoch": 2.800741427247451,
"grad_norm": 0.6349884271621704,
"learning_rate": 7.197033898305086e-06,
"loss": 0.085,
"step": 2646
},
{
"epoch": 2.8018006090295247,
"grad_norm": 0.22546806931495667,
"learning_rate": 7.195974576271187e-06,
"loss": 0.0879,
"step": 2647
},
{
"epoch": 2.802859790811598,
"grad_norm": 0.3513873219490051,
"learning_rate": 7.1949152542372895e-06,
"loss": 0.09,
"step": 2648
},
{
"epoch": 2.8039189725936713,
"grad_norm": 0.6868070363998413,
"learning_rate": 7.193855932203391e-06,
"loss": 0.0902,
"step": 2649
},
{
"epoch": 2.804978154375745,
"grad_norm": 0.9908745288848877,
"learning_rate": 7.192796610169492e-06,
"loss": 0.0889,
"step": 2650
},
{
"epoch": 2.806037336157818,
"grad_norm": 0.30235615372657776,
"learning_rate": 7.191737288135594e-06,
"loss": 0.0892,
"step": 2651
},
{
"epoch": 2.8070965179398915,
"grad_norm": 0.34018200635910034,
"learning_rate": 7.190677966101696e-06,
"loss": 0.0875,
"step": 2652
},
{
"epoch": 2.808155699721965,
"grad_norm": 0.3707370162010193,
"learning_rate": 7.189618644067797e-06,
"loss": 0.0858,
"step": 2653
},
{
"epoch": 2.809214881504038,
"grad_norm": 1.7327641248703003,
"learning_rate": 7.188559322033899e-06,
"loss": 0.0897,
"step": 2654
},
{
"epoch": 2.8102740632861116,
"grad_norm": 1.689785122871399,
"learning_rate": 7.1875e-06,
"loss": 0.0936,
"step": 2655
},
{
"epoch": 2.8113332450681847,
"grad_norm": 0.3976902365684509,
"learning_rate": 7.1864406779661025e-06,
"loss": 0.087,
"step": 2656
},
{
"epoch": 2.8123924268502583,
"grad_norm": 0.2942507565021515,
"learning_rate": 7.185381355932204e-06,
"loss": 0.0837,
"step": 2657
},
{
"epoch": 2.813451608632332,
"grad_norm": 1.63322913646698,
"learning_rate": 7.184322033898305e-06,
"loss": 0.0885,
"step": 2658
},
{
"epoch": 2.814510790414405,
"grad_norm": 0.4471192955970764,
"learning_rate": 7.1832627118644076e-06,
"loss": 0.0893,
"step": 2659
},
{
"epoch": 2.815569972196478,
"grad_norm": 0.3946458697319031,
"learning_rate": 7.182203389830509e-06,
"loss": 0.0835,
"step": 2660
},
{
"epoch": 2.8166291539785515,
"grad_norm": 0.5461068153381348,
"learning_rate": 7.18114406779661e-06,
"loss": 0.089,
"step": 2661
},
{
"epoch": 2.817688335760625,
"grad_norm": 0.24396973848342896,
"learning_rate": 7.180084745762712e-06,
"loss": 0.0863,
"step": 2662
},
{
"epoch": 2.818747517542698,
"grad_norm": 0.3486658036708832,
"learning_rate": 7.179025423728814e-06,
"loss": 0.0865,
"step": 2663
},
{
"epoch": 2.8198066993247717,
"grad_norm": 0.7767401933670044,
"learning_rate": 7.1779661016949155e-06,
"loss": 0.0818,
"step": 2664
},
{
"epoch": 2.8208658811068448,
"grad_norm": 0.565762996673584,
"learning_rate": 7.176906779661017e-06,
"loss": 0.0871,
"step": 2665
},
{
"epoch": 2.8219250628889183,
"grad_norm": 0.28572696447372437,
"learning_rate": 7.17584745762712e-06,
"loss": 0.0857,
"step": 2666
},
{
"epoch": 2.822984244670992,
"grad_norm": 0.5122365951538086,
"learning_rate": 7.174788135593221e-06,
"loss": 0.086,
"step": 2667
},
{
"epoch": 2.824043426453065,
"grad_norm": 0.3288061022758484,
"learning_rate": 7.173728813559323e-06,
"loss": 0.084,
"step": 2668
},
{
"epoch": 2.8251026082351385,
"grad_norm": 0.7172766327857971,
"learning_rate": 7.172669491525425e-06,
"loss": 0.083,
"step": 2669
},
{
"epoch": 2.8261617900172116,
"grad_norm": 1.8225845098495483,
"learning_rate": 7.1716101694915265e-06,
"loss": 0.0852,
"step": 2670
},
{
"epoch": 2.827220971799285,
"grad_norm": 0.24879427254199982,
"learning_rate": 7.170550847457628e-06,
"loss": 0.0858,
"step": 2671
},
{
"epoch": 2.8282801535813586,
"grad_norm": 0.9555388689041138,
"learning_rate": 7.169491525423729e-06,
"loss": 0.0853,
"step": 2672
},
{
"epoch": 2.8293393353634317,
"grad_norm": 0.2729604244232178,
"learning_rate": 7.1684322033898315e-06,
"loss": 0.0869,
"step": 2673
},
{
"epoch": 2.830398517145505,
"grad_norm": 0.9544708728790283,
"learning_rate": 7.167372881355933e-06,
"loss": 0.0859,
"step": 2674
},
{
"epoch": 2.8314576989275784,
"grad_norm": 0.5761927962303162,
"learning_rate": 7.166313559322034e-06,
"loss": 0.0907,
"step": 2675
},
{
"epoch": 2.832516880709652,
"grad_norm": 0.8042075037956238,
"learning_rate": 7.165254237288136e-06,
"loss": 0.0879,
"step": 2676
},
{
"epoch": 2.833576062491725,
"grad_norm": 0.42558467388153076,
"learning_rate": 7.164194915254238e-06,
"loss": 0.087,
"step": 2677
},
{
"epoch": 2.8346352442737985,
"grad_norm": 0.2884480655193329,
"learning_rate": 7.1631355932203394e-06,
"loss": 0.0878,
"step": 2678
},
{
"epoch": 2.8356944260558716,
"grad_norm": 0.39285093545913696,
"learning_rate": 7.162076271186441e-06,
"loss": 0.0873,
"step": 2679
},
{
"epoch": 2.836753607837945,
"grad_norm": 0.57151859998703,
"learning_rate": 7.161016949152543e-06,
"loss": 0.0894,
"step": 2680
},
{
"epoch": 2.8378127896200187,
"grad_norm": 0.35518619418144226,
"learning_rate": 7.1599576271186445e-06,
"loss": 0.0909,
"step": 2681
},
{
"epoch": 2.838871971402092,
"grad_norm": 0.9182879328727722,
"learning_rate": 7.158898305084746e-06,
"loss": 0.0894,
"step": 2682
},
{
"epoch": 2.8399311531841653,
"grad_norm": 0.6388287544250488,
"learning_rate": 7.157838983050847e-06,
"loss": 0.089,
"step": 2683
},
{
"epoch": 2.8409903349662384,
"grad_norm": 0.32887426018714905,
"learning_rate": 7.15677966101695e-06,
"loss": 0.0872,
"step": 2684
},
{
"epoch": 2.842049516748312,
"grad_norm": 0.2990522086620331,
"learning_rate": 7.155720338983051e-06,
"loss": 0.0883,
"step": 2685
},
{
"epoch": 2.8431086985303855,
"grad_norm": 1.1725788116455078,
"learning_rate": 7.154661016949152e-06,
"loss": 0.0874,
"step": 2686
},
{
"epoch": 2.8441678803124586,
"grad_norm": 0.35581767559051514,
"learning_rate": 7.1536016949152555e-06,
"loss": 0.0873,
"step": 2687
},
{
"epoch": 2.845227062094532,
"grad_norm": 0.308718740940094,
"learning_rate": 7.152542372881357e-06,
"loss": 0.083,
"step": 2688
},
{
"epoch": 2.8462862438766052,
"grad_norm": 0.42534202337265015,
"learning_rate": 7.151483050847458e-06,
"loss": 0.0855,
"step": 2689
},
{
"epoch": 2.8473454256586788,
"grad_norm": 0.3667755722999573,
"learning_rate": 7.150423728813561e-06,
"loss": 0.0841,
"step": 2690
},
{
"epoch": 2.848404607440752,
"grad_norm": 0.9837805032730103,
"learning_rate": 7.149364406779662e-06,
"loss": 0.0885,
"step": 2691
},
{
"epoch": 2.8494637892228254,
"grad_norm": 0.8265017867088318,
"learning_rate": 7.1483050847457634e-06,
"loss": 0.0858,
"step": 2692
},
{
"epoch": 2.8505229710048985,
"grad_norm": 0.2696377635002136,
"learning_rate": 7.147245762711865e-06,
"loss": 0.0828,
"step": 2693
},
{
"epoch": 2.851582152786972,
"grad_norm": 0.7192027568817139,
"learning_rate": 7.146186440677967e-06,
"loss": 0.0912,
"step": 2694
},
{
"epoch": 2.8526413345690456,
"grad_norm": 0.24310527741909027,
"learning_rate": 7.1451271186440685e-06,
"loss": 0.0853,
"step": 2695
},
{
"epoch": 2.8537005163511187,
"grad_norm": 0.29763269424438477,
"learning_rate": 7.14406779661017e-06,
"loss": 0.0861,
"step": 2696
},
{
"epoch": 2.854759698133192,
"grad_norm": 0.28719812631607056,
"learning_rate": 7.143008474576272e-06,
"loss": 0.0891,
"step": 2697
},
{
"epoch": 2.8558188799152653,
"grad_norm": 0.5448430776596069,
"learning_rate": 7.141949152542374e-06,
"loss": 0.0884,
"step": 2698
},
{
"epoch": 2.856878061697339,
"grad_norm": 0.4081977903842926,
"learning_rate": 7.140889830508475e-06,
"loss": 0.0854,
"step": 2699
},
{
"epoch": 2.8579372434794124,
"grad_norm": 0.35061120986938477,
"learning_rate": 7.139830508474576e-06,
"loss": 0.0885,
"step": 2700
},
{
"epoch": 2.8589964252614855,
"grad_norm": 0.3310065269470215,
"learning_rate": 7.138771186440679e-06,
"loss": 0.087,
"step": 2701
},
{
"epoch": 2.860055607043559,
"grad_norm": 0.28929078578948975,
"learning_rate": 7.13771186440678e-06,
"loss": 0.0892,
"step": 2702
},
{
"epoch": 2.861114788825632,
"grad_norm": 0.3346499800682068,
"learning_rate": 7.1366525423728815e-06,
"loss": 0.0851,
"step": 2703
},
{
"epoch": 2.8621739706077056,
"grad_norm": 0.4067153036594391,
"learning_rate": 7.135593220338983e-06,
"loss": 0.0848,
"step": 2704
},
{
"epoch": 2.863233152389779,
"grad_norm": 0.20712974667549133,
"learning_rate": 7.134533898305085e-06,
"loss": 0.0881,
"step": 2705
},
{
"epoch": 2.8642923341718523,
"grad_norm": 0.21080565452575684,
"learning_rate": 7.1334745762711866e-06,
"loss": 0.0862,
"step": 2706
},
{
"epoch": 2.8653515159539253,
"grad_norm": 0.18893392384052277,
"learning_rate": 7.132415254237288e-06,
"loss": 0.0802,
"step": 2707
},
{
"epoch": 2.866410697735999,
"grad_norm": 0.21575677394866943,
"learning_rate": 7.131355932203391e-06,
"loss": 0.086,
"step": 2708
},
{
"epoch": 2.8674698795180724,
"grad_norm": 0.3201296925544739,
"learning_rate": 7.1302966101694925e-06,
"loss": 0.0865,
"step": 2709
},
{
"epoch": 2.8685290613001455,
"grad_norm": 0.24422788619995117,
"learning_rate": 7.129237288135594e-06,
"loss": 0.0829,
"step": 2710
},
{
"epoch": 2.869588243082219,
"grad_norm": 0.3589276075363159,
"learning_rate": 7.128177966101696e-06,
"loss": 0.0846,
"step": 2711
},
{
"epoch": 2.870647424864292,
"grad_norm": 0.41220414638519287,
"learning_rate": 7.1271186440677976e-06,
"loss": 0.0852,
"step": 2712
},
{
"epoch": 2.8717066066463657,
"grad_norm": 0.3045378029346466,
"learning_rate": 7.126059322033899e-06,
"loss": 0.0868,
"step": 2713
},
{
"epoch": 2.872765788428439,
"grad_norm": 0.2798013389110565,
"learning_rate": 7.125e-06,
"loss": 0.0847,
"step": 2714
},
{
"epoch": 2.8738249702105123,
"grad_norm": 0.7903490662574768,
"learning_rate": 7.123940677966103e-06,
"loss": 0.0884,
"step": 2715
},
{
"epoch": 2.874884151992586,
"grad_norm": 0.23004823923110962,
"learning_rate": 7.122881355932204e-06,
"loss": 0.089,
"step": 2716
},
{
"epoch": 2.875943333774659,
"grad_norm": 0.21731291711330414,
"learning_rate": 7.1218220338983055e-06,
"loss": 0.0849,
"step": 2717
},
{
"epoch": 2.8770025155567325,
"grad_norm": 0.2031378597021103,
"learning_rate": 7.120762711864408e-06,
"loss": 0.0857,
"step": 2718
},
{
"epoch": 2.878061697338806,
"grad_norm": 0.18993328511714935,
"learning_rate": 7.119703389830509e-06,
"loss": 0.0864,
"step": 2719
},
{
"epoch": 2.879120879120879,
"grad_norm": 0.3438829183578491,
"learning_rate": 7.1186440677966106e-06,
"loss": 0.0849,
"step": 2720
},
{
"epoch": 2.880180060902952,
"grad_norm": 0.7168260216712952,
"learning_rate": 7.117584745762712e-06,
"loss": 0.0862,
"step": 2721
},
{
"epoch": 2.8812392426850257,
"grad_norm": 0.32097071409225464,
"learning_rate": 7.116525423728814e-06,
"loss": 0.0854,
"step": 2722
},
{
"epoch": 2.8822984244670993,
"grad_norm": 0.25940510630607605,
"learning_rate": 7.115466101694916e-06,
"loss": 0.0849,
"step": 2723
},
{
"epoch": 2.8833576062491724,
"grad_norm": 0.1674995869398117,
"learning_rate": 7.114406779661017e-06,
"loss": 0.0843,
"step": 2724
},
{
"epoch": 2.884416788031246,
"grad_norm": 0.7793422937393188,
"learning_rate": 7.1133474576271185e-06,
"loss": 0.0873,
"step": 2725
},
{
"epoch": 2.885475969813319,
"grad_norm": 0.4437791109085083,
"learning_rate": 7.112288135593221e-06,
"loss": 0.0852,
"step": 2726
},
{
"epoch": 2.8865351515953925,
"grad_norm": 0.20173686742782593,
"learning_rate": 7.111228813559322e-06,
"loss": 0.0868,
"step": 2727
},
{
"epoch": 2.887594333377466,
"grad_norm": 0.25035056471824646,
"learning_rate": 7.1101694915254235e-06,
"loss": 0.082,
"step": 2728
},
{
"epoch": 2.888653515159539,
"grad_norm": 0.5662713646888733,
"learning_rate": 7.109110169491527e-06,
"loss": 0.0843,
"step": 2729
},
{
"epoch": 2.8897126969416127,
"grad_norm": 0.26108983159065247,
"learning_rate": 7.108050847457628e-06,
"loss": 0.0821,
"step": 2730
},
{
"epoch": 2.890771878723686,
"grad_norm": 0.37050431966781616,
"learning_rate": 7.1069915254237295e-06,
"loss": 0.0842,
"step": 2731
},
{
"epoch": 2.8918310605057593,
"grad_norm": 0.2342778444290161,
"learning_rate": 7.105932203389832e-06,
"loss": 0.0866,
"step": 2732
},
{
"epoch": 2.892890242287833,
"grad_norm": 0.47755783796310425,
"learning_rate": 7.104872881355933e-06,
"loss": 0.0867,
"step": 2733
},
{
"epoch": 2.893949424069906,
"grad_norm": 0.17551547288894653,
"learning_rate": 7.1038135593220345e-06,
"loss": 0.0862,
"step": 2734
},
{
"epoch": 2.8950086058519795,
"grad_norm": 0.19548620283603668,
"learning_rate": 7.102754237288137e-06,
"loss": 0.0865,
"step": 2735
},
{
"epoch": 2.8960677876340526,
"grad_norm": 0.21691496670246124,
"learning_rate": 7.101694915254238e-06,
"loss": 0.0856,
"step": 2736
},
{
"epoch": 2.897126969416126,
"grad_norm": 0.4669831693172455,
"learning_rate": 7.10063559322034e-06,
"loss": 0.0857,
"step": 2737
},
{
"epoch": 2.8981861511981997,
"grad_norm": 0.31361842155456543,
"learning_rate": 7.099576271186441e-06,
"loss": 0.0839,
"step": 2738
},
{
"epoch": 2.8992453329802728,
"grad_norm": 0.2279847115278244,
"learning_rate": 7.098516949152543e-06,
"loss": 0.0846,
"step": 2739
},
{
"epoch": 2.900304514762346,
"grad_norm": 0.2264779955148697,
"learning_rate": 7.097457627118645e-06,
"loss": 0.0835,
"step": 2740
},
{
"epoch": 2.9013636965444194,
"grad_norm": 0.2287377566099167,
"learning_rate": 7.096398305084746e-06,
"loss": 0.086,
"step": 2741
},
{
"epoch": 2.902422878326493,
"grad_norm": 0.2082810401916504,
"learning_rate": 7.0953389830508475e-06,
"loss": 0.0838,
"step": 2742
},
{
"epoch": 2.903482060108566,
"grad_norm": 0.1720813512802124,
"learning_rate": 7.09427966101695e-06,
"loss": 0.0851,
"step": 2743
},
{
"epoch": 2.9045412418906396,
"grad_norm": 0.223737433552742,
"learning_rate": 7.093220338983051e-06,
"loss": 0.0867,
"step": 2744
},
{
"epoch": 2.9056004236727127,
"grad_norm": 0.28665691614151,
"learning_rate": 7.092161016949153e-06,
"loss": 0.0872,
"step": 2745
},
{
"epoch": 2.906659605454786,
"grad_norm": 0.25986793637275696,
"learning_rate": 7.091101694915255e-06,
"loss": 0.0835,
"step": 2746
},
{
"epoch": 2.9077187872368597,
"grad_norm": 0.28776562213897705,
"learning_rate": 7.090042372881356e-06,
"loss": 0.0843,
"step": 2747
},
{
"epoch": 2.908777969018933,
"grad_norm": 0.6488966941833496,
"learning_rate": 7.088983050847458e-06,
"loss": 0.088,
"step": 2748
},
{
"epoch": 2.9098371508010064,
"grad_norm": 0.21989387273788452,
"learning_rate": 7.087923728813559e-06,
"loss": 0.0811,
"step": 2749
},
{
"epoch": 2.9108963325830794,
"grad_norm": 0.6076532006263733,
"learning_rate": 7.086864406779662e-06,
"loss": 0.0831,
"step": 2750
},
{
"epoch": 2.911955514365153,
"grad_norm": 0.4277835488319397,
"learning_rate": 7.085805084745764e-06,
"loss": 0.0866,
"step": 2751
},
{
"epoch": 2.9130146961472265,
"grad_norm": 0.23475177586078644,
"learning_rate": 7.084745762711865e-06,
"loss": 0.0841,
"step": 2752
},
{
"epoch": 2.9140738779292996,
"grad_norm": 0.3141447901725769,
"learning_rate": 7.083686440677967e-06,
"loss": 0.0869,
"step": 2753
},
{
"epoch": 2.9151330597113727,
"grad_norm": 0.20060321688652039,
"learning_rate": 7.082627118644069e-06,
"loss": 0.0907,
"step": 2754
},
{
"epoch": 2.9161922414934462,
"grad_norm": 0.1911022663116455,
"learning_rate": 7.08156779661017e-06,
"loss": 0.0892,
"step": 2755
},
{
"epoch": 2.91725142327552,
"grad_norm": 0.7516857385635376,
"learning_rate": 7.080508474576272e-06,
"loss": 0.0818,
"step": 2756
},
{
"epoch": 2.918310605057593,
"grad_norm": 0.2007245123386383,
"learning_rate": 7.079449152542374e-06,
"loss": 0.0863,
"step": 2757
},
{
"epoch": 2.9193697868396664,
"grad_norm": 0.3953344225883484,
"learning_rate": 7.078389830508475e-06,
"loss": 0.0874,
"step": 2758
},
{
"epoch": 2.9204289686217395,
"grad_norm": 0.19172687828540802,
"learning_rate": 7.077330508474577e-06,
"loss": 0.0893,
"step": 2759
},
{
"epoch": 2.921488150403813,
"grad_norm": 0.5571119785308838,
"learning_rate": 7.076271186440679e-06,
"loss": 0.0847,
"step": 2760
},
{
"epoch": 2.9225473321858866,
"grad_norm": 0.30791357159614563,
"learning_rate": 7.07521186440678e-06,
"loss": 0.0867,
"step": 2761
},
{
"epoch": 2.9236065139679597,
"grad_norm": 0.2687133252620697,
"learning_rate": 7.074152542372882e-06,
"loss": 0.0836,
"step": 2762
},
{
"epoch": 2.924665695750033,
"grad_norm": 0.2431352734565735,
"learning_rate": 7.073093220338983e-06,
"loss": 0.0804,
"step": 2763
},
{
"epoch": 2.9257248775321063,
"grad_norm": 0.18212385475635529,
"learning_rate": 7.072033898305085e-06,
"loss": 0.0851,
"step": 2764
},
{
"epoch": 2.92678405931418,
"grad_norm": 0.22017893195152283,
"learning_rate": 7.070974576271187e-06,
"loss": 0.0842,
"step": 2765
},
{
"epoch": 2.9278432410962534,
"grad_norm": 0.34417974948883057,
"learning_rate": 7.069915254237288e-06,
"loss": 0.0822,
"step": 2766
},
{
"epoch": 2.9289024228783265,
"grad_norm": 0.26310163736343384,
"learning_rate": 7.06885593220339e-06,
"loss": 0.0808,
"step": 2767
},
{
"epoch": 2.9299616046603996,
"grad_norm": 0.19449247419834137,
"learning_rate": 7.067796610169492e-06,
"loss": 0.083,
"step": 2768
},
{
"epoch": 2.931020786442473,
"grad_norm": 0.19786173105239868,
"learning_rate": 7.066737288135593e-06,
"loss": 0.0871,
"step": 2769
},
{
"epoch": 2.9320799682245466,
"grad_norm": 0.19906456768512726,
"learning_rate": 7.065677966101695e-06,
"loss": 0.0846,
"step": 2770
},
{
"epoch": 2.9331391500066197,
"grad_norm": 0.18128810822963715,
"learning_rate": 7.064618644067798e-06,
"loss": 0.0857,
"step": 2771
},
{
"epoch": 2.9341983317886933,
"grad_norm": 0.5061360597610474,
"learning_rate": 7.063559322033899e-06,
"loss": 0.0858,
"step": 2772
},
{
"epoch": 2.9352575135707664,
"grad_norm": 0.8924757242202759,
"learning_rate": 7.062500000000001e-06,
"loss": 0.0853,
"step": 2773
},
{
"epoch": 2.93631669535284,
"grad_norm": 0.25123855471611023,
"learning_rate": 7.061440677966103e-06,
"loss": 0.0846,
"step": 2774
},
{
"epoch": 2.9373758771349134,
"grad_norm": 0.2065630555152893,
"learning_rate": 7.060381355932204e-06,
"loss": 0.0836,
"step": 2775
},
{
"epoch": 2.9384350589169865,
"grad_norm": 0.25200194120407104,
"learning_rate": 7.059322033898306e-06,
"loss": 0.0836,
"step": 2776
},
{
"epoch": 2.93949424069906,
"grad_norm": 0.22266460955142975,
"learning_rate": 7.058262711864408e-06,
"loss": 0.0864,
"step": 2777
},
{
"epoch": 2.940553422481133,
"grad_norm": 0.3489341139793396,
"learning_rate": 7.057203389830509e-06,
"loss": 0.085,
"step": 2778
},
{
"epoch": 2.9416126042632067,
"grad_norm": 0.3063749372959137,
"learning_rate": 7.056144067796611e-06,
"loss": 0.0877,
"step": 2779
},
{
"epoch": 2.9426717860452802,
"grad_norm": 0.23221157491207123,
"learning_rate": 7.055084745762712e-06,
"loss": 0.0884,
"step": 2780
},
{
"epoch": 2.9437309678273533,
"grad_norm": 0.248238205909729,
"learning_rate": 7.054025423728814e-06,
"loss": 0.0828,
"step": 2781
},
{
"epoch": 2.944790149609427,
"grad_norm": 0.20400157570838928,
"learning_rate": 7.052966101694916e-06,
"loss": 0.0866,
"step": 2782
},
{
"epoch": 2.9458493313915,
"grad_norm": 1.1154899597167969,
"learning_rate": 7.051906779661017e-06,
"loss": 0.0817,
"step": 2783
},
{
"epoch": 2.9469085131735735,
"grad_norm": 0.17604859173297882,
"learning_rate": 7.0508474576271195e-06,
"loss": 0.0835,
"step": 2784
},
{
"epoch": 2.947967694955647,
"grad_norm": 0.2628978490829468,
"learning_rate": 7.049788135593221e-06,
"loss": 0.0831,
"step": 2785
},
{
"epoch": 2.94902687673772,
"grad_norm": 0.36698588728904724,
"learning_rate": 7.048728813559322e-06,
"loss": 0.0853,
"step": 2786
},
{
"epoch": 2.950086058519793,
"grad_norm": 0.21428033709526062,
"learning_rate": 7.047669491525424e-06,
"loss": 0.0818,
"step": 2787
},
{
"epoch": 2.9511452403018668,
"grad_norm": 0.2346959263086319,
"learning_rate": 7.046610169491526e-06,
"loss": 0.0841,
"step": 2788
},
{
"epoch": 2.9522044220839403,
"grad_norm": 0.17298530042171478,
"learning_rate": 7.045550847457627e-06,
"loss": 0.085,
"step": 2789
},
{
"epoch": 2.9532636038660134,
"grad_norm": 0.2308352142572403,
"learning_rate": 7.044491525423729e-06,
"loss": 0.0816,
"step": 2790
},
{
"epoch": 2.954322785648087,
"grad_norm": 0.6685366034507751,
"learning_rate": 7.04343220338983e-06,
"loss": 0.0851,
"step": 2791
},
{
"epoch": 2.95538196743016,
"grad_norm": 0.7078801989555359,
"learning_rate": 7.042372881355933e-06,
"loss": 0.0836,
"step": 2792
},
{
"epoch": 2.9564411492122336,
"grad_norm": 0.3219440281391144,
"learning_rate": 7.041313559322035e-06,
"loss": 0.0847,
"step": 2793
},
{
"epoch": 2.957500330994307,
"grad_norm": 0.47597241401672363,
"learning_rate": 7.040254237288137e-06,
"loss": 0.0822,
"step": 2794
},
{
"epoch": 2.95855951277638,
"grad_norm": 0.26617932319641113,
"learning_rate": 7.039194915254238e-06,
"loss": 0.0879,
"step": 2795
},
{
"epoch": 2.9596186945584537,
"grad_norm": 0.1694178730249405,
"learning_rate": 7.03813559322034e-06,
"loss": 0.0842,
"step": 2796
},
{
"epoch": 2.960677876340527,
"grad_norm": 0.568081259727478,
"learning_rate": 7.037076271186441e-06,
"loss": 0.0825,
"step": 2797
},
{
"epoch": 2.9617370581226004,
"grad_norm": 0.31791046261787415,
"learning_rate": 7.0360169491525435e-06,
"loss": 0.0831,
"step": 2798
},
{
"epoch": 2.962796239904674,
"grad_norm": 0.44022175669670105,
"learning_rate": 7.034957627118645e-06,
"loss": 0.0832,
"step": 2799
},
{
"epoch": 2.963855421686747,
"grad_norm": 0.33043715357780457,
"learning_rate": 7.033898305084746e-06,
"loss": 0.0855,
"step": 2800
},
{
"epoch": 2.96491460346882,
"grad_norm": 0.1991463303565979,
"learning_rate": 7.032838983050848e-06,
"loss": 0.084,
"step": 2801
},
{
"epoch": 2.9659737852508936,
"grad_norm": 0.2289704978466034,
"learning_rate": 7.03177966101695e-06,
"loss": 0.0832,
"step": 2802
},
{
"epoch": 2.967032967032967,
"grad_norm": 0.7853215336799622,
"learning_rate": 7.030720338983051e-06,
"loss": 0.0877,
"step": 2803
},
{
"epoch": 2.9680921488150402,
"grad_norm": 0.1657303422689438,
"learning_rate": 7.029661016949153e-06,
"loss": 0.0847,
"step": 2804
},
{
"epoch": 2.969151330597114,
"grad_norm": 0.2773841619491577,
"learning_rate": 7.028601694915255e-06,
"loss": 0.0849,
"step": 2805
},
{
"epoch": 2.970210512379187,
"grad_norm": 0.22976569831371307,
"learning_rate": 7.0275423728813564e-06,
"loss": 0.0852,
"step": 2806
},
{
"epoch": 2.9712696941612604,
"grad_norm": 1.9576987028121948,
"learning_rate": 7.026483050847458e-06,
"loss": 0.0868,
"step": 2807
},
{
"epoch": 2.972328875943334,
"grad_norm": 0.8150646686553955,
"learning_rate": 7.025423728813559e-06,
"loss": 0.0839,
"step": 2808
},
{
"epoch": 2.973388057725407,
"grad_norm": 0.18944455683231354,
"learning_rate": 7.0243644067796615e-06,
"loss": 0.0835,
"step": 2809
},
{
"epoch": 2.9744472395074806,
"grad_norm": 0.20776700973510742,
"learning_rate": 7.023305084745763e-06,
"loss": 0.0848,
"step": 2810
},
{
"epoch": 2.9755064212895537,
"grad_norm": 0.2295432835817337,
"learning_rate": 7.022245762711864e-06,
"loss": 0.086,
"step": 2811
},
{
"epoch": 2.976565603071627,
"grad_norm": 0.3069179058074951,
"learning_rate": 7.021186440677967e-06,
"loss": 0.0884,
"step": 2812
},
{
"epoch": 2.9776247848537007,
"grad_norm": 0.29059523344039917,
"learning_rate": 7.020127118644068e-06,
"loss": 0.0862,
"step": 2813
},
{
"epoch": 2.978683966635774,
"grad_norm": 0.528268039226532,
"learning_rate": 7.01906779661017e-06,
"loss": 0.0857,
"step": 2814
},
{
"epoch": 2.9797431484178474,
"grad_norm": 0.5123224854469299,
"learning_rate": 7.0180084745762725e-06,
"loss": 0.0858,
"step": 2815
},
{
"epoch": 2.9808023301999205,
"grad_norm": 0.5296421051025391,
"learning_rate": 7.016949152542374e-06,
"loss": 0.0813,
"step": 2816
},
{
"epoch": 2.981861511981994,
"grad_norm": 0.366913378238678,
"learning_rate": 7.015889830508475e-06,
"loss": 0.0821,
"step": 2817
},
{
"epoch": 2.982920693764067,
"grad_norm": 1.0378891229629517,
"learning_rate": 7.014830508474577e-06,
"loss": 0.0842,
"step": 2818
},
{
"epoch": 2.9839798755461406,
"grad_norm": 0.5042109489440918,
"learning_rate": 7.013771186440679e-06,
"loss": 0.0879,
"step": 2819
},
{
"epoch": 2.9850390573282137,
"grad_norm": 0.23699359595775604,
"learning_rate": 7.0127118644067804e-06,
"loss": 0.0856,
"step": 2820
},
{
"epoch": 2.9860982391102873,
"grad_norm": 0.24904802441596985,
"learning_rate": 7.011652542372882e-06,
"loss": 0.0865,
"step": 2821
},
{
"epoch": 2.987157420892361,
"grad_norm": 0.1702445149421692,
"learning_rate": 7.010593220338984e-06,
"loss": 0.0856,
"step": 2822
},
{
"epoch": 2.988216602674434,
"grad_norm": 0.22881048917770386,
"learning_rate": 7.0095338983050855e-06,
"loss": 0.0812,
"step": 2823
},
{
"epoch": 2.9892757844565074,
"grad_norm": 0.2885756194591522,
"learning_rate": 7.008474576271187e-06,
"loss": 0.0854,
"step": 2824
},
{
"epoch": 2.9903349662385805,
"grad_norm": 0.20414619147777557,
"learning_rate": 7.007415254237288e-06,
"loss": 0.0836,
"step": 2825
},
{
"epoch": 2.991394148020654,
"grad_norm": 0.29608118534088135,
"learning_rate": 7.006355932203391e-06,
"loss": 0.0852,
"step": 2826
},
{
"epoch": 2.9924533298027276,
"grad_norm": 0.23209474980831146,
"learning_rate": 7.005296610169492e-06,
"loss": 0.0856,
"step": 2827
},
{
"epoch": 2.9935125115848007,
"grad_norm": 0.46132296323776245,
"learning_rate": 7.004237288135593e-06,
"loss": 0.0867,
"step": 2828
},
{
"epoch": 2.9945716933668742,
"grad_norm": 0.3404957950115204,
"learning_rate": 7.003177966101695e-06,
"loss": 0.0802,
"step": 2829
},
{
"epoch": 2.9956308751489473,
"grad_norm": 0.22683490812778473,
"learning_rate": 7.002118644067797e-06,
"loss": 0.0851,
"step": 2830
},
{
"epoch": 2.996690056931021,
"grad_norm": 0.5285332202911377,
"learning_rate": 7.0010593220338985e-06,
"loss": 0.0848,
"step": 2831
},
{
"epoch": 2.9977492387130944,
"grad_norm": 0.19259807467460632,
"learning_rate": 7e-06,
"loss": 0.0876,
"step": 2832
},
{
"epoch": 2.9977492387130944,
"eval_accuracy": 0.9844,
"eval_best_f1_from_thresholding": 0.20408163265306123,
"eval_loss": 0.13191111385822296,
"eval_matthews_corrcoef": 0.1964861252164667,
"eval_model_preparation_time": 0.0033,
"eval_negative_class_f1": 0.9921228034740457,
"eval_negative_class_precision": 0.9926240274830757,
"eval_negative_class_recall": 0.9916220853941657,
"eval_positive_class_f1": 0.20408163265306123,
"eval_positive_class_precision": 0.1941747572815534,
"eval_positive_class_recall": 0.21505376344086022,
"eval_roc_auc": 0.7971723045831611,
"eval_runtime": 20.7342,
"eval_samples_per_second": 482.295,
"eval_steps_per_second": 7.572,
"step": 2832
}
],
"logging_steps": 1,
"max_steps": 9440,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 944,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.0462165606190285e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}