{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988593155893536,
"eval_steps": 500,
"global_step": 1970,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0025348542458808617,
"grad_norm": 1.1835554838180542,
"learning_rate": 0.0,
"loss": 2.7162,
"step": 1
},
{
"epoch": 0.005069708491761723,
"grad_norm": 1.1406067609786987,
"learning_rate": 4e-05,
"loss": 2.7021,
"step": 2
},
{
"epoch": 0.0076045627376425855,
"grad_norm": 1.1929512023925781,
"learning_rate": 8e-05,
"loss": 2.5728,
"step": 3
},
{
"epoch": 0.010139416983523447,
"grad_norm": 1.523325800895691,
"learning_rate": 0.00012,
"loss": 2.5825,
"step": 4
},
{
"epoch": 0.012674271229404309,
"grad_norm": 1.712708592414856,
"learning_rate": 0.00016,
"loss": 2.1986,
"step": 5
},
{
"epoch": 0.015209125475285171,
"grad_norm": 1.263485312461853,
"learning_rate": 0.0002,
"loss": 2.1478,
"step": 6
},
{
"epoch": 0.017743979721166033,
"grad_norm": 1.2837083339691162,
"learning_rate": 0.00019989821882951655,
"loss": 2.2153,
"step": 7
},
{
"epoch": 0.020278833967046894,
"grad_norm": 1.0831111669540405,
"learning_rate": 0.0001997964376590331,
"loss": 1.9272,
"step": 8
},
{
"epoch": 0.022813688212927757,
"grad_norm": 0.7921498417854309,
"learning_rate": 0.00019969465648854963,
"loss": 1.4929,
"step": 9
},
{
"epoch": 0.025348542458808618,
"grad_norm": 0.9243067502975464,
"learning_rate": 0.00019959287531806617,
"loss": 1.4312,
"step": 10
},
{
"epoch": 0.02788339670468948,
"grad_norm": 1.2378944158554077,
"learning_rate": 0.0001994910941475827,
"loss": 1.1605,
"step": 11
},
{
"epoch": 0.030418250950570342,
"grad_norm": 1.401106834411621,
"learning_rate": 0.00019938931297709925,
"loss": 1.0236,
"step": 12
},
{
"epoch": 0.032953105196451206,
"grad_norm": 1.0503413677215576,
"learning_rate": 0.00019928753180661578,
"loss": 0.8441,
"step": 13
},
{
"epoch": 0.035487959442332066,
"grad_norm": 0.928716778755188,
"learning_rate": 0.00019918575063613232,
"loss": 0.8098,
"step": 14
},
{
"epoch": 0.03802281368821293,
"grad_norm": 0.6546494364738464,
"learning_rate": 0.00019908396946564886,
"loss": 0.5083,
"step": 15
},
{
"epoch": 0.04055766793409379,
"grad_norm": 0.8399775624275208,
"learning_rate": 0.0001989821882951654,
"loss": 0.5798,
"step": 16
},
{
"epoch": 0.043092522179974654,
"grad_norm": 0.6111662983894348,
"learning_rate": 0.00019888040712468194,
"loss": 0.471,
"step": 17
},
{
"epoch": 0.045627376425855515,
"grad_norm": 0.6786199808120728,
"learning_rate": 0.00019877862595419848,
"loss": 0.5124,
"step": 18
},
{
"epoch": 0.048162230671736375,
"grad_norm": 0.7001961469650269,
"learning_rate": 0.00019867684478371502,
"loss": 0.5764,
"step": 19
},
{
"epoch": 0.050697084917617236,
"grad_norm": 0.5670634508132935,
"learning_rate": 0.00019857506361323156,
"loss": 0.5595,
"step": 20
},
{
"epoch": 0.053231939163498096,
"grad_norm": 0.6825580596923828,
"learning_rate": 0.0001984732824427481,
"loss": 0.6601,
"step": 21
},
{
"epoch": 0.05576679340937896,
"grad_norm": 0.5777536630630493,
"learning_rate": 0.00019837150127226464,
"loss": 0.6232,
"step": 22
},
{
"epoch": 0.058301647655259824,
"grad_norm": 0.7791958451271057,
"learning_rate": 0.00019826972010178118,
"loss": 0.4741,
"step": 23
},
{
"epoch": 0.060836501901140684,
"grad_norm": 0.7647196054458618,
"learning_rate": 0.00019816793893129772,
"loss": 0.574,
"step": 24
},
{
"epoch": 0.06337135614702155,
"grad_norm": 0.6175855398178101,
"learning_rate": 0.00019806615776081426,
"loss": 0.6792,
"step": 25
},
{
"epoch": 0.06590621039290241,
"grad_norm": 0.7071298360824585,
"learning_rate": 0.0001979643765903308,
"loss": 0.6333,
"step": 26
},
{
"epoch": 0.06844106463878327,
"grad_norm": 0.7675352692604065,
"learning_rate": 0.00019786259541984734,
"loss": 0.5004,
"step": 27
},
{
"epoch": 0.07097591888466413,
"grad_norm": 0.6224766969680786,
"learning_rate": 0.00019776081424936387,
"loss": 0.5649,
"step": 28
},
{
"epoch": 0.07351077313054499,
"grad_norm": 0.6023550629615784,
"learning_rate": 0.00019765903307888041,
"loss": 0.4004,
"step": 29
},
{
"epoch": 0.07604562737642585,
"grad_norm": 0.6253474354743958,
"learning_rate": 0.00019755725190839695,
"loss": 0.548,
"step": 30
},
{
"epoch": 0.07858048162230671,
"grad_norm": 0.43560266494750977,
"learning_rate": 0.00019745547073791352,
"loss": 0.4721,
"step": 31
},
{
"epoch": 0.08111533586818757,
"grad_norm": 0.6321932077407837,
"learning_rate": 0.00019735368956743003,
"loss": 0.4671,
"step": 32
},
{
"epoch": 0.08365019011406843,
"grad_norm": 0.41977155208587646,
"learning_rate": 0.00019725190839694657,
"loss": 0.3716,
"step": 33
},
{
"epoch": 0.08618504435994931,
"grad_norm": 0.4449223279953003,
"learning_rate": 0.0001971501272264631,
"loss": 0.6045,
"step": 34
},
{
"epoch": 0.08871989860583017,
"grad_norm": 0.5593668222427368,
"learning_rate": 0.00019704834605597965,
"loss": 0.3789,
"step": 35
},
{
"epoch": 0.09125475285171103,
"grad_norm": 0.4293775260448456,
"learning_rate": 0.0001969465648854962,
"loss": 0.3834,
"step": 36
},
{
"epoch": 0.09378960709759189,
"grad_norm": 0.49535441398620605,
"learning_rate": 0.00019684478371501273,
"loss": 0.5504,
"step": 37
},
{
"epoch": 0.09632446134347275,
"grad_norm": 0.4620949625968933,
"learning_rate": 0.00019674300254452927,
"loss": 0.3212,
"step": 38
},
{
"epoch": 0.09885931558935361,
"grad_norm": 0.46665605902671814,
"learning_rate": 0.0001966412213740458,
"loss": 0.4868,
"step": 39
},
{
"epoch": 0.10139416983523447,
"grad_norm": 0.4120428264141083,
"learning_rate": 0.00019653944020356235,
"loss": 0.4926,
"step": 40
},
{
"epoch": 0.10392902408111533,
"grad_norm": 0.41570335626602173,
"learning_rate": 0.00019643765903307889,
"loss": 0.5068,
"step": 41
},
{
"epoch": 0.10646387832699619,
"grad_norm": 0.4141896665096283,
"learning_rate": 0.00019633587786259542,
"loss": 0.4064,
"step": 42
},
{
"epoch": 0.10899873257287707,
"grad_norm": 0.3192928433418274,
"learning_rate": 0.00019623409669211196,
"loss": 0.4581,
"step": 43
},
{
"epoch": 0.11153358681875793,
"grad_norm": 0.4188425838947296,
"learning_rate": 0.00019613231552162853,
"loss": 0.371,
"step": 44
},
{
"epoch": 0.11406844106463879,
"grad_norm": 0.3750368654727936,
"learning_rate": 0.00019603053435114504,
"loss": 0.3728,
"step": 45
},
{
"epoch": 0.11660329531051965,
"grad_norm": 0.5102046728134155,
"learning_rate": 0.00019592875318066158,
"loss": 0.357,
"step": 46
},
{
"epoch": 0.11913814955640051,
"grad_norm": 0.4143039882183075,
"learning_rate": 0.00019582697201017812,
"loss": 0.4373,
"step": 47
},
{
"epoch": 0.12167300380228137,
"grad_norm": 0.42558473348617554,
"learning_rate": 0.00019572519083969466,
"loss": 0.5877,
"step": 48
},
{
"epoch": 0.12420785804816223,
"grad_norm": 0.35768038034439087,
"learning_rate": 0.0001956234096692112,
"loss": 0.3326,
"step": 49
},
{
"epoch": 0.1267427122940431,
"grad_norm": 0.32826319336891174,
"learning_rate": 0.00019552162849872774,
"loss": 0.3521,
"step": 50
},
{
"epoch": 0.12927756653992395,
"grad_norm": 0.3507271409034729,
"learning_rate": 0.00019541984732824428,
"loss": 0.4157,
"step": 51
},
{
"epoch": 0.13181242078580482,
"grad_norm": 0.5069169402122498,
"learning_rate": 0.00019531806615776082,
"loss": 0.4453,
"step": 52
},
{
"epoch": 0.13434727503168567,
"grad_norm": 0.4759957492351532,
"learning_rate": 0.00019521628498727736,
"loss": 0.5131,
"step": 53
},
{
"epoch": 0.13688212927756654,
"grad_norm": 0.4045158326625824,
"learning_rate": 0.0001951145038167939,
"loss": 0.3927,
"step": 54
},
{
"epoch": 0.1394169835234474,
"grad_norm": 0.49629393219947815,
"learning_rate": 0.00019501272264631046,
"loss": 0.4708,
"step": 55
},
{
"epoch": 0.14195183776932827,
"grad_norm": 0.3735599219799042,
"learning_rate": 0.00019491094147582698,
"loss": 0.4076,
"step": 56
},
{
"epoch": 0.1444866920152091,
"grad_norm": 0.4713466763496399,
"learning_rate": 0.00019480916030534354,
"loss": 0.4187,
"step": 57
},
{
"epoch": 0.14702154626108999,
"grad_norm": 0.6454377770423889,
"learning_rate": 0.00019470737913486005,
"loss": 0.4032,
"step": 58
},
{
"epoch": 0.14955640050697086,
"grad_norm": 0.39378786087036133,
"learning_rate": 0.00019460559796437662,
"loss": 0.3508,
"step": 59
},
{
"epoch": 0.1520912547528517,
"grad_norm": 0.3768695592880249,
"learning_rate": 0.00019450381679389313,
"loss": 0.3129,
"step": 60
},
{
"epoch": 0.15462610899873258,
"grad_norm": 0.4250476062297821,
"learning_rate": 0.00019440203562340967,
"loss": 0.3426,
"step": 61
},
{
"epoch": 0.15716096324461343,
"grad_norm": 0.3653964698314667,
"learning_rate": 0.0001943002544529262,
"loss": 0.3339,
"step": 62
},
{
"epoch": 0.1596958174904943,
"grad_norm": 0.4973353445529938,
"learning_rate": 0.00019419847328244275,
"loss": 0.4759,
"step": 63
},
{
"epoch": 0.16223067173637515,
"grad_norm": 0.41738295555114746,
"learning_rate": 0.0001940966921119593,
"loss": 0.3809,
"step": 64
},
{
"epoch": 0.16476552598225602,
"grad_norm": 0.42326119542121887,
"learning_rate": 0.00019399491094147583,
"loss": 0.3399,
"step": 65
},
{
"epoch": 0.16730038022813687,
"grad_norm": 0.4244116246700287,
"learning_rate": 0.00019389312977099237,
"loss": 0.4085,
"step": 66
},
{
"epoch": 0.16983523447401774,
"grad_norm": 0.40235379338264465,
"learning_rate": 0.0001937913486005089,
"loss": 0.3016,
"step": 67
},
{
"epoch": 0.17237008871989862,
"grad_norm": 0.3983120322227478,
"learning_rate": 0.00019368956743002547,
"loss": 0.5101,
"step": 68
},
{
"epoch": 0.17490494296577946,
"grad_norm": 0.4857071042060852,
"learning_rate": 0.00019358778625954199,
"loss": 0.3131,
"step": 69
},
{
"epoch": 0.17743979721166034,
"grad_norm": 0.5238108038902283,
"learning_rate": 0.00019348600508905855,
"loss": 0.5841,
"step": 70
},
{
"epoch": 0.17997465145754118,
"grad_norm": 0.5322052240371704,
"learning_rate": 0.00019338422391857506,
"loss": 0.3895,
"step": 71
},
{
"epoch": 0.18250950570342206,
"grad_norm": 0.4643409252166748,
"learning_rate": 0.00019328244274809163,
"loss": 0.364,
"step": 72
},
{
"epoch": 0.1850443599493029,
"grad_norm": 0.36517271399497986,
"learning_rate": 0.00019318066157760814,
"loss": 0.4092,
"step": 73
},
{
"epoch": 0.18757921419518378,
"grad_norm": 0.49409031867980957,
"learning_rate": 0.00019307888040712468,
"loss": 0.3359,
"step": 74
},
{
"epoch": 0.19011406844106463,
"grad_norm": 0.44665688276290894,
"learning_rate": 0.00019297709923664122,
"loss": 0.3275,
"step": 75
},
{
"epoch": 0.1926489226869455,
"grad_norm": 0.353208065032959,
"learning_rate": 0.00019287531806615776,
"loss": 0.3396,
"step": 76
},
{
"epoch": 0.19518377693282637,
"grad_norm": 0.4061962366104126,
"learning_rate": 0.0001927735368956743,
"loss": 0.4658,
"step": 77
},
{
"epoch": 0.19771863117870722,
"grad_norm": 0.4785591959953308,
"learning_rate": 0.00019267175572519084,
"loss": 0.4705,
"step": 78
},
{
"epoch": 0.2002534854245881,
"grad_norm": 0.44644224643707275,
"learning_rate": 0.00019256997455470738,
"loss": 0.3573,
"step": 79
},
{
"epoch": 0.20278833967046894,
"grad_norm": 0.4554955065250397,
"learning_rate": 0.00019246819338422392,
"loss": 0.3822,
"step": 80
},
{
"epoch": 0.20532319391634982,
"grad_norm": 0.4537349343299866,
"learning_rate": 0.00019236641221374049,
"loss": 0.5222,
"step": 81
},
{
"epoch": 0.20785804816223066,
"grad_norm": 0.32820987701416016,
"learning_rate": 0.000192264631043257,
"loss": 0.3185,
"step": 82
},
{
"epoch": 0.21039290240811154,
"grad_norm": 0.39827391505241394,
"learning_rate": 0.00019216284987277356,
"loss": 0.3693,
"step": 83
},
{
"epoch": 0.21292775665399238,
"grad_norm": 0.4188093841075897,
"learning_rate": 0.00019206106870229008,
"loss": 0.4168,
"step": 84
},
{
"epoch": 0.21546261089987326,
"grad_norm": 0.4770517349243164,
"learning_rate": 0.00019195928753180664,
"loss": 0.4113,
"step": 85
},
{
"epoch": 0.21799746514575413,
"grad_norm": 0.346224844455719,
"learning_rate": 0.00019185750636132315,
"loss": 0.4238,
"step": 86
},
{
"epoch": 0.22053231939163498,
"grad_norm": 0.37398770451545715,
"learning_rate": 0.00019175572519083972,
"loss": 0.4285,
"step": 87
},
{
"epoch": 0.22306717363751585,
"grad_norm": 0.35467982292175293,
"learning_rate": 0.00019165394402035623,
"loss": 0.3201,
"step": 88
},
{
"epoch": 0.2256020278833967,
"grad_norm": 0.3411659002304077,
"learning_rate": 0.00019155216284987277,
"loss": 0.3428,
"step": 89
},
{
"epoch": 0.22813688212927757,
"grad_norm": 0.4002087712287903,
"learning_rate": 0.0001914503816793893,
"loss": 0.5375,
"step": 90
},
{
"epoch": 0.23067173637515842,
"grad_norm": 0.4339190423488617,
"learning_rate": 0.00019134860050890585,
"loss": 0.3355,
"step": 91
},
{
"epoch": 0.2332065906210393,
"grad_norm": 0.43449410796165466,
"learning_rate": 0.00019124681933842242,
"loss": 0.4355,
"step": 92
},
{
"epoch": 0.23574144486692014,
"grad_norm": 0.4565323293209076,
"learning_rate": 0.00019114503816793893,
"loss": 0.3178,
"step": 93
},
{
"epoch": 0.23827629911280102,
"grad_norm": 0.46309894323349,
"learning_rate": 0.0001910432569974555,
"loss": 0.3308,
"step": 94
},
{
"epoch": 0.24081115335868186,
"grad_norm": 0.3554096817970276,
"learning_rate": 0.000190941475826972,
"loss": 0.3358,
"step": 95
},
{
"epoch": 0.24334600760456274,
"grad_norm": 0.39129987359046936,
"learning_rate": 0.00019083969465648857,
"loss": 0.3988,
"step": 96
},
{
"epoch": 0.2458808618504436,
"grad_norm": 0.4193456470966339,
"learning_rate": 0.0001907379134860051,
"loss": 0.4064,
"step": 97
},
{
"epoch": 0.24841571609632446,
"grad_norm": 0.39571425318717957,
"learning_rate": 0.00019063613231552165,
"loss": 0.3213,
"step": 98
},
{
"epoch": 0.2509505703422053,
"grad_norm": 0.48566195368766785,
"learning_rate": 0.00019053435114503817,
"loss": 0.3505,
"step": 99
},
{
"epoch": 0.2534854245880862,
"grad_norm": 0.43266433477401733,
"learning_rate": 0.00019043256997455473,
"loss": 0.3579,
"step": 100
},
{
"epoch": 0.25602027883396705,
"grad_norm": 0.31110769510269165,
"learning_rate": 0.00019033078880407124,
"loss": 0.2832,
"step": 101
},
{
"epoch": 0.2585551330798479,
"grad_norm": 0.40166690945625305,
"learning_rate": 0.00019022900763358778,
"loss": 0.2964,
"step": 102
},
{
"epoch": 0.26108998732572875,
"grad_norm": 0.554072380065918,
"learning_rate": 0.00019012722646310432,
"loss": 0.3661,
"step": 103
},
{
"epoch": 0.26362484157160965,
"grad_norm": 0.45009374618530273,
"learning_rate": 0.00019002544529262086,
"loss": 0.3812,
"step": 104
},
{
"epoch": 0.2661596958174905,
"grad_norm": 0.48349273204803467,
"learning_rate": 0.00018992366412213743,
"loss": 0.4183,
"step": 105
},
{
"epoch": 0.26869455006337134,
"grad_norm": 0.4157555103302002,
"learning_rate": 0.00018982188295165394,
"loss": 0.2962,
"step": 106
},
{
"epoch": 0.27122940430925224,
"grad_norm": 0.3300265073776245,
"learning_rate": 0.0001897201017811705,
"loss": 0.3351,
"step": 107
},
{
"epoch": 0.2737642585551331,
"grad_norm": 0.3690893054008484,
"learning_rate": 0.00018961832061068702,
"loss": 0.3251,
"step": 108
},
{
"epoch": 0.27629911280101394,
"grad_norm": 0.49013710021972656,
"learning_rate": 0.00018951653944020359,
"loss": 0.4757,
"step": 109
},
{
"epoch": 0.2788339670468948,
"grad_norm": 0.4416143000125885,
"learning_rate": 0.0001894147582697201,
"loss": 0.4421,
"step": 110
},
{
"epoch": 0.2813688212927757,
"grad_norm": 0.3613321781158447,
"learning_rate": 0.00018931297709923666,
"loss": 0.3475,
"step": 111
},
{
"epoch": 0.28390367553865653,
"grad_norm": 0.45548489689826965,
"learning_rate": 0.00018921119592875318,
"loss": 0.3587,
"step": 112
},
{
"epoch": 0.2864385297845374,
"grad_norm": 0.49439120292663574,
"learning_rate": 0.00018910941475826974,
"loss": 0.4017,
"step": 113
},
{
"epoch": 0.2889733840304182,
"grad_norm": 0.35214680433273315,
"learning_rate": 0.00018900763358778626,
"loss": 0.2645,
"step": 114
},
{
"epoch": 0.2915082382762991,
"grad_norm": 0.5512099266052246,
"learning_rate": 0.00018890585241730282,
"loss": 0.3736,
"step": 115
},
{
"epoch": 0.29404309252217997,
"grad_norm": 0.4146886467933655,
"learning_rate": 0.00018880407124681936,
"loss": 0.3361,
"step": 116
},
{
"epoch": 0.2965779467680608,
"grad_norm": 0.42954355478286743,
"learning_rate": 0.00018870229007633587,
"loss": 0.3841,
"step": 117
},
{
"epoch": 0.2991128010139417,
"grad_norm": 0.47189798951148987,
"learning_rate": 0.00018860050890585244,
"loss": 0.3591,
"step": 118
},
{
"epoch": 0.30164765525982257,
"grad_norm": 0.5082337260246277,
"learning_rate": 0.00018849872773536895,
"loss": 0.4249,
"step": 119
},
{
"epoch": 0.3041825095057034,
"grad_norm": 0.4005051255226135,
"learning_rate": 0.00018839694656488552,
"loss": 0.4433,
"step": 120
},
{
"epoch": 0.30671736375158426,
"grad_norm": 0.4730987250804901,
"learning_rate": 0.00018829516539440203,
"loss": 0.3575,
"step": 121
},
{
"epoch": 0.30925221799746516,
"grad_norm": 0.5227373242378235,
"learning_rate": 0.0001881933842239186,
"loss": 0.3511,
"step": 122
},
{
"epoch": 0.311787072243346,
"grad_norm": 0.3693684935569763,
"learning_rate": 0.0001880916030534351,
"loss": 0.3097,
"step": 123
},
{
"epoch": 0.31432192648922685,
"grad_norm": 0.45321500301361084,
"learning_rate": 0.00018798982188295168,
"loss": 0.4464,
"step": 124
},
{
"epoch": 0.31685678073510776,
"grad_norm": 0.3797638714313507,
"learning_rate": 0.0001878880407124682,
"loss": 0.328,
"step": 125
},
{
"epoch": 0.3193916349809886,
"grad_norm": 0.3996891975402832,
"learning_rate": 0.00018778625954198475,
"loss": 0.28,
"step": 126
},
{
"epoch": 0.32192648922686945,
"grad_norm": 0.3931027352809906,
"learning_rate": 0.00018768447837150127,
"loss": 0.2439,
"step": 127
},
{
"epoch": 0.3244613434727503,
"grad_norm": 0.4259742200374603,
"learning_rate": 0.00018758269720101783,
"loss": 0.3068,
"step": 128
},
{
"epoch": 0.3269961977186312,
"grad_norm": 0.4267159402370453,
"learning_rate": 0.00018748091603053437,
"loss": 0.3405,
"step": 129
},
{
"epoch": 0.32953105196451205,
"grad_norm": 0.41900908946990967,
"learning_rate": 0.0001873791348600509,
"loss": 0.327,
"step": 130
},
{
"epoch": 0.3320659062103929,
"grad_norm": 0.436499685049057,
"learning_rate": 0.00018727735368956745,
"loss": 0.5089,
"step": 131
},
{
"epoch": 0.33460076045627374,
"grad_norm": 0.43961402773857117,
"learning_rate": 0.00018717557251908396,
"loss": 0.339,
"step": 132
},
{
"epoch": 0.33713561470215464,
"grad_norm": 0.45645856857299805,
"learning_rate": 0.00018707379134860053,
"loss": 0.3738,
"step": 133
},
{
"epoch": 0.3396704689480355,
"grad_norm": 0.36948803067207336,
"learning_rate": 0.00018697201017811704,
"loss": 0.2777,
"step": 134
},
{
"epoch": 0.34220532319391633,
"grad_norm": 0.32040536403656006,
"learning_rate": 0.0001868702290076336,
"loss": 0.3679,
"step": 135
},
{
"epoch": 0.34474017743979724,
"grad_norm": 0.37474381923675537,
"learning_rate": 0.00018676844783715012,
"loss": 0.4282,
"step": 136
},
{
"epoch": 0.3472750316856781,
"grad_norm": 0.4243752360343933,
"learning_rate": 0.0001866666666666667,
"loss": 0.533,
"step": 137
},
{
"epoch": 0.34980988593155893,
"grad_norm": 0.39162227511405945,
"learning_rate": 0.0001865648854961832,
"loss": 0.2989,
"step": 138
},
{
"epoch": 0.3523447401774398,
"grad_norm": 0.3585897386074066,
"learning_rate": 0.00018646310432569977,
"loss": 0.3368,
"step": 139
},
{
"epoch": 0.3548795944233207,
"grad_norm": 0.39330482482910156,
"learning_rate": 0.00018636132315521628,
"loss": 0.4904,
"step": 140
},
{
"epoch": 0.3574144486692015,
"grad_norm": 0.3404198884963989,
"learning_rate": 0.00018625954198473284,
"loss": 0.2684,
"step": 141
},
{
"epoch": 0.35994930291508237,
"grad_norm": 0.34813976287841797,
"learning_rate": 0.00018615776081424938,
"loss": 0.2988,
"step": 142
},
{
"epoch": 0.36248415716096327,
"grad_norm": 0.4100090265274048,
"learning_rate": 0.00018605597964376592,
"loss": 0.3325,
"step": 143
},
{
"epoch": 0.3650190114068441,
"grad_norm": 0.2897261083126068,
"learning_rate": 0.00018595419847328246,
"loss": 0.2487,
"step": 144
},
{
"epoch": 0.36755386565272496,
"grad_norm": 0.43023669719696045,
"learning_rate": 0.00018585241730279897,
"loss": 0.4875,
"step": 145
},
{
"epoch": 0.3700887198986058,
"grad_norm": 0.39708128571510315,
"learning_rate": 0.00018575063613231554,
"loss": 0.3742,
"step": 146
},
{
"epoch": 0.3726235741444867,
"grad_norm": 0.4191845953464508,
"learning_rate": 0.00018564885496183205,
"loss": 0.3253,
"step": 147
},
{
"epoch": 0.37515842839036756,
"grad_norm": 0.3373403549194336,
"learning_rate": 0.00018554707379134862,
"loss": 0.2636,
"step": 148
},
{
"epoch": 0.3776932826362484,
"grad_norm": 0.3522009551525116,
"learning_rate": 0.00018544529262086513,
"loss": 0.2413,
"step": 149
},
{
"epoch": 0.38022813688212925,
"grad_norm": 0.4140997529029846,
"learning_rate": 0.0001853435114503817,
"loss": 0.3663,
"step": 150
},
{
"epoch": 0.38276299112801015,
"grad_norm": 0.3986112177371979,
"learning_rate": 0.0001852417302798982,
"loss": 0.276,
"step": 151
},
{
"epoch": 0.385297845373891,
"grad_norm": 0.46847087144851685,
"learning_rate": 0.00018513994910941478,
"loss": 0.3369,
"step": 152
},
{
"epoch": 0.38783269961977185,
"grad_norm": 0.43623679876327515,
"learning_rate": 0.00018503816793893132,
"loss": 0.37,
"step": 153
},
{
"epoch": 0.39036755386565275,
"grad_norm": 0.4128822684288025,
"learning_rate": 0.00018493638676844785,
"loss": 0.3763,
"step": 154
},
{
"epoch": 0.3929024081115336,
"grad_norm": 0.3352810740470886,
"learning_rate": 0.0001848346055979644,
"loss": 0.2446,
"step": 155
},
{
"epoch": 0.39543726235741444,
"grad_norm": 0.580634355545044,
"learning_rate": 0.00018473282442748093,
"loss": 0.3691,
"step": 156
},
{
"epoch": 0.3979721166032953,
"grad_norm": 0.452499657869339,
"learning_rate": 0.00018463104325699747,
"loss": 0.4361,
"step": 157
},
{
"epoch": 0.4005069708491762,
"grad_norm": 0.4160007834434509,
"learning_rate": 0.000184529262086514,
"loss": 0.4003,
"step": 158
},
{
"epoch": 0.40304182509505704,
"grad_norm": 0.3049513101577759,
"learning_rate": 0.00018442748091603055,
"loss": 0.2167,
"step": 159
},
{
"epoch": 0.4055766793409379,
"grad_norm": 0.38912078738212585,
"learning_rate": 0.00018432569974554706,
"loss": 0.2766,
"step": 160
},
{
"epoch": 0.40811153358681873,
"grad_norm": 0.4433249831199646,
"learning_rate": 0.00018422391857506363,
"loss": 0.3331,
"step": 161
},
{
"epoch": 0.41064638783269963,
"grad_norm": 0.36410561203956604,
"learning_rate": 0.00018412213740458014,
"loss": 0.2719,
"step": 162
},
{
"epoch": 0.4131812420785805,
"grad_norm": 0.47044846415519714,
"learning_rate": 0.0001840203562340967,
"loss": 0.3602,
"step": 163
},
{
"epoch": 0.4157160963244613,
"grad_norm": 0.38755008578300476,
"learning_rate": 0.00018391857506361322,
"loss": 0.2815,
"step": 164
},
{
"epoch": 0.41825095057034223,
"grad_norm": 0.39241930842399597,
"learning_rate": 0.0001838167938931298,
"loss": 0.3642,
"step": 165
},
{
"epoch": 0.4207858048162231,
"grad_norm": 0.37138187885284424,
"learning_rate": 0.00018371501272264633,
"loss": 0.267,
"step": 166
},
{
"epoch": 0.4233206590621039,
"grad_norm": 0.4508083462715149,
"learning_rate": 0.00018361323155216287,
"loss": 0.4093,
"step": 167
},
{
"epoch": 0.42585551330798477,
"grad_norm": 0.4390806257724762,
"learning_rate": 0.0001835114503816794,
"loss": 0.424,
"step": 168
},
{
"epoch": 0.42839036755386567,
"grad_norm": 0.4640062153339386,
"learning_rate": 0.00018340966921119594,
"loss": 0.4065,
"step": 169
},
{
"epoch": 0.4309252217997465,
"grad_norm": 0.37822040915489197,
"learning_rate": 0.00018330788804071248,
"loss": 0.2854,
"step": 170
},
{
"epoch": 0.43346007604562736,
"grad_norm": 0.3658731281757355,
"learning_rate": 0.00018320610687022902,
"loss": 0.2826,
"step": 171
},
{
"epoch": 0.43599493029150826,
"grad_norm": 0.4271928369998932,
"learning_rate": 0.00018310432569974556,
"loss": 0.4538,
"step": 172
},
{
"epoch": 0.4385297845373891,
"grad_norm": 0.33550775051116943,
"learning_rate": 0.00018300254452926207,
"loss": 0.3015,
"step": 173
},
{
"epoch": 0.44106463878326996,
"grad_norm": 0.5374005436897278,
"learning_rate": 0.00018290076335877864,
"loss": 0.2771,
"step": 174
},
{
"epoch": 0.4435994930291508,
"grad_norm": 0.4630737602710724,
"learning_rate": 0.00018279898218829515,
"loss": 0.3786,
"step": 175
},
{
"epoch": 0.4461343472750317,
"grad_norm": 0.4163656234741211,
"learning_rate": 0.00018269720101781172,
"loss": 0.3224,
"step": 176
},
{
"epoch": 0.44866920152091255,
"grad_norm": 0.43972182273864746,
"learning_rate": 0.00018259541984732826,
"loss": 0.4192,
"step": 177
},
{
"epoch": 0.4512040557667934,
"grad_norm": 0.4114130437374115,
"learning_rate": 0.0001824936386768448,
"loss": 0.2979,
"step": 178
},
{
"epoch": 0.45373891001267425,
"grad_norm": 0.5002878308296204,
"learning_rate": 0.00018239185750636134,
"loss": 0.3339,
"step": 179
},
{
"epoch": 0.45627376425855515,
"grad_norm": 0.42383208870887756,
"learning_rate": 0.00018229007633587788,
"loss": 0.2958,
"step": 180
},
{
"epoch": 0.458808618504436,
"grad_norm": 0.3234981894493103,
"learning_rate": 0.00018218829516539442,
"loss": 0.2215,
"step": 181
},
{
"epoch": 0.46134347275031684,
"grad_norm": 0.33356910943984985,
"learning_rate": 0.00018208651399491096,
"loss": 0.3017,
"step": 182
},
{
"epoch": 0.46387832699619774,
"grad_norm": 0.442376047372818,
"learning_rate": 0.0001819847328244275,
"loss": 0.2751,
"step": 183
},
{
"epoch": 0.4664131812420786,
"grad_norm": 0.4563845992088318,
"learning_rate": 0.00018188295165394403,
"loss": 0.3001,
"step": 184
},
{
"epoch": 0.46894803548795944,
"grad_norm": 0.3957296907901764,
"learning_rate": 0.00018178117048346057,
"loss": 0.3864,
"step": 185
},
{
"epoch": 0.4714828897338403,
"grad_norm": 0.32932132482528687,
"learning_rate": 0.0001816793893129771,
"loss": 0.2528,
"step": 186
},
{
"epoch": 0.4740177439797212,
"grad_norm": 0.3960365951061249,
"learning_rate": 0.00018157760814249365,
"loss": 0.3975,
"step": 187
},
{
"epoch": 0.47655259822560203,
"grad_norm": 0.38450995087623596,
"learning_rate": 0.00018147582697201016,
"loss": 0.2552,
"step": 188
},
{
"epoch": 0.4790874524714829,
"grad_norm": 0.4259994626045227,
"learning_rate": 0.00018137404580152673,
"loss": 0.3,
"step": 189
},
{
"epoch": 0.4816223067173637,
"grad_norm": 0.4965859055519104,
"learning_rate": 0.00018127226463104327,
"loss": 0.3099,
"step": 190
},
{
"epoch": 0.4841571609632446,
"grad_norm": 0.38229548931121826,
"learning_rate": 0.0001811704834605598,
"loss": 0.3799,
"step": 191
},
{
"epoch": 0.4866920152091255,
"grad_norm": 0.4622017741203308,
"learning_rate": 0.00018106870229007635,
"loss": 0.4815,
"step": 192
},
{
"epoch": 0.4892268694550063,
"grad_norm": 0.3207991123199463,
"learning_rate": 0.0001809669211195929,
"loss": 0.2534,
"step": 193
},
{
"epoch": 0.4917617237008872,
"grad_norm": 0.3322354555130005,
"learning_rate": 0.00018086513994910943,
"loss": 0.2331,
"step": 194
},
{
"epoch": 0.49429657794676807,
"grad_norm": 0.35752132534980774,
"learning_rate": 0.00018076335877862597,
"loss": 0.3621,
"step": 195
},
{
"epoch": 0.4968314321926489,
"grad_norm": 0.2801353633403778,
"learning_rate": 0.0001806615776081425,
"loss": 0.2198,
"step": 196
},
{
"epoch": 0.49936628643852976,
"grad_norm": 0.5065000057220459,
"learning_rate": 0.00018055979643765905,
"loss": 0.3806,
"step": 197
},
{
"epoch": 0.5019011406844106,
"grad_norm": 0.4308508336544037,
"learning_rate": 0.00018045801526717558,
"loss": 0.4028,
"step": 198
},
{
"epoch": 0.5044359949302915,
"grad_norm": 0.5432320833206177,
"learning_rate": 0.00018035623409669212,
"loss": 0.506,
"step": 199
},
{
"epoch": 0.5069708491761724,
"grad_norm": 0.37079155445098877,
"learning_rate": 0.00018025445292620866,
"loss": 0.2242,
"step": 200
},
{
"epoch": 0.5095057034220533,
"grad_norm": 0.3533012568950653,
"learning_rate": 0.00018015267175572518,
"loss": 0.3462,
"step": 201
},
{
"epoch": 0.5120405576679341,
"grad_norm": 0.37727662920951843,
"learning_rate": 0.00018005089058524174,
"loss": 0.2421,
"step": 202
},
{
"epoch": 0.514575411913815,
"grad_norm": 0.42737269401550293,
"learning_rate": 0.00017994910941475828,
"loss": 0.3338,
"step": 203
},
{
"epoch": 0.5171102661596958,
"grad_norm": 0.41085687279701233,
"learning_rate": 0.00017984732824427482,
"loss": 0.4233,
"step": 204
},
{
"epoch": 0.5196451204055766,
"grad_norm": 0.4871644675731659,
"learning_rate": 0.00017974554707379136,
"loss": 0.3504,
"step": 205
},
{
"epoch": 0.5221799746514575,
"grad_norm": 0.308347225189209,
"learning_rate": 0.0001796437659033079,
"loss": 0.27,
"step": 206
},
{
"epoch": 0.5247148288973384,
"grad_norm": 0.31587716937065125,
"learning_rate": 0.00017954198473282444,
"loss": 0.3161,
"step": 207
},
{
"epoch": 0.5272496831432193,
"grad_norm": 0.471392959356308,
"learning_rate": 0.00017944020356234098,
"loss": 0.3758,
"step": 208
},
{
"epoch": 0.5297845373891001,
"grad_norm": 0.33414778113365173,
"learning_rate": 0.00017933842239185752,
"loss": 0.3095,
"step": 209
},
{
"epoch": 0.532319391634981,
"grad_norm": 0.26553916931152344,
"learning_rate": 0.00017923664122137406,
"loss": 0.232,
"step": 210
},
{
"epoch": 0.5348542458808618,
"grad_norm": 0.27914223074913025,
"learning_rate": 0.0001791348600508906,
"loss": 0.2438,
"step": 211
},
{
"epoch": 0.5373891001267427,
"grad_norm": 0.36625003814697266,
"learning_rate": 0.00017903307888040713,
"loss": 0.2479,
"step": 212
},
{
"epoch": 0.5399239543726235,
"grad_norm": 0.3876325488090515,
"learning_rate": 0.00017893129770992367,
"loss": 0.3428,
"step": 213
},
{
"epoch": 0.5424588086185045,
"grad_norm": 0.5402606129646301,
"learning_rate": 0.0001788295165394402,
"loss": 0.394,
"step": 214
},
{
"epoch": 0.5449936628643853,
"grad_norm": 0.4023256301879883,
"learning_rate": 0.00017872773536895675,
"loss": 0.3348,
"step": 215
},
{
"epoch": 0.5475285171102662,
"grad_norm": 0.4440263509750366,
"learning_rate": 0.0001786259541984733,
"loss": 0.3001,
"step": 216
},
{
"epoch": 0.550063371356147,
"grad_norm": 0.39178457856178284,
"learning_rate": 0.00017852417302798983,
"loss": 0.2561,
"step": 217
},
{
"epoch": 0.5525982256020279,
"grad_norm": 0.5261508226394653,
"learning_rate": 0.00017842239185750637,
"loss": 0.4583,
"step": 218
},
{
"epoch": 0.5551330798479087,
"grad_norm": 0.3981377184391022,
"learning_rate": 0.0001783206106870229,
"loss": 0.265,
"step": 219
},
{
"epoch": 0.5576679340937896,
"grad_norm": 0.3689790666103363,
"learning_rate": 0.00017821882951653945,
"loss": 0.3965,
"step": 220
},
{
"epoch": 0.5602027883396705,
"grad_norm": 0.38442498445510864,
"learning_rate": 0.000178117048346056,
"loss": 0.268,
"step": 221
},
{
"epoch": 0.5627376425855514,
"grad_norm": 0.3051845133304596,
"learning_rate": 0.00017801526717557253,
"loss": 0.2362,
"step": 222
},
{
"epoch": 0.5652724968314322,
"grad_norm": 0.41551336646080017,
"learning_rate": 0.00017791348600508907,
"loss": 0.3428,
"step": 223
},
{
"epoch": 0.5678073510773131,
"grad_norm": 0.2885109484195709,
"learning_rate": 0.0001778117048346056,
"loss": 0.2328,
"step": 224
},
{
"epoch": 0.5703422053231939,
"grad_norm": 0.48813045024871826,
"learning_rate": 0.00017770992366412215,
"loss": 0.3502,
"step": 225
},
{
"epoch": 0.5728770595690748,
"grad_norm": 0.4413661062717438,
"learning_rate": 0.00017760814249363869,
"loss": 0.2687,
"step": 226
},
{
"epoch": 0.5754119138149556,
"grad_norm": 0.422799289226532,
"learning_rate": 0.00017750636132315522,
"loss": 0.4776,
"step": 227
},
{
"epoch": 0.5779467680608364,
"grad_norm": 0.39486098289489746,
"learning_rate": 0.00017740458015267176,
"loss": 0.3551,
"step": 228
},
{
"epoch": 0.5804816223067174,
"grad_norm": 0.366207480430603,
"learning_rate": 0.0001773027989821883,
"loss": 0.2639,
"step": 229
},
{
"epoch": 0.5830164765525983,
"grad_norm": 0.334626704454422,
"learning_rate": 0.00017720101781170484,
"loss": 0.2407,
"step": 230
},
{
"epoch": 0.5855513307984791,
"grad_norm": 0.5580838918685913,
"learning_rate": 0.00017709923664122138,
"loss": 0.3856,
"step": 231
},
{
"epoch": 0.5880861850443599,
"grad_norm": 0.3495747148990631,
"learning_rate": 0.00017699745547073792,
"loss": 0.3113,
"step": 232
},
{
"epoch": 0.5906210392902408,
"grad_norm": 0.38515543937683105,
"learning_rate": 0.00017689567430025446,
"loss": 0.3765,
"step": 233
},
{
"epoch": 0.5931558935361216,
"grad_norm": 0.43240851163864136,
"learning_rate": 0.000176793893129771,
"loss": 0.3094,
"step": 234
},
{
"epoch": 0.5956907477820025,
"grad_norm": 0.42353445291519165,
"learning_rate": 0.00017669211195928754,
"loss": 0.2992,
"step": 235
},
{
"epoch": 0.5982256020278834,
"grad_norm": 0.42463192343711853,
"learning_rate": 0.00017659033078880408,
"loss": 0.2486,
"step": 236
},
{
"epoch": 0.6007604562737643,
"grad_norm": 0.4749039113521576,
"learning_rate": 0.00017648854961832062,
"loss": 0.3742,
"step": 237
},
{
"epoch": 0.6032953105196451,
"grad_norm": 0.5651363730430603,
"learning_rate": 0.00017638676844783716,
"loss": 0.3079,
"step": 238
},
{
"epoch": 0.605830164765526,
"grad_norm": 0.34195011854171753,
"learning_rate": 0.0001762849872773537,
"loss": 0.3236,
"step": 239
},
{
"epoch": 0.6083650190114068,
"grad_norm": 0.5522583723068237,
"learning_rate": 0.00017618320610687024,
"loss": 0.3026,
"step": 240
},
{
"epoch": 0.6108998732572877,
"grad_norm": 0.41445448994636536,
"learning_rate": 0.00017608142493638677,
"loss": 0.32,
"step": 241
},
{
"epoch": 0.6134347275031685,
"grad_norm": 0.5023159384727478,
"learning_rate": 0.00017597964376590331,
"loss": 0.2658,
"step": 242
},
{
"epoch": 0.6159695817490495,
"grad_norm": 0.39539164304733276,
"learning_rate": 0.00017587786259541985,
"loss": 0.2687,
"step": 243
},
{
"epoch": 0.6185044359949303,
"grad_norm": 0.3105890154838562,
"learning_rate": 0.0001757760814249364,
"loss": 0.2224,
"step": 244
},
{
"epoch": 0.6210392902408112,
"grad_norm": 0.3665928840637207,
"learning_rate": 0.00017567430025445293,
"loss": 0.3101,
"step": 245
},
{
"epoch": 0.623574144486692,
"grad_norm": 0.28569111227989197,
"learning_rate": 0.00017557251908396947,
"loss": 0.2316,
"step": 246
},
{
"epoch": 0.6261089987325729,
"grad_norm": 0.24598725140094757,
"learning_rate": 0.000175470737913486,
"loss": 0.2314,
"step": 247
},
{
"epoch": 0.6286438529784537,
"grad_norm": 0.4301004111766815,
"learning_rate": 0.00017536895674300255,
"loss": 0.2606,
"step": 248
},
{
"epoch": 0.6311787072243346,
"grad_norm": 0.36598455905914307,
"learning_rate": 0.0001752671755725191,
"loss": 0.2243,
"step": 249
},
{
"epoch": 0.6337135614702155,
"grad_norm": 0.31714677810668945,
"learning_rate": 0.00017516539440203563,
"loss": 0.2561,
"step": 250
},
{
"epoch": 0.6362484157160964,
"grad_norm": 0.5131182670593262,
"learning_rate": 0.0001750636132315522,
"loss": 0.3216,
"step": 251
},
{
"epoch": 0.6387832699619772,
"grad_norm": 0.4067549407482147,
"learning_rate": 0.0001749618320610687,
"loss": 0.3032,
"step": 252
},
{
"epoch": 0.641318124207858,
"grad_norm": 0.6457440853118896,
"learning_rate": 0.00017486005089058525,
"loss": 0.349,
"step": 253
},
{
"epoch": 0.6438529784537389,
"grad_norm": 0.3759848177433014,
"learning_rate": 0.00017475826972010179,
"loss": 0.2974,
"step": 254
},
{
"epoch": 0.6463878326996197,
"grad_norm": 0.40348076820373535,
"learning_rate": 0.00017465648854961833,
"loss": 0.2781,
"step": 255
},
{
"epoch": 0.6489226869455006,
"grad_norm": 0.2639053463935852,
"learning_rate": 0.00017455470737913486,
"loss": 0.2413,
"step": 256
},
{
"epoch": 0.6514575411913816,
"grad_norm": 0.4014027416706085,
"learning_rate": 0.0001744529262086514,
"loss": 0.2878,
"step": 257
},
{
"epoch": 0.6539923954372624,
"grad_norm": 0.4871384799480438,
"learning_rate": 0.00017435114503816794,
"loss": 0.2527,
"step": 258
},
{
"epoch": 0.6565272496831432,
"grad_norm": 0.28687578439712524,
"learning_rate": 0.00017424936386768448,
"loss": 0.2233,
"step": 259
},
{
"epoch": 0.6590621039290241,
"grad_norm": 0.36948761343955994,
"learning_rate": 0.00017414758269720102,
"loss": 0.3007,
"step": 260
},
{
"epoch": 0.6615969581749049,
"grad_norm": 0.6034134030342102,
"learning_rate": 0.00017404580152671756,
"loss": 0.3054,
"step": 261
},
{
"epoch": 0.6641318124207858,
"grad_norm": 0.3481515645980835,
"learning_rate": 0.0001739440203562341,
"loss": 0.2388,
"step": 262
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.3772611916065216,
"learning_rate": 0.00017384223918575064,
"loss": 0.317,
"step": 263
},
{
"epoch": 0.6692015209125475,
"grad_norm": 0.4693986177444458,
"learning_rate": 0.0001737404580152672,
"loss": 0.3441,
"step": 264
},
{
"epoch": 0.6717363751584284,
"grad_norm": 0.38484400510787964,
"learning_rate": 0.00017363867684478372,
"loss": 0.2637,
"step": 265
},
{
"epoch": 0.6742712294043093,
"grad_norm": 0.3638555407524109,
"learning_rate": 0.00017353689567430026,
"loss": 0.2695,
"step": 266
},
{
"epoch": 0.6768060836501901,
"grad_norm": 0.36848586797714233,
"learning_rate": 0.0001734351145038168,
"loss": 0.3149,
"step": 267
},
{
"epoch": 0.679340937896071,
"grad_norm": 0.31740638613700867,
"learning_rate": 0.00017333333333333334,
"loss": 0.3049,
"step": 268
},
{
"epoch": 0.6818757921419518,
"grad_norm": 0.41415438055992126,
"learning_rate": 0.00017323155216284988,
"loss": 0.231,
"step": 269
},
{
"epoch": 0.6844106463878327,
"grad_norm": 0.41449829936027527,
"learning_rate": 0.00017312977099236641,
"loss": 0.3344,
"step": 270
},
{
"epoch": 0.6869455006337135,
"grad_norm": 0.30683189630508423,
"learning_rate": 0.00017302798982188295,
"loss": 0.283,
"step": 271
},
{
"epoch": 0.6894803548795945,
"grad_norm": 0.29896244406700134,
"learning_rate": 0.0001729262086513995,
"loss": 0.2363,
"step": 272
},
{
"epoch": 0.6920152091254753,
"grad_norm": 0.44181492924690247,
"learning_rate": 0.00017282442748091603,
"loss": 0.3439,
"step": 273
},
{
"epoch": 0.6945500633713562,
"grad_norm": 0.43460434675216675,
"learning_rate": 0.00017272264631043257,
"loss": 0.3004,
"step": 274
},
{
"epoch": 0.697084917617237,
"grad_norm": 0.40781405568122864,
"learning_rate": 0.00017262086513994914,
"loss": 0.2554,
"step": 275
},
{
"epoch": 0.6996197718631179,
"grad_norm": 0.39359861612319946,
"learning_rate": 0.00017251908396946565,
"loss": 0.3094,
"step": 276
},
{
"epoch": 0.7021546261089987,
"grad_norm": 0.4507496953010559,
"learning_rate": 0.00017241730279898222,
"loss": 0.2985,
"step": 277
},
{
"epoch": 0.7046894803548795,
"grad_norm": 0.4513093829154968,
"learning_rate": 0.00017231552162849873,
"loss": 0.4,
"step": 278
},
{
"epoch": 0.7072243346007605,
"grad_norm": 0.3133571147918701,
"learning_rate": 0.0001722137404580153,
"loss": 0.2241,
"step": 279
},
{
"epoch": 0.7097591888466414,
"grad_norm": 0.36957162618637085,
"learning_rate": 0.0001721119592875318,
"loss": 0.2461,
"step": 280
},
{
"epoch": 0.7122940430925222,
"grad_norm": 0.4224545955657959,
"learning_rate": 0.00017201017811704835,
"loss": 0.3178,
"step": 281
},
{
"epoch": 0.714828897338403,
"grad_norm": 0.4696861207485199,
"learning_rate": 0.0001719083969465649,
"loss": 0.3911,
"step": 282
},
{
"epoch": 0.7173637515842839,
"grad_norm": 0.44058746099472046,
"learning_rate": 0.00017180661577608143,
"loss": 0.3169,
"step": 283
},
{
"epoch": 0.7198986058301647,
"grad_norm": 0.32616788148880005,
"learning_rate": 0.00017170483460559797,
"loss": 0.2441,
"step": 284
},
{
"epoch": 0.7224334600760456,
"grad_norm": 0.3941279649734497,
"learning_rate": 0.0001716030534351145,
"loss": 0.3433,
"step": 285
},
{
"epoch": 0.7249683143219265,
"grad_norm": 0.3746216297149658,
"learning_rate": 0.00017150127226463104,
"loss": 0.3993,
"step": 286
},
{
"epoch": 0.7275031685678074,
"grad_norm": 0.3758716881275177,
"learning_rate": 0.00017139949109414758,
"loss": 0.3139,
"step": 287
},
{
"epoch": 0.7300380228136882,
"grad_norm": 0.35631927847862244,
"learning_rate": 0.00017129770992366415,
"loss": 0.2316,
"step": 288
},
{
"epoch": 0.7325728770595691,
"grad_norm": 0.48128026723861694,
"learning_rate": 0.00017119592875318066,
"loss": 0.3306,
"step": 289
},
{
"epoch": 0.7351077313054499,
"grad_norm": 0.3464122414588928,
"learning_rate": 0.00017109414758269723,
"loss": 0.3148,
"step": 290
},
{
"epoch": 0.7376425855513308,
"grad_norm": 0.3772057294845581,
"learning_rate": 0.00017099236641221374,
"loss": 0.274,
"step": 291
},
{
"epoch": 0.7401774397972116,
"grad_norm": 0.2896706759929657,
"learning_rate": 0.0001708905852417303,
"loss": 0.2275,
"step": 292
},
{
"epoch": 0.7427122940430925,
"grad_norm": 0.48482832312583923,
"learning_rate": 0.00017078880407124682,
"loss": 0.2913,
"step": 293
},
{
"epoch": 0.7452471482889734,
"grad_norm": 0.3086034655570984,
"learning_rate": 0.00017068702290076336,
"loss": 0.2453,
"step": 294
},
{
"epoch": 0.7477820025348543,
"grad_norm": 0.42840075492858887,
"learning_rate": 0.0001705852417302799,
"loss": 0.352,
"step": 295
},
{
"epoch": 0.7503168567807351,
"grad_norm": 0.4574609398841858,
"learning_rate": 0.00017048346055979644,
"loss": 0.3698,
"step": 296
},
{
"epoch": 0.752851711026616,
"grad_norm": 0.4295889735221863,
"learning_rate": 0.00017038167938931298,
"loss": 0.3341,
"step": 297
},
{
"epoch": 0.7553865652724968,
"grad_norm": 0.46036672592163086,
"learning_rate": 0.00017027989821882952,
"loss": 0.3175,
"step": 298
},
{
"epoch": 0.7579214195183777,
"grad_norm": 0.45897790789604187,
"learning_rate": 0.00017017811704834608,
"loss": 0.31,
"step": 299
},
{
"epoch": 0.7604562737642585,
"grad_norm": 0.2966432273387909,
"learning_rate": 0.0001700763358778626,
"loss": 0.2439,
"step": 300
},
{
"epoch": 0.7629911280101395,
"grad_norm": 0.32714638113975525,
"learning_rate": 0.00016997455470737916,
"loss": 0.2653,
"step": 301
},
{
"epoch": 0.7655259822560203,
"grad_norm": 0.32264646887779236,
"learning_rate": 0.00016987277353689567,
"loss": 0.2728,
"step": 302
},
{
"epoch": 0.7680608365019012,
"grad_norm": 0.4073767066001892,
"learning_rate": 0.00016977099236641224,
"loss": 0.3501,
"step": 303
},
{
"epoch": 0.770595690747782,
"grad_norm": 0.5493949055671692,
"learning_rate": 0.00016966921119592875,
"loss": 0.3212,
"step": 304
},
{
"epoch": 0.7731305449936628,
"grad_norm": 0.335705429315567,
"learning_rate": 0.00016956743002544532,
"loss": 0.299,
"step": 305
},
{
"epoch": 0.7756653992395437,
"grad_norm": 0.32758405804634094,
"learning_rate": 0.00016946564885496183,
"loss": 0.2547,
"step": 306
},
{
"epoch": 0.7782002534854245,
"grad_norm": 0.32411983609199524,
"learning_rate": 0.0001693638676844784,
"loss": 0.2593,
"step": 307
},
{
"epoch": 0.7807351077313055,
"grad_norm": 0.5713444352149963,
"learning_rate": 0.0001692620865139949,
"loss": 0.3661,
"step": 308
},
{
"epoch": 0.7832699619771863,
"grad_norm": 0.3287065327167511,
"learning_rate": 0.00016916030534351145,
"loss": 0.2559,
"step": 309
},
{
"epoch": 0.7858048162230672,
"grad_norm": 0.3499440550804138,
"learning_rate": 0.000169058524173028,
"loss": 0.3489,
"step": 310
},
{
"epoch": 0.788339670468948,
"grad_norm": 0.259787917137146,
"learning_rate": 0.00016895674300254453,
"loss": 0.2451,
"step": 311
},
{
"epoch": 0.7908745247148289,
"grad_norm": 0.3902716338634491,
"learning_rate": 0.0001688549618320611,
"loss": 0.2821,
"step": 312
},
{
"epoch": 0.7934093789607097,
"grad_norm": 0.4061296582221985,
"learning_rate": 0.0001687531806615776,
"loss": 0.4289,
"step": 313
},
{
"epoch": 0.7959442332065906,
"grad_norm": 0.3062605857849121,
"learning_rate": 0.00016865139949109417,
"loss": 0.2489,
"step": 314
},
{
"epoch": 0.7984790874524715,
"grad_norm": 0.36886945366859436,
"learning_rate": 0.00016854961832061068,
"loss": 0.4049,
"step": 315
},
{
"epoch": 0.8010139416983524,
"grad_norm": 0.25828975439071655,
"learning_rate": 0.00016844783715012725,
"loss": 0.238,
"step": 316
},
{
"epoch": 0.8035487959442332,
"grad_norm": 0.39747142791748047,
"learning_rate": 0.00016834605597964376,
"loss": 0.3928,
"step": 317
},
{
"epoch": 0.8060836501901141,
"grad_norm": 0.3884779214859009,
"learning_rate": 0.00016824427480916033,
"loss": 0.2881,
"step": 318
},
{
"epoch": 0.8086185044359949,
"grad_norm": 0.3687349855899811,
"learning_rate": 0.00016814249363867684,
"loss": 0.3662,
"step": 319
},
{
"epoch": 0.8111533586818758,
"grad_norm": 0.3631541132926941,
"learning_rate": 0.0001680407124681934,
"loss": 0.2657,
"step": 320
},
{
"epoch": 0.8136882129277566,
"grad_norm": 0.3174535930156708,
"learning_rate": 0.00016793893129770992,
"loss": 0.2636,
"step": 321
},
{
"epoch": 0.8162230671736375,
"grad_norm": 0.44168904423713684,
"learning_rate": 0.00016783715012722646,
"loss": 0.2882,
"step": 322
},
{
"epoch": 0.8187579214195184,
"grad_norm": 0.370685875415802,
"learning_rate": 0.000167735368956743,
"loss": 0.3228,
"step": 323
},
{
"epoch": 0.8212927756653993,
"grad_norm": 0.3001299798488617,
"learning_rate": 0.00016763358778625954,
"loss": 0.2256,
"step": 324
},
{
"epoch": 0.8238276299112801,
"grad_norm": 0.37992653250694275,
"learning_rate": 0.0001675318066157761,
"loss": 0.2633,
"step": 325
},
{
"epoch": 0.826362484157161,
"grad_norm": 0.4739125072956085,
"learning_rate": 0.00016743002544529262,
"loss": 0.3044,
"step": 326
},
{
"epoch": 0.8288973384030418,
"grad_norm": 0.36424344778060913,
"learning_rate": 0.00016732824427480918,
"loss": 0.3311,
"step": 327
},
{
"epoch": 0.8314321926489227,
"grad_norm": 0.4474777579307556,
"learning_rate": 0.0001672264631043257,
"loss": 0.4099,
"step": 328
},
{
"epoch": 0.8339670468948035,
"grad_norm": 0.4337301552295685,
"learning_rate": 0.00016712468193384226,
"loss": 0.3567,
"step": 329
},
{
"epoch": 0.8365019011406845,
"grad_norm": 0.37666353583335876,
"learning_rate": 0.00016702290076335877,
"loss": 0.3079,
"step": 330
},
{
"epoch": 0.8390367553865653,
"grad_norm": 0.36810433864593506,
"learning_rate": 0.00016692111959287534,
"loss": 0.414,
"step": 331
},
{
"epoch": 0.8415716096324461,
"grad_norm": 0.3914581537246704,
"learning_rate": 0.00016681933842239185,
"loss": 0.2807,
"step": 332
},
{
"epoch": 0.844106463878327,
"grad_norm": 0.3891938626766205,
"learning_rate": 0.00016671755725190842,
"loss": 0.3101,
"step": 333
},
{
"epoch": 0.8466413181242078,
"grad_norm": 0.4397302269935608,
"learning_rate": 0.00016661577608142493,
"loss": 0.2659,
"step": 334
},
{
"epoch": 0.8491761723700887,
"grad_norm": 0.3152853846549988,
"learning_rate": 0.0001665139949109415,
"loss": 0.308,
"step": 335
},
{
"epoch": 0.8517110266159695,
"grad_norm": 0.2894272208213806,
"learning_rate": 0.00016641221374045804,
"loss": 0.2675,
"step": 336
},
{
"epoch": 0.8542458808618505,
"grad_norm": 0.27995947003364563,
"learning_rate": 0.00016631043256997455,
"loss": 0.2603,
"step": 337
},
{
"epoch": 0.8567807351077313,
"grad_norm": 0.42209070920944214,
"learning_rate": 0.00016620865139949112,
"loss": 0.3417,
"step": 338
},
{
"epoch": 0.8593155893536122,
"grad_norm": 0.3781871795654297,
"learning_rate": 0.00016610687022900763,
"loss": 0.3441,
"step": 339
},
{
"epoch": 0.861850443599493,
"grad_norm": 0.3438952565193176,
"learning_rate": 0.0001660050890585242,
"loss": 0.2249,
"step": 340
},
{
"epoch": 0.8643852978453739,
"grad_norm": 0.32164961099624634,
"learning_rate": 0.0001659033078880407,
"loss": 0.2472,
"step": 341
},
{
"epoch": 0.8669201520912547,
"grad_norm": 0.3517252504825592,
"learning_rate": 0.00016580152671755727,
"loss": 0.2434,
"step": 342
},
{
"epoch": 0.8694550063371356,
"grad_norm": 0.29841092228889465,
"learning_rate": 0.00016569974554707378,
"loss": 0.2536,
"step": 343
},
{
"epoch": 0.8719898605830165,
"grad_norm": 0.3351423144340515,
"learning_rate": 0.00016559796437659035,
"loss": 0.2501,
"step": 344
},
{
"epoch": 0.8745247148288974,
"grad_norm": 0.3979301154613495,
"learning_rate": 0.00016549618320610686,
"loss": 0.2358,
"step": 345
},
{
"epoch": 0.8770595690747782,
"grad_norm": 0.3859489858150482,
"learning_rate": 0.00016539440203562343,
"loss": 0.2675,
"step": 346
},
{
"epoch": 0.8795944233206591,
"grad_norm": 0.3836475908756256,
"learning_rate": 0.00016529262086513994,
"loss": 0.2179,
"step": 347
},
{
"epoch": 0.8821292775665399,
"grad_norm": 0.3986142575740814,
"learning_rate": 0.0001651908396946565,
"loss": 0.2599,
"step": 348
},
{
"epoch": 0.8846641318124208,
"grad_norm": 0.4105628430843353,
"learning_rate": 0.00016508905852417305,
"loss": 0.242,
"step": 349
},
{
"epoch": 0.8871989860583016,
"grad_norm": 0.34334608912467957,
"learning_rate": 0.00016498727735368956,
"loss": 0.2771,
"step": 350
},
{
"epoch": 0.8897338403041825,
"grad_norm": 0.3412443995475769,
"learning_rate": 0.00016488549618320613,
"loss": 0.2289,
"step": 351
},
{
"epoch": 0.8922686945500634,
"grad_norm": 0.3596668541431427,
"learning_rate": 0.00016478371501272264,
"loss": 0.2253,
"step": 352
},
{
"epoch": 0.8948035487959443,
"grad_norm": 0.43112802505493164,
"learning_rate": 0.0001646819338422392,
"loss": 0.3116,
"step": 353
},
{
"epoch": 0.8973384030418251,
"grad_norm": 0.4306243062019348,
"learning_rate": 0.00016458015267175572,
"loss": 0.3099,
"step": 354
},
{
"epoch": 0.899873257287706,
"grad_norm": 0.2773829996585846,
"learning_rate": 0.00016447837150127228,
"loss": 0.2765,
"step": 355
},
{
"epoch": 0.9024081115335868,
"grad_norm": 0.5014198422431946,
"learning_rate": 0.0001643765903307888,
"loss": 0.302,
"step": 356
},
{
"epoch": 0.9049429657794676,
"grad_norm": 0.4376792013645172,
"learning_rate": 0.00016427480916030536,
"loss": 0.2967,
"step": 357
},
{
"epoch": 0.9074778200253485,
"grad_norm": 0.34460946917533875,
"learning_rate": 0.00016417302798982187,
"loss": 0.3678,
"step": 358
},
{
"epoch": 0.9100126742712294,
"grad_norm": 0.23346909880638123,
"learning_rate": 0.00016407124681933844,
"loss": 0.2409,
"step": 359
},
{
"epoch": 0.9125475285171103,
"grad_norm": 0.35633108019828796,
"learning_rate": 0.00016396946564885498,
"loss": 0.3555,
"step": 360
},
{
"epoch": 0.9150823827629911,
"grad_norm": 0.26780250668525696,
"learning_rate": 0.00016386768447837152,
"loss": 0.2543,
"step": 361
},
{
"epoch": 0.917617237008872,
"grad_norm": 0.34583303332328796,
"learning_rate": 0.00016376590330788806,
"loss": 0.2444,
"step": 362
},
{
"epoch": 0.9201520912547528,
"grad_norm": 0.38331279158592224,
"learning_rate": 0.0001636641221374046,
"loss": 0.3549,
"step": 363
},
{
"epoch": 0.9226869455006337,
"grad_norm": 0.37290483713150024,
"learning_rate": 0.00016356234096692114,
"loss": 0.3311,
"step": 364
},
{
"epoch": 0.9252217997465145,
"grad_norm": 0.406568318605423,
"learning_rate": 0.00016346055979643765,
"loss": 0.2774,
"step": 365
},
{
"epoch": 0.9277566539923955,
"grad_norm": 0.35498303174972534,
"learning_rate": 0.00016335877862595422,
"loss": 0.2121,
"step": 366
},
{
"epoch": 0.9302915082382763,
"grad_norm": 0.3682021498680115,
"learning_rate": 0.00016325699745547073,
"loss": 0.2648,
"step": 367
},
{
"epoch": 0.9328263624841572,
"grad_norm": 0.37826359272003174,
"learning_rate": 0.0001631552162849873,
"loss": 0.2214,
"step": 368
},
{
"epoch": 0.935361216730038,
"grad_norm": 0.4018029570579529,
"learning_rate": 0.0001630534351145038,
"loss": 0.2291,
"step": 369
},
{
"epoch": 0.9378960709759189,
"grad_norm": 0.4628411531448364,
"learning_rate": 0.00016295165394402037,
"loss": 0.3486,
"step": 370
},
{
"epoch": 0.9404309252217997,
"grad_norm": 0.5615106821060181,
"learning_rate": 0.00016284987277353689,
"loss": 0.3281,
"step": 371
},
{
"epoch": 0.9429657794676806,
"grad_norm": 0.40337833762168884,
"learning_rate": 0.00016274809160305345,
"loss": 0.22,
"step": 372
},
{
"epoch": 0.9455006337135615,
"grad_norm": 0.4247727692127228,
"learning_rate": 0.00016264631043257,
"loss": 0.2801,
"step": 373
},
{
"epoch": 0.9480354879594424,
"grad_norm": 0.28746598958969116,
"learning_rate": 0.00016254452926208653,
"loss": 0.2349,
"step": 374
},
{
"epoch": 0.9505703422053232,
"grad_norm": 0.3654968738555908,
"learning_rate": 0.00016244274809160307,
"loss": 0.2696,
"step": 375
},
{
"epoch": 0.9531051964512041,
"grad_norm": 0.3999825417995453,
"learning_rate": 0.0001623409669211196,
"loss": 0.4228,
"step": 376
},
{
"epoch": 0.9556400506970849,
"grad_norm": 0.3065613806247711,
"learning_rate": 0.00016223918575063615,
"loss": 0.2505,
"step": 377
},
{
"epoch": 0.9581749049429658,
"grad_norm": 0.3503481149673462,
"learning_rate": 0.0001621374045801527,
"loss": 0.2953,
"step": 378
},
{
"epoch": 0.9607097591888466,
"grad_norm": 0.28918176889419556,
"learning_rate": 0.00016203562340966923,
"loss": 0.2454,
"step": 379
},
{
"epoch": 0.9632446134347274,
"grad_norm": 0.3047085404396057,
"learning_rate": 0.00016193384223918574,
"loss": 0.2639,
"step": 380
},
{
"epoch": 0.9657794676806084,
"grad_norm": 0.3775922358036041,
"learning_rate": 0.0001618320610687023,
"loss": 0.3787,
"step": 381
},
{
"epoch": 0.9683143219264893,
"grad_norm": 0.32147660851478577,
"learning_rate": 0.00016173027989821882,
"loss": 0.2273,
"step": 382
},
{
"epoch": 0.9708491761723701,
"grad_norm": 0.355747252702713,
"learning_rate": 0.00016162849872773538,
"loss": 0.2805,
"step": 383
},
{
"epoch": 0.973384030418251,
"grad_norm": 0.2670198082923889,
"learning_rate": 0.0001615267175572519,
"loss": 0.2393,
"step": 384
},
{
"epoch": 0.9759188846641318,
"grad_norm": 0.3395114839076996,
"learning_rate": 0.00016142493638676846,
"loss": 0.2893,
"step": 385
},
{
"epoch": 0.9784537389100126,
"grad_norm": 0.3189052641391754,
"learning_rate": 0.000161323155216285,
"loss": 0.2442,
"step": 386
},
{
"epoch": 0.9809885931558935,
"grad_norm": 0.49379605054855347,
"learning_rate": 0.00016122137404580154,
"loss": 0.3126,
"step": 387
},
{
"epoch": 0.9835234474017744,
"grad_norm": 0.2787371575832367,
"learning_rate": 0.00016111959287531808,
"loss": 0.2329,
"step": 388
},
{
"epoch": 0.9860583016476553,
"grad_norm": 0.3559485673904419,
"learning_rate": 0.00016101781170483462,
"loss": 0.335,
"step": 389
},
{
"epoch": 0.9885931558935361,
"grad_norm": 0.43041396141052246,
"learning_rate": 0.00016091603053435116,
"loss": 0.3069,
"step": 390
},
{
"epoch": 0.991128010139417,
"grad_norm": 0.3231935203075409,
"learning_rate": 0.0001608142493638677,
"loss": 0.2354,
"step": 391
},
{
"epoch": 0.9936628643852978,
"grad_norm": 0.3676549792289734,
"learning_rate": 0.00016071246819338424,
"loss": 0.2958,
"step": 392
},
{
"epoch": 0.9961977186311787,
"grad_norm": 0.37902191281318665,
"learning_rate": 0.00016061068702290075,
"loss": 0.2792,
"step": 393
},
{
"epoch": 0.9987325728770595,
"grad_norm": 0.47126442193984985,
"learning_rate": 0.00016050890585241732,
"loss": 0.4871,
"step": 394
},
{
"epoch": 1.0,
"grad_norm": 0.4303727447986603,
"learning_rate": 0.00016040712468193383,
"loss": 0.2121,
"step": 395
},
{
"epoch": 1.002534854245881,
"grad_norm": 0.3156070411205292,
"learning_rate": 0.0001603053435114504,
"loss": 0.2528,
"step": 396
},
{
"epoch": 1.0050697084917617,
"grad_norm": 0.3030865788459778,
"learning_rate": 0.00016020356234096693,
"loss": 0.2029,
"step": 397
},
{
"epoch": 1.0076045627376427,
"grad_norm": 0.2900277376174927,
"learning_rate": 0.00016010178117048347,
"loss": 0.2192,
"step": 398
},
{
"epoch": 1.0101394169835234,
"grad_norm": 0.4288582503795624,
"learning_rate": 0.00016,
"loss": 0.308,
"step": 399
},
{
"epoch": 1.0126742712294043,
"grad_norm": 0.3376273214817047,
"learning_rate": 0.00015989821882951655,
"loss": 0.2569,
"step": 400
},
{
"epoch": 1.015209125475285,
"grad_norm": 0.39375385642051697,
"learning_rate": 0.0001597964376590331,
"loss": 0.2104,
"step": 401
},
{
"epoch": 1.017743979721166,
"grad_norm": 0.2907378077507019,
"learning_rate": 0.00015969465648854963,
"loss": 0.2057,
"step": 402
},
{
"epoch": 1.020278833967047,
"grad_norm": 0.3524622917175293,
"learning_rate": 0.00015959287531806617,
"loss": 0.2296,
"step": 403
},
{
"epoch": 1.0228136882129277,
"grad_norm": 0.36487293243408203,
"learning_rate": 0.0001594910941475827,
"loss": 0.2133,
"step": 404
},
{
"epoch": 1.0253485424588087,
"grad_norm": 0.4489257335662842,
"learning_rate": 0.00015938931297709925,
"loss": 0.2162,
"step": 405
},
{
"epoch": 1.0278833967046894,
"grad_norm": 0.41142696142196655,
"learning_rate": 0.0001592875318066158,
"loss": 0.2383,
"step": 406
},
{
"epoch": 1.0304182509505704,
"grad_norm": 0.3364538848400116,
"learning_rate": 0.00015918575063613233,
"loss": 0.2077,
"step": 407
},
{
"epoch": 1.0329531051964511,
"grad_norm": 0.576775312423706,
"learning_rate": 0.00015908396946564884,
"loss": 0.2435,
"step": 408
},
{
"epoch": 1.035487959442332,
"grad_norm": 0.6190880537033081,
"learning_rate": 0.0001589821882951654,
"loss": 0.252,
"step": 409
},
{
"epoch": 1.038022813688213,
"grad_norm": 0.4943700432777405,
"learning_rate": 0.00015888040712468195,
"loss": 0.3275,
"step": 410
},
{
"epoch": 1.0405576679340938,
"grad_norm": 0.3160712420940399,
"learning_rate": 0.00015877862595419848,
"loss": 0.217,
"step": 411
},
{
"epoch": 1.0430925221799747,
"grad_norm": 0.34546172618865967,
"learning_rate": 0.00015867684478371502,
"loss": 0.2509,
"step": 412
},
{
"epoch": 1.0456273764258555,
"grad_norm": 0.3498256802558899,
"learning_rate": 0.00015857506361323156,
"loss": 0.2376,
"step": 413
},
{
"epoch": 1.0481622306717364,
"grad_norm": 0.29526984691619873,
"learning_rate": 0.0001584732824427481,
"loss": 0.2305,
"step": 414
},
{
"epoch": 1.0506970849176172,
"grad_norm": 0.30113956332206726,
"learning_rate": 0.00015837150127226464,
"loss": 0.2205,
"step": 415
},
{
"epoch": 1.053231939163498,
"grad_norm": 0.4007863402366638,
"learning_rate": 0.00015826972010178118,
"loss": 0.2407,
"step": 416
},
{
"epoch": 1.055766793409379,
"grad_norm": 0.2594064176082611,
"learning_rate": 0.00015816793893129772,
"loss": 0.1923,
"step": 417
},
{
"epoch": 1.0583016476552598,
"grad_norm": 0.23412476480007172,
"learning_rate": 0.00015806615776081426,
"loss": 0.2158,
"step": 418
},
{
"epoch": 1.0608365019011408,
"grad_norm": 0.397443562746048,
"learning_rate": 0.0001579643765903308,
"loss": 0.3666,
"step": 419
},
{
"epoch": 1.0633713561470215,
"grad_norm": 0.3756926655769348,
"learning_rate": 0.00015786259541984734,
"loss": 0.2081,
"step": 420
},
{
"epoch": 1.0659062103929025,
"grad_norm": 0.5698515772819519,
"learning_rate": 0.00015776081424936388,
"loss": 0.2265,
"step": 421
},
{
"epoch": 1.0684410646387832,
"grad_norm": 0.3608737289905548,
"learning_rate": 0.00015765903307888042,
"loss": 0.3821,
"step": 422
},
{
"epoch": 1.0709759188846641,
"grad_norm": 0.4109106957912445,
"learning_rate": 0.00015755725190839696,
"loss": 0.3484,
"step": 423
},
{
"epoch": 1.073510773130545,
"grad_norm": 0.38270992040634155,
"learning_rate": 0.0001574554707379135,
"loss": 0.2365,
"step": 424
},
{
"epoch": 1.0760456273764258,
"grad_norm": 0.2857488989830017,
"learning_rate": 0.00015735368956743004,
"loss": 0.263,
"step": 425
},
{
"epoch": 1.0785804816223068,
"grad_norm": 0.25236523151397705,
"learning_rate": 0.00015725190839694657,
"loss": 0.2216,
"step": 426
},
{
"epoch": 1.0811153358681875,
"grad_norm": 0.40370991826057434,
"learning_rate": 0.00015715012722646311,
"loss": 0.3711,
"step": 427
},
{
"epoch": 1.0836501901140685,
"grad_norm": 0.2624306380748749,
"learning_rate": 0.00015704834605597965,
"loss": 0.2082,
"step": 428
},
{
"epoch": 1.0861850443599492,
"grad_norm": 0.4375905692577362,
"learning_rate": 0.0001569465648854962,
"loss": 0.3474,
"step": 429
},
{
"epoch": 1.0887198986058302,
"grad_norm": 0.3287188410758972,
"learning_rate": 0.00015684478371501273,
"loss": 0.3097,
"step": 430
},
{
"epoch": 1.091254752851711,
"grad_norm": 0.2669587731361389,
"learning_rate": 0.00015674300254452927,
"loss": 0.229,
"step": 431
},
{
"epoch": 1.0937896070975919,
"grad_norm": 0.28192129731178284,
"learning_rate": 0.0001566412213740458,
"loss": 0.2226,
"step": 432
},
{
"epoch": 1.0963244613434728,
"grad_norm": 0.30673590302467346,
"learning_rate": 0.00015653944020356235,
"loss": 0.2331,
"step": 433
},
{
"epoch": 1.0988593155893536,
"grad_norm": 0.34343135356903076,
"learning_rate": 0.0001564376590330789,
"loss": 0.2567,
"step": 434
},
{
"epoch": 1.1013941698352345,
"grad_norm": 0.4853306710720062,
"learning_rate": 0.00015633587786259543,
"loss": 0.3688,
"step": 435
},
{
"epoch": 1.1039290240811153,
"grad_norm": 0.42215099930763245,
"learning_rate": 0.00015623409669211197,
"loss": 0.3465,
"step": 436
},
{
"epoch": 1.1064638783269962,
"grad_norm": 0.5882295370101929,
"learning_rate": 0.0001561323155216285,
"loss": 0.4502,
"step": 437
},
{
"epoch": 1.1089987325728772,
"grad_norm": 0.44578316807746887,
"learning_rate": 0.00015603053435114505,
"loss": 0.3345,
"step": 438
},
{
"epoch": 1.111533586818758,
"grad_norm": 0.366653174161911,
"learning_rate": 0.00015592875318066159,
"loss": 0.2111,
"step": 439
},
{
"epoch": 1.1140684410646389,
"grad_norm": 0.4964495003223419,
"learning_rate": 0.00015582697201017812,
"loss": 0.2731,
"step": 440
},
{
"epoch": 1.1166032953105196,
"grad_norm": 0.3171039819717407,
"learning_rate": 0.00015572519083969466,
"loss": 0.2148,
"step": 441
},
{
"epoch": 1.1191381495564006,
"grad_norm": 0.3483026921749115,
"learning_rate": 0.0001556234096692112,
"loss": 0.2481,
"step": 442
},
{
"epoch": 1.1216730038022813,
"grad_norm": 0.37379321455955505,
"learning_rate": 0.00015552162849872774,
"loss": 0.3292,
"step": 443
},
{
"epoch": 1.1242078580481623,
"grad_norm": 0.32108721137046814,
"learning_rate": 0.00015541984732824428,
"loss": 0.3363,
"step": 444
},
{
"epoch": 1.126742712294043,
"grad_norm": 0.3879946768283844,
"learning_rate": 0.00015531806615776082,
"loss": 0.2891,
"step": 445
},
{
"epoch": 1.129277566539924,
"grad_norm": 0.2334345281124115,
"learning_rate": 0.00015521628498727736,
"loss": 0.2183,
"step": 446
},
{
"epoch": 1.131812420785805,
"grad_norm": 0.274795264005661,
"learning_rate": 0.0001551145038167939,
"loss": 0.2002,
"step": 447
},
{
"epoch": 1.1343472750316856,
"grad_norm": 0.45602667331695557,
"learning_rate": 0.00015501272264631044,
"loss": 0.3282,
"step": 448
},
{
"epoch": 1.1368821292775666,
"grad_norm": 0.25433096289634705,
"learning_rate": 0.00015491094147582698,
"loss": 0.2195,
"step": 449
},
{
"epoch": 1.1394169835234473,
"grad_norm": 0.3606742024421692,
"learning_rate": 0.00015480916030534352,
"loss": 0.244,
"step": 450
},
{
"epoch": 1.1419518377693283,
"grad_norm": 0.3597625494003296,
"learning_rate": 0.00015470737913486006,
"loss": 0.2117,
"step": 451
},
{
"epoch": 1.144486692015209,
"grad_norm": 0.32967302203178406,
"learning_rate": 0.0001546055979643766,
"loss": 0.2662,
"step": 452
},
{
"epoch": 1.14702154626109,
"grad_norm": 0.32538869976997375,
"learning_rate": 0.00015450381679389314,
"loss": 0.2439,
"step": 453
},
{
"epoch": 1.149556400506971,
"grad_norm": 0.36263129115104675,
"learning_rate": 0.00015440203562340968,
"loss": 0.2688,
"step": 454
},
{
"epoch": 1.1520912547528517,
"grad_norm": 0.4200229346752167,
"learning_rate": 0.00015430025445292621,
"loss": 0.3201,
"step": 455
},
{
"epoch": 1.1546261089987326,
"grad_norm": 0.35889115929603577,
"learning_rate": 0.00015419847328244275,
"loss": 0.2584,
"step": 456
},
{
"epoch": 1.1571609632446134,
"grad_norm": 0.36060044169425964,
"learning_rate": 0.0001540966921119593,
"loss": 0.2496,
"step": 457
},
{
"epoch": 1.1596958174904943,
"grad_norm": 0.3046696186065674,
"learning_rate": 0.00015399491094147583,
"loss": 0.2102,
"step": 458
},
{
"epoch": 1.162230671736375,
"grad_norm": 0.4576256275177002,
"learning_rate": 0.00015389312977099237,
"loss": 0.3594,
"step": 459
},
{
"epoch": 1.164765525982256,
"grad_norm": 0.3436565697193146,
"learning_rate": 0.0001537913486005089,
"loss": 0.2289,
"step": 460
},
{
"epoch": 1.167300380228137,
"grad_norm": 0.4197808802127838,
"learning_rate": 0.00015368956743002545,
"loss": 0.2863,
"step": 461
},
{
"epoch": 1.1698352344740177,
"grad_norm": 0.3584151566028595,
"learning_rate": 0.000153587786259542,
"loss": 0.2797,
"step": 462
},
{
"epoch": 1.1723700887198987,
"grad_norm": 0.29760056734085083,
"learning_rate": 0.00015348600508905853,
"loss": 0.212,
"step": 463
},
{
"epoch": 1.1749049429657794,
"grad_norm": 0.3856862485408783,
"learning_rate": 0.00015338422391857507,
"loss": 0.2986,
"step": 464
},
{
"epoch": 1.1774397972116604,
"grad_norm": 0.42522993683815,
"learning_rate": 0.0001532824427480916,
"loss": 0.2869,
"step": 465
},
{
"epoch": 1.179974651457541,
"grad_norm": 0.33221253752708435,
"learning_rate": 0.00015318066157760815,
"loss": 0.2236,
"step": 466
},
{
"epoch": 1.182509505703422,
"grad_norm": 0.35414496064186096,
"learning_rate": 0.00015307888040712469,
"loss": 0.2658,
"step": 467
},
{
"epoch": 1.1850443599493028,
"grad_norm": 0.41883930563926697,
"learning_rate": 0.00015297709923664123,
"loss": 0.3939,
"step": 468
},
{
"epoch": 1.1875792141951838,
"grad_norm": 0.3070299029350281,
"learning_rate": 0.00015287531806615776,
"loss": 0.2208,
"step": 469
},
{
"epoch": 1.1901140684410647,
"grad_norm": 0.30749714374542236,
"learning_rate": 0.0001527735368956743,
"loss": 0.242,
"step": 470
},
{
"epoch": 1.1926489226869454,
"grad_norm": 0.2579677104949951,
"learning_rate": 0.00015267175572519084,
"loss": 0.2435,
"step": 471
},
{
"epoch": 1.1951837769328264,
"grad_norm": 0.46220460534095764,
"learning_rate": 0.00015256997455470738,
"loss": 0.2803,
"step": 472
},
{
"epoch": 1.1977186311787071,
"grad_norm": 0.3824957609176636,
"learning_rate": 0.00015246819338422392,
"loss": 0.3143,
"step": 473
},
{
"epoch": 1.200253485424588,
"grad_norm": 0.3049899637699127,
"learning_rate": 0.00015236641221374046,
"loss": 0.2231,
"step": 474
},
{
"epoch": 1.202788339670469,
"grad_norm": 0.4378805458545685,
"learning_rate": 0.000152264631043257,
"loss": 0.2041,
"step": 475
},
{
"epoch": 1.2053231939163498,
"grad_norm": 0.3902495801448822,
"learning_rate": 0.00015216284987277354,
"loss": 0.3055,
"step": 476
},
{
"epoch": 1.2078580481622307,
"grad_norm": 0.3150664269924164,
"learning_rate": 0.00015206106870229008,
"loss": 0.2222,
"step": 477
},
{
"epoch": 1.2103929024081115,
"grad_norm": 0.3551795184612274,
"learning_rate": 0.00015195928753180662,
"loss": 0.2304,
"step": 478
},
{
"epoch": 1.2129277566539924,
"grad_norm": 0.35522422194480896,
"learning_rate": 0.00015185750636132316,
"loss": 0.2636,
"step": 479
},
{
"epoch": 1.2154626108998732,
"grad_norm": 0.35261449217796326,
"learning_rate": 0.0001517557251908397,
"loss": 0.2743,
"step": 480
},
{
"epoch": 1.2179974651457541,
"grad_norm": 0.4755167067050934,
"learning_rate": 0.00015165394402035624,
"loss": 0.321,
"step": 481
},
{
"epoch": 1.2205323193916349,
"grad_norm": 0.36083585023880005,
"learning_rate": 0.0001515521628498728,
"loss": 0.2549,
"step": 482
},
{
"epoch": 1.2230671736375158,
"grad_norm": 0.3213503956794739,
"learning_rate": 0.00015145038167938932,
"loss": 0.2685,
"step": 483
},
{
"epoch": 1.2256020278833968,
"grad_norm": 0.29988422989845276,
"learning_rate": 0.00015134860050890588,
"loss": 0.3253,
"step": 484
},
{
"epoch": 1.2281368821292775,
"grad_norm": 0.3549601435661316,
"learning_rate": 0.0001512468193384224,
"loss": 0.2574,
"step": 485
},
{
"epoch": 1.2306717363751585,
"grad_norm": 0.33347830176353455,
"learning_rate": 0.00015114503816793893,
"loss": 0.3408,
"step": 486
},
{
"epoch": 1.2332065906210392,
"grad_norm": 0.2988692820072174,
"learning_rate": 0.00015104325699745547,
"loss": 0.2583,
"step": 487
},
{
"epoch": 1.2357414448669202,
"grad_norm": 0.2710984945297241,
"learning_rate": 0.000150941475826972,
"loss": 0.2708,
"step": 488
},
{
"epoch": 1.2382762991128011,
"grad_norm": 0.28278592228889465,
"learning_rate": 0.00015083969465648855,
"loss": 0.2345,
"step": 489
},
{
"epoch": 1.2408111533586819,
"grad_norm": 0.31838810443878174,
"learning_rate": 0.0001507379134860051,
"loss": 0.2193,
"step": 490
},
{
"epoch": 1.2433460076045628,
"grad_norm": 0.31196919083595276,
"learning_rate": 0.00015063613231552163,
"loss": 0.2334,
"step": 491
},
{
"epoch": 1.2458808618504436,
"grad_norm": 0.3953218460083008,
"learning_rate": 0.00015053435114503817,
"loss": 0.2716,
"step": 492
},
{
"epoch": 1.2484157160963245,
"grad_norm": 0.4814457297325134,
"learning_rate": 0.0001504325699745547,
"loss": 0.2847,
"step": 493
},
{
"epoch": 1.2509505703422052,
"grad_norm": 0.5870761275291443,
"learning_rate": 0.00015033078880407125,
"loss": 0.3685,
"step": 494
},
{
"epoch": 1.2534854245880862,
"grad_norm": 0.30315646529197693,
"learning_rate": 0.00015022900763358781,
"loss": 0.2112,
"step": 495
},
{
"epoch": 1.256020278833967,
"grad_norm": 0.4358583390712738,
"learning_rate": 0.00015012722646310433,
"loss": 0.279,
"step": 496
},
{
"epoch": 1.258555133079848,
"grad_norm": 0.3699369728565216,
"learning_rate": 0.0001500254452926209,
"loss": 0.2941,
"step": 497
},
{
"epoch": 1.2610899873257289,
"grad_norm": 0.338522344827652,
"learning_rate": 0.0001499236641221374,
"loss": 0.273,
"step": 498
},
{
"epoch": 1.2636248415716096,
"grad_norm": 0.29661208391189575,
"learning_rate": 0.00014982188295165397,
"loss": 0.23,
"step": 499
},
{
"epoch": 1.2661596958174905,
"grad_norm": 0.4247685968875885,
"learning_rate": 0.00014972010178117048,
"loss": 0.3112,
"step": 500
},
{
"epoch": 1.2686945500633713,
"grad_norm": 0.44488340616226196,
"learning_rate": 0.00014961832061068702,
"loss": 0.3796,
"step": 501
},
{
"epoch": 1.2712294043092522,
"grad_norm": 0.30672356486320496,
"learning_rate": 0.00014951653944020356,
"loss": 0.2222,
"step": 502
},
{
"epoch": 1.2737642585551332,
"grad_norm": 0.3291172981262207,
"learning_rate": 0.0001494147582697201,
"loss": 0.2177,
"step": 503
},
{
"epoch": 1.276299112801014,
"grad_norm": 0.4180152118206024,
"learning_rate": 0.00014931297709923664,
"loss": 0.3673,
"step": 504
},
{
"epoch": 1.2788339670468947,
"grad_norm": 0.41350388526916504,
"learning_rate": 0.00014921119592875318,
"loss": 0.2544,
"step": 505
},
{
"epoch": 1.2813688212927756,
"grad_norm": 0.3517690598964691,
"learning_rate": 0.00014910941475826972,
"loss": 0.2139,
"step": 506
},
{
"epoch": 1.2839036755386566,
"grad_norm": 0.4273949861526489,
"learning_rate": 0.00014900763358778626,
"loss": 0.255,
"step": 507
},
{
"epoch": 1.2864385297845373,
"grad_norm": 0.3510381877422333,
"learning_rate": 0.00014890585241730283,
"loss": 0.2503,
"step": 508
},
{
"epoch": 1.2889733840304183,
"grad_norm": 0.4069119393825531,
"learning_rate": 0.00014880407124681934,
"loss": 0.3267,
"step": 509
},
{
"epoch": 1.291508238276299,
"grad_norm": 0.6244072318077087,
"learning_rate": 0.0001487022900763359,
"loss": 0.2519,
"step": 510
},
{
"epoch": 1.29404309252218,
"grad_norm": 0.473450630903244,
"learning_rate": 0.00014860050890585242,
"loss": 0.3093,
"step": 511
},
{
"epoch": 1.296577946768061,
"grad_norm": 0.3139822781085968,
"learning_rate": 0.00014849872773536898,
"loss": 0.2396,
"step": 512
},
{
"epoch": 1.2991128010139417,
"grad_norm": 0.23700624704360962,
"learning_rate": 0.0001483969465648855,
"loss": 0.1945,
"step": 513
},
{
"epoch": 1.3016476552598226,
"grad_norm": 0.42849189043045044,
"learning_rate": 0.00014829516539440203,
"loss": 0.2275,
"step": 514
},
{
"epoch": 1.3041825095057034,
"grad_norm": 0.4083426296710968,
"learning_rate": 0.00014819338422391857,
"loss": 0.3626,
"step": 515
},
{
"epoch": 1.3067173637515843,
"grad_norm": 0.4541410207748413,
"learning_rate": 0.0001480916030534351,
"loss": 0.3102,
"step": 516
},
{
"epoch": 1.3092522179974653,
"grad_norm": 0.6483343839645386,
"learning_rate": 0.00014798982188295165,
"loss": 0.3427,
"step": 517
},
{
"epoch": 1.311787072243346,
"grad_norm": 0.3928525447845459,
"learning_rate": 0.0001478880407124682,
"loss": 0.3155,
"step": 518
},
{
"epoch": 1.3143219264892267,
"grad_norm": 0.319035142660141,
"learning_rate": 0.00014778625954198476,
"loss": 0.2555,
"step": 519
},
{
"epoch": 1.3168567807351077,
"grad_norm": 0.2855183780193329,
"learning_rate": 0.00014768447837150127,
"loss": 0.2115,
"step": 520
},
{
"epoch": 1.3193916349809887,
"grad_norm": 0.3499714136123657,
"learning_rate": 0.00014758269720101784,
"loss": 0.254,
"step": 521
},
{
"epoch": 1.3219264892268694,
"grad_norm": 0.40895748138427734,
"learning_rate": 0.00014748091603053435,
"loss": 0.2975,
"step": 522
},
{
"epoch": 1.3244613434727504,
"grad_norm": 0.30614539980888367,
"learning_rate": 0.00014737913486005091,
"loss": 0.2584,
"step": 523
},
{
"epoch": 1.326996197718631,
"grad_norm": 0.2832574248313904,
"learning_rate": 0.00014727735368956743,
"loss": 0.2259,
"step": 524
},
{
"epoch": 1.329531051964512,
"grad_norm": 0.3444589674472809,
"learning_rate": 0.000147175572519084,
"loss": 0.2608,
"step": 525
},
{
"epoch": 1.332065906210393,
"grad_norm": 0.35170844197273254,
"learning_rate": 0.0001470737913486005,
"loss": 0.3019,
"step": 526
},
{
"epoch": 1.3346007604562737,
"grad_norm": 0.46164563298225403,
"learning_rate": 0.00014697201017811707,
"loss": 0.2024,
"step": 527
},
{
"epoch": 1.3371356147021547,
"grad_norm": 0.2369971126317978,
"learning_rate": 0.00014687022900763358,
"loss": 0.1967,
"step": 528
},
{
"epoch": 1.3396704689480354,
"grad_norm": 0.43180060386657715,
"learning_rate": 0.00014676844783715012,
"loss": 0.2415,
"step": 529
},
{
"epoch": 1.3422053231939164,
"grad_norm": 0.3531292676925659,
"learning_rate": 0.00014666666666666666,
"loss": 0.2283,
"step": 530
},
{
"epoch": 1.3447401774397973,
"grad_norm": 0.49374547600746155,
"learning_rate": 0.0001465648854961832,
"loss": 0.3025,
"step": 531
},
{
"epoch": 1.347275031685678,
"grad_norm": 0.4822668731212616,
"learning_rate": 0.00014646310432569977,
"loss": 0.3498,
"step": 532
},
{
"epoch": 1.3498098859315588,
"grad_norm": 0.4463392496109009,
"learning_rate": 0.00014636132315521628,
"loss": 0.2186,
"step": 533
},
{
"epoch": 1.3523447401774398,
"grad_norm": 0.40042299032211304,
"learning_rate": 0.00014625954198473285,
"loss": 0.2316,
"step": 534
},
{
"epoch": 1.3548795944233207,
"grad_norm": 0.41266927123069763,
"learning_rate": 0.00014615776081424936,
"loss": 0.2324,
"step": 535
},
{
"epoch": 1.3574144486692015,
"grad_norm": 0.46208152174949646,
"learning_rate": 0.00014605597964376593,
"loss": 0.2261,
"step": 536
},
{
"epoch": 1.3599493029150824,
"grad_norm": 0.38895705342292786,
"learning_rate": 0.00014595419847328244,
"loss": 0.2732,
"step": 537
},
{
"epoch": 1.3624841571609632,
"grad_norm": 0.4489743113517761,
"learning_rate": 0.000145852417302799,
"loss": 0.3197,
"step": 538
},
{
"epoch": 1.3650190114068441,
"grad_norm": 0.25082916021347046,
"learning_rate": 0.00014575063613231552,
"loss": 0.2096,
"step": 539
},
{
"epoch": 1.367553865652725,
"grad_norm": 0.3681942820549011,
"learning_rate": 0.00014564885496183208,
"loss": 0.2496,
"step": 540
},
{
"epoch": 1.3700887198986058,
"grad_norm": 0.30986878275871277,
"learning_rate": 0.0001455470737913486,
"loss": 0.2244,
"step": 541
},
{
"epoch": 1.3726235741444868,
"grad_norm": 0.42349961400032043,
"learning_rate": 0.00014544529262086513,
"loss": 0.2315,
"step": 542
},
{
"epoch": 1.3751584283903675,
"grad_norm": 0.29656872153282166,
"learning_rate": 0.00014534351145038167,
"loss": 0.2458,
"step": 543
},
{
"epoch": 1.3776932826362485,
"grad_norm": 0.4033924341201782,
"learning_rate": 0.0001452417302798982,
"loss": 0.3506,
"step": 544
},
{
"epoch": 1.3802281368821292,
"grad_norm": 0.3998583257198334,
"learning_rate": 0.00014513994910941478,
"loss": 0.3108,
"step": 545
},
{
"epoch": 1.3827629911280102,
"grad_norm": 0.3335135281085968,
"learning_rate": 0.0001450381679389313,
"loss": 0.2816,
"step": 546
},
{
"epoch": 1.385297845373891,
"grad_norm": 0.39304816722869873,
"learning_rate": 0.00014493638676844786,
"loss": 0.3968,
"step": 547
},
{
"epoch": 1.3878326996197718,
"grad_norm": 0.34913384914398193,
"learning_rate": 0.00014483460559796437,
"loss": 0.2653,
"step": 548
},
{
"epoch": 1.3903675538656528,
"grad_norm": 0.3312399387359619,
"learning_rate": 0.00014473282442748094,
"loss": 0.2629,
"step": 549
},
{
"epoch": 1.3929024081115335,
"grad_norm": 0.31613558530807495,
"learning_rate": 0.00014463104325699745,
"loss": 0.2033,
"step": 550
},
{
"epoch": 1.3954372623574145,
"grad_norm": 0.2872864603996277,
"learning_rate": 0.00014452926208651402,
"loss": 0.2097,
"step": 551
},
{
"epoch": 1.3979721166032952,
"grad_norm": 0.24432098865509033,
"learning_rate": 0.00014442748091603053,
"loss": 0.2172,
"step": 552
},
{
"epoch": 1.4005069708491762,
"grad_norm": 0.31649062037467957,
"learning_rate": 0.0001443256997455471,
"loss": 0.2255,
"step": 553
},
{
"epoch": 1.4030418250950571,
"grad_norm": 0.2483261376619339,
"learning_rate": 0.0001442239185750636,
"loss": 0.1856,
"step": 554
},
{
"epoch": 1.4055766793409379,
"grad_norm": 0.437757670879364,
"learning_rate": 0.00014412213740458017,
"loss": 0.2713,
"step": 555
},
{
"epoch": 1.4081115335868186,
"grad_norm": 0.43551307916641235,
"learning_rate": 0.0001440203562340967,
"loss": 0.2654,
"step": 556
},
{
"epoch": 1.4106463878326996,
"grad_norm": 0.5781947374343872,
"learning_rate": 0.00014391857506361322,
"loss": 0.3242,
"step": 557
},
{
"epoch": 1.4131812420785805,
"grad_norm": 0.3809725344181061,
"learning_rate": 0.0001438167938931298,
"loss": 0.2176,
"step": 558
},
{
"epoch": 1.4157160963244613,
"grad_norm": 0.38208654522895813,
"learning_rate": 0.0001437150127226463,
"loss": 0.2043,
"step": 559
},
{
"epoch": 1.4182509505703422,
"grad_norm": 0.39930659532546997,
"learning_rate": 0.00014361323155216287,
"loss": 0.2914,
"step": 560
},
{
"epoch": 1.420785804816223,
"grad_norm": 0.3019846975803375,
"learning_rate": 0.00014351145038167938,
"loss": 0.2037,
"step": 561
},
{
"epoch": 1.423320659062104,
"grad_norm": 0.4549913704395294,
"learning_rate": 0.00014340966921119595,
"loss": 0.2308,
"step": 562
},
{
"epoch": 1.4258555133079849,
"grad_norm": 0.38887929916381836,
"learning_rate": 0.00014330788804071246,
"loss": 0.2339,
"step": 563
},
{
"epoch": 1.4283903675538656,
"grad_norm": 0.3481290340423584,
"learning_rate": 0.00014320610687022903,
"loss": 0.2206,
"step": 564
},
{
"epoch": 1.4309252217997466,
"grad_norm": 0.46603840589523315,
"learning_rate": 0.00014310432569974554,
"loss": 0.3006,
"step": 565
},
{
"epoch": 1.4334600760456273,
"grad_norm": 0.3586963713169098,
"learning_rate": 0.0001430025445292621,
"loss": 0.2646,
"step": 566
},
{
"epoch": 1.4359949302915083,
"grad_norm": 0.3106522560119629,
"learning_rate": 0.00014290076335877862,
"loss": 0.2725,
"step": 567
},
{
"epoch": 1.4385297845373892,
"grad_norm": 0.48086050152778625,
"learning_rate": 0.00014279898218829518,
"loss": 0.3007,
"step": 568
},
{
"epoch": 1.44106463878327,
"grad_norm": 0.44636330008506775,
"learning_rate": 0.00014269720101781172,
"loss": 0.3755,
"step": 569
},
{
"epoch": 1.4435994930291507,
"grad_norm": 0.3114064633846283,
"learning_rate": 0.00014259541984732824,
"loss": 0.2606,
"step": 570
},
{
"epoch": 1.4461343472750317,
"grad_norm": 0.358394593000412,
"learning_rate": 0.0001424936386768448,
"loss": 0.27,
"step": 571
},
{
"epoch": 1.4486692015209126,
"grad_norm": 0.3568032681941986,
"learning_rate": 0.00014239185750636131,
"loss": 0.2767,
"step": 572
},
{
"epoch": 1.4512040557667933,
"grad_norm": 0.4407200515270233,
"learning_rate": 0.00014229007633587788,
"loss": 0.3786,
"step": 573
},
{
"epoch": 1.4537389100126743,
"grad_norm": 0.4096840023994446,
"learning_rate": 0.0001421882951653944,
"loss": 0.3199,
"step": 574
},
{
"epoch": 1.456273764258555,
"grad_norm": 0.3343110680580139,
"learning_rate": 0.00014208651399491096,
"loss": 0.2538,
"step": 575
},
{
"epoch": 1.458808618504436,
"grad_norm": 0.27782517671585083,
"learning_rate": 0.00014198473282442747,
"loss": 0.2179,
"step": 576
},
{
"epoch": 1.461343472750317,
"grad_norm": 0.2901310920715332,
"learning_rate": 0.00014188295165394404,
"loss": 0.2552,
"step": 577
},
{
"epoch": 1.4638783269961977,
"grad_norm": 0.3634903132915497,
"learning_rate": 0.00014178117048346055,
"loss": 0.257,
"step": 578
},
{
"epoch": 1.4664131812420786,
"grad_norm": 0.37307262420654297,
"learning_rate": 0.00014167938931297712,
"loss": 0.254,
"step": 579
},
{
"epoch": 1.4689480354879594,
"grad_norm": 0.27726346254348755,
"learning_rate": 0.00014157760814249366,
"loss": 0.1938,
"step": 580
},
{
"epoch": 1.4714828897338403,
"grad_norm": 0.3364371657371521,
"learning_rate": 0.0001414758269720102,
"loss": 0.2094,
"step": 581
},
{
"epoch": 1.4740177439797213,
"grad_norm": 0.4418800473213196,
"learning_rate": 0.00014137404580152673,
"loss": 0.3243,
"step": 582
},
{
"epoch": 1.476552598225602,
"grad_norm": 0.42042022943496704,
"learning_rate": 0.00014127226463104327,
"loss": 0.2333,
"step": 583
},
{
"epoch": 1.4790874524714828,
"grad_norm": 0.36881470680236816,
"learning_rate": 0.0001411704834605598,
"loss": 0.2513,
"step": 584
},
{
"epoch": 1.4816223067173637,
"grad_norm": 0.4009782671928406,
"learning_rate": 0.00014106870229007632,
"loss": 0.3085,
"step": 585
},
{
"epoch": 1.4841571609632447,
"grad_norm": 0.43179744482040405,
"learning_rate": 0.0001409669211195929,
"loss": 0.3189,
"step": 586
},
{
"epoch": 1.4866920152091254,
"grad_norm": 0.3721300959587097,
"learning_rate": 0.0001408651399491094,
"loss": 0.2318,
"step": 587
},
{
"epoch": 1.4892268694550064,
"grad_norm": 0.3875066339969635,
"learning_rate": 0.00014076335877862597,
"loss": 0.2753,
"step": 588
},
{
"epoch": 1.491761723700887,
"grad_norm": 0.35223937034606934,
"learning_rate": 0.00014066157760814248,
"loss": 0.2257,
"step": 589
},
{
"epoch": 1.494296577946768,
"grad_norm": 0.30979710817337036,
"learning_rate": 0.00014055979643765905,
"loss": 0.2149,
"step": 590
},
{
"epoch": 1.496831432192649,
"grad_norm": 0.23923753201961517,
"learning_rate": 0.00014045801526717556,
"loss": 0.1911,
"step": 591
},
{
"epoch": 1.4993662864385298,
"grad_norm": 0.40893304347991943,
"learning_rate": 0.00014035623409669213,
"loss": 0.2756,
"step": 592
},
{
"epoch": 1.5019011406844105,
"grad_norm": 0.2659086585044861,
"learning_rate": 0.00014025445292620867,
"loss": 0.2154,
"step": 593
},
{
"epoch": 1.5044359949302915,
"grad_norm": 0.30749884247779846,
"learning_rate": 0.0001401526717557252,
"loss": 0.2184,
"step": 594
},
{
"epoch": 1.5069708491761724,
"grad_norm": 0.3892879784107208,
"learning_rate": 0.00014005089058524175,
"loss": 0.2849,
"step": 595
},
{
"epoch": 1.5095057034220534,
"grad_norm": 0.5041462779045105,
"learning_rate": 0.00013994910941475828,
"loss": 0.2551,
"step": 596
},
{
"epoch": 1.512040557667934,
"grad_norm": 0.4143123924732208,
"learning_rate": 0.00013984732824427482,
"loss": 0.2485,
"step": 597
},
{
"epoch": 1.5145754119138148,
"grad_norm": 0.5315548181533813,
"learning_rate": 0.00013974554707379136,
"loss": 0.3242,
"step": 598
},
{
"epoch": 1.5171102661596958,
"grad_norm": 0.28680169582366943,
"learning_rate": 0.0001396437659033079,
"loss": 0.227,
"step": 599
},
{
"epoch": 1.5196451204055768,
"grad_norm": 0.3015950620174408,
"learning_rate": 0.00013954198473282441,
"loss": 0.2122,
"step": 600
},
{
"epoch": 1.5221799746514575,
"grad_norm": 0.30785971879959106,
"learning_rate": 0.00013944020356234098,
"loss": 0.2194,
"step": 601
},
{
"epoch": 1.5247148288973384,
"grad_norm": 0.3596206605434418,
"learning_rate": 0.0001393384223918575,
"loss": 0.2574,
"step": 602
},
{
"epoch": 1.5272496831432192,
"grad_norm": 0.18499840795993805,
"learning_rate": 0.00013923664122137406,
"loss": 0.1944,
"step": 603
},
{
"epoch": 1.5297845373891001,
"grad_norm": 0.4346081614494324,
"learning_rate": 0.00013913486005089057,
"loss": 0.3187,
"step": 604
},
{
"epoch": 1.532319391634981,
"grad_norm": 0.46154457330703735,
"learning_rate": 0.00013903307888040714,
"loss": 0.3149,
"step": 605
},
{
"epoch": 1.5348542458808618,
"grad_norm": 0.3444209098815918,
"learning_rate": 0.00013893129770992368,
"loss": 0.2801,
"step": 606
},
{
"epoch": 1.5373891001267426,
"grad_norm": 0.550620436668396,
"learning_rate": 0.00013882951653944022,
"loss": 0.3038,
"step": 607
},
{
"epoch": 1.5399239543726235,
"grad_norm": 0.36603689193725586,
"learning_rate": 0.00013872773536895676,
"loss": 0.3224,
"step": 608
},
{
"epoch": 1.5424588086185045,
"grad_norm": 0.213638037443161,
"learning_rate": 0.0001386259541984733,
"loss": 0.2081,
"step": 609
},
{
"epoch": 1.5449936628643854,
"grad_norm": 0.34508904814720154,
"learning_rate": 0.00013852417302798983,
"loss": 0.2474,
"step": 610
},
{
"epoch": 1.5475285171102662,
"grad_norm": 0.42072099447250366,
"learning_rate": 0.00013842239185750637,
"loss": 0.3049,
"step": 611
},
{
"epoch": 1.550063371356147,
"grad_norm": 0.3760271966457367,
"learning_rate": 0.0001383206106870229,
"loss": 0.2499,
"step": 612
},
{
"epoch": 1.5525982256020279,
"grad_norm": 0.24040678143501282,
"learning_rate": 0.00013821882951653943,
"loss": 0.2134,
"step": 613
},
{
"epoch": 1.5551330798479088,
"grad_norm": 0.458035945892334,
"learning_rate": 0.000138117048346056,
"loss": 0.3375,
"step": 614
},
{
"epoch": 1.5576679340937896,
"grad_norm": 0.30446937680244446,
"learning_rate": 0.0001380152671755725,
"loss": 0.2252,
"step": 615
},
{
"epoch": 1.5602027883396705,
"grad_norm": 0.3036455810070038,
"learning_rate": 0.00013791348600508907,
"loss": 0.2095,
"step": 616
},
{
"epoch": 1.5627376425855513,
"grad_norm": 0.4190979301929474,
"learning_rate": 0.0001378117048346056,
"loss": 0.2932,
"step": 617
},
{
"epoch": 1.5652724968314322,
"grad_norm": 0.27648523449897766,
"learning_rate": 0.00013770992366412215,
"loss": 0.2133,
"step": 618
},
{
"epoch": 1.5678073510773132,
"grad_norm": 0.28326693177223206,
"learning_rate": 0.0001376081424936387,
"loss": 0.2087,
"step": 619
},
{
"epoch": 1.570342205323194,
"grad_norm": 0.3020143508911133,
"learning_rate": 0.00013750636132315523,
"loss": 0.2321,
"step": 620
},
{
"epoch": 1.5728770595690746,
"grad_norm": 0.3246900141239166,
"learning_rate": 0.00013740458015267177,
"loss": 0.2121,
"step": 621
},
{
"epoch": 1.5754119138149556,
"grad_norm": 0.3806106448173523,
"learning_rate": 0.0001373027989821883,
"loss": 0.2856,
"step": 622
},
{
"epoch": 1.5779467680608366,
"grad_norm": 0.3568238317966461,
"learning_rate": 0.00013720101781170485,
"loss": 0.2579,
"step": 623
},
{
"epoch": 1.5804816223067175,
"grad_norm": 0.45590534806251526,
"learning_rate": 0.00013709923664122139,
"loss": 0.2059,
"step": 624
},
{
"epoch": 1.5830164765525983,
"grad_norm": 0.41996893286705017,
"learning_rate": 0.00013699745547073792,
"loss": 0.2154,
"step": 625
},
{
"epoch": 1.585551330798479,
"grad_norm": 0.5142170190811157,
"learning_rate": 0.00013689567430025446,
"loss": 0.2708,
"step": 626
},
{
"epoch": 1.58808618504436,
"grad_norm": 0.36335933208465576,
"learning_rate": 0.000136793893129771,
"loss": 0.2501,
"step": 627
},
{
"epoch": 1.590621039290241,
"grad_norm": 0.3186666667461395,
"learning_rate": 0.00013669211195928752,
"loss": 0.2227,
"step": 628
},
{
"epoch": 1.5931558935361216,
"grad_norm": 0.29709601402282715,
"learning_rate": 0.00013659033078880408,
"loss": 0.2265,
"step": 629
},
{
"epoch": 1.5956907477820024,
"grad_norm": 0.2891612648963928,
"learning_rate": 0.00013648854961832062,
"loss": 0.2298,
"step": 630
},
{
"epoch": 1.5982256020278833,
"grad_norm": 0.2191978096961975,
"learning_rate": 0.00013638676844783716,
"loss": 0.2049,
"step": 631
},
{
"epoch": 1.6007604562737643,
"grad_norm": 0.37781399488449097,
"learning_rate": 0.0001362849872773537,
"loss": 0.3664,
"step": 632
},
{
"epoch": 1.6032953105196452,
"grad_norm": 0.3082154393196106,
"learning_rate": 0.00013618320610687024,
"loss": 0.2063,
"step": 633
},
{
"epoch": 1.605830164765526,
"grad_norm": 0.318317711353302,
"learning_rate": 0.00013608142493638678,
"loss": 0.2085,
"step": 634
},
{
"epoch": 1.6083650190114067,
"grad_norm": 0.45566102862358093,
"learning_rate": 0.00013597964376590332,
"loss": 0.2876,
"step": 635
},
{
"epoch": 1.6108998732572877,
"grad_norm": 0.3186021149158478,
"learning_rate": 0.00013587786259541986,
"loss": 0.2704,
"step": 636
},
{
"epoch": 1.6134347275031686,
"grad_norm": 0.28905680775642395,
"learning_rate": 0.0001357760814249364,
"loss": 0.209,
"step": 637
},
{
"epoch": 1.6159695817490496,
"grad_norm": 0.23341360688209534,
"learning_rate": 0.00013567430025445294,
"loss": 0.1835,
"step": 638
},
{
"epoch": 1.6185044359949303,
"grad_norm": 0.336247056722641,
"learning_rate": 0.00013557251908396947,
"loss": 0.2547,
"step": 639
},
{
"epoch": 1.621039290240811,
"grad_norm": 0.3736225366592407,
"learning_rate": 0.00013547073791348601,
"loss": 0.3053,
"step": 640
},
{
"epoch": 1.623574144486692,
"grad_norm": 0.3983825743198395,
"learning_rate": 0.00013536895674300255,
"loss": 0.2395,
"step": 641
},
{
"epoch": 1.626108998732573,
"grad_norm": 0.35913559794425964,
"learning_rate": 0.0001352671755725191,
"loss": 0.2918,
"step": 642
},
{
"epoch": 1.6286438529784537,
"grad_norm": 0.2984326183795929,
"learning_rate": 0.00013516539440203563,
"loss": 0.2148,
"step": 643
},
{
"epoch": 1.6311787072243344,
"grad_norm": 0.3113880753517151,
"learning_rate": 0.00013506361323155217,
"loss": 0.2044,
"step": 644
},
{
"epoch": 1.6337135614702154,
"grad_norm": 0.5340004563331604,
"learning_rate": 0.0001349618320610687,
"loss": 0.3234,
"step": 645
},
{
"epoch": 1.6362484157160964,
"grad_norm": 0.38927194476127625,
"learning_rate": 0.00013486005089058525,
"loss": 0.2866,
"step": 646
},
{
"epoch": 1.6387832699619773,
"grad_norm": 0.38895881175994873,
"learning_rate": 0.0001347582697201018,
"loss": 0.2324,
"step": 647
},
{
"epoch": 1.641318124207858,
"grad_norm": 0.41959917545318604,
"learning_rate": 0.00013465648854961833,
"loss": 0.2666,
"step": 648
},
{
"epoch": 1.6438529784537388,
"grad_norm": 0.4299626648426056,
"learning_rate": 0.00013455470737913487,
"loss": 0.2905,
"step": 649
},
{
"epoch": 1.6463878326996197,
"grad_norm": 0.4236285090446472,
"learning_rate": 0.0001344529262086514,
"loss": 0.292,
"step": 650
},
{
"epoch": 1.6489226869455007,
"grad_norm": 0.8049849271774292,
"learning_rate": 0.00013435114503816795,
"loss": 0.2351,
"step": 651
},
{
"epoch": 1.6514575411913817,
"grad_norm": 0.3420075476169586,
"learning_rate": 0.00013424936386768449,
"loss": 0.2355,
"step": 652
},
{
"epoch": 1.6539923954372624,
"grad_norm": 0.3632122874259949,
"learning_rate": 0.00013414758269720103,
"loss": 0.2377,
"step": 653
},
{
"epoch": 1.6565272496831431,
"grad_norm": 0.27961722016334534,
"learning_rate": 0.00013404580152671756,
"loss": 0.2299,
"step": 654
},
{
"epoch": 1.659062103929024,
"grad_norm": 0.3043057918548584,
"learning_rate": 0.0001339440203562341,
"loss": 0.2321,
"step": 655
},
{
"epoch": 1.661596958174905,
"grad_norm": 0.3421036899089813,
"learning_rate": 0.00013384223918575064,
"loss": 0.2492,
"step": 656
},
{
"epoch": 1.6641318124207858,
"grad_norm": 0.39606526494026184,
"learning_rate": 0.00013374045801526718,
"loss": 0.3401,
"step": 657
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.35081973671913147,
"learning_rate": 0.00013363867684478372,
"loss": 0.2175,
"step": 658
},
{
"epoch": 1.6692015209125475,
"grad_norm": 0.420175701379776,
"learning_rate": 0.00013353689567430026,
"loss": 0.2813,
"step": 659
},
{
"epoch": 1.6717363751584284,
"grad_norm": 0.24181438982486725,
"learning_rate": 0.0001334351145038168,
"loss": 0.219,
"step": 660
},
{
"epoch": 1.6742712294043094,
"grad_norm": 0.6243584752082825,
"learning_rate": 0.00013333333333333334,
"loss": 0.3087,
"step": 661
},
{
"epoch": 1.6768060836501901,
"grad_norm": 0.4036748707294464,
"learning_rate": 0.00013323155216284988,
"loss": 0.251,
"step": 662
},
{
"epoch": 1.6793409378960709,
"grad_norm": 0.39555415511131287,
"learning_rate": 0.00013312977099236642,
"loss": 0.3279,
"step": 663
},
{
"epoch": 1.6818757921419518,
"grad_norm": 0.4018571674823761,
"learning_rate": 0.00013302798982188296,
"loss": 0.2337,
"step": 664
},
{
"epoch": 1.6844106463878328,
"grad_norm": 0.36354130506515503,
"learning_rate": 0.0001329262086513995,
"loss": 0.2503,
"step": 665
},
{
"epoch": 1.6869455006337135,
"grad_norm": 0.32249706983566284,
"learning_rate": 0.00013282442748091604,
"loss": 0.27,
"step": 666
},
{
"epoch": 1.6894803548795945,
"grad_norm": 0.33560654520988464,
"learning_rate": 0.00013272264631043258,
"loss": 0.203,
"step": 667
},
{
"epoch": 1.6920152091254752,
"grad_norm": 0.39997267723083496,
"learning_rate": 0.00013262086513994911,
"loss": 0.2662,
"step": 668
},
{
"epoch": 1.6945500633713562,
"grad_norm": 0.6739961504936218,
"learning_rate": 0.00013251908396946565,
"loss": 0.2803,
"step": 669
},
{
"epoch": 1.6970849176172371,
"grad_norm": 0.5863606929779053,
"learning_rate": 0.0001324173027989822,
"loss": 0.351,
"step": 670
},
{
"epoch": 1.6996197718631179,
"grad_norm": 0.4408819079399109,
"learning_rate": 0.00013231552162849873,
"loss": 0.1814,
"step": 671
},
{
"epoch": 1.7021546261089986,
"grad_norm": 0.3341253697872162,
"learning_rate": 0.00013221374045801527,
"loss": 0.2156,
"step": 672
},
{
"epoch": 1.7046894803548795,
"grad_norm": 0.3035176992416382,
"learning_rate": 0.0001321119592875318,
"loss": 0.2308,
"step": 673
},
{
"epoch": 1.7072243346007605,
"grad_norm": 0.4395483136177063,
"learning_rate": 0.00013201017811704835,
"loss": 0.3418,
"step": 674
},
{
"epoch": 1.7097591888466415,
"grad_norm": 0.22972792387008667,
"learning_rate": 0.0001319083969465649,
"loss": 0.1873,
"step": 675
},
{
"epoch": 1.7122940430925222,
"grad_norm": 0.47378918528556824,
"learning_rate": 0.00013180661577608143,
"loss": 0.2514,
"step": 676
},
{
"epoch": 1.714828897338403,
"grad_norm": 0.3947070240974426,
"learning_rate": 0.00013170483460559797,
"loss": 0.2289,
"step": 677
},
{
"epoch": 1.717363751584284,
"grad_norm": 0.3789718747138977,
"learning_rate": 0.0001316030534351145,
"loss": 0.2476,
"step": 678
},
{
"epoch": 1.7198986058301649,
"grad_norm": 0.4904823899269104,
"learning_rate": 0.00013150127226463105,
"loss": 0.2163,
"step": 679
},
{
"epoch": 1.7224334600760456,
"grad_norm": 0.3285132646560669,
"learning_rate": 0.0001313994910941476,
"loss": 0.2786,
"step": 680
},
{
"epoch": 1.7249683143219265,
"grad_norm": 0.4326847493648529,
"learning_rate": 0.00013129770992366413,
"loss": 0.2409,
"step": 681
},
{
"epoch": 1.7275031685678073,
"grad_norm": 0.3819947838783264,
"learning_rate": 0.00013119592875318067,
"loss": 0.2076,
"step": 682
},
{
"epoch": 1.7300380228136882,
"grad_norm": 0.4046533703804016,
"learning_rate": 0.0001310941475826972,
"loss": 0.2717,
"step": 683
},
{
"epoch": 1.7325728770595692,
"grad_norm": 0.34681758284568787,
"learning_rate": 0.00013099236641221374,
"loss": 0.2389,
"step": 684
},
{
"epoch": 1.73510773130545,
"grad_norm": 0.35155028104782104,
"learning_rate": 0.00013089058524173028,
"loss": 0.2407,
"step": 685
},
{
"epoch": 1.7376425855513307,
"grad_norm": 0.3306678533554077,
"learning_rate": 0.00013078880407124682,
"loss": 0.2767,
"step": 686
},
{
"epoch": 1.7401774397972116,
"grad_norm": 0.27715572714805603,
"learning_rate": 0.00013068702290076336,
"loss": 0.1955,
"step": 687
},
{
"epoch": 1.7427122940430926,
"grad_norm": 0.3591010272502899,
"learning_rate": 0.0001305852417302799,
"loss": 0.2269,
"step": 688
},
{
"epoch": 1.7452471482889735,
"grad_norm": 0.39104408025741577,
"learning_rate": 0.00013048346055979644,
"loss": 0.2392,
"step": 689
},
{
"epoch": 1.7477820025348543,
"grad_norm": 0.44545605778694153,
"learning_rate": 0.00013038167938931298,
"loss": 0.2823,
"step": 690
},
{
"epoch": 1.750316856780735,
"grad_norm": 0.29502785205841064,
"learning_rate": 0.00013027989821882952,
"loss": 0.1899,
"step": 691
},
{
"epoch": 1.752851711026616,
"grad_norm": 0.40423381328582764,
"learning_rate": 0.00013017811704834606,
"loss": 0.2069,
"step": 692
},
{
"epoch": 1.755386565272497,
"grad_norm": 0.38649502396583557,
"learning_rate": 0.0001300763358778626,
"loss": 0.1938,
"step": 693
},
{
"epoch": 1.7579214195183777,
"grad_norm": 0.40014389157295227,
"learning_rate": 0.00012997455470737914,
"loss": 0.2825,
"step": 694
},
{
"epoch": 1.7604562737642584,
"grad_norm": 0.4783387780189514,
"learning_rate": 0.00012987277353689568,
"loss": 0.2629,
"step": 695
},
{
"epoch": 1.7629911280101394,
"grad_norm": 0.4938651919364929,
"learning_rate": 0.00012977099236641222,
"loss": 0.2976,
"step": 696
},
{
"epoch": 1.7655259822560203,
"grad_norm": 0.32507607340812683,
"learning_rate": 0.00012966921119592875,
"loss": 0.2097,
"step": 697
},
{
"epoch": 1.7680608365019013,
"grad_norm": 0.31158536672592163,
"learning_rate": 0.0001295674300254453,
"loss": 0.223,
"step": 698
},
{
"epoch": 1.770595690747782,
"grad_norm": 0.5594013333320618,
"learning_rate": 0.00012946564885496183,
"loss": 0.3523,
"step": 699
},
{
"epoch": 1.7731305449936627,
"grad_norm": 0.5820282697677612,
"learning_rate": 0.00012936386768447837,
"loss": 0.3181,
"step": 700
},
{
"epoch": 1.7756653992395437,
"grad_norm": 0.3635233938694,
"learning_rate": 0.0001292620865139949,
"loss": 0.2387,
"step": 701
},
{
"epoch": 1.7782002534854247,
"grad_norm": 0.3195054531097412,
"learning_rate": 0.00012916030534351148,
"loss": 0.2046,
"step": 702
},
{
"epoch": 1.7807351077313056,
"grad_norm": 0.3483947217464447,
"learning_rate": 0.000129058524173028,
"loss": 0.2576,
"step": 703
},
{
"epoch": 1.7832699619771863,
"grad_norm": 0.3419065475463867,
"learning_rate": 0.00012895674300254456,
"loss": 0.2361,
"step": 704
},
{
"epoch": 1.785804816223067,
"grad_norm": 0.3142557442188263,
"learning_rate": 0.00012885496183206107,
"loss": 0.2172,
"step": 705
},
{
"epoch": 1.788339670468948,
"grad_norm": 0.3502836227416992,
"learning_rate": 0.0001287531806615776,
"loss": 0.2621,
"step": 706
},
{
"epoch": 1.790874524714829,
"grad_norm": 0.37896937131881714,
"learning_rate": 0.00012865139949109415,
"loss": 0.2374,
"step": 707
},
{
"epoch": 1.7934093789607097,
"grad_norm": 0.3880506455898285,
"learning_rate": 0.0001285496183206107,
"loss": 0.2862,
"step": 708
},
{
"epoch": 1.7959442332065905,
"grad_norm": 0.2648681700229645,
"learning_rate": 0.00012844783715012723,
"loss": 0.206,
"step": 709
},
{
"epoch": 1.7984790874524714,
"grad_norm": 0.25072911381721497,
"learning_rate": 0.00012834605597964377,
"loss": 0.2123,
"step": 710
},
{
"epoch": 1.8010139416983524,
"grad_norm": 0.3076663315296173,
"learning_rate": 0.0001282442748091603,
"loss": 0.2983,
"step": 711
},
{
"epoch": 1.8035487959442333,
"grad_norm": 0.4219549000263214,
"learning_rate": 0.00012814249363867684,
"loss": 0.2213,
"step": 712
},
{
"epoch": 1.806083650190114,
"grad_norm": 0.2831745445728302,
"learning_rate": 0.00012804071246819338,
"loss": 0.2062,
"step": 713
},
{
"epoch": 1.8086185044359948,
"grad_norm": 0.4014468491077423,
"learning_rate": 0.00012793893129770992,
"loss": 0.2945,
"step": 714
},
{
"epoch": 1.8111533586818758,
"grad_norm": 0.2980962097644806,
"learning_rate": 0.0001278371501272265,
"loss": 0.2179,
"step": 715
},
{
"epoch": 1.8136882129277567,
"grad_norm": 0.2338070124387741,
"learning_rate": 0.000127735368956743,
"loss": 0.1664,
"step": 716
},
{
"epoch": 1.8162230671736375,
"grad_norm": 0.6155439615249634,
"learning_rate": 0.00012763358778625957,
"loss": 0.3429,
"step": 717
},
{
"epoch": 1.8187579214195184,
"grad_norm": 0.46969589591026306,
"learning_rate": 0.00012753180661577608,
"loss": 0.2584,
"step": 718
},
{
"epoch": 1.8212927756653992,
"grad_norm": 0.5578194260597229,
"learning_rate": 0.00012743002544529265,
"loss": 0.2695,
"step": 719
},
{
"epoch": 1.8238276299112801,
"grad_norm": 0.34903043508529663,
"learning_rate": 0.00012732824427480916,
"loss": 0.2119,
"step": 720
},
{
"epoch": 1.826362484157161,
"grad_norm": 0.3990432322025299,
"learning_rate": 0.0001272264631043257,
"loss": 0.2487,
"step": 721
},
{
"epoch": 1.8288973384030418,
"grad_norm": 0.3382611572742462,
"learning_rate": 0.00012712468193384224,
"loss": 0.2313,
"step": 722
},
{
"epoch": 1.8314321926489225,
"grad_norm": 0.30938395857810974,
"learning_rate": 0.00012702290076335878,
"loss": 0.2113,
"step": 723
},
{
"epoch": 1.8339670468948035,
"grad_norm": 0.39266690611839294,
"learning_rate": 0.00012692111959287532,
"loss": 0.2609,
"step": 724
},
{
"epoch": 1.8365019011406845,
"grad_norm": 0.4396655261516571,
"learning_rate": 0.00012681933842239186,
"loss": 0.2518,
"step": 725
},
{
"epoch": 1.8390367553865654,
"grad_norm": 0.4134500324726105,
"learning_rate": 0.0001267175572519084,
"loss": 0.3317,
"step": 726
},
{
"epoch": 1.8415716096324461,
"grad_norm": 0.29644638299942017,
"learning_rate": 0.00012661577608142493,
"loss": 0.1912,
"step": 727
},
{
"epoch": 1.8441064638783269,
"grad_norm": 0.3661201596260071,
"learning_rate": 0.0001265139949109415,
"loss": 0.2911,
"step": 728
},
{
"epoch": 1.8466413181242078,
"grad_norm": 0.4504169225692749,
"learning_rate": 0.000126412213740458,
"loss": 0.3409,
"step": 729
},
{
"epoch": 1.8491761723700888,
"grad_norm": 0.28516069054603577,
"learning_rate": 0.00012631043256997458,
"loss": 0.254,
"step": 730
},
{
"epoch": 1.8517110266159695,
"grad_norm": 0.33754590153694153,
"learning_rate": 0.0001262086513994911,
"loss": 0.2275,
"step": 731
},
{
"epoch": 1.8542458808618505,
"grad_norm": 0.26562589406967163,
"learning_rate": 0.00012610687022900766,
"loss": 0.1979,
"step": 732
},
{
"epoch": 1.8567807351077312,
"grad_norm": 0.3081592321395874,
"learning_rate": 0.00012600508905852417,
"loss": 0.2099,
"step": 733
},
{
"epoch": 1.8593155893536122,
"grad_norm": 0.34866124391555786,
"learning_rate": 0.0001259033078880407,
"loss": 0.3038,
"step": 734
},
{
"epoch": 1.8618504435994931,
"grad_norm": 0.2867881953716278,
"learning_rate": 0.00012580152671755725,
"loss": 0.2225,
"step": 735
},
{
"epoch": 1.8643852978453739,
"grad_norm": 0.2374526560306549,
"learning_rate": 0.0001256997455470738,
"loss": 0.1945,
"step": 736
},
{
"epoch": 1.8669201520912546,
"grad_norm": 0.3072168827056885,
"learning_rate": 0.00012559796437659033,
"loss": 0.2135,
"step": 737
},
{
"epoch": 1.8694550063371356,
"grad_norm": 0.36897239089012146,
"learning_rate": 0.00012549618320610687,
"loss": 0.3225,
"step": 738
},
{
"epoch": 1.8719898605830165,
"grad_norm": 0.3114832937717438,
"learning_rate": 0.00012539440203562343,
"loss": 0.2064,
"step": 739
},
{
"epoch": 1.8745247148288975,
"grad_norm": 0.40082940459251404,
"learning_rate": 0.00012529262086513995,
"loss": 0.2145,
"step": 740
},
{
"epoch": 1.8770595690747782,
"grad_norm": 0.28362375497817993,
"learning_rate": 0.0001251908396946565,
"loss": 0.2044,
"step": 741
},
{
"epoch": 1.879594423320659,
"grad_norm": 0.2738857567310333,
"learning_rate": 0.00012508905852417302,
"loss": 0.1852,
"step": 742
},
{
"epoch": 1.88212927756654,
"grad_norm": 0.37283095717430115,
"learning_rate": 0.0001249872773536896,
"loss": 0.248,
"step": 743
},
{
"epoch": 1.8846641318124209,
"grad_norm": 0.3065252900123596,
"learning_rate": 0.0001248854961832061,
"loss": 0.2028,
"step": 744
},
{
"epoch": 1.8871989860583016,
"grad_norm": 0.2891787588596344,
"learning_rate": 0.00012478371501272267,
"loss": 0.1977,
"step": 745
},
{
"epoch": 1.8897338403041823,
"grad_norm": 0.5002029538154602,
"learning_rate": 0.00012468193384223918,
"loss": 0.2731,
"step": 746
},
{
"epoch": 1.8922686945500633,
"grad_norm": 0.34734681248664856,
"learning_rate": 0.00012458015267175575,
"loss": 0.2236,
"step": 747
},
{
"epoch": 1.8948035487959443,
"grad_norm": 0.4372716248035431,
"learning_rate": 0.00012447837150127226,
"loss": 0.3787,
"step": 748
},
{
"epoch": 1.8973384030418252,
"grad_norm": 0.41203773021698,
"learning_rate": 0.0001243765903307888,
"loss": 0.2385,
"step": 749
},
{
"epoch": 1.899873257287706,
"grad_norm": 0.28231269121170044,
"learning_rate": 0.00012427480916030534,
"loss": 0.1966,
"step": 750
},
{
"epoch": 1.9024081115335867,
"grad_norm": 0.3689015209674835,
"learning_rate": 0.00012417302798982188,
"loss": 0.2266,
"step": 751
},
{
"epoch": 1.9049429657794676,
"grad_norm": 0.35862621665000916,
"learning_rate": 0.00012407124681933844,
"loss": 0.2226,
"step": 752
},
{
"epoch": 1.9074778200253486,
"grad_norm": 0.27552056312561035,
"learning_rate": 0.00012396946564885496,
"loss": 0.2049,
"step": 753
},
{
"epoch": 1.9100126742712296,
"grad_norm": 0.3665705919265747,
"learning_rate": 0.00012386768447837152,
"loss": 0.2262,
"step": 754
},
{
"epoch": 1.9125475285171103,
"grad_norm": 0.37812677025794983,
"learning_rate": 0.00012376590330788803,
"loss": 0.2561,
"step": 755
},
{
"epoch": 1.915082382762991,
"grad_norm": 0.34638741612434387,
"learning_rate": 0.0001236641221374046,
"loss": 0.2152,
"step": 756
},
{
"epoch": 1.917617237008872,
"grad_norm": 0.3499183654785156,
"learning_rate": 0.00012356234096692111,
"loss": 0.2823,
"step": 757
},
{
"epoch": 1.920152091254753,
"grad_norm": 0.3274863362312317,
"learning_rate": 0.00012346055979643768,
"loss": 0.202,
"step": 758
},
{
"epoch": 1.9226869455006337,
"grad_norm": 0.4568060338497162,
"learning_rate": 0.0001233587786259542,
"loss": 0.3531,
"step": 759
},
{
"epoch": 1.9252217997465144,
"grad_norm": 0.3351891040802002,
"learning_rate": 0.00012325699745547076,
"loss": 0.3491,
"step": 760
},
{
"epoch": 1.9277566539923954,
"grad_norm": 0.3045225739479065,
"learning_rate": 0.00012315521628498727,
"loss": 0.2412,
"step": 761
},
{
"epoch": 1.9302915082382763,
"grad_norm": 0.4453962445259094,
"learning_rate": 0.0001230534351145038,
"loss": 0.485,
"step": 762
},
{
"epoch": 1.9328263624841573,
"grad_norm": 0.4568649232387543,
"learning_rate": 0.00012295165394402038,
"loss": 0.4203,
"step": 763
},
{
"epoch": 1.935361216730038,
"grad_norm": 0.33376067876815796,
"learning_rate": 0.0001228498727735369,
"loss": 0.2287,
"step": 764
},
{
"epoch": 1.9378960709759188,
"grad_norm": 0.2670106887817383,
"learning_rate": 0.00012274809160305346,
"loss": 0.2265,
"step": 765
},
{
"epoch": 1.9404309252217997,
"grad_norm": 0.25930914282798767,
"learning_rate": 0.00012264631043256997,
"loss": 0.2661,
"step": 766
},
{
"epoch": 1.9429657794676807,
"grad_norm": 0.22364859282970428,
"learning_rate": 0.00012254452926208653,
"loss": 0.1938,
"step": 767
},
{
"epoch": 1.9455006337135616,
"grad_norm": 0.4107860028743744,
"learning_rate": 0.00012244274809160305,
"loss": 0.3227,
"step": 768
},
{
"epoch": 1.9480354879594424,
"grad_norm": 0.24454613029956818,
"learning_rate": 0.0001223409669211196,
"loss": 0.2813,
"step": 769
},
{
"epoch": 1.950570342205323,
"grad_norm": 0.28310418128967285,
"learning_rate": 0.00012223918575063612,
"loss": 0.2065,
"step": 770
},
{
"epoch": 1.953105196451204,
"grad_norm": 0.28080177307128906,
"learning_rate": 0.0001221374045801527,
"loss": 0.1941,
"step": 771
},
{
"epoch": 1.955640050697085,
"grad_norm": 0.365400105714798,
"learning_rate": 0.0001220356234096692,
"loss": 0.2657,
"step": 772
},
{
"epoch": 1.9581749049429658,
"grad_norm": 0.3115444779396057,
"learning_rate": 0.00012193384223918576,
"loss": 0.2117,
"step": 773
},
{
"epoch": 1.9607097591888465,
"grad_norm": 0.30900898575782776,
"learning_rate": 0.00012183206106870228,
"loss": 0.2563,
"step": 774
},
{
"epoch": 1.9632446134347274,
"grad_norm": 0.341789573431015,
"learning_rate": 0.00012173027989821883,
"loss": 0.2396,
"step": 775
},
{
"epoch": 1.9657794676806084,
"grad_norm": 0.39556756615638733,
"learning_rate": 0.00012162849872773539,
"loss": 0.2203,
"step": 776
},
{
"epoch": 1.9683143219264894,
"grad_norm": 0.4282820224761963,
"learning_rate": 0.00012152671755725191,
"loss": 0.2476,
"step": 777
},
{
"epoch": 1.97084917617237,
"grad_norm": 0.3683648109436035,
"learning_rate": 0.00012142493638676847,
"loss": 0.2414,
"step": 778
},
{
"epoch": 1.9733840304182508,
"grad_norm": 0.19751296937465668,
"learning_rate": 0.00012132315521628499,
"loss": 0.1622,
"step": 779
},
{
"epoch": 1.9759188846641318,
"grad_norm": 0.4522268772125244,
"learning_rate": 0.00012122137404580154,
"loss": 0.3372,
"step": 780
},
{
"epoch": 1.9784537389100127,
"grad_norm": 0.3386411666870117,
"learning_rate": 0.00012111959287531807,
"loss": 0.1966,
"step": 781
},
{
"epoch": 1.9809885931558935,
"grad_norm": 0.3266599178314209,
"learning_rate": 0.00012101781170483461,
"loss": 0.2507,
"step": 782
},
{
"epoch": 1.9835234474017744,
"grad_norm": 0.395271897315979,
"learning_rate": 0.00012091603053435115,
"loss": 0.2626,
"step": 783
},
{
"epoch": 1.9860583016476552,
"grad_norm": 0.23269407451152802,
"learning_rate": 0.00012081424936386769,
"loss": 0.1806,
"step": 784
},
{
"epoch": 1.9885931558935361,
"grad_norm": 0.3929823040962219,
"learning_rate": 0.00012071246819338421,
"loss": 0.2912,
"step": 785
},
{
"epoch": 1.991128010139417,
"grad_norm": 0.2597116529941559,
"learning_rate": 0.00012061068702290077,
"loss": 0.1918,
"step": 786
},
{
"epoch": 1.9936628643852978,
"grad_norm": 0.44690757989883423,
"learning_rate": 0.00012050890585241729,
"loss": 0.2644,
"step": 787
},
{
"epoch": 1.9961977186311786,
"grad_norm": 0.4133460819721222,
"learning_rate": 0.00012040712468193385,
"loss": 0.2541,
"step": 788
},
{
"epoch": 1.9987325728770595,
"grad_norm": 0.33399301767349243,
"learning_rate": 0.0001203053435114504,
"loss": 0.2778,
"step": 789
},
{
"epoch": 2.0,
"grad_norm": 0.6268282532691956,
"learning_rate": 0.00012020356234096692,
"loss": 0.3105,
"step": 790
},
{
"epoch": 2.002534854245881,
"grad_norm": 0.38419365882873535,
"learning_rate": 0.00012010178117048348,
"loss": 0.2352,
"step": 791
},
{
"epoch": 2.005069708491762,
"grad_norm": 0.30469566583633423,
"learning_rate": 0.00012,
"loss": 0.2011,
"step": 792
},
{
"epoch": 2.0076045627376424,
"grad_norm": 0.36411482095718384,
"learning_rate": 0.00011989821882951656,
"loss": 0.2324,
"step": 793
},
{
"epoch": 2.0101394169835234,
"grad_norm": 0.40986311435699463,
"learning_rate": 0.00011979643765903308,
"loss": 0.2217,
"step": 794
},
{
"epoch": 2.0126742712294043,
"grad_norm": 0.46682968735694885,
"learning_rate": 0.00011969465648854963,
"loss": 0.2688,
"step": 795
},
{
"epoch": 2.0152091254752853,
"grad_norm": 0.31846344470977783,
"learning_rate": 0.00011959287531806616,
"loss": 0.1984,
"step": 796
},
{
"epoch": 2.017743979721166,
"grad_norm": 0.48346126079559326,
"learning_rate": 0.0001194910941475827,
"loss": 0.2404,
"step": 797
},
{
"epoch": 2.0202788339670468,
"grad_norm": 0.5090253949165344,
"learning_rate": 0.00011938931297709924,
"loss": 0.2363,
"step": 798
},
{
"epoch": 2.0228136882129277,
"grad_norm": 0.4886679947376251,
"learning_rate": 0.00011928753180661578,
"loss": 0.2656,
"step": 799
},
{
"epoch": 2.0253485424588087,
"grad_norm": 0.5652650594711304,
"learning_rate": 0.00011918575063613233,
"loss": 0.2444,
"step": 800
},
{
"epoch": 2.0278833967046896,
"grad_norm": 0.7158893346786499,
"learning_rate": 0.00011908396946564886,
"loss": 0.2362,
"step": 801
},
{
"epoch": 2.03041825095057,
"grad_norm": 0.5168672800064087,
"learning_rate": 0.00011898218829516541,
"loss": 0.2067,
"step": 802
},
{
"epoch": 2.032953105196451,
"grad_norm": 0.7243991494178772,
"learning_rate": 0.00011888040712468194,
"loss": 0.2458,
"step": 803
},
{
"epoch": 2.035487959442332,
"grad_norm": 0.4199936091899872,
"learning_rate": 0.00011877862595419849,
"loss": 0.2009,
"step": 804
},
{
"epoch": 2.038022813688213,
"grad_norm": 0.41791805624961853,
"learning_rate": 0.00011867684478371501,
"loss": 0.2325,
"step": 805
},
{
"epoch": 2.040557667934094,
"grad_norm": 0.6389465928077698,
"learning_rate": 0.00011857506361323157,
"loss": 0.2636,
"step": 806
},
{
"epoch": 2.0430925221799745,
"grad_norm": 0.6254114508628845,
"learning_rate": 0.00011847328244274809,
"loss": 0.2292,
"step": 807
},
{
"epoch": 2.0456273764258555,
"grad_norm": 0.8436942100524902,
"learning_rate": 0.00011837150127226465,
"loss": 0.2913,
"step": 808
},
{
"epoch": 2.0481622306717364,
"grad_norm": 0.42698097229003906,
"learning_rate": 0.00011826972010178117,
"loss": 0.2107,
"step": 809
},
{
"epoch": 2.0506970849176174,
"grad_norm": 0.432607501745224,
"learning_rate": 0.00011816793893129771,
"loss": 0.1851,
"step": 810
},
{
"epoch": 2.053231939163498,
"grad_norm": 0.48241573572158813,
"learning_rate": 0.00011806615776081425,
"loss": 0.2333,
"step": 811
},
{
"epoch": 2.055766793409379,
"grad_norm": 0.3920150101184845,
"learning_rate": 0.00011796437659033079,
"loss": 0.2256,
"step": 812
},
{
"epoch": 2.05830164765526,
"grad_norm": 0.3601329028606415,
"learning_rate": 0.00011786259541984734,
"loss": 0.2428,
"step": 813
},
{
"epoch": 2.0608365019011408,
"grad_norm": 0.428524911403656,
"learning_rate": 0.00011776081424936387,
"loss": 0.3109,
"step": 814
},
{
"epoch": 2.0633713561470217,
"grad_norm": 0.22846737504005432,
"learning_rate": 0.00011765903307888042,
"loss": 0.1715,
"step": 815
},
{
"epoch": 2.0659062103929022,
"grad_norm": 0.3656214475631714,
"learning_rate": 0.00011755725190839695,
"loss": 0.2211,
"step": 816
},
{
"epoch": 2.068441064638783,
"grad_norm": 0.2633965015411377,
"learning_rate": 0.0001174554707379135,
"loss": 0.1933,
"step": 817
},
{
"epoch": 2.070975918884664,
"grad_norm": 0.4318942129611969,
"learning_rate": 0.00011735368956743003,
"loss": 0.2829,
"step": 818
},
{
"epoch": 2.073510773130545,
"grad_norm": 0.2643216848373413,
"learning_rate": 0.00011725190839694658,
"loss": 0.1938,
"step": 819
},
{
"epoch": 2.076045627376426,
"grad_norm": 0.4560074508190155,
"learning_rate": 0.0001171501272264631,
"loss": 0.3017,
"step": 820
},
{
"epoch": 2.0785804816223066,
"grad_norm": 0.380374550819397,
"learning_rate": 0.00011704834605597966,
"loss": 0.2141,
"step": 821
},
{
"epoch": 2.0811153358681875,
"grad_norm": 0.321417897939682,
"learning_rate": 0.00011694656488549618,
"loss": 0.2058,
"step": 822
},
{
"epoch": 2.0836501901140685,
"grad_norm": 0.350496768951416,
"learning_rate": 0.00011684478371501274,
"loss": 0.1761,
"step": 823
},
{
"epoch": 2.0861850443599494,
"grad_norm": 0.35794898867607117,
"learning_rate": 0.00011674300254452927,
"loss": 0.2016,
"step": 824
},
{
"epoch": 2.08871989860583,
"grad_norm": 0.37890860438346863,
"learning_rate": 0.0001166412213740458,
"loss": 0.253,
"step": 825
},
{
"epoch": 2.091254752851711,
"grad_norm": 0.41833457350730896,
"learning_rate": 0.00011653944020356235,
"loss": 0.2012,
"step": 826
},
{
"epoch": 2.093789607097592,
"grad_norm": 0.49572086334228516,
"learning_rate": 0.00011643765903307888,
"loss": 0.214,
"step": 827
},
{
"epoch": 2.096324461343473,
"grad_norm": 0.44266751408576965,
"learning_rate": 0.00011633587786259543,
"loss": 0.2496,
"step": 828
},
{
"epoch": 2.098859315589354,
"grad_norm": 0.7018102407455444,
"learning_rate": 0.00011623409669211196,
"loss": 0.3996,
"step": 829
},
{
"epoch": 2.1013941698352343,
"grad_norm": 0.42781826853752136,
"learning_rate": 0.00011613231552162851,
"loss": 0.2325,
"step": 830
},
{
"epoch": 2.1039290240811153,
"grad_norm": 0.35814788937568665,
"learning_rate": 0.00011603053435114504,
"loss": 0.2003,
"step": 831
},
{
"epoch": 2.106463878326996,
"grad_norm": 0.2381380945444107,
"learning_rate": 0.00011592875318066159,
"loss": 0.1791,
"step": 832
},
{
"epoch": 2.108998732572877,
"grad_norm": 0.3152197003364563,
"learning_rate": 0.00011582697201017811,
"loss": 0.1802,
"step": 833
},
{
"epoch": 2.111533586818758,
"grad_norm": 0.3493264615535736,
"learning_rate": 0.00011572519083969467,
"loss": 0.173,
"step": 834
},
{
"epoch": 2.1140684410646386,
"grad_norm": 0.339036762714386,
"learning_rate": 0.0001156234096692112,
"loss": 0.1875,
"step": 835
},
{
"epoch": 2.1166032953105196,
"grad_norm": 0.3622972369194031,
"learning_rate": 0.00011552162849872775,
"loss": 0.1892,
"step": 836
},
{
"epoch": 2.1191381495564006,
"grad_norm": 0.7021862268447876,
"learning_rate": 0.00011541984732824429,
"loss": 0.272,
"step": 837
},
{
"epoch": 2.1216730038022815,
"grad_norm": 0.4027453064918518,
"learning_rate": 0.00011531806615776081,
"loss": 0.2296,
"step": 838
},
{
"epoch": 2.124207858048162,
"grad_norm": 0.3509223163127899,
"learning_rate": 0.00011521628498727736,
"loss": 0.1812,
"step": 839
},
{
"epoch": 2.126742712294043,
"grad_norm": 0.4156752824783325,
"learning_rate": 0.00011511450381679389,
"loss": 0.2444,
"step": 840
},
{
"epoch": 2.129277566539924,
"grad_norm": 0.3596971035003662,
"learning_rate": 0.00011501272264631044,
"loss": 0.1944,
"step": 841
},
{
"epoch": 2.131812420785805,
"grad_norm": 0.4088239371776581,
"learning_rate": 0.00011491094147582697,
"loss": 0.1892,
"step": 842
},
{
"epoch": 2.134347275031686,
"grad_norm": 0.3603368103504181,
"learning_rate": 0.00011480916030534352,
"loss": 0.1955,
"step": 843
},
{
"epoch": 2.1368821292775664,
"grad_norm": 0.3702489733695984,
"learning_rate": 0.00011470737913486005,
"loss": 0.2401,
"step": 844
},
{
"epoch": 2.1394169835234473,
"grad_norm": 0.427312433719635,
"learning_rate": 0.0001146055979643766,
"loss": 0.2097,
"step": 845
},
{
"epoch": 2.1419518377693283,
"grad_norm": 0.34239426255226135,
"learning_rate": 0.00011450381679389313,
"loss": 0.2055,
"step": 846
},
{
"epoch": 2.1444866920152093,
"grad_norm": 0.522627055644989,
"learning_rate": 0.00011440203562340968,
"loss": 0.2206,
"step": 847
},
{
"epoch": 2.14702154626109,
"grad_norm": 0.5005999207496643,
"learning_rate": 0.0001143002544529262,
"loss": 0.2187,
"step": 848
},
{
"epoch": 2.1495564005069707,
"grad_norm": 0.4834093451499939,
"learning_rate": 0.00011419847328244276,
"loss": 0.2616,
"step": 849
},
{
"epoch": 2.1520912547528517,
"grad_norm": 0.3305776119232178,
"learning_rate": 0.0001140966921119593,
"loss": 0.2193,
"step": 850
},
{
"epoch": 2.1546261089987326,
"grad_norm": 0.3691657781600952,
"learning_rate": 0.00011399491094147584,
"loss": 0.2343,
"step": 851
},
{
"epoch": 2.1571609632446136,
"grad_norm": 0.4711242914199829,
"learning_rate": 0.00011389312977099238,
"loss": 0.2961,
"step": 852
},
{
"epoch": 2.159695817490494,
"grad_norm": 0.4091726839542389,
"learning_rate": 0.0001137913486005089,
"loss": 0.2735,
"step": 853
},
{
"epoch": 2.162230671736375,
"grad_norm": 0.28634020686149597,
"learning_rate": 0.00011368956743002545,
"loss": 0.2026,
"step": 854
},
{
"epoch": 2.164765525982256,
"grad_norm": 0.3120497763156891,
"learning_rate": 0.00011358778625954198,
"loss": 0.1826,
"step": 855
},
{
"epoch": 2.167300380228137,
"grad_norm": 0.3803773522377014,
"learning_rate": 0.00011348600508905853,
"loss": 0.2206,
"step": 856
},
{
"epoch": 2.169835234474018,
"grad_norm": 0.4069412648677826,
"learning_rate": 0.00011338422391857506,
"loss": 0.23,
"step": 857
},
{
"epoch": 2.1723700887198985,
"grad_norm": 0.31032097339630127,
"learning_rate": 0.00011328244274809161,
"loss": 0.1774,
"step": 858
},
{
"epoch": 2.1749049429657794,
"grad_norm": 0.3429819941520691,
"learning_rate": 0.00011318066157760814,
"loss": 0.207,
"step": 859
},
{
"epoch": 2.1774397972116604,
"grad_norm": 0.32155394554138184,
"learning_rate": 0.00011307888040712469,
"loss": 0.1817,
"step": 860
},
{
"epoch": 2.1799746514575413,
"grad_norm": 0.3859189450740814,
"learning_rate": 0.00011297709923664124,
"loss": 0.205,
"step": 861
},
{
"epoch": 2.182509505703422,
"grad_norm": 0.33794042468070984,
"learning_rate": 0.00011287531806615777,
"loss": 0.2002,
"step": 862
},
{
"epoch": 2.185044359949303,
"grad_norm": 0.38762131333351135,
"learning_rate": 0.00011277353689567431,
"loss": 0.206,
"step": 863
},
{
"epoch": 2.1875792141951838,
"grad_norm": 0.35734203457832336,
"learning_rate": 0.00011267175572519085,
"loss": 0.2332,
"step": 864
},
{
"epoch": 2.1901140684410647,
"grad_norm": 0.32456931471824646,
"learning_rate": 0.00011256997455470739,
"loss": 0.1873,
"step": 865
},
{
"epoch": 2.1926489226869457,
"grad_norm": 0.5198532938957214,
"learning_rate": 0.00011246819338422391,
"loss": 0.2408,
"step": 866
},
{
"epoch": 2.195183776932826,
"grad_norm": 0.3863469362258911,
"learning_rate": 0.00011236641221374046,
"loss": 0.1778,
"step": 867
},
{
"epoch": 2.197718631178707,
"grad_norm": 0.39902037382125854,
"learning_rate": 0.00011226463104325699,
"loss": 0.1982,
"step": 868
},
{
"epoch": 2.200253485424588,
"grad_norm": 0.3974783718585968,
"learning_rate": 0.00011216284987277354,
"loss": 0.2157,
"step": 869
},
{
"epoch": 2.202788339670469,
"grad_norm": 0.33785662055015564,
"learning_rate": 0.00011206106870229007,
"loss": 0.2152,
"step": 870
},
{
"epoch": 2.20532319391635,
"grad_norm": 0.4233367145061493,
"learning_rate": 0.00011195928753180662,
"loss": 0.2992,
"step": 871
},
{
"epoch": 2.2078580481622305,
"grad_norm": 0.37665534019470215,
"learning_rate": 0.00011185750636132315,
"loss": 0.2273,
"step": 872
},
{
"epoch": 2.2103929024081115,
"grad_norm": 0.3841243088245392,
"learning_rate": 0.0001117557251908397,
"loss": 0.1991,
"step": 873
},
{
"epoch": 2.2129277566539924,
"grad_norm": 0.3544892966747284,
"learning_rate": 0.00011165394402035625,
"loss": 0.2098,
"step": 874
},
{
"epoch": 2.2154626108998734,
"grad_norm": 0.43662142753601074,
"learning_rate": 0.00011155216284987278,
"loss": 0.2411,
"step": 875
},
{
"epoch": 2.2179974651457544,
"grad_norm": 0.3305199146270752,
"learning_rate": 0.00011145038167938933,
"loss": 0.1803,
"step": 876
},
{
"epoch": 2.220532319391635,
"grad_norm": 0.34674328565597534,
"learning_rate": 0.00011134860050890586,
"loss": 0.2206,
"step": 877
},
{
"epoch": 2.223067173637516,
"grad_norm": 0.39985305070877075,
"learning_rate": 0.0001112468193384224,
"loss": 0.2951,
"step": 878
},
{
"epoch": 2.225602027883397,
"grad_norm": 0.36231693625450134,
"learning_rate": 0.00011114503816793894,
"loss": 0.2601,
"step": 879
},
{
"epoch": 2.2281368821292777,
"grad_norm": 0.4199659526348114,
"learning_rate": 0.00011104325699745548,
"loss": 0.2719,
"step": 880
},
{
"epoch": 2.2306717363751583,
"grad_norm": 0.3472574055194855,
"learning_rate": 0.000110941475826972,
"loss": 0.2437,
"step": 881
},
{
"epoch": 2.233206590621039,
"grad_norm": 0.2765200436115265,
"learning_rate": 0.00011083969465648855,
"loss": 0.1983,
"step": 882
},
{
"epoch": 2.23574144486692,
"grad_norm": 0.4466260075569153,
"learning_rate": 0.00011073791348600508,
"loss": 0.2323,
"step": 883
},
{
"epoch": 2.238276299112801,
"grad_norm": 0.43661364912986755,
"learning_rate": 0.00011063613231552163,
"loss": 0.2957,
"step": 884
},
{
"epoch": 2.240811153358682,
"grad_norm": 0.3262166976928711,
"learning_rate": 0.00011053435114503819,
"loss": 0.195,
"step": 885
},
{
"epoch": 2.2433460076045626,
"grad_norm": 0.5085666179656982,
"learning_rate": 0.00011043256997455471,
"loss": 0.3349,
"step": 886
},
{
"epoch": 2.2458808618504436,
"grad_norm": 0.46551409363746643,
"learning_rate": 0.00011033078880407126,
"loss": 0.3318,
"step": 887
},
{
"epoch": 2.2484157160963245,
"grad_norm": 0.425530344247818,
"learning_rate": 0.00011022900763358779,
"loss": 0.2857,
"step": 888
},
{
"epoch": 2.2509505703422055,
"grad_norm": 0.3377918601036072,
"learning_rate": 0.00011012722646310434,
"loss": 0.2215,
"step": 889
},
{
"epoch": 2.253485424588086,
"grad_norm": 0.3491476774215698,
"learning_rate": 0.00011002544529262087,
"loss": 0.2471,
"step": 890
},
{
"epoch": 2.256020278833967,
"grad_norm": 0.3779531419277191,
"learning_rate": 0.00010992366412213742,
"loss": 0.1984,
"step": 891
},
{
"epoch": 2.258555133079848,
"grad_norm": 0.425077885389328,
"learning_rate": 0.00010982188295165395,
"loss": 0.2535,
"step": 892
},
{
"epoch": 2.261089987325729,
"grad_norm": 0.40296900272369385,
"learning_rate": 0.00010972010178117049,
"loss": 0.1955,
"step": 893
},
{
"epoch": 2.26362484157161,
"grad_norm": 0.4394761919975281,
"learning_rate": 0.00010961832061068703,
"loss": 0.2638,
"step": 894
},
{
"epoch": 2.2661596958174903,
"grad_norm": 0.4743111729621887,
"learning_rate": 0.00010951653944020357,
"loss": 0.1932,
"step": 895
},
{
"epoch": 2.2686945500633713,
"grad_norm": 0.5121330618858337,
"learning_rate": 0.00010941475826972009,
"loss": 0.2541,
"step": 896
},
{
"epoch": 2.2712294043092522,
"grad_norm": 0.2810382544994354,
"learning_rate": 0.00010931297709923664,
"loss": 0.1884,
"step": 897
},
{
"epoch": 2.273764258555133,
"grad_norm": 0.3637334108352661,
"learning_rate": 0.0001092111959287532,
"loss": 0.2208,
"step": 898
},
{
"epoch": 2.2762991128010137,
"grad_norm": 0.4116186201572418,
"learning_rate": 0.00010910941475826972,
"loss": 0.1898,
"step": 899
},
{
"epoch": 2.2788339670468947,
"grad_norm": 0.4166296720504761,
"learning_rate": 0.00010900763358778628,
"loss": 0.2399,
"step": 900
},
{
"epoch": 2.2813688212927756,
"grad_norm": 0.5998784303665161,
"learning_rate": 0.0001089058524173028,
"loss": 0.2926,
"step": 901
},
{
"epoch": 2.2839036755386566,
"grad_norm": 0.6252371668815613,
"learning_rate": 0.00010880407124681935,
"loss": 0.2392,
"step": 902
},
{
"epoch": 2.2864385297845375,
"grad_norm": 0.4495537579059601,
"learning_rate": 0.00010870229007633588,
"loss": 0.2142,
"step": 903
},
{
"epoch": 2.288973384030418,
"grad_norm": 0.5659827589988708,
"learning_rate": 0.00010860050890585243,
"loss": 0.2993,
"step": 904
},
{
"epoch": 2.291508238276299,
"grad_norm": 0.4290786385536194,
"learning_rate": 0.00010849872773536896,
"loss": 0.3127,
"step": 905
},
{
"epoch": 2.29404309252218,
"grad_norm": 0.3835826516151428,
"learning_rate": 0.0001083969465648855,
"loss": 0.1927,
"step": 906
},
{
"epoch": 2.296577946768061,
"grad_norm": 0.4915788769721985,
"learning_rate": 0.00010829516539440204,
"loss": 0.2553,
"step": 907
},
{
"epoch": 2.299112801013942,
"grad_norm": 0.42122524976730347,
"learning_rate": 0.00010819338422391858,
"loss": 0.2133,
"step": 908
},
{
"epoch": 2.3016476552598224,
"grad_norm": 0.3904586732387543,
"learning_rate": 0.0001080916030534351,
"loss": 0.2064,
"step": 909
},
{
"epoch": 2.3041825095057034,
"grad_norm": 0.3680777847766876,
"learning_rate": 0.00010798982188295166,
"loss": 0.1989,
"step": 910
},
{
"epoch": 2.3067173637515843,
"grad_norm": 0.44054466485977173,
"learning_rate": 0.00010788804071246821,
"loss": 0.2386,
"step": 911
},
{
"epoch": 2.3092522179974653,
"grad_norm": 0.28730717301368713,
"learning_rate": 0.00010778625954198473,
"loss": 0.175,
"step": 912
},
{
"epoch": 2.3117870722433462,
"grad_norm": 0.4209315776824951,
"learning_rate": 0.00010768447837150129,
"loss": 0.2197,
"step": 913
},
{
"epoch": 2.3143219264892267,
"grad_norm": 0.41457393765449524,
"learning_rate": 0.00010758269720101781,
"loss": 0.202,
"step": 914
},
{
"epoch": 2.3168567807351077,
"grad_norm": 0.40807071328163147,
"learning_rate": 0.00010748091603053437,
"loss": 0.3087,
"step": 915
},
{
"epoch": 2.3193916349809887,
"grad_norm": 0.42118731141090393,
"learning_rate": 0.00010737913486005089,
"loss": 0.2269,
"step": 916
},
{
"epoch": 2.3219264892268696,
"grad_norm": 0.3436257541179657,
"learning_rate": 0.00010727735368956744,
"loss": 0.1987,
"step": 917
},
{
"epoch": 2.32446134347275,
"grad_norm": 0.3721463978290558,
"learning_rate": 0.00010717557251908397,
"loss": 0.2081,
"step": 918
},
{
"epoch": 2.326996197718631,
"grad_norm": 0.45050719380378723,
"learning_rate": 0.00010707379134860052,
"loss": 0.2199,
"step": 919
},
{
"epoch": 2.329531051964512,
"grad_norm": 0.42665717005729675,
"learning_rate": 0.00010697201017811705,
"loss": 0.2176,
"step": 920
},
{
"epoch": 2.332065906210393,
"grad_norm": 0.35217922925949097,
"learning_rate": 0.00010687022900763359,
"loss": 0.1915,
"step": 921
},
{
"epoch": 2.334600760456274,
"grad_norm": 0.5407602190971375,
"learning_rate": 0.00010676844783715014,
"loss": 0.2309,
"step": 922
},
{
"epoch": 2.3371356147021545,
"grad_norm": 0.6984291076660156,
"learning_rate": 0.00010666666666666667,
"loss": 0.2779,
"step": 923
},
{
"epoch": 2.3396704689480354,
"grad_norm": 0.5333911776542664,
"learning_rate": 0.00010656488549618322,
"loss": 0.2659,
"step": 924
},
{
"epoch": 2.3422053231939164,
"grad_norm": 0.5130952596664429,
"learning_rate": 0.00010646310432569974,
"loss": 0.315,
"step": 925
},
{
"epoch": 2.3447401774397973,
"grad_norm": 0.3874262869358063,
"learning_rate": 0.0001063613231552163,
"loss": 0.294,
"step": 926
},
{
"epoch": 2.347275031685678,
"grad_norm": 0.37864431738853455,
"learning_rate": 0.00010625954198473282,
"loss": 0.1894,
"step": 927
},
{
"epoch": 2.349809885931559,
"grad_norm": 0.406448632478714,
"learning_rate": 0.00010615776081424938,
"loss": 0.1913,
"step": 928
},
{
"epoch": 2.3523447401774398,
"grad_norm": 0.4278213381767273,
"learning_rate": 0.0001060559796437659,
"loss": 0.2136,
"step": 929
},
{
"epoch": 2.3548795944233207,
"grad_norm": 0.3853738009929657,
"learning_rate": 0.00010595419847328246,
"loss": 0.213,
"step": 930
},
{
"epoch": 2.3574144486692017,
"grad_norm": 0.3785664737224579,
"learning_rate": 0.00010585241730279898,
"loss": 0.22,
"step": 931
},
{
"epoch": 2.359949302915082,
"grad_norm": 0.5863676071166992,
"learning_rate": 0.00010575063613231553,
"loss": 0.2305,
"step": 932
},
{
"epoch": 2.362484157160963,
"grad_norm": 0.36629414558410645,
"learning_rate": 0.00010564885496183206,
"loss": 0.2041,
"step": 933
},
{
"epoch": 2.365019011406844,
"grad_norm": 0.44699156284332275,
"learning_rate": 0.0001055470737913486,
"loss": 0.2763,
"step": 934
},
{
"epoch": 2.367553865652725,
"grad_norm": 0.4775685667991638,
"learning_rate": 0.00010544529262086515,
"loss": 0.2779,
"step": 935
},
{
"epoch": 2.3700887198986056,
"grad_norm": 0.3192265033721924,
"learning_rate": 0.00010534351145038168,
"loss": 0.1861,
"step": 936
},
{
"epoch": 2.3726235741444865,
"grad_norm": 0.3589562177658081,
"learning_rate": 0.00010524173027989823,
"loss": 0.2266,
"step": 937
},
{
"epoch": 2.3751584283903675,
"grad_norm": 0.36193573474884033,
"learning_rate": 0.00010513994910941476,
"loss": 0.2105,
"step": 938
},
{
"epoch": 2.3776932826362485,
"grad_norm": 0.4141902029514313,
"learning_rate": 0.00010503816793893131,
"loss": 0.2676,
"step": 939
},
{
"epoch": 2.3802281368821294,
"grad_norm": 0.3118525445461273,
"learning_rate": 0.00010493638676844783,
"loss": 0.1941,
"step": 940
},
{
"epoch": 2.3827629911280104,
"grad_norm": 0.3232119679450989,
"learning_rate": 0.00010483460559796439,
"loss": 0.2065,
"step": 941
},
{
"epoch": 2.385297845373891,
"grad_norm": 0.30440258979797363,
"learning_rate": 0.00010473282442748091,
"loss": 0.1834,
"step": 942
},
{
"epoch": 2.387832699619772,
"grad_norm": 0.5841143131256104,
"learning_rate": 0.00010463104325699747,
"loss": 0.3785,
"step": 943
},
{
"epoch": 2.390367553865653,
"grad_norm": 0.31851619482040405,
"learning_rate": 0.00010452926208651399,
"loss": 0.1798,
"step": 944
},
{
"epoch": 2.3929024081115338,
"grad_norm": 0.3820517361164093,
"learning_rate": 0.00010442748091603054,
"loss": 0.2376,
"step": 945
},
{
"epoch": 2.3954372623574143,
"grad_norm": 0.4379272758960724,
"learning_rate": 0.00010432569974554708,
"loss": 0.2356,
"step": 946
},
{
"epoch": 2.3979721166032952,
"grad_norm": 0.3120323419570923,
"learning_rate": 0.00010422391857506362,
"loss": 0.1936,
"step": 947
},
{
"epoch": 2.400506970849176,
"grad_norm": 0.3143107295036316,
"learning_rate": 0.00010412213740458016,
"loss": 0.184,
"step": 948
},
{
"epoch": 2.403041825095057,
"grad_norm": 0.44618573784828186,
"learning_rate": 0.00010402035623409669,
"loss": 0.2468,
"step": 949
},
{
"epoch": 2.405576679340938,
"grad_norm": 0.3838117718696594,
"learning_rate": 0.00010391857506361324,
"loss": 0.2276,
"step": 950
},
{
"epoch": 2.4081115335868186,
"grad_norm": 0.3427219092845917,
"learning_rate": 0.00010381679389312977,
"loss": 0.2169,
"step": 951
},
{
"epoch": 2.4106463878326996,
"grad_norm": 0.3738270699977875,
"learning_rate": 0.00010371501272264632,
"loss": 0.2447,
"step": 952
},
{
"epoch": 2.4131812420785805,
"grad_norm": 0.33645015954971313,
"learning_rate": 0.00010361323155216285,
"loss": 0.1939,
"step": 953
},
{
"epoch": 2.4157160963244615,
"grad_norm": 0.45420047640800476,
"learning_rate": 0.0001035114503816794,
"loss": 0.242,
"step": 954
},
{
"epoch": 2.418250950570342,
"grad_norm": 0.47141382098197937,
"learning_rate": 0.00010340966921119592,
"loss": 0.2923,
"step": 955
},
{
"epoch": 2.420785804816223,
"grad_norm": 0.42177528142929077,
"learning_rate": 0.00010330788804071248,
"loss": 0.2827,
"step": 956
},
{
"epoch": 2.423320659062104,
"grad_norm": 0.409502774477005,
"learning_rate": 0.000103206106870229,
"loss": 0.2016,
"step": 957
},
{
"epoch": 2.425855513307985,
"grad_norm": 0.47684770822525024,
"learning_rate": 0.00010310432569974556,
"loss": 0.2093,
"step": 958
},
{
"epoch": 2.428390367553866,
"grad_norm": 0.3357095718383789,
"learning_rate": 0.0001030025445292621,
"loss": 0.1744,
"step": 959
},
{
"epoch": 2.4309252217997463,
"grad_norm": 0.4120575487613678,
"learning_rate": 0.00010290076335877863,
"loss": 0.214,
"step": 960
},
{
"epoch": 2.4334600760456273,
"grad_norm": 0.5090222954750061,
"learning_rate": 0.00010279898218829517,
"loss": 0.2427,
"step": 961
},
{
"epoch": 2.4359949302915083,
"grad_norm": 0.4142550528049469,
"learning_rate": 0.0001026972010178117,
"loss": 0.2412,
"step": 962
},
{
"epoch": 2.4385297845373892,
"grad_norm": 0.3446972966194153,
"learning_rate": 0.00010259541984732825,
"loss": 0.1952,
"step": 963
},
{
"epoch": 2.4410646387832697,
"grad_norm": 0.37858110666275024,
"learning_rate": 0.00010249363867684478,
"loss": 0.1964,
"step": 964
},
{
"epoch": 2.4435994930291507,
"grad_norm": 0.3989041745662689,
"learning_rate": 0.00010239185750636133,
"loss": 0.2115,
"step": 965
},
{
"epoch": 2.4461343472750317,
"grad_norm": 0.3948146402835846,
"learning_rate": 0.00010229007633587786,
"loss": 0.2067,
"step": 966
},
{
"epoch": 2.4486692015209126,
"grad_norm": 0.3683820068836212,
"learning_rate": 0.00010218829516539441,
"loss": 0.1881,
"step": 967
},
{
"epoch": 2.4512040557667936,
"grad_norm": 0.36742380261421204,
"learning_rate": 0.00010208651399491094,
"loss": 0.2302,
"step": 968
},
{
"epoch": 2.453738910012674,
"grad_norm": 0.32195988297462463,
"learning_rate": 0.00010198473282442749,
"loss": 0.1994,
"step": 969
},
{
"epoch": 2.456273764258555,
"grad_norm": 0.42296963930130005,
"learning_rate": 0.00010188295165394401,
"loss": 0.2657,
"step": 970
},
{
"epoch": 2.458808618504436,
"grad_norm": 0.3555774688720703,
"learning_rate": 0.00010178117048346057,
"loss": 0.1812,
"step": 971
},
{
"epoch": 2.461343472750317,
"grad_norm": 0.6991668343544006,
"learning_rate": 0.00010167938931297712,
"loss": 0.4318,
"step": 972
},
{
"epoch": 2.463878326996198,
"grad_norm": 0.4290355443954468,
"learning_rate": 0.00010157760814249365,
"loss": 0.1856,
"step": 973
},
{
"epoch": 2.4664131812420784,
"grad_norm": 0.3479045331478119,
"learning_rate": 0.00010147582697201018,
"loss": 0.1844,
"step": 974
},
{
"epoch": 2.4689480354879594,
"grad_norm": 0.3862701952457428,
"learning_rate": 0.00010137404580152672,
"loss": 0.2108,
"step": 975
},
{
"epoch": 2.4714828897338403,
"grad_norm": 0.34411442279815674,
"learning_rate": 0.00010127226463104326,
"loss": 0.1851,
"step": 976
},
{
"epoch": 2.4740177439797213,
"grad_norm": 0.2434609979391098,
"learning_rate": 0.00010117048346055979,
"loss": 0.1757,
"step": 977
},
{
"epoch": 2.4765525982256023,
"grad_norm": 0.3341599106788635,
"learning_rate": 0.00010106870229007634,
"loss": 0.1879,
"step": 978
},
{
"epoch": 2.4790874524714828,
"grad_norm": 0.27678003907203674,
"learning_rate": 0.00010096692111959287,
"loss": 0.1943,
"step": 979
},
{
"epoch": 2.4816223067173637,
"grad_norm": 0.2388005256652832,
"learning_rate": 0.00010086513994910942,
"loss": 0.1804,
"step": 980
},
{
"epoch": 2.4841571609632447,
"grad_norm": 0.5265661478042603,
"learning_rate": 0.00010076335877862595,
"loss": 0.2813,
"step": 981
},
{
"epoch": 2.4866920152091256,
"grad_norm": 0.337007075548172,
"learning_rate": 0.0001006615776081425,
"loss": 0.1976,
"step": 982
},
{
"epoch": 2.489226869455006,
"grad_norm": 0.42700427770614624,
"learning_rate": 0.00010055979643765905,
"loss": 0.2031,
"step": 983
},
{
"epoch": 2.491761723700887,
"grad_norm": 0.3900333642959595,
"learning_rate": 0.00010045801526717558,
"loss": 0.2178,
"step": 984
},
{
"epoch": 2.494296577946768,
"grad_norm": 0.45332932472229004,
"learning_rate": 0.00010035623409669213,
"loss": 0.2537,
"step": 985
},
{
"epoch": 2.496831432192649,
"grad_norm": 0.30331265926361084,
"learning_rate": 0.00010025445292620866,
"loss": 0.2074,
"step": 986
},
{
"epoch": 2.49936628643853,
"grad_norm": 0.3379949927330017,
"learning_rate": 0.0001001526717557252,
"loss": 0.1768,
"step": 987
},
{
"epoch": 2.5019011406844105,
"grad_norm": 0.40859973430633545,
"learning_rate": 0.00010005089058524174,
"loss": 0.1984,
"step": 988
},
{
"epoch": 2.5044359949302915,
"grad_norm": 0.3993757963180542,
"learning_rate": 9.994910941475827e-05,
"loss": 0.2162,
"step": 989
},
{
"epoch": 2.5069708491761724,
"grad_norm": 0.5887713432312012,
"learning_rate": 9.984732824427481e-05,
"loss": 0.2806,
"step": 990
},
{
"epoch": 2.5095057034220534,
"grad_norm": 0.3590678572654724,
"learning_rate": 9.974554707379135e-05,
"loss": 0.2045,
"step": 991
},
{
"epoch": 2.512040557667934,
"grad_norm": 0.3090289831161499,
"learning_rate": 9.964376590330789e-05,
"loss": 0.2151,
"step": 992
},
{
"epoch": 2.514575411913815,
"grad_norm": 0.42125657200813293,
"learning_rate": 9.954198473282443e-05,
"loss": 0.2277,
"step": 993
},
{
"epoch": 2.517110266159696,
"grad_norm": 0.3213401734828949,
"learning_rate": 9.944020356234097e-05,
"loss": 0.1927,
"step": 994
},
{
"epoch": 2.5196451204055768,
"grad_norm": 0.4558688998222351,
"learning_rate": 9.933842239185751e-05,
"loss": 0.2418,
"step": 995
},
{
"epoch": 2.5221799746514577,
"grad_norm": 0.5181113481521606,
"learning_rate": 9.923664122137405e-05,
"loss": 0.2955,
"step": 996
},
{
"epoch": 2.5247148288973387,
"grad_norm": 0.409424751996994,
"learning_rate": 9.913486005089059e-05,
"loss": 0.226,
"step": 997
},
{
"epoch": 2.527249683143219,
"grad_norm": 0.44536876678466797,
"learning_rate": 9.903307888040713e-05,
"loss": 0.2412,
"step": 998
},
{
"epoch": 2.5297845373891,
"grad_norm": 0.5028473734855652,
"learning_rate": 9.893129770992367e-05,
"loss": 0.2658,
"step": 999
},
{
"epoch": 2.532319391634981,
"grad_norm": 0.3157128691673279,
"learning_rate": 9.882951653944021e-05,
"loss": 0.1939,
"step": 1000
},
{
"epoch": 2.5348542458808616,
"grad_norm": 0.3184659481048584,
"learning_rate": 9.872773536895676e-05,
"loss": 0.2113,
"step": 1001
},
{
"epoch": 2.5373891001267426,
"grad_norm": 0.5658953785896301,
"learning_rate": 9.862595419847329e-05,
"loss": 0.2641,
"step": 1002
},
{
"epoch": 2.5399239543726235,
"grad_norm": 0.5306189060211182,
"learning_rate": 9.852417302798982e-05,
"loss": 0.2495,
"step": 1003
},
{
"epoch": 2.5424588086185045,
"grad_norm": 0.5272448062896729,
"learning_rate": 9.842239185750636e-05,
"loss": 0.2212,
"step": 1004
},
{
"epoch": 2.5449936628643854,
"grad_norm": 0.3216992914676666,
"learning_rate": 9.83206106870229e-05,
"loss": 0.2284,
"step": 1005
},
{
"epoch": 2.5475285171102664,
"grad_norm": 0.3573670983314514,
"learning_rate": 9.821882951653944e-05,
"loss": 0.2568,
"step": 1006
},
{
"epoch": 2.550063371356147,
"grad_norm": 0.4088655710220337,
"learning_rate": 9.811704834605598e-05,
"loss": 0.2033,
"step": 1007
},
{
"epoch": 2.552598225602028,
"grad_norm": 0.33729737997055054,
"learning_rate": 9.801526717557252e-05,
"loss": 0.1843,
"step": 1008
},
{
"epoch": 2.555133079847909,
"grad_norm": 0.3298558294773102,
"learning_rate": 9.791348600508906e-05,
"loss": 0.193,
"step": 1009
},
{
"epoch": 2.5576679340937893,
"grad_norm": 0.33454427123069763,
"learning_rate": 9.78117048346056e-05,
"loss": 0.1823,
"step": 1010
},
{
"epoch": 2.5602027883396703,
"grad_norm": 0.3466435670852661,
"learning_rate": 9.770992366412214e-05,
"loss": 0.2204,
"step": 1011
},
{
"epoch": 2.5627376425855513,
"grad_norm": 0.3551004230976105,
"learning_rate": 9.760814249363868e-05,
"loss": 0.2027,
"step": 1012
},
{
"epoch": 2.565272496831432,
"grad_norm": 0.4317062795162201,
"learning_rate": 9.750636132315523e-05,
"loss": 0.2099,
"step": 1013
},
{
"epoch": 2.567807351077313,
"grad_norm": 0.5695217847824097,
"learning_rate": 9.740458015267177e-05,
"loss": 0.2547,
"step": 1014
},
{
"epoch": 2.570342205323194,
"grad_norm": 0.4523742198944092,
"learning_rate": 9.730279898218831e-05,
"loss": 0.2501,
"step": 1015
},
{
"epoch": 2.5728770595690746,
"grad_norm": 0.3191470503807068,
"learning_rate": 9.720101781170484e-05,
"loss": 0.1918,
"step": 1016
},
{
"epoch": 2.5754119138149556,
"grad_norm": 0.36234062910079956,
"learning_rate": 9.709923664122138e-05,
"loss": 0.2081,
"step": 1017
},
{
"epoch": 2.5779467680608366,
"grad_norm": 0.42196425795555115,
"learning_rate": 9.699745547073791e-05,
"loss": 0.2801,
"step": 1018
},
{
"epoch": 2.5804816223067175,
"grad_norm": 0.3382538855075836,
"learning_rate": 9.689567430025445e-05,
"loss": 0.221,
"step": 1019
},
{
"epoch": 2.583016476552598,
"grad_norm": 0.5736209750175476,
"learning_rate": 9.679389312977099e-05,
"loss": 0.2684,
"step": 1020
},
{
"epoch": 2.585551330798479,
"grad_norm": 0.4692763686180115,
"learning_rate": 9.669211195928753e-05,
"loss": 0.244,
"step": 1021
},
{
"epoch": 2.58808618504436,
"grad_norm": 0.4888627827167511,
"learning_rate": 9.659033078880407e-05,
"loss": 0.2493,
"step": 1022
},
{
"epoch": 2.590621039290241,
"grad_norm": 0.29745686054229736,
"learning_rate": 9.648854961832061e-05,
"loss": 0.1757,
"step": 1023
},
{
"epoch": 2.593155893536122,
"grad_norm": 0.476639062166214,
"learning_rate": 9.638676844783715e-05,
"loss": 0.2031,
"step": 1024
},
{
"epoch": 2.5956907477820024,
"grad_norm": 0.4214845895767212,
"learning_rate": 9.628498727735369e-05,
"loss": 0.2588,
"step": 1025
},
{
"epoch": 2.5982256020278833,
"grad_norm": 0.3036046326160431,
"learning_rate": 9.618320610687024e-05,
"loss": 0.2031,
"step": 1026
},
{
"epoch": 2.6007604562737643,
"grad_norm": 0.7941879630088806,
"learning_rate": 9.608142493638678e-05,
"loss": 0.2096,
"step": 1027
},
{
"epoch": 2.6032953105196452,
"grad_norm": 0.36381933093070984,
"learning_rate": 9.597964376590332e-05,
"loss": 0.2102,
"step": 1028
},
{
"epoch": 2.6058301647655258,
"grad_norm": 0.3213381767272949,
"learning_rate": 9.587786259541986e-05,
"loss": 0.1884,
"step": 1029
},
{
"epoch": 2.6083650190114067,
"grad_norm": 0.38559427857398987,
"learning_rate": 9.577608142493639e-05,
"loss": 0.2229,
"step": 1030
},
{
"epoch": 2.6108998732572877,
"grad_norm": 0.4000662863254547,
"learning_rate": 9.567430025445293e-05,
"loss": 0.198,
"step": 1031
},
{
"epoch": 2.6134347275031686,
"grad_norm": 0.3635396659374237,
"learning_rate": 9.557251908396946e-05,
"loss": 0.2267,
"step": 1032
},
{
"epoch": 2.6159695817490496,
"grad_norm": 0.31810763478279114,
"learning_rate": 9.5470737913486e-05,
"loss": 0.1691,
"step": 1033
},
{
"epoch": 2.6185044359949305,
"grad_norm": 0.29606062173843384,
"learning_rate": 9.536895674300254e-05,
"loss": 0.1834,
"step": 1034
},
{
"epoch": 2.621039290240811,
"grad_norm": 0.3528769612312317,
"learning_rate": 9.526717557251908e-05,
"loss": 0.2086,
"step": 1035
},
{
"epoch": 2.623574144486692,
"grad_norm": 0.4795662760734558,
"learning_rate": 9.516539440203562e-05,
"loss": 0.2429,
"step": 1036
},
{
"epoch": 2.626108998732573,
"grad_norm": 0.4627299904823303,
"learning_rate": 9.506361323155216e-05,
"loss": 0.1956,
"step": 1037
},
{
"epoch": 2.6286438529784535,
"grad_norm": 0.3330387473106384,
"learning_rate": 9.496183206106871e-05,
"loss": 0.1891,
"step": 1038
},
{
"epoch": 2.6311787072243344,
"grad_norm": 0.4265390634536743,
"learning_rate": 9.486005089058525e-05,
"loss": 0.2086,
"step": 1039
},
{
"epoch": 2.6337135614702154,
"grad_norm": 0.37214142084121704,
"learning_rate": 9.475826972010179e-05,
"loss": 0.2321,
"step": 1040
},
{
"epoch": 2.6362484157160964,
"grad_norm": 0.4183201491832733,
"learning_rate": 9.465648854961833e-05,
"loss": 0.2029,
"step": 1041
},
{
"epoch": 2.6387832699619773,
"grad_norm": 0.5688794851303101,
"learning_rate": 9.455470737913487e-05,
"loss": 0.2481,
"step": 1042
},
{
"epoch": 2.6413181242078583,
"grad_norm": 0.38355833292007446,
"learning_rate": 9.445292620865141e-05,
"loss": 0.1989,
"step": 1043
},
{
"epoch": 2.643852978453739,
"grad_norm": 0.4998534023761749,
"learning_rate": 9.435114503816794e-05,
"loss": 0.2272,
"step": 1044
},
{
"epoch": 2.6463878326996197,
"grad_norm": 0.2796792685985565,
"learning_rate": 9.424936386768448e-05,
"loss": 0.1694,
"step": 1045
},
{
"epoch": 2.6489226869455007,
"grad_norm": 0.30551543831825256,
"learning_rate": 9.414758269720102e-05,
"loss": 0.1782,
"step": 1046
},
{
"epoch": 2.6514575411913817,
"grad_norm": 0.3933429718017578,
"learning_rate": 9.404580152671755e-05,
"loss": 0.272,
"step": 1047
},
{
"epoch": 2.653992395437262,
"grad_norm": 0.3543720841407776,
"learning_rate": 9.39440203562341e-05,
"loss": 0.2271,
"step": 1048
},
{
"epoch": 2.656527249683143,
"grad_norm": 0.2716831564903259,
"learning_rate": 9.384223918575063e-05,
"loss": 0.1898,
"step": 1049
},
{
"epoch": 2.659062103929024,
"grad_norm": 0.3037743866443634,
"learning_rate": 9.374045801526719e-05,
"loss": 0.1911,
"step": 1050
},
{
"epoch": 2.661596958174905,
"grad_norm": 0.4390093982219696,
"learning_rate": 9.363867684478373e-05,
"loss": 0.2369,
"step": 1051
},
{
"epoch": 2.664131812420786,
"grad_norm": 0.3383953273296356,
"learning_rate": 9.353689567430026e-05,
"loss": 0.2519,
"step": 1052
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.28227975964546204,
"learning_rate": 9.34351145038168e-05,
"loss": 0.1926,
"step": 1053
},
{
"epoch": 2.6692015209125475,
"grad_norm": 0.33451253175735474,
"learning_rate": 9.333333333333334e-05,
"loss": 0.1864,
"step": 1054
},
{
"epoch": 2.6717363751584284,
"grad_norm": 0.4116145372390747,
"learning_rate": 9.323155216284988e-05,
"loss": 0.2462,
"step": 1055
},
{
"epoch": 2.6742712294043094,
"grad_norm": 0.43822887539863586,
"learning_rate": 9.312977099236642e-05,
"loss": 0.2014,
"step": 1056
},
{
"epoch": 2.67680608365019,
"grad_norm": 0.4394984841346741,
"learning_rate": 9.302798982188296e-05,
"loss": 0.2378,
"step": 1057
},
{
"epoch": 2.679340937896071,
"grad_norm": 0.4073251783847809,
"learning_rate": 9.292620865139949e-05,
"loss": 0.2711,
"step": 1058
},
{
"epoch": 2.681875792141952,
"grad_norm": 0.3316657841205597,
"learning_rate": 9.282442748091603e-05,
"loss": 0.214,
"step": 1059
},
{
"epoch": 2.6844106463878328,
"grad_norm": 0.2994216978549957,
"learning_rate": 9.272264631043257e-05,
"loss": 0.1838,
"step": 1060
},
{
"epoch": 2.6869455006337137,
"grad_norm": 0.5388765335083008,
"learning_rate": 9.26208651399491e-05,
"loss": 0.277,
"step": 1061
},
{
"epoch": 2.6894803548795947,
"grad_norm": 0.3714945912361145,
"learning_rate": 9.251908396946566e-05,
"loss": 0.2428,
"step": 1062
},
{
"epoch": 2.692015209125475,
"grad_norm": 0.32202383875846863,
"learning_rate": 9.24173027989822e-05,
"loss": 0.2063,
"step": 1063
},
{
"epoch": 2.694550063371356,
"grad_norm": 0.4116881191730499,
"learning_rate": 9.231552162849874e-05,
"loss": 0.2661,
"step": 1064
},
{
"epoch": 2.697084917617237,
"grad_norm": 0.36626386642456055,
"learning_rate": 9.221374045801528e-05,
"loss": 0.2897,
"step": 1065
},
{
"epoch": 2.6996197718631176,
"grad_norm": 0.33859655261039734,
"learning_rate": 9.211195928753181e-05,
"loss": 0.1959,
"step": 1066
},
{
"epoch": 2.7021546261089986,
"grad_norm": 0.38263705372810364,
"learning_rate": 9.201017811704835e-05,
"loss": 0.2827,
"step": 1067
},
{
"epoch": 2.7046894803548795,
"grad_norm": 0.3557961583137512,
"learning_rate": 9.19083969465649e-05,
"loss": 0.176,
"step": 1068
},
{
"epoch": 2.7072243346007605,
"grad_norm": 0.35334861278533936,
"learning_rate": 9.180661577608143e-05,
"loss": 0.2183,
"step": 1069
},
{
"epoch": 2.7097591888466415,
"grad_norm": 0.4672026038169861,
"learning_rate": 9.170483460559797e-05,
"loss": 0.2715,
"step": 1070
},
{
"epoch": 2.7122940430925224,
"grad_norm": 0.41585099697113037,
"learning_rate": 9.160305343511451e-05,
"loss": 0.1912,
"step": 1071
},
{
"epoch": 2.714828897338403,
"grad_norm": 0.54674232006073,
"learning_rate": 9.150127226463104e-05,
"loss": 0.2493,
"step": 1072
},
{
"epoch": 2.717363751584284,
"grad_norm": 0.30595988035202026,
"learning_rate": 9.139949109414758e-05,
"loss": 0.1843,
"step": 1073
},
{
"epoch": 2.719898605830165,
"grad_norm": 0.3521415889263153,
"learning_rate": 9.129770992366413e-05,
"loss": 0.2047,
"step": 1074
},
{
"epoch": 2.7224334600760454,
"grad_norm": 0.47393590211868286,
"learning_rate": 9.119592875318067e-05,
"loss": 0.3398,
"step": 1075
},
{
"epoch": 2.7249683143219263,
"grad_norm": 0.4672793745994568,
"learning_rate": 9.109414758269721e-05,
"loss": 0.3569,
"step": 1076
},
{
"epoch": 2.7275031685678073,
"grad_norm": 0.41231435537338257,
"learning_rate": 9.099236641221375e-05,
"loss": 0.2323,
"step": 1077
},
{
"epoch": 2.7300380228136882,
"grad_norm": 0.36700156331062317,
"learning_rate": 9.089058524173029e-05,
"loss": 0.2023,
"step": 1078
},
{
"epoch": 2.732572877059569,
"grad_norm": 0.32198184728622437,
"learning_rate": 9.078880407124683e-05,
"loss": 0.1814,
"step": 1079
},
{
"epoch": 2.73510773130545,
"grad_norm": 0.46826303005218506,
"learning_rate": 9.068702290076337e-05,
"loss": 0.2216,
"step": 1080
},
{
"epoch": 2.7376425855513307,
"grad_norm": 0.3026100695133209,
"learning_rate": 9.05852417302799e-05,
"loss": 0.1826,
"step": 1081
},
{
"epoch": 2.7401774397972116,
"grad_norm": 0.2897210717201233,
"learning_rate": 9.048346055979644e-05,
"loss": 0.1853,
"step": 1082
},
{
"epoch": 2.7427122940430926,
"grad_norm": 0.296286940574646,
"learning_rate": 9.038167938931298e-05,
"loss": 0.1776,
"step": 1083
},
{
"epoch": 2.7452471482889735,
"grad_norm": 0.374600887298584,
"learning_rate": 9.027989821882952e-05,
"loss": 0.2031,
"step": 1084
},
{
"epoch": 2.747782002534854,
"grad_norm": 0.5333495140075684,
"learning_rate": 9.017811704834606e-05,
"loss": 0.2798,
"step": 1085
},
{
"epoch": 2.750316856780735,
"grad_norm": 0.43342864513397217,
"learning_rate": 9.007633587786259e-05,
"loss": 0.2063,
"step": 1086
},
{
"epoch": 2.752851711026616,
"grad_norm": 0.5283639430999756,
"learning_rate": 8.997455470737914e-05,
"loss": 0.25,
"step": 1087
},
{
"epoch": 2.755386565272497,
"grad_norm": 0.556190013885498,
"learning_rate": 8.987277353689568e-05,
"loss": 0.2044,
"step": 1088
},
{
"epoch": 2.757921419518378,
"grad_norm": 0.35083258152008057,
"learning_rate": 8.977099236641222e-05,
"loss": 0.188,
"step": 1089
},
{
"epoch": 2.7604562737642584,
"grad_norm": 0.42917102575302124,
"learning_rate": 8.966921119592876e-05,
"loss": 0.2511,
"step": 1090
},
{
"epoch": 2.7629911280101394,
"grad_norm": 0.5665780305862427,
"learning_rate": 8.95674300254453e-05,
"loss": 0.3307,
"step": 1091
},
{
"epoch": 2.7655259822560203,
"grad_norm": 0.40193435549736023,
"learning_rate": 8.946564885496184e-05,
"loss": 0.2453,
"step": 1092
},
{
"epoch": 2.7680608365019013,
"grad_norm": 0.46344733238220215,
"learning_rate": 8.936386768447838e-05,
"loss": 0.2096,
"step": 1093
},
{
"epoch": 2.770595690747782,
"grad_norm": 0.4600921869277954,
"learning_rate": 8.926208651399492e-05,
"loss": 0.2161,
"step": 1094
},
{
"epoch": 2.7731305449936627,
"grad_norm": 0.46053385734558105,
"learning_rate": 8.916030534351145e-05,
"loss": 0.2369,
"step": 1095
},
{
"epoch": 2.7756653992395437,
"grad_norm": 0.45449280738830566,
"learning_rate": 8.9058524173028e-05,
"loss": 0.2344,
"step": 1096
},
{
"epoch": 2.7782002534854247,
"grad_norm": 0.39411383867263794,
"learning_rate": 8.895674300254453e-05,
"loss": 0.2082,
"step": 1097
},
{
"epoch": 2.7807351077313056,
"grad_norm": 0.38967519998550415,
"learning_rate": 8.885496183206107e-05,
"loss": 0.2264,
"step": 1098
},
{
"epoch": 2.7832699619771866,
"grad_norm": 0.3357069194316864,
"learning_rate": 8.875318066157761e-05,
"loss": 0.1896,
"step": 1099
},
{
"epoch": 2.785804816223067,
"grad_norm": 0.4941220283508301,
"learning_rate": 8.865139949109415e-05,
"loss": 0.3003,
"step": 1100
},
{
"epoch": 2.788339670468948,
"grad_norm": 0.3897833526134491,
"learning_rate": 8.854961832061069e-05,
"loss": 0.1907,
"step": 1101
},
{
"epoch": 2.790874524714829,
"grad_norm": 0.4247800409793854,
"learning_rate": 8.844783715012723e-05,
"loss": 0.1843,
"step": 1102
},
{
"epoch": 2.7934093789607095,
"grad_norm": 0.46850237250328064,
"learning_rate": 8.834605597964377e-05,
"loss": 0.2501,
"step": 1103
},
{
"epoch": 2.7959442332065905,
"grad_norm": 0.4753093421459198,
"learning_rate": 8.824427480916031e-05,
"loss": 0.2277,
"step": 1104
},
{
"epoch": 2.7984790874524714,
"grad_norm": 0.3235141932964325,
"learning_rate": 8.814249363867685e-05,
"loss": 0.1817,
"step": 1105
},
{
"epoch": 2.8010139416983524,
"grad_norm": 0.48403674364089966,
"learning_rate": 8.804071246819339e-05,
"loss": 0.2278,
"step": 1106
},
{
"epoch": 2.8035487959442333,
"grad_norm": 0.30417025089263916,
"learning_rate": 8.793893129770993e-05,
"loss": 0.1867,
"step": 1107
},
{
"epoch": 2.8060836501901143,
"grad_norm": 0.30289140343666077,
"learning_rate": 8.783715012722647e-05,
"loss": 0.1898,
"step": 1108
},
{
"epoch": 2.808618504435995,
"grad_norm": 0.47156116366386414,
"learning_rate": 8.7735368956743e-05,
"loss": 0.2381,
"step": 1109
},
{
"epoch": 2.8111533586818758,
"grad_norm": 0.4420924186706543,
"learning_rate": 8.763358778625954e-05,
"loss": 0.251,
"step": 1110
},
{
"epoch": 2.8136882129277567,
"grad_norm": 0.42235851287841797,
"learning_rate": 8.75318066157761e-05,
"loss": 0.2007,
"step": 1111
},
{
"epoch": 2.8162230671736372,
"grad_norm": 0.40069061517715454,
"learning_rate": 8.743002544529262e-05,
"loss": 0.2052,
"step": 1112
},
{
"epoch": 2.818757921419518,
"grad_norm": 0.5213333368301392,
"learning_rate": 8.732824427480916e-05,
"loss": 0.2236,
"step": 1113
},
{
"epoch": 2.821292775665399,
"grad_norm": 0.3919121026992798,
"learning_rate": 8.72264631043257e-05,
"loss": 0.2338,
"step": 1114
},
{
"epoch": 2.82382762991128,
"grad_norm": 0.4295049011707306,
"learning_rate": 8.712468193384224e-05,
"loss": 0.2713,
"step": 1115
},
{
"epoch": 2.826362484157161,
"grad_norm": 0.25834596157073975,
"learning_rate": 8.702290076335878e-05,
"loss": 0.1701,
"step": 1116
},
{
"epoch": 2.828897338403042,
"grad_norm": 0.36217084527015686,
"learning_rate": 8.692111959287532e-05,
"loss": 0.1963,
"step": 1117
},
{
"epoch": 2.8314321926489225,
"grad_norm": 0.39089757204055786,
"learning_rate": 8.681933842239186e-05,
"loss": 0.186,
"step": 1118
},
{
"epoch": 2.8339670468948035,
"grad_norm": 0.45900896191596985,
"learning_rate": 8.67175572519084e-05,
"loss": 0.22,
"step": 1119
},
{
"epoch": 2.8365019011406845,
"grad_norm": 0.2946614623069763,
"learning_rate": 8.661577608142494e-05,
"loss": 0.1771,
"step": 1120
},
{
"epoch": 2.8390367553865654,
"grad_norm": 0.4160090982913971,
"learning_rate": 8.651399491094148e-05,
"loss": 0.2083,
"step": 1121
},
{
"epoch": 2.841571609632446,
"grad_norm": 0.43507587909698486,
"learning_rate": 8.641221374045802e-05,
"loss": 0.2595,
"step": 1122
},
{
"epoch": 2.844106463878327,
"grad_norm": 0.449813574552536,
"learning_rate": 8.631043256997457e-05,
"loss": 0.2982,
"step": 1123
},
{
"epoch": 2.846641318124208,
"grad_norm": 0.33715054392814636,
"learning_rate": 8.620865139949111e-05,
"loss": 0.1851,
"step": 1124
},
{
"epoch": 2.849176172370089,
"grad_norm": 0.4767422676086426,
"learning_rate": 8.610687022900765e-05,
"loss": 0.2865,
"step": 1125
},
{
"epoch": 2.8517110266159698,
"grad_norm": 0.4232870042324066,
"learning_rate": 8.600508905852417e-05,
"loss": 0.2355,
"step": 1126
},
{
"epoch": 2.8542458808618507,
"grad_norm": 0.286565363407135,
"learning_rate": 8.590330788804071e-05,
"loss": 0.188,
"step": 1127
},
{
"epoch": 2.8567807351077312,
"grad_norm": 0.304606169462204,
"learning_rate": 8.580152671755725e-05,
"loss": 0.2367,
"step": 1128
},
{
"epoch": 2.859315589353612,
"grad_norm": 0.4730917811393738,
"learning_rate": 8.569974554707379e-05,
"loss": 0.2925,
"step": 1129
},
{
"epoch": 2.861850443599493,
"grad_norm": 0.348651647567749,
"learning_rate": 8.559796437659033e-05,
"loss": 0.242,
"step": 1130
},
{
"epoch": 2.8643852978453737,
"grad_norm": 0.31156882643699646,
"learning_rate": 8.549618320610687e-05,
"loss": 0.1865,
"step": 1131
},
{
"epoch": 2.8669201520912546,
"grad_norm": 0.4416813254356384,
"learning_rate": 8.539440203562341e-05,
"loss": 0.311,
"step": 1132
},
{
"epoch": 2.8694550063371356,
"grad_norm": 0.2997666895389557,
"learning_rate": 8.529262086513995e-05,
"loss": 0.1956,
"step": 1133
},
{
"epoch": 2.8719898605830165,
"grad_norm": 0.30020904541015625,
"learning_rate": 8.519083969465649e-05,
"loss": 0.206,
"step": 1134
},
{
"epoch": 2.8745247148288975,
"grad_norm": 0.4457029104232788,
"learning_rate": 8.508905852417304e-05,
"loss": 0.2422,
"step": 1135
},
{
"epoch": 2.8770595690747784,
"grad_norm": 0.3519587218761444,
"learning_rate": 8.498727735368958e-05,
"loss": 0.2277,
"step": 1136
},
{
"epoch": 2.879594423320659,
"grad_norm": 0.3482111394405365,
"learning_rate": 8.488549618320612e-05,
"loss": 0.1981,
"step": 1137
},
{
"epoch": 2.88212927756654,
"grad_norm": 0.31978392601013184,
"learning_rate": 8.478371501272266e-05,
"loss": 0.1849,
"step": 1138
},
{
"epoch": 2.884664131812421,
"grad_norm": 0.2380414754152298,
"learning_rate": 8.46819338422392e-05,
"loss": 0.1619,
"step": 1139
},
{
"epoch": 2.8871989860583014,
"grad_norm": 0.25577735900878906,
"learning_rate": 8.458015267175572e-05,
"loss": 0.1594,
"step": 1140
},
{
"epoch": 2.8897338403041823,
"grad_norm": 0.36093661189079285,
"learning_rate": 8.447837150127226e-05,
"loss": 0.1937,
"step": 1141
},
{
"epoch": 2.8922686945500633,
"grad_norm": 0.3542689085006714,
"learning_rate": 8.43765903307888e-05,
"loss": 0.2219,
"step": 1142
},
{
"epoch": 2.8948035487959443,
"grad_norm": 0.3966139853000641,
"learning_rate": 8.427480916030534e-05,
"loss": 0.2427,
"step": 1143
},
{
"epoch": 2.897338403041825,
"grad_norm": 0.3684738278388977,
"learning_rate": 8.417302798982188e-05,
"loss": 0.2093,
"step": 1144
},
{
"epoch": 2.899873257287706,
"grad_norm": 0.430477499961853,
"learning_rate": 8.407124681933842e-05,
"loss": 0.2266,
"step": 1145
},
{
"epoch": 2.9024081115335867,
"grad_norm": 0.32896652817726135,
"learning_rate": 8.396946564885496e-05,
"loss": 0.2447,
"step": 1146
},
{
"epoch": 2.9049429657794676,
"grad_norm": 0.45568832755088806,
"learning_rate": 8.38676844783715e-05,
"loss": 0.2251,
"step": 1147
},
{
"epoch": 2.9074778200253486,
"grad_norm": 0.48290732502937317,
"learning_rate": 8.376590330788805e-05,
"loss": 0.2471,
"step": 1148
},
{
"epoch": 2.9100126742712296,
"grad_norm": 0.40795937180519104,
"learning_rate": 8.366412213740459e-05,
"loss": 0.2031,
"step": 1149
},
{
"epoch": 2.91254752851711,
"grad_norm": 0.362835168838501,
"learning_rate": 8.356234096692113e-05,
"loss": 0.1991,
"step": 1150
},
{
"epoch": 2.915082382762991,
"grad_norm": 0.38601744174957275,
"learning_rate": 8.346055979643767e-05,
"loss": 0.1821,
"step": 1151
},
{
"epoch": 2.917617237008872,
"grad_norm": 0.2641182541847229,
"learning_rate": 8.335877862595421e-05,
"loss": 0.16,
"step": 1152
},
{
"epoch": 2.920152091254753,
"grad_norm": 0.5600478053092957,
"learning_rate": 8.325699745547075e-05,
"loss": 0.2476,
"step": 1153
},
{
"epoch": 2.922686945500634,
"grad_norm": 0.3873019516468048,
"learning_rate": 8.315521628498727e-05,
"loss": 0.2264,
"step": 1154
},
{
"epoch": 2.9252217997465144,
"grad_norm": 0.2946743667125702,
"learning_rate": 8.305343511450381e-05,
"loss": 0.1776,
"step": 1155
},
{
"epoch": 2.9277566539923954,
"grad_norm": 0.3886416554450989,
"learning_rate": 8.295165394402035e-05,
"loss": 0.2123,
"step": 1156
},
{
"epoch": 2.9302915082382763,
"grad_norm": 0.39706671237945557,
"learning_rate": 8.284987277353689e-05,
"loss": 0.2319,
"step": 1157
},
{
"epoch": 2.9328263624841573,
"grad_norm": 0.30693602561950684,
"learning_rate": 8.274809160305343e-05,
"loss": 0.1939,
"step": 1158
},
{
"epoch": 2.935361216730038,
"grad_norm": 0.37277474999427795,
"learning_rate": 8.264631043256997e-05,
"loss": 0.2194,
"step": 1159
},
{
"epoch": 2.9378960709759188,
"grad_norm": 0.442508727312088,
"learning_rate": 8.254452926208652e-05,
"loss": 0.2142,
"step": 1160
},
{
"epoch": 2.9404309252217997,
"grad_norm": 0.275898814201355,
"learning_rate": 8.244274809160306e-05,
"loss": 0.1791,
"step": 1161
},
{
"epoch": 2.9429657794676807,
"grad_norm": 0.4033918082714081,
"learning_rate": 8.23409669211196e-05,
"loss": 0.295,
"step": 1162
},
{
"epoch": 2.9455006337135616,
"grad_norm": 0.46713244915008545,
"learning_rate": 8.223918575063614e-05,
"loss": 0.2662,
"step": 1163
},
{
"epoch": 2.9480354879594426,
"grad_norm": 0.37975406646728516,
"learning_rate": 8.213740458015268e-05,
"loss": 0.1915,
"step": 1164
},
{
"epoch": 2.950570342205323,
"grad_norm": 0.31382545828819275,
"learning_rate": 8.203562340966922e-05,
"loss": 0.1793,
"step": 1165
},
{
"epoch": 2.953105196451204,
"grad_norm": 0.42415499687194824,
"learning_rate": 8.193384223918576e-05,
"loss": 0.2375,
"step": 1166
},
{
"epoch": 2.955640050697085,
"grad_norm": 0.4227803647518158,
"learning_rate": 8.18320610687023e-05,
"loss": 0.213,
"step": 1167
},
{
"epoch": 2.9581749049429655,
"grad_norm": 0.3395853638648987,
"learning_rate": 8.173027989821882e-05,
"loss": 0.1942,
"step": 1168
},
{
"epoch": 2.9607097591888465,
"grad_norm": 0.4627746641635895,
"learning_rate": 8.162849872773536e-05,
"loss": 0.2266,
"step": 1169
},
{
"epoch": 2.9632446134347274,
"grad_norm": 0.36325398087501526,
"learning_rate": 8.15267175572519e-05,
"loss": 0.2176,
"step": 1170
},
{
"epoch": 2.9657794676806084,
"grad_norm": 0.4188767671585083,
"learning_rate": 8.142493638676844e-05,
"loss": 0.1992,
"step": 1171
},
{
"epoch": 2.9683143219264894,
"grad_norm": 0.3149709403514862,
"learning_rate": 8.1323155216285e-05,
"loss": 0.1829,
"step": 1172
},
{
"epoch": 2.9708491761723703,
"grad_norm": 0.26542145013809204,
"learning_rate": 8.122137404580153e-05,
"loss": 0.1801,
"step": 1173
},
{
"epoch": 2.973384030418251,
"grad_norm": 0.28748998045921326,
"learning_rate": 8.111959287531807e-05,
"loss": 0.1764,
"step": 1174
},
{
"epoch": 2.975918884664132,
"grad_norm": 0.3103797733783722,
"learning_rate": 8.101781170483461e-05,
"loss": 0.2047,
"step": 1175
},
{
"epoch": 2.9784537389100127,
"grad_norm": 0.3357256054878235,
"learning_rate": 8.091603053435115e-05,
"loss": 0.2303,
"step": 1176
},
{
"epoch": 2.9809885931558933,
"grad_norm": 0.4399915933609009,
"learning_rate": 8.081424936386769e-05,
"loss": 0.2423,
"step": 1177
},
{
"epoch": 2.983523447401774,
"grad_norm": 0.3486070930957794,
"learning_rate": 8.071246819338423e-05,
"loss": 0.19,
"step": 1178
},
{
"epoch": 2.986058301647655,
"grad_norm": 0.33286648988723755,
"learning_rate": 8.061068702290077e-05,
"loss": 0.1788,
"step": 1179
},
{
"epoch": 2.988593155893536,
"grad_norm": 0.2841028571128845,
"learning_rate": 8.050890585241731e-05,
"loss": 0.167,
"step": 1180
},
{
"epoch": 2.991128010139417,
"grad_norm": 0.44933149218559265,
"learning_rate": 8.040712468193385e-05,
"loss": 0.3098,
"step": 1181
},
{
"epoch": 2.993662864385298,
"grad_norm": 0.2849741280078888,
"learning_rate": 8.030534351145038e-05,
"loss": 0.1896,
"step": 1182
},
{
"epoch": 2.9961977186311786,
"grad_norm": 0.39720216393470764,
"learning_rate": 8.020356234096691e-05,
"loss": 0.2426,
"step": 1183
},
{
"epoch": 2.9987325728770595,
"grad_norm": 0.3838231563568115,
"learning_rate": 8.010178117048347e-05,
"loss": 0.2194,
"step": 1184
},
{
"epoch": 3.0,
"grad_norm": 0.6684709787368774,
"learning_rate": 8e-05,
"loss": 0.2783,
"step": 1185
},
{
"epoch": 3.002534854245881,
"grad_norm": 0.44380757212638855,
"learning_rate": 7.989821882951655e-05,
"loss": 0.2938,
"step": 1186
},
{
"epoch": 3.005069708491762,
"grad_norm": 0.4787996709346771,
"learning_rate": 7.979643765903309e-05,
"loss": 0.2998,
"step": 1187
},
{
"epoch": 3.0076045627376424,
"grad_norm": 0.36355340480804443,
"learning_rate": 7.969465648854962e-05,
"loss": 0.1555,
"step": 1188
},
{
"epoch": 3.0101394169835234,
"grad_norm": 0.37890535593032837,
"learning_rate": 7.959287531806616e-05,
"loss": 0.1743,
"step": 1189
},
{
"epoch": 3.0126742712294043,
"grad_norm": 0.4317542612552643,
"learning_rate": 7.94910941475827e-05,
"loss": 0.1891,
"step": 1190
},
{
"epoch": 3.0152091254752853,
"grad_norm": 0.3477863669395447,
"learning_rate": 7.938931297709924e-05,
"loss": 0.1576,
"step": 1191
},
{
"epoch": 3.017743979721166,
"grad_norm": 0.414050817489624,
"learning_rate": 7.928753180661578e-05,
"loss": 0.2014,
"step": 1192
},
{
"epoch": 3.0202788339670468,
"grad_norm": 0.3596842288970947,
"learning_rate": 7.918575063613232e-05,
"loss": 0.1482,
"step": 1193
},
{
"epoch": 3.0228136882129277,
"grad_norm": 0.49169921875,
"learning_rate": 7.908396946564886e-05,
"loss": 0.1686,
"step": 1194
},
{
"epoch": 3.0253485424588087,
"grad_norm": 0.44806674122810364,
"learning_rate": 7.89821882951654e-05,
"loss": 0.2044,
"step": 1195
},
{
"epoch": 3.0278833967046896,
"grad_norm": 0.43101197481155396,
"learning_rate": 7.888040712468194e-05,
"loss": 0.1911,
"step": 1196
},
{
"epoch": 3.03041825095057,
"grad_norm": 0.5595632195472717,
"learning_rate": 7.877862595419848e-05,
"loss": 0.1823,
"step": 1197
},
{
"epoch": 3.032953105196451,
"grad_norm": 0.5024780035018921,
"learning_rate": 7.867684478371502e-05,
"loss": 0.1789,
"step": 1198
},
{
"epoch": 3.035487959442332,
"grad_norm": 0.4227488934993744,
"learning_rate": 7.857506361323156e-05,
"loss": 0.1539,
"step": 1199
},
{
"epoch": 3.038022813688213,
"grad_norm": 0.43486127257347107,
"learning_rate": 7.84732824427481e-05,
"loss": 0.1577,
"step": 1200
},
{
"epoch": 3.040557667934094,
"grad_norm": 0.47951167821884155,
"learning_rate": 7.837150127226464e-05,
"loss": 0.1975,
"step": 1201
},
{
"epoch": 3.0430925221799745,
"grad_norm": 0.4223075211048126,
"learning_rate": 7.826972010178117e-05,
"loss": 0.1719,
"step": 1202
},
{
"epoch": 3.0456273764258555,
"grad_norm": 0.6699900031089783,
"learning_rate": 7.816793893129771e-05,
"loss": 0.2139,
"step": 1203
},
{
"epoch": 3.0481622306717364,
"grad_norm": 0.6038373708724976,
"learning_rate": 7.806615776081425e-05,
"loss": 0.2163,
"step": 1204
},
{
"epoch": 3.0506970849176174,
"grad_norm": 0.530208945274353,
"learning_rate": 7.796437659033079e-05,
"loss": 0.1482,
"step": 1205
},
{
"epoch": 3.053231939163498,
"grad_norm": 0.6380701661109924,
"learning_rate": 7.786259541984733e-05,
"loss": 0.2191,
"step": 1206
},
{
"epoch": 3.055766793409379,
"grad_norm": 0.6455860137939453,
"learning_rate": 7.776081424936387e-05,
"loss": 0.1812,
"step": 1207
},
{
"epoch": 3.05830164765526,
"grad_norm": 0.5198556184768677,
"learning_rate": 7.765903307888041e-05,
"loss": 0.1602,
"step": 1208
},
{
"epoch": 3.0608365019011408,
"grad_norm": 0.4842750132083893,
"learning_rate": 7.755725190839695e-05,
"loss": 0.1739,
"step": 1209
},
{
"epoch": 3.0633713561470217,
"grad_norm": 0.6345165371894836,
"learning_rate": 7.745547073791349e-05,
"loss": 0.1841,
"step": 1210
},
{
"epoch": 3.0659062103929022,
"grad_norm": 0.551673173904419,
"learning_rate": 7.735368956743003e-05,
"loss": 0.1755,
"step": 1211
},
{
"epoch": 3.068441064638783,
"grad_norm": 0.5332705974578857,
"learning_rate": 7.725190839694657e-05,
"loss": 0.2175,
"step": 1212
},
{
"epoch": 3.070975918884664,
"grad_norm": 0.6630911231040955,
"learning_rate": 7.715012722646311e-05,
"loss": 0.2868,
"step": 1213
},
{
"epoch": 3.073510773130545,
"grad_norm": 0.42508792877197266,
"learning_rate": 7.704834605597965e-05,
"loss": 0.1811,
"step": 1214
},
{
"epoch": 3.076045627376426,
"grad_norm": 0.504231870174408,
"learning_rate": 7.694656488549619e-05,
"loss": 0.1765,
"step": 1215
},
{
"epoch": 3.0785804816223066,
"grad_norm": 0.39370813965797424,
"learning_rate": 7.684478371501273e-05,
"loss": 0.1739,
"step": 1216
},
{
"epoch": 3.0811153358681875,
"grad_norm": 0.5411176085472107,
"learning_rate": 7.674300254452926e-05,
"loss": 0.2015,
"step": 1217
},
{
"epoch": 3.0836501901140685,
"grad_norm": 0.58034348487854,
"learning_rate": 7.66412213740458e-05,
"loss": 0.2293,
"step": 1218
},
{
"epoch": 3.0861850443599494,
"grad_norm": 0.48355352878570557,
"learning_rate": 7.653944020356234e-05,
"loss": 0.1858,
"step": 1219
},
{
"epoch": 3.08871989860583,
"grad_norm": 0.3532313406467438,
"learning_rate": 7.643765903307888e-05,
"loss": 0.1689,
"step": 1220
},
{
"epoch": 3.091254752851711,
"grad_norm": 0.36245197057724,
"learning_rate": 7.633587786259542e-05,
"loss": 0.1744,
"step": 1221
},
{
"epoch": 3.093789607097592,
"grad_norm": 0.4752829372882843,
"learning_rate": 7.623409669211196e-05,
"loss": 0.1733,
"step": 1222
},
{
"epoch": 3.096324461343473,
"grad_norm": 0.3701539933681488,
"learning_rate": 7.61323155216285e-05,
"loss": 0.158,
"step": 1223
},
{
"epoch": 3.098859315589354,
"grad_norm": 0.45548319816589355,
"learning_rate": 7.603053435114504e-05,
"loss": 0.1822,
"step": 1224
},
{
"epoch": 3.1013941698352343,
"grad_norm": 0.376499205827713,
"learning_rate": 7.592875318066158e-05,
"loss": 0.1613,
"step": 1225
},
{
"epoch": 3.1039290240811153,
"grad_norm": 0.4430786967277527,
"learning_rate": 7.582697201017812e-05,
"loss": 0.1691,
"step": 1226
},
{
"epoch": 3.106463878326996,
"grad_norm": 0.44311538338661194,
"learning_rate": 7.572519083969466e-05,
"loss": 0.1853,
"step": 1227
},
{
"epoch": 3.108998732572877,
"grad_norm": 0.5815149545669556,
"learning_rate": 7.56234096692112e-05,
"loss": 0.2039,
"step": 1228
},
{
"epoch": 3.111533586818758,
"grad_norm": 0.5101373195648193,
"learning_rate": 7.552162849872774e-05,
"loss": 0.2022,
"step": 1229
},
{
"epoch": 3.1140684410646386,
"grad_norm": 0.6038093566894531,
"learning_rate": 7.541984732824428e-05,
"loss": 0.1859,
"step": 1230
},
{
"epoch": 3.1166032953105196,
"grad_norm": 0.5133914351463318,
"learning_rate": 7.531806615776081e-05,
"loss": 0.1626,
"step": 1231
},
{
"epoch": 3.1191381495564006,
"grad_norm": 0.40495821833610535,
"learning_rate": 7.521628498727735e-05,
"loss": 0.1739,
"step": 1232
},
{
"epoch": 3.1216730038022815,
"grad_norm": 0.6585063934326172,
"learning_rate": 7.511450381679391e-05,
"loss": 0.2402,
"step": 1233
},
{
"epoch": 3.124207858048162,
"grad_norm": 0.45598068833351135,
"learning_rate": 7.501272264631045e-05,
"loss": 0.1632,
"step": 1234
},
{
"epoch": 3.126742712294043,
"grad_norm": 0.42114904522895813,
"learning_rate": 7.491094147582699e-05,
"loss": 0.1638,
"step": 1235
},
{
"epoch": 3.129277566539924,
"grad_norm": 0.443198561668396,
"learning_rate": 7.480916030534351e-05,
"loss": 0.2148,
"step": 1236
},
{
"epoch": 3.131812420785805,
"grad_norm": 0.5573143362998962,
"learning_rate": 7.470737913486005e-05,
"loss": 0.2219,
"step": 1237
},
{
"epoch": 3.134347275031686,
"grad_norm": 0.6023311614990234,
"learning_rate": 7.460559796437659e-05,
"loss": 0.1987,
"step": 1238
},
{
"epoch": 3.1368821292775664,
"grad_norm": 0.5282934904098511,
"learning_rate": 7.450381679389313e-05,
"loss": 0.2377,
"step": 1239
},
{
"epoch": 3.1394169835234473,
"grad_norm": 0.49694669246673584,
"learning_rate": 7.440203562340967e-05,
"loss": 0.1804,
"step": 1240
},
{
"epoch": 3.1419518377693283,
"grad_norm": 0.43045276403427124,
"learning_rate": 7.430025445292621e-05,
"loss": 0.1635,
"step": 1241
},
{
"epoch": 3.1444866920152093,
"grad_norm": 0.4798453152179718,
"learning_rate": 7.419847328244275e-05,
"loss": 0.1696,
"step": 1242
},
{
"epoch": 3.14702154626109,
"grad_norm": 0.5173293352127075,
"learning_rate": 7.409669211195929e-05,
"loss": 0.1802,
"step": 1243
},
{
"epoch": 3.1495564005069707,
"grad_norm": 0.5398945808410645,
"learning_rate": 7.399491094147583e-05,
"loss": 0.1949,
"step": 1244
},
{
"epoch": 3.1520912547528517,
"grad_norm": 0.5297830700874329,
"learning_rate": 7.389312977099238e-05,
"loss": 0.1987,
"step": 1245
},
{
"epoch": 3.1546261089987326,
"grad_norm": 0.5320866703987122,
"learning_rate": 7.379134860050892e-05,
"loss": 0.1715,
"step": 1246
},
{
"epoch": 3.1571609632446136,
"grad_norm": 0.6132882833480835,
"learning_rate": 7.368956743002546e-05,
"loss": 0.3204,
"step": 1247
},
{
"epoch": 3.159695817490494,
"grad_norm": 0.4120640158653259,
"learning_rate": 7.3587786259542e-05,
"loss": 0.157,
"step": 1248
},
{
"epoch": 3.162230671736375,
"grad_norm": 0.6765384674072266,
"learning_rate": 7.348600508905854e-05,
"loss": 0.2186,
"step": 1249
},
{
"epoch": 3.164765525982256,
"grad_norm": 0.6318830847740173,
"learning_rate": 7.338422391857506e-05,
"loss": 0.2189,
"step": 1250
},
{
"epoch": 3.167300380228137,
"grad_norm": 0.508305013179779,
"learning_rate": 7.32824427480916e-05,
"loss": 0.1962,
"step": 1251
},
{
"epoch": 3.169835234474018,
"grad_norm": 0.603520393371582,
"learning_rate": 7.318066157760814e-05,
"loss": 0.2615,
"step": 1252
},
{
"epoch": 3.1723700887198985,
"grad_norm": 0.7639157176017761,
"learning_rate": 7.307888040712468e-05,
"loss": 0.2982,
"step": 1253
},
{
"epoch": 3.1749049429657794,
"grad_norm": 0.5995659232139587,
"learning_rate": 7.297709923664122e-05,
"loss": 0.2206,
"step": 1254
},
{
"epoch": 3.1774397972116604,
"grad_norm": 0.6512479186058044,
"learning_rate": 7.287531806615776e-05,
"loss": 0.2065,
"step": 1255
},
{
"epoch": 3.1799746514575413,
"grad_norm": 0.4128544330596924,
"learning_rate": 7.27735368956743e-05,
"loss": 0.1589,
"step": 1256
},
{
"epoch": 3.182509505703422,
"grad_norm": 0.5341802835464478,
"learning_rate": 7.267175572519084e-05,
"loss": 0.1812,
"step": 1257
},
{
"epoch": 3.185044359949303,
"grad_norm": 0.38032597303390503,
"learning_rate": 7.256997455470739e-05,
"loss": 0.1773,
"step": 1258
},
{
"epoch": 3.1875792141951838,
"grad_norm": 0.5732728838920593,
"learning_rate": 7.246819338422393e-05,
"loss": 0.2047,
"step": 1259
},
{
"epoch": 3.1901140684410647,
"grad_norm": 0.47396236658096313,
"learning_rate": 7.236641221374047e-05,
"loss": 0.2095,
"step": 1260
},
{
"epoch": 3.1926489226869457,
"grad_norm": 0.4764629304409027,
"learning_rate": 7.226463104325701e-05,
"loss": 0.1802,
"step": 1261
},
{
"epoch": 3.195183776932826,
"grad_norm": 0.5802401304244995,
"learning_rate": 7.216284987277355e-05,
"loss": 0.1821,
"step": 1262
},
{
"epoch": 3.197718631178707,
"grad_norm": 0.47988972067832947,
"learning_rate": 7.206106870229009e-05,
"loss": 0.163,
"step": 1263
},
{
"epoch": 3.200253485424588,
"grad_norm": 0.48500359058380127,
"learning_rate": 7.195928753180661e-05,
"loss": 0.1739,
"step": 1264
},
{
"epoch": 3.202788339670469,
"grad_norm": 0.7479031682014465,
"learning_rate": 7.185750636132315e-05,
"loss": 0.2646,
"step": 1265
},
{
"epoch": 3.20532319391635,
"grad_norm": 0.48695701360702515,
"learning_rate": 7.175572519083969e-05,
"loss": 0.1822,
"step": 1266
},
{
"epoch": 3.2078580481622305,
"grad_norm": 0.712354838848114,
"learning_rate": 7.165394402035623e-05,
"loss": 0.1827,
"step": 1267
},
{
"epoch": 3.2103929024081115,
"grad_norm": 0.4304606020450592,
"learning_rate": 7.155216284987277e-05,
"loss": 0.1759,
"step": 1268
},
{
"epoch": 3.2129277566539924,
"grad_norm": 0.44741392135620117,
"learning_rate": 7.145038167938931e-05,
"loss": 0.1979,
"step": 1269
},
{
"epoch": 3.2154626108998734,
"grad_norm": 0.3691045045852661,
"learning_rate": 7.134860050890586e-05,
"loss": 0.1575,
"step": 1270
},
{
"epoch": 3.2179974651457544,
"grad_norm": 0.4908023476600647,
"learning_rate": 7.12468193384224e-05,
"loss": 0.1854,
"step": 1271
},
{
"epoch": 3.220532319391635,
"grad_norm": 0.3953510820865631,
"learning_rate": 7.114503816793894e-05,
"loss": 0.1821,
"step": 1272
},
{
"epoch": 3.223067173637516,
"grad_norm": 0.35227248072624207,
"learning_rate": 7.104325699745548e-05,
"loss": 0.173,
"step": 1273
},
{
"epoch": 3.225602027883397,
"grad_norm": 0.41285187005996704,
"learning_rate": 7.094147582697202e-05,
"loss": 0.1708,
"step": 1274
},
{
"epoch": 3.2281368821292777,
"grad_norm": 0.5076828002929688,
"learning_rate": 7.083969465648856e-05,
"loss": 0.2128,
"step": 1275
},
{
"epoch": 3.2306717363751583,
"grad_norm": 0.5385151505470276,
"learning_rate": 7.07379134860051e-05,
"loss": 0.2181,
"step": 1276
},
{
"epoch": 3.233206590621039,
"grad_norm": 0.4620850086212158,
"learning_rate": 7.063613231552164e-05,
"loss": 0.212,
"step": 1277
},
{
"epoch": 3.23574144486692,
"grad_norm": 0.6768701672554016,
"learning_rate": 7.053435114503816e-05,
"loss": 0.2704,
"step": 1278
},
{
"epoch": 3.238276299112801,
"grad_norm": 0.43216967582702637,
"learning_rate": 7.04325699745547e-05,
"loss": 0.1633,
"step": 1279
},
{
"epoch": 3.240811153358682,
"grad_norm": 0.3756103813648224,
"learning_rate": 7.033078880407124e-05,
"loss": 0.1767,
"step": 1280
},
{
"epoch": 3.2433460076045626,
"grad_norm": 0.612819254398346,
"learning_rate": 7.022900763358778e-05,
"loss": 0.2563,
"step": 1281
},
{
"epoch": 3.2458808618504436,
"grad_norm": 0.5477813482284546,
"learning_rate": 7.012722646310433e-05,
"loss": 0.2053,
"step": 1282
},
{
"epoch": 3.2484157160963245,
"grad_norm": 0.3412390351295471,
"learning_rate": 7.002544529262087e-05,
"loss": 0.1506,
"step": 1283
},
{
"epoch": 3.2509505703422055,
"grad_norm": 0.34337860345840454,
"learning_rate": 6.992366412213741e-05,
"loss": 0.1612,
"step": 1284
},
{
"epoch": 3.253485424588086,
"grad_norm": 0.37943509221076965,
"learning_rate": 6.982188295165395e-05,
"loss": 0.168,
"step": 1285
},
{
"epoch": 3.256020278833967,
"grad_norm": 0.6030418872833252,
"learning_rate": 6.972010178117049e-05,
"loss": 0.2146,
"step": 1286
},
{
"epoch": 3.258555133079848,
"grad_norm": 0.34367507696151733,
"learning_rate": 6.961832061068703e-05,
"loss": 0.1726,
"step": 1287
},
{
"epoch": 3.261089987325729,
"grad_norm": 0.3952295780181885,
"learning_rate": 6.951653944020357e-05,
"loss": 0.1754,
"step": 1288
},
{
"epoch": 3.26362484157161,
"grad_norm": 0.5151681900024414,
"learning_rate": 6.941475826972011e-05,
"loss": 0.1849,
"step": 1289
},
{
"epoch": 3.2661596958174903,
"grad_norm": 0.496988445520401,
"learning_rate": 6.931297709923665e-05,
"loss": 0.1938,
"step": 1290
},
{
"epoch": 3.2686945500633713,
"grad_norm": 0.45343711972236633,
"learning_rate": 6.921119592875319e-05,
"loss": 0.1845,
"step": 1291
},
{
"epoch": 3.2712294043092522,
"grad_norm": 0.5323635935783386,
"learning_rate": 6.910941475826971e-05,
"loss": 0.177,
"step": 1292
},
{
"epoch": 3.273764258555133,
"grad_norm": 0.39680036902427673,
"learning_rate": 6.900763358778625e-05,
"loss": 0.1843,
"step": 1293
},
{
"epoch": 3.2762991128010137,
"grad_norm": 0.4767110049724579,
"learning_rate": 6.89058524173028e-05,
"loss": 0.2103,
"step": 1294
},
{
"epoch": 3.2788339670468947,
"grad_norm": 0.5565052032470703,
"learning_rate": 6.880407124681934e-05,
"loss": 0.2185,
"step": 1295
},
{
"epoch": 3.2813688212927756,
"grad_norm": 0.5472534894943237,
"learning_rate": 6.870229007633588e-05,
"loss": 0.2237,
"step": 1296
},
{
"epoch": 3.2839036755386566,
"grad_norm": 0.632560133934021,
"learning_rate": 6.860050890585242e-05,
"loss": 0.2213,
"step": 1297
},
{
"epoch": 3.2864385297845375,
"grad_norm": 0.5626386404037476,
"learning_rate": 6.849872773536896e-05,
"loss": 0.2324,
"step": 1298
},
{
"epoch": 3.288973384030418,
"grad_norm": 0.5527671575546265,
"learning_rate": 6.83969465648855e-05,
"loss": 0.227,
"step": 1299
},
{
"epoch": 3.291508238276299,
"grad_norm": 0.6093178391456604,
"learning_rate": 6.829516539440204e-05,
"loss": 0.2368,
"step": 1300
},
{
"epoch": 3.29404309252218,
"grad_norm": 0.3845243453979492,
"learning_rate": 6.819338422391858e-05,
"loss": 0.1804,
"step": 1301
},
{
"epoch": 3.296577946768061,
"grad_norm": 0.6384890079498291,
"learning_rate": 6.809160305343512e-05,
"loss": 0.2598,
"step": 1302
},
{
"epoch": 3.299112801013942,
"grad_norm": 0.5135822892189026,
"learning_rate": 6.798982188295166e-05,
"loss": 0.2142,
"step": 1303
},
{
"epoch": 3.3016476552598224,
"grad_norm": 0.4996071457862854,
"learning_rate": 6.78880407124682e-05,
"loss": 0.2107,
"step": 1304
},
{
"epoch": 3.3041825095057034,
"grad_norm": 0.31445005536079407,
"learning_rate": 6.778625954198474e-05,
"loss": 0.1764,
"step": 1305
},
{
"epoch": 3.3067173637515843,
"grad_norm": 0.544301450252533,
"learning_rate": 6.768447837150128e-05,
"loss": 0.2856,
"step": 1306
},
{
"epoch": 3.3092522179974653,
"grad_norm": 0.5029551982879639,
"learning_rate": 6.758269720101782e-05,
"loss": 0.2374,
"step": 1307
},
{
"epoch": 3.3117870722433462,
"grad_norm": 0.3769523799419403,
"learning_rate": 6.748091603053436e-05,
"loss": 0.1853,
"step": 1308
},
{
"epoch": 3.3143219264892267,
"grad_norm": 0.3540287911891937,
"learning_rate": 6.73791348600509e-05,
"loss": 0.193,
"step": 1309
},
{
"epoch": 3.3168567807351077,
"grad_norm": 0.42674198746681213,
"learning_rate": 6.727735368956743e-05,
"loss": 0.1953,
"step": 1310
},
{
"epoch": 3.3193916349809887,
"grad_norm": 0.5152068138122559,
"learning_rate": 6.717557251908397e-05,
"loss": 0.1871,
"step": 1311
},
{
"epoch": 3.3219264892268696,
"grad_norm": 0.48964372277259827,
"learning_rate": 6.707379134860051e-05,
"loss": 0.2142,
"step": 1312
},
{
"epoch": 3.32446134347275,
"grad_norm": 0.5390191674232483,
"learning_rate": 6.697201017811705e-05,
"loss": 0.1764,
"step": 1313
},
{
"epoch": 3.326996197718631,
"grad_norm": 0.3849482238292694,
"learning_rate": 6.687022900763359e-05,
"loss": 0.1681,
"step": 1314
},
{
"epoch": 3.329531051964512,
"grad_norm": 0.36165010929107666,
"learning_rate": 6.676844783715013e-05,
"loss": 0.148,
"step": 1315
},
{
"epoch": 3.332065906210393,
"grad_norm": 0.47739362716674805,
"learning_rate": 6.666666666666667e-05,
"loss": 0.1748,
"step": 1316
},
{
"epoch": 3.334600760456274,
"grad_norm": 0.41228094696998596,
"learning_rate": 6.656488549618321e-05,
"loss": 0.2006,
"step": 1317
},
{
"epoch": 3.3371356147021545,
"grad_norm": 0.43494951725006104,
"learning_rate": 6.646310432569975e-05,
"loss": 0.1821,
"step": 1318
},
{
"epoch": 3.3396704689480354,
"grad_norm": 0.5502039194107056,
"learning_rate": 6.636132315521629e-05,
"loss": 0.208,
"step": 1319
},
{
"epoch": 3.3422053231939164,
"grad_norm": 0.5151738524436951,
"learning_rate": 6.625954198473283e-05,
"loss": 0.2304,
"step": 1320
},
{
"epoch": 3.3447401774397973,
"grad_norm": 0.3866114914417267,
"learning_rate": 6.615776081424937e-05,
"loss": 0.1738,
"step": 1321
},
{
"epoch": 3.347275031685678,
"grad_norm": 0.5542702674865723,
"learning_rate": 6.60559796437659e-05,
"loss": 0.1885,
"step": 1322
},
{
"epoch": 3.349809885931559,
"grad_norm": 0.5107680559158325,
"learning_rate": 6.595419847328245e-05,
"loss": 0.1856,
"step": 1323
},
{
"epoch": 3.3523447401774398,
"grad_norm": 0.8266568183898926,
"learning_rate": 6.585241730279898e-05,
"loss": 0.2826,
"step": 1324
},
{
"epoch": 3.3548795944233207,
"grad_norm": 0.45209088921546936,
"learning_rate": 6.575063613231552e-05,
"loss": 0.1519,
"step": 1325
},
{
"epoch": 3.3574144486692017,
"grad_norm": 0.4708397388458252,
"learning_rate": 6.564885496183206e-05,
"loss": 0.1834,
"step": 1326
},
{
"epoch": 3.359949302915082,
"grad_norm": 0.39958736300468445,
"learning_rate": 6.55470737913486e-05,
"loss": 0.1444,
"step": 1327
},
{
"epoch": 3.362484157160963,
"grad_norm": 0.5764468312263489,
"learning_rate": 6.544529262086514e-05,
"loss": 0.2024,
"step": 1328
},
{
"epoch": 3.365019011406844,
"grad_norm": 0.4573269188404083,
"learning_rate": 6.534351145038168e-05,
"loss": 0.1857,
"step": 1329
},
{
"epoch": 3.367553865652725,
"grad_norm": 0.598423957824707,
"learning_rate": 6.524173027989822e-05,
"loss": 0.2206,
"step": 1330
},
{
"epoch": 3.3700887198986056,
"grad_norm": 0.5643012523651123,
"learning_rate": 6.513994910941476e-05,
"loss": 0.157,
"step": 1331
},
{
"epoch": 3.3726235741444865,
"grad_norm": 0.6568096876144409,
"learning_rate": 6.50381679389313e-05,
"loss": 0.2588,
"step": 1332
},
{
"epoch": 3.3751584283903675,
"grad_norm": 0.6552339792251587,
"learning_rate": 6.493638676844784e-05,
"loss": 0.2032,
"step": 1333
},
{
"epoch": 3.3776932826362485,
"grad_norm": 0.5274556279182434,
"learning_rate": 6.483460559796438e-05,
"loss": 0.1877,
"step": 1334
},
{
"epoch": 3.3802281368821294,
"grad_norm": 0.43894869089126587,
"learning_rate": 6.473282442748092e-05,
"loss": 0.155,
"step": 1335
},
{
"epoch": 3.3827629911280104,
"grad_norm": 0.6116171479225159,
"learning_rate": 6.463104325699746e-05,
"loss": 0.2978,
"step": 1336
},
{
"epoch": 3.385297845373891,
"grad_norm": 0.4588301479816437,
"learning_rate": 6.4529262086514e-05,
"loss": 0.1765,
"step": 1337
},
{
"epoch": 3.387832699619772,
"grad_norm": 0.4299813508987427,
"learning_rate": 6.442748091603053e-05,
"loss": 0.1725,
"step": 1338
},
{
"epoch": 3.390367553865653,
"grad_norm": 0.4996776580810547,
"learning_rate": 6.432569974554707e-05,
"loss": 0.1815,
"step": 1339
},
{
"epoch": 3.3929024081115338,
"grad_norm": 0.42195963859558105,
"learning_rate": 6.422391857506361e-05,
"loss": 0.1544,
"step": 1340
},
{
"epoch": 3.3954372623574143,
"grad_norm": 0.3918668031692505,
"learning_rate": 6.412213740458015e-05,
"loss": 0.1677,
"step": 1341
},
{
"epoch": 3.3979721166032952,
"grad_norm": 0.5436106324195862,
"learning_rate": 6.402035623409669e-05,
"loss": 0.2624,
"step": 1342
},
{
"epoch": 3.400506970849176,
"grad_norm": 0.5056617856025696,
"learning_rate": 6.391857506361324e-05,
"loss": 0.1735,
"step": 1343
},
{
"epoch": 3.403041825095057,
"grad_norm": 0.497035950422287,
"learning_rate": 6.381679389312978e-05,
"loss": 0.192,
"step": 1344
},
{
"epoch": 3.405576679340938,
"grad_norm": 0.4464019238948822,
"learning_rate": 6.371501272264632e-05,
"loss": 0.165,
"step": 1345
},
{
"epoch": 3.4081115335868186,
"grad_norm": 0.3940610885620117,
"learning_rate": 6.361323155216285e-05,
"loss": 0.1698,
"step": 1346
},
{
"epoch": 3.4106463878326996,
"grad_norm": 0.34197869896888733,
"learning_rate": 6.351145038167939e-05,
"loss": 0.1676,
"step": 1347
},
{
"epoch": 3.4131812420785805,
"grad_norm": 0.5477511286735535,
"learning_rate": 6.340966921119593e-05,
"loss": 0.2913,
"step": 1348
},
{
"epoch": 3.4157160963244615,
"grad_norm": 0.47384947538375854,
"learning_rate": 6.330788804071247e-05,
"loss": 0.1807,
"step": 1349
},
{
"epoch": 3.418250950570342,
"grad_norm": 0.4805784821510315,
"learning_rate": 6.3206106870229e-05,
"loss": 0.1844,
"step": 1350
},
{
"epoch": 3.420785804816223,
"grad_norm": 0.4914521276950836,
"learning_rate": 6.310432569974555e-05,
"loss": 0.21,
"step": 1351
},
{
"epoch": 3.423320659062104,
"grad_norm": 0.42754796147346497,
"learning_rate": 6.300254452926209e-05,
"loss": 0.2003,
"step": 1352
},
{
"epoch": 3.425855513307985,
"grad_norm": 0.5367889404296875,
"learning_rate": 6.290076335877862e-05,
"loss": 0.2126,
"step": 1353
},
{
"epoch": 3.428390367553866,
"grad_norm": 0.5015621781349182,
"learning_rate": 6.279898218829516e-05,
"loss": 0.176,
"step": 1354
},
{
"epoch": 3.4309252217997463,
"grad_norm": 0.4498123228549957,
"learning_rate": 6.269720101781172e-05,
"loss": 0.1963,
"step": 1355
},
{
"epoch": 3.4334600760456273,
"grad_norm": 0.4548507034778595,
"learning_rate": 6.259541984732826e-05,
"loss": 0.185,
"step": 1356
},
{
"epoch": 3.4359949302915083,
"grad_norm": 0.5188789963722229,
"learning_rate": 6.24936386768448e-05,
"loss": 0.2152,
"step": 1357
},
{
"epoch": 3.4385297845373892,
"grad_norm": 0.5717540979385376,
"learning_rate": 6.239185750636133e-05,
"loss": 0.2541,
"step": 1358
},
{
"epoch": 3.4410646387832697,
"grad_norm": 0.43195176124572754,
"learning_rate": 6.229007633587787e-05,
"loss": 0.1841,
"step": 1359
},
{
"epoch": 3.4435994930291507,
"grad_norm": 0.8148223161697388,
"learning_rate": 6.21882951653944e-05,
"loss": 0.1903,
"step": 1360
},
{
"epoch": 3.4461343472750317,
"grad_norm": 0.39928868412971497,
"learning_rate": 6.208651399491094e-05,
"loss": 0.1551,
"step": 1361
},
{
"epoch": 3.4486692015209126,
"grad_norm": 0.8072621822357178,
"learning_rate": 6.198473282442748e-05,
"loss": 0.1973,
"step": 1362
},
{
"epoch": 3.4512040557667936,
"grad_norm": 0.6420927047729492,
"learning_rate": 6.188295165394402e-05,
"loss": 0.2304,
"step": 1363
},
{
"epoch": 3.453738910012674,
"grad_norm": 0.4896611273288727,
"learning_rate": 6.178117048346056e-05,
"loss": 0.1968,
"step": 1364
},
{
"epoch": 3.456273764258555,
"grad_norm": 0.5518379211425781,
"learning_rate": 6.16793893129771e-05,
"loss": 0.2136,
"step": 1365
},
{
"epoch": 3.458808618504436,
"grad_norm": 0.35489922761917114,
"learning_rate": 6.157760814249364e-05,
"loss": 0.1735,
"step": 1366
},
{
"epoch": 3.461343472750317,
"grad_norm": 0.3575512766838074,
"learning_rate": 6.147582697201019e-05,
"loss": 0.1704,
"step": 1367
},
{
"epoch": 3.463878326996198,
"grad_norm": 0.46745261549949646,
"learning_rate": 6.137404580152673e-05,
"loss": 0.1702,
"step": 1368
},
{
"epoch": 3.4664131812420784,
"grad_norm": 0.39378833770751953,
"learning_rate": 6.127226463104327e-05,
"loss": 0.1512,
"step": 1369
},
{
"epoch": 3.4689480354879594,
"grad_norm": 0.5645838975906372,
"learning_rate": 6.11704834605598e-05,
"loss": 0.2053,
"step": 1370
},
{
"epoch": 3.4714828897338403,
"grad_norm": 0.3613208830356598,
"learning_rate": 6.106870229007635e-05,
"loss": 0.1749,
"step": 1371
},
{
"epoch": 3.4740177439797213,
"grad_norm": 0.573124349117279,
"learning_rate": 6.096692111959288e-05,
"loss": 0.2229,
"step": 1372
},
{
"epoch": 3.4765525982256023,
"grad_norm": 0.43110212683677673,
"learning_rate": 6.086513994910942e-05,
"loss": 0.2082,
"step": 1373
},
{
"epoch": 3.4790874524714828,
"grad_norm": 0.6268284320831299,
"learning_rate": 6.076335877862596e-05,
"loss": 0.2826,
"step": 1374
},
{
"epoch": 3.4816223067173637,
"grad_norm": 0.5699491500854492,
"learning_rate": 6.0661577608142496e-05,
"loss": 0.2373,
"step": 1375
},
{
"epoch": 3.4841571609632447,
"grad_norm": 0.451548308134079,
"learning_rate": 6.0559796437659035e-05,
"loss": 0.1782,
"step": 1376
},
{
"epoch": 3.4866920152091256,
"grad_norm": 0.44955211877822876,
"learning_rate": 6.0458015267175575e-05,
"loss": 0.1896,
"step": 1377
},
{
"epoch": 3.489226869455006,
"grad_norm": 0.44076019525527954,
"learning_rate": 6.035623409669211e-05,
"loss": 0.1854,
"step": 1378
},
{
"epoch": 3.491761723700887,
"grad_norm": 0.8012815117835999,
"learning_rate": 6.0254452926208646e-05,
"loss": 0.2067,
"step": 1379
},
{
"epoch": 3.494296577946768,
"grad_norm": 0.5558981895446777,
"learning_rate": 6.01526717557252e-05,
"loss": 0.1913,
"step": 1380
},
{
"epoch": 3.496831432192649,
"grad_norm": 0.42501258850097656,
"learning_rate": 6.005089058524174e-05,
"loss": 0.1781,
"step": 1381
},
{
"epoch": 3.49936628643853,
"grad_norm": 0.3618164658546448,
"learning_rate": 5.994910941475828e-05,
"loss": 0.1472,
"step": 1382
},
{
"epoch": 3.5019011406844105,
"grad_norm": 0.5384409427642822,
"learning_rate": 5.984732824427482e-05,
"loss": 0.2063,
"step": 1383
},
{
"epoch": 3.5044359949302915,
"grad_norm": 0.5103084444999695,
"learning_rate": 5.974554707379135e-05,
"loss": 0.1737,
"step": 1384
},
{
"epoch": 3.5069708491761724,
"grad_norm": 0.37908968329429626,
"learning_rate": 5.964376590330789e-05,
"loss": 0.1599,
"step": 1385
},
{
"epoch": 3.5095057034220534,
"grad_norm": 0.5049726963043213,
"learning_rate": 5.954198473282443e-05,
"loss": 0.1891,
"step": 1386
},
{
"epoch": 3.512040557667934,
"grad_norm": 0.4436114430427551,
"learning_rate": 5.944020356234097e-05,
"loss": 0.1667,
"step": 1387
},
{
"epoch": 3.514575411913815,
"grad_norm": 0.6733534336090088,
"learning_rate": 5.933842239185751e-05,
"loss": 0.2714,
"step": 1388
},
{
"epoch": 3.517110266159696,
"grad_norm": 0.7258228659629822,
"learning_rate": 5.9236641221374046e-05,
"loss": 0.258,
"step": 1389
},
{
"epoch": 3.5196451204055768,
"grad_norm": 0.6425923705101013,
"learning_rate": 5.9134860050890586e-05,
"loss": 0.1791,
"step": 1390
},
{
"epoch": 3.5221799746514577,
"grad_norm": 0.45786988735198975,
"learning_rate": 5.9033078880407125e-05,
"loss": 0.1989,
"step": 1391
},
{
"epoch": 3.5247148288973387,
"grad_norm": 0.43258994817733765,
"learning_rate": 5.893129770992367e-05,
"loss": 0.166,
"step": 1392
},
{
"epoch": 3.527249683143219,
"grad_norm": 0.36486050486564636,
"learning_rate": 5.882951653944021e-05,
"loss": 0.1634,
"step": 1393
},
{
"epoch": 3.5297845373891,
"grad_norm": 0.5883339047431946,
"learning_rate": 5.872773536895675e-05,
"loss": 0.2236,
"step": 1394
},
{
"epoch": 3.532319391634981,
"grad_norm": 0.6296584010124207,
"learning_rate": 5.862595419847329e-05,
"loss": 0.1866,
"step": 1395
},
{
"epoch": 3.5348542458808616,
"grad_norm": 0.4262075126171112,
"learning_rate": 5.852417302798983e-05,
"loss": 0.1707,
"step": 1396
},
{
"epoch": 3.5373891001267426,
"grad_norm": 0.459573894739151,
"learning_rate": 5.842239185750637e-05,
"loss": 0.1654,
"step": 1397
},
{
"epoch": 3.5399239543726235,
"grad_norm": 0.47115570306777954,
"learning_rate": 5.83206106870229e-05,
"loss": 0.1936,
"step": 1398
},
{
"epoch": 3.5424588086185045,
"grad_norm": 0.41362589597702026,
"learning_rate": 5.821882951653944e-05,
"loss": 0.1897,
"step": 1399
},
{
"epoch": 3.5449936628643854,
"grad_norm": 0.4314422607421875,
"learning_rate": 5.811704834605598e-05,
"loss": 0.172,
"step": 1400
},
{
"epoch": 3.5475285171102664,
"grad_norm": 0.48116129636764526,
"learning_rate": 5.801526717557252e-05,
"loss": 0.1721,
"step": 1401
},
{
"epoch": 3.550063371356147,
"grad_norm": 0.3902725279331207,
"learning_rate": 5.791348600508906e-05,
"loss": 0.1886,
"step": 1402
},
{
"epoch": 3.552598225602028,
"grad_norm": 0.37996864318847656,
"learning_rate": 5.78117048346056e-05,
"loss": 0.1705,
"step": 1403
},
{
"epoch": 3.555133079847909,
"grad_norm": 0.589279294013977,
"learning_rate": 5.770992366412214e-05,
"loss": 0.1848,
"step": 1404
},
{
"epoch": 3.5576679340937893,
"grad_norm": 0.4233790636062622,
"learning_rate": 5.760814249363868e-05,
"loss": 0.18,
"step": 1405
},
{
"epoch": 3.5602027883396703,
"grad_norm": 0.3760955333709717,
"learning_rate": 5.750636132315522e-05,
"loss": 0.1743,
"step": 1406
},
{
"epoch": 3.5627376425855513,
"grad_norm": 0.552793562412262,
"learning_rate": 5.740458015267176e-05,
"loss": 0.2315,
"step": 1407
},
{
"epoch": 3.565272496831432,
"grad_norm": 0.5440211892127991,
"learning_rate": 5.73027989821883e-05,
"loss": 0.186,
"step": 1408
},
{
"epoch": 3.567807351077313,
"grad_norm": 0.5183967351913452,
"learning_rate": 5.720101781170484e-05,
"loss": 0.1626,
"step": 1409
},
{
"epoch": 3.570342205323194,
"grad_norm": 0.47962069511413574,
"learning_rate": 5.709923664122138e-05,
"loss": 0.1813,
"step": 1410
},
{
"epoch": 3.5728770595690746,
"grad_norm": 0.8065668940544128,
"learning_rate": 5.699745547073792e-05,
"loss": 0.2537,
"step": 1411
},
{
"epoch": 3.5754119138149556,
"grad_norm": 0.46018585562705994,
"learning_rate": 5.689567430025445e-05,
"loss": 0.1756,
"step": 1412
},
{
"epoch": 3.5779467680608366,
"grad_norm": 0.5229590535163879,
"learning_rate": 5.679389312977099e-05,
"loss": 0.1873,
"step": 1413
},
{
"epoch": 3.5804816223067175,
"grad_norm": 0.510209321975708,
"learning_rate": 5.669211195928753e-05,
"loss": 0.167,
"step": 1414
},
{
"epoch": 3.583016476552598,
"grad_norm": 0.4264031648635864,
"learning_rate": 5.659033078880407e-05,
"loss": 0.1705,
"step": 1415
},
{
"epoch": 3.585551330798479,
"grad_norm": 0.6208323240280151,
"learning_rate": 5.648854961832062e-05,
"loss": 0.2268,
"step": 1416
},
{
"epoch": 3.58808618504436,
"grad_norm": 0.3730670213699341,
"learning_rate": 5.6386768447837154e-05,
"loss": 0.1676,
"step": 1417
},
{
"epoch": 3.590621039290241,
"grad_norm": 0.52936190366745,
"learning_rate": 5.628498727735369e-05,
"loss": 0.2055,
"step": 1418
},
{
"epoch": 3.593155893536122,
"grad_norm": 0.44800981879234314,
"learning_rate": 5.618320610687023e-05,
"loss": 0.1782,
"step": 1419
},
{
"epoch": 3.5956907477820024,
"grad_norm": 0.37429654598236084,
"learning_rate": 5.608142493638677e-05,
"loss": 0.1566,
"step": 1420
},
{
"epoch": 3.5982256020278833,
"grad_norm": 0.5618942975997925,
"learning_rate": 5.597964376590331e-05,
"loss": 0.2249,
"step": 1421
},
{
"epoch": 3.6007604562737643,
"grad_norm": 0.6893648505210876,
"learning_rate": 5.587786259541985e-05,
"loss": 0.2104,
"step": 1422
},
{
"epoch": 3.6032953105196452,
"grad_norm": 0.4185943603515625,
"learning_rate": 5.577608142493639e-05,
"loss": 0.1729,
"step": 1423
},
{
"epoch": 3.6058301647655258,
"grad_norm": 0.46326011419296265,
"learning_rate": 5.567430025445293e-05,
"loss": 0.1888,
"step": 1424
},
{
"epoch": 3.6083650190114067,
"grad_norm": 0.4564262628555298,
"learning_rate": 5.557251908396947e-05,
"loss": 0.1957,
"step": 1425
},
{
"epoch": 3.6108998732572877,
"grad_norm": 0.654411256313324,
"learning_rate": 5.5470737913486e-05,
"loss": 0.2101,
"step": 1426
},
{
"epoch": 3.6134347275031686,
"grad_norm": 0.4059501886367798,
"learning_rate": 5.536895674300254e-05,
"loss": 0.1638,
"step": 1427
},
{
"epoch": 3.6159695817490496,
"grad_norm": 0.4155724346637726,
"learning_rate": 5.526717557251909e-05,
"loss": 0.1799,
"step": 1428
},
{
"epoch": 3.6185044359949305,
"grad_norm": 0.4041290581226349,
"learning_rate": 5.516539440203563e-05,
"loss": 0.1755,
"step": 1429
},
{
"epoch": 3.621039290240811,
"grad_norm": 0.3458746373653412,
"learning_rate": 5.506361323155217e-05,
"loss": 0.1474,
"step": 1430
},
{
"epoch": 3.623574144486692,
"grad_norm": 0.5046303272247314,
"learning_rate": 5.496183206106871e-05,
"loss": 0.2554,
"step": 1431
},
{
"epoch": 3.626108998732573,
"grad_norm": 0.4284549951553345,
"learning_rate": 5.4860050890585244e-05,
"loss": 0.1855,
"step": 1432
},
{
"epoch": 3.6286438529784535,
"grad_norm": 0.5116839408874512,
"learning_rate": 5.475826972010178e-05,
"loss": 0.1777,
"step": 1433
},
{
"epoch": 3.6311787072243344,
"grad_norm": 0.4303711950778961,
"learning_rate": 5.465648854961832e-05,
"loss": 0.1792,
"step": 1434
},
{
"epoch": 3.6337135614702154,
"grad_norm": 0.4602053463459015,
"learning_rate": 5.455470737913486e-05,
"loss": 0.1716,
"step": 1435
},
{
"epoch": 3.6362484157160964,
"grad_norm": 0.47606271505355835,
"learning_rate": 5.44529262086514e-05,
"loss": 0.2063,
"step": 1436
},
{
"epoch": 3.6387832699619773,
"grad_norm": 0.5861607193946838,
"learning_rate": 5.435114503816794e-05,
"loss": 0.2133,
"step": 1437
},
{
"epoch": 3.6413181242078583,
"grad_norm": 0.42663708329200745,
"learning_rate": 5.424936386768448e-05,
"loss": 0.1662,
"step": 1438
},
{
"epoch": 3.643852978453739,
"grad_norm": 0.6255937218666077,
"learning_rate": 5.414758269720102e-05,
"loss": 0.1875,
"step": 1439
},
{
"epoch": 3.6463878326996197,
"grad_norm": 0.5422307252883911,
"learning_rate": 5.404580152671755e-05,
"loss": 0.1624,
"step": 1440
},
{
"epoch": 3.6489226869455007,
"grad_norm": 0.540477991104126,
"learning_rate": 5.3944020356234104e-05,
"loss": 0.2489,
"step": 1441
},
{
"epoch": 3.6514575411913817,
"grad_norm": 0.5656100511550903,
"learning_rate": 5.3842239185750643e-05,
"loss": 0.2289,
"step": 1442
},
{
"epoch": 3.653992395437262,
"grad_norm": 0.5202456712722778,
"learning_rate": 5.374045801526718e-05,
"loss": 0.23,
"step": 1443
},
{
"epoch": 3.656527249683143,
"grad_norm": 0.5069813132286072,
"learning_rate": 5.363867684478372e-05,
"loss": 0.1845,
"step": 1444
},
{
"epoch": 3.659062103929024,
"grad_norm": 0.5711066126823425,
"learning_rate": 5.353689567430026e-05,
"loss": 0.2076,
"step": 1445
},
{
"epoch": 3.661596958174905,
"grad_norm": 0.5115897059440613,
"learning_rate": 5.3435114503816794e-05,
"loss": 0.1696,
"step": 1446
},
{
"epoch": 3.664131812420786,
"grad_norm": 0.6119818687438965,
"learning_rate": 5.333333333333333e-05,
"loss": 0.1905,
"step": 1447
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.7333729863166809,
"learning_rate": 5.323155216284987e-05,
"loss": 0.2208,
"step": 1448
},
{
"epoch": 3.6692015209125475,
"grad_norm": 0.5657917857170105,
"learning_rate": 5.312977099236641e-05,
"loss": 0.218,
"step": 1449
},
{
"epoch": 3.6717363751584284,
"grad_norm": 0.5568459033966064,
"learning_rate": 5.302798982188295e-05,
"loss": 0.1957,
"step": 1450
},
{
"epoch": 3.6742712294043094,
"grad_norm": 0.40060222148895264,
"learning_rate": 5.292620865139949e-05,
"loss": 0.1634,
"step": 1451
},
{
"epoch": 3.67680608365019,
"grad_norm": 0.5395296216011047,
"learning_rate": 5.282442748091603e-05,
"loss": 0.2284,
"step": 1452
},
{
"epoch": 3.679340937896071,
"grad_norm": 0.395298570394516,
"learning_rate": 5.2722646310432576e-05,
"loss": 0.1717,
"step": 1453
},
{
"epoch": 3.681875792141952,
"grad_norm": 0.4693946838378906,
"learning_rate": 5.2620865139949115e-05,
"loss": 0.1719,
"step": 1454
},
{
"epoch": 3.6844106463878328,
"grad_norm": 0.5206104516983032,
"learning_rate": 5.2519083969465654e-05,
"loss": 0.2158,
"step": 1455
},
{
"epoch": 3.6869455006337137,
"grad_norm": 0.5576691031455994,
"learning_rate": 5.2417302798982194e-05,
"loss": 0.2031,
"step": 1456
},
{
"epoch": 3.6894803548795947,
"grad_norm": 0.5826637148857117,
"learning_rate": 5.231552162849873e-05,
"loss": 0.2785,
"step": 1457
},
{
"epoch": 3.692015209125475,
"grad_norm": 0.5928865075111389,
"learning_rate": 5.221374045801527e-05,
"loss": 0.1765,
"step": 1458
},
{
"epoch": 3.694550063371356,
"grad_norm": 0.5932832956314087,
"learning_rate": 5.211195928753181e-05,
"loss": 0.1767,
"step": 1459
},
{
"epoch": 3.697084917617237,
"grad_norm": 0.4178262948989868,
"learning_rate": 5.2010178117048344e-05,
"loss": 0.1636,
"step": 1460
},
{
"epoch": 3.6996197718631176,
"grad_norm": 0.6029627919197083,
"learning_rate": 5.1908396946564884e-05,
"loss": 0.2086,
"step": 1461
},
{
"epoch": 3.7021546261089986,
"grad_norm": 0.48641863465309143,
"learning_rate": 5.180661577608142e-05,
"loss": 0.1613,
"step": 1462
},
{
"epoch": 3.7046894803548795,
"grad_norm": 0.40176740288734436,
"learning_rate": 5.170483460559796e-05,
"loss": 0.1647,
"step": 1463
},
{
"epoch": 3.7072243346007605,
"grad_norm": 0.42600035667419434,
"learning_rate": 5.16030534351145e-05,
"loss": 0.1818,
"step": 1464
},
{
"epoch": 3.7097591888466415,
"grad_norm": 0.48061972856521606,
"learning_rate": 5.150127226463105e-05,
"loss": 0.187,
"step": 1465
},
{
"epoch": 3.7122940430925224,
"grad_norm": 0.4085710346698761,
"learning_rate": 5.139949109414759e-05,
"loss": 0.1562,
"step": 1466
},
{
"epoch": 3.714828897338403,
"grad_norm": 0.4378439486026764,
"learning_rate": 5.1297709923664126e-05,
"loss": 0.1723,
"step": 1467
},
{
"epoch": 3.717363751584284,
"grad_norm": 0.5806863307952881,
"learning_rate": 5.1195928753180665e-05,
"loss": 0.2069,
"step": 1468
},
{
"epoch": 3.719898605830165,
"grad_norm": 0.4711120128631592,
"learning_rate": 5.1094147582697205e-05,
"loss": 0.1851,
"step": 1469
},
{
"epoch": 3.7224334600760454,
"grad_norm": 0.47227099537849426,
"learning_rate": 5.0992366412213744e-05,
"loss": 0.1885,
"step": 1470
},
{
"epoch": 3.7249683143219263,
"grad_norm": 0.4405531585216522,
"learning_rate": 5.0890585241730283e-05,
"loss": 0.1662,
"step": 1471
},
{
"epoch": 3.7275031685678073,
"grad_norm": 0.5168079733848572,
"learning_rate": 5.078880407124682e-05,
"loss": 0.2002,
"step": 1472
},
{
"epoch": 3.7300380228136882,
"grad_norm": 0.3839830160140991,
"learning_rate": 5.068702290076336e-05,
"loss": 0.168,
"step": 1473
},
{
"epoch": 3.732572877059569,
"grad_norm": 0.338012158870697,
"learning_rate": 5.0585241730279895e-05,
"loss": 0.1596,
"step": 1474
},
{
"epoch": 3.73510773130545,
"grad_norm": 0.5466023087501526,
"learning_rate": 5.0483460559796434e-05,
"loss": 0.2379,
"step": 1475
},
{
"epoch": 3.7376425855513307,
"grad_norm": 0.44543328881263733,
"learning_rate": 5.038167938931297e-05,
"loss": 0.1778,
"step": 1476
},
{
"epoch": 3.7401774397972116,
"grad_norm": 0.4166903793811798,
"learning_rate": 5.0279898218829526e-05,
"loss": 0.1554,
"step": 1477
},
{
"epoch": 3.7427122940430926,
"grad_norm": 0.3806212544441223,
"learning_rate": 5.0178117048346065e-05,
"loss": 0.1648,
"step": 1478
},
{
"epoch": 3.7452471482889735,
"grad_norm": 0.5990723967552185,
"learning_rate": 5.00763358778626e-05,
"loss": 0.2348,
"step": 1479
},
{
"epoch": 3.747782002534854,
"grad_norm": 0.715096116065979,
"learning_rate": 4.997455470737914e-05,
"loss": 0.2201,
"step": 1480
},
{
"epoch": 3.750316856780735,
"grad_norm": 0.6297019124031067,
"learning_rate": 4.9872773536895677e-05,
"loss": 0.2398,
"step": 1481
},
{
"epoch": 3.752851711026616,
"grad_norm": 0.6131380200386047,
"learning_rate": 4.9770992366412216e-05,
"loss": 0.2128,
"step": 1482
},
{
"epoch": 3.755386565272497,
"grad_norm": 0.5018277764320374,
"learning_rate": 4.9669211195928755e-05,
"loss": 0.1913,
"step": 1483
},
{
"epoch": 3.757921419518378,
"grad_norm": 0.516939103603363,
"learning_rate": 4.9567430025445294e-05,
"loss": 0.1958,
"step": 1484
},
{
"epoch": 3.7604562737642584,
"grad_norm": 0.4485652446746826,
"learning_rate": 4.9465648854961834e-05,
"loss": 0.1678,
"step": 1485
},
{
"epoch": 3.7629911280101394,
"grad_norm": 0.6227991580963135,
"learning_rate": 4.936386768447838e-05,
"loss": 0.2403,
"step": 1486
},
{
"epoch": 3.7655259822560203,
"grad_norm": 0.42331916093826294,
"learning_rate": 4.926208651399491e-05,
"loss": 0.1673,
"step": 1487
},
{
"epoch": 3.7680608365019013,
"grad_norm": 0.5072351098060608,
"learning_rate": 4.916030534351145e-05,
"loss": 0.204,
"step": 1488
},
{
"epoch": 3.770595690747782,
"grad_norm": 0.445578008890152,
"learning_rate": 4.905852417302799e-05,
"loss": 0.1908,
"step": 1489
},
{
"epoch": 3.7731305449936627,
"grad_norm": 0.49046698212623596,
"learning_rate": 4.895674300254453e-05,
"loss": 0.1615,
"step": 1490
},
{
"epoch": 3.7756653992395437,
"grad_norm": 0.37768882513046265,
"learning_rate": 4.885496183206107e-05,
"loss": 0.1604,
"step": 1491
},
{
"epoch": 3.7782002534854247,
"grad_norm": 0.38343289494514465,
"learning_rate": 4.8753180661577616e-05,
"loss": 0.1709,
"step": 1492
},
{
"epoch": 3.7807351077313056,
"grad_norm": 0.4102202355861664,
"learning_rate": 4.8651399491094155e-05,
"loss": 0.1629,
"step": 1493
},
{
"epoch": 3.7832699619771866,
"grad_norm": 0.4545007050037384,
"learning_rate": 4.854961832061069e-05,
"loss": 0.1709,
"step": 1494
},
{
"epoch": 3.785804816223067,
"grad_norm": 0.48300206661224365,
"learning_rate": 4.844783715012723e-05,
"loss": 0.2211,
"step": 1495
},
{
"epoch": 3.788339670468948,
"grad_norm": 0.5301868319511414,
"learning_rate": 4.8346055979643766e-05,
"loss": 0.2053,
"step": 1496
},
{
"epoch": 3.790874524714829,
"grad_norm": 0.48716598749160767,
"learning_rate": 4.8244274809160306e-05,
"loss": 0.2392,
"step": 1497
},
{
"epoch": 3.7934093789607095,
"grad_norm": 0.6201879978179932,
"learning_rate": 4.8142493638676845e-05,
"loss": 0.2267,
"step": 1498
},
{
"epoch": 3.7959442332065905,
"grad_norm": 0.46254560351371765,
"learning_rate": 4.804071246819339e-05,
"loss": 0.1824,
"step": 1499
},
{
"epoch": 3.7984790874524714,
"grad_norm": 0.6153382658958435,
"learning_rate": 4.793893129770993e-05,
"loss": 0.2095,
"step": 1500
},
{
"epoch": 3.8010139416983524,
"grad_norm": 0.6054911613464355,
"learning_rate": 4.783715012722646e-05,
"loss": 0.2291,
"step": 1501
},
{
"epoch": 3.8035487959442333,
"grad_norm": 0.3899902403354645,
"learning_rate": 4.7735368956743e-05,
"loss": 0.1507,
"step": 1502
},
{
"epoch": 3.8060836501901143,
"grad_norm": 0.4634632170200348,
"learning_rate": 4.763358778625954e-05,
"loss": 0.1436,
"step": 1503
},
{
"epoch": 3.808618504435995,
"grad_norm": 0.6829271912574768,
"learning_rate": 4.753180661577608e-05,
"loss": 0.2611,
"step": 1504
},
{
"epoch": 3.8111533586818758,
"grad_norm": 0.553393542766571,
"learning_rate": 4.743002544529263e-05,
"loss": 0.1862,
"step": 1505
},
{
"epoch": 3.8136882129277567,
"grad_norm": 0.4285520315170288,
"learning_rate": 4.7328244274809166e-05,
"loss": 0.1522,
"step": 1506
},
{
"epoch": 3.8162230671736372,
"grad_norm": 0.5505307912826538,
"learning_rate": 4.7226463104325705e-05,
"loss": 0.2056,
"step": 1507
},
{
"epoch": 3.818757921419518,
"grad_norm": 0.635071873664856,
"learning_rate": 4.712468193384224e-05,
"loss": 0.1899,
"step": 1508
},
{
"epoch": 3.821292775665399,
"grad_norm": 0.4297153353691101,
"learning_rate": 4.702290076335878e-05,
"loss": 0.1632,
"step": 1509
},
{
"epoch": 3.82382762991128,
"grad_norm": 0.5538508892059326,
"learning_rate": 4.6921119592875317e-05,
"loss": 0.1965,
"step": 1510
},
{
"epoch": 3.826362484157161,
"grad_norm": 0.6736975908279419,
"learning_rate": 4.681933842239186e-05,
"loss": 0.2334,
"step": 1511
},
{
"epoch": 3.828897338403042,
"grad_norm": 0.49381881952285767,
"learning_rate": 4.67175572519084e-05,
"loss": 0.2074,
"step": 1512
},
{
"epoch": 3.8314321926489225,
"grad_norm": 0.4285455346107483,
"learning_rate": 4.661577608142494e-05,
"loss": 0.176,
"step": 1513
},
{
"epoch": 3.8339670468948035,
"grad_norm": 0.5771308541297913,
"learning_rate": 4.651399491094148e-05,
"loss": 0.229,
"step": 1514
},
{
"epoch": 3.8365019011406845,
"grad_norm": 0.4749429225921631,
"learning_rate": 4.641221374045801e-05,
"loss": 0.1968,
"step": 1515
},
{
"epoch": 3.8390367553865654,
"grad_norm": 0.48094430565834045,
"learning_rate": 4.631043256997455e-05,
"loss": 0.1982,
"step": 1516
},
{
"epoch": 3.841571609632446,
"grad_norm": 0.49878042936325073,
"learning_rate": 4.62086513994911e-05,
"loss": 0.1552,
"step": 1517
},
{
"epoch": 3.844106463878327,
"grad_norm": 0.4872034192085266,
"learning_rate": 4.610687022900764e-05,
"loss": 0.1808,
"step": 1518
},
{
"epoch": 3.846641318124208,
"grad_norm": 0.4905577600002289,
"learning_rate": 4.600508905852418e-05,
"loss": 0.1703,
"step": 1519
},
{
"epoch": 3.849176172370089,
"grad_norm": 0.49980783462524414,
"learning_rate": 4.5903307888040716e-05,
"loss": 0.1727,
"step": 1520
},
{
"epoch": 3.8517110266159698,
"grad_norm": 0.5426180958747864,
"learning_rate": 4.5801526717557256e-05,
"loss": 0.2192,
"step": 1521
},
{
"epoch": 3.8542458808618507,
"grad_norm": 0.6399853825569153,
"learning_rate": 4.569974554707379e-05,
"loss": 0.2387,
"step": 1522
},
{
"epoch": 3.8567807351077312,
"grad_norm": 0.5311464667320251,
"learning_rate": 4.5597964376590334e-05,
"loss": 0.1976,
"step": 1523
},
{
"epoch": 3.859315589353612,
"grad_norm": 0.5433202981948853,
"learning_rate": 4.5496183206106874e-05,
"loss": 0.1916,
"step": 1524
},
{
"epoch": 3.861850443599493,
"grad_norm": 0.4024597704410553,
"learning_rate": 4.539440203562341e-05,
"loss": 0.1643,
"step": 1525
},
{
"epoch": 3.8643852978453737,
"grad_norm": 0.347566157579422,
"learning_rate": 4.529262086513995e-05,
"loss": 0.1676,
"step": 1526
},
{
"epoch": 3.8669201520912546,
"grad_norm": 0.45405861735343933,
"learning_rate": 4.519083969465649e-05,
"loss": 0.1963,
"step": 1527
},
{
"epoch": 3.8694550063371356,
"grad_norm": 0.6430472731590271,
"learning_rate": 4.508905852417303e-05,
"loss": 0.2322,
"step": 1528
},
{
"epoch": 3.8719898605830165,
"grad_norm": 0.4391939043998718,
"learning_rate": 4.498727735368957e-05,
"loss": 0.1871,
"step": 1529
},
{
"epoch": 3.8745247148288975,
"grad_norm": 0.47301623225212097,
"learning_rate": 4.488549618320611e-05,
"loss": 0.1549,
"step": 1530
},
{
"epoch": 3.8770595690747784,
"grad_norm": 0.4237573742866516,
"learning_rate": 4.478371501272265e-05,
"loss": 0.1548,
"step": 1531
},
{
"epoch": 3.879594423320659,
"grad_norm": 0.5859849452972412,
"learning_rate": 4.468193384223919e-05,
"loss": 0.2023,
"step": 1532
},
{
"epoch": 3.88212927756654,
"grad_norm": 0.45050573348999023,
"learning_rate": 4.458015267175573e-05,
"loss": 0.165,
"step": 1533
},
{
"epoch": 3.884664131812421,
"grad_norm": 0.5347339510917664,
"learning_rate": 4.447837150127227e-05,
"loss": 0.1854,
"step": 1534
},
{
"epoch": 3.8871989860583014,
"grad_norm": 0.375836580991745,
"learning_rate": 4.4376590330788806e-05,
"loss": 0.152,
"step": 1535
},
{
"epoch": 3.8897338403041823,
"grad_norm": 0.5403718948364258,
"learning_rate": 4.4274809160305345e-05,
"loss": 0.2065,
"step": 1536
},
{
"epoch": 3.8922686945500633,
"grad_norm": 0.5624736547470093,
"learning_rate": 4.4173027989821885e-05,
"loss": 0.1857,
"step": 1537
},
{
"epoch": 3.8948035487959443,
"grad_norm": 0.5971560478210449,
"learning_rate": 4.4071246819338424e-05,
"loss": 0.1928,
"step": 1538
},
{
"epoch": 3.897338403041825,
"grad_norm": 0.5225517153739929,
"learning_rate": 4.396946564885496e-05,
"loss": 0.2054,
"step": 1539
},
{
"epoch": 3.899873257287706,
"grad_norm": 0.47341519594192505,
"learning_rate": 4.38676844783715e-05,
"loss": 0.1786,
"step": 1540
},
{
"epoch": 3.9024081115335867,
"grad_norm": 0.3734676241874695,
"learning_rate": 4.376590330788805e-05,
"loss": 0.1447,
"step": 1541
},
{
"epoch": 3.9049429657794676,
"grad_norm": 0.5003755688667297,
"learning_rate": 4.366412213740458e-05,
"loss": 0.1734,
"step": 1542
},
{
"epoch": 3.9074778200253486,
"grad_norm": 0.41165000200271606,
"learning_rate": 4.356234096692112e-05,
"loss": 0.172,
"step": 1543
},
{
"epoch": 3.9100126742712296,
"grad_norm": 0.45096197724342346,
"learning_rate": 4.346055979643766e-05,
"loss": 0.1726,
"step": 1544
},
{
"epoch": 3.91254752851711,
"grad_norm": 0.5445842146873474,
"learning_rate": 4.33587786259542e-05,
"loss": 0.206,
"step": 1545
},
{
"epoch": 3.915082382762991,
"grad_norm": 0.5139321088790894,
"learning_rate": 4.325699745547074e-05,
"loss": 0.1803,
"step": 1546
},
{
"epoch": 3.917617237008872,
"grad_norm": 0.5652433633804321,
"learning_rate": 4.3155216284987285e-05,
"loss": 0.2051,
"step": 1547
},
{
"epoch": 3.920152091254753,
"grad_norm": 0.38091734051704407,
"learning_rate": 4.3053435114503824e-05,
"loss": 0.1541,
"step": 1548
},
{
"epoch": 3.922686945500634,
"grad_norm": 0.3614705801010132,
"learning_rate": 4.2951653944020356e-05,
"loss": 0.147,
"step": 1549
},
{
"epoch": 3.9252217997465144,
"grad_norm": 0.4551761746406555,
"learning_rate": 4.2849872773536896e-05,
"loss": 0.1685,
"step": 1550
},
{
"epoch": 3.9277566539923954,
"grad_norm": 0.5226624011993408,
"learning_rate": 4.2748091603053435e-05,
"loss": 0.1727,
"step": 1551
},
{
"epoch": 3.9302915082382763,
"grad_norm": 0.3541867136955261,
"learning_rate": 4.2646310432569974e-05,
"loss": 0.1488,
"step": 1552
},
{
"epoch": 3.9328263624841573,
"grad_norm": 0.4599204659461975,
"learning_rate": 4.254452926208652e-05,
"loss": 0.1536,
"step": 1553
},
{
"epoch": 3.935361216730038,
"grad_norm": 0.45082637667655945,
"learning_rate": 4.244274809160306e-05,
"loss": 0.1671,
"step": 1554
},
{
"epoch": 3.9378960709759188,
"grad_norm": 0.6053276658058167,
"learning_rate": 4.23409669211196e-05,
"loss": 0.2043,
"step": 1555
},
{
"epoch": 3.9404309252217997,
"grad_norm": 0.506443440914154,
"learning_rate": 4.223918575063613e-05,
"loss": 0.1893,
"step": 1556
},
{
"epoch": 3.9429657794676807,
"grad_norm": 0.6029784679412842,
"learning_rate": 4.213740458015267e-05,
"loss": 0.201,
"step": 1557
},
{
"epoch": 3.9455006337135616,
"grad_norm": 0.3993350863456726,
"learning_rate": 4.203562340966921e-05,
"loss": 0.1637,
"step": 1558
},
{
"epoch": 3.9480354879594426,
"grad_norm": 0.5887712836265564,
"learning_rate": 4.193384223918575e-05,
"loss": 0.2207,
"step": 1559
},
{
"epoch": 3.950570342205323,
"grad_norm": 0.5538966059684753,
"learning_rate": 4.1832061068702296e-05,
"loss": 0.1674,
"step": 1560
},
{
"epoch": 3.953105196451204,
"grad_norm": 0.4831174910068512,
"learning_rate": 4.1730279898218835e-05,
"loss": 0.1694,
"step": 1561
},
{
"epoch": 3.955640050697085,
"grad_norm": 0.39700761437416077,
"learning_rate": 4.1628498727735374e-05,
"loss": 0.1695,
"step": 1562
},
{
"epoch": 3.9581749049429655,
"grad_norm": 0.5388202667236328,
"learning_rate": 4.152671755725191e-05,
"loss": 0.1769,
"step": 1563
},
{
"epoch": 3.9607097591888465,
"grad_norm": 0.5717085599899292,
"learning_rate": 4.1424936386768446e-05,
"loss": 0.2602,
"step": 1564
},
{
"epoch": 3.9632446134347274,
"grad_norm": 0.4135623872280121,
"learning_rate": 4.1323155216284985e-05,
"loss": 0.1512,
"step": 1565
},
{
"epoch": 3.9657794676806084,
"grad_norm": 0.478411465883255,
"learning_rate": 4.122137404580153e-05,
"loss": 0.1967,
"step": 1566
},
{
"epoch": 3.9683143219264894,
"grad_norm": 0.4836915135383606,
"learning_rate": 4.111959287531807e-05,
"loss": 0.2297,
"step": 1567
},
{
"epoch": 3.9708491761723703,
"grad_norm": 0.6355355978012085,
"learning_rate": 4.101781170483461e-05,
"loss": 0.2291,
"step": 1568
},
{
"epoch": 3.973384030418251,
"grad_norm": 0.42811089754104614,
"learning_rate": 4.091603053435115e-05,
"loss": 0.1518,
"step": 1569
},
{
"epoch": 3.975918884664132,
"grad_norm": 0.5778828859329224,
"learning_rate": 4.081424936386768e-05,
"loss": 0.1638,
"step": 1570
},
{
"epoch": 3.9784537389100127,
"grad_norm": 0.4650358259677887,
"learning_rate": 4.071246819338422e-05,
"loss": 0.1658,
"step": 1571
},
{
"epoch": 3.9809885931558933,
"grad_norm": 0.5939072966575623,
"learning_rate": 4.061068702290077e-05,
"loss": 0.2276,
"step": 1572
},
{
"epoch": 3.983523447401774,
"grad_norm": 0.5296881794929504,
"learning_rate": 4.050890585241731e-05,
"loss": 0.1895,
"step": 1573
},
{
"epoch": 3.986058301647655,
"grad_norm": 0.4479645788669586,
"learning_rate": 4.0407124681933846e-05,
"loss": 0.168,
"step": 1574
},
{
"epoch": 3.988593155893536,
"grad_norm": 0.6041486859321594,
"learning_rate": 4.0305343511450385e-05,
"loss": 0.2225,
"step": 1575
},
{
"epoch": 3.991128010139417,
"grad_norm": 1.0764771699905396,
"learning_rate": 4.0203562340966925e-05,
"loss": 0.1736,
"step": 1576
},
{
"epoch": 3.993662864385298,
"grad_norm": 0.4830266535282135,
"learning_rate": 4.010178117048346e-05,
"loss": 0.2017,
"step": 1577
},
{
"epoch": 3.9961977186311786,
"grad_norm": 0.4032004773616791,
"learning_rate": 4e-05,
"loss": 0.1723,
"step": 1578
},
{
"epoch": 3.9987325728770595,
"grad_norm": 0.4441380798816681,
"learning_rate": 3.989821882951654e-05,
"loss": 0.1714,
"step": 1579
},
{
"epoch": 4.0,
"grad_norm": 0.673060953617096,
"learning_rate": 3.979643765903308e-05,
"loss": 0.1651,
"step": 1580
},
{
"epoch": 4.002534854245881,
"grad_norm": 0.5185714960098267,
"learning_rate": 3.969465648854962e-05,
"loss": 0.1877,
"step": 1581
},
{
"epoch": 4.005069708491762,
"grad_norm": 0.4302978217601776,
"learning_rate": 3.959287531806616e-05,
"loss": 0.1575,
"step": 1582
},
{
"epoch": 4.007604562737643,
"grad_norm": 0.45982813835144043,
"learning_rate": 3.94910941475827e-05,
"loss": 0.1615,
"step": 1583
},
{
"epoch": 4.010139416983524,
"grad_norm": 0.4118313789367676,
"learning_rate": 3.938931297709924e-05,
"loss": 0.1508,
"step": 1584
},
{
"epoch": 4.012674271229404,
"grad_norm": 0.6039855480194092,
"learning_rate": 3.928753180661578e-05,
"loss": 0.1782,
"step": 1585
},
{
"epoch": 4.015209125475285,
"grad_norm": 0.4311355948448181,
"learning_rate": 3.918575063613232e-05,
"loss": 0.1488,
"step": 1586
},
{
"epoch": 4.017743979721166,
"grad_norm": 0.7398537993431091,
"learning_rate": 3.908396946564886e-05,
"loss": 0.1879,
"step": 1587
},
{
"epoch": 4.020278833967047,
"grad_norm": 0.37064164876937866,
"learning_rate": 3.8982188295165396e-05,
"loss": 0.1257,
"step": 1588
},
{
"epoch": 4.022813688212928,
"grad_norm": 0.46931344270706177,
"learning_rate": 3.8880407124681936e-05,
"loss": 0.1579,
"step": 1589
},
{
"epoch": 4.025348542458809,
"grad_norm": 0.4544156789779663,
"learning_rate": 3.8778625954198475e-05,
"loss": 0.134,
"step": 1590
},
{
"epoch": 4.02788339670469,
"grad_norm": 0.5562132000923157,
"learning_rate": 3.8676844783715014e-05,
"loss": 0.1488,
"step": 1591
},
{
"epoch": 4.030418250950571,
"grad_norm": 0.5679481625556946,
"learning_rate": 3.8575063613231554e-05,
"loss": 0.1322,
"step": 1592
},
{
"epoch": 4.032953105196452,
"grad_norm": 0.6101714372634888,
"learning_rate": 3.847328244274809e-05,
"loss": 0.1534,
"step": 1593
},
{
"epoch": 4.035487959442332,
"grad_norm": 0.8060622215270996,
"learning_rate": 3.837150127226463e-05,
"loss": 0.1986,
"step": 1594
},
{
"epoch": 4.038022813688213,
"grad_norm": 0.5501425266265869,
"learning_rate": 3.826972010178117e-05,
"loss": 0.1444,
"step": 1595
},
{
"epoch": 4.0405576679340935,
"grad_norm": 0.5117461085319519,
"learning_rate": 3.816793893129771e-05,
"loss": 0.1259,
"step": 1596
},
{
"epoch": 4.0430925221799745,
"grad_norm": 0.571770429611206,
"learning_rate": 3.806615776081425e-05,
"loss": 0.1413,
"step": 1597
},
{
"epoch": 4.0456273764258555,
"grad_norm": 0.7756439447402954,
"learning_rate": 3.796437659033079e-05,
"loss": 0.1874,
"step": 1598
},
{
"epoch": 4.048162230671736,
"grad_norm": 0.6393389701843262,
"learning_rate": 3.786259541984733e-05,
"loss": 0.1226,
"step": 1599
},
{
"epoch": 4.050697084917617,
"grad_norm": 0.7177454233169556,
"learning_rate": 3.776081424936387e-05,
"loss": 0.1382,
"step": 1600
},
{
"epoch": 4.053231939163498,
"grad_norm": 0.6561391353607178,
"learning_rate": 3.765903307888041e-05,
"loss": 0.1557,
"step": 1601
},
{
"epoch": 4.055766793409379,
"grad_norm": 0.8319444060325623,
"learning_rate": 3.7557251908396954e-05,
"loss": 0.1608,
"step": 1602
},
{
"epoch": 4.05830164765526,
"grad_norm": 0.7468693852424622,
"learning_rate": 3.745547073791349e-05,
"loss": 0.1442,
"step": 1603
},
{
"epoch": 4.06083650190114,
"grad_norm": 0.623657763004303,
"learning_rate": 3.7353689567430025e-05,
"loss": 0.1395,
"step": 1604
},
{
"epoch": 4.063371356147021,
"grad_norm": 0.5870152115821838,
"learning_rate": 3.7251908396946565e-05,
"loss": 0.1322,
"step": 1605
},
{
"epoch": 4.065906210392902,
"grad_norm": 0.6840811371803284,
"learning_rate": 3.7150127226463104e-05,
"loss": 0.132,
"step": 1606
},
{
"epoch": 4.068441064638783,
"grad_norm": 0.6177504658699036,
"learning_rate": 3.704834605597964e-05,
"loss": 0.1265,
"step": 1607
},
{
"epoch": 4.070975918884664,
"grad_norm": 0.6908831000328064,
"learning_rate": 3.694656488549619e-05,
"loss": 0.1593,
"step": 1608
},
{
"epoch": 4.073510773130545,
"grad_norm": 0.787434458732605,
"learning_rate": 3.684478371501273e-05,
"loss": 0.1184,
"step": 1609
},
{
"epoch": 4.076045627376426,
"grad_norm": 0.8011195063591003,
"learning_rate": 3.674300254452927e-05,
"loss": 0.1341,
"step": 1610
},
{
"epoch": 4.078580481622307,
"grad_norm": 0.5523831248283386,
"learning_rate": 3.66412213740458e-05,
"loss": 0.1283,
"step": 1611
},
{
"epoch": 4.081115335868188,
"grad_norm": 0.6396963596343994,
"learning_rate": 3.653944020356234e-05,
"loss": 0.1424,
"step": 1612
},
{
"epoch": 4.083650190114068,
"grad_norm": 0.7471883893013,
"learning_rate": 3.643765903307888e-05,
"loss": 0.1627,
"step": 1613
},
{
"epoch": 4.086185044359949,
"grad_norm": 0.5498061776161194,
"learning_rate": 3.633587786259542e-05,
"loss": 0.1478,
"step": 1614
},
{
"epoch": 4.08871989860583,
"grad_norm": 0.6853391528129578,
"learning_rate": 3.6234096692111965e-05,
"loss": 0.1588,
"step": 1615
},
{
"epoch": 4.091254752851711,
"grad_norm": 0.6638361811637878,
"learning_rate": 3.6132315521628504e-05,
"loss": 0.1695,
"step": 1616
},
{
"epoch": 4.093789607097592,
"grad_norm": 0.6155263781547546,
"learning_rate": 3.603053435114504e-05,
"loss": 0.1355,
"step": 1617
},
{
"epoch": 4.096324461343473,
"grad_norm": 0.574590265750885,
"learning_rate": 3.5928753180661576e-05,
"loss": 0.1498,
"step": 1618
},
{
"epoch": 4.098859315589354,
"grad_norm": 0.5972251296043396,
"learning_rate": 3.5826972010178115e-05,
"loss": 0.1684,
"step": 1619
},
{
"epoch": 4.101394169835235,
"grad_norm": 0.668618381023407,
"learning_rate": 3.5725190839694654e-05,
"loss": 0.1377,
"step": 1620
},
{
"epoch": 4.103929024081116,
"grad_norm": 0.6238232851028442,
"learning_rate": 3.56234096692112e-05,
"loss": 0.2025,
"step": 1621
},
{
"epoch": 4.106463878326996,
"grad_norm": 0.9182467460632324,
"learning_rate": 3.552162849872774e-05,
"loss": 0.1539,
"step": 1622
},
{
"epoch": 4.108998732572877,
"grad_norm": 0.6368919014930725,
"learning_rate": 3.541984732824428e-05,
"loss": 0.1421,
"step": 1623
},
{
"epoch": 4.111533586818758,
"grad_norm": 0.7871132493019104,
"learning_rate": 3.531806615776082e-05,
"loss": 0.1482,
"step": 1624
},
{
"epoch": 4.114068441064639,
"grad_norm": 0.7697343230247498,
"learning_rate": 3.521628498727735e-05,
"loss": 0.1607,
"step": 1625
},
{
"epoch": 4.11660329531052,
"grad_norm": 0.5805296897888184,
"learning_rate": 3.511450381679389e-05,
"loss": 0.1497,
"step": 1626
},
{
"epoch": 4.119138149556401,
"grad_norm": 0.6484183073043823,
"learning_rate": 3.5012722646310436e-05,
"loss": 0.1827,
"step": 1627
},
{
"epoch": 4.1216730038022815,
"grad_norm": 1.0351064205169678,
"learning_rate": 3.4910941475826976e-05,
"loss": 0.2331,
"step": 1628
},
{
"epoch": 4.1242078580481625,
"grad_norm": 0.620452344417572,
"learning_rate": 3.4809160305343515e-05,
"loss": 0.1516,
"step": 1629
},
{
"epoch": 4.126742712294043,
"grad_norm": 0.6269112229347229,
"learning_rate": 3.4707379134860054e-05,
"loss": 0.1322,
"step": 1630
},
{
"epoch": 4.129277566539924,
"grad_norm": 0.7780957221984863,
"learning_rate": 3.4605597964376594e-05,
"loss": 0.1974,
"step": 1631
},
{
"epoch": 4.1318124207858045,
"grad_norm": 0.6183624267578125,
"learning_rate": 3.4503816793893126e-05,
"loss": 0.1423,
"step": 1632
},
{
"epoch": 4.134347275031685,
"grad_norm": 0.715943455696106,
"learning_rate": 3.440203562340967e-05,
"loss": 0.1422,
"step": 1633
},
{
"epoch": 4.136882129277566,
"grad_norm": 0.6383997201919556,
"learning_rate": 3.430025445292621e-05,
"loss": 0.1566,
"step": 1634
},
{
"epoch": 4.139416983523447,
"grad_norm": 0.6354379653930664,
"learning_rate": 3.419847328244275e-05,
"loss": 0.14,
"step": 1635
},
{
"epoch": 4.141951837769328,
"grad_norm": 0.5692049264907837,
"learning_rate": 3.409669211195929e-05,
"loss": 0.1315,
"step": 1636
},
{
"epoch": 4.144486692015209,
"grad_norm": 0.5286855697631836,
"learning_rate": 3.399491094147583e-05,
"loss": 0.119,
"step": 1637
},
{
"epoch": 4.14702154626109,
"grad_norm": 0.6007808446884155,
"learning_rate": 3.389312977099237e-05,
"loss": 0.1368,
"step": 1638
},
{
"epoch": 4.149556400506971,
"grad_norm": 0.8727791905403137,
"learning_rate": 3.379134860050891e-05,
"loss": 0.1635,
"step": 1639
},
{
"epoch": 4.152091254752852,
"grad_norm": 0.7203207015991211,
"learning_rate": 3.368956743002545e-05,
"loss": 0.1668,
"step": 1640
},
{
"epoch": 4.154626108998732,
"grad_norm": 0.7178492546081543,
"learning_rate": 3.358778625954199e-05,
"loss": 0.1601,
"step": 1641
},
{
"epoch": 4.157160963244613,
"grad_norm": 0.6133365035057068,
"learning_rate": 3.3486005089058526e-05,
"loss": 0.1438,
"step": 1642
},
{
"epoch": 4.159695817490494,
"grad_norm": 0.690122127532959,
"learning_rate": 3.3384223918575065e-05,
"loss": 0.1592,
"step": 1643
},
{
"epoch": 4.162230671736375,
"grad_norm": 0.5469484925270081,
"learning_rate": 3.3282442748091605e-05,
"loss": 0.1499,
"step": 1644
},
{
"epoch": 4.164765525982256,
"grad_norm": 0.7380850911140442,
"learning_rate": 3.3180661577608144e-05,
"loss": 0.1724,
"step": 1645
},
{
"epoch": 4.167300380228137,
"grad_norm": 0.6949165463447571,
"learning_rate": 3.307888040712468e-05,
"loss": 0.1642,
"step": 1646
},
{
"epoch": 4.169835234474018,
"grad_norm": 0.6445840001106262,
"learning_rate": 3.297709923664122e-05,
"loss": 0.1576,
"step": 1647
},
{
"epoch": 4.172370088719899,
"grad_norm": 0.577178418636322,
"learning_rate": 3.287531806615776e-05,
"loss": 0.1482,
"step": 1648
},
{
"epoch": 4.17490494296578,
"grad_norm": 0.5232000350952148,
"learning_rate": 3.27735368956743e-05,
"loss": 0.1385,
"step": 1649
},
{
"epoch": 4.17743979721166,
"grad_norm": 0.8429796695709229,
"learning_rate": 3.267175572519084e-05,
"loss": 0.2456,
"step": 1650
},
{
"epoch": 4.179974651457541,
"grad_norm": 0.5647293925285339,
"learning_rate": 3.256997455470738e-05,
"loss": 0.1482,
"step": 1651
},
{
"epoch": 4.182509505703422,
"grad_norm": 0.7679947018623352,
"learning_rate": 3.246819338422392e-05,
"loss": 0.1705,
"step": 1652
},
{
"epoch": 4.185044359949303,
"grad_norm": 0.7913497686386108,
"learning_rate": 3.236641221374046e-05,
"loss": 0.2133,
"step": 1653
},
{
"epoch": 4.187579214195184,
"grad_norm": 0.5105036497116089,
"learning_rate": 3.2264631043257e-05,
"loss": 0.1335,
"step": 1654
},
{
"epoch": 4.190114068441065,
"grad_norm": 0.6503207087516785,
"learning_rate": 3.216284987277354e-05,
"loss": 0.1872,
"step": 1655
},
{
"epoch": 4.192648922686946,
"grad_norm": 0.9579104781150818,
"learning_rate": 3.2061068702290076e-05,
"loss": 0.1985,
"step": 1656
},
{
"epoch": 4.195183776932827,
"grad_norm": 0.5334345698356628,
"learning_rate": 3.195928753180662e-05,
"loss": 0.137,
"step": 1657
},
{
"epoch": 4.197718631178708,
"grad_norm": 0.7031605243682861,
"learning_rate": 3.185750636132316e-05,
"loss": 0.1574,
"step": 1658
},
{
"epoch": 4.200253485424588,
"grad_norm": 0.6237590909004211,
"learning_rate": 3.1755725190839694e-05,
"loss": 0.1686,
"step": 1659
},
{
"epoch": 4.202788339670469,
"grad_norm": 0.827680230140686,
"learning_rate": 3.1653944020356234e-05,
"loss": 0.1765,
"step": 1660
},
{
"epoch": 4.20532319391635,
"grad_norm": 0.6170578002929688,
"learning_rate": 3.155216284987277e-05,
"loss": 0.1699,
"step": 1661
},
{
"epoch": 4.2078580481622305,
"grad_norm": 0.600803017616272,
"learning_rate": 3.145038167938931e-05,
"loss": 0.1345,
"step": 1662
},
{
"epoch": 4.2103929024081115,
"grad_norm": 0.5505921840667725,
"learning_rate": 3.134860050890586e-05,
"loss": 0.1418,
"step": 1663
},
{
"epoch": 4.212927756653992,
"grad_norm": 0.5893916487693787,
"learning_rate": 3.12468193384224e-05,
"loss": 0.1414,
"step": 1664
},
{
"epoch": 4.215462610899873,
"grad_norm": 0.7622592449188232,
"learning_rate": 3.114503816793894e-05,
"loss": 0.1568,
"step": 1665
},
{
"epoch": 4.217997465145754,
"grad_norm": 0.6462287306785583,
"learning_rate": 3.104325699745547e-05,
"loss": 0.1641,
"step": 1666
},
{
"epoch": 4.220532319391635,
"grad_norm": 0.4971311092376709,
"learning_rate": 3.094147582697201e-05,
"loss": 0.1276,
"step": 1667
},
{
"epoch": 4.223067173637516,
"grad_norm": 0.7270475029945374,
"learning_rate": 3.083969465648855e-05,
"loss": 0.1603,
"step": 1668
},
{
"epoch": 4.225602027883396,
"grad_norm": 0.5765766501426697,
"learning_rate": 3.0737913486005094e-05,
"loss": 0.1341,
"step": 1669
},
{
"epoch": 4.228136882129277,
"grad_norm": 0.577694296836853,
"learning_rate": 3.0636132315521633e-05,
"loss": 0.1415,
"step": 1670
},
{
"epoch": 4.230671736375158,
"grad_norm": 0.6085098385810852,
"learning_rate": 3.053435114503817e-05,
"loss": 0.1359,
"step": 1671
},
{
"epoch": 4.233206590621039,
"grad_norm": 0.6224119663238525,
"learning_rate": 3.043256997455471e-05,
"loss": 0.1494,
"step": 1672
},
{
"epoch": 4.23574144486692,
"grad_norm": 0.4535973072052002,
"learning_rate": 3.0330788804071248e-05,
"loss": 0.1415,
"step": 1673
},
{
"epoch": 4.238276299112801,
"grad_norm": 0.6283777356147766,
"learning_rate": 3.0229007633587787e-05,
"loss": 0.1569,
"step": 1674
},
{
"epoch": 4.240811153358682,
"grad_norm": 0.6005566120147705,
"learning_rate": 3.0127226463104323e-05,
"loss": 0.1385,
"step": 1675
},
{
"epoch": 4.243346007604563,
"grad_norm": 0.6437854766845703,
"learning_rate": 3.002544529262087e-05,
"loss": 0.1584,
"step": 1676
},
{
"epoch": 4.245880861850444,
"grad_norm": 0.5184986591339111,
"learning_rate": 2.992366412213741e-05,
"loss": 0.1384,
"step": 1677
},
{
"epoch": 4.248415716096324,
"grad_norm": 0.5969160199165344,
"learning_rate": 2.9821882951653945e-05,
"loss": 0.1609,
"step": 1678
},
{
"epoch": 4.250950570342205,
"grad_norm": 0.85272616147995,
"learning_rate": 2.9720101781170484e-05,
"loss": 0.178,
"step": 1679
},
{
"epoch": 4.253485424588086,
"grad_norm": 0.5351912379264832,
"learning_rate": 2.9618320610687023e-05,
"loss": 0.1465,
"step": 1680
},
{
"epoch": 4.256020278833967,
"grad_norm": 0.5821883678436279,
"learning_rate": 2.9516539440203562e-05,
"loss": 0.135,
"step": 1681
},
{
"epoch": 4.258555133079848,
"grad_norm": 0.5453548431396484,
"learning_rate": 2.9414758269720105e-05,
"loss": 0.1287,
"step": 1682
},
{
"epoch": 4.261089987325729,
"grad_norm": 0.6280243396759033,
"learning_rate": 2.9312977099236644e-05,
"loss": 0.152,
"step": 1683
},
{
"epoch": 4.26362484157161,
"grad_norm": 0.5709437131881714,
"learning_rate": 2.9211195928753184e-05,
"loss": 0.1487,
"step": 1684
},
{
"epoch": 4.266159695817491,
"grad_norm": 0.4667048752307892,
"learning_rate": 2.910941475826972e-05,
"loss": 0.129,
"step": 1685
},
{
"epoch": 4.268694550063372,
"grad_norm": 0.5744767189025879,
"learning_rate": 2.900763358778626e-05,
"loss": 0.1668,
"step": 1686
},
{
"epoch": 4.271229404309253,
"grad_norm": 0.552631139755249,
"learning_rate": 2.89058524173028e-05,
"loss": 0.128,
"step": 1687
},
{
"epoch": 4.273764258555133,
"grad_norm": 0.46616679430007935,
"learning_rate": 2.880407124681934e-05,
"loss": 0.1168,
"step": 1688
},
{
"epoch": 4.276299112801014,
"grad_norm": 0.7842658758163452,
"learning_rate": 2.870229007633588e-05,
"loss": 0.1617,
"step": 1689
},
{
"epoch": 4.278833967046895,
"grad_norm": 0.5530945062637329,
"learning_rate": 2.860050890585242e-05,
"loss": 0.1619,
"step": 1690
},
{
"epoch": 4.281368821292776,
"grad_norm": 0.9341786503791809,
"learning_rate": 2.849872773536896e-05,
"loss": 0.231,
"step": 1691
},
{
"epoch": 4.283903675538657,
"grad_norm": 0.8043704032897949,
"learning_rate": 2.8396946564885495e-05,
"loss": 0.1826,
"step": 1692
},
{
"epoch": 4.2864385297845375,
"grad_norm": 0.4446638524532318,
"learning_rate": 2.8295165394402034e-05,
"loss": 0.1413,
"step": 1693
},
{
"epoch": 4.2889733840304185,
"grad_norm": 0.6845833659172058,
"learning_rate": 2.8193384223918577e-05,
"loss": 0.1577,
"step": 1694
},
{
"epoch": 4.2915082382762995,
"grad_norm": 0.6702572107315063,
"learning_rate": 2.8091603053435116e-05,
"loss": 0.1714,
"step": 1695
},
{
"epoch": 4.29404309252218,
"grad_norm": 0.6405001282691956,
"learning_rate": 2.7989821882951656e-05,
"loss": 0.1527,
"step": 1696
},
{
"epoch": 4.2965779467680605,
"grad_norm": 0.6155828833580017,
"learning_rate": 2.7888040712468195e-05,
"loss": 0.1471,
"step": 1697
},
{
"epoch": 4.299112801013941,
"grad_norm": 0.5606924295425415,
"learning_rate": 2.7786259541984734e-05,
"loss": 0.1331,
"step": 1698
},
{
"epoch": 4.301647655259822,
"grad_norm": 0.7498462200164795,
"learning_rate": 2.768447837150127e-05,
"loss": 0.1713,
"step": 1699
},
{
"epoch": 4.304182509505703,
"grad_norm": 0.6262723803520203,
"learning_rate": 2.7582697201017816e-05,
"loss": 0.1585,
"step": 1700
},
{
"epoch": 4.306717363751584,
"grad_norm": 0.6729116439819336,
"learning_rate": 2.7480916030534355e-05,
"loss": 0.1347,
"step": 1701
},
{
"epoch": 4.309252217997465,
"grad_norm": 0.7870539426803589,
"learning_rate": 2.737913486005089e-05,
"loss": 0.1512,
"step": 1702
},
{
"epoch": 4.311787072243346,
"grad_norm": 0.4943903684616089,
"learning_rate": 2.727735368956743e-05,
"loss": 0.1274,
"step": 1703
},
{
"epoch": 4.314321926489227,
"grad_norm": 0.4763108193874359,
"learning_rate": 2.717557251908397e-05,
"loss": 0.1228,
"step": 1704
},
{
"epoch": 4.316856780735108,
"grad_norm": 0.6400578618049622,
"learning_rate": 2.707379134860051e-05,
"loss": 0.1558,
"step": 1705
},
{
"epoch": 4.319391634980988,
"grad_norm": 0.5445212125778198,
"learning_rate": 2.6972010178117052e-05,
"loss": 0.1328,
"step": 1706
},
{
"epoch": 4.321926489226869,
"grad_norm": 0.6329374313354492,
"learning_rate": 2.687022900763359e-05,
"loss": 0.1615,
"step": 1707
},
{
"epoch": 4.32446134347275,
"grad_norm": 0.5299343466758728,
"learning_rate": 2.676844783715013e-05,
"loss": 0.122,
"step": 1708
},
{
"epoch": 4.326996197718631,
"grad_norm": 0.6486507058143616,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.1553,
"step": 1709
},
{
"epoch": 4.329531051964512,
"grad_norm": 0.6306889653205872,
"learning_rate": 2.6564885496183206e-05,
"loss": 0.1638,
"step": 1710
},
{
"epoch": 4.332065906210393,
"grad_norm": 0.6417018175125122,
"learning_rate": 2.6463104325699745e-05,
"loss": 0.1404,
"step": 1711
},
{
"epoch": 4.334600760456274,
"grad_norm": 0.7283552289009094,
"learning_rate": 2.6361323155216288e-05,
"loss": 0.1837,
"step": 1712
},
{
"epoch": 4.337135614702155,
"grad_norm": 0.7142099142074585,
"learning_rate": 2.6259541984732827e-05,
"loss": 0.1535,
"step": 1713
},
{
"epoch": 4.339670468948036,
"grad_norm": 0.6059632897377014,
"learning_rate": 2.6157760814249367e-05,
"loss": 0.1551,
"step": 1714
},
{
"epoch": 4.342205323193916,
"grad_norm": 0.6492133140563965,
"learning_rate": 2.6055979643765906e-05,
"loss": 0.1413,
"step": 1715
},
{
"epoch": 4.344740177439797,
"grad_norm": 0.7166099548339844,
"learning_rate": 2.5954198473282442e-05,
"loss": 0.1534,
"step": 1716
},
{
"epoch": 4.347275031685678,
"grad_norm": 0.6357300877571106,
"learning_rate": 2.585241730279898e-05,
"loss": 0.1445,
"step": 1717
},
{
"epoch": 4.349809885931559,
"grad_norm": 0.6684461236000061,
"learning_rate": 2.5750636132315524e-05,
"loss": 0.1469,
"step": 1718
},
{
"epoch": 4.35234474017744,
"grad_norm": 0.7808713912963867,
"learning_rate": 2.5648854961832063e-05,
"loss": 0.1892,
"step": 1719
},
{
"epoch": 4.354879594423321,
"grad_norm": 0.6660336852073669,
"learning_rate": 2.5547073791348602e-05,
"loss": 0.1545,
"step": 1720
},
{
"epoch": 4.357414448669202,
"grad_norm": 0.7266603112220764,
"learning_rate": 2.5445292620865142e-05,
"loss": 0.1346,
"step": 1721
},
{
"epoch": 4.359949302915083,
"grad_norm": 0.5710493326187134,
"learning_rate": 2.534351145038168e-05,
"loss": 0.1199,
"step": 1722
},
{
"epoch": 4.362484157160964,
"grad_norm": 0.6178765296936035,
"learning_rate": 2.5241730279898217e-05,
"loss": 0.1416,
"step": 1723
},
{
"epoch": 4.365019011406844,
"grad_norm": 0.5881832242012024,
"learning_rate": 2.5139949109414763e-05,
"loss": 0.1389,
"step": 1724
},
{
"epoch": 4.367553865652725,
"grad_norm": 0.5589767694473267,
"learning_rate": 2.50381679389313e-05,
"loss": 0.1356,
"step": 1725
},
{
"epoch": 4.370088719898606,
"grad_norm": 0.611072301864624,
"learning_rate": 2.4936386768447838e-05,
"loss": 0.1618,
"step": 1726
},
{
"epoch": 4.3726235741444865,
"grad_norm": 1.0045723915100098,
"learning_rate": 2.4834605597964378e-05,
"loss": 0.2004,
"step": 1727
},
{
"epoch": 4.3751584283903675,
"grad_norm": 1.0154621601104736,
"learning_rate": 2.4732824427480917e-05,
"loss": 0.1593,
"step": 1728
},
{
"epoch": 4.3776932826362485,
"grad_norm": 0.7933842539787292,
"learning_rate": 2.4631043256997456e-05,
"loss": 0.183,
"step": 1729
},
{
"epoch": 4.380228136882129,
"grad_norm": 0.8141732811927795,
"learning_rate": 2.4529262086513996e-05,
"loss": 0.1412,
"step": 1730
},
{
"epoch": 4.38276299112801,
"grad_norm": 0.6575155854225159,
"learning_rate": 2.4427480916030535e-05,
"loss": 0.1592,
"step": 1731
},
{
"epoch": 4.385297845373891,
"grad_norm": 0.7710108757019043,
"learning_rate": 2.4325699745547078e-05,
"loss": 0.2306,
"step": 1732
},
{
"epoch": 4.387832699619771,
"grad_norm": 0.6438276767730713,
"learning_rate": 2.4223918575063613e-05,
"loss": 0.143,
"step": 1733
},
{
"epoch": 4.390367553865652,
"grad_norm": 0.7019467949867249,
"learning_rate": 2.4122137404580153e-05,
"loss": 0.1641,
"step": 1734
},
{
"epoch": 4.392902408111533,
"grad_norm": 0.598584771156311,
"learning_rate": 2.4020356234096695e-05,
"loss": 0.1456,
"step": 1735
},
{
"epoch": 4.395437262357414,
"grad_norm": 0.6024305820465088,
"learning_rate": 2.391857506361323e-05,
"loss": 0.1287,
"step": 1736
},
{
"epoch": 4.397972116603295,
"grad_norm": 0.8446558713912964,
"learning_rate": 2.381679389312977e-05,
"loss": 0.1705,
"step": 1737
},
{
"epoch": 4.400506970849176,
"grad_norm": 0.5697831511497498,
"learning_rate": 2.3715012722646313e-05,
"loss": 0.1386,
"step": 1738
},
{
"epoch": 4.403041825095057,
"grad_norm": 0.6655327677726746,
"learning_rate": 2.3613231552162853e-05,
"loss": 0.186,
"step": 1739
},
{
"epoch": 4.405576679340938,
"grad_norm": 1.1001065969467163,
"learning_rate": 2.351145038167939e-05,
"loss": 0.2531,
"step": 1740
},
{
"epoch": 4.408111533586819,
"grad_norm": 0.5302372574806213,
"learning_rate": 2.340966921119593e-05,
"loss": 0.1342,
"step": 1741
},
{
"epoch": 4.4106463878327,
"grad_norm": 0.6450605392456055,
"learning_rate": 2.330788804071247e-05,
"loss": 0.1499,
"step": 1742
},
{
"epoch": 4.41318124207858,
"grad_norm": 0.5733135342597961,
"learning_rate": 2.3206106870229007e-05,
"loss": 0.166,
"step": 1743
},
{
"epoch": 4.415716096324461,
"grad_norm": 0.609865665435791,
"learning_rate": 2.310432569974555e-05,
"loss": 0.1306,
"step": 1744
},
{
"epoch": 4.418250950570342,
"grad_norm": 0.5957082509994507,
"learning_rate": 2.300254452926209e-05,
"loss": 0.1309,
"step": 1745
},
{
"epoch": 4.420785804816223,
"grad_norm": 0.5951780080795288,
"learning_rate": 2.2900763358778628e-05,
"loss": 0.1366,
"step": 1746
},
{
"epoch": 4.423320659062104,
"grad_norm": 0.7225191593170166,
"learning_rate": 2.2798982188295167e-05,
"loss": 0.1825,
"step": 1747
},
{
"epoch": 4.425855513307985,
"grad_norm": 0.6427996158599854,
"learning_rate": 2.2697201017811707e-05,
"loss": 0.1326,
"step": 1748
},
{
"epoch": 4.428390367553866,
"grad_norm": 0.49267786741256714,
"learning_rate": 2.2595419847328246e-05,
"loss": 0.1367,
"step": 1749
},
{
"epoch": 4.430925221799747,
"grad_norm": 0.5365452766418457,
"learning_rate": 2.2493638676844785e-05,
"loss": 0.1456,
"step": 1750
},
{
"epoch": 4.433460076045628,
"grad_norm": 0.65265291929245,
"learning_rate": 2.2391857506361324e-05,
"loss": 0.1379,
"step": 1751
},
{
"epoch": 4.435994930291509,
"grad_norm": 0.5401502847671509,
"learning_rate": 2.2290076335877864e-05,
"loss": 0.1293,
"step": 1752
},
{
"epoch": 4.438529784537389,
"grad_norm": 0.6832171678543091,
"learning_rate": 2.2188295165394403e-05,
"loss": 0.1448,
"step": 1753
},
{
"epoch": 4.44106463878327,
"grad_norm": 0.8080681562423706,
"learning_rate": 2.2086513994910942e-05,
"loss": 0.1832,
"step": 1754
},
{
"epoch": 4.443599493029151,
"grad_norm": 0.6201688051223755,
"learning_rate": 2.198473282442748e-05,
"loss": 0.159,
"step": 1755
},
{
"epoch": 4.446134347275032,
"grad_norm": 0.8549275994300842,
"learning_rate": 2.1882951653944024e-05,
"loss": 0.2103,
"step": 1756
},
{
"epoch": 4.448669201520913,
"grad_norm": 0.5879942178726196,
"learning_rate": 2.178117048346056e-05,
"loss": 0.1524,
"step": 1757
},
{
"epoch": 4.451204055766794,
"grad_norm": 0.6592312455177307,
"learning_rate": 2.16793893129771e-05,
"loss": 0.1535,
"step": 1758
},
{
"epoch": 4.4537389100126745,
"grad_norm": 0.6493979096412659,
"learning_rate": 2.1577608142493642e-05,
"loss": 0.1451,
"step": 1759
},
{
"epoch": 4.4562737642585555,
"grad_norm": 0.7973134517669678,
"learning_rate": 2.1475826972010178e-05,
"loss": 0.1519,
"step": 1760
},
{
"epoch": 4.458808618504436,
"grad_norm": 0.7703438401222229,
"learning_rate": 2.1374045801526718e-05,
"loss": 0.1653,
"step": 1761
},
{
"epoch": 4.4613434727503165,
"grad_norm": 1.0013222694396973,
"learning_rate": 2.127226463104326e-05,
"loss": 0.2064,
"step": 1762
},
{
"epoch": 4.4638783269961975,
"grad_norm": 0.7007017135620117,
"learning_rate": 2.11704834605598e-05,
"loss": 0.1401,
"step": 1763
},
{
"epoch": 4.466413181242078,
"grad_norm": 0.5366234183311462,
"learning_rate": 2.1068702290076335e-05,
"loss": 0.1389,
"step": 1764
},
{
"epoch": 4.468948035487959,
"grad_norm": 0.7167120575904846,
"learning_rate": 2.0966921119592875e-05,
"loss": 0.1817,
"step": 1765
},
{
"epoch": 4.47148288973384,
"grad_norm": 0.7901313900947571,
"learning_rate": 2.0865139949109417e-05,
"loss": 0.1817,
"step": 1766
},
{
"epoch": 4.474017743979721,
"grad_norm": 0.6681633591651917,
"learning_rate": 2.0763358778625953e-05,
"loss": 0.1458,
"step": 1767
},
{
"epoch": 4.476552598225602,
"grad_norm": 0.5067597031593323,
"learning_rate": 2.0661577608142493e-05,
"loss": 0.1301,
"step": 1768
},
{
"epoch": 4.479087452471483,
"grad_norm": 0.6582893133163452,
"learning_rate": 2.0559796437659035e-05,
"loss": 0.1576,
"step": 1769
},
{
"epoch": 4.481622306717364,
"grad_norm": 0.6628451943397522,
"learning_rate": 2.0458015267175575e-05,
"loss": 0.168,
"step": 1770
},
{
"epoch": 4.484157160963244,
"grad_norm": 0.5435721278190613,
"learning_rate": 2.035623409669211e-05,
"loss": 0.1476,
"step": 1771
},
{
"epoch": 4.486692015209125,
"grad_norm": 0.6182110905647278,
"learning_rate": 2.0254452926208653e-05,
"loss": 0.1441,
"step": 1772
},
{
"epoch": 4.489226869455006,
"grad_norm": 0.9246516823768616,
"learning_rate": 2.0152671755725193e-05,
"loss": 0.1747,
"step": 1773
},
{
"epoch": 4.491761723700887,
"grad_norm": 0.5967719554901123,
"learning_rate": 2.005089058524173e-05,
"loss": 0.1461,
"step": 1774
},
{
"epoch": 4.494296577946768,
"grad_norm": 0.5998682379722595,
"learning_rate": 1.994910941475827e-05,
"loss": 0.1276,
"step": 1775
},
{
"epoch": 4.496831432192649,
"grad_norm": 0.6168457865715027,
"learning_rate": 1.984732824427481e-05,
"loss": 0.1407,
"step": 1776
},
{
"epoch": 4.49936628643853,
"grad_norm": 0.6580602526664734,
"learning_rate": 1.974554707379135e-05,
"loss": 0.149,
"step": 1777
},
{
"epoch": 4.501901140684411,
"grad_norm": 0.5117031335830688,
"learning_rate": 1.964376590330789e-05,
"loss": 0.1397,
"step": 1778
},
{
"epoch": 4.504435994930292,
"grad_norm": 0.4603317975997925,
"learning_rate": 1.954198473282443e-05,
"loss": 0.1211,
"step": 1779
},
{
"epoch": 4.506970849176172,
"grad_norm": 0.5981631278991699,
"learning_rate": 1.9440203562340968e-05,
"loss": 0.1371,
"step": 1780
},
{
"epoch": 4.509505703422053,
"grad_norm": 0.6693590879440308,
"learning_rate": 1.9338422391857507e-05,
"loss": 0.1495,
"step": 1781
},
{
"epoch": 4.512040557667934,
"grad_norm": 0.5286784172058105,
"learning_rate": 1.9236641221374046e-05,
"loss": 0.1304,
"step": 1782
},
{
"epoch": 4.514575411913815,
"grad_norm": 0.7040352821350098,
"learning_rate": 1.9134860050890586e-05,
"loss": 0.1584,
"step": 1783
},
{
"epoch": 4.517110266159696,
"grad_norm": 0.6396339535713196,
"learning_rate": 1.9033078880407125e-05,
"loss": 0.1529,
"step": 1784
},
{
"epoch": 4.519645120405577,
"grad_norm": 0.6708245873451233,
"learning_rate": 1.8931297709923664e-05,
"loss": 0.1477,
"step": 1785
},
{
"epoch": 4.522179974651458,
"grad_norm": 0.6562108993530273,
"learning_rate": 1.8829516539440204e-05,
"loss": 0.1499,
"step": 1786
},
{
"epoch": 4.524714828897339,
"grad_norm": 0.5181876420974731,
"learning_rate": 1.8727735368956746e-05,
"loss": 0.1398,
"step": 1787
},
{
"epoch": 4.52724968314322,
"grad_norm": 0.5952017307281494,
"learning_rate": 1.8625954198473282e-05,
"loss": 0.1438,
"step": 1788
},
{
"epoch": 4.5297845373891,
"grad_norm": 0.6668636202812195,
"learning_rate": 1.852417302798982e-05,
"loss": 0.1805,
"step": 1789
},
{
"epoch": 4.532319391634981,
"grad_norm": 0.5433321595191956,
"learning_rate": 1.8422391857506364e-05,
"loss": 0.1397,
"step": 1790
},
{
"epoch": 4.534854245880862,
"grad_norm": 0.5353025197982788,
"learning_rate": 1.83206106870229e-05,
"loss": 0.1419,
"step": 1791
},
{
"epoch": 4.537389100126743,
"grad_norm": 0.6123271584510803,
"learning_rate": 1.821882951653944e-05,
"loss": 0.1493,
"step": 1792
},
{
"epoch": 4.5399239543726235,
"grad_norm": 0.6581493616104126,
"learning_rate": 1.8117048346055982e-05,
"loss": 0.1467,
"step": 1793
},
{
"epoch": 4.5424588086185045,
"grad_norm": 0.5537798404693604,
"learning_rate": 1.801526717557252e-05,
"loss": 0.1467,
"step": 1794
},
{
"epoch": 4.544993662864385,
"grad_norm": 0.7163582444190979,
"learning_rate": 1.7913486005089058e-05,
"loss": 0.1736,
"step": 1795
},
{
"epoch": 4.547528517110266,
"grad_norm": 0.694922149181366,
"learning_rate": 1.78117048346056e-05,
"loss": 0.1516,
"step": 1796
},
{
"epoch": 4.550063371356147,
"grad_norm": 0.7119778394699097,
"learning_rate": 1.770992366412214e-05,
"loss": 0.1899,
"step": 1797
},
{
"epoch": 4.552598225602027,
"grad_norm": 0.7570186853408813,
"learning_rate": 1.7608142493638675e-05,
"loss": 0.1951,
"step": 1798
},
{
"epoch": 4.555133079847908,
"grad_norm": 0.6789132356643677,
"learning_rate": 1.7506361323155218e-05,
"loss": 0.1475,
"step": 1799
},
{
"epoch": 4.557667934093789,
"grad_norm": 0.5750378966331482,
"learning_rate": 1.7404580152671757e-05,
"loss": 0.1431,
"step": 1800
},
{
"epoch": 4.56020278833967,
"grad_norm": 0.6066502332687378,
"learning_rate": 1.7302798982188297e-05,
"loss": 0.16,
"step": 1801
},
{
"epoch": 4.562737642585551,
"grad_norm": 0.5730226039886475,
"learning_rate": 1.7201017811704836e-05,
"loss": 0.1455,
"step": 1802
},
{
"epoch": 4.565272496831432,
"grad_norm": 0.5752687454223633,
"learning_rate": 1.7099236641221375e-05,
"loss": 0.1281,
"step": 1803
},
{
"epoch": 4.567807351077313,
"grad_norm": 0.5497205853462219,
"learning_rate": 1.6997455470737915e-05,
"loss": 0.1431,
"step": 1804
},
{
"epoch": 4.570342205323194,
"grad_norm": 0.7738269567489624,
"learning_rate": 1.6895674300254454e-05,
"loss": 0.1523,
"step": 1805
},
{
"epoch": 4.572877059569075,
"grad_norm": 0.5750918388366699,
"learning_rate": 1.6793893129770993e-05,
"loss": 0.1466,
"step": 1806
},
{
"epoch": 4.575411913814955,
"grad_norm": 0.5575040578842163,
"learning_rate": 1.6692111959287533e-05,
"loss": 0.1267,
"step": 1807
},
{
"epoch": 4.577946768060836,
"grad_norm": 0.509616494178772,
"learning_rate": 1.6590330788804072e-05,
"loss": 0.1434,
"step": 1808
},
{
"epoch": 4.580481622306717,
"grad_norm": 0.643009603023529,
"learning_rate": 1.648854961832061e-05,
"loss": 0.136,
"step": 1809
},
{
"epoch": 4.583016476552598,
"grad_norm": 0.5133553743362427,
"learning_rate": 1.638676844783715e-05,
"loss": 0.1223,
"step": 1810
},
{
"epoch": 4.585551330798479,
"grad_norm": 0.7505659461021423,
"learning_rate": 1.628498727735369e-05,
"loss": 0.1607,
"step": 1811
},
{
"epoch": 4.58808618504436,
"grad_norm": 0.6981300711631775,
"learning_rate": 1.618320610687023e-05,
"loss": 0.1525,
"step": 1812
},
{
"epoch": 4.590621039290241,
"grad_norm": 0.4981435537338257,
"learning_rate": 1.608142493638677e-05,
"loss": 0.1236,
"step": 1813
},
{
"epoch": 4.593155893536122,
"grad_norm": 0.6467440724372864,
"learning_rate": 1.597964376590331e-05,
"loss": 0.153,
"step": 1814
},
{
"epoch": 4.595690747782003,
"grad_norm": 0.6843181848526001,
"learning_rate": 1.5877862595419847e-05,
"loss": 0.1604,
"step": 1815
},
{
"epoch": 4.598225602027884,
"grad_norm": 0.49898776412010193,
"learning_rate": 1.5776081424936386e-05,
"loss": 0.1165,
"step": 1816
},
{
"epoch": 4.600760456273765,
"grad_norm": 0.6252351403236389,
"learning_rate": 1.567430025445293e-05,
"loss": 0.1228,
"step": 1817
},
{
"epoch": 4.603295310519645,
"grad_norm": 0.5452350974082947,
"learning_rate": 1.557251908396947e-05,
"loss": 0.1245,
"step": 1818
},
{
"epoch": 4.605830164765526,
"grad_norm": 0.6847854852676392,
"learning_rate": 1.5470737913486004e-05,
"loss": 0.1462,
"step": 1819
},
{
"epoch": 4.608365019011407,
"grad_norm": 0.49941131472587585,
"learning_rate": 1.5368956743002547e-05,
"loss": 0.1268,
"step": 1820
},
{
"epoch": 4.610899873257288,
"grad_norm": 0.581243097782135,
"learning_rate": 1.5267175572519086e-05,
"loss": 0.1296,
"step": 1821
},
{
"epoch": 4.613434727503169,
"grad_norm": 0.8345553874969482,
"learning_rate": 1.5165394402035624e-05,
"loss": 0.1307,
"step": 1822
},
{
"epoch": 4.61596958174905,
"grad_norm": 0.6534408926963806,
"learning_rate": 1.5063613231552162e-05,
"loss": 0.1446,
"step": 1823
},
{
"epoch": 4.6185044359949305,
"grad_norm": 0.7743064165115356,
"learning_rate": 1.4961832061068704e-05,
"loss": 0.2027,
"step": 1824
},
{
"epoch": 4.6210392902408115,
"grad_norm": 0.6709569096565247,
"learning_rate": 1.4860050890585242e-05,
"loss": 0.1427,
"step": 1825
},
{
"epoch": 4.6235741444866925,
"grad_norm": 0.6598264575004578,
"learning_rate": 1.4758269720101781e-05,
"loss": 0.1399,
"step": 1826
},
{
"epoch": 4.6261089987325725,
"grad_norm": 0.49041053652763367,
"learning_rate": 1.4656488549618322e-05,
"loss": 0.133,
"step": 1827
},
{
"epoch": 4.6286438529784535,
"grad_norm": 0.6697686910629272,
"learning_rate": 1.455470737913486e-05,
"loss": 0.1735,
"step": 1828
},
{
"epoch": 4.6311787072243344,
"grad_norm": 0.5481597781181335,
"learning_rate": 1.44529262086514e-05,
"loss": 0.1244,
"step": 1829
},
{
"epoch": 4.633713561470215,
"grad_norm": 0.6251161694526672,
"learning_rate": 1.435114503816794e-05,
"loss": 0.1436,
"step": 1830
},
{
"epoch": 4.636248415716096,
"grad_norm": 0.7515272498130798,
"learning_rate": 1.424936386768448e-05,
"loss": 0.1493,
"step": 1831
},
{
"epoch": 4.638783269961977,
"grad_norm": 0.8478451371192932,
"learning_rate": 1.4147582697201017e-05,
"loss": 0.1519,
"step": 1832
},
{
"epoch": 4.641318124207858,
"grad_norm": 0.5417062640190125,
"learning_rate": 1.4045801526717558e-05,
"loss": 0.1318,
"step": 1833
},
{
"epoch": 4.643852978453739,
"grad_norm": 0.6493893265724182,
"learning_rate": 1.3944020356234097e-05,
"loss": 0.1546,
"step": 1834
},
{
"epoch": 4.64638783269962,
"grad_norm": 0.8475616574287415,
"learning_rate": 1.3842239185750635e-05,
"loss": 0.172,
"step": 1835
},
{
"epoch": 4.6489226869455,
"grad_norm": 0.5484082698822021,
"learning_rate": 1.3740458015267178e-05,
"loss": 0.1203,
"step": 1836
},
{
"epoch": 4.651457541191381,
"grad_norm": 0.6533843874931335,
"learning_rate": 1.3638676844783715e-05,
"loss": 0.1501,
"step": 1837
},
{
"epoch": 4.653992395437262,
"grad_norm": 0.7521854043006897,
"learning_rate": 1.3536895674300255e-05,
"loss": 0.1955,
"step": 1838
},
{
"epoch": 4.656527249683143,
"grad_norm": 0.6500900983810425,
"learning_rate": 1.3435114503816796e-05,
"loss": 0.14,
"step": 1839
},
{
"epoch": 4.659062103929024,
"grad_norm": 0.7133599519729614,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.1707,
"step": 1840
},
{
"epoch": 4.661596958174905,
"grad_norm": 0.7065775394439697,
"learning_rate": 1.3231552162849873e-05,
"loss": 0.144,
"step": 1841
},
{
"epoch": 4.664131812420786,
"grad_norm": 0.7716514468193054,
"learning_rate": 1.3129770992366414e-05,
"loss": 0.1792,
"step": 1842
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.9312828779220581,
"learning_rate": 1.3027989821882953e-05,
"loss": 0.2139,
"step": 1843
},
{
"epoch": 4.669201520912548,
"grad_norm": 0.5163487792015076,
"learning_rate": 1.292620865139949e-05,
"loss": 0.139,
"step": 1844
},
{
"epoch": 4.671736375158428,
"grad_norm": 0.7424818277359009,
"learning_rate": 1.2824427480916032e-05,
"loss": 0.1533,
"step": 1845
},
{
"epoch": 4.674271229404309,
"grad_norm": 0.5935065150260925,
"learning_rate": 1.2722646310432571e-05,
"loss": 0.1319,
"step": 1846
},
{
"epoch": 4.67680608365019,
"grad_norm": 0.7372322678565979,
"learning_rate": 1.2620865139949108e-05,
"loss": 0.1832,
"step": 1847
},
{
"epoch": 4.679340937896071,
"grad_norm": 0.5936238765716553,
"learning_rate": 1.251908396946565e-05,
"loss": 0.1357,
"step": 1848
},
{
"epoch": 4.681875792141952,
"grad_norm": 0.6689032316207886,
"learning_rate": 1.2417302798982189e-05,
"loss": 0.1709,
"step": 1849
},
{
"epoch": 4.684410646387833,
"grad_norm": 0.6519850492477417,
"learning_rate": 1.2315521628498728e-05,
"loss": 0.1438,
"step": 1850
},
{
"epoch": 4.686945500633714,
"grad_norm": 0.5853939056396484,
"learning_rate": 1.2213740458015267e-05,
"loss": 0.134,
"step": 1851
},
{
"epoch": 4.689480354879595,
"grad_norm": 0.5059859752655029,
"learning_rate": 1.2111959287531807e-05,
"loss": 0.1088,
"step": 1852
},
{
"epoch": 4.692015209125476,
"grad_norm": 0.6989784240722656,
"learning_rate": 1.2010178117048348e-05,
"loss": 0.1527,
"step": 1853
},
{
"epoch": 4.694550063371356,
"grad_norm": 0.5851006507873535,
"learning_rate": 1.1908396946564885e-05,
"loss": 0.143,
"step": 1854
},
{
"epoch": 4.697084917617237,
"grad_norm": 0.5606602430343628,
"learning_rate": 1.1806615776081426e-05,
"loss": 0.1288,
"step": 1855
},
{
"epoch": 4.699619771863118,
"grad_norm": 0.6175526976585388,
"learning_rate": 1.1704834605597966e-05,
"loss": 0.1564,
"step": 1856
},
{
"epoch": 4.702154626108999,
"grad_norm": 0.5776654481887817,
"learning_rate": 1.1603053435114503e-05,
"loss": 0.1323,
"step": 1857
},
{
"epoch": 4.7046894803548795,
"grad_norm": 0.5664159059524536,
"learning_rate": 1.1501272264631044e-05,
"loss": 0.1371,
"step": 1858
},
{
"epoch": 4.7072243346007605,
"grad_norm": 0.7187889218330383,
"learning_rate": 1.1399491094147584e-05,
"loss": 0.1476,
"step": 1859
},
{
"epoch": 4.7097591888466415,
"grad_norm": 0.5795005559921265,
"learning_rate": 1.1297709923664123e-05,
"loss": 0.1373,
"step": 1860
},
{
"epoch": 4.712294043092522,
"grad_norm": 0.5491251945495605,
"learning_rate": 1.1195928753180662e-05,
"loss": 0.1192,
"step": 1861
},
{
"epoch": 4.714828897338403,
"grad_norm": 0.4715762734413147,
"learning_rate": 1.1094147582697202e-05,
"loss": 0.1106,
"step": 1862
},
{
"epoch": 4.7173637515842834,
"grad_norm": 0.6300286054611206,
"learning_rate": 1.099236641221374e-05,
"loss": 0.138,
"step": 1863
},
{
"epoch": 4.719898605830164,
"grad_norm": 0.7265313267707825,
"learning_rate": 1.089058524173028e-05,
"loss": 0.2246,
"step": 1864
},
{
"epoch": 4.722433460076045,
"grad_norm": 0.7080928087234497,
"learning_rate": 1.0788804071246821e-05,
"loss": 0.1335,
"step": 1865
},
{
"epoch": 4.724968314321926,
"grad_norm": 0.605714738368988,
"learning_rate": 1.0687022900763359e-05,
"loss": 0.1412,
"step": 1866
},
{
"epoch": 4.727503168567807,
"grad_norm": 0.6648192405700684,
"learning_rate": 1.05852417302799e-05,
"loss": 0.1648,
"step": 1867
},
{
"epoch": 4.730038022813688,
"grad_norm": 0.6057281494140625,
"learning_rate": 1.0483460559796437e-05,
"loss": 0.1266,
"step": 1868
},
{
"epoch": 4.732572877059569,
"grad_norm": 0.6135514974594116,
"learning_rate": 1.0381679389312977e-05,
"loss": 0.1457,
"step": 1869
},
{
"epoch": 4.73510773130545,
"grad_norm": 0.6599459052085876,
"learning_rate": 1.0279898218829518e-05,
"loss": 0.1558,
"step": 1870
},
{
"epoch": 4.737642585551331,
"grad_norm": 0.5975873470306396,
"learning_rate": 1.0178117048346055e-05,
"loss": 0.134,
"step": 1871
},
{
"epoch": 4.740177439797211,
"grad_norm": 0.6581792235374451,
"learning_rate": 1.0076335877862596e-05,
"loss": 0.1463,
"step": 1872
},
{
"epoch": 4.742712294043092,
"grad_norm": 0.5627064108848572,
"learning_rate": 9.974554707379136e-06,
"loss": 0.1238,
"step": 1873
},
{
"epoch": 4.745247148288973,
"grad_norm": 0.6461361050605774,
"learning_rate": 9.872773536895675e-06,
"loss": 0.1621,
"step": 1874
},
{
"epoch": 4.747782002534854,
"grad_norm": 0.5615333914756775,
"learning_rate": 9.770992366412214e-06,
"loss": 0.1387,
"step": 1875
},
{
"epoch": 4.750316856780735,
"grad_norm": 0.6830117702484131,
"learning_rate": 9.669211195928754e-06,
"loss": 0.1397,
"step": 1876
},
{
"epoch": 4.752851711026616,
"grad_norm": 0.731072187423706,
"learning_rate": 9.567430025445293e-06,
"loss": 0.1508,
"step": 1877
},
{
"epoch": 4.755386565272497,
"grad_norm": 0.7469286918640137,
"learning_rate": 9.465648854961832e-06,
"loss": 0.1944,
"step": 1878
},
{
"epoch": 4.757921419518378,
"grad_norm": 0.700532078742981,
"learning_rate": 9.363867684478373e-06,
"loss": 0.1697,
"step": 1879
},
{
"epoch": 4.760456273764259,
"grad_norm": 0.7140323519706726,
"learning_rate": 9.26208651399491e-06,
"loss": 0.1597,
"step": 1880
},
{
"epoch": 4.76299112801014,
"grad_norm": 0.6711133718490601,
"learning_rate": 9.16030534351145e-06,
"loss": 0.1731,
"step": 1881
},
{
"epoch": 4.765525982256021,
"grad_norm": 0.43002957105636597,
"learning_rate": 9.058524173027991e-06,
"loss": 0.1181,
"step": 1882
},
{
"epoch": 4.768060836501901,
"grad_norm": 0.669159471988678,
"learning_rate": 8.956743002544529e-06,
"loss": 0.1578,
"step": 1883
},
{
"epoch": 4.770595690747782,
"grad_norm": 0.5030307769775391,
"learning_rate": 8.85496183206107e-06,
"loss": 0.1213,
"step": 1884
},
{
"epoch": 4.773130544993663,
"grad_norm": 0.7841615080833435,
"learning_rate": 8.753180661577609e-06,
"loss": 0.1619,
"step": 1885
},
{
"epoch": 4.775665399239544,
"grad_norm": 0.5570418834686279,
"learning_rate": 8.651399491094148e-06,
"loss": 0.1308,
"step": 1886
},
{
"epoch": 4.778200253485425,
"grad_norm": 0.6690031886100769,
"learning_rate": 8.549618320610688e-06,
"loss": 0.1413,
"step": 1887
},
{
"epoch": 4.780735107731306,
"grad_norm": 0.524140477180481,
"learning_rate": 8.447837150127227e-06,
"loss": 0.1354,
"step": 1888
},
{
"epoch": 4.783269961977187,
"grad_norm": 0.5612379908561707,
"learning_rate": 8.346055979643766e-06,
"loss": 0.1375,
"step": 1889
},
{
"epoch": 4.7858048162230675,
"grad_norm": 0.851925790309906,
"learning_rate": 8.244274809160306e-06,
"loss": 0.1783,
"step": 1890
},
{
"epoch": 4.7883396704689485,
"grad_norm": 0.8507834672927856,
"learning_rate": 8.142493638676845e-06,
"loss": 0.1743,
"step": 1891
},
{
"epoch": 4.7908745247148286,
"grad_norm": 0.8136033415794373,
"learning_rate": 8.040712468193384e-06,
"loss": 0.1381,
"step": 1892
},
{
"epoch": 4.7934093789607095,
"grad_norm": 0.7247329354286194,
"learning_rate": 7.938931297709924e-06,
"loss": 0.1793,
"step": 1893
},
{
"epoch": 4.7959442332065905,
"grad_norm": 0.5494823455810547,
"learning_rate": 7.837150127226465e-06,
"loss": 0.1231,
"step": 1894
},
{
"epoch": 4.798479087452471,
"grad_norm": 0.6107218861579895,
"learning_rate": 7.735368956743002e-06,
"loss": 0.1358,
"step": 1895
},
{
"epoch": 4.801013941698352,
"grad_norm": 0.6297575235366821,
"learning_rate": 7.633587786259543e-06,
"loss": 0.1699,
"step": 1896
},
{
"epoch": 4.803548795944233,
"grad_norm": 0.8669266700744629,
"learning_rate": 7.531806615776081e-06,
"loss": 0.1982,
"step": 1897
},
{
"epoch": 4.806083650190114,
"grad_norm": 0.583975076675415,
"learning_rate": 7.430025445292621e-06,
"loss": 0.1517,
"step": 1898
},
{
"epoch": 4.808618504435995,
"grad_norm": 0.6059403419494629,
"learning_rate": 7.328244274809161e-06,
"loss": 0.138,
"step": 1899
},
{
"epoch": 4.811153358681876,
"grad_norm": 1.0802148580551147,
"learning_rate": 7.2264631043257e-06,
"loss": 0.1677,
"step": 1900
},
{
"epoch": 4.813688212927756,
"grad_norm": 0.5637528300285339,
"learning_rate": 7.12468193384224e-06,
"loss": 0.1517,
"step": 1901
},
{
"epoch": 4.816223067173637,
"grad_norm": 0.6925719976425171,
"learning_rate": 7.022900763358779e-06,
"loss": 0.1636,
"step": 1902
},
{
"epoch": 4.818757921419518,
"grad_norm": 0.6529707908630371,
"learning_rate": 6.9211195928753175e-06,
"loss": 0.1587,
"step": 1903
},
{
"epoch": 4.821292775665399,
"grad_norm": 1.1477290391921997,
"learning_rate": 6.819338422391858e-06,
"loss": 0.1655,
"step": 1904
},
{
"epoch": 4.82382762991128,
"grad_norm": 0.7867985367774963,
"learning_rate": 6.717557251908398e-06,
"loss": 0.1955,
"step": 1905
},
{
"epoch": 4.826362484157161,
"grad_norm": 0.617871105670929,
"learning_rate": 6.615776081424936e-06,
"loss": 0.1554,
"step": 1906
},
{
"epoch": 4.828897338403042,
"grad_norm": 0.5985192656517029,
"learning_rate": 6.5139949109414765e-06,
"loss": 0.1484,
"step": 1907
},
{
"epoch": 4.831432192648923,
"grad_norm": 0.6069400310516357,
"learning_rate": 6.412213740458016e-06,
"loss": 0.1326,
"step": 1908
},
{
"epoch": 4.833967046894804,
"grad_norm": 0.9009010195732117,
"learning_rate": 6.310432569974554e-06,
"loss": 0.1999,
"step": 1909
},
{
"epoch": 4.836501901140684,
"grad_norm": 0.5913792848587036,
"learning_rate": 6.208651399491094e-06,
"loss": 0.1381,
"step": 1910
},
{
"epoch": 4.839036755386565,
"grad_norm": 0.5730859637260437,
"learning_rate": 6.106870229007634e-06,
"loss": 0.1346,
"step": 1911
},
{
"epoch": 4.841571609632446,
"grad_norm": 0.6579172611236572,
"learning_rate": 6.005089058524174e-06,
"loss": 0.1572,
"step": 1912
},
{
"epoch": 4.844106463878327,
"grad_norm": 0.5854265093803406,
"learning_rate": 5.903307888040713e-06,
"loss": 0.1359,
"step": 1913
},
{
"epoch": 4.846641318124208,
"grad_norm": 0.7668277025222778,
"learning_rate": 5.801526717557252e-06,
"loss": 0.1728,
"step": 1914
},
{
"epoch": 4.849176172370089,
"grad_norm": 0.8092861175537109,
"learning_rate": 5.699745547073792e-06,
"loss": 0.1741,
"step": 1915
},
{
"epoch": 4.85171102661597,
"grad_norm": 0.6868001818656921,
"learning_rate": 5.597964376590331e-06,
"loss": 0.1604,
"step": 1916
},
{
"epoch": 4.854245880861851,
"grad_norm": 0.6506228446960449,
"learning_rate": 5.49618320610687e-06,
"loss": 0.1459,
"step": 1917
},
{
"epoch": 4.856780735107732,
"grad_norm": 0.6033440232276917,
"learning_rate": 5.394402035623411e-06,
"loss": 0.1435,
"step": 1918
},
{
"epoch": 4.859315589353612,
"grad_norm": 0.7446348071098328,
"learning_rate": 5.29262086513995e-06,
"loss": 0.165,
"step": 1919
},
{
"epoch": 4.861850443599493,
"grad_norm": 0.5380656123161316,
"learning_rate": 5.190839694656488e-06,
"loss": 0.1504,
"step": 1920
},
{
"epoch": 4.864385297845374,
"grad_norm": 0.6752755641937256,
"learning_rate": 5.089058524173028e-06,
"loss": 0.1616,
"step": 1921
},
{
"epoch": 4.866920152091255,
"grad_norm": 0.6897322535514832,
"learning_rate": 4.987277353689568e-06,
"loss": 0.1409,
"step": 1922
},
{
"epoch": 4.869455006337136,
"grad_norm": 0.5405673980712891,
"learning_rate": 4.885496183206107e-06,
"loss": 0.1215,
"step": 1923
},
{
"epoch": 4.8719898605830165,
"grad_norm": 0.6921371221542358,
"learning_rate": 4.7837150127226464e-06,
"loss": 0.1554,
"step": 1924
},
{
"epoch": 4.8745247148288975,
"grad_norm": 0.6672477722167969,
"learning_rate": 4.681933842239187e-06,
"loss": 0.1685,
"step": 1925
},
{
"epoch": 4.8770595690747784,
"grad_norm": 0.5887411236763,
"learning_rate": 4.580152671755725e-06,
"loss": 0.1495,
"step": 1926
},
{
"epoch": 4.879594423320659,
"grad_norm": 0.8119281530380249,
"learning_rate": 4.478371501272264e-06,
"loss": 0.1778,
"step": 1927
},
{
"epoch": 4.8821292775665395,
"grad_norm": 0.6423155665397644,
"learning_rate": 4.3765903307888045e-06,
"loss": 0.1532,
"step": 1928
},
{
"epoch": 4.88466413181242,
"grad_norm": 0.576859712600708,
"learning_rate": 4.274809160305344e-06,
"loss": 0.1474,
"step": 1929
},
{
"epoch": 4.887198986058301,
"grad_norm": 0.668792188167572,
"learning_rate": 4.173027989821883e-06,
"loss": 0.1583,
"step": 1930
},
{
"epoch": 4.889733840304182,
"grad_norm": 0.727428138256073,
"learning_rate": 4.0712468193384225e-06,
"loss": 0.1759,
"step": 1931
},
{
"epoch": 4.892268694550063,
"grad_norm": 0.7260742783546448,
"learning_rate": 3.969465648854962e-06,
"loss": 0.1665,
"step": 1932
},
{
"epoch": 4.894803548795944,
"grad_norm": 0.6192269921302795,
"learning_rate": 3.867684478371501e-06,
"loss": 0.1377,
"step": 1933
},
{
"epoch": 4.897338403041825,
"grad_norm": 0.7672135233879089,
"learning_rate": 3.7659033078880404e-06,
"loss": 0.1696,
"step": 1934
},
{
"epoch": 4.899873257287706,
"grad_norm": 0.5162369012832642,
"learning_rate": 3.6641221374045806e-06,
"loss": 0.1384,
"step": 1935
},
{
"epoch": 4.902408111533587,
"grad_norm": 0.6594913601875305,
"learning_rate": 3.56234096692112e-06,
"loss": 0.1714,
"step": 1936
},
{
"epoch": 4.904942965779467,
"grad_norm": 0.7748851776123047,
"learning_rate": 3.4605597964376588e-06,
"loss": 0.2014,
"step": 1937
},
{
"epoch": 4.907477820025348,
"grad_norm": 0.6400601267814636,
"learning_rate": 3.358778625954199e-06,
"loss": 0.1522,
"step": 1938
},
{
"epoch": 4.910012674271229,
"grad_norm": 0.5443174839019775,
"learning_rate": 3.2569974554707382e-06,
"loss": 0.1276,
"step": 1939
},
{
"epoch": 4.91254752851711,
"grad_norm": 0.6544225811958313,
"learning_rate": 3.155216284987277e-06,
"loss": 0.1441,
"step": 1940
},
{
"epoch": 4.915082382762991,
"grad_norm": 0.6579450368881226,
"learning_rate": 3.053435114503817e-06,
"loss": 0.1688,
"step": 1941
},
{
"epoch": 4.917617237008872,
"grad_norm": 0.594393253326416,
"learning_rate": 2.9516539440203566e-06,
"loss": 0.1586,
"step": 1942
},
{
"epoch": 4.920152091254753,
"grad_norm": 0.6417977213859558,
"learning_rate": 2.849872773536896e-06,
"loss": 0.1389,
"step": 1943
},
{
"epoch": 4.922686945500634,
"grad_norm": 0.5247513055801392,
"learning_rate": 2.748091603053435e-06,
"loss": 0.1282,
"step": 1944
},
{
"epoch": 4.925221799746515,
"grad_norm": 0.6372106075286865,
"learning_rate": 2.646310432569975e-06,
"loss": 0.1391,
"step": 1945
},
{
"epoch": 4.927756653992396,
"grad_norm": 0.5967155694961548,
"learning_rate": 2.544529262086514e-06,
"loss": 0.1358,
"step": 1946
},
{
"epoch": 4.930291508238277,
"grad_norm": 0.6050627827644348,
"learning_rate": 2.4427480916030536e-06,
"loss": 0.1449,
"step": 1947
},
{
"epoch": 4.932826362484157,
"grad_norm": 0.7595526576042175,
"learning_rate": 2.3409669211195933e-06,
"loss": 0.1838,
"step": 1948
},
{
"epoch": 4.935361216730038,
"grad_norm": 0.7220463156700134,
"learning_rate": 2.239185750636132e-06,
"loss": 0.1695,
"step": 1949
},
{
"epoch": 4.937896070975919,
"grad_norm": 0.4891555905342102,
"learning_rate": 2.137404580152672e-06,
"loss": 0.1394,
"step": 1950
},
{
"epoch": 4.9404309252218,
"grad_norm": 0.5262938141822815,
"learning_rate": 2.0356234096692112e-06,
"loss": 0.1452,
"step": 1951
},
{
"epoch": 4.942965779467681,
"grad_norm": 0.7193884253501892,
"learning_rate": 1.9338422391857505e-06,
"loss": 0.176,
"step": 1952
},
{
"epoch": 4.945500633713562,
"grad_norm": 0.7117200493812561,
"learning_rate": 1.8320610687022903e-06,
"loss": 0.1697,
"step": 1953
},
{
"epoch": 4.948035487959443,
"grad_norm": 0.7884610891342163,
"learning_rate": 1.7302798982188294e-06,
"loss": 0.1864,
"step": 1954
},
{
"epoch": 4.9505703422053235,
"grad_norm": 0.8606098890304565,
"learning_rate": 1.6284987277353691e-06,
"loss": 0.1568,
"step": 1955
},
{
"epoch": 4.9531051964512045,
"grad_norm": 0.5030885338783264,
"learning_rate": 1.5267175572519084e-06,
"loss": 0.1306,
"step": 1956
},
{
"epoch": 4.955640050697085,
"grad_norm": 0.5155559182167053,
"learning_rate": 1.424936386768448e-06,
"loss": 0.1311,
"step": 1957
},
{
"epoch": 4.9581749049429655,
"grad_norm": 0.4945980906486511,
"learning_rate": 1.3231552162849875e-06,
"loss": 0.1212,
"step": 1958
},
{
"epoch": 4.9607097591888465,
"grad_norm": 0.79302978515625,
"learning_rate": 1.2213740458015268e-06,
"loss": 0.1763,
"step": 1959
},
{
"epoch": 4.9632446134347274,
"grad_norm": 0.6397921442985535,
"learning_rate": 1.119592875318066e-06,
"loss": 0.1416,
"step": 1960
},
{
"epoch": 4.965779467680608,
"grad_norm": 0.6680799722671509,
"learning_rate": 1.0178117048346056e-06,
"loss": 0.1519,
"step": 1961
},
{
"epoch": 4.968314321926489,
"grad_norm": 0.5919336080551147,
"learning_rate": 9.160305343511451e-07,
"loss": 0.16,
"step": 1962
},
{
"epoch": 4.97084917617237,
"grad_norm": 0.5929127335548401,
"learning_rate": 8.142493638676846e-07,
"loss": 0.143,
"step": 1963
},
{
"epoch": 4.973384030418251,
"grad_norm": 0.5678686499595642,
"learning_rate": 7.12468193384224e-07,
"loss": 0.1236,
"step": 1964
},
{
"epoch": 4.975918884664132,
"grad_norm": 0.5478057861328125,
"learning_rate": 6.106870229007634e-07,
"loss": 0.1407,
"step": 1965
},
{
"epoch": 4.978453738910012,
"grad_norm": 0.6003939509391785,
"learning_rate": 5.089058524173028e-07,
"loss": 0.1315,
"step": 1966
},
{
"epoch": 4.980988593155893,
"grad_norm": 0.5943416357040405,
"learning_rate": 4.071246819338423e-07,
"loss": 0.1451,
"step": 1967
},
{
"epoch": 4.983523447401774,
"grad_norm": 0.5419045090675354,
"learning_rate": 3.053435114503817e-07,
"loss": 0.1338,
"step": 1968
},
{
"epoch": 4.986058301647655,
"grad_norm": 0.5665134787559509,
"learning_rate": 2.0356234096692114e-07,
"loss": 0.1347,
"step": 1969
},
{
"epoch": 4.988593155893536,
"grad_norm": 0.5646002292633057,
"learning_rate": 1.0178117048346057e-07,
"loss": 0.1352,
"step": 1970
}
],
"logging_steps": 1,
"max_steps": 1970,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5558390987853286e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}