Upload folder using huggingface_hub

f1f5944 verified over 1 year ago

24.9 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.012,
	"eval_steps": 500,
	"global_step": 150,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 8e-05,
	"grad_norm": 0.18695563077926636,
	"learning_rate": 4e-05,
	"loss": 0.626,
	"step": 1
	},
	{
	"epoch": 0.00016,
	"grad_norm": 0.16322645545005798,
	"learning_rate": 8e-05,
	"loss": 0.6748,
	"step": 2
	},
	{
	"epoch": 0.00024,
	"grad_norm": 0.20551565289497375,
	"learning_rate": 0.00012,
	"loss": 0.8631,
	"step": 3
	},
	{
	"epoch": 0.00032,
	"grad_norm": 0.19189168512821198,
	"learning_rate": 0.00016,
	"loss": 0.7236,
	"step": 4
	},
	{
	"epoch": 0.0004,
	"grad_norm": 0.17240828275680542,
	"learning_rate": 0.0002,
	"loss": 0.6456,
	"step": 5
	},
	{
	"epoch": 0.00048,
	"grad_norm": 0.1921045035123825,
	"learning_rate": 0.00016,
	"loss": 0.7794,
	"step": 6
	},
	{
	"epoch": 0.00056,
	"grad_norm": 0.17362892627716064,
	"learning_rate": 0.00012,
	"loss": 0.5114,
	"step": 7
	},
	{
	"epoch": 0.00064,
	"grad_norm": 0.22235442698001862,
	"learning_rate": 8e-05,
	"loss": 0.8547,
	"step": 8
	},
	{
	"epoch": 0.00072,
	"grad_norm": 0.17304766178131104,
	"learning_rate": 4e-05,
	"loss": 0.7826,
	"step": 9
	},
	{
	"epoch": 0.0008,
	"grad_norm": 0.20972397923469543,
	"learning_rate": 0.0,
	"loss": 0.6725,
	"step": 10
	},
	{
	"epoch": 0.00088,
	"grad_norm": 0.18230140209197998,
	"learning_rate": 0.0,
	"loss": 0.8175,
	"step": 11
	},
	{
	"epoch": 0.00096,
	"grad_norm": 0.2014843076467514,
	"learning_rate": 0.00019988795518207283,
	"loss": 1.0219,
	"step": 12
	},
	{
	"epoch": 0.00104,
	"grad_norm": 0.24639324843883514,
	"learning_rate": 0.0001998719487795118,
	"loss": 0.8582,
	"step": 13
	},
	{
	"epoch": 0.00112,
	"grad_norm": 0.1707516610622406,
	"learning_rate": 0.0001998559423769508,
	"loss": 0.5871,
	"step": 14
	},
	{
	"epoch": 0.0012,
	"grad_norm": 0.17754444479942322,
	"learning_rate": 0.00019983993597438976,
	"loss": 0.8023,
	"step": 15
	},
	{
	"epoch": 0.00128,
	"grad_norm": 0.2088427096605301,
	"learning_rate": 0.00019982392957182873,
	"loss": 0.5597,
	"step": 16
	},
	{
	"epoch": 0.00136,
	"grad_norm": 0.2147207260131836,
	"learning_rate": 0.0001998079231692677,
	"loss": 0.9279,
	"step": 17
	},
	{
	"epoch": 0.00144,
	"grad_norm": 0.2416459172964096,
	"learning_rate": 0.0001997919167667067,
	"loss": 0.7524,
	"step": 18
	},
	{
	"epoch": 0.00152,
	"grad_norm": 0.18660244345664978,
	"learning_rate": 0.00019977591036414566,
	"loss": 0.723,
	"step": 19
	},
	{
	"epoch": 0.0016,
	"grad_norm": 0.24767373502254486,
	"learning_rate": 0.00019975990396158463,
	"loss": 0.8528,
	"step": 20
	},
	{
	"epoch": 0.00168,
	"grad_norm": 0.2119741290807724,
	"learning_rate": 0.00019974389755902363,
	"loss": 0.8555,
	"step": 21
	},
	{
	"epoch": 0.00176,
	"grad_norm": 0.19749240577220917,
	"learning_rate": 0.0001997278911564626,
	"loss": 0.8141,
	"step": 22
	},
	{
	"epoch": 0.00184,
	"grad_norm": 0.15635022521018982,
	"learning_rate": 0.00019971188475390156,
	"loss": 1.0032,
	"step": 23
	},
	{
	"epoch": 0.00192,
	"grad_norm": 0.28329262137413025,
	"learning_rate": 0.00019969587835134053,
	"loss": 0.8104,
	"step": 24
	},
	{
	"epoch": 0.002,
	"grad_norm": 0.2267996221780777,
	"learning_rate": 0.00019967987194877953,
	"loss": 0.5932,
	"step": 25
	},
	{
	"epoch": 0.00208,
	"grad_norm": 0.2392159253358841,
	"learning_rate": 0.0001996638655462185,
	"loss": 0.7813,
	"step": 26
	},
	{
	"epoch": 0.00216,
	"grad_norm": 0.29656457901000977,
	"learning_rate": 0.00019964785914365746,
	"loss": 0.7647,
	"step": 27
	},
	{
	"epoch": 0.00224,
	"grad_norm": 0.276050329208374,
	"learning_rate": 0.00019963185274109646,
	"loss": 0.7368,
	"step": 28
	},
	{
	"epoch": 0.00232,
	"grad_norm": 0.26816362142562866,
	"learning_rate": 0.00019961584633853543,
	"loss": 1.0461,
	"step": 29
	},
	{
	"epoch": 0.0024,
	"grad_norm": 0.2567765414714813,
	"learning_rate": 0.0001995998399359744,
	"loss": 1.0064,
	"step": 30
	},
	{
	"epoch": 0.00248,
	"grad_norm": 0.28481513261795044,
	"learning_rate": 0.00019958383353341336,
	"loss": 0.6283,
	"step": 31
	},
	{
	"epoch": 0.00256,
	"grad_norm": 0.19182950258255005,
	"learning_rate": 0.00019956782713085236,
	"loss": 0.5184,
	"step": 32
	},
	{
	"epoch": 0.00264,
	"grad_norm": 0.2858627438545227,
	"learning_rate": 0.00019955182072829133,
	"loss": 0.5853,
	"step": 33
	},
	{
	"epoch": 0.00272,
	"grad_norm": 0.23260071873664856,
	"learning_rate": 0.0001995358143257303,
	"loss": 0.5486,
	"step": 34
	},
	{
	"epoch": 0.0028,
	"grad_norm": 0.2574014365673065,
	"learning_rate": 0.00019951980792316926,
	"loss": 0.7127,
	"step": 35
	},
	{
	"epoch": 0.00288,
	"grad_norm": 0.27332785725593567,
	"learning_rate": 0.00019950380152060826,
	"loss": 0.9821,
	"step": 36
	},
	{
	"epoch": 0.00296,
	"grad_norm": 0.2918913960456848,
	"learning_rate": 0.00019948779511804723,
	"loss": 0.853,
	"step": 37
	},
	{
	"epoch": 0.00304,
	"grad_norm": 0.22690187394618988,
	"learning_rate": 0.0001994717887154862,
	"loss": 0.6959,
	"step": 38
	},
	{
	"epoch": 0.00312,
	"grad_norm": 0.24837082624435425,
	"learning_rate": 0.00019945578231292518,
	"loss": 0.7622,
	"step": 39
	},
	{
	"epoch": 0.0032,
	"grad_norm": 0.24773573875427246,
	"learning_rate": 0.00019943977591036416,
	"loss": 0.9853,
	"step": 40
	},
	{
	"epoch": 0.00328,
	"grad_norm": 0.2665715515613556,
	"learning_rate": 0.00019942376950780313,
	"loss": 0.7365,
	"step": 41
	},
	{
	"epoch": 0.00336,
	"grad_norm": 0.2815437912940979,
	"learning_rate": 0.0001994077631052421,
	"loss": 0.9859,
	"step": 42
	},
	{
	"epoch": 0.00344,
	"grad_norm": 0.23276300728321075,
	"learning_rate": 0.00019939175670268108,
	"loss": 0.7499,
	"step": 43
	},
	{
	"epoch": 0.00352,
	"grad_norm": 0.2659528851509094,
	"learning_rate": 0.00019937575030012006,
	"loss": 0.7896,
	"step": 44
	},
	{
	"epoch": 0.0036,
	"grad_norm": 0.2777968943119049,
	"learning_rate": 0.00019935974389755903,
	"loss": 0.7405,
	"step": 45
	},
	{
	"epoch": 0.00368,
	"grad_norm": 0.2703694999217987,
	"learning_rate": 0.000199343737494998,
	"loss": 0.8549,
	"step": 46
	},
	{
	"epoch": 0.00376,
	"grad_norm": 0.2913441061973572,
	"learning_rate": 0.00019932773109243698,
	"loss": 0.7648,
	"step": 47
	},
	{
	"epoch": 0.00384,
	"grad_norm": 0.21126149594783783,
	"learning_rate": 0.00019931172468987596,
	"loss": 0.6442,
	"step": 48
	},
	{
	"epoch": 0.00392,
	"grad_norm": 0.35344573855400085,
	"learning_rate": 0.00019929571828731493,
	"loss": 1.0157,
	"step": 49
	},
	{
	"epoch": 0.004,
	"grad_norm": 0.211960569024086,
	"learning_rate": 0.0001992797118847539,
	"loss": 1.0145,
	"step": 50
	},
	{
	"epoch": 0.00408,
	"grad_norm": 0.17948386073112488,
	"learning_rate": 0.00019926370548219288,
	"loss": 0.4476,
	"step": 51
	},
	{
	"epoch": 0.00416,
	"grad_norm": 0.18907713890075684,
	"learning_rate": 0.00019924769907963185,
	"loss": 0.8988,
	"step": 52
	},
	{
	"epoch": 0.00424,
	"grad_norm": 0.9255684614181519,
	"learning_rate": 0.00019923169267707086,
	"loss": 1.3091,
	"step": 53
	},
	{
	"epoch": 0.00432,
	"grad_norm": 0.23128096759319305,
	"learning_rate": 0.0001992156862745098,
	"loss": 0.6738,
	"step": 54
	},
	{
	"epoch": 0.0044,
	"grad_norm": 0.32358431816101074,
	"learning_rate": 0.00019919967987194878,
	"loss": 1.0512,
	"step": 55
	},
	{
	"epoch": 0.00448,
	"grad_norm": 0.21004758775234222,
	"learning_rate": 0.00019918367346938775,
	"loss": 1.1109,
	"step": 56
	},
	{
	"epoch": 0.00456,
	"grad_norm": 0.17308218777179718,
	"learning_rate": 0.00019916766706682676,
	"loss": 0.7186,
	"step": 57
	},
	{
	"epoch": 0.00464,
	"grad_norm": 0.1969563513994217,
	"learning_rate": 0.0001991516606642657,
	"loss": 0.8231,
	"step": 58
	},
	{
	"epoch": 0.00472,
	"grad_norm": 0.20930427312850952,
	"learning_rate": 0.00019913565426170468,
	"loss": 0.7483,
	"step": 59
	},
	{
	"epoch": 0.0048,
	"grad_norm": 0.2239973247051239,
	"learning_rate": 0.00019911964785914368,
	"loss": 0.9065,
	"step": 60
	},
	{
	"epoch": 0.00488,
	"grad_norm": 0.21532970666885376,
	"learning_rate": 0.00019910364145658266,
	"loss": 0.7133,
	"step": 61
	},
	{
	"epoch": 0.00496,
	"grad_norm": 0.22679661214351654,
	"learning_rate": 0.0001990876350540216,
	"loss": 0.8632,
	"step": 62
	},
	{
	"epoch": 0.00504,
	"grad_norm": 0.18961389362812042,
	"learning_rate": 0.00019907162865146058,
	"loss": 0.7713,
	"step": 63
	},
	{
	"epoch": 0.00512,
	"grad_norm": 0.3985270857810974,
	"learning_rate": 0.00019905562224889958,
	"loss": 1.1621,
	"step": 64
	},
	{
	"epoch": 0.0052,
	"grad_norm": 0.1857418268918991,
	"learning_rate": 0.00019903961584633856,
	"loss": 0.7665,
	"step": 65
	},
	{
	"epoch": 0.00528,
	"grad_norm": 0.21082746982574463,
	"learning_rate": 0.0001990236094437775,
	"loss": 0.8936,
	"step": 66
	},
	{
	"epoch": 0.00536,
	"grad_norm": 0.2598806619644165,
	"learning_rate": 0.0001990076030412165,
	"loss": 0.8367,
	"step": 67
	},
	{
	"epoch": 0.00544,
	"grad_norm": 0.21064138412475586,
	"learning_rate": 0.00019899159663865548,
	"loss": 0.7481,
	"step": 68
	},
	{
	"epoch": 0.00552,
	"grad_norm": 0.17963984608650208,
	"learning_rate": 0.00019897559023609445,
	"loss": 0.857,
	"step": 69
	},
	{
	"epoch": 0.0056,
	"grad_norm": 0.2018403857946396,
	"learning_rate": 0.0001989595838335334,
	"loss": 0.6117,
	"step": 70
	},
	{
	"epoch": 0.00568,
	"grad_norm": 0.2090141773223877,
	"learning_rate": 0.0001989435774309724,
	"loss": 0.4831,
	"step": 71
	},
	{
	"epoch": 0.00576,
	"grad_norm": 0.19442321360111237,
	"learning_rate": 0.00019892757102841138,
	"loss": 0.6887,
	"step": 72
	},
	{
	"epoch": 0.00584,
	"grad_norm": 0.20884303748607635,
	"learning_rate": 0.00019891156462585035,
	"loss": 0.8775,
	"step": 73
	},
	{
	"epoch": 0.00592,
	"grad_norm": 0.23718436062335968,
	"learning_rate": 0.00019889555822328933,
	"loss": 0.9292,
	"step": 74
	},
	{
	"epoch": 0.006,
	"grad_norm": 0.2717212438583374,
	"learning_rate": 0.0001988795518207283,
	"loss": 0.66,
	"step": 75
	},
	{
	"epoch": 0.00608,
	"grad_norm": 0.2522720396518707,
	"learning_rate": 0.00019886354541816728,
	"loss": 0.7293,
	"step": 76
	},
	{
	"epoch": 0.00616,
	"grad_norm": 0.22638511657714844,
	"learning_rate": 0.00019884753901560625,
	"loss": 0.9605,
	"step": 77
	},
	{
	"epoch": 0.00624,
	"grad_norm": 0.25353768467903137,
	"learning_rate": 0.00019883153261304523,
	"loss": 0.7305,
	"step": 78
	},
	{
	"epoch": 0.00632,
	"grad_norm": 0.26388710737228394,
	"learning_rate": 0.0001988155262104842,
	"loss": 1.0473,
	"step": 79
	},
	{
	"epoch": 0.0064,
	"grad_norm": 0.22967277467250824,
	"learning_rate": 0.00019879951980792318,
	"loss": 0.8024,
	"step": 80
	},
	{
	"epoch": 0.00648,
	"grad_norm": 0.21001595258712769,
	"learning_rate": 0.00019878351340536215,
	"loss": 0.6549,
	"step": 81
	},
	{
	"epoch": 0.00656,
	"grad_norm": 0.22154393792152405,
	"learning_rate": 0.00019876750700280113,
	"loss": 0.8322,
	"step": 82
	},
	{
	"epoch": 0.00664,
	"grad_norm": 0.2273344248533249,
	"learning_rate": 0.0001987515006002401,
	"loss": 0.8533,
	"step": 83
	},
	{
	"epoch": 0.00672,
	"grad_norm": 0.2042098492383957,
	"learning_rate": 0.00019873549419767908,
	"loss": 1.0104,
	"step": 84
	},
	{
	"epoch": 0.0068,
	"grad_norm": 0.18789270520210266,
	"learning_rate": 0.00019871948779511805,
	"loss": 0.8703,
	"step": 85
	},
	{
	"epoch": 0.00688,
	"grad_norm": 0.16704747080802917,
	"learning_rate": 0.00019870348139255703,
	"loss": 0.6079,
	"step": 86
	},
	{
	"epoch": 0.00696,
	"grad_norm": 0.20875659584999084,
	"learning_rate": 0.000198687474989996,
	"loss": 0.8806,
	"step": 87
	},
	{
	"epoch": 0.00704,
	"grad_norm": 0.17773783206939697,
	"learning_rate": 0.000198671468587435,
	"loss": 0.6195,
	"step": 88
	},
	{
	"epoch": 0.00712,
	"grad_norm": 0.20498760044574738,
	"learning_rate": 0.00019865546218487395,
	"loss": 0.8146,
	"step": 89
	},
	{
	"epoch": 0.0072,
	"grad_norm": 0.1688094437122345,
	"learning_rate": 0.00019863945578231293,
	"loss": 1.1415,
	"step": 90
	},
	{
	"epoch": 0.00728,
	"grad_norm": 0.22424210608005524,
	"learning_rate": 0.0001986234493797519,
	"loss": 0.8,
	"step": 91
	},
	{
	"epoch": 0.00736,
	"grad_norm": 0.21771728992462158,
	"learning_rate": 0.0001986074429771909,
	"loss": 0.5614,
	"step": 92
	},
	{
	"epoch": 0.00744,
	"grad_norm": 0.2241130769252777,
	"learning_rate": 0.00019859143657462985,
	"loss": 0.8084,
	"step": 93
	},
	{
	"epoch": 0.00752,
	"grad_norm": 0.1654769629240036,
	"learning_rate": 0.00019857543017206883,
	"loss": 0.687,
	"step": 94
	},
	{
	"epoch": 0.0076,
	"grad_norm": 0.16390787065029144,
	"learning_rate": 0.0001985594237695078,
	"loss": 0.5289,
	"step": 95
	},
	{
	"epoch": 0.00768,
	"grad_norm": 0.259437620639801,
	"learning_rate": 0.0001985434173669468,
	"loss": 0.5644,
	"step": 96
	},
	{
	"epoch": 0.00776,
	"grad_norm": 0.20152436196804047,
	"learning_rate": 0.00019852741096438575,
	"loss": 0.6532,
	"step": 97
	},
	{
	"epoch": 0.00784,
	"grad_norm": 0.22755707800388336,
	"learning_rate": 0.00019851140456182473,
	"loss": 0.7435,
	"step": 98
	},
	{
	"epoch": 0.00792,
	"grad_norm": 0.21967531740665436,
	"learning_rate": 0.00019849539815926373,
	"loss": 0.7607,
	"step": 99
	},
	{
	"epoch": 0.008,
	"grad_norm": 0.18391412496566772,
	"learning_rate": 0.0001984793917567027,
	"loss": 0.7239,
	"step": 100
	},
	{
	"epoch": 0.00808,
	"grad_norm": 0.2660037577152252,
	"learning_rate": 0.00019846338535414165,
	"loss": 0.7299,
	"step": 101
	},
	{
	"epoch": 0.00816,
	"grad_norm": 0.1816340982913971,
	"learning_rate": 0.00019844737895158062,
	"loss": 0.7276,
	"step": 102
	},
	{
	"epoch": 0.00824,
	"grad_norm": 0.21206796169281006,
	"learning_rate": 0.00019843137254901963,
	"loss": 0.7689,
	"step": 103
	},
	{
	"epoch": 0.00832,
	"grad_norm": 0.18705548346042633,
	"learning_rate": 0.0001984153661464586,
	"loss": 0.7199,
	"step": 104
	},
	{
	"epoch": 0.0084,
	"grad_norm": 0.2467879205942154,
	"learning_rate": 0.00019839935974389755,
	"loss": 1.0206,
	"step": 105
	},
	{
	"epoch": 0.00848,
	"grad_norm": 0.2145715057849884,
	"learning_rate": 0.00019838335334133655,
	"loss": 0.8011,
	"step": 106
	},
	{
	"epoch": 0.00856,
	"grad_norm": 0.23377610743045807,
	"learning_rate": 0.00019836734693877553,
	"loss": 0.9455,
	"step": 107
	},
	{
	"epoch": 0.00864,
	"grad_norm": 0.13857395946979523,
	"learning_rate": 0.0001983513405362145,
	"loss": 0.603,
	"step": 108
	},
	{
	"epoch": 0.00872,
	"grad_norm": 0.21066828072071075,
	"learning_rate": 0.00019833533413365345,
	"loss": 0.7147,
	"step": 109
	},
	{
	"epoch": 0.0088,
	"grad_norm": 0.22423389554023743,
	"learning_rate": 0.00019831932773109245,
	"loss": 0.7619,
	"step": 110
	},
	{
	"epoch": 0.00888,
	"grad_norm": 0.20110934972763062,
	"learning_rate": 0.00019830332132853143,
	"loss": 0.6215,
	"step": 111
	},
	{
	"epoch": 0.00896,
	"grad_norm": 0.22843226790428162,
	"learning_rate": 0.0001982873149259704,
	"loss": 0.833,
	"step": 112
	},
	{
	"epoch": 0.00904,
	"grad_norm": 0.171301007270813,
	"learning_rate": 0.00019827130852340938,
	"loss": 0.9602,
	"step": 113
	},
	{
	"epoch": 0.00912,
	"grad_norm": 0.21754777431488037,
	"learning_rate": 0.00019825530212084835,
	"loss": 0.8931,
	"step": 114
	},
	{
	"epoch": 0.0092,
	"grad_norm": 0.16314199566841125,
	"learning_rate": 0.00019823929571828732,
	"loss": 0.6414,
	"step": 115
	},
	{
	"epoch": 0.00928,
	"grad_norm": 0.17339545488357544,
	"learning_rate": 0.0001982232893157263,
	"loss": 0.8579,
	"step": 116
	},
	{
	"epoch": 0.00936,
	"grad_norm": 0.2185641974210739,
	"learning_rate": 0.00019820728291316527,
	"loss": 0.5762,
	"step": 117
	},
	{
	"epoch": 0.00944,
	"grad_norm": 0.23066163063049316,
	"learning_rate": 0.00019819127651060425,
	"loss": 0.7929,
	"step": 118
	},
	{
	"epoch": 0.00952,
	"grad_norm": 0.16946138441562653,
	"learning_rate": 0.00019817527010804322,
	"loss": 0.6734,
	"step": 119
	},
	{
	"epoch": 0.0096,
	"grad_norm": 0.16290231049060822,
	"learning_rate": 0.0001981592637054822,
	"loss": 0.4331,
	"step": 120
	},
	{
	"epoch": 0.00968,
	"grad_norm": 0.14785629510879517,
	"learning_rate": 0.00019814325730292117,
	"loss": 0.5846,
	"step": 121
	},
	{
	"epoch": 0.00976,
	"grad_norm": 0.15986767411231995,
	"learning_rate": 0.00019812725090036015,
	"loss": 0.7937,
	"step": 122
	},
	{
	"epoch": 0.00984,
	"grad_norm": 0.22597737610340118,
	"learning_rate": 0.00019811124449779912,
	"loss": 0.645,
	"step": 123
	},
	{
	"epoch": 0.00992,
	"grad_norm": 0.16873855888843536,
	"learning_rate": 0.0001980952380952381,
	"loss": 0.783,
	"step": 124
	},
	{
	"epoch": 0.01,
	"grad_norm": 0.24884037673473358,
	"learning_rate": 0.00019807923169267707,
	"loss": 0.806,
	"step": 125
	},
	{
	"epoch": 0.01008,
	"grad_norm": 0.1921387016773224,
	"learning_rate": 0.00019806322529011605,
	"loss": 0.7133,
	"step": 126
	},
	{
	"epoch": 0.01016,
	"grad_norm": 0.1714552938938141,
	"learning_rate": 0.00019804721888755505,
	"loss": 0.8823,
	"step": 127
	},
	{
	"epoch": 0.01024,
	"grad_norm": 0.17558862268924713,
	"learning_rate": 0.000198031212484994,
	"loss": 0.5438,
	"step": 128
	},
	{
	"epoch": 0.01032,
	"grad_norm": 0.20176133513450623,
	"learning_rate": 0.00019801520608243297,
	"loss": 0.6564,
	"step": 129
	},
	{
	"epoch": 0.0104,
	"grad_norm": 0.16648930311203003,
	"learning_rate": 0.00019799919967987195,
	"loss": 0.8251,
	"step": 130
	},
	{
	"epoch": 0.01048,
	"grad_norm": 0.15654760599136353,
	"learning_rate": 0.00019798319327731095,
	"loss": 0.7868,
	"step": 131
	},
	{
	"epoch": 0.01056,
	"grad_norm": 0.1604606807231903,
	"learning_rate": 0.0001979671868747499,
	"loss": 0.9408,
	"step": 132
	},
	{
	"epoch": 0.01064,
	"grad_norm": 0.1831110417842865,
	"learning_rate": 0.00019795118047218887,
	"loss": 0.6789,
	"step": 133
	},
	{
	"epoch": 0.01072,
	"grad_norm": 0.19563564658164978,
	"learning_rate": 0.00019793517406962787,
	"loss": 0.6197,
	"step": 134
	},
	{
	"epoch": 0.0108,
	"grad_norm": 0.15682204067707062,
	"learning_rate": 0.00019791916766706685,
	"loss": 0.3705,
	"step": 135
	},
	{
	"epoch": 0.01088,
	"grad_norm": 0.20387424528598785,
	"learning_rate": 0.0001979031612645058,
	"loss": 0.6203,
	"step": 136
	},
	{
	"epoch": 0.01096,
	"grad_norm": 0.18805289268493652,
	"learning_rate": 0.00019788715486194477,
	"loss": 0.5732,
	"step": 137
	},
	{
	"epoch": 0.01104,
	"grad_norm": 0.190113365650177,
	"learning_rate": 0.00019787114845938377,
	"loss": 0.4919,
	"step": 138
	},
	{
	"epoch": 0.01112,
	"grad_norm": 0.22532878816127777,
	"learning_rate": 0.00019785514205682275,
	"loss": 0.4651,
	"step": 139
	},
	{
	"epoch": 0.0112,
	"grad_norm": 0.23364323377609253,
	"learning_rate": 0.0001978391356542617,
	"loss": 0.9228,
	"step": 140
	},
	{
	"epoch": 0.01128,
	"grad_norm": 0.18550938367843628,
	"learning_rate": 0.0001978231292517007,
	"loss": 0.7556,
	"step": 141
	},
	{
	"epoch": 0.01136,
	"grad_norm": 0.21325847506523132,
	"learning_rate": 0.00019780712284913967,
	"loss": 0.6726,
	"step": 142
	},
	{
	"epoch": 0.01144,
	"grad_norm": 0.21966691315174103,
	"learning_rate": 0.00019779111644657865,
	"loss": 0.7203,
	"step": 143
	},
	{
	"epoch": 0.01152,
	"grad_norm": 0.17671513557434082,
	"learning_rate": 0.0001977751100440176,
	"loss": 0.7607,
	"step": 144
	},
	{
	"epoch": 0.0116,
	"grad_norm": 0.21979670226573944,
	"learning_rate": 0.0001977591036414566,
	"loss": 0.738,
	"step": 145
	},
	{
	"epoch": 0.01168,
	"grad_norm": 0.24346943199634552,
	"learning_rate": 0.00019774309723889557,
	"loss": 0.9575,
	"step": 146
	},
	{
	"epoch": 0.01176,
	"grad_norm": 0.17305152118206024,
	"learning_rate": 0.00019772709083633455,
	"loss": 0.7207,
	"step": 147
	},
	{
	"epoch": 0.01184,
	"grad_norm": 0.17260083556175232,
	"learning_rate": 0.0001977110844337735,
	"loss": 0.7765,
	"step": 148
	},
	{
	"epoch": 0.01192,
	"grad_norm": 0.20756393671035767,
	"learning_rate": 0.0001976950780312125,
	"loss": 0.7864,
	"step": 149
	},
	{
	"epoch": 0.012,
	"grad_norm": 0.20012526214122772,
	"learning_rate": 0.00019767907162865147,
	"loss": 0.6435,
	"step": 150
	}
	],
	"logging_steps": 1,
	"max_steps": 12500,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 10,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 1.5678754959040512e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}