afroscope-model / trainer_state.json
14kwonss's picture
Upload model checkpoint-138718
4e55c46 verified
{
"best_global_step": 138718,
"best_metric": 0.9915470627263667,
"best_model_checkpoint": "/home/skwon01/scratch/sibal/finetuned_models/serengeti_camera_ready/checkpoint-138718",
"epoch": 2.0,
"eval_steps": 1000.0,
"global_step": 138718,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007208869793393791,
"grad_norm": 2.880587577819824,
"learning_rate": 1.9985611095892387e-05,
"loss": 3.675,
"step": 500
},
{
"epoch": 0.014417739586787583,
"grad_norm": 3.1965484619140625,
"learning_rate": 1.99711933563056e-05,
"loss": 1.3703,
"step": 1000
},
{
"epoch": 0.021626609380181374,
"grad_norm": 3.587383270263672,
"learning_rate": 1.9956775616718814e-05,
"loss": 0.7317,
"step": 1500
},
{
"epoch": 0.028835479173575165,
"grad_norm": 2.73246169090271,
"learning_rate": 1.9942357877132026e-05,
"loss": 0.4764,
"step": 2000
},
{
"epoch": 0.03604434896696896,
"grad_norm": 3.9599311351776123,
"learning_rate": 1.9927940137545237e-05,
"loss": 0.3488,
"step": 2500
},
{
"epoch": 0.04325321876036275,
"grad_norm": 3.690446138381958,
"learning_rate": 1.991352239795845e-05,
"loss": 0.2729,
"step": 3000
},
{
"epoch": 0.05046208855375654,
"grad_norm": 3.0428125858306885,
"learning_rate": 1.989910465837166e-05,
"loss": 0.2249,
"step": 3500
},
{
"epoch": 0.05767095834715033,
"grad_norm": 2.6362786293029785,
"learning_rate": 1.9884686918784876e-05,
"loss": 0.1907,
"step": 4000
},
{
"epoch": 0.06487982814054413,
"grad_norm": 4.072872161865234,
"learning_rate": 1.9870269179198087e-05,
"loss": 0.1695,
"step": 4500
},
{
"epoch": 0.07208869793393792,
"grad_norm": 2.4177143573760986,
"learning_rate": 1.98558514396113e-05,
"loss": 0.1535,
"step": 5000
},
{
"epoch": 0.07929756772733171,
"grad_norm": 2.4438438415527344,
"learning_rate": 1.9841433700024514e-05,
"loss": 0.1429,
"step": 5500
},
{
"epoch": 0.0865064375207255,
"grad_norm": 1.9982225894927979,
"learning_rate": 1.9827015960437722e-05,
"loss": 0.1348,
"step": 6000
},
{
"epoch": 0.0937153073141193,
"grad_norm": 2.988769769668579,
"learning_rate": 1.9812598220850938e-05,
"loss": 0.1226,
"step": 6500
},
{
"epoch": 0.10092417710751309,
"grad_norm": 2.386380672454834,
"learning_rate": 1.979818048126415e-05,
"loss": 0.1168,
"step": 7000
},
{
"epoch": 0.10813304690090687,
"grad_norm": 1.9924527406692505,
"learning_rate": 1.978376274167736e-05,
"loss": 0.1082,
"step": 7500
},
{
"epoch": 0.11534191669430066,
"grad_norm": 1.9020510911941528,
"learning_rate": 1.9769345002090573e-05,
"loss": 0.1064,
"step": 8000
},
{
"epoch": 0.12255078648769446,
"grad_norm": 2.333510160446167,
"learning_rate": 1.9754927262503788e-05,
"loss": 0.1029,
"step": 8500
},
{
"epoch": 0.12975965628108826,
"grad_norm": 2.677407741546631,
"learning_rate": 1.9740509522917e-05,
"loss": 0.0995,
"step": 9000
},
{
"epoch": 0.13696852607448204,
"grad_norm": 1.5480279922485352,
"learning_rate": 1.972609178333021e-05,
"loss": 0.0948,
"step": 9500
},
{
"epoch": 0.14417739586787584,
"grad_norm": 2.630037546157837,
"learning_rate": 1.9711674043743423e-05,
"loss": 0.0937,
"step": 10000
},
{
"epoch": 0.15138626566126961,
"grad_norm": 2.267946243286133,
"learning_rate": 1.9697256304156634e-05,
"loss": 0.0909,
"step": 10500
},
{
"epoch": 0.15859513545466342,
"grad_norm": 2.932375907897949,
"learning_rate": 1.968283856456985e-05,
"loss": 0.0889,
"step": 11000
},
{
"epoch": 0.16580400524805722,
"grad_norm": 2.69350528717041,
"learning_rate": 1.966842082498306e-05,
"loss": 0.086,
"step": 11500
},
{
"epoch": 0.173012875041451,
"grad_norm": 2.1316378116607666,
"learning_rate": 1.9654003085396273e-05,
"loss": 0.0843,
"step": 12000
},
{
"epoch": 0.1802217448348448,
"grad_norm": 2.52103853225708,
"learning_rate": 1.9639585345809488e-05,
"loss": 0.0828,
"step": 12500
},
{
"epoch": 0.1874306146282386,
"grad_norm": 1.939334511756897,
"learning_rate": 1.9625167606222696e-05,
"loss": 0.0795,
"step": 13000
},
{
"epoch": 0.19463948442163237,
"grad_norm": 2.3057949542999268,
"learning_rate": 1.961074986663591e-05,
"loss": 0.0786,
"step": 13500
},
{
"epoch": 0.20184835421502617,
"grad_norm": 2.0021777153015137,
"learning_rate": 1.9596332127049123e-05,
"loss": 0.0773,
"step": 14000
},
{
"epoch": 0.20905722400841997,
"grad_norm": 2.276421546936035,
"learning_rate": 1.9581914387462335e-05,
"loss": 0.0772,
"step": 14500
},
{
"epoch": 0.21626609380181375,
"grad_norm": 2.426966428756714,
"learning_rate": 1.9567496647875546e-05,
"loss": 0.0746,
"step": 15000
},
{
"epoch": 0.22347496359520755,
"grad_norm": 1.984330415725708,
"learning_rate": 1.955307890828876e-05,
"loss": 0.074,
"step": 15500
},
{
"epoch": 0.23068383338860132,
"grad_norm": 2.1131157875061035,
"learning_rate": 1.9538661168701973e-05,
"loss": 0.0754,
"step": 16000
},
{
"epoch": 0.23789270318199512,
"grad_norm": 2.672717332839966,
"learning_rate": 1.9524243429115185e-05,
"loss": 0.0719,
"step": 16500
},
{
"epoch": 0.24510157297538893,
"grad_norm": 1.4720840454101562,
"learning_rate": 1.9509825689528396e-05,
"loss": 0.0689,
"step": 17000
},
{
"epoch": 0.25231044276878273,
"grad_norm": 1.7824233770370483,
"learning_rate": 1.9495407949941608e-05,
"loss": 0.0711,
"step": 17500
},
{
"epoch": 0.25951931256217653,
"grad_norm": 1.7139828205108643,
"learning_rate": 1.9480990210354823e-05,
"loss": 0.067,
"step": 18000
},
{
"epoch": 0.2667281823555703,
"grad_norm": 2.2731082439422607,
"learning_rate": 1.9466572470768035e-05,
"loss": 0.0678,
"step": 18500
},
{
"epoch": 0.2739370521489641,
"grad_norm": 2.2537448406219482,
"learning_rate": 1.9452154731181247e-05,
"loss": 0.0657,
"step": 19000
},
{
"epoch": 0.2811459219423579,
"grad_norm": 3.0216615200042725,
"learning_rate": 1.943773699159446e-05,
"loss": 0.0656,
"step": 19500
},
{
"epoch": 0.2883547917357517,
"grad_norm": 1.4544578790664673,
"learning_rate": 1.942331925200767e-05,
"loss": 0.0658,
"step": 20000
},
{
"epoch": 0.2955636615291455,
"grad_norm": 2.4549198150634766,
"learning_rate": 1.9408901512420885e-05,
"loss": 0.0641,
"step": 20500
},
{
"epoch": 0.30277253132253923,
"grad_norm": 1.514060616493225,
"learning_rate": 1.9394483772834097e-05,
"loss": 0.0633,
"step": 21000
},
{
"epoch": 0.30998140111593303,
"grad_norm": 2.4346635341644287,
"learning_rate": 1.9380066033247308e-05,
"loss": 0.0627,
"step": 21500
},
{
"epoch": 0.31719027090932683,
"grad_norm": 1.432133436203003,
"learning_rate": 1.9365648293660523e-05,
"loss": 0.0616,
"step": 22000
},
{
"epoch": 0.32439914070272063,
"grad_norm": 1.2359411716461182,
"learning_rate": 1.9351230554073735e-05,
"loss": 0.0628,
"step": 22500
},
{
"epoch": 0.33160801049611444,
"grad_norm": 2.1902575492858887,
"learning_rate": 1.9336812814486947e-05,
"loss": 0.0628,
"step": 23000
},
{
"epoch": 0.33881688028950824,
"grad_norm": 1.7415978908538818,
"learning_rate": 1.932239507490016e-05,
"loss": 0.0616,
"step": 23500
},
{
"epoch": 0.346025750082902,
"grad_norm": 1.401383399963379,
"learning_rate": 1.930797733531337e-05,
"loss": 0.0589,
"step": 24000
},
{
"epoch": 0.3532346198762958,
"grad_norm": 1.5828105211257935,
"learning_rate": 1.9293559595726582e-05,
"loss": 0.0604,
"step": 24500
},
{
"epoch": 0.3604434896696896,
"grad_norm": 0.8541142344474792,
"learning_rate": 1.9279141856139797e-05,
"loss": 0.0599,
"step": 25000
},
{
"epoch": 0.3676523594630834,
"grad_norm": 2.8157145977020264,
"learning_rate": 1.926472411655301e-05,
"loss": 0.0593,
"step": 25500
},
{
"epoch": 0.3748612292564772,
"grad_norm": 2.129725217819214,
"learning_rate": 1.925030637696622e-05,
"loss": 0.0578,
"step": 26000
},
{
"epoch": 0.38207009904987094,
"grad_norm": 2.5838279724121094,
"learning_rate": 1.9235888637379435e-05,
"loss": 0.0574,
"step": 26500
},
{
"epoch": 0.38927896884326474,
"grad_norm": 1.7000998258590698,
"learning_rate": 1.9221470897792647e-05,
"loss": 0.0553,
"step": 27000
},
{
"epoch": 0.39648783863665854,
"grad_norm": 1.2641727924346924,
"learning_rate": 1.920705315820586e-05,
"loss": 0.0549,
"step": 27500
},
{
"epoch": 0.40369670843005234,
"grad_norm": 1.7529101371765137,
"learning_rate": 1.919263541861907e-05,
"loss": 0.0562,
"step": 28000
},
{
"epoch": 0.41090557822344614,
"grad_norm": 1.4027022123336792,
"learning_rate": 1.9178217679032282e-05,
"loss": 0.0552,
"step": 28500
},
{
"epoch": 0.41811444801683995,
"grad_norm": 1.6767141819000244,
"learning_rate": 1.9163799939445497e-05,
"loss": 0.0572,
"step": 29000
},
{
"epoch": 0.4253233178102337,
"grad_norm": 0.8946545720100403,
"learning_rate": 1.914938219985871e-05,
"loss": 0.0556,
"step": 29500
},
{
"epoch": 0.4325321876036275,
"grad_norm": 2.469862937927246,
"learning_rate": 1.913496446027192e-05,
"loss": 0.0546,
"step": 30000
},
{
"epoch": 0.4397410573970213,
"grad_norm": 3.368171215057373,
"learning_rate": 1.9120546720685132e-05,
"loss": 0.0527,
"step": 30500
},
{
"epoch": 0.4469499271904151,
"grad_norm": 2.107477903366089,
"learning_rate": 1.9106128981098344e-05,
"loss": 0.0538,
"step": 31000
},
{
"epoch": 0.4541587969838089,
"grad_norm": 1.8676276206970215,
"learning_rate": 1.9091711241511555e-05,
"loss": 0.0529,
"step": 31500
},
{
"epoch": 0.46136766677720265,
"grad_norm": 1.8789501190185547,
"learning_rate": 1.907729350192477e-05,
"loss": 0.0525,
"step": 32000
},
{
"epoch": 0.46857653657059645,
"grad_norm": 1.8588016033172607,
"learning_rate": 1.9062875762337982e-05,
"loss": 0.0519,
"step": 32500
},
{
"epoch": 0.47578540636399025,
"grad_norm": 1.6721725463867188,
"learning_rate": 1.9048458022751194e-05,
"loss": 0.0508,
"step": 33000
},
{
"epoch": 0.48299427615738405,
"grad_norm": 1.9724555015563965,
"learning_rate": 1.903404028316441e-05,
"loss": 0.0502,
"step": 33500
},
{
"epoch": 0.49020314595077785,
"grad_norm": 1.9921311140060425,
"learning_rate": 1.901962254357762e-05,
"loss": 0.051,
"step": 34000
},
{
"epoch": 0.49741201574417165,
"grad_norm": 2.889782190322876,
"learning_rate": 1.9005204803990832e-05,
"loss": 0.0518,
"step": 34500
},
{
"epoch": 0.5046208855375655,
"grad_norm": 1.7622694969177246,
"learning_rate": 1.8990787064404044e-05,
"loss": 0.0494,
"step": 35000
},
{
"epoch": 0.5118297553309592,
"grad_norm": 1.713699460029602,
"learning_rate": 1.8976369324817256e-05,
"loss": 0.0493,
"step": 35500
},
{
"epoch": 0.5190386251243531,
"grad_norm": 1.262862205505371,
"learning_rate": 1.896195158523047e-05,
"loss": 0.0496,
"step": 36000
},
{
"epoch": 0.5262474949177468,
"grad_norm": 2.085010051727295,
"learning_rate": 1.8947533845643682e-05,
"loss": 0.0509,
"step": 36500
},
{
"epoch": 0.5334563647111406,
"grad_norm": 1.6257765293121338,
"learning_rate": 1.8933116106056894e-05,
"loss": 0.0498,
"step": 37000
},
{
"epoch": 0.5406652345045344,
"grad_norm": 0.6558777093887329,
"learning_rate": 1.8918698366470106e-05,
"loss": 0.0484,
"step": 37500
},
{
"epoch": 0.5478741042979282,
"grad_norm": 1.7351698875427246,
"learning_rate": 1.8904280626883318e-05,
"loss": 0.0496,
"step": 38000
},
{
"epoch": 0.555082974091322,
"grad_norm": 0.915392279624939,
"learning_rate": 1.888986288729653e-05,
"loss": 0.0467,
"step": 38500
},
{
"epoch": 0.5622918438847158,
"grad_norm": 0.9719710350036621,
"learning_rate": 1.8875445147709744e-05,
"loss": 0.0491,
"step": 39000
},
{
"epoch": 0.5695007136781095,
"grad_norm": 0.4347970485687256,
"learning_rate": 1.8861027408122956e-05,
"loss": 0.0478,
"step": 39500
},
{
"epoch": 0.5767095834715034,
"grad_norm": 1.4013206958770752,
"learning_rate": 1.8846609668536168e-05,
"loss": 0.0482,
"step": 40000
},
{
"epoch": 0.5839184532648971,
"grad_norm": 1.6916135549545288,
"learning_rate": 1.8832191928949383e-05,
"loss": 0.0487,
"step": 40500
},
{
"epoch": 0.591127323058291,
"grad_norm": 1.1497479677200317,
"learning_rate": 1.8817774189362594e-05,
"loss": 0.0473,
"step": 41000
},
{
"epoch": 0.5983361928516847,
"grad_norm": 2.1202707290649414,
"learning_rate": 1.8803356449775806e-05,
"loss": 0.046,
"step": 41500
},
{
"epoch": 0.6055450626450785,
"grad_norm": 1.8288294076919556,
"learning_rate": 1.8788938710189018e-05,
"loss": 0.0473,
"step": 42000
},
{
"epoch": 0.6127539324384723,
"grad_norm": 0.8600142598152161,
"learning_rate": 1.877452097060223e-05,
"loss": 0.0452,
"step": 42500
},
{
"epoch": 0.6199628022318661,
"grad_norm": 2.8069839477539062,
"learning_rate": 1.8760103231015445e-05,
"loss": 0.048,
"step": 43000
},
{
"epoch": 0.6271716720252599,
"grad_norm": 0.8850429058074951,
"learning_rate": 1.8745685491428656e-05,
"loss": 0.0474,
"step": 43500
},
{
"epoch": 0.6343805418186537,
"grad_norm": 1.063219666481018,
"learning_rate": 1.8731267751841868e-05,
"loss": 0.0446,
"step": 44000
},
{
"epoch": 0.6415894116120474,
"grad_norm": 1.3925724029541016,
"learning_rate": 1.871685001225508e-05,
"loss": 0.0468,
"step": 44500
},
{
"epoch": 0.6487982814054413,
"grad_norm": 0.9575428366661072,
"learning_rate": 1.870243227266829e-05,
"loss": 0.0447,
"step": 45000
},
{
"epoch": 0.656007151198835,
"grad_norm": 2.547752618789673,
"learning_rate": 1.8688014533081503e-05,
"loss": 0.0456,
"step": 45500
},
{
"epoch": 0.6632160209922289,
"grad_norm": 0.6029974222183228,
"learning_rate": 1.8673596793494718e-05,
"loss": 0.0464,
"step": 46000
},
{
"epoch": 0.6704248907856226,
"grad_norm": 0.27106812596321106,
"learning_rate": 1.865917905390793e-05,
"loss": 0.0437,
"step": 46500
},
{
"epoch": 0.6776337605790165,
"grad_norm": 1.3233801126480103,
"learning_rate": 1.864476131432114e-05,
"loss": 0.0447,
"step": 47000
},
{
"epoch": 0.6848426303724102,
"grad_norm": 0.38903898000717163,
"learning_rate": 1.8630343574734356e-05,
"loss": 0.0455,
"step": 47500
},
{
"epoch": 0.692051500165804,
"grad_norm": 1.247036337852478,
"learning_rate": 1.8615925835147568e-05,
"loss": 0.044,
"step": 48000
},
{
"epoch": 0.6992603699591978,
"grad_norm": 0.9771102666854858,
"learning_rate": 1.860150809556078e-05,
"loss": 0.0446,
"step": 48500
},
{
"epoch": 0.7064692397525916,
"grad_norm": 1.6191680431365967,
"learning_rate": 1.858709035597399e-05,
"loss": 0.0455,
"step": 49000
},
{
"epoch": 0.7136781095459854,
"grad_norm": 0.9542379975318909,
"learning_rate": 1.8572672616387203e-05,
"loss": 0.0426,
"step": 49500
},
{
"epoch": 0.7208869793393792,
"grad_norm": 1.6160619258880615,
"learning_rate": 1.8558254876800418e-05,
"loss": 0.0433,
"step": 50000
},
{
"epoch": 0.7280958491327729,
"grad_norm": 1.1810977458953857,
"learning_rate": 1.854383713721363e-05,
"loss": 0.0443,
"step": 50500
},
{
"epoch": 0.7353047189261668,
"grad_norm": 1.4848960638046265,
"learning_rate": 1.852941939762684e-05,
"loss": 0.0442,
"step": 51000
},
{
"epoch": 0.7425135887195605,
"grad_norm": 1.2140188217163086,
"learning_rate": 1.8515001658040053e-05,
"loss": 0.0436,
"step": 51500
},
{
"epoch": 0.7497224585129544,
"grad_norm": 0.6803346276283264,
"learning_rate": 1.8500583918453265e-05,
"loss": 0.0416,
"step": 52000
},
{
"epoch": 0.7569313283063481,
"grad_norm": 2.847879409790039,
"learning_rate": 1.8486166178866477e-05,
"loss": 0.0401,
"step": 52500
},
{
"epoch": 0.7641401980997419,
"grad_norm": 1.3574286699295044,
"learning_rate": 1.8471748439279692e-05,
"loss": 0.0426,
"step": 53000
},
{
"epoch": 0.7713490678931357,
"grad_norm": 1.5763428211212158,
"learning_rate": 1.8457330699692903e-05,
"loss": 0.0416,
"step": 53500
},
{
"epoch": 0.7785579376865295,
"grad_norm": 2.006143808364868,
"learning_rate": 1.8442912960106115e-05,
"loss": 0.0423,
"step": 54000
},
{
"epoch": 0.7857668074799233,
"grad_norm": 2.0041260719299316,
"learning_rate": 1.842849522051933e-05,
"loss": 0.043,
"step": 54500
},
{
"epoch": 0.7929756772733171,
"grad_norm": 1.0083436965942383,
"learning_rate": 1.8414077480932542e-05,
"loss": 0.0428,
"step": 55000
},
{
"epoch": 0.8001845470667108,
"grad_norm": 1.2364863157272339,
"learning_rate": 1.8399659741345754e-05,
"loss": 0.0431,
"step": 55500
},
{
"epoch": 0.8073934168601047,
"grad_norm": 1.1397020816802979,
"learning_rate": 1.8385242001758965e-05,
"loss": 0.0408,
"step": 56000
},
{
"epoch": 0.8146022866534984,
"grad_norm": 1.046647071838379,
"learning_rate": 1.8370824262172177e-05,
"loss": 0.0424,
"step": 56500
},
{
"epoch": 0.8218111564468923,
"grad_norm": 0.7180289626121521,
"learning_rate": 1.8356406522585392e-05,
"loss": 0.0417,
"step": 57000
},
{
"epoch": 0.829020026240286,
"grad_norm": 1.866095781326294,
"learning_rate": 1.8341988782998604e-05,
"loss": 0.0406,
"step": 57500
},
{
"epoch": 0.8362288960336799,
"grad_norm": 1.7192025184631348,
"learning_rate": 1.8327571043411815e-05,
"loss": 0.042,
"step": 58000
},
{
"epoch": 0.8434377658270736,
"grad_norm": 1.3043447732925415,
"learning_rate": 1.8313153303825027e-05,
"loss": 0.0419,
"step": 58500
},
{
"epoch": 0.8506466356204674,
"grad_norm": 2.372190237045288,
"learning_rate": 1.829873556423824e-05,
"loss": 0.0421,
"step": 59000
},
{
"epoch": 0.8578555054138612,
"grad_norm": 0.9028930068016052,
"learning_rate": 1.828431782465145e-05,
"loss": 0.0396,
"step": 59500
},
{
"epoch": 0.865064375207255,
"grad_norm": 1.2869058847427368,
"learning_rate": 1.8269900085064665e-05,
"loss": 0.0401,
"step": 60000
},
{
"epoch": 0.8722732450006488,
"grad_norm": 2.214855670928955,
"learning_rate": 1.8255482345477877e-05,
"loss": 0.04,
"step": 60500
},
{
"epoch": 0.8794821147940426,
"grad_norm": 0.9826574325561523,
"learning_rate": 1.824106460589109e-05,
"loss": 0.0397,
"step": 61000
},
{
"epoch": 0.8866909845874363,
"grad_norm": 0.7741074562072754,
"learning_rate": 1.8226646866304304e-05,
"loss": 0.0397,
"step": 61500
},
{
"epoch": 0.8938998543808302,
"grad_norm": 1.2778081893920898,
"learning_rate": 1.8212229126717516e-05,
"loss": 0.0396,
"step": 62000
},
{
"epoch": 0.9011087241742239,
"grad_norm": 0.7415226697921753,
"learning_rate": 1.8197811387130727e-05,
"loss": 0.0398,
"step": 62500
},
{
"epoch": 0.9083175939676178,
"grad_norm": 2.152737617492676,
"learning_rate": 1.818339364754394e-05,
"loss": 0.0395,
"step": 63000
},
{
"epoch": 0.9155264637610115,
"grad_norm": 0.9719590544700623,
"learning_rate": 1.816897590795715e-05,
"loss": 0.0387,
"step": 63500
},
{
"epoch": 0.9227353335544053,
"grad_norm": 1.4587551355361938,
"learning_rate": 1.8154558168370366e-05,
"loss": 0.0395,
"step": 64000
},
{
"epoch": 0.9299442033477991,
"grad_norm": 1.4218809604644775,
"learning_rate": 1.8140140428783577e-05,
"loss": 0.0375,
"step": 64500
},
{
"epoch": 0.9371530731411929,
"grad_norm": 1.8009737730026245,
"learning_rate": 1.812572268919679e-05,
"loss": 0.0387,
"step": 65000
},
{
"epoch": 0.9443619429345868,
"grad_norm": 1.2379016876220703,
"learning_rate": 1.811130494961e-05,
"loss": 0.0386,
"step": 65500
},
{
"epoch": 0.9515708127279805,
"grad_norm": 1.1901589632034302,
"learning_rate": 1.8096887210023216e-05,
"loss": 0.0381,
"step": 66000
},
{
"epoch": 0.9587796825213742,
"grad_norm": 1.0341569185256958,
"learning_rate": 1.8082469470436424e-05,
"loss": 0.0402,
"step": 66500
},
{
"epoch": 0.9659885523147681,
"grad_norm": 1.4235957860946655,
"learning_rate": 1.806805173084964e-05,
"loss": 0.0382,
"step": 67000
},
{
"epoch": 0.9731974221081618,
"grad_norm": 1.095893383026123,
"learning_rate": 1.805363399126285e-05,
"loss": 0.0396,
"step": 67500
},
{
"epoch": 0.9804062919015557,
"grad_norm": 1.8859561681747437,
"learning_rate": 1.8039216251676063e-05,
"loss": 0.038,
"step": 68000
},
{
"epoch": 0.9876151616949495,
"grad_norm": 1.8770360946655273,
"learning_rate": 1.8024798512089278e-05,
"loss": 0.039,
"step": 68500
},
{
"epoch": 0.9948240314883433,
"grad_norm": 1.870827555656433,
"learning_rate": 1.801038077250249e-05,
"loss": 0.038,
"step": 69000
},
{
"epoch": 1.0,
"eval_f1": 0.9895049158009324,
"eval_loss": 0.034001659601926804,
"eval_runtime": 683.1241,
"eval_samples_per_second": 1528.989,
"eval_steps_per_second": 47.782,
"step": 69359
},
{
"epoch": 1.002032901281737,
"grad_norm": 0.4856395125389099,
"learning_rate": 1.79959630329157e-05,
"loss": 0.0352,
"step": 69500
},
{
"epoch": 1.009241771075131,
"grad_norm": 1.8835086822509766,
"learning_rate": 1.7981545293328913e-05,
"loss": 0.0287,
"step": 70000
},
{
"epoch": 1.0164506408685245,
"grad_norm": 1.941490888595581,
"learning_rate": 1.7967127553742124e-05,
"loss": 0.0307,
"step": 70500
},
{
"epoch": 1.0236595106619184,
"grad_norm": 1.525707483291626,
"learning_rate": 1.795270981415534e-05,
"loss": 0.03,
"step": 71000
},
{
"epoch": 1.0308683804553123,
"grad_norm": 0.6174446940422058,
"learning_rate": 1.793829207456855e-05,
"loss": 0.029,
"step": 71500
},
{
"epoch": 1.0380772502487061,
"grad_norm": 1.043771505355835,
"learning_rate": 1.7923874334981763e-05,
"loss": 0.0311,
"step": 72000
},
{
"epoch": 1.0452861200420998,
"grad_norm": 0.28765255212783813,
"learning_rate": 1.7909456595394978e-05,
"loss": 0.0291,
"step": 72500
},
{
"epoch": 1.0524949898354936,
"grad_norm": 0.8367669582366943,
"learning_rate": 1.789503885580819e-05,
"loss": 0.0307,
"step": 73000
},
{
"epoch": 1.0597038596288875,
"grad_norm": 0.8930952548980713,
"learning_rate": 1.7880621116221398e-05,
"loss": 0.0297,
"step": 73500
},
{
"epoch": 1.066912729422281,
"grad_norm": 1.0413399934768677,
"learning_rate": 1.7866203376634613e-05,
"loss": 0.03,
"step": 74000
},
{
"epoch": 1.074121599215675,
"grad_norm": 1.1929751634597778,
"learning_rate": 1.7851785637047825e-05,
"loss": 0.0287,
"step": 74500
},
{
"epoch": 1.0813304690090688,
"grad_norm": 0.8676954507827759,
"learning_rate": 1.7837367897461036e-05,
"loss": 0.0307,
"step": 75000
},
{
"epoch": 1.0885393388024625,
"grad_norm": 0.733383059501648,
"learning_rate": 1.782295015787425e-05,
"loss": 0.029,
"step": 75500
},
{
"epoch": 1.0957482085958563,
"grad_norm": 1.005913257598877,
"learning_rate": 1.7808532418287463e-05,
"loss": 0.0288,
"step": 76000
},
{
"epoch": 1.1029570783892502,
"grad_norm": 1.4946510791778564,
"learning_rate": 1.7794114678700675e-05,
"loss": 0.0294,
"step": 76500
},
{
"epoch": 1.110165948182644,
"grad_norm": 0.966665506362915,
"learning_rate": 1.7779696939113886e-05,
"loss": 0.0311,
"step": 77000
},
{
"epoch": 1.1173748179760377,
"grad_norm": 0.8129379749298096,
"learning_rate": 1.7765279199527098e-05,
"loss": 0.0301,
"step": 77500
},
{
"epoch": 1.1245836877694315,
"grad_norm": 1.1672717332839966,
"learning_rate": 1.7750861459940313e-05,
"loss": 0.0297,
"step": 78000
},
{
"epoch": 1.1317925575628254,
"grad_norm": 1.0149409770965576,
"learning_rate": 1.7736443720353525e-05,
"loss": 0.031,
"step": 78500
},
{
"epoch": 1.139001427356219,
"grad_norm": 1.3319754600524902,
"learning_rate": 1.7722025980766736e-05,
"loss": 0.0294,
"step": 79000
},
{
"epoch": 1.1462102971496129,
"grad_norm": 3.036787509918213,
"learning_rate": 1.770760824117995e-05,
"loss": 0.0294,
"step": 79500
},
{
"epoch": 1.1534191669430067,
"grad_norm": 0.6281238198280334,
"learning_rate": 1.7693190501593163e-05,
"loss": 0.0312,
"step": 80000
},
{
"epoch": 1.1606280367364006,
"grad_norm": 1.39284086227417,
"learning_rate": 1.767877276200637e-05,
"loss": 0.0299,
"step": 80500
},
{
"epoch": 1.1678369065297942,
"grad_norm": 2.4636764526367188,
"learning_rate": 1.7664355022419587e-05,
"loss": 0.0304,
"step": 81000
},
{
"epoch": 1.175045776323188,
"grad_norm": 1.0513309240341187,
"learning_rate": 1.7649937282832798e-05,
"loss": 0.0293,
"step": 81500
},
{
"epoch": 1.182254646116582,
"grad_norm": 0.739205539226532,
"learning_rate": 1.763551954324601e-05,
"loss": 0.0297,
"step": 82000
},
{
"epoch": 1.1894635159099756,
"grad_norm": 1.1646817922592163,
"learning_rate": 1.7621101803659225e-05,
"loss": 0.0281,
"step": 82500
},
{
"epoch": 1.1966723857033694,
"grad_norm": 1.6882481575012207,
"learning_rate": 1.7606684064072437e-05,
"loss": 0.0308,
"step": 83000
},
{
"epoch": 1.2038812554967633,
"grad_norm": 2.1905980110168457,
"learning_rate": 1.759226632448565e-05,
"loss": 0.0301,
"step": 83500
},
{
"epoch": 1.211090125290157,
"grad_norm": 0.4102253317832947,
"learning_rate": 1.757784858489886e-05,
"loss": 0.0296,
"step": 84000
},
{
"epoch": 1.2182989950835508,
"grad_norm": 1.5355827808380127,
"learning_rate": 1.7563430845312072e-05,
"loss": 0.031,
"step": 84500
},
{
"epoch": 1.2255078648769446,
"grad_norm": 0.4144400954246521,
"learning_rate": 1.7549013105725287e-05,
"loss": 0.0303,
"step": 85000
},
{
"epoch": 1.2327167346703383,
"grad_norm": 0.5286178588867188,
"learning_rate": 1.75345953661385e-05,
"loss": 0.0311,
"step": 85500
},
{
"epoch": 1.2399256044637321,
"grad_norm": 1.3401720523834229,
"learning_rate": 1.752017762655171e-05,
"loss": 0.0303,
"step": 86000
},
{
"epoch": 1.247134474257126,
"grad_norm": 1.5546993017196655,
"learning_rate": 1.7505759886964925e-05,
"loss": 0.0296,
"step": 86500
},
{
"epoch": 1.2543433440505198,
"grad_norm": 1.7993361949920654,
"learning_rate": 1.7491342147378137e-05,
"loss": 0.03,
"step": 87000
},
{
"epoch": 1.2615522138439135,
"grad_norm": 1.058311939239502,
"learning_rate": 1.7476924407791345e-05,
"loss": 0.0283,
"step": 87500
},
{
"epoch": 1.2687610836373073,
"grad_norm": 1.1616915464401245,
"learning_rate": 1.746250666820456e-05,
"loss": 0.0306,
"step": 88000
},
{
"epoch": 1.2759699534307012,
"grad_norm": 1.5120762586593628,
"learning_rate": 1.7448088928617772e-05,
"loss": 0.0296,
"step": 88500
},
{
"epoch": 1.283178823224095,
"grad_norm": 1.033087134361267,
"learning_rate": 1.7433671189030984e-05,
"loss": 0.0296,
"step": 89000
},
{
"epoch": 1.2903876930174887,
"grad_norm": 0.9456692337989807,
"learning_rate": 1.74192534494442e-05,
"loss": 0.0293,
"step": 89500
},
{
"epoch": 1.2975965628108825,
"grad_norm": 0.4252309799194336,
"learning_rate": 1.740483570985741e-05,
"loss": 0.0287,
"step": 90000
},
{
"epoch": 1.3048054326042764,
"grad_norm": 1.4315825700759888,
"learning_rate": 1.7390417970270622e-05,
"loss": 0.0314,
"step": 90500
},
{
"epoch": 1.31201430239767,
"grad_norm": 0.9023242592811584,
"learning_rate": 1.7376000230683834e-05,
"loss": 0.0296,
"step": 91000
},
{
"epoch": 1.3192231721910639,
"grad_norm": 1.8055963516235352,
"learning_rate": 1.7361582491097045e-05,
"loss": 0.0289,
"step": 91500
},
{
"epoch": 1.3264320419844577,
"grad_norm": 1.2063618898391724,
"learning_rate": 1.734716475151026e-05,
"loss": 0.03,
"step": 92000
},
{
"epoch": 1.3336409117778514,
"grad_norm": 2.5645272731781006,
"learning_rate": 1.7332747011923472e-05,
"loss": 0.0289,
"step": 92500
},
{
"epoch": 1.3408497815712452,
"grad_norm": 1.9335203170776367,
"learning_rate": 1.7318329272336684e-05,
"loss": 0.0285,
"step": 93000
},
{
"epoch": 1.348058651364639,
"grad_norm": 0.8842147588729858,
"learning_rate": 1.73039115327499e-05,
"loss": 0.0287,
"step": 93500
},
{
"epoch": 1.3552675211580327,
"grad_norm": 1.2006937265396118,
"learning_rate": 1.728949379316311e-05,
"loss": 0.0288,
"step": 94000
},
{
"epoch": 1.3624763909514266,
"grad_norm": 1.1261006593704224,
"learning_rate": 1.7275076053576322e-05,
"loss": 0.0293,
"step": 94500
},
{
"epoch": 1.3696852607448204,
"grad_norm": 1.2065215110778809,
"learning_rate": 1.7260658313989534e-05,
"loss": 0.0282,
"step": 95000
},
{
"epoch": 1.3768941305382143,
"grad_norm": 1.8486534357070923,
"learning_rate": 1.7246240574402746e-05,
"loss": 0.029,
"step": 95500
},
{
"epoch": 1.384103000331608,
"grad_norm": 0.8908069729804993,
"learning_rate": 1.7231822834815957e-05,
"loss": 0.0294,
"step": 96000
},
{
"epoch": 1.3913118701250018,
"grad_norm": 0.6375325918197632,
"learning_rate": 1.7217405095229172e-05,
"loss": 0.0287,
"step": 96500
},
{
"epoch": 1.3985207399183957,
"grad_norm": 1.9673434495925903,
"learning_rate": 1.7202987355642384e-05,
"loss": 0.0282,
"step": 97000
},
{
"epoch": 1.4057296097117895,
"grad_norm": 1.1606006622314453,
"learning_rate": 1.7188569616055596e-05,
"loss": 0.0284,
"step": 97500
},
{
"epoch": 1.4129384795051831,
"grad_norm": 1.003493309020996,
"learning_rate": 1.7174151876468807e-05,
"loss": 0.0283,
"step": 98000
},
{
"epoch": 1.420147349298577,
"grad_norm": 0.9186868071556091,
"learning_rate": 1.715973413688202e-05,
"loss": 0.0277,
"step": 98500
},
{
"epoch": 1.4273562190919709,
"grad_norm": 1.3305683135986328,
"learning_rate": 1.7145316397295234e-05,
"loss": 0.0292,
"step": 99000
},
{
"epoch": 1.4345650888853645,
"grad_norm": 1.3776835203170776,
"learning_rate": 1.7130898657708446e-05,
"loss": 0.0286,
"step": 99500
},
{
"epoch": 1.4417739586787583,
"grad_norm": 1.6687921285629272,
"learning_rate": 1.7116480918121658e-05,
"loss": 0.029,
"step": 100000
},
{
"epoch": 1.4489828284721522,
"grad_norm": 1.9249308109283447,
"learning_rate": 1.7102063178534873e-05,
"loss": 0.0262,
"step": 100500
},
{
"epoch": 1.4561916982655458,
"grad_norm": 1.1834752559661865,
"learning_rate": 1.7087645438948084e-05,
"loss": 0.0294,
"step": 101000
},
{
"epoch": 1.4634005680589397,
"grad_norm": 2.1350696086883545,
"learning_rate": 1.7073227699361296e-05,
"loss": 0.0276,
"step": 101500
},
{
"epoch": 1.4706094378523336,
"grad_norm": 2.563725709915161,
"learning_rate": 1.7058809959774508e-05,
"loss": 0.0276,
"step": 102000
},
{
"epoch": 1.4778183076457272,
"grad_norm": 0.9226647019386292,
"learning_rate": 1.704439222018772e-05,
"loss": 0.0284,
"step": 102500
},
{
"epoch": 1.485027177439121,
"grad_norm": 0.34231990575790405,
"learning_rate": 1.702997448060093e-05,
"loss": 0.0281,
"step": 103000
},
{
"epoch": 1.492236047232515,
"grad_norm": 2.339191436767578,
"learning_rate": 1.7015556741014146e-05,
"loss": 0.029,
"step": 103500
},
{
"epoch": 1.4994449170259085,
"grad_norm": 1.7756520509719849,
"learning_rate": 1.7001139001427358e-05,
"loss": 0.0288,
"step": 104000
},
{
"epoch": 1.5066537868193026,
"grad_norm": 2.0807387828826904,
"learning_rate": 1.698672126184057e-05,
"loss": 0.0281,
"step": 104500
},
{
"epoch": 1.5138626566126963,
"grad_norm": 1.4787542819976807,
"learning_rate": 1.6972303522253785e-05,
"loss": 0.0284,
"step": 105000
},
{
"epoch": 1.52107152640609,
"grad_norm": 1.719581961631775,
"learning_rate": 1.6957885782666993e-05,
"loss": 0.0287,
"step": 105500
},
{
"epoch": 1.528280396199484,
"grad_norm": 0.8158332109451294,
"learning_rate": 1.6943468043080208e-05,
"loss": 0.029,
"step": 106000
},
{
"epoch": 1.5354892659928776,
"grad_norm": 0.10212863981723785,
"learning_rate": 1.692905030349342e-05,
"loss": 0.0275,
"step": 106500
},
{
"epoch": 1.5426981357862715,
"grad_norm": 1.0970171689987183,
"learning_rate": 1.691463256390663e-05,
"loss": 0.0282,
"step": 107000
},
{
"epoch": 1.5499070055796653,
"grad_norm": 0.4221758246421814,
"learning_rate": 1.6900214824319846e-05,
"loss": 0.0285,
"step": 107500
},
{
"epoch": 1.557115875373059,
"grad_norm": 1.5400525331497192,
"learning_rate": 1.6885797084733058e-05,
"loss": 0.0282,
"step": 108000
},
{
"epoch": 1.5643247451664528,
"grad_norm": 1.6638318300247192,
"learning_rate": 1.687137934514627e-05,
"loss": 0.0301,
"step": 108500
},
{
"epoch": 1.5715336149598467,
"grad_norm": 1.3407906293869019,
"learning_rate": 1.685696160555948e-05,
"loss": 0.0276,
"step": 109000
},
{
"epoch": 1.5787424847532403,
"grad_norm": 0.8864063024520874,
"learning_rate": 1.6842543865972693e-05,
"loss": 0.0273,
"step": 109500
},
{
"epoch": 1.5859513545466342,
"grad_norm": 1.5699615478515625,
"learning_rate": 1.6828126126385905e-05,
"loss": 0.0267,
"step": 110000
},
{
"epoch": 1.593160224340028,
"grad_norm": 0.20337066054344177,
"learning_rate": 1.681370838679912e-05,
"loss": 0.0285,
"step": 110500
},
{
"epoch": 1.6003690941334217,
"grad_norm": 0.7260587811470032,
"learning_rate": 1.679929064721233e-05,
"loss": 0.028,
"step": 111000
},
{
"epoch": 1.6075779639268155,
"grad_norm": 0.434865266084671,
"learning_rate": 1.6784872907625543e-05,
"loss": 0.027,
"step": 111500
},
{
"epoch": 1.6147868337202094,
"grad_norm": 1.0067859888076782,
"learning_rate": 1.677045516803876e-05,
"loss": 0.0276,
"step": 112000
},
{
"epoch": 1.621995703513603,
"grad_norm": 1.7014882564544678,
"learning_rate": 1.6756037428451967e-05,
"loss": 0.0276,
"step": 112500
},
{
"epoch": 1.629204573306997,
"grad_norm": 1.2809230089187622,
"learning_rate": 1.674161968886518e-05,
"loss": 0.0276,
"step": 113000
},
{
"epoch": 1.6364134431003907,
"grad_norm": 1.2574232816696167,
"learning_rate": 1.6727201949278393e-05,
"loss": 0.0284,
"step": 113500
},
{
"epoch": 1.6436223128937844,
"grad_norm": 1.3797274827957153,
"learning_rate": 1.6712784209691605e-05,
"loss": 0.0282,
"step": 114000
},
{
"epoch": 1.6508311826871784,
"grad_norm": 0.32101693749427795,
"learning_rate": 1.669836647010482e-05,
"loss": 0.0274,
"step": 114500
},
{
"epoch": 1.658040052480572,
"grad_norm": 0.41121360659599304,
"learning_rate": 1.6683948730518032e-05,
"loss": 0.0286,
"step": 115000
},
{
"epoch": 1.665248922273966,
"grad_norm": 0.5161770582199097,
"learning_rate": 1.6669530990931243e-05,
"loss": 0.0271,
"step": 115500
},
{
"epoch": 1.6724577920673598,
"grad_norm": 1.153785228729248,
"learning_rate": 1.6655113251344455e-05,
"loss": 0.0264,
"step": 116000
},
{
"epoch": 1.6796666618607534,
"grad_norm": 1.5621336698532104,
"learning_rate": 1.6640695511757667e-05,
"loss": 0.0272,
"step": 116500
},
{
"epoch": 1.6868755316541473,
"grad_norm": 2.4250948429107666,
"learning_rate": 1.662627777217088e-05,
"loss": 0.0282,
"step": 117000
},
{
"epoch": 1.6940844014475411,
"grad_norm": 0.24833956360816956,
"learning_rate": 1.6611860032584094e-05,
"loss": 0.0279,
"step": 117500
},
{
"epoch": 1.7012932712409348,
"grad_norm": 2.7739059925079346,
"learning_rate": 1.6597442292997305e-05,
"loss": 0.0283,
"step": 118000
},
{
"epoch": 1.7085021410343286,
"grad_norm": 0.29604852199554443,
"learning_rate": 1.6583024553410517e-05,
"loss": 0.0271,
"step": 118500
},
{
"epoch": 1.7157110108277225,
"grad_norm": 1.0948668718338013,
"learning_rate": 1.6568606813823732e-05,
"loss": 0.0269,
"step": 119000
},
{
"epoch": 1.7229198806211161,
"grad_norm": 0.20236891508102417,
"learning_rate": 1.655418907423694e-05,
"loss": 0.0264,
"step": 119500
},
{
"epoch": 1.73012875041451,
"grad_norm": 0.9090920090675354,
"learning_rate": 1.6539771334650155e-05,
"loss": 0.0282,
"step": 120000
},
{
"epoch": 1.7373376202079038,
"grad_norm": 2.128474473953247,
"learning_rate": 1.6525353595063367e-05,
"loss": 0.0283,
"step": 120500
},
{
"epoch": 1.7445464900012975,
"grad_norm": 1.6552634239196777,
"learning_rate": 1.651093585547658e-05,
"loss": 0.0272,
"step": 121000
},
{
"epoch": 1.7517553597946915,
"grad_norm": 0.7921839356422424,
"learning_rate": 1.6496518115889794e-05,
"loss": 0.0301,
"step": 121500
},
{
"epoch": 1.7589642295880852,
"grad_norm": 0.8467416763305664,
"learning_rate": 1.6482100376303006e-05,
"loss": 0.0266,
"step": 122000
},
{
"epoch": 1.7661730993814788,
"grad_norm": 1.4604544639587402,
"learning_rate": 1.6467682636716217e-05,
"loss": 0.0253,
"step": 122500
},
{
"epoch": 1.773381969174873,
"grad_norm": 0.677890956401825,
"learning_rate": 1.645326489712943e-05,
"loss": 0.0266,
"step": 123000
},
{
"epoch": 1.7805908389682665,
"grad_norm": 0.2728472352027893,
"learning_rate": 1.643884715754264e-05,
"loss": 0.027,
"step": 123500
},
{
"epoch": 1.7877997087616604,
"grad_norm": 1.2005136013031006,
"learning_rate": 1.6424429417955852e-05,
"loss": 0.0265,
"step": 124000
},
{
"epoch": 1.7950085785550542,
"grad_norm": 2.1395583152770996,
"learning_rate": 1.6410011678369067e-05,
"loss": 0.0285,
"step": 124500
},
{
"epoch": 1.8022174483484479,
"grad_norm": 1.5524953603744507,
"learning_rate": 1.639559393878228e-05,
"loss": 0.026,
"step": 125000
},
{
"epoch": 1.8094263181418417,
"grad_norm": 1.5434062480926514,
"learning_rate": 1.638117619919549e-05,
"loss": 0.0272,
"step": 125500
},
{
"epoch": 1.8166351879352356,
"grad_norm": 1.4732664823532104,
"learning_rate": 1.6366758459608706e-05,
"loss": 0.0264,
"step": 126000
},
{
"epoch": 1.8238440577286292,
"grad_norm": 0.5316962599754333,
"learning_rate": 1.6352340720021914e-05,
"loss": 0.0262,
"step": 126500
},
{
"epoch": 1.831052927522023,
"grad_norm": 0.09009312838315964,
"learning_rate": 1.633792298043513e-05,
"loss": 0.0272,
"step": 127000
},
{
"epoch": 1.838261797315417,
"grad_norm": 1.211990475654602,
"learning_rate": 1.632350524084834e-05,
"loss": 0.0272,
"step": 127500
},
{
"epoch": 1.8454706671088106,
"grad_norm": 1.1306172609329224,
"learning_rate": 1.6309087501261552e-05,
"loss": 0.0268,
"step": 128000
},
{
"epoch": 1.8526795369022044,
"grad_norm": 1.8232672214508057,
"learning_rate": 1.6294669761674768e-05,
"loss": 0.0282,
"step": 128500
},
{
"epoch": 1.8598884066955983,
"grad_norm": 2.736703395843506,
"learning_rate": 1.628025202208798e-05,
"loss": 0.0271,
"step": 129000
},
{
"epoch": 1.867097276488992,
"grad_norm": 2.2017531394958496,
"learning_rate": 1.626583428250119e-05,
"loss": 0.0264,
"step": 129500
},
{
"epoch": 1.874306146282386,
"grad_norm": 0.6630580425262451,
"learning_rate": 1.6251416542914403e-05,
"loss": 0.0268,
"step": 130000
},
{
"epoch": 1.8815150160757796,
"grad_norm": 0.2576875388622284,
"learning_rate": 1.6236998803327614e-05,
"loss": 0.0275,
"step": 130500
},
{
"epoch": 1.8887238858691733,
"grad_norm": 0.625859260559082,
"learning_rate": 1.6222581063740826e-05,
"loss": 0.0263,
"step": 131000
},
{
"epoch": 1.8959327556625674,
"grad_norm": 2.3079171180725098,
"learning_rate": 1.620816332415404e-05,
"loss": 0.0266,
"step": 131500
},
{
"epoch": 1.903141625455961,
"grad_norm": 0.8551648259162903,
"learning_rate": 1.6193745584567253e-05,
"loss": 0.0268,
"step": 132000
},
{
"epoch": 1.9103504952493549,
"grad_norm": 1.2068754434585571,
"learning_rate": 1.6179327844980464e-05,
"loss": 0.0276,
"step": 132500
},
{
"epoch": 1.9175593650427487,
"grad_norm": 0.4594031274318695,
"learning_rate": 1.616491010539368e-05,
"loss": 0.0271,
"step": 133000
},
{
"epoch": 1.9247682348361423,
"grad_norm": 0.5821360945701599,
"learning_rate": 1.6150492365806888e-05,
"loss": 0.0267,
"step": 133500
},
{
"epoch": 1.9319771046295362,
"grad_norm": 0.5188286304473877,
"learning_rate": 1.6136074626220103e-05,
"loss": 0.027,
"step": 134000
},
{
"epoch": 1.93918597442293,
"grad_norm": 1.6506882905960083,
"learning_rate": 1.6121656886633315e-05,
"loss": 0.026,
"step": 134500
},
{
"epoch": 1.9463948442163237,
"grad_norm": 1.5678963661193848,
"learning_rate": 1.6107239147046526e-05,
"loss": 0.0264,
"step": 135000
},
{
"epoch": 1.9536037140097176,
"grad_norm": 0.3626735210418701,
"learning_rate": 1.609282140745974e-05,
"loss": 0.0264,
"step": 135500
},
{
"epoch": 1.9608125838031114,
"grad_norm": 0.48542195558547974,
"learning_rate": 1.6078403667872953e-05,
"loss": 0.0257,
"step": 136000
},
{
"epoch": 1.968021453596505,
"grad_norm": 0.93156498670578,
"learning_rate": 1.6063985928286165e-05,
"loss": 0.0274,
"step": 136500
},
{
"epoch": 1.975230323389899,
"grad_norm": 0.6599089503288269,
"learning_rate": 1.6049568188699376e-05,
"loss": 0.0253,
"step": 137000
},
{
"epoch": 1.9824391931832928,
"grad_norm": 2.511162519454956,
"learning_rate": 1.6035150449112588e-05,
"loss": 0.0264,
"step": 137500
},
{
"epoch": 1.9896480629766864,
"grad_norm": 0.7365297675132751,
"learning_rate": 1.6020732709525803e-05,
"loss": 0.0263,
"step": 138000
},
{
"epoch": 1.9968569327700805,
"grad_norm": 0.9106433391571045,
"learning_rate": 1.6006314969939015e-05,
"loss": 0.027,
"step": 138500
},
{
"epoch": 2.0,
"eval_f1": 0.9915470627263667,
"eval_loss": 0.02749801054596901,
"eval_runtime": 1640.2112,
"eval_samples_per_second": 636.802,
"eval_steps_per_second": 19.9,
"step": 138718
}
],
"logging_steps": 500,
"max_steps": 693590,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000.0,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.005
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.35149119187216e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}