distilbert-fr-explorer-mlm / trainer_state.json
edanigoben's picture
(1) classification crawler_total 70e 0.5
17c8a36
raw
history blame
23.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 65.0,
"global_step": 6760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 4.9238165680473374e-05,
"loss": 1.4565,
"step": 103
},
{
"epoch": 1.0,
"eval_f1": 0.5061315988298739,
"eval_loss": 1.226438045501709,
"eval_runtime": 2.9031,
"eval_samples_per_second": 190.487,
"eval_steps_per_second": 12.056,
"step": 104
},
{
"epoch": 1.98,
"learning_rate": 4.8483727810650895e-05,
"loss": 1.08,
"step": 206
},
{
"epoch": 2.0,
"eval_f1": 0.5585235698125446,
"eval_loss": 1.1974396705627441,
"eval_runtime": 2.9416,
"eval_samples_per_second": 187.991,
"eval_steps_per_second": 11.898,
"step": 208
},
{
"epoch": 2.97,
"learning_rate": 4.772189349112427e-05,
"loss": 0.8073,
"step": 309
},
{
"epoch": 3.0,
"eval_f1": 0.5539323613883379,
"eval_loss": 1.276489496231079,
"eval_runtime": 2.969,
"eval_samples_per_second": 186.259,
"eval_steps_per_second": 11.789,
"step": 312
},
{
"epoch": 3.96,
"learning_rate": 4.696005917159764e-05,
"loss": 0.5577,
"step": 412
},
{
"epoch": 4.0,
"eval_f1": 0.5553847052045314,
"eval_loss": 1.427822232246399,
"eval_runtime": 2.9361,
"eval_samples_per_second": 188.346,
"eval_steps_per_second": 11.921,
"step": 416
},
{
"epoch": 4.95,
"learning_rate": 4.619822485207101e-05,
"loss": 0.3941,
"step": 515
},
{
"epoch": 5.0,
"eval_f1": 0.5570342860609194,
"eval_loss": 1.6517128944396973,
"eval_runtime": 2.9022,
"eval_samples_per_second": 190.542,
"eval_steps_per_second": 12.06,
"step": 520
},
{
"epoch": 5.94,
"learning_rate": 4.543639053254438e-05,
"loss": 0.2878,
"step": 618
},
{
"epoch": 6.0,
"eval_f1": 0.5619826716090497,
"eval_loss": 1.8180437088012695,
"eval_runtime": 2.855,
"eval_samples_per_second": 193.697,
"eval_steps_per_second": 12.259,
"step": 624
},
{
"epoch": 6.93,
"learning_rate": 4.468195266272189e-05,
"loss": 0.2337,
"step": 721
},
{
"epoch": 7.0,
"eval_f1": 0.5674708526030706,
"eval_loss": 1.9061989784240723,
"eval_runtime": 2.8641,
"eval_samples_per_second": 193.077,
"eval_steps_per_second": 12.22,
"step": 728
},
{
"epoch": 7.92,
"learning_rate": 4.392011834319526e-05,
"loss": 0.1743,
"step": 824
},
{
"epoch": 8.0,
"eval_f1": 0.5571774381839604,
"eval_loss": 2.166078805923462,
"eval_runtime": 2.8885,
"eval_samples_per_second": 191.449,
"eval_steps_per_second": 12.117,
"step": 832
},
{
"epoch": 8.91,
"learning_rate": 4.315828402366864e-05,
"loss": 0.1324,
"step": 927
},
{
"epoch": 9.0,
"eval_f1": 0.5586999359656486,
"eval_loss": 2.1434192657470703,
"eval_runtime": 2.8793,
"eval_samples_per_second": 192.062,
"eval_steps_per_second": 12.156,
"step": 936
},
{
"epoch": 9.9,
"learning_rate": 4.239644970414201e-05,
"loss": 0.1051,
"step": 1030
},
{
"epoch": 10.0,
"eval_f1": 0.571861247626083,
"eval_loss": 2.2514231204986572,
"eval_runtime": 2.8876,
"eval_samples_per_second": 191.511,
"eval_steps_per_second": 12.121,
"step": 1040
},
{
"epoch": 10.89,
"learning_rate": 4.163461538461539e-05,
"loss": 0.1016,
"step": 1133
},
{
"epoch": 11.0,
"eval_f1": 0.5608736700927537,
"eval_loss": 2.452277898788452,
"eval_runtime": 2.9662,
"eval_samples_per_second": 186.434,
"eval_steps_per_second": 11.8,
"step": 1144
},
{
"epoch": 11.88,
"learning_rate": 4.0872781065088764e-05,
"loss": 0.0814,
"step": 1236
},
{
"epoch": 12.0,
"eval_f1": 0.5643677851728315,
"eval_loss": 2.5340888500213623,
"eval_runtime": 2.8457,
"eval_samples_per_second": 194.329,
"eval_steps_per_second": 12.299,
"step": 1248
},
{
"epoch": 12.88,
"learning_rate": 4.0110946745562136e-05,
"loss": 0.0673,
"step": 1339
},
{
"epoch": 13.0,
"eval_f1": 0.5738915229311208,
"eval_loss": 2.6217703819274902,
"eval_runtime": 2.9035,
"eval_samples_per_second": 190.46,
"eval_steps_per_second": 12.054,
"step": 1352
},
{
"epoch": 13.87,
"learning_rate": 3.934911242603551e-05,
"loss": 0.0684,
"step": 1442
},
{
"epoch": 14.0,
"eval_f1": 0.5366433281464598,
"eval_loss": 2.9552414417266846,
"eval_runtime": 2.9063,
"eval_samples_per_second": 190.277,
"eval_steps_per_second": 12.043,
"step": 1456
},
{
"epoch": 14.86,
"learning_rate": 3.858727810650888e-05,
"loss": 0.0466,
"step": 1545
},
{
"epoch": 15.0,
"eval_f1": 0.5787084254032917,
"eval_loss": 2.7240512371063232,
"eval_runtime": 2.8887,
"eval_samples_per_second": 191.438,
"eval_steps_per_second": 12.116,
"step": 1560
},
{
"epoch": 15.85,
"learning_rate": 3.782544378698225e-05,
"loss": 0.0577,
"step": 1648
},
{
"epoch": 16.0,
"eval_f1": 0.5666557248979172,
"eval_loss": 2.821897506713867,
"eval_runtime": 2.8994,
"eval_samples_per_second": 190.727,
"eval_steps_per_second": 12.071,
"step": 1664
},
{
"epoch": 16.84,
"learning_rate": 3.706360946745562e-05,
"loss": 0.042,
"step": 1751
},
{
"epoch": 17.0,
"eval_f1": 0.56033452806457,
"eval_loss": 2.9155900478363037,
"eval_runtime": 2.9258,
"eval_samples_per_second": 189.01,
"eval_steps_per_second": 11.963,
"step": 1768
},
{
"epoch": 17.83,
"learning_rate": 3.6301775147928995e-05,
"loss": 0.0404,
"step": 1854
},
{
"epoch": 18.0,
"eval_f1": 0.5621979513908701,
"eval_loss": 2.893630266189575,
"eval_runtime": 2.9319,
"eval_samples_per_second": 188.613,
"eval_steps_per_second": 11.938,
"step": 1872
},
{
"epoch": 18.82,
"learning_rate": 3.553994082840237e-05,
"loss": 0.0426,
"step": 1957
},
{
"epoch": 19.0,
"eval_f1": 0.5766503161850353,
"eval_loss": 3.00762939453125,
"eval_runtime": 2.9064,
"eval_samples_per_second": 190.271,
"eval_steps_per_second": 12.042,
"step": 1976
},
{
"epoch": 19.81,
"learning_rate": 3.477810650887574e-05,
"loss": 0.0361,
"step": 2060
},
{
"epoch": 20.0,
"eval_f1": 0.5565194377868121,
"eval_loss": 3.043562173843384,
"eval_runtime": 2.9099,
"eval_samples_per_second": 190.043,
"eval_steps_per_second": 12.028,
"step": 2080
},
{
"epoch": 20.8,
"learning_rate": 3.401627218934911e-05,
"loss": 0.039,
"step": 2163
},
{
"epoch": 21.0,
"eval_f1": 0.5661283975776907,
"eval_loss": 3.034050226211548,
"eval_runtime": 2.8472,
"eval_samples_per_second": 194.228,
"eval_steps_per_second": 12.293,
"step": 2184
},
{
"epoch": 21.79,
"learning_rate": 3.325443786982248e-05,
"loss": 0.0311,
"step": 2266
},
{
"epoch": 22.0,
"eval_f1": 0.5698598461896062,
"eval_loss": 3.1546428203582764,
"eval_runtime": 2.8965,
"eval_samples_per_second": 190.921,
"eval_steps_per_second": 12.084,
"step": 2288
},
{
"epoch": 22.78,
"learning_rate": 3.2492603550295855e-05,
"loss": 0.0296,
"step": 2369
},
{
"epoch": 23.0,
"eval_f1": 0.5584145320343268,
"eval_loss": 3.3160221576690674,
"eval_runtime": 2.9004,
"eval_samples_per_second": 190.664,
"eval_steps_per_second": 12.067,
"step": 2392
},
{
"epoch": 23.77,
"learning_rate": 3.1730769230769234e-05,
"loss": 0.03,
"step": 2472
},
{
"epoch": 24.0,
"eval_f1": 0.5765799312977243,
"eval_loss": 3.2025678157806396,
"eval_runtime": 2.837,
"eval_samples_per_second": 194.921,
"eval_steps_per_second": 12.337,
"step": 2496
},
{
"epoch": 24.76,
"learning_rate": 3.0968934911242606e-05,
"loss": 0.0333,
"step": 2575
},
{
"epoch": 25.0,
"eval_f1": 0.5689553713820321,
"eval_loss": 3.211634397506714,
"eval_runtime": 2.9044,
"eval_samples_per_second": 190.402,
"eval_steps_per_second": 12.051,
"step": 2600
},
{
"epoch": 25.75,
"learning_rate": 3.0207100591715974e-05,
"loss": 0.0321,
"step": 2678
},
{
"epoch": 26.0,
"eval_f1": 0.5756108062994573,
"eval_loss": 3.2678425312042236,
"eval_runtime": 2.8888,
"eval_samples_per_second": 191.428,
"eval_steps_per_second": 12.116,
"step": 2704
},
{
"epoch": 26.74,
"learning_rate": 2.944526627218935e-05,
"loss": 0.0263,
"step": 2781
},
{
"epoch": 27.0,
"eval_f1": 0.5758065273285641,
"eval_loss": 3.2969822883605957,
"eval_runtime": 2.9527,
"eval_samples_per_second": 187.286,
"eval_steps_per_second": 11.854,
"step": 2808
},
{
"epoch": 27.73,
"learning_rate": 2.8683431952662725e-05,
"loss": 0.0281,
"step": 2884
},
{
"epoch": 28.0,
"eval_f1": 0.5781354966097151,
"eval_loss": 3.3730037212371826,
"eval_runtime": 2.8614,
"eval_samples_per_second": 193.264,
"eval_steps_per_second": 12.232,
"step": 2912
},
{
"epoch": 28.72,
"learning_rate": 2.7921597633136097e-05,
"loss": 0.0282,
"step": 2987
},
{
"epoch": 29.0,
"eval_f1": 0.5741866124789994,
"eval_loss": 3.364117383956909,
"eval_runtime": 2.8696,
"eval_samples_per_second": 192.707,
"eval_steps_per_second": 12.197,
"step": 3016
},
{
"epoch": 29.71,
"learning_rate": 2.7159763313609472e-05,
"loss": 0.0296,
"step": 3090
},
{
"epoch": 30.0,
"eval_f1": 0.5771762774162508,
"eval_loss": 3.3623032569885254,
"eval_runtime": 2.9567,
"eval_samples_per_second": 187.031,
"eval_steps_per_second": 11.837,
"step": 3120
},
{
"epoch": 30.7,
"learning_rate": 2.6397928994082844e-05,
"loss": 0.0308,
"step": 3193
},
{
"epoch": 31.0,
"eval_f1": 0.578537002980747,
"eval_loss": 3.4039528369903564,
"eval_runtime": 2.8263,
"eval_samples_per_second": 195.66,
"eval_steps_per_second": 12.384,
"step": 3224
},
{
"epoch": 31.69,
"learning_rate": 2.5636094674556216e-05,
"loss": 0.0308,
"step": 3296
},
{
"epoch": 32.0,
"eval_f1": 0.575919412837488,
"eval_loss": 3.392319679260254,
"eval_runtime": 2.9375,
"eval_samples_per_second": 188.254,
"eval_steps_per_second": 11.915,
"step": 3328
},
{
"epoch": 32.68,
"learning_rate": 2.4874260355029588e-05,
"loss": 0.0262,
"step": 3399
},
{
"epoch": 33.0,
"eval_f1": 0.5563772891428104,
"eval_loss": 3.4757542610168457,
"eval_runtime": 2.865,
"eval_samples_per_second": 193.019,
"eval_steps_per_second": 12.216,
"step": 3432
},
{
"epoch": 33.67,
"learning_rate": 2.411242603550296e-05,
"loss": 0.0319,
"step": 3502
},
{
"epoch": 34.0,
"eval_f1": 0.5738865992034025,
"eval_loss": 3.425334930419922,
"eval_runtime": 2.9109,
"eval_samples_per_second": 189.974,
"eval_steps_per_second": 12.024,
"step": 3536
},
{
"epoch": 34.66,
"learning_rate": 2.3350591715976332e-05,
"loss": 0.0277,
"step": 3605
},
{
"epoch": 35.0,
"eval_f1": 0.5785980513801816,
"eval_loss": 3.4686436653137207,
"eval_runtime": 2.93,
"eval_samples_per_second": 188.738,
"eval_steps_per_second": 11.945,
"step": 3640
},
{
"epoch": 35.65,
"learning_rate": 2.2588757396449707e-05,
"loss": 0.0289,
"step": 3708
},
{
"epoch": 36.0,
"eval_f1": 0.5836924697871717,
"eval_loss": 3.462078094482422,
"eval_runtime": 2.8428,
"eval_samples_per_second": 194.527,
"eval_steps_per_second": 12.312,
"step": 3744
},
{
"epoch": 36.64,
"learning_rate": 2.182692307692308e-05,
"loss": 0.0247,
"step": 3811
},
{
"epoch": 37.0,
"eval_f1": 0.5734707197245945,
"eval_loss": 3.481998920440674,
"eval_runtime": 3.0017,
"eval_samples_per_second": 184.228,
"eval_steps_per_second": 11.66,
"step": 3848
},
{
"epoch": 37.63,
"learning_rate": 2.106508875739645e-05,
"loss": 0.0303,
"step": 3914
},
{
"epoch": 38.0,
"eval_f1": 0.5770262969511715,
"eval_loss": 3.466510772705078,
"eval_runtime": 2.8587,
"eval_samples_per_second": 193.442,
"eval_steps_per_second": 12.243,
"step": 3952
},
{
"epoch": 38.62,
"learning_rate": 2.0303254437869823e-05,
"loss": 0.0239,
"step": 4017
},
{
"epoch": 39.0,
"eval_f1": 0.5666519467364683,
"eval_loss": 3.5593807697296143,
"eval_runtime": 2.8222,
"eval_samples_per_second": 195.946,
"eval_steps_per_second": 12.402,
"step": 4056
},
{
"epoch": 39.62,
"learning_rate": 1.9541420118343195e-05,
"loss": 0.0262,
"step": 4120
},
{
"epoch": 40.0,
"eval_f1": 0.5808476343157906,
"eval_loss": 3.5302422046661377,
"eval_runtime": 2.8598,
"eval_samples_per_second": 193.368,
"eval_steps_per_second": 12.238,
"step": 4160
},
{
"epoch": 40.61,
"learning_rate": 1.8779585798816567e-05,
"loss": 0.0282,
"step": 4223
},
{
"epoch": 41.0,
"eval_f1": 0.5835890408164021,
"eval_loss": 3.4572339057922363,
"eval_runtime": 2.8566,
"eval_samples_per_second": 193.584,
"eval_steps_per_second": 12.252,
"step": 4264
},
{
"epoch": 41.6,
"learning_rate": 1.8025147928994084e-05,
"loss": 0.0469,
"step": 4326
},
{
"epoch": 42.0,
"eval_f1": 0.5685331156394952,
"eval_loss": 3.609334707260132,
"eval_runtime": 2.8251,
"eval_samples_per_second": 195.747,
"eval_steps_per_second": 12.389,
"step": 4368
},
{
"epoch": 42.59,
"learning_rate": 1.7263313609467456e-05,
"loss": 0.0302,
"step": 4429
},
{
"epoch": 43.0,
"eval_f1": 0.5684067370608473,
"eval_loss": 3.6115400791168213,
"eval_runtime": 2.9194,
"eval_samples_per_second": 189.42,
"eval_steps_per_second": 11.989,
"step": 4472
},
{
"epoch": 43.58,
"learning_rate": 1.650147928994083e-05,
"loss": 0.0289,
"step": 4532
},
{
"epoch": 44.0,
"eval_f1": 0.5757900647671246,
"eval_loss": 3.629568099975586,
"eval_runtime": 2.9036,
"eval_samples_per_second": 190.453,
"eval_steps_per_second": 12.054,
"step": 4576
},
{
"epoch": 44.57,
"learning_rate": 1.5739644970414204e-05,
"loss": 0.0254,
"step": 4635
},
{
"epoch": 45.0,
"eval_f1": 0.5689505752768721,
"eval_loss": 3.7250843048095703,
"eval_runtime": 2.9726,
"eval_samples_per_second": 186.035,
"eval_steps_per_second": 11.774,
"step": 4680
},
{
"epoch": 45.56,
"learning_rate": 1.4977810650887576e-05,
"loss": 0.0283,
"step": 4738
},
{
"epoch": 46.0,
"eval_f1": 0.5592198654774546,
"eval_loss": 3.726353645324707,
"eval_runtime": 2.9328,
"eval_samples_per_second": 188.559,
"eval_steps_per_second": 11.934,
"step": 4784
},
{
"epoch": 46.55,
"learning_rate": 1.4215976331360948e-05,
"loss": 0.0246,
"step": 4841
},
{
"epoch": 47.0,
"eval_f1": 0.5650157110711802,
"eval_loss": 3.7832093238830566,
"eval_runtime": 2.9067,
"eval_samples_per_second": 190.249,
"eval_steps_per_second": 12.041,
"step": 4888
},
{
"epoch": 47.54,
"learning_rate": 1.345414201183432e-05,
"loss": 0.0311,
"step": 4944
},
{
"epoch": 48.0,
"eval_f1": 0.5681512072556809,
"eval_loss": 3.6964025497436523,
"eval_runtime": 2.9008,
"eval_samples_per_second": 190.634,
"eval_steps_per_second": 12.065,
"step": 4992
},
{
"epoch": 48.53,
"learning_rate": 1.2692307692307691e-05,
"loss": 0.0268,
"step": 5047
},
{
"epoch": 49.0,
"eval_f1": 0.5674808111122996,
"eval_loss": 3.7195167541503906,
"eval_runtime": 2.8604,
"eval_samples_per_second": 193.33,
"eval_steps_per_second": 12.236,
"step": 5096
},
{
"epoch": 49.52,
"learning_rate": 1.1930473372781067e-05,
"loss": 0.0293,
"step": 5150
},
{
"epoch": 50.0,
"eval_f1": 0.5614419693521525,
"eval_loss": 3.752530097961426,
"eval_runtime": 2.8761,
"eval_samples_per_second": 192.275,
"eval_steps_per_second": 12.169,
"step": 5200
},
{
"epoch": 50.51,
"learning_rate": 1.1168639053254439e-05,
"loss": 0.0282,
"step": 5253
},
{
"epoch": 51.0,
"eval_f1": 0.5655838635083059,
"eval_loss": 3.7514984607696533,
"eval_runtime": 2.8609,
"eval_samples_per_second": 193.296,
"eval_steps_per_second": 12.234,
"step": 5304
},
{
"epoch": 51.5,
"learning_rate": 1.040680473372781e-05,
"loss": 0.0248,
"step": 5356
},
{
"epoch": 52.0,
"eval_f1": 0.5590951084274065,
"eval_loss": 3.7639315128326416,
"eval_runtime": 2.8211,
"eval_samples_per_second": 196.025,
"eval_steps_per_second": 12.407,
"step": 5408
},
{
"epoch": 52.49,
"learning_rate": 9.644970414201183e-06,
"loss": 0.0257,
"step": 5459
},
{
"epoch": 53.0,
"eval_f1": 0.5480134247467852,
"eval_loss": 3.824922800064087,
"eval_runtime": 2.8475,
"eval_samples_per_second": 194.205,
"eval_steps_per_second": 12.291,
"step": 5512
},
{
"epoch": 53.48,
"learning_rate": 8.883136094674558e-06,
"loss": 0.0235,
"step": 5562
},
{
"epoch": 54.0,
"eval_f1": 0.5565796472147394,
"eval_loss": 3.7871253490448,
"eval_runtime": 2.9817,
"eval_samples_per_second": 185.462,
"eval_steps_per_second": 11.738,
"step": 5616
},
{
"epoch": 54.47,
"learning_rate": 8.12130177514793e-06,
"loss": 0.0299,
"step": 5665
},
{
"epoch": 55.0,
"eval_f1": 0.5574154263000176,
"eval_loss": 3.788760185241699,
"eval_runtime": 2.8852,
"eval_samples_per_second": 191.665,
"eval_steps_per_second": 12.131,
"step": 5720
},
{
"epoch": 55.46,
"learning_rate": 7.359467455621302e-06,
"loss": 0.0277,
"step": 5768
},
{
"epoch": 56.0,
"eval_f1": 0.563024311843682,
"eval_loss": 3.7907044887542725,
"eval_runtime": 2.8658,
"eval_samples_per_second": 192.962,
"eval_steps_per_second": 12.213,
"step": 5824
},
{
"epoch": 56.45,
"learning_rate": 6.597633136094675e-06,
"loss": 0.0256,
"step": 5871
},
{
"epoch": 57.0,
"eval_f1": 0.56153234588093,
"eval_loss": 3.799422264099121,
"eval_runtime": 2.8666,
"eval_samples_per_second": 192.912,
"eval_steps_per_second": 12.21,
"step": 5928
},
{
"epoch": 57.44,
"learning_rate": 5.8357988165680474e-06,
"loss": 0.0226,
"step": 5974
},
{
"epoch": 58.0,
"eval_f1": 0.5555061070073688,
"eval_loss": 3.811858892440796,
"eval_runtime": 2.8683,
"eval_samples_per_second": 192.797,
"eval_steps_per_second": 12.202,
"step": 6032
},
{
"epoch": 58.43,
"learning_rate": 5.07396449704142e-06,
"loss": 0.0284,
"step": 6077
},
{
"epoch": 59.0,
"eval_f1": 0.5597671150511061,
"eval_loss": 3.8192451000213623,
"eval_runtime": 2.8512,
"eval_samples_per_second": 193.951,
"eval_steps_per_second": 12.275,
"step": 6136
},
{
"epoch": 59.42,
"learning_rate": 4.312130177514793e-06,
"loss": 0.0233,
"step": 6180
},
{
"epoch": 60.0,
"eval_f1": 0.5584681716027172,
"eval_loss": 3.823091983795166,
"eval_runtime": 2.9385,
"eval_samples_per_second": 188.191,
"eval_steps_per_second": 11.911,
"step": 6240
},
{
"epoch": 60.41,
"learning_rate": 3.550295857988166e-06,
"loss": 0.0266,
"step": 6283
},
{
"epoch": 61.0,
"eval_f1": 0.5625000576804086,
"eval_loss": 3.8085415363311768,
"eval_runtime": 2.9015,
"eval_samples_per_second": 190.588,
"eval_steps_per_second": 12.063,
"step": 6344
},
{
"epoch": 61.4,
"learning_rate": 2.7958579881656803e-06,
"loss": 0.0267,
"step": 6386
},
{
"epoch": 62.0,
"eval_f1": 0.5622167257088028,
"eval_loss": 3.80642032623291,
"eval_runtime": 2.8514,
"eval_samples_per_second": 193.94,
"eval_steps_per_second": 12.275,
"step": 6448
},
{
"epoch": 62.39,
"learning_rate": 2.034023668639053e-06,
"loss": 0.0281,
"step": 6489
},
{
"epoch": 63.0,
"eval_f1": 0.564106811375439,
"eval_loss": 3.8057875633239746,
"eval_runtime": 2.8945,
"eval_samples_per_second": 191.055,
"eval_steps_per_second": 12.092,
"step": 6552
},
{
"epoch": 63.38,
"learning_rate": 1.2721893491124261e-06,
"loss": 0.025,
"step": 6592
},
{
"epoch": 64.0,
"eval_f1": 0.5644375312998279,
"eval_loss": 3.807055950164795,
"eval_runtime": 2.8941,
"eval_samples_per_second": 191.08,
"eval_steps_per_second": 12.094,
"step": 6656
},
{
"epoch": 64.38,
"learning_rate": 5.103550295857988e-07,
"loss": 0.0226,
"step": 6695
},
{
"epoch": 65.0,
"eval_f1": 0.5644375312998279,
"eval_loss": 3.807528018951416,
"eval_runtime": 2.8626,
"eval_samples_per_second": 193.181,
"eval_steps_per_second": 12.227,
"step": 6760
}
],
"max_steps": 6760,
"num_train_epochs": 65,
"total_flos": 1.4286659901696e+16,
"trial_name": null,
"trial_params": null
}