A-og-ttack25 / trainer_state.json
Neesgaard's picture
Uploaded modle files
9d283bd verified
{
"best_metric": 0.7704865330126336,
"best_model_checkpoint": "output/checkpoint-12000",
"epoch": 4.752475247524752,
"eval_steps": 250,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09900990099009901,
"grad_norm": 1.5793111324310303,
"learning_rate": 1e-05,
"loss": 0.1198,
"step": 250
},
{
"epoch": 0.09900990099009901,
"eval_f1": 0.713525737782454,
"eval_loss": 0.12419664114713669,
"eval_precision": 0.7395227595992926,
"eval_recall": 0.6962858255614207,
"eval_runtime": 321.2972,
"eval_samples_per_second": 13.467,
"eval_steps_per_second": 0.675,
"step": 250
},
{
"epoch": 0.19801980198019803,
"grad_norm": 0.9309703707695007,
"learning_rate": 1e-05,
"loss": 0.1045,
"step": 500
},
{
"epoch": 0.19801980198019803,
"eval_f1": 0.7206475251105509,
"eval_loss": 0.11620143055915833,
"eval_precision": 0.7629260609509252,
"eval_recall": 0.6966676403290979,
"eval_runtime": 329.1747,
"eval_samples_per_second": 13.145,
"eval_steps_per_second": 0.659,
"step": 500
},
{
"epoch": 0.297029702970297,
"grad_norm": 0.3302266299724579,
"learning_rate": 1e-05,
"loss": 0.1063,
"step": 750
},
{
"epoch": 0.297029702970297,
"eval_f1": 0.7252193838416583,
"eval_loss": 0.11440324038267136,
"eval_precision": 0.7635823487211817,
"eval_recall": 0.7023769534102526,
"eval_runtime": 330.0354,
"eval_samples_per_second": 13.111,
"eval_steps_per_second": 0.658,
"step": 750
},
{
"epoch": 0.39603960396039606,
"grad_norm": 2.2105612754821777,
"learning_rate": 1e-05,
"loss": 0.1021,
"step": 1000
},
{
"epoch": 0.39603960396039606,
"eval_f1": 0.7330270803885934,
"eval_loss": 0.113602414727211,
"eval_precision": 0.7541870386601477,
"eval_recall": 0.7176272889760549,
"eval_runtime": 330.4375,
"eval_samples_per_second": 13.095,
"eval_steps_per_second": 0.657,
"step": 1000
},
{
"epoch": 0.49504950495049505,
"grad_norm": 2.663264274597168,
"learning_rate": 1e-05,
"loss": 0.1065,
"step": 1250
},
{
"epoch": 0.49504950495049505,
"eval_f1": 0.7386791782384943,
"eval_loss": 0.1129036471247673,
"eval_precision": 0.7609068815282719,
"eval_recall": 0.7225689735511556,
"eval_runtime": 330.3036,
"eval_samples_per_second": 13.1,
"eval_steps_per_second": 0.657,
"step": 1250
},
{
"epoch": 0.594059405940594,
"grad_norm": 0.2760612964630127,
"learning_rate": 1e-05,
"loss": 0.1038,
"step": 1500
},
{
"epoch": 0.594059405940594,
"eval_f1": 0.7218572845371175,
"eval_loss": 0.11184883862733841,
"eval_precision": 0.7802744438382736,
"eval_recall": 0.6929390348223414,
"eval_runtime": 330.314,
"eval_samples_per_second": 13.1,
"eval_steps_per_second": 0.657,
"step": 1500
},
{
"epoch": 0.693069306930693,
"grad_norm": 0.3853608965873718,
"learning_rate": 1e-05,
"loss": 0.1013,
"step": 1750
},
{
"epoch": 0.693069306930693,
"eval_f1": 0.74378295527466,
"eval_loss": 0.10998328775167465,
"eval_precision": 0.7703728976253078,
"eval_recall": 0.7252778415294846,
"eval_runtime": 330.8211,
"eval_samples_per_second": 13.08,
"eval_steps_per_second": 0.656,
"step": 1750
},
{
"epoch": 0.7920792079207921,
"grad_norm": 0.4774291217327118,
"learning_rate": 1e-05,
"loss": 0.1074,
"step": 2000
},
{
"epoch": 0.7920792079207921,
"eval_f1": 0.740637178295641,
"eval_loss": 0.10963314771652222,
"eval_precision": 0.7560941570162041,
"eval_recall": 0.7284361590408035,
"eval_runtime": 330.2879,
"eval_samples_per_second": 13.101,
"eval_steps_per_second": 0.657,
"step": 2000
},
{
"epoch": 0.8910891089108911,
"grad_norm": 2.6813926696777344,
"learning_rate": 1e-05,
"loss": 0.1016,
"step": 2250
},
{
"epoch": 0.8910891089108911,
"eval_f1": 0.736115628557489,
"eval_loss": 0.10999644547700882,
"eval_precision": 0.7713716773177735,
"eval_recall": 0.7139623192639112,
"eval_runtime": 330.3901,
"eval_samples_per_second": 13.097,
"eval_steps_per_second": 0.657,
"step": 2250
},
{
"epoch": 0.9900990099009901,
"grad_norm": 0.26967763900756836,
"learning_rate": 1e-05,
"loss": 0.1027,
"step": 2500
},
{
"epoch": 0.9900990099009901,
"eval_f1": 0.7451604650395085,
"eval_loss": 0.10895609110593796,
"eval_precision": 0.7685630379034225,
"eval_recall": 0.7282629862226766,
"eval_runtime": 329.6713,
"eval_samples_per_second": 13.125,
"eval_steps_per_second": 0.658,
"step": 2500
},
{
"epoch": 1.0891089108910892,
"grad_norm": 0.9345588684082031,
"learning_rate": 1e-05,
"loss": 0.0947,
"step": 2750
},
{
"epoch": 1.0891089108910892,
"eval_f1": 0.7540236340162096,
"eval_loss": 0.11148978024721146,
"eval_precision": 0.7576570195811476,
"eval_recall": 0.7506000194732486,
"eval_runtime": 329.265,
"eval_samples_per_second": 13.141,
"eval_steps_per_second": 0.659,
"step": 2750
},
{
"epoch": 1.188118811881188,
"grad_norm": 1.7818280458450317,
"learning_rate": 1e-05,
"loss": 0.0941,
"step": 3000
},
{
"epoch": 1.188118811881188,
"eval_f1": 0.7513449964701451,
"eval_loss": 0.11351278424263,
"eval_precision": 0.7627544097693352,
"eval_recall": 0.7417476892904087,
"eval_runtime": 330.7392,
"eval_samples_per_second": 13.083,
"eval_steps_per_second": 0.656,
"step": 3000
},
{
"epoch": 1.2871287128712872,
"grad_norm": 0.29553142189979553,
"learning_rate": 1e-05,
"loss": 0.093,
"step": 3250
},
{
"epoch": 1.2871287128712872,
"eval_f1": 0.7310646699770766,
"eval_loss": 0.11457356810569763,
"eval_precision": 0.7899499010545827,
"eval_recall": 0.701254111985089,
"eval_runtime": 330.9605,
"eval_samples_per_second": 13.074,
"eval_steps_per_second": 0.656,
"step": 3250
},
{
"epoch": 1.386138613861386,
"grad_norm": 0.8314707279205322,
"learning_rate": 1e-05,
"loss": 0.0965,
"step": 3500
},
{
"epoch": 1.386138613861386,
"eval_f1": 0.7446224165970077,
"eval_loss": 0.11156675964593887,
"eval_precision": 0.7669786860620493,
"eval_recall": 0.7283024543247999,
"eval_runtime": 329.2254,
"eval_samples_per_second": 13.143,
"eval_steps_per_second": 0.659,
"step": 3500
},
{
"epoch": 1.4851485148514851,
"grad_norm": 0.238117054104805,
"learning_rate": 1e-05,
"loss": 0.0949,
"step": 3750
},
{
"epoch": 1.4851485148514851,
"eval_f1": 0.7545433038240896,
"eval_loss": 0.11255145817995071,
"eval_precision": 0.7551434520731723,
"eval_recall": 0.753949244368406,
"eval_runtime": 329.0801,
"eval_samples_per_second": 13.149,
"eval_steps_per_second": 0.659,
"step": 3750
},
{
"epoch": 1.5841584158415842,
"grad_norm": 0.36143016815185547,
"learning_rate": 1e-05,
"loss": 0.0935,
"step": 4000
},
{
"epoch": 1.5841584158415842,
"eval_f1": 0.7567706745300815,
"eval_loss": 0.11153747141361237,
"eval_precision": 0.7570114849625239,
"eval_recall": 0.7565308407575093,
"eval_runtime": 328.785,
"eval_samples_per_second": 13.161,
"eval_steps_per_second": 0.66,
"step": 4000
},
{
"epoch": 1.6831683168316833,
"grad_norm": 1.6590158939361572,
"learning_rate": 1e-05,
"loss": 0.0923,
"step": 4250
},
{
"epoch": 1.6831683168316833,
"eval_f1": 0.750615488110628,
"eval_loss": 0.11224941164255142,
"eval_precision": 0.7794619961345578,
"eval_recall": 0.730789814100023,
"eval_runtime": 328.0765,
"eval_samples_per_second": 13.189,
"eval_steps_per_second": 0.661,
"step": 4250
},
{
"epoch": 1.7821782178217822,
"grad_norm": 0.7645926475524902,
"learning_rate": 1e-05,
"loss": 0.0957,
"step": 4500
},
{
"epoch": 1.7821782178217822,
"eval_f1": 0.7547025065064623,
"eval_loss": 0.1093941256403923,
"eval_precision": 0.7719325552656011,
"eval_recall": 0.741186268577827,
"eval_runtime": 328.088,
"eval_samples_per_second": 13.189,
"eval_steps_per_second": 0.661,
"step": 4500
},
{
"epoch": 1.881188118811881,
"grad_norm": 1.7966728210449219,
"learning_rate": 1e-05,
"loss": 0.0914,
"step": 4750
},
{
"epoch": 1.881188118811881,
"eval_f1": 0.7618047956088785,
"eval_loss": 0.11087872087955475,
"eval_precision": 0.7672179303677568,
"eval_recall": 0.756824852038084,
"eval_runtime": 328.324,
"eval_samples_per_second": 13.179,
"eval_steps_per_second": 0.661,
"step": 4750
},
{
"epoch": 1.9801980198019802,
"grad_norm": 0.28777483105659485,
"learning_rate": 1e-05,
"loss": 0.0963,
"step": 5000
},
{
"epoch": 1.9801980198019802,
"eval_f1": 0.7554586723328673,
"eval_loss": 0.11140972375869751,
"eval_precision": 0.7716512789760404,
"eval_recall": 0.7425878208739316,
"eval_runtime": 329.8561,
"eval_samples_per_second": 13.118,
"eval_steps_per_second": 0.658,
"step": 5000
},
{
"epoch": 2.0792079207920793,
"grad_norm": 1.7094814777374268,
"learning_rate": 1e-05,
"loss": 0.0896,
"step": 5250
},
{
"epoch": 2.0792079207920793,
"eval_f1": 0.7604882007256375,
"eval_loss": 0.11669965833425522,
"eval_precision": 0.7473437356807244,
"eval_recall": 0.7778062342214526,
"eval_runtime": 329.4978,
"eval_samples_per_second": 13.132,
"eval_steps_per_second": 0.659,
"step": 5250
},
{
"epoch": 2.1782178217821784,
"grad_norm": 0.47071635723114014,
"learning_rate": 1e-05,
"loss": 0.0896,
"step": 5500
},
{
"epoch": 2.1782178217821784,
"eval_f1": 0.7668274682246945,
"eval_loss": 0.11288785934448242,
"eval_precision": 0.767203513653942,
"eval_recall": 0.766453678009834,
"eval_runtime": 329.9865,
"eval_samples_per_second": 13.113,
"eval_steps_per_second": 0.658,
"step": 5500
},
{
"epoch": 2.2772277227722775,
"grad_norm": 0.6770134568214417,
"learning_rate": 1e-05,
"loss": 0.0838,
"step": 5750
},
{
"epoch": 2.2772277227722775,
"eval_f1": 0.7671750432534277,
"eval_loss": 0.11229284107685089,
"eval_precision": 0.7630772158891853,
"eval_recall": 0.7715685354030615,
"eval_runtime": 330.0378,
"eval_samples_per_second": 13.111,
"eval_steps_per_second": 0.658,
"step": 5750
},
{
"epoch": 2.376237623762376,
"grad_norm": 1.9502440690994263,
"learning_rate": 1e-05,
"loss": 0.0846,
"step": 6000
},
{
"epoch": 2.376237623762376,
"eval_f1": 0.7651510799847114,
"eval_loss": 0.11747279018163681,
"eval_precision": 0.7683948018832312,
"eval_recall": 0.7620669810205374,
"eval_runtime": 330.3569,
"eval_samples_per_second": 13.098,
"eval_steps_per_second": 0.657,
"step": 6000
},
{
"epoch": 2.4752475247524752,
"grad_norm": 0.43725672364234924,
"learning_rate": 1e-05,
"loss": 0.0885,
"step": 6250
},
{
"epoch": 2.4752475247524752,
"eval_f1": 0.7655915615026703,
"eval_loss": 0.11502809077501297,
"eval_precision": 0.7608718307527647,
"eval_recall": 0.770713103409905,
"eval_runtime": 330.4941,
"eval_samples_per_second": 13.093,
"eval_steps_per_second": 0.657,
"step": 6250
},
{
"epoch": 2.5742574257425743,
"grad_norm": 1.3364715576171875,
"learning_rate": 1e-05,
"loss": 0.0847,
"step": 6500
},
{
"epoch": 2.5742574257425743,
"eval_f1": 0.7656431107915714,
"eval_loss": 0.11593661457300186,
"eval_precision": 0.7612439448703502,
"eval_recall": 0.7703884913100628,
"eval_runtime": 329.9536,
"eval_samples_per_second": 13.114,
"eval_steps_per_second": 0.658,
"step": 6500
},
{
"epoch": 2.6732673267326734,
"grad_norm": 0.8091257810592651,
"learning_rate": 1e-05,
"loss": 0.0898,
"step": 6750
},
{
"epoch": 2.6732673267326734,
"eval_f1": 0.7642224417610171,
"eval_loss": 0.11731592565774918,
"eval_precision": 0.7507351472736794,
"eval_recall": 0.7820350588022562,
"eval_runtime": 330.5066,
"eval_samples_per_second": 13.092,
"eval_steps_per_second": 0.657,
"step": 6750
},
{
"epoch": 2.772277227722772,
"grad_norm": 0.6623993515968323,
"learning_rate": 1e-05,
"loss": 0.0877,
"step": 7000
},
{
"epoch": 2.772277227722772,
"eval_f1": 0.7683511472377138,
"eval_loss": 0.11674495786428452,
"eval_precision": 0.7588554349876182,
"eval_recall": 0.779668537489481,
"eval_runtime": 329.2959,
"eval_samples_per_second": 13.14,
"eval_steps_per_second": 0.659,
"step": 7000
},
{
"epoch": 2.871287128712871,
"grad_norm": 0.19114291667938232,
"learning_rate": 1e-05,
"loss": 0.0815,
"step": 7250
},
{
"epoch": 2.871287128712871,
"eval_f1": 0.7577154015271775,
"eval_loss": 0.11400625854730606,
"eval_precision": 0.7864181813154634,
"eval_recall": 0.7377428070687893,
"eval_runtime": 328.8093,
"eval_samples_per_second": 13.16,
"eval_steps_per_second": 0.66,
"step": 7250
},
{
"epoch": 2.9702970297029703,
"grad_norm": 0.7965342998504639,
"learning_rate": 1e-05,
"loss": 0.0902,
"step": 7500
},
{
"epoch": 2.9702970297029703,
"eval_f1": 0.7484147336968241,
"eval_loss": 0.11417645215988159,
"eval_precision": 0.7828750315445061,
"eval_recall": 0.7260785050108842,
"eval_runtime": 328.5671,
"eval_samples_per_second": 13.169,
"eval_steps_per_second": 0.66,
"step": 7500
},
{
"epoch": 3.0693069306930694,
"grad_norm": 2.098968982696533,
"learning_rate": 1e-05,
"loss": 0.0802,
"step": 7750
},
{
"epoch": 3.0693069306930694,
"eval_f1": 0.7688979882499555,
"eval_loss": 0.1173376813530922,
"eval_precision": 0.7535579745567124,
"eval_recall": 0.7900408242747954,
"eval_runtime": 329.2583,
"eval_samples_per_second": 13.142,
"eval_steps_per_second": 0.659,
"step": 7750
},
{
"epoch": 3.1683168316831685,
"grad_norm": 2.423370361328125,
"learning_rate": 1e-05,
"loss": 0.0748,
"step": 8000
},
{
"epoch": 3.1683168316831685,
"eval_f1": 0.7671750432534277,
"eval_loss": 0.11995264887809753,
"eval_precision": 0.7630772158891853,
"eval_recall": 0.7715685354030615,
"eval_runtime": 330.3633,
"eval_samples_per_second": 13.098,
"eval_steps_per_second": 0.657,
"step": 8000
},
{
"epoch": 3.2673267326732676,
"grad_norm": 0.2435981184244156,
"learning_rate": 1e-05,
"loss": 0.0877,
"step": 8250
},
{
"epoch": 3.2673267326732676,
"eval_f1": 0.7682401568437223,
"eval_loss": 0.11544305831193924,
"eval_precision": 0.7605157685857473,
"eval_recall": 0.7771111087928673,
"eval_runtime": 331.1394,
"eval_samples_per_second": 13.067,
"eval_steps_per_second": 0.655,
"step": 8250
},
{
"epoch": 3.366336633663366,
"grad_norm": 0.3148539960384369,
"learning_rate": 1e-05,
"loss": 0.0776,
"step": 8500
},
{
"epoch": 3.366336633663366,
"eval_f1": 0.7506009456389866,
"eval_loss": 0.12167887389659882,
"eval_precision": 0.7774573692979017,
"eval_recall": 0.7317636503995493,
"eval_runtime": 330.4561,
"eval_samples_per_second": 13.094,
"eval_steps_per_second": 0.657,
"step": 8500
},
{
"epoch": 3.4653465346534653,
"grad_norm": 1.8756177425384521,
"learning_rate": 1e-05,
"loss": 0.075,
"step": 8750
},
{
"epoch": 3.4653465346534653,
"eval_f1": 0.7520150871817991,
"eval_loss": 0.12817060947418213,
"eval_precision": 0.7849829873110825,
"eval_recall": 0.7302042256949516,
"eval_runtime": 330.0881,
"eval_samples_per_second": 13.109,
"eval_steps_per_second": 0.657,
"step": 8750
},
{
"epoch": 3.5643564356435644,
"grad_norm": 1.7097699642181396,
"learning_rate": 1e-05,
"loss": 0.0786,
"step": 9000
},
{
"epoch": 3.5643564356435644,
"eval_f1": 0.7664651841589353,
"eval_loss": 0.126222163438797,
"eval_precision": 0.7584130472040356,
"eval_recall": 0.7757884927010091,
"eval_runtime": 330.0335,
"eval_samples_per_second": 13.111,
"eval_steps_per_second": 0.658,
"step": 9000
},
{
"epoch": 3.6633663366336635,
"grad_norm": 0.3647657036781311,
"learning_rate": 1e-05,
"loss": 0.0828,
"step": 9250
},
{
"epoch": 3.6633663366336635,
"eval_f1": 0.7644029198454667,
"eval_loss": 0.12351341545581818,
"eval_precision": 0.7535178310806523,
"eval_recall": 0.7778545696064317,
"eval_runtime": 329.9562,
"eval_samples_per_second": 13.114,
"eval_steps_per_second": 0.658,
"step": 9250
},
{
"epoch": 3.762376237623762,
"grad_norm": 0.8577436804771423,
"learning_rate": 1e-05,
"loss": 0.0658,
"step": 9500
},
{
"epoch": 3.762376237623762,
"eval_f1": 0.7625425953787222,
"eval_loss": 0.13211406767368317,
"eval_precision": 0.7572532685294451,
"eval_recall": 0.7683530152239075,
"eval_runtime": 330.4643,
"eval_samples_per_second": 13.094,
"eval_steps_per_second": 0.657,
"step": 9500
},
{
"epoch": 3.8613861386138613,
"grad_norm": 1.1118955612182617,
"learning_rate": 1e-05,
"loss": 0.0929,
"step": 9750
},
{
"epoch": 3.8613861386138613,
"eval_f1": 0.7654929694539482,
"eval_loss": 0.1250331550836563,
"eval_precision": 0.7551473309426884,
"eval_recall": 0.7781002455020274,
"eval_runtime": 330.8519,
"eval_samples_per_second": 13.078,
"eval_steps_per_second": 0.656,
"step": 9750
},
{
"epoch": 3.9603960396039604,
"grad_norm": 0.2617945969104767,
"learning_rate": 1e-05,
"loss": 0.079,
"step": 10000
},
{
"epoch": 3.9603960396039604,
"eval_f1": 0.7647937419076998,
"eval_loss": 0.12892000377178192,
"eval_precision": 0.7670959127005181,
"eval_recall": 0.7625736332213622,
"eval_runtime": 329.8245,
"eval_samples_per_second": 13.119,
"eval_steps_per_second": 0.658,
"step": 10000
},
{
"epoch": 4.0594059405940595,
"grad_norm": 1.7066285610198975,
"learning_rate": 1e-05,
"loss": 0.0827,
"step": 10250
},
{
"epoch": 4.0594059405940595,
"eval_f1": 0.758990760980963,
"eval_loss": 0.12950246036052704,
"eval_precision": 0.741826860901623,
"eval_recall": 0.78480512841912,
"eval_runtime": 330.1148,
"eval_samples_per_second": 13.108,
"eval_steps_per_second": 0.657,
"step": 10250
},
{
"epoch": 4.158415841584159,
"grad_norm": 3.2379682064056396,
"learning_rate": 1e-05,
"loss": 0.0709,
"step": 10500
},
{
"epoch": 4.158415841584159,
"eval_f1": 0.7665793170128494,
"eval_loss": 0.13100141286849976,
"eval_precision": 0.765117406734896,
"eval_recall": 0.7680767385090446,
"eval_runtime": 328.8925,
"eval_samples_per_second": 13.156,
"eval_steps_per_second": 0.66,
"step": 10500
},
{
"epoch": 4.257425742574258,
"grad_norm": 0.4713508188724518,
"learning_rate": 1e-05,
"loss": 0.067,
"step": 10750
},
{
"epoch": 4.257425742574258,
"eval_f1": 0.7622919716228407,
"eval_loss": 0.13499750196933746,
"eval_precision": 0.7648353770910121,
"eval_recall": 0.7598494648333993,
"eval_runtime": 329.8598,
"eval_samples_per_second": 13.118,
"eval_steps_per_second": 0.658,
"step": 10750
},
{
"epoch": 4.356435643564357,
"grad_norm": 1.8301159143447876,
"learning_rate": 1e-05,
"loss": 0.0775,
"step": 11000
},
{
"epoch": 4.356435643564357,
"eval_f1": 0.7618188925299318,
"eval_loss": 0.1320166289806366,
"eval_precision": 0.7623117959801409,
"eval_recall": 0.7613299533337505,
"eval_runtime": 330.5622,
"eval_samples_per_second": 13.09,
"eval_steps_per_second": 0.656,
"step": 11000
},
{
"epoch": 4.455445544554456,
"grad_norm": 2.722707509994507,
"learning_rate": 1e-05,
"loss": 0.064,
"step": 11250
},
{
"epoch": 4.455445544554456,
"eval_f1": 0.7610237264171784,
"eval_loss": 0.1370055377483368,
"eval_precision": 0.755460351976167,
"eval_recall": 0.767172971130909,
"eval_runtime": 330.6803,
"eval_samples_per_second": 13.085,
"eval_steps_per_second": 0.656,
"step": 11250
},
{
"epoch": 4.554455445544555,
"grad_norm": 0.3536905348300934,
"learning_rate": 1e-05,
"loss": 0.0752,
"step": 11500
},
{
"epoch": 4.554455445544555,
"eval_f1": 0.761344776580898,
"eval_loss": 0.13751940429210663,
"eval_precision": 0.7645423335256452,
"eval_recall": 0.7583053405384352,
"eval_runtime": 330.8743,
"eval_samples_per_second": 13.077,
"eval_steps_per_second": 0.656,
"step": 11500
},
{
"epoch": 4.653465346534653,
"grad_norm": 2.0691871643066406,
"learning_rate": 1e-05,
"loss": 0.0753,
"step": 11750
},
{
"epoch": 4.653465346534653,
"eval_f1": 0.7634977092513291,
"eval_loss": 0.13433216512203217,
"eval_precision": 0.7707010020069438,
"eval_recall": 0.7570310598315564,
"eval_runtime": 330.8577,
"eval_samples_per_second": 13.078,
"eval_steps_per_second": 0.656,
"step": 11750
},
{
"epoch": 4.752475247524752,
"grad_norm": 1.6911152601242065,
"learning_rate": 1e-05,
"loss": 0.065,
"step": 12000
},
{
"epoch": 4.752475247524752,
"eval_f1": 0.7704865330126336,
"eval_loss": 0.13676360249519348,
"eval_precision": 0.7719050041777025,
"eval_recall": 0.7690989101935501,
"eval_runtime": 329.4666,
"eval_samples_per_second": 13.133,
"eval_steps_per_second": 0.659,
"step": 12000
}
],
"logging_steps": 250,
"max_steps": 25250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.1756719325184e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}