authorship-e5-base / trainer_state.json
adugeen's picture
Upload folder using huggingface_hub
7615637 verified
{
"best_global_step": 4324,
"best_metric": 8.351966857910156,
"best_model_checkpoint": "printing_press/author-paraphrase/models/intfloat/multilingual-e5-base/checkpoint-4324",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 12972,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02312673450508788,
"grad_norm": 10.87702751159668,
"learning_rate": 4.977104532839963e-05,
"loss": 2.4824,
"step": 100
},
{
"epoch": 0.04625346901017576,
"grad_norm": 11.214370727539062,
"learning_rate": 4.953977798334876e-05,
"loss": 2.2607,
"step": 200
},
{
"epoch": 0.06938020351526364,
"grad_norm": 9.89275074005127,
"learning_rate": 4.930851063829787e-05,
"loss": 2.1716,
"step": 300
},
{
"epoch": 0.09250693802035152,
"grad_norm": 8.298206329345703,
"learning_rate": 4.9077243293247e-05,
"loss": 2.0981,
"step": 400
},
{
"epoch": 0.11563367252543941,
"grad_norm": 9.424484252929688,
"learning_rate": 4.8845975948196116e-05,
"loss": 1.9617,
"step": 500
},
{
"epoch": 0.13876040703052728,
"grad_norm": 8.752665519714355,
"learning_rate": 4.8614708603145235e-05,
"loss": 1.987,
"step": 600
},
{
"epoch": 0.16188714153561518,
"grad_norm": 9.41073989868164,
"learning_rate": 4.838344125809436e-05,
"loss": 1.9429,
"step": 700
},
{
"epoch": 0.18501387604070305,
"grad_norm": 11.025175094604492,
"learning_rate": 4.815217391304348e-05,
"loss": 1.9398,
"step": 800
},
{
"epoch": 0.20814061054579094,
"grad_norm": 9.1474027633667,
"learning_rate": 4.79209065679926e-05,
"loss": 1.8745,
"step": 900
},
{
"epoch": 0.23126734505087881,
"grad_norm": 8.528326034545898,
"learning_rate": 4.7689639222941726e-05,
"loss": 1.8484,
"step": 1000
},
{
"epoch": 0.2543940795559667,
"grad_norm": 9.510236740112305,
"learning_rate": 4.7458371877890846e-05,
"loss": 1.846,
"step": 1100
},
{
"epoch": 0.27752081406105455,
"grad_norm": 10.6268892288208,
"learning_rate": 4.7227104532839965e-05,
"loss": 1.7953,
"step": 1200
},
{
"epoch": 0.30064754856614245,
"grad_norm": 9.342752456665039,
"learning_rate": 4.699583718778909e-05,
"loss": 1.8189,
"step": 1300
},
{
"epoch": 0.32377428307123035,
"grad_norm": 8.43892765045166,
"learning_rate": 4.676456984273821e-05,
"loss": 1.7775,
"step": 1400
},
{
"epoch": 0.34690101757631825,
"grad_norm": 12.730334281921387,
"learning_rate": 4.653330249768733e-05,
"loss": 1.76,
"step": 1500
},
{
"epoch": 0.3700277520814061,
"grad_norm": 27.328882217407227,
"learning_rate": 4.6302035152636456e-05,
"loss": 1.7638,
"step": 1600
},
{
"epoch": 0.393154486586494,
"grad_norm": 9.745387077331543,
"learning_rate": 4.607076780758557e-05,
"loss": 1.7039,
"step": 1700
},
{
"epoch": 0.4162812210915819,
"grad_norm": 9.242189407348633,
"learning_rate": 4.583950046253469e-05,
"loss": 1.706,
"step": 1800
},
{
"epoch": 0.43940795559666973,
"grad_norm": 9.079887390136719,
"learning_rate": 4.5608233117483814e-05,
"loss": 1.7255,
"step": 1900
},
{
"epoch": 0.46253469010175763,
"grad_norm": 8.81753158569336,
"learning_rate": 4.537696577243293e-05,
"loss": 1.705,
"step": 2000
},
{
"epoch": 0.4856614246068455,
"grad_norm": 10.990808486938477,
"learning_rate": 4.514569842738205e-05,
"loss": 1.6823,
"step": 2100
},
{
"epoch": 0.5087881591119334,
"grad_norm": 9.342223167419434,
"learning_rate": 4.491443108233118e-05,
"loss": 1.6921,
"step": 2200
},
{
"epoch": 0.5319148936170213,
"grad_norm": 9.898025512695312,
"learning_rate": 4.46831637372803e-05,
"loss": 1.6801,
"step": 2300
},
{
"epoch": 0.5550416281221091,
"grad_norm": 8.3154296875,
"learning_rate": 4.445189639222942e-05,
"loss": 1.6547,
"step": 2400
},
{
"epoch": 0.5781683626271971,
"grad_norm": 8.5817232131958,
"learning_rate": 4.422062904717854e-05,
"loss": 1.6512,
"step": 2500
},
{
"epoch": 0.6012950971322849,
"grad_norm": 8.931965827941895,
"learning_rate": 4.398936170212766e-05,
"loss": 1.6466,
"step": 2600
},
{
"epoch": 0.6244218316373727,
"grad_norm": 10.271759033203125,
"learning_rate": 4.375809435707678e-05,
"loss": 1.6545,
"step": 2700
},
{
"epoch": 0.6475485661424607,
"grad_norm": 8.621456146240234,
"learning_rate": 4.352682701202591e-05,
"loss": 1.5985,
"step": 2800
},
{
"epoch": 0.6706753006475485,
"grad_norm": 9.487942695617676,
"learning_rate": 4.329555966697503e-05,
"loss": 1.5941,
"step": 2900
},
{
"epoch": 0.6938020351526365,
"grad_norm": 9.156566619873047,
"learning_rate": 4.306429232192415e-05,
"loss": 1.6178,
"step": 3000
},
{
"epoch": 0.7169287696577243,
"grad_norm": 11.126986503601074,
"learning_rate": 4.2833024976873266e-05,
"loss": 1.6035,
"step": 3100
},
{
"epoch": 0.7400555041628122,
"grad_norm": 10.304364204406738,
"learning_rate": 4.2601757631822385e-05,
"loss": 1.568,
"step": 3200
},
{
"epoch": 0.7631822386679001,
"grad_norm": 9.863882064819336,
"learning_rate": 4.2370490286771505e-05,
"loss": 1.5733,
"step": 3300
},
{
"epoch": 0.786308973172988,
"grad_norm": 10.20065689086914,
"learning_rate": 4.213922294172063e-05,
"loss": 1.5841,
"step": 3400
},
{
"epoch": 0.8094357076780758,
"grad_norm": 9.240017890930176,
"learning_rate": 4.190795559666975e-05,
"loss": 1.5869,
"step": 3500
},
{
"epoch": 0.8325624421831638,
"grad_norm": 9.452214241027832,
"learning_rate": 4.167668825161887e-05,
"loss": 1.5806,
"step": 3600
},
{
"epoch": 0.8556891766882516,
"grad_norm": 10.292157173156738,
"learning_rate": 4.1445420906567996e-05,
"loss": 1.5682,
"step": 3700
},
{
"epoch": 0.8788159111933395,
"grad_norm": 12.675426483154297,
"learning_rate": 4.1214153561517115e-05,
"loss": 1.553,
"step": 3800
},
{
"epoch": 0.9019426456984274,
"grad_norm": 11.213452339172363,
"learning_rate": 4.0982886216466234e-05,
"loss": 1.5564,
"step": 3900
},
{
"epoch": 0.9250693802035153,
"grad_norm": 11.688211441040039,
"learning_rate": 4.075161887141536e-05,
"loss": 1.5389,
"step": 4000
},
{
"epoch": 0.9481961147086031,
"grad_norm": 19.469762802124023,
"learning_rate": 4.052035152636448e-05,
"loss": 1.5091,
"step": 4100
},
{
"epoch": 0.971322849213691,
"grad_norm": 11.896566390991211,
"learning_rate": 4.02890841813136e-05,
"loss": 1.5358,
"step": 4200
},
{
"epoch": 0.9944495837187789,
"grad_norm": 26.442358016967773,
"learning_rate": 4.0057816836262725e-05,
"loss": 1.512,
"step": 4300
},
{
"epoch": 1.0,
"eval_cosine_accuracy": 0.9215032424577613,
"eval_cosine_accuracy_threshold": 0.8687731027603149,
"eval_cosine_ap": 0.844612185583288,
"eval_cosine_f1": 0.7527066450567261,
"eval_cosine_f1_threshold": 0.8616929054260254,
"eval_cosine_mcc": 0.7030390290848069,
"eval_cosine_precision": 0.7499838511724048,
"eval_cosine_recall": 0.7554492810202356,
"eval_loss": 8.351966857910156,
"eval_runtime": 1467.219,
"eval_samples_per_second": 125.699,
"eval_steps_per_second": 1.964,
"step": 4324
},
{
"epoch": 1.0175763182238668,
"grad_norm": 10.624114990234375,
"learning_rate": 3.9826549491211844e-05,
"loss": 1.4186,
"step": 4400
},
{
"epoch": 1.0407030527289547,
"grad_norm": 10.404448509216309,
"learning_rate": 3.9595282146160964e-05,
"loss": 1.4075,
"step": 4500
},
{
"epoch": 1.0638297872340425,
"grad_norm": 9.519079208374023,
"learning_rate": 3.936401480111008e-05,
"loss": 1.3934,
"step": 4600
},
{
"epoch": 1.0869565217391304,
"grad_norm": 9.599648475646973,
"learning_rate": 3.91327474560592e-05,
"loss": 1.3799,
"step": 4700
},
{
"epoch": 1.1100832562442182,
"grad_norm": 14.585043907165527,
"learning_rate": 3.890148011100833e-05,
"loss": 1.3597,
"step": 4800
},
{
"epoch": 1.1332099907493063,
"grad_norm": 11.30241584777832,
"learning_rate": 3.867021276595745e-05,
"loss": 1.3351,
"step": 4900
},
{
"epoch": 1.1563367252543941,
"grad_norm": 10.888731002807617,
"learning_rate": 3.843894542090657e-05,
"loss": 1.3082,
"step": 5000
},
{
"epoch": 1.179463459759482,
"grad_norm": 9.09694766998291,
"learning_rate": 3.820767807585569e-05,
"loss": 1.3105,
"step": 5100
},
{
"epoch": 1.2025901942645698,
"grad_norm": 13.162363052368164,
"learning_rate": 3.797641073080481e-05,
"loss": 1.2948,
"step": 5200
},
{
"epoch": 1.2257169287696577,
"grad_norm": 26.262020111083984,
"learning_rate": 3.774514338575393e-05,
"loss": 1.3486,
"step": 5300
},
{
"epoch": 1.2488436632747457,
"grad_norm": 21.114444732666016,
"learning_rate": 3.751387604070306e-05,
"loss": 1.3155,
"step": 5400
},
{
"epoch": 1.2719703977798336,
"grad_norm": 10.668533325195312,
"learning_rate": 3.728260869565218e-05,
"loss": 1.2761,
"step": 5500
},
{
"epoch": 1.2950971322849214,
"grad_norm": 12.978555679321289,
"learning_rate": 3.70513413506013e-05,
"loss": 1.2541,
"step": 5600
},
{
"epoch": 1.3182238667900092,
"grad_norm": 13.066253662109375,
"learning_rate": 3.682007400555042e-05,
"loss": 1.2346,
"step": 5700
},
{
"epoch": 1.341350601295097,
"grad_norm": 11.974534034729004,
"learning_rate": 3.658880666049954e-05,
"loss": 1.2285,
"step": 5800
},
{
"epoch": 1.364477335800185,
"grad_norm": 12.733871459960938,
"learning_rate": 3.6357539315448655e-05,
"loss": 1.2013,
"step": 5900
},
{
"epoch": 1.3876040703052728,
"grad_norm": 13.184294700622559,
"learning_rate": 3.612627197039778e-05,
"loss": 1.1986,
"step": 6000
},
{
"epoch": 1.4107308048103608,
"grad_norm": 10.159133911132812,
"learning_rate": 3.58950046253469e-05,
"loss": 1.1755,
"step": 6100
},
{
"epoch": 1.4338575393154487,
"grad_norm": 12.212769508361816,
"learning_rate": 3.566373728029602e-05,
"loss": 1.1937,
"step": 6200
},
{
"epoch": 1.4569842738205365,
"grad_norm": 15.231562614440918,
"learning_rate": 3.5432469935245146e-05,
"loss": 1.202,
"step": 6300
},
{
"epoch": 1.4801110083256244,
"grad_norm": 14.845349311828613,
"learning_rate": 3.5201202590194265e-05,
"loss": 1.1607,
"step": 6400
},
{
"epoch": 1.5032377428307124,
"grad_norm": 12.119277954101562,
"learning_rate": 3.4969935245143384e-05,
"loss": 1.2116,
"step": 6500
},
{
"epoch": 1.5263644773358003,
"grad_norm": 12.19071102142334,
"learning_rate": 3.473866790009251e-05,
"loss": 1.1797,
"step": 6600
},
{
"epoch": 1.5494912118408881,
"grad_norm": 12.373006820678711,
"learning_rate": 3.450740055504163e-05,
"loss": 1.1571,
"step": 6700
},
{
"epoch": 1.572617946345976,
"grad_norm": 14.413765907287598,
"learning_rate": 3.427613320999075e-05,
"loss": 1.1526,
"step": 6800
},
{
"epoch": 1.5957446808510638,
"grad_norm": 12.39973258972168,
"learning_rate": 3.4044865864939875e-05,
"loss": 1.1438,
"step": 6900
},
{
"epoch": 1.6188714153561516,
"grad_norm": 15.030281066894531,
"learning_rate": 3.3813598519888994e-05,
"loss": 1.1634,
"step": 7000
},
{
"epoch": 1.6419981498612395,
"grad_norm": 15.445487022399902,
"learning_rate": 3.3582331174838114e-05,
"loss": 1.1367,
"step": 7100
},
{
"epoch": 1.6651248843663273,
"grad_norm": 15.726140022277832,
"learning_rate": 3.335106382978724e-05,
"loss": 1.1133,
"step": 7200
},
{
"epoch": 1.6882516188714154,
"grad_norm": 12.940756797790527,
"learning_rate": 3.311979648473636e-05,
"loss": 1.1156,
"step": 7300
},
{
"epoch": 1.7113783533765032,
"grad_norm": 14.717055320739746,
"learning_rate": 3.288852913968548e-05,
"loss": 1.1102,
"step": 7400
},
{
"epoch": 1.734505087881591,
"grad_norm": 12.775771141052246,
"learning_rate": 3.26572617946346e-05,
"loss": 1.1123,
"step": 7500
},
{
"epoch": 1.7576318223866791,
"grad_norm": 12.529901504516602,
"learning_rate": 3.242599444958372e-05,
"loss": 1.1066,
"step": 7600
},
{
"epoch": 1.780758556891767,
"grad_norm": 12.506126403808594,
"learning_rate": 3.219472710453284e-05,
"loss": 1.1291,
"step": 7700
},
{
"epoch": 1.8038852913968548,
"grad_norm": 16.9326114654541,
"learning_rate": 3.196345975948196e-05,
"loss": 1.1094,
"step": 7800
},
{
"epoch": 1.8270120259019427,
"grad_norm": 12.257229804992676,
"learning_rate": 3.173219241443108e-05,
"loss": 1.094,
"step": 7900
},
{
"epoch": 1.8501387604070305,
"grad_norm": 15.346363067626953,
"learning_rate": 3.150092506938021e-05,
"loss": 1.1585,
"step": 8000
},
{
"epoch": 1.8732654949121184,
"grad_norm": 12.800529479980469,
"learning_rate": 3.126965772432933e-05,
"loss": 1.077,
"step": 8100
},
{
"epoch": 1.8963922294172062,
"grad_norm": 14.989642143249512,
"learning_rate": 3.103839037927845e-05,
"loss": 1.108,
"step": 8200
},
{
"epoch": 1.919518963922294,
"grad_norm": 14.930624008178711,
"learning_rate": 3.080712303422757e-05,
"loss": 1.1431,
"step": 8300
},
{
"epoch": 1.942645698427382,
"grad_norm": 13.488320350646973,
"learning_rate": 3.057585568917669e-05,
"loss": 1.0784,
"step": 8400
},
{
"epoch": 1.96577243293247,
"grad_norm": 12.30305004119873,
"learning_rate": 3.034458834412581e-05,
"loss": 1.0834,
"step": 8500
},
{
"epoch": 1.9888991674375578,
"grad_norm": 13.152817726135254,
"learning_rate": 3.0113320999074934e-05,
"loss": 1.1268,
"step": 8600
},
{
"epoch": 2.0,
"eval_cosine_accuracy": 0.9209068037391286,
"eval_cosine_accuracy_threshold": 0.8271753191947937,
"eval_cosine_ap": 0.8450278341834471,
"eval_cosine_f1": 0.7521474761123443,
"eval_cosine_f1_threshold": 0.8189181089401245,
"eval_cosine_mcc": 0.701977809831327,
"eval_cosine_precision": 0.7438907980145093,
"eval_cosine_recall": 0.7605894983408159,
"eval_loss": 9.69921588897705,
"eval_runtime": 1472.4233,
"eval_samples_per_second": 125.255,
"eval_steps_per_second": 1.957,
"step": 8648
},
{
"epoch": 2.012025901942646,
"grad_norm": 15.71314811706543,
"learning_rate": 2.9882053654024057e-05,
"loss": 1.0443,
"step": 8700
},
{
"epoch": 2.0351526364477337,
"grad_norm": 14.261523246765137,
"learning_rate": 2.9650786308973173e-05,
"loss": 0.9715,
"step": 8800
},
{
"epoch": 2.0582793709528215,
"grad_norm": 13.405384063720703,
"learning_rate": 2.9419518963922292e-05,
"loss": 0.957,
"step": 8900
},
{
"epoch": 2.0814061054579094,
"grad_norm": 13.5853853225708,
"learning_rate": 2.9188251618871415e-05,
"loss": 0.9784,
"step": 9000
},
{
"epoch": 2.1045328399629972,
"grad_norm": 14.918572425842285,
"learning_rate": 2.8956984273820538e-05,
"loss": 0.9581,
"step": 9100
},
{
"epoch": 2.127659574468085,
"grad_norm": 13.354079246520996,
"learning_rate": 2.8725716928769657e-05,
"loss": 0.9569,
"step": 9200
},
{
"epoch": 2.150786308973173,
"grad_norm": 15.024975776672363,
"learning_rate": 2.849444958371878e-05,
"loss": 0.9518,
"step": 9300
},
{
"epoch": 2.1739130434782608,
"grad_norm": 13.5723876953125,
"learning_rate": 2.8263182238667902e-05,
"loss": 0.9485,
"step": 9400
},
{
"epoch": 2.1970397779833486,
"grad_norm": 12.383338928222656,
"learning_rate": 2.8031914893617022e-05,
"loss": 0.9433,
"step": 9500
},
{
"epoch": 2.2201665124884364,
"grad_norm": 13.54079532623291,
"learning_rate": 2.7800647548566144e-05,
"loss": 0.9392,
"step": 9600
},
{
"epoch": 2.2432932469935247,
"grad_norm": 15.325400352478027,
"learning_rate": 2.7569380203515267e-05,
"loss": 0.9248,
"step": 9700
},
{
"epoch": 2.2664199814986126,
"grad_norm": 15.651702880859375,
"learning_rate": 2.7338112858464387e-05,
"loss": 0.9105,
"step": 9800
},
{
"epoch": 2.2895467160037004,
"grad_norm": 15.196932792663574,
"learning_rate": 2.710684551341351e-05,
"loss": 0.9769,
"step": 9900
},
{
"epoch": 2.3126734505087883,
"grad_norm": 13.820411682128906,
"learning_rate": 2.6875578168362632e-05,
"loss": 0.9502,
"step": 10000
},
{
"epoch": 2.335800185013876,
"grad_norm": 15.181200981140137,
"learning_rate": 2.664431082331175e-05,
"loss": 0.9604,
"step": 10100
},
{
"epoch": 2.358926919518964,
"grad_norm": 17.202287673950195,
"learning_rate": 2.6413043478260867e-05,
"loss": 0.9291,
"step": 10200
},
{
"epoch": 2.382053654024052,
"grad_norm": 16.69881248474121,
"learning_rate": 2.618177613320999e-05,
"loss": 0.9552,
"step": 10300
},
{
"epoch": 2.4051803885291396,
"grad_norm": 16.424978256225586,
"learning_rate": 2.5950508788159113e-05,
"loss": 0.9621,
"step": 10400
},
{
"epoch": 2.4283071230342275,
"grad_norm": 17.192264556884766,
"learning_rate": 2.5719241443108232e-05,
"loss": 0.9357,
"step": 10500
},
{
"epoch": 2.4514338575393153,
"grad_norm": 16.9178524017334,
"learning_rate": 2.5487974098057355e-05,
"loss": 0.9323,
"step": 10600
},
{
"epoch": 2.474560592044403,
"grad_norm": 11.07499885559082,
"learning_rate": 2.5256706753006477e-05,
"loss": 0.9327,
"step": 10700
},
{
"epoch": 2.4976873265494914,
"grad_norm": 28.121694564819336,
"learning_rate": 2.5025439407955597e-05,
"loss": 0.9067,
"step": 10800
},
{
"epoch": 2.520814061054579,
"grad_norm": 15.262548446655273,
"learning_rate": 2.479417206290472e-05,
"loss": 0.9411,
"step": 10900
},
{
"epoch": 2.543940795559667,
"grad_norm": 16.59368133544922,
"learning_rate": 2.4562904717853842e-05,
"loss": 0.9305,
"step": 11000
},
{
"epoch": 2.567067530064755,
"grad_norm": 12.39183235168457,
"learning_rate": 2.433163737280296e-05,
"loss": 0.9378,
"step": 11100
},
{
"epoch": 2.590194264569843,
"grad_norm": 19.034120559692383,
"learning_rate": 2.4100370027752084e-05,
"loss": 0.9171,
"step": 11200
},
{
"epoch": 2.6133209990749307,
"grad_norm": 16.727380752563477,
"learning_rate": 2.3869102682701204e-05,
"loss": 0.9074,
"step": 11300
},
{
"epoch": 2.6364477335800185,
"grad_norm": 17.257230758666992,
"learning_rate": 2.3637835337650323e-05,
"loss": 0.9262,
"step": 11400
},
{
"epoch": 2.6595744680851063,
"grad_norm": 18.956735610961914,
"learning_rate": 2.3406567992599446e-05,
"loss": 0.9063,
"step": 11500
},
{
"epoch": 2.682701202590194,
"grad_norm": 15.463052749633789,
"learning_rate": 2.317530064754857e-05,
"loss": 0.8814,
"step": 11600
},
{
"epoch": 2.705827937095282,
"grad_norm": 13.263724327087402,
"learning_rate": 2.2944033302497688e-05,
"loss": 0.9089,
"step": 11700
},
{
"epoch": 2.72895467160037,
"grad_norm": 15.596953392028809,
"learning_rate": 2.271276595744681e-05,
"loss": 0.9048,
"step": 11800
},
{
"epoch": 2.752081406105458,
"grad_norm": 16.61760711669922,
"learning_rate": 2.2481498612395933e-05,
"loss": 0.9268,
"step": 11900
},
{
"epoch": 2.7752081406105455,
"grad_norm": 16.409652709960938,
"learning_rate": 2.2250231267345052e-05,
"loss": 0.8913,
"step": 12000
},
{
"epoch": 2.798334875115634,
"grad_norm": 14.509052276611328,
"learning_rate": 2.2018963922294172e-05,
"loss": 0.9064,
"step": 12100
},
{
"epoch": 2.8214616096207217,
"grad_norm": 13.914097785949707,
"learning_rate": 2.1787696577243295e-05,
"loss": 0.8585,
"step": 12200
},
{
"epoch": 2.8445883441258095,
"grad_norm": 13.852704048156738,
"learning_rate": 2.1556429232192414e-05,
"loss": 0.878,
"step": 12300
},
{
"epoch": 2.8677150786308974,
"grad_norm": 15.329246520996094,
"learning_rate": 2.1325161887141537e-05,
"loss": 0.8612,
"step": 12400
},
{
"epoch": 2.890841813135985,
"grad_norm": 15.046784400939941,
"learning_rate": 2.109389454209066e-05,
"loss": 0.8799,
"step": 12500
},
{
"epoch": 2.913968547641073,
"grad_norm": 15.824640274047852,
"learning_rate": 2.086262719703978e-05,
"loss": 0.8541,
"step": 12600
},
{
"epoch": 2.937095282146161,
"grad_norm": 15.991775512695312,
"learning_rate": 2.0631359851988898e-05,
"loss": 0.8521,
"step": 12700
},
{
"epoch": 2.9602220166512487,
"grad_norm": 16.13051986694336,
"learning_rate": 2.040009250693802e-05,
"loss": 0.8582,
"step": 12800
},
{
"epoch": 2.9833487511563366,
"grad_norm": 14.818473815917969,
"learning_rate": 2.0168825161887143e-05,
"loss": 0.869,
"step": 12900
},
{
"epoch": 3.0,
"eval_cosine_accuracy": 0.9225280326197758,
"eval_cosine_accuracy_threshold": 0.7901061773300171,
"eval_cosine_ap": 0.8478615501518483,
"eval_cosine_f1": 0.7559554803436604,
"eval_cosine_f1_threshold": 0.7817596793174744,
"eval_cosine_mcc": 0.7071656901034916,
"eval_cosine_precision": 0.756201575623413,
"eval_cosine_recall": 0.7557095451883662,
"eval_loss": 10.411548614501953,
"eval_runtime": 1486.9605,
"eval_samples_per_second": 124.03,
"eval_steps_per_second": 1.938,
"step": 12972
}
],
"logging_steps": 100,
"max_steps": 21620,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}