| { | |
| "best_metric": 0.9361970057366729, | |
| "best_model_checkpoint": "deberta-bib-style-classification/checkpoint-61266", | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 61266, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.048966800509254726, | |
| "grad_norm": 0.18245625495910645, | |
| "learning_rate": 1.9836777331635818e-05, | |
| "loss": 0.1329, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.09793360101850945, | |
| "grad_norm": 0.17618948221206665, | |
| "learning_rate": 1.9673554663271638e-05, | |
| "loss": 0.0446, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14690040152776418, | |
| "grad_norm": 0.16028529405593872, | |
| "learning_rate": 1.9510331994907454e-05, | |
| "loss": 0.0436, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1958672020370189, | |
| "grad_norm": 0.13800546526908875, | |
| "learning_rate": 1.934710932654327e-05, | |
| "loss": 0.0382, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24483400254627363, | |
| "grad_norm": 0.1666691154241562, | |
| "learning_rate": 1.918388665817909e-05, | |
| "loss": 0.0317, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.29380080305552836, | |
| "grad_norm": 0.2547225058078766, | |
| "learning_rate": 1.9020663989814907e-05, | |
| "loss": 0.0276, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3427676035647831, | |
| "grad_norm": 0.10508100688457489, | |
| "learning_rate": 1.8857441321450724e-05, | |
| "loss": 0.0236, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.3917344040740378, | |
| "grad_norm": 0.1274070292711258, | |
| "learning_rate": 1.8694218653086543e-05, | |
| "loss": 0.0208, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.44070120458329254, | |
| "grad_norm": 0.1652757227420807, | |
| "learning_rate": 1.853099598472236e-05, | |
| "loss": 0.0187, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.48966800509254726, | |
| "grad_norm": 0.15535283088684082, | |
| "learning_rate": 1.8367773316358176e-05, | |
| "loss": 0.0164, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.538634805601802, | |
| "grad_norm": 0.1361575722694397, | |
| "learning_rate": 1.8204550647993996e-05, | |
| "loss": 0.0151, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5876016061110567, | |
| "grad_norm": 0.14795701205730438, | |
| "learning_rate": 1.8041327979629813e-05, | |
| "loss": 0.0141, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.6365684066203114, | |
| "grad_norm": 0.24855226278305054, | |
| "learning_rate": 1.787810531126563e-05, | |
| "loss": 0.0128, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6855352071295662, | |
| "grad_norm": 0.11990305036306381, | |
| "learning_rate": 1.771488264290145e-05, | |
| "loss": 0.0119, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7345020076388209, | |
| "grad_norm": 0.12674401700496674, | |
| "learning_rate": 1.7551659974537265e-05, | |
| "loss": 0.0109, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.7834688081480756, | |
| "grad_norm": 0.05565750598907471, | |
| "learning_rate": 1.7388437306173082e-05, | |
| "loss": 0.0103, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.8324356086573304, | |
| "grad_norm": 0.1362127959728241, | |
| "learning_rate": 1.72252146378089e-05, | |
| "loss": 0.0097, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.8814024091665851, | |
| "grad_norm": 0.1502208560705185, | |
| "learning_rate": 1.7062318414781447e-05, | |
| "loss": 0.0096, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.9303692096758398, | |
| "grad_norm": 0.1307108849287033, | |
| "learning_rate": 1.689942219175399e-05, | |
| "loss": 0.009, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.9793360101850945, | |
| "grad_norm": 0.08258962631225586, | |
| "learning_rate": 1.6736199523389808e-05, | |
| "loss": 0.0088, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7270709032884696, | |
| "eval_f1": 0.8196123147092359, | |
| "eval_loss": 0.007612535264343023, | |
| "eval_roc_auc": 0.8736920143065684, | |
| "eval_runtime": 56.0999, | |
| "eval_samples_per_second": 128.467, | |
| "eval_steps_per_second": 16.061, | |
| "step": 10211 | |
| }, | |
| { | |
| "epoch": 1.0283028106943493, | |
| "grad_norm": 0.15880635380744934, | |
| "learning_rate": 1.6572976855025628e-05, | |
| "loss": 0.0086, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.077269611203604, | |
| "grad_norm": 0.04505603387951851, | |
| "learning_rate": 1.6409754186661444e-05, | |
| "loss": 0.0082, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.1262364117128587, | |
| "grad_norm": 0.1436477154493332, | |
| "learning_rate": 1.624653151829726e-05, | |
| "loss": 0.0076, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.1752032122221134, | |
| "grad_norm": 0.10293476283550262, | |
| "learning_rate": 1.608330884993308e-05, | |
| "loss": 0.0074, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.2241700127313682, | |
| "grad_norm": 0.12684179842472076, | |
| "learning_rate": 1.5920086181568897e-05, | |
| "loss": 0.0076, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.2731368132406229, | |
| "grad_norm": 0.09429904818534851, | |
| "learning_rate": 1.5756863513204713e-05, | |
| "loss": 0.0073, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.3221036137498776, | |
| "grad_norm": 0.24173329770565033, | |
| "learning_rate": 1.5593640844840533e-05, | |
| "loss": 0.0068, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.3710704142591323, | |
| "grad_norm": 0.15347008407115936, | |
| "learning_rate": 1.543041817647635e-05, | |
| "loss": 0.0069, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.420037214768387, | |
| "grad_norm": 0.30062130093574524, | |
| "learning_rate": 1.5267195508112166e-05, | |
| "loss": 0.0064, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.4690040152776418, | |
| "grad_norm": 0.0312146358191967, | |
| "learning_rate": 1.5103972839747984e-05, | |
| "loss": 0.0065, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.5179708157868965, | |
| "grad_norm": 0.19778664410114288, | |
| "learning_rate": 1.4941076616720531e-05, | |
| "loss": 0.0063, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.5669376162961512, | |
| "grad_norm": 0.07010342180728912, | |
| "learning_rate": 1.4778180393693078e-05, | |
| "loss": 0.0058, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.615904416805406, | |
| "grad_norm": 0.11643481999635696, | |
| "learning_rate": 1.4614957725328896e-05, | |
| "loss": 0.0058, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.6648712173146607, | |
| "grad_norm": 0.14050887525081635, | |
| "learning_rate": 1.4452061502301439e-05, | |
| "loss": 0.0059, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.7138380178239154, | |
| "grad_norm": 0.09967122972011566, | |
| "learning_rate": 1.4288838833937257e-05, | |
| "loss": 0.0053, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.7628048183331702, | |
| "grad_norm": 0.06283292174339294, | |
| "learning_rate": 1.4125616165573075e-05, | |
| "loss": 0.0057, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.8117716188424249, | |
| "grad_norm": 0.04395497962832451, | |
| "learning_rate": 1.3962393497208892e-05, | |
| "loss": 0.0051, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.8607384193516796, | |
| "grad_norm": 0.15392401814460754, | |
| "learning_rate": 1.379917082884471e-05, | |
| "loss": 0.0049, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.9097052198609343, | |
| "grad_norm": 0.053341180086135864, | |
| "learning_rate": 1.3635948160480528e-05, | |
| "loss": 0.0052, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.958672020370189, | |
| "grad_norm": 0.14263851940631866, | |
| "learning_rate": 1.3472725492116345e-05, | |
| "loss": 0.005, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8507007076453448, | |
| "eval_f1": 0.8938003106905804, | |
| "eval_loss": 0.00466541014611721, | |
| "eval_roc_auc": 0.9387280731428467, | |
| "eval_runtime": 55.7959, | |
| "eval_samples_per_second": 129.167, | |
| "eval_steps_per_second": 16.148, | |
| "step": 20422 | |
| }, | |
| { | |
| "epoch": 2.0076388208794436, | |
| "grad_norm": 0.16177518665790558, | |
| "learning_rate": 1.3309502823752163e-05, | |
| "loss": 0.0048, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.0566056213886985, | |
| "grad_norm": 0.13520753383636475, | |
| "learning_rate": 1.314628015538798e-05, | |
| "loss": 0.0045, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.105572421897953, | |
| "grad_norm": 0.09872843325138092, | |
| "learning_rate": 1.2983057487023797e-05, | |
| "loss": 0.0043, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.154539222407208, | |
| "grad_norm": 0.07923103123903275, | |
| "learning_rate": 1.2819834818659615e-05, | |
| "loss": 0.0042, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.2035060229164625, | |
| "grad_norm": 0.01526894886046648, | |
| "learning_rate": 1.2656612150295434e-05, | |
| "loss": 0.0041, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.2524728234257174, | |
| "grad_norm": 0.09783605486154556, | |
| "learning_rate": 1.249338948193125e-05, | |
| "loss": 0.0042, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.301439623934972, | |
| "grad_norm": 0.12495558708906174, | |
| "learning_rate": 1.2330819704240527e-05, | |
| "loss": 0.0042, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.350406424444227, | |
| "grad_norm": 0.09078874439001083, | |
| "learning_rate": 1.2167597035876343e-05, | |
| "loss": 0.0045, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.3993732249534814, | |
| "grad_norm": 0.0656205415725708, | |
| "learning_rate": 1.2004374367512162e-05, | |
| "loss": 0.0039, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.4483400254627363, | |
| "grad_norm": 0.22209672629833221, | |
| "learning_rate": 1.184115169914798e-05, | |
| "loss": 0.0043, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.497306825971991, | |
| "grad_norm": 0.4231460988521576, | |
| "learning_rate": 1.1677929030783796e-05, | |
| "loss": 0.0042, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.5462736264812458, | |
| "grad_norm": 0.0051184347830712795, | |
| "learning_rate": 1.1514706362419614e-05, | |
| "loss": 0.0039, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.5952404269905003, | |
| "grad_norm": 0.15397199988365173, | |
| "learning_rate": 1.1351483694055432e-05, | |
| "loss": 0.0037, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.6442072274997552, | |
| "grad_norm": 0.03935805708169937, | |
| "learning_rate": 1.1188261025691249e-05, | |
| "loss": 0.0035, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.6931740280090097, | |
| "grad_norm": 0.011553222313523293, | |
| "learning_rate": 1.1025364802663794e-05, | |
| "loss": 0.0038, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.7421408285182647, | |
| "grad_norm": 0.06817249953746796, | |
| "learning_rate": 1.0862142134299612e-05, | |
| "loss": 0.0035, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.791107629027519, | |
| "grad_norm": 0.003078105626627803, | |
| "learning_rate": 1.0699245911272159e-05, | |
| "loss": 0.0036, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.840074429536774, | |
| "grad_norm": 0.11958350241184235, | |
| "learning_rate": 1.0536023242907975e-05, | |
| "loss": 0.0037, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.8890412300460286, | |
| "grad_norm": 0.17206184566020966, | |
| "learning_rate": 1.0372800574543793e-05, | |
| "loss": 0.0036, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.9380080305552836, | |
| "grad_norm": 0.018106259405612946, | |
| "learning_rate": 1.0209577906179611e-05, | |
| "loss": 0.0035, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.986974831064538, | |
| "grad_norm": 0.14708341658115387, | |
| "learning_rate": 1.0046355237815428e-05, | |
| "loss": 0.0035, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8998196198140697, | |
| "eval_f1": 0.9224077451943314, | |
| "eval_loss": 0.0033930453937500715, | |
| "eval_roc_auc": 0.9558131614465735, | |
| "eval_runtime": 55.6722, | |
| "eval_samples_per_second": 129.454, | |
| "eval_steps_per_second": 16.184, | |
| "step": 30633 | |
| }, | |
| { | |
| "epoch": 3.035941631573793, | |
| "grad_norm": 0.03020176850259304, | |
| "learning_rate": 9.883132569451246e-06, | |
| "loss": 0.0033, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.0849084320830475, | |
| "grad_norm": 0.1869770586490631, | |
| "learning_rate": 9.720236346423793e-06, | |
| "loss": 0.0029, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.1338752325923025, | |
| "grad_norm": 0.02884034253656864, | |
| "learning_rate": 9.55701367805961e-06, | |
| "loss": 0.0033, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.182842033101557, | |
| "grad_norm": 0.003137261839583516, | |
| "learning_rate": 9.393791009695427e-06, | |
| "loss": 0.0031, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.231808833610812, | |
| "grad_norm": 0.007661271840333939, | |
| "learning_rate": 9.230568341331245e-06, | |
| "loss": 0.0028, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.2807756341200665, | |
| "grad_norm": 0.006506490521132946, | |
| "learning_rate": 9.06767211830379e-06, | |
| "loss": 0.0029, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.3297424346293214, | |
| "grad_norm": 0.03698953613638878, | |
| "learning_rate": 8.904449449939609e-06, | |
| "loss": 0.003, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.378709235138576, | |
| "grad_norm": 0.1853983998298645, | |
| "learning_rate": 8.741226781575427e-06, | |
| "loss": 0.0029, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.427676035647831, | |
| "grad_norm": 0.053507931530475616, | |
| "learning_rate": 8.578004113211243e-06, | |
| "loss": 0.0029, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.4766428361570854, | |
| "grad_norm": 0.10720884054899216, | |
| "learning_rate": 8.414781444847061e-06, | |
| "loss": 0.003, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.5256096366663403, | |
| "grad_norm": 0.015493680723011494, | |
| "learning_rate": 8.25155877648288e-06, | |
| "loss": 0.0031, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.574576437175595, | |
| "grad_norm": 0.07669718563556671, | |
| "learning_rate": 8.088336108118696e-06, | |
| "loss": 0.0029, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.6235432376848498, | |
| "grad_norm": 0.16198168694972992, | |
| "learning_rate": 7.925439885091243e-06, | |
| "loss": 0.003, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.6725100381941043, | |
| "grad_norm": 0.00896318256855011, | |
| "learning_rate": 7.76254366206379e-06, | |
| "loss": 0.0029, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.721476838703359, | |
| "grad_norm": 0.12104916572570801, | |
| "learning_rate": 7.599320993699606e-06, | |
| "loss": 0.0028, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.7704436392126137, | |
| "grad_norm": 0.10880939662456512, | |
| "learning_rate": 7.436098325335424e-06, | |
| "loss": 0.0029, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.8194104397218687, | |
| "grad_norm": 0.030039768666028976, | |
| "learning_rate": 7.272875656971241e-06, | |
| "loss": 0.0027, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.868377240231123, | |
| "grad_norm": 0.09756383299827576, | |
| "learning_rate": 7.109652988607058e-06, | |
| "loss": 0.0029, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.917344040740378, | |
| "grad_norm": 0.002539890818297863, | |
| "learning_rate": 6.9464303202428765e-06, | |
| "loss": 0.0027, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.9663108412496326, | |
| "grad_norm": 0.08850258588790894, | |
| "learning_rate": 6.783207651878694e-06, | |
| "loss": 0.0029, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9124462328291938, | |
| "eval_f1": 0.9324835411122006, | |
| "eval_loss": 0.0029195661190897226, | |
| "eval_roc_auc": 0.9616052804493336, | |
| "eval_runtime": 55.6822, | |
| "eval_samples_per_second": 129.431, | |
| "eval_steps_per_second": 16.181, | |
| "step": 40844 | |
| }, | |
| { | |
| "epoch": 4.015277641758887, | |
| "grad_norm": 0.1031348779797554, | |
| "learning_rate": 6.619984983514511e-06, | |
| "loss": 0.0027, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.0642444422681425, | |
| "grad_norm": 0.07331918925046921, | |
| "learning_rate": 6.456762315150329e-06, | |
| "loss": 0.0025, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 4.113211242777397, | |
| "grad_norm": 0.10652918368577957, | |
| "learning_rate": 6.2935396467861465e-06, | |
| "loss": 0.0025, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 4.1621780432866515, | |
| "grad_norm": 0.11473935097455978, | |
| "learning_rate": 6.130316978421964e-06, | |
| "loss": 0.0026, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 4.211144843795906, | |
| "grad_norm": 0.0035420297645032406, | |
| "learning_rate": 5.967094310057782e-06, | |
| "loss": 0.0025, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 4.260111644305161, | |
| "grad_norm": 0.01006217859685421, | |
| "learning_rate": 5.8045245323670555e-06, | |
| "loss": 0.0025, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 4.309078444814416, | |
| "grad_norm": 0.09146247059106827, | |
| "learning_rate": 5.641301864002873e-06, | |
| "loss": 0.0022, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 4.35804524532367, | |
| "grad_norm": 0.05604245886206627, | |
| "learning_rate": 5.47807919563869e-06, | |
| "loss": 0.0025, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 4.407012045832925, | |
| "grad_norm": 0.017924629151821136, | |
| "learning_rate": 5.314856527274508e-06, | |
| "loss": 0.0025, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 4.45597884634218, | |
| "grad_norm": 0.06204945594072342, | |
| "learning_rate": 5.1516338589103255e-06, | |
| "loss": 0.0026, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 4.504945646851435, | |
| "grad_norm": 0.06027592718601227, | |
| "learning_rate": 4.988411190546144e-06, | |
| "loss": 0.0024, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 4.553912447360689, | |
| "grad_norm": 0.26520290970802307, | |
| "learning_rate": 4.825188522181961e-06, | |
| "loss": 0.0022, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 4.602879247869944, | |
| "grad_norm": 0.025959959253668785, | |
| "learning_rate": 4.661965853817778e-06, | |
| "loss": 0.0022, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 4.651846048379199, | |
| "grad_norm": 0.0017334806034341455, | |
| "learning_rate": 4.498743185453596e-06, | |
| "loss": 0.0026, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 4.700812848888454, | |
| "grad_norm": 0.09476437419652939, | |
| "learning_rate": 4.335520517089414e-06, | |
| "loss": 0.0024, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 4.749779649397708, | |
| "grad_norm": 0.011143738403916359, | |
| "learning_rate": 4.172297848725231e-06, | |
| "loss": 0.0021, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 4.798746449906963, | |
| "grad_norm": 0.10621017217636108, | |
| "learning_rate": 4.009075180361049e-06, | |
| "loss": 0.0023, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 4.847713250416218, | |
| "grad_norm": 0.004438555799424648, | |
| "learning_rate": 3.845852511996866e-06, | |
| "loss": 0.0023, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 4.896680050925473, | |
| "grad_norm": 0.09955357015132904, | |
| "learning_rate": 3.682629843632684e-06, | |
| "loss": 0.0022, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 4.945646851434727, | |
| "grad_norm": 0.14140157401561737, | |
| "learning_rate": 3.51973362060523e-06, | |
| "loss": 0.0021, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.994613651943982, | |
| "grad_norm": 0.06944791227579117, | |
| "learning_rate": 3.3565109522410477e-06, | |
| "loss": 0.0022, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9174413764395727, | |
| "eval_f1": 0.9352578475336324, | |
| "eval_loss": 0.0027621558401733637, | |
| "eval_roc_auc": 0.9627978767199474, | |
| "eval_runtime": 55.5741, | |
| "eval_samples_per_second": 129.683, | |
| "eval_steps_per_second": 16.213, | |
| "step": 51055 | |
| }, | |
| { | |
| "epoch": 5.043580452453237, | |
| "grad_norm": 0.011898011900484562, | |
| "learning_rate": 3.1932882838768654e-06, | |
| "loss": 0.0021, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 5.0925472529624916, | |
| "grad_norm": 0.09524281322956085, | |
| "learning_rate": 3.0300656155126827e-06, | |
| "loss": 0.002, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 5.141514053471746, | |
| "grad_norm": 0.08005507290363312, | |
| "learning_rate": 2.8668429471485004e-06, | |
| "loss": 0.0019, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 5.190480853981001, | |
| "grad_norm": 0.05041489377617836, | |
| "learning_rate": 2.703946724121046e-06, | |
| "loss": 0.0022, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 5.239447654490256, | |
| "grad_norm": 0.001608343911357224, | |
| "learning_rate": 2.5407240557568635e-06, | |
| "loss": 0.0019, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 5.2884144549995105, | |
| "grad_norm": 0.11538127809762955, | |
| "learning_rate": 2.3775013873926812e-06, | |
| "loss": 0.0019, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 5.337381255508765, | |
| "grad_norm": 0.11458936333656311, | |
| "learning_rate": 2.214278719028499e-06, | |
| "loss": 0.002, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 5.3863480560180195, | |
| "grad_norm": 0.07239941507577896, | |
| "learning_rate": 2.051382496001045e-06, | |
| "loss": 0.0021, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 5.435314856527275, | |
| "grad_norm": 0.1313902884721756, | |
| "learning_rate": 1.8881598276368623e-06, | |
| "loss": 0.0018, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 5.484281657036529, | |
| "grad_norm": 0.0023473671171814203, | |
| "learning_rate": 1.7249371592726798e-06, | |
| "loss": 0.0021, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 5.533248457545784, | |
| "grad_norm": 0.09586118161678314, | |
| "learning_rate": 1.5617144909084975e-06, | |
| "loss": 0.0019, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 5.582215258055038, | |
| "grad_norm": 0.08927006274461746, | |
| "learning_rate": 1.398491822544315e-06, | |
| "loss": 0.0019, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 5.631182058564294, | |
| "grad_norm": 0.011845240369439125, | |
| "learning_rate": 1.235595599516861e-06, | |
| "loss": 0.002, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 5.680148859073548, | |
| "grad_norm": 0.041209351271390915, | |
| "learning_rate": 1.0723729311526786e-06, | |
| "loss": 0.002, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 5.729115659582803, | |
| "grad_norm": 0.09277820587158203, | |
| "learning_rate": 9.091502627884961e-07, | |
| "loss": 0.002, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 5.778082460092057, | |
| "grad_norm": 0.07851295173168182, | |
| "learning_rate": 7.459275944243137e-07, | |
| "loss": 0.002, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 5.827049260601313, | |
| "grad_norm": 0.046077970415353775, | |
| "learning_rate": 5.827049260601313e-07, | |
| "loss": 0.0018, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 5.876016061110567, | |
| "grad_norm": 0.13472139835357666, | |
| "learning_rate": 4.198087030326772e-07, | |
| "loss": 0.0019, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 5.924982861619822, | |
| "grad_norm": 0.05562426894903183, | |
| "learning_rate": 2.5658603466849477e-07, | |
| "loss": 0.0019, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 5.973949662129076, | |
| "grad_norm": 0.016598107293248177, | |
| "learning_rate": 9.368981164104072e-08, | |
| "loss": 0.0019, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.920632718190648, | |
| "eval_f1": 0.9361970057366729, | |
| "eval_loss": 0.002764922333881259, | |
| "eval_roc_auc": 0.9639744188099952, | |
| "eval_runtime": 55.7082, | |
| "eval_samples_per_second": 129.37, | |
| "eval_steps_per_second": 16.174, | |
| "step": 61266 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 61266, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2909461039327232e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |