| { | |
| "best_metric": 0.9895929814239887, | |
| "best_model_checkpoint": "models/pos_final_mono_de/checkpoint-4224", | |
| "epoch": 39.994174757281556, | |
| "global_step": 5120, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.9475099341812547, | |
| "eval_f1": 0.9428053278974075, | |
| "eval_loss": 0.235727921128273, | |
| "eval_precision": 0.9442734211134948, | |
| "eval_recall": 0.941341792581462, | |
| "eval_runtime": 18.99, | |
| "eval_samples_per_second": 771.406, | |
| "eval_steps_per_second": 3.054, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_accuracy": 0.9852943432700717, | |
| "eval_f1": 0.9842332493182053, | |
| "eval_loss": 0.05128009244799614, | |
| "eval_precision": 0.9842997713944935, | |
| "eval_recall": 0.9841667362328519, | |
| "eval_runtime": 19.2922, | |
| "eval_samples_per_second": 759.323, | |
| "eval_steps_per_second": 3.006, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.9875228217677473, | |
| "eval_f1": 0.9867158568898448, | |
| "eval_loss": 0.04063262417912483, | |
| "eval_precision": 0.9867884320258268, | |
| "eval_recall": 0.9866432924284164, | |
| "eval_runtime": 19.7655, | |
| "eval_samples_per_second": 741.14, | |
| "eval_steps_per_second": 2.934, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6822, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_accuracy": 0.9884893907546909, | |
| "eval_f1": 0.9876851402812782, | |
| "eval_loss": 0.036450713872909546, | |
| "eval_precision": 0.9876576580157648, | |
| "eval_recall": 0.9877126240762605, | |
| "eval_runtime": 18.3474, | |
| "eval_samples_per_second": 798.424, | |
| "eval_steps_per_second": 3.161, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.9889918532042529, | |
| "eval_f1": 0.9881764176274528, | |
| "eval_loss": 0.03515882417559624, | |
| "eval_precision": 0.9881194651573207, | |
| "eval_recall": 0.9882333766631287, | |
| "eval_runtime": 19.0555, | |
| "eval_samples_per_second": 768.756, | |
| "eval_steps_per_second": 3.044, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_accuracy": 0.9894521241504165, | |
| "eval_f1": 0.9887225068869429, | |
| "eval_loss": 0.0344870425760746, | |
| "eval_precision": 0.9887067858661908, | |
| "eval_recall": 0.9887382284076499, | |
| "eval_runtime": 18.7765, | |
| "eval_samples_per_second": 780.176, | |
| "eval_steps_per_second": 3.089, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_accuracy": 0.9896017122079197, | |
| "eval_f1": 0.9887940595397575, | |
| "eval_loss": 0.03525426983833313, | |
| "eval_precision": 0.9887783373812941, | |
| "eval_recall": 0.9888097821982119, | |
| "eval_runtime": 18.226, | |
| "eval_samples_per_second": 803.744, | |
| "eval_steps_per_second": 3.182, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 4.458874458874459e-05, | |
| "loss": 0.024, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "eval_accuracy": 0.9894866444713788, | |
| "eval_f1": 0.9887030802192603, | |
| "eval_loss": 0.037094976752996445, | |
| "eval_precision": 0.9886480621017779, | |
| "eval_recall": 0.9887581044605838, | |
| "eval_runtime": 18.998, | |
| "eval_samples_per_second": 771.08, | |
| "eval_steps_per_second": 3.053, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.9895556851133034, | |
| "eval_f1": 0.988807816838561, | |
| "eval_loss": 0.03866518661379814, | |
| "eval_precision": 0.9888058514867228, | |
| "eval_recall": 0.9888097821982119, | |
| "eval_runtime": 19.63, | |
| "eval_samples_per_second": 746.255, | |
| "eval_steps_per_second": 2.955, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "eval_accuracy": 0.9897513002654229, | |
| "eval_f1": 0.9889666056460926, | |
| "eval_loss": 0.04022372514009476, | |
| "eval_precision": 0.9889842973563904, | |
| "eval_recall": 0.9889489145687492, | |
| "eval_runtime": 17.8198, | |
| "eval_samples_per_second": 822.064, | |
| "eval_steps_per_second": 3.255, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_accuracy": 0.9897282867181147, | |
| "eval_f1": 0.9889296106084937, | |
| "eval_loss": 0.04293292760848999, | |
| "eval_precision": 0.9888785102450464, | |
| "eval_recall": 0.9889807162534435, | |
| "eval_runtime": 18.5105, | |
| "eval_samples_per_second": 791.389, | |
| "eval_steps_per_second": 3.133, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 11.71, | |
| "learning_rate": 3.917748917748918e-05, | |
| "loss": 0.0128, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "eval_accuracy": 0.989628561346446, | |
| "eval_f1": 0.9888770954828604, | |
| "eval_loss": 0.045427996665239334, | |
| "eval_precision": 0.9889006825762183, | |
| "eval_recall": 0.9888535095146666, | |
| "eval_runtime": 18.3742, | |
| "eval_samples_per_second": 797.257, | |
| "eval_steps_per_second": 3.157, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.9896899308059345, | |
| "eval_f1": 0.9889251953792704, | |
| "eval_loss": 0.04608777165412903, | |
| "eval_precision": 0.9889134021028363, | |
| "eval_recall": 0.9889369889369889, | |
| "eval_runtime": 18.3253, | |
| "eval_samples_per_second": 799.387, | |
| "eval_steps_per_second": 3.165, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_accuracy": 0.989889381549272, | |
| "eval_f1": 0.9891329626839416, | |
| "eval_loss": 0.04769197106361389, | |
| "eval_precision": 0.9892057156034064, | |
| "eval_recall": 0.9890602204651792, | |
| "eval_runtime": 18.8558, | |
| "eval_samples_per_second": 776.896, | |
| "eval_steps_per_second": 3.076, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.9897743138127311, | |
| "eval_f1": 0.9890247489724366, | |
| "eval_loss": 0.0506986528635025, | |
| "eval_precision": 0.9889972294324113, | |
| "eval_recall": 0.9890522700440055, | |
| "eval_runtime": 17.9307, | |
| "eval_samples_per_second": 816.978, | |
| "eval_steps_per_second": 3.235, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "learning_rate": 3.376623376623377e-05, | |
| "loss": 0.0069, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_accuracy": 0.9900581475628654, | |
| "eval_f1": 0.9893163454944793, | |
| "eval_loss": 0.05137912556529045, | |
| "eval_precision": 0.9893419096308429, | |
| "eval_recall": 0.9892907826792124, | |
| "eval_runtime": 18.9861, | |
| "eval_samples_per_second": 771.564, | |
| "eval_steps_per_second": 3.055, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_accuracy": 0.989889381549272, | |
| "eval_f1": 0.989197257872486, | |
| "eval_loss": 0.053016748279333115, | |
| "eval_precision": 0.9892070887364145, | |
| "eval_recall": 0.9891874272039561, | |
| "eval_runtime": 18.1987, | |
| "eval_samples_per_second": 804.946, | |
| "eval_steps_per_second": 3.187, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 17.99, | |
| "eval_accuracy": 0.9898203409073475, | |
| "eval_f1": 0.9890543664272952, | |
| "eval_loss": 0.05524001270532608, | |
| "eval_precision": 0.9890445373741871, | |
| "eval_recall": 0.989064195675766, | |
| "eval_runtime": 18.53, | |
| "eval_samples_per_second": 790.554, | |
| "eval_steps_per_second": 3.13, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_accuracy": 0.9898395188634376, | |
| "eval_f1": 0.9891659296212747, | |
| "eval_loss": 0.0566512756049633, | |
| "eval_precision": 0.9891325086653735, | |
| "eval_recall": 0.9891993528357165, | |
| "eval_runtime": 19.0959, | |
| "eval_samples_per_second": 767.129, | |
| "eval_steps_per_second": 3.037, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "learning_rate": 2.8354978354978357e-05, | |
| "loss": 0.0037, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 19.99, | |
| "eval_accuracy": 0.9899507510087605, | |
| "eval_f1": 0.989249406222982, | |
| "eval_loss": 0.057712409645318985, | |
| "eval_precision": 0.9892159824466563, | |
| "eval_recall": 0.9892828322580389, | |
| "eval_runtime": 18.1495, | |
| "eval_samples_per_second": 807.13, | |
| "eval_steps_per_second": 3.196, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_accuracy": 0.989897052731708, | |
| "eval_f1": 0.9892537230374182, | |
| "eval_loss": 0.05920035019516945, | |
| "eval_precision": 0.9891888454322872, | |
| "eval_recall": 0.9893186091533199, | |
| "eval_runtime": 18.5483, | |
| "eval_samples_per_second": 789.775, | |
| "eval_steps_per_second": 3.127, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 21.99, | |
| "eval_accuracy": 0.9899584221911966, | |
| "eval_f1": 0.9892630842496084, | |
| "eval_loss": 0.06059529632329941, | |
| "eval_precision": 0.9892512869437322, | |
| "eval_recall": 0.9892748818368653, | |
| "eval_runtime": 18.2219, | |
| "eval_samples_per_second": 803.923, | |
| "eval_steps_per_second": 3.183, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.9899699289648506, | |
| "eval_f1": 0.9892710345759693, | |
| "eval_loss": 0.06275586783885956, | |
| "eval_precision": 0.9892592371752827, | |
| "eval_recall": 0.9892828322580389, | |
| "eval_runtime": 18.6724, | |
| "eval_samples_per_second": 784.529, | |
| "eval_steps_per_second": 3.106, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 23.43, | |
| "learning_rate": 2.2943722943722946e-05, | |
| "loss": 0.0023, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "eval_accuracy": 0.9899162306877982, | |
| "eval_f1": 0.9891494254701287, | |
| "eval_loss": 0.06293565034866333, | |
| "eval_precision": 0.9891710528408098, | |
| "eval_recall": 0.9891277990451545, | |
| "eval_runtime": 18.198, | |
| "eval_samples_per_second": 804.98, | |
| "eval_steps_per_second": 3.187, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.9899776001472868, | |
| "eval_f1": 0.9892692816043408, | |
| "eval_loss": 0.06246413290500641, | |
| "eval_precision": 0.9892358571564855, | |
| "eval_recall": 0.9893027083109728, | |
| "eval_runtime": 18.2292, | |
| "eval_samples_per_second": 803.601, | |
| "eval_steps_per_second": 3.182, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 25.99, | |
| "eval_accuracy": 0.990008284877031, | |
| "eval_f1": 0.9893007845031315, | |
| "eval_loss": 0.06362640857696533, | |
| "eval_precision": 0.9892948855550521, | |
| "eval_recall": 0.9893066835215596, | |
| "eval_runtime": 19.3067, | |
| "eval_samples_per_second": 758.751, | |
| "eval_steps_per_second": 3.004, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "eval_accuracy": 0.9900926678838277, | |
| "eval_f1": 0.9893981976538494, | |
| "eval_loss": 0.0649913027882576, | |
| "eval_precision": 0.9893903316465458, | |
| "eval_recall": 0.9894060637862291, | |
| "eval_runtime": 18.4146, | |
| "eval_samples_per_second": 795.511, | |
| "eval_steps_per_second": 3.15, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 27.34, | |
| "learning_rate": 1.7532467532467535e-05, | |
| "loss": 0.0017, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 27.99, | |
| "eval_accuracy": 0.9901003390662637, | |
| "eval_f1": 0.989384347826087, | |
| "eval_loss": 0.0644073411822319, | |
| "eval_precision": 0.9893705826701542, | |
| "eval_recall": 0.9893981133650556, | |
| "eval_runtime": 18.6787, | |
| "eval_samples_per_second": 784.263, | |
| "eval_steps_per_second": 3.105, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.9901425305696621, | |
| "eval_f1": 0.9894557748763214, | |
| "eval_loss": 0.06558605283498764, | |
| "eval_precision": 0.9894538082366036, | |
| "eval_recall": 0.9894577415238572, | |
| "eval_runtime": 18.1086, | |
| "eval_samples_per_second": 808.954, | |
| "eval_steps_per_second": 3.203, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "eval_accuracy": 0.9901502017520981, | |
| "eval_f1": 0.9894956104173334, | |
| "eval_loss": 0.0667632669210434, | |
| "eval_precision": 0.989485776979218, | |
| "eval_recall": 0.9895054440508986, | |
| "eval_runtime": 18.5261, | |
| "eval_samples_per_second": 790.723, | |
| "eval_steps_per_second": 3.131, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "eval_accuracy": 0.9901003390662637, | |
| "eval_f1": 0.9894474469341146, | |
| "eval_loss": 0.06663960218429565, | |
| "eval_precision": 0.9894808819203155, | |
| "eval_recall": 0.9894140142074026, | |
| "eval_runtime": 18.0695, | |
| "eval_samples_per_second": 810.702, | |
| "eval_steps_per_second": 3.21, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "learning_rate": 1.2121212121212122e-05, | |
| "loss": 0.0011, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 31.99, | |
| "eval_accuracy": 0.9900466407892112, | |
| "eval_f1": 0.9893740508996081, | |
| "eval_loss": 0.06780469417572021, | |
| "eval_precision": 0.9893937165323654, | |
| "eval_recall": 0.9893543860486009, | |
| "eval_runtime": 18.1642, | |
| "eval_samples_per_second": 806.478, | |
| "eval_steps_per_second": 3.193, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.9902230779852407, | |
| "eval_f1": 0.9895929814239887, | |
| "eval_loss": 0.06849976629018784, | |
| "eval_precision": 0.9895851138680967, | |
| "eval_recall": 0.9896008491049814, | |
| "eval_runtime": 18.9151, | |
| "eval_samples_per_second": 774.46, | |
| "eval_steps_per_second": 3.066, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 33.99, | |
| "eval_accuracy": 0.99014636616088, | |
| "eval_f1": 0.9894398320867711, | |
| "eval_loss": 0.06920044124126434, | |
| "eval_precision": 0.9894417987104366, | |
| "eval_recall": 0.9894378654709233, | |
| "eval_runtime": 18.3423, | |
| "eval_samples_per_second": 798.645, | |
| "eval_steps_per_second": 3.162, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "eval_accuracy": 0.9902000644379325, | |
| "eval_f1": 0.9895391709648887, | |
| "eval_loss": 0.06976373493671417, | |
| "eval_precision": 0.9895450714751387, | |
| "eval_recall": 0.9895332705250061, | |
| "eval_runtime": 18.8004, | |
| "eval_samples_per_second": 779.185, | |
| "eval_steps_per_second": 3.085, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 35.16, | |
| "learning_rate": 6.709956709956711e-06, | |
| "loss": 0.0009, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 35.99, | |
| "eval_accuracy": 0.9900658187453014, | |
| "eval_f1": 0.9893825501754999, | |
| "eval_loss": 0.06981877237558365, | |
| "eval_precision": 0.9893510881446884, | |
| "eval_recall": 0.9894140142074026, | |
| "eval_runtime": 18.1896, | |
| "eval_samples_per_second": 805.351, | |
| "eval_steps_per_second": 3.189, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "eval_accuracy": 0.9902039000291505, | |
| "eval_f1": 0.9894797097330076, | |
| "eval_loss": 0.0695314109325409, | |
| "eval_precision": 0.9894698764529106, | |
| "eval_recall": 0.9894895432085514, | |
| "eval_runtime": 18.7061, | |
| "eval_samples_per_second": 783.113, | |
| "eval_steps_per_second": 3.101, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 37.99, | |
| "eval_accuracy": 0.9901732152994063, | |
| "eval_f1": 0.9894400419774727, | |
| "eval_loss": 0.06961216777563095, | |
| "eval_precision": 0.9894223430643007, | |
| "eval_recall": 0.9894577415238572, | |
| "eval_runtime": 18.6705, | |
| "eval_samples_per_second": 784.607, | |
| "eval_steps_per_second": 3.107, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "eval_accuracy": 0.9901962288467144, | |
| "eval_f1": 0.9894779103694458, | |
| "eval_loss": 0.06985215842723846, | |
| "eval_precision": 0.9894503782202383, | |
| "eval_recall": 0.9895054440508986, | |
| "eval_runtime": 18.2919, | |
| "eval_samples_per_second": 800.846, | |
| "eval_steps_per_second": 3.171, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 39.06, | |
| "learning_rate": 1.2987012987012988e-06, | |
| "loss": 0.0007, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 39.99, | |
| "eval_accuracy": 0.990138694978444, | |
| "eval_f1": 0.9894261920378432, | |
| "eval_loss": 0.06969785690307617, | |
| "eval_precision": 0.9894025940986839, | |
| "eval_recall": 0.9894497911026837, | |
| "eval_runtime": 18.675, | |
| "eval_samples_per_second": 784.419, | |
| "eval_steps_per_second": 3.106, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 39.99, | |
| "step": 5120, | |
| "total_flos": 2.72643266432467e+17, | |
| "train_loss": 0.07192220802244265, | |
| "train_runtime": 4057.7347, | |
| "train_samples_per_second": 1299.572, | |
| "train_steps_per_second": 1.262 | |
| } | |
| ], | |
| "max_steps": 5120, | |
| "num_train_epochs": 40, | |
| "total_flos": 2.72643266432467e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |