{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 6260,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 4.920127795527157e-05,
      "loss": 0.7778,
      "step": 100
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.840255591054313e-05,
      "loss": 0.5964,
      "step": 200
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.76038338658147e-05,
      "loss": 0.5901,
      "step": 300
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.680511182108626e-05,
      "loss": 0.4494,
      "step": 400
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.600638977635783e-05,
      "loss": 0.4166,
      "step": 500
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.520766773162939e-05,
      "loss": 0.4516,
      "step": 600
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8979708054137165,
      "eval_f1": 0.44450345193839613,
      "eval_loss": 0.40465056896209717,
      "eval_precision": 0.4332298136645963,
      "eval_recall": 0.4563794983642312,
      "eval_runtime": 68.5329,
      "eval_samples_per_second": 146.017,
      "eval_steps_per_second": 36.508,
      "step": 626
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.440894568690096e-05,
      "loss": 0.4155,
      "step": 700
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.361022364217253e-05,
      "loss": 0.3645,
      "step": 800
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.2811501597444096e-05,
      "loss": 0.3712,
      "step": 900
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.201277955271566e-05,
      "loss": 0.3669,
      "step": 1000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.1214057507987225e-05,
      "loss": 0.3424,
      "step": 1100
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.041533546325879e-05,
      "loss": 0.3677,
      "step": 1200
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9192688065906114,
      "eval_f1": 0.5293376983127676,
      "eval_loss": 0.2773844301700592,
      "eval_precision": 0.4918109499298081,
      "eval_recall": 0.5730643402399127,
      "eval_runtime": 67.6554,
      "eval_samples_per_second": 147.911,
      "eval_steps_per_second": 36.982,
      "step": 1252
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.9616613418530355e-05,
      "loss": 0.3039,
      "step": 1300
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.8817891373801916e-05,
      "loss": 0.2599,
      "step": 1400
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.8019169329073485e-05,
      "loss": 0.3243,
      "step": 1500
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.722044728434505e-05,
      "loss": 0.2701,
      "step": 1600
    },
    {
      "epoch": 2.72,
      "learning_rate": 3.6421725239616614e-05,
      "loss": 0.2634,
      "step": 1700
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.562300319488818e-05,
      "loss": 0.2892,
      "step": 1800
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9383648753820163,
      "eval_f1": 0.6352631578947368,
      "eval_loss": 0.21329015493392944,
      "eval_precision": 0.6139369277721262,
      "eval_recall": 0.6581243184296619,
      "eval_runtime": 67.955,
      "eval_samples_per_second": 147.259,
      "eval_steps_per_second": 36.818,
      "step": 1878
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.482428115015975e-05,
      "loss": 0.315,
      "step": 1900
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.402555910543131e-05,
      "loss": 0.242,
      "step": 2000
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.322683706070287e-05,
      "loss": 0.2366,
      "step": 2100
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.242811501597444e-05,
      "loss": 0.2233,
      "step": 2200
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.162939297124601e-05,
      "loss": 0.233,
      "step": 2300
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.083067092651757e-05,
      "loss": 0.2469,
      "step": 2400
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.003194888178914e-05,
      "loss": 0.2736,
      "step": 2500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9488050720373569,
      "eval_f1": 0.6536661466458659,
      "eval_loss": 0.17724330723285675,
      "eval_precision": 0.6247514910536779,
      "eval_recall": 0.6853871319520175,
      "eval_runtime": 68.1206,
      "eval_samples_per_second": 146.901,
      "eval_steps_per_second": 36.729,
      "step": 2504
    },
    {
      "epoch": 4.15,
      "learning_rate": 2.9233226837060707e-05,
      "loss": 0.1691,
      "step": 2600
    },
    {
      "epoch": 4.31,
      "learning_rate": 2.843450479233227e-05,
      "loss": 0.1941,
      "step": 2700
    },
    {
      "epoch": 4.47,
      "learning_rate": 2.7635782747603834e-05,
      "loss": 0.1891,
      "step": 2800
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.68370607028754e-05,
      "loss": 0.2037,
      "step": 2900
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.6038338658146967e-05,
      "loss": 0.2222,
      "step": 3000
    },
    {
      "epoch": 4.95,
      "learning_rate": 2.523961661341853e-05,
      "loss": 0.221,
      "step": 3100
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9560372809931474,
      "eval_f1": 0.6772486772486773,
      "eval_loss": 0.15026314556598663,
      "eval_precision": 0.6295081967213115,
      "eval_recall": 0.732824427480916,
      "eval_runtime": 68.8869,
      "eval_samples_per_second": 145.267,
      "eval_steps_per_second": 36.32,
      "step": 3130
    },
    {
      "epoch": 5.11,
      "learning_rate": 2.44408945686901e-05,
      "loss": 0.1524,
      "step": 3200
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.364217252396166e-05,
      "loss": 0.1575,
      "step": 3300
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.284345047923323e-05,
      "loss": 0.1606,
      "step": 3400
    },
    {
      "epoch": 5.59,
      "learning_rate": 2.2044728434504794e-05,
      "loss": 0.1314,
      "step": 3500
    },
    {
      "epoch": 5.75,
      "learning_rate": 2.124600638977636e-05,
      "loss": 0.1845,
      "step": 3600
    },
    {
      "epoch": 5.91,
      "learning_rate": 2.0447284345047924e-05,
      "loss": 0.1569,
      "step": 3700
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9622824168106149,
      "eval_f1": 0.7409068261086198,
      "eval_loss": 0.1283087134361267,
      "eval_precision": 0.6821100917431193,
      "eval_recall": 0.8107960741548528,
      "eval_runtime": 69.0068,
      "eval_samples_per_second": 145.015,
      "eval_steps_per_second": 36.257,
      "step": 3756
    },
    {
      "epoch": 6.07,
      "learning_rate": 1.964856230031949e-05,
      "loss": 0.1495,
      "step": 3800
    },
    {
      "epoch": 6.23,
      "learning_rate": 1.8849840255591057e-05,
      "loss": 0.1309,
      "step": 3900
    },
    {
      "epoch": 6.39,
      "learning_rate": 1.805111821086262e-05,
      "loss": 0.131,
      "step": 4000
    },
    {
      "epoch": 6.55,
      "learning_rate": 1.7252396166134186e-05,
      "loss": 0.1177,
      "step": 4100
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.645367412140575e-05,
      "loss": 0.1046,
      "step": 4200
    },
    {
      "epoch": 6.87,
      "learning_rate": 1.565495207667732e-05,
      "loss": 0.1534,
      "step": 4300
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9707674493650462,
      "eval_f1": 0.7749154306531356,
      "eval_loss": 0.09951327741146088,
      "eval_precision": 0.7411647585863613,
      "eval_recall": 0.811886586695747,
      "eval_runtime": 67.5487,
      "eval_samples_per_second": 148.145,
      "eval_steps_per_second": 37.04,
      "step": 4382
    },
    {
      "epoch": 7.03,
      "learning_rate": 1.485623003194888e-05,
      "loss": 0.1147,
      "step": 4400
    },
    {
      "epoch": 7.19,
      "learning_rate": 1.4057507987220447e-05,
      "loss": 0.1158,
      "step": 4500
    },
    {
      "epoch": 7.35,
      "learning_rate": 1.3258785942492014e-05,
      "loss": 0.0993,
      "step": 4600
    },
    {
      "epoch": 7.51,
      "learning_rate": 1.2460063897763578e-05,
      "loss": 0.1288,
      "step": 4700
    },
    {
      "epoch": 7.67,
      "learning_rate": 1.1661341853035145e-05,
      "loss": 0.0874,
      "step": 4800
    },
    {
      "epoch": 7.83,
      "learning_rate": 1.086261980830671e-05,
      "loss": 0.105,
      "step": 4900
    },
    {
      "epoch": 7.99,
      "learning_rate": 1.0063897763578276e-05,
      "loss": 0.089,
      "step": 5000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9760065298684535,
      "eval_f1": 0.8010457516339871,
      "eval_loss": 0.08459383249282837,
      "eval_precision": 0.7694625816172778,
      "eval_recall": 0.8353326063249727,
      "eval_runtime": 67.5952,
      "eval_samples_per_second": 148.043,
      "eval_steps_per_second": 37.014,
      "step": 5008
    },
    {
      "epoch": 8.15,
      "learning_rate": 9.265175718849841e-06,
      "loss": 0.0766,
      "step": 5100
    },
    {
      "epoch": 8.31,
      "learning_rate": 8.466453674121406e-06,
      "loss": 0.0929,
      "step": 5200
    },
    {
      "epoch": 8.47,
      "learning_rate": 7.66773162939297e-06,
      "loss": 0.089,
      "step": 5300
    },
    {
      "epoch": 8.63,
      "learning_rate": 6.869009584664538e-06,
      "loss": 0.0946,
      "step": 5400
    },
    {
      "epoch": 8.79,
      "learning_rate": 6.070287539936103e-06,
      "loss": 0.0757,
      "step": 5500
    },
    {
      "epoch": 8.95,
      "learning_rate": 5.2715654952076674e-06,
      "loss": 0.0923,
      "step": 5600
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9789108027562119,
      "eval_f1": 0.828852119958635,
      "eval_loss": 0.07430661469697952,
      "eval_precision": 0.788102261553589,
      "eval_recall": 0.8740458015267175,
      "eval_runtime": 67.8851,
      "eval_samples_per_second": 147.411,
      "eval_steps_per_second": 36.856,
      "step": 5634
    },
    {
      "epoch": 9.11,
      "learning_rate": 4.472843450479233e-06,
      "loss": 0.0578,
      "step": 5700
    },
    {
      "epoch": 9.27,
      "learning_rate": 3.6741214057507987e-06,
      "loss": 0.0664,
      "step": 5800
    },
    {
      "epoch": 9.42,
      "learning_rate": 2.8753993610223644e-06,
      "loss": 0.0616,
      "step": 5900
    },
    {
      "epoch": 9.58,
      "learning_rate": 2.0766773162939296e-06,
      "loss": 0.0759,
      "step": 6000
    },
    {
      "epoch": 9.74,
      "learning_rate": 1.2779552715654952e-06,
      "loss": 0.0769,
      "step": 6100
    },
    {
      "epoch": 9.9,
      "learning_rate": 4.792332268370607e-07,
      "loss": 0.0711,
      "step": 6200
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9819289686983922,
      "eval_f1": 0.8444211629125196,
      "eval_loss": 0.06683139503002167,
      "eval_precision": 0.8125,
      "eval_recall": 0.8789531079607416,
      "eval_runtime": 67.6072,
      "eval_samples_per_second": 148.017,
      "eval_steps_per_second": 37.008,
      "step": 6260
    },
    {
      "epoch": 10.0,
      "step": 6260,
      "total_flos": 1.307859275810304e+16,
      "train_loss": 0.217802461039144,
      "train_runtime": 2284.8138,
      "train_samples_per_second": 43.798,
      "train_steps_per_second": 2.74
    }
  ],
  "max_steps": 6260,
  "num_train_epochs": 10,
  "total_flos": 1.307859275810304e+16,
  "trial_name": null,
  "trial_params": null
}