| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.88e-05, | |
| "loss": 0.3202, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.8813009961455675, | |
| "eval_f1": 0.16261325703385787, | |
| "eval_loss": 0.24328218400478363, | |
| "eval_precision": 0.2519394163280384, | |
| "eval_recall": 0.12004928709734201, | |
| "eval_runtime": 4.054, | |
| "eval_samples_per_second": 123.334, | |
| "eval_steps_per_second": 30.833, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 0.23, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.9034639835811182, | |
| "eval_f1": 0.49651100375738055, | |
| "eval_loss": 0.21028906106948853, | |
| "eval_precision": 0.5048208113516464, | |
| "eval_recall": 0.48847033972892095, | |
| "eval_runtime": 4.2572, | |
| "eval_samples_per_second": 117.449, | |
| "eval_steps_per_second": 29.362, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.64e-05, | |
| "loss": 0.2013, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_accuracy": 0.8792611503228713, | |
| "eval_f1": 0.5627305035874741, | |
| "eval_loss": 0.2621181607246399, | |
| "eval_precision": 0.4545060658578856, | |
| "eval_recall": 0.7386023587396585, | |
| "eval_runtime": 4.1357, | |
| "eval_samples_per_second": 120.897, | |
| "eval_steps_per_second": 30.224, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.52e-05, | |
| "loss": 0.1874, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.893452470340892, | |
| "eval_f1": 0.5619149696320114, | |
| "eval_loss": 0.2326020449399948, | |
| "eval_precision": 0.47293721433726243, | |
| "eval_recall": 0.6921316669600422, | |
| "eval_runtime": 4.241, | |
| "eval_samples_per_second": 117.897, | |
| "eval_steps_per_second": 29.474, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.1847, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9060920058066777, | |
| "eval_f1": 0.5619039721369932, | |
| "eval_loss": 0.20794397592544556, | |
| "eval_precision": 0.5312010034493572, | |
| "eval_recall": 0.5963738778384088, | |
| "eval_runtime": 4.124, | |
| "eval_samples_per_second": 121.24, | |
| "eval_steps_per_second": 30.31, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2.2800000000000002e-05, | |
| "loss": 0.1567, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_accuracy": 0.9071932722631025, | |
| "eval_f1": 0.503008186211658, | |
| "eval_loss": 0.23015139997005463, | |
| "eval_precision": 0.5720053835800808, | |
| "eval_recall": 0.4488646365076571, | |
| "eval_runtime": 4.1201, | |
| "eval_samples_per_second": 121.355, | |
| "eval_steps_per_second": 30.339, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.16e-05, | |
| "loss": 0.1484, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_accuracy": 0.9038269009360765, | |
| "eval_f1": 0.5580642412882338, | |
| "eval_loss": 0.22997109591960907, | |
| "eval_precision": 0.540785997357992, | |
| "eval_recall": 0.5764830135539518, | |
| "eval_runtime": 4.134, | |
| "eval_samples_per_second": 120.948, | |
| "eval_steps_per_second": 30.237, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.04e-05, | |
| "loss": 0.1388, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_accuracy": 0.9075561896180607, | |
| "eval_f1": 0.5364304509572634, | |
| "eval_loss": 0.2365296632051468, | |
| "eval_precision": 0.5535580524344569, | |
| "eval_recall": 0.5203309276535821, | |
| "eval_runtime": 4.1587, | |
| "eval_samples_per_second": 120.229, | |
| "eval_steps_per_second": 30.057, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 1.9200000000000003e-05, | |
| "loss": 0.1191, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_accuracy": 0.9067302397757421, | |
| "eval_f1": 0.574726200505476, | |
| "eval_loss": 0.26086461544036865, | |
| "eval_precision": 0.5511391177896268, | |
| "eval_recall": 0.6004224608343601, | |
| "eval_runtime": 4.1544, | |
| "eval_samples_per_second": 120.354, | |
| "eval_steps_per_second": 30.088, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.1193, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9059543474996246, | |
| "eval_f1": 0.5809305373525557, | |
| "eval_loss": 0.25283825397491455, | |
| "eval_precision": 0.543281752719473, | |
| "eval_recall": 0.6241858827671185, | |
| "eval_runtime": 4.1474, | |
| "eval_samples_per_second": 120.557, | |
| "eval_steps_per_second": 30.139, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 1.6800000000000002e-05, | |
| "loss": 0.088, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_accuracy": 0.9037142714121239, | |
| "eval_f1": 0.5845009103142563, | |
| "eval_loss": 0.2839806079864502, | |
| "eval_precision": 0.5310701956271576, | |
| "eval_recall": 0.6498855835240275, | |
| "eval_runtime": 4.1556, | |
| "eval_samples_per_second": 120.318, | |
| "eval_steps_per_second": 30.08, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.56e-05, | |
| "loss": 0.0924, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_accuracy": 0.9085197977674325, | |
| "eval_f1": 0.5776627856834843, | |
| "eval_loss": 0.27629220485687256, | |
| "eval_precision": 0.5662833953331079, | |
| "eval_recall": 0.5895088892800563, | |
| "eval_runtime": 4.1675, | |
| "eval_samples_per_second": 119.975, | |
| "eval_steps_per_second": 29.994, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 1.44e-05, | |
| "loss": 0.0834, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_accuracy": 0.9037267858036743, | |
| "eval_f1": 0.5866475003992974, | |
| "eval_loss": 0.332010954618454, | |
| "eval_precision": 0.5369098085075281, | |
| "eval_recall": 0.6465411019186763, | |
| "eval_runtime": 4.1738, | |
| "eval_samples_per_second": 119.795, | |
| "eval_steps_per_second": 29.949, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 1.32e-05, | |
| "loss": 0.0654, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_accuracy": 0.9057416028432698, | |
| "eval_f1": 0.574710687542546, | |
| "eval_loss": 0.32423922419548035, | |
| "eval_precision": 0.5562510294844342, | |
| "eval_recall": 0.5944375990142581, | |
| "eval_runtime": 4.1539, | |
| "eval_samples_per_second": 120.368, | |
| "eval_steps_per_second": 30.092, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.0689, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9046403363868448, | |
| "eval_f1": 0.5581112750629285, | |
| "eval_loss": 0.31789475679397583, | |
| "eval_precision": 0.550513698630137, | |
| "eval_recall": 0.5659214926949481, | |
| "eval_runtime": 4.1716, | |
| "eval_samples_per_second": 119.859, | |
| "eval_steps_per_second": 29.965, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.08e-05, | |
| "loss": 0.0498, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_accuracy": 0.9053661710967613, | |
| "eval_f1": 0.5820808768579258, | |
| "eval_loss": 0.38915345072746277, | |
| "eval_precision": 0.5509273813266269, | |
| "eval_recall": 0.6169688435134659, | |
| "eval_runtime": 4.1814, | |
| "eval_samples_per_second": 119.577, | |
| "eval_steps_per_second": 29.894, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.0528, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_accuracy": 0.9048155378685488, | |
| "eval_f1": 0.5776866283839212, | |
| "eval_loss": 0.3601633608341217, | |
| "eval_precision": 0.5409433092640958, | |
| "eval_recall": 0.619785249075867, | |
| "eval_runtime": 4.1473, | |
| "eval_samples_per_second": 120.56, | |
| "eval_steps_per_second": 30.14, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 8.400000000000001e-06, | |
| "loss": 0.0474, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_accuracy": 0.9040396455924313, | |
| "eval_f1": 0.5793253173012691, | |
| "eval_loss": 0.39758625626564026, | |
| "eval_precision": 0.5510722795869738, | |
| "eval_recall": 0.6106319309980637, | |
| "eval_runtime": 4.1737, | |
| "eval_samples_per_second": 119.798, | |
| "eval_steps_per_second": 29.949, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 7.2e-06, | |
| "loss": 0.039, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_accuracy": 0.9035766131050709, | |
| "eval_f1": 0.5778368499750789, | |
| "eval_loss": 0.4138449728488922, | |
| "eval_precision": 0.5471134182790625, | |
| "eval_recall": 0.6122161591269143, | |
| "eval_runtime": 4.1525, | |
| "eval_samples_per_second": 120.408, | |
| "eval_steps_per_second": 30.102, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0446, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9039520448515793, | |
| "eval_f1": 0.5882447535579319, | |
| "eval_loss": 0.408151775598526, | |
| "eval_precision": 0.5414446417998816, | |
| "eval_recall": 0.6439007217039253, | |
| "eval_runtime": 4.1562, | |
| "eval_samples_per_second": 120.303, | |
| "eval_steps_per_second": 30.076, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.0333, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "eval_accuracy": 0.9046528507783952, | |
| "eval_f1": 0.5720617062984743, | |
| "eval_loss": 0.4318484365940094, | |
| "eval_precision": 0.5545274289491078, | |
| "eval_recall": 0.5907410667136067, | |
| "eval_runtime": 4.1724, | |
| "eval_samples_per_second": 119.834, | |
| "eval_steps_per_second": 29.959, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 3.6e-06, | |
| "loss": 0.0327, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_accuracy": 0.9054913150122641, | |
| "eval_f1": 0.5734657499363381, | |
| "eval_loss": 0.4232546091079712, | |
| "eval_precision": 0.5537704918032786, | |
| "eval_recall": 0.5946136243619081, | |
| "eval_runtime": 4.1536, | |
| "eval_samples_per_second": 120.378, | |
| "eval_steps_per_second": 30.095, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.03, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "eval_accuracy": 0.9049782249587025, | |
| "eval_f1": 0.5769523005487548, | |
| "eval_loss": 0.44003215432167053, | |
| "eval_precision": 0.5543478260869565, | |
| "eval_recall": 0.6014786129202605, | |
| "eval_runtime": 4.2605, | |
| "eval_samples_per_second": 117.358, | |
| "eval_steps_per_second": 29.339, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.0286, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "eval_accuracy": 0.9048280522600991, | |
| "eval_f1": 0.5807528586929305, | |
| "eval_loss": 0.4442707598209381, | |
| "eval_precision": 0.5522222222222222, | |
| "eval_recall": 0.6123921844745643, | |
| "eval_runtime": 4.3652, | |
| "eval_samples_per_second": 114.542, | |
| "eval_steps_per_second": 28.636, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0261, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9050407969164539, | |
| "eval_f1": 0.5811535881958416, | |
| "eval_loss": 0.4490407407283783, | |
| "eval_precision": 0.5548263166319833, | |
| "eval_recall": 0.6101038549551135, | |
| "eval_runtime": 4.2364, | |
| "eval_samples_per_second": 118.025, | |
| "eval_steps_per_second": 29.506, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 2500, | |
| "total_flos": 2612991191040000.0, | |
| "train_loss": 0.10352115373611451, | |
| "train_runtime": 364.2147, | |
| "train_samples_per_second": 27.456, | |
| "train_steps_per_second": 6.864 | |
| } | |
| ], | |
| "max_steps": 2500, | |
| "num_train_epochs": 10, | |
| "total_flos": 2612991191040000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |