| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 50, | |
| "global_step": 1715, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.35737704918032787, | |
| "eval_f1": 0.07522429261559696, | |
| "eval_loss": 1.667272925376892, | |
| "eval_precision": 0.05105386416861827, | |
| "eval_recall": 0.14285714285714285, | |
| "eval_runtime": 25.9688, | |
| "eval_samples_per_second": 11.745, | |
| "eval_steps_per_second": 1.502, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5016393442622951, | |
| "eval_f1": 0.2412952804098472, | |
| "eval_loss": 1.472964882850647, | |
| "eval_precision": 0.22701762523191094, | |
| "eval_recall": 0.2760011649919907, | |
| "eval_runtime": 25.1854, | |
| "eval_samples_per_second": 12.11, | |
| "eval_steps_per_second": 1.549, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5672131147540984, | |
| "eval_f1": 0.35741119048011755, | |
| "eval_loss": 1.2623624801635742, | |
| "eval_precision": 0.46281513214918163, | |
| "eval_recall": 0.34605526990848096, | |
| "eval_runtime": 24.7658, | |
| "eval_samples_per_second": 12.315, | |
| "eval_steps_per_second": 1.575, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.5639344262295082, | |
| "eval_f1": 0.3512701619659464, | |
| "eval_loss": 1.2795928716659546, | |
| "eval_precision": 0.48283711313562055, | |
| "eval_recall": 0.34593069478390576, | |
| "eval_runtime": 26.4061, | |
| "eval_samples_per_second": 11.55, | |
| "eval_steps_per_second": 1.477, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.639344262295082, | |
| "eval_f1": 0.42203695116777995, | |
| "eval_loss": 1.0558925867080688, | |
| "eval_precision": 0.48377222428420436, | |
| "eval_recall": 0.4281083079248217, | |
| "eval_runtime": 24.1696, | |
| "eval_samples_per_second": 12.619, | |
| "eval_steps_per_second": 1.614, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.659016393442623, | |
| "eval_f1": 0.5624420508569995, | |
| "eval_loss": 0.9904302954673767, | |
| "eval_precision": 0.6148969264343321, | |
| "eval_recall": 0.5786355212960718, | |
| "eval_runtime": 24.8748, | |
| "eval_samples_per_second": 12.261, | |
| "eval_steps_per_second": 1.568, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_accuracy": 0.5967213114754099, | |
| "eval_f1": 0.42053130606223654, | |
| "eval_loss": 1.1112089157104492, | |
| "eval_precision": 0.5042065435600455, | |
| "eval_recall": 0.42027804344318104, | |
| "eval_runtime": 26.735, | |
| "eval_samples_per_second": 11.408, | |
| "eval_steps_per_second": 1.459, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 0.6622950819672131, | |
| "eval_f1": 0.540005974333324, | |
| "eval_loss": 1.0169768333435059, | |
| "eval_precision": 0.6707201514365236, | |
| "eval_recall": 0.5058656801776068, | |
| "eval_runtime": 24.681, | |
| "eval_samples_per_second": 12.358, | |
| "eval_steps_per_second": 1.58, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_accuracy": 0.7049180327868853, | |
| "eval_f1": 0.5548474859699348, | |
| "eval_loss": 0.963288426399231, | |
| "eval_precision": 0.6737802840434419, | |
| "eval_recall": 0.5320264412007532, | |
| "eval_runtime": 85.8855, | |
| "eval_samples_per_second": 3.551, | |
| "eval_steps_per_second": 0.454, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 6.387063980102539, | |
| "learning_rate": 3.542274052478135e-05, | |
| "loss": 1.2491, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_accuracy": 0.740983606557377, | |
| "eval_f1": 0.6032578704144542, | |
| "eval_loss": 0.8153039216995239, | |
| "eval_precision": 0.6389329389702317, | |
| "eval_recall": 0.5814238802770912, | |
| "eval_runtime": 25.7584, | |
| "eval_samples_per_second": 11.841, | |
| "eval_steps_per_second": 1.514, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.7311475409836066, | |
| "eval_f1": 0.59968879894362, | |
| "eval_loss": 0.8648873567581177, | |
| "eval_precision": 0.7235630919034282, | |
| "eval_recall": 0.5553548759970778, | |
| "eval_runtime": 26.1622, | |
| "eval_samples_per_second": 11.658, | |
| "eval_steps_per_second": 1.491, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_accuracy": 0.780327868852459, | |
| "eval_f1": 0.6441369205133565, | |
| "eval_loss": 0.7085286974906921, | |
| "eval_precision": 0.6888112350883506, | |
| "eval_recall": 0.6200901173378238, | |
| "eval_runtime": 25.1463, | |
| "eval_samples_per_second": 12.129, | |
| "eval_steps_per_second": 1.551, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.7639344262295082, | |
| "eval_f1": 0.6348228528062524, | |
| "eval_loss": 0.778355062007904, | |
| "eval_precision": 0.6448629451780713, | |
| "eval_recall": 0.6364373118960275, | |
| "eval_runtime": 25.0645, | |
| "eval_samples_per_second": 12.169, | |
| "eval_steps_per_second": 1.556, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_accuracy": 0.7475409836065574, | |
| "eval_f1": 0.6299370987134999, | |
| "eval_loss": 0.7480295896530151, | |
| "eval_precision": 0.6783989867133294, | |
| "eval_recall": 0.6207732331585543, | |
| "eval_runtime": 24.3631, | |
| "eval_samples_per_second": 12.519, | |
| "eval_steps_per_second": 1.601, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_accuracy": 0.7672131147540984, | |
| "eval_f1": 0.6541376568128562, | |
| "eval_loss": 0.7434977293014526, | |
| "eval_precision": 0.7108263630655515, | |
| "eval_recall": 0.6220120665074793, | |
| "eval_runtime": 25.2918, | |
| "eval_samples_per_second": 12.059, | |
| "eval_steps_per_second": 1.542, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_accuracy": 0.7311475409836066, | |
| "eval_f1": 0.6427856362037365, | |
| "eval_loss": 0.9037203192710876, | |
| "eval_precision": 0.7008431166108425, | |
| "eval_recall": 0.6458018130036478, | |
| "eval_runtime": 24.5352, | |
| "eval_samples_per_second": 12.431, | |
| "eval_steps_per_second": 1.59, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_accuracy": 0.7508196721311475, | |
| "eval_f1": 0.6470554102382174, | |
| "eval_loss": 0.7644699215888977, | |
| "eval_precision": 0.6814040333813435, | |
| "eval_recall": 0.6477453030205323, | |
| "eval_runtime": 24.2035, | |
| "eval_samples_per_second": 12.601, | |
| "eval_steps_per_second": 1.611, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_accuracy": 0.8, | |
| "eval_f1": 0.677888944499949, | |
| "eval_loss": 0.7002153396606445, | |
| "eval_precision": 0.7160759040458288, | |
| "eval_recall": 0.6559052650795769, | |
| "eval_runtime": 24.4891, | |
| "eval_samples_per_second": 12.455, | |
| "eval_steps_per_second": 1.593, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.8098360655737705, | |
| "eval_f1": 0.6772457805812352, | |
| "eval_loss": 0.6718072891235352, | |
| "eval_precision": 0.6894471952583333, | |
| "eval_recall": 0.6719617414571543, | |
| "eval_runtime": 25.7261, | |
| "eval_samples_per_second": 11.856, | |
| "eval_steps_per_second": 1.516, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 6.155330181121826, | |
| "learning_rate": 2.0845481049562683e-05, | |
| "loss": 0.6665, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 0.7868852459016393, | |
| "eval_f1": 0.6645166174577939, | |
| "eval_loss": 0.6745351552963257, | |
| "eval_precision": 0.6831230788775687, | |
| "eval_recall": 0.6611539278970471, | |
| "eval_runtime": 23.8162, | |
| "eval_samples_per_second": 12.806, | |
| "eval_steps_per_second": 1.638, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_accuracy": 0.8, | |
| "eval_f1": 0.6707482450301798, | |
| "eval_loss": 0.6912632584571838, | |
| "eval_precision": 0.7038907777116936, | |
| "eval_recall": 0.6636155184320323, | |
| "eval_runtime": 25.3425, | |
| "eval_samples_per_second": 12.035, | |
| "eval_steps_per_second": 1.539, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "eval_accuracy": 0.7344262295081967, | |
| "eval_f1": 0.6124227128896254, | |
| "eval_loss": 0.839020848274231, | |
| "eval_precision": 0.6036382548711792, | |
| "eval_recall": 0.6331067003085352, | |
| "eval_runtime": 25.3402, | |
| "eval_samples_per_second": 12.036, | |
| "eval_steps_per_second": 1.539, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "eval_accuracy": 0.7901639344262295, | |
| "eval_f1": 0.6632550969550687, | |
| "eval_loss": 0.7742147445678711, | |
| "eval_precision": 0.6782513190376637, | |
| "eval_recall": 0.6595217659437844, | |
| "eval_runtime": 24.3329, | |
| "eval_samples_per_second": 12.534, | |
| "eval_steps_per_second": 1.603, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_accuracy": 0.8131147540983606, | |
| "eval_f1": 0.6727344613312225, | |
| "eval_loss": 0.6753232479095459, | |
| "eval_precision": 0.6878592989192068, | |
| "eval_recall": 0.6654702186812278, | |
| "eval_runtime": 24.7418, | |
| "eval_samples_per_second": 12.327, | |
| "eval_steps_per_second": 1.576, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_accuracy": 0.760655737704918, | |
| "eval_f1": 0.6525488286572031, | |
| "eval_loss": 0.8542010188102722, | |
| "eval_precision": 0.6985802437047138, | |
| "eval_recall": 0.6503794246913512, | |
| "eval_runtime": 23.8367, | |
| "eval_samples_per_second": 12.795, | |
| "eval_steps_per_second": 1.636, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.8065573770491803, | |
| "eval_f1": 0.6740742285136359, | |
| "eval_loss": 0.7735392451286316, | |
| "eval_precision": 0.6767778649921506, | |
| "eval_recall": 0.676755828820049, | |
| "eval_runtime": 24.4555, | |
| "eval_samples_per_second": 12.472, | |
| "eval_steps_per_second": 1.595, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "eval_accuracy": 0.8131147540983606, | |
| "eval_f1": 0.6832302669636724, | |
| "eval_loss": 0.6795472502708435, | |
| "eval_precision": 0.7133861439312567, | |
| "eval_recall": 0.6736156637532783, | |
| "eval_runtime": 25.0647, | |
| "eval_samples_per_second": 12.169, | |
| "eval_steps_per_second": 1.556, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_accuracy": 0.7967213114754098, | |
| "eval_f1": 0.6579730420842168, | |
| "eval_loss": 0.7954936623573303, | |
| "eval_precision": 0.6799340904944353, | |
| "eval_recall": 0.654890922780831, | |
| "eval_runtime": 25.0454, | |
| "eval_samples_per_second": 12.178, | |
| "eval_steps_per_second": 1.557, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "eval_accuracy": 0.8229508196721311, | |
| "eval_f1": 0.6892471358587405, | |
| "eval_loss": 0.7999194264411926, | |
| "eval_precision": 0.7156453634629862, | |
| "eval_recall": 0.6809495048027159, | |
| "eval_runtime": 25.4511, | |
| "eval_samples_per_second": 11.984, | |
| "eval_steps_per_second": 1.532, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 22.507097244262695, | |
| "learning_rate": 6.268221574344024e-06, | |
| "loss": 0.3776, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "eval_accuracy": 0.7934426229508197, | |
| "eval_f1": 0.6679894092876133, | |
| "eval_loss": 0.9446977376937866, | |
| "eval_precision": 0.686414129348912, | |
| "eval_recall": 0.6591507199305365, | |
| "eval_runtime": 25.6463, | |
| "eval_samples_per_second": 11.893, | |
| "eval_steps_per_second": 1.521, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "eval_accuracy": 0.8065573770491803, | |
| "eval_f1": 0.6705779982361941, | |
| "eval_loss": 0.9103355407714844, | |
| "eval_precision": 0.6804748603360347, | |
| "eval_recall": 0.6682874981040119, | |
| "eval_runtime": 24.8615, | |
| "eval_samples_per_second": 12.268, | |
| "eval_steps_per_second": 1.569, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "eval_accuracy": 0.7868852459016393, | |
| "eval_f1": 0.6581300883025021, | |
| "eval_loss": 0.9052944779396057, | |
| "eval_precision": 0.6659523069247808, | |
| "eval_recall": 0.6643693723968953, | |
| "eval_runtime": 24.6745, | |
| "eval_samples_per_second": 12.361, | |
| "eval_steps_per_second": 1.581, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_accuracy": 0.819672131147541, | |
| "eval_f1": 0.6860677167570124, | |
| "eval_loss": 0.85903000831604, | |
| "eval_precision": 0.7103231643472162, | |
| "eval_recall": 0.678005387179699, | |
| "eval_runtime": 25.3638, | |
| "eval_samples_per_second": 12.025, | |
| "eval_steps_per_second": 1.538, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "eval_accuracy": 0.8229508196721311, | |
| "eval_f1": 0.6861351260633056, | |
| "eval_loss": 0.8571685552597046, | |
| "eval_precision": 0.7088281080916173, | |
| "eval_recall": 0.6787861482815611, | |
| "eval_runtime": 23.7505, | |
| "eval_samples_per_second": 12.842, | |
| "eval_steps_per_second": 1.642, | |
| "step": 1700 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1715, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 1.278165457700352e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |