{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 50, "global_step": 1715, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "eval_accuracy": 0.35737704918032787, "eval_f1": 0.07522429261559696, "eval_loss": 1.667272925376892, "eval_precision": 0.05105386416861827, "eval_recall": 0.14285714285714285, "eval_runtime": 25.9688, "eval_samples_per_second": 11.745, "eval_steps_per_second": 1.502, "step": 50 }, { "epoch": 0.29, "eval_accuracy": 0.5016393442622951, "eval_f1": 0.2412952804098472, "eval_loss": 1.472964882850647, "eval_precision": 0.22701762523191094, "eval_recall": 0.2760011649919907, "eval_runtime": 25.1854, "eval_samples_per_second": 12.11, "eval_steps_per_second": 1.549, "step": 100 }, { "epoch": 0.44, "eval_accuracy": 0.5672131147540984, "eval_f1": 0.35741119048011755, "eval_loss": 1.2623624801635742, "eval_precision": 0.46281513214918163, "eval_recall": 0.34605526990848096, "eval_runtime": 24.7658, "eval_samples_per_second": 12.315, "eval_steps_per_second": 1.575, "step": 150 }, { "epoch": 0.58, "eval_accuracy": 0.5639344262295082, "eval_f1": 0.3512701619659464, "eval_loss": 1.2795928716659546, "eval_precision": 0.48283711313562055, "eval_recall": 0.34593069478390576, "eval_runtime": 26.4061, "eval_samples_per_second": 11.55, "eval_steps_per_second": 1.477, "step": 200 }, { "epoch": 0.73, "eval_accuracy": 0.639344262295082, "eval_f1": 0.42203695116777995, "eval_loss": 1.0558925867080688, "eval_precision": 0.48377222428420436, "eval_recall": 0.4281083079248217, "eval_runtime": 24.1696, "eval_samples_per_second": 12.619, "eval_steps_per_second": 1.614, "step": 250 }, { "epoch": 0.87, "eval_accuracy": 0.659016393442623, "eval_f1": 0.5624420508569995, "eval_loss": 0.9904302954673767, "eval_precision": 0.6148969264343321, "eval_recall": 0.5786355212960718, "eval_runtime": 24.8748, "eval_samples_per_second": 12.261, "eval_steps_per_second": 1.568, "step": 300 }, { "epoch": 1.02, "eval_accuracy": 0.5967213114754099, "eval_f1": 0.42053130606223654, "eval_loss": 1.1112089157104492, "eval_precision": 0.5042065435600455, "eval_recall": 0.42027804344318104, "eval_runtime": 26.735, "eval_samples_per_second": 11.408, "eval_steps_per_second": 1.459, "step": 350 }, { "epoch": 1.17, "eval_accuracy": 0.6622950819672131, "eval_f1": 0.540005974333324, "eval_loss": 1.0169768333435059, "eval_precision": 0.6707201514365236, "eval_recall": 0.5058656801776068, "eval_runtime": 24.681, "eval_samples_per_second": 12.358, "eval_steps_per_second": 1.58, "step": 400 }, { "epoch": 1.31, "eval_accuracy": 0.7049180327868853, "eval_f1": 0.5548474859699348, "eval_loss": 0.963288426399231, "eval_precision": 0.6737802840434419, "eval_recall": 0.5320264412007532, "eval_runtime": 85.8855, "eval_samples_per_second": 3.551, "eval_steps_per_second": 0.454, "step": 450 }, { "epoch": 1.46, "grad_norm": 6.387063980102539, "learning_rate": 3.542274052478135e-05, "loss": 1.2491, "step": 500 }, { "epoch": 1.46, "eval_accuracy": 0.740983606557377, "eval_f1": 0.6032578704144542, "eval_loss": 0.8153039216995239, "eval_precision": 0.6389329389702317, "eval_recall": 0.5814238802770912, "eval_runtime": 25.7584, "eval_samples_per_second": 11.841, "eval_steps_per_second": 1.514, "step": 500 }, { "epoch": 1.6, "eval_accuracy": 0.7311475409836066, "eval_f1": 0.59968879894362, "eval_loss": 0.8648873567581177, "eval_precision": 0.7235630919034282, "eval_recall": 0.5553548759970778, "eval_runtime": 26.1622, "eval_samples_per_second": 11.658, "eval_steps_per_second": 1.491, "step": 550 }, { "epoch": 1.75, "eval_accuracy": 0.780327868852459, "eval_f1": 0.6441369205133565, "eval_loss": 0.7085286974906921, "eval_precision": 0.6888112350883506, "eval_recall": 0.6200901173378238, "eval_runtime": 25.1463, "eval_samples_per_second": 12.129, "eval_steps_per_second": 1.551, "step": 600 }, { "epoch": 1.9, "eval_accuracy": 0.7639344262295082, "eval_f1": 0.6348228528062524, "eval_loss": 0.778355062007904, "eval_precision": 0.6448629451780713, "eval_recall": 0.6364373118960275, "eval_runtime": 25.0645, "eval_samples_per_second": 12.169, "eval_steps_per_second": 1.556, "step": 650 }, { "epoch": 2.04, "eval_accuracy": 0.7475409836065574, "eval_f1": 0.6299370987134999, "eval_loss": 0.7480295896530151, "eval_precision": 0.6783989867133294, "eval_recall": 0.6207732331585543, "eval_runtime": 24.3631, "eval_samples_per_second": 12.519, "eval_steps_per_second": 1.601, "step": 700 }, { "epoch": 2.19, "eval_accuracy": 0.7672131147540984, "eval_f1": 0.6541376568128562, "eval_loss": 0.7434977293014526, "eval_precision": 0.7108263630655515, "eval_recall": 0.6220120665074793, "eval_runtime": 25.2918, "eval_samples_per_second": 12.059, "eval_steps_per_second": 1.542, "step": 750 }, { "epoch": 2.33, "eval_accuracy": 0.7311475409836066, "eval_f1": 0.6427856362037365, "eval_loss": 0.9037203192710876, "eval_precision": 0.7008431166108425, "eval_recall": 0.6458018130036478, "eval_runtime": 24.5352, "eval_samples_per_second": 12.431, "eval_steps_per_second": 1.59, "step": 800 }, { "epoch": 2.48, "eval_accuracy": 0.7508196721311475, "eval_f1": 0.6470554102382174, "eval_loss": 0.7644699215888977, "eval_precision": 0.6814040333813435, "eval_recall": 0.6477453030205323, "eval_runtime": 24.2035, "eval_samples_per_second": 12.601, "eval_steps_per_second": 1.611, "step": 850 }, { "epoch": 2.62, "eval_accuracy": 0.8, "eval_f1": 0.677888944499949, "eval_loss": 0.7002153396606445, "eval_precision": 0.7160759040458288, "eval_recall": 0.6559052650795769, "eval_runtime": 24.4891, "eval_samples_per_second": 12.455, "eval_steps_per_second": 1.593, "step": 900 }, { "epoch": 2.77, "eval_accuracy": 0.8098360655737705, "eval_f1": 0.6772457805812352, "eval_loss": 0.6718072891235352, "eval_precision": 0.6894471952583333, "eval_recall": 0.6719617414571543, "eval_runtime": 25.7261, "eval_samples_per_second": 11.856, "eval_steps_per_second": 1.516, "step": 950 }, { "epoch": 2.92, "grad_norm": 6.155330181121826, "learning_rate": 2.0845481049562683e-05, "loss": 0.6665, "step": 1000 }, { "epoch": 2.92, "eval_accuracy": 0.7868852459016393, "eval_f1": 0.6645166174577939, "eval_loss": 0.6745351552963257, "eval_precision": 0.6831230788775687, "eval_recall": 0.6611539278970471, "eval_runtime": 23.8162, "eval_samples_per_second": 12.806, "eval_steps_per_second": 1.638, "step": 1000 }, { "epoch": 3.06, "eval_accuracy": 0.8, "eval_f1": 0.6707482450301798, "eval_loss": 0.6912632584571838, "eval_precision": 0.7038907777116936, "eval_recall": 0.6636155184320323, "eval_runtime": 25.3425, "eval_samples_per_second": 12.035, "eval_steps_per_second": 1.539, "step": 1050 }, { "epoch": 3.21, "eval_accuracy": 0.7344262295081967, "eval_f1": 0.6124227128896254, "eval_loss": 0.839020848274231, "eval_precision": 0.6036382548711792, "eval_recall": 0.6331067003085352, "eval_runtime": 25.3402, "eval_samples_per_second": 12.036, "eval_steps_per_second": 1.539, "step": 1100 }, { "epoch": 3.35, "eval_accuracy": 0.7901639344262295, "eval_f1": 0.6632550969550687, "eval_loss": 0.7742147445678711, "eval_precision": 0.6782513190376637, "eval_recall": 0.6595217659437844, "eval_runtime": 24.3329, "eval_samples_per_second": 12.534, "eval_steps_per_second": 1.603, "step": 1150 }, { "epoch": 3.5, "eval_accuracy": 0.8131147540983606, "eval_f1": 0.6727344613312225, "eval_loss": 0.6753232479095459, "eval_precision": 0.6878592989192068, "eval_recall": 0.6654702186812278, "eval_runtime": 24.7418, "eval_samples_per_second": 12.327, "eval_steps_per_second": 1.576, "step": 1200 }, { "epoch": 3.64, "eval_accuracy": 0.760655737704918, "eval_f1": 0.6525488286572031, "eval_loss": 0.8542010188102722, "eval_precision": 0.6985802437047138, "eval_recall": 0.6503794246913512, "eval_runtime": 23.8367, "eval_samples_per_second": 12.795, "eval_steps_per_second": 1.636, "step": 1250 }, { "epoch": 3.79, "eval_accuracy": 0.8065573770491803, "eval_f1": 0.6740742285136359, "eval_loss": 0.7735392451286316, "eval_precision": 0.6767778649921506, "eval_recall": 0.676755828820049, "eval_runtime": 24.4555, "eval_samples_per_second": 12.472, "eval_steps_per_second": 1.595, "step": 1300 }, { "epoch": 3.94, "eval_accuracy": 0.8131147540983606, "eval_f1": 0.6832302669636724, "eval_loss": 0.6795472502708435, "eval_precision": 0.7133861439312567, "eval_recall": 0.6736156637532783, "eval_runtime": 25.0647, "eval_samples_per_second": 12.169, "eval_steps_per_second": 1.556, "step": 1350 }, { "epoch": 4.08, "eval_accuracy": 0.7967213114754098, "eval_f1": 0.6579730420842168, "eval_loss": 0.7954936623573303, "eval_precision": 0.6799340904944353, "eval_recall": 0.654890922780831, "eval_runtime": 25.0454, "eval_samples_per_second": 12.178, "eval_steps_per_second": 1.557, "step": 1400 }, { "epoch": 4.23, "eval_accuracy": 0.8229508196721311, "eval_f1": 0.6892471358587405, "eval_loss": 0.7999194264411926, "eval_precision": 0.7156453634629862, "eval_recall": 0.6809495048027159, "eval_runtime": 25.4511, "eval_samples_per_second": 11.984, "eval_steps_per_second": 1.532, "step": 1450 }, { "epoch": 4.37, "grad_norm": 22.507097244262695, "learning_rate": 6.268221574344024e-06, "loss": 0.3776, "step": 1500 }, { "epoch": 4.37, "eval_accuracy": 0.7934426229508197, "eval_f1": 0.6679894092876133, "eval_loss": 0.9446977376937866, "eval_precision": 0.686414129348912, "eval_recall": 0.6591507199305365, "eval_runtime": 25.6463, "eval_samples_per_second": 11.893, "eval_steps_per_second": 1.521, "step": 1500 }, { "epoch": 4.52, "eval_accuracy": 0.8065573770491803, "eval_f1": 0.6705779982361941, "eval_loss": 0.9103355407714844, "eval_precision": 0.6804748603360347, "eval_recall": 0.6682874981040119, "eval_runtime": 24.8615, "eval_samples_per_second": 12.268, "eval_steps_per_second": 1.569, "step": 1550 }, { "epoch": 4.66, "eval_accuracy": 0.7868852459016393, "eval_f1": 0.6581300883025021, "eval_loss": 0.9052944779396057, "eval_precision": 0.6659523069247808, "eval_recall": 0.6643693723968953, "eval_runtime": 24.6745, "eval_samples_per_second": 12.361, "eval_steps_per_second": 1.581, "step": 1600 }, { "epoch": 4.81, "eval_accuracy": 0.819672131147541, "eval_f1": 0.6860677167570124, "eval_loss": 0.85903000831604, "eval_precision": 0.7103231643472162, "eval_recall": 0.678005387179699, "eval_runtime": 25.3638, "eval_samples_per_second": 12.025, "eval_steps_per_second": 1.538, "step": 1650 }, { "epoch": 4.96, "eval_accuracy": 0.8229508196721311, "eval_f1": 0.6861351260633056, "eval_loss": 0.8571685552597046, "eval_precision": 0.7088281080916173, "eval_recall": 0.6787861482815611, "eval_runtime": 23.7505, "eval_samples_per_second": 12.842, "eval_steps_per_second": 1.642, "step": 1700 } ], "logging_steps": 500, "max_steps": 1715, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.278165457700352e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }