| { | |
| "best_metric": 0.4396501457725948, | |
| "best_model_checkpoint": "/content/our_data/checkpoint-11500", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 12410, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.91941982272361e-05, | |
| "loss": 1.7927, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.6139180171591992, | |
| "eval_f1": 0.08594319009468317, | |
| "eval_loss": 1.5607472658157349, | |
| "eval_precision": 0.09562398703403566, | |
| "eval_recall": 0.07804232804232804, | |
| "eval_runtime": 1.9857, | |
| "eval_samples_per_second": 153.092, | |
| "eval_steps_per_second": 76.546, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.83883964544722e-05, | |
| "loss": 1.3551, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.6495471877979028, | |
| "eval_f1": 0.22113022113022113, | |
| "eval_loss": 1.3530131578445435, | |
| "eval_precision": 0.20642201834862386, | |
| "eval_recall": 0.23809523809523808, | |
| "eval_runtime": 1.4676, | |
| "eval_samples_per_second": 207.142, | |
| "eval_steps_per_second": 103.571, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.75825946817083e-05, | |
| "loss": 1.0432, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 0.6739752144899904, | |
| "eval_f1": 0.26135726303982054, | |
| "eval_loss": 1.310741662979126, | |
| "eval_precision": 0.22687439143135346, | |
| "eval_recall": 0.3082010582010582, | |
| "eval_runtime": 1.4924, | |
| "eval_samples_per_second": 203.693, | |
| "eval_steps_per_second": 101.847, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.67767929089444e-05, | |
| "loss": 0.8468, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_accuracy": 0.6767159199237369, | |
| "eval_f1": 0.28364849833147937, | |
| "eval_loss": 1.249666690826416, | |
| "eval_precision": 0.24472168905950095, | |
| "eval_recall": 0.3373015873015873, | |
| "eval_runtime": 1.9366, | |
| "eval_samples_per_second": 156.977, | |
| "eval_steps_per_second": 78.489, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.59709911361805e-05, | |
| "loss": 0.7775, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_accuracy": 0.6938751191611058, | |
| "eval_f1": 0.3260115606936416, | |
| "eval_loss": 1.2709521055221558, | |
| "eval_precision": 0.28952772073921973, | |
| "eval_recall": 0.373015873015873, | |
| "eval_runtime": 2.1473, | |
| "eval_samples_per_second": 141.572, | |
| "eval_steps_per_second": 70.786, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.5165189363416601e-05, | |
| "loss": 0.5374, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_accuracy": 0.7043612964728313, | |
| "eval_f1": 0.34498308906426156, | |
| "eval_loss": 1.3020099401474, | |
| "eval_precision": 0.3005893909626719, | |
| "eval_recall": 0.40476190476190477, | |
| "eval_runtime": 1.4555, | |
| "eval_samples_per_second": 208.867, | |
| "eval_steps_per_second": 104.433, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.4359387590652701e-05, | |
| "loss": 0.5071, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.7080552907530981, | |
| "eval_f1": 0.34189944134078215, | |
| "eval_loss": 1.2613815069198608, | |
| "eval_precision": 0.29593810444874274, | |
| "eval_recall": 0.40476190476190477, | |
| "eval_runtime": 1.655, | |
| "eval_samples_per_second": 183.688, | |
| "eval_steps_per_second": 91.844, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.35535858178888e-05, | |
| "loss": 0.4237, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "eval_accuracy": 0.7166348903717826, | |
| "eval_f1": 0.38166189111747856, | |
| "eval_loss": 1.3250571489334106, | |
| "eval_precision": 0.3367037411526795, | |
| "eval_recall": 0.44047619047619047, | |
| "eval_runtime": 1.9058, | |
| "eval_samples_per_second": 159.516, | |
| "eval_steps_per_second": 79.758, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.27477840451249e-05, | |
| "loss": 0.3597, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "eval_accuracy": 0.7124642516682554, | |
| "eval_f1": 0.38974358974358975, | |
| "eval_loss": 1.3852567672729492, | |
| "eval_precision": 0.34234234234234234, | |
| "eval_recall": 0.4523809523809524, | |
| "eval_runtime": 1.478, | |
| "eval_samples_per_second": 205.686, | |
| "eval_steps_per_second": 102.843, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.1941982272361e-05, | |
| "loss": 0.3632, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_accuracy": 0.7127025738798856, | |
| "eval_f1": 0.39836924868957485, | |
| "eval_loss": 1.415600061416626, | |
| "eval_precision": 0.3558792924037461, | |
| "eval_recall": 0.4523809523809524, | |
| "eval_runtime": 1.5214, | |
| "eval_samples_per_second": 199.817, | |
| "eval_steps_per_second": 99.908, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 1.11361804995971e-05, | |
| "loss": 0.2589, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "eval_accuracy": 0.717349857006673, | |
| "eval_f1": 0.40180586907449206, | |
| "eval_loss": 1.4472498893737793, | |
| "eval_precision": 0.35039370078740156, | |
| "eval_recall": 0.4708994708994709, | |
| "eval_runtime": 1.4818, | |
| "eval_samples_per_second": 205.152, | |
| "eval_steps_per_second": 102.576, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.0330378726833199e-05, | |
| "loss": 0.323, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "eval_accuracy": 0.7222354623450906, | |
| "eval_f1": 0.39455782312925164, | |
| "eval_loss": 1.399746298789978, | |
| "eval_precision": 0.34523809523809523, | |
| "eval_recall": 0.4603174603174603, | |
| "eval_runtime": 1.4893, | |
| "eval_samples_per_second": 204.116, | |
| "eval_steps_per_second": 102.058, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 9.5245769540693e-06, | |
| "loss": 0.2167, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "eval_accuracy": 0.7233079122974261, | |
| "eval_f1": 0.39499146272054636, | |
| "eval_loss": 1.519398808479309, | |
| "eval_precision": 0.34665334665334663, | |
| "eval_recall": 0.458994708994709, | |
| "eval_runtime": 1.803, | |
| "eval_samples_per_second": 168.606, | |
| "eval_steps_per_second": 84.303, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 8.7187751813054e-06, | |
| "loss": 0.2363, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_accuracy": 0.7222354623450906, | |
| "eval_f1": 0.4024802705749719, | |
| "eval_loss": 1.5585495233535767, | |
| "eval_precision": 0.3506876227897839, | |
| "eval_recall": 0.4722222222222222, | |
| "eval_runtime": 2.0739, | |
| "eval_samples_per_second": 146.582, | |
| "eval_steps_per_second": 73.291, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 7.9129734085415e-06, | |
| "loss": 0.2721, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "eval_accuracy": 0.7210438512869399, | |
| "eval_f1": 0.4208715596330276, | |
| "eval_loss": 1.5420488119125366, | |
| "eval_precision": 0.3714574898785425, | |
| "eval_recall": 0.48544973544973546, | |
| "eval_runtime": 1.4879, | |
| "eval_samples_per_second": 204.311, | |
| "eval_steps_per_second": 102.155, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 7.107171635777599e-06, | |
| "loss": 0.2073, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "eval_accuracy": 0.7147283126787417, | |
| "eval_f1": 0.40914158305462656, | |
| "eval_loss": 1.5877846479415894, | |
| "eval_precision": 0.3535645472061657, | |
| "eval_recall": 0.48544973544973546, | |
| "eval_runtime": 1.4406, | |
| "eval_samples_per_second": 211.016, | |
| "eval_steps_per_second": 105.508, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 6.301369863013699e-06, | |
| "loss": 0.2021, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "eval_accuracy": 0.7197330791229742, | |
| "eval_f1": 0.42135476463834676, | |
| "eval_loss": 1.6636826992034912, | |
| "eval_precision": 0.372210953346856, | |
| "eval_recall": 0.48544973544973546, | |
| "eval_runtime": 1.7337, | |
| "eval_samples_per_second": 175.352, | |
| "eval_steps_per_second": 87.676, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 5.495568090249799e-06, | |
| "loss": 0.1648, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "eval_accuracy": 0.7254528122020972, | |
| "eval_f1": 0.42339181286549704, | |
| "eval_loss": 1.6723591089248657, | |
| "eval_precision": 0.37945492662473795, | |
| "eval_recall": 0.47883597883597884, | |
| "eval_runtime": 2.0842, | |
| "eval_samples_per_second": 145.858, | |
| "eval_steps_per_second": 72.929, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 4.689766317485899e-06, | |
| "loss": 0.1927, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "eval_accuracy": 0.7244995233555768, | |
| "eval_f1": 0.4298850574712644, | |
| "eval_loss": 1.689092993736267, | |
| "eval_precision": 0.3800813008130081, | |
| "eval_recall": 0.4947089947089947, | |
| "eval_runtime": 1.551, | |
| "eval_samples_per_second": 196.006, | |
| "eval_steps_per_second": 98.003, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 3.883964544721999e-06, | |
| "loss": 0.1958, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_accuracy": 0.7280743565300286, | |
| "eval_f1": 0.4395090590298071, | |
| "eval_loss": 1.67740797996521, | |
| "eval_precision": 0.393717277486911, | |
| "eval_recall": 0.4973544973544973, | |
| "eval_runtime": 1.5248, | |
| "eval_samples_per_second": 199.375, | |
| "eval_steps_per_second": 99.688, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 3.0781627719580986e-06, | |
| "loss": 0.1508, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "eval_accuracy": 0.7259294566253575, | |
| "eval_f1": 0.4272409778812573, | |
| "eval_loss": 1.7378581762313843, | |
| "eval_precision": 0.3814968814968815, | |
| "eval_recall": 0.48544973544973546, | |
| "eval_runtime": 1.5008, | |
| "eval_samples_per_second": 202.555, | |
| "eval_steps_per_second": 101.278, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 2.2723609991941985e-06, | |
| "loss": 0.184, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "eval_accuracy": 0.7277168732125834, | |
| "eval_f1": 0.43638457109959694, | |
| "eval_loss": 1.700131893157959, | |
| "eval_precision": 0.38634046890927626, | |
| "eval_recall": 0.5013227513227513, | |
| "eval_runtime": 1.5178, | |
| "eval_samples_per_second": 200.287, | |
| "eval_steps_per_second": 100.144, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "learning_rate": 1.4665592264302982e-06, | |
| "loss": 0.1696, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "eval_accuracy": 0.7295042897998093, | |
| "eval_f1": 0.4396501457725948, | |
| "eval_loss": 1.6932308673858643, | |
| "eval_precision": 0.3931178310740355, | |
| "eval_recall": 0.49867724867724866, | |
| "eval_runtime": 2.1423, | |
| "eval_samples_per_second": 141.904, | |
| "eval_steps_per_second": 70.952, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 6.607574536663981e-07, | |
| "loss": 0.1425, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "eval_accuracy": 0.7275977121067684, | |
| "eval_f1": 0.433886662850601, | |
| "eval_loss": 1.7137079238891602, | |
| "eval_precision": 0.38244197780020184, | |
| "eval_recall": 0.5013227513227513, | |
| "eval_runtime": 2.2155, | |
| "eval_samples_per_second": 137.215, | |
| "eval_steps_per_second": 68.608, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 12410, | |
| "total_flos": 243842156652198.0, | |
| "train_loss": 0.4448028106443542, | |
| "train_runtime": 816.9339, | |
| "train_samples_per_second": 30.37, | |
| "train_steps_per_second": 15.191 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 12410, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 243842156652198.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |