{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0, "step": 0, "train_loss": 0.9102265238761902 },
    { "epoch": 0, "step": 0, "train_loss": 0.7791410684585571 },
    { "epoch": 0.29, "learning_rate": 1.942857142857143e-05, "loss": 0.6171, "step": 20 },
    { "epoch": 0.29, "step": 20, "train_loss": 0.9948083162307739 },
    { "epoch": 0.29, "step": 20, "train_loss": 0.43645456433296204 },
    { "epoch": 0.57, "learning_rate": 1.885714285714286e-05, "loss": 0.5985, "step": 40 },
    { "epoch": 0.57, "step": 40, "train_loss": 0.6493304967880249 },
    { "epoch": 0.57, "step": 40, "train_loss": 0.44140735268592834 },
    { "epoch": 0.86, "learning_rate": 1.8285714285714288e-05, "loss": 0.5388, "step": 60 },
    { "epoch": 0.86, "step": 60, "train_loss": 0.5935375690460205 },
    { "epoch": 0.86, "step": 60, "train_loss": 0.7153045535087585 },
    { "epoch": 1.0, "eval_accuracy": { "accuracy": 0.8339285714285715 }, "eval_auc": 0.7155102040816327, "eval_f1": { "f1": 0.3404255319148936 }, "eval_loss": 0.6475747227668762, "eval_precision": { "precision": 0.3380281690140845 }, "eval_recall": { "recall": 0.34285714285714286 }, "eval_runtime": 0.6529, "eval_samples_per_second": 857.718, "eval_steps_per_second": 53.607, "step": 70 },
    { "epoch": 1.14, "learning_rate": 1.7714285714285717e-05, "loss": 0.5155, "step": 80 },
    { "epoch": 1.14, "step": 80, "train_loss": 0.30939409136772156 },
    { "epoch": 1.14, "step": 80, "train_loss": 0.46914660930633545 },
    { "epoch": 1.43, "learning_rate": 1.7142857142857142e-05, "loss": 0.4106, "step": 100 },
    { "epoch": 1.43, "step": 100, "train_loss": 0.5445544719696045 },
    { "epoch": 1.43, "step": 100, "train_loss": 0.2606019377708435 },
    { "epoch": 1.71, "learning_rate": 1.6571428571428574e-05, "loss": 0.4368, "step": 120 },
    { "epoch": 1.71, "step": 120, "train_loss": 0.3977287709712982 },
    { "epoch": 1.71, "step": 120, "train_loss": 0.27664005756378174 },
    { "epoch": 2.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.3439, "step": 140 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.281027227640152 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.8136294484138489 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.12487435340881348 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.4965817332267761 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.10273457318544388 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.4042325019836426 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.18423768877983093 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.2865528464317322 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.6649780869483948 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.187837615609169 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.5332860946655273 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.3017665147781372 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.17419536411762238 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.21602007746696472 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.19301216304302216 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.7078395485877991 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.2670217752456665 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.3781671524047852 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.1845088005065918 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.5374538898468018 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.8708707690238953 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.8791667222976685 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.30802324414253235 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.2370085716247559 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.2875761985778809 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.280977487564087 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.4434828758239746 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.9846087098121643 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.3032301068305969 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.22137752175331116 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.8634898066520691 },
    { "epoch": 2.0, "step": 140, "train_loss": 1.080783486366272 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.8349682092666626 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.6795739531517029 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.676697850227356 },
    { "epoch": 2.0, "eval_accuracy": { "accuracy": 0.8428571428571429 }, "eval_auc": 0.7470845481049562, "eval_f1": { "f1": 0.3802816901408451 }, "eval_loss": 0.7140511870384216, "eval_precision": { "precision": 0.375 }, "eval_recall": { "recall": 0.38571428571428573 }, "eval_runtime": 0.8142, "eval_samples_per_second": 687.816, "eval_steps_per_second": 42.989, "step": 140 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.18269318342208862 },
    { "epoch": 2.0, "step": 140, "train_loss": 0.16265031695365906 },
    { "epoch": 2.29, "learning_rate": 1.542857142857143e-05, "loss": 0.22, "step": 160 },
    { "epoch": 2.29, "step": 160, "train_loss": 0.15252527594566345 },
    { "epoch": 2.29, "step": 160, "train_loss": 0.26980623602867126 },
    { "epoch": 2.57, "learning_rate": 1.4857142857142858e-05, "loss": 0.2508, "step": 180 },
    { "epoch": 2.57, "step": 180, "train_loss": 0.1389356404542923 },
    { "epoch": 2.57, "step": 180, "train_loss": 0.07751139253377914 },
    { "epoch": 2.86, "learning_rate": 1.4285714285714287e-05, "loss": 0.2149, "step": 200 },
    { "epoch": 2.86, "step": 200, "train_loss": 0.13591702282428741 },
    { "epoch": 2.86, "step": 200, "train_loss": 0.04827806353569031 },
    { "epoch": 3.0, "eval_accuracy": { "accuracy": 0.8232142857142857 }, "eval_auc": 0.7547521865889213, "eval_f1": { "f1": 0.3926380368098159 }, "eval_loss": 0.9247345924377441, "eval_precision": { "precision": 0.34408602150537637 }, "eval_recall": { "recall": 0.45714285714285713 }, "eval_runtime": 0.6565, "eval_samples_per_second": 852.978, "eval_steps_per_second": 53.311, "step": 210 },
    { "epoch": 3.14, "learning_rate": 1.3714285714285716e-05, "loss": 0.2083, "step": 220 },
    { "epoch": 3.14, "step": 220, "train_loss": 0.08572366833686829 },
    { "epoch": 3.14, "step": 220, "train_loss": 0.16466383635997772 },
    { "epoch": 3.43, "learning_rate": 1.3142857142857145e-05, "loss": 0.1521, "step": 240 },
    { "epoch": 3.43, "step": 240, "train_loss": 0.4424760043621063 },
    { "epoch": 3.43, "step": 240, "train_loss": 0.0380471907556057 },
    { "epoch": 3.71, "learning_rate": 1.2571428571428572e-05, "loss": 0.1872, "step": 260 },
    { "epoch": 3.71, "step": 260, "train_loss": 0.4089410901069641 },
    { "epoch": 3.71, "step": 260, "train_loss": 0.2289683222770691 },
    { "epoch": 4.0, "learning_rate": 1.2e-05, "loss": 0.1486, "step": 280 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.4819692373275757 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.6059560775756836 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.02861696481704712 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.6295557618141174 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.011990266852080822 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.8871378302574158 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.96287602186203 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.368556946516037 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.9895154237747192 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.11174698173999786 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.6365806460380554 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.9894587993621826 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.27545443177223206 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.15115408599376678 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.1443568766117096 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.6999047994613647 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.7545350790023804 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.3427138328552246 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.2553696632385254 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.8793208599090576 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.293062686920166 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.2954611778259277 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.26605361700057983 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.2778561115264893 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.4143030643463135 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.61956524848938 },
    { "epoch": 4.0, "step": 280, "train_loss": 2.9289610385894775 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.7221819162368774 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.3664304316043854 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.2494506984949112 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.011549472808838 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.9930472373962402 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.8549365997314453 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.246674656867981 },
    { "epoch": 4.0, "step": 280, "train_loss": 1.2460875511169434 },
    { "epoch": 4.0, "eval_accuracy": { "accuracy": 0.8375 }, "eval_auc": 0.7523032069970845, "eval_f1": { "f1": 0.35460992907801414 }, "eval_loss": 1.1712112426757812, "eval_precision": { "precision": 0.352112676056338 }, "eval_recall": { "recall": 0.35714285714285715 }, "eval_runtime": 0.8151, "eval_samples_per_second": 687.069, "eval_steps_per_second": 42.942, "step": 280 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.1625460833311081 },
    { "epoch": 4.0, "step": 280, "train_loss": 0.25639742612838745 },
    { "epoch": 4.29, "learning_rate": 1.1428571428571429e-05, "loss": 0.1546, "step": 300 },
    { "epoch": 4.29, "step": 300, "train_loss": 0.030452944338321686 },
    { "epoch": 4.29, "step": 300, "train_loss": 0.010571416467428207 },
    { "epoch": 4.57, "learning_rate": 1.0857142857142858e-05, "loss": 0.1151, "step": 320 },
    { "epoch": 4.57, "step": 320, "train_loss": 0.11510075628757477 },
    { "epoch": 4.57, "step": 320, "train_loss": 0.19758647680282593 },
    { "epoch": 4.86, "learning_rate": 1.0285714285714285e-05, "loss": 0.1334, "step": 340 },
    { "epoch": 4.86, "step": 340, "train_loss": 0.25014737248420715 },
    { "epoch": 4.86, "step": 340, "train_loss": 0.019543316215276718 },
    { "epoch": 5.0, "eval_accuracy": { "accuracy": 0.8482142857142857 }, "eval_auc": 0.7345772594752188, "eval_f1": { "f1": 0.32 }, "eval_loss": 1.4309738874435425, "eval_precision": { "precision": 0.36363636363636365 }, "eval_recall": { "recall": 0.2857142857142857 }, "eval_runtime": 0.6857, "eval_samples_per_second": 816.675, "eval_steps_per_second": 51.042, "step": 350 }
  ],
  "max_steps": 700,
  "num_train_epochs": 10,
  "total_flos": 161884215698040.0,
  "trial_name": null,
  "trial_params": null
}