| { | |
| "best_metric": 0.1180819422006607, | |
| "best_model_checkpoint": "CXR-Classifier/checkpoint-1224", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1224, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 3.592426061630249, | |
| "learning_rate": 8.130081300813009e-06, | |
| "loss": 0.5972, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 2.819566249847412, | |
| "learning_rate": 1.6260162601626018e-05, | |
| "loss": 0.4976, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 5.789632320404053, | |
| "learning_rate": 2.4390243902439026e-05, | |
| "loss": 0.3321, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 5.508607864379883, | |
| "learning_rate": 3.2520325203252037e-05, | |
| "loss": 0.415, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 7.562315464019775, | |
| "learning_rate": 4.065040650406504e-05, | |
| "loss": 0.2412, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 4.414723873138428, | |
| "learning_rate": 4.878048780487805e-05, | |
| "loss": 0.3456, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.0423896312713623, | |
| "learning_rate": 4.922797456857402e-05, | |
| "loss": 0.2415, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 2.041806221008301, | |
| "learning_rate": 4.83197093551317e-05, | |
| "loss": 0.3546, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.6937503814697266, | |
| "learning_rate": 4.741144414168938e-05, | |
| "loss": 0.3947, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 6.492763996124268, | |
| "learning_rate": 4.650317892824705e-05, | |
| "loss": 0.3063, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.9708950519561768, | |
| "learning_rate": 4.559491371480473e-05, | |
| "loss": 0.3115, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 12.533012390136719, | |
| "learning_rate": 4.46866485013624e-05, | |
| "loss": 0.5087, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 8.02456283569336, | |
| "learning_rate": 4.377838328792008e-05, | |
| "loss": 0.2745, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.0878229141235352, | |
| "learning_rate": 4.287011807447775e-05, | |
| "loss": 0.1905, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 7.465769290924072, | |
| "learning_rate": 4.196185286103542e-05, | |
| "loss": 0.2509, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 15.646003723144531, | |
| "learning_rate": 4.10535876475931e-05, | |
| "loss": 0.4353, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 3.2481565475463867, | |
| "learning_rate": 4.014532243415077e-05, | |
| "loss": 0.3478, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.395519733428955, | |
| "learning_rate": 3.923705722070845e-05, | |
| "loss": 0.2199, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 8.089118003845215, | |
| "learning_rate": 3.832879200726612e-05, | |
| "loss": 0.2715, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 8.150867462158203, | |
| "learning_rate": 3.74205267938238e-05, | |
| "loss": 0.2074, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9387254901960784, | |
| "eval_auc": 0.9766835240883684, | |
| "eval_f1": 0.957841483979764, | |
| "eval_loss": 0.2350389063358307, | |
| "eval_precision": 0.961082910321489, | |
| "eval_recall": 0.9546218487394958, | |
| "eval_runtime": 246.6561, | |
| "eval_samples_per_second": 3.308, | |
| "eval_steps_per_second": 0.207, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 6.791078090667725, | |
| "learning_rate": 3.651226158038147e-05, | |
| "loss": 0.1235, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 5.592333793640137, | |
| "learning_rate": 3.560399636693915e-05, | |
| "loss": 0.4199, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.2713923752307892, | |
| "learning_rate": 3.469573115349682e-05, | |
| "loss": 0.3119, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 5.907072067260742, | |
| "learning_rate": 3.37874659400545e-05, | |
| "loss": 0.2118, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 0.9097113013267517, | |
| "learning_rate": 3.287920072661217e-05, | |
| "loss": 0.2174, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 6.9212141036987305, | |
| "learning_rate": 3.197093551316985e-05, | |
| "loss": 0.2448, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 6.113616466522217, | |
| "learning_rate": 3.106267029972752e-05, | |
| "loss": 0.1619, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.9741531014442444, | |
| "learning_rate": 3.0154405086285197e-05, | |
| "loss": 0.3296, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.604313611984253, | |
| "learning_rate": 2.924613987284287e-05, | |
| "loss": 0.1598, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 5.160298824310303, | |
| "learning_rate": 2.8337874659400547e-05, | |
| "loss": 0.2605, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 7.961933135986328, | |
| "learning_rate": 2.7429609445958222e-05, | |
| "loss": 0.295, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 3.545825719833374, | |
| "learning_rate": 2.6521344232515894e-05, | |
| "loss": 0.2613, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.7656643390655518, | |
| "learning_rate": 2.5613079019073572e-05, | |
| "loss": 0.1684, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 14.269344329833984, | |
| "learning_rate": 2.4704813805631247e-05, | |
| "loss": 0.3285, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.21142134070396423, | |
| "learning_rate": 2.379654859218892e-05, | |
| "loss": 0.2071, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.0282666683197021, | |
| "learning_rate": 2.2888283378746594e-05, | |
| "loss": 0.2701, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 12.365777969360352, | |
| "learning_rate": 2.198001816530427e-05, | |
| "loss": 0.1753, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 6.909509181976318, | |
| "learning_rate": 2.1071752951861944e-05, | |
| "loss": 0.185, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 10.059576034545898, | |
| "learning_rate": 2.016348773841962e-05, | |
| "loss": 0.1403, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 13.194554328918457, | |
| "learning_rate": 1.9255222524977297e-05, | |
| "loss": 0.177, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9522058823529411, | |
| "eval_auc": 0.9864329442184113, | |
| "eval_f1": 0.967418546365915, | |
| "eval_loss": 0.15405645966529846, | |
| "eval_precision": 0.9617940199335548, | |
| "eval_recall": 0.973109243697479, | |
| "eval_runtime": 257.0506, | |
| "eval_samples_per_second": 3.174, | |
| "eval_steps_per_second": 0.198, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.45505988597869873, | |
| "learning_rate": 1.834695731153497e-05, | |
| "loss": 0.1334, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.5608593821525574, | |
| "learning_rate": 1.7438692098092644e-05, | |
| "loss": 0.1801, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.9215396642684937, | |
| "learning_rate": 1.653042688465032e-05, | |
| "loss": 0.1397, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.03459596261382103, | |
| "learning_rate": 1.5622161671207994e-05, | |
| "loss": 0.0797, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 4.931589603424072, | |
| "learning_rate": 1.4713896457765669e-05, | |
| "loss": 0.1547, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 12.403867721557617, | |
| "learning_rate": 1.3805631244323344e-05, | |
| "loss": 0.1008, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 6.834578514099121, | |
| "learning_rate": 1.2897366030881017e-05, | |
| "loss": 0.3086, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.12356822937726974, | |
| "learning_rate": 1.1989100817438692e-05, | |
| "loss": 0.1367, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.23836758732795715, | |
| "learning_rate": 1.1080835603996367e-05, | |
| "loss": 0.1204, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 0.645460307598114, | |
| "learning_rate": 1.0172570390554042e-05, | |
| "loss": 0.2857, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 6.155028820037842, | |
| "learning_rate": 9.264305177111717e-06, | |
| "loss": 0.1514, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 6.625197410583496, | |
| "learning_rate": 8.356039963669392e-06, | |
| "loss": 0.1973, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.4476400911808014, | |
| "learning_rate": 7.447774750227067e-06, | |
| "loss": 0.1153, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 11.432110786437988, | |
| "learning_rate": 6.539509536784741e-06, | |
| "loss": 0.1943, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 6.038093090057373, | |
| "learning_rate": 5.631244323342416e-06, | |
| "loss": 0.0998, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.24591827392578125, | |
| "learning_rate": 4.722979109900091e-06, | |
| "loss": 0.1767, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 3.9476640224456787, | |
| "learning_rate": 3.814713896457766e-06, | |
| "loss": 0.1798, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 9.382974624633789, | |
| "learning_rate": 2.9064486830154405e-06, | |
| "loss": 0.1707, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 0.10719335079193115, | |
| "learning_rate": 1.9981834695731155e-06, | |
| "loss": 0.2662, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 10.07032299041748, | |
| "learning_rate": 1.0899182561307902e-06, | |
| "loss": 0.218, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 17.199472427368164, | |
| "learning_rate": 1.8165304268846503e-07, | |
| "loss": 0.1692, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9644607843137255, | |
| "eval_auc": 0.9916270580630442, | |
| "eval_f1": 0.9755686604886269, | |
| "eval_loss": 0.1180819422006607, | |
| "eval_precision": 0.9780405405405406, | |
| "eval_recall": 0.973109243697479, | |
| "eval_runtime": 252.4161, | |
| "eval_samples_per_second": 3.233, | |
| "eval_steps_per_second": 0.202, | |
| "step": 1224 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 1224, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 7.581041343995535e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |