| { |
| "best_metric": 0.15279646217823029, |
| "best_model_checkpoint": "CartoonOrNotV2/checkpoint-288", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 288, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.8299, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.3793103448275863e-05, |
| "loss": 0.8639, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.0689655172413793e-05, |
| "loss": 0.6099, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.7586206896551727e-05, |
| "loss": 0.5108, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 3.4482758620689657e-05, |
| "loss": 0.6755, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.1379310344827587e-05, |
| "loss": 0.402, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.827586206896552e-05, |
| "loss": 0.354, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.9420849420849425e-05, |
| "loss": 0.2615, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.8648648648648654e-05, |
| "loss": 0.4872, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.787644787644788e-05, |
| "loss": 0.4822, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.710424710424711e-05, |
| "loss": 0.1944, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.6332046332046336e-05, |
| "loss": 0.4792, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.555984555984556e-05, |
| "loss": 0.1135, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.478764478764479e-05, |
| "loss": 0.1958, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.401544401544402e-05, |
| "loss": 0.0708, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.324324324324325e-05, |
| "loss": 0.4999, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.247104247104247e-05, |
| "loss": 0.257, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.16988416988417e-05, |
| "loss": 0.1863, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.092664092664093e-05, |
| "loss": 0.2244, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.015444015444015e-05, |
| "loss": 0.4371, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.938223938223938e-05, |
| "loss": 0.1398, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.861003861003861e-05, |
| "loss": 0.3429, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.783783783783784e-05, |
| "loss": 0.4481, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.7065637065637065e-05, |
| "loss": 0.117, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9427083333333334, |
| "eval_auc": 0.9909929462832339, |
| "eval_f1": 0.9411764705882354, |
| "eval_loss": 0.15437577664852142, |
| "eval_precision": 0.9565217391304348, |
| "eval_recall": 0.9263157894736842, |
| "eval_runtime": 59.3659, |
| "eval_samples_per_second": 3.234, |
| "eval_steps_per_second": 0.202, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.6293436293436295e-05, |
| "loss": 0.3791, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.5521235521235524e-05, |
| "loss": 0.1039, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.4749034749034754e-05, |
| "loss": 0.0458, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.397683397683398e-05, |
| "loss": 0.0149, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 3.3204633204633207e-05, |
| "loss": 0.1961, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.2432432432432436e-05, |
| "loss": 0.2281, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 3.166023166023166e-05, |
| "loss": 0.1105, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 3.088803088803089e-05, |
| "loss": 0.3031, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 3.011583011583012e-05, |
| "loss": 0.3272, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.9343629343629348e-05, |
| "loss": 0.1091, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.0637, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.77992277992278e-05, |
| "loss": 0.3734, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.702702702702703e-05, |
| "loss": 0.5033, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.6254826254826253e-05, |
| "loss": 0.1018, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.5482625482625483e-05, |
| "loss": 0.7375, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.4710424710424712e-05, |
| "loss": 0.1537, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.393822393822394e-05, |
| "loss": 0.1889, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.3166023166023168e-05, |
| "loss": 0.3122, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.2393822393822394e-05, |
| "loss": 0.1504, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 2.1621621621621624e-05, |
| "loss": 0.3638, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2.084942084942085e-05, |
| "loss": 0.0842, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.0077220077220077e-05, |
| "loss": 0.1984, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.9305019305019306e-05, |
| "loss": 0.1108, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.8532818532818533e-05, |
| "loss": 0.1461, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.953125, |
| "eval_auc": 0.9920781334780249, |
| "eval_f1": 0.9508196721311476, |
| "eval_loss": 0.17911452054977417, |
| "eval_precision": 0.9886363636363636, |
| "eval_recall": 0.9157894736842105, |
| "eval_runtime": 60.1067, |
| "eval_samples_per_second": 3.194, |
| "eval_steps_per_second": 0.2, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.7760617760617762e-05, |
| "loss": 0.0114, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.698841698841699e-05, |
| "loss": 0.1118, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.6216216216216218e-05, |
| "loss": 0.1376, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.5444015444015444e-05, |
| "loss": 0.0606, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.4671814671814674e-05, |
| "loss": 0.1765, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.38996138996139e-05, |
| "loss": 0.2129, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.3127413127413127e-05, |
| "loss": 0.0257, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.2355212355212356e-05, |
| "loss": 0.0493, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.1583011583011584e-05, |
| "loss": 0.1198, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.0810810810810812e-05, |
| "loss": 0.0078, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.0038610038610038e-05, |
| "loss": 0.11, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.266409266409266e-06, |
| "loss": 0.0264, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 8.494208494208494e-06, |
| "loss": 0.2191, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.722007722007722e-06, |
| "loss": 0.3105, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.94980694980695e-06, |
| "loss": 0.0933, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.177606177606178e-06, |
| "loss": 0.2103, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 5.405405405405406e-06, |
| "loss": 0.0115, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.633204633204633e-06, |
| "loss": 0.0261, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.861003861003861e-06, |
| "loss": 0.3301, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 3.088803088803089e-06, |
| "loss": 0.1125, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.3166023166023166e-06, |
| "loss": 0.0756, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.5444015444015445e-06, |
| "loss": 0.0775, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.722007722007723e-07, |
| "loss": 0.0029, |
| "step": 284 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0, |
| "loss": 0.0068, |
| "step": 288 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9739583333333334, |
| "eval_auc": 0.9932718393922951, |
| "eval_f1": 0.9732620320855614, |
| "eval_loss": 0.15279646217823029, |
| "eval_precision": 0.9891304347826086, |
| "eval_recall": 0.9578947368421052, |
| "eval_runtime": 59.2078, |
| "eval_samples_per_second": 3.243, |
| "eval_steps_per_second": 0.203, |
| "step": 288 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 288, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 1.8050816016698573e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|