| { | |
| "best_metric": 0.9025482535362244, | |
| "best_model_checkpoint": "xblock-large-patch1-224/checkpoint-498", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 498, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": null, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 2.4849, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 11.997095108032227, | |
| "learning_rate": 1.5e-05, | |
| "loss": 2.3188, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 12.830357551574707, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 2.1147, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 11.86018180847168, | |
| "learning_rate": 3e-05, | |
| "loss": 1.9589, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 13.479438781738281, | |
| "learning_rate": 3.8e-05, | |
| "loss": 1.7637, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 13.69570255279541, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 1.8184, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 9.349321365356445, | |
| "learning_rate": 4.955357142857143e-05, | |
| "loss": 2.0031, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 11.169551849365234, | |
| "learning_rate": 4.866071428571429e-05, | |
| "loss": 1.7263, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 9.97819995880127, | |
| "learning_rate": 4.7767857142857144e-05, | |
| "loss": 1.9287, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 11.742727279663086, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 1.814, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 8.304338455200195, | |
| "learning_rate": 4.598214285714286e-05, | |
| "loss": 1.4417, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 10.799261093139648, | |
| "learning_rate": 4.5089285714285714e-05, | |
| "loss": 1.7538, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 8.281989097595215, | |
| "learning_rate": 4.419642857142857e-05, | |
| "loss": 1.7161, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 7.088228225708008, | |
| "learning_rate": 4.3303571428571435e-05, | |
| "loss": 1.7526, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 9.898828506469727, | |
| "learning_rate": 4.2410714285714285e-05, | |
| "loss": 1.8377, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 9.417756080627441, | |
| "learning_rate": 4.151785714285715e-05, | |
| "loss": 1.6742, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 10.03836441040039, | |
| "learning_rate": 4.0625000000000005e-05, | |
| "loss": 1.5157, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 10.27881908416748, | |
| "learning_rate": 3.9732142857142855e-05, | |
| "loss": 1.5828, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 10.77905559539795, | |
| "learning_rate": 3.883928571428572e-05, | |
| "loss": 1.3418, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 12.170005798339844, | |
| "learning_rate": 3.794642857142857e-05, | |
| "loss": 1.8624, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5271084337349398, | |
| "eval_f1_macro": 0.4518532713560705, | |
| "eval_f1_micro": 0.5271084337349398, | |
| "eval_f1_weighted": 0.496654440865482, | |
| "eval_loss": 1.4107117652893066, | |
| "eval_precision_macro": 0.5016472507129397, | |
| "eval_precision_micro": 0.5271084337349398, | |
| "eval_precision_weighted": 0.5568483249244561, | |
| "eval_recall_macro": 0.47663109756097566, | |
| "eval_recall_micro": 0.5271084337349398, | |
| "eval_recall_weighted": 0.5271084337349398, | |
| "eval_runtime": 316.798, | |
| "eval_samples_per_second": 1.048, | |
| "eval_steps_per_second": 0.066, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 8.142753601074219, | |
| "learning_rate": 3.716517857142857e-05, | |
| "loss": 1.5166, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 5.322903633117676, | |
| "learning_rate": 3.627232142857143e-05, | |
| "loss": 1.1869, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 9.545618057250977, | |
| "learning_rate": 3.5379464285714287e-05, | |
| "loss": 1.3475, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 13.654799461364746, | |
| "learning_rate": 3.448660714285715e-05, | |
| "loss": 1.4239, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 8.991721153259277, | |
| "learning_rate": 3.359375e-05, | |
| "loss": 1.3054, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 12.191709518432617, | |
| "learning_rate": 3.270089285714286e-05, | |
| "loss": 1.1966, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 9.003186225891113, | |
| "learning_rate": 3.1808035714285713e-05, | |
| "loss": 1.2069, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 12.034103393554688, | |
| "learning_rate": 3.091517857142857e-05, | |
| "loss": 1.4924, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 12.357869148254395, | |
| "learning_rate": 3.013392857142857e-05, | |
| "loss": 1.4235, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 13.986096382141113, | |
| "learning_rate": 2.9241071428571432e-05, | |
| "loss": 1.3803, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 10.213234901428223, | |
| "learning_rate": 2.8348214285714285e-05, | |
| "loss": 1.2811, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 11.94521713256836, | |
| "learning_rate": 2.7455357142857145e-05, | |
| "loss": 1.3474, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 7.413544654846191, | |
| "learning_rate": 2.6562500000000002e-05, | |
| "loss": 1.2188, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 6.420960426330566, | |
| "learning_rate": 2.5669642857142855e-05, | |
| "loss": 1.195, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 7.711160659790039, | |
| "learning_rate": 2.4776785714285715e-05, | |
| "loss": 1.4389, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 7.766310214996338, | |
| "learning_rate": 2.3883928571428572e-05, | |
| "loss": 1.5062, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 11.04636001586914, | |
| "learning_rate": 2.299107142857143e-05, | |
| "loss": 1.2456, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 7.171872138977051, | |
| "learning_rate": 2.2098214285714286e-05, | |
| "loss": 0.7718, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 9.676796913146973, | |
| "learning_rate": 2.1205357142857142e-05, | |
| "loss": 1.0983, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 9.70329761505127, | |
| "learning_rate": 2.0312500000000002e-05, | |
| "loss": 1.0594, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 10.712843894958496, | |
| "learning_rate": 1.941964285714286e-05, | |
| "loss": 1.2865, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6295180722891566, | |
| "eval_f1_macro": 0.6326430342148868, | |
| "eval_f1_micro": 0.6295180722891566, | |
| "eval_f1_weighted": 0.6385426615207972, | |
| "eval_loss": 1.0817334651947021, | |
| "eval_precision_macro": 0.6845586183973281, | |
| "eval_precision_micro": 0.6295180722891566, | |
| "eval_precision_weighted": 0.6861824846026167, | |
| "eval_recall_macro": 0.6314774629363941, | |
| "eval_recall_micro": 0.6295180722891566, | |
| "eval_recall_weighted": 0.6295180722891566, | |
| "eval_runtime": 311.4659, | |
| "eval_samples_per_second": 1.066, | |
| "eval_steps_per_second": 0.067, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 12.722149848937988, | |
| "learning_rate": 1.8526785714285716e-05, | |
| "loss": 1.0814, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 7.002964019775391, | |
| "learning_rate": 1.7633928571428573e-05, | |
| "loss": 0.8135, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 7.798354625701904, | |
| "learning_rate": 1.674107142857143e-05, | |
| "loss": 0.9007, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 5.395328998565674, | |
| "learning_rate": 1.5848214285714286e-05, | |
| "loss": 0.8786, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 11.069001197814941, | |
| "learning_rate": 1.4955357142857143e-05, | |
| "loss": 1.0465, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 11.216327667236328, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "loss": 1.1112, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 10.935038566589355, | |
| "learning_rate": 1.3169642857142858e-05, | |
| "loss": 0.8433, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 18.220169067382812, | |
| "learning_rate": 1.2276785714285715e-05, | |
| "loss": 0.812, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 13.36108112335205, | |
| "learning_rate": 1.1383928571428572e-05, | |
| "loss": 1.0622, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 12.954853057861328, | |
| "learning_rate": 1.049107142857143e-05, | |
| "loss": 0.8341, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 15.882329940795898, | |
| "learning_rate": 9.598214285714287e-06, | |
| "loss": 0.8285, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 7.79279899597168, | |
| "learning_rate": 8.705357142857143e-06, | |
| "loss": 0.9739, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 11.043404579162598, | |
| "learning_rate": 7.8125e-06, | |
| "loss": 0.9797, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 7.065421104431152, | |
| "learning_rate": 6.919642857142858e-06, | |
| "loss": 0.8766, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 7.5092878341674805, | |
| "learning_rate": 6.0267857142857145e-06, | |
| "loss": 0.637, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 10.960742950439453, | |
| "learning_rate": 5.133928571428571e-06, | |
| "loss": 0.9343, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 23.01622772216797, | |
| "learning_rate": 4.241071428571429e-06, | |
| "loss": 1.0866, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 2.7104849815368652, | |
| "learning_rate": 3.348214285714286e-06, | |
| "loss": 0.7754, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 16.19709014892578, | |
| "learning_rate": 2.455357142857143e-06, | |
| "loss": 0.9792, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 9.475327491760254, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.8772, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 7.728980541229248, | |
| "learning_rate": 6.696428571428571e-07, | |
| "loss": 0.7722, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6716867469879518, | |
| "eval_f1_macro": 0.6858654529218409, | |
| "eval_f1_micro": 0.6716867469879518, | |
| "eval_f1_weighted": 0.676828467951081, | |
| "eval_loss": 0.9025482535362244, | |
| "eval_precision_macro": 0.7239086041672248, | |
| "eval_precision_micro": 0.6716867469879518, | |
| "eval_precision_weighted": 0.7046011538585282, | |
| "eval_recall_macro": 0.6707409732185557, | |
| "eval_recall_micro": 0.6716867469879518, | |
| "eval_recall_weighted": 0.6716867469879518, | |
| "eval_runtime": 302.7239, | |
| "eval_samples_per_second": 1.097, | |
| "eval_steps_per_second": 0.069, | |
| "step": 498 | |
| } | |
| ], | |
| "logging_steps": 8, | |
| "max_steps": 498, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.0897396284801761e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |