| { |
| "best_metric": 0.0417679101228714, |
| "best_model_checkpoint": "./finetune-vit-base-patch16-224/checkpoint-1200", |
| "epoch": 4.0, |
| "eval_steps": 400, |
| "global_step": 1408, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028409090909090908, |
| "grad_norm": 246460.15625, |
| "learning_rate": 4.9644886363636365e-05, |
| "loss": 1.1228, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.056818181818181816, |
| "grad_norm": 226149.65625, |
| "learning_rate": 4.9289772727272735e-05, |
| "loss": 0.9359, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08522727272727272, |
| "grad_norm": 229797.6875, |
| "learning_rate": 4.893465909090909e-05, |
| "loss": 0.9185, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.11363636363636363, |
| "grad_norm": 274111.03125, |
| "learning_rate": 4.857954545454545e-05, |
| "loss": 0.9599, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.14204545454545456, |
| "grad_norm": 189042.953125, |
| "learning_rate": 4.822443181818182e-05, |
| "loss": 0.9459, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.17045454545454544, |
| "grad_norm": 233362.859375, |
| "learning_rate": 4.7869318181818185e-05, |
| "loss": 0.9634, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19886363636363635, |
| "grad_norm": 267175.90625, |
| "learning_rate": 4.751420454545455e-05, |
| "loss": 0.8705, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22727272727272727, |
| "grad_norm": 211430.734375, |
| "learning_rate": 4.715909090909091e-05, |
| "loss": 0.9014, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2556818181818182, |
| "grad_norm": 238574.546875, |
| "learning_rate": 4.6803977272727274e-05, |
| "loss": 0.8607, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2840909090909091, |
| "grad_norm": 260448.125, |
| "learning_rate": 4.6448863636363636e-05, |
| "loss": 0.8127, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 168009.265625, |
| "learning_rate": 4.609375e-05, |
| "loss": 0.8228, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3409090909090909, |
| "grad_norm": 232205.125, |
| "learning_rate": 4.573863636363637e-05, |
| "loss": 0.8704, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3693181818181818, |
| "grad_norm": 302465.9375, |
| "learning_rate": 4.538352272727273e-05, |
| "loss": 0.8896, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3977272727272727, |
| "grad_norm": 210630.53125, |
| "learning_rate": 4.5028409090909094e-05, |
| "loss": 0.8732, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.42613636363636365, |
| "grad_norm": 171584.9375, |
| "learning_rate": 4.4673295454545457e-05, |
| "loss": 0.7886, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 255000.359375, |
| "learning_rate": 4.431818181818182e-05, |
| "loss": 0.9411, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.48295454545454547, |
| "grad_norm": 244293.703125, |
| "learning_rate": 4.396306818181818e-05, |
| "loss": 0.8608, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5113636363636364, |
| "grad_norm": 235527.875, |
| "learning_rate": 4.360795454545455e-05, |
| "loss": 0.8106, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5397727272727273, |
| "grad_norm": 234210.1875, |
| "learning_rate": 4.3252840909090914e-05, |
| "loss": 0.795, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5681818181818182, |
| "grad_norm": 182797.875, |
| "learning_rate": 4.289772727272727e-05, |
| "loss": 0.7926, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5965909090909091, |
| "grad_norm": 324642.5, |
| "learning_rate": 4.254261363636364e-05, |
| "loss": 0.778, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 359272.71875, |
| "learning_rate": 4.21875e-05, |
| "loss": 0.7829, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6534090909090909, |
| "grad_norm": 279676.875, |
| "learning_rate": 4.1832386363636365e-05, |
| "loss": 0.8244, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6818181818181818, |
| "grad_norm": 259783.71875, |
| "learning_rate": 4.1477272727272734e-05, |
| "loss": 0.7465, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7102272727272727, |
| "grad_norm": 184817.609375, |
| "learning_rate": 4.112215909090909e-05, |
| "loss": 0.7447, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7386363636363636, |
| "grad_norm": 221672.1875, |
| "learning_rate": 4.076704545454545e-05, |
| "loss": 0.8206, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7670454545454546, |
| "grad_norm": 251710.0, |
| "learning_rate": 4.041193181818182e-05, |
| "loss": 0.8222, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7954545454545454, |
| "grad_norm": 287394.75, |
| "learning_rate": 4.0056818181818185e-05, |
| "loss": 0.8751, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8238636363636364, |
| "grad_norm": 261405.84375, |
| "learning_rate": 3.970170454545455e-05, |
| "loss": 0.8049, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8522727272727273, |
| "grad_norm": 339216.5, |
| "learning_rate": 3.934659090909091e-05, |
| "loss": 0.7734, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8806818181818182, |
| "grad_norm": 253168.921875, |
| "learning_rate": 3.899147727272727e-05, |
| "loss": 0.7916, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 243938.09375, |
| "learning_rate": 3.8636363636363636e-05, |
| "loss": 0.8075, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 224975.296875, |
| "learning_rate": 3.828125e-05, |
| "loss": 0.724, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9659090909090909, |
| "grad_norm": 314409.71875, |
| "learning_rate": 3.792613636363637e-05, |
| "loss": 0.86, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9943181818181818, |
| "grad_norm": 254573.59375, |
| "learning_rate": 3.757102272727273e-05, |
| "loss": 0.7882, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0227272727272727, |
| "grad_norm": 239098.109375, |
| "learning_rate": 3.721590909090909e-05, |
| "loss": 0.5987, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0511363636363635, |
| "grad_norm": 212271.015625, |
| "learning_rate": 3.6860795454545456e-05, |
| "loss": 0.5594, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0795454545454546, |
| "grad_norm": 258443.203125, |
| "learning_rate": 3.650568181818182e-05, |
| "loss": 0.5778, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1079545454545454, |
| "grad_norm": 251415.8125, |
| "learning_rate": 3.615056818181818e-05, |
| "loss": 0.5707, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 191828.046875, |
| "learning_rate": 3.579545454545455e-05, |
| "loss": 0.6151, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "eval_f1": 0.7879138483446066, |
| "eval_loss": 0.5355119705200195, |
| "eval_runtime": 204.0556, |
| "eval_samples_per_second": 55.063, |
| "eval_steps_per_second": 3.445, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1647727272727273, |
| "grad_norm": 233159.53125, |
| "learning_rate": 3.5440340909090914e-05, |
| "loss": 0.548, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1931818181818181, |
| "grad_norm": 206000.609375, |
| "learning_rate": 3.508522727272727e-05, |
| "loss": 0.5118, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.2215909090909092, |
| "grad_norm": 262176.0625, |
| "learning_rate": 3.473011363636364e-05, |
| "loss": 0.5221, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 225265.671875, |
| "learning_rate": 3.4375e-05, |
| "loss": 0.5489, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2784090909090908, |
| "grad_norm": 261512.140625, |
| "learning_rate": 3.4019886363636365e-05, |
| "loss": 0.5682, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3068181818181819, |
| "grad_norm": 336397.46875, |
| "learning_rate": 3.3664772727272734e-05, |
| "loss": 0.5585, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3352272727272727, |
| "grad_norm": 253634.796875, |
| "learning_rate": 3.330965909090909e-05, |
| "loss": 0.5239, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 253387.1875, |
| "learning_rate": 3.295454545454545e-05, |
| "loss": 0.5411, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3920454545454546, |
| "grad_norm": 175611.75, |
| "learning_rate": 3.259943181818182e-05, |
| "loss": 0.4704, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4204545454545454, |
| "grad_norm": 210382.125, |
| "learning_rate": 3.2244318181818185e-05, |
| "loss": 0.4668, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4488636363636362, |
| "grad_norm": 207340.484375, |
| "learning_rate": 3.188920454545455e-05, |
| "loss": 0.5243, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.4772727272727273, |
| "grad_norm": 211227.53125, |
| "learning_rate": 3.153409090909091e-05, |
| "loss": 0.5158, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5056818181818183, |
| "grad_norm": 263875.125, |
| "learning_rate": 3.117897727272727e-05, |
| "loss": 0.5264, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5340909090909092, |
| "grad_norm": 250973.984375, |
| "learning_rate": 3.0823863636363636e-05, |
| "loss": 0.4892, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 210192.90625, |
| "learning_rate": 3.0468750000000002e-05, |
| "loss": 0.565, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5909090909090908, |
| "grad_norm": 277090.34375, |
| "learning_rate": 3.0113636363636365e-05, |
| "loss": 0.5501, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.6193181818181817, |
| "grad_norm": 262420.625, |
| "learning_rate": 2.975852272727273e-05, |
| "loss": 0.4802, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6477272727272727, |
| "grad_norm": 247244.59375, |
| "learning_rate": 2.940340909090909e-05, |
| "loss": 0.4778, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6761363636363638, |
| "grad_norm": 238716.140625, |
| "learning_rate": 2.9048295454545453e-05, |
| "loss": 0.4998, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.7045454545454546, |
| "grad_norm": 288676.875, |
| "learning_rate": 2.869318181818182e-05, |
| "loss": 0.4763, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7329545454545454, |
| "grad_norm": 254478.03125, |
| "learning_rate": 2.8338068181818185e-05, |
| "loss": 0.4912, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7613636363636362, |
| "grad_norm": 295674.3125, |
| "learning_rate": 2.7982954545454548e-05, |
| "loss": 0.4892, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.7897727272727273, |
| "grad_norm": 279737.21875, |
| "learning_rate": 2.7627840909090914e-05, |
| "loss": 0.4677, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 325599.34375, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 0.4977, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8465909090909092, |
| "grad_norm": 303249.375, |
| "learning_rate": 2.6917613636363636e-05, |
| "loss": 0.5212, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 269595.21875, |
| "learning_rate": 2.6562500000000002e-05, |
| "loss": 0.5283, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.9034090909090908, |
| "grad_norm": 274965.3125, |
| "learning_rate": 2.6207386363636365e-05, |
| "loss": 0.5194, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.9318181818181817, |
| "grad_norm": 250650.328125, |
| "learning_rate": 2.585227272727273e-05, |
| "loss": 0.5274, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9602272727272727, |
| "grad_norm": 232058.15625, |
| "learning_rate": 2.549715909090909e-05, |
| "loss": 0.5002, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9886363636363638, |
| "grad_norm": 251402.0, |
| "learning_rate": 2.5142045454545453e-05, |
| "loss": 0.4618, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.0170454545454546, |
| "grad_norm": 192832.578125, |
| "learning_rate": 2.478693181818182e-05, |
| "loss": 0.3425, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.0454545454545454, |
| "grad_norm": 200086.390625, |
| "learning_rate": 2.4431818181818185e-05, |
| "loss": 0.2832, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0738636363636362, |
| "grad_norm": 162459.609375, |
| "learning_rate": 2.4076704545454544e-05, |
| "loss": 0.2102, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.102272727272727, |
| "grad_norm": 132360.765625, |
| "learning_rate": 2.372159090909091e-05, |
| "loss": 0.2097, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.1306818181818183, |
| "grad_norm": 146930.046875, |
| "learning_rate": 2.3366477272727273e-05, |
| "loss": 0.1884, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.159090909090909, |
| "grad_norm": 246238.796875, |
| "learning_rate": 2.3011363636363636e-05, |
| "loss": 0.1969, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.1875, |
| "grad_norm": 232657.203125, |
| "learning_rate": 2.2656250000000002e-05, |
| "loss": 0.1925, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.215909090909091, |
| "grad_norm": 227103.3125, |
| "learning_rate": 2.2301136363636365e-05, |
| "loss": 0.1851, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.2443181818181817, |
| "grad_norm": 171326.71875, |
| "learning_rate": 2.1946022727272727e-05, |
| "loss": 0.2253, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 121495.1953125, |
| "learning_rate": 2.1590909090909093e-05, |
| "loss": 0.1867, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "eval_f1": 0.9550551797792809, |
| "eval_loss": 0.17148956656455994, |
| "eval_runtime": 203.949, |
| "eval_samples_per_second": 55.092, |
| "eval_steps_per_second": 3.447, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.3011363636363638, |
| "grad_norm": 238023.546875, |
| "learning_rate": 2.1235795454545456e-05, |
| "loss": 0.2143, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.3295454545454546, |
| "grad_norm": 215472.78125, |
| "learning_rate": 2.088068181818182e-05, |
| "loss": 0.1681, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.3579545454545454, |
| "grad_norm": 185951.046875, |
| "learning_rate": 2.0525568181818185e-05, |
| "loss": 0.2, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.3863636363636362, |
| "grad_norm": 288287.34375, |
| "learning_rate": 2.0170454545454544e-05, |
| "loss": 0.1899, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.4147727272727275, |
| "grad_norm": 184342.796875, |
| "learning_rate": 1.981534090909091e-05, |
| "loss": 0.1898, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.4431818181818183, |
| "grad_norm": 143657.375, |
| "learning_rate": 1.9460227272727273e-05, |
| "loss": 0.1707, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.471590909090909, |
| "grad_norm": 142439.578125, |
| "learning_rate": 1.9105113636363636e-05, |
| "loss": 0.1505, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 255553.71875, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 0.2047, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.528409090909091, |
| "grad_norm": 217335.078125, |
| "learning_rate": 1.8394886363636364e-05, |
| "loss": 0.18, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.5568181818181817, |
| "grad_norm": 143375.3125, |
| "learning_rate": 1.8039772727272727e-05, |
| "loss": 0.2372, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.5852272727272725, |
| "grad_norm": 325331.0625, |
| "learning_rate": 1.7684659090909093e-05, |
| "loss": 0.2047, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.6136363636363638, |
| "grad_norm": 160601.78125, |
| "learning_rate": 1.7329545454545456e-05, |
| "loss": 0.1999, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.6420454545454546, |
| "grad_norm": 114873.859375, |
| "learning_rate": 1.697443181818182e-05, |
| "loss": 0.1736, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.6704545454545454, |
| "grad_norm": 191060.78125, |
| "learning_rate": 1.6619318181818185e-05, |
| "loss": 0.1809, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.6988636363636362, |
| "grad_norm": 303838.96875, |
| "learning_rate": 1.6264204545454544e-05, |
| "loss": 0.238, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 92415.265625, |
| "learning_rate": 1.590909090909091e-05, |
| "loss": 0.137, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.7556818181818183, |
| "grad_norm": 227939.296875, |
| "learning_rate": 1.5553977272727273e-05, |
| "loss": 0.1811, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.784090909090909, |
| "grad_norm": 244860.359375, |
| "learning_rate": 1.5198863636363636e-05, |
| "loss": 0.2235, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.8125, |
| "grad_norm": 199524.078125, |
| "learning_rate": 1.484375e-05, |
| "loss": 0.1885, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.840909090909091, |
| "grad_norm": 245456.046875, |
| "learning_rate": 1.4488636363636366e-05, |
| "loss": 0.2261, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.8693181818181817, |
| "grad_norm": 291130.96875, |
| "learning_rate": 1.4133522727272727e-05, |
| "loss": 0.1767, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.8977272727272725, |
| "grad_norm": 119223.3046875, |
| "learning_rate": 1.3778409090909091e-05, |
| "loss": 0.1589, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.9261363636363638, |
| "grad_norm": 205424.078125, |
| "learning_rate": 1.3423295454545456e-05, |
| "loss": 0.1666, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.9545454545454546, |
| "grad_norm": 177895.84375, |
| "learning_rate": 1.3068181818181819e-05, |
| "loss": 0.1572, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.9829545454545454, |
| "grad_norm": 337598.78125, |
| "learning_rate": 1.2713068181818183e-05, |
| "loss": 0.1938, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.0113636363636362, |
| "grad_norm": 173000.0, |
| "learning_rate": 1.2357954545454546e-05, |
| "loss": 0.1126, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.039772727272727, |
| "grad_norm": 97144.171875, |
| "learning_rate": 1.200284090909091e-05, |
| "loss": 0.0462, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.0681818181818183, |
| "grad_norm": 54899.234375, |
| "learning_rate": 1.1647727272727273e-05, |
| "loss": 0.0615, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.096590909090909, |
| "grad_norm": 36492.046875, |
| "learning_rate": 1.1292613636363637e-05, |
| "loss": 0.0491, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 37996.1953125, |
| "learning_rate": 1.09375e-05, |
| "loss": 0.0562, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.153409090909091, |
| "grad_norm": 190393.703125, |
| "learning_rate": 1.0582386363636364e-05, |
| "loss": 0.054, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 179904.40625, |
| "learning_rate": 1.0227272727272729e-05, |
| "loss": 0.0728, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.210227272727273, |
| "grad_norm": 100628.515625, |
| "learning_rate": 9.872159090909091e-06, |
| "loss": 0.0625, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.2386363636363638, |
| "grad_norm": 118374.3984375, |
| "learning_rate": 9.517045454545454e-06, |
| "loss": 0.0569, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.2670454545454546, |
| "grad_norm": 75175.8359375, |
| "learning_rate": 9.161931818181818e-06, |
| "loss": 0.0436, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.2954545454545454, |
| "grad_norm": 158238.78125, |
| "learning_rate": 8.806818181818183e-06, |
| "loss": 0.079, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.3238636363636362, |
| "grad_norm": 68349.515625, |
| "learning_rate": 8.451704545454546e-06, |
| "loss": 0.056, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.3522727272727275, |
| "grad_norm": 43816.8671875, |
| "learning_rate": 8.09659090909091e-06, |
| "loss": 0.0443, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.3806818181818183, |
| "grad_norm": 61632.68359375, |
| "learning_rate": 7.741477272727273e-06, |
| "loss": 0.0554, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.409090909090909, |
| "grad_norm": 60831.44140625, |
| "learning_rate": 7.386363636363637e-06, |
| "loss": 0.0871, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.409090909090909, |
| "eval_f1": 0.9917230331078676, |
| "eval_loss": 0.0417679101228714, |
| "eval_runtime": 204.1321, |
| "eval_samples_per_second": 55.043, |
| "eval_steps_per_second": 3.444, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.4375, |
| "grad_norm": 90207.28125, |
| "learning_rate": 7.031250000000001e-06, |
| "loss": 0.0676, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.465909090909091, |
| "grad_norm": 63487.5546875, |
| "learning_rate": 6.676136363636363e-06, |
| "loss": 0.0346, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.4943181818181817, |
| "grad_norm": 83902.515625, |
| "learning_rate": 6.321022727272729e-06, |
| "loss": 0.0587, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.5227272727272725, |
| "grad_norm": 26082.44921875, |
| "learning_rate": 5.965909090909091e-06, |
| "loss": 0.0385, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.5511363636363638, |
| "grad_norm": 71738.4140625, |
| "learning_rate": 5.610795454545455e-06, |
| "loss": 0.0497, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.5795454545454546, |
| "grad_norm": 115759.3671875, |
| "learning_rate": 5.255681818181818e-06, |
| "loss": 0.0679, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.6079545454545454, |
| "grad_norm": 49416.74609375, |
| "learning_rate": 4.900568181818182e-06, |
| "loss": 0.0565, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 164339.484375, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.0374, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.6647727272727275, |
| "grad_norm": 74746.796875, |
| "learning_rate": 4.190340909090909e-06, |
| "loss": 0.0382, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.6931818181818183, |
| "grad_norm": 29929.04296875, |
| "learning_rate": 3.835227272727273e-06, |
| "loss": 0.039, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.721590909090909, |
| "grad_norm": 59106.06640625, |
| "learning_rate": 3.480113636363636e-06, |
| "loss": 0.0376, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 187797.71875, |
| "learning_rate": 3.125e-06, |
| "loss": 0.056, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.778409090909091, |
| "grad_norm": 42829.46875, |
| "learning_rate": 2.7698863636363637e-06, |
| "loss": 0.0434, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.8068181818181817, |
| "grad_norm": 252679.109375, |
| "learning_rate": 2.4147727272727273e-06, |
| "loss": 0.0502, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.8352272727272725, |
| "grad_norm": 35090.86328125, |
| "learning_rate": 2.059659090909091e-06, |
| "loss": 0.0686, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.8636363636363638, |
| "grad_norm": 287442.9375, |
| "learning_rate": 1.7045454545454546e-06, |
| "loss": 0.0579, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.8920454545454546, |
| "grad_norm": 241179.890625, |
| "learning_rate": 1.3494318181818183e-06, |
| "loss": 0.065, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.9204545454545454, |
| "grad_norm": 20388.59765625, |
| "learning_rate": 9.943181818181819e-07, |
| "loss": 0.0281, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.9488636363636362, |
| "grad_norm": 44893.046875, |
| "learning_rate": 6.392045454545455e-07, |
| "loss": 0.0297, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.9772727272727275, |
| "grad_norm": 30813.0546875, |
| "learning_rate": 2.840909090909091e-07, |
| "loss": 0.048, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 1408, |
| "total_flos": 3.4828624117074493e+18, |
| "train_loss": 0.40373469023457303, |
| "train_runtime": 1995.1511, |
| "train_samples_per_second": 22.527, |
| "train_steps_per_second": 0.706 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1408, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.4828624117074493e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|