{
  "best_metric": 1.0,
  "best_model_checkpoint": "ViTFineTuned/checkpoint-603",
  "epoch": 14.988929889298893,
  "global_step": 1005,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 4.950495049504951e-05,
      "loss": 2.3113,
      "step": 10
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.900990099009902e-05,
      "loss": 1.8782,
      "step": 20
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001485148514851485,
      "loss": 1.2023,
      "step": 30
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00019801980198019803,
      "loss": 0.6597,
      "step": 40
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00024752475247524753,
      "loss": 0.4134,
      "step": 50
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.000297029702970297,
      "loss": 0.2859,
      "step": 60
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.9784172661870504,
      "eval_loss": 0.21802514791488647,
      "eval_runtime": 6.2997,
      "eval_samples_per_second": 66.193,
      "eval_steps_per_second": 4.286,
      "step": 67
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.0003465346534653465,
      "loss": 0.285,
      "step": 70
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.00039603960396039607,
      "loss": 0.261,
      "step": 80
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.00044554455445544556,
      "loss": 0.2659,
      "step": 90
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.0004950495049504951,
      "loss": 0.2812,
      "step": 100
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.0004950221238938053,
      "loss": 0.2512,
      "step": 110
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.0004894911504424779,
      "loss": 0.2033,
      "step": 120
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0004839601769911505,
      "loss": 0.293,
      "step": 130
    },
    {
      "epoch": 1.99,
      "eval_accuracy": 0.9184652278177458,
      "eval_loss": 0.3308344781398773,
      "eval_runtime": 6.9556,
      "eval_samples_per_second": 59.952,
      "eval_steps_per_second": 3.882,
      "step": 134
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.000478429203539823,
      "loss": 0.2754,
      "step": 140
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.0004728982300884956,
      "loss": 0.2746,
      "step": 150
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.00046736725663716817,
      "loss": 0.1477,
      "step": 160
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.0004618362831858407,
      "loss": 0.1984,
      "step": 170
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.00045630530973451327,
      "loss": 0.279,
      "step": 180
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.00045077433628318585,
      "loss": 0.1846,
      "step": 190
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.0004452433628318584,
      "loss": 0.1444,
      "step": 200
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.9568345323741008,
      "eval_loss": 0.15321111679077148,
      "eval_runtime": 6.3227,
      "eval_samples_per_second": 65.953,
      "eval_steps_per_second": 4.27,
      "step": 201
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.000439712389380531,
      "loss": 0.1136,
      "step": 210
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.0004341814159292036,
      "loss": 0.1482,
      "step": 220
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0004286504424778761,
      "loss": 0.1124,
      "step": 230
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.0004231194690265487,
      "loss": 0.2128,
      "step": 240
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.00041758849557522125,
      "loss": 0.0859,
      "step": 250
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.0004120575221238938,
      "loss": 0.0833,
      "step": 260
    },
    {
      "epoch": 3.99,
      "eval_accuracy": 0.9856115107913669,
      "eval_loss": 0.051548197865486145,
      "eval_runtime": 6.2776,
      "eval_samples_per_second": 66.426,
      "eval_steps_per_second": 4.301,
      "step": 268
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.0004065265486725664,
      "loss": 0.1025,
      "step": 270
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.000400995575221239,
      "loss": 0.0917,
      "step": 280
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.0003954646017699115,
      "loss": 0.0812,
      "step": 290
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.0003899336283185841,
      "loss": 0.0879,
      "step": 300
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.0003844026548672566,
      "loss": 0.0763,
      "step": 310
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.0003788716814159292,
      "loss": 0.0582,
      "step": 320
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.00037334070796460176,
      "loss": 0.1007,
      "step": 330
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.9904076738609112,
      "eval_loss": 0.029486695304512978,
      "eval_runtime": 6.2974,
      "eval_samples_per_second": 66.218,
      "eval_steps_per_second": 4.288,
      "step": 335
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.00036780973451327434,
      "loss": 0.0564,
      "step": 340
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.0003622787610619469,
      "loss": 0.0506,
      "step": 350
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.0003567477876106195,
      "loss": 0.0611,
      "step": 360
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.000351216814159292,
      "loss": 0.0576,
      "step": 370
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.0003456858407079646,
      "loss": 0.0832,
      "step": 380
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.0003401548672566372,
      "loss": 0.0561,
      "step": 390
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.0003346238938053097,
      "loss": 0.0372,
      "step": 400
    },
    {
      "epoch": 5.99,
      "eval_accuracy": 0.9808153477218226,
      "eval_loss": 0.05741920694708824,
      "eval_runtime": 6.2799,
      "eval_samples_per_second": 66.402,
      "eval_steps_per_second": 4.299,
      "step": 402
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.00032909292035398233,
      "loss": 0.0749,
      "step": 410
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.0003235619469026549,
      "loss": 0.0464,
      "step": 420
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.00031803097345132743,
      "loss": 0.0538,
      "step": 430
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.0003125,
      "loss": 0.073,
      "step": 440
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.0003069690265486726,
      "loss": 0.0533,
      "step": 450
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.0003014380530973451,
      "loss": 0.0919,
      "step": 460
    },
    {
      "epoch": 6.99,
      "eval_accuracy": 0.988009592326139,
      "eval_loss": 0.05372486636042595,
      "eval_runtime": 6.3165,
      "eval_samples_per_second": 66.017,
      "eval_steps_per_second": 4.275,
      "step": 469
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.0002959070796460177,
      "loss": 0.0518,
      "step": 470
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.0002903761061946903,
      "loss": 0.0233,
      "step": 480
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.00028484513274336284,
      "loss": 0.0501,
      "step": 490
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.0002793141592920354,
      "loss": 0.0685,
      "step": 500
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.000273783185840708,
      "loss": 0.0116,
      "step": 510
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.0002682522123893805,
      "loss": 0.0228,
      "step": 520
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.0002627212389380531,
      "loss": 0.0135,
      "step": 530
    },
    {
      "epoch": 7.99,
      "eval_accuracy": 0.9952038369304557,
      "eval_loss": 0.011652274057269096,
      "eval_runtime": 6.3118,
      "eval_samples_per_second": 66.067,
      "eval_steps_per_second": 4.278,
      "step": 536
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.00025719026548672567,
      "loss": 0.0091,
      "step": 540
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.00025165929203539825,
      "loss": 0.0469,
      "step": 550
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.0002461283185840708,
      "loss": 0.0157,
      "step": 560
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.00024059734513274338,
      "loss": 0.0172,
      "step": 570
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.00023506637168141593,
      "loss": 0.0202,
      "step": 580
    },
    {
      "epoch": 8.8,
      "learning_rate": 0.0002295353982300885,
      "loss": 0.0303,
      "step": 590
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.00022400442477876108,
      "loss": 0.0472,
      "step": 600
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 1.0,
      "eval_loss": 0.007474538870155811,
      "eval_runtime": 6.2502,
      "eval_samples_per_second": 66.718,
      "eval_steps_per_second": 4.32,
      "step": 603
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.00021847345132743363,
      "loss": 0.0147,
      "step": 610
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.0002129424778761062,
      "loss": 0.0133,
      "step": 620
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.00020741150442477876,
      "loss": 0.0264,
      "step": 630
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.00020188053097345134,
      "loss": 0.053,
      "step": 640
    },
    {
      "epoch": 9.69,
      "learning_rate": 0.0001963495575221239,
      "loss": 0.0056,
      "step": 650
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.00019081858407079646,
      "loss": 0.0128,
      "step": 660
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.00018528761061946904,
      "loss": 0.0151,
      "step": 670
    },
    {
      "epoch": 9.99,
      "eval_accuracy": 1.0,
      "eval_loss": 0.004758651368319988,
      "eval_runtime": 6.2595,
      "eval_samples_per_second": 66.619,
      "eval_steps_per_second": 4.313,
      "step": 670
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.0001797566371681416,
      "loss": 0.0135,
      "step": 680
    },
    {
      "epoch": 10.3,
      "learning_rate": 0.00017422566371681417,
      "loss": 0.0394,
      "step": 690
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.00016869469026548672,
      "loss": 0.0122,
      "step": 700
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.0001631637168141593,
      "loss": 0.0136,
      "step": 710
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.00015763274336283187,
      "loss": 0.01,
      "step": 720
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.00015210176991150442,
      "loss": 0.0052,
      "step": 730
    },
    {
      "epoch": 10.99,
      "eval_accuracy": 0.9976019184652278,
      "eval_loss": 0.007309095934033394,
      "eval_runtime": 6.2533,
      "eval_samples_per_second": 66.685,
      "eval_steps_per_second": 4.318,
      "step": 737
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.00014657079646017697,
      "loss": 0.0113,
      "step": 740
    },
    {
      "epoch": 11.19,
      "learning_rate": 0.00014103982300884958,
      "loss": 0.0325,
      "step": 750
    },
    {
      "epoch": 11.34,
      "learning_rate": 0.00013550884955752213,
      "loss": 0.012,
      "step": 760
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.00012997787610619468,
      "loss": 0.0185,
      "step": 770
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.00012444690265486725,
      "loss": 0.0049,
      "step": 780
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.00011891592920353983,
      "loss": 0.0038,
      "step": 790
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.00011338495575221238,
      "loss": 0.0109,
      "step": 800
    },
    {
      "epoch": 11.99,
      "eval_accuracy": 0.9952038369304557,
      "eval_loss": 0.01980188488960266,
      "eval_runtime": 6.279,
      "eval_samples_per_second": 66.411,
      "eval_steps_per_second": 4.3,
      "step": 804
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.00010785398230088496,
      "loss": 0.0096,
      "step": 810
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.00010232300884955754,
      "loss": 0.0115,
      "step": 820
    },
    {
      "epoch": 12.38,
      "learning_rate": 9.679203539823009e-05,
      "loss": 0.0171,
      "step": 830
    },
    {
      "epoch": 12.53,
      "learning_rate": 9.126106194690266e-05,
      "loss": 0.004,
      "step": 840
    },
    {
      "epoch": 12.68,
      "learning_rate": 8.573008849557521e-05,
      "loss": 0.0073,
      "step": 850
    },
    {
      "epoch": 12.83,
      "learning_rate": 8.019911504424779e-05,
      "loss": 0.0045,
      "step": 860
    },
    {
      "epoch": 12.97,
      "learning_rate": 7.466814159292036e-05,
      "loss": 0.0033,
      "step": 870
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.9976019184652278,
      "eval_loss": 0.006568592973053455,
      "eval_runtime": 6.2861,
      "eval_samples_per_second": 66.336,
      "eval_steps_per_second": 4.295,
      "step": 871
    },
    {
      "epoch": 13.13,
      "learning_rate": 6.913716814159292e-05,
      "loss": 0.0077,
      "step": 880
    },
    {
      "epoch": 13.28,
      "learning_rate": 6.360619469026548e-05,
      "loss": 0.0027,
      "step": 890
    },
    {
      "epoch": 13.43,
      "learning_rate": 5.8075221238938054e-05,
      "loss": 0.0028,
      "step": 900
    },
    {
      "epoch": 13.58,
      "learning_rate": 5.254424778761062e-05,
      "loss": 0.0025,
      "step": 910
    },
    {
      "epoch": 13.72,
      "learning_rate": 4.701327433628318e-05,
      "loss": 0.0024,
      "step": 920
    },
    {
      "epoch": 13.87,
      "learning_rate": 4.148230088495576e-05,
      "loss": 0.011,
      "step": 930
    },
    {
      "epoch": 13.99,
      "eval_accuracy": 0.9976019184652278,
      "eval_loss": 0.006684896536171436,
      "eval_runtime": 6.258,
      "eval_samples_per_second": 66.635,
      "eval_steps_per_second": 4.314,
      "step": 938
    },
    {
      "epoch": 14.03,
      "learning_rate": 3.595132743362832e-05,
      "loss": 0.0027,
      "step": 940
    },
    {
      "epoch": 14.18,
      "learning_rate": 3.0420353982300886e-05,
      "loss": 0.0092,
      "step": 950
    },
    {
      "epoch": 14.32,
      "learning_rate": 2.4889380530973453e-05,
      "loss": 0.0025,
      "step": 960
    },
    {
      "epoch": 14.47,
      "learning_rate": 1.9358407079646017e-05,
      "loss": 0.0024,
      "step": 970
    },
    {
      "epoch": 14.62,
      "learning_rate": 1.3827433628318586e-05,
      "loss": 0.0061,
      "step": 980
    },
    {
      "epoch": 14.77,
      "learning_rate": 8.296460176991151e-06,
      "loss": 0.0028,
      "step": 990
    },
    {
      "epoch": 14.92,
      "learning_rate": 2.765486725663717e-06,
      "loss": 0.0032,
      "step": 1000
    },
    {
      "epoch": 14.99,
      "eval_accuracy": 0.9976019184652278,
      "eval_loss": 0.00603157514706254,
      "eval_runtime": 6.2764,
      "eval_samples_per_second": 66.44,
      "eval_steps_per_second": 4.302,
      "step": 1005
    },
    {
      "epoch": 14.99,
      "step": 1005,
      "total_flos": 5.035680667331113e+18,
      "train_loss": 0.13053628388996147,
      "train_runtime": 2520.4596,
      "train_samples_per_second": 25.799,
      "train_steps_per_second": 0.399
    }
  ],
  "max_steps": 1005,
  "num_train_epochs": 15,
  "total_flos": 5.035680667331113e+18,
  "trial_name": null,
  "trial_params": null
}