{
  "best_metric": 0.27537932991981506,
  "best_model_checkpoint": "vit-snacks/checkpoint-1300",
  "epoch": 5.0,
  "global_step": 1515,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.0001986798679867987,
      "loss": 2.8877,
      "step": 10
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00019735973597359737,
      "loss": 2.6212,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019603960396039606,
      "loss": 2.3151,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019471947194719472,
      "loss": 1.9591,
      "step": 40
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001933993399339934,
      "loss": 1.7106,
      "step": 50
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019207920792079208,
      "loss": 1.367,
      "step": 60
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00019075907590759077,
      "loss": 1.2285,
      "step": 70
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018943894389438946,
      "loss": 1.0703,
      "step": 80
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018811881188118812,
      "loss": 1.1937,
      "step": 90
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018679867986798681,
      "loss": 0.8724,
      "step": 100
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.8670157068062827,
      "eval_loss": 0.9118377566337585,
      "eval_runtime": 14.6857,
      "eval_samples_per_second": 65.029,
      "eval_steps_per_second": 8.171,
      "step": 100
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018547854785478548,
      "loss": 0.9309,
      "step": 110
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018415841584158417,
      "loss": 0.8163,
      "step": 120
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00018283828382838283,
      "loss": 0.7416,
      "step": 130
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00018151815181518153,
      "loss": 0.6442,
      "step": 140
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00018019801980198022,
      "loss": 0.7906,
      "step": 150
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001788778877887789,
      "loss": 0.6137,
      "step": 160
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017755775577557757,
      "loss": 0.6342,
      "step": 170
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017623762376237624,
      "loss": 0.7081,
      "step": 180
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00017491749174917493,
      "loss": 0.51,
      "step": 190
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0001735973597359736,
      "loss": 0.5628,
      "step": 200
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.8471204188481676,
      "eval_loss": 0.6873391270637512,
      "eval_runtime": 16.1319,
      "eval_samples_per_second": 59.2,
      "eval_steps_per_second": 7.439,
      "step": 200
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00017227722772277228,
      "loss": 0.6635,
      "step": 210
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00017095709570957098,
      "loss": 0.51,
      "step": 220
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016963696369636967,
      "loss": 0.5002,
      "step": 230
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016831683168316833,
      "loss": 0.6353,
      "step": 240
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.000166996699669967,
      "loss": 0.5326,
      "step": 250
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00016567656765676569,
      "loss": 0.5051,
      "step": 260
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00016435643564356435,
      "loss": 0.6527,
      "step": 270
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00016303630363036304,
      "loss": 0.4961,
      "step": 280
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.00016171617161716173,
      "loss": 0.4981,
      "step": 290
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.00016039603960396042,
      "loss": 0.4421,
      "step": 300
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.8691099476439791,
      "eval_loss": 0.4995421767234802,
      "eval_runtime": 15.1178,
      "eval_samples_per_second": 63.17,
      "eval_steps_per_second": 7.938,
      "step": 300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0001590759075907591,
      "loss": 0.4434,
      "step": 310
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.00015775577557755775,
      "loss": 0.2569,
      "step": 320
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.00015643564356435644,
      "loss": 0.2208,
      "step": 330
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.0001551155115511551,
      "loss": 0.3017,
      "step": 340
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.0001537953795379538,
      "loss": 0.2288,
      "step": 350
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.0001524752475247525,
      "loss": 0.1533,
      "step": 360
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.00015115511551155118,
      "loss": 0.3527,
      "step": 370
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.00014983498349834985,
      "loss": 0.1966,
      "step": 380
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.0001485148514851485,
      "loss": 0.1962,
      "step": 390
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.0001471947194719472,
      "loss": 0.2837,
      "step": 400
    },
    {
      "epoch": 1.32,
      "eval_accuracy": 0.9026178010471204,
      "eval_loss": 0.40083953738212585,
      "eval_runtime": 14.9253,
      "eval_samples_per_second": 63.985,
      "eval_steps_per_second": 8.04,
      "step": 400
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.00014587458745874587,
      "loss": 0.1951,
      "step": 410
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.00014455445544554456,
      "loss": 0.2244,
      "step": 420
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.00014323432343234325,
      "loss": 0.144,
      "step": 430
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.00014191419141914194,
      "loss": 0.1707,
      "step": 440
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.0001405940594059406,
      "loss": 0.1881,
      "step": 450
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00013927392739273927,
      "loss": 0.2441,
      "step": 460
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.00013795379537953796,
      "loss": 0.2128,
      "step": 470
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.00013663366336633665,
      "loss": 0.2773,
      "step": 480
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.00013531353135313532,
      "loss": 0.2554,
      "step": 490
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.000133993399339934,
      "loss": 0.1645,
      "step": 500
    },
    {
      "epoch": 1.65,
      "eval_accuracy": 0.9057591623036649,
      "eval_loss": 0.3701848089694977,
      "eval_runtime": 14.4771,
      "eval_samples_per_second": 65.966,
      "eval_steps_per_second": 8.289,
      "step": 500
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0001326732673267327,
      "loss": 0.2479,
      "step": 510
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00013135313531353136,
      "loss": 0.1427,
      "step": 520
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00013003300330033003,
      "loss": 0.234,
      "step": 530
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.00012871287128712872,
      "loss": 0.2,
      "step": 540
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0001273927392739274,
      "loss": 0.3339,
      "step": 550
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.00012607260726072607,
      "loss": 0.1089,
      "step": 560
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.00012475247524752477,
      "loss": 0.2647,
      "step": 570
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00012343234323432346,
      "loss": 0.1648,
      "step": 580
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.00012211221122112212,
      "loss": 0.1728,
      "step": 590
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.0001207920792079208,
      "loss": 0.1604,
      "step": 600
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.8921465968586387,
      "eval_loss": 0.3981299102306366,
      "eval_runtime": 14.3841,
      "eval_samples_per_second": 66.393,
      "eval_steps_per_second": 8.343,
      "step": 600
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.00011947194719471948,
      "loss": 0.0877,
      "step": 610
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.00011815181518151817,
      "loss": 0.119,
      "step": 620
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00011683168316831683,
      "loss": 0.0758,
      "step": 630
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.00011551155115511551,
      "loss": 0.1496,
      "step": 640
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.0001141914191419142,
      "loss": 0.0617,
      "step": 650
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.00011287128712871287,
      "loss": 0.0627,
      "step": 660
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.00011155115511551156,
      "loss": 0.0948,
      "step": 670
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.00011023102310231023,
      "loss": 0.0987,
      "step": 680
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.00010891089108910893,
      "loss": 0.0473,
      "step": 690
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010759075907590759,
      "loss": 0.0498,
      "step": 700
    },
    {
      "epoch": 2.31,
      "eval_accuracy": 0.9204188481675393,
      "eval_loss": 0.31845271587371826,
      "eval_runtime": 14.4624,
      "eval_samples_per_second": 66.033,
      "eval_steps_per_second": 8.297,
      "step": 700
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.00010627062706270627,
      "loss": 0.0428,
      "step": 710
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.00010495049504950496,
      "loss": 0.0848,
      "step": 720
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.00010363036303630365,
      "loss": 0.0512,
      "step": 730
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.00010231023102310232,
      "loss": 0.043,
      "step": 740
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.00010099009900990099,
      "loss": 0.0372,
      "step": 750
    },
    {
      "epoch": 2.51,
      "learning_rate": 9.966996699669967e-05,
      "loss": 0.1442,
      "step": 760
    },
    {
      "epoch": 2.54,
      "learning_rate": 9.834983498349836e-05,
      "loss": 0.0426,
      "step": 770
    },
    {
      "epoch": 2.57,
      "learning_rate": 9.702970297029703e-05,
      "loss": 0.0511,
      "step": 780
    },
    {
      "epoch": 2.61,
      "learning_rate": 9.570957095709572e-05,
      "loss": 0.0634,
      "step": 790
    },
    {
      "epoch": 2.64,
      "learning_rate": 9.43894389438944e-05,
      "loss": 0.0406,
      "step": 800
    },
    {
      "epoch": 2.64,
      "eval_accuracy": 0.9141361256544502,
      "eval_loss": 0.3426617980003357,
      "eval_runtime": 14.6543,
      "eval_samples_per_second": 65.168,
      "eval_steps_per_second": 8.189,
      "step": 800
    },
    {
      "epoch": 2.67,
      "learning_rate": 9.306930693069307e-05,
      "loss": 0.1104,
      "step": 810
    },
    {
      "epoch": 2.71,
      "learning_rate": 9.174917491749175e-05,
      "loss": 0.0778,
      "step": 820
    },
    {
      "epoch": 2.74,
      "learning_rate": 9.042904290429043e-05,
      "loss": 0.1189,
      "step": 830
    },
    {
      "epoch": 2.77,
      "learning_rate": 8.910891089108912e-05,
      "loss": 0.0873,
      "step": 840
    },
    {
      "epoch": 2.81,
      "learning_rate": 8.778877887788778e-05,
      "loss": 0.0414,
      "step": 850
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.646864686468648e-05,
      "loss": 0.0723,
      "step": 860
    },
    {
      "epoch": 2.87,
      "learning_rate": 8.514851485148515e-05,
      "loss": 0.0495,
      "step": 870
    },
    {
      "epoch": 2.9,
      "learning_rate": 8.382838283828383e-05,
      "loss": 0.0698,
      "step": 880
    },
    {
      "epoch": 2.94,
      "learning_rate": 8.250825082508251e-05,
      "loss": 0.0636,
      "step": 890
    },
    {
      "epoch": 2.97,
      "learning_rate": 8.11881188118812e-05,
      "loss": 0.1049,
      "step": 900
    },
    {
      "epoch": 2.97,
      "eval_accuracy": 0.9172774869109948,
      "eval_loss": 0.344361275434494,
      "eval_runtime": 14.443,
      "eval_samples_per_second": 66.122,
      "eval_steps_per_second": 8.309,
      "step": 900
    },
    {
      "epoch": 3.0,
      "learning_rate": 7.986798679867988e-05,
      "loss": 0.0148,
      "step": 910
    },
    {
      "epoch": 3.04,
      "learning_rate": 7.854785478547854e-05,
      "loss": 0.0134,
      "step": 920
    },
    {
      "epoch": 3.07,
      "learning_rate": 7.722772277227723e-05,
      "loss": 0.0263,
      "step": 930
    },
    {
      "epoch": 3.1,
      "learning_rate": 7.590759075907591e-05,
      "loss": 0.0453,
      "step": 940
    },
    {
      "epoch": 3.14,
      "learning_rate": 7.458745874587459e-05,
      "loss": 0.0156,
      "step": 950
    },
    {
      "epoch": 3.17,
      "learning_rate": 7.326732673267327e-05,
      "loss": 0.0125,
      "step": 960
    },
    {
      "epoch": 3.2,
      "learning_rate": 7.194719471947196e-05,
      "loss": 0.0118,
      "step": 970
    },
    {
      "epoch": 3.23,
      "learning_rate": 7.062706270627064e-05,
      "loss": 0.0116,
      "step": 980
    },
    {
      "epoch": 3.27,
      "learning_rate": 6.93069306930693e-05,
      "loss": 0.011,
      "step": 990
    },
    {
      "epoch": 3.3,
      "learning_rate": 6.798679867986799e-05,
      "loss": 0.0272,
      "step": 1000
    },
    {
      "epoch": 3.3,
      "eval_accuracy": 0.9246073298429319,
      "eval_loss": 0.31677740812301636,
      "eval_runtime": 14.7709,
      "eval_samples_per_second": 64.654,
      "eval_steps_per_second": 8.124,
      "step": 1000
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.0113,
      "step": 1010
    },
    {
      "epoch": 3.37,
      "learning_rate": 6.534653465346535e-05,
      "loss": 0.0287,
      "step": 1020
    },
    {
      "epoch": 3.4,
      "learning_rate": 6.402640264026403e-05,
      "loss": 0.0326,
      "step": 1030
    },
    {
      "epoch": 3.43,
      "learning_rate": 6.270627062706272e-05,
      "loss": 0.0157,
      "step": 1040
    },
    {
      "epoch": 3.47,
      "learning_rate": 6.13861386138614e-05,
      "loss": 0.0135,
      "step": 1050
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.006600660066007e-05,
      "loss": 0.0154,
      "step": 1060
    },
    {
      "epoch": 3.53,
      "learning_rate": 5.874587458745875e-05,
      "loss": 0.0357,
      "step": 1070
    },
    {
      "epoch": 3.56,
      "learning_rate": 5.742574257425742e-05,
      "loss": 0.0161,
      "step": 1080
    },
    {
      "epoch": 3.6,
      "learning_rate": 5.610561056105611e-05,
      "loss": 0.04,
      "step": 1090
    },
    {
      "epoch": 3.63,
      "learning_rate": 5.4785478547854784e-05,
      "loss": 0.0186,
      "step": 1100
    },
    {
      "epoch": 3.63,
      "eval_accuracy": 0.9287958115183246,
      "eval_loss": 0.3141845762729645,
      "eval_runtime": 14.3909,
      "eval_samples_per_second": 66.361,
      "eval_steps_per_second": 8.339,
      "step": 1100
    },
    {
      "epoch": 3.66,
      "learning_rate": 5.346534653465347e-05,
      "loss": 0.0119,
      "step": 1110
    },
    {
      "epoch": 3.7,
      "learning_rate": 5.2145214521452146e-05,
      "loss": 0.0089,
      "step": 1120
    },
    {
      "epoch": 3.73,
      "learning_rate": 5.082508250825083e-05,
      "loss": 0.0097,
      "step": 1130
    },
    {
      "epoch": 3.76,
      "learning_rate": 4.950495049504951e-05,
      "loss": 0.0561,
      "step": 1140
    },
    {
      "epoch": 3.8,
      "learning_rate": 4.8184818481848186e-05,
      "loss": 0.0457,
      "step": 1150
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.686468646864687e-05,
      "loss": 0.0129,
      "step": 1160
    },
    {
      "epoch": 3.86,
      "learning_rate": 4.554455445544555e-05,
      "loss": 0.0657,
      "step": 1170
    },
    {
      "epoch": 3.89,
      "learning_rate": 4.4224422442244226e-05,
      "loss": 0.0081,
      "step": 1180
    },
    {
      "epoch": 3.93,
      "learning_rate": 4.2904290429042904e-05,
      "loss": 0.0084,
      "step": 1190
    },
    {
      "epoch": 3.96,
      "learning_rate": 4.158415841584158e-05,
      "loss": 0.0203,
      "step": 1200
    },
    {
      "epoch": 3.96,
      "eval_accuracy": 0.9298429319371728,
      "eval_loss": 0.29309454560279846,
      "eval_runtime": 14.4376,
      "eval_samples_per_second": 66.147,
      "eval_steps_per_second": 8.312,
      "step": 1200
    },
    {
      "epoch": 3.99,
      "learning_rate": 4.0264026402640266e-05,
      "loss": 0.0459,
      "step": 1210
    },
    {
      "epoch": 4.03,
      "learning_rate": 3.8943894389438944e-05,
      "loss": 0.008,
      "step": 1220
    },
    {
      "epoch": 4.06,
      "learning_rate": 3.762376237623763e-05,
      "loss": 0.0078,
      "step": 1230
    },
    {
      "epoch": 4.09,
      "learning_rate": 3.6303630363036307e-05,
      "loss": 0.0079,
      "step": 1240
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.4983498349834984e-05,
      "loss": 0.0087,
      "step": 1250
    },
    {
      "epoch": 4.16,
      "learning_rate": 3.366336633663367e-05,
      "loss": 0.008,
      "step": 1260
    },
    {
      "epoch": 4.19,
      "learning_rate": 3.234323432343234e-05,
      "loss": 0.0074,
      "step": 1270
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.1023102310231024e-05,
      "loss": 0.0078,
      "step": 1280
    },
    {
      "epoch": 4.26,
      "learning_rate": 2.9702970297029702e-05,
      "loss": 0.0076,
      "step": 1290
    },
    {
      "epoch": 4.29,
      "learning_rate": 2.8382838283828383e-05,
      "loss": 0.007,
      "step": 1300
    },
    {
      "epoch": 4.29,
      "eval_accuracy": 0.9392670157068063,
      "eval_loss": 0.27537932991981506,
      "eval_runtime": 14.7619,
      "eval_samples_per_second": 64.694,
      "eval_steps_per_second": 8.129,
      "step": 1300
    },
    {
      "epoch": 4.32,
      "learning_rate": 2.7062706270627065e-05,
      "loss": 0.0073,
      "step": 1310
    },
    {
      "epoch": 4.36,
      "learning_rate": 2.5742574257425746e-05,
      "loss": 0.007,
      "step": 1320
    },
    {
      "epoch": 4.39,
      "learning_rate": 2.4422442244224424e-05,
      "loss": 0.0071,
      "step": 1330
    },
    {
      "epoch": 4.42,
      "learning_rate": 2.31023102310231e-05,
      "loss": 0.0068,
      "step": 1340
    },
    {
      "epoch": 4.46,
      "learning_rate": 2.1782178217821783e-05,
      "loss": 0.007,
      "step": 1350
    },
    {
      "epoch": 4.49,
      "learning_rate": 2.0462046204620464e-05,
      "loss": 0.0068,
      "step": 1360
    },
    {
      "epoch": 4.52,
      "learning_rate": 1.9141914191419145e-05,
      "loss": 0.0083,
      "step": 1370
    },
    {
      "epoch": 4.55,
      "learning_rate": 1.7821782178217823e-05,
      "loss": 0.0167,
      "step": 1380
    },
    {
      "epoch": 4.59,
      "learning_rate": 1.6501650165016504e-05,
      "loss": 0.0069,
      "step": 1390
    },
    {
      "epoch": 4.62,
      "learning_rate": 1.5181518151815183e-05,
      "loss": 0.0072,
      "step": 1400
    },
    {
      "epoch": 4.62,
      "eval_accuracy": 0.9403141361256544,
      "eval_loss": 0.27776801586151123,
      "eval_runtime": 14.4524,
      "eval_samples_per_second": 66.079,
      "eval_steps_per_second": 8.303,
      "step": 1400
    },
    {
      "epoch": 4.65,
      "learning_rate": 1.3861386138613863e-05,
      "loss": 0.0069,
      "step": 1410
    },
    {
      "epoch": 4.69,
      "learning_rate": 1.254125412541254e-05,
      "loss": 0.0067,
      "step": 1420
    },
    {
      "epoch": 4.72,
      "learning_rate": 1.1221122112211222e-05,
      "loss": 0.0066,
      "step": 1430
    },
    {
      "epoch": 4.75,
      "learning_rate": 9.900990099009901e-06,
      "loss": 0.0065,
      "step": 1440
    },
    {
      "epoch": 4.79,
      "learning_rate": 8.58085808580858e-06,
      "loss": 0.0342,
      "step": 1450
    },
    {
      "epoch": 4.82,
      "learning_rate": 7.260726072607261e-06,
      "loss": 0.0068,
      "step": 1460
    },
    {
      "epoch": 4.85,
      "learning_rate": 5.940594059405941e-06,
      "loss": 0.0075,
      "step": 1470
    },
    {
      "epoch": 4.88,
      "learning_rate": 4.62046204620462e-06,
      "loss": 0.0067,
      "step": 1480
    },
    {
      "epoch": 4.92,
      "learning_rate": 3.3003300330033e-06,
      "loss": 0.0066,
      "step": 1490
    },
    {
      "epoch": 4.95,
      "learning_rate": 1.9801980198019803e-06,
      "loss": 0.0073,
      "step": 1500
    },
    {
      "epoch": 4.95,
      "eval_accuracy": 0.9392670157068063,
      "eval_loss": 0.2782096564769745,
      "eval_runtime": 14.5029,
      "eval_samples_per_second": 65.849,
      "eval_steps_per_second": 8.274,
      "step": 1500
    },
    {
      "epoch": 4.98,
      "learning_rate": 6.600660066006602e-07,
      "loss": 0.0408,
      "step": 1510
    },
    {
      "epoch": 5.0,
      "step": 1515,
      "total_flos": 1.874833643725701e+18,
      "train_loss": 0.2613857026712926,
      "train_runtime": 1218.6852,
      "train_samples_per_second": 19.849,
      "train_steps_per_second": 1.243
    }
  ],
  "max_steps": 1515,
  "num_train_epochs": 5,
  "total_flos": 1.874833643725701e+18,
  "trial_name": null,
  "trial_params": null
}