| { |
| "best_metric": 0.02033529244363308, |
| "best_model_checkpoint": "./timri-classification/checkpoint-840", |
| "epoch": 3.74468085106383, |
| "global_step": 880, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019787234042553193, |
| "loss": 0.9777, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019574468085106384, |
| "loss": 0.4325, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019361702127659576, |
| "loss": 0.3674, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019148936170212768, |
| "loss": 0.5014, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_accuracy": 0.8704566635601119, |
| "eval_loss": 0.3428270220756531, |
| "eval_runtime": 16.978, |
| "eval_samples_per_second": 63.199, |
| "eval_steps_per_second": 7.951, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00018936170212765957, |
| "loss": 0.1515, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001872340425531915, |
| "loss": 0.1543, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001851063829787234, |
| "loss": 0.1933, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00018297872340425532, |
| "loss": 0.1244, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 0.9794967381174278, |
| "eval_loss": 0.11025915294885635, |
| "eval_runtime": 17.4184, |
| "eval_samples_per_second": 61.601, |
| "eval_steps_per_second": 7.75, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00018085106382978726, |
| "loss": 0.0805, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017872340425531915, |
| "loss": 0.1156, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017659574468085107, |
| "loss": 0.2888, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00017446808510638298, |
| "loss": 0.1569, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_accuracy": 0.9478098788443616, |
| "eval_loss": 0.18117330968379974, |
| "eval_runtime": 17.9321, |
| "eval_samples_per_second": 59.837, |
| "eval_steps_per_second": 7.528, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0001723404255319149, |
| "loss": 0.1843, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00017021276595744682, |
| "loss": 0.1066, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00016808510638297873, |
| "loss": 0.1578, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00016595744680851065, |
| "loss": 0.1581, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_accuracy": 0.9561975768872321, |
| "eval_loss": 0.15296928584575653, |
| "eval_runtime": 19.3736, |
| "eval_samples_per_second": 55.385, |
| "eval_steps_per_second": 6.968, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00016382978723404257, |
| "loss": 0.0961, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00016170212765957446, |
| "loss": 0.0696, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00015957446808510637, |
| "loss": 0.1529, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00015744680851063832, |
| "loss": 0.0748, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 0.9804287045666356, |
| "eval_loss": 0.08471864461898804, |
| "eval_runtime": 18.2622, |
| "eval_samples_per_second": 58.755, |
| "eval_steps_per_second": 7.392, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0001553191489361702, |
| "loss": 0.2481, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00015319148936170213, |
| "loss": 0.0898, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00015106382978723407, |
| "loss": 0.0974, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00014893617021276596, |
| "loss": 0.0794, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_accuracy": 0.9841565703634669, |
| "eval_loss": 0.0651417151093483, |
| "eval_runtime": 18.0804, |
| "eval_samples_per_second": 59.346, |
| "eval_steps_per_second": 7.467, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00014680851063829788, |
| "loss": 0.0721, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0001446808510638298, |
| "loss": 0.0672, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0001425531914893617, |
| "loss": 0.0517, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00014042553191489363, |
| "loss": 0.0345, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_accuracy": 0.9869524697110904, |
| "eval_loss": 0.059121981263160706, |
| "eval_runtime": 17.7664, |
| "eval_samples_per_second": 60.395, |
| "eval_steps_per_second": 7.599, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00013829787234042554, |
| "loss": 0.0883, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00013617021276595746, |
| "loss": 0.0519, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00013404255319148938, |
| "loss": 0.0269, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00013191489361702127, |
| "loss": 0.0496, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_accuracy": 0.9794967381174278, |
| "eval_loss": 0.08983205258846283, |
| "eval_runtime": 17.7992, |
| "eval_samples_per_second": 60.283, |
| "eval_steps_per_second": 7.585, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00012978723404255318, |
| "loss": 0.0962, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00012765957446808513, |
| "loss": 0.068, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00012553191489361702, |
| "loss": 0.0592, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.00012340425531914893, |
| "loss": 0.0588, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 0.9683131407269339, |
| "eval_loss": 0.09058272838592529, |
| "eval_runtime": 18.1011, |
| "eval_samples_per_second": 59.278, |
| "eval_steps_per_second": 7.458, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.00012127659574468086, |
| "loss": 0.0188, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.00011914893617021277, |
| "loss": 0.0131, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.00011702127659574468, |
| "loss": 0.0488, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.00011489361702127661, |
| "loss": 0.0125, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_accuracy": 0.9925442684063374, |
| "eval_loss": 0.02937113679945469, |
| "eval_runtime": 18.1506, |
| "eval_samples_per_second": 59.117, |
| "eval_steps_per_second": 7.438, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.00011276595744680852, |
| "loss": 0.0198, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.00011063829787234043, |
| "loss": 0.0874, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.00010851063829787234, |
| "loss": 0.011, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.00010638297872340425, |
| "loss": 0.0437, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 0.9664492078285182, |
| "eval_loss": 0.122951939702034, |
| "eval_runtime": 17.8792, |
| "eval_samples_per_second": 60.014, |
| "eval_steps_per_second": 7.551, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.00010425531914893618, |
| "loss": 0.1603, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.00010212765957446809, |
| "loss": 0.0557, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0001, |
| "loss": 0.0084, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.787234042553192e-05, |
| "loss": 0.0223, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_accuracy": 0.9888164026095061, |
| "eval_loss": 0.04005502909421921, |
| "eval_runtime": 17.7107, |
| "eval_samples_per_second": 60.585, |
| "eval_steps_per_second": 7.623, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 9.574468085106384e-05, |
| "loss": 0.0189, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.361702127659576e-05, |
| "loss": 0.0086, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.148936170212766e-05, |
| "loss": 0.0064, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 8.936170212765958e-05, |
| "loss": 0.0056, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_accuracy": 0.994408201304753, |
| "eval_loss": 0.025717683136463165, |
| "eval_runtime": 17.4332, |
| "eval_samples_per_second": 61.549, |
| "eval_steps_per_second": 7.744, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 8.723404255319149e-05, |
| "loss": 0.0296, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 8.510638297872341e-05, |
| "loss": 0.0195, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 8.297872340425533e-05, |
| "loss": 0.0059, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 8.085106382978723e-05, |
| "loss": 0.0077, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 0.9860205032618826, |
| "eval_loss": 0.05676997825503349, |
| "eval_runtime": 17.6076, |
| "eval_samples_per_second": 60.94, |
| "eval_steps_per_second": 7.667, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 7.872340425531916e-05, |
| "loss": 0.0054, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 7.659574468085106e-05, |
| "loss": 0.0357, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 7.446808510638298e-05, |
| "loss": 0.005, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.23404255319149e-05, |
| "loss": 0.0327, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.55, |
| "eval_accuracy": 0.9897483690587139, |
| "eval_loss": 0.04828759282827377, |
| "eval_runtime": 17.4529, |
| "eval_samples_per_second": 61.48, |
| "eval_steps_per_second": 7.735, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 7.021276595744681e-05, |
| "loss": 0.0161, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.808510638297873e-05, |
| "loss": 0.0066, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 6.595744680851063e-05, |
| "loss": 0.0049, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 6.382978723404256e-05, |
| "loss": 0.0056, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 0.9925442684063374, |
| "eval_loss": 0.025615280494093895, |
| "eval_runtime": 17.7515, |
| "eval_samples_per_second": 60.446, |
| "eval_steps_per_second": 7.605, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 6.170212765957447e-05, |
| "loss": 0.0046, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 5.9574468085106384e-05, |
| "loss": 0.005, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 5.744680851063831e-05, |
| "loss": 0.004, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 5.531914893617022e-05, |
| "loss": 0.0043, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_accuracy": 0.9906803355079217, |
| "eval_loss": 0.040840886533260345, |
| "eval_runtime": 18.0012, |
| "eval_samples_per_second": 59.607, |
| "eval_steps_per_second": 7.5, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 5.319148936170213e-05, |
| "loss": 0.009, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 5.1063829787234044e-05, |
| "loss": 0.0041, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 4.893617021276596e-05, |
| "loss": 0.004, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 4.680851063829788e-05, |
| "loss": 0.0037, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.06, |
| "eval_accuracy": 0.9897483690587139, |
| "eval_loss": 0.049264904111623764, |
| "eval_runtime": 17.6233, |
| "eval_samples_per_second": 60.885, |
| "eval_steps_per_second": 7.66, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 4.468085106382979e-05, |
| "loss": 0.004, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 4.2553191489361704e-05, |
| "loss": 0.0051, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 4.0425531914893614e-05, |
| "loss": 0.0039, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 3.829787234042553e-05, |
| "loss": 0.0036, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.23, |
| "eval_accuracy": 0.9934762348555451, |
| "eval_loss": 0.030669014900922775, |
| "eval_runtime": 17.7182, |
| "eval_samples_per_second": 60.559, |
| "eval_steps_per_second": 7.619, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 3.617021276595745e-05, |
| "loss": 0.0038, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 3.4042553191489365e-05, |
| "loss": 0.0037, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 3.191489361702128e-05, |
| "loss": 0.0173, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 2.9787234042553192e-05, |
| "loss": 0.0035, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.4, |
| "eval_accuracy": 0.9953401677539608, |
| "eval_loss": 0.023747418075799942, |
| "eval_runtime": 18.3552, |
| "eval_samples_per_second": 58.458, |
| "eval_steps_per_second": 7.355, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 2.765957446808511e-05, |
| "loss": 0.0036, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 2.5531914893617022e-05, |
| "loss": 0.0032, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 2.340425531914894e-05, |
| "loss": 0.0035, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 2.1276595744680852e-05, |
| "loss": 0.0275, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.57, |
| "eval_accuracy": 0.9953401677539608, |
| "eval_loss": 0.02033529244363308, |
| "eval_runtime": 18.0247, |
| "eval_samples_per_second": 59.529, |
| "eval_steps_per_second": 7.49, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 1.9148936170212766e-05, |
| "loss": 0.0035, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 1.7021276595744682e-05, |
| "loss": 0.0035, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 1.4893617021276596e-05, |
| "loss": 0.0033, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 1.2765957446808511e-05, |
| "loss": 0.0033, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.74, |
| "eval_accuracy": 0.9953401677539608, |
| "eval_loss": 0.020447665825486183, |
| "eval_runtime": 17.3813, |
| "eval_samples_per_second": 61.733, |
| "eval_steps_per_second": 7.767, |
| "step": 880 |
| } |
| ], |
| "max_steps": 940, |
| "num_train_epochs": 4, |
| "total_flos": 1.0908742917765612e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|