{ "best_metric": 0.5942012071609497, "best_model_checkpoint": "Hierarchical_Agent_Action/checkpoint-3100", "epoch": 30.0, "eval_steps": 100, "global_step": 3720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 9.946236559139786e-05, "loss": 3.4378, "step": 20 }, { "epoch": 0.32, "learning_rate": 9.892473118279571e-05, "loss": 3.1863, "step": 40 }, { "epoch": 0.48, "learning_rate": 9.838709677419355e-05, "loss": 2.9075, "step": 60 }, { "epoch": 0.65, "learning_rate": 9.78494623655914e-05, "loss": 2.6661, "step": 80 }, { "epoch": 0.81, "learning_rate": 9.731182795698925e-05, "loss": 2.4407, "step": 100 }, { "epoch": 0.81, "eval_accuracy": 0.6057553956834533, "eval_loss": 2.271561622619629, "eval_runtime": 12.2022, "eval_samples_per_second": 56.957, "eval_steps_per_second": 7.13, "step": 100 }, { "epoch": 0.97, "learning_rate": 9.677419354838711e-05, "loss": 2.2877, "step": 120 }, { "epoch": 1.13, "learning_rate": 9.626344086021506e-05, "loss": 2.0861, "step": 140 }, { "epoch": 1.29, "learning_rate": 9.572580645161291e-05, "loss": 2.0179, "step": 160 }, { "epoch": 1.45, "learning_rate": 9.518817204301076e-05, "loss": 1.8839, "step": 180 }, { "epoch": 1.61, "learning_rate": 9.465053763440861e-05, "loss": 1.7756, "step": 200 }, { "epoch": 1.61, "eval_accuracy": 0.7064748201438849, "eval_loss": 1.6162313222885132, "eval_runtime": 13.1922, "eval_samples_per_second": 52.683, "eval_steps_per_second": 6.595, "step": 200 }, { "epoch": 1.77, "learning_rate": 9.411290322580645e-05, "loss": 1.7271, "step": 220 }, { "epoch": 1.94, "learning_rate": 9.357526881720431e-05, "loss": 1.5882, "step": 240 }, { "epoch": 2.1, "learning_rate": 9.303763440860216e-05, "loss": 1.4613, "step": 260 }, { "epoch": 2.26, "learning_rate": 9.250000000000001e-05, "loss": 1.4197, "step": 280 }, { "epoch": 2.42, "learning_rate": 9.196236559139786e-05, "loss": 1.3948, "step": 300 }, { "epoch": 2.42, "eval_accuracy": 0.7697841726618705, "eval_loss": 1.2199510335922241, "eval_runtime": 12.2033, "eval_samples_per_second": 56.952, "eval_steps_per_second": 7.129, "step": 300 }, { "epoch": 2.58, "learning_rate": 9.14247311827957e-05, "loss": 1.3314, "step": 320 }, { "epoch": 2.74, "learning_rate": 9.088709677419354e-05, "loss": 1.3175, "step": 340 }, { "epoch": 2.9, "learning_rate": 9.03494623655914e-05, "loss": 1.2503, "step": 360 }, { "epoch": 3.06, "learning_rate": 8.983870967741936e-05, "loss": 1.1104, "step": 380 }, { "epoch": 3.23, "learning_rate": 8.930107526881721e-05, "loss": 1.131, "step": 400 }, { "epoch": 3.23, "eval_accuracy": 0.7856115107913669, "eval_loss": 1.0011802911758423, "eval_runtime": 12.1771, "eval_samples_per_second": 57.074, "eval_steps_per_second": 7.145, "step": 400 }, { "epoch": 3.39, "learning_rate": 8.876344086021506e-05, "loss": 1.1084, "step": 420 }, { "epoch": 3.55, "learning_rate": 8.82258064516129e-05, "loss": 0.9978, "step": 440 }, { "epoch": 3.71, "learning_rate": 8.768817204301076e-05, "loss": 1.0633, "step": 460 }, { "epoch": 3.87, "learning_rate": 8.715053763440861e-05, "loss": 0.9801, "step": 480 }, { "epoch": 4.03, "learning_rate": 8.661290322580645e-05, "loss": 0.9239, "step": 500 }, { "epoch": 4.03, "eval_accuracy": 0.7827338129496403, "eval_loss": 0.9055307507514954, "eval_runtime": 12.1524, "eval_samples_per_second": 57.19, "eval_steps_per_second": 7.159, "step": 500 }, { "epoch": 4.19, "learning_rate": 8.60752688172043e-05, "loss": 0.8721, "step": 520 }, { "epoch": 4.35, "learning_rate": 8.553763440860215e-05, "loss": 0.8904, "step": 540 }, { "epoch": 4.52, "learning_rate": 8.5e-05, "loss": 0.8903, "step": 560 }, { "epoch": 4.68, "learning_rate": 8.446236559139785e-05, "loss": 0.8564, "step": 580 }, { "epoch": 4.84, "learning_rate": 8.39247311827957e-05, "loss": 0.8699, "step": 600 }, { "epoch": 4.84, "eval_accuracy": 0.7827338129496403, "eval_loss": 0.8103253841400146, "eval_runtime": 11.9742, "eval_samples_per_second": 58.041, "eval_steps_per_second": 7.266, "step": 600 }, { "epoch": 5.0, "learning_rate": 8.338709677419355e-05, "loss": 0.8503, "step": 620 }, { "epoch": 5.16, "learning_rate": 8.28494623655914e-05, "loss": 0.7251, "step": 640 }, { "epoch": 5.32, "learning_rate": 8.231182795698926e-05, "loss": 0.6939, "step": 660 }, { "epoch": 5.48, "learning_rate": 8.17741935483871e-05, "loss": 0.7263, "step": 680 }, { "epoch": 5.65, "learning_rate": 8.123655913978495e-05, "loss": 0.6707, "step": 700 }, { "epoch": 5.65, "eval_accuracy": 0.7841726618705036, "eval_loss": 0.7609687447547913, "eval_runtime": 12.2276, "eval_samples_per_second": 56.839, "eval_steps_per_second": 7.115, "step": 700 }, { "epoch": 5.81, "learning_rate": 8.06989247311828e-05, "loss": 0.6822, "step": 720 }, { "epoch": 5.97, "learning_rate": 8.016129032258065e-05, "loss": 0.716, "step": 740 }, { "epoch": 6.13, "learning_rate": 7.96236559139785e-05, "loss": 0.6162, "step": 760 }, { "epoch": 6.29, "learning_rate": 7.908602150537635e-05, "loss": 0.6409, "step": 780 }, { "epoch": 6.45, "learning_rate": 7.85483870967742e-05, "loss": 0.6206, "step": 800 }, { "epoch": 6.45, "eval_accuracy": 0.7884892086330936, "eval_loss": 0.7312321066856384, "eval_runtime": 12.0325, "eval_samples_per_second": 57.76, "eval_steps_per_second": 7.23, "step": 800 }, { "epoch": 6.61, "learning_rate": 7.801075268817205e-05, "loss": 0.6156, "step": 820 }, { "epoch": 6.77, "learning_rate": 7.74731182795699e-05, "loss": 0.5887, "step": 840 }, { "epoch": 6.94, "learning_rate": 7.693548387096776e-05, "loss": 0.549, "step": 860 }, { "epoch": 7.1, "learning_rate": 7.63978494623656e-05, "loss": 0.581, "step": 880 }, { "epoch": 7.26, "learning_rate": 7.586021505376343e-05, "loss": 0.5795, "step": 900 }, { "epoch": 7.26, "eval_accuracy": 0.8100719424460432, "eval_loss": 0.6988666653633118, "eval_runtime": 12.1203, "eval_samples_per_second": 57.342, "eval_steps_per_second": 7.178, "step": 900 }, { "epoch": 7.42, "learning_rate": 7.532258064516129e-05, "loss": 0.4974, "step": 920 }, { "epoch": 7.58, "learning_rate": 7.478494623655914e-05, "loss": 0.5397, "step": 940 }, { "epoch": 7.74, "learning_rate": 7.424731182795699e-05, "loss": 0.6128, "step": 960 }, { "epoch": 7.9, "learning_rate": 7.370967741935485e-05, "loss": 0.5246, "step": 980 }, { "epoch": 8.06, "learning_rate": 7.317204301075268e-05, "loss": 0.4914, "step": 1000 }, { "epoch": 8.06, "eval_accuracy": 0.781294964028777, "eval_loss": 0.7066284418106079, "eval_runtime": 12.169, "eval_samples_per_second": 57.112, "eval_steps_per_second": 7.149, "step": 1000 }, { "epoch": 8.23, "learning_rate": 7.263440860215054e-05, "loss": 0.4653, "step": 1020 }, { "epoch": 8.39, "learning_rate": 7.209677419354839e-05, "loss": 0.486, "step": 1040 }, { "epoch": 8.55, "learning_rate": 7.155913978494624e-05, "loss": 0.4781, "step": 1060 }, { "epoch": 8.71, "learning_rate": 7.102150537634408e-05, "loss": 0.5277, "step": 1080 }, { "epoch": 8.87, "learning_rate": 7.048387096774193e-05, "loss": 0.5087, "step": 1100 }, { "epoch": 8.87, "eval_accuracy": 0.818705035971223, "eval_loss": 0.6398155689239502, "eval_runtime": 12.2159, "eval_samples_per_second": 56.893, "eval_steps_per_second": 7.122, "step": 1100 }, { "epoch": 9.03, "learning_rate": 6.994623655913979e-05, "loss": 0.4671, "step": 1120 }, { "epoch": 9.19, "learning_rate": 6.940860215053764e-05, "loss": 0.4841, "step": 1140 }, { "epoch": 9.35, "learning_rate": 6.887096774193549e-05, "loss": 0.4619, "step": 1160 }, { "epoch": 9.52, "learning_rate": 6.833333333333333e-05, "loss": 0.479, "step": 1180 }, { "epoch": 9.68, "learning_rate": 6.779569892473118e-05, "loss": 0.4373, "step": 1200 }, { "epoch": 9.68, "eval_accuracy": 0.8043165467625899, "eval_loss": 0.6293413043022156, "eval_runtime": 12.0132, "eval_samples_per_second": 57.853, "eval_steps_per_second": 7.242, "step": 1200 }, { "epoch": 9.84, "learning_rate": 6.725806451612904e-05, "loss": 0.4989, "step": 1220 }, { "epoch": 10.0, "learning_rate": 6.672043010752689e-05, "loss": 0.4829, "step": 1240 }, { "epoch": 10.16, "learning_rate": 6.618279569892474e-05, "loss": 0.458, "step": 1260 }, { "epoch": 10.32, "learning_rate": 6.564516129032258e-05, "loss": 0.4135, "step": 1280 }, { "epoch": 10.48, "learning_rate": 6.510752688172043e-05, "loss": 0.4365, "step": 1300 }, { "epoch": 10.48, "eval_accuracy": 0.7971223021582734, "eval_loss": 0.672641932964325, "eval_runtime": 12.1208, "eval_samples_per_second": 57.339, "eval_steps_per_second": 7.178, "step": 1300 }, { "epoch": 10.65, "learning_rate": 6.456989247311829e-05, "loss": 0.4852, "step": 1320 }, { "epoch": 10.81, "learning_rate": 6.403225806451614e-05, "loss": 0.3868, "step": 1340 }, { "epoch": 10.97, "learning_rate": 6.349462365591398e-05, "loss": 0.4663, "step": 1360 }, { "epoch": 11.13, "learning_rate": 6.295698924731183e-05, "loss": 0.396, "step": 1380 }, { "epoch": 11.29, "learning_rate": 6.241935483870967e-05, "loss": 0.4517, "step": 1400 }, { "epoch": 11.29, "eval_accuracy": 0.8244604316546763, "eval_loss": 0.6046626567840576, "eval_runtime": 12.0781, "eval_samples_per_second": 57.542, "eval_steps_per_second": 7.203, "step": 1400 }, { "epoch": 11.45, "learning_rate": 6.188172043010752e-05, "loss": 0.4272, "step": 1420 }, { "epoch": 11.61, "learning_rate": 6.134408602150538e-05, "loss": 0.3438, "step": 1440 }, { "epoch": 11.77, "learning_rate": 6.080645161290322e-05, "loss": 0.3741, "step": 1460 }, { "epoch": 11.94, "learning_rate": 6.0268817204301075e-05, "loss": 0.3633, "step": 1480 }, { "epoch": 12.1, "learning_rate": 5.973118279569893e-05, "loss": 0.4114, "step": 1500 }, { "epoch": 12.1, "eval_accuracy": 0.823021582733813, "eval_loss": 0.6088372468948364, "eval_runtime": 12.1467, "eval_samples_per_second": 57.217, "eval_steps_per_second": 7.162, "step": 1500 }, { "epoch": 12.26, "learning_rate": 5.9193548387096774e-05, "loss": 0.3284, "step": 1520 }, { "epoch": 12.42, "learning_rate": 5.8655913978494627e-05, "loss": 0.3329, "step": 1540 }, { "epoch": 12.58, "learning_rate": 5.811827956989247e-05, "loss": 0.3656, "step": 1560 }, { "epoch": 12.74, "learning_rate": 5.7580645161290325e-05, "loss": 0.3581, "step": 1580 }, { "epoch": 12.9, "learning_rate": 5.704301075268818e-05, "loss": 0.426, "step": 1600 }, { "epoch": 12.9, "eval_accuracy": 0.8201438848920863, "eval_loss": 0.6164522171020508, "eval_runtime": 12.2797, "eval_samples_per_second": 56.597, "eval_steps_per_second": 7.085, "step": 1600 }, { "epoch": 13.06, "learning_rate": 5.6505376344086024e-05, "loss": 0.4441, "step": 1620 }, { "epoch": 13.23, "learning_rate": 5.599462365591398e-05, "loss": 0.3141, "step": 1640 }, { "epoch": 13.39, "learning_rate": 5.545698924731183e-05, "loss": 0.3072, "step": 1660 }, { "epoch": 13.55, "learning_rate": 5.491935483870968e-05, "loss": 0.3348, "step": 1680 }, { "epoch": 13.71, "learning_rate": 5.438172043010753e-05, "loss": 0.3456, "step": 1700 }, { "epoch": 13.71, "eval_accuracy": 0.8258992805755395, "eval_loss": 0.6133091449737549, "eval_runtime": 12.0446, "eval_samples_per_second": 57.702, "eval_steps_per_second": 7.223, "step": 1700 }, { "epoch": 13.87, "learning_rate": 5.384408602150538e-05, "loss": 0.3338, "step": 1720 }, { "epoch": 14.03, "learning_rate": 5.330645161290323e-05, "loss": 0.3473, "step": 1740 }, { "epoch": 14.19, "learning_rate": 5.276881720430108e-05, "loss": 0.3563, "step": 1760 }, { "epoch": 14.35, "learning_rate": 5.223118279569893e-05, "loss": 0.3086, "step": 1780 }, { "epoch": 14.52, "learning_rate": 5.169354838709678e-05, "loss": 0.332, "step": 1800 }, { "epoch": 14.52, "eval_accuracy": 0.8201438848920863, "eval_loss": 0.6735997200012207, "eval_runtime": 11.991, "eval_samples_per_second": 57.96, "eval_steps_per_second": 7.255, "step": 1800 }, { "epoch": 14.68, "learning_rate": 5.115591397849463e-05, "loss": 0.3146, "step": 1820 }, { "epoch": 14.84, "learning_rate": 5.061827956989248e-05, "loss": 0.3475, "step": 1840 }, { "epoch": 15.0, "learning_rate": 5.008064516129033e-05, "loss": 0.3397, "step": 1860 }, { "epoch": 15.16, "learning_rate": 4.954301075268817e-05, "loss": 0.2829, "step": 1880 }, { "epoch": 15.32, "learning_rate": 4.9005376344086024e-05, "loss": 0.3646, "step": 1900 }, { "epoch": 15.32, "eval_accuracy": 0.8172661870503597, "eval_loss": 0.6405801177024841, "eval_runtime": 12.0646, "eval_samples_per_second": 57.607, "eval_steps_per_second": 7.211, "step": 1900 }, { "epoch": 15.48, "learning_rate": 4.846774193548387e-05, "loss": 0.2571, "step": 1920 }, { "epoch": 15.65, "learning_rate": 4.793010752688172e-05, "loss": 0.3495, "step": 1940 }, { "epoch": 15.81, "learning_rate": 4.7392473118279576e-05, "loss": 0.2647, "step": 1960 }, { "epoch": 15.97, "learning_rate": 4.685483870967742e-05, "loss": 0.3063, "step": 1980 }, { "epoch": 16.13, "learning_rate": 4.6317204301075275e-05, "loss": 0.3287, "step": 2000 }, { "epoch": 16.13, "eval_accuracy": 0.7971223021582734, "eval_loss": 0.6978276968002319, "eval_runtime": 12.1198, "eval_samples_per_second": 57.344, "eval_steps_per_second": 7.178, "step": 2000 }, { "epoch": 16.29, "learning_rate": 4.577956989247312e-05, "loss": 0.2456, "step": 2020 }, { "epoch": 16.45, "learning_rate": 4.5241935483870966e-05, "loss": 0.3242, "step": 2040 }, { "epoch": 16.61, "learning_rate": 4.470430107526882e-05, "loss": 0.3186, "step": 2060 }, { "epoch": 16.77, "learning_rate": 4.4166666666666665e-05, "loss": 0.3462, "step": 2080 }, { "epoch": 16.94, "learning_rate": 4.362903225806452e-05, "loss": 0.2793, "step": 2100 }, { "epoch": 16.94, "eval_accuracy": 0.8172661870503597, "eval_loss": 0.6432561278343201, "eval_runtime": 12.0081, "eval_samples_per_second": 57.878, "eval_steps_per_second": 7.245, "step": 2100 }, { "epoch": 17.1, "learning_rate": 4.309139784946237e-05, "loss": 0.2849, "step": 2120 }, { "epoch": 17.26, "learning_rate": 4.2553763440860216e-05, "loss": 0.3371, "step": 2140 }, { "epoch": 17.42, "learning_rate": 4.201612903225807e-05, "loss": 0.2561, "step": 2160 }, { "epoch": 17.58, "learning_rate": 4.1478494623655915e-05, "loss": 0.2628, "step": 2180 }, { "epoch": 17.74, "learning_rate": 4.094086021505377e-05, "loss": 0.2924, "step": 2200 }, { "epoch": 17.74, "eval_accuracy": 0.814388489208633, "eval_loss": 0.6474089622497559, "eval_runtime": 12.255, "eval_samples_per_second": 56.711, "eval_steps_per_second": 7.099, "step": 2200 }, { "epoch": 17.9, "learning_rate": 4.0403225806451614e-05, "loss": 0.2935, "step": 2220 }, { "epoch": 18.06, "learning_rate": 3.9865591397849466e-05, "loss": 0.5124, "step": 2240 }, { "epoch": 18.23, "learning_rate": 3.932795698924731e-05, "loss": 0.2954, "step": 2260 }, { "epoch": 18.39, "learning_rate": 3.879032258064516e-05, "loss": 0.2584, "step": 2280 }, { "epoch": 18.55, "learning_rate": 3.825268817204301e-05, "loss": 0.2605, "step": 2300 }, { "epoch": 18.55, "eval_accuracy": 0.8287769784172662, "eval_loss": 0.627909243106842, "eval_runtime": 12.1775, "eval_samples_per_second": 57.072, "eval_steps_per_second": 7.144, "step": 2300 }, { "epoch": 18.71, "learning_rate": 3.7715053763440864e-05, "loss": 0.312, "step": 2320 }, { "epoch": 18.87, "learning_rate": 3.717741935483871e-05, "loss": 0.2679, "step": 2340 }, { "epoch": 19.03, "learning_rate": 3.663978494623656e-05, "loss": 0.2654, "step": 2360 }, { "epoch": 19.19, "learning_rate": 3.610215053763441e-05, "loss": 0.2524, "step": 2380 }, { "epoch": 19.35, "learning_rate": 3.556451612903226e-05, "loss": 0.2016, "step": 2400 }, { "epoch": 19.35, "eval_accuracy": 0.8215827338129497, "eval_loss": 0.6360746026039124, "eval_runtime": 12.0929, "eval_samples_per_second": 57.472, "eval_steps_per_second": 7.194, "step": 2400 }, { "epoch": 19.52, "learning_rate": 3.502688172043011e-05, "loss": 0.2691, "step": 2420 }, { "epoch": 19.68, "learning_rate": 3.448924731182796e-05, "loss": 0.2068, "step": 2440 }, { "epoch": 19.84, "learning_rate": 3.395161290322581e-05, "loss": 0.3017, "step": 2460 }, { "epoch": 20.0, "learning_rate": 3.341397849462366e-05, "loss": 0.2318, "step": 2480 }, { "epoch": 20.16, "learning_rate": 3.2876344086021504e-05, "loss": 0.2524, "step": 2500 }, { "epoch": 20.16, "eval_accuracy": 0.8258992805755395, "eval_loss": 0.6393768191337585, "eval_runtime": 12.2058, "eval_samples_per_second": 56.94, "eval_steps_per_second": 7.128, "step": 2500 }, { "epoch": 20.32, "learning_rate": 3.233870967741936e-05, "loss": 0.2603, "step": 2520 }, { "epoch": 20.48, "learning_rate": 3.18010752688172e-05, "loss": 0.207, "step": 2540 }, { "epoch": 20.65, "learning_rate": 3.1263440860215056e-05, "loss": 0.1927, "step": 2560 }, { "epoch": 20.81, "learning_rate": 3.07258064516129e-05, "loss": 0.1947, "step": 2580 }, { "epoch": 20.97, "learning_rate": 3.0188172043010754e-05, "loss": 0.2017, "step": 2600 }, { "epoch": 20.97, "eval_accuracy": 0.8158273381294964, "eval_loss": 0.6682714223861694, "eval_runtime": 12.0188, "eval_samples_per_second": 57.826, "eval_steps_per_second": 7.239, "step": 2600 }, { "epoch": 21.13, "learning_rate": 2.9650537634408604e-05, "loss": 0.2351, "step": 2620 }, { "epoch": 21.29, "learning_rate": 2.9112903225806453e-05, "loss": 0.1842, "step": 2640 }, { "epoch": 21.45, "learning_rate": 2.8575268817204302e-05, "loss": 0.2131, "step": 2660 }, { "epoch": 21.61, "learning_rate": 2.8037634408602155e-05, "loss": 0.2509, "step": 2680 }, { "epoch": 21.77, "learning_rate": 2.7500000000000004e-05, "loss": 0.2082, "step": 2700 }, { "epoch": 21.77, "eval_accuracy": 0.8345323741007195, "eval_loss": 0.6389289498329163, "eval_runtime": 12.0715, "eval_samples_per_second": 57.574, "eval_steps_per_second": 7.207, "step": 2700 }, { "epoch": 21.94, "learning_rate": 2.6962365591397854e-05, "loss": 0.2671, "step": 2720 }, { "epoch": 22.1, "learning_rate": 2.6424731182795696e-05, "loss": 0.2534, "step": 2740 }, { "epoch": 22.26, "learning_rate": 2.588709677419355e-05, "loss": 0.205, "step": 2760 }, { "epoch": 22.42, "learning_rate": 2.5349462365591398e-05, "loss": 0.1904, "step": 2780 }, { "epoch": 22.58, "learning_rate": 2.4811827956989248e-05, "loss": 0.2751, "step": 2800 }, { "epoch": 22.58, "eval_accuracy": 0.837410071942446, "eval_loss": 0.6141177415847778, "eval_runtime": 12.0253, "eval_samples_per_second": 57.795, "eval_steps_per_second": 7.235, "step": 2800 }, { "epoch": 22.74, "learning_rate": 2.4274193548387097e-05, "loss": 0.1863, "step": 2820 }, { "epoch": 22.9, "learning_rate": 2.3736559139784946e-05, "loss": 0.2385, "step": 2840 }, { "epoch": 23.06, "learning_rate": 2.31989247311828e-05, "loss": 0.1666, "step": 2860 }, { "epoch": 23.23, "learning_rate": 2.266129032258065e-05, "loss": 0.2087, "step": 2880 }, { "epoch": 23.39, "learning_rate": 2.2123655913978494e-05, "loss": 0.207, "step": 2900 }, { "epoch": 23.39, "eval_accuracy": 0.8258992805755395, "eval_loss": 0.6051694750785828, "eval_runtime": 12.011, "eval_samples_per_second": 57.864, "eval_steps_per_second": 7.243, "step": 2900 }, { "epoch": 23.55, "learning_rate": 2.1586021505376344e-05, "loss": 0.2026, "step": 2920 }, { "epoch": 23.71, "learning_rate": 2.1048387096774193e-05, "loss": 0.2073, "step": 2940 }, { "epoch": 23.87, "learning_rate": 2.0510752688172046e-05, "loss": 0.1829, "step": 2960 }, { "epoch": 24.03, "learning_rate": 1.9973118279569895e-05, "loss": 0.2048, "step": 2980 }, { "epoch": 24.19, "learning_rate": 1.9435483870967744e-05, "loss": 0.1791, "step": 3000 }, { "epoch": 24.19, "eval_accuracy": 0.823021582733813, "eval_loss": 0.6331909894943237, "eval_runtime": 12.2518, "eval_samples_per_second": 56.726, "eval_steps_per_second": 7.101, "step": 3000 }, { "epoch": 24.35, "learning_rate": 1.889784946236559e-05, "loss": 0.2243, "step": 3020 }, { "epoch": 24.52, "learning_rate": 1.836021505376344e-05, "loss": 0.2148, "step": 3040 }, { "epoch": 24.68, "learning_rate": 1.7822580645161292e-05, "loss": 0.2568, "step": 3060 }, { "epoch": 24.84, "learning_rate": 1.728494623655914e-05, "loss": 0.1782, "step": 3080 }, { "epoch": 25.0, "learning_rate": 1.674731182795699e-05, "loss": 0.1719, "step": 3100 }, { "epoch": 25.0, "eval_accuracy": 0.8402877697841726, "eval_loss": 0.5942012071609497, "eval_runtime": 12.1124, "eval_samples_per_second": 57.379, "eval_steps_per_second": 7.183, "step": 3100 }, { "epoch": 25.16, "learning_rate": 1.620967741935484e-05, "loss": 0.1881, "step": 3120 }, { "epoch": 25.32, "learning_rate": 1.5672043010752686e-05, "loss": 0.2375, "step": 3140 }, { "epoch": 25.48, "learning_rate": 1.5134408602150537e-05, "loss": 0.1779, "step": 3160 }, { "epoch": 25.65, "learning_rate": 1.4596774193548388e-05, "loss": 0.1753, "step": 3180 }, { "epoch": 25.81, "learning_rate": 1.4059139784946238e-05, "loss": 0.1685, "step": 3200 }, { "epoch": 25.81, "eval_accuracy": 0.8359712230215828, "eval_loss": 0.612082839012146, "eval_runtime": 12.1399, "eval_samples_per_second": 57.249, "eval_steps_per_second": 7.166, "step": 3200 }, { "epoch": 25.97, "learning_rate": 1.3521505376344087e-05, "loss": 0.1407, "step": 3220 }, { "epoch": 26.13, "learning_rate": 1.2983870967741938e-05, "loss": 0.1854, "step": 3240 }, { "epoch": 26.29, "learning_rate": 1.2446236559139786e-05, "loss": 0.1956, "step": 3260 }, { "epoch": 26.45, "learning_rate": 1.1908602150537635e-05, "loss": 0.2, "step": 3280 }, { "epoch": 26.61, "learning_rate": 1.1370967741935484e-05, "loss": 0.1557, "step": 3300 }, { "epoch": 26.61, "eval_accuracy": 0.8345323741007195, "eval_loss": 0.6236761212348938, "eval_runtime": 11.9687, "eval_samples_per_second": 58.068, "eval_steps_per_second": 7.269, "step": 3300 }, { "epoch": 26.77, "learning_rate": 1.0833333333333334e-05, "loss": 0.212, "step": 3320 }, { "epoch": 26.94, "learning_rate": 1.0295698924731183e-05, "loss": 0.1893, "step": 3340 }, { "epoch": 27.1, "learning_rate": 9.758064516129032e-06, "loss": 0.159, "step": 3360 }, { "epoch": 27.26, "learning_rate": 9.220430107526883e-06, "loss": 0.1755, "step": 3380 }, { "epoch": 27.42, "learning_rate": 8.68279569892473e-06, "loss": 0.1694, "step": 3400 }, { "epoch": 27.42, "eval_accuracy": 0.8316546762589928, "eval_loss": 0.6371967792510986, "eval_runtime": 12.2075, "eval_samples_per_second": 56.932, "eval_steps_per_second": 7.127, "step": 3400 }, { "epoch": 27.58, "learning_rate": 8.145161290322582e-06, "loss": 0.1552, "step": 3420 }, { "epoch": 27.74, "learning_rate": 7.607526881720431e-06, "loss": 0.1848, "step": 3440 }, { "epoch": 27.9, "learning_rate": 7.06989247311828e-06, "loss": 0.1467, "step": 3460 }, { "epoch": 28.06, "learning_rate": 6.532258064516129e-06, "loss": 0.1292, "step": 3480 }, { "epoch": 28.23, "learning_rate": 5.994623655913978e-06, "loss": 0.1927, "step": 3500 }, { "epoch": 28.23, "eval_accuracy": 0.8273381294964028, "eval_loss": 0.6377986073493958, "eval_runtime": 12.0256, "eval_samples_per_second": 57.793, "eval_steps_per_second": 7.235, "step": 3500 }, { "epoch": 28.39, "learning_rate": 5.4569892473118285e-06, "loss": 0.2078, "step": 3520 }, { "epoch": 28.55, "learning_rate": 4.919354838709678e-06, "loss": 0.1992, "step": 3540 }, { "epoch": 28.71, "learning_rate": 4.381720430107527e-06, "loss": 0.1773, "step": 3560 }, { "epoch": 28.87, "learning_rate": 3.8440860215053765e-06, "loss": 0.1334, "step": 3580 }, { "epoch": 29.03, "learning_rate": 3.3064516129032262e-06, "loss": 0.1375, "step": 3600 }, { "epoch": 29.03, "eval_accuracy": 0.8330935251798561, "eval_loss": 0.6257502436637878, "eval_runtime": 12.1079, "eval_samples_per_second": 57.4, "eval_steps_per_second": 7.185, "step": 3600 }, { "epoch": 29.19, "learning_rate": 2.768817204301075e-06, "loss": 0.1842, "step": 3620 }, { "epoch": 29.35, "learning_rate": 2.231182795698925e-06, "loss": 0.169, "step": 3640 }, { "epoch": 29.52, "learning_rate": 1.6935483870967744e-06, "loss": 0.1623, "step": 3660 }, { "epoch": 29.68, "learning_rate": 1.1559139784946237e-06, "loss": 0.1658, "step": 3680 }, { "epoch": 29.84, "learning_rate": 6.182795698924732e-07, "loss": 0.1653, "step": 3700 }, { "epoch": 29.84, "eval_accuracy": 0.8330935251798561, "eval_loss": 0.626188337802887, "eval_runtime": 12.1517, "eval_samples_per_second": 57.193, "eval_steps_per_second": 7.159, "step": 3700 }, { "epoch": 30.0, "learning_rate": 8.064516129032259e-08, "loss": 0.1634, "step": 3720 }, { "epoch": 30.0, "step": 3720, "total_flos": 9.155203906807849e+18, "train_loss": 0.5316014153983003, "train_runtime": 4018.8325, "train_samples_per_second": 29.389, "train_steps_per_second": 0.926 } ], "logging_steps": 20, "max_steps": 3720, "num_train_epochs": 30, "save_steps": 100, "total_flos": 9.155203906807849e+18, "trial_name": null, "trial_params": null }