| { | |
| "best_metric": 0.5942012071609497, | |
| "best_model_checkpoint": "Hierarchical_Agent_Action/checkpoint-3100", | |
| "epoch": 30.0, | |
| "eval_steps": 100, | |
| "global_step": 3720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.946236559139786e-05, | |
| "loss": 3.4378, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.892473118279571e-05, | |
| "loss": 3.1863, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 9.838709677419355e-05, | |
| "loss": 2.9075, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.78494623655914e-05, | |
| "loss": 2.6661, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.731182795698925e-05, | |
| "loss": 2.4407, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.6057553956834533, | |
| "eval_loss": 2.271561622619629, | |
| "eval_runtime": 12.2022, | |
| "eval_samples_per_second": 56.957, | |
| "eval_steps_per_second": 7.13, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.677419354838711e-05, | |
| "loss": 2.2877, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.626344086021506e-05, | |
| "loss": 2.0861, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 9.572580645161291e-05, | |
| "loss": 2.0179, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.518817204301076e-05, | |
| "loss": 1.8839, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.465053763440861e-05, | |
| "loss": 1.7756, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_accuracy": 0.7064748201438849, | |
| "eval_loss": 1.6162313222885132, | |
| "eval_runtime": 13.1922, | |
| "eval_samples_per_second": 52.683, | |
| "eval_steps_per_second": 6.595, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 9.411290322580645e-05, | |
| "loss": 1.7271, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 9.357526881720431e-05, | |
| "loss": 1.5882, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 9.303763440860216e-05, | |
| "loss": 1.4613, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 9.250000000000001e-05, | |
| "loss": 1.4197, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.196236559139786e-05, | |
| "loss": 1.3948, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_accuracy": 0.7697841726618705, | |
| "eval_loss": 1.2199510335922241, | |
| "eval_runtime": 12.2033, | |
| "eval_samples_per_second": 56.952, | |
| "eval_steps_per_second": 7.129, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.14247311827957e-05, | |
| "loss": 1.3314, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 9.088709677419354e-05, | |
| "loss": 1.3175, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 9.03494623655914e-05, | |
| "loss": 1.2503, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 8.983870967741936e-05, | |
| "loss": 1.1104, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 8.930107526881721e-05, | |
| "loss": 1.131, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_accuracy": 0.7856115107913669, | |
| "eval_loss": 1.0011802911758423, | |
| "eval_runtime": 12.1771, | |
| "eval_samples_per_second": 57.074, | |
| "eval_steps_per_second": 7.145, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 8.876344086021506e-05, | |
| "loss": 1.1084, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 8.82258064516129e-05, | |
| "loss": 0.9978, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 8.768817204301076e-05, | |
| "loss": 1.0633, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 8.715053763440861e-05, | |
| "loss": 0.9801, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 8.661290322580645e-05, | |
| "loss": 0.9239, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_accuracy": 0.7827338129496403, | |
| "eval_loss": 0.9055307507514954, | |
| "eval_runtime": 12.1524, | |
| "eval_samples_per_second": 57.19, | |
| "eval_steps_per_second": 7.159, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 8.60752688172043e-05, | |
| "loss": 0.8721, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 8.553763440860215e-05, | |
| "loss": 0.8904, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 8.5e-05, | |
| "loss": 0.8903, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 8.446236559139785e-05, | |
| "loss": 0.8564, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 8.39247311827957e-05, | |
| "loss": 0.8699, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "eval_accuracy": 0.7827338129496403, | |
| "eval_loss": 0.8103253841400146, | |
| "eval_runtime": 11.9742, | |
| "eval_samples_per_second": 58.041, | |
| "eval_steps_per_second": 7.266, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 8.338709677419355e-05, | |
| "loss": 0.8503, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 8.28494623655914e-05, | |
| "loss": 0.7251, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 8.231182795698926e-05, | |
| "loss": 0.6939, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 8.17741935483871e-05, | |
| "loss": 0.7263, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 8.123655913978495e-05, | |
| "loss": 0.6707, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_accuracy": 0.7841726618705036, | |
| "eval_loss": 0.7609687447547913, | |
| "eval_runtime": 12.2276, | |
| "eval_samples_per_second": 56.839, | |
| "eval_steps_per_second": 7.115, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 8.06989247311828e-05, | |
| "loss": 0.6822, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 8.016129032258065e-05, | |
| "loss": 0.716, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 7.96236559139785e-05, | |
| "loss": 0.6162, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 7.908602150537635e-05, | |
| "loss": 0.6409, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 7.85483870967742e-05, | |
| "loss": 0.6206, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "eval_accuracy": 0.7884892086330936, | |
| "eval_loss": 0.7312321066856384, | |
| "eval_runtime": 12.0325, | |
| "eval_samples_per_second": 57.76, | |
| "eval_steps_per_second": 7.23, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 7.801075268817205e-05, | |
| "loss": 0.6156, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 7.74731182795699e-05, | |
| "loss": 0.5887, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 7.693548387096776e-05, | |
| "loss": 0.549, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 7.63978494623656e-05, | |
| "loss": 0.581, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 7.586021505376343e-05, | |
| "loss": 0.5795, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "eval_accuracy": 0.8100719424460432, | |
| "eval_loss": 0.6988666653633118, | |
| "eval_runtime": 12.1203, | |
| "eval_samples_per_second": 57.342, | |
| "eval_steps_per_second": 7.178, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 7.532258064516129e-05, | |
| "loss": 0.4974, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 7.478494623655914e-05, | |
| "loss": 0.5397, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 7.424731182795699e-05, | |
| "loss": 0.6128, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 7.370967741935485e-05, | |
| "loss": 0.5246, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 7.317204301075268e-05, | |
| "loss": 0.4914, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_accuracy": 0.781294964028777, | |
| "eval_loss": 0.7066284418106079, | |
| "eval_runtime": 12.169, | |
| "eval_samples_per_second": 57.112, | |
| "eval_steps_per_second": 7.149, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 7.263440860215054e-05, | |
| "loss": 0.4653, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 7.209677419354839e-05, | |
| "loss": 0.486, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 7.155913978494624e-05, | |
| "loss": 0.4781, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 7.102150537634408e-05, | |
| "loss": 0.5277, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "learning_rate": 7.048387096774193e-05, | |
| "loss": 0.5087, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "eval_accuracy": 0.818705035971223, | |
| "eval_loss": 0.6398155689239502, | |
| "eval_runtime": 12.2159, | |
| "eval_samples_per_second": 56.893, | |
| "eval_steps_per_second": 7.122, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 6.994623655913979e-05, | |
| "loss": 0.4671, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 6.940860215053764e-05, | |
| "loss": 0.4841, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 6.887096774193549e-05, | |
| "loss": 0.4619, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 6.833333333333333e-05, | |
| "loss": 0.479, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 6.779569892473118e-05, | |
| "loss": 0.4373, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "eval_accuracy": 0.8043165467625899, | |
| "eval_loss": 0.6293413043022156, | |
| "eval_runtime": 12.0132, | |
| "eval_samples_per_second": 57.853, | |
| "eval_steps_per_second": 7.242, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "learning_rate": 6.725806451612904e-05, | |
| "loss": 0.4989, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 6.672043010752689e-05, | |
| "loss": 0.4829, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "learning_rate": 6.618279569892474e-05, | |
| "loss": 0.458, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "learning_rate": 6.564516129032258e-05, | |
| "loss": 0.4135, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "learning_rate": 6.510752688172043e-05, | |
| "loss": 0.4365, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "eval_accuracy": 0.7971223021582734, | |
| "eval_loss": 0.672641932964325, | |
| "eval_runtime": 12.1208, | |
| "eval_samples_per_second": 57.339, | |
| "eval_steps_per_second": 7.178, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 6.456989247311829e-05, | |
| "loss": 0.4852, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "learning_rate": 6.403225806451614e-05, | |
| "loss": 0.3868, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 6.349462365591398e-05, | |
| "loss": 0.4663, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "learning_rate": 6.295698924731183e-05, | |
| "loss": 0.396, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "learning_rate": 6.241935483870967e-05, | |
| "loss": 0.4517, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "eval_accuracy": 0.8244604316546763, | |
| "eval_loss": 0.6046626567840576, | |
| "eval_runtime": 12.0781, | |
| "eval_samples_per_second": 57.542, | |
| "eval_steps_per_second": 7.203, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "learning_rate": 6.188172043010752e-05, | |
| "loss": 0.4272, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 6.134408602150538e-05, | |
| "loss": 0.3438, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 11.77, | |
| "learning_rate": 6.080645161290322e-05, | |
| "loss": 0.3741, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "learning_rate": 6.0268817204301075e-05, | |
| "loss": 0.3633, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "learning_rate": 5.973118279569893e-05, | |
| "loss": 0.4114, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "eval_accuracy": 0.823021582733813, | |
| "eval_loss": 0.6088372468948364, | |
| "eval_runtime": 12.1467, | |
| "eval_samples_per_second": 57.217, | |
| "eval_steps_per_second": 7.162, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "learning_rate": 5.9193548387096774e-05, | |
| "loss": 0.3284, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "learning_rate": 5.8655913978494627e-05, | |
| "loss": 0.3329, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "learning_rate": 5.811827956989247e-05, | |
| "loss": 0.3656, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 12.74, | |
| "learning_rate": 5.7580645161290325e-05, | |
| "loss": 0.3581, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "learning_rate": 5.704301075268818e-05, | |
| "loss": 0.426, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "eval_accuracy": 0.8201438848920863, | |
| "eval_loss": 0.6164522171020508, | |
| "eval_runtime": 12.2797, | |
| "eval_samples_per_second": 56.597, | |
| "eval_steps_per_second": 7.085, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "learning_rate": 5.6505376344086024e-05, | |
| "loss": 0.4441, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 13.23, | |
| "learning_rate": 5.599462365591398e-05, | |
| "loss": 0.3141, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 13.39, | |
| "learning_rate": 5.545698924731183e-05, | |
| "loss": 0.3072, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "learning_rate": 5.491935483870968e-05, | |
| "loss": 0.3348, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 13.71, | |
| "learning_rate": 5.438172043010753e-05, | |
| "loss": 0.3456, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 13.71, | |
| "eval_accuracy": 0.8258992805755395, | |
| "eval_loss": 0.6133091449737549, | |
| "eval_runtime": 12.0446, | |
| "eval_samples_per_second": 57.702, | |
| "eval_steps_per_second": 7.223, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "learning_rate": 5.384408602150538e-05, | |
| "loss": 0.3338, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "learning_rate": 5.330645161290323e-05, | |
| "loss": 0.3473, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 14.19, | |
| "learning_rate": 5.276881720430108e-05, | |
| "loss": 0.3563, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 5.223118279569893e-05, | |
| "loss": 0.3086, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 14.52, | |
| "learning_rate": 5.169354838709678e-05, | |
| "loss": 0.332, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 14.52, | |
| "eval_accuracy": 0.8201438848920863, | |
| "eval_loss": 0.6735997200012207, | |
| "eval_runtime": 11.991, | |
| "eval_samples_per_second": 57.96, | |
| "eval_steps_per_second": 7.255, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "learning_rate": 5.115591397849463e-05, | |
| "loss": 0.3146, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 14.84, | |
| "learning_rate": 5.061827956989248e-05, | |
| "loss": 0.3475, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 5.008064516129033e-05, | |
| "loss": 0.3397, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "learning_rate": 4.954301075268817e-05, | |
| "loss": 0.2829, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 15.32, | |
| "learning_rate": 4.9005376344086024e-05, | |
| "loss": 0.3646, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 15.32, | |
| "eval_accuracy": 0.8172661870503597, | |
| "eval_loss": 0.6405801177024841, | |
| "eval_runtime": 12.0646, | |
| "eval_samples_per_second": 57.607, | |
| "eval_steps_per_second": 7.211, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 15.48, | |
| "learning_rate": 4.846774193548387e-05, | |
| "loss": 0.2571, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 4.793010752688172e-05, | |
| "loss": 0.3495, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 15.81, | |
| "learning_rate": 4.7392473118279576e-05, | |
| "loss": 0.2647, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 15.97, | |
| "learning_rate": 4.685483870967742e-05, | |
| "loss": 0.3063, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "learning_rate": 4.6317204301075275e-05, | |
| "loss": 0.3287, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "eval_accuracy": 0.7971223021582734, | |
| "eval_loss": 0.6978276968002319, | |
| "eval_runtime": 12.1198, | |
| "eval_samples_per_second": 57.344, | |
| "eval_steps_per_second": 7.178, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 16.29, | |
| "learning_rate": 4.577956989247312e-05, | |
| "loss": 0.2456, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "learning_rate": 4.5241935483870966e-05, | |
| "loss": 0.3242, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 16.61, | |
| "learning_rate": 4.470430107526882e-05, | |
| "loss": 0.3186, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 16.77, | |
| "learning_rate": 4.4166666666666665e-05, | |
| "loss": 0.3462, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 16.94, | |
| "learning_rate": 4.362903225806452e-05, | |
| "loss": 0.2793, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 16.94, | |
| "eval_accuracy": 0.8172661870503597, | |
| "eval_loss": 0.6432561278343201, | |
| "eval_runtime": 12.0081, | |
| "eval_samples_per_second": 57.878, | |
| "eval_steps_per_second": 7.245, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 17.1, | |
| "learning_rate": 4.309139784946237e-05, | |
| "loss": 0.2849, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 17.26, | |
| "learning_rate": 4.2553763440860216e-05, | |
| "loss": 0.3371, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 17.42, | |
| "learning_rate": 4.201612903225807e-05, | |
| "loss": 0.2561, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 17.58, | |
| "learning_rate": 4.1478494623655915e-05, | |
| "loss": 0.2628, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 17.74, | |
| "learning_rate": 4.094086021505377e-05, | |
| "loss": 0.2924, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 17.74, | |
| "eval_accuracy": 0.814388489208633, | |
| "eval_loss": 0.6474089622497559, | |
| "eval_runtime": 12.255, | |
| "eval_samples_per_second": 56.711, | |
| "eval_steps_per_second": 7.099, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 4.0403225806451614e-05, | |
| "loss": 0.2935, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "learning_rate": 3.9865591397849466e-05, | |
| "loss": 0.5124, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 18.23, | |
| "learning_rate": 3.932795698924731e-05, | |
| "loss": 0.2954, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 18.39, | |
| "learning_rate": 3.879032258064516e-05, | |
| "loss": 0.2584, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 18.55, | |
| "learning_rate": 3.825268817204301e-05, | |
| "loss": 0.2605, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 18.55, | |
| "eval_accuracy": 0.8287769784172662, | |
| "eval_loss": 0.627909243106842, | |
| "eval_runtime": 12.1775, | |
| "eval_samples_per_second": 57.072, | |
| "eval_steps_per_second": 7.144, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 18.71, | |
| "learning_rate": 3.7715053763440864e-05, | |
| "loss": 0.312, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 3.717741935483871e-05, | |
| "loss": 0.2679, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "learning_rate": 3.663978494623656e-05, | |
| "loss": 0.2654, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 19.19, | |
| "learning_rate": 3.610215053763441e-05, | |
| "loss": 0.2524, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 3.556451612903226e-05, | |
| "loss": 0.2016, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "eval_accuracy": 0.8215827338129497, | |
| "eval_loss": 0.6360746026039124, | |
| "eval_runtime": 12.0929, | |
| "eval_samples_per_second": 57.472, | |
| "eval_steps_per_second": 7.194, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 19.52, | |
| "learning_rate": 3.502688172043011e-05, | |
| "loss": 0.2691, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 19.68, | |
| "learning_rate": 3.448924731182796e-05, | |
| "loss": 0.2068, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 19.84, | |
| "learning_rate": 3.395161290322581e-05, | |
| "loss": 0.3017, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 3.341397849462366e-05, | |
| "loss": 0.2318, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "learning_rate": 3.2876344086021504e-05, | |
| "loss": 0.2524, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "eval_accuracy": 0.8258992805755395, | |
| "eval_loss": 0.6393768191337585, | |
| "eval_runtime": 12.2058, | |
| "eval_samples_per_second": 56.94, | |
| "eval_steps_per_second": 7.128, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 20.32, | |
| "learning_rate": 3.233870967741936e-05, | |
| "loss": 0.2603, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 20.48, | |
| "learning_rate": 3.18010752688172e-05, | |
| "loss": 0.207, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 3.1263440860215056e-05, | |
| "loss": 0.1927, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 20.81, | |
| "learning_rate": 3.07258064516129e-05, | |
| "loss": 0.1947, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "learning_rate": 3.0188172043010754e-05, | |
| "loss": 0.2017, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "eval_accuracy": 0.8158273381294964, | |
| "eval_loss": 0.6682714223861694, | |
| "eval_runtime": 12.0188, | |
| "eval_samples_per_second": 57.826, | |
| "eval_steps_per_second": 7.239, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 21.13, | |
| "learning_rate": 2.9650537634408604e-05, | |
| "loss": 0.2351, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 21.29, | |
| "learning_rate": 2.9112903225806453e-05, | |
| "loss": 0.1842, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 21.45, | |
| "learning_rate": 2.8575268817204302e-05, | |
| "loss": 0.2131, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 21.61, | |
| "learning_rate": 2.8037634408602155e-05, | |
| "loss": 0.2509, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 21.77, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.2082, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 21.77, | |
| "eval_accuracy": 0.8345323741007195, | |
| "eval_loss": 0.6389289498329163, | |
| "eval_runtime": 12.0715, | |
| "eval_samples_per_second": 57.574, | |
| "eval_steps_per_second": 7.207, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "learning_rate": 2.6962365591397854e-05, | |
| "loss": 0.2671, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 2.6424731182795696e-05, | |
| "loss": 0.2534, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 22.26, | |
| "learning_rate": 2.588709677419355e-05, | |
| "loss": 0.205, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 22.42, | |
| "learning_rate": 2.5349462365591398e-05, | |
| "loss": 0.1904, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 22.58, | |
| "learning_rate": 2.4811827956989248e-05, | |
| "loss": 0.2751, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 22.58, | |
| "eval_accuracy": 0.837410071942446, | |
| "eval_loss": 0.6141177415847778, | |
| "eval_runtime": 12.0253, | |
| "eval_samples_per_second": 57.795, | |
| "eval_steps_per_second": 7.235, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 22.74, | |
| "learning_rate": 2.4274193548387097e-05, | |
| "loss": 0.1863, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "learning_rate": 2.3736559139784946e-05, | |
| "loss": 0.2385, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 23.06, | |
| "learning_rate": 2.31989247311828e-05, | |
| "loss": 0.1666, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 23.23, | |
| "learning_rate": 2.266129032258065e-05, | |
| "loss": 0.2087, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "learning_rate": 2.2123655913978494e-05, | |
| "loss": 0.207, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "eval_accuracy": 0.8258992805755395, | |
| "eval_loss": 0.6051694750785828, | |
| "eval_runtime": 12.011, | |
| "eval_samples_per_second": 57.864, | |
| "eval_steps_per_second": 7.243, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "learning_rate": 2.1586021505376344e-05, | |
| "loss": 0.2026, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 23.71, | |
| "learning_rate": 2.1048387096774193e-05, | |
| "loss": 0.2073, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 23.87, | |
| "learning_rate": 2.0510752688172046e-05, | |
| "loss": 0.1829, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 24.03, | |
| "learning_rate": 1.9973118279569895e-05, | |
| "loss": 0.2048, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 24.19, | |
| "learning_rate": 1.9435483870967744e-05, | |
| "loss": 0.1791, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 24.19, | |
| "eval_accuracy": 0.823021582733813, | |
| "eval_loss": 0.6331909894943237, | |
| "eval_runtime": 12.2518, | |
| "eval_samples_per_second": 56.726, | |
| "eval_steps_per_second": 7.101, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 24.35, | |
| "learning_rate": 1.889784946236559e-05, | |
| "loss": 0.2243, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 24.52, | |
| "learning_rate": 1.836021505376344e-05, | |
| "loss": 0.2148, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 24.68, | |
| "learning_rate": 1.7822580645161292e-05, | |
| "loss": 0.2568, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 24.84, | |
| "learning_rate": 1.728494623655914e-05, | |
| "loss": 0.1782, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 1.674731182795699e-05, | |
| "loss": 0.1719, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.8402877697841726, | |
| "eval_loss": 0.5942012071609497, | |
| "eval_runtime": 12.1124, | |
| "eval_samples_per_second": 57.379, | |
| "eval_steps_per_second": 7.183, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 25.16, | |
| "learning_rate": 1.620967741935484e-05, | |
| "loss": 0.1881, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 25.32, | |
| "learning_rate": 1.5672043010752686e-05, | |
| "loss": 0.2375, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 25.48, | |
| "learning_rate": 1.5134408602150537e-05, | |
| "loss": 0.1779, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 25.65, | |
| "learning_rate": 1.4596774193548388e-05, | |
| "loss": 0.1753, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 25.81, | |
| "learning_rate": 1.4059139784946238e-05, | |
| "loss": 0.1685, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 25.81, | |
| "eval_accuracy": 0.8359712230215828, | |
| "eval_loss": 0.612082839012146, | |
| "eval_runtime": 12.1399, | |
| "eval_samples_per_second": 57.249, | |
| "eval_steps_per_second": 7.166, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 25.97, | |
| "learning_rate": 1.3521505376344087e-05, | |
| "loss": 0.1407, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 26.13, | |
| "learning_rate": 1.2983870967741938e-05, | |
| "loss": 0.1854, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 26.29, | |
| "learning_rate": 1.2446236559139786e-05, | |
| "loss": 0.1956, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "learning_rate": 1.1908602150537635e-05, | |
| "loss": 0.2, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "learning_rate": 1.1370967741935484e-05, | |
| "loss": 0.1557, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "eval_accuracy": 0.8345323741007195, | |
| "eval_loss": 0.6236761212348938, | |
| "eval_runtime": 11.9687, | |
| "eval_samples_per_second": 58.068, | |
| "eval_steps_per_second": 7.269, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "learning_rate": 1.0833333333333334e-05, | |
| "loss": 0.212, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 26.94, | |
| "learning_rate": 1.0295698924731183e-05, | |
| "loss": 0.1893, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 27.1, | |
| "learning_rate": 9.758064516129032e-06, | |
| "loss": 0.159, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 27.26, | |
| "learning_rate": 9.220430107526883e-06, | |
| "loss": 0.1755, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "learning_rate": 8.68279569892473e-06, | |
| "loss": 0.1694, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "eval_accuracy": 0.8316546762589928, | |
| "eval_loss": 0.6371967792510986, | |
| "eval_runtime": 12.2075, | |
| "eval_samples_per_second": 56.932, | |
| "eval_steps_per_second": 7.127, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 27.58, | |
| "learning_rate": 8.145161290322582e-06, | |
| "loss": 0.1552, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 27.74, | |
| "learning_rate": 7.607526881720431e-06, | |
| "loss": 0.1848, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 27.9, | |
| "learning_rate": 7.06989247311828e-06, | |
| "loss": 0.1467, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 28.06, | |
| "learning_rate": 6.532258064516129e-06, | |
| "loss": 0.1292, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 28.23, | |
| "learning_rate": 5.994623655913978e-06, | |
| "loss": 0.1927, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 28.23, | |
| "eval_accuracy": 0.8273381294964028, | |
| "eval_loss": 0.6377986073493958, | |
| "eval_runtime": 12.0256, | |
| "eval_samples_per_second": 57.793, | |
| "eval_steps_per_second": 7.235, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 28.39, | |
| "learning_rate": 5.4569892473118285e-06, | |
| "loss": 0.2078, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 28.55, | |
| "learning_rate": 4.919354838709678e-06, | |
| "loss": 0.1992, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 28.71, | |
| "learning_rate": 4.381720430107527e-06, | |
| "loss": 0.1773, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 28.87, | |
| "learning_rate": 3.8440860215053765e-06, | |
| "loss": 0.1334, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 29.03, | |
| "learning_rate": 3.3064516129032262e-06, | |
| "loss": 0.1375, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 29.03, | |
| "eval_accuracy": 0.8330935251798561, | |
| "eval_loss": 0.6257502436637878, | |
| "eval_runtime": 12.1079, | |
| "eval_samples_per_second": 57.4, | |
| "eval_steps_per_second": 7.185, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 29.19, | |
| "learning_rate": 2.768817204301075e-06, | |
| "loss": 0.1842, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "learning_rate": 2.231182795698925e-06, | |
| "loss": 0.169, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 29.52, | |
| "learning_rate": 1.6935483870967744e-06, | |
| "loss": 0.1623, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 29.68, | |
| "learning_rate": 1.1559139784946237e-06, | |
| "loss": 0.1658, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 29.84, | |
| "learning_rate": 6.182795698924732e-07, | |
| "loss": 0.1653, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 29.84, | |
| "eval_accuracy": 0.8330935251798561, | |
| "eval_loss": 0.626188337802887, | |
| "eval_runtime": 12.1517, | |
| "eval_samples_per_second": 57.193, | |
| "eval_steps_per_second": 7.159, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 8.064516129032259e-08, | |
| "loss": 0.1634, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 3720, | |
| "total_flos": 9.155203906807849e+18, | |
| "train_loss": 0.5316014153983003, | |
| "train_runtime": 4018.8325, | |
| "train_samples_per_second": 29.389, | |
| "train_steps_per_second": 0.926 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 3720, | |
| "num_train_epochs": 30, | |
| "save_steps": 100, | |
| "total_flos": 9.155203906807849e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |