| { | |
| "best_metric": 0.8123190611646329, | |
| "best_model_checkpoint": "output/roberta-large-question-classifier/checkpoint-2563", | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 6990, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.430615164520744e-06, | |
| "loss": 2.3372, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.861230329041488e-06, | |
| "loss": 2.276, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.291845493562232e-06, | |
| "loss": 2.1988, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.722460658082976e-06, | |
| "loss": 1.9467, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.4050404697492347, | |
| "eval_loss": 1.3099409341812134, | |
| "eval_runtime": 1.3906, | |
| "eval_samples_per_second": 417.1, | |
| "eval_steps_per_second": 1.438, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 7.15307582260372e-06, | |
| "loss": 1.5551, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 8.583690987124465e-06, | |
| "loss": 1.0537, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.0014306151645208e-05, | |
| "loss": 0.872, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.1444921316165953e-05, | |
| "loss": 0.6619, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.2875536480686697e-05, | |
| "loss": 0.6381, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.7785421184302428, | |
| "eval_loss": 0.5586220622062683, | |
| "eval_runtime": 1.4464, | |
| "eval_samples_per_second": 400.997, | |
| "eval_steps_per_second": 1.383, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.430615164520744e-05, | |
| "loss": 0.509, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.5736766809728185e-05, | |
| "loss": 0.5387, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 1.716738197424893e-05, | |
| "loss": 0.5163, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.859799713876967e-05, | |
| "loss": 0.628, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.7831151120797589, | |
| "eval_loss": 0.6418800354003906, | |
| "eval_runtime": 1.467, | |
| "eval_samples_per_second": 395.356, | |
| "eval_steps_per_second": 1.363, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.9996820855189955e-05, | |
| "loss": 0.5632, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.983786361468765e-05, | |
| "loss": 0.4046, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.9678906374185345e-05, | |
| "loss": 0.3985, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 1.951994913368304e-05, | |
| "loss": 0.4307, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 1.9360991893180737e-05, | |
| "loss": 0.4487, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.8093842888236766, | |
| "eval_loss": 0.5770355463027954, | |
| "eval_runtime": 1.4647, | |
| "eval_samples_per_second": 395.985, | |
| "eval_steps_per_second": 1.365, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.9202034652678432e-05, | |
| "loss": 0.3373, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 1.9043077412176127e-05, | |
| "loss": 0.2578, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 1.888412017167382e-05, | |
| "loss": 0.2675, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 1.8725162931171516e-05, | |
| "loss": 0.2697, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.8566205690669214e-05, | |
| "loss": 0.3319, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.7952503005676876, | |
| "eval_loss": 0.7712982296943665, | |
| "eval_runtime": 1.5475, | |
| "eval_samples_per_second": 374.79, | |
| "eval_steps_per_second": 1.292, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 1.8407248450166905e-05, | |
| "loss": 0.2049, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 1.82482912096646e-05, | |
| "loss": 0.2344, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 1.8089333969162298e-05, | |
| "loss": 0.1843, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 1.7930376728659993e-05, | |
| "loss": 0.2095, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.8017807103839256, | |
| "eval_loss": 0.8798965811729431, | |
| "eval_runtime": 1.4572, | |
| "eval_samples_per_second": 398.025, | |
| "eval_steps_per_second": 1.372, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 1.7771419488157687e-05, | |
| "loss": 0.2039, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 1.7612462247655382e-05, | |
| "loss": 0.0876, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 1.7453505007153077e-05, | |
| "loss": 0.1054, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.7294547766650775e-05, | |
| "loss": 0.1629, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 1.7135590526148466e-05, | |
| "loss": 0.1355, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.7961224122154954, | |
| "eval_loss": 1.0646474361419678, | |
| "eval_runtime": 1.4774, | |
| "eval_samples_per_second": 392.581, | |
| "eval_steps_per_second": 1.354, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 1.6976633285646164e-05, | |
| "loss": 0.1457, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.681767604514386e-05, | |
| "loss": 0.0861, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 1.6658718804641553e-05, | |
| "loss": 0.0852, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 1.6499761564139248e-05, | |
| "loss": 0.1283, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 1.6340804323636943e-05, | |
| "loss": 0.0956, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.7998718228606326, | |
| "eval_loss": 1.2174800634384155, | |
| "eval_runtime": 1.5358, | |
| "eval_samples_per_second": 377.658, | |
| "eval_steps_per_second": 1.302, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 1.6181847083134637e-05, | |
| "loss": 0.0862, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 1.6022889842632335e-05, | |
| "loss": 0.0486, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 1.5863932602130026e-05, | |
| "loss": 0.0321, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 1.5704975361627725e-05, | |
| "loss": 0.0687, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.789186529273271, | |
| "eval_loss": 1.3646652698516846, | |
| "eval_runtime": 1.5089, | |
| "eval_samples_per_second": 384.398, | |
| "eval_steps_per_second": 1.326, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 1.554601812112542e-05, | |
| "loss": 0.0522, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 1.5387060880623114e-05, | |
| "loss": 0.0349, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 1.5228103640120809e-05, | |
| "loss": 0.0529, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 1.5069146399618503e-05, | |
| "loss": 0.0284, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 1.49101891591162e-05, | |
| "loss": 0.0371, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.7986917021269787, | |
| "eval_loss": 1.3809223175048828, | |
| "eval_runtime": 1.6909, | |
| "eval_samples_per_second": 343.007, | |
| "eval_steps_per_second": 1.183, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 1.4751231918613892e-05, | |
| "loss": 0.0143, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 10.3, | |
| "learning_rate": 1.4592274678111589e-05, | |
| "loss": 0.0012, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "learning_rate": 1.4433317437609285e-05, | |
| "loss": 0.0117, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "learning_rate": 1.427436019710698e-05, | |
| "loss": 0.0248, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 1.4115402956604673e-05, | |
| "loss": 0.0303, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.8123190611646329, | |
| "eval_loss": 1.3591104745864868, | |
| "eval_runtime": 1.57, | |
| "eval_samples_per_second": 369.434, | |
| "eval_steps_per_second": 1.274, | |
| "step": 2563 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "learning_rate": 1.395644571610237e-05, | |
| "loss": 0.0142, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "learning_rate": 1.3797488475600066e-05, | |
| "loss": 0.0136, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 1.363853123509776e-05, | |
| "loss": 0.0126, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 11.8, | |
| "learning_rate": 1.3479573994595455e-05, | |
| "loss": 0.0263, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.8100291935535177, | |
| "eval_loss": 1.5316766500473022, | |
| "eval_runtime": 1.5184, | |
| "eval_samples_per_second": 381.982, | |
| "eval_steps_per_second": 1.317, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "learning_rate": 1.332061675409315e-05, | |
| "loss": 0.011, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "learning_rate": 1.3161659513590846e-05, | |
| "loss": 0.0002, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 12.45, | |
| "learning_rate": 1.300270227308854e-05, | |
| "loss": 0.0057, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "learning_rate": 1.2843745032586235e-05, | |
| "loss": 0.0016, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "learning_rate": 1.268478779208393e-05, | |
| "loss": 0.0144, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.7959241618420011, | |
| "eval_loss": 1.5725551843643188, | |
| "eval_runtime": 1.4849, | |
| "eval_samples_per_second": 390.601, | |
| "eval_steps_per_second": 1.347, | |
| "step": 3029 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "learning_rate": 1.2525830551581626e-05, | |
| "loss": 0.006, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 13.3, | |
| "learning_rate": 1.236687331107932e-05, | |
| "loss": 0.0056, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "learning_rate": 1.2207916070577015e-05, | |
| "loss": 0.0114, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "learning_rate": 1.204895883007471e-05, | |
| "loss": 0.021, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 1.1890001589572406e-05, | |
| "loss": 0.0436, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.7987626313618129, | |
| "eval_loss": 1.6159876585006714, | |
| "eval_runtime": 1.4555, | |
| "eval_samples_per_second": 398.497, | |
| "eval_steps_per_second": 1.374, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "learning_rate": 1.1731044349070103e-05, | |
| "loss": 0.0002, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "learning_rate": 1.1572087108567796e-05, | |
| "loss": 0.0062, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "learning_rate": 1.141312986806549e-05, | |
| "loss": 0.0056, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 14.81, | |
| "learning_rate": 1.1254172627563187e-05, | |
| "loss": 0.0048, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.7957479636902922, | |
| "eval_loss": 1.6826026439666748, | |
| "eval_runtime": 1.4617, | |
| "eval_samples_per_second": 396.789, | |
| "eval_steps_per_second": 1.368, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 15.02, | |
| "learning_rate": 1.1095215387060883e-05, | |
| "loss": 0.0039, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 1.0936258146558576e-05, | |
| "loss": 0.0001, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 15.45, | |
| "learning_rate": 1.0777300906056272e-05, | |
| "loss": 0.0236, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 15.67, | |
| "learning_rate": 1.0618343665553967e-05, | |
| "loss": 0.0004, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "learning_rate": 1.0459386425051663e-05, | |
| "loss": 0.0001, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.7956639409293647, | |
| "eval_loss": 1.6912556886672974, | |
| "eval_runtime": 1.4563, | |
| "eval_samples_per_second": 398.278, | |
| "eval_steps_per_second": 1.373, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "learning_rate": 1.0300429184549356e-05, | |
| "loss": 0.0002, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "learning_rate": 1.0141471944047053e-05, | |
| "loss": 0.0002, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 16.52, | |
| "learning_rate": 9.982514703544747e-06, | |
| "loss": 0.0006, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "learning_rate": 9.823557463042442e-06, | |
| "loss": 0.0002, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 16.95, | |
| "learning_rate": 9.664600222540137e-06, | |
| "loss": 0.0001, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.7994751240525658, | |
| "eval_loss": 1.7075979709625244, | |
| "eval_runtime": 1.4886, | |
| "eval_samples_per_second": 389.634, | |
| "eval_steps_per_second": 1.344, | |
| "step": 3961 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "learning_rate": 9.505642982037833e-06, | |
| "loss": 0.0002, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 9.346685741535528e-06, | |
| "loss": 0.0185, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "learning_rate": 9.187728501033222e-06, | |
| "loss": 0.0001, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 17.81, | |
| "learning_rate": 9.028771260530917e-06, | |
| "loss": 0.0034, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.7960354805040918, | |
| "eval_loss": 1.8018221855163574, | |
| "eval_runtime": 1.5408, | |
| "eval_samples_per_second": 376.422, | |
| "eval_steps_per_second": 1.298, | |
| "step": 4194 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "learning_rate": 8.869814020028613e-06, | |
| "loss": 0.013, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "learning_rate": 8.710856779526308e-06, | |
| "loss": 0.0003, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "learning_rate": 8.551899539024003e-06, | |
| "loss": 0.0001, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 18.67, | |
| "learning_rate": 8.392942298521697e-06, | |
| "loss": 0.0002, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "learning_rate": 8.233985058019394e-06, | |
| "loss": 0.0228, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.7915974698658704, | |
| "eval_loss": 1.7456856966018677, | |
| "eval_runtime": 1.4762, | |
| "eval_samples_per_second": 392.912, | |
| "eval_steps_per_second": 1.355, | |
| "step": 4427 | |
| }, | |
| { | |
| "epoch": 19.1, | |
| "learning_rate": 8.075027817517088e-06, | |
| "loss": 0.0006, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 19.31, | |
| "learning_rate": 7.916070577014783e-06, | |
| "loss": 0.0037, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "learning_rate": 7.757113336512478e-06, | |
| "loss": 0.0314, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 19.74, | |
| "learning_rate": 7.598156096010174e-06, | |
| "loss": 0.0028, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 19.96, | |
| "learning_rate": 7.439198855507869e-06, | |
| "loss": 0.0083, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.7868576028090374, | |
| "eval_loss": 1.9279075860977173, | |
| "eval_runtime": 1.4679, | |
| "eval_samples_per_second": 395.119, | |
| "eval_steps_per_second": 1.362, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 20.17, | |
| "learning_rate": 7.280241615005564e-06, | |
| "loss": 0.0009, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 20.39, | |
| "learning_rate": 7.121284374503259e-06, | |
| "loss": 0.0002, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 20.6, | |
| "learning_rate": 6.962327134000954e-06, | |
| "loss": 0.0082, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "learning_rate": 6.803369893498649e-06, | |
| "loss": 0.0001, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.7915377946685866, | |
| "eval_loss": 1.8367053270339966, | |
| "eval_runtime": 2.0999, | |
| "eval_samples_per_second": 276.201, | |
| "eval_steps_per_second": 0.952, | |
| "step": 4893 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "learning_rate": 6.6444126529963445e-06, | |
| "loss": 0.0001, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 21.24, | |
| "learning_rate": 6.485455412494039e-06, | |
| "loss": 0.0072, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 21.46, | |
| "learning_rate": 6.326498171991735e-06, | |
| "loss": 0.0, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 21.67, | |
| "learning_rate": 6.167540931489429e-06, | |
| "loss": 0.0, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 21.89, | |
| "learning_rate": 6.008583690987126e-06, | |
| "loss": 0.0003, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.7842117575951872, | |
| "eval_loss": 1.8620420694351196, | |
| "eval_runtime": 1.8603, | |
| "eval_samples_per_second": 311.785, | |
| "eval_steps_per_second": 1.075, | |
| "step": 5126 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 5.8496264504848195e-06, | |
| "loss": 0.0007, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "learning_rate": 5.690669209982516e-06, | |
| "loss": 0.0, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 22.53, | |
| "learning_rate": 5.5317119694802105e-06, | |
| "loss": 0.0021, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 22.75, | |
| "learning_rate": 5.372754728977906e-06, | |
| "loss": 0.0077, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 22.96, | |
| "learning_rate": 5.213797488475601e-06, | |
| "loss": 0.0002, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.7828476594276503, | |
| "eval_loss": 1.919188141822815, | |
| "eval_runtime": 1.4859, | |
| "eval_samples_per_second": 390.344, | |
| "eval_steps_per_second": 1.346, | |
| "step": 5359 | |
| }, | |
| { | |
| "epoch": 23.18, | |
| "learning_rate": 5.054840247973296e-06, | |
| "loss": 0.0194, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "learning_rate": 4.895883007470991e-06, | |
| "loss": 0.0132, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 23.61, | |
| "learning_rate": 4.7369257669686855e-06, | |
| "loss": 0.0001, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 23.82, | |
| "learning_rate": 4.577968526466381e-06, | |
| "loss": 0.0, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.7927310235612234, | |
| "eval_loss": 1.9081404209136963, | |
| "eval_runtime": 1.4831, | |
| "eval_samples_per_second": 391.082, | |
| "eval_steps_per_second": 1.349, | |
| "step": 5592 | |
| }, | |
| { | |
| "epoch": 24.03, | |
| "learning_rate": 4.419011285964076e-06, | |
| "loss": 0.0, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 24.25, | |
| "learning_rate": 4.260054045461771e-06, | |
| "loss": 0.0001, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 24.46, | |
| "learning_rate": 4.101096804959467e-06, | |
| "loss": 0.0122, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 24.68, | |
| "learning_rate": 3.942139564457161e-06, | |
| "loss": 0.0, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 24.89, | |
| "learning_rate": 3.7831823239548564e-06, | |
| "loss": 0.0003, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.7812550199347442, | |
| "eval_loss": 1.9822450876235962, | |
| "eval_runtime": 1.5174, | |
| "eval_samples_per_second": 382.228, | |
| "eval_steps_per_second": 1.318, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 25.11, | |
| "learning_rate": 3.6242250834525515e-06, | |
| "loss": 0.0116, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 25.32, | |
| "learning_rate": 3.4652678429502466e-06, | |
| "loss": 0.0, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 25.54, | |
| "learning_rate": 3.306310602447942e-06, | |
| "loss": 0.0004, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "learning_rate": 3.147353361945637e-06, | |
| "loss": 0.0027, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 25.97, | |
| "learning_rate": 2.9883961214433322e-06, | |
| "loss": 0.0059, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_f1": 0.7953953204096383, | |
| "eval_loss": 1.8736791610717773, | |
| "eval_runtime": 1.4646, | |
| "eval_samples_per_second": 396.003, | |
| "eval_steps_per_second": 1.366, | |
| "step": 6058 | |
| }, | |
| { | |
| "epoch": 26.18, | |
| "learning_rate": 2.8294388809410273e-06, | |
| "loss": 0.0001, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 26.39, | |
| "learning_rate": 2.6704816404387224e-06, | |
| "loss": 0.0, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "learning_rate": 2.5115243999364175e-06, | |
| "loss": 0.0, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 26.82, | |
| "learning_rate": 2.3525671594341126e-06, | |
| "loss": 0.0, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_f1": 0.7929217495075929, | |
| "eval_loss": 1.879309892654419, | |
| "eval_runtime": 1.9514, | |
| "eval_samples_per_second": 297.228, | |
| "eval_steps_per_second": 1.025, | |
| "step": 6291 | |
| }, | |
| { | |
| "epoch": 27.04, | |
| "learning_rate": 2.1936099189318076e-06, | |
| "loss": 0.0, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 27.25, | |
| "learning_rate": 2.0346526784295027e-06, | |
| "loss": 0.0, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 27.47, | |
| "learning_rate": 1.8756954379271978e-06, | |
| "loss": 0.0111, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 27.68, | |
| "learning_rate": 1.7167381974248929e-06, | |
| "loss": 0.0, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 27.9, | |
| "learning_rate": 1.557780956922588e-06, | |
| "loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_f1": 0.794029634093503, | |
| "eval_loss": 1.8904625177383423, | |
| "eval_runtime": 2.2478, | |
| "eval_samples_per_second": 258.035, | |
| "eval_steps_per_second": 0.89, | |
| "step": 6524 | |
| }, | |
| { | |
| "epoch": 28.11, | |
| "learning_rate": 1.398823716420283e-06, | |
| "loss": 0.0, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 28.33, | |
| "learning_rate": 1.2398664759179781e-06, | |
| "loss": 0.0, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "learning_rate": 1.0809092354156734e-06, | |
| "loss": 0.0, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 28.76, | |
| "learning_rate": 9.219519949133683e-07, | |
| "loss": 0.0, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 28.97, | |
| "learning_rate": 7.629947544110635e-07, | |
| "loss": 0.0, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_f1": 0.794029634093503, | |
| "eval_loss": 1.8970826864242554, | |
| "eval_runtime": 1.9492, | |
| "eval_samples_per_second": 297.551, | |
| "eval_steps_per_second": 1.026, | |
| "step": 6757 | |
| }, | |
| { | |
| "epoch": 29.18, | |
| "learning_rate": 6.040375139087585e-07, | |
| "loss": 0.0, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 29.4, | |
| "learning_rate": 4.450802734064537e-07, | |
| "loss": 0.0, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 29.61, | |
| "learning_rate": 2.861230329041488e-07, | |
| "loss": 0.0, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 29.83, | |
| "learning_rate": 1.2716579240184392e-07, | |
| "loss": 0.0002, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_f1": 0.7954091951908298, | |
| "eval_loss": 1.9001948833465576, | |
| "eval_runtime": 1.8428, | |
| "eval_samples_per_second": 314.746, | |
| "eval_steps_per_second": 1.085, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 6990, | |
| "total_flos": 5566168764425088.0, | |
| "train_loss": 0.16078996370909257, | |
| "train_runtime": 2045.6711, | |
| "train_samples_per_second": 54.496, | |
| "train_steps_per_second": 3.417 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 6990, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "total_flos": 5566168764425088.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |