| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "global_step": 25000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.2708368301391602, | |
| "eval_mean_acc": 0.0, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.5811, | |
| "eval_samples_per_second": 624.621, | |
| "eval_steps_per_second": 0.811, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.000294, | |
| "loss": 1.2586, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.240064024925232, | |
| "eval_mean_acc": 0.0, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 28.81, | |
| "eval_samples_per_second": 641.34, | |
| "eval_steps_per_second": 0.833, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.221199631690979, | |
| "eval_mean_acc": 0.0, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 28.8341, | |
| "eval_samples_per_second": 640.805, | |
| "eval_steps_per_second": 0.832, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.00028799999999999995, | |
| "loss": 1.1999, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 1.1985267400741577, | |
| "eval_mean_acc": 0.002639524371016869, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2636, | |
| "eval_samples_per_second": 631.399, | |
| "eval_steps_per_second": 0.82, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 1.1824160814285278, | |
| "eval_mean_acc": 0.012239608588524707, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1488, | |
| "eval_samples_per_second": 633.885, | |
| "eval_steps_per_second": 0.823, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.00028199999999999997, | |
| "loss": 1.1635, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 1.1715551614761353, | |
| "eval_mean_acc": 0.030072790065877857, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.3036, | |
| "eval_samples_per_second": 630.536, | |
| "eval_steps_per_second": 0.819, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 1.156156301498413, | |
| "eval_mean_acc": 0.029585336742049613, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2009, | |
| "eval_samples_per_second": 632.755, | |
| "eval_steps_per_second": 0.822, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.000276, | |
| "loss": 1.1361, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 1.1506972312927246, | |
| "eval_mean_acc": 0.008759450723621017, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1539, | |
| "eval_samples_per_second": 633.775, | |
| "eval_steps_per_second": 0.823, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 1.1393311023712158, | |
| "eval_mean_acc": 0.06489605427376179, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.65, | |
| "eval_samples_per_second": 623.17, | |
| "eval_steps_per_second": 0.809, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.00027, | |
| "loss": 1.1142, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 1.1311384439468384, | |
| "eval_mean_acc": 0.015000378543482608, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.295, | |
| "eval_samples_per_second": 630.723, | |
| "eval_steps_per_second": 0.819, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 1.0849117040634155, | |
| "eval_mean_acc": 0.053357515682597535, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2649, | |
| "eval_samples_per_second": 631.37, | |
| "eval_steps_per_second": 0.82, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.00026399999999999997, | |
| "loss": 1.0648, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 1.0643727779388428, | |
| "eval_mean_acc": 0.09116804447578762, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1753, | |
| "eval_samples_per_second": 633.309, | |
| "eval_steps_per_second": 0.823, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 1.0415236949920654, | |
| "eval_mean_acc": 0.1348069252298496, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2028, | |
| "eval_samples_per_second": 632.713, | |
| "eval_steps_per_second": 0.822, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 0.000258, | |
| "loss": 1.0185, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 1.0389618873596191, | |
| "eval_mean_acc": 0.0442671721053236, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.3877, | |
| "eval_samples_per_second": 628.732, | |
| "eval_steps_per_second": 0.817, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 1.0213755369186401, | |
| "eval_mean_acc": 0.11202564075823995, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1898, | |
| "eval_samples_per_second": 632.996, | |
| "eval_steps_per_second": 0.822, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 0.00025199999999999995, | |
| "loss": 0.9951, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 1.015223741531372, | |
| "eval_mean_acc": 0.16473584913990302, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2399, | |
| "eval_samples_per_second": 631.91, | |
| "eval_steps_per_second": 0.821, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 1.019250750541687, | |
| "eval_mean_acc": 0.11940677048185683, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.2372, | |
| "eval_samples_per_second": 631.97, | |
| "eval_steps_per_second": 0.821, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.00024599999999999996, | |
| "loss": 0.9813, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 1.00924813747406, | |
| "eval_mean_acc": 0.11822115362550029, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1553, | |
| "eval_samples_per_second": 633.744, | |
| "eval_steps_per_second": 0.823, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 1.0164929628372192, | |
| "eval_mean_acc": 0.06716894444980748, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 29.1521, | |
| "eval_samples_per_second": 633.813, | |
| "eval_steps_per_second": 0.823, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.00023999999999999998, | |
| "loss": 0.9625, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.9429653286933899, | |
| "eval_mean_acc": 9.008243218749312, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.0347, | |
| "eval_samples_per_second": 615.187, | |
| "eval_steps_per_second": 0.799, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.9300616979598999, | |
| "eval_mean_acc": 13.245211581468084, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.3256, | |
| "eval_samples_per_second": 609.287, | |
| "eval_steps_per_second": 0.791, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 0.000234, | |
| "loss": 0.8958, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.9260903596878052, | |
| "eval_mean_acc": 8.987847368165264, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.0974, | |
| "eval_samples_per_second": 613.906, | |
| "eval_steps_per_second": 0.797, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.9173910617828369, | |
| "eval_mean_acc": 15.36412671561701, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.1139, | |
| "eval_samples_per_second": 613.57, | |
| "eval_steps_per_second": 0.797, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 0.00022799999999999999, | |
| "loss": 0.8756, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.9115529656410217, | |
| "eval_mean_acc": 14.901058980647452, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.2491, | |
| "eval_samples_per_second": 610.828, | |
| "eval_steps_per_second": 0.793, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.9130357503890991, | |
| "eval_mean_acc": 12.268143458883413, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.1967, | |
| "eval_samples_per_second": 611.888, | |
| "eval_steps_per_second": 0.795, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 0.00022199999999999998, | |
| "loss": 0.8607, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.9113653898239136, | |
| "eval_mean_acc": 15.28737721209223, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.4015, | |
| "eval_samples_per_second": 607.766, | |
| "eval_steps_per_second": 0.789, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.9104825854301453, | |
| "eval_mean_acc": 24.120487175005213, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.7064, | |
| "eval_samples_per_second": 601.731, | |
| "eval_steps_per_second": 0.782, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 0.00021599999999999996, | |
| "loss": 0.8482, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 0.9082564115524292, | |
| "eval_mean_acc": 18.66072430390773, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.4946, | |
| "eval_samples_per_second": 605.911, | |
| "eval_steps_per_second": 0.787, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 0.9196337461471558, | |
| "eval_mean_acc": 17.72464537190866, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.2701, | |
| "eval_samples_per_second": 610.404, | |
| "eval_steps_per_second": 0.793, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 0.00020999999999999998, | |
| "loss": 0.8359, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_loss": 0.9148876667022705, | |
| "eval_mean_acc": 19.7733289435757, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.705, | |
| "eval_samples_per_second": 601.759, | |
| "eval_steps_per_second": 0.782, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_loss": 0.9133378863334656, | |
| "eval_mean_acc": 18.62968067275681, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.3878, | |
| "eval_samples_per_second": 608.039, | |
| "eval_steps_per_second": 0.79, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 0.000204, | |
| "loss": 0.8232, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_loss": 0.9479327201843262, | |
| "eval_mean_acc": 12.27033306041223, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.187, | |
| "eval_samples_per_second": 612.085, | |
| "eval_steps_per_second": 0.795, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_loss": 0.930338978767395, | |
| "eval_mean_acc": 19.904256184480708, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.5106, | |
| "eval_samples_per_second": 605.593, | |
| "eval_steps_per_second": 0.787, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 0.000198, | |
| "loss": 0.8092, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_loss": 0.9299731254577637, | |
| "eval_mean_acc": 22.351005658701947, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.5242, | |
| "eval_samples_per_second": 605.324, | |
| "eval_steps_per_second": 0.786, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_loss": 0.9295333027839661, | |
| "eval_mean_acc": 27.811848359305156, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.6542, | |
| "eval_samples_per_second": 602.756, | |
| "eval_steps_per_second": 0.783, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 0.00019199999999999998, | |
| "loss": 0.7951, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_loss": 0.9439055323600769, | |
| "eval_mean_acc": 23.296268042588853, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.5922, | |
| "eval_samples_per_second": 603.977, | |
| "eval_steps_per_second": 0.785, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_loss": 0.962045431137085, | |
| "eval_mean_acc": 20.907431263561396, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.3915, | |
| "eval_samples_per_second": 607.966, | |
| "eval_steps_per_second": 0.79, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 0.000186, | |
| "loss": 0.7803, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_loss": 0.9570873975753784, | |
| "eval_mean_acc": 28.782833553923417, | |
| "eval_median_acc": 52.30263157894737, | |
| "eval_runtime": 30.7856, | |
| "eval_samples_per_second": 600.183, | |
| "eval_steps_per_second": 0.78, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_loss": 0.9814175367355347, | |
| "eval_mean_acc": 25.267935353876744, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.4662, | |
| "eval_samples_per_second": 606.475, | |
| "eval_steps_per_second": 0.788, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 0.7669, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_loss": 0.9786842465400696, | |
| "eval_mean_acc": 31.28196134808705, | |
| "eval_median_acc": 53.38645418326693, | |
| "eval_runtime": 30.754, | |
| "eval_samples_per_second": 600.8, | |
| "eval_steps_per_second": 0.78, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_loss": 0.9765278100967407, | |
| "eval_mean_acc": 28.633316896351385, | |
| "eval_median_acc": 52.20338983050847, | |
| "eval_runtime": 30.6096, | |
| "eval_samples_per_second": 603.634, | |
| "eval_steps_per_second": 0.784, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "learning_rate": 0.00017399999999999997, | |
| "loss": 0.7529, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_loss": 1.0037761926651, | |
| "eval_mean_acc": 27.51536558563458, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.6294, | |
| "eval_samples_per_second": 603.244, | |
| "eval_steps_per_second": 0.784, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_loss": 1.033768892288208, | |
| "eval_mean_acc": 28.494027960898663, | |
| "eval_median_acc": 52.13675213675214, | |
| "eval_runtime": 30.6823, | |
| "eval_samples_per_second": 602.204, | |
| "eval_steps_per_second": 0.782, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "learning_rate": 0.000168, | |
| "loss": 0.7411, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_loss": 1.0279306173324585, | |
| "eval_mean_acc": 28.72059143849434, | |
| "eval_median_acc": 52.27817745803357, | |
| "eval_runtime": 30.711, | |
| "eval_samples_per_second": 601.642, | |
| "eval_steps_per_second": 0.781, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_loss": 1.0176538228988647, | |
| "eval_mean_acc": 29.058336469348834, | |
| "eval_median_acc": 52.41157556270096, | |
| "eval_runtime": 30.72, | |
| "eval_samples_per_second": 601.464, | |
| "eval_steps_per_second": 0.781, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "learning_rate": 0.000162, | |
| "loss": 0.7299, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_loss": 1.0147888660430908, | |
| "eval_mean_acc": 32.92298407705084, | |
| "eval_median_acc": 53.6, | |
| "eval_runtime": 30.8969, | |
| "eval_samples_per_second": 598.021, | |
| "eval_steps_per_second": 0.777, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_loss": 1.0400363206863403, | |
| "eval_mean_acc": 33.51743615357999, | |
| "eval_median_acc": 53.813559322033896, | |
| "eval_runtime": 31.0611, | |
| "eval_samples_per_second": 594.86, | |
| "eval_steps_per_second": 0.773, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "learning_rate": 0.000156, | |
| "loss": 0.7198, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_loss": 1.0477961301803589, | |
| "eval_mean_acc": 30.930946796462933, | |
| "eval_median_acc": 53.25443786982249, | |
| "eval_runtime": 31.0217, | |
| "eval_samples_per_second": 595.615, | |
| "eval_steps_per_second": 0.774, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_loss": 1.0538278818130493, | |
| "eval_mean_acc": 29.24450853094501, | |
| "eval_median_acc": 52.569169960474305, | |
| "eval_runtime": 30.7608, | |
| "eval_samples_per_second": 600.668, | |
| "eval_steps_per_second": 0.78, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 0.00015, | |
| "loss": 0.7109, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 1.0524476766586304, | |
| "eval_mean_acc": 27.231249267136203, | |
| "eval_median_acc": 0.0, | |
| "eval_runtime": 30.6942, | |
| "eval_samples_per_second": 601.971, | |
| "eval_steps_per_second": 0.782, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_loss": 1.0861831903457642, | |
| "eval_mean_acc": 33.350417690919826, | |
| "eval_median_acc": 53.72340425531915, | |
| "eval_runtime": 31.0377, | |
| "eval_samples_per_second": 595.308, | |
| "eval_steps_per_second": 0.773, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "learning_rate": 0.00014399999999999998, | |
| "loss": 0.7036, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_loss": 1.074357271194458, | |
| "eval_mean_acc": 31.877880928875545, | |
| "eval_median_acc": 53.36787564766839, | |
| "eval_runtime": 30.8278, | |
| "eval_samples_per_second": 599.361, | |
| "eval_steps_per_second": 0.779, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_loss": 1.062804937362671, | |
| "eval_mean_acc": 28.372776202894872, | |
| "eval_median_acc": 51.71232876712328, | |
| "eval_runtime": 30.6841, | |
| "eval_samples_per_second": 602.169, | |
| "eval_steps_per_second": 0.782, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "learning_rate": 0.000138, | |
| "loss": 0.6963, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_loss": 1.0586843490600586, | |
| "eval_mean_acc": 30.98220246074368, | |
| "eval_median_acc": 53.125, | |
| "eval_runtime": 31.0347, | |
| "eval_samples_per_second": 595.366, | |
| "eval_steps_per_second": 0.773, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_loss": 1.083398699760437, | |
| "eval_mean_acc": 33.21303099917168, | |
| "eval_median_acc": 53.57142857142857, | |
| "eval_runtime": 30.8467, | |
| "eval_samples_per_second": 598.994, | |
| "eval_steps_per_second": 0.778, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "learning_rate": 0.00013199999999999998, | |
| "loss": 0.69, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_loss": 1.1077489852905273, | |
| "eval_mean_acc": 36.38462165724688, | |
| "eval_median_acc": 54.285714285714285, | |
| "eval_runtime": 31.0446, | |
| "eval_samples_per_second": 595.175, | |
| "eval_steps_per_second": 0.773, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_loss": 1.114971399307251, | |
| "eval_mean_acc": 32.35859059650532, | |
| "eval_median_acc": 53.49544072948328, | |
| "eval_runtime": 30.8614, | |
| "eval_samples_per_second": 598.708, | |
| "eval_steps_per_second": 0.778, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "learning_rate": 0.00012599999999999997, | |
| "loss": 0.6855, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_loss": 1.1352181434631348, | |
| "eval_mean_acc": 36.9014347424795, | |
| "eval_median_acc": 54.406130268199234, | |
| "eval_runtime": 31.0913, | |
| "eval_samples_per_second": 594.282, | |
| "eval_steps_per_second": 0.772, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_loss": 1.1556544303894043, | |
| "eval_mean_acc": 33.88705951883505, | |
| "eval_median_acc": 53.84615384615385, | |
| "eval_runtime": 30.8633, | |
| "eval_samples_per_second": 598.672, | |
| "eval_steps_per_second": 0.778, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.6811, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_loss": 1.1314884424209595, | |
| "eval_mean_acc": 33.595877918258616, | |
| "eval_median_acc": 53.77358490566038, | |
| "eval_runtime": 31.0682, | |
| "eval_samples_per_second": 594.724, | |
| "eval_steps_per_second": 0.772, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_loss": 1.0957316160202026, | |
| "eval_mean_acc": 32.99076589659652, | |
| "eval_median_acc": 53.57142857142857, | |
| "eval_runtime": 30.909, | |
| "eval_samples_per_second": 597.788, | |
| "eval_steps_per_second": 0.776, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "learning_rate": 0.00011399999999999999, | |
| "loss": 0.6768, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_loss": 1.1236425638198853, | |
| "eval_mean_acc": 32.76527042786048, | |
| "eval_median_acc": 53.61216730038023, | |
| "eval_runtime": 31.1533, | |
| "eval_samples_per_second": 593.099, | |
| "eval_steps_per_second": 0.77, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_loss": 1.1152857542037964, | |
| "eval_mean_acc": 34.82832308606056, | |
| "eval_median_acc": 53.94321766561514, | |
| "eval_runtime": 31.0538, | |
| "eval_samples_per_second": 595.001, | |
| "eval_steps_per_second": 0.773, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "learning_rate": 0.00010799999999999998, | |
| "loss": 0.6722, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_loss": 1.1300369501113892, | |
| "eval_mean_acc": 35.01100739222209, | |
| "eval_median_acc": 53.96825396825397, | |
| "eval_runtime": 30.8553, | |
| "eval_samples_per_second": 598.827, | |
| "eval_steps_per_second": 0.778, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_loss": 1.1825590133666992, | |
| "eval_mean_acc": 35.92286498667092, | |
| "eval_median_acc": 54.146341463414636, | |
| "eval_runtime": 30.8364, | |
| "eval_samples_per_second": 599.194, | |
| "eval_steps_per_second": 0.778, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "learning_rate": 0.000102, | |
| "loss": 0.6682, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_loss": 1.153441309928894, | |
| "eval_mean_acc": 38.55105344645134, | |
| "eval_median_acc": 54.48504983388705, | |
| "eval_runtime": 31.0294, | |
| "eval_samples_per_second": 595.468, | |
| "eval_steps_per_second": 0.773, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_loss": 1.1635504961013794, | |
| "eval_mean_acc": 35.828909444304365, | |
| "eval_median_acc": 54.09836065573771, | |
| "eval_runtime": 30.7671, | |
| "eval_samples_per_second": 600.544, | |
| "eval_steps_per_second": 0.78, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "learning_rate": 9.599999999999999e-05, | |
| "loss": 0.6653, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_loss": 1.1404204368591309, | |
| "eval_mean_acc": 34.65428003659497, | |
| "eval_median_acc": 53.84615384615385, | |
| "eval_runtime": 30.8901, | |
| "eval_samples_per_second": 598.152, | |
| "eval_steps_per_second": 0.777, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_loss": 1.1473366022109985, | |
| "eval_mean_acc": 36.24934966791388, | |
| "eval_median_acc": 54.12087912087912, | |
| "eval_runtime": 31.0971, | |
| "eval_samples_per_second": 594.172, | |
| "eval_steps_per_second": 0.772, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 0.6624, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_loss": 1.1532074213027954, | |
| "eval_mean_acc": 39.51401788696222, | |
| "eval_median_acc": 54.5774647887324, | |
| "eval_runtime": 31.0363, | |
| "eval_samples_per_second": 595.334, | |
| "eval_steps_per_second": 0.773, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_loss": 1.1715244054794312, | |
| "eval_mean_acc": 36.23543546260951, | |
| "eval_median_acc": 54.12541254125413, | |
| "eval_runtime": 30.897, | |
| "eval_samples_per_second": 598.02, | |
| "eval_steps_per_second": 0.777, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.6597, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_loss": 1.187477469444275, | |
| "eval_mean_acc": 35.50233390601532, | |
| "eval_median_acc": 54.08560311284047, | |
| "eval_runtime": 30.998, | |
| "eval_samples_per_second": 596.07, | |
| "eval_steps_per_second": 0.774, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_loss": 1.164323329925537, | |
| "eval_mean_acc": 34.50470237269365, | |
| "eval_median_acc": 53.90243902439025, | |
| "eval_runtime": 30.8465, | |
| "eval_samples_per_second": 598.997, | |
| "eval_steps_per_second": 0.778, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "learning_rate": 7.8e-05, | |
| "loss": 0.657, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_loss": 1.1893519163131714, | |
| "eval_mean_acc": 38.75609974678352, | |
| "eval_median_acc": 54.492753623188406, | |
| "eval_runtime": 31.2805, | |
| "eval_samples_per_second": 590.688, | |
| "eval_steps_per_second": 0.767, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 1.208187222480774, | |
| "eval_mean_acc": 38.215720244506755, | |
| "eval_median_acc": 54.460093896713616, | |
| "eval_runtime": 31.3374, | |
| "eval_samples_per_second": 589.615, | |
| "eval_steps_per_second": 0.766, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "learning_rate": 7.199999999999999e-05, | |
| "loss": 0.6543, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_loss": 1.1842811107635498, | |
| "eval_mean_acc": 34.28566002554328, | |
| "eval_median_acc": 53.883495145631066, | |
| "eval_runtime": 30.9371, | |
| "eval_samples_per_second": 597.244, | |
| "eval_steps_per_second": 0.776, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_loss": 1.1689387559890747, | |
| "eval_mean_acc": 38.42640276827011, | |
| "eval_median_acc": 54.43548387096774, | |
| "eval_runtime": 31.0373, | |
| "eval_samples_per_second": 595.316, | |
| "eval_steps_per_second": 0.773, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "learning_rate": 6.599999999999999e-05, | |
| "loss": 0.652, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_loss": 1.2084593772888184, | |
| "eval_mean_acc": 37.749512857893784, | |
| "eval_median_acc": 54.37499999999999, | |
| "eval_runtime": 31.1448, | |
| "eval_samples_per_second": 593.261, | |
| "eval_steps_per_second": 0.771, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_loss": 1.1984684467315674, | |
| "eval_mean_acc": 39.14441481832044, | |
| "eval_median_acc": 54.518950437317784, | |
| "eval_runtime": 30.9307, | |
| "eval_samples_per_second": 597.368, | |
| "eval_steps_per_second": 0.776, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 0.6497, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_loss": 1.2331078052520752, | |
| "eval_mean_acc": 40.01896688672192, | |
| "eval_median_acc": 54.61254612546126, | |
| "eval_runtime": 31.1205, | |
| "eval_samples_per_second": 593.724, | |
| "eval_steps_per_second": 0.771, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_loss": 1.2402710914611816, | |
| "eval_mean_acc": 39.05107714371045, | |
| "eval_median_acc": 54.52054794520548, | |
| "eval_runtime": 30.8964, | |
| "eval_samples_per_second": 598.031, | |
| "eval_steps_per_second": 0.777, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "learning_rate": 5.399999999999999e-05, | |
| "loss": 0.6476, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_loss": 1.1909747123718262, | |
| "eval_mean_acc": 37.29363801013909, | |
| "eval_median_acc": 54.24528301886793, | |
| "eval_runtime": 31.0069, | |
| "eval_samples_per_second": 595.9, | |
| "eval_steps_per_second": 0.774, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_loss": 1.2035155296325684, | |
| "eval_mean_acc": 41.25721916721249, | |
| "eval_median_acc": 54.7244094488189, | |
| "eval_runtime": 31.1934, | |
| "eval_samples_per_second": 592.336, | |
| "eval_steps_per_second": 0.769, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "learning_rate": 4.7999999999999994e-05, | |
| "loss": 0.6457, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_loss": 1.2123523950576782, | |
| "eval_mean_acc": 38.7564566539536, | |
| "eval_median_acc": 54.4891640866873, | |
| "eval_runtime": 31.1487, | |
| "eval_samples_per_second": 593.187, | |
| "eval_steps_per_second": 0.77, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_loss": 1.232680320739746, | |
| "eval_mean_acc": 39.12864363006366, | |
| "eval_median_acc": 54.495912806539515, | |
| "eval_runtime": 30.8988, | |
| "eval_samples_per_second": 597.985, | |
| "eval_steps_per_second": 0.777, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.6437, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_loss": 1.22517991065979, | |
| "eval_mean_acc": 39.8797101803931, | |
| "eval_median_acc": 54.5774647887324, | |
| "eval_runtime": 31.1149, | |
| "eval_samples_per_second": 593.83, | |
| "eval_steps_per_second": 0.771, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_loss": 1.2346075773239136, | |
| "eval_mean_acc": 38.11276454520886, | |
| "eval_median_acc": 54.385964912280706, | |
| "eval_runtime": 31.1511, | |
| "eval_samples_per_second": 593.142, | |
| "eval_steps_per_second": 0.77, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "learning_rate": 3.5999999999999994e-05, | |
| "loss": 0.642, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_loss": 1.244125247001648, | |
| "eval_mean_acc": 40.43582155286865, | |
| "eval_median_acc": 54.65116279069767, | |
| "eval_runtime": 31.1673, | |
| "eval_samples_per_second": 592.833, | |
| "eval_steps_per_second": 0.77, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_loss": 1.24880850315094, | |
| "eval_mean_acc": 39.99124281027319, | |
| "eval_median_acc": 54.59770114942529, | |
| "eval_runtime": 31.0818, | |
| "eval_samples_per_second": 594.463, | |
| "eval_steps_per_second": 0.772, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 0.6403, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_loss": 1.2482763528823853, | |
| "eval_mean_acc": 39.91157811070793, | |
| "eval_median_acc": 54.60526315789473, | |
| "eval_runtime": 31.194, | |
| "eval_samples_per_second": 592.325, | |
| "eval_steps_per_second": 0.769, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_loss": 1.2674145698547363, | |
| "eval_mean_acc": 40.94577170886235, | |
| "eval_median_acc": 54.700854700854705, | |
| "eval_runtime": 31.202, | |
| "eval_samples_per_second": 592.174, | |
| "eval_steps_per_second": 0.769, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "learning_rate": 2.3999999999999997e-05, | |
| "loss": 0.6387, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_loss": 1.269442081451416, | |
| "eval_mean_acc": 40.442322594799656, | |
| "eval_median_acc": 54.63576158940398, | |
| "eval_runtime": 31.0407, | |
| "eval_samples_per_second": 595.25, | |
| "eval_steps_per_second": 0.773, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_loss": 1.2716701030731201, | |
| "eval_mean_acc": 40.739734228756824, | |
| "eval_median_acc": 54.67625899280576, | |
| "eval_runtime": 31.2132, | |
| "eval_samples_per_second": 591.961, | |
| "eval_steps_per_second": 0.769, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "learning_rate": 1.7999999999999997e-05, | |
| "loss": 0.6371, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_loss": 1.2819631099700928, | |
| "eval_mean_acc": 40.571867011274925, | |
| "eval_median_acc": 54.63917525773196, | |
| "eval_runtime": 31.0385, | |
| "eval_samples_per_second": 595.292, | |
| "eval_steps_per_second": 0.773, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_loss": 1.28830885887146, | |
| "eval_mean_acc": 40.353439886436945, | |
| "eval_median_acc": 54.666666666666664, | |
| "eval_runtime": 31.183, | |
| "eval_samples_per_second": 592.534, | |
| "eval_steps_per_second": 0.77, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "learning_rate": 1.1999999999999999e-05, | |
| "loss": 0.6358, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_loss": 1.30391263961792, | |
| "eval_mean_acc": 40.60459621916925, | |
| "eval_median_acc": 54.666666666666664, | |
| "eval_runtime": 31.0782, | |
| "eval_samples_per_second": 594.532, | |
| "eval_steps_per_second": 0.772, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_loss": 1.3067735433578491, | |
| "eval_mean_acc": 41.452516923874725, | |
| "eval_median_acc": 54.773869346733676, | |
| "eval_runtime": 30.922, | |
| "eval_samples_per_second": 597.536, | |
| "eval_steps_per_second": 0.776, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "learning_rate": 5.999999999999999e-06, | |
| "loss": 0.6347, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_loss": 1.313217282295227, | |
| "eval_mean_acc": 41.42466457051602, | |
| "eval_median_acc": 54.75409836065573, | |
| "eval_runtime": 31.2774, | |
| "eval_samples_per_second": 590.746, | |
| "eval_steps_per_second": 0.767, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_loss": 1.3124245405197144, | |
| "eval_mean_acc": 41.25630205018876, | |
| "eval_median_acc": 54.74452554744526, | |
| "eval_runtime": 30.8857, | |
| "eval_samples_per_second": 598.238, | |
| "eval_steps_per_second": 0.777, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.6339, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 1.3174444437026978, | |
| "eval_mean_acc": 41.405191345713106, | |
| "eval_median_acc": 54.75578406169666, | |
| "eval_runtime": 31.1131, | |
| "eval_samples_per_second": 593.865, | |
| "eval_steps_per_second": 0.771, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "step": 25000, | |
| "total_flos": 1.660761144e+18, | |
| "train_loss": 0.7862733935546875, | |
| "train_runtime": 36507.5594, | |
| "train_samples_per_second": 273.916, | |
| "train_steps_per_second": 0.685 | |
| } | |
| ], | |
| "max_steps": 25000, | |
| "num_train_epochs": 100, | |
| "total_flos": 1.660761144e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |