| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 100.0, |
| "global_step": 25000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.2708368301391602, |
| "eval_mean_acc": 0.0, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.5811, |
| "eval_samples_per_second": 624.621, |
| "eval_steps_per_second": 0.811, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.000294, |
| "loss": 1.2586, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.240064024925232, |
| "eval_mean_acc": 0.0, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 28.81, |
| "eval_samples_per_second": 641.34, |
| "eval_steps_per_second": 0.833, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.221199631690979, |
| "eval_mean_acc": 0.0, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 28.8341, |
| "eval_samples_per_second": 640.805, |
| "eval_steps_per_second": 0.832, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.00028799999999999995, |
| "loss": 1.1999, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.1985267400741577, |
| "eval_mean_acc": 0.002639524371016869, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2636, |
| "eval_samples_per_second": 631.399, |
| "eval_steps_per_second": 0.82, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 1.1824160814285278, |
| "eval_mean_acc": 0.012239608588524707, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1488, |
| "eval_samples_per_second": 633.885, |
| "eval_steps_per_second": 0.823, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 0.00028199999999999997, |
| "loss": 1.1635, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 1.1715551614761353, |
| "eval_mean_acc": 0.030072790065877857, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.3036, |
| "eval_samples_per_second": 630.536, |
| "eval_steps_per_second": 0.819, |
| "step": 1500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 1.156156301498413, |
| "eval_mean_acc": 0.029585336742049613, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2009, |
| "eval_samples_per_second": 632.755, |
| "eval_steps_per_second": 0.822, |
| "step": 1750 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.000276, |
| "loss": 1.1361, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 1.1506972312927246, |
| "eval_mean_acc": 0.008759450723621017, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1539, |
| "eval_samples_per_second": 633.775, |
| "eval_steps_per_second": 0.823, |
| "step": 2000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 1.1393311023712158, |
| "eval_mean_acc": 0.06489605427376179, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.65, |
| "eval_samples_per_second": 623.17, |
| "eval_steps_per_second": 0.809, |
| "step": 2250 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.00027, |
| "loss": 1.1142, |
| "step": 2500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 1.1311384439468384, |
| "eval_mean_acc": 0.015000378543482608, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.295, |
| "eval_samples_per_second": 630.723, |
| "eval_steps_per_second": 0.819, |
| "step": 2500 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 1.0849117040634155, |
| "eval_mean_acc": 0.053357515682597535, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2649, |
| "eval_samples_per_second": 631.37, |
| "eval_steps_per_second": 0.82, |
| "step": 2750 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 0.00026399999999999997, |
| "loss": 1.0648, |
| "step": 3000 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 1.0643727779388428, |
| "eval_mean_acc": 0.09116804447578762, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1753, |
| "eval_samples_per_second": 633.309, |
| "eval_steps_per_second": 0.823, |
| "step": 3000 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 1.0415236949920654, |
| "eval_mean_acc": 0.1348069252298496, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2028, |
| "eval_samples_per_second": 632.713, |
| "eval_steps_per_second": 0.822, |
| "step": 3250 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 0.000258, |
| "loss": 1.0185, |
| "step": 3500 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 1.0389618873596191, |
| "eval_mean_acc": 0.0442671721053236, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.3877, |
| "eval_samples_per_second": 628.732, |
| "eval_steps_per_second": 0.817, |
| "step": 3500 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 1.0213755369186401, |
| "eval_mean_acc": 0.11202564075823995, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1898, |
| "eval_samples_per_second": 632.996, |
| "eval_steps_per_second": 0.822, |
| "step": 3750 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 0.00025199999999999995, |
| "loss": 0.9951, |
| "step": 4000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 1.015223741531372, |
| "eval_mean_acc": 0.16473584913990302, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2399, |
| "eval_samples_per_second": 631.91, |
| "eval_steps_per_second": 0.821, |
| "step": 4000 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 1.019250750541687, |
| "eval_mean_acc": 0.11940677048185683, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.2372, |
| "eval_samples_per_second": 631.97, |
| "eval_steps_per_second": 0.821, |
| "step": 4250 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 0.00024599999999999996, |
| "loss": 0.9813, |
| "step": 4500 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 1.00924813747406, |
| "eval_mean_acc": 0.11822115362550029, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1553, |
| "eval_samples_per_second": 633.744, |
| "eval_steps_per_second": 0.823, |
| "step": 4500 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 1.0164929628372192, |
| "eval_mean_acc": 0.06716894444980748, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 29.1521, |
| "eval_samples_per_second": 633.813, |
| "eval_steps_per_second": 0.823, |
| "step": 4750 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 0.00023999999999999998, |
| "loss": 0.9625, |
| "step": 5000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.9429653286933899, |
| "eval_mean_acc": 9.008243218749312, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.0347, |
| "eval_samples_per_second": 615.187, |
| "eval_steps_per_second": 0.799, |
| "step": 5000 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.9300616979598999, |
| "eval_mean_acc": 13.245211581468084, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.3256, |
| "eval_samples_per_second": 609.287, |
| "eval_steps_per_second": 0.791, |
| "step": 5250 |
| }, |
| { |
| "epoch": 22.0, |
| "learning_rate": 0.000234, |
| "loss": 0.8958, |
| "step": 5500 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.9260903596878052, |
| "eval_mean_acc": 8.987847368165264, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.0974, |
| "eval_samples_per_second": 613.906, |
| "eval_steps_per_second": 0.797, |
| "step": 5500 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.9173910617828369, |
| "eval_mean_acc": 15.36412671561701, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.1139, |
| "eval_samples_per_second": 613.57, |
| "eval_steps_per_second": 0.797, |
| "step": 5750 |
| }, |
| { |
| "epoch": 24.0, |
| "learning_rate": 0.00022799999999999999, |
| "loss": 0.8756, |
| "step": 6000 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.9115529656410217, |
| "eval_mean_acc": 14.901058980647452, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.2491, |
| "eval_samples_per_second": 610.828, |
| "eval_steps_per_second": 0.793, |
| "step": 6000 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.9130357503890991, |
| "eval_mean_acc": 12.268143458883413, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.1967, |
| "eval_samples_per_second": 611.888, |
| "eval_steps_per_second": 0.795, |
| "step": 6250 |
| }, |
| { |
| "epoch": 26.0, |
| "learning_rate": 0.00022199999999999998, |
| "loss": 0.8607, |
| "step": 6500 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.9113653898239136, |
| "eval_mean_acc": 15.28737721209223, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.4015, |
| "eval_samples_per_second": 607.766, |
| "eval_steps_per_second": 0.789, |
| "step": 6500 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.9104825854301453, |
| "eval_mean_acc": 24.120487175005213, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.7064, |
| "eval_samples_per_second": 601.731, |
| "eval_steps_per_second": 0.782, |
| "step": 6750 |
| }, |
| { |
| "epoch": 28.0, |
| "learning_rate": 0.00021599999999999996, |
| "loss": 0.8482, |
| "step": 7000 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.9082564115524292, |
| "eval_mean_acc": 18.66072430390773, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.4946, |
| "eval_samples_per_second": 605.911, |
| "eval_steps_per_second": 0.787, |
| "step": 7000 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.9196337461471558, |
| "eval_mean_acc": 17.72464537190866, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.2701, |
| "eval_samples_per_second": 610.404, |
| "eval_steps_per_second": 0.793, |
| "step": 7250 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 0.00020999999999999998, |
| "loss": 0.8359, |
| "step": 7500 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.9148876667022705, |
| "eval_mean_acc": 19.7733289435757, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.705, |
| "eval_samples_per_second": 601.759, |
| "eval_steps_per_second": 0.782, |
| "step": 7500 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 0.9133378863334656, |
| "eval_mean_acc": 18.62968067275681, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.3878, |
| "eval_samples_per_second": 608.039, |
| "eval_steps_per_second": 0.79, |
| "step": 7750 |
| }, |
| { |
| "epoch": 32.0, |
| "learning_rate": 0.000204, |
| "loss": 0.8232, |
| "step": 8000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.9479327201843262, |
| "eval_mean_acc": 12.27033306041223, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.187, |
| "eval_samples_per_second": 612.085, |
| "eval_steps_per_second": 0.795, |
| "step": 8000 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 0.930338978767395, |
| "eval_mean_acc": 19.904256184480708, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.5106, |
| "eval_samples_per_second": 605.593, |
| "eval_steps_per_second": 0.787, |
| "step": 8250 |
| }, |
| { |
| "epoch": 34.0, |
| "learning_rate": 0.000198, |
| "loss": 0.8092, |
| "step": 8500 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 0.9299731254577637, |
| "eval_mean_acc": 22.351005658701947, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.5242, |
| "eval_samples_per_second": 605.324, |
| "eval_steps_per_second": 0.786, |
| "step": 8500 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_loss": 0.9295333027839661, |
| "eval_mean_acc": 27.811848359305156, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.6542, |
| "eval_samples_per_second": 602.756, |
| "eval_steps_per_second": 0.783, |
| "step": 8750 |
| }, |
| { |
| "epoch": 36.0, |
| "learning_rate": 0.00019199999999999998, |
| "loss": 0.7951, |
| "step": 9000 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_loss": 0.9439055323600769, |
| "eval_mean_acc": 23.296268042588853, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.5922, |
| "eval_samples_per_second": 603.977, |
| "eval_steps_per_second": 0.785, |
| "step": 9000 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_loss": 0.962045431137085, |
| "eval_mean_acc": 20.907431263561396, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.3915, |
| "eval_samples_per_second": 607.966, |
| "eval_steps_per_second": 0.79, |
| "step": 9250 |
| }, |
| { |
| "epoch": 38.0, |
| "learning_rate": 0.000186, |
| "loss": 0.7803, |
| "step": 9500 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_loss": 0.9570873975753784, |
| "eval_mean_acc": 28.782833553923417, |
| "eval_median_acc": 52.30263157894737, |
| "eval_runtime": 30.7856, |
| "eval_samples_per_second": 600.183, |
| "eval_steps_per_second": 0.78, |
| "step": 9500 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 0.9814175367355347, |
| "eval_mean_acc": 25.267935353876744, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.4662, |
| "eval_samples_per_second": 606.475, |
| "eval_steps_per_second": 0.788, |
| "step": 9750 |
| }, |
| { |
| "epoch": 40.0, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 0.7669, |
| "step": 10000 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 0.9786842465400696, |
| "eval_mean_acc": 31.28196134808705, |
| "eval_median_acc": 53.38645418326693, |
| "eval_runtime": 30.754, |
| "eval_samples_per_second": 600.8, |
| "eval_steps_per_second": 0.78, |
| "step": 10000 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_loss": 0.9765278100967407, |
| "eval_mean_acc": 28.633316896351385, |
| "eval_median_acc": 52.20338983050847, |
| "eval_runtime": 30.6096, |
| "eval_samples_per_second": 603.634, |
| "eval_steps_per_second": 0.784, |
| "step": 10250 |
| }, |
| { |
| "epoch": 42.0, |
| "learning_rate": 0.00017399999999999997, |
| "loss": 0.7529, |
| "step": 10500 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_loss": 1.0037761926651, |
| "eval_mean_acc": 27.51536558563458, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.6294, |
| "eval_samples_per_second": 603.244, |
| "eval_steps_per_second": 0.784, |
| "step": 10500 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_loss": 1.033768892288208, |
| "eval_mean_acc": 28.494027960898663, |
| "eval_median_acc": 52.13675213675214, |
| "eval_runtime": 30.6823, |
| "eval_samples_per_second": 602.204, |
| "eval_steps_per_second": 0.782, |
| "step": 10750 |
| }, |
| { |
| "epoch": 44.0, |
| "learning_rate": 0.000168, |
| "loss": 0.7411, |
| "step": 11000 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_loss": 1.0279306173324585, |
| "eval_mean_acc": 28.72059143849434, |
| "eval_median_acc": 52.27817745803357, |
| "eval_runtime": 30.711, |
| "eval_samples_per_second": 601.642, |
| "eval_steps_per_second": 0.781, |
| "step": 11000 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_loss": 1.0176538228988647, |
| "eval_mean_acc": 29.058336469348834, |
| "eval_median_acc": 52.41157556270096, |
| "eval_runtime": 30.72, |
| "eval_samples_per_second": 601.464, |
| "eval_steps_per_second": 0.781, |
| "step": 11250 |
| }, |
| { |
| "epoch": 46.0, |
| "learning_rate": 0.000162, |
| "loss": 0.7299, |
| "step": 11500 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_loss": 1.0147888660430908, |
| "eval_mean_acc": 32.92298407705084, |
| "eval_median_acc": 53.6, |
| "eval_runtime": 30.8969, |
| "eval_samples_per_second": 598.021, |
| "eval_steps_per_second": 0.777, |
| "step": 11500 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_loss": 1.0400363206863403, |
| "eval_mean_acc": 33.51743615357999, |
| "eval_median_acc": 53.813559322033896, |
| "eval_runtime": 31.0611, |
| "eval_samples_per_second": 594.86, |
| "eval_steps_per_second": 0.773, |
| "step": 11750 |
| }, |
| { |
| "epoch": 48.0, |
| "learning_rate": 0.000156, |
| "loss": 0.7198, |
| "step": 12000 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_loss": 1.0477961301803589, |
| "eval_mean_acc": 30.930946796462933, |
| "eval_median_acc": 53.25443786982249, |
| "eval_runtime": 31.0217, |
| "eval_samples_per_second": 595.615, |
| "eval_steps_per_second": 0.774, |
| "step": 12000 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_loss": 1.0538278818130493, |
| "eval_mean_acc": 29.24450853094501, |
| "eval_median_acc": 52.569169960474305, |
| "eval_runtime": 30.7608, |
| "eval_samples_per_second": 600.668, |
| "eval_steps_per_second": 0.78, |
| "step": 12250 |
| }, |
| { |
| "epoch": 50.0, |
| "learning_rate": 0.00015, |
| "loss": 0.7109, |
| "step": 12500 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 1.0524476766586304, |
| "eval_mean_acc": 27.231249267136203, |
| "eval_median_acc": 0.0, |
| "eval_runtime": 30.6942, |
| "eval_samples_per_second": 601.971, |
| "eval_steps_per_second": 0.782, |
| "step": 12500 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_loss": 1.0861831903457642, |
| "eval_mean_acc": 33.350417690919826, |
| "eval_median_acc": 53.72340425531915, |
| "eval_runtime": 31.0377, |
| "eval_samples_per_second": 595.308, |
| "eval_steps_per_second": 0.773, |
| "step": 12750 |
| }, |
| { |
| "epoch": 52.0, |
| "learning_rate": 0.00014399999999999998, |
| "loss": 0.7036, |
| "step": 13000 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_loss": 1.074357271194458, |
| "eval_mean_acc": 31.877880928875545, |
| "eval_median_acc": 53.36787564766839, |
| "eval_runtime": 30.8278, |
| "eval_samples_per_second": 599.361, |
| "eval_steps_per_second": 0.779, |
| "step": 13000 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_loss": 1.062804937362671, |
| "eval_mean_acc": 28.372776202894872, |
| "eval_median_acc": 51.71232876712328, |
| "eval_runtime": 30.6841, |
| "eval_samples_per_second": 602.169, |
| "eval_steps_per_second": 0.782, |
| "step": 13250 |
| }, |
| { |
| "epoch": 54.0, |
| "learning_rate": 0.000138, |
| "loss": 0.6963, |
| "step": 13500 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_loss": 1.0586843490600586, |
| "eval_mean_acc": 30.98220246074368, |
| "eval_median_acc": 53.125, |
| "eval_runtime": 31.0347, |
| "eval_samples_per_second": 595.366, |
| "eval_steps_per_second": 0.773, |
| "step": 13500 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_loss": 1.083398699760437, |
| "eval_mean_acc": 33.21303099917168, |
| "eval_median_acc": 53.57142857142857, |
| "eval_runtime": 30.8467, |
| "eval_samples_per_second": 598.994, |
| "eval_steps_per_second": 0.778, |
| "step": 13750 |
| }, |
| { |
| "epoch": 56.0, |
| "learning_rate": 0.00013199999999999998, |
| "loss": 0.69, |
| "step": 14000 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_loss": 1.1077489852905273, |
| "eval_mean_acc": 36.38462165724688, |
| "eval_median_acc": 54.285714285714285, |
| "eval_runtime": 31.0446, |
| "eval_samples_per_second": 595.175, |
| "eval_steps_per_second": 0.773, |
| "step": 14000 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_loss": 1.114971399307251, |
| "eval_mean_acc": 32.35859059650532, |
| "eval_median_acc": 53.49544072948328, |
| "eval_runtime": 30.8614, |
| "eval_samples_per_second": 598.708, |
| "eval_steps_per_second": 0.778, |
| "step": 14250 |
| }, |
| { |
| "epoch": 58.0, |
| "learning_rate": 0.00012599999999999997, |
| "loss": 0.6855, |
| "step": 14500 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_loss": 1.1352181434631348, |
| "eval_mean_acc": 36.9014347424795, |
| "eval_median_acc": 54.406130268199234, |
| "eval_runtime": 31.0913, |
| "eval_samples_per_second": 594.282, |
| "eval_steps_per_second": 0.772, |
| "step": 14500 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_loss": 1.1556544303894043, |
| "eval_mean_acc": 33.88705951883505, |
| "eval_median_acc": 53.84615384615385, |
| "eval_runtime": 30.8633, |
| "eval_samples_per_second": 598.672, |
| "eval_steps_per_second": 0.778, |
| "step": 14750 |
| }, |
| { |
| "epoch": 60.0, |
| "learning_rate": 0.00011999999999999999, |
| "loss": 0.6811, |
| "step": 15000 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_loss": 1.1314884424209595, |
| "eval_mean_acc": 33.595877918258616, |
| "eval_median_acc": 53.77358490566038, |
| "eval_runtime": 31.0682, |
| "eval_samples_per_second": 594.724, |
| "eval_steps_per_second": 0.772, |
| "step": 15000 |
| }, |
| { |
| "epoch": 61.0, |
| "eval_loss": 1.0957316160202026, |
| "eval_mean_acc": 32.99076589659652, |
| "eval_median_acc": 53.57142857142857, |
| "eval_runtime": 30.909, |
| "eval_samples_per_second": 597.788, |
| "eval_steps_per_second": 0.776, |
| "step": 15250 |
| }, |
| { |
| "epoch": 62.0, |
| "learning_rate": 0.00011399999999999999, |
| "loss": 0.6768, |
| "step": 15500 |
| }, |
| { |
| "epoch": 62.0, |
| "eval_loss": 1.1236425638198853, |
| "eval_mean_acc": 32.76527042786048, |
| "eval_median_acc": 53.61216730038023, |
| "eval_runtime": 31.1533, |
| "eval_samples_per_second": 593.099, |
| "eval_steps_per_second": 0.77, |
| "step": 15500 |
| }, |
| { |
| "epoch": 63.0, |
| "eval_loss": 1.1152857542037964, |
| "eval_mean_acc": 34.82832308606056, |
| "eval_median_acc": 53.94321766561514, |
| "eval_runtime": 31.0538, |
| "eval_samples_per_second": 595.001, |
| "eval_steps_per_second": 0.773, |
| "step": 15750 |
| }, |
| { |
| "epoch": 64.0, |
| "learning_rate": 0.00010799999999999998, |
| "loss": 0.6722, |
| "step": 16000 |
| }, |
| { |
| "epoch": 64.0, |
| "eval_loss": 1.1300369501113892, |
| "eval_mean_acc": 35.01100739222209, |
| "eval_median_acc": 53.96825396825397, |
| "eval_runtime": 30.8553, |
| "eval_samples_per_second": 598.827, |
| "eval_steps_per_second": 0.778, |
| "step": 16000 |
| }, |
| { |
| "epoch": 65.0, |
| "eval_loss": 1.1825590133666992, |
| "eval_mean_acc": 35.92286498667092, |
| "eval_median_acc": 54.146341463414636, |
| "eval_runtime": 30.8364, |
| "eval_samples_per_second": 599.194, |
| "eval_steps_per_second": 0.778, |
| "step": 16250 |
| }, |
| { |
| "epoch": 66.0, |
| "learning_rate": 0.000102, |
| "loss": 0.6682, |
| "step": 16500 |
| }, |
| { |
| "epoch": 66.0, |
| "eval_loss": 1.153441309928894, |
| "eval_mean_acc": 38.55105344645134, |
| "eval_median_acc": 54.48504983388705, |
| "eval_runtime": 31.0294, |
| "eval_samples_per_second": 595.468, |
| "eval_steps_per_second": 0.773, |
| "step": 16500 |
| }, |
| { |
| "epoch": 67.0, |
| "eval_loss": 1.1635504961013794, |
| "eval_mean_acc": 35.828909444304365, |
| "eval_median_acc": 54.09836065573771, |
| "eval_runtime": 30.7671, |
| "eval_samples_per_second": 600.544, |
| "eval_steps_per_second": 0.78, |
| "step": 16750 |
| }, |
| { |
| "epoch": 68.0, |
| "learning_rate": 9.599999999999999e-05, |
| "loss": 0.6653, |
| "step": 17000 |
| }, |
| { |
| "epoch": 68.0, |
| "eval_loss": 1.1404204368591309, |
| "eval_mean_acc": 34.65428003659497, |
| "eval_median_acc": 53.84615384615385, |
| "eval_runtime": 30.8901, |
| "eval_samples_per_second": 598.152, |
| "eval_steps_per_second": 0.777, |
| "step": 17000 |
| }, |
| { |
| "epoch": 69.0, |
| "eval_loss": 1.1473366022109985, |
| "eval_mean_acc": 36.24934966791388, |
| "eval_median_acc": 54.12087912087912, |
| "eval_runtime": 31.0971, |
| "eval_samples_per_second": 594.172, |
| "eval_steps_per_second": 0.772, |
| "step": 17250 |
| }, |
| { |
| "epoch": 70.0, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 0.6624, |
| "step": 17500 |
| }, |
| { |
| "epoch": 70.0, |
| "eval_loss": 1.1532074213027954, |
| "eval_mean_acc": 39.51401788696222, |
| "eval_median_acc": 54.5774647887324, |
| "eval_runtime": 31.0363, |
| "eval_samples_per_second": 595.334, |
| "eval_steps_per_second": 0.773, |
| "step": 17500 |
| }, |
| { |
| "epoch": 71.0, |
| "eval_loss": 1.1715244054794312, |
| "eval_mean_acc": 36.23543546260951, |
| "eval_median_acc": 54.12541254125413, |
| "eval_runtime": 30.897, |
| "eval_samples_per_second": 598.02, |
| "eval_steps_per_second": 0.777, |
| "step": 17750 |
| }, |
| { |
| "epoch": 72.0, |
| "learning_rate": 8.4e-05, |
| "loss": 0.6597, |
| "step": 18000 |
| }, |
| { |
| "epoch": 72.0, |
| "eval_loss": 1.187477469444275, |
| "eval_mean_acc": 35.50233390601532, |
| "eval_median_acc": 54.08560311284047, |
| "eval_runtime": 30.998, |
| "eval_samples_per_second": 596.07, |
| "eval_steps_per_second": 0.774, |
| "step": 18000 |
| }, |
| { |
| "epoch": 73.0, |
| "eval_loss": 1.164323329925537, |
| "eval_mean_acc": 34.50470237269365, |
| "eval_median_acc": 53.90243902439025, |
| "eval_runtime": 30.8465, |
| "eval_samples_per_second": 598.997, |
| "eval_steps_per_second": 0.778, |
| "step": 18250 |
| }, |
| { |
| "epoch": 74.0, |
| "learning_rate": 7.8e-05, |
| "loss": 0.657, |
| "step": 18500 |
| }, |
| { |
| "epoch": 74.0, |
| "eval_loss": 1.1893519163131714, |
| "eval_mean_acc": 38.75609974678352, |
| "eval_median_acc": 54.492753623188406, |
| "eval_runtime": 31.2805, |
| "eval_samples_per_second": 590.688, |
| "eval_steps_per_second": 0.767, |
| "step": 18500 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_loss": 1.208187222480774, |
| "eval_mean_acc": 38.215720244506755, |
| "eval_median_acc": 54.460093896713616, |
| "eval_runtime": 31.3374, |
| "eval_samples_per_second": 589.615, |
| "eval_steps_per_second": 0.766, |
| "step": 18750 |
| }, |
| { |
| "epoch": 76.0, |
| "learning_rate": 7.199999999999999e-05, |
| "loss": 0.6543, |
| "step": 19000 |
| }, |
| { |
| "epoch": 76.0, |
| "eval_loss": 1.1842811107635498, |
| "eval_mean_acc": 34.28566002554328, |
| "eval_median_acc": 53.883495145631066, |
| "eval_runtime": 30.9371, |
| "eval_samples_per_second": 597.244, |
| "eval_steps_per_second": 0.776, |
| "step": 19000 |
| }, |
| { |
| "epoch": 77.0, |
| "eval_loss": 1.1689387559890747, |
| "eval_mean_acc": 38.42640276827011, |
| "eval_median_acc": 54.43548387096774, |
| "eval_runtime": 31.0373, |
| "eval_samples_per_second": 595.316, |
| "eval_steps_per_second": 0.773, |
| "step": 19250 |
| }, |
| { |
| "epoch": 78.0, |
| "learning_rate": 6.599999999999999e-05, |
| "loss": 0.652, |
| "step": 19500 |
| }, |
| { |
| "epoch": 78.0, |
| "eval_loss": 1.2084593772888184, |
| "eval_mean_acc": 37.749512857893784, |
| "eval_median_acc": 54.37499999999999, |
| "eval_runtime": 31.1448, |
| "eval_samples_per_second": 593.261, |
| "eval_steps_per_second": 0.771, |
| "step": 19500 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_loss": 1.1984684467315674, |
| "eval_mean_acc": 39.14441481832044, |
| "eval_median_acc": 54.518950437317784, |
| "eval_runtime": 30.9307, |
| "eval_samples_per_second": 597.368, |
| "eval_steps_per_second": 0.776, |
| "step": 19750 |
| }, |
| { |
| "epoch": 80.0, |
| "learning_rate": 5.9999999999999995e-05, |
| "loss": 0.6497, |
| "step": 20000 |
| }, |
| { |
| "epoch": 80.0, |
| "eval_loss": 1.2331078052520752, |
| "eval_mean_acc": 40.01896688672192, |
| "eval_median_acc": 54.61254612546126, |
| "eval_runtime": 31.1205, |
| "eval_samples_per_second": 593.724, |
| "eval_steps_per_second": 0.771, |
| "step": 20000 |
| }, |
| { |
| "epoch": 81.0, |
| "eval_loss": 1.2402710914611816, |
| "eval_mean_acc": 39.05107714371045, |
| "eval_median_acc": 54.52054794520548, |
| "eval_runtime": 30.8964, |
| "eval_samples_per_second": 598.031, |
| "eval_steps_per_second": 0.777, |
| "step": 20250 |
| }, |
| { |
| "epoch": 82.0, |
| "learning_rate": 5.399999999999999e-05, |
| "loss": 0.6476, |
| "step": 20500 |
| }, |
| { |
| "epoch": 82.0, |
| "eval_loss": 1.1909747123718262, |
| "eval_mean_acc": 37.29363801013909, |
| "eval_median_acc": 54.24528301886793, |
| "eval_runtime": 31.0069, |
| "eval_samples_per_second": 595.9, |
| "eval_steps_per_second": 0.774, |
| "step": 20500 |
| }, |
| { |
| "epoch": 83.0, |
| "eval_loss": 1.2035155296325684, |
| "eval_mean_acc": 41.25721916721249, |
| "eval_median_acc": 54.7244094488189, |
| "eval_runtime": 31.1934, |
| "eval_samples_per_second": 592.336, |
| "eval_steps_per_second": 0.769, |
| "step": 20750 |
| }, |
| { |
| "epoch": 84.0, |
| "learning_rate": 4.7999999999999994e-05, |
| "loss": 0.6457, |
| "step": 21000 |
| }, |
| { |
| "epoch": 84.0, |
| "eval_loss": 1.2123523950576782, |
| "eval_mean_acc": 38.7564566539536, |
| "eval_median_acc": 54.4891640866873, |
| "eval_runtime": 31.1487, |
| "eval_samples_per_second": 593.187, |
| "eval_steps_per_second": 0.77, |
| "step": 21000 |
| }, |
| { |
| "epoch": 85.0, |
| "eval_loss": 1.232680320739746, |
| "eval_mean_acc": 39.12864363006366, |
| "eval_median_acc": 54.495912806539515, |
| "eval_runtime": 30.8988, |
| "eval_samples_per_second": 597.985, |
| "eval_steps_per_second": 0.777, |
| "step": 21250 |
| }, |
| { |
| "epoch": 86.0, |
| "learning_rate": 4.2e-05, |
| "loss": 0.6437, |
| "step": 21500 |
| }, |
| { |
| "epoch": 86.0, |
| "eval_loss": 1.22517991065979, |
| "eval_mean_acc": 39.8797101803931, |
| "eval_median_acc": 54.5774647887324, |
| "eval_runtime": 31.1149, |
| "eval_samples_per_second": 593.83, |
| "eval_steps_per_second": 0.771, |
| "step": 21500 |
| }, |
| { |
| "epoch": 87.0, |
| "eval_loss": 1.2346075773239136, |
| "eval_mean_acc": 38.11276454520886, |
| "eval_median_acc": 54.385964912280706, |
| "eval_runtime": 31.1511, |
| "eval_samples_per_second": 593.142, |
| "eval_steps_per_second": 0.77, |
| "step": 21750 |
| }, |
| { |
| "epoch": 88.0, |
| "learning_rate": 3.5999999999999994e-05, |
| "loss": 0.642, |
| "step": 22000 |
| }, |
| { |
| "epoch": 88.0, |
| "eval_loss": 1.244125247001648, |
| "eval_mean_acc": 40.43582155286865, |
| "eval_median_acc": 54.65116279069767, |
| "eval_runtime": 31.1673, |
| "eval_samples_per_second": 592.833, |
| "eval_steps_per_second": 0.77, |
| "step": 22000 |
| }, |
| { |
| "epoch": 89.0, |
| "eval_loss": 1.24880850315094, |
| "eval_mean_acc": 39.99124281027319, |
| "eval_median_acc": 54.59770114942529, |
| "eval_runtime": 31.0818, |
| "eval_samples_per_second": 594.463, |
| "eval_steps_per_second": 0.772, |
| "step": 22250 |
| }, |
| { |
| "epoch": 90.0, |
| "learning_rate": 2.9999999999999997e-05, |
| "loss": 0.6403, |
| "step": 22500 |
| }, |
| { |
| "epoch": 90.0, |
| "eval_loss": 1.2482763528823853, |
| "eval_mean_acc": 39.91157811070793, |
| "eval_median_acc": 54.60526315789473, |
| "eval_runtime": 31.194, |
| "eval_samples_per_second": 592.325, |
| "eval_steps_per_second": 0.769, |
| "step": 22500 |
| }, |
| { |
| "epoch": 91.0, |
| "eval_loss": 1.2674145698547363, |
| "eval_mean_acc": 40.94577170886235, |
| "eval_median_acc": 54.700854700854705, |
| "eval_runtime": 31.202, |
| "eval_samples_per_second": 592.174, |
| "eval_steps_per_second": 0.769, |
| "step": 22750 |
| }, |
| { |
| "epoch": 92.0, |
| "learning_rate": 2.3999999999999997e-05, |
| "loss": 0.6387, |
| "step": 23000 |
| }, |
| { |
| "epoch": 92.0, |
| "eval_loss": 1.269442081451416, |
| "eval_mean_acc": 40.442322594799656, |
| "eval_median_acc": 54.63576158940398, |
| "eval_runtime": 31.0407, |
| "eval_samples_per_second": 595.25, |
| "eval_steps_per_second": 0.773, |
| "step": 23000 |
| }, |
| { |
| "epoch": 93.0, |
| "eval_loss": 1.2716701030731201, |
| "eval_mean_acc": 40.739734228756824, |
| "eval_median_acc": 54.67625899280576, |
| "eval_runtime": 31.2132, |
| "eval_samples_per_second": 591.961, |
| "eval_steps_per_second": 0.769, |
| "step": 23250 |
| }, |
| { |
| "epoch": 94.0, |
| "learning_rate": 1.7999999999999997e-05, |
| "loss": 0.6371, |
| "step": 23500 |
| }, |
| { |
| "epoch": 94.0, |
| "eval_loss": 1.2819631099700928, |
| "eval_mean_acc": 40.571867011274925, |
| "eval_median_acc": 54.63917525773196, |
| "eval_runtime": 31.0385, |
| "eval_samples_per_second": 595.292, |
| "eval_steps_per_second": 0.773, |
| "step": 23500 |
| }, |
| { |
| "epoch": 95.0, |
| "eval_loss": 1.28830885887146, |
| "eval_mean_acc": 40.353439886436945, |
| "eval_median_acc": 54.666666666666664, |
| "eval_runtime": 31.183, |
| "eval_samples_per_second": 592.534, |
| "eval_steps_per_second": 0.77, |
| "step": 23750 |
| }, |
| { |
| "epoch": 96.0, |
| "learning_rate": 1.1999999999999999e-05, |
| "loss": 0.6358, |
| "step": 24000 |
| }, |
| { |
| "epoch": 96.0, |
| "eval_loss": 1.30391263961792, |
| "eval_mean_acc": 40.60459621916925, |
| "eval_median_acc": 54.666666666666664, |
| "eval_runtime": 31.0782, |
| "eval_samples_per_second": 594.532, |
| "eval_steps_per_second": 0.772, |
| "step": 24000 |
| }, |
| { |
| "epoch": 97.0, |
| "eval_loss": 1.3067735433578491, |
| "eval_mean_acc": 41.452516923874725, |
| "eval_median_acc": 54.773869346733676, |
| "eval_runtime": 30.922, |
| "eval_samples_per_second": 597.536, |
| "eval_steps_per_second": 0.776, |
| "step": 24250 |
| }, |
| { |
| "epoch": 98.0, |
| "learning_rate": 5.999999999999999e-06, |
| "loss": 0.6347, |
| "step": 24500 |
| }, |
| { |
| "epoch": 98.0, |
| "eval_loss": 1.313217282295227, |
| "eval_mean_acc": 41.42466457051602, |
| "eval_median_acc": 54.75409836065573, |
| "eval_runtime": 31.2774, |
| "eval_samples_per_second": 590.746, |
| "eval_steps_per_second": 0.767, |
| "step": 24500 |
| }, |
| { |
| "epoch": 99.0, |
| "eval_loss": 1.3124245405197144, |
| "eval_mean_acc": 41.25630205018876, |
| "eval_median_acc": 54.74452554744526, |
| "eval_runtime": 30.8857, |
| "eval_samples_per_second": 598.238, |
| "eval_steps_per_second": 0.777, |
| "step": 24750 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 0.0, |
| "loss": 0.6339, |
| "step": 25000 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_loss": 1.3174444437026978, |
| "eval_mean_acc": 41.405191345713106, |
| "eval_median_acc": 54.75578406169666, |
| "eval_runtime": 31.1131, |
| "eval_samples_per_second": 593.865, |
| "eval_steps_per_second": 0.771, |
| "step": 25000 |
| }, |
| { |
| "epoch": 100.0, |
| "step": 25000, |
| "total_flos": 1.660761144e+18, |
| "train_loss": 0.7862733935546875, |
| "train_runtime": 36507.5594, |
| "train_samples_per_second": 273.916, |
| "train_steps_per_second": 0.685 |
| } |
| ], |
| "max_steps": 25000, |
| "num_train_epochs": 100, |
| "total_flos": 1.660761144e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|