{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.94413407821229,
  "global_step": 890,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 9.887640449438202e-05,
      "loss": 1.3963,
      "step": 10
    },
    {
      "epoch": 0.11,
      "eval_accuracy": 0.33006536960601807,
      "eval_loss": 1.3515560626983643,
      "eval_runtime": 318.9295,
      "eval_samples_per_second": 0.959,
      "eval_steps_per_second": 0.241,
      "step": 10
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.775280898876405e-05,
      "loss": 1.2981,
      "step": 20
    },
    {
      "epoch": 0.22,
      "eval_accuracy": 0.30392158031463623,
      "eval_loss": 1.2864599227905273,
      "eval_runtime": 533.6299,
      "eval_samples_per_second": 0.573,
      "eval_steps_per_second": 0.144,
      "step": 20
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.662921348314608e-05,
      "loss": 1.2614,
      "step": 30
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.6535947918891907,
      "eval_loss": 0.9824705719947815,
      "eval_runtime": 348.7575,
      "eval_samples_per_second": 0.877,
      "eval_steps_per_second": 0.221,
      "step": 30
    },
    {
      "epoch": 0.45,
      "learning_rate": 9.550561797752809e-05,
      "loss": 0.9419,
      "step": 40
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.49346405267715454,
      "eval_loss": 1.0978724956512451,
      "eval_runtime": 312.9714,
      "eval_samples_per_second": 0.978,
      "eval_steps_per_second": 0.246,
      "step": 40
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.438202247191012e-05,
      "loss": 0.9813,
      "step": 50
    },
    {
      "epoch": 0.56,
      "eval_accuracy": 0.5196078419685364,
      "eval_loss": 0.9674614667892456,
      "eval_runtime": 312.3793,
      "eval_samples_per_second": 0.98,
      "eval_steps_per_second": 0.246,
      "step": 50
    },
    {
      "epoch": 0.67,
      "learning_rate": 9.325842696629214e-05,
      "loss": 0.7973,
      "step": 60
    },
    {
      "epoch": 0.67,
      "eval_accuracy": 0.5947712659835815,
      "eval_loss": 1.0033761262893677,
      "eval_runtime": 316.8558,
      "eval_samples_per_second": 0.966,
      "eval_steps_per_second": 0.243,
      "step": 60
    },
    {
      "epoch": 0.78,
      "learning_rate": 9.213483146067416e-05,
      "loss": 0.9575,
      "step": 70
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.5882353186607361,
      "eval_loss": 0.8489904403686523,
      "eval_runtime": 535.4904,
      "eval_samples_per_second": 0.571,
      "eval_steps_per_second": 0.144,
      "step": 70
    },
    {
      "epoch": 0.89,
      "learning_rate": 9.112359550561799e-05,
      "loss": 0.8752,
      "step": 80
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.6895424723625183,
      "eval_loss": 0.7460987567901611,
      "eval_runtime": 597.5484,
      "eval_samples_per_second": 0.512,
      "eval_steps_per_second": 0.129,
      "step": 80
    },
    {
      "epoch": 1.01,
      "learning_rate": 9e-05,
      "loss": 0.719,
      "step": 90
    },
    {
      "epoch": 1.01,
      "eval_accuracy": 0.7973856329917908,
      "eval_loss": 0.5500715374946594,
      "eval_runtime": 583.2832,
      "eval_samples_per_second": 0.525,
      "eval_steps_per_second": 0.132,
      "step": 90
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.887640449438202e-05,
      "loss": 0.5311,
      "step": 100
    },
    {
      "epoch": 1.12,
      "eval_accuracy": 0.7973856329917908,
      "eval_loss": 0.5389693379402161,
      "eval_runtime": 590.4987,
      "eval_samples_per_second": 0.518,
      "eval_steps_per_second": 0.13,
      "step": 100
    },
    {
      "epoch": 1.23,
      "learning_rate": 8.775280898876405e-05,
      "loss": 0.3922,
      "step": 110
    },
    {
      "epoch": 1.23,
      "eval_accuracy": 0.843137264251709,
      "eval_loss": 0.42480573058128357,
      "eval_runtime": 597.2704,
      "eval_samples_per_second": 0.512,
      "eval_steps_per_second": 0.129,
      "step": 110
    },
    {
      "epoch": 1.34,
      "learning_rate": 8.662921348314608e-05,
      "loss": 0.3043,
      "step": 120
    },
    {
      "epoch": 1.34,
      "eval_accuracy": 0.8496732115745544,
      "eval_loss": 0.5262107253074646,
      "eval_runtime": 618.0607,
      "eval_samples_per_second": 0.495,
      "eval_steps_per_second": 0.125,
      "step": 120
    },
    {
      "epoch": 1.45,
      "learning_rate": 8.550561797752809e-05,
      "loss": 0.7841,
      "step": 130
    },
    {
      "epoch": 1.45,
      "eval_accuracy": 0.8202614188194275,
      "eval_loss": 0.618194580078125,
      "eval_runtime": 480.4521,
      "eval_samples_per_second": 0.637,
      "eval_steps_per_second": 0.16,
      "step": 130
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.438202247191012e-05,
      "loss": 0.4881,
      "step": 140
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.8333333134651184,
      "eval_loss": 0.4706672728061676,
      "eval_runtime": 546.9776,
      "eval_samples_per_second": 0.559,
      "eval_steps_per_second": 0.141,
      "step": 140
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.325842696629214e-05,
      "loss": 0.39,
      "step": 150
    },
    {
      "epoch": 1.68,
      "eval_accuracy": 0.8594771027565002,
      "eval_loss": 0.4261144995689392,
      "eval_runtime": 314.3222,
      "eval_samples_per_second": 0.974,
      "eval_steps_per_second": 0.245,
      "step": 150
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.213483146067417e-05,
      "loss": 0.4687,
      "step": 160
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.7745097875595093,
      "eval_loss": 0.7588664293289185,
      "eval_runtime": 316.9261,
      "eval_samples_per_second": 0.966,
      "eval_steps_per_second": 0.243,
      "step": 160
    },
    {
      "epoch": 1.9,
      "learning_rate": 8.101123595505618e-05,
      "loss": 0.4289,
      "step": 170
    },
    {
      "epoch": 1.9,
      "eval_accuracy": 0.843137264251709,
      "eval_loss": 0.4307919144630432,
      "eval_runtime": 299.2563,
      "eval_samples_per_second": 1.023,
      "eval_steps_per_second": 0.257,
      "step": 170
    },
    {
      "epoch": 2.01,
      "learning_rate": 7.988764044943821e-05,
      "loss": 0.4127,
      "step": 180
    },
    {
      "epoch": 2.01,
      "eval_accuracy": 0.9117646813392639,
      "eval_loss": 0.30210039019584656,
      "eval_runtime": 293.1021,
      "eval_samples_per_second": 1.044,
      "eval_steps_per_second": 0.263,
      "step": 180
    },
    {
      "epoch": 2.12,
      "learning_rate": 7.876404494382022e-05,
      "loss": 0.3205,
      "step": 190
    },
    {
      "epoch": 2.12,
      "eval_accuracy": 0.9052287340164185,
      "eval_loss": 0.3764496445655823,
      "eval_runtime": 626.3698,
      "eval_samples_per_second": 0.489,
      "eval_steps_per_second": 0.123,
      "step": 190
    },
    {
      "epoch": 2.23,
      "learning_rate": 7.764044943820225e-05,
      "loss": 0.302,
      "step": 200
    },
    {
      "epoch": 2.23,
      "eval_accuracy": 0.8169934749603271,
      "eval_loss": 0.6414448618888855,
      "eval_runtime": 406.1799,
      "eval_samples_per_second": 0.753,
      "eval_steps_per_second": 0.19,
      "step": 200
    },
    {
      "epoch": 2.35,
      "learning_rate": 7.651685393258428e-05,
      "loss": 0.2767,
      "step": 210
    },
    {
      "epoch": 2.35,
      "eval_accuracy": 0.8856208920478821,
      "eval_loss": 0.3875592350959778,
      "eval_runtime": 358.8222,
      "eval_samples_per_second": 0.853,
      "eval_steps_per_second": 0.215,
      "step": 210
    },
    {
      "epoch": 2.46,
      "learning_rate": 7.53932584269663e-05,
      "loss": 0.4107,
      "step": 220
    },
    {
      "epoch": 2.46,
      "eval_accuracy": 0.8104575276374817,
      "eval_loss": 0.6241660118103027,
      "eval_runtime": 301.8068,
      "eval_samples_per_second": 1.014,
      "eval_steps_per_second": 0.255,
      "step": 220
    },
    {
      "epoch": 2.57,
      "learning_rate": 7.426966292134831e-05,
      "loss": 0.4392,
      "step": 230
    },
    {
      "epoch": 2.57,
      "eval_accuracy": 0.9183006286621094,
      "eval_loss": 0.2545139491558075,
      "eval_runtime": 315.8591,
      "eval_samples_per_second": 0.969,
      "eval_steps_per_second": 0.244,
      "step": 230
    },
    {
      "epoch": 2.68,
      "learning_rate": 7.314606741573034e-05,
      "loss": 0.2376,
      "step": 240
    },
    {
      "epoch": 2.68,
      "eval_accuracy": 0.8496732115745544,
      "eval_loss": 0.49588432908058167,
      "eval_runtime": 304.5879,
      "eval_samples_per_second": 1.005,
      "eval_steps_per_second": 0.253,
      "step": 240
    },
    {
      "epoch": 2.79,
      "learning_rate": 7.202247191011237e-05,
      "loss": 0.4715,
      "step": 250
    },
    {
      "epoch": 2.79,
      "eval_accuracy": 0.8235294222831726,
      "eval_loss": 0.6127722859382629,
      "eval_runtime": 296.1686,
      "eval_samples_per_second": 1.033,
      "eval_steps_per_second": 0.26,
      "step": 250
    },
    {
      "epoch": 2.91,
      "learning_rate": 7.089887640449438e-05,
      "loss": 0.2753,
      "step": 260
    },
    {
      "epoch": 2.91,
      "eval_accuracy": 0.898692786693573,
      "eval_loss": 0.33089637756347656,
      "eval_runtime": 318.4459,
      "eval_samples_per_second": 0.961,
      "eval_steps_per_second": 0.242,
      "step": 260
    },
    {
      "epoch": 3.02,
      "learning_rate": 6.97752808988764e-05,
      "loss": 0.2919,
      "step": 270
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.8921568393707275,
      "eval_loss": 0.41311776638031006,
      "eval_runtime": 285.5443,
      "eval_samples_per_second": 1.072,
      "eval_steps_per_second": 0.27,
      "step": 270
    },
    {
      "epoch": 3.13,
      "learning_rate": 6.865168539325843e-05,
      "loss": 0.2222,
      "step": 280
    },
    {
      "epoch": 3.13,
      "eval_accuracy": 0.898692786693573,
      "eval_loss": 0.3918479084968567,
      "eval_runtime": 297.6229,
      "eval_samples_per_second": 1.028,
      "eval_steps_per_second": 0.259,
      "step": 280
    },
    {
      "epoch": 3.24,
      "learning_rate": 6.752808988764046e-05,
      "loss": 0.0371,
      "step": 290
    },
    {
      "epoch": 3.24,
      "eval_accuracy": 0.9183006286621094,
      "eval_loss": 0.28783220052719116,
      "eval_runtime": 293.7099,
      "eval_samples_per_second": 1.042,
      "eval_steps_per_second": 0.262,
      "step": 290
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.640449438202247e-05,
      "loss": 0.0172,
      "step": 300
    },
    {
      "epoch": 3.35,
      "eval_accuracy": 0.9215686321258545,
      "eval_loss": 0.3087099492549896,
      "eval_runtime": 317.6569,
      "eval_samples_per_second": 0.963,
      "eval_steps_per_second": 0.242,
      "step": 300
    },
    {
      "epoch": 3.46,
      "learning_rate": 6.52808988764045e-05,
      "loss": 0.1953,
      "step": 310
    },
    {
      "epoch": 3.46,
      "eval_accuracy": 0.9183006286621094,
      "eval_loss": 0.29489144682884216,
      "eval_runtime": 293.4224,
      "eval_samples_per_second": 1.043,
      "eval_steps_per_second": 0.262,
      "step": 310
    },
    {
      "epoch": 3.58,
      "learning_rate": 6.415730337078652e-05,
      "loss": 0.2093,
      "step": 320
    },
    {
      "epoch": 3.58,
      "eval_accuracy": 0.8921568393707275,
      "eval_loss": 0.38903045654296875,
      "eval_runtime": 313.0985,
      "eval_samples_per_second": 0.977,
      "eval_steps_per_second": 0.246,
      "step": 320
    },
    {
      "epoch": 3.69,
      "learning_rate": 6.303370786516854e-05,
      "loss": 0.1393,
      "step": 330
    },
    {
      "epoch": 3.69,
      "eval_accuracy": 0.898692786693573,
      "eval_loss": 0.34058284759521484,
      "eval_runtime": 316.0484,
      "eval_samples_per_second": 0.968,
      "eval_steps_per_second": 0.244,
      "step": 330
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.191011235955056e-05,
      "loss": 0.0532,
      "step": 340
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.9313725233078003,
      "eval_loss": 0.38309353590011597,
      "eval_runtime": 315.0427,
      "eval_samples_per_second": 0.971,
      "eval_steps_per_second": 0.244,
      "step": 340
    },
    {
      "epoch": 3.91,
      "learning_rate": 6.078651685393258e-05,
      "loss": 0.2061,
      "step": 350
    },
    {
      "epoch": 3.91,
      "eval_accuracy": 0.8954248428344727,
      "eval_loss": 0.43487662076950073,
      "eval_runtime": 308.5388,
      "eval_samples_per_second": 0.992,
      "eval_steps_per_second": 0.25,
      "step": 350
    },
    {
      "epoch": 4.02,
      "learning_rate": 5.96629213483146e-05,
      "loss": 0.1755,
      "step": 360
    },
    {
      "epoch": 4.02,
      "eval_accuracy": 0.8954248428344727,
      "eval_loss": 0.4112664461135864,
      "eval_runtime": 314.6158,
      "eval_samples_per_second": 0.973,
      "eval_steps_per_second": 0.245,
      "step": 360
    },
    {
      "epoch": 4.13,
      "learning_rate": 5.853932584269663e-05,
      "loss": 0.0155,
      "step": 370
    },
    {
      "epoch": 4.13,
      "eval_accuracy": 0.9084967374801636,
      "eval_loss": 0.34788793325424194,
      "eval_runtime": 317.1579,
      "eval_samples_per_second": 0.965,
      "eval_steps_per_second": 0.243,
      "step": 370
    },
    {
      "epoch": 4.25,
      "learning_rate": 5.7415730337078654e-05,
      "loss": 0.1389,
      "step": 380
    },
    {
      "epoch": 4.25,
      "eval_accuracy": 0.9248365759849548,
      "eval_loss": 0.28591012954711914,
      "eval_runtime": 323.1301,
      "eval_samples_per_second": 0.947,
      "eval_steps_per_second": 0.238,
      "step": 380
    },
    {
      "epoch": 4.36,
      "learning_rate": 5.6292134831460676e-05,
      "loss": 0.1102,
      "step": 390
    },
    {
      "epoch": 4.36,
      "eval_accuracy": 0.9183006286621094,
      "eval_loss": 0.2804703712463379,
      "eval_runtime": 308.9869,
      "eval_samples_per_second": 0.99,
      "eval_steps_per_second": 0.249,
      "step": 390
    },
    {
      "epoch": 4.47,
      "learning_rate": 5.516853932584269e-05,
      "loss": 0.0447,
      "step": 400
    },
    {
      "epoch": 4.47,
      "eval_accuracy": 0.9281045794487,
      "eval_loss": 0.28759482502937317,
      "eval_runtime": 325.9228,
      "eval_samples_per_second": 0.939,
      "eval_steps_per_second": 0.236,
      "step": 400
    },
    {
      "epoch": 4.58,
      "learning_rate": 5.4044943820224726e-05,
      "loss": 0.3047,
      "step": 410
    },
    {
      "epoch": 4.58,
      "eval_accuracy": 0.9281045794487,
      "eval_loss": 0.28950873017311096,
      "eval_runtime": 302.1849,
      "eval_samples_per_second": 1.013,
      "eval_steps_per_second": 0.255,
      "step": 410
    },
    {
      "epoch": 4.69,
      "learning_rate": 5.292134831460674e-05,
      "loss": 0.2309,
      "step": 420
    },
    {
      "epoch": 4.69,
      "eval_accuracy": 0.9313725233078003,
      "eval_loss": 0.20166385173797607,
      "eval_runtime": 313.1316,
      "eval_samples_per_second": 0.977,
      "eval_steps_per_second": 0.246,
      "step": 420
    },
    {
      "epoch": 4.8,
      "learning_rate": 5.179775280898876e-05,
      "loss": 0.2097,
      "step": 430
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 0.8692810535430908,
      "eval_loss": 0.4348565936088562,
      "eval_runtime": 328.5983,
      "eval_samples_per_second": 0.931,
      "eval_steps_per_second": 0.234,
      "step": 430
    },
    {
      "epoch": 4.92,
      "learning_rate": 5.0674157303370785e-05,
      "loss": 0.0094,
      "step": 440
    },
    {
      "epoch": 4.92,
      "eval_accuracy": 0.898692786693573,
      "eval_loss": 0.3308834135532379,
      "eval_runtime": 261.8274,
      "eval_samples_per_second": 1.169,
      "eval_steps_per_second": 0.294,
      "step": 440
    },
    {
      "epoch": 5.03,
      "learning_rate": 4.955056179775281e-05,
      "loss": 0.0068,
      "step": 450
    },
    {
      "epoch": 5.03,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.22690723836421967,
      "eval_runtime": 321.9057,
      "eval_samples_per_second": 0.951,
      "eval_steps_per_second": 0.239,
      "step": 450
    },
    {
      "epoch": 5.14,
      "learning_rate": 4.8426966292134836e-05,
      "loss": 0.0428,
      "step": 460
    },
    {
      "epoch": 5.14,
      "eval_accuracy": 0.9313725233078003,
      "eval_loss": 0.24997933208942413,
      "eval_runtime": 306.0175,
      "eval_samples_per_second": 1.0,
      "eval_steps_per_second": 0.252,
      "step": 460
    },
    {
      "epoch": 5.25,
      "learning_rate": 4.730337078651685e-05,
      "loss": 0.0555,
      "step": 470
    },
    {
      "epoch": 5.25,
      "eval_accuracy": 0.8888888955116272,
      "eval_loss": 0.4563826024532318,
      "eval_runtime": 304.3479,
      "eval_samples_per_second": 1.005,
      "eval_steps_per_second": 0.253,
      "step": 470
    },
    {
      "epoch": 5.36,
      "learning_rate": 4.617977528089888e-05,
      "loss": 0.0928,
      "step": 480
    },
    {
      "epoch": 5.36,
      "eval_accuracy": 0.915032684803009,
      "eval_loss": 0.3516130745410919,
      "eval_runtime": 320.668,
      "eval_samples_per_second": 0.954,
      "eval_steps_per_second": 0.24,
      "step": 480
    },
    {
      "epoch": 5.47,
      "learning_rate": 4.50561797752809e-05,
      "loss": 0.1947,
      "step": 490
    },
    {
      "epoch": 5.47,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.24630288779735565,
      "eval_runtime": 305.3109,
      "eval_samples_per_second": 1.002,
      "eval_steps_per_second": 0.252,
      "step": 490
    },
    {
      "epoch": 5.59,
      "learning_rate": 4.393258426966292e-05,
      "loss": 0.0934,
      "step": 500
    },
    {
      "epoch": 5.59,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.18016140162944794,
      "eval_runtime": 308.018,
      "eval_samples_per_second": 0.993,
      "eval_steps_per_second": 0.25,
      "step": 500
    },
    {
      "epoch": 5.7,
      "learning_rate": 4.2808988764044945e-05,
      "loss": 0.0035,
      "step": 510
    },
    {
      "epoch": 5.7,
      "eval_accuracy": 0.9411764740943909,
      "eval_loss": 0.22933033108711243,
      "eval_runtime": 316.2253,
      "eval_samples_per_second": 0.968,
      "eval_steps_per_second": 0.243,
      "step": 510
    },
    {
      "epoch": 5.81,
      "learning_rate": 4.168539325842697e-05,
      "loss": 0.0038,
      "step": 520
    },
    {
      "epoch": 5.81,
      "eval_accuracy": 0.9215686321258545,
      "eval_loss": 0.2865773141384125,
      "eval_runtime": 306.9042,
      "eval_samples_per_second": 0.997,
      "eval_steps_per_second": 0.251,
      "step": 520
    },
    {
      "epoch": 5.92,
      "learning_rate": 4.056179775280899e-05,
      "loss": 0.0027,
      "step": 530
    },
    {
      "epoch": 5.92,
      "eval_accuracy": 0.915032684803009,
      "eval_loss": 0.3221026659011841,
      "eval_runtime": 308.5611,
      "eval_samples_per_second": 0.992,
      "eval_steps_per_second": 0.25,
      "step": 530
    },
    {
      "epoch": 6.03,
      "learning_rate": 3.943820224719101e-05,
      "loss": 0.0586,
      "step": 540
    },
    {
      "epoch": 6.03,
      "eval_accuracy": 0.9215686321258545,
      "eval_loss": 0.2714509665966034,
      "eval_runtime": 307.936,
      "eval_samples_per_second": 0.994,
      "eval_steps_per_second": 0.25,
      "step": 540
    },
    {
      "epoch": 6.15,
      "learning_rate": 3.831460674157303e-05,
      "loss": 0.003,
      "step": 550
    },
    {
      "epoch": 6.15,
      "eval_accuracy": 0.9117646813392639,
      "eval_loss": 0.2935124635696411,
      "eval_runtime": 322.5288,
      "eval_samples_per_second": 0.949,
      "eval_steps_per_second": 0.239,
      "step": 550
    },
    {
      "epoch": 6.26,
      "learning_rate": 3.719101123595506e-05,
      "loss": 0.0748,
      "step": 560
    },
    {
      "epoch": 6.26,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.2554876506328583,
      "eval_runtime": 310.2055,
      "eval_samples_per_second": 0.986,
      "eval_steps_per_second": 0.248,
      "step": 560
    },
    {
      "epoch": 6.37,
      "learning_rate": 3.6179775280898874e-05,
      "loss": 0.0273,
      "step": 570
    },
    {
      "epoch": 6.37,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.26076748967170715,
      "eval_runtime": 309.3501,
      "eval_samples_per_second": 0.989,
      "eval_steps_per_second": 0.249,
      "step": 570
    },
    {
      "epoch": 6.48,
      "learning_rate": 3.50561797752809e-05,
      "loss": 0.0021,
      "step": 580
    },
    {
      "epoch": 6.48,
      "eval_accuracy": 0.9542483687400818,
      "eval_loss": 0.2612459659576416,
      "eval_runtime": 317.8944,
      "eval_samples_per_second": 0.963,
      "eval_steps_per_second": 0.242,
      "step": 580
    },
    {
      "epoch": 6.59,
      "learning_rate": 3.393258426966292e-05,
      "loss": 0.0042,
      "step": 590
    },
    {
      "epoch": 6.59,
      "eval_accuracy": 0.9575163125991821,
      "eval_loss": 0.24474120140075684,
      "eval_runtime": 308.9228,
      "eval_samples_per_second": 0.991,
      "eval_steps_per_second": 0.249,
      "step": 590
    },
    {
      "epoch": 6.7,
      "learning_rate": 3.2808988764044946e-05,
      "loss": 0.0274,
      "step": 600
    },
    {
      "epoch": 6.7,
      "eval_accuracy": 0.9542483687400818,
      "eval_loss": 0.23039507865905762,
      "eval_runtime": 311.9509,
      "eval_samples_per_second": 0.981,
      "eval_steps_per_second": 0.247,
      "step": 600
    },
    {
      "epoch": 6.82,
      "learning_rate": 3.168539325842697e-05,
      "loss": 0.0037,
      "step": 610
    },
    {
      "epoch": 6.82,
      "eval_accuracy": 0.9542483687400818,
      "eval_loss": 0.22053539752960205,
      "eval_runtime": 315.7913,
      "eval_samples_per_second": 0.969,
      "eval_steps_per_second": 0.244,
      "step": 610
    },
    {
      "epoch": 6.93,
      "learning_rate": 3.056179775280899e-05,
      "loss": 0.0059,
      "step": 620
    },
    {
      "epoch": 6.93,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.26862725615501404,
      "eval_runtime": 321.9798,
      "eval_samples_per_second": 0.95,
      "eval_steps_per_second": 0.239,
      "step": 620
    },
    {
      "epoch": 7.04,
      "learning_rate": 2.9438202247191012e-05,
      "loss": 0.002,
      "step": 630
    },
    {
      "epoch": 7.04,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.29074591398239136,
      "eval_runtime": 311.6792,
      "eval_samples_per_second": 0.982,
      "eval_steps_per_second": 0.247,
      "step": 630
    },
    {
      "epoch": 7.15,
      "learning_rate": 2.8314606741573037e-05,
      "loss": 0.0214,
      "step": 640
    },
    {
      "epoch": 7.15,
      "eval_accuracy": 0.9509803652763367,
      "eval_loss": 0.217881441116333,
      "eval_runtime": 312.0489,
      "eval_samples_per_second": 0.981,
      "eval_steps_per_second": 0.247,
      "step": 640
    },
    {
      "epoch": 7.26,
      "learning_rate": 2.7191011235955055e-05,
      "loss": 0.0011,
      "step": 650
    },
    {
      "epoch": 7.26,
      "eval_accuracy": 0.9444444179534912,
      "eval_loss": 0.24239015579223633,
      "eval_runtime": 320.4623,
      "eval_samples_per_second": 0.955,
      "eval_steps_per_second": 0.24,
      "step": 650
    },
    {
      "epoch": 7.37,
      "learning_rate": 2.606741573033708e-05,
      "loss": 0.1222,
      "step": 660
    },
    {
      "epoch": 7.37,
      "eval_accuracy": 0.9607843160629272,
      "eval_loss": 0.22233766317367554,
      "eval_runtime": 316.2672,
      "eval_samples_per_second": 0.968,
      "eval_steps_per_second": 0.243,
      "step": 660
    },
    {
      "epoch": 7.49,
      "learning_rate": 2.4943820224719103e-05,
      "loss": 0.0308,
      "step": 670
    },
    {
      "epoch": 7.49,
      "eval_accuracy": 0.9542483687400818,
      "eval_loss": 0.23289808630943298,
      "eval_runtime": 314.263,
      "eval_samples_per_second": 0.974,
      "eval_steps_per_second": 0.245,
      "step": 670
    },
    {
      "epoch": 7.6,
      "learning_rate": 2.3820224719101125e-05,
      "loss": 0.0047,
      "step": 680
    },
    {
      "epoch": 7.6,
      "eval_accuracy": 0.9444444179534912,
      "eval_loss": 0.2540358901023865,
      "eval_runtime": 314.1415,
      "eval_samples_per_second": 0.974,
      "eval_steps_per_second": 0.245,
      "step": 680
    },
    {
      "epoch": 7.71,
      "learning_rate": 2.2696629213483146e-05,
      "loss": 0.0033,
      "step": 690
    },
    {
      "epoch": 7.71,
      "eval_accuracy": 0.9379084706306458,
      "eval_loss": 0.26486942172050476,
      "eval_runtime": 313.7641,
      "eval_samples_per_second": 0.975,
      "eval_steps_per_second": 0.245,
      "step": 690
    },
    {
      "epoch": 7.82,
      "learning_rate": 2.157303370786517e-05,
      "loss": 0.0799,
      "step": 700
    },
    {
      "epoch": 7.82,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.2804279029369354,
      "eval_runtime": 305.6654,
      "eval_samples_per_second": 1.001,
      "eval_steps_per_second": 0.252,
      "step": 700
    },
    {
      "epoch": 7.93,
      "learning_rate": 2.0449438202247194e-05,
      "loss": 0.0223,
      "step": 710
    },
    {
      "epoch": 7.93,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.2961590886116028,
      "eval_runtime": 33.833,
      "eval_samples_per_second": 9.044,
      "eval_steps_per_second": 2.276,
      "step": 710
    },
    {
      "epoch": 8.04,
      "learning_rate": 1.9325842696629215e-05,
      "loss": 0.0065,
      "step": 720
    },
    {
      "epoch": 8.04,
      "eval_accuracy": 0.9313725233078003,
      "eval_loss": 0.3002856373786926,
      "eval_runtime": 28.9416,
      "eval_samples_per_second": 10.573,
      "eval_steps_per_second": 2.661,
      "step": 720
    },
    {
      "epoch": 8.16,
      "learning_rate": 1.8202247191011237e-05,
      "loss": 0.0032,
      "step": 730
    },
    {
      "epoch": 8.16,
      "eval_accuracy": 0.9215686321258545,
      "eval_loss": 0.33132508397102356,
      "eval_runtime": 28.9077,
      "eval_samples_per_second": 10.585,
      "eval_steps_per_second": 2.664,
      "step": 730
    },
    {
      "epoch": 8.27,
      "learning_rate": 1.707865168539326e-05,
      "loss": 0.0012,
      "step": 740
    },
    {
      "epoch": 8.27,
      "eval_accuracy": 0.9117646813392639,
      "eval_loss": 0.3802509605884552,
      "eval_runtime": 28.4035,
      "eval_samples_per_second": 10.773,
      "eval_steps_per_second": 2.711,
      "step": 740
    },
    {
      "epoch": 8.38,
      "learning_rate": 1.595505617977528e-05,
      "loss": 0.0022,
      "step": 750
    },
    {
      "epoch": 8.38,
      "eval_accuracy": 0.9183006286621094,
      "eval_loss": 0.3360930383205414,
      "eval_runtime": 28.4227,
      "eval_samples_per_second": 10.766,
      "eval_steps_per_second": 2.709,
      "step": 750
    },
    {
      "epoch": 8.49,
      "learning_rate": 1.4831460674157305e-05,
      "loss": 0.0012,
      "step": 760
    },
    {
      "epoch": 8.49,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.29600241780281067,
      "eval_runtime": 300.6077,
      "eval_samples_per_second": 1.018,
      "eval_steps_per_second": 0.256,
      "step": 760
    },
    {
      "epoch": 8.6,
      "learning_rate": 1.3707865168539327e-05,
      "loss": 0.0011,
      "step": 770
    },
    {
      "epoch": 8.6,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.2943996489048004,
      "eval_runtime": 312.1252,
      "eval_samples_per_second": 0.98,
      "eval_steps_per_second": 0.247,
      "step": 770
    },
    {
      "epoch": 8.72,
      "learning_rate": 1.258426966292135e-05,
      "loss": 0.0039,
      "step": 780
    },
    {
      "epoch": 8.72,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.29687556624412537,
      "eval_runtime": 310.6326,
      "eval_samples_per_second": 0.985,
      "eval_steps_per_second": 0.248,
      "step": 780
    },
    {
      "epoch": 8.83,
      "learning_rate": 1.146067415730337e-05,
      "loss": 0.0011,
      "step": 790
    },
    {
      "epoch": 8.83,
      "eval_accuracy": 0.9281045794487,
      "eval_loss": 0.32291698455810547,
      "eval_runtime": 312.3321,
      "eval_samples_per_second": 0.98,
      "eval_steps_per_second": 0.247,
      "step": 790
    },
    {
      "epoch": 8.94,
      "learning_rate": 1.0337078651685394e-05,
      "loss": 0.0011,
      "step": 800
    },
    {
      "epoch": 8.94,
      "eval_accuracy": 0.9281045794487,
      "eval_loss": 0.3310171067714691,
      "eval_runtime": 309.2689,
      "eval_samples_per_second": 0.989,
      "eval_steps_per_second": 0.249,
      "step": 800
    },
    {
      "epoch": 9.05,
      "learning_rate": 9.213483146067416e-06,
      "loss": 0.0023,
      "step": 810
    },
    {
      "epoch": 9.05,
      "eval_accuracy": 0.9346405267715454,
      "eval_loss": 0.3102934658527374,
      "eval_runtime": 308.188,
      "eval_samples_per_second": 0.993,
      "eval_steps_per_second": 0.25,
      "step": 810
    },
    {
      "epoch": 9.16,
      "learning_rate": 8.089887640449438e-06,
      "loss": 0.0011,
      "step": 820
    },
    {
      "epoch": 9.16,
      "eval_accuracy": 0.9444444179534912,
      "eval_loss": 0.29238083958625793,
      "eval_runtime": 323.8949,
      "eval_samples_per_second": 0.945,
      "eval_steps_per_second": 0.238,
      "step": 820
    },
    {
      "epoch": 9.27,
      "learning_rate": 6.96629213483146e-06,
      "loss": 0.0015,
      "step": 830
    },
    {
      "epoch": 9.27,
      "eval_accuracy": 0.9509803652763367,
      "eval_loss": 0.282598614692688,
      "eval_runtime": 298.2173,
      "eval_samples_per_second": 1.026,
      "eval_steps_per_second": 0.258,
      "step": 830
    },
    {
      "epoch": 9.39,
      "learning_rate": 5.842696629213484e-06,
      "loss": 0.0015,
      "step": 840
    },
    {
      "epoch": 9.39,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.2785097658634186,
      "eval_runtime": 310.1298,
      "eval_samples_per_second": 0.987,
      "eval_steps_per_second": 0.248,
      "step": 840
    },
    {
      "epoch": 9.5,
      "learning_rate": 4.719101123595506e-06,
      "loss": 0.0011,
      "step": 850
    },
    {
      "epoch": 9.5,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.2726196050643921,
      "eval_runtime": 315.4917,
      "eval_samples_per_second": 0.97,
      "eval_steps_per_second": 0.244,
      "step": 850
    },
    {
      "epoch": 9.61,
      "learning_rate": 3.5955056179775286e-06,
      "loss": 0.0297,
      "step": 860
    },
    {
      "epoch": 9.61,
      "eval_accuracy": 0.9509803652763367,
      "eval_loss": 0.26638907194137573,
      "eval_runtime": 299.8251,
      "eval_samples_per_second": 1.021,
      "eval_steps_per_second": 0.257,
      "step": 860
    },
    {
      "epoch": 9.72,
      "learning_rate": 2.4719101123595505e-06,
      "loss": 0.0045,
      "step": 870
    },
    {
      "epoch": 9.72,
      "eval_accuracy": 0.9509803652763367,
      "eval_loss": 0.26572802662849426,
      "eval_runtime": 300.0469,
      "eval_samples_per_second": 1.02,
      "eval_steps_per_second": 0.257,
      "step": 870
    },
    {
      "epoch": 9.83,
      "learning_rate": 1.3483146067415732e-06,
      "loss": 0.0016,
      "step": 880
    },
    {
      "epoch": 9.83,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.2656039297580719,
      "eval_runtime": 305.8484,
      "eval_samples_per_second": 1.0,
      "eval_steps_per_second": 0.252,
      "step": 880
    },
    {
      "epoch": 9.94,
      "learning_rate": 2.2471910112359554e-07,
      "loss": 0.0008,
      "step": 890
    },
    {
      "epoch": 9.94,
      "eval_accuracy": 0.9477124214172363,
      "eval_loss": 0.26553988456726074,
      "eval_runtime": 335.2081,
      "eval_samples_per_second": 0.913,
      "eval_steps_per_second": 0.23,
      "step": 890
    }
  ],
  "max_steps": 890,
  "num_train_epochs": 10,
  "total_flos": 1.51047400011648e+18,
  "trial_name": null,
  "trial_params": null
}