| { | |
| "best_metric": 0.6892715692520142, | |
| "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli/checkpoint-13400", | |
| "epoch": 3.793103448275862, | |
| "global_step": 15400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.453520268201828, | |
| "eval_loss": 1.0527480840682983, | |
| "eval_runtime": 13.8459, | |
| "eval_samples_per_second": 473.93, | |
| "eval_steps_per_second": 59.295, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.5099055171012878, | |
| "eval_loss": 0.9751997590065002, | |
| "eval_runtime": 13.8646, | |
| "eval_samples_per_second": 473.293, | |
| "eval_steps_per_second": 59.216, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.938423645320197e-05, | |
| "loss": 1.036, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.5175251364707947, | |
| "eval_loss": 0.9599342942237854, | |
| "eval_runtime": 13.9722, | |
| "eval_samples_per_second": 469.648, | |
| "eval_steps_per_second": 58.76, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.5405364036560059, | |
| "eval_loss": 0.9497725367546082, | |
| "eval_runtime": 13.977, | |
| "eval_samples_per_second": 469.486, | |
| "eval_steps_per_second": 58.739, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.876847290640394e-05, | |
| "loss": 0.9512, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.5626333355903625, | |
| "eval_loss": 0.9136764407157898, | |
| "eval_runtime": 13.8916, | |
| "eval_samples_per_second": 472.373, | |
| "eval_steps_per_second": 59.101, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.5790917277336121, | |
| "eval_loss": 0.9000873565673828, | |
| "eval_runtime": 13.9157, | |
| "eval_samples_per_second": 471.554, | |
| "eval_steps_per_second": 58.998, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.5789393186569214, | |
| "eval_loss": 0.8979566097259521, | |
| "eval_runtime": 13.9289, | |
| "eval_samples_per_second": 471.106, | |
| "eval_steps_per_second": 58.942, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8152709359605915e-05, | |
| "loss": 0.9165, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.5932642221450806, | |
| "eval_loss": 0.8788071870803833, | |
| "eval_runtime": 13.9191, | |
| "eval_samples_per_second": 471.437, | |
| "eval_steps_per_second": 58.984, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5729960203170776, | |
| "eval_loss": 0.9093856811523438, | |
| "eval_runtime": 13.9251, | |
| "eval_samples_per_second": 471.237, | |
| "eval_steps_per_second": 58.958, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.753694581280788e-05, | |
| "loss": 0.8915, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5684242844581604, | |
| "eval_loss": 0.8900429606437683, | |
| "eval_runtime": 13.875, | |
| "eval_samples_per_second": 472.938, | |
| "eval_steps_per_second": 59.171, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.606217622756958, | |
| "eval_loss": 0.8568419218063354, | |
| "eval_runtime": 13.8846, | |
| "eval_samples_per_second": 472.611, | |
| "eval_steps_per_second": 59.13, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.6046937108039856, | |
| "eval_loss": 0.8561736345291138, | |
| "eval_runtime": 13.8666, | |
| "eval_samples_per_second": 473.223, | |
| "eval_steps_per_second": 59.207, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.6921182266009855e-05, | |
| "loss": 0.873, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.6234380006790161, | |
| "eval_loss": 0.8509392142295837, | |
| "eval_runtime": 13.8669, | |
| "eval_samples_per_second": 473.214, | |
| "eval_steps_per_second": 59.206, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.6031697392463684, | |
| "eval_loss": 0.8599078059196472, | |
| "eval_runtime": 13.8611, | |
| "eval_samples_per_second": 473.412, | |
| "eval_steps_per_second": 59.231, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.630541871921182e-05, | |
| "loss": 0.861, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.6165803074836731, | |
| "eval_loss": 0.8439797759056091, | |
| "eval_runtime": 13.8579, | |
| "eval_samples_per_second": 473.522, | |
| "eval_steps_per_second": 59.244, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.6261810660362244, | |
| "eval_loss": 0.8294622898101807, | |
| "eval_runtime": 13.8888, | |
| "eval_samples_per_second": 472.468, | |
| "eval_steps_per_second": 59.113, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.6272478103637695, | |
| "eval_loss": 0.8390009999275208, | |
| "eval_runtime": 13.8984, | |
| "eval_samples_per_second": 472.14, | |
| "eval_steps_per_second": 59.072, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.5689655172413794e-05, | |
| "loss": 0.8449, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.6289241313934326, | |
| "eval_loss": 0.8257491588592529, | |
| "eval_runtime": 13.9296, | |
| "eval_samples_per_second": 471.084, | |
| "eval_steps_per_second": 58.939, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.6309052109718323, | |
| "eval_loss": 0.81971675157547, | |
| "eval_runtime": 13.9569, | |
| "eval_samples_per_second": 470.163, | |
| "eval_steps_per_second": 58.824, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.507389162561577e-05, | |
| "loss": 0.8409, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.6334958672523499, | |
| "eval_loss": 0.8270503878593445, | |
| "eval_runtime": 13.9849, | |
| "eval_samples_per_second": 469.221, | |
| "eval_steps_per_second": 58.706, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_accuracy": 0.6324291229248047, | |
| "eval_loss": 0.8238465785980225, | |
| "eval_runtime": 13.986, | |
| "eval_samples_per_second": 469.184, | |
| "eval_steps_per_second": 58.702, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_accuracy": 0.6322767734527588, | |
| "eval_loss": 0.8234522342681885, | |
| "eval_runtime": 13.9817, | |
| "eval_samples_per_second": 469.329, | |
| "eval_steps_per_second": 58.72, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.4458128078817734e-05, | |
| "loss": 0.7883, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_accuracy": 0.6405059695243835, | |
| "eval_loss": 0.8090473413467407, | |
| "eval_runtime": 13.9835, | |
| "eval_samples_per_second": 469.267, | |
| "eval_steps_per_second": 58.712, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_accuracy": 0.635019838809967, | |
| "eval_loss": 0.8067367076873779, | |
| "eval_runtime": 13.9898, | |
| "eval_samples_per_second": 469.056, | |
| "eval_steps_per_second": 58.686, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.384236453201971e-05, | |
| "loss": 0.7789, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_accuracy": 0.6330386996269226, | |
| "eval_loss": 0.8147866129875183, | |
| "eval_runtime": 13.941, | |
| "eval_samples_per_second": 470.699, | |
| "eval_steps_per_second": 58.891, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_accuracy": 0.6482779383659363, | |
| "eval_loss": 0.7999472618103027, | |
| "eval_runtime": 13.8888, | |
| "eval_samples_per_second": 472.466, | |
| "eval_steps_per_second": 59.112, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_accuracy": 0.6427918076515198, | |
| "eval_loss": 0.8316212296485901, | |
| "eval_runtime": 13.8743, | |
| "eval_samples_per_second": 472.96, | |
| "eval_steps_per_second": 59.174, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.3226600985221674e-05, | |
| "loss": 0.7644, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_accuracy": 0.6462968587875366, | |
| "eval_loss": 0.8003305196762085, | |
| "eval_runtime": 13.8974, | |
| "eval_samples_per_second": 472.173, | |
| "eval_steps_per_second": 59.076, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_accuracy": 0.6531545519828796, | |
| "eval_loss": 0.7893626689910889, | |
| "eval_runtime": 13.9226, | |
| "eval_samples_per_second": 471.319, | |
| "eval_steps_per_second": 58.969, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 4.261083743842365e-05, | |
| "loss": 0.7648, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_accuracy": 0.6549832224845886, | |
| "eval_loss": 0.7921696305274963, | |
| "eval_runtime": 13.9392, | |
| "eval_samples_per_second": 470.76, | |
| "eval_steps_per_second": 58.899, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_accuracy": 0.6469064354896545, | |
| "eval_loss": 0.8024189472198486, | |
| "eval_runtime": 13.9187, | |
| "eval_samples_per_second": 471.45, | |
| "eval_steps_per_second": 58.985, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_accuracy": 0.6566595435142517, | |
| "eval_loss": 0.7816490530967712, | |
| "eval_runtime": 13.9487, | |
| "eval_samples_per_second": 470.437, | |
| "eval_steps_per_second": 58.858, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.199507389162562e-05, | |
| "loss": 0.765, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_accuracy": 0.6543736457824707, | |
| "eval_loss": 0.7835971117019653, | |
| "eval_runtime": 13.8588, | |
| "eval_samples_per_second": 473.491, | |
| "eval_steps_per_second": 59.24, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_accuracy": 0.6411154866218567, | |
| "eval_loss": 0.8067611455917358, | |
| "eval_runtime": 13.8687, | |
| "eval_samples_per_second": 473.153, | |
| "eval_steps_per_second": 59.198, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.1379310344827587e-05, | |
| "loss": 0.7583, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_accuracy": 0.6603170037269592, | |
| "eval_loss": 0.7717912197113037, | |
| "eval_runtime": 13.8664, | |
| "eval_samples_per_second": 473.231, | |
| "eval_steps_per_second": 59.208, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_accuracy": 0.6635172367095947, | |
| "eval_loss": 0.7803527116775513, | |
| "eval_runtime": 13.8557, | |
| "eval_samples_per_second": 473.596, | |
| "eval_steps_per_second": 59.254, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_accuracy": 0.6674794554710388, | |
| "eval_loss": 0.7677510380744934, | |
| "eval_runtime": 13.8768, | |
| "eval_samples_per_second": 472.875, | |
| "eval_steps_per_second": 59.163, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.076354679802955e-05, | |
| "loss": 0.7584, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_accuracy": 0.6589454412460327, | |
| "eval_loss": 0.7867729067802429, | |
| "eval_runtime": 13.9787, | |
| "eval_samples_per_second": 469.43, | |
| "eval_steps_per_second": 58.732, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_accuracy": 0.6604693531990051, | |
| "eval_loss": 0.7763269543647766, | |
| "eval_runtime": 13.9708, | |
| "eval_samples_per_second": 469.693, | |
| "eval_steps_per_second": 58.765, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.014778325123153e-05, | |
| "loss": 0.7515, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.6673270463943481, | |
| "eval_loss": 0.7527127265930176, | |
| "eval_runtime": 13.9175, | |
| "eval_samples_per_second": 471.491, | |
| "eval_steps_per_second": 58.99, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 0.6624504923820496, | |
| "eval_loss": 0.8022358417510986, | |
| "eval_runtime": 13.9295, | |
| "eval_samples_per_second": 471.086, | |
| "eval_steps_per_second": 58.94, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_accuracy": 0.6630600690841675, | |
| "eval_loss": 0.7974384427070618, | |
| "eval_runtime": 13.9703, | |
| "eval_samples_per_second": 469.711, | |
| "eval_steps_per_second": 58.768, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.95320197044335e-05, | |
| "loss": 0.6779, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_accuracy": 0.6708320379257202, | |
| "eval_loss": 0.768686830997467, | |
| "eval_runtime": 13.9807, | |
| "eval_samples_per_second": 469.361, | |
| "eval_steps_per_second": 58.724, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_accuracy": 0.6661078929901123, | |
| "eval_loss": 0.7770901322364807, | |
| "eval_runtime": 13.9743, | |
| "eval_samples_per_second": 469.578, | |
| "eval_steps_per_second": 58.751, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.891625615763547e-05, | |
| "loss": 0.6587, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_accuracy": 0.6674794554710388, | |
| "eval_loss": 0.7751796245574951, | |
| "eval_runtime": 13.9722, | |
| "eval_samples_per_second": 469.647, | |
| "eval_steps_per_second": 58.76, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_accuracy": 0.6688509583473206, | |
| "eval_loss": 0.7814744114875793, | |
| "eval_runtime": 13.9424, | |
| "eval_samples_per_second": 470.652, | |
| "eval_steps_per_second": 58.885, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_accuracy": 0.6693081259727478, | |
| "eval_loss": 0.7871124744415283, | |
| "eval_runtime": 13.9317, | |
| "eval_samples_per_second": 471.012, | |
| "eval_steps_per_second": 58.93, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.830049261083744e-05, | |
| "loss": 0.6662, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.6717464327812195, | |
| "eval_loss": 0.768136203289032, | |
| "eval_runtime": 13.9811, | |
| "eval_samples_per_second": 469.347, | |
| "eval_steps_per_second": 58.722, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_accuracy": 0.6755562424659729, | |
| "eval_loss": 0.7916134595870972, | |
| "eval_runtime": 13.9048, | |
| "eval_samples_per_second": 471.925, | |
| "eval_steps_per_second": 59.045, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.768472906403941e-05, | |
| "loss": 0.6585, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_accuracy": 0.6796708106994629, | |
| "eval_loss": 0.7765262722969055, | |
| "eval_runtime": 13.8808, | |
| "eval_samples_per_second": 472.74, | |
| "eval_steps_per_second": 59.146, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_accuracy": 0.6746419072151184, | |
| "eval_loss": 0.7696249485015869, | |
| "eval_runtime": 13.8943, | |
| "eval_samples_per_second": 472.28, | |
| "eval_steps_per_second": 59.089, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 0.6705272793769836, | |
| "eval_loss": 0.795970618724823, | |
| "eval_runtime": 13.923, | |
| "eval_samples_per_second": 471.308, | |
| "eval_steps_per_second": 58.967, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.7068965517241385e-05, | |
| "loss": 0.6648, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_accuracy": 0.681347131729126, | |
| "eval_loss": 0.7532095909118652, | |
| "eval_runtime": 13.9308, | |
| "eval_samples_per_second": 471.042, | |
| "eval_steps_per_second": 58.934, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.6834806203842163, | |
| "eval_loss": 0.7632550001144409, | |
| "eval_runtime": 13.9049, | |
| "eval_samples_per_second": 471.921, | |
| "eval_steps_per_second": 59.044, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.645320197044335e-05, | |
| "loss": 0.6663, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 0.6773849725723267, | |
| "eval_loss": 0.7755422592163086, | |
| "eval_runtime": 13.8928, | |
| "eval_samples_per_second": 472.331, | |
| "eval_steps_per_second": 59.095, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_accuracy": 0.6784517168998718, | |
| "eval_loss": 0.7512595653533936, | |
| "eval_runtime": 13.8668, | |
| "eval_samples_per_second": 473.218, | |
| "eval_steps_per_second": 59.206, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_accuracy": 0.6869856715202332, | |
| "eval_loss": 0.7553817629814148, | |
| "eval_runtime": 13.8821, | |
| "eval_samples_per_second": 472.693, | |
| "eval_steps_per_second": 59.141, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.583743842364532e-05, | |
| "loss": 0.6645, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_accuracy": 0.6833282709121704, | |
| "eval_loss": 0.7605084776878357, | |
| "eval_runtime": 13.8864, | |
| "eval_samples_per_second": 472.548, | |
| "eval_steps_per_second": 59.122, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_accuracy": 0.681347131729126, | |
| "eval_loss": 0.7520666718482971, | |
| "eval_runtime": 13.9251, | |
| "eval_samples_per_second": 471.236, | |
| "eval_steps_per_second": 58.958, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 3.522167487684729e-05, | |
| "loss": 0.6596, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_accuracy": 0.6738799214363098, | |
| "eval_loss": 0.7592176795005798, | |
| "eval_runtime": 13.9705, | |
| "eval_samples_per_second": 469.705, | |
| "eval_steps_per_second": 58.767, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6863760948181152, | |
| "eval_loss": 0.7859818935394287, | |
| "eval_runtime": 13.9639, | |
| "eval_samples_per_second": 469.927, | |
| "eval_steps_per_second": 58.795, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "eval_accuracy": 0.6819567084312439, | |
| "eval_loss": 0.7805635929107666, | |
| "eval_runtime": 13.8834, | |
| "eval_samples_per_second": 472.651, | |
| "eval_steps_per_second": 59.135, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.4605911330049265e-05, | |
| "loss": 0.5974, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "eval_accuracy": 0.6827186942100525, | |
| "eval_loss": 0.8164608478546143, | |
| "eval_runtime": 13.879, | |
| "eval_samples_per_second": 472.8, | |
| "eval_steps_per_second": 59.154, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_accuracy": 0.6831758618354797, | |
| "eval_loss": 0.7926530838012695, | |
| "eval_runtime": 13.8407, | |
| "eval_samples_per_second": 474.109, | |
| "eval_steps_per_second": 59.318, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.399014778325123e-05, | |
| "loss": 0.5539, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_accuracy": 0.6781468987464905, | |
| "eval_loss": 0.8301470279693604, | |
| "eval_runtime": 13.8501, | |
| "eval_samples_per_second": 473.786, | |
| "eval_steps_per_second": 59.277, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "eval_accuracy": 0.6764705777168274, | |
| "eval_loss": 0.8108323812484741, | |
| "eval_runtime": 13.8741, | |
| "eval_samples_per_second": 472.969, | |
| "eval_steps_per_second": 59.175, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "eval_accuracy": 0.6892715692520142, | |
| "eval_loss": 0.8093796968460083, | |
| "eval_runtime": 13.8736, | |
| "eval_samples_per_second": 472.983, | |
| "eval_steps_per_second": 59.177, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 3.3374384236453204e-05, | |
| "loss": 0.5556, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "eval_accuracy": 0.6766229867935181, | |
| "eval_loss": 0.8061802983283997, | |
| "eval_runtime": 13.8439, | |
| "eval_samples_per_second": 474.0, | |
| "eval_steps_per_second": 59.304, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_accuracy": 0.6857665181159973, | |
| "eval_loss": 0.8102853894233704, | |
| "eval_runtime": 13.8952, | |
| "eval_samples_per_second": 472.25, | |
| "eval_steps_per_second": 59.085, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.275862068965517e-05, | |
| "loss": 0.561, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_accuracy": 0.6811947822570801, | |
| "eval_loss": 0.7732057571411133, | |
| "eval_runtime": 13.9222, | |
| "eval_samples_per_second": 471.335, | |
| "eval_steps_per_second": 58.971, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_accuracy": 0.6784517168998718, | |
| "eval_loss": 0.8165723085403442, | |
| "eval_runtime": 13.9595, | |
| "eval_samples_per_second": 470.073, | |
| "eval_steps_per_second": 58.813, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_accuracy": 0.6802803874015808, | |
| "eval_loss": 0.807296097278595, | |
| "eval_runtime": 14.0128, | |
| "eval_samples_per_second": 468.288, | |
| "eval_steps_per_second": 58.589, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 3.2142857142857144e-05, | |
| "loss": 0.5708, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_accuracy": 0.6888144016265869, | |
| "eval_loss": 0.796098530292511, | |
| "eval_runtime": 13.9315, | |
| "eval_samples_per_second": 471.019, | |
| "eval_steps_per_second": 58.931, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "eval_accuracy": 0.6831758618354797, | |
| "eval_loss": 0.7848635315895081, | |
| "eval_runtime": 13.9748, | |
| "eval_samples_per_second": 469.561, | |
| "eval_steps_per_second": 58.749, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 3.152709359605912e-05, | |
| "loss": 0.5664, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "eval_accuracy": 0.6843950152397156, | |
| "eval_loss": 0.8060910105705261, | |
| "eval_runtime": 13.9727, | |
| "eval_samples_per_second": 469.631, | |
| "eval_steps_per_second": 58.758, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_accuracy": 0.6804327964782715, | |
| "eval_loss": 0.7997169494628906, | |
| "eval_runtime": 13.9901, | |
| "eval_samples_per_second": 469.045, | |
| "eval_steps_per_second": 58.684, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.6877476572990417, | |
| "eval_loss": 0.7792339324951172, | |
| "eval_runtime": 13.9337, | |
| "eval_samples_per_second": 470.946, | |
| "eval_steps_per_second": 58.922, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "step": 15400, | |
| "total_flos": 9.026901710390784e+16, | |
| "train_loss": 0.7314797597117239, | |
| "train_runtime": 6341.8601, | |
| "train_samples_per_second": 409.627, | |
| "train_steps_per_second": 6.402 | |
| } | |
| ], | |
| "max_steps": 40600, | |
| "num_train_epochs": 10, | |
| "total_flos": 9.026901710390784e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |