{ "best_metric": 0.6304347826086957, "best_model_checkpoint": "SW2-DMAE-DA\\checkpoint-368", "epoch": 38.26086956521739, "eval_steps": 500, "global_step": 440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.87, "learning_rate": 9.090909090909091e-06, "loss": 6.464, "step": 10 }, { "epoch": 0.96, "eval_accuracy": 0.10869565217391304, "eval_loss": 7.919026851654053, "eval_runtime": 0.5926, "eval_samples_per_second": 77.618, "eval_steps_per_second": 5.062, "step": 11 }, { "epoch": 1.74, "learning_rate": 1.8181818181818182e-05, "loss": 6.5496, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 7.5824174880981445, "eval_runtime": 0.6062, "eval_samples_per_second": 75.889, "eval_steps_per_second": 4.949, "step": 23 }, { "epoch": 2.61, "learning_rate": 2.7272727272727273e-05, "loss": 6.1541, "step": 30 }, { "epoch": 2.96, "eval_accuracy": 0.10869565217391304, "eval_loss": 6.415637016296387, "eval_runtime": 0.6292, "eval_samples_per_second": 73.114, "eval_steps_per_second": 4.768, "step": 34 }, { "epoch": 3.48, "learning_rate": 3.6363636363636364e-05, "loss": 5.2649, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 4.506710052490234, "eval_runtime": 0.6096, "eval_samples_per_second": 75.454, "eval_steps_per_second": 4.921, "step": 46 }, { "epoch": 4.35, "learning_rate": 3.93939393939394e-05, "loss": 4.1175, "step": 50 }, { "epoch": 4.96, "eval_accuracy": 0.10869565217391304, "eval_loss": 2.724062442779541, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 57 }, { "epoch": 5.22, "learning_rate": 3.838383838383839e-05, "loss": 2.8424, "step": 60 }, { "epoch": 6.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 1.562261939048767, "eval_runtime": 0.6031, "eval_samples_per_second": 76.268, "eval_steps_per_second": 4.974, "step": 69 }, { "epoch": 6.09, "learning_rate": 3.7373737373737376e-05, "loss": 1.8484, "step": 70 }, { "epoch": 6.96, "learning_rate": 3.6363636363636364e-05, "loss": 1.4376, "step": 80 }, { "epoch": 6.96, "eval_accuracy": 0.10869565217391304, "eval_loss": 1.40028977394104, "eval_runtime": 0.5771, "eval_samples_per_second": 79.703, "eval_steps_per_second": 5.198, "step": 80 }, { "epoch": 7.83, "learning_rate": 3.535353535353536e-05, "loss": 1.4054, "step": 90 }, { "epoch": 8.0, "eval_accuracy": 0.10869565217391304, "eval_loss": 1.4054746627807617, "eval_runtime": 0.6232, "eval_samples_per_second": 73.818, "eval_steps_per_second": 4.814, "step": 92 }, { "epoch": 8.7, "learning_rate": 3.434343434343435e-05, "loss": 1.3798, "step": 100 }, { "epoch": 8.96, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.3465980291366577, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 103 }, { "epoch": 9.57, "learning_rate": 3.3333333333333335e-05, "loss": 1.3331, "step": 110 }, { "epoch": 10.0, "eval_accuracy": 0.15217391304347827, "eval_loss": 1.4385319948196411, "eval_runtime": 0.5961, "eval_samples_per_second": 77.164, "eval_steps_per_second": 5.032, "step": 115 }, { "epoch": 10.43, "learning_rate": 3.232323232323232e-05, "loss": 1.2736, "step": 120 }, { "epoch": 10.96, "eval_accuracy": 0.2391304347826087, "eval_loss": 1.3105802536010742, "eval_runtime": 0.5992, "eval_samples_per_second": 76.774, "eval_steps_per_second": 5.007, "step": 126 }, { "epoch": 11.3, "learning_rate": 3.131313131313132e-05, "loss": 1.2127, "step": 130 }, { "epoch": 12.0, "eval_accuracy": 0.1956521739130435, "eval_loss": 1.2908252477645874, "eval_runtime": 0.6151, "eval_samples_per_second": 74.78, "eval_steps_per_second": 4.877, "step": 138 }, { "epoch": 12.17, "learning_rate": 3.0303030303030306e-05, "loss": 1.2531, "step": 140 }, { "epoch": 12.96, "eval_accuracy": 0.5, "eval_loss": 1.2544833421707153, "eval_runtime": 0.5956, "eval_samples_per_second": 77.228, "eval_steps_per_second": 5.037, "step": 149 }, { "epoch": 13.04, "learning_rate": 2.9292929292929297e-05, "loss": 1.2027, "step": 150 }, { "epoch": 13.91, "learning_rate": 2.8282828282828285e-05, "loss": 1.0972, "step": 160 }, { "epoch": 14.0, "eval_accuracy": 0.34782608695652173, "eval_loss": 1.25150465965271, "eval_runtime": 0.6331, "eval_samples_per_second": 72.654, "eval_steps_per_second": 4.738, "step": 161 }, { "epoch": 14.78, "learning_rate": 2.7272727272727273e-05, "loss": 1.0029, "step": 170 }, { "epoch": 14.96, "eval_accuracy": 0.2608695652173913, "eval_loss": 1.2238019704818726, "eval_runtime": 0.6177, "eval_samples_per_second": 74.474, "eval_steps_per_second": 4.857, "step": 172 }, { "epoch": 15.65, "learning_rate": 2.6262626262626265e-05, "loss": 1.0141, "step": 180 }, { "epoch": 16.0, "eval_accuracy": 0.3695652173913043, "eval_loss": 1.20670485496521, "eval_runtime": 0.5971, "eval_samples_per_second": 77.033, "eval_steps_per_second": 5.024, "step": 184 }, { "epoch": 16.52, "learning_rate": 2.5252525252525253e-05, "loss": 0.9129, "step": 190 }, { "epoch": 16.96, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.1149216890335083, "eval_runtime": 0.6201, "eval_samples_per_second": 74.177, "eval_steps_per_second": 4.838, "step": 195 }, { "epoch": 17.39, "learning_rate": 2.4242424242424244e-05, "loss": 0.9157, "step": 200 }, { "epoch": 18.0, "eval_accuracy": 0.391304347826087, "eval_loss": 1.1956795454025269, "eval_runtime": 0.6221, "eval_samples_per_second": 73.938, "eval_steps_per_second": 4.822, "step": 207 }, { "epoch": 18.26, "learning_rate": 2.3232323232323232e-05, "loss": 0.8516, "step": 210 }, { "epoch": 18.96, "eval_accuracy": 0.5434782608695652, "eval_loss": 1.0033633708953857, "eval_runtime": 0.5911, "eval_samples_per_second": 77.815, "eval_steps_per_second": 5.075, "step": 218 }, { "epoch": 19.13, "learning_rate": 2.2222222222222227e-05, "loss": 0.8139, "step": 220 }, { "epoch": 20.0, "learning_rate": 2.121212121212121e-05, "loss": 0.7804, "step": 230 }, { "epoch": 20.0, "eval_accuracy": 0.4782608695652174, "eval_loss": 0.9990861415863037, "eval_runtime": 0.6019, "eval_samples_per_second": 76.429, "eval_steps_per_second": 4.984, "step": 230 }, { "epoch": 20.87, "learning_rate": 2.0202020202020206e-05, "loss": 0.7328, "step": 240 }, { "epoch": 20.96, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.9839891195297241, "eval_runtime": 0.6041, "eval_samples_per_second": 76.142, "eval_steps_per_second": 4.966, "step": 241 }, { "epoch": 21.74, "learning_rate": 1.9191919191919194e-05, "loss": 0.7101, "step": 250 }, { "epoch": 22.0, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9661365747451782, "eval_runtime": 0.6302, "eval_samples_per_second": 72.989, "eval_steps_per_second": 4.76, "step": 253 }, { "epoch": 22.61, "learning_rate": 1.8181818181818182e-05, "loss": 0.7099, "step": 260 }, { "epoch": 22.96, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9392473697662354, "eval_runtime": 0.5961, "eval_samples_per_second": 77.164, "eval_steps_per_second": 5.032, "step": 264 }, { "epoch": 23.48, "learning_rate": 1.7171717171717173e-05, "loss": 0.7238, "step": 270 }, { "epoch": 24.0, "eval_accuracy": 0.5, "eval_loss": 0.9552508592605591, "eval_runtime": 0.6066, "eval_samples_per_second": 75.827, "eval_steps_per_second": 4.945, "step": 276 }, { "epoch": 24.35, "learning_rate": 1.616161616161616e-05, "loss": 0.6605, "step": 280 }, { "epoch": 24.96, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9571367502212524, "eval_runtime": 0.5891, "eval_samples_per_second": 78.081, "eval_steps_per_second": 5.092, "step": 287 }, { "epoch": 25.22, "learning_rate": 1.5151515151515153e-05, "loss": 0.639, "step": 290 }, { "epoch": 26.0, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.0534369945526123, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 299 }, { "epoch": 26.09, "learning_rate": 1.4141414141414143e-05, "loss": 0.6241, "step": 300 }, { "epoch": 26.96, "learning_rate": 1.3131313131313132e-05, "loss": 0.6123, "step": 310 }, { "epoch": 26.96, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.9152448773384094, "eval_runtime": 0.7392, "eval_samples_per_second": 62.232, "eval_steps_per_second": 4.059, "step": 310 }, { "epoch": 27.83, "learning_rate": 1.2121212121212122e-05, "loss": 0.6021, "step": 320 }, { "epoch": 28.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8703503608703613, "eval_runtime": 0.6151, "eval_samples_per_second": 74.78, "eval_steps_per_second": 4.877, "step": 322 }, { "epoch": 28.7, "learning_rate": 1.1111111111111113e-05, "loss": 0.5971, "step": 330 }, { "epoch": 28.96, "eval_accuracy": 0.5652173913043478, "eval_loss": 0.8726406097412109, "eval_runtime": 0.6187, "eval_samples_per_second": 74.355, "eval_steps_per_second": 4.849, "step": 333 }, { "epoch": 29.57, "learning_rate": 1.0101010101010103e-05, "loss": 0.5413, "step": 340 }, { "epoch": 30.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8287004232406616, "eval_runtime": 0.5996, "eval_samples_per_second": 76.712, "eval_steps_per_second": 5.003, "step": 345 }, { "epoch": 30.43, "learning_rate": 9.090909090909091e-06, "loss": 0.5663, "step": 350 }, { "epoch": 30.96, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.9271087646484375, "eval_runtime": 0.6056, "eval_samples_per_second": 75.952, "eval_steps_per_second": 4.953, "step": 356 }, { "epoch": 31.3, "learning_rate": 8.08080808080808e-06, "loss": 0.5343, "step": 360 }, { "epoch": 32.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.8855524659156799, "eval_runtime": 0.5976, "eval_samples_per_second": 76.969, "eval_steps_per_second": 5.02, "step": 368 }, { "epoch": 32.17, "learning_rate": 7.070707070707071e-06, "loss": 0.525, "step": 370 }, { "epoch": 32.96, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8579288721084595, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 379 }, { "epoch": 33.04, "learning_rate": 6.060606060606061e-06, "loss": 0.5172, "step": 380 }, { "epoch": 33.91, "learning_rate": 5.0505050505050515e-06, "loss": 0.5447, "step": 390 }, { "epoch": 34.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8746341466903687, "eval_runtime": 0.6122, "eval_samples_per_second": 75.144, "eval_steps_per_second": 4.901, "step": 391 }, { "epoch": 34.78, "learning_rate": 4.04040404040404e-06, "loss": 0.5036, "step": 400 }, { "epoch": 34.96, "eval_accuracy": 0.5652173913043478, "eval_loss": 0.8684332370758057, "eval_runtime": 0.6021, "eval_samples_per_second": 76.395, "eval_steps_per_second": 4.982, "step": 402 }, { "epoch": 35.65, "learning_rate": 3.0303030303030305e-06, "loss": 0.4918, "step": 410 }, { "epoch": 36.0, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8268190622329712, "eval_runtime": 0.6312, "eval_samples_per_second": 72.881, "eval_steps_per_second": 4.753, "step": 414 }, { "epoch": 36.52, "learning_rate": 2.02020202020202e-06, "loss": 0.503, "step": 420 }, { "epoch": 36.96, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.8373963832855225, "eval_runtime": 0.5726, "eval_samples_per_second": 80.33, "eval_steps_per_second": 5.239, "step": 425 }, { "epoch": 37.39, "learning_rate": 1.01010101010101e-06, "loss": 0.5114, "step": 430 }, { "epoch": 38.0, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8380029797554016, "eval_runtime": 0.6106, "eval_samples_per_second": 75.33, "eval_steps_per_second": 4.913, "step": 437 }, { "epoch": 38.26, "learning_rate": 0.0, "loss": 0.5272, "step": 440 }, { "epoch": 38.26, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8386858105659485, "eval_runtime": 0.6126, "eval_samples_per_second": 75.084, "eval_steps_per_second": 4.897, "step": 440 }, { "epoch": 38.26, "step": 440, "total_flos": 8.989085534729011e+17, "train_loss": 1.443298595601862, "train_runtime": 559.502, "train_samples_per_second": 51.617, "train_steps_per_second": 0.786 } ], "logging_steps": 10, "max_steps": 440, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 8.989085534729011e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }