| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.91466545838997, | |
| "global_step": 8200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "acc": 0.1328, | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 1.3943, | |
| "rl_loss": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "acc": 0.6697, | |
| "epoch": 0.01, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2157, | |
| "rl_loss": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "acc": 0.6586, | |
| "epoch": 0.02, | |
| "learning_rate": 5e-05, | |
| "loss": 1.041, | |
| "rl_loss": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "acc": 0.6762, | |
| "epoch": 0.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9294, | |
| "rl_loss": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "acc": 0.7078, | |
| "epoch": 0.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8388, | |
| "rl_loss": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "acc": 0.7273, | |
| "epoch": 0.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.7722, | |
| "rl_loss": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "acc": 0.7461, | |
| "epoch": 0.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.7189, | |
| "rl_loss": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "acc": 0.7527, | |
| "epoch": 0.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6749, | |
| "rl_loss": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "acc": 0.7535, | |
| "epoch": 0.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6507, | |
| "rl_loss": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "acc": 0.7547, | |
| "epoch": 0.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6252, | |
| "rl_loss": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "acc": 0.7531, | |
| "epoch": 0.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6337, | |
| "rl_loss": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "acc": 0.7398, | |
| "epoch": 0.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6142, | |
| "rl_loss": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "acc": 0.7641, | |
| "epoch": 0.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5817, | |
| "rl_loss": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "acc": 0.7473, | |
| "epoch": 0.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5992, | |
| "rl_loss": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "acc": 0.7559, | |
| "epoch": 0.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5864, | |
| "rl_loss": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "acc": 0.773, | |
| "epoch": 0.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5623, | |
| "rl_loss": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "acc": 0.7652, | |
| "epoch": 0.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5569, | |
| "rl_loss": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "acc": 0.7797, | |
| "epoch": 0.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5486, | |
| "rl_loss": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "acc": 0.7594, | |
| "epoch": 0.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.563, | |
| "rl_loss": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "acc": 0.7602, | |
| "epoch": 0.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5732, | |
| "rl_loss": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "acc": 0.759, | |
| "epoch": 0.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5498, | |
| "rl_loss": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "acc": 0.7707, | |
| "epoch": 0.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5329, | |
| "rl_loss": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "acc": 0.7777, | |
| "epoch": 0.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.515, | |
| "rl_loss": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "acc": 0.775, | |
| "epoch": 0.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5167, | |
| "rl_loss": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "acc": 0.7789, | |
| "epoch": 0.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5019, | |
| "rl_loss": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "acc": 0.7715, | |
| "epoch": 0.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5308, | |
| "rl_loss": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "acc": 0.7812, | |
| "epoch": 0.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5084, | |
| "rl_loss": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "acc": 0.7891, | |
| "epoch": 0.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4911, | |
| "rl_loss": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "acc": 0.7742, | |
| "epoch": 0.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4978, | |
| "rl_loss": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "acc": 0.7781, | |
| "epoch": 0.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4922, | |
| "rl_loss": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "acc": 0.7766, | |
| "epoch": 0.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5059, | |
| "rl_loss": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "acc": 0.8074, | |
| "epoch": 0.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4612, | |
| "rl_loss": 0.0, | |
| "step": 310 | |
| }, | |
| { | |
| "acc": 0.8012, | |
| "epoch": 0.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4727, | |
| "rl_loss": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "acc": 0.7855, | |
| "epoch": 0.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4901, | |
| "rl_loss": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "acc": 0.7879, | |
| "epoch": 0.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4861, | |
| "rl_loss": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "acc": 0.7902, | |
| "epoch": 0.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.48, | |
| "rl_loss": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "acc": 0.7902, | |
| "epoch": 0.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.482, | |
| "rl_loss": 0.0, | |
| "step": 360 | |
| }, | |
| { | |
| "acc": 0.8086, | |
| "epoch": 0.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4502, | |
| "rl_loss": 0.0, | |
| "step": 370 | |
| }, | |
| { | |
| "acc": 0.7898, | |
| "epoch": 0.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4722, | |
| "rl_loss": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "acc": 0.7852, | |
| "epoch": 0.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4937, | |
| "rl_loss": 0.0, | |
| "step": 390 | |
| }, | |
| { | |
| "acc": 0.7766, | |
| "epoch": 0.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5014, | |
| "rl_loss": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "acc": 0.7887, | |
| "epoch": 0.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4828, | |
| "rl_loss": 0.0, | |
| "step": 410 | |
| }, | |
| { | |
| "acc": 0.7996, | |
| "epoch": 0.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4732, | |
| "rl_loss": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "acc": 0.7953, | |
| "epoch": 0.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4587, | |
| "rl_loss": 0.0, | |
| "step": 430 | |
| }, | |
| { | |
| "acc": 0.7941, | |
| "epoch": 0.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4657, | |
| "rl_loss": 0.0, | |
| "step": 440 | |
| }, | |
| { | |
| "acc": 0.7961, | |
| "epoch": 0.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.473, | |
| "rl_loss": 0.0, | |
| "step": 450 | |
| }, | |
| { | |
| "acc": 0.8004, | |
| "epoch": 0.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4624, | |
| "rl_loss": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "acc": 0.8008, | |
| "epoch": 0.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4538, | |
| "rl_loss": 0.0, | |
| "step": 470 | |
| }, | |
| { | |
| "acc": 0.8035, | |
| "epoch": 0.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4436, | |
| "rl_loss": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "acc": 0.8031, | |
| "epoch": 0.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.458, | |
| "rl_loss": 0.0, | |
| "step": 490 | |
| }, | |
| { | |
| "acc": 0.7945, | |
| "epoch": 0.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4644, | |
| "rl_loss": 0.0, | |
| "step": 500 | |
| }, | |
| { | |
| "acc": 0.7977, | |
| "epoch": 0.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4546, | |
| "rl_loss": 0.0, | |
| "step": 510 | |
| }, | |
| { | |
| "acc": 0.8031, | |
| "epoch": 0.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4448, | |
| "rl_loss": 0.0, | |
| "step": 520 | |
| }, | |
| { | |
| "acc": 0.8113, | |
| "epoch": 0.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4422, | |
| "rl_loss": 0.0, | |
| "step": 530 | |
| }, | |
| { | |
| "acc": 0.8039, | |
| "epoch": 0.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4473, | |
| "rl_loss": 0.0, | |
| "step": 540 | |
| }, | |
| { | |
| "acc": 0.7953, | |
| "epoch": 0.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4599, | |
| "rl_loss": 0.0, | |
| "step": 550 | |
| }, | |
| { | |
| "acc": 0.7996, | |
| "epoch": 0.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4652, | |
| "rl_loss": 0.0, | |
| "step": 560 | |
| }, | |
| { | |
| "acc": 0.8059, | |
| "epoch": 0.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4421, | |
| "rl_loss": 0.0, | |
| "step": 570 | |
| }, | |
| { | |
| "acc": 0.7816, | |
| "epoch": 0.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4716, | |
| "rl_loss": 0.0, | |
| "step": 580 | |
| }, | |
| { | |
| "acc": 0.8074, | |
| "epoch": 0.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4375, | |
| "rl_loss": 0.0, | |
| "step": 590 | |
| }, | |
| { | |
| "acc": 0.7992, | |
| "epoch": 0.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4549, | |
| "rl_loss": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "acc": 0.7937, | |
| "epoch": 0.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4568, | |
| "rl_loss": 0.0, | |
| "step": 610 | |
| }, | |
| { | |
| "acc": 0.8055, | |
| "epoch": 0.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4444, | |
| "rl_loss": 0.0, | |
| "step": 620 | |
| }, | |
| { | |
| "acc": 0.7957, | |
| "epoch": 0.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.458, | |
| "rl_loss": 0.0, | |
| "step": 630 | |
| }, | |
| { | |
| "acc": 0.8098, | |
| "epoch": 0.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4293, | |
| "rl_loss": 0.0, | |
| "step": 640 | |
| }, | |
| { | |
| "acc": 0.8055, | |
| "epoch": 0.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4332, | |
| "rl_loss": 0.0, | |
| "step": 650 | |
| }, | |
| { | |
| "acc": 0.8063, | |
| "epoch": 0.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4421, | |
| "rl_loss": 0.0, | |
| "step": 660 | |
| }, | |
| { | |
| "acc": 0.8023, | |
| "epoch": 0.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4424, | |
| "rl_loss": 0.0, | |
| "step": 670 | |
| }, | |
| { | |
| "acc": 0.8078, | |
| "epoch": 0.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4443, | |
| "rl_loss": 0.0, | |
| "step": 680 | |
| }, | |
| { | |
| "acc": 0.8063, | |
| "epoch": 0.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4392, | |
| "rl_loss": 0.0, | |
| "step": 690 | |
| }, | |
| { | |
| "acc": 0.8023, | |
| "epoch": 0.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4447, | |
| "rl_loss": 0.0, | |
| "step": 700 | |
| }, | |
| { | |
| "acc": 0.798, | |
| "epoch": 0.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4477, | |
| "rl_loss": 0.0, | |
| "step": 710 | |
| }, | |
| { | |
| "acc": 0.8027, | |
| "epoch": 0.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4328, | |
| "rl_loss": 0.0, | |
| "step": 720 | |
| }, | |
| { | |
| "acc": 0.8016, | |
| "epoch": 0.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4439, | |
| "rl_loss": 0.0, | |
| "step": 730 | |
| }, | |
| { | |
| "acc": 0.8125, | |
| "epoch": 0.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4186, | |
| "rl_loss": 0.0, | |
| "step": 740 | |
| }, | |
| { | |
| "acc": 0.8047, | |
| "epoch": 0.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4392, | |
| "rl_loss": 0.0, | |
| "step": 750 | |
| }, | |
| { | |
| "acc": 0.8105, | |
| "epoch": 0.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.421, | |
| "rl_loss": 0.0, | |
| "step": 760 | |
| }, | |
| { | |
| "acc": 0.8184, | |
| "epoch": 0.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4253, | |
| "rl_loss": 0.0, | |
| "step": 770 | |
| }, | |
| { | |
| "acc": 0.8133, | |
| "epoch": 0.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4194, | |
| "rl_loss": 0.0, | |
| "step": 780 | |
| }, | |
| { | |
| "acc": 0.8191, | |
| "epoch": 0.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4115, | |
| "rl_loss": 0.0, | |
| "step": 790 | |
| }, | |
| { | |
| "acc": 0.8168, | |
| "epoch": 0.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4055, | |
| "rl_loss": 0.0, | |
| "step": 800 | |
| }, | |
| { | |
| "acc": 0.8113, | |
| "epoch": 0.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4293, | |
| "rl_loss": 0.0, | |
| "step": 810 | |
| }, | |
| { | |
| "acc": 0.8121, | |
| "epoch": 0.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4212, | |
| "rl_loss": 0.0, | |
| "step": 820 | |
| }, | |
| { | |
| "acc": 0.8781, | |
| "epoch": 1.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4307, | |
| "rl_loss": 0.0, | |
| "step": 830 | |
| }, | |
| { | |
| "acc": 0.8316, | |
| "epoch": 1.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3971, | |
| "rl_loss": 0.0, | |
| "step": 840 | |
| }, | |
| { | |
| "acc": 0.8121, | |
| "epoch": 1.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4076, | |
| "rl_loss": 0.0, | |
| "step": 850 | |
| }, | |
| { | |
| "acc": 0.8113, | |
| "epoch": 1.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.427, | |
| "rl_loss": 0.0, | |
| "step": 860 | |
| }, | |
| { | |
| "acc": 0.818, | |
| "epoch": 1.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4076, | |
| "rl_loss": 0.0, | |
| "step": 870 | |
| }, | |
| { | |
| "acc": 0.832, | |
| "epoch": 1.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3907, | |
| "rl_loss": 0.0, | |
| "step": 880 | |
| }, | |
| { | |
| "acc": 0.8344, | |
| "epoch": 1.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3847, | |
| "rl_loss": 0.0, | |
| "step": 890 | |
| }, | |
| { | |
| "acc": 0.8258, | |
| "epoch": 1.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3954, | |
| "rl_loss": 0.0, | |
| "step": 900 | |
| }, | |
| { | |
| "acc": 0.8105, | |
| "epoch": 1.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4183, | |
| "rl_loss": 0.0, | |
| "step": 910 | |
| }, | |
| { | |
| "acc": 0.8109, | |
| "epoch": 1.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4205, | |
| "rl_loss": 0.0, | |
| "step": 920 | |
| }, | |
| { | |
| "acc": 0.8207, | |
| "epoch": 1.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3987, | |
| "rl_loss": 0.0, | |
| "step": 930 | |
| }, | |
| { | |
| "acc": 0.8176, | |
| "epoch": 1.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4212, | |
| "rl_loss": 0.0, | |
| "step": 940 | |
| }, | |
| { | |
| "acc": 0.8125, | |
| "epoch": 1.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.41, | |
| "rl_loss": 0.0, | |
| "step": 950 | |
| }, | |
| { | |
| "acc": 0.8082, | |
| "epoch": 1.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4162, | |
| "rl_loss": 0.0, | |
| "step": 960 | |
| }, | |
| { | |
| "acc": 0.827, | |
| "epoch": 1.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3952, | |
| "rl_loss": 0.0, | |
| "step": 970 | |
| }, | |
| { | |
| "acc": 0.8352, | |
| "epoch": 1.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3855, | |
| "rl_loss": 0.0, | |
| "step": 980 | |
| }, | |
| { | |
| "acc": 0.8172, | |
| "epoch": 1.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4156, | |
| "rl_loss": 0.0, | |
| "step": 990 | |
| }, | |
| { | |
| "acc": 0.8187, | |
| "epoch": 1.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3965, | |
| "rl_loss": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "acc": 0.8168, | |
| "epoch": 1.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4094, | |
| "rl_loss": 0.0, | |
| "step": 1010 | |
| }, | |
| { | |
| "acc": 0.8305, | |
| "epoch": 1.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3935, | |
| "rl_loss": 0.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "acc": 0.827, | |
| "epoch": 1.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4029, | |
| "rl_loss": 0.0, | |
| "step": 1030 | |
| }, | |
| { | |
| "acc": 0.8266, | |
| "epoch": 1.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3923, | |
| "rl_loss": 0.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "acc": 0.8254, | |
| "epoch": 1.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3972, | |
| "rl_loss": 0.0, | |
| "step": 1050 | |
| }, | |
| { | |
| "acc": 0.8148, | |
| "epoch": 1.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4093, | |
| "rl_loss": 0.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "acc": 0.8215, | |
| "epoch": 1.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3899, | |
| "rl_loss": 0.0, | |
| "step": 1070 | |
| }, | |
| { | |
| "acc": 0.8293, | |
| "epoch": 1.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3939, | |
| "rl_loss": 0.0, | |
| "step": 1080 | |
| }, | |
| { | |
| "acc": 0.8324, | |
| "epoch": 1.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3707, | |
| "rl_loss": 0.0, | |
| "step": 1090 | |
| }, | |
| { | |
| "acc": 0.818, | |
| "epoch": 1.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3953, | |
| "rl_loss": 0.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "acc": 0.8273, | |
| "epoch": 1.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3765, | |
| "rl_loss": 0.0, | |
| "step": 1110 | |
| }, | |
| { | |
| "acc": 0.8258, | |
| "epoch": 1.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3968, | |
| "rl_loss": 0.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "acc": 0.8324, | |
| "epoch": 1.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3767, | |
| "rl_loss": 0.0, | |
| "step": 1130 | |
| }, | |
| { | |
| "acc": 0.8187, | |
| "epoch": 1.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.4018, | |
| "rl_loss": 0.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "acc": 0.8387, | |
| "epoch": 1.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3744, | |
| "rl_loss": 0.0, | |
| "step": 1150 | |
| }, | |
| { | |
| "acc": 0.8328, | |
| "epoch": 1.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3772, | |
| "rl_loss": 0.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "acc": 0.8344, | |
| "epoch": 1.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3756, | |
| "rl_loss": 0.0, | |
| "step": 1170 | |
| }, | |
| { | |
| "acc": 0.8414, | |
| "epoch": 1.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3749, | |
| "rl_loss": 0.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "acc": 0.8426, | |
| "epoch": 1.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3701, | |
| "rl_loss": 0.0, | |
| "step": 1190 | |
| }, | |
| { | |
| "acc": 0.8387, | |
| "epoch": 1.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3705, | |
| "rl_loss": 0.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "acc": 0.8395, | |
| "epoch": 1.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3613, | |
| "rl_loss": 0.0, | |
| "step": 1210 | |
| }, | |
| { | |
| "acc": 0.8266, | |
| "epoch": 1.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3773, | |
| "rl_loss": 0.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "acc": 0.8328, | |
| "epoch": 1.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3721, | |
| "rl_loss": 0.0, | |
| "step": 1230 | |
| }, | |
| { | |
| "acc": 0.8414, | |
| "epoch": 1.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.376, | |
| "rl_loss": 0.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "acc": 0.8328, | |
| "epoch": 1.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.368, | |
| "rl_loss": 0.0, | |
| "step": 1250 | |
| }, | |
| { | |
| "acc": 0.823, | |
| "epoch": 1.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3858, | |
| "rl_loss": 0.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "acc": 0.8395, | |
| "epoch": 1.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3623, | |
| "rl_loss": 0.0, | |
| "step": 1270 | |
| }, | |
| { | |
| "acc": 0.8398, | |
| "epoch": 1.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.368, | |
| "rl_loss": 0.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "acc": 0.8285, | |
| "epoch": 1.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.369, | |
| "rl_loss": 0.0, | |
| "step": 1290 | |
| }, | |
| { | |
| "acc": 0.8352, | |
| "epoch": 1.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3751, | |
| "rl_loss": 0.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "acc": 0.8371, | |
| "epoch": 1.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3655, | |
| "rl_loss": 0.0, | |
| "step": 1310 | |
| }, | |
| { | |
| "acc": 0.8234, | |
| "epoch": 1.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.391, | |
| "rl_loss": 0.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "acc": 0.8398, | |
| "epoch": 1.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.357, | |
| "rl_loss": 0.0, | |
| "step": 1330 | |
| }, | |
| { | |
| "acc": 0.8371, | |
| "epoch": 1.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3612, | |
| "rl_loss": 0.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "acc": 0.8422, | |
| "epoch": 1.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3626, | |
| "rl_loss": 0.0, | |
| "step": 1350 | |
| }, | |
| { | |
| "acc": 0.8531, | |
| "epoch": 1.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3425, | |
| "rl_loss": 0.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "acc": 0.825, | |
| "epoch": 1.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3995, | |
| "rl_loss": 0.0, | |
| "step": 1370 | |
| }, | |
| { | |
| "acc": 0.8348, | |
| "epoch": 1.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3711, | |
| "rl_loss": 0.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "acc": 0.8344, | |
| "epoch": 1.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3767, | |
| "rl_loss": 0.0, | |
| "step": 1390 | |
| }, | |
| { | |
| "acc": 0.85, | |
| "epoch": 1.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3641, | |
| "rl_loss": 0.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "acc": 0.8355, | |
| "epoch": 1.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3808, | |
| "rl_loss": 0.0, | |
| "step": 1410 | |
| }, | |
| { | |
| "acc": 0.8344, | |
| "epoch": 1.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3613, | |
| "rl_loss": 0.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "acc": 0.8484, | |
| "epoch": 1.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.361, | |
| "rl_loss": 0.0, | |
| "step": 1430 | |
| }, | |
| { | |
| "acc": 0.8406, | |
| "epoch": 1.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3547, | |
| "rl_loss": 0.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "acc": 0.852, | |
| "epoch": 1.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3526, | |
| "rl_loss": 0.0, | |
| "step": 1450 | |
| }, | |
| { | |
| "acc": 0.8504, | |
| "epoch": 1.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3487, | |
| "rl_loss": 0.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "acc": 0.8391, | |
| "epoch": 1.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3595, | |
| "rl_loss": 0.0, | |
| "step": 1470 | |
| }, | |
| { | |
| "acc": 0.8562, | |
| "epoch": 1.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3311, | |
| "rl_loss": 0.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "acc": 0.8602, | |
| "epoch": 1.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3269, | |
| "rl_loss": 0.0, | |
| "step": 1490 | |
| }, | |
| { | |
| "acc": 0.8484, | |
| "epoch": 1.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.358, | |
| "rl_loss": 0.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "acc": 0.852, | |
| "epoch": 1.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3417, | |
| "rl_loss": 0.0, | |
| "step": 1510 | |
| }, | |
| { | |
| "acc": 0.8469, | |
| "epoch": 1.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3557, | |
| "rl_loss": 0.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "acc": 0.8469, | |
| "epoch": 1.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3586, | |
| "rl_loss": 0.0, | |
| "step": 1530 | |
| }, | |
| { | |
| "acc": 0.8371, | |
| "epoch": 1.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3646, | |
| "rl_loss": 0.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "acc": 0.8453, | |
| "epoch": 1.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3548, | |
| "rl_loss": 0.0, | |
| "step": 1550 | |
| }, | |
| { | |
| "acc": 0.8617, | |
| "epoch": 1.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3349, | |
| "rl_loss": 0.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "acc": 0.8484, | |
| "epoch": 1.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3502, | |
| "rl_loss": 0.0, | |
| "step": 1570 | |
| }, | |
| { | |
| "acc": 0.8359, | |
| "epoch": 1.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3621, | |
| "rl_loss": 0.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "acc": 0.8441, | |
| "epoch": 1.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3461, | |
| "rl_loss": 0.0, | |
| "step": 1590 | |
| }, | |
| { | |
| "acc": 0.8508, | |
| "epoch": 1.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3459, | |
| "rl_loss": 0.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "acc": 0.8398, | |
| "epoch": 1.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3699, | |
| "rl_loss": 0.0, | |
| "step": 1610 | |
| }, | |
| { | |
| "acc": 0.8504, | |
| "epoch": 1.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3345, | |
| "rl_loss": 0.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "acc": 0.8625, | |
| "epoch": 1.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3296, | |
| "rl_loss": 0.0, | |
| "step": 1630 | |
| }, | |
| { | |
| "acc": 0.8594, | |
| "epoch": 1.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3304, | |
| "rl_loss": 0.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "acc": 0.868, | |
| "epoch": 1.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3121, | |
| "rl_loss": 0.0, | |
| "step": 1650 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 2.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3579, | |
| "rl_loss": 0.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "acc": 0.8711, | |
| "epoch": 2.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3151, | |
| "rl_loss": 0.0, | |
| "step": 1670 | |
| }, | |
| { | |
| "acc": 0.8652, | |
| "epoch": 2.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3252, | |
| "rl_loss": 0.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "acc": 0.8617, | |
| "epoch": 2.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.341, | |
| "rl_loss": 0.0, | |
| "step": 1690 | |
| }, | |
| { | |
| "acc": 0.8559, | |
| "epoch": 2.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3418, | |
| "rl_loss": 0.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "acc": 0.8578, | |
| "epoch": 2.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.331, | |
| "rl_loss": 0.0, | |
| "step": 1710 | |
| }, | |
| { | |
| "acc": 0.8762, | |
| "epoch": 2.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.307, | |
| "rl_loss": 0.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "acc": 0.866, | |
| "epoch": 2.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3248, | |
| "rl_loss": 0.0, | |
| "step": 1730 | |
| }, | |
| { | |
| "acc": 0.8703, | |
| "epoch": 2.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3186, | |
| "rl_loss": 0.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "acc": 0.8562, | |
| "epoch": 2.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3326, | |
| "rl_loss": 0.0, | |
| "step": 1750 | |
| }, | |
| { | |
| "acc": 0.8652, | |
| "epoch": 2.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3305, | |
| "rl_loss": 0.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "acc": 0.8578, | |
| "epoch": 2.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3151, | |
| "rl_loss": 0.0, | |
| "step": 1770 | |
| }, | |
| { | |
| "acc": 0.8637, | |
| "epoch": 2.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3102, | |
| "rl_loss": 0.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "acc": 0.8656, | |
| "epoch": 2.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3211, | |
| "rl_loss": 0.0, | |
| "step": 1790 | |
| }, | |
| { | |
| "acc": 0.8613, | |
| "epoch": 2.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3273, | |
| "rl_loss": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "acc": 0.8594, | |
| "epoch": 2.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3464, | |
| "rl_loss": 0.0, | |
| "step": 1810 | |
| }, | |
| { | |
| "acc": 0.8523, | |
| "epoch": 2.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3397, | |
| "rl_loss": 0.0, | |
| "step": 1820 | |
| }, | |
| { | |
| "acc": 0.8566, | |
| "epoch": 2.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3336, | |
| "rl_loss": 0.0, | |
| "step": 1830 | |
| }, | |
| { | |
| "acc": 0.8539, | |
| "epoch": 2.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3379, | |
| "rl_loss": 0.0, | |
| "step": 1840 | |
| }, | |
| { | |
| "acc": 0.8641, | |
| "epoch": 2.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3224, | |
| "rl_loss": 0.0, | |
| "step": 1850 | |
| }, | |
| { | |
| "acc": 0.8684, | |
| "epoch": 2.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.311, | |
| "rl_loss": 0.0, | |
| "step": 1860 | |
| }, | |
| { | |
| "acc": 0.8602, | |
| "epoch": 2.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3266, | |
| "rl_loss": 0.0, | |
| "step": 1870 | |
| }, | |
| { | |
| "acc": 0.859, | |
| "epoch": 2.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3218, | |
| "rl_loss": 0.0, | |
| "step": 1880 | |
| }, | |
| { | |
| "acc": 0.8734, | |
| "epoch": 2.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3036, | |
| "rl_loss": 0.0, | |
| "step": 1890 | |
| }, | |
| { | |
| "acc": 0.8602, | |
| "epoch": 2.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3198, | |
| "rl_loss": 0.0, | |
| "step": 1900 | |
| }, | |
| { | |
| "acc": 0.8652, | |
| "epoch": 2.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3203, | |
| "rl_loss": 0.0, | |
| "step": 1910 | |
| }, | |
| { | |
| "acc": 0.8707, | |
| "epoch": 2.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3062, | |
| "rl_loss": 0.0, | |
| "step": 1920 | |
| }, | |
| { | |
| "acc": 0.8711, | |
| "epoch": 2.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3125, | |
| "rl_loss": 0.0, | |
| "step": 1930 | |
| }, | |
| { | |
| "acc": 0.8703, | |
| "epoch": 2.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3192, | |
| "rl_loss": 0.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "acc": 0.8602, | |
| "epoch": 2.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3343, | |
| "rl_loss": 0.0, | |
| "step": 1950 | |
| }, | |
| { | |
| "acc": 0.8582, | |
| "epoch": 2.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3355, | |
| "rl_loss": 0.0, | |
| "step": 1960 | |
| }, | |
| { | |
| "acc": 0.8797, | |
| "epoch": 2.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3057, | |
| "rl_loss": 0.0, | |
| "step": 1970 | |
| }, | |
| { | |
| "acc": 0.8766, | |
| "epoch": 2.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2924, | |
| "rl_loss": 0.0, | |
| "step": 1980 | |
| }, | |
| { | |
| "acc": 0.8711, | |
| "epoch": 2.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3238, | |
| "rl_loss": 0.0, | |
| "step": 1990 | |
| }, | |
| { | |
| "acc": 0.8621, | |
| "epoch": 2.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3301, | |
| "rl_loss": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "acc": 0.8562, | |
| "epoch": 2.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3339, | |
| "rl_loss": 0.0, | |
| "step": 2010 | |
| }, | |
| { | |
| "acc": 0.8723, | |
| "epoch": 2.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3075, | |
| "rl_loss": 0.0, | |
| "step": 2020 | |
| }, | |
| { | |
| "acc": 0.8633, | |
| "epoch": 2.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.308, | |
| "rl_loss": 0.0, | |
| "step": 2030 | |
| }, | |
| { | |
| "acc": 0.8656, | |
| "epoch": 2.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3188, | |
| "rl_loss": 0.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "acc": 0.8742, | |
| "epoch": 2.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.314, | |
| "rl_loss": 0.0, | |
| "step": 2050 | |
| }, | |
| { | |
| "acc": 0.8586, | |
| "epoch": 2.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3236, | |
| "rl_loss": 0.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "acc": 0.8648, | |
| "epoch": 2.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.314, | |
| "rl_loss": 0.0, | |
| "step": 2070 | |
| }, | |
| { | |
| "acc": 0.8809, | |
| "epoch": 2.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3056, | |
| "rl_loss": 0.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 2.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2934, | |
| "rl_loss": 0.0, | |
| "step": 2090 | |
| }, | |
| { | |
| "acc": 0.8766, | |
| "epoch": 2.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2955, | |
| "rl_loss": 0.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "acc": 0.8805, | |
| "epoch": 2.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3019, | |
| "rl_loss": 0.0, | |
| "step": 2110 | |
| }, | |
| { | |
| "acc": 0.8629, | |
| "epoch": 2.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3222, | |
| "rl_loss": 0.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "acc": 0.8652, | |
| "epoch": 2.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.318, | |
| "rl_loss": 0.0, | |
| "step": 2130 | |
| }, | |
| { | |
| "acc": 0.8715, | |
| "epoch": 2.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3015, | |
| "rl_loss": 0.0, | |
| "step": 2140 | |
| }, | |
| { | |
| "acc": 0.8609, | |
| "epoch": 2.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3323, | |
| "rl_loss": 0.0, | |
| "step": 2150 | |
| }, | |
| { | |
| "acc": 0.8699, | |
| "epoch": 2.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.311, | |
| "rl_loss": 0.0, | |
| "step": 2160 | |
| }, | |
| { | |
| "acc": 0.8762, | |
| "epoch": 2.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2953, | |
| "rl_loss": 0.0, | |
| "step": 2170 | |
| }, | |
| { | |
| "acc": 0.8629, | |
| "epoch": 2.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3286, | |
| "rl_loss": 0.0, | |
| "step": 2180 | |
| }, | |
| { | |
| "acc": 0.8723, | |
| "epoch": 2.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3052, | |
| "rl_loss": 0.0, | |
| "step": 2190 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 2.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2737, | |
| "rl_loss": 0.0, | |
| "step": 2200 | |
| }, | |
| { | |
| "acc": 0.8641, | |
| "epoch": 2.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3221, | |
| "rl_loss": 0.0, | |
| "step": 2210 | |
| }, | |
| { | |
| "acc": 0.873, | |
| "epoch": 2.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3037, | |
| "rl_loss": 0.0, | |
| "step": 2220 | |
| }, | |
| { | |
| "acc": 0.8754, | |
| "epoch": 2.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3028, | |
| "rl_loss": 0.0, | |
| "step": 2230 | |
| }, | |
| { | |
| "acc": 0.8828, | |
| "epoch": 2.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2832, | |
| "rl_loss": 0.0, | |
| "step": 2240 | |
| }, | |
| { | |
| "acc": 0.8691, | |
| "epoch": 2.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2996, | |
| "rl_loss": 0.0, | |
| "step": 2250 | |
| }, | |
| { | |
| "acc": 0.866, | |
| "epoch": 2.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3034, | |
| "rl_loss": 0.0, | |
| "step": 2260 | |
| }, | |
| { | |
| "acc": 0.8613, | |
| "epoch": 2.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3218, | |
| "rl_loss": 0.0, | |
| "step": 2270 | |
| }, | |
| { | |
| "acc": 0.8738, | |
| "epoch": 2.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3046, | |
| "rl_loss": 0.0, | |
| "step": 2280 | |
| }, | |
| { | |
| "acc": 0.8715, | |
| "epoch": 2.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3066, | |
| "rl_loss": 0.0, | |
| "step": 2290 | |
| }, | |
| { | |
| "acc": 0.8855, | |
| "epoch": 2.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.271, | |
| "rl_loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "acc": 0.8715, | |
| "epoch": 2.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3029, | |
| "rl_loss": 0.0, | |
| "step": 2310 | |
| }, | |
| { | |
| "acc": 0.8746, | |
| "epoch": 2.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2981, | |
| "rl_loss": 0.0, | |
| "step": 2320 | |
| }, | |
| { | |
| "acc": 0.875, | |
| "epoch": 2.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2939, | |
| "rl_loss": 0.0, | |
| "step": 2330 | |
| }, | |
| { | |
| "acc": 0.8766, | |
| "epoch": 2.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2847, | |
| "rl_loss": 0.0, | |
| "step": 2340 | |
| }, | |
| { | |
| "acc": 0.8777, | |
| "epoch": 2.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2957, | |
| "rl_loss": 0.0, | |
| "step": 2350 | |
| }, | |
| { | |
| "acc": 0.8746, | |
| "epoch": 2.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2954, | |
| "rl_loss": 0.0, | |
| "step": 2360 | |
| }, | |
| { | |
| "acc": 0.8754, | |
| "epoch": 2.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3155, | |
| "rl_loss": 0.0, | |
| "step": 2370 | |
| }, | |
| { | |
| "acc": 0.8762, | |
| "epoch": 2.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2897, | |
| "rl_loss": 0.0, | |
| "step": 2380 | |
| }, | |
| { | |
| "acc": 0.8812, | |
| "epoch": 2.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2935, | |
| "rl_loss": 0.0, | |
| "step": 2390 | |
| }, | |
| { | |
| "acc": 0.8672, | |
| "epoch": 2.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2996, | |
| "rl_loss": 0.0, | |
| "step": 2400 | |
| }, | |
| { | |
| "acc": 0.8852, | |
| "epoch": 2.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2733, | |
| "rl_loss": 0.0, | |
| "step": 2410 | |
| }, | |
| { | |
| "acc": 0.8656, | |
| "epoch": 2.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3098, | |
| "rl_loss": 0.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "acc": 0.8785, | |
| "epoch": 2.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2878, | |
| "rl_loss": 0.0, | |
| "step": 2430 | |
| }, | |
| { | |
| "acc": 0.8715, | |
| "epoch": 2.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3002, | |
| "rl_loss": 0.0, | |
| "step": 2440 | |
| }, | |
| { | |
| "acc": 0.8727, | |
| "epoch": 2.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3119, | |
| "rl_loss": 0.0, | |
| "step": 2450 | |
| }, | |
| { | |
| "acc": 0.866, | |
| "epoch": 2.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3058, | |
| "rl_loss": 0.0, | |
| "step": 2460 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 2.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2928, | |
| "rl_loss": 0.0, | |
| "step": 2470 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 3.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2775, | |
| "rl_loss": 0.0, | |
| "step": 2480 | |
| }, | |
| { | |
| "acc": 0.9328, | |
| "epoch": 3.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3065, | |
| "rl_loss": 0.0, | |
| "step": 2490 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 3.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2727, | |
| "rl_loss": 0.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "acc": 0.8773, | |
| "epoch": 3.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2947, | |
| "rl_loss": 0.0, | |
| "step": 2510 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 3.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2719, | |
| "rl_loss": 0.0, | |
| "step": 2520 | |
| }, | |
| { | |
| "acc": 0.8832, | |
| "epoch": 3.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.289, | |
| "rl_loss": 0.0, | |
| "step": 2530 | |
| }, | |
| { | |
| "acc": 0.8871, | |
| "epoch": 3.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2744, | |
| "rl_loss": 0.0, | |
| "step": 2540 | |
| }, | |
| { | |
| "acc": 0.8848, | |
| "epoch": 3.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2806, | |
| "rl_loss": 0.0, | |
| "step": 2550 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 3.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2833, | |
| "rl_loss": 0.0, | |
| "step": 2560 | |
| }, | |
| { | |
| "acc": 0.8715, | |
| "epoch": 3.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2987, | |
| "rl_loss": 0.0, | |
| "step": 2570 | |
| }, | |
| { | |
| "acc": 0.8898, | |
| "epoch": 3.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2736, | |
| "rl_loss": 0.0, | |
| "step": 2580 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 3.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2683, | |
| "rl_loss": 0.0, | |
| "step": 2590 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 3.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2705, | |
| "rl_loss": 0.0, | |
| "step": 2600 | |
| }, | |
| { | |
| "acc": 0.8785, | |
| "epoch": 3.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2759, | |
| "rl_loss": 0.0, | |
| "step": 2610 | |
| }, | |
| { | |
| "acc": 0.8805, | |
| "epoch": 3.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.292, | |
| "rl_loss": 0.0, | |
| "step": 2620 | |
| }, | |
| { | |
| "acc": 0.8742, | |
| "epoch": 3.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.293, | |
| "rl_loss": 0.0, | |
| "step": 2630 | |
| }, | |
| { | |
| "acc": 0.868, | |
| "epoch": 3.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2994, | |
| "rl_loss": 0.0, | |
| "step": 2640 | |
| }, | |
| { | |
| "acc": 0.8785, | |
| "epoch": 3.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.285, | |
| "rl_loss": 0.0, | |
| "step": 2650 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 3.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2569, | |
| "rl_loss": 0.0, | |
| "step": 2660 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 3.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2825, | |
| "rl_loss": 0.0, | |
| "step": 2670 | |
| }, | |
| { | |
| "acc": 0.8762, | |
| "epoch": 3.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.279, | |
| "rl_loss": 0.0, | |
| "step": 2680 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 3.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2641, | |
| "rl_loss": 0.0, | |
| "step": 2690 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 3.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2656, | |
| "rl_loss": 0.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "acc": 0.8742, | |
| "epoch": 3.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3045, | |
| "rl_loss": 0.0, | |
| "step": 2710 | |
| }, | |
| { | |
| "acc": 0.8793, | |
| "epoch": 3.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.288, | |
| "rl_loss": 0.0, | |
| "step": 2720 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 3.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2725, | |
| "rl_loss": 0.0, | |
| "step": 2730 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 3.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2754, | |
| "rl_loss": 0.0, | |
| "step": 2740 | |
| }, | |
| { | |
| "acc": 0.8848, | |
| "epoch": 3.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2703, | |
| "rl_loss": 0.0, | |
| "step": 2750 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 3.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2772, | |
| "rl_loss": 0.0, | |
| "step": 2760 | |
| }, | |
| { | |
| "acc": 0.8805, | |
| "epoch": 3.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2789, | |
| "rl_loss": 0.0, | |
| "step": 2770 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 3.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.276, | |
| "rl_loss": 0.0, | |
| "step": 2780 | |
| }, | |
| { | |
| "acc": 0.8828, | |
| "epoch": 3.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2764, | |
| "rl_loss": 0.0, | |
| "step": 2790 | |
| }, | |
| { | |
| "acc": 0.8766, | |
| "epoch": 3.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2975, | |
| "rl_loss": 0.0, | |
| "step": 2800 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 3.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2911, | |
| "rl_loss": 0.0, | |
| "step": 2810 | |
| }, | |
| { | |
| "acc": 0.8906, | |
| "epoch": 3.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2705, | |
| "rl_loss": 0.0, | |
| "step": 2820 | |
| }, | |
| { | |
| "acc": 0.8875, | |
| "epoch": 3.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2655, | |
| "rl_loss": 0.0, | |
| "step": 2830 | |
| }, | |
| { | |
| "acc": 0.8855, | |
| "epoch": 3.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2735, | |
| "rl_loss": 0.0, | |
| "step": 2840 | |
| }, | |
| { | |
| "acc": 0.8922, | |
| "epoch": 3.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2545, | |
| "rl_loss": 0.0, | |
| "step": 2850 | |
| }, | |
| { | |
| "acc": 0.8871, | |
| "epoch": 3.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2778, | |
| "rl_loss": 0.0, | |
| "step": 2860 | |
| }, | |
| { | |
| "acc": 0.884, | |
| "epoch": 3.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2765, | |
| "rl_loss": 0.0, | |
| "step": 2870 | |
| }, | |
| { | |
| "acc": 0.8789, | |
| "epoch": 3.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2767, | |
| "rl_loss": 0.0, | |
| "step": 2880 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 3.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2776, | |
| "rl_loss": 0.0, | |
| "step": 2890 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 3.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2635, | |
| "rl_loss": 0.0, | |
| "step": 2900 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 3.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.263, | |
| "rl_loss": 0.0, | |
| "step": 2910 | |
| }, | |
| { | |
| "acc": 0.8641, | |
| "epoch": 3.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2988, | |
| "rl_loss": 0.0, | |
| "step": 2920 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 3.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2704, | |
| "rl_loss": 0.0, | |
| "step": 2930 | |
| }, | |
| { | |
| "acc": 0.893, | |
| "epoch": 3.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2637, | |
| "rl_loss": 0.0, | |
| "step": 2940 | |
| }, | |
| { | |
| "acc": 0.8863, | |
| "epoch": 3.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2808, | |
| "rl_loss": 0.0, | |
| "step": 2950 | |
| }, | |
| { | |
| "acc": 0.8805, | |
| "epoch": 3.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2893, | |
| "rl_loss": 0.0, | |
| "step": 2960 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 3.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2735, | |
| "rl_loss": 0.0, | |
| "step": 2970 | |
| }, | |
| { | |
| "acc": 0.8836, | |
| "epoch": 3.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.279, | |
| "rl_loss": 0.0, | |
| "step": 2980 | |
| }, | |
| { | |
| "acc": 0.8832, | |
| "epoch": 3.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2745, | |
| "rl_loss": 0.0, | |
| "step": 2990 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 3.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.27, | |
| "rl_loss": 0.0, | |
| "step": 3000 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 3.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2817, | |
| "rl_loss": 0.0, | |
| "step": 3010 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 3.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2745, | |
| "rl_loss": 0.0, | |
| "step": 3020 | |
| }, | |
| { | |
| "acc": 0.8848, | |
| "epoch": 3.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2817, | |
| "rl_loss": 0.0, | |
| "step": 3030 | |
| }, | |
| { | |
| "acc": 0.8793, | |
| "epoch": 3.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2878, | |
| "rl_loss": 0.0, | |
| "step": 3040 | |
| }, | |
| { | |
| "acc": 0.8828, | |
| "epoch": 3.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2729, | |
| "rl_loss": 0.0, | |
| "step": 3050 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 3.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2552, | |
| "rl_loss": 0.0, | |
| "step": 3060 | |
| }, | |
| { | |
| "acc": 0.8832, | |
| "epoch": 3.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.28, | |
| "rl_loss": 0.0, | |
| "step": 3070 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 3.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.277, | |
| "rl_loss": 0.0, | |
| "step": 3080 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 3.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2807, | |
| "rl_loss": 0.0, | |
| "step": 3090 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 3.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2867, | |
| "rl_loss": 0.0, | |
| "step": 3100 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 3.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.254, | |
| "rl_loss": 0.0, | |
| "step": 3110 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 3.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2649, | |
| "rl_loss": 0.0, | |
| "step": 3120 | |
| }, | |
| { | |
| "acc": 0.8828, | |
| "epoch": 3.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2786, | |
| "rl_loss": 0.0, | |
| "step": 3130 | |
| }, | |
| { | |
| "acc": 0.8848, | |
| "epoch": 3.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2772, | |
| "rl_loss": 0.0, | |
| "step": 3140 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 3.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2648, | |
| "rl_loss": 0.0, | |
| "step": 3150 | |
| }, | |
| { | |
| "acc": 0.8934, | |
| "epoch": 3.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2547, | |
| "rl_loss": 0.0, | |
| "step": 3160 | |
| }, | |
| { | |
| "acc": 0.8789, | |
| "epoch": 3.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2892, | |
| "rl_loss": 0.0, | |
| "step": 3170 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 3.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2681, | |
| "rl_loss": 0.0, | |
| "step": 3180 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 3.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2824, | |
| "rl_loss": 0.0, | |
| "step": 3190 | |
| }, | |
| { | |
| "acc": 0.8785, | |
| "epoch": 3.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.286, | |
| "rl_loss": 0.0, | |
| "step": 3200 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 3.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2637, | |
| "rl_loss": 0.0, | |
| "step": 3210 | |
| }, | |
| { | |
| "acc": 0.8801, | |
| "epoch": 3.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2692, | |
| "rl_loss": 0.0, | |
| "step": 3220 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 3.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2677, | |
| "rl_loss": 0.0, | |
| "step": 3230 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 3.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2556, | |
| "rl_loss": 0.0, | |
| "step": 3240 | |
| }, | |
| { | |
| "acc": 0.8805, | |
| "epoch": 3.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2892, | |
| "rl_loss": 0.0, | |
| "step": 3250 | |
| }, | |
| { | |
| "acc": 0.8875, | |
| "epoch": 3.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2747, | |
| "rl_loss": 0.0, | |
| "step": 3260 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 3.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2588, | |
| "rl_loss": 0.0, | |
| "step": 3270 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 3.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.284, | |
| "rl_loss": 0.0, | |
| "step": 3280 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 3.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2669, | |
| "rl_loss": 0.0, | |
| "step": 3290 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 3.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2705, | |
| "rl_loss": 0.0, | |
| "step": 3300 | |
| }, | |
| { | |
| "acc": 0.9437, | |
| "epoch": 4.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2833, | |
| "rl_loss": 0.0, | |
| "step": 3310 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 4.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2508, | |
| "rl_loss": 0.0, | |
| "step": 3320 | |
| }, | |
| { | |
| "acc": 0.9098, | |
| "epoch": 4.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2356, | |
| "rl_loss": 0.0, | |
| "step": 3330 | |
| }, | |
| { | |
| "acc": 0.8898, | |
| "epoch": 4.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2654, | |
| "rl_loss": 0.0, | |
| "step": 3340 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 4.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.249, | |
| "rl_loss": 0.0, | |
| "step": 3350 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 4.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2751, | |
| "rl_loss": 0.0, | |
| "step": 3360 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 4.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2353, | |
| "rl_loss": 0.0, | |
| "step": 3370 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 4.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2566, | |
| "rl_loss": 0.0, | |
| "step": 3380 | |
| }, | |
| { | |
| "acc": 0.8863, | |
| "epoch": 4.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2618, | |
| "rl_loss": 0.0, | |
| "step": 3390 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 4.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.253, | |
| "rl_loss": 0.0, | |
| "step": 3400 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 4.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.251, | |
| "rl_loss": 0.0, | |
| "step": 3410 | |
| }, | |
| { | |
| "acc": 0.8875, | |
| "epoch": 4.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.265, | |
| "rl_loss": 0.0, | |
| "step": 3420 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 4.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2546, | |
| "rl_loss": 0.0, | |
| "step": 3430 | |
| }, | |
| { | |
| "acc": 0.8871, | |
| "epoch": 4.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2697, | |
| "rl_loss": 0.0, | |
| "step": 3440 | |
| }, | |
| { | |
| "acc": 0.8859, | |
| "epoch": 4.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.265, | |
| "rl_loss": 0.0, | |
| "step": 3450 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 4.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.251, | |
| "rl_loss": 0.0, | |
| "step": 3460 | |
| }, | |
| { | |
| "acc": 0.8832, | |
| "epoch": 4.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2828, | |
| "rl_loss": 0.0, | |
| "step": 3470 | |
| }, | |
| { | |
| "acc": 0.8781, | |
| "epoch": 4.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.287, | |
| "rl_loss": 0.0, | |
| "step": 3480 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 4.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2476, | |
| "rl_loss": 0.0, | |
| "step": 3490 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 4.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2549, | |
| "rl_loss": 0.0, | |
| "step": 3500 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 4.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2506, | |
| "rl_loss": 0.0, | |
| "step": 3510 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 4.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2428, | |
| "rl_loss": 0.0, | |
| "step": 3520 | |
| }, | |
| { | |
| "acc": 0.8863, | |
| "epoch": 4.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2586, | |
| "rl_loss": 0.0, | |
| "step": 3530 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 4.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2487, | |
| "rl_loss": 0.0, | |
| "step": 3540 | |
| }, | |
| { | |
| "acc": 0.8898, | |
| "epoch": 4.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2643, | |
| "rl_loss": 0.0, | |
| "step": 3550 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 4.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2455, | |
| "rl_loss": 0.0, | |
| "step": 3560 | |
| }, | |
| { | |
| "acc": 0.8891, | |
| "epoch": 4.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2676, | |
| "rl_loss": 0.0, | |
| "step": 3570 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 4.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2598, | |
| "rl_loss": 0.0, | |
| "step": 3580 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 4.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2513, | |
| "rl_loss": 0.0, | |
| "step": 3590 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 4.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2581, | |
| "rl_loss": 0.0, | |
| "step": 3600 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 4.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2816, | |
| "rl_loss": 0.0, | |
| "step": 3610 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 4.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2582, | |
| "rl_loss": 0.0, | |
| "step": 3620 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 4.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2431, | |
| "rl_loss": 0.0, | |
| "step": 3630 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 4.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2572, | |
| "rl_loss": 0.0, | |
| "step": 3640 | |
| }, | |
| { | |
| "acc": 0.8934, | |
| "epoch": 4.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2538, | |
| "rl_loss": 0.0, | |
| "step": 3650 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 4.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2577, | |
| "rl_loss": 0.0, | |
| "step": 3660 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 4.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2354, | |
| "rl_loss": 0.0, | |
| "step": 3670 | |
| }, | |
| { | |
| "acc": 0.8875, | |
| "epoch": 4.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2542, | |
| "rl_loss": 0.0, | |
| "step": 3680 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 4.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2467, | |
| "rl_loss": 0.0, | |
| "step": 3690 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 4.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2433, | |
| "rl_loss": 0.0, | |
| "step": 3700 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 4.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2578, | |
| "rl_loss": 0.0, | |
| "step": 3710 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 4.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2677, | |
| "rl_loss": 0.0, | |
| "step": 3720 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 4.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2475, | |
| "rl_loss": 0.0, | |
| "step": 3730 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 4.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2458, | |
| "rl_loss": 0.0, | |
| "step": 3740 | |
| }, | |
| { | |
| "acc": 0.8848, | |
| "epoch": 4.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2748, | |
| "rl_loss": 0.0, | |
| "step": 3750 | |
| }, | |
| { | |
| "acc": 0.882, | |
| "epoch": 4.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2737, | |
| "rl_loss": 0.0, | |
| "step": 3760 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 4.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2705, | |
| "rl_loss": 0.0, | |
| "step": 3770 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 4.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2595, | |
| "rl_loss": 0.0, | |
| "step": 3780 | |
| }, | |
| { | |
| "acc": 0.8816, | |
| "epoch": 4.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2742, | |
| "rl_loss": 0.0, | |
| "step": 3790 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 4.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2627, | |
| "rl_loss": 0.0, | |
| "step": 3800 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 4.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2675, | |
| "rl_loss": 0.0, | |
| "step": 3810 | |
| }, | |
| { | |
| "acc": 0.8824, | |
| "epoch": 4.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2725, | |
| "rl_loss": 0.0, | |
| "step": 3820 | |
| }, | |
| { | |
| "acc": 0.8867, | |
| "epoch": 4.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2694, | |
| "rl_loss": 0.0, | |
| "step": 3830 | |
| }, | |
| { | |
| "acc": 0.8832, | |
| "epoch": 4.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2744, | |
| "rl_loss": 0.0, | |
| "step": 3840 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 4.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2611, | |
| "rl_loss": 0.0, | |
| "step": 3850 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 4.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2499, | |
| "rl_loss": 0.0, | |
| "step": 3860 | |
| }, | |
| { | |
| "acc": 0.8816, | |
| "epoch": 4.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2717, | |
| "rl_loss": 0.0, | |
| "step": 3870 | |
| }, | |
| { | |
| "acc": 0.8891, | |
| "epoch": 4.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2557, | |
| "rl_loss": 0.0, | |
| "step": 3880 | |
| }, | |
| { | |
| "acc": 0.8789, | |
| "epoch": 4.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2706, | |
| "rl_loss": 0.0, | |
| "step": 3890 | |
| }, | |
| { | |
| "acc": 0.8863, | |
| "epoch": 4.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.267, | |
| "rl_loss": 0.0, | |
| "step": 3900 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 4.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2596, | |
| "rl_loss": 0.0, | |
| "step": 3910 | |
| }, | |
| { | |
| "acc": 0.8855, | |
| "epoch": 4.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2613, | |
| "rl_loss": 0.0, | |
| "step": 3920 | |
| }, | |
| { | |
| "acc": 0.8934, | |
| "epoch": 4.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2509, | |
| "rl_loss": 0.0, | |
| "step": 3930 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 4.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2635, | |
| "rl_loss": 0.0, | |
| "step": 3940 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 4.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2614, | |
| "rl_loss": 0.0, | |
| "step": 3950 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 4.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2475, | |
| "rl_loss": 0.0, | |
| "step": 3960 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 4.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2516, | |
| "rl_loss": 0.0, | |
| "step": 3970 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 4.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2313, | |
| "rl_loss": 0.0, | |
| "step": 3980 | |
| }, | |
| { | |
| "acc": 0.8922, | |
| "epoch": 4.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2563, | |
| "rl_loss": 0.0, | |
| "step": 3990 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 4.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2607, | |
| "rl_loss": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 4.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2664, | |
| "rl_loss": 0.0, | |
| "step": 4010 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 4.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2573, | |
| "rl_loss": 0.0, | |
| "step": 4020 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 4.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2436, | |
| "rl_loss": 0.0, | |
| "step": 4030 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 4.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2606, | |
| "rl_loss": 0.0, | |
| "step": 4040 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 4.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2555, | |
| "rl_loss": 0.0, | |
| "step": 4050 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 4.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2581, | |
| "rl_loss": 0.0, | |
| "step": 4060 | |
| }, | |
| { | |
| "acc": 0.8906, | |
| "epoch": 4.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2567, | |
| "rl_loss": 0.0, | |
| "step": 4070 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 4.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2432, | |
| "rl_loss": 0.0, | |
| "step": 4080 | |
| }, | |
| { | |
| "acc": 0.8934, | |
| "epoch": 4.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.255, | |
| "rl_loss": 0.0, | |
| "step": 4090 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 4.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2382, | |
| "rl_loss": 0.0, | |
| "step": 4100 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 4.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2566, | |
| "rl_loss": 0.0, | |
| "step": 4110 | |
| }, | |
| { | |
| "acc": 0.8875, | |
| "epoch": 4.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2645, | |
| "rl_loss": 0.0, | |
| "step": 4120 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 4.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2474, | |
| "rl_loss": 0.0, | |
| "step": 4130 | |
| }, | |
| { | |
| "acc": 0.9531, | |
| "epoch": 5.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.251, | |
| "rl_loss": 0.0, | |
| "step": 4140 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 5.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2551, | |
| "rl_loss": 0.0, | |
| "step": 4150 | |
| }, | |
| { | |
| "acc": 0.9059, | |
| "epoch": 5.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2359, | |
| "rl_loss": 0.0, | |
| "step": 4160 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 5.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2504, | |
| "rl_loss": 0.0, | |
| "step": 4170 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 5.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2486, | |
| "rl_loss": 0.0, | |
| "step": 4180 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 5.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.233, | |
| "rl_loss": 0.0, | |
| "step": 4190 | |
| }, | |
| { | |
| "acc": 0.8938, | |
| "epoch": 5.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2506, | |
| "rl_loss": 0.0, | |
| "step": 4200 | |
| }, | |
| { | |
| "acc": 0.902, | |
| "epoch": 5.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2227, | |
| "rl_loss": 0.0, | |
| "step": 4210 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 5.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2308, | |
| "rl_loss": 0.0, | |
| "step": 4220 | |
| }, | |
| { | |
| "acc": 0.8887, | |
| "epoch": 5.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2545, | |
| "rl_loss": 0.0, | |
| "step": 4230 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 5.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.248, | |
| "rl_loss": 0.0, | |
| "step": 4240 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 5.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.243, | |
| "rl_loss": 0.0, | |
| "step": 4250 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 5.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2238, | |
| "rl_loss": 0.0, | |
| "step": 4260 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 5.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2275, | |
| "rl_loss": 0.0, | |
| "step": 4270 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 5.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2199, | |
| "rl_loss": 0.0, | |
| "step": 4280 | |
| }, | |
| { | |
| "acc": 0.8906, | |
| "epoch": 5.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2551, | |
| "rl_loss": 0.0, | |
| "step": 4290 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 5.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2463, | |
| "rl_loss": 0.0, | |
| "step": 4300 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 5.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.257, | |
| "rl_loss": 0.0, | |
| "step": 4310 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 5.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.236, | |
| "rl_loss": 0.0, | |
| "step": 4320 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 5.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.246, | |
| "rl_loss": 0.0, | |
| "step": 4330 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 5.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2595, | |
| "rl_loss": 0.0, | |
| "step": 4340 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 5.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2433, | |
| "rl_loss": 0.0, | |
| "step": 4350 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 5.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2368, | |
| "rl_loss": 0.0, | |
| "step": 4360 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 5.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2393, | |
| "rl_loss": 0.0, | |
| "step": 4370 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 5.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2545, | |
| "rl_loss": 0.0, | |
| "step": 4380 | |
| }, | |
| { | |
| "acc": 0.902, | |
| "epoch": 5.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2364, | |
| "rl_loss": 0.0, | |
| "step": 4390 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2378, | |
| "rl_loss": 0.0, | |
| "step": 4400 | |
| }, | |
| { | |
| "acc": 0.8922, | |
| "epoch": 5.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2647, | |
| "rl_loss": 0.0, | |
| "step": 4410 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 5.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2338, | |
| "rl_loss": 0.0, | |
| "step": 4420 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2327, | |
| "rl_loss": 0.0, | |
| "step": 4430 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 5.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2393, | |
| "rl_loss": 0.0, | |
| "step": 4440 | |
| }, | |
| { | |
| "acc": 0.8922, | |
| "epoch": 5.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2557, | |
| "rl_loss": 0.0, | |
| "step": 4450 | |
| }, | |
| { | |
| "acc": 0.9098, | |
| "epoch": 5.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.233, | |
| "rl_loss": 0.0, | |
| "step": 4460 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 5.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2526, | |
| "rl_loss": 0.0, | |
| "step": 4470 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 5.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2476, | |
| "rl_loss": 0.0, | |
| "step": 4480 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 5.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2539, | |
| "rl_loss": 0.0, | |
| "step": 4490 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 5.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2559, | |
| "rl_loss": 0.0, | |
| "step": 4500 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 5.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2292, | |
| "rl_loss": 0.0, | |
| "step": 4510 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 5.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.23, | |
| "rl_loss": 0.0, | |
| "step": 4520 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 5.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2446, | |
| "rl_loss": 0.0, | |
| "step": 4530 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2518, | |
| "rl_loss": 0.0, | |
| "step": 4540 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 5.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.24, | |
| "rl_loss": 0.0, | |
| "step": 4550 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2391, | |
| "rl_loss": 0.0, | |
| "step": 4560 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 5.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.262, | |
| "rl_loss": 0.0, | |
| "step": 4570 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 5.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2338, | |
| "rl_loss": 0.0, | |
| "step": 4580 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 5.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2321, | |
| "rl_loss": 0.0, | |
| "step": 4590 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 5.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.242, | |
| "rl_loss": 0.0, | |
| "step": 4600 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 5.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2548, | |
| "rl_loss": 0.0, | |
| "step": 4610 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 5.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2387, | |
| "rl_loss": 0.0, | |
| "step": 4620 | |
| }, | |
| { | |
| "acc": 0.8867, | |
| "epoch": 5.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2529, | |
| "rl_loss": 0.0, | |
| "step": 4630 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 5.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2482, | |
| "rl_loss": 0.0, | |
| "step": 4640 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 5.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2325, | |
| "rl_loss": 0.0, | |
| "step": 4650 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 5.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2644, | |
| "rl_loss": 0.0, | |
| "step": 4660 | |
| }, | |
| { | |
| "acc": 0.8938, | |
| "epoch": 5.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2488, | |
| "rl_loss": 0.0, | |
| "step": 4670 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 5.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2538, | |
| "rl_loss": 0.0, | |
| "step": 4680 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 5.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2407, | |
| "rl_loss": 0.0, | |
| "step": 4690 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 5.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2327, | |
| "rl_loss": 0.0, | |
| "step": 4700 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 5.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2525, | |
| "rl_loss": 0.0, | |
| "step": 4710 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 5.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2531, | |
| "rl_loss": 0.0, | |
| "step": 4720 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 5.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2374, | |
| "rl_loss": 0.0, | |
| "step": 4730 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 5.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2235, | |
| "rl_loss": 0.0, | |
| "step": 4740 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 5.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2478, | |
| "rl_loss": 0.0, | |
| "step": 4750 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 5.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2462, | |
| "rl_loss": 0.0, | |
| "step": 4760 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 5.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.244, | |
| "rl_loss": 0.0, | |
| "step": 4770 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2471, | |
| "rl_loss": 0.0, | |
| "step": 4780 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 5.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2392, | |
| "rl_loss": 0.0, | |
| "step": 4790 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 5.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2376, | |
| "rl_loss": 0.0, | |
| "step": 4800 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 5.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2312, | |
| "rl_loss": 0.0, | |
| "step": 4810 | |
| }, | |
| { | |
| "acc": 0.8938, | |
| "epoch": 5.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2433, | |
| "rl_loss": 0.0, | |
| "step": 4820 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 5.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2596, | |
| "rl_loss": 0.0, | |
| "step": 4830 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 5.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2499, | |
| "rl_loss": 0.0, | |
| "step": 4840 | |
| }, | |
| { | |
| "acc": 0.8895, | |
| "epoch": 5.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2508, | |
| "rl_loss": 0.0, | |
| "step": 4850 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 5.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2476, | |
| "rl_loss": 0.0, | |
| "step": 4860 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 5.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2424, | |
| "rl_loss": 0.0, | |
| "step": 4870 | |
| }, | |
| { | |
| "acc": 0.8836, | |
| "epoch": 5.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2593, | |
| "rl_loss": 0.0, | |
| "step": 4880 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 5.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2457, | |
| "rl_loss": 0.0, | |
| "step": 4890 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 5.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.239, | |
| "rl_loss": 0.0, | |
| "step": 4900 | |
| }, | |
| { | |
| "acc": 0.8844, | |
| "epoch": 5.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2577, | |
| "rl_loss": 0.0, | |
| "step": 4910 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 5.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2272, | |
| "rl_loss": 0.0, | |
| "step": 4920 | |
| }, | |
| { | |
| "acc": 0.9008, | |
| "epoch": 5.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2405, | |
| "rl_loss": 0.0, | |
| "step": 4930 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 5.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2507, | |
| "rl_loss": 0.0, | |
| "step": 4940 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 5.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2401, | |
| "rl_loss": 0.0, | |
| "step": 4950 | |
| }, | |
| { | |
| "acc": 0.9027, | |
| "epoch": 6.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2287, | |
| "rl_loss": 0.0, | |
| "step": 4960 | |
| }, | |
| { | |
| "acc": 0.9559, | |
| "epoch": 6.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2473, | |
| "rl_loss": 0.0, | |
| "step": 4970 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 6.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2334, | |
| "rl_loss": 0.0, | |
| "step": 4980 | |
| }, | |
| { | |
| "acc": 0.9133, | |
| "epoch": 6.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2112, | |
| "rl_loss": 0.0, | |
| "step": 4990 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 6.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2097, | |
| "rl_loss": 0.0, | |
| "step": 5000 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 6.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2458, | |
| "rl_loss": 0.0, | |
| "step": 5010 | |
| }, | |
| { | |
| "acc": 0.9195, | |
| "epoch": 6.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2143, | |
| "rl_loss": 0.0, | |
| "step": 5020 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 6.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.22, | |
| "rl_loss": 0.0, | |
| "step": 5030 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 6.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2333, | |
| "rl_loss": 0.0, | |
| "step": 5040 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 6.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.233, | |
| "rl_loss": 0.0, | |
| "step": 5050 | |
| }, | |
| { | |
| "acc": 0.9027, | |
| "epoch": 6.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2356, | |
| "rl_loss": 0.0, | |
| "step": 5060 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 6.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.236, | |
| "rl_loss": 0.0, | |
| "step": 5070 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 6.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2319, | |
| "rl_loss": 0.0, | |
| "step": 5080 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 6.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2436, | |
| "rl_loss": 0.0, | |
| "step": 5090 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 6.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2317, | |
| "rl_loss": 0.0, | |
| "step": 5100 | |
| }, | |
| { | |
| "acc": 0.9184, | |
| "epoch": 6.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.202, | |
| "rl_loss": 0.0, | |
| "step": 5110 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 6.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2157, | |
| "rl_loss": 0.0, | |
| "step": 5120 | |
| }, | |
| { | |
| "acc": 0.8914, | |
| "epoch": 6.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2443, | |
| "rl_loss": 0.0, | |
| "step": 5130 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 6.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2188, | |
| "rl_loss": 0.0, | |
| "step": 5140 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 6.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2261, | |
| "rl_loss": 0.0, | |
| "step": 5150 | |
| }, | |
| { | |
| "acc": 0.8934, | |
| "epoch": 6.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.239, | |
| "rl_loss": 0.0, | |
| "step": 5160 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 6.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2425, | |
| "rl_loss": 0.0, | |
| "step": 5170 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 6.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2338, | |
| "rl_loss": 0.0, | |
| "step": 5180 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 6.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2321, | |
| "rl_loss": 0.0, | |
| "step": 5190 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 6.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2441, | |
| "rl_loss": 0.0, | |
| "step": 5200 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 6.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2302, | |
| "rl_loss": 0.0, | |
| "step": 5210 | |
| }, | |
| { | |
| "acc": 0.8953, | |
| "epoch": 6.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2384, | |
| "rl_loss": 0.0, | |
| "step": 5220 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 6.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2246, | |
| "rl_loss": 0.0, | |
| "step": 5230 | |
| }, | |
| { | |
| "acc": 0.893, | |
| "epoch": 6.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2463, | |
| "rl_loss": 0.0, | |
| "step": 5240 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 6.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2235, | |
| "rl_loss": 0.0, | |
| "step": 5250 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 6.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2359, | |
| "rl_loss": 0.0, | |
| "step": 5260 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 6.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2371, | |
| "rl_loss": 0.0, | |
| "step": 5270 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 6.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2097, | |
| "rl_loss": 0.0, | |
| "step": 5280 | |
| }, | |
| { | |
| "acc": 0.9082, | |
| "epoch": 6.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2312, | |
| "rl_loss": 0.0, | |
| "step": 5290 | |
| }, | |
| { | |
| "acc": 0.9008, | |
| "epoch": 6.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2338, | |
| "rl_loss": 0.0, | |
| "step": 5300 | |
| }, | |
| { | |
| "acc": 0.8855, | |
| "epoch": 6.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2573, | |
| "rl_loss": 0.0, | |
| "step": 5310 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 6.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2454, | |
| "rl_loss": 0.0, | |
| "step": 5320 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 6.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.24, | |
| "rl_loss": 0.0, | |
| "step": 5330 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 6.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2316, | |
| "rl_loss": 0.0, | |
| "step": 5340 | |
| }, | |
| { | |
| "acc": 0.9121, | |
| "epoch": 6.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2175, | |
| "rl_loss": 0.0, | |
| "step": 5350 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 6.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2355, | |
| "rl_loss": 0.0, | |
| "step": 5360 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 6.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2235, | |
| "rl_loss": 0.0, | |
| "step": 5370 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 6.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2357, | |
| "rl_loss": 0.0, | |
| "step": 5380 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 6.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2331, | |
| "rl_loss": 0.0, | |
| "step": 5390 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 6.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2187, | |
| "rl_loss": 0.0, | |
| "step": 5400 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 6.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2406, | |
| "rl_loss": 0.0, | |
| "step": 5410 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 6.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2271, | |
| "rl_loss": 0.0, | |
| "step": 5420 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 6.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2255, | |
| "rl_loss": 0.0, | |
| "step": 5430 | |
| }, | |
| { | |
| "acc": 0.9074, | |
| "epoch": 6.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2219, | |
| "rl_loss": 0.0, | |
| "step": 5440 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 6.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.23, | |
| "rl_loss": 0.0, | |
| "step": 5450 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 6.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2299, | |
| "rl_loss": 0.0, | |
| "step": 5460 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 6.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2257, | |
| "rl_loss": 0.0, | |
| "step": 5470 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 6.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.23, | |
| "rl_loss": 0.0, | |
| "step": 5480 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 6.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2435, | |
| "rl_loss": 0.0, | |
| "step": 5490 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 6.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2547, | |
| "rl_loss": 0.0, | |
| "step": 5500 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 6.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2245, | |
| "rl_loss": 0.0, | |
| "step": 5510 | |
| }, | |
| { | |
| "acc": 0.893, | |
| "epoch": 6.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2431, | |
| "rl_loss": 0.0, | |
| "step": 5520 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 6.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2322, | |
| "rl_loss": 0.0, | |
| "step": 5530 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 6.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2297, | |
| "rl_loss": 0.0, | |
| "step": 5540 | |
| }, | |
| { | |
| "acc": 0.8926, | |
| "epoch": 6.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2476, | |
| "rl_loss": 0.0, | |
| "step": 5550 | |
| }, | |
| { | |
| "acc": 0.9098, | |
| "epoch": 6.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2139, | |
| "rl_loss": 0.0, | |
| "step": 5560 | |
| }, | |
| { | |
| "acc": 0.8883, | |
| "epoch": 6.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2527, | |
| "rl_loss": 0.0, | |
| "step": 5570 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 6.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2334, | |
| "rl_loss": 0.0, | |
| "step": 5580 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 6.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2243, | |
| "rl_loss": 0.0, | |
| "step": 5590 | |
| }, | |
| { | |
| "acc": 0.8996, | |
| "epoch": 6.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2365, | |
| "rl_loss": 0.0, | |
| "step": 5600 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 6.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2334, | |
| "rl_loss": 0.0, | |
| "step": 5610 | |
| }, | |
| { | |
| "acc": 0.9027, | |
| "epoch": 6.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2357, | |
| "rl_loss": 0.0, | |
| "step": 5620 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 6.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2108, | |
| "rl_loss": 0.0, | |
| "step": 5630 | |
| }, | |
| { | |
| "acc": 0.8828, | |
| "epoch": 6.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2616, | |
| "rl_loss": 0.0, | |
| "step": 5640 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 6.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.236, | |
| "rl_loss": 0.0, | |
| "step": 5650 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 6.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2441, | |
| "rl_loss": 0.0, | |
| "step": 5660 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 6.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2485, | |
| "rl_loss": 0.0, | |
| "step": 5670 | |
| }, | |
| { | |
| "acc": 0.8867, | |
| "epoch": 6.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2446, | |
| "rl_loss": 0.0, | |
| "step": 5680 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 6.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.232, | |
| "rl_loss": 0.0, | |
| "step": 5690 | |
| }, | |
| { | |
| "acc": 0.891, | |
| "epoch": 6.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2542, | |
| "rl_loss": 0.0, | |
| "step": 5700 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 6.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2443, | |
| "rl_loss": 0.0, | |
| "step": 5710 | |
| }, | |
| { | |
| "acc": 0.8941, | |
| "epoch": 6.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2522, | |
| "rl_loss": 0.0, | |
| "step": 5720 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 6.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2224, | |
| "rl_loss": 0.0, | |
| "step": 5730 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 6.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2335, | |
| "rl_loss": 0.0, | |
| "step": 5740 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 6.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2368, | |
| "rl_loss": 0.0, | |
| "step": 5750 | |
| }, | |
| { | |
| "acc": 0.8941, | |
| "epoch": 6.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2411, | |
| "rl_loss": 0.0, | |
| "step": 5760 | |
| }, | |
| { | |
| "acc": 0.8922, | |
| "epoch": 6.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2423, | |
| "rl_loss": 0.0, | |
| "step": 5770 | |
| }, | |
| { | |
| "acc": 0.8871, | |
| "epoch": 6.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2414, | |
| "rl_loss": 0.0, | |
| "step": 5780 | |
| }, | |
| { | |
| "acc": 0.9543, | |
| "epoch": 7.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2478, | |
| "rl_loss": 0.0, | |
| "step": 5790 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 7.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2141, | |
| "rl_loss": 0.0, | |
| "step": 5800 | |
| }, | |
| { | |
| "acc": 0.9164, | |
| "epoch": 7.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1963, | |
| "rl_loss": 0.0, | |
| "step": 5810 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 7.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2166, | |
| "rl_loss": 0.0, | |
| "step": 5820 | |
| }, | |
| { | |
| "acc": 0.8938, | |
| "epoch": 7.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2319, | |
| "rl_loss": 0.0, | |
| "step": 5830 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 7.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2158, | |
| "rl_loss": 0.0, | |
| "step": 5840 | |
| }, | |
| { | |
| "acc": 0.9059, | |
| "epoch": 7.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2192, | |
| "rl_loss": 0.0, | |
| "step": 5850 | |
| }, | |
| { | |
| "acc": 0.9074, | |
| "epoch": 7.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2291, | |
| "rl_loss": 0.0, | |
| "step": 5860 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 7.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2221, | |
| "rl_loss": 0.0, | |
| "step": 5870 | |
| }, | |
| { | |
| "acc": 0.9121, | |
| "epoch": 7.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2056, | |
| "rl_loss": 0.0, | |
| "step": 5880 | |
| }, | |
| { | |
| "acc": 0.8941, | |
| "epoch": 7.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2307, | |
| "rl_loss": 0.0, | |
| "step": 5890 | |
| }, | |
| { | |
| "acc": 0.8996, | |
| "epoch": 7.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2211, | |
| "rl_loss": 0.0, | |
| "step": 5900 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 7.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2321, | |
| "rl_loss": 0.0, | |
| "step": 5910 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 7.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2268, | |
| "rl_loss": 0.0, | |
| "step": 5920 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 7.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.226, | |
| "rl_loss": 0.0, | |
| "step": 5930 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 7.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2115, | |
| "rl_loss": 0.0, | |
| "step": 5940 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 7.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2225, | |
| "rl_loss": 0.0, | |
| "step": 5950 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 7.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2289, | |
| "rl_loss": 0.0, | |
| "step": 5960 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 7.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2334, | |
| "rl_loss": 0.0, | |
| "step": 5970 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 7.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2238, | |
| "rl_loss": 0.0, | |
| "step": 5980 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 7.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2298, | |
| "rl_loss": 0.0, | |
| "step": 5990 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 7.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.215, | |
| "rl_loss": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 7.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2061, | |
| "rl_loss": 0.0, | |
| "step": 6010 | |
| }, | |
| { | |
| "acc": 0.9082, | |
| "epoch": 7.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2154, | |
| "rl_loss": 0.0, | |
| "step": 6020 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 7.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2174, | |
| "rl_loss": 0.0, | |
| "step": 6030 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 7.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2166, | |
| "rl_loss": 0.0, | |
| "step": 6040 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 7.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2206, | |
| "rl_loss": 0.0, | |
| "step": 6050 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 7.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2215, | |
| "rl_loss": 0.0, | |
| "step": 6060 | |
| }, | |
| { | |
| "acc": 0.893, | |
| "epoch": 7.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2379, | |
| "rl_loss": 0.0, | |
| "step": 6070 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 7.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2284, | |
| "rl_loss": 0.0, | |
| "step": 6080 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 7.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2308, | |
| "rl_loss": 0.0, | |
| "step": 6090 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 7.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2407, | |
| "rl_loss": 0.0, | |
| "step": 6100 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 7.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2109, | |
| "rl_loss": 0.0, | |
| "step": 6110 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 7.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2318, | |
| "rl_loss": 0.0, | |
| "step": 6120 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 7.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2384, | |
| "rl_loss": 0.0, | |
| "step": 6130 | |
| }, | |
| { | |
| "acc": 0.9027, | |
| "epoch": 7.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2274, | |
| "rl_loss": 0.0, | |
| "step": 6140 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 7.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2192, | |
| "rl_loss": 0.0, | |
| "step": 6150 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 7.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2225, | |
| "rl_loss": 0.0, | |
| "step": 6160 | |
| }, | |
| { | |
| "acc": 0.918, | |
| "epoch": 7.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2077, | |
| "rl_loss": 0.0, | |
| "step": 6170 | |
| }, | |
| { | |
| "acc": 0.9008, | |
| "epoch": 7.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2206, | |
| "rl_loss": 0.0, | |
| "step": 6180 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 7.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2168, | |
| "rl_loss": 0.0, | |
| "step": 6190 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 7.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2264, | |
| "rl_loss": 0.0, | |
| "step": 6200 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 7.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2331, | |
| "rl_loss": 0.0, | |
| "step": 6210 | |
| }, | |
| { | |
| "acc": 0.9129, | |
| "epoch": 7.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2069, | |
| "rl_loss": 0.0, | |
| "step": 6220 | |
| }, | |
| { | |
| "acc": 0.9008, | |
| "epoch": 7.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.218, | |
| "rl_loss": 0.0, | |
| "step": 6230 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 7.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2333, | |
| "rl_loss": 0.0, | |
| "step": 6240 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 7.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2296, | |
| "rl_loss": 0.0, | |
| "step": 6250 | |
| }, | |
| { | |
| "acc": 0.8918, | |
| "epoch": 7.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2538, | |
| "rl_loss": 0.0, | |
| "step": 6260 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 7.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.219, | |
| "rl_loss": 0.0, | |
| "step": 6270 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 7.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2342, | |
| "rl_loss": 0.0, | |
| "step": 6280 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 7.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2358, | |
| "rl_loss": 0.0, | |
| "step": 6290 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 7.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2326, | |
| "rl_loss": 0.0, | |
| "step": 6300 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 7.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2152, | |
| "rl_loss": 0.0, | |
| "step": 6310 | |
| }, | |
| { | |
| "acc": 0.9117, | |
| "epoch": 7.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2034, | |
| "rl_loss": 0.0, | |
| "step": 6320 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 7.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2065, | |
| "rl_loss": 0.0, | |
| "step": 6330 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 7.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2328, | |
| "rl_loss": 0.0, | |
| "step": 6340 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 7.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2406, | |
| "rl_loss": 0.0, | |
| "step": 6350 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 7.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2138, | |
| "rl_loss": 0.0, | |
| "step": 6360 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 7.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2313, | |
| "rl_loss": 0.0, | |
| "step": 6370 | |
| }, | |
| { | |
| "acc": 0.9027, | |
| "epoch": 7.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2229, | |
| "rl_loss": 0.0, | |
| "step": 6380 | |
| }, | |
| { | |
| "acc": 0.8961, | |
| "epoch": 7.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2378, | |
| "rl_loss": 0.0, | |
| "step": 6390 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 7.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.215, | |
| "rl_loss": 0.0, | |
| "step": 6400 | |
| }, | |
| { | |
| "acc": 0.9172, | |
| "epoch": 7.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2011, | |
| "rl_loss": 0.0, | |
| "step": 6410 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 7.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2221, | |
| "rl_loss": 0.0, | |
| "step": 6420 | |
| }, | |
| { | |
| "acc": 0.8957, | |
| "epoch": 7.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2373, | |
| "rl_loss": 0.0, | |
| "step": 6430 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 7.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2326, | |
| "rl_loss": 0.0, | |
| "step": 6440 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 7.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2285, | |
| "rl_loss": 0.0, | |
| "step": 6450 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 7.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2195, | |
| "rl_loss": 0.0, | |
| "step": 6460 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 7.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2296, | |
| "rl_loss": 0.0, | |
| "step": 6470 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 7.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2132, | |
| "rl_loss": 0.0, | |
| "step": 6480 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 7.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2287, | |
| "rl_loss": 0.0, | |
| "step": 6490 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 7.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2271, | |
| "rl_loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 7.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.217, | |
| "rl_loss": 0.0, | |
| "step": 6510 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 7.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.221, | |
| "rl_loss": 0.0, | |
| "step": 6520 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 7.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2314, | |
| "rl_loss": 0.0, | |
| "step": 6530 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 7.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2261, | |
| "rl_loss": 0.0, | |
| "step": 6540 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 7.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2411, | |
| "rl_loss": 0.0, | |
| "step": 6550 | |
| }, | |
| { | |
| "acc": 0.9, | |
| "epoch": 7.93, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2295, | |
| "rl_loss": 0.0, | |
| "step": 6560 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 7.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2428, | |
| "rl_loss": 0.0, | |
| "step": 6570 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 7.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2302, | |
| "rl_loss": 0.0, | |
| "step": 6580 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 7.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2262, | |
| "rl_loss": 0.0, | |
| "step": 6590 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 7.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2305, | |
| "rl_loss": 0.0, | |
| "step": 6600 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 7.99, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2164, | |
| "rl_loss": 0.0, | |
| "step": 6610 | |
| }, | |
| { | |
| "acc": 0.9652, | |
| "epoch": 8.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2292, | |
| "rl_loss": 0.0, | |
| "step": 6620 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 8.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2152, | |
| "rl_loss": 0.0, | |
| "step": 6630 | |
| }, | |
| { | |
| "acc": 0.9254, | |
| "epoch": 8.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1956, | |
| "rl_loss": 0.0, | |
| "step": 6640 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 8.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2173, | |
| "rl_loss": 0.0, | |
| "step": 6650 | |
| }, | |
| { | |
| "acc": 0.9141, | |
| "epoch": 8.05, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2052, | |
| "rl_loss": 0.0, | |
| "step": 6660 | |
| }, | |
| { | |
| "acc": 0.9102, | |
| "epoch": 8.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2029, | |
| "rl_loss": 0.0, | |
| "step": 6670 | |
| }, | |
| { | |
| "acc": 0.9121, | |
| "epoch": 8.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.199, | |
| "rl_loss": 0.0, | |
| "step": 6680 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 8.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2018, | |
| "rl_loss": 0.0, | |
| "step": 6690 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 8.1, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2055, | |
| "rl_loss": 0.0, | |
| "step": 6700 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 8.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2038, | |
| "rl_loss": 0.0, | |
| "step": 6710 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 8.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1928, | |
| "rl_loss": 0.0, | |
| "step": 6720 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 8.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2187, | |
| "rl_loss": 0.0, | |
| "step": 6730 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 8.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2075, | |
| "rl_loss": 0.0, | |
| "step": 6740 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 8.16, | |
| "learning_rate": 5e-05, | |
| "loss": 0.237, | |
| "rl_loss": 0.0, | |
| "step": 6750 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 8.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2121, | |
| "rl_loss": 0.0, | |
| "step": 6760 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 8.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2179, | |
| "rl_loss": 0.0, | |
| "step": 6770 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 8.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2233, | |
| "rl_loss": 0.0, | |
| "step": 6780 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 8.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2175, | |
| "rl_loss": 0.0, | |
| "step": 6790 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 8.22, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2094, | |
| "rl_loss": 0.0, | |
| "step": 6800 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 8.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.215, | |
| "rl_loss": 0.0, | |
| "step": 6810 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 8.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2169, | |
| "rl_loss": 0.0, | |
| "step": 6820 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 8.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2252, | |
| "rl_loss": 0.0, | |
| "step": 6830 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 8.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2123, | |
| "rl_loss": 0.0, | |
| "step": 6840 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 8.28, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1983, | |
| "rl_loss": 0.0, | |
| "step": 6850 | |
| }, | |
| { | |
| "acc": 0.9191, | |
| "epoch": 8.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.196, | |
| "rl_loss": 0.0, | |
| "step": 6860 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 8.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2081, | |
| "rl_loss": 0.0, | |
| "step": 6870 | |
| }, | |
| { | |
| "acc": 0.8879, | |
| "epoch": 8.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2408, | |
| "rl_loss": 0.0, | |
| "step": 6880 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 8.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2203, | |
| "rl_loss": 0.0, | |
| "step": 6890 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 8.34, | |
| "learning_rate": 5e-05, | |
| "loss": 0.228, | |
| "rl_loss": 0.0, | |
| "step": 6900 | |
| }, | |
| { | |
| "acc": 0.9074, | |
| "epoch": 8.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2209, | |
| "rl_loss": 0.0, | |
| "step": 6910 | |
| }, | |
| { | |
| "acc": 0.9125, | |
| "epoch": 8.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2202, | |
| "rl_loss": 0.0, | |
| "step": 6920 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 8.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2306, | |
| "rl_loss": 0.0, | |
| "step": 6930 | |
| }, | |
| { | |
| "acc": 0.8992, | |
| "epoch": 8.39, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2198, | |
| "rl_loss": 0.0, | |
| "step": 6940 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 8.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2195, | |
| "rl_loss": 0.0, | |
| "step": 6950 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 8.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2102, | |
| "rl_loss": 0.0, | |
| "step": 6960 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 8.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2196, | |
| "rl_loss": 0.0, | |
| "step": 6970 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 8.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2228, | |
| "rl_loss": 0.0, | |
| "step": 6980 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 8.45, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2177, | |
| "rl_loss": 0.0, | |
| "step": 6990 | |
| }, | |
| { | |
| "acc": 0.8949, | |
| "epoch": 8.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.234, | |
| "rl_loss": 0.0, | |
| "step": 7000 | |
| }, | |
| { | |
| "acc": 0.8969, | |
| "epoch": 8.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2275, | |
| "rl_loss": 0.0, | |
| "step": 7010 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 8.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2061, | |
| "rl_loss": 0.0, | |
| "step": 7020 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 8.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2033, | |
| "rl_loss": 0.0, | |
| "step": 7030 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 8.51, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2019, | |
| "rl_loss": 0.0, | |
| "step": 7040 | |
| }, | |
| { | |
| "acc": 0.9203, | |
| "epoch": 8.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1853, | |
| "rl_loss": 0.0, | |
| "step": 7050 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 8.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2096, | |
| "rl_loss": 0.0, | |
| "step": 7060 | |
| }, | |
| { | |
| "acc": 0.898, | |
| "epoch": 8.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2241, | |
| "rl_loss": 0.0, | |
| "step": 7070 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 8.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2189, | |
| "rl_loss": 0.0, | |
| "step": 7080 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 8.57, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2234, | |
| "rl_loss": 0.0, | |
| "step": 7090 | |
| }, | |
| { | |
| "acc": 0.9113, | |
| "epoch": 8.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2069, | |
| "rl_loss": 0.0, | |
| "step": 7100 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 8.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2319, | |
| "rl_loss": 0.0, | |
| "step": 7110 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 8.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2193, | |
| "rl_loss": 0.0, | |
| "step": 7120 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 8.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.225, | |
| "rl_loss": 0.0, | |
| "step": 7130 | |
| }, | |
| { | |
| "acc": 0.9102, | |
| "epoch": 8.63, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2152, | |
| "rl_loss": 0.0, | |
| "step": 7140 | |
| }, | |
| { | |
| "acc": 0.893, | |
| "epoch": 8.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2399, | |
| "rl_loss": 0.0, | |
| "step": 7150 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 8.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2326, | |
| "rl_loss": 0.0, | |
| "step": 7160 | |
| }, | |
| { | |
| "acc": 0.9184, | |
| "epoch": 8.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1933, | |
| "rl_loss": 0.0, | |
| "step": 7170 | |
| }, | |
| { | |
| "acc": 0.9074, | |
| "epoch": 8.68, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2037, | |
| "rl_loss": 0.0, | |
| "step": 7180 | |
| }, | |
| { | |
| "acc": 0.9211, | |
| "epoch": 8.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1929, | |
| "rl_loss": 0.0, | |
| "step": 7190 | |
| }, | |
| { | |
| "acc": 0.8988, | |
| "epoch": 8.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2202, | |
| "rl_loss": 0.0, | |
| "step": 7200 | |
| }, | |
| { | |
| "acc": 0.9082, | |
| "epoch": 8.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2131, | |
| "rl_loss": 0.0, | |
| "step": 7210 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 8.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2108, | |
| "rl_loss": 0.0, | |
| "step": 7220 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 8.74, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2248, | |
| "rl_loss": 0.0, | |
| "step": 7230 | |
| }, | |
| { | |
| "acc": 0.8965, | |
| "epoch": 8.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2431, | |
| "rl_loss": 0.0, | |
| "step": 7240 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 8.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2242, | |
| "rl_loss": 0.0, | |
| "step": 7250 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 8.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2268, | |
| "rl_loss": 0.0, | |
| "step": 7260 | |
| }, | |
| { | |
| "acc": 0.8996, | |
| "epoch": 8.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2247, | |
| "rl_loss": 0.0, | |
| "step": 7270 | |
| }, | |
| { | |
| "acc": 0.9047, | |
| "epoch": 8.8, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2211, | |
| "rl_loss": 0.0, | |
| "step": 7280 | |
| }, | |
| { | |
| "acc": 0.9094, | |
| "epoch": 8.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2201, | |
| "rl_loss": 0.0, | |
| "step": 7290 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 8.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2211, | |
| "rl_loss": 0.0, | |
| "step": 7300 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 8.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2034, | |
| "rl_loss": 0.0, | |
| "step": 7310 | |
| }, | |
| { | |
| "acc": 0.9129, | |
| "epoch": 8.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.201, | |
| "rl_loss": 0.0, | |
| "step": 7320 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 8.86, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2131, | |
| "rl_loss": 0.0, | |
| "step": 7330 | |
| }, | |
| { | |
| "acc": 0.9121, | |
| "epoch": 8.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2024, | |
| "rl_loss": 0.0, | |
| "step": 7340 | |
| }, | |
| { | |
| "acc": 0.9016, | |
| "epoch": 8.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2324, | |
| "rl_loss": 0.0, | |
| "step": 7350 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 8.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.217, | |
| "rl_loss": 0.0, | |
| "step": 7360 | |
| }, | |
| { | |
| "acc": 0.902, | |
| "epoch": 8.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2225, | |
| "rl_loss": 0.0, | |
| "step": 7370 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 8.92, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2023, | |
| "rl_loss": 0.0, | |
| "step": 7380 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 8.94, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2168, | |
| "rl_loss": 0.0, | |
| "step": 7390 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 8.95, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1942, | |
| "rl_loss": 0.0, | |
| "step": 7400 | |
| }, | |
| { | |
| "acc": 0.8902, | |
| "epoch": 8.96, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2277, | |
| "rl_loss": 0.0, | |
| "step": 7410 | |
| }, | |
| { | |
| "acc": 0.902, | |
| "epoch": 8.97, | |
| "learning_rate": 5e-05, | |
| "loss": 0.222, | |
| "rl_loss": 0.0, | |
| "step": 7420 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 8.98, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2294, | |
| "rl_loss": 0.0, | |
| "step": 7430 | |
| }, | |
| { | |
| "acc": 0.902, | |
| "epoch": 9.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2255, | |
| "rl_loss": 0.0, | |
| "step": 7440 | |
| }, | |
| { | |
| "acc": 0.957, | |
| "epoch": 9.01, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2405, | |
| "rl_loss": 0.0, | |
| "step": 7450 | |
| }, | |
| { | |
| "acc": 0.9199, | |
| "epoch": 9.02, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1814, | |
| "rl_loss": 0.0, | |
| "step": 7460 | |
| }, | |
| { | |
| "acc": 0.9164, | |
| "epoch": 9.03, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1854, | |
| "rl_loss": 0.0, | |
| "step": 7470 | |
| }, | |
| { | |
| "acc": 0.9227, | |
| "epoch": 9.04, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1869, | |
| "rl_loss": 0.0, | |
| "step": 7480 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 9.06, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2062, | |
| "rl_loss": 0.0, | |
| "step": 7490 | |
| }, | |
| { | |
| "acc": 0.916, | |
| "epoch": 9.07, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1988, | |
| "rl_loss": 0.0, | |
| "step": 7500 | |
| }, | |
| { | |
| "acc": 0.9113, | |
| "epoch": 9.08, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2136, | |
| "rl_loss": 0.0, | |
| "step": 7510 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 9.09, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1898, | |
| "rl_loss": 0.0, | |
| "step": 7520 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 9.11, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2121, | |
| "rl_loss": 0.0, | |
| "step": 7530 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 9.12, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2116, | |
| "rl_loss": 0.0, | |
| "step": 7540 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 9.13, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2064, | |
| "rl_loss": 0.0, | |
| "step": 7550 | |
| }, | |
| { | |
| "acc": 0.9082, | |
| "epoch": 9.14, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2092, | |
| "rl_loss": 0.0, | |
| "step": 7560 | |
| }, | |
| { | |
| "acc": 0.9215, | |
| "epoch": 9.15, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1927, | |
| "rl_loss": 0.0, | |
| "step": 7570 | |
| }, | |
| { | |
| "acc": 0.9125, | |
| "epoch": 9.17, | |
| "learning_rate": 5e-05, | |
| "loss": 0.193, | |
| "rl_loss": 0.0, | |
| "step": 7580 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 9.18, | |
| "learning_rate": 5e-05, | |
| "loss": 0.212, | |
| "rl_loss": 0.0, | |
| "step": 7590 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 9.19, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1954, | |
| "rl_loss": 0.0, | |
| "step": 7600 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 9.2, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2186, | |
| "rl_loss": 0.0, | |
| "step": 7610 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 9.21, | |
| "learning_rate": 5e-05, | |
| "loss": 0.205, | |
| "rl_loss": 0.0, | |
| "step": 7620 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 9.23, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2218, | |
| "rl_loss": 0.0, | |
| "step": 7630 | |
| }, | |
| { | |
| "acc": 0.9113, | |
| "epoch": 9.24, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2047, | |
| "rl_loss": 0.0, | |
| "step": 7640 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 9.25, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2037, | |
| "rl_loss": 0.0, | |
| "step": 7650 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 9.26, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2036, | |
| "rl_loss": 0.0, | |
| "step": 7660 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 9.27, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2161, | |
| "rl_loss": 0.0, | |
| "step": 7670 | |
| }, | |
| { | |
| "acc": 0.9023, | |
| "epoch": 9.29, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2226, | |
| "rl_loss": 0.0, | |
| "step": 7680 | |
| }, | |
| { | |
| "acc": 0.9098, | |
| "epoch": 9.3, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2138, | |
| "rl_loss": 0.0, | |
| "step": 7690 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 9.31, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1901, | |
| "rl_loss": 0.0, | |
| "step": 7700 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 9.32, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2066, | |
| "rl_loss": 0.0, | |
| "step": 7710 | |
| }, | |
| { | |
| "acc": 0.9102, | |
| "epoch": 9.33, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2074, | |
| "rl_loss": 0.0, | |
| "step": 7720 | |
| }, | |
| { | |
| "acc": 0.9035, | |
| "epoch": 9.35, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2181, | |
| "rl_loss": 0.0, | |
| "step": 7730 | |
| }, | |
| { | |
| "acc": 0.9113, | |
| "epoch": 9.36, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2012, | |
| "rl_loss": 0.0, | |
| "step": 7740 | |
| }, | |
| { | |
| "acc": 0.9129, | |
| "epoch": 9.37, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2036, | |
| "rl_loss": 0.0, | |
| "step": 7750 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 9.38, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1933, | |
| "rl_loss": 0.0, | |
| "step": 7760 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 9.4, | |
| "learning_rate": 5e-05, | |
| "loss": 0.203, | |
| "rl_loss": 0.0, | |
| "step": 7770 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 9.41, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2099, | |
| "rl_loss": 0.0, | |
| "step": 7780 | |
| }, | |
| { | |
| "acc": 0.9109, | |
| "epoch": 9.42, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2103, | |
| "rl_loss": 0.0, | |
| "step": 7790 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 9.43, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2047, | |
| "rl_loss": 0.0, | |
| "step": 7800 | |
| }, | |
| { | |
| "acc": 0.9156, | |
| "epoch": 9.44, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1967, | |
| "rl_loss": 0.0, | |
| "step": 7810 | |
| }, | |
| { | |
| "acc": 0.9125, | |
| "epoch": 9.46, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2044, | |
| "rl_loss": 0.0, | |
| "step": 7820 | |
| }, | |
| { | |
| "acc": 0.9086, | |
| "epoch": 9.47, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2108, | |
| "rl_loss": 0.0, | |
| "step": 7830 | |
| }, | |
| { | |
| "acc": 0.9125, | |
| "epoch": 9.48, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2113, | |
| "rl_loss": 0.0, | |
| "step": 7840 | |
| }, | |
| { | |
| "acc": 0.9059, | |
| "epoch": 9.49, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2072, | |
| "rl_loss": 0.0, | |
| "step": 7850 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 9.5, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2116, | |
| "rl_loss": 0.0, | |
| "step": 7860 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 9.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2105, | |
| "rl_loss": 0.0, | |
| "step": 7870 | |
| }, | |
| { | |
| "acc": 0.9004, | |
| "epoch": 9.53, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2214, | |
| "rl_loss": 0.0, | |
| "step": 7880 | |
| }, | |
| { | |
| "acc": 0.909, | |
| "epoch": 9.54, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2172, | |
| "rl_loss": 0.0, | |
| "step": 7890 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 9.55, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2189, | |
| "rl_loss": 0.0, | |
| "step": 7900 | |
| }, | |
| { | |
| "acc": 0.9055, | |
| "epoch": 9.56, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2211, | |
| "rl_loss": 0.0, | |
| "step": 7910 | |
| }, | |
| { | |
| "acc": 0.9129, | |
| "epoch": 9.58, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2007, | |
| "rl_loss": 0.0, | |
| "step": 7920 | |
| }, | |
| { | |
| "acc": 0.9133, | |
| "epoch": 9.59, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2079, | |
| "rl_loss": 0.0, | |
| "step": 7930 | |
| }, | |
| { | |
| "acc": 0.8984, | |
| "epoch": 9.6, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2254, | |
| "rl_loss": 0.0, | |
| "step": 7940 | |
| }, | |
| { | |
| "acc": 0.9168, | |
| "epoch": 9.61, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1911, | |
| "rl_loss": 0.0, | |
| "step": 7950 | |
| }, | |
| { | |
| "acc": 0.9039, | |
| "epoch": 9.62, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2081, | |
| "rl_loss": 0.0, | |
| "step": 7960 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 9.64, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2365, | |
| "rl_loss": 0.0, | |
| "step": 7970 | |
| }, | |
| { | |
| "acc": 0.9031, | |
| "epoch": 9.65, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2237, | |
| "rl_loss": 0.0, | |
| "step": 7980 | |
| }, | |
| { | |
| "acc": 0.9184, | |
| "epoch": 9.66, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1816, | |
| "rl_loss": 0.0, | |
| "step": 7990 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 9.67, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2001, | |
| "rl_loss": 0.0, | |
| "step": 8000 | |
| }, | |
| { | |
| "acc": 0.9043, | |
| "epoch": 9.69, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2123, | |
| "rl_loss": 0.0, | |
| "step": 8010 | |
| }, | |
| { | |
| "acc": 0.9203, | |
| "epoch": 9.7, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1899, | |
| "rl_loss": 0.0, | |
| "step": 8020 | |
| }, | |
| { | |
| "acc": 0.9105, | |
| "epoch": 9.71, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1963, | |
| "rl_loss": 0.0, | |
| "step": 8030 | |
| }, | |
| { | |
| "acc": 0.907, | |
| "epoch": 9.72, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2216, | |
| "rl_loss": 0.0, | |
| "step": 8040 | |
| }, | |
| { | |
| "acc": 0.9062, | |
| "epoch": 9.73, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2198, | |
| "rl_loss": 0.0, | |
| "step": 8050 | |
| }, | |
| { | |
| "acc": 0.9148, | |
| "epoch": 9.75, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1963, | |
| "rl_loss": 0.0, | |
| "step": 8060 | |
| }, | |
| { | |
| "acc": 0.9066, | |
| "epoch": 9.76, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2136, | |
| "rl_loss": 0.0, | |
| "step": 8070 | |
| }, | |
| { | |
| "acc": 0.9012, | |
| "epoch": 9.77, | |
| "learning_rate": 5e-05, | |
| "loss": 0.219, | |
| "rl_loss": 0.0, | |
| "step": 8080 | |
| }, | |
| { | |
| "acc": 0.9133, | |
| "epoch": 9.78, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2047, | |
| "rl_loss": 0.0, | |
| "step": 8090 | |
| }, | |
| { | |
| "acc": 0.8977, | |
| "epoch": 9.79, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2291, | |
| "rl_loss": 0.0, | |
| "step": 8100 | |
| }, | |
| { | |
| "acc": 0.9113, | |
| "epoch": 9.81, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2122, | |
| "rl_loss": 0.0, | |
| "step": 8110 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 9.82, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2029, | |
| "rl_loss": 0.0, | |
| "step": 8120 | |
| }, | |
| { | |
| "acc": 0.9176, | |
| "epoch": 9.83, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1953, | |
| "rl_loss": 0.0, | |
| "step": 8130 | |
| }, | |
| { | |
| "acc": 0.9059, | |
| "epoch": 9.84, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2194, | |
| "rl_loss": 0.0, | |
| "step": 8140 | |
| }, | |
| { | |
| "acc": 0.9145, | |
| "epoch": 9.85, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2068, | |
| "rl_loss": 0.0, | |
| "step": 8150 | |
| }, | |
| { | |
| "acc": 0.8973, | |
| "epoch": 9.87, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2199, | |
| "rl_loss": 0.0, | |
| "step": 8160 | |
| }, | |
| { | |
| "acc": 0.8945, | |
| "epoch": 9.88, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2365, | |
| "rl_loss": 0.0, | |
| "step": 8170 | |
| }, | |
| { | |
| "acc": 0.9078, | |
| "epoch": 9.89, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2137, | |
| "rl_loss": 0.0, | |
| "step": 8180 | |
| }, | |
| { | |
| "acc": 0.9102, | |
| "epoch": 9.9, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1982, | |
| "rl_loss": 0.0, | |
| "step": 8190 | |
| }, | |
| { | |
| "acc": 0.9051, | |
| "epoch": 9.91, | |
| "learning_rate": 5e-05, | |
| "loss": 0.2191, | |
| "rl_loss": 0.0, | |
| "step": 8200 | |
| } | |
| ], | |
| "max_steps": 8270, | |
| "num_train_epochs": 10, | |
| "total_flos": 3.33669694189824e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |