| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.916396629941672, |
| "eval_steps": 500, |
| "global_step": 18000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0002, |
| "loss": 3.3044, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0002, |
| "loss": 2.9614, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 0.0002, |
| "loss": 2.8054, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0002, |
| "loss": 2.5516, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0002, |
| "loss": 2.8391, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0002, |
| "loss": 2.4576, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0002, |
| "loss": 2.6535, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0002, |
| "loss": 2.4572, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0002, |
| "loss": 2.6712, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0002, |
| "loss": 2.3453, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0002, |
| "loss": 2.5588, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0002, |
| "loss": 2.441, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.0002, |
| "loss": 2.6621, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0002, |
| "loss": 2.5328, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0002, |
| "loss": 2.6159, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0002, |
| "loss": 2.4162, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0002, |
| "loss": 2.4839, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0002, |
| "loss": 2.3451, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0002, |
| "loss": 2.5323, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0002, |
| "loss": 2.433, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0002, |
| "loss": 2.5864, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0002, |
| "loss": 2.3248, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0002, |
| "loss": 2.6075, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.0002, |
| "loss": 2.3894, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.0002, |
| "loss": 2.6016, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0002, |
| "loss": 2.3992, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0002, |
| "loss": 2.4086, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0002, |
| "loss": 2.303, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0002, |
| "loss": 2.4355, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0002, |
| "loss": 2.3693, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0002, |
| "loss": 2.4441, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0002, |
| "loss": 2.3237, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0002, |
| "loss": 2.4797, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0002, |
| "loss": 2.3291, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0002, |
| "loss": 2.3765, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0002, |
| "loss": 2.3485, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0002, |
| "loss": 2.3761, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0002, |
| "loss": 2.1423, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0002, |
| "loss": 2.5231, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0002, |
| "loss": 2.397, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0002, |
| "loss": 2.384, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0002, |
| "loss": 2.2133, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0002, |
| "loss": 2.4448, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0002, |
| "loss": 2.2569, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0002, |
| "loss": 2.4679, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0002, |
| "loss": 2.2591, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0002, |
| "loss": 2.4579, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0002, |
| "loss": 2.1727, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0002, |
| "loss": 2.4947, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0002, |
| "loss": 2.2964, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0002, |
| "loss": 2.4288, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0002, |
| "loss": 2.2271, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0002, |
| "loss": 2.3102, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0002, |
| "loss": 2.2645, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0002, |
| "loss": 2.4434, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0002, |
| "loss": 2.3105, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0002, |
| "loss": 2.3998, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0002, |
| "loss": 2.2101, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0002, |
| "loss": 2.384, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0002, |
| "loss": 2.3274, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0002, |
| "loss": 2.5178, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0002, |
| "loss": 2.2085, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002, |
| "loss": 2.3946, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002, |
| "loss": 2.233, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0002, |
| "loss": 2.4607, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0002, |
| "loss": 2.3017, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0002, |
| "loss": 2.5035, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0002, |
| "loss": 2.2364, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0002, |
| "loss": 2.3528, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0002, |
| "loss": 2.2548, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0002, |
| "loss": 2.48, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0002, |
| "loss": 2.2583, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0002, |
| "loss": 2.3577, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0002, |
| "loss": 2.3183, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0002, |
| "loss": 2.4696, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0002, |
| "loss": 2.2888, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0002, |
| "loss": 2.4176, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0002, |
| "loss": 2.2553, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0002, |
| "loss": 2.4154, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0002, |
| "loss": 2.1653, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0002, |
| "loss": 2.5119, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0002, |
| "loss": 2.244, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0002, |
| "loss": 2.4014, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0002, |
| "loss": 2.2161, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0002, |
| "loss": 2.3874, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0002, |
| "loss": 2.1541, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0002, |
| "loss": 2.3145, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002, |
| "loss": 2.0975, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002, |
| "loss": 2.3086, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0002, |
| "loss": 2.2297, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0002, |
| "loss": 2.3859, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0002, |
| "loss": 1.9693, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0002, |
| "loss": 2.35, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0002, |
| "loss": 2.0767, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0002, |
| "loss": 2.4507, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0002, |
| "loss": 2.1896, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0002, |
| "loss": 2.4073, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0002, |
| "loss": 2.2483, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0002, |
| "loss": 2.4123, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002, |
| "loss": 2.0906, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002, |
| "loss": 2.3711, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0002, |
| "loss": 2.2335, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0002, |
| "loss": 2.3186, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0002, |
| "loss": 2.1362, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002, |
| "loss": 2.355, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002, |
| "loss": 2.1693, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0002, |
| "loss": 2.4107, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0002, |
| "loss": 2.2098, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0002, |
| "loss": 2.2907, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002, |
| "loss": 2.2335, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002, |
| "loss": 2.3959, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0002, |
| "loss": 2.1394, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0002, |
| "loss": 2.4236, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0002, |
| "loss": 2.1658, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0002, |
| "loss": 2.2197, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0002, |
| "loss": 2.101, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0002, |
| "loss": 2.3416, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0002, |
| "loss": 2.0611, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0002, |
| "loss": 2.3655, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002, |
| "loss": 2.1746, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002, |
| "loss": 2.3561, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0002, |
| "loss": 2.0445, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0002, |
| "loss": 2.4062, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0002, |
| "loss": 2.2256, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0002, |
| "loss": 2.3861, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0002, |
| "loss": 2.1233, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0002, |
| "loss": 2.3603, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0002, |
| "loss": 2.1118, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0002, |
| "loss": 2.3485, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0002, |
| "loss": 2.0191, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0002, |
| "loss": 2.2822, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0002, |
| "loss": 2.2297, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0002, |
| "loss": 2.375, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0002, |
| "loss": 2.1859, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0002, |
| "loss": 2.3411, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0002, |
| "loss": 2.0084, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0002, |
| "loss": 2.2536, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0002, |
| "loss": 2.1425, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0002, |
| "loss": 2.327, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0002, |
| "loss": 2.1356, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0002, |
| "loss": 2.2898, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0002, |
| "loss": 2.2311, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0002, |
| "loss": 2.3137, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0002, |
| "loss": 2.1663, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0002, |
| "loss": 2.3658, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0002, |
| "loss": 2.2129, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0002, |
| "loss": 2.4101, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0002, |
| "loss": 1.9538, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0002, |
| "loss": 2.4423, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0002, |
| "loss": 2.1344, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0002, |
| "loss": 2.3684, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0002, |
| "loss": 2.0626, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0002, |
| "loss": 2.3094, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0002, |
| "loss": 1.9973, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0002, |
| "loss": 2.3486, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0002, |
| "loss": 2.1224, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0002, |
| "loss": 2.2489, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0002, |
| "loss": 2.0245, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0002, |
| "loss": 2.3817, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0002, |
| "loss": 2.1786, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0002, |
| "loss": 2.3312, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0002, |
| "loss": 2.0933, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0002, |
| "loss": 2.4028, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0002, |
| "loss": 2.0841, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0002, |
| "loss": 2.2924, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0002, |
| "loss": 2.0088, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0002, |
| "loss": 2.3636, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0002, |
| "loss": 2.1101, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0002, |
| "loss": 2.3225, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0002, |
| "loss": 2.1775, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0002, |
| "loss": 2.3995, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0002, |
| "loss": 2.0986, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0002, |
| "loss": 2.3905, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0002, |
| "loss": 2.0652, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0002, |
| "loss": 2.421, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0002, |
| "loss": 2.0402, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0002, |
| "loss": 2.3154, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0002, |
| "loss": 2.1875, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002, |
| "loss": 2.3481, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002, |
| "loss": 1.9964, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0002, |
| "loss": 2.2526, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0002, |
| "loss": 2.1539, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0002, |
| "loss": 2.2843, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0002, |
| "loss": 2.1451, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0002, |
| "loss": 2.2955, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0002, |
| "loss": 1.9855, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0002, |
| "loss": 2.4539, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0002, |
| "loss": 2.081, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0002, |
| "loss": 2.3724, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0002, |
| "loss": 1.9166, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0002, |
| "loss": 2.2773, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0002, |
| "loss": 2.0122, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0002, |
| "loss": 2.2272, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002, |
| "loss": 2.079, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002, |
| "loss": 2.4039, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0002, |
| "loss": 2.0559, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0002, |
| "loss": 2.3187, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0002, |
| "loss": 2.1956, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0002, |
| "loss": 2.2611, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0002, |
| "loss": 2.1821, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0002, |
| "loss": 2.2348, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0002, |
| "loss": 1.9541, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0002, |
| "loss": 2.381, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0002, |
| "loss": 2.0978, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0002, |
| "loss": 2.3287, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0002, |
| "loss": 2.1468, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0002, |
| "loss": 2.3461, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0002, |
| "loss": 2.1531, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0002, |
| "loss": 2.3668, |
| "step": 5225 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0002, |
| "loss": 2.0828, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0002, |
| "loss": 2.3509, |
| "step": 5275 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0002, |
| "loss": 2.0817, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0002, |
| "loss": 2.3609, |
| "step": 5325 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0002, |
| "loss": 2.1038, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0002, |
| "loss": 2.1818, |
| "step": 5375 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0002, |
| "loss": 2.0505, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0002, |
| "loss": 2.224, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0002, |
| "loss": 2.1014, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0002, |
| "loss": 2.2959, |
| "step": 5475 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0002, |
| "loss": 2.0824, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002, |
| "loss": 2.2384, |
| "step": 5525 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002, |
| "loss": 2.0479, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0002, |
| "loss": 2.4386, |
| "step": 5575 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0002, |
| "loss": 2.0333, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0002, |
| "loss": 2.2645, |
| "step": 5625 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0002, |
| "loss": 2.0613, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0002, |
| "loss": 2.2632, |
| "step": 5675 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0002, |
| "loss": 2.046, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0002, |
| "loss": 2.2734, |
| "step": 5725 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0002, |
| "loss": 2.1293, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002, |
| "loss": 2.3176, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002, |
| "loss": 2.0572, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002, |
| "loss": 2.3778, |
| "step": 5825 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002, |
| "loss": 2.1428, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002, |
| "loss": 2.2676, |
| "step": 5875 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002, |
| "loss": 2.1128, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002, |
| "loss": 2.2816, |
| "step": 5925 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002, |
| "loss": 2.1031, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002, |
| "loss": 2.2525, |
| "step": 5975 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002, |
| "loss": 2.0176, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002, |
| "loss": 2.3356, |
| "step": 6025 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002, |
| "loss": 2.0888, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002, |
| "loss": 2.2696, |
| "step": 6075 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0002, |
| "loss": 2.0839, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0002, |
| "loss": 2.1857, |
| "step": 6125 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002, |
| "loss": 1.9069, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002, |
| "loss": 2.1828, |
| "step": 6175 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002, |
| "loss": 2.2461, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002, |
| "loss": 2.0132, |
| "step": 6225 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002, |
| "loss": 2.1801, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002, |
| "loss": 1.951, |
| "step": 6275 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002, |
| "loss": 2.2283, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002, |
| "loss": 2.0281, |
| "step": 6325 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002, |
| "loss": 2.1224, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002, |
| "loss": 2.0129, |
| "step": 6375 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0002, |
| "loss": 2.1152, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0002, |
| "loss": 1.9273, |
| "step": 6425 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002, |
| "loss": 2.1951, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002, |
| "loss": 2.0412, |
| "step": 6475 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002, |
| "loss": 2.2057, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0002, |
| "loss": 1.981, |
| "step": 6525 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0002, |
| "loss": 2.1747, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002, |
| "loss": 2.1069, |
| "step": 6575 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002, |
| "loss": 2.0922, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002, |
| "loss": 2.0494, |
| "step": 6625 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0002, |
| "loss": 2.2018, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0002, |
| "loss": 1.9717, |
| "step": 6675 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002, |
| "loss": 2.1925, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002, |
| "loss": 1.9474, |
| "step": 6725 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002, |
| "loss": 2.144, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002, |
| "loss": 1.9665, |
| "step": 6775 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002, |
| "loss": 2.1815, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002, |
| "loss": 1.9634, |
| "step": 6825 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002, |
| "loss": 2.1305, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002, |
| "loss": 2.0749, |
| "step": 6875 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002, |
| "loss": 2.1224, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002, |
| "loss": 1.9251, |
| "step": 6925 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0002, |
| "loss": 2.129, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0002, |
| "loss": 2.0075, |
| "step": 6975 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0002, |
| "loss": 2.1379, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002, |
| "loss": 1.933, |
| "step": 7025 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002, |
| "loss": 2.1166, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002, |
| "loss": 2.0678, |
| "step": 7075 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002, |
| "loss": 2.27, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002, |
| "loss": 2.0561, |
| "step": 7125 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002, |
| "loss": 2.1881, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002, |
| "loss": 2.09, |
| "step": 7175 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0002, |
| "loss": 2.1593, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0002, |
| "loss": 2.006, |
| "step": 7225 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0002, |
| "loss": 2.2042, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0002, |
| "loss": 2.0281, |
| "step": 7275 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0002, |
| "loss": 2.1054, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0002, |
| "loss": 1.8642, |
| "step": 7325 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0002, |
| "loss": 2.2682, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0002, |
| "loss": 1.9484, |
| "step": 7375 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.0002, |
| "loss": 2.2109, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.0002, |
| "loss": 2.0173, |
| "step": 7425 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0002, |
| "loss": 2.2798, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0002, |
| "loss": 1.994, |
| "step": 7475 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0002, |
| "loss": 2.2614, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0002, |
| "loss": 1.9984, |
| "step": 7525 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0002, |
| "loss": 2.2028, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.0002, |
| "loss": 2.0623, |
| "step": 7575 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.0002, |
| "loss": 2.1407, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0002, |
| "loss": 2.033, |
| "step": 7625 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0002, |
| "loss": 2.1987, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0002, |
| "loss": 1.9441, |
| "step": 7675 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.0002, |
| "loss": 2.2531, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.0002, |
| "loss": 2.0297, |
| "step": 7725 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0002, |
| "loss": 2.2639, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0002, |
| "loss": 2.0911, |
| "step": 7775 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0002, |
| "loss": 2.2469, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0002, |
| "loss": 1.9717, |
| "step": 7825 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0002, |
| "loss": 2.1949, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0002, |
| "loss": 1.9858, |
| "step": 7875 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0002, |
| "loss": 2.1441, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0002, |
| "loss": 1.9925, |
| "step": 7925 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0002, |
| "loss": 2.1327, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0002, |
| "loss": 1.8171, |
| "step": 7975 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0002, |
| "loss": 2.1612, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0002, |
| "loss": 1.982, |
| "step": 8025 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0002, |
| "loss": 2.1808, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0002, |
| "loss": 1.936, |
| "step": 8075 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0002, |
| "loss": 2.1597, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0002, |
| "loss": 1.8461, |
| "step": 8125 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0002, |
| "loss": 2.1309, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0002, |
| "loss": 2.1594, |
| "step": 8175 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0002, |
| "loss": 2.1131, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0002, |
| "loss": 1.8944, |
| "step": 8225 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0002, |
| "loss": 2.0845, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0002, |
| "loss": 2.0502, |
| "step": 8275 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0002, |
| "loss": 2.1475, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.0002, |
| "loss": 1.9887, |
| "step": 8325 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.0002, |
| "loss": 2.0891, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0002, |
| "loss": 1.9402, |
| "step": 8375 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0002, |
| "loss": 2.1112, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0002, |
| "loss": 2.061, |
| "step": 8425 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0002, |
| "loss": 2.2356, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0002, |
| "loss": 1.9425, |
| "step": 8475 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0002, |
| "loss": 2.2236, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0002, |
| "loss": 1.84, |
| "step": 8525 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0002, |
| "loss": 2.1553, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0002, |
| "loss": 1.8677, |
| "step": 8575 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0002, |
| "loss": 2.2828, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0002, |
| "loss": 1.907, |
| "step": 8625 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0002, |
| "loss": 2.1928, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0002, |
| "loss": 1.9845, |
| "step": 8675 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0002, |
| "loss": 2.1495, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0002, |
| "loss": 1.9496, |
| "step": 8725 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0002, |
| "loss": 2.2124, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0002, |
| "loss": 2.0276, |
| "step": 8775 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0002, |
| "loss": 2.1716, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0002, |
| "loss": 2.0762, |
| "step": 8825 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0002, |
| "loss": 2.1875, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0002, |
| "loss": 1.9567, |
| "step": 8875 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0002, |
| "loss": 2.1331, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0002, |
| "loss": 1.9842, |
| "step": 8925 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0002, |
| "loss": 2.1453, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0002, |
| "loss": 2.0681, |
| "step": 8975 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.0002, |
| "loss": 2.1777, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.0002, |
| "loss": 1.9691, |
| "step": 9025 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.0002, |
| "loss": 2.195, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.0002, |
| "loss": 1.9626, |
| "step": 9075 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.0002, |
| "loss": 2.2002, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0002, |
| "loss": 1.9548, |
| "step": 9125 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0002, |
| "loss": 2.0627, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0002, |
| "loss": 1.8949, |
| "step": 9175 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0002, |
| "loss": 2.0806, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0002, |
| "loss": 1.945, |
| "step": 9225 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0002, |
| "loss": 2.2907, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0002, |
| "loss": 1.9685, |
| "step": 9275 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.0002, |
| "loss": 2.1251, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.0002, |
| "loss": 1.9426, |
| "step": 9325 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.0002, |
| "loss": 2.1402, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.0002, |
| "loss": 1.9268, |
| "step": 9375 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.0002, |
| "loss": 2.1373, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.0002, |
| "loss": 1.9606, |
| "step": 9425 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.0002, |
| "loss": 2.2019, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.0002, |
| "loss": 1.9014, |
| "step": 9475 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.0002, |
| "loss": 2.071, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.0002, |
| "loss": 1.9199, |
| "step": 9525 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0002, |
| "loss": 2.224, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0002, |
| "loss": 2.0849, |
| "step": 9575 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0002, |
| "loss": 2.0944, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0002, |
| "loss": 1.9544, |
| "step": 9625 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0002, |
| "loss": 2.24, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.0002, |
| "loss": 2.0462, |
| "step": 9675 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.0002, |
| "loss": 2.09, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.0002, |
| "loss": 2.1015, |
| "step": 9725 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.0002, |
| "loss": 2.205, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 0.0002, |
| "loss": 1.9051, |
| "step": 9775 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0002, |
| "loss": 2.1992, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0002, |
| "loss": 1.9637, |
| "step": 9825 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.0002, |
| "loss": 2.1629, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.0002, |
| "loss": 1.9888, |
| "step": 9875 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 0.0002, |
| "loss": 2.0987, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.0002, |
| "loss": 1.982, |
| "step": 9925 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.0002, |
| "loss": 2.0717, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.0002, |
| "loss": 1.9921, |
| "step": 9975 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.0002, |
| "loss": 2.1405, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 0.0002, |
| "loss": 1.8981, |
| "step": 10025 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.0002, |
| "loss": 2.1338, |
| "step": 10050 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.0002, |
| "loss": 1.8935, |
| "step": 10075 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.0002, |
| "loss": 2.12, |
| "step": 10100 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.0002, |
| "loss": 1.9227, |
| "step": 10125 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 0.0002, |
| "loss": 2.2621, |
| "step": 10150 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.0002, |
| "loss": 2.0911, |
| "step": 10175 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.0002, |
| "loss": 2.1278, |
| "step": 10200 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.0002, |
| "loss": 2.0571, |
| "step": 10225 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.0002, |
| "loss": 2.1462, |
| "step": 10250 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 0.0002, |
| "loss": 1.9058, |
| "step": 10275 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0002, |
| "loss": 2.1154, |
| "step": 10300 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0002, |
| "loss": 1.9732, |
| "step": 10325 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.0002, |
| "loss": 2.2269, |
| "step": 10350 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 0.0002, |
| "loss": 1.976, |
| "step": 10375 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.0002, |
| "loss": 2.2662, |
| "step": 10400 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.0002, |
| "loss": 1.9424, |
| "step": 10425 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.0002, |
| "loss": 2.1602, |
| "step": 10450 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.0002, |
| "loss": 1.9668, |
| "step": 10475 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 0.0002, |
| "loss": 2.1355, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.0002, |
| "loss": 2.0549, |
| "step": 10525 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.0002, |
| "loss": 2.1387, |
| "step": 10550 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.0002, |
| "loss": 2.0052, |
| "step": 10575 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.0002, |
| "loss": 2.142, |
| "step": 10600 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 0.0002, |
| "loss": 1.9063, |
| "step": 10625 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.0002, |
| "loss": 2.1204, |
| "step": 10650 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.0002, |
| "loss": 1.8533, |
| "step": 10675 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.0002, |
| "loss": 2.0855, |
| "step": 10700 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.0002, |
| "loss": 2.0132, |
| "step": 10725 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 0.0002, |
| "loss": 2.1836, |
| "step": 10750 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0002, |
| "loss": 2.0041, |
| "step": 10775 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0002, |
| "loss": 2.1063, |
| "step": 10800 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0002, |
| "loss": 1.9192, |
| "step": 10825 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0002, |
| "loss": 2.2027, |
| "step": 10850 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 0.0002, |
| "loss": 1.8827, |
| "step": 10875 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.0002, |
| "loss": 2.1606, |
| "step": 10900 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.0002, |
| "loss": 2.0008, |
| "step": 10925 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.0002, |
| "loss": 2.1251, |
| "step": 10950 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.0002, |
| "loss": 1.9729, |
| "step": 10975 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 0.0002, |
| "loss": 2.2528, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.0002, |
| "loss": 1.9996, |
| "step": 11025 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.0002, |
| "loss": 2.2393, |
| "step": 11050 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.0002, |
| "loss": 1.9691, |
| "step": 11075 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.0002, |
| "loss": 2.0502, |
| "step": 11100 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 0.0002, |
| "loss": 1.9065, |
| "step": 11125 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0002, |
| "loss": 2.1418, |
| "step": 11150 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0002, |
| "loss": 1.964, |
| "step": 11175 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0002, |
| "loss": 2.1955, |
| "step": 11200 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.0002, |
| "loss": 1.9258, |
| "step": 11225 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 0.0002, |
| "loss": 2.1197, |
| "step": 11250 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.0002, |
| "loss": 1.9796, |
| "step": 11275 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.0002, |
| "loss": 2.1744, |
| "step": 11300 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.0002, |
| "loss": 2.0322, |
| "step": 11325 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.0002, |
| "loss": 2.1835, |
| "step": 11350 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 0.0002, |
| "loss": 1.8594, |
| "step": 11375 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0002, |
| "loss": 2.1748, |
| "step": 11400 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0002, |
| "loss": 1.9016, |
| "step": 11425 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.0002, |
| "loss": 2.1567, |
| "step": 11450 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.0002, |
| "loss": 1.9872, |
| "step": 11475 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 0.0002, |
| "loss": 2.1728, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.0002, |
| "loss": 1.9501, |
| "step": 11525 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.0002, |
| "loss": 2.1301, |
| "step": 11550 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.0002, |
| "loss": 1.9502, |
| "step": 11575 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.0002, |
| "loss": 2.2177, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 0.0002, |
| "loss": 1.8511, |
| "step": 11625 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.0002, |
| "loss": 2.1206, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.0002, |
| "loss": 1.9794, |
| "step": 11675 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.0002, |
| "loss": 2.1558, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.0002, |
| "loss": 2.0251, |
| "step": 11725 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 0.0002, |
| "loss": 2.0404, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0002, |
| "loss": 1.98, |
| "step": 11775 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0002, |
| "loss": 2.1388, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.0002, |
| "loss": 1.9228, |
| "step": 11825 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.0002, |
| "loss": 2.101, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 0.0002, |
| "loss": 2.0433, |
| "step": 11875 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.0002, |
| "loss": 2.0715, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.0002, |
| "loss": 2.0914, |
| "step": 11925 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.0002, |
| "loss": 2.1653, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.0002, |
| "loss": 1.868, |
| "step": 11975 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 0.0002, |
| "loss": 2.1383, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0002, |
| "loss": 2.046, |
| "step": 12025 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0002, |
| "loss": 2.1616, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.0002, |
| "loss": 1.9724, |
| "step": 12075 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.0002, |
| "loss": 2.119, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 0.0002, |
| "loss": 2.0735, |
| "step": 12125 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.0002, |
| "loss": 2.0902, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.0002, |
| "loss": 1.8635, |
| "step": 12175 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0002, |
| "loss": 2.1361, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0002, |
| "loss": 2.0382, |
| "step": 12225 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0002, |
| "loss": 2.1705, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.0002, |
| "loss": 1.9679, |
| "step": 12275 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.0002, |
| "loss": 2.1322, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0002, |
| "loss": 1.951, |
| "step": 12325 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0002, |
| "loss": 2.0835, |
| "step": 12350 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.0002, |
| "loss": 1.966, |
| "step": 12375 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.0002, |
| "loss": 1.8028, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.0002, |
| "loss": 2.0308, |
| "step": 12425 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.0002, |
| "loss": 1.8125, |
| "step": 12450 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 0.0002, |
| "loss": 2.1039, |
| "step": 12475 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.0002, |
| "loss": 1.8705, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.0002, |
| "loss": 1.9316, |
| "step": 12525 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.0002, |
| "loss": 1.8444, |
| "step": 12550 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.0002, |
| "loss": 2.036, |
| "step": 12575 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 0.0002, |
| "loss": 1.7983, |
| "step": 12600 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 0.0002, |
| "loss": 1.9918, |
| "step": 12625 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 0.0002, |
| "loss": 1.8069, |
| "step": 12650 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 0.0002, |
| "loss": 2.0905, |
| "step": 12675 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.0002, |
| "loss": 1.7724, |
| "step": 12700 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 0.0002, |
| "loss": 1.9975, |
| "step": 12725 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.0002, |
| "loss": 1.8409, |
| "step": 12750 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.0002, |
| "loss": 1.9692, |
| "step": 12775 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.0002, |
| "loss": 1.7994, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.0002, |
| "loss": 1.9763, |
| "step": 12825 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 0.0002, |
| "loss": 1.8558, |
| "step": 12850 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0002, |
| "loss": 1.9636, |
| "step": 12875 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0002, |
| "loss": 1.8986, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0002, |
| "loss": 1.9416, |
| "step": 12925 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 0.0002, |
| "loss": 1.8119, |
| "step": 12950 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 0.0002, |
| "loss": 2.0878, |
| "step": 12975 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 0.0002, |
| "loss": 1.8937, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 0.0002, |
| "loss": 2.0047, |
| "step": 13025 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 0.0002, |
| "loss": 1.8724, |
| "step": 13050 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.0002, |
| "loss": 2.0153, |
| "step": 13075 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 0.0002, |
| "loss": 1.8569, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.0002, |
| "loss": 1.9678, |
| "step": 13125 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.0002, |
| "loss": 1.8821, |
| "step": 13150 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.0002, |
| "loss": 2.0174, |
| "step": 13175 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 0.0002, |
| "loss": 1.9131, |
| "step": 13200 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 0.0002, |
| "loss": 2.007, |
| "step": 13225 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.0002, |
| "loss": 1.8461, |
| "step": 13250 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.0002, |
| "loss": 1.9892, |
| "step": 13275 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.0002, |
| "loss": 1.9311, |
| "step": 13300 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 0.0002, |
| "loss": 1.9571, |
| "step": 13325 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 0.0002, |
| "loss": 1.8195, |
| "step": 13350 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0002, |
| "loss": 1.9952, |
| "step": 13375 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0002, |
| "loss": 1.9007, |
| "step": 13400 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 0.0002, |
| "loss": 1.9624, |
| "step": 13425 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 0.0002, |
| "loss": 1.9803, |
| "step": 13450 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 0.0002, |
| "loss": 1.976, |
| "step": 13475 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.0002, |
| "loss": 1.9023, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.0002, |
| "loss": 1.9851, |
| "step": 13525 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 0.0002, |
| "loss": 1.8301, |
| "step": 13550 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 0.0002, |
| "loss": 2.0179, |
| "step": 13575 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 0.0002, |
| "loss": 1.9377, |
| "step": 13600 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.0002, |
| "loss": 2.0117, |
| "step": 13625 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.0002, |
| "loss": 1.8109, |
| "step": 13650 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 0.0002, |
| "loss": 2.0088, |
| "step": 13675 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 0.0002, |
| "loss": 1.8731, |
| "step": 13700 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 0.0002, |
| "loss": 1.9932, |
| "step": 13725 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.0002, |
| "loss": 1.9334, |
| "step": 13750 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.0002, |
| "loss": 2.0354, |
| "step": 13775 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 0.0002, |
| "loss": 1.8924, |
| "step": 13800 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 0.0002, |
| "loss": 2.0722, |
| "step": 13825 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 0.0002, |
| "loss": 1.9154, |
| "step": 13850 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 0.0002, |
| "loss": 2.0131, |
| "step": 13875 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 0.0002, |
| "loss": 1.874, |
| "step": 13900 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 0.0002, |
| "loss": 2.0444, |
| "step": 13925 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 0.0002, |
| "loss": 1.8059, |
| "step": 13950 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 0.0002, |
| "loss": 1.9809, |
| "step": 13975 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.0002, |
| "loss": 2.0035, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.0002, |
| "loss": 1.9896, |
| "step": 14025 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 0.0002, |
| "loss": 1.796, |
| "step": 14050 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 0.0002, |
| "loss": 2.0247, |
| "step": 14075 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 0.0002, |
| "loss": 1.8983, |
| "step": 14100 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 0.0002, |
| "loss": 1.9623, |
| "step": 14125 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 0.0002, |
| "loss": 1.8984, |
| "step": 14150 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.0002, |
| "loss": 2.0151, |
| "step": 14175 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.0002, |
| "loss": 1.8974, |
| "step": 14200 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 0.0002, |
| "loss": 2.0638, |
| "step": 14225 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 0.0002, |
| "loss": 1.8475, |
| "step": 14250 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 0.0002, |
| "loss": 2.0365, |
| "step": 14275 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.0002, |
| "loss": 1.8248, |
| "step": 14300 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 0.0002, |
| "loss": 2.0305, |
| "step": 14325 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 0.0002, |
| "loss": 1.8457, |
| "step": 14350 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 0.0002, |
| "loss": 2.007, |
| "step": 14375 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 0.0002, |
| "loss": 1.9849, |
| "step": 14400 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 0.0002, |
| "loss": 2.0349, |
| "step": 14425 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 0.0002, |
| "loss": 1.798, |
| "step": 14450 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.0002, |
| "loss": 1.9238, |
| "step": 14475 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.0002, |
| "loss": 1.8017, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.0002, |
| "loss": 2.154, |
| "step": 14525 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 0.0002, |
| "loss": 1.8748, |
| "step": 14550 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 0.0002, |
| "loss": 1.9396, |
| "step": 14575 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.0002, |
| "loss": 1.8473, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.0002, |
| "loss": 2.0343, |
| "step": 14625 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.0002, |
| "loss": 1.8666, |
| "step": 14650 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.0002, |
| "loss": 2.147, |
| "step": 14675 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 0.0002, |
| "loss": 1.8163, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 0.0002, |
| "loss": 1.9378, |
| "step": 14725 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 0.0002, |
| "loss": 1.9602, |
| "step": 14750 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 0.0002, |
| "loss": 1.9138, |
| "step": 14775 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 0.0002, |
| "loss": 1.9567, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 0.0002, |
| "loss": 1.9788, |
| "step": 14825 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.0002, |
| "loss": 1.8203, |
| "step": 14850 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.0002, |
| "loss": 1.9832, |
| "step": 14875 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.0002, |
| "loss": 1.8244, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.0002, |
| "loss": 1.9275, |
| "step": 14925 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 0.0002, |
| "loss": 1.8399, |
| "step": 14950 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 0.0002, |
| "loss": 2.0371, |
| "step": 14975 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 0.0002, |
| "loss": 1.8246, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 0.0002, |
| "loss": 2.0811, |
| "step": 15025 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 0.0002, |
| "loss": 1.8986, |
| "step": 15050 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 0.0002, |
| "loss": 1.9898, |
| "step": 15075 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.0002, |
| "loss": 1.888, |
| "step": 15100 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.0002, |
| "loss": 2.0787, |
| "step": 15125 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.0002, |
| "loss": 1.8367, |
| "step": 15150 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 0.0002, |
| "loss": 1.8602, |
| "step": 15175 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 0.0002, |
| "loss": 1.8423, |
| "step": 15200 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.0002, |
| "loss": 1.9676, |
| "step": 15225 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.0002, |
| "loss": 1.9784, |
| "step": 15250 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.0002, |
| "loss": 1.9939, |
| "step": 15275 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 0.0002, |
| "loss": 1.8257, |
| "step": 15300 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 0.0002, |
| "loss": 1.9068, |
| "step": 15325 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.0002, |
| "loss": 1.7919, |
| "step": 15350 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.0002, |
| "loss": 1.9515, |
| "step": 15375 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 0.0002, |
| "loss": 1.8452, |
| "step": 15400 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 0.0002, |
| "loss": 1.9245, |
| "step": 15425 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 0.0002, |
| "loss": 1.9447, |
| "step": 15450 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.0002, |
| "loss": 1.9471, |
| "step": 15475 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.0002, |
| "loss": 1.7937, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 0.0002, |
| "loss": 2.03, |
| "step": 15525 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 0.0002, |
| "loss": 1.92, |
| "step": 15550 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 0.0002, |
| "loss": 2.0764, |
| "step": 15575 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.0002, |
| "loss": 1.9053, |
| "step": 15600 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.0002, |
| "loss": 1.9965, |
| "step": 15625 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 0.0002, |
| "loss": 1.8693, |
| "step": 15650 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 0.0002, |
| "loss": 1.9852, |
| "step": 15675 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 0.0002, |
| "loss": 2.0139, |
| "step": 15700 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.0002, |
| "loss": 2.058, |
| "step": 15725 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.0002, |
| "loss": 1.8563, |
| "step": 15750 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 0.0002, |
| "loss": 2.0112, |
| "step": 15775 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 0.0002, |
| "loss": 1.7621, |
| "step": 15800 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 0.0002, |
| "loss": 1.9343, |
| "step": 15825 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 0.0002, |
| "loss": 1.8864, |
| "step": 15850 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 0.0002, |
| "loss": 2.0502, |
| "step": 15875 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.0002, |
| "loss": 1.7788, |
| "step": 15900 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.0002, |
| "loss": 1.992, |
| "step": 15925 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 0.0002, |
| "loss": 1.8523, |
| "step": 15950 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 0.0002, |
| "loss": 2.0512, |
| "step": 15975 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 0.0002, |
| "loss": 1.8112, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0002, |
| "loss": 2.02, |
| "step": 16025 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0002, |
| "loss": 1.971, |
| "step": 16050 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 0.0002, |
| "loss": 1.9678, |
| "step": 16075 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 0.0002, |
| "loss": 1.8012, |
| "step": 16100 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 0.0002, |
| "loss": 2.0761, |
| "step": 16125 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.0002, |
| "loss": 1.8307, |
| "step": 16150 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.0002, |
| "loss": 2.0867, |
| "step": 16175 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 0.0002, |
| "loss": 1.7947, |
| "step": 16200 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 0.0002, |
| "loss": 2.011, |
| "step": 16225 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 0.0002, |
| "loss": 1.8579, |
| "step": 16250 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 0.0002, |
| "loss": 2.0436, |
| "step": 16275 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 0.0002, |
| "loss": 1.8294, |
| "step": 16300 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0002, |
| "loss": 2.0266, |
| "step": 16325 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0002, |
| "loss": 1.7949, |
| "step": 16350 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0002, |
| "loss": 2.0295, |
| "step": 16375 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 0.0002, |
| "loss": 1.7991, |
| "step": 16400 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 0.0002, |
| "loss": 2.0991, |
| "step": 16425 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.0002, |
| "loss": 1.868, |
| "step": 16450 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.0002, |
| "loss": 1.9726, |
| "step": 16475 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.0002, |
| "loss": 1.8131, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 0.0002, |
| "loss": 2.0037, |
| "step": 16525 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 0.0002, |
| "loss": 1.8865, |
| "step": 16550 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 0.0002, |
| "loss": 1.9698, |
| "step": 16575 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 0.0002, |
| "loss": 2.0733, |
| "step": 16600 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 0.0002, |
| "loss": 1.9668, |
| "step": 16625 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 0.0002, |
| "loss": 1.9262, |
| "step": 16650 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 0.0002, |
| "loss": 1.9017, |
| "step": 16675 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.0002, |
| "loss": 1.9897, |
| "step": 16700 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.0002, |
| "loss": 2.0554, |
| "step": 16725 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.0002, |
| "loss": 1.7832, |
| "step": 16750 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 0.0002, |
| "loss": 1.9988, |
| "step": 16775 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 0.0002, |
| "loss": 1.9868, |
| "step": 16800 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0002, |
| "loss": 1.9771, |
| "step": 16825 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0002, |
| "loss": 1.7744, |
| "step": 16850 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0002, |
| "loss": 2.0474, |
| "step": 16875 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 0.0002, |
| "loss": 1.8683, |
| "step": 16900 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 0.0002, |
| "loss": 1.9437, |
| "step": 16925 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0002, |
| "loss": 1.7555, |
| "step": 16950 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0002, |
| "loss": 1.9213, |
| "step": 16975 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0002, |
| "loss": 1.9488, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 0.0002, |
| "loss": 2.0102, |
| "step": 17025 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 0.0002, |
| "loss": 1.8886, |
| "step": 17050 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0002, |
| "loss": 2.0411, |
| "step": 17075 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0002, |
| "loss": 1.848, |
| "step": 17100 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.0002, |
| "loss": 1.998, |
| "step": 17125 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 0.0002, |
| "loss": 1.8947, |
| "step": 17150 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 0.0002, |
| "loss": 1.9295, |
| "step": 17175 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0002, |
| "loss": 1.9264, |
| "step": 17200 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0002, |
| "loss": 1.9114, |
| "step": 17225 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0002, |
| "loss": 1.8539, |
| "step": 17250 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 0.0002, |
| "loss": 1.9194, |
| "step": 17275 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 0.0002, |
| "loss": 1.8498, |
| "step": 17300 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 0.0002, |
| "loss": 1.9933, |
| "step": 17325 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 0.0002, |
| "loss": 1.8788, |
| "step": 17350 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 0.0002, |
| "loss": 2.0578, |
| "step": 17375 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 0.0002, |
| "loss": 1.8991, |
| "step": 17400 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 0.0002, |
| "loss": 1.9389, |
| "step": 17425 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 0.0002, |
| "loss": 1.8064, |
| "step": 17450 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 0.0002, |
| "loss": 2.0353, |
| "step": 17475 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.0002, |
| "loss": 2.0251, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.0002, |
| "loss": 2.0272, |
| "step": 17525 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.0002, |
| "loss": 1.8236, |
| "step": 17550 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.0002, |
| "loss": 1.9476, |
| "step": 17575 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 0.0002, |
| "loss": 1.8953, |
| "step": 17600 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 0.0002, |
| "loss": 2.0235, |
| "step": 17625 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 0.0002, |
| "loss": 1.8151, |
| "step": 17650 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 0.0002, |
| "loss": 1.9714, |
| "step": 17675 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 0.0002, |
| "loss": 1.8929, |
| "step": 17700 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 0.0002, |
| "loss": 2.091, |
| "step": 17725 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0002, |
| "loss": 1.914, |
| "step": 17750 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0002, |
| "loss": 2.0598, |
| "step": 17775 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0002, |
| "loss": 1.893, |
| "step": 17800 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 0.0002, |
| "loss": 2.0322, |
| "step": 17825 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 0.0002, |
| "loss": 1.8023, |
| "step": 17850 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0002, |
| "loss": 2.0638, |
| "step": 17875 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0002, |
| "loss": 1.8813, |
| "step": 17900 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0002, |
| "loss": 1.9056, |
| "step": 17925 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 0.0002, |
| "loss": 1.7811, |
| "step": 17950 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 0.0002, |
| "loss": 1.9817, |
| "step": 17975 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.0002, |
| "loss": 1.7945, |
| "step": 18000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 18516, |
| "num_train_epochs": 3, |
| "save_steps": 2000, |
| "total_flos": 3.638276029315154e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|