| { |
| "best_metric": 0.7817407250404358, |
| "best_model_checkpoint": "./model_fine-tune/glot/mbert/sin-Sinh/checkpoint-88000", |
| "epoch": 57.40378343118069, |
| "eval_steps": 500, |
| "global_step": 88000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.32615786040443573, |
| "grad_norm": 11.542426109313965, |
| "learning_rate": 9.95e-05, |
| "loss": 1.3665, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.32615786040443573, |
| "eval_accuracy": 0.7435959972211136, |
| "eval_loss": 1.184606671333313, |
| "eval_runtime": 96.3373, |
| "eval_samples_per_second": 127.282, |
| "eval_steps_per_second": 3.986, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6523157208088715, |
| "grad_norm": 4.45168399810791, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 1.2146, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6523157208088715, |
| "eval_accuracy": 0.7612149949461918, |
| "eval_loss": 1.1183263063430786, |
| "eval_runtime": 95.7412, |
| "eval_samples_per_second": 128.074, |
| "eval_steps_per_second": 4.011, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9784735812133072, |
| "grad_norm": 4.524835109710693, |
| "learning_rate": 9.850000000000001e-05, |
| "loss": 1.1681, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9784735812133072, |
| "eval_accuracy": 0.768949081178049, |
| "eval_loss": 1.0730011463165283, |
| "eval_runtime": 95.6577, |
| "eval_samples_per_second": 128.186, |
| "eval_steps_per_second": 4.014, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.304631441617743, |
| "grad_norm": 6.722175598144531, |
| "learning_rate": 9.8e-05, |
| "loss": 1.1172, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.304631441617743, |
| "eval_accuracy": 0.7709548983117451, |
| "eval_loss": 1.0751733779907227, |
| "eval_runtime": 96.0121, |
| "eval_samples_per_second": 127.713, |
| "eval_steps_per_second": 3.999, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.6307893020221789, |
| "grad_norm": 4.5525407791137695, |
| "learning_rate": 9.75e-05, |
| "loss": 1.0856, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.6307893020221789, |
| "eval_accuracy": 0.7754712373171111, |
| "eval_loss": 1.0482443571090698, |
| "eval_runtime": 96.0314, |
| "eval_samples_per_second": 127.687, |
| "eval_steps_per_second": 3.999, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.9569471624266144, |
| "grad_norm": 3.725614309310913, |
| "learning_rate": 9.7e-05, |
| "loss": 1.0682, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.9569471624266144, |
| "eval_accuracy": 0.7820907707722566, |
| "eval_loss": 1.017106056213379, |
| "eval_runtime": 96.2201, |
| "eval_samples_per_second": 127.437, |
| "eval_steps_per_second": 3.991, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.2831050228310503, |
| "grad_norm": 4.34595251083374, |
| "learning_rate": 9.65e-05, |
| "loss": 1.046, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.2831050228310503, |
| "eval_accuracy": 0.7852632360034133, |
| "eval_loss": 1.006613850593567, |
| "eval_runtime": 95.8046, |
| "eval_samples_per_second": 127.99, |
| "eval_steps_per_second": 4.008, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.609262883235486, |
| "grad_norm": 3.8304195404052734, |
| "learning_rate": 9.6e-05, |
| "loss": 1.0252, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.609262883235486, |
| "eval_accuracy": 0.7844518509171077, |
| "eval_loss": 0.9925754070281982, |
| "eval_runtime": 96.0595, |
| "eval_samples_per_second": 127.65, |
| "eval_steps_per_second": 3.998, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.935420743639922, |
| "grad_norm": 4.004533290863037, |
| "learning_rate": 9.55e-05, |
| "loss": 1.0202, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.935420743639922, |
| "eval_accuracy": 0.7862023720892519, |
| "eval_loss": 0.9879273772239685, |
| "eval_runtime": 96.1151, |
| "eval_samples_per_second": 127.576, |
| "eval_steps_per_second": 3.995, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.2615786040443573, |
| "grad_norm": 5.68316650390625, |
| "learning_rate": 9.5e-05, |
| "loss": 0.9935, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.2615786040443573, |
| "eval_accuracy": 0.7892981535025771, |
| "eval_loss": 0.9794703722000122, |
| "eval_runtime": 95.6967, |
| "eval_samples_per_second": 128.134, |
| "eval_steps_per_second": 4.013, |
| "step": 5000 |
| }, |
| { |
| "epoch": 3.5877364644487932, |
| "grad_norm": 3.8201630115509033, |
| "learning_rate": 9.449999999999999e-05, |
| "loss": 0.9965, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.5877364644487932, |
| "eval_accuracy": 0.7902053479095879, |
| "eval_loss": 0.9639435410499573, |
| "eval_runtime": 96.059, |
| "eval_samples_per_second": 127.651, |
| "eval_steps_per_second": 3.998, |
| "step": 5500 |
| }, |
| { |
| "epoch": 3.9138943248532287, |
| "grad_norm": 4.568619728088379, |
| "learning_rate": 9.4e-05, |
| "loss": 0.9773, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.9138943248532287, |
| "eval_accuracy": 0.7901874809885227, |
| "eval_loss": 0.9642708897590637, |
| "eval_runtime": 96.0775, |
| "eval_samples_per_second": 127.626, |
| "eval_steps_per_second": 3.997, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.240052185257665, |
| "grad_norm": 2.4079413414001465, |
| "learning_rate": 9.350000000000001e-05, |
| "loss": 0.9573, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.240052185257665, |
| "eval_accuracy": 0.7912424302921408, |
| "eval_loss": 0.9602788686752319, |
| "eval_runtime": 95.6177, |
| "eval_samples_per_second": 128.24, |
| "eval_steps_per_second": 4.016, |
| "step": 6500 |
| }, |
| { |
| "epoch": 4.566210045662101, |
| "grad_norm": 3.638129711151123, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.9655, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.566210045662101, |
| "eval_accuracy": 0.7932507570316953, |
| "eval_loss": 0.9564482569694519, |
| "eval_runtime": 96.2782, |
| "eval_samples_per_second": 127.36, |
| "eval_steps_per_second": 3.988, |
| "step": 7000 |
| }, |
| { |
| "epoch": 4.892367906066536, |
| "grad_norm": 6.558393955230713, |
| "learning_rate": 9.250000000000001e-05, |
| "loss": 0.9555, |
| "step": 7500 |
| }, |
| { |
| "epoch": 4.892367906066536, |
| "eval_accuracy": 0.7944629506255089, |
| "eval_loss": 0.9509521722793579, |
| "eval_runtime": 96.1362, |
| "eval_samples_per_second": 127.548, |
| "eval_steps_per_second": 3.994, |
| "step": 7500 |
| }, |
| { |
| "epoch": 5.218525766470972, |
| "grad_norm": 3.637993335723877, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.9378, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.218525766470972, |
| "eval_accuracy": 0.7949035700239684, |
| "eval_loss": 0.9451656937599182, |
| "eval_runtime": 95.4917, |
| "eval_samples_per_second": 128.409, |
| "eval_steps_per_second": 4.021, |
| "step": 8000 |
| }, |
| { |
| "epoch": 5.544683626875408, |
| "grad_norm": 3.2590765953063965, |
| "learning_rate": 9.15e-05, |
| "loss": 0.942, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.544683626875408, |
| "eval_accuracy": 0.7976977603049797, |
| "eval_loss": 0.9348493814468384, |
| "eval_runtime": 95.6296, |
| "eval_samples_per_second": 128.224, |
| "eval_steps_per_second": 4.015, |
| "step": 8500 |
| }, |
| { |
| "epoch": 5.870841487279844, |
| "grad_norm": 6.074111461639404, |
| "learning_rate": 9.1e-05, |
| "loss": 0.9189, |
| "step": 9000 |
| }, |
| { |
| "epoch": 5.870841487279844, |
| "eval_accuracy": 0.7951165602308211, |
| "eval_loss": 0.9503134489059448, |
| "eval_runtime": 95.929, |
| "eval_samples_per_second": 127.824, |
| "eval_steps_per_second": 4.003, |
| "step": 9000 |
| }, |
| { |
| "epoch": 6.1969993476842795, |
| "grad_norm": 4.091071605682373, |
| "learning_rate": 9.05e-05, |
| "loss": 0.9096, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.1969993476842795, |
| "eval_accuracy": 0.7966632678695453, |
| "eval_loss": 0.9422316551208496, |
| "eval_runtime": 95.9886, |
| "eval_samples_per_second": 127.744, |
| "eval_steps_per_second": 4.0, |
| "step": 9500 |
| }, |
| { |
| "epoch": 6.523157208088715, |
| "grad_norm": 4.287664890289307, |
| "learning_rate": 9e-05, |
| "loss": 0.9207, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.523157208088715, |
| "eval_accuracy": 0.7982671256149266, |
| "eval_loss": 0.9268999695777893, |
| "eval_runtime": 95.9908, |
| "eval_samples_per_second": 127.741, |
| "eval_steps_per_second": 4.0, |
| "step": 10000 |
| }, |
| { |
| "epoch": 6.8493150684931505, |
| "grad_norm": 4.4528703689575195, |
| "learning_rate": 8.950000000000001e-05, |
| "loss": 0.9067, |
| "step": 10500 |
| }, |
| { |
| "epoch": 6.8493150684931505, |
| "eval_accuracy": 0.8010467333934564, |
| "eval_loss": 0.9209058284759521, |
| "eval_runtime": 95.8309, |
| "eval_samples_per_second": 127.955, |
| "eval_steps_per_second": 4.007, |
| "step": 10500 |
| }, |
| { |
| "epoch": 7.1754729288975865, |
| "grad_norm": 4.453877925872803, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 0.8987, |
| "step": 11000 |
| }, |
| { |
| "epoch": 7.1754729288975865, |
| "eval_accuracy": 0.8013618864649552, |
| "eval_loss": 0.9242467284202576, |
| "eval_runtime": 95.5629, |
| "eval_samples_per_second": 128.313, |
| "eval_steps_per_second": 4.018, |
| "step": 11000 |
| }, |
| { |
| "epoch": 7.501630789302022, |
| "grad_norm": 4.282083988189697, |
| "learning_rate": 8.850000000000001e-05, |
| "loss": 0.8906, |
| "step": 11500 |
| }, |
| { |
| "epoch": 7.501630789302022, |
| "eval_accuracy": 0.8037603951451062, |
| "eval_loss": 0.9170116186141968, |
| "eval_runtime": 96.0767, |
| "eval_samples_per_second": 127.627, |
| "eval_steps_per_second": 3.997, |
| "step": 11500 |
| }, |
| { |
| "epoch": 7.8277886497064575, |
| "grad_norm": 4.653249740600586, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.8976, |
| "step": 12000 |
| }, |
| { |
| "epoch": 7.8277886497064575, |
| "eval_accuracy": 0.8042002012072434, |
| "eval_loss": 0.9147453308105469, |
| "eval_runtime": 95.4281, |
| "eval_samples_per_second": 128.495, |
| "eval_steps_per_second": 4.024, |
| "step": 12000 |
| }, |
| { |
| "epoch": 8.153946510110893, |
| "grad_norm": 3.689419984817505, |
| "learning_rate": 8.75e-05, |
| "loss": 0.8822, |
| "step": 12500 |
| }, |
| { |
| "epoch": 8.153946510110893, |
| "eval_accuracy": 0.8048982122558644, |
| "eval_loss": 0.8996030688285828, |
| "eval_runtime": 96.1046, |
| "eval_samples_per_second": 127.59, |
| "eval_steps_per_second": 3.996, |
| "step": 12500 |
| }, |
| { |
| "epoch": 8.48010437051533, |
| "grad_norm": 3.341475009918213, |
| "learning_rate": 8.7e-05, |
| "loss": 0.8643, |
| "step": 13000 |
| }, |
| { |
| "epoch": 8.48010437051533, |
| "eval_accuracy": 0.8045987248855143, |
| "eval_loss": 0.9084787368774414, |
| "eval_runtime": 95.8893, |
| "eval_samples_per_second": 127.877, |
| "eval_steps_per_second": 4.005, |
| "step": 13000 |
| }, |
| { |
| "epoch": 8.806262230919765, |
| "grad_norm": 3.8555312156677246, |
| "learning_rate": 8.65e-05, |
| "loss": 0.8652, |
| "step": 13500 |
| }, |
| { |
| "epoch": 8.806262230919765, |
| "eval_accuracy": 0.8064863340113129, |
| "eval_loss": 0.891981840133667, |
| "eval_runtime": 96.0373, |
| "eval_samples_per_second": 127.68, |
| "eval_steps_per_second": 3.998, |
| "step": 13500 |
| }, |
| { |
| "epoch": 9.132420091324201, |
| "grad_norm": 3.0453758239746094, |
| "learning_rate": 8.6e-05, |
| "loss": 0.8624, |
| "step": 14000 |
| }, |
| { |
| "epoch": 9.132420091324201, |
| "eval_accuracy": 0.807120171356821, |
| "eval_loss": 0.8966971039772034, |
| "eval_runtime": 96.0645, |
| "eval_samples_per_second": 127.643, |
| "eval_steps_per_second": 3.997, |
| "step": 14000 |
| }, |
| { |
| "epoch": 9.458577951728637, |
| "grad_norm": 2.872584342956543, |
| "learning_rate": 8.55e-05, |
| "loss": 0.849, |
| "step": 14500 |
| }, |
| { |
| "epoch": 9.458577951728637, |
| "eval_accuracy": 0.805082546907549, |
| "eval_loss": 0.8947042226791382, |
| "eval_runtime": 95.5177, |
| "eval_samples_per_second": 128.374, |
| "eval_steps_per_second": 4.02, |
| "step": 14500 |
| }, |
| { |
| "epoch": 9.784735812133073, |
| "grad_norm": 3.990968942642212, |
| "learning_rate": 8.5e-05, |
| "loss": 0.8464, |
| "step": 15000 |
| }, |
| { |
| "epoch": 9.784735812133073, |
| "eval_accuracy": 0.8059118422518474, |
| "eval_loss": 0.8978227376937866, |
| "eval_runtime": 96.134, |
| "eval_samples_per_second": 127.551, |
| "eval_steps_per_second": 3.994, |
| "step": 15000 |
| }, |
| { |
| "epoch": 10.110893672537507, |
| "grad_norm": 4.263514995574951, |
| "learning_rate": 8.450000000000001e-05, |
| "loss": 0.8502, |
| "step": 15500 |
| }, |
| { |
| "epoch": 10.110893672537507, |
| "eval_accuracy": 0.8088704201234094, |
| "eval_loss": 0.8844937086105347, |
| "eval_runtime": 96.0116, |
| "eval_samples_per_second": 127.714, |
| "eval_steps_per_second": 4.0, |
| "step": 15500 |
| }, |
| { |
| "epoch": 10.437051532941943, |
| "grad_norm": 4.85188102722168, |
| "learning_rate": 8.4e-05, |
| "loss": 0.8368, |
| "step": 16000 |
| }, |
| { |
| "epoch": 10.437051532941943, |
| "eval_accuracy": 0.8090195118695352, |
| "eval_loss": 0.8942297101020813, |
| "eval_runtime": 95.8669, |
| "eval_samples_per_second": 127.907, |
| "eval_steps_per_second": 4.006, |
| "step": 16000 |
| }, |
| { |
| "epoch": 10.76320939334638, |
| "grad_norm": 4.126960754394531, |
| "learning_rate": 8.35e-05, |
| "loss": 0.8443, |
| "step": 16500 |
| }, |
| { |
| "epoch": 10.76320939334638, |
| "eval_accuracy": 0.80988295142817, |
| "eval_loss": 0.8783074021339417, |
| "eval_runtime": 95.4865, |
| "eval_samples_per_second": 128.416, |
| "eval_steps_per_second": 4.022, |
| "step": 16500 |
| }, |
| { |
| "epoch": 11.089367253750815, |
| "grad_norm": 3.0771892070770264, |
| "learning_rate": 8.3e-05, |
| "loss": 0.8407, |
| "step": 17000 |
| }, |
| { |
| "epoch": 11.089367253750815, |
| "eval_accuracy": 0.810145120489948, |
| "eval_loss": 0.8840489983558655, |
| "eval_runtime": 95.9033, |
| "eval_samples_per_second": 127.858, |
| "eval_steps_per_second": 4.004, |
| "step": 17000 |
| }, |
| { |
| "epoch": 11.415525114155251, |
| "grad_norm": 3.296961784362793, |
| "learning_rate": 8.25e-05, |
| "loss": 0.822, |
| "step": 17500 |
| }, |
| { |
| "epoch": 11.415525114155251, |
| "eval_accuracy": 0.8105256945571506, |
| "eval_loss": 0.8853065371513367, |
| "eval_runtime": 95.4717, |
| "eval_samples_per_second": 128.436, |
| "eval_steps_per_second": 4.022, |
| "step": 17500 |
| }, |
| { |
| "epoch": 11.741682974559687, |
| "grad_norm": 2.933551788330078, |
| "learning_rate": 8.2e-05, |
| "loss": 0.8373, |
| "step": 18000 |
| }, |
| { |
| "epoch": 11.741682974559687, |
| "eval_accuracy": 0.8086732617590695, |
| "eval_loss": 0.875987708568573, |
| "eval_runtime": 95.5854, |
| "eval_samples_per_second": 128.283, |
| "eval_steps_per_second": 4.017, |
| "step": 18000 |
| }, |
| { |
| "epoch": 12.067840834964123, |
| "grad_norm": 2.9668538570404053, |
| "learning_rate": 8.15e-05, |
| "loss": 0.82, |
| "step": 18500 |
| }, |
| { |
| "epoch": 12.067840834964123, |
| "eval_accuracy": 0.8100703443169197, |
| "eval_loss": 0.8864119648933411, |
| "eval_runtime": 95.6174, |
| "eval_samples_per_second": 128.24, |
| "eval_steps_per_second": 4.016, |
| "step": 18500 |
| }, |
| { |
| "epoch": 12.393998695368559, |
| "grad_norm": 3.0517213344573975, |
| "learning_rate": 8.1e-05, |
| "loss": 0.8075, |
| "step": 19000 |
| }, |
| { |
| "epoch": 12.393998695368559, |
| "eval_accuracy": 0.8106054996964357, |
| "eval_loss": 0.8773519396781921, |
| "eval_runtime": 95.5913, |
| "eval_samples_per_second": 128.275, |
| "eval_steps_per_second": 4.017, |
| "step": 19000 |
| }, |
| { |
| "epoch": 12.720156555772995, |
| "grad_norm": 2.811016321182251, |
| "learning_rate": 8.05e-05, |
| "loss": 0.8034, |
| "step": 19500 |
| }, |
| { |
| "epoch": 12.720156555772995, |
| "eval_accuracy": 0.812250382040057, |
| "eval_loss": 0.8746283650398254, |
| "eval_runtime": 96.0236, |
| "eval_samples_per_second": 127.698, |
| "eval_steps_per_second": 3.999, |
| "step": 19500 |
| }, |
| { |
| "epoch": 13.04631441617743, |
| "grad_norm": 3.343775987625122, |
| "learning_rate": 8e-05, |
| "loss": 0.812, |
| "step": 20000 |
| }, |
| { |
| "epoch": 13.04631441617743, |
| "eval_accuracy": 0.8116936347456187, |
| "eval_loss": 0.8740746974945068, |
| "eval_runtime": 96.1535, |
| "eval_samples_per_second": 127.525, |
| "eval_steps_per_second": 3.994, |
| "step": 20000 |
| }, |
| { |
| "epoch": 13.372472276581865, |
| "grad_norm": 3.361279010772705, |
| "learning_rate": 7.950000000000001e-05, |
| "loss": 0.7896, |
| "step": 20500 |
| }, |
| { |
| "epoch": 13.372472276581865, |
| "eval_accuracy": 0.8125255285808708, |
| "eval_loss": 0.8752043843269348, |
| "eval_runtime": 95.5714, |
| "eval_samples_per_second": 128.302, |
| "eval_steps_per_second": 4.018, |
| "step": 20500 |
| }, |
| { |
| "epoch": 13.698630136986301, |
| "grad_norm": 4.277713298797607, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 0.808, |
| "step": 21000 |
| }, |
| { |
| "epoch": 13.698630136986301, |
| "eval_accuracy": 0.8146125537606598, |
| "eval_loss": 0.8755714893341064, |
| "eval_runtime": 95.9658, |
| "eval_samples_per_second": 127.775, |
| "eval_steps_per_second": 4.001, |
| "step": 21000 |
| }, |
| { |
| "epoch": 14.024787997390737, |
| "grad_norm": 4.07389497756958, |
| "learning_rate": 7.850000000000001e-05, |
| "loss": 0.8066, |
| "step": 21500 |
| }, |
| { |
| "epoch": 14.024787997390737, |
| "eval_accuracy": 0.810667004970075, |
| "eval_loss": 0.8822521567344666, |
| "eval_runtime": 96.0854, |
| "eval_samples_per_second": 127.616, |
| "eval_steps_per_second": 3.996, |
| "step": 21500 |
| }, |
| { |
| "epoch": 14.350945857795173, |
| "grad_norm": 3.8551950454711914, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.7873, |
| "step": 22000 |
| }, |
| { |
| "epoch": 14.350945857795173, |
| "eval_accuracy": 0.8103896295640106, |
| "eval_loss": 0.8803927302360535, |
| "eval_runtime": 95.4874, |
| "eval_samples_per_second": 128.415, |
| "eval_steps_per_second": 4.021, |
| "step": 22000 |
| }, |
| { |
| "epoch": 14.677103718199609, |
| "grad_norm": 3.6163582801818848, |
| "learning_rate": 7.75e-05, |
| "loss": 0.7853, |
| "step": 22500 |
| }, |
| { |
| "epoch": 14.677103718199609, |
| "eval_accuracy": 0.8146700066154773, |
| "eval_loss": 0.862837016582489, |
| "eval_runtime": 95.5698, |
| "eval_samples_per_second": 128.304, |
| "eval_steps_per_second": 4.018, |
| "step": 22500 |
| }, |
| { |
| "epoch": 15.003261578604045, |
| "grad_norm": 2.713881492614746, |
| "learning_rate": 7.7e-05, |
| "loss": 0.7852, |
| "step": 23000 |
| }, |
| { |
| "epoch": 15.003261578604045, |
| "eval_accuracy": 0.8136574074074074, |
| "eval_loss": 0.8718428611755371, |
| "eval_runtime": 95.7231, |
| "eval_samples_per_second": 128.099, |
| "eval_steps_per_second": 4.012, |
| "step": 23000 |
| }, |
| { |
| "epoch": 15.32941943900848, |
| "grad_norm": 3.4165608882904053, |
| "learning_rate": 7.65e-05, |
| "loss": 0.7733, |
| "step": 23500 |
| }, |
| { |
| "epoch": 15.32941943900848, |
| "eval_accuracy": 0.813523767016291, |
| "eval_loss": 0.8665691614151001, |
| "eval_runtime": 96.1635, |
| "eval_samples_per_second": 127.512, |
| "eval_steps_per_second": 3.993, |
| "step": 23500 |
| }, |
| { |
| "epoch": 15.655577299412915, |
| "grad_norm": 4.0435028076171875, |
| "learning_rate": 7.6e-05, |
| "loss": 0.7837, |
| "step": 24000 |
| }, |
| { |
| "epoch": 15.655577299412915, |
| "eval_accuracy": 0.8137777942210218, |
| "eval_loss": 0.8687108159065247, |
| "eval_runtime": 95.6602, |
| "eval_samples_per_second": 128.183, |
| "eval_steps_per_second": 4.014, |
| "step": 24000 |
| }, |
| { |
| "epoch": 15.981735159817351, |
| "grad_norm": 4.136204719543457, |
| "learning_rate": 7.55e-05, |
| "loss": 0.7826, |
| "step": 24500 |
| }, |
| { |
| "epoch": 15.981735159817351, |
| "eval_accuracy": 0.8160837708759752, |
| "eval_loss": 0.8521152138710022, |
| "eval_runtime": 95.6741, |
| "eval_samples_per_second": 128.164, |
| "eval_steps_per_second": 4.014, |
| "step": 24500 |
| }, |
| { |
| "epoch": 16.307893020221787, |
| "grad_norm": 3.419949769973755, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.7583, |
| "step": 25000 |
| }, |
| { |
| "epoch": 16.307893020221787, |
| "eval_accuracy": 0.8170063016613471, |
| "eval_loss": 0.8535459041595459, |
| "eval_runtime": 95.3894, |
| "eval_samples_per_second": 128.547, |
| "eval_steps_per_second": 4.026, |
| "step": 25000 |
| }, |
| { |
| "epoch": 16.634050880626223, |
| "grad_norm": 3.9244208335876465, |
| "learning_rate": 7.450000000000001e-05, |
| "loss": 0.7598, |
| "step": 25500 |
| }, |
| { |
| "epoch": 16.634050880626223, |
| "eval_accuracy": 0.8164982668338373, |
| "eval_loss": 0.8608457446098328, |
| "eval_runtime": 96.0993, |
| "eval_samples_per_second": 127.597, |
| "eval_steps_per_second": 3.996, |
| "step": 25500 |
| }, |
| { |
| "epoch": 16.96020874103066, |
| "grad_norm": 3.6305317878723145, |
| "learning_rate": 7.4e-05, |
| "loss": 0.7802, |
| "step": 26000 |
| }, |
| { |
| "epoch": 16.96020874103066, |
| "eval_accuracy": 0.8151692519974184, |
| "eval_loss": 0.8590179681777954, |
| "eval_runtime": 95.6373, |
| "eval_samples_per_second": 128.214, |
| "eval_steps_per_second": 4.015, |
| "step": 26000 |
| }, |
| { |
| "epoch": 17.286366601435095, |
| "grad_norm": 2.6497814655303955, |
| "learning_rate": 7.35e-05, |
| "loss": 0.756, |
| "step": 26500 |
| }, |
| { |
| "epoch": 17.286366601435095, |
| "eval_accuracy": 0.817159829218489, |
| "eval_loss": 0.8574303388595581, |
| "eval_runtime": 96.134, |
| "eval_samples_per_second": 127.551, |
| "eval_steps_per_second": 3.994, |
| "step": 26500 |
| }, |
| { |
| "epoch": 17.61252446183953, |
| "grad_norm": 3.486994504928589, |
| "learning_rate": 7.3e-05, |
| "loss": 0.7676, |
| "step": 27000 |
| }, |
| { |
| "epoch": 17.61252446183953, |
| "eval_accuracy": 0.816771978631783, |
| "eval_loss": 0.8543539047241211, |
| "eval_runtime": 96.1458, |
| "eval_samples_per_second": 127.535, |
| "eval_steps_per_second": 3.994, |
| "step": 27000 |
| }, |
| { |
| "epoch": 17.938682322243967, |
| "grad_norm": 3.2243542671203613, |
| "learning_rate": 7.25e-05, |
| "loss": 0.7547, |
| "step": 27500 |
| }, |
| { |
| "epoch": 17.938682322243967, |
| "eval_accuracy": 0.8156156378296409, |
| "eval_loss": 0.8701485395431519, |
| "eval_runtime": 96.0383, |
| "eval_samples_per_second": 127.678, |
| "eval_steps_per_second": 3.998, |
| "step": 27500 |
| }, |
| { |
| "epoch": 18.264840182648403, |
| "grad_norm": 2.7559664249420166, |
| "learning_rate": 7.2e-05, |
| "loss": 0.7561, |
| "step": 28000 |
| }, |
| { |
| "epoch": 18.264840182648403, |
| "eval_accuracy": 0.8179125205918637, |
| "eval_loss": 0.8573756217956543, |
| "eval_runtime": 95.5829, |
| "eval_samples_per_second": 128.287, |
| "eval_steps_per_second": 4.017, |
| "step": 28000 |
| }, |
| { |
| "epoch": 18.59099804305284, |
| "grad_norm": 3.51413631439209, |
| "learning_rate": 7.15e-05, |
| "loss": 0.749, |
| "step": 28500 |
| }, |
| { |
| "epoch": 18.59099804305284, |
| "eval_accuracy": 0.8171728232288245, |
| "eval_loss": 0.8583955764770508, |
| "eval_runtime": 95.9961, |
| "eval_samples_per_second": 127.734, |
| "eval_steps_per_second": 4.0, |
| "step": 28500 |
| }, |
| { |
| "epoch": 18.917155903457274, |
| "grad_norm": 4.024128437042236, |
| "learning_rate": 7.1e-05, |
| "loss": 0.7537, |
| "step": 29000 |
| }, |
| { |
| "epoch": 18.917155903457274, |
| "eval_accuracy": 0.8167475998072432, |
| "eval_loss": 0.8595439195632935, |
| "eval_runtime": 95.6024, |
| "eval_samples_per_second": 128.26, |
| "eval_steps_per_second": 4.017, |
| "step": 29000 |
| }, |
| { |
| "epoch": 19.24331376386171, |
| "grad_norm": 3.6477324962615967, |
| "learning_rate": 7.05e-05, |
| "loss": 0.7487, |
| "step": 29500 |
| }, |
| { |
| "epoch": 19.24331376386171, |
| "eval_accuracy": 0.8184730854235579, |
| "eval_loss": 0.853278636932373, |
| "eval_runtime": 96.1748, |
| "eval_samples_per_second": 127.497, |
| "eval_steps_per_second": 3.993, |
| "step": 29500 |
| }, |
| { |
| "epoch": 19.569471624266146, |
| "grad_norm": 2.851353406906128, |
| "learning_rate": 7e-05, |
| "loss": 0.7344, |
| "step": 30000 |
| }, |
| { |
| "epoch": 19.569471624266146, |
| "eval_accuracy": 0.8171112162521809, |
| "eval_loss": 0.8451135158538818, |
| "eval_runtime": 96.1975, |
| "eval_samples_per_second": 127.467, |
| "eval_steps_per_second": 3.992, |
| "step": 30000 |
| }, |
| { |
| "epoch": 19.89562948467058, |
| "grad_norm": 3.1359875202178955, |
| "learning_rate": 6.95e-05, |
| "loss": 0.7403, |
| "step": 30500 |
| }, |
| { |
| "epoch": 19.89562948467058, |
| "eval_accuracy": 0.8189718509502995, |
| "eval_loss": 0.846347987651825, |
| "eval_runtime": 96.1126, |
| "eval_samples_per_second": 127.579, |
| "eval_steps_per_second": 3.995, |
| "step": 30500 |
| }, |
| { |
| "epoch": 20.221787345075015, |
| "grad_norm": 3.7490246295928955, |
| "learning_rate": 6.9e-05, |
| "loss": 0.7309, |
| "step": 31000 |
| }, |
| { |
| "epoch": 20.221787345075015, |
| "eval_accuracy": 0.818888083223582, |
| "eval_loss": 0.8462125658988953, |
| "eval_runtime": 95.5039, |
| "eval_samples_per_second": 128.393, |
| "eval_steps_per_second": 4.021, |
| "step": 31000 |
| }, |
| { |
| "epoch": 20.54794520547945, |
| "grad_norm": 4.7173590660095215, |
| "learning_rate": 6.850000000000001e-05, |
| "loss": 0.7343, |
| "step": 31500 |
| }, |
| { |
| "epoch": 20.54794520547945, |
| "eval_accuracy": 0.8196272538269742, |
| "eval_loss": 0.8415057063102722, |
| "eval_runtime": 95.5642, |
| "eval_samples_per_second": 128.312, |
| "eval_steps_per_second": 4.018, |
| "step": 31500 |
| }, |
| { |
| "epoch": 20.874103065883887, |
| "grad_norm": 2.962167501449585, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.7232, |
| "step": 32000 |
| }, |
| { |
| "epoch": 20.874103065883887, |
| "eval_accuracy": 0.8207100504439659, |
| "eval_loss": 0.8412309288978577, |
| "eval_runtime": 95.5324, |
| "eval_samples_per_second": 128.354, |
| "eval_steps_per_second": 4.02, |
| "step": 32000 |
| }, |
| { |
| "epoch": 21.200260926288323, |
| "grad_norm": 4.313079357147217, |
| "learning_rate": 6.750000000000001e-05, |
| "loss": 0.7354, |
| "step": 32500 |
| }, |
| { |
| "epoch": 21.200260926288323, |
| "eval_accuracy": 0.8217455730903164, |
| "eval_loss": 0.8336274027824402, |
| "eval_runtime": 96.048, |
| "eval_samples_per_second": 127.665, |
| "eval_steps_per_second": 3.998, |
| "step": 32500 |
| }, |
| { |
| "epoch": 21.52641878669276, |
| "grad_norm": 3.247512102127075, |
| "learning_rate": 6.7e-05, |
| "loss": 0.7157, |
| "step": 33000 |
| }, |
| { |
| "epoch": 21.52641878669276, |
| "eval_accuracy": 0.821757600384175, |
| "eval_loss": 0.8315772414207458, |
| "eval_runtime": 96.0261, |
| "eval_samples_per_second": 127.694, |
| "eval_steps_per_second": 3.999, |
| "step": 33000 |
| }, |
| { |
| "epoch": 21.852576647097194, |
| "grad_norm": 4.300323486328125, |
| "learning_rate": 6.65e-05, |
| "loss": 0.7122, |
| "step": 33500 |
| }, |
| { |
| "epoch": 21.852576647097194, |
| "eval_accuracy": 0.8213358263218032, |
| "eval_loss": 0.8339926600456238, |
| "eval_runtime": 96.0296, |
| "eval_samples_per_second": 127.69, |
| "eval_steps_per_second": 3.999, |
| "step": 33500 |
| }, |
| { |
| "epoch": 22.17873450750163, |
| "grad_norm": 3.0622804164886475, |
| "learning_rate": 6.6e-05, |
| "loss": 0.7069, |
| "step": 34000 |
| }, |
| { |
| "epoch": 22.17873450750163, |
| "eval_accuracy": 0.8231302606696578, |
| "eval_loss": 0.8308265805244446, |
| "eval_runtime": 96.0547, |
| "eval_samples_per_second": 127.656, |
| "eval_steps_per_second": 3.998, |
| "step": 34000 |
| }, |
| { |
| "epoch": 22.504892367906066, |
| "grad_norm": 2.5095720291137695, |
| "learning_rate": 6.55e-05, |
| "loss": 0.7033, |
| "step": 34500 |
| }, |
| { |
| "epoch": 22.504892367906066, |
| "eval_accuracy": 0.8223606059040972, |
| "eval_loss": 0.8291507959365845, |
| "eval_runtime": 96.0539, |
| "eval_samples_per_second": 127.658, |
| "eval_steps_per_second": 3.998, |
| "step": 34500 |
| }, |
| { |
| "epoch": 22.831050228310502, |
| "grad_norm": 5.5047526359558105, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 0.7065, |
| "step": 35000 |
| }, |
| { |
| "epoch": 22.831050228310502, |
| "eval_accuracy": 0.8222459148693573, |
| "eval_loss": 0.8312565684318542, |
| "eval_runtime": 95.5399, |
| "eval_samples_per_second": 128.344, |
| "eval_steps_per_second": 4.019, |
| "step": 35000 |
| }, |
| { |
| "epoch": 23.15720808871494, |
| "grad_norm": 4.183090686798096, |
| "learning_rate": 6.450000000000001e-05, |
| "loss": 0.7172, |
| "step": 35500 |
| }, |
| { |
| "epoch": 23.15720808871494, |
| "eval_accuracy": 0.8214502944798018, |
| "eval_loss": 0.8354819416999817, |
| "eval_runtime": 95.6311, |
| "eval_samples_per_second": 128.222, |
| "eval_steps_per_second": 4.015, |
| "step": 35500 |
| }, |
| { |
| "epoch": 23.483365949119374, |
| "grad_norm": 2.854464292526245, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.7011, |
| "step": 36000 |
| }, |
| { |
| "epoch": 23.483365949119374, |
| "eval_accuracy": 0.8203978144669242, |
| "eval_loss": 0.8381808400154114, |
| "eval_runtime": 95.5013, |
| "eval_samples_per_second": 128.396, |
| "eval_steps_per_second": 4.021, |
| "step": 36000 |
| }, |
| { |
| "epoch": 23.80952380952381, |
| "grad_norm": 3.004467725753784, |
| "learning_rate": 6.35e-05, |
| "loss": 0.7038, |
| "step": 36500 |
| }, |
| { |
| "epoch": 23.80952380952381, |
| "eval_accuracy": 0.8229179786185795, |
| "eval_loss": 0.8298040628433228, |
| "eval_runtime": 95.9071, |
| "eval_samples_per_second": 127.853, |
| "eval_steps_per_second": 4.004, |
| "step": 36500 |
| }, |
| { |
| "epoch": 24.135681669928246, |
| "grad_norm": 4.745896816253662, |
| "learning_rate": 6.3e-05, |
| "loss": 0.6954, |
| "step": 37000 |
| }, |
| { |
| "epoch": 24.135681669928246, |
| "eval_accuracy": 0.8235633886255924, |
| "eval_loss": 0.828513503074646, |
| "eval_runtime": 96.1965, |
| "eval_samples_per_second": 127.468, |
| "eval_steps_per_second": 3.992, |
| "step": 37000 |
| }, |
| { |
| "epoch": 24.461839530332682, |
| "grad_norm": 2.830902338027954, |
| "learning_rate": 6.25e-05, |
| "loss": 0.6934, |
| "step": 37500 |
| }, |
| { |
| "epoch": 24.461839530332682, |
| "eval_accuracy": 0.8210810246933818, |
| "eval_loss": 0.8337345123291016, |
| "eval_runtime": 96.1694, |
| "eval_samples_per_second": 127.504, |
| "eval_steps_per_second": 3.993, |
| "step": 37500 |
| }, |
| { |
| "epoch": 24.787997390737118, |
| "grad_norm": 4.337691783905029, |
| "learning_rate": 6.2e-05, |
| "loss": 0.7033, |
| "step": 38000 |
| }, |
| { |
| "epoch": 24.787997390737118, |
| "eval_accuracy": 0.8232294407589964, |
| "eval_loss": 0.833626925945282, |
| "eval_runtime": 96.0655, |
| "eval_samples_per_second": 127.642, |
| "eval_steps_per_second": 3.997, |
| "step": 38000 |
| }, |
| { |
| "epoch": 25.114155251141554, |
| "grad_norm": 3.392941951751709, |
| "learning_rate": 6.15e-05, |
| "loss": 0.6916, |
| "step": 38500 |
| }, |
| { |
| "epoch": 25.114155251141554, |
| "eval_accuracy": 0.823752687374898, |
| "eval_loss": 0.8312628865242004, |
| "eval_runtime": 96.059, |
| "eval_samples_per_second": 127.651, |
| "eval_steps_per_second": 3.998, |
| "step": 38500 |
| }, |
| { |
| "epoch": 25.44031311154599, |
| "grad_norm": 2.851759910583496, |
| "learning_rate": 6.1e-05, |
| "loss": 0.686, |
| "step": 39000 |
| }, |
| { |
| "epoch": 25.44031311154599, |
| "eval_accuracy": 0.8232236734329158, |
| "eval_loss": 0.832546055316925, |
| "eval_runtime": 95.5576, |
| "eval_samples_per_second": 128.321, |
| "eval_steps_per_second": 4.019, |
| "step": 39000 |
| }, |
| { |
| "epoch": 25.766470971950422, |
| "grad_norm": 3.790151596069336, |
| "learning_rate": 6.05e-05, |
| "loss": 0.6905, |
| "step": 39500 |
| }, |
| { |
| "epoch": 25.766470971950422, |
| "eval_accuracy": 0.823065086481128, |
| "eval_loss": 0.8281510472297668, |
| "eval_runtime": 95.5599, |
| "eval_samples_per_second": 128.317, |
| "eval_steps_per_second": 4.018, |
| "step": 39500 |
| }, |
| { |
| "epoch": 26.09262883235486, |
| "grad_norm": 6.393603324890137, |
| "learning_rate": 6e-05, |
| "loss": 0.691, |
| "step": 40000 |
| }, |
| { |
| "epoch": 26.09262883235486, |
| "eval_accuracy": 0.8235015047960742, |
| "eval_loss": 0.8384564518928528, |
| "eval_runtime": 95.5299, |
| "eval_samples_per_second": 128.358, |
| "eval_steps_per_second": 4.02, |
| "step": 40000 |
| }, |
| { |
| "epoch": 26.418786692759294, |
| "grad_norm": 4.260555267333984, |
| "learning_rate": 5.95e-05, |
| "loss": 0.6893, |
| "step": 40500 |
| }, |
| { |
| "epoch": 26.418786692759294, |
| "eval_accuracy": 0.8240677513211805, |
| "eval_loss": 0.8298010230064392, |
| "eval_runtime": 95.5669, |
| "eval_samples_per_second": 128.308, |
| "eval_steps_per_second": 4.018, |
| "step": 40500 |
| }, |
| { |
| "epoch": 26.74494455316373, |
| "grad_norm": 2.733369827270508, |
| "learning_rate": 5.9e-05, |
| "loss": 0.6701, |
| "step": 41000 |
| }, |
| { |
| "epoch": 26.74494455316373, |
| "eval_accuracy": 0.8248589664707147, |
| "eval_loss": 0.8331744074821472, |
| "eval_runtime": 95.5789, |
| "eval_samples_per_second": 128.292, |
| "eval_steps_per_second": 4.018, |
| "step": 41000 |
| }, |
| { |
| "epoch": 27.071102413568166, |
| "grad_norm": 4.225335597991943, |
| "learning_rate": 5.85e-05, |
| "loss": 0.671, |
| "step": 41500 |
| }, |
| { |
| "epoch": 27.071102413568166, |
| "eval_accuracy": 0.8248499337562081, |
| "eval_loss": 0.8276962041854858, |
| "eval_runtime": 95.6366, |
| "eval_samples_per_second": 128.214, |
| "eval_steps_per_second": 4.015, |
| "step": 41500 |
| }, |
| { |
| "epoch": 27.397260273972602, |
| "grad_norm": 3.768827199935913, |
| "learning_rate": 5.8e-05, |
| "loss": 0.6702, |
| "step": 42000 |
| }, |
| { |
| "epoch": 27.397260273972602, |
| "eval_accuracy": 0.8253913197493783, |
| "eval_loss": 0.8237897753715515, |
| "eval_runtime": 95.5458, |
| "eval_samples_per_second": 128.336, |
| "eval_steps_per_second": 4.019, |
| "step": 42000 |
| }, |
| { |
| "epoch": 27.723418134377038, |
| "grad_norm": 3.414738178253174, |
| "learning_rate": 5.7499999999999995e-05, |
| "loss": 0.673, |
| "step": 42500 |
| }, |
| { |
| "epoch": 27.723418134377038, |
| "eval_accuracy": 0.8261082246911201, |
| "eval_loss": 0.8088709712028503, |
| "eval_runtime": 95.7571, |
| "eval_samples_per_second": 128.053, |
| "eval_steps_per_second": 4.01, |
| "step": 42500 |
| }, |
| { |
| "epoch": 28.049575994781474, |
| "grad_norm": 3.5771758556365967, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 0.6634, |
| "step": 43000 |
| }, |
| { |
| "epoch": 28.049575994781474, |
| "eval_accuracy": 0.8264020093800707, |
| "eval_loss": 0.818773090839386, |
| "eval_runtime": 96.1431, |
| "eval_samples_per_second": 127.539, |
| "eval_steps_per_second": 3.994, |
| "step": 43000 |
| }, |
| { |
| "epoch": 28.37573385518591, |
| "grad_norm": 3.543851852416992, |
| "learning_rate": 5.65e-05, |
| "loss": 0.6646, |
| "step": 43500 |
| }, |
| { |
| "epoch": 28.37573385518591, |
| "eval_accuracy": 0.8269009920796627, |
| "eval_loss": 0.8075844049453735, |
| "eval_runtime": 96.1529, |
| "eval_samples_per_second": 127.526, |
| "eval_steps_per_second": 3.994, |
| "step": 43500 |
| }, |
| { |
| "epoch": 28.701891715590346, |
| "grad_norm": 2.2786142826080322, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.6531, |
| "step": 44000 |
| }, |
| { |
| "epoch": 28.701891715590346, |
| "eval_accuracy": 0.8257994932295168, |
| "eval_loss": 0.8344977498054504, |
| "eval_runtime": 95.968, |
| "eval_samples_per_second": 127.772, |
| "eval_steps_per_second": 4.001, |
| "step": 44000 |
| }, |
| { |
| "epoch": 29.028049575994782, |
| "grad_norm": 1.8194258213043213, |
| "learning_rate": 5.550000000000001e-05, |
| "loss": 0.6614, |
| "step": 44500 |
| }, |
| { |
| "epoch": 29.028049575994782, |
| "eval_accuracy": 0.8255736392742796, |
| "eval_loss": 0.8443058729171753, |
| "eval_runtime": 95.5717, |
| "eval_samples_per_second": 128.302, |
| "eval_steps_per_second": 4.018, |
| "step": 44500 |
| }, |
| { |
| "epoch": 29.354207436399218, |
| "grad_norm": 3.3961451053619385, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 0.6449, |
| "step": 45000 |
| }, |
| { |
| "epoch": 29.354207436399218, |
| "eval_accuracy": 0.8267738583860818, |
| "eval_loss": 0.8181779384613037, |
| "eval_runtime": 95.6353, |
| "eval_samples_per_second": 128.216, |
| "eval_steps_per_second": 4.015, |
| "step": 45000 |
| }, |
| { |
| "epoch": 29.680365296803654, |
| "grad_norm": 2.9679033756256104, |
| "learning_rate": 5.45e-05, |
| "loss": 0.6635, |
| "step": 45500 |
| }, |
| { |
| "epoch": 29.680365296803654, |
| "eval_accuracy": 0.8279328287606433, |
| "eval_loss": 0.8159452676773071, |
| "eval_runtime": 95.4111, |
| "eval_samples_per_second": 128.517, |
| "eval_steps_per_second": 4.025, |
| "step": 45500 |
| }, |
| { |
| "epoch": 30.00652315720809, |
| "grad_norm": 2.6850576400756836, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.6447, |
| "step": 46000 |
| }, |
| { |
| "epoch": 30.00652315720809, |
| "eval_accuracy": 0.8257240166507135, |
| "eval_loss": 0.8173608183860779, |
| "eval_runtime": 96.0834, |
| "eval_samples_per_second": 127.618, |
| "eval_steps_per_second": 3.997, |
| "step": 46000 |
| }, |
| { |
| "epoch": 30.332681017612526, |
| "grad_norm": 3.2325327396392822, |
| "learning_rate": 5.3500000000000006e-05, |
| "loss": 0.6485, |
| "step": 46500 |
| }, |
| { |
| "epoch": 30.332681017612526, |
| "eval_accuracy": 0.8278799641462891, |
| "eval_loss": 0.8071762919425964, |
| "eval_runtime": 95.5652, |
| "eval_samples_per_second": 128.31, |
| "eval_steps_per_second": 4.018, |
| "step": 46500 |
| }, |
| { |
| "epoch": 30.65883887801696, |
| "grad_norm": 2.906803607940674, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.6469, |
| "step": 47000 |
| }, |
| { |
| "epoch": 30.65883887801696, |
| "eval_accuracy": 0.8256298324146351, |
| "eval_loss": 0.8320009112358093, |
| "eval_runtime": 96.0736, |
| "eval_samples_per_second": 127.631, |
| "eval_steps_per_second": 3.997, |
| "step": 47000 |
| }, |
| { |
| "epoch": 30.984996738421398, |
| "grad_norm": 3.0411088466644287, |
| "learning_rate": 5.25e-05, |
| "loss": 0.646, |
| "step": 47500 |
| }, |
| { |
| "epoch": 30.984996738421398, |
| "eval_accuracy": 0.8279118105560092, |
| "eval_loss": 0.8084993958473206, |
| "eval_runtime": 96.0838, |
| "eval_samples_per_second": 127.618, |
| "eval_steps_per_second": 3.997, |
| "step": 47500 |
| }, |
| { |
| "epoch": 31.31115459882583, |
| "grad_norm": 2.555142879486084, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.6406, |
| "step": 48000 |
| }, |
| { |
| "epoch": 31.31115459882583, |
| "eval_accuracy": 0.8262719717693807, |
| "eval_loss": 0.8186313509941101, |
| "eval_runtime": 96.129, |
| "eval_samples_per_second": 127.558, |
| "eval_steps_per_second": 3.995, |
| "step": 48000 |
| }, |
| { |
| "epoch": 31.637312459230266, |
| "grad_norm": 3.7424721717834473, |
| "learning_rate": 5.1500000000000005e-05, |
| "loss": 0.6288, |
| "step": 48500 |
| }, |
| { |
| "epoch": 31.637312459230266, |
| "eval_accuracy": 0.8274525815671282, |
| "eval_loss": 0.8299573659896851, |
| "eval_runtime": 95.5383, |
| "eval_samples_per_second": 128.346, |
| "eval_steps_per_second": 4.019, |
| "step": 48500 |
| }, |
| { |
| "epoch": 31.963470319634702, |
| "grad_norm": 3.879730701446533, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 0.65, |
| "step": 49000 |
| }, |
| { |
| "epoch": 31.963470319634702, |
| "eval_accuracy": 0.8267922127873606, |
| "eval_loss": 0.8268994092941284, |
| "eval_runtime": 96.0503, |
| "eval_samples_per_second": 127.662, |
| "eval_steps_per_second": 3.998, |
| "step": 49000 |
| }, |
| { |
| "epoch": 32.28962818003914, |
| "grad_norm": 3.536586046218872, |
| "learning_rate": 5.05e-05, |
| "loss": 0.6274, |
| "step": 49500 |
| }, |
| { |
| "epoch": 32.28962818003914, |
| "eval_accuracy": 0.8275685236934419, |
| "eval_loss": 0.8197194933891296, |
| "eval_runtime": 95.5627, |
| "eval_samples_per_second": 128.314, |
| "eval_steps_per_second": 4.018, |
| "step": 49500 |
| }, |
| { |
| "epoch": 32.615786040443574, |
| "grad_norm": 3.459678888320923, |
| "learning_rate": 5e-05, |
| "loss": 0.6356, |
| "step": 50000 |
| }, |
| { |
| "epoch": 32.615786040443574, |
| "eval_accuracy": 0.8304228315712264, |
| "eval_loss": 0.8121655583381653, |
| "eval_runtime": 96.0794, |
| "eval_samples_per_second": 127.624, |
| "eval_steps_per_second": 3.997, |
| "step": 50000 |
| }, |
| { |
| "epoch": 32.94194390084801, |
| "grad_norm": 2.755117177963257, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 0.635, |
| "step": 50500 |
| }, |
| { |
| "epoch": 32.94194390084801, |
| "eval_accuracy": 0.828548567533728, |
| "eval_loss": 0.8212075233459473, |
| "eval_runtime": 96.1005, |
| "eval_samples_per_second": 127.596, |
| "eval_steps_per_second": 3.996, |
| "step": 50500 |
| }, |
| { |
| "epoch": 33.268101761252446, |
| "grad_norm": 3.8209736347198486, |
| "learning_rate": 4.9e-05, |
| "loss": 0.6281, |
| "step": 51000 |
| }, |
| { |
| "epoch": 33.268101761252446, |
| "eval_accuracy": 0.8276856793583589, |
| "eval_loss": 0.8094030022621155, |
| "eval_runtime": 95.616, |
| "eval_samples_per_second": 128.242, |
| "eval_steps_per_second": 4.016, |
| "step": 51000 |
| }, |
| { |
| "epoch": 33.59425962165688, |
| "grad_norm": 3.2926814556121826, |
| "learning_rate": 4.85e-05, |
| "loss": 0.6307, |
| "step": 51500 |
| }, |
| { |
| "epoch": 33.59425962165688, |
| "eval_accuracy": 0.8286809543543899, |
| "eval_loss": 0.8193591237068176, |
| "eval_runtime": 95.5699, |
| "eval_samples_per_second": 128.304, |
| "eval_steps_per_second": 4.018, |
| "step": 51500 |
| }, |
| { |
| "epoch": 33.92041748206132, |
| "grad_norm": 3.2185568809509277, |
| "learning_rate": 4.8e-05, |
| "loss": 0.6321, |
| "step": 52000 |
| }, |
| { |
| "epoch": 33.92041748206132, |
| "eval_accuracy": 0.8282692108265063, |
| "eval_loss": 0.8121231198310852, |
| "eval_runtime": 96.125, |
| "eval_samples_per_second": 127.563, |
| "eval_steps_per_second": 3.995, |
| "step": 52000 |
| }, |
| { |
| "epoch": 34.24657534246575, |
| "grad_norm": 3.80593204498291, |
| "learning_rate": 4.75e-05, |
| "loss": 0.6193, |
| "step": 52500 |
| }, |
| { |
| "epoch": 34.24657534246575, |
| "eval_accuracy": 0.829132275838543, |
| "eval_loss": 0.820734441280365, |
| "eval_runtime": 96.0513, |
| "eval_samples_per_second": 127.661, |
| "eval_steps_per_second": 3.998, |
| "step": 52500 |
| }, |
| { |
| "epoch": 34.57273320287019, |
| "grad_norm": 2.613852024078369, |
| "learning_rate": 4.7e-05, |
| "loss": 0.623, |
| "step": 53000 |
| }, |
| { |
| "epoch": 34.57273320287019, |
| "eval_accuracy": 0.8284756446565424, |
| "eval_loss": 0.8152901530265808, |
| "eval_runtime": 95.6772, |
| "eval_samples_per_second": 128.16, |
| "eval_steps_per_second": 4.013, |
| "step": 53000 |
| }, |
| { |
| "epoch": 34.898891063274625, |
| "grad_norm": 3.2367916107177734, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.6145, |
| "step": 53500 |
| }, |
| { |
| "epoch": 34.898891063274625, |
| "eval_accuracy": 0.8289422000874521, |
| "eval_loss": 0.8108070492744446, |
| "eval_runtime": 96.0171, |
| "eval_samples_per_second": 127.706, |
| "eval_steps_per_second": 3.999, |
| "step": 53500 |
| }, |
| { |
| "epoch": 35.22504892367906, |
| "grad_norm": 3.648937225341797, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.6239, |
| "step": 54000 |
| }, |
| { |
| "epoch": 35.22504892367906, |
| "eval_accuracy": 0.8282779590625927, |
| "eval_loss": 0.823903501033783, |
| "eval_runtime": 95.6837, |
| "eval_samples_per_second": 128.151, |
| "eval_steps_per_second": 4.013, |
| "step": 54000 |
| }, |
| { |
| "epoch": 35.5512067840835, |
| "grad_norm": 3.7973756790161133, |
| "learning_rate": 4.55e-05, |
| "loss": 0.6156, |
| "step": 54500 |
| }, |
| { |
| "epoch": 35.5512067840835, |
| "eval_accuracy": 0.8285820066641985, |
| "eval_loss": 0.8196555972099304, |
| "eval_runtime": 96.1175, |
| "eval_samples_per_second": 127.573, |
| "eval_steps_per_second": 3.995, |
| "step": 54500 |
| }, |
| { |
| "epoch": 35.87736464448793, |
| "grad_norm": 2.7111735343933105, |
| "learning_rate": 4.5e-05, |
| "loss": 0.6145, |
| "step": 55000 |
| }, |
| { |
| "epoch": 35.87736464448793, |
| "eval_accuracy": 0.8307134489048389, |
| "eval_loss": 0.8130238652229309, |
| "eval_runtime": 95.5978, |
| "eval_samples_per_second": 128.266, |
| "eval_steps_per_second": 4.017, |
| "step": 55000 |
| }, |
| { |
| "epoch": 36.20352250489237, |
| "grad_norm": 3.3603804111480713, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 0.6039, |
| "step": 55500 |
| }, |
| { |
| "epoch": 36.20352250489237, |
| "eval_accuracy": 0.8315611388544674, |
| "eval_loss": 0.8143065571784973, |
| "eval_runtime": 95.5067, |
| "eval_samples_per_second": 128.389, |
| "eval_steps_per_second": 4.021, |
| "step": 55500 |
| }, |
| { |
| "epoch": 36.529680365296805, |
| "grad_norm": 4.012826919555664, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.6146, |
| "step": 56000 |
| }, |
| { |
| "epoch": 36.529680365296805, |
| "eval_accuracy": 0.8305585729381424, |
| "eval_loss": 0.8082120418548584, |
| "eval_runtime": 96.1141, |
| "eval_samples_per_second": 127.578, |
| "eval_steps_per_second": 3.995, |
| "step": 56000 |
| }, |
| { |
| "epoch": 36.85583822570124, |
| "grad_norm": 3.831219434738159, |
| "learning_rate": 4.35e-05, |
| "loss": 0.6008, |
| "step": 56500 |
| }, |
| { |
| "epoch": 36.85583822570124, |
| "eval_accuracy": 0.8321564326943739, |
| "eval_loss": 0.8039395213127136, |
| "eval_runtime": 95.527, |
| "eval_samples_per_second": 128.362, |
| "eval_steps_per_second": 4.02, |
| "step": 56500 |
| }, |
| { |
| "epoch": 37.18199608610568, |
| "grad_norm": 3.3420674800872803, |
| "learning_rate": 4.3e-05, |
| "loss": 0.6025, |
| "step": 57000 |
| }, |
| { |
| "epoch": 37.18199608610568, |
| "eval_accuracy": 0.831398227733968, |
| "eval_loss": 0.8035178780555725, |
| "eval_runtime": 95.5597, |
| "eval_samples_per_second": 128.318, |
| "eval_steps_per_second": 4.018, |
| "step": 57000 |
| }, |
| { |
| "epoch": 37.50815394651011, |
| "grad_norm": 3.31644344329834, |
| "learning_rate": 4.25e-05, |
| "loss": 0.6014, |
| "step": 57500 |
| }, |
| { |
| "epoch": 37.50815394651011, |
| "eval_accuracy": 0.8313993123480942, |
| "eval_loss": 0.8076711893081665, |
| "eval_runtime": 95.5701, |
| "eval_samples_per_second": 128.304, |
| "eval_steps_per_second": 4.018, |
| "step": 57500 |
| }, |
| { |
| "epoch": 37.83431180691455, |
| "grad_norm": 2.9908030033111572, |
| "learning_rate": 4.2e-05, |
| "loss": 0.5964, |
| "step": 58000 |
| }, |
| { |
| "epoch": 37.83431180691455, |
| "eval_accuracy": 0.8307974621476649, |
| "eval_loss": 0.8147750496864319, |
| "eval_runtime": 95.5728, |
| "eval_samples_per_second": 128.3, |
| "eval_steps_per_second": 4.018, |
| "step": 58000 |
| }, |
| { |
| "epoch": 38.160469667318985, |
| "grad_norm": 3.3767218589782715, |
| "learning_rate": 4.15e-05, |
| "loss": 0.5919, |
| "step": 58500 |
| }, |
| { |
| "epoch": 38.160469667318985, |
| "eval_accuracy": 0.8321526738027666, |
| "eval_loss": 0.8096674084663391, |
| "eval_runtime": 95.502, |
| "eval_samples_per_second": 128.395, |
| "eval_steps_per_second": 4.021, |
| "step": 58500 |
| }, |
| { |
| "epoch": 38.48662752772342, |
| "grad_norm": 3.17134428024292, |
| "learning_rate": 4.1e-05, |
| "loss": 0.5915, |
| "step": 59000 |
| }, |
| { |
| "epoch": 38.48662752772342, |
| "eval_accuracy": 0.8314462895539176, |
| "eval_loss": 0.8136842846870422, |
| "eval_runtime": 96.17, |
| "eval_samples_per_second": 127.503, |
| "eval_steps_per_second": 3.993, |
| "step": 59000 |
| }, |
| { |
| "epoch": 38.81278538812786, |
| "grad_norm": 2.8910727500915527, |
| "learning_rate": 4.05e-05, |
| "loss": 0.6036, |
| "step": 59500 |
| }, |
| { |
| "epoch": 38.81278538812786, |
| "eval_accuracy": 0.8307667164389769, |
| "eval_loss": 0.8048545718193054, |
| "eval_runtime": 96.2445, |
| "eval_samples_per_second": 127.405, |
| "eval_steps_per_second": 3.99, |
| "step": 59500 |
| }, |
| { |
| "epoch": 39.13894324853229, |
| "grad_norm": 3.3615550994873047, |
| "learning_rate": 4e-05, |
| "loss": 0.5865, |
| "step": 60000 |
| }, |
| { |
| "epoch": 39.13894324853229, |
| "eval_accuracy": 0.8312174034922063, |
| "eval_loss": 0.8044449090957642, |
| "eval_runtime": 95.555, |
| "eval_samples_per_second": 128.324, |
| "eval_steps_per_second": 4.019, |
| "step": 60000 |
| }, |
| { |
| "epoch": 39.46510110893673, |
| "grad_norm": 2.6367311477661133, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 0.588, |
| "step": 60500 |
| }, |
| { |
| "epoch": 39.46510110893673, |
| "eval_accuracy": 0.8304663566029479, |
| "eval_loss": 0.821357250213623, |
| "eval_runtime": 95.9701, |
| "eval_samples_per_second": 127.769, |
| "eval_steps_per_second": 4.001, |
| "step": 60500 |
| }, |
| { |
| "epoch": 39.79125896934116, |
| "grad_norm": 2.833712100982666, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.586, |
| "step": 61000 |
| }, |
| { |
| "epoch": 39.79125896934116, |
| "eval_accuracy": 0.829917319891339, |
| "eval_loss": 0.8134341239929199, |
| "eval_runtime": 95.9703, |
| "eval_samples_per_second": 127.769, |
| "eval_steps_per_second": 4.001, |
| "step": 61000 |
| }, |
| { |
| "epoch": 40.11741682974559, |
| "grad_norm": 2.9397034645080566, |
| "learning_rate": 3.85e-05, |
| "loss": 0.571, |
| "step": 61500 |
| }, |
| { |
| "epoch": 40.11741682974559, |
| "eval_accuracy": 0.8292021602772011, |
| "eval_loss": 0.8118214011192322, |
| "eval_runtime": 95.6187, |
| "eval_samples_per_second": 128.239, |
| "eval_steps_per_second": 4.016, |
| "step": 61500 |
| }, |
| { |
| "epoch": 40.44357469015003, |
| "grad_norm": 2.5662078857421875, |
| "learning_rate": 3.8e-05, |
| "loss": 0.5749, |
| "step": 62000 |
| }, |
| { |
| "epoch": 40.44357469015003, |
| "eval_accuracy": 0.8333482202670716, |
| "eval_loss": 0.8056595325469971, |
| "eval_runtime": 96.04, |
| "eval_samples_per_second": 127.676, |
| "eval_steps_per_second": 3.998, |
| "step": 62000 |
| }, |
| { |
| "epoch": 40.769732550554465, |
| "grad_norm": 3.049619674682617, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.5793, |
| "step": 62500 |
| }, |
| { |
| "epoch": 40.769732550554465, |
| "eval_accuracy": 0.8336420510760487, |
| "eval_loss": 0.8024507164955139, |
| "eval_runtime": 95.6023, |
| "eval_samples_per_second": 128.261, |
| "eval_steps_per_second": 4.017, |
| "step": 62500 |
| }, |
| { |
| "epoch": 41.0958904109589, |
| "grad_norm": 2.7201578617095947, |
| "learning_rate": 3.7e-05, |
| "loss": 0.5778, |
| "step": 63000 |
| }, |
| { |
| "epoch": 41.0958904109589, |
| "eval_accuracy": 0.8319174128616887, |
| "eval_loss": 0.8089193105697632, |
| "eval_runtime": 95.6644, |
| "eval_samples_per_second": 128.177, |
| "eval_steps_per_second": 4.014, |
| "step": 63000 |
| }, |
| { |
| "epoch": 41.42204827136334, |
| "grad_norm": 2.6951823234558105, |
| "learning_rate": 3.65e-05, |
| "loss": 0.5612, |
| "step": 63500 |
| }, |
| { |
| "epoch": 41.42204827136334, |
| "eval_accuracy": 0.8335347454422788, |
| "eval_loss": 0.80367112159729, |
| "eval_runtime": 95.6191, |
| "eval_samples_per_second": 128.238, |
| "eval_steps_per_second": 4.016, |
| "step": 63500 |
| }, |
| { |
| "epoch": 41.74820613176777, |
| "grad_norm": 4.181463718414307, |
| "learning_rate": 3.6e-05, |
| "loss": 0.573, |
| "step": 64000 |
| }, |
| { |
| "epoch": 41.74820613176777, |
| "eval_accuracy": 0.8344243842473971, |
| "eval_loss": 0.7983211278915405, |
| "eval_runtime": 95.629, |
| "eval_samples_per_second": 128.225, |
| "eval_steps_per_second": 4.016, |
| "step": 64000 |
| }, |
| { |
| "epoch": 42.07436399217221, |
| "grad_norm": 3.2401015758514404, |
| "learning_rate": 3.55e-05, |
| "loss": 0.5696, |
| "step": 64500 |
| }, |
| { |
| "epoch": 42.07436399217221, |
| "eval_accuracy": 0.8310029766521158, |
| "eval_loss": 0.8181082606315613, |
| "eval_runtime": 96.0081, |
| "eval_samples_per_second": 127.718, |
| "eval_steps_per_second": 4.0, |
| "step": 64500 |
| }, |
| { |
| "epoch": 42.400521852576645, |
| "grad_norm": 3.277033805847168, |
| "learning_rate": 3.5e-05, |
| "loss": 0.564, |
| "step": 65000 |
| }, |
| { |
| "epoch": 42.400521852576645, |
| "eval_accuracy": 0.8335396635951874, |
| "eval_loss": 0.8013662695884705, |
| "eval_runtime": 95.6901, |
| "eval_samples_per_second": 128.143, |
| "eval_steps_per_second": 4.013, |
| "step": 65000 |
| }, |
| { |
| "epoch": 42.72667971298108, |
| "grad_norm": 4.847507476806641, |
| "learning_rate": 3.45e-05, |
| "loss": 0.5686, |
| "step": 65500 |
| }, |
| { |
| "epoch": 42.72667971298108, |
| "eval_accuracy": 0.833032886000638, |
| "eval_loss": 0.8119781613349915, |
| "eval_runtime": 96.1047, |
| "eval_samples_per_second": 127.59, |
| "eval_steps_per_second": 3.996, |
| "step": 65500 |
| }, |
| { |
| "epoch": 43.05283757338552, |
| "grad_norm": 3.5789191722869873, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.5599, |
| "step": 66000 |
| }, |
| { |
| "epoch": 43.05283757338552, |
| "eval_accuracy": 0.8339582654059341, |
| "eval_loss": 0.8123458027839661, |
| "eval_runtime": 96.2815, |
| "eval_samples_per_second": 127.356, |
| "eval_steps_per_second": 3.988, |
| "step": 66000 |
| }, |
| { |
| "epoch": 43.37899543378995, |
| "grad_norm": 3.250049352645874, |
| "learning_rate": 3.35e-05, |
| "loss": 0.5555, |
| "step": 66500 |
| }, |
| { |
| "epoch": 43.37899543378995, |
| "eval_accuracy": 0.8328555366091573, |
| "eval_loss": 0.8025438785552979, |
| "eval_runtime": 96.065, |
| "eval_samples_per_second": 127.643, |
| "eval_steps_per_second": 3.997, |
| "step": 66500 |
| }, |
| { |
| "epoch": 43.70515329419439, |
| "grad_norm": 2.0880448818206787, |
| "learning_rate": 3.3e-05, |
| "loss": 0.5599, |
| "step": 67000 |
| }, |
| { |
| "epoch": 43.70515329419439, |
| "eval_accuracy": 0.8333740804125086, |
| "eval_loss": 0.8076988458633423, |
| "eval_runtime": 95.9358, |
| "eval_samples_per_second": 127.815, |
| "eval_steps_per_second": 4.003, |
| "step": 67000 |
| }, |
| { |
| "epoch": 44.031311154598825, |
| "grad_norm": 4.393618106842041, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.557, |
| "step": 67500 |
| }, |
| { |
| "epoch": 44.031311154598825, |
| "eval_accuracy": 0.8331855464420306, |
| "eval_loss": 0.8098081946372986, |
| "eval_runtime": 95.4292, |
| "eval_samples_per_second": 128.493, |
| "eval_steps_per_second": 4.024, |
| "step": 67500 |
| }, |
| { |
| "epoch": 44.35746901500326, |
| "grad_norm": 5.500895023345947, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.5488, |
| "step": 68000 |
| }, |
| { |
| "epoch": 44.35746901500326, |
| "eval_accuracy": 0.8343688661284848, |
| "eval_loss": 0.8053340315818787, |
| "eval_runtime": 95.5428, |
| "eval_samples_per_second": 128.34, |
| "eval_steps_per_second": 4.019, |
| "step": 68000 |
| }, |
| { |
| "epoch": 44.6836268754077, |
| "grad_norm": 2.8524134159088135, |
| "learning_rate": 3.15e-05, |
| "loss": 0.5504, |
| "step": 68500 |
| }, |
| { |
| "epoch": 44.6836268754077, |
| "eval_accuracy": 0.8342111492484318, |
| "eval_loss": 0.7959148287773132, |
| "eval_runtime": 96.1022, |
| "eval_samples_per_second": 127.593, |
| "eval_steps_per_second": 3.996, |
| "step": 68500 |
| }, |
| { |
| "epoch": 45.00978473581213, |
| "grad_norm": 3.5730416774749756, |
| "learning_rate": 3.1e-05, |
| "loss": 0.5505, |
| "step": 69000 |
| }, |
| { |
| "epoch": 45.00978473581213, |
| "eval_accuracy": 0.836165363858391, |
| "eval_loss": 0.7981916069984436, |
| "eval_runtime": 95.9912, |
| "eval_samples_per_second": 127.741, |
| "eval_steps_per_second": 4.0, |
| "step": 69000 |
| }, |
| { |
| "epoch": 45.33594259621657, |
| "grad_norm": 4.396024703979492, |
| "learning_rate": 3.05e-05, |
| "loss": 0.5486, |
| "step": 69500 |
| }, |
| { |
| "epoch": 45.33594259621657, |
| "eval_accuracy": 0.8332422855455485, |
| "eval_loss": 0.803587794303894, |
| "eval_runtime": 95.7722, |
| "eval_samples_per_second": 128.033, |
| "eval_steps_per_second": 4.01, |
| "step": 69500 |
| }, |
| { |
| "epoch": 45.662100456621005, |
| "grad_norm": 2.496528387069702, |
| "learning_rate": 3e-05, |
| "loss": 0.5448, |
| "step": 70000 |
| }, |
| { |
| "epoch": 45.662100456621005, |
| "eval_accuracy": 0.8342580056510478, |
| "eval_loss": 0.807097315788269, |
| "eval_runtime": 95.754, |
| "eval_samples_per_second": 128.057, |
| "eval_steps_per_second": 4.01, |
| "step": 70000 |
| }, |
| { |
| "epoch": 45.98825831702544, |
| "grad_norm": 3.3834452629089355, |
| "learning_rate": 2.95e-05, |
| "loss": 0.5464, |
| "step": 70500 |
| }, |
| { |
| "epoch": 45.98825831702544, |
| "eval_accuracy": 0.8337018788400463, |
| "eval_loss": 0.8085704445838928, |
| "eval_runtime": 95.6878, |
| "eval_samples_per_second": 128.146, |
| "eval_steps_per_second": 4.013, |
| "step": 70500 |
| }, |
| { |
| "epoch": 46.31441617742988, |
| "grad_norm": 2.3634603023529053, |
| "learning_rate": 2.9e-05, |
| "loss": 0.5438, |
| "step": 71000 |
| }, |
| { |
| "epoch": 46.31441617742988, |
| "eval_accuracy": 0.8341511430834018, |
| "eval_loss": 0.8026093244552612, |
| "eval_runtime": 96.1617, |
| "eval_samples_per_second": 127.514, |
| "eval_steps_per_second": 3.993, |
| "step": 71000 |
| }, |
| { |
| "epoch": 46.64057403783431, |
| "grad_norm": 3.5233588218688965, |
| "learning_rate": 2.8499999999999998e-05, |
| "loss": 0.5394, |
| "step": 71500 |
| }, |
| { |
| "epoch": 46.64057403783431, |
| "eval_accuracy": 0.8342311802203471, |
| "eval_loss": 0.8086482882499695, |
| "eval_runtime": 96.1007, |
| "eval_samples_per_second": 127.595, |
| "eval_steps_per_second": 3.996, |
| "step": 71500 |
| }, |
| { |
| "epoch": 46.96673189823875, |
| "grad_norm": 2.4381179809570312, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.5349, |
| "step": 72000 |
| }, |
| { |
| "epoch": 46.96673189823875, |
| "eval_accuracy": 0.8354184145463185, |
| "eval_loss": 0.805791437625885, |
| "eval_runtime": 96.0711, |
| "eval_samples_per_second": 127.635, |
| "eval_steps_per_second": 3.997, |
| "step": 72000 |
| }, |
| { |
| "epoch": 47.292889758643184, |
| "grad_norm": 3.008922576904297, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 0.5327, |
| "step": 72500 |
| }, |
| { |
| "epoch": 47.292889758643184, |
| "eval_accuracy": 0.8362029367748818, |
| "eval_loss": 0.7979453206062317, |
| "eval_runtime": 96.1804, |
| "eval_samples_per_second": 127.49, |
| "eval_steps_per_second": 3.992, |
| "step": 72500 |
| }, |
| { |
| "epoch": 47.61904761904762, |
| "grad_norm": 3.482806444168091, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.5334, |
| "step": 73000 |
| }, |
| { |
| "epoch": 47.61904761904762, |
| "eval_accuracy": 0.8365827562635599, |
| "eval_loss": 0.8000433444976807, |
| "eval_runtime": 95.6103, |
| "eval_samples_per_second": 128.25, |
| "eval_steps_per_second": 4.016, |
| "step": 73000 |
| }, |
| { |
| "epoch": 47.945205479452056, |
| "grad_norm": 2.3070437908172607, |
| "learning_rate": 2.6500000000000004e-05, |
| "loss": 0.5339, |
| "step": 73500 |
| }, |
| { |
| "epoch": 47.945205479452056, |
| "eval_accuracy": 0.8357628837739592, |
| "eval_loss": 0.8132687211036682, |
| "eval_runtime": 96.239, |
| "eval_samples_per_second": 127.412, |
| "eval_steps_per_second": 3.99, |
| "step": 73500 |
| }, |
| { |
| "epoch": 48.27136333985649, |
| "grad_norm": 3.9446592330932617, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.523, |
| "step": 74000 |
| }, |
| { |
| "epoch": 48.27136333985649, |
| "eval_accuracy": 0.8368259826536262, |
| "eval_loss": 0.786713719367981, |
| "eval_runtime": 96.19, |
| "eval_samples_per_second": 127.477, |
| "eval_steps_per_second": 3.992, |
| "step": 74000 |
| }, |
| { |
| "epoch": 48.59752120026093, |
| "grad_norm": 3.631774663925171, |
| "learning_rate": 2.5500000000000003e-05, |
| "loss": 0.527, |
| "step": 74500 |
| }, |
| { |
| "epoch": 48.59752120026093, |
| "eval_accuracy": 0.8353441631588309, |
| "eval_loss": 0.7979721426963806, |
| "eval_runtime": 96.3234, |
| "eval_samples_per_second": 127.3, |
| "eval_steps_per_second": 3.987, |
| "step": 74500 |
| }, |
| { |
| "epoch": 48.923679060665364, |
| "grad_norm": 3.334470510482788, |
| "learning_rate": 2.5e-05, |
| "loss": 0.5318, |
| "step": 75000 |
| }, |
| { |
| "epoch": 48.923679060665364, |
| "eval_accuracy": 0.8345138108475779, |
| "eval_loss": 0.8020584583282471, |
| "eval_runtime": 96.0458, |
| "eval_samples_per_second": 127.668, |
| "eval_steps_per_second": 3.998, |
| "step": 75000 |
| }, |
| { |
| "epoch": 49.2498369210698, |
| "grad_norm": 3.5331332683563232, |
| "learning_rate": 2.45e-05, |
| "loss": 0.5207, |
| "step": 75500 |
| }, |
| { |
| "epoch": 49.2498369210698, |
| "eval_accuracy": 0.8387281134722294, |
| "eval_loss": 0.7923426628112793, |
| "eval_runtime": 96.1278, |
| "eval_samples_per_second": 127.559, |
| "eval_steps_per_second": 3.995, |
| "step": 75500 |
| }, |
| { |
| "epoch": 49.575994781474236, |
| "grad_norm": 3.9823110103607178, |
| "learning_rate": 2.4e-05, |
| "loss": 0.5209, |
| "step": 76000 |
| }, |
| { |
| "epoch": 49.575994781474236, |
| "eval_accuracy": 0.8363262984494693, |
| "eval_loss": 0.8024851083755493, |
| "eval_runtime": 96.1881, |
| "eval_samples_per_second": 127.479, |
| "eval_steps_per_second": 3.992, |
| "step": 76000 |
| }, |
| { |
| "epoch": 49.90215264187867, |
| "grad_norm": 3.4313673973083496, |
| "learning_rate": 2.35e-05, |
| "loss": 0.5201, |
| "step": 76500 |
| }, |
| { |
| "epoch": 49.90215264187867, |
| "eval_accuracy": 0.836565998919567, |
| "eval_loss": 0.8021891713142395, |
| "eval_runtime": 96.0086, |
| "eval_samples_per_second": 127.718, |
| "eval_steps_per_second": 4.0, |
| "step": 76500 |
| }, |
| { |
| "epoch": 50.22831050228311, |
| "grad_norm": 3.1152913570404053, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.5139, |
| "step": 77000 |
| }, |
| { |
| "epoch": 50.22831050228311, |
| "eval_accuracy": 0.8371820454427441, |
| "eval_loss": 0.8075475692749023, |
| "eval_runtime": 96.1279, |
| "eval_samples_per_second": 127.559, |
| "eval_steps_per_second": 3.995, |
| "step": 77000 |
| }, |
| { |
| "epoch": 50.554468362687544, |
| "grad_norm": 4.154286861419678, |
| "learning_rate": 2.25e-05, |
| "loss": 0.5136, |
| "step": 77500 |
| }, |
| { |
| "epoch": 50.554468362687544, |
| "eval_accuracy": 0.8396914258560779, |
| "eval_loss": 0.7858642339706421, |
| "eval_runtime": 96.2083, |
| "eval_samples_per_second": 127.453, |
| "eval_steps_per_second": 3.991, |
| "step": 77500 |
| }, |
| { |
| "epoch": 50.88062622309198, |
| "grad_norm": 2.3929026126861572, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.5116, |
| "step": 78000 |
| }, |
| { |
| "epoch": 50.88062622309198, |
| "eval_accuracy": 0.8367701163066699, |
| "eval_loss": 0.7931028604507446, |
| "eval_runtime": 95.7046, |
| "eval_samples_per_second": 128.123, |
| "eval_steps_per_second": 4.012, |
| "step": 78000 |
| }, |
| { |
| "epoch": 51.20678408349641, |
| "grad_norm": 3.0909852981567383, |
| "learning_rate": 2.15e-05, |
| "loss": 0.5189, |
| "step": 78500 |
| }, |
| { |
| "epoch": 51.20678408349641, |
| "eval_accuracy": 0.8370578517272915, |
| "eval_loss": 0.799592137336731, |
| "eval_runtime": 96.1212, |
| "eval_samples_per_second": 127.568, |
| "eval_steps_per_second": 3.995, |
| "step": 78500 |
| }, |
| { |
| "epoch": 51.532941943900845, |
| "grad_norm": 3.350635051727295, |
| "learning_rate": 2.1e-05, |
| "loss": 0.5095, |
| "step": 79000 |
| }, |
| { |
| "epoch": 51.532941943900845, |
| "eval_accuracy": 0.836364720171079, |
| "eval_loss": 0.8035129308700562, |
| "eval_runtime": 95.5727, |
| "eval_samples_per_second": 128.3, |
| "eval_steps_per_second": 4.018, |
| "step": 79000 |
| }, |
| { |
| "epoch": 51.85909980430528, |
| "grad_norm": 4.1038432121276855, |
| "learning_rate": 2.05e-05, |
| "loss": 0.51, |
| "step": 79500 |
| }, |
| { |
| "epoch": 51.85909980430528, |
| "eval_accuracy": 0.836879590639366, |
| "eval_loss": 0.8043432235717773, |
| "eval_runtime": 96.0977, |
| "eval_samples_per_second": 127.599, |
| "eval_steps_per_second": 3.996, |
| "step": 79500 |
| }, |
| { |
| "epoch": 52.18525766470972, |
| "grad_norm": 3.108510971069336, |
| "learning_rate": 2e-05, |
| "loss": 0.5036, |
| "step": 80000 |
| }, |
| { |
| "epoch": 52.18525766470972, |
| "eval_accuracy": 0.8368426134295879, |
| "eval_loss": 0.8010953664779663, |
| "eval_runtime": 95.5615, |
| "eval_samples_per_second": 128.315, |
| "eval_steps_per_second": 4.018, |
| "step": 80000 |
| }, |
| { |
| "epoch": 52.51141552511415, |
| "grad_norm": 3.1937355995178223, |
| "learning_rate": 1.9500000000000003e-05, |
| "loss": 0.5127, |
| "step": 80500 |
| }, |
| { |
| "epoch": 52.51141552511415, |
| "eval_accuracy": 0.8369965405101081, |
| "eval_loss": 0.7906477451324463, |
| "eval_runtime": 95.7155, |
| "eval_samples_per_second": 128.109, |
| "eval_steps_per_second": 4.012, |
| "step": 80500 |
| }, |
| { |
| "epoch": 52.83757338551859, |
| "grad_norm": 3.720996141433716, |
| "learning_rate": 1.9e-05, |
| "loss": 0.5035, |
| "step": 81000 |
| }, |
| { |
| "epoch": 52.83757338551859, |
| "eval_accuracy": 0.8397895174956149, |
| "eval_loss": 0.7947555184364319, |
| "eval_runtime": 95.6287, |
| "eval_samples_per_second": 128.225, |
| "eval_steps_per_second": 4.016, |
| "step": 81000 |
| }, |
| { |
| "epoch": 53.163731245923024, |
| "grad_norm": 2.5395772457122803, |
| "learning_rate": 1.85e-05, |
| "loss": 0.4958, |
| "step": 81500 |
| }, |
| { |
| "epoch": 53.163731245923024, |
| "eval_accuracy": 0.8374669324421457, |
| "eval_loss": 0.8044614791870117, |
| "eval_runtime": 96.0682, |
| "eval_samples_per_second": 127.638, |
| "eval_steps_per_second": 3.997, |
| "step": 81500 |
| }, |
| { |
| "epoch": 53.48988910632746, |
| "grad_norm": 3.4834647178649902, |
| "learning_rate": 1.8e-05, |
| "loss": 0.5056, |
| "step": 82000 |
| }, |
| { |
| "epoch": 53.48988910632746, |
| "eval_accuracy": 0.8400154459651129, |
| "eval_loss": 0.789566695690155, |
| "eval_runtime": 96.0293, |
| "eval_samples_per_second": 127.69, |
| "eval_steps_per_second": 3.999, |
| "step": 82000 |
| }, |
| { |
| "epoch": 53.816046966731896, |
| "grad_norm": 4.184600830078125, |
| "learning_rate": 1.75e-05, |
| "loss": 0.4952, |
| "step": 82500 |
| }, |
| { |
| "epoch": 53.816046966731896, |
| "eval_accuracy": 0.8376462588050942, |
| "eval_loss": 0.811129629611969, |
| "eval_runtime": 96.2175, |
| "eval_samples_per_second": 127.44, |
| "eval_steps_per_second": 3.991, |
| "step": 82500 |
| }, |
| { |
| "epoch": 54.14220482713633, |
| "grad_norm": 2.301683187484741, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.4987, |
| "step": 83000 |
| }, |
| { |
| "epoch": 54.14220482713633, |
| "eval_accuracy": 0.8384409578370847, |
| "eval_loss": 0.7945307493209839, |
| "eval_runtime": 96.0056, |
| "eval_samples_per_second": 127.722, |
| "eval_steps_per_second": 4.0, |
| "step": 83000 |
| }, |
| { |
| "epoch": 54.46836268754077, |
| "grad_norm": 3.919367551803589, |
| "learning_rate": 1.65e-05, |
| "loss": 0.496, |
| "step": 83500 |
| }, |
| { |
| "epoch": 54.46836268754077, |
| "eval_accuracy": 0.8394707240542301, |
| "eval_loss": 0.7912357449531555, |
| "eval_runtime": 96.1449, |
| "eval_samples_per_second": 127.537, |
| "eval_steps_per_second": 3.994, |
| "step": 83500 |
| }, |
| { |
| "epoch": 54.794520547945204, |
| "grad_norm": 2.68684983253479, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.4895, |
| "step": 84000 |
| }, |
| { |
| "epoch": 54.794520547945204, |
| "eval_accuracy": 0.8381230887985037, |
| "eval_loss": 0.788875162601471, |
| "eval_runtime": 96.2309, |
| "eval_samples_per_second": 127.423, |
| "eval_steps_per_second": 3.99, |
| "step": 84000 |
| }, |
| { |
| "epoch": 55.12067840834964, |
| "grad_norm": 4.197926044464111, |
| "learning_rate": 1.55e-05, |
| "loss": 0.4946, |
| "step": 84500 |
| }, |
| { |
| "epoch": 55.12067840834964, |
| "eval_accuracy": 0.8396828466447496, |
| "eval_loss": 0.7965431213378906, |
| "eval_runtime": 96.0388, |
| "eval_samples_per_second": 127.678, |
| "eval_steps_per_second": 3.998, |
| "step": 84500 |
| }, |
| { |
| "epoch": 55.446836268754076, |
| "grad_norm": 2.5157694816589355, |
| "learning_rate": 1.5e-05, |
| "loss": 0.4886, |
| "step": 85000 |
| }, |
| { |
| "epoch": 55.446836268754076, |
| "eval_accuracy": 0.8387194828110288, |
| "eval_loss": 0.8012397289276123, |
| "eval_runtime": 96.2067, |
| "eval_samples_per_second": 127.455, |
| "eval_steps_per_second": 3.991, |
| "step": 85000 |
| }, |
| { |
| "epoch": 55.77299412915851, |
| "grad_norm": 2.7158167362213135, |
| "learning_rate": 1.45e-05, |
| "loss": 0.484, |
| "step": 85500 |
| }, |
| { |
| "epoch": 55.77299412915851, |
| "eval_accuracy": 0.8395588257109374, |
| "eval_loss": 0.7894542813301086, |
| "eval_runtime": 95.6318, |
| "eval_samples_per_second": 128.221, |
| "eval_steps_per_second": 4.015, |
| "step": 85500 |
| }, |
| { |
| "epoch": 56.09915198956295, |
| "grad_norm": 3.833855390548706, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.4921, |
| "step": 86000 |
| }, |
| { |
| "epoch": 56.09915198956295, |
| "eval_accuracy": 0.8383367532042704, |
| "eval_loss": 0.8041108250617981, |
| "eval_runtime": 95.6095, |
| "eval_samples_per_second": 128.251, |
| "eval_steps_per_second": 4.016, |
| "step": 86000 |
| }, |
| { |
| "epoch": 56.425309849967384, |
| "grad_norm": 3.353053331375122, |
| "learning_rate": 1.3500000000000001e-05, |
| "loss": 0.4866, |
| "step": 86500 |
| }, |
| { |
| "epoch": 56.425309849967384, |
| "eval_accuracy": 0.8387854971453993, |
| "eval_loss": 0.7873290181159973, |
| "eval_runtime": 96.098, |
| "eval_samples_per_second": 127.599, |
| "eval_steps_per_second": 3.996, |
| "step": 86500 |
| }, |
| { |
| "epoch": 56.75146771037182, |
| "grad_norm": 3.2117395401000977, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.4878, |
| "step": 87000 |
| }, |
| { |
| "epoch": 56.75146771037182, |
| "eval_accuracy": 0.8396631660025774, |
| "eval_loss": 0.7974265813827515, |
| "eval_runtime": 95.6357, |
| "eval_samples_per_second": 128.216, |
| "eval_steps_per_second": 4.015, |
| "step": 87000 |
| }, |
| { |
| "epoch": 57.077625570776256, |
| "grad_norm": 3.84195613861084, |
| "learning_rate": 1.25e-05, |
| "loss": 0.4757, |
| "step": 87500 |
| }, |
| { |
| "epoch": 57.077625570776256, |
| "eval_accuracy": 0.8371895013512013, |
| "eval_loss": 0.8067038655281067, |
| "eval_runtime": 96.1865, |
| "eval_samples_per_second": 127.481, |
| "eval_steps_per_second": 3.992, |
| "step": 87500 |
| }, |
| { |
| "epoch": 57.40378343118069, |
| "grad_norm": 2.6807780265808105, |
| "learning_rate": 1.2e-05, |
| "loss": 0.4793, |
| "step": 88000 |
| }, |
| { |
| "epoch": 57.40378343118069, |
| "eval_accuracy": 0.840466263302501, |
| "eval_loss": 0.7817407250404358, |
| "eval_runtime": 96.1004, |
| "eval_samples_per_second": 127.596, |
| "eval_steps_per_second": 3.996, |
| "step": 88000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 66, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.419534048504054e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|