{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 408.505875769446,
  "global_step": 730000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.6,
      "learning_rate": 2.3437499999999998e-07,
      "loss": 0.8947,
      "step": 10000
    },
    {
      "epoch": 5.6,
      "eval_loss": 0.7632947564125061,
      "eval_runtime": 76.0776,
      "eval_samples_per_second": 101.633,
      "eval_steps_per_second": 12.711,
      "step": 10000
    },
    {
      "epoch": 11.19,
      "learning_rate": 4.6874999999999996e-07,
      "loss": 0.7738,
      "step": 20000
    },
    {
      "epoch": 11.19,
      "eval_loss": 0.7603365182876587,
      "eval_runtime": 76.429,
      "eval_samples_per_second": 101.166,
      "eval_steps_per_second": 12.652,
      "step": 20000
    },
    {
      "epoch": 16.79,
      "learning_rate": 7.031249999999999e-07,
      "loss": 0.7725,
      "step": 30000
    },
    {
      "epoch": 16.79,
      "eval_loss": 0.7571617960929871,
      "eval_runtime": 76.146,
      "eval_samples_per_second": 101.542,
      "eval_steps_per_second": 12.699,
      "step": 30000
    },
    {
      "epoch": 22.38,
      "learning_rate": 9.374999999999999e-07,
      "loss": 0.7715,
      "step": 40000
    },
    {
      "epoch": 22.38,
      "eval_loss": 0.7568734884262085,
      "eval_runtime": 76.4339,
      "eval_samples_per_second": 101.159,
      "eval_steps_per_second": 12.651,
      "step": 40000
    },
    {
      "epoch": 27.98,
      "learning_rate": 1.171875e-06,
      "loss": 0.7695,
      "step": 50000
    },
    {
      "epoch": 27.98,
      "eval_loss": 0.7500209212303162,
      "eval_runtime": 76.3619,
      "eval_samples_per_second": 101.255,
      "eval_steps_per_second": 12.663,
      "step": 50000
    },
    {
      "epoch": 33.58,
      "learning_rate": 1.4062499999999999e-06,
      "loss": 0.7688,
      "step": 60000
    },
    {
      "epoch": 33.58,
      "eval_loss": 0.7491664886474609,
      "eval_runtime": 76.419,
      "eval_samples_per_second": 101.179,
      "eval_steps_per_second": 12.654,
      "step": 60000
    },
    {
      "epoch": 39.17,
      "learning_rate": 1.6406249999999999e-06,
      "loss": 0.768,
      "step": 70000
    },
    {
      "epoch": 39.17,
      "eval_loss": 0.748078465461731,
      "eval_runtime": 76.5312,
      "eval_samples_per_second": 101.031,
      "eval_steps_per_second": 12.635,
      "step": 70000
    },
    {
      "epoch": 44.77,
      "learning_rate": 1.8749999999999998e-06,
      "loss": 0.7667,
      "step": 80000
    },
    {
      "epoch": 44.77,
      "eval_loss": 0.7448051571846008,
      "eval_runtime": 76.5688,
      "eval_samples_per_second": 100.981,
      "eval_steps_per_second": 12.629,
      "step": 80000
    },
    {
      "epoch": 50.36,
      "learning_rate": 2.109375e-06,
      "loss": 0.7663,
      "step": 90000
    },
    {
      "epoch": 50.36,
      "eval_loss": 0.7472007870674133,
      "eval_runtime": 76.5244,
      "eval_samples_per_second": 101.04,
      "eval_steps_per_second": 12.636,
      "step": 90000
    },
    {
      "epoch": 55.96,
      "learning_rate": 2.34375e-06,
      "loss": 0.766,
      "step": 100000
    },
    {
      "epoch": 55.96,
      "eval_loss": 0.7444973587989807,
      "eval_runtime": 76.6067,
      "eval_samples_per_second": 100.931,
      "eval_steps_per_second": 12.623,
      "step": 100000
    },
    {
      "epoch": 61.56,
      "learning_rate": 2.578125e-06,
      "loss": 0.7656,
      "step": 110000
    },
    {
      "epoch": 61.56,
      "eval_loss": 0.7434288263320923,
      "eval_runtime": 76.5916,
      "eval_samples_per_second": 100.951,
      "eval_steps_per_second": 12.625,
      "step": 110000
    },
    {
      "epoch": 67.15,
      "learning_rate": 2.8124999999999998e-06,
      "loss": 0.7654,
      "step": 120000
    },
    {
      "epoch": 67.15,
      "eval_loss": 0.7411925196647644,
      "eval_runtime": 76.517,
      "eval_samples_per_second": 101.049,
      "eval_steps_per_second": 12.638,
      "step": 120000
    },
    {
      "epoch": 72.75,
      "learning_rate": 3.046875e-06,
      "loss": 0.7652,
      "step": 130000
    },
    {
      "epoch": 72.75,
      "eval_loss": 0.7399063110351562,
      "eval_runtime": 76.4205,
      "eval_samples_per_second": 101.177,
      "eval_steps_per_second": 12.654,
      "step": 130000
    },
    {
      "epoch": 78.34,
      "learning_rate": 3.2812499999999997e-06,
      "loss": 0.7649,
      "step": 140000
    },
    {
      "epoch": 78.34,
      "eval_loss": 0.7432417869567871,
      "eval_runtime": 76.3896,
      "eval_samples_per_second": 101.218,
      "eval_steps_per_second": 12.659,
      "step": 140000
    },
    {
      "epoch": 83.94,
      "learning_rate": 3.515625e-06,
      "loss": 0.7647,
      "step": 150000
    },
    {
      "epoch": 83.94,
      "eval_loss": 0.7411432862281799,
      "eval_runtime": 76.5523,
      "eval_samples_per_second": 101.003,
      "eval_steps_per_second": 12.632,
      "step": 150000
    },
    {
      "epoch": 89.54,
      "learning_rate": 3.7499999999999997e-06,
      "loss": 0.7645,
      "step": 160000
    },
    {
      "epoch": 89.54,
      "eval_loss": 0.7415673136711121,
      "eval_runtime": 76.1013,
      "eval_samples_per_second": 101.601,
      "eval_steps_per_second": 12.707,
      "step": 160000
    },
    {
      "epoch": 95.13,
      "learning_rate": 3.9843749999999994e-06,
      "loss": 0.7642,
      "step": 170000
    },
    {
      "epoch": 95.13,
      "eval_loss": 0.742856502532959,
      "eval_runtime": 76.276,
      "eval_samples_per_second": 101.369,
      "eval_steps_per_second": 12.678,
      "step": 170000
    },
    {
      "epoch": 100.73,
      "learning_rate": 4.21875e-06,
      "loss": 0.764,
      "step": 180000
    },
    {
      "epoch": 100.73,
      "eval_loss": 0.7411246299743652,
      "eval_runtime": 76.2112,
      "eval_samples_per_second": 101.455,
      "eval_steps_per_second": 12.688,
      "step": 180000
    },
    {
      "epoch": 106.32,
      "learning_rate": 4.453125e-06,
      "loss": 0.764,
      "step": 190000
    },
    {
      "epoch": 106.32,
      "eval_loss": 0.7412048578262329,
      "eval_runtime": 76.6531,
      "eval_samples_per_second": 100.87,
      "eval_steps_per_second": 12.615,
      "step": 190000
    },
    {
      "epoch": 111.92,
      "learning_rate": 1e-05,
      "loss": 0.7632,
      "step": 200000
    },
    {
      "epoch": 111.92,
      "eval_loss": 0.7407946586608887,
      "eval_runtime": 76.5545,
      "eval_samples_per_second": 101.0,
      "eval_steps_per_second": 12.632,
      "step": 200000
    },
    {
      "epoch": 117.52,
      "learning_rate": 1e-05,
      "loss": 0.7575,
      "step": 210000
    },
    {
      "epoch": 117.52,
      "eval_loss": 0.7322171330451965,
      "eval_runtime": 76.7018,
      "eval_samples_per_second": 100.806,
      "eval_steps_per_second": 12.607,
      "step": 210000
    },
    {
      "epoch": 123.11,
      "learning_rate": 1e-05,
      "loss": 0.7422,
      "step": 220000
    },
    {
      "epoch": 123.11,
      "eval_loss": 0.7116619944572449,
      "eval_runtime": 76.7768,
      "eval_samples_per_second": 100.707,
      "eval_steps_per_second": 12.595,
      "step": 220000
    },
    {
      "epoch": 128.71,
      "learning_rate": 1e-05,
      "loss": 0.7142,
      "step": 230000
    },
    {
      "epoch": 128.71,
      "eval_loss": 0.6831667423248291,
      "eval_runtime": 76.4706,
      "eval_samples_per_second": 101.111,
      "eval_steps_per_second": 12.645,
      "step": 230000
    },
    {
      "epoch": 134.3,
      "learning_rate": 1e-05,
      "loss": 0.6903,
      "step": 240000
    },
    {
      "epoch": 134.3,
      "eval_loss": 0.6659817695617676,
      "eval_runtime": 76.4365,
      "eval_samples_per_second": 101.156,
      "eval_steps_per_second": 12.651,
      "step": 240000
    },
    {
      "epoch": 139.9,
      "learning_rate": 1e-05,
      "loss": 0.6732,
      "step": 250000
    },
    {
      "epoch": 139.9,
      "eval_loss": 0.6514819860458374,
      "eval_runtime": 76.6466,
      "eval_samples_per_second": 100.879,
      "eval_steps_per_second": 12.616,
      "step": 250000
    },
    {
      "epoch": 145.5,
      "learning_rate": 1e-05,
      "loss": 0.661,
      "step": 260000
    },
    {
      "epoch": 145.5,
      "eval_loss": 0.6453074812889099,
      "eval_runtime": 76.4842,
      "eval_samples_per_second": 101.093,
      "eval_steps_per_second": 12.643,
      "step": 260000
    },
    {
      "epoch": 151.09,
      "learning_rate": 1e-05,
      "loss": 0.652,
      "step": 270000
    },
    {
      "epoch": 151.09,
      "eval_loss": 0.637267529964447,
      "eval_runtime": 76.4521,
      "eval_samples_per_second": 101.135,
      "eval_steps_per_second": 12.648,
      "step": 270000
    },
    {
      "epoch": 156.69,
      "learning_rate": 1e-05,
      "loss": 0.6446,
      "step": 280000
    },
    {
      "epoch": 156.69,
      "eval_loss": 0.6328049302101135,
      "eval_runtime": 76.4339,
      "eval_samples_per_second": 101.159,
      "eval_steps_per_second": 12.651,
      "step": 280000
    },
    {
      "epoch": 162.28,
      "learning_rate": 1e-05,
      "loss": 0.6384,
      "step": 290000
    },
    {
      "epoch": 162.28,
      "eval_loss": 0.6286044120788574,
      "eval_runtime": 76.4408,
      "eval_samples_per_second": 101.15,
      "eval_steps_per_second": 12.65,
      "step": 290000
    },
    {
      "epoch": 167.88,
      "learning_rate": 1e-05,
      "loss": 0.6313,
      "step": 300000
    },
    {
      "epoch": 167.88,
      "eval_loss": 0.627047598361969,
      "eval_runtime": 76.2029,
      "eval_samples_per_second": 101.466,
      "eval_steps_per_second": 12.69,
      "step": 300000
    },
    {
      "epoch": 173.48,
      "learning_rate": 1e-05,
      "loss": 0.6267,
      "step": 310000
    },
    {
      "epoch": 173.48,
      "eval_loss": 0.6226180195808411,
      "eval_runtime": 76.351,
      "eval_samples_per_second": 101.269,
      "eval_steps_per_second": 12.665,
      "step": 310000
    },
    {
      "epoch": 179.07,
      "learning_rate": 1e-05,
      "loss": 0.6225,
      "step": 320000
    },
    {
      "epoch": 179.07,
      "eval_loss": 0.6174684166908264,
      "eval_runtime": 77.0828,
      "eval_samples_per_second": 100.308,
      "eval_steps_per_second": 12.545,
      "step": 320000
    },
    {
      "epoch": 184.67,
      "learning_rate": 1e-05,
      "loss": 0.6195,
      "step": 330000
    },
    {
      "epoch": 184.67,
      "eval_loss": 0.6189109086990356,
      "eval_runtime": 76.6515,
      "eval_samples_per_second": 100.872,
      "eval_steps_per_second": 12.616,
      "step": 330000
    },
    {
      "epoch": 190.26,
      "learning_rate": 1e-05,
      "loss": 0.6166,
      "step": 340000
    },
    {
      "epoch": 190.26,
      "eval_loss": 0.6162586808204651,
      "eval_runtime": 76.6287,
      "eval_samples_per_second": 100.902,
      "eval_steps_per_second": 12.619,
      "step": 340000
    },
    {
      "epoch": 195.86,
      "learning_rate": 1e-05,
      "loss": 0.614,
      "step": 350000
    },
    {
      "epoch": 195.86,
      "eval_loss": 0.6159895658493042,
      "eval_runtime": 76.4934,
      "eval_samples_per_second": 101.081,
      "eval_steps_per_second": 12.642,
      "step": 350000
    },
    {
      "epoch": 201.45,
      "learning_rate": 1e-05,
      "loss": 0.6117,
      "step": 360000
    },
    {
      "epoch": 201.45,
      "eval_loss": 0.6115593910217285,
      "eval_runtime": 75.8517,
      "eval_samples_per_second": 101.936,
      "eval_steps_per_second": 12.749,
      "step": 360000
    },
    {
      "epoch": 207.05,
      "learning_rate": 1e-05,
      "loss": 0.6094,
      "step": 370000
    },
    {
      "epoch": 207.05,
      "eval_loss": 0.6114900708198547,
      "eval_runtime": 76.348,
      "eval_samples_per_second": 101.273,
      "eval_steps_per_second": 12.666,
      "step": 370000
    },
    {
      "epoch": 212.65,
      "learning_rate": 1e-05,
      "loss": 0.6071,
      "step": 380000
    },
    {
      "epoch": 212.65,
      "eval_loss": 0.6110843420028687,
      "eval_runtime": 76.4292,
      "eval_samples_per_second": 101.165,
      "eval_steps_per_second": 12.652,
      "step": 380000
    },
    {
      "epoch": 218.24,
      "learning_rate": 1e-05,
      "loss": 0.6048,
      "step": 390000
    },
    {
      "epoch": 218.24,
      "eval_loss": 0.6108397245407104,
      "eval_runtime": 76.2883,
      "eval_samples_per_second": 101.352,
      "eval_steps_per_second": 12.676,
      "step": 390000
    },
    {
      "epoch": 223.84,
      "learning_rate": 1e-05,
      "loss": 0.6025,
      "step": 400000
    },
    {
      "epoch": 223.84,
      "eval_loss": 0.6071902513504028,
      "eval_runtime": 76.1442,
      "eval_samples_per_second": 101.544,
      "eval_steps_per_second": 12.7,
      "step": 400000
    },
    {
      "epoch": 229.43,
      "learning_rate": 1e-05,
      "loss": 0.6006,
      "step": 410000
    },
    {
      "epoch": 229.43,
      "eval_loss": 0.6058005690574646,
      "eval_runtime": 76.1177,
      "eval_samples_per_second": 101.579,
      "eval_steps_per_second": 12.704,
      "step": 410000
    },
    {
      "epoch": 235.03,
      "learning_rate": 1e-05,
      "loss": 0.599,
      "step": 420000
    },
    {
      "epoch": 235.03,
      "eval_loss": 0.6018807888031006,
      "eval_runtime": 76.6301,
      "eval_samples_per_second": 100.9,
      "eval_steps_per_second": 12.619,
      "step": 420000
    },
    {
      "epoch": 240.63,
      "learning_rate": 1e-05,
      "loss": 0.5969,
      "step": 430000
    },
    {
      "epoch": 240.63,
      "eval_loss": 0.6019513010978699,
      "eval_runtime": 76.6382,
      "eval_samples_per_second": 100.89,
      "eval_steps_per_second": 12.618,
      "step": 430000
    },
    {
      "epoch": 246.22,
      "learning_rate": 1e-05,
      "loss": 0.5956,
      "step": 440000
    },
    {
      "epoch": 246.22,
      "eval_loss": 0.6009297370910645,
      "eval_runtime": 76.7463,
      "eval_samples_per_second": 100.747,
      "eval_steps_per_second": 12.6,
      "step": 440000
    },
    {
      "epoch": 251.82,
      "learning_rate": 1e-05,
      "loss": 0.5937,
      "step": 450000
    },
    {
      "epoch": 251.82,
      "eval_loss": 0.6020432114601135,
      "eval_runtime": 76.498,
      "eval_samples_per_second": 101.075,
      "eval_steps_per_second": 12.641,
      "step": 450000
    },
    {
      "epoch": 257.41,
      "learning_rate": 1e-05,
      "loss": 0.5923,
      "step": 460000
    },
    {
      "epoch": 257.41,
      "eval_loss": 0.5997503399848938,
      "eval_runtime": 76.4264,
      "eval_samples_per_second": 101.169,
      "eval_steps_per_second": 12.653,
      "step": 460000
    },
    {
      "epoch": 263.01,
      "learning_rate": 1e-05,
      "loss": 0.5907,
      "step": 470000
    },
    {
      "epoch": 263.01,
      "eval_loss": 0.6007161140441895,
      "eval_runtime": 76.517,
      "eval_samples_per_second": 101.049,
      "eval_steps_per_second": 12.638,
      "step": 470000
    },
    {
      "epoch": 268.61,
      "learning_rate": 1e-05,
      "loss": 0.5894,
      "step": 480000
    },
    {
      "epoch": 268.61,
      "eval_loss": 0.5984556674957275,
      "eval_runtime": 76.4528,
      "eval_samples_per_second": 101.134,
      "eval_steps_per_second": 12.648,
      "step": 480000
    },
    {
      "epoch": 274.2,
      "learning_rate": 1e-05,
      "loss": 0.5876,
      "step": 490000
    },
    {
      "epoch": 274.2,
      "eval_loss": 0.5970821976661682,
      "eval_runtime": 76.2171,
      "eval_samples_per_second": 101.447,
      "eval_steps_per_second": 12.687,
      "step": 490000
    },
    {
      "epoch": 279.8,
      "learning_rate": 1e-05,
      "loss": 0.5863,
      "step": 500000
    },
    {
      "epoch": 279.8,
      "eval_loss": 0.5982722640037537,
      "eval_runtime": 76.3256,
      "eval_samples_per_second": 101.303,
      "eval_steps_per_second": 12.669,
      "step": 500000
    },
    {
      "epoch": 285.39,
      "learning_rate": 1e-05,
      "loss": 0.585,
      "step": 510000
    },
    {
      "epoch": 285.39,
      "eval_loss": 0.5990148782730103,
      "eval_runtime": 76.2053,
      "eval_samples_per_second": 101.463,
      "eval_steps_per_second": 12.689,
      "step": 510000
    },
    {
      "epoch": 290.99,
      "learning_rate": 1e-05,
      "loss": 0.583,
      "step": 520000
    },
    {
      "epoch": 290.99,
      "eval_loss": 0.5960124135017395,
      "eval_runtime": 76.218,
      "eval_samples_per_second": 101.446,
      "eval_steps_per_second": 12.687,
      "step": 520000
    },
    {
      "epoch": 296.59,
      "learning_rate": 1e-05,
      "loss": 0.5822,
      "step": 530000
    },
    {
      "epoch": 296.59,
      "eval_loss": 0.593532145023346,
      "eval_runtime": 76.3226,
      "eval_samples_per_second": 101.307,
      "eval_steps_per_second": 12.67,
      "step": 530000
    },
    {
      "epoch": 302.18,
      "learning_rate": 1e-05,
      "loss": 0.5808,
      "step": 540000
    },
    {
      "epoch": 302.18,
      "eval_loss": 0.596666693687439,
      "eval_runtime": 76.1588,
      "eval_samples_per_second": 101.525,
      "eval_steps_per_second": 12.697,
      "step": 540000
    },
    {
      "epoch": 307.78,
      "learning_rate": 1e-05,
      "loss": 0.5794,
      "step": 550000
    },
    {
      "epoch": 307.78,
      "eval_loss": 0.5946430563926697,
      "eval_runtime": 76.3226,
      "eval_samples_per_second": 101.307,
      "eval_steps_per_second": 12.67,
      "step": 550000
    },
    {
      "epoch": 313.37,
      "learning_rate": 1e-05,
      "loss": 0.578,
      "step": 560000
    },
    {
      "epoch": 313.37,
      "eval_loss": 0.5950666666030884,
      "eval_runtime": 76.243,
      "eval_samples_per_second": 101.413,
      "eval_steps_per_second": 12.683,
      "step": 560000
    },
    {
      "epoch": 318.97,
      "learning_rate": 1e-05,
      "loss": 0.5766,
      "step": 570000
    },
    {
      "epoch": 318.97,
      "eval_loss": 0.5932120680809021,
      "eval_runtime": 76.0598,
      "eval_samples_per_second": 101.657,
      "eval_steps_per_second": 12.714,
      "step": 570000
    },
    {
      "epoch": 324.57,
      "learning_rate": 1e-05,
      "loss": 0.5752,
      "step": 580000
    },
    {
      "epoch": 324.57,
      "eval_loss": 0.5916844606399536,
      "eval_runtime": 76.215,
      "eval_samples_per_second": 101.45,
      "eval_steps_per_second": 12.688,
      "step": 580000
    },
    {
      "epoch": 330.16,
      "learning_rate": 1e-05,
      "loss": 0.5739,
      "step": 590000
    },
    {
      "epoch": 330.16,
      "eval_loss": 0.592149019241333,
      "eval_runtime": 76.1575,
      "eval_samples_per_second": 101.526,
      "eval_steps_per_second": 12.697,
      "step": 590000
    },
    {
      "epoch": 335.76,
      "learning_rate": 1e-05,
      "loss": 0.5726,
      "step": 600000
    },
    {
      "epoch": 335.76,
      "eval_loss": 0.5907247066497803,
      "eval_runtime": 76.2114,
      "eval_samples_per_second": 101.455,
      "eval_steps_per_second": 12.688,
      "step": 600000
    },
    {
      "epoch": 341.35,
      "learning_rate": 1e-05,
      "loss": 0.5714,
      "step": 610000
    },
    {
      "epoch": 341.35,
      "eval_loss": 0.5907928347587585,
      "eval_runtime": 76.207,
      "eval_samples_per_second": 101.461,
      "eval_steps_per_second": 12.689,
      "step": 610000
    },
    {
      "epoch": 346.95,
      "learning_rate": 1e-05,
      "loss": 0.5702,
      "step": 620000
    },
    {
      "epoch": 346.95,
      "eval_loss": 0.5909689664840698,
      "eval_runtime": 76.3919,
      "eval_samples_per_second": 101.215,
      "eval_steps_per_second": 12.658,
      "step": 620000
    },
    {
      "epoch": 352.55,
      "learning_rate": 1e-05,
      "loss": 0.5686,
      "step": 630000
    },
    {
      "epoch": 352.55,
      "eval_loss": 0.5894390940666199,
      "eval_runtime": 76.3494,
      "eval_samples_per_second": 101.271,
      "eval_steps_per_second": 12.665,
      "step": 630000
    },
    {
      "epoch": 358.14,
      "learning_rate": 1e-05,
      "loss": 0.5674,
      "step": 640000
    },
    {
      "epoch": 358.14,
      "eval_loss": 0.5915200114250183,
      "eval_runtime": 76.5727,
      "eval_samples_per_second": 100.976,
      "eval_steps_per_second": 12.629,
      "step": 640000
    },
    {
      "epoch": 363.74,
      "learning_rate": 1e-05,
      "loss": 0.5664,
      "step": 650000
    },
    {
      "epoch": 363.74,
      "eval_loss": 0.5875544548034668,
      "eval_runtime": 76.0536,
      "eval_samples_per_second": 101.665,
      "eval_steps_per_second": 12.715,
      "step": 650000
    },
    {
      "epoch": 369.33,
      "learning_rate": 1e-05,
      "loss": 0.565,
      "step": 660000
    },
    {
      "epoch": 369.33,
      "eval_loss": 0.5878584980964661,
      "eval_runtime": 76.1299,
      "eval_samples_per_second": 101.563,
      "eval_steps_per_second": 12.702,
      "step": 660000
    },
    {
      "epoch": 374.93,
      "learning_rate": 1e-05,
      "loss": 0.5636,
      "step": 670000
    },
    {
      "epoch": 374.93,
      "eval_loss": 0.5897438526153564,
      "eval_runtime": 76.3557,
      "eval_samples_per_second": 101.263,
      "eval_steps_per_second": 12.664,
      "step": 670000
    },
    {
      "epoch": 380.53,
      "learning_rate": 1e-05,
      "loss": 0.5625,
      "step": 680000
    },
    {
      "epoch": 380.53,
      "eval_loss": 0.5888833999633789,
      "eval_runtime": 76.7072,
      "eval_samples_per_second": 100.799,
      "eval_steps_per_second": 12.606,
      "step": 680000
    },
    {
      "epoch": 386.12,
      "learning_rate": 1e-05,
      "loss": 0.5609,
      "step": 690000
    },
    {
      "epoch": 386.12,
      "eval_loss": 0.5903308987617493,
      "eval_runtime": 76.5139,
      "eval_samples_per_second": 101.053,
      "eval_steps_per_second": 12.638,
      "step": 690000
    },
    {
      "epoch": 391.72,
      "learning_rate": 1e-05,
      "loss": 0.5594,
      "step": 700000
    },
    {
      "epoch": 391.72,
      "eval_loss": 0.5877216458320618,
      "eval_runtime": 76.4574,
      "eval_samples_per_second": 101.128,
      "eval_steps_per_second": 12.648,
      "step": 700000
    },
    {
      "epoch": 397.31,
      "learning_rate": 1e-05,
      "loss": 0.5584,
      "step": 710000
    },
    {
      "epoch": 397.31,
      "eval_loss": 0.5875140428543091,
      "eval_runtime": 76.1634,
      "eval_samples_per_second": 101.519,
      "eval_steps_per_second": 12.696,
      "step": 710000
    },
    {
      "epoch": 402.91,
      "learning_rate": 1e-05,
      "loss": 0.5573,
      "step": 720000
    },
    {
      "epoch": 402.91,
      "eval_loss": 0.5887530446052551,
      "eval_runtime": 76.2807,
      "eval_samples_per_second": 101.362,
      "eval_steps_per_second": 12.677,
      "step": 720000
    },
    {
      "epoch": 408.51,
      "learning_rate": 1e-05,
      "loss": 0.5561,
      "step": 730000
    },
    {
      "epoch": 408.51,
      "eval_loss": 0.5863147974014282,
      "eval_runtime": 76.2255,
      "eval_samples_per_second": 101.436,
      "eval_steps_per_second": 12.686,
      "step": 730000
    }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 560,
  "total_flos": 3.197989282913906e+21,
  "trial_name": null,
  "trial_params": null
}