pixel-barec-pretrain / trainer_state.json
bensapir's picture
Upload folder using huggingface_hub
3b9fc47 verified
Invalid JSON: Expected double-quoted property name in JSON at line 4, column 1
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 408.505875769446,
  "global_step": 730000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 5.6,
      "learning_rate": 2.3437499999999998e-07,
      "loss": 0.8947,
      "step": 10000
    },
    {
      "epoch": 5.6,
      "eval_loss": 0.7632947564125061,
      "eval_runtime": 76.0776,
      "eval_samples_per_second": 101.633,
      "eval_steps_per_second": 12.711,
      "step": 10000
    },
    {
      "epoch": 11.19,
      "learning_rate": 4.6874999999999996e-07,
      "loss": 0.7738,
      "step": 20000
    },
    {
      "epoch": 11.19,
      "eval_loss": 0.7603365182876587,
      "eval_runtime": 76.429,
      "eval_samples_per_second": 101.166,
      "eval_steps_per_second": 12.652,
      "step": 20000
    },
    {
      "epoch": 16.79,
      "learning_rate": 7.031249999999999e-07,
      "loss": 0.7725,
      "step": 30000
    },
    {
      "epoch": 16.79,
      "eval_loss": 0.7571617960929871,
      "eval_runtime": 76.146,
      "eval_samples_per_second": 101.542,
      "eval_steps_per_second": 12.699,
      "step": 30000
    },
    {
      "epoch": 22.38,
      "learning_rate": 9.374999999999999e-07,
      "loss": 0.7715,
      "step": 40000
    },
    {
      "epoch": 22.38,
      "eval_loss": 0.7568734884262085,
      "eval_runtime": 76.4339,
      "eval_samples_per_second": 101.159,
      "eval_steps_per_second": 12.651,
      "step": 40000
    },
    {
      "epoch": 27.98,
      "learning_rate": 1.171875e-06,
      "loss": 0.7695,
      "step": 50000
    },
    {
      "epoch": 27.98,
      "eval_loss": 0.7500209212303162,
      "eval_runtime": 76.3619,
      "eval_samples_per_second": 101.255,
      "eval_steps_per_second": 12.663,
      "step": 50000
    },
    {
      "epoch": 33.58,
      "learning_rate": 1.4062499999999999e-06,
      "loss": 0.7688,
      "step": 60000
    },
    {
      "epoch": 33.58,
      "eval_loss": 0.7491664886474609,
      "eval_runtime": 76.419,
      "eval_samples_per_second": 101.179,
      "eval_steps_per_second": 12.654,
      "step": 60000
    },
    {
      "epoch": 39.17,
      "learning_rate": 1.6406249999999999e-06,
      "loss": 0.768,
      "step": 70000
    },
    {
      "epoch": 39.17,
      "eval_loss": 0.748078465461731,
      "eval_runtime": 76.5312,
      "eval_samples_per_second": 101.031,
      "eval_steps_per_second": 12.635,
      "step": 70000
    },
    {
      "epoch": 44.77,
      "learning_rate": 1.8749999999999998e-06,
      "loss": 0.7667,
      "step": 80000
    },
    {
      "epoch": 44.77,
      "eval_loss": 0.7448051571846008,
      "eval_runtime": 76.5688,
      "eval_samples_per_second": 100.981,
      "eval_steps_per_second": 12.629,
      "step": 80000
    },
    {
      "epoch": 50.36,
      "learning_rate": 2.109375e-06,
      "loss": 0.7663,
      "step": 90000
    },
    {
      "epoch": 50.36,
      "eval_loss": 0.7472007870674133,
      "eval_runtime": 76.5244,
      "eval_samples_per_second": 101.04,
      "eval_steps_per_second": 12.636,
      "step": 90000
    },
    {
      "epoch": 55.96,
      "learning_rate": 2.34375e-06,
      "loss": 0.766,
      "step": 100000
    },
    {
      "epoch": 55.96,
      "eval_loss": 0.7444973587989807,
      "eval_runtime": 76.6067,
      "eval_samples_per_second": 100.931,
      "eval_steps_per_second": 12.623,
      "step": 100000
    },
    {
      "epoch": 61.56,
      "learning_rate": 2.578125e-06,
      "loss": 0.7656,
      "step": 110000
    },
    {
      "epoch": 61.56,
      "eval_loss": 0.7434288263320923,
      "eval_runtime": 76.5916,
      "eval_samples_per_second": 100.951,
      "eval_steps_per_second": 12.625,
      "step": 110000
    },
    {
      "epoch": 67.15,
      "learning_rate": 2.8124999999999998e-06,
      "loss": 0.7654,
      "step": 120000
    },
    {
      "epoch": 67.15,
      "eval_loss": 0.7411925196647644,
      "eval_runtime": 76.517,
      "eval_samples_per_second": 101.049,
      "eval_steps_per_second": 12.638,
      "step": 120000
    },
    {
      "epoch": 72.75,
      "learning_rate": 3.046875e-06,
      "loss": 0.7652,
      "step": 130000
    },
    {
      "epoch": 72.75,
      "eval_loss": 0.7399063110351562,
      "eval_runtime": 76.4205,
      "eval_samples_per_second": 101.177,
      "eval_steps_per_second": 12.654,
      "step": 130000
    },
    {
      "epoch": 78.34,
      "learning_rate": 3.2812499999999997e-06,
      "loss": 0.7649,
      "step": 140000
    },
    {
      "epoch": 78.34,
      "eval_loss": 0.7432417869567871,
      "eval_runtime": 76.3896,
      "eval_samples_per_second": 101.218,
      "eval_steps_per_second": 12.659,
      "step": 140000
    },
    {
      "epoch": 83.94,
      "learning_rate": 3.515625e-06,
      "loss": 0.7647,
      "step": 150000
    },
    {
      "epoch": 83.94,
      "eval_loss": 0.7411432862281799,
      "eval_runtime": 76.5523,
      "eval_samples_per_second": 101.003,
      "eval_steps_per_second": 12.632,
      "step": 150000
    },
    {
      "epoch": 89.54,
      "learning_rate": 3.7499999999999997e-06,
      "loss": 0.7645,
      "step": 160000
    },
    {
      "epoch": 89.54,
      "eval_loss": 0.7415673136711121,
      "eval_runtime": 76.1013,
      "eval_samples_per_second": 101.601,
      "eval_steps_per_second": 12.707,
      "step": 160000
    },
    {
      "epoch": 95.13,
      "learning_rate": 3.9843749999999994e-06,
      "loss": 0.7642,
      "step": 170000
    },
    {
      "epoch": 95.13,
      "eval_loss": 0.742856502532959,
      "eval_runtime": 76.276,
      "eval_samples_per_second": 101.369,
      "eval_steps_per_second": 12.678,
      "step": 170000
    },
    {
      "epoch": 100.73,
      "learning_rate": 4.21875e-06,
      "loss": 0.764,
      "step": 180000
    },
    {
      "epoch": 100.73,
      "eval_loss": 0.7411246299743652,
      "eval_runtime": 76.2112,
      "eval_samples_per_second": 101.455,
      "eval_steps_per_second": 12.688,
      "step": 180000
    },
    {
      "epoch": 106.32,
      "learning_rate": 4.453125e-06,
      "loss": 0.764,
      "step": 190000
    },
    {
      "epoch": 106.32,
      "eval_loss": 0.7412048578262329,
      "eval_runtime": 76.6531,
      "eval_samples_per_second": 100.87,
      "eval_steps_per_second": 12.615,
      "step": 190000
    },
    {
      "epoch": 111.92,
      "learning_rate": 1e-05,
      "loss": 0.7632,
      "step": 200000
    },
    {
      "epoch": 111.92,
      "eval_loss": 0.7407946586608887,
      "eval_runtime": 76.5545,
      "eval_samples_per_second": 101.0,
      "eval_steps_per_second": 12.632,
      "step": 200000
    },
    {
      "epoch": 117.52,
      "learning_rate": 1e-05,
      "loss": 0.7575,
      "step": 210000
    },
    {
      "epoch": 117.52,
      "eval_loss": 0.7322171330451965,
      "eval_runtime": 76.7018,
      "eval_samples_per_second": 100.806,
      "eval_steps_per_second": 12.607,
      "step": 210000
    },
    {
      "epoch": 123.11,
      "learning_rate": 1e-05,
      "loss": 0.7422,
      "step": 220000
    },
    {
      "epoch": 123.11,
      "eval_loss": 0.7116619944572449,
      "eval_runtime": 76.7768,
      "eval_samples_per_second": 100.707,
      "eval_steps_per_second": 12.595,
      "step": 220000
    },
    {
      "epoch": 128.71,
      "learning_rate": 1e-05,
      "loss": 0.7142,
      "step": 230000
    },
    {
      "epoch": 128.71,
      "eval_loss": 0.6831667423248291,
      "eval_runtime": 76.4706,
      "eval_samples_per_second": 101.111,
      "eval_steps_per_second": 12.645,
      "step": 230000
    },
    {
      "epoch": 134.3,
      "learning_rate": 1e-05,
      "loss": 0.6903,
      "step": 240000
    },
    {
      "epoch": 134.3,
      "eval_loss": 0.6659817695617676,
      "eval_runtime": 76.4365,
      "eval_samples_per_second": 101.156,
      "eval_steps_per_second": 12.651,
      "step": 240000
    },
    {
      "epoch": 139.9,
      "learning_rate": 1e-05,
      "loss": 0.6732,
      "step": 250000
    },
    {
      "epoch": 139.9,
      "eval_loss": 0.6514819860458374,
      "eval_runtime": 76.6466,
      "eval_samples_per_second": 100.879,
      "eval_steps_per_second": 12.616,
      "step": 250000
    },
    {
      "epoch": 145.5,
      "learning_rate": 1e-05,
      "loss": 0.661,
      "step": 260000
    },
    {
      "epoch": 145.5,
      "eval_loss": 0.6453074812889099,
      "eval_runtime": 76.4842,
      "eval_samples_per_second": 101.093,
      "eval_steps_per_second": 12.643,
      "step": 260000
    },
    {
      "epoch": 151.09,
      "learning_rate": 1e-05,
      "loss": 0.652,
      "step": 270000
    },
    {
      "epoch": 151.09,
      "eval_loss": 0.637267529964447,
      "eval_runtime": 76.4521,
      "eval_samples_per_second": 101.135,
      "eval_steps_per_second": 12.648,
      "step": 270000
    },
    {
      "epoch": 156.69,
      "learning_rate": 1e-05,
      "loss": 0.6446,
      "step": 280000
    },
    {
      "epoch": 156.69,
      "eval_loss": 0.6328049302101135,
      "eval_runtime": 76.4339,
      "eval_samples_per_second": 101.159,
      "eval_steps_per_second": 12.651,
      "step": 280000
    },
    {
      "epoch": 162.28,
      "learning_rate": 1e-05,
      "loss": 0.6384,
      "step": 290000
    },
    {
      "epoch": 162.28,
      "eval_loss": 0.6286044120788574,
      "eval_runtime": 76.4408,
      "eval_samples_per_second": 101.15,
      "eval_steps_per_second": 12.65,
      "step": 290000
    },
    {
      "epoch": 167.88,
      "learning_rate": 1e-05,
      "loss": 0.6313,
      "step": 300000
    },
    {
      "epoch": 167.88,
      "eval_loss": 0.627047598361969,
      "eval_runtime": 76.2029,
      "eval_samples_per_second": 101.466,
      "eval_steps_per_second": 12.69,
      "step": 300000
    },
    {
      "epoch": 173.48,
      "learning_rate": 1e-05,
      "loss": 0.6267,
      "step": 310000
    },
    {
      "epoch": 173.48,
      "eval_loss": 0.6226180195808411,
      "eval_runtime": 76.351,
      "eval_samples_per_second": 101.269,
      "eval_steps_per_second": 12.665,
      "step": 310000
    },
    {
      "epoch": 179.07,
      "learning_rate": 1e-05,
      "loss": 0.6225,
      "step": 320000
    },
    {
      "epoch": 179.07,
      "eval_loss": 0.6174684166908264,
      "eval_runtime": 77.0828,
      "eval_samples_per_second": 100.308,
      "eval_steps_per_second": 12.545,
      "step": 320000
    },
    {
      "epoch": 184.67,
      "learning_rate": 1e-05,
      "loss": 0.6195,
      "step": 330000
    },
    {
      "epoch": 184.67,
      "eval_loss": 0.6189109086990356,
      "eval_runtime": 76.6515,
      "eval_samples_per_second": 100.872,
      "eval_steps_per_second": 12.616,
      "step": 330000
    },
    {
      "epoch": 190.26,
      "learning_rate": 1e-05,
      "loss": 0.6166,
      "step": 340000
    },
    {
      "epoch": 190.26,
      "eval_loss": 0.6162586808204651,
      "eval_runtime": 76.6287,
      "eval_samples_per_second": 100.902,
      "eval_steps_per_second": 12.619,
      "step": 340000
    },
    {
      "epoch": 195.86,
      "learning_rate": 1e-05,
      "loss": 0.614,
      "step": 350000
    },
    {
      "epoch": 195.86,
      "eval_loss": 0.6159895658493042,
      "eval_runtime": 76.4934,
      "eval_samples_per_second": 101.081,
      "eval_steps_per_second": 12.642,
      "step": 350000
    },
    {
      "epoch": 201.45,
      "learning_rate": 1e-05,
      "loss": 0.6117,
      "step": 360000
    },
    {
      "epoch": 201.45,
      "eval_loss": 0.6115593910217285,
      "eval_runtime": 75.8517,
      "eval_samples_per_second": 101.936,
      "eval_steps_per_second": 12.749,
      "step": 360000
    },
    {
      "epoch": 207.05,
      "learning_rate": 1e-05,
      "loss": 0.6094,
      "step": 370000
    },
    {
      "epoch": 207.05,
      "eval_loss": 0.6114900708198547,
      "eval_runtime": 76.348,
      "eval_samples_per_second": 101.273,
      "eval_steps_per_second": 12.666,
      "step": 370000
    },
    {
      "epoch": 212.65,
      "learning_rate": 1e-05,
      "loss": 0.6071,
      "step": 380000
    },
    {
      "epoch": 212.65,
      "eval_loss": 0.6110843420028687,
      "eval_runtime": 76.4292,
      "eval_samples_per_second": 101.165,
      "eval_steps_per_second": 12.652,
      "step": 380000
    },
    {
      "epoch": 218.24,
      "learning_rate": 1e-05,
      "loss": 0.6048,
      "step": 390000
    },
    {
      "epoch": 218.24,
      "eval_loss": 0.6108397245407104,
      "eval_runtime": 76.2883,
      "eval_samples_per_second": 101.352,
      "eval_steps_per_second": 12.676,
      "step": 390000
    },
    {
      "epoch": 223.84,
      "learning_rate": 1e-05,
      "loss": 0.6025,
      "step": 400000
    },
    {
      "epoch": 223.84,
      "eval_loss": 0.6071902513504028,
      "eval_runtime": 76.1442,
      "eval_samples_per_second": 101.544,
      "eval_steps_per_second": 12.7,
      "step": 400000
    },
    {
      "epoch": 229.43,
      "learning_rate": 1e-05,
      "loss": 0.6006,
      "step": 410000
    },
    {
      "epoch": 229.43,
      "eval_loss": 0.6058005690574646,
      "eval_runtime": 76.1177,
      "eval_samples_per_second": 101.579,
      "eval_steps_per_second": 12.704,
      "step": 410000
    },
    {
      "epoch": 235.03,
      "learning_rate": 1e-05,
      "loss": 0.599,
      "step": 420000
    },
    {
      "epoch": 235.03,
      "eval_loss": 0.6018807888031006,
      "eval_runtime": 76.6301,
      "eval_samples_per_second": 100.9,
      "eval_steps_per_second": 12.619,
      "step": 420000
    },
    {
      "epoch": 240.63,
      "learning_rate": 1e-05,
      "loss": 0.5969,
      "step": 430000
    },
    {
      "epoch": 240.63,
      "eval_loss": 0.6019513010978699,
      "eval_runtime": 76.6382,
      "eval_samples_per_second": 100.89,
      "eval_steps_per_second": 12.618,
      "step": 430000
    },
    {
      "epoch": 246.22,
      "learning_rate": 1e-05,
      "loss": 0.5956,
      "step": 440000
    },
    {
      "epoch": 246.22,
      "eval_loss": 0.6009297370910645,
      "eval_runtime": 76.7463,
      "eval_samples_per_second": 100.747,
      "eval_steps_per_second": 12.6,
      "step": 440000
    },
    {
      "epoch": 251.82,
      "learning_rate": 1e-05,
      "loss": 0.5937,
      "step": 450000
    },
    {
      "epoch": 251.82,
      "eval_loss": 0.6020432114601135,
      "eval_runtime": 76.498,
      "eval_samples_per_second": 101.075,
      "eval_steps_per_second": 12.641,
      "step": 450000
    },
    {
      "epoch": 257.41,
      "learning_rate": 1e-05,
      "loss": 0.5923,
      "step": 460000
    },
    {
      "epoch": 257.41,
      "eval_loss": 0.5997503399848938,
      "eval_runtime": 76.4264,
      "eval_samples_per_second": 101.169,
      "eval_steps_per_second": 12.653,
      "step": 460000
    },
    {
      "epoch": 263.01,
      "learning_rate": 1e-05,
      "loss": 0.5907,
      "step": 470000
    },
    {
      "epoch": 263.01,
      "eval_loss": 0.6007161140441895,
      "eval_runtime": 76.517,
      "eval_samples_per_second": 101.049,
      "eval_steps_per_second": 12.638,
      "step": 470000
    },
    {
      "epoch": 268.61,
      "learning_rate": 1e-05,
      "loss": 0.5894,
      "step": 480000
    },
    {
      "epoch": 268.61,
      "eval_loss": 0.5984556674957275,
      "eval_runtime": 76.4528,
      "eval_samples_per_second": 101.134,
      "eval_steps_per_second": 12.648,
      "step": 480000
    },
    {
      "epoch": 274.2,
      "learning_rate": 1e-05,
      "loss": 0.5876,
      "step": 490000
    },
    {
      "epoch": 274.2,
      "eval_loss": 0.5970821976661682,
      "eval_runtime": 76.2171,
      "eval_samples_per_second": 101.447,
      "eval_steps_per_second": 12.687,
      "step": 490000
    },
    {
      "epoch": 279.8,
      "learning_rate": 1e-05,
      "loss": 0.5863,
      "step": 500000
    },
    {
      "epoch": 279.8,
      "eval_loss": 0.5982722640037537,
      "eval_runtime": 76.3256,
      "eval_samples_per_second": 101.303,
      "eval_steps_per_second": 12.669,
      "step": 500000
    },
    {
      "epoch": 285.39,
      "learning_rate": 1e-05,
      "loss": 0.585,
      "step": 510000
    },
    {
      "epoch": 285.39,
      "eval_loss": 0.5990148782730103,
      "eval_runtime": 76.2053,
      "eval_samples_per_second": 101.463,
      "eval_steps_per_second": 12.689,
      "step": 510000
    },
    {
      "epoch": 290.99,
      "learning_rate": 1e-05,
      "loss": 0.583,
      "step": 520000
    },
    {
      "epoch": 290.99,
      "eval_loss": 0.5960124135017395,
      "eval_runtime": 76.218,
      "eval_samples_per_second": 101.446,
      "eval_steps_per_second": 12.687,
      "step": 520000
    },
    {
      "epoch": 296.59,
      "learning_rate": 1e-05,
      "loss": 0.5822,
      "step": 530000
    },
    {
      "epoch": 296.59,
      "eval_loss": 0.593532145023346,
      "eval_runtime": 76.3226,
      "eval_samples_per_second": 101.307,
      "eval_steps_per_second": 12.67,
      "step": 530000
    },
    {
      "epoch": 302.18,
      "learning_rate": 1e-05,
      "loss": 0.5808,
      "step": 540000
    },
    {
      "epoch": 302.18,
      "eval_loss": 0.596666693687439,
      "eval_runtime": 76.1588,
      "eval_samples_per_second": 101.525,
      "eval_steps_per_second": 12.697,
      "step": 540000
    },
    {
      "epoch": 307.78,
      "learning_rate": 1e-05,
      "loss": 0.5794,
      "step": 550000
    },
    {
      "epoch": 307.78,
      "eval_loss": 0.5946430563926697,
      "eval_runtime": 76.3226,
      "eval_samples_per_second": 101.307,
      "eval_steps_per_second": 12.67,
      "step": 550000
    },
    {
      "epoch": 313.37,
      "learning_rate": 1e-05,
      "loss": 0.578,
      "step": 560000
    },
    {
      "epoch": 313.37,
      "eval_loss": 0.5950666666030884,
      "eval_runtime": 76.243,
      "eval_samples_per_second": 101.413,
      "eval_steps_per_second": 12.683,
      "step": 560000
    },
    {
      "epoch": 318.97,
      "learning_rate": 1e-05,
      "loss": 0.5766,
      "step": 570000
    },
    {
      "epoch": 318.97,
      "eval_loss": 0.5932120680809021,
      "eval_runtime": 76.0598,
      "eval_samples_per_second": 101.657,
      "eval_steps_per_second": 12.714,
      "step": 570000
    },
    {
      "epoch": 324.57,
      "learning_rate": 1e-05,
      "loss": 0.5752,
      "step": 580000
    },
    {
      "epoch": 324.57,
      "eval_loss": 0.5916844606399536,
      "eval_runtime": 76.215,
      "eval_samples_per_second": 101.45,
      "eval_steps_per_second": 12.688,
      "step": 580000
    },
    {
      "epoch": 330.16,
      "learning_rate": 1e-05,
      "loss": 0.5739,
      "step": 590000
    },
    {
      "epoch": 330.16,
      "eval_loss": 0.592149019241333,
      "eval_runtime": 76.1575,
      "eval_samples_per_second": 101.526,
      "eval_steps_per_second": 12.697,
      "step": 590000
    },
    {
      "epoch": 335.76,
      "learning_rate": 1e-05,
      "loss": 0.5726,
      "step": 600000
    },
    {
      "epoch": 335.76,
      "eval_loss": 0.5907247066497803,
      "eval_runtime": 76.2114,
      "eval_samples_per_second": 101.455,
      "eval_steps_per_second": 12.688,
      "step": 600000
    },
    {
      "epoch": 341.35,
      "learning_rate": 1e-05,
      "loss": 0.5714,
      "step": 610000
    },
    {
      "epoch": 341.35,
      "eval_loss": 0.5907928347587585,
      "eval_runtime": 76.207,
      "eval_samples_per_second": 101.461,
      "eval_steps_per_second": 12.689,
      "step": 610000
    },
    {
      "epoch": 346.95,
      "learning_rate": 1e-05,
      "loss": 0.5702,
      "step": 620000
    },
    {
      "epoch": 346.95,
      "eval_loss": 0.5909689664840698,
      "eval_runtime": 76.3919,
      "eval_samples_per_second": 101.215,
      "eval_steps_per_second": 12.658,
      "step": 620000
    },
    {
      "epoch": 352.55,
      "learning_rate": 1e-05,
      "loss": 0.5686,
      "step": 630000
    },
    {
      "epoch": 352.55,
      "eval_loss": 0.5894390940666199,
      "eval_runtime": 76.3494,
      "eval_samples_per_second": 101.271,
      "eval_steps_per_second": 12.665,
      "step": 630000
    },
    {
      "epoch": 358.14,
      "learning_rate": 1e-05,
      "loss": 0.5674,
      "step": 640000
    },
    {
      "epoch": 358.14,
      "eval_loss": 0.5915200114250183,
      "eval_runtime": 76.5727,
      "eval_samples_per_second": 100.976,
      "eval_steps_per_second": 12.629,
      "step": 640000
    },
    {
      "epoch": 363.74,
      "learning_rate": 1e-05,
      "loss": 0.5664,
      "step": 650000
    },
    {
      "epoch": 363.74,
      "eval_loss": 0.5875544548034668,
      "eval_runtime": 76.0536,
      "eval_samples_per_second": 101.665,
      "eval_steps_per_second": 12.715,
      "step": 650000
    },
    {
      "epoch": 369.33,
      "learning_rate": 1e-05,
      "loss": 0.565,
      "step": 660000
    },
    {
      "epoch": 369.33,
      "eval_loss": 0.5878584980964661,
      "eval_runtime": 76.1299,
      "eval_samples_per_second": 101.563,
      "eval_steps_per_second": 12.702,
      "step": 660000
    },
    {
      "epoch": 374.93,
      "learning_rate": 1e-05,
      "loss": 0.5636,
      "step": 670000
    },
    {
      "epoch": 374.93,
      "eval_loss": 0.5897438526153564,
      "eval_runtime": 76.3557,
      "eval_samples_per_second": 101.263,
      "eval_steps_per_second": 12.664,
      "step": 670000
    },
    {
      "epoch": 380.53,
      "learning_rate": 1e-05,
      "loss": 0.5625,
      "step": 680000
    },
    {
      "epoch": 380.53,
      "eval_loss": 0.5888833999633789,
      "eval_runtime": 76.7072,
      "eval_samples_per_second": 100.799,
      "eval_steps_per_second": 12.606,
      "step": 680000
    },
    {
      "epoch": 386.12,
      "learning_rate": 1e-05,
      "loss": 0.5609,
      "step": 690000
    },
    {
      "epoch": 386.12,
      "eval_loss": 0.5903308987617493,
      "eval_runtime": 76.5139,
      "eval_samples_per_second": 101.053,
      "eval_steps_per_second": 12.638,
      "step": 690000
    },
    {
      "epoch": 391.72,
      "learning_rate": 1e-05,
      "loss": 0.5594,
      "step": 700000
    },
    {
      "epoch": 391.72,
      "eval_loss": 0.5877216458320618,
      "eval_runtime": 76.4574,
      "eval_samples_per_second": 101.128,
      "eval_steps_per_second": 12.648,
      "step": 700000
    },
    {
      "epoch": 397.31,
      "learning_rate": 1e-05,
      "loss": 0.5584,
      "step": 710000
    },
    {
      "epoch": 397.31,
      "eval_loss": 0.5875140428543091,
      "eval_runtime": 76.1634,
      "eval_samples_per_second": 101.519,
      "eval_steps_per_second": 12.696,
      "step": 710000
    },
    {
      "epoch": 402.91,
      "learning_rate": 1e-05,
      "loss": 0.5573,
      "step": 720000
    },
    {
      "epoch": 402.91,
      "eval_loss": 0.5887530446052551,
      "eval_runtime": 76.2807,
      "eval_samples_per_second": 101.362,
      "eval_steps_per_second": 12.677,
      "step": 720000
    },
    {
      "epoch": 408.51,
      "learning_rate": 1e-05,
      "loss": 0.5561,
      "step": 730000
    },
    {
      "epoch": 408.51,
      "eval_loss": 0.5863147974014282,
      "eval_runtime": 76.2255,
      "eval_samples_per_second": 101.436,
      "eval_steps_per_second": 12.686,
      "step": 730000
    }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 560,
  "total_flos": 3.197989282913906e+21,
  "trial_name": null,
  "trial_params": null
}