{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.837993870423606,
"eval_steps": 10000,
"global_step": 168364,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.3758048038773133e-05,
"grad_norm": 4.939470291137695,
"learning_rate": 0.0,
"loss": 3.9948,
"step": 1
},
{
"epoch": 0.23758048038773136,
"grad_norm": 2.9926953315734863,
"learning_rate": 3.5623663578047044e-06,
"loss": 3.2694,
"step": 10000
},
{
"epoch": 0.23758048038773136,
"eval_cosine_accuracy": 0.9539926052093506,
"eval_loss": 0.764169454574585,
"eval_runtime": 26.3677,
"eval_samples_per_second": 358.583,
"eval_steps_per_second": 0.721,
"step": 10000
},
{
"epoch": 0.4751609607754627,
"grad_norm": 3.001824140548706,
"learning_rate": 7.124732715609409e-06,
"loss": 2.6376,
"step": 20000
},
{
"epoch": 0.4751609607754627,
"eval_cosine_accuracy": 0.9616076350212097,
"eval_loss": 0.6928738355636597,
"eval_runtime": 26.4682,
"eval_samples_per_second": 357.221,
"eval_steps_per_second": 0.718,
"step": 20000
},
{
"epoch": 0.712741441163194,
"grad_norm": 2.8817107677459717,
"learning_rate": 1.0687099073414113e-05,
"loss": 2.4101,
"step": 30000
},
{
"epoch": 0.712741441163194,
"eval_cosine_accuracy": 0.9626652598381042,
"eval_loss": 0.6757322549819946,
"eval_runtime": 26.9757,
"eval_samples_per_second": 350.5,
"eval_steps_per_second": 0.704,
"step": 30000
},
{
"epoch": 0.9503219215509254,
"grad_norm": 2.5843374729156494,
"learning_rate": 1.4249465431218818e-05,
"loss": 2.274,
"step": 40000
},
{
"epoch": 0.9503219215509254,
"eval_cosine_accuracy": 0.9663670063018799,
"eval_loss": 0.6607074737548828,
"eval_runtime": 27.5896,
"eval_samples_per_second": 342.701,
"eval_steps_per_second": 0.689,
"step": 40000
},
{
"epoch": 1.1879024019386568,
"grad_norm": 2.4177308082580566,
"learning_rate": 1.7811831789023522e-05,
"loss": 2.2229,
"step": 50000
},
{
"epoch": 1.1879024019386568,
"eval_cosine_accuracy": 0.9650978446006775,
"eval_loss": 0.6596832871437073,
"eval_runtime": 28.4294,
"eval_samples_per_second": 332.578,
"eval_steps_per_second": 0.668,
"step": 50000
},
{
"epoch": 1.425482882326388,
"grad_norm": 2.5173802375793457,
"learning_rate": 2.1374198146828226e-05,
"loss": 2.1147,
"step": 60000
},
{
"epoch": 1.425482882326388,
"eval_cosine_accuracy": 0.967107355594635,
"eval_loss": 0.6737655997276306,
"eval_runtime": 28.0648,
"eval_samples_per_second": 336.899,
"eval_steps_per_second": 0.677,
"step": 60000
},
{
"epoch": 1.6630633627141194,
"grad_norm": 2.5599939823150635,
"learning_rate": 2.4935851746258018e-05,
"loss": 2.0603,
"step": 70000
},
{
"epoch": 1.6630633627141194,
"eval_cosine_accuracy": 0.9672130942344666,
"eval_loss": 0.6480635404586792,
"eval_runtime": 27.0703,
"eval_samples_per_second": 349.276,
"eval_steps_per_second": 0.702,
"step": 70000
},
{
"epoch": 1.9006438431018506,
"grad_norm": 2.274336099624634,
"learning_rate": 2.8498218104062724e-05,
"loss": 2.0208,
"step": 80000
},
{
"epoch": 1.9006438431018506,
"eval_cosine_accuracy": 0.9679534435272217,
"eval_loss": 0.6634311079978943,
"eval_runtime": 26.5714,
"eval_samples_per_second": 355.834,
"eval_steps_per_second": 0.715,
"step": 80000
},
{
"epoch": 2.138224323489582,
"grad_norm": 38.125125885009766,
"learning_rate": 2.9652644925951974e-05,
"loss": 1.4874,
"step": 90000
},
{
"epoch": 2.138224323489582,
"eval_cosine_accuracy": 0.9315705895423889,
"eval_loss": 0.8910566568374634,
"eval_runtime": 27.0766,
"eval_samples_per_second": 349.195,
"eval_steps_per_second": 0.702,
"step": 90000
},
{
"epoch": 2.21436886745385,
"grad_norm": 1.930785059928894,
"learning_rate": 2.7474065271765394e-05,
"loss": 1.7984,
"step": 100000
},
{
"epoch": 2.21436886745385,
"eval_cosine_accuracy": 0.9664727449417114,
"eval_loss": 0.6721837520599365,
"eval_runtime": 29.2032,
"eval_samples_per_second": 323.766,
"eval_steps_per_second": 0.651,
"step": 100000
},
{
"epoch": 2.4519493478415813,
"grad_norm": 1.9230319261550903,
"learning_rate": 2.3579735325035144e-05,
"loss": 1.9529,
"step": 110000
},
{
"epoch": 2.4519493478415813,
"eval_cosine_accuracy": 0.9691168665885925,
"eval_loss": 0.6769542694091797,
"eval_runtime": 26.9479,
"eval_samples_per_second": 350.863,
"eval_steps_per_second": 0.705,
"step": 110000
},
{
"epoch": 2.6895298282293125,
"grad_norm": 1.9773123264312744,
"learning_rate": 1.8505599334305437e-05,
"loss": 1.9337,
"step": 120000
},
{
"epoch": 2.6895298282293125,
"eval_cosine_accuracy": 0.9710206389427185,
"eval_loss": 0.6512405276298523,
"eval_runtime": 26.87,
"eval_samples_per_second": 351.879,
"eval_steps_per_second": 0.707,
"step": 120000
},
{
"epoch": 3.0885462450405075,
"grad_norm": 12.169724464416504,
"learning_rate": 1.2950387678813519e-05,
"loss": 1.6912,
"step": 130000
},
{
"epoch": 3.0885462450405075,
"eval_cosine_accuracy": 0.9476467370986938,
"eval_loss": 0.8111016750335693,
"eval_runtime": 26.3122,
"eval_samples_per_second": 359.339,
"eval_steps_per_second": 0.722,
"step": 130000
},
{
"epoch": 3.164120595851845,
"grad_norm": 1.8629993200302124,
"learning_rate": 7.675028535386121e-06,
"loss": 1.4797,
"step": 140000
},
{
"epoch": 3.164120595851845,
"eval_cosine_accuracy": 0.9709148406982422,
"eval_loss": 0.6482858061790466,
"eval_runtime": 29.0766,
"eval_samples_per_second": 325.176,
"eval_steps_per_second": 0.653,
"step": 140000
},
{
"epoch": 3.401701076239576,
"grad_norm": 1.8768196105957031,
"learning_rate": 3.407595832899329e-06,
"loss": 1.9017,
"step": 150000
},
{
"epoch": 3.401701076239576,
"eval_cosine_accuracy": 0.9719725251197815,
"eval_loss": 0.6554318070411682,
"eval_runtime": 27.6894,
"eval_samples_per_second": 341.466,
"eval_steps_per_second": 0.686,
"step": 150000
},
{
"epoch": 3.6392815566273073,
"grad_norm": 2.0169677734375,
"learning_rate": 7.356406115445186e-07,
"loss": 1.8975,
"step": 160000
},
{
"epoch": 3.6392815566273073,
"eval_cosine_accuracy": 0.9717609882354736,
"eval_loss": 0.6504931449890137,
"eval_runtime": 27.7147,
"eval_samples_per_second": 341.155,
"eval_steps_per_second": 0.686,
"step": 160000
}
],
"logging_steps": 10000,
"max_steps": 168364,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 30000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}