sai-prakash-c's picture
Added trained model files
9ce87c9
{
"best_metric": 0.06405104696750641,
"best_model_checkpoint": "./trained_model/checkpoint-2144",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2680,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 35.17252731323242,
"learning_rate": 5e-06,
"loss": 3.7473,
"step": 134
},
{
"epoch": 1.0,
"eval_f1": 0.04727812496565904,
"eval_loss": 3.678004264831543,
"eval_runtime": 4.792,
"eval_samples_per_second": 98.706,
"eval_steps_per_second": 1.669,
"step": 134
},
{
"epoch": 2.0,
"grad_norm": 199.6754913330078,
"learning_rate": 1e-05,
"loss": 3.2514,
"step": 268
},
{
"epoch": 2.0,
"eval_f1": 0.266607271215907,
"eval_loss": 2.5477890968322754,
"eval_runtime": 4.7749,
"eval_samples_per_second": 99.059,
"eval_steps_per_second": 1.675,
"step": 268
},
{
"epoch": 3.0,
"grad_norm": 171.65182495117188,
"learning_rate": 9.444444444444445e-06,
"loss": 1.8984,
"step": 402
},
{
"epoch": 3.0,
"eval_f1": 0.7324588125106856,
"eval_loss": 1.1185234785079956,
"eval_runtime": 4.6545,
"eval_samples_per_second": 101.621,
"eval_steps_per_second": 1.719,
"step": 402
},
{
"epoch": 4.0,
"grad_norm": 35.59435272216797,
"learning_rate": 8.888888888888888e-06,
"loss": 1.0211,
"step": 536
},
{
"epoch": 4.0,
"eval_f1": 0.860704216321126,
"eval_loss": 0.5424026846885681,
"eval_runtime": 4.7373,
"eval_samples_per_second": 99.846,
"eval_steps_per_second": 1.689,
"step": 536
},
{
"epoch": 5.0,
"grad_norm": 3.402517318725586,
"learning_rate": 8.333333333333334e-06,
"loss": 0.6218,
"step": 670
},
{
"epoch": 5.0,
"eval_f1": 0.9176158056018094,
"eval_loss": 0.29218313097953796,
"eval_runtime": 4.6529,
"eval_samples_per_second": 101.658,
"eval_steps_per_second": 1.719,
"step": 670
},
{
"epoch": 6.0,
"grad_norm": 2.050248384475708,
"learning_rate": 7.77777777777778e-06,
"loss": 0.4323,
"step": 804
},
{
"epoch": 6.0,
"eval_f1": 0.9496526464988996,
"eval_loss": 0.19649288058280945,
"eval_runtime": 4.695,
"eval_samples_per_second": 100.745,
"eval_steps_per_second": 1.704,
"step": 804
},
{
"epoch": 7.0,
"grad_norm": 0.15155179798603058,
"learning_rate": 7.222222222222223e-06,
"loss": 0.3037,
"step": 938
},
{
"epoch": 7.0,
"eval_f1": 0.9658213272069875,
"eval_loss": 0.1448889821767807,
"eval_runtime": 4.6742,
"eval_samples_per_second": 101.194,
"eval_steps_per_second": 1.712,
"step": 938
},
{
"epoch": 8.0,
"grad_norm": 1.328462839126587,
"learning_rate": 6.666666666666667e-06,
"loss": 0.2473,
"step": 1072
},
{
"epoch": 8.0,
"eval_f1": 0.9722665265768234,
"eval_loss": 0.11456680297851562,
"eval_runtime": 4.6339,
"eval_samples_per_second": 102.073,
"eval_steps_per_second": 1.726,
"step": 1072
},
{
"epoch": 9.0,
"grad_norm": 313.9409484863281,
"learning_rate": 6.111111111111112e-06,
"loss": 0.2064,
"step": 1206
},
{
"epoch": 9.0,
"eval_f1": 0.9780034572033636,
"eval_loss": 0.09856382757425308,
"eval_runtime": 4.6934,
"eval_samples_per_second": 100.779,
"eval_steps_per_second": 1.705,
"step": 1206
},
{
"epoch": 10.0,
"grad_norm": 189.65391540527344,
"learning_rate": 5.555555555555557e-06,
"loss": 0.1533,
"step": 1340
},
{
"epoch": 10.0,
"eval_f1": 0.9778725803283007,
"eval_loss": 0.08937614411115646,
"eval_runtime": 4.6566,
"eval_samples_per_second": 101.577,
"eval_steps_per_second": 1.718,
"step": 1340
},
{
"epoch": 11.0,
"grad_norm": 22.184070587158203,
"learning_rate": 5e-06,
"loss": 0.1395,
"step": 1474
},
{
"epoch": 11.0,
"eval_f1": 0.9807261483480538,
"eval_loss": 0.0832449197769165,
"eval_runtime": 4.6769,
"eval_samples_per_second": 101.135,
"eval_steps_per_second": 1.711,
"step": 1474
},
{
"epoch": 12.0,
"grad_norm": 7.0296196937561035,
"learning_rate": 4.444444444444444e-06,
"loss": 0.1249,
"step": 1608
},
{
"epoch": 12.0,
"eval_f1": 0.9865706513457244,
"eval_loss": 0.07267424464225769,
"eval_runtime": 4.6735,
"eval_samples_per_second": 101.21,
"eval_steps_per_second": 1.712,
"step": 1608
},
{
"epoch": 13.0,
"grad_norm": 0.09863970428705215,
"learning_rate": 3.88888888888889e-06,
"loss": 0.1238,
"step": 1742
},
{
"epoch": 13.0,
"eval_f1": 0.9886923352646306,
"eval_loss": 0.0811920091509819,
"eval_runtime": 4.7013,
"eval_samples_per_second": 100.611,
"eval_steps_per_second": 1.702,
"step": 1742
},
{
"epoch": 14.0,
"grad_norm": 0.17580562829971313,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.1103,
"step": 1876
},
{
"epoch": 14.0,
"eval_f1": 0.9887811136168541,
"eval_loss": 0.07010301947593689,
"eval_runtime": 4.6405,
"eval_samples_per_second": 101.93,
"eval_steps_per_second": 1.724,
"step": 1876
},
{
"epoch": 15.0,
"grad_norm": 1.5174826383590698,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0953,
"step": 2010
},
{
"epoch": 15.0,
"eval_f1": 0.9882510714507506,
"eval_loss": 0.07096822559833527,
"eval_runtime": 4.6554,
"eval_samples_per_second": 101.602,
"eval_steps_per_second": 1.718,
"step": 2010
},
{
"epoch": 16.0,
"grad_norm": 0.018986046314239502,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0831,
"step": 2144
},
{
"epoch": 16.0,
"eval_f1": 0.9904615337218803,
"eval_loss": 0.06405104696750641,
"eval_runtime": 4.7164,
"eval_samples_per_second": 100.288,
"eval_steps_per_second": 1.696,
"step": 2144
},
{
"epoch": 17.0,
"grad_norm": 0.7639440298080444,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0813,
"step": 2278
},
{
"epoch": 17.0,
"eval_f1": 0.9904615337218803,
"eval_loss": 0.06663929671049118,
"eval_runtime": 4.6329,
"eval_samples_per_second": 102.095,
"eval_steps_per_second": 1.727,
"step": 2278
},
{
"epoch": 18.0,
"grad_norm": 0.013097506947815418,
"learning_rate": 1.111111111111111e-06,
"loss": 0.085,
"step": 2412
},
{
"epoch": 18.0,
"eval_f1": 0.9904615337218803,
"eval_loss": 0.06558458507061005,
"eval_runtime": 4.6458,
"eval_samples_per_second": 101.813,
"eval_steps_per_second": 1.722,
"step": 2412
},
{
"epoch": 19.0,
"grad_norm": 0.7450041770935059,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0716,
"step": 2546
},
{
"epoch": 19.0,
"eval_f1": 0.9904615337218803,
"eval_loss": 0.06490221619606018,
"eval_runtime": 4.6882,
"eval_samples_per_second": 100.891,
"eval_steps_per_second": 1.706,
"step": 2546
},
{
"epoch": 20.0,
"grad_norm": 0.07539009302854538,
"learning_rate": 0.0,
"loss": 0.084,
"step": 2680
},
{
"epoch": 20.0,
"eval_f1": 0.9904615337218803,
"eval_loss": 0.06570780277252197,
"eval_runtime": 4.6524,
"eval_samples_per_second": 101.669,
"eval_steps_per_second": 1.72,
"step": 2680
},
{
"epoch": 20.0,
"step": 2680,
"total_flos": 2.773139351595909e+18,
"train_loss": 0.6440982267038146,
"train_runtime": 2307.5728,
"train_samples_per_second": 36.896,
"train_steps_per_second": 1.161
},
{
"epoch": 20.0,
"eval_f1": 0.9689867070609877,
"eval_loss": 0.10238795727491379,
"eval_runtime": 125.4849,
"eval_samples_per_second": 100.65,
"eval_steps_per_second": 1.578,
"step": 2680
}
],
"logging_steps": 500,
"max_steps": 2680,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.773139351595909e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}