{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9901768172888015,
"eval_steps": 500,
"global_step": 254,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07858546168958742,
"grad_norm": 0.9010041952133179,
"learning_rate": 2e-05,
"loss": 1.3369,
"step": 10
},
{
"epoch": 0.15717092337917485,
"grad_norm": 0.9163873791694641,
"learning_rate": 4e-05,
"loss": 1.3095,
"step": 20
},
{
"epoch": 0.2357563850687623,
"grad_norm": 0.8760176301002502,
"learning_rate": 6e-05,
"loss": 1.2597,
"step": 30
},
{
"epoch": 0.3143418467583497,
"grad_norm": 0.9276160001754761,
"learning_rate": 8e-05,
"loss": 1.1331,
"step": 40
},
{
"epoch": 0.3929273084479371,
"grad_norm": 0.9490565061569214,
"learning_rate": 0.0001,
"loss": 0.9676,
"step": 50
},
{
"epoch": 0.4715127701375246,
"grad_norm": 1.088897943496704,
"learning_rate": 9.509803921568627e-05,
"loss": 0.7618,
"step": 60
},
{
"epoch": 0.550098231827112,
"grad_norm": 0.7143833041191101,
"learning_rate": 9.019607843137255e-05,
"loss": 0.6247,
"step": 70
},
{
"epoch": 0.6286836935166994,
"grad_norm": 0.6208414435386658,
"learning_rate": 8.529411764705883e-05,
"loss": 0.5592,
"step": 80
},
{
"epoch": 0.7072691552062869,
"grad_norm": 0.5866327881813049,
"learning_rate": 8.039215686274511e-05,
"loss": 0.5452,
"step": 90
},
{
"epoch": 0.7858546168958742,
"grad_norm": 0.48709139227867126,
"learning_rate": 7.549019607843137e-05,
"loss": 0.5323,
"step": 100
},
{
"epoch": 0.8644400785854617,
"grad_norm": 0.45397838950157166,
"learning_rate": 7.058823529411765e-05,
"loss": 0.498,
"step": 110
},
{
"epoch": 0.9430255402750491,
"grad_norm": 0.44005146622657776,
"learning_rate": 6.568627450980392e-05,
"loss": 0.5066,
"step": 120
},
{
"epoch": 1.0157170923379175,
"grad_norm": 0.40322428941726685,
"learning_rate": 6.078431372549019e-05,
"loss": 0.4765,
"step": 130
},
{
"epoch": 1.0943025540275049,
"grad_norm": 0.3713725805282593,
"learning_rate": 5.588235294117647e-05,
"loss": 0.4957,
"step": 140
},
{
"epoch": 1.1728880157170924,
"grad_norm": 0.3928331732749939,
"learning_rate": 5.0980392156862745e-05,
"loss": 0.4857,
"step": 150
},
{
"epoch": 1.2514734774066798,
"grad_norm": 0.5155062675476074,
"learning_rate": 4.607843137254902e-05,
"loss": 0.4885,
"step": 160
},
{
"epoch": 1.3300589390962672,
"grad_norm": 0.43860283493995667,
"learning_rate": 4.11764705882353e-05,
"loss": 0.5031,
"step": 170
},
{
"epoch": 1.4086444007858545,
"grad_norm": 0.3964126706123352,
"learning_rate": 3.627450980392157e-05,
"loss": 0.5079,
"step": 180
},
{
"epoch": 1.487229862475442,
"grad_norm": 0.401035338640213,
"learning_rate": 3.137254901960784e-05,
"loss": 0.4938,
"step": 190
},
{
"epoch": 1.5658153241650294,
"grad_norm": 0.4307032525539398,
"learning_rate": 2.647058823529412e-05,
"loss": 0.4737,
"step": 200
},
{
"epoch": 1.644400785854617,
"grad_norm": 0.44220617413520813,
"learning_rate": 2.1568627450980395e-05,
"loss": 0.4633,
"step": 210
},
{
"epoch": 1.7229862475442044,
"grad_norm": 0.4620322287082672,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.4723,
"step": 220
},
{
"epoch": 1.8015717092337917,
"grad_norm": 0.41082972288131714,
"learning_rate": 1.1764705882352942e-05,
"loss": 0.4767,
"step": 230
},
{
"epoch": 1.880157170923379,
"grad_norm": 0.5318537950515747,
"learning_rate": 6.862745098039216e-06,
"loss": 0.4725,
"step": 240
},
{
"epoch": 1.9587426326129664,
"grad_norm": 0.3858027458190918,
"learning_rate": 1.96078431372549e-06,
"loss": 0.4795,
"step": 250
}
],
"logging_steps": 10,
"max_steps": 254,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6444092488679424.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}