{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9966329966329966,
"eval_steps": 500,
"global_step": 185,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0053872053872053875,
"grad_norm": 0.0002994377864524722,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.8054,
"step": 1
},
{
"epoch": 0.05387205387205387,
"grad_norm": 0.0027586156502366066,
"learning_rate": 0.00010526315789473685,
"loss": 1.4791,
"step": 10
},
{
"epoch": 0.10774410774410774,
"grad_norm": 0.0025585689581930637,
"learning_rate": 0.00019879518072289158,
"loss": 1.3673,
"step": 20
},
{
"epoch": 0.16161616161616163,
"grad_norm": 0.003544808831065893,
"learning_rate": 0.00018674698795180723,
"loss": 1.2927,
"step": 30
},
{
"epoch": 0.21548821548821548,
"grad_norm": 0.004599341191351414,
"learning_rate": 0.0001746987951807229,
"loss": 1.0129,
"step": 40
},
{
"epoch": 0.26936026936026936,
"grad_norm": 0.002882454078644514,
"learning_rate": 0.00016265060240963855,
"loss": 0.9945,
"step": 50
},
{
"epoch": 0.32323232323232326,
"grad_norm": 0.0020832906011492014,
"learning_rate": 0.00015060240963855423,
"loss": 0.9074,
"step": 60
},
{
"epoch": 0.3771043771043771,
"grad_norm": 0.0012261488009244204,
"learning_rate": 0.00013855421686746988,
"loss": 0.8895,
"step": 70
},
{
"epoch": 0.43097643097643096,
"grad_norm": 0.0009639089112170041,
"learning_rate": 0.00012650602409638556,
"loss": 0.9137,
"step": 80
},
{
"epoch": 0.48484848484848486,
"grad_norm": 0.0008281469927169383,
"learning_rate": 0.0001144578313253012,
"loss": 0.8956,
"step": 90
},
{
"epoch": 0.5387205387205387,
"grad_norm": 0.002506977878510952,
"learning_rate": 0.00010240963855421688,
"loss": 0.9406,
"step": 100
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.0017301805783063173,
"learning_rate": 9.036144578313253e-05,
"loss": 0.8768,
"step": 110
},
{
"epoch": 0.6464646464646465,
"grad_norm": 0.0010177810909226537,
"learning_rate": 7.83132530120482e-05,
"loss": 0.8657,
"step": 120
},
{
"epoch": 0.7003367003367004,
"grad_norm": 0.0009739714441820979,
"learning_rate": 6.626506024096386e-05,
"loss": 0.891,
"step": 130
},
{
"epoch": 0.7542087542087542,
"grad_norm": 0.0008057655650191009,
"learning_rate": 5.4216867469879516e-05,
"loss": 0.8853,
"step": 140
},
{
"epoch": 0.8080808080808081,
"grad_norm": 0.0023450921289622784,
"learning_rate": 4.2168674698795186e-05,
"loss": 0.9276,
"step": 150
},
{
"epoch": 0.8619528619528619,
"grad_norm": 0.0011686653597280383,
"learning_rate": 3.012048192771085e-05,
"loss": 0.8676,
"step": 160
},
{
"epoch": 0.9158249158249159,
"grad_norm": 0.00091711146524176,
"learning_rate": 1.8072289156626505e-05,
"loss": 0.8601,
"step": 170
},
{
"epoch": 0.9696969696969697,
"grad_norm": 0.0008369652787223458,
"learning_rate": 6.024096385542169e-06,
"loss": 0.885,
"step": 180
}
],
"logging_steps": 10,
"max_steps": 185,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.985763285898625e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}