{
"best_global_step": 100,
"best_metric": 0.1666431427001953,
"best_model_checkpoint": "/output/evoxtral-lora/checkpoint-100",
"epoch": 1.9702970297029703,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09900990099009901,
"grad_norm": 5.802285194396973,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.9557,
"step": 5
},
{
"epoch": 0.19801980198019803,
"grad_norm": 2.306023597717285,
"learning_rate": 3.6e-05,
"loss": 0.461,
"step": 10
},
{
"epoch": 0.297029702970297,
"grad_norm": 2.0266661643981934,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.3281,
"step": 15
},
{
"epoch": 0.39603960396039606,
"grad_norm": 1.5811512470245361,
"learning_rate": 7.6e-05,
"loss": 0.2656,
"step": 20
},
{
"epoch": 0.49504950495049505,
"grad_norm": 1.5266460180282593,
"learning_rate": 9.6e-05,
"loss": 0.2477,
"step": 25
},
{
"epoch": 0.594059405940594,
"grad_norm": 1.0605043172836304,
"learning_rate": 0.000116,
"loss": 0.2199,
"step": 30
},
{
"epoch": 0.693069306930693,
"grad_norm": 1.4974918365478516,
"learning_rate": 0.00013600000000000003,
"loss": 0.1935,
"step": 35
},
{
"epoch": 0.7920792079207921,
"grad_norm": 1.1257297992706299,
"learning_rate": 0.00015600000000000002,
"loss": 0.1739,
"step": 40
},
{
"epoch": 0.8910891089108911,
"grad_norm": 0.9485541582107544,
"learning_rate": 0.00017600000000000002,
"loss": 0.1829,
"step": 45
},
{
"epoch": 0.9900990099009901,
"grad_norm": 0.6643406748771667,
"learning_rate": 0.000196,
"loss": 0.1887,
"step": 50
},
{
"epoch": 0.9900990099009901,
"eval_loss": 0.1841340959072113,
"eval_runtime": 17.7494,
"eval_samples_per_second": 5.69,
"eval_steps_per_second": 2.873,
"step": 50
},
{
"epoch": 1.0792079207920793,
"grad_norm": 0.7675807476043701,
"learning_rate": 0.0001992566788083908,
"loss": 0.1535,
"step": 55
},
{
"epoch": 1.1782178217821782,
"grad_norm": 0.6500945687294006,
"learning_rate": 0.0001962558656223516,
"loss": 0.15,
"step": 60
},
{
"epoch": 1.2772277227722773,
"grad_norm": 3.4771132469177246,
"learning_rate": 0.00019102070542141328,
"loss": 0.1058,
"step": 65
},
{
"epoch": 1.3762376237623761,
"grad_norm": 6.509069919586182,
"learning_rate": 0.0001836727197823842,
"loss": 0.148,
"step": 70
},
{
"epoch": 1.4752475247524752,
"grad_norm": 1.21564519405365,
"learning_rate": 0.0001743824744123196,
"loss": 0.154,
"step": 75
},
{
"epoch": 1.5742574257425743,
"grad_norm": 0.9891815185546875,
"learning_rate": 0.00016336561987834153,
"loss": 0.1472,
"step": 80
},
{
"epoch": 1.6732673267326734,
"grad_norm": 0.8960129022598267,
"learning_rate": 0.00015087788580152206,
"loss": 0.1388,
"step": 85
},
{
"epoch": 1.7722772277227723,
"grad_norm": 0.6417158842086792,
"learning_rate": 0.00013720914471250644,
"loss": 0.1544,
"step": 90
},
{
"epoch": 1.8712871287128712,
"grad_norm": 0.6971271634101868,
"learning_rate": 0.00012267668336210413,
"loss": 0.1216,
"step": 95
},
{
"epoch": 1.9702970297029703,
"grad_norm": 0.6400907039642334,
"learning_rate": 0.00010761783767709182,
"loss": 0.1347,
"step": 100
},
{
"epoch": 1.9702970297029703,
"eval_loss": 0.1666431427001953,
"eval_runtime": 17.7221,
"eval_samples_per_second": 5.699,
"eval_steps_per_second": 2.878,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.8528360565456896e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}