qgen-vit5 / trainer_state.json
ambivalent02's picture
Upload folder using huggingface_hub
f625b14 verified
{
"best_metric": 4.043909549713135,
"best_model_checkpoint": "vit5-base_vi/checkpoint-200",
"epoch": 1.5021459227467813,
"eval_steps": 50,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 0.0002948497854077253,
"loss": 2.5915,
"step": 20
},
{
"epoch": 0.17,
"learning_rate": 0.0002896995708154506,
"loss": 1.9115,
"step": 40
},
{
"epoch": 0.21,
"eval_loss": 4.051170349121094,
"eval_runtime": 8.1419,
"eval_samples_per_second": 280.646,
"eval_steps_per_second": 3.562,
"step": 50
},
{
"epoch": 0.26,
"learning_rate": 0.00028454935622317595,
"loss": 1.8611,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 0.00027939914163090123,
"loss": 1.8184,
"step": 80
},
{
"epoch": 0.43,
"learning_rate": 0.0002742489270386266,
"loss": 1.783,
"step": 100
},
{
"epoch": 0.43,
"eval_loss": 4.106208801269531,
"eval_runtime": 8.1079,
"eval_samples_per_second": 281.824,
"eval_steps_per_second": 3.577,
"step": 100
},
{
"epoch": 0.52,
"learning_rate": 0.0002690987124463519,
"loss": 1.7446,
"step": 120
},
{
"epoch": 0.6,
"learning_rate": 0.00026394849785407726,
"loss": 1.7667,
"step": 140
},
{
"epoch": 0.64,
"eval_loss": 4.075512886047363,
"eval_runtime": 8.0983,
"eval_samples_per_second": 282.159,
"eval_steps_per_second": 3.581,
"step": 150
},
{
"epoch": 0.69,
"learning_rate": 0.00025879828326180255,
"loss": 1.7709,
"step": 160
},
{
"epoch": 0.77,
"learning_rate": 0.0002536480686695279,
"loss": 1.7525,
"step": 180
},
{
"epoch": 0.86,
"learning_rate": 0.0002484978540772532,
"loss": 1.734,
"step": 200
},
{
"epoch": 0.86,
"eval_loss": 4.043909549713135,
"eval_runtime": 8.089,
"eval_samples_per_second": 282.482,
"eval_steps_per_second": 3.585,
"step": 200
},
{
"epoch": 0.94,
"learning_rate": 0.00024334763948497852,
"loss": 1.6832,
"step": 220
},
{
"epoch": 1.03,
"learning_rate": 0.00023819742489270384,
"loss": 1.5453,
"step": 240
},
{
"epoch": 1.07,
"eval_loss": 4.2676682472229,
"eval_runtime": 8.1069,
"eval_samples_per_second": 281.859,
"eval_steps_per_second": 3.577,
"step": 250
},
{
"epoch": 1.12,
"learning_rate": 0.00023304721030042918,
"loss": 1.2799,
"step": 260
},
{
"epoch": 1.2,
"learning_rate": 0.0002278969957081545,
"loss": 1.2939,
"step": 280
},
{
"epoch": 1.29,
"learning_rate": 0.0002227467811158798,
"loss": 1.3032,
"step": 300
},
{
"epoch": 1.29,
"eval_loss": 4.165661334991455,
"eval_runtime": 8.1052,
"eval_samples_per_second": 281.916,
"eval_steps_per_second": 3.578,
"step": 300
},
{
"epoch": 1.37,
"learning_rate": 0.00021759656652360512,
"loss": 1.3073,
"step": 320
},
{
"epoch": 1.46,
"learning_rate": 0.00021244635193133044,
"loss": 1.3146,
"step": 340
},
{
"epoch": 1.5,
"eval_loss": 4.157186508178711,
"eval_runtime": 8.1161,
"eval_samples_per_second": 281.54,
"eval_steps_per_second": 3.573,
"step": 350
}
],
"logging_steps": 20,
"max_steps": 1165,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"total_flos": 1.705082034847744e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}