shad_arxiv_clf / checkpoint-3000 /trainer_state.json
dsorokin's picture
add models
75c83d9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.1779661016949152,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.53,
"learning_rate": 1.978813559322034e-05,
"loss": 2.0034,
"step": 500
},
{
"epoch": 1.06,
"learning_rate": 1.9576271186440678e-05,
"loss": 1.3659,
"step": 1000
},
{
"epoch": 1.59,
"learning_rate": 1.9364406779661017e-05,
"loss": 1.2089,
"step": 1500
},
{
"epoch": 2.12,
"learning_rate": 1.9152542372881357e-05,
"loss": 1.1463,
"step": 2000
},
{
"epoch": 2.65,
"learning_rate": 1.8940677966101697e-05,
"loss": 1.0185,
"step": 2500
},
{
"epoch": 3.18,
"learning_rate": 1.8728813559322033e-05,
"loss": 0.9664,
"step": 3000
}
],
"max_steps": 47200,
"num_train_epochs": 50,
"total_flos": 9490525857741888.0,
"trial_name": null,
"trial_params": null
}