mistral-instruct-7b-news / trainer_state.json
bachbouch's picture
Upload 8 files
e22b17d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.11857121683711279,
"eval_steps": 50,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.3869346733668342e-05,
"loss": 1.7706,
"step": 50
},
{
"epoch": 0.01,
"eval_loss": 1.6274527311325073,
"eval_runtime": 2691.0986,
"eval_samples_per_second": 5.73,
"eval_steps_per_second": 0.358,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 2.2613065326633167e-05,
"loss": 1.5912,
"step": 100
},
{
"epoch": 0.03,
"eval_loss": 1.5995770692825317,
"eval_runtime": 2694.2411,
"eval_samples_per_second": 5.724,
"eval_steps_per_second": 0.358,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 2.135678391959799e-05,
"loss": 1.5518,
"step": 150
},
{
"epoch": 0.04,
"eval_loss": 1.5821588039398193,
"eval_runtime": 2684.3159,
"eval_samples_per_second": 5.745,
"eval_steps_per_second": 0.359,
"step": 150
},
{
"epoch": 0.06,
"learning_rate": 2.0100502512562815e-05,
"loss": 1.5279,
"step": 200
},
{
"epoch": 0.06,
"eval_loss": 1.5699576139450073,
"eval_runtime": 2686.6745,
"eval_samples_per_second": 5.74,
"eval_steps_per_second": 0.359,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 1.884422110552764e-05,
"loss": 1.5335,
"step": 250
},
{
"epoch": 0.07,
"eval_loss": 1.563302755355835,
"eval_runtime": 2703.1674,
"eval_samples_per_second": 5.705,
"eval_steps_per_second": 0.357,
"step": 250
},
{
"epoch": 0.09,
"learning_rate": 1.7587939698492464e-05,
"loss": 1.5505,
"step": 300
},
{
"epoch": 0.09,
"eval_loss": 1.5592865943908691,
"eval_runtime": 2692.6951,
"eval_samples_per_second": 5.727,
"eval_steps_per_second": 0.358,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 1.6331658291457288e-05,
"loss": 1.5241,
"step": 350
},
{
"epoch": 0.1,
"eval_loss": 1.5557039976119995,
"eval_runtime": 2694.2866,
"eval_samples_per_second": 5.724,
"eval_steps_per_second": 0.358,
"step": 350
},
{
"epoch": 0.12,
"learning_rate": 1.507537688442211e-05,
"loss": 1.5386,
"step": 400
},
{
"epoch": 0.12,
"eval_loss": 1.5525078773498535,
"eval_runtime": 2696.0209,
"eval_samples_per_second": 5.72,
"eval_steps_per_second": 0.358,
"step": 400
}
],
"logging_steps": 50,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 4.107986141184e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}