LexLLMv0.0.0.x.10.18a / trainer_state.json
andrealexroom's picture
Upload folder using huggingface_hub
90d6b4e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.043664808382709204,
"eval_steps": 187,
"global_step": 561,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 3.96875,
"learning_rate": 1e-05,
"loss": 0.6204,
"step": 1
},
{
"epoch": 0.01,
"grad_norm": 2.46875,
"learning_rate": 1e-05,
"loss": 0.7321,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_privacy_sources_loss": 1.0316673517227173,
"eval_validation_privacy_sources_runtime": 211.0568,
"eval_validation_privacy_sources_samples_per_second": 1.815,
"eval_validation_privacy_sources_steps_per_second": 0.91,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_agenda_digitale_loss": 1.550207495689392,
"eval_validation_agenda_digitale_runtime": 187.116,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_leggepertutti_loss": 1.384333610534668,
"eval_validation_leggepertutti_runtime": 35.7693,
"eval_validation_leggepertutti_samples_per_second": 1.817,
"eval_validation_leggepertutti_steps_per_second": 0.923,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_altalexprivacy_loss": 1.3865731954574585,
"eval_validation_altalexprivacy_runtime": 58.3528,
"eval_validation_altalexprivacy_samples_per_second": 1.817,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_stack_loss": 0.8435820937156677,
"eval_validation_stack_runtime": 176.7029,
"eval_validation_stack_samples_per_second": 1.817,
"eval_validation_stack_steps_per_second": 0.911,
"step": 187
},
{
"epoch": 0.03,
"grad_norm": 2.90625,
"learning_rate": 1e-05,
"loss": 0.6811,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_privacy_sources_loss": 0.9877639412879944,
"eval_validation_privacy_sources_runtime": 210.7786,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_agenda_digitale_loss": 1.5268653631210327,
"eval_validation_agenda_digitale_runtime": 187.1083,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_leggepertutti_loss": 1.3688914775848389,
"eval_validation_leggepertutti_runtime": 35.7855,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_altalexprivacy_loss": 1.3580296039581299,
"eval_validation_altalexprivacy_runtime": 58.359,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_stack_loss": 0.8437520861625671,
"eval_validation_stack_runtime": 176.687,
"eval_validation_stack_samples_per_second": 1.817,
"eval_validation_stack_steps_per_second": 0.911,
"step": 374
},
{
"epoch": 0.04,
"grad_norm": 2.515625,
"learning_rate": 1e-05,
"loss": 0.6448,
"step": 561
},
{
"epoch": 0.04,
"eval_validation_privacy_sources_loss": 0.9547243714332581,
"eval_validation_privacy_sources_runtime": 210.8602,
"eval_validation_privacy_sources_samples_per_second": 1.816,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 561
},
{
"epoch": 0.04,
"eval_validation_agenda_digitale_loss": 1.505064606666565,
"eval_validation_agenda_digitale_runtime": 187.2462,
"eval_validation_agenda_digitale_samples_per_second": 1.816,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.04,
"eval_validation_leggepertutti_loss": 1.3536248207092285,
"eval_validation_leggepertutti_runtime": 35.7938,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 561
},
{
"epoch": 0.04,
"eval_validation_altalexprivacy_loss": 1.3345048427581787,
"eval_validation_altalexprivacy_runtime": 58.3467,
"eval_validation_altalexprivacy_samples_per_second": 1.817,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.04,
"eval_validation_stack_loss": 0.8495179414749146,
"eval_validation_stack_runtime": 176.6904,
"eval_validation_stack_samples_per_second": 1.817,
"eval_validation_stack_steps_per_second": 0.911,
"step": 561
}
],
"logging_steps": 187,
"max_steps": 25694,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 187,
"total_flos": 2.3528553070398013e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}