LexLLMv0.0.0.x.10.15a / trainer_state.json
andrealexroom's picture
Upload folder using huggingface_hub
af662e6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09460370994940978,
"eval_steps": 187,
"global_step": 1122,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 8.5,
"learning_rate": 1e-05,
"loss": 1.9119,
"step": 1
},
{
"epoch": 0.02,
"grad_norm": 4.59375,
"learning_rate": 1e-05,
"loss": 1.7164,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_privacy_sources_loss": 1.0334585905075073,
"eval_validation_privacy_sources_runtime": 212.2756,
"eval_validation_privacy_sources_samples_per_second": 1.804,
"eval_validation_privacy_sources_steps_per_second": 0.904,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_agenda_digitale_loss": 1.5160526037216187,
"eval_validation_agenda_digitale_runtime": 188.4106,
"eval_validation_agenda_digitale_samples_per_second": 1.805,
"eval_validation_agenda_digitale_steps_per_second": 0.902,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_leggepertutti_loss": 1.3417482376098633,
"eval_validation_leggepertutti_runtime": 35.8837,
"eval_validation_leggepertutti_samples_per_second": 1.811,
"eval_validation_leggepertutti_steps_per_second": 0.92,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_altalexprivacy_loss": 1.3672271966934204,
"eval_validation_altalexprivacy_runtime": 58.6063,
"eval_validation_altalexprivacy_samples_per_second": 1.809,
"eval_validation_altalexprivacy_steps_per_second": 0.904,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_mc4_it_loss": 1.9217756986618042,
"eval_validation_mc4_it_runtime": 78.6277,
"eval_validation_mc4_it_samples_per_second": 1.806,
"eval_validation_mc4_it_steps_per_second": 0.903,
"step": 187
},
{
"epoch": 0.02,
"eval_validation_dolma_loss": 1.9627543687820435,
"eval_validation_dolma_runtime": 54.0088,
"eval_validation_dolma_samples_per_second": 1.815,
"eval_validation_dolma_steps_per_second": 0.907,
"step": 187
},
{
"epoch": 0.03,
"grad_norm": 3.859375,
"learning_rate": 1e-05,
"loss": 1.6579,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_privacy_sources_loss": 0.9830030798912048,
"eval_validation_privacy_sources_runtime": 210.811,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_agenda_digitale_loss": 1.4846152067184448,
"eval_validation_agenda_digitale_runtime": 187.1837,
"eval_validation_agenda_digitale_samples_per_second": 1.816,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_leggepertutti_loss": 1.3172152042388916,
"eval_validation_leggepertutti_runtime": 35.7866,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_altalexprivacy_loss": 1.3326139450073242,
"eval_validation_altalexprivacy_runtime": 58.3381,
"eval_validation_altalexprivacy_samples_per_second": 1.817,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_mc4_it_loss": 1.8915678262710571,
"eval_validation_mc4_it_runtime": 78.169,
"eval_validation_mc4_it_samples_per_second": 1.817,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.03,
"eval_validation_dolma_loss": 1.9608409404754639,
"eval_validation_dolma_runtime": 53.9716,
"eval_validation_dolma_samples_per_second": 1.816,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.05,
"grad_norm": 3.484375,
"learning_rate": 1e-05,
"loss": 1.6402,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_privacy_sources_loss": 0.9559596180915833,
"eval_validation_privacy_sources_runtime": 210.7246,
"eval_validation_privacy_sources_samples_per_second": 1.818,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_agenda_digitale_loss": 1.4720743894577026,
"eval_validation_agenda_digitale_runtime": 187.11,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_leggepertutti_loss": 1.3074837923049927,
"eval_validation_leggepertutti_runtime": 35.7797,
"eval_validation_leggepertutti_samples_per_second": 1.817,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_altalexprivacy_loss": 1.3142317533493042,
"eval_validation_altalexprivacy_runtime": 58.315,
"eval_validation_altalexprivacy_samples_per_second": 1.818,
"eval_validation_altalexprivacy_steps_per_second": 0.909,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_mc4_it_loss": 1.87451171875,
"eval_validation_mc4_it_runtime": 78.1299,
"eval_validation_mc4_it_samples_per_second": 1.817,
"eval_validation_mc4_it_steps_per_second": 0.909,
"step": 561
},
{
"epoch": 0.05,
"eval_validation_dolma_loss": 1.956404447555542,
"eval_validation_dolma_runtime": 53.9194,
"eval_validation_dolma_samples_per_second": 1.818,
"eval_validation_dolma_steps_per_second": 0.909,
"step": 561
},
{
"epoch": 0.06,
"grad_norm": 9.625,
"learning_rate": 1e-05,
"loss": 1.6284,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_privacy_sources_loss": 0.933682918548584,
"eval_validation_privacy_sources_runtime": 210.799,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_agenda_digitale_loss": 1.4578524827957153,
"eval_validation_agenda_digitale_runtime": 187.08,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_leggepertutti_loss": 1.2905200719833374,
"eval_validation_leggepertutti_runtime": 35.7738,
"eval_validation_leggepertutti_samples_per_second": 1.817,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_altalexprivacy_loss": 1.2977306842803955,
"eval_validation_altalexprivacy_runtime": 58.3215,
"eval_validation_altalexprivacy_samples_per_second": 1.818,
"eval_validation_altalexprivacy_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_mc4_it_loss": 1.8592724800109863,
"eval_validation_mc4_it_runtime": 78.1141,
"eval_validation_mc4_it_samples_per_second": 1.818,
"eval_validation_mc4_it_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.06,
"eval_validation_dolma_loss": 1.9574466943740845,
"eval_validation_dolma_runtime": 53.9297,
"eval_validation_dolma_samples_per_second": 1.817,
"eval_validation_dolma_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.08,
"grad_norm": 3.390625,
"learning_rate": 1e-05,
"loss": 1.6082,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_privacy_sources_loss": 0.919241726398468,
"eval_validation_privacy_sources_runtime": 210.6302,
"eval_validation_privacy_sources_samples_per_second": 1.818,
"eval_validation_privacy_sources_steps_per_second": 0.912,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_agenda_digitale_loss": 1.4438046216964722,
"eval_validation_agenda_digitale_runtime": 187.0727,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_leggepertutti_loss": 1.2822848558425903,
"eval_validation_leggepertutti_runtime": 35.7909,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_altalexprivacy_loss": 1.2801928520202637,
"eval_validation_altalexprivacy_runtime": 58.3369,
"eval_validation_altalexprivacy_samples_per_second": 1.817,
"eval_validation_altalexprivacy_steps_per_second": 0.909,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_mc4_it_loss": 1.8457767963409424,
"eval_validation_mc4_it_runtime": 78.1714,
"eval_validation_mc4_it_samples_per_second": 1.817,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 935
},
{
"epoch": 0.08,
"eval_validation_dolma_loss": 1.9588344097137451,
"eval_validation_dolma_runtime": 53.9317,
"eval_validation_dolma_samples_per_second": 1.817,
"eval_validation_dolma_steps_per_second": 0.909,
"step": 935
},
{
"epoch": 0.09,
"grad_norm": 3.46875,
"learning_rate": 1e-05,
"loss": 1.5989,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_privacy_sources_loss": 0.9032600522041321,
"eval_validation_privacy_sources_runtime": 210.7002,
"eval_validation_privacy_sources_samples_per_second": 1.818,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_agenda_digitale_loss": 1.433292031288147,
"eval_validation_agenda_digitale_runtime": 187.0521,
"eval_validation_agenda_digitale_samples_per_second": 1.818,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_leggepertutti_loss": 1.2763991355895996,
"eval_validation_leggepertutti_runtime": 35.7627,
"eval_validation_leggepertutti_samples_per_second": 1.818,
"eval_validation_leggepertutti_steps_per_second": 0.923,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_altalexprivacy_loss": 1.2663257122039795,
"eval_validation_altalexprivacy_runtime": 58.3197,
"eval_validation_altalexprivacy_samples_per_second": 1.818,
"eval_validation_altalexprivacy_steps_per_second": 0.909,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_mc4_it_loss": 1.8358547687530518,
"eval_validation_mc4_it_runtime": 78.1183,
"eval_validation_mc4_it_samples_per_second": 1.818,
"eval_validation_mc4_it_steps_per_second": 0.909,
"step": 1122
},
{
"epoch": 0.09,
"eval_validation_dolma_loss": 2.0097882747650146,
"eval_validation_dolma_runtime": 53.9266,
"eval_validation_dolma_samples_per_second": 1.817,
"eval_validation_dolma_steps_per_second": 0.909,
"step": 1122
}
],
"logging_steps": 187,
"max_steps": 23720,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 187,
"total_flos": 4.705710614079603e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}