LexLLMv0.0.0.x.10.17a / trainer_state.json
andrealexroom's picture
Upload folder using huggingface_hub
c46dc0f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09191659306478246,
"eval_steps": 187,
"global_step": 1496,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 9.0625,
"learning_rate": 1e-05,
"loss": 1.8816,
"step": 1
},
{
"epoch": 0.01,
"grad_norm": 3.953125,
"learning_rate": 1e-05,
"loss": 1.7744,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_privacy_sources_loss": 1.0537431240081787,
"eval_validation_privacy_sources_runtime": 211.2726,
"eval_validation_privacy_sources_samples_per_second": 1.813,
"eval_validation_privacy_sources_steps_per_second": 0.909,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_agenda_digitale_loss": 1.5260587930679321,
"eval_validation_agenda_digitale_runtime": 187.3877,
"eval_validation_agenda_digitale_samples_per_second": 1.814,
"eval_validation_agenda_digitale_steps_per_second": 0.907,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_leggepertutti_loss": 1.3466496467590332,
"eval_validation_leggepertutti_runtime": 35.8176,
"eval_validation_leggepertutti_samples_per_second": 1.815,
"eval_validation_leggepertutti_steps_per_second": 0.921,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_altalexprivacy_loss": 1.3750169277191162,
"eval_validation_altalexprivacy_runtime": 58.3803,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_mc4_it_loss": 1.9072209596633911,
"eval_validation_mc4_it_runtime": 78.2378,
"eval_validation_mc4_it_samples_per_second": 1.815,
"eval_validation_mc4_it_steps_per_second": 0.907,
"step": 187
},
{
"epoch": 0.01,
"eval_validation_dolma_loss": 1.9709844589233398,
"eval_validation_dolma_runtime": 54.0008,
"eval_validation_dolma_samples_per_second": 1.815,
"eval_validation_dolma_steps_per_second": 0.907,
"step": 187
},
{
"epoch": 0.02,
"grad_norm": 4.09375,
"learning_rate": 1e-05,
"loss": 1.7218,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_privacy_sources_loss": 1.0035160779953003,
"eval_validation_privacy_sources_runtime": 210.9306,
"eval_validation_privacy_sources_samples_per_second": 1.816,
"eval_validation_privacy_sources_steps_per_second": 0.91,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_agenda_digitale_loss": 1.4919817447662354,
"eval_validation_agenda_digitale_runtime": 187.2244,
"eval_validation_agenda_digitale_samples_per_second": 1.816,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_leggepertutti_loss": 1.3234634399414062,
"eval_validation_leggepertutti_runtime": 35.8104,
"eval_validation_leggepertutti_samples_per_second": 1.815,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_altalexprivacy_loss": 1.3424029350280762,
"eval_validation_altalexprivacy_runtime": 58.367,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_mc4_it_loss": 1.8769702911376953,
"eval_validation_mc4_it_runtime": 78.1952,
"eval_validation_mc4_it_samples_per_second": 1.816,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.02,
"eval_validation_dolma_loss": 1.9725910425186157,
"eval_validation_dolma_runtime": 53.957,
"eval_validation_dolma_samples_per_second": 1.816,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 374
},
{
"epoch": 0.03,
"grad_norm": 3.53125,
"learning_rate": 1e-05,
"loss": 1.7007,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_privacy_sources_loss": 0.9692163467407227,
"eval_validation_privacy_sources_runtime": 210.8774,
"eval_validation_privacy_sources_samples_per_second": 1.816,
"eval_validation_privacy_sources_steps_per_second": 0.91,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_agenda_digitale_loss": 1.4738318920135498,
"eval_validation_agenda_digitale_runtime": 187.1789,
"eval_validation_agenda_digitale_samples_per_second": 1.816,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_leggepertutti_loss": 1.3062564134597778,
"eval_validation_leggepertutti_runtime": 35.7887,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_altalexprivacy_loss": 1.3234490156173706,
"eval_validation_altalexprivacy_runtime": 58.3587,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_mc4_it_loss": 1.8544881343841553,
"eval_validation_mc4_it_runtime": 78.1604,
"eval_validation_mc4_it_samples_per_second": 1.817,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.03,
"eval_validation_dolma_loss": 1.969956874847412,
"eval_validation_dolma_runtime": 53.9662,
"eval_validation_dolma_samples_per_second": 1.816,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 561
},
{
"epoch": 0.05,
"grad_norm": 3.796875,
"learning_rate": 1e-05,
"loss": 1.6866,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_privacy_sources_loss": 0.9504958391189575,
"eval_validation_privacy_sources_runtime": 210.7635,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_agenda_digitale_loss": 1.4634190797805786,
"eval_validation_agenda_digitale_runtime": 187.1135,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_leggepertutti_loss": 1.2935285568237305,
"eval_validation_leggepertutti_runtime": 35.7883,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_altalexprivacy_loss": 1.3104432821273804,
"eval_validation_altalexprivacy_runtime": 58.3511,
"eval_validation_altalexprivacy_samples_per_second": 1.817,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_mc4_it_loss": 1.8366450071334839,
"eval_validation_mc4_it_runtime": 78.1461,
"eval_validation_mc4_it_samples_per_second": 1.817,
"eval_validation_mc4_it_steps_per_second": 0.909,
"step": 748
},
{
"epoch": 0.05,
"eval_validation_dolma_loss": 1.9702107906341553,
"eval_validation_dolma_runtime": 53.9537,
"eval_validation_dolma_samples_per_second": 1.816,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 748
},
{
"epoch": 0.06,
"grad_norm": 4.59375,
"learning_rate": 1e-05,
"loss": 1.6602,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_privacy_sources_loss": 0.9363130927085876,
"eval_validation_privacy_sources_runtime": 210.7426,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_agenda_digitale_loss": 1.4509363174438477,
"eval_validation_agenda_digitale_runtime": 187.1896,
"eval_validation_agenda_digitale_samples_per_second": 1.816,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_leggepertutti_loss": 1.2871097326278687,
"eval_validation_leggepertutti_runtime": 35.8163,
"eval_validation_leggepertutti_samples_per_second": 1.815,
"eval_validation_leggepertutti_steps_per_second": 0.921,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_altalexprivacy_loss": 1.2997937202453613,
"eval_validation_altalexprivacy_runtime": 58.363,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_mc4_it_loss": 1.8237556219100952,
"eval_validation_mc4_it_runtime": 78.2373,
"eval_validation_mc4_it_samples_per_second": 1.815,
"eval_validation_mc4_it_steps_per_second": 0.907,
"step": 935
},
{
"epoch": 0.06,
"eval_validation_dolma_loss": 1.9671412706375122,
"eval_validation_dolma_runtime": 53.9727,
"eval_validation_dolma_samples_per_second": 1.816,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 935
},
{
"epoch": 0.07,
"grad_norm": 3.203125,
"learning_rate": 1e-05,
"loss": 1.6552,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_privacy_sources_loss": 0.9231541752815247,
"eval_validation_privacy_sources_runtime": 210.8141,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_agenda_digitale_loss": 1.4408273696899414,
"eval_validation_agenda_digitale_runtime": 187.3646,
"eval_validation_agenda_digitale_samples_per_second": 1.815,
"eval_validation_agenda_digitale_steps_per_second": 0.907,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_leggepertutti_loss": 1.277867317199707,
"eval_validation_leggepertutti_runtime": 35.8389,
"eval_validation_leggepertutti_samples_per_second": 1.814,
"eval_validation_leggepertutti_steps_per_second": 0.921,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_altalexprivacy_loss": 1.2885111570358276,
"eval_validation_altalexprivacy_runtime": 58.4214,
"eval_validation_altalexprivacy_samples_per_second": 1.814,
"eval_validation_altalexprivacy_steps_per_second": 0.907,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_mc4_it_loss": 1.81148362159729,
"eval_validation_mc4_it_runtime": 78.2293,
"eval_validation_mc4_it_samples_per_second": 1.815,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 1122
},
{
"epoch": 0.07,
"eval_validation_dolma_loss": 1.9675589799880981,
"eval_validation_dolma_runtime": 53.9854,
"eval_validation_dolma_samples_per_second": 1.815,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 1122
},
{
"epoch": 0.08,
"grad_norm": 3.625,
"learning_rate": 1e-05,
"loss": 1.6499,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_privacy_sources_loss": 0.9118366241455078,
"eval_validation_privacy_sources_runtime": 210.7511,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_agenda_digitale_loss": 1.4312654733657837,
"eval_validation_agenda_digitale_runtime": 187.1317,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_leggepertutti_loss": 1.270119547843933,
"eval_validation_leggepertutti_runtime": 35.7811,
"eval_validation_leggepertutti_samples_per_second": 1.817,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_altalexprivacy_loss": 1.2772430181503296,
"eval_validation_altalexprivacy_runtime": 58.3202,
"eval_validation_altalexprivacy_samples_per_second": 1.818,
"eval_validation_altalexprivacy_steps_per_second": 0.909,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_mc4_it_loss": 1.8016338348388672,
"eval_validation_mc4_it_runtime": 78.1274,
"eval_validation_mc4_it_samples_per_second": 1.818,
"eval_validation_mc4_it_steps_per_second": 0.909,
"step": 1309
},
{
"epoch": 0.08,
"eval_validation_dolma_loss": 1.967103123664856,
"eval_validation_dolma_runtime": 53.9496,
"eval_validation_dolma_samples_per_second": 1.817,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 1309
},
{
"epoch": 0.09,
"grad_norm": 3.40625,
"learning_rate": 1e-05,
"loss": 1.6352,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_privacy_sources_loss": 0.904914140701294,
"eval_validation_privacy_sources_runtime": 210.817,
"eval_validation_privacy_sources_samples_per_second": 1.817,
"eval_validation_privacy_sources_steps_per_second": 0.911,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_agenda_digitale_loss": 1.4257241487503052,
"eval_validation_agenda_digitale_runtime": 187.1239,
"eval_validation_agenda_digitale_samples_per_second": 1.817,
"eval_validation_agenda_digitale_steps_per_second": 0.908,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_leggepertutti_loss": 1.2642766237258911,
"eval_validation_leggepertutti_runtime": 35.7971,
"eval_validation_leggepertutti_samples_per_second": 1.816,
"eval_validation_leggepertutti_steps_per_second": 0.922,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_altalexprivacy_loss": 1.2678091526031494,
"eval_validation_altalexprivacy_runtime": 58.3563,
"eval_validation_altalexprivacy_samples_per_second": 1.816,
"eval_validation_altalexprivacy_steps_per_second": 0.908,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_mc4_it_loss": 1.793602705001831,
"eval_validation_mc4_it_runtime": 78.1834,
"eval_validation_mc4_it_samples_per_second": 1.816,
"eval_validation_mc4_it_steps_per_second": 0.908,
"step": 1496
},
{
"epoch": 0.09,
"eval_validation_dolma_loss": 1.9663574695587158,
"eval_validation_dolma_runtime": 53.9397,
"eval_validation_dolma_samples_per_second": 1.817,
"eval_validation_dolma_steps_per_second": 0.908,
"step": 1496
}
],
"logging_steps": 187,
"max_steps": 32550,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 187,
"total_flos": 6.274280818772804e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}