LexLLMv0.0.2_16kcxt / trainer_state.json
andrealexroom's picture
Upload folder using huggingface_hub
5367dd4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.23360399750156152,
"eval_steps": 187,
"global_step": 935,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00024984384759525296,
"grad_norm": 72.0,
"learning_rate": 2.5000000000000004e-07,
"loss": 2.0834,
"step": 1
},
{
"epoch": 0.046720799500312306,
"grad_norm": 0.58984375,
"learning_rate": 1e-05,
"loss": 1.51,
"step": 187
},
{
"epoch": 0.046720799500312306,
"eval_validation_loss": 1.2587465047836304,
"eval_validation_runtime": 107.1911,
"eval_validation_samples_per_second": 1.119,
"eval_validation_steps_per_second": 1.119,
"step": 187
},
{
"epoch": 0.046720799500312306,
"eval_validation_privacy_sources_loss": 1.2587465047836304,
"eval_validation_privacy_sources_runtime": 107.266,
"eval_validation_privacy_sources_samples_per_second": 1.119,
"eval_validation_privacy_sources_steps_per_second": 1.119,
"step": 187
},
{
"epoch": 0.046720799500312306,
"eval_validation_agenda_digitale_loss": 1.7502987384796143,
"eval_validation_agenda_digitale_runtime": 97.3699,
"eval_validation_agenda_digitale_samples_per_second": 1.119,
"eval_validation_agenda_digitale_steps_per_second": 1.119,
"step": 187
},
{
"epoch": 0.046720799500312306,
"eval_validation_legal_articles_loss": 1.6148390769958496,
"eval_validation_legal_articles_runtime": 155.4352,
"eval_validation_legal_articles_samples_per_second": 1.119,
"eval_validation_legal_articles_steps_per_second": 1.119,
"step": 187
},
{
"epoch": 0.046720799500312306,
"eval_validation_leggepertutti_loss": 1.400872826576233,
"eval_validation_leggepertutti_runtime": 15.1917,
"eval_validation_leggepertutti_samples_per_second": 1.119,
"eval_validation_leggepertutti_steps_per_second": 1.119,
"step": 187
},
{
"epoch": 0.09344159900062461,
"grad_norm": 0.58984375,
"learning_rate": 1e-05,
"loss": 1.4038,
"step": 374
},
{
"epoch": 0.09344159900062461,
"eval_validation_loss": 1.2273409366607666,
"eval_validation_runtime": 107.1014,
"eval_validation_samples_per_second": 1.12,
"eval_validation_steps_per_second": 1.12,
"step": 374
},
{
"epoch": 0.09344159900062461,
"eval_validation_privacy_sources_loss": 1.2273409366607666,
"eval_validation_privacy_sources_runtime": 107.0462,
"eval_validation_privacy_sources_samples_per_second": 1.121,
"eval_validation_privacy_sources_steps_per_second": 1.121,
"step": 374
},
{
"epoch": 0.09344159900062461,
"eval_validation_agenda_digitale_loss": 1.745036005973816,
"eval_validation_agenda_digitale_runtime": 97.2787,
"eval_validation_agenda_digitale_samples_per_second": 1.12,
"eval_validation_agenda_digitale_steps_per_second": 1.12,
"step": 374
},
{
"epoch": 0.09344159900062461,
"eval_validation_legal_articles_loss": 1.6085145473480225,
"eval_validation_legal_articles_runtime": 155.2206,
"eval_validation_legal_articles_samples_per_second": 1.121,
"eval_validation_legal_articles_steps_per_second": 1.121,
"step": 374
},
{
"epoch": 0.09344159900062461,
"eval_validation_leggepertutti_loss": 1.3941675424575806,
"eval_validation_leggepertutti_runtime": 15.1745,
"eval_validation_leggepertutti_samples_per_second": 1.12,
"eval_validation_leggepertutti_steps_per_second": 1.12,
"step": 374
},
{
"epoch": 0.14016239850093692,
"grad_norm": 0.53125,
"learning_rate": 1e-05,
"loss": 1.3853,
"step": 561
},
{
"epoch": 0.14016239850093692,
"eval_validation_loss": 1.2080044746398926,
"eval_validation_runtime": 107.2381,
"eval_validation_samples_per_second": 1.119,
"eval_validation_steps_per_second": 1.119,
"step": 561
},
{
"epoch": 0.14016239850093692,
"eval_validation_privacy_sources_loss": 1.2080044746398926,
"eval_validation_privacy_sources_runtime": 107.2154,
"eval_validation_privacy_sources_samples_per_second": 1.119,
"eval_validation_privacy_sources_steps_per_second": 1.119,
"step": 561
},
{
"epoch": 0.14016239850093692,
"eval_validation_agenda_digitale_loss": 1.7419259548187256,
"eval_validation_agenda_digitale_runtime": 97.4488,
"eval_validation_agenda_digitale_samples_per_second": 1.119,
"eval_validation_agenda_digitale_steps_per_second": 1.119,
"step": 561
},
{
"epoch": 0.14016239850093692,
"eval_validation_legal_articles_loss": 1.6046104431152344,
"eval_validation_legal_articles_runtime": 155.5238,
"eval_validation_legal_articles_samples_per_second": 1.119,
"eval_validation_legal_articles_steps_per_second": 1.119,
"step": 561
},
{
"epoch": 0.14016239850093692,
"eval_validation_leggepertutti_loss": 1.3902024030685425,
"eval_validation_leggepertutti_runtime": 15.1931,
"eval_validation_leggepertutti_samples_per_second": 1.119,
"eval_validation_leggepertutti_steps_per_second": 1.119,
"step": 561
},
{
"epoch": 0.18688319800124922,
"grad_norm": 0.58984375,
"learning_rate": 1e-05,
"loss": 1.3639,
"step": 748
},
{
"epoch": 0.18688319800124922,
"eval_validation_loss": 1.1931297779083252,
"eval_validation_runtime": 107.2136,
"eval_validation_samples_per_second": 1.119,
"eval_validation_steps_per_second": 1.119,
"step": 748
},
{
"epoch": 0.18688319800124922,
"eval_validation_privacy_sources_loss": 1.1931297779083252,
"eval_validation_privacy_sources_runtime": 107.2505,
"eval_validation_privacy_sources_samples_per_second": 1.119,
"eval_validation_privacy_sources_steps_per_second": 1.119,
"step": 748
},
{
"epoch": 0.18688319800124922,
"eval_validation_agenda_digitale_loss": 1.739527940750122,
"eval_validation_agenda_digitale_runtime": 97.3978,
"eval_validation_agenda_digitale_samples_per_second": 1.119,
"eval_validation_agenda_digitale_steps_per_second": 1.119,
"step": 748
},
{
"epoch": 0.18688319800124922,
"eval_validation_legal_articles_loss": 1.601611614227295,
"eval_validation_legal_articles_runtime": 155.4821,
"eval_validation_legal_articles_samples_per_second": 1.119,
"eval_validation_legal_articles_steps_per_second": 1.119,
"step": 748
},
{
"epoch": 0.18688319800124922,
"eval_validation_leggepertutti_loss": 1.3867729902267456,
"eval_validation_leggepertutti_runtime": 15.1827,
"eval_validation_leggepertutti_samples_per_second": 1.12,
"eval_validation_leggepertutti_steps_per_second": 1.12,
"step": 748
},
{
"epoch": 0.23360399750156152,
"grad_norm": 0.58203125,
"learning_rate": 1e-05,
"loss": 1.3565,
"step": 935
},
{
"epoch": 0.23360399750156152,
"eval_validation_loss": 1.182249903678894,
"eval_validation_runtime": 107.2517,
"eval_validation_samples_per_second": 1.119,
"eval_validation_steps_per_second": 1.119,
"step": 935
},
{
"epoch": 0.23360399750156152,
"eval_validation_privacy_sources_loss": 1.182249903678894,
"eval_validation_privacy_sources_runtime": 107.2175,
"eval_validation_privacy_sources_samples_per_second": 1.119,
"eval_validation_privacy_sources_steps_per_second": 1.119,
"step": 935
},
{
"epoch": 0.23360399750156152,
"eval_validation_agenda_digitale_loss": 1.7376903295516968,
"eval_validation_agenda_digitale_runtime": 97.43,
"eval_validation_agenda_digitale_samples_per_second": 1.119,
"eval_validation_agenda_digitale_steps_per_second": 1.119,
"step": 935
},
{
"epoch": 0.23360399750156152,
"eval_validation_legal_articles_loss": 1.5993213653564453,
"eval_validation_legal_articles_runtime": 155.5081,
"eval_validation_legal_articles_samples_per_second": 1.119,
"eval_validation_legal_articles_steps_per_second": 1.119,
"step": 935
},
{
"epoch": 0.23360399750156152,
"eval_validation_leggepertutti_loss": 1.3844820261001587,
"eval_validation_leggepertutti_runtime": 15.1978,
"eval_validation_leggepertutti_samples_per_second": 1.119,
"eval_validation_leggepertutti_steps_per_second": 1.119,
"step": 935
}
],
"logging_steps": 187,
"max_steps": 4002,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 187,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.1453283886025933e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}