SynCABEL_QUAERO_MEDLINE / trainer_state.json
AnonymousARR42's picture
Upload trainer_state.json with huggingface_hub
105e6b7 verified
{
"best_global_step": 14000,
"best_metric": 0.5805,
"best_model_checkpoint": "/lustre/fsn1/projects/rech/ssq/usk98ia/expe_data_ratio/MEDLINE_full_upsampled_tfidf/Meta-Llama-3-8B-Instruct/checkpoint-14000",
"epoch": 3.0,
"eval_steps": 2000,
"global_step": 19056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.969329814195633,
"epoch": 0.3148614609571788,
"grad_norm": 4.3125,
"learning_rate": 2.7683942869508766e-05,
"loss": 0.5794,
"mean_token_accuracy": 0.8737598845213652,
"num_tokens": 15559602.0,
"step": 2000
},
{
"epoch": 0.3148614609571788,
"eval_entropy": 2.0184345861409336,
"eval_loss": 1.1819707155227661,
"eval_mean_token_accuracy": 0.8292109202718575,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 15559602.0,
"eval_recall": 0.5268,
"eval_runtime": 5.3876,
"eval_samples_per_second": 55.312,
"eval_steps_per_second": 55.312,
"step": 2000
},
{
"entropy": 1.9614297151565552,
"epoch": 0.6297229219143576,
"grad_norm": 0.8828125,
"learning_rate": 2.4437892231118805e-05,
"loss": 0.3147,
"mean_token_accuracy": 0.9210941952764988,
"num_tokens": 31152472.0,
"step": 4000
},
{
"epoch": 0.6297229219143576,
"eval_entropy": 1.9975530157153238,
"eval_loss": 1.255624532699585,
"eval_mean_token_accuracy": 0.8376039716621373,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 31152472.0,
"eval_recall": 0.557,
"eval_runtime": 5.1227,
"eval_samples_per_second": 58.173,
"eval_steps_per_second": 58.173,
"step": 4000
},
{
"entropy": 1.8552323149442673,
"epoch": 0.9445843828715366,
"grad_norm": 5.84375,
"learning_rate": 2.1191841592728847e-05,
"loss": 0.2448,
"mean_token_accuracy": 0.9366229450702668,
"num_tokens": 46755525.0,
"step": 6000
},
{
"epoch": 0.9445843828715366,
"eval_entropy": 1.7975720761206326,
"eval_loss": 1.4483306407928467,
"eval_mean_token_accuracy": 0.8389281419499609,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 46755525.0,
"eval_recall": 0.5537,
"eval_runtime": 5.1544,
"eval_samples_per_second": 57.815,
"eval_steps_per_second": 57.815,
"step": 6000
},
{
"entropy": 1.5045488215088845,
"epoch": 1.2594458438287153,
"grad_norm": 6.25,
"learning_rate": 1.794579095433889e-05,
"loss": 0.1197,
"mean_token_accuracy": 0.9654132520854473,
"num_tokens": 62322213.0,
"step": 8000
},
{
"epoch": 1.2594458438287153,
"eval_entropy": 1.6012752000117463,
"eval_loss": 1.4600104093551636,
"eval_mean_token_accuracy": 0.8446898946426059,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 62322213.0,
"eval_recall": 0.5638,
"eval_runtime": 5.1609,
"eval_samples_per_second": 57.742,
"eval_steps_per_second": 57.742,
"step": 8000
},
{
"entropy": 1.3820615062713624,
"epoch": 1.5743073047858942,
"grad_norm": 2.734375,
"learning_rate": 1.469974031594893e-05,
"loss": 0.0943,
"mean_token_accuracy": 0.9713528198897838,
"num_tokens": 77897553.0,
"step": 10000
},
{
"epoch": 1.5743073047858942,
"eval_entropy": 1.4945057386519924,
"eval_loss": 1.4899256229400635,
"eval_mean_token_accuracy": 0.8435980430585426,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 77897553.0,
"eval_recall": 0.5638,
"eval_runtime": 5.1773,
"eval_samples_per_second": 57.558,
"eval_steps_per_second": 57.558,
"step": 10000
},
{
"entropy": 1.3741806761026383,
"epoch": 1.8891687657430731,
"grad_norm": 4.65625,
"learning_rate": 1.1453689677558971e-05,
"loss": 0.0877,
"mean_token_accuracy": 0.9735741688013076,
"num_tokens": 93526033.0,
"step": 12000
},
{
"epoch": 1.8891687657430731,
"eval_entropy": 1.4978553134722998,
"eval_loss": 1.519768476486206,
"eval_mean_token_accuracy": 0.8461721192470333,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 93526033.0,
"eval_recall": 0.5772,
"eval_runtime": 5.1186,
"eval_samples_per_second": 58.219,
"eval_steps_per_second": 58.219,
"step": 12000
},
{
"entropy": 1.3528405362963676,
"epoch": 2.204030226700252,
"grad_norm": 2.15625,
"learning_rate": 8.207639039169012e-06,
"loss": 0.0503,
"mean_token_accuracy": 0.9846821752786636,
"num_tokens": 109130930.0,
"step": 14000
},
{
"epoch": 2.204030226700252,
"eval_entropy": 1.4673509179745745,
"eval_loss": 1.6728711128234863,
"eval_mean_token_accuracy": 0.8439136392518178,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 109130930.0,
"eval_recall": 0.5805,
"eval_runtime": 5.1395,
"eval_samples_per_second": 57.982,
"eval_steps_per_second": 57.982,
"step": 14000
},
{
"entropy": 1.3303052598834038,
"epoch": 2.5188916876574305,
"grad_norm": 2.25,
"learning_rate": 4.961588400779053e-06,
"loss": 0.0296,
"mean_token_accuracy": 0.9909585509300232,
"num_tokens": 124740845.0,
"step": 16000
},
{
"epoch": 2.5188916876574305,
"eval_entropy": 1.4869463125731321,
"eval_loss": 1.6893718242645264,
"eval_mean_token_accuracy": 0.8412250805421163,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 124740845.0,
"eval_recall": 0.5705,
"eval_runtime": 5.1055,
"eval_samples_per_second": 58.368,
"eval_steps_per_second": 58.368,
"step": 16000
},
{
"entropy": 1.3306579428315162,
"epoch": 2.8337531486146097,
"grad_norm": 0.0537109375,
"learning_rate": 1.7155377623890934e-06,
"loss": 0.0295,
"mean_token_accuracy": 0.9908962353467942,
"num_tokens": 140357465.0,
"step": 18000
},
{
"epoch": 2.8337531486146097,
"eval_entropy": 1.4714410711054835,
"eval_loss": 1.6864608526229858,
"eval_mean_token_accuracy": 0.8417843647251193,
"eval_num_gold": 298,
"eval_num_guess": 298,
"eval_num_tokens": 140357465.0,
"eval_recall": 0.5738,
"eval_runtime": 5.2683,
"eval_samples_per_second": 56.564,
"eval_steps_per_second": 56.564,
"step": 18000
}
],
"logging_steps": 2000,
"max_steps": 19056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.689385209600672e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}