phi-2-telecom-ft / trainer_state.json
dinho1597's picture
Subiendo modelo inicial
dd05793 verified
{
"best_metric": 0.9679633867276888,
"best_model_checkpoint": "/content/drive/MyDrive/Papers/RAG_3GPP/models/checkpoints/embedding/bge-small-telecom_10e_256bs/checkpoint-150",
"epoch": 6.857142857142857,
"eval_steps": 15,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7142857142857143,
"grad_norm": 1.681250810623169,
"learning_rate": 3.571428571428572e-05,
"loss": 0.824,
"step": 15
},
{
"epoch": 0.7142857142857143,
"eval_loss": 0.13330750167369843,
"eval_runtime": 3.6814,
"eval_samples_per_second": 356.115,
"eval_steps_per_second": 1.63,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9397406559877955,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9839816933638444,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9893211289092296,
"eval_telecom-ir-eval_cosine_map@100": 0.9625163452108533,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9623769568849659,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9701258981216676,
"eval_telecom-ir-eval_cosine_precision@1": 0.9397406559877955,
"eval_telecom-ir-eval_cosine_recall@1": 0.9397406559877955,
"step": 15
},
{
"epoch": 1.380952380952381,
"grad_norm": 0.8189207315444946,
"learning_rate": 4.972077065562821e-05,
"loss": 0.1731,
"step": 30
},
{
"epoch": 1.380952380952381,
"eval_loss": 0.07593704760074615,
"eval_runtime": 4.0688,
"eval_samples_per_second": 322.209,
"eval_steps_per_second": 1.475,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9565217391304348,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9877955758962624,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968,
"eval_telecom-ir-eval_cosine_map@100": 0.9723266300874301,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9721883210441564,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9776352051817517,
"eval_telecom-ir-eval_cosine_precision@1": 0.9565217391304348,
"eval_telecom-ir-eval_cosine_recall@1": 0.9565217391304348,
"step": 30
},
{
"epoch": 2.0476190476190474,
"grad_norm": 0.7057574391365051,
"learning_rate": 4.803690529676019e-05,
"loss": 0.0917,
"step": 45
},
{
"epoch": 2.0476190476190474,
"eval_loss": 0.06566686183214188,
"eval_runtime": 3.7186,
"eval_samples_per_second": 352.553,
"eval_steps_per_second": 1.614,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9900839054157132,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9908466819221968,
"eval_telecom-ir-eval_cosine_map@100": 0.9768047979761636,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9765700483091787,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9807364362901521,
"eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
"step": 45
},
{
"epoch": 2.761904761904762,
"grad_norm": 0.7498806118965149,
"learning_rate": 4.4928312680573064e-05,
"loss": 0.0676,
"step": 60
},
{
"epoch": 2.761904761904762,
"eval_loss": 0.06091764196753502,
"eval_runtime": 3.7927,
"eval_samples_per_second": 345.667,
"eval_steps_per_second": 1.582,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9641495041952708,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_map@100": 0.977428148947981,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9771802695143658,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9812569737659373,
"eval_telecom-ir-eval_cosine_precision@1": 0.9641495041952708,
"eval_telecom-ir-eval_cosine_recall@1": 0.9641495041952708,
"step": 60
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.48658156394958496,
"learning_rate": 4.058724504646834e-05,
"loss": 0.0435,
"step": 75
},
{
"epoch": 3.4285714285714284,
"eval_loss": 0.05956002324819565,
"eval_runtime": 4.2667,
"eval_samples_per_second": 307.261,
"eval_steps_per_second": 1.406,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_map@100": 0.978052610298987,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9778295376121463,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9817518617980646,
"eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
"step": 75
},
{
"epoch": 4.095238095238095,
"grad_norm": 0.4985809624195099,
"learning_rate": 3.5282177578265296e-05,
"loss": 0.038,
"step": 90
},
{
"epoch": 4.095238095238095,
"eval_loss": 0.060632411390542984,
"eval_runtime": 4.6488,
"eval_samples_per_second": 282.008,
"eval_steps_per_second": 1.291,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164,
"eval_telecom-ir-eval_cosine_map@100": 0.9775869566334031,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9773646071700992,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9813932046352999,
"eval_telecom-ir-eval_cosine_precision@1": 0.9649122807017544,
"eval_telecom-ir-eval_cosine_recall@1": 0.9649122807017544,
"step": 90
},
{
"epoch": 4.809523809523809,
"grad_norm": 0.4105435609817505,
"learning_rate": 2.9341204441673266e-05,
"loss": 0.0332,
"step": 105
},
{
"epoch": 4.809523809523809,
"eval_loss": 0.05935605987906456,
"eval_runtime": 4.0644,
"eval_samples_per_second": 322.554,
"eval_steps_per_second": 1.476,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.992372234935164,
"eval_telecom-ir-eval_cosine_map@100": 0.9783638236659703,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9781273836765828,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9819743331685896,
"eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238,
"eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238,
"step": 105
},
{
"epoch": 5.476190476190476,
"grad_norm": 0.468258261680603,
"learning_rate": 2.3131747660339394e-05,
"loss": 0.0269,
"step": 120
},
{
"epoch": 5.476190476190476,
"eval_loss": 0.060672808438539505,
"eval_runtime": 4.0797,
"eval_samples_per_second": 321.343,
"eval_steps_per_second": 1.471,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9664378337147216,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9931350114416476,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_map@100": 0.9780891289133677,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9778688871938299,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9817380288044749,
"eval_telecom-ir-eval_cosine_precision@1": 0.9664378337147216,
"eval_telecom-ir-eval_cosine_recall@1": 0.9664378337147216,
"step": 120
},
{
"epoch": 6.142857142857143,
"grad_norm": 0.192308709025383,
"learning_rate": 1.7037833743707892e-05,
"loss": 0.0219,
"step": 135
},
{
"epoch": 6.142857142857143,
"eval_loss": 0.06004022806882858,
"eval_runtime": 3.6988,
"eval_samples_per_second": 354.443,
"eval_steps_per_second": 1.622,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.965675057208238,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.9938977879481312,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9908466819221968,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_map@100": 0.9779666698415427,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9778095601322145,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9818676160795978,
"eval_telecom-ir-eval_cosine_precision@1": 0.965675057208238,
"eval_telecom-ir-eval_cosine_recall@1": 0.965675057208238,
"step": 135
},
{
"epoch": 6.857142857142857,
"grad_norm": 0.3330775499343872,
"learning_rate": 1.1436343403356017e-05,
"loss": 0.0244,
"step": 150
},
{
"epoch": 6.857142857142857,
"eval_loss": 0.05985964834690094,
"eval_runtime": 3.8386,
"eval_samples_per_second": 341.53,
"eval_steps_per_second": 1.563,
"eval_telecom-ir-eval_cosine_accuracy@1": 0.9679633867276888,
"eval_telecom-ir-eval_cosine_accuracy@10": 0.992372234935164,
"eval_telecom-ir-eval_cosine_accuracy@3": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_accuracy@5": 0.9916094584286804,
"eval_telecom-ir-eval_cosine_map@100": 0.9791402442094453,
"eval_telecom-ir-eval_cosine_mrr@10": 0.9788647342995168,
"eval_telecom-ir-eval_cosine_ndcg@10": 0.9823240649953693,
"eval_telecom-ir-eval_cosine_precision@1": 0.9679633867276888,
"eval_telecom-ir-eval_cosine_recall@1": 0.9679633867276888,
"step": 150
}
],
"logging_steps": 15,
"max_steps": 210,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 15,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}