MNLP_M2_rag_model / checkpoint-16446 /trainer_state.json
qchapp's picture
Upload folder using huggingface_hub
b3b040c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 16446,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09120758847136082,
"grad_norm": 2.8996551036834717,
"learning_rate": 4.849507479022255e-05,
"loss": 0.8414,
"step": 500
},
{
"epoch": 0.09120758847136082,
"eval_loss": 0.8034613132476807,
"eval_runtime": 67.9143,
"eval_samples_per_second": 35.883,
"eval_steps_per_second": 4.491,
"step": 500
},
{
"epoch": 0.18241517694272164,
"grad_norm": 3.896031141281128,
"learning_rate": 4.697494831569987e-05,
"loss": 0.8357,
"step": 1000
},
{
"epoch": 0.18241517694272164,
"eval_loss": 0.7636010050773621,
"eval_runtime": 66.4597,
"eval_samples_per_second": 36.669,
"eval_steps_per_second": 4.589,
"step": 1000
},
{
"epoch": 0.2736227654140824,
"grad_norm": 3.0594074726104736,
"learning_rate": 4.5454821841177186e-05,
"loss": 0.784,
"step": 1500
},
{
"epoch": 0.2736227654140824,
"eval_loss": 0.7404520511627197,
"eval_runtime": 66.4753,
"eval_samples_per_second": 36.66,
"eval_steps_per_second": 4.588,
"step": 1500
},
{
"epoch": 0.36483035388544327,
"grad_norm": 2.4666194915771484,
"learning_rate": 4.393469536665451e-05,
"loss": 0.7527,
"step": 2000
},
{
"epoch": 0.36483035388544327,
"eval_loss": 0.720551073551178,
"eval_runtime": 66.4761,
"eval_samples_per_second": 36.66,
"eval_steps_per_second": 4.588,
"step": 2000
},
{
"epoch": 0.45603794235680406,
"grad_norm": 3.1520893573760986,
"learning_rate": 4.2414568892131825e-05,
"loss": 0.7588,
"step": 2500
},
{
"epoch": 0.45603794235680406,
"eval_loss": 0.7047598958015442,
"eval_runtime": 66.6712,
"eval_samples_per_second": 36.552,
"eval_steps_per_second": 4.575,
"step": 2500
},
{
"epoch": 0.5472455308281649,
"grad_norm": 2.88120174407959,
"learning_rate": 4.0894442417609145e-05,
"loss": 0.7203,
"step": 3000
},
{
"epoch": 0.5472455308281649,
"eval_loss": 0.6884846091270447,
"eval_runtime": 66.5954,
"eval_samples_per_second": 36.594,
"eval_steps_per_second": 4.58,
"step": 3000
},
{
"epoch": 0.6384531192995258,
"grad_norm": 2.235811710357666,
"learning_rate": 3.9374315943086464e-05,
"loss": 0.7098,
"step": 3500
},
{
"epoch": 0.6384531192995258,
"eval_loss": 0.6729713082313538,
"eval_runtime": 66.5208,
"eval_samples_per_second": 36.635,
"eval_steps_per_second": 4.585,
"step": 3500
},
{
"epoch": 0.7296607077708865,
"grad_norm": 4.2170891761779785,
"learning_rate": 3.785418946856379e-05,
"loss": 0.6946,
"step": 4000
},
{
"epoch": 0.7296607077708865,
"eval_loss": 0.6604794263839722,
"eval_runtime": 66.5342,
"eval_samples_per_second": 36.628,
"eval_steps_per_second": 4.584,
"step": 4000
},
{
"epoch": 0.8208682962422473,
"grad_norm": 3.053722381591797,
"learning_rate": 3.633406299404111e-05,
"loss": 0.684,
"step": 4500
},
{
"epoch": 0.8208682962422473,
"eval_loss": 0.646191418170929,
"eval_runtime": 66.6938,
"eval_samples_per_second": 36.54,
"eval_steps_per_second": 4.573,
"step": 4500
},
{
"epoch": 0.9120758847136081,
"grad_norm": 2.7756240367889404,
"learning_rate": 3.481393651951842e-05,
"loss": 0.6772,
"step": 5000
},
{
"epoch": 0.9120758847136081,
"eval_loss": 0.6353339552879333,
"eval_runtime": 66.5122,
"eval_samples_per_second": 36.64,
"eval_steps_per_second": 4.586,
"step": 5000
},
{
"epoch": 1.003283473184969,
"grad_norm": 1.9400490522384644,
"learning_rate": 3.329381004499574e-05,
"loss": 0.6393,
"step": 5500
},
{
"epoch": 1.003283473184969,
"eval_loss": 0.643139123916626,
"eval_runtime": 66.5024,
"eval_samples_per_second": 36.645,
"eval_steps_per_second": 4.586,
"step": 5500
},
{
"epoch": 1.09449106165633,
"grad_norm": 2.2215569019317627,
"learning_rate": 3.177368357047307e-05,
"loss": 0.4513,
"step": 6000
},
{
"epoch": 1.09449106165633,
"eval_loss": 0.6397776007652283,
"eval_runtime": 66.4386,
"eval_samples_per_second": 36.68,
"eval_steps_per_second": 4.591,
"step": 6000
},
{
"epoch": 1.1856986501276907,
"grad_norm": 2.093801736831665,
"learning_rate": 3.0253557095950385e-05,
"loss": 0.4427,
"step": 6500
},
{
"epoch": 1.1856986501276907,
"eval_loss": 0.636920154094696,
"eval_runtime": 66.5515,
"eval_samples_per_second": 36.618,
"eval_steps_per_second": 4.583,
"step": 6500
},
{
"epoch": 1.2769062385990515,
"grad_norm": 4.663333892822266,
"learning_rate": 2.8733430621427704e-05,
"loss": 0.4468,
"step": 7000
},
{
"epoch": 1.2769062385990515,
"eval_loss": 0.6325265169143677,
"eval_runtime": 66.5785,
"eval_samples_per_second": 36.603,
"eval_steps_per_second": 4.581,
"step": 7000
},
{
"epoch": 1.3681138270704123,
"grad_norm": 3.2814857959747314,
"learning_rate": 2.721330414690502e-05,
"loss": 0.4555,
"step": 7500
},
{
"epoch": 1.3681138270704123,
"eval_loss": 0.6226893067359924,
"eval_runtime": 66.6251,
"eval_samples_per_second": 36.578,
"eval_steps_per_second": 4.578,
"step": 7500
},
{
"epoch": 1.459321415541773,
"grad_norm": 1.9859124422073364,
"learning_rate": 2.5693177672382347e-05,
"loss": 0.4306,
"step": 8000
},
{
"epoch": 1.459321415541773,
"eval_loss": 0.6232908368110657,
"eval_runtime": 66.5211,
"eval_samples_per_second": 36.635,
"eval_steps_per_second": 4.585,
"step": 8000
},
{
"epoch": 1.5505290040131339,
"grad_norm": 2.86844539642334,
"learning_rate": 2.4173051197859663e-05,
"loss": 0.4399,
"step": 8500
},
{
"epoch": 1.5505290040131339,
"eval_loss": 0.6131536960601807,
"eval_runtime": 66.4759,
"eval_samples_per_second": 36.66,
"eval_steps_per_second": 4.588,
"step": 8500
},
{
"epoch": 1.6417365924844947,
"grad_norm": 2.0596117973327637,
"learning_rate": 2.2652924723336982e-05,
"loss": 0.4357,
"step": 9000
},
{
"epoch": 1.6417365924844947,
"eval_loss": 0.6083381175994873,
"eval_runtime": 66.5319,
"eval_samples_per_second": 36.629,
"eval_steps_per_second": 4.584,
"step": 9000
},
{
"epoch": 1.7329441809558555,
"grad_norm": 2.2367780208587646,
"learning_rate": 2.1132798248814302e-05,
"loss": 0.4387,
"step": 9500
},
{
"epoch": 1.7329441809558555,
"eval_loss": 0.6063674688339233,
"eval_runtime": 66.5477,
"eval_samples_per_second": 36.62,
"eval_steps_per_second": 4.583,
"step": 9500
},
{
"epoch": 1.8241517694272162,
"grad_norm": 2.397143602371216,
"learning_rate": 1.961267177429162e-05,
"loss": 0.4353,
"step": 10000
},
{
"epoch": 1.8241517694272162,
"eval_loss": 0.5975276231765747,
"eval_runtime": 66.5784,
"eval_samples_per_second": 36.603,
"eval_steps_per_second": 4.581,
"step": 10000
},
{
"epoch": 1.915359357898577,
"grad_norm": 3.369065284729004,
"learning_rate": 1.8092545299768944e-05,
"loss": 0.4334,
"step": 10500
},
{
"epoch": 1.915359357898577,
"eval_loss": 0.5899476408958435,
"eval_runtime": 66.4613,
"eval_samples_per_second": 36.668,
"eval_steps_per_second": 4.589,
"step": 10500
},
{
"epoch": 2.006566946369938,
"grad_norm": 2.555560827255249,
"learning_rate": 1.657241882524626e-05,
"loss": 0.4188,
"step": 11000
},
{
"epoch": 2.006566946369938,
"eval_loss": 0.670684278011322,
"eval_runtime": 66.4465,
"eval_samples_per_second": 36.676,
"eval_steps_per_second": 4.59,
"step": 11000
},
{
"epoch": 2.0977745348412986,
"grad_norm": 3.1445837020874023,
"learning_rate": 1.5052292350723582e-05,
"loss": 0.2206,
"step": 11500
},
{
"epoch": 2.0977745348412986,
"eval_loss": 0.7085195183753967,
"eval_runtime": 66.4599,
"eval_samples_per_second": 36.669,
"eval_steps_per_second": 4.589,
"step": 11500
},
{
"epoch": 2.18898212331266,
"grad_norm": 1.821514368057251,
"learning_rate": 1.35321658762009e-05,
"loss": 0.2125,
"step": 12000
},
{
"epoch": 2.18898212331266,
"eval_loss": 0.7055649161338806,
"eval_runtime": 66.4182,
"eval_samples_per_second": 36.692,
"eval_steps_per_second": 4.592,
"step": 12000
},
{
"epoch": 2.28018971178402,
"grad_norm": 2.517010450363159,
"learning_rate": 1.2015079654627266e-05,
"loss": 0.2136,
"step": 12500
},
{
"epoch": 2.28018971178402,
"eval_loss": 0.7125562429428101,
"eval_runtime": 66.4822,
"eval_samples_per_second": 36.656,
"eval_steps_per_second": 4.588,
"step": 12500
},
{
"epoch": 2.3713973002553814,
"grad_norm": 2.654905319213867,
"learning_rate": 1.049799343305363e-05,
"loss": 0.2186,
"step": 13000
},
{
"epoch": 2.3713973002553814,
"eval_loss": 0.711520254611969,
"eval_runtime": 66.4793,
"eval_samples_per_second": 36.658,
"eval_steps_per_second": 4.588,
"step": 13000
},
{
"epoch": 2.462604888726742,
"grad_norm": 1.6538429260253906,
"learning_rate": 8.97786695853095e-06,
"loss": 0.2119,
"step": 13500
},
{
"epoch": 2.462604888726742,
"eval_loss": 0.7095320820808411,
"eval_runtime": 66.4626,
"eval_samples_per_second": 36.667,
"eval_steps_per_second": 4.589,
"step": 13500
},
{
"epoch": 2.553812477198103,
"grad_norm": 2.4926905632019043,
"learning_rate": 7.45774048400827e-06,
"loss": 0.2093,
"step": 14000
},
{
"epoch": 2.553812477198103,
"eval_loss": 0.7063737511634827,
"eval_runtime": 66.4346,
"eval_samples_per_second": 36.683,
"eval_steps_per_second": 4.591,
"step": 14000
},
{
"epoch": 2.645020065669464,
"grad_norm": 2.814349412918091,
"learning_rate": 5.93761400948559e-06,
"loss": 0.203,
"step": 14500
},
{
"epoch": 2.645020065669464,
"eval_loss": 0.7055577635765076,
"eval_runtime": 66.5157,
"eval_samples_per_second": 36.638,
"eval_steps_per_second": 4.585,
"step": 14500
},
{
"epoch": 2.7362276541408246,
"grad_norm": 2.242487907409668,
"learning_rate": 4.417487534962909e-06,
"loss": 0.2019,
"step": 15000
},
{
"epoch": 2.7362276541408246,
"eval_loss": 0.7038553357124329,
"eval_runtime": 66.6171,
"eval_samples_per_second": 36.582,
"eval_steps_per_second": 4.578,
"step": 15000
},
{
"epoch": 2.8274352426121854,
"grad_norm": 3.0727193355560303,
"learning_rate": 2.8973610604402286e-06,
"loss": 0.2,
"step": 15500
},
{
"epoch": 2.8274352426121854,
"eval_loss": 0.7052037119865417,
"eval_runtime": 66.5239,
"eval_samples_per_second": 36.633,
"eval_steps_per_second": 4.585,
"step": 15500
},
{
"epoch": 2.918642831083546,
"grad_norm": 2.016516923904419,
"learning_rate": 1.3772345859175483e-06,
"loss": 0.2011,
"step": 16000
},
{
"epoch": 2.918642831083546,
"eval_loss": 0.7026786804199219,
"eval_runtime": 66.488,
"eval_samples_per_second": 36.653,
"eval_steps_per_second": 4.587,
"step": 16000
}
],
"logging_steps": 500,
"max_steps": 16446,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.901338457754829e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}