{
"best_metric": 1.7059074640274048,
"best_model_checkpoint": "./results/cluster3_batch1_prop0.2/checkpoint-2000",
"epoch": 0.9998175134909669,
"eval_steps": 500,
"global_step": 2397,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0417112020647045,
"grad_norm": 0.6617676019668579,
"learning_rate": 9.996421853676199e-05,
"loss": 1.9795,
"step": 100
},
{
"epoch": 0.083422404129409,
"grad_norm": 0.9614683985710144,
"learning_rate": 9.92540136395745e-05,
"loss": 1.8034,
"step": 200
},
{
"epoch": 0.1251336061941135,
"grad_norm": 1.0093903541564941,
"learning_rate": 9.764589508626579e-05,
"loss": 1.7261,
"step": 300
},
{
"epoch": 0.166844808258818,
"grad_norm": 0.7080962657928467,
"learning_rate": 9.516917930209698e-05,
"loss": 1.7056,
"step": 400
},
{
"epoch": 0.2085560103235225,
"grad_norm": 0.8058099150657654,
"learning_rate": 9.186901746888266e-05,
"loss": 1.7093,
"step": 500
},
{
"epoch": 0.2085560103235225,
"eval_loss": 1.7287803888320923,
"eval_runtime": 1120.1405,
"eval_samples_per_second": 4.354,
"eval_steps_per_second": 2.177,
"step": 500
},
{
"epoch": 0.250267212388227,
"grad_norm": 0.8433169722557068,
"learning_rate": 8.780557240704062e-05,
"loss": 1.6892,
"step": 600
},
{
"epoch": 0.2919784144529315,
"grad_norm": 0.8156673908233643,
"learning_rate": 8.305292179151174e-05,
"loss": 1.6795,
"step": 700
},
{
"epoch": 0.333689616517636,
"grad_norm": 1.0388058423995972,
"learning_rate": 7.769770769621275e-05,
"loss": 1.6765,
"step": 800
},
{
"epoch": 0.37540081858234053,
"grad_norm": 0.9966904520988464,
"learning_rate": 7.183755708618267e-05,
"loss": 1.6632,
"step": 900
},
{
"epoch": 0.417112020647045,
"grad_norm": 1.0595520734786987,
"learning_rate": 6.557930205226752e-05,
"loss": 1.662,
"step": 1000
},
{
"epoch": 0.417112020647045,
"eval_loss": 1.7165497541427612,
"eval_runtime": 1121.1862,
"eval_samples_per_second": 4.35,
"eval_steps_per_second": 2.175,
"step": 1000
},
{
"epoch": 0.4588232227117495,
"grad_norm": 1.003791093826294,
"learning_rate": 5.903703223393429e-05,
"loss": 1.6331,
"step": 1100
},
{
"epoch": 0.500534424776454,
"grad_norm": 1.7693170309066772,
"learning_rate": 5.2330014935059945e-05,
"loss": 1.6673,
"step": 1200
},
{
"epoch": 0.5422456268411585,
"grad_norm": 1.3780925273895264,
"learning_rate": 4.558052084953292e-05,
"loss": 1.6418,
"step": 1300
},
{
"epoch": 0.583956828905863,
"grad_norm": 1.0700241327285767,
"learning_rate": 3.891159503426274e-05,
"loss": 1.6426,
"step": 1400
},
{
"epoch": 0.6256680309705676,
"grad_norm": 1.2618963718414307,
"learning_rate": 3.244481376534764e-05,
"loss": 1.6575,
"step": 1500
},
{
"epoch": 0.6256680309705676,
"eval_loss": 1.7095658779144287,
"eval_runtime": 1120.2118,
"eval_samples_per_second": 4.354,
"eval_steps_per_second": 2.177,
"step": 1500
},
{
"epoch": 0.667379233035272,
"grad_norm": 1.447568655014038,
"learning_rate": 2.6298068170503566e-05,
"loss": 1.6475,
"step": 1600
},
{
"epoch": 0.7090904350999765,
"grad_norm": 1.2349563837051392,
"learning_rate": 2.0583415042720094e-05,
"loss": 1.6533,
"step": 1700
},
{
"epoch": 0.7508016371646811,
"grad_norm": 1.3562304973602295,
"learning_rate": 1.5405034015376557e-05,
"loss": 1.6364,
"step": 1800
},
{
"epoch": 0.7925128392293855,
"grad_norm": 2.0301923751831055,
"learning_rate": 1.0857328340055205e-05,
"loss": 1.6443,
"step": 1900
},
{
"epoch": 0.83422404129409,
"grad_norm": 1.3831614255905151,
"learning_rate": 7.023203890372182e-06,
"loss": 1.6349,
"step": 2000
},
{
"epoch": 0.83422404129409,
"eval_loss": 1.7059074640274048,
"eval_runtime": 1120.6584,
"eval_samples_per_second": 4.352,
"eval_steps_per_second": 2.176,
"step": 2000
},
{
"epoch": 0.8759352433587946,
"grad_norm": 1.450674057006836,
"learning_rate": 3.972557766040636e-06,
"loss": 1.6622,
"step": 2100
},
{
"epoch": 0.917646445423499,
"grad_norm": 0.9268587827682495,
"learning_rate": 1.7610040503122649e-06,
"loss": 1.6419,
"step": 2200
},
{
"epoch": 0.9593576474882035,
"grad_norm": 1.4191687107086182,
"learning_rate": 4.288599505750612e-07,
"loss": 1.6441,
"step": 2300
}
],
"logging_steps": 100,
"max_steps": 2397,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.869083564909568e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}