test-lora-repo / checkpoint-210 /trainer_state.json
officeseiko's picture
Upload folder using huggingface_hub
5ecaacd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9470124013528749,
"eval_steps": 10,
"global_step": 210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04509582863585118,
"grad_norm": 4.649589538574219,
"learning_rate": 1.173913043478261e-06,
"loss": 1.6576,
"step": 10
},
{
"epoch": 0.04509582863585118,
"eval_loss": 1.9276622533798218,
"eval_runtime": 13.2788,
"eval_samples_per_second": 14.083,
"eval_steps_per_second": 7.079,
"step": 10
},
{
"epoch": 0.09019165727170236,
"grad_norm": 3.7612740993499756,
"learning_rate": 2.4782608695652173e-06,
"loss": 1.6856,
"step": 20
},
{
"epoch": 0.09019165727170236,
"eval_loss": 1.8389543294906616,
"eval_runtime": 13.2051,
"eval_samples_per_second": 14.161,
"eval_steps_per_second": 7.118,
"step": 20
},
{
"epoch": 0.13528748590755355,
"grad_norm": 3.048597812652588,
"learning_rate": 2.993785263797297e-06,
"loss": 1.5539,
"step": 30
},
{
"epoch": 0.13528748590755355,
"eval_loss": 1.6339466571807861,
"eval_runtime": 13.2214,
"eval_samples_per_second": 14.144,
"eval_steps_per_second": 7.11,
"step": 30
},
{
"epoch": 0.18038331454340473,
"grad_norm": 1.3381422758102417,
"learning_rate": 2.9559926521749324e-06,
"loss": 1.6382,
"step": 40
},
{
"epoch": 0.18038331454340473,
"eval_loss": 1.478708028793335,
"eval_runtime": 13.3673,
"eval_samples_per_second": 13.989,
"eval_steps_per_second": 7.032,
"step": 40
},
{
"epoch": 0.2254791431792559,
"grad_norm": 0.7458074688911438,
"learning_rate": 2.8847278242602185e-06,
"loss": 1.2616,
"step": 50
},
{
"epoch": 0.2254791431792559,
"eval_loss": 1.407120943069458,
"eval_runtime": 13.2044,
"eval_samples_per_second": 14.162,
"eval_steps_per_second": 7.119,
"step": 50
},
{
"epoch": 0.2705749718151071,
"grad_norm": 0.6770926713943481,
"learning_rate": 2.7816291068197328e-06,
"loss": 1.2366,
"step": 60
},
{
"epoch": 0.2705749718151071,
"eval_loss": 1.3675240278244019,
"eval_runtime": 13.4556,
"eval_samples_per_second": 13.898,
"eval_steps_per_second": 6.986,
"step": 60
},
{
"epoch": 0.3156708004509583,
"grad_norm": 0.7203531265258789,
"learning_rate": 2.649066664678467e-06,
"loss": 1.3251,
"step": 70
},
{
"epoch": 0.3156708004509583,
"eval_loss": 1.3370201587677002,
"eval_runtime": 13.2298,
"eval_samples_per_second": 14.135,
"eval_steps_per_second": 7.105,
"step": 70
},
{
"epoch": 0.36076662908680945,
"grad_norm": 0.6693026423454285,
"learning_rate": 2.4900880123475463e-06,
"loss": 1.2595,
"step": 80
},
{
"epoch": 0.36076662908680945,
"eval_loss": 1.3134440183639526,
"eval_runtime": 13.3078,
"eval_samples_per_second": 14.052,
"eval_steps_per_second": 7.064,
"step": 80
},
{
"epoch": 0.40586245772266066,
"grad_norm": 0.5636974573135376,
"learning_rate": 2.3083479538733636e-06,
"loss": 1.0858,
"step": 90
},
{
"epoch": 0.40586245772266066,
"eval_loss": 1.2941912412643433,
"eval_runtime": 13.1999,
"eval_samples_per_second": 14.167,
"eval_steps_per_second": 7.121,
"step": 90
},
{
"epoch": 0.4509582863585118,
"grad_norm": 0.571853756904602,
"learning_rate": 2.108024561540242e-06,
"loss": 1.2219,
"step": 100
},
{
"epoch": 0.4509582863585118,
"eval_loss": 1.2784264087677002,
"eval_runtime": 13.24,
"eval_samples_per_second": 14.124,
"eval_steps_per_second": 7.1,
"step": 100
},
{
"epoch": 0.496054114994363,
"grad_norm": 0.5300303101539612,
"learning_rate": 1.8937231250153905e-06,
"loss": 1.1319,
"step": 110
},
{
"epoch": 0.496054114994363,
"eval_loss": 1.264504313468933,
"eval_runtime": 13.2023,
"eval_samples_per_second": 14.164,
"eval_steps_per_second": 7.12,
"step": 110
},
{
"epoch": 0.5411499436302142,
"grad_norm": 0.5525142550468445,
"learning_rate": 1.6703702790757123e-06,
"loss": 1.1702,
"step": 120
},
{
"epoch": 0.5411499436302142,
"eval_loss": 1.2530410289764404,
"eval_runtime": 13.2204,
"eval_samples_per_second": 14.145,
"eval_steps_per_second": 7.11,
"step": 120
},
{
"epoch": 0.5862457722660653,
"grad_norm": 0.3444017171859741,
"learning_rate": 1.4431007438433431e-06,
"loss": 1.1054,
"step": 130
},
{
"epoch": 0.5862457722660653,
"eval_loss": 1.2430415153503418,
"eval_runtime": 13.205,
"eval_samples_per_second": 14.161,
"eval_steps_per_second": 7.119,
"step": 130
},
{
"epoch": 0.6313416009019166,
"grad_norm": 0.5441855192184448,
"learning_rate": 1.2171392812898465e-06,
"loss": 1.1249,
"step": 140
},
{
"epoch": 0.6313416009019166,
"eval_loss": 1.2348647117614746,
"eval_runtime": 13.2648,
"eval_samples_per_second": 14.097,
"eval_steps_per_second": 7.086,
"step": 140
},
{
"epoch": 0.6764374295377678,
"grad_norm": 0.5227693915367126,
"learning_rate": 9.976805817435208e-07,
"loss": 1.1867,
"step": 150
},
{
"epoch": 0.6764374295377678,
"eval_loss": 1.2283574342727661,
"eval_runtime": 13.1435,
"eval_samples_per_second": 14.228,
"eval_steps_per_second": 7.152,
"step": 150
},
{
"epoch": 0.7215332581736189,
"grad_norm": 0.542244017124176,
"learning_rate": 7.89769841721999e-07,
"loss": 1.0513,
"step": 160
},
{
"epoch": 0.7215332581736189,
"eval_loss": 1.223202109336853,
"eval_runtime": 13.3114,
"eval_samples_per_second": 14.048,
"eval_steps_per_second": 7.062,
"step": 160
},
{
"epoch": 0.7666290868094702,
"grad_norm": 0.43865886330604553,
"learning_rate": 5.98186778519257e-07,
"loss": 1.1238,
"step": 170
},
{
"epoch": 0.7666290868094702,
"eval_loss": 1.2190639972686768,
"eval_runtime": 13.2662,
"eval_samples_per_second": 14.096,
"eval_steps_per_second": 7.086,
"step": 170
},
{
"epoch": 0.8117249154453213,
"grad_norm": 0.4573892056941986,
"learning_rate": 4.273357479676508e-07,
"loss": 1.1548,
"step": 180
},
{
"epoch": 0.8117249154453213,
"eval_loss": 1.2160392999649048,
"eval_runtime": 13.1667,
"eval_samples_per_second": 14.203,
"eval_steps_per_second": 7.139,
"step": 180
},
{
"epoch": 0.8568207440811725,
"grad_norm": 0.476138710975647,
"learning_rate": 2.8114449148802547e-07,
"loss": 1.2207,
"step": 190
},
{
"epoch": 0.8568207440811725,
"eval_loss": 1.2139577865600586,
"eval_runtime": 13.1987,
"eval_samples_per_second": 14.168,
"eval_steps_per_second": 7.122,
"step": 190
},
{
"epoch": 0.9019165727170236,
"grad_norm": 0.4513246715068817,
"learning_rate": 1.6297384015986928e-07,
"loss": 1.1339,
"step": 200
},
{
"epoch": 0.9019165727170236,
"eval_loss": 1.212600827217102,
"eval_runtime": 13.239,
"eval_samples_per_second": 14.125,
"eval_steps_per_second": 7.1,
"step": 200
},
{
"epoch": 0.9470124013528749,
"grad_norm": 0.4398881196975708,
"learning_rate": 7.554045164952123e-08,
"loss": 1.0351,
"step": 210
},
{
"epoch": 0.9470124013528749,
"eval_loss": 1.21186363697052,
"eval_runtime": 13.2875,
"eval_samples_per_second": 14.073,
"eval_steps_per_second": 7.074,
"step": 210
}
],
"logging_steps": 10,
"max_steps": 230,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.622155888280781e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}