gui360-fullparam-sft-step250 / trainer_state.json
Stevenshuqing's picture
Upload trainer_state.json with huggingface_hub
c988e28 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.6821130676552363,
"eval_steps": 50,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14828544949026876,
"grad_norm": 3.0305959544719996,
"learning_rate": 6.4285714285714295e-06,
"loss": 0.662,
"step": 10
},
{
"epoch": 0.2965708989805375,
"grad_norm": 2.7485654160854565,
"learning_rate": 9.990735836893226e-06,
"loss": 0.1743,
"step": 20
},
{
"epoch": 0.4448563484708063,
"grad_norm": 1.1430530985128236,
"learning_rate": 9.91682838414733e-06,
"loss": 0.1469,
"step": 30
},
{
"epoch": 0.593141797961075,
"grad_norm": 1.2712538596934821,
"learning_rate": 9.770107968877004e-06,
"loss": 0.1355,
"step": 40
},
{
"epoch": 0.7414272474513438,
"grad_norm": 0.8986934874682488,
"learning_rate": 9.552747363297172e-06,
"loss": 0.1262,
"step": 50
},
{
"epoch": 0.7414272474513438,
"eval_loss": 0.12907367944717407,
"eval_runtime": 40.4419,
"eval_samples_per_second": 6.404,
"eval_steps_per_second": 0.42,
"step": 50
},
{
"epoch": 0.8897126969416126,
"grad_norm": 1.2078337800655208,
"learning_rate": 9.267965445186733e-06,
"loss": 0.1183,
"step": 60
},
{
"epoch": 1.0296570898980537,
"grad_norm": 0.7746195941421475,
"learning_rate": 8.919979529756008e-06,
"loss": 0.1103,
"step": 70
},
{
"epoch": 1.1779425393883225,
"grad_norm": 0.9315687365661568,
"learning_rate": 8.513942915725159e-06,
"loss": 0.0967,
"step": 80
},
{
"epoch": 1.3262279888785913,
"grad_norm": 0.9221104617429706,
"learning_rate": 8.055868570489247e-06,
"loss": 0.0921,
"step": 90
},
{
"epoch": 1.47451343836886,
"grad_norm": 0.8320239593764249,
"learning_rate": 7.552540084510896e-06,
"loss": 0.0936,
"step": 100
},
{
"epoch": 1.47451343836886,
"eval_loss": 0.10962820053100586,
"eval_runtime": 39.3838,
"eval_samples_per_second": 6.576,
"eval_steps_per_second": 0.432,
"step": 100
},
{
"epoch": 1.6227988878591288,
"grad_norm": 0.9524938303051578,
"learning_rate": 7.011411213610663e-06,
"loss": 0.0924,
"step": 110
},
{
"epoch": 1.7710843373493976,
"grad_norm": 0.8096971233176471,
"learning_rate": 6.440495496826189e-06,
"loss": 0.0926,
"step": 120
},
{
"epoch": 1.9193697868396664,
"grad_norm": 1.0892516777146075,
"learning_rate": 5.848247584481424e-06,
"loss": 0.0912,
"step": 130
},
{
"epoch": 2.0593141797961074,
"grad_norm": 0.6756500246813129,
"learning_rate": 5.243438033870126e-06,
"loss": 0.079,
"step": 140
},
{
"epoch": 2.2075996292863764,
"grad_norm": 0.6535243252037057,
"learning_rate": 4.635023426695462e-06,
"loss": 0.0649,
"step": 150
},
{
"epoch": 2.2075996292863764,
"eval_loss": 0.09530726075172424,
"eval_runtime": 39.7995,
"eval_samples_per_second": 6.508,
"eval_steps_per_second": 0.427,
"step": 150
},
{
"epoch": 2.355885078776645,
"grad_norm": 0.7206259212079309,
"learning_rate": 4.032013731687351e-06,
"loss": 0.0662,
"step": 160
},
{
"epoch": 2.504170528266914,
"grad_norm": 0.6535997772962888,
"learning_rate": 3.443338876615092e-06,
"loss": 0.0631,
"step": 170
},
{
"epoch": 2.6524559777571826,
"grad_norm": 0.6645747980854602,
"learning_rate": 2.8777165056209256e-06,
"loss": 0.0654,
"step": 180
},
{
"epoch": 2.800741427247451,
"grad_norm": 0.5993925035499262,
"learning_rate": 2.343522880246734e-06,
"loss": 0.0615,
"step": 190
},
{
"epoch": 2.94902687673772,
"grad_norm": 1.1938299764947649,
"learning_rate": 1.8486688359714567e-06,
"loss": 0.0582,
"step": 200
},
{
"epoch": 2.94902687673772,
"eval_loss": 0.09135068207979202,
"eval_runtime": 40.159,
"eval_samples_per_second": 6.449,
"eval_steps_per_second": 0.423,
"step": 200
},
{
"epoch": 3.088971269694161,
"grad_norm": 0.9541639227633548,
"learning_rate": 1.4004826312100218e-06,
"loss": 0.0474,
"step": 210
},
{
"epoch": 3.23725671918443,
"grad_norm": 0.6148134757370654,
"learning_rate": 1.0056014236546647e-06,
"loss": 0.0403,
"step": 220
},
{
"epoch": 3.3855421686746987,
"grad_norm": 0.7706323260120672,
"learning_rate": 6.698729810778065e-07,
"loss": 0.0399,
"step": 230
},
{
"epoch": 3.5338276181649677,
"grad_norm": 0.7014591388584486,
"learning_rate": 3.9826908215420344e-07,
"loss": 0.0377,
"step": 240
},
{
"epoch": 3.6821130676552363,
"grad_norm": 0.8460829330545679,
"learning_rate": 1.9481188974346698e-07,
"loss": 0.0379,
"step": 250
},
{
"epoch": 3.6821130676552363,
"eval_loss": 0.0978400707244873,
"eval_runtime": 39.5311,
"eval_samples_per_second": 6.552,
"eval_steps_per_second": 0.43,
"step": 250
}
],
"logging_steps": 10,
"max_steps": 272,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 912772139319296.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}