{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.4454976303317535,
"eval_steps": 500,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08616975441619991,
"grad_norm": 1.8284112215042114,
"learning_rate": 1.98e-05,
"loss": 5.544659423828125,
"step": 100
},
{
"epoch": 0.17233950883239982,
"grad_norm": 0.5319015383720398,
"learning_rate": 3.979999999999999e-05,
"loss": 3.90488037109375,
"step": 200
},
{
"epoch": 0.25850926324859974,
"grad_norm": 1.3977950811386108,
"learning_rate": 5.98e-05,
"loss": 3.39756103515625,
"step": 300
},
{
"epoch": 0.34467901766479964,
"grad_norm": 1.9291573762893677,
"learning_rate": 7.98e-05,
"loss": 3.019991149902344,
"step": 400
},
{
"epoch": 0.4308487720809996,
"grad_norm": 1.4095340967178345,
"learning_rate": 9.979999999999999e-05,
"loss": 2.815445861816406,
"step": 500
},
{
"epoch": 0.5170185264971995,
"grad_norm": 3.2716641426086426,
"learning_rate": 0.00011979999999999998,
"loss": 2.6590045166015623,
"step": 600
},
{
"epoch": 0.6031882809133994,
"grad_norm": 1.3838716745376587,
"learning_rate": 0.00013979999999999998,
"loss": 2.543310089111328,
"step": 700
},
{
"epoch": 0.6893580353295993,
"grad_norm": 1.069161057472229,
"learning_rate": 0.00015979999999999998,
"loss": 2.396273651123047,
"step": 800
},
{
"epoch": 0.7755277897457993,
"grad_norm": 0.8585665822029114,
"learning_rate": 0.0001798,
"loss": 2.242165985107422,
"step": 900
},
{
"epoch": 0.8616975441619992,
"grad_norm": 0.7467069625854492,
"learning_rate": 0.0001998,
"loss": 2.1027012634277344,
"step": 1000
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.5805935859680176,
"learning_rate": 0.00021979999999999998,
"loss": 2.037454376220703,
"step": 1100
},
{
"epoch": 1.033606204222318,
"grad_norm": 0.5948718786239624,
"learning_rate": 0.00023979999999999997,
"loss": 1.9681085205078126,
"step": 1200
},
{
"epoch": 1.1197759586385179,
"grad_norm": 0.5413378477096558,
"learning_rate": 0.00025979999999999997,
"loss": 1.9135774230957032,
"step": 1300
},
{
"epoch": 1.2059457130547178,
"grad_norm": 0.5196030139923096,
"learning_rate": 0.00027979999999999997,
"loss": 1.8392716979980468,
"step": 1400
},
{
"epoch": 1.2921154674709177,
"grad_norm": 0.49619364738464355,
"learning_rate": 0.00029979999999999997,
"loss": 1.8049734497070313,
"step": 1500
},
{
"epoch": 1.3782852218871176,
"grad_norm": 0.44414839148521423,
"learning_rate": 0.000299991068233357,
"loss": 1.7638165283203124,
"step": 1600
},
{
"epoch": 1.4644549763033177,
"grad_norm": 0.46444711089134216,
"learning_rate": 0.0002999639122316208,
"loss": 1.7137832641601562,
"step": 1700
},
{
"epoch": 1.5506247307195173,
"grad_norm": 0.5176238417625427,
"learning_rate": 0.0002999185343831476,
"loss": 1.675589599609375,
"step": 1800
},
{
"epoch": 1.6367944851357175,
"grad_norm": 0.4177858829498291,
"learning_rate": 0.0002998549402017187,
"loss": 1.6349491882324219,
"step": 1900
},
{
"epoch": 1.7229642395519171,
"grad_norm": 0.42198434472084045,
"learning_rate": 0.0002997731374145493,
"loss": 1.596505126953125,
"step": 2000
},
{
"epoch": 1.8091339939681172,
"grad_norm": 0.4523915946483612,
"learning_rate": 0.0002996731359613498,
"loss": 1.5908058166503907,
"step": 2100
},
{
"epoch": 1.8953037483843171,
"grad_norm": 0.3901713788509369,
"learning_rate": 0.0002995549479931178,
"loss": 1.5610142517089844,
"step": 2200
},
{
"epoch": 1.981473502800517,
"grad_norm": 0.41816478967666626,
"learning_rate": 0.00029941858787066206,
"loss": 1.5319706726074218,
"step": 2300
},
{
"epoch": 2.067212408444636,
"grad_norm": 0.3872755765914917,
"learning_rate": 0.00029926407216285706,
"loss": 1.5055549621582032,
"step": 2400
},
{
"epoch": 2.1533821628608356,
"grad_norm": 0.4193103611469269,
"learning_rate": 0.0002990914196446301,
"loss": 1.4792218017578125,
"step": 2500
},
{
"epoch": 2.2395519172770357,
"grad_norm": 0.4024358093738556,
"learning_rate": 0.00029890065129467986,
"loss": 1.4786280822753906,
"step": 2600
},
{
"epoch": 2.325721671693236,
"grad_norm": 0.37588468194007874,
"learning_rate": 0.0002986917902929273,
"loss": 1.4545697021484374,
"step": 2700
},
{
"epoch": 2.4118914261094355,
"grad_norm": 0.39736974239349365,
"learning_rate": 0.0002984648620176991,
"loss": 1.4498170471191407,
"step": 2800
},
{
"epoch": 2.4980611805256356,
"grad_norm": 0.42380592226982117,
"learning_rate": 0.00029821989404264424,
"loss": 1.4262150573730468,
"step": 2900
},
{
"epoch": 2.5842309349418353,
"grad_norm": 0.411803662776947,
"learning_rate": 0.00029795691613338307,
"loss": 1.417086181640625,
"step": 3000
},
{
"epoch": 2.6704006893580354,
"grad_norm": 0.3662901818752289,
"learning_rate": 0.000297675960243891,
"loss": 1.3942941284179688,
"step": 3100
},
{
"epoch": 2.756570443774235,
"grad_norm": 0.3642771244049072,
"learning_rate": 0.00029737706051261557,
"loss": 1.38471923828125,
"step": 3200
},
{
"epoch": 2.842740198190435,
"grad_norm": 0.4138600826263428,
"learning_rate": 0.00029706025325832857,
"loss": 1.3765927124023438,
"step": 3300
},
{
"epoch": 2.9289099526066353,
"grad_norm": 0.3687536418437958,
"learning_rate": 0.0002967255769757127,
"loss": 1.3617820739746094,
"step": 3400
},
{
"epoch": 3.014648858250754,
"grad_norm": 0.3252148926258087,
"learning_rate": 0.0002963730723306845,
"loss": 1.3490205383300782,
"step": 3500
},
{
"epoch": 3.100818612666954,
"grad_norm": 0.3874260187149048,
"learning_rate": 0.0002960027821554529,
"loss": 1.3380169677734375,
"step": 3600
},
{
"epoch": 3.1869883670831536,
"grad_norm": 0.37778887152671814,
"learning_rate": 0.00029561475144331467,
"loss": 1.3190237426757812,
"step": 3700
},
{
"epoch": 3.2731581214993537,
"grad_norm": 0.37266016006469727,
"learning_rate": 0.00029520902734318766,
"loss": 1.313209991455078,
"step": 3800
},
{
"epoch": 3.359327875915554,
"grad_norm": 0.3792646527290344,
"learning_rate": 0.00029478565915388153,
"loss": 1.3055996704101562,
"step": 3900
},
{
"epoch": 3.4454976303317535,
"grad_norm": 0.3583495318889618,
"learning_rate": 0.00029434469831810764,
"loss": 1.301021728515625,
"step": 4000
}
],
"logging_steps": 100,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 26,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.9825523114901504e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}