{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.4454976303317535,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08616975441619991,
      "grad_norm": 1.8284112215042114,
      "learning_rate": 1.98e-05,
      "loss": 5.544659423828125,
      "step": 100
    },
    {
      "epoch": 0.17233950883239982,
      "grad_norm": 0.5319015383720398,
      "learning_rate": 3.979999999999999e-05,
      "loss": 3.90488037109375,
      "step": 200
    },
    {
      "epoch": 0.25850926324859974,
      "grad_norm": 1.3977950811386108,
      "learning_rate": 5.98e-05,
      "loss": 3.39756103515625,
      "step": 300
    },
    {
      "epoch": 0.34467901766479964,
      "grad_norm": 1.9291573762893677,
      "learning_rate": 7.98e-05,
      "loss": 3.019991149902344,
      "step": 400
    },
    {
      "epoch": 0.4308487720809996,
      "grad_norm": 1.4095340967178345,
      "learning_rate": 9.979999999999999e-05,
      "loss": 2.815445861816406,
      "step": 500
    },
    {
      "epoch": 0.5170185264971995,
      "grad_norm": 3.2716641426086426,
      "learning_rate": 0.00011979999999999998,
      "loss": 2.6590045166015623,
      "step": 600
    },
    {
      "epoch": 0.6031882809133994,
      "grad_norm": 1.3838716745376587,
      "learning_rate": 0.00013979999999999998,
      "loss": 2.543310089111328,
      "step": 700
    },
    {
      "epoch": 0.6893580353295993,
      "grad_norm": 1.069161057472229,
      "learning_rate": 0.00015979999999999998,
      "loss": 2.396273651123047,
      "step": 800
    },
    {
      "epoch": 0.7755277897457993,
      "grad_norm": 0.8585665822029114,
      "learning_rate": 0.0001798,
      "loss": 2.242165985107422,
      "step": 900
    },
    {
      "epoch": 0.8616975441619992,
      "grad_norm": 0.7467069625854492,
      "learning_rate": 0.0001998,
      "loss": 2.1027012634277344,
      "step": 1000
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 0.5805935859680176,
      "learning_rate": 0.00021979999999999998,
      "loss": 2.037454376220703,
      "step": 1100
    },
    {
      "epoch": 1.033606204222318,
      "grad_norm": 0.5948718786239624,
      "learning_rate": 0.00023979999999999997,
      "loss": 1.9681085205078126,
      "step": 1200
    },
    {
      "epoch": 1.1197759586385179,
      "grad_norm": 0.5413378477096558,
      "learning_rate": 0.00025979999999999997,
      "loss": 1.9135774230957032,
      "step": 1300
    },
    {
      "epoch": 1.2059457130547178,
      "grad_norm": 0.5196030139923096,
      "learning_rate": 0.00027979999999999997,
      "loss": 1.8392716979980468,
      "step": 1400
    },
    {
      "epoch": 1.2921154674709177,
      "grad_norm": 0.49619364738464355,
      "learning_rate": 0.00029979999999999997,
      "loss": 1.8049734497070313,
      "step": 1500
    },
    {
      "epoch": 1.3782852218871176,
      "grad_norm": 0.44414839148521423,
      "learning_rate": 0.000299991068233357,
      "loss": 1.7638165283203124,
      "step": 1600
    },
    {
      "epoch": 1.4644549763033177,
      "grad_norm": 0.46444711089134216,
      "learning_rate": 0.0002999639122316208,
      "loss": 1.7137832641601562,
      "step": 1700
    },
    {
      "epoch": 1.5506247307195173,
      "grad_norm": 0.5176238417625427,
      "learning_rate": 0.0002999185343831476,
      "loss": 1.675589599609375,
      "step": 1800
    },
    {
      "epoch": 1.6367944851357175,
      "grad_norm": 0.4177858829498291,
      "learning_rate": 0.0002998549402017187,
      "loss": 1.6349491882324219,
      "step": 1900
    },
    {
      "epoch": 1.7229642395519171,
      "grad_norm": 0.42198434472084045,
      "learning_rate": 0.0002997731374145493,
      "loss": 1.596505126953125,
      "step": 2000
    },
    {
      "epoch": 1.8091339939681172,
      "grad_norm": 0.4523915946483612,
      "learning_rate": 0.0002996731359613498,
      "loss": 1.5908058166503907,
      "step": 2100
    },
    {
      "epoch": 1.8953037483843171,
      "grad_norm": 0.3901713788509369,
      "learning_rate": 0.0002995549479931178,
      "loss": 1.5610142517089844,
      "step": 2200
    },
    {
      "epoch": 1.981473502800517,
      "grad_norm": 0.41816478967666626,
      "learning_rate": 0.00029941858787066206,
      "loss": 1.5319706726074218,
      "step": 2300
    },
    {
      "epoch": 2.067212408444636,
      "grad_norm": 0.3872755765914917,
      "learning_rate": 0.00029926407216285706,
      "loss": 1.5055549621582032,
      "step": 2400
    },
    {
      "epoch": 2.1533821628608356,
      "grad_norm": 0.4193103611469269,
      "learning_rate": 0.0002990914196446301,
      "loss": 1.4792218017578125,
      "step": 2500
    },
    {
      "epoch": 2.2395519172770357,
      "grad_norm": 0.4024358093738556,
      "learning_rate": 0.00029890065129467986,
      "loss": 1.4786280822753906,
      "step": 2600
    },
    {
      "epoch": 2.325721671693236,
      "grad_norm": 0.37588468194007874,
      "learning_rate": 0.0002986917902929273,
      "loss": 1.4545697021484374,
      "step": 2700
    },
    {
      "epoch": 2.4118914261094355,
      "grad_norm": 0.39736974239349365,
      "learning_rate": 0.0002984648620176991,
      "loss": 1.4498170471191407,
      "step": 2800
    },
    {
      "epoch": 2.4980611805256356,
      "grad_norm": 0.42380592226982117,
      "learning_rate": 0.00029821989404264424,
      "loss": 1.4262150573730468,
      "step": 2900
    },
    {
      "epoch": 2.5842309349418353,
      "grad_norm": 0.411803662776947,
      "learning_rate": 0.00029795691613338307,
      "loss": 1.417086181640625,
      "step": 3000
    },
    {
      "epoch": 2.6704006893580354,
      "grad_norm": 0.3662901818752289,
      "learning_rate": 0.000297675960243891,
      "loss": 1.3942941284179688,
      "step": 3100
    },
    {
      "epoch": 2.756570443774235,
      "grad_norm": 0.3642771244049072,
      "learning_rate": 0.00029737706051261557,
      "loss": 1.38471923828125,
      "step": 3200
    },
    {
      "epoch": 2.842740198190435,
      "grad_norm": 0.4138600826263428,
      "learning_rate": 0.00029706025325832857,
      "loss": 1.3765927124023438,
      "step": 3300
    },
    {
      "epoch": 2.9289099526066353,
      "grad_norm": 0.3687536418437958,
      "learning_rate": 0.0002967255769757127,
      "loss": 1.3617820739746094,
      "step": 3400
    },
    {
      "epoch": 3.014648858250754,
      "grad_norm": 0.3252148926258087,
      "learning_rate": 0.0002963730723306845,
      "loss": 1.3490205383300782,
      "step": 3500
    },
    {
      "epoch": 3.100818612666954,
      "grad_norm": 0.3874260187149048,
      "learning_rate": 0.0002960027821554529,
      "loss": 1.3380169677734375,
      "step": 3600
    },
    {
      "epoch": 3.1869883670831536,
      "grad_norm": 0.37778887152671814,
      "learning_rate": 0.00029561475144331467,
      "loss": 1.3190237426757812,
      "step": 3700
    },
    {
      "epoch": 3.2731581214993537,
      "grad_norm": 0.37266016006469727,
      "learning_rate": 0.00029520902734318766,
      "loss": 1.313209991455078,
      "step": 3800
    },
    {
      "epoch": 3.359327875915554,
      "grad_norm": 0.3792646527290344,
      "learning_rate": 0.00029478565915388153,
      "loss": 1.3055996704101562,
      "step": 3900
    },
    {
      "epoch": 3.4454976303317535,
      "grad_norm": 0.3583495318889618,
      "learning_rate": 0.00029434469831810764,
      "loss": 1.301021728515625,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 26,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.9825523114901504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|