{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 178,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.028089887640449437,
      "grad_norm": 1.2830733060836792,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.3035,
      "step": 5
    },
    {
      "epoch": 0.056179775280898875,
      "grad_norm": 0.6879112124443054,
      "learning_rate": 6e-06,
      "loss": 1.4105,
      "step": 10
    },
    {
      "epoch": 0.08426966292134831,
      "grad_norm": 0.5197162628173828,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.3754,
      "step": 15
    },
    {
      "epoch": 0.11235955056179775,
      "grad_norm": 0.5167145133018494,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 1.3014,
      "step": 20
    },
    {
      "epoch": 0.1404494382022472,
      "grad_norm": 0.5114685297012329,
      "learning_rate": 1.6e-05,
      "loss": 1.3148,
      "step": 25
    },
    {
      "epoch": 0.16853932584269662,
      "grad_norm": 0.542929470539093,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 1.2024,
      "step": 30
    },
    {
      "epoch": 0.19662921348314608,
      "grad_norm": 0.47515320777893066,
      "learning_rate": 2.2666666666666668e-05,
      "loss": 1.2537,
      "step": 35
    },
    {
      "epoch": 0.2247191011235955,
      "grad_norm": 0.4169822633266449,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.2063,
      "step": 40
    },
    {
      "epoch": 0.25280898876404495,
      "grad_norm": 0.42015039920806885,
      "learning_rate": 2.9333333333333333e-05,
      "loss": 1.1947,
      "step": 45
    },
    {
      "epoch": 0.2808988764044944,
      "grad_norm": 0.5036826133728027,
      "learning_rate": 2.9998341331605073e-05,
      "loss": 1.1333,
      "step": 50
    },
    {
      "epoch": 0.3089887640449438,
      "grad_norm": 0.47650378942489624,
      "learning_rate": 2.9991603619933566e-05,
      "loss": 1.1118,
      "step": 55
    },
    {
      "epoch": 0.33707865168539325,
      "grad_norm": 0.5294891595840454,
      "learning_rate": 2.99796855246516e-05,
      "loss": 1.0854,
      "step": 60
    },
    {
      "epoch": 0.3651685393258427,
      "grad_norm": 0.4981288015842438,
      "learning_rate": 2.9962591164084806e-05,
      "loss": 1.0895,
      "step": 65
    },
    {
      "epoch": 0.39325842696629215,
      "grad_norm": 0.6262781620025635,
      "learning_rate": 2.9940326445229367e-05,
      "loss": 1.0693,
      "step": 70
    },
    {
      "epoch": 0.42134831460674155,
      "grad_norm": 0.6170726418495178,
      "learning_rate": 2.991289906171083e-05,
      "loss": 1.0707,
      "step": 75
    },
    {
      "epoch": 0.449438202247191,
      "grad_norm": 0.4974953830242157,
      "learning_rate": 2.9880318491125568e-05,
      "loss": 1.0228,
      "step": 80
    },
    {
      "epoch": 0.47752808988764045,
      "grad_norm": 0.6400043964385986,
      "learning_rate": 2.9842595991765766e-05,
      "loss": 1.0408,
      "step": 85
    },
    {
      "epoch": 0.5056179775280899,
      "grad_norm": 0.6472094058990479,
      "learning_rate": 2.9799744598729097e-05,
      "loss": 1.0381,
      "step": 90
    },
    {
      "epoch": 0.5337078651685393,
      "grad_norm": 0.6478850245475769,
      "learning_rate": 2.97517911941444e-05,
      "loss": 0.9523,
      "step": 95
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 0.6548949480056763,
      "learning_rate": 2.9698716128404985e-05,
      "loss": 0.9321,
      "step": 100
    },
    {
      "epoch": 0.5898876404494382,
      "grad_norm": 0.7128049731254578,
      "learning_rate": 2.964057396174119e-05,
      "loss": 0.9353,
      "step": 105
    },
    {
      "epoch": 0.6179775280898876,
      "grad_norm": 0.7917237281799316,
      "learning_rate": 2.9577372710584375e-05,
      "loss": 0.9522,
      "step": 110
    },
    {
      "epoch": 0.6460674157303371,
      "grad_norm": 0.7267556190490723,
      "learning_rate": 2.9509134214274343e-05,
      "loss": 0.8641,
      "step": 115
    },
    {
      "epoch": 0.6741573033707865,
      "grad_norm": 0.8102006912231445,
      "learning_rate": 2.9435882052782717e-05,
      "loss": 0.869,
      "step": 120
    },
    {
      "epoch": 0.702247191011236,
      "grad_norm": 0.740906298160553,
      "learning_rate": 2.935764153856481e-05,
      "loss": 0.8267,
      "step": 125
    },
    {
      "epoch": 0.7303370786516854,
      "grad_norm": 0.8049883246421814,
      "learning_rate": 2.927443970781287e-05,
      "loss": 0.8282,
      "step": 130
    },
    {
      "epoch": 0.7584269662921348,
      "grad_norm": 0.8820163607597351,
      "learning_rate": 2.918630531111365e-05,
      "loss": 0.8037,
      "step": 135
    },
    {
      "epoch": 0.7865168539325843,
      "grad_norm": 0.9033377766609192,
      "learning_rate": 2.9093268803513564e-05,
      "loss": 0.8585,
      "step": 140
    },
    {
      "epoch": 0.8146067415730337,
      "grad_norm": 0.9345238208770752,
      "learning_rate": 2.8995362333994906e-05,
      "loss": 0.8272,
      "step": 145
    },
    {
      "epoch": 0.8426966292134831,
      "grad_norm": 1.106722354888916,
      "learning_rate": 2.889261973436665e-05,
      "loss": 0.8008,
      "step": 150
    },
    {
      "epoch": 0.8707865168539326,
      "grad_norm": 0.9598326683044434,
      "learning_rate": 2.8785076507573825e-05,
      "loss": 0.7377,
      "step": 155
    },
    {
      "epoch": 0.898876404494382,
      "grad_norm": 0.873813271522522,
      "learning_rate": 2.8672769815429385e-05,
      "loss": 0.7478,
      "step": 160
    },
    {
      "epoch": 0.9269662921348315,
      "grad_norm": 0.90259850025177,
      "learning_rate": 2.855573846577284e-05,
      "loss": 0.7459,
      "step": 165
    },
    {
      "epoch": 0.9550561797752809,
      "grad_norm": 1.0914926528930664,
      "learning_rate": 2.843402289906013e-05,
      "loss": 0.7069,
      "step": 170
    },
    {
      "epoch": 0.9831460674157303,
      "grad_norm": 1.1495745182037354,
      "learning_rate": 2.8307665174389323e-05,
      "loss": 0.7393,
      "step": 175
    }
  ],
  "logging_steps": 5,
  "max_steps": 890,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5878203703964467e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}