{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 229,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02188183807439825,
      "grad_norm": 1.2056468725204468,
      "learning_rate": 2.068965517241379e-06,
      "loss": 1.3589,
      "step": 5
    },
    {
      "epoch": 0.0437636761487965,
      "grad_norm": 0.7383206486701965,
      "learning_rate": 4.655172413793104e-06,
      "loss": 1.3299,
      "step": 10
    },
    {
      "epoch": 0.06564551422319474,
      "grad_norm": 0.5696788430213928,
      "learning_rate": 7.241379310344828e-06,
      "loss": 1.3287,
      "step": 15
    },
    {
      "epoch": 0.087527352297593,
      "grad_norm": 0.4898790419101715,
      "learning_rate": 9.827586206896551e-06,
      "loss": 1.3286,
      "step": 20
    },
    {
      "epoch": 0.10940919037199125,
      "grad_norm": 0.6403371691703796,
      "learning_rate": 1.2413793103448277e-05,
      "loss": 1.3167,
      "step": 25
    },
    {
      "epoch": 0.13129102844638948,
      "grad_norm": 0.6023938655853271,
      "learning_rate": 1.5e-05,
      "loss": 1.3163,
      "step": 30
    },
    {
      "epoch": 0.15317286652078774,
      "grad_norm": 0.4848966598510742,
      "learning_rate": 1.7586206896551724e-05,
      "loss": 1.27,
      "step": 35
    },
    {
      "epoch": 0.175054704595186,
      "grad_norm": 0.6767584681510925,
      "learning_rate": 2.017241379310345e-05,
      "loss": 1.2358,
      "step": 40
    },
    {
      "epoch": 0.19693654266958424,
      "grad_norm": 0.42833346128463745,
      "learning_rate": 2.275862068965517e-05,
      "loss": 1.2356,
      "step": 45
    },
    {
      "epoch": 0.2188183807439825,
      "grad_norm": 0.5033416152000427,
      "learning_rate": 2.5344827586206897e-05,
      "loss": 1.1764,
      "step": 50
    },
    {
      "epoch": 0.24070021881838075,
      "grad_norm": 0.5110407471656799,
      "learning_rate": 2.793103448275862e-05,
      "loss": 1.1782,
      "step": 55
    },
    {
      "epoch": 0.26258205689277897,
      "grad_norm": 0.5004281997680664,
      "learning_rate": 2.9999937352806748e-05,
      "loss": 1.2135,
      "step": 60
    },
    {
      "epoch": 0.2844638949671772,
      "grad_norm": 0.5905261039733887,
      "learning_rate": 2.9997744755987852e-05,
      "loss": 1.1598,
      "step": 65
    },
    {
      "epoch": 0.3063457330415755,
      "grad_norm": 0.46601855754852295,
      "learning_rate": 2.999242032277618e-05,
      "loss": 1.1192,
      "step": 70
    },
    {
      "epoch": 0.3282275711159737,
      "grad_norm": 0.5664127469062805,
      "learning_rate": 2.9983965165022473e-05,
      "loss": 1.135,
      "step": 75
    },
    {
      "epoch": 0.350109409190372,
      "grad_norm": 0.5967416763305664,
      "learning_rate": 2.9972381048336917e-05,
      "loss": 1.1354,
      "step": 80
    },
    {
      "epoch": 0.37199124726477023,
      "grad_norm": 0.5042751431465149,
      "learning_rate": 2.995767039172042e-05,
      "loss": 1.0858,
      "step": 85
    },
    {
      "epoch": 0.3938730853391685,
      "grad_norm": 0.5433419942855835,
      "learning_rate": 2.9939836267059482e-05,
      "loss": 1.0781,
      "step": 90
    },
    {
      "epoch": 0.41575492341356673,
      "grad_norm": 0.5662135481834412,
      "learning_rate": 2.9918882398484742e-05,
      "loss": 1.0949,
      "step": 95
    },
    {
      "epoch": 0.437636761487965,
      "grad_norm": 0.6487781405448914,
      "learning_rate": 2.989481316159328e-05,
      "loss": 1.0721,
      "step": 100
    },
    {
      "epoch": 0.45951859956236324,
      "grad_norm": 0.5745230317115784,
      "learning_rate": 2.9867633582534904e-05,
      "loss": 1.0192,
      "step": 105
    },
    {
      "epoch": 0.4814004376367615,
      "grad_norm": 0.7092564702033997,
      "learning_rate": 2.9837349336962612e-05,
      "loss": 1.0884,
      "step": 110
    },
    {
      "epoch": 0.5032822757111597,
      "grad_norm": 0.6593285202980042,
      "learning_rate": 2.9803966748847366e-05,
      "loss": 0.9648,
      "step": 115
    },
    {
      "epoch": 0.5251641137855579,
      "grad_norm": 0.751462996006012,
      "learning_rate": 2.976749278915754e-05,
      "loss": 0.9229,
      "step": 120
    },
    {
      "epoch": 0.5470459518599562,
      "grad_norm": 0.6278636455535889,
      "learning_rate": 2.9727935074403228e-05,
      "loss": 1.0129,
      "step": 125
    },
    {
      "epoch": 0.5689277899343544,
      "grad_norm": 0.7625859379768372,
      "learning_rate": 2.9685301865045768e-05,
      "loss": 0.9795,
      "step": 130
    },
    {
      "epoch": 0.5908096280087527,
      "grad_norm": 0.7692922949790955,
      "learning_rate": 2.9639602063772777e-05,
      "loss": 0.9894,
      "step": 135
    },
    {
      "epoch": 0.612691466083151,
      "grad_norm": 0.7171132564544678,
      "learning_rate": 2.959084521363911e-05,
      "loss": 0.9059,
      "step": 140
    },
    {
      "epoch": 0.6345733041575492,
      "grad_norm": 0.9169694781303406,
      "learning_rate": 2.9539041496074043e-05,
      "loss": 0.9285,
      "step": 145
    },
    {
      "epoch": 0.6564551422319475,
      "grad_norm": 0.8271639347076416,
      "learning_rate": 2.9484201728755205e-05,
      "loss": 0.8914,
      "step": 150
    },
    {
      "epoch": 0.6783369803063457,
      "grad_norm": 0.8969722986221313,
      "learning_rate": 2.9426337363349627e-05,
      "loss": 0.8813,
      "step": 155
    },
    {
      "epoch": 0.700218818380744,
      "grad_norm": 0.8180518746376038,
      "learning_rate": 2.9365460483122385e-05,
      "loss": 0.8638,
      "step": 160
    },
    {
      "epoch": 0.7221006564551422,
      "grad_norm": 0.9082233905792236,
      "learning_rate": 2.9301583800413363e-05,
      "loss": 0.9084,
      "step": 165
    },
    {
      "epoch": 0.7439824945295405,
      "grad_norm": 0.8155914545059204,
      "learning_rate": 2.923472065398268e-05,
      "loss": 0.8917,
      "step": 170
    },
    {
      "epoch": 0.7658643326039387,
      "grad_norm": 0.8886885643005371,
      "learning_rate": 2.916488500622527e-05,
      "loss": 0.8079,
      "step": 175
    },
    {
      "epoch": 0.787746170678337,
      "grad_norm": 0.8544840812683105,
      "learning_rate": 2.909209144025524e-05,
      "loss": 0.8317,
      "step": 180
    },
    {
      "epoch": 0.8096280087527352,
      "grad_norm": 0.8505244851112366,
      "learning_rate": 2.9016355156860625e-05,
      "loss": 0.7849,
      "step": 185
    },
    {
      "epoch": 0.8315098468271335,
      "grad_norm": 0.9838951826095581,
      "learning_rate": 2.8937691971329155e-05,
      "loss": 0.8021,
      "step": 190
    },
    {
      "epoch": 0.8533916849015317,
      "grad_norm": 0.9686183333396912,
      "learning_rate": 2.8856118310145687e-05,
      "loss": 0.8038,
      "step": 195
    },
    {
      "epoch": 0.87527352297593,
      "grad_norm": 0.9777634739875793,
      "learning_rate": 2.8771651207562043e-05,
      "loss": 0.773,
      "step": 200
    },
    {
      "epoch": 0.8971553610503282,
      "grad_norm": 1.0718952417373657,
      "learning_rate": 2.8684308302039878e-05,
      "loss": 0.723,
      "step": 205
    },
    {
      "epoch": 0.9190371991247265,
      "grad_norm": 1.031557321548462,
      "learning_rate": 2.8594107832567424e-05,
      "loss": 0.6804,
      "step": 210
    },
    {
      "epoch": 0.9409190371991247,
      "grad_norm": 0.9825000762939453,
      "learning_rate": 2.850106863485082e-05,
      "loss": 0.7704,
      "step": 215
    },
    {
      "epoch": 0.962800875273523,
      "grad_norm": 1.1034126281738281,
      "learning_rate": 2.840521013738083e-05,
      "loss": 0.7545,
      "step": 220
    },
    {
      "epoch": 0.9846827133479212,
      "grad_norm": 1.1799556016921997,
      "learning_rate": 2.8306552357375753e-05,
      "loss": 0.7517,
      "step": 225
    }
  ],
  "logging_steps": 5,
  "max_steps": 1145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.164877723722056e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}