working / last-checkpoint /trainer_state.json

Training in progress, step 1500, checkpoint

c2cae9c verified 7 months ago

6.42 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.766671573678787,
	"eval_steps": 500,
	"global_step": 1500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.014721036360959812,
	"grad_norm": 1.0893694162368774,
	"learning_rate": 4.808635917566242e-06,
	"loss": 1.1444,
	"step": 50
	},
	{
	"epoch": 0.029442072721919624,
	"grad_norm": 1.118213415145874,
	"learning_rate": 9.715407262021591e-06,
	"loss": 0.9497,
	"step": 100
	},
	{
	"epoch": 0.04416310908287943,
	"grad_norm": 1.3216179609298706,
	"learning_rate": 1.4622178606476939e-05,
	"loss": 0.8346,
	"step": 150
	},
	{
	"epoch": 0.05888414544383925,
	"grad_norm": 1.292870044708252,
	"learning_rate": 1.9528949950932288e-05,
	"loss": 0.7699,
	"step": 200
	},
	{
	"epoch": 0.07360518180479905,
	"grad_norm": 1.4046356678009033,
	"learning_rate": 2.4435721295387637e-05,
	"loss": 0.7252,
	"step": 250
	},
	{
	"epoch": 0.08832621816575886,
	"grad_norm": 1.4438663721084595,
	"learning_rate": 2.9342492639842983e-05,
	"loss": 0.6996,
	"step": 300
	},
	{
	"epoch": 0.10304725452671869,
	"grad_norm": 1.3257337808609009,
	"learning_rate": 3.424926398429833e-05,
	"loss": 0.686,
	"step": 350
	},
	{
	"epoch": 0.1177682908876785,
	"grad_norm": 1.3639789819717407,
	"learning_rate": 3.9156035328753685e-05,
	"loss": 0.6601,
	"step": 400
	},
	{
	"epoch": 0.1324893272486383,
	"grad_norm": 1.4148070812225342,
	"learning_rate": 4.406280667320903e-05,
	"loss": 0.6371,
	"step": 450
	},
	{
	"epoch": 0.1472103636095981,
	"grad_norm": 1.3940412998199463,
	"learning_rate": 4.8969578017664384e-05,
	"loss": 0.6417,
	"step": 500
	},
	{
	"epoch": 0.6477255998822317,
	"grad_norm": 0.6072946190834045,
	"learning_rate": 9.599483839268026e-05,
	"loss": 0.6122,
	"step": 550
	},
	{
	"epoch": 0.7066097453260709,
	"grad_norm": 0.6030572652816772,
	"learning_rate": 9.454410179022932e-05,
	"loss": 0.5809,
	"step": 600
	},
	{
	"epoch": 0.7654938907699103,
	"grad_norm": 0.5781008005142212,
	"learning_rate": 9.288422825194501e-05,
	"loss": 0.5446,
	"step": 650
	},
	{
	"epoch": 0.8243780362137495,
	"grad_norm": 0.5412103533744812,
	"learning_rate": 9.102301097269974e-05,
	"loss": 0.5339,
	"step": 700
	},
	{
	"epoch": 0.8832621816575887,
	"grad_norm": 0.5678456425666809,
	"learning_rate": 8.896918846697821e-05,
	"loss": 0.5296,
	"step": 750
	},
	{
	"epoch": 0.942146327101428,
	"grad_norm": 0.525556206703186,
	"learning_rate": 8.673240354108538e-05,
	"loss": 0.5176,
	"step": 800
	},
	{
	"epoch": 1.0011776829088768,
	"grad_norm": 1.9685856103897095,
	"learning_rate": 8.432315801965616e-05,
	"loss": 0.5104,
	"step": 850
	},
	{
	"epoch": 1.0600618283527161,
	"grad_norm": 0.6006094217300415,
	"learning_rate": 8.175276343902802e-05,
	"loss": 0.4685,
	"step": 900
	},
	{
	"epoch": 1.1189459737965552,
	"grad_norm": 0.5228903889656067,
	"learning_rate": 7.903328793897418e-05,
	"loss": 0.473,
	"step": 950
	},
	{
	"epoch": 1.1778301192403946,
	"grad_norm": 0.5006899237632751,
	"learning_rate": 7.6177499602143e-05,
	"loss": 0.4679,
	"step": 1000
	},
	{
	"epoch": 1.1778301192403946,
	"eval_loss": 0.4844963848590851,
	"eval_runtime": 2172.4438,
	"eval_samples_per_second": 1.39,
	"eval_steps_per_second": 0.695,
	"step": 1000
	},
	{
	"epoch": 1.2367142646842337,
	"grad_norm": 0.5041179060935974,
	"learning_rate": 7.319880650722838e-05,
	"loss": 0.4541,
	"step": 1050
	},
	{
	"epoch": 1.295598410128073,
	"grad_norm": 0.5369197726249695,
	"learning_rate": 7.01111937773246e-05,
	"loss": 0.4576,
	"step": 1100
	},
	{
	"epoch": 1.3544825555719122,
	"grad_norm": 0.5211925506591797,
	"learning_rate": 6.692915791902665e-05,
	"loss": 0.4472,
	"step": 1150
	},
	{
	"epoch": 1.4133667010157516,
	"grad_norm": 0.5664705038070679,
	"learning_rate": 6.366763876055806e-05,
	"loss": 0.4427,
	"step": 1200
	},
	{
	"epoch": 1.4722508464595907,
	"grad_norm": 0.5420666337013245,
	"learning_rate": 6.034194930847975e-05,
	"loss": 0.4395,
	"step": 1250
	},
	{
	"epoch": 1.53113499190343,
	"grad_norm": 0.558952271938324,
	"learning_rate": 5.6967703852306786e-05,
	"loss": 0.4305,
	"step": 1300
	},
	{
	"epoch": 1.5900191373472694,
	"grad_norm": 0.510136067867279,
	"learning_rate": 5.356074465458553e-05,
	"loss": 0.428,
	"step": 1350
	},
	{
	"epoch": 1.6489032827911085,
	"grad_norm": 0.506799578666687,
	"learning_rate": 5.013706757062534e-05,
	"loss": 0.4251,
	"step": 1400
	},
	{
	"epoch": 1.7077874282349477,
	"grad_norm": 0.5179591178894043,
	"learning_rate": 4.671274694710388e-05,
	"loss": 0.4188,
	"step": 1450
	},
	{
	"epoch": 1.766671573678787,
	"grad_norm": 0.531908392906189,
	"learning_rate": 4.3303860152151445e-05,
	"loss": 0.4177,
	"step": 1500
	},
	{
	"epoch": 1.766671573678787,
	"eval_loss": 0.4321376383304596,
	"eval_runtime": 2174.1694,
	"eval_samples_per_second": 1.389,
	"eval_steps_per_second": 0.695,
	"step": 1500
	}
	],
	"logging_steps": 50,
	"max_steps": 2547,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.011350882666414e+17,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}