End of training

bdb23dd verified over 1 year ago

6.25 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9973394146712277,
	"eval_steps": 500,
	"global_step": 328,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03040668947168377,
	"grad_norm": 5.6152509966106985,
	"learning_rate": 5e-06,
	"loss": 0.9255,
	"step": 10
	},
	{
	"epoch": 0.06081337894336754,
	"grad_norm": 1.948199272854136,
	"learning_rate": 5e-06,
	"loss": 0.8117,
	"step": 20
	},
	{
	"epoch": 0.09122006841505131,
	"grad_norm": 1.6637843548352236,
	"learning_rate": 5e-06,
	"loss": 0.7758,
	"step": 30
	},
	{
	"epoch": 0.12162675788673508,
	"grad_norm": 0.907983749953646,
	"learning_rate": 5e-06,
	"loss": 0.7519,
	"step": 40
	},
	{
	"epoch": 0.15203344735841884,
	"grad_norm": 1.7280229551951782,
	"learning_rate": 5e-06,
	"loss": 0.7352,
	"step": 50
	},
	{
	"epoch": 0.18244013683010263,
	"grad_norm": 0.8504963430088522,
	"learning_rate": 5e-06,
	"loss": 0.7224,
	"step": 60
	},
	{
	"epoch": 0.2128468263017864,
	"grad_norm": 1.2357854791523568,
	"learning_rate": 5e-06,
	"loss": 0.7105,
	"step": 70
	},
	{
	"epoch": 0.24325351577347015,
	"grad_norm": 1.2476890602998016,
	"learning_rate": 5e-06,
	"loss": 0.7038,
	"step": 80
	},
	{
	"epoch": 0.27366020524515394,
	"grad_norm": 0.63122069402106,
	"learning_rate": 5e-06,
	"loss": 0.6999,
	"step": 90
	},
	{
	"epoch": 0.3040668947168377,
	"grad_norm": 0.6553804145899658,
	"learning_rate": 5e-06,
	"loss": 0.6863,
	"step": 100
	},
	{
	"epoch": 0.33447358418852147,
	"grad_norm": 0.5841011076306307,
	"learning_rate": 5e-06,
	"loss": 0.6907,
	"step": 110
	},
	{
	"epoch": 0.36488027366020526,
	"grad_norm": 0.7996499453541979,
	"learning_rate": 5e-06,
	"loss": 0.6819,
	"step": 120
	},
	{
	"epoch": 0.395286963131889,
	"grad_norm": 0.5950822241048144,
	"learning_rate": 5e-06,
	"loss": 0.6871,
	"step": 130
	},
	{
	"epoch": 0.4256936526035728,
	"grad_norm": 0.5683219252381152,
	"learning_rate": 5e-06,
	"loss": 0.6825,
	"step": 140
	},
	{
	"epoch": 0.45610034207525657,
	"grad_norm": 0.7530256115772663,
	"learning_rate": 5e-06,
	"loss": 0.6774,
	"step": 150
	},
	{
	"epoch": 0.4865070315469403,
	"grad_norm": 0.6459654005909365,
	"learning_rate": 5e-06,
	"loss": 0.6847,
	"step": 160
	},
	{
	"epoch": 0.5169137210186241,
	"grad_norm": 0.5836331839357853,
	"learning_rate": 5e-06,
	"loss": 0.6768,
	"step": 170
	},
	{
	"epoch": 0.5473204104903079,
	"grad_norm": 0.5565233021284824,
	"learning_rate": 5e-06,
	"loss": 0.6849,
	"step": 180
	},
	{
	"epoch": 0.5777270999619917,
	"grad_norm": 0.6013400542500057,
	"learning_rate": 5e-06,
	"loss": 0.6713,
	"step": 190
	},
	{
	"epoch": 0.6081337894336754,
	"grad_norm": 0.5233954675275515,
	"learning_rate": 5e-06,
	"loss": 0.671,
	"step": 200
	},
	{
	"epoch": 0.6385404789053591,
	"grad_norm": 0.5779435903050023,
	"learning_rate": 5e-06,
	"loss": 0.6724,
	"step": 210
	},
	{
	"epoch": 0.6689471683770429,
	"grad_norm": 0.6526020744171068,
	"learning_rate": 5e-06,
	"loss": 0.6719,
	"step": 220
	},
	{
	"epoch": 0.6993538578487267,
	"grad_norm": 0.5895434275536652,
	"learning_rate": 5e-06,
	"loss": 0.6689,
	"step": 230
	},
	{
	"epoch": 0.7297605473204105,
	"grad_norm": 0.6343504733705319,
	"learning_rate": 5e-06,
	"loss": 0.6687,
	"step": 240
	},
	{
	"epoch": 0.7601672367920943,
	"grad_norm": 0.7332518294295413,
	"learning_rate": 5e-06,
	"loss": 0.6639,
	"step": 250
	},
	{
	"epoch": 0.790573926263778,
	"grad_norm": 0.5332685530635167,
	"learning_rate": 5e-06,
	"loss": 0.6684,
	"step": 260
	},
	{
	"epoch": 0.8209806157354618,
	"grad_norm": 0.5890443926278276,
	"learning_rate": 5e-06,
	"loss": 0.6619,
	"step": 270
	},
	{
	"epoch": 0.8513873052071456,
	"grad_norm": 0.5702438242189162,
	"learning_rate": 5e-06,
	"loss": 0.6611,
	"step": 280
	},
	{
	"epoch": 0.8817939946788294,
	"grad_norm": 0.6228763934659501,
	"learning_rate": 5e-06,
	"loss": 0.6596,
	"step": 290
	},
	{
	"epoch": 0.9122006841505131,
	"grad_norm": 0.6318262944444193,
	"learning_rate": 5e-06,
	"loss": 0.6623,
	"step": 300
	},
	{
	"epoch": 0.9426073736221969,
	"grad_norm": 0.6675343568518476,
	"learning_rate": 5e-06,
	"loss": 0.6565,
	"step": 310
	},
	{
	"epoch": 0.9730140630938806,
	"grad_norm": 0.5399374272267031,
	"learning_rate": 5e-06,
	"loss": 0.6579,
	"step": 320
	},
	{
	"epoch": 0.9973394146712277,
	"eval_loss": 0.6590226292610168,
	"eval_runtime": 348.3776,
	"eval_samples_per_second": 25.435,
	"eval_steps_per_second": 0.399,
	"step": 328
	},
	{
	"epoch": 0.9973394146712277,
	"step": 328,
	"total_flos": 549202836848640.0,
	"train_loss": 0.696046143043332,
	"train_runtime": 19374.8951,
	"train_samples_per_second": 8.689,
	"train_steps_per_second": 0.017
	}
	],
	"logging_steps": 10,
	"max_steps": 328,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 549202836848640.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}