End of training

b6ddb0f verified over 1 year ago

6.4 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9998129092609915,
	"eval_steps": 500,
	"global_step": 334,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.029934518241347054,
	"grad_norm": 1.3387344639766428,
	"learning_rate": 5e-06,
	"loss": 0.7726,
	"step": 10
	},
	{
	"epoch": 0.05986903648269411,
	"grad_norm": 0.7768397662301797,
	"learning_rate": 5e-06,
	"loss": 0.6941,
	"step": 20
	},
	{
	"epoch": 0.08980355472404115,
	"grad_norm": 0.707220098018675,
	"learning_rate": 5e-06,
	"loss": 0.669,
	"step": 30
	},
	{
	"epoch": 0.11973807296538821,
	"grad_norm": 0.6673639225008394,
	"learning_rate": 5e-06,
	"loss": 0.6651,
	"step": 40
	},
	{
	"epoch": 0.14967259120673526,
	"grad_norm": 0.931884409826821,
	"learning_rate": 5e-06,
	"loss": 0.6563,
	"step": 50
	},
	{
	"epoch": 0.1796071094480823,
	"grad_norm": 0.7248290568232586,
	"learning_rate": 5e-06,
	"loss": 0.6486,
	"step": 60
	},
	{
	"epoch": 0.20954162768942938,
	"grad_norm": 0.7317021024341339,
	"learning_rate": 5e-06,
	"loss": 0.6456,
	"step": 70
	},
	{
	"epoch": 0.23947614593077643,
	"grad_norm": 0.7878073380412045,
	"learning_rate": 5e-06,
	"loss": 0.647,
	"step": 80
	},
	{
	"epoch": 0.2694106641721235,
	"grad_norm": 0.9477711469098681,
	"learning_rate": 5e-06,
	"loss": 0.638,
	"step": 90
	},
	{
	"epoch": 0.2993451824134705,
	"grad_norm": 0.7824185434647374,
	"learning_rate": 5e-06,
	"loss": 0.6395,
	"step": 100
	},
	{
	"epoch": 0.3292797006548176,
	"grad_norm": 0.7691665877451451,
	"learning_rate": 5e-06,
	"loss": 0.6352,
	"step": 110
	},
	{
	"epoch": 0.3592142188961646,
	"grad_norm": 0.85227640579011,
	"learning_rate": 5e-06,
	"loss": 0.6344,
	"step": 120
	},
	{
	"epoch": 0.3891487371375117,
	"grad_norm": 0.7349060200879011,
	"learning_rate": 5e-06,
	"loss": 0.6343,
	"step": 130
	},
	{
	"epoch": 0.41908325537885877,
	"grad_norm": 0.7948303412589806,
	"learning_rate": 5e-06,
	"loss": 0.628,
	"step": 140
	},
	{
	"epoch": 0.4490177736202058,
	"grad_norm": 0.6403554998368226,
	"learning_rate": 5e-06,
	"loss": 0.6296,
	"step": 150
	},
	{
	"epoch": 0.47895229186155286,
	"grad_norm": 0.6579875234047781,
	"learning_rate": 5e-06,
	"loss": 0.6286,
	"step": 160
	},
	{
	"epoch": 0.5088868101028999,
	"grad_norm": 0.6635988202382727,
	"learning_rate": 5e-06,
	"loss": 0.6228,
	"step": 170
	},
	{
	"epoch": 0.538821328344247,
	"grad_norm": 0.6766189913752628,
	"learning_rate": 5e-06,
	"loss": 0.6242,
	"step": 180
	},
	{
	"epoch": 0.568755846585594,
	"grad_norm": 0.7350593528596518,
	"learning_rate": 5e-06,
	"loss": 0.6221,
	"step": 190
	},
	{
	"epoch": 0.598690364826941,
	"grad_norm": 0.7042346638489535,
	"learning_rate": 5e-06,
	"loss": 0.6299,
	"step": 200
	},
	{
	"epoch": 0.6286248830682881,
	"grad_norm": 0.7310808118757001,
	"learning_rate": 5e-06,
	"loss": 0.6238,
	"step": 210
	},
	{
	"epoch": 0.6585594013096352,
	"grad_norm": 0.915027325723907,
	"learning_rate": 5e-06,
	"loss": 0.622,
	"step": 220
	},
	{
	"epoch": 0.6884939195509823,
	"grad_norm": 0.7146070049162939,
	"learning_rate": 5e-06,
	"loss": 0.6245,
	"step": 230
	},
	{
	"epoch": 0.7184284377923292,
	"grad_norm": 0.6682750595418552,
	"learning_rate": 5e-06,
	"loss": 0.6234,
	"step": 240
	},
	{
	"epoch": 0.7483629560336763,
	"grad_norm": 0.6075185838844738,
	"learning_rate": 5e-06,
	"loss": 0.6199,
	"step": 250
	},
	{
	"epoch": 0.7782974742750234,
	"grad_norm": 0.6614005815451841,
	"learning_rate": 5e-06,
	"loss": 0.6186,
	"step": 260
	},
	{
	"epoch": 0.8082319925163705,
	"grad_norm": 0.7431404889886319,
	"learning_rate": 5e-06,
	"loss": 0.6261,
	"step": 270
	},
	{
	"epoch": 0.8381665107577175,
	"grad_norm": 0.6659051677024307,
	"learning_rate": 5e-06,
	"loss": 0.6163,
	"step": 280
	},
	{
	"epoch": 0.8681010289990645,
	"grad_norm": 0.761848043492867,
	"learning_rate": 5e-06,
	"loss": 0.6131,
	"step": 290
	},
	{
	"epoch": 0.8980355472404116,
	"grad_norm": 0.7790121000018401,
	"learning_rate": 5e-06,
	"loss": 0.6145,
	"step": 300
	},
	{
	"epoch": 0.9279700654817586,
	"grad_norm": 0.700122187157476,
	"learning_rate": 5e-06,
	"loss": 0.6131,
	"step": 310
	},
	{
	"epoch": 0.9579045837231057,
	"grad_norm": 0.6439594670749554,
	"learning_rate": 5e-06,
	"loss": 0.6152,
	"step": 320
	},
	{
	"epoch": 0.9878391019644528,
	"grad_norm": 0.9505341757424101,
	"learning_rate": 5e-06,
	"loss": 0.6082,
	"step": 330
	},
	{
	"epoch": 0.9998129092609915,
	"eval_loss": 0.619162917137146,
	"eval_runtime": 519.149,
	"eval_samples_per_second": 17.338,
	"eval_steps_per_second": 0.543,
	"step": 334
	},
	{
	"epoch": 0.9998129092609915,
	"step": 334,
	"total_flos": 1272538881589248.0,
	"train_loss": 0.6363063543856501,
	"train_runtime": 30442.9812,
	"train_samples_per_second": 5.617,
	"train_steps_per_second": 0.011
	}
	],
	"logging_steps": 10,
	"max_steps": 334,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1272538881589248.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}