Upload model

5a089d0 verified 12 months ago

7.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 5.0,
	"eval_steps": 500,
	"global_step": 40,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.125,
	"grad_norm": 5.211779927637621,
	"learning_rate": 2e-05,
	"loss": 0.9857,
	"step": 1
	},
	{
	"epoch": 0.25,
	"grad_norm": 5.042557082573154,
	"learning_rate": 4e-05,
	"loss": 1.0105,
	"step": 2
	},
	{
	"epoch": 0.375,
	"grad_norm": 1.6016497975651993,
	"learning_rate": 6.000000000000001e-05,
	"loss": 0.8315,
	"step": 3
	},
	{
	"epoch": 0.5,
	"grad_norm": 4.25741909777284,
	"learning_rate": 8e-05,
	"loss": 0.8376,
	"step": 4
	},
	{
	"epoch": 0.625,
	"grad_norm": 4.221010570287036,
	"learning_rate": 7.984778792366983e-05,
	"loss": 0.8615,
	"step": 5
	},
	{
	"epoch": 0.75,
	"grad_norm": 2.7341942887765978,
	"learning_rate": 7.939231012048833e-05,
	"loss": 0.8683,
	"step": 6
	},
	{
	"epoch": 0.875,
	"grad_norm": 2.869440032757376,
	"learning_rate": 7.863703305156273e-05,
	"loss": 0.8362,
	"step": 7
	},
	{
	"epoch": 1.0,
	"grad_norm": 2.122066806364661,
	"learning_rate": 7.758770483143634e-05,
	"loss": 0.784,
	"step": 8
	},
	{
	"epoch": 1.125,
	"grad_norm": 1.439471039607689,
	"learning_rate": 7.625231148146601e-05,
	"loss": 0.7544,
	"step": 9
	},
	{
	"epoch": 1.25,
	"grad_norm": 1.6534051953010969,
	"learning_rate": 7.464101615137756e-05,
	"loss": 0.7631,
	"step": 10
	},
	{
	"epoch": 1.375,
	"grad_norm": 1.8137551875928488,
	"learning_rate": 7.276608177155968e-05,
	"loss": 0.7403,
	"step": 11
	},
	{
	"epoch": 1.5,
	"grad_norm": 1.3142504730332583,
	"learning_rate": 7.064177772475912e-05,
	"loss": 0.7002,
	"step": 12
	},
	{
	"epoch": 1.625,
	"grad_norm": 5.925202262290306,
	"learning_rate": 6.828427124746191e-05,
	"loss": 0.6861,
	"step": 13
	},
	{
	"epoch": 1.75,
	"grad_norm": 1.7227726654448416,
	"learning_rate": 6.571150438746157e-05,
	"loss": 0.7334,
	"step": 14
	},
	{
	"epoch": 1.875,
	"grad_norm": 1.0449895112458119,
	"learning_rate": 6.294305745404185e-05,
	"loss": 0.6708,
	"step": 15
	},
	{
	"epoch": 2.0,
	"grad_norm": 5.096810621766211,
	"learning_rate": 6.000000000000001e-05,
	"loss": 0.6817,
	"step": 16
	},
	{
	"epoch": 2.125,
	"grad_norm": 1.8744800353306919,
	"learning_rate": 5.6904730469627985e-05,
	"loss": 0.6989,
	"step": 17
	},
	{
	"epoch": 2.25,
	"grad_norm": 1.230587778975542,
	"learning_rate": 5.368080573302676e-05,
	"loss": 0.6331,
	"step": 18
	},
	{
	"epoch": 2.375,
	"grad_norm": 8.839539018981332,
	"learning_rate": 5.0352761804100835e-05,
	"loss": 0.6351,
	"step": 19
	},
	{
	"epoch": 2.5,
	"grad_norm": 5.01265204416041,
	"learning_rate": 4.694592710667723e-05,
	"loss": 0.661,
	"step": 20
	},
	{
	"epoch": 2.625,
	"grad_norm": 2.099879032906444,
	"learning_rate": 4.348622970990634e-05,
	"loss": 0.6938,
	"step": 21
	},
	{
	"epoch": 2.75,
	"grad_norm": 1.4954684606615343,
	"learning_rate": 4e-05,
	"loss": 0.6499,
	"step": 22
	},
	{
	"epoch": 2.875,
	"grad_norm": 0.742639587291529,
	"learning_rate": 3.6513770290093674e-05,
	"loss": 0.6396,
	"step": 23
	},
	{
	"epoch": 3.0,
	"grad_norm": 0.7952668726605489,
	"learning_rate": 3.305407289332279e-05,
	"loss": 0.6316,
	"step": 24
	},
	{
	"epoch": 3.125,
	"grad_norm": 0.9332649605547626,
	"learning_rate": 2.9647238195899168e-05,
	"loss": 0.6314,
	"step": 25
	},
	{
	"epoch": 3.25,
	"grad_norm": 0.8319077523070111,
	"learning_rate": 2.6319194266973256e-05,
	"loss": 0.5775,
	"step": 26
	},
	{
	"epoch": 3.375,
	"grad_norm": 0.6043436502068867,
	"learning_rate": 2.3095269530372032e-05,
	"loss": 0.5838,
	"step": 27
	},
	{
	"epoch": 3.5,
	"grad_norm": 0.48007057002472814,
	"learning_rate": 2.0000000000000012e-05,
	"loss": 0.5709,
	"step": 28
	},
	{
	"epoch": 3.625,
	"grad_norm": 0.4637812767567033,
	"learning_rate": 1.7056942545958167e-05,
	"loss": 0.5556,
	"step": 29
	},
	{
	"epoch": 3.75,
	"grad_norm": 0.46472503204848403,
	"learning_rate": 1.4288495612538427e-05,
	"loss": 0.5443,
	"step": 30
	},
	{
	"epoch": 3.875,
	"grad_norm": 0.46363908296159384,
	"learning_rate": 1.1715728752538103e-05,
	"loss": 0.557,
	"step": 31
	},
	{
	"epoch": 4.0,
	"grad_norm": 0.41103161754751094,
	"learning_rate": 9.358222275240884e-06,
	"loss": 0.5874,
	"step": 32
	},
	{
	"epoch": 4.125,
	"grad_norm": 0.3604630063954317,
	"learning_rate": 7.233918228440324e-06,
	"loss": 0.5322,
	"step": 33
	},
	{
	"epoch": 4.25,
	"grad_norm": 0.3152024677309081,
	"learning_rate": 5.358983848622452e-06,
	"loss": 0.5663,
	"step": 34
	},
	{
	"epoch": 4.375,
	"grad_norm": 0.2782735119012474,
	"learning_rate": 3.747688518534003e-06,
	"loss": 0.5334,
	"step": 35
	},
	{
	"epoch": 4.5,
	"grad_norm": 0.2517890470284474,
	"learning_rate": 2.4122951685636674e-06,
	"loss": 0.5227,
	"step": 36
	},
	{
	"epoch": 4.625,
	"grad_norm": 0.2307830936984302,
	"learning_rate": 1.3629669484372722e-06,
	"loss": 0.5605,
	"step": 37
	},
	{
	"epoch": 4.75,
	"grad_norm": 0.2120443064521722,
	"learning_rate": 6.076898795116792e-07,
	"loss": 0.5255,
	"step": 38
	},
	{
	"epoch": 4.875,
	"grad_norm": 0.21220571438664002,
	"learning_rate": 1.522120763301782e-07,
	"loss": 0.5098,
	"step": 39
	},
	{
	"epoch": 5.0,
	"grad_norm": 0.20885459665653022,
	"learning_rate": 0.0,
	"loss": 0.5482,
	"step": 40
	},
	{
	"epoch": 5.0,
	"step": 40,
	"total_flos": 671045690327040.0,
	"train_loss": 0.0,
	"train_runtime": 7.8738,
	"train_samples_per_second": 2427.029,
	"train_steps_per_second": 5.08
	}
	],
	"logging_steps": 1,
	"max_steps": 40,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 671045690327040.0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}