MHGanainy/8-clusters-balanced-lex-best-1

8c61d11 verified over 1 year ago

6.06 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 2888,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03462603878116344,
	"grad_norm": 0.14943788945674896,
	"learning_rate": 6.944444444444445e-06,
	"loss": 2.3885,
	"step": 100
	},
	{
	"epoch": 0.06925207756232687,
	"grad_norm": 0.2681087851524353,
	"learning_rate": 1.388888888888889e-05,
	"loss": 2.3454,
	"step": 200
	},
	{
	"epoch": 0.1038781163434903,
	"grad_norm": 0.4204924404621124,
	"learning_rate": 1.9998948817948157e-05,
	"loss": 2.258,
	"step": 300
	},
	{
	"epoch": 0.13850415512465375,
	"grad_norm": 0.5873008966445923,
	"learning_rate": 1.9908568428746408e-05,
	"loss": 2.165,
	"step": 400
	},
	{
	"epoch": 0.1731301939058172,
	"grad_norm": 0.7970355153083801,
	"learning_rate": 1.9673698799700582e-05,
	"loss": 2.0815,
	"step": 500
	},
	{
	"epoch": 0.2077562326869806,
	"grad_norm": 0.7298622131347656,
	"learning_rate": 1.9297764858882516e-05,
	"loss": 2.0287,
	"step": 600
	},
	{
	"epoch": 0.24238227146814403,
	"grad_norm": 0.7727171778678894,
	"learning_rate": 1.8786248569678847e-05,
	"loss": 2.037,
	"step": 700
	},
	{
	"epoch": 0.2770083102493075,
	"grad_norm": 0.791151225566864,
	"learning_rate": 1.8146608991420533e-05,
	"loss": 1.9875,
	"step": 800
	},
	{
	"epoch": 0.31163434903047094,
	"grad_norm": 0.9811259508132935,
	"learning_rate": 1.7388173509501475e-05,
	"loss": 1.9107,
	"step": 900
	},
	{
	"epoch": 0.3462603878116344,
	"grad_norm": 0.9305247068405151,
	"learning_rate": 1.652200182109602e-05,
	"loss": 1.8919,
	"step": 1000
	},
	{
	"epoch": 0.3808864265927978,
	"grad_norm": 1.0283896923065186,
	"learning_rate": 1.5560724659869905e-05,
	"loss": 1.878,
	"step": 1100
	},
	{
	"epoch": 0.4155124653739612,
	"grad_norm": 1.1012296676635742,
	"learning_rate": 1.4518359611441452e-05,
	"loss": 1.8174,
	"step": 1200
	},
	{
	"epoch": 0.45013850415512463,
	"grad_norm": 1.0275688171386719,
	"learning_rate": 1.3410106705418424e-05,
	"loss": 1.8528,
	"step": 1300
	},
	{
	"epoch": 0.48476454293628807,
	"grad_norm": 0.9701151251792908,
	"learning_rate": 1.2252126764738845e-05,
	"loss": 1.9103,
	"step": 1400
	},
	{
	"epoch": 0.5193905817174516,
	"grad_norm": 0.9857981204986572,
	"learning_rate": 1.106130574448156e-05,
	"loss": 1.796,
	"step": 1500
	},
	{
	"epoch": 0.554016620498615,
	"grad_norm": 1.2235304117202759,
	"learning_rate": 9.855008496617326e-06,
	"loss": 1.8084,
	"step": 1600
	},
	{
	"epoch": 0.5886426592797784,
	"grad_norm": 1.137815237045288,
	"learning_rate": 8.650825551364844e-06,
	"loss": 1.878,
	"step": 1700
	},
	{
	"epoch": 0.6232686980609419,
	"grad_norm": 1.0799235105514526,
	"learning_rate": 7.4663166076497376e-06,
	"loss": 1.8077,
	"step": 1800
	},
	{
	"epoch": 0.6578947368421053,
	"grad_norm": 0.8830498456954956,
	"learning_rate": 6.318754473153221e-06,
	"loss": 1.8718,
	"step": 1900
	},
	{
	"epoch": 0.6925207756232687,
	"grad_norm": 0.9494577050209045,
	"learning_rate": 5.224873187881136e-06,
	"loss": 1.8099,
	"step": 2000
	},
	{
	"epoch": 0.7271468144044322,
	"grad_norm": 0.9683797955513,
	"learning_rate": 4.200624004178883e-06,
	"loss": 1.8241,
	"step": 2100
	},
	{
	"epoch": 0.7617728531855956,
	"grad_norm": 1.5775721073150635,
	"learning_rate": 3.2609427815531426e-06,
	"loss": 1.7979,
	"step": 2200
	},
	{
	"epoch": 0.796398891966759,
	"grad_norm": 1.1023374795913696,
	"learning_rate": 2.4195321882076295e-06,
	"loss": 1.8127,
	"step": 2300
	},
	{
	"epoch": 0.8310249307479224,
	"grad_norm": 1.4638538360595703,
	"learning_rate": 1.6886618852849723e-06,
	"loss": 1.7909,
	"step": 2400
	},
	{
	"epoch": 0.8656509695290858,
	"grad_norm": 0.9252508282661438,
	"learning_rate": 1.0789896075783734e-06,
	"loss": 1.818,
	"step": 2500
	},
	{
	"epoch": 0.9002770083102493,
	"grad_norm": 0.8653609752655029,
	"learning_rate": 5.994057497592032e-07,
	"loss": 1.7445,
	"step": 2600
	},
	{
	"epoch": 0.9349030470914127,
	"grad_norm": 1.0171947479248047,
	"learning_rate": 2.569037244032657e-07,
	"loss": 1.7914,
	"step": 2700
	},
	{
	"epoch": 0.9695290858725761,
	"grad_norm": 1.0968868732452393,
	"learning_rate": 5.647798228764156e-08,
	"loss": 1.8258,
	"step": 2800
	},
	{
	"epoch": 1.0,
	"eval_loss": 1.476217269897461,
	"eval_runtime": 13.7874,
	"eval_samples_per_second": 13.708,
	"eval_steps_per_second": 1.741,
	"step": 2888
	},
	{
	"epoch": 1.0,
	"step": 2888,
	"total_flos": 5.2595085312e+16,
	"train_loss": 1.9235466986151613,
	"train_runtime": 918.9364,
	"train_samples_per_second": 6.284,
	"train_steps_per_second": 3.143
	}
	],
	"logging_steps": 100,
	"max_steps": 2888,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.2595085312e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}