MHGanainy/best-performing-clustering-2

9c3328a verified over 1 year ago

6.54 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 3215,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.03110419906687403,
	"grad_norm": 0.11611274629831314,
	"learning_rate": 6.666666666666667e-06,
	"loss": 2.3215,
	"step": 100
	},
	{
	"epoch": 0.06220839813374806,
	"grad_norm": 0.1991909295320511,
	"learning_rate": 1.3333333333333333e-05,
	"loss": 2.2888,
	"step": 200
	},
	{
	"epoch": 0.09331259720062209,
	"grad_norm": 0.3986590504646301,
	"learning_rate": 2e-05,
	"loss": 2.2411,
	"step": 300
	},
	{
	"epoch": 0.12441679626749612,
	"grad_norm": 0.6170083284378052,
	"learning_rate": 1.9941980734479214e-05,
	"loss": 2.1325,
	"step": 400
	},
	{
	"epoch": 0.15552099533437014,
	"grad_norm": 0.5896138548851013,
	"learning_rate": 1.9768596184951174e-05,
	"loss": 2.0947,
	"step": 500
	},
	{
	"epoch": 0.18662519440124417,
	"grad_norm": 0.69114750623703,
	"learning_rate": 1.948185828025913e-05,
	"loss": 2.0607,
	"step": 600
	},
	{
	"epoch": 0.2177293934681182,
	"grad_norm": 0.8546278476715088,
	"learning_rate": 1.908509428492852e-05,
	"loss": 1.9856,
	"step": 700
	},
	{
	"epoch": 0.24883359253499224,
	"grad_norm": 0.6112543344497681,
	"learning_rate": 1.8582908190078184e-05,
	"loss": 1.9717,
	"step": 800
	},
	{
	"epoch": 0.27993779160186627,
	"grad_norm": 0.8754922747612,
	"learning_rate": 1.7981127289383718e-05,
	"loss": 1.9524,
	"step": 900
	},
	{
	"epoch": 0.3110419906687403,
	"grad_norm": 0.8028755187988281,
	"learning_rate": 1.728673456001766e-05,
	"loss": 1.9325,
	"step": 1000
	},
	{
	"epoch": 0.3421461897356143,
	"grad_norm": 0.7798598408699036,
	"learning_rate": 1.6507787633208173e-05,
	"loss": 1.9153,
	"step": 1100
	},
	{
	"epoch": 0.37325038880248834,
	"grad_norm": 1.0763300657272339,
	"learning_rate": 1.5653325294669884e-05,
	"loss": 1.8883,
	"step": 1200
	},
	{
	"epoch": 0.40435458786936235,
	"grad_norm": 1.1706815958023071,
	"learning_rate": 1.4733262599862234e-05,
	"loss": 1.9038,
	"step": 1300
	},
	{
	"epoch": 0.4354587869362364,
	"grad_norm": 1.2860356569290161,
	"learning_rate": 1.3758275821142382e-05,
	"loss": 1.8769,
	"step": 1400
	},
	{
	"epoch": 0.4665629860031104,
	"grad_norm": 1.268933892250061,
	"learning_rate": 1.273967856186909e-05,
	"loss": 1.8601,
	"step": 1500
	},
	{
	"epoch": 0.4976671850699845,
	"grad_norm": 0.9673184156417847,
	"learning_rate": 1.1689290475011258e-05,
	"loss": 1.9104,
	"step": 1600
	},
	{
	"epoch": 0.5287713841368584,
	"grad_norm": 1.3028920888900757,
	"learning_rate": 1.0619300109631146e-05,
	"loss": 1.9084,
	"step": 1700
	},
	{
	"epoch": 0.5598755832037325,
	"grad_norm": 1.10184645652771,
	"learning_rate": 9.542123476751484e-06,
	"loss": 1.8605,
	"step": 1800
	},
	{
	"epoch": 0.5909797822706065,
	"grad_norm": 1.1696065664291382,
	"learning_rate": 8.470259975787438e-06,
	"loss": 1.8693,
	"step": 1900
	},
	{
	"epoch": 0.6220839813374806,
	"grad_norm": 1.0528351068496704,
	"learning_rate": 7.416147353351909e-06,
	"loss": 1.8479,
	"step": 2000
	},
	{
	"epoch": 0.6531881804043546,
	"grad_norm": 0.8877471089363098,
	"learning_rate": 6.392017377470867e-06,
	"loss": 1.8941,
	"step": 2100
	},
	{
	"epoch": 0.6842923794712286,
	"grad_norm": 1.711288571357727,
	"learning_rate": 5.409753901944006e-06,
	"loss": 1.8153,
	"step": 2200
	},
	{
	"epoch": 0.7153965785381027,
	"grad_norm": 1.6181174516677856,
	"learning_rate": 4.48075496785092e-06,
	"loss": 1.8127,
	"step": 2300
	},
	{
	"epoch": 0.7465007776049767,
	"grad_norm": 0.8964147567749023,
	"learning_rate": 3.615800542356738e-06,
	"loss": 1.9051,
	"step": 2400
	},
	{
	"epoch": 0.7776049766718507,
	"grad_norm": 1.0576339960098267,
	"learning_rate": 2.8249274295566863e-06,
	"loss": 1.9044,
	"step": 2500
	},
	{
	"epoch": 0.8087091757387247,
	"grad_norm": 1.3190217018127441,
	"learning_rate": 2.1173128048757307e-06,
	"loss": 1.7841,
	"step": 2600
	},
	{
	"epoch": 0.8398133748055988,
	"grad_norm": 1.4849908351898193,
	"learning_rate": 1.501167724473016e-06,
	"loss": 1.843,
	"step": 2700
	},
	{
	"epoch": 0.8709175738724728,
	"grad_norm": 1.069494366645813,
	"learning_rate": 9.836418453523833e-07,
	"loss": 1.8534,
	"step": 2800
	},
	{
	"epoch": 0.9020217729393468,
	"grad_norm": 1.185276746749878,
	"learning_rate": 5.707404617927526e-07,
	"loss": 1.8983,
	"step": 2900
	},
	{
	"epoch": 0.9331259720062208,
	"grad_norm": 1.2488497495651245,
	"learning_rate": 2.672548207954495e-07,
	"loss": 1.8157,
	"step": 3000
	},
	{
	"epoch": 0.9642301710730948,
	"grad_norm": 1.7197282314300537,
	"learning_rate": 7.670652515782917e-08,
	"loss": 1.8261,
	"step": 3100
	},
	{
	"epoch": 0.995334370139969,
	"grad_norm": 0.9283524751663208,
	"learning_rate": 1.3066693117191886e-09,
	"loss": 1.8391,
	"step": 3200
	},
	{
	"epoch": 1.0,
	"step": 3215,
	"total_flos": 5.85604153344e+16,
	"train_loss": 1.938271311616082,
	"train_runtime": 1682.5697,
	"train_samples_per_second": 3.822,
	"train_steps_per_second": 1.911
	}
	],
	"logging_steps": 100,
	"max_steps": 3215,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.85604153344e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}