best-performing-clustering-2 / trainer_state.json
MHGanainy's picture
MHGanainy/best-performing-clustering-2
9c3328a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 3215,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03110419906687403,
"grad_norm": 0.11611274629831314,
"learning_rate": 6.666666666666667e-06,
"loss": 2.3215,
"step": 100
},
{
"epoch": 0.06220839813374806,
"grad_norm": 0.1991909295320511,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.2888,
"step": 200
},
{
"epoch": 0.09331259720062209,
"grad_norm": 0.3986590504646301,
"learning_rate": 2e-05,
"loss": 2.2411,
"step": 300
},
{
"epoch": 0.12441679626749612,
"grad_norm": 0.6170083284378052,
"learning_rate": 1.9941980734479214e-05,
"loss": 2.1325,
"step": 400
},
{
"epoch": 0.15552099533437014,
"grad_norm": 0.5896138548851013,
"learning_rate": 1.9768596184951174e-05,
"loss": 2.0947,
"step": 500
},
{
"epoch": 0.18662519440124417,
"grad_norm": 0.69114750623703,
"learning_rate": 1.948185828025913e-05,
"loss": 2.0607,
"step": 600
},
{
"epoch": 0.2177293934681182,
"grad_norm": 0.8546278476715088,
"learning_rate": 1.908509428492852e-05,
"loss": 1.9856,
"step": 700
},
{
"epoch": 0.24883359253499224,
"grad_norm": 0.6112543344497681,
"learning_rate": 1.8582908190078184e-05,
"loss": 1.9717,
"step": 800
},
{
"epoch": 0.27993779160186627,
"grad_norm": 0.8754922747612,
"learning_rate": 1.7981127289383718e-05,
"loss": 1.9524,
"step": 900
},
{
"epoch": 0.3110419906687403,
"grad_norm": 0.8028755187988281,
"learning_rate": 1.728673456001766e-05,
"loss": 1.9325,
"step": 1000
},
{
"epoch": 0.3421461897356143,
"grad_norm": 0.7798598408699036,
"learning_rate": 1.6507787633208173e-05,
"loss": 1.9153,
"step": 1100
},
{
"epoch": 0.37325038880248834,
"grad_norm": 1.0763300657272339,
"learning_rate": 1.5653325294669884e-05,
"loss": 1.8883,
"step": 1200
},
{
"epoch": 0.40435458786936235,
"grad_norm": 1.1706815958023071,
"learning_rate": 1.4733262599862234e-05,
"loss": 1.9038,
"step": 1300
},
{
"epoch": 0.4354587869362364,
"grad_norm": 1.2860356569290161,
"learning_rate": 1.3758275821142382e-05,
"loss": 1.8769,
"step": 1400
},
{
"epoch": 0.4665629860031104,
"grad_norm": 1.268933892250061,
"learning_rate": 1.273967856186909e-05,
"loss": 1.8601,
"step": 1500
},
{
"epoch": 0.4976671850699845,
"grad_norm": 0.9673184156417847,
"learning_rate": 1.1689290475011258e-05,
"loss": 1.9104,
"step": 1600
},
{
"epoch": 0.5287713841368584,
"grad_norm": 1.3028920888900757,
"learning_rate": 1.0619300109631146e-05,
"loss": 1.9084,
"step": 1700
},
{
"epoch": 0.5598755832037325,
"grad_norm": 1.10184645652771,
"learning_rate": 9.542123476751484e-06,
"loss": 1.8605,
"step": 1800
},
{
"epoch": 0.5909797822706065,
"grad_norm": 1.1696065664291382,
"learning_rate": 8.470259975787438e-06,
"loss": 1.8693,
"step": 1900
},
{
"epoch": 0.6220839813374806,
"grad_norm": 1.0528351068496704,
"learning_rate": 7.416147353351909e-06,
"loss": 1.8479,
"step": 2000
},
{
"epoch": 0.6531881804043546,
"grad_norm": 0.8877471089363098,
"learning_rate": 6.392017377470867e-06,
"loss": 1.8941,
"step": 2100
},
{
"epoch": 0.6842923794712286,
"grad_norm": 1.711288571357727,
"learning_rate": 5.409753901944006e-06,
"loss": 1.8153,
"step": 2200
},
{
"epoch": 0.7153965785381027,
"grad_norm": 1.6181174516677856,
"learning_rate": 4.48075496785092e-06,
"loss": 1.8127,
"step": 2300
},
{
"epoch": 0.7465007776049767,
"grad_norm": 0.8964147567749023,
"learning_rate": 3.615800542356738e-06,
"loss": 1.9051,
"step": 2400
},
{
"epoch": 0.7776049766718507,
"grad_norm": 1.0576339960098267,
"learning_rate": 2.8249274295566863e-06,
"loss": 1.9044,
"step": 2500
},
{
"epoch": 0.8087091757387247,
"grad_norm": 1.3190217018127441,
"learning_rate": 2.1173128048757307e-06,
"loss": 1.7841,
"step": 2600
},
{
"epoch": 0.8398133748055988,
"grad_norm": 1.4849908351898193,
"learning_rate": 1.501167724473016e-06,
"loss": 1.843,
"step": 2700
},
{
"epoch": 0.8709175738724728,
"grad_norm": 1.069494366645813,
"learning_rate": 9.836418453523833e-07,
"loss": 1.8534,
"step": 2800
},
{
"epoch": 0.9020217729393468,
"grad_norm": 1.185276746749878,
"learning_rate": 5.707404617927526e-07,
"loss": 1.8983,
"step": 2900
},
{
"epoch": 0.9331259720062208,
"grad_norm": 1.2488497495651245,
"learning_rate": 2.672548207954495e-07,
"loss": 1.8157,
"step": 3000
},
{
"epoch": 0.9642301710730948,
"grad_norm": 1.7197282314300537,
"learning_rate": 7.670652515782917e-08,
"loss": 1.8261,
"step": 3100
},
{
"epoch": 0.995334370139969,
"grad_norm": 0.9283524751663208,
"learning_rate": 1.3066693117191886e-09,
"loss": 1.8391,
"step": 3200
},
{
"epoch": 1.0,
"step": 3215,
"total_flos": 5.85604153344e+16,
"train_loss": 1.938271311616082,
"train_runtime": 1682.5697,
"train_samples_per_second": 3.822,
"train_steps_per_second": 1.911
}
],
"logging_steps": 100,
"max_steps": 3215,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.85604153344e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}