{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.01008064516129,
"eval_steps": 25,
"global_step": 63,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5e-05,
"loss": 4.2505,
"step": 1
},
{
"epoch": 0.03,
"eval_loss": 4.052073955535889,
"eval_runtime": 60.4806,
"eval_samples_per_second": 4.134,
"eval_steps_per_second": 4.134,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 0.0001,
"loss": 4.2141,
"step": 2
},
{
"epoch": 0.1,
"learning_rate": 0.00015,
"loss": 4.0033,
"step": 3
},
{
"epoch": 0.13,
"learning_rate": 0.0002,
"loss": 3.9023,
"step": 4
},
{
"epoch": 0.16,
"learning_rate": 0.00025,
"loss": 4.0955,
"step": 5
},
{
"epoch": 0.19,
"learning_rate": 0.0003,
"loss": 3.8902,
"step": 6
},
{
"epoch": 0.23,
"learning_rate": 0.00035,
"loss": 3.9076,
"step": 7
},
{
"epoch": 0.26,
"learning_rate": 0.0004,
"loss": 3.7008,
"step": 8
},
{
"epoch": 0.29,
"learning_rate": 0.00045000000000000004,
"loss": 3.7071,
"step": 9
},
{
"epoch": 0.32,
"learning_rate": 0.0005,
"loss": 3.6304,
"step": 10
},
{
"epoch": 0.35,
"learning_rate": 0.0004999782204181027,
"loss": 3.6851,
"step": 11
},
{
"epoch": 0.39,
"learning_rate": 0.0004999128854672123,
"loss": 3.4864,
"step": 12
},
{
"epoch": 0.42,
"learning_rate": 0.000499804006531072,
"loss": 3.3743,
"step": 13
},
{
"epoch": 0.45,
"learning_rate": 0.0004996516025803834,
"loss": 3.2944,
"step": 14
},
{
"epoch": 0.48,
"learning_rate": 0.0004994557001695014,
"loss": 3.1978,
"step": 15
},
{
"epoch": 0.52,
"learning_rate": 0.0004992163334318065,
"loss": 3.0865,
"step": 16
},
{
"epoch": 0.55,
"learning_rate": 0.0004989335440737586,
"loss": 3.0893,
"step": 17
},
{
"epoch": 0.58,
"learning_rate": 0.0004986073813676296,
"loss": 2.9218,
"step": 18
},
{
"epoch": 0.61,
"learning_rate": 0.0004982379021429183,
"loss": 2.8753,
"step": 19
},
{
"epoch": 0.65,
"learning_rate": 0.0004978251707764492,
"loss": 2.7118,
"step": 20
},
{
"epoch": 0.68,
"learning_rate": 0.0004973692591811548,
"loss": 2.6693,
"step": 21
},
{
"epoch": 0.71,
"learning_rate": 0.0004968702467935468,
"loss": 2.474,
"step": 22
},
{
"epoch": 0.74,
"learning_rate": 0.0004963282205598744,
"loss": 2.3912,
"step": 23
},
{
"epoch": 0.77,
"learning_rate": 0.0004957432749209755,
"loss": 2.2401,
"step": 24
},
{
"epoch": 0.81,
"learning_rate": 0.0004951155117958217,
"loss": 2.1461,
"step": 25
},
{
"epoch": 0.81,
"eval_loss": 2.080416202545166,
"eval_runtime": 61.4527,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 4.068,
"step": 25
},
{
"epoch": 0.84,
"learning_rate": 0.0004944450405637602,
"loss": 2.1114,
"step": 26
},
{
"epoch": 0.87,
"learning_rate": 0.0004937319780454559,
"loss": 1.8994,
"step": 27
},
{
"epoch": 0.9,
"learning_rate": 0.0004929764484825369,
"loss": 2.0342,
"step": 28
},
{
"epoch": 0.94,
"learning_rate": 0.0004921785835159472,
"loss": 2.0333,
"step": 29
},
{
"epoch": 0.97,
"learning_rate": 0.0004913385221630097,
"loss": 1.8424,
"step": 30
},
{
"epoch": 1.0,
"learning_rate": 0.0004904564107932048,
"loss": 1.7993,
"step": 31
},
{
"epoch": 1.01,
"learning_rate": 0.000489532403102667,
"loss": 1.7643,
"step": 32
},
{
"epoch": 1.04,
"learning_rate": 0.0004885666600874058,
"loss": 1.8718,
"step": 33
},
{
"epoch": 1.07,
"learning_rate": 0.00048755935001525374,
"loss": 1.7178,
"step": 34
},
{
"epoch": 1.11,
"learning_rate": 0.0004865106483965487,
"loss": 1.7185,
"step": 35
},
{
"epoch": 1.14,
"learning_rate": 0.0004854207379535529,
"loss": 1.7702,
"step": 36
},
{
"epoch": 1.17,
"learning_rate": 0.00048428980858861636,
"loss": 1.7089,
"step": 37
},
{
"epoch": 1.2,
"learning_rate": 0.00048311805735108893,
"loss": 1.6515,
"step": 38
},
{
"epoch": 1.24,
"learning_rate": 0.000481905688402987,
"loss": 1.7014,
"step": 39
},
{
"epoch": 1.27,
"learning_rate": 0.0004806529129834207,
"loss": 1.5697,
"step": 40
},
{
"epoch": 1.3,
"learning_rate": 0.0004793599493717891,
"loss": 1.6295,
"step": 41
},
{
"epoch": 1.33,
"learning_rate": 0.00047802702284974695,
"loss": 1.4599,
"step": 42
},
{
"epoch": 1.36,
"learning_rate": 0.0004766543656619532,
"loss": 1.5082,
"step": 43
},
{
"epoch": 1.4,
"learning_rate": 0.00047524221697560476,
"loss": 1.4499,
"step": 44
},
{
"epoch": 1.43,
"learning_rate": 0.00047379082283876563,
"loss": 1.432,
"step": 45
},
{
"epoch": 1.46,
"learning_rate": 0.0004723004361374953,
"loss": 1.4362,
"step": 46
},
{
"epoch": 1.49,
"learning_rate": 0.0004707713165517877,
"loss": 1.4588,
"step": 47
},
{
"epoch": 1.53,
"learning_rate": 0.0004692037305103247,
"loss": 1.4489,
"step": 48
},
{
"epoch": 1.56,
"learning_rate": 0.0004675979511440549,
"loss": 1.3382,
"step": 49
},
{
"epoch": 1.59,
"learning_rate": 0.00046595425823860404,
"loss": 1.3896,
"step": 50
},
{
"epoch": 1.59,
"eval_loss": 1.4752908945083618,
"eval_runtime": 61.5879,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 4.059,
"step": 50
},
{
"epoch": 1.62,
"learning_rate": 0.0004642729381855262,
"loss": 1.3671,
"step": 51
},
{
"epoch": 1.66,
"learning_rate": 0.0004625542839324036,
"loss": 1.3629,
"step": 52
},
{
"epoch": 1.69,
"learning_rate": 0.00046079859493180467,
"loss": 1.3171,
"step": 53
},
{
"epoch": 1.72,
"learning_rate": 0.00045900617708910854,
"loss": 1.4639,
"step": 54
},
{
"epoch": 1.75,
"learning_rate": 0.0004571773427092047,
"loss": 1.3031,
"step": 55
},
{
"epoch": 1.78,
"learning_rate": 0.00045531241044207836,
"loss": 1.3329,
"step": 56
},
{
"epoch": 1.82,
"learning_rate": 0.00045341170522729,
"loss": 1.3298,
"step": 57
},
{
"epoch": 1.85,
"learning_rate": 0.00045147555823735875,
"loss": 1.3794,
"step": 58
},
{
"epoch": 1.88,
"learning_rate": 0.00044950430682005996,
"loss": 1.3068,
"step": 59
},
{
"epoch": 1.91,
"learning_rate": 0.00044749829443964704,
"loss": 1.3701,
"step": 60
},
{
"epoch": 1.95,
"learning_rate": 0.0004454578706170075,
"loss": 1.2326,
"step": 61
},
{
"epoch": 1.98,
"learning_rate": 0.0004433833908687633,
"loss": 1.1632,
"step": 62
},
{
"epoch": 2.01,
"learning_rate": 0.00044127521664532704,
"loss": 1.3177,
"step": 63
}
],
"logging_steps": 1,
"max_steps": 248,
"num_train_epochs": 8,
"save_steps": 500,
"total_flos": 8.266246254074266e+16,
"trial_name": null,
"trial_params": null
}