klora_2000_skill / 104 /trainer_state.json
RayDu0010's picture
Upload folder using huggingface_hub
fa02d5d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 229,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02188183807439825,
"grad_norm": 1.2056468725204468,
"learning_rate": 2.068965517241379e-06,
"loss": 1.3589,
"step": 5
},
{
"epoch": 0.0437636761487965,
"grad_norm": 0.7383206486701965,
"learning_rate": 4.655172413793104e-06,
"loss": 1.3299,
"step": 10
},
{
"epoch": 0.06564551422319474,
"grad_norm": 0.5696788430213928,
"learning_rate": 7.241379310344828e-06,
"loss": 1.3287,
"step": 15
},
{
"epoch": 0.087527352297593,
"grad_norm": 0.4898790419101715,
"learning_rate": 9.827586206896551e-06,
"loss": 1.3286,
"step": 20
},
{
"epoch": 0.10940919037199125,
"grad_norm": 0.6403371691703796,
"learning_rate": 1.2413793103448277e-05,
"loss": 1.3167,
"step": 25
},
{
"epoch": 0.13129102844638948,
"grad_norm": 0.6023938655853271,
"learning_rate": 1.5e-05,
"loss": 1.3163,
"step": 30
},
{
"epoch": 0.15317286652078774,
"grad_norm": 0.4848966598510742,
"learning_rate": 1.7586206896551724e-05,
"loss": 1.27,
"step": 35
},
{
"epoch": 0.175054704595186,
"grad_norm": 0.6767584681510925,
"learning_rate": 2.017241379310345e-05,
"loss": 1.2358,
"step": 40
},
{
"epoch": 0.19693654266958424,
"grad_norm": 0.42833346128463745,
"learning_rate": 2.275862068965517e-05,
"loss": 1.2356,
"step": 45
},
{
"epoch": 0.2188183807439825,
"grad_norm": 0.5033416152000427,
"learning_rate": 2.5344827586206897e-05,
"loss": 1.1764,
"step": 50
},
{
"epoch": 0.24070021881838075,
"grad_norm": 0.5110407471656799,
"learning_rate": 2.793103448275862e-05,
"loss": 1.1782,
"step": 55
},
{
"epoch": 0.26258205689277897,
"grad_norm": 0.5004281997680664,
"learning_rate": 2.9999937352806748e-05,
"loss": 1.2135,
"step": 60
},
{
"epoch": 0.2844638949671772,
"grad_norm": 0.5905261039733887,
"learning_rate": 2.9997744755987852e-05,
"loss": 1.1598,
"step": 65
},
{
"epoch": 0.3063457330415755,
"grad_norm": 0.46601855754852295,
"learning_rate": 2.999242032277618e-05,
"loss": 1.1192,
"step": 70
},
{
"epoch": 0.3282275711159737,
"grad_norm": 0.5664127469062805,
"learning_rate": 2.9983965165022473e-05,
"loss": 1.135,
"step": 75
},
{
"epoch": 0.350109409190372,
"grad_norm": 0.5967416763305664,
"learning_rate": 2.9972381048336917e-05,
"loss": 1.1354,
"step": 80
},
{
"epoch": 0.37199124726477023,
"grad_norm": 0.5042751431465149,
"learning_rate": 2.995767039172042e-05,
"loss": 1.0858,
"step": 85
},
{
"epoch": 0.3938730853391685,
"grad_norm": 0.5433419942855835,
"learning_rate": 2.9939836267059482e-05,
"loss": 1.0781,
"step": 90
},
{
"epoch": 0.41575492341356673,
"grad_norm": 0.5662135481834412,
"learning_rate": 2.9918882398484742e-05,
"loss": 1.0949,
"step": 95
},
{
"epoch": 0.437636761487965,
"grad_norm": 0.6487781405448914,
"learning_rate": 2.989481316159328e-05,
"loss": 1.0721,
"step": 100
},
{
"epoch": 0.45951859956236324,
"grad_norm": 0.5745230317115784,
"learning_rate": 2.9867633582534904e-05,
"loss": 1.0192,
"step": 105
},
{
"epoch": 0.4814004376367615,
"grad_norm": 0.7092564702033997,
"learning_rate": 2.9837349336962612e-05,
"loss": 1.0884,
"step": 110
},
{
"epoch": 0.5032822757111597,
"grad_norm": 0.6593285202980042,
"learning_rate": 2.9803966748847366e-05,
"loss": 0.9648,
"step": 115
},
{
"epoch": 0.5251641137855579,
"grad_norm": 0.751462996006012,
"learning_rate": 2.976749278915754e-05,
"loss": 0.9229,
"step": 120
},
{
"epoch": 0.5470459518599562,
"grad_norm": 0.6278636455535889,
"learning_rate": 2.9727935074403228e-05,
"loss": 1.0129,
"step": 125
},
{
"epoch": 0.5689277899343544,
"grad_norm": 0.7625859379768372,
"learning_rate": 2.9685301865045768e-05,
"loss": 0.9795,
"step": 130
},
{
"epoch": 0.5908096280087527,
"grad_norm": 0.7692922949790955,
"learning_rate": 2.9639602063772777e-05,
"loss": 0.9894,
"step": 135
},
{
"epoch": 0.612691466083151,
"grad_norm": 0.7171132564544678,
"learning_rate": 2.959084521363911e-05,
"loss": 0.9059,
"step": 140
},
{
"epoch": 0.6345733041575492,
"grad_norm": 0.9169694781303406,
"learning_rate": 2.9539041496074043e-05,
"loss": 0.9285,
"step": 145
},
{
"epoch": 0.6564551422319475,
"grad_norm": 0.8271639347076416,
"learning_rate": 2.9484201728755205e-05,
"loss": 0.8914,
"step": 150
},
{
"epoch": 0.6783369803063457,
"grad_norm": 0.8969722986221313,
"learning_rate": 2.9426337363349627e-05,
"loss": 0.8813,
"step": 155
},
{
"epoch": 0.700218818380744,
"grad_norm": 0.8180518746376038,
"learning_rate": 2.9365460483122385e-05,
"loss": 0.8638,
"step": 160
},
{
"epoch": 0.7221006564551422,
"grad_norm": 0.9082233905792236,
"learning_rate": 2.9301583800413363e-05,
"loss": 0.9084,
"step": 165
},
{
"epoch": 0.7439824945295405,
"grad_norm": 0.8155914545059204,
"learning_rate": 2.923472065398268e-05,
"loss": 0.8917,
"step": 170
},
{
"epoch": 0.7658643326039387,
"grad_norm": 0.8886885643005371,
"learning_rate": 2.916488500622527e-05,
"loss": 0.8079,
"step": 175
},
{
"epoch": 0.787746170678337,
"grad_norm": 0.8544840812683105,
"learning_rate": 2.909209144025524e-05,
"loss": 0.8317,
"step": 180
},
{
"epoch": 0.8096280087527352,
"grad_norm": 0.8505244851112366,
"learning_rate": 2.9016355156860625e-05,
"loss": 0.7849,
"step": 185
},
{
"epoch": 0.8315098468271335,
"grad_norm": 0.9838951826095581,
"learning_rate": 2.8937691971329155e-05,
"loss": 0.8021,
"step": 190
},
{
"epoch": 0.8533916849015317,
"grad_norm": 0.9686183333396912,
"learning_rate": 2.8856118310145687e-05,
"loss": 0.8038,
"step": 195
},
{
"epoch": 0.87527352297593,
"grad_norm": 0.9777634739875793,
"learning_rate": 2.8771651207562043e-05,
"loss": 0.773,
"step": 200
},
{
"epoch": 0.8971553610503282,
"grad_norm": 1.0718952417373657,
"learning_rate": 2.8684308302039878e-05,
"loss": 0.723,
"step": 205
},
{
"epoch": 0.9190371991247265,
"grad_norm": 1.031557321548462,
"learning_rate": 2.8594107832567424e-05,
"loss": 0.6804,
"step": 210
},
{
"epoch": 0.9409190371991247,
"grad_norm": 0.9825000762939453,
"learning_rate": 2.850106863485082e-05,
"loss": 0.7704,
"step": 215
},
{
"epoch": 0.962800875273523,
"grad_norm": 1.1034126281738281,
"learning_rate": 2.840521013738083e-05,
"loss": 0.7545,
"step": 220
},
{
"epoch": 0.9846827133479212,
"grad_norm": 1.1799556016921997,
"learning_rate": 2.8306552357375753e-05,
"loss": 0.7517,
"step": 225
}
],
"logging_steps": 5,
"max_steps": 1145,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.164877723722056e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}