{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7479431563201197,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018698578908002993,
"grad_norm": 9.572704315185547,
"learning_rate": 3.048780487804878e-05,
"loss": 2.5708,
"step": 25
},
{
"epoch": 0.037397157816005985,
"grad_norm": 6.995045185089111,
"learning_rate": 4.999405067699773e-05,
"loss": 2.6291,
"step": 50
},
{
"epoch": 0.05609573672400898,
"grad_norm": 7.9323601722717285,
"learning_rate": 4.991513829823945e-05,
"loss": 2.7109,
"step": 75
},
{
"epoch": 0.07479431563201197,
"grad_norm": 7.052701473236084,
"learning_rate": 4.9744751398665467e-05,
"loss": 2.857,
"step": 100
},
{
"epoch": 0.09349289454001496,
"grad_norm": 6.857184410095215,
"learning_rate": 4.948351554413879e-05,
"loss": 2.8639,
"step": 125
},
{
"epoch": 0.11219147344801796,
"grad_norm": 6.162296772003174,
"learning_rate": 4.9132389847321244e-05,
"loss": 2.7908,
"step": 150
},
{
"epoch": 0.13089005235602094,
"grad_norm": 96.66787719726562,
"learning_rate": 4.869266344634556e-05,
"loss": 3.0533,
"step": 175
},
{
"epoch": 0.14958863126402394,
"grad_norm": 6.0340375900268555,
"learning_rate": 4.816595077181764e-05,
"loss": 2.8847,
"step": 200
},
{
"epoch": 0.1682872101720269,
"grad_norm": 5.803041934967041,
"learning_rate": 4.755418561952595e-05,
"loss": 3.2308,
"step": 225
},
{
"epoch": 0.1869857890800299,
"grad_norm": 5.644896030426025,
"learning_rate": 4.6859614050619644e-05,
"loss": 3.66,
"step": 250
},
{
"epoch": 0.2056843679880329,
"grad_norm": 6.0701751708984375,
"learning_rate": 4.608478614532215e-05,
"loss": 3.2233,
"step": 275
},
{
"epoch": 0.2243829468960359,
"grad_norm": 7.215365409851074,
"learning_rate": 4.523254664045583e-05,
"loss": 3.3212,
"step": 300
},
{
"epoch": 0.24308152580403888,
"grad_norm": 42.88441848754883,
"learning_rate": 4.430602448515173e-05,
"loss": 5.5739,
"step": 325
},
{
"epoch": 0.2617801047120419,
"grad_norm": 6.698093414306641,
"learning_rate": 4.330862135308981e-05,
"loss": 6.687,
"step": 350
},
{
"epoch": 0.28047868362004486,
"grad_norm": 7.0725321769714355,
"learning_rate": 4.2243999153446444e-05,
"loss": 6.676,
"step": 375
},
{
"epoch": 0.2991772625280479,
"grad_norm": 4.725111961364746,
"learning_rate": 4.111606658640209e-05,
"loss": 6.6665,
"step": 400
},
{
"epoch": 0.31787584143605085,
"grad_norm": 6.662052631378174,
"learning_rate": 3.9928964792569655e-05,
"loss": 6.9399,
"step": 425
},
{
"epoch": 0.3365744203440538,
"grad_norm": 8.17912769317627,
"learning_rate": 3.868705214903098e-05,
"loss": 6.2064,
"step": 450
},
{
"epoch": 0.35527299925205685,
"grad_norm": 4.715211868286133,
"learning_rate": 3.7394888267801986e-05,
"loss": 5.9489,
"step": 475
},
{
"epoch": 0.3739715781600598,
"grad_norm": 7.79115629196167,
"learning_rate": 3.6057217255475034e-05,
"loss": 5.7699,
"step": 500
},
{
"epoch": 0.39267015706806285,
"grad_norm": 40.13149642944336,
"learning_rate": 3.4678950295500015e-05,
"loss": 5.6122,
"step": 525
},
{
"epoch": 0.4113687359760658,
"grad_norm": 5.954702854156494,
"learning_rate": 3.326514761705209e-05,
"loss": 5.4873,
"step": 550
},
{
"epoch": 0.4300673148840688,
"grad_norm": 6.408680438995361,
"learning_rate": 3.182099991668653e-05,
"loss": 5.4693,
"step": 575
},
{
"epoch": 0.4487658937920718,
"grad_norm": 5.902630805969238,
"learning_rate": 3.035180930098997e-05,
"loss": 5.3252,
"step": 600
},
{
"epoch": 0.4674644727000748,
"grad_norm": 6.601041793823242,
"learning_rate": 2.8862969820196016e-05,
"loss": 5.2455,
"step": 625
},
{
"epoch": 0.48616305160807777,
"grad_norm": 7.046901702880859,
"learning_rate": 2.7359947664234937e-05,
"loss": 5.1306,
"step": 650
},
{
"epoch": 0.5048616305160808,
"grad_norm": 5.962765216827393,
"learning_rate": 2.5848261093926563e-05,
"loss": 5.0464,
"step": 675
},
{
"epoch": 0.5235602094240838,
"grad_norm": 6.858371734619141,
"learning_rate": 2.433346018099786e-05,
"loss": 5.0013,
"step": 700
},
{
"epoch": 0.5422587883320867,
"grad_norm": 5.1485443115234375,
"learning_rate": 2.2821106431308544e-05,
"loss": 4.8979,
"step": 725
},
{
"epoch": 0.5609573672400897,
"grad_norm": 5.508594512939453,
"learning_rate": 2.1316752366096948e-05,
"loss": 4.8487,
"step": 750
},
{
"epoch": 0.5796559461480928,
"grad_norm": 6.006443023681641,
"learning_rate": 1.982592113621237e-05,
"loss": 4.8909,
"step": 775
},
{
"epoch": 0.5983545250560958,
"grad_norm": 5.325293064117432,
"learning_rate": 1.835408624417918e-05,
"loss": 4.7447,
"step": 800
},
{
"epoch": 0.6170531039640987,
"grad_norm": 5.54567289352417,
"learning_rate": 1.690665144854198e-05,
"loss": 4.7865,
"step": 825
},
{
"epoch": 0.6357516828721017,
"grad_norm": 5.164028167724609,
"learning_rate": 1.5488930924271722e-05,
"loss": 4.72,
"step": 850
},
{
"epoch": 0.6544502617801047,
"grad_norm": 6.280778884887695,
"learning_rate": 1.4106129752073022e-05,
"loss": 4.6891,
"step": 875
},
{
"epoch": 0.6731488406881077,
"grad_norm": 5.060294151306152,
"learning_rate": 1.276332480822468e-05,
"loss": 4.677,
"step": 900
},
{
"epoch": 0.6918474195961107,
"grad_norm": 5.764603137969971,
"learning_rate": 1.1465446125115758e-05,
"loss": 4.5532,
"step": 925
},
{
"epoch": 0.7105459985041137,
"grad_norm": 5.3928046226501465,
"learning_rate": 1.0217258790910448e-05,
"loss": 4.5508,
"step": 950
},
{
"epoch": 0.7292445774121167,
"grad_norm": 6.133120536804199,
"learning_rate": 9.023345454796459e-06,
"loss": 4.5482,
"step": 975
},
{
"epoch": 0.7479431563201197,
"grad_norm": 5.548661708831787,
"learning_rate": 7.88808950204783e-06,
"loss": 4.5212,
"step": 1000
}
],
"logging_steps": 25,
"max_steps": 1337,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 9.2220514369536e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}