web_real_final_bs_48_LR_1e-5 / trainer_state.json
Rubywong123's picture
Upload folder using huggingface_hub
e9602b0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9898386132695756,
"eval_steps": 500,
"global_step": 278,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007172743574417215,
"grad_norm": 1.7706772687409793,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.8258,
"step": 1
},
{
"epoch": 0.03586371787208607,
"grad_norm": 1.3979460761770042,
"learning_rate": 1.7857142857142859e-06,
"loss": 1.7889,
"step": 5
},
{
"epoch": 0.07172743574417215,
"grad_norm": 0.8391387150686271,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.4752,
"step": 10
},
{
"epoch": 0.10759115361625822,
"grad_norm": 0.29622114410594524,
"learning_rate": 5.357142857142857e-06,
"loss": 0.8542,
"step": 15
},
{
"epoch": 0.1434548714883443,
"grad_norm": 0.1867024790344403,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.5935,
"step": 20
},
{
"epoch": 0.17931858936043035,
"grad_norm": 0.1397115741987044,
"learning_rate": 8.92857142857143e-06,
"loss": 0.4806,
"step": 25
},
{
"epoch": 0.21518230723251644,
"grad_norm": 0.11207900324892817,
"learning_rate": 9.9984209464165e-06,
"loss": 0.3543,
"step": 30
},
{
"epoch": 0.2510460251046025,
"grad_norm": 0.12471379118682165,
"learning_rate": 9.980668045715864e-06,
"loss": 0.3099,
"step": 35
},
{
"epoch": 0.2869097429766886,
"grad_norm": 0.13942124773513623,
"learning_rate": 9.94325872368957e-06,
"loss": 0.2671,
"step": 40
},
{
"epoch": 0.3227734608487747,
"grad_norm": 0.08633657901146753,
"learning_rate": 9.886340617840968e-06,
"loss": 0.2409,
"step": 45
},
{
"epoch": 0.3586371787208607,
"grad_norm": 0.09525370649740562,
"learning_rate": 9.81013835793043e-06,
"loss": 0.2193,
"step": 50
},
{
"epoch": 0.3945008965929468,
"grad_norm": 0.08224573044279379,
"learning_rate": 9.714952679464324e-06,
"loss": 0.1873,
"step": 55
},
{
"epoch": 0.4303646144650329,
"grad_norm": 0.0960148670109882,
"learning_rate": 9.601159236829353e-06,
"loss": 0.1986,
"step": 60
},
{
"epoch": 0.46622833233711897,
"grad_norm": 0.08468885213082336,
"learning_rate": 9.46920712075632e-06,
"loss": 0.181,
"step": 65
},
{
"epoch": 0.502092050209205,
"grad_norm": 0.06651705476346571,
"learning_rate": 9.319617085964177e-06,
"loss": 0.1776,
"step": 70
},
{
"epoch": 0.5379557680812911,
"grad_norm": 0.07914440346820356,
"learning_rate": 9.152979495979064e-06,
"loss": 0.1729,
"step": 75
},
{
"epoch": 0.5738194859533772,
"grad_norm": 0.06318757579211376,
"learning_rate": 8.969951993239177e-06,
"loss": 0.1544,
"step": 80
},
{
"epoch": 0.6096832038254633,
"grad_norm": 0.06806462149173399,
"learning_rate": 8.77125690368052e-06,
"loss": 0.1452,
"step": 85
},
{
"epoch": 0.6455469216975493,
"grad_norm": 0.09204580355773379,
"learning_rate": 8.557678386046429e-06,
"loss": 0.148,
"step": 90
},
{
"epoch": 0.6814106395696354,
"grad_norm": 0.07404375144145035,
"learning_rate": 8.33005933717126e-06,
"loss": 0.1537,
"step": 95
},
{
"epoch": 0.7172743574417214,
"grad_norm": 0.06673338321385565,
"learning_rate": 8.089298065451673e-06,
"loss": 0.154,
"step": 100
},
{
"epoch": 0.7531380753138075,
"grad_norm": 0.07248494023312937,
"learning_rate": 7.836344745633785e-06,
"loss": 0.1415,
"step": 105
},
{
"epoch": 0.7890017931858936,
"grad_norm": 0.06250391175809607,
"learning_rate": 7.572197668907533e-06,
"loss": 0.132,
"step": 110
},
{
"epoch": 0.8248655110579797,
"grad_norm": 0.056578659740463,
"learning_rate": 7.297899303107441e-06,
"loss": 0.112,
"step": 115
},
{
"epoch": 0.8607292289300658,
"grad_norm": 0.06486223998598174,
"learning_rate": 7.014532178568314e-06,
"loss": 0.121,
"step": 120
},
{
"epoch": 0.8965929468021518,
"grad_norm": 0.056771834577397436,
"learning_rate": 6.723214615872585e-06,
"loss": 0.1134,
"step": 125
},
{
"epoch": 0.9324566646742379,
"grad_norm": 0.06319732340937169,
"learning_rate": 6.425096312349881e-06,
"loss": 0.1166,
"step": 130
},
{
"epoch": 0.968320382546324,
"grad_norm": 0.0649079486366395,
"learning_rate": 6.121353804746907e-06,
"loss": 0.1122,
"step": 135
},
{
"epoch": 1.0,
"grad_norm": 0.07566991126398058,
"learning_rate": 5.813185825974419e-06,
"loss": 0.1192,
"step": 140
},
{
"epoch": 1.0,
"eval_loss": 0.1111445426940918,
"eval_runtime": 3.972,
"eval_samples_per_second": 17.12,
"eval_steps_per_second": 4.28,
"step": 140
},
{
"epoch": 1.035863717872086,
"grad_norm": 0.06962658936027795,
"learning_rate": 5.5018085742560745e-06,
"loss": 0.0911,
"step": 145
},
{
"epoch": 1.0717274357441722,
"grad_norm": 0.05916257421087811,
"learning_rate": 5.188450913349674e-06,
"loss": 0.0905,
"step": 150
},
{
"epoch": 1.1075911536162582,
"grad_norm": 0.06400686740251811,
"learning_rate": 4.874349522783313e-06,
"loss": 0.0977,
"step": 155
},
{
"epoch": 1.1434548714883443,
"grad_norm": 0.05927545440254994,
"learning_rate": 4.560744017246284e-06,
"loss": 0.0834,
"step": 160
},
{
"epoch": 1.1793185893604303,
"grad_norm": 0.06649126661802829,
"learning_rate": 4.248872054396215e-06,
"loss": 0.0953,
"step": 165
},
{
"epoch": 1.2151823072325165,
"grad_norm": 0.06680856093283863,
"learning_rate": 3.939964450389728e-06,
"loss": 0.096,
"step": 170
},
{
"epoch": 1.2510460251046025,
"grad_norm": 0.07655116396488153,
"learning_rate": 3.635240322413375e-06,
"loss": 0.0843,
"step": 175
},
{
"epoch": 1.2869097429766887,
"grad_norm": 0.0679118797019963,
"learning_rate": 3.3359022773850673e-06,
"loss": 0.0933,
"step": 180
},
{
"epoch": 1.3227734608487747,
"grad_norm": 0.06352669282993445,
"learning_rate": 3.043131665813988e-06,
"loss": 0.0869,
"step": 185
},
{
"epoch": 1.3586371787208606,
"grad_norm": 0.05614250533642248,
"learning_rate": 2.7580839195498397e-06,
"loss": 0.0784,
"step": 190
},
{
"epoch": 1.3945008965929468,
"grad_norm": 0.06268476802798255,
"learning_rate": 2.4818839918211963e-06,
"loss": 0.0966,
"step": 195
},
{
"epoch": 1.4303646144650328,
"grad_norm": 0.09016849271972376,
"learning_rate": 2.2156219175590623e-06,
"loss": 0.0861,
"step": 200
},
{
"epoch": 1.466228332337119,
"grad_norm": 0.05616043304073107,
"learning_rate": 1.9603485115269743e-06,
"loss": 0.0821,
"step": 205
},
{
"epoch": 1.502092050209205,
"grad_norm": 0.056894314885253386,
"learning_rate": 1.7170712212352187e-06,
"loss": 0.0759,
"step": 210
},
{
"epoch": 1.5379557680812912,
"grad_norm": 0.05749554956523113,
"learning_rate": 1.4867501510057548e-06,
"loss": 0.0779,
"step": 215
},
{
"epoch": 1.5738194859533772,
"grad_norm": 0.06101506302740175,
"learning_rate": 1.2702942728790897e-06,
"loss": 0.0812,
"step": 220
},
{
"epoch": 1.6096832038254631,
"grad_norm": 0.052956740115013924,
"learning_rate": 1.0685578393169054e-06,
"loss": 0.0878,
"step": 225
},
{
"epoch": 1.6455469216975493,
"grad_norm": 0.05444893484606994,
"learning_rate": 8.823370118578628e-07,
"loss": 0.0808,
"step": 230
},
{
"epoch": 1.6814106395696355,
"grad_norm": 0.05577402210270524,
"learning_rate": 7.123667190317396e-07,
"loss": 0.0835,
"step": 235
},
{
"epoch": 1.7172743574417213,
"grad_norm": 0.060795662497347525,
"learning_rate": 5.593177559322776e-07,
"loss": 0.0755,
"step": 240
},
{
"epoch": 1.7531380753138075,
"grad_norm": 0.05146247489312187,
"learning_rate": 4.237941368954124e-07,
"loss": 0.0808,
"step": 245
},
{
"epoch": 1.7890017931858937,
"grad_norm": 0.05742062783254797,
"learning_rate": 3.0633071173062966e-07,
"loss": 0.0772,
"step": 250
},
{
"epoch": 1.8248655110579797,
"grad_norm": 0.06159979933911591,
"learning_rate": 2.0739105491312028e-07,
"loss": 0.0835,
"step": 255
},
{
"epoch": 1.8607292289300656,
"grad_norm": 0.05865983987282493,
"learning_rate": 1.2736563606711384e-07,
"loss": 0.0752,
"step": 260
},
{
"epoch": 1.8965929468021518,
"grad_norm": 0.06382021063164005,
"learning_rate": 6.657027896065982e-08,
"loss": 0.077,
"step": 265
},
{
"epoch": 1.932456664674238,
"grad_norm": 0.04725385570056433,
"learning_rate": 2.5244915093499134e-08,
"loss": 0.0731,
"step": 270
},
{
"epoch": 1.968320382546324,
"grad_norm": 0.053647864100340815,
"learning_rate": 3.5526367970539765e-09,
"loss": 0.0751,
"step": 275
},
{
"epoch": 1.9898386132695756,
"eval_loss": 0.08560756593942642,
"eval_runtime": 3.7696,
"eval_samples_per_second": 18.039,
"eval_steps_per_second": 4.51,
"step": 278
},
{
"epoch": 1.9898386132695756,
"step": 278,
"total_flos": 8.430395390385193e+17,
"train_loss": 0.20757551041009614,
"train_runtime": 3036.6171,
"train_samples_per_second": 4.407,
"train_steps_per_second": 0.092
}
],
"logging_steps": 5,
"max_steps": 278,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.430395390385193e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}