{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2252,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08888888888888889,
"grad_norm": 0.045956578105688095,
"learning_rate": 0.00019564831261101244,
"loss": 0.322,
"step": 50
},
{
"epoch": 0.17777777777777778,
"grad_norm": 0.03815697133541107,
"learning_rate": 0.00019120781527531084,
"loss": 0.1842,
"step": 100
},
{
"epoch": 0.26666666666666666,
"grad_norm": 0.061464082449674606,
"learning_rate": 0.00018676731793960925,
"loss": 0.1801,
"step": 150
},
{
"epoch": 0.35555555555555557,
"grad_norm": 0.0637449324131012,
"learning_rate": 0.00018232682060390765,
"loss": 0.1773,
"step": 200
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.053989194333553314,
"learning_rate": 0.00017788632326820605,
"loss": 0.17,
"step": 250
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.06381799280643463,
"learning_rate": 0.00017344582593250445,
"loss": 0.1527,
"step": 300
},
{
"epoch": 0.6222222222222222,
"grad_norm": 0.07270950824022293,
"learning_rate": 0.00016900532859680286,
"loss": 0.1594,
"step": 350
},
{
"epoch": 0.7111111111111111,
"grad_norm": 0.051719773560762405,
"learning_rate": 0.00016456483126110126,
"loss": 0.1535,
"step": 400
},
{
"epoch": 0.8,
"grad_norm": 0.06734076142311096,
"learning_rate": 0.00016012433392539966,
"loss": 0.1431,
"step": 450
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.04238704591989517,
"learning_rate": 0.00015568383658969806,
"loss": 0.1439,
"step": 500
},
{
"epoch": 0.9777777777777777,
"grad_norm": 0.056586671620607376,
"learning_rate": 0.00015124333925399647,
"loss": 0.1333,
"step": 550
},
{
"epoch": 1.0657777777777777,
"grad_norm": 0.05698104947805405,
"learning_rate": 0.00014680284191829484,
"loss": 0.1474,
"step": 600
},
{
"epoch": 1.1546666666666667,
"grad_norm": 0.054345980286598206,
"learning_rate": 0.00014236234458259324,
"loss": 0.1253,
"step": 650
},
{
"epoch": 1.2435555555555555,
"grad_norm": 0.04708417132496834,
"learning_rate": 0.00013792184724689165,
"loss": 0.1349,
"step": 700
},
{
"epoch": 1.3324444444444445,
"grad_norm": 0.06845518201589584,
"learning_rate": 0.00013348134991119005,
"loss": 0.1144,
"step": 750
},
{
"epoch": 1.4213333333333333,
"grad_norm": 0.0530928298830986,
"learning_rate": 0.00012904085257548845,
"loss": 0.1168,
"step": 800
},
{
"epoch": 1.5102222222222221,
"grad_norm": 0.08338514715433121,
"learning_rate": 0.00012460035523978685,
"loss": 0.1237,
"step": 850
},
{
"epoch": 1.5991111111111111,
"grad_norm": 0.06609191745519638,
"learning_rate": 0.00012015985790408525,
"loss": 0.141,
"step": 900
},
{
"epoch": 1.688,
"grad_norm": 0.08270981907844543,
"learning_rate": 0.00011571936056838366,
"loss": 0.1256,
"step": 950
},
{
"epoch": 1.7768888888888887,
"grad_norm": 0.048950765281915665,
"learning_rate": 0.00011127886323268206,
"loss": 0.1175,
"step": 1000
},
{
"epoch": 1.8657777777777778,
"grad_norm": 0.06318267434835434,
"learning_rate": 0.00010683836589698046,
"loss": 0.1275,
"step": 1050
},
{
"epoch": 1.9546666666666668,
"grad_norm": 0.07041537761688232,
"learning_rate": 0.00010239786856127886,
"loss": 0.124,
"step": 1100
},
{
"epoch": 2.042666666666667,
"grad_norm": 0.0656278058886528,
"learning_rate": 9.795737122557727e-05,
"loss": 0.1327,
"step": 1150
},
{
"epoch": 2.1315555555555554,
"grad_norm": 0.06331595033407211,
"learning_rate": 9.351687388987567e-05,
"loss": 0.1069,
"step": 1200
},
{
"epoch": 2.2204444444444444,
"grad_norm": 0.04332101345062256,
"learning_rate": 8.907637655417407e-05,
"loss": 0.1024,
"step": 1250
},
{
"epoch": 2.3093333333333335,
"grad_norm": 0.06444103270769119,
"learning_rate": 8.463587921847247e-05,
"loss": 0.1026,
"step": 1300
},
{
"epoch": 2.398222222222222,
"grad_norm": 0.06379590928554535,
"learning_rate": 8.019538188277088e-05,
"loss": 0.1189,
"step": 1350
},
{
"epoch": 2.487111111111111,
"grad_norm": 0.0656711533665657,
"learning_rate": 7.575488454706926e-05,
"loss": 0.1085,
"step": 1400
},
{
"epoch": 2.576,
"grad_norm": 0.07427001744508743,
"learning_rate": 7.131438721136767e-05,
"loss": 0.1245,
"step": 1450
},
{
"epoch": 2.664888888888889,
"grad_norm": 0.06245647370815277,
"learning_rate": 6.687388987566607e-05,
"loss": 0.1118,
"step": 1500
},
{
"epoch": 2.7537777777777777,
"grad_norm": 0.08108062297105789,
"learning_rate": 6.243339253996447e-05,
"loss": 0.1137,
"step": 1550
},
{
"epoch": 2.8426666666666667,
"grad_norm": 0.061958227306604385,
"learning_rate": 5.7992895204262874e-05,
"loss": 0.1001,
"step": 1600
},
{
"epoch": 2.9315555555555557,
"grad_norm": 0.056670840829610825,
"learning_rate": 5.3552397868561276e-05,
"loss": 0.1172,
"step": 1650
},
{
"epoch": 3.0195555555555558,
"grad_norm": 0.07005661725997925,
"learning_rate": 4.9111900532859686e-05,
"loss": 0.1099,
"step": 1700
},
{
"epoch": 3.1084444444444443,
"grad_norm": 0.0712517499923706,
"learning_rate": 4.467140319715809e-05,
"loss": 0.0932,
"step": 1750
},
{
"epoch": 3.1973333333333334,
"grad_norm": 0.07057236135005951,
"learning_rate": 4.0230905861456483e-05,
"loss": 0.0982,
"step": 1800
},
{
"epoch": 3.2862222222222224,
"grad_norm": 0.07291365414857864,
"learning_rate": 3.5790408525754886e-05,
"loss": 0.1008,
"step": 1850
},
{
"epoch": 3.375111111111111,
"grad_norm": 0.11394993960857391,
"learning_rate": 3.134991119005329e-05,
"loss": 0.0933,
"step": 1900
},
{
"epoch": 3.464,
"grad_norm": 0.09188514947891235,
"learning_rate": 2.690941385435169e-05,
"loss": 0.0969,
"step": 1950
},
{
"epoch": 3.552888888888889,
"grad_norm": 0.08308542519807816,
"learning_rate": 2.246891651865009e-05,
"loss": 0.1067,
"step": 2000
},
{
"epoch": 3.6417777777777776,
"grad_norm": 0.10817820578813553,
"learning_rate": 1.8028419182948492e-05,
"loss": 0.0931,
"step": 2050
},
{
"epoch": 3.7306666666666666,
"grad_norm": 0.06676340103149414,
"learning_rate": 1.3587921847246892e-05,
"loss": 0.0984,
"step": 2100
},
{
"epoch": 3.8195555555555556,
"grad_norm": 0.0792100727558136,
"learning_rate": 9.147424511545293e-06,
"loss": 0.1007,
"step": 2150
},
{
"epoch": 3.9084444444444446,
"grad_norm": 0.06362316757440567,
"learning_rate": 4.706927175843695e-06,
"loss": 0.0935,
"step": 2200
},
{
"epoch": 3.997333333333333,
"grad_norm": 0.08906163275241852,
"learning_rate": 2.6642984014209596e-07,
"loss": 0.0958,
"step": 2250
}
],
"logging_steps": 50,
"max_steps": 2252,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.468874889363456e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}