{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 50,
"global_step": 2089,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09573958831977022,
"grad_norm": 1.268161654472351,
"learning_rate": 2.3467432950191573e-06,
"loss": 0.7797,
"step": 50
},
{
"epoch": 0.19147917663954045,
"grad_norm": 1.4730911254882812,
"learning_rate": 4.741379310344828e-06,
"loss": 0.5423,
"step": 100
},
{
"epoch": 0.2872187649593107,
"grad_norm": 0.5494027137756348,
"learning_rate": 7.136015325670499e-06,
"loss": 0.3713,
"step": 150
},
{
"epoch": 0.3829583532790809,
"grad_norm": 0.4706728458404541,
"learning_rate": 9.530651340996169e-06,
"loss": 0.3247,
"step": 200
},
{
"epoch": 0.47869794159885115,
"grad_norm": 0.6062602400779724,
"learning_rate": 1.192528735632184e-05,
"loss": 0.2856,
"step": 250
},
{
"epoch": 0.5744375299186214,
"grad_norm": 0.7379469275474548,
"learning_rate": 1.431992337164751e-05,
"loss": 0.2889,
"step": 300
},
{
"epoch": 0.6701771182383915,
"grad_norm": 0.7957981824874878,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.2783,
"step": 350
},
{
"epoch": 0.7659167065581618,
"grad_norm": 0.8965973854064941,
"learning_rate": 1.9061302681992337e-05,
"loss": 0.2609,
"step": 400
},
{
"epoch": 0.861656294877932,
"grad_norm": 0.5201160311698914,
"learning_rate": 2.145593869731801e-05,
"loss": 0.2752,
"step": 450
},
{
"epoch": 0.9573958831977023,
"grad_norm": 0.733790934085846,
"learning_rate": 2.385057471264368e-05,
"loss": 0.2374,
"step": 500
},
{
"epoch": 0.9995213020584012,
"eval_loss": 0.4093788266181946,
"eval_runtime": 193.3736,
"eval_samples_per_second": 1.205,
"eval_steps_per_second": 0.155,
"step": 522
},
{
"epoch": 1.0531354715174726,
"grad_norm": 0.5924952030181885,
"learning_rate": 2.624521072796935e-05,
"loss": 0.2357,
"step": 550
},
{
"epoch": 1.1488750598372426,
"grad_norm": 0.6746445894241333,
"learning_rate": 2.863984674329502e-05,
"loss": 0.243,
"step": 600
},
{
"epoch": 1.2446146481570128,
"grad_norm": 0.7088050842285156,
"learning_rate": 3.103448275862069e-05,
"loss": 0.2491,
"step": 650
},
{
"epoch": 1.340354236476783,
"grad_norm": 0.4997643828392029,
"learning_rate": 3.342911877394636e-05,
"loss": 0.2221,
"step": 700
},
{
"epoch": 1.4360938247965533,
"grad_norm": 0.5417702794075012,
"learning_rate": 3.582375478927204e-05,
"loss": 0.2376,
"step": 750
},
{
"epoch": 1.5318334131163236,
"grad_norm": 0.8095722198486328,
"learning_rate": 3.82183908045977e-05,
"loss": 0.2369,
"step": 800
},
{
"epoch": 1.6275730014360938,
"grad_norm": 0.6943379044532776,
"learning_rate": 4.061302681992337e-05,
"loss": 0.2482,
"step": 850
},
{
"epoch": 1.723312589755864,
"grad_norm": 0.6098237633705139,
"learning_rate": 4.3007662835249046e-05,
"loss": 0.2245,
"step": 900
},
{
"epoch": 1.8190521780756344,
"grad_norm": 0.4689110815525055,
"learning_rate": 4.5402298850574716e-05,
"loss": 0.2276,
"step": 950
},
{
"epoch": 1.9147917663954046,
"grad_norm": 0.6285553574562073,
"learning_rate": 4.7796934865900385e-05,
"loss": 0.2308,
"step": 1000
},
{
"epoch": 1.9990426041168023,
"eval_loss": 0.3900133967399597,
"eval_runtime": 192.4384,
"eval_samples_per_second": 1.211,
"eval_steps_per_second": 0.156,
"step": 1044
},
{
"epoch": 2.010531354715175,
"grad_norm": 0.40644219517707825,
"learning_rate": 4.999988680990267e-05,
"loss": 0.227,
"step": 1050
},
{
"epoch": 2.106270943034945,
"grad_norm": 0.6110271215438843,
"learning_rate": 4.9979373926052865e-05,
"loss": 0.2053,
"step": 1100
},
{
"epoch": 2.2020105313547154,
"grad_norm": 0.7076897025108337,
"learning_rate": 4.992352246040183e-05,
"loss": 0.232,
"step": 1150
},
{
"epoch": 2.297750119674485,
"grad_norm": 0.5460345149040222,
"learning_rate": 4.983241142660274e-05,
"loss": 0.2202,
"step": 1200
},
{
"epoch": 2.393489707994256,
"grad_norm": 0.774861216545105,
"learning_rate": 4.970616972038894e-05,
"loss": 0.2135,
"step": 1250
},
{
"epoch": 2.4892292963140257,
"grad_norm": 0.489745557308197,
"learning_rate": 4.954497593722384e-05,
"loss": 0.2125,
"step": 1300
},
{
"epoch": 2.584968884633796,
"grad_norm": 0.43657568097114563,
"learning_rate": 4.9349058119640005e-05,
"loss": 0.1994,
"step": 1350
},
{
"epoch": 2.680708472953566,
"grad_norm": 0.5336562395095825,
"learning_rate": 4.911869343462504e-05,
"loss": 0.2077,
"step": 1400
},
{
"epoch": 2.7764480612733364,
"grad_norm": 0.3822609782218933,
"learning_rate": 4.88542077815105e-05,
"loss": 0.2077,
"step": 1450
},
{
"epoch": 2.8721876495931067,
"grad_norm": 0.41932567954063416,
"learning_rate": 4.8555975330918736e-05,
"loss": 0.1983,
"step": 1500
},
{
"epoch": 2.967927237912877,
"grad_norm": 0.5260158181190491,
"learning_rate": 4.822441799541979e-05,
"loss": 0.2149,
"step": 1550
},
{
"epoch": 2.998563906175203,
"eval_loss": 0.3759535849094391,
"eval_runtime": 192.4413,
"eval_samples_per_second": 1.211,
"eval_steps_per_second": 0.156,
"step": 1566
},
{
"epoch": 3.063666826232647,
"grad_norm": 0.35727375745773315,
"learning_rate": 4.786000483264725e-05,
"loss": 0.183,
"step": 1600
},
{
"epoch": 3.1594064145524174,
"grad_norm": 0.44125649333000183,
"learning_rate": 4.7463251381717515e-05,
"loss": 0.2217,
"step": 1650
},
{
"epoch": 3.2551460028721877,
"grad_norm": 0.39317113161087036,
"learning_rate": 4.703471893389122e-05,
"loss": 0.1947,
"step": 1700
},
{
"epoch": 3.350885591191958,
"grad_norm": 0.5991040468215942,
"learning_rate": 4.6575013738508575e-05,
"loss": 0.2082,
"step": 1750
},
{
"epoch": 3.446625179511728,
"grad_norm": 0.4587060213088989,
"learning_rate": 4.608478614532215e-05,
"loss": 0.1812,
"step": 1800
},
{
"epoch": 3.5423647678314985,
"grad_norm": 0.6657220125198364,
"learning_rate": 4.556472968444017e-05,
"loss": 0.17,
"step": 1850
},
{
"epoch": 3.6381043561512687,
"grad_norm": 0.6673758029937744,
"learning_rate": 4.501558008518231e-05,
"loss": 0.1833,
"step": 1900
},
{
"epoch": 3.7338439444710385,
"grad_norm": 0.5800417065620422,
"learning_rate": 4.4438114235235655e-05,
"loss": 0.1819,
"step": 1950
},
{
"epoch": 3.829583532790809,
"grad_norm": 0.9954800605773926,
"learning_rate": 4.3833149081583604e-05,
"loss": 0.2147,
"step": 2000
},
{
"epoch": 3.925323121110579,
"grad_norm": 0.3738028109073639,
"learning_rate": 4.320154047476237e-05,
"loss": 0.1718,
"step": 2050
},
{
"epoch": 4.0,
"eval_loss": 0.37361159920692444,
"eval_runtime": 192.3984,
"eval_samples_per_second": 1.211,
"eval_steps_per_second": 0.156,
"step": 2089
}
],
"logging_steps": 50,
"max_steps": 5220,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 8.597365381542052e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}