{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9962264150943396,
"eval_steps": 500,
"global_step": 308,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00646900269541779,
"grad_norm": 1.6469532608759772,
"learning_rate": 3.2258064516129035e-07,
"loss": 1.7835,
"step": 1
},
{
"epoch": 0.03234501347708895,
"grad_norm": 1.2482964329798694,
"learning_rate": 1.6129032258064516e-06,
"loss": 1.6751,
"step": 5
},
{
"epoch": 0.0646900269541779,
"grad_norm": 0.8520203638711542,
"learning_rate": 3.225806451612903e-06,
"loss": 1.4066,
"step": 10
},
{
"epoch": 0.09703504043126684,
"grad_norm": 0.24467662319379258,
"learning_rate": 4.838709677419355e-06,
"loss": 0.8625,
"step": 15
},
{
"epoch": 0.1293800539083558,
"grad_norm": 0.17065318217659073,
"learning_rate": 6.451612903225806e-06,
"loss": 0.5845,
"step": 20
},
{
"epoch": 0.16172506738544473,
"grad_norm": 0.12299088556353172,
"learning_rate": 8.064516129032258e-06,
"loss": 0.481,
"step": 25
},
{
"epoch": 0.1940700808625337,
"grad_norm": 0.10618332452588639,
"learning_rate": 9.67741935483871e-06,
"loss": 0.4052,
"step": 30
},
{
"epoch": 0.22641509433962265,
"grad_norm": 0.1033707741023481,
"learning_rate": 9.994855706800666e-06,
"loss": 0.3419,
"step": 35
},
{
"epoch": 0.2587601078167116,
"grad_norm": 0.10584999343170592,
"learning_rate": 9.973975156498866e-06,
"loss": 0.3256,
"step": 40
},
{
"epoch": 0.29110512129380056,
"grad_norm": 0.0901285523198311,
"learning_rate": 9.937103907387626e-06,
"loss": 0.2797,
"step": 45
},
{
"epoch": 0.32345013477088946,
"grad_norm": 0.09991878402985224,
"learning_rate": 9.884360495852984e-06,
"loss": 0.2625,
"step": 50
},
{
"epoch": 0.3557951482479784,
"grad_norm": 0.08464546463539248,
"learning_rate": 9.815914485268598e-06,
"loss": 0.2351,
"step": 55
},
{
"epoch": 0.3881401617250674,
"grad_norm": 0.08197436606116067,
"learning_rate": 9.731985920871028e-06,
"loss": 0.2254,
"step": 60
},
{
"epoch": 0.42048517520215634,
"grad_norm": 0.09215717950753108,
"learning_rate": 9.63284462234223e-06,
"loss": 0.2137,
"step": 65
},
{
"epoch": 0.4528301886792453,
"grad_norm": 0.07752856948079369,
"learning_rate": 9.51880931637353e-06,
"loss": 0.1888,
"step": 70
},
{
"epoch": 0.48517520215633425,
"grad_norm": 0.07883922492159129,
"learning_rate": 9.390246611999754e-06,
"loss": 0.1916,
"step": 75
},
{
"epoch": 0.5175202156334232,
"grad_norm": 0.07853251479970542,
"learning_rate": 9.247569821997724e-06,
"loss": 0.1911,
"step": 80
},
{
"epoch": 0.5498652291105122,
"grad_norm": 0.08737319968077989,
"learning_rate": 9.091237634138133e-06,
"loss": 0.1711,
"step": 85
},
{
"epoch": 0.5822102425876011,
"grad_norm": 0.07843101893653079,
"learning_rate": 8.921752636562582e-06,
"loss": 0.1767,
"step": 90
},
{
"epoch": 0.6145552560646901,
"grad_norm": 0.07279664888133161,
"learning_rate": 8.739659702026502e-06,
"loss": 0.1756,
"step": 95
},
{
"epoch": 0.6469002695417789,
"grad_norm": 0.07368129133912499,
"learning_rate": 8.54554423620239e-06,
"loss": 0.1578,
"step": 100
},
{
"epoch": 0.6792452830188679,
"grad_norm": 0.08004273889398619,
"learning_rate": 8.340030295674887e-06,
"loss": 0.1627,
"step": 105
},
{
"epoch": 0.7115902964959568,
"grad_norm": 0.06936426180633778,
"learning_rate": 8.123778581678064e-06,
"loss": 0.1458,
"step": 110
},
{
"epoch": 0.7439353099730458,
"grad_norm": 0.07394716823095644,
"learning_rate": 7.897484316024799e-06,
"loss": 0.1502,
"step": 115
},
{
"epoch": 0.7762803234501348,
"grad_norm": 0.06318411109637828,
"learning_rate": 7.661875006056914e-06,
"loss": 0.1324,
"step": 120
},
{
"epoch": 0.8086253369272237,
"grad_norm": 0.08071371188965755,
"learning_rate": 7.417708105801386e-06,
"loss": 0.1291,
"step": 125
},
{
"epoch": 0.8409703504043127,
"grad_norm": 0.06518774296350069,
"learning_rate": 7.165768580851806e-06,
"loss": 0.1399,
"step": 130
},
{
"epoch": 0.8733153638814016,
"grad_norm": 0.06678829904280204,
"learning_rate": 6.90686638480362e-06,
"loss": 0.1208,
"step": 135
},
{
"epoch": 0.9056603773584906,
"grad_norm": 0.07712741967383008,
"learning_rate": 6.6418338553561225e-06,
"loss": 0.129,
"step": 140
},
{
"epoch": 0.9380053908355795,
"grad_norm": 0.09245765264021505,
"learning_rate": 6.371523038452398e-06,
"loss": 0.129,
"step": 145
},
{
"epoch": 0.9703504043126685,
"grad_norm": 0.0813308979175732,
"learning_rate": 6.096802949059757e-06,
"loss": 0.1406,
"step": 150
},
{
"epoch": 0.9962264150943396,
"eval_loss": 0.1126493588089943,
"eval_runtime": 4.6858,
"eval_samples_per_second": 16.006,
"eval_steps_per_second": 4.055,
"step": 154
},
{
"epoch": 1.0064690026954177,
"grad_norm": 0.11598192209575617,
"learning_rate": 5.818556777396923e-06,
"loss": 0.1432,
"step": 155
},
{
"epoch": 1.0388140161725068,
"grad_norm": 0.06894087767992937,
"learning_rate": 5.537679049589568e-06,
"loss": 0.0953,
"step": 160
},
{
"epoch": 1.0711590296495956,
"grad_norm": 0.07124980246876612,
"learning_rate": 5.255072751882363e-06,
"loss": 0.0936,
"step": 165
},
{
"epoch": 1.1035040431266847,
"grad_norm": 0.07602278070196083,
"learning_rate": 4.971646427652806e-06,
"loss": 0.1001,
"step": 170
},
{
"epoch": 1.1358490566037736,
"grad_norm": 0.07547518876417068,
"learning_rate": 4.688311256559587e-06,
"loss": 0.1,
"step": 175
},
{
"epoch": 1.1681940700808626,
"grad_norm": 0.07204342005275981,
"learning_rate": 4.405978125215627e-06,
"loss": 0.0878,
"step": 180
},
{
"epoch": 1.2005390835579515,
"grad_norm": 0.060953239705185125,
"learning_rate": 4.125554698803241e-06,
"loss": 0.0879,
"step": 185
},
{
"epoch": 1.2328840970350403,
"grad_norm": 0.07580082959742644,
"learning_rate": 3.847942503045776e-06,
"loss": 0.0813,
"step": 190
},
{
"epoch": 1.2652291105121294,
"grad_norm": 0.06974818603848078,
"learning_rate": 3.5740340259168383e-06,
"loss": 0.0796,
"step": 195
},
{
"epoch": 1.2975741239892185,
"grad_norm": 0.06234723254345099,
"learning_rate": 3.3047098484047314e-06,
"loss": 0.088,
"step": 200
},
{
"epoch": 1.3299191374663073,
"grad_norm": 0.06103357802751846,
"learning_rate": 3.040835813556352e-06,
"loss": 0.0909,
"step": 205
},
{
"epoch": 1.3622641509433961,
"grad_norm": 0.05846487357821416,
"learning_rate": 2.783260242901694e-06,
"loss": 0.0809,
"step": 210
},
{
"epoch": 1.3946091644204852,
"grad_norm": 0.06625932022799419,
"learning_rate": 2.5328112092077882e-06,
"loss": 0.0813,
"step": 215
},
{
"epoch": 1.426954177897574,
"grad_norm": 0.08344401168680668,
"learning_rate": 2.2902938743298765e-06,
"loss": 0.0849,
"step": 220
},
{
"epoch": 1.4592991913746631,
"grad_norm": 0.06798398373465986,
"learning_rate": 2.056487900718227e-06,
"loss": 0.0838,
"step": 225
},
{
"epoch": 1.491644204851752,
"grad_norm": 0.06615029905608064,
"learning_rate": 1.8321449449023215e-06,
"loss": 0.0888,
"step": 230
},
{
"epoch": 1.523989218328841,
"grad_norm": 0.0678269709159078,
"learning_rate": 1.6179862410105197e-06,
"loss": 0.0768,
"step": 235
},
{
"epoch": 1.55633423180593,
"grad_norm": 0.05517565703970911,
"learning_rate": 1.4147002820938743e-06,
"loss": 0.0749,
"step": 240
},
{
"epoch": 1.5886792452830187,
"grad_norm": 0.060770439128733256,
"learning_rate": 1.2229406067083566e-06,
"loss": 0.0815,
"step": 245
},
{
"epoch": 1.6210242587601078,
"grad_norm": 0.06213193516806644,
"learning_rate": 1.0433236978713546e-06,
"loss": 0.0783,
"step": 250
},
{
"epoch": 1.6533692722371969,
"grad_norm": 0.07190883052211446,
"learning_rate": 8.764270011470144e-07,
"loss": 0.0762,
"step": 255
},
{
"epoch": 1.6857142857142857,
"grad_norm": 0.0596697946070157,
"learning_rate": 7.227870682320432e-07,
"loss": 0.0755,
"step": 260
},
{
"epoch": 1.7180592991913746,
"grad_norm": 0.05413502291668993,
"learning_rate": 5.828978320101109e-07,
"loss": 0.0832,
"step": 265
},
{
"epoch": 1.7504043126684636,
"grad_norm": 0.07737072813418777,
"learning_rate": 4.572090186203171e-07,
"loss": 0.082,
"step": 270
},
{
"epoch": 1.7827493261455527,
"grad_norm": 0.06289367757714523,
"learning_rate": 3.461247016447372e-07,
"loss": 0.0725,
"step": 275
},
{
"epoch": 1.8150943396226416,
"grad_norm": 0.06648151536128295,
"learning_rate": 2.500020030631356e-07,
"loss": 0.0806,
"step": 280
},
{
"epoch": 1.8474393530997304,
"grad_norm": 0.059849953071029575,
"learning_rate": 1.6914994515114082e-07,
"loss": 0.0758,
"step": 285
},
{
"epoch": 1.8797843665768195,
"grad_norm": 0.06154381772700072,
"learning_rate": 1.0382845701284228e-07,
"loss": 0.0737,
"step": 290
},
{
"epoch": 1.9121293800539083,
"grad_norm": 0.06222625997511854,
"learning_rate": 5.424753894171519e-08,
"loss": 0.0706,
"step": 295
},
{
"epoch": 1.9444743935309972,
"grad_norm": 0.06835624541815795,
"learning_rate": 2.056658729633121e-08,
"loss": 0.0753,
"step": 300
},
{
"epoch": 1.9768194070080862,
"grad_norm": 0.05761761360710151,
"learning_rate": 2.8938820612961494e-09,
"loss": 0.0659,
"step": 305
},
{
"epoch": 1.9962264150943396,
"eval_loss": 0.08172182738780975,
"eval_runtime": 4.3645,
"eval_samples_per_second": 17.184,
"eval_steps_per_second": 4.353,
"step": 308
},
{
"epoch": 1.9962264150943396,
"step": 308,
"total_flos": 1.0209786248256553e+18,
"train_loss": 0.2044623964405679,
"train_runtime": 3413.7868,
"train_samples_per_second": 4.347,
"train_steps_per_second": 0.09
}
],
"logging_steps": 5,
"max_steps": 308,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0209786248256553e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}