16-clusters-imbalanced-10 / trainer_state.json
MHGanainy's picture
MHGanainy/16-clusters-imbalanced-10
131ef5b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4413,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.022660321776569226,
"grad_norm": 0.09582193195819855,
"learning_rate": 6.666666666666667e-06,
"loss": 2.4053,
"step": 100
},
{
"epoch": 0.04532064355313845,
"grad_norm": 0.1730571836233139,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.3825,
"step": 200
},
{
"epoch": 0.06798096532970768,
"grad_norm": 0.3355884253978729,
"learning_rate": 2e-05,
"loss": 2.356,
"step": 300
},
{
"epoch": 0.0906412871062769,
"grad_norm": 0.37008875608444214,
"learning_rate": 1.9970843111690533e-05,
"loss": 2.2935,
"step": 400
},
{
"epoch": 0.11330160888284614,
"grad_norm": 0.5142782330513,
"learning_rate": 1.9883542471589315e-05,
"loss": 2.2561,
"step": 500
},
{
"epoch": 0.13596193065941536,
"grad_norm": 0.42936116456985474,
"learning_rate": 1.9738607162698895e-05,
"loss": 2.2424,
"step": 600
},
{
"epoch": 0.1586222524359846,
"grad_norm": 0.537521243095398,
"learning_rate": 1.9536882357541958e-05,
"loss": 2.242,
"step": 700
},
{
"epoch": 0.1812825742125538,
"grad_norm": 0.602051854133606,
"learning_rate": 1.927954438964115e-05,
"loss": 2.214,
"step": 800
},
{
"epoch": 0.20394289598912305,
"grad_norm": 0.5285528898239136,
"learning_rate": 1.8968093893874042e-05,
"loss": 2.1943,
"step": 900
},
{
"epoch": 0.22660321776569228,
"grad_norm": 0.6450159549713135,
"learning_rate": 1.8604347055704433e-05,
"loss": 2.1566,
"step": 1000
},
{
"epoch": 0.2492635395422615,
"grad_norm": 0.6594407558441162,
"learning_rate": 1.8190425020319016e-05,
"loss": 2.1578,
"step": 1100
},
{
"epoch": 0.27192386131883073,
"grad_norm": 0.6740846633911133,
"learning_rate": 1.7728741523428696e-05,
"loss": 2.1578,
"step": 1200
},
{
"epoch": 0.29458418309539997,
"grad_norm": 0.6054636240005493,
"learning_rate": 1.722198881586411e-05,
"loss": 2.1301,
"step": 1300
},
{
"epoch": 0.3172445048719692,
"grad_norm": 0.5829110145568848,
"learning_rate": 1.667312196404425e-05,
"loss": 2.1366,
"step": 1400
},
{
"epoch": 0.3399048266485384,
"grad_norm": 0.6636696457862854,
"learning_rate": 1.6085341617868172e-05,
"loss": 2.1301,
"step": 1500
},
{
"epoch": 0.3625651484251076,
"grad_norm": 0.8352382779121399,
"learning_rate": 1.546207534651667e-05,
"loss": 2.1157,
"step": 1600
},
{
"epoch": 0.38522547020167686,
"grad_norm": 0.6855395436286926,
"learning_rate": 1.4806957651001911e-05,
"loss": 2.1084,
"step": 1700
},
{
"epoch": 0.4078857919782461,
"grad_norm": 0.8896074891090393,
"learning_rate": 1.4123808770019433e-05,
"loss": 2.1494,
"step": 1800
},
{
"epoch": 0.43054611375481533,
"grad_norm": 0.7051901817321777,
"learning_rate": 1.3416612402693543e-05,
"loss": 2.1406,
"step": 1900
},
{
"epoch": 0.45320643553138457,
"grad_norm": 0.9859122633934021,
"learning_rate": 1.2689492478123242e-05,
"loss": 2.1142,
"step": 2000
},
{
"epoch": 0.47586675730795375,
"grad_norm": 0.9501364827156067,
"learning_rate": 1.1946689107194183e-05,
"loss": 2.091,
"step": 2100
},
{
"epoch": 0.498527079084523,
"grad_norm": 1.0703001022338867,
"learning_rate": 1.119253385689078e-05,
"loss": 2.0765,
"step": 2200
},
{
"epoch": 0.5211874008610923,
"grad_norm": 0.669400691986084,
"learning_rate": 1.0431424491293254e-05,
"loss": 2.0824,
"step": 2300
},
{
"epoch": 0.5438477226376615,
"grad_norm": 0.7835758924484253,
"learning_rate": 9.667799326554403e-06,
"loss": 2.0818,
"step": 2400
},
{
"epoch": 0.5665080444142306,
"grad_norm": 0.8207575082778931,
"learning_rate": 8.906111349401949e-06,
"loss": 2.1016,
"step": 2500
},
{
"epoch": 0.5891683661907999,
"grad_norm": 0.8124341368675232,
"learning_rate": 8.150802250091193e-06,
"loss": 2.0647,
"step": 2600
},
{
"epoch": 0.6118286879673691,
"grad_norm": 0.8744191527366638,
"learning_rate": 7.406276521231679e-06,
"loss": 2.0657,
"step": 2700
},
{
"epoch": 0.6344890097439384,
"grad_norm": 1.0869206190109253,
"learning_rate": 6.676875773527383e-06,
"loss": 2.0547,
"step": 2800
},
{
"epoch": 0.6571493315205076,
"grad_norm": 0.7237268686294556,
"learning_rate": 5.966853418205035e-06,
"loss": 2.124,
"step": 2900
},
{
"epoch": 0.6798096532970768,
"grad_norm": 0.9836551547050476,
"learning_rate": 5.2803498637669055e-06,
"loss": 2.0877,
"step": 3000
},
{
"epoch": 0.7024699750736461,
"grad_norm": 0.8831650614738464,
"learning_rate": 4.621368371705162e-06,
"loss": 2.0978,
"step": 3100
},
{
"epoch": 0.7251302968502152,
"grad_norm": 0.8482229709625244,
"learning_rate": 3.993751711972204e-06,
"loss": 2.075,
"step": 3200
},
{
"epoch": 0.7477906186267845,
"grad_norm": 0.8325951099395752,
"learning_rate": 3.401159754337836e-06,
"loss": 2.1016,
"step": 3300
},
{
"epoch": 0.7704509404033537,
"grad_norm": 1.3220783472061157,
"learning_rate": 2.8470481263064255e-06,
"loss": 2.1096,
"step": 3400
},
{
"epoch": 0.793111262179923,
"grad_norm": 0.8809642195701599,
"learning_rate": 2.3346480620478685e-06,
"loss": 2.079,
"step": 3500
},
{
"epoch": 0.8157715839564922,
"grad_norm": 0.9344497919082642,
"learning_rate": 1.866947559850839e-06,
"loss": 2.1025,
"step": 3600
},
{
"epoch": 0.8384319057330614,
"grad_norm": 0.9643566012382507,
"learning_rate": 1.446673957976298e-06,
"loss": 2.1116,
"step": 3700
},
{
"epoch": 0.8610922275096307,
"grad_norm": 1.0109236240386963,
"learning_rate": 1.0762780305181064e-06,
"loss": 2.0662,
"step": 3800
},
{
"epoch": 0.8837525492861998,
"grad_norm": 0.9456806182861328,
"learning_rate": 7.579196960136958e-07,
"loss": 2.0815,
"step": 3900
},
{
"epoch": 0.9064128710627691,
"grad_norm": 1.229778528213501,
"learning_rate": 4.934554221433741e-07,
"loss": 2.0636,
"step": 4000
},
{
"epoch": 0.9290731928393383,
"grad_norm": 0.8360131978988647,
"learning_rate": 2.8442739996615956e-07,
"loss": 2.0465,
"step": 4100
},
{
"epoch": 0.9517335146159075,
"grad_norm": 0.6662079691886902,
"learning_rate": 1.3205455082128228e-07,
"loss": 2.0419,
"step": 4200
},
{
"epoch": 0.9743938363924768,
"grad_norm": 0.8761087656021118,
"learning_rate": 3.7225418337528685e-08,
"loss": 2.0664,
"step": 4300
},
{
"epoch": 0.997054158169046,
"grad_norm": 1.0942589044570923,
"learning_rate": 4.929869997571945e-10,
"loss": 2.0249,
"step": 4400
},
{
"epoch": 1.0,
"step": 4413,
"total_flos": 8.01984399409152e+16,
"train_loss": 2.1353629073560736,
"train_runtime": 1379.8897,
"train_samples_per_second": 6.396,
"train_steps_per_second": 3.198
}
],
"logging_steps": 100,
"max_steps": 4413,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.01984399409152e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}