16-clusters-balanced-10 / trainer_state.json
MHGanainy's picture
MHGanainy/16-clusters-balanced-10
21f3454 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4029,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02482005460412013,
"grad_norm": 0.1369238644838333,
"learning_rate": 6.666666666666667e-06,
"loss": 2.4042,
"step": 100
},
{
"epoch": 0.04964010920824026,
"grad_norm": 0.17341101169586182,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.3581,
"step": 200
},
{
"epoch": 0.07446016381236038,
"grad_norm": 0.30298689007759094,
"learning_rate": 2e-05,
"loss": 2.2979,
"step": 300
},
{
"epoch": 0.09928021841648052,
"grad_norm": 0.4181392788887024,
"learning_rate": 1.9964532702725803e-05,
"loss": 2.2736,
"step": 400
},
{
"epoch": 0.12410027302060064,
"grad_norm": 0.4833754301071167,
"learning_rate": 1.9858382396738395e-05,
"loss": 2.2352,
"step": 500
},
{
"epoch": 0.14892032762472077,
"grad_norm": 0.5508949756622314,
"learning_rate": 1.9682302054929414e-05,
"loss": 2.1951,
"step": 600
},
{
"epoch": 0.17374038222884092,
"grad_norm": 0.5856565833091736,
"learning_rate": 1.943754069606428e-05,
"loss": 2.1662,
"step": 700
},
{
"epoch": 0.19856043683296104,
"grad_norm": 0.5611233115196228,
"learning_rate": 1.9125834524918215e-05,
"loss": 2.1815,
"step": 800
},
{
"epoch": 0.22338049143708116,
"grad_norm": 0.6802138090133667,
"learning_rate": 1.8749394616578068e-05,
"loss": 2.1675,
"step": 900
},
{
"epoch": 0.2482005460412013,
"grad_norm": 0.6513592004776001,
"learning_rate": 1.8310891232270827e-05,
"loss": 2.1402,
"step": 1000
},
{
"epoch": 0.2730206006453214,
"grad_norm": 0.6889598369598389,
"learning_rate": 1.781343487797389e-05,
"loss": 2.1334,
"step": 1100
},
{
"epoch": 0.29784065524944153,
"grad_norm": 0.7928256988525391,
"learning_rate": 1.7260554240167017e-05,
"loss": 2.1295,
"step": 1200
},
{
"epoch": 0.32266070985356166,
"grad_norm": 0.7162489295005798,
"learning_rate": 1.665617115523785e-05,
"loss": 2.1232,
"step": 1300
},
{
"epoch": 0.34748076445768183,
"grad_norm": 0.7136086225509644,
"learning_rate": 1.6004572790094535e-05,
"loss": 2.1148,
"step": 1400
},
{
"epoch": 0.37230081906180196,
"grad_norm": 0.7688263654708862,
"learning_rate": 1.531038123132105e-05,
"loss": 2.0873,
"step": 1500
},
{
"epoch": 0.3971208736659221,
"grad_norm": 0.772521436214447,
"learning_rate": 1.4578520698593441e-05,
"loss": 2.117,
"step": 1600
},
{
"epoch": 0.4219409282700422,
"grad_norm": 1.010330080986023,
"learning_rate": 1.3814182614927217e-05,
"loss": 2.071,
"step": 1700
},
{
"epoch": 0.4467609828741623,
"grad_norm": 0.6752054691314697,
"learning_rate": 1.3022788781528653e-05,
"loss": 2.0636,
"step": 1800
},
{
"epoch": 0.47158103747828245,
"grad_norm": 0.841232180595398,
"learning_rate": 1.220995291846777e-05,
"loss": 2.0532,
"step": 1900
},
{
"epoch": 0.4964010920824026,
"grad_norm": 0.7984778881072998,
"learning_rate": 1.1381440843982634e-05,
"loss": 2.0438,
"step": 2000
},
{
"epoch": 0.5212211466865228,
"grad_norm": 0.8068585395812988,
"learning_rate": 1.0543129574881446e-05,
"loss": 2.0687,
"step": 2100
},
{
"epoch": 0.5460412012906428,
"grad_norm": 0.8497598767280579,
"learning_rate": 9.700965638162112e-06,
"loss": 2.0477,
"step": 2200
},
{
"epoch": 0.570861255894763,
"grad_norm": 0.7474705576896667,
"learning_rate": 8.860922889564078e-06,
"loss": 2.0429,
"step": 2300
},
{
"epoch": 0.5956813104988831,
"grad_norm": 1.0781651735305786,
"learning_rate": 8.028960138264857e-06,
"loss": 2.0389,
"step": 2400
},
{
"epoch": 0.6205013651030032,
"grad_norm": 0.8750322461128235,
"learning_rate": 7.21097887830873e-06,
"loss": 2.046,
"step": 2500
},
{
"epoch": 0.6453214197071233,
"grad_norm": 0.9259145855903625,
"learning_rate": 6.4127814265980095e-06,
"loss": 2.0243,
"step": 2600
},
{
"epoch": 0.6701414743112435,
"grad_norm": 1.1625196933746338,
"learning_rate": 5.640029764393366e-06,
"loss": 2.0513,
"step": 2700
},
{
"epoch": 0.6949615289153637,
"grad_norm": 0.8271129727363586,
"learning_rate": 4.8982053742793025e-06,
"loss": 2.0228,
"step": 2800
},
{
"epoch": 0.7197815835194837,
"grad_norm": 0.7196031212806702,
"learning_rate": 4.1925703574897115e-06,
"loss": 2.0496,
"step": 2900
},
{
"epoch": 0.7446016381236039,
"grad_norm": 0.7880265712738037,
"learning_rate": 3.528130107406099e-06,
"loss": 2.0145,
"step": 3000
},
{
"epoch": 0.769421692727724,
"grad_norm": 0.909106433391571,
"learning_rate": 2.909597804002603e-06,
"loss": 2.0437,
"step": 3100
},
{
"epoch": 0.7942417473318442,
"grad_norm": 1.2606161832809448,
"learning_rate": 2.341360981094921e-06,
"loss": 2.0443,
"step": 3200
},
{
"epoch": 0.8190618019359642,
"grad_norm": 0.795652449131012,
"learning_rate": 1.8274504035470942e-06,
"loss": 2.0568,
"step": 3300
},
{
"epoch": 0.8438818565400844,
"grad_norm": 0.8904260993003845,
"learning_rate": 1.3715114752043746e-06,
"loss": 2.0787,
"step": 3400
},
{
"epoch": 0.8687019111442045,
"grad_norm": 1.0925287008285522,
"learning_rate": 9.767783803688414e-07,
"loss": 2.045,
"step": 3500
},
{
"epoch": 0.8935219657483247,
"grad_norm": 0.799608588218689,
"learning_rate": 6.460511422441984e-07,
"loss": 2.0167,
"step": 3600
},
{
"epoch": 0.9183420203524447,
"grad_norm": 0.9094216227531433,
"learning_rate": 3.8167576108468994e-07,
"loss": 2.057,
"step": 3700
},
{
"epoch": 0.9431620749565649,
"grad_norm": 0.8395094871520996,
"learning_rate": 1.855275729374284e-07,
"loss": 2.0425,
"step": 3800
},
{
"epoch": 0.9679821295606851,
"grad_norm": 0.8606423735618591,
"learning_rate": 5.89979470221802e-08,
"loss": 2.0208,
"step": 3900
},
{
"epoch": 0.9928021841648051,
"grad_norm": 0.8908767700195312,
"learning_rate": 2.9844161102077218e-09,
"loss": 2.0512,
"step": 4000
},
{
"epoch": 1.0,
"step": 4029,
"total_flos": 7.32108351012864e+16,
"train_loss": 2.105005581936963,
"train_runtime": 1251.4031,
"train_samples_per_second": 6.438,
"train_steps_per_second": 3.22
}
],
"logging_steps": 100,
"max_steps": 4029,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.32108351012864e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}