FinetunedByYalcin / trainer_state.json
YALCINKAYA's picture
initial fine-tuning commit
4aa6851 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 90.0,
"eval_steps": 500,
"global_step": 360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.5,
"grad_norm": 0.5360991358757019,
"learning_rate": 0.0009980973490458728,
"loss": 2.5493,
"step": 10
},
{
"epoch": 5.0,
"grad_norm": 0.2548345923423767,
"learning_rate": 0.000992403876506104,
"loss": 1.6144,
"step": 20
},
{
"epoch": 7.5,
"grad_norm": 0.24080035090446472,
"learning_rate": 0.0009829629131445341,
"loss": 1.3919,
"step": 30
},
{
"epoch": 10.0,
"grad_norm": 0.2729661166667938,
"learning_rate": 0.0009698463103929542,
"loss": 1.254,
"step": 40
},
{
"epoch": 12.5,
"grad_norm": 0.30077192187309265,
"learning_rate": 0.0009531538935183251,
"loss": 1.138,
"step": 50
},
{
"epoch": 15.0,
"grad_norm": 0.3787655234336853,
"learning_rate": 0.0009330127018922195,
"loss": 1.0206,
"step": 60
},
{
"epoch": 17.5,
"grad_norm": 0.416939377784729,
"learning_rate": 0.0009095760221444959,
"loss": 0.91,
"step": 70
},
{
"epoch": 20.0,
"grad_norm": 0.5407611727714539,
"learning_rate": 0.000883022221559489,
"loss": 0.8329,
"step": 80
},
{
"epoch": 22.5,
"grad_norm": 0.5476299524307251,
"learning_rate": 0.0008535533905932737,
"loss": 0.7483,
"step": 90
},
{
"epoch": 25.0,
"grad_norm": 0.5024413466453552,
"learning_rate": 0.0008213938048432696,
"loss": 0.6778,
"step": 100
},
{
"epoch": 27.5,
"grad_norm": 0.5071346759796143,
"learning_rate": 0.0007867882181755231,
"loss": 0.6175,
"step": 110
},
{
"epoch": 30.0,
"grad_norm": 0.5480501055717468,
"learning_rate": 0.00075,
"loss": 0.5726,
"step": 120
},
{
"epoch": 32.5,
"grad_norm": 0.5539716482162476,
"learning_rate": 0.0007113091308703497,
"loss": 0.5276,
"step": 130
},
{
"epoch": 35.0,
"grad_norm": 0.5367885231971741,
"learning_rate": 0.0006710100716628344,
"loss": 0.4889,
"step": 140
},
{
"epoch": 37.5,
"grad_norm": 0.5145406126976013,
"learning_rate": 0.0006294095225512603,
"loss": 0.4513,
"step": 150
},
{
"epoch": 40.0,
"grad_norm": 0.5838665962219238,
"learning_rate": 0.0005868240888334653,
"loss": 0.4196,
"step": 160
},
{
"epoch": 42.5,
"grad_norm": 0.49232539534568787,
"learning_rate": 0.0005435778713738292,
"loss": 0.3972,
"step": 170
},
{
"epoch": 45.0,
"grad_norm": 0.4959801137447357,
"learning_rate": 0.0005,
"loss": 0.372,
"step": 180
},
{
"epoch": 47.5,
"grad_norm": 0.5067233443260193,
"learning_rate": 0.00045642212862617086,
"loss": 0.3527,
"step": 190
},
{
"epoch": 50.0,
"grad_norm": 0.5176546573638916,
"learning_rate": 0.00041317591116653486,
"loss": 0.3385,
"step": 200
},
{
"epoch": 52.5,
"grad_norm": 0.4937039911746979,
"learning_rate": 0.0003705904774487396,
"loss": 0.3191,
"step": 210
},
{
"epoch": 55.0,
"grad_norm": 0.4794902205467224,
"learning_rate": 0.0003289899283371657,
"loss": 0.3082,
"step": 220
},
{
"epoch": 57.5,
"grad_norm": 0.4285900294780731,
"learning_rate": 0.0002886908691296504,
"loss": 0.295,
"step": 230
},
{
"epoch": 60.0,
"grad_norm": 0.45302724838256836,
"learning_rate": 0.0002500000000000001,
"loss": 0.2889,
"step": 240
},
{
"epoch": 62.5,
"grad_norm": 0.42409127950668335,
"learning_rate": 0.00021321178182447708,
"loss": 0.2781,
"step": 250
},
{
"epoch": 65.0,
"grad_norm": 0.4453699588775635,
"learning_rate": 0.0001786061951567303,
"loss": 0.273,
"step": 260
},
{
"epoch": 67.5,
"grad_norm": 0.4217115044593811,
"learning_rate": 0.00014644660940672628,
"loss": 0.2646,
"step": 270
},
{
"epoch": 70.0,
"grad_norm": 0.43468865752220154,
"learning_rate": 0.00011697777844051105,
"loss": 0.2611,
"step": 280
},
{
"epoch": 72.5,
"grad_norm": 0.41657349467277527,
"learning_rate": 9.042397785550405e-05,
"loss": 0.2534,
"step": 290
},
{
"epoch": 75.0,
"grad_norm": 0.40352940559387207,
"learning_rate": 6.698729810778065e-05,
"loss": 0.2509,
"step": 300
},
{
"epoch": 77.5,
"grad_norm": 0.3772071301937103,
"learning_rate": 4.684610648167503e-05,
"loss": 0.2493,
"step": 310
},
{
"epoch": 80.0,
"grad_norm": 0.3778958022594452,
"learning_rate": 3.0153689607045842e-05,
"loss": 0.249,
"step": 320
},
{
"epoch": 82.5,
"grad_norm": 0.38997748494148254,
"learning_rate": 1.70370868554659e-05,
"loss": 0.2441,
"step": 330
},
{
"epoch": 85.0,
"grad_norm": 0.3619975447654724,
"learning_rate": 7.59612349389599e-06,
"loss": 0.2472,
"step": 340
},
{
"epoch": 87.5,
"grad_norm": 0.4022546708583832,
"learning_rate": 1.9026509541272275e-06,
"loss": 0.2493,
"step": 350
},
{
"epoch": 90.0,
"grad_norm": 0.36714521050453186,
"learning_rate": 0.0,
"loss": 0.2444,
"step": 360
}
],
"logging_steps": 10,
"max_steps": 360,
"num_input_tokens_seen": 0,
"num_train_epochs": 90,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.02434468200448e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}