phi2-qlora-finetuned / trainer_state.json
Amirmohammad Erfan
Upload 8 files
0727122 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.40160642570281124,
"eval_steps": 25,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020080321285140562,
"grad_norm": 4.975222110748291,
"learning_rate": 2.375e-05,
"loss": 3.699,
"step": 25
},
{
"epoch": 0.020080321285140562,
"eval_loss": 3.4337058067321777,
"eval_runtime": 73.7432,
"eval_samples_per_second": 5.967,
"eval_steps_per_second": 0.746,
"step": 25
},
{
"epoch": 0.040160642570281124,
"grad_norm": 17.880794525146484,
"learning_rate": 2.25e-05,
"loss": 3.3958,
"step": 50
},
{
"epoch": 0.040160642570281124,
"eval_loss": 3.2245774269104004,
"eval_runtime": 72.5106,
"eval_samples_per_second": 6.068,
"eval_steps_per_second": 0.759,
"step": 50
},
{
"epoch": 0.060240963855421686,
"grad_norm": 14.544445991516113,
"learning_rate": 2.125e-05,
"loss": 3.2819,
"step": 75
},
{
"epoch": 0.060240963855421686,
"eval_loss": 3.0899553298950195,
"eval_runtime": 71.5892,
"eval_samples_per_second": 6.146,
"eval_steps_per_second": 0.768,
"step": 75
},
{
"epoch": 0.08032128514056225,
"grad_norm": 9.934131622314453,
"learning_rate": 2e-05,
"loss": 3.0848,
"step": 100
},
{
"epoch": 0.08032128514056225,
"eval_loss": 2.9913179874420166,
"eval_runtime": 71.5036,
"eval_samples_per_second": 6.154,
"eval_steps_per_second": 0.769,
"step": 100
},
{
"epoch": 0.10040160642570281,
"grad_norm": 27.045995712280273,
"learning_rate": 1.8750000000000002e-05,
"loss": 3.0192,
"step": 125
},
{
"epoch": 0.10040160642570281,
"eval_loss": 2.901963710784912,
"eval_runtime": 71.6168,
"eval_samples_per_second": 6.144,
"eval_steps_per_second": 0.768,
"step": 125
},
{
"epoch": 0.12048192771084337,
"grad_norm": 37.5889778137207,
"learning_rate": 1.75e-05,
"loss": 2.7809,
"step": 150
},
{
"epoch": 0.12048192771084337,
"eval_loss": 2.842862129211426,
"eval_runtime": 71.6469,
"eval_samples_per_second": 6.141,
"eval_steps_per_second": 0.768,
"step": 150
},
{
"epoch": 0.14056224899598393,
"grad_norm": 29.42604637145996,
"learning_rate": 1.6250000000000002e-05,
"loss": 2.7966,
"step": 175
},
{
"epoch": 0.14056224899598393,
"eval_loss": 2.8016371726989746,
"eval_runtime": 71.7672,
"eval_samples_per_second": 6.131,
"eval_steps_per_second": 0.766,
"step": 175
},
{
"epoch": 0.1606425702811245,
"grad_norm": 18.478330612182617,
"learning_rate": 1.5e-05,
"loss": 2.7628,
"step": 200
},
{
"epoch": 0.1606425702811245,
"eval_loss": 2.7751874923706055,
"eval_runtime": 71.6702,
"eval_samples_per_second": 6.139,
"eval_steps_per_second": 0.767,
"step": 200
},
{
"epoch": 0.18072289156626506,
"grad_norm": 14.404654502868652,
"learning_rate": 1.3750000000000002e-05,
"loss": 2.7318,
"step": 225
},
{
"epoch": 0.18072289156626506,
"eval_loss": 2.761225461959839,
"eval_runtime": 72.6836,
"eval_samples_per_second": 6.054,
"eval_steps_per_second": 0.757,
"step": 225
},
{
"epoch": 0.20080321285140562,
"grad_norm": 11.6594820022583,
"learning_rate": 1.25e-05,
"loss": 2.6541,
"step": 250
},
{
"epoch": 0.20080321285140562,
"eval_loss": 2.748612642288208,
"eval_runtime": 71.8454,
"eval_samples_per_second": 6.124,
"eval_steps_per_second": 0.766,
"step": 250
},
{
"epoch": 0.22088353413654618,
"grad_norm": 17.30568504333496,
"learning_rate": 1.125e-05,
"loss": 2.7524,
"step": 275
},
{
"epoch": 0.22088353413654618,
"eval_loss": 2.7362046241760254,
"eval_runtime": 71.4941,
"eval_samples_per_second": 6.154,
"eval_steps_per_second": 0.769,
"step": 275
},
{
"epoch": 0.24096385542168675,
"grad_norm": 37.215980529785156,
"learning_rate": 1e-05,
"loss": 2.6456,
"step": 300
},
{
"epoch": 0.24096385542168675,
"eval_loss": 2.730060577392578,
"eval_runtime": 71.5118,
"eval_samples_per_second": 6.153,
"eval_steps_per_second": 0.769,
"step": 300
},
{
"epoch": 0.26104417670682734,
"grad_norm": 36.52507781982422,
"learning_rate": 8.75e-06,
"loss": 2.7501,
"step": 325
},
{
"epoch": 0.26104417670682734,
"eval_loss": 2.7272305488586426,
"eval_runtime": 71.6593,
"eval_samples_per_second": 6.14,
"eval_steps_per_second": 0.768,
"step": 325
},
{
"epoch": 0.28112449799196787,
"grad_norm": 11.31718635559082,
"learning_rate": 7.5e-06,
"loss": 2.6978,
"step": 350
},
{
"epoch": 0.28112449799196787,
"eval_loss": 2.725891590118408,
"eval_runtime": 71.721,
"eval_samples_per_second": 6.135,
"eval_steps_per_second": 0.767,
"step": 350
},
{
"epoch": 0.30120481927710846,
"grad_norm": 19.520795822143555,
"learning_rate": 6.25e-06,
"loss": 2.6585,
"step": 375
},
{
"epoch": 0.30120481927710846,
"eval_loss": 2.7254438400268555,
"eval_runtime": 71.6451,
"eval_samples_per_second": 6.141,
"eval_steps_per_second": 0.768,
"step": 375
},
{
"epoch": 0.321285140562249,
"grad_norm": 9.94796371459961,
"learning_rate": 5e-06,
"loss": 2.7164,
"step": 400
},
{
"epoch": 0.321285140562249,
"eval_loss": 2.724626302719116,
"eval_runtime": 71.7028,
"eval_samples_per_second": 6.136,
"eval_steps_per_second": 0.767,
"step": 400
},
{
"epoch": 0.3413654618473896,
"grad_norm": 22.991910934448242,
"learning_rate": 3.75e-06,
"loss": 2.7357,
"step": 425
},
{
"epoch": 0.3413654618473896,
"eval_loss": 2.722055673599243,
"eval_runtime": 73.1673,
"eval_samples_per_second": 6.014,
"eval_steps_per_second": 0.752,
"step": 425
},
{
"epoch": 0.3614457831325301,
"grad_norm": 10.925905227661133,
"learning_rate": 2.5e-06,
"loss": 2.5698,
"step": 450
},
{
"epoch": 0.3614457831325301,
"eval_loss": 2.7187235355377197,
"eval_runtime": 71.5477,
"eval_samples_per_second": 6.15,
"eval_steps_per_second": 0.769,
"step": 450
},
{
"epoch": 0.3815261044176707,
"grad_norm": 26.42523765563965,
"learning_rate": 1.25e-06,
"loss": 2.727,
"step": 475
},
{
"epoch": 0.3815261044176707,
"eval_loss": 2.7193753719329834,
"eval_runtime": 71.4705,
"eval_samples_per_second": 6.156,
"eval_steps_per_second": 0.77,
"step": 475
},
{
"epoch": 0.40160642570281124,
"grad_norm": 13.879467964172363,
"learning_rate": 0.0,
"loss": 2.5964,
"step": 500
},
{
"epoch": 0.40160642570281124,
"eval_loss": 2.718266725540161,
"eval_runtime": 72.5501,
"eval_samples_per_second": 6.065,
"eval_steps_per_second": 0.758,
"step": 500
}
],
"logging_steps": 25,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6419582976000000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}