chiayisu's picture
update
fe7b4a6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0779220779220777,
"eval_steps": 500,
"global_step": 180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05772005772005772,
"grad_norm": 0.018310546875,
"learning_rate": 0.0001,
"loss": 0.5558,
"step": 5
},
{
"epoch": 0.11544011544011544,
"grad_norm": 0.01544189453125,
"learning_rate": 0.0001,
"loss": 0.4953,
"step": 10
},
{
"epoch": 0.17316017316017315,
"grad_norm": 0.01611328125,
"learning_rate": 0.0001,
"loss": 0.4465,
"step": 15
},
{
"epoch": 0.23088023088023088,
"grad_norm": 0.0186767578125,
"learning_rate": 0.0001,
"loss": 0.419,
"step": 20
},
{
"epoch": 0.2886002886002886,
"grad_norm": 0.022705078125,
"learning_rate": 0.0001,
"loss": 0.5274,
"step": 25
},
{
"epoch": 0.3463203463203463,
"grad_norm": 0.020751953125,
"learning_rate": 0.0001,
"loss": 0.5132,
"step": 30
},
{
"epoch": 0.40404040404040403,
"grad_norm": 0.0174560546875,
"learning_rate": 0.0001,
"loss": 0.4466,
"step": 35
},
{
"epoch": 0.46176046176046176,
"grad_norm": 0.01904296875,
"learning_rate": 0.0001,
"loss": 0.4116,
"step": 40
},
{
"epoch": 0.5194805194805194,
"grad_norm": 0.026611328125,
"learning_rate": 0.0001,
"loss": 0.4913,
"step": 45
},
{
"epoch": 0.5772005772005772,
"grad_norm": 0.0208740234375,
"learning_rate": 0.0001,
"loss": 0.4982,
"step": 50
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.021484375,
"learning_rate": 0.0001,
"loss": 0.4586,
"step": 55
},
{
"epoch": 0.6926406926406926,
"grad_norm": 0.0191650390625,
"learning_rate": 0.0001,
"loss": 0.4132,
"step": 60
},
{
"epoch": 0.7503607503607503,
"grad_norm": 0.03125,
"learning_rate": 0.0001,
"loss": 0.445,
"step": 65
},
{
"epoch": 0.8080808080808081,
"grad_norm": 0.0242919921875,
"learning_rate": 0.0001,
"loss": 0.5186,
"step": 70
},
{
"epoch": 0.8658008658008658,
"grad_norm": 0.022216796875,
"learning_rate": 0.0001,
"loss": 0.463,
"step": 75
},
{
"epoch": 0.9235209235209235,
"grad_norm": 0.02001953125,
"learning_rate": 0.0001,
"loss": 0.4233,
"step": 80
},
{
"epoch": 0.9812409812409812,
"grad_norm": 0.0299072265625,
"learning_rate": 0.0001,
"loss": 0.4396,
"step": 85
},
{
"epoch": 1.0389610389610389,
"grad_norm": 0.02685546875,
"learning_rate": 0.0001,
"loss": 0.4944,
"step": 90
},
{
"epoch": 1.0966810966810967,
"grad_norm": 0.0260009765625,
"learning_rate": 0.0001,
"loss": 0.4896,
"step": 95
},
{
"epoch": 1.1544011544011543,
"grad_norm": 0.0247802734375,
"learning_rate": 0.0001,
"loss": 0.4402,
"step": 100
},
{
"epoch": 1.2121212121212122,
"grad_norm": 0.0240478515625,
"learning_rate": 0.0001,
"loss": 0.3963,
"step": 105
},
{
"epoch": 1.2698412698412698,
"grad_norm": 0.037353515625,
"learning_rate": 0.0001,
"loss": 0.4535,
"step": 110
},
{
"epoch": 1.3275613275613276,
"grad_norm": 0.032470703125,
"learning_rate": 0.0001,
"loss": 0.5045,
"step": 115
},
{
"epoch": 1.3852813852813852,
"grad_norm": 0.0301513671875,
"learning_rate": 0.0001,
"loss": 0.4466,
"step": 120
},
{
"epoch": 1.443001443001443,
"grad_norm": 0.0244140625,
"learning_rate": 0.0001,
"loss": 0.4095,
"step": 125
},
{
"epoch": 1.5007215007215007,
"grad_norm": 0.046630859375,
"learning_rate": 0.0001,
"loss": 0.4346,
"step": 130
},
{
"epoch": 1.5584415584415585,
"grad_norm": 0.0299072265625,
"learning_rate": 0.0001,
"loss": 0.5046,
"step": 135
},
{
"epoch": 1.6161616161616161,
"grad_norm": 0.032958984375,
"learning_rate": 0.0001,
"loss": 0.4556,
"step": 140
},
{
"epoch": 1.6738816738816737,
"grad_norm": 0.0272216796875,
"learning_rate": 0.0001,
"loss": 0.4245,
"step": 145
},
{
"epoch": 1.7316017316017316,
"grad_norm": 0.036865234375,
"learning_rate": 0.0001,
"loss": 0.3834,
"step": 150
},
{
"epoch": 1.7893217893217894,
"grad_norm": 0.03662109375,
"learning_rate": 0.0001,
"loss": 0.5163,
"step": 155
},
{
"epoch": 1.847041847041847,
"grad_norm": 0.033935546875,
"learning_rate": 0.0001,
"loss": 0.4565,
"step": 160
},
{
"epoch": 1.9047619047619047,
"grad_norm": 0.02880859375,
"learning_rate": 0.0001,
"loss": 0.4164,
"step": 165
},
{
"epoch": 1.9624819624819625,
"grad_norm": 0.03271484375,
"learning_rate": 0.0001,
"loss": 0.3956,
"step": 170
},
{
"epoch": 2.0202020202020203,
"grad_norm": 0.041748046875,
"learning_rate": 0.0001,
"loss": 0.4453,
"step": 175
},
{
"epoch": 2.0779220779220777,
"grad_norm": 0.03857421875,
"learning_rate": 0.0001,
"loss": 0.4681,
"step": 180
},
{
"epoch": 2.0779220779220777,
"step": 180,
"total_flos": 1.8562430640540058e+18,
"train_loss": 0.45838437411520216,
"train_runtime": 56927.0701,
"train_samples_per_second": 0.405,
"train_steps_per_second": 0.003
}
],
"logging_steps": 5,
"max_steps": 180,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 180,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8562430640540058e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}