sedrickkeh's picture
End of training
bdb23dd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9973394146712277,
"eval_steps": 500,
"global_step": 328,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03040668947168377,
"grad_norm": 5.6152509966106985,
"learning_rate": 5e-06,
"loss": 0.9255,
"step": 10
},
{
"epoch": 0.06081337894336754,
"grad_norm": 1.948199272854136,
"learning_rate": 5e-06,
"loss": 0.8117,
"step": 20
},
{
"epoch": 0.09122006841505131,
"grad_norm": 1.6637843548352236,
"learning_rate": 5e-06,
"loss": 0.7758,
"step": 30
},
{
"epoch": 0.12162675788673508,
"grad_norm": 0.907983749953646,
"learning_rate": 5e-06,
"loss": 0.7519,
"step": 40
},
{
"epoch": 0.15203344735841884,
"grad_norm": 1.7280229551951782,
"learning_rate": 5e-06,
"loss": 0.7352,
"step": 50
},
{
"epoch": 0.18244013683010263,
"grad_norm": 0.8504963430088522,
"learning_rate": 5e-06,
"loss": 0.7224,
"step": 60
},
{
"epoch": 0.2128468263017864,
"grad_norm": 1.2357854791523568,
"learning_rate": 5e-06,
"loss": 0.7105,
"step": 70
},
{
"epoch": 0.24325351577347015,
"grad_norm": 1.2476890602998016,
"learning_rate": 5e-06,
"loss": 0.7038,
"step": 80
},
{
"epoch": 0.27366020524515394,
"grad_norm": 0.63122069402106,
"learning_rate": 5e-06,
"loss": 0.6999,
"step": 90
},
{
"epoch": 0.3040668947168377,
"grad_norm": 0.6553804145899658,
"learning_rate": 5e-06,
"loss": 0.6863,
"step": 100
},
{
"epoch": 0.33447358418852147,
"grad_norm": 0.5841011076306307,
"learning_rate": 5e-06,
"loss": 0.6907,
"step": 110
},
{
"epoch": 0.36488027366020526,
"grad_norm": 0.7996499453541979,
"learning_rate": 5e-06,
"loss": 0.6819,
"step": 120
},
{
"epoch": 0.395286963131889,
"grad_norm": 0.5950822241048144,
"learning_rate": 5e-06,
"loss": 0.6871,
"step": 130
},
{
"epoch": 0.4256936526035728,
"grad_norm": 0.5683219252381152,
"learning_rate": 5e-06,
"loss": 0.6825,
"step": 140
},
{
"epoch": 0.45610034207525657,
"grad_norm": 0.7530256115772663,
"learning_rate": 5e-06,
"loss": 0.6774,
"step": 150
},
{
"epoch": 0.4865070315469403,
"grad_norm": 0.6459654005909365,
"learning_rate": 5e-06,
"loss": 0.6847,
"step": 160
},
{
"epoch": 0.5169137210186241,
"grad_norm": 0.5836331839357853,
"learning_rate": 5e-06,
"loss": 0.6768,
"step": 170
},
{
"epoch": 0.5473204104903079,
"grad_norm": 0.5565233021284824,
"learning_rate": 5e-06,
"loss": 0.6849,
"step": 180
},
{
"epoch": 0.5777270999619917,
"grad_norm": 0.6013400542500057,
"learning_rate": 5e-06,
"loss": 0.6713,
"step": 190
},
{
"epoch": 0.6081337894336754,
"grad_norm": 0.5233954675275515,
"learning_rate": 5e-06,
"loss": 0.671,
"step": 200
},
{
"epoch": 0.6385404789053591,
"grad_norm": 0.5779435903050023,
"learning_rate": 5e-06,
"loss": 0.6724,
"step": 210
},
{
"epoch": 0.6689471683770429,
"grad_norm": 0.6526020744171068,
"learning_rate": 5e-06,
"loss": 0.6719,
"step": 220
},
{
"epoch": 0.6993538578487267,
"grad_norm": 0.5895434275536652,
"learning_rate": 5e-06,
"loss": 0.6689,
"step": 230
},
{
"epoch": 0.7297605473204105,
"grad_norm": 0.6343504733705319,
"learning_rate": 5e-06,
"loss": 0.6687,
"step": 240
},
{
"epoch": 0.7601672367920943,
"grad_norm": 0.7332518294295413,
"learning_rate": 5e-06,
"loss": 0.6639,
"step": 250
},
{
"epoch": 0.790573926263778,
"grad_norm": 0.5332685530635167,
"learning_rate": 5e-06,
"loss": 0.6684,
"step": 260
},
{
"epoch": 0.8209806157354618,
"grad_norm": 0.5890443926278276,
"learning_rate": 5e-06,
"loss": 0.6619,
"step": 270
},
{
"epoch": 0.8513873052071456,
"grad_norm": 0.5702438242189162,
"learning_rate": 5e-06,
"loss": 0.6611,
"step": 280
},
{
"epoch": 0.8817939946788294,
"grad_norm": 0.6228763934659501,
"learning_rate": 5e-06,
"loss": 0.6596,
"step": 290
},
{
"epoch": 0.9122006841505131,
"grad_norm": 0.6318262944444193,
"learning_rate": 5e-06,
"loss": 0.6623,
"step": 300
},
{
"epoch": 0.9426073736221969,
"grad_norm": 0.6675343568518476,
"learning_rate": 5e-06,
"loss": 0.6565,
"step": 310
},
{
"epoch": 0.9730140630938806,
"grad_norm": 0.5399374272267031,
"learning_rate": 5e-06,
"loss": 0.6579,
"step": 320
},
{
"epoch": 0.9973394146712277,
"eval_loss": 0.6590226292610168,
"eval_runtime": 348.3776,
"eval_samples_per_second": 25.435,
"eval_steps_per_second": 0.399,
"step": 328
},
{
"epoch": 0.9973394146712277,
"step": 328,
"total_flos": 549202836848640.0,
"train_loss": 0.696046143043332,
"train_runtime": 19374.8951,
"train_samples_per_second": 8.689,
"train_steps_per_second": 0.017
}
],
"logging_steps": 10,
"max_steps": 328,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 549202836848640.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}