penfever's picture
Add files using upload-large-folder tool
aa01a21 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9984,
"eval_steps": 500,
"global_step": 1250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 12.125,
"learning_rate": 1.555555555555556e-05,
"loss": 1.2021,
"mean_token_accuracy": 0.7523527503013611,
"num_input_tokens_seen": 462464,
"num_tokens": 462464.0,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 7.5625,
"learning_rate": 1.9954642956864198e-05,
"loss": 0.6381,
"mean_token_accuracy": 0.828709350824356,
"num_input_tokens_seen": 895104,
"num_tokens": 895104.0,
"step": 100
},
{
"epoch": 0.24,
"grad_norm": 9.0625,
"learning_rate": 1.9742077388709354e-05,
"loss": 0.6022,
"mean_token_accuracy": 0.8348629665374756,
"num_input_tokens_seen": 1447552,
"num_tokens": 1447552.0,
"step": 150
},
{
"epoch": 0.32,
"grad_norm": 4.03125,
"learning_rate": 1.9359156377376714e-05,
"loss": 0.508,
"mean_token_accuracy": 0.8551409709453582,
"num_input_tokens_seen": 2040064,
"num_tokens": 2040064.0,
"step": 200
},
{
"epoch": 0.4,
"grad_norm": 14.9375,
"learning_rate": 1.8812575894986476e-05,
"loss": 0.5894,
"mean_token_accuracy": 0.8338349997997284,
"num_input_tokens_seen": 2481312,
"num_tokens": 2481312.0,
"step": 250
},
{
"epoch": 0.48,
"grad_norm": 6.875,
"learning_rate": 1.811189375519529e-05,
"loss": 0.555,
"mean_token_accuracy": 0.8397143912315369,
"num_input_tokens_seen": 2908288,
"num_tokens": 2908288.0,
"step": 300
},
{
"epoch": 0.56,
"grad_norm": 21.75,
"learning_rate": 1.7269362479892304e-05,
"loss": 0.6172,
"mean_token_accuracy": 0.8344850993156433,
"num_input_tokens_seen": 3346848,
"num_tokens": 3346848.0,
"step": 350
},
{
"epoch": 0.64,
"grad_norm": 7.34375,
"learning_rate": 1.629971504471182e-05,
"loss": 0.6697,
"mean_token_accuracy": 0.8219912588596344,
"num_input_tokens_seen": 3815936,
"num_tokens": 3815936.0,
"step": 400
},
{
"epoch": 0.72,
"grad_norm": 11.5,
"learning_rate": 1.5219907249937036e-05,
"loss": 0.5813,
"mean_token_accuracy": 0.84231409907341,
"num_input_tokens_seen": 4219808,
"num_tokens": 4219808.0,
"step": 450
},
{
"epoch": 0.8,
"grad_norm": 10.8125,
"learning_rate": 1.4048821221842053e-05,
"loss": 0.5867,
"mean_token_accuracy": 0.8388340866565704,
"num_input_tokens_seen": 4729216,
"num_tokens": 4729216.0,
"step": 500
},
{
"epoch": 0.88,
"grad_norm": 21.5,
"learning_rate": 1.2806935229214456e-05,
"loss": 0.6442,
"mean_token_accuracy": 0.8239680206775666,
"num_input_tokens_seen": 5136224,
"num_tokens": 5136224.0,
"step": 550
},
{
"epoch": 0.96,
"grad_norm": 15.5625,
"learning_rate": 1.1515965588832394e-05,
"loss": 0.6695,
"mean_token_accuracy": 0.8146113741397858,
"num_input_tokens_seen": 5558144,
"num_tokens": 5558144.0,
"step": 600
},
{
"epoch": 1.0384,
"grad_norm": 14.8125,
"learning_rate": 1.0198486921738313e-05,
"loss": 0.5815,
"mean_token_accuracy": 0.838525402545929,
"num_input_tokens_seen": 6015168,
"num_tokens": 6015168.0,
"step": 650
},
{
"epoch": 1.1184,
"grad_norm": 14.625,
"learning_rate": 8.87753740072175e-06,
"loss": 0.4959,
"mean_token_accuracy": 0.8590600204467773,
"num_input_tokens_seen": 6525472,
"num_tokens": 6525472.0,
"step": 700
},
{
"epoch": 1.1984,
"grad_norm": 6.4375,
"learning_rate": 7.5762158918755844e-06,
"loss": 0.209,
"mean_token_accuracy": 0.9405078291893005,
"num_input_tokens_seen": 6961280,
"num_tokens": 6961280.0,
"step": 750
},
{
"epoch": 1.2784,
"grad_norm": 15.4375,
"learning_rate": 6.317278034835077e-06,
"loss": 0.2078,
"mean_token_accuracy": 0.9385969924926758,
"num_input_tokens_seen": 7553248,
"num_tokens": 7553248.0,
"step": 800
},
{
"epoch": 1.3584,
"grad_norm": 6.25,
"learning_rate": 5.122738324867738e-06,
"loss": 0.1648,
"mean_token_accuracy": 0.9483961355686188,
"num_input_tokens_seen": 8028480,
"num_tokens": 8028480.0,
"step": 850
},
{
"epoch": 1.4384000000000001,
"grad_norm": 20.375,
"learning_rate": 4.01348515503035e-06,
"loss": 0.1534,
"mean_token_accuracy": 0.954008765220642,
"num_input_tokens_seen": 8481600,
"num_tokens": 8481600.0,
"step": 900
},
{
"epoch": 1.5184,
"grad_norm": 11.875,
"learning_rate": 3.008915549982461e-06,
"loss": 0.1688,
"mean_token_accuracy": 0.9496164512634278,
"num_input_tokens_seen": 8863616,
"num_tokens": 8863616.0,
"step": 950
},
{
"epoch": 1.5984,
"grad_norm": 9.75,
"learning_rate": 2.126595978706265e-06,
"loss": 0.2746,
"mean_token_accuracy": 0.9160845911502838,
"num_input_tokens_seen": 9401952,
"num_tokens": 9401952.0,
"step": 1000
},
{
"epoch": 1.6784,
"grad_norm": 13.0625,
"learning_rate": 1.3819551773523687e-06,
"loss": 0.2955,
"mean_token_accuracy": 0.9055855929851532,
"num_input_tokens_seen": 9802240,
"num_tokens": 9802240.0,
"step": 1050
},
{
"epoch": 1.7584,
"grad_norm": 10.375,
"learning_rate": 7.880143536839091e-07,
"loss": 0.4093,
"mean_token_accuracy": 0.8785555076599121,
"num_input_tokens_seen": 10295392,
"num_tokens": 10295392.0,
"step": 1100
},
{
"epoch": 1.8384,
"grad_norm": 6.96875,
"learning_rate": 3.5515949091578514e-07,
"loss": 0.403,
"mean_token_accuracy": 0.8825811994075775,
"num_input_tokens_seen": 10749696,
"num_tokens": 10749696.0,
"step": 1150
},
{
"epoch": 1.9184,
"grad_norm": 9.5,
"learning_rate": 9.095973257215118e-08,
"loss": 0.468,
"mean_token_accuracy": 0.8632709145545959,
"num_input_tokens_seen": 11138752,
"num_tokens": 11138752.0,
"step": 1200
},
{
"epoch": 1.9984,
"grad_norm": 5.4375,
"learning_rate": 3.502418662093554e-11,
"loss": 0.4481,
"mean_token_accuracy": 0.8703732848167419,
"num_input_tokens_seen": 11596480,
"num_tokens": 11596480.0,
"step": 1250
},
{
"epoch": 1.9984,
"num_input_tokens_seen": 11596480,
"step": 1250,
"total_flos": 6.737991978870374e+16,
"train_loss": 0.48572284927368164,
"train_runtime": 2793.6323,
"train_samples_per_second": 7.159,
"train_steps_per_second": 0.447,
"train_tokens_per_second": 323.951
}
],
"logging_steps": 50,
"max_steps": 1250,
"num_input_tokens_seen": 11596480,
"num_train_epochs": 2,
"save_steps": 0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.737991978870374e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}