FormlessAI's picture
Training in progress, step 120, checkpoint
37ad9d6 verified
{
"best_global_step": 120,
"best_metric": 1.1266472339630127,
"best_model_checkpoint": "miner_id_24/checkpoint-120",
"epoch": 0.9876543209876543,
"eval_steps": 20,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00823045267489712,
"eval_loss": 1.56075918674469,
"eval_runtime": 276.42,
"eval_samples_per_second": 14.308,
"eval_steps_per_second": 0.897,
"step": 1
},
{
"epoch": 0.0411522633744856,
"grad_norm": 0.12783201038837433,
"learning_rate": 5.333333333333334e-06,
"loss": 1.5382,
"step": 5
},
{
"epoch": 0.0823045267489712,
"grad_norm": 0.1401992291212082,
"learning_rate": 1.2e-05,
"loss": 1.5389,
"step": 10
},
{
"epoch": 0.12345679012345678,
"grad_norm": 0.16071239113807678,
"learning_rate": 1.866666666666667e-05,
"loss": 1.5253,
"step": 15
},
{
"epoch": 0.1646090534979424,
"grad_norm": 0.20610080659389496,
"learning_rate": 2.5333333333333337e-05,
"loss": 1.5192,
"step": 20
},
{
"epoch": 0.1646090534979424,
"eval_loss": 1.5330003499984741,
"eval_runtime": 275.787,
"eval_samples_per_second": 14.341,
"eval_steps_per_second": 0.899,
"step": 20
},
{
"epoch": 0.205761316872428,
"grad_norm": 0.24521860480308533,
"learning_rate": 3.2000000000000005e-05,
"loss": 1.4731,
"step": 25
},
{
"epoch": 0.24691358024691357,
"grad_norm": 0.20491117238998413,
"learning_rate": 3.866666666666667e-05,
"loss": 1.4531,
"step": 30
},
{
"epoch": 0.2880658436213992,
"grad_norm": 0.1347217708826065,
"learning_rate": 4.5333333333333335e-05,
"loss": 1.39,
"step": 35
},
{
"epoch": 0.3292181069958848,
"grad_norm": 0.09733594208955765,
"learning_rate": 5.2000000000000004e-05,
"loss": 1.3194,
"step": 40
},
{
"epoch": 0.3292181069958848,
"eval_loss": 1.3070895671844482,
"eval_runtime": 274.1888,
"eval_samples_per_second": 14.424,
"eval_steps_per_second": 0.904,
"step": 40
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.09786524623632431,
"learning_rate": 5.866666666666667e-05,
"loss": 1.2866,
"step": 45
},
{
"epoch": 0.411522633744856,
"grad_norm": 0.0775938630104065,
"learning_rate": 6.533333333333334e-05,
"loss": 1.2626,
"step": 50
},
{
"epoch": 0.45267489711934156,
"grad_norm": 0.07579416036605835,
"learning_rate": 7.2e-05,
"loss": 1.2564,
"step": 55
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.06526237726211548,
"learning_rate": 7.866666666666666e-05,
"loss": 1.2392,
"step": 60
},
{
"epoch": 0.49382716049382713,
"eval_loss": 1.2206859588623047,
"eval_runtime": 273.7144,
"eval_samples_per_second": 14.449,
"eval_steps_per_second": 0.906,
"step": 60
},
{
"epoch": 0.5349794238683128,
"grad_norm": 0.06369958817958832,
"learning_rate": 8.533333333333334e-05,
"loss": 1.2078,
"step": 65
},
{
"epoch": 0.5761316872427984,
"grad_norm": 0.06731989234685898,
"learning_rate": 9.200000000000001e-05,
"loss": 1.2159,
"step": 70
},
{
"epoch": 0.6172839506172839,
"grad_norm": 0.06425522267818451,
"learning_rate": 9.866666666666668e-05,
"loss": 1.2011,
"step": 75
},
{
"epoch": 0.6584362139917695,
"grad_norm": 0.06875820457935333,
"learning_rate": 0.00010533333333333332,
"loss": 1.177,
"step": 80
},
{
"epoch": 0.6584362139917695,
"eval_loss": 1.1793321371078491,
"eval_runtime": 273.3191,
"eval_samples_per_second": 14.47,
"eval_steps_per_second": 0.907,
"step": 80
},
{
"epoch": 0.6995884773662552,
"grad_norm": 0.06558868288993835,
"learning_rate": 0.00011200000000000001,
"loss": 1.1719,
"step": 85
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.06846272945404053,
"learning_rate": 0.00011866666666666669,
"loss": 1.1555,
"step": 90
},
{
"epoch": 0.7818930041152263,
"grad_norm": 0.07038144767284393,
"learning_rate": 0.00012533333333333334,
"loss": 1.1683,
"step": 95
},
{
"epoch": 0.823045267489712,
"grad_norm": 0.07254977524280548,
"learning_rate": 0.000132,
"loss": 1.1592,
"step": 100
},
{
"epoch": 0.823045267489712,
"eval_loss": 1.148273229598999,
"eval_runtime": 274.3966,
"eval_samples_per_second": 14.413,
"eval_steps_per_second": 0.904,
"step": 100
},
{
"epoch": 0.8641975308641975,
"grad_norm": 0.07729992270469666,
"learning_rate": 0.00013866666666666669,
"loss": 1.1482,
"step": 105
},
{
"epoch": 0.9053497942386831,
"grad_norm": 0.08038458973169327,
"learning_rate": 0.00014533333333333333,
"loss": 1.1458,
"step": 110
},
{
"epoch": 0.9465020576131687,
"grad_norm": 0.08932825922966003,
"learning_rate": 0.000152,
"loss": 1.1384,
"step": 115
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.08660374581813812,
"learning_rate": 0.00015866666666666668,
"loss": 1.129,
"step": 120
},
{
"epoch": 0.9876543209876543,
"eval_loss": 1.1266472339630127,
"eval_runtime": 272.2952,
"eval_samples_per_second": 14.525,
"eval_steps_per_second": 0.911,
"step": 120
}
],
"logging_steps": 5,
"max_steps": 121,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 40,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 4,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.5093043373987594e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}