| { |
| "best_global_step": 120, |
| "best_metric": 1.1266472339630127, |
| "best_model_checkpoint": "miner_id_24/checkpoint-120", |
| "epoch": 0.9876543209876543, |
| "eval_steps": 20, |
| "global_step": 120, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00823045267489712, |
| "eval_loss": 1.56075918674469, |
| "eval_runtime": 276.42, |
| "eval_samples_per_second": 14.308, |
| "eval_steps_per_second": 0.897, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0411522633744856, |
| "grad_norm": 0.12783201038837433, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 1.5382, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0823045267489712, |
| "grad_norm": 0.1401992291212082, |
| "learning_rate": 1.2e-05, |
| "loss": 1.5389, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12345679012345678, |
| "grad_norm": 0.16071239113807678, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 1.5253, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.1646090534979424, |
| "grad_norm": 0.20610080659389496, |
| "learning_rate": 2.5333333333333337e-05, |
| "loss": 1.5192, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1646090534979424, |
| "eval_loss": 1.5330003499984741, |
| "eval_runtime": 275.787, |
| "eval_samples_per_second": 14.341, |
| "eval_steps_per_second": 0.899, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.205761316872428, |
| "grad_norm": 0.24521860480308533, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.4731, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.24691358024691357, |
| "grad_norm": 0.20491117238998413, |
| "learning_rate": 3.866666666666667e-05, |
| "loss": 1.4531, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2880658436213992, |
| "grad_norm": 0.1347217708826065, |
| "learning_rate": 4.5333333333333335e-05, |
| "loss": 1.39, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3292181069958848, |
| "grad_norm": 0.09733594208955765, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 1.3194, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3292181069958848, |
| "eval_loss": 1.3070895671844482, |
| "eval_runtime": 274.1888, |
| "eval_samples_per_second": 14.424, |
| "eval_steps_per_second": 0.904, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.09786524623632431, |
| "learning_rate": 5.866666666666667e-05, |
| "loss": 1.2866, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.411522633744856, |
| "grad_norm": 0.0775938630104065, |
| "learning_rate": 6.533333333333334e-05, |
| "loss": 1.2626, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.45267489711934156, |
| "grad_norm": 0.07579416036605835, |
| "learning_rate": 7.2e-05, |
| "loss": 1.2564, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 0.06526237726211548, |
| "learning_rate": 7.866666666666666e-05, |
| "loss": 1.2392, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "eval_loss": 1.2206859588623047, |
| "eval_runtime": 273.7144, |
| "eval_samples_per_second": 14.449, |
| "eval_steps_per_second": 0.906, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5349794238683128, |
| "grad_norm": 0.06369958817958832, |
| "learning_rate": 8.533333333333334e-05, |
| "loss": 1.2078, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5761316872427984, |
| "grad_norm": 0.06731989234685898, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 1.2159, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6172839506172839, |
| "grad_norm": 0.06425522267818451, |
| "learning_rate": 9.866666666666668e-05, |
| "loss": 1.2011, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6584362139917695, |
| "grad_norm": 0.06875820457935333, |
| "learning_rate": 0.00010533333333333332, |
| "loss": 1.177, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6584362139917695, |
| "eval_loss": 1.1793321371078491, |
| "eval_runtime": 273.3191, |
| "eval_samples_per_second": 14.47, |
| "eval_steps_per_second": 0.907, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6995884773662552, |
| "grad_norm": 0.06558868288993835, |
| "learning_rate": 0.00011200000000000001, |
| "loss": 1.1719, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.06846272945404053, |
| "learning_rate": 0.00011866666666666669, |
| "loss": 1.1555, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7818930041152263, |
| "grad_norm": 0.07038144767284393, |
| "learning_rate": 0.00012533333333333334, |
| "loss": 1.1683, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.823045267489712, |
| "grad_norm": 0.07254977524280548, |
| "learning_rate": 0.000132, |
| "loss": 1.1592, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.823045267489712, |
| "eval_loss": 1.148273229598999, |
| "eval_runtime": 274.3966, |
| "eval_samples_per_second": 14.413, |
| "eval_steps_per_second": 0.904, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8641975308641975, |
| "grad_norm": 0.07729992270469666, |
| "learning_rate": 0.00013866666666666669, |
| "loss": 1.1482, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.9053497942386831, |
| "grad_norm": 0.08038458973169327, |
| "learning_rate": 0.00014533333333333333, |
| "loss": 1.1458, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9465020576131687, |
| "grad_norm": 0.08932825922966003, |
| "learning_rate": 0.000152, |
| "loss": 1.1384, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 0.08660374581813812, |
| "learning_rate": 0.00015866666666666668, |
| "loss": 1.129, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "eval_loss": 1.1266472339630127, |
| "eval_runtime": 272.2952, |
| "eval_samples_per_second": 14.525, |
| "eval_steps_per_second": 0.911, |
| "step": 120 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 121, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 40, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 4, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.5093043373987594e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|