| { |
| "best_metric": 0.029243575409054756, |
| "best_model_checkpoint": "saves/chess/no_explain/checkpoint-4000", |
| "epoch": 3.202643171806167, |
| "eval_steps": 1000, |
| "global_step": 4000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08009611533840609, |
| "grad_norm": 0.8625897724596373, |
| "learning_rate": 4.006410256410257e-07, |
| "loss": 1.3897, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16019223067681218, |
| "grad_norm": 0.8895947937892531, |
| "learning_rate": 8.012820512820515e-07, |
| "loss": 0.0598, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.24028834601521826, |
| "grad_norm": 0.5221246844134636, |
| "learning_rate": 1.201923076923077e-06, |
| "loss": 0.0551, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.32038446135362436, |
| "grad_norm": 0.5590357289952654, |
| "learning_rate": 1.602564102564103e-06, |
| "loss": 0.0516, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4004805766920304, |
| "grad_norm": 0.36991974174438536, |
| "learning_rate": 2.0032051282051286e-06, |
| "loss": 0.0501, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4805766920304365, |
| "grad_norm": 0.6389443947236714, |
| "learning_rate": 2.403846153846154e-06, |
| "loss": 0.0486, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5606728073688426, |
| "grad_norm": 0.44563280571067243, |
| "learning_rate": 2.8044871794871797e-06, |
| "loss": 0.0463, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6407689227072487, |
| "grad_norm": 0.44266380357676305, |
| "learning_rate": 3.205128205128206e-06, |
| "loss": 0.0447, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7208650380456548, |
| "grad_norm": 0.585654631503778, |
| "learning_rate": 3.605769230769231e-06, |
| "loss": 0.0441, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8009611533840608, |
| "grad_norm": 0.600751877456253, |
| "learning_rate": 4.006410256410257e-06, |
| "loss": 0.0429, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8009611533840608, |
| "eval_loss": 0.042210426181554794, |
| "eval_runtime": 97.133, |
| "eval_samples_per_second": 1462.17, |
| "eval_steps_per_second": 2.862, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8810572687224669, |
| "grad_norm": 0.2641551118831142, |
| "learning_rate": 4.4070512820512826e-06, |
| "loss": 0.0414, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.961153384060873, |
| "grad_norm": 0.29049561928975876, |
| "learning_rate": 4.807692307692308e-06, |
| "loss": 0.0402, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0408490188225872, |
| "grad_norm": 0.5344113116420023, |
| "learning_rate": 4.999735579817769e-06, |
| "loss": 0.0386, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1209451341609933, |
| "grad_norm": 0.31257482202449377, |
| "learning_rate": 4.997740994288484e-06, |
| "loss": 0.0373, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2010412494993994, |
| "grad_norm": 0.4593106982622164, |
| "learning_rate": 4.993792498360407e-06, |
| "loss": 0.0366, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.2811373648378055, |
| "grad_norm": 0.2012883704449717, |
| "learning_rate": 4.9878931808274796e-06, |
| "loss": 0.0357, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.3612334801762114, |
| "grad_norm": 0.22908626001592647, |
| "learning_rate": 4.980047656554856e-06, |
| "loss": 0.0352, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.4413295955146175, |
| "grad_norm": 0.3169879320183415, |
| "learning_rate": 4.970262062868821e-06, |
| "loss": 0.0346, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.5214257108530236, |
| "grad_norm": 0.2078878255601618, |
| "learning_rate": 4.958544054755741e-06, |
| "loss": 0.0336, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6015218261914297, |
| "grad_norm": 0.2978110993331312, |
| "learning_rate": 4.944902798873794e-06, |
| "loss": 0.0329, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.6015218261914297, |
| "eval_loss": 0.03361953794956207, |
| "eval_runtime": 97.2876, |
| "eval_samples_per_second": 1459.847, |
| "eval_steps_per_second": 2.858, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.6816179415298358, |
| "grad_norm": 0.16678424956102253, |
| "learning_rate": 4.92934896638215e-06, |
| "loss": 0.0328, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.761714056868242, |
| "grad_norm": 0.19029664571581045, |
| "learning_rate": 4.91189472459324e-06, |
| "loss": 0.0316, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.841810172206648, |
| "grad_norm": 0.2388908631462674, |
| "learning_rate": 4.892553727454616e-06, |
| "loss": 0.0317, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.921906287545054, |
| "grad_norm": 0.15794270702360638, |
| "learning_rate": 4.8713411048678635e-06, |
| "loss": 0.0309, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.0016019223067683, |
| "grad_norm": 0.2103115075663395, |
| "learning_rate": 4.848273450852921e-06, |
| "loss": 0.0305, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.0816980376451744, |
| "grad_norm": 0.28601246983481904, |
| "learning_rate": 4.823368810567056e-06, |
| "loss": 0.0268, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.1617941529835805, |
| "grad_norm": 0.25522616878445004, |
| "learning_rate": 4.796646666188663e-06, |
| "loss": 0.0268, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.2418902683219866, |
| "grad_norm": 0.2343538332348778, |
| "learning_rate": 4.768127921676916e-06, |
| "loss": 0.0272, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.3219863836603922, |
| "grad_norm": 0.22903658893889398, |
| "learning_rate": 4.737834886419217e-06, |
| "loss": 0.0297, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.4020824989987988, |
| "grad_norm": 0.19855668130980528, |
| "learning_rate": 4.705791257779196e-06, |
| "loss": 0.0275, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.4020824989987988, |
| "eval_loss": 0.029653793200850487, |
| "eval_runtime": 97.2179, |
| "eval_samples_per_second": 1460.893, |
| "eval_steps_per_second": 2.86, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.4821786143372044, |
| "grad_norm": 0.1868527106405498, |
| "learning_rate": 4.672022102558958e-06, |
| "loss": 0.0269, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.562274729675611, |
| "grad_norm": 0.1985255713449175, |
| "learning_rate": 4.636553837390051e-06, |
| "loss": 0.0269, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.6423708450140166, |
| "grad_norm": 0.17528235376425527, |
| "learning_rate": 4.5994142080684956e-06, |
| "loss": 0.026, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.7224669603524227, |
| "grad_norm": 0.20238382028782428, |
| "learning_rate": 4.560632267850054e-06, |
| "loss": 0.026, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.802563075690829, |
| "grad_norm": 0.20789525240306345, |
| "learning_rate": 4.5202383547227134e-06, |
| "loss": 0.0257, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.882659191029235, |
| "grad_norm": 0.2849074845845128, |
| "learning_rate": 4.478264067674155e-06, |
| "loss": 0.0256, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.962755306367641, |
| "grad_norm": 0.1826392119567578, |
| "learning_rate": 4.43474224197278e-06, |
| "loss": 0.0255, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.0424509411293554, |
| "grad_norm": 0.3254043272458406, |
| "learning_rate": 4.389706923481633e-06, |
| "loss": 0.0224, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.122547056467761, |
| "grad_norm": 0.2695456046362865, |
| "learning_rate": 4.34319334202531e-06, |
| "loss": 0.0198, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.202643171806167, |
| "grad_norm": 0.24345073976828904, |
| "learning_rate": 4.2952378838306855e-06, |
| "loss": 0.0202, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.202643171806167, |
| "eval_loss": 0.029243575409054756, |
| "eval_runtime": 97.6159, |
| "eval_samples_per_second": 1454.937, |
| "eval_steps_per_second": 2.848, |
| "step": 4000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 12480, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 892260770119680.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|