| { | |
| "best_metric": 0.017106017097830772, | |
| "best_model_checkpoint": "saves/chess/tactic/checkpoint-1000", | |
| "epoch": 5.0, | |
| "eval_steps": 1000, | |
| "global_step": 3075, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.16260162601626016, | |
| "grad_norm": 4.799216229524151, | |
| "learning_rate": 1.6233766233766235e-06, | |
| "loss": 0.8994, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3252032520325203, | |
| "grad_norm": 0.7153367237536395, | |
| "learning_rate": 3.246753246753247e-06, | |
| "loss": 0.0317, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.7387556653345047, | |
| "learning_rate": 4.870129870129871e-06, | |
| "loss": 0.0245, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6504065040650406, | |
| "grad_norm": 1.9740058439148331, | |
| "learning_rate": 4.986373880811079e-06, | |
| "loss": 0.0222, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8130081300813008, | |
| "grad_norm": 0.6344698055959135, | |
| "learning_rate": 4.940833840455932e-06, | |
| "loss": 0.0208, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 0.6970636016265686, | |
| "learning_rate": 4.863863172170709e-06, | |
| "loss": 0.02, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1382113821138211, | |
| "grad_norm": 0.6801856070533878, | |
| "learning_rate": 4.756453027584134e-06, | |
| "loss": 0.0171, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3008130081300813, | |
| "grad_norm": 0.275933332771696, | |
| "learning_rate": 4.619986527593033e-06, | |
| "loss": 0.0173, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.5872805765471233, | |
| "learning_rate": 4.4562209519085615e-06, | |
| "loss": 0.0174, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "grad_norm": 0.3927107277822733, | |
| "learning_rate": 4.26726511055776e-06, | |
| "loss": 0.0169, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6260162601626016, | |
| "eval_loss": 0.017106017097830772, | |
| "eval_runtime": 190.9398, | |
| "eval_samples_per_second": 183.105, | |
| "eval_steps_per_second": 0.718, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.7886178861788617, | |
| "grad_norm": 0.33192992931188736, | |
| "learning_rate": 4.055552188727706e-06, | |
| "loss": 0.0159, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 0.4102076449010124, | |
| "learning_rate": 3.823808414629323e-06, | |
| "loss": 0.016, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.113821138211382, | |
| "grad_norm": 0.4366312248115214, | |
| "learning_rate": 3.575017953844908e-06, | |
| "loss": 0.0126, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.2764227642276422, | |
| "grad_norm": 0.38777672675426217, | |
| "learning_rate": 3.3123844822150126e-06, | |
| "loss": 0.0119, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 0.573942741447281, | |
| "learning_rate": 3.0392899320907716e-06, | |
| "loss": 0.0118, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.6016260162601625, | |
| "grad_norm": 0.4798738867766201, | |
| "learning_rate": 2.759250943176377e-06, | |
| "loss": 0.0117, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.7642276422764227, | |
| "grad_norm": 0.2747941215742872, | |
| "learning_rate": 2.4758735787443878e-06, | |
| "loss": 0.0116, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 0.39700972538910706, | |
| "learning_rate": 2.192806890343352e-06, | |
| "loss": 0.0111, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.089430894308943, | |
| "grad_norm": 0.45186864274833893, | |
| "learning_rate": 1.9136959289452223e-06, | |
| "loss": 0.0077, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.252032520325203, | |
| "grad_norm": 0.6795786745533399, | |
| "learning_rate": 1.6421348076082123e-06, | |
| "loss": 0.0049, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.252032520325203, | |
| "eval_loss": 0.023808766156435013, | |
| "eval_runtime": 191.1442, | |
| "eval_samples_per_second": 182.909, | |
| "eval_steps_per_second": 0.717, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 0.6090255517872498, | |
| "learning_rate": 1.3816204200673827e-06, | |
| "loss": 0.0052, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.5772357723577235, | |
| "grad_norm": 0.7553892788821762, | |
| "learning_rate": 1.1355074112188802e-06, | |
| "loss": 0.0048, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.7398373983739837, | |
| "grad_norm": 0.33557205962825315, | |
| "learning_rate": 9.069649793430869e-07, | |
| "loss": 0.0046, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.39121616514997803, | |
| "learning_rate": 6.989360663246406e-07, | |
| "loss": 0.0043, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.065040650406504, | |
| "grad_norm": 0.37417208848590455, | |
| "learning_rate": 5.1409946137705e-07, | |
| "loss": 0.003, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.227642276422764, | |
| "grad_norm": 0.3450534162422346, | |
| "learning_rate": 3.548353062623949e-07, | |
| "loss": 0.0007, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 4.390243902439025, | |
| "grad_norm": 0.055395596176947635, | |
| "learning_rate": 2.231944461955507e-07, | |
| "loss": 0.0007, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 4.5528455284552845, | |
| "grad_norm": 0.7534781076862362, | |
| "learning_rate": 1.2087202110147994e-07, | |
| "loss": 0.0007, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 4.715447154471545, | |
| "grad_norm": 0.6312940968696128, | |
| "learning_rate": 4.9185637291078724e-08, | |
| "loss": 0.0006, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.878048780487805, | |
| "grad_norm": 0.6118892403786919, | |
| "learning_rate": 9.058400639009313e-09, | |
| "loss": 0.0005, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.878048780487805, | |
| "eval_loss": 0.03251836076378822, | |
| "eval_runtime": 191.2785, | |
| "eval_samples_per_second": 182.781, | |
| "eval_steps_per_second": 0.716, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 3075, | |
| "total_flos": 502860412354560.0, | |
| "train_loss": 0.03991650681670119, | |
| "train_runtime": 33327.5964, | |
| "train_samples_per_second": 47.207, | |
| "train_steps_per_second": 0.092 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3075, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 502860412354560.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |