| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.14814814814814814, |
| "eval_steps": 500, |
| "global_step": 150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.2568040400743485, |
| "epoch": 0.009876543209876543, |
| "grad_norm": 0.926980197429657, |
| "learning_rate": 0.00012, |
| "loss": 1.9095975875854492, |
| "mean_token_accuracy": 0.5853493079543114, |
| "num_tokens": 23832.0, |
| "step": 10 |
| }, |
| { |
| "entropy": 1.5708743289113045, |
| "epoch": 0.019753086419753086, |
| "grad_norm": 0.46579790115356445, |
| "learning_rate": 0.00019956707906498044, |
| "loss": 1.5156055450439454, |
| "mean_token_accuracy": 0.6387933611869812, |
| "num_tokens": 46858.0, |
| "step": 20 |
| }, |
| { |
| "entropy": 1.5461681112647057, |
| "epoch": 0.02962962962962963, |
| "grad_norm": 0.42597582936286926, |
| "learning_rate": 0.00019473966425143292, |
| "loss": 1.2923666000366212, |
| "mean_token_accuracy": 0.6715767234563828, |
| "num_tokens": 72056.0, |
| "step": 30 |
| }, |
| { |
| "entropy": 1.1817909434437752, |
| "epoch": 0.03950617283950617, |
| "grad_norm": 0.5361228585243225, |
| "learning_rate": 0.0001848048096156426, |
| "loss": 1.0183001518249513, |
| "mean_token_accuracy": 0.7339986249804497, |
| "num_tokens": 93639.0, |
| "step": 40 |
| }, |
| { |
| "entropy": 0.8827707976102829, |
| "epoch": 0.04938271604938271, |
| "grad_norm": 0.49438047409057617, |
| "learning_rate": 0.0001702981057425662, |
| "loss": 0.8319701194763184, |
| "mean_token_accuracy": 0.7700131639838219, |
| "num_tokens": 118401.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.748842728883028, |
| "epoch": 0.05925925925925926, |
| "grad_norm": 0.6138308644294739, |
| "learning_rate": 0.00015200161279292155, |
| "loss": 0.7784955024719238, |
| "mean_token_accuracy": 0.7956750705838204, |
| "num_tokens": 139540.0, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.7260295443236828, |
| "epoch": 0.0691358024691358, |
| "grad_norm": 0.6582333445549011, |
| "learning_rate": 0.00013090169943749476, |
| "loss": 0.7784926891326904, |
| "mean_token_accuracy": 0.7997785583138466, |
| "num_tokens": 161194.0, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.5300135292112828, |
| "epoch": 0.07901234567901234, |
| "grad_norm": 0.7576186656951904, |
| "learning_rate": 0.00010813586746678583, |
| "loss": 0.5675814628601075, |
| "mean_token_accuracy": 0.8377173274755478, |
| "num_tokens": 182672.0, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.5887195959687233, |
| "epoch": 0.08888888888888889, |
| "grad_norm": 0.4873444437980652, |
| "learning_rate": 8.49314287750517e-05, |
| "loss": 0.6080675601959229, |
| "mean_token_accuracy": 0.8072204932570457, |
| "num_tokens": 209701.0, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.5359628431499004, |
| "epoch": 0.09876543209876543, |
| "grad_norm": 0.545722484588623, |
| "learning_rate": 6.25393406584088e-05, |
| "loss": 0.5403496742248535, |
| "mean_token_accuracy": 0.834045910090208, |
| "num_tokens": 235023.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.5043216332793236, |
| "epoch": 0.10864197530864197, |
| "grad_norm": 0.734391450881958, |
| "learning_rate": 4.216676638320135e-05, |
| "loss": 0.5571066856384277, |
| "mean_token_accuracy": 0.8353787913918496, |
| "num_tokens": 258948.0, |
| "step": 110 |
| }, |
| { |
| "entropy": 0.7284694246947765, |
| "epoch": 0.11851851851851852, |
| "grad_norm": 0.5130884647369385, |
| "learning_rate": 2.491199670185008e-05, |
| "loss": 0.6980243682861328, |
| "mean_token_accuracy": 0.7769531480967998, |
| "num_tokens": 286219.0, |
| "step": 120 |
| }, |
| { |
| "entropy": 0.6834345638751984, |
| "epoch": 0.12839506172839507, |
| "grad_norm": 0.4917851388454437, |
| "learning_rate": 1.1705240714107302e-05, |
| "loss": 0.6982964515686035, |
| "mean_token_accuracy": 0.7917674139142037, |
| "num_tokens": 309582.0, |
| "step": 130 |
| }, |
| { |
| "entropy": 0.6758594036102294, |
| "epoch": 0.1382716049382716, |
| "grad_norm": 1.1197789907455444, |
| "learning_rate": 3.2584780537136207e-06, |
| "loss": 0.6473836898803711, |
| "mean_token_accuracy": 0.7972797065973282, |
| "num_tokens": 332439.0, |
| "step": 140 |
| }, |
| { |
| "entropy": 0.5859654754400253, |
| "epoch": 0.14814814814814814, |
| "grad_norm": 0.7272841334342957, |
| "learning_rate": 2.7075882053828605e-08, |
| "loss": 0.6555652618408203, |
| "mean_token_accuracy": 0.8194831728935241, |
| "num_tokens": 355196.0, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 150, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 30, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7756223621246976.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|