| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 1, |
| "global_step": 17, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 27.23904800415039, |
| "learning_rate": 1e-07, |
| "loss": 2.4516, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 27.284263610839844, |
| "learning_rate": 9.91486549841951e-08, |
| "loss": 2.4052, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 26.715576171875, |
| "learning_rate": 9.662361147021778e-08, |
| "loss": 2.4273, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 27.109235763549805, |
| "learning_rate": 9.25108567864807e-08, |
| "loss": 2.372, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 22.088886260986328, |
| "learning_rate": 8.695044586103295e-08, |
| "loss": 2.0321, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 29.467775344848633, |
| "learning_rate": 8.013173181896283e-08, |
| "loss": 2.5105, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 25.27092742919922, |
| "learning_rate": 7.228691778882692e-08, |
| "loss": 2.2482, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 28.22859001159668, |
| "learning_rate": 6.368314950360415e-08, |
| "loss": 2.4815, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "eval_loss": 2.301804542541504, |
| "eval_runtime": 3.8155, |
| "eval_samples_per_second": 0.786, |
| "eval_steps_per_second": 0.262, |
| "step": 8 |
| }, |
| { |
| "checkpoint_runtime": 66.2871 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 32.29556655883789, |
| "learning_rate": 5.46134179731651e-08, |
| "loss": 2.1465, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "eval_loss": 2.299485921859741, |
| "eval_runtime": 3.8091, |
| "eval_samples_per_second": 0.788, |
| "eval_steps_per_second": 0.263, |
| "step": 9 |
| }, |
| { |
| "checkpoint_runtime": 70.0087 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 28.886428833007812, |
| "learning_rate": 4.5386582026834904e-08, |
| "loss": 2.4141, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "eval_loss": 2.2999422550201416, |
| "eval_runtime": 3.8063, |
| "eval_samples_per_second": 0.788, |
| "eval_steps_per_second": 0.263, |
| "step": 10 |
| }, |
| { |
| "checkpoint_runtime": 67.5608 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 26.31535530090332, |
| "learning_rate": 3.6316850496395855e-08, |
| "loss": 2.3997, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "eval_loss": 2.2996702194213867, |
| "eval_runtime": 3.7927, |
| "eval_samples_per_second": 0.791, |
| "eval_steps_per_second": 0.264, |
| "step": 11 |
| }, |
| { |
| "checkpoint_runtime": 72.5546 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 26.56271743774414, |
| "learning_rate": 2.771308221117309e-08, |
| "loss": 2.3832, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "eval_loss": 2.2992560863494873, |
| "eval_runtime": 3.8194, |
| "eval_samples_per_second": 0.785, |
| "eval_steps_per_second": 0.262, |
| "step": 12 |
| }, |
| { |
| "checkpoint_runtime": 74.4665 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 30.143421173095703, |
| "learning_rate": 1.9868268181037185e-08, |
| "loss": 2.5278, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "eval_loss": 2.2993052005767822, |
| "eval_runtime": 3.8917, |
| "eval_samples_per_second": 0.771, |
| "eval_steps_per_second": 0.257, |
| "step": 13 |
| }, |
| { |
| "checkpoint_runtime": 74.9732 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 27.023157119750977, |
| "learning_rate": 1.304955413896705e-08, |
| "loss": 2.346, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "eval_loss": 2.2990870475769043, |
| "eval_runtime": 4.6942, |
| "eval_samples_per_second": 0.639, |
| "eval_steps_per_second": 0.213, |
| "step": 14 |
| }, |
| { |
| "checkpoint_runtime": 80.5035 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 26.858203887939453, |
| "learning_rate": 7.4891432135193e-09, |
| "loss": 2.3549, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "eval_loss": 2.2999765872955322, |
| "eval_runtime": 6.1015, |
| "eval_samples_per_second": 0.492, |
| "eval_steps_per_second": 0.164, |
| "step": 15 |
| }, |
| { |
| "checkpoint_runtime": 77.296 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 28.46251678466797, |
| "learning_rate": 3.376388529782215e-09, |
| "loss": 2.428, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "eval_loss": 2.2982423305511475, |
| "eval_runtime": 5.4136, |
| "eval_samples_per_second": 0.554, |
| "eval_steps_per_second": 0.185, |
| "step": 16 |
| }, |
| { |
| "checkpoint_runtime": 86.2539 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 29.63337516784668, |
| "learning_rate": 8.513450158049107e-10, |
| "loss": 2.4294, |
| "step": 17 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 2.29897141456604, |
| "eval_runtime": 5.6926, |
| "eval_samples_per_second": 0.527, |
| "eval_steps_per_second": 0.176, |
| "step": 17 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 17, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0006846020937318e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|