| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9820971867007673, |
| "eval_steps": 500, |
| "global_step": 64, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.030690537084398978, |
| "grad_norm": 0.13874784250562586, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.5217, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 0.08867673869818674, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.5081, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 0.1708878126934411, |
| "learning_rate": 9.931806517013612e-06, |
| "loss": 0.4752, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 0.0963904508895781, |
| "learning_rate": 9.521785803487888e-06, |
| "loss": 0.4432, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 0.06982228815595948, |
| "learning_rate": 8.770533048884483e-06, |
| "loss": 0.4573, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.07015950515433515, |
| "learning_rate": 7.734740790612137e-06, |
| "loss": 0.4213, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 0.054896941467642804, |
| "learning_rate": 6.492574055008474e-06, |
| "loss": 0.4237, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9820971867007673, |
| "eval_loss": 0.4264456629753113, |
| "eval_runtime": 28.9203, |
| "eval_samples_per_second": 18.845, |
| "eval_steps_per_second": 4.737, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.092071611253197, |
| "grad_norm": 0.06382244755020999, |
| "learning_rate": 5.137771711840811e-06, |
| "loss": 0.4519, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.2455242966751918, |
| "grad_norm": 0.056628063212756805, |
| "learning_rate": 3.7725725642960047e-06, |
| "loss": 0.3904, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.3989769820971867, |
| "grad_norm": 0.0505084664996252, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.3524, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.5524296675191815, |
| "grad_norm": 0.05364523140110051, |
| "learning_rate": 1.4160874341577447e-06, |
| "loss": 0.3426, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 0.054002137212089386, |
| "learning_rate": 6.026312439675553e-07, |
| "loss": 0.3682, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.8593350383631715, |
| "grad_norm": 0.052807283780708696, |
| "learning_rate": 1.210180868628219e-07, |
| "loss": 0.3482, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.9820971867007673, |
| "eval_loss": 0.4236310124397278, |
| "eval_runtime": 28.6054, |
| "eval_samples_per_second": 19.052, |
| "eval_steps_per_second": 4.789, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.9820971867007673, |
| "step": 64, |
| "total_flos": 1.6374020737440154e+17, |
| "train_loss": 0.41061520762741566, |
| "train_runtime": 932.5034, |
| "train_samples_per_second": 3.354, |
| "train_steps_per_second": 0.069 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 64, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6374020737440154e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|