| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "global_step": 22128, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.887020968908171e-05, |
| "loss": 3.0359, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.7740419378163416e-05, |
| "loss": 2.319, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.661062906724512e-05, |
| "loss": 2.1802, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.548083875632683e-05, |
| "loss": 2.0314, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.4351048445408536e-05, |
| "loss": 1.9451, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.322125813449024e-05, |
| "loss": 1.8929, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.209146782357194e-05, |
| "loss": 1.8416, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.096167751265365e-05, |
| "loss": 1.7529, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 3.9831887201735355e-05, |
| "loss": 1.7618, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 3.870209689081707e-05, |
| "loss": 1.7104, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 3.7572306579898775e-05, |
| "loss": 1.688, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 3.644251626898048e-05, |
| "loss": 1.675, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.531272595806219e-05, |
| "loss": 1.6657, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.4182935647143895e-05, |
| "loss": 1.6604, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.30531453362256e-05, |
| "loss": 1.5929, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.192335502530731e-05, |
| "loss": 1.6047, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.079356471438901e-05, |
| "loss": 1.5751, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.9663774403470718e-05, |
| "loss": 1.5734, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.8533984092552425e-05, |
| "loss": 1.5579, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.740419378163413e-05, |
| "loss": 1.5285, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.6274403470715835e-05, |
| "loss": 1.5379, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.514461315979754e-05, |
| "loss": 1.525, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 2.4014822848879248e-05, |
| "loss": 1.3185, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 2.2885032537960955e-05, |
| "loss": 1.2779, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.1755242227042665e-05, |
| "loss": 1.265, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 2.0625451916124368e-05, |
| "loss": 1.2229, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.9495661605206075e-05, |
| "loss": 1.2639, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.836587129428778e-05, |
| "loss": 1.2634, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.7236080983369488e-05, |
| "loss": 1.2153, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1.6106290672451194e-05, |
| "loss": 1.2602, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.49765003615329e-05, |
| "loss": 1.2276, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.3846710050614606e-05, |
| "loss": 1.2623, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.2716919739696313e-05, |
| "loss": 1.2397, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.158712942877802e-05, |
| "loss": 1.2361, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.0457339117859726e-05, |
| "loss": 1.2135, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.32754880694143e-06, |
| "loss": 1.2195, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.197758496023139e-06, |
| "loss": 1.2124, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 7.067968185104845e-06, |
| "loss": 1.1943, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 5.9381778741865515e-06, |
| "loss": 1.2251, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 4.808387563268257e-06, |
| "loss": 1.2001, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 3.678597252349964e-06, |
| "loss": 1.1923, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 2.54880694143167e-06, |
| "loss": 1.1982, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.4190166305133768e-06, |
| "loss": 1.1522, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 2.8922631959508315e-07, |
| "loss": 1.1485, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 22128, |
| "total_flos": 5.203777169748787e+16, |
| "train_loss": 1.513158853383316, |
| "train_runtime": 5655.0967, |
| "train_samples_per_second": 46.955, |
| "train_steps_per_second": 3.913 |
| } |
| ], |
| "max_steps": 22128, |
| "num_train_epochs": 2, |
| "total_flos": 5.203777169748787e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|