| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5962, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01677289500167729, |
| "grad_norm": 0.048490237444639206, |
| "learning_rate": 4.193751310547285e-07, |
| "loss": 2.7454, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03354579000335458, |
| "grad_norm": 0.05207354202866554, |
| "learning_rate": 8.38750262109457e-07, |
| "loss": 2.7456, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.050318685005031866, |
| "grad_norm": 0.06083838641643524, |
| "learning_rate": 1.2581253931641854e-06, |
| "loss": 2.7341, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06709158000670916, |
| "grad_norm": 0.07314612716436386, |
| "learning_rate": 1.677500524218914e-06, |
| "loss": 2.7514, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08386447500838645, |
| "grad_norm": 0.07996375858783722, |
| "learning_rate": 2.0968756552736426e-06, |
| "loss": 2.7325, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.10063737001006373, |
| "grad_norm": 0.09303133934736252, |
| "learning_rate": 2.516250786328371e-06, |
| "loss": 2.7265, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.11741026501174102, |
| "grad_norm": 0.11050993204116821, |
| "learning_rate": 2.9356259173830992e-06, |
| "loss": 2.7105, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.13418316001341832, |
| "grad_norm": 0.11374402791261673, |
| "learning_rate": 3.355001048437828e-06, |
| "loss": 2.6912, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1509560550150956, |
| "grad_norm": 0.12549300491809845, |
| "learning_rate": 3.7743761794925563e-06, |
| "loss": 2.6951, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1677289500167729, |
| "grad_norm": 0.1376326084136963, |
| "learning_rate": 4.193751310547285e-06, |
| "loss": 2.6876, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.18450184501845018, |
| "grad_norm": 0.14521992206573486, |
| "learning_rate": 4.613126441602013e-06, |
| "loss": 2.6837, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.20127474002012746, |
| "grad_norm": 0.1486833244562149, |
| "learning_rate": 5.032501572656742e-06, |
| "loss": 2.6621, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.21804763502180477, |
| "grad_norm": 0.15669290721416473, |
| "learning_rate": 5.4518767037114705e-06, |
| "loss": 2.6594, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.23482053002348205, |
| "grad_norm": 0.16500656306743622, |
| "learning_rate": 5.8712518347661985e-06, |
| "loss": 2.6575, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.25159342502515936, |
| "grad_norm": 0.17107869684696198, |
| "learning_rate": 6.290626965820927e-06, |
| "loss": 2.6549, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.26836632002683664, |
| "grad_norm": 0.19115546345710754, |
| "learning_rate": 6.710002096875656e-06, |
| "loss": 2.646, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2851392150285139, |
| "grad_norm": 0.20467370748519897, |
| "learning_rate": 7.129377227930384e-06, |
| "loss": 2.6481, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.3019121100301912, |
| "grad_norm": 0.19398543238639832, |
| "learning_rate": 7.548752358985113e-06, |
| "loss": 2.658, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3186850050318685, |
| "grad_norm": 0.20218446850776672, |
| "learning_rate": 7.96812749003984e-06, |
| "loss": 2.6312, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3354579000335458, |
| "grad_norm": 0.21208696067333221, |
| "learning_rate": 8.38750262109457e-06, |
| "loss": 2.6279, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3522307950352231, |
| "grad_norm": 0.25312340259552, |
| "learning_rate": 8.806877752149298e-06, |
| "loss": 2.6271, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.36900369003690037, |
| "grad_norm": 0.21412670612335205, |
| "learning_rate": 9.226252883204026e-06, |
| "loss": 2.618, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.38577658503857765, |
| "grad_norm": 0.2197524607181549, |
| "learning_rate": 9.645628014258756e-06, |
| "loss": 2.6211, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.4025494800402549, |
| "grad_norm": 0.23409396409988403, |
| "learning_rate": 1.0065003145313484e-05, |
| "loss": 2.6159, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.41932237504193226, |
| "grad_norm": 0.24235695600509644, |
| "learning_rate": 1.0484378276368211e-05, |
| "loss": 2.6063, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.43609527004360954, |
| "grad_norm": 0.23461420834064484, |
| "learning_rate": 1.0903753407422941e-05, |
| "loss": 2.61, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.4528681650452868, |
| "grad_norm": 0.24338868260383606, |
| "learning_rate": 1.132312853847767e-05, |
| "loss": 2.577, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.4696410600469641, |
| "grad_norm": 0.26369062066078186, |
| "learning_rate": 1.1742503669532397e-05, |
| "loss": 2.5954, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.4864139550486414, |
| "grad_norm": 0.26913905143737793, |
| "learning_rate": 1.2161878800587127e-05, |
| "loss": 2.5754, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.5031868500503187, |
| "grad_norm": 0.2918139398097992, |
| "learning_rate": 1.2581253931641854e-05, |
| "loss": 2.577, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.5199597450519959, |
| "grad_norm": 0.2750920057296753, |
| "learning_rate": 1.3000629062696584e-05, |
| "loss": 2.578, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.5367326400536733, |
| "grad_norm": 0.2634315490722656, |
| "learning_rate": 1.3420004193751312e-05, |
| "loss": 2.5653, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.5535055350553506, |
| "grad_norm": 0.27983254194259644, |
| "learning_rate": 1.383937932480604e-05, |
| "loss": 2.5693, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.5702784300570278, |
| "grad_norm": 0.29475557804107666, |
| "learning_rate": 1.4258754455860768e-05, |
| "loss": 2.5698, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.5870513250587052, |
| "grad_norm": 0.29615071415901184, |
| "learning_rate": 1.4678129586915497e-05, |
| "loss": 2.5605, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.6038242200603824, |
| "grad_norm": 0.2656971216201782, |
| "learning_rate": 1.5097504717970225e-05, |
| "loss": 2.5599, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.6205971150620597, |
| "grad_norm": 0.2948819696903229, |
| "learning_rate": 1.5516879849024955e-05, |
| "loss": 2.5535, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.637370010063737, |
| "grad_norm": 0.3076747953891754, |
| "learning_rate": 1.593625498007968e-05, |
| "loss": 2.5525, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.6541429050654143, |
| "grad_norm": 0.31171539425849915, |
| "learning_rate": 1.635563011113441e-05, |
| "loss": 2.5563, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.6709158000670916, |
| "grad_norm": 0.3256972134113312, |
| "learning_rate": 1.677500524218914e-05, |
| "loss": 2.5491, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.6876886950687688, |
| "grad_norm": 0.2948451340198517, |
| "learning_rate": 1.719438037324387e-05, |
| "loss": 2.5493, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.7044615900704462, |
| "grad_norm": 0.29186537861824036, |
| "learning_rate": 1.7613755504298596e-05, |
| "loss": 2.5316, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.7212344850721234, |
| "grad_norm": 0.3038988411426544, |
| "learning_rate": 1.8033130635353326e-05, |
| "loss": 2.5322, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.7380073800738007, |
| "grad_norm": 0.32824885845184326, |
| "learning_rate": 1.8452505766408052e-05, |
| "loss": 2.5196, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.7547802750754781, |
| "grad_norm": 0.38570263981819153, |
| "learning_rate": 1.887188089746278e-05, |
| "loss": 2.5375, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.7715531700771553, |
| "grad_norm": 0.32352355122566223, |
| "learning_rate": 1.929125602851751e-05, |
| "loss": 2.5231, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.7883260650788326, |
| "grad_norm": 0.32961755990982056, |
| "learning_rate": 1.971063115957224e-05, |
| "loss": 2.5256, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.8050989600805099, |
| "grad_norm": 0.3353755474090576, |
| "learning_rate": 1.9966697943413548e-05, |
| "loss": 2.5265, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.8218718550821872, |
| "grad_norm": 0.3189639747142792, |
| "learning_rate": 1.941085760330316e-05, |
| "loss": 2.5308, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.8386447500838645, |
| "grad_norm": 0.3028644323348999, |
| "learning_rate": 1.8206178320980295e-05, |
| "loss": 2.5113, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.8554176450855417, |
| "grad_norm": 0.3632523715496063, |
| "learning_rate": 1.6435717652323097e-05, |
| "loss": 2.5153, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.8721905400872191, |
| "grad_norm": 0.3340721130371094, |
| "learning_rate": 1.4221541393659966e-05, |
| "loss": 2.5175, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.8889634350888963, |
| "grad_norm": 0.3549550175666809, |
| "learning_rate": 1.171630765991538e-05, |
| "loss": 2.5159, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.9057363300905736, |
| "grad_norm": 0.3556847870349884, |
| "learning_rate": 9.092741751409186e-06, |
| "loss": 2.5052, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.922509225092251, |
| "grad_norm": 0.33662235736846924, |
| "learning_rate": 6.531727472854617e-06, |
| "loss": 2.4994, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.9392821200939282, |
| "grad_norm": 0.3705320954322815, |
| "learning_rate": 4.209835957886196e-06, |
| "loss": 2.5104, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.9560550150956055, |
| "grad_norm": 0.3348437249660492, |
| "learning_rate": 2.287151834070226e-06, |
| "loss": 2.4979, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.9728279100972828, |
| "grad_norm": 0.3829787075519562, |
| "learning_rate": 8.962360629781164e-07, |
| "loss": 2.519, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.9896008050989601, |
| "grad_norm": 0.35299184918403625, |
| "learning_rate": 1.329864209512377e-07, |
| "loss": 2.5051, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 5962, |
| "total_flos": 8.687706333480223e+17, |
| "train_loss": 2.596681281969236, |
| "train_runtime": 1897.3372, |
| "train_samples_per_second": 50.272, |
| "train_steps_per_second": 3.142 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5962, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.687706333480223e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|