| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.0, |
| "eval_steps": 500, |
| "global_step": 1284, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09345794392523364, |
| "grad_norm": 0.25848760281456373, |
| "learning_rate": 9.743589743589744e-05, |
| "loss": 1.0955, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.18691588785046728, |
| "grad_norm": 0.12152540967981099, |
| "learning_rate": 0.0002, |
| "loss": 0.8826, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2803738317757009, |
| "grad_norm": 0.08766356744406019, |
| "learning_rate": 0.00019987267934654538, |
| "loss": 0.7805, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.37383177570093457, |
| "grad_norm": 0.07045964353767724, |
| "learning_rate": 0.00019949104159715743, |
| "loss": 0.7223, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4672897196261682, |
| "grad_norm": 0.09463916110765375, |
| "learning_rate": 0.00019885605855918885, |
| "loss": 0.6933, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5607476635514018, |
| "grad_norm": 0.09122862084573752, |
| "learning_rate": 0.0001979693471617462, |
| "loss": 0.6758, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6542056074766355, |
| "grad_norm": 0.10577377685042465, |
| "learning_rate": 0.00019683316533832042, |
| "loss": 0.6605, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7476635514018691, |
| "grad_norm": 0.11067163440878329, |
| "learning_rate": 0.0001954504062771555, |
| "loss": 0.6538, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8411214953271028, |
| "grad_norm": 0.1121421761603273, |
| "learning_rate": 0.00019382459105399632, |
| "loss": 0.6534, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9345794392523364, |
| "grad_norm": 0.10812490276750944, |
| "learning_rate": 0.00019195985966597494, |
| "loss": 0.6428, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.02803738317757, |
| "grad_norm": 0.11935465896028669, |
| "learning_rate": 0.00018986096048946824, |
| "loss": 0.6327, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1214953271028036, |
| "grad_norm": 0.11781728745931254, |
| "learning_rate": 0.0001875332381887699, |
| "loss": 0.6322, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2149532710280373, |
| "grad_norm": 0.1316640938577259, |
| "learning_rate": 0.00018498262010636774, |
| "loss": 0.6235, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.308411214953271, |
| "grad_norm": 0.13767423202221835, |
| "learning_rate": 0.00018221560116948103, |
| "loss": 0.6176, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4018691588785046, |
| "grad_norm": 0.1255781456618538, |
| "learning_rate": 0.00017923922735129302, |
| "loss": 0.6194, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4953271028037383, |
| "grad_norm": 0.12566754750336923, |
| "learning_rate": 0.00017606107772899287, |
| "loss": 0.6169, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.588785046728972, |
| "grad_norm": 0.1224054760200796, |
| "learning_rate": 0.00017268924518431438, |
| "loss": 0.6126, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.6822429906542056, |
| "grad_norm": 0.12754703201153322, |
| "learning_rate": 0.00016913231579571608, |
| "loss": 0.6127, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.7757009345794392, |
| "grad_norm": 0.13837660664241036, |
| "learning_rate": 0.00016539934697467894, |
| "loss": 0.6097, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.8691588785046729, |
| "grad_norm": 0.11566328949125707, |
| "learning_rate": 0.00016149984440179537, |
| "loss": 0.6039, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.9626168224299065, |
| "grad_norm": 0.13012304346488843, |
| "learning_rate": 0.00015744373782137992, |
| "loss": 0.6028, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.05607476635514, |
| "grad_norm": 0.13699329797526194, |
| "learning_rate": 0.00015324135575623857, |
| "loss": 0.5963, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.149532710280374, |
| "grad_norm": 0.12056530916496998, |
| "learning_rate": 0.00014890339920698334, |
| "loss": 0.5891, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.2429906542056073, |
| "grad_norm": 0.12376168201657378, |
| "learning_rate": 0.0001444409144028644, |
| "loss": 0.5913, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.336448598130841, |
| "grad_norm": 0.1285403052607913, |
| "learning_rate": 0.0001398652646735076, |
| "loss": 0.5893, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.4299065420560746, |
| "grad_norm": 0.12392132477639638, |
| "learning_rate": 0.0001351881015131833, |
| "loss": 0.5883, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.5233644859813085, |
| "grad_norm": 0.1338793765627719, |
| "learning_rate": 0.00013042133491128935, |
| "loss": 0.5859, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.616822429906542, |
| "grad_norm": 0.12415978424557397, |
| "learning_rate": 0.00012557710302459803, |
| "loss": 0.5856, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.710280373831776, |
| "grad_norm": 0.1313564813990853, |
| "learning_rate": 0.00012066774126849529, |
| "loss": 0.5893, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.803738317757009, |
| "grad_norm": 0.11960890307280825, |
| "learning_rate": 0.00011570575090591791, |
| "loss": 0.5849, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.897196261682243, |
| "grad_norm": 0.11986525148452289, |
| "learning_rate": 0.00011070376721397373, |
| "loss": 0.5861, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.9906542056074765, |
| "grad_norm": 0.12094208349063497, |
| "learning_rate": 0.00010567452730930743, |
| "loss": 0.5843, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.0841121495327104, |
| "grad_norm": 0.12592138466913735, |
| "learning_rate": 0.00010063083771413975, |
| "loss": 0.5776, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.177570093457944, |
| "grad_norm": 0.13879295451237433, |
| "learning_rate": 9.55855417455723e-05, |
| "loss": 0.5735, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.2710280373831777, |
| "grad_norm": 0.132827192019763, |
| "learning_rate": 9.055148681119688e-05, |
| "loss": 0.5762, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.364485981308411, |
| "grad_norm": 0.135202644806124, |
| "learning_rate": 8.554149169428894e-05, |
| "loss": 0.5738, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.457943925233645, |
| "grad_norm": 0.12535841207061021, |
| "learning_rate": 8.056831391189023e-05, |
| "loss": 0.5703, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.5514018691588785, |
| "grad_norm": 0.13117365043331122, |
| "learning_rate": 7.564461722890081e-05, |
| "loss": 0.5707, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.6448598130841123, |
| "grad_norm": 0.12563061031824993, |
| "learning_rate": 7.078293941090249e-05, |
| "loss": 0.5687, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.7383177570093458, |
| "grad_norm": 0.13424430991092495, |
| "learning_rate": 6.599566029782863e-05, |
| "loss": 0.5717, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.831775700934579, |
| "grad_norm": 0.12859665304631457, |
| "learning_rate": 6.129497027977829e-05, |
| "loss": 0.5694, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.925233644859813, |
| "grad_norm": 0.12403658321797129, |
| "learning_rate": 5.669283925524715e-05, |
| "loss": 0.5696, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.018691588785047, |
| "grad_norm": 0.1241236470560002, |
| "learning_rate": 5.2200986150821696e-05, |
| "loss": 0.5678, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.11214953271028, |
| "grad_norm": 0.13373450432597092, |
| "learning_rate": 4.783084907995156e-05, |
| "loss": 0.559, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.205607476635514, |
| "grad_norm": 0.12955521613965384, |
| "learning_rate": 4.359355621678764e-05, |
| "loss": 0.5626, |
| "step": 900 |
| }, |
| { |
| "epoch": 4.299065420560748, |
| "grad_norm": 0.1201829111448084, |
| "learning_rate": 3.9499897459254375e-05, |
| "loss": 0.5655, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.392523364485982, |
| "grad_norm": 0.1293018490823381, |
| "learning_rate": 3.5560296953512295e-05, |
| "loss": 0.5638, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.485981308411215, |
| "grad_norm": 0.11920916186970969, |
| "learning_rate": 3.178478654977624e-05, |
| "loss": 0.5608, |
| "step": 960 |
| }, |
| { |
| "epoch": 4.579439252336448, |
| "grad_norm": 0.11405851380231317, |
| "learning_rate": 2.818298025708075e-05, |
| "loss": 0.562, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.672897196261682, |
| "grad_norm": 0.12170514209424545, |
| "learning_rate": 2.4764049762041874e-05, |
| "loss": 0.5686, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.766355140186916, |
| "grad_norm": 0.14556896397750538, |
| "learning_rate": 2.1536701073954558e-05, |
| "loss": 0.5635, |
| "step": 1020 |
| }, |
| { |
| "epoch": 4.859813084112149, |
| "grad_norm": 0.12172194075595427, |
| "learning_rate": 1.8509152355696623e-05, |
| "loss": 0.559, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.953271028037383, |
| "grad_norm": 0.1318350224140456, |
| "learning_rate": 1.5689112996891576e-05, |
| "loss": 0.5616, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.046728971962617, |
| "grad_norm": 0.12691480311352626, |
| "learning_rate": 1.3083763982618025e-05, |
| "loss": 0.56, |
| "step": 1080 |
| }, |
| { |
| "epoch": 5.140186915887851, |
| "grad_norm": 0.11296084200322255, |
| "learning_rate": 1.0699739607655435e-05, |
| "loss": 0.5558, |
| "step": 1100 |
| }, |
| { |
| "epoch": 5.233644859813084, |
| "grad_norm": 0.1309044210983197, |
| "learning_rate": 8.543110582829272e-06, |
| "loss": 0.5588, |
| "step": 1120 |
| }, |
| { |
| "epoch": 5.327102803738318, |
| "grad_norm": 0.13260357681059787, |
| "learning_rate": 6.61936857647355e-06, |
| "loss": 0.5605, |
| "step": 1140 |
| }, |
| { |
| "epoch": 5.420560747663552, |
| "grad_norm": 0.12322372236537348, |
| "learning_rate": 4.933412230374812e-06, |
| "loss": 0.5554, |
| "step": 1160 |
| }, |
| { |
| "epoch": 5.5140186915887845, |
| "grad_norm": 0.11617175237743727, |
| "learning_rate": 3.4895346858066724e-06, |
| "loss": 0.5605, |
| "step": 1180 |
| }, |
| { |
| "epoch": 5.607476635514018, |
| "grad_norm": 0.12278895489019458, |
| "learning_rate": 2.291412651418778e-06, |
| "loss": 0.5642, |
| "step": 1200 |
| }, |
| { |
| "epoch": 5.700934579439252, |
| "grad_norm": 0.12252134829919185, |
| "learning_rate": 1.3420970408178913e-06, |
| "loss": 0.5619, |
| "step": 1220 |
| }, |
| { |
| "epoch": 5.794392523364486, |
| "grad_norm": 0.21509033654221743, |
| "learning_rate": 6.440052036815081e-07, |
| "loss": 0.5539, |
| "step": 1240 |
| }, |
| { |
| "epoch": 5.88785046728972, |
| "grad_norm": 0.12357352260583994, |
| "learning_rate": 1.989147701871641e-07, |
| "loss": 0.5559, |
| "step": 1260 |
| }, |
| { |
| "epoch": 5.981308411214953, |
| "grad_norm": 0.12149087465170151, |
| "learning_rate": 7.959124431622389e-09, |
| "loss": 0.5592, |
| "step": 1280 |
| }, |
| { |
| "epoch": 6.0, |
| "step": 1284, |
| "total_flos": 8949072262070272.0, |
| "train_loss": 0.6056221575009118, |
| "train_runtime": 16251.3363, |
| "train_samples_per_second": 5.056, |
| "train_steps_per_second": 0.079 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 1284, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8949072262070272.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|