| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 750.0, |
| "eval_steps": 100, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.9450863599777222, |
| "learning_rate": 5.94e-05, |
| "loss": 3.582935485839844, |
| "step": 100 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.19262371957302094, |
| "learning_rate": 0.0001194, |
| "loss": 0.49665924072265627, |
| "step": 200 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 0.15991109609603882, |
| "learning_rate": 0.00017939999999999997, |
| "loss": 0.4021767044067383, |
| "step": 300 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.18609966337680817, |
| "learning_rate": 0.0002394, |
| "loss": 0.37090412139892576, |
| "step": 400 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 0.24805887043476105, |
| "learning_rate": 0.00029939999999999996, |
| "loss": 0.3474049758911133, |
| "step": 500 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.17464160919189453, |
| "learning_rate": 0.0002980838709677419, |
| "loss": 0.3306478118896484, |
| "step": 600 |
| }, |
| { |
| "epoch": 87.5, |
| "grad_norm": 0.24544841051101685, |
| "learning_rate": 0.00029614838709677416, |
| "loss": 0.3101034927368164, |
| "step": 700 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.2018628716468811, |
| "learning_rate": 0.00029421290322580645, |
| "loss": 0.2937062835693359, |
| "step": 800 |
| }, |
| { |
| "epoch": 112.5, |
| "grad_norm": 0.2136959582567215, |
| "learning_rate": 0.0002922774193548387, |
| "loss": 0.2772307777404785, |
| "step": 900 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 0.23597952723503113, |
| "learning_rate": 0.0002903419354838709, |
| "loss": 0.258614501953125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 137.5, |
| "grad_norm": 0.30438488721847534, |
| "learning_rate": 0.0002884064516129032, |
| "loss": 0.2398568344116211, |
| "step": 1100 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.27026715874671936, |
| "learning_rate": 0.00028647096774193546, |
| "loss": 0.21622713088989257, |
| "step": 1200 |
| }, |
| { |
| "epoch": 162.5, |
| "grad_norm": 0.2623114287853241, |
| "learning_rate": 0.0002845354838709677, |
| "loss": 0.19229209899902344, |
| "step": 1300 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 0.34945833683013916, |
| "learning_rate": 0.0002826, |
| "loss": 0.16757678985595703, |
| "step": 1400 |
| }, |
| { |
| "epoch": 187.5, |
| "grad_norm": 0.29883235692977905, |
| "learning_rate": 0.0002806645161290322, |
| "loss": 0.14341635704040528, |
| "step": 1500 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.31376898288726807, |
| "learning_rate": 0.0002787290322580645, |
| "loss": 0.1221920394897461, |
| "step": 1600 |
| }, |
| { |
| "epoch": 212.5, |
| "grad_norm": 0.28367292881011963, |
| "learning_rate": 0.00027679354838709675, |
| "loss": 0.1032716178894043, |
| "step": 1700 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 0.2790682315826416, |
| "learning_rate": 0.000274858064516129, |
| "loss": 0.08668439865112304, |
| "step": 1800 |
| }, |
| { |
| "epoch": 237.5, |
| "grad_norm": 0.2293432205915451, |
| "learning_rate": 0.0002729225806451613, |
| "loss": 0.0720753002166748, |
| "step": 1900 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.27616050839424133, |
| "learning_rate": 0.0002709870967741935, |
| "loss": 0.062033796310424806, |
| "step": 2000 |
| }, |
| { |
| "epoch": 262.5, |
| "grad_norm": 0.2692248225212097, |
| "learning_rate": 0.0002690516129032258, |
| "loss": 0.05264517307281494, |
| "step": 2100 |
| }, |
| { |
| "epoch": 275.0, |
| "grad_norm": 0.21932683885097504, |
| "learning_rate": 0.00026711612903225805, |
| "loss": 0.045755772590637206, |
| "step": 2200 |
| }, |
| { |
| "epoch": 287.5, |
| "grad_norm": 0.20013022422790527, |
| "learning_rate": 0.0002651806451612903, |
| "loss": 0.04000330924987793, |
| "step": 2300 |
| }, |
| { |
| "epoch": 300.0, |
| "grad_norm": 0.16391867399215698, |
| "learning_rate": 0.0002632451612903226, |
| "loss": 0.03490618944168091, |
| "step": 2400 |
| }, |
| { |
| "epoch": 312.5, |
| "grad_norm": 0.19230681657791138, |
| "learning_rate": 0.0002613096774193548, |
| "loss": 0.031311240196228024, |
| "step": 2500 |
| }, |
| { |
| "epoch": 325.0, |
| "grad_norm": 0.1750553548336029, |
| "learning_rate": 0.00025937419354838705, |
| "loss": 0.02794300317764282, |
| "step": 2600 |
| }, |
| { |
| "epoch": 337.5, |
| "grad_norm": 0.2386818677186966, |
| "learning_rate": 0.00025743870967741934, |
| "loss": 0.02533245801925659, |
| "step": 2700 |
| }, |
| { |
| "epoch": 350.0, |
| "grad_norm": 0.15160086750984192, |
| "learning_rate": 0.00025550322580645163, |
| "loss": 0.023450531959533692, |
| "step": 2800 |
| }, |
| { |
| "epoch": 362.5, |
| "grad_norm": 0.15656723082065582, |
| "learning_rate": 0.00025356774193548387, |
| "loss": 0.02200608015060425, |
| "step": 2900 |
| }, |
| { |
| "epoch": 375.0, |
| "grad_norm": 0.14112432301044464, |
| "learning_rate": 0.0002516322580645161, |
| "loss": 0.020031318664550782, |
| "step": 3000 |
| }, |
| { |
| "epoch": 387.5, |
| "grad_norm": 0.12787914276123047, |
| "learning_rate": 0.00024969677419354834, |
| "loss": 0.01899993300437927, |
| "step": 3100 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 0.1528688669204712, |
| "learning_rate": 0.00024776129032258063, |
| "loss": 0.01810125231742859, |
| "step": 3200 |
| }, |
| { |
| "epoch": 412.5, |
| "grad_norm": 0.15528737008571625, |
| "learning_rate": 0.00024582580645161287, |
| "loss": 0.016684828996658324, |
| "step": 3300 |
| }, |
| { |
| "epoch": 425.0, |
| "grad_norm": 0.1281791627407074, |
| "learning_rate": 0.00024389032258064514, |
| "loss": 0.015172331333160401, |
| "step": 3400 |
| }, |
| { |
| "epoch": 437.5, |
| "grad_norm": 0.11617272347211838, |
| "learning_rate": 0.0002419548387096774, |
| "loss": 0.01434700846672058, |
| "step": 3500 |
| }, |
| { |
| "epoch": 450.0, |
| "grad_norm": 0.11877749860286713, |
| "learning_rate": 0.00024001935483870966, |
| "loss": 0.01436853289604187, |
| "step": 3600 |
| }, |
| { |
| "epoch": 462.5, |
| "grad_norm": 0.11250139772891998, |
| "learning_rate": 0.00023808387096774193, |
| "loss": 0.013647955656051636, |
| "step": 3700 |
| }, |
| { |
| "epoch": 475.0, |
| "grad_norm": 0.12692750990390778, |
| "learning_rate": 0.00023614838709677417, |
| "loss": 0.012936822175979613, |
| "step": 3800 |
| }, |
| { |
| "epoch": 487.5, |
| "grad_norm": 0.08776593208312988, |
| "learning_rate": 0.00023421290322580643, |
| "loss": 0.01205775499343872, |
| "step": 3900 |
| }, |
| { |
| "epoch": 500.0, |
| "grad_norm": 0.08575516194105148, |
| "learning_rate": 0.00023227741935483867, |
| "loss": 0.012118096351623536, |
| "step": 4000 |
| }, |
| { |
| "epoch": 512.5, |
| "grad_norm": 0.11763694882392883, |
| "learning_rate": 0.00023034193548387093, |
| "loss": 0.010872763395309449, |
| "step": 4100 |
| }, |
| { |
| "epoch": 525.0, |
| "grad_norm": 0.11833110451698303, |
| "learning_rate": 0.00022840645161290322, |
| "loss": 0.010899600982666015, |
| "step": 4200 |
| }, |
| { |
| "epoch": 537.5, |
| "grad_norm": 0.11374954879283905, |
| "learning_rate": 0.00022647096774193546, |
| "loss": 0.010327227115631103, |
| "step": 4300 |
| }, |
| { |
| "epoch": 550.0, |
| "grad_norm": 0.10840512067079544, |
| "learning_rate": 0.00022453548387096773, |
| "loss": 0.010270411968231202, |
| "step": 4400 |
| }, |
| { |
| "epoch": 562.5, |
| "grad_norm": 0.07199712842702866, |
| "learning_rate": 0.0002226, |
| "loss": 0.009772901535034179, |
| "step": 4500 |
| }, |
| { |
| "epoch": 575.0, |
| "grad_norm": 0.15016108751296997, |
| "learning_rate": 0.00022066451612903223, |
| "loss": 0.009401602745056152, |
| "step": 4600 |
| }, |
| { |
| "epoch": 587.5, |
| "grad_norm": 0.08698810636997223, |
| "learning_rate": 0.0002187290322580645, |
| "loss": 0.00952852725982666, |
| "step": 4700 |
| }, |
| { |
| "epoch": 600.0, |
| "grad_norm": 0.11057093739509583, |
| "learning_rate": 0.00021679354838709678, |
| "loss": 0.008922239542007446, |
| "step": 4800 |
| }, |
| { |
| "epoch": 612.5, |
| "grad_norm": 0.11917728185653687, |
| "learning_rate": 0.00021485806451612902, |
| "loss": 0.008766108751296997, |
| "step": 4900 |
| }, |
| { |
| "epoch": 625.0, |
| "grad_norm": 0.07486002892255783, |
| "learning_rate": 0.00021292258064516128, |
| "loss": 0.008633826971054076, |
| "step": 5000 |
| }, |
| { |
| "epoch": 637.5, |
| "grad_norm": 0.11766602843999863, |
| "learning_rate": 0.00021098709677419352, |
| "loss": 0.008536132574081421, |
| "step": 5100 |
| }, |
| { |
| "epoch": 650.0, |
| "grad_norm": 0.0582246296107769, |
| "learning_rate": 0.00020905161290322579, |
| "loss": 0.008086669445037841, |
| "step": 5200 |
| }, |
| { |
| "epoch": 662.5, |
| "grad_norm": 0.0658862367272377, |
| "learning_rate": 0.00020711612903225805, |
| "loss": 0.007744500637054444, |
| "step": 5300 |
| }, |
| { |
| "epoch": 675.0, |
| "grad_norm": 0.10022356361150742, |
| "learning_rate": 0.0002051806451612903, |
| "loss": 0.007699260115623474, |
| "step": 5400 |
| }, |
| { |
| "epoch": 687.5, |
| "grad_norm": 0.12475644052028656, |
| "learning_rate": 0.00020324516129032258, |
| "loss": 0.0076804465055465695, |
| "step": 5500 |
| }, |
| { |
| "epoch": 700.0, |
| "grad_norm": 0.12272350490093231, |
| "learning_rate": 0.00020130967741935484, |
| "loss": 0.007605299353599548, |
| "step": 5600 |
| }, |
| { |
| "epoch": 712.5, |
| "grad_norm": 0.08131624013185501, |
| "learning_rate": 0.00019937419354838708, |
| "loss": 0.0075324904918670655, |
| "step": 5700 |
| }, |
| { |
| "epoch": 725.0, |
| "grad_norm": 0.12169747799634933, |
| "learning_rate": 0.00019743870967741935, |
| "loss": 0.0071774739027023315, |
| "step": 5800 |
| }, |
| { |
| "epoch": 737.5, |
| "grad_norm": 0.05529671907424927, |
| "learning_rate": 0.00019550322580645158, |
| "loss": 0.007025536298751831, |
| "step": 5900 |
| }, |
| { |
| "epoch": 750.0, |
| "grad_norm": 0.07039643824100494, |
| "learning_rate": 0.00019356774193548385, |
| "loss": 0.006921111941337586, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 16000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2000, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7074582945792000.0, |
| "train_batch_size": 125, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|