| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9978118161925602, |
| "eval_steps": 500, |
| "global_step": 171, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005835156819839533, |
| "grad_norm": 9.299546505823763, |
| "learning_rate": 0.0, |
| "loss": 1.0902, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011670313639679067, |
| "grad_norm": 10.01587881780193, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.0916, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0175054704595186, |
| "grad_norm": 10.518050626465923, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.185, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.023340627279358133, |
| "grad_norm": 12.737913938008038, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.3884, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.029175784099197667, |
| "grad_norm": 9.014224223654711, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.9737, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0350109409190372, |
| "grad_norm": 6.486863142815939, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 1.103, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.040846097738876735, |
| "grad_norm": 3.090673000091082, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.9362, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.046681254558716266, |
| "grad_norm": 3.623705568051016, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 1.1013, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0525164113785558, |
| "grad_norm": 2.591117338468344, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.0012, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.058351568198395334, |
| "grad_norm": 2.9279721323005337, |
| "learning_rate": 5e-06, |
| "loss": 1.2603, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06418672501823487, |
| "grad_norm": 2.37939201882965, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.1851, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0700218818380744, |
| "grad_norm": 1.8983860469518798, |
| "learning_rate": 6.111111111111112e-06, |
| "loss": 0.9673, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07585703865791393, |
| "grad_norm": 2.2504673657172405, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.0382, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08169219547775347, |
| "grad_norm": 2.0400300434462006, |
| "learning_rate": 7.222222222222223e-06, |
| "loss": 0.9073, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.087527352297593, |
| "grad_norm": 1.591499726407413, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.7972, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.09336250911743253, |
| "grad_norm": 1.9431852427499376, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.0072, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.09919766593727207, |
| "grad_norm": 1.6444838781555973, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.9623, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1050328227571116, |
| "grad_norm": 1.7236424484988195, |
| "learning_rate": 9.444444444444445e-06, |
| "loss": 0.9221, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11086797957695113, |
| "grad_norm": 1.5941224642401501, |
| "learning_rate": 1e-05, |
| "loss": 0.9128, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11670313639679067, |
| "grad_norm": 1.757727168003814, |
| "learning_rate": 9.998945997517957e-06, |
| "loss": 1.1367, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12253829321663019, |
| "grad_norm": 1.386463357150709, |
| "learning_rate": 9.99578443444032e-06, |
| "loss": 0.8651, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12837345003646974, |
| "grad_norm": 1.1831324111413184, |
| "learning_rate": 9.990516643685222e-06, |
| "loss": 0.7457, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13420860685630925, |
| "grad_norm": 1.8748983623503905, |
| "learning_rate": 9.983144846158472e-06, |
| "loss": 1.2395, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1400437636761488, |
| "grad_norm": 1.1016292798385203, |
| "learning_rate": 9.973672149817232e-06, |
| "loss": 0.7565, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.14587892049598833, |
| "grad_norm": 1.1697557850634346, |
| "learning_rate": 9.96210254835968e-06, |
| "loss": 0.7653, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15171407731582787, |
| "grad_norm": 1.4384496697664464, |
| "learning_rate": 9.948440919541277e-06, |
| "loss": 0.9953, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1575492341356674, |
| "grad_norm": 1.3351028893218353, |
| "learning_rate": 9.932693023118299e-06, |
| "loss": 0.9787, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.16338439095550694, |
| "grad_norm": 1.378322829821297, |
| "learning_rate": 9.91486549841951e-06, |
| "loss": 0.8526, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16921954777534645, |
| "grad_norm": 1.26787992067087, |
| "learning_rate": 9.894965861547023e-06, |
| "loss": 0.8895, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.175054704595186, |
| "grad_norm": 1.3552848829003714, |
| "learning_rate": 9.873002502207502e-06, |
| "loss": 0.7729, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18088986141502553, |
| "grad_norm": 1.3950152737046397, |
| "learning_rate": 9.848984680175049e-06, |
| "loss": 0.9177, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.18672501823486506, |
| "grad_norm": 1.3520794560278928, |
| "learning_rate": 9.822922521387277e-06, |
| "loss": 0.7891, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1925601750547046, |
| "grad_norm": 1.0535307158758438, |
| "learning_rate": 9.794827013676206e-06, |
| "loss": 0.7412, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.19839533187454414, |
| "grad_norm": 1.0016811092876226, |
| "learning_rate": 9.764710002135784e-06, |
| "loss": 0.6851, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.20423048869438365, |
| "grad_norm": 1.5626250486041073, |
| "learning_rate": 9.732584184127973e-06, |
| "loss": 1.1109, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2100656455142232, |
| "grad_norm": 1.039951299379022, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.7201, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.21590080233406272, |
| "grad_norm": 1.3146828860185786, |
| "learning_rate": 9.66236114702178e-06, |
| "loss": 0.898, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.22173595915390226, |
| "grad_norm": 1.1959284262689285, |
| "learning_rate": 9.62429353402556e-06, |
| "loss": 0.8495, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2275711159737418, |
| "grad_norm": 1.3300182598109136, |
| "learning_rate": 9.584276314284316e-06, |
| "loss": 0.8887, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.23340627279358134, |
| "grad_norm": 1.423333725022639, |
| "learning_rate": 9.542326359097619e-06, |
| "loss": 0.9251, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23924142961342085, |
| "grad_norm": 1.0188766020111937, |
| "learning_rate": 9.498461354608228e-06, |
| "loss": 0.6946, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.24507658643326038, |
| "grad_norm": 1.1546926940871387, |
| "learning_rate": 9.452699794345583e-06, |
| "loss": 0.8254, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.25091174325309995, |
| "grad_norm": 0.9851256354782937, |
| "learning_rate": 9.405060971428924e-06, |
| "loss": 0.6776, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2567469000729395, |
| "grad_norm": 1.0157420845089018, |
| "learning_rate": 9.355564970433288e-06, |
| "loss": 0.7233, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.26258205689277897, |
| "grad_norm": 1.0619142701167041, |
| "learning_rate": 9.30423265892184e-06, |
| "loss": 0.8033, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2684172137126185, |
| "grad_norm": 1.3551421509960442, |
| "learning_rate": 9.251085678648072e-06, |
| "loss": 1.0114, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.27425237053245805, |
| "grad_norm": 1.5329523702009467, |
| "learning_rate": 9.196146436431635e-06, |
| "loss": 1.0851, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2800875273522976, |
| "grad_norm": 1.3149537104367242, |
| "learning_rate": 9.13943809471159e-06, |
| "loss": 0.8997, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2859226841721371, |
| "grad_norm": 0.9560157606341646, |
| "learning_rate": 9.08098456178111e-06, |
| "loss": 0.6784, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.29175784099197666, |
| "grad_norm": 1.3327614439098379, |
| "learning_rate": 9.020810481707709e-06, |
| "loss": 0.9211, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2975929978118162, |
| "grad_norm": 0.8643544120556634, |
| "learning_rate": 8.958941223943292e-06, |
| "loss": 0.6223, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.30342815463165573, |
| "grad_norm": 1.2957025562470537, |
| "learning_rate": 8.895402872628352e-06, |
| "loss": 0.9063, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.30926331145149527, |
| "grad_norm": 1.2036899073181069, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.806, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3150984682713348, |
| "grad_norm": 1.1088264111919222, |
| "learning_rate": 8.763426733072624e-06, |
| "loss": 0.7629, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.32093362509117435, |
| "grad_norm": 1.0592294445638804, |
| "learning_rate": 8.695044586103297e-06, |
| "loss": 0.7936, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3267687819110139, |
| "grad_norm": 1.0608638933809196, |
| "learning_rate": 8.625104604667965e-06, |
| "loss": 0.8037, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.33260393873085337, |
| "grad_norm": 0.8845990311674103, |
| "learning_rate": 8.553636275532236e-06, |
| "loss": 0.6343, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3384390955506929, |
| "grad_norm": 1.11966109375501, |
| "learning_rate": 8.480669729814635e-06, |
| "loss": 0.7116, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.34427425237053244, |
| "grad_norm": 1.058595745732236, |
| "learning_rate": 8.40623573028327e-06, |
| "loss": 0.7479, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.350109409190372, |
| "grad_norm": 1.1755313313178577, |
| "learning_rate": 8.330365658386252e-06, |
| "loss": 0.8864, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3559445660102115, |
| "grad_norm": 1.0660158498908405, |
| "learning_rate": 8.25309150102121e-06, |
| "loss": 0.7619, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.36177972283005105, |
| "grad_norm": 0.9470744104347243, |
| "learning_rate": 8.174445837049614e-06, |
| "loss": 0.7459, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3676148796498906, |
| "grad_norm": 1.1792022589463713, |
| "learning_rate": 8.094461823561473e-06, |
| "loss": 0.8396, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.37345003646973013, |
| "grad_norm": 1.1803977687770104, |
| "learning_rate": 8.013173181896283e-06, |
| "loss": 0.8477, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.37928519328956967, |
| "grad_norm": 1.2544495921132872, |
| "learning_rate": 7.930614183426074e-06, |
| "loss": 0.8091, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3851203501094092, |
| "grad_norm": 0.9504234536879141, |
| "learning_rate": 7.846819635106569e-06, |
| "loss": 0.7368, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.39095550692924874, |
| "grad_norm": 0.8943883537307726, |
| "learning_rate": 7.76182486480253e-06, |
| "loss": 0.6219, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3967906637490883, |
| "grad_norm": 1.146327931819408, |
| "learning_rate": 7.675665706393502e-06, |
| "loss": 0.8638, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.4026258205689278, |
| "grad_norm": 1.5236107946014117, |
| "learning_rate": 7.588378484666214e-06, |
| "loss": 1.014, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.4084609773887673, |
| "grad_norm": 1.416864267894214, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.916, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.41429613420860684, |
| "grad_norm": 0.9280527813604279, |
| "learning_rate": 7.4105675128517456e-06, |
| "loss": 0.6819, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4201312910284464, |
| "grad_norm": 1.2463650332593321, |
| "learning_rate": 7.320118728046818e-06, |
| "loss": 0.9046, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.4259664478482859, |
| "grad_norm": 0.9912971345348434, |
| "learning_rate": 7.2286917788826926e-06, |
| "loss": 0.665, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.43180160466812545, |
| "grad_norm": 1.425510459464162, |
| "learning_rate": 7.136325211051905e-06, |
| "loss": 0.9981, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.437636761487965, |
| "grad_norm": 1.1459307932052534, |
| "learning_rate": 7.043057966391158e-06, |
| "loss": 0.8068, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4434719183078045, |
| "grad_norm": 0.8776118756697501, |
| "learning_rate": 6.948929366463397e-06, |
| "loss": 0.6275, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.44930707512764406, |
| "grad_norm": 0.9979489407999791, |
| "learning_rate": 6.8539790959798045e-06, |
| "loss": 0.7076, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4551422319474836, |
| "grad_norm": 1.3942127913832905, |
| "learning_rate": 6.758247186068684e-06, |
| "loss": 0.9126, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.46097738876732314, |
| "grad_norm": 1.0524270195584062, |
| "learning_rate": 6.6617739973982985e-06, |
| "loss": 0.7423, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4668125455871627, |
| "grad_norm": 1.0306469704543544, |
| "learning_rate": 6.5646002031607726e-06, |
| "loss": 0.7423, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4726477024070022, |
| "grad_norm": 0.9890091862718351, |
| "learning_rate": 6.466766771924231e-06, |
| "loss": 0.8287, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.4784828592268417, |
| "grad_norm": 1.0066281884974029, |
| "learning_rate": 6.368314950360416e-06, |
| "loss": 0.6768, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.48431801604668123, |
| "grad_norm": 1.2355302365451943, |
| "learning_rate": 6.269286245855039e-06, |
| "loss": 0.9904, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.49015317286652077, |
| "grad_norm": 1.0779965970308134, |
| "learning_rate": 6.169722409008244e-06, |
| "loss": 0.7825, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4959883296863603, |
| "grad_norm": 0.9709689623157064, |
| "learning_rate": 6.0696654160324875e-06, |
| "loss": 0.7533, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5018234865061999, |
| "grad_norm": 1.442825562282201, |
| "learning_rate": 5.9691574510553505e-06, |
| "loss": 0.9101, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5076586433260394, |
| "grad_norm": 1.0021911338557055, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.7235, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.513493800145879, |
| "grad_norm": 1.4693273134948137, |
| "learning_rate": 5.766958274393428e-06, |
| "loss": 0.9827, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5193289569657185, |
| "grad_norm": 1.410987671617291, |
| "learning_rate": 5.66535231008227e-06, |
| "loss": 0.993, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5251641137855579, |
| "grad_norm": 1.038363675581389, |
| "learning_rate": 5.5634658325766066e-06, |
| "loss": 0.6742, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5309992706053975, |
| "grad_norm": 0.9898071815874125, |
| "learning_rate": 5.46134179731651e-06, |
| "loss": 0.7614, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.536834427425237, |
| "grad_norm": 1.4985338163137971, |
| "learning_rate": 5.359023259896638e-06, |
| "loss": 1.1281, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5426695842450766, |
| "grad_norm": 0.8622120201400366, |
| "learning_rate": 5.2565533579139484e-06, |
| "loss": 0.6284, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5485047410649161, |
| "grad_norm": 0.9587497658247528, |
| "learning_rate": 5.153975292780852e-06, |
| "loss": 0.7377, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5543398978847557, |
| "grad_norm": 1.2901783290377642, |
| "learning_rate": 5.05133231151145e-06, |
| "loss": 0.9462, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5601750547045952, |
| "grad_norm": 1.138527112754194, |
| "learning_rate": 4.948667688488552e-06, |
| "loss": 0.8416, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5660102115244348, |
| "grad_norm": 0.9438989378506446, |
| "learning_rate": 4.846024707219149e-06, |
| "loss": 0.6938, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5718453683442742, |
| "grad_norm": 1.144451449361183, |
| "learning_rate": 4.7434466420860515e-06, |
| "loss": 0.8747, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5776805251641138, |
| "grad_norm": 1.3300629267634485, |
| "learning_rate": 4.640976740103363e-06, |
| "loss": 0.9545, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5835156819839533, |
| "grad_norm": 1.1041466595046083, |
| "learning_rate": 4.53865820268349e-06, |
| "loss": 0.8229, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5893508388037928, |
| "grad_norm": 1.1884660637688764, |
| "learning_rate": 4.436534167423395e-06, |
| "loss": 0.8462, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5951859956236324, |
| "grad_norm": 1.0856389865828382, |
| "learning_rate": 4.334647689917734e-06, |
| "loss": 0.7946, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6010211524434719, |
| "grad_norm": 1.1278394620019967, |
| "learning_rate": 4.233041725606573e-06, |
| "loss": 0.7504, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6068563092633115, |
| "grad_norm": 0.8712693300444356, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.6347, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.612691466083151, |
| "grad_norm": 1.3330858764052926, |
| "learning_rate": 4.03084254894465e-06, |
| "loss": 0.881, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6185266229029905, |
| "grad_norm": 1.4768502249681943, |
| "learning_rate": 3.930334583967514e-06, |
| "loss": 1.1756, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.62436177972283, |
| "grad_norm": 0.9342610163210304, |
| "learning_rate": 3.8302775909917585e-06, |
| "loss": 0.7378, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6301969365426696, |
| "grad_norm": 1.21916808547768, |
| "learning_rate": 3.730713754144961e-06, |
| "loss": 0.8151, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6360320933625091, |
| "grad_norm": 0.9354287732139978, |
| "learning_rate": 3.6316850496395863e-06, |
| "loss": 0.7366, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6418672501823487, |
| "grad_norm": 1.3997263344251962, |
| "learning_rate": 3.5332332280757706e-06, |
| "loss": 0.867, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6477024070021882, |
| "grad_norm": 1.1166981780092131, |
| "learning_rate": 3.4353997968392295e-06, |
| "loss": 0.784, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6535375638220278, |
| "grad_norm": 0.9396355396473667, |
| "learning_rate": 3.3382260026017027e-06, |
| "loss": 0.6894, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6593727206418672, |
| "grad_norm": 1.1116911967258196, |
| "learning_rate": 3.241752813931316e-06, |
| "loss": 0.8626, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6652078774617067, |
| "grad_norm": 1.0393838367512098, |
| "learning_rate": 3.1460209040201967e-06, |
| "loss": 0.7411, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6710430342815463, |
| "grad_norm": 0.8438881622188874, |
| "learning_rate": 3.0510706335366034e-06, |
| "loss": 0.617, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6768781911013858, |
| "grad_norm": 1.1233691212133115, |
| "learning_rate": 2.956942033608843e-06, |
| "loss": 0.7837, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6827133479212254, |
| "grad_norm": 1.1838046764518801, |
| "learning_rate": 2.863674788948097e-06, |
| "loss": 0.8285, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6885485047410649, |
| "grad_norm": 1.1067852090435224, |
| "learning_rate": 2.771308221117309e-06, |
| "loss": 0.8183, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6943836615609045, |
| "grad_norm": 0.9602760669828811, |
| "learning_rate": 2.6798812719531843e-06, |
| "loss": 0.6839, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.700218818380744, |
| "grad_norm": 1.0375127283326684, |
| "learning_rate": 2.5894324871482557e-06, |
| "loss": 0.812, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7060539752005836, |
| "grad_norm": 0.9087116914763318, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.6985, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.711889132020423, |
| "grad_norm": 1.00794866391255, |
| "learning_rate": 2.411621515333788e-06, |
| "loss": 0.7672, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.7177242888402626, |
| "grad_norm": 1.073597416847521, |
| "learning_rate": 2.324334293606499e-06, |
| "loss": 0.8843, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7235594456601021, |
| "grad_norm": 1.1275188964626082, |
| "learning_rate": 2.238175135197471e-06, |
| "loss": 0.7386, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7293946024799417, |
| "grad_norm": 0.980424314571382, |
| "learning_rate": 2.1531803648934333e-06, |
| "loss": 0.7302, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7352297592997812, |
| "grad_norm": 1.216063731669915, |
| "learning_rate": 2.069385816573928e-06, |
| "loss": 0.8876, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7410649161196207, |
| "grad_norm": 1.1171215223050623, |
| "learning_rate": 1.9868268181037186e-06, |
| "loss": 0.7459, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7469000729394603, |
| "grad_norm": 0.9641486294813004, |
| "learning_rate": 1.9055381764385272e-06, |
| "loss": 0.699, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7527352297592997, |
| "grad_norm": 1.130060617138146, |
| "learning_rate": 1.8255541629503865e-06, |
| "loss": 0.8457, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7585703865791393, |
| "grad_norm": 0.9933090706388402, |
| "learning_rate": 1.746908498978791e-06, |
| "loss": 0.7078, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7644055433989788, |
| "grad_norm": 0.9268338589614799, |
| "learning_rate": 1.6696343416137495e-06, |
| "loss": 0.7091, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7702407002188184, |
| "grad_norm": 1.1667181202160928, |
| "learning_rate": 1.5937642697167288e-06, |
| "loss": 0.8571, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7760758570386579, |
| "grad_norm": 1.1722058069576977, |
| "learning_rate": 1.5193302701853674e-06, |
| "loss": 0.8629, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7819110138584975, |
| "grad_norm": 1.2708367308444, |
| "learning_rate": 1.4463637244677648e-06, |
| "loss": 1.0594, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.787746170678337, |
| "grad_norm": 1.1286061916245125, |
| "learning_rate": 1.374895395332037e-06, |
| "loss": 0.7631, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7935813274981766, |
| "grad_norm": 0.8503920526479639, |
| "learning_rate": 1.3049554138967052e-06, |
| "loss": 0.6188, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.799416484318016, |
| "grad_norm": 1.2988967851486795, |
| "learning_rate": 1.2365732669273778e-06, |
| "loss": 0.7461, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.8052516411378556, |
| "grad_norm": 1.0692858069057927, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.7808, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8110867979576951, |
| "grad_norm": 0.9967325104922946, |
| "learning_rate": 1.1045971273716476e-06, |
| "loss": 0.749, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.8169219547775346, |
| "grad_norm": 0.9904523906189663, |
| "learning_rate": 1.0410587760567104e-06, |
| "loss": 0.81, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8227571115973742, |
| "grad_norm": 1.040023386236895, |
| "learning_rate": 9.791895182922911e-07, |
| "loss": 0.7465, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8285922684172137, |
| "grad_norm": 1.3237815420111214, |
| "learning_rate": 9.190154382188921e-07, |
| "loss": 0.9365, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8344274252370533, |
| "grad_norm": 1.420519742774693, |
| "learning_rate": 8.605619052884106e-07, |
| "loss": 1.0061, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8402625820568927, |
| "grad_norm": 1.0185053858458084, |
| "learning_rate": 8.03853563568367e-07, |
| "loss": 0.7789, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8460977388767323, |
| "grad_norm": 1.0049562751860923, |
| "learning_rate": 7.489143213519301e-07, |
| "loss": 0.8384, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8519328956965718, |
| "grad_norm": 1.094639781048571, |
| "learning_rate": 6.957673410781617e-07, |
| "loss": 0.7956, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8577680525164114, |
| "grad_norm": 1.2566130793880201, |
| "learning_rate": 6.444350295667112e-07, |
| "loss": 0.8434, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8636032093362509, |
| "grad_norm": 1.1692727321621572, |
| "learning_rate": 5.949390285710777e-07, |
| "loss": 0.8875, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8694383661560905, |
| "grad_norm": 0.9165824307523833, |
| "learning_rate": 5.473002056544191e-07, |
| "loss": 0.7093, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.87527352297593, |
| "grad_norm": 0.9855161898890391, |
| "learning_rate": 5.015386453917742e-07, |
| "loss": 0.7251, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8811086797957695, |
| "grad_norm": 1.1140729758514698, |
| "learning_rate": 4.576736409023813e-07, |
| "loss": 0.7576, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.886943836615609, |
| "grad_norm": 1.0985856641455918, |
| "learning_rate": 4.15723685715686e-07, |
| "loss": 0.8802, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8927789934354485, |
| "grad_norm": 1.054855033939115, |
| "learning_rate": 3.7570646597444196e-07, |
| "loss": 0.8055, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8986141502552881, |
| "grad_norm": 1.192735007179705, |
| "learning_rate": 3.3763885297822153e-07, |
| "loss": 0.8976, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9044493070751276, |
| "grad_norm": 1.1221443218870846, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.7736, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9102844638949672, |
| "grad_norm": 1.148682041385469, |
| "learning_rate": 2.6741581587202747e-07, |
| "loss": 0.8462, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9161196207148067, |
| "grad_norm": 1.0703405484534654, |
| "learning_rate": 2.3528999786421758e-07, |
| "loss": 0.8121, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9219547775346463, |
| "grad_norm": 1.1658393585654097, |
| "learning_rate": 2.0517298632379445e-07, |
| "loss": 0.8399, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9277899343544858, |
| "grad_norm": 1.149469219887576, |
| "learning_rate": 1.770774786127244e-07, |
| "loss": 0.7776, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.9336250911743253, |
| "grad_norm": 0.9088384189732779, |
| "learning_rate": 1.510153198249531e-07, |
| "loss": 0.7099, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9394602479941648, |
| "grad_norm": 0.9396900010095327, |
| "learning_rate": 1.2699749779249926e-07, |
| "loss": 0.6818, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9452954048140044, |
| "grad_norm": 1.0187501367923484, |
| "learning_rate": 1.0503413845297739e-07, |
| "loss": 0.848, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9511305616338439, |
| "grad_norm": 0.8945331697127373, |
| "learning_rate": 8.513450158049109e-08, |
| "loss": 0.7572, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9569657184536834, |
| "grad_norm": 1.1263964389313517, |
| "learning_rate": 6.730697688170251e-08, |
| "loss": 0.8438, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.962800875273523, |
| "grad_norm": 0.9475010544197467, |
| "learning_rate": 5.155908045872349e-08, |
| "loss": 0.701, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9686360320933625, |
| "grad_norm": 1.0119832043122319, |
| "learning_rate": 3.7897451640321326e-08, |
| "loss": 0.7566, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9744711889132021, |
| "grad_norm": 0.9652886585419143, |
| "learning_rate": 2.6327850182769065e-08, |
| "loss": 0.6965, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9803063457330415, |
| "grad_norm": 0.9801864470674052, |
| "learning_rate": 1.6855153841527915e-08, |
| "loss": 0.6939, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9861415025528811, |
| "grad_norm": 0.9990158801452097, |
| "learning_rate": 9.48335631477948e-09, |
| "loss": 0.8307, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.9919766593727206, |
| "grad_norm": 0.8306257375789021, |
| "learning_rate": 4.2155655596809455e-09, |
| "loss": 0.6511, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9978118161925602, |
| "grad_norm": 0.8925267700204672, |
| "learning_rate": 1.054002482043237e-09, |
| "loss": 0.7083, |
| "step": 171 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 171, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 4050, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 18593527627776.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|