{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 590,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08547008547008547,
      "grad_norm": 4.597737789154053,
      "learning_rate": 8e-05,
      "loss": 3.144,
      "step": 5
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 3.6257293224334717,
      "learning_rate": 0.00018,
      "loss": 1.023,
      "step": 10
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 0.9387032389640808,
      "learning_rate": 0.00019862068965517243,
      "loss": 0.3362,
      "step": 15
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 0.41102728247642517,
      "learning_rate": 0.00019689655172413795,
      "loss": 0.1727,
      "step": 20
    },
    {
      "epoch": 0.42735042735042733,
      "grad_norm": 0.504966676235199,
      "learning_rate": 0.00019517241379310345,
      "loss": 0.168,
      "step": 25
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.43475794792175293,
      "learning_rate": 0.00019344827586206898,
      "loss": 0.1699,
      "step": 30
    },
    {
      "epoch": 0.5982905982905983,
      "grad_norm": 0.5228849649429321,
      "learning_rate": 0.0001917241379310345,
      "loss": 0.1454,
      "step": 35
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 0.4822940230369568,
      "learning_rate": 0.00019,
      "loss": 0.206,
      "step": 40
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.4049399197101593,
      "learning_rate": 0.00018827586206896554,
      "loss": 0.132,
      "step": 45
    },
    {
      "epoch": 0.8547008547008547,
      "grad_norm": 0.4594310224056244,
      "learning_rate": 0.00018655172413793104,
      "loss": 0.1541,
      "step": 50
    },
    {
      "epoch": 0.9401709401709402,
      "grad_norm": 0.3666519820690155,
      "learning_rate": 0.00018482758620689654,
      "loss": 0.1144,
      "step": 55
    },
    {
      "epoch": 1.017094017094017,
      "grad_norm": 0.16928212344646454,
      "learning_rate": 0.00018310344827586207,
      "loss": 0.1208,
      "step": 60
    },
    {
      "epoch": 1.1025641025641026,
      "grad_norm": 0.09923699498176575,
      "learning_rate": 0.0001813793103448276,
      "loss": 0.0761,
      "step": 65
    },
    {
      "epoch": 1.188034188034188,
      "grad_norm": 0.23626509308815002,
      "learning_rate": 0.0001796551724137931,
      "loss": 0.0786,
      "step": 70
    },
    {
      "epoch": 1.2735042735042734,
      "grad_norm": 0.45735999941825867,
      "learning_rate": 0.00017793103448275862,
      "loss": 0.13,
      "step": 75
    },
    {
      "epoch": 1.358974358974359,
      "grad_norm": 0.24871651828289032,
      "learning_rate": 0.00017620689655172415,
      "loss": 0.0814,
      "step": 80
    },
    {
      "epoch": 1.4444444444444444,
      "grad_norm": 0.21524538099765778,
      "learning_rate": 0.00017448275862068965,
      "loss": 0.0737,
      "step": 85
    },
    {
      "epoch": 1.5299145299145298,
      "grad_norm": 0.4590378701686859,
      "learning_rate": 0.00017275862068965518,
      "loss": 0.0955,
      "step": 90
    },
    {
      "epoch": 1.6153846153846154,
      "grad_norm": 0.7036776542663574,
      "learning_rate": 0.0001710344827586207,
      "loss": 0.0671,
      "step": 95
    },
    {
      "epoch": 1.7008547008547008,
      "grad_norm": 0.26162663102149963,
      "learning_rate": 0.0001693103448275862,
      "loss": 0.0828,
      "step": 100
    },
    {
      "epoch": 1.7863247863247862,
      "grad_norm": 0.4105569124221802,
      "learning_rate": 0.00016758620689655173,
      "loss": 0.0768,
      "step": 105
    },
    {
      "epoch": 1.8717948717948718,
      "grad_norm": 0.3037894666194916,
      "learning_rate": 0.00016586206896551726,
      "loss": 0.1149,
      "step": 110
    },
    {
      "epoch": 1.9572649572649574,
      "grad_norm": 0.19420042634010315,
      "learning_rate": 0.00016413793103448276,
      "loss": 0.0635,
      "step": 115
    },
    {
      "epoch": 2.034188034188034,
      "grad_norm": 0.13855452835559845,
      "learning_rate": 0.0001624137931034483,
      "loss": 0.0594,
      "step": 120
    },
    {
      "epoch": 2.1196581196581197,
      "grad_norm": 0.17749273777008057,
      "learning_rate": 0.00016068965517241382,
      "loss": 0.0725,
      "step": 125
    },
    {
      "epoch": 2.2051282051282053,
      "grad_norm": 0.13107630610466003,
      "learning_rate": 0.00015896551724137932,
      "loss": 0.0619,
      "step": 130
    },
    {
      "epoch": 2.2905982905982905,
      "grad_norm": 0.11133825778961182,
      "learning_rate": 0.00015724137931034485,
      "loss": 0.0624,
      "step": 135
    },
    {
      "epoch": 2.376068376068376,
      "grad_norm": 0.187343031167984,
      "learning_rate": 0.00015551724137931037,
      "loss": 0.0581,
      "step": 140
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 0.27685755491256714,
      "learning_rate": 0.00015379310344827587,
      "loss": 0.0613,
      "step": 145
    },
    {
      "epoch": 2.547008547008547,
      "grad_norm": 0.4320373833179474,
      "learning_rate": 0.0001520689655172414,
      "loss": 0.0735,
      "step": 150
    },
    {
      "epoch": 2.6324786324786325,
      "grad_norm": 0.13862545788288116,
      "learning_rate": 0.0001503448275862069,
      "loss": 0.0582,
      "step": 155
    },
    {
      "epoch": 2.717948717948718,
      "grad_norm": 0.7963452339172363,
      "learning_rate": 0.00014862068965517243,
      "loss": 0.0651,
      "step": 160
    },
    {
      "epoch": 2.8034188034188032,
      "grad_norm": 0.14564156532287598,
      "learning_rate": 0.00014689655172413793,
      "loss": 0.0559,
      "step": 165
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 0.15069833397865295,
      "learning_rate": 0.00014517241379310346,
      "loss": 0.0529,
      "step": 170
    },
    {
      "epoch": 2.9743589743589745,
      "grad_norm": 0.3557753562927246,
      "learning_rate": 0.00014344827586206896,
      "loss": 0.0773,
      "step": 175
    },
    {
      "epoch": 3.051282051282051,
      "grad_norm": 0.08716096729040146,
      "learning_rate": 0.0001417241379310345,
      "loss": 0.0513,
      "step": 180
    },
    {
      "epoch": 3.1367521367521367,
      "grad_norm": 0.15282496809959412,
      "learning_rate": 0.00014,
      "loss": 0.0621,
      "step": 185
    },
    {
      "epoch": 3.2222222222222223,
      "grad_norm": 0.09816001355648041,
      "learning_rate": 0.00013827586206896552,
      "loss": 0.0648,
      "step": 190
    },
    {
      "epoch": 3.3076923076923075,
      "grad_norm": 0.13748367130756378,
      "learning_rate": 0.00013655172413793104,
      "loss": 0.0485,
      "step": 195
    },
    {
      "epoch": 3.393162393162393,
      "grad_norm": 0.10656469315290451,
      "learning_rate": 0.00013482758620689654,
      "loss": 0.0531,
      "step": 200
    },
    {
      "epoch": 3.4786324786324787,
      "grad_norm": 0.1901499480009079,
      "learning_rate": 0.00013310344827586207,
      "loss": 0.0612,
      "step": 205
    },
    {
      "epoch": 3.564102564102564,
      "grad_norm": 0.16148889064788818,
      "learning_rate": 0.0001313793103448276,
      "loss": 0.0546,
      "step": 210
    },
    {
      "epoch": 3.6495726495726495,
      "grad_norm": 0.19384047389030457,
      "learning_rate": 0.0001296551724137931,
      "loss": 0.0589,
      "step": 215
    },
    {
      "epoch": 3.735042735042735,
      "grad_norm": 0.08794084936380386,
      "learning_rate": 0.00012793103448275863,
      "loss": 0.0573,
      "step": 220
    },
    {
      "epoch": 3.8205128205128203,
      "grad_norm": 0.10576070100069046,
      "learning_rate": 0.00012620689655172415,
      "loss": 0.0471,
      "step": 225
    },
    {
      "epoch": 3.905982905982906,
      "grad_norm": 0.08111118525266647,
      "learning_rate": 0.00012448275862068966,
      "loss": 0.0572,
      "step": 230
    },
    {
      "epoch": 3.9914529914529915,
      "grad_norm": 0.4230298101902008,
      "learning_rate": 0.00012275862068965518,
      "loss": 0.0617,
      "step": 235
    },
    {
      "epoch": 4.068376068376068,
      "grad_norm": 0.08736063539981842,
      "learning_rate": 0.00012103448275862071,
      "loss": 0.0493,
      "step": 240
    },
    {
      "epoch": 4.153846153846154,
      "grad_norm": 0.06979858875274658,
      "learning_rate": 0.00011931034482758621,
      "loss": 0.0469,
      "step": 245
    },
    {
      "epoch": 4.239316239316239,
      "grad_norm": 0.10242439806461334,
      "learning_rate": 0.00011758620689655173,
      "loss": 0.0508,
      "step": 250
    },
    {
      "epoch": 4.3247863247863245,
      "grad_norm": 0.11685860902070999,
      "learning_rate": 0.00011586206896551725,
      "loss": 0.0522,
      "step": 255
    },
    {
      "epoch": 4.410256410256411,
      "grad_norm": 0.1084512323141098,
      "learning_rate": 0.00011413793103448275,
      "loss": 0.0519,
      "step": 260
    },
    {
      "epoch": 4.495726495726496,
      "grad_norm": 0.09368503093719482,
      "learning_rate": 0.00011241379310344828,
      "loss": 0.0494,
      "step": 265
    },
    {
      "epoch": 4.581196581196581,
      "grad_norm": 0.1777074784040451,
      "learning_rate": 0.00011068965517241381,
      "loss": 0.0515,
      "step": 270
    },
    {
      "epoch": 4.666666666666667,
      "grad_norm": 0.056768111884593964,
      "learning_rate": 0.00010896551724137931,
      "loss": 0.044,
      "step": 275
    },
    {
      "epoch": 4.752136752136752,
      "grad_norm": 0.08062291890382767,
      "learning_rate": 0.00010724137931034484,
      "loss": 0.0476,
      "step": 280
    },
    {
      "epoch": 4.837606837606837,
      "grad_norm": 0.09975454211235046,
      "learning_rate": 0.00010551724137931037,
      "loss": 0.0522,
      "step": 285
    },
    {
      "epoch": 4.923076923076923,
      "grad_norm": 0.14652380347251892,
      "learning_rate": 0.00010379310344827587,
      "loss": 0.0498,
      "step": 290
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.12409216165542603,
      "learning_rate": 0.0001020689655172414,
      "loss": 0.0474,
      "step": 295
    },
    {
      "epoch": 5.085470085470085,
      "grad_norm": 0.09494274109601974,
      "learning_rate": 0.0001003448275862069,
      "loss": 0.0462,
      "step": 300
    },
    {
      "epoch": 5.170940170940171,
      "grad_norm": 0.1240062415599823,
      "learning_rate": 9.862068965517242e-05,
      "loss": 0.0425,
      "step": 305
    },
    {
      "epoch": 5.256410256410256,
      "grad_norm": 0.1713438332080841,
      "learning_rate": 9.689655172413794e-05,
      "loss": 0.0431,
      "step": 310
    },
    {
      "epoch": 5.3418803418803416,
      "grad_norm": 0.1990644931793213,
      "learning_rate": 9.517241379310345e-05,
      "loss": 0.048,
      "step": 315
    },
    {
      "epoch": 5.427350427350428,
      "grad_norm": 0.09711036831140518,
      "learning_rate": 9.344827586206896e-05,
      "loss": 0.0476,
      "step": 320
    },
    {
      "epoch": 5.512820512820513,
      "grad_norm": 0.11504214257001877,
      "learning_rate": 9.172413793103448e-05,
      "loss": 0.0494,
      "step": 325
    },
    {
      "epoch": 5.598290598290598,
      "grad_norm": 0.08380427211523056,
      "learning_rate": 9e-05,
      "loss": 0.047,
      "step": 330
    },
    {
      "epoch": 5.683760683760684,
      "grad_norm": 0.08641541749238968,
      "learning_rate": 8.827586206896552e-05,
      "loss": 0.0457,
      "step": 335
    },
    {
      "epoch": 5.769230769230769,
      "grad_norm": 0.0935196503996849,
      "learning_rate": 8.655172413793103e-05,
      "loss": 0.0489,
      "step": 340
    },
    {
      "epoch": 5.854700854700854,
      "grad_norm": 0.11386577785015106,
      "learning_rate": 8.482758620689656e-05,
      "loss": 0.0479,
      "step": 345
    },
    {
      "epoch": 5.94017094017094,
      "grad_norm": 0.08249244838953018,
      "learning_rate": 8.310344827586208e-05,
      "loss": 0.0469,
      "step": 350
    },
    {
      "epoch": 6.017094017094017,
      "grad_norm": 0.09115161001682281,
      "learning_rate": 8.137931034482759e-05,
      "loss": 0.0455,
      "step": 355
    },
    {
      "epoch": 6.102564102564102,
      "grad_norm": 0.06610054522752762,
      "learning_rate": 7.965517241379312e-05,
      "loss": 0.0432,
      "step": 360
    },
    {
      "epoch": 6.188034188034188,
      "grad_norm": 0.09798604249954224,
      "learning_rate": 7.793103448275862e-05,
      "loss": 0.0442,
      "step": 365
    },
    {
      "epoch": 6.273504273504273,
      "grad_norm": 0.12107487767934799,
      "learning_rate": 7.620689655172413e-05,
      "loss": 0.0418,
      "step": 370
    },
    {
      "epoch": 6.358974358974359,
      "grad_norm": 0.10651250928640366,
      "learning_rate": 7.448275862068966e-05,
      "loss": 0.0437,
      "step": 375
    },
    {
      "epoch": 6.444444444444445,
      "grad_norm": 0.09335967153310776,
      "learning_rate": 7.275862068965517e-05,
      "loss": 0.044,
      "step": 380
    },
    {
      "epoch": 6.52991452991453,
      "grad_norm": 0.10894130915403366,
      "learning_rate": 7.103448275862069e-05,
      "loss": 0.0493,
      "step": 385
    },
    {
      "epoch": 6.615384615384615,
      "grad_norm": 0.09522519260644913,
      "learning_rate": 6.931034482758622e-05,
      "loss": 0.0463,
      "step": 390
    },
    {
      "epoch": 6.700854700854701,
      "grad_norm": 0.09910976886749268,
      "learning_rate": 6.758620689655173e-05,
      "loss": 0.0427,
      "step": 395
    },
    {
      "epoch": 6.786324786324786,
      "grad_norm": 0.11286190897226334,
      "learning_rate": 6.586206896551724e-05,
      "loss": 0.0444,
      "step": 400
    },
    {
      "epoch": 6.871794871794872,
      "grad_norm": 0.07890793681144714,
      "learning_rate": 6.413793103448276e-05,
      "loss": 0.0407,
      "step": 405
    },
    {
      "epoch": 6.957264957264957,
      "grad_norm": 0.08769431710243225,
      "learning_rate": 6.241379310344829e-05,
      "loss": 0.0479,
      "step": 410
    },
    {
      "epoch": 7.034188034188034,
      "grad_norm": 0.06925784051418304,
      "learning_rate": 6.068965517241379e-05,
      "loss": 0.0439,
      "step": 415
    },
    {
      "epoch": 7.119658119658119,
      "grad_norm": 0.08389502763748169,
      "learning_rate": 5.896551724137931e-05,
      "loss": 0.0437,
      "step": 420
    },
    {
      "epoch": 7.205128205128205,
      "grad_norm": 0.10391002893447876,
      "learning_rate": 5.7241379310344835e-05,
      "loss": 0.042,
      "step": 425
    },
    {
      "epoch": 7.2905982905982905,
      "grad_norm": 0.09842480719089508,
      "learning_rate": 5.551724137931035e-05,
      "loss": 0.0407,
      "step": 430
    },
    {
      "epoch": 7.3760683760683765,
      "grad_norm": 0.09367308020591736,
      "learning_rate": 5.379310344827586e-05,
      "loss": 0.0422,
      "step": 435
    },
    {
      "epoch": 7.461538461538462,
      "grad_norm": 0.11631827801465988,
      "learning_rate": 5.2068965517241384e-05,
      "loss": 0.0453,
      "step": 440
    },
    {
      "epoch": 7.547008547008547,
      "grad_norm": 0.13546331226825714,
      "learning_rate": 5.03448275862069e-05,
      "loss": 0.0405,
      "step": 445
    },
    {
      "epoch": 7.632478632478632,
      "grad_norm": 0.1015164852142334,
      "learning_rate": 4.862068965517241e-05,
      "loss": 0.0433,
      "step": 450
    },
    {
      "epoch": 7.717948717948718,
      "grad_norm": 0.12304691225290298,
      "learning_rate": 4.689655172413793e-05,
      "loss": 0.0439,
      "step": 455
    },
    {
      "epoch": 7.803418803418803,
      "grad_norm": 0.11133451014757156,
      "learning_rate": 4.5172413793103454e-05,
      "loss": 0.0404,
      "step": 460
    },
    {
      "epoch": 7.888888888888889,
      "grad_norm": 0.11199292540550232,
      "learning_rate": 4.344827586206897e-05,
      "loss": 0.0401,
      "step": 465
    },
    {
      "epoch": 7.9743589743589745,
      "grad_norm": 0.10854869335889816,
      "learning_rate": 4.172413793103448e-05,
      "loss": 0.047,
      "step": 470
    },
    {
      "epoch": 8.051282051282051,
      "grad_norm": 0.08034314215183258,
      "learning_rate": 4e-05,
      "loss": 0.0372,
      "step": 475
    },
    {
      "epoch": 8.136752136752136,
      "grad_norm": 0.07888869941234589,
      "learning_rate": 3.827586206896552e-05,
      "loss": 0.0374,
      "step": 480
    },
    {
      "epoch": 8.222222222222221,
      "grad_norm": 0.08299173414707184,
      "learning_rate": 3.655172413793104e-05,
      "loss": 0.0415,
      "step": 485
    },
    {
      "epoch": 8.307692307692308,
      "grad_norm": 0.10082942992448807,
      "learning_rate": 3.482758620689655e-05,
      "loss": 0.0431,
      "step": 490
    },
    {
      "epoch": 8.393162393162394,
      "grad_norm": 0.13129588961601257,
      "learning_rate": 3.310344827586207e-05,
      "loss": 0.0381,
      "step": 495
    },
    {
      "epoch": 8.478632478632479,
      "grad_norm": 0.0956198126077652,
      "learning_rate": 3.137931034482759e-05,
      "loss": 0.0391,
      "step": 500
    },
    {
      "epoch": 8.564102564102564,
      "grad_norm": 0.10935048758983612,
      "learning_rate": 2.96551724137931e-05,
      "loss": 0.0415,
      "step": 505
    },
    {
      "epoch": 8.649572649572649,
      "grad_norm": 0.09700857102870941,
      "learning_rate": 2.7931034482758622e-05,
      "loss": 0.042,
      "step": 510
    },
    {
      "epoch": 8.735042735042736,
      "grad_norm": 0.09681924432516098,
      "learning_rate": 2.620689655172414e-05,
      "loss": 0.041,
      "step": 515
    },
    {
      "epoch": 8.820512820512821,
      "grad_norm": 0.10170122236013412,
      "learning_rate": 2.4482758620689654e-05,
      "loss": 0.0404,
      "step": 520
    },
    {
      "epoch": 8.905982905982906,
      "grad_norm": 0.10559462755918503,
      "learning_rate": 2.2758620689655175e-05,
      "loss": 0.0395,
      "step": 525
    },
    {
      "epoch": 8.991452991452991,
      "grad_norm": 0.11863423138856888,
      "learning_rate": 2.1034482758620692e-05,
      "loss": 0.0433,
      "step": 530
    },
    {
      "epoch": 9.068376068376068,
      "grad_norm": 0.0633588433265686,
      "learning_rate": 1.9310344827586207e-05,
      "loss": 0.0383,
      "step": 535
    },
    {
      "epoch": 9.153846153846153,
      "grad_norm": 0.08409127593040466,
      "learning_rate": 1.7586206896551724e-05,
      "loss": 0.038,
      "step": 540
    },
    {
      "epoch": 9.239316239316238,
      "grad_norm": 0.12133090943098068,
      "learning_rate": 1.586206896551724e-05,
      "loss": 0.0366,
      "step": 545
    },
    {
      "epoch": 9.324786324786325,
      "grad_norm": 0.09883731603622437,
      "learning_rate": 1.4137931034482759e-05,
      "loss": 0.0386,
      "step": 550
    },
    {
      "epoch": 9.41025641025641,
      "grad_norm": 0.20076970756053925,
      "learning_rate": 1.2413793103448277e-05,
      "loss": 0.0375,
      "step": 555
    },
    {
      "epoch": 9.495726495726496,
      "grad_norm": 0.103940449655056,
      "learning_rate": 1.0689655172413794e-05,
      "loss": 0.0394,
      "step": 560
    },
    {
      "epoch": 9.581196581196581,
      "grad_norm": 0.09235844761133194,
      "learning_rate": 8.96551724137931e-06,
      "loss": 0.0405,
      "step": 565
    },
    {
      "epoch": 9.666666666666666,
      "grad_norm": 0.07304095476865768,
      "learning_rate": 7.241379310344828e-06,
      "loss": 0.0352,
      "step": 570
    },
    {
      "epoch": 9.752136752136753,
      "grad_norm": 0.12776847183704376,
      "learning_rate": 5.517241379310345e-06,
      "loss": 0.04,
      "step": 575
    },
    {
      "epoch": 9.837606837606838,
      "grad_norm": 0.11009430885314941,
      "learning_rate": 3.793103448275862e-06,
      "loss": 0.0374,
      "step": 580
    },
    {
      "epoch": 9.923076923076923,
      "grad_norm": 0.13841569423675537,
      "learning_rate": 2.0689655172413796e-06,
      "loss": 0.0401,
      "step": 585
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.1534666121006012,
      "learning_rate": 3.4482758620689656e-07,
      "loss": 0.0366,
      "step": 590
    },
    {
      "epoch": 10.0,
      "step": 590,
      "total_flos": 9496524054435840.0,
      "train_loss": 0.09625538042036154,
      "train_runtime": 681.7113,
      "train_samples_per_second": 6.85,
      "train_steps_per_second": 0.865
    }
  ],
  "logging_steps": 5,
  "max_steps": 590,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9496524054435840.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}