| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2493, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00802447464767541, |
| "grad_norm": 1.7389945502862767, |
| "learning_rate": 3.8e-06, |
| "loss": 0.4595, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01604894929535082, |
| "grad_norm": 1.21721434066401, |
| "learning_rate": 7.8e-06, |
| "loss": 0.34, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02407342394302623, |
| "grad_norm": 1.4115052448687924, |
| "learning_rate": 1.18e-05, |
| "loss": 0.3013, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03209789859070164, |
| "grad_norm": 1.1854124218870574, |
| "learning_rate": 1.58e-05, |
| "loss": 0.2841, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04012237323837705, |
| "grad_norm": 1.0821081494365905, |
| "learning_rate": 1.9800000000000004e-05, |
| "loss": 0.2679, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04814684788605246, |
| "grad_norm": 1.1396877768521696, |
| "learning_rate": 2.38e-05, |
| "loss": 0.2983, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05617132253372787, |
| "grad_norm": 0.8863724861851471, |
| "learning_rate": 2.7800000000000005e-05, |
| "loss": 0.2976, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06419579718140328, |
| "grad_norm": 1.1286540636393274, |
| "learning_rate": 3.18e-05, |
| "loss": 0.3047, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07222027182907868, |
| "grad_norm": 1.6326246678745442, |
| "learning_rate": 3.58e-05, |
| "loss": 0.314, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0802447464767541, |
| "grad_norm": 1.012185268707376, |
| "learning_rate": 3.9800000000000005e-05, |
| "loss": 0.3085, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0882692211244295, |
| "grad_norm": 0.8913802081695356, |
| "learning_rate": 4.38e-05, |
| "loss": 0.3193, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.09629369577210492, |
| "grad_norm": 0.9943286125063995, |
| "learning_rate": 4.78e-05, |
| "loss": 0.3397, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10431817041978032, |
| "grad_norm": 1.9971168907457575, |
| "learning_rate": 4.999801376569132e-05, |
| "loss": 0.3527, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11234264506745574, |
| "grad_norm": 0.8841449553244364, |
| "learning_rate": 4.9979380055256516e-05, |
| "loss": 0.3249, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.12036711971513114, |
| "grad_norm": 0.7868550214589912, |
| "learning_rate": 4.99411464181739e-05, |
| "loss": 0.342, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12839159436280656, |
| "grad_norm": 0.8832077761817314, |
| "learning_rate": 4.988334285424654e-05, |
| "loss": 0.3471, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.13641606901048198, |
| "grad_norm": 0.83820196705535, |
| "learning_rate": 4.980601471870785e-05, |
| "loss": 0.346, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.14444054365815737, |
| "grad_norm": 0.9195620174146593, |
| "learning_rate": 4.970922268663387e-05, |
| "loss": 0.341, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.15246501830583278, |
| "grad_norm": 0.8652054536281212, |
| "learning_rate": 4.9593042705334926e-05, |
| "loss": 0.3415, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1604894929535082, |
| "grad_norm": 0.7223602683001062, |
| "learning_rate": 4.945756593476401e-05, |
| "loss": 0.34, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16851396760118362, |
| "grad_norm": 0.7592581825293532, |
| "learning_rate": 4.9302898675988616e-05, |
| "loss": 0.3454, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.176538442248859, |
| "grad_norm": 0.8985377633784642, |
| "learning_rate": 4.912916228778228e-05, |
| "loss": 0.354, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.18456291689653442, |
| "grad_norm": 0.8407832804783139, |
| "learning_rate": 4.893649309140107e-05, |
| "loss": 0.3381, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.19258739154420984, |
| "grad_norm": 0.7817366357087974, |
| "learning_rate": 4.8725042263619896e-05, |
| "loss": 0.3384, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.20061186619188526, |
| "grad_norm": 0.7998744498632891, |
| "learning_rate": 4.849497571811257e-05, |
| "loss": 0.3433, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20863634083956065, |
| "grad_norm": 0.7366878614464625, |
| "learning_rate": 4.824647397526854e-05, |
| "loss": 0.332, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.21666081548723606, |
| "grad_norm": 0.9257245781477709, |
| "learning_rate": 4.797973202054865e-05, |
| "loss": 0.3578, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.22468529013491148, |
| "grad_norm": 0.7229221912787754, |
| "learning_rate": 4.769495915149091e-05, |
| "loss": 0.3318, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2327097647825869, |
| "grad_norm": 0.7896861351942223, |
| "learning_rate": 4.7392378813486374e-05, |
| "loss": 0.3239, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2407342394302623, |
| "grad_norm": 0.7057182651650622, |
| "learning_rate": 4.707222842445401e-05, |
| "loss": 0.3367, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2487587140779377, |
| "grad_norm": 0.7646297450840412, |
| "learning_rate": 4.6734759188552116e-05, |
| "loss": 0.3345, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2567831887256131, |
| "grad_norm": 0.8144984197146983, |
| "learning_rate": 4.638023589907239e-05, |
| "loss": 0.3264, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.26480766337328854, |
| "grad_norm": 0.799665735836617, |
| "learning_rate": 4.6008936730671414e-05, |
| "loss": 0.339, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.27283213802096395, |
| "grad_norm": 0.888997665044708, |
| "learning_rate": 4.562115302110254e-05, |
| "loss": 0.3413, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.28085661266863937, |
| "grad_norm": 0.8011920131999447, |
| "learning_rate": 4.5217189042619345e-05, |
| "loss": 0.3389, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.28888108731631473, |
| "grad_norm": 0.8067168553032359, |
| "learning_rate": 4.4797361763230224e-05, |
| "loss": 0.3185, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.29690556196399015, |
| "grad_norm": 0.6665228849255169, |
| "learning_rate": 4.436200059799121e-05, |
| "loss": 0.3232, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.30493003661166557, |
| "grad_norm": 0.6617968011184702, |
| "learning_rate": 4.391144715053238e-05, |
| "loss": 0.318, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.312954511259341, |
| "grad_norm": 0.7925748113649975, |
| "learning_rate": 4.344605494502053e-05, |
| "loss": 0.3438, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3209789859070164, |
| "grad_norm": 0.7874288584383095, |
| "learning_rate": 4.2966189148768474e-05, |
| "loss": 0.3218, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3290034605546918, |
| "grad_norm": 0.5964012085577759, |
| "learning_rate": 4.247222628570862e-05, |
| "loss": 0.325, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.33702793520236723, |
| "grad_norm": 0.7901232690113225, |
| "learning_rate": 4.196455394095561e-05, |
| "loss": 0.3259, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.34505240985004265, |
| "grad_norm": 0.6161692661704234, |
| "learning_rate": 4.144357045668993e-05, |
| "loss": 0.3282, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.353076884497718, |
| "grad_norm": 0.7071927191922839, |
| "learning_rate": 4.0909684619601e-05, |
| "loss": 0.3215, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.36110135914539343, |
| "grad_norm": 0.7540766905998272, |
| "learning_rate": 4.036331534013502e-05, |
| "loss": 0.3141, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.36912583379306885, |
| "grad_norm": 0.7828196756015018, |
| "learning_rate": 3.980489132379937e-05, |
| "loss": 0.3099, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.37715030844074426, |
| "grad_norm": 0.7665378450501007, |
| "learning_rate": 3.923485073478123e-05, |
| "loss": 0.3154, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3851747830884197, |
| "grad_norm": 0.7064791446421703, |
| "learning_rate": 3.8653640852144643e-05, |
| "loss": 0.3192, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3931992577360951, |
| "grad_norm": 0.6894942843454174, |
| "learning_rate": 3.80617177188755e-05, |
| "loss": 0.3274, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4012237323837705, |
| "grad_norm": 0.6747469008968356, |
| "learning_rate": 3.745954578405012e-05, |
| "loss": 0.3216, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.40924820703144593, |
| "grad_norm": 0.8112185519554851, |
| "learning_rate": 3.684759753840789e-05, |
| "loss": 0.3239, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4172726816791213, |
| "grad_norm": 0.6811337824140572, |
| "learning_rate": 3.622635314361416e-05, |
| "loss": 0.3125, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4252971563267967, |
| "grad_norm": 0.7484799717463894, |
| "learning_rate": 3.559630005550416e-05, |
| "loss": 0.3209, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4333216309744721, |
| "grad_norm": 0.6914997047255979, |
| "learning_rate": 3.495793264160359e-05, |
| "loss": 0.3061, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.44134610562214754, |
| "grad_norm": 0.6177004112053062, |
| "learning_rate": 3.4311751793225964e-05, |
| "loss": 0.3081, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.44937058026982296, |
| "grad_norm": 0.7656940899490844, |
| "learning_rate": 3.365826453245115e-05, |
| "loss": 0.3258, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4573950549174984, |
| "grad_norm": 0.7072757457639977, |
| "learning_rate": 3.2997983614293404e-05, |
| "loss": 0.3158, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4654195295651738, |
| "grad_norm": 0.7029721091978121, |
| "learning_rate": 3.2331427124371115e-05, |
| "loss": 0.3155, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4734440042128492, |
| "grad_norm": 0.742312775553165, |
| "learning_rate": 3.1659118072393906e-05, |
| "loss": 0.3151, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4814684788605246, |
| "grad_norm": 0.7586510237201778, |
| "learning_rate": 3.098158398178606e-05, |
| "loss": 0.2921, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4894929535082, |
| "grad_norm": 0.6302386060292237, |
| "learning_rate": 3.0299356475768326e-05, |
| "loss": 0.3042, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4975174281558754, |
| "grad_norm": 0.7177873646521117, |
| "learning_rate": 2.9612970860222816e-05, |
| "loss": 0.3074, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5055419028035508, |
| "grad_norm": 0.6442414947030823, |
| "learning_rate": 2.8922965703668337e-05, |
| "loss": 0.3047, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5135663774512262, |
| "grad_norm": 0.7076061109007938, |
| "learning_rate": 2.8229882414675672e-05, |
| "loss": 0.3033, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5215908520989017, |
| "grad_norm": 0.6578079486576341, |
| "learning_rate": 2.75342648170545e-05, |
| "loss": 0.3051, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5296153267465771, |
| "grad_norm": 0.7945155922763601, |
| "learning_rate": 2.6836658723145175e-05, |
| "loss": 0.2967, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5376398013942525, |
| "grad_norm": 0.6487682986757239, |
| "learning_rate": 2.613761150555019e-05, |
| "loss": 0.3044, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5456642760419279, |
| "grad_norm": 0.6958050009061036, |
| "learning_rate": 2.5437671667641445e-05, |
| "loss": 0.3061, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5536887506896033, |
| "grad_norm": 0.6266867011506455, |
| "learning_rate": 2.4737388413180217e-05, |
| "loss": 0.2897, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5617132253372787, |
| "grad_norm": 0.7124320790824831, |
| "learning_rate": 2.403731121538762e-05, |
| "loss": 0.2862, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5697376999849542, |
| "grad_norm": 0.7470494242121043, |
| "learning_rate": 2.3337989385803567e-05, |
| "loss": 0.3018, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5777621746326295, |
| "grad_norm": 0.6734120153458941, |
| "learning_rate": 2.2639971643272688e-05, |
| "loss": 0.2927, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5857866492803049, |
| "grad_norm": 0.6694340995198956, |
| "learning_rate": 2.194380568339519e-05, |
| "loss": 0.2912, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5938111239279803, |
| "grad_norm": 0.6416254443296999, |
| "learning_rate": 2.1250037748780706e-05, |
| "loss": 0.2833, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6018355985756557, |
| "grad_norm": 0.6576796287869531, |
| "learning_rate": 2.0559212200442152e-05, |
| "loss": 0.3005, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6098600732233311, |
| "grad_norm": 0.7534672667085268, |
| "learning_rate": 1.9871871090666023e-05, |
| "loss": 0.2902, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6178845478710066, |
| "grad_norm": 0.683888843023483, |
| "learning_rate": 1.9188553737694142e-05, |
| "loss": 0.2802, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.625909022518682, |
| "grad_norm": 0.6505090735155789, |
| "learning_rate": 1.8509796302550763e-05, |
| "loss": 0.287, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6339334971663574, |
| "grad_norm": 0.7052076493927231, |
| "learning_rate": 1.783613136834688e-05, |
| "loss": 0.281, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6419579718140328, |
| "grad_norm": 0.6384510613508823, |
| "learning_rate": 1.7168087522391958e-05, |
| "loss": 0.2763, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6499824464617082, |
| "grad_norm": 0.6633378463426743, |
| "learning_rate": 1.6506188941440958e-05, |
| "loss": 0.285, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6580069211093836, |
| "grad_norm": 0.6982839509670195, |
| "learning_rate": 1.585095498040205e-05, |
| "loss": 0.2804, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.666031395757059, |
| "grad_norm": 0.6270272887976673, |
| "learning_rate": 1.5202899764827799e-05, |
| "loss": 0.2785, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6740558704047345, |
| "grad_norm": 0.6172418320458167, |
| "learning_rate": 1.4562531787509504e-05, |
| "loss": 0.2734, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6820803450524099, |
| "grad_norm": 0.7230716801753746, |
| "learning_rate": 1.3930353509491225e-05, |
| "loss": 0.2921, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6901048197000853, |
| "grad_norm": 0.624829631926983, |
| "learning_rate": 1.3306860965816687e-05, |
| "loss": 0.2745, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6981292943477607, |
| "grad_norm": 0.6057171773833744, |
| "learning_rate": 1.2692543376318172e-05, |
| "loss": 0.2812, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.706153768995436, |
| "grad_norm": 0.6600106987481957, |
| "learning_rate": 1.2087882761753052e-05, |
| "loss": 0.2762, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7141782436431114, |
| "grad_norm": 0.6513027790812009, |
| "learning_rate": 1.1493353565588946e-05, |
| "loss": 0.2824, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7222027182907869, |
| "grad_norm": 0.6464858105319229, |
| "learning_rate": 1.0909422281734372e-05, |
| "loss": 0.2772, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7302271929384623, |
| "grad_norm": 0.5987416675990789, |
| "learning_rate": 1.033654708850704e-05, |
| "loss": 0.2729, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7382516675861377, |
| "grad_norm": 0.6700627827898618, |
| "learning_rate": 9.775177489126839e-06, |
| "loss": 0.2662, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7462761422338131, |
| "grad_norm": 0.6252184614146273, |
| "learning_rate": 9.225753959015726e-06, |
| "loss": 0.2757, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7543006168814885, |
| "grad_norm": 0.6775143307950428, |
| "learning_rate": 8.688707600181237e-06, |
| "loss": 0.2714, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.762325091529164, |
| "grad_norm": 0.6952554867252799, |
| "learning_rate": 8.164459802954771e-06, |
| "loss": 0.2782, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7703495661768394, |
| "grad_norm": 0.6783881222495053, |
| "learning_rate": 7.653421915350096e-06, |
| "loss": 0.2699, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7783740408245148, |
| "grad_norm": 0.6556587479775633, |
| "learning_rate": 7.1559949203015005e-06, |
| "loss": 0.2674, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7863985154721902, |
| "grad_norm": 0.7269086771080256, |
| "learning_rate": 6.67256912103485e-06, |
| "loss": 0.2696, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7944229901198656, |
| "grad_norm": 0.6782398065845676, |
| "learning_rate": 6.203523834818395e-06, |
| "loss": 0.2616, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.802447464767541, |
| "grad_norm": 0.6401864638918741, |
| "learning_rate": 5.749227095333684e-06, |
| "loss": 0.2727, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8104719394152164, |
| "grad_norm": 0.6920549327363028, |
| "learning_rate": 5.310035363900029e-06, |
| "loss": 0.2613, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.8184964140628919, |
| "grad_norm": 0.7544225062042402, |
| "learning_rate": 4.886293249779203e-06, |
| "loss": 0.2649, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8265208887105673, |
| "grad_norm": 0.7784252166653262, |
| "learning_rate": 4.4783332397797725e-06, |
| "loss": 0.2646, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.8345453633582426, |
| "grad_norm": 0.7372167602245259, |
| "learning_rate": 4.086475437373222e-06, |
| "loss": 0.2663, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.842569838005918, |
| "grad_norm": 0.5940476502560079, |
| "learning_rate": 3.711027311526605e-06, |
| "loss": 0.2474, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8505943126535934, |
| "grad_norm": 0.7426516397535643, |
| "learning_rate": 3.3522834554488265e-06, |
| "loss": 0.2667, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8586187873012688, |
| "grad_norm": 0.6474191378185361, |
| "learning_rate": 3.010525355439739e-06, |
| "loss": 0.2633, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8666432619489443, |
| "grad_norm": 0.7138316369428281, |
| "learning_rate": 2.6860211700235616e-06, |
| "loss": 0.2653, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8746677365966197, |
| "grad_norm": 0.7205767765435191, |
| "learning_rate": 2.3790255195398293e-06, |
| "loss": 0.269, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8826922112442951, |
| "grad_norm": 0.6866728998175627, |
| "learning_rate": 2.0897792863570133e-06, |
| "loss": 0.2781, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8907166858919705, |
| "grad_norm": 0.6386115100985029, |
| "learning_rate": 1.818509425865561e-06, |
| "loss": 0.248, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8987411605396459, |
| "grad_norm": 0.5816063089909334, |
| "learning_rate": 1.5654287883986568e-06, |
| "loss": 0.2495, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.9067656351873213, |
| "grad_norm": 0.6044106984921237, |
| "learning_rate": 1.3307359522204187e-06, |
| "loss": 0.2546, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.9147901098349968, |
| "grad_norm": 0.6487114272956552, |
| "learning_rate": 1.1146150677126321e-06, |
| "loss": 0.2493, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.9228145844826722, |
| "grad_norm": 0.7113121988343898, |
| "learning_rate": 9.172357128822001e-07, |
| "loss": 0.2649, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9308390591303476, |
| "grad_norm": 0.684922616701092, |
| "learning_rate": 7.387527603027383e-07, |
| "loss": 0.2619, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.938863533778023, |
| "grad_norm": 0.6512571317668248, |
| "learning_rate": 5.793062555946999e-07, |
| "loss": 0.2626, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.9468880084256984, |
| "grad_norm": 0.9231979423940574, |
| "learning_rate": 4.390213075393973e-07, |
| "loss": 0.268, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9549124830733738, |
| "grad_norm": 0.5874482600389693, |
| "learning_rate": 3.1800798991309944e-07, |
| "loss": 0.2627, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9629369577210491, |
| "grad_norm": 0.827108940722453, |
| "learning_rate": 2.163612551182942e-07, |
| "loss": 0.2586, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9709614323687246, |
| "grad_norm": 0.5907349717505673, |
| "learning_rate": 1.3416085967982994e-07, |
| "loss": 0.2642, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9789859070164, |
| "grad_norm": 0.7334026096749088, |
| "learning_rate": 7.14713016644053e-08, |
| "loss": 0.2649, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9870103816640754, |
| "grad_norm": 0.523367756897521, |
| "learning_rate": 2.8341770072548567e-08, |
| "loss": 0.2682, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9950348563117508, |
| "grad_norm": 0.6000740449055824, |
| "learning_rate": 4.806106242741249e-09, |
| "loss": 0.2627, |
| "step": 2480 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 2493, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 41482954211328.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|