{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.004092034400369192,
  "eval_steps": 500,
  "global_step": 2250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.818681955719641e-05,
      "grad_norm": 1.608859896659851,
      "learning_rate": 0.0002,
      "loss": 3.3372,
      "step": 10
    },
    {
      "epoch": 3.637363911439282e-05,
      "grad_norm": 0.9594001770019531,
      "learning_rate": 0.0002,
      "loss": 0.1895,
      "step": 20
    },
    {
      "epoch": 5.4560458671589234e-05,
      "grad_norm": 0.7858404517173767,
      "learning_rate": 0.0002,
      "loss": 0.1819,
      "step": 30
    },
    {
      "epoch": 7.274727822878565e-05,
      "grad_norm": 0.05236278474330902,
      "learning_rate": 0.0002,
      "loss": 0.0967,
      "step": 40
    },
    {
      "epoch": 9.093409778598205e-05,
      "grad_norm": 0.00239331996999681,
      "learning_rate": 0.0002,
      "loss": 0.0001,
      "step": 50
    },
    {
      "epoch": 0.00010912091734317847,
      "grad_norm": 0.6015797853469849,
      "learning_rate": 0.0002,
      "loss": 0.9137,
      "step": 60
    },
    {
      "epoch": 0.00012730773690037487,
      "grad_norm": 0.2916141152381897,
      "learning_rate": 0.0002,
      "loss": 0.1584,
      "step": 70
    },
    {
      "epoch": 0.0001454945564575713,
      "grad_norm": 0.22034427523612976,
      "learning_rate": 0.0002,
      "loss": 0.1212,
      "step": 80
    },
    {
      "epoch": 0.0001636813760147677,
      "grad_norm": 0.05342680215835571,
      "learning_rate": 0.0002,
      "loss": 0.0933,
      "step": 90
    },
    {
      "epoch": 0.0001818681955719641,
      "grad_norm": 0.0009122246992774308,
      "learning_rate": 0.0002,
      "loss": 0.0002,
      "step": 100
    },
    {
      "epoch": 0.00020005501512916052,
      "grad_norm": 0.30845287442207336,
      "learning_rate": 0.0002,
      "loss": 0.8141,
      "step": 110
    },
    {
      "epoch": 0.00021824183468635694,
      "grad_norm": 0.1849660873413086,
      "learning_rate": 0.0002,
      "loss": 0.1604,
      "step": 120
    },
    {
      "epoch": 0.00023642865424355333,
      "grad_norm": 0.09605516493320465,
      "learning_rate": 0.0002,
      "loss": 0.1163,
      "step": 130
    },
    {
      "epoch": 0.00025461547380074975,
      "grad_norm": 0.4438878893852234,
      "learning_rate": 0.0002,
      "loss": 0.1043,
      "step": 140
    },
    {
      "epoch": 0.00027280229335794617,
      "grad_norm": 0.0012718827929347754,
      "learning_rate": 0.0002,
      "loss": 0.0018,
      "step": 150
    },
    {
      "epoch": 0.0002909891129151426,
      "grad_norm": 0.19092628359794617,
      "learning_rate": 0.0002,
      "loss": 0.6226,
      "step": 160
    },
    {
      "epoch": 0.000309175932472339,
      "grad_norm": 0.04102358967065811,
      "learning_rate": 0.0002,
      "loss": 0.1575,
      "step": 170
    },
    {
      "epoch": 0.0003273627520295354,
      "grad_norm": 0.06057624891400337,
      "learning_rate": 0.0002,
      "loss": 0.1119,
      "step": 180
    },
    {
      "epoch": 0.0003455495715867318,
      "grad_norm": 0.13942261040210724,
      "learning_rate": 0.0002,
      "loss": 0.0781,
      "step": 190
    },
    {
      "epoch": 0.0003637363911439282,
      "grad_norm": 0.005650315433740616,
      "learning_rate": 0.0002,
      "loss": 0.0013,
      "step": 200
    },
    {
      "epoch": 0.0003819232107011246,
      "grad_norm": 0.6245204210281372,
      "learning_rate": 0.0002,
      "loss": 0.7091,
      "step": 210
    },
    {
      "epoch": 0.00040011003025832104,
      "grad_norm": 0.058550119400024414,
      "learning_rate": 0.0002,
      "loss": 0.1411,
      "step": 220
    },
    {
      "epoch": 0.00041829684981551746,
      "grad_norm": 0.08625461906194687,
      "learning_rate": 0.0002,
      "loss": 0.1138,
      "step": 230
    },
    {
      "epoch": 0.0004364836693727139,
      "grad_norm": 0.06455521285533905,
      "learning_rate": 0.0002,
      "loss": 0.0805,
      "step": 240
    },
    {
      "epoch": 0.0004546704889299103,
      "grad_norm": 0.0020822149235755205,
      "learning_rate": 0.0002,
      "loss": 0.0013,
      "step": 250
    },
    {
      "epoch": 0.00047285730848710666,
      "grad_norm": 0.1977258324623108,
      "learning_rate": 0.0002,
      "loss": 0.5558,
      "step": 260
    },
    {
      "epoch": 0.0004910441280443031,
      "grad_norm": 0.20794034004211426,
      "learning_rate": 0.0002,
      "loss": 0.1263,
      "step": 270
    },
    {
      "epoch": 0.0005092309476014995,
      "grad_norm": 0.08760973066091537,
      "learning_rate": 0.0002,
      "loss": 0.1118,
      "step": 280
    },
    {
      "epoch": 0.0005274177671586959,
      "grad_norm": 0.299059122800827,
      "learning_rate": 0.0002,
      "loss": 0.0804,
      "step": 290
    },
    {
      "epoch": 0.0005456045867158923,
      "grad_norm": 0.002420844743028283,
      "learning_rate": 0.0002,
      "loss": 0.0022,
      "step": 300
    },
    {
      "epoch": 0.0005637914062730887,
      "grad_norm": 2.2061026096343994,
      "learning_rate": 0.0002,
      "loss": 0.5624,
      "step": 310
    },
    {
      "epoch": 0.0005819782258302852,
      "grad_norm": 0.7011717557907104,
      "learning_rate": 0.0002,
      "loss": 0.1177,
      "step": 320
    },
    {
      "epoch": 0.0006001650453874816,
      "grad_norm": 0.37657421827316284,
      "learning_rate": 0.0002,
      "loss": 0.1005,
      "step": 330
    },
    {
      "epoch": 0.000618351864944678,
      "grad_norm": 0.048011403530836105,
      "learning_rate": 0.0002,
      "loss": 0.079,
      "step": 340
    },
    {
      "epoch": 0.0006365386845018744,
      "grad_norm": 0.002076848642900586,
      "learning_rate": 0.0002,
      "loss": 0.0018,
      "step": 350
    },
    {
      "epoch": 0.0006547255040590708,
      "grad_norm": 0.721218466758728,
      "learning_rate": 0.0002,
      "loss": 0.5647,
      "step": 360
    },
    {
      "epoch": 0.0006729123236162671,
      "grad_norm": 0.09965512156486511,
      "learning_rate": 0.0002,
      "loss": 0.1403,
      "step": 370
    },
    {
      "epoch": 0.0006910991431734636,
      "grad_norm": 0.21733985841274261,
      "learning_rate": 0.0002,
      "loss": 0.1024,
      "step": 380
    },
    {
      "epoch": 0.00070928596273066,
      "grad_norm": 0.003134253202006221,
      "learning_rate": 0.0002,
      "loss": 0.0707,
      "step": 390
    },
    {
      "epoch": 0.0007274727822878564,
      "grad_norm": 0.0011866611894220114,
      "learning_rate": 0.0002,
      "loss": 0.0003,
      "step": 400
    },
    {
      "epoch": 0.0007456596018450528,
      "grad_norm": 0.37573525309562683,
      "learning_rate": 0.0002,
      "loss": 0.6122,
      "step": 410
    },
    {
      "epoch": 0.0007638464214022492,
      "grad_norm": 0.34029653668403625,
      "learning_rate": 0.0002,
      "loss": 0.1149,
      "step": 420
    },
    {
      "epoch": 0.0007820332409594457,
      "grad_norm": 0.35701191425323486,
      "learning_rate": 0.0002,
      "loss": 0.0972,
      "step": 430
    },
    {
      "epoch": 0.0008002200605166421,
      "grad_norm": 0.06324547529220581,
      "learning_rate": 0.0002,
      "loss": 0.0722,
      "step": 440
    },
    {
      "epoch": 0.0008184068800738385,
      "grad_norm": 0.0011920438846573234,
      "learning_rate": 0.0002,
      "loss": 0.0011,
      "step": 450
    },
    {
      "epoch": 0.0008365936996310349,
      "grad_norm": 0.861393392086029,
      "learning_rate": 0.0002,
      "loss": 0.4433,
      "step": 460
    },
    {
      "epoch": 0.0008547805191882313,
      "grad_norm": 0.16104361414909363,
      "learning_rate": 0.0002,
      "loss": 0.1176,
      "step": 470
    },
    {
      "epoch": 0.0008729673387454278,
      "grad_norm": 0.28712376952171326,
      "learning_rate": 0.0002,
      "loss": 0.0983,
      "step": 480
    },
    {
      "epoch": 0.0008911541583026242,
      "grad_norm": 0.07980292290449142,
      "learning_rate": 0.0002,
      "loss": 0.0721,
      "step": 490
    },
    {
      "epoch": 0.0009093409778598206,
      "grad_norm": 0.0018368292367085814,
      "learning_rate": 0.0002,
      "loss": 0.0019,
      "step": 500
    },
    {
      "epoch": 0.000927527797417017,
      "grad_norm": 0.05152284353971481,
      "learning_rate": 0.0002,
      "loss": 0.2823,
      "step": 510
    },
    {
      "epoch": 0.0009457146169742133,
      "grad_norm": 0.04693318158388138,
      "learning_rate": 0.0002,
      "loss": 0.1179,
      "step": 520
    },
    {
      "epoch": 0.0009639014365314097,
      "grad_norm": 0.10586889833211899,
      "learning_rate": 0.0002,
      "loss": 0.1094,
      "step": 530
    },
    {
      "epoch": 0.0009820882560886062,
      "grad_norm": 0.006325385998934507,
      "learning_rate": 0.0002,
      "loss": 0.0706,
      "step": 540
    },
    {
      "epoch": 0.0010002750756458027,
      "grad_norm": 4.665973028750159e-05,
      "learning_rate": 0.0002,
      "loss": 0.0002,
      "step": 550
    },
    {
      "epoch": 0.001018461895202999,
      "grad_norm": 0.293944776058197,
      "learning_rate": 0.0002,
      "loss": 0.732,
      "step": 560
    },
    {
      "epoch": 0.0010366487147601955,
      "grad_norm": 0.22614754736423492,
      "learning_rate": 0.0002,
      "loss": 0.1226,
      "step": 570
    },
    {
      "epoch": 0.0010548355343173918,
      "grad_norm": 0.10801248252391815,
      "learning_rate": 0.0002,
      "loss": 0.1065,
      "step": 580
    },
    {
      "epoch": 0.0010730223538745881,
      "grad_norm": 0.04501640051603317,
      "learning_rate": 0.0002,
      "loss": 0.0759,
      "step": 590
    },
    {
      "epoch": 0.0010912091734317847,
      "grad_norm": 0.00014656950952485204,
      "learning_rate": 0.0002,
      "loss": 0.0001,
      "step": 600
    },
    {
      "epoch": 0.001109395992988981,
      "grad_norm": 0.2490423321723938,
      "learning_rate": 0.0002,
      "loss": 0.4868,
      "step": 610
    },
    {
      "epoch": 0.0011275828125461775,
      "grad_norm": 0.026224857196211815,
      "learning_rate": 0.0002,
      "loss": 0.1242,
      "step": 620
    },
    {
      "epoch": 0.0011457696321033738,
      "grad_norm": 0.11845973134040833,
      "learning_rate": 0.0002,
      "loss": 0.1153,
      "step": 630
    },
    {
      "epoch": 0.0011639564516605703,
      "grad_norm": 0.8349707722663879,
      "learning_rate": 0.0002,
      "loss": 0.1229,
      "step": 640
    },
    {
      "epoch": 0.0011821432712177666,
      "grad_norm": 0.00733955716714263,
      "learning_rate": 0.0002,
      "loss": 0.0016,
      "step": 650
    },
    {
      "epoch": 0.0012003300907749632,
      "grad_norm": 0.3534531891345978,
      "learning_rate": 0.0002,
      "loss": 0.4961,
      "step": 660
    },
    {
      "epoch": 0.0012185169103321595,
      "grad_norm": 0.3938736915588379,
      "learning_rate": 0.0002,
      "loss": 0.126,
      "step": 670
    },
    {
      "epoch": 0.001236703729889356,
      "grad_norm": 0.16779105365276337,
      "learning_rate": 0.0002,
      "loss": 0.1217,
      "step": 680
    },
    {
      "epoch": 0.0012548905494465523,
      "grad_norm": 0.6998353600502014,
      "learning_rate": 0.0002,
      "loss": 0.1171,
      "step": 690
    },
    {
      "epoch": 0.0012730773690037488,
      "grad_norm": 0.0005113715888001025,
      "learning_rate": 0.0002,
      "loss": 0.0143,
      "step": 700
    },
    {
      "epoch": 0.0012912641885609452,
      "grad_norm": 0.4034057557582855,
      "learning_rate": 0.0002,
      "loss": 0.6274,
      "step": 710
    },
    {
      "epoch": 0.0013094510081181417,
      "grad_norm": 0.08985241502523422,
      "learning_rate": 0.0002,
      "loss": 0.1295,
      "step": 720
    },
    {
      "epoch": 0.001327637827675338,
      "grad_norm": 0.20418916642665863,
      "learning_rate": 0.0002,
      "loss": 0.1234,
      "step": 730
    },
    {
      "epoch": 0.0013458246472325343,
      "grad_norm": 1.0206961631774902,
      "learning_rate": 0.0002,
      "loss": 0.1107,
      "step": 740
    },
    {
      "epoch": 0.0013640114667897308,
      "grad_norm": 0.0008244478958658874,
      "learning_rate": 0.0002,
      "loss": 0.0087,
      "step": 750
    },
    {
      "epoch": 0.0013821982863469271,
      "grad_norm": 3.696362018585205,
      "learning_rate": 0.0002,
      "loss": 0.9078,
      "step": 760
    },
    {
      "epoch": 0.0014003851059041237,
      "grad_norm": 0.8782555460929871,
      "learning_rate": 0.0002,
      "loss": 0.1732,
      "step": 770
    },
    {
      "epoch": 0.00141857192546132,
      "grad_norm": 0.18350496888160706,
      "learning_rate": 0.0002,
      "loss": 0.1205,
      "step": 780
    },
    {
      "epoch": 0.0014367587450185165,
      "grad_norm": 0.634567141532898,
      "learning_rate": 0.0002,
      "loss": 0.0997,
      "step": 790
    },
    {
      "epoch": 0.0014549455645757128,
      "grad_norm": 0.01041293516755104,
      "learning_rate": 0.0002,
      "loss": 0.0065,
      "step": 800
    },
    {
      "epoch": 0.0014731323841329093,
      "grad_norm": 3.0739810466766357,
      "learning_rate": 0.0002,
      "loss": 0.7739,
      "step": 810
    },
    {
      "epoch": 0.0014913192036901056,
      "grad_norm": 0.4407779276371002,
      "learning_rate": 0.0002,
      "loss": 0.2848,
      "step": 820
    },
    {
      "epoch": 0.0015095060232473022,
      "grad_norm": 0.25743165612220764,
      "learning_rate": 0.0002,
      "loss": 0.1318,
      "step": 830
    },
    {
      "epoch": 0.0015276928428044985,
      "grad_norm": 2.0397753715515137,
      "learning_rate": 0.0002,
      "loss": 0.1385,
      "step": 840
    },
    {
      "epoch": 0.001545879662361695,
      "grad_norm": 0.060638878494501114,
      "learning_rate": 0.0002,
      "loss": 0.047,
      "step": 850
    },
    {
      "epoch": 0.0015640664819188913,
      "grad_norm": 2.5641930103302,
      "learning_rate": 0.0002,
      "loss": 0.5497,
      "step": 860
    },
    {
      "epoch": 0.0015822533014760878,
      "grad_norm": 0.9419782161712646,
      "learning_rate": 0.0002,
      "loss": 0.1626,
      "step": 870
    },
    {
      "epoch": 0.0016004401210332842,
      "grad_norm": 0.1152188628911972,
      "learning_rate": 0.0002,
      "loss": 0.1063,
      "step": 880
    },
    {
      "epoch": 0.0016186269405904805,
      "grad_norm": 0.6502537131309509,
      "learning_rate": 0.0002,
      "loss": 0.0871,
      "step": 890
    },
    {
      "epoch": 0.001636813760147677,
      "grad_norm": 0.023487605154514313,
      "learning_rate": 0.0002,
      "loss": 0.0094,
      "step": 900
    },
    {
      "epoch": 0.0016550005797048733,
      "grad_norm": 1.9080859422683716,
      "learning_rate": 0.0002,
      "loss": 0.5073,
      "step": 910
    },
    {
      "epoch": 0.0016731873992620698,
      "grad_norm": 0.44722509384155273,
      "learning_rate": 0.0002,
      "loss": 0.167,
      "step": 920
    },
    {
      "epoch": 0.0016913742188192661,
      "grad_norm": 0.24151289463043213,
      "learning_rate": 0.0002,
      "loss": 0.1237,
      "step": 930
    },
    {
      "epoch": 0.0017095610383764627,
      "grad_norm": 1.1394294500350952,
      "learning_rate": 0.0002,
      "loss": 0.1014,
      "step": 940
    },
    {
      "epoch": 0.001727747857933659,
      "grad_norm": 0.011057032272219658,
      "learning_rate": 0.0002,
      "loss": 0.0069,
      "step": 950
    },
    {
      "epoch": 0.0017459346774908555,
      "grad_norm": 4.32397985458374,
      "learning_rate": 0.0002,
      "loss": 0.7672,
      "step": 960
    },
    {
      "epoch": 0.0017641214970480518,
      "grad_norm": 0.9529788494110107,
      "learning_rate": 0.0002,
      "loss": 0.3286,
      "step": 970
    },
    {
      "epoch": 0.0017823083166052483,
      "grad_norm": 0.27676528692245483,
      "learning_rate": 0.0002,
      "loss": 0.126,
      "step": 980
    },
    {
      "epoch": 0.0018004951361624446,
      "grad_norm": 0.62413090467453,
      "learning_rate": 0.0002,
      "loss": 0.0844,
      "step": 990
    },
    {
      "epoch": 0.0018186819557196412,
      "grad_norm": 0.010768013074994087,
      "learning_rate": 0.0002,
      "loss": 0.0038,
      "step": 1000
    },
    {
      "epoch": 0.0018368687752768375,
      "grad_norm": 4.500253200531006,
      "learning_rate": 0.0002,
      "loss": 0.8415,
      "step": 1010
    },
    {
      "epoch": 0.001855055594834034,
      "grad_norm": 0.4661908447742462,
      "learning_rate": 0.0002,
      "loss": 0.2552,
      "step": 1020
    },
    {
      "epoch": 0.0018732424143912303,
      "grad_norm": 0.17337530851364136,
      "learning_rate": 0.0002,
      "loss": 0.1032,
      "step": 1030
    },
    {
      "epoch": 0.0018914292339484266,
      "grad_norm": 0.3994196355342865,
      "learning_rate": 0.0002,
      "loss": 0.0814,
      "step": 1040
    },
    {
      "epoch": 0.0019096160535056232,
      "grad_norm": 0.025604812428355217,
      "learning_rate": 0.0002,
      "loss": 0.0077,
      "step": 1050
    },
    {
      "epoch": 0.0019278028730628195,
      "grad_norm": 3.7293856143951416,
      "learning_rate": 0.0002,
      "loss": 0.6543,
      "step": 1060
    },
    {
      "epoch": 0.001945989692620016,
      "grad_norm": 1.2915587425231934,
      "learning_rate": 0.0002,
      "loss": 0.3782,
      "step": 1070
    },
    {
      "epoch": 0.0019641765121772123,
      "grad_norm": 1.0336438417434692,
      "learning_rate": 0.0002,
      "loss": 0.1515,
      "step": 1080
    },
    {
      "epoch": 0.0019823633317344086,
      "grad_norm": 1.6816803216934204,
      "learning_rate": 0.0002,
      "loss": 0.1173,
      "step": 1090
    },
    {
      "epoch": 0.0020005501512916054,
      "grad_norm": 0.015431606210768223,
      "learning_rate": 0.0002,
      "loss": 0.0165,
      "step": 1100
    },
    {
      "epoch": 0.0020187369708488017,
      "grad_norm": 3.059936046600342,
      "learning_rate": 0.0002,
      "loss": 0.6981,
      "step": 1110
    },
    {
      "epoch": 0.002036923790405998,
      "grad_norm": 0.5564419031143188,
      "learning_rate": 0.0002,
      "loss": 0.2193,
      "step": 1120
    },
    {
      "epoch": 0.0020551106099631943,
      "grad_norm": 0.11465179920196533,
      "learning_rate": 0.0002,
      "loss": 0.1157,
      "step": 1130
    },
    {
      "epoch": 0.002073297429520391,
      "grad_norm": 1.7084763050079346,
      "learning_rate": 0.0002,
      "loss": 0.0997,
      "step": 1140
    },
    {
      "epoch": 0.0020914842490775873,
      "grad_norm": 0.00997951254248619,
      "learning_rate": 0.0002,
      "loss": 0.0185,
      "step": 1150
    },
    {
      "epoch": 0.0021096710686347836,
      "grad_norm": 4.252767086029053,
      "learning_rate": 0.0002,
      "loss": 0.6719,
      "step": 1160
    },
    {
      "epoch": 0.00212785788819198,
      "grad_norm": 0.7261558175086975,
      "learning_rate": 0.0002,
      "loss": 0.1939,
      "step": 1170
    },
    {
      "epoch": 0.0021460447077491763,
      "grad_norm": 0.3190513253211975,
      "learning_rate": 0.0002,
      "loss": 0.0981,
      "step": 1180
    },
    {
      "epoch": 0.002164231527306373,
      "grad_norm": 0.5305098295211792,
      "learning_rate": 0.0002,
      "loss": 0.0756,
      "step": 1190
    },
    {
      "epoch": 0.0021824183468635693,
      "grad_norm": 0.03356161713600159,
      "learning_rate": 0.0002,
      "loss": 0.0064,
      "step": 1200
    },
    {
      "epoch": 0.0022006051664207656,
      "grad_norm": 3.8724617958068848,
      "learning_rate": 0.0002,
      "loss": 0.6282,
      "step": 1210
    },
    {
      "epoch": 0.002218791985977962,
      "grad_norm": 1.3044495582580566,
      "learning_rate": 0.0002,
      "loss": 0.3827,
      "step": 1220
    },
    {
      "epoch": 0.0022369788055351587,
      "grad_norm": 0.18937312066555023,
      "learning_rate": 0.0002,
      "loss": 0.1412,
      "step": 1230
    },
    {
      "epoch": 0.002255165625092355,
      "grad_norm": 2.488002061843872,
      "learning_rate": 0.0002,
      "loss": 0.1299,
      "step": 1240
    },
    {
      "epoch": 0.0022733524446495513,
      "grad_norm": 0.05979600548744202,
      "learning_rate": 0.0002,
      "loss": 0.0276,
      "step": 1250
    },
    {
      "epoch": 0.0022915392642067476,
      "grad_norm": 4.089362144470215,
      "learning_rate": 0.0002,
      "loss": 0.6152,
      "step": 1260
    },
    {
      "epoch": 0.0023097260837639444,
      "grad_norm": 0.2717827260494232,
      "learning_rate": 0.0002,
      "loss": 0.277,
      "step": 1270
    },
    {
      "epoch": 0.0023279129033211407,
      "grad_norm": 0.40145063400268555,
      "learning_rate": 0.0002,
      "loss": 0.113,
      "step": 1280
    },
    {
      "epoch": 0.002346099722878337,
      "grad_norm": 0.8193599581718445,
      "learning_rate": 0.0002,
      "loss": 0.0952,
      "step": 1290
    },
    {
      "epoch": 0.0023642865424355333,
      "grad_norm": 0.03877554461359978,
      "learning_rate": 0.0002,
      "loss": 0.0187,
      "step": 1300
    },
    {
      "epoch": 0.00238247336199273,
      "grad_norm": 3.7022697925567627,
      "learning_rate": 0.0002,
      "loss": 0.6711,
      "step": 1310
    },
    {
      "epoch": 0.0024006601815499263,
      "grad_norm": 1.0773606300354004,
      "learning_rate": 0.0002,
      "loss": 0.3495,
      "step": 1320
    },
    {
      "epoch": 0.0024188470011071227,
      "grad_norm": 0.46499383449554443,
      "learning_rate": 0.0002,
      "loss": 0.1181,
      "step": 1330
    },
    {
      "epoch": 0.002437033820664319,
      "grad_norm": 0.7035688757896423,
      "learning_rate": 0.0002,
      "loss": 0.08,
      "step": 1340
    },
    {
      "epoch": 0.0024552206402215153,
      "grad_norm": 0.022035669535398483,
      "learning_rate": 0.0002,
      "loss": 0.0102,
      "step": 1350
    },
    {
      "epoch": 0.002473407459778712,
      "grad_norm": 3.3636128902435303,
      "learning_rate": 0.0002,
      "loss": 0.6578,
      "step": 1360
    },
    {
      "epoch": 0.0024915942793359083,
      "grad_norm": 0.21912692487239838,
      "learning_rate": 0.0002,
      "loss": 0.2275,
      "step": 1370
    },
    {
      "epoch": 0.0025097810988931046,
      "grad_norm": 0.1632055938243866,
      "learning_rate": 0.0002,
      "loss": 0.1015,
      "step": 1380
    },
    {
      "epoch": 0.002527967918450301,
      "grad_norm": 0.44282346963882446,
      "learning_rate": 0.0002,
      "loss": 0.103,
      "step": 1390
    },
    {
      "epoch": 0.0025461547380074977,
      "grad_norm": 0.17366968095302582,
      "learning_rate": 0.0002,
      "loss": 0.0322,
      "step": 1400
    },
    {
      "epoch": 0.002564341557564694,
      "grad_norm": 1.816606879234314,
      "learning_rate": 0.0002,
      "loss": 0.3914,
      "step": 1410
    },
    {
      "epoch": 0.0025825283771218903,
      "grad_norm": 0.6741718649864197,
      "learning_rate": 0.0002,
      "loss": 0.2593,
      "step": 1420
    },
    {
      "epoch": 0.0026007151966790866,
      "grad_norm": 0.580172598361969,
      "learning_rate": 0.0002,
      "loss": 0.151,
      "step": 1430
    },
    {
      "epoch": 0.0026189020162362834,
      "grad_norm": 1.500544548034668,
      "learning_rate": 0.0002,
      "loss": 0.1266,
      "step": 1440
    },
    {
      "epoch": 0.0026370888357934797,
      "grad_norm": 0.03482064977288246,
      "learning_rate": 0.0002,
      "loss": 0.0209,
      "step": 1450
    },
    {
      "epoch": 0.002655275655350676,
      "grad_norm": 1.9266266822814941,
      "learning_rate": 0.0002,
      "loss": 0.5335,
      "step": 1460
    },
    {
      "epoch": 0.0026734624749078723,
      "grad_norm": 0.6076328158378601,
      "learning_rate": 0.0002,
      "loss": 0.1771,
      "step": 1470
    },
    {
      "epoch": 0.0026916492944650686,
      "grad_norm": 0.047803062945604324,
      "learning_rate": 0.0002,
      "loss": 0.1312,
      "step": 1480
    },
    {
      "epoch": 0.0027098361140222653,
      "grad_norm": 2.2670884132385254,
      "learning_rate": 0.0002,
      "loss": 0.1303,
      "step": 1490
    },
    {
      "epoch": 0.0027280229335794617,
      "grad_norm": 0.4342607259750366,
      "learning_rate": 0.0002,
      "loss": 0.0709,
      "step": 1500
    },
    {
      "epoch": 0.002746209753136658,
      "grad_norm": 1.5955005884170532,
      "learning_rate": 0.0002,
      "loss": 0.2632,
      "step": 1510
    },
    {
      "epoch": 0.0027643965726938543,
      "grad_norm": 0.20393006503582,
      "learning_rate": 0.0002,
      "loss": 0.1324,
      "step": 1520
    },
    {
      "epoch": 0.002782583392251051,
      "grad_norm": 0.2312391996383667,
      "learning_rate": 0.0002,
      "loss": 0.1056,
      "step": 1530
    },
    {
      "epoch": 0.0028007702118082473,
      "grad_norm": 1.2107295989990234,
      "learning_rate": 0.0002,
      "loss": 0.1038,
      "step": 1540
    },
    {
      "epoch": 0.0028189570313654436,
      "grad_norm": 0.07030847668647766,
      "learning_rate": 0.0002,
      "loss": 0.032,
      "step": 1550
    },
    {
      "epoch": 0.00283714385092264,
      "grad_norm": 3.563960552215576,
      "learning_rate": 0.0002,
      "loss": 0.5389,
      "step": 1560
    },
    {
      "epoch": 0.0028553306704798367,
      "grad_norm": 0.6965789794921875,
      "learning_rate": 0.0002,
      "loss": 0.2889,
      "step": 1570
    },
    {
      "epoch": 0.002873517490037033,
      "grad_norm": 0.5975427031517029,
      "learning_rate": 0.0002,
      "loss": 0.1235,
      "step": 1580
    },
    {
      "epoch": 0.0028917043095942293,
      "grad_norm": 1.371771216392517,
      "learning_rate": 0.0002,
      "loss": 0.0971,
      "step": 1590
    },
    {
      "epoch": 0.0029098911291514256,
      "grad_norm": 0.01906588114798069,
      "learning_rate": 0.0002,
      "loss": 0.0172,
      "step": 1600
    },
    {
      "epoch": 0.002928077948708622,
      "grad_norm": 3.8812315464019775,
      "learning_rate": 0.0002,
      "loss": 0.621,
      "step": 1610
    },
    {
      "epoch": 0.0029462647682658187,
      "grad_norm": 0.41589802503585815,
      "learning_rate": 0.0002,
      "loss": 0.2029,
      "step": 1620
    },
    {
      "epoch": 0.002964451587823015,
      "grad_norm": 0.24198026955127716,
      "learning_rate": 0.0002,
      "loss": 0.1042,
      "step": 1630
    },
    {
      "epoch": 0.0029826384073802113,
      "grad_norm": 0.8711221814155579,
      "learning_rate": 0.0002,
      "loss": 0.0904,
      "step": 1640
    },
    {
      "epoch": 0.0030008252269374076,
      "grad_norm": 0.06849978119134903,
      "learning_rate": 0.0002,
      "loss": 0.0226,
      "step": 1650
    },
    {
      "epoch": 0.0030190120464946043,
      "grad_norm": 3.912189483642578,
      "learning_rate": 0.0002,
      "loss": 0.5554,
      "step": 1660
    },
    {
      "epoch": 0.0030371988660518007,
      "grad_norm": 1.076832890510559,
      "learning_rate": 0.0002,
      "loss": 0.2968,
      "step": 1670
    },
    {
      "epoch": 0.003055385685608997,
      "grad_norm": 0.3734837472438812,
      "learning_rate": 0.0002,
      "loss": 0.1151,
      "step": 1680
    },
    {
      "epoch": 0.0030735725051661933,
      "grad_norm": 0.8407588005065918,
      "learning_rate": 0.0002,
      "loss": 0.0897,
      "step": 1690
    },
    {
      "epoch": 0.00309175932472339,
      "grad_norm": 0.023632407188415527,
      "learning_rate": 0.0002,
      "loss": 0.0113,
      "step": 1700
    },
    {
      "epoch": 0.0031099461442805863,
      "grad_norm": 4.268885612487793,
      "learning_rate": 0.0002,
      "loss": 0.6717,
      "step": 1710
    },
    {
      "epoch": 0.0031281329638377826,
      "grad_norm": 0.3088800013065338,
      "learning_rate": 0.0002,
      "loss": 0.2635,
      "step": 1720
    },
    {
      "epoch": 0.003146319783394979,
      "grad_norm": 0.05659230053424835,
      "learning_rate": 0.0002,
      "loss": 0.0987,
      "step": 1730
    },
    {
      "epoch": 0.0031645066029521757,
      "grad_norm": 0.5756633281707764,
      "learning_rate": 0.0002,
      "loss": 0.0949,
      "step": 1740
    },
    {
      "epoch": 0.003182693422509372,
      "grad_norm": 0.23241274058818817,
      "learning_rate": 0.0002,
      "loss": 0.044,
      "step": 1750
    },
    {
      "epoch": 0.0032008802420665683,
      "grad_norm": 2.2380006313323975,
      "learning_rate": 0.0002,
      "loss": 0.3458,
      "step": 1760
    },
    {
      "epoch": 0.0032190670616237646,
      "grad_norm": 0.4196106493473053,
      "learning_rate": 0.0002,
      "loss": 0.2116,
      "step": 1770
    },
    {
      "epoch": 0.003237253881180961,
      "grad_norm": 0.3544403612613678,
      "learning_rate": 0.0002,
      "loss": 0.1011,
      "step": 1780
    },
    {
      "epoch": 0.0032554407007381577,
      "grad_norm": 0.6422521471977234,
      "learning_rate": 0.0002,
      "loss": 0.084,
      "step": 1790
    },
    {
      "epoch": 0.003273627520295354,
      "grad_norm": 0.03676289692521095,
      "learning_rate": 0.0002,
      "loss": 0.0115,
      "step": 1800
    },
    {
      "epoch": 0.0032918143398525503,
      "grad_norm": 3.173424243927002,
      "learning_rate": 0.0002,
      "loss": 0.5644,
      "step": 1810
    },
    {
      "epoch": 0.0033100011594097466,
      "grad_norm": 0.14629468321800232,
      "learning_rate": 0.0002,
      "loss": 0.2249,
      "step": 1820
    },
    {
      "epoch": 0.0033281879789669433,
      "grad_norm": 0.27524232864379883,
      "learning_rate": 0.0002,
      "loss": 0.0965,
      "step": 1830
    },
    {
      "epoch": 0.0033463747985241397,
      "grad_norm": 0.5685613751411438,
      "learning_rate": 0.0002,
      "loss": 0.0949,
      "step": 1840
    },
    {
      "epoch": 0.003364561618081336,
      "grad_norm": 0.19684627652168274,
      "learning_rate": 0.0002,
      "loss": 0.0423,
      "step": 1850
    },
    {
      "epoch": 0.0033827484376385323,
      "grad_norm": 2.0270469188690186,
      "learning_rate": 0.0002,
      "loss": 0.3322,
      "step": 1860
    },
    {
      "epoch": 0.003400935257195729,
      "grad_norm": 0.3960348963737488,
      "learning_rate": 0.0002,
      "loss": 0.1933,
      "step": 1870
    },
    {
      "epoch": 0.0034191220767529253,
      "grad_norm": 0.8636507391929626,
      "learning_rate": 0.0002,
      "loss": 0.1056,
      "step": 1880
    },
    {
      "epoch": 0.0034373088963101216,
      "grad_norm": 0.7978588342666626,
      "learning_rate": 0.0002,
      "loss": 0.0812,
      "step": 1890
    },
    {
      "epoch": 0.003455495715867318,
      "grad_norm": 0.020584411919116974,
      "learning_rate": 0.0002,
      "loss": 0.0098,
      "step": 1900
    },
    {
      "epoch": 0.0034736825354245143,
      "grad_norm": 2.648928165435791,
      "learning_rate": 0.0002,
      "loss": 0.5485,
      "step": 1910
    },
    {
      "epoch": 0.003491869354981711,
      "grad_norm": 0.5433089137077332,
      "learning_rate": 0.0002,
      "loss": 0.1546,
      "step": 1920
    },
    {
      "epoch": 0.0035100561745389073,
      "grad_norm": 0.2638677656650543,
      "learning_rate": 0.0002,
      "loss": 0.0935,
      "step": 1930
    },
    {
      "epoch": 0.0035282429940961036,
      "grad_norm": 0.4292812943458557,
      "learning_rate": 0.0002,
      "loss": 0.0879,
      "step": 1940
    },
    {
      "epoch": 0.0035464298136533,
      "grad_norm": 0.09974557906389236,
      "learning_rate": 0.0002,
      "loss": 0.0234,
      "step": 1950
    },
    {
      "epoch": 0.0035646166332104967,
      "grad_norm": 1.626259446144104,
      "learning_rate": 0.0002,
      "loss": 0.4046,
      "step": 1960
    },
    {
      "epoch": 0.003582803452767693,
      "grad_norm": 0.7747110724449158,
      "learning_rate": 0.0002,
      "loss": 0.2436,
      "step": 1970
    },
    {
      "epoch": 0.0036009902723248893,
      "grad_norm": 1.130542516708374,
      "learning_rate": 0.0002,
      "loss": 0.1371,
      "step": 1980
    },
    {
      "epoch": 0.0036191770918820856,
      "grad_norm": 2.542160987854004,
      "learning_rate": 0.0002,
      "loss": 0.1204,
      "step": 1990
    },
    {
      "epoch": 0.0036373639114392823,
      "grad_norm": 0.1563112586736679,
      "learning_rate": 0.0002,
      "loss": 0.0343,
      "step": 2000
    },
    {
      "epoch": 0.0036555507309964787,
      "grad_norm": 3.1544902324676514,
      "learning_rate": 0.0002,
      "loss": 0.4769,
      "step": 2010
    },
    {
      "epoch": 0.003673737550553675,
      "grad_norm": 1.0212864875793457,
      "learning_rate": 0.0002,
      "loss": 0.2462,
      "step": 2020
    },
    {
      "epoch": 0.0036919243701108713,
      "grad_norm": 0.3565104305744171,
      "learning_rate": 0.0002,
      "loss": 0.1209,
      "step": 2030
    },
    {
      "epoch": 0.003710111189668068,
      "grad_norm": 1.3275020122528076,
      "learning_rate": 0.0002,
      "loss": 0.1064,
      "step": 2040
    },
    {
      "epoch": 0.0037282980092252643,
      "grad_norm": 0.11180760711431503,
      "learning_rate": 0.0002,
      "loss": 0.0326,
      "step": 2050
    },
    {
      "epoch": 0.0037464848287824606,
      "grad_norm": 1.9683802127838135,
      "learning_rate": 0.0002,
      "loss": 0.3878,
      "step": 2060
    },
    {
      "epoch": 0.003764671648339657,
      "grad_norm": 0.7875238060951233,
      "learning_rate": 0.0002,
      "loss": 0.1145,
      "step": 2070
    },
    {
      "epoch": 0.0037828584678968533,
      "grad_norm": 0.4307851195335388,
      "learning_rate": 0.0002,
      "loss": 0.0891,
      "step": 2080
    },
    {
      "epoch": 0.00380104528745405,
      "grad_norm": 0.6907076239585876,
      "learning_rate": 0.0002,
      "loss": 0.0801,
      "step": 2090
    },
    {
      "epoch": 0.0038192321070112463,
      "grad_norm": 0.04466943070292473,
      "learning_rate": 0.0002,
      "loss": 0.0148,
      "step": 2100
    },
    {
      "epoch": 0.0038374189265684426,
      "grad_norm": 2.8212766647338867,
      "learning_rate": 0.0002,
      "loss": 0.501,
      "step": 2110
    },
    {
      "epoch": 0.003855605746125639,
      "grad_norm": 0.4052332639694214,
      "learning_rate": 0.0002,
      "loss": 0.2379,
      "step": 2120
    },
    {
      "epoch": 0.0038737925656828357,
      "grad_norm": 0.5726248621940613,
      "learning_rate": 0.0002,
      "loss": 0.0925,
      "step": 2130
    },
    {
      "epoch": 0.003891979385240032,
      "grad_norm": 0.7385726571083069,
      "learning_rate": 0.0002,
      "loss": 0.0744,
      "step": 2140
    },
    {
      "epoch": 0.003910166204797228,
      "grad_norm": 0.01478211022913456,
      "learning_rate": 0.0002,
      "loss": 0.0094,
      "step": 2150
    },
    {
      "epoch": 0.003928353024354425,
      "grad_norm": 4.001941204071045,
      "learning_rate": 0.0002,
      "loss": 0.5398,
      "step": 2160
    },
    {
      "epoch": 0.003946539843911621,
      "grad_norm": 0.5501906275749207,
      "learning_rate": 0.0002,
      "loss": 0.1502,
      "step": 2170
    },
    {
      "epoch": 0.003964726663468817,
      "grad_norm": 0.05887573957443237,
      "learning_rate": 0.0002,
      "loss": 0.1137,
      "step": 2180
    },
    {
      "epoch": 0.003982913483026014,
      "grad_norm": 0.6087843179702759,
      "learning_rate": 0.0002,
      "loss": 0.0738,
      "step": 2190
    },
    {
      "epoch": 0.004001100302583211,
      "grad_norm": 0.027440447360277176,
      "learning_rate": 0.0002,
      "loss": 0.0122,
      "step": 2200
    },
    {
      "epoch": 0.004019287122140407,
      "grad_norm": 3.8189752101898193,
      "learning_rate": 0.0002,
      "loss": 0.5507,
      "step": 2210
    },
    {
      "epoch": 0.004037473941697603,
      "grad_norm": 0.7837066054344177,
      "learning_rate": 0.0002,
      "loss": 0.2931,
      "step": 2220
    },
    {
      "epoch": 0.0040556607612548,
      "grad_norm": 0.4113297462463379,
      "learning_rate": 0.0002,
      "loss": 0.1169,
      "step": 2230
    },
    {
      "epoch": 0.004073847580811996,
      "grad_norm": 0.9759702086448669,
      "learning_rate": 0.0002,
      "loss": 0.0902,
      "step": 2240
    },
    {
      "epoch": 0.004092034400369192,
      "grad_norm": 0.03002658113837242,
      "learning_rate": 0.0002,
      "loss": 0.0165,
      "step": 2250
    }
  ],
  "logging_steps": 10,
  "max_steps": 100000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3899519044675994e+17,
  "train_batch_size": 24,
  "trial_name": null,
  "trial_params": null
}