{
  "best_global_step": 4284,
  "best_metric": 0.9999124458258547,
  "best_model_checkpoint": "./aynur_model3/checkpoint-4284",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00023346757719021772,
      "grad_norm": 3.623156785964966,
      "learning_rate": 0.0,
      "loss": 0.6427,
      "step": 1
    },
    {
      "epoch": 0.002334675771902177,
      "grad_norm": 1.9220610857009888,
      "learning_rate": 8.391608391608393e-06,
      "loss": 0.6315,
      "step": 10
    },
    {
      "epoch": 0.004669351543804354,
      "grad_norm": 2.1033425331115723,
      "learning_rate": 1.7715617715617717e-05,
      "loss": 0.4519,
      "step": 20
    },
    {
      "epoch": 0.007004027315706532,
      "grad_norm": 0.2185792475938797,
      "learning_rate": 2.7039627039627042e-05,
      "loss": 0.0917,
      "step": 30
    },
    {
      "epoch": 0.009338703087608709,
      "grad_norm": 0.013207816518843174,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.0019,
      "step": 40
    },
    {
      "epoch": 0.011673378859510886,
      "grad_norm": 0.005016999784857035,
      "learning_rate": 4.568764568764569e-05,
      "loss": 0.0002,
      "step": 50
    },
    {
      "epoch": 0.014008054631413063,
      "grad_norm": 0.004350466653704643,
      "learning_rate": 5.314685314685315e-05,
      "loss": 0.0264,
      "step": 60
    },
    {
      "epoch": 0.01634273040331524,
      "grad_norm": 0.0037658039946109056,
      "learning_rate": 6.247086247086247e-05,
      "loss": 0.0609,
      "step": 70
    },
    {
      "epoch": 0.018677406175217418,
      "grad_norm": 0.003915839828550816,
      "learning_rate": 7.17948717948718e-05,
      "loss": 0.0008,
      "step": 80
    },
    {
      "epoch": 0.021012081947119593,
      "grad_norm": 0.018606621772050858,
      "learning_rate": 8.111888111888112e-05,
      "loss": 0.0134,
      "step": 90
    },
    {
      "epoch": 0.023346757719021772,
      "grad_norm": 0.005168155301362276,
      "learning_rate": 9.044289044289046e-05,
      "loss": 0.0012,
      "step": 100
    },
    {
      "epoch": 0.025681433490923947,
      "grad_norm": 0.004493937361985445,
      "learning_rate": 9.976689976689977e-05,
      "loss": 0.0035,
      "step": 110
    },
    {
      "epoch": 0.028016109262826126,
      "grad_norm": 0.006067783106118441,
      "learning_rate": 0.00010909090909090909,
      "loss": 0.0091,
      "step": 120
    },
    {
      "epoch": 0.030350785034728302,
      "grad_norm": 0.017138389870524406,
      "learning_rate": 0.00011841491841491842,
      "loss": 0.0003,
      "step": 130
    },
    {
      "epoch": 0.03268546080663048,
      "grad_norm": 0.06054692715406418,
      "learning_rate": 0.00012773892773892774,
      "loss": 0.0036,
      "step": 140
    },
    {
      "epoch": 0.03502013657853266,
      "grad_norm": 0.09328058362007141,
      "learning_rate": 0.00013706293706293706,
      "loss": 0.1175,
      "step": 150
    },
    {
      "epoch": 0.037354812350434835,
      "grad_norm": 0.006551404017955065,
      "learning_rate": 0.00014638694638694638,
      "loss": 0.0009,
      "step": 160
    },
    {
      "epoch": 0.03968948812233701,
      "grad_norm": 0.08505500108003616,
      "learning_rate": 0.0001557109557109557,
      "loss": 0.0159,
      "step": 170
    },
    {
      "epoch": 0.042024163894239186,
      "grad_norm": 0.007050831336528063,
      "learning_rate": 0.00016503496503496504,
      "loss": 0.0008,
      "step": 180
    },
    {
      "epoch": 0.04435883966614136,
      "grad_norm": 0.031755417585372925,
      "learning_rate": 0.00017435897435897436,
      "loss": 0.05,
      "step": 190
    },
    {
      "epoch": 0.046693515438043544,
      "grad_norm": 0.06047971546649933,
      "learning_rate": 0.00018368298368298368,
      "loss": 0.0186,
      "step": 200
    },
    {
      "epoch": 0.04902819120994572,
      "grad_norm": 0.11913339048624039,
      "learning_rate": 0.000193006993006993,
      "loss": 0.0093,
      "step": 210
    },
    {
      "epoch": 0.051362866981847895,
      "grad_norm": 0.006012667436152697,
      "learning_rate": 0.00020233100233100232,
      "loss": 0.0105,
      "step": 220
    },
    {
      "epoch": 0.05369754275375007,
      "grad_norm": 0.006747289560735226,
      "learning_rate": 0.00021165501165501164,
      "loss": 0.0003,
      "step": 230
    },
    {
      "epoch": 0.05603221852565225,
      "grad_norm": 5.616020202636719,
      "learning_rate": 0.00022097902097902096,
      "loss": 0.0484,
      "step": 240
    },
    {
      "epoch": 0.05836689429755443,
      "grad_norm": 0.04582913592457771,
      "learning_rate": 0.00023030303030303033,
      "loss": 0.0243,
      "step": 250
    },
    {
      "epoch": 0.060701570069456603,
      "grad_norm": 5.976644992828369,
      "learning_rate": 0.00023962703962703965,
      "loss": 0.0437,
      "step": 260
    },
    {
      "epoch": 0.06303624584135878,
      "grad_norm": 0.006856445223093033,
      "learning_rate": 0.00024895104895104897,
      "loss": 0.0933,
      "step": 270
    },
    {
      "epoch": 0.06537092161326095,
      "grad_norm": 0.031221158802509308,
      "learning_rate": 0.0002582750582750583,
      "loss": 0.015,
      "step": 280
    },
    {
      "epoch": 0.06770559738516313,
      "grad_norm": 0.5157426595687866,
      "learning_rate": 0.0002675990675990676,
      "loss": 0.0367,
      "step": 290
    },
    {
      "epoch": 0.07004027315706532,
      "grad_norm": 0.011151552200317383,
      "learning_rate": 0.00027692307692307695,
      "loss": 0.0501,
      "step": 300
    },
    {
      "epoch": 0.0723749489289675,
      "grad_norm": 0.03010399080812931,
      "learning_rate": 0.00028624708624708624,
      "loss": 0.0472,
      "step": 310
    },
    {
      "epoch": 0.07470962470086967,
      "grad_norm": 0.044886477291584015,
      "learning_rate": 0.0002955710955710956,
      "loss": 0.0259,
      "step": 320
    },
    {
      "epoch": 0.07704430047277185,
      "grad_norm": 2.6834709644317627,
      "learning_rate": 0.0003048951048951049,
      "loss": 0.0336,
      "step": 330
    },
    {
      "epoch": 0.07937897624467402,
      "grad_norm": 0.021105894818902016,
      "learning_rate": 0.0003142191142191143,
      "loss": 0.009,
      "step": 340
    },
    {
      "epoch": 0.0817136520165762,
      "grad_norm": 0.0037551075220108032,
      "learning_rate": 0.00032354312354312357,
      "loss": 0.0003,
      "step": 350
    },
    {
      "epoch": 0.08404832778847837,
      "grad_norm": 0.001763952779583633,
      "learning_rate": 0.0003328671328671329,
      "loss": 0.0001,
      "step": 360
    },
    {
      "epoch": 0.08638300356038055,
      "grad_norm": 0.10732467472553253,
      "learning_rate": 0.0003421911421911422,
      "loss": 0.0161,
      "step": 370
    },
    {
      "epoch": 0.08871767933228272,
      "grad_norm": 0.028179295361042023,
      "learning_rate": 0.00035151515151515155,
      "loss": 0.0233,
      "step": 380
    },
    {
      "epoch": 0.09105235510418491,
      "grad_norm": 1.2709873914718628,
      "learning_rate": 0.00036083916083916084,
      "loss": 0.0281,
      "step": 390
    },
    {
      "epoch": 0.09338703087608709,
      "grad_norm": 0.6180899143218994,
      "learning_rate": 0.0003701631701631702,
      "loss": 0.041,
      "step": 400
    },
    {
      "epoch": 0.09572170664798926,
      "grad_norm": 2.0692641735076904,
      "learning_rate": 0.0003794871794871795,
      "loss": 0.1022,
      "step": 410
    },
    {
      "epoch": 0.09805638241989144,
      "grad_norm": 0.06485776603221893,
      "learning_rate": 0.0003888111888111888,
      "loss": 0.0525,
      "step": 420
    },
    {
      "epoch": 0.10039105819179361,
      "grad_norm": 0.03012872114777565,
      "learning_rate": 0.00039813519813519817,
      "loss": 0.011,
      "step": 430
    },
    {
      "epoch": 0.10272573396369579,
      "grad_norm": 4.067958354949951,
      "learning_rate": 0.0003991699092088197,
      "loss": 0.0437,
      "step": 440
    },
    {
      "epoch": 0.10506040973559796,
      "grad_norm": 0.11274830996990204,
      "learning_rate": 0.0003981322957198444,
      "loss": 0.0304,
      "step": 450
    },
    {
      "epoch": 0.10739508550750014,
      "grad_norm": 1.0862525701522827,
      "learning_rate": 0.00039709468223086904,
      "loss": 0.0078,
      "step": 460
    },
    {
      "epoch": 0.10972976127940233,
      "grad_norm": 1.328466773033142,
      "learning_rate": 0.0003960570687418937,
      "loss": 0.0373,
      "step": 470
    },
    {
      "epoch": 0.1120644370513045,
      "grad_norm": 0.19725392758846283,
      "learning_rate": 0.00039501945525291835,
      "loss": 0.0802,
      "step": 480
    },
    {
      "epoch": 0.11439911282320668,
      "grad_norm": 1.5255461931228638,
      "learning_rate": 0.00039398184176394295,
      "loss": 0.0357,
      "step": 490
    },
    {
      "epoch": 0.11673378859510886,
      "grad_norm": 0.059472762048244476,
      "learning_rate": 0.00039294422827496756,
      "loss": 0.0269,
      "step": 500
    },
    {
      "epoch": 0.11906846436701103,
      "grad_norm": 0.14000196754932404,
      "learning_rate": 0.0003919066147859922,
      "loss": 0.0142,
      "step": 510
    },
    {
      "epoch": 0.12140314013891321,
      "grad_norm": 0.012143092229962349,
      "learning_rate": 0.00039086900129701687,
      "loss": 0.03,
      "step": 520
    },
    {
      "epoch": 0.12373781591081538,
      "grad_norm": 0.5115292072296143,
      "learning_rate": 0.00038983138780804153,
      "loss": 0.0342,
      "step": 530
    },
    {
      "epoch": 0.12607249168271756,
      "grad_norm": 5.99329137802124,
      "learning_rate": 0.0003887937743190662,
      "loss": 0.0446,
      "step": 540
    },
    {
      "epoch": 0.12840716745461975,
      "grad_norm": 0.1291157752275467,
      "learning_rate": 0.0003877561608300908,
      "loss": 0.0161,
      "step": 550
    },
    {
      "epoch": 0.1307418432265219,
      "grad_norm": 1.0032269954681396,
      "learning_rate": 0.00038671854734111544,
      "loss": 0.0166,
      "step": 560
    },
    {
      "epoch": 0.1330765189984241,
      "grad_norm": 16.30780601501465,
      "learning_rate": 0.0003856809338521401,
      "loss": 0.1032,
      "step": 570
    },
    {
      "epoch": 0.13541119477032626,
      "grad_norm": 4.544622421264648,
      "learning_rate": 0.00038464332036316476,
      "loss": 0.0279,
      "step": 580
    },
    {
      "epoch": 0.13774587054222845,
      "grad_norm": 5.893352508544922,
      "learning_rate": 0.0003836057068741894,
      "loss": 0.0304,
      "step": 590
    },
    {
      "epoch": 0.14008054631413064,
      "grad_norm": 0.32387709617614746,
      "learning_rate": 0.000382568093385214,
      "loss": 0.0404,
      "step": 600
    },
    {
      "epoch": 0.1424152220860328,
      "grad_norm": 0.051759008318185806,
      "learning_rate": 0.0003815304798962387,
      "loss": 0.0182,
      "step": 610
    },
    {
      "epoch": 0.144749897857935,
      "grad_norm": 0.09647126495838165,
      "learning_rate": 0.0003804928664072633,
      "loss": 0.019,
      "step": 620
    },
    {
      "epoch": 0.14708457362983715,
      "grad_norm": 0.1588832288980484,
      "learning_rate": 0.00037945525291828793,
      "loss": 0.016,
      "step": 630
    },
    {
      "epoch": 0.14941924940173934,
      "grad_norm": 0.008694116957485676,
      "learning_rate": 0.0003784176394293126,
      "loss": 0.0042,
      "step": 640
    },
    {
      "epoch": 0.1517539251736415,
      "grad_norm": 0.040139373391866684,
      "learning_rate": 0.00037738002594033725,
      "loss": 0.0201,
      "step": 650
    },
    {
      "epoch": 0.1540886009455437,
      "grad_norm": 0.032652173191308975,
      "learning_rate": 0.0003763424124513619,
      "loss": 0.0127,
      "step": 660
    },
    {
      "epoch": 0.15642327671744585,
      "grad_norm": 0.027006104588508606,
      "learning_rate": 0.0003753047989623865,
      "loss": 0.0044,
      "step": 670
    },
    {
      "epoch": 0.15875795248934804,
      "grad_norm": 2.665090799331665,
      "learning_rate": 0.00037426718547341117,
      "loss": 0.0179,
      "step": 680
    },
    {
      "epoch": 0.16109262826125023,
      "grad_norm": 0.011558118276298046,
      "learning_rate": 0.0003732295719844358,
      "loss": 0.0132,
      "step": 690
    },
    {
      "epoch": 0.1634273040331524,
      "grad_norm": 0.013224626891314983,
      "learning_rate": 0.0003721919584954605,
      "loss": 0.0044,
      "step": 700
    },
    {
      "epoch": 0.16576197980505458,
      "grad_norm": 0.03626665472984314,
      "learning_rate": 0.00037115434500648514,
      "loss": 0.0019,
      "step": 710
    },
    {
      "epoch": 0.16809665557695674,
      "grad_norm": 0.10942396521568298,
      "learning_rate": 0.00037011673151750974,
      "loss": 0.0472,
      "step": 720
    },
    {
      "epoch": 0.17043133134885893,
      "grad_norm": 0.8933264017105103,
      "learning_rate": 0.0003690791180285344,
      "loss": 0.0148,
      "step": 730
    },
    {
      "epoch": 0.1727660071207611,
      "grad_norm": 0.06506644189357758,
      "learning_rate": 0.000368041504539559,
      "loss": 0.0249,
      "step": 740
    },
    {
      "epoch": 0.17510068289266328,
      "grad_norm": 0.16546858847141266,
      "learning_rate": 0.00036700389105058366,
      "loss": 0.0105,
      "step": 750
    },
    {
      "epoch": 0.17743535866456545,
      "grad_norm": 0.032960060983896255,
      "learning_rate": 0.0003659662775616083,
      "loss": 0.0022,
      "step": 760
    },
    {
      "epoch": 0.17977003443646764,
      "grad_norm": 0.04502630606293678,
      "learning_rate": 0.00036492866407263297,
      "loss": 0.0234,
      "step": 770
    },
    {
      "epoch": 0.18210471020836982,
      "grad_norm": 0.018004219979047775,
      "learning_rate": 0.0003638910505836576,
      "loss": 0.0024,
      "step": 780
    },
    {
      "epoch": 0.18443938598027199,
      "grad_norm": 0.026385951787233353,
      "learning_rate": 0.00036285343709468223,
      "loss": 0.0079,
      "step": 790
    },
    {
      "epoch": 0.18677406175217418,
      "grad_norm": 0.02292817272245884,
      "learning_rate": 0.0003618158236057069,
      "loss": 0.0113,
      "step": 800
    },
    {
      "epoch": 0.18910873752407634,
      "grad_norm": 0.01237889751791954,
      "learning_rate": 0.00036077821011673154,
      "loss": 0.01,
      "step": 810
    },
    {
      "epoch": 0.19144341329597853,
      "grad_norm": 0.011882675811648369,
      "learning_rate": 0.0003597405966277562,
      "loss": 0.0067,
      "step": 820
    },
    {
      "epoch": 0.1937780890678807,
      "grad_norm": 0.021467048674821854,
      "learning_rate": 0.00035870298313878086,
      "loss": 0.0138,
      "step": 830
    },
    {
      "epoch": 0.19611276483978288,
      "grad_norm": 0.04117121547460556,
      "learning_rate": 0.00035766536964980546,
      "loss": 0.0131,
      "step": 840
    },
    {
      "epoch": 0.19844744061168504,
      "grad_norm": 0.04125780984759331,
      "learning_rate": 0.0003566277561608301,
      "loss": 0.0063,
      "step": 850
    },
    {
      "epoch": 0.20078211638358723,
      "grad_norm": 0.01064964011311531,
      "learning_rate": 0.0003555901426718547,
      "loss": 0.0068,
      "step": 860
    },
    {
      "epoch": 0.20311679215548942,
      "grad_norm": 0.18367743492126465,
      "learning_rate": 0.0003545525291828794,
      "loss": 0.0197,
      "step": 870
    },
    {
      "epoch": 0.20545146792739158,
      "grad_norm": 0.017399262636899948,
      "learning_rate": 0.00035351491569390403,
      "loss": 0.0048,
      "step": 880
    },
    {
      "epoch": 0.20778614369929377,
      "grad_norm": 0.04123668745160103,
      "learning_rate": 0.0003524773022049287,
      "loss": 0.012,
      "step": 890
    },
    {
      "epoch": 0.21012081947119593,
      "grad_norm": 0.021881213411688805,
      "learning_rate": 0.0003514396887159533,
      "loss": 0.002,
      "step": 900
    },
    {
      "epoch": 0.21245549524309812,
      "grad_norm": 0.7196763753890991,
      "learning_rate": 0.00035040207522697795,
      "loss": 0.0053,
      "step": 910
    },
    {
      "epoch": 0.21479017101500028,
      "grad_norm": 0.006894146092236042,
      "learning_rate": 0.0003493644617380026,
      "loss": 0.0004,
      "step": 920
    },
    {
      "epoch": 0.21712484678690247,
      "grad_norm": 0.02064809761941433,
      "learning_rate": 0.00034832684824902726,
      "loss": 0.0022,
      "step": 930
    },
    {
      "epoch": 0.21945952255880466,
      "grad_norm": 0.0018330852035433054,
      "learning_rate": 0.0003472892347600519,
      "loss": 0.0008,
      "step": 940
    },
    {
      "epoch": 0.22179419833070682,
      "grad_norm": 0.00754689471796155,
      "learning_rate": 0.0003462516212710766,
      "loss": 0.0356,
      "step": 950
    },
    {
      "epoch": 0.224128874102609,
      "grad_norm": 0.0938534140586853,
      "learning_rate": 0.0003452140077821012,
      "loss": 0.0021,
      "step": 960
    },
    {
      "epoch": 0.22646354987451117,
      "grad_norm": 0.043399691581726074,
      "learning_rate": 0.00034417639429312584,
      "loss": 0.0056,
      "step": 970
    },
    {
      "epoch": 0.22879822564641336,
      "grad_norm": 0.014040129259228706,
      "learning_rate": 0.00034313878080415044,
      "loss": 0.0006,
      "step": 980
    },
    {
      "epoch": 0.23113290141831552,
      "grad_norm": 0.003576503833755851,
      "learning_rate": 0.0003421011673151751,
      "loss": 0.0007,
      "step": 990
    },
    {
      "epoch": 0.2334675771902177,
      "grad_norm": 0.0051997085101902485,
      "learning_rate": 0.00034106355382619976,
      "loss": 0.0001,
      "step": 1000
    },
    {
      "epoch": 0.23580225296211987,
      "grad_norm": 0.0035423666704446077,
      "learning_rate": 0.0003400259403372244,
      "loss": 0.011,
      "step": 1010
    },
    {
      "epoch": 0.23813692873402206,
      "grad_norm": 0.009176980704069138,
      "learning_rate": 0.000338988326848249,
      "loss": 0.0129,
      "step": 1020
    },
    {
      "epoch": 0.24047160450592425,
      "grad_norm": 1.0268243551254272,
      "learning_rate": 0.00033795071335927367,
      "loss": 0.0128,
      "step": 1030
    },
    {
      "epoch": 0.24280628027782641,
      "grad_norm": 0.15778960287570953,
      "learning_rate": 0.00033691309987029833,
      "loss": 0.0025,
      "step": 1040
    },
    {
      "epoch": 0.2451409560497286,
      "grad_norm": 0.014147180132567883,
      "learning_rate": 0.000335875486381323,
      "loss": 0.0012,
      "step": 1050
    },
    {
      "epoch": 0.24747563182163076,
      "grad_norm": 0.6503289341926575,
      "learning_rate": 0.00033483787289234764,
      "loss": 0.0077,
      "step": 1060
    },
    {
      "epoch": 0.24981030759353295,
      "grad_norm": 0.036625299602746964,
      "learning_rate": 0.0003338002594033723,
      "loss": 0.0066,
      "step": 1070
    },
    {
      "epoch": 0.2521449833654351,
      "grad_norm": 0.05064311996102333,
      "learning_rate": 0.0003327626459143969,
      "loss": 0.0132,
      "step": 1080
    },
    {
      "epoch": 0.2544796591373373,
      "grad_norm": 0.01040785014629364,
      "learning_rate": 0.00033172503242542156,
      "loss": 0.0022,
      "step": 1090
    },
    {
      "epoch": 0.2568143349092395,
      "grad_norm": 0.0481790155172348,
      "learning_rate": 0.00033068741893644616,
      "loss": 0.0169,
      "step": 1100
    },
    {
      "epoch": 0.25914901068114166,
      "grad_norm": 0.04906298220157623,
      "learning_rate": 0.0003296498054474708,
      "loss": 0.0122,
      "step": 1110
    },
    {
      "epoch": 0.2614836864530438,
      "grad_norm": 0.010882526636123657,
      "learning_rate": 0.0003286121919584955,
      "loss": 0.01,
      "step": 1120
    },
    {
      "epoch": 0.26381836222494603,
      "grad_norm": 0.05711141228675842,
      "learning_rate": 0.00032757457846952013,
      "loss": 0.0017,
      "step": 1130
    },
    {
      "epoch": 0.2661530379968482,
      "grad_norm": 0.578333854675293,
      "learning_rate": 0.00032653696498054474,
      "loss": 0.0067,
      "step": 1140
    },
    {
      "epoch": 0.26848771376875036,
      "grad_norm": 0.01890755444765091,
      "learning_rate": 0.0003254993514915694,
      "loss": 0.004,
      "step": 1150
    },
    {
      "epoch": 0.2708223895406525,
      "grad_norm": 0.07548126578330994,
      "learning_rate": 0.00032446173800259405,
      "loss": 0.0018,
      "step": 1160
    },
    {
      "epoch": 0.27315706531255474,
      "grad_norm": 0.13876883685588837,
      "learning_rate": 0.0003234241245136187,
      "loss": 0.0002,
      "step": 1170
    },
    {
      "epoch": 0.2754917410844569,
      "grad_norm": 0.0030123014003038406,
      "learning_rate": 0.00032238651102464336,
      "loss": 0.0001,
      "step": 1180
    },
    {
      "epoch": 0.27782641685635906,
      "grad_norm": 0.00535109406337142,
      "learning_rate": 0.00032134889753566797,
      "loss": 0.0072,
      "step": 1190
    },
    {
      "epoch": 0.2801610926282613,
      "grad_norm": 0.05122077465057373,
      "learning_rate": 0.0003203112840466926,
      "loss": 0.0016,
      "step": 1200
    },
    {
      "epoch": 0.28249576840016344,
      "grad_norm": 0.003601687727496028,
      "learning_rate": 0.0003192736705577173,
      "loss": 0.0003,
      "step": 1210
    },
    {
      "epoch": 0.2848304441720656,
      "grad_norm": 0.001865709782578051,
      "learning_rate": 0.0003182360570687419,
      "loss": 0.0001,
      "step": 1220
    },
    {
      "epoch": 0.28716511994396776,
      "grad_norm": 0.0040197898633778095,
      "learning_rate": 0.00031719844357976654,
      "loss": 0.0,
      "step": 1230
    },
    {
      "epoch": 0.28949979571587,
      "grad_norm": 1.5420883893966675,
      "learning_rate": 0.0003161608300907912,
      "loss": 0.0048,
      "step": 1240
    },
    {
      "epoch": 0.29183447148777214,
      "grad_norm": 0.6053478717803955,
      "learning_rate": 0.00031512321660181585,
      "loss": 0.0023,
      "step": 1250
    },
    {
      "epoch": 0.2941691472596743,
      "grad_norm": 0.008091798983514309,
      "learning_rate": 0.00031408560311284046,
      "loss": 0.0011,
      "step": 1260
    },
    {
      "epoch": 0.29650382303157646,
      "grad_norm": 0.027423491701483727,
      "learning_rate": 0.0003130479896238651,
      "loss": 0.002,
      "step": 1270
    },
    {
      "epoch": 0.2988384988034787,
      "grad_norm": 0.008556556887924671,
      "learning_rate": 0.00031201037613488977,
      "loss": 0.0204,
      "step": 1280
    },
    {
      "epoch": 0.30117317457538084,
      "grad_norm": 0.02086860127747059,
      "learning_rate": 0.00031097276264591443,
      "loss": 0.001,
      "step": 1290
    },
    {
      "epoch": 0.303507850347283,
      "grad_norm": 0.016817396506667137,
      "learning_rate": 0.0003099351491569391,
      "loss": 0.0005,
      "step": 1300
    },
    {
      "epoch": 0.3058425261191852,
      "grad_norm": 0.038000259548425674,
      "learning_rate": 0.0003088975356679637,
      "loss": 0.0136,
      "step": 1310
    },
    {
      "epoch": 0.3081772018910874,
      "grad_norm": 0.08027376979589462,
      "learning_rate": 0.00030785992217898834,
      "loss": 0.0015,
      "step": 1320
    },
    {
      "epoch": 0.31051187766298954,
      "grad_norm": 0.009664513170719147,
      "learning_rate": 0.000306822308690013,
      "loss": 0.0007,
      "step": 1330
    },
    {
      "epoch": 0.3128465534348917,
      "grad_norm": 0.004226653836667538,
      "learning_rate": 0.0003057846952010376,
      "loss": 0.0002,
      "step": 1340
    },
    {
      "epoch": 0.3151812292067939,
      "grad_norm": 0.0029185679741203785,
      "learning_rate": 0.00030474708171206226,
      "loss": 0.0006,
      "step": 1350
    },
    {
      "epoch": 0.3175159049786961,
      "grad_norm": 0.13733680546283722,
      "learning_rate": 0.0003037094682230869,
      "loss": 0.0002,
      "step": 1360
    },
    {
      "epoch": 0.31985058075059825,
      "grad_norm": 0.0015662899240851402,
      "learning_rate": 0.0003026718547341116,
      "loss": 0.0,
      "step": 1370
    },
    {
      "epoch": 0.32218525652250046,
      "grad_norm": 0.009845585562288761,
      "learning_rate": 0.0003016342412451362,
      "loss": 0.0038,
      "step": 1380
    },
    {
      "epoch": 0.3245199322944026,
      "grad_norm": 0.0025016157887876034,
      "learning_rate": 0.00030059662775616084,
      "loss": 0.0023,
      "step": 1390
    },
    {
      "epoch": 0.3268546080663048,
      "grad_norm": 0.0021448610350489616,
      "learning_rate": 0.0002995590142671855,
      "loss": 0.0014,
      "step": 1400
    },
    {
      "epoch": 0.32918928383820695,
      "grad_norm": 0.0016592498868703842,
      "learning_rate": 0.00029852140077821015,
      "loss": 0.0031,
      "step": 1410
    },
    {
      "epoch": 0.33152395961010916,
      "grad_norm": 0.0032906723208725452,
      "learning_rate": 0.0002974837872892348,
      "loss": 0.0088,
      "step": 1420
    },
    {
      "epoch": 0.3338586353820113,
      "grad_norm": 0.007907208986580372,
      "learning_rate": 0.0002964461738002594,
      "loss": 0.0045,
      "step": 1430
    },
    {
      "epoch": 0.3361933111539135,
      "grad_norm": 0.004995182156562805,
      "learning_rate": 0.00029540856031128407,
      "loss": 0.0009,
      "step": 1440
    },
    {
      "epoch": 0.3385279869258157,
      "grad_norm": 0.004081010818481445,
      "learning_rate": 0.0002943709468223087,
      "loss": 0.0004,
      "step": 1450
    },
    {
      "epoch": 0.34086266269771787,
      "grad_norm": 0.003877257462590933,
      "learning_rate": 0.0002933333333333333,
      "loss": 0.0012,
      "step": 1460
    },
    {
      "epoch": 0.34319733846962003,
      "grad_norm": 0.002449814695864916,
      "learning_rate": 0.000292295719844358,
      "loss": 0.0002,
      "step": 1470
    },
    {
      "epoch": 0.3455320142415222,
      "grad_norm": 0.002753973240032792,
      "learning_rate": 0.00029125810635538264,
      "loss": 0.0002,
      "step": 1480
    },
    {
      "epoch": 0.3478666900134244,
      "grad_norm": 0.0017747861566022038,
      "learning_rate": 0.00029022049286640724,
      "loss": 0.0001,
      "step": 1490
    },
    {
      "epoch": 0.35020136578532657,
      "grad_norm": 0.0011892006732523441,
      "learning_rate": 0.0002891828793774319,
      "loss": 0.0004,
      "step": 1500
    },
    {
      "epoch": 0.35253604155722873,
      "grad_norm": 0.0011454072082415223,
      "learning_rate": 0.00028814526588845656,
      "loss": 0.0,
      "step": 1510
    },
    {
      "epoch": 0.3548707173291309,
      "grad_norm": 0.0009849355556070805,
      "learning_rate": 0.0002871076523994812,
      "loss": 0.0,
      "step": 1520
    },
    {
      "epoch": 0.3572053931010331,
      "grad_norm": 0.0009979312308132648,
      "learning_rate": 0.00028607003891050587,
      "loss": 0.0,
      "step": 1530
    },
    {
      "epoch": 0.35954006887293527,
      "grad_norm": 0.6208717226982117,
      "learning_rate": 0.00028503242542153053,
      "loss": 0.0012,
      "step": 1540
    },
    {
      "epoch": 0.36187474464483743,
      "grad_norm": 1.135374665260315,
      "learning_rate": 0.00028399481193255513,
      "loss": 0.0008,
      "step": 1550
    },
    {
      "epoch": 0.36420942041673965,
      "grad_norm": 0.0006218306953087449,
      "learning_rate": 0.0002829571984435798,
      "loss": 0.0001,
      "step": 1560
    },
    {
      "epoch": 0.3665440961886418,
      "grad_norm": 0.002007074421271682,
      "learning_rate": 0.00028191958495460444,
      "loss": 0.0013,
      "step": 1570
    },
    {
      "epoch": 0.36887877196054397,
      "grad_norm": 0.0011203576577827334,
      "learning_rate": 0.00028088197146562905,
      "loss": 0.0051,
      "step": 1580
    },
    {
      "epoch": 0.37121344773244613,
      "grad_norm": 0.004436762072145939,
      "learning_rate": 0.0002798443579766537,
      "loss": 0.0091,
      "step": 1590
    },
    {
      "epoch": 0.37354812350434835,
      "grad_norm": 0.4702003598213196,
      "learning_rate": 0.00027880674448767836,
      "loss": 0.0009,
      "step": 1600
    },
    {
      "epoch": 0.3758827992762505,
      "grad_norm": 0.013339284807443619,
      "learning_rate": 0.00027776913099870296,
      "loss": 0.0093,
      "step": 1610
    },
    {
      "epoch": 0.3782174750481527,
      "grad_norm": 0.0076319011859595776,
      "learning_rate": 0.0002767315175097276,
      "loss": 0.0025,
      "step": 1620
    },
    {
      "epoch": 0.3805521508200549,
      "grad_norm": 0.005932167172431946,
      "learning_rate": 0.0002756939040207523,
      "loss": 0.0004,
      "step": 1630
    },
    {
      "epoch": 0.38288682659195705,
      "grad_norm": 0.003560519078746438,
      "learning_rate": 0.00027465629053177693,
      "loss": 0.0003,
      "step": 1640
    },
    {
      "epoch": 0.3852215023638592,
      "grad_norm": 0.001825852901674807,
      "learning_rate": 0.0002736186770428016,
      "loss": 0.0003,
      "step": 1650
    },
    {
      "epoch": 0.3875561781357614,
      "grad_norm": 0.11422229558229446,
      "learning_rate": 0.00027258106355382625,
      "loss": 0.0006,
      "step": 1660
    },
    {
      "epoch": 0.3898908539076636,
      "grad_norm": 0.00336836208589375,
      "learning_rate": 0.00027154345006485085,
      "loss": 0.0002,
      "step": 1670
    },
    {
      "epoch": 0.39222552967956575,
      "grad_norm": 0.020830854773521423,
      "learning_rate": 0.0002705058365758755,
      "loss": 0.0003,
      "step": 1680
    },
    {
      "epoch": 0.3945602054514679,
      "grad_norm": 0.5437701940536499,
      "learning_rate": 0.00026946822308690017,
      "loss": 0.0006,
      "step": 1690
    },
    {
      "epoch": 0.3968948812233701,
      "grad_norm": 0.012015492655336857,
      "learning_rate": 0.00026843060959792477,
      "loss": 0.0001,
      "step": 1700
    },
    {
      "epoch": 0.3992295569952723,
      "grad_norm": 0.0010134581243619323,
      "learning_rate": 0.0002673929961089494,
      "loss": 0.0005,
      "step": 1710
    },
    {
      "epoch": 0.40156423276717446,
      "grad_norm": 0.001305864891037345,
      "learning_rate": 0.0002663553826199741,
      "loss": 0.0006,
      "step": 1720
    },
    {
      "epoch": 0.4038989085390766,
      "grad_norm": 0.0030054424423724413,
      "learning_rate": 0.0002653177691309987,
      "loss": 0.0,
      "step": 1730
    },
    {
      "epoch": 0.40623358431097883,
      "grad_norm": 0.0008168119820766151,
      "learning_rate": 0.00026428015564202334,
      "loss": 0.005,
      "step": 1740
    },
    {
      "epoch": 0.408568260082881,
      "grad_norm": 0.0005433742771856487,
      "learning_rate": 0.000263242542153048,
      "loss": 0.0022,
      "step": 1750
    },
    {
      "epoch": 0.41090293585478316,
      "grad_norm": 0.03222297504544258,
      "learning_rate": 0.00026220492866407266,
      "loss": 0.0001,
      "step": 1760
    },
    {
      "epoch": 0.4132376116266853,
      "grad_norm": 0.001766858738847077,
      "learning_rate": 0.0002611673151750973,
      "loss": 0.0001,
      "step": 1770
    },
    {
      "epoch": 0.41557228739858754,
      "grad_norm": 0.00046385781024582684,
      "learning_rate": 0.0002601297016861219,
      "loss": 0.0,
      "step": 1780
    },
    {
      "epoch": 0.4179069631704897,
      "grad_norm": 0.00034939011675305665,
      "learning_rate": 0.00025909208819714657,
      "loss": 0.0,
      "step": 1790
    },
    {
      "epoch": 0.42024163894239186,
      "grad_norm": 0.0006307600415311754,
      "learning_rate": 0.00025805447470817123,
      "loss": 0.0,
      "step": 1800
    },
    {
      "epoch": 0.4225763147142941,
      "grad_norm": 0.0005922391428612173,
      "learning_rate": 0.0002570168612191959,
      "loss": 0.0,
      "step": 1810
    },
    {
      "epoch": 0.42491099048619624,
      "grad_norm": 0.00035804559593088925,
      "learning_rate": 0.0002559792477302205,
      "loss": 0.0,
      "step": 1820
    },
    {
      "epoch": 0.4272456662580984,
      "grad_norm": 0.002265334827825427,
      "learning_rate": 0.00025494163424124515,
      "loss": 0.0,
      "step": 1830
    },
    {
      "epoch": 0.42958034203000056,
      "grad_norm": 0.00030151245300658047,
      "learning_rate": 0.0002539040207522698,
      "loss": 0.0001,
      "step": 1840
    },
    {
      "epoch": 0.4319150178019028,
      "grad_norm": 0.0007045480306260288,
      "learning_rate": 0.0002528664072632944,
      "loss": 0.0,
      "step": 1850
    },
    {
      "epoch": 0.43424969357380494,
      "grad_norm": 0.0061547341756522655,
      "learning_rate": 0.00025182879377431906,
      "loss": 0.0,
      "step": 1860
    },
    {
      "epoch": 0.4365843693457071,
      "grad_norm": 0.0003125610819552094,
      "learning_rate": 0.0002507911802853437,
      "loss": 0.0,
      "step": 1870
    },
    {
      "epoch": 0.4389190451176093,
      "grad_norm": 0.0002632684481795877,
      "learning_rate": 0.0002497535667963684,
      "loss": 0.0,
      "step": 1880
    },
    {
      "epoch": 0.4412537208895115,
      "grad_norm": 0.0003021568991243839,
      "learning_rate": 0.00024871595330739303,
      "loss": 0.0,
      "step": 1890
    },
    {
      "epoch": 0.44358839666141364,
      "grad_norm": 0.0002902685955632478,
      "learning_rate": 0.00024767833981841764,
      "loss": 0.0,
      "step": 1900
    },
    {
      "epoch": 0.4459230724333158,
      "grad_norm": 0.000261983135715127,
      "learning_rate": 0.0002466407263294423,
      "loss": 0.0001,
      "step": 1910
    },
    {
      "epoch": 0.448257748205218,
      "grad_norm": 0.00039503935840912163,
      "learning_rate": 0.00024560311284046695,
      "loss": 0.0,
      "step": 1920
    },
    {
      "epoch": 0.4505924239771202,
      "grad_norm": 0.00022091201390139759,
      "learning_rate": 0.0002445654993514916,
      "loss": 0.0,
      "step": 1930
    },
    {
      "epoch": 0.45292709974902234,
      "grad_norm": 2.493230104446411,
      "learning_rate": 0.0002435278858625162,
      "loss": 0.0371,
      "step": 1940
    },
    {
      "epoch": 0.4552617755209245,
      "grad_norm": 0.07945345342159271,
      "learning_rate": 0.00024249027237354084,
      "loss": 0.0034,
      "step": 1950
    },
    {
      "epoch": 0.4575964512928267,
      "grad_norm": 0.0050026909448206425,
      "learning_rate": 0.0002414526588845655,
      "loss": 0.0043,
      "step": 1960
    },
    {
      "epoch": 0.4599311270647289,
      "grad_norm": 0.006855088286101818,
      "learning_rate": 0.00024041504539559015,
      "loss": 0.0065,
      "step": 1970
    },
    {
      "epoch": 0.46226580283663105,
      "grad_norm": 0.007564285770058632,
      "learning_rate": 0.00023937743190661478,
      "loss": 0.025,
      "step": 1980
    },
    {
      "epoch": 0.46460047860853326,
      "grad_norm": 0.13361288607120514,
      "learning_rate": 0.00023833981841763944,
      "loss": 0.0058,
      "step": 1990
    },
    {
      "epoch": 0.4669351543804354,
      "grad_norm": 0.011933702044188976,
      "learning_rate": 0.0002373022049286641,
      "loss": 0.0034,
      "step": 2000
    },
    {
      "epoch": 0.4692698301523376,
      "grad_norm": 0.0047375899739563465,
      "learning_rate": 0.00023626459143968873,
      "loss": 0.0051,
      "step": 2010
    },
    {
      "epoch": 0.47160450592423975,
      "grad_norm": 0.012455107644200325,
      "learning_rate": 0.00023522697795071338,
      "loss": 0.0001,
      "step": 2020
    },
    {
      "epoch": 0.47393918169614196,
      "grad_norm": 0.003286924911662936,
      "learning_rate": 0.00023418936446173801,
      "loss": 0.0026,
      "step": 2030
    },
    {
      "epoch": 0.4762738574680441,
      "grad_norm": 0.004305190406739712,
      "learning_rate": 0.00023315175097276267,
      "loss": 0.0034,
      "step": 2040
    },
    {
      "epoch": 0.4786085332399463,
      "grad_norm": 0.003077031811699271,
      "learning_rate": 0.00023211413748378733,
      "loss": 0.0007,
      "step": 2050
    },
    {
      "epoch": 0.4809432090118485,
      "grad_norm": 0.003154961857944727,
      "learning_rate": 0.00023107652399481193,
      "loss": 0.0003,
      "step": 2060
    },
    {
      "epoch": 0.48327788478375067,
      "grad_norm": 0.0026587171014398336,
      "learning_rate": 0.00023003891050583656,
      "loss": 0.0001,
      "step": 2070
    },
    {
      "epoch": 0.48561256055565283,
      "grad_norm": 0.018204033374786377,
      "learning_rate": 0.00022900129701686122,
      "loss": 0.0002,
      "step": 2080
    },
    {
      "epoch": 0.487947236327555,
      "grad_norm": 0.013659532181918621,
      "learning_rate": 0.00022796368352788588,
      "loss": 0.0001,
      "step": 2090
    },
    {
      "epoch": 0.4902819120994572,
      "grad_norm": 0.0016216342337429523,
      "learning_rate": 0.0002269260700389105,
      "loss": 0.0092,
      "step": 2100
    },
    {
      "epoch": 0.49261658787135937,
      "grad_norm": 0.13890917599201202,
      "learning_rate": 0.00022588845654993516,
      "loss": 0.0006,
      "step": 2110
    },
    {
      "epoch": 0.49495126364326153,
      "grad_norm": 0.004591196309775114,
      "learning_rate": 0.0002248508430609598,
      "loss": 0.0014,
      "step": 2120
    },
    {
      "epoch": 0.49728593941516375,
      "grad_norm": 0.0019425478531047702,
      "learning_rate": 0.00022381322957198445,
      "loss": 0.0001,
      "step": 2130
    },
    {
      "epoch": 0.4996206151870659,
      "grad_norm": 0.016683168709278107,
      "learning_rate": 0.0002227756160830091,
      "loss": 0.0042,
      "step": 2140
    },
    {
      "epoch": 0.5019552909589681,
      "grad_norm": 0.0052938396111130714,
      "learning_rate": 0.00022173800259403374,
      "loss": 0.0005,
      "step": 2150
    },
    {
      "epoch": 0.5042899667308702,
      "grad_norm": 0.0712481215596199,
      "learning_rate": 0.0002207003891050584,
      "loss": 0.0003,
      "step": 2160
    },
    {
      "epoch": 0.5066246425027724,
      "grad_norm": 0.002925017150118947,
      "learning_rate": 0.00021966277561608305,
      "loss": 0.005,
      "step": 2170
    },
    {
      "epoch": 0.5089593182746746,
      "grad_norm": 0.06688928604125977,
      "learning_rate": 0.00021862516212710765,
      "loss": 0.0108,
      "step": 2180
    },
    {
      "epoch": 0.5112939940465768,
      "grad_norm": 0.007370030973106623,
      "learning_rate": 0.00021758754863813228,
      "loss": 0.0005,
      "step": 2190
    },
    {
      "epoch": 0.513628669818479,
      "grad_norm": 0.006824078969657421,
      "learning_rate": 0.00021654993514915694,
      "loss": 0.0002,
      "step": 2200
    },
    {
      "epoch": 0.5159633455903812,
      "grad_norm": 0.004564494825899601,
      "learning_rate": 0.00021551232166018157,
      "loss": 0.0001,
      "step": 2210
    },
    {
      "epoch": 0.5182980213622833,
      "grad_norm": 0.0024187033995985985,
      "learning_rate": 0.00021447470817120623,
      "loss": 0.0003,
      "step": 2220
    },
    {
      "epoch": 0.5206326971341855,
      "grad_norm": 0.0019623206462711096,
      "learning_rate": 0.00021343709468223088,
      "loss": 0.0004,
      "step": 2230
    },
    {
      "epoch": 0.5229673729060876,
      "grad_norm": 0.006479791831225157,
      "learning_rate": 0.0002123994811932555,
      "loss": 0.0009,
      "step": 2240
    },
    {
      "epoch": 0.5253020486779898,
      "grad_norm": 0.001472059520892799,
      "learning_rate": 0.00021136186770428017,
      "loss": 0.0022,
      "step": 2250
    },
    {
      "epoch": 0.5276367244498921,
      "grad_norm": 0.0011262299958616495,
      "learning_rate": 0.00021032425421530483,
      "loss": 0.0,
      "step": 2260
    },
    {
      "epoch": 0.5299714002217942,
      "grad_norm": 0.0010004049399867654,
      "learning_rate": 0.00020928664072632946,
      "loss": 0.0034,
      "step": 2270
    },
    {
      "epoch": 0.5323060759936964,
      "grad_norm": 0.010012038052082062,
      "learning_rate": 0.00020824902723735411,
      "loss": 0.0003,
      "step": 2280
    },
    {
      "epoch": 0.5346407517655986,
      "grad_norm": 0.002231718273833394,
      "learning_rate": 0.00020721141374837877,
      "loss": 0.0003,
      "step": 2290
    },
    {
      "epoch": 0.5369754275375007,
      "grad_norm": 0.0018004688899964094,
      "learning_rate": 0.00020617380025940337,
      "loss": 0.0001,
      "step": 2300
    },
    {
      "epoch": 0.5393101033094029,
      "grad_norm": 0.0008523733704350889,
      "learning_rate": 0.000205136186770428,
      "loss": 0.0001,
      "step": 2310
    },
    {
      "epoch": 0.541644779081305,
      "grad_norm": 0.0011259455932304263,
      "learning_rate": 0.00020409857328145266,
      "loss": 0.0,
      "step": 2320
    },
    {
      "epoch": 0.5439794548532073,
      "grad_norm": 0.0006843574810773134,
      "learning_rate": 0.0002030609597924773,
      "loss": 0.0,
      "step": 2330
    },
    {
      "epoch": 0.5463141306251095,
      "grad_norm": 0.0007879248005338013,
      "learning_rate": 0.00020202334630350195,
      "loss": 0.0,
      "step": 2340
    },
    {
      "epoch": 0.5486488063970116,
      "grad_norm": 0.0011782748624682426,
      "learning_rate": 0.0002009857328145266,
      "loss": 0.0001,
      "step": 2350
    },
    {
      "epoch": 0.5509834821689138,
      "grad_norm": 0.004246284253895283,
      "learning_rate": 0.00019994811932555123,
      "loss": 0.0163,
      "step": 2360
    },
    {
      "epoch": 0.553318157940816,
      "grad_norm": 0.7899481058120728,
      "learning_rate": 0.0001989105058365759,
      "loss": 0.0054,
      "step": 2370
    },
    {
      "epoch": 0.5556528337127181,
      "grad_norm": 0.0048600370064377785,
      "learning_rate": 0.00019787289234760055,
      "loss": 0.0001,
      "step": 2380
    },
    {
      "epoch": 0.5579875094846203,
      "grad_norm": 0.2455766797065735,
      "learning_rate": 0.00019683527885862515,
      "loss": 0.0009,
      "step": 2390
    },
    {
      "epoch": 0.5603221852565226,
      "grad_norm": 0.004527187906205654,
      "learning_rate": 0.0001957976653696498,
      "loss": 0.0005,
      "step": 2400
    },
    {
      "epoch": 0.5626568610284247,
      "grad_norm": 0.003127218456938863,
      "learning_rate": 0.00019476005188067446,
      "loss": 0.0001,
      "step": 2410
    },
    {
      "epoch": 0.5649915368003269,
      "grad_norm": 0.0033744657412171364,
      "learning_rate": 0.0001937224383916991,
      "loss": 0.0008,
      "step": 2420
    },
    {
      "epoch": 0.567326212572229,
      "grad_norm": 0.0021291917655617,
      "learning_rate": 0.00019268482490272375,
      "loss": 0.0001,
      "step": 2430
    },
    {
      "epoch": 0.5696608883441312,
      "grad_norm": 0.002303266664966941,
      "learning_rate": 0.0001916472114137484,
      "loss": 0.0001,
      "step": 2440
    },
    {
      "epoch": 0.5719955641160334,
      "grad_norm": 0.5721760392189026,
      "learning_rate": 0.000190609597924773,
      "loss": 0.011,
      "step": 2450
    },
    {
      "epoch": 0.5743302398879355,
      "grad_norm": 1.1442689895629883,
      "learning_rate": 0.00018957198443579767,
      "loss": 0.0065,
      "step": 2460
    },
    {
      "epoch": 0.5766649156598377,
      "grad_norm": 0.03165394440293312,
      "learning_rate": 0.00018853437094682233,
      "loss": 0.001,
      "step": 2470
    },
    {
      "epoch": 0.57899959143174,
      "grad_norm": 0.007602803409099579,
      "learning_rate": 0.00018749675745784696,
      "loss": 0.0132,
      "step": 2480
    },
    {
      "epoch": 0.5813342672036421,
      "grad_norm": 0.026837633922696114,
      "learning_rate": 0.0001864591439688716,
      "loss": 0.001,
      "step": 2490
    },
    {
      "epoch": 0.5836689429755443,
      "grad_norm": 0.024656491354107857,
      "learning_rate": 0.00018542153047989624,
      "loss": 0.0018,
      "step": 2500
    },
    {
      "epoch": 0.5860036187474464,
      "grad_norm": 0.011152198538184166,
      "learning_rate": 0.0001843839169909209,
      "loss": 0.0004,
      "step": 2510
    },
    {
      "epoch": 0.5883382945193486,
      "grad_norm": 0.010260018520057201,
      "learning_rate": 0.00018334630350194553,
      "loss": 0.0002,
      "step": 2520
    },
    {
      "epoch": 0.5906729702912508,
      "grad_norm": 0.003675712738186121,
      "learning_rate": 0.00018230869001297019,
      "loss": 0.0025,
      "step": 2530
    },
    {
      "epoch": 0.5930076460631529,
      "grad_norm": 0.00440176110714674,
      "learning_rate": 0.00018127107652399482,
      "loss": 0.0001,
      "step": 2540
    },
    {
      "epoch": 0.5953423218350552,
      "grad_norm": 0.004901622422039509,
      "learning_rate": 0.00018023346303501947,
      "loss": 0.0019,
      "step": 2550
    },
    {
      "epoch": 0.5976769976069574,
      "grad_norm": 0.0028110845014452934,
      "learning_rate": 0.0001791958495460441,
      "loss": 0.0009,
      "step": 2560
    },
    {
      "epoch": 0.6000116733788595,
      "grad_norm": 0.0025059175677597523,
      "learning_rate": 0.00017815823605706876,
      "loss": 0.0,
      "step": 2570
    },
    {
      "epoch": 0.6023463491507617,
      "grad_norm": 0.0019852565601468086,
      "learning_rate": 0.0001771206225680934,
      "loss": 0.0,
      "step": 2580
    },
    {
      "epoch": 0.6046810249226638,
      "grad_norm": 0.0014332541031762958,
      "learning_rate": 0.00017608300907911802,
      "loss": 0.0004,
      "step": 2590
    },
    {
      "epoch": 0.607015700694566,
      "grad_norm": 0.32902491092681885,
      "learning_rate": 0.00017504539559014268,
      "loss": 0.0005,
      "step": 2600
    },
    {
      "epoch": 0.6093503764664682,
      "grad_norm": 0.0036561412271112204,
      "learning_rate": 0.00017400778210116733,
      "loss": 0.0011,
      "step": 2610
    },
    {
      "epoch": 0.6116850522383704,
      "grad_norm": 0.06626530736684799,
      "learning_rate": 0.00017297016861219196,
      "loss": 0.0001,
      "step": 2620
    },
    {
      "epoch": 0.6140197280102726,
      "grad_norm": 0.0011569494381546974,
      "learning_rate": 0.00017193255512321662,
      "loss": 0.0041,
      "step": 2630
    },
    {
      "epoch": 0.6163544037821748,
      "grad_norm": 0.002706947736442089,
      "learning_rate": 0.00017089494163424125,
      "loss": 0.0111,
      "step": 2640
    },
    {
      "epoch": 0.6186890795540769,
      "grad_norm": 0.0033384524285793304,
      "learning_rate": 0.00016985732814526588,
      "loss": 0.0003,
      "step": 2650
    },
    {
      "epoch": 0.6210237553259791,
      "grad_norm": 0.0037459495943039656,
      "learning_rate": 0.00016881971465629054,
      "loss": 0.0001,
      "step": 2660
    },
    {
      "epoch": 0.6233584310978812,
      "grad_norm": 0.0036509244237095118,
      "learning_rate": 0.0001677821011673152,
      "loss": 0.0001,
      "step": 2670
    },
    {
      "epoch": 0.6256931068697834,
      "grad_norm": 0.004108482040464878,
      "learning_rate": 0.00016674448767833982,
      "loss": 0.0002,
      "step": 2680
    },
    {
      "epoch": 0.6280277826416857,
      "grad_norm": 0.003080847905948758,
      "learning_rate": 0.00016570687418936448,
      "loss": 0.0001,
      "step": 2690
    },
    {
      "epoch": 0.6303624584135878,
      "grad_norm": 0.0028391852974891663,
      "learning_rate": 0.0001646692607003891,
      "loss": 0.0002,
      "step": 2700
    },
    {
      "epoch": 0.63269713418549,
      "grad_norm": 0.001830106251873076,
      "learning_rate": 0.00016363164721141374,
      "loss": 0.0004,
      "step": 2710
    },
    {
      "epoch": 0.6350318099573922,
      "grad_norm": 0.0024860044941306114,
      "learning_rate": 0.0001625940337224384,
      "loss": 0.0001,
      "step": 2720
    },
    {
      "epoch": 0.6373664857292943,
      "grad_norm": 0.0016177381621673703,
      "learning_rate": 0.00016155642023346305,
      "loss": 0.0003,
      "step": 2730
    },
    {
      "epoch": 0.6397011615011965,
      "grad_norm": 0.0024260838981717825,
      "learning_rate": 0.00016051880674448768,
      "loss": 0.0004,
      "step": 2740
    },
    {
      "epoch": 0.6420358372730987,
      "grad_norm": 0.038342151790857315,
      "learning_rate": 0.00015948119325551234,
      "loss": 0.0003,
      "step": 2750
    },
    {
      "epoch": 0.6443705130450009,
      "grad_norm": 0.0013671324122697115,
      "learning_rate": 0.00015844357976653697,
      "loss": 0.0055,
      "step": 2760
    },
    {
      "epoch": 0.6467051888169031,
      "grad_norm": 0.0012879414716735482,
      "learning_rate": 0.0001574059662775616,
      "loss": 0.0,
      "step": 2770
    },
    {
      "epoch": 0.6490398645888052,
      "grad_norm": 0.0014536501839756966,
      "learning_rate": 0.00015636835278858626,
      "loss": 0.0,
      "step": 2780
    },
    {
      "epoch": 0.6513745403607074,
      "grad_norm": 0.8653482794761658,
      "learning_rate": 0.00015533073929961092,
      "loss": 0.002,
      "step": 2790
    },
    {
      "epoch": 0.6537092161326096,
      "grad_norm": 0.0016417702427133918,
      "learning_rate": 0.00015429312581063555,
      "loss": 0.0003,
      "step": 2800
    },
    {
      "epoch": 0.6560438919045117,
      "grad_norm": 0.07089340686798096,
      "learning_rate": 0.0001532555123216602,
      "loss": 0.0131,
      "step": 2810
    },
    {
      "epoch": 0.6583785676764139,
      "grad_norm": 0.0033747514244168997,
      "learning_rate": 0.00015221789883268483,
      "loss": 0.0028,
      "step": 2820
    },
    {
      "epoch": 0.6607132434483162,
      "grad_norm": 0.006389171350747347,
      "learning_rate": 0.00015118028534370946,
      "loss": 0.0002,
      "step": 2830
    },
    {
      "epoch": 0.6630479192202183,
      "grad_norm": 0.0026964943390339613,
      "learning_rate": 0.00015014267185473412,
      "loss": 0.0002,
      "step": 2840
    },
    {
      "epoch": 0.6653825949921205,
      "grad_norm": 0.002625943860039115,
      "learning_rate": 0.00014910505836575878,
      "loss": 0.0004,
      "step": 2850
    },
    {
      "epoch": 0.6677172707640227,
      "grad_norm": 0.003076399676501751,
      "learning_rate": 0.0001480674448767834,
      "loss": 0.0001,
      "step": 2860
    },
    {
      "epoch": 0.6700519465359248,
      "grad_norm": 0.004514554515480995,
      "learning_rate": 0.00014702983138780806,
      "loss": 0.0001,
      "step": 2870
    },
    {
      "epoch": 0.672386622307827,
      "grad_norm": 0.0014996561221778393,
      "learning_rate": 0.0001459922178988327,
      "loss": 0.0002,
      "step": 2880
    },
    {
      "epoch": 0.6747212980797291,
      "grad_norm": 0.0015088297659531236,
      "learning_rate": 0.00014495460440985732,
      "loss": 0.0003,
      "step": 2890
    },
    {
      "epoch": 0.6770559738516314,
      "grad_norm": 0.0016345715848729014,
      "learning_rate": 0.00014391699092088198,
      "loss": 0.0,
      "step": 2900
    },
    {
      "epoch": 0.6793906496235336,
      "grad_norm": 0.0027825534343719482,
      "learning_rate": 0.00014287937743190664,
      "loss": 0.0008,
      "step": 2910
    },
    {
      "epoch": 0.6817253253954357,
      "grad_norm": 0.0012849323684349656,
      "learning_rate": 0.00014184176394293127,
      "loss": 0.0,
      "step": 2920
    },
    {
      "epoch": 0.6840600011673379,
      "grad_norm": 0.0021583992056548595,
      "learning_rate": 0.00014080415045395592,
      "loss": 0.0,
      "step": 2930
    },
    {
      "epoch": 0.6863946769392401,
      "grad_norm": 0.0012560015311464667,
      "learning_rate": 0.00013976653696498055,
      "loss": 0.0001,
      "step": 2940
    },
    {
      "epoch": 0.6887293527111422,
      "grad_norm": 0.0009112095576710999,
      "learning_rate": 0.00013872892347600518,
      "loss": 0.0001,
      "step": 2950
    },
    {
      "epoch": 0.6910640284830444,
      "grad_norm": 0.0013899313053116202,
      "learning_rate": 0.00013769130998702984,
      "loss": 0.0,
      "step": 2960
    },
    {
      "epoch": 0.6933987042549465,
      "grad_norm": 1.1169312000274658,
      "learning_rate": 0.0001366536964980545,
      "loss": 0.005,
      "step": 2970
    },
    {
      "epoch": 0.6957333800268488,
      "grad_norm": 0.0009174107108265162,
      "learning_rate": 0.00013561608300907913,
      "loss": 0.0009,
      "step": 2980
    },
    {
      "epoch": 0.698068055798751,
      "grad_norm": 0.0038010200951248407,
      "learning_rate": 0.00013457846952010378,
      "loss": 0.0071,
      "step": 2990
    },
    {
      "epoch": 0.7004027315706531,
      "grad_norm": 0.002235995838418603,
      "learning_rate": 0.00013354085603112841,
      "loss": 0.0002,
      "step": 3000
    },
    {
      "epoch": 0.7027374073425553,
      "grad_norm": 0.039830174297094345,
      "learning_rate": 0.00013250324254215304,
      "loss": 0.0004,
      "step": 3010
    },
    {
      "epoch": 0.7050720831144575,
      "grad_norm": 0.30222392082214355,
      "learning_rate": 0.0001314656290531777,
      "loss": 0.0008,
      "step": 3020
    },
    {
      "epoch": 0.7074067588863596,
      "grad_norm": 0.47657474875450134,
      "learning_rate": 0.00013042801556420233,
      "loss": 0.0008,
      "step": 3030
    },
    {
      "epoch": 0.7097414346582618,
      "grad_norm": 0.0009529945673421025,
      "learning_rate": 0.000129390402075227,
      "loss": 0.0,
      "step": 3040
    },
    {
      "epoch": 0.712076110430164,
      "grad_norm": 0.00109247793443501,
      "learning_rate": 0.00012835278858625164,
      "loss": 0.0004,
      "step": 3050
    },
    {
      "epoch": 0.7144107862020662,
      "grad_norm": 0.0016496065072715282,
      "learning_rate": 0.00012731517509727627,
      "loss": 0.0014,
      "step": 3060
    },
    {
      "epoch": 0.7167454619739684,
      "grad_norm": 0.0007458662148565054,
      "learning_rate": 0.0001262775616083009,
      "loss": 0.0,
      "step": 3070
    },
    {
      "epoch": 0.7190801377458705,
      "grad_norm": 0.0010477920295670629,
      "learning_rate": 0.00012523994811932556,
      "loss": 0.0,
      "step": 3080
    },
    {
      "epoch": 0.7214148135177727,
      "grad_norm": 0.0039003838319331408,
      "learning_rate": 0.0001242023346303502,
      "loss": 0.0098,
      "step": 3090
    },
    {
      "epoch": 0.7237494892896749,
      "grad_norm": 0.6328915953636169,
      "learning_rate": 0.00012316472114137485,
      "loss": 0.0048,
      "step": 3100
    },
    {
      "epoch": 0.726084165061577,
      "grad_norm": 0.0023845217656344175,
      "learning_rate": 0.0001221271076523995,
      "loss": 0.0001,
      "step": 3110
    },
    {
      "epoch": 0.7284188408334793,
      "grad_norm": 0.005935149732977152,
      "learning_rate": 0.00012108949416342412,
      "loss": 0.0003,
      "step": 3120
    },
    {
      "epoch": 0.7307535166053815,
      "grad_norm": 0.002948681591078639,
      "learning_rate": 0.00012005188067444876,
      "loss": 0.0002,
      "step": 3130
    },
    {
      "epoch": 0.7330881923772836,
      "grad_norm": 1.1137011051177979,
      "learning_rate": 0.00011901426718547342,
      "loss": 0.0072,
      "step": 3140
    },
    {
      "epoch": 0.7354228681491858,
      "grad_norm": 0.03960300236940384,
      "learning_rate": 0.00011797665369649807,
      "loss": 0.0064,
      "step": 3150
    },
    {
      "epoch": 0.7377575439210879,
      "grad_norm": 0.004956856369972229,
      "learning_rate": 0.00011693904020752271,
      "loss": 0.0048,
      "step": 3160
    },
    {
      "epoch": 0.7400922196929901,
      "grad_norm": 0.0028774456586688757,
      "learning_rate": 0.00011590142671854735,
      "loss": 0.001,
      "step": 3170
    },
    {
      "epoch": 0.7424268954648923,
      "grad_norm": 0.05007918179035187,
      "learning_rate": 0.00011486381322957198,
      "loss": 0.0014,
      "step": 3180
    },
    {
      "epoch": 0.7447615712367945,
      "grad_norm": 0.004933805204927921,
      "learning_rate": 0.00011382619974059663,
      "loss": 0.0001,
      "step": 3190
    },
    {
      "epoch": 0.7470962470086967,
      "grad_norm": 0.0028584490064531565,
      "learning_rate": 0.00011278858625162127,
      "loss": 0.011,
      "step": 3200
    },
    {
      "epoch": 0.7494309227805989,
      "grad_norm": 0.015388348139822483,
      "learning_rate": 0.00011175097276264593,
      "loss": 0.0004,
      "step": 3210
    },
    {
      "epoch": 0.751765598552501,
      "grad_norm": 0.04148218780755997,
      "learning_rate": 0.00011071335927367057,
      "loss": 0.0015,
      "step": 3220
    },
    {
      "epoch": 0.7541002743244032,
      "grad_norm": 0.6182008981704712,
      "learning_rate": 0.00010967574578469521,
      "loss": 0.0036,
      "step": 3230
    },
    {
      "epoch": 0.7564349500963053,
      "grad_norm": 0.0034724888391792774,
      "learning_rate": 0.00010863813229571984,
      "loss": 0.0002,
      "step": 3240
    },
    {
      "epoch": 0.7587696258682075,
      "grad_norm": 0.019181331619620323,
      "learning_rate": 0.00010760051880674449,
      "loss": 0.0002,
      "step": 3250
    },
    {
      "epoch": 0.7611043016401098,
      "grad_norm": 0.007054260466247797,
      "learning_rate": 0.00010656290531776913,
      "loss": 0.0004,
      "step": 3260
    },
    {
      "epoch": 0.7634389774120119,
      "grad_norm": 0.0026721367612481117,
      "learning_rate": 0.00010552529182879379,
      "loss": 0.0001,
      "step": 3270
    },
    {
      "epoch": 0.7657736531839141,
      "grad_norm": 0.001824371051043272,
      "learning_rate": 0.00010448767833981843,
      "loss": 0.0002,
      "step": 3280
    },
    {
      "epoch": 0.7681083289558163,
      "grad_norm": 0.2344302535057068,
      "learning_rate": 0.00010345006485084307,
      "loss": 0.0004,
      "step": 3290
    },
    {
      "epoch": 0.7704430047277184,
      "grad_norm": 0.0015703398967161775,
      "learning_rate": 0.0001024124513618677,
      "loss": 0.0,
      "step": 3300
    },
    {
      "epoch": 0.7727776804996206,
      "grad_norm": 0.0013199965469539165,
      "learning_rate": 0.00010137483787289235,
      "loss": 0.0,
      "step": 3310
    },
    {
      "epoch": 0.7751123562715228,
      "grad_norm": 0.001341565977782011,
      "learning_rate": 0.00010033722438391699,
      "loss": 0.0,
      "step": 3320
    },
    {
      "epoch": 0.777447032043425,
      "grad_norm": 0.0012060283916071057,
      "learning_rate": 9.929961089494165e-05,
      "loss": 0.0001,
      "step": 3330
    },
| { | |
| "epoch": 0.7797817078153272, | |
| "grad_norm": 0.0020445692352950573, | |
| "learning_rate": 9.826199740596628e-05, | |
| "loss": 0.0, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.7821163835872293, | |
| "grad_norm": 0.0010797139257192612, | |
| "learning_rate": 9.722438391699092e-05, | |
| "loss": 0.0, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.7844510593591315, | |
| "grad_norm": 0.0013513348530977964, | |
| "learning_rate": 9.618677042801558e-05, | |
| "loss": 0.0, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.7867857351310337, | |
| "grad_norm": 0.0008800049545243382, | |
| "learning_rate": 9.514915693904021e-05, | |
| "loss": 0.0, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.7891204109029358, | |
| "grad_norm": 0.001039789873175323, | |
| "learning_rate": 9.411154345006485e-05, | |
| "loss": 0.0, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.791455086674838, | |
| "grad_norm": 0.0011056034127250314, | |
| "learning_rate": 9.307392996108951e-05, | |
| "loss": 0.0, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.7937897624467402, | |
| "grad_norm": 0.00087336590513587, | |
| "learning_rate": 9.203631647211414e-05, | |
| "loss": 0.0005, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.7961244382186424, | |
| "grad_norm": 0.0016204583225771785, | |
| "learning_rate": 9.099870298313878e-05, | |
| "loss": 0.0, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.7984591139905446, | |
| "grad_norm": 0.0010950096184387803, | |
| "learning_rate": 8.996108949416342e-05, | |
| "loss": 0.0, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8007937897624468, | |
| "grad_norm": 0.0011948348255828023, | |
| "learning_rate": 8.892347600518807e-05, | |
| "loss": 0.0037, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.8031284655343489, | |
| "grad_norm": 0.009840068407356739, | |
| "learning_rate": 8.788586251621271e-05, | |
| "loss": 0.0001, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8054631413062511, | |
| "grad_norm": 0.0017067514127120376, | |
| "learning_rate": 8.684824902723735e-05, | |
| "loss": 0.0001, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8077978170781532, | |
| "grad_norm": 0.0011140963761135936, | |
| "learning_rate": 8.5810635538262e-05, | |
| "loss": 0.004, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8101324928500554, | |
| "grad_norm": 0.7195191979408264, | |
| "learning_rate": 8.477302204928664e-05, | |
| "loss": 0.0035, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.8124671686219577, | |
| "grad_norm": 0.0012634329032152891, | |
| "learning_rate": 8.373540856031128e-05, | |
| "loss": 0.0001, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.8148018443938598, | |
| "grad_norm": 0.0016726938774809241, | |
| "learning_rate": 8.269779507133593e-05, | |
| "loss": 0.0006, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.817136520165762, | |
| "grad_norm": 0.0019955493044108152, | |
| "learning_rate": 8.166018158236057e-05, | |
| "loss": 0.0006, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8194711959376642, | |
| "grad_norm": 0.0008943151333369315, | |
| "learning_rate": 8.062256809338522e-05, | |
| "loss": 0.0006, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.8218058717095663, | |
| "grad_norm": 0.0013045528903603554, | |
| "learning_rate": 7.958495460440986e-05, | |
| "loss": 0.0002, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.8241405474814685, | |
| "grad_norm": 0.0010028982069343328, | |
| "learning_rate": 7.85473411154345e-05, | |
| "loss": 0.0, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.8264752232533706, | |
| "grad_norm": 0.0007102734525687993, | |
| "learning_rate": 7.750972762645915e-05, | |
| "loss": 0.0001, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.8288098990252729, | |
| "grad_norm": 0.0014275240246206522, | |
| "learning_rate": 7.647211413748379e-05, | |
| "loss": 0.0, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.8311445747971751, | |
| "grad_norm": 0.0009326430154033005, | |
| "learning_rate": 7.543450064850843e-05, | |
| "loss": 0.0, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.8334792505690772, | |
| "grad_norm": 0.0008573593804612756, | |
| "learning_rate": 7.439688715953308e-05, | |
| "loss": 0.0001, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.8358139263409794, | |
| "grad_norm": 0.0015563720371574163, | |
| "learning_rate": 7.335927367055772e-05, | |
| "loss": 0.0002, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.8381486021128816, | |
| "grad_norm": 0.0008948877803049982, | |
| "learning_rate": 7.232166018158236e-05, | |
| "loss": 0.0, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.8404832778847837, | |
| "grad_norm": 0.0015601961640641093, | |
| "learning_rate": 7.1284046692607e-05, | |
| "loss": 0.0, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.8428179536566859, | |
| "grad_norm": 0.0013114233734086156, | |
| "learning_rate": 7.024643320363165e-05, | |
| "loss": 0.0073, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.8451526294285882, | |
| "grad_norm": 0.00176639249548316, | |
| "learning_rate": 6.920881971465629e-05, | |
| "loss": 0.0037, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.8474873052004903, | |
| "grad_norm": 0.002710576867684722, | |
| "learning_rate": 6.817120622568094e-05, | |
| "loss": 0.0001, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.8498219809723925, | |
| "grad_norm": 0.13074593245983124, | |
| "learning_rate": 6.713359273670558e-05, | |
| "loss": 0.001, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.8521566567442946, | |
| "grad_norm": 0.002523267176002264, | |
| "learning_rate": 6.609597924773022e-05, | |
| "loss": 0.0001, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.8544913325161968, | |
| "grad_norm": 0.002858164021745324, | |
| "learning_rate": 6.505836575875487e-05, | |
| "loss": 0.0003, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.856826008288099, | |
| "grad_norm": 0.002222646027803421, | |
| "learning_rate": 6.402075226977951e-05, | |
| "loss": 0.0006, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.8591606840600011, | |
| "grad_norm": 0.03722568228840828, | |
| "learning_rate": 6.298313878080415e-05, | |
| "loss": 0.0001, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.8614953598319034, | |
| "grad_norm": 0.0012012380175292492, | |
| "learning_rate": 6.19455252918288e-05, | |
| "loss": 0.0, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.8638300356038056, | |
| "grad_norm": 0.0019116230541840196, | |
| "learning_rate": 6.0907911802853433e-05, | |
| "loss": 0.0, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.8661647113757077, | |
| "grad_norm": 0.0011818850180134177, | |
| "learning_rate": 5.9870298313878084e-05, | |
| "loss": 0.0001, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.8684993871476099, | |
| "grad_norm": 0.0008876454085111618, | |
| "learning_rate": 5.883268482490273e-05, | |
| "loss": 0.0001, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.870834062919512, | |
| "grad_norm": 0.0011559055419638753, | |
| "learning_rate": 5.7795071335927364e-05, | |
| "loss": 0.0013, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.8731687386914142, | |
| "grad_norm": 0.0008210024680010974, | |
| "learning_rate": 5.6757457846952014e-05, | |
| "loss": 0.0001, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.8755034144633164, | |
| "grad_norm": 0.0019268837058916688, | |
| "learning_rate": 5.571984435797666e-05, | |
| "loss": 0.0001, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.8778380902352186, | |
| "grad_norm": 0.40103089809417725, | |
| "learning_rate": 5.4682230869001294e-05, | |
| "loss": 0.0006, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.8801727660071208, | |
| "grad_norm": 0.002693564398214221, | |
| "learning_rate": 5.3644617380025944e-05, | |
| "loss": 0.002, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.882507441779023, | |
| "grad_norm": 0.11337973922491074, | |
| "learning_rate": 5.260700389105059e-05, | |
| "loss": 0.0002, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.8848421175509251, | |
| "grad_norm": 0.000948163156863302, | |
| "learning_rate": 5.156939040207524e-05, | |
| "loss": 0.01, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.8871767933228273, | |
| "grad_norm": 0.0012289845617488027, | |
| "learning_rate": 5.053177691309987e-05, | |
| "loss": 0.0051, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.8895114690947294, | |
| "grad_norm": 1.0166712999343872, | |
| "learning_rate": 4.949416342412452e-05, | |
| "loss": 0.0009, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.8918461448666316, | |
| "grad_norm": 0.0020474784541875124, | |
| "learning_rate": 4.845654993514916e-05, | |
| "loss": 0.0003, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.8941808206385338, | |
| "grad_norm": 0.0022713476791977882, | |
| "learning_rate": 4.74189364461738e-05, | |
| "loss": 0.0001, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.896515496410436, | |
| "grad_norm": 0.004341310355812311, | |
| "learning_rate": 4.638132295719845e-05, | |
| "loss": 0.0, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.8988501721823382, | |
| "grad_norm": 0.0015770102618262172, | |
| "learning_rate": 4.534370946822309e-05, | |
| "loss": 0.0001, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9011848479542404, | |
| "grad_norm": 0.012255331501364708, | |
| "learning_rate": 4.430609597924773e-05, | |
| "loss": 0.0001, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.9035195237261425, | |
| "grad_norm": 0.02712065726518631, | |
| "learning_rate": 4.326848249027238e-05, | |
| "loss": 0.0001, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.9058541994980447, | |
| "grad_norm": 0.001247554668225348, | |
| "learning_rate": 4.223086900129702e-05, | |
| "loss": 0.0, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.9081888752699468, | |
| "grad_norm": 0.0011685139033943415, | |
| "learning_rate": 4.119325551232166e-05, | |
| "loss": 0.0, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.910523551041849, | |
| "grad_norm": 0.0018253360176458955, | |
| "learning_rate": 4.015564202334631e-05, | |
| "loss": 0.0, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9128582268137513, | |
| "grad_norm": 0.0008875974453985691, | |
| "learning_rate": 3.9118028534370945e-05, | |
| "loss": 0.0, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.9151929025856534, | |
| "grad_norm": 0.0011628433130681515, | |
| "learning_rate": 3.808041504539559e-05, | |
| "loss": 0.0001, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.9175275783575556, | |
| "grad_norm": 0.001058564055711031, | |
| "learning_rate": 3.704280155642024e-05, | |
| "loss": 0.0, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.9198622541294578, | |
| "grad_norm": 0.0010234726360067725, | |
| "learning_rate": 3.6005188067444876e-05, | |
| "loss": 0.0, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.9221969299013599, | |
| "grad_norm": 0.0009812922216951847, | |
| "learning_rate": 3.496757457846952e-05, | |
| "loss": 0.0004, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.9245316056732621, | |
| "grad_norm": 0.0009394401567988098, | |
| "learning_rate": 3.392996108949417e-05, | |
| "loss": 0.0, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.9268662814451643, | |
| "grad_norm": 0.0009512811666354537, | |
| "learning_rate": 3.2892347600518806e-05, | |
| "loss": 0.0001, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.9292009572170665, | |
| "grad_norm": 0.0007261955761350691, | |
| "learning_rate": 3.185473411154345e-05, | |
| "loss": 0.0, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.9315356329889687, | |
| "grad_norm": 0.0010610457975417376, | |
| "learning_rate": 3.08171206225681e-05, | |
| "loss": 0.0, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.9338703087608708, | |
| "grad_norm": 0.0012513543479144573, | |
| "learning_rate": 2.9779507133592736e-05, | |
| "loss": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.936204984532773, | |
| "grad_norm": 0.0014717354206368327, | |
| "learning_rate": 2.874189364461738e-05, | |
| "loss": 0.0011, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.9385396603046752, | |
| "grad_norm": 0.0008392130257561803, | |
| "learning_rate": 2.7704280155642027e-05, | |
| "loss": 0.0, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.9408743360765773, | |
| "grad_norm": 0.0015690367436036468, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.0, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.9432090118484795, | |
| "grad_norm": 0.0011248665396124125, | |
| "learning_rate": 2.562905317769131e-05, | |
| "loss": 0.002, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.9455436876203818, | |
| "grad_norm": 0.0010001506889238954, | |
| "learning_rate": 2.4591439688715953e-05, | |
| "loss": 0.0, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.9478783633922839, | |
| "grad_norm": 0.001105117262341082, | |
| "learning_rate": 2.3553826199740597e-05, | |
| "loss": 0.0, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.9502130391641861, | |
| "grad_norm": 0.010793734341859818, | |
| "learning_rate": 2.251621271076524e-05, | |
| "loss": 0.0001, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.9525477149360883, | |
| "grad_norm": 0.0010343483882024884, | |
| "learning_rate": 2.1478599221789884e-05, | |
| "loss": 0.0001, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.9548823907079904, | |
| "grad_norm": 0.0006852949154563248, | |
| "learning_rate": 2.0440985732814527e-05, | |
| "loss": 0.0, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.9572170664798926, | |
| "grad_norm": 0.001185077242553234, | |
| "learning_rate": 1.940337224383917e-05, | |
| "loss": 0.0, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.9595517422517947, | |
| "grad_norm": 0.055520687252283096, | |
| "learning_rate": 1.8365758754863814e-05, | |
| "loss": 0.0001, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.961886418023697, | |
| "grad_norm": 0.0012979560997337103, | |
| "learning_rate": 1.7328145265888457e-05, | |
| "loss": 0.0, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.9642210937955992, | |
| "grad_norm": 0.0013245136942714453, | |
| "learning_rate": 1.62905317769131e-05, | |
| "loss": 0.0, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.9665557695675013, | |
| "grad_norm": 0.0013994915643706918, | |
| "learning_rate": 1.5252918287937746e-05, | |
| "loss": 0.002, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.9688904453394035, | |
| "grad_norm": 0.6272192001342773, | |
| "learning_rate": 1.4215304798962386e-05, | |
| "loss": 0.0013, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.9712251211113057, | |
| "grad_norm": 0.0012450398644432425, | |
| "learning_rate": 1.3177691309987031e-05, | |
| "loss": 0.0002, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.9735597968832078, | |
| "grad_norm": 0.0009830017806962132, | |
| "learning_rate": 1.2140077821011673e-05, | |
| "loss": 0.0001, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.97589447265511, | |
| "grad_norm": 0.0007283110171556473, | |
| "learning_rate": 1.1102464332036316e-05, | |
| "loss": 0.0, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.9782291484270123, | |
| "grad_norm": 0.0008772446890361607, | |
| "learning_rate": 1.006485084306096e-05, | |
| "loss": 0.0, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.9805638241989144, | |
| "grad_norm": 0.0007983844261616468, | |
| "learning_rate": 9.027237354085603e-06, | |
| "loss": 0.0002, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.9828984999708166, | |
| "grad_norm": 0.0045978049747645855, | |
| "learning_rate": 7.989623865110247e-06, | |
| "loss": 0.0001, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.9852331757427187, | |
| "grad_norm": 0.0006691565504297614, | |
| "learning_rate": 6.95201037613489e-06, | |
| "loss": 0.0, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.9875678515146209, | |
| "grad_norm": 0.000847226707264781, | |
| "learning_rate": 5.9143968871595335e-06, | |
| "loss": 0.0, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.9899025272865231, | |
| "grad_norm": 0.0008708458044566214, | |
| "learning_rate": 4.876783398184177e-06, | |
| "loss": 0.0, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.9922372030584252, | |
| "grad_norm": 0.001017833361402154, | |
| "learning_rate": 3.83916990920882e-06, | |
| "loss": 0.0, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.9945718788303275, | |
| "grad_norm": 0.001224992680363357, | |
| "learning_rate": 2.8015564202334633e-06, | |
| "loss": 0.0, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.9969065546022297, | |
| "grad_norm": 0.0007001100457273424, | |
| "learning_rate": 1.7639429312581063e-06, | |
| "loss": 0.0001, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.9992412303741318, | |
| "grad_norm": 0.0008271584520116448, | |
| "learning_rate": 7.263294422827498e-07, | |
| "loss": 0.0001, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9999124458258547, | |
| "eval_loss": 0.00038632494397461414, | |
| "eval_runtime": 4629.6003, | |
| "eval_samples_per_second": 14.802, | |
| "eval_steps_per_second": 0.925, | |
| "step": 4284 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4284, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.44199965772096e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
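
The state above is plain data, but it is straightforward to inspect programmatically. For orientation: the logged `learning_rate` values appear consistent with a linear schedule, warming up over roughly the first 430 steps (about 10% of the 4284 total) to a peak near 4e-4 and then decaying linearly to 0 at the final step; this is inferred from the logged values, not stated anywhere in the file. Below is a minimal sketch, using only the Python standard library, for reading the log back out. It assumes the JSON above is saved as `trainer_state.json` (the filename is an assumption); every key it touches (`log_history`, `logging_steps`, `best_metric`, `best_global_step`, and the per-entry `step`, `loss`, `learning_rate`, `eval_*` fields) appears verbatim in the state above.

```python
# Minimal sketch for inspecting the trainer state shown above.
# Assumption: the JSON is saved locally as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; the final evaluation entry carries
# "eval_*" keys instead, so split the log on the keys actually present.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

steps = [e["step"] for e in train_log]
losses = [e["loss"] for e in train_log]
lrs = [e["learning_rate"] for e in train_log]

print(f"logged points: {len(train_log)} (one every {state['logging_steps']} steps)")
print(f"final train loss @ step {steps[-1]}: {losses[-1]}")
print(f"peak learning rate observed: {max(lrs)}")
print(f"best metric: {state['best_metric']} at step {state['best_global_step']}")
for e in eval_log:
    print(f"eval @ step {e['step']}: "
          f"accuracy={e['eval_accuracy']}, loss={e['eval_loss']}")
```

The same `steps`/`losses`/`lrs` lists can be handed to any plotting library to visualize the loss curve and the warmup-then-decay learning-rate schedule directly from this file, without reloading the model or the training run.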