| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 5871, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005109862033725089, | |
| "grad_norm": 57.87140382113745, | |
| "learning_rate": 5e-06, | |
| "loss": 2.3817, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010219724067450179, | |
| "grad_norm": 22.753974610258698, | |
| "learning_rate": 5e-06, | |
| "loss": 1.7816, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.015329586101175269, | |
| "grad_norm": 10.200028852824934, | |
| "learning_rate": 5e-06, | |
| "loss": 1.4328, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.020439448134900357, | |
| "grad_norm": 10.263199576103078, | |
| "learning_rate": 5e-06, | |
| "loss": 1.2383, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.025549310168625446, | |
| "grad_norm": 4.8615267890156915, | |
| "learning_rate": 5e-06, | |
| "loss": 1.131, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.030659172202350538, | |
| "grad_norm": 4.118120498213385, | |
| "learning_rate": 5e-06, | |
| "loss": 1.0356, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03576903423607562, | |
| "grad_norm": 2.8275806453598338, | |
| "learning_rate": 5e-06, | |
| "loss": 1.0006, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.040878896269800714, | |
| "grad_norm": 2.315792934153761, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9655, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.045988758303525806, | |
| "grad_norm": 2.165672978783245, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9467, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05109862033725089, | |
| "grad_norm": 1.5488035180810096, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9281, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05620848237097598, | |
| "grad_norm": 1.7243341087325224, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9128, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.061318344404701075, | |
| "grad_norm": 1.5359895854005425, | |
| "learning_rate": 5e-06, | |
| "loss": 0.9031, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06642820643842616, | |
| "grad_norm": 1.435850065371256, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8922, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07153806847215124, | |
| "grad_norm": 1.2790521202391505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8765, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07664793050587634, | |
| "grad_norm": 1.5917071745489466, | |
| "learning_rate": 5e-06, | |
| "loss": 0.883, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08175779253960143, | |
| "grad_norm": 1.4686990635112733, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8874, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08686765457332651, | |
| "grad_norm": 1.2956583323307207, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8411, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09197751660705161, | |
| "grad_norm": 1.2357587318309058, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8726, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0970873786407767, | |
| "grad_norm": 1.1075519305139339, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8548, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10219724067450178, | |
| "grad_norm": 1.086727474202395, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8493, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10730710270822688, | |
| "grad_norm": 1.0270014162441539, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8545, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11241696474195197, | |
| "grad_norm": 0.8505362329385927, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8388, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11752682677567705, | |
| "grad_norm": 1.109098072362057, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8384, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12263668880940215, | |
| "grad_norm": 1.2809391093206264, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8471, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12774655084312725, | |
| "grad_norm": 0.9245822751345097, | |
| "learning_rate": 5e-06, | |
| "loss": 0.836, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13285641287685232, | |
| "grad_norm": 0.8873874884959316, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8283, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13796627491057742, | |
| "grad_norm": 0.6839627248028421, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8191, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1430761369443025, | |
| "grad_norm": 0.8006134542844254, | |
| "learning_rate": 5e-06, | |
| "loss": 0.82, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1481859989780276, | |
| "grad_norm": 0.7249813175028869, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8254, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1532958610117527, | |
| "grad_norm": 0.611471064933341, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8148, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15840572304547776, | |
| "grad_norm": 0.8403753613185879, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8091, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16351558507920286, | |
| "grad_norm": 0.5250234838001376, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8328, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16862544711292796, | |
| "grad_norm": 0.5411140081911021, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8171, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17373530914665303, | |
| "grad_norm": 0.7002721154510084, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8073, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.17884517118037813, | |
| "grad_norm": 0.7366019714677299, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8139, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18395503321410323, | |
| "grad_norm": 0.4960107622038265, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8195, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1890648952478283, | |
| "grad_norm": 0.590921742782821, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8131, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1941747572815534, | |
| "grad_norm": 0.43046345355299165, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8166, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1992846193152785, | |
| "grad_norm": 0.37156340060616266, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8082, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.20439448134900357, | |
| "grad_norm": 0.5029827349762587, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8187, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.20950434338272866, | |
| "grad_norm": 0.39268177137347837, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8132, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21461420541645376, | |
| "grad_norm": 0.38787210509179787, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8153, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.21972406745017883, | |
| "grad_norm": 0.5073537687615494, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8032, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22483392948390393, | |
| "grad_norm": 0.36232141966438186, | |
| "learning_rate": 5e-06, | |
| "loss": 0.786, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.22994379151762903, | |
| "grad_norm": 0.3362374048828632, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8019, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2350536535513541, | |
| "grad_norm": 0.3661481336901712, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8012, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2401635155850792, | |
| "grad_norm": 0.3186478374299407, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8088, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2452733776188043, | |
| "grad_norm": 0.3742412176358808, | |
| "learning_rate": 5e-06, | |
| "loss": 0.795, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2503832396525294, | |
| "grad_norm": 0.3687134051282934, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7899, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2554931016862545, | |
| "grad_norm": 0.2844684124400157, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7992, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.26060296371997954, | |
| "grad_norm": 0.3445705328013273, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7908, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26571282575370464, | |
| "grad_norm": 0.3100104564359116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7874, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.27082268778742974, | |
| "grad_norm": 0.32495110543783357, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7982, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27593254982115484, | |
| "grad_norm": 0.27997656185948167, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7954, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.28104241185487994, | |
| "grad_norm": 0.4055280072220847, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7895, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.286152273888605, | |
| "grad_norm": 0.2751499734062912, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7931, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2912621359223301, | |
| "grad_norm": 0.3232277546201772, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7972, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.2963719979560552, | |
| "grad_norm": 0.2915040535303487, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7912, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3014818599897803, | |
| "grad_norm": 0.2637267252402153, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7903, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3065917220235054, | |
| "grad_norm": 0.329139944193737, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7906, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3117015840572305, | |
| "grad_norm": 0.24558162809212852, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7947, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3168114460909555, | |
| "grad_norm": 0.27224578177967845, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7998, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3219213081246806, | |
| "grad_norm": 0.2379116871443806, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7702, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3270311701584057, | |
| "grad_norm": 0.2609800304013704, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7799, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3321410321921308, | |
| "grad_norm": 0.23810454322283608, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7897, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3372508942258559, | |
| "grad_norm": 0.252487995051141, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7726, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.342360756259581, | |
| "grad_norm": 0.24925719360242624, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7978, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.34747061829330605, | |
| "grad_norm": 0.2656149332274455, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7915, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.35258048032703115, | |
| "grad_norm": 0.2661464494899381, | |
| "learning_rate": 5e-06, | |
| "loss": 0.78, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.35769034236075625, | |
| "grad_norm": 0.20145837945729328, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7734, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36280020439448135, | |
| "grad_norm": 0.26148666494272177, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7792, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.36791006642820645, | |
| "grad_norm": 0.24346683962909085, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7816, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.37301992846193155, | |
| "grad_norm": 0.22278218589963927, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7827, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3781297904956566, | |
| "grad_norm": 0.2303051334399858, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7707, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3832396525293817, | |
| "grad_norm": 0.2543406962739334, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7721, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3883495145631068, | |
| "grad_norm": 0.22568609222745584, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7836, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3934593765968319, | |
| "grad_norm": 0.2294845661519681, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7872, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.398569238630557, | |
| "grad_norm": 0.2197759747155408, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7681, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4036791006642821, | |
| "grad_norm": 0.22484907906751414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7797, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.40878896269800713, | |
| "grad_norm": 0.21381439198625019, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7891, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.41389882473173223, | |
| "grad_norm": 0.21726381466920616, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7779, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4190086867654573, | |
| "grad_norm": 0.21218649602843365, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7756, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4241185487991824, | |
| "grad_norm": 0.2321703903701098, | |
| "learning_rate": 5e-06, | |
| "loss": 0.778, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4292284108329075, | |
| "grad_norm": 0.21376686462742833, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7754, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4343382728666326, | |
| "grad_norm": 0.20773121342846562, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7729, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43944813490035767, | |
| "grad_norm": 0.23075209429146118, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7743, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.44455799693408277, | |
| "grad_norm": 0.21889813942522324, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7872, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44966785896780787, | |
| "grad_norm": 0.23158111213452945, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7729, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.45477772100153296, | |
| "grad_norm": 0.23759359366214816, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7781, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45988758303525806, | |
| "grad_norm": 0.20737512188117463, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7733, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.46499744506898316, | |
| "grad_norm": 0.21978896076440746, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7724, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.4701073071027082, | |
| "grad_norm": 0.21326048741734183, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7779, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.4752171691364333, | |
| "grad_norm": 0.2127733995734116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7862, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.4803270311701584, | |
| "grad_norm": 0.20545254783011138, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7741, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 0.22401938020775877, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7879, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4905467552376086, | |
| "grad_norm": 0.20313108726519555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7795, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4956566172713337, | |
| "grad_norm": 0.20676236489485517, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7658, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5007664793050588, | |
| "grad_norm": 0.1997349100496237, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7665, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5058763413387839, | |
| "grad_norm": 0.18582330695996696, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7678, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.510986203372509, | |
| "grad_norm": 0.20480578385489007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7916, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.516096065406234, | |
| "grad_norm": 0.2068595244192038, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7728, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5212059274399591, | |
| "grad_norm": 0.19202510728167327, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7875, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.20142193391262955, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7691, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5314256515074093, | |
| "grad_norm": 0.19153357968487428, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7717, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5365355135411344, | |
| "grad_norm": 0.21070221575444148, | |
| "learning_rate": 5e-06, | |
| "loss": 0.771, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5416453755748595, | |
| "grad_norm": 0.19388651883167224, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7674, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5467552376085846, | |
| "grad_norm": 0.19235966239305358, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7639, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5518650996423097, | |
| "grad_norm": 0.2020128053771939, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7838, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5569749616760348, | |
| "grad_norm": 0.19898211373916952, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7734, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5620848237097599, | |
| "grad_norm": 0.19556108367687597, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7668, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.567194685743485, | |
| "grad_norm": 0.19409586285487196, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7778, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.57230454777721, | |
| "grad_norm": 0.20681598650606026, | |
| "learning_rate": 5e-06, | |
| "loss": 0.779, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5774144098109351, | |
| "grad_norm": 0.2017546177375358, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7573, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5825242718446602, | |
| "grad_norm": 0.1913755514728684, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7625, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5876341338783853, | |
| "grad_norm": 0.18303523769215124, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7752, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5927439959121104, | |
| "grad_norm": 0.19646699079126273, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7734, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5978538579458355, | |
| "grad_norm": 0.18589450588787854, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7631, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6029637199795606, | |
| "grad_norm": 0.19141416094028266, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7721, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6080735820132857, | |
| "grad_norm": 0.2010700371236207, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7723, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6131834440470108, | |
| "grad_norm": 0.209693144160535, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7709, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6182933060807358, | |
| "grad_norm": 0.19006264249604685, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7731, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.623403168114461, | |
| "grad_norm": 0.19625031587081004, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7683, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.628513030148186, | |
| "grad_norm": 0.19078272960681916, | |
| "learning_rate": 5e-06, | |
| "loss": 0.773, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.633622892181911, | |
| "grad_norm": 0.2007688157803181, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7533, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6387327542156361, | |
| "grad_norm": 0.20004504142951482, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7721, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6438426162493612, | |
| "grad_norm": 0.18940428161938747, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7525, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6489524782830863, | |
| "grad_norm": 0.19192245014604287, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7743, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6540623403168114, | |
| "grad_norm": 0.18803291404020508, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7781, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6591722023505365, | |
| "grad_norm": 0.19034763073417169, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7609, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6642820643842616, | |
| "grad_norm": 0.2000260745898994, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7493, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6693919264179867, | |
| "grad_norm": 0.19721212735320226, | |
| "learning_rate": 5e-06, | |
| "loss": 0.759, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6745017884517118, | |
| "grad_norm": 0.1840594216152482, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7554, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6796116504854369, | |
| "grad_norm": 0.19294533218500587, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7601, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.684721512519162, | |
| "grad_norm": 0.2003437533474394, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7665, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6898313745528871, | |
| "grad_norm": 0.19522682692250634, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7682, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6949412365866121, | |
| "grad_norm": 0.19114649796975278, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7675, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7000510986203372, | |
| "grad_norm": 0.18795490185304542, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7514, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7051609606540623, | |
| "grad_norm": 0.19982770311498202, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7587, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7102708226877874, | |
| "grad_norm": 0.2094868748722276, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7424, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7153806847215125, | |
| "grad_norm": 0.18141939898186682, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7668, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7204905467552376, | |
| "grad_norm": 0.184317543793234, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7659, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7256004087889627, | |
| "grad_norm": 0.19482359157799217, | |
| "learning_rate": 5e-06, | |
| "loss": 0.766, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7307102708226878, | |
| "grad_norm": 0.20609416559465576, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7591, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7358201328564129, | |
| "grad_norm": 0.1733817918744796, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7657, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.740929994890138, | |
| "grad_norm": 0.20231819059208814, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7693, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7460398569238631, | |
| "grad_norm": 0.19384075901742115, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7677, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7511497189575882, | |
| "grad_norm": 0.20242534213073207, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7658, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7562595809913132, | |
| "grad_norm": 0.18992152096280124, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7604, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7613694430250383, | |
| "grad_norm": 0.20300312286644698, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7654, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.7664793050587634, | |
| "grad_norm": 0.2110214114358105, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7579, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7715891670924885, | |
| "grad_norm": 0.18507470375993035, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7535, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 0.20773697020515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7681, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7818088911599387, | |
| "grad_norm": 0.1846942481775099, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7589, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.7869187531936638, | |
| "grad_norm": 0.19873266556747132, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7701, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7920286152273889, | |
| "grad_norm": 0.1889722343751879, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7637, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.797138477261114, | |
| "grad_norm": 0.2160555515101488, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7459, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8022483392948391, | |
| "grad_norm": 0.18958425843094595, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7499, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8073582013285642, | |
| "grad_norm": 0.18917128647246217, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7572, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8124680633622893, | |
| "grad_norm": 0.17914225162735836, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7506, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8175779253960143, | |
| "grad_norm": 0.19189511614416635, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7613, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8226877874297394, | |
| "grad_norm": 0.19059344363394998, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7604, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8277976494634645, | |
| "grad_norm": 0.20852250489781288, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7737, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8329075114971896, | |
| "grad_norm": 0.1877964215413997, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7725, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8380173735309147, | |
| "grad_norm": 0.18419774730049385, | |
| "learning_rate": 5e-06, | |
| "loss": 0.757, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8431272355646398, | |
| "grad_norm": 0.1926784804428757, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7515, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8482370975983649, | |
| "grad_norm": 0.19627214391258455, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7718, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.85334695963209, | |
| "grad_norm": 0.1944045450874202, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7449, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.858456821665815, | |
| "grad_norm": 0.18125259142482736, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7604, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.8635666836995401, | |
| "grad_norm": 0.17817637475202877, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7641, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.8686765457332652, | |
| "grad_norm": 0.20471658407314103, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7734, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8737864077669902, | |
| "grad_norm": 0.1878161208418682, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7696, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.8788962698007153, | |
| "grad_norm": 0.18334210713527221, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7536, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.8840061318344404, | |
| "grad_norm": 0.18076699199021762, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7522, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8891159938681655, | |
| "grad_norm": 0.19181861440876702, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7625, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8942258559018906, | |
| "grad_norm": 0.17965104525055625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.754, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8993357179356157, | |
| "grad_norm": 0.19942739585455946, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7663, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9044455799693408, | |
| "grad_norm": 0.19441177912329213, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7643, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9095554420030659, | |
| "grad_norm": 0.1989153914067934, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7538, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.914665304036791, | |
| "grad_norm": 0.18779250602110079, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7582, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9197751660705161, | |
| "grad_norm": 0.18880847142963628, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7834, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9248850281042412, | |
| "grad_norm": 0.19100671149679851, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7633, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9299948901379663, | |
| "grad_norm": 0.19156019315031683, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7644, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9351047521716913, | |
| "grad_norm": 0.18435442552610007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7597, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9402146142054164, | |
| "grad_norm": 0.2051724242213117, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7504, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9453244762391415, | |
| "grad_norm": 0.18536587248191086, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7396, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9504343382728666, | |
| "grad_norm": 0.17780024213447235, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7506, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.9555442003065917, | |
| "grad_norm": 0.18708967218779626, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7455, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.9606540623403168, | |
| "grad_norm": 0.17747103600840475, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7444, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.9657639243740419, | |
| "grad_norm": 0.19063629309146018, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7533, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 0.19471871824403422, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7539, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9759836484414921, | |
| "grad_norm": 0.18403984668742995, | |
| "learning_rate": 5e-06, | |
| "loss": 0.766, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.9810935104752172, | |
| "grad_norm": 0.19270062603489418, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7661, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.9862033725089423, | |
| "grad_norm": 0.19463685028697894, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7591, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.9913132345426674, | |
| "grad_norm": 0.18870267371498323, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7619, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.9964230965763924, | |
| "grad_norm": 0.19561509144751293, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7727, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.0015329586101176, | |
| "grad_norm": 0.1865614045173204, | |
| "learning_rate": 5e-06, | |
| "loss": 0.753, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.0066428206438427, | |
| "grad_norm": 0.17671337095527262, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7557, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.0117526826775678, | |
| "grad_norm": 0.19010483409505236, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7446, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.016862544711293, | |
| "grad_norm": 0.17413564167981435, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7389, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.021972406745018, | |
| "grad_norm": 0.18070567046481728, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7451, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0270822687787429, | |
| "grad_norm": 0.2047148011452083, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7404, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.032192130812468, | |
| "grad_norm": 0.19369702939207809, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7432, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.037301992846193, | |
| "grad_norm": 0.19067195646001925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7396, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0424118548799182, | |
| "grad_norm": 0.20517054385871566, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7312, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0475217169136433, | |
| "grad_norm": 0.19412259781936536, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7434, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 0.1873188104877111, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7411, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.0577414409810935, | |
| "grad_norm": 0.1897369700854769, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7345, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.0628513030148186, | |
| "grad_norm": 0.1853976118509793, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7549, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.0679611650485437, | |
| "grad_norm": 0.17933568978007214, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7531, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.0730710270822688, | |
| "grad_norm": 0.20312467005419368, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7498, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.0781808891159939, | |
| "grad_norm": 0.18428135458855252, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7404, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.083290751149719, | |
| "grad_norm": 0.1973755972738785, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7329, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.088400613183444, | |
| "grad_norm": 0.1917623224859124, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7555, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.0935104752171692, | |
| "grad_norm": 0.18406588696688597, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7433, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.0986203372508943, | |
| "grad_norm": 0.17921040061727433, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7305, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.1037301992846194, | |
| "grad_norm": 0.18963146030246644, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7397, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.1088400613183444, | |
| "grad_norm": 0.18712686418913257, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7409, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.1139499233520695, | |
| "grad_norm": 0.19771359798461643, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7427, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1190597853857946, | |
| "grad_norm": 0.1840406013875161, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7611, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.1241696474195197, | |
| "grad_norm": 0.18827676786628125, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7364, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.1292795094532448, | |
| "grad_norm": 0.191153055094231, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7539, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.13438937148697, | |
| "grad_norm": 0.1822666967889064, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7414, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.139499233520695, | |
| "grad_norm": 0.19699245088881503, | |
| "learning_rate": 5e-06, | |
| "loss": 0.738, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.14460909555442, | |
| "grad_norm": 0.1898722587971926, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7461, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.1497189575881452, | |
| "grad_norm": 0.18927249674651098, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7429, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.1548288196218701, | |
| "grad_norm": 0.18896514284627008, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7549, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.1599386816555952, | |
| "grad_norm": 0.18739970646008372, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7499, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.1650485436893203, | |
| "grad_norm": 0.1910100009129843, | |
| "learning_rate": 5e-06, | |
| "loss": 0.747, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.1701584057230454, | |
| "grad_norm": 0.20198153170551428, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7386, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.1752682677567705, | |
| "grad_norm": 0.18720641288978465, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7578, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.1803781297904956, | |
| "grad_norm": 0.18961987449195758, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7529, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.1854879918242207, | |
| "grad_norm": 0.17712198248177036, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7476, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.1905978538579458, | |
| "grad_norm": 0.18490722732969878, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7437, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.195707715891671, | |
| "grad_norm": 0.18822524406653396, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7469, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.200817577925396, | |
| "grad_norm": 0.17979619340691932, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7476, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.205927439959121, | |
| "grad_norm": 0.19302201862857232, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7519, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.2110373019928462, | |
| "grad_norm": 0.17331596795239365, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7421, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.2161471640265713, | |
| "grad_norm": 0.1900974772777454, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7491, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2212570260602964, | |
| "grad_norm": 0.18235079887869074, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7398, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.2263668880940215, | |
| "grad_norm": 0.1990024061618005, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7367, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.2314767501277466, | |
| "grad_norm": 0.19774880108315787, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7509, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.2365866121614717, | |
| "grad_norm": 0.18038613594979708, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7397, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.2416964741951968, | |
| "grad_norm": 0.19148490320343095, | |
| "learning_rate": 5e-06, | |
| "loss": 0.738, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.246806336228922, | |
| "grad_norm": 0.1764579818726389, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7344, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.2519161982626468, | |
| "grad_norm": 0.19292171667184566, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7386, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.257026060296372, | |
| "grad_norm": 0.1830054013064937, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7428, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.262135922330097, | |
| "grad_norm": 0.1771188524320852, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7349, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.2672457843638223, | |
| "grad_norm": 0.19308200340380383, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7428, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.2723556463975472, | |
| "grad_norm": 0.17723395601019812, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7271, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.2774655084312725, | |
| "grad_norm": 0.21808963432632347, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7551, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.2825753704649974, | |
| "grad_norm": 0.1944324454402299, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7388, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.2876852324987225, | |
| "grad_norm": 0.1737167892046253, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7383, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.2927950945324476, | |
| "grad_norm": 0.19316243219818216, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7447, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.2979049565661727, | |
| "grad_norm": 0.18271045253382115, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7439, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.3030148185998978, | |
| "grad_norm": 0.19235060247622612, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7362, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.3081246806336229, | |
| "grad_norm": 0.17975422534798727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7355, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.313234542667348, | |
| "grad_norm": 0.19133431284185412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7474, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.318344404701073, | |
| "grad_norm": 0.18525107476229646, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7362, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3234542667347982, | |
| "grad_norm": 0.17940598778920888, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7456, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.3285641287685233, | |
| "grad_norm": 0.18377019808836909, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7371, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.3336739908022484, | |
| "grad_norm": 0.19380006248186266, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7382, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.3387838528359735, | |
| "grad_norm": 0.19130341477919996, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7249, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.3438937148696986, | |
| "grad_norm": 0.19399937386692442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7461, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.3490035769034237, | |
| "grad_norm": 0.19026606995853784, | |
| "learning_rate": 5e-06, | |
| "loss": 0.744, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.3541134389371488, | |
| "grad_norm": 0.17865393066414276, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7562, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.3592233009708738, | |
| "grad_norm": 0.18268806699965215, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7369, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.364333163004599, | |
| "grad_norm": 0.1859894512893362, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7303, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.369443025038324, | |
| "grad_norm": 0.1858579729895718, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7338, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.3745528870720491, | |
| "grad_norm": 0.18844334675064925, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7399, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.379662749105774, | |
| "grad_norm": 0.19488780104365555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7297, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.3847726111394993, | |
| "grad_norm": 0.2012609184785339, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7438, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.3898824731732242, | |
| "grad_norm": 0.19240664121181153, | |
| "learning_rate": 5e-06, | |
| "loss": 0.724, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.3949923352069495, | |
| "grad_norm": 0.1989866337354731, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7387, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.4001021972406744, | |
| "grad_norm": 0.19173593318229185, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7389, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.4052120592743995, | |
| "grad_norm": 0.18490218706957628, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7354, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.4103219213081246, | |
| "grad_norm": 0.19596927441115194, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7294, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.4154317833418497, | |
| "grad_norm": 0.1884149575008095, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7535, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.4205416453755748, | |
| "grad_norm": 0.19011351454021505, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7311, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4256515074093, | |
| "grad_norm": 0.18330086878582655, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7238, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.430761369443025, | |
| "grad_norm": 0.18245127655957494, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7297, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.43587123147675, | |
| "grad_norm": 0.1899683440493854, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7388, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.4409810935104752, | |
| "grad_norm": 0.17955923753560576, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7382, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.4460909555442003, | |
| "grad_norm": 0.19252118964036657, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7428, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.4512008175779254, | |
| "grad_norm": 0.1993095814580447, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7453, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.4563106796116505, | |
| "grad_norm": 0.19453340379561043, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7316, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4614205416453756, | |
| "grad_norm": 0.18991487065634022, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7386, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.4665304036791007, | |
| "grad_norm": 0.18973483396294438, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7466, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.4716402657128258, | |
| "grad_norm": 0.1919078737205217, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7495, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.476750127746551, | |
| "grad_norm": 0.1839372662925669, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7389, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.481859989780276, | |
| "grad_norm": 0.19249510537236922, | |
| "learning_rate": 5e-06, | |
| "loss": 0.749, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.486969851814001, | |
| "grad_norm": 0.179246890906481, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7282, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.4920797138477262, | |
| "grad_norm": 0.19514232006253573, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7446, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.497189575881451, | |
| "grad_norm": 0.18596165643891152, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7334, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.5022994379151764, | |
| "grad_norm": 0.18786595484474303, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7279, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.5074092999489013, | |
| "grad_norm": 0.1971738538736254, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7461, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5125191619826266, | |
| "grad_norm": 0.18866040282412957, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7474, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.5176290240163515, | |
| "grad_norm": 0.19040908132898832, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7445, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.5227388860500768, | |
| "grad_norm": 0.1929810400978154, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7362, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.5278487480838017, | |
| "grad_norm": 0.19442375737357984, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7305, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.532958610117527, | |
| "grad_norm": 0.18546826858387847, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7311, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.5380684721512519, | |
| "grad_norm": 0.18542784404927515, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7447, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.543178334184977, | |
| "grad_norm": 0.2020846723209545, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7113, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.548288196218702, | |
| "grad_norm": 0.19026884893782828, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7157, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.5533980582524272, | |
| "grad_norm": 0.18111421662938304, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7323, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.5585079202861523, | |
| "grad_norm": 0.19367385202342016, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7305, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.5636177823198774, | |
| "grad_norm": 0.18590394121821466, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7341, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.5687276443536025, | |
| "grad_norm": 0.18488441186992707, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7482, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.5738375063873276, | |
| "grad_norm": 0.18226306867076514, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7334, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 0.20053856155426641, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7414, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.5840572304547778, | |
| "grad_norm": 0.19672564131420983, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7508, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.5891670924885029, | |
| "grad_norm": 0.1790851772225089, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7471, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.594276954522228, | |
| "grad_norm": 0.1900047612676954, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7421, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.599386816555953, | |
| "grad_norm": 0.19746465955340986, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7471, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.604496678589678, | |
| "grad_norm": 0.186549540683221, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7275, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.6096065406234032, | |
| "grad_norm": 0.1876261054598287, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7359, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.6147164026571281, | |
| "grad_norm": 0.19082325492370317, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7268, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.6198262646908534, | |
| "grad_norm": 0.2016402119888201, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7377, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.6249361267245783, | |
| "grad_norm": 0.1888126317070555, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7265, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.6300459887583036, | |
| "grad_norm": 0.17743730583327474, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7203, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.6351558507920285, | |
| "grad_norm": 0.1826162903853255, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7215, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.6402657128257538, | |
| "grad_norm": 0.19419266754404552, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7376, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.6453755748594787, | |
| "grad_norm": 0.1956453565355767, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7316, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.650485436893204, | |
| "grad_norm": 0.19765143129125318, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7374, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.655595298926929, | |
| "grad_norm": 0.19127982430051427, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7405, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.660705160960654, | |
| "grad_norm": 0.1847472801458583, | |
| "learning_rate": 5e-06, | |
| "loss": 0.724, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.6658150229943791, | |
| "grad_norm": 0.18698307703261788, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7374, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.6709248850281042, | |
| "grad_norm": 0.17533798523255767, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7202, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.6760347470618293, | |
| "grad_norm": 0.1806351825859557, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7452, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.6811446090955544, | |
| "grad_norm": 0.1767976805961292, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7338, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.6862544711292795, | |
| "grad_norm": 0.19498984484111873, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7403, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.6913643331630046, | |
| "grad_norm": 0.17701307669892918, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7299, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.6964741951967297, | |
| "grad_norm": 0.19220216566407472, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7314, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.7015840572304548, | |
| "grad_norm": 0.1829279730231264, | |
| "learning_rate": 5e-06, | |
| "loss": 0.743, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.70669391926418, | |
| "grad_norm": 0.19526766653061225, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7222, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.711803781297905, | |
| "grad_norm": 0.19455609962672274, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7253, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.71691364333163, | |
| "grad_norm": 0.21002379536162816, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7429, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.722023505365355, | |
| "grad_norm": 0.1990882316461353, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7443, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.7271333673990803, | |
| "grad_norm": 0.17934672167038826, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7497, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.7322432294328052, | |
| "grad_norm": 0.19501165745940965, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7425, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.7373530914665305, | |
| "grad_norm": 0.19248650606756543, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7297, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.7424629535002554, | |
| "grad_norm": 0.17721599710417338, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7251, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.7475728155339807, | |
| "grad_norm": 0.18509365156353424, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7221, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.7526826775677056, | |
| "grad_norm": 0.18289284691122754, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7327, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.757792539601431, | |
| "grad_norm": 0.18756279151165123, | |
| "learning_rate": 5e-06, | |
| "loss": 0.732, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.7629024016351558, | |
| "grad_norm": 0.17439303769229625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7344, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.768012263668881, | |
| "grad_norm": 0.17783376482824478, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7323, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.773122125702606, | |
| "grad_norm": 0.19448194078586717, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7292, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.778231987736331, | |
| "grad_norm": 0.18000237860427712, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7219, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.7833418497700562, | |
| "grad_norm": 0.18519882940772772, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7354, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.7884517118037813, | |
| "grad_norm": 0.19301292549147336, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7487, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.7935615738375064, | |
| "grad_norm": 0.17758380897102066, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7373, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.7986714358712315, | |
| "grad_norm": 0.1794720757802905, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7226, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.8037812979049566, | |
| "grad_norm": 0.18100374933694008, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7256, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.8088911599386817, | |
| "grad_norm": 0.1954603145633284, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7283, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.8140010219724068, | |
| "grad_norm": 0.19558607958635285, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7384, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.8191108840061319, | |
| "grad_norm": 0.1772107537935853, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7382, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.824220746039857, | |
| "grad_norm": 0.17916901000763397, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7367, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.829330608073582, | |
| "grad_norm": 0.19083482072843658, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7247, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.8344404701073072, | |
| "grad_norm": 0.1770449813805881, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7187, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.839550332141032, | |
| "grad_norm": 0.18790158384523442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7403, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.8446601941747574, | |
| "grad_norm": 0.17892362311216914, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7368, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.8497700562084822, | |
| "grad_norm": 0.1839659251785667, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7255, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.8548799182422075, | |
| "grad_norm": 0.19138496555502849, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7453, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.8599897802759324, | |
| "grad_norm": 0.18135734354491537, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7166, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.8650996423096577, | |
| "grad_norm": 0.1859082044449026, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7285, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.8702095043433826, | |
| "grad_norm": 0.1913280855307758, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7279, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.875319366377108, | |
| "grad_norm": 0.19148047998384163, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7381, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.8804292284108328, | |
| "grad_norm": 0.190776629149848, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7347, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.8855390904445581, | |
| "grad_norm": 0.18748890464701637, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7214, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.890648952478283, | |
| "grad_norm": 0.19691029617370956, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7396, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.8957588145120083, | |
| "grad_norm": 0.17143385370457725, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7305, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.9008686765457332, | |
| "grad_norm": 0.19115494111352774, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7252, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.9059785385794583, | |
| "grad_norm": 0.18589155104150923, | |
| "learning_rate": 5e-06, | |
| "loss": 0.728, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.9110884006131834, | |
| "grad_norm": 0.1870078279938856, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7259, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.9161982626469085, | |
| "grad_norm": 0.17909224396912188, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7345, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.9213081246806336, | |
| "grad_norm": 0.19885632401705697, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7289, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.9264179867143587, | |
| "grad_norm": 0.18778552722329356, | |
| "learning_rate": 5e-06, | |
| "loss": 0.725, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.9315278487480838, | |
| "grad_norm": 0.2019790249495369, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7398, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.936637710781809, | |
| "grad_norm": 0.19338701904495897, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7265, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.941747572815534, | |
| "grad_norm": 0.18703296264974872, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7332, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.946857434849259, | |
| "grad_norm": 0.1700175440342506, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7205, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.9519672968829842, | |
| "grad_norm": 0.18636496202992153, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7154, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.9570771589167093, | |
| "grad_norm": 0.1826391337083993, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7356, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.9621870209504344, | |
| "grad_norm": 0.17766191115765154, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7369, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.9672968829841593, | |
| "grad_norm": 0.18034528150782342, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7288, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.9724067450178846, | |
| "grad_norm": 0.18541894497456152, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7296, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.9775166070516095, | |
| "grad_norm": 0.19539634425789987, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7374, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.9826264690853348, | |
| "grad_norm": 0.1928837424204438, | |
| "learning_rate": 5e-06, | |
| "loss": 0.732, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.9877363311190597, | |
| "grad_norm": 0.18813671735265705, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7285, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.992846193152785, | |
| "grad_norm": 0.19024591983517306, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7386, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.9979560551865099, | |
| "grad_norm": 0.1791330764130833, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7324, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.003065917220235, | |
| "grad_norm": 0.18457472513280188, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7303, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.00817577925396, | |
| "grad_norm": 0.18684788166920566, | |
| "learning_rate": 5e-06, | |
| "loss": 0.735, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.0132856412876854, | |
| "grad_norm": 0.18385152341485855, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7204, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.0183955033214103, | |
| "grad_norm": 0.1893087134675762, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7271, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.0235053653551356, | |
| "grad_norm": 0.19001500071497598, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7286, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.0286152273888605, | |
| "grad_norm": 0.18883227887144083, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7157, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.033725089422586, | |
| "grad_norm": 0.17689498199700174, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7063, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.0388349514563107, | |
| "grad_norm": 0.19270059169949594, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7295, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.043944813490036, | |
| "grad_norm": 0.19597358123850073, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7268, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.049054675523761, | |
| "grad_norm": 0.19711968415004932, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7229, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.0541645375574857, | |
| "grad_norm": 0.18776496261358783, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7158, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.059274399591211, | |
| "grad_norm": 0.18818737294591004, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7194, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.064384261624936, | |
| "grad_norm": 0.19845798280803176, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7265, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.0694941236586613, | |
| "grad_norm": 0.18713228958457867, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7095, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.074603985692386, | |
| "grad_norm": 0.17787870833728897, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7149, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.0797138477261115, | |
| "grad_norm": 0.19472810880013228, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7178, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.0848237097598363, | |
| "grad_norm": 0.19429450371850024, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7118, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.0899335717935617, | |
| "grad_norm": 0.1941609760118733, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7265, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.0950434338272865, | |
| "grad_norm": 0.19290976310635458, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7085, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.100153295861012, | |
| "grad_norm": 0.1765241017205207, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7301, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 0.17846133756954982, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7105, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.110373019928462, | |
| "grad_norm": 0.1990655813201847, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7197, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.115482881962187, | |
| "grad_norm": 0.18227727070573727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7225, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.1205927439959122, | |
| "grad_norm": 0.1950861579503913, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7151, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.125702606029637, | |
| "grad_norm": 0.1852273630173904, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7349, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.1308124680633624, | |
| "grad_norm": 0.19463032921601597, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7209, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.1359223300970873, | |
| "grad_norm": 0.20049767813158514, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7272, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.1410321921308126, | |
| "grad_norm": 0.1964989410548727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7205, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.1461420541645375, | |
| "grad_norm": 0.18656787454813695, | |
| "learning_rate": 5e-06, | |
| "loss": 0.73, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.151251916198263, | |
| "grad_norm": 0.1869811709706087, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7211, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.1563617782319877, | |
| "grad_norm": 0.1822452717215839, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7149, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.161471640265713, | |
| "grad_norm": 0.18992052776084958, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7343, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.166581502299438, | |
| "grad_norm": 0.18438525645647527, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7094, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.171691364333163, | |
| "grad_norm": 0.19529306952253045, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7309, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.176801226366888, | |
| "grad_norm": 0.19540047885297857, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7128, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.181911088400613, | |
| "grad_norm": 0.18411641667621176, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7225, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.1870209504343383, | |
| "grad_norm": 0.19493918166759708, | |
| "learning_rate": 5e-06, | |
| "loss": 0.723, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.192130812468063, | |
| "grad_norm": 0.1843201705851165, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7133, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.1972406745017885, | |
| "grad_norm": 0.1945434906423164, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7196, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.2023505365355134, | |
| "grad_norm": 0.19008929161664118, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7224, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.2074603985692387, | |
| "grad_norm": 0.18391558057092405, | |
| "learning_rate": 5e-06, | |
| "loss": 0.723, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.2125702606029636, | |
| "grad_norm": 0.19348061164149957, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7331, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.217680122636689, | |
| "grad_norm": 0.18607054173383442, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7244, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.2227899846704138, | |
| "grad_norm": 0.18618780601208676, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7272, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.227899846704139, | |
| "grad_norm": 0.17823884880532717, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7091, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.233009708737864, | |
| "grad_norm": 0.18561422862650115, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7205, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.2381195707715893, | |
| "grad_norm": 0.17953482905796772, | |
| "learning_rate": 5e-06, | |
| "loss": 0.729, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.243229432805314, | |
| "grad_norm": 0.18191363924178688, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7111, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.2483392948390395, | |
| "grad_norm": 0.180435799688224, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7107, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.2534491568727644, | |
| "grad_norm": 0.18960270587770217, | |
| "learning_rate": 5e-06, | |
| "loss": 0.725, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.2585590189064897, | |
| "grad_norm": 0.17769968679729267, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7143, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.2636688809402146, | |
| "grad_norm": 0.19163278997385685, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7182, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.26877874297394, | |
| "grad_norm": 0.1940788858796441, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7366, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.2738886050076648, | |
| "grad_norm": 0.1776800674678045, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7069, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.27899846704139, | |
| "grad_norm": 0.17843289642160187, | |
| "learning_rate": 5e-06, | |
| "loss": 0.724, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.284108329075115, | |
| "grad_norm": 0.18057741833938729, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7137, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.28921819110884, | |
| "grad_norm": 0.1866133357180047, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7166, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.294328053142565, | |
| "grad_norm": 0.18193348825294622, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7244, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.2994379151762905, | |
| "grad_norm": 0.1929190761683958, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7277, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.3045477772100154, | |
| "grad_norm": 0.19392177452359835, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7198, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.3096576392437402, | |
| "grad_norm": 0.18143577291270357, | |
| "learning_rate": 5e-06, | |
| "loss": 0.728, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.3147675012774656, | |
| "grad_norm": 0.19443872804506757, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7272, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.3198773633111904, | |
| "grad_norm": 0.1851328945489432, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7234, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.3249872253449158, | |
| "grad_norm": 0.19038110214278162, | |
| "learning_rate": 5e-06, | |
| "loss": 0.713, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.3300970873786406, | |
| "grad_norm": 0.1874741106018047, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7161, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.335206949412366, | |
| "grad_norm": 0.19581977792981697, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7162, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.340316811446091, | |
| "grad_norm": 0.17541497625191085, | |
| "learning_rate": 5e-06, | |
| "loss": 0.723, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.345426673479816, | |
| "grad_norm": 0.20023862697490177, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7185, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.350536535513541, | |
| "grad_norm": 0.1959974413991938, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7088, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.3556463975472663, | |
| "grad_norm": 0.18473631827351808, | |
| "learning_rate": 5e-06, | |
| "loss": 0.719, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.3607562595809912, | |
| "grad_norm": 0.19338189496040809, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7239, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.3658661216147165, | |
| "grad_norm": 0.19066355345819264, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7266, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.3709759836484414, | |
| "grad_norm": 0.1883524242314222, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7186, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.3760858456821667, | |
| "grad_norm": 0.18502939250064523, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7199, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.3811957077158916, | |
| "grad_norm": 0.1792178984338457, | |
| "learning_rate": 5e-06, | |
| "loss": 0.714, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.386305569749617, | |
| "grad_norm": 0.18226944439325007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7041, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.391415431783342, | |
| "grad_norm": 0.18015473190902764, | |
| "learning_rate": 5e-06, | |
| "loss": 0.716, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.396525293817067, | |
| "grad_norm": 0.18115429600775665, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7086, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.401635155850792, | |
| "grad_norm": 0.2043473041616948, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7165, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.406745017884517, | |
| "grad_norm": 0.20268566073488395, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7153, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.411854879918242, | |
| "grad_norm": 0.19488489934465453, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7261, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.4169647419519675, | |
| "grad_norm": 0.1869426176451986, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6977, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.4220746039856924, | |
| "grad_norm": 0.19817580694708986, | |
| "learning_rate": 5e-06, | |
| "loss": 0.71, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.4271844660194173, | |
| "grad_norm": 0.2099570721696579, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7204, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.4322943280531426, | |
| "grad_norm": 0.1964657272176851, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7104, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.4374041900868675, | |
| "grad_norm": 0.1886905609835038, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7085, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.442514052120593, | |
| "grad_norm": 0.18220683381465547, | |
| "learning_rate": 5e-06, | |
| "loss": 0.722, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.4476239141543177, | |
| "grad_norm": 0.18487998134947345, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7112, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.452733776188043, | |
| "grad_norm": 0.16830917755794478, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7157, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.457843638221768, | |
| "grad_norm": 0.18354244943334422, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7152, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.462953500255493, | |
| "grad_norm": 0.18986840496033355, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7161, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.468063362289218, | |
| "grad_norm": 0.1826318712937385, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7295, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.4731732243229434, | |
| "grad_norm": 0.18527008923688568, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7115, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.4782830863566683, | |
| "grad_norm": 0.18224411387144135, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7173, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.4833929483903936, | |
| "grad_norm": 0.17811035407666295, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7095, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.4885028104241185, | |
| "grad_norm": 0.18261169308001168, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7162, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.493612672457844, | |
| "grad_norm": 0.2002887514635378, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7208, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.4987225344915687, | |
| "grad_norm": 0.1869680657646229, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7081, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.5038323965252935, | |
| "grad_norm": 0.18675570886820952, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7117, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.508942258559019, | |
| "grad_norm": 0.19944201285579877, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7198, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.514052120592744, | |
| "grad_norm": 0.18956975903685294, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7179, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.519161982626469, | |
| "grad_norm": 0.1942120087148544, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7216, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.524271844660194, | |
| "grad_norm": 0.18623164151592378, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6997, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.5293817066939193, | |
| "grad_norm": 0.19081196748322382, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7091, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.5344915687276446, | |
| "grad_norm": 0.191889152108264, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7122, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.5396014307613695, | |
| "grad_norm": 0.19018109192168367, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7195, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.5447112927950943, | |
| "grad_norm": 0.18029975730571046, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7067, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.5498211548288197, | |
| "grad_norm": 0.19081905131548996, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7184, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.554931016862545, | |
| "grad_norm": 0.18184068561404432, | |
| "learning_rate": 5e-06, | |
| "loss": 0.711, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.56004087889627, | |
| "grad_norm": 0.18141265501857734, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7298, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.5651507409299947, | |
| "grad_norm": 0.19059042586600472, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7114, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.57026060296372, | |
| "grad_norm": 0.18752747662093455, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7139, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.575370464997445, | |
| "grad_norm": 0.18531393933159526, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7152, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.5804803270311703, | |
| "grad_norm": 0.2020545252464496, | |
| "learning_rate": 5e-06, | |
| "loss": 0.724, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.585590189064895, | |
| "grad_norm": 0.1817661308445167, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7099, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.5907000510986204, | |
| "grad_norm": 0.1874710204305083, | |
| "learning_rate": 5e-06, | |
| "loss": 0.727, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.5958099131323453, | |
| "grad_norm": 0.19697140144619885, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7184, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.6009197751660706, | |
| "grad_norm": 0.1889733854232041, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7169, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.6060296371997955, | |
| "grad_norm": 0.20000474912796498, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7089, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.611139499233521, | |
| "grad_norm": 0.18020813610110156, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7264, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.6162493612672457, | |
| "grad_norm": 0.17876620180082428, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7007, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.6213592233009706, | |
| "grad_norm": 0.186937820916383, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7219, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.626469085334696, | |
| "grad_norm": 0.19293145357331443, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7116, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 0.18779972078705487, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7147, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.636688809402146, | |
| "grad_norm": 0.2004320087337195, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7046, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.641798671435871, | |
| "grad_norm": 0.18155864298582336, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7024, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.6469085334695963, | |
| "grad_norm": 0.18766505517066065, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7092, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.6520183955033216, | |
| "grad_norm": 0.18241808648535346, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7231, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.6571282575370465, | |
| "grad_norm": 0.1956290063678602, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7071, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.6622381195707714, | |
| "grad_norm": 0.19687281837098963, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7237, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.6673479816044967, | |
| "grad_norm": 0.18253071868952309, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7085, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.672457843638222, | |
| "grad_norm": 0.18783632566318903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.717, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.677567705671947, | |
| "grad_norm": 0.1856712541693123, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7121, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.682677567705672, | |
| "grad_norm": 0.17913630024093868, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7141, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.687787429739397, | |
| "grad_norm": 0.1814607328558563, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7143, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.692897291773122, | |
| "grad_norm": 0.20516860190923697, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7153, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.6980071538068473, | |
| "grad_norm": 0.18691014833866346, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7021, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.703117015840572, | |
| "grad_norm": 0.19243627032023453, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7198, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.7082268778742975, | |
| "grad_norm": 0.1782810408332171, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7144, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.7133367399080224, | |
| "grad_norm": 0.18246834780709842, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7073, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.7184466019417477, | |
| "grad_norm": 0.19188458416675994, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7033, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.7235564639754726, | |
| "grad_norm": 0.19431224425803312, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7121, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.728666326009198, | |
| "grad_norm": 0.19472680477435397, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7204, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.7337761880429228, | |
| "grad_norm": 0.17416333272267362, | |
| "learning_rate": 5e-06, | |
| "loss": 0.722, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.738886050076648, | |
| "grad_norm": 0.17997245175531543, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7209, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.743995912110373, | |
| "grad_norm": 0.1818254672253798, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7064, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.7491057741440983, | |
| "grad_norm": 0.19143052614657435, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7251, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.754215636177823, | |
| "grad_norm": 0.1849449249230007, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7134, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.759325498211548, | |
| "grad_norm": 0.18960800644635575, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7132, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.7644353602452734, | |
| "grad_norm": 0.18450711551330742, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7218, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.7695452222789987, | |
| "grad_norm": 0.1775798100347251, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7246, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.7746550843127236, | |
| "grad_norm": 0.19385869953809406, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7154, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.7797649463464484, | |
| "grad_norm": 0.18947315882216123, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7146, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.7848748083801738, | |
| "grad_norm": 0.19874490028355687, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7098, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.789984670413899, | |
| "grad_norm": 0.19216650910265412, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7157, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.795094532447624, | |
| "grad_norm": 0.1884532290700664, | |
| "learning_rate": 5e-06, | |
| "loss": 0.73, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.800204394481349, | |
| "grad_norm": 0.178944069456414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7023, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.805314256515074, | |
| "grad_norm": 0.19897724638804584, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7266, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.810424118548799, | |
| "grad_norm": 0.19676207860709408, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7042, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.8155339805825244, | |
| "grad_norm": 0.17311366117436, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7059, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.8206438426162492, | |
| "grad_norm": 0.1862348663291006, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7012, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.8257537046499746, | |
| "grad_norm": 0.19532852701482903, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7105, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.8308635666836994, | |
| "grad_norm": 0.18410412264007187, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7163, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.8359734287174247, | |
| "grad_norm": 0.21025902958863738, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7359, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.8410832907511496, | |
| "grad_norm": 0.17818540055082727, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7111, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.846193152784875, | |
| "grad_norm": 0.1842948913149892, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7167, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.8513030148186, | |
| "grad_norm": 0.18686037650976978, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7138, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.856412876852325, | |
| "grad_norm": 0.203403946822995, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6996, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.86152273888605, | |
| "grad_norm": 0.19304193259535427, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7184, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.8666326009197753, | |
| "grad_norm": 0.17636221017029402, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6989, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.8717424629535, | |
| "grad_norm": 0.2037376472703345, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7179, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.876852324987225, | |
| "grad_norm": 0.19200332931870842, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7139, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.8819621870209504, | |
| "grad_norm": 0.18491595431761487, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7128, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.8870720490546757, | |
| "grad_norm": 0.1838785582117868, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6955, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.8921819110884006, | |
| "grad_norm": 0.19428058651152774, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7102, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.8972917731221255, | |
| "grad_norm": 0.17936626194327465, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7078, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.902401635155851, | |
| "grad_norm": 0.17631468596491473, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7162, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.907511497189576, | |
| "grad_norm": 0.18415745158446212, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7233, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.912621359223301, | |
| "grad_norm": 0.17279113129182289, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6992, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.917731221257026, | |
| "grad_norm": 0.19005862366918605, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7015, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.922841083290751, | |
| "grad_norm": 0.1865410089900959, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7043, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.927950945324476, | |
| "grad_norm": 0.19166084388796084, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7212, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.9330608073582014, | |
| "grad_norm": 0.19183886093470404, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7117, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.9381706693919263, | |
| "grad_norm": 0.18542747500934698, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7031, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.9432805314256516, | |
| "grad_norm": 0.19800100325472195, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7003, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.9483903934593765, | |
| "grad_norm": 0.19068782203027565, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7157, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.953500255493102, | |
| "grad_norm": 0.1893011994562663, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7049, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.9586101175268267, | |
| "grad_norm": 0.17894632421886825, | |
| "learning_rate": 5e-06, | |
| "loss": 0.721, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.963719979560552, | |
| "grad_norm": 0.18749644947518349, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7245, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.968829841594277, | |
| "grad_norm": 0.18920042895955116, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7086, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.973939703628002, | |
| "grad_norm": 0.18597523756708192, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7143, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.979049565661727, | |
| "grad_norm": 0.18741696462849414, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7195, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.9841594276954524, | |
| "grad_norm": 0.18985455877786367, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7091, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.9892692897291773, | |
| "grad_norm": 0.18003618910403507, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7111, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.994379151762902, | |
| "grad_norm": 0.19511416723954572, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7105, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.9994890137966275, | |
| "grad_norm": 0.18652462173874393, | |
| "learning_rate": 5e-06, | |
| "loss": 0.7294, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 5871, | |
| "total_flos": 2506240622592000.0, | |
| "train_loss": 0.754048796695819, | |
| "train_runtime": 27383.3094, | |
| "train_samples_per_second": 109.726, | |
| "train_steps_per_second": 0.214 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5871, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2506240622592000.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |